From: mickey Date: Thu, 9 Mar 2000 00:08:07 +0000 (+0000) Subject: new grep 2.4.1 X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=c677fea39bcde6c52af16dd1f56809a87af489ea;p=openbsd new grep 2.4.1 whole bunch of bug fixes, mmap support (w/ --mmap) changed binary file grep behavior, but can be overridden w/ -a millert@ ok --- diff --git a/gnu/usr.bin/grep/AUTHORS b/gnu/usr.bin/grep/AUTHORS index e3e033b19a0..65ff8a1cd26 100644 --- a/gnu/usr.bin/grep/AUTHORS +++ b/gnu/usr.bin/grep/AUTHORS @@ -20,10 +20,22 @@ non-matching text before calling the regexp matcher was originally due to James Woods. He also contributed some code to early versions of GNU grep. -Finally, I would like to thank Andrew Hume for many fascinating discussions +Mike Haertel would like to thank Andrew Hume for many fascinating discussions of string searching issues over the years. Hume & Sunday's excellent paper on fast string searching (AT&T Bell Laboratories CSTR #156) describes some of the history of the subject, as well as providing exhaustive performance analysis of various implementation alternatives. The inner loop of GNU grep is similar to Hume & Sunday's recommended "Tuned Boyer Moore" inner loop. + +More work was done on regex.[ch] by Ulrich Drepper and Arnold +Robbins. Regex is now part of GNU C library, see this package +for complete details and credits. + +Arnold Robbins contributed to improve dfa.[ch]. In fact +it came straight from gawk-3.0.3 with small editing and fixes. + +Many folks contributed; see THANKS. If I omitted someone, please +send me email. + +Alain Magloire is the current maintainer. diff --git a/gnu/usr.bin/grep/COPYING b/gnu/usr.bin/grep/COPYING index a43ea2126fb..d60c31a97a5 100644 --- a/gnu/usr.bin/grep/COPYING +++ b/gnu/usr.bin/grep/COPYING @@ -2,7 +2,7 @@ Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. 
- 675 Mass Ave, Cambridge, MA 02139, USA + 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. @@ -279,7 +279,7 @@ POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS - Appendix: How to Apply These Terms to Your New Programs + How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it @@ -291,7 +291,7 @@ convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. - Copyright (C) 19yy + Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -305,14 +305,15 @@ the "copyright" line and a pointer to where the full notice is found. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: - Gnomovision version 69, Copyright (C) 19yy name of author + Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. 
diff --git a/gnu/usr.bin/grep/Makefile b/gnu/usr.bin/grep/Makefile index 188a8b8ed23..82ad746643a 100644 --- a/gnu/usr.bin/grep/Makefile +++ b/gnu/usr.bin/grep/Makefile @@ -1,16 +1,26 @@ -# $OpenBSD: Makefile,v 1.3 1997/08/06 23:44:10 grr Exp $ +# $OpenBSD: Makefile,v 1.4 2000/03/09 00:08:07 mickey Exp $ # $NetBSD: Makefile,v 1.6 1995/04/23 07:58:41 cgd Exp $ PROG= grep -SRCS= dfa.c grep.c getopt.c kwset.c obstack.c regex.c search.c -CFLAGS+=-DGREP -DHAVE_STRING_H=1 -DHAVE_SYS_PARAM_H=1 -DHAVE_UNISTD_H=1 \ - -DHAVE_GETPAGESIZE=1 -DHAVE_MEMCHR=1 -DHAVE_STRERROR=1 -DMULT=2 +SRCS= dfa.c grep.c getopt.c getopt1.c kwset.c obstack.c regex.c savedir.c \ + search.c stpcpy.c +CFLAGS+=-I${.CURDIR} -DVERSION=\"2.4.1\" -DPACKAGE=\"grep\" -DGREP \ + -DSTDC_HEADERS -DHAVE_MEMCHR -DHAVE_MEMMOVE -DHAVE_MEMCPY \ + -DHAVE_DIRENT_H -DHAVE_UNISTD_H -DHAVE_STRERROR \ + -DHAVE_SETMODE -DHAVE_GETPAGESIZE -DHAVE_MMAP LINKS+= ${BINDIR}/grep ${BINDIR}/egrep \ ${BINDIR}/grep ${BINDIR}/fgrep MLINKS= grep.1 egrep.1 grep.1 fgrep.1 + check: all - sh ${.CURDIR}/tests/check.sh ${.CURDIR}/tests + GREP=./grep sh ${.CURDIR}/tests/bre.sh ${.CURDIR}/tests + #GREP=./grep sh ${.CURDIR}/tests/ere.sh ${.CURDIR}/tests + GREP=./grep sh ${.CURDIR}/tests/khadafy.sh ${.CURDIR}/tests + GREP=./grep sh ${.CURDIR}/tests/options.sh ${.CURDIR}/tests + GREP=./grep sh ${.CURDIR}/tests/spencer1.sh ${.CURDIR}/tests + GREP=./grep sh ${.CURDIR}/tests/status.sh ${.CURDIR}/tests + GREP=./grep sh ${.CURDIR}/tests/warning.sh ${.CURDIR}/tests .include diff --git a/gnu/usr.bin/grep/NEWS b/gnu/usr.bin/grep/NEWS index eb0b513d6cc..e763647db3f 100644 --- a/gnu/usr.bin/grep/NEWS +++ b/gnu/usr.bin/grep/NEWS @@ -1,3 +1,131 @@ +Version 2.4.1 + + - If the final byte of an input file is not a newline, grep now silently + supplies one. + + - The new option --binary-files=TYPE makes grep assume that a binary input + file is of type TYPE. + --binary-files='binary' (the default) outputs a 1-line summary of matches. 
+ --binary-files='without-match' assumes binary files do not match. + --binary-files='text' treats binary files as text + (equivalent to the -a or --text option). + + - New option -I; equivalent to --binary-files='without-match'. + +Version 2.4: + + - egrep is now equivalent to `grep -E' as required by POSIX, + removing a longstanding source of confusion and incompatibility. + `grep' is now more forgiving about stray `{'s, for backward + compatibility with traditional egrep. + + - The lower bound of an interval is not optional. + You must use an explicit zero, e.g. `x{0,10}' instead of `x{,10}'. + (The old documentation incorrectly claimed that it was optional.) + + - The --revert-match option has been renamed to --invert-match. + + - The --fixed-regexp option has been renamed to --fixed-string. + + - New option -H or --with-filename. + + - New option --mmap. By default, GNU grep now uses read instead of mmap. + This is faster on some hosts, and is safer on all. + + - The new option -z or --null-data causes `grep' to treat a zero byte + (the ASCII NUL character) as a line terminator in input data, and + to treat newlines as ordinary data. + + - The new option -Z or --null causes `grep' to output a zero byte + instead of the normal separator after a file name. + + - These two options can be used with commands like `find -print0', + `perl -0', `sort -z', and `xargs -0' to process arbitrary file names, + even those that contain newlines. + + - The environment variable GREP_OPTIONS specifies default options; + e.g. GREP_OPTIONS='--directories=skip' reestablishes grep 2.1's + behavior of silently skipping directories. + + - You can specify a matcher multiple times without error, e.g. + `grep -E -E' or `fgrep -F'. It is still an error to specify + conflicting matchers. + + - -u and -U are now allowed on non-DOS hosts, and have no effect. + + - Modifications of the tests scripts to go around the "Broken Pipe" + errors from bash. See Bash FAQ. 
+ + - New option -r or --recursive or --directories=recurse. + (This option was also in grep 2.3, but wasn't announced here.) + + - --without-included-regex disabled; it was causing bogus reports, i.e. + doing more harm than good. + +Version 2.3: + + - When searching a binary file FOO, grep now just reports + `Binary file FOO matches' instead of outputting binary data. + This is typically more useful than the old behavior, + and it is also more consistent with other utilities like `diff'. + A file is considered to be binary if it contains a NUL (i.e. zero) byte. + + The new -a or --text option causes `grep' to assume that all + input is text. (This option has the same meaning as with `diff'.) + Use it if you want binary data in your output. + + - `grep' now searches directories just like ordinary files; it no longer + silently skips directories. This is the traditional behavior of + Unix text utilities (in particular, of traditional `grep'). + Hence `grep PATTERN DIRECTORY' should report + `grep: DIRECTORY: Is a directory' on hosts where the operating system + does not permit programs to read directories directly, and + `grep: DIRECTORY: Binary file matches' (or nothing) otherwise. + + The new -d ACTION or --directories=ACTION option affects directory handling. + `-d skip' causes `grep' to silently skip directories, as in grep 2.1; + `-d read' (the default) causes `grep' to read directories if possible, + as in earlier versions of grep. + + - The MS-DOS and Microsoft Windows ports now behave identically to the + GNU and Unix ports with respect to binary files and directories. + +Version 2.2: + +Bug fix release. + + - Status error number fix. + - Skipping directories removed. + - Many typos fix. + - -f /dev/null fix(not to consider as an empty pattern). + - Checks for wctype/wchar. + - -E was using the wrong matcher fix. + - bug in regex char class fix + - Fixes for DJGPP + +Version 2.1: + +This is a bug fix release(see Changelog) i.e. no new features. 
+ + - More compliance to GNU standard. + - Long options. + - Internationalisation. + - Use automake/autoconf. + - Directory hierarchy change. + - Sigvec with -e on Linux corrected. + - Sigvec with -f on Linux corrected. + - Sigvec with the mmap() corrected. + - Bug in kwset corrected. + - -q, -L and -l stop on first match. + - New and improve regex.[ch] from Ulrich Drepper. + - New and improve dfa.[ch] from Arnold Robbins. + - Prototypes for over zealous C compiler. + - Not scanning a file, if it's a directory + (cause problems on Sun). + - Ported to MS-DOS/MS-Windows with DJGPP tools. + +See Changelog for the full story and proper credits. + Version 2.0: The most important user visible change is that egrep and fgrep have diff --git a/gnu/usr.bin/grep/PROJECTS b/gnu/usr.bin/grep/PROJECTS deleted file mode 100644 index 67e9a2aad6a..00000000000 --- a/gnu/usr.bin/grep/PROJECTS +++ /dev/null @@ -1,15 +0,0 @@ -Write Texinfo documentation for grep. The manual page would be a good -place to start, but Info documents are also supposed to contain a -tutorial and examples. - -Fix the DFA matcher to never use exponential space. (Fortunately, these -cases are rare.) - -Improve the performance of the regex backtracking matcher. This matcher -is agonizingly slow, and is responsible for grep sometimes being slower -than Unix grep when backreferences are used. - -Provide support for the Posix [= =] and [. .] constructs. This is -difficult because it requires locale-dependent details of the character -set and collating sequence, but Posix does not standardize any method -for accessing this information! diff --git a/gnu/usr.bin/grep/README b/gnu/usr.bin/grep/README index bc34a859063..66c1bb2ba78 100644 --- a/gnu/usr.bin/grep/README +++ b/gnu/usr.bin/grep/README @@ -1,6 +1,6 @@ -This is GNU grep 2.0, the "fastest grep in the west" (we hope). All +This is GNU grep, the "fastest grep in the west" (we hope). All bugs reported in previous releases have been fixed. 
Many exciting new -bugs have probably been introduced in this major revision. +bugs have probably been introduced in this revision. GNU grep is provided "as is" with no warranty. The exact terms under which you may use and (re)distribute this program are detailed @@ -14,15 +14,13 @@ look at every character. The result is typically many times faster than Unix grep or egrep. (Regular expressions containing backreferencing will run more slowly, however.) -See the file AUTHORS for a list of authors and other contributors. +See the files AUTHORS and THANKS for a list of authors and other contributors. See the file INSTALL for compilation and installation instructions. -See the file MANIFEST for a list of files in this distribution. - See the file NEWS for a description of major changes in this release. -See the file PROJECTS if you want to be mentioned in AUTHORS. +See the file TODO for ideas on how you could help us improve grep. -Send bug reports to bug-gnu-utils@prep.ai.mit.edu. Be sure to +Send bug reports to bug-gnu-utils@gnu.org. Be sure to include the word "grep" in your Subject: header field. diff --git a/gnu/usr.bin/grep/THANKS b/gnu/usr.bin/grep/THANKS new file mode 100644 index 00000000000..40295fc8d7e --- /dev/null +++ b/gnu/usr.bin/grep/THANKS @@ -0,0 +1,50 @@ +Aharon Robbins +Akim Demaille +Alain Magloire +Andreas Schwab +Andreas Ley +Ben Elliston +David J MacKenzie +David O'Brien +Eli Zaretskii +Florian La Roche +Franc,ois Pinard +Grant McDorman +Harald Hanche-Olsen +Jeff Bailey +Jim Hand +Jim Meyering +Jochen Hein +Joel N. Weber II +John Hughes +Jorge Stolfi +Karl Berry +Karl Heuer +Kaveh R. Ghazi +Kazuro Furukawa +Keith Bostic +Krishna Sethuraman +Mark Waite +Martin P.J. Zinser +Martin Rex +Michael Aichlmayr +Miles Bader +Olaf Kirch +Paul Eggert +Paul Kimoto +Phillip C. 
Brisco +Philippe Defert +Philippe De Muyter +Roland Roberts +Ruslan Ermilov +Shannon Hill +Sotiris Vassilopoulos +Stewart Levin +Sydoruk Stepan +Tom 'moof' Spindler +Tom Tromey +Ulrich Drepper +UEBAYASHI Masao +Volker Borchert +Wichert Akkerman +William Bader diff --git a/gnu/usr.bin/grep/TODO b/gnu/usr.bin/grep/TODO new file mode 100644 index 00000000000..49e65c8edd1 --- /dev/null +++ b/gnu/usr.bin/grep/TODO @@ -0,0 +1,62 @@ +Write Texinfo documentation for grep. The manual page would be a good +place to start, but Info documents are also supposed to contain a +tutorial and examples. + +Fix the DFA matcher to never use exponential space. (Fortunately, these +cases are rare.) + +Improve the performance of the regex backtracking matcher. This matcher +is agonizingly slow, and is responsible for grep sometimes being slower +than Unix grep when backreferences are used. + +Provide support for the Posix [= =] and [. .] constructs. This is +difficult because it requires locale-dependent details of the character +set and collating sequence, but Posix does not standardize any method +for accessing this information! + +## +Provide some sort of Hilight ... hmm Not. + +DONE: + Have different binaries for fgrep, egrep and grep. + It needs a complete rewrite of the main and how the matcher + is called; it should not depend on the name of the program. + +DONE: + Port to Win NT/95 see Delorie or cygnus win32 project + +DONE: + Merge all the modifs that are scattered around in the + various Linux distributions. + +Some test in tests/spencer2.tests should have failed !!! +Need to filter out some bugs in dfa.[ch]/regex.[ch]. + +Threads for grep ? + +Grep does 32 bits arithmetic, it needs to move to 64. + +Clean up, too many #ifdef's !! + +DONE: + Merge the work done By Paul Eggert + (--text, --directories=ACTION, large files). + +Check some new Algorithms for matching, talk to Karl Berry and Nelson. +Sunday's "Quick Search" Algorithm (CACM 33, 8 August 1990 pp. 
132-142) +claims that his algo. is faster than Boyer-Moore ???? +Worth Checking. + +Take a look at cgrep (Context grep) seems like nice work. +Take a look at sgrep (Struct grep). +Take a look at agrep (Approximate grep), from glimpse. +Can we merge ? + +POSIX Compliance see p10003.x + +Moving away from GNU regex API for POSIX regex API. + +DONE(well never really finish): + Finish I18N. + +Better and faster !! diff --git a/gnu/usr.bin/grep/dfa.c b/gnu/usr.bin/grep/dfa.c index 38fe1e1caa7..048e901c5e8 100644 --- a/gnu/usr.bin/grep/dfa.c +++ b/gnu/usr.bin/grep/dfa.c @@ -1,5 +1,5 @@ /* dfa.c - deterministic extended regexp routines for GNU - Copyright (C) 1988 Free Software Foundation, Inc. + Copyright 1988, 1998, 2000 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,23 +13,23 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ - -#ifndef lint -static char rcsid[] = "$Id: dfa.c,v 1.1.1.1 1995/10/18 08:40:17 deraadt Exp $"; -#endif /* not lint */ + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ /* Written June, 1988 by Mike Haertel Modified July, 1988 by Arthur David Olson to assist BMG speedups */ +#ifdef HAVE_CONFIG_H +#include +#endif + #include #include #include +#include #ifdef STDC_HEADERS #include #else -#include extern char *calloc(), *malloc(), *realloc(); extern void free(); #endif @@ -42,23 +42,16 @@ extern void free(); #include #endif +#ifndef DEBUG /* use the same approach as regex.c */ +#undef assert +#define assert(e) +#endif /* DEBUG */ + #ifndef isgraph #define isgraph(C) (isprint(C) && !isspace(C)) #endif -#ifdef isascii -#define ISALPHA(C) (isascii(C) && isalpha(C)) -#define ISUPPER(C) (isascii(C) && isupper(C)) -#define ISLOWER(C) (isascii(C) && islower(C)) -#define ISDIGIT(C) (isascii(C) && isdigit(C)) -#define ISXDIGIT(C) (isascii(C) && isxdigit(C)) -#define ISSPACE(C) (isascii(C) && isspace(C)) -#define ISPUNCT(C) (isascii(C) && ispunct(C)) -#define ISALNUM(C) (isascii(C) && isalnum(C)) -#define ISPRINT(C) (isascii(C) && isprint(C)) -#define ISGRAPH(C) (isascii(C) && isgraph(C)) -#define ISCNTRL(C) (isascii(C) && iscntrl(C)) -#else +#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) #define ISALPHA(C) isalpha(C) #define ISUPPER(C) isupper(C) #define ISLOWER(C) islower(C) @@ -70,57 +63,130 @@ extern void free(); #define ISPRINT(C) isprint(C) #define ISGRAPH(C) isgraph(C) #define ISCNTRL(C) iscntrl(C) +#else +#define ISALPHA(C) (isascii(C) && isalpha(C)) +#define ISUPPER(C) (isascii(C) && isupper(C)) +#define ISLOWER(C) (isascii(C) && islower(C)) +#define ISDIGIT(C) (isascii(C) && isdigit(C)) +#define ISXDIGIT(C) (isascii(C) && isxdigit(C)) +#define ISSPACE(C) (isascii(C) && isspace(C)) +#define ISPUNCT(C) (isascii(C) && ispunct(C)) +#define ISALNUM(C) (isascii(C) && isalnum(C)) +#define ISPRINT(C) (isascii(C) 
&& isprint(C)) +#define ISGRAPH(C) (isascii(C) && isgraph(C)) +#define ISCNTRL(C) (isascii(C) && iscntrl(C)) +#endif + +/* ISASCIIDIGIT differs from ISDIGIT, as follows: + - Its arg may be any int or unsigned int; it need not be an unsigned char. + - It's guaranteed to evaluate its argument exactly once. + - It's typically faster. + Posix 1003.2-1992 section 2.5.2.1 page 50 lines 1556-1558 says that + only '0' through '9' are digits. Prefer ISASCIIDIGIT to ISDIGIT unless + it's important to use the locale's definition of `digit' even when the + host does not conform to Posix. */ +#define ISASCIIDIGIT(c) ((unsigned) (c) - '0' <= 9) + +/* If we (don't) have I18N. */ +/* glibc defines _ */ +#ifndef _ +# ifdef HAVE_LIBINTL_H +# include +# ifndef _ +# define _(Str) gettext (Str) +# endif +# else +# define _(Str) (Str) +# endif #endif -#include "dfa.h" #include "regex.h" +#include "dfa.h" -#if __STDC__ -typedef void *ptr_t; -#else -typedef char *ptr_t; +/* HPUX, define those as macros in sys/param.h */ +#ifdef setbit +# undef setbit +#endif +#ifdef clrbit +# undef clrbit #endif -static void dfamust(); +static void dfamust PARAMS ((struct dfa *dfa)); + +static ptr_t xcalloc PARAMS ((size_t n, size_t s)); +static ptr_t xmalloc PARAMS ((size_t n)); +static ptr_t xrealloc PARAMS ((ptr_t p, size_t n)); +#ifdef DEBUG +static void prtok PARAMS ((token t)); +#endif +static int tstbit PARAMS ((int b, charclass c)); +static void setbit PARAMS ((int b, charclass c)); +static void clrbit PARAMS ((int b, charclass c)); +static void copyset PARAMS ((charclass src, charclass dst)); +static void zeroset PARAMS ((charclass s)); +static void notset PARAMS ((charclass s)); +static int equal PARAMS ((charclass s1, charclass s2)); +static int charclass_index PARAMS ((charclass s)); +static int looking_at PARAMS ((const char *s)); +static token lex PARAMS ((void)); +static void addtok PARAMS ((token t)); +static void atom PARAMS ((void)); +static int nsubtoks PARAMS ((int tindex)); +static 
void copytoks PARAMS ((int tindex, int ntokens)); +static void closure PARAMS ((void)); +static void branch PARAMS ((void)); +static void regexp PARAMS ((int toplevel)); +static void copy PARAMS ((position_set *src, position_set *dst)); +static void insert PARAMS ((position p, position_set *s)); +static void merge PARAMS ((position_set *s1, position_set *s2, position_set *m)); +static void delete PARAMS ((position p, position_set *s)); +static int state_index PARAMS ((struct dfa *d, position_set *s, + int newline, int letter)); +static void build_state PARAMS ((int s, struct dfa *d)); +static void build_state_zero PARAMS ((struct dfa *d)); +static char *icatalloc PARAMS ((char *old, char *new)); +static char *icpyalloc PARAMS ((char *string)); +static char *istrstr PARAMS ((char *lookin, char *lookfor)); +static void ifree PARAMS ((char *cp)); +static void freelist PARAMS ((char **cpp)); +static char **enlist PARAMS ((char **cpp, char *new, size_t len)); +static char **comsubs PARAMS ((char *left, char *right)); +static char **addlists PARAMS ((char **old, char **new)); +static char **inboth PARAMS ((char **left, char **right)); static ptr_t -xcalloc(n, s) - int n; - size_t s; +xcalloc (size_t n, size_t s) { ptr_t r = calloc(n, s); if (!r) - dfaerror("Memory exhausted"); + dfaerror(_("Memory exhausted")); return r; } static ptr_t -xmalloc(n) - size_t n; +xmalloc (size_t n) { ptr_t r = malloc(n); assert(n != 0); if (!r) - dfaerror("Memory exhausted"); + dfaerror(_("Memory exhausted")); return r; } static ptr_t -xrealloc(p, n) - ptr_t p; - size_t n; +xrealloc (ptr_t p, size_t n) { ptr_t r = realloc(p, n); assert(n != 0); if (!r) - dfaerror("Memory exhausted"); + dfaerror(_("Memory exhausted")); return r; } -#define CALLOC(p, t, n) ((p) = (t *) xcalloc((n), sizeof (t))) +#define CALLOC(p, t, n) ((p) = (t *) xcalloc((size_t)(n), sizeof (t))) #define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t))) #define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * 
sizeof (t))) @@ -136,8 +202,7 @@ xrealloc(p, n) #ifdef DEBUG static void -prtok(t) - token t; +prtok (token t) { char *s; @@ -175,33 +240,25 @@ prtok(t) /* Stuff pertaining to charclasses. */ static int -tstbit(b, c) - int b; - charclass c; +tstbit (int b, charclass c) { return c[b / INTBITS] & 1 << b % INTBITS; } static void -setbit(b, c) - int b; - charclass c; +setbit (int b, charclass c) { c[b / INTBITS] |= 1 << b % INTBITS; } static void -clrbit(b, c) - int b; - charclass c; +clrbit (int b, charclass c) { c[b / INTBITS] &= ~(1 << b % INTBITS); } static void -copyset(src, dst) - charclass src; - charclass dst; +copyset (charclass src, charclass dst) { int i; @@ -210,8 +267,7 @@ copyset(src, dst) } static void -zeroset(s) - charclass s; +zeroset (charclass s) { int i; @@ -220,8 +276,7 @@ zeroset(s) } static void -notset(s) - charclass s; +notset (charclass s) { int i; @@ -230,9 +285,7 @@ notset(s) } static int -equal(s1, s2) - charclass s1; - charclass s2; +equal (charclass s1, charclass s2) { int i; @@ -247,8 +300,7 @@ static struct dfa *dfa; /* Find the index of charclass s in dfa->charclasses, or allocate a new charclass. */ static int -charclass_index(s) - charclass s; +charclass_index (charclass s) { int i; @@ -262,20 +314,22 @@ charclass_index(s) } /* Syntax bits controlling the behavior of the lexical analyzer. */ -static int syntax_bits, syntax_bits_set; +static reg_syntax_t syntax_bits, syntax_bits_set; /* Flag for case-folding letters into sets. */ static int case_fold; +/* End-of-line byte in data. */ +static unsigned char eolbyte; + /* Entry point to set syntax options. */ void -dfasyntax(bits, fold) - int bits; - int fold; +dfasyntax (reg_syntax_t bits, int fold, int eol) { syntax_bits_set = 1; syntax_bits = bits; case_fold = fold; + eolbyte = eol; } /* Lexical analyzer. All the dross that deals with the obnoxious @@ -285,7 +339,7 @@ dfasyntax(bits, fold) static char *lexstart; /* Pointer to beginning of input string. 
*/ static char *lexptr; /* Pointer to next input character. */ -static lexleft; /* Number of characters remaining. */ +static int lexleft; /* Number of characters remaining. */ static token lasttok; /* Previous token returned; initially END. */ static int laststart; /* True if we're separated from beginning or (, | only by zero-width characters. */ @@ -296,15 +350,21 @@ static int minrep, maxrep; /* Repeat counts for {m,n}. */ #define FETCH(c, eoferr) \ { \ if (! lexleft) \ - if (eoferr != 0) \ - dfaerror(eoferr); \ - else \ - return END; \ + { \ + if (eoferr != 0) \ + dfaerror (eoferr); \ + else \ + return lasttok = END; \ + } \ (c) = (unsigned char) *lexptr++; \ --lexleft; \ } +#ifdef __STDC__ +#define FUNC(F, P) static int F(int c) { return P(c); } +#else #define FUNC(F, P) static int F(c) int c; { return P(c); } +#endif FUNC(is_alpha, ISALPHA) FUNC(is_upper, ISUPPER) @@ -318,32 +378,41 @@ FUNC(is_print, ISPRINT) FUNC(is_graph, ISGRAPH) FUNC(is_cntrl, ISCNTRL) +static int +is_blank (int c) +{ + return (c == ' ' || c == '\t'); +} + /* The following list maps the names of the Posix named character classes to predicate functions that determine whether a given character is in the class. The leading [ has already been eaten by the lexical analyzer. 
*/ static struct { - char *name; - int (*pred)(); + const char *name; + int (*pred) PARAMS ((int)); } prednames[] = { - ":alpha:]", is_alpha, - ":upper:]", is_upper, - ":lower:]", is_lower, - ":digit:]", is_digit, - ":xdigit:]", is_xdigit, - ":space:]", is_space, - ":punct:]", is_punct, - ":alnum:]", is_alnum, - ":print:]", is_print, - ":graph:]", is_graph, - ":cntrl:]", is_cntrl, - 0 + { ":alpha:]", is_alpha }, + { ":upper:]", is_upper }, + { ":lower:]", is_lower }, + { ":digit:]", is_digit }, + { ":xdigit:]", is_xdigit }, + { ":space:]", is_space }, + { ":punct:]", is_punct }, + { ":alnum:]", is_alnum }, + { ":print:]", is_print }, + { ":graph:]", is_graph }, + { ":cntrl:]", is_cntrl }, + { ":blank:]", is_blank }, + { 0 } }; +/* Return non-zero if C is a `word-constituent' byte; zero otherwise. */ +#define IS_WORD_CONSTITUENT(C) (ISALNUM(C) || (C) == '_') + static int -looking_at(s) - char *s; +looking_at (char const *s) { - int len; + size_t len; len = strlen(s); if (lexleft < len) @@ -352,12 +421,14 @@ looking_at(s) } static token -lex() +lex (void) { token c, c1, c2; int backslash = 0, invert; charclass ccl; int i; + char lo[2]; + char hi[2]; /* Basic plan: We fetch a character. If it's a backslash, we set the backslash flag and go through the loop again. 
@@ -374,7 +445,7 @@ lex() if (backslash) goto normal_char; if (lexleft == 0) - dfaerror("Unfinished \\ escape"); + dfaerror(_("Unfinished \\ escape")); backslash = 1; break; @@ -420,23 +491,33 @@ lex() } goto normal_char; + case '`': + if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) + return lasttok = BEGLINE; /* FIXME: should be beginning of string */ + goto normal_char; + + case '\'': + if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) + return lasttok = ENDLINE; /* FIXME: should be end of string */ + goto normal_char; + case '<': - if (backslash) + if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) return lasttok = BEGWORD; goto normal_char; case '>': - if (backslash) + if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) return lasttok = ENDWORD; goto normal_char; case 'b': - if (backslash) + if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) return lasttok = LIMWORD; goto normal_char; case 'B': - if (backslash) + if (backslash && !(syntax_bits & RE_NO_GNU_OPS)) return lasttok = NOTLIMWORD; goto normal_char; @@ -470,44 +551,76 @@ lex() goto normal_char; if (backslash != ((syntax_bits & RE_NO_BK_BRACES) == 0)) goto normal_char; - minrep = maxrep = 0; + if (!(syntax_bits & RE_CONTEXT_INDEP_OPS) && laststart) + goto normal_char; + + if (syntax_bits & RE_NO_BK_BRACES) + { + /* Scan ahead for a valid interval; if it's not valid, + treat it as a literal '{'. */ + int lo = -1, hi = -1; + char const *p = lexptr; + char const *lim = p + lexleft; + for (; p != lim && ISASCIIDIGIT (*p); p++) + lo = (lo < 0 ? 0 : lo * 10) + *p - '0'; + if (p != lim && *p == ',') + while (++p != lim && ISASCIIDIGIT (*p)) + hi = (hi < 0 ? 
0 : hi * 10) + *p - '0'; + else + hi = lo; + if (p == lim || *p != '}' + || lo < 0 || RE_DUP_MAX < hi || (0 <= hi && hi < lo)) + goto normal_char; + } + + minrep = 0; /* Cases: {M} - exact count {M,} - minimum count, maximum is infinity - {,M} - 0 through M {M,N} - M through N */ - FETCH(c, "unfinished repeat count"); - if (ISDIGIT(c)) + FETCH(c, _("unfinished repeat count")); + if (ISASCIIDIGIT (c)) { minrep = c - '0'; for (;;) { - FETCH(c, "unfinished repeat count"); - if (!ISDIGIT(c)) + FETCH(c, _("unfinished repeat count")); + if (! ISASCIIDIGIT (c)) break; minrep = 10 * minrep + c - '0'; } } - else if (c != ',') - dfaerror("malformed repeat count"); + else + dfaerror(_("malformed repeat count")); if (c == ',') - for (;;) - { - FETCH(c, "unfinished repeat count"); - if (!ISDIGIT(c)) - break; - maxrep = 10 * maxrep + c - '0'; - } + { + FETCH (c, _("unfinished repeat count")); + if (! ISASCIIDIGIT (c)) + maxrep = -1; + else + { + maxrep = c - '0'; + for (;;) + { + FETCH (c, _("unfinished repeat count")); + if (! 
ISASCIIDIGIT (c)) + break; + maxrep = 10 * maxrep + c - '0'; + } + if (0 <= maxrep && maxrep < minrep) + dfaerror (_("malformed repeat count")); + } + } else maxrep = minrep; if (!(syntax_bits & RE_NO_BK_BRACES)) { if (c != '\\') - dfaerror("malformed repeat count"); - FETCH(c, "unfinished repeat count"); + dfaerror(_("malformed repeat count")); + FETCH(c, _("unfinished repeat count")); } if (c != '}') - dfaerror("malformed repeat count"); + dfaerror(_("malformed repeat count")); laststart = 0; return lasttok = REPMN; @@ -549,7 +662,7 @@ lex() zeroset(ccl); notset(ccl); if (!(syntax_bits & RE_DOT_NEWLINE)) - clrbit('\n', ccl); + clrbit(eolbyte, ccl); if (syntax_bits & RE_DOT_NOT_NULL) clrbit('\0', ccl); laststart = 0; @@ -557,25 +670,25 @@ lex() case 'w': case 'W': - if (!backslash) + if (!backslash || (syntax_bits & RE_NO_GNU_OPS)) goto normal_char; zeroset(ccl); for (c2 = 0; c2 < NOTCHAR; ++c2) - if (ISALNUM(c2)) + if (IS_WORD_CONSTITUENT(c2)) setbit(c2, ccl); if (c == 'W') notset(ccl); laststart = 0; return lasttok = CSET + charclass_index(ccl); - + case '[': if (backslash) goto normal_char; zeroset(ccl); - FETCH(c, "Unbalanced ["); + FETCH(c, _("Unbalanced [")); if (c == '^') { - FETCH(c, "Unbalanced ["); + FETCH(c, _("Unbalanced [")); invert = 1; } else @@ -592,20 +705,25 @@ lex() for (c1 = 0; prednames[c1].name; ++c1) if (looking_at(prednames[c1].name)) { + int (*pred)() = prednames[c1].pred; + if (case_fold + && (pred == is_upper || pred == is_lower)) + pred = is_alpha; + for (c2 = 0; c2 < NOTCHAR; ++c2) - if ((*prednames[c1].pred)(c2)) + if ((*pred)(c2)) setbit(c2, ccl); lexptr += strlen(prednames[c1].name); lexleft -= strlen(prednames[c1].name); - FETCH(c1, "Unbalanced ["); + FETCH(c1, _("Unbalanced [")); goto skip; } if (c == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) - FETCH(c, "Unbalanced ["); - FETCH(c1, "Unbalanced ["); + FETCH(c, _("Unbalanced [")); + FETCH(c1, _("Unbalanced [")); if (c1 == '-') { - FETCH(c2, "Unbalanced ["); + FETCH(c2, 
_("Unbalanced [")); if (c2 == ']') { /* In the case [x-], the - is an ordinary hyphen, @@ -618,22 +736,32 @@ lex() { if (c2 == '\\' && (syntax_bits & RE_BACKSLASH_ESCAPE_IN_LISTS)) - FETCH(c2, "Unbalanced ["); - FETCH(c1, "Unbalanced ["); + FETCH(c2, _("Unbalanced [")); + FETCH(c1, _("Unbalanced [")); } } else c2 = c; - while (c <= c2) + + lo[0] = c; lo[1] = '\0'; + hi[0] = c2; hi[1] = '\0'; + for (c = 0; c < NOTCHAR; c++) { - setbit(c, ccl); - if (case_fold) - if (ISUPPER(c)) - setbit(tolower(c), ccl); - else if (ISLOWER(c)) - setbit(toupper(c), ccl); - ++c; + char ch[2]; + ch[0] = c; ch[1] = '\0'; + if (strcoll (lo, ch) <= 0 && strcoll (ch, hi) <= 0) + { + setbit (c, ccl); + if (case_fold) + { + if (ISUPPER (c)) + setbit (tolower (c), ccl); + else if (ISLOWER (c)) + setbit (toupper (c), ccl); + } + } } + skip: ; } @@ -642,7 +770,7 @@ lex() { notset(ccl); if (syntax_bits & RE_HAT_LISTS_NOT_NEWLINE) - clrbit('\n', ccl); + clrbit(eolbyte, ccl); } laststart = 0; return lasttok = CSET + charclass_index(ccl); @@ -667,12 +795,13 @@ lex() /* The above loop should consume at most a backslash and some other character. */ abort(); + return END; /* keeps pedantic compilers happy. */ } /* Recursive descent parser for regular expressions. */ static token tok; /* Lookahead token. */ -static depth; /* Current depth of a hypothetical stack +static int depth; /* Current depth of a hypothetical stack holding deferred productions. This is used to determine the depth that will be required of the real stack later on in @@ -681,8 +810,7 @@ static depth; /* Current depth of a hypothetical stack /* Add the given token to the parse tree, maintaining the depth count and updating the maximum depth if necessary. */ static void -addtok(t) - token t; +addtok (token t) { REALLOC_IF_NECESSARY(dfa->tokens, token, dfa->talloc, dfa->tindex); dfa->tokens[dfa->tindex++] = t; @@ -740,14 +868,8 @@ addtok(t) The parser builds a parse tree in postfix form in an array of tokens. 
*/ -#if __STDC__ -static void regexp(int); -#else -static void regexp(); -#endif - static void -atom() +atom (void) { if ((tok >= 0 && tok < NOTCHAR) || tok >= CSET || tok == BACKREF || tok == BEGLINE || tok == ENDLINE || tok == BEGWORD @@ -761,7 +883,7 @@ atom() tok = lex(); regexp(0); if (tok != RPAREN) - dfaerror("Unbalanced ("); + dfaerror(_("Unbalanced (")); tok = lex(); } else @@ -770,7 +892,7 @@ atom() /* Return the number of tokens in the given subexpression. */ static int -nsubtoks(tindex) +nsubtoks (int tindex) { int ntoks1; @@ -792,8 +914,7 @@ nsubtoks(tindex) /* Copy the given subexpression to the top of the tree. */ static void -copytoks(tindex, ntokens) - int tindex, ntokens; +copytoks (int tindex, int ntokens) { int i; @@ -802,7 +923,7 @@ copytoks(tindex, ntokens) } static void -closure() +closure (void) { int tindex, ntokens, i; @@ -812,7 +933,7 @@ closure() { ntokens = nsubtoks(dfa->tindex); tindex = dfa->tindex - ntokens; - if (maxrep == 0) + if (maxrep < 0) addtok(PLUS); if (minrep == 0) addtok(QMARK); @@ -837,7 +958,7 @@ closure() } static void -branch() +branch (void) { closure(); while (tok != RPAREN && tok != OR && tok >= 0) @@ -848,8 +969,7 @@ branch() } static void -regexp(toplevel) - int toplevel; +regexp (int toplevel) { branch(); while (tok == OR) @@ -867,11 +987,7 @@ regexp(toplevel) length of the string, so s can include NUL characters. D is a pointer to the struct dfa to parse into. */ void -dfaparse(s, len, d) - char *s; - size_t len; - struct dfa *d; - +dfaparse (char *s, size_t len, struct dfa *d) { dfa = d; lexstart = lexptr = s; @@ -881,7 +997,7 @@ dfaparse(s, len, d) parens = 0; if (! 
syntax_bits_set) - dfaerror("No syntax specified"); + dfaerror(_("No syntax specified")); tok = lex(); depth = d->depth; @@ -889,7 +1005,7 @@ dfaparse(s, len, d) regexp(1); if (tok != END) - dfaerror("Unbalanced )"); + dfaerror(_("Unbalanced )")); addtok(END - d->nregexps); addtok(CAT); @@ -904,9 +1020,7 @@ dfaparse(s, len, d) /* Copy one set to another; the destination must be large enough. */ static void -copy(src, dst) - position_set *src; - position_set *dst; +copy (position_set *src, position_set *dst) { int i; @@ -920,15 +1034,13 @@ copy(src, dst) the same index then their constraints are logically or'd together. S->elems must point to an array large enough to hold the resulting set. */ static void -insert(p, s) - position p; - position_set *s; +insert (position p, position_set *s) { int i; position t1, t2; for (i = 0; i < s->nelem && p.index < s->elems[i].index; ++i) - ; + continue; if (i < s->nelem && p.index == s->elems[i].index) s->elems[i].constraint |= p.constraint; else @@ -947,10 +1059,7 @@ insert(p, s) /* Merge two sets of positions into a third. The result is exactly as if the positions of both sets were inserted into an initially empty set. */ static void -merge(s1, s2, m) - position_set *s1; - position_set *s2; - position_set *m; +merge (position_set *s1, position_set *s2, position_set *m) { int i = 0, j = 0; @@ -973,9 +1082,7 @@ merge(s1, s2, m) /* Delete a position from a set. */ static void -delete(p, s) - position p; - position_set *s; +delete (position p, position_set *s) { int i; @@ -992,11 +1099,7 @@ delete(p, s) state. Newline and letter tell whether we got here on a newline or letter, respectively. 
*/ static int -state_index(d, s, newline, letter) - struct dfa *d; - position_set *s; - int newline; - int letter; +state_index (struct dfa *d, position_set *s, int newline, int letter) { int hash = 0; int constraint; @@ -1061,10 +1164,8 @@ state_index(d, s, newline, letter) that position with the elements of its follow labeled with an appropriate constraint. Repeat exhaustively until no funny positions are left. S->elems must be large enough to hold the result. */ -void -epsclosure(s, d) - position_set *s; - struct dfa *d; +static void +epsclosure (position_set *s, struct dfa *d) { int i, j; int *visited; @@ -1176,9 +1277,7 @@ epsclosure(s, d) scheme; the number of elements in each set deeper in the stack can be used to determine the address of a particular set's array. */ void -dfaanalyze(d, searchflag) - struct dfa *d; - int searchflag; +dfaanalyze (struct dfa *d, int searchflag) { int *nullable; /* Nullable stack. */ int *nfirstpos; /* Element count stack for firstpos sets. */ @@ -1439,10 +1538,7 @@ dfaanalyze(d, searchflag) create a new group labeled with the characters of C and insert this position in that group. */ void -dfastate(s, d, trans) - int s; - struct dfa *d; - int trans[]; +dfastate (int s, struct dfa *d, int trans[]) { position_set grps[NOTCHAR]; /* As many as will ever be needed. */ charclass labels[NOTCHAR]; /* Labels corresponding to the groups. */ @@ -1463,7 +1559,7 @@ dfastate(s, d, trans) int state_newline; /* New state on a newline transition. */ int wants_letter; /* New state wants to know letter context. */ int state_letter; /* New state on a letter transition. */ - static initialized; /* Flag for static initialization. */ + static int initialized; /* Flag for static initialization. */ int i, j, k; /* Initialize the set of letters, if necessary. 
*/ @@ -1471,9 +1567,9 @@ dfastate(s, d, trans) { initialized = 1; for (i = 0; i < NOTCHAR; ++i) - if (ISALNUM(i)) + if (IS_WORD_CONSTITUENT(i)) setbit(i, letters); - setbit('\n', newline); + setbit(eolbyte, newline); } zeroset(matches); @@ -1494,7 +1590,7 @@ dfastate(s, d, trans) { if (! MATCHES_NEWLINE_CONTEXT(pos.constraint, d->states[s].newline, 1)) - clrbit('\n', matches); + clrbit(eolbyte, matches); if (! MATCHES_NEWLINE_CONTEXT(pos.constraint, d->states[s].newline, 0)) for (j = 0; j < CHARCLASS_INTS; ++j) @@ -1510,7 +1606,7 @@ dfastate(s, d, trans) /* If there are no characters left, there's no point in going on. */ for (j = 0; j < CHARCLASS_INTS && !matches[j]; ++j) - ; + continue; if (j == CHARCLASS_INTS) continue; } @@ -1528,7 +1624,7 @@ dfastate(s, d, trans) matches. */ intersectf = 0; for (k = 0; k < CHARCLASS_INTS; ++k) - (intersect[k] = matches[k] & labels[j][k]) ? intersectf = 1 : 0; + (intersect[k] = matches[k] & labels[j][k]) ? (intersectf = 1) : 0; if (! intersectf) continue; @@ -1539,8 +1635,8 @@ dfastate(s, d, trans) /* Even an optimizing compiler can't know this for sure. */ int match = matches[k], label = labels[j][k]; - (leftovers[k] = ~match & label) ? leftoversf = 1 : 0; - (matches[k] = match & ~label) ? matchesf = 1 : 0; + (leftovers[k] = ~match & label) ? (leftoversf = 1) : 0; + (matches[k] = match & ~label) ? (matchesf = 1) : 0; } /* If there were leftovers, create a new group labeled with them. */ @@ -1604,12 +1700,8 @@ dfastate(s, d, trans) else state_letter = state; for (i = 0; i < NOTCHAR; ++i) - if (i == '\n') - trans[i] = state_newline; - else if (ISALNUM(i)) - trans[i] = state_letter; - else - trans[i] = state; + trans[i] = (IS_WORD_CONSTITUENT(i)) ? state_letter : state; + trans[eolbyte] = state_newline; } else for (i = 0; i < NOTCHAR; ++i) @@ -1633,7 +1725,7 @@ dfastate(s, d, trans) /* Find out if the new state will want any context information. 
*/ wants_newline = 0; - if (tstbit('\n', labels[i])) + if (tstbit(eolbyte, labels[i])) for (j = 0; j < follows.nelem; ++j) if (PREV_NEWLINE_DEPENDENT(follows.elems[j].constraint)) wants_newline = 1; @@ -1665,9 +1757,9 @@ dfastate(s, d, trans) { int c = j * INTBITS + k; - if (c == '\n') + if (c == eolbyte) trans[c] = state_newline; - else if (ISALNUM(c)) + else if (IS_WORD_CONSTITUENT(c)) trans[c] = state_letter; else if (c < NOTCHAR) trans[c] = state; @@ -1688,9 +1780,7 @@ dfastate(s, d, trans) TODO: Improve this comment, get rid of the unnecessary redundancy. */ static void -build_state(s, d) - int s; - struct dfa *d; +build_state (int s, struct dfa *d) { int *trans; /* The new transition table. */ int i; @@ -1756,8 +1846,8 @@ build_state(s, d) /* Keep the newline transition in a special place so we can use it as a sentinel. */ - d->newlines[s] = trans['\n']; - trans['\n'] = -1; + d->newlines[s] = trans[eolbyte]; + trans[eolbyte] = -1; if (ACCEPTING(s, *d)) d->fails[s] = trans; @@ -1766,8 +1856,7 @@ build_state(s, d) } static void -build_state_zero(d) - struct dfa *d; +build_state_zero (struct dfa *d) { d->tralloc = 1; d->trcount = 0; @@ -1793,20 +1882,16 @@ build_state_zero(d) match needs to be verified by a backtracking matcher. Otherwise we store a 0 in *backref. */ char * -dfaexec(d, begin, end, newline, count, backref) - struct dfa *d; - char *begin; - char *end; - int newline; - int *count; - int *backref; +dfaexec (struct dfa *d, char *begin, char *end, + int newline, int *count, int *backref) { - register s, s1, tmp; /* Current state. */ + register int s, s1, tmp; /* Current state. */ register unsigned char *p; /* Current input character. */ - register **trans, *t; /* Copy of d->trans so it can be optimized + register int **trans, *t; /* Copy of d->trans so it can be optimized into a register. */ - static sbit[NOTCHAR]; /* Table for anding with d->success. */ - static sbit_init; + register unsigned char eol = eolbyte; /* Likewise for eolbyte. 
*/ + static int sbit[NOTCHAR]; /* Table for anding with d->success. */ + static int sbit_init; if (! sbit_init) { @@ -1814,12 +1899,8 @@ dfaexec(d, begin, end, newline, count, backref) sbit_init = 1; for (i = 0; i < NOTCHAR; ++i) - if (i == '\n') - sbit[i] = 4; - else if (ISALNUM(i)) - sbit[i] = 2; - else - sbit[i] = 1; + sbit[i] = (IS_WORD_CONSTITUENT(i)) ? 2 : 1; + sbit[eol] = 4; } if (! d->tralloc) @@ -1828,34 +1909,25 @@ dfaexec(d, begin, end, newline, count, backref) s = s1 = 0; p = (unsigned char *) begin; trans = d->trans; - *end = '\n'; + *end = eol; for (;;) { - /* The dreaded inner loop. */ - if ((t = trans[s]) != 0) - do - { - s1 = t[*p++]; - if (! (t = trans[s1])) - goto last_was_s; - s = t[*p++]; - } - while ((t = trans[s]) != 0); - goto last_was_s1; - last_was_s: - tmp = s, s = s1, s1 = tmp; - last_was_s1: + while ((t = trans[s]) != 0) { /* hand-optimized loop */ + s1 = t[*p++]; + if ((t = trans[s1]) == 0) { + tmp = s ; s = s1 ; s1 = tmp ; /* swap */ + break; + } + s = t[*p++]; + } if (s >= 0 && p <= (unsigned char *) end && d->fails[s]) { if (d->success[s] & sbit[*p]) { if (backref) - if (d->states[s].backref) - *backref = 1; - else - *backref = 0; + *backref = (d->states[s].backref != 0); return (char *) p; } @@ -1865,7 +1937,7 @@ dfaexec(d, begin, end, newline, count, backref) } /* If the previous character was a newline, count it. */ - if (count && (char *) p <= end && p[-1] == '\n') + if (count && (char *) p <= end && p[-1] == eol) ++*count; /* Check if we've run off the end of the buffer. */ @@ -1879,7 +1951,7 @@ dfaexec(d, begin, end, newline, count, backref) continue; } - if (p[-1] == '\n' && newline) + if (p[-1] == eol && newline) { s = d->newlines[s1]; continue; @@ -1892,8 +1964,7 @@ dfaexec(d, begin, end, newline, count, backref) /* Initialize the components of a dfa that the other routines don't initialize for themselves. 
*/ void -dfainit(d) - struct dfa *d; +dfainit (struct dfa *d) { d->calloc = 1; MALLOC(d->charclasses, charclass, d->calloc); @@ -1911,32 +1982,28 @@ dfainit(d) /* Parse and analyze a single string of the given length. */ void -dfacomp(s, len, d, searchflag) - char *s; - size_t len; - struct dfa *d; - int searchflag; +dfacomp (char *s, size_t len, struct dfa *d, int searchflag) { if (case_fold) /* dummy folding in service of dfamust() */ { - char *copy; + char *lcopy; int i; - copy = malloc(len); - if (!copy) - dfaerror("out of memory"); - + lcopy = malloc(len); + if (!lcopy) + dfaerror(_("out of memory")); + /* This is a kludge. */ case_fold = 0; for (i = 0; i < len; ++i) - if (ISUPPER(s[i])) - copy[i] = tolower(s[i]); + if (ISUPPER ((unsigned char) s[i])) + lcopy[i] = tolower ((unsigned char) s[i]); else - copy[i] = s[i]; + lcopy[i] = s[i]; dfainit(d); - dfaparse(copy, len, d); - free(copy); + dfaparse(lcopy, len, d); + free(lcopy); dfamust(d); d->cindex = d->tindex = d->depth = d->nleaves = d->nregexps = 0; case_fold = 1; @@ -1954,8 +2021,7 @@ dfacomp(s, len, d, searchflag) /* Free the storage held by the components of a dfa. */ void -dfafree(d) - struct dfa *d; +dfafree (struct dfa *d) { int i; struct dfamust *dm, *ndm; @@ -1974,9 +2040,10 @@ dfafree(d) free((ptr_t) d->trans[i]); else if (d->fails[i]) free((ptr_t) d->fails[i]); - free((ptr_t) d->realtrans); - free((ptr_t) d->fails); - free((ptr_t) d->newlines); + if (d->realtrans) free((ptr_t) d->realtrans); + if (d->fails) free((ptr_t) d->fails); + if (d->newlines) free((ptr_t) d->newlines); + if (d->success) free((ptr_t) d->success); for (dm = d->musts; dm; dm = ndm) { ndm = dm->next; @@ -2015,9 +2082,9 @@ dfafree(d) Type left right is in ---- ---- ----- -- -- char c # c # c # c # c - + CSET ZERO ZERO ZERO ZERO - + STAR ZERO ZERO ZERO ZERO QMARK ZERO ZERO ZERO ZERO @@ -2028,12 +2095,12 @@ dfafree(d) p->left : q->right : q->is!=ZERO) ? 
q->in plus p->is##q->left p->right##q->is p->is##q->is : p->right##q->left ZERO - + OR longest common longest common (do p->is and substrings common to leading trailing q->is have same p->in and q->in - (sub)sequence (sub)sequence length and - of p->left of p->right content) ? - and q->left and q->right p->is : NULL + (sub)sequence (sub)sequence length and + of p->left of p->right content) ? + and q->left and q->right p->is : NULL If there's anything else we recognize in the tree, all four sequences get set to zero-length sequences. If there's something we don't recognize in the tree, @@ -2060,18 +2127,16 @@ dfafree(d) Does optimization actually accomplish anything, or is the automaton you get from "psi|epsilon" (for example) the same as the one you get from "psi" (for example)? - + Are optimizable r.e.'s likely to be used in real-life situations (something like 'ab*' is probably unlikely; something like is 'psi|epsilon' is likelier)? */ static char * -icatalloc(old, new) - char *old; - char *new; +icatalloc (char *old, char *new) { char *result; - int oldsize, newsize; + size_t oldsize, newsize; newsize = (new == NULL) ? 
0 : strlen(new); if (old == NULL) @@ -2089,19 +2154,16 @@ icatalloc(old, new) } static char * -icpyalloc(string) - char *string; +icpyalloc (char *string) { return icatalloc((char *) NULL, string); } static char * -istrstr(lookin, lookfor) - char *lookin; - char *lookfor; +istrstr (char *lookin, char *lookfor) { char *cp; - int len; + size_t len; len = strlen(lookfor); for (cp = lookin; *cp != '\0'; ++cp) @@ -2111,16 +2173,14 @@ istrstr(lookin, lookfor) } static void -ifree(cp) - char *cp; +ifree (char *cp) { if (cp != NULL) free(cp); } static void -freelist(cpp) - char **cpp; +freelist (char **cpp) { int i; @@ -2134,10 +2194,7 @@ freelist(cpp) } static char ** -enlist(cpp, new, len) - char **cpp; - char *new; - int len; +enlist (char **cpp, char *new, size_t len) { int i, j; @@ -2182,14 +2239,12 @@ enlist(cpp, new, len) list of their distinct common substrings. Return NULL if something seems wild. */ static char ** -comsubs(left, right) - char *left; - char *right; +comsubs (char *left, char *right) { char **cpp; char *lcp; char *rcp; - int i, len; + size_t i, len; if (left == NULL || right == NULL) return NULL; @@ -2204,7 +2259,7 @@ comsubs(left, right) while (rcp != NULL) { for (i = 1; lcp[i] != '\0' && lcp[i] == rcp[i]; ++i) - ; + continue; if (i > len) len = i; rcp = index(rcp + 1, *lcp); @@ -2218,9 +2273,7 @@ comsubs(left, right) } static char ** -addlists(old, new) -char **old; -char **new; +addlists (char **old, char **new) { int i; @@ -2238,9 +2291,7 @@ char **new; /* Given two lists of substrings, return a new list giving substrings common to both. 
*/ static char ** -inboth(left, right) - char **left; - char **right; +inboth (char **left, char **right) { char **both; char **temp; @@ -2264,6 +2315,7 @@ inboth(left, right) } both = addlists(both, temp); freelist(temp); + free(temp); if (both == NULL) return NULL; } @@ -2280,16 +2332,14 @@ typedef struct } must; static void -resetmust(mp) -must *mp; +resetmust (must *mp) { mp->left[0] = mp->right[0] = mp->is[0] = '\0'; freelist(mp->in); } static void -dfamust(dfa) -struct dfa *dfa; +dfamust (struct dfa *dfa) { must *musts; must *mp; @@ -2300,8 +2350,9 @@ struct dfa *dfa; token t; static must must0; struct dfamust *dm; + static char empty_string[] = ""; - result = ""; + result = empty_string; exact = 0; musts = (must *) malloc((dfa->tindex + 1) * sizeof *musts); if (musts == NULL) @@ -2488,7 +2539,7 @@ struct dfa *dfa; resetmust(mp); mp->is[0] = mp->left[0] = mp->right[0] = t; mp->is[1] = mp->left[1] = mp->right[1] = '\0'; - mp->in = enlist(mp->in, mp->is, 1); + mp->in = enlist(mp->in, mp->is, (size_t)1); if (mp->in == NULL) goto done; } diff --git a/gnu/usr.bin/grep/dfa.h b/gnu/usr.bin/grep/dfa.h index 8659bc494ba..f2fef4b7edb 100644 --- a/gnu/usr.bin/grep/dfa.h +++ b/gnu/usr.bin/grep/dfa.h @@ -1,5 +1,5 @@ /* dfa.h - declarations for GNU deterministic regexp compiler - Copyright (C) 1988 Free Software Foundation, Inc. + Copyright (C) 1988, 1998 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,10 +13,7 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- - $Id: dfa.h,v 1.1.1.1 1995/10/18 08:40:17 deraadt Exp $ -*/ + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA */ /* Written June, 1988 by Mike Haertel */ @@ -25,14 +22,33 @@ In addition to clobbering modularity, we eat up valuable name space. */ +# undef PARAMS +#if __STDC__ +# ifndef _PTR_T +# define _PTR_T + typedef void * ptr_t; +# endif +# define PARAMS(x) x +#else +# ifndef _PTR_T +# define _PTR_T + typedef char * ptr_t; +# endif +# define PARAMS(x) () +#endif + /* Number of bits in an unsigned char. */ +#ifndef CHARBITS #define CHARBITS 8 +#endif /* First integer value that is greater than any character code. */ #define NOTCHAR (1 << CHARBITS) /* INTBITS need not be exact, just a lower bound. */ +#ifndef INTBITS #define INTBITS (CHARBITS * sizeof (int)) +#endif /* Number of ints required to hold a bit for every character. */ #define CHARCLASS_INTS ((NOTCHAR + INTBITS - 1) / INTBITS) @@ -304,16 +320,15 @@ struct dfa /* Entry points. */ -#if __STDC__ - -/* dfasyntax() takes two arguments; the first sets the syntax bits described - earlier in this file, and the second sets the case-folding flag. */ -extern void dfasyntax(int, int); +/* dfasyntax() takes three arguments; the first sets the syntax bits described + earlier in this file, the second sets the case-folding flag, and the + third specifies the line terminator. */ +extern void dfasyntax PARAMS ((reg_syntax_t, int, int)); /* Compile the given string of the given length into the given struct dfa. Final argument is a flag specifying whether to build a searching or an exact matcher. */ -extern void dfacomp(char *, size_t, struct dfa *, int); +extern void dfacomp PARAMS ((char *, size_t, struct dfa *, int)); /* Execute the given struct dfa on the buffer of characters. The first char * points to the beginning, and the second points to the @@ -327,26 +342,26 @@ extern void dfacomp(char *, size_t, struct dfa *, int); order to verify backreferencing; otherwise the flag will be cleared. 
Returns NULL if no match is found, or a pointer to the first character after the first & shortest matching string in the buffer. */ -extern char *dfaexec(struct dfa *, char *, char *, int, int *, int *); +extern char *dfaexec PARAMS ((struct dfa *, char *, char *, int, int *, int *)); /* Free the storage held by the components of a struct dfa. */ -extern void dfafree(struct dfa *); +extern void dfafree PARAMS ((struct dfa *)); /* Entry points for people who know what they're doing. */ /* Initialize the components of a struct dfa. */ -extern void dfainit(struct dfa *); +extern void dfainit PARAMS ((struct dfa *)); /* Incrementally parse a string of given length into a struct dfa. */ -extern void dfaparse(char *, size_t, struct dfa *); +extern void dfaparse PARAMS ((char *, size_t, struct dfa *)); /* Analyze a parsed regexp; second argument tells whether to build a searching or an exact matcher. */ -extern void dfaanalyze(struct dfa *, int); +extern void dfaanalyze PARAMS ((struct dfa *, int)); /* Compute, for each possible character, the transitions out of a given state, storing them in an array of integers. */ -extern void dfastate(int, struct dfa *, int []); +extern void dfastate PARAMS ((int, struct dfa *, int [])); /* Error handling. */ @@ -354,10 +369,4 @@ extern void dfastate(int, struct dfa *, int []); takes a single argument, a NUL-terminated string describing the error. The default dfaerror() prints the error message to stderr and exits. The user can provide a different dfafree() if so desired. */ -extern void dfaerror(char *); - -#else /* ! __STDC__ */ -extern void dfasyntax(), dfacomp(), dfafree(), dfainit(), dfaparse(); -extern void dfaanalyze(), dfastate(), dfaerror(); -extern char *dfaexec(); -#endif /* ! 
__STDC__ */ +extern void dfaerror PARAMS ((const char *)); diff --git a/gnu/usr.bin/grep/getopt.c b/gnu/usr.bin/grep/getopt.c index d28913cb507..d176d3e7e72 100644 --- a/gnu/usr.bin/grep/getopt.c +++ b/gnu/usr.bin/grep/getopt.c @@ -1,9 +1,8 @@ /* Getopt for GNU. - NOTE: getopt is now part of the C library, so if you don't know what - "Keep this file name-space clean" means, talk to roland@gnu.ai.mit.edu - before changing it! + NOTE: The canonical source of this file is maintained with the GNU + C Library. Bugs can be reported to bug-glibc@gnu.org. - Copyright (C) 1987, 88, 89, 90, 91, 92, 1993 + Copyright (C) 1987, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it @@ -17,45 +16,25 @@ GNU General Public License for more details. You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ - -#ifndef lint -#if 0 -static char rcsid[] = "$NetBSD: getopt.c,v 1.4 1995/04/28 11:43:53 cgd Exp $"; -#endif -static char rcsid[] = "$OpenBSD: getopt.c,v 1.2 1997/02/17 09:16:31 niklas Exp $"; -#endif /* not lint */ + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -/* NOTE!!! AIX requires this to be the first thing in the file. - Do not put ANYTHING before it! */ -#if !defined (__GNUC__) && defined (_AIX) - #pragma alloca +/* This tells Alpha OSF/1 not to define a getopt prototype in . + Ditto for AIX 3.2 and . 
*/ +#ifndef _NO_PROTO +# define _NO_PROTO #endif #ifdef HAVE_CONFIG_H -#include "config.h" -#endif - -#ifdef __GNUC__ -#define alloca __builtin_alloca -#else /* not __GNUC__ */ -#if defined (HAVE_ALLOCA_H) || (defined(sparc) && (defined(sun) || (!defined(USG) && !defined(SVR4) && !defined(__svr4__)))) -#include +# include #else -#ifndef _AIX -char *alloca (); -#endif -#endif /* alloca.h */ -#endif /* not __GNUC__ */ - -#if !__STDC__ && !defined(const) && IN_GCC -#define const -#endif - -/* This tells Alpha OSF/1 not to define a getopt prototype in . */ -#ifndef _NO_PROTO -#define _NO_PROTO +# if !defined __STDC__ || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +# ifndef const +# define const +# endif +# endif #endif #include @@ -68,24 +47,43 @@ char *alloca (); program understand `configure --with-gnu-libc' and omit the object files, it is simpler to just do this in the source for each such file. */ -#if defined (_LIBC) || !defined (__GNU_LIBRARY__) +#define GETOPT_INTERFACE_VERSION 2 +#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 +# include +# if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +# define ELIDE_CODE +# endif +#endif + +#ifndef ELIDE_CODE /* This needs to come after some library #include to get __GNU_LIBRARY__ defined. */ #ifdef __GNU_LIBRARY__ -#undef alloca /* Don't include stdlib.h for non-GNU C libraries because some of them contain conflicting prototypes for getopt. */ -#include -#else /* Not GNU C library. */ -#define __alloca alloca +# include +# include #endif /* GNU C library. */ -/* If GETOPT_COMPAT is defined, `+' as well as `--' can introduce a - long-named option. Because this is not POSIX.2 compliant, it is - being phased out. */ -/* #define GETOPT_COMPAT */ +#ifdef VMS +# include +# if HAVE_STRING_H - 0 +# include +# endif +#endif + +#ifndef _ +/* This is for other GNU distributions with internationalized messages. 
+ When compiling libc, the _ macro is predefined. */ +# ifdef HAVE_LIBINTL_H +# include +# define _(msgid) gettext (msgid) +# else +# define _(msgid) (msgid) +# endif +#endif /* This version of `getopt' appears to the caller like standard Unix `getopt' but it behaves differently for the user, since it allows the user @@ -109,7 +107,7 @@ char *alloca (); Also, when `ordering' is RETURN_IN_ORDER, each non-option ARGV-element is returned here. */ -char *optarg = 0; +char *optarg; /* Index in ARGV of the next element to be scanned. This is used for communication to and from the caller @@ -117,14 +115,20 @@ char *optarg = 0; On entry to `getopt', zero means this is the first call; initialize. - When `getopt' returns EOF, this is the index of the first of the + When `getopt' returns -1, this is the index of the first of the non-option elements that the caller should itself scan. Otherwise, `optind' communicates from one call to the next how much of ARGV has been scanned so far. */ -/* XXX 1003.2 says this must be 1 before any call. */ -int optind = 0; +/* 1003.2 says this must be 1 before any call. */ +int optind = 1; + +/* Formerly, initialization of getopt depended on optind==0, which + causes problems with re-calling getopt as programs generally don't + know that. */ + +int __getopt_initialized; /* The next char to be scanned in the option-element in which the last option character we returned was found. @@ -173,27 +177,37 @@ int optopt = '?'; The special argument `--' forces an end of option-scanning regardless of the value of `ordering'. In the case of RETURN_IN_ORDER, only - `--' can cause `getopt' to return EOF with `optind' != ARGC. */ + `--' can cause `getopt' to return -1 with `optind' != ARGC. */ static enum { REQUIRE_ORDER, PERMUTE, RETURN_IN_ORDER } ordering; + +/* Value of POSIXLY_CORRECT environment variable. 
*/ +static char *posixly_correct; -#if defined (__GNU_LIBRARY__) || defined (__NetBSD__) || defined (__OpenBSD__) +#ifdef __GNU_LIBRARY__ /* We want to avoid inclusion of string.h with non-GNU libraries because there are many ways it can cause trouble. On some systems, it contains special magic macros that don't work in GCC. */ -#include -#define my_index strchr -#define my_bcopy(src, dst, n) memcpy ((dst), (src), (n)) +# include +# define my_index strchr #else +# if HAVE_STRING_H +# include +# else +# include +# endif + /* Avoid depending on library functions or files whose names are inconsistent. */ -char *getenv (); +#ifndef getenv +extern char *getenv (); +#endif static char * my_index (str, chr) @@ -209,17 +223,19 @@ my_index (str, chr) return 0; } -static void -my_bcopy (from, to, size) - const char *from; - char *to; - int size; -{ - int i; - for (i = 0; i < size; i++) - to[i] = from[i]; -} -#endif /* GNU C library. */ +/* If using GCC, we can safely declare strlen this way. + If not using GCC, it is ok not to declare it. */ +#ifdef __GNUC__ +/* Note that Motorola Delta 68k R3V7 comes with GCC but not stddef.h. + That was relevant to code that was here before. */ +# if (!defined __STDC__ || !__STDC__) && !defined strlen +/* gcc with -traditional declares the built-in strlen to return int, + and has done so at least since version 2.4.5. -- rms. */ +extern int strlen (const char *); +# endif /* not __STDC__ */ +#endif /* __GNUC__ */ + +#endif /* not __GNU_LIBRARY__ */ /* Handle permutation of arguments. */ @@ -230,6 +246,46 @@ my_bcopy (from, to, size) static int first_nonopt; static int last_nonopt; +#ifdef _LIBC +/* Bash 2.0 gives us an environment variable containing flags + indicating ARGV elements that should not be considered arguments. 
*/ + +/* Defined in getopt_init.c */ +extern char *__getopt_nonoption_flags; + +static int nonoption_flags_max_len; +static int nonoption_flags_len; + +static int original_argc; +static char *const *original_argv; + +/* Make sure the environment variable bash 2.0 puts in the environment + is valid for the getopt call we must make sure that the ARGV passed + to getopt is that one passed to the process. */ +static void +__attribute__ ((unused)) +store_args_and_env (int argc, char *const *argv) +{ + /* XXX This is no good solution. We should rather copy the args so + that we can compare them later. But we must not use malloc(3). */ + original_argc = argc; + original_argv = argv; +} +# ifdef text_set_element +text_set_element (__libc_subinit, store_args_and_env); +# endif /* text_set_element */ + +# define SWAP_FLAGS(ch1, ch2) \ + if (nonoption_flags_len > 0) \ + { \ + char __tmp = __getopt_nonoption_flags[ch1]; \ + __getopt_nonoption_flags[ch1] = __getopt_nonoption_flags[ch2]; \ + __getopt_nonoption_flags[ch2] = __tmp; \ + } +#else /* !_LIBC */ +# define SWAP_FLAGS(ch1, ch2) +#endif /* _LIBC */ + /* Exchange two adjacent subsequences of ARGV. One subsequence is elements [first_nonopt,last_nonopt) which contains all the non-options that have been skipped so far. @@ -239,27 +295,160 @@ static int last_nonopt; `first_nonopt' and `last_nonopt' are relocated so that they describe the new indices of the non-options in ARGV after they are moved. */ +#if defined __STDC__ && __STDC__ +static void exchange (char **); +#endif + static void exchange (argv) char **argv; { - int nonopts_size = (last_nonopt - first_nonopt) * sizeof (char *); - char **temp = (char **) __alloca (nonopts_size); + int bottom = first_nonopt; + int middle = last_nonopt; + int top = optind; + char *tem; + + /* Exchange the shorter segment with the far end of the longer segment. + That puts the shorter segment into the right place. 
+ It leaves the longer segment in the right place overall, + but it consists of two parts that need to be swapped next. */ + +#ifdef _LIBC + /* First make sure the handling of the `__getopt_nonoption_flags' + string can work normally. Our top argument must be in the range + of the string. */ + if (nonoption_flags_len > 0 && top >= nonoption_flags_max_len) + { + /* We must extend the array. The user plays games with us and + presents new arguments. */ + char *new_str = malloc (top + 1); + if (new_str == NULL) + nonoption_flags_len = nonoption_flags_max_len = 0; + else + { + memset (__mempcpy (new_str, __getopt_nonoption_flags, + nonoption_flags_max_len), + '\0', top + 1 - nonoption_flags_max_len); + nonoption_flags_max_len = top + 1; + __getopt_nonoption_flags = new_str; + } + } +#endif + + while (top > middle && middle > bottom) + { + if (top - middle > middle - bottom) + { + /* Bottom segment is the short one. */ + int len = middle - bottom; + register int i; - /* Interchange the two blocks of data in ARGV. */ + /* Swap it with the top part of the top segment. */ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[top - (middle - bottom) + i]; + argv[top - (middle - bottom) + i] = tem; + SWAP_FLAGS (bottom + i, top - (middle - bottom) + i); + } + /* Exclude the moved bottom segment from further swapping. */ + top -= len; + } + else + { + /* Top segment is the short one. */ + int len = top - middle; + register int i; - my_bcopy ((char *) &argv[first_nonopt], (char *) temp, nonopts_size); - my_bcopy ((char *) &argv[last_nonopt], (char *) &argv[first_nonopt], - (optind - last_nonopt) * sizeof (char *)); - my_bcopy ((char *) temp, - (char *) &argv[first_nonopt + optind - last_nonopt], - nonopts_size); + /* Swap it with the bottom part of the bottom segment. 
*/ + for (i = 0; i < len; i++) + { + tem = argv[bottom + i]; + argv[bottom + i] = argv[middle + i]; + argv[middle + i] = tem; + SWAP_FLAGS (bottom + i, middle + i); + } + /* Exclude the moved top segment from further swapping. */ + bottom += len; + } + } /* Update records for the slots the non-options now occupy. */ first_nonopt += (optind - last_nonopt); last_nonopt = optind; } + +/* Initialize the internal data when the first call is made. */ + +#if defined __STDC__ && __STDC__ +static const char *_getopt_initialize (int, char *const *, const char *); +#endif +static const char * +_getopt_initialize (argc, argv, optstring) + int argc; + char *const *argv; + const char *optstring; +{ + /* Start processing options with ARGV-element 1 (since ARGV-element 0 + is the program name); the sequence of previously skipped + non-option ARGV-elements is empty. */ + + first_nonopt = last_nonopt = optind; + + nextchar = NULL; + + posixly_correct = getenv ("POSIXLY_CORRECT"); + + /* Determine how to handle the ordering of options and nonoptions. 
*/ + + if (optstring[0] == '-') + { + ordering = RETURN_IN_ORDER; + ++optstring; + } + else if (optstring[0] == '+') + { + ordering = REQUIRE_ORDER; + ++optstring; + } + else if (posixly_correct != NULL) + ordering = REQUIRE_ORDER; + else + ordering = PERMUTE; + +#ifdef _LIBC + if (posixly_correct == NULL + && argc == original_argc && argv == original_argv) + { + if (nonoption_flags_max_len == 0) + { + if (__getopt_nonoption_flags == NULL + || __getopt_nonoption_flags[0] == '\0') + nonoption_flags_max_len = -1; + else + { + const char *orig_str = __getopt_nonoption_flags; + int len = nonoption_flags_max_len = strlen (orig_str); + if (nonoption_flags_max_len < argc) + nonoption_flags_max_len = argc; + __getopt_nonoption_flags = + (char *) malloc (nonoption_flags_max_len); + if (__getopt_nonoption_flags == NULL) + nonoption_flags_max_len = -1; + else + memset (__mempcpy (__getopt_nonoption_flags, orig_str, len), + '\0', nonoption_flags_max_len - len); + } + } + nonoption_flags_len = nonoption_flags_max_len; + } + else + nonoption_flags_len = 0; +#endif + + return optstring; +} /* Scan elements of ARGV (whose length is ARGC) for option characters given in OPTSTRING. @@ -274,7 +463,7 @@ exchange (argv) updating `optind' and `nextchar' so that the next call to `getopt' can resume the scan with the following option character or ARGV-element. - If there are no more option characters, `getopt' returns `EOF'. + If there are no more option characters, `getopt' returns -1. Then `optind' is the index in ARGV of the first ARGV-element that is not an option. (The ARGV-elements have been permuted so that those that are not options now come last.) @@ -326,41 +515,39 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only) int *longind; int long_only; { - int option_index; + optarg = NULL; - optarg = 0; - - /* Initialize the internal data when the first call is made. 
- Start processing options with ARGV-element 1 (since ARGV-element 0 - is the program name); the sequence of previously skipped - non-option ARGV-elements is empty. */ - - if (optind == 0) + if (optind == 0 || !__getopt_initialized) { - first_nonopt = last_nonopt = optind = 1; - - nextchar = NULL; - - /* Determine how to handle the ordering of options and nonoptions. */ - - if (optstring[0] == '-') - { - ordering = RETURN_IN_ORDER; - ++optstring; - } - else if (optstring[0] == '+') - { - ordering = REQUIRE_ORDER; - ++optstring; - } - else if (getenv ("POSIXLY_CORRECT") != NULL) - ordering = REQUIRE_ORDER; - else - ordering = PERMUTE; + if (optind == 0) + optind = 1; /* Don't scan ARGV[0], the program name. */ + optstring = _getopt_initialize (argc, argv, optstring); + __getopt_initialized = 1; } + /* Test whether ARGV[optind] points to a non-option argument. + Either it does not have option syntax, or there is an environment flag + from the shell indicating it is not an option. The latter information + is only used when this code is built as part of the GNU libc. */ +#ifdef _LIBC +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0' \ + || (optind < nonoption_flags_len \ + && __getopt_nonoption_flags[optind] == '1')) +#else +# define NONOPTION_P (argv[optind][0] != '-' || argv[optind][1] == '\0') +#endif + if (nextchar == NULL || *nextchar == '\0') { + /* Advance to the next ARGV-element. */ + + /* Give FIRST_NONOPT & LAST_NONOPT rational values if OPTIND has been + moved back by the user (who may also have changed the arguments).
*/ + if (last_nonopt > optind) + last_nonopt = optind; + if (first_nonopt > optind) + first_nonopt = optind; + if (ordering == PERMUTE) { /* If we have just processed some options following some non-options, @@ -371,21 +558,15 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only) else if (last_nonopt != optind) first_nonopt = optind; - /* Now skip any additional non-options + /* Skip any additional non-options and extend the range of non-options previously skipped. */ - while (optind < argc - && (argv[optind][0] != '-' || argv[optind][1] == '\0') -#ifdef GETOPT_COMPAT - && (longopts == NULL - || argv[optind][0] != '+' || argv[optind][1] == '\0') -#endif /* GETOPT_COMPAT */ - ) + while (optind < argc && NONOPTION_P) optind++; last_nonopt = optind; } - /* Special ARGV-element `--' means premature end of options. + /* The special ARGV-element `--' means premature end of options. Skip it like a null option, then exchange with previous non-options as if it were an option, then skip everything else like a non-option. */ @@ -412,56 +593,64 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only) that we previously skipped, so the caller will digest them. */ if (first_nonopt != last_nonopt) optind = first_nonopt; - return EOF; + return -1; } /* If we have come to a non-option and did not permute it, either stop the scan or describe it to the caller and pass it by. */ - if ((argv[optind][0] != '-' || argv[optind][1] == '\0') -#ifdef GETOPT_COMPAT - && (longopts == NULL - || argv[optind][0] != '+' || argv[optind][1] == '\0') -#endif /* GETOPT_COMPAT */ - ) + if (NONOPTION_P) { if (ordering == REQUIRE_ORDER) - return EOF; + return -1; optarg = argv[optind++]; return 1; } /* We have found another option-ARGV-element. - Start decoding its characters. */ + Skip the initial punctuation. */ nextchar = (argv[optind] + 1 + (longopts != NULL && argv[optind][1] == '-')); } + /* Decode the current option-ARGV-element. 
*/ + + /* Check whether the ARGV-element is a long option. + + If long_only and the ARGV-element has the form "-f", where f is + a valid short option, don't consider it an abbreviated form of + a long option that starts with f. Otherwise there would be no + way to give the -f short option. + + On the other hand, if there's a long option "fubar" and + the ARGV-element is "-fu", do consider that an abbreviation of + the long option, just like "--fu", and not "-f" with arg "u". + + This distinction seems to be the most useful approach. */ + if (longopts != NULL - && ((argv[optind][0] == '-' - && (argv[optind][1] == '-' || long_only)) -#ifdef GETOPT_COMPAT - || argv[optind][0] == '+' -#endif /* GETOPT_COMPAT */ - )) + && (argv[optind][1] == '-' + || (long_only && (argv[optind][2] || !my_index (optstring, argv[optind][1]))))) { + char *nameend; const struct option *p; - char *s = nextchar; + const struct option *pfound = NULL; int exact = 0; int ambig = 0; - const struct option *pfound = NULL; - int indfound; + int indfound = -1; + int option_index; - while (*s && *s != '=') - s++; + for (nameend = nextchar; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; - /* Test all options for either exact match or abbreviated matches. */ - for (p = longopts, option_index = 0; p->name; - p++, option_index++) - if (!strncmp (p->name, nextchar, s - nextchar)) + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) { - if (s - nextchar == strlen (p->name)) + if ((unsigned int) (nameend - nextchar) + == (unsigned int) strlen (p->name)) { /* Exact match found. */ pfound = p; @@ -476,17 +665,18 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only) indfound = option_index; } else - /* Second nonexact match found. */ + /* Second or later nonexact match found. 
*/ ambig = 1; } if (ambig && !exact) { if (opterr) - fprintf (stderr, "%s: option `%s' is ambiguous\n", + fprintf (stderr, _("%s: option `%s' is ambiguous\n"), argv[0], argv[optind]); nextchar += strlen (nextchar); optind++; + optopt = 0; return '?'; } @@ -494,12 +684,12 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only) { option_index = indfound; optind++; - if (*s) + if (*nameend) { /* Don't test has_arg with >, because some C compilers don't allow it to be used on enums. */ if (pfound->has_arg) - optarg = s + 1; + optarg = nameend + 1; else { if (opterr) @@ -507,15 +697,18 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only) if (argv[optind - 1][1] == '-') /* --option */ fprintf (stderr, - "%s: option `--%s' doesn't allow an argument\n", + _("%s: option `--%s' doesn't allow an argument\n"), argv[0], pfound->name); else /* +option or -option */ fprintf (stderr, - "%s: option `%c%s' doesn't allow an argument\n", - argv[0], argv[optind - 1][0], pfound->name); + _("%s: option `%c%s' doesn't allow an argument\n"), + argv[0], argv[optind - 1][0], pfound->name); } + nextchar += strlen (nextchar); + + optopt = pfound->val; return '?'; } } @@ -526,9 +719,11 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only) else { if (opterr) - fprintf (stderr, "%s: option `%s' requires an argument\n", - argv[0], argv[optind - 1]); + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); nextchar += strlen (nextchar); + optopt = pfound->val; return optstring[0] == ':' ? ':' : '?'; } } @@ -542,34 +737,33 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only) } return pfound->val; } + /* Can't find it as a long option. If this is not getopt_long_only, or the option starts with '--' or is not a valid short option, then it's an error. Otherwise interpret it as a short option. 
*/ if (!long_only || argv[optind][1] == '-' -#ifdef GETOPT_COMPAT - || argv[optind][0] == '+' -#endif /* GETOPT_COMPAT */ || my_index (optstring, *nextchar) == NULL) { if (opterr) { if (argv[optind][1] == '-') /* --option */ - fprintf (stderr, "%s: unrecognized option `--%s'\n", + fprintf (stderr, _("%s: unrecognized option `--%s'\n"), argv[0], nextchar); else /* +option or -option */ - fprintf (stderr, "%s: unrecognized option `%c%s'\n", + fprintf (stderr, _("%s: unrecognized option `%c%s'\n"), argv[0], argv[optind][0], nextchar); } nextchar = (char *) ""; optind++; + optopt = 0; return '?'; } } - /* Look at and handle the next option-character. */ + /* Look at and handle the next short option-character. */ { char c = *nextchar++; @@ -583,20 +777,141 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only) { if (opterr) { -#if 0 - if (c < 040 || c >= 0177) - fprintf (stderr, "%s: unrecognized option, character code 0%o\n", + if (posixly_correct) + /* 1003.2 specifies the format of this message. */ + fprintf (stderr, _("%s: illegal option -- %c\n"), argv[0], c); else - fprintf (stderr, "%s: unrecognized option `-%c'\n", argv[0], c); -#else - /* 1003.2 specifies the format of this message. */ - fprintf (stderr, "%s: illegal option -- %c\n", argv[0], c); -#endif + fprintf (stderr, _("%s: invalid option -- %c\n"), + argv[0], c); } optopt = c; return '?'; } + /* Convenience. Treat POSIX -W foo same as long option --foo */ + if (temp[0] == 'W' && temp[1] == ';') + { + char *nameend; + const struct option *p; + const struct option *pfound = NULL; + int exact = 0; + int ambig = 0; + int indfound = 0; + int option_index; + + /* This is an option that requires an argument. */ + if (*nextchar != '\0') + { + optarg = nextchar; + /* If we end this ARGV-element by taking the rest as an arg, + we must advance to the next element now. */ + optind++; + } + else if (optind == argc) + { + if (opterr) + { + /* 1003.2 specifies the format of this message. 
*/ + fprintf (stderr, _("%s: option requires an argument -- %c\n"), + argv[0], c); + } + optopt = c; + if (optstring[0] == ':') + c = ':'; + else + c = '?'; + return c; + } + else + /* We already incremented `optind' once; + increment it again when taking next ARGV-elt as argument. */ + optarg = argv[optind++]; + + /* optarg is now the argument, see if it's in the + table of longopts. */ + + for (nextchar = nameend = optarg; *nameend && *nameend != '='; nameend++) + /* Do nothing. */ ; + + /* Test all long options for either exact match + or abbreviated matches. */ + for (p = longopts, option_index = 0; p->name; p++, option_index++) + if (!strncmp (p->name, nextchar, nameend - nextchar)) + { + if ((unsigned int) (nameend - nextchar) == strlen (p->name)) + { + /* Exact match found. */ + pfound = p; + indfound = option_index; + exact = 1; + break; + } + else if (pfound == NULL) + { + /* First nonexact match found. */ + pfound = p; + indfound = option_index; + } + else + /* Second or later nonexact match found. */ + ambig = 1; + } + if (ambig && !exact) + { + if (opterr) + fprintf (stderr, _("%s: option `-W %s' is ambiguous\n"), + argv[0], argv[optind]); + nextchar += strlen (nextchar); + optind++; + return '?'; + } + if (pfound != NULL) + { + option_index = indfound; + if (*nameend) + { + /* Don't test has_arg with >, because some C compilers don't + allow it to be used on enums. */ + if (pfound->has_arg) + optarg = nameend + 1; + else + { + if (opterr) + fprintf (stderr, _("\ +%s: option `-W %s' doesn't allow an argument\n"), + argv[0], pfound->name); + + nextchar += strlen (nextchar); + return '?'; + } + } + else if (pfound->has_arg == 1) + { + if (optind < argc) + optarg = argv[optind++]; + else + { + if (opterr) + fprintf (stderr, + _("%s: option `%s' requires an argument\n"), + argv[0], argv[optind - 1]); + nextchar += strlen (nextchar); + return optstring[0] == ':' ? 
':' : '?'; + } + } + nextchar += strlen (nextchar); + if (longind != NULL) + *longind = option_index; + if (pfound->flag) + { + *(pfound->flag) = pfound->val; + return 0; + } + return pfound->val; + } + nextchar = NULL; + return 'W'; /* Let the application handle it. */ + } if (temp[1] == ':') { if (temp[2] == ':') @@ -608,7 +923,7 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only) optind++; } else - optarg = 0; + optarg = NULL; nextchar = NULL; } else @@ -625,14 +940,10 @@ _getopt_internal (argc, argv, optstring, longopts, longind, long_only) { if (opterr) { -#if 0 - fprintf (stderr, "%s: option `-%c' requires an argument\n", - argv[0], c); -#else /* 1003.2 specifies the format of this message. */ - fprintf (stderr, "%s: option requires an argument -- %c\n", - argv[0], c); -#endif + fprintf (stderr, + _("%s: option requires an argument -- %c\n"), + argv[0], c); } optopt = c; if (optstring[0] == ':') @@ -663,7 +974,7 @@ getopt (argc, argv, optstring) 0); } -#endif /* _LIBC or not __GNU_LIBRARY__. */ +#endif /* Not ELIDE_CODE. */ #ifdef TEST @@ -683,7 +994,7 @@ main (argc, argv) int this_option_optind = optind ? optind : 1; c = getopt (argc, argv, "abc:d:0123456789"); - if (c == EOF) + if (c == -1) break; switch (c) diff --git a/gnu/usr.bin/grep/getopt.h b/gnu/usr.bin/grep/getopt.h index a146ef22aa8..39f4a168626 100644 --- a/gnu/usr.bin/grep/getopt.h +++ b/gnu/usr.bin/grep/getopt.h @@ -1,6 +1,7 @@ /* Declarations for getopt. - Copyright (C) 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc. - + Copyright (C) 1989,90,91,92,93,94,96,97,98 Free Software Foundation, Inc. + NOTE: The canonical source of this file is maintained with the GNU C Library. + Bugs can be reported to bug-glibc@gnu.org. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2, or (at your option) any @@ -13,13 +14,14 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. - - $Id: getopt.h,v 1.1.1.1 1995/10/18 08:40:17 deraadt Exp $ -*/ + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + USA. */ #ifndef _GETOPT_H -#define _GETOPT_H 1 + +#ifndef __need_getopt +# define _GETOPT_H 1 +#endif #ifdef __cplusplus extern "C" { @@ -39,7 +41,7 @@ extern char *optarg; On entry to `getopt', zero means this is the first call; initialize. - When `getopt' returns EOF, this is the index of the first of the + When `getopt' returns -1, this is the index of the first of the non-option elements that the caller should itself scan. Otherwise, `optind' communicates from one call to the next @@ -56,6 +58,7 @@ extern int opterr; extern int optopt; +#ifndef __need_getopt /* Describe the long-named options requested by the application. The LONG_OPTIONS argument to getopt_long or getopt_long_only is a vector of `struct option' terminated by an element containing a name which is @@ -79,11 +82,11 @@ extern int optopt; struct option { -#if __STDC__ +# if defined __STDC__ && __STDC__ const char *name; -#else +# else char *name; -#endif +# endif /* has_arg can't be an enum because some compilers complain about type mismatches in all the code that assumes it is an int. */ int has_arg; @@ -93,40 +96,74 @@ struct option /* Names for the values of the `has_arg' field of `struct option'. 
*/ -#define no_argument 0 -#define required_argument 1 -#define optional_argument 2 +# define no_argument 0 +# define required_argument 1 +# define optional_argument 2 +#endif /* need getopt */ + + +/* Get definitions and prototypes for functions to process the + arguments in ARGV (ARGC of them, minus the program name) for + options given in OPTS. -#if __STDC__ -#if defined(__GNU_LIBRARY__) + Return the option character from OPTS just read. Return -1 when + there are no more options. For unrecognized options, or options + missing arguments, `optopt' is set to the option letter, and '?' is + returned. + + The OPTS string is a list of characters which are recognized option + letters, optionally followed by colons, specifying that that letter + takes an argument, to be placed in `optarg'. + + If a letter in OPTS is followed by two colons, its argument is + optional. This behavior is specific to the GNU `getopt'. + + The argument `--' causes premature termination of argument + scanning, explicitly telling `getopt' that there are no more + options. + + If OPTS begins with `-', then non-option arguments are treated as + arguments to the option '\1'. This behavior is specific to the GNU + `getopt'. */ + +#if (defined __STDC__ && __STDC__) || defined PROTOTYPES +# ifdef __GNU_LIBRARY__ /* Many other libraries have conflicting prototypes for getopt, with differences in the consts, in stdlib.h. To avoid compilation errors, only prototype getopt for the GNU C library.
*/ -extern int getopt (int argc, char *const *argv, const char *shortopts); -#else /* not __GNU_LIBRARY__ */ +extern int getopt (int __argc, char *const *__argv, const char *__shortopts); +# else /* not __GNU_LIBRARY__ */ extern int getopt (); -#endif /* not __GNU_LIBRARY__ */ -extern int getopt_long (int argc, char *const *argv, const char *shortopts, - const struct option *longopts, int *longind); -extern int getopt_long_only (int argc, char *const *argv, - const char *shortopts, - const struct option *longopts, int *longind); +# endif /* __GNU_LIBRARY__ */ + +# ifndef __need_getopt +extern int getopt_long (int __argc, char *const *__argv, const char *__shortopts, + const struct option *__longopts, int *__longind); +extern int getopt_long_only (int __argc, char *const *__argv, + const char *__shortopts, + const struct option *__longopts, int *__longind); /* Internal only. Users should not call this directly. */ -extern int _getopt_internal (int argc, char *const *argv, - const char *shortopts, - const struct option *longopts, int *longind, - int long_only); -#else /* not __STDC__ */ +extern int _getopt_internal (int __argc, char *const *__argv, + const char *__shortopts, + const struct option *__longopts, int *__longind, + int __long_only); +# endif +#else /* not ((defined __STDC__ && __STDC__) || defined PROTOTYPES) */ extern int getopt (); +# ifndef __need_getopt extern int getopt_long (); extern int getopt_long_only (); extern int _getopt_internal (); -#endif /* not __STDC__ */ +# endif +#endif /* (defined __STDC__ && __STDC__) || defined PROTOTYPES */ #ifdef __cplusplus } #endif -#endif /* _GETOPT_H */ +/* Make sure we later can get all the definitions and declarations. 
*/ +#undef __need_getopt + +#endif /* getopt.h */ diff --git a/gnu/usr.bin/grep/getopt1.c b/gnu/usr.bin/grep/getopt1.c new file mode 100644 index 00000000000..9c8256567c4 --- /dev/null +++ b/gnu/usr.bin/grep/getopt1.c @@ -0,0 +1,188 @@ +/* getopt_long and getopt_long_only entry points for GNU getopt. + Copyright (C) 1987,88,89,90,91,92,93,94,96,97,98 + Free Software Foundation, Inc. + NOTE: The canonical source of this file is maintained with the GNU C Library. + Bugs can be reported to bug-glibc@gnu.org. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +#include +#else +#if !defined __STDC__ || !__STDC__ +/* This is a separate conditional since some stdc systems + reject `defined (const)'. */ +#ifndef const +#define const +#endif +#endif +#endif + +#include "getopt.h" + +#include + +/* Comment out all this code if we are using the GNU C Library, and are not + actually compiling the library itself. This code is part of the GNU C + Library, but also included in many other GNU distributions. Compiling + and linking in this code is a waste when using the GNU C library + (especially if it is a shared library). Rather than having every GNU + program understand `configure --with-gnu-libc' and omit the object files, + it is simpler to just do this in the source for each such file. 
*/ + +#define GETOPT_INTERFACE_VERSION 2 +#if !defined _LIBC && defined __GLIBC__ && __GLIBC__ >= 2 +#include +#if _GNU_GETOPT_INTERFACE_VERSION == GETOPT_INTERFACE_VERSION +#define ELIDE_CODE +#endif +#endif + +#ifndef ELIDE_CODE + + +/* This needs to come after some library #include + to get __GNU_LIBRARY__ defined. */ +#ifdef __GNU_LIBRARY__ +#include +#endif + +#ifndef NULL +#define NULL 0 +#endif + +int +getopt_long (argc, argv, options, long_options, opt_index) + int argc; + char *const *argv; + const char *options; + const struct option *long_options; + int *opt_index; +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 0); +} + +/* Like getopt_long, but '-' as well as '--' can indicate a long option. + If an option that starts with '-' (not '--') doesn't match a long option, + but does match a short option, it is parsed as a short option + instead. */ + +int +getopt_long_only (argc, argv, options, long_options, opt_index) + int argc; + char *const *argv; + const char *options; + const struct option *long_options; + int *opt_index; +{ + return _getopt_internal (argc, argv, options, long_options, opt_index, 1); +} + + +#endif /* Not ELIDE_CODE. */ + +#ifdef TEST + +#include + +int +main (argc, argv) + int argc; + char **argv; +{ + int c; + int digit_optind = 0; + + while (1) + { + int this_option_optind = optind ? 
optind : 1; + int option_index = 0; + static struct option long_options[] = + { + {"add", 1, 0, 0}, + {"append", 0, 0, 0}, + {"delete", 1, 0, 0}, + {"verbose", 0, 0, 0}, + {"create", 0, 0, 0}, + {"file", 1, 0, 0}, + {0, 0, 0, 0} + }; + + c = getopt_long (argc, argv, "abc:d:0123456789", + long_options, &option_index); + if (c == -1) + break; + + switch (c) + { + case 0: + printf ("option %s", long_options[option_index].name); + if (optarg) + printf (" with arg %s", optarg); + printf ("\n"); + break; + + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if (digit_optind != 0 && digit_optind != this_option_optind) + printf ("digits occur in two different argv-elements.\n"); + digit_optind = this_option_optind; + printf ("option %c\n", c); + break; + + case 'a': + printf ("option a\n"); + break; + + case 'b': + printf ("option b\n"); + break; + + case 'c': + printf ("option c with value `%s'\n", optarg); + break; + + case 'd': + printf ("option d with value `%s'\n", optarg); + break; + + case '?': + break; + + default: + printf ("?? 
getopt returned character code 0%o ??\n", c); + } + } + + if (optind < argc) + { + printf ("non-option ARGV-elements: "); + while (optind < argc) + printf ("%s ", argv[optind++]); + printf ("\n"); + } + + exit (0); +} + +#endif /* TEST */ diff --git a/gnu/usr.bin/grep/getpagesize.h b/gnu/usr.bin/grep/getpagesize.h deleted file mode 100644 index b371de4dde9..00000000000 --- a/gnu/usr.bin/grep/getpagesize.h +++ /dev/null @@ -1,44 +0,0 @@ -/* $Id: getpagesize.h,v 1.1.1.1 1995/10/18 08:40:17 deraadt Exp $ */ - -#ifdef BSD -#ifndef BSD4_1 -#define HAVE_GETPAGESIZE -#endif -#endif - -#ifndef HAVE_GETPAGESIZE - -#ifdef VMS -#define getpagesize() 512 -#endif - -#ifdef HAVE_UNISTD_H -#include -#endif - -#ifdef _SC_PAGESIZE -#define getpagesize() sysconf(_SC_PAGESIZE) -#else - -#ifdef HAVE_SYS_PARAM_H -#include - -#ifdef EXEC_PAGESIZE -#define getpagesize() EXEC_PAGESIZE -#else -#ifdef NBPG -#define getpagesize() NBPG * CLSIZE -#ifndef CLSIZE -#define CLSIZE 1 -#endif /* no CLSIZE */ -#else /* no NBPG */ -#define getpagesize() NBPC -#endif /* no NBPG */ -#endif /* no EXEC_PAGESIZE */ -#else /* !HAVE_SYS_PARAM_H */ -#define getpagesize() 8192 /* punt totally */ -#endif /* !HAVE_SYS_PARAM_H */ -#endif /* no _SC_PAGESIZE */ - -#endif /* not HAVE_GETPAGESIZE */ - diff --git a/gnu/usr.bin/grep/grep.1 b/gnu/usr.bin/grep/grep.1 index 2b462a1f2c3..5bd9c73608b 100644 --- a/gnu/usr.bin/grep/grep.1 +++ b/gnu/usr.bin/grep/grep.1 @@ -1,165 +1,312 @@ -.\" $Id: grep.1,v 1.4 1997/08/07 20:06:04 kstailey Exp $ -*- nroff -*- -.TH GREP 1 "1992 September 10" "GNU Project" +.\" grep man page +.if !\n(.g \{\ +. if !\w|\*(lq| \{\ +. ds lq `` +. if \w'\(lq' .ds lq "\(lq +. \} +. if !\w|\*(rq| \{\ +. ds rq '' +. if \w'\(rq' .ds rq "\(rq +. \} +.\} +.de Id +.ds Dt \\$4 +.. 
+.Id $Id: grep.1,v 1.5 2000/03/09 00:08:08 mickey Exp $ +.TH GREP 1 \*(Dt "GNU Project" .SH NAME grep, egrep, fgrep \- print lines matching a pattern .SH SYNOPSIS .B grep -[ -.BR \- [[ AB "] ]\c" -.I "num" -] -[ -.BR \- [ CEFGVBchilnsvwx ] -] -[ -.B \-e -] -.I pattern +.RI [ options ] +.I PATTERN +.RI [ FILE .\|.\|.] +.br +.B grep +.RI [ options ] +.RB [ \-e +.I PATTERN | -.BI \-f file -] [ -.I files... -] +.B \-f +.IR FILE ] +.RI [ FILE .\|.\|.] .SH DESCRIPTION .PP .B Grep searches the named input -.I files +.IR FILE s (or standard input if no files are named, or the file name .B \- is given) for lines containing a match to the given -.IR pattern . +.IR PATTERN . By default, .B grep prints the matching lines. .PP -There are three major variants of -.BR grep , -controlled by the following options. -.PD 0 -.TP -.B \-G -Interpret -.I pattern -as a basic regular expression (see below). This is the default. -.TP -.B \-E -Interpret -.I pattern -as an extended regular expression (see below). -.TP -.B \-F -Interpret -.I pattern -as a list of fixed strings, separated by newlines, -any of which is to be matched. -.LP In addition, two variant programs .B egrep and .B fgrep are available. .B Egrep -is similiar (but not identical) to -.BR "grep\ \-E" , -and is compatible with the historical Unix -.BR egrep . +is the same as +.BR "grep\ \-E" . .B Fgrep is the same as .BR "grep\ \-F" . -.PD -.LP -All variants of -.B grep -understand the following options: -.PD 0 +.SH OPTIONS .TP -.BI \- num -Matches will be printed with -.I num -lines of leading and trailing context. However, -.B grep -will never print any given line more than once. -.TP -.BI \-A " num" +.BI \-A " NUM" "\fR,\fP \-\^\-after-context=" NUM Print -.I num +.I NUM lines of trailing context after matching lines. .TP -.BI \-B " num" +.BR \-a ", " \-\^\-text +Process a binary file as if it were text; this is equivalent to the +.B \-\^\-binary-files=text +option. 
+.TP +.BI \-B " NUM" "\fR,\fP \-\^\-before-context=" NUM Print -.I num +.I NUM lines of leading context before matching lines. .TP -.B \-C -Equivalent to -.BR \-2 . -.TP -.B \-V -Print the version number of -.B grep -to standard error. This version number should -be included in all bug reports (see below). +\fB\-C\fP [\fINUM\fP], \fB\-\fP\fINUM\fP, \fB\-\^\-context\fP[\fB=\fP\fINUM\fP] +Print +.I NUM +lines (default 2) of output context. .TP -.B \-b +.BR \-b ", " \-\^\-byte-offset Print the byte offset within the input file before each line of output. .TP -.B \-c +.BI \-\^\-binary-files= TYPE +If the first few bytes of a file indicate that the file contains binary +data, assume that the file is of type +.IR TYPE . +By default, +.I TYPE +is +.BR binary , +and +.B grep +normally outputs either +a one-line message saying that a binary file matches, or no message if +there is no match. +If +.I TYPE +is +.BR without-match , +.B grep +assumes that a binary file does not match; this is equivalent to the +.B \-I +option. +If +.I TYPE +is +.BR text , +.B grep +processes a binary file as if it were text; this is equivalent to the +.B \-a +option. +.I Warning: +.B "grep \-\^\-binary-files=text" +might output binary garbage, +which can have nasty side effects if the output is a terminal and if the +terminal driver interprets some of it as commands. +.TP +.BR \-c ", " \-\^\-count Suppress normal output; instead print a count of matching lines for each input file. With the -.B \-v +.BR \-v ", " \-\^\-invert-match option (see below), count non-matching lines. .TP -.BI \-e " pattern" +.BI \-d " ACTION" "\fR,\fP \-\^\-directories=" ACTION +If an input file is a directory, use +.I ACTION +to process it. By default, +.I ACTION +is +.BR read , +which means that directories are read just as if they were ordinary files. +If +.I ACTION +is +.BR skip , +directories are silently skipped. 
+If +.I ACTION +is +.BR recurse , +.B grep +reads all files under each directory, recursively; +this is equivalent to the +.B \-r +option. +.TP +.BR \-E ", " \-\^\-extended-regexp +Interpret +.I PATTERN +as an extended regular expression (see below). +.TP +.BI \-e " PATTERN" "\fR,\fP \-\^\-regexp=" PATTERN Use -.I pattern +.I PATTERN as the pattern; useful to protect patterns beginning with .BR \- . .TP -.BI \-f " file" -Obtain the pattern from -.IR file . +.BR \-F ", " \-\^\-fixed-strings +Interpret +.I PATTERN +as a list of fixed strings, separated by newlines, +any of which is to be matched. +.TP +.BI \-f " FILE" "\fR,\fP \-\^\-file=" FILE +Obtain patterns from +.IR FILE , +one per line. +The empty file contains zero patterns, and therefore matches nothing. +.TP +.BR \-G ", " \-\^\-basic-regexp +Interpret +.I PATTERN +as a basic regular expression (see below). This is the default. +.TP +.BR \-H ", " \-\^\-with-filename +Print the filename for each match. .TP -.B \-h +.BR \-h ", " \-\^\-no-filename Suppress the prefixing of filenames on output when multiple files are searched. .TP -.B \-i +.B \-\^\-help +Output a brief help message. +.TP +.BR \-I +Process a binary file as if it did not contain matching data; this is +equivalent to the +.B \-\^\-binary-files=without-match +option. +.TP +.BR \-i ", " \-\^\-ignore-case Ignore case distinctions in both the -.I pattern +.I PATTERN and the input files. .TP -.B \-L +.BR \-L ", " \-\^\-files-without-match Suppress normal output; instead print the name of each input file from which no output would -normally have been printed. +normally have been printed. The scanning will stop +on the first match. .TP -.B \-l +.BR \-l ", " \-\^\-files-with-matches Suppress normal output; instead print the name of each input file from which output -would normally have been printed. +would normally have been printed. The scanning will +stop on the first match. 
+.TP +.B \-\^\-mmap +If possible, use the +.BR mmap (2) +system call to read input, instead of +the default +.BR read (2) +system call. In some situations, +.B \-\^\-mmap +yields better performance. However, +.B \-\^\-mmap +can cause undefined behavior (including core dumps) +if an input file shrinks while +.B grep +is operating, or if an I/O error occurs. .TP -.B \-n +.BR \-n ", " \-\^\-line-number Prefix each line of output with the line number within its input file. .TP -.B \-o -Always print filenames with output lines. +.BR \-q ", " \-\^\-quiet ", " \-\^\-silent +Quiet; suppress normal output. The scanning will stop +on the first match. +Also see the +.B \-s +or +.B \-\^\-no-messages +option below. .TP -.B \-q -Quiet; suppress normal output. +.BR \-r ", " \-\^\-recursive +Read all files under each directory, recursively; +this is equivalent to the +.B "\-d recurse" +option. .TP -.B \-s +.BR \-s ", " \-\^\-no-messages Suppress error messages about nonexistent or unreadable files. +Portability note: unlike \s-1GNU\s0 +.BR grep , +traditional +.B grep +did not conform to \s-1POSIX.2\s0, because traditional +.B grep +lacked a +.B \-q +option and its +.B \-s +option behaved like \s-1GNU\s0 +.BR grep 's +.B \-q +option. +Shell scripts intended to be portable to traditional +.B grep +should avoid both +.B \-q +and +.B \-s +and should redirect output to /dev/null instead. .TP -.B \-v -"Versus" mode. Invert the sense of matching, to select non-matching lines. +.BR \-U ", " \-\^\-binary +Treat the file(s) as binary. By default, under MS-DOS and MS-Windows, +.BR grep +guesses the file type by looking at the contents of the first 32KB +read from the file. If +.BR grep +decides the file is a text file, it strips the CR characters from the +original file contents (to make regular expressions with +.B ^ +and +.B $ +work correctly). 
Specifying +.B \-U +overrules this guesswork, causing all files to be read and passed to the +matching mechanism verbatim; if the file is a text file with CR/LF +pairs at the end of each line, this will cause some regular +expressions to fail. +This option has no effect on platforms other than MS-DOS and +MS-Windows. .TP -.B \-w +.BR \-u ", " \-\^\-unix-byte-offsets +Report Unix-style byte offsets. This switch causes +.B grep +to report byte offsets as if the file were a Unix-style text file, i.e. with +CR characters stripped off. This will produce results identical to running +.B grep +on a Unix machine. This option has no effect unless the +.B \-b +option is also used; +it has no effect on platforms other than MS-DOS and MS-Windows. +.TP +.BR \-V ", " \-\^\-version +Print the version number of +.B grep +to standard error. This version number should +be included in all bug reports (see below). +.TP +.BR \-v ", " \-\^\-invert-match +Invert the sense of matching, to select non-matching lines. +.TP +.BR \-w ", " \-\^\-word-regexp Select only those lines containing matches that form whole words. The test is that the matching substring must either be at the beginning of the line, or preceded by a non-word constituent @@ -167,19 +314,40 @@ character. Similarly, it must be either at the end of the line or followed by a non-word constituent character. Word-constituent characters are letters, digits, and the underscore. .TP -.B \-x +.BR \-x ", " \-\^\-line-regexp Select only those matches that exactly match the whole line. -.PD +.TP +.B \-y +Obsolete synonym for +.BR \-i . +.TP +.BR \-Z ", " \-\^\-null +Output a zero byte (the \s-1ASCII\s0 +.B NUL +character) instead of the character that normally follows a file name. +For example, +.B "grep \-lZ" +outputs a zero byte after each file name instead of the usual newline. +This option makes the output unambiguous, even in the presence of file +names containing unusual characters like newlines.
This option can be +used with commands like +.BR "find \-print0" , +.BR "perl \-0" , +.BR "sort \-z" , +and +.B "xargs \-0" +to process arbitrary file names, +even those that contain newline characters. .SH "REGULAR EXPRESSIONS" .PP A regular expression is a pattern that describes a set of strings. -Regular expressions are constructed analagously to arithmetic +Regular expressions are constructed analogously to arithmetic expressions, by using various operators to combine smaller expressions. .PP .B Grep understands two different versions of regular expression syntax: -``basic'' and ``extended.'' In -.RB "GNU\ " grep , +\*(lqbasic\*(rq and \*(lqextended.\*(rq In +.RB "\s-1GNU\s0\ " grep , there is no difference in available functionality using either syntax. In other implementations, basic regular expressions are less powerful. The following description applies to extended regular expressions; @@ -203,7 +371,7 @@ then it matches any character in the list. For example, the regular expression .B [0123456789] -matches any single digit. A range of ASCII characters +matches any single digit. A range of characters may be specified by giving the first and last characters, separated by a hyphen. Finally, certain named classes of characters are predefined. @@ -220,12 +388,13 @@ Their names are self explanatory, and they are .BR [:upper:] , and .BR [:xdigit:]. -For example, +For example, .B [[:alnum:]] means .BR [0-9A-Za-z] , -except the latter form is dependent upon the ASCII character encoding, -whereas the former is portable. +except the latter form depends upon the \s-1POSIX\s0 locale and the +\s-1ASCII\s0 character encoding, whereas the former is independent +of locale and character set. (Note that the brackets in these class names are part of the symbolic names, and must be included in addition to the brackets delimiting the bracket list.) Most metacharacters lose their special meaning @@ -247,7 +416,7 @@ is a synonym for and .B \eW is a synonym for -.BR [^[:alnum:]] . 
+.BR [^[:alnum:]] . .PP The caret .B ^ @@ -269,8 +438,7 @@ matches the empty string provided it's .I not at the edge of a word. .PP -A regular expression matching a single character may be followed -by one of several repetition operators: +A regular expression may be followed by one of several repetition operators: .PD 0 .TP .B ? @@ -292,11 +460,6 @@ The preceding item is matched .I n or more times. .TP -.BI {, m } -The preceding item is optional and is matched at most -.I m -times. -.TP .BI { n , m } The preceding item is matched at least .I n @@ -346,12 +509,113 @@ versions and .BR \e) . .PP -In +Traditional .B egrep -the metacharacter +did not support the .B { -loses its special meaning; instead use -.BR \e{ . +metacharacter, and some +.B egrep +implementations support +.B \e{ +instead, so portable scripts should avoid +.B { +in +.B egrep +patterns and should use +.B [{] +to match a literal +.BR { . +.PP +\s-1GNU\s0 +.B egrep +attempts to support traditional usage by assuming that +.B { +is not special if it would be the start of an invalid interval +specification. For example, the shell command +.B "egrep '{1'" +searches for the two-character string +.B {1 +instead of reporting a syntax error in the regular expression. +\s-1POSIX.2\s0 allows this behavior as an extension, but portable scripts +should avoid it. +.SH "ENVIRONMENT VARIABLES" +.TP +.B GREP_OPTIONS +This variable specifies default options to be placed in front of any +explicit options. For example, if +.B GREP_OPTIONS +is +.BR "'\-\^\-binary-files=without-match \-\^\-directories=skip'" , +.B grep +behaves as if the two options +.B \-\^\-binary-files=without-match +and +.B \-\^\-directories=skip +had been specified before any explicit options. +Option specifications are separated by whitespace. +A backslash escapes the next character, +so it can be used to specify an option containing whitespace or a backslash.
+.TP +\fBLC_ALL\fP, \fBLC_MESSAGES\fP, \fBLANG\fP +These variables specify the +.B LC_MESSAGES +locale, which determines the language that +.B grep +uses for messages. +The locale is determined by the first of these variables that is set. +American English is used if none of these environment variables are set, +or if the message catalog is not installed, or if +.B grep +was not compiled with national language support (\s-1NLS\s0). +.TP +\fBLC_ALL\fP, \fBLC_CTYPE\fP, \fBLANG\fP +These variables specify the +.B LC_CTYPE +locale, which determines the type of characters, e.g., which +characters are whitespace. +The locale is determined by the first of these variables that is set. +The \s-1POSIX\s0 locale is used if none of these environment variables +are set, or if the locale catalog is not installed, or if +.B grep +was not compiled with national language support (\s-1NLS\s0). +.TP +.B POSIXLY_CORRECT +If set, +.B grep +behaves as \s-1POSIX.2\s0 requires; otherwise, +.B grep +behaves more like other \s-1GNU\s0 programs. +\s-1POSIX.2\s0 requires that options that follow file names must be +treated as file names; by default, such options are permuted to the +front of the operand list and are treated as options. +Also, \s-1POSIX.2\s0 requires that unrecognized options be diagnosed as +\*(lqillegal\*(rq, but since they are not really against the law the default +is to diagnose them as \*(lqinvalid\*(rq. +.B POSIXLY_CORRECT +also disables \fB_\fP\fIN\fP\fB_GNU_nonoption_argv_flags_\fP, +described below. +.TP +\fB_\fP\fIN\fP\fB_GNU_nonoption_argv_flags_\fP +(Here +.I N +is +.BR grep 's +numeric process ID.) If the +.IR i th +character of this environment variable's value is +.BR 1 , +do not consider the +.IR i th +operand of +.B grep +to be an option, even if it appears to be one. 
+A shell can put this variable in the environment for each command it runs, +specifying which operands are the results of file name wildcard +expansion and therefore should not be treated as options. +This behavior is available only with the \s-1GNU\s0 C library, and only +when +.B POSIXLY_CORRECT +is not set. .SH DIAGNOSTICS .PP Normally, exit status is 0 if matches were found, @@ -364,8 +628,9 @@ other system errors. .SH BUGS .PP Email bug reports to -.BR bug-gnu-utils@prep.ai.mit.edu . -Be sure to include the word ``grep'' somewhere in the ``Subject:'' field. +.BR bug-gnu-utils@gnu.org . +Be sure to include the word \*(lqgrep\*(rq somewhere in the +\*(lqSubject:\*(rq field. .PP Large repetition counts in the .BI { m , n } @@ -377,7 +642,5 @@ and space, and may cause to run out of memory. .PP Backreferences are very slow, and may require exponential time. -.PP -Files which have extremely long sequences of characters without newlines -may cause grep to run out of memory. An example is sparse files, which -can read as arbritarily long sequences of nul characters. +.\" Work around problems with some troff -man implementations. +.br diff --git a/gnu/usr.bin/grep/grep.c b/gnu/usr.bin/grep/grep.c index 37e6d19adfe..36c836274d3 100644 --- a/gnu/usr.bin/grep/grep.c +++ b/gnu/usr.bin/grep/grep.c @@ -1,5 +1,5 @@ /* grep.c - main driver file for grep. - Copyright (C) 1992 Free Software Foundation, Inc. + Copyright 1992, 1997-1999, 2000 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,336 +13,468 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + 02111-1307, USA. */ - Written July 1992 by Mike Haertel. */ +/* Written July 1992 by Mike Haertel. 
*/ -#ifndef lint -static char rcsid[] = "$Id: grep.c,v 1.2 1997/08/06 23:44:11 grr Exp $"; -#endif /* not lint */ - -#include -#include - -#ifndef errno -extern int errno; +#ifdef HAVE_CONFIG_H +# include #endif - -#ifdef STDC_HEADERS -#include -#else #include -extern char *malloc(), *realloc(); -extern void free(); -#endif - -#if defined(STDC_HEADERS) || defined(HAVE_STRING_H) -#include -#ifdef NEED_MEMORY_H -#include -#endif -#else -#include -#ifdef __STDC__ -extern void *memchr(); -#else -extern char *memchr(); -#endif -#define strrchr rindex +#include +#if defined(HAVE_MMAP) +# include #endif - -#ifdef HAVE_UNISTD_H -#include -#include -#include -#else -#define O_RDONLY 0 -extern int open(), read(), close(); +#if defined(HAVE_SETRLIMIT) +# include +# include #endif - -#include "getpagesize.h" +#include +#include "system.h" +#include "getopt.h" #include "grep.h" +#include "savedir.h" #undef MAX #define MAX(A,B) ((A) > (B) ? (A) : (B)) -/* Provide missing ANSI features if necessary. */ +struct stats +{ + struct stats *parent; + struct stat stat; +}; -#ifndef HAVE_STRERROR -extern int sys_nerr; -extern char *sys_errlist[]; -#define strerror(E) ((E) < sys_nerr ? sys_errlist[(E)] : "bogus error number") -#endif +/* base of chain of stat buffers, used to detect directory loops */ +static struct stats stats_base; -#ifndef HAVE_MEMCHR -#ifdef __STDC__ -#define VOID void -#else -#define VOID char -#endif -VOID * -memchr(vp, c, n) - VOID *vp; - int c; - size_t n; +/* if non-zero, display usage information and exit */ +static int show_help; + +/* If non-zero, print the version on standard output and exit. */ +static int show_version; + +/* If nonzero, use mmap if possible. */ +static int mmap_option; + +/* Short options. */ +static char const short_options[] = +"0123456789A:B:C::EFGHIUVX:abcd:e:f:hiLlnqrsuvwxyZz"; + +/* Non-boolean long options that have no corresponding short equivalents. 
*/ +enum { - unsigned char *p; + BINARY_FILES_OPTION = CHAR_MAX + 1 +}; + +/* Long options equivalences. */ +static struct option long_options[] = +{ + {"after-context", required_argument, NULL, 'A'}, + {"basic-regexp", no_argument, NULL, 'G'}, + {"before-context", required_argument, NULL, 'B'}, + {"binary-files", required_argument, NULL, BINARY_FILES_OPTION}, + {"byte-offset", no_argument, NULL, 'b'}, + {"context", optional_argument, NULL, 'C'}, + {"count", no_argument, NULL, 'c'}, + {"directories", required_argument, NULL, 'd'}, + {"extended-regexp", no_argument, NULL, 'E'}, + {"file", required_argument, NULL, 'f'}, + {"files-with-matches", no_argument, NULL, 'l'}, + {"files-without-match", no_argument, NULL, 'L'}, + {"fixed-regexp", no_argument, NULL, 'F'}, + {"fixed-strings", no_argument, NULL, 'F'}, + {"help", no_argument, &show_help, 1}, + {"ignore-case", no_argument, NULL, 'i'}, + {"line-number", no_argument, NULL, 'n'}, + {"line-regexp", no_argument, NULL, 'x'}, + {"mmap", no_argument, &mmap_option, 1}, + {"no-filename", no_argument, NULL, 'h'}, + {"no-messages", no_argument, NULL, 's'}, + {"null", no_argument, NULL, 'Z'}, + {"null-data", no_argument, NULL, 'z'}, + {"quiet", no_argument, NULL, 'q'}, + {"recursive", no_argument, NULL, 'r'}, + {"regexp", required_argument, NULL, 'e'}, + {"invert-match", no_argument, NULL, 'v'}, + {"silent", no_argument, NULL, 'q'}, + {"text", no_argument, NULL, 'a'}, + {"binary", no_argument, NULL, 'U'}, + {"unix-byte-offsets", no_argument, NULL, 'u'}, + {"version", no_argument, NULL, 'V'}, + {"with-filename", no_argument, NULL, 'H'}, + {"word-regexp", no_argument, NULL, 'w'}, + {0, 0, 0, 0} +}; - for (p = (unsigned char *) vp; n--; ++p) - if (*p == c) - return (VOID *) p; - return 0; -} -#endif - /* Define flags declared in grep.h. */ -char *matcher; int match_icase; int match_words; int match_lines; - -/* Functions we'll use to search. 
*/ -static void (*compile)(); -static char *(*execute)(); +unsigned char eolbyte; /* For error messages. */ static char *prog; -static char *filename; +static char const *filename; static int errseen; +char const *matcher; + +/* How to handle directories. */ +static enum + { + READ_DIRECTORIES, + RECURSE_DIRECTORIES, + SKIP_DIRECTORIES + } directories; + +static int ck_atoi PARAMS ((char const *, int *)); +static void usage PARAMS ((int)) __attribute__((noreturn)); +static void error PARAMS ((const char *, int)); +static void setmatcher PARAMS ((char const *)); +static int install_matcher PARAMS ((char const *)); +static int prepend_args PARAMS ((char const *, char *, char **)); +static void prepend_default_options PARAMS ((char const *, int *, char ***)); +static char *page_alloc PARAMS ((size_t, char **)); +static int reset PARAMS ((int, char const *, struct stats *)); +static int fillbuf PARAMS ((size_t, struct stats *)); +static int grepbuf PARAMS ((char *, char *)); +static void prtext PARAMS ((char *, char *, int *)); +static void prpending PARAMS ((char *)); +static void prline PARAMS ((char *, char *, int)); +static void print_offset_sep PARAMS ((off_t, int)); +static void nlscan PARAMS ((char *)); +static int grep PARAMS ((int, char const *, struct stats *)); +static int grepdir PARAMS ((char const *, struct stats *)); +static int grepfile PARAMS ((char const *, struct stats *)); +#if O_BINARY +static inline int undossify_input PARAMS ((register char *, size_t)); +#endif + +/* Functions we'll use to search. */ +static void (*compile) PARAMS ((char *, size_t)); +static char *(*execute) PARAMS ((char *, size_t, char **)); + /* Print a message and possibly an error string. Remember that something awful happened. 
*/ static void -error(mesg, errnum) -#ifdef __STDC__ - const -#endif - char *mesg; - int errnum; +error (const char *mesg, int errnum) { if (errnum) - fprintf(stderr, "%s: %s: %s\n", prog, mesg, strerror(errnum)); + fprintf (stderr, "%s: %s: %s\n", prog, mesg, strerror (errnum)); else - fprintf(stderr, "%s: %s\n", prog, mesg); + fprintf (stderr, "%s: %s\n", prog, mesg); errseen = 1; } -/* Like error(), but die horribly after printing. */ +/* Like error (), but die horribly after printing. */ void -fatal(mesg, errnum) -#ifdef __STDC__ - const -#endif - char *mesg; - int errnum; +fatal (const char *mesg, int errnum) { - error(mesg, errnum); - exit(2); + error (mesg, errnum); + exit (2); } /* Interface to handle errors and fix library lossage. */ char * -xmalloc(size) - size_t size; +xmalloc (size_t size) { char *result; - result = malloc(size); + result = malloc (size); if (size && !result) - fatal("memory exhausted", 0); + fatal (_("memory exhausted"), 0); return result; } /* Interface to handle errors and fix some library lossage. */ char * -xrealloc(ptr, size) - char *ptr; - size_t size; +xrealloc (char *ptr, size_t size) { char *result; if (ptr) - result = realloc(ptr, size); + result = realloc (ptr, size); else - result = malloc(size); + result = malloc (size); if (size && !result) - fatal("memory exhausted", 0); + fatal (_("memory exhausted"), 0); return result; } -#if !defined(HAVE_VALLOC) -#define valloc(x) malloc(x) -#define vfree(x) free(x) -#else -#ifdef __STDC__ -extern void *valloc(size_t); -#define vfree(x) ; -#else -extern char *valloc(); -#define vfree(x) ; -#endif -#endif +/* Convert STR to a positive integer, storing the result in *OUT. + If STR is not a valid integer, return -1 (otherwise 0). */ +static int +ck_atoi (char const *str, int *out) +{ + char const *p; + for (p = str; *p; p++) + if (*p < '0' || *p > '9') + return -1; + + *out = atoi (optarg); + return 0; +} -#ifndef MULT -#define MULT 5 -#endif /* Hairy buffering mechanism for grep. 
The intent is to keep all reads aligned on a page boundary and multiples of the page size. */ +static char *ubuffer; /* Unaligned base of buffer. */ static char *buffer; /* Base of buffer. */ static size_t bufsalloc; /* Allocated size of buffer save region. */ static size_t bufalloc; /* Total buffer size. */ +#define PREFERRED_SAVE_FACTOR 5 /* Preferred value of bufalloc / bufsalloc. */ static int bufdesc; /* File descriptor. */ static char *bufbeg; /* Beginning of user-visible stuff. */ static char *buflim; /* Limit of user-visible stuff. */ +static size_t pagesize; /* alignment of memory pages */ +static off_t bufoffset; /* Read offset; defined on regular files. */ -#if defined(HAVE_WORKING_MMAP) -#include -#include -#include - -static int bufmapped; /* True for ordinary files. */ -static struct stat bufstat; /* From fstat(). */ -static off_t bufoffset; /* What read() normally remembers. */ +#if defined(HAVE_MMAP) +static int bufmapped; /* True if buffer is memory-mapped. */ +static off_t initial_bufoffset; /* Initial value of bufoffset. */ #endif -/* Reset the buffer for a new file. Initialize - on the first time through. */ -void -reset(fd) - int fd; +/* Return VAL aligned to the next multiple of ALIGNMENT. VAL can be + an integer or a pointer. Both args must be free of side effects. */ +#define ALIGN_TO(val, alignment) \ + ((size_t) (val) % (alignment) == 0 \ + ? (val) \ + : (val) + ((alignment) - (size_t) (val) % (alignment))) + +/* Return the address of a page-aligned buffer of size SIZE, + reallocating it from *UP. Set *UP to the newly allocated (but + possibly unaligned) buffer used to build the aligned buffer. To + free the buffer, free (*UP). */ +static char * +page_alloc (size_t size, char **up) { - static int initialized; + size_t asize = size + pagesize - 1; + if (size <= asize) + { + char *p = *up ? 
realloc (*up, asize) : malloc (asize); + if (p) + { + *up = p; + return ALIGN_TO (p, pagesize); + } + } + return NULL; +} - if (!initialized) +/* Reset the buffer for a new file, returning zero if we should skip it. + Initialize on the first time through. */ +static int +reset (int fd, char const *file, struct stats *stats) +{ + if (pagesize) + bufsalloc = ALIGN_TO (bufalloc / PREFERRED_SAVE_FACTOR, pagesize); + else { - initialized = 1; + size_t ubufsalloc; + pagesize = getpagesize (); + if (pagesize == 0) + abort (); #ifndef BUFSALLOC - bufsalloc = MAX(8192, getpagesize()); + ubufsalloc = MAX (8192, pagesize); #else - bufsalloc = BUFSALLOC; + ubufsalloc = BUFSALLOC; #endif - bufalloc = MULT * bufsalloc; + bufsalloc = ALIGN_TO (ubufsalloc, pagesize); + bufalloc = PREFERRED_SAVE_FACTOR * bufsalloc; /* The 1 byte of overflow is a kludge for dfaexec(), which inserts a sentinel newline at the end of the buffer being searched. There's gotta be a better way... */ - buffer = valloc(bufalloc + 1); - if (!buffer) - fatal("memory exhausted", 0); - bufbeg = buffer; - buflim = buffer; + if (bufsalloc < ubufsalloc + || bufalloc / PREFERRED_SAVE_FACTOR != bufsalloc + || bufalloc + 1 < bufalloc + || ! 
(buffer = page_alloc (bufalloc + 1, &ubuffer))) + fatal (_("memory exhausted"), 0); } + + buflim = buffer; bufdesc = fd; -#if defined(HAVE_WORKING_MMAP) - if (fstat(fd, &bufstat) < 0 || !S_ISREG(bufstat.st_mode)) - bufmapped = 0; - else + + if (fstat (fd, &stats->stat) != 0) + { + error ("fstat", errno); + return 0; + } + if (directories == SKIP_DIRECTORIES && S_ISDIR (stats->stat.st_mode)) + return 0; + if (S_ISREG (stats->stat.st_mode)) { - bufmapped = 1; - bufoffset = lseek(fd, 0, 1); + if (file) + bufoffset = 0; + else + { + bufoffset = lseek (fd, 0, SEEK_CUR); + if (bufoffset < 0) + { + error ("lseek", errno); + return 0; + } + } +#ifdef HAVE_MMAP + initial_bufoffset = bufoffset; + bufmapped = mmap_option && bufoffset % pagesize == 0; +#endif } + else + { +#ifdef HAVE_MMAP + bufmapped = 0; #endif + } + return 1; } /* Read new stuff into the buffer, saving the specified amount of old stuff. When we're done, 'bufbeg' points to the beginning of the buffer contents, and 'buflim' - points just after the end. Return count of new stuff. */ + points just after the end. Return zero if there's an error. */ static int -fillbuf(save) - size_t save; +fillbuf (size_t save, struct stats *stats) { - char *nbuffer, *dp, *sp; - int cc; -#if defined(HAVE_WORKING_MMAP) - caddr_t maddr; -#endif - static int pagesize; + size_t fillsize = 0; + int cc = 1; + size_t readsize; - if (pagesize == 0 && (pagesize = getpagesize()) == 0) - abort(); + /* Offset from start of unaligned buffer to start of old stuff + that we want to save. */ + size_t saved_offset = buflim - ubuffer - save; - /* If the current line won't easily fit in the existing buffer - allocate a MULT larger one. 
This can result in running out - of memory on sparse files or those containing longs runs of - 's or other non- characters */ - - if (save > bufsalloc) - { - while (save > bufsalloc) - bufsalloc *= 2; - bufalloc = MULT * bufsalloc; - nbuffer = valloc(bufalloc + 1); - if (!nbuffer) - fatal("memory exhausted", 0); - } - else + if (bufsalloc < save) { - nbuffer = buffer; - buffer = NULL; - } + size_t aligned_save = ALIGN_TO (save, pagesize); + size_t maxalloc = (size_t) -1; + size_t newalloc; - sp = buflim - save; - dp = nbuffer + bufsalloc - save; - bufbeg = dp; - while (save--) - *dp++ = *sp++; + if (S_ISREG (stats->stat.st_mode)) + { + /* Calculate an upper bound on how much memory we should allocate. + We can't use ALIGN_TO here, since off_t might be longer than + size_t. Watch out for arithmetic overflow. */ + off_t to_be_read = stats->stat.st_size - bufoffset; + size_t slop = to_be_read % pagesize; + off_t aligned_to_be_read = to_be_read + (slop ? pagesize - slop : 0); + off_t maxalloc_off = aligned_save + aligned_to_be_read; + if (0 <= maxalloc_off && maxalloc_off == (size_t) maxalloc_off) + maxalloc = maxalloc_off; + } - /* We may have allocated a new, larger buffer. Since - there is no portable vfree(), we may just have to forget - about the old one. Sorry. */ - if (buffer != NULL) - vfree(buffer); - buffer = nbuffer; + /* Grow bufsalloc until it is at least as great as `save'; but + if there is an overflow, just grow it to the next page boundary. 
*/ + while (bufsalloc < save) + if (bufsalloc < bufsalloc * 2) + bufsalloc *= 2; + else + { + bufsalloc = aligned_save; + break; + } -#if defined(HAVE_WORKING_MMAP) - if (bufmapped && bufoffset % pagesize == 0 - && bufstat.st_size - bufoffset >= bufalloc - bufsalloc) - { - maddr = buffer + bufsalloc; - maddr = mmap(maddr, bufalloc - bufsalloc, PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_FIXED, bufdesc, bufoffset); - if (maddr == (caddr_t) -1) + /* Grow the buffer size to be PREFERRED_SAVE_FACTOR times + bufsalloc.... */ + newalloc = PREFERRED_SAVE_FACTOR * bufsalloc; + if (maxalloc < newalloc) { - fprintf(stderr, "%s: warning: %s: %s\n", filename, - strerror(errno)); - goto tryread; + /* ... except don't grow it more than a pagesize past the + file size, as that might cause unnecessary memory + exhaustion if the file is large. */ + newalloc = maxalloc; + bufsalloc = aligned_save; } -#if 0 - /* You might thing this (or MADV_WILLNEED) would help, - but it doesn't, at least not on a Sun running 4.1. - In fact, it actually slows us down about 30%! */ - madvise(maddr, bufalloc - bufsalloc, MADV_SEQUENTIAL); -#endif - cc = bufalloc - bufsalloc; - bufoffset += cc; + + /* Check that the above calculations made progress, which might + not occur if there is arithmetic overflow. If there's no + progress, or if the new buffer size is larger than the old + and buffer reallocation fails, report memory exhaustion. */ + if (bufsalloc < save || newalloc < save + || (newalloc == save && newalloc != maxalloc) + || (bufalloc < newalloc + && ! (buffer + = page_alloc ((bufalloc = newalloc) + 1, &ubuffer)))) + fatal (_("memory exhausted"), 0); } - else + + bufbeg = buffer + bufsalloc - save; + memmove (bufbeg, ubuffer + saved_offset, save); + readsize = bufalloc - bufsalloc; + +#if defined(HAVE_MMAP) + if (bufmapped) { - tryread: - /* We come here when we're not going to use mmap() any more. - Note that we need to synchronize the file offset the - first time through. 
*/ - if (bufmapped) + size_t mmapsize = readsize; + + /* Don't mmap past the end of the file; some hosts don't allow this. + Use `read' on the last page. */ + if (stats->stat.st_size - bufoffset < mmapsize) { + mmapsize = stats->stat.st_size - bufoffset; + mmapsize -= mmapsize % pagesize; + } + + if (mmapsize + && (mmap ((caddr_t) (buffer + bufsalloc), mmapsize, + PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_FIXED, + bufdesc, bufoffset) + != (caddr_t) -1)) + { + /* Do not bother to use madvise with MADV_SEQUENTIAL or + MADV_WILLNEED on the mmapped memory. One might think it + would help, but it slows us down about 30% on SunOS 4.1. */ + fillsize = mmapsize; + } + else + { + /* Stop using mmap on this file. Synchronize the file + offset. Do not warn about mmap failures. On some hosts + (e.g. Solaris 2.5) mmap can fail merely because some + other process has an advisory read lock on the file. + There's no point alarming the user about this misfeature. */ bufmapped = 0; - lseek(bufdesc, bufoffset, 0); + if (bufoffset != initial_bufoffset + && lseek (bufdesc, bufoffset, SEEK_SET) < 0) + { + error ("lseek", errno); + cc = 0; + } } - cc = read(bufdesc, buffer + bufsalloc, bufalloc - bufsalloc); } -#else - cc = read(bufdesc, buffer + bufsalloc, bufalloc - bufsalloc); +#endif /*HAVE_MMAP*/ + + if (! fillsize) + { + ssize_t bytesread; + while ((bytesread = read (bufdesc, buffer + bufsalloc, readsize)) < 0 + && errno == EINTR) + continue; + if (bytesread < 0) + cc = 0; + else + fillsize = bytesread; + } + + bufoffset += fillsize; +#if O_BINARY + if (fillsize) + fillsize = undossify_input (buffer + bufsalloc, fillsize); #endif - if (cc > 0) - buflim = buffer + bufsalloc + cc; - else - buflim = buffer + bufsalloc; + buflim = buffer + bufsalloc + fillsize; return cc; } /* Flags controlling the style of output. */ +static enum + { + BINARY_BINARY_FILES, + TEXT_BINARY_FILES, + WITHOUT_MATCH_BINARY_FILES + } binary_files; /* How to handle binary files. 
*/ +static int filename_mask; /* If zero, output nulls after filenames. */ static int out_quiet; /* Suppress all normal output. */ static int out_invert; /* Print nonmatching stuff. */ static int out_file; /* Print filenames. */ @@ -350,54 +482,79 @@ static int out_line; /* Print line numbers. */ static int out_byte; /* Print byte offsets. */ static int out_before; /* Lines of leading context. */ static int out_after; /* Lines of trailing context. */ +static int count_matches; /* Count matching lines. */ +static int list_files; /* List matching files. */ +static int no_filenames; /* Suppress file names. */ +static int suppress_errors; /* Suppress diagnostics. */ /* Internal variables to keep track of byte count, context, etc. */ -static size_t totalcc; /* Total character count before bufbeg. */ +static off_t totalcc; /* Total character count before bufbeg. */ static char *lastnl; /* Pointer after last newline counted. */ static char *lastout; /* Pointer after last character output; NULL if no character has been output or if it's conceptually before bufbeg. */ -static size_t totalnl; /* Total newline count before lastnl. */ +static off_t totalnl; /* Total newline count before lastnl. */ static int pending; /* Pending lines of output. */ +static int done_on_match; /* Stop scanning file on first match */ + +#if O_BINARY +# include "dosbuf.c" +#endif static void -nlscan(lim) - char *lim; +nlscan (char *lim) { char *beg; + for (beg = lastnl; (beg = memchr (beg, eolbyte, lim - beg)); beg++) + totalnl++; + lastnl = lim; +} + +static void +print_offset_sep (off_t pos, int sep) +{ + /* Do not rely on printf to print pos, since off_t may be longer than long, + and long long is not portable. 
*/ + + char buf[sizeof pos * CHAR_BIT]; + char *p = buf + sizeof buf - 1; + *p = sep; + + do + *--p = '0' + pos % 10; + while ((pos /= 10) != 0); - for (beg = lastnl; beg < lim; ++beg) - if (*beg == '\n') - ++totalnl; - lastnl = beg; + fwrite (p, 1, buf + sizeof buf - p, stdout); } static void -prline(beg, lim, sep) - char *beg; - char *lim; - char sep; +prline (char *beg, char *lim, int sep) { if (out_file) - printf("%s%c", filename, sep); + printf ("%s%c", filename, sep & filename_mask); if (out_line) { - nlscan(beg); - printf("%d%c", ++totalnl, sep); + nlscan (beg); + print_offset_sep (++totalnl, sep); lastnl = lim; } if (out_byte) - printf("%lu%c", totalcc + (beg - bufbeg), sep); - fwrite(beg, 1, lim - beg, stdout); - if (ferror(stdout)) - error("writing output", errno); + { + off_t pos = totalcc + (beg - bufbeg); +#if O_BINARY + pos = dossified_pos (pos); +#endif + print_offset_sep (pos, sep); + } + fwrite (beg, 1, lim - beg, stdout); + if (ferror (stdout)) + error (_("writing output"), errno); lastout = lim; } /* Print pending lines of trailing context prior to LIM. */ static void -prpending(lim) - char *lim; +prpending (char *lim) { char *nl; @@ -406,28 +563,26 @@ prpending(lim) while (pending > 0 && lastout < lim) { --pending; - if ((nl = memchr(lastout, '\n', lim - lastout)) != 0) + if ((nl = memchr (lastout, eolbyte, lim - lastout)) != 0) ++nl; else nl = lim; - prline(lastout, nl, '-'); + prline (lastout, nl, '-'); } } /* Print the lines between BEG and LIM. Deal with context crap. If NLINESP is non-null, store a count of lines between BEG and LIM. 
*/ static void -prtext(beg, lim, nlinesp) - char *beg; - char *lim; - int *nlinesp; +prtext (char *beg, char *lim, int *nlinesp) { static int used; /* avoid printing "--" before any output */ char *bp, *p, *nl; + char eol = eolbyte; int i, n; if (!out_quiet && pending > 0) - prpending(beg); + prpending (beg); p = beg; @@ -440,17 +595,17 @@ prtext(beg, lim, nlinesp) if (p > bp) do --p; - while (p > bp && p[-1] != '\n'); + while (p > bp && p[-1] != eol); /* We only print the "--" separator if our output is discontiguous from the last output in the file. */ if ((out_before || out_after) && used && p != lastout) - puts("--"); + puts ("--"); while (p < beg) { - nl = memchr(p, '\n', beg - p); - prline(p, nl + 1, '-'); + nl = memchr (p, eol, beg - p); + prline (p, nl + 1, '-'); p = nl + 1; } } @@ -460,21 +615,21 @@ prtext(beg, lim, nlinesp) /* Caller wants a line count. */ for (n = 0; p < lim; ++n) { - if ((nl = memchr(p, '\n', lim - p)) != 0) + if ((nl = memchr (p, eol, lim - p)) != 0) ++nl; else nl = lim; if (!out_quiet) - prline(p, nl, ':'); + prline (p, nl, ':'); p = nl; } *nlinesp = n; } else if (!out_quiet) - prline(beg, lim, ':'); + prline (beg, lim, ':'); - pending = out_after; + pending = out_quiet ? 0 : out_after; used = 1; } @@ -482,51 +637,66 @@ prtext(beg, lim, nlinesp) between matching lines if OUT_INVERT is true). Return a count of lines printed. */ static int -grepbuf(beg, lim) - char *beg; - char *lim; +grepbuf (char *beg, char *lim) { int nlines, n; register char *p, *b; char *endp; + char eol = eolbyte; nlines = 0; p = beg; while ((b = (*execute)(p, lim - p, &endp)) != 0) { /* Avoid matching the empty line at the end of the buffer. 
*/ - if (b == lim && ((b > beg && b[-1] == '\n') || b == beg)) + if (b == lim && ((b > beg && b[-1] == eol) || b == beg)) break; if (!out_invert) { - prtext(b, endp, (int *) 0); + prtext (b, endp, (int *) 0); nlines += 1; + if (done_on_match) + return nlines; } else if (p < b) { - prtext(p, b, &n); + prtext (p, b, &n); nlines += n; } p = endp; } if (out_invert && p < lim) { - prtext(p, lim, &n); + prtext (p, lim, &n); nlines += n; } return nlines; } -/* Search a given file. Return a count of lines printed. */ +/* Search a given file. Normally, return a count of lines printed; + but if the file is a directory and we search it recursively, then + return -2 if there was a match, and -1 otherwise. */ static int -grep(fd) - int fd; +grep (int fd, char const *file, struct stats *stats) { int nlines, i; + int not_text; size_t residue, save; char *beg, *lim; + char eol = eolbyte; + + if (!reset (fd, file, stats)) + return 0; - reset(fd); + if (file && directories == RECURSE_DIRECTORIES + && S_ISDIR (stats->stat.st_mode)) + { + /* Close fd now, so that we don't open a lot of file descriptors + when we recurse deeply. */ + if (close (fd) != 0) + error (file, errno); + return grepdir (file, stats) - 2; + } totalcc = 0; lastout = 0; @@ -537,27 +707,39 @@ grep(fd) residue = 0; save = 0; + if (! fillbuf (save, stats)) + { + if (! (is_EISDIR (errno, file) && suppress_errors)) + error (filename, errno); + return 0; + } + + not_text = (((binary_files == BINARY_BINARY_FILES && !out_quiet) + || binary_files == WITHOUT_MATCH_BINARY_FILES) + && memchr (bufbeg, eol ? 
'\0' : '\200', buflim - bufbeg)); + if (not_text && binary_files == WITHOUT_MATCH_BINARY_FILES) + return 0; + done_on_match += not_text; + out_quiet += not_text; + for (;;) { - if (fillbuf(save) < 0) - { - error(filename, errno); - return nlines; - } lastnl = bufbeg; if (lastout) lastout = bufbeg; if (buflim - bufbeg == save) break; beg = bufbeg + save - residue; - for (lim = buflim; lim > beg && lim[-1] != '\n'; --lim) + for (lim = buflim; lim > beg && lim[-1] != eol; --lim) ; residue = buflim - lim; if (beg < lim) { - nlines += grepbuf(beg, lim); + nlines += grepbuf (beg, lim); if (pending) - prpending(lim); + prpending (lim); + if (nlines && done_on_match && !out_invert) + goto finish_grep; } i = 0; beg = lim; @@ -566,82 +748,440 @@ grep(fd) ++i; do --beg; - while (beg > bufbeg && beg[-1] != '\n'); + while (beg > bufbeg && beg[-1] != eol); } if (beg != lastout) lastout = 0; save = residue + lim - beg; totalcc += buflim - bufbeg - save; if (out_line) - nlscan(beg); + nlscan (beg); + if (! fillbuf (save, stats)) + { + if (! (is_EISDIR (errno, file) && suppress_errors)) + error (filename, errno); + goto finish_grep; + } } if (residue) { - nlines += grepbuf(bufbeg + save - residue, buflim); + *buflim++ = eol; + nlines += grepbuf (bufbeg + save - residue, buflim); if (pending) - prpending(buflim); + prpending (buflim); } + + finish_grep: + done_on_match -= not_text; + out_quiet -= not_text; + if ((not_text & ~out_quiet) && nlines != 0) + printf (_("Binary file %s matches\n"), filename); return nlines; } -static char version[] = "GNU grep version 2.0"; +static int +grepfile (char const *file, struct stats *stats) +{ + int desc; + int count; + int status; + + if (! 
file) + { + desc = 0; + filename = _("(standard input)"); + } + else + { + while ((desc = open (file, O_RDONLY)) < 0 && errno == EINTR) + continue; + + if (desc < 0) + { + int e = errno; + + if (is_EISDIR (e, file) && directories == RECURSE_DIRECTORIES) + { + if (stat (file, &stats->stat) != 0) + { + error (file, errno); + return 1; + } + + return grepdir (file, stats); + } + + if (!suppress_errors) + { + if (directories == SKIP_DIRECTORIES) + switch (e) + { +#ifdef EISDIR + case EISDIR: + return 1; +#endif + case EACCES: + /* When skipping directories, don't worry about + directories that can't be opened. */ + if (stat (file, &stats->stat) == 0 + && S_ISDIR (stats->stat.st_mode)) + return 1; + break; + } + + error (file, e); + } + + return 1; + } + + filename = file; + } + +#if O_BINARY + /* Set input to binary mode. Pipes are simulated with files + on DOS, so this includes the case of "foo | grep bar". */ + if (!isatty (desc)) + SET_BINARY (desc); +#endif + + count = grep (desc, file, stats); + if (count < 0) + status = count + 2; + else + { + if (count_matches) + { + if (out_file) + printf ("%s%c", filename, ':' & filename_mask); + printf ("%d\n", count); + } + + status = !count; + if (list_files == 1 - 2 * status) + printf ("%s%c", filename, '\n' & filename_mask); + + if (file) + while (close (desc) != 0) + if (errno != EINTR) + { + error (file, errno); + break; + } + } + + return status; +} + +static int +grepdir (char const *dir, struct stats *stats) +{ + int status = 1; + struct stats *ancestor; + char *name_space; + + for (ancestor = stats; (ancestor = ancestor->parent) != 0; ) + if (ancestor->stat.st_ino == stats->stat.st_ino + && ancestor->stat.st_dev == stats->stat.st_dev) + { + if (!suppress_errors) + fprintf (stderr, _("%s: warning: %s: %s\n"), prog, dir, + _("recursive directory loop")); + return 1; + } + + name_space = savedir (dir, (unsigned) stats->stat.st_size); + + if (! 
name_space) + { + if (errno) + { + if (!suppress_errors) + error (dir, errno); + } + else + fatal (_("Memory exhausted"), 0); + } + else + { + size_t dirlen = strlen (dir); + int needs_slash = ! (dirlen == FILESYSTEM_PREFIX_LEN (dir) + || IS_SLASH (dir[dirlen - 1])); + char *file = NULL; + char *namep = name_space; + struct stats child; + child.parent = stats; + out_file += !no_filenames; + while (*namep) + { + size_t namelen = strlen (namep); + file = xrealloc (file, dirlen + 1 + namelen + 1); + strcpy (file, dir); + file[dirlen] = '/'; + strcpy (file + dirlen + needs_slash, namep); + namep += namelen + 1; + status &= grepfile (file, &child); + } + out_file -= !no_filenames; + if (file) + free (file); + free (name_space); + } + + return status; +} -#define USAGE \ - "usage: %s [-[[AB] ]] [-[CEFGVchilnqsvwx]] [-[ef]] []\n" +static void +usage (int status) +{ + if (status != 0) + { + fprintf (stderr, _("Usage: %s [OPTION]... PATTERN [FILE]...\n"), prog); + fprintf (stderr, _("Try `%s --help' for more information.\n"), prog); + } + else + { + printf (_("Usage: %s [OPTION]... 
PATTERN [FILE] ...\n"), prog); + printf (_("\ +Search for PATTERN in each FILE or standard input.\n\ +Example: %s -i 'hello world' menu.h main.c\n\ +\n\ +Regexp selection and interpretation:\n"), prog); + printf (_("\ + -E, --extended-regexp PATTERN is an extended regular expression\n\ + -F, --fixed-strings PATTERN is a set of newline-separated strings\n\ + -G, --basic-regexp PATTERN is a basic regular expression\n")); + printf (_("\ + -e, --regexp=PATTERN use PATTERN as a regular expression\n\ + -f, --file=FILE obtain PATTERN from FILE\n\ + -i, --ignore-case ignore case distinctions\n\ + -w, --word-regexp force PATTERN to match only whole words\n\ + -x, --line-regexp force PATTERN to match only whole lines\n\ + -z, --null-data a data line ends in 0 byte, not newline\n")); + printf (_("\ +\n\ +Miscellaneous:\n\ + -s, --no-messages suppress error messages\n\ + -v, --invert-match select non-matching lines\n\ + -V, --version print version information and exit\n\ + --help display this help and exit\n\ + --mmap use memory-mapped input if possible\n")); + printf (_("\ +\n\ +Output control:\n\ + -b, --byte-offset print the byte offset with output lines\n\ + -n, --line-number print line number with output lines\n\ + -H, --with-filename print the filename for each match\n\ + -h, --no-filename suppress the prefixing filename on output\n\ + -q, --quiet, --silent suppress all normal output\n\ + --binary-files=TYPE assume that binary files are TYPE\n\ + TYPE is 'binary', 'text', or 'without-match'.\n\ + -a, --text equivalent to --binary-files=text\n\ + -I equivalent to --binary-files=without-match\n\ + -d, --directories=ACTION how to handle directories\n\ + ACTION is 'read', 'recurse', or 'skip'.\n\ + -r, --recursive equivalent to --directories=recurse.\n\ + -L, --files-without-match only print FILE names containing no match\n\ + -l, --files-with-matches only print FILE names containing matches\n\ + -c, --count only print a count of matching lines per FILE\n\ + -Z, --null print 
0 byte after FILE name\n")); + printf (_("\ +\n\ +Context control:\n\ + -B, --before-context=NUM print NUM lines of leading context\n\ + -A, --after-context=NUM print NUM lines of trailing context\n\ + -C, --context[=NUM] print NUM (default 2) lines of output context\n\ + unless overridden by -A or -B\n\ + -NUM same as --context=NUM\n\ + -U, --binary do not strip CR characters at EOL (MSDOS)\n\ + -u, --unix-byte-offsets report offsets as if CRs were not there (MSDOS)\n\ +\n\ +`egrep' means `grep -E'. `fgrep' means `grep -F'.\n\ +With no FILE, or when FILE is -, read standard input. If less than\n\ +two FILEs given, assume -h. Exit status is 0 if match, 1 if no match,\n\ +and 2 if trouble.\n")); + printf (_("\nReport bugs to .\n")); + } + exit (status); +} +/* Set the matcher to M, reporting any conflicts. */ static void -usage() +setmatcher (char const *m) { - fprintf(stderr, USAGE, prog); - exit(2); + if (matcher && strcmp (matcher, m) != 0) + fatal (_("conflicting matchers specified"), 0); + matcher = m; } /* Go through the matchers vector and look for the specified matcher. If we find it, install it in compile and execute, and return 1. */ -int -setmatcher(name) - char *name; +static int +install_matcher (char const *name) { int i; +#ifdef HAVE_SETRLIMIT + struct rlimit rlim; +#endif for (i = 0; matchers[i].name; ++i) - if (strcmp(name, matchers[i].name) == 0) + if (strcmp (name, matchers[i].name) == 0) { compile = matchers[i].compile; execute = matchers[i].execute; +#if HAVE_SETRLIMIT && defined(RLIMIT_STACK) + /* I think every platform needs to do this, so that regex.c + doesn't oveflow the stack. The default value of + `re_max_failures' is too large for some platforms: it needs + more than 3MB-large stack. + + The test for HAVE_SETRLIMIT should go into `configure'. */ + if (!getrlimit (RLIMIT_STACK, &rlim)) + { + long newlim; + extern long int re_max_failures; /* from regex.c */ + + /* Approximate the amount regex.c needs, plus some more. 
*/ + newlim = re_max_failures * 2 * 20 * sizeof (char *); + if (newlim > rlim.rlim_max) + { + newlim = rlim.rlim_max; + re_max_failures = newlim / (2 * 20 * sizeof (char *)); + } + if (rlim.rlim_cur < newlim) + rlim.rlim_cur = newlim; + + setrlimit (RLIMIT_STACK, &rlim); + } +#endif return 1; } return 0; -} +} + +/* Find the white-space-separated options specified by OPTIONS, and + using BUF to store copies of these options, set ARGV[0], ARGV[1], + etc. to the option copies. Return the number N of options found. + Do not set ARGV[N] to NULL. If ARGV is NULL, do not store ARGV[0] + etc. Backslash can be used to escape whitespace (and backslashes). */ +static int +prepend_args (char const *options, char *buf, char **argv) +{ + char const *o = options; + char *b = buf; + int n = 0; + + for (;;) + { + while (ISSPACE ((unsigned char) *o)) + o++; + if (!*o) + return n; + if (argv) + argv[n] = b; + n++; + + do + if ((*b++ = *o++) == '\\' && *o) + b[-1] = *o++; + while (*o && ! ISSPACE ((unsigned char) *o)); + + *b++ = '\0'; + } +} + +/* Prepend the whitespace-separated options in OPTIONS to the argument + vector of a main program with argument count *PARGC and argument + vector *PARGV. 
*/ +static void +prepend_default_options (char const *options, int *pargc, char ***pargv) +{ + if (options) + { + char *buf = xmalloc (strlen (options) + 1); + int prepended = prepend_args (options, buf, (char **) NULL); + int argc = *pargc; + char * const *argv = *pargv; + char **pp = (char **) xmalloc ((prepended + argc + 1) * sizeof *pp); + *pargc = prepended + argc; + *pargv = pp; + *pp++ = *argv++; + pp += prepend_args (options, buf, pp); + while ((*pp++ = *argv++)) + continue; + } +} int -main(argc, argv) - int argc; - char *argv[]; +main (int argc, char **argv) { char *keys; size_t keycc, oldcc, keyalloc; - int keyfound, count_matches, no_filenames, list_files, suppress_errors; - int opt, cc, desc, count, status; + int with_filenames; + int opt, cc, status; + int default_context; + unsigned digit_args_val; FILE *fp; extern char *optarg; extern int optind; + initialize_main (&argc, &argv); prog = argv[0]; - if (prog && strrchr(prog, '/')) - prog = strrchr(prog, '/') + 1; + if (prog && strrchr (prog, '/')) + prog = strrchr (prog, '/') + 1; + +#if defined(__MSDOS__) || defined(_WIN32) + /* DOS and MS-Windows use backslashes as directory separators, and usually + have an .exe suffix. They also have case-insensitive filesystems. */ + if (prog) + { + char *p = prog; + char *bslash = strrchr (argv[0], '\\'); + + if (bslash && bslash >= prog) /* for mixed forward/backslash case */ + prog = bslash + 1; + else if (prog == argv[0] + && argv[0][0] && argv[0][1] == ':') /* "c:progname" */ + prog = argv[0] + 2; + + /* Collapse the letter-case, so `strcmp' could be used hence. */ + for ( ; *p; p++) + if (*p >= 'A' && *p <= 'Z') + *p += 'a' - 'A'; + + /* Remove the .exe extension, if any. 
*/ + if ((p = strrchr (prog, '.')) && strcmp (p, ".exe") == 0) + *p = '\0'; + } +#endif keys = NULL; keycc = 0; - keyfound = 0; - count_matches = 0; - no_filenames = 0; - list_files = 0; - suppress_errors = 0; - matcher = NULL; - - while ((opt = getopt(argc, argv, "0123456789A:B:CEFGVX:bce:f:hiLlnoqsvwxy")) - != EOF) + with_filenames = 0; + eolbyte = '\n'; + filename_mask = ~0; + + /* The value -1 means to use DEFAULT_CONTEXT. */ + out_after = out_before = -1; + /* Default before/after context: chaged by -C/-NUM options */ + default_context = 0; + /* Accumulated value of individual digits in a -NUM option */ + digit_args_val = 0; + + +/* Internationalization. */ +#if HAVE_SETLOCALE + setlocale (LC_ALL, ""); +#endif +#if ENABLE_NLS + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); +#endif + + prepend_default_options (getenv ("GREP_OPTIONS"), &argc, &argv); + + while ((opt = getopt_long (argc, argv, short_options, long_options, NULL)) + != -1) switch (opt) { case '0': @@ -654,44 +1194,67 @@ main(argc, argv) case '7': case '8': case '9': - out_before = 10 * out_before + opt - '0'; - out_after = 10 * out_after + opt - '0'; + digit_args_val = 10 * digit_args_val + opt - '0'; + default_context = digit_args_val; break; case 'A': - out_after = atoi(optarg); - if (out_after < 0) - usage(); + if (optarg) + { + if (ck_atoi (optarg, &out_after)) + fatal (_("invalid context length argument"), 0); + } break; case 'B': - out_before = atoi(optarg); - if (out_before < 0) - usage(); + if (optarg) + { + if (ck_atoi (optarg, &out_before)) + fatal (_("invalid context length argument"), 0); + } break; case 'C': - out_before = out_after = 2; + /* Set output match context, but let any explicit leading or + trailing amount specified with -A or -B stand. 
*/ + if (optarg) + { + if (ck_atoi (optarg, &default_context)) + fatal (_("invalid context length argument"), 0); + } + else + default_context = 2; break; case 'E': - if (matcher && strcmp(matcher, "egrep") != 0) - fatal("you may specify only one of -E, -F, or -G", 0); - matcher = "posix-egrep"; + setmatcher ("egrep"); break; case 'F': - if (matcher && strcmp(matcher, "fgrep") != 0) - fatal("you may specify only one of -E, -F, or -G", 0);; - matcher = "fgrep"; + setmatcher ("fgrep"); break; case 'G': - if (matcher && strcmp(matcher, "grep") != 0) - fatal("you may specify only one of -E, -F, or -G", 0); - matcher = "grep"; + setmatcher ("grep"); + break; + case 'H': + with_filenames = 1; + break; + case 'I': + binary_files = WITHOUT_MATCH_BINARY_FILES; + break; + case 'U': +#if O_BINARY + dos_use_file_type = DOS_BINARY; +#endif + break; + case 'u': +#if O_BINARY + dos_report_unix_offset = 1; +#endif break; case 'V': - fprintf(stderr, "%s\n", version); + show_version = 1; break; case 'X': - if (matcher) - fatal("matcher already specified", 0); - matcher = optarg; + setmatcher (optarg); + break; + case 'a': + binary_files = TEXT_BINARY_FILES; break; case 'b': out_byte = 1; @@ -700,38 +1263,43 @@ main(argc, argv) out_quiet = 1; count_matches = 1; break; + case 'd': + if (strcmp (optarg, "read") == 0) + directories = READ_DIRECTORIES; + else if (strcmp (optarg, "skip") == 0) + directories = SKIP_DIRECTORIES; + else if (strcmp (optarg, "recurse") == 0) + directories = RECURSE_DIRECTORIES; + else + fatal (_("unknown directories method"), 0); + break; case 'e': - cc = strlen(optarg); - keys = xrealloc(keys, keycc + cc + 1); - if (keyfound) - keys[keycc++] = '\n'; - strcpy(&keys[keycc], optarg); + cc = strlen (optarg); + keys = xrealloc (keys, keycc + cc + 1); + strcpy (&keys[keycc], optarg); keycc += cc; - keyfound = 1; + keys[keycc++] = '\n'; break; case 'f': - fp = strcmp(optarg, "-") != 0 ? fopen(optarg, "r") : stdin; + fp = strcmp (optarg, "-") != 0 ? 
fopen (optarg, "r") : stdin; if (!fp) - fatal(optarg, errno); - for (keyalloc = 1; keyalloc <= keycc; keyalloc *= 2) + fatal (optarg, errno); + for (keyalloc = 1; keyalloc <= keycc + 1; keyalloc *= 2) ; - keys = xrealloc(keys, keyalloc); + keys = xrealloc (keys, keyalloc); oldcc = keycc; - if (keyfound) - keys[keycc++] = '\n'; - while (!feof(fp) - && (cc = fread(keys + keycc, 1, keyalloc - keycc, fp)) > 0) + while (!feof (fp) + && (cc = fread (keys + keycc, 1, keyalloc - 1 - keycc, fp)) > 0) { keycc += cc; - if (keycc == keyalloc) - keys = xrealloc(keys, keyalloc *= 2); + if (keycc == keyalloc - 1) + keys = xrealloc (keys, keyalloc *= 2); } if (fp != stdin) fclose(fp); - /* Nuke the final newline to avoid matching a null string. */ - if (keycc - oldcc > 0 && keys[keycc - 1] == '\n') - --keycc; - keyfound = 1; + /* Append final newline if file ended in non-newline. */ + if (oldcc != keycc && keys[keycc - 1] != '\n') + keys[keycc++] = '\n'; break; case 'h': no_filenames = 1; @@ -745,20 +1313,23 @@ main(argc, argv) Inspired by the same option in Hume's gre. 
*/ out_quiet = 1; list_files = -1; + done_on_match = 1; break; case 'l': out_quiet = 1; list_files = 1; + done_on_match = 1; break; case 'n': out_line = 1; break; - case 'o': - out_file = 1; - break; case 'q': + done_on_match = 1; out_quiet = 1; break; + case 'r': + directories = RECURSE_DIRECTORIES; + break; case 's': suppress_errors = 1; break; @@ -771,80 +1342,104 @@ main(argc, argv) case 'x': match_lines = 1; break; + case 'Z': + filename_mask = 0; + break; + case 'z': + eolbyte = '\0'; + break; + case BINARY_FILES_OPTION: + if (strcmp (optarg, "binary") == 0) + binary_files = BINARY_BINARY_FILES; + else if (strcmp (optarg, "text") == 0) + binary_files = TEXT_BINARY_FILES; + else if (strcmp (optarg, "without-match") == 0) + binary_files = WITHOUT_MATCH_BINARY_FILES; + else + fatal (_("unknown binary-files type"), 0); + break; + case 0: + /* long options */ + break; default: - usage(); + usage (2); break; } - if (!keyfound) + if (out_after < 0) + out_after = default_context; + if (out_before < 0) + out_before = default_context; + + if (! matcher) + matcher = prog; + + if (show_version) + { + printf (_("%s (GNU grep) %s\n"), matcher, VERSION); + printf ("\n"); + printf (_("\ +Copyright 1988, 1992-1999, 2000 Free Software Foundation, Inc.\n")); + printf (_("\ +This is free software; see the source for copying conditions. There is NO\n\ +warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n")); + printf ("\n"); + exit (0); + } + + if (show_help) + usage (0); + + if (keys) + { + if (keycc == 0) + /* No keys were specified (e.g. -f /dev/null). Match nothing. */ + out_invert ^= 1; + else + /* Strip trailing newline. 
*/ + --keycc; + } + else if (optind < argc) { keys = argv[optind++]; - keycc = strlen(keys); + keycc = strlen (keys); } else - usage(); - - if (!matcher) - matcher = prog; + usage (2); - if (!setmatcher(matcher) && !setmatcher("default")) - abort(); + if (!install_matcher (matcher) && !install_matcher ("default")) + abort (); (*compile)(keys, keycc); - if (argc - optind > 1 && !no_filenames) + if ((argc - optind > 1 && !no_filenames) || with_filenames) out_file = 1; - status = 1; +#if O_BINARY + /* Output is set to binary mode because we shouldn't convert + NL to CR-LF pairs, especially when grepping binary files. */ + if (!isatty (1)) + SET_BINARY (1); +#endif + if (optind < argc) - while (optind < argc) - { - desc = strcmp(argv[optind], "-") ? open(argv[optind], O_RDONLY) : 0; - if (desc < 0) - { - if (!suppress_errors) - error(argv[optind], errno); - } - else - { - filename = desc == 0 ? "(standard input)" : argv[optind]; - count = grep(desc); - if (count_matches) - { - if (out_file) - printf("%s:", filename); - printf("%d\n", count); - } - if (count) - { - status = 0; - if (list_files == 1) - printf("%s\n", filename); - } - else if (list_files == -1) - printf("%s\n", filename); - } - if (desc != 0) - close(desc); - ++optind; - } - else { - filename = "(standard input)"; - count = grep(0); - if (count_matches) - printf("%d\n", count); - if (count) + status = 1; + do { - status = 0; - if (list_files == 1) - printf("(standard input)\n"); + char *file = argv[optind]; + status &= grepfile (strcmp (file, "-") == 0 ? (char *) NULL : file, + &stats_base); } - else if (list_files == -1) - printf("(standard input)\n"); + while ( ++optind < argc); } + else + status = grepfile ((char *) NULL, &stats_base); + + if (fclose (stdout) == EOF) + error (_("writing output"), errno); - exit(errseen ? 2 : status); + exit (errseen ? 
2 : status); } diff --git a/gnu/usr.bin/grep/grep.h b/gnu/usr.bin/grep/grep.h index 1577d998fe1..13f55a230f1 100644 --- a/gnu/usr.bin/grep/grep.h +++ b/gnu/usr.bin/grep/grep.h @@ -1,5 +1,5 @@ /* grep.h - interface to grep driver for searching subroutines. - Copyright (C) 1992 Free Software Foundation, Inc. + Copyright (C) 1992, 1998 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,14 +13,16 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + 02111-1307, USA. */ - $Id: grep.h,v 1.1.1.1 1995/10/18 08:40:17 deraadt Exp $ -*/ - -#if __STDC__ +#if __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 6) || __STRICT_ANSI__ +# define __attribute__(x) +#endif -extern void fatal(const char *, int); +extern void fatal PARAMS ((const char *, int)) __attribute__((noreturn)); +extern char *xmalloc PARAMS ((size_t size)); +extern char *xrealloc PARAMS ((char *ptr, size_t size)); /* Grep.c expects the matchers vector to be terminated by an entry with a NULL name, and to contain at least @@ -29,28 +31,16 @@ extern void fatal(const char *, int); extern struct matcher { char *name; - void (*compile)(char *, size_t); - char *(*execute)(char *, size_t, char **); + void (*compile) PARAMS ((char *, size_t)); + char *(*execute) PARAMS ((char *, size_t, char **)); } matchers[]; -#else - -extern void fatal(); - -extern struct matcher -{ - char *name; - void (*compile)(); - char *(*execute)(); -} matchers[]; - -#endif - -/* Exported from grep.c. */ -extern char *matcher; +/* Exported from fgrepmat.c, egrepmat.c, grepmat.c. */ +extern char const *matcher; /* The following flags are exported from grep for the matchers to look at. 
*/ extern int match_icase; /* -i */ extern int match_words; /* -w */ extern int match_lines; /* -x */ +extern unsigned char eolbyte; /* -z */ diff --git a/gnu/usr.bin/grep/kwset.c b/gnu/usr.bin/grep/kwset.c index bd4b5665294..61eff7bf72b 100644 --- a/gnu/usr.bin/grep/kwset.c +++ b/gnu/usr.bin/grep/kwset.c @@ -1,10 +1,9 @@ /* kwset.c - search for any of a set of keywords. - Copyright 1989 Free Software Foundation - Written August 1989 by Mike Haertel. + Copyright 1989, 1998, 2000 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 1, or (at your option) + the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, @@ -14,15 +13,13 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + 02111-1307, USA. */ +/* Written August 1989 by Mike Haertel. The author may be reached (Email) at the address mike@ai.mit.edu, or (US mail) as Mike Haertel c/o Free Software Foundation. */ -#ifndef lint -static char rcsid[] = "$Id: kwset.c,v 1.1.1.1 1995/10/18 08:40:17 deraadt Exp $"; -#endif /* not lint */ - /* The algorithm implemented by these routines bears a startling resemblence to one discovered by Beate Commentz-Walter, although it is not identical. See "A String Matching Algorithm Fast on the Average," Technical Report, @@ -31,43 +28,20 @@ static char rcsid[] = "$Id: kwset.c,v 1.1.1.1 1995/10/18 08:40:17 deraadt Exp $" String Matching: An Aid to Bibliographic Search," CACM June 1975, Vol. 18, No. 6, which describes the failure function used below. 
*/ - -#ifdef STDC_HEADERS -#include -#include -#else -#define INT_MAX 2147483647 -#define UCHAR_MAX 255 -#ifdef __STDC__ -#include -#else -#include -#endif -extern char *malloc(); -extern void free(); -#endif - -#ifdef HAVE_MEMCHR -#include -#ifdef NEED_MEMORY_H -#include -#endif -#else -#ifdef __STDC__ -extern void *memchr(); -#else -extern char *memchr(); -#endif +#ifdef HAVE_CONFIG_H +# include #endif +#include +#include "system.h" +#include "kwset.h" +#include "obstack.h" #ifdef GREP extern char *xmalloc(); -#define malloc xmalloc +# undef malloc +# define malloc xmalloc #endif -#include "kwset.h" -#include "obstack.h" - #define NCHAR (UCHAR_MAX + 1) #define obstack_chunk_alloc malloc #define obstack_chunk_free free @@ -110,11 +84,19 @@ struct kwset char *trans; /* Character translation table. */ }; +/* prototypes */ +static void enqueue PARAMS((struct tree *, struct trie **)); +static void treefails PARAMS((register struct tree *, struct trie *, struct trie *)); +static void treedelta PARAMS((register struct tree *,register unsigned int, unsigned char *)); +static int hasevery PARAMS((register struct tree *, register struct tree *)); +static void treenext PARAMS((struct tree *, struct trie **)); +static char * bmexec PARAMS((kwset_t, char *, size_t)); +static char * cwexec PARAMS((kwset_t, char *, size_t, struct kwsmatch *)); + /* Allocate and initialize a keyword set object, returning an opaque pointer to it. Return NULL if memory is not available. */ kwset_t -kwsalloc(trans) - char *trans; +kwsalloc (char *trans) { struct kwset *kwset; @@ -149,10 +131,7 @@ kwsalloc(trans) /* Add the given string to the contents of the keyword set. Return NULL for success, an error message otherwise. 
*/ char * -kwsincr(kws, text, len) - kwset_t kws; - char *text; - size_t len; +kwsincr (kwset_t kws, char *text, size_t len) { struct kwset *kwset; register struct trie *trie; @@ -198,13 +177,13 @@ kwsincr(kws, text, len) link = (struct tree *) obstack_alloc(&kwset->obstack, sizeof (struct tree)); if (!link) - return "memory exhausted"; + return _("memory exhausted"); link->llink = 0; link->rlink = 0; link->trie = (struct trie *) obstack_alloc(&kwset->obstack, sizeof (struct trie)); if (!link->trie) - return "memory exhausted"; + return _("memory exhausted"); link->trie->accepting = 0; link->trie->links = 0; link->trie->parent = trie; @@ -253,6 +232,8 @@ kwsincr(kws, text, len) r->balance = t->balance != (char) -1 ? 0 : 1; t->balance = 0; break; + default: + abort (); } break; case 2: @@ -271,8 +252,12 @@ kwsincr(kws, text, len) r->balance = t->balance != (char) -1 ? 0 : 1; t->balance = 0; break; + default: + abort (); } break; + default: + abort (); } if (dirs[depth - 1] == L) @@ -303,9 +288,7 @@ kwsincr(kws, text, len) /* Enqueue the trie nodes referenced from the given tree in the given queue. */ static void -enqueue(tree, last) - struct tree *tree; - struct trie **last; +enqueue (struct tree *tree, struct trie **last) { if (!tree) return; @@ -318,10 +301,7 @@ enqueue(tree, last) from the given tree, given the failure function for their parent as well as a last resort failure node. */ static void -treefails(tree, fail, recourse) - register struct tree *tree; - struct trie *fail; - struct trie *recourse; +treefails (register struct tree *tree, struct trie *fail, struct trie *recourse) { register struct tree *link; @@ -355,10 +335,9 @@ treefails(tree, fail, recourse) /* Set delta entries for the links of the given tree such that the preexisting delta value is larger than the current depth. 
*/ static void -treedelta(tree, depth, delta) - register struct tree *tree; - register unsigned int depth; - unsigned char delta[]; +treedelta (register struct tree *tree, + register unsigned int depth, + unsigned char delta[]) { if (!tree) return; @@ -370,9 +349,7 @@ treedelta(tree, depth, delta) /* Return true if A has every label in B. */ static int -hasevery(a, b) - register struct tree *a; - register struct tree *b; +hasevery (register struct tree *a, register struct tree *b) { if (!b) return 1; @@ -391,9 +368,7 @@ hasevery(a, b) /* Compute a vector, indexed by character code, of the trie nodes referenced from the given tree. */ static void -treenext(tree, next) - struct tree *tree; - struct trie *next[]; +treenext (struct tree *tree, struct trie *next[]) { if (!tree) return; @@ -405,8 +380,7 @@ treenext(tree, next) /* Compute the shift for each trie node, as well as the delta table and next cache for the given keyword set. */ char * -kwsprep(kws) - kwset_t kws; +kwsprep (kwset_t kws) { register struct kwset *kwset; register int i; @@ -524,10 +498,7 @@ kwsprep(kws) /* Fast boyer-moore search. */ static char * -bmexec(kws, text, size) - kwset_t kws; - char *text; - size_t size; +bmexec (kwset_t kws, char *text, size_t size) { struct kwset *kwset; register unsigned char *d1; @@ -595,7 +566,7 @@ bmexec(kws, text, size) d = d1[U((tp += d)[-1])]; if (d != 0) continue; - if (tp[-2] == gc) + if (U(tp[-2]) == gc) { for (i = 3; i <= len && U(tp[-i]) == U(sp[-i]); ++i) ; @@ -610,11 +581,7 @@ bmexec(kws, text, size) /* Hairy multiple string search. 
*/ static char * -cwexec(kws, text, len, kwsmatch) - kwset_t kws; - char *text; - size_t len; - struct kwsmatch *kwsmatch; +cwexec (kwset_t kws, char *text, size_t len, struct kwsmatch *kwsmatch) { struct kwset *kwset; struct trie **next, *trie, *accept; @@ -625,6 +592,10 @@ cwexec(kws, text, len, kwsmatch) register struct tree *tree; register char *trans; +#ifdef lint + accept = NULL; +#endif + /* Initialize register copies and look for easy ways out. */ kwset = (struct kwset *) kws; if (len < kwset->mind) @@ -762,7 +733,7 @@ cwexec(kws, text, len, kwsmatch) } return mch; } - + /* Search through the given text for a match of any member of the given keyword set. Return a pointer to the first character of the matching substring, or NULL if no match is found. If FOUNDLEN @@ -771,11 +742,7 @@ cwexec(kws, text, len, kwsmatch) in the referenced location the index number of the particular keyword matched. */ char * -kwsexec(kws, text, size, kwsmatch) - kwset_t kws; - char *text; - size_t size; - struct kwsmatch *kwsmatch; +kwsexec (kwset_t kws, char *text, size_t size, struct kwsmatch *kwsmatch) { struct kwset *kwset; char *ret; @@ -798,8 +765,7 @@ kwsexec(kws, text, size, kwsmatch) /* Free the components of the given keyword set. */ void -kwsfree(kws) - kwset_t kws; +kwsfree (kwset_t kws) { struct kwset *kwset; diff --git a/gnu/usr.bin/grep/kwset.h b/gnu/usr.bin/grep/kwset.h index 48654b53e21..e699258019d 100644 --- a/gnu/usr.bin/grep/kwset.h +++ b/gnu/usr.bin/grep/kwset.h @@ -1,10 +1,9 @@ /* kwset.h - header declaring the keyword set library. - Copyright 1989 Free Software Foundation - Written August 1989 by Mike Haertel. + Copyright (C) 1989, 1998 Free Software Foundation, Inc. 
This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 1, or (at your option) + the Free Software Foundation; either version 2, or (at your option) any later version. This program is distributed in the hope that it will be useful, @@ -14,13 +13,12 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + 02111-1307, USA. */ +/* Written August 1989 by Mike Haertel. The author may be reached (Email) at the address mike@ai.mit.edu, - or (US mail) as Mike Haertel c/o Free Software Foundation. - - $Id: kwset.h,v 1.1.1.1 1995/10/18 08:40:17 deraadt Exp $ -*/ + or (US mail) as Mike Haertel c/o Free Software Foundation. */ struct kwsmatch { @@ -29,24 +27,22 @@ struct kwsmatch size_t size[1]; /* Length of each submatch. */ }; -#if __STDC__ - -typedef void *kwset_t; +typedef ptr_t kwset_t; /* Return an opaque pointer to a newly allocated keyword set, or NULL if enough memory cannot be obtained. The argument if non-NULL specifies a table of character translations to be applied to all pattern and search text. */ -extern kwset_t kwsalloc(char *); +extern kwset_t kwsalloc PARAMS((char *)); /* Incrementally extend the keyword set to include the given string. Return NULL for success, or an error message. Remember an index number for each keyword included in the set. */ -extern char *kwsincr(kwset_t, char *, size_t); +extern char *kwsincr PARAMS((kwset_t, char *, size_t)); /* When the keyword set has been completely built, prepare it for use. Return NULL for success, or an error message. */ -extern char *kwsprep(kwset_t); +extern char *kwsprep PARAMS((kwset_t)); /* Search through the given buffer for a member of the keyword set. 
Return a pointer to the leftmost longest match found, or NULL if @@ -54,19 +50,8 @@ extern char *kwsprep(kwset_t); the matching substring in the integer it points to. Similarly, if foundindex is non-NULL, store the index of the particular keyword found therein. */ -extern char *kwsexec(kwset_t, char *, size_t, struct kwsmatch *); +extern char *kwsexec PARAMS((kwset_t, char *, size_t, struct kwsmatch *)); /* Deallocate the given keyword set and all its associated storage. */ -extern void kwsfree(kwset_t); - -#else - -typedef char *kwset_t; - -extern kwset_t kwsalloc(); -extern char *kwsincr(); -extern char *kwsprep(); -extern char *kwsexec(); -extern void kwsfree(); +extern void kwsfree PARAMS((kwset_t)); -#endif diff --git a/gnu/usr.bin/grep/obstack.c b/gnu/usr.bin/grep/obstack.c index 30873b53ce9..17c63134315 100644 --- a/gnu/usr.bin/grep/obstack.c +++ b/gnu/usr.bin/grep/obstack.c @@ -1,41 +1,57 @@ /* obstack.c - subroutines used implicitly by object stack macros - Copyright (C) 1988, 1993 Free Software Foundation, Inc. + Copyright (C) 1988-1994,96,97,98,99 Free Software Foundation, Inc. -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. + This file is part of the GNU C Library. Its master source is NOT part of + the C library, however. The master source lives in /gd/gnu/lib. -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
-You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */ + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. -#ifndef lint -static char rcsid[] = "$Id: obstack.c,v 1.1.1.1 1995/10/18 08:40:17 deraadt Exp $"; -#endif /* not lint */ + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ + +#ifdef HAVE_CONFIG_H +#include <config.h> +#endif #include "obstack.h" -/* This is just to get __GNU_LIBRARY__ defined. */ -#include <stdio.h> +/* NOTE BEFORE MODIFYING THIS FILE: This version number must be + incremented whenever callers compiled using an old obstack.h can no + longer properly call the functions in this obstack.c. */ +#define OBSTACK_INTERFACE_VERSION 1 /* Comment out all this code if we are using the GNU C Library, and are not - actually compiling the library itself. This code is part of the GNU C - Library, but also included in many other GNU distributions. Compiling + actually compiling the library itself, and the installed library + supports the same library interface we do. This code is part of the GNU + C Library, but also included in many other GNU distributions. Compiling and linking in this code is a waste when using the GNU C library (especially if it is a shared library). Rather than having every GNU - program understand `configure --with-gnu-libc' and omit the object files, - it is simpler to just do this in the source for each such file.
*/ + program understand `configure --with-gnu-libc' and omit the object + files, it is simpler to just do this in the source for each such file. */ + +#include <stdio.h> /* Random thing to get __GNU_LIBRARY__. */ +#if !defined (_LIBC) && defined (__GNU_LIBRARY__) && __GNU_LIBRARY__ > 1 +#include <gnu-versions.h> +#if _GNU_OBSTACK_INTERFACE_VERSION == OBSTACK_INTERFACE_VERSION +#define ELIDE_CODE +#endif +#endif -#if defined (_LIBC) || !defined (__GNU_LIBRARY__) +#ifndef ELIDE_CODE -#ifdef __STDC__ + +#if defined (__STDC__) && __STDC__ #define POINTER void * #else #define POINTER char * @@ -44,7 +60,7 @@ static char rcsid[] = "$Id: obstack.c,v 1.1.1.1 1995/10/18 08:40:17 deraadt Exp /* Determine default alignment. */ struct fooalign {char x; double d;}; #define DEFAULT_ALIGNMENT \ - ((PTR_INT_TYPE) ((char *)&((struct fooalign *) 0)->d - (char *)0)) + ((PTR_INT_TYPE) ((char *) &((struct fooalign *) 0)->d - (char *) 0)) /* If malloc were really smart, it would round addresses to DEFAULT_ALIGNMENT. But in fact it might be less smart and round addresses to as much as DEFAULT_ROUNDING. So we prepare for it to do that. */ @@ -59,6 +75,30 @@ union fooround {long x; double d;}; #define COPYING_UNIT int #endif + +/* The functions allocating more room by calling `obstack_chunk_alloc' + jump to the handler pointed to by `obstack_alloc_failed_handler'. + This can be set to a user defined function which should either + abort gracefully or use longjump - but shouldn't return. This + variable by default points to the internal function + `print_and_abort'. */ +#if defined (__STDC__) && __STDC__ +static void print_and_abort (void); +void (*obstack_alloc_failed_handler) (void) = print_and_abort; +#else +static void print_and_abort (); +void (*obstack_alloc_failed_handler) () = print_and_abort; +#endif + +/* Exit value used when `print_and_abort' is used.
*/ +#if defined __GNU_LIBRARY__ || defined HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifndef EXIT_FAILURE +#define EXIT_FAILURE 1 +#endif +int obstack_exit_failure = EXIT_FAILURE; + /* The non-GNU-C macros copy the obstack into this global variable to avoid multiple evaluation. */ @@ -70,37 +110,60 @@ struct obstack *_obstack; For free, do not use ?:, since some compilers, like the MIPS compilers, do not allow (expr) ? void : void. */ +#if defined (__STDC__) && __STDC__ #define CALL_CHUNKFUN(h, size) \ (((h) -> use_extra_arg) \ ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \ - : (*(h)->chunkfun) ((size))) + : (*(struct _obstack_chunk *(*) (long)) (h)->chunkfun) ((size))) #define CALL_FREEFUN(h, old_chunk) \ do { \ if ((h) -> use_extra_arg) \ (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \ else \ - (*(h)->freefun) ((old_chunk)); \ + (*(void (*) (void *)) (h)->freefun) ((old_chunk)); \ } while (0) +#else +#define CALL_CHUNKFUN(h, size) \ + (((h) -> use_extra_arg) \ + ? (*(h)->chunkfun) ((h)->extra_arg, (size)) \ + : (*(struct _obstack_chunk *(*) ()) (h)->chunkfun) ((size))) + +#define CALL_FREEFUN(h, old_chunk) \ + do { \ + if ((h) -> use_extra_arg) \ + (*(h)->freefun) ((h)->extra_arg, (old_chunk)); \ + else \ + (*(void (*) ()) (h)->freefun) ((old_chunk)); \ + } while (0) +#endif /* Initialize an obstack H for use. Specify chunk size SIZE (0 means default). Objects start on multiples of ALIGNMENT (0 means use default). CHUNKFUN is the function to use to allocate chunks, - and FREEFUN the function to free them. */ + and FREEFUN the function to free them. -void + Return nonzero if successful, calls obstack_alloc_failed_handler if + allocation fails.
*/ + +int _obstack_begin (h, size, alignment, chunkfun, freefun) struct obstack *h; int size; int alignment; +#if defined (__STDC__) && __STDC__ + POINTER (*chunkfun) (long); + void (*freefun) (void *); +#else POINTER (*chunkfun) (); void (*freefun) (); +#endif { - register struct _obstack_chunk* chunk; /* points to new chunk */ + register struct _obstack_chunk *chunk; /* points to new chunk */ if (alignment == 0) - alignment = DEFAULT_ALIGNMENT; + alignment = (int) DEFAULT_ALIGNMENT; if (size == 0) /* Default size is what GNU malloc can fit in a 4096-byte block. */ { @@ -118,34 +181,48 @@ _obstack_begin (h, size, alignment, chunkfun, freefun) size = 4096 - extra; } +#if defined (__STDC__) && __STDC__ + h->chunkfun = (struct _obstack_chunk * (*)(void *, long)) chunkfun; + h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun; +#else h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun; h->freefun = freefun; +#endif h->chunk_size = size; h->alignment_mask = alignment - 1; h->use_extra_arg = 0; chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size); + if (!chunk) + (*obstack_alloc_failed_handler) (); h->next_free = h->object_base = chunk->contents; h->chunk_limit = chunk->limit = (char *) chunk + h->chunk_size; chunk->prev = 0; /* The initial chunk now contains no empty object. */ h->maybe_empty_object = 0; + h->alloc_failed = 0; + return 1; } -void +int _obstack_begin_1 (h, size, alignment, chunkfun, freefun, arg) struct obstack *h; int size; int alignment; +#if defined (__STDC__) && __STDC__ + POINTER (*chunkfun) (POINTER, long); + void (*freefun) (POINTER, POINTER); +#else POINTER (*chunkfun) (); void (*freefun) (); +#endif POINTER arg; { - register struct _obstack_chunk* chunk; /* points to new chunk */ + register struct _obstack_chunk *chunk; /* points to new chunk */ if (alignment == 0) - alignment = DEFAULT_ALIGNMENT; + alignment = (int) DEFAULT_ALIGNMENT; if (size == 0) /* Default size is what GNU malloc can fit in a 4096-byte block. 
*/ { @@ -163,20 +240,29 @@ _obstack_begin_1 (h, size, alignment, chunkfun, freefun, arg) size = 4096 - extra; } +#if defined(__STDC__) && __STDC__ + h->chunkfun = (struct _obstack_chunk * (*)(void *,long)) chunkfun; + h->freefun = (void (*) (void *, struct _obstack_chunk *)) freefun; +#else h->chunkfun = (struct _obstack_chunk * (*)()) chunkfun; h->freefun = freefun; +#endif h->chunk_size = size; h->alignment_mask = alignment - 1; h->extra_arg = arg; h->use_extra_arg = 1; chunk = h->chunk = CALL_CHUNKFUN (h, h -> chunk_size); + if (!chunk) + (*obstack_alloc_failed_handler) (); h->next_free = h->object_base = chunk->contents; h->chunk_limit = chunk->limit = (char *) chunk + h->chunk_size; chunk->prev = 0; /* The initial chunk now contains no empty object. */ h->maybe_empty_object = 0; + h->alloc_failed = 0; + return 1; } /* Allocate a new current chunk for the obstack *H @@ -190,12 +276,12 @@ _obstack_newchunk (h, length) struct obstack *h; int length; { - register struct _obstack_chunk* old_chunk = h->chunk; - register struct _obstack_chunk* new_chunk; + register struct _obstack_chunk *old_chunk = h->chunk; + register struct _obstack_chunk *new_chunk; register long new_size; - register int obj_size = h->next_free - h->object_base; - register int i; - int already; + register long obj_size = h->next_free - h->object_base; + register long i; + long already; /* Compute size for new chunk. */ new_size = (obj_size + length) + (obj_size >> 3) + 100; @@ -203,7 +289,10 @@ _obstack_newchunk (h, length) new_size = h->chunk_size; /* Allocate and initialize the new chunk. */ - new_chunk = h->chunk = CALL_CHUNKFUN (h, new_size); + new_chunk = CALL_CHUNKFUN (h, new_size); + if (!new_chunk) + (*obstack_alloc_failed_handler) (); + h->chunk = new_chunk; new_chunk->prev = old_chunk; new_chunk->limit = h->chunk_limit = (char *) new_chunk + new_size; @@ -246,19 +335,25 @@ _obstack_newchunk (h, length) This is here for debugging. If you use it in a program, you are probably losing. 
*/ +#if defined (__STDC__) && __STDC__ +/* Suppress -Wmissing-prototypes warning. We don't want to declare this in + obstack.h because it is just for debugging. */ +int _obstack_allocated_p (struct obstack *h, POINTER obj); +#endif + int _obstack_allocated_p (h, obj) struct obstack *h; POINTER obj; { - register struct _obstack_chunk* lp; /* below addr of any objects in this chunk */ - register struct _obstack_chunk* plp; /* point to previous chunk if any */ + register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */ + register struct _obstack_chunk *plp; /* point to previous chunk if any */ lp = (h)->chunk; /* We use >= rather than > since the object cannot be exactly at the beginning of the chunk but might be an empty object exactly - at the end of an adjacent chunk. */ - while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj)) + at the end of an adjacent chunk. */ + while (lp != 0 && ((POINTER) lp >= obj || (POINTER) (lp)->limit < obj)) { plp = lp->prev; lp = plp; @@ -279,14 +374,14 @@ _obstack_free (h, obj) struct obstack *h; POINTER obj; { - register struct _obstack_chunk* lp; /* below addr of any objects in this chunk */ - register struct _obstack_chunk* plp; /* point to previous chunk if any */ + register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */ + register struct _obstack_chunk *plp; /* point to previous chunk if any */ lp = h->chunk; /* We use >= because there cannot be an object at the beginning of a chunk. But there can be an empty object at that address at the end of another chunk. 
*/ - while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj)) + while (lp != 0 && ((POINTER) lp >= obj || (POINTER) (lp)->limit < obj)) { plp = lp->prev; CALL_FREEFUN (h, lp); @@ -297,7 +392,7 @@ _obstack_free (h, obj) } if (lp) { - h->object_base = h->next_free = (char *)(obj); + h->object_base = h->next_free = (char *) (obj); h->chunk_limit = lp->limit; h->chunk = lp; } @@ -313,14 +408,14 @@ obstack_free (h, obj) struct obstack *h; POINTER obj; { - register struct _obstack_chunk* lp; /* below addr of any objects in this chunk */ - register struct _obstack_chunk* plp; /* point to previous chunk if any */ + register struct _obstack_chunk *lp; /* below addr of any objects in this chunk */ + register struct _obstack_chunk *plp; /* point to previous chunk if any */ lp = h->chunk; /* We use >= because there cannot be an object at the beginning of a chunk. But there can be an empty object at that address at the end of another chunk. */ - while (lp != 0 && ((POINTER)lp >= obj || (POINTER)(lp)->limit < obj)) + while (lp != 0 && ((POINTER) lp >= obj || (POINTER) (lp)->limit < obj)) { plp = lp->prev; CALL_FREEFUN (h, lp); @@ -331,7 +426,7 @@ obstack_free (h, obj) } if (lp) { - h->object_base = h->next_free = (char *)(obj); + h->object_base = h->next_free = (char *) (obj); h->chunk_limit = lp->limit; h->chunk = lp; } @@ -340,6 +435,44 @@ obstack_free (h, obj) abort (); } +int +_obstack_memory_used (h) + struct obstack *h; +{ + register struct _obstack_chunk* lp; + register int nbytes = 0; + + for (lp = h->chunk; lp != 0; lp = lp->prev) + { + nbytes += lp->limit - (char *) lp; + } + return nbytes; +} + +/* Define the error handler. 
*/ +#ifndef _ +# ifdef HAVE_LIBINTL_H +# include <libintl.h> +# ifndef _ +# define _(Str) gettext (Str) +# endif +# else +# define _(Str) (Str) +# endif +#endif +#if defined _LIBC && defined USE_IN_LIBIO +# include <libio/iolibio.h> +# define fputs(s, f) _IO_fputs (s, f) +#endif + +static void +print_and_abort () +{ + fputs (_("memory exhausted"), stderr); + fputc ('\n', stderr); + exit (obstack_exit_failure); +} + #if 0 /* These are now turned off because the applications do not use it and it uses bcopy via obstack_grow, which causes trouble on sysV. */ @@ -347,7 +480,7 @@ obstack_free (h, obj) /* Now define the functional versions of the obstack macros. Define them to simply use the corresponding macros to do the job. */ -#ifdef __STDC__ +#if defined (__STDC__) && __STDC__ /* These function definitions do not work with non-ANSI preprocessors; they won't pass through the macro names in parentheses. */ @@ -378,6 +511,13 @@ int (obstack_room) (obstack) return obstack_room (obstack); } +int (obstack_make_room) (obstack, length) + struct obstack *obstack; + int length; +{ + return obstack_make_room (obstack, length); +} + void (obstack_grow) (obstack, pointer, length) struct obstack *obstack; POINTER pointer; @@ -455,4 +595,4 @@ POINTER (obstack_copy0) (obstack, pointer, length) #endif /* 0 */ -#endif /* _LIBC or not __GNU_LIBRARY__. */ +#endif /* !ELIDE_CODE */ diff --git a/gnu/usr.bin/grep/obstack.h b/gnu/usr.bin/grep/obstack.h index 546f24c6ccf..4d49ce024b1 100644 --- a/gnu/usr.bin/grep/obstack.h +++ b/gnu/usr.bin/grep/obstack.h @@ -1,22 +1,23 @@ /* obstack.h - object stack macros - Copyright (C) 1988, 1992 Free Software Foundation, Inc. + Copyright (C) 1988,89,90,91,92,93,94,96,97,98,99 Free Software Foundation, Inc. -This program is free software; you can redistribute it and/or modify it -under the terms of the GNU General Public License as published by the -Free Software Foundation; either version 2, or (at your option) any -later version. + This file is part of the GNU C Library.
Its master source is NOT part of + the C library, however. The master source lives in /gd/gnu/lib. -This program is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -GNU General Public License for more details. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. -You should have received a copy of the GNU General Public License -along with this program; if not, write to the Free Software -Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. + The GNU C Library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. - $Id: obstack.h,v 1.1.1.1 1995/10/18 08:40:17 deraadt Exp $ -*/ + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ /* Summary: @@ -103,45 +104,51 @@ Summary: /* Don't do the contents of this file more than once. */ -#ifndef __OBSTACKS__ -#define __OBSTACKS__ +#ifndef _OBSTACK_H +#define _OBSTACK_H 1 + +#ifdef __cplusplus +extern "C" { +#endif -/* We use subtraction of (char *)0 instead of casting to int +/* We use subtraction of (char *) 0 instead of casting to int because on word-addressable machines a simple cast to int may ignore the byte-within-word field of the pointer. 
*/ #ifndef __PTR_TO_INT -#define __PTR_TO_INT(P) ((P) - (char *)0) +# define __PTR_TO_INT(P) ((P) - (char *) 0) #endif #ifndef __INT_TO_PTR -#define __INT_TO_PTR(P) ((P) + (char *)0) +# define __INT_TO_PTR(P) ((P) + (char *) 0) #endif -/* We need the type of the resulting object. In ANSI C it is ptrdiff_t - but in traditional C it is usually long. If we are in ANSI C and - don't already have ptrdiff_t get it. */ - -#if defined (__STDC__) && ! defined (offsetof) -#if defined (__GNUC__) && defined (IN_GCC) -/* On Next machine, the system's stddef.h screws up if included - after we have defined just ptrdiff_t, so include all of gstddef.h. - Otherwise, define just ptrdiff_t, which is all we need. */ -#ifndef __NeXT__ -#define __need_ptrdiff_t -#endif +/* We need the type of the resulting object. If __PTRDIFF_TYPE__ is + defined, as with GNU C, use that; that way we don't pollute the + namespace with <stddef.h>'s symbols. Otherwise, if <stddef.h> is + available, include it and use ptrdiff_t. In traditional C, long is + the best that we can do. */ -/* While building GCC, the stddef.h that goes with GCC has this name. */ -#include "gstddef.h" +#ifdef __PTRDIFF_TYPE__ +# define PTR_INT_TYPE __PTRDIFF_TYPE__ #else -#include <stddef.h> -#endif +# ifdef HAVE_STDDEF_H +# include <stddef.h> +# define PTR_INT_TYPE ptrdiff_t +# else +# define PTR_INT_TYPE long +# endif #endif -#ifdef __STDC__ -#define PTR_INT_TYPE ptrdiff_t +#if defined _LIBC || defined HAVE_STRING_H +# include <string.h> +# define _obstack_memcpy(To, From, N) memcpy ((To), (From), (N)) #else -#define PTR_INT_TYPE long +# ifdef memcpy +# define _obstack_memcpy(To, From, N) memcpy ((To), (From), (N)) +# else +# define _obstack_memcpy(To, From, N) bcopy ((From), (To), (N)) +# endif #endif struct _obstack_chunk /* Lives at front of each chunk. */ @@ -154,39 +161,54 @@ struct _obstack_chunk /* Lives at front of each chunk.
*/ struct obstack /* control current object in current chunk */ { long chunk_size; /* preferred size to allocate chunks in */ - struct _obstack_chunk* chunk; /* address of current struct obstack_chunk */ + struct _obstack_chunk *chunk; /* address of current struct obstack_chunk */ char *object_base; /* address of object we are building */ char *next_free; /* where to add next char to current object */ char *chunk_limit; /* address of char after current chunk */ PTR_INT_TYPE temp; /* Temporary for some macros. */ int alignment_mask; /* Mask of alignment for each object. */ +#if defined __STDC__ && __STDC__ + /* These prototypes vary based on `use_extra_arg', and we use + casts to the prototypeless function type in all assignments, + but having prototypes here quiets -Wstrict-prototypes. */ + struct _obstack_chunk *(*chunkfun) (void *, long); + void (*freefun) (void *, struct _obstack_chunk *); + void *extra_arg; /* first arg for chunk alloc/dealloc funcs */ +#else struct _obstack_chunk *(*chunkfun) (); /* User's fcn to allocate a chunk. */ void (*freefun) (); /* User's function to free a chunk. */ char *extra_arg; /* first arg for chunk alloc/dealloc funcs */ +#endif unsigned use_extra_arg:1; /* chunk alloc/dealloc funcs take extra arg */ unsigned maybe_empty_object:1;/* There is a possibility that the current chunk contains a zero-length object. This prevents freeing the chunk if we allocate a bigger chunk to replace it. */ + unsigned alloc_failed:1; /* No longer used, as we now call the failed + handler on error, but retained for binary + compatibility. */ }; /* Declare the external functions we use; they are in obstack.c. 
*/ -#ifdef __STDC__ +#if defined __STDC__ && __STDC__ extern void _obstack_newchunk (struct obstack *, int); extern void _obstack_free (struct obstack *, void *); -extern void _obstack_begin (struct obstack *, int, int, - void *(*) (), void (*) ()); -extern void _obstack_begin_1 (struct obstack *, int, int, - void *(*) (), void (*) (), void *); +extern int _obstack_begin (struct obstack *, int, int, + void *(*) (long), void (*) (void *)); +extern int _obstack_begin_1 (struct obstack *, int, int, + void *(*) (void *, long), + void (*) (void *, void *), void *); +extern int _obstack_memory_used (struct obstack *); #else extern void _obstack_newchunk (); extern void _obstack_free (); -extern void _obstack_begin (); -extern void _obstack_begin_1 (); +extern int _obstack_begin (); +extern int _obstack_begin_1 (); +extern int _obstack_memory_used (); #endif -#ifdef __STDC__ +#if defined __STDC__ && __STDC__ /* Do the function-declarations after the structs but before defining the macros. */ @@ -214,6 +236,7 @@ void * obstack_finish (struct obstack *obstack); int obstack_object_size (struct obstack *obstack); int obstack_room (struct obstack *obstack); +void obstack_make_room (struct obstack *obstack, int size); void obstack_1grow_fast (struct obstack *obstack, int data_char); void obstack_ptr_grow_fast (struct obstack *obstack, void *data); void obstack_int_grow_fast (struct obstack *obstack, int data); @@ -223,11 +246,25 @@ void * obstack_base (struct obstack *obstack); void * obstack_next_free (struct obstack *obstack); int obstack_alignment_mask (struct obstack *obstack); int obstack_chunk_size (struct obstack *obstack); +int obstack_memory_used (struct obstack *obstack); #endif /* __STDC__ */ /* Non-ANSI C cannot really support alternative functions for these macros, so we do not declare them. */ + +/* Error handler called when `obstack_chunk_alloc' failed to allocate + more memory. 
This can be set to a user defined function which + should either abort gracefully or use longjump - but shouldn't + return. The default action is to print a message and abort. */ +#if defined __STDC__ && __STDC__ +extern void (*obstack_alloc_failed_handler) (void); +#else +extern void (*obstack_alloc_failed_handler) (); +#endif + +/* Exit value used when `print_and_abort' is used. */ +extern int obstack_exit_failure; /* Pointer to beginning of object being allocated or to be allocated next. Note that this might not be the final address of the object @@ -247,125 +284,175 @@ int obstack_chunk_size (struct obstack *obstack); #define obstack_alignment_mask(h) ((h)->alignment_mask) -#define obstack_init(h) \ +/* To prevent prototype warnings provide complete argument list in + standard C version. */ +#if defined __STDC__ && __STDC__ + +# define obstack_init(h) \ + _obstack_begin ((h), 0, 0, \ + (void *(*) (long)) obstack_chunk_alloc, (void (*) (void *)) obstack_chunk_free) + +# define obstack_begin(h, size) \ + _obstack_begin ((h), (size), 0, \ + (void *(*) (long)) obstack_chunk_alloc, (void (*) (void *)) obstack_chunk_free) + +# define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \ + _obstack_begin ((h), (size), (alignment), \ + (void *(*) (long)) (chunkfun), (void (*) (void *)) (freefun)) + +# define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \ + _obstack_begin_1 ((h), (size), (alignment), \ + (void *(*) (void *, long)) (chunkfun), \ + (void (*) (void *, void *)) (freefun), (arg)) + +# define obstack_chunkfun(h, newchunkfun) \ + ((h) -> chunkfun = (struct _obstack_chunk *(*)(void *, long)) (newchunkfun)) + +# define obstack_freefun(h, newfreefun) \ + ((h) -> freefun = (void (*)(void *, struct _obstack_chunk *)) (newfreefun)) + +#else + +# define obstack_init(h) \ _obstack_begin ((h), 0, 0, \ (void *(*) ()) obstack_chunk_alloc, (void (*) ()) obstack_chunk_free) -#define obstack_begin(h, size) \ +# define 
obstack_begin(h, size) \ _obstack_begin ((h), (size), 0, \ (void *(*) ()) obstack_chunk_alloc, (void (*) ()) obstack_chunk_free) -#define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \ +# define obstack_specify_allocation(h, size, alignment, chunkfun, freefun) \ _obstack_begin ((h), (size), (alignment), \ (void *(*) ()) (chunkfun), (void (*) ()) (freefun)) -#define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \ +# define obstack_specify_allocation_with_arg(h, size, alignment, chunkfun, freefun, arg) \ _obstack_begin_1 ((h), (size), (alignment), \ (void *(*) ()) (chunkfun), (void (*) ()) (freefun), (arg)) +# define obstack_chunkfun(h, newchunkfun) \ + ((h) -> chunkfun = (struct _obstack_chunk *(*)()) (newchunkfun)) + +# define obstack_freefun(h, newfreefun) \ + ((h) -> freefun = (void (*)()) (newfreefun)) + +#endif + #define obstack_1grow_fast(h,achar) (*((h)->next_free)++ = achar) #define obstack_blank_fast(h,n) ((h)->next_free += (n)) + +#define obstack_memory_used(h) _obstack_memory_used (h) -#if defined (__GNUC__) && defined (__STDC__) -#if __GNUC__ < 2 || defined(NeXT) -#define __extension__ -#endif +#if defined __GNUC__ && defined __STDC__ && __STDC__ +/* NextStep 2.0 cc is really gcc 1.93 but it defines __GNUC__ = 2 and + does not implement __extension__. But that compiler doesn't define + __GNUC_MINOR__. */ +# if __GNUC__ < 2 || (__NeXT__ && !__GNUC_MINOR__) +# define __extension__ +# endif /* For GNU C, if not -traditional, we can define these macros to compute all args only once without using a global variable. Also, we can avoid using the `temp' slot, to make faster code. 
*/ -#define obstack_object_size(OBSTACK) \ +# define obstack_object_size(OBSTACK) \ __extension__ \ ({ struct obstack *__o = (OBSTACK); \ (unsigned) (__o->next_free - __o->object_base); }) -#define obstack_room(OBSTACK) \ +# define obstack_room(OBSTACK) \ __extension__ \ ({ struct obstack *__o = (OBSTACK); \ (unsigned) (__o->chunk_limit - __o->next_free); }) -/* Note that the call to _obstack_newchunk is enclosed in (..., 0) - so that we can avoid having void expressions - in the arms of the conditional expression. - Casting the third operand to void was tried before, - but some compilers won't accept it. */ -#define obstack_grow(OBSTACK,where,length) \ +# define obstack_make_room(OBSTACK,length) \ __extension__ \ ({ struct obstack *__o = (OBSTACK); \ int __len = (length); \ - ((__o->next_free + __len > __o->chunk_limit) \ - ? (_obstack_newchunk (__o, __len), 0) : 0); \ - bcopy (where, __o->next_free, __len); \ + if (__o->chunk_limit - __o->next_free < __len) \ + _obstack_newchunk (__o, __len); \ + (void) 0; }) + +# define obstack_empty_p(OBSTACK) \ + __extension__ \ + ({ struct obstack *__o = (OBSTACK); \ + (__o->chunk->prev == 0 && __o->next_free - __o->chunk->contents == 0); }) + +# define obstack_grow(OBSTACK,where,length) \ +__extension__ \ +({ struct obstack *__o = (OBSTACK); \ + int __len = (length); \ + if (__o->next_free + __len > __o->chunk_limit) \ + _obstack_newchunk (__o, __len); \ + _obstack_memcpy (__o->next_free, (char *) (where), __len); \ __o->next_free += __len; \ (void) 0; }) -#define obstack_grow0(OBSTACK,where,length) \ +# define obstack_grow0(OBSTACK,where,length) \ __extension__ \ ({ struct obstack *__o = (OBSTACK); \ int __len = (length); \ - ((__o->next_free + __len + 1 > __o->chunk_limit) \ - ? 
(_obstack_newchunk (__o, __len + 1), 0) : 0), \ - bcopy (where, __o->next_free, __len), \ - __o->next_free += __len, \ + if (__o->next_free + __len + 1 > __o->chunk_limit) \ + _obstack_newchunk (__o, __len + 1); \ + _obstack_memcpy (__o->next_free, (char *) (where), __len); \ + __o->next_free += __len; \ *(__o->next_free)++ = 0; \ (void) 0; }) -#define obstack_1grow(OBSTACK,datum) \ +# define obstack_1grow(OBSTACK,datum) \ __extension__ \ ({ struct obstack *__o = (OBSTACK); \ - ((__o->next_free + 1 > __o->chunk_limit) \ - ? (_obstack_newchunk (__o, 1), 0) : 0), \ + if (__o->next_free + 1 > __o->chunk_limit) \ + _obstack_newchunk (__o, 1); \ *(__o->next_free)++ = (datum); \ (void) 0; }) /* These assume that the obstack alignment is good enough for pointers or ints, and that the data added so far to the current object shares that much alignment. */ - -#define obstack_ptr_grow(OBSTACK,datum) \ + +# define obstack_ptr_grow(OBSTACK,datum) \ __extension__ \ ({ struct obstack *__o = (OBSTACK); \ - ((__o->next_free + sizeof (void *) > __o->chunk_limit) \ - ? (_obstack_newchunk (__o, sizeof (void *)), 0) : 0), \ + if (__o->next_free + sizeof (void *) > __o->chunk_limit) \ + _obstack_newchunk (__o, sizeof (void *)); \ *((void **)__o->next_free)++ = ((void *)datum); \ (void) 0; }) -#define obstack_int_grow(OBSTACK,datum) \ +# define obstack_int_grow(OBSTACK,datum) \ __extension__ \ ({ struct obstack *__o = (OBSTACK); \ - ((__o->next_free + sizeof (int) > __o->chunk_limit) \ - ? 
(_obstack_newchunk (__o, sizeof (int)), 0) : 0), \ + if (__o->next_free + sizeof (int) > __o->chunk_limit) \ + _obstack_newchunk (__o, sizeof (int)); \ *((int *)__o->next_free)++ = ((int)datum); \ (void) 0; }) -#define obstack_ptr_grow_fast(h,aptr) (*((void **)(h)->next_free)++ = (void *)aptr) -#define obstack_int_grow_fast(h,aint) (*((int *)(h)->next_free)++ = (int)aint) +# define obstack_ptr_grow_fast(h,aptr) (*((void **) (h)->next_free)++ = (void *)aptr) +# define obstack_int_grow_fast(h,aint) (*((int *) (h)->next_free)++ = (int) aint) -#define obstack_blank(OBSTACK,length) \ +# define obstack_blank(OBSTACK,length) \ __extension__ \ ({ struct obstack *__o = (OBSTACK); \ int __len = (length); \ - ((__o->chunk_limit - __o->next_free < __len) \ - ? (_obstack_newchunk (__o, __len), 0) : 0); \ + if (__o->chunk_limit - __o->next_free < __len) \ + _obstack_newchunk (__o, __len); \ __o->next_free += __len; \ (void) 0; }) -#define obstack_alloc(OBSTACK,length) \ +# define obstack_alloc(OBSTACK,length) \ __extension__ \ ({ struct obstack *__h = (OBSTACK); \ obstack_blank (__h, (length)); \ obstack_finish (__h); }) -#define obstack_copy(OBSTACK,where,length) \ +# define obstack_copy(OBSTACK,where,length) \ __extension__ \ ({ struct obstack *__h = (OBSTACK); \ obstack_grow (__h, (where), (length)); \ obstack_finish (__h); }) -#define obstack_copy0(OBSTACK,where,length) \ +# define obstack_copy0(OBSTACK,where,length) \ __extension__ \ ({ struct obstack *__h = (OBSTACK); \ obstack_grow0 (__h, (where), (length)); \ @@ -373,86 +460,101 @@ __extension__ \ /* The local variable is named __o1 to avoid a name conflict when obstack_blank is called. 
*/ -#define obstack_finish(OBSTACK) \ +# define obstack_finish(OBSTACK) \ __extension__ \ ({ struct obstack *__o1 = (OBSTACK); \ - void *value = (void *) __o1->object_base; \ + void *value; \ + value = (void *) __o1->object_base; \ if (__o1->next_free == value) \ __o1->maybe_empty_object = 1; \ __o1->next_free \ = __INT_TO_PTR ((__PTR_TO_INT (__o1->next_free)+__o1->alignment_mask)\ & ~ (__o1->alignment_mask)); \ - ((__o1->next_free - (char *)__o1->chunk \ - > __o1->chunk_limit - (char *)__o1->chunk) \ - ? (__o1->next_free = __o1->chunk_limit) : 0); \ + if (__o1->next_free - (char *)__o1->chunk \ + > __o1->chunk_limit - (char *)__o1->chunk) \ + __o1->next_free = __o1->chunk_limit; \ __o1->object_base = __o1->next_free; \ value; }) -#define obstack_free(OBSTACK, OBJ) \ +# define obstack_free(OBSTACK, OBJ) \ __extension__ \ ({ struct obstack *__o = (OBSTACK); \ void *__obj = (OBJ); \ if (__obj > (void *)__o->chunk && __obj < (void *)__o->chunk_limit) \ - __o->next_free = __o->object_base = __obj; \ + __o->next_free = __o->object_base = (char *)__obj; \ else (obstack_free) (__o, __obj); }) #else /* not __GNUC__ or not __STDC__ */ -#define obstack_object_size(h) \ +# define obstack_object_size(h) \ (unsigned) ((h)->next_free - (h)->object_base) -#define obstack_room(h) \ +# define obstack_room(h) \ (unsigned) ((h)->chunk_limit - (h)->next_free) -#define obstack_grow(h,where,length) \ +# define obstack_empty_p(h) \ + ((h)->chunk->prev == 0 && (h)->next_free - (h)->chunk->contents == 0) + +/* Note that the call to _obstack_newchunk is enclosed in (..., 0) + so that we can avoid having void expressions + in the arms of the conditional expression. + Casting the third operand to void was tried before, + but some compilers won't accept it. */ + +# define obstack_make_room(h,length) \ +( (h)->temp = (length), \ + (((h)->next_free + (h)->temp > (h)->chunk_limit) \ + ? 
(_obstack_newchunk ((h), (h)->temp), 0) : 0)) + +# define obstack_grow(h,where,length) \ ( (h)->temp = (length), \ (((h)->next_free + (h)->temp > (h)->chunk_limit) \ ? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \ - bcopy (where, (h)->next_free, (h)->temp), \ + _obstack_memcpy ((h)->next_free, (char *) (where), (h)->temp), \ (h)->next_free += (h)->temp) -#define obstack_grow0(h,where,length) \ +# define obstack_grow0(h,where,length) \ ( (h)->temp = (length), \ (((h)->next_free + (h)->temp + 1 > (h)->chunk_limit) \ ? (_obstack_newchunk ((h), (h)->temp + 1), 0) : 0), \ - bcopy (where, (h)->next_free, (h)->temp), \ + _obstack_memcpy ((h)->next_free, (char *) (where), (h)->temp), \ (h)->next_free += (h)->temp, \ *((h)->next_free)++ = 0) -#define obstack_1grow(h,datum) \ +# define obstack_1grow(h,datum) \ ( (((h)->next_free + 1 > (h)->chunk_limit) \ ? (_obstack_newchunk ((h), 1), 0) : 0), \ - *((h)->next_free)++ = (datum)) + (*((h)->next_free)++ = (datum))) -#define obstack_ptr_grow(h,datum) \ +# define obstack_ptr_grow(h,datum) \ ( (((h)->next_free + sizeof (char *) > (h)->chunk_limit) \ ? (_obstack_newchunk ((h), sizeof (char *)), 0) : 0), \ - *((char **)(((h)->next_free+=sizeof(char *))-sizeof(char *))) = ((char *)datum)) + (*((char **) (((h)->next_free+=sizeof(char *))-sizeof(char *))) = ((char *) datum))) -#define obstack_int_grow(h,datum) \ +# define obstack_int_grow(h,datum) \ ( (((h)->next_free + sizeof (int) > (h)->chunk_limit) \ ? 
(_obstack_newchunk ((h), sizeof (int)), 0) : 0), \ - *((int *)(((h)->next_free+=sizeof(int))-sizeof(int))) = ((int)datum)) + (*((int *) (((h)->next_free+=sizeof(int))-sizeof(int))) = ((int) datum))) -#define obstack_ptr_grow_fast(h,aptr) (*((char **)(h)->next_free)++ = (char *)aptr) -#define obstack_int_grow_fast(h,aint) (*((int *)(h)->next_free)++ = (int)aint) +# define obstack_ptr_grow_fast(h,aptr) (*((char **) (h)->next_free)++ = (char *) aptr) +# define obstack_int_grow_fast(h,aint) (*((int *) (h)->next_free)++ = (int) aint) -#define obstack_blank(h,length) \ +# define obstack_blank(h,length) \ ( (h)->temp = (length), \ (((h)->chunk_limit - (h)->next_free < (h)->temp) \ ? (_obstack_newchunk ((h), (h)->temp), 0) : 0), \ - (h)->next_free += (h)->temp) + ((h)->next_free += (h)->temp)) -#define obstack_alloc(h,length) \ +# define obstack_alloc(h,length) \ (obstack_blank ((h), (length)), obstack_finish ((h))) -#define obstack_copy(h,where,length) \ +# define obstack_copy(h,where,length) \ (obstack_grow ((h), (where), (length)), obstack_finish ((h))) -#define obstack_copy0(h,where,length) \ +# define obstack_copy0(h,where,length) \ (obstack_grow0 ((h), (where), (length)), obstack_finish ((h))) -#define obstack_finish(h) \ +# define obstack_finish(h) \ ( ((h)->next_free == (h)->object_base \ ? (((h)->maybe_empty_object = 1), 0) \ : 0), \ @@ -460,28 +562,32 @@ __extension__ \ (h)->next_free \ = __INT_TO_PTR ((__PTR_TO_INT ((h)->next_free)+(h)->alignment_mask) \ & ~ ((h)->alignment_mask)), \ - (((h)->next_free - (char *)(h)->chunk \ - > (h)->chunk_limit - (char *)(h)->chunk) \ + (((h)->next_free - (char *) (h)->chunk \ + > (h)->chunk_limit - (char *) (h)->chunk) \ ? 
((h)->next_free = (h)->chunk_limit) : 0), \ (h)->object_base = (h)->next_free, \ __INT_TO_PTR ((h)->temp)) -#ifdef __STDC__ -#define obstack_free(h,obj) \ -( (h)->temp = (char *)(obj) - (char *) (h)->chunk, \ +# if defined __STDC__ && __STDC__ +# define obstack_free(h,obj) \ +( (h)->temp = (char *) (obj) - (char *) (h)->chunk, \ (((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)\ ? (int) ((h)->next_free = (h)->object_base \ = (h)->temp + (char *) (h)->chunk) \ : (((obstack_free) ((h), (h)->temp + (char *) (h)->chunk), 0), 0))) -#else -#define obstack_free(h,obj) \ -( (h)->temp = (char *)(obj) - (char *) (h)->chunk, \ +# else +# define obstack_free(h,obj) \ +( (h)->temp = (char *) (obj) - (char *) (h)->chunk, \ (((h)->temp > 0 && (h)->temp < (h)->chunk_limit - (char *) (h)->chunk)\ ? (int) ((h)->next_free = (h)->object_base \ = (h)->temp + (char *) (h)->chunk) \ : (_obstack_free ((h), (h)->temp + (char *) (h)->chunk), 0))) -#endif +# endif #endif /* not __GNUC__ or not __STDC__ */ -#endif /* not __OBSTACKS__ */ +#ifdef __cplusplus +} /* C++ */ +#endif + +#endif /* obstack.h */ diff --git a/gnu/usr.bin/grep/regex.c b/gnu/usr.bin/grep/regex.c index 3aea3224699..640e3fe545b 100644 --- a/gnu/usr.bin/grep/regex.c +++ b/gnu/usr.bin/grep/regex.c @@ -1,132 +1,172 @@ /* Extended regular expression matching and search library, version 0.12. - (Implements POSIX draft P10003.2/D11.2, except for + (Implements POSIX draft P1003.2/D11.2, except for some of the internationalization features.) + Copyright (C) 1993, 94, 95, 96, 97, 98, 99 Free Software Foundation, Inc. - Copyright (C) 1993 Free Software Foundation, Inc. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. 
- This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. -#ifndef lint -static char rcsid[] = "$NetBSD: regex.c,v 1.4 1995/04/23 08:36:17 cgd Exp $"; -#endif /* not lint */ + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ /* AIX requires this to be the first thing in the file. */ -#if defined (_AIX) && !defined (REGEX_MALLOC) +#if defined _AIX && !defined REGEX_MALLOC #pragma alloca #endif +#undef _GNU_SOURCE #define _GNU_SOURCE -/* We need this for `regex.h', and perhaps for the Emacs include files. */ -#include - #ifdef HAVE_CONFIG_H -#include "config.h" +# include #endif -/* The `emacs' switch turns on certain matching commands - that make sense only in Emacs. */ -#ifdef emacs - -#include "lisp.h" -#include "buffer.h" -#include "syntax.h" - -/* Emacs uses `NULL' as a predicate. */ -#undef NULL - -#else /* not emacs */ - -/* We used to test for `BSTRING' here, but only GCC and Emacs define - `BSTRING', as far as I know, and neither of them use this code. 
*/ -#if HAVE_STRING_H || STDC_HEADERS -#include -#ifndef bcmp -#define bcmp(s1, s2, n) memcmp ((s1), (s2), (n)) -#endif -#ifndef bcopy -#define bcopy(s, d, n) memcpy ((d), (s), (n)) -#endif -#ifndef bzero -#define bzero(s, n) memset ((s), 0, (n)) -#endif -#else -#include -#endif +#ifndef PARAMS +# if defined __GNUC__ || (defined __STDC__ && __STDC__) +# define PARAMS(args) args +# else +# define PARAMS(args) () +# endif /* GCC. */ +#endif /* Not PARAMS. */ -#ifdef STDC_HEADERS -#include +#if defined STDC_HEADERS && !defined emacs +# include #else -char *malloc (); -char *realloc (); +/* We need this for `regex.h', and perhaps for the Emacs include files. */ +# include #endif +#define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC) -/* Define the syntax stuff for \<, \>, etc. */ - -/* This must be nonzero for the wordchar and notwordchar pattern - commands in re_match_2. */ -#ifndef Sword -#define Sword 1 +/* For platform which support the ISO C amendement 1 functionality we + support user defined character classes. */ +#if defined _LIBC || WIDE_CHAR_SUPPORT +/* Solaris 2.5 has a bug: must be included before . */ +# include +# include #endif -#ifdef SYNTAX_TABLE - -extern char *re_syntax_table; - -#else /* not SYNTAX_TABLE */ - -/* How many characters in the character set. */ -#define CHAR_SET_SIZE 256 - -static char re_syntax_table[CHAR_SET_SIZE]; +#ifdef _LIBC +/* We have to keep the namespace clean. 
*/ +# define regfree(preg) __regfree (preg) +# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) +# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) +# define regerror(errcode, preg, errbuf, errbuf_size) \ + __regerror(errcode, preg, errbuf, errbuf_size) +# define re_set_registers(bu, re, nu, st, en) \ + __re_set_registers (bu, re, nu, st, en) +# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ + __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) +# define re_match(bufp, string, size, pos, regs) \ + __re_match (bufp, string, size, pos, regs) +# define re_search(bufp, string, size, startpos, range, regs) \ + __re_search (bufp, string, size, startpos, range, regs) +# define re_compile_pattern(pattern, length, bufp) \ + __re_compile_pattern (pattern, length, bufp) +# define re_set_syntax(syntax) __re_set_syntax (syntax) +# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ + __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) +# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) + +#define btowc __btowc +#endif -static void -init_syntax_once () -{ - register int c; - static int done = 0; - if (done) - return; +#ifndef _ +/* This is for other GNU distributions with internationalized messages. + When compiling libc, the _ and N_ macros are predefined. */ +# ifdef HAVE_LIBINTL_H +# include +# else +# define gettext(msgid) (msgid) +# endif +# define N_(msgid) (msgid) +#endif - bzero (re_syntax_table, sizeof re_syntax_table); +/* The `emacs' switch turns on certain matching commands + that make sense only in Emacs. 
*/ +#ifdef emacs - for (c = 'a'; c <= 'z'; c++) - re_syntax_table[c] = Sword; +# include "lisp.h" +# include "buffer.h" +# include "syntax.h" - for (c = 'A'; c <= 'Z'; c++) - re_syntax_table[c] = Sword; +#else /* not emacs */ - for (c = '0'; c <= '9'; c++) - re_syntax_table[c] = Sword; +/* If we are not linking with Emacs proper, + we can't use the relocating allocator + even if config.h says that we can. */ +# undef REL_ALLOC - re_syntax_table['_'] = Sword; +# if defined STDC_HEADERS || defined _LIBC +# include +# else +char *malloc (); +char *realloc (); +# endif + +/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. + If nothing else has been done, use the method below. */ +# ifdef INHIBIT_STRING_HEADER +# if !(defined HAVE_BZERO && defined HAVE_BCOPY) +# if !defined bzero && !defined bcopy +# undef INHIBIT_STRING_HEADER +# endif +# endif +# endif + +/* This is the normal way of making sure we have a bcopy and a bzero. + This is used in most programs--a few other programs avoid this + by defining INHIBIT_STRING_HEADER. */ +# ifndef INHIBIT_STRING_HEADER +# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC +# include +# ifndef bzero +# ifndef _LIBC +# define bzero(s, n) (memset (s, '\0', n), (s)) +# else +# define bzero(s, n) __bzero (s, n) +# endif +# endif +# else +# include +# ifndef memcmp +# define memcmp(s1, s2, n) bcmp (s1, s2, n) +# endif +# ifndef memcpy +# define memcpy(d, s, n) (bcopy (s, d, n), (d)) +# endif +# endif +# endif - done = 1; -} +/* Define the syntax stuff for \<, \>, etc. */ -#endif /* not SYNTAX_TABLE */ +/* This must be nonzero for the wordchar and notwordchar pattern + commands in re_match_2. */ +# ifndef Sword +# define Sword 1 +# endif -#define SYNTAX(c) re_syntax_table[c] +# ifdef SWITCH_ENUM_BUG +# define SWITCH_ENUM_CAST(x) ((int)(x)) +# else +# define SWITCH_ENUM_CAST(x) (x) +# endif #endif /* not emacs */ /* Get the interface, including the syntax bits. 
*/ -#include "regex.h" +#include /* isalpha etc. are used for the character classes. */ #include @@ -140,36 +180,47 @@ init_syntax_once () STDC_HEADERS is defined, then autoconf has verified that the ctype macros don't need to be guarded with references to isascii. ... Defining isascii to 1 should let any compiler worth its salt - eliminate the && through constant folding." */ -#if ! defined (isascii) || defined (STDC_HEADERS) -#undef isascii -#define isascii(c) 1 + eliminate the && through constant folding." + Solaris defines some of these symbols so we must undefine them first. */ + +#undef ISASCII +#if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII) +# define ISASCII(c) 1 +#else +# define ISASCII(c) isascii(c) #endif #ifdef isblank -#define ISBLANK(c) (isascii (c) && isblank (c)) +# define ISBLANK(c) (ISASCII (c) && isblank (c)) #else -#define ISBLANK(c) ((c) == ' ' || (c) == '\t') +# define ISBLANK(c) ((c) == ' ' || (c) == '\t') #endif #ifdef isgraph -#define ISGRAPH(c) (isascii (c) && isgraph (c)) +# define ISGRAPH(c) (ISASCII (c) && isgraph (c)) #else -#define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c)) +# define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c)) #endif -#define ISPRINT(c) (isascii (c) && isprint (c)) -#define ISDIGIT(c) (isascii (c) && isdigit (c)) -#define ISALNUM(c) (isascii (c) && isalnum (c)) -#define ISALPHA(c) (isascii (c) && isalpha (c)) -#define ISCNTRL(c) (isascii (c) && iscntrl (c)) -#define ISLOWER(c) (isascii (c) && islower (c)) -#define ISPUNCT(c) (isascii (c) && ispunct (c)) -#define ISSPACE(c) (isascii (c) && isspace (c)) -#define ISUPPER(c) (isascii (c) && isupper (c)) -#define ISXDIGIT(c) (isascii (c) && isxdigit (c)) +#undef ISPRINT +#define ISPRINT(c) (ISASCII (c) && isprint (c)) +#define ISDIGIT(c) (ISASCII (c) && isdigit (c)) +#define ISALNUM(c) (ISASCII (c) && isalnum (c)) +#define ISALPHA(c) (ISASCII (c) && isalpha (c)) +#define ISCNTRL(c) (ISASCII (c) && iscntrl (c)) +#define 
ISLOWER(c) (ISASCII (c) && islower (c)) +#define ISPUNCT(c) (ISASCII (c) && ispunct (c)) +#define ISSPACE(c) (ISASCII (c) && isspace (c)) +#define ISUPPER(c) (ISASCII (c) && isupper (c)) +#define ISXDIGIT(c) (ISASCII (c) && isxdigit (c)) + +#ifdef _tolower +# define TOLOWER(c) _tolower(c) +#else +# define TOLOWER(c) tolower(c) +#endif #ifndef NULL -#define NULL 0 +# define NULL (void *)0 #endif /* We remove any previous definition of `SIGN_EXTEND_CHAR', @@ -178,57 +229,124 @@ init_syntax_once () (Per Bothner suggested the basic approach.) */ #undef SIGN_EXTEND_CHAR #if __STDC__ -#define SIGN_EXTEND_CHAR(c) ((signed char) (c)) +# define SIGN_EXTEND_CHAR(c) ((signed char) (c)) #else /* not __STDC__ */ /* As in Harbison and Steele. */ -#define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) +# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128) #endif +#ifndef emacs +/* How many characters in the character set. */ +# define CHAR_SET_SIZE 256 + +# ifdef SYNTAX_TABLE + +extern char *re_syntax_table; + +# else /* not SYNTAX_TABLE */ + +static char re_syntax_table[CHAR_SET_SIZE]; + +static void +init_syntax_once () +{ + register int c; + static int done = 0; + + if (done) + return; + bzero (re_syntax_table, sizeof re_syntax_table); + + for (c = 0; c < CHAR_SET_SIZE; ++c) + if (ISALNUM (c)) + re_syntax_table[c] = Sword; + + re_syntax_table['_'] = Sword; + + done = 1; +} + +# endif /* not SYNTAX_TABLE */ + +# define SYNTAX(c) re_syntax_table[((c) & 0xFF)] + +#endif /* emacs */ + /* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we use `alloca' instead of `malloc'. This is because using malloc in re_search* or re_match* could cause memory leaks when C-g is used in Emacs; also, malloc is slower and causes storage fragmentation. On - the other hand, malloc is more portable, and easier to debug. - + the other hand, malloc is more portable, and easier to debug. 
+ Because we sometimes use alloca, some routines have to be macros, not functions -- `alloca'-allocated space disappears at the end of the function it is called in. */ #ifdef REGEX_MALLOC -#define REGEX_ALLOCATE malloc -#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) +# define REGEX_ALLOCATE malloc +# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize) +# define REGEX_FREE free #else /* not REGEX_MALLOC */ /* Emacs already defines alloca, sometimes. */ -#ifndef alloca +# ifndef alloca /* Make alloca work the best possible way. */ -#ifdef __GNUC__ -#define alloca __builtin_alloca -#else /* not __GNUC__ */ -#if HAVE_ALLOCA_H -#include -#else /* not __GNUC__ or HAVE_ALLOCA_H */ -#ifndef _AIX /* Already did AIX, up at the top. */ -char *alloca (); -#endif /* not _AIX */ -#endif /* not HAVE_ALLOCA_H */ -#endif /* not __GNUC__ */ +# ifdef __GNUC__ +# define alloca __builtin_alloca +# else /* not __GNUC__ */ +# if HAVE_ALLOCA_H +# include +# endif /* HAVE_ALLOCA_H */ +# endif /* not __GNUC__ */ -#endif /* not alloca */ +# endif /* not alloca */ -#define REGEX_ALLOCATE alloca +# define REGEX_ALLOCATE alloca /* Assumes a `char *destination' variable. */ -#define REGEX_REALLOCATE(source, osize, nsize) \ +# define REGEX_REALLOCATE(source, osize, nsize) \ (destination = (char *) alloca (nsize), \ - bcopy (source, destination, osize), \ - destination) + memcpy (destination, source, osize)) + +/* No need to do anything to free, after alloca. */ +# define REGEX_FREE(arg) ((void)0) /* Do nothing! But inhibit gcc warning. */ #endif /* not REGEX_MALLOC */ +/* Define how to allocate the failure stack. 
*/ + +#if defined REL_ALLOC && defined REGEX_MALLOC + +# define REGEX_ALLOCATE_STACK(size) \ + r_alloc (&failure_stack_ptr, (size)) +# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ + r_re_alloc (&failure_stack_ptr, (nsize)) +# define REGEX_FREE_STACK(ptr) \ + r_alloc_free (&failure_stack_ptr) + +#else /* not using relocating allocator */ + +# ifdef REGEX_MALLOC + +# define REGEX_ALLOCATE_STACK malloc +# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize) +# define REGEX_FREE_STACK free + +# else /* not REGEX_MALLOC */ + +# define REGEX_ALLOCATE_STACK alloca + +# define REGEX_REALLOCATE_STACK(source, osize, nsize) \ + REGEX_REALLOCATE (source, osize, nsize) +/* No need to explicitly free anything. */ +# define REGEX_FREE_STACK(arg) + +# endif /* not REGEX_MALLOC */ +#endif /* not using relocating allocator */ + /* True if `size1' is non-NULL and PTR is pointing anywhere inside `string1' or just past its end. This works if PTR is NULL, which is @@ -239,34 +357,44 @@ char *alloca (); /* (Re)Allocate N items of type T using malloc, or fail. */ #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t))) #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t))) +#define RETALLOC_IF(addr, n, t) \ + if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t) #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t))) #define BYTEWIDTH 8 /* In bits. */ #define STREQ(s1, s2) ((strcmp (s1, s2) == 0)) +#undef MAX +#undef MIN #define MAX(a, b) ((a) > (b) ? (a) : (b)) #define MIN(a, b) ((a) < (b) ? (a) : (b)) typedef char boolean; #define false 0 #define true 1 + +static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp, + const char *string1, int size1, + const char *string2, int size2, + int pos, + struct re_registers *regs, + int stop)); /* These are the command codes that appear in compiled regular expressions. Some opcodes are followed by argument bytes. 
A command code can specify any interpretation whatsoever for its - arguments. Zero bytes may appear in the compiled regular expression. - - The value of `exactn' is needed in search.c (search_buffer) in Emacs. - So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of - `exactn' we use here must also be 1. */ + arguments. Zero bytes may appear in the compiled regular expression. */ typedef enum { no_op = 0, + /* Succeed right away--no more backtracking. */ + succeed, + /* Followed by one byte giving n, then by n literal bytes. */ - exactn = 1, + exactn, /* Matches any (more or less) character. */ anychar, @@ -317,9 +445,9 @@ typedef enum /* Analogously, for end of buffer/string. */ endbuf, - + /* Followed by two byte relative address to which to jump. */ - jump, + jump, /* Same as jump, but marks the end of an alternative. */ jump_past_alt, @@ -327,11 +455,11 @@ typedef enum /* Followed by two-byte relative address of place to resume at in case of failure. */ on_failure_jump, - + /* Like on_failure_jump, but pushes a placeholder instead of the current string position when executed. */ on_failure_keep_string_jump, - + /* Throw away latest failure point and then jump to following two-byte relative address. */ pop_failure_jump, @@ -422,20 +550,21 @@ typedef enum } while (0) #ifdef DEBUG +static void extract_number _RE_ARGS ((int *dest, unsigned char *source)); static void extract_number (dest, source) int *dest; unsigned char *source; { - int temp = SIGN_EXTEND_CHAR (*(source + 1)); + int temp = SIGN_EXTEND_CHAR (*(source + 1)); *dest = *source & 0377; *dest += temp << 8; } -#ifndef EXTRACT_MACROS /* To debug the macros. */ -#undef EXTRACT_NUMBER -#define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) -#endif /* not EXTRACT_MACROS */ +# ifndef EXTRACT_MACROS /* To debug the macros. 
*/ +# undef EXTRACT_NUMBER +# define EXTRACT_NUMBER(dest, src) extract_number (&dest, src) +# endif /* not EXTRACT_MACROS */ #endif /* DEBUG */ @@ -449,20 +578,22 @@ extract_number (dest, source) } while (0) #ifdef DEBUG +static void extract_number_and_incr _RE_ARGS ((int *destination, + unsigned char **source)); static void extract_number_and_incr (destination, source) int *destination; unsigned char **source; -{ +{ extract_number (destination, *source); *source += 2; } -#ifndef EXTRACT_MACROS -#undef EXTRACT_NUMBER_AND_INCR -#define EXTRACT_NUMBER_AND_INCR(dest, src) \ +# ifndef EXTRACT_MACROS +# undef EXTRACT_NUMBER_AND_INCR +# define EXTRACT_NUMBER_AND_INCR(dest, src) \ extract_number_and_incr (&dest, &src) -#endif /* not EXTRACT_MACROS */ +# endif /* not EXTRACT_MACROS */ #endif /* DEBUG */ @@ -475,26 +606,24 @@ extract_number_and_incr (destination, source) #ifdef DEBUG /* We use standard I/O for debugging. */ -#include +# include /* It is useful to test things that ``must'' be true when debugging. 
*/ -#include +# include -static int debug = 0; +static int debug; -#define DEBUG_STATEMENT(e) e -#define DEBUG_PRINT1(x) if (debug) printf (x) -#define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) -#define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) -#define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) -#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ +# define DEBUG_STATEMENT(e) e +# define DEBUG_PRINT1(x) if (debug) printf (x) +# define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2) +# define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3) +# define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4) +# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \ if (debug) print_partial_compiled_pattern (s, e) -#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ +# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \ if (debug) print_double_string (w, s1, sz1, s2, sz2) -extern void printchar (); - /* Print the fastmap in human-readable form. */ void @@ -502,14 +631,14 @@ print_fastmap (fastmap) char *fastmap; { unsigned was_a_range = 0; - unsigned i = 0; - + unsigned i = 0; + while (i < (1 << BYTEWIDTH)) { if (fastmap[i++]) { was_a_range = 0; - printchar (i - 1); + putchar (i - 1); while (i < (1 << BYTEWIDTH) && fastmap[i]) { was_a_range = 1; @@ -518,11 +647,11 @@ print_fastmap (fastmap) if (was_a_range) { printf ("-"); - printchar (i - 1); + putchar (i - 1); } } } - putchar ('\n'); + putchar ('\n'); } @@ -535,6 +664,7 @@ print_partial_compiled_pattern (start, end) unsigned char *end; { int mcnt, mcnt2; + unsigned char *p1; unsigned char *p = start; unsigned char *pend = end; @@ -543,7 +673,7 @@ print_partial_compiled_pattern (start, end) printf ("(null)\n"); return; } - + /* Loop over pattern commands. 
*/ while (p < pend) { @@ -561,7 +691,7 @@ print_partial_compiled_pattern (start, end) do { putchar ('/'); - printchar (*p++); + putchar (*p++); } while (--mcnt); break; @@ -592,7 +722,7 @@ print_partial_compiled_pattern (start, end) printf ("/charset [%s", (re_opcode_t) *(p - 1) == charset_not ? "^" : ""); - + assert (p + *p < pend); for (c = 0; c < 256; c++) @@ -608,18 +738,18 @@ print_partial_compiled_pattern (start, end) /* Have we broken a range? */ else if (last + 1 != c && in_range) { - printchar (last); + putchar (last); in_range = 0; } - + if (! in_range) - printchar (c); + putchar (c); last = c; } if (in_range) - printchar (last); + putchar (last); putchar (']'); @@ -653,7 +783,7 @@ print_partial_compiled_pattern (start, end) case push_dummy_failure: printf ("/push_dummy_failure"); break; - + case maybe_pop_jump: extract_number_and_incr (&mcnt, &p); printf ("/maybe_pop_jump to %d", p + mcnt - start); @@ -662,36 +792,39 @@ print_partial_compiled_pattern (start, end) case pop_failure_jump: extract_number_and_incr (&mcnt, &p); printf ("/pop_failure_jump to %d", p + mcnt - start); - break; - + break; + case jump_past_alt: extract_number_and_incr (&mcnt, &p); printf ("/jump_past_alt to %d", p + mcnt - start); - break; - + break; + case jump: extract_number_and_incr (&mcnt, &p); printf ("/jump to %d", p + mcnt - start); break; - case succeed_n: + case succeed_n: extract_number_and_incr (&mcnt, &p); + p1 = p + mcnt; extract_number_and_incr (&mcnt2, &p); - printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2); + printf ("/succeed_n to %d, %d times", p1 - start, mcnt2); break; - - case jump_n: + + case jump_n: extract_number_and_incr (&mcnt, &p); + p1 = p + mcnt; extract_number_and_incr (&mcnt2, &p); - printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2); + printf ("/jump_n to %d, %d times", p1 - start, mcnt2); break; - - case set_number_at: + + case set_number_at: extract_number_and_incr (&mcnt, &p); + p1 = p + mcnt; extract_number_and_incr 
(&mcnt2, &p); - printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2); + printf ("/set_number_at location %d to %d", p1 - start, mcnt2); break; - + case wordbound: printf ("/wordbound"); break; @@ -703,11 +836,11 @@ print_partial_compiled_pattern (start, end) case wordbeg: printf ("/wordbeg"); break; - + case wordend: printf ("/wordend"); - -#ifdef emacs + +# ifdef emacs case before_dot: printf ("/before_dot"); break; @@ -725,18 +858,18 @@ print_partial_compiled_pattern (start, end) mcnt = *p++; printf ("/%d", mcnt); break; - + case notsyntaxspec: printf ("/notsyntaxspec"); mcnt = *p++; printf ("/%d", mcnt); break; -#endif /* emacs */ +# endif /* emacs */ case wordchar: printf ("/wordchar"); break; - + case notwordchar: printf ("/notwordchar"); break; @@ -767,7 +900,8 @@ print_compiled_pattern (bufp) unsigned char *buffer = bufp->buffer; print_partial_compiled_pattern (buffer, buffer + bufp->used); - printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated); + printf ("%ld bytes used/%ld bytes allocated.\n", + bufp->used, bufp->allocated); if (bufp->fastmap_accurate && bufp->fastmap) { @@ -782,7 +916,7 @@ print_compiled_pattern (bufp) printf ("no_sub: %d\t", bufp->no_sub); printf ("not_bol: %d\t", bufp->not_bol); printf ("not_eol: %d\t", bufp->not_eol); - printf ("syntax: %d\n", bufp->syntax); + printf ("syntax: %lx\n", bufp->syntax); /* Perhaps we should print the translate table? 
*/ } @@ -795,8 +929,8 @@ print_double_string (where, string1, size1, string2, size2) int size1; int size2; { - unsigned this_char; - + int this_char; + if (where == NULL) printf ("(null)"); else @@ -804,35 +938,44 @@ print_double_string (where, string1, size1, string2, size2) if (FIRST_STRING_P (where)) { for (this_char = where - string1; this_char < size1; this_char++) - printchar (string1[this_char]); + putchar (string1[this_char]); - where = string2; + where = string2; } for (this_char = where - string2; this_char < size2; this_char++) - printchar (string2[this_char]); + putchar (string2[this_char]); } } +void +printchar (c) + int c; +{ + putc (c, stderr); +} + #else /* not DEBUG */ -#undef assert -#define assert(e) +# undef assert +# define assert(e) -#define DEBUG_STATEMENT(e) -#define DEBUG_PRINT1(x) -#define DEBUG_PRINT2(x1, x2) -#define DEBUG_PRINT3(x1, x2, x3) -#define DEBUG_PRINT4(x1, x2, x3, x4) -#define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) -#define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) +# define DEBUG_STATEMENT(e) +# define DEBUG_PRINT1(x) +# define DEBUG_PRINT2(x1, x2) +# define DEBUG_PRINT3(x1, x2, x3) +# define DEBUG_PRINT4(x1, x2, x3, x4) +# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) +# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) #endif /* not DEBUG */ /* Set by `re_set_syntax' to the current regexp syntax to recognize. Can also be assigned to arbitrarily: each pattern buffer stores its own syntax, so it can be changed between regex compilations. */ -reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS; +/* This has no initializer because initialized variables in Emacs + become read-only after dumping. */ +reg_syntax_t re_syntax_options; /* Specify the precise syntax of regexps for compilation. 
This provides @@ -847,133 +990,704 @@ re_set_syntax (syntax) reg_syntax_t syntax; { reg_syntax_t ret = re_syntax_options; - + re_syntax_options = syntax; +#ifdef DEBUG + if (syntax & RE_DEBUG) + debug = 1; + else if (debug) /* was on but now is not */ + debug = 0; +#endif /* DEBUG */ return ret; } +#ifdef _LIBC +weak_alias (__re_set_syntax, re_set_syntax) +#endif /* This table gives an error message for each of the error codes listed - in regex.h. Obviously the order here has to be same as there. */ - -static const char *re_error_msg[] = - { NULL, /* REG_NOERROR */ - "No match", /* REG_NOMATCH */ - "Invalid regular expression", /* REG_BADPAT */ - "Invalid collation character", /* REG_ECOLLATE */ - "Invalid character class name", /* REG_ECTYPE */ - "Trailing backslash", /* REG_EESCAPE */ - "Invalid back reference", /* REG_ESUBREG */ - "Unmatched [ or [^", /* REG_EBRACK */ - "Unmatched ( or \\(", /* REG_EPAREN */ - "Unmatched \\{", /* REG_EBRACE */ - "Invalid content of \\{\\}", /* REG_BADBR */ - "Invalid range end", /* REG_ERANGE */ - "Memory exhausted", /* REG_ESPACE */ - "Invalid preceding regular expression", /* REG_BADRPT */ - "Premature end of regular expression", /* REG_EEND */ - "Regular expression too big", /* REG_ESIZE */ - "Unmatched ) or \\)", /* REG_ERPAREN */ + in regex.h. Obviously the order here has to be same as there. + POSIX doesn't require that we do anything for REG_NOERROR, + but why not be nice? */ + +#if 0 + /* This section is for xgettext; it sees the strings wrapped inside + N_() and marks them as needing translation. They should match + the strings in re_error_msgid. We can't use the usual string + concatenation trick to initialize re_error_msgid, since other GNU + distributions use this file with traditional C, and traditional C + lacks string concatenation. 
*/ + N_("Success") /* REG_NOERROR */ + N_("No match") /* REG_NOMATCH */ + N_("Invalid regular expression") /* REG_BADPAT */ + N_("Invalid collation character") /* REG_ECOLLATE */ + N_("Invalid character class name") /* REG_ECTYPE */ + N_("Trailing backslash") /* REG_EESCAPE */ + N_("Invalid back reference") /* REG_ESUBREG */ + N_("Unmatched [ or [^") /* REG_EBRACK */ + N_("Unmatched ( or \\(") /* REG_EPAREN */ + N_("Unmatched \\{") /* REG_EBRACE */ + N_("Invalid content of \\{\\}") /* REG_BADBR */ + N_("Invalid range end") /* REG_ERANGE */ + N_("Memory exhausted") /* REG_ESPACE */ + N_("Invalid preceding regular expression") /* REG_BADRPT */ + N_("Premature end of regular expression") /* REG_EEND */ + N_("Regular expression too big") /* REG_ESIZE */ + N_("Unmatched ) or \\)") /* REG_ERPAREN */ +#endif + +static const char re_error_msgid[] = "\ +Success\0\ +No match\0\ +Invalid regular expression\0\ +Invalid collation character\0\ +Invalid character class name\0\ +Trailing backslash\0\ +Invalid back reference\0\ +Unmatched [ or [^\0\ +Unmatched ( or \\(\0\ +Unmatched \\{\0\ +Invalid content of \\{\\}\0\ +Invalid range end\0\ +Memory exhausted\0\ +Invalid preceding regular expression\0\ +Premature end of regular expression\0\ +Regular expression too big\0\ +Unmatched ) or \\)"; + +#define REG_NOERROR_IDX 0 +#define REG_NOMATCH_IDX (REG_NOERROR_IDX + sizeof "Success") +#define REG_BADPAT_IDX (REG_NOMATCH_IDX + sizeof "No match") +#define REG_ECOLLATE_IDX (REG_BADPAT_IDX + sizeof "Invalid regular expression") +#define REG_ECTYPE_IDX (REG_ECOLLATE_IDX + sizeof "Invalid collation character") +#define REG_EESCAPE_IDX (REG_ECTYPE_IDX + sizeof "Invalid character class name") +#define REG_ESUBREG_IDX (REG_EESCAPE_IDX + sizeof "Trailing backslash") +#define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference") +#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^") +#define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(") +#define 
REG_BADBR_IDX (REG_EBRACE_IDX + sizeof "Unmatched \\{") +#define REG_ERANGE_IDX (REG_BADBR_IDX + sizeof "Invalid content of \\{\\}") +#define REG_ESPACE_IDX (REG_ERANGE_IDX + sizeof "Invalid range end") +#define REG_BADRPT_IDX (REG_ESPACE_IDX + sizeof "Memory exhausted") +#define REG_EEND_IDX (REG_BADRPT_IDX + sizeof "Invalid preceding regular expression") +#define REG_ESIZE_IDX (REG_EEND_IDX + sizeof "Premature end of regular expression") +#define REG_ERPAREN_IDX (REG_ESIZE_IDX + sizeof "Regular expression too big") + +static const size_t re_error_msgid_idx[] = + { + REG_NOERROR_IDX, + REG_NOMATCH_IDX, + REG_BADPAT_IDX, + REG_ECOLLATE_IDX, + REG_ECTYPE_IDX, + REG_EESCAPE_IDX, + REG_ESUBREG_IDX, + REG_EBRACK_IDX, + REG_EPAREN_IDX, + REG_EBRACE_IDX, + REG_BADBR_IDX, + REG_ERANGE_IDX, + REG_ESPACE_IDX, + REG_BADRPT_IDX, + REG_EEND_IDX, + REG_ESIZE_IDX, + REG_ERPAREN_IDX }; -/* Subroutine declarations and macros for regex_compile. */ +/* Avoiding alloca during matching, to placate r_alloc. */ + +/* Define MATCH_MAY_ALLOCATE unless we need to make sure that the + searching and matching functions should not call alloca. On some + systems, alloca is implemented in terms of malloc, and if we're + using the relocating allocator routines, then malloc could cause a + relocation, which might (if the strings being searched are in the + ralloc heap) shift the data out from underneath the regexp + routines. + + Here's another reason to avoid allocation: Emacs + processes input from X in a signal handler; processing X input may + call malloc; if input arrives while a matching routine is calling + malloc, then we're scrod. But Emacs can't just block input while + calling matching routines; then we don't notice interrupts when + they come in. So, Emacs blocks input around all regexp calls + except the matching calls, which it leaves unprotected, in the + faith that they will not malloc. */ + +/* Normally, this is fine. 
*/ +#define MATCH_MAY_ALLOCATE + +/* When using GNU C, we are not REALLY using the C alloca, no matter + what config.h may say. So don't take precautions for it. */ +#ifdef __GNUC__ +# undef C_ALLOCA +#endif -static void store_op1 (), store_op2 (); -static void insert_op1 (), insert_op2 (); -static boolean at_begline_loc_p (), at_endline_loc_p (); -static boolean group_in_compile_stack (); -static reg_errcode_t compile_range (); +/* The match routines may not allocate if (1) they would do it with malloc + and (2) it's not safe for them to use malloc. + Note that if REL_ALLOC is defined, matching would not use malloc for the + failure stack, but we would still use it for the register vectors; + so REL_ALLOC should not affect this. */ +#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs +# undef MATCH_MAY_ALLOCATE +#endif -/* Fetch the next character in the uncompiled pattern---translating it - if necessary. Also cast from a signed character in the constant - string passed to us by the user to an unsigned char that we can use - as an array index (in, e.g., `translate'). */ -#define PATFETCH(c) \ - do {if (p == pend) return REG_EEND; \ - c = (unsigned char) *p++; \ - if (translate) c = translate[c]; \ - } while (0) + +/* Failure stack declarations and macros; both re_compile_fastmap and + re_match_2 use a failure stack. These have to be macros because of + REGEX_ALLOCATE_STACK. */ -/* Fetch the next character in the uncompiled pattern, with no - translation. */ -#define PATFETCH_RAW(c) \ - do {if (p == pend) return REG_EEND; \ - c = (unsigned char) *p++; \ - } while (0) -/* Go backwards one character in the pattern. */ -#define PATUNFETCH p-- +/* Number of failure points for which to initially allocate space + when matching. If this number is exceeded, we allocate more + space, so it is not a hard limit. */ +#ifndef INIT_FAILURE_ALLOC +# define INIT_FAILURE_ALLOC 5 +#endif +/* Roughly the maximum number of failure points on the stack. 
Would be + exactly that if always used MAX_FAILURE_ITEMS items each time we failed. + This is a variable only so users of regex can assign to it; we never + change it ourselves. */ -/* If `translate' is non-null, return translate[D], else just D. We - cast the subscript to translate because some data is declared as - `char *', to avoid warnings when a string constant is passed. But - when we use a character as a subscript we must make it unsigned. */ -#define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d)) +#ifdef INT_IS_16BIT +# if defined MATCH_MAY_ALLOCATE +/* 4400 was enough to cause a crash on Alpha OSF/1, + whose default stack limit is 2mb. */ +long int re_max_failures = 4000; +# else +long int re_max_failures = 2000; +# endif -/* Macros for outputting the compiled pattern into `buffer'. */ +union fail_stack_elt +{ + unsigned char *pointer; + long int integer; +}; -/* If the buffer isn't allocated when it comes in, use this. */ -#define INIT_BUF_SIZE 32 +typedef union fail_stack_elt fail_stack_elt_t; -/* Make sure we have at least N more bytes of space in buffer. */ -#define GET_BUFFER_SPACE(n) \ - while (b - bufp->buffer + (n) > bufp->allocated) \ - EXTEND_BUFFER () +typedef struct +{ + fail_stack_elt_t *stack; + unsigned long int size; + unsigned long int avail; /* Offset of next open position. */ +} fail_stack_type; -/* Make sure we have one more byte of buffer space and then add C to it. */ -#define BUF_PUSH(c) \ - do { \ - GET_BUFFER_SPACE (1); \ - *b++ = (unsigned char) (c); \ - } while (0) +#else /* not INT_IS_16BIT */ +# if defined MATCH_MAY_ALLOCATE +/* 4400 was enough to cause a crash on Alpha OSF/1, + whose default stack limit is 2mb. */ +int re_max_failures = 20000; +# else +int re_max_failures = 2000; +# endif -/* Ensure we have two more bytes of buffer space and then append C1 and C2. 
*/ -#define BUF_PUSH_2(c1, c2) \ - do { \ - GET_BUFFER_SPACE (2); \ - *b++ = (unsigned char) (c1); \ - *b++ = (unsigned char) (c2); \ - } while (0) +union fail_stack_elt +{ + unsigned char *pointer; + int integer; +}; +typedef union fail_stack_elt fail_stack_elt_t; -/* As with BUF_PUSH_2, except for three bytes. */ -#define BUF_PUSH_3(c1, c2, c3) \ +typedef struct +{ + fail_stack_elt_t *stack; + unsigned size; + unsigned avail; /* Offset of next open position. */ +} fail_stack_type; + +#endif /* INT_IS_16BIT */ + +#define FAIL_STACK_EMPTY() (fail_stack.avail == 0) +#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) +#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) + + +/* Define macros to initialize and free the failure stack. + Do `return -2' if the alloc fails. */ + +#ifdef MATCH_MAY_ALLOCATE +# define INIT_FAIL_STACK() \ do { \ - GET_BUFFER_SPACE (3); \ - *b++ = (unsigned char) (c1); \ - *b++ = (unsigned char) (c2); \ - *b++ = (unsigned char) (c3); \ + fail_stack.stack = (fail_stack_elt_t *) \ + REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ + \ + if (fail_stack.stack == NULL) \ + return -2; \ + \ + fail_stack.size = INIT_FAILURE_ALLOC; \ + fail_stack.avail = 0; \ } while (0) +# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack) +#else +# define INIT_FAIL_STACK() \ + do { \ + fail_stack.avail = 0; \ + } while (0) -/* Store a jump with opcode OP at LOC to location TO. We store a - relative address offset by the three bytes the jump itself occupies. */ -#define STORE_JUMP(op, loc, to) \ - store_op1 (op, loc, (to) - (loc) - 3) +# define RESET_FAIL_STACK() +#endif -/* Likewise, for a two-argument jump. */ -#define STORE_JUMP2(op, loc, to, arg) \ - store_op2 (op, loc, (to) - (loc) - 3, arg) -/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. 
*/ -#define INSERT_JUMP(op, loc, to) \ - insert_op1 (op, loc, (to) - (loc) - 3, b) +/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. -/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ -#define INSERT_JUMP2(op, loc, to, arg) \ - insert_op2 (op, loc, (to) - (loc) - 3, arg, b) + Return 1 if succeeds, and 0 if either ran out of memory + allocating space for it or it was already too large. + REGEX_REALLOCATE_STACK requires `destination' be declared. */ -/* This is not an arbitrary limit: the arguments which represent offsets - into the pattern are two bytes long. So if 2^16 bytes turns out to - be too small, many things would have to change. */ -#define MAX_BUF_SIZE (1L << 16) +#define DOUBLE_FAIL_STACK(fail_stack) \ + ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \ + ? 0 \ + : ((fail_stack).stack = (fail_stack_elt_t *) \ + REGEX_REALLOCATE_STACK ((fail_stack).stack, \ + (fail_stack).size * sizeof (fail_stack_elt_t), \ + ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ + \ + (fail_stack).stack == NULL \ + ? 0 \ + : ((fail_stack).size <<= 1, \ + 1))) -/* Extend the buffer by twice its current size via realloc and - reset the pointers that pointed into the old block to point to the +/* Push pointer POINTER on FAIL_STACK. + Return 1 if was able to do so and 0 if ran out of memory allocating + space to do so. */ +#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \ + ((FAIL_STACK_FULL () \ + && !DOUBLE_FAIL_STACK (FAIL_STACK)) \ + ? 0 \ + : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \ + 1)) + +/* Push a pointer value onto the failure stack. + Assumes the variable `fail_stack'. Probably should only + be called from within `PUSH_FAILURE_POINT'. */ +#define PUSH_FAILURE_POINTER(item) \ + fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item) + +/* This pushes an integer-valued item onto the failure stack. + Assumes the variable `fail_stack'. 
Probably should only + be called from within `PUSH_FAILURE_POINT'. */ +#define PUSH_FAILURE_INT(item) \ + fail_stack.stack[fail_stack.avail++].integer = (item) + +/* Push a fail_stack_elt_t value onto the failure stack. + Assumes the variable `fail_stack'. Probably should only + be called from within `PUSH_FAILURE_POINT'. */ +#define PUSH_FAILURE_ELT(item) \ + fail_stack.stack[fail_stack.avail++] = (item) + +/* These three POP... operations complement the three PUSH... operations. + All assume that `fail_stack' is nonempty. */ +#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer +#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer +#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail] + +/* Used to omit pushing failure point id's when we're not debugging. */ +#ifdef DEBUG +# define DEBUG_PUSH PUSH_FAILURE_INT +# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT () +#else +# define DEBUG_PUSH(item) +# define DEBUG_POP(item_addr) +#endif + + +/* Push the information about the state we will need + if we ever fail back to it. + + Requires variables fail_stack, regstart, regend, reg_info, and + num_regs_pushed be declared. DOUBLE_FAIL_STACK requires `destination' + be declared. + + Does `return FAILURE_CODE' if runs out of memory. */ + +#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ + do { \ + char *destination; \ + /* Must be int, so when we don't save any registers, the arithmetic \ + of 0 + -1 isn't done as unsigned. 
*/ \ + /* Can't be int, since there is not a shred of a guarantee that int \ + is wide enough to hold a value of something to which pointer can \ + be assigned */ \ + active_reg_t this_reg; \ + \ + DEBUG_STATEMENT (failure_id++); \ + DEBUG_STATEMENT (nfailure_points_pushed++); \ + DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ + DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ + DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ + \ + DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \ + DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ + \ + /* Ensure we have enough space allocated for what we will push. */ \ + while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ + { \ + if (!DOUBLE_FAIL_STACK (fail_stack)) \ + return failure_code; \ + \ + DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ + (fail_stack).size); \ + DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ + } \ + \ + /* Push the info, starting with the registers. 
*/ \ + DEBUG_PRINT1 ("\n"); \ + \ + if (1) \ + for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ + this_reg++) \ + { \ + DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \ + DEBUG_STATEMENT (num_regs_pushed++); \ + \ + DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \ + PUSH_FAILURE_POINTER (regstart[this_reg]); \ + \ + DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \ + PUSH_FAILURE_POINTER (regend[this_reg]); \ + \ + DEBUG_PRINT2 (" info: %p\n ", \ + reg_info[this_reg].word.pointer); \ + DEBUG_PRINT2 (" match_null=%d", \ + REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ + DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ + DEBUG_PRINT2 (" matched_something=%d", \ + MATCHED_SOMETHING (reg_info[this_reg])); \ + DEBUG_PRINT2 (" ever_matched=%d", \ + EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ + DEBUG_PRINT1 ("\n"); \ + PUSH_FAILURE_ELT (reg_info[this_reg].word); \ + } \ + \ + DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\ + PUSH_FAILURE_INT (lowest_active_reg); \ + \ + DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\ + PUSH_FAILURE_INT (highest_active_reg); \ + \ + DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \ + DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ + PUSH_FAILURE_POINTER (pattern_place); \ + \ + DEBUG_PRINT2 (" Pushing string %p: `", string_place); \ + DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ + size2); \ + DEBUG_PRINT1 ("'\n"); \ + PUSH_FAILURE_POINTER (string_place); \ + \ + DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ + DEBUG_PUSH (failure_id); \ + } while (0) + +/* This is the number of items that are pushed and popped on the stack + for each register. */ +#define NUM_REG_ITEMS 3 + +/* Individual items aside from the registers. */ +#ifdef DEBUG +# define NUM_NONREG_ITEMS 5 /* Includes failure point id. */ +#else +# define NUM_NONREG_ITEMS 4 +#endif + +/* We push at most this many items on the stack. 
*/ +/* We used to use (num_regs - 1), which is the number of registers + this regexp will save; but that was changed to 5 + to avoid stack overflow for a regexp with lots of parens. */ +#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS) + +/* We actually push this many items. */ +#define NUM_FAILURE_ITEMS \ + (((0 \ + ? 0 : highest_active_reg - lowest_active_reg + 1) \ + * NUM_REG_ITEMS) \ + + NUM_NONREG_ITEMS) + +/* How many items can still be added to the stack without overflowing it. */ +#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) + + +/* Pops what PUSH_FAIL_STACK pushes. + + We restore into the parameters, all of which should be lvalues: + STR -- the saved data position. + PAT -- the saved pattern position. + LOW_REG, HIGH_REG -- the highest and lowest active registers. + REGSTART, REGEND -- arrays of string positions. + REG_INFO -- array of information about each subexpression. + + Also assumes the variables `fail_stack' and (if debugging), `bufp', + `pend', `string1', `size1', `string2', and `size2'. */ + +#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ +{ \ + DEBUG_STATEMENT (unsigned failure_id;) \ + active_reg_t this_reg; \ + const unsigned char *string_temp; \ + \ + assert (!FAIL_STACK_EMPTY ()); \ + \ + /* Remove failure points and point to how many regs pushed. */ \ + DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ + DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ + DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ + \ + assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ + \ + DEBUG_POP (&failure_id); \ + DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ + \ + /* If the saved string location is NULL, it came from an \ + on_failure_keep_string_jump opcode, and we want to throw away the \ + saved NULL, thus retaining our current position in the string. 
*/ \ + string_temp = POP_FAILURE_POINTER (); \ + if (string_temp != NULL) \ + str = (const char *) string_temp; \ + \ + DEBUG_PRINT2 (" Popping string %p: `", str); \ + DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ + DEBUG_PRINT1 ("'\n"); \ + \ + pat = (unsigned char *) POP_FAILURE_POINTER (); \ + DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \ + DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ + \ + /* Restore register info. */ \ + high_reg = (active_reg_t) POP_FAILURE_INT (); \ + DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \ + \ + low_reg = (active_reg_t) POP_FAILURE_INT (); \ + DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \ + \ + if (1) \ + for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ + { \ + DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \ + \ + reg_info[this_reg].word = POP_FAILURE_ELT (); \ + DEBUG_PRINT2 (" info: %p\n", \ + reg_info[this_reg].word.pointer); \ + \ + regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \ + DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \ + \ + regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \ + DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \ + } \ + else \ + { \ + for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \ + { \ + reg_info[this_reg].word.integer = 0; \ + regend[this_reg] = 0; \ + regstart[this_reg] = 0; \ + } \ + highest_active_reg = high_reg; \ + } \ + \ + set_regs_matched_done = 0; \ + DEBUG_STATEMENT (nfailure_points_popped++); \ +} /* POP_FAILURE_POINT */ + + + +/* Structure for per-register (a.k.a. per-group) information. + Other register information, such as the + starting and ending positions (which are addresses), and the list of + inner groups (which is a bits list) are maintained in separate + variables. 
+ + We are making a (strictly speaking) nonportable assumption here: that + the compiler will pack our bit fields into something that fits into + the type of `word', i.e., is something that fits into one item on the + failure stack. */ + + +/* Declarations and macros for re_match_2. */ + +typedef union +{ + fail_stack_elt_t word; + struct + { + /* This field is one if this group can match the empty string, + zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ +#define MATCH_NULL_UNSET_VALUE 3 + unsigned match_null_string_p : 2; + unsigned is_active : 1; + unsigned matched_something : 1; + unsigned ever_matched_something : 1; + } bits; +} register_info_type; + +#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) +#define IS_ACTIVE(R) ((R).bits.is_active) +#define MATCHED_SOMETHING(R) ((R).bits.matched_something) +#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) + + +/* Call this when have matched a real character; it sets `matched' flags + for the subexpressions which we are currently inside. Also records + that those subexprs have matched. */ +#define SET_REGS_MATCHED() \ + do \ + { \ + if (!set_regs_matched_done) \ + { \ + active_reg_t r; \ + set_regs_matched_done = 1; \ + for (r = lowest_active_reg; r <= highest_active_reg; r++) \ + { \ + MATCHED_SOMETHING (reg_info[r]) \ + = EVER_MATCHED_SOMETHING (reg_info[r]) \ + = 1; \ + } \ + } \ + } \ + while (0) + +/* Registers are set to a sentinel when they haven't yet matched. */ +static char reg_unset_dummy; +#define REG_UNSET_VALUE (&reg_unset_dummy) +#define REG_UNSET(e) ((e) == REG_UNSET_VALUE) + +/* Subroutine declarations and macros for regex_compile.
*/ + +static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size, + reg_syntax_t syntax, + struct re_pattern_buffer *bufp)); +static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg)); +static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, + int arg1, int arg2)); +static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, + int arg, unsigned char *end)); +static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc, + int arg1, int arg2, unsigned char *end)); +static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p, + reg_syntax_t syntax)); +static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend, + reg_syntax_t syntax)); +static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr, + const char *pend, + char *translate, + reg_syntax_t syntax, + unsigned char *b)); + +/* Fetch the next character in the uncompiled pattern---translating it + if necessary. Also cast from a signed character in the constant + string passed to us by the user to an unsigned char that we can use + as an array index (in, e.g., `translate'). */ +#ifndef PATFETCH +# define PATFETCH(c) \ + do {if (p == pend) return REG_EEND; \ + c = (unsigned char) *p++; \ + if (translate) c = (unsigned char) translate[c]; \ + } while (0) +#endif + +/* Fetch the next character in the uncompiled pattern, with no + translation. */ +#define PATFETCH_RAW(c) \ + do {if (p == pend) return REG_EEND; \ + c = (unsigned char) *p++; \ + } while (0) + +/* Go backwards one character in the pattern. */ +#define PATUNFETCH p-- + + +/* If `translate' is non-null, return translate[D], else just D. We + cast the subscript to translate because some data is declared as + `char *', to avoid warnings when a string constant is passed. But + when we use a character as a subscript we must make it unsigned. */ +#ifndef TRANSLATE +# define TRANSLATE(d) \ + (translate ? 
(char) translate[(unsigned char) (d)] : (d)) +#endif + + +/* Macros for outputting the compiled pattern into `buffer'. */ + +/* If the buffer isn't allocated when it comes in, use this. */ +#define INIT_BUF_SIZE 32 + +/* Make sure we have at least N more bytes of space in buffer. */ +#define GET_BUFFER_SPACE(n) \ + while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \ + EXTEND_BUFFER () + +/* Make sure we have one more byte of buffer space and then add C to it. */ +#define BUF_PUSH(c) \ + do { \ + GET_BUFFER_SPACE (1); \ + *b++ = (unsigned char) (c); \ + } while (0) + + +/* Ensure we have two more bytes of buffer space and then append C1 and C2. */ +#define BUF_PUSH_2(c1, c2) \ + do { \ + GET_BUFFER_SPACE (2); \ + *b++ = (unsigned char) (c1); \ + *b++ = (unsigned char) (c2); \ + } while (0) + + +/* As with BUF_PUSH_2, except for three bytes. */ +#define BUF_PUSH_3(c1, c2, c3) \ + do { \ + GET_BUFFER_SPACE (3); \ + *b++ = (unsigned char) (c1); \ + *b++ = (unsigned char) (c2); \ + *b++ = (unsigned char) (c3); \ + } while (0) + + +/* Store a jump with opcode OP at LOC to location TO. We store a + relative address offset by the three bytes the jump itself occupies. */ +#define STORE_JUMP(op, loc, to) \ + store_op1 (op, loc, (int) ((to) - (loc) - 3)) + +/* Likewise, for a two-argument jump. */ +#define STORE_JUMP2(op, loc, to, arg) \ + store_op2 (op, loc, (int) ((to) - (loc) - 3), arg) + +/* Like `STORE_JUMP', but for inserting. Assume `b' is the buffer end. */ +#define INSERT_JUMP(op, loc, to) \ + insert_op1 (op, loc, (int) ((to) - (loc) - 3), b) + +/* Like `STORE_JUMP2', but for inserting. Assume `b' is the buffer end. */ +#define INSERT_JUMP2(op, loc, to, arg) \ + insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b) + + +/* This is not an arbitrary limit: the arguments which represent offsets + into the pattern are two bytes long. So if 2^16 bytes turns out to + be too small, many things would have to change. 
*/ +/* Any other compiler which, like MSC, has allocation limit below 2^16 + bytes will have to use approach similar to what was done below for + MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up + reallocating to 0 bytes. Such thing is not going to work too well. + You have been warned!! */ +#if defined _MSC_VER && !defined WIN32 +/* Microsoft C 16-bit versions limit malloc to approx 65512 bytes. + The REALLOC define eliminates a flurry of conversion warnings, + but is not required. */ +# define MAX_BUF_SIZE 65500L +# define REALLOC(p,s) realloc ((p), (size_t) (s)) +#else +# define MAX_BUF_SIZE (1L << 16) +# define REALLOC(p,s) realloc ((p), (s)) +#endif + +/* Extend the buffer by twice its current size via realloc and + reset the pointers that pointed into the old block to point to the correct places in the new one. If extending the buffer results in it being larger than MAX_BUF_SIZE, then flag memory exhausted. */ #define EXTEND_BUFFER() \ @@ -984,7 +1698,7 @@ static reg_errcode_t compile_range (); bufp->allocated <<= 1; \ if (bufp->allocated > MAX_BUF_SIZE) \ bufp->allocated = MAX_BUF_SIZE; \ - bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\ + bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\ if (bufp->buffer == NULL) \ return REG_ESPACE; \ /* If the buffer moved, move all the pointers into it. */ \ @@ -1016,14 +1730,15 @@ typedef unsigned regnum_t; /* Since offsets can go either forwards or backwards, this type needs to be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1. */ -typedef int pattern_offset_t; +/* int may be not enough when sizeof(int) == 2. 
*/ +typedef long pattern_offset_t; typedef struct { pattern_offset_t begalt_offset; pattern_offset_t fixup_alt_jump; pattern_offset_t inner_group_offset; - pattern_offset_t laststart_offset; + pattern_offset_t laststart_offset; regnum_t regnum; } compile_stack_elt_t; @@ -1056,7 +1771,7 @@ typedef struct { if (p != pend) \ { \ PATFETCH (c); \ - while (ISDIGIT (c)) \ + while ('0' <= c && c <= '9') \ { \ if (num < 0) \ num = 0; \ @@ -1066,18 +1781,88 @@ typedef struct PATFETCH (c); \ } \ } \ - } + } -#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ +#if defined _LIBC || WIDE_CHAR_SUPPORT +/* The GNU C library provides support for user-defined character classes + and the functions from ISO C amendement 1. */ +# ifdef CHARCLASS_NAME_MAX +# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX +# else +/* This shouldn't happen but some implementation might still have this + problem. Use a reasonable default value. */ +# define CHAR_CLASS_MAX_LENGTH 256 +# endif + +# ifdef _LIBC +# define IS_CHAR_CLASS(string) __wctype (string) +# else +# define IS_CHAR_CLASS(string) wctype (string) +# endif +#else +# define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */ -#define IS_CHAR_CLASS(string) \ +# define IS_CHAR_CLASS(string) \ (STREQ (string, "alpha") || STREQ (string, "upper") \ || STREQ (string, "lower") || STREQ (string, "digit") \ || STREQ (string, "alnum") || STREQ (string, "xdigit") \ || STREQ (string, "space") || STREQ (string, "print") \ || STREQ (string, "punct") || STREQ (string, "graph") \ || STREQ (string, "cntrl") || STREQ (string, "blank")) +#endif +#ifndef MATCH_MAY_ALLOCATE + +/* If we cannot allocate large objects within re_match_2_internal, + we make the fail stack and register vectors global. + The fail stack, we grow to the maximum size when a regexp + is compiled. + The register vectors, we adjust in size each time we + compile a regexp, according to the number of registers it needs. 
*/ + +static fail_stack_type fail_stack; + +/* Size with which the following vectors are currently allocated. + That is so we can make them bigger as needed, + but never make them smaller. */ +static int regs_allocated_size; + +static const char ** regstart, ** regend; +static const char ** old_regstart, ** old_regend; +static const char **best_regstart, **best_regend; +static register_info_type *reg_info; +static const char **reg_dummy; +static register_info_type *reg_info_dummy; + +/* Make the register vectors big enough for NUM_REGS registers, + but don't make them smaller. */ + +static +regex_grow_registers (num_regs) + int num_regs; +{ + if (num_regs > regs_allocated_size) + { + RETALLOC_IF (regstart, num_regs, const char *); + RETALLOC_IF (regend, num_regs, const char *); + RETALLOC_IF (old_regstart, num_regs, const char *); + RETALLOC_IF (old_regend, num_regs, const char *); + RETALLOC_IF (best_regstart, num_regs, const char *); + RETALLOC_IF (best_regend, num_regs, const char *); + RETALLOC_IF (reg_info, num_regs, register_info_type); + RETALLOC_IF (reg_dummy, num_regs, const char *); + RETALLOC_IF (reg_info_dummy, num_regs, register_info_type); + + regs_allocated_size = num_regs; + } +} + +#endif /* not MATCH_MAY_ALLOCATE */ + +static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type + compile_stack, + regnum_t regnum)); + /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX. Returns one of error codes defined in `regex.h', or zero for success. @@ -1092,14 +1877,18 @@ typedef struct `fastmap_accurate' is zero; `re_nsub' is the number of subexpressions in PATTERN; `not_bol' and `not_eol' are zero; - + The `fastmap' and `newline_anchor' fields are neither examined nor set. */ +/* Return, freeing storage we allocated. 
*/ +#define FREE_STACK_RETURN(value) \ + return (free (compile_stack.stack), value) + static reg_errcode_t regex_compile (pattern, size, syntax, bufp) const char *pattern; - int size; + size_t size; reg_syntax_t syntax; struct re_pattern_buffer *bufp; { @@ -1107,22 +1896,22 @@ regex_compile (pattern, size, syntax, bufp) `char *' (i.e., signed), we declare these variables as unsigned, so they can be reliably used as array indices. */ register unsigned char c, c1; - - /* A random tempory spot in PATTERN. */ + + /* A random temporary spot in PATTERN. */ const char *p1; /* Points to the end of the buffer, where we should append. */ register unsigned char *b; - + /* Keeps track of unclosed groups. */ compile_stack_type compile_stack; /* Points to the current (ending) position in the pattern. */ const char *p = pattern; const char *pend = pattern + size; - + /* How to translate the characters in the pattern. */ - char *translate = bufp->translate; + RE_TRANSLATE_TYPE translate = bufp->translate; /* Address of the count-byte of the most recently inserted `exactn' command. This makes it possible to tell if a new exact-match @@ -1141,7 +1930,7 @@ regex_compile (pattern, size, syntax, bufp) /* Place in the uncompiled pattern (i.e., the {) to which to go back if the interval is invalid. */ const char *beg_interval; - + /* Address of the place where a forward jump should go to the end of the containing expression. Each alternative of an `or' -- except the last -- ends with a forward jump of this sort. */ @@ -1157,9 +1946,9 @@ regex_compile (pattern, size, syntax, bufp) if (debug) { unsigned debug_count; - + for (debug_count = 0; debug_count < size; debug_count++) - printchar (pattern[debug_count]); + putchar (pattern[debug_count]); putchar ('\n'); } #endif /* DEBUG */ @@ -1181,11 +1970,11 @@ regex_compile (pattern, size, syntax, bufp) printer (for debugging) will think there's no pattern. We reset it at the end. 
*/ bufp->used = 0; - + /* Always count groups, whether or not bufp->no_sub is set. */ - bufp->re_nsub = 0; + bufp->re_nsub = 0; -#if !defined (emacs) && !defined (SYNTAX_TABLE) +#if !defined emacs && !defined SYNTAX_TABLE /* Initialize the syntax table. */ init_syntax_once (); #endif @@ -1202,7 +1991,7 @@ regex_compile (pattern, size, syntax, bufp) { /* Caller did not allocate a buffer. Do it for them. */ bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char); } - if (!bufp->buffer) return REG_ESPACE; + if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE); bufp->allocated = INIT_BUF_SIZE; } @@ -1234,7 +2023,7 @@ regex_compile (pattern, size, syntax, bufp) case '$': { if ( /* If at end of pattern, it's an operator. */ - p == pend + p == pend /* If context independent, it's an operator. */ || syntax & RE_CONTEXT_INDEP_ANCHORS /* Otherwise, depends on what's next. */ @@ -1257,7 +2046,7 @@ regex_compile (pattern, size, syntax, bufp) if (!laststart) { if (syntax & RE_CONTEXT_INVALID_OPS) - return REG_BADRPT; + FREE_STACK_RETURN (REG_BADRPT); else if (!(syntax & RE_CONTEXT_INDEP_OPS)) goto normal_char; } @@ -1265,7 +2054,7 @@ regex_compile (pattern, size, syntax, bufp) { /* Are we optimizing this jump? */ boolean keep_string_p = false; - + /* 1 means zero (many) matches is allowed. */ char zero_times_ok = 0, many_times_ok = 0; @@ -1290,7 +2079,7 @@ regex_compile (pattern, size, syntax, bufp) else if (syntax & RE_BK_PLUS_QM && c == '\\') { - if (p == pend) return REG_EESCAPE; + if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); PATFETCH (c1); if (!(c1 == '+' || c1 == '?')) @@ -1313,7 +2102,7 @@ regex_compile (pattern, size, syntax, bufp) /* Star, etc. applied to an empty pattern is equivalent to an empty pattern. 
*/ - if (!laststart) + if (!laststart) break; /* Now we know whether or not zero matches is allowed @@ -1322,7 +2111,7 @@ regex_compile (pattern, size, syntax, bufp) { /* More than one repetition is allowed, so put in at the end a backward relative jump from `b' to before the next jump we're going to put in below (which jumps from - laststart to after this jump). + laststart to after this jump). But if we are at the `*' in the exact sequence `.*\n', insert an unconditional jump backwards to the ., @@ -1389,7 +2178,7 @@ regex_compile (pattern, size, syntax, bufp) { boolean had_char_class = false; - if (p == pend) return REG_EBRACK; + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); /* Ensure that we have enough space to push a charset: the opcode, the length count, and the bitset; 34 bytes in all. */ @@ -1399,7 +2188,7 @@ regex_compile (pattern, size, syntax, bufp) /* We test `*p == '^' twice, instead of using an if statement, so we only need one BUF_PUSH. */ - BUF_PUSH (*p == '^' ? charset_not : charset); + BUF_PUSH (*p == '^' ? charset_not : charset); if (*p == '^') p++; @@ -1420,14 +2209,14 @@ regex_compile (pattern, size, syntax, bufp) /* Read in characters and ranges, setting map bits. */ for (;;) { - if (p == pend) return REG_EBRACK; + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); PATFETCH (c); /* \ might escape characters inside [...] and [^...]. */ if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') { - if (p == pend) return REG_EESCAPE; + if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); PATFETCH (c1); SET_LIST_BIT (c1); @@ -1443,20 +2232,20 @@ regex_compile (pattern, size, syntax, bufp) /* Look ahead to see if it's a range when the last thing was a character class. */ if (had_char_class && c == '-' && *p != ']') - return REG_ERANGE; + FREE_STACK_RETURN (REG_ERANGE); /* Look ahead to see if it's a range when the last thing was a character: if this is a hyphen not at the beginning or the end of a list, then it's the range operator. 
*/ - if (c == '-' - && !(p - 2 >= pattern && p[-2] == '[') + if (c == '-' + && !(p - 2 >= pattern && p[-2] == '[') && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^') && *p != ']') { reg_errcode_t ret = compile_range (&p, pend, translate, syntax, b); - if (ret != REG_NOERROR) return ret; + if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); } else if (p[0] == '-' && p[1] != ']') @@ -1465,9 +2254,9 @@ regex_compile (pattern, size, syntax, bufp) /* Move past the `-'. */ PATFETCH (c1); - + ret = compile_range (&p, pend, translate, syntax, b); - if (ret != REG_NOERROR) return ret; + if (ret != REG_NOERROR) FREE_STACK_RETURN (ret); } /* See if we're at the beginning of a possible character @@ -1481,23 +2270,59 @@ regex_compile (pattern, size, syntax, bufp) c1 = 0; /* If pattern is `[[:'. */ - if (p == pend) return REG_EBRACK; + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); for (;;) { PATFETCH (c); - if (c == ':' || c == ']' || p == pend - || c1 == CHAR_CLASS_MAX_LENGTH) + if ((c == ':' && *p == ']') || p == pend) break; - str[c1++] = c; + if (c1 < CHAR_CLASS_MAX_LENGTH) + str[c1++] = c; + else + /* This is in any case an invalid class name. */ + str[0] = '\0'; } str[c1] = '\0'; - /* If isn't a word bracketed by `[:' and:`]': - undo the ending character, the letters, and leave + /* If isn't a word bracketed by `[:' and `:]': + undo the ending character, the letters, and leave the leading `:' and `[' (but set bits for them). */ if (c == ':' && *p == ']') { +#if defined _LIBC || WIDE_CHAR_SUPPORT + boolean is_lower = STREQ (str, "lower"); + boolean is_upper = STREQ (str, "upper"); + wctype_t wt; + int ch; + + wt = IS_CHAR_CLASS (str); + if (wt == 0) + FREE_STACK_RETURN (REG_ECTYPE); + + /* Throw away the ] at the end of the character + class. 
*/ + PATFETCH (c); + + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); + + for (ch = 0; ch < 1 << BYTEWIDTH; ++ch) + { +# ifdef _LIBC + if (__iswctype (__btowc (ch), wt)) + SET_LIST_BIT (ch); +# else + if (iswctype (btowc (ch), wt)) + SET_LIST_BIT (ch); +# endif + + if (translate && (is_upper || is_lower) + && (ISUPPER (ch) || ISLOWER (ch))) + SET_LIST_BIT (ch); + } + + had_char_class = true; +#else int ch; boolean is_alnum = STREQ (str, "alnum"); boolean is_alpha = STREQ (str, "alpha"); @@ -1511,37 +2336,46 @@ regex_compile (pattern, size, syntax, bufp) boolean is_space = STREQ (str, "space"); boolean is_upper = STREQ (str, "upper"); boolean is_xdigit = STREQ (str, "xdigit"); - - if (!IS_CHAR_CLASS (str)) return REG_ECTYPE; + + if (!IS_CHAR_CLASS (str)) + FREE_STACK_RETURN (REG_ECTYPE); /* Throw away the ] at the end of the character class. */ - PATFETCH (c); + PATFETCH (c); - if (p == pend) return REG_EBRACK; + if (p == pend) FREE_STACK_RETURN (REG_EBRACK); for (ch = 0; ch < 1 << BYTEWIDTH; ch++) { + /* This was split into 3 if's to + avoid an arbitrary limit in some compiler. 
*/ if ( (is_alnum && ISALNUM (ch)) || (is_alpha && ISALPHA (ch)) || (is_blank && ISBLANK (ch)) - || (is_cntrl && ISCNTRL (ch)) - || (is_digit && ISDIGIT (ch)) + || (is_cntrl && ISCNTRL (ch))) + SET_LIST_BIT (ch); + if ( (is_digit && ISDIGIT (ch)) || (is_graph && ISGRAPH (ch)) || (is_lower && ISLOWER (ch)) - || (is_print && ISPRINT (ch)) - || (is_punct && ISPUNCT (ch)) + || (is_print && ISPRINT (ch))) + SET_LIST_BIT (ch); + if ( (is_punct && ISPUNCT (ch)) || (is_space && ISSPACE (ch)) || (is_upper && ISUPPER (ch)) || (is_xdigit && ISXDIGIT (ch))) - SET_LIST_BIT (ch); + SET_LIST_BIT (ch); + if ( translate && (is_upper || is_lower) + && (ISUPPER (ch) || ISLOWER (ch))) + SET_LIST_BIT (ch); } had_char_class = true; +#endif /* libc || wctype.h */ } else { c1++; - while (c1--) + while (c1--) PATUNFETCH; SET_LIST_BIT ('['); SET_LIST_BIT (':'); @@ -1557,8 +2391,8 @@ regex_compile (pattern, size, syntax, bufp) /* Discard any (non)matching list bytes that are all 0 at the end of the map. Decrease the map-length byte too. */ - while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) - b[-1]--; + while ((int) b[-1] > 0 && b[b[-1] - 1] == 0) + b[-1]--; b += b[-1]; } break; @@ -1600,7 +2434,7 @@ regex_compile (pattern, size, syntax, bufp) case '\\': - if (p == pend) return REG_EESCAPE; + if (p == pend) FREE_STACK_RETURN (REG_EESCAPE); /* Do not translate the character after the \, so that we can distinguish, e.g., \B from \b, even if we normally would @@ -1618,7 +2452,7 @@ regex_compile (pattern, size, syntax, bufp) regnum++; if (COMPILE_STACK_FULL) - { + { RETALLOC (compile_stack.stack, compile_stack.size << 1, compile_stack_elt_t); if (compile_stack.stack == NULL) return REG_ESPACE; @@ -1631,7 +2465,7 @@ regex_compile (pattern, size, syntax, bufp) whole pattern moves because of realloc, they will still be valid. */ COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer; - COMPILE_STACK_TOP.fixup_alt_jump + COMPILE_STACK_TOP.fixup_alt_jump = fixup_alt_jump ? 
fixup_alt_jump - bufp->buffer + 1 : 0; COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer; COMPILE_STACK_TOP.regnum = regnum; @@ -1645,7 +2479,7 @@ regex_compile (pattern, size, syntax, bufp) COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2; BUF_PUSH_3 (start_memory, regnum, 0); } - + compile_stack.avail++; fixup_alt_jump = 0; @@ -1662,10 +2496,12 @@ regex_compile (pattern, size, syntax, bufp) if (syntax & RE_NO_BK_PARENS) goto normal_backslash; if (COMPILE_STACK_EMPTY) - if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) - goto normal_backslash; - else - return REG_ERPAREN; + { + if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) + goto normal_backslash; + else + FREE_STACK_RETURN (REG_ERPAREN); + } handle_close: if (fixup_alt_jump) @@ -1674,7 +2510,7 @@ regex_compile (pattern, size, syntax, bufp) `pop_failure_jump' to pop. See comments at `push_dummy_failure' in `re_match_2'. */ BUF_PUSH (push_dummy_failure); - + /* We allocated space for this jump when we assigned to `fixup_alt_jump', in the `handle_alt' case below. */ STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1); @@ -1682,10 +2518,12 @@ regex_compile (pattern, size, syntax, bufp) /* See similar code for backslashed left paren above. */ if (COMPILE_STACK_EMPTY) - if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) - goto normal_char; - else - return REG_ERPAREN; + { + if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD) + goto normal_char; + else + FREE_STACK_RETURN (REG_ERPAREN); + } /* Since we just checked for an empty stack above, this ``can't happen''. */ @@ -1696,11 +2534,11 @@ regex_compile (pattern, size, syntax, bufp) as in `(ab)c(de)' -- the second group is #2. */ regnum_t this_group_regnum; - compile_stack.avail--; + compile_stack.avail--; begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset; fixup_alt_jump = COMPILE_STACK_TOP.fixup_alt_jump - ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 + ? 
bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1 : 0; laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset; this_group_regnum = COMPILE_STACK_TOP.regnum; @@ -1715,7 +2553,7 @@ regex_compile (pattern, size, syntax, bufp) { unsigned char *inner_group_loc = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset; - + *inner_group_loc = regnum - this_group_regnum; BUF_PUSH_3 (stop_memory, this_group_regnum, regnum - this_group_regnum); @@ -1744,10 +2582,10 @@ regex_compile (pattern, size, syntax, bufp) jump (put in below, which in turn will jump to the next (if any) alternative's such jump, etc.). The last such jump jumps to the correct final destination. A picture: - _____ _____ - | | | | - | v | v - a | b | c + _____ _____ + | | | | + | v | v + a | b | c If we are at `b', then fixup_alt_jump right now points to a three-byte space after `a'. We'll put in the jump, set @@ -1769,10 +2607,10 @@ regex_compile (pattern, size, syntax, bufp) break; - case '{': + case '{': /* If \{ is a literal. */ if (!(syntax & RE_INTERVALS) - /* If we're at `\{' and it's not the open-interval + /* If we're at `\{' and it's not the open-interval operator. 
*/ || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES)) || (p - 2 == pattern && p == pend)) @@ -1792,7 +2630,7 @@ regex_compile (pattern, size, syntax, bufp) if (syntax & RE_NO_BK_BRACES) goto unfetch_interval; else - return REG_EBRACE; + FREE_STACK_RETURN (REG_EBRACE); } GET_UNSIGNED_NUMBER (lower_bound); @@ -1811,13 +2649,13 @@ regex_compile (pattern, size, syntax, bufp) { if (syntax & RE_NO_BK_BRACES) goto unfetch_interval; - else - return REG_BADBR; + else + FREE_STACK_RETURN (REG_BADBR); } - if (!(syntax & RE_NO_BK_BRACES)) + if (!(syntax & RE_NO_BK_BRACES)) { - if (c != '\\') return REG_EBRACE; + if (c != '\\') FREE_STACK_RETURN (REG_EBRACE); PATFETCH (c); } @@ -1826,8 +2664,8 @@ regex_compile (pattern, size, syntax, bufp) { if (syntax & RE_NO_BK_BRACES) goto unfetch_interval; - else - return REG_BADBR; + else + FREE_STACK_RETURN (REG_BADBR); } /* We just parsed a valid interval. */ @@ -1836,7 +2674,7 @@ regex_compile (pattern, size, syntax, bufp) if (!laststart) { if (syntax & RE_CONTEXT_INVALID_OPS) - return REG_BADRPT; + FREE_STACK_RETURN (REG_BADRPT); else if (syntax & RE_CONTEXT_INDEP_OPS) laststart = b; else @@ -1857,12 +2695,12 @@ regex_compile (pattern, size, syntax, bufp) we're all done, the pattern will look like: set_number_at set_number_at - succeed_n + succeed_n jump_n (The upper bound and `jump_n' are omitted if `upper_bound' is 1, though.) */ - else + else { /* If the upper bound is > 1, we need to insert more at the end of the loop. */ unsigned nbytes = 10 + (upper_bound > 1) * 10; @@ -1879,7 +2717,7 @@ regex_compile (pattern, size, syntax, bufp) lower_bound); b += 5; - /* Code to initialize the lower bound. Insert + /* Code to initialize the lower bound. Insert before the `succeed_n'. The `5' is the last two bytes of this `set_number_at', plus 3 bytes of the following `succeed_n'. 
*/ @@ -1890,7 +2728,7 @@ regex_compile (pattern, size, syntax, bufp) { /* More than one repetition is allowed, so append a backward jump to the `succeed_n' that starts this interval. - + When we've reached this during matching, we'll have matched the interval once, so jump back only `upper_bound - 1' times. */ @@ -1908,7 +2746,7 @@ regex_compile (pattern, size, syntax, bufp) so everything is getting moved up by 5. Conclusion: (b - 2) - (laststart + 3) + 5, i.e., b - laststart. - + We insert this at the beginning of the loop so that if we fail during matching, we'll reinitialize the bounds. */ @@ -1929,7 +2767,7 @@ regex_compile (pattern, size, syntax, bufp) beg_interval = NULL; /* normal_char and normal_backslash need `c'. */ - PATFETCH (c); + PATFETCH (c); if (!(syntax & RE_NO_BK_BRACES)) { @@ -1945,7 +2783,7 @@ regex_compile (pattern, size, syntax, bufp) BUF_PUSH (at_dot); break; - case 's': + case 's': laststart = b; PATFETCH (c); BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]); @@ -1960,38 +2798,54 @@ regex_compile (pattern, size, syntax, bufp) case 'w': + if (syntax & RE_NO_GNU_OPS) + goto normal_char; laststart = b; BUF_PUSH (wordchar); break; case 'W': + if (syntax & RE_NO_GNU_OPS) + goto normal_char; laststart = b; BUF_PUSH (notwordchar); break; case '<': + if (syntax & RE_NO_GNU_OPS) + goto normal_char; BUF_PUSH (wordbeg); break; case '>': + if (syntax & RE_NO_GNU_OPS) + goto normal_char; BUF_PUSH (wordend); break; case 'b': + if (syntax & RE_NO_GNU_OPS) + goto normal_char; BUF_PUSH (wordbound); break; case 'B': + if (syntax & RE_NO_GNU_OPS) + goto normal_char; BUF_PUSH (notwordbound); break; case '`': + if (syntax & RE_NO_GNU_OPS) + goto normal_char; BUF_PUSH (begbuf); break; case '\'': + if (syntax & RE_NO_GNU_OPS) + goto normal_char; BUF_PUSH (endbuf); break; @@ -2003,10 +2857,10 @@ regex_compile (pattern, size, syntax, bufp) c1 = c - '0'; if (c1 > regnum) - return REG_ESUBREG; + FREE_STACK_RETURN (REG_ESUBREG); /* Can't back reference to a subexpression 
if inside of it. */ - if (group_in_compile_stack (compile_stack, c1)) + if (group_in_compile_stack (compile_stack, (regnum_t) c1)) goto normal_char; laststart = b; @@ -2036,11 +2890,11 @@ regex_compile (pattern, size, syntax, bufp) /* Expects the character in `c'. */ normal_char: /* If no exactn currently being built. */ - if (!pending_exact + if (!pending_exact /* If last exactn not at current position. */ || pending_exact + *pending_exact + 1 != b - + /* We have only one byte following the exactn for the count. */ || *pending_exact == (1 << BYTEWIDTH) - 1 @@ -2055,27 +2909,32 @@ regex_compile (pattern, size, syntax, bufp) : (p[0] == '\\' && p[1] == '{')))) { /* Start building a new exactn. */ - + laststart = b; BUF_PUSH_2 (exactn, 0); pending_exact = b - 1; } - + BUF_PUSH (c); (*pending_exact)++; break; } /* switch (c) */ } /* while p != pend */ - + /* Through the pattern now. */ - + if (fixup_alt_jump) STORE_JUMP (jump_past_alt, fixup_alt_jump, b); - if (!COMPILE_STACK_EMPTY) - return REG_EPAREN; + if (!COMPILE_STACK_EMPTY) + FREE_STACK_RETURN (REG_EPAREN); + + /* If we don't want backtracking, force success + the first time we reach the end of the compiled pattern. */ + if (syntax & RE_NO_POSIX_BACKTRACKING) + BUF_PUSH (succeed); free (compile_stack.stack); @@ -2090,6 +2949,47 @@ regex_compile (pattern, size, syntax, bufp) } #endif /* DEBUG */ +#ifndef MATCH_MAY_ALLOCATE + /* Initialize the failure stack to the largest possible stack. This + isn't necessary unless we're trying to avoid calling alloca in + the search and match routines. */ + { + int num_regs = bufp->re_nsub + 1; + + /* Since DOUBLE_FAIL_STACK refuses to double only if the current size + is strictly greater than re_max_failures, the largest possible stack + is 2 * re_max_failures failure points. */ + if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS)) + { + fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS); + +# ifdef emacs + if (! 
fail_stack.stack) + fail_stack.stack + = (fail_stack_elt_t *) xmalloc (fail_stack.size + * sizeof (fail_stack_elt_t)); + else + fail_stack.stack + = (fail_stack_elt_t *) xrealloc (fail_stack.stack, + (fail_stack.size + * sizeof (fail_stack_elt_t))); +# else /* not emacs */ + if (! fail_stack.stack) + fail_stack.stack + = (fail_stack_elt_t *) malloc (fail_stack.size + * sizeof (fail_stack_elt_t)); + else + fail_stack.stack + = (fail_stack_elt_t *) realloc (fail_stack.stack, + (fail_stack.size + * sizeof (fail_stack_elt_t))); +# endif /* not emacs */ + } + + regex_grow_registers (num_regs); + } +#endif /* not MATCH_MAY_ALLOCATE */ + return REG_NOERROR; } /* regex_compile */ @@ -2130,14 +3030,14 @@ insert_op1 (op, loc, arg, end) re_opcode_t op; unsigned char *loc; int arg; - unsigned char *end; + unsigned char *end; { register unsigned char *pfrom = end; register unsigned char *pto = end + 3; while (pfrom != loc) *--pto = *--pfrom; - + store_op1 (op, loc, arg); } @@ -2149,14 +3049,14 @@ insert_op2 (op, loc, arg1, arg2, end) re_opcode_t op; unsigned char *loc; int arg1, arg2; - unsigned char *end; + unsigned char *end; { register unsigned char *pfrom = end; register unsigned char *pto = end + 5; while (pfrom != loc) *--pto = *--pfrom; - + store_op2 (op, loc, arg1, arg2); } @@ -2172,7 +3072,7 @@ at_begline_loc_p (pattern, p, syntax) { const char *prev = p - 2; boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\'; - + return /* After a subexpression? */ (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash)) @@ -2187,12 +3087,12 @@ at_begline_loc_p (pattern, p, syntax) static boolean at_endline_loc_p (p, pend, syntax) const char *p, *pend; - int syntax; + reg_syntax_t syntax; { const char *next = p; boolean next_backslash = *next == '\\'; - const char *next_next = p + 1 < pend ? p + 1 : NULL; - + const char *next_next = p + 1 < pend ? p + 1 : 0; + return /* Before a subexpression? */ (syntax & RE_NO_BK_PARENS ? 
*next == ')' @@ -2203,7 +3103,7 @@ at_endline_loc_p (p, pend, syntax) } -/* Returns true if REGNUM is in one of COMPILE_STACK's elements and +/* Returns true if REGNUM is in one of COMPILE_STACK's elements and false if it's not. */ static boolean @@ -2211,347 +3111,74 @@ group_in_compile_stack (compile_stack, regnum) compile_stack_type compile_stack; regnum_t regnum; { - int this_element; - - for (this_element = compile_stack.avail - 1; - this_element >= 0; - this_element--) - if (compile_stack.stack[this_element].regnum == regnum) - return true; - - return false; -} - - -/* Read the ending character of a range (in a bracket expression) from the - uncompiled pattern *P_PTR (which ends at PEND). We assume the - starting character is in `P[-2]'. (`P[-1]' is the character `-'.) - Then we set the translation of all bits between the starting and - ending characters (inclusive) in the compiled pattern B. - - Return an error code. - - We use these short variable names so we can use the same macros as - `regex_compile' itself. */ - -static reg_errcode_t -compile_range (p_ptr, pend, translate, syntax, b) - const char **p_ptr, *pend; - char *translate; - reg_syntax_t syntax; - unsigned char *b; -{ - unsigned this_char; - - const char *p = *p_ptr; - int range_start, range_end; - - if (p == pend) - return REG_ERANGE; - - /* Even though the pattern is a signed `char *', we need to fetch - with unsigned char *'s; if the high bit of the pattern character - is set, the range endpoints will be negative if we fetch using a - signed char *. - - We also want to fetch the endpoints without translating them; the - appropriate translation is done in the bit-setting loop below. */ - range_start = ((unsigned char *) p)[-2]; - range_end = ((unsigned char *) p)[0]; - - /* Have to increment the pointer into the pattern string, so the - caller isn't still at the ending character. */ - (*p_ptr)++; - - /* If the start is after the end, the range is empty. 
*/ - if (range_start > range_end) - return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; - - /* Here we see why `this_char' has to be larger than an `unsigned - char' -- the range is inclusive, so if `range_end' == 0xff - (assuming 8-bit characters), we would otherwise go into an infinite - loop, since all characters <= 0xff. */ - for (this_char = range_start; this_char <= range_end; this_char++) - { - SET_LIST_BIT (TRANSLATE (this_char)); - } - - return REG_NOERROR; -} - -/* Failure stack declarations and macros; both re_compile_fastmap and - re_match_2 use a failure stack. These have to be macros because of - REGEX_ALLOCATE. */ - - -/* Number of failure points for which to initially allocate space - when matching. If this number is exceeded, we allocate more - space, so it is not a hard limit. */ -#ifndef INIT_FAILURE_ALLOC -#define INIT_FAILURE_ALLOC 5 -#endif - -/* Roughly the maximum number of failure points on the stack. Would be - exactly that if always used MAX_FAILURE_SPACE each time we failed. - This is a variable only so users of regex can assign to it; we never - change it ourselves. */ -int re_max_failures = 2000; - -typedef const unsigned char *fail_stack_elt_t; - -typedef struct -{ - fail_stack_elt_t *stack; - unsigned size; - unsigned avail; /* Offset of next open position. */ -} fail_stack_type; - -#define FAIL_STACK_EMPTY() (fail_stack.avail == 0) -#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0) -#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size) -#define FAIL_STACK_TOP() (fail_stack.stack[fail_stack.avail]) - - -/* Initialize `fail_stack'. Do `return -2' if the alloc fails. 
*/ - -#define INIT_FAIL_STACK() \ - do { \ - fail_stack.stack = (fail_stack_elt_t *) \ - REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \ - \ - if (fail_stack.stack == NULL) \ - return -2; \ - \ - fail_stack.size = INIT_FAILURE_ALLOC; \ - fail_stack.avail = 0; \ - } while (0) - - -/* Double the size of FAIL_STACK, up to approximately `re_max_failures' items. - - Return 1 if succeeds, and 0 if either ran out of memory - allocating space for it or it was already too large. - - REGEX_REALLOCATE requires `destination' be declared. */ - -#define DOUBLE_FAIL_STACK(fail_stack) \ - ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS \ - ? 0 \ - : ((fail_stack).stack = (fail_stack_elt_t *) \ - REGEX_REALLOCATE ((fail_stack).stack, \ - (fail_stack).size * sizeof (fail_stack_elt_t), \ - ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \ - \ - (fail_stack).stack == NULL \ - ? 0 \ - : ((fail_stack).size <<= 1, \ - 1))) - - -/* Push PATTERN_OP on FAIL_STACK. - - Return 1 if was able to do so and 0 if ran out of memory allocating - space to do so. */ -#define PUSH_PATTERN_OP(pattern_op, fail_stack) \ - ((FAIL_STACK_FULL () \ - && !DOUBLE_FAIL_STACK (fail_stack)) \ - ? 0 \ - : ((fail_stack).stack[(fail_stack).avail++] = pattern_op, \ - 1)) - -/* This pushes an item onto the failure stack. Must be a four-byte - value. Assumes the variable `fail_stack'. Probably should only - be called from within `PUSH_FAILURE_POINT'. */ -#define PUSH_FAILURE_ITEM(item) \ - fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) (long) item + int this_element; -/* The complement operation. Assumes `fail_stack' is nonempty. */ -#define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail] + for (this_element = compile_stack.avail - 1; + this_element >= 0; + this_element--) + if (compile_stack.stack[this_element].regnum == regnum) + return true; -/* Used to omit pushing failure point id's when we're not debugging. 
*/ -#ifdef DEBUG -#define DEBUG_PUSH PUSH_FAILURE_ITEM -#define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM () -#else -#define DEBUG_PUSH(item) -#define DEBUG_POP(item_addr) -#endif + return false; +} -/* Push the information about the state we will need - if we ever fail back to it. - - Requires variables fail_stack, regstart, regend, reg_info, and - num_regs be declared. DOUBLE_FAIL_STACK requires `destination' be - declared. - - Does `return FAILURE_CODE' if runs out of memory. */ +/* Read the ending character of a range (in a bracket expression) from the + uncompiled pattern *P_PTR (which ends at PEND). We assume the + starting character is in `P[-2]'. (`P[-1]' is the character `-'.) + Then we set the translation of all bits between the starting and + ending characters (inclusive) in the compiled pattern B. -#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \ - do { \ - char *destination; \ - /* Must be int, so when we don't save any registers, the arithmetic \ - of 0 + -1 isn't done as unsigned. */ \ - int this_reg; \ - \ - DEBUG_STATEMENT (failure_id++); \ - DEBUG_STATEMENT (nfailure_points_pushed++); \ - DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \ - DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\ - DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\ - \ - DEBUG_PRINT2 (" slots needed: %d\n", NUM_FAILURE_ITEMS); \ - DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \ - \ - /* Ensure we have enough space allocated for what we will push. */ \ - while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \ - { \ - if (!DOUBLE_FAIL_STACK (fail_stack)) \ - return failure_code; \ - \ - DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \ - (fail_stack).size); \ - DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\ - } \ - \ - /* Push the info, starting with the registers. 
*/ \ - DEBUG_PRINT1 ("\n"); \ - \ - for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \ - this_reg++) \ - { \ - DEBUG_PRINT2 (" Pushing reg: %d\n", this_reg); \ - DEBUG_STATEMENT (num_regs_pushed++); \ - \ - DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ - PUSH_FAILURE_ITEM (regstart[this_reg]); \ - \ - DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ - PUSH_FAILURE_ITEM (regend[this_reg]); \ - \ - DEBUG_PRINT2 (" info: 0x%x\n ", reg_info[this_reg]); \ - DEBUG_PRINT2 (" match_null=%d", \ - REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \ - DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \ - DEBUG_PRINT2 (" matched_something=%d", \ - MATCHED_SOMETHING (reg_info[this_reg])); \ - DEBUG_PRINT2 (" ever_matched=%d", \ - EVER_MATCHED_SOMETHING (reg_info[this_reg])); \ - DEBUG_PRINT1 ("\n"); \ - PUSH_FAILURE_ITEM (reg_info[this_reg].word); \ - } \ - \ - DEBUG_PRINT2 (" Pushing low active reg: %d\n", lowest_active_reg);\ - PUSH_FAILURE_ITEM (lowest_active_reg); \ - \ - DEBUG_PRINT2 (" Pushing high active reg: %d\n", highest_active_reg);\ - PUSH_FAILURE_ITEM (highest_active_reg); \ - \ - DEBUG_PRINT2 (" Pushing pattern 0x%x: ", pattern_place); \ - DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \ - PUSH_FAILURE_ITEM (pattern_place); \ - \ - DEBUG_PRINT2 (" Pushing string 0x%x: `", string_place); \ - DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \ - size2); \ - DEBUG_PRINT1 ("'\n"); \ - PUSH_FAILURE_ITEM (string_place); \ - \ - DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \ - DEBUG_PUSH (failure_id); \ - } while (0) + Return an error code. -/* This is the number of items that are pushed and popped on the stack - for each register. */ -#define NUM_REG_ITEMS 3 + We use these short variable names so we can use the same macros as + `regex_compile' itself. */ -/* Individual items aside from the registers. */ -#ifdef DEBUG -#define NUM_NONREG_ITEMS 5 /* Includes failure point id. 
*/ -#else -#define NUM_NONREG_ITEMS 4 -#endif +static reg_errcode_t +compile_range (p_ptr, pend, translate, syntax, b) + const char **p_ptr, *pend; + RE_TRANSLATE_TYPE translate; + reg_syntax_t syntax; + unsigned char *b; +{ + unsigned this_char; -/* We push at most this many items on the stack. */ -#define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS) + const char *p = *p_ptr; + reg_errcode_t ret; + char range_start[2]; + char range_end[2]; -/* We actually push this many items. */ -#define NUM_FAILURE_ITEMS \ - ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS \ - + NUM_NONREG_ITEMS) + if (p == pend) + return REG_ERANGE; -/* How many items can still be added to the stack without overflowing it. */ -#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail) + /* Fetch the endpoints without translating them; the + appropriate translation is done in the bit-setting loop below. */ + range_start[0] = p[-2]; range_start[1] = '\0'; + range_end[0] = p[ 0]; range_end[1] = '\0'; + /* Have to increment the pointer into the pattern string, so the + caller isn't still at the ending character. */ + (*p_ptr)++; -/* Pops what PUSH_FAIL_STACK pushes. + /* Report an error if the range is empty and the syntax prohibits this. */ + ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR; - We restore into the parameters, all of which should be lvalues: - STR -- the saved data position. - PAT -- the saved pattern position. - LOW_REG, HIGH_REG -- the highest and lowest active registers. - REGSTART, REGEND -- arrays of string positions. - REG_INFO -- array of information about each subexpression. - - Also assumes the variables `fail_stack' and (if debugging), `bufp', - `pend', `string1', `size1', `string2', and `size2'. */ + /* Here we see why `this_char' has to be larger than an `unsigned + char' -- we would otherwise go into an infinite + loop, since all characters <= 0xff. 
*/ + for (this_char = 0; this_char <= (unsigned char) -1; this_char++) + { + char ch[2]; + ch[0] = this_char; ch[1] = '\0'; + if (strcoll (range_start, ch) <= 0 && strcoll (ch, range_end) <= 0) + { + SET_LIST_BIT (TRANSLATE (this_char)); + ret = REG_NOERROR; + } + } -#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\ -{ \ - DEBUG_STATEMENT (fail_stack_elt_t failure_id;) \ - int this_reg; \ - const unsigned char *string_temp; \ - \ - assert (!FAIL_STACK_EMPTY ()); \ - \ - /* Remove failure points and point to how many regs pushed. */ \ - DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \ - DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \ - DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \ - \ - assert (fail_stack.avail >= NUM_NONREG_ITEMS); \ - \ - DEBUG_POP (&failure_id); \ - DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \ - \ - /* If the saved string location is NULL, it came from an \ - on_failure_keep_string_jump opcode, and we want to throw away the \ - saved NULL, thus retaining our current position in the string. */ \ - string_temp = POP_FAILURE_ITEM (); \ - if (string_temp != NULL) \ - str = (const char *) string_temp; \ - \ - DEBUG_PRINT2 (" Popping string 0x%x: `", str); \ - DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \ - DEBUG_PRINT1 ("'\n"); \ - \ - pat = (unsigned char *) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" Popping pattern 0x%x: ", pat); \ - DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \ - \ - /* Restore register info. 
*/ \ - high_reg = (unsigned long) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" Popping high active reg: %d\n", high_reg); \ - \ - low_reg = (unsigned long) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" Popping low active reg: %d\n", low_reg); \ - \ - for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \ - { \ - DEBUG_PRINT2 (" Popping reg: %d\n", this_reg); \ - \ - reg_info[this_reg].word = POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" info: 0x%x\n", reg_info[this_reg]); \ - \ - regend[this_reg] = (const char *) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" end: 0x%x\n", regend[this_reg]); \ - \ - regstart[this_reg] = (const char *) POP_FAILURE_ITEM (); \ - DEBUG_PRINT2 (" start: 0x%x\n", regstart[this_reg]); \ - } \ - \ - DEBUG_STATEMENT (nfailure_points_popped++); \ -} /* POP_FAILURE_POINT */ + return ret; +} /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible @@ -2560,7 +3187,7 @@ typedef struct The caller must supply the address of a (1 << BYTEWIDTH)-byte data area as BUFP->fastmap. - + We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in the pattern buffer. @@ -2571,18 +3198,23 @@ re_compile_fastmap (bufp) struct re_pattern_buffer *bufp; { int j, k; +#ifdef MATCH_MAY_ALLOCATE fail_stack_type fail_stack; +#endif #ifndef REGEX_MALLOC char *destination; #endif - /* We don't push any register information onto the failure stack. */ - unsigned num_regs = 0; - + register char *fastmap = bufp->fastmap; unsigned char *pattern = bufp->buffer; - unsigned long size = bufp->used; - const unsigned char *p = pattern; - register unsigned char *pend = pattern + size; + unsigned char *p = pattern; + register unsigned char *pend = pattern + bufp->used; + +#ifdef REL_ALLOC + /* This holds the pointer to the failure stack, when + it is allocated relocatably. */ + fail_stack_elt_t *failure_stack_ptr; +#endif /* Assume that each path through the pattern can be null until proven otherwise. 
We set this false at the bottom of switch @@ -2594,32 +3226,36 @@ re_compile_fastmap (bufp) boolean succeed_n_p = false; assert (fastmap != NULL && p != NULL); - + INIT_FAIL_STACK (); bzero (fastmap, 1 << BYTEWIDTH); /* Assume nothing's valid. */ bufp->fastmap_accurate = 1; /* It will be when we're done. */ bufp->can_be_null = 0; - - while (p != pend || !FAIL_STACK_EMPTY ()) + + while (1) { - if (p == pend) - { - bufp->can_be_null |= path_can_be_null; - - /* Reset for next path. */ - path_can_be_null = true; - - p = fail_stack.stack[--fail_stack.avail]; + if (p == pend || *p == succeed) + { + /* We have reached the (effective) end of pattern. */ + if (!FAIL_STACK_EMPTY ()) + { + bufp->can_be_null |= path_can_be_null; + + /* Reset for next path. */ + path_can_be_null = true; + + p = fail_stack.stack[--fail_stack.avail].pointer; + + continue; + } + else + break; } /* We should never be about to go beyond the end of the pattern. */ assert (p < pend); - -#ifdef SWITCH_ENUM_BUG - switch ((int) ((re_opcode_t) *p++)) -#else - switch ((re_opcode_t) *p++) -#endif + + switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) { /* I guess the idea here is to simply not bother with a fastmap @@ -2629,7 +3265,7 @@ re_compile_fastmap (bufp) that is all we do. */ case duplicate: bufp->can_be_null = 1; - return 0; + goto done; /* Following are the cases which match a character. These end @@ -2673,22 +3309,25 @@ re_compile_fastmap (bufp) case anychar: - /* `.' matches anything ... */ - for (j = 0; j < (1 << BYTEWIDTH); j++) - fastmap[j] = 1; + { + int fastmap_newline = fastmap['\n']; - /* ... except perhaps newline. */ - if (!(bufp->syntax & RE_DOT_NEWLINE)) - fastmap['\n'] = 0; + /* `.' matches anything ... */ + for (j = 0; j < (1 << BYTEWIDTH); j++) + fastmap[j] = 1; - /* Return if we have already set `can_be_null'; if we have, - then the fastmap is irrelevant. Something's wrong here. */ - else if (bufp->can_be_null) - return 0; + /* ... except perhaps newline. 
*/ + if (!(bufp->syntax & RE_DOT_NEWLINE)) + fastmap['\n'] = fastmap_newline; - /* Otherwise, have to check alternative paths. */ - break; + /* Return if we have already set `can_be_null'; if we have, + then the fastmap is irrelevant. Something's wrong here. */ + else if (bufp->can_be_null) + goto done; + /* Otherwise, have to check alternative paths. */ + break; + } #ifdef emacs case syntaxspec: @@ -2715,7 +3354,7 @@ re_compile_fastmap (bufp) case at_dot: case after_dot: continue; -#endif /* not emacs */ +#endif /* emacs */ case no_op: @@ -2738,10 +3377,10 @@ re_compile_fastmap (bufp) case jump_past_alt: case dummy_failure_jump: EXTRACT_NUMBER_AND_INCR (j, p); - p += j; + p += j; if (j > 0) continue; - + /* Jump backward implies we just went through the body of a loop and matched nothing. Opcode jumped to should be `on_failure_jump' or `succeed_n'. Just treat it like an @@ -2753,11 +3392,11 @@ re_compile_fastmap (bufp) p++; EXTRACT_NUMBER_AND_INCR (j, p); - p += j; - + p += j; + /* If what's on the stack is where we are now, pop it. */ - if (!FAIL_STACK_EMPTY () - && fail_stack.stack[fail_stack.avail - 1] == p) + if (!FAIL_STACK_EMPTY () + && fail_stack.stack[fail_stack.avail - 1].pointer == p) fail_stack.avail--; continue; @@ -2778,7 +3417,10 @@ re_compile_fastmap (bufp) if (p + j < pend) { if (!PUSH_PATTERN_OP (p + j, fail_stack)) - return -2; + { + RESET_FAIL_STACK (); + return -2; + } } else bufp->can_be_null = 1; @@ -2794,7 +3436,7 @@ re_compile_fastmap (bufp) case succeed_n: /* Get to the number of times to succeed. */ - p += 2; + p += 2; /* Increment p past the n for when k != 0. */ EXTRACT_NUMBER_AND_INCR (k, p); @@ -2835,8 +3477,14 @@ re_compile_fastmap (bufp) /* Set `can_be_null' for the last path (also the first path, if the pattern is empty). 
*/ bufp->can_be_null |= path_can_be_null; + + done: + RESET_FAIL_STACK (); return 0; } /* re_compile_fastmap */ +#ifdef _LIBC +weak_alias (__re_compile_fastmap, re_compile_fastmap) +#endif /* Set REGS to hold NUM_REGS registers, storing them in STARTS and ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use @@ -2869,9 +3517,12 @@ re_set_registers (bufp, regs, num_regs, starts, ends) { bufp->regs_allocated = REGS_UNALLOCATED; regs->num_regs = 0; - regs->start = regs->end = (regoff_t) 0; + regs->start = regs->end = (regoff_t *) 0; } } +#ifdef _LIBC +weak_alias (__re_set_registers, re_set_registers) +#endif /* Searching routines. */ @@ -2885,25 +3536,28 @@ re_search (bufp, string, size, startpos, range, regs) int size, startpos, range; struct re_registers *regs; { - return re_search_2 (bufp, NULL, 0, string, size, startpos, range, + return re_search_2 (bufp, NULL, 0, string, size, startpos, range, regs, size); } +#ifdef _LIBC +weak_alias (__re_search, re_search) +#endif /* Using the compiled pattern in BUFP->buffer, first tries to match the virtual concatenation of STRING1 and STRING2, starting first at index STARTPOS, then at STARTPOS + 1, and so on. - + STRING1 and STRING2 have length SIZE1 and SIZE2, respectively. - + RANGE is how far to scan while trying to match. RANGE = 0 means try only at STARTPOS; in general, the last start tried is STARTPOS + RANGE. - + In REGS, return the indices of the virtual concatenation of STRING1 and STRING2 that matched the entire BUFP->buffer and its contained subexpressions. - + Do not consider matching one past the index STOP in the virtual concatenation of STRING1 and STRING2. 
@@ -2923,24 +3577,29 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) { int val; register char *fastmap = bufp->fastmap; - register char *translate = bufp->translate; + register RE_TRANSLATE_TYPE translate = bufp->translate; int total_size = size1 + size2; int endpos = startpos + range; /* Check for out-of-range STARTPOS. */ if (startpos < 0 || startpos > total_size) return -1; - + /* Fix up RANGE if it might eventually take us outside - the virtual concatenation of STRING1 and STRING2. */ - if (endpos < -1) - range = -1 - startpos; + the virtual concatenation of STRING1 and STRING2. + Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE. */ + if (endpos < 0) + range = 0 - startpos; else if (endpos > total_size) range = total_size - startpos; /* If the search isn't to be a backwards one, don't waste time in a search for a pattern that must be anchored. */ - if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) + if (bufp->used > 0 && range > 0 + && ((re_opcode_t) bufp->buffer[0] == begbuf + /* `begline' is like `begbuf' if it cannot match at newlines. */ + || ((re_opcode_t) bufp->buffer[0] == begline + && !bufp->newline_anchor))) { if (startpos > 0) return -1; @@ -2948,14 +3607,25 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) range = 1; } +#ifdef emacs + /* In a forward search for something that starts with \=. + don't keep searching past point. */ + if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0) + { + range = PT - startpos; + if (range <= 0) + return -1; + } +#endif /* emacs */ + /* Update the fastmap now if not correct already. */ if (fastmap && !bufp->fastmap_accurate) if (re_compile_fastmap (bufp) == -2) return -2; - + /* Loop through the string, looking for a place to start matching. */ for (;;) - { + { /* If a fastmap is supplied, skip quickly over characters that cannot be the start of a match. 
If the pattern can match the null string, however, we don't need to skip characters; we want @@ -2972,7 +3642,7 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) lim = range - (size1 - startpos); d = (startpos >= size1 ? string2 - size1 : string1) + startpos; - + /* Written out as an if-else to avoid testing `translate' inside the loop. */ if (translate) @@ -2989,7 +3659,7 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) else /* Searching backwards. */ { register char c = (size1 == 0 || startpos >= size1 - ? string2[startpos - size1] + ? string2[startpos - size1] : string1[startpos]); if (!fastmap[(unsigned char) TRANSLATE (c)]) @@ -3002,96 +3672,46 @@ re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop) && !bufp->can_be_null) return -1; - val = re_match_2 (bufp, string1, size1, string2, size2, - startpos, regs, stop); - if (val >= 0) - return startpos; - - if (val == -2) - return -2; - - advance: - if (!range) - break; - else if (range > 0) - { - range--; - startpos++; - } - else - { - range++; - startpos--; - } - } - return -1; -} /* re_search_2 */ - -/* Declarations and macros for re_match_2. */ - -static int bcmp_translate (); -static boolean alt_match_null_string_p (), - common_op_match_null_string_p (), - group_match_null_string_p (); - -/* Structure for per-register (a.k.a. per-group) information. - This must not be longer than one word, because we push this value - onto the failure stack. Other register information, such as the - starting and ending positions (which are addresses), and the list of - inner groups (which is a bits list) are maintained in separate - variables. - - We are making a (strictly speaking) nonportable assumption here: that - the compiler will pack our bit fields into something that fits into - the type of `word', i.e., is something that fits into one item on the - failure stack. 
*/ -typedef union -{ - fail_stack_elt_t word; - struct - { - /* This field is one if this group can match the empty string, - zero if not. If not yet determined, `MATCH_NULL_UNSET_VALUE'. */ -#define MATCH_NULL_UNSET_VALUE 3 - unsigned match_null_string_p : 2; - unsigned is_active : 1; - unsigned matched_something : 1; - unsigned ever_matched_something : 1; - } bits; -} register_info_type; - -#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p) -#define IS_ACTIVE(R) ((R).bits.is_active) -#define MATCHED_SOMETHING(R) ((R).bits.matched_something) -#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something) - - -/* Call this when have matched a real character; it sets `matched' flags - for the subexpressions which we are currently inside. Also records - that those subexprs have matched. */ -#define SET_REGS_MATCHED() \ - do \ - { \ - unsigned r; \ - for (r = lowest_active_reg; r <= highest_active_reg; r++) \ - { \ - MATCHED_SOMETHING (reg_info[r]) \ - = EVER_MATCHED_SOMETHING (reg_info[r]) \ - = 1; \ - } \ - } \ - while (0) + val = re_match_2_internal (bufp, string1, size1, string2, size2, + startpos, regs, stop); +#ifndef REGEX_MALLOC +# ifdef C_ALLOCA + alloca (0); +# endif +#endif + + if (val >= 0) + return startpos; + if (val == -2) + return -2; + advance: + if (!range) + break; + else if (range > 0) + { + range--; + startpos++; + } + else + { + range++; + startpos--; + } + } + return -1; +} /* re_search_2 */ +#ifdef _LIBC +weak_alias (__re_search_2, re_search_2) +#endif + /* This converts PTR, a pointer into one of the search strings `string1' and `string2' into an offset from the beginning of that string. */ -#define POINTER_TO_OFFSET(ptr) \ - (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1) - -/* Registers are set to a sentinel when they haven't yet matched. */ -#define REG_UNSET_VALUE ((char *) -1) -#define REG_UNSET(e) ((e) == REG_UNSET_VALUE) - +#define POINTER_TO_OFFSET(ptr) \ + (FIRST_STRING_P (ptr) \ + ? 
((regoff_t) ((ptr) - string1)) \ + : ((regoff_t) ((ptr) - string2 + size1))) /* Macros for dealing with the split strings in re_match_2. */ @@ -3114,7 +3734,7 @@ typedef union /* Test if at very beginning or at very end of the virtual concatenation of `string1' and `string2'. If only one string, it's `string2'. */ #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2) -#define AT_STRINGS_END(d) ((d) == end2) +#define AT_STRINGS_END(d) ((d) == end2) /* Test if D points to a character which is word-constituent. We have @@ -3126,19 +3746,21 @@ typedef union : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \ == Sword) +/* Disabled due to a compiler bug -- see comment at case wordbound */ +#if 0 /* Test if the character before D and the one at D differ with respect to being word-constituent. */ #define AT_WORD_BOUNDARY(d) \ (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \ || WORDCHAR_P (d - 1) != WORDCHAR_P (d)) - +#endif /* Free everything we malloc. */ -#ifdef REGEX_MALLOC -#define FREE_VAR(var) if (var) free (var); var = NULL -#define FREE_VARIABLES() \ +#ifdef MATCH_MAY_ALLOCATE +# define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL +# define FREE_VARIABLES() \ do { \ - FREE_VAR (fail_stack.stack); \ + REGEX_FREE_STACK (fail_stack.stack); \ FREE_VAR (regstart); \ FREE_VAR (regend); \ FREE_VAR (old_regstart); \ @@ -3149,11 +3771,9 @@ typedef union FREE_VAR (reg_dummy); \ FREE_VAR (reg_info_dummy); \ } while (0) -#else /* not REGEX_MALLOC */ -/* Some MIPS systems (at least) want this to free alloca'd storage. */ -#define FREE_VARIABLES() alloca (0) -#endif /* not REGEX_MALLOC */ - +#else +# define FREE_VARIABLES() ((void)0) /* Do nothing! But inhibit gcc warning. */ +#endif /* not MATCH_MAY_ALLOCATE */ /* These values must meet several constraints. 
They must not be valid register values; since we have a limit of 255 registers (because @@ -3176,17 +3796,38 @@ re_match (bufp, string, size, pos, regs) const char *string; int size, pos; struct re_registers *regs; - { - return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size); +{ + int result = re_match_2_internal (bufp, NULL, 0, string, size, + pos, regs, size); +# ifndef REGEX_MALLOC +# ifdef C_ALLOCA + alloca (0); +# endif +# endif + return result; } +# ifdef _LIBC +weak_alias (__re_match, re_match) +# endif #endif /* not emacs */ +static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p, + unsigned char *end, + register_info_type *reg_info)); +static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p, + unsigned char *end, + register_info_type *reg_info)); +static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p, + unsigned char *end, + register_info_type *reg_info)); +static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2, + int len, char *translate)); /* re_match_2 matches the compiled pattern in BUFP against the the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1 and SIZE2, respectively). We start matching at POS, and stop matching at STOP. - + If REGS is non-null and the `no_sub' field of BUFP is nonzero, we store offsets for the substring each group matched in REGS. See the documentation for exactly how many groups we fill. @@ -3203,6 +3844,30 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) int pos; struct re_registers *regs; int stop; +{ + int result = re_match_2_internal (bufp, string1, size1, string2, size2, + pos, regs, stop); +#ifndef REGEX_MALLOC +# ifdef C_ALLOCA + alloca (0); +# endif +#endif + return result; +} +#ifdef _LIBC +weak_alias (__re_match_2, re_match_2) +#endif + +/* This is a separate function so that we can force an alloca cleanup + afterwards. 
*/ +static int +re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop) + struct re_pattern_buffer *bufp; + const char *string1, *string2; + int size1, size2; + int pos; + struct re_registers *regs; + int stop; { /* General temporaries. */ int mcnt; @@ -3217,13 +3882,17 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) /* Where we are in the data, and the end of the current string. */ const char *d, *dend; - + /* Where we are in the pattern, and the end of the pattern. */ unsigned char *p = bufp->buffer; register unsigned char *pend = p + bufp->used; + /* Mark the opcode just after a start_memory, so we can test for an + empty subpattern when we get to the stop_memory. */ + unsigned char *just_past_start_mem = 0; + /* We use this to map every character in the string. */ - char *translate = bufp->translate; + RE_TRANSLATE_TYPE translate = bufp->translate; /* Failure point stack. Each place that can handle a failure further down the line pushes a failure point on this stack. It consists of @@ -3234,20 +3903,28 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) scanning the strings. If the latter is zero, the failure point is a ``dummy''; if a failure happens and the failure point is a dummy, it gets discarded and the next next one is tried. */ +#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ fail_stack_type fail_stack; +#endif #ifdef DEBUG - static unsigned failure_id = 0; + static unsigned failure_id; unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0; #endif +#ifdef REL_ALLOC + /* This holds the pointer to the failure stack, when + it is allocated relocatably. */ + fail_stack_elt_t *failure_stack_ptr; +#endif + /* We fill all the registers internally, independent of what we return, for use in backreferences. The number here includes an element for register zero. */ - unsigned num_regs = bufp->re_nsub + 1; - + size_t num_regs = bufp->re_nsub + 1; + /* The currently active registers. 
*/ - unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG; - unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG; + active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG; + active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG; /* Information on the contents of registers. These are pointers into the input strings; they record just what was matched (on this @@ -3256,14 +3933,18 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) matching and the regnum-th regend points to right after where we stopped matching the regnum-th subexpression. (The zeroth register keeps track of what the whole pattern matches.) */ +#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ const char **regstart, **regend; +#endif /* If a group that's operated upon by a repetition operator fails to match anything, then the register for its start will need to be restored because it will have been set to wherever in the string we are when we last see its open-group operator. Similarly for a register's end. */ +#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ const char **old_regstart, **old_regend; +#endif /* The is_active field of reg_info helps us keep track of which (possibly nested) subexpressions we are currently in. The matched_something @@ -3271,15 +3952,19 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) matched any of the pattern so far this time through the reg_num-th subexpression. These two fields get reset each time through any loop their register is in. */ - register_info_type *reg_info; +#ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global. */ + register_info_type *reg_info; +#endif /* The following record the register info as found in the above - variables when we find a match better than any we've seen before. + variables when we find a match better than any we've seen before. This happens as we backtrack through the failure points, which in turn happens only if we have not yet matched the entire string. 
*/ unsigned best_regs_set = false; +#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ const char **best_regstart, **best_regend; - +#endif + /* Logically, this is `best_regend[0]'. But we don't want to have to allocate space for that if we're not allocating space for anything else (see below). Also, we never need info about register 0 for @@ -3290,19 +3975,25 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) and need to test it, it's not garbage. */ const char *match_end = NULL; + /* This helps SET_REGS_MATCHED avoid doing redundant work. */ + int set_regs_matched_done = 0; + /* Used when we pop values we don't care about. */ +#ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global. */ const char **reg_dummy; register_info_type *reg_info_dummy; +#endif #ifdef DEBUG /* Counts the total number of registers pushed. */ - unsigned num_regs_pushed = 0; + unsigned num_regs_pushed = 0; #endif DEBUG_PRINT1 ("\n\nEntering re_match_2.\n"); - + INIT_FAIL_STACK (); - + +#ifdef MATCH_MAY_ALLOCATE /* Do not bother to initialize all the register variables if there are no groups in the pattern, as it takes a fair amount of time. 
If there are groups, we include space for register 0 (the whole @@ -3320,14 +4011,13 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) reg_dummy = REGEX_TALLOC (num_regs, const char *); reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type); - if (!(regstart && regend && old_regstart && old_regend && reg_info - && best_regstart && best_regend && reg_dummy && reg_info_dummy)) + if (!(regstart && regend && old_regstart && old_regend && reg_info + && best_regstart && best_regend && reg_dummy && reg_info_dummy)) { FREE_VARIABLES (); return -2; } } -#ifdef REGEX_MALLOC else { /* We must initialize all our variables to NULL, so that @@ -3336,7 +4026,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) = best_regend = reg_dummy = NULL; reg_info = reg_info_dummy = (register_info_type *) NULL; } -#endif /* REGEX_MALLOC */ +#endif /* MATCH_MAY_ALLOCATE */ /* The starting position is bogus. */ if (pos < 0 || pos > size1 + size2) @@ -3344,21 +4034,21 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) FREE_VARIABLES (); return -1; } - + /* Initialize subexpression text positions to -1 to mark ones that no start_memory/stop_memory has been seen for. Also initialize the register information struct. */ - for (mcnt = 1; mcnt < num_regs; mcnt++) + for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) { - regstart[mcnt] = regend[mcnt] + regstart[mcnt] = regend[mcnt] = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE; - + REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE; IS_ACTIVE (reg_info[mcnt]) = 0; MATCHED_SOMETHING (reg_info[mcnt]) = 0; EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0; } - + /* We move `string1' into `string2' if the latter's empty -- but not if `string1' is null. 
*/ if (size2 == 0 && string1 != NULL) @@ -3383,7 +4073,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) end_match_2 = string2 + stop - size1; } - /* `p' scans through the pattern as `d' scans through the data. + /* `p' scans through the pattern as `d' scans through the data. `dend' is the end of the input string that `d' points within. `d' is advanced into the following input string whenever necessary, but this happens before fetching; therefore, at the beginning of the @@ -3400,55 +4090,71 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) dend = end_match_2; } - DEBUG_PRINT1 ("The compiled pattern is: "); + DEBUG_PRINT1 ("The compiled pattern is:\n"); DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend); DEBUG_PRINT1 ("The string to match is: `"); DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2); DEBUG_PRINT1 ("'\n"); - + /* This loops over pattern commands. It exits by returning from the function if the match is complete, or it drops through if the match fails at this starting point in the input data. */ for (;;) { +#ifdef _LIBC + DEBUG_PRINT2 ("\n%p: ", p); +#else DEBUG_PRINT2 ("\n0x%x: ", p); +#endif if (p == pend) { /* End of pattern means we might have succeeded. */ DEBUG_PRINT1 ("end of pattern ... "); - + /* If we haven't matched the entire string, and we want the longest match, try backtracking. */ if (d != end_match_2) { + /* 1 if this match ends in the same string (string1 or string2) + as the best previous match. */ + boolean same_str_p = (FIRST_STRING_P (match_end) + == MATCHING_IN_FIRST_STRING); + /* 1 if this match is the best seen so far. */ + boolean best_match_p; + + /* AIX compiler got confused when this was combined + with the previous declaration. */ + if (same_str_p) + best_match_p = d > match_end; + else + best_match_p = !MATCHING_IN_FIRST_STRING; + DEBUG_PRINT1 ("backtracking.\n"); - + if (!FAIL_STACK_EMPTY ()) { /* More failure points to try. 
*/ - boolean same_str_p = (FIRST_STRING_P (match_end) - == MATCHING_IN_FIRST_STRING); /* If exceeds best match so far, save it. */ - if (!best_regs_set - || (same_str_p && d > match_end) - || (!same_str_p && !MATCHING_IN_FIRST_STRING)) + if (!best_regs_set || best_match_p) { best_regs_set = true; match_end = d; - + DEBUG_PRINT1 ("\nSAVING match as best so far.\n"); - - for (mcnt = 1; mcnt < num_regs; mcnt++) + + for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) { best_regstart[mcnt] = regstart[mcnt]; best_regend[mcnt] = regend[mcnt]; } } - goto fail; + goto fail; } - /* If no failure points, don't restore garbage. */ - else if (best_regs_set) + /* If no failure points, don't restore garbage. And if + last match is real best match, don't restore second + best one. */ + else if (best_regs_set && !best_match_p) { restore_best_regs: /* Restore best match. It may happen that `dend == @@ -3457,12 +4163,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) strings `x-' and `y-z-', if the two strings are not consecutive in memory. */ DEBUG_PRINT1 ("Restoring best registers.\n"); - + d = match_end; dend = ((d >= string1 && d <= end1) ? end_match_1 : end_match_2); - for (mcnt = 1; mcnt < num_regs; mcnt++) + for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++) { regstart[mcnt] = best_regstart[mcnt]; regend[mcnt] = best_regend[mcnt]; @@ -3470,6 +4176,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) } } /* d != end_match_2 */ + succeed_label: DEBUG_PRINT1 ("Accepting match.\n"); /* If caller wants register contents data back, do it. 
*/ @@ -3484,7 +4191,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) regs->start = TALLOC (regs->num_regs, regoff_t); regs->end = TALLOC (regs->num_regs, regoff_t); if (regs->start == NULL || regs->end == NULL) - return -2; + { + FREE_VARIABLES (); + return -2; + } bufp->regs_allocated = REGS_REALLOCATE; } else if (bufp->regs_allocated == REGS_REALLOCATE) @@ -3497,7 +4207,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) RETALLOC (regs->start, regs->num_regs, regoff_t); RETALLOC (regs->end, regs->num_regs, regoff_t); if (regs->start == NULL || regs->end == NULL) - return -2; + { + FREE_VARIABLES (); + return -2; + } } } else @@ -3513,53 +4226,53 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) if (regs->num_regs > 0) { regs->start[0] = pos; - regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1 - : d - string2 + size1); + regs->end[0] = (MATCHING_IN_FIRST_STRING + ? ((regoff_t) (d - string1)) + : ((regoff_t) (d - string2 + size1))); } - + /* Go through the first `min (num_regs, regs->num_regs)' registers, since that is all we initialized. */ - for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++) + for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs); + mcnt++) { if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt])) regs->start[mcnt] = regs->end[mcnt] = -1; else { - regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]); - regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]); + regs->start[mcnt] + = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]); + regs->end[mcnt] + = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]); } } - + /* If the regs structure we return has more elements than were in the pattern, set the extra elements to -1. If we (re)allocated the registers, this is the case, because we always allocate enough to have at least one -1 at the end. 
*/ - for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++) + for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++) regs->start[mcnt] = regs->end[mcnt] = -1; } /* regs && !bufp->no_sub */ - FREE_VARIABLES (); DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n", nfailure_points_pushed, nfailure_points_popped, nfailure_points_pushed - nfailure_points_popped); DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed); - mcnt = d - pos - (MATCHING_IN_FIRST_STRING - ? string1 + mcnt = d - pos - (MATCHING_IN_FIRST_STRING + ? string1 : string2 - size1); DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt); + FREE_VARIABLES (); return mcnt; } /* Otherwise match next pattern command. */ -#ifdef SWITCH_ENUM_BUG - switch ((int) ((re_opcode_t) *p++)) -#else - switch ((re_opcode_t) *p++) -#endif + switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++)) { /* Ignore these. Used to ignore the n of succeed_n's which currently have n == 0. */ @@ -3567,6 +4280,9 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) DEBUG_PRINT1 ("EXECUTING no_op.\n"); break; + case succeed: + DEBUG_PRINT1 ("EXECUTING succeed.\n"); + goto succeed_label; /* Match the next n pattern characters exactly. The following byte in the pattern defines n, and the n bytes after that @@ -3582,7 +4298,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) do { PREFETCH (); - if (translate[(unsigned char) *d++] != (char) *p++) + if ((unsigned char) translate[(unsigned char) *d++] + != (unsigned char) *p++) goto fail; } while (--mcnt); @@ -3636,7 +4353,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) p += 1 + *p; if (!not) goto fail; - + SET_REGS_MATCHED (); d++; break; @@ -3653,9 +4370,9 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) /* Find out if this group can match the empty string. */ p1 = p; /* To send to group_match_null_string_p. 
*/ - + if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE) - REG_MATCH_NULL_STRING_P (reg_info[*p]) + REG_MATCH_NULL_STRING_P (reg_info[*p]) = group_match_null_string_p (&p1, pend, reg_info); /* Save the position in the string where we were the last time @@ -3666,7 +4383,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) ? REG_UNSET (regstart[*p]) ? d : regstart[*p] : regstart[*p]; - DEBUG_PRINT2 (" old_regstart: %d\n", + DEBUG_PRINT2 (" old_regstart: %d\n", POINTER_TO_OFFSET (old_regstart[*p])); regstart[*p] = d; @@ -3674,10 +4391,13 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) IS_ACTIVE (reg_info[*p]) = 1; MATCHED_SOMETHING (reg_info[*p]) = 0; - + + /* Clear this whenever we change the register activity status. */ + set_regs_matched_done = 0; + /* This is the new highest active register. */ highest_active_reg = *p; - + /* If nothing was active before, this is the new lowest active register. */ if (lowest_active_reg == NO_LOWEST_ACTIVE_REG) @@ -3685,6 +4405,8 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) /* Move past the register number and inner group count. */ p += 2; + just_past_start_mem = p; + break; @@ -3693,7 +4415,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) number, and the number of inner groups. */ case stop_memory: DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]); - + /* We need to save the string position the last time we were at this close-group operator in case the group is operated upon by a repetition operator, e.g., with `((a*)*(b*)*)*' @@ -3702,7 +4424,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p]) ? REG_UNSET (regend[*p]) ? 
d : regend[*p] : regend[*p]; - DEBUG_PRINT2 (" old_regend: %d\n", + DEBUG_PRINT2 (" old_regend: %d\n", POINTER_TO_OFFSET (old_regend[*p])); regend[*p] = d; @@ -3710,7 +4432,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) /* This register isn't active anymore. */ IS_ACTIVE (reg_info[*p]) = 0; - + + /* Clear this whenever we change the register activity status. */ + set_regs_matched_done = 0; + /* If this was the only register active, nothing is active anymore. */ if (lowest_active_reg == highest_active_reg) @@ -3726,7 +4451,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) unsigned char r = *p - 1; while (r > 0 && !IS_ACTIVE (reg_info[r])) r--; - + /* If we end up at register zero, that means that we saved the registers as the result of an `on_failure_jump', not a `start_memory', and we jumped to past the innermost @@ -3742,18 +4467,18 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) else highest_active_reg = r; } - + /* If just failed to match something this time around with a group that's operated on by a repetition operator, try to force exit from the ``loop'', and restore the register information for this group that we had before trying this last match. 
*/ if ((!MATCHED_SOMETHING (reg_info[*p]) - || (re_opcode_t) p[-3] == start_memory) - && (p + 2) < pend) + || just_past_start_mem == p - 1) + && (p + 2) < pend) { boolean is_a_jump_n = false; - + p1 = p + 2; mcnt = 0; switch ((re_opcode_t) *p1++) @@ -3768,12 +4493,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) if (is_a_jump_n) p1 += 2; break; - + default: /* do nothing */ ; } p1 += mcnt; - + /* If the next operation is a jump backwards in the pattern to an on_failure_jump right before the start_memory corresponding to this stop_memory, exit from the loop @@ -3787,26 +4512,27 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) failed match, e.g., with `(a*)*b' against `ab' for regstart[1], and, e.g., with `((a*)*(b*)*)*' against `aba' for regend[3]. - + Also restore the registers for inner groups for, e.g., `((a*)(b*))*' against `aba' (register 3 would otherwise get trashed). */ - + if (EVER_MATCHED_SOMETHING (reg_info[*p])) { - unsigned r; - + unsigned r; + EVER_MATCHED_SOMETHING (reg_info[*p]) = 0; - + /* Restore this and inner groups' (if any) registers. */ - for (r = *p; r < *p + *(p + 1); r++) + for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1); + r++) { regstart[r] = old_regstart[r]; /* xx why this test? */ - if ((long) old_regend[r] >= (long) regstart[r]) + if (old_regend[r] >= regstart[r]) regend[r] = old_regend[r]; - } + } } p1++; EXTRACT_NUMBER_AND_INCR (mcnt, p1); @@ -3815,7 +4541,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) goto fail; } } - + /* Move past the register number and the inner group count. */ p += 2; break; @@ -3832,16 +4558,16 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) /* Can't back reference a group which we've never matched. */ if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno])) goto fail; - + /* Where in input to try to start matching. 
*/ d2 = regstart[regno]; - + /* Where to stop matching; if both the place to start and the place to stop matching are in the same string, then set to the place to stop, otherwise, for now have to use the end of the first string. */ - dend2 = ((FIRST_STRING_P (regstart[regno]) + dend2 = ((FIRST_STRING_P (regstart[regno]) == FIRST_STRING_P (regend[regno])) ? regend[regno] : end_match_1); for (;;) @@ -3865,19 +4591,22 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) /* How many characters left in this segment to match. */ mcnt = dend - d; - + /* Want how many consecutive characters we can match in one shot, so, if necessary, adjust the count. */ if (mcnt > dend2 - d2) mcnt = dend2 - d2; - + /* Compare that many; failure if mismatch, else move past them. */ - if (translate - ? bcmp_translate (d, d2, mcnt, translate) - : bcmp (d, d2, mcnt)) + if (translate + ? bcmp_translate (d, d2, mcnt, translate) + : memcmp (d, d2, mcnt)) goto fail; d += mcnt, d2 += mcnt; + + /* Do this because we've match some characters. */ + SET_REGS_MATCHED (); } } break; @@ -3888,7 +4617,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) `newline_anchor' is set, after newlines. */ case begline: DEBUG_PRINT1 ("EXECUTING begline.\n"); - + if (AT_STRINGS_BEG (d)) { if (!bufp->not_bol) break; @@ -3909,7 +4638,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) { if (!bufp->not_eol) break; } - + /* We have to ``prefetch'' the next character. */ else if ((d == end1 ? *string2 : *d) == '\n' && bufp->newline_anchor) @@ -3943,7 +4672,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) then the . fails against the \n. But the next thing we want to do is match the \n against the \n; if we restored the string value, we would be back at the foo. - + Because this is used only in specific cases, we don't need to check all the things that `on_failure_jump' does, to make sure the right things get saved on the stack. 
Hence we don't @@ -3953,16 +4682,20 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) case; that seems worse than this. */ case on_failure_keep_string_jump: DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump"); - + EXTRACT_NUMBER_AND_INCR (mcnt, p); +#ifdef _LIBC + DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt); +#else DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt); +#endif PUSH_FAILURE_POINT (p + mcnt, NULL, -2); break; /* Uses of on_failure_jump: - + Each alternative starts with an on_failure_jump that points to the beginning of the next alternative. Each alternative except the last ends with a jump that in effect jumps past @@ -3978,14 +4711,18 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) DEBUG_PRINT1 ("EXECUTING on_failure_jump"); EXTRACT_NUMBER_AND_INCR (mcnt, p); +#ifdef _LIBC + DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt); +#else DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt); +#endif /* If this on_failure_jump comes right before a group (i.e., the original * applied to a group), save the information for that group and all inner ones, so that if we fail back to this point, the group's information will be correct. For example, in \(a*\)*\1, we need the preceding group, - and in \(\(a*\)b*\)\2, we need the inner group. */ + and in \(zz\(a*\)b*\)\2, we need the inner group. */ /* We can't use `p' to check ahead because we push a failure point to `p + mcnt' after we do this. */ @@ -4028,18 +4765,34 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) would have to backtrack because of (as in, e.g., `a*a') then we can change to pop_failure_jump, because we'll never have to backtrack. - + This is not true in the case of alternatives: in `(a|ab)*' we do need to backtrack to the `ab' alternative (e.g., if the string was `ab'). But instead of trying to detect that here, the alternative has put on a dummy failure point which is what we will end up popping. */ - /* Skip over open/close-group commands. 
*/ - while (p2 + 2 < pend - && ((re_opcode_t) *p2 == stop_memory - || (re_opcode_t) *p2 == start_memory)) - p2 += 3; /* Skip over args, too. */ + /* Skip over open/close-group commands. + If what follows this loop is a ...+ construct, + look at what begins its body, since we will have to + match at least one of that. */ + while (1) + { + if (p2 + 2 < pend + && ((re_opcode_t) *p2 == stop_memory + || (re_opcode_t) *p2 == start_memory)) + p2 += 3; + else if (p2 + 6 < pend + && (re_opcode_t) *p2 == dummy_failure_jump) + p2 += 6; + else + break; + } + + p1 = p + mcnt; + /* p1[0] ... p1[2] are the `on_failure_jump' corresponding + to the `maybe_finalize_jump' of this case. Examine what + follows. */ /* If we're at the end of the pattern, we can change. */ if (p2 == pend) @@ -4057,23 +4810,19 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) { register unsigned char c = *p2 == (unsigned char) endline ? '\n' : p2[2]; - p1 = p + mcnt; - /* p1[0] ... p1[2] are the `on_failure_jump' corresponding - to the `maybe_finalize_jump' of this case. Examine what - follows. */ if ((re_opcode_t) p1[3] == exactn && p1[5] != c) { p[-3] = (unsigned char) pop_failure_jump; DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n", c, p1[5]); } - + else if ((re_opcode_t) p1[3] == charset || (re_opcode_t) p1[3] == charset_not) { int not = (re_opcode_t) p1[3] == charset_not; - + if (c < (unsigned char) (p1[4] * BYTEWIDTH) && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH))) not = !not; @@ -4087,6 +4836,54 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) } } } + else if ((re_opcode_t) *p2 == charset) + { + /* We win if the first character of the loop is not part + of the charset. */ + if ((re_opcode_t) p1[3] == exactn + && ! 
((int) p2[1] * BYTEWIDTH > (int) p1[5] + && (p2[2 + p1[5] / BYTEWIDTH] + & (1 << (p1[5] % BYTEWIDTH))))) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); + } + + else if ((re_opcode_t) p1[3] == charset_not) + { + int idx; + /* We win if the charset_not inside the loop + lists every character listed in the charset after. */ + for (idx = 0; idx < (int) p2[1]; idx++) + if (! (p2[2 + idx] == 0 + || (idx < (int) p1[4] + && ((p2[2 + idx] & ~ p1[5 + idx]) == 0)))) + break; + + if (idx == p2[1]) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); + } + } + else if ((re_opcode_t) p1[3] == charset) + { + int idx; + /* We win if the charset inside the loop + has no overlap with the one after the loop. */ + for (idx = 0; + idx < (int) p2[1] && idx < (int) p1[4]; + idx++) + if ((p2[2 + idx] & p1[5 + idx]) != 0) + break; + + if (idx == p2[1] || idx == p1[4]) + { + p[-3] = (unsigned char) pop_failure_jump; + DEBUG_PRINT1 (" No match => pop_failure_jump.\n"); + } + } + } } p -= 2; /* Point at relative address again. */ if ((re_opcode_t) p[-1] != pop_failure_jump) @@ -4111,7 +4908,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) actual values. Otherwise, we will restore only one register from the stack, since lowest will == highest in `pop_failure_point'. */ - unsigned dummy_low_reg, dummy_high_reg; + active_reg_t dummy_low_reg, dummy_high_reg; unsigned char *pdummy; const char *sdummy; @@ -4120,19 +4917,29 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) dummy_low_reg, dummy_high_reg, reg_dummy, reg_dummy, reg_info_dummy); } + /* Note fall through. */ + + unconditional_jump: +#ifdef _LIBC + DEBUG_PRINT2 ("\n%p: ", p); +#else + DEBUG_PRINT2 ("\n0x%x: ", p); +#endif /* Note fall through. */ - /* Unconditionally jump (without popping any failure points). 
*/ case jump: - unconditional_jump: EXTRACT_NUMBER_AND_INCR (mcnt, p); /* Get the amount to jump. */ DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt); p += mcnt; /* Do the jump. */ +#ifdef _LIBC + DEBUG_PRINT2 ("(to %p).\n", p); +#else DEBUG_PRINT2 ("(to 0x%x).\n", p); +#endif break; - + /* We need this opcode so we can detect where alternatives end in `group_match_null_string_p' et al. */ case jump_past_alt: @@ -4149,7 +4956,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n"); /* It doesn't matter what we push for the string here. What the code at `fail' tests is the value for the pattern. */ - PUSH_FAILURE_POINT (0, 0, -2); + PUSH_FAILURE_POINT (NULL, NULL, -2); goto unconditional_jump; @@ -4162,12 +4969,12 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n"); /* See comments just above at `dummy_failure_jump' about the two zeroes. */ - PUSH_FAILURE_POINT (0, 0, -2); + PUSH_FAILURE_POINT (NULL, NULL, -2); break; /* Have to succeed matching what follows at least n times. After that, handle like `on_failure_jump'. 
*/ - case succeed_n: + case succeed_n: EXTRACT_NUMBER (mcnt, p + 2); DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt); @@ -4178,18 +4985,26 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) mcnt--; p += 2; STORE_NUMBER_AND_INCR (p, mcnt); - DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p, mcnt); +#ifdef _LIBC + DEBUG_PRINT3 (" Setting %p to %d.\n", p - 2, mcnt); +#else + DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - 2, mcnt); +#endif } else if (mcnt == 0) { +#ifdef _LIBC + DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", p+2); +#else DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2); +#endif p[2] = (unsigned char) no_op; p[3] = (unsigned char) no_op; goto on_failure; } break; - - case jump_n: + + case jump_n: EXTRACT_NUMBER (mcnt, p + 2); DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt); @@ -4198,13 +5013,18 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) { mcnt--; STORE_NUMBER (p + 2, mcnt); - goto unconditional_jump; +#ifdef _LIBC + DEBUG_PRINT3 (" Setting %p to %d.\n", p + 2, mcnt); +#else + DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + 2, mcnt); +#endif + goto unconditional_jump; } /* If don't have to jump any more, skip over the rest of command. */ - else - p += 4; + else + p += 4; break; - + case set_number_at: { DEBUG_PRINT1 ("EXECUTING set_number_at.\n"); @@ -4212,22 +5032,63 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) EXTRACT_NUMBER_AND_INCR (mcnt, p); p1 = p + mcnt; EXTRACT_NUMBER_AND_INCR (mcnt, p); +#ifdef _LIBC + DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt); +#else DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt); +#endif STORE_NUMBER (p1, mcnt); break; } - case wordbound: - DEBUG_PRINT1 ("EXECUTING wordbound.\n"); - if (AT_WORD_BOUNDARY (d)) +#if 0 + /* The DEC Alpha C compiler 3.x generates incorrect code for the + test WORDCHAR_P (d - 1) != WORDCHAR_P (d) in the expansion of + AT_WORD_BOUNDARY, so this code is disabled. 
Expanding the + macro and introducing temporary variables works around the bug. */ + + case wordbound: + DEBUG_PRINT1 ("EXECUTING wordbound.\n"); + if (AT_WORD_BOUNDARY (d)) break; - goto fail; + goto fail; case notwordbound: - DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); + DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); if (AT_WORD_BOUNDARY (d)) goto fail; - break; + break; +#else + case wordbound: + { + boolean prevchar, thischar; + + DEBUG_PRINT1 ("EXECUTING wordbound.\n"); + if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) + break; + + prevchar = WORDCHAR_P (d - 1); + thischar = WORDCHAR_P (d); + if (prevchar != thischar) + break; + goto fail; + } + + case notwordbound: + { + boolean prevchar, thischar; + + DEBUG_PRINT1 ("EXECUTING notwordbound.\n"); + if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)) + goto fail; + + prevchar = WORDCHAR_P (d - 1); + thischar = WORDCHAR_P (d); + if (prevchar != thischar) + goto fail; + break; + } +#endif case wordbeg: DEBUG_PRINT1 ("EXECUTING wordbeg.\n"); @@ -4243,31 +5104,23 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) goto fail; #ifdef emacs -#ifdef emacs19 case before_dot: DEBUG_PRINT1 ("EXECUTING before_dot.\n"); if (PTR_CHAR_POS ((unsigned char *) d) >= point) goto fail; break; - + case at_dot: DEBUG_PRINT1 ("EXECUTING at_dot.\n"); if (PTR_CHAR_POS ((unsigned char *) d) != point) goto fail; break; - + case after_dot: DEBUG_PRINT1 ("EXECUTING after_dot.\n"); if (PTR_CHAR_POS ((unsigned char *) d) <= point) goto fail; break; -#else /* not emacs19 */ - case at_dot: - DEBUG_PRINT1 ("EXECUTING at_dot.\n"); - if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point) - goto fail; - break; -#endif /* not emacs19 */ case syntaxspec: DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt); @@ -4279,8 +5132,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) mcnt = (int) Sword; matchsyntax: PREFETCH (); - if (SYNTAX (*d++) != (enum syntaxcode) mcnt) - goto fail; + /* Can't use *d++ here; SYNTAX may be an 
unsafe macro. */ + d++; + if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt) + goto fail; SET_REGS_MATCHED (); break; @@ -4294,8 +5149,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) mcnt = (int) Sword; matchnotsyntax: PREFETCH (); - if (SYNTAX (*d++) == (enum syntaxcode) mcnt) - goto fail; + /* Can't use *d++ here; SYNTAX may be an unsafe macro. */ + d++; + if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt) + goto fail; SET_REGS_MATCHED (); break; @@ -4308,7 +5165,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) SET_REGS_MATCHED (); d++; break; - + case notwordchar: DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n"); PREFETCH (); @@ -4318,7 +5175,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) d++; break; #endif /* not emacs */ - + default: abort (); } @@ -4343,7 +5200,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) if (p < pend) { boolean is_a_jump_n = false; - + /* If failed to a backwards jump that's part of a repetition loop, need to pop this failure point and use the next one. */ switch ((re_opcode_t) *p) @@ -4355,7 +5212,7 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) case jump: p1 = p + 1; EXTRACT_NUMBER_AND_INCR (mcnt, p1); - p1 += mcnt; + p1 += mcnt; if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n) || (!is_a_jump_n @@ -4386,10 +5243,10 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) /* We are passed P pointing to a register number after a start_memory. - + Return true if the pattern up to the corresponding stop_memory can match the empty string, and false otherwise. - + If we find the matching stop_memory, sets P to point to one past its number. Otherwise, sets P to an undefined byte less than or equal to END. @@ -4403,20 +5260,20 @@ group_match_null_string_p (p, end, reg_info) int mcnt; /* Point to after the args to the start_memory. 
*/ unsigned char *p1 = *p + 2; - + while (p1 < end) { /* Skip over opcodes that can match nothing, and return true or false, as appropriate, when we get to one that can't, or to the matching stop_memory. */ - + switch ((re_opcode_t) *p1) { /* Could be either a loop or a series of alternatives. */ case on_failure_jump: p1++; EXTRACT_NUMBER_AND_INCR (mcnt, p1); - + /* If the next operation is not a jump backwards in the pattern. */ @@ -4430,7 +5287,7 @@ group_match_null_string_p (p, end, reg_info) /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6 /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3 - /exactn/1/c + /exactn/1/c So, we have to first go through the first (n-1) alternatives and then deal with the last one separately. */ @@ -4446,19 +5303,19 @@ group_match_null_string_p (p, end, reg_info) is, including the ending `jump_past_alt' and its number. */ - if (!alt_match_null_string_p (p1, p1 + mcnt - 3, + if (!alt_match_null_string_p (p1, p1 + mcnt - 3, reg_info)) return false; /* Move to right after this alternative, including the jump_past_alt. */ - p1 += mcnt; + p1 += mcnt; /* Break if it's the beginning of an n-th alternative that doesn't begin with an on_failure_jump. */ if ((re_opcode_t) *p1 != on_failure_jump) break; - + /* Still have to check that it's not an n-th alternative that starts with an on_failure_jump. */ p1++; @@ -4483,14 +5340,14 @@ group_match_null_string_p (p, end, reg_info) } /* if mcnt > 0 */ break; - + case stop_memory: assert (p1[1] == **p); *p = p1 + 2; return true; - - default: + + default: if (!common_op_match_null_string_p (&p1, end, reg_info)) return false; } @@ -4503,7 +5360,7 @@ group_match_null_string_p (p, end, reg_info) /* Similar to group_match_null_string_p, but doesn't deal with alternatives: It expects P to be the first byte of a single alternative and END one byte past the last. The alternative can contain groups. 
*/ - + static boolean alt_match_null_string_p (p, end, reg_info) unsigned char *p, *end; @@ -4511,12 +5368,12 @@ alt_match_null_string_p (p, end, reg_info) { int mcnt; unsigned char *p1 = p; - + while (p1 < end) { - /* Skip over opcodes that can match nothing, and break when we get + /* Skip over opcodes that can match nothing, and break when we get to one that can't. */ - + switch ((re_opcode_t) *p1) { /* It's a loop. */ @@ -4525,8 +5382,8 @@ alt_match_null_string_p (p, end, reg_info) EXTRACT_NUMBER_AND_INCR (mcnt, p1); p1 += mcnt; break; - - default: + + default: if (!common_op_match_null_string_p (&p1, end, reg_info)) return false; } @@ -4537,8 +5394,8 @@ alt_match_null_string_p (p, end, reg_info) /* Deals with the ops common to group_match_null_string_p and - alt_match_null_string_p. - + alt_match_null_string_p. + Sets P to one after the op and its arguments, if any. */ static boolean @@ -4573,7 +5430,7 @@ common_op_match_null_string_p (p, end, reg_info) reg_no = *p1; assert (reg_no > 0 && reg_no <= MAX_REGNUM); ret = group_match_null_string_p (&p1, end, reg_info); - + /* Have to set this here in case we're checking a group which contains a group and a back reference to it. */ @@ -4583,7 +5440,7 @@ common_op_match_null_string_p (p, end, reg_info) if (!ret) return false; break; - + /* If this is an optimized succeed_n for zero times, make the jump. */ case jump: EXTRACT_NUMBER_AND_INCR (mcnt, p1); @@ -4595,7 +5452,7 @@ common_op_match_null_string_p (p, end, reg_info) case succeed_n: /* Get to the number of times to succeed. */ - p1 += 2; + p1 += 2; EXTRACT_NUMBER_AND_INCR (mcnt, p1); if (mcnt == 0) @@ -4608,7 +5465,7 @@ common_op_match_null_string_p (p, end, reg_info) return false; break; - case duplicate: + case duplicate: if (!REG_MATCH_NULL_STRING_P (reg_info[*p1])) return false; break; @@ -4628,14 +5485,15 @@ common_op_match_null_string_p (p, end, reg_info) /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN bytes; nonzero otherwise. 
*/ - + static int bcmp_translate (s1, s2, len, translate) - unsigned char *s1, *s2; + const char *s1, *s2; register int len; - char *translate; + RE_TRANSLATE_TYPE translate; { - register unsigned char *p1 = s1, *p2 = s2; + register const unsigned char *p1 = (const unsigned char *) s1; + register const unsigned char *p2 = (const unsigned char *) s2; while (len) { if (translate[*p1++] != translate[*p2++]) return 1; @@ -4649,55 +5507,66 @@ bcmp_translate (s1, s2, len, translate) /* re_compile_pattern is the GNU regular expression compiler: it compiles PATTERN (of length SIZE) and puts the result in BUFP. Returns 0 if the pattern was valid, otherwise an error string. - + Assumes the `allocated' (and perhaps `buffer') and `translate' fields are set in BUFP on entry. - + We call regex_compile to do the actual compilation. */ const char * re_compile_pattern (pattern, length, bufp) const char *pattern; - int length; + size_t length; struct re_pattern_buffer *bufp; { reg_errcode_t ret; - + /* GNU code is written to assume at least RE_NREGS registers will be set (and at least one extra will be -1). */ bufp->regs_allocated = REGS_UNALLOCATED; - + /* And GNU code determines whether or not to get register information by passing null for the REGS argument to re_match, etc., not by setting no_sub. */ bufp->no_sub = 0; - + /* Match anchors at newline. */ bufp->newline_anchor = 1; - + ret = regex_compile (pattern, length, re_syntax_options, bufp); - return re_error_msg[(int) ret]; -} + if (!ret) + return NULL; + return gettext (re_error_msgid + re_error_msgid_idx[(int) ret]); +} +#ifdef _LIBC +weak_alias (__re_compile_pattern, re_compile_pattern) +#endif /* Entry points compatible with 4.2 BSD regex library. We don't define - them if this is an Emacs or POSIX compilation. */ + them unless specifically requested. */ -#if !defined (emacs) && !defined (_POSIX_SOURCE) +#if defined _REGEX_RE_COMP || defined _LIBC /* BSD has one and only one pattern buffer. 
*/ static struct re_pattern_buffer re_comp_buf; char * +#ifdef _LIBC +/* Make these definitions weak in libc, so POSIX programs can redefine + these names if they don't use our functions, and still use + regcomp/regexec below without link errors. */ +weak_function +#endif re_comp (s) const char *s; { reg_errcode_t ret; - + if (!s) { if (!re_comp_buf.buffer) - return "No previous regular expression"; + return gettext ("No previous regular expression"); return 0; } @@ -4705,12 +5574,14 @@ re_comp (s) { re_comp_buf.buffer = (unsigned char *) malloc (200); if (re_comp_buf.buffer == NULL) - return "Memory exhausted"; + return (char *) gettext (re_error_msgid + + re_error_msgid_idx[(int) REG_ESPACE]); re_comp_buf.allocated = 200; re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH); if (re_comp_buf.fastmap == NULL) - return "Memory exhausted"; + return (char *) gettext (re_error_msgid + + re_error_msgid_idx[(int) REG_ESPACE]); } /* Since `re_exec' always passes NULL for the `regs' argument, we @@ -4720,13 +5591,19 @@ re_comp (s) re_comp_buf.newline_anchor = 1; ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf); - - /* Yes, we're discarding `const' here. */ - return (char *) re_error_msg[(int) ret]; + + if (!ret) + return NULL; + + /* Yes, we're discarding `const' here if !HAVE_LIBINTL. */ + return (char *) gettext (re_error_msgid + re_error_msgid_idx[(int) ret]); } int +#ifdef _LIBC +weak_function +#endif re_exec (s) const char *s; { @@ -4734,7 +5611,8 @@ re_exec (s) return 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0); } -#endif /* not emacs and not _POSIX_SOURCE */ + +#endif /* _REGEX_RE_COMP */ /* POSIX.2 functions. Don't define these for Emacs. 
*/ @@ -4751,7 +5629,8 @@ re_exec (s) REG_EXTENDED bit in CFLAGS is set; otherwise, to RE_SYNTAX_POSIX_BASIC; `newline_anchor' to REG_NEWLINE being set in CFLAGS; - `fastmap' and `fastmap_accurate' to zero; + `fastmap' to an allocated space for the fastmap; + `fastmap_accurate' to zero; `re_nsub' to the number of subexpressions in PATTERN. PATTERN is the address of the pattern string. @@ -4777,11 +5656,11 @@ re_exec (s) int regcomp (preg, pattern, cflags) regex_t *preg; - const char *pattern; + const char *pattern; int cflags; { reg_errcode_t ret; - unsigned syntax + reg_syntax_t syntax = (cflags & REG_EXTENDED) ? RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC; @@ -4789,24 +5668,23 @@ regcomp (preg, pattern, cflags) preg->buffer = 0; preg->allocated = 0; preg->used = 0; - - /* Don't bother to use a fastmap when searching. This simplifies the - REG_NEWLINE case: if we used a fastmap, we'd have to put all the - characters after newlines into the fastmap. This way, we just try - every character. */ - preg->fastmap = 0; - + + /* Try to allocate space for the fastmap. */ + preg->fastmap = (char *) malloc (1 << BYTEWIDTH); + if (cflags & REG_ICASE) { unsigned i; - - preg->translate = (char *) malloc (CHAR_SET_SIZE); + + preg->translate + = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE + * sizeof (*(RE_TRANSLATE_TYPE)0)); if (preg->translate == NULL) return (int) REG_ESPACE; /* Map uppercase characters to corresponding lowercase ones. */ for (i = 0; i < CHAR_SET_SIZE; i++) - preg->translate[i] = ISUPPER (i) ? tolower (i) : i; + preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i; } else preg->translate = NULL; @@ -4824,38 +5702,54 @@ regcomp (preg, pattern, cflags) preg->no_sub = !!(cflags & REG_NOSUB); - /* POSIX says a null character in the pattern terminates it, so we + /* POSIX says a null character in the pattern terminates it, so we can use strlen here in compiling the pattern. 
*/ ret = regex_compile (pattern, strlen (pattern), syntax, preg); - + /* POSIX doesn't distinguish between an unmatched open-group and an unmatched close-group: both are REG_EPAREN. */ if (ret == REG_ERPAREN) ret = REG_EPAREN; - + + if (ret == REG_NOERROR && preg->fastmap) + { + /* Compute the fastmap now, since regexec cannot modify the pattern + buffer. */ + if (re_compile_fastmap (preg) == -2) + { + /* Some error occured while computing the fastmap, just forget + about it. */ + free (preg->fastmap); + preg->fastmap = NULL; + } + } + return (int) ret; } +#ifdef _LIBC +weak_alias (__regcomp, regcomp) +#endif /* regexec searches for a given pattern, specified by PREG, in the string STRING. - + If NMATCH is zero or REG_NOSUB was set in the cflags argument to `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at least NMATCH elements, and we set them to the offsets of the corresponding matched substrings. - + EFLAGS specifies `execution flags' which affect matching: if REG_NOTBOL is set, then ^ does not match at the beginning of the string; if REG_NOTEOL is set, then $ does not match at the end. - + We return 0 if we find a match and REG_NOMATCH if not. */ int regexec (preg, string, nmatch, pmatch, eflags) const regex_t *preg; - const char *string; - size_t nmatch; - regmatch_t pmatch[]; + const char *string; + size_t nmatch; + regmatch_t pmatch[]; int eflags; { int ret; @@ -4865,29 +5759,29 @@ regexec (preg, string, nmatch, pmatch, eflags) boolean want_reg_info = !preg->no_sub && nmatch > 0; private_preg = *preg; - + private_preg.not_bol = !!(eflags & REG_NOTBOL); private_preg.not_eol = !!(eflags & REG_NOTEOL); - + /* The user has told us exactly how many registers to return information about, via `nmatch'. We have to pass that on to the matching routines. 
*/ private_preg.regs_allocated = REGS_FIXED; - + if (want_reg_info) { regs.num_regs = nmatch; - regs.start = TALLOC (nmatch, regoff_t); - regs.end = TALLOC (nmatch, regoff_t); - if (regs.start == NULL || regs.end == NULL) + regs.start = TALLOC (nmatch * 2, regoff_t); + if (regs.start == NULL) return (int) REG_NOMATCH; + regs.end = regs.start + nmatch; } /* Perform the searching operation. */ ret = re_search (&private_preg, string, len, /* start: */ 0, /* range: */ len, want_reg_info ? ®s : (struct re_registers *) 0); - + /* Copy the register information to the POSIX structure. */ if (want_reg_info) { @@ -4904,12 +5798,14 @@ regexec (preg, string, nmatch, pmatch, eflags) /* If we needed the temporary register info, free the space now. */ free (regs.start); - free (regs.end); } /* We want zero return to mean success, unlike `re_search'. */ return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH; } +#ifdef _LIBC +weak_alias (__regexec, regexec) +#endif /* Returns a message corresponding to an error code, ERRCODE, returned @@ -4926,35 +5822,38 @@ regerror (errcode, preg, errbuf, errbuf_size) size_t msg_size; if (errcode < 0 - || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0]))) - /* Only error codes returned by the rest of the code should be passed + || errcode >= (int) (sizeof (re_error_msgid_idx) + / sizeof (re_error_msgid_idx[0]))) + /* Only error codes returned by the rest of the code should be passed to this routine. If we are given anything else, or if other regex code generates an invalid error code, then the program has a bug. Dump core so we can fix it. */ abort (); - msg = re_error_msg[errcode]; - - /* POSIX doesn't require that we do anything in this case, but why - not be nice. */ - if (! msg) - msg = "Success"; + msg = gettext (re_error_msgid + re_error_msgid_idx[errcode]); msg_size = strlen (msg) + 1; /* Includes the null. 
*/ - + if (errbuf_size != 0) { if (msg_size > errbuf_size) { - strncpy (errbuf, msg, errbuf_size - 1); +#if defined HAVE_MEMPCPY || defined _LIBC + *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0'; +#else + memcpy (errbuf, msg, errbuf_size - 1); errbuf[errbuf_size - 1] = 0; +#endif } else - strcpy (errbuf, msg); + memcpy (errbuf, msg, msg_size); } return msg_size; } +#ifdef _LIBC +weak_alias (__regerror, regerror) +#endif /* Free dynamically allocated space used by PREG. */ @@ -4966,7 +5865,7 @@ regfree (preg) if (preg->buffer != NULL) free (preg->buffer); preg->buffer = NULL; - + preg->allocated = 0; preg->used = 0; @@ -4979,13 +5878,8 @@ regfree (preg) free (preg->translate); preg->translate = NULL; } +#ifdef _LIBC +weak_alias (__regfree, regfree) +#endif #endif /* not emacs */ - -/* -Local variables: -make-backup-files: t -version-control: t -trim-versions-without-asking: nil -End: -*/ diff --git a/gnu/usr.bin/grep/regex.h b/gnu/usr.bin/grep/regex.h index 49dd893bfa6..ebbd57e4274 100644 --- a/gnu/usr.bin/grep/regex.h +++ b/gnu/usr.bin/grep/regex.h @@ -1,51 +1,64 @@ +#ifndef _REGEX_H + /* Definitions for data structures and routines for the regular expression library, version 0.12. + Copyright (C) 1985,89,90,91,92,93,95,96,97,98 Free Software Foundation, Inc. - Copyright (C) 1985, 1989, 1990, 1991, 1992, 1993 Free Software Foundation, Inc. + This file is part of the GNU C Library. Its master source is NOT part of + the C library, however. The master source lives in /gd/gnu/lib. - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. 
+ The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU Library General Public License as + published by the Free Software Foundation; either version 2 of the + License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + Library General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + You should have received a copy of the GNU Library General Public + License along with the GNU C Library; see the file COPYING.LIB. If not, + write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330, + Boston, MA 02111-1307, USA. */ - $Id: regex.h,v 1.1.1.1 1995/10/18 08:40:18 deraadt Exp $ -*/ +#ifndef _REGEX_H +#define _REGEX_H 1 -#ifndef __REGEXP_LIBRARY_H__ -#define __REGEXP_LIBRARY_H__ +/* Allow the use in C++ code. */ +#ifdef __cplusplus +extern "C" { +#endif /* POSIX says that must be included (by the caller) before . */ -#ifdef VMS +#if !defined _POSIX_C_SOURCE && !defined _POSIX_SOURCE && defined VMS /* VMS doesn't have `size_t' in , even though POSIX says it should be there. */ -#include +# include #endif +/* The following two types have to be signed and unsigned integer type + wide enough to hold a value of a pointer. For most ANSI compilers + ptrdiff_t and size_t should be likely OK. Still size of these two + types is 2 for Microsoft C. Ugh... 
*/ +typedef long int s_reg_t; +typedef unsigned long int active_reg_t; /* The following bits are used to determine the regexp syntax we recognize. The set/not-set meanings are chosen so that Emacs syntax remains the value 0. The bits are given in alphabetical order, and the definitions shifted by one from the previous bit; thus, when we add or remove a bit, only one other definition need change. */ -typedef unsigned reg_syntax_t; +typedef unsigned long int reg_syntax_t; /* If this bit is not set, then \ inside a bracket expression is literal. If set, then such a \ quotes the following character. */ -#define RE_BACKSLASH_ESCAPE_IN_LISTS (1) +#define RE_BACKSLASH_ESCAPE_IN_LISTS ((unsigned long int) 1) /* If this bit is not set, then + and ? are operators, and \+ and \? are - literals. + literals. If set, then \+ and \? are operators and + and ? are literals. */ #define RE_BK_PLUS_QM (RE_BACKSLASH_ESCAPE_IN_LISTS << 1) @@ -61,7 +74,7 @@ typedef unsigned reg_syntax_t; ^ is an anchor if it is at the beginning of a regular expression or after an open-group or an alternation operator; $ is an anchor if it is at the end of a regular expression, or - before a close-group or an alternation operator. + before a close-group or an alternation operator. This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because POSIX draft 11.2 says that * etc. in leading positions is undefined. @@ -72,7 +85,7 @@ typedef unsigned reg_syntax_t; /* If this bit is set, then special characters are always special regardless of where they are in the pattern. If this bit is not set, then special characters are special only in - some contexts; otherwise they are ordinary. Specifically, + some contexts; otherwise they are ordinary. Specifically, * + ? and intervals are only special when not after the beginning, open-group, or alternation operator. 
*/ #define RE_CONTEXT_INDEP_OPS (RE_CONTEXT_INDEP_ANCHORS << 1) @@ -94,7 +107,7 @@ typedef unsigned reg_syntax_t; #define RE_HAT_LISTS_NOT_NEWLINE (RE_DOT_NOT_NULL << 1) /* If this bit is set, either \{...\} or {...} defines an - interval, depending on RE_NO_BK_BRACES. + interval, depending on RE_NO_BK_BRACES. If not set, \{, \}, {, and } are literals. */ #define RE_INTERVALS (RE_HAT_LISTS_NOT_NEWLINE << 1) @@ -119,7 +132,7 @@ typedef unsigned reg_syntax_t; If not set, then \ is a back-reference. */ #define RE_NO_BK_REFS (RE_NO_BK_PARENS << 1) -/* If this bit is set, then | is an alternation operator, and \| is literal. +/* If this bit is set, then | is an alternation operator, and \| is literal. If not set, then \| is an alternation operator, and | is literal. */ #define RE_NO_BK_VBAR (RE_NO_BK_REFS << 1) @@ -133,6 +146,22 @@ typedef unsigned reg_syntax_t; If not set, then an unmatched ) is invalid. */ #define RE_UNMATCHED_RIGHT_PAREN_ORD (RE_NO_EMPTY_RANGES << 1) +/* If this bit is set, succeed as soon as we match the whole pattern, + without further backtracking. */ +#define RE_NO_POSIX_BACKTRACKING (RE_UNMATCHED_RIGHT_PAREN_ORD << 1) + +/* If this bit is set, do not process the GNU regex operators. + If not set, then the GNU regex operators are recognized. */ +#define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1) + +/* If this bit is set, turn on internal regex debugging. + If not set, and debugging was on, turn it off. + This only works if regex.c is compiled -DDEBUG. + We define this bit always, so that all that's needed to turn on + debugging is to recompile regex.c; the calling code can always have + this bit set, and it won't affect anything in the normal case. */ +#define RE_DEBUG (RE_NO_GNU_OPS << 1) + /* This global variable defines the particular regexp syntax to use (for some interfaces). 
When a regexp is compiled, the syntax used is stored in the pattern buffer, so changing this does not affect @@ -141,18 +170,24 @@ extern reg_syntax_t re_syntax_options; /* Define combinations of the above bits for the standard possibilities. (The [[[ comments delimit what gets put into the Texinfo file, so - don't delete them!) */ + don't delete them!) */ /* [[[begin syntaxes]]] */ #define RE_SYNTAX_EMACS 0 #define RE_SYNTAX_AWK \ - (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ - | RE_NO_BK_PARENS | RE_NO_BK_REFS \ - | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ - | RE_UNMATCHED_RIGHT_PAREN_ORD) + (RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DOT_NOT_NULL \ + | RE_NO_BK_PARENS | RE_NO_BK_REFS \ + | RE_NO_BK_VBAR | RE_NO_EMPTY_RANGES \ + | RE_DOT_NEWLINE | RE_CONTEXT_INDEP_ANCHORS \ + | RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS) + +#define RE_SYNTAX_GNU_AWK \ + ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS | RE_DEBUG) \ + & ~(RE_DOT_NOT_NULL | RE_INTERVALS | RE_CONTEXT_INDEP_OPS)) #define RE_SYNTAX_POSIX_AWK \ - (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS) + (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \ + | RE_INTERVALS | RE_NO_GNU_OPS) #define RE_SYNTAX_GREP \ (RE_BK_PLUS_QM | RE_CHAR_CLASSES \ @@ -206,9 +241,10 @@ extern reg_syntax_t re_syntax_options; (erroneously) define this in other header files, but we want our value, so remove any previous define. */ #ifdef RE_DUP_MAX -#undef RE_DUP_MAX +# undef RE_DUP_MAX #endif -#define RE_DUP_MAX ((1 << 15) - 1) +/* If sizeof(int) == 2, then ((1 << 15) - 1) overflows. */ +#define RE_DUP_MAX (0x7fff) /* POSIX `cflags' bits (i.e., information for `regcomp'). */ @@ -220,7 +256,7 @@ extern reg_syntax_t re_syntax_options; /* If this bit is set, then ignore case when matching. If not set, then case is significant. */ #define REG_ICASE (REG_EXTENDED << 1) - + /* If this bit is set, then anchors do not match at newline characters in the string. If not set, then anchors do match at newlines. 
*/ @@ -248,6 +284,10 @@ extern reg_syntax_t re_syntax_options; `re_error_msg' table in regex.c. */ typedef enum { +#ifdef _XOPEN_SOURCE + REG_ENOSYS = -1, /* This will never happen for this implementation. */ +#endif + REG_NOERROR = 0, /* Success. */ REG_NOMATCH, /* Didn't find a match (for regexec). */ @@ -259,7 +299,7 @@ typedef enum REG_EESCAPE, /* Trailing backslash. */ REG_ESUBREG, /* Invalid back reference. */ REG_EBRACK, /* Unmatched left bracket. */ - REG_EPAREN, /* Parenthesis imbalance. */ + REG_EPAREN, /* Parenthesis imbalance. */ REG_EBRACE, /* Unmatched \{. */ REG_BADBR, /* Invalid contents of \{\}. */ REG_ERANGE, /* Invalid range end. */ @@ -278,6 +318,10 @@ typedef enum compiled, the `re_nsub' field is available. All other fields are private to the regex routines. */ +#ifndef RE_TRANSLATE_TYPE +# define RE_TRANSLATE_TYPE char * +#endif + struct re_pattern_buffer { /* [[[begin pattern_buffer]]] */ @@ -287,10 +331,10 @@ struct re_pattern_buffer unsigned char *buffer; /* Number of bytes to which `buffer' points. */ - unsigned long allocated; + unsigned long int allocated; /* Number of bytes actually used in `buffer'. */ - unsigned long used; + unsigned long int used; /* Syntax setting with which the pattern was compiled. */ reg_syntax_t syntax; @@ -304,7 +348,7 @@ struct re_pattern_buffer comparing them, or zero for no translation. The translation is applied to a pattern when it is compiled and to a string when it is matched. */ - char *translate; + RE_TRANSLATE_TYPE translate; /* Number of subexpressions found by the compiler. */ size_t re_nsub; @@ -334,7 +378,7 @@ struct re_pattern_buffer unsigned no_sub : 1; /* If set, a beginning-of-line anchor doesn't match at the - beginning of the string. */ + beginning of the string. */ unsigned not_bol : 1; /* Similarly for an end-of-line anchor. */ @@ -347,11 +391,6 @@ struct re_pattern_buffer }; typedef struct re_pattern_buffer regex_t; - - -/* search.c (search_buffer) in Emacs needs this one opcode value. 
It is - defined both in `regex.c' and here. */ -#define RE_EXACTN_VALUE 1 /* Type for byte offsets within the string. POSIX mandates this. */ typedef int regoff_t; @@ -371,7 +410,7 @@ struct re_registers `re_match_2' returns information about at least this many registers the first time a `regs' structure is passed. */ #ifndef RE_NREGS -#define RE_NREGS 30 +# define RE_NREGS 30 #endif @@ -394,11 +433,11 @@ typedef struct #if __STDC__ -#define _RE_ARGS(args) args +# define _RE_ARGS(args) args #else /* not __STDC__ */ -#define _RE_ARGS(args) () +# define _RE_ARGS(args) () #endif /* not __STDC__ */ @@ -410,7 +449,7 @@ extern reg_syntax_t re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); and syntax given by the global `re_syntax_options', into the buffer BUFFER. Return NULL if successful, and an error string if not. */ extern const char *re_compile_pattern - _RE_ARGS ((const char *pattern, int length, + _RE_ARGS ((const char *pattern, size_t length, struct re_pattern_buffer *buffer)); @@ -446,7 +485,7 @@ extern int re_match /* Relates to `re_match' as `re_search_2' relates to `re_search'. */ -extern int re_match_2 +extern int re_match_2 _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, int length1, const char *string2, int length2, int start, struct re_registers *regs, int stop)); @@ -468,21 +507,33 @@ extern void re_set_registers _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, unsigned num_regs, regoff_t *starts, regoff_t *ends)); +#if defined _REGEX_RE_COMP || defined _LIBC +# ifndef _CRAY /* 4.2 bsd compatibility. */ extern char *re_comp _RE_ARGS ((const char *)); extern int re_exec _RE_ARGS ((const char *)); +# endif +#endif /* POSIX compatibility. 
*/ -extern int regcomp _RE_ARGS ((regex_t *preg, const char *pattern, int cflags)); -extern int regexec - _RE_ARGS ((const regex_t *preg, const char *string, size_t nmatch, - regmatch_t pmatch[], int eflags)); -extern size_t regerror - _RE_ARGS ((int errcode, const regex_t *preg, char *errbuf, - size_t errbuf_size)); -extern void regfree _RE_ARGS ((regex_t *preg)); - -#endif /* not __REGEXP_LIBRARY_H__ */ +extern int regcomp _RE_ARGS ((regex_t *__preg, const char *__pattern, + int __cflags)); + +extern int regexec _RE_ARGS ((const regex_t *__preg, + const char *__string, size_t __nmatch, + regmatch_t __pmatch[], int __eflags)); + +extern size_t regerror _RE_ARGS ((int __errcode, const regex_t *__preg, + char *__errbuf, size_t __errbuf_size)); + +extern void regfree _RE_ARGS ((regex_t *__preg)); + + +#ifdef __cplusplus +} +#endif /* C++ */ + +#endif /* regex.h */ /* Local variables: @@ -491,3 +542,46 @@ version-control: t trim-versions-without-asking: nil End: */ +/* Document internal interfaces. 
*/ +extern reg_syntax_t __re_set_syntax _RE_ARGS ((reg_syntax_t syntax)); + +extern const char *__re_compile_pattern + _RE_ARGS ((const char *pattern, size_t length, + struct re_pattern_buffer *buffer)); + +extern int __re_compile_fastmap _RE_ARGS ((struct re_pattern_buffer *buffer)); + +extern int __re_search + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, + int length, int start, int range, struct re_registers *regs)); + +extern int __re_search_2 + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, + int length1, const char *string2, int length2, + int start, int range, struct re_registers *regs, int stop)); + +extern int __re_match + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string, + int length, int start, struct re_registers *regs)); + +extern int __re_match_2 + _RE_ARGS ((struct re_pattern_buffer *buffer, const char *string1, + int length1, const char *string2, int length2, + int start, struct re_registers *regs, int stop)); + +extern void __re_set_registers + _RE_ARGS ((struct re_pattern_buffer *buffer, struct re_registers *regs, + unsigned num_regs, regoff_t *starts, regoff_t *ends)); + +extern int __regcomp _RE_ARGS ((regex_t *__preg, const char *__pattern, + int __cflags)); + +extern int __regexec _RE_ARGS ((const regex_t *__preg, + const char *__string, size_t __nmatch, + regmatch_t __pmatch[], int __eflags)); + +extern size_t __regerror _RE_ARGS ((int __errcode, const regex_t *__preg, + char *__errbuf, size_t __errbuf_size)); + +extern void __regfree _RE_ARGS ((regex_t *__preg)); +#endif diff --git a/gnu/usr.bin/grep/savedir.c b/gnu/usr.bin/grep/savedir.c new file mode 100644 index 00000000000..29b3842c040 --- /dev/null +++ b/gnu/usr.bin/grep/savedir.c @@ -0,0 +1,137 @@ +/* savedir.c -- save the list of files in a directory in a string + Copyright (C) 1990, 1997, 1998, 1999, 2000 Free Software Foundation, Inc. 
+ + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software Foundation, + Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ + +/* Written by David MacKenzie . */ + +#if HAVE_CONFIG_H +# include +#endif + +#include + +#if HAVE_UNISTD_H +# include +#endif + +#if HAVE_DIRENT_H +# include +# define NAMLEN(dirent) strlen((dirent)->d_name) +#else +# define dirent direct +# define NAMLEN(dirent) (dirent)->d_namlen +# if HAVE_SYS_NDIR_H +# include +# endif +# if HAVE_SYS_DIR_H +# include +# endif +# if HAVE_NDIR_H +# include +# endif +#endif + +#ifdef CLOSEDIR_VOID +/* Fake a return value. */ +# define CLOSEDIR(d) (closedir (d), 0) +#else +# define CLOSEDIR(d) closedir (d) +#endif + +#ifdef STDC_HEADERS +# include +# include +#else +char *malloc (); +char *realloc (); +#endif +#ifndef NULL +# define NULL 0 +#endif + +#ifndef stpcpy +char *stpcpy (); +#endif + +#include "savedir.h" + +/* Return a freshly allocated string containing the filenames + in directory DIR, separated by '\0' characters; + the end is marked by two '\0' characters in a row. + NAME_SIZE is the number of bytes to initially allocate + for the string; it will be enlarged as needed. + Return NULL if DIR cannot be opened or if out of memory. 
*/ + +char * +savedir (const char *dir, off_t name_size) +{ + DIR *dirp; + struct dirent *dp; + char *name_space; + char *namep; + + dirp = opendir (dir); + if (dirp == NULL) + return NULL; + + /* Be sure name_size is at least `1' so there's room for + the final NUL byte. */ + name_size += !name_size; + + name_space = (char *) malloc (name_size); + if (name_space == NULL) + { + closedir (dirp); + return NULL; + } + namep = name_space; + + while ((dp = readdir (dirp)) != NULL) + { + /* Skip "." and ".." (some NFS filesystems' directories lack them). */ + if (dp->d_name[0] != '.' + || (dp->d_name[1] != '\0' + && (dp->d_name[1] != '.' || dp->d_name[2] != '\0'))) + { + off_t size_needed = (namep - name_space) + NAMLEN (dp) + 2; + + if (size_needed > name_size) + { + char *new_name_space; + + while (size_needed > name_size) + name_size += 1024; + + new_name_space = realloc (name_space, name_size); + if (new_name_space == NULL) + { + closedir (dirp); + return NULL; + } + namep += new_name_space - name_space; + name_space = new_name_space; + } + namep = stpcpy (namep, dp->d_name) + 1; + } + } + *namep = '\0'; + if (CLOSEDIR (dirp)) + { + free (name_space); + return NULL; + } + return name_space; +} diff --git a/gnu/usr.bin/grep/savedir.h b/gnu/usr.bin/grep/savedir.h new file mode 100644 index 00000000000..89be04d0238 --- /dev/null +++ b/gnu/usr.bin/grep/savedir.h @@ -0,0 +1,15 @@ +#if !defined SAVEDIR_H_ +# define SAVEDIR_H_ + +# ifndef PARAMS +# if defined PROTOTYPES || (defined __STDC__ && __STDC__) +# define PARAMS(Args) Args +# else +# define PARAMS(Args) () +# endif +# endif + +char * +savedir PARAMS ((const char *dir, off_t name_size)); + +#endif diff --git a/gnu/usr.bin/grep/search.c b/gnu/usr.bin/grep/search.c index 1a31df684cd..9763dba5f5b 100644 --- a/gnu/usr.bin/grep/search.c +++ b/gnu/usr.bin/grep/search.c @@ -1,5 +1,5 @@ /* search.c - searching subroutines using dfa, kwset and regex for grep. - Copyright (C) 1992 Free Software Foundation, Inc. 
+ Copyright 1992, 1998, 2000 Free Software Foundation, Inc. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by @@ -13,84 +13,37 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + 02111-1307, USA. */ - Written August 1992 by Mike Haertel. */ +/* Written August 1992 by Mike Haertel. */ -#ifndef lint -static char rcsid[] = "$Id: search.c,v 1.1.1.1 1995/10/18 08:40:18 deraadt Exp $"; -#endif /* not lint */ - -#include - -#ifdef STDC_HEADERS -#include -#include -#else -#define UCHAR_MAX 255 -#include -extern char *malloc(); -#endif - -#ifdef HAVE_MEMCHR -#include -#ifdef NEED_MEMORY_H -#include -#endif -#else -#ifdef __STDC__ -extern void *memchr(); -#else -extern char *memchr(); +#ifdef HAVE_CONFIG_H +# include #endif -#endif - -#if defined(HAVE_STRING_H) || defined(STDC_HEADERS) -#undef bcopy -#define bcopy(s, d, n) memcpy((d), (s), (n)) -#endif - -#ifdef isascii -#define ISALNUM(C) (isascii(C) && isalnum(C)) -#define ISUPPER(C) (isascii(C) && isupper(C)) -#else -#define ISALNUM(C) isalnum(C) -#define ISUPPER(C) isupper(C) -#endif - -#define TOLOWER(C) (ISUPPER(C) ? 
tolower(C) : (C)) - +#include +#include "system.h" #include "grep.h" +#include "regex.h" #include "dfa.h" #include "kwset.h" -#include "regex.h" #define NCHAR (UCHAR_MAX + 1) -#if __STDC__ -static void Gcompile(char *, size_t); -static void Ecompile(char *, size_t); -static char *EGexecute(char *, size_t, char **); -static void Fcompile(char *, size_t); -static char *Fexecute(char *, size_t, char **); -#else -static void Gcompile(); -static void Ecompile(); -static char *EGexecute(); -static void Fcompile(); -static char *Fexecute(); -#endif +static void Gcompile PARAMS((char *, size_t)); +static void Ecompile PARAMS((char *, size_t)); +static char *EGexecute PARAMS((char *, size_t, char **)); +static void Fcompile PARAMS((char *, size_t)); +static char *Fexecute PARAMS((char *, size_t, char **)); +static void kwsinit PARAMS((void)); /* Here is the matchers vector for the main program. */ struct matcher matchers[] = { { "default", Gcompile, EGexecute }, { "grep", Gcompile, EGexecute }, - { "ggrep", Gcompile, EGexecute }, { "egrep", Ecompile, EGexecute }, - { "posix-egrep", Ecompile, EGexecute }, - { "gegrep", Ecompile, EGexecute }, + { "awk", Ecompile, EGexecute }, { "fgrep", Fcompile, Fexecute }, - { "gfgrep", Fcompile, Fexecute }, { 0, 0, 0 }, }; @@ -101,7 +54,7 @@ struct matcher matchers[] = { static struct dfa dfa; /* Regex compiled regexp. */ -static struct re_pattern_buffer regex; +static struct re_pattern_buffer regexbuf; /* KWset compiled pattern. For Ecompile and Gcompile, we compile a list of strings, at least one of which is known to occur in @@ -114,14 +67,13 @@ static kwset_t kwset; static int lastexact; void -dfaerror(mesg) - char *mesg; +dfaerror (char const *mesg) { fatal(mesg, 0); } static void -kwsinit() +kwsinit (void) { static char trans[NCHAR]; int i; @@ -132,14 +84,14 @@ kwsinit() if (!(kwset = kwsalloc(match_icase ? 
trans : (char *) 0))) fatal("memory exhausted", 0); -} +} /* If the DFA turns out to have some set of fixed strings one of which must occur in the match, then we build a kwset matcher to find those strings, and thus quickly filter out impossible matches. */ static void -kwsmusts() +kwsmusts (void) { struct dfamust *dm; char *err; @@ -173,23 +125,16 @@ kwsmusts() } static void -Gcompile(pattern, size) - char *pattern; - size_t size; +Gcompile (char *pattern, size_t size) { -#ifdef __STDC__ - const -#endif - char *err; + const char *err; re_set_syntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE); - dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase); + dfasyntax(RE_SYNTAX_GREP | RE_HAT_LISTS_NOT_NEWLINE, match_icase, eolbyte); - if ((err = re_compile_pattern(pattern, size, ®ex)) != 0) + if ((err = re_compile_pattern(pattern, size, ®exbuf)) != 0) fatal(err, 0); - dfainit(&dfa); - /* In the match_words and match_lines cases, we use a different pattern for the DFA matcher that will quickly throw out cases that won't work. Then if DFA succeeds we do some hairy stuff using the regex matcher @@ -200,7 +145,8 @@ Gcompile(pattern, size) (^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$). In the whole-line case, we use the pattern: ^(userpattern)$. - BUG: Using [A-Za-z_] is locale-dependent! */ + BUG: Using [A-Za-z_] is locale-dependent! 
+ So will use [:alnum:] */ char *n = malloc(size + 50); int i = 0; @@ -210,14 +156,14 @@ Gcompile(pattern, size) if (match_lines) strcpy(n, "^\\("); if (match_words) - strcpy(n, "\\(^\\|[^0-9A-Za-z_]\\)\\("); + strcpy(n, "\\(^\\|[^[:alnum:]_]\\)\\("); i = strlen(n); - bcopy(pattern, n + i, size); + memcpy(n + i, pattern, size); i += size; if (match_words) - strcpy(n + i, "\\)\\([^0-9A-Za-z_]\\|$\\)"); + strcpy(n + i, "\\)\\([^[:alnum:]_]\\|$\\)"); if (match_lines) strcpy(n + i, "\\)$"); @@ -231,31 +177,24 @@ Gcompile(pattern, size) } static void -Ecompile(pattern, size) - char *pattern; - size_t size; +Ecompile (char *pattern, size_t size) { -#ifdef __STDC__ - const -#endif - char *err; + const char *err; - if (strcmp(matcher, "posix-egrep") == 0) + if (strcmp(matcher, "awk") == 0) { - re_set_syntax(RE_SYNTAX_POSIX_EGREP); - dfasyntax(RE_SYNTAX_POSIX_EGREP, match_icase); + re_set_syntax(RE_SYNTAX_AWK); + dfasyntax(RE_SYNTAX_AWK, match_icase, eolbyte); } else { - re_set_syntax(RE_SYNTAX_EGREP); - dfasyntax(RE_SYNTAX_EGREP, match_icase); + re_set_syntax (RE_SYNTAX_POSIX_EGREP); + dfasyntax (RE_SYNTAX_POSIX_EGREP, match_icase, eolbyte); } - if ((err = re_compile_pattern(pattern, size, ®ex)) != 0) + if ((err = re_compile_pattern(pattern, size, ®exbuf)) != 0) fatal(err, 0); - dfainit(&dfa); - /* In the match_words and match_lines cases, we use a different pattern for the DFA matcher that will quickly throw out cases that won't work. Then if DFA succeeds we do some hairy stuff using the regex matcher @@ -266,7 +205,8 @@ Ecompile(pattern, size) (^|[^A-Za-z_])(userpattern)([^A-Za-z_]|$). In the whole-line case, we use the pattern: ^(userpattern)$. - BUG: Using [A-Za-z_] is locale-dependent! */ + BUG: Using [A-Za-z_] is locale-dependent! 
+ so will use the char class */ char *n = malloc(size + 50); int i = 0; @@ -276,14 +216,14 @@ Ecompile(pattern, size) if (match_lines) strcpy(n, "^("); if (match_words) - strcpy(n, "(^|[^0-9A-Za-z_])("); + strcpy(n, "(^|[^[:alnum:]_])("); i = strlen(n); - bcopy(pattern, n + i, size); + memcpy(n + i, pattern, size); i += size; if (match_words) - strcpy(n + i, ")([^0-9A-Za-z_]|$)"); + strcpy(n + i, ")([^[:alnum:]_]|$)"); if (match_lines) strcpy(n + i, ")$"); @@ -297,12 +237,10 @@ Ecompile(pattern, size) } static char * -EGexecute(buf, size, endp) - char *buf; - size_t size; - char **endp; +EGexecute (char *buf, size_t size, char **endp) { register char *buflim, *beg, *end, save; + char eol = eolbyte; int backref, start, len; struct kwsmatch kwsm; static struct re_registers regs; /* This is static on account of a BRAIN-DEAD @@ -320,10 +258,10 @@ EGexecute(buf, size, endp) goto failure; /* Narrow down to the line containing the candidate, and run it through DFA. */ - end = memchr(beg, '\n', buflim - beg); + end = memchr(beg, eol, buflim - beg); if (!end) end = buflim; - while (beg > buf && beg[-1] != '\n') + while (beg > buf && beg[-1] != eol) --beg; save = *end; if (kwsm.index < lastexact) @@ -347,10 +285,10 @@ EGexecute(buf, size, endp) if (!beg) goto failure; /* Narrow down to the line we've found. */ - end = memchr(beg, '\n', buflim - beg); + end = memchr(beg, eol, buflim - beg); if (!end) end = buflim; - while (beg > buf && beg[-1] != '\n') + while (beg > buf && beg[-1] != eol) --beg; /* Successful, no backreferences encountered! */ if (!backref) @@ -358,11 +296,12 @@ EGexecute(buf, size, endp) } /* If we've made it to this point, this means DFA has seen a probable match, and we need to run it through Regex. 
*/ - regex.not_eol = 0; - if ((start = re_search(®ex, beg, end - beg, 0, end - beg, ®s)) >= 0) + regexbuf.not_eol = 0; + if ((start = re_search(®exbuf, beg, end - beg, 0, end - beg, ®s)) >= 0) { len = regs.end[0] - start; - if (!match_lines && !match_words || match_lines && len == end - beg) + if ((!match_lines && !match_words) + || (match_lines && len == end - beg)) goto success; /* If -w, check if the match aligns with word boundaries. We do this iteratively because: @@ -373,15 +312,16 @@ EGexecute(buf, size, endp) if (match_words) while (start >= 0) { - if ((start == 0 || !WCHAR(beg[start - 1])) - && (len == end - beg || !WCHAR(beg[start + len]))) + if ((start == 0 || !WCHAR ((unsigned char) beg[start - 1])) + && (len == end - beg + || !WCHAR ((unsigned char) beg[start + len]))) goto success; if (len > 0) { /* Try a shorter length anchored at the same place. */ --len; - regex.not_eol = 1; - len = re_match(®ex, beg, start + len, start, ®s); + regexbuf.not_eol = 1; + len = re_match(®exbuf, beg, start + len, start, ®s); } if (len <= 0) { @@ -389,8 +329,8 @@ EGexecute(buf, size, endp) if (start == end - beg) break; ++start; - regex.not_eol = 0; - start = re_search(®ex, beg, end - beg, + regexbuf.not_eol = 0; + start = re_search(®exbuf, beg, end - beg, start, end - beg - start, ®s); len = regs.end[0] - start; } @@ -407,9 +347,7 @@ EGexecute(buf, size, endp) } static void -Fcompile(pattern, size) - char *pattern; - size_t size; +Fcompile (char *pattern, size_t size) { char *beg, *lim, *err; @@ -432,13 +370,11 @@ Fcompile(pattern, size) } static char * -Fexecute(buf, size, endp) - char *buf; - size_t size; - char **endp; +Fexecute (char *buf, size_t size, char **endp) { register char *beg, *try, *end; register size_t len; + char eol = eolbyte; struct kwsmatch kwsmatch; for (beg = buf; beg <= buf + size; ++beg) @@ -448,9 +384,9 @@ Fexecute(buf, size, endp) len = kwsmatch.size[0]; if (match_lines) { - if (beg > buf && beg[-1] != '\n') + if (beg > buf && beg[-1] != eol) 
continue; - if (beg + len < buf + size && beg[len] != '\n') + if (beg + len < buf + size && beg[len] != eol) continue; goto success; } @@ -474,7 +410,7 @@ Fexecute(buf, size, endp) return 0; success: - if ((end = memchr(beg + len, '\n', (buf + size) - (beg + len))) != 0) + if ((end = memchr(beg + len, eol, (buf + size) - (beg + len))) != 0) ++end; else end = buf + size; diff --git a/gnu/usr.bin/grep/stpcpy.c b/gnu/usr.bin/grep/stpcpy.c new file mode 100644 index 00000000000..a01636cd1c6 --- /dev/null +++ b/gnu/usr.bin/grep/stpcpy.c @@ -0,0 +1,50 @@ +/* stpcpy.c -- copy a string and return pointer to end of new string + Copyright (C) 1992, 1995, 1997, 1998 Free Software Foundation, Inc. + + NOTE: The canonical source of this file is maintained with the GNU C Library. + Bugs can be reported to bug-glibc@prep.ai.mit.edu. + + This program is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by the + Free Software Foundation; either version 2, or (at your option) any + later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, + USA. */ + +#ifdef HAVE_CONFIG_H +# include +#endif + +#include + +#undef __stpcpy +#undef stpcpy + +#ifndef weak_alias +# define __stpcpy stpcpy +#endif + +/* Copy SRC to DEST, returning the address of the terminating '\0' in DEST. 
*/ +char * +__stpcpy (char *dest, const char *src) +{ + register char *d = dest; + register const char *s = src; + + do + *d++ = *s; + while (*s++ != '\0'); + + return d - 1; +} +#ifdef weak_alias +weak_alias (__stpcpy, stpcpy) +#endif diff --git a/gnu/usr.bin/grep/system.h b/gnu/usr.bin/grep/system.h new file mode 100644 index 00000000000..a6654b694c8 --- /dev/null +++ b/gnu/usr.bin/grep/system.h @@ -0,0 +1,207 @@ +/* Portability cruft. Include after config.h and sys/types.h. + Copyright 1996, 1998, 1999, 2000 Free Software Foundation, Inc. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2, or (at your option) + any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA + 02111-1307, USA. */ + +#undef PARAMS +#if defined (__STDC__) && __STDC__ +# ifndef _PTR_T +# define _PTR_T + typedef void * ptr_t; +# endif +# define PARAMS(x) x +#else +# ifndef _PTR_T +# define _PTR_T + typedef char * ptr_t; +# endif +# define PARAMS(x) () +#endif + +#ifdef HAVE_UNISTD_H +# include +# include +#else +# define O_RDONLY 0 +# define SEEK_SET 0 +# define SEEK_CUR 1 +int open(), read(), close(); +#endif + +#include +#ifndef errno +extern int errno; +#endif + +#ifndef HAVE_STRERROR +extern int sys_nerr; +extern char *sys_errlist[]; +# define strerror(E) (0 <= (E) && (E) < sys_nerr ? _(sys_errlist[E]) : _("Unknown system error")) +#endif + +/* Some operating systems treat text and binary files differently. 
*/ +#if O_BINARY +# include +# ifdef HAVE_SETMODE +# define SET_BINARY(fd) setmode (fd, O_BINARY) +# else +# define SET_BINARY(fd) _setmode (fd, O_BINARY) +# endif +#else +# ifndef O_BINARY +# define O_BINARY 0 +# define SET_BINARY(fd) (void)0 +# endif +#endif + +#ifdef HAVE_DOS_FILE_NAMES +# define IS_SLASH(c) ((c) == '/' || (c) == '\\') +# define FILESYSTEM_PREFIX_LEN(f) ((f)[0] && (f)[1] == ':' ? 2 : 0) +#endif + +#ifndef IS_SLASH +# define IS_SLASH(c) ((c) == '/') +#endif + +#ifndef FILESYSTEM_PREFIX_LEN +# define FILESYSTEM_PREFIX_LEN(f) 0 +#endif + +/* This assumes _WIN32, like DJGPP, has D_OK. Does it? In what header? */ +#ifdef D_OK +# ifdef EISDIR +# define is_EISDIR(e, f) \ + ((e) == EISDIR \ + || ((e) == EACCES && access (f, D_OK) == 0 && ((e) = EISDIR, 1))) +# else +# define is_EISDIR(e, f) ((e) == EACCES && access (f, D_OK) == 0) +# endif +#endif + +#ifndef is_EISDIR +# ifdef EISDIR +# define is_EISDIR(e, f) ((e) == EISDIR) +# else +# define is_EISDIR(e, f) 0 +# endif +#endif + +#if STAT_MACROS_BROKEN +# undef S_ISDIR +# undef S_ISREG +#endif +#if !defined(S_ISDIR) && defined(S_IFDIR) +# define S_ISDIR(Mode) (((Mode) & S_IFMT) == S_IFDIR) +#endif +#if !defined(S_ISREG) && defined(S_IFREG) +# define S_ISREG(Mode) (((Mode) & S_IFMT) == S_IFREG) +#endif + +#ifdef STDC_HEADERS +# include +#else +char *getenv (); +ptr_t malloc(), realloc(), calloc(); +void free(); +#endif + +#if __STDC__ +# include +#endif +#ifdef STDC_HEADERS +# include +#endif +#ifndef CHAR_BIT +# define CHAR_BIT 8 +#endif +/* The extra casts work around common compiler bugs. */ +#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1)) +#define TYPE_MINIMUM(t) ((t) (TYPE_SIGNED (t) \ + ? 
~ (t) 0 << (sizeof (t) * CHAR_BIT - 1) \ + : (t) 0)) +#define TYPE_MAXIMUM(t) ((t) (~ (t) 0 - TYPE_MINIMUM (t))) +#ifndef CHAR_MAX +# define CHAR_MAX TYPE_MAXIMUM (char) +#endif +#ifndef INT_MAX +# define INT_MAX TYPE_MAXIMUM (int) +#endif +#ifndef UCHAR_MAX +# define UCHAR_MAX TYPE_MAXIMUM (unsigned char) +#endif + +#if !defined(STDC_HEADERS) && defined(HAVE_STRING_H) && defined(HAVE_MEMORY_H) +# include +#endif +#if defined(STDC_HEADERS) || defined(HAVE_STRING_H) +# include +#else +# include +# undef strchr +# define strchr index +# undef strrchr +# define strrchr rindex +# undef memcpy +# define memcpy(d, s, n) bcopy (s, d, n) +#endif +#ifndef HAVE_MEMCHR +ptr_t memchr(); +#endif +#if ! defined HAVE_MEMMOVE && ! defined memmove +# define memmove(d, s, n) bcopy (s, d, n) +#endif + +#include + +#ifndef isgraph +# define isgraph(C) (isprint(C) && !isspace(C)) +#endif + +#if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII)) +# define IN_CTYPE_DOMAIN(c) 1 +#else +# define IN_CTYPE_DOMAIN(c) isascii(c) +#endif + +#define ISALPHA(C) (IN_CTYPE_DOMAIN (C) && isalpha (C)) +#define ISUPPER(C) (IN_CTYPE_DOMAIN (C) && isupper (C)) +#define ISLOWER(C) (IN_CTYPE_DOMAIN (C) && islower (C)) +#define ISDIGIT(C) (IN_CTYPE_DOMAIN (C) && isdigit (C)) +#define ISXDIGIT(C) (IN_CTYPE_DOMAIN (C) && isxdigit (C)) +#define ISSPACE(C) (IN_CTYPE_DOMAIN (C) && isspace (C)) +#define ISPUNCT(C) (IN_CTYPE_DOMAIN (C) && ispunct (C)) +#define ISALNUM(C) (IN_CTYPE_DOMAIN (C) && isalnum (C)) +#define ISPRINT(C) (IN_CTYPE_DOMAIN (C) && isprint (C)) +#define ISGRAPH(C) (IN_CTYPE_DOMAIN (C) && isgraph (C)) +#define ISCNTRL(C) (IN_CTYPE_DOMAIN (C) && iscntrl (C)) + +#define TOLOWER(C) (ISUPPER(C) ? 
tolower(C) : (C)) + +#if ENABLE_NLS +# include +# define _(String) gettext (String) +#else +# define _(String) String +#endif +#define N_(String) String + +#if HAVE_SETLOCALE +# include +#endif + +#ifndef initialize_main +#define initialize_main(argcp, argvp) +#endif diff --git a/gnu/usr.bin/grep/tests/bre.awk b/gnu/usr.bin/grep/tests/bre.awk new file mode 100644 index 00000000000..9c9fef8275a --- /dev/null +++ b/gnu/usr.bin/grep/tests/bre.awk @@ -0,0 +1,26 @@ +BEGIN { + FS="@"; + n = 0; + printf ("# Generated Spencer BRE Test\n"); + printf ("failures=0\n"); +} + +$0 ~ /^#/ { next; } + +NF == 3 { + printf ("status=`echo '%s' | { ${GREP} -e '%s' > /dev/null 2>&1; echo $?; cat >/dev/null; }`\n",$3, $2); + printf ("if test $status -ne %s ; then\n", $1); + printf ("\techo Spencer bre test \\#%d failed\n", ++n); + printf ("\tfailures=1\n"); + printf ("fi\n"); +} + +NF == 4 { +#don't alarm users +# printf ("echo '%s' | ${GREP} -e '%s' > /dev/null 2>&1\n",$3, $2); +# printf ("if test $? -ne %s ; then\n", $1); +# printf ("\techo Expected non conformance \\#%d ... continuing\n", ++n); +# printf ("fi\n"); +} + +END { printf ("exit $failures\n"); } diff --git a/gnu/usr.bin/grep/tests/bre.sh b/gnu/usr.bin/grep/tests/bre.sh new file mode 100644 index 00000000000..e0da8358588 --- /dev/null +++ b/gnu/usr.bin/grep/tests/bre.sh @@ -0,0 +1,13 @@ +#! /bin/sh +# Regression test for GNU grep. + +: ${srcdir=$1} + +failures=0 + +# . . . and the following by Henry Spencer. 
+ +${AWK-awk} -f $srcdir/bre.awk $srcdir/bre.tests > bre.script + +sh bre.script && exit $failures +exit 1 diff --git a/gnu/usr.bin/grep/tests/bre.tests b/gnu/usr.bin/grep/tests/bre.tests new file mode 100644 index 00000000000..1ed159dad7a --- /dev/null +++ b/gnu/usr.bin/grep/tests/bre.tests @@ -0,0 +1,62 @@ +0@a\(b\)c@abc +0@a(@a( +2@a\(@EPAREN +2@a\(b@EPAREN +0@a(b@a(b +0@a)@a) +2@a\)@EPAREN +2@\)@EPAREN +0@a\(\)b@ab +0@a^b@a^b@TO CORRECT +0@a$b@a$b +0@\($\)\(^\)@@ +0@a*\(^b$\)c*@b +0@|@| +0@*@* +0@\(\)@abc +2@\(\{1\}a\)@BADRPT@TO CORRECT +0@^*@* +2@^\{1\}@BADRPT@TO CORRECT +0@\{@{ +1@a\(b*\)c\1d@abbcbd +1@a\(b*\)c\1d@abbcbbbd +1@^\(.\)\1@abc +0@a\(\([bc]\)\2\)*d@abbccd +1@a\(\([bc]\)\2\)*d@abbcbd +0@a\(\(b\)*\2\)*d@abbbd +0@\(a\)\1bcd@aabcd +0@\(a\)\1bc*d@aabcd +0@\(a\)\1bc*d@aabd +0@\(a\)\1bc*d@aabcccd +0@\(a\)\1bc*[ce]d@aabcccd@TO CORRECT +0@^\(a\)\1b\(c\)*cd$@aabcccd +0@a\(*\)b@a*b +0@a\(**\)b@ab +2@a\(***\)b@BADRPT@TO CORRECT +0@*a@*a +0@**a@a +2@***a@BADRPT@TO CORRECT +0@a\{1\}b@ab +0@a\{1,\}b@ab +0@a\{1,2\}b@aab +2@a\{1@EBRACE +2@a\{1a@EBRACE +2@a\{1a\}@BADBR +2@a\{,2\}@BADBR +2@a\{,\}@BADBR +2@a\{1,x\}@BADBR +2@a\{1,x@EBRACE +2@a\{32768\}@BADBR +2@a\{1,0\}@BADBR +0@ab\{0,0\}c@abcac +0@ab\{0,1\}c@abcac +0@ab\{0,3\}c@abbcac +0@ab\{1,1\}c@acabc +0@ab\{1,3\}c@acabc +0@ab\{2,2\}c@abcabbc +0@ab\{2,4\}c@abcabbc +2@a\{1\}\{1\}@BADRPT@TO CORRECT +2@a*\{1\}@BADRPT@TO CORRECT +2@a\{1\}*@BADRPT@TO CORRECT +1@a\(b\)?c\1d@acd +0@-\{0,1\}[0-9]*$@-5 diff --git a/gnu/usr.bin/grep/tests/check.sh b/gnu/usr.bin/grep/tests/check.sh deleted file mode 100644 index d2c8fdbc64d..00000000000 --- a/gnu/usr.bin/grep/tests/check.sh +++ /dev/null @@ -1,24 +0,0 @@ -#! /bin/sh -# Regression test for GNU grep. -# Usage: regress.sh [testdir] - -testdir=${1-tests} - -failures=0 - -# The Khadafy test is brought to you by Scott Anderson . . . 
-./grep -E -f $testdir/khadafy.regexp $testdir/khadafy.lines > khadafy.out -if cmp $testdir/khadafy.lines khadafy.out -then - : -else - echo Khadafy test failed -- output left on khadafy.out - failures=1 -fi - -# . . . and the following by Henry Spencer. - -${AWK-awk} -F: -f $testdir/scriptgen.awk $testdir/spencer.tests > tmp.script - -sh tmp.script && exit $failures -exit 1 diff --git a/gnu/usr.bin/grep/tests/empty.sh b/gnu/usr.bin/grep/tests/empty.sh new file mode 100644 index 00000000000..d8a314f1533 --- /dev/null +++ b/gnu/usr.bin/grep/tests/empty.sh @@ -0,0 +1,30 @@ +#! /bin/sh +# test that the empty file means no pattern +# and an empty pattern means match all. + +: ${srcdir=$1} + +failures=0 + +# should return 0 found a match +echo "abcd" | ${GREP} -E -e '' > /dev/null 2>&1 +if test $? -ne 0 ; then + echo "Status: Wrong status code, test \#1 failed" + failures=1 +fi + +# should return 1 found no match +echo "abcd" | ${GREP} -E -f /dev/null > /dev/null 2>&1 +if test $? -ne 1 ; then + echo "Status: Wrong status code, test \#2 failed" + failures=1 +fi + +# should return 0 found a match +echo "abcd" | ${GREP} -E -f /dev/null -e "abc" > /dev/null 2>&1 +if test $? 
-ne 0 ; then + echo "Status: Wrong status code, test \#3 failed" + failures=1 +fi + +exit $failures diff --git a/gnu/usr.bin/grep/tests/ere.awk b/gnu/usr.bin/grep/tests/ere.awk new file mode 100644 index 00000000000..8f6a5b59fb1 --- /dev/null +++ b/gnu/usr.bin/grep/tests/ere.awk @@ -0,0 +1,31 @@ +BEGIN { + FS="@"; + n = 0; + printf ("# Generated Spencer ERE Test\n"); + printf ("failures=0\n"); +} + +$0 ~ /^#/ { next; } + +NF == 3 { + printf ("status=`echo '%s' | { ${GREP} -E -e '%s' > /dev/null 2>&1; echo $?; cat >/dev/null; }`\n",$3, $2); + printf ("if test $status -ne %s ; then\n", $1); + printf ("\techo Spencer ere test \\#%d failed\n", ++n); + printf ("\tfailures=1\n"); + printf ("fi\n"); +} + +NF == 4 { +# don't alarm the user for now +# printf ("echo '%s'|${GREP} -E -e '%s' > /dev/null 2>&1\n",$3, $2); +# printf ("if test $? -ne %s ; then\n", $1); +# printf ("\techo Expected non conformance \\#%d ... continuing\n", ++n); +# printf ("fi\n"); +} + +NF == 5 { +# don't alarm the user for now + next; +} + +END { printf ("exit $failures\n"); } diff --git a/gnu/usr.bin/grep/tests/ere.sh b/gnu/usr.bin/grep/tests/ere.sh new file mode 100644 index 00000000000..fa34f06d62c --- /dev/null +++ b/gnu/usr.bin/grep/tests/ere.sh @@ -0,0 +1,13 @@ +#! /bin/sh +# Regression test for GNU grep. + +: ${srcdir=$1} + +failures=0 + +# . . . and the following by Henry Spencer. 
+ +${AWK-awk} -f $srcdir/ere.awk $srcdir/ere.tests > ere.script + +sh ere.script && exit $failures +exit 1 diff --git a/gnu/usr.bin/grep/tests/ere.tests b/gnu/usr.bin/grep/tests/ere.tests new file mode 100644 index 00000000000..20ef2b1b2bf --- /dev/null +++ b/gnu/usr.bin/grep/tests/ere.tests @@ -0,0 +1,215 @@ +0@a@a +0@abc@abc +0@abc|de@abc +0@a|b|c@abc +0@a(b)c@abc +2@a(@EPAREN +0@a\(@a( +2@a(b@EPAREN +0@a)@a)@POSIX BOTCH +0@)@)@POSIX BOTCH +0@a()b@ab +0@^abc$@abc +1@a^b@a^b +1@a$b@a$b +0@^@abc +0@$@abc +0@^$@@ +0@$^@@ +0@^^@@ +0@$$@@ +0@a*(^b$)c*@b +2@|@EMPTY@NO ALTERNATION +2@*@BADRPT@TO CORRECT +2@+@BADRPT@TO CORRECT +2@?@BADRPT@TO CORRECT +1@&C@PASS +0@()@abc +2@a||b@EMPTY@NO ALTERNATION +2@|ab@EMPTY@NO ALTERNATION +2@ab|@EMPTY@NO ALTERNATION +2@(|a)b@EMPTY@NO ALTERNATION +2@(a|)b@EMPTY@NO ALTERNATION +2@(*a)@BADRPT@TO CORRECT +2@(+a)@BADRPT@TO CORRECT +2@(?a)@BADRPT@TO CORRECT +2@({1}a)@BADRPT@TO CORRECT +2@(a|*b)@BADRPT@NO ALTERNATION +2@(a|+b)@BADRPT@NO ALTERNATION +2@(a|?b)@BADRPT@NO ALTERNATION +2@(a|{1}b)@BADRPT@NO ALTERNATION +2@^*@BADRPT@TO CORRECT +2@^+@BADRPT@TO CORRECT +2@^?@BADRPT@TO CORRECT +2@^{1}@BADRPT@TO CORRECT +0@a.c@abc +0@a[bc]d@abd +0@a\*c@a*c +0@a\\b@a\b@TO CORRECT +0@a\\\*b@a\*b@SHELL TROUBLE +0@a\bc@abc@TO CORRECT +2@a\@EESCAPE@SHELL TROUBLE +0@a\\bc@a\bc@TO CORRECT +0@a\[b@a[b +2@a[b@EBRACK +0@a$@a +1@a$@a$ +1@a\$@a@SHELL TROUBLE +0@a\$@a$@SHELL TROUBLE +1@a\\$@a +1@a\\$@a$@SHELL TROUBLE +1@a\\$@a\$@SHELL TROUBLE +0@a\\$@a\@SHEL TROUBLE +0@ab*c@abc +0@ab+c@abc +0@ab?c@abc +0@{@{@TO CORRECT +0@{abc@{abc@TO CORRECT +0@{1@{1 +2@{1}@BADRPT@TO CORRECT +0@a{b@a{b@TO CORRECT +0@a{1}b@ab +0@a{1,}b@ab +0@a{1,2}b@aab +0@a{1@a{1 +1@a{1a@aa +0@a{1a}@a{1a} +0@a{,2}@a{,2} +0@a{,}@a{,} +0@a{1,*}@a{1,,,} +2@a{1,x@EBRACE@TO CORRECT +2@a{300}@BADBR@TO CORRECT +2@a{1,0}@BADBR@TO CORRECT +0@ab{0,0}c@abcac +0@ab{0,1}c@abcac +0@ab{0,3}c@abbcac +0@ab{1,1}c@acabc +0@ab{1,3}c@acabc +0@ab{2,2}c@abcabbc +0@ab{2,4}c@abcabbc +2@a**@BADRPT@TO CORRECT 
+2@a++@BADRPT@TO CORRECT +2@a??@BADRPT@TO CORRECT +2@a*+@BADRPT@TO CORRECT +2@a*?@BADRPT@TO CORRECT +2@a+*@BADRPT@TO CORRECT +2@a+?@BADRPT@TO CORRECT +2@a?*@BADRPT@TO CORRECT +2@a?+@BADRPT@TO CORRECT +2@a{1}{1}@BADRPT@TO CORRECT +2@a*{1}@BADRPT@TO CORRECT +2@a+{1}@BADRPT@TO CORRECT +2@a?{1}@BADRPT@TO CORRECT +2@a{1}*@BADRPT@TO CORRECT +2@a{1}+@BADRPT@TO CORRECT +2@a{1}?@BADRPT@TO CORRECT +0@a*{b}@a{b}@TO CORRECT +0@a[b]c@abc +0@a[ab]c@abc +0@a[^ab]c@adc +0@a[]b]c@a]c +0@a[[b]c@a[c +0@a[-b]c@a-c +0@a[^]b]c@adc +0@a[^-b]c@adc +0@a[b-]c@a-c +2@a[b@EBRACK +2@a[]@EBRACK +0@a[1-3]c@a2c +2@a[3-1]c@ERANGE@TO CORRECT +2@a[1-3-5]c@ERANGE@TO CORRECT +0@a[[.-.]--]c@a-c@TO CORRECT +2@a[1-@ERANGE +2@a[[.@EBRACK +2@a[[.x@EBRACK +2@a[[.x.@EBRACK +2@a[[.x.]@EBRACK@TO CORRECT +0@a[[.x.]]@ax@TO CORRECT +2@a[[.x,.]]@ECOLLATE@TO CORRECT +0@a[[.one.]]b@a1b@TO CORRECT +2@a[[.notdef.]]b@ECOLLATE@TO CORRECT +0@a[[.].]]b@a]b@TO CORRECT +0@a[[:alpha:]]c@abc +2@a[[:notdef:]]c@ECTYPE +2@a[[:@EBRACK +2@a[[:alpha@EBRACK +2@a[[:alpha:]@EBRACK +2@a[[:alpha,:]@ECTYPE +2@a[[:]:]]b@ECTYPE +2@a[[:-:]]b@ECTYPE +2@a[[:alph:]]@ECTYPE +2@a[[:alphabet:]]@ECTYPE +0@[[:digit:]]+@a019b +0@[[:lower:]]+@AabC +0@[[:upper:]]+@aBCd +0@[[:xdigit:]]+@p0f3Cq +0@a[[=b=]]c@abc@TO CORRECT +2@a[[=@EBRACK +2@a[[=b@EBRACK +2@a[[=b=@EBRACK +2@a[[=b=]@EBRACK@TO CORRECT +2@a[[=b,=]]@ECOLLATE@TO CORRECT +0@a[[=one=]]b@a1b@TO CORRECT +0@a(((b)))c@abc +0@a(b|(c))d@abd +0@a(b*|c)d@abbd +0@a[ab]{20}@aaaaabaaaabaaaabaaaab +0@a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab]@aaaaabaaaabaaaabaaaab +0@a[ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab][ab](wee|week)(knights|night)@aaaaabaaaabaaaabaaaabweeknights +0@12345678901234567890123456789@a12345678901234567890123456789b +0@123456789012345678901234567890@a123456789012345678901234567890b +0@1234567890123456789012345678901@a1234567890123456789012345678901b 
+0@12345678901234567890123456789012@a12345678901234567890123456789012b +0@123456789012345678901234567890123@a123456789012345678901234567890123b +0@1234567890123456789012345678901234567890123456789012345678901234567890@a1234567890123456789012345678901234567890123456789012345678901234567890b +0@[ab][cd][ef][gh][ij][kl][mn]@xacegikmoq +0@[ab][cd][ef][gh][ij][kl][mn][op]@xacegikmoq +0@[ab][cd][ef][gh][ij][kl][mn][op][qr]@xacegikmoqy +0@[ab][cd][ef][gh][ij][kl][mn][op][q]@xacegikmoqy +0@abc@xabcy +0@aBc@Abc@TO CORRECT +0@a[Bc]*d@abBCcd@TO CORRECT +0@0[[:upper:]]1@0a1@TO CORRECT +0@0[[:lower:]]1@0A1@TO CORRECT +1@a[^b]c@abc +1@a[^b]c@aBc@TO CORRECT +0@a[^b]c@adc +0@[a]b[c]@abc +0@[a]b[a]@aba +0@[abc]b[abc]@abc +0@[abc]b[abd]@abd +0@a(b?c)+d@accd +0@(wee|week)(knights|night)@weeknights +0@(we|wee|week|frob)(knights|night|day)@weeknights +0@a[bc]d@xyzaaabcaababdacd +0@a[ab]c@aaabc +0@a*@b +0@/\*.*\*/@/*x*/ +0@/\*.*\*/@/*x*/y/*z*/ +0@/\*([^*]|\*[^/])*\*/@/*x*/ +0@/\*([^*]|\*[^/])*\*/@/*x*/y/*z*/ +0@/\*([^*]|\*[^/])*\*/@/*x**/y/*z*/ +0@/\*([^*]|\*+[^*/])*\*+/@/*x*/ +0@/\*([^*]|\*+[^*/])*\*+/@/*x*/y/*z*/ +0@/\*([^*]|\*+[^*/])*\*+/@/*x**/y/*z*/ +0@/\*([^*]|\*+[^*/])*\*+/@/*x****/y/*z*/ +0@/\*([^*]|\*+[^*/])*\*+/@/*x**x*/y/*z*/ +0@/\*([^*]|\*+[^*/])*\*+/@/*x***x/y/*z*/ +0@aZb@a@TO CORRECT +0@[[:<:]]a@a@TO CORRECT +1@[[:<:]]a@ba@TO CORRECT +0@[[:<:]]a@-a@TO CORRECT +0@a[[:>:]]@a@TO CORRECT +1@a[[:>:]]@ab@TO CORRECT +0@a[[:>:]]@a-@TO CORRECT +0@[[:<:]]a.c[[:>:]]@axcd-dayc-dazce-abc@TO CORRECT +0@[[:<:]]a.c[[:>:]]@axcd-dayc-dazce-abc-q@TO CORRECT +0@[[:<:]]a.c[[:>:]]@axc-dayc-dazce-abc@TO CORRECT +0@[[:<:]]b.c[[:>:]]@a_bxc-byc_d-bzc-q@TO CORRECT +0@[[:<:]].x..[[:>:]]@y_xa_-_xb_y-_xc_-axdc@TO CORRECT +1@[[:<:]]a_b[[:>:]]@x_a_b@TO CORRECT +0@(A[1])|(A[2])|(A[3])|(A[4])|(A[5])|(A[6])|(A[7])|(A[8])|(A[9])|(A[A])@A1 +0@abcdefghijklmnop@abcdefghijklmnop +0@abcdefghijklmnopqrstuv@abcdefghijklmnopqrstuv +0@CC[13]1|a{21}[23][EO][123][Es][12]a{15}aa[34][EW]aaaaaaa[X]a@CC11 +0@a?b@ab diff 
--git a/gnu/usr.bin/grep/tests/khadafy.sh b/gnu/usr.bin/grep/tests/khadafy.sh new file mode 100644 index 00000000000..09bad978b1e --- /dev/null +++ b/gnu/usr.bin/grep/tests/khadafy.sh @@ -0,0 +1,19 @@ +#! /bin/sh +# Regression test for GNU grep. + +: ${srcdir=$1} + +failures=0 + +# The Khadafy test is brought to you by Scott Anderson . . . + +${GREP} -E -f $srcdir/khadafy.regexp $srcdir/khadafy.lines > khadafy.out +if cmp $srcdir/khadafy.lines khadafy.out +then + : +else + echo Khadafy test failed -- output left on khadafy.out + failures=1 +fi + +exit $failures diff --git a/gnu/usr.bin/grep/tests/options.sh b/gnu/usr.bin/grep/tests/options.sh new file mode 100644 index 00000000000..2b05c5d54e9 --- /dev/null +++ b/gnu/usr.bin/grep/tests/options.sh @@ -0,0 +1,36 @@ +#! /bin/sh +# Test for POSIX.2 options for grep +# +# grep [ -E| -F][ -c| -l| -q ][-insvx] -e pattern_list +# [-f pattern_file] ... [file. ..] +# grep [ -E| -F][ -c| -l| -q ][-insvx][-e pattern_list] +# -f pattern_file ... [file ...] +# grep [ -E| -F][ -c| -l| -q ][-insvx] pattern_list [file...] +# + +: ${srcdir=$1} + +failures=0 + +# checking for -E extended regex +echo "abababccccccd" | ${GREP} -E -e 'c{3}' > /dev/null 2>&1 +if test $? -ne 0 ; then + echo "Options: Wrong status code, test \#1 failed" + failures=1 +fi + +# checking for basic regex +echo "abababccccccd" | ${GREP} -G -e 'c\{3\}' > /dev/null 2>&1 +if test $? -ne 0 ; then + echo "Options: Wrong status code, test \#2 failed" + failures=1 +fi + +# checking for fixed string +echo "abababccccccd" | ${GREP} -F -e 'c\{3\}' > /dev/null 2>&1 +if test $? 
-ne 1 ; then + echo "Options: Wrong status code, test \#3 failed" + failures=1 +fi + +exit $failures diff --git a/gnu/usr.bin/grep/tests/scriptgen.awk b/gnu/usr.bin/grep/tests/scriptgen.awk deleted file mode 100644 index 44ef4df16e5..00000000000 --- a/gnu/usr.bin/grep/tests/scriptgen.awk +++ /dev/null @@ -1,10 +0,0 @@ -BEGIN { print "failures=0"; } -$0 !~ /^#/ && NF == 3 { - print "echo '" $3 "' | ./grep -E -e '" $2 "' > /dev/null 2>&1"; - print "if [ $? != " $1 " ]" - print "then" - printf "\techo Spencer test \\#%d failed\n", ++n - print "\tfailures=1" - print "fi" -} -END { print "exit $failures"; } diff --git a/gnu/usr.bin/grep/tests/spencer.tests b/gnu/usr.bin/grep/tests/spencer.tests deleted file mode 100644 index 913f1980c83..00000000000 --- a/gnu/usr.bin/grep/tests/spencer.tests +++ /dev/null @@ -1,122 +0,0 @@ -0:abc:abc -1:abc:xbc -1:abc:axc -1:abc:abx -0:abc:xabcy -0:abc:ababc -0:ab*c:abc -0:ab*bc:abc -0:ab*bc:abbc -0:ab*bc:abbbbc -0:ab+bc:abbc -1:ab+bc:abc -1:ab+bc:abq -0:ab+bc:abbbbc -0:ab?bc:abbc -0:ab?bc:abc -1:ab?bc:abbbbc -0:ab?c:abc -0:^abc$:abc -1:^abc$:abcc -0:^abc:abcc -1:^abc$:aabc -0:abc$:aabc -0:^:abc -0:$:abc -0:a.c:abc -0:a.c:axc -0:a.*c:axyzc -1:a.*c:axyzd -1:a[bc]d:abc -0:a[bc]d:abd -1:a[b-d]e:abd -0:a[b-d]e:ace -0:a[b-d]:aac -0:a[-b]:a- -0:a[b-]:a- -1:a[b-a]:- -2:a[]b:- -2:a[:- -0:a]:a] -0:a[]]b:a]b -0:a[^bc]d:aed -1:a[^bc]d:abd -0:a[^-b]c:adc -1:a[^-b]c:a-c -1:a[^]b]c:a]c -0:a[^]b]c:adc -0:ab|cd:abc -0:ab|cd:abcd -0:()ef:def -0:()*:- -1:*a:- -0:^*:- -0:$*:- -1:(*)b:- -1:$b:b -2:a\:- -0:a\(b:a(b -0:a\(*b:ab -0:a\(*b:a((b -1:a\x:a\x -2:abc):- -2:(abc:- -0:((a)):abc -0:(a)b(c):abc -0:a+b+c:aabbabc -0:a**:- -0:a*?:- -0:(a*)*:- -0:(a*)+:- -0:(a|)*:- -0:(a*|b)*:- -0:(a+|b)*:ab -0:(a+|b)+:ab -0:(a+|b)?:ab -0:[^ab]*:cde -0:(^)*:- -0:(ab|)*:- -2:)(:- -1:abc: -1:abc: -0:a*: -0:([abc])*d:abbbcd -0:([abc])*bcd:abcd -0:a|b|c|d|e:e -0:(a|b|c|d|e)f:ef -0:((a*|b))*:- -0:abcd*efg:abcdefg -0:ab*:xabyabbbz -0:ab*:xayabbbz -0:(ab|cd)e:abcde 
-0:[abhgefdc]ij:hij -1:^(ab|cd)e:abcde -0:(abc|)ef:abcdef -0:(a|b)c*d:abcd -0:(ab|ab*)bc:abc -0:a([bc]*)c*:abc -0:a([bc]*)(c*d):abcd -0:a([bc]+)(c*d):abcd -0:a([bc]*)(c+d):abcd -0:a[bcd]*dcdcde:adcdcde -1:a[bcd]+dcdcde:adcdcde -0:(ab|a)b*c:abc -0:((a)(b)c)(d):abcd -0:[A-Za-z_][A-Za-z0-9_]*:alpha -0:^a(bc+|b[eh])g|.h$:abh -0:(bc+d$|ef*g.|h?i(j|k)):effgz -0:(bc+d$|ef*g.|h?i(j|k)):ij -1:(bc+d$|ef*g.|h?i(j|k)):effg -1:(bc+d$|ef*g.|h?i(j|k)):bcdd -0:(bc+d$|ef*g.|h?i(j|k)):reffgz -1:((((((((((a)))))))))):- -0:(((((((((a))))))))):a -1:multiple words of text:uh-uh -0:multiple words:multiple words, yeah -0:(.*)c(.*):abcde -1:\((.*),:(.*)\) -1:[k]:ab -0:abcd:abcd -0:a(bc)d:abcd -0:a[-]?c:ac -0:(....).*\1:beriberi diff --git a/gnu/usr.bin/grep/tests/spencer1.awk b/gnu/usr.bin/grep/tests/spencer1.awk new file mode 100644 index 00000000000..70c6118e556 --- /dev/null +++ b/gnu/usr.bin/grep/tests/spencer1.awk @@ -0,0 +1,14 @@ +BEGIN { + FS = "@"; + printf ("failures=0\n"); +} + +$0 !~ /^#/ && NF = 3 { + printf ("status=`echo '%s'| { ${GREP} -E -e '%s' > /dev/null 2>&1; echo $?; cat >/dev/null; }`\n",$3, $2); + printf ("if test $status -ne %s ; then\n", $1); + printf ("\techo Spencer test \\#%d failed\n", ++n); + printf ("\tfailures=1\n"); + printf ("fi\n"); +} + +END { printf ("exit $failures\n"); } diff --git a/gnu/usr.bin/grep/tests/spencer1.sh b/gnu/usr.bin/grep/tests/spencer1.sh new file mode 100644 index 00000000000..63f16df1e38 --- /dev/null +++ b/gnu/usr.bin/grep/tests/spencer1.sh @@ -0,0 +1,13 @@ +#! /bin/sh +# Regression test for GNU grep. + +: ${srcdir=$1} + +failures=0 + +# . . . and the following by Henry Spencer. 
+ +${AWK-awk} -f $srcdir/spencer1.awk $srcdir/spencer1.tests > spencer1.script + +sh spencer1.script && exit $failures +exit 1 diff --git a/gnu/usr.bin/grep/tests/spencer1.tests b/gnu/usr.bin/grep/tests/spencer1.tests new file mode 100644 index 00000000000..2a60f00c141 --- /dev/null +++ b/gnu/usr.bin/grep/tests/spencer1.tests @@ -0,0 +1,122 @@ +0@abc@abc +1@abc@xbc +1@abc@axc +1@abc@abx +0@abc@xabcy +0@abc@ababc +0@ab*c@abc +0@ab*bc@abc +0@ab*bc@abbc +0@ab*bc@abbbbc +0@ab+bc@abbc +1@ab+bc@abc +1@ab+bc@abq +0@ab+bc@abbbbc +0@ab?bc@abbc +0@ab?bc@abc +1@ab?bc@abbbbc +0@ab?c@abc +0@^abc$@abc +1@^abc$@abcc +0@^abc@abcc +1@^abc$@aabc +0@abc$@aabc +0@^@abc +0@$@abc +0@a.c@abc +0@a.c@axc +0@a.*c@axyzc +1@a.*c@axyzd +1@a[bc]d@abc +0@a[bc]d@abd +1@a[b-d]e@abd +0@a[b-d]e@ace +0@a[b-d]@aac +0@a[-b]@a- +0@a[b-]@a- +1@a[b-a]@- +2@a[]b@- +2@a[@- +0@a]@a] +0@a[]]b@a]b +0@a[^bc]d@aed +1@a[^bc]d@abd +0@a[^-b]c@adc +1@a[^-b]c@a-c +1@a[^]b]c@a]c +0@a[^]b]c@adc +0@ab|cd@abc +0@ab|cd@abcd +0@()ef@def +0@()*@- +1@*a@- +0@^*@- +0@$*@- +1@(*)b@- +1@$b@b +2@a\@- +0@a\(b@a(b +0@a\(*b@ab +0@a\(*b@a((b +1@a\x@a\x +2@abc)@- +2@(abc@- +0@((a))@abc +0@(a)b(c)@abc +0@a+b+c@aabbabc +0@a**@- +0@a*?@- +0@(a*)*@- +0@(a*)+@- +0@(a|)*@- +0@(a*|b)*@- +0@(a+|b)*@ab +0@(a+|b)+@ab +0@(a+|b)?@ab +0@[^ab]*@cde +0@(^)*@- +0@(ab|)*@- +2@)(@- +1@abc@ +1@abc@ +0@a*@ +0@([abc])*d@abbbcd +0@([abc])*bcd@abcd +0@a|b|c|d|e@e +0@(a|b|c|d|e)f@ef +0@((a*|b))*@- +0@abcd*efg@abcdefg +0@ab*@xabyabbbz +0@ab*@xayabbbz +0@(ab|cd)e@abcde +0@[abhgefdc]ij@hij +1@^(ab|cd)e@abcde +0@(abc|)ef@abcdef +0@(a|b)c*d@abcd +0@(ab|ab*)bc@abc +0@a([bc]*)c*@abc +0@a([bc]*)(c*d)@abcd +0@a([bc]+)(c*d)@abcd +0@a([bc]*)(c+d)@abcd +0@a[bcd]*dcdcde@adcdcde +1@a[bcd]+dcdcde@adcdcde +0@(ab|a)b*c@abc +0@((a)(b)c)(d)@abcd +0@[A-Za-z_][A-Za-z0-9_]*@alpha +0@^a(bc+|b[eh])g|.h$@abh +0@(bc+d$|ef*g.|h?i(j|k))@effgz +0@(bc+d$|ef*g.|h?i(j|k))@ij +1@(bc+d$|ef*g.|h?i(j|k))@effg +1@(bc+d$|ef*g.|h?i(j|k))@bcdd +0@(bc+d$|ef*g.|h?i(j|k))@reffgz 
+1@((((((((((a))))))))))@- +0@(((((((((a)))))))))@a +1@multiple words of text@uh-uh +0@multiple words@multiple words, yeah +0@(.*)c(.*)@abcde +1@\((.*),@(.*)\) +1@[k]@ab +0@abcd@abcd +0@a(bc)d@abcd +0@a[-]?c@ac +0@(....).*\1@beriberi diff --git a/gnu/usr.bin/grep/tests/status.sh b/gnu/usr.bin/grep/tests/status.sh new file mode 100644 index 00000000000..33f1ec409d4 --- /dev/null +++ b/gnu/usr.bin/grep/tests/status.sh @@ -0,0 +1,38 @@ +#! /bin/sh +# Test for status code for GNU grep. +# status code +# 0 match found +# 1 no match +# 2 file not found + +: ${srcdir=$1} + +failures=0 + +# should return 0 found a match +echo "abcd" | ${GREP} -E -e 'abc' > /dev/null 2>&1 +if test $? -ne 0 ; then + echo "Status: Wrong status code, test \#1 failed" + failures=1 +fi + +# should return 1 found no match +echo "abcd" | ${GREP} -E -e 'zbc' > /dev/null 2>&1 +if test $? -ne 1 ; then + echo "Status: Wrong status code, test \#2 failed" + failures=1 +fi + +# the filename MMMMMMMM.MMM should not exist hopefully +# should return 2 file not found +if test -b MMMMMMMM.MMM; then + echo "Please remove MMMMMMMM.MMM to run check" +else + ${GREP} -E -e 'abc' MMMMMMMM.MMM> /dev/null 2>&1 + if test $? -ne 2 ; then + echo "Status: Wrong status code, test \#3 failed" + failures=1 + fi +fi + +exit $failures diff --git a/gnu/usr.bin/grep/tests/warning.sh b/gnu/usr.bin/grep/tests/warning.sh new file mode 100644 index 00000000000..ef30f4872a3 --- /dev/null +++ b/gnu/usr.bin/grep/tests/warning.sh @@ -0,0 +1,19 @@ +#! /bin/sh +# +# Tell them not to be alarmed. + +: ${srcdir=$1} + +failures=0 + +# +cat <<\EOF + +Please, do not be alarmed if some of the tests failed. +Report them to , +with the line number, the name of the file, +and grep version number 'grep --version'. +Make sure you have the word grep in the subject. +Thank You. + +EOF