From b8161682f0f265f2eb1e673ec890f8f252a53bf8 Mon Sep 17 00:00:00 2001 From: espie Date: Sat, 11 Mar 2000 15:54:43 +0000 Subject: [PATCH] Add a few builtins for greater compatibility with gnu-m4, and extended functionality. * regular expressions, * line-number reporting * `meta'-macros, builtin and indir. Reviewed by pjanzen@, tested by fries@ and a few others. --- usr.bin/m4/eval.c | 28 +++- usr.bin/m4/extern.h | 14 +- usr.bin/m4/gnum4.c | 327 +++++++++++++++++++++++++++++++++++++++++++- usr.bin/m4/m4.1 | 28 +++- usr.bin/m4/main.c | 25 +++- usr.bin/m4/mdef.h | 8 +- usr.bin/m4/misc.c | 32 ++++- 7 files changed, 449 insertions(+), 13 deletions(-) diff --git a/usr.bin/m4/eval.c b/usr.bin/m4/eval.c index 0d4e85ee6b8..508417e9f03 100644 --- a/usr.bin/m4/eval.c +++ b/usr.bin/m4/eval.c @@ -1,4 +1,4 @@ -/* $OpenBSD: eval.c,v 1.24 2000/01/12 17:49:53 espie Exp $ */ +/* $OpenBSD: eval.c,v 1.25 2000/03/11 15:54:43 espie Exp $ */ /* $NetBSD: eval.c,v 1.7 1996/11/10 21:21:29 pk Exp $ */ /* @@ -41,7 +41,7 @@ #if 0 static char sccsid[] = "@(#)eval.c 8.2 (Berkeley) 4/27/95"; #else -static char rcsid[] = "$OpenBSD: eval.c,v 1.24 2000/01/12 17:49:53 espie Exp $"; +static char rcsid[] = "$OpenBSD: eval.c,v 1.25 2000/03/11 15:54:43 espie Exp $"; #endif #endif /* not lint */ @@ -402,6 +402,30 @@ eval(argv, argc, td) dodefn(argv[n]); break; + case INDIRTYPE: /* Indirect call */ + if (argc > 2) + doindir(argv, argc); + break; + + case BUILTINTYPE: /* Builtins only */ + if (argc > 2) + dobuiltin(argv, argc); + break; + + case PATSTYPE: + if (argc > 2) + dopatsubst(argv, argc); + break; + case REGEXPTYPE: + if (argc > 2) + doregexp(argv, argc); + break; + case LINETYPE: + doprintlineno(infile+ilevel); + break; + case FILENAMETYPE: + doprintfilename(infile+ilevel); + break; case SELFTYPE: pbstr(rquote); pbstr(argv[1]); diff --git a/usr.bin/m4/extern.h b/usr.bin/m4/extern.h index d84f0d55889..69eb8e14030 100644 --- a/usr.bin/m4/extern.h +++ b/usr.bin/m4/extern.h @@ -1,4 +1,4 @@ -/* 
$OpenBSD: extern.h,v 1.16 2000/01/15 14:26:00 espie Exp $ */ +/* $OpenBSD: extern.h,v 1.17 2000/03/11 15:54:44 espie Exp $ */ /* $NetBSD: extern.h,v 1.3 1996/01/13 23:25:24 pk Exp $ */ /*- @@ -50,6 +50,14 @@ extern int expr __P((const char *)); /* gnum4.c */ extern void addtoincludepath __P((const char *dirname)); extern struct input_file *fopen_trypath __P((struct input_file *, const char *filename)); +extern void doindir __P((const char *[], int)); +extern void dobuiltin __P((const char *[], int)); +extern void dopatsubst __P((const char *[], int)); +extern void doregexp __P((const char *[], int)); + +extern void doprintlineno __P((struct input_file *)); +extern void doprintfilename __P((struct input_file *)); + /* look.c */ extern ndptr addent __P((const char *)); @@ -59,6 +67,7 @@ extern void remhash __P((const char *, int)); /* main.c */ extern void outputstr __P((const char *)); +extern int builtin_type __P((const char *)); /* misc.c */ extern void chrsave __P((int)); @@ -69,9 +78,10 @@ extern void initspaces __P((void)); extern void killdiv __P((void)); extern void onintr __P((int)); extern void pbnum __P((int)); +extern void pbunsigned __P((unsigned long)); extern void pbstr __P((const char *)); extern void putback __P((int)); -extern char *xalloc __P((size_t)); +extern void *xalloc __P((size_t)); extern char *xstrdup __P((const char *)); extern void usage __P((void)); diff --git a/usr.bin/m4/gnum4.c b/usr.bin/m4/gnum4.c index 938548cc680..b2d6374b483 100644 --- a/usr.bin/m4/gnum4.c +++ b/usr.bin/m4/gnum4.c @@ -1,4 +1,4 @@ -/* $OpenBSD: gnum4.c,v 1.4 2000/01/12 17:49:53 espie Exp $ */ +/* $OpenBSD: gnum4.c,v 1.5 2000/03/11 15:54:44 espie Exp $ */ /* * Copyright (c) 1999 Marc Espie @@ -30,6 +30,9 @@ */ #include +#include +#include +#include #include #include #include @@ -39,6 +42,9 @@ #include "stdd.h" #include "extern.h" + +int mimic_gnu = 0; + /* * Support for include path search * First search in the the current directory. 
@@ -152,3 +158,360 @@ fopen_trypath(i, filename) return dopath(i, filename); }
+
+/* indir(name, args...) -- call the macro named by argv[2] indirectly.
+ * The definition found by lookup() is stored in argv[1] and the callee is
+ * invoked on the argument vector shifted by one slot (argv+1, argc-1).
+ * An undefined name is a fatal error.
+ */
+void
+doindir(argv, argc)
+	const char *argv[];
+	int argc;
+{
+	ndptr p;
+
+	p = lookup(argv[2]);
+	if (p == NULL)
+		errx(1, "undefined macro %s", argv[2]);
+	argv[1] = p->defn;
+	if (p->type == MACRTYPE)
+		expand(argv+1, argc-1);
+	else
+		eval(argv+1, argc-1, p->type);
+}
+
+/* builtin(name, args...) -- call the builtin named by argv[2] through its
+ * builtin_type() entry.  The vector is shifted by one slot exactly as in
+ * doindir(), so the count handed to eval() must shrink by one too;
+ * passing argc unchanged would describe one slot more than argv+1 has.
+ */
+void
+dobuiltin(argv, argc)
+	const char *argv[];
+	int argc;
+{
+	int n;
+	argv[1] = NULL;
+	n = builtin_type(argv[2]);
+	if (n != -1)
+		eval(argv+1, argc-1, n);
+	else
+		errx(1, "unknown builtin %s", argv[2]);
+} 
+
+
+/* We need some temporary buffer space, as pb pushes BACK and substitution
+ * proceeds forward... */
+static char *buffer;
+static size_t bufsize = 0;
+static size_t current = 0;
+
+static void addchars __P((const char *, size_t));
+static void addchar __P((char));
+static char *twiddle __P((const char *));
+static char *getstring __P((void));
+static void exit_regerror __P((int, regex_t *));
+static void do_subst __P((const char *, regex_t *, const char *, regmatch_t *));
+static void do_regexpindex __P((const char *, regex_t *, regmatch_t *));
+static void do_regexp __P((const char *, regex_t *, const char *, regmatch_t *));
+static void add_sub __P((int, const char *, regex_t *, regmatch_t *));
+static void add_replace __P((const char *, regex_t *, const char *, regmatch_t *));
+
+/* Append n bytes from c to the temporary buffer, growing it as needed. */
+static void
+addchars(c, n)
+	const char *c;
+	size_t n;
+{
+	if (n == 0)
+		return;
+	/* Keep doubling: a single doubling may not be enough for large n. */
+	while (current + n > bufsize) {
+		if (bufsize == 0)
+			bufsize = 1024;
+		else
+			bufsize *= 2;
+		buffer = realloc(buffer, bufsize);
+		if (buffer == NULL)
+			errx(1, "out of memory");
+	}
+	memcpy(buffer+current, c, n);
+	current += n;
+}
+
+/* Append a single character to the temporary buffer. */
+static void
+addchar(c)
+	char c;
+{
+	if (current +1 > bufsize) {
+		if (bufsize == 0)
+			bufsize = 1024;
+		else
+			bufsize *= 2;
+		buffer = realloc(buffer, bufsize);
+		if (buffer == NULL)
+			errx(1, "out of memory");
+	}
+	buffer[current++] = c;
+}
+
+/* NUL-terminate the temporary buffer, reset it for reuse and return it.
+ * The returned storage is overwritten by the next addchar()/addchars().
+ */
+static char *
+getstring()
+{
+	addchar('\0');
+	current = 0;
+	return buffer;
+}
+
+
+/* Report the regcomp()/regexec() error code er for re and exit. */
+static void
+exit_regerror(er, re)
+	int er;
+	regex_t *re;
+{
+	size_t errlen;
+	char *errbuf;
+
+	errlen = regerror(er, re, NULL, 0);
+	errbuf = xalloc(errlen);
+	regerror(er, re, errbuf, errlen);
+	errx(1, "regular expression error: %s", errbuf);
+}
+
+/* Append the text matched by subexpression n (0 is the whole match) to
+ * the temporary buffer; offsets in pm are relative to string.
+ */
+static void
+add_sub(n, string, re, pm)
+	int n;
+	const char *string;
+	regex_t *re;
+	regmatch_t *pm;
+{
+	if (n > re->re_nsub)
+		warnx("No subexpression %d", n);
+	/* Subexpressions that did not match are
+	 * not an error. */
+	else if (pm[n].rm_so != -1 &&
+	    pm[n].rm_eo != -1) {
+		addchars(string + pm[n].rm_so,
+			pm[n].rm_eo - pm[n].rm_so);
+	}
+}
+
+/* Add replacement string to the output buffer, recognizing special
+ * constructs and replacing them with substrings of the original string.
+ */
+static void
+add_replace(string, re, replace, pm)
+	const char *string;
+	regex_t *re;
+	const char *replace;
+	regmatch_t *pm;
+{
+	const char *p;
+
+	for (p = replace; *p != '\0'; p++) {
+		if (*p == '&' && !mimic_gnu) {
+			add_sub(0, string, re, pm);
+			continue;
+		}
+		if (*p == '\\') {
+			if (p[1] == '\\') {
+				/* `\\' is one literal backslash: skip both. */
+				addchar(*(++p));
+				continue;
+			}
+			if (p[1] == '&') {
+				if (mimic_gnu)
+					add_sub(0, string, re, pm);
+				else
+					addchar(p[1]);
+				p++;
+				continue;
+			}
+			if (isdigit((unsigned char)p[1])) {
+				add_sub(*(++p) - '0', string, re, pm);
+				continue;
+			}
+		}
+		addchar(*p);
+	}
+}
+
+/* Replace every match of re in string: unmatched text and replacements
+ * are accumulated in the temporary buffer, and the unmatched tail of
+ * string is pushed back once no further match is found.
+ */
+static void
+do_subst(string, re, replace, pm)
+	const char *string;
+	regex_t *re;
+	const char *replace;
+	regmatch_t *pm;
+{
+	int error;
+	regoff_t last_match = -1;
+
+	while ((error = regexec(re, string, re->re_nsub+1, pm, 0)) == 0) {
+
+		/* NULL length matches are special... We use the `vi-mode'
+		 * rule: don't allow a NULL-match at the last match
+		 * position.
+		 */
+		if (pm[0].rm_so == pm[0].rm_eo && pm[0].rm_so == last_match) {
+			if (*string == '\0')
+				return;
+			addchar(*string);
+			string++;
+			continue;
+		}
+		last_match = pm[0].rm_so;
+		addchars(string, last_match);
+		add_replace(string, re, replace, pm);
+		string += pm[0].rm_eo;
+	}
+	if (error != REG_NOMATCH)
+		exit_regerror(error, re);
+	pbstr(string);
+}
+
+/* regexp with a replacement: on a match, push back replace with its
+ * special constructs expanded; push back nothing when there is no match.
+ */
+static void
+do_regexp(string, re, replace, pm)
+	const char *string;
+	regex_t *re;
+	const char *replace;
+	regmatch_t *pm;
+{
+	int error;
+
+	switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) {
+	case 0:
+		add_replace(string, re, replace, pm);
+		pbstr(getstring());
+		break;
+	case REG_NOMATCH:
+		break;
+	default:
+		exit_regerror(error, re);
+	}
+}
+
+/* regexp without a replacement: push back the offset of the first match
+ * in string, or -1 when there is no match.
+ */
+static void
+do_regexpindex(string, re, pm)
+	const char *string;
+	regex_t *re;
+	regmatch_t *pm;
+{
+	int error;
+
+	switch(error = regexec(re, string, re->re_nsub+1, pm, 0)) {
+	case 0:
+		pbunsigned(pm[0].rm_so);
+		break;
+	case REG_NOMATCH:
+		pbnum(-1);
+		break;
+	default:
+		exit_regerror(error, re);
+	}
+}
+
+/* In Gnu m4 mode, parentheses for backmatch don't work like POSIX 1003.2
+ * says. So we twiddle with the regexp before passing it to regcomp.
+ */
+static char *
+twiddle(p)
+	const char *p;
+{
+	/* This could use strcspn for speed... */
+	while (*p != '\0') {
+		if (*p == '\\' && (p[1] == '(' || p[1] == ')')) {
+			addchar(p[1]);
+			p+=2;
+			continue;
+		}
+		if (*p == '(' || *p == ')')
+			addchar('\\');
+
+		addchar(*p);
+		p++;
+	}
+	return getstring();
+}
+
+/* patsubst(string, regexp, opt replacement) */
+/* argv[2]: string
+ * argv[3]: regexp
+ * argv[4]: opt rep
+ */
+void
+dopatsubst(argv, argc)
+	const char *argv[];
+	int argc;
+{
+	int error;
+	regex_t re;
+	regmatch_t *pmatch;
+
+	if (argc <= 3) {
+		warnx("Too few arguments to patsubst");
+		return;
+	}
+	error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3],
+	    REG_EXTENDED);
+	if (error != 0)
+		exit_regerror(error, &re);
+
+	pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1));
+	/* argv[4] only exists when a replacement was actually supplied. */
+	do_subst(argv[2], &re,
+	    argc > 4 && argv[4] != NULL ? argv[4] : "", pmatch);
+	pbstr(getstring());
+	free(pmatch);
+	regfree(&re);
+}
+
+/* regexp(string, regexp, opt replacement) */
+/* argv[2]: string
+ * argv[3]: regexp
+ * argv[4]: opt rep
+ */
+void
+doregexp(argv, argc)
+	const char *argv[];
+	int argc;
+{
+	int error;
+	regex_t re;
+	regmatch_t *pmatch;
+
+	if (argc <= 3) {
+		warnx("Too few arguments to regexp");
+		return;
+	}
+	error = regcomp(&re, mimic_gnu ? twiddle(argv[3]) : argv[3],
+	    REG_EXTENDED);
+	if (error != 0)
+		exit_regerror(error, &re);
+
+	pmatch = xalloc(sizeof(regmatch_t) * (re.re_nsub+1));
+	if (argc == 4 || argv[4] == NULL)
+		do_regexpindex(argv[2], &re, pmatch);
+	else
+		do_regexp(argv[2], &re, argv[4], pmatch);
+	free(pmatch);
+	regfree(&re);
+}
diff --git a/usr.bin/m4/m4.1 b/usr.bin/m4/m4.1 index e249ed24e92..18f0d956b5d 100644 --- a/usr.bin/m4/m4.1 +++ b/usr.bin/m4/m4.1 @@ -1,4 +1,4 @@ -.\" @(#) $OpenBSD: m4.1,v 1.10 2000/03/10 19:07:20 aaron Exp $ +.\" @(#) $OpenBSD: m4.1,v 1.11 2000/03/11 15:54:44 espie Exp $ .\" .\" .Dd January 26, 1993 @@ -71,6 +71,8 @@ provides the following built-in macros. They may be redefined, losing their original meaning. Return values are null unless otherwise stated. .Bl -tag -width changequotexxx +.It Ic builtin +Calls a built-in by its name, overriding possible redefinitions. .It Ic changecom Change the start and end comment sequences. The default is the pound sign @@ -179,6 +181,9 @@ Returns the index of the second argument in the first argument (e.g., returns 16). If the second argument is not found index returns \-1. +.It Ic indir +Indirectly calls the macro whose name is passed as the first arguments, +with the remaining arguments passed as first, ... arguments. .It Ic len Returns the number of characters in the first argument. Extra arguments @@ -205,6 +210,16 @@ Includes the contents of the file specified by the first argument without any macro processing. 
Aborts with an error message if the file cannot be included. +.It Ic patsubst +Substitutes a regular expression in a string with a replacement string. +Usual substitution patterns apply: an ampersand +.Pq Ql & +is replaced by the string matching the regular expression. +The string +.Ql \e# , +where +.Ql # +is a digit, is replaced by the corresponding back-reference. .It Ic popdef Restores the .Ic pushdef Ns ed @@ -215,6 +230,10 @@ Takes the same arguments as but it saves the definition on a stack for later retrieval by .Ic popdef . +.It Ic regexp +Finds a regular expression in a string. If no further arguments are given, +it returns the first match position or -1 if no match. If a third argument +is provided, it returns the replacement string, with sub-patterns replaced. .It Ic shift Returns all but the first argument, the remaining arguments are quoted and pushed back with commas in between. @@ -252,6 +271,11 @@ Removes the definition for the macro specified by the first argument. Flushes the named output queues (or all queues if no arguments). .It Ic unix A pre-defined macro for testing the OS platform. +.It Ic __line__ +Returns the current file's line number. +.It Ic __file__ +Returns the current file's name. .El .Sh AUTHOR -Ozan Yigit and Richard A. O'Keefe (ok@goanna.cs.rmit.OZ.AU) +Ozan Yigit and Richard A. O'Keefe (ok@goanna.cs.rmit.OZ.AU). +GNU-m4 compatibility extensions by Marc Espie . 
diff --git a/usr.bin/m4/main.c b/usr.bin/m4/main.c index 99e95a27c0e..af4a0120446 100644 --- a/usr.bin/m4/main.c +++ b/usr.bin/m4/main.c @@ -1,4 +1,4 @@ -/* $OpenBSD: main.c,v 1.30 2000/02/02 14:05:22 espie Exp $ */ +/* $OpenBSD: main.c,v 1.31 2000/03/11 15:54:44 espie Exp $ */ /* $NetBSD: main.c,v 1.12 1997/02/08 23:54:49 cgd Exp $ */ /*- @@ -47,7 +47,7 @@ static char copyright[] = #if 0 static char sccsid[] = "@(#)main.c 8.1 (Berkeley) 6/6/93"; #else -static char rcsid[] = "$OpenBSD: main.c,v 1.30 2000/02/02 14:05:22 espie Exp $"; +static char rcsid[] = "$OpenBSD: main.c,v 1.31 2000/03/11 15:54:44 espie Exp $"; #endif #endif /* not lint */ @@ -109,6 +109,13 @@ struct keyblk keywrds[] = { /* m4 keywords to be installed */ #ifdef EXTENDED { "paste", PASTTYPE }, { "spaste", SPASTYPE }, + /* Newer extensions, needed to handle gnu-m4 scripts */ + { "indir", INDIRTYPE}, + { "builtin", BUILTINTYPE}, + { "patsubst", PATSTYPE}, + { "regexp", REGEXPTYPE}, + { "__file__", FILENAMETYPE | NOARGS}, + { "__line__", LINETYPE | NOARGS}, #endif { "popdef", POPDTYPE }, { "pushdef", PUSDTYPE }, @@ -517,6 +524,20 @@ initkwds() } } +/* Look up a builtin type by name in keywrds[], even if overridden by the user; returns the table's ktyp value (flag bits such as NOARGS included) or -1 when key names no builtin. */ +int +builtin_type(key) + const char *key; +{ + int i; + + for (i = 0; i != MAXKEYS; i++) /* linear scan of the keyword table */ + if (STREQ(keywrds[i].knam, key)) + return keywrds[i].ktyp; + return -1; /* no table entry is named key */ +} + + static void record(t, lev) struct position *t; diff --git a/usr.bin/m4/mdef.h b/usr.bin/m4/mdef.h index d18fda20f49..8d7278db3b2 100644 --- a/usr.bin/m4/mdef.h +++ b/usr.bin/m4/mdef.h @@ -1,4 +1,4 @@ -/* $OpenBSD: mdef.h,v 1.13 2000/01/15 14:26:00 espie Exp $ */ +/* $OpenBSD: mdef.h,v 1.14 2000/03/11 15:54:44 espie Exp $ */ /* $NetBSD: mdef.h,v 1.7 1996/01/13 23:25:27 pk Exp $ */ /* @@ -73,6 +73,12 @@ #define EXITTYPE 32 #define DEFNTYPE 33 #define SELFTYPE 34 +#define INDIRTYPE 35 +#define BUILTINTYPE 36 +#define PATSTYPE 37 +#define FILENAMETYPE 38 +#define LINETYPE 39 +#define REGEXPTYPE 40 #define TYPEMASK 63 /* Keep bits 
really corresponding to a type. */ #define STATIC 128 /* Name is statically allocated, don't free. */ diff --git a/usr.bin/m4/misc.c b/usr.bin/m4/misc.c index 68bf5b4ae3a..8858bad5e4c 100644 --- a/usr.bin/m4/misc.c +++ b/usr.bin/m4/misc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: misc.c,v 1.17 2000/01/15 14:26:00 espie Exp $ */ +/* $OpenBSD: misc.c,v 1.18 2000/03/11 15:54:44 espie Exp $ */ /* $NetBSD: misc.c,v 1.6 1995/09/28 05:37:41 tls Exp $ */ /* @@ -41,7 +41,7 @@ #if 0 static char sccsid[] = "@(#)misc.c 8.1 (Berkeley) 6/6/93"; #else -static char rcsid[] = "$OpenBSD: misc.c,v 1.17 2000/01/15 14:26:00 espie Exp $"; +static char rcsid[] = "$OpenBSD: misc.c,v 1.18 2000/03/11 15:54:44 espie Exp $"; #endif #endif /* not lint */ @@ -142,6 +142,18 @@ pbnum(n) putback('-'); } +/* + * pbunsigned - convert unsigned long to its decimal digits and push them back on input, least significant digit first; always pushes at least one digit. + */ +void +pbunsigned(n) + unsigned long n; +{ + do { + putback(n % 10 + '0'); + } + while ((n /= 10) > 0); +} void initspaces() @@ -269,7 +281,7 @@ killdiv() } } -char * +void * xalloc(n) size_t n; { @@ -334,3 +346,17 @@ release_input(f) * error information pointing to it. */ } + +void +doprintlineno(f) + struct input_file *f; +{ + pbunsigned(f->lineno); /* push back the line number recorded in f */ +} + +void +doprintfilename(f) + struct input_file *f; +{ + pbstr(f->name); /* push back the file name recorded in f */ +} -- 2.20.1