From 456f9fcb4d6c04a41332c34b82ff40908fd74afa Mon Sep 17 00:00:00 2001 From: espie Date: Sat, 16 Aug 2008 12:21:46 +0000 Subject: [PATCH] argument parsing should only skip spaces outside of parenthesis. Inside matching parenthesis, keep spaces as is (use chrsave instead of pbstr, since there's no way it can be a further macro expansion). Fixes a long-standing issue with autoconf ( --option -> --option), matches other m4 than gnum4 okay millert@, fries@ --- usr.bin/m4/eval.c | 14 +++--- usr.bin/m4/extern.h | 14 +++--- usr.bin/m4/gnum4.c | 102 +++++++++++++++++++++++++++++------------ usr.bin/m4/m4.1 | 16 ++++--- usr.bin/m4/main.c | 7 +-- usr.bin/m4/misc.c | 18 ++++---- usr.bin/m4/parser.y | 3 +- usr.bin/m4/tokenizer.l | 37 ++++++++++++++- 8 files changed, 148 insertions(+), 63 deletions(-) diff --git a/usr.bin/m4/eval.c b/usr.bin/m4/eval.c index bd9cb89bdeb..e67a71c6392 100644 --- a/usr.bin/m4/eval.c +++ b/usr.bin/m4/eval.c @@ -1,4 +1,4 @@ -/* $OpenBSD: eval.c,v 1.63 2006/03/24 08:03:44 espie Exp $ */ +/* $OpenBSD: eval.c,v 1.64 2008/08/16 12:21:46 espie Exp $ */ /* $NetBSD: eval.c,v 1.7 1996/11/10 21:21:29 pk Exp $ */ /* @@ -139,7 +139,7 @@ expand_builtin(const char *argv[], int argc, int td) */ ac = argc; - if (argc == 3 && !*(argv[2])) + if (argc == 3 && !*(argv[2]) && !mimic_gnu) argc--; switch (td & TYPEMASK) { @@ -576,9 +576,10 @@ expand_macro(const char *argv[], int argc) void dodefine(const char *name, const char *defn) { - if (!*name) + if (!*name && !mimic_gnu) m4errx(1, "null definition."); - macro_define(name, defn); + else + macro_define(name, defn); } /* @@ -612,9 +613,10 @@ dodefn(const char *name) static void dopushdef(const char *name, const char *defn) { - if (!*name) + if (!*name && !mimic_gnu) m4errx(1, "null definition."); - macro_pushdef(name, defn); + else + macro_pushdef(name, defn); } /* diff --git a/usr.bin/m4/extern.h b/usr.bin/m4/extern.h index 01df741404a..16aaf2bbc8a 100644 --- a/usr.bin/m4/extern.h +++ b/usr.bin/m4/extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: extern.h,v 1.45 2006/03/20 20:27:45 espie Exp $ */ +/* $OpenBSD: extern.h,v 1.46 2008/08/16 12:21:46 espie Exp $ */ /* $NetBSD: extern.h,v 1.3 1996/01/13 23:25:24 pk Exp $ */ /*- @@ -109,7 +109,7 @@ extern void usage(void); extern void resizedivs(int); extern size_t buffer_mark(void); extern void dump_buffer(FILE *, size_t); -extern void m4errx(int, const char *, ...); +extern void __dead m4errx(int, const char *, ...); extern int obtain_char(struct input_file *); extern void set_input(struct input_file *, FILE *, const char *); @@ -133,7 +133,7 @@ extern void release_input(struct input_file *); /* and corresponding exposure for local symbols */ extern void enlarge_bufspace(void); extern void enlarge_strspace(void); -extern char *endpbb; +extern unsigned char *endpbb; extern char *endest; /* trace.c */ @@ -156,10 +156,10 @@ extern int fp; /* m4 call frame pointer */ extern int ilevel; /* input file stack pointer */ extern int oindex; /* diversion index. */ extern int sp; /* current m4 stack pointer */ -extern char *bp; /* first available character */ -extern char *buf; /* push-back buffer */ -extern char *bufbase; /* buffer base for this ilevel */ -extern char *bbase[]; /* buffer base per ilevel */ +extern unsigned char *bp; /* first available character */ +extern unsigned char *buf; /* push-back buffer */ +extern unsigned char *bufbase; /* buffer base for this ilevel */ +extern unsigned char *bbase[]; /* buffer base per ilevel */ extern char ecommt[MAXCCHARS+1];/* end character for comment */ extern char *ep; /* first free char in strspace */ extern char lquote[MAXCCHARS+1];/* left quote character (`) */ diff --git a/usr.bin/m4/gnum4.c b/usr.bin/m4/gnum4.c index 2720c851aaf..f9ee4ed3e94 100644 --- a/usr.bin/m4/gnum4.c +++ b/usr.bin/m4/gnum4.c @@ -1,4 +1,4 @@ -/* $OpenBSD: gnum4.c,v 1.36 2006/03/24 08:03:44 espie Exp $ */ +/* $OpenBSD: gnum4.c,v 1.37 2008/08/16 12:21:46 espie Exp $ */ /* * Copyright (c) 1999 Marc Espie @@ -508,43 +508,85 @@ doformat(const char *argv[], int argc) { const char *format = argv[2]; int pos = 3; + int left_padded; + long width; + size_t l; + const char *thisarg; + char temp[2]; + long extra; while (*format != 0) { if (*format != '%') { addchar(*format++); + continue; + } + + format++; + if (*format == '%') { + addchar(*format++); + continue; + } + if (*format == 0) { + addchar('%'); + break; + } + + if (*format == '*') { + format++; + if (pos >= argc) + m4errx(1, + "Format with too many format specifiers."); + width = strtol(argv[pos++], NULL, 10); + } else { + width = strtol(format, (char **)&format, 10); + } + if (width < 0) { + left_padded = 1; + width = -width; } else { + left_padded = 0; + } + if (*format == '.') { format++; - if (*format == '%' || *format == 0) { - addchar('%'); - if (*format == '%') - format++; + if (*format == '*') { + format++; + if (pos >= argc) + m4errx(1, + "Format with too many format specifiers."); + extra = strtol(argv[pos++], NULL, 10); } else { - int left_padded = 0; - unsigned long width; - size_t l; - - if (*format == '-') { - left_padded = 1; - format++; - } - width = strtoul(format, (char **)&format, 10); - if (*format != 's') { - m4errx(1, "Unsupported format specification: %s.", argv[2]); - } - format++; - if (pos >= argc) - m4errx(1, "Format with too many values."); - l = strlen(argv[pos]); - if (!left_padded) { - while (l < width--) - addchar(' '); - } - addchars(argv[pos++], l); - if (left_padded) { - while (l < width--) - addchar(' '); - } + extra = strtol(format, (char **)&format, 10); } + } else { + extra = LONG_MAX; + } + if (pos >= argc) + m4errx(1, "Format with too many format specifiers."); + switch(*format) { + case 's': + thisarg = argv[pos++]; + break; + case 'c': + temp[0] = strtoul(argv[pos++], NULL, 10); + temp[1] = 0; + thisarg = temp; + break; + default: + m4errx(1, "Unsupported format specification: %s.", + argv[2]); + } + format++; + l = strlen(thisarg); + if (l > extra) + l = extra; + if (!left_padded) { + while (l < width--) + addchar(' '); + } + addchars(thisarg, l); + if (left_padded) { + while (l < width--) + addchar(' '); } } pbstr(getstring()); diff --git a/usr.bin/m4/m4.1 b/usr.bin/m4/m4.1 index 1b637bb161a..8f75066e844 100644 --- a/usr.bin/m4/m4.1 +++ b/usr.bin/m4/m4.1 @@ -1,4 +1,4 @@ -.\" @(#) $OpenBSD: m4.1,v 1.50 2007/05/31 19:20:12 jmc Exp $ +.\" @(#) $OpenBSD: m4.1,v 1.51 2008/08/16 12:21:46 espie Exp $ .\" .\" Copyright (c) 1989, 1993 .\" The Regents of the University of California. All rights reserved. @@ -30,7 +30,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd $Mdocdate: May 31 2007 $ +.Dd $Mdocdate: August 16 2008 $ .Dt M4 1 .Os .Sh NAME @@ -138,7 +138,11 @@ Activate GNU-m4 compatibility mode. In this mode, translit handles simple character ranges (e.g., a-z), regular expressions mimic emacs behavior, multiple m4wrap calls are handled as a stack, -and the number of diversions is unlimited. +the number of diversions is unlimited, +empty names for macro definitions are allowed, +and eval understands +.Sq 0rbase:value +numbers. .It Fl I Ar "dirname" Add directory .Ar dirname @@ -268,9 +272,9 @@ with escape sequences substituted with .Fa arg1 and following arguments, in a way similar to .Xr printf 3 . -This built-in is only available in GNU-m4 compatibility mode, and the -left-padding flag, an optional field width and the %s data type -are the only supported parameters. +This built-in is only available in GNU-m4 compatibility mode, and the only +parameters implemented are there for autoconf compatibility: +left-padding flag, an optional field width, a maximum field width, *-specified field widths, and the %s and %c data type. .It Fn ifdef name yes no If the macro named by the first argument is defined then return the second argument, otherwise the third. diff --git a/usr.bin/m4/main.c b/usr.bin/m4/main.c index b85ac986cee..377af13a2f3 100644 --- a/usr.bin/m4/main.c +++ b/usr.bin/m4/main.c @@ -1,4 +1,4 @@ -/* $OpenBSD: main.c,v 1.75 2008/08/16 12:19:49 espie Exp $ */ +/* $OpenBSD: main.c,v 1.76 2008/08/16 12:21:46 espie Exp $ */ /* $NetBSD: main.c,v 1.12 1997/02/08 23:54:49 cgd Exp $ */ /*- @@ -428,8 +428,9 @@ macro(void) case LPAREN: if (PARLEV > 0) chrsave(t); - while (isspace(l = gpbc())) - ; /* skip blank, tab, nl.. */ + while (isspace(l = gpbc())) /* skip blank, tab, nl.. */ + if (PARLEV > 0) + chrsave(l); pushback(l); record(paren, PARLEV++); break; diff --git a/usr.bin/m4/misc.c b/usr.bin/m4/misc.c index eca470741d4..67dc778865b 100644 --- a/usr.bin/m4/misc.c +++ b/usr.bin/m4/misc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: misc.c,v 1.37 2007/05/05 03:42:49 ray Exp $ */ +/* $OpenBSD: misc.c,v 1.38 2008/08/16 12:21:46 espie Exp $ */ /* $NetBSD: misc.c,v 1.6 1995/09/28 05:37:41 tls Exp $ */ /* @@ -54,11 +54,11 @@ char *endest; /* end of string space */ static size_t strsize = STRSPMAX; static size_t bufsize = BUFSIZE; -char *buf; /* push-back buffer */ -char *bufbase; /* the base for current ilevel */ -char *bbase[MAXINP]; /* the base for each ilevel */ -char *bp; /* first available character */ -char *endpbb; /* end of push-back buffer */ +unsigned char *buf; /* push-back buffer */ +unsigned char *bufbase; /* the base for current ilevel */ +unsigned char *bbase[MAXINP]; /* the base for each ilevel */ +unsigned char *bp; /* first available character */ +unsigned char *endpbb; /* end of push-back buffer */ /* @@ -163,7 +163,7 @@ initspaces() strspace = xalloc(strsize+1, NULL); ep = strspace; endest = strspace+strsize; - buf = (char *)xalloc(bufsize, NULL); + buf = (unsigned char *)xalloc(bufsize, NULL); bufbase = buf; bp = buf; endpbb = buf + bufsize; @@ -195,7 +195,7 @@ enlarge_strspace() void enlarge_bufspace() { - char *newbuf; + unsigned char *newbuf; int i; bufsize += bufsize/2; @@ -418,7 +418,7 @@ buffer_mark() void dump_buffer(FILE *f, size_t m) { - char *s; + unsigned char *s; for (s = bp; s-buf > m;) fputc(*--s, f); diff --git a/usr.bin/m4/parser.y b/usr.bin/m4/parser.y index 7ab40204da8..a67af9548ea 100644 --- a/usr.bin/m4/parser.y +++ b/usr.bin/m4/parser.y @@ -1,5 +1,5 @@ %{ -/* $OpenBSD: parser.y,v 1.3 2006/01/20 23:10:19 espie Exp $ */ +/* $OpenBSD: parser.y,v 1.4 2008/08/16 12:21:46 espie Exp $ */ /* * Copyright (c) 2004 Marc Espie * @@ -22,6 +22,7 @@ extern int yylex(void); extern int yyerror(const char *); %} %token NUMBER +%token ERROR %left LOR %left LAND %left '|' diff --git a/usr.bin/m4/tokenizer.l b/usr.bin/m4/tokenizer.l index 895ea5ca887..10de9a35941 100644 --- a/usr.bin/m4/tokenizer.l +++ b/usr.bin/m4/tokenizer.l @@ -1,5 +1,5 @@ %{ -/* $OpenBSD: tokenizer.l,v 1.3 2006/01/20 23:10:19 espie Exp $ */ +/* $OpenBSD: tokenizer.l,v 1.4 2008/08/16 12:21:46 espie Exp $ */ /* * Copyright (c) 2004 Marc Espie * @@ -21,9 +21,11 @@ #include #include +extern int mimic_gnu; extern int32_t yylval; int32_t number(void); +int32_t parse_radix(void); %} delim [ \t\n] @@ -31,10 +33,17 @@ ws {delim}+ hex 0[xX][0-9a-fA-F]+ oct 0[0-7]* dec [1-9][0-9]* +radix 0[rR][0-9]+:[0-9a-zA-Z]+ %% {ws} {/* just skip it */} {hex}|{oct}|{dec} { yylval = number(); return(NUMBER); } +{radix} { if (mimic_gnu) { + yylval = parse_radix(); return(NUMBER); + } else { + return(ERROR); + } + } "<=" { return(LE); } ">=" { return(GE); } "<<" { return(LSHIFT); } @@ -58,5 +67,31 @@ number() fprintf(stderr, "m4: numeric overflow in expr: %s\n", yytext); } return l; +} + +int32_t +parse_radix() +{ + long base; + char *next; + long l; + l = 0; + base = strtol(yytext+2, &next, 0); + if (base > 36 || next == NULL) { + fprintf(stderr, "m4: error in number %s\n", yytext); + } else { + next++; + while (*next != 0) { + if (*next >= '0' && *next <= '9') + l = base * l + *next - '0'; + else if (*next >= 'a' && *next <= 'z') + l = base * l + *next - 'a' + 10; + else if (*next >= 'A' && *next <= 'Z') + l = base * l + *next - 'A' + 10; + next++; + } + } + return l; } + -- 2.20.1