From c4ead62f1f65c8851ae3bfdb6d7e34fef0cc4223 Mon Sep 17 00:00:00 2001 From: millert Date: Sun, 10 Sep 2023 14:59:00 +0000 Subject: [PATCH] Update awk to Sep 6, 2023 version. --- usr.bin/awk/FIXES | 8 ++++++++ usr.bin/awk/awkgram.y | 16 ++++++++-------- usr.bin/awk/lex.c | 4 ++-- usr.bin/awk/lib.c | 10 ++++------ usr.bin/awk/main.c | 4 ++-- usr.bin/awk/parse.c | 4 ++-- usr.bin/awk/proto.h | 4 ++-- 7 files changed, 28 insertions(+), 22 deletions(-) diff --git a/usr.bin/awk/FIXES b/usr.bin/awk/FIXES index 53c78410647..8cbd6ac1a09 100644 --- a/usr.bin/awk/FIXES +++ b/usr.bin/awk/FIXES @@ -25,6 +25,14 @@ THIS SOFTWARE. This file lists all bug fixes, changes, etc., made since the AWK book was sent to the printers in August 1987. +Sep 06, 2023: + Fix edge case where FS is changed on command line. Thanks to + Gordon Shephard and Miguel Pineiro Jr. + + Fix regular expression clobbering in the lexer, where lexer does + not make a copy of regexp literals. Also makedfa memory leaks have + been plugged. Thanks to Miguel Pineiro Jr. + Dec 15, 2022: Force hex escapes in strings to be no more than two characters, as they already are in regular expressions. 
This brings internal diff --git a/usr.bin/awk/awkgram.y b/usr.bin/awk/awkgram.y index f9a5330078b..9894bcc7354 100644 --- a/usr.bin/awk/awkgram.y +++ b/usr.bin/awk/awkgram.y @@ -1,4 +1,4 @@ -/* $OpenBSD: awkgram.y,v 1.15 2022/09/01 15:21:28 millert Exp $ */ +/* $OpenBSD: awkgram.y,v 1.16 2023/09/10 14:59:00 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -205,7 +205,7 @@ ppattern: { $$ = op2(BOR, notnull($1), notnull($3)); } | ppattern and ppattern %prec AND { $$ = op2(AND, notnull($1), notnull($3)); } - | ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); } + | ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); free($3); } | ppattern MATCHOP ppattern { if (constnode($3)) { $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0)); @@ -233,7 +233,7 @@ pattern: | pattern LE pattern { $$ = op2($2, $1, $3); } | pattern LT pattern { $$ = op2($2, $1, $3); } | pattern NE pattern { $$ = op2($2, $1, $3); } - | pattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); } + | pattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); free($3); } | pattern MATCHOP pattern { if (constnode($3)) { $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0)); @@ -283,7 +283,7 @@ rbrace: re: reg_expr - { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); } + { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); free($1); } | NOT re { $$ = op1(NOT, notnull($2)); } ; @@ -407,7 +407,7 @@ term: $$ = op2(INDEX, $3, (Node*)$5); } | '(' pattern ')' { $$ = $2; } | MATCHFCN '(' pattern comma reg_expr ')' - { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); } + { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); free($5); } | MATCHFCN '(' pattern comma pattern ')' { if (constnode($5)) { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1)); @@ -418,13 +418,13 @@ term: | SPLIT '(' pattern comma varname comma 
pattern ')' /* string */ { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); } | SPLIT '(' pattern comma varname comma reg_expr ')' /* const /regexp/ */ - { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); } + { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); free($7); } | SPLIT '(' pattern comma varname ')' { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */ | SPRINTF '(' patlist ')' { $$ = op1($1, $3); } | string { $$ = celltonode($1, CCON); } | subop '(' reg_expr comma pattern ')' - { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); } + { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); free($3); } | subop '(' pattern comma pattern ')' { if (constnode($3)) { $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode()); @@ -432,7 +432,7 @@ term: } else $$ = op4($1, (Node *)1, $3, $5, rectonode()); } | subop '(' reg_expr comma pattern comma var ')' - { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); } + { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); free($3); } | subop '(' pattern comma pattern comma var ')' { if (constnode($3)) { $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7); diff --git a/usr.bin/awk/lex.c b/usr.bin/awk/lex.c index 213f5d0a9e7..9ca1d0dda17 100644 --- a/usr.bin/awk/lex.c +++ b/usr.bin/awk/lex.c @@ -1,4 +1,4 @@ -/* $OpenBSD: lex.c,v 1.29 2023/09/09 18:59:43 millert Exp $ */ +/* $OpenBSD: lex.c,v 1.30 2023/09/10 14:59:00 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -588,7 +588,7 @@ int regexpr(void) *bp = 0; if (c == 0) SYNTAX("non-terminated regular expression %.10s...", buf); - yylval.s = buf; + yylval.s = tostring(buf); unput('/'); RET(REGEXPR); } diff --git a/usr.bin/awk/lib.c b/usr.bin/awk/lib.c index 87cc78ea58a..90d34a7eff1 100644 --- a/usr.bin/awk/lib.c +++ b/usr.bin/awk/lib.c @@ -1,4 +1,4 @@ -/* $OpenBSD: lib.c,v 1.49 
2022/09/01 15:21:28 millert Exp $ */ +/* $OpenBSD: lib.c,v 1.50 2023/09/10 14:59:00 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -148,11 +148,6 @@ int getrec(char **pbuf, int *pbufsize, bool isrecord) /* get next input record * } DPRINTF("RS=<%s>, FS=<%s>, ARGC=%g, FILENAME=%s\n", *RS, *FS, *ARGC, *FILENAME); - if (isrecord) { - donefld = false; - donerec = true; - savefs(); - } saveb0 = buf[0]; buf[0] = 0; while (argno < *ARGC || infile == stdin) { @@ -192,6 +187,9 @@ int getrec(char **pbuf, int *pbufsize, bool isrecord) /* get next input record * fldtab[0]->fval = result; fldtab[0]->tval |= NUM; } + donefld = false; + donerec = true; + savefs(); } setfval(nrloc, nrloc->fval+1); setfval(fnrloc, fnrloc->fval+1); diff --git a/usr.bin/awk/main.c b/usr.bin/awk/main.c index ccd557c1ef2..67eeff92394 100644 --- a/usr.bin/awk/main.c +++ b/usr.bin/awk/main.c @@ -1,4 +1,4 @@ -/* $OpenBSD: main.c,v 1.57 2023/09/09 18:59:43 millert Exp $ */ +/* $OpenBSD: main.c,v 1.58 2023/09/10 14:59:00 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -23,7 +23,7 @@ ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. ****************************************************************/ -const char *version = "version 20221215"; +const char *version = "version 20230909"; #define DEBUG #include diff --git a/usr.bin/awk/parse.c b/usr.bin/awk/parse.c index abd5c4fc6fc..cb192deda60 100644 --- a/usr.bin/awk/parse.c +++ b/usr.bin/awk/parse.c @@ -1,4 +1,4 @@ -/* $OpenBSD: parse.c,v 1.13 2020/12/09 20:00:11 millert Exp $ */ +/* $OpenBSD: parse.c,v 1.14 2023/09/10 14:59:00 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -30,7 +30,7 @@ THIS SOFTWARE. 
#include "awk.h" #include "awkgram.tab.h" -Node *nodealloc(int n) +Node *nodealloc(size_t n) { Node *x; diff --git a/usr.bin/awk/proto.h b/usr.bin/awk/proto.h index 0c37d53725d..374c5af9451 100644 --- a/usr.bin/awk/proto.h +++ b/usr.bin/awk/proto.h @@ -1,4 +1,4 @@ -/* $OpenBSD: proto.h,v 1.20 2020/12/09 20:00:11 millert Exp $ */ +/* $OpenBSD: proto.h,v 1.21 2023/09/10 14:59:00 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -69,7 +69,7 @@ extern void freefa(fa *); extern int pgetc(void); extern char *cursource(void); -extern Node *nodealloc(int); +extern Node *nodealloc(size_t); extern Node *exptostat(Node *); extern Node *node1(int, Node *); extern Node *node2(int, Node *, Node *); -- 2.20.1