From: millert Date: Mon, 18 Sep 2023 19:32:19 +0000 (+0000) Subject: Disable utf-8 for non-multibyte locales, such as C or POSIX. X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=c35264f9919d4da2cd92f67a07fe5a337f4506ad;p=openbsd Disable utf-8 for non-multibyte locales, such as C or POSIX. This makes it possible to get the old awk behavior (where chars are bytes) by setting LC_CTYPE to C or POSIX. OK schwarze@ --- diff --git a/usr.bin/awk/awk.h b/usr.bin/awk/awk.h index bd491ad2908..a57e27eaffd 100644 --- a/usr.bin/awk/awk.h +++ b/usr.bin/awk/awk.h @@ -1,4 +1,4 @@ -/* $OpenBSD: awk.h,v 1.29 2023/09/17 14:49:44 millert Exp $ */ +/* $OpenBSD: awk.h,v 1.30 2023/09/18 19:32:19 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -66,6 +66,8 @@ extern bool do_posix; /* true if POSIXLY_CORRECT set */ #define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. */ extern int recsize; /* size of current record, orig RECSIZE */ +extern size_t awk_mb_cur_max; /* max size of a multi-byte character */ + extern char EMPTY[]; /* this avoid -Wwritable-strings issues */ extern char **FS; extern char **RS; diff --git a/usr.bin/awk/b.c b/usr.bin/awk/b.c index 8601c7465ec..60b38d43198 100644 --- a/usr.bin/awk/b.c +++ b/usr.bin/awk/b.c @@ -1,4 +1,4 @@ -/* $OpenBSD: b.c,v 1.38 2023/09/17 14:49:44 millert Exp $ */ +/* $OpenBSD: b.c,v 1.39 2023/09/18 19:32:19 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -834,7 +834,7 @@ bool fnematch(fa *pfa, FILE *f, char **pbuf, int *pbufsize, int quantum) buf[k++] = (c = getc(f)) != EOF ? c : 0; } c = (uschar)buf[j]; - if (c < 128) + if (c < 128 || awk_mb_cur_max == 1) rune = c; else { j--; diff --git a/usr.bin/awk/main.c b/usr.bin/awk/main.c index f4a359dac7e..5a976d82814 100644 --- a/usr.bin/awk/main.c +++ b/usr.bin/awk/main.c @@ -1,4 +1,4 @@ -/* $OpenBSD: main.c,v 1.60 2023/09/18 15:20:48 jmc Exp $ */ +/* $OpenBSD: main.c,v 1.61 2023/09/18 19:32:19 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -56,6 +56,8 @@ bool CSV = false; /* true for csv input */ bool safe = false; /* true => "safe" mode */ bool do_posix = false; /* true => POSIX mode */ +size_t awk_mb_cur_max = 1; + static noreturn void fpecatch(int n #ifdef SA_SIGINFO , siginfo_t *si, void *uc @@ -135,6 +137,7 @@ int main(int argc, char *argv[]) setlocale(LC_CTYPE, ""); setlocale(LC_NUMERIC, "C"); /* for parsing cmdline & prog */ + awk_mb_cur_max = MB_CUR_MAX; cmdname = __progname; if (pledge("stdio rpath wpath cpath proc exec", NULL) == -1) { diff --git a/usr.bin/awk/run.c b/usr.bin/awk/run.c index c4719bbd516..d6920fa276a 100644 --- a/usr.bin/awk/run.c +++ b/usr.bin/awk/run.c @@ -1,4 +1,4 @@ -/* $OpenBSD: run.c,v 1.76 2023/09/18 15:16:22 deraadt Exp $ */ +/* $OpenBSD: run.c,v 1.77 2023/09/18 19:32:19 millert Exp $ */ /**************************************************************** Copyright (C) Lucent Technologies 1997 All Rights Reserved @@ -606,7 +606,7 @@ int u8_isutf(const char *s) unsigned char c; c = s[0]; - if (c < 128) + if (c < 128 || awk_mb_cur_max == 1) return 1; /* what if it's 0? */ n = strlen(s); @@ -633,7 +633,7 @@ int u8_rune(int *rune, const char *s) unsigned char c; c = s[0]; - if (c < 128) { + if (c < 128 || awk_mb_cur_max == 1) { *rune = c; return 1; } @@ -680,7 +680,7 @@ int u8_strlen(const char *s) totlen = 0; for (i = 0; i < n; i += len) { c = s[i]; - if (c < 128) { + if (c < 128 || awk_mb_cur_max == 1) { len = 1; } else { len = u8_nextlen(&s[i]); @@ -1290,7 +1290,7 @@ int format(char **pbuf, int *pbufsize, const char *s, Node *a) /* printf-like co int charval = (int) getfval(x); if (charval != 0) { - if (charval < 128) + if (charval < 128 || awk_mb_cur_max == 1) snprintf(p, BUFSZ(p), fmt, charval); else { // possible unicode character