From: schwarze
Date: Thu, 16 May 2024 21:21:08 +0000 (+0000)
Subject: Fix UTF-16 surrogate detection:
X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=700ead54516a6e68d161bf3f7b938e4403f771ff;p=openbsd

Fix UTF-16 surrogate detection:
lower case variants have to be rejected, too.
---

diff --git a/usr.bin/mandoc/roff_escape.c b/usr.bin/mandoc/roff_escape.c
index ab4ea7343d0..4367876e7bc 100644
--- a/usr.bin/mandoc/roff_escape.c
+++ b/usr.bin/mandoc/roff_escape.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: roff_escape.c,v 1.14 2022/06/08 13:08:00 schwarze Exp $ */
+/* $OpenBSD: roff_escape.c,v 1.15 2024/05/16 21:21:08 schwarze Exp $ */
 /*
  * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2022
  * Ingo Schwarze
@@ -467,13 +467,12 @@ roff_escape(const char *buf, const int ln, const int aesc,
 		/*
 		 * Unicode escapes are defined in groff as \[u0000]
 		 * to \[u10FFFF], where the contained value must be
-		 * a valid Unicode codepoint. Here, however, only
-		 * check the length and range.
+		 * a valid Unicode codepoint.
 		 */
 		if (buf[iarg] != 'u' || argl < 5 || argl > 7)
 			break;
-		if (argl == 7 &&
+		if (argl == 7 && /* beyond the Unicode range */
 		    (buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) {
 			err = MANDOCERR_ESC_BADCHAR;
 			break;
@@ -482,8 +481,9 @@ roff_escape(const char *buf, const int ln, const int aesc,
 			err = MANDOCERR_ESC_BADCHAR;
 			break;
 		}
-		if (argl == 5 && buf[iarg + 1] == 'D' &&
-		    strchr("89ABCDEF", buf[iarg + 2]) != NULL) {
+		if (argl == 5 && /* UTF-16 surrogate */
+		    toupper((unsigned char)buf[iarg + 1]) == 'D' &&
+		    strchr("89ABCDEFabcdef", buf[iarg + 2]) != NULL) {
 			err = MANDOCERR_ESC_BADCHAR;
 			break;
 		}