From: schwarze
Date: Fri, 3 Jun 2022 11:50:25 +0000 (+0000)
Subject: During identifier parsing, handle undefined escape sequences
X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=a72149c535858e8c6447105df6192e1459f98c38;p=openbsd

During identifier parsing, handle undefined escape sequences
in the same way as groff:

 * \\ is always reduced to \
 * \. is always reduced to .
 * other undefined escape sequences are usually reduced to the
   escape name, for example \G to G, except during the expansion
   of expanding escape sequences having the standard argument form
   (in particular \* and \n), in which case the backslash is
   preserved literally.

Yes, this is confusing indeed.

For example, the following have the same meaning:

 * .ds \. and .ds . which is not the same as .ds \\.
 * \*[\.] and \*[.] which is not the same as \*[\\.]
 * .ds \G and .ds G which is not the same as .ds \\G
 * \*[\G] and \*[\\G] which is not the same as \*[G] <- sic!

To feel less dirty, have a leaning toothpick, if you are so inclined.

This patch also slightly improves the string shown by the
"escaped character not allowed in a name" error message.
--- diff --git a/regress/usr.bin/mandoc/roff/args/man.out_lint b/regress/usr.bin/mandoc/roff/args/man.out_lint index 6d6bbd84e3d..5aaebd9e79f 100644 --- a/regress/usr.bin/mandoc/roff/args/man.out_lint +++ b/regress/usr.bin/mandoc/roff/args/man.out_lint @@ -6,4 +6,4 @@ mandoc: man.in:87:26: STYLE: whitespace at end of input line mandoc: man.in:91:27: STYLE: whitespace at end of input line mandoc: man.in:104:5: STYLE: unterminated quoted argument mandoc: man.in:107:9: STYLE: unterminated quoted argument -mandoc: man.in:131:1: ERROR: escaped character not allowed in a name: IB\( +mandoc: man.in:131:1: ERROR: escaped character not allowed in a name: IB\(lq diff --git a/regress/usr.bin/mandoc/roff/args/mdoc.out_lint b/regress/usr.bin/mandoc/roff/args/mdoc.out_lint index 4422d754f9b..1691cf2d4ca 100644 --- a/regress/usr.bin/mandoc/roff/args/mdoc.out_lint +++ b/regress/usr.bin/mandoc/roff/args/mdoc.out_lint @@ -14,4 +14,4 @@ mandoc: mdoc.in:112:5: STYLE: unterminated quoted argument mandoc: mdoc.in:112:11: STYLE: whitespace at end of input line mandoc: mdoc.in:113:9: STYLE: unterminated quoted argument mandoc: mdoc.in:113:15: STYLE: whitespace at end of input line -mandoc: mdoc.in:121:1: ERROR: escaped character not allowed in a name: Fl\( +mandoc: mdoc.in:121:1: ERROR: escaped character not allowed in a name: Fl\(lq diff --git a/regress/usr.bin/mandoc/roff/cond/register.in b/regress/usr.bin/mandoc/roff/cond/register.in index 879c573c2d9..7f137c9c167 100644 --- a/regress/usr.bin/mandoc/roff/cond/register.in +++ b/regress/usr.bin/mandoc/roff/cond/register.in @@ -1,5 +1,5 @@ -.\" $OpenBSD: register.in,v 1.3 2019/02/06 20:54:28 schwarze Exp $ -.TH REGISTER 1 "February 6, 2019" +.\" $OpenBSD: register.in,v 1.4 2022/06/03 11:50:25 schwarze Exp $ +.TH REGISTER 1 "June 3, 2022" .SH NAME register \- conditional testing whether a register is defined .SH DESCRIPTION @@ -11,10 +11,35 @@ register \- conditional testing whether a register is defined .el OOPS .if !rmyreg OOPS .PP 
-identifier + identifier: +tab after identifier: .ie rmyreg myreg is defined .el OOPS .PP escape sequence after identifier: .ie rmyreg\(enmyreg is defined .el OOPS +.PP +backslash in name: +.nr \\ 0 +.ie r\\ \e is defined +.el OOPS +.rr \\ +.if r\\ is still defined!? +.PP +dot in name: +.nr . 0 +.ie r. \&. is defined +.el OOPS +.ie r\. \e. is defined +.el OOPS +.rr \. +.if r. is still defined!? +.PP +invalid escape in name: +.nr G 0 +.ie rG G is defined +.el OOPS +.ie r\G \eG is defined +.el OOPS +.rr \G +.if rG is still defined!? diff --git a/regress/usr.bin/mandoc/roff/cond/register.out_ascii b/regress/usr.bin/mandoc/roff/cond/register.out_ascii index 928c6138289..673b9f12400 100644 --- a/regress/usr.bin/mandoc/roff/cond/register.out_ascii +++ b/regress/usr.bin/mandoc/roff/cond/register.out_ascii @@ -7,8 +7,14 @@ DDEESSCCRRIIPPTTIIOONN not yet defined now defined - identifier + identifier: myreg is defined + tab after identifier: myreg is defined escape sequence after identifier: -myreg is defined -OpenBSD February 6, 2019 REGISTER(1) + backslash in name: \ is defined + + dot in name: . is defined \. is defined + + invalid escape in name: G is defined \G is defined + +OpenBSD June 3, 2022 REGISTER(1) diff --git a/regress/usr.bin/mandoc/roff/cond/string.in b/regress/usr.bin/mandoc/roff/cond/string.in index 273984be9e7..fedb4d81930 100644 --- a/regress/usr.bin/mandoc/roff/cond/string.in +++ b/regress/usr.bin/mandoc/roff/cond/string.in @@ -1,5 +1,5 @@ -.\" $OpenBSD: string.in,v 1.4 2019/02/06 20:54:28 schwarze Exp $ -.TH STRING 1 "February 6, 2019" +.\" $OpenBSD: string.in,v 1.5 2022/06/03 11:50:25 schwarze Exp $ +.TH STRING 1 "June 3, 2022" .SH NAME string \- conditional testing whether a string is defined .SH DESCRIPTION @@ -40,3 +40,28 @@ identifier and tab: escape sequence after identifier: .ie d mystr\(enmystr is defined .el OOPS +.PP +backslash in name: +.ds \\ value +.ie d \\ \e is defined +.el OOPS +.rm \\ +.if d \\ still defined!? +.PP +dot in name: +.ds . 
value +.ie d . \&. is defined +.el OOPS +.ie d \. \e. is defined +.el OOPS +.rm . +.if d . still defined!? +.PP +invalid escape in name: +.ds G value +.ie d G G is defined +.el OOPS +.ie d \G \eG is defined +.el OOPS +.rm \G +.if d G still defined!? diff --git a/regress/usr.bin/mandoc/roff/cond/string.out_ascii b/regress/usr.bin/mandoc/roff/cond/string.out_ascii index 2d80a9033f4..c67c0c55a24 100644 --- a/regress/usr.bin/mandoc/roff/cond/string.out_ascii +++ b/regress/usr.bin/mandoc/roff/cond/string.out_ascii @@ -19,4 +19,10 @@ DDEESSCCRRIIPPTTIIOONN escape sequence after identifier: -mystr is defined -OpenBSD February 6, 2019 STRING(1) + backslash in name: \ is defined + + dot in name: . is defined \. is defined + + invalid escape in name: G is defined \G is defined + +OpenBSD June 3, 2022 STRING(1) diff --git a/regress/usr.bin/mandoc/roff/de/escname.in b/regress/usr.bin/mandoc/roff/de/escname.in index 67d26091090..99305eaf7c1 100644 --- a/regress/usr.bin/mandoc/roff/de/escname.in +++ b/regress/usr.bin/mandoc/roff/de/escname.in @@ -1,5 +1,5 @@ -.\" $OpenBSD: escname.in,v 1.4 2017/07/04 14:53:27 schwarze Exp $ -.Dd $Mdocdate: July 4 2017 $ +.\" $OpenBSD: escname.in,v 1.5 2022/06/03 11:50:25 schwarze Exp $ +.Dd $Mdocdate: June 3 2022 $ .Dt DE-ESCNAME 1 .Os .Sh NAME @@ -23,10 +23,33 @@ define first = val1 val1 .. .Pp -Values (first, second, first\esecond): +define first\e.second = val_dot +.de first\.second +val_dot +.. +.Pp +define first\eGsecond = val_inval +.de first\Gsecond +val_inval +.. 
+.Pp +Values: +.Bl -tag -width first_.second -compact +.It first .first +.It second .second +.It first\esecond .first\\second +.It first.second +.first.second +.It first\e.second +.first\.second +.It firstGsecond +.firstGsecond +.It first\eGsecond +.first\Gsecond +.El .Pp Remove all but second: .rm first\\second first\esecond second diff --git a/regress/usr.bin/mandoc/roff/de/escname.out_ascii b/regress/usr.bin/mandoc/roff/de/escname.out_ascii index 367b5d0fb2c..57b8c34f196 100644 --- a/regress/usr.bin/mandoc/roff/de/escname.out_ascii +++ b/regress/usr.bin/mandoc/roff/de/escname.out_ascii @@ -12,7 +12,18 @@ DDEESSCCRRIIPPTTIIOONN define first = val1 - Values (first, second, first\second): val1 val2 val3 + define first\.second = val_dot + + define first\Gsecond = val_inval + + Values: + first val1 + second val2 + first\second val3 + first.second val_dot + first\.second val_dot + firstGsecond val_inval + first\Gsecond val_inval Remove all but second: val2 diff --git a/regress/usr.bin/mandoc/roff/de/escname.out_lint b/regress/usr.bin/mandoc/roff/de/escname.out_lint index a3f9396f124..5dba1973f3c 100644 --- a/regress/usr.bin/mandoc/roff/de/escname.out_lint +++ b/regress/usr.bin/mandoc/roff/de/escname.out_lint @@ -1,8 +1,10 @@ mandoc: escname.in:22:2: ERROR: escaped character not allowed in a name: first\e -mandoc: escname.in:32:19: ERROR: escaped character not allowed in a name: first\e -mandoc: escname.in:33:2: ERROR: skipping unknown macro: .first -mandoc: escname.in:35:2: ERROR: skipping unknown macro: .first\\second -mandoc: escname.in:38:5: ERROR: skipping excess arguments: .de ... 
excess arguments -mandoc: escname.in:41:1: ERROR: escaped character not allowed in a name: witharg\( -mandoc: escname.in:43:1: ERROR: escaped character not allowed in a name: de\e -mandoc: escname.in:43:2: WARNING: skipping empty request: de +mandoc: escname.in:32:10: WARNING: undefined escape, printing literally: \G +mandoc: escname.in:51:7: WARNING: undefined escape, printing literally: \G +mandoc: escname.in:55:19: ERROR: escaped character not allowed in a name: first\e +mandoc: escname.in:56:2: ERROR: skipping unknown macro: .first +mandoc: escname.in:58:2: ERROR: skipping unknown macro: .first\second +mandoc: escname.in:61:5: ERROR: skipping excess arguments: .de ... excess arguments +mandoc: escname.in:64:1: ERROR: escaped character not allowed in a name: witharg\(en +mandoc: escname.in:66:1: ERROR: escaped character not allowed in a name: de\e +mandoc: escname.in:66:2: WARNING: skipping empty request: de diff --git a/regress/usr.bin/mandoc/roff/ds/Makefile b/regress/usr.bin/mandoc/roff/ds/Makefile index 773105e8e6b..68a8b3bf84e 100644 --- a/regress/usr.bin/mandoc/roff/ds/Makefile +++ b/regress/usr.bin/mandoc/roff/ds/Makefile @@ -1,4 +1,9 @@ -# $OpenBSD: Makefile,v 1.6 2019/02/06 20:54:28 schwarze Exp $ +# $OpenBSD: Makefile,v 1.7 2022/06/03 11:50:25 schwarze Exp $ +# +# This directory is intended for tests of string *definitions*, +# in particular testing the behaviour of the .ds and .as macros. +# Tests of string *expansion* are better placed in the roff/string +# directory. 
REGRESS_TARGETS = append escname nested quoting tab diff --git a/regress/usr.bin/mandoc/roff/nr/escname.in b/regress/usr.bin/mandoc/roff/nr/escname.in index f81627e16f3..d46255528c6 100644 --- a/regress/usr.bin/mandoc/roff/nr/escname.in +++ b/regress/usr.bin/mandoc/roff/nr/escname.in @@ -1,5 +1,5 @@ -.\" $OpenBSD: escname.in,v 1.3 2017/07/04 14:53:27 schwarze Exp $ -.TH NR-ESCNAME 1 "June 29, 2014" +.\" $OpenBSD: escname.in,v 1.4 2022/06/03 11:50:25 schwarze Exp $ +.TH NR-ESCNAME 1 "June 3, 2022" .SH NAME nr-escname \- escape sequences in register names .SH DESCRIPTION @@ -7,7 +7,9 @@ nr-escname \- escape sequences in register names .nr second 2 .nr first\\second 3 .nr first\esecond 4 -\n[first] \n[second] \n[first\\second] +.nr first\.second 5 +.nr first\Gsecond 6 +\n[first] \n[second] \n[first\\second] \n[first.second] \n[firstGsecond] .PP .rr first\esecond \n[first] \n[second] \n[first\\second] diff --git a/regress/usr.bin/mandoc/roff/nr/escname.out_ascii b/regress/usr.bin/mandoc/roff/nr/escname.out_ascii index ca2d50f5c4f..d0301af91de 100644 --- a/regress/usr.bin/mandoc/roff/nr/escname.out_ascii +++ b/regress/usr.bin/mandoc/roff/nr/escname.out_ascii @@ -4,7 +4,7 @@ NNAAMMEE nr-escname - escape sequences in register names DDEESSCCRRIIPPTTIIOONN - 1 2 3 + 1 2 3 5 6 0 2 3 @@ -12,4 +12,4 @@ DDEESSCCRRIIPPTTIIOONN incomplete: -OpenBSD June 29, 2014 NR-ESCNAME(1) +OpenBSD June 3, 2022 NR-ESCNAME(1) diff --git a/regress/usr.bin/mandoc/roff/nr/escname.out_lint b/regress/usr.bin/mandoc/roff/nr/escname.out_lint index a2dabebeb3e..edec17faea4 100644 --- a/regress/usr.bin/mandoc/roff/nr/escname.out_lint +++ b/regress/usr.bin/mandoc/roff/nr/escname.out_lint @@ -1,4 +1,5 @@ mandoc: escname.in:9:5: ERROR: escaped character not allowed in a name: first\e -mandoc: escname.in:12:5: ERROR: escaped character not allowed in a name: first\e -mandoc: escname.in:18:13: WARNING: invalid escape sequence: \n[second -mandoc: escname.in:18:12: STYLE: whitespace at end of input line 
+mandoc: escname.in:11:10: WARNING: undefined escape, printing literally: \G +mandoc: escname.in:14:5: ERROR: escaped character not allowed in a name: first\e +mandoc: escname.in:20:13: WARNING: invalid escape sequence: \n[second +mandoc: escname.in:20:12: STYLE: whitespace at end of input line diff --git a/regress/usr.bin/mandoc/roff/string/Makefile b/regress/usr.bin/mandoc/roff/string/Makefile index 32f192a9424..2fa10557722 100644 --- a/regress/usr.bin/mandoc/roff/string/Makefile +++ b/regress/usr.bin/mandoc/roff/string/Makefile @@ -1,4 +1,9 @@ -# $OpenBSD: Makefile,v 1.9 2018/08/16 13:49:40 schwarze Exp $ +# $OpenBSD: Makefile,v 1.10 2022/06/03 11:50:25 schwarze Exp $ +# +# This directory is intended for tests of string *expansion*, +# in particular testing the behaviour of the \* escape sequence. +# Tests of string *definitions* are better placed in the roff/ds +# directory. REGRESS_TARGETS = dotT escape infinite name std undef zerolength LINT_TARGETS = name std undef diff --git a/regress/usr.bin/mandoc/roff/string/name.in b/regress/usr.bin/mandoc/roff/string/name.in index 2948f164dcb..b2612be2040 100644 --- a/regress/usr.bin/mandoc/roff/string/name.in +++ b/regress/usr.bin/mandoc/roff/string/name.in @@ -1,5 +1,5 @@ -.\" $OpenBSD: name.in,v 1.4 2017/07/04 14:53:27 schwarze Exp $ -.Dd $Mdocdate: July 4 2017 $ +.\" $OpenBSD: name.in,v 1.5 2022/06/03 11:50:25 schwarze Exp $ +.Dd $Mdocdate: June 3 2022 $ .Dt STRING-NAME 1 .Os .Sh NAME @@ -10,7 +10,9 @@ .ds "quot" value of "quot" .ds bs\e value of bs\ee .ds bs\\e value of bs\e\ee +.ds dot. value of dot. .ds bl\ e value of bl\e e +.ds inval\\G value of inval\eG norm: \*[norm] .br norm without closing brace: \*[norm @@ -26,6 +28,10 @@ bs\e\ee: \*[bs\\e] bse: \*[bse] .br bs: \*[bs] +.br +dot.: \*[dot.] +.br +dot\e.: \*[dot\.] 
.\".br .\"bl\e e: \*[bl\ e] .br @@ -34,3 +40,7 @@ bl e: \*[bl e] ble: \*[ble] .br bl: \*[bl] +.br +inval\e\eG: \*[inval\\G] +.br +inval\eG: \*[inval\G] diff --git a/regress/usr.bin/mandoc/roff/string/name.out_ascii b/regress/usr.bin/mandoc/roff/string/name.out_ascii index 325e28aabc2..38258a72903 100644 --- a/regress/usr.bin/mandoc/roff/string/name.out_ascii +++ b/regress/usr.bin/mandoc/roff/string/name.out_ascii @@ -11,8 +11,12 @@ DDEESSCCRRIIPPTTIIOONN bs\\e: value of bs\\e bse: bs: + dot.: value of dot. + dot\.: value of dot. bl e: ble: bl: + inval\\G: value of inval\G + inval\G: value of inval\G -OpenBSD July 4, 2017 OpenBSD +OpenBSD June 3, 2022 OpenBSD diff --git a/regress/usr.bin/mandoc/roff/string/name.out_lint b/regress/usr.bin/mandoc/roff/string/name.out_lint index 5a6340a01ca..13283f5dc2d 100644 --- a/regress/usr.bin/mandoc/roff/string/name.out_lint +++ b/regress/usr.bin/mandoc/roff/string/name.out_lint @@ -1,16 +1,17 @@ mandoc: name.in:11:5: ERROR: escaped character not allowed in a name: bs\e -mandoc: name.in:13:5: ERROR: escaped character not allowed in a name: bl\ -mandoc: name.in:16:29: WARNING: invalid escape sequence: \*[norm -mandoc: name.in:16:28: STYLE: whitespace at end of input line -mandoc: name.in:18:7: WARNING: undefined string, using "": quot -mandoc: name.in:18:6: STYLE: whitespace at end of input line -mandoc: name.in:26:6: WARNING: undefined string, using "": bse -mandoc: name.in:26:5: STYLE: whitespace at end of input line -mandoc: name.in:28:5: WARNING: undefined string, using "": bs -mandoc: name.in:28:4: STYLE: whitespace at end of input line -mandoc: name.in:32:7: WARNING: undefined string, using "": bl e -mandoc: name.in:32:6: STYLE: whitespace at end of input line -mandoc: name.in:34:6: WARNING: undefined string, using "": ble -mandoc: name.in:34:5: STYLE: whitespace at end of input line -mandoc: name.in:36:5: WARNING: undefined string, using "": bl -mandoc: name.in:36:4: STYLE: whitespace at end of input line +mandoc: 
name.in:14:5: ERROR: escaped character not allowed in a name: bl\ +mandoc: name.in:18:29: WARNING: invalid escape sequence: \*[norm +mandoc: name.in:18:28: STYLE: whitespace at end of input line +mandoc: name.in:20:7: WARNING: undefined string, using "": quot +mandoc: name.in:20:6: STYLE: whitespace at end of input line +mandoc: name.in:28:6: WARNING: undefined string, using "": bse +mandoc: name.in:28:5: STYLE: whitespace at end of input line +mandoc: name.in:30:5: WARNING: undefined string, using "": bs +mandoc: name.in:30:4: STYLE: whitespace at end of input line +mandoc: name.in:38:7: WARNING: undefined string, using "": bl e +mandoc: name.in:38:6: STYLE: whitespace at end of input line +mandoc: name.in:40:6: WARNING: undefined string, using "": ble +mandoc: name.in:40:5: STYLE: whitespace at end of input line +mandoc: name.in:42:5: WARNING: undefined string, using "": bl +mandoc: name.in:42:4: STYLE: whitespace at end of input line +mandoc: name.in:46:19: WARNING: undefined escape, printing literally: \G diff --git a/usr.bin/mandoc/roff.c b/usr.bin/mandoc/roff.c index 724c34543aa..0301b838565 100644 --- a/usr.bin/mandoc/roff.c +++ b/usr.bin/mandoc/roff.c @@ -1,4 +1,4 @@ -/* $OpenBSD: roff.c,v 1.264 2022/06/02 11:28:16 schwarze Exp $ */ +/* $OpenBSD: roff.c,v 1.265 2022/06/03 11:50:25 schwarze Exp $ */ /* * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons @@ -1373,6 +1373,7 @@ roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec) int iarg; /* index beginning the argument */ int iendarg; /* index right after the argument */ int iend; /* index right after the sequence */ + int isrc, idst; /* to reduce \\ and \. in names */ int deftype; /* type of definition to paste */ int argi; /* macro argument index */ int quote_args; /* true for \\$@, false for \\$* */ @@ -1426,6 +1427,21 @@ roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec) continue; } + /* Reduce \\ and \. in names. 
*/ + + if (buf->buf[inam] == '*' || buf->buf[inam] == 'n') { + isrc = idst = iarg; + while (isrc < iendarg) { + if (isrc + 1 < iendarg && + buf->buf[isrc] == '\\' && + (buf->buf[isrc + 1] == '\\' || + buf->buf[isrc + 1] == '.')) + isrc++; + buf->buf[idst++] = buf->buf[isrc++]; + } + iendarg -= isrc - idst; + } + /* Handle expansion. */ res = NULL; @@ -4000,7 +4016,7 @@ static size_t roff_getname(struct roff *r, char **cpp, int ln, int pos) { char *name, *cp; - size_t namesz; + int namesz, inam, iend; name = *cpp; if (*name == '\0') @@ -4008,24 +4024,46 @@ roff_getname(struct roff *r, char **cpp, int ln, int pos) /* Advance cp to the byte after the end of the name. */ - for (cp = name; 1; cp++) { - namesz = cp - name; + cp = name; + namesz = 0; + for (;;) { if (*cp == '\0') break; if (*cp == ' ' || *cp == '\t') { cp++; break; } - if (*cp != '\\') + if (*cp != '\\') { + if (name + namesz < cp) { + name[namesz] = *cp; + *cp = ' '; + } + namesz++; + cp++; continue; + } if (cp[1] == '{' || cp[1] == '}') break; - if (*++cp == '\\') - continue; - mandoc_msg(MANDOCERR_NAMESC, ln, pos, - "%.*s", (int)(cp - name + 1), name); - mandoc_escape((const char **)&cp, NULL, NULL); - break; + if (roff_escape(cp, 0, 0, NULL, &inam, + NULL, NULL, &iend) != ESCAPE_UNDEF) { + mandoc_msg(MANDOCERR_NAMESC, ln, pos, + "%.*s%.*s", namesz, name, iend, cp); + cp += iend; + break; + } + + /* + * In an identifier, \\, \., \G and so on + * are reduced to \, ., G and so on, + * vaguely similar to copy mode. + */ + + name[namesz++] = cp[inam]; + while (iend--) { + if (cp >= name + namesz) + *cp = ' '; + cp++; + } } /* Read past spaces. */