From: schwarze Date: Sun, 5 Jun 2022 13:42:49 +0000 (+0000) Subject: With the improved escape sequence parser, it becomes easy to also improve X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=f8e3b479bff00e265eb456d46fd0a4b09543d4d1;p=openbsd With the improved escape sequence parser, it becomes easy to also improve diagnostics. Distinguish "incomplete escape sequence", "invalid special character", and "unknown special character" from the generic "invalid escape sequence", also promoting them from WARNING to ERROR because incomplete escape sequences are severe syntax violations and because encountering an invalid or unknown special character makes it likely that part of the document content intended by the authors gets lost. --- diff --git a/regress/usr.bin/mandoc/char/accent/nocombine.out_lint b/regress/usr.bin/mandoc/char/accent/nocombine.out_lint index c9de4162307..0f7be4d0bd2 100644 --- a/regress/usr.bin/mandoc/char/accent/nocombine.out_lint +++ b/regress/usr.bin/mandoc/char/accent/nocombine.out_lint @@ -1,2 +1,2 @@ -mandoc: nocombine.in:8:27: WARNING: invalid escape sequence: \['] -mandoc: nocombine.in:14:27: WARNING: invalid escape sequence: \[`] +mandoc: nocombine.in:8:27: ERROR: invalid special character: \['] +mandoc: nocombine.in:14:27: ERROR: invalid special character: \[`] diff --git a/regress/usr.bin/mandoc/char/space/invalid.out_lint b/regress/usr.bin/mandoc/char/space/invalid.out_lint index c05ef38f1f6..4c146853c3d 100644 --- a/regress/usr.bin/mandoc/char/space/invalid.out_lint +++ b/regress/usr.bin/mandoc/char/space/invalid.out_lint @@ -1,9 +1,9 @@ mandoc: invalid.in:7:15: WARNING: invalid escape sequence: \[ -mandoc: invalid.in:8:14: WARNING: invalid escape sequence: \[%] -mandoc: invalid.in:9:16: WARNING: invalid escape sequence: \[&] -mandoc: invalid.in:10:12: WARNING: invalid escape sequence: \[:] -mandoc: invalid.in:11:12: WARNING: invalid escape sequence: \[^] -mandoc: invalid.in:12:16: WARNING: invalid escape sequence: \[_] -mandoc: invalid.in:13:11: WARNING: invalid escape sequence: \[|] -mandoc: invalid.in:14:12: WARNING: invalid escape sequence: \[~] -mandoc: invalid.in:15:18: WARNING: invalid escape sequence: \[0] +mandoc: invalid.in:8:14: ERROR: invalid special character: \[%] +mandoc: invalid.in:9:16: ERROR: invalid special character: \[&] +mandoc: invalid.in:10:12: ERROR: invalid special character: \[:] +mandoc: invalid.in:11:12: ERROR: invalid special character: \[^] +mandoc: invalid.in:12:16: ERROR: invalid special character: \[_] +mandoc: invalid.in:13:11: ERROR: invalid special character: \[|] +mandoc: invalid.in:14:12: ERROR: invalid special character: \[~] +mandoc: invalid.in:15:18: ERROR: invalid special character: \[0] diff --git a/regress/usr.bin/mandoc/char/unicode/input.out_lint b/regress/usr.bin/mandoc/char/unicode/input.out_lint index 578a7704842..fa36f8769fe 100644 --- a/regress/usr.bin/mandoc/char/unicode/input.out_lint +++ b/regress/usr.bin/mandoc/char/unicode/input.out_lint @@ -24,11 +24,11 @@ mandoc: input.in:35:19: ERROR: skipping bad character: 0xbf mandoc: input.in:42:25: ERROR: skipping bad character: 0xed mandoc: input.in:42:26: ERROR: skipping bad character: 0xa0 mandoc: input.in:42:27: ERROR: skipping bad character: 0x80 -mandoc: input.in:42:17: WARNING: invalid escape sequence: \[uD800] +mandoc: input.in:42:17: ERROR: invalid special character: \[uD800] mandoc: input.in:43:25: ERROR: skipping bad character: 0xed mandoc: input.in:43:26: ERROR: skipping bad character: 0xbf mandoc: input.in:43:27: ERROR: skipping bad character: 0xbf -mandoc: input.in:43:17: WARNING: invalid escape sequence: \[uDFFF] +mandoc: input.in:43:17: ERROR: invalid special character: \[uDFFF] mandoc: input.in:53:19: ERROR: skipping bad character: 0xf0 mandoc: input.in:53:20: ERROR: skipping bad character: 0x80 mandoc: input.in:53:21: ERROR: skipping bad character: 0x80 @@ -57,25 +57,25 @@ mandoc: input.in:67:31: ERROR: skipping bad character: 0xf4 mandoc: input.in:67:32: ERROR: skipping bad character: 0x90 mandoc: input.in:67:33: ERROR: skipping bad character: 0x80 mandoc: input.in:67:34: ERROR: skipping bad character: 0x80 -mandoc: input.in:67:21: WARNING: invalid escape sequence: \[u110000] +mandoc: input.in:67:21: ERROR: invalid special character: \[u110000] mandoc: input.in:68:31: ERROR: skipping bad character: 0xf4 mandoc: input.in:68:32: ERROR: skipping bad character: 0xbf mandoc: input.in:68:33: ERROR: skipping bad character: 0xbf mandoc: input.in:68:34: ERROR: skipping bad character: 0xbf -mandoc: input.in:68:21: WARNING: invalid escape sequence: \[u13FFFF] +mandoc: input.in:68:21: ERROR: invalid special character: \[u13FFFF] mandoc: input.in:69:31: ERROR: skipping bad character: 0xf5 mandoc: input.in:69:32: ERROR: skipping bad character: 0x80 mandoc: input.in:69:33: ERROR: skipping bad character: 0x80 mandoc: input.in:69:34: ERROR: skipping bad character: 0x80 -mandoc: input.in:69:21: WARNING: invalid escape sequence: \[u140000] +mandoc: input.in:69:21: ERROR: invalid special character: \[u140000] mandoc: input.in:70:31: ERROR: skipping bad character: 0xf7 mandoc: input.in:70:32: ERROR: skipping bad character: 0xbf mandoc: input.in:70:33: ERROR: skipping bad character: 0xbf mandoc: input.in:70:34: ERROR: skipping bad character: 0xbf -mandoc: input.in:70:21: WARNING: invalid escape sequence: \[u1FFFFF] +mandoc: input.in:70:21: ERROR: invalid special character: \[u1FFFFF] mandoc: input.in:71:33: ERROR: skipping bad character: 0xf8 mandoc: input.in:71:34: ERROR: skipping bad character: 0x88 mandoc: input.in:71:35: ERROR: skipping bad character: 0x80 mandoc: input.in:71:36: ERROR: skipping bad character: 0x80 mandoc: input.in:71:37: ERROR: skipping bad character: 0x80 -mandoc: input.in:71:23: WARNING: invalid escape sequence: \[u200000] +mandoc: input.in:71:23: ERROR: invalid special character: \[u200000] diff --git a/regress/usr.bin/mandoc/char/unicode/invalid.out_lint b/regress/usr.bin/mandoc/char/unicode/invalid.out_lint index ce9de98860e..0717605f7a2 100644 --- a/regress/usr.bin/mandoc/char/unicode/invalid.out_lint +++ b/regress/usr.bin/mandoc/char/unicode/invalid.out_lint @@ -1,9 +1,9 @@ -mandoc: invalid.in:11:13: WARNING: invalid escape sequence: \[u2B] -mandoc: invalid.in:11:20: WARNING: invalid escape sequence: \[u02B] -mandoc: invalid.in:13:12: WARNING: invalid escape sequence: \[u0002B] -mandoc: invalid.in:13:22: WARNING: invalid escape sequence: \[u00002B] -mandoc: invalid.in:13:33: WARNING: invalid escape sequence: \[u000002B] -mandoc: invalid.in:14:13: WARNING: invalid escape sequence: \[u110000] -mandoc: invalid.in:14:24: WARNING: invalid escape sequence: \[u200000] -mandoc: invalid.in:14:35: WARNING: invalid escape sequence: \[u1000000] -mandoc: invalid.in:15:20: WARNING: invalid escape sequence: \[u1234g] +mandoc: invalid.in:11:13: ERROR: unknown special character: \[u2B] +mandoc: invalid.in:11:20: ERROR: unknown special character: \[u02B] +mandoc: invalid.in:13:12: ERROR: invalid special character: \[u0002B] +mandoc: invalid.in:13:22: ERROR: invalid special character: \[u00002B] +mandoc: invalid.in:13:33: ERROR: unknown special character: \[u000002B] +mandoc: invalid.in:14:13: ERROR: invalid special character: \[u110000] +mandoc: invalid.in:14:24: ERROR: invalid special character: \[u200000] +mandoc: invalid.in:14:35: ERROR: unknown special character: \[u1000000] +mandoc: invalid.in:15:20: ERROR: unknown special character: \[u1234g] diff --git a/regress/usr.bin/mandoc/roff/char/badarg.out_lint b/regress/usr.bin/mandoc/roff/char/badarg.out_lint index e07faa3cb14..dd5c7999494 100644 --- a/regress/usr.bin/mandoc/roff/char/badarg.out_lint +++ b/regress/usr.bin/mandoc/roff/char/badarg.out_lint @@ -1,6 +1,6 @@ mandoc: badarg.in:6:6: ERROR: argument is not a character: char mandoc: badarg.in:7:7: ERROR: argument is not a character: char \fR myval -mandoc: badarg.in:8:7: WARNING: invalid escape sequence: \[myc] +mandoc: badarg.in:8:7: ERROR: unknown special character: \[myc] mandoc: badarg.in:8:7: ERROR: argument is not a character: char \[myc]x myval mandoc: badarg.in:9:7: ERROR: argument is not a character: char xy myval -mandoc: badarg.in:10:7: WARNING: invalid escape sequence: \[myc] +mandoc: badarg.in:10:7: ERROR: unknown special character: \[myc] diff --git a/regress/usr.bin/mandoc/roff/esc/B.out_lint b/regress/usr.bin/mandoc/roff/esc/B.out_lint index f52270a4767..fbaded65096 100644 --- a/regress/usr.bin/mandoc/roff/esc/B.out_lint +++ b/regress/usr.bin/mandoc/roff/esc/B.out_lint @@ -1 +1 @@ -mandoc: B.in:37:23: WARNING: invalid escape sequence: \B'1+1 +mandoc: B.in:37:23: ERROR: incomplete escape sequence: \B'1+1 diff --git a/regress/usr.bin/mandoc/roff/esc/ignore.out_lint b/regress/usr.bin/mandoc/roff/esc/ignore.out_lint index 16d64a978c6..ddbc8d6ef0c 100644 --- a/regress/usr.bin/mandoc/roff/esc/ignore.out_lint +++ b/regress/usr.bin/mandoc/roff/esc/ignore.out_lint @@ -1,14 +1,14 @@ -mandoc: ignore.in:7:36: WARNING: invalid escape sequence: \[%] -mandoc: ignore.in:8:35: WARNING: invalid escape sequence: \[&] -mandoc: ignore.in:9:51: WARNING: invalid escape sequence: \[)] -mandoc: ignore.in:10:37: WARNING: invalid escape sequence: \[,] -mandoc: ignore.in:11:38: WARNING: invalid escape sequence: \[/] -mandoc: ignore.in:12:28: WARNING: invalid escape sequence: \[^] -mandoc: ignore.in:13:17: WARNING: invalid escape sequence: \[a] -mandoc: ignore.in:14:25: WARNING: invalid escape sequence: \[d] -mandoc: ignore.in:15:25: WARNING: invalid escape sequence: \[t] -mandoc: ignore.in:16:33: WARNING: invalid escape sequence: \[u] -mandoc: ignore.in:17:20: WARNING: invalid escape sequence: \[{] -mandoc: ignore.in:18:25: WARNING: invalid escape sequence: \[|] -mandoc: ignore.in:19:20: WARNING: invalid escape sequence: \[}] -mandoc: ignore.in:23:56: WARNING: invalid escape sequence: \s- +mandoc: ignore.in:7:36: ERROR: invalid special character: \[%] +mandoc: ignore.in:8:35: ERROR: invalid special character: \[&] +mandoc: ignore.in:9:51: ERROR: invalid special character: \[)] +mandoc: ignore.in:10:37: ERROR: invalid special character: \[,] +mandoc: ignore.in:11:38: ERROR: invalid special character: \[/] +mandoc: ignore.in:12:28: ERROR: invalid special character: \[^] +mandoc: ignore.in:13:17: ERROR: invalid special character: \[a] +mandoc: ignore.in:14:25: ERROR: invalid special character: \[d] +mandoc: ignore.in:15:25: ERROR: invalid special character: \[t] +mandoc: ignore.in:16:33: ERROR: invalid special character: \[u] +mandoc: ignore.in:17:20: ERROR: invalid special character: \[{] +mandoc: ignore.in:18:25: ERROR: invalid special character: \[|] +mandoc: ignore.in:19:20: ERROR: invalid special character: \[}] +mandoc: ignore.in:23:56: ERROR: incomplete escape sequence: \s- diff --git a/regress/usr.bin/mandoc/roff/esc/invalid.out_lint b/regress/usr.bin/mandoc/roff/esc/invalid.out_lint index baef3176710..ff52893fba6 100644 --- a/regress/usr.bin/mandoc/roff/esc/invalid.out_lint +++ b/regress/usr.bin/mandoc/roff/esc/invalid.out_lint @@ -1,43 +1,43 @@ mandoc: invalid.in:7:8: WARNING: undefined escape, printing literally: \+ -mandoc: invalid.in:7:11: WARNING: invalid escape sequence: \[+] +mandoc: invalid.in:7:11: ERROR: invalid special character: \[+] mandoc: invalid.in:8:13: WARNING: undefined escape, printing literally: \; -mandoc: invalid.in:8:16: WARNING: invalid escape sequence: \[;] +mandoc: invalid.in:8:16: ERROR: invalid special character: \[;] mandoc: invalid.in:9:13: WARNING: undefined escape, printing literally: \< -mandoc: invalid.in:9:16: WARNING: invalid escape sequence: \[<] +mandoc: invalid.in:9:16: ERROR: invalid special character: \[<] mandoc: invalid.in:10:12: WARNING: undefined escape, printing literally: \= -mandoc: invalid.in:10:15: WARNING: invalid escape sequence: \[=] +mandoc: invalid.in:10:15: ERROR: invalid special character: \[=] mandoc: invalid.in:11:16: WARNING: undefined escape, printing literally: \> -mandoc: invalid.in:11:19: WARNING: invalid escape sequence: \[>] +mandoc: invalid.in:11:19: ERROR: invalid special character: \[>] mandoc: invalid.in:12:6: WARNING: undefined escape, printing literally: \@ -mandoc: invalid.in:12:9: WARNING: invalid escape sequence: \[@] +mandoc: invalid.in:12:9: ERROR: invalid special character: \[@] mandoc: invalid.in:13:18: WARNING: undefined escape, printing literally: \] -mandoc: invalid.in:14:16: WARNING: invalid escape sequence: \[{] -mandoc: invalid.in:14:21: WARNING: invalid escape sequence: \[}] +mandoc: invalid.in:14:16: ERROR: invalid special character: \[{] +mandoc: invalid.in:14:21: ERROR: invalid special character: \[}] mandoc: invalid.in:15:9: WARNING: undefined escape, printing literally: \1 -mandoc: invalid.in:15:12: WARNING: invalid escape sequence: \[1] +mandoc: invalid.in:15:12: ERROR: invalid special character: \[1] mandoc: invalid.in:16:5: WARNING: undefined escape, printing literally: \G -mandoc: invalid.in:16:8: WARNING: invalid escape sequence: \[G] +mandoc: invalid.in:16:8: ERROR: invalid special character: \[G] mandoc: invalid.in:17:5: WARNING: undefined escape, printing literally: \I -mandoc: invalid.in:17:8: WARNING: invalid escape sequence: \[I] +mandoc: invalid.in:17:8: ERROR: invalid special character: \[I] mandoc: invalid.in:18:5: WARNING: undefined escape, printing literally: \i -mandoc: invalid.in:18:8: WARNING: invalid escape sequence: \[i] +mandoc: invalid.in:18:8: ERROR: invalid special character: \[i] mandoc: invalid.in:19:5: WARNING: undefined escape, printing literally: \J -mandoc: invalid.in:19:8: WARNING: invalid escape sequence: \[J] +mandoc: invalid.in:19:8: ERROR: invalid special character: \[J] mandoc: invalid.in:20:5: WARNING: undefined escape, printing literally: \j -mandoc: invalid.in:20:8: WARNING: invalid escape sequence: \[j] +mandoc: invalid.in:20:8: ERROR: invalid special character: \[j] mandoc: invalid.in:21:5: WARNING: undefined escape, printing literally: \K -mandoc: invalid.in:21:8: WARNING: invalid escape sequence: \[K] +mandoc: invalid.in:21:8: ERROR: invalid special character: \[K] mandoc: invalid.in:22:5: WARNING: undefined escape, printing literally: \P -mandoc: invalid.in:22:8: WARNING: invalid escape sequence: \[P] +mandoc: invalid.in:22:8: ERROR: invalid special character: \[P] mandoc: invalid.in:23:5: WARNING: undefined escape, printing literally: \Q -mandoc: invalid.in:23:8: WARNING: invalid escape sequence: \[Q] +mandoc: invalid.in:23:8: ERROR: invalid special character: \[Q] mandoc: invalid.in:24:5: WARNING: undefined escape, printing literally: \q -mandoc: invalid.in:24:8: WARNING: invalid escape sequence: \[q] +mandoc: invalid.in:24:8: ERROR: invalid special character: \[q] mandoc: invalid.in:25:5: WARNING: undefined escape, printing literally: \T -mandoc: invalid.in:25:8: WARNING: invalid escape sequence: \[T] +mandoc: invalid.in:25:8: ERROR: invalid special character: \[T] mandoc: invalid.in:26:5: WARNING: undefined escape, printing literally: \U -mandoc: invalid.in:26:8: WARNING: invalid escape sequence: \[U] +mandoc: invalid.in:26:8: ERROR: invalid special character: \[U] mandoc: invalid.in:27:5: WARNING: undefined escape, printing literally: \W -mandoc: invalid.in:27:8: WARNING: invalid escape sequence: \[W] +mandoc: invalid.in:27:8: ERROR: invalid special character: \[W] mandoc: invalid.in:28:5: WARNING: undefined escape, printing literally: \y -mandoc: invalid.in:28:8: WARNING: invalid escape sequence: \[y] +mandoc: invalid.in:28:8: ERROR: invalid special character: \[y] diff --git a/regress/usr.bin/mandoc/roff/esc/unsupp.out_lint b/regress/usr.bin/mandoc/roff/esc/unsupp.out_lint index fae97c816fa..db7631c6fe1 100644 --- a/regress/usr.bin/mandoc/roff/esc/unsupp.out_lint +++ b/regress/usr.bin/mandoc/roff/esc/unsupp.out_lint @@ -1,5 +1,5 @@ mandoc: unsupp.in:7:20: UNSUPP: unsupported escape sequence: \! -mandoc: unsupp.in:7:23: WARNING: invalid escape sequence: \[!] +mandoc: unsupp.in:7:23: ERROR: invalid special character: \[!] mandoc: unsupp.in:8:17: UNSUPP: unsupported escape sequence: \? mandoc: unsupp.in:8:21: UNSUPP: unsupported escape sequence: \? -mandoc: unsupp.in:8:24: WARNING: invalid escape sequence: \[?] +mandoc: unsupp.in:8:24: ERROR: invalid special character: \[?] diff --git a/regress/usr.bin/mandoc/roff/esc/w.out_lint b/regress/usr.bin/mandoc/roff/esc/w.out_lint index d48495c28ad..11dfbef51d5 100644 --- a/regress/usr.bin/mandoc/roff/esc/w.out_lint +++ b/regress/usr.bin/mandoc/roff/esc/w.out_lint @@ -1 +1 @@ -mandoc: w.in:17:15: WARNING: invalid escape sequence: \w'foo +mandoc: w.in:17:15: ERROR: incomplete escape sequence: \w'foo diff --git a/regress/usr.bin/mandoc/roff/nr/escname.out_lint b/regress/usr.bin/mandoc/roff/nr/escname.out_lint index edec17faea4..8655da967fc 100644 --- a/regress/usr.bin/mandoc/roff/nr/escname.out_lint +++ b/regress/usr.bin/mandoc/roff/nr/escname.out_lint @@ -1,5 +1,5 @@ mandoc: escname.in:9:5: ERROR: escaped character not allowed in a name: first\e mandoc: escname.in:11:10: WARNING: undefined escape, printing literally: \G mandoc: escname.in:14:5: ERROR: escaped character not allowed in a name: first\e -mandoc: escname.in:20:13: WARNING: invalid escape sequence: \n[second +mandoc: escname.in:20:13: ERROR: incomplete escape sequence: \n[second mandoc: escname.in:20:12: STYLE: whitespace at end of input line diff --git a/regress/usr.bin/mandoc/roff/string/name.out_lint b/regress/usr.bin/mandoc/roff/string/name.out_lint index 13283f5dc2d..55f1f87ba3d 100644 --- a/regress/usr.bin/mandoc/roff/string/name.out_lint +++ b/regress/usr.bin/mandoc/roff/string/name.out_lint @@ -1,6 +1,6 @@ mandoc: name.in:11:5: ERROR: escaped character not allowed in a name: bs\e mandoc: name.in:14:5: ERROR: escaped character not allowed in a name: bl\ -mandoc: name.in:18:29: WARNING: invalid escape sequence: \*[norm +mandoc: name.in:18:29: ERROR: incomplete escape sequence: \*[norm mandoc: name.in:18:28: STYLE: whitespace at end of input line mandoc: name.in:20:7: WARNING: undefined string, using "": quot mandoc: name.in:20:6: STYLE: whitespace at end of input line diff --git a/usr.bin/mandoc/mandoc.1 b/usr.bin/mandoc/mandoc.1 index 1cd1621fc7a..ba7e883d5e7 100644 --- a/usr.bin/mandoc/mandoc.1 +++ b/usr.bin/mandoc/mandoc.1 @@ -1,4 +1,4 @@ -.\" $OpenBSD: mandoc.1,v 1.184 2022/04/28 16:16:46 schwarze Exp $ +.\" $OpenBSD: mandoc.1,v 1.185 2022/06/05 13:42:49 schwarze Exp $ .\" .\" Copyright (c) 2012, 2014-2022 Ingo Schwarze .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons @@ -15,7 +15,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: April 28 2022 $ +.Dd $Mdocdate: June 5 2022 $ .Dt MANDOC 1 .Os .Sh NAME @@ -1799,21 +1799,9 @@ A new sentence starts in the middle of a text line. Start it on a new input line to help formatters produce correct spacing. .It Sy "invalid escape sequence" .Pq roff -An escape sequence has an invalid opening argument delimiter, lacks the -closing argument delimiter, the argument is of an invalid form, or it is -a character escape sequence with an invalid name. -If the argument is incomplete, -.Ic \e* -and -.Ic \en -expand to an empty string, -.Ic \eB -to the digit -.Sq 0 , -and -.Ic \ew -to the length of the incomplete argument. -All other invalid escape sequences are ignored. +An escape sequence has an invalid opening argument delimiter +or the argument is of an invalid form. +Invalid escape sequences are ignored. .It Sy "undefined escape, printing literally" .Pq roff In an escape sequence, the first character @@ -2285,6 +2273,48 @@ with invalid arguments .El The excess arguments are ignored. .El +.Ss "Errors related to escape sequences" +.Bl -ohang +.It Sy "incomplete escape sequence" +.Pq roff +The end of the input line is encountered +while parsing the argument of an escape sequence. +In this case, +.Ic \e* +and +.Ic \en +expand to an empty string, +.Ic \eB +to the digit +.Sq 0 , +and +.Ic \ew +to the length of the incomplete argument. +All other incomplete escape sequences are ignored. +.It Sy "invalid special character" +.Pq roff +A special character escape sequence is invalid, +for example a Unicode sequence pointing to a surrogate +or beyond the Unicode range, a \e[char...] escape sequence +representing a control character or pointing beyond the +.Vt unsigned char +range, or an invalid variable-length form +of a single-byte character escape sequence, for example writing +.Qq \e[e] +or +.Qq \e[~] +instead of +.Qq \ee +or +.Qq \e~ , +respectively. +The escape sequence is ignored. +.It Sy "unknown special character" +.Pq roff +The name given in a special character escape sequence is not known to +.Nm . +The escape sequence is ignored. +.El .Ss Unsupported features .Bl -ohang .It Sy "input too large" diff --git a/usr.bin/mandoc/mandoc.h b/usr.bin/mandoc/mandoc.h index 6a53a298a82..012bb2d3b96 100644 --- a/usr.bin/mandoc/mandoc.h +++ b/usr.bin/mandoc/mandoc.h @@ -1,4 +1,4 @@ -/* $OpenBSD: mandoc.h,v 1.219 2022/05/19 15:17:50 schwarze Exp $ */ +/* $OpenBSD: mandoc.h,v 1.220 2022/06/05 13:42:49 schwarze Exp $ */ /* * Copyright (c) 2012-2022 Ingo Schwarze * Copyright (c) 2010, 2011, 2014 Kristaps Dzonsons @@ -235,6 +235,11 @@ enum mandocerr { MANDOCERR_ARG_EXCESS, /* skipping excess arguments: macro ... args */ MANDOCERR_DIVZERO, /* divide by zero */ + /* related to escape sequences */ + MANDOCERR_ESC_INCOMPLETE, /* incomplete escape sequence: esc */ + MANDOCERR_ESC_BADCHAR, /* invalid special character: esc */ + MANDOCERR_ESC_UNKCHAR, /* unknown special character: esc */ + MANDOCERR_UNSUPP, /* ===== start of unsupported features ===== */ MANDOCERR_TOOLARGE, /* input too large */ diff --git a/usr.bin/mandoc/mandoc_msg.c b/usr.bin/mandoc/mandoc_msg.c index 82140aff9b4..bb364f9b9bc 100644 --- a/usr.bin/mandoc/mandoc_msg.c +++ b/usr.bin/mandoc/mandoc_msg.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mandoc_msg.c,v 1.16 2022/04/28 16:16:46 schwarze Exp $ */ +/* $OpenBSD: mandoc_msg.c,v 1.17 2022/06/05 13:42:49 schwarze Exp $ */ /* * Copyright (c) 2014-2022 Ingo Schwarze * Copyright (c) 2010, 2011 Kristaps Dzonsons @@ -234,6 +234,11 @@ static const char *const type_message[MANDOCERR_MAX] = { "skipping excess arguments", "divide by zero", + /* related to escape sequences */ + "incomplete escape sequence", + "invalid special character", + "unknown special character", + "unsupported feature", "input too large", "unsupported control character", diff --git a/usr.bin/mandoc/roff_escape.c b/usr.bin/mandoc/roff_escape.c index 1c40cd5e33b..371ea6199af 100644 --- a/usr.bin/mandoc/roff_escape.c +++ b/usr.bin/mandoc/roff_escape.c @@ -1,4 +1,4 @@ -/* $OpenBSD: roff_escape.c,v 1.9 2022/06/05 10:19:47 schwarze Exp $ */ +/* $OpenBSD: roff_escape.c,v 1.10 2022/06/05 13:42:49 schwarze Exp $ */ /* * Copyright (c) 2011, 2012, 2013, 2014, 2015, 2017, 2018, 2020, 2022 * Ingo Schwarze @@ -310,13 +310,12 @@ roff_escape(const char *buf, const int ln, const int aesc, iendarg = iarg; while (maxl > 0) { if (buf[iendarg] == '\0') { + err = MANDOCERR_ESC_INCOMPLETE; + if (rval != ESCAPE_EXPAND) + rval = ESCAPE_ERROR; /* Ignore an incomplete argument except for \w. */ if (buf[inam] != 'w') iendarg = iarg; - if (rval == ESCAPE_EXPAND) - err = MANDOCERR_ESC_BAD; - else - rval = ESCAPE_ERROR; break; } if (buf[iendarg] == term) { @@ -401,6 +400,7 @@ roff_escape(const char *buf, const int ln, const int aesc, */ if (term != '\0' && argl == 1 && buf[iarg] != '-') { + err = MANDOCERR_ESC_BADCHAR; rval = ESCAPE_ERROR; break; } @@ -416,8 +416,10 @@ roff_escape(const char *buf, const int ln, const int aesc, c = 0; for (i = iarg; i < iendarg; i++) c = 10 * c + (buf[i] - '0'); - if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) + if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) { + err = MANDOCERR_ESC_BADCHAR; break; + } iarg += 4; rval = ESCAPE_NUMBERED; break; @@ -433,13 +435,19 @@ roff_escape(const char *buf, const int ln, const int aesc, if (buf[iarg] != 'u' || argl < 5 || argl > 7) break; if (argl == 7 && - (buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) + (buf[iarg + 1] != '1' || buf[iarg + 2] != '0')) { + err = MANDOCERR_ESC_BADCHAR; break; - if (argl == 6 && buf[iarg + 1] == '0') + } + if (argl == 6 && buf[iarg + 1] == '0') { + err = MANDOCERR_ESC_BADCHAR; break; + } if (argl == 5 && buf[iarg + 1] == 'D' && - strchr("89ABCDEF", buf[iarg + 2]) != NULL) + strchr("89ABCDEF", buf[iarg + 2]) != NULL) { + err = MANDOCERR_ESC_BADCHAR; break; + } if ((int)strspn(buf + iarg + 1, "0123456789ABCDEFabcdef") + 1 == argl) rval = ESCAPE_UNICODE; @@ -477,7 +485,8 @@ out: *resc = iesc; switch (rval) { case ESCAPE_ERROR: - err = MANDOCERR_ESC_BAD; + if (err == MANDOCERR_OK) + err = MANDOCERR_ESC_BAD; break; case ESCAPE_UNSUPP: err = MANDOCERR_ESC_UNSUPP; @@ -487,8 +496,10 @@ out: err = MANDOCERR_ESC_UNDEF; break; case ESCAPE_SPECIAL: - if (mchars_spec2cp(buf + iarg, argl) < 0) - err = MANDOCERR_ESC_BAD; + if (mchars_spec2cp(buf + iarg, argl) >= 0) + err = MANDOCERR_OK; + else if (err == MANDOCERR_OK) + err = MANDOCERR_ESC_UNKCHAR; break; default: break;