From 6d9b308dc5df90a03b34cba3ab639261f8094414 Mon Sep 17 00:00:00 2001 From: schwarze Date: Mon, 23 Oct 2023 20:07:18 +0000 Subject: [PATCH] Support some escape sequences, in particular character escape sequences, inside \w arguments, and skip most other escape sequences when measuring the output length in this way because most escape sequences contribute little or nothing to text width: for example, consider font escapes in terminal output. This implementation is very rudimentary. In particular, it assumes that every character has the same width. No attempt is made to detect double-width or zero-width Unicode characters or to take dependencies on output devices or fonts into account. These limitations are hard to avoid because mandoc has to interpolate \w at the parsing stage when the output device is not yet known. I really do not want the content of the syntax tree to depend on the output device. Feature requested by Paul, who also submitted a patch, but I chose to commit this very different patch with almost the same functionality. His input was still very valuable because complete support for \w is out of the question, and consequently, the main task is identifying subsets of the feature that are needed for real-world manual pages and can be supported without uprooting the whole forest. 
--- regress/usr.bin/mandoc/roff/esc/w.in | 18 +++++++++-- regress/usr.bin/mandoc/roff/esc/w.out_ascii | 7 +++++ regress/usr.bin/mandoc/roff/esc/w.out_lint | 9 +++--- share/man/man7/roff.7 | 9 +++--- usr.bin/mandoc/roff.c | 35 ++++++++++++++++++--- 5 files changed, 64 insertions(+), 14 deletions(-) diff --git a/regress/usr.bin/mandoc/roff/esc/w.in b/regress/usr.bin/mandoc/roff/esc/w.in index e6ee6be7cd1..cb760107cab 100644 --- a/regress/usr.bin/mandoc/roff/esc/w.in +++ b/regress/usr.bin/mandoc/roff/esc/w.in @@ -1,5 +1,5 @@ -.\" $OpenBSD: w.in,v 1.4 2022/06/08 13:08:00 schwarze Exp $ -.Dd $Mdocdate: June 8 2022 $ +.\" $OpenBSD: w.in,v 1.5 2023/10/23 20:07:19 schwarze Exp $ +.Dd $Mdocdate: October 23 2023 $ .Dt ESC-W 1 .Os .Sh NAME @@ -13,6 +13,20 @@ character: \w'n' blank: \w' ' .br text: \w'text' +.br +special: \w'\(bu' +.br +numbered: \w'\N'100'' +.br +Unicode: \w'\[u2013]' +.br +overstrike: \w'\o'ab'' +.br +undefined: \w'\G' +.br +zero-width: \w'\fB\&\fP' +.br +skipchar: \w'a\zb\z\(buc' .Ss Argument delimiters unsupported \er: \w\rM\ru .br diff --git a/regress/usr.bin/mandoc/roff/esc/w.out_ascii b/regress/usr.bin/mandoc/roff/esc/w.out_ascii index 7ed32ecef88..f5f9c5f2d16 100644 --- a/regress/usr.bin/mandoc/roff/esc/w.out_ascii +++ b/regress/usr.bin/mandoc/roff/esc/w.out_ascii @@ -8,6 +8,13 @@ DDEESSCCRRIIPPTTIIOONN character: 24 blank: 24 text: 96 + special: 24 + numbered: 24 + Unicode: 24 + overstrike: 24 + undefined: 24 + zero-width: 0 + skipchar: 48 AArrgguummeenntt ddeelliimmiitteerrss unsupported \r: 24u diff --git a/regress/usr.bin/mandoc/roff/esc/w.out_lint b/regress/usr.bin/mandoc/roff/esc/w.out_lint index fd2a09482fc..9c6417c58f5 100644 --- a/regress/usr.bin/mandoc/roff/esc/w.out_lint +++ b/regress/usr.bin/mandoc/roff/esc/w.out_lint @@ -1,4 +1,5 @@ -mandoc: w.in:17:20: UNSUPP: unsupported escape sequence: \r -mandoc: w.in:17:23: UNSUPP: unsupported escape sequence: \r -mandoc: w.in:23:16: WARNING: undefined escape, printing literally: \G -mandoc: 
w.in:51:15: ERROR: incomplete escape sequence: \w'foo +mandoc: w.in:25:15: WARNING: undefined escape, printing literally: \G +mandoc: w.in:31:20: UNSUPP: unsupported escape sequence: \r +mandoc: w.in:31:23: UNSUPP: unsupported escape sequence: \r +mandoc: w.in:37:16: WARNING: undefined escape, printing literally: \G +mandoc: w.in:65:15: ERROR: incomplete escape sequence: \w'foo diff --git a/share/man/man7/roff.7 b/share/man/man7/roff.7 index 3e0dd7b1bed..f92a9c295c1 100644 --- a/share/man/man7/roff.7 +++ b/share/man/man7/roff.7 @@ -1,6 +1,6 @@ -.\" $OpenBSD: roff.7,v 1.101 2022/05/31 20:21:40 schwarze Exp $ +.\" $OpenBSD: roff.7,v 1.102 2023/10/23 20:07:18 schwarze Exp $ .\" -.\" Copyright (c) 2010-2019, 2022 Ingo Schwarze +.\" Copyright (c) 2010-2019, 2022-2023 Ingo Schwarze .\" Copyright (c) 2010, 2011, 2012 Kristaps Dzonsons .\" .\" Permission to use, copy, modify, and distribute this software for any @@ -15,7 +15,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: May 31 2022 $ +.Dd $Mdocdate: October 23 2023 $ .Dt ROFF 7 .Os .Sh NAME @@ -2224,7 +2224,8 @@ The .Xr mandoc 1 implementation assumes that after expansion of user-defined strings, the .Ar string -only contains normal characters, no escape sequences, and that each +only contains normal characters, characters expressed as escape sequences, +and zero-width escape sequences, and that each character has a width of 24 basic units. 
.It Ic \eX\(aq Ns Ar string Ns Ic \(aq Output diff --git a/usr.bin/mandoc/roff.c b/usr.bin/mandoc/roff.c index 4a2784ba87c..7b3df53d7f4 100644 --- a/usr.bin/mandoc/roff.c +++ b/usr.bin/mandoc/roff.c @@ -1,6 +1,6 @@ -/* $OpenBSD: roff.c,v 1.270 2023/10/22 16:01:58 schwarze Exp $ */ +/* $OpenBSD: roff.c,v 1.271 2023/10/23 20:07:18 schwarze Exp $ */ /* - * Copyright (c) 2010-2015, 2017-2022 Ingo Schwarze + * Copyright (c) 2010-2015, 2017-2023 Ingo Schwarze * Copyright (c) 2008-2012, 2014 Kristaps Dzonsons * * Permission to use, copy, modify, and distribute this software for any @@ -1360,6 +1360,7 @@ roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec) const char *res; /* the string to be pasted */ const char *src; /* source for copying */ char *dst; /* destination for copying */ + enum mandoc_esc subtype; /* return value from roff_escape */ int iesc; /* index of leading escape char */ int inam; /* index of the escape name */ int iarg; /* index beginning the argument */ @@ -1549,8 +1550,34 @@ roff_expand(struct roff *r, struct buf *buf, int ln, int pos, char ec) res = ubuf; break; case 'w': - (void)snprintf(ubuf, sizeof(ubuf), - "%d", (iendarg - iarg) * 24); + rsz = 0; + subtype = ESCAPE_UNDEF; + while (iarg < iendarg) { + asz = subtype == ESCAPE_SKIPCHAR ? 0 : 1; + if (buf->buf[iarg] != '\\') { + rsz += asz; + iarg++; + continue; + } + switch ((subtype = roff_escape(buf->buf, 0, + iarg, NULL, NULL, NULL, NULL, &iarg))) { + case ESCAPE_SPECIAL: + case ESCAPE_NUMBERED: + case ESCAPE_UNICODE: + case ESCAPE_OVERSTRIKE: + case ESCAPE_UNDEF: + break; + case ESCAPE_DEVICE: + asz *= 8; + break; + case ESCAPE_EXPAND: + abort(); + default: + continue; + } + rsz += asz; + } + (void)snprintf(ubuf, sizeof(ubuf), "%d", rsz * 24); res = ubuf; break; default: -- 2.20.1