From 4028c4ad441097ee435889d307fa4e833f835aca Mon Sep 17 00:00:00 2001 From: schwarze Date: Fri, 10 Aug 2018 22:12:40 +0000 Subject: [PATCH] handle the non-portable GNU-style \[charNN], \[charNNN] character escape sequences, used for example in the groff_char(7) manual page --- share/man/man7/mandoc_char.7 | 14 ++++++++------ usr.bin/mandoc/mandoc.c | 26 ++++++++++++++++++++++---- 2 files changed, 30 insertions(+), 10 deletions(-) diff --git a/share/man/man7/mandoc_char.7 b/share/man/man7/mandoc_char.7 index e464d227f8c..b7a43c060fb 100644 --- a/share/man/man7/mandoc_char.7 +++ b/share/man/man7/mandoc_char.7 @@ -1,4 +1,4 @@ -.\" $OpenBSD: mandoc_char.7,v 1.37 2018/08/08 14:29:42 schwarze Exp $ +.\" $OpenBSD: mandoc_char.7,v 1.38 2018/08/10 22:12:40 schwarze Exp $ .\" .\" Copyright (c) 2003 Jason McIntyre .\" Copyright (c) 2009, 2010, 2011 Kristaps Dzonsons @@ -16,7 +16,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: August 8 2018 $ +.Dd $Mdocdate: August 10 2018 $ .Dt MANDOC_CHAR 7 .Os .Sh NAME @@ -761,14 +761,16 @@ For backward compatibility with existing manuals, .Xr mandoc 1 also supports the .Pp -.Dl \eN\(aq Ns Ar number Ns \(aq +.Dl \eN\(aq Ns Ar number Ns \(aq and \e[ Ns Cm char Ns Ar number ] .Pp -escape sequence, inserting the character +escape sequences, inserting the character .Ar number from the current character set into the output. Of course, this is inherently non-portable and is already marked -as deprecated in the Heirloom roff manual. -For example, do not use \eN\(aq34\(aq, use \e(dq, or even the plain +as deprecated in the Heirloom roff manual; +on top of that, the second form is a GNU extension. +For example, do not use \eN\(aq34\(aq or \e[char34], use \e(dq, +or even the plain .Sq \(dq character where possible. .Sh COMPATIBILITY diff --git a/usr.bin/mandoc/mandoc.c b/usr.bin/mandoc/mandoc.c index 81a658fad92..c33c2a1b6ee 100644 --- a/usr.bin/mandoc/mandoc.c +++ b/usr.bin/mandoc/mandoc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mandoc.c,v 1.72 2018/07/28 18:32:30 schwarze Exp $ */ +/* $OpenBSD: mandoc.c,v 1.73 2018/08/10 22:12:40 schwarze Exp $ */ /* * Copyright (c) 2008-2011, 2014 Kristaps Dzonsons * Copyright (c) 2011-2015, 2017, 2018 Ingo Schwarze @@ -39,7 +39,7 @@ enum mandoc_esc mandoc_escape(const char **end, const char **start, int *sz) { const char *local_start; - int local_sz; + int local_sz, c, i; char term; enum mandoc_esc gly; @@ -328,8 +328,26 @@ mandoc_escape(const char **end, const char **start, int *sz) } break; case ESCAPE_SPECIAL: - if (1 == *sz && 'c' == **start) - gly = ESCAPE_NOSPACE; + if (**start == 'c') { + if (*sz == 1) { + gly = ESCAPE_NOSPACE; + break; + } + if (*sz < 6 || *sz > 7 || + strncmp(*start, "char", 4) != 0 || + (int)strspn(*start + 4, "0123456789") + 4 < *sz) + break; + c = 0; + for (i = 4; i < *sz; i++) + c = 10 * c + ((*start)[i] - '0'); + if (c < 0x21 || (c > 0x7e && c < 0xa0) || c > 0xff) + break; + *start += 4; + *sz -= 4; + gly = ESCAPE_NUMBERED; + break; + } + /* * Unicode escapes are defined in groff as \[u0000] * to \[u10FFFF], where the contained value must be -- 2.20.1