From: schwarze Date: Sat, 10 Jun 2017 16:53:58 +0000 (+0000) Subject: Reduce false positives for the "no blank before trailing delimiter" message. X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=b7c50e8744bcb686fc74ce313bebcd685d90c90d;p=openbsd Reduce false positives for the "no blank before trailing delimiter" message. This brings us down to one false positive for about every 18 pages. --- diff --git a/usr.bin/mandoc/mdoc_validate.c b/usr.bin/mandoc/mdoc_validate.c index ec5ff271814..4fe4f25fb10 100644 --- a/usr.bin/mandoc/mdoc_validate.c +++ b/usr.bin/mandoc/mdoc_validate.c @@ -1,4 +1,4 @@ -/* $OpenBSD: mdoc_validate.c,v 1.248 2017/06/10 01:48:31 schwarze Exp $ */ +/* $OpenBSD: mdoc_validate.c,v 1.249 2017/06/10 16:53:58 schwarze Exp $ */ /* * Copyright (c) 2008-2012 Kristaps Dzonsons * Copyright (c) 2010-2017 Ingo Schwarze @@ -410,9 +410,17 @@ static void post_delim(POST_ARGS) { const struct roff_node *nch; - const char *lc; + const char *lc, *cp; + int nw; enum mdelim delim; + enum roff_tok tok; + /* + * Find candidates: at least two bytes, + * the last one a closing or middle delimiter. + */ + + tok = mdoc->last->tok; nch = mdoc->last->last; if (nch == NULL || nch->type != ROFFT_TEXT) return; @@ -422,9 +430,74 @@ post_delim(POST_ARGS) delim = mdoc_isdelim(lc); if (delim == DELIM_NONE || delim == DELIM_OPEN) return; + + /* + * Reduce false positives by allowing various cases. + */ + + /* Escaped delimiters. */ + if (lc > nch->string + 1 && lc[-2] == '\\' && + (lc[-1] == '&' || lc[-1] == 'e')) + return; + + /* Specific byte sequences. */ + switch (*lc) { + case ')': + for (cp = lc; cp >= nch->string; cp--) + if (*cp == '(') + return; + break; + case '.': + if (lc > nch->string + 1 && lc[-2] == '.' && lc[-1] == '.') + return; + if (lc[-1] == '.') + return; + break; + case ';': + if (tok == MDOC_Vt) + return; + break; + case '?': + if (lc[-1] == '?') + return; + break; + case ']': + for (cp = lc; cp >= nch->string; cp--) + if (*cp == '[') + return; + break; + case '|': + if (lc == nch->string + 1 && lc[-1] == '|') + return; + default: + break; + } + + /* Exactly two non-alphanumeric bytes. */ + if (lc == nch->string + 1 && !isalnum((unsigned char)lc[-1])) + return; + + /* At least three alphabetic words with a sentence ending. */ + if (strchr("!.:?", *lc) != NULL && (tok == MDOC_Em || + tok == MDOC_Li || tok == MDOC_No || tok == MDOC_Po || + tok == MDOC_Pq || tok == MDOC_Sy)) { + nw = 0; + for (cp = lc - 1; cp >= nch->string; cp--) { + if (*cp == ' ') { + nw++; + if (cp > nch->string && cp[-1] == ',') + cp--; + } else if (isalpha((unsigned int)*cp)) { + if (nw > 1) + return; + } else + break; + } + } + mandoc_vmsg(MANDOCERR_DELIM, mdoc->parse, nch->line, nch->pos + (lc - nch->string), - "%s%s %s", roff_name[mdoc->last->tok], + "%s%s %s", roff_name[tok], nch == mdoc->last->child ? "" : " ...", nch->string); }