From 2b14f697be21be2a4bb5c322e43d2765aa69e247 Mon Sep 17 00:00:00 2001 From: schwarze Date: Fri, 24 Nov 2023 04:48:02 +0000 Subject: [PATCH] 1. Do not put ASCII_HYPH (0x1c) into the tag file. That happened when tagging a string containing '-' on an input text line, most commonly in man(7) .TP next line scope. 2. Do not let "\-" end the tag. In both cases, translate ASCII_HYPH and "\-" to plain '-' for output. For example, this improves handling of unbound.conf(5). These two bugs were found thanks to a posting by weerd@. --- regress/usr.bin/mandoc/mdoc/Cm/tag.out_html | 2 +- regress/usr.bin/mandoc/mdoc/Cm/tag.out_tag | 2 +- usr.bin/mandoc/tag.c | 59 ++++++++++++++++----- 3 files changed, 48 insertions(+), 15 deletions(-) diff --git a/regress/usr.bin/mandoc/mdoc/Cm/tag.out_html b/regress/usr.bin/mandoc/mdoc/Cm/tag.out_html index 5141f52a81d..ceadb4b592b 100644 --- a/regress/usr.bin/mandoc/mdoc/Cm/tag.out_html +++ b/regress/usr.bin/mandoc/mdoc/Cm/tag.out_html @@ -7,7 +7,7 @@
text
text
-
+
text
text
diff --git a/regress/usr.bin/mandoc/mdoc/Cm/tag.out_tag b/regress/usr.bin/mandoc/mdoc/Cm/tag.out_tag index a59da516b3f..d6bd49a5b28 100644 --- a/regress/usr.bin/mandoc/mdoc/Cm/tag.out_tag +++ b/regress/usr.bin/mandoc/mdoc/Cm/tag.out_tag @@ -4,6 +4,6 @@ one tag.mandoc_ascii 9 two tag.mandoc_ascii 9 three tag.mandoc_ascii 12 hyphen tag.mandoc_ascii 14 -minus tag.mandoc_ascii 17 +minus-sign tag.mandoc_ascii 17 backslash tag.mandoc_ascii 20 four tag.mandoc_ascii 22 diff --git a/usr.bin/mandoc/tag.c b/usr.bin/mandoc/tag.c index 8cb7bdb393c..bc3f43f78c8 100644 --- a/usr.bin/mandoc/tag.c +++ b/usr.bin/mandoc/tag.c @@ -1,6 +1,6 @@ -/* $OpenBSD: tag.c,v 1.37 2022/04/26 11:28:35 schwarze Exp $ */ +/* $OpenBSD: tag.c,v 1.38 2023/11/24 04:48:02 schwarze Exp $ */ /* - * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022 + * Copyright (c) 2015, 2016, 2018, 2019, 2020, 2022, 2023 * Ingo Schwarze * * Permission to use, copy, modify, and distribute this software for any @@ -24,11 +24,13 @@ #include #include #include +#include #include #include #include "mandoc_aux.h" #include "mandoc_ohash.h" +#include "mandoc.h" #include "roff.h" #include "mdoc.h" #include "roff_int.h" @@ -86,9 +88,11 @@ tag_put(const char *s, int prio, struct roff_node *n) { struct tag_entry *entry; struct roff_node *nold; - const char *se; + const char *se, *src; + char *cpy; size_t len; unsigned int slot; + int changed; assert(prio <= TAG_FALLBACK); @@ -104,6 +108,7 @@ tag_put(const char *s, int prio, struct roff_node *n) /* Determine the implicit tag. */ + changed = 1; if (s == NULL) { if (n->child == NULL || n->child->type != ROFFT_TEXT) return; @@ -120,27 +125,53 @@ tag_put(const char *s, int prio, struct roff_node *n) s += 2; break; default: - break; + return; } break; default: + changed = 0; break; } } /* + * Translate \- and ASCII_HYPH to plain '-'. * Skip whitespace and escapes and whatever follows, * and if there is any, downgrade the priority. */ - len = strcspn(s, " \t\\"); + cpy = mandoc_malloc(strlen(s) + 1); + for (src = s, len = 0; *src != '\0'; src++, len++) { + switch (*src) { + case '\t': + case ' ': + changed = 1; + break; + case ASCII_HYPH: + cpy[len] = '-'; + changed = 1; + continue; + case '\\': + if (src[1] != '-') + break; + src++; + changed = 1; + /* FALLTHROUGH */ + default: + cpy[len] = *src; + continue; + } + break; + } if (len == 0) - return; + goto out; + cpy[len] = '\0'; - se = s + len; - if (*se != '\0' && prio < TAG_WEAK) + if (*src != '\0' && prio < TAG_WEAK) prio = TAG_WEAK; + s = cpy; + se = cpy + len; slot = ohash_qlookupi(&tag_data, s, &se); entry = ohash_find(&tag_data, slot); @@ -148,8 +179,7 @@ tag_put(const char *s, int prio, struct roff_node *n) if (entry == NULL) { entry = mandoc_malloc(sizeof(*entry) + len + 1); - memcpy(entry->s, s, len); - entry->s[len] = '\0'; + memcpy(entry->s, s, len + 1); entry->nodes = NULL; entry->maxnodes = entry->nnodes = 0; ohash_insert(&tag_data, slot, entry); @@ -161,7 +191,7 @@ tag_put(const char *s, int prio, struct roff_node *n) */ else if (entry->prio < prio) - return; + goto out; /* * If the existing entry is worse, clear it. @@ -178,7 +208,7 @@ tag_put(const char *s, int prio, struct roff_node *n) } if (prio == TAG_FALLBACK) { entry->prio = TAG_DELETE; - return; + goto out; } } @@ -192,10 +222,13 @@ tag_put(const char *s, int prio, struct roff_node *n) entry->nodes[entry->nnodes++] = n; entry->prio = prio; n->flags |= NODE_ID; - if (n->child == NULL || n->child->string != s || *se != '\0') { + if (changed) { assert(n->tag == NULL); n->tag = mandoc_strndup(s, len); } + + out: + free(cpy); } int -- 2.20.1