Garbage collect dead code intended to write five- and six-byte UTF-8 sequences

author schwarze <schwarze@openbsd.org>

Tue, 14 May 2024 21:12:44 +0000 (21:12 +0000)

committer schwarze <schwarze@openbsd.org>

Tue, 14 May 2024 21:12:44 +0000 (21:12 +0000)
author schwarze <schwarze@openbsd.org>
Tue, 14 May 2024 21:12:44 +0000 (21:12 +0000)
committer schwarze <schwarze@openbsd.org>
Tue, 14 May 2024 21:12:44 +0000 (21:12 +0000)
diff --git a/usr.bin/mandoc/mandocdb.c b/usr.bin/mandoc/mandocdb.c

index 4343a89..65fbe05 100644 (file)
--- a/usr.bin/mandoc/mandocdb.c
+++ b/usr.bin/mandoc/mandocdb.c
@@ -1,6 +1,6 @@
-/* $OpenBSD: mandocdb.c,v 1.220 2024/05/14 18:38:13 schwarze Exp $ */
+/* $OpenBSD: mandocdb.c,v 1.221 2024/05/14 21:12:44 schwarze Exp $ */
  /*
- * Copyright (c) 2011-2020 Ingo Schwarze <schwarze@openbsd.org>
+ * Copyright (c) 2011-2021, 2024 Ingo Schwarze <schwarze@openbsd.org>
   * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
   * Copyright (c) 2016 Ed Maste <emaste@freebsd.org>
   *
@@ -155,7 +155,7 @@ static      void     say(const char *, const char *, ...)
                         __attribute__((__format__ (__printf__, 2, 3)));
  static int      set_basedir(const char *, int);
  static int      treescan(void);
-static size_t   utf8(unsigned int, char [7]);
+static size_t   utf8(unsigned int, char[5]);
  
  static int              nodb; /* no database changes */
  static int              mparse_options; /* abort the parse early */
@@ -1863,49 +1863,35 @@ putkeys(const struct mpage *mpage, char *cp, size_t sz, uint64_t v)
   * Take a Unicode codepoint and produce its UTF-8 encoding.
   * This isn't the best way to do this, but it works.
   * The magic numbers are from the UTF-8 packaging.
- * They're not as scary as they seem: read the UTF-8 spec for details.
+ * Read the UTF-8 spec or the utf8(7) manual page for details.
   */
  static size_t
-utf8(unsigned int cp, char out[7])
+utf8(unsigned int cp, char out[5])
  {
         size_t           rc;
  
-       rc = 0;
-       if (cp <= 0x0000007F) {
+       if (cp <= 0x7f) {
                 rc = 1;
                 out[0] = (char)cp;
-       } else if (cp <= 0x000007FF) {
+       } else if (cp <= 0x7ff) {
                 rc = 2;
                 out[0] = (cp >> 6  & 31) | 192;
                 out[1] = (cp       & 63) | 128;
-       } else if (cp <= 0x0000FFFF) {
+       } else if (cp >= 0xd800 && cp <= 0xdfff) {
+               rc = 0; /* reject UTF-16 surrogate */
+       } else if (cp <= 0xffff) {
                 rc = 3;
                 out[0] = (cp >> 12 & 15) | 224;
                 out[1] = (cp >> 6  & 63) | 128;
                 out[2] = (cp       & 63) | 128;
-       } else if (cp <= 0x001FFFFF) {
+       } else if (cp <= 0x10ffff) {
                 rc = 4;
                 out[0] = (cp >> 18 &  7) | 240;
                 out[1] = (cp >> 12 & 63) | 128;
                 out[2] = (cp >> 6  & 63) | 128;
                 out[3] = (cp       & 63) | 128;
-       } else if (cp <= 0x03FFFFFF) {
-               rc = 5;
-               out[0] = (cp >> 24 &  3) | 248;
-               out[1] = (cp >> 18 & 63) | 128;
-               out[2] = (cp >> 12 & 63) | 128;
-               out[3] = (cp >> 6  & 63) | 128;
-               out[4] = (cp       & 63) | 128;
-       } else if (cp <= 0x7FFFFFFF) {
-               rc = 6;
-               out[0] = (cp >> 30 &  1) | 252;
-               out[1] = (cp >> 24 & 63) | 128;
-               out[2] = (cp >> 18 & 63) | 128;
-               out[3] = (cp >> 12 & 63) | 128;
-               out[4] = (cp >> 6  & 63) | 128;
-               out[5] = (cp       & 63) | 128;
         } else
-               return 0;
+               rc = 0;
  
         out[rc] = '\0';
         return rc;
author	schwarze <schwarze@openbsd.org>
	Tue, 14 May 2024 21:12:44 +0000 (21:12 +0000)
committer	schwarze <schwarze@openbsd.org>
	Tue, 14 May 2024 21:12:44 +0000 (21:12 +0000)