From c2eb3b8c127d64282143b29045ee1e714290db9e Mon Sep 17 00:00:00 2001 From: schwarze Date: Thu, 16 May 2024 20:32:24 +0000 Subject: [PATCH] Improve coverage of edge cases for 3-byte UTF-8 sequences. Coverage for 2-byte and 4-byte sequences was already reasonable. --- regress/usr.bin/mandoc/char/unicode/input.in | Bin 2684 -> 2948 bytes .../mandoc/char/unicode/input.out_ascii | 7 +- .../mandoc/char/unicode/input.out_lint | 112 +++++++++--------- .../mandoc/char/unicode/input.out_utf8 | 7 +- 4 files changed, 68 insertions(+), 58 deletions(-) diff --git a/regress/usr.bin/mandoc/char/unicode/input.in b/regress/usr.bin/mandoc/char/unicode/input.in index 276d7c54d1c9e02bcd2f16ac4d825da43cc33a08..845af5ffbb4eeb0dd1000bfc8fdd86c6f6a54fcc 100644 GIT binary patch delta 311 zcmew((jq=VTGUj*$iT=%-@sJg&ETBWMxp1 znqrxjlw=hXUFu@)<`nyO{%)&`%=C=Z;u3|j#GK3&g@V$goXli}l8mC%RNc)NSTq>x zUESP3I)VC<(m*=hfC91ackj1Ksw_#>%`ZwxEmFu$EXw8z)pm1pbB3r*OG*Z-1q#Hz z-@nHyKPf9UxkRBTwIC-kIW;#muS6j^Be5tkxg@m+qR|DWF$Jp81*mbaRgoudEv_&v dsZcGhKrQ>Mic-r`i;7cI6wn;F`3q|Y3jk?7Zb$$C delta 57 zcmZn>|06O%TGT|r$iT=@-@r`Yz(~Q++{)O@%G7eAffkQfX begin of second start byte U+CFFF 0xecbfbf end of last normal start byte U+D000 0xed8080 begin of last start byte + U+D7FB 0xed9fbb highest valid public three-byte U+D7FF 0xed9fbf highest public three-byte U+D800 0xeda080 ??? lowest surrogate U+DFFF 0xedbfbf ??? highest surrogate U+E000 0xee8080 lowest private use U+F8FF 0xefa3bf highest private use U+F900 0xefa480 lowest post-private + U+FEFF 0xefbbbf byte-order mark + U+FFFC 0xefbfbc object replacement character + U+FFFD 0xefbfbd replacement character + U+FFFE 0xefbfbe reversed byte-order mark U+FFFF 0xefbfbf highest three-byte FFoouurr--bbyyttee rraannggee @@ -60,4 +65,4 @@ DDEESSCCRRIIPPTTIIOONN U+1FFFFF 0xf7bfbfbf ???? highest invalid four-byte U+200000 0xf888808080 ????? lowest five-byte -OpenBSD June 2, 2021 CHAR-UNICODE-INPUT(1) +OpenBSD May 16, 2024 CHAR-UNICODE-INPUT(1) diff --git a/regress/usr.bin/mandoc/char/unicode/input.out_lint b/regress/usr.bin/mandoc/char/unicode/input.out_lint index fa36f8769fe..70f10c27ae4 100644 --- a/regress/usr.bin/mandoc/char/unicode/input.out_lint +++ b/regress/usr.bin/mandoc/char/unicode/input.out_lint @@ -21,61 +21,61 @@ mandoc: input.in:34:19: ERROR: skipping bad character: 0x80 mandoc: input.in:35:17: ERROR: skipping bad character: 0xe0 mandoc: input.in:35:18: ERROR: skipping bad character: 0x9f mandoc: input.in:35:19: ERROR: skipping bad character: 0xbf -mandoc: input.in:42:25: ERROR: skipping bad character: 0xed -mandoc: input.in:42:26: ERROR: skipping bad character: 0xa0 -mandoc: input.in:42:27: ERROR: skipping bad character: 0x80 -mandoc: input.in:42:17: ERROR: invalid special character: \[uD800] mandoc: input.in:43:25: ERROR: skipping bad character: 0xed -mandoc: input.in:43:26: ERROR: skipping bad character: 0xbf -mandoc: input.in:43:27: ERROR: skipping bad character: 0xbf -mandoc: input.in:43:17: ERROR: invalid special character: \[uDFFF] -mandoc: input.in:53:19: ERROR: skipping bad character: 0xf0 -mandoc: input.in:53:20: ERROR: skipping bad character: 0x80 -mandoc: input.in:53:21: ERROR: skipping bad character: 0x80 -mandoc: input.in:53:22: ERROR: skipping bad character: 0x80 -mandoc: input.in:54:19: ERROR: skipping bad character: 0xf0 -mandoc: input.in:54:20: ERROR: skipping bad character: 0x80 -mandoc: input.in:54:21: ERROR: skipping bad character: 0x81 -mandoc: input.in:54:22: ERROR: skipping bad character: 0xbf -mandoc: input.in:55:19: ERROR: skipping bad character: 0xf0 -mandoc: input.in:55:20: ERROR: skipping bad character: 0x80 -mandoc: input.in:55:21: ERROR: skipping bad character: 0x82 -mandoc: input.in:55:22: ERROR: skipping bad character: 0x80 -mandoc: input.in:56:19: ERROR: skipping bad character: 0xf0 -mandoc: input.in:56:20: ERROR: skipping bad character: 0x80 -mandoc: input.in:56:21: ERROR: skipping bad character: 0x9f -mandoc: input.in:56:22: ERROR: skipping bad character: 0xbf -mandoc: input.in:57:19: ERROR: skipping bad character: 0xf0 -mandoc: input.in:57:20: ERROR: skipping bad character: 0x80 -mandoc: input.in:57:21: ERROR: skipping bad character: 0xa0 -mandoc: input.in:57:22: ERROR: skipping bad character: 0x80 +mandoc: input.in:43:26: ERROR: skipping bad character: 0xa0 +mandoc: input.in:43:27: ERROR: skipping bad character: 0x80 +mandoc: input.in:43:17: ERROR: invalid special character: \[uD800] +mandoc: input.in:44:25: ERROR: skipping bad character: 0xed +mandoc: input.in:44:26: ERROR: skipping bad character: 0xbf +mandoc: input.in:44:27: ERROR: skipping bad character: 0xbf +mandoc: input.in:44:17: ERROR: invalid special character: \[uDFFF] mandoc: input.in:58:19: ERROR: skipping bad character: 0xf0 -mandoc: input.in:58:20: ERROR: skipping bad character: 0x8f -mandoc: input.in:58:21: ERROR: skipping bad character: 0xbf -mandoc: input.in:58:22: ERROR: skipping bad character: 0xbf -mandoc: input.in:67:31: ERROR: skipping bad character: 0xf4 -mandoc: input.in:67:32: ERROR: skipping bad character: 0x90 -mandoc: input.in:67:33: ERROR: skipping bad character: 0x80 -mandoc: input.in:67:34: ERROR: skipping bad character: 0x80 -mandoc: input.in:67:21: ERROR: invalid special character: \[u110000] -mandoc: input.in:68:31: ERROR: skipping bad character: 0xf4 -mandoc: input.in:68:32: ERROR: skipping bad character: 0xbf -mandoc: input.in:68:33: ERROR: skipping bad character: 0xbf -mandoc: input.in:68:34: ERROR: skipping bad character: 0xbf -mandoc: input.in:68:21: ERROR: invalid special character: \[u13FFFF] -mandoc: input.in:69:31: ERROR: skipping bad character: 0xf5 -mandoc: input.in:69:32: ERROR: skipping bad character: 0x80 -mandoc: input.in:69:33: ERROR: skipping bad character: 0x80 -mandoc: input.in:69:34: ERROR: skipping bad character: 0x80 -mandoc: input.in:69:21: ERROR: invalid special character: \[u140000] -mandoc: input.in:70:31: ERROR: skipping bad character: 0xf7 -mandoc: input.in:70:32: ERROR: skipping bad character: 0xbf -mandoc: input.in:70:33: ERROR: skipping bad character: 0xbf -mandoc: input.in:70:34: ERROR: skipping bad character: 0xbf -mandoc: input.in:70:21: ERROR: invalid special character: \[u1FFFFF] -mandoc: input.in:71:33: ERROR: skipping bad character: 0xf8 -mandoc: input.in:71:34: ERROR: skipping bad character: 0x88 -mandoc: input.in:71:35: ERROR: skipping bad character: 0x80 -mandoc: input.in:71:36: ERROR: skipping bad character: 0x80 -mandoc: input.in:71:37: ERROR: skipping bad character: 0x80 -mandoc: input.in:71:23: ERROR: invalid special character: \[u200000] +mandoc: input.in:58:20: ERROR: skipping bad character: 0x80 +mandoc: input.in:58:21: ERROR: skipping bad character: 0x80 +mandoc: input.in:58:22: ERROR: skipping bad character: 0x80 +mandoc: input.in:59:19: ERROR: skipping bad character: 0xf0 +mandoc: input.in:59:20: ERROR: skipping bad character: 0x80 +mandoc: input.in:59:21: ERROR: skipping bad character: 0x81 +mandoc: input.in:59:22: ERROR: skipping bad character: 0xbf +mandoc: input.in:60:19: ERROR: skipping bad character: 0xf0 +mandoc: input.in:60:20: ERROR: skipping bad character: 0x80 +mandoc: input.in:60:21: ERROR: skipping bad character: 0x82 +mandoc: input.in:60:22: ERROR: skipping bad character: 0x80 +mandoc: input.in:61:19: ERROR: skipping bad character: 0xf0 +mandoc: input.in:61:20: ERROR: skipping bad character: 0x80 +mandoc: input.in:61:21: ERROR: skipping bad character: 0x9f +mandoc: input.in:61:22: ERROR: skipping bad character: 0xbf +mandoc: input.in:62:19: ERROR: skipping bad character: 0xf0 +mandoc: input.in:62:20: ERROR: skipping bad character: 0x80 +mandoc: input.in:62:21: ERROR: skipping bad character: 0xa0 +mandoc: input.in:62:22: ERROR: skipping bad character: 0x80 +mandoc: input.in:63:19: ERROR: skipping bad character: 0xf0 +mandoc: input.in:63:20: ERROR: skipping bad character: 0x8f +mandoc: input.in:63:21: ERROR: skipping bad character: 0xbf +mandoc: input.in:63:22: ERROR: skipping bad character: 0xbf +mandoc: input.in:72:31: ERROR: skipping bad character: 0xf4 +mandoc: input.in:72:32: ERROR: skipping bad character: 0x90 +mandoc: input.in:72:33: ERROR: skipping bad character: 0x80 +mandoc: input.in:72:34: ERROR: skipping bad character: 0x80 +mandoc: input.in:72:21: ERROR: invalid special character: \[u110000] +mandoc: input.in:73:31: ERROR: skipping bad character: 0xf4 +mandoc: input.in:73:32: ERROR: skipping bad character: 0xbf +mandoc: input.in:73:33: ERROR: skipping bad character: 0xbf +mandoc: input.in:73:34: ERROR: skipping bad character: 0xbf +mandoc: input.in:73:21: ERROR: invalid special character: \[u13FFFF] +mandoc: input.in:74:31: ERROR: skipping bad character: 0xf5 +mandoc: input.in:74:32: ERROR: skipping bad character: 0x80 +mandoc: input.in:74:33: ERROR: skipping bad character: 0x80 +mandoc: input.in:74:34: ERROR: skipping bad character: 0x80 +mandoc: input.in:74:21: ERROR: invalid special character: \[u140000] +mandoc: input.in:75:31: ERROR: skipping bad character: 0xf7 +mandoc: input.in:75:32: ERROR: skipping bad character: 0xbf +mandoc: input.in:75:33: ERROR: skipping bad character: 0xbf +mandoc: input.in:75:34: ERROR: skipping bad character: 0xbf +mandoc: input.in:75:21: ERROR: invalid special character: \[u1FFFFF] +mandoc: input.in:76:33: ERROR: skipping bad character: 0xf8 +mandoc: input.in:76:34: ERROR: skipping bad character: 0x88 +mandoc: input.in:76:35: ERROR: skipping bad character: 0x80 +mandoc: input.in:76:36: ERROR: skipping bad character: 0x80 +mandoc: input.in:76:37: ERROR: skipping bad character: 0x80 +mandoc: input.in:76:23: ERROR: invalid special character: \[u200000] diff --git a/regress/usr.bin/mandoc/char/unicode/input.out_utf8 b/regress/usr.bin/mandoc/char/unicode/input.out_utf8 index 429b427ad63..348cb6839b8 100644 --- a/regress/usr.bin/mandoc/char/unicode/input.out_utf8 +++ b/regress/usr.bin/mandoc/char/unicode/input.out_utf8 @@ -31,12 +31,17 @@ DDEESSCCRRIIPPTTIIOONN U+1000 0xe18080 ကက begin of second start byte U+CFFF 0xecbfbf 쿿쿿 end of last normal start byte U+D000 0xed8080 퀀퀀 begin of last start byte + U+D7FB 0xed9fbb ퟻퟻ highest valid public three-byte U+D7FF 0xed9fbf ퟿퟿ highest public three-byte U+D800 0xeda080 ??? lowest surrogate U+DFFF 0xedbfbf ??? highest surrogate U+E000 0xee8080  lowest private use U+F8FF 0xefa3bf  highest private use U+F900 0xefa480 豈豈 lowest post-private + U+FEFF 0xefbbbf  byte-order mark + U+FFFC 0xefbfbc  object replacement character + U+FFFD 0xefbfbd �� replacement character + U+FFFE 0xefbfbe ￾￾ reversed byte-order mark U+FFFF 0xefbfbf ￿￿ highest three-byte FFoouurr--bbyyttee rraannggee @@ -60,4 +65,4 @@ DDEESSCCRRIIPPTTIIOONN U+1FFFFF 0xf7bfbfbf ???? highest invalid four-byte U+200000 0xf888808080 ????? lowest five-byte -OpenBSD June 2, 2021 CHAR-UNICODE-INPUT(1) +OpenBSD May 16, 2024 CHAR-UNICODE-INPUT(1) -- 2.20.1