From 94bd6c5c92ad6988a9a73b8683243ec77f2d3296 Mon Sep 17 00:00:00 2001 From: schwarze Date: Thu, 16 May 2024 18:49:00 +0000 Subject: [PATCH] Add a complete suite of tests covering ASCII characters in UTF-8 spelling. Most of this goes to nogroff.in rather than ascii.in for now because groff-1.23.0 is buggy as hell in this respect and i'm too lazy to debug the horrific function make_glyph_node() in the file groff/src/roff/troff/node.cpp right now. --- regress/usr.bin/mandoc/char/unicode/ascii.in | 5 ++-- .../mandoc/char/unicode/ascii.out_ascii | 3 ++- .../mandoc/char/unicode/ascii.out_html | 1 + .../mandoc/char/unicode/ascii.out_utf8 | 3 ++- .../usr.bin/mandoc/char/unicode/nogroff.in | 25 +++++++++++++++++-- .../mandoc/char/unicode/nogroff.out_ascii | 23 ++++++++++++++++- .../mandoc/char/unicode/nogroff.out_html | 21 ++++++++++++++++ .../mandoc/char/unicode/nogroff.out_utf8 | 23 ++++++++++++++++- 8 files changed, 96 insertions(+), 8 deletions(-) diff --git a/regress/usr.bin/mandoc/char/unicode/ascii.in b/regress/usr.bin/mandoc/char/unicode/ascii.in index b8e561a8a5b..cbefe4eee99 100644 --- a/regress/usr.bin/mandoc/char/unicode/ascii.in +++ b/regress/usr.bin/mandoc/char/unicode/ascii.in @@ -1,10 +1,11 @@ -.\" $OpenBSD: ascii.in,v 1.4 2017/07/04 14:53:23 schwarze Exp $ -.TH CHAR-UNICODE-ASCII 1 "October 27, 2014" +.\" $OpenBSD: ascii.in,v 1.5 2024/05/16 18:49:00 schwarze Exp $ +.TH CHAR-UNICODE-ASCII 1 "May 16, 2024" .SH NAME char-unicode-ascii \- Unicode characters in the ASCII range .SH DESCRIPTION .nf BEGINTEST +\[u0020]\N'32' SPACE \[u0022]\N'34'\(dq QUOTATION MARK \[u0023]\N'35'\(sh NUMBER SIGN \[u0024]\N'36'\(Do DOLLAR SIGN diff --git a/regress/usr.bin/mandoc/char/unicode/ascii.out_ascii b/regress/usr.bin/mandoc/char/unicode/ascii.out_ascii index 8a631354365..2540bca2a44 100644 --- a/regress/usr.bin/mandoc/char/unicode/ascii.out_ascii +++ b/regress/usr.bin/mandoc/char/unicode/ascii.out_ascii @@ -5,6 +5,7 @@ NNAAMMEE DDEESSCCRRIIPPTTIIOONN BEGINTEST + SPACE """ QUOTATION MARK ### NUMBER SIGN $$$ DOLLAR SIGN @@ -27,4 +28,4 @@ DDEESSCCRRIIPPTTIIOONN ~~~~ TILDE ENDTEST -OpenBSD October 27, 2014 CHAR-UNICODE-ASCII(1) +OpenBSD May 16, 2024 CHAR-UNICODE-ASCII(1) diff --git a/regress/usr.bin/mandoc/char/unicode/ascii.out_html b/regress/usr.bin/mandoc/char/unicode/ascii.out_html index 264fa356d1a..441362abf66 100644 --- a/regress/usr.bin/mandoc/char/unicode/ascii.out_html +++ b/regress/usr.bin/mandoc/char/unicode/ascii.out_html @@ -1,3 +1,4 @@ + SPACE """ QUOTATION MARK ### NUMBER SIGN $$$ DOLLAR SIGN diff --git a/regress/usr.bin/mandoc/char/unicode/ascii.out_utf8 b/regress/usr.bin/mandoc/char/unicode/ascii.out_utf8 index 8a631354365..2540bca2a44 100644 --- a/regress/usr.bin/mandoc/char/unicode/ascii.out_utf8 +++ b/regress/usr.bin/mandoc/char/unicode/ascii.out_utf8 @@ -5,6 +5,7 @@ NNAAMMEE DDEESSCCRRIIPPTTIIOONN BEGINTEST + SPACE """ QUOTATION MARK ### NUMBER SIGN $$$ DOLLAR SIGN @@ -27,4 +28,4 @@ DDEESSCCRRIIPPTTIIOONN ~~~~ TILDE ENDTEST -OpenBSD October 27, 2014 CHAR-UNICODE-ASCII(1) +OpenBSD May 16, 2024 CHAR-UNICODE-ASCII(1) diff --git a/regress/usr.bin/mandoc/char/unicode/nogroff.in b/regress/usr.bin/mandoc/char/unicode/nogroff.in index 70853b5d486..cfeba6f4abb 100644 --- a/regress/usr.bin/mandoc/char/unicode/nogroff.in +++ b/regress/usr.bin/mandoc/char/unicode/nogroff.in @@ -1,5 +1,5 @@ -.\" $OpenBSD: nogroff.in,v 1.7 2024/05/15 19:37:24 schwarze Exp $ -.TH CHAR-UNICODE-NOGROFF 1 "June 2, 2021" +.\" $OpenBSD: nogroff.in,v 1.8 2024/05/16 18:49:00 schwarze Exp $ +.TH CHAR-UNICODE-NOGROFF 1 "May 16, 2024" .SH NAME char-unicode-nogroff \- characters handled differently by groff .SH DESCRIPTION @@ -37,6 +37,27 @@ BEGINTEST \[u001D]\N'29' INFORMATION SEPARATOR THREE \[u001E]\N'30' INFORMATION SEPARATOR TWO \[u001F]\N'31' INFORMATION SEPARATOR INE +\[u0021]\N'33' EXCLAMATION MARK +\[u0025]\N'37' PERCENT SIGN +\[u0026]\N'38' AMPERSAND +\[u0028]\N'40' LEFT PARENTHESIS +\[u0029]\N'41' RIGHT PARENTHESIS +\[u002A]\N'42' ASTERISK +\[u002C]\N'44' COMMA +\[u002D]\N'45' HYPHEN-MINUS +\[u002E]\N'46' FULL STOP +\[u0030]\N'48' DIGIT ZERO +\[u0031]\N'49' DIGIT ONE +\[u0039]\N'57' DIGIT NINE +\[u003A]\N'58' COLON +\[u003B]\N'59' SEMICOLON +\[u003C]\N'60' LESS-THAN SIGN +\[u003E]\N'62' GREATER-THAN SIGN +\[u003F]\N'63' QUESTION MARK +\[u0041]\N'65' LATIN CAPITAL LETTER A +\[u005A]\N'90' LATIN CAPITAL LETTER Z +\[u0061]\N'97' LATIN SMALL LETTER A +\[u007A]\N'122' LATIN SMALL LETTER Z \[u007F]\N'127' DELETE \[u0080]\N'128' 0x80 \[u0081]\N'129' 0x81 diff --git a/regress/usr.bin/mandoc/char/unicode/nogroff.out_ascii b/regress/usr.bin/mandoc/char/unicode/nogroff.out_ascii index 8bd4d374de8..3c068797b97 100644 --- a/regress/usr.bin/mandoc/char/unicode/nogroff.out_ascii +++ b/regress/usr.bin/mandoc/char/unicode/nogroff.out_ascii @@ -37,6 +37,27 @@ DDEESSCCRRIIPPTTIIOONN INFORMATION SEPARATOR THREE INFORMATION SEPARATOR TWO INFORMATION SEPARATOR INE + !! EXCLAMATION MARK + %% PERCENT SIGN + && AMPERSAND + (( LEFT PARENTHESIS + )) RIGHT PARENTHESIS + ** ASTERISK + ,, COMMA + -- HYPHEN-MINUS + .. FULL STOP + 00 DIGIT ZERO + 11 DIGIT ONE + 99 DIGIT NINE + :: COLON + ;; SEMICOLON + << LESS-THAN SIGN + >> GREATER-THAN SIGN + ?? QUESTION MARK + AA LATIN CAPITAL LETTER A + ZZ LATIN CAPITAL LETTER Z + aa LATIN SMALL LETTER A + zz LATIN SMALL LETTER Z DELETE <80><80> 0x80 <81><81> 0x81 @@ -98,4 +119,4 @@ DDEESSCCRRIIPPTTIIOONN ENDTEST -OpenBSD June 2, 2021 CHAR-UNICODE-NOGROFF(1) +OpenBSD May 16, 2024 CHAR-UNICODE-NOGROFF(1) diff --git a/regress/usr.bin/mandoc/char/unicode/nogroff.out_html b/regress/usr.bin/mandoc/char/unicode/nogroff.out_html index 5c48e9d7eaf..8f5710988c2 100644 --- a/regress/usr.bin/mandoc/char/unicode/nogroff.out_html +++ b/regress/usr.bin/mandoc/char/unicode/nogroff.out_html @@ -30,6 +30,27 @@ �� <control> INFORMATION SEPARATOR THREE �� <control> INFORMATION SEPARATOR TWO �� <control> INFORMATION SEPARATOR INE +!! EXCLAMATION MARK +%% PERCENT SIGN +&& AMPERSAND +(( LEFT PARENTHESIS +)) RIGHT PARENTHESIS +** ASTERISK +,, COMMA +-- HYPHEN-MINUS +.. FULL STOP +00 DIGIT ZERO +11 DIGIT ONE +99 DIGIT NINE +:: COLON +;; SEMICOLON +<< LESS-THAN SIGN +>> GREATER-THAN SIGN +?? QUESTION MARK +AA LATIN CAPITAL LETTER A +ZZ LATIN CAPITAL LETTER Z +aa LATIN SMALL LETTER A +zz LATIN SMALL LETTER Z �� <control> DELETE �� <control> 0x80 �� <control> 0x81 diff --git a/regress/usr.bin/mandoc/char/unicode/nogroff.out_utf8 b/regress/usr.bin/mandoc/char/unicode/nogroff.out_utf8 index bad1c23817a..429c53f2ac7 100644 --- a/regress/usr.bin/mandoc/char/unicode/nogroff.out_utf8 +++ b/regress/usr.bin/mandoc/char/unicode/nogroff.out_utf8 @@ -37,6 +37,27 @@ DDEESSCCRRIIPPTTIIOONN �� INFORMATION SEPARATOR THREE �� INFORMATION SEPARATOR TWO �� INFORMATION SEPARATOR INE + !! EXCLAMATION MARK + %% PERCENT SIGN + && AMPERSAND + (( LEFT PARENTHESIS + )) RIGHT PARENTHESIS + ** ASTERISK + ,, COMMA + -- HYPHEN-MINUS + .. FULL STOP + 00 DIGIT ZERO + 11 DIGIT ONE + 99 DIGIT NINE + :: COLON + ;; SEMICOLON + << LESS-THAN SIGN + >> GREATER-THAN SIGN + ?? QUESTION MARK + AA LATIN CAPITAL LETTER A + ZZ LATIN CAPITAL LETTER Z + aa LATIN SMALL LETTER A + zz LATIN SMALL LETTER Z �� DELETE �� 0x80 �� 0x81 @@ -98,4 +119,4 @@ DDEESSCCRRIIPPTTIIOONN 􏿿 ENDTEST -OpenBSD June 2, 2021 CHAR-UNICODE-NOGROFF(1) +OpenBSD May 16, 2024 CHAR-UNICODE-NOGROFF(1) -- 2.20.1