Add a complete set of tests for all C0 and C1 control characters.
author schwarze <schwarze@openbsd.org>
Wed, 15 May 2024 19:37:24 +0000 (19:37 +0000)
committer schwarze <schwarze@openbsd.org>
Wed, 15 May 2024 19:37:24 +0000 (19:37 +0000)
This is particularly useful to make sure that man(1) does not let
control characters sneak through to the output side, no matter how
hard an input file tries to deceive it.

regress/usr.bin/mandoc/char/unicode/nogroff.in
regress/usr.bin/mandoc/char/unicode/nogroff.out_ascii
regress/usr.bin/mandoc/char/unicode/nogroff.out_html
regress/usr.bin/mandoc/char/unicode/nogroff.out_utf8

index 9e2b595..70853b5 100644 (file)
@@ -1,4 +1,4 @@
-.\" $OpenBSD: nogroff.in,v 1.6 2021/06/02 17:36:59 schwarze Exp $
+.\" $OpenBSD: nogroff.in,v 1.7 2024/05/15 19:37:24 schwarze Exp $
 .TH CHAR-UNICODE-NOGROFF 1 "June 2, 2021"
 .SH NAME
 char-unicode-nogroff \- characters handled differently by groff
@@ -7,6 +7,11 @@ char-unicode-nogroff \- characters handled differently by groff
 BEGINTEST
 \[u0000]\N'0'  <control> NULL
 \[u0001]\N'1'  <control> START OF HEADING
+\[u0002]\N'2'  <control> START OF TEXT
+\[u0003]\N'3'  <control> END OF TEXT
+\[u0004]\N'4'  <control> END OF TRANSMISSION
+\[u0005]\N'5'  <control> ENQUIRY
+\[u0006]\N'6'  <control> ACKNOWLEDGE
 \[u0007]\N'7'  <control> BELL
 \[u0008]\N'8'  <control> BACKSPACE
 \[u0009]\N'9'  <control> CHARACTER TABULATION
@@ -14,12 +19,55 @@ BEGINTEST
 \[u000B]\N'11' <control> LINE TABULATION
 \[u000C]\N'12' <control> FORM FEED
 \[u000D]\N'13' <control> CARRIAGE RETURN
+\[u000E]\N'14' <control> SHIFT OUT
+\[u000F]\N'15' <control> SHIFT IN
+\[u0010]\N'16' <control> DATA LINK ESCAPE
+\[u0011]\N'17' <control> DEVICE CONTROL ONE
+\[u0012]\N'18' <control> DEVICE CONTROL TWO
+\[u0013]\N'19' <control> DEVICE CONTROL THREE
+\[u0014]\N'20' <control> DEVICE CONTROL FOUR
+\[u0015]\N'21' <control> NEGATIVE ACKNOWLEDGE
+\[u0016]\N'22' <control> SYNCHRONOUS IDLE
+\[u0017]\N'23' <control> END OF TRANSMISSION BLOCK
+\[u0018]\N'24' <control> CANCEL
+\[u0019]\N'25' <control> END OF MEDIUM
+\[u001A]\N'26' <control> SUBSTITUTE
 \[u001B]\N'27' <control> ESCAPE
+\[u001C]\N'28' <control> INFORMATION SEPARATOR FOUR
+\[u001D]\N'29' <control> INFORMATION SEPARATOR THREE
+\[u001E]\N'30' <control> INFORMATION SEPARATOR TWO
+\[u001F]\N'31' <control> INFORMATION SEPARATOR ONE
 \[u007F]\N'127'        <control> DELETE
 \[u0080]\N'128'        <control> 0x80
 \[u0081]\N'129'        <control> 0x81
 \[u0082]\N'130'        <control> BREAK PERMITTED HERE
 \[u0083]\N'131'        <control> NO BREAK HERE
+\[u0084]\N'132'        <control> 0x84
+\[u0085]\N'133'        <control> NEXT LINE (NEL)
+\[u0086]\N'134'        <control> START OF SELECTED AREA
+\[u0087]\N'135'        <control> END OF SELECTED AREA
+\[u0088]\N'136'        <control> CHARACTER TABULATION SET
+\[u0089]\N'137'        <control> CHARACTER TABULATION WITH JUSTIFICATION
+\[u008A]\N'138'        <control> LINE TABULATION SET
+\[u008B]\N'139'        <control> PARTIAL LINE FORWARD
+\[u008C]\N'140'        <control> PARTIAL LINE BACKWARD
+\[u008D]\N'141'        <control> REVERSE LINE FEED
+\[u008E]\N'142'        <control> SINGLE SHIFT TWO
+\[u008F]\N'143'        <control> SINGLE SHIFT THREE
+\[u0090]\N'144'        <control> DEVICE CONTROL STRING
+\[u0091]\N'145'        <control> PRIVATE USE ONE
+\[u0092]\N'146'        <control> PRIVATE USE TWO
+\[u0093]\N'147'        <control> SET TRANSMIT STATE
+\[u0094]\N'148'        <control> CANCEL CHARACTER
+\[u0095]\N'149'        <control> MESSAGE WAITING
+\[u0096]\N'150'        <control> START OF GUARDED AREA
+\[u0097]\N'151'        <control> END OF GUARDED AREA
+\[u0098]\N'152'        <control> START OF STRING
+\[u0099]\N'153'        <control> 0x99
+\[u009A]\N'154'        <control> SINGLE CHARACTER INTRODUCER
+\[u009B]\N'155'        <control> CONTROL SEQUENCE INTRODUCER
+\[u009C]\N'156'        <control> STRING TERMINATOR
+\[u009D]\N'157'        <control> OPERATING SYSTEM COMMAND
 \[u009E]\N'158'        <control> PRIVACY MESSAGE
 \[u009F]\N'159'        <control> APPLICATION PROGRAM COMMAND
 \[u226A]\(<<   MUCH LESS-THAN
index d9d53ce..8bd4d37 100644 (file)
@@ -7,6 +7,11 @@ D\bDE\bES\bSC\bCR\bRI\bIP\bPT\bTI\bIO\bON\bN
      BEGINTEST
      <NUL><NUL>     <control> NULL
      <SOH><SOH>     <control> START OF HEADING
+     <STX><STX>     <control> START OF TEXT
+     <ETX><ETX>     <control> END OF TEXT
+     <EOT><EOT>     <control> END OF TRANSMISSION
+     <ENQ><ENQ>     <control> ENQUIRY
+     <ACK><ACK>     <control> ACKNOWLEDGE
      <BEL><BEL>     <control> BELL
      <BS><BS>  <control> BACKSPACE
                     <control> CHARACTER TABULATION
@@ -14,12 +19,55 @@ D\bDE\bES\bSC\bCR\bRI\bIP\bPT\bTI\bIO\bON\bN
      <VT><VT>  <control> LINE TABULATION
      <FF><FF>  <control> FORM FEED
      <CR><CR>  <control> CARRIAGE RETURN
+     <SO><SO>  <control> SHIFT OUT
+     <SI><SI>  <control> SHIFT IN
+     <DLE><DLE>     <control> DATA LINK ESCAPE
+     <DC1><DC1>     <control> DEVICE CONTROL ONE
+     <DC2><DC2>     <control> DEVICE CONTROL TWO
+     <DC3><DC3>     <control> DEVICE CONTROL THREE
+     <DC4><DC4>     <control> DEVICE CONTROL FOUR
+     <NAK><NAK>     <control> NEGATIVE ACKNOWLEDGE
+     <SYN><SYN>     <control> SYNCHRONOUS IDLE
+     <ETB><ETB>     <control> END OF TRANSMISSION BLOCK
+     <CAN><CAN>     <control> CANCEL
+     <EM><EM>  <control> END OF MEDIUM
+     <SUB><SUB>     <control> SUBSTITUTE
      <ESC><ESC>     <control> ESCAPE
+     <FS><FS>  <control> INFORMATION SEPARATOR FOUR
+     <GS><GS>  <control> INFORMATION SEPARATOR THREE
+     <RS><RS>  <control> INFORMATION SEPARATOR TWO
+     <US><US>  <control> INFORMATION SEPARATOR ONE
      <DEL><DEL>     <control> DELETE
      <80><80>  <control> 0x80
      <81><81>  <control> 0x81
      <82><82>  <control> BREAK PERMITTED HERE
      <83><83>  <control> NO BREAK HERE
+     <84><84>  <control> 0x84
+     <85><85>  <control> NEXT LINE (NEL)
+     <86><86>  <control> START OF SELECTED AREA
+     <87><87>  <control> END OF SELECTED AREA
+     <88><88>  <control> CHARACTER TABULATION SET
+     <89><89>  <control> CHARACTER TABULATION WITH JUSTIFICATION
+     <8A><8A>  <control> LINE TABULATION SET
+     <8B><8B>  <control> PARTIAL LINE FORWARD
+     <8C><8C>  <control> PARTIAL LINE BACKWARD
+     <8D><8D>  <control> REVERSE LINE FEED
+     <8E><8E>  <control> SINGLE SHIFT TWO
+     <8F><8F>  <control> SINGLE SHIFT THREE
+     <90><90>  <control> DEVICE CONTROL STRING
+     <91><91>  <control> PRIVATE USE ONE
+     <92><92>  <control> PRIVATE USE TWO
+     <93><93>  <control> SET TRANSMIT STATE
+     <94><94>  <control> CANCEL CHARACTER
+     <95><95>  <control> MESSAGE WAITING
+     <96><96>  <control> START OF GUARDED AREA
+     <97><97>  <control> END OF GUARDED AREA
+     <98><98>  <control> START OF STRING
+     <99><99>  <control> 0x99
+     <9A><9A>  <control> SINGLE CHARACTER INTRODUCER
+     <9B><9B>  <control> CONTROL SEQUENCE INTRODUCER
+     <9C><9C>  <control> STRING TERMINATOR
+     <9D><9D>  <control> OPERATING SYSTEM COMMAND
      <9E><9E>  <control> PRIVACY MESSAGE
      <9F><9F>  <control> APPLICATION PROGRAM COMMAND
      <<<< MUCH LESS-THAN
index 7863965..5c48e9d 100644 (file)
@@ -1,5 +1,10 @@
 &#xFFFD;&#xFFFD;       &lt;control&gt; NULL
 &#xFFFD;&#xFFFD;       &lt;control&gt; START OF HEADING
+&#xFFFD;&#xFFFD;       &lt;control&gt; START OF TEXT
+&#xFFFD;&#xFFFD;       &lt;control&gt; END OF TEXT
+&#xFFFD;&#xFFFD;       &lt;control&gt; END OF TRANSMISSION
+&#xFFFD;&#xFFFD;       &lt;control&gt; ENQUIRY
+&#xFFFD;&#xFFFD;       &lt;control&gt; ACKNOWLEDGE
 &#xFFFD;&#xFFFD;       &lt;control&gt; BELL
 &#xFFFD;&#xFFFD;       &lt;control&gt; BACKSPACE
                        &lt;control&gt; CHARACTER TABULATION
 &#xFFFD;&#xFFFD;       &lt;control&gt; LINE TABULATION
 &#xFFFD;&#xFFFD;       &lt;control&gt; FORM FEED
 &#xFFFD;&#xFFFD;       &lt;control&gt; CARRIAGE RETURN
+&#xFFFD;&#xFFFD;       &lt;control&gt; SHIFT OUT
+&#xFFFD;&#xFFFD;       &lt;control&gt; SHIFT IN
+&#xFFFD;&#xFFFD;       &lt;control&gt; DATA LINK ESCAPE
+&#xFFFD;&#xFFFD;       &lt;control&gt; DEVICE CONTROL ONE
+&#xFFFD;&#xFFFD;       &lt;control&gt; DEVICE CONTROL TWO
+&#xFFFD;&#xFFFD;       &lt;control&gt; DEVICE CONTROL THREE
+&#xFFFD;&#xFFFD;       &lt;control&gt; DEVICE CONTROL FOUR
+&#xFFFD;&#xFFFD;       &lt;control&gt; NEGATIVE ACKNOWLEDGE
+&#xFFFD;&#xFFFD;       &lt;control&gt; SYNCHRONOUS IDLE
+&#xFFFD;&#xFFFD;       &lt;control&gt; END OF TRANSMISSION BLOCK
+&#xFFFD;&#xFFFD;       &lt;control&gt; CANCEL
+&#xFFFD;&#xFFFD;       &lt;control&gt; END OF MEDIUM
+&#xFFFD;&#xFFFD;       &lt;control&gt; SUBSTITUTE
 &#xFFFD;&#xFFFD;       &lt;control&gt; ESCAPE
+&#xFFFD;&#xFFFD;       &lt;control&gt; INFORMATION SEPARATOR FOUR
+&#xFFFD;&#xFFFD;       &lt;control&gt; INFORMATION SEPARATOR THREE
+&#xFFFD;&#xFFFD;       &lt;control&gt; INFORMATION SEPARATOR TWO
+&#xFFFD;&#xFFFD;       &lt;control&gt; INFORMATION SEPARATOR ONE
 &#xFFFD;&#xFFFD;       &lt;control&gt; DELETE
 &#xFFFD;&#xFFFD;       &lt;control&gt; 0x80
 &#xFFFD;&#xFFFD;       &lt;control&gt; 0x81
 &#xFFFD;&#xFFFD;       &lt;control&gt; BREAK PERMITTED HERE
 &#xFFFD;&#xFFFD;       &lt;control&gt; NO BREAK HERE
+&#xFFFD;&#xFFFD;       &lt;control&gt; 0x84
+&#xFFFD;&#xFFFD;       &lt;control&gt; NEXT LINE (NEL)
+&#xFFFD;&#xFFFD;       &lt;control&gt; START OF SELECTED AREA
+&#xFFFD;&#xFFFD;       &lt;control&gt; END OF SELECTED AREA
+&#xFFFD;&#xFFFD;       &lt;control&gt; CHARACTER TABULATION SET
+&#xFFFD;&#xFFFD;       &lt;control&gt; CHARACTER TABULATION WITH JUSTIFICATION
+&#xFFFD;&#xFFFD;       &lt;control&gt; LINE TABULATION SET
+&#xFFFD;&#xFFFD;       &lt;control&gt; PARTIAL LINE FORWARD
+&#xFFFD;&#xFFFD;       &lt;control&gt; PARTIAL LINE BACKWARD
+&#xFFFD;&#xFFFD;       &lt;control&gt; REVERSE LINE FEED
+&#xFFFD;&#xFFFD;       &lt;control&gt; SINGLE SHIFT TWO
+&#xFFFD;&#xFFFD;       &lt;control&gt; SINGLE SHIFT THREE
+&#xFFFD;&#xFFFD;       &lt;control&gt; DEVICE CONTROL STRING
+&#xFFFD;&#xFFFD;       &lt;control&gt; PRIVATE USE ONE
+&#xFFFD;&#xFFFD;       &lt;control&gt; PRIVATE USE TWO
+&#xFFFD;&#xFFFD;       &lt;control&gt; SET TRANSMIT STATE
+&#xFFFD;&#xFFFD;       &lt;control&gt; CANCEL CHARACTER
+&#xFFFD;&#xFFFD;       &lt;control&gt; MESSAGE WAITING
+&#xFFFD;&#xFFFD;       &lt;control&gt; START OF GUARDED AREA
+&#xFFFD;&#xFFFD;       &lt;control&gt; END OF GUARDED AREA
+&#xFFFD;&#xFFFD;       &lt;control&gt; START OF STRING
+&#xFFFD;&#xFFFD;       &lt;control&gt; 0x99
+&#xFFFD;&#xFFFD;       &lt;control&gt; SINGLE CHARACTER INTRODUCER
+&#xFFFD;&#xFFFD;       &lt;control&gt; CONTROL SEQUENCE INTRODUCER
+&#xFFFD;&#xFFFD;       &lt;control&gt; STRING TERMINATOR
+&#xFFFD;&#xFFFD;       &lt;control&gt; OPERATING SYSTEM COMMAND
 &#xFFFD;&#xFFFD;       &lt;control&gt; PRIVACY MESSAGE
 &#xFFFD;&#xFFFD;       &lt;control&gt; APPLICATION PROGRAM COMMAND
 &#x226A;&#x226A;       MUCH LESS-THAN
index 98534ec..bad1c23 100644 (file)
@@ -7,6 +7,11 @@ D\bDE\bES\bSC\bCR\bRI\bIP\bPT\bTI\bIO\bON\bN
      BEGINTEST
      ��   <control> NULL
      ��   <control> START OF HEADING
+     ��   <control> START OF TEXT
+     ��   <control> END OF TEXT
+     ��   <control> END OF TRANSMISSION
+     ��   <control> ENQUIRY
+     ��   <control> ACKNOWLEDGE
      ��   <control> BELL
      ��   <control> BACKSPACE
                     <control> CHARACTER TABULATION
@@ -14,12 +19,55 @@ D\bDE\bES\bSC\bCR\bRI\bIP\bPT\bTI\bIO\bON\bN
      ��   <control> LINE TABULATION
      ��   <control> FORM FEED
      ��   <control> CARRIAGE RETURN
+     ��   <control> SHIFT OUT
+     ��   <control> SHIFT IN
+     ��   <control> DATA LINK ESCAPE
+     ��   <control> DEVICE CONTROL ONE
+     ��   <control> DEVICE CONTROL TWO
+     ��   <control> DEVICE CONTROL THREE
+     ��   <control> DEVICE CONTROL FOUR
+     ��   <control> NEGATIVE ACKNOWLEDGE
+     ��   <control> SYNCHRONOUS IDLE
+     ��   <control> END OF TRANSMISSION BLOCK
+     ��   <control> CANCEL
+     ��   <control> END OF MEDIUM
+     ��   <control> SUBSTITUTE
      ��   <control> ESCAPE
+     ��   <control> INFORMATION SEPARATOR FOUR
+     ��   <control> INFORMATION SEPARATOR THREE
+     ��   <control> INFORMATION SEPARATOR TWO
+     ��   <control> INFORMATION SEPARATOR ONE
      ��   <control> DELETE
      ��   <control> 0x80
      ��   <control> 0x81
      ��   <control> BREAK PERMITTED HERE
      ��   <control> NO BREAK HERE
+     ��   <control> 0x84
+     ��   <control> NEXT LINE (NEL)
+     ��   <control> START OF SELECTED AREA
+     ��   <control> END OF SELECTED AREA
+     ��   <control> CHARACTER TABULATION SET
+     ��   <control> CHARACTER TABULATION WITH JUSTIFICATION
+     ��   <control> LINE TABULATION SET
+     ��   <control> PARTIAL LINE FORWARD
+     ��   <control> PARTIAL LINE BACKWARD
+     ��   <control> REVERSE LINE FEED
+     ��   <control> SINGLE SHIFT TWO
+     ��   <control> SINGLE SHIFT THREE
+     ��   <control> DEVICE CONTROL STRING
+     ��   <control> PRIVATE USE ONE
+     ��   <control> PRIVATE USE TWO
+     ��   <control> SET TRANSMIT STATE
+     ��   <control> CANCEL CHARACTER
+     ��   <control> MESSAGE WAITING
+     ��   <control> START OF GUARDED AREA
+     ��   <control> END OF GUARDED AREA
+     ��   <control> START OF STRING
+     ��   <control> 0x99
+     ��   <control> SINGLE CHARACTER INTRODUCER
+     ��   <control> CONTROL SEQUENCE INTRODUCER
+     ��   <control> STRING TERMINATOR
+     ��   <control> OPERATING SYSTEM COMMAND
      ��   <control> PRIVACY MESSAGE
      ��   <control> APPLICATION PROGRAM COMMAND
      ≪≪   MUCH LESS-THAN