Improve consistency of the substitution command further.
author schwarze <schwarze@openbsd.org>
Tue, 14 Aug 2018 18:10:09 +0000 (18:10 +0000)
committer schwarze <schwarze@openbsd.org>
Tue, 14 Aug 2018 18:10:09 +0000 (18:10 +0000)
When the opening square bracket ('[') is abused as the delimiter, the regular
expression contains a bracket expression, and the bracket expression contains
another opening square bracket (sick! - I mean, sic!), then do not require
escaping that innermost bracket and treat a preceding backslash as a literal
backslash character, in accordance with POSIX:

   $ printf 'x[x\\x\n' | sed 's[\[[][R[g'
  xRx\x
   $ printf 'x[x\\x\n' | sed 's[\[\[][R[g'
  xRxRx

While here, also make the implementation more readable and insert
some comments.

Joint work with martijn@ (started during g2k18) and OK martijn@.

usr.bin/sed/compile.c

index f15dd9f..64e31cd 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: compile.c,v 1.48 2018/07/09 09:43:54 schwarze Exp $   */
+/*     $OpenBSD: compile.c,v 1.49 2018/08/14 18:10:09 schwarze Exp $   */
 
 /*-
  * Copyright (c) 1992 Diomidis Spinellis.
@@ -362,28 +362,40 @@ compile_delimited(char *p, char *d)
                error(COMPILE, "\\ can not be used as a string delimiter");
        else if (c == '\n')
                error(COMPILE, "newline can not be used as a string delimiter");
-       while (*p) {
-               if (*p == '[' && *p != c) {
-                       if ((d = compile_ccl(&p, d)) == NULL)
-                               error(COMPILE, "unbalanced brackets ([])");
-                       continue;
-               } else if (*p == '\\' && p[1] == c) {
-                       p++;
-               } else if (*p == '\\' && p[1] == '[') {
-                       *d++ = *p++;
-               } else if (*p == '\\' && p[1] == 'n') {
-                       *d++ = '\n';
-                       p += 2;
-                       continue;
-               } else if (*p == '\\' && p[1] == '\\') {
-                       *d++ = *p++;
-               } else if (*p == c) {
+
+       while (p[0]) {
+               /* Unescaped delimiter: We are done. */
+               if (p[0] == c) {
                        *d = '\0';
-                       return (p + 1);
+                       return p + 1;
                }
-               *d++ = *p++;
+               if (p[0] == '\\') {
+                       /* Escaped delimiter: Skip the backslash. */
+                       if (p[1] == c) {
+                               p++;
+                       } else {
+                               /* Backslash-n: Match linefeed. */
+                               if (p[1] == 'n') {
+                                       *d++ = '\n';
+                                       p += 2;
+                               /* Other escapes remain unchanged. */
+                               } else {
+                                       *d++ = *p++;
+                                       *d++ = *p++;
+                               }
+                               continue;
+                       }
+               }
+               if (p[0] != '[')
+                       *d++ = *p++;
+               /*
+                * Bracket expression:
+                * It may contain the delimiter without escaping.
+                */
+               else if ((d = compile_ccl(&p, d)) == NULL)
+                       error(COMPILE, "unbalanced brackets ([])");
        }
-       return (NULL);
+       return NULL;
 }