Add support for pattern substitution to variables in ksh using a common syntax
author anton <anton@openbsd.org>
Tue, 4 Jul 2017 07:29:32 +0000 (07:29 +0000)
committer anton <anton@openbsd.org>
Tue, 4 Jul 2017 07:29:32 +0000 (07:29 +0000)
borrowed from ksh93.

Survived a ports build performed by naddy@ and encouraged by many.

bin/ksh/eval.c
bin/ksh/ksh.1
bin/ksh/lex.c
bin/ksh/misc.c
bin/ksh/sh.h
regress/bin/ksh/strsub.t [new file with mode: 0644]

index 39f9e7c..a73906b 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: eval.c,v 1.51 2017/05/01 19:05:49 millert Exp $       */
+/*     $OpenBSD: eval.c,v 1.52 2017/07/04 07:29:32 anton Exp $ */
 
 /*
  * Expansion - quoting, separation, substitution, globbing
@@ -50,6 +50,7 @@ typedef struct Expand {
 
 static int     varsub(Expand *, char *, char *, int *, int *);
 static int     comsub(Expand *, char *);
+static char   *strsub(char *, char *, int);
 static char   *trimsub(char *, char *, int);
 static void    glob(char *, XPtrV *, int);
 static void    globit(XString *, char **, char *, XPtrV *, int);
@@ -143,6 +144,7 @@ typedef struct SubType {
        short   f;              /* saved value of f (DOPAT, etc) */
        struct tbl *var;        /* variable for ${var..} */
        short   quote;          /* saved value of quote (for ${..[%#]..}) */
+       int     strsub;         /* set to 1 if pat in /pat/rep has been ended */
        struct SubType *prev;   /* old type */
        struct SubType *next;   /* poped type (to avoid re-allocating) */
 } SubType;
@@ -209,6 +211,21 @@ expand(char *cp,   /* input word */
                                break;
                        case CHAR:
                                c = *sp++;
+                               if (st->strsub == 0 &&
+                                   (st->stype & 0x7f) == '/' && c == '/') {
+                                       st->strsub = 1;
+                                       /* Write end of pattern. */
+                                       *dp++ = MAGIC;
+                                       *dp++ = ')';
+                                       *dp++ = '\0';
+                                       /*
+                                        * Reset quote and flags for the
+                                        * upcoming replacement.
+                                        */
+                                       quote = 0;
+                                       f = 0;
+                                       continue;
+                               }
                                break;
                        case QCHAR:
                                quote |= 2; /* temporary quote */
@@ -317,6 +334,7 @@ expand(char *cp,    /* input word */
                                        switch (stype & 0x7f) {
                                        case '#':
                                        case '%':
+                                       case '/':
                                                /* ! DOBLANK,DOBRACE_,DOTILDE */
                                                f = DOPAT | (f&DONTRUNCOMMAND) |
                                                    DOTEMP_;
@@ -378,13 +396,20 @@ expand(char *cp,  /* input word */
                                case '%':
                                        /* Append end-pattern */
                                        *dp++ = MAGIC; *dp++ = ')'; *dp = '\0';
+                                       /* FALLTHROUGH */
+                               case '/':
                                        dp = Xrestpos(ds, dp, st->base);
                                        /* Must use st->var since calling
                                         * global would break things
                                         * like x[i+=1].
                                         */
-                                       x.str = trimsub(str_val(st->var),
-                                               dp, st->stype);
+                                       if ((st->stype & 0x7f) == '/')
+                                               x.str = strsub(str_val(st->var),
+                                                   dp, st->stype);
+                                       else
+                                               x.str = trimsub(
+                                                   str_val(st->var),
+                                                   dp, st->stype);
                                        if (x.str[0] != '\0' || st->quote)
                                                type = XSUB;
                                        else
@@ -753,6 +778,9 @@ varsub(Expand *xp, char *sp, char *word,
                stype = 0x80;
                c = word[slen + 0] == CHAR ? word[slen + 1] : 0;
        }
+       if (c == '/' && Flag(FPOSIX))
+               return -1;
+
        if (ctype(c, C_SUBOP1)) {
                slen += 2;
                stype |= c;
@@ -894,6 +922,114 @@ comsub(Expand *xp, char *cp)
        return XCOM;
 }
 
+/*
+ * Perform ${name/pattern/replacement} and ${name//pattern/replacement}.
+ *
+ * str is the value of the parameter.  pat is the buffer built by expand():
+ * a two byte open-pattern prefix, the pattern, a closing MAGIC ')' and a
+ * NUL, immediately followed by the NUL-terminated replacement.  how is the
+ * substitution type: '/' replaces the first match only, any other value
+ * replaces every match.
+ *
+ * Matches are searched from left to right; among the matches starting at
+ * the leftmost possible offset the longest one is taken.  A match of the
+ * empty string is ignored, the value is never shortened because of it.
+ *
+ * Returns str itself if nothing was substituted, otherwise a new
+ * NUL-terminated string allocated from ATEMP.
+ */
+static char *
+strsub(char *str, char *pat, int how)
+{
+       char    *actpat, *dst, *prepat, *rep, *src;
+       size_t   beg, dstlen, dstsiz, end, first, len, patlen, replen;
+       int      match;
+
+       len = strlen(str);
+       if (len == 0)
+               return str;
+       src = str;
+
+       actpat = pat;
+       patlen = strlen(actpat) + 1;            /* NUL */
+       /*
+        * expand() writes the closing MAGIC ')' and the replacement only
+        * once it has seen the '/' ending the pattern.  Without that
+        * terminator nothing valid follows the NUL, so leave the value
+        * untouched instead of reading a replacement past the pattern.
+        */
+       if (patlen < 5 || actpat[patlen - 3] != (char)MAGIC ||
+           actpat[patlen - 2] != ')')
+               return str;
+
+       dstlen = 0;
+       dstsiz = len + 1;       /* NUL */
+       dst = alloc(dstsiz, ATEMP);
+
+       prepat = alloc(patlen + 2, ATEMP);      /* make room for wildcard */
+       /*
+        * Copy actpat to prepat and add a wildcard after the open pattern
+        * prefix.
+        */
+       memcpy(prepat, actpat, 2);
+       prepat[2] = MAGIC;
+       prepat[3] = '*';
+       memcpy(&prepat[4], &actpat[2], patlen - 2);
+
+       rep = &actpat[patlen];
+       replen = strlen(rep);
+
+       for (;;) {
+               /*
+                * prepat matches the first n bytes of src exactly when some
+                * match of actpat ends at offset n.  The smallest such
+                * offset tells whether anything is left to substitute and
+                * bounds the offset at which the leftmost match can start.
+                */
+               for (first = 1; first <= len; first++)
+                       if (gnmatch(src, first, prepat, 0))
+                               break;
+               if (first > len)
+                       break;
+
+               /*
+                * Find the leftmost start and, for that start, the longest
+                * match.  Stopping at the first hit is required: continuing
+                * to later ends would run into subsequent occurrences and
+                * replace the wrong one (v=aab; ${v/a/X} must give Xab).
+                */
+               match = 0;
+               for (beg = 0; beg < first; beg++) {
+                       for (end = len; end > beg; end--)
+                               if (gnmatch(src + beg, end - beg, actpat, 0)) {
+                                       match = 1;
+                                       break;
+                               }
+                       if (match)
+                               break;
+               }
+               /* Only the empty string matched, nothing to replace. */
+               if (!match)
+                       break;
+
+               /*
+                * At this point, [src, beg) contains the prefix that is present
+                * before the actual pattern and [beg, end) what was matched by
+                * the actual pattern.
+                * The first range will be copied over to dst and the latter
+                * replaced with rep.
+                */
+               if (beg > 0) {
+                       if (beg + dstlen >= dstsiz) {
+                               dst = areallocarray(dst, 1, dstsiz + beg + 1,
+                                   ATEMP);
+                               dstsiz += beg + 1;
+                       }
+                       memcpy(&dst[dstlen], src, beg);
+                       dstlen += beg;
+               }
+
+               if (replen + dstlen >= dstsiz) {
+                       dst = areallocarray(dst, 1, dstsiz + replen + 1, ATEMP);
+                       dstsiz += replen + 1;
+               }
+               memcpy(&dst[dstlen], rep, replen);
+               dstlen += replen;
+
+               /* A match is never empty, so this always makes progress. */
+               src += end;
+               len -= end;
+               if (len == 0 || how == '/')
+                       break;
+       }
+
+       afree(prepat, ATEMP);
+
+       if (str == src) {
+               /* No substitutions performed. */
+               afree(dst, ATEMP);
+
+               return str;
+       }
+
+       /* Copy unmatched suffix from src. */
+       if (len > 0) {
+               if (len + dstlen >= dstsiz) {
+                       dst = areallocarray(dst, 1, dstsiz + len + 1, ATEMP);
+                       dstsiz += len + 1;
+               }
+               memcpy(&dst[dstlen], src, len);
+               dstlen += len;
+       }
+       dst[dstlen] = '\0';
+
+       return dst;
+}
+
 /*
  * perform #pattern and %pattern substitution in ${}
  */
index bf260d1..4902e06 100644 (file)
@@ -1,8 +1,8 @@
-.\"    $OpenBSD: ksh.1,v 1.188 2017/06/20 17:32:20 brynet Exp $
+.\"    $OpenBSD: ksh.1,v 1.189 2017/07/04 07:29:32 anton Exp $
 .\"
 .\"    Public Domain
 .\"
-.Dd $Mdocdate: June 20 2017 $
+.Dd $Mdocdate: July 4 2017 $
 .Dt KSH 1
 .Os
 .Sh NAME
@@ -1240,6 +1240,18 @@ of them result in the longest match.
 .It Pf ${ Ar name Ns % Ns Ar pattern Ns }
 .It Pf ${ Ar name Ns %% Ns Ar pattern Ns }
 Like ${..#..} substitution, but it deletes from the end of the value.
+.Pp
+.It Pf ${ Ns Ar name Ns / Ns Ar pattern Ns / Ns Ar replacement Ns }
+.It Pf ${ Ns Ar name Ns // Ns Ar pattern Ns / Ns Ar replacement Ns }
+The first longest match of
+.Ar pattern
+in the value of parameter
+.Ar name
+is substituted with
+.Ar replacement .
+Using
+.Ql // ,
+all matches are substituted.
 .El
 .Pp
 The following special parameters are implicitly set by the shell and cannot be
index c33a0b9..31af92c 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: lex.c,v 1.69 2016/04/27 12:46:23 naddy Exp $  */
+/*     $OpenBSD: lex.c,v 1.70 2017/07/04 07:29:32 anton Exp $  */
 
 /*
  * lexical analysis and source input
@@ -385,7 +385,7 @@ yylex(int cf)
                                        /* If this is a trim operation,
                                         * treat (,|,) specially in STBRACE.
                                         */
-                                       if (c == '#' || c == '%') {
+                                       if (c == '#' || c == '%' || c == '/') {
                                                ungetsc(c);
                                                PUSH_STATE(STBRACE);
                                        } else {
index 9ef6e64..b137b1c 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: misc.c,v 1.55 2016/03/20 00:01:21 krw Exp $   */
+/*     $OpenBSD: misc.c,v 1.56 2017/07/04 07:29:32 anton Exp $ */
 
 /*
  * Miscellaneous functions
@@ -52,7 +52,7 @@ initctypes(void)
        setctypes("*@#!$-?", C_VAR1);
        setctypes(" \t\n", C_IFSWS);
        setctypes("=-+?", C_SUBOP1);
-       setctypes("#%", C_SUBOP2);
+       setctypes("#%/", C_SUBOP2);
        setctypes(" \n\t\"#$&'()*;<>?[\\`|", C_QUOTE);
 }
 
@@ -518,6 +518,19 @@ gmatch(const char *s, const char *p, int isfile)
            (const unsigned char *) p, (const unsigned char *) pe);
 }
 
+/*
+ * Like gmatch(), but match only the first n bytes of s against p.
+ * The byte at s[n] is overwritten with a NUL for the duration of the call
+ * and restored afterwards, so s must be writable and s[n] must be a valid
+ * offset.  Returns the result of gmatch().
+ */
+int
+gnmatch(char *s, size_t n, const char *p, int isfile)
+{
+       char    saved;
+       int     rv;
+
+       saved = s[n];
+       s[n] = '\0';
+       rv = gmatch(s, p, isfile);
+       s[n] = saved;
+
+       return rv;
+}
+
 /* Returns if p is a syntacticly correct globbing pattern, false
  * if it contains no pattern characters or if there is a syntax error.
  * Syntax errors are:
index be91791..5efb88f 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: sh.h,v 1.59 2017/06/29 16:49:58 martijn Exp $ */
+/*     $OpenBSD: sh.h,v 1.60 2017/07/04 07:29:32 anton Exp $   */
 
 /*
  * Public Domain Bourne/Korn shell
@@ -278,7 +278,7 @@ extern int really_exit;
 #define        C_VAR1   BIT(3)         /* *@#!$-? */
 #define        C_IFSWS  BIT(4)         /* \t \n (IFS white space) */
 #define        C_SUBOP1 BIT(5)         /* "=-+?" */
-#define        C_SUBOP2 BIT(6)         /* "#%" */
+#define        C_SUBOP2 BIT(6)         /* "#%/" */
 #define        C_IFS    BIT(7)         /* $IFS */
 #define        C_QUOTE  BIT(8)         /*  \n\t"#$&'()*;<>?[\`| (needing quoting) */
 
@@ -544,6 +544,7 @@ int parse_args(char **, int, int *);
 int    getn(const char *, int *);
 int    bi_getn(const char *, int *);
 int    gmatch(const char *, const char *, int);
+int    gnmatch(char *, size_t, const char *, int);
 int    has_globbing(const char *, const char *);
 const unsigned char *pat_scan(const unsigned char *, const unsigned char *,
     int);
diff --git a/regress/bin/ksh/strsub.t b/regress/bin/ksh/strsub.t
new file mode 100644 (file)
index 0000000..419ae19
--- /dev/null
@@ -0,0 +1,133 @@
+name: strsub-basic
+description:
+       Valid string substitutions
+stdin:
+       echo empty ${v/old/new}
+       v=old
+       echo empty ${v/old/}
+       echo ${v/new/}
+       v='old new'
+       echo ${v/old/new}
+       v='new old'
+       echo ${v/old/new}
+       echo "${v/old/new}"
+expected-stdout:
+       empty
+       empty
+       old
+       new new
+       new new
+       new new
+---
+
+name: strsub-multiline
+description:
+       Value spanning multiple lines
+stdin:
+       v=`cat <<!
+       bsd
+       bsd.rd
+       bsd.sp
+       !`
+       echo ${v/bsd.rd/}
+expected-stdout:
+       bsd bsd.sp
+---
+
+name: strsub-global
+description:
+       Replace all occurrences
+stdin:
+       v='old new old'
+       echo ${v//old/new}
+       v='w h i t e s p a c e'
+       echo ${v//[[:blank:]]/}
+       v='/usr/src'
+       echo ${v//\////}
+expected-stdout:
+       new new new
+       whitespace
+       //usr//src
+---
+
+name: strsub-nested
+description:
+       Nested substitutions
+stdin:
+       v=old
+       echo ${u:-${v/old/new}}
+       v='old new'
+       echo ${v/old/${v/old/new}}
+expected-stdout:
+       new
+       new new new
+---
+
+name: strsub-longest
+description:
+       Favor the first longest match
+stdin:
+       v='old/old'
+       echo ${v/old?(\/)/new }
+       echo ${v/o*/new}
+       echo ${v//old?(\/)/new}
+       echo ${v//?(\/)old//}
+expected-stdout:
+       new old
+       new
+       newnew
+       //
+---
+
+name: strsub-replacement-1
+description:
+       Variables are expanded
+stdin:
+       v=old
+       r=new
+       echo ${v/old/$r}
+expected-stdout:
+       new
+---
+
+name: strsub-replacement-2
+description:
+       The replacement is not treated as magic
+stdin:
+       v=old
+       echo ${v/old/new*}
+expected-stdout:
+       new*
+---
+
+name: strsub-missing-pattern
+description:
+       A pattern is not required
+stdin:
+       v=old
+       echo ${v/}
+expected-stdout:
+       old
+---
+
+name: strsub-nounset
+description:
+       Respect nounset
+stdin:
+       set -u
+       echo ${v/old/new}
+expected-stderr-pattern:
+       /v: parameter not set/
+expected-exit: 1
+---
+
+name: strsub-posix
+description:
+       Respect POSIX
+env-setup: !POSIXLY_CORRECT=!
+stdin:
+       echo ${v/old/new}
+expected-stderr-pattern:
+       /bad substitution/
+expected-exit: 1
+---