From: anton Date: Tue, 4 Jul 2017 07:29:32 +0000 (+0000) Subject: Add support for pattern substitution to variables in ksh using a common syntax X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=be48121bc7c46cf89d274fe5fe8297088593db2d;p=openbsd Add support for pattern substitution to variables in ksh using a common syntax borrowed from ksh93. Survived a ports build performed by naddy@ and encouraged by many. --- diff --git a/bin/ksh/eval.c b/bin/ksh/eval.c index 39f9e7c8210..a73906bbfda 100644 --- a/bin/ksh/eval.c +++ b/bin/ksh/eval.c @@ -1,4 +1,4 @@ -/* $OpenBSD: eval.c,v 1.51 2017/05/01 19:05:49 millert Exp $ */ +/* $OpenBSD: eval.c,v 1.52 2017/07/04 07:29:32 anton Exp $ */ /* * Expansion - quoting, separation, substitution, globbing @@ -50,6 +50,7 @@ typedef struct Expand { static int varsub(Expand *, char *, char *, int *, int *); static int comsub(Expand *, char *); +static char *strsub(char *, char *, int); static char *trimsub(char *, char *, int); static void glob(char *, XPtrV *, int); static void globit(XString *, char **, char *, XPtrV *, int); @@ -143,6 +144,7 @@ typedef struct SubType { short f; /* saved value of f (DOPAT, etc) */ struct tbl *var; /* variable for ${var..} */ short quote; /* saved value of quote (for ${..[%#]..}) */ + int strsub; /* set to 1 if pat in /pat/rep has been ended */ struct SubType *prev; /* old type */ struct SubType *next; /* poped type (to avoid re-allocating) */ } SubType; @@ -209,6 +211,21 @@ expand(char *cp, /* input word */ break; case CHAR: c = *sp++; + if (st->strsub == 0 && + (st->stype & 0x7f) == '/' && c == '/') { + st->strsub = 1; + /* Write end of pattern. */ + *dp++ = MAGIC; + *dp++ = ')'; + *dp++ = '\0'; + /* + * Reset quote and flags for the + * upcoming replacement. + */ + quote = 0; + f = 0; + continue; + } break; case QCHAR: quote |= 2; /* temporary quote */ @@ -317,6 +334,7 @@ expand(char *cp, /* input word */ switch (stype & 0x7f) { case '#': case '%': + case '/': /* ! DOBLANK,DOBRACE_,DOTILDE */ f = DOPAT | (f&DONTRUNCOMMAND) | DOTEMP_; @@ -378,13 +396,20 @@ expand(char *cp, /* input word */ case '%': /* Append end-pattern */ *dp++ = MAGIC; *dp++ = ')'; *dp = '\0'; + /* FALLTHROUGH */ + case '/': dp = Xrestpos(ds, dp, st->base); /* Must use st->var since calling * global would break things * like x[i+=1]. */ - x.str = trimsub(str_val(st->var), - dp, st->stype); + if ((st->stype & 0x7f) == '/') + x.str = strsub(str_val(st->var), + dp, st->stype); + else + x.str = trimsub( + str_val(st->var), + dp, st->stype); if (x.str[0] != '\0' || st->quote) type = XSUB; else @@ -753,6 +778,9 @@ varsub(Expand *xp, char *sp, char *word, stype = 0x80; c = word[slen + 0] == CHAR ? word[slen + 1] : 0; } + if (c == '/' && Flag(FPOSIX)) + return -1; + if (ctype(c, C_SUBOP1)) { slen += 2; stype |= c; @@ -894,6 +922,114 @@ comsub(Expand *xp, char *cp) return XCOM; } +static char * +strsub(char *str, char *pat, int how) +{ + char *actpat, *dst, *prepat, *rep, *src; + size_t beg, dstlen, dstsiz, end, match, len, patlen, replen; + + len = strlen(str); + if (len == 0) + return str; + src = str; + + dstlen = 0; + dstsiz = len + 1; /* NUL */ + dst = alloc(dstsiz, ATEMP); + + actpat = pat; + patlen = strlen(actpat) + 1; /* NUL */ + prepat = alloc(patlen + 2, ATEMP); /* make room for wildcard */ + /* + * Copy actpat to prepat and add a wildcard after the open pattern + * prefix. + */ + memcpy(prepat, actpat, 2); + prepat[2] = MAGIC; + prepat[3] = '*'; + memcpy(&prepat[4], &actpat[2], patlen - 2); + + rep = &actpat[patlen]; + replen = strlen(rep); + + for (;;) { + /* + * Find the wildcard prefix in prepat followed by actpat. + * This allows occurrences of actpat to be found anywhere in the + * string. + */ + match = 0; + for (end = 1; end <= len; end++) + if (gnmatch(src, end, prepat, 0)) + match = end; + else if (match) + break; + if (!match) + break; + end = match; + + /* + * Find the prefix, if any, that was matched by the wildcard in + * prepat. + */ + match = 0; + for (beg = 0; beg < end; beg++) + if ((match = gnmatch(src + beg, end - beg, actpat, 0))) + break; + + /* + * At this point, [src, beg) contains the prefix that is present + * before the actual pattern and [beg, end) what was matched by + * the actual pattern. + * The first range will be copied over to dst and the latter + * replaced with rep. + */ + if (match && beg > 0) { + if (beg + dstlen >= dstsiz) { + dst = areallocarray(dst, 1, dstsiz + beg + 1, + ATEMP); + dstsiz += beg + 1; + } + memcpy(&dst[dstlen], src, beg); + dstlen += beg; + } + + if (replen + dstlen >= dstsiz) { + dst = areallocarray(dst, 1, dstsiz + replen + 1, ATEMP); + dstsiz += replen + 1; + } + memcpy(&dst[dstlen], rep, replen); + dstlen += replen; + + src += end; + len -= end; + if (len == 0 || how == '/') + break; + } + + afree(prepat, ATEMP); + + if (str == src) { + /* No substitutions performed. */ + afree(dst, ATEMP); + + return str; + } + + /* Copy unmatched suffix from src. */ + if (len > 0) { + if (len + dstlen >= dstsiz) { + dst = areallocarray(dst, 1, dstsiz + len + 1, ATEMP); + dstsiz += len + 1; + } + memcpy(&dst[dstlen], src, len); + dstlen += len; + } + dst[dstlen] = '\0'; + + return dst; +} + /* * perform #pattern and %pattern substitution in ${} */ diff --git a/bin/ksh/ksh.1 b/bin/ksh/ksh.1 index bf260d1bc83..4902e06ffae 100644 --- a/bin/ksh/ksh.1 +++ b/bin/ksh/ksh.1 @@ -1,8 +1,8 @@ -.\" $OpenBSD: ksh.1,v 1.188 2017/06/20 17:32:20 brynet Exp $ +.\" $OpenBSD: ksh.1,v 1.189 2017/07/04 07:29:32 anton Exp $ .\" .\" Public Domain .\" -.Dd $Mdocdate: June 20 2017 $ +.Dd $Mdocdate: July 4 2017 $ .Dt KSH 1 .Os .Sh NAME @@ -1240,6 +1240,18 @@ of them result in the longest match. .It Pf ${ Ar name Ns % Ns Ar pattern Ns } .It Pf ${ Ar name Ns %% Ns Ar pattern Ns } Like ${..#..} substitution, but it deletes from the end of the value. +.Pp +.It Pf ${ Ns Ar name Ns / Ns Ar pattern Ns / Ns Ar replacement Ns } +.It Pf ${ Ns Ar name Ns // Ns Ar pattern Ns / Ns Ar replacement Ns } +The first longest match of +.Ar pattern +in the value of parameter +.Ar name +is substituted with +.Ar replacement . +Using +.Ql // , +all matches are substituted. .El .Pp The following special parameters are implicitly set by the shell and cannot be diff --git a/bin/ksh/lex.c b/bin/ksh/lex.c index c33a0b93354..31af92c67ec 100644 --- a/bin/ksh/lex.c +++ b/bin/ksh/lex.c @@ -1,4 +1,4 @@ -/* $OpenBSD: lex.c,v 1.69 2016/04/27 12:46:23 naddy Exp $ */ +/* $OpenBSD: lex.c,v 1.70 2017/07/04 07:29:32 anton Exp $ */ /* * lexical analysis and source input @@ -385,7 +385,7 @@ yylex(int cf) /* If this is a trim operation, * treat (,|,) specially in STBRACE. */ - if (c == '#' || c == '%') { + if (c == '#' || c == '%' || c == '/') { ungetsc(c); PUSH_STATE(STBRACE); } else { diff --git a/bin/ksh/misc.c b/bin/ksh/misc.c index 9ef6e648aa0..b137b1cf898 100644 --- a/bin/ksh/misc.c +++ b/bin/ksh/misc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: misc.c,v 1.55 2016/03/20 00:01:21 krw Exp $ */ +/* $OpenBSD: misc.c,v 1.56 2017/07/04 07:29:32 anton Exp $ */ /* * Miscellaneous functions @@ -52,7 +52,7 @@ initctypes(void) setctypes("*@#!$-?", C_VAR1); setctypes(" \t\n", C_IFSWS); setctypes("=-+?", C_SUBOP1); - setctypes("#%", C_SUBOP2); + setctypes("#%/", C_SUBOP2); setctypes(" \n\t\"#$&'()*;<>?[\\`|", C_QUOTE); } @@ -518,6 +518,19 @@ gmatch(const char *s, const char *p, int isfile) (const unsigned char *) p, (const unsigned char *) pe); } +int +gnmatch(char *s, size_t n, const char *p, int isfile) +{ + int c, match; + + c = s[n]; + s[n] = '\0'; + match = gmatch(s, p, isfile); + s[n] = c; + + return match; +} + /* Returns if p is a syntacticly correct globbing pattern, false * if it contains no pattern characters or if there is a syntax error. * Syntax errors are: diff --git a/bin/ksh/sh.h b/bin/ksh/sh.h index be91791e31f..5efb88f818c 100644 --- a/bin/ksh/sh.h +++ b/bin/ksh/sh.h @@ -1,4 +1,4 @@ -/* $OpenBSD: sh.h,v 1.59 2017/06/29 16:49:58 martijn Exp $ */ +/* $OpenBSD: sh.h,v 1.60 2017/07/04 07:29:32 anton Exp $ */ /* * Public Domain Bourne/Korn shell @@ -278,7 +278,7 @@ extern int really_exit; #define C_VAR1 BIT(3) /* *@#!$-? */ #define C_IFSWS BIT(4) /* \t \n (IFS white space) */ #define C_SUBOP1 BIT(5) /* "=-+?" */ -#define C_SUBOP2 BIT(6) /* "#%" */ +#define C_SUBOP2 BIT(6) /* "#%/" */ #define C_IFS BIT(7) /* $IFS */ #define C_QUOTE BIT(8) /* \n\t"#$&'()*;<>?[\`| (needing quoting) */ @@ -544,6 +544,7 @@ int parse_args(char **, int, int *); int getn(const char *, int *); int bi_getn(const char *, int *); int gmatch(const char *, const char *, int); +int gnmatch(char *, size_t, const char *, int); int has_globbing(const char *, const char *); const unsigned char *pat_scan(const unsigned char *, const unsigned char *, int); diff --git a/regress/bin/ksh/strsub.t b/regress/bin/ksh/strsub.t new file mode 100644 index 00000000000..419ae19d35e --- /dev/null +++ b/regress/bin/ksh/strsub.t @@ -0,0 +1,133 @@ +name: strsub-basic +description: + Valid string substitutions +stdin: + echo empty ${v/old/new} + v=old + echo empty ${v/old/} + echo ${v/new/} + v='old new' + echo ${v/old/new} + v='new old' + echo ${v/old/new} + echo "${v/old/new}" +expected-stdout: + empty + empty + old + new new + new new + new new +--- + +name: strsub-multiline +description: + Value spanning multiple lines +stdin: + v=`cat <