Implement --exclude/--exclude-from and --include/--include-from.

author claudio <claudio@openbsd.org>

Sun, 29 Aug 2021 13:43:46 +0000 (13:43 +0000)

committer claudio <claudio@openbsd.org>

Sun, 29 Aug 2021 13:43:46 +0000 (13:43 +0000)
author claudio <claudio@openbsd.org>
Sun, 29 Aug 2021 13:43:46 +0000 (13:43 +0000)
committer claudio <claudio@openbsd.org>
Sun, 29 Aug 2021 13:43:46 +0000 (13:43 +0000)
diff --git a/usr.bin/rsync/Makefile b/usr.bin/rsync/Makefile

index d7af8bd..f2e4d46 100644 (file)
--- a/usr.bin/rsync/Makefile
+++ b/usr.bin/rsync/Makefile
@@ -1,9 +1,9 @@
-#      $OpenBSD: Makefile,v 1.10 2019/05/08 21:30:11 benno Exp $
+#      $OpenBSD: Makefile,v 1.11 2021/08/29 13:43:46 claudio Exp $
  
  PROG=  openrsync
  SRCS=  blocks.c client.c downloader.c fargs.c flist.c hash.c ids.c \
-       io.c log.c mkpath.c mktemp.c receiver.c sender.c server.c session.c \
-       socket.c symlinks.c uploader.c main.c misc.c
+       io.c log.c main.c misc.c mkpath.c mktemp.c receiver.c rmatch.c \
+       rules.c sender.c server.c session.c socket.c symlinks.c uploader.c
  LDADD+= -lcrypto -lm
  DPADD+= ${LIBCRYPTO} ${LIBM}
  MAN=   openrsync.1
diff --git a/usr.bin/rsync/charclass.h b/usr.bin/rsync/charclass.h

new file mode 100644 (file)

index 0000000..1c5ff7e
--- /dev/null
+++ b/usr.bin/rsync/charclass.h
@@ -0,0 +1,29 @@
+/*
+ * Public domain, 2008, Todd C. Miller <millert@openbsd.org>
+ *
+ * $OpenBSD: charclass.h,v 1.1 2021/08/29 13:43:46 claudio Exp $
+ */
+
+/*
+ * POSIX character class support for fnmatch() and glob().
+ */
+static const struct cclass {
+       const char *name;               /* class name, e.g. "alpha" */
+       int (*isctype)(int);            /* matching <ctype.h> predicate */
+} cclasses[] = {
+       { "alnum",      isalnum },
+       { "alpha",      isalpha },
+       { "blank",      isblank },
+       { "cntrl",      iscntrl },
+       { "digit",      isdigit },
+       { "graph",      isgraph },
+       { "lower",      islower },
+       { "print",      isprint },
+       { "punct",      ispunct },
+       { "space",      isspace },
+       { "upper",      isupper },
+       { "xdigit",     isxdigit },
+       { NULL,         NULL }          /* sentinel ending the table */
+};
+/* Number of named classes; the trailing sentinel is not counted. */
+#define NCCLASSES      (sizeof(cclasses) / sizeof(cclasses[0]) - 1)
diff --git a/usr.bin/rsync/extern.h b/usr.bin/rsync/extern.h

index d40188d..4681d47 100644 (file)
--- a/usr.bin/rsync/extern.h
+++ b/usr.bin/rsync/extern.h
@@ -1,4 +1,4 @@
-/*     $OpenBSD: extern.h,v 1.39 2021/06/30 15:24:10 claudio Exp $ */
+/*     $OpenBSD: extern.h,v 1.40 2021/08/29 13:43:46 claudio Exp $ */
  /*
   * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
   *
@@ -131,12 +131,28 @@ struct    opts {
         int              no_motd;               /* --no-motd */
         int              numeric_ids;           /* --numeric-ids */
         int              one_file_system;       /* -x */
+       int              from0;                 /* -0 */
         char            *rsync_path;            /* --rsync-path */
         char            *ssh_prog;              /* --rsh or -e */
         char            *port;                  /* --port */
         char            *address;               /* --address */
  };
  
+enum rule_type {
+       RULE_NONE,              /* first enumerator, i.e. 0 */
+       RULE_EXCLUDE,           /* '-' / "exclude" */
+       RULE_INCLUDE,           /* '+' / "include" */
+       RULE_CLEAR,             /* '!' / "clear" */
+#ifdef NOTYET                  /* compiled only when NOTYET is defined */
+       RULE_MERGE,             /* '.' / "merge" */
+       RULE_DIR_MERGE,         /* ':' / "dir-merge" */
+       RULE_SHOW,              /* 'S' / "show" */
+       RULE_HIDE,              /* 'H' / "hide" */
+       RULE_PROTECT,           /* 'P' / "protect" */
+       RULE_RISK,              /* 'R' / "risk" */
+#endif
+};
+
  /*
   * An individual block description for a file.
   * See struct blkset.
@@ -362,6 +378,14 @@ char               *mkstempnodat(int, char *, mode_t, dev_t);
  char           *mkstempsock(const char *, char *);
  int             mktemplate(char **, const char *, int);
  
+int             parse_rule(char *line, enum rule_type);
+void            parse_file(const char *, enum rule_type, int);
+void            send_rules(struct sess *, int);
+void            recv_rules(struct sess *, int);
+int             rules_match(const char *, int);
+
+int             rmatch(const char *, const char *, int);
+
  char           *symlink_read(const char *);
  char           *symlinkat_read(int, const char *);
  
diff --git a/usr.bin/rsync/flist.c b/usr.bin/rsync/flist.c

index e33f51b..86cde1d 100644 (file)
--- a/usr.bin/rsync/flist.c
+++ b/usr.bin/rsync/flist.c
@@ -1,4 +1,4 @@
-/*     $OpenBSD: flist.c,v 1.32 2021/06/30 13:10:04 claudio Exp $ */
+/*     $OpenBSD: flist.c,v 1.33 2021/08/29 13:43:46 claudio Exp $ */
  /*
   * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
   * Copyright (c) 2019 Florian Obser <florian@openbsd.org>
@@ -823,6 +823,11 @@ flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,
                 ERR("%s: lstat", root);
                 return 0;
         } else if (S_ISREG(st.st_mode)) {
+               /* filter files */
+               if (rules_match(root, 0) == -1) {
+                       WARNX("%s: skipping excluded file", root);
+                       return 1;
+               }
                 if (!flist_realloc(fl, sz, max)) {
                         ERRX1("flist_realloc");
                         return 0;
@@ -839,7 +844,13 @@ flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,
                 if (!sess->opts->preserve_links) {
                         WARNX("%s: skipping symlink", root);
                         return 1;
-               } else if (!flist_realloc(fl, sz, max)) {
+               }
+               /* filter files */
+               if (rules_match(root, 0) == -1) {
+                       WARNX("%s: skipping excluded symlink", root);
+                       return 1;
+               }
+               if (!flist_realloc(fl, sz, max)) {
                         ERRX1("flist_realloc");
                         return 0;
                 }
@@ -942,6 +953,15 @@ flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,
                         nxdev++;
                 }
  
+               /* filter files */
+               if (rules_match(ent->fts_path + stripdir,
+                   (ent->fts_info == FTS_D)) == -1) {
+                       WARNX("%s: skipping excluded file",
+                           ent->fts_path + stripdir);
+                       fts_set(fts, ent, FTS_SKIP);
+                       continue;
+               }
+
                 /* Allocate a new file entry. */
  
                 if (!flist_realloc(fl, sz, max)) {
@@ -1073,6 +1093,11 @@ flist_gen_files(struct sess *sess, size_t argc, char **argv,
                         continue;
                 }
  
+               /* filter files */
+               if (rules_match(argv[i], S_ISDIR(st.st_mode)) == -1) {
+                       WARNX("%s: skipping excluded file", argv[i]);
+                       continue;
+               }
  
                 f = &fl[flsz++];
                 assert(f != NULL);
@@ -1297,6 +1322,16 @@ flist_gen_dels(struct sess *sess, const char *root, struct flist **fl,
                                 continue;
                 }
  
+               /* filter files on delete */
+               /* TODO handle --delete-excluded */
+               if (rules_match(ent->fts_path + stripdir,
+                   (ent->fts_info == FTS_D)) == -1) {
+                       WARNX("skip excluded file %s",
+                           ent->fts_path + stripdir);
+                       fts_set(fts, ent, FTS_SKIP);
+                       continue;
+               }
+
                 /* Look up in hashtable. */
  
                 memset(&hent, 0, sizeof(ENTRY));
diff --git a/usr.bin/rsync/main.c b/usr.bin/rsync/main.c

index cb4a034..3117462 100644 (file)
--- a/usr.bin/rsync/main.c
+++ b/usr.bin/rsync/main.c
@@ -1,4 +1,4 @@
-/*     $OpenBSD: main.c,v 1.56 2021/07/14 11:14:27 claudio Exp $ */
+/*     $OpenBSD: main.c,v 1.57 2021/08/29 13:43:46 claudio Exp $ */
  /*
   * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
   *
@@ -276,6 +276,10 @@ static struct opts  opts;
  #define OP_RSYNCPATH   1002
  #define OP_TIMEOUT     1003
  #define OP_VERSION     1004
+#define OP_EXCLUDE     1005
+#define OP_INCLUDE     1006
+#define OP_EXCLUDE_FROM        1007
+#define OP_INCLUDE_FROM        1008
  
  const struct option     lopts[] = {
      { "address",       required_argument, NULL,                OP_ADDRESS },
@@ -286,9 +290,15 @@ const struct option         lopts[] = {
      { "devices",       no_argument,    &opts.devices,          1 },
      { "no-devices",    no_argument,    &opts.devices,          0 },
      { "dry-run",       no_argument,    &opts.dry_run,          1 },
+    { "exclude",       required_argument, NULL,                OP_EXCLUDE },
+    { "exclude-from",  required_argument, NULL,                OP_EXCLUDE_FROM },
+    { "from0",         no_argument,    NULL,                   '0' },
+    { "no-from0",      no_argument,    &opts.from0,            0 },
      { "group",         no_argument,    &opts.preserve_gids,    1 },
      { "no-group",      no_argument,    &opts.preserve_gids,    0 },
      { "help",          no_argument,    NULL,                   'h' },
+    { "include",       required_argument, NULL,                OP_INCLUDE },
+    { "include-from",  required_argument, NULL,                OP_INCLUDE_FROM },
      { "links",         no_argument,    &opts.preserve_links,   1 },
      { "no-links",      no_argument,    &opts.preserve_links,   0 },
      { "no-motd",       no_argument,    &opts.no_motd,          1 },
@@ -324,6 +334,7 @@ main(int argc, char *argv[])
         struct fargs    *fargs;
         char            **args;
         const char      *errstr;
+
         /* Global pledge. */
  
         if (pledge("stdio unix rpath wpath cpath dpath inet fattr chown dns getpw proc exec unveil",
@@ -333,6 +344,9 @@ main(int argc, char *argv[])
         while ((c = getopt_long(argc, argv, "Dae:ghlnoprtvxz", lopts, NULL))
             != -1) {
                 switch (c) {
+               case '0':
+                       opts.from0 = 1;
+                       break;
                 case 'D':
                         opts.devices = 1;
                         opts.specials = 1;
@@ -398,6 +412,24 @@ main(int argc, char *argv[])
                                 errx(ERR_SYNTAX, "timeout is %s: %s",
                                     errstr, optarg);
                         break;
+               case OP_EXCLUDE:
+                       if (parse_rule(optarg, RULE_EXCLUDE) == -1)
+                               errx(ERR_SYNTAX, "syntax error in exclude: %s",
+                                   optarg);
+                       break;
+               case OP_INCLUDE:
+                       if (parse_rule(optarg, RULE_INCLUDE) == -1)
+                               errx(ERR_SYNTAX, "syntax error in include: %s",
+                                   optarg);
+                       break;
+               case OP_EXCLUDE_FROM:
+                       parse_file(optarg, RULE_EXCLUDE,
+                           opts.from0 ? '\0' : '\n' );
+                       break;
+               case OP_INCLUDE_FROM:
+                       parse_file(optarg, RULE_INCLUDE,
+                           opts.from0 ? '\0' : '\n' );
+                       break;
                 case OP_VERSION:
                         fprintf(stderr, "openrsync: protocol version %u\n",
                             RSYNC_PROTOCOL);
diff --git a/usr.bin/rsync/receiver.c b/usr.bin/rsync/receiver.c

index c731211..6e5b016 100644 (file)
--- a/usr.bin/rsync/receiver.c
+++ b/usr.bin/rsync/receiver.c
@@ -1,4 +1,4 @@
-/*     $OpenBSD: receiver.c,v 1.28 2021/06/30 13:10:04 claudio Exp $ */
+/*     $OpenBSD: receiver.c,v 1.29 2021/08/29 13:43:46 claudio Exp $ */
  
  /*
   * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
@@ -172,7 +172,7 @@ int
  rsync_receiver(struct sess *sess, int fdin, int fdout, const char *root)
  {
         struct flist    *fl = NULL, *dfl = NULL;
-       size_t           i, flsz = 0, dflsz = 0, excl;
+       size_t           i, flsz = 0, dflsz = 0;
         char            *tofree;
         int              rc = 0, dfd = -1, phase = 0, c;
         int32_t          ioerror;
@@ -184,22 +184,13 @@ rsync_receiver(struct sess *sess, int fdin, int fdout, const char *root)
         if (pledge("stdio unix rpath wpath cpath dpath fattr chown getpw unveil", NULL) == -1)
                 err(ERR_IPC, "pledge");
  
-       /* Client sends zero-length exclusions. */
+       /* Client sends exclusions. */
+       if (!sess->opts->server)
+               send_rules(sess, fdout);
  
-       if (!sess->opts->server && !io_write_int(sess, fdout, 0)) {
-               ERRX1("io_write_int");
-               goto out;
-       }
-
-       if (sess->opts->server && sess->opts->del) {
-               if (!io_read_size(sess, fdin, &excl)) {
-                       ERRX1("io_read_size");
-                       goto out;
-               } else if (excl != 0) {
-                       ERRX("exclusion list is non-empty");
-                       goto out;
-               }
-       }
+       /* Server receives exclusions if delete is on. */
+       if (sess->opts->server && sess->opts->del)
+               recv_rules(sess, fdin);
  
         /*
          * Start by receiving the file list and our mystery number.
diff --git a/usr.bin/rsync/rmatch.c b/usr.bin/rsync/rmatch.c

new file mode 100644 (file)

index 0000000..b037b80
--- /dev/null
+++ b/usr.bin/rsync/rmatch.c
@@ -0,0 +1,395 @@
+/*     $OpenBSD: rmatch.c,v 1.1 2021/08/29 13:43:46 claudio Exp $      */
+
+/*
+ * Copyright (c) 2021 Claudio Jeker <claudio@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 1989, 1993, 1994
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Guido van Rossum.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+
+#include "charclass.h"
+
+#define        RANGE_MATCH     1
+#define        RANGE_NOMATCH   0
+#define        RANGE_ERROR     (-1)
+
+static int /* RANGE_MATCH, RANGE_NOMATCH, or RANGE_ERROR if not a class */
+classmatch(const char *pattern, char test, const char **ep)
+{
+       const char *mismatch = pattern; /* value handed back in *ep on error */
+       const struct cclass *cc;
+       const char *colon;
+       size_t len;
+       int rval = RANGE_NOMATCH;
+
+       if (*pattern++ != ':') {        /* a class must start with ':' */
+               *ep = mismatch;
+               return RANGE_ERROR;
+       }
+       if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') {
+               *ep = mismatch;         /* no closing ":]" found */
+               return RANGE_ERROR;
+       }
+       *ep = colon + 2;                /* just past the closing ":]" */
+       len = (size_t)(colon - pattern);        /* length of the class name */
+
+       for (cc = cclasses; cc->name != NULL; cc++) {
+               if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') {
+                       if (cc->isctype((unsigned char)test))
+                               rval = RANGE_MATCH;
+                       return rval;
+               }
+       }
+
+       /* invalid character class, treat as normal text */
+       *ep = mismatch;
+       return RANGE_ERROR;
+}
+
+static int /* RANGE_MATCH, RANGE_NOMATCH, or RANGE_ERROR if unterminated */
+rangematch(const char **pp, char test)
+{
+       const char *pattern = *pp;
+       int negate, ok;
+       char c, c2;
+
+       /*
+        * A bracket expression starting with an unquoted circumflex
+        * character produces unspecified results (IEEE 1003.2-1992,
+        * 3.13.2).  This implementation treats it like '!', for
+        * consistency with the regular expression syntax.
+        * J.T. Conklin (conklin@ngai.kaleida.com)
+        */
+       if ((negate = (*pattern == '!' || *pattern == '^')))
+               ++pattern;
+
+       /*
+        * A right bracket shall lose its special meaning and represent
+        * itself in a bracket expression if it occurs first in the list.
+        * -- POSIX.2 2.8.3.2
+        */
+       ok = 0;
+       c = *pattern++;
+       do {
+               if (c == '[') {
+                       switch (classmatch(pattern, test, &pattern)) {
+                       case RANGE_MATCH:
+                               ok = 1;
+                               continue;       /* re-evaluates the while test */
+                       case RANGE_NOMATCH:
+                               continue;
+                       default:
+                               /* invalid character class, treat literally. */
+                               break;
+                       }
+               }
+               if (c == '\\')
+                       c = *pattern++;
+               if (c == '\0')
+                       return RANGE_ERROR;
+               /* patterns can not match on '/' */
+               if (c == '/')
+                       return RANGE_NOMATCH;   /* note: *pp is left untouched */
+               if (*pattern == '-'
+                   && (c2 = *(pattern + 1)) != '\0' && c2 != ']') {
+                       pattern += 2;           /* skip '-' and the range end */
+                       if (c2 == '\\')
+                               c2 = *pattern++;
+                       if (c2 == '\0')
+                               return RANGE_ERROR;
+                       if (c <= test && test <= c2)
+                               ok = 1;
+               } else if (c == test)
+                       ok = 1;
+       } while ((c = *pattern++) != ']');
+
+       *pp = pattern;                  /* only reached after the closing ']' */
+       return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
+}
+
+/*
+ * Single character match, advances pattern as much as needed.
+ * Return 0 on match and !0 (aka 1) on mismatch.
+ * *pp is always written back, on a match and on a mismatch alike.
+ */
+static int
+matchchar(const char **pp, const char in)
+{
+       const char *pattern = *pp;
+       char c;
+       int rv = 0;
+
+       switch (c = *pattern++) {
+       case '?':                       /* any char except '/' and '\0' */
+               if (in == '\0')
+                       rv = 1;
+               if (in == '/')
+                       rv = 1;
+               break;
+       case '[':
+               if (in == '\0')
+                       rv = 1;
+               if (in == '/')
+                       rv = 1;
+               if (rv == 1)
+                       break;
+
+               switch (rangematch(&pattern, in)) {
+               case RANGE_ERROR:
+                       /* not a good range, treat as normal text */
+                       goto normal;
+               case RANGE_MATCH:
+                       break;
+               case RANGE_NOMATCH:
+                       rv = 1;
+               }
+               break;
+       case '\\':
+               if ((c = *pattern++) == '\0') {
+                       c = '\\';       /* trailing backslash matches itself */
+                       --pattern;
+               }
+               /* FALLTHROUGH */
+       default:
+       normal:
+               if (c != in)
+                       rv = 1;
+               break;
+       }
+
+       *pp = pattern;
+       return rv;
+}
+
+/*
+ * Do a substring match. If wild is set then the pattern started with a '*'.
+ * The match will go until '*', '/' or '\0' is encountered in pattern or
+ * the input string is consumed up to end.  Returns 0 on match, 1 otherwise.
+ * *pp is written only on success; *ss is also bumped by every wild retry.
+ */
+static int
+matchsub(const char **pp, const char **ss, const char *end, int wild)
+{
+       const char *pattern = *pp;
+       const char *p = pattern;
+       const char *string = *ss;
+       size_t matchlen;
+
+       /* first calculate how many characters the submatch will consume */
+       for (matchlen = 0; *p != '\0'; matchlen++) {
+               if (p[0] == '*')
+                       break;
+               /* '/' acts as barrier */
+               if (p[0] == '/' || (p[0] == '\\' && p[1] == '/')) {
+                       if (wild) {
+                               /* match needs to match up to end of segment */
+                               if (string > end - matchlen)
+                                       return 1;
+                               string = end - matchlen;
+                               wild = 0;
+                       }
+                       break;
+               }
+               /*
+                * skip forward one character in pattern by doing a
+                * dummy lookup.
+                */
+               matchchar(&p, ' ');
+       }
+       /* NOTE(review): end - matchlen may point before the string; verify */
+       /* not enough char to match */
+       if (string > end - matchlen)
+               return 1;
+
+       if (*p == '\0') {
+               if (wild) {
+                       /* match needs to match up to end of segment */
+                       string = end - matchlen;
+                       wild = 0;
+               }
+       }
+
+       while (*pattern != '\0' && *pattern != '*') {
+               /* eat possible escape char before '/' */
+               if (pattern[0] == '\\' && pattern[1] == '/')
+                       pattern++;
+               if (pattern[0] == '/')
+                       break;
+
+               /* check if there are still characters available to compare */
+               if (string >= end)
+                       return 1;
+               /* Compare one char at a time. */
+               if (!matchchar(&pattern, *string++))
+                       continue;
+               if (wild) {
+                       /* skip forward one char and restart match */
+                       string = ++*ss; /* also moves the caller's string */
+                       pattern = *pp;
+                       /* can it still match? */
+                       if (string > end - matchlen)
+                               return 1;
+               } else {
+                       /* failed match */
+                       return 1;
+               }
+       }
+
+       *pp = pattern;
+       *ss = string;
+       return 0;
+}
+
+/*
+ * File matching with the addition of the special '**'.  Returns 0 on match,
+ * !0 otherwise; leading_dir also accepts a string remainder starting at '/'.
+ */
+int
+rmatch(const char *pattern, const char *string, int leading_dir)
+{
+       const char *segend, *segnext, *mismatch = NULL;
+       int wild, starstar;
+
+       while (*pattern && *string) {
+
+               /* handle leading '/' first */
+               if (pattern[0] == '\\' && pattern[1] == '/')
+                       pattern++;
+               if (*string == '/' && *pattern == '/') {
+                       string++;
+                       pattern++;
+               }
+
+               /* match to the next '/' in string */
+               segend = strchr(string, '/');
+               if (segend == NULL)
+                       segend = strchr(string, '\0');
+
+               while (*pattern) {
+                       /*
+                        * Check for '*' and '**'. For '*' reduce '*' and '?'
+                        * sequences into n-'?' and trailing '*'.
+                        * For '**' this optimisation can not be done
+                        * since '**???/' will match 'a/aa/aaa/' but not
+                        * 'a/aa/aa/' still additional '*' will be reduced.
+                        */
+                       wild = 0;
+                       starstar = 0;
+                       for ( ; *pattern == '*' || *pattern == '?'; pattern++) {
+                               if (pattern[0] == '*') {
+                                       if (pattern[1] == '*') {
+                                               starstar = 1;
+                                               pattern++;
+                                       }
+                                       wild = 1;
+                               } else if (!starstar) { /* pattern[0] == '?' */
+                                       if (string < segend && *string != '/')
+                                               string++;
+                                       else
+                                               /* no match possible */
+                                               return 1;
+                               } else
+                                       break;
+                       }
+
+                       /* pattern ends in '**' so it is a match */
+                       if (starstar && *pattern == '\0')
+                               return 0;
+
+                       if (starstar) { /* remember where to retry from */
+                               segnext = segend;
+                               mismatch = pattern;
+                       }
+
+                       while (string < segend) {
+                               if (matchsub(&pattern, &string, segend, wild)) {
+failed_match:
+                                       /*
+                                        * failed to match, if starstar retry
+                                        * with the next segment.
+                                        */
+                                       if (mismatch) {
+                                               pattern = mismatch;
+                                               wild = 1;
+                                               string = segnext;
+                                               if (*string == '/')
+                                                       string++;
+                                               segend = strchr(string, '/');
+                                               if (!segend)
+                                                       segend = strchr(string,
+                                                           '\0');
+                                               segnext = segend;
+                                               if (string < segend)
+                                                       continue;
+                                       }
+                                       /* no match possible */
+                                       return 1;
+                               }
+                               break;  /* submatch succeeded */
+                       }
+
+                       /* at end of string segment, eat up any extra '*' */
+                       if (string >= segend && *pattern != '*')
+                               break;
+               }
+               if (*string != '\0' && *string != '/')
+                       goto failed_match;
+               if (*pattern != '\0' && *pattern != '/')
+                       goto failed_match;
+       }
+
+       /* if both pattern and string are consumed it was a match */
+       if (*pattern == '\0' && *string == '\0')
+               return 0;
+       /* if leading_dir is set then string can also be '/' for success */
+       if (leading_dir && *pattern == '\0' && *string == '/')
+               return 0;
+       /* else failure */
+       return 1;
+}
diff --git a/usr.bin/rsync/rules.c b/usr.bin/rsync/rules.c

new file mode 100644 (file)

index 0000000..c34e7d9
--- /dev/null
+++ b/usr.bin/rsync/rules.c
@@ -0,0 +1,479 @@
+#include <err.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "extern.h"
+
+struct rule {         /* one parsed include/exclude filter rule */
+       char                    *pattern;       /* heap copy made by parse_pattern(), markers below cut off */
+       enum rule_type          type;           /* rule kind, assigned in parse_rule() */
+#ifdef NOTYET
+       unsigned int            modifiers;
+#endif
+       short                    numseg;        /* count of '/'-separated segments, -1 if pattern has "**" */
+       unsigned char            anchored;      /* pattern started with '/' */
+       unsigned char            fileonly;      /* one unanchored segment: compared with the basename */
+       unsigned char            nowild;        /* pattern has none of '*', '?', '[' */
+       unsigned char            onlydir;       /* pattern ended with '/': rule skipped for non-directories */
+       unsigned char            leadingdir;    /* pattern ended with '/'+'***' */
+};
+
+static struct rule     *rules;
+static size_t           numrules;      /* number of rules */
+static size_t           rulesz;        /* available size */
+
+/* up to protocol 29 filter rules only support - + ! and no modifiers */
+
+const struct command {        /* rule prefixes recognised by parse_command() */
+       enum rule_type          type;
+       char                    sopt;   /* single-character form of the prefix */
+       const char              *lopt;  /* long-name form of the prefix */
+} commands[] = {
+       { RULE_EXCLUDE,         '-',    "exclude" },
+       { RULE_INCLUDE,         '+',    "include" },
+       { RULE_CLEAR,           '!',    "clear" },
+#ifdef NOTYET
+       { RULE_MERGE,           '.',    "merge" },
+       { RULE_DIR_MERGE,       ':',    "dir-merge" },
+       { RULE_SHOW,            'S',    "show" },
+       { RULE_HIDE,            'H',    "hide" },
+       { RULE_PROTECT,         'P',    "protect" },
+       { RULE_RISK,            'R',    "risk" },
+#endif
+       { 0 }   /* terminator; lookups stop at RULE_NONE, presumably 0 - confirm in extern.h */
+};
+
+#ifdef NOTYET
+/* bit flags stored in the modifiers member of a rule */
+#define MOD_ABSOLUTE                   0x0001
+#define MOD_NEGATE                     0x0002
+#define MOD_CVSEXCLUDE                 0x0004
+#define MOD_SENDING                    0x0008
+#define MOD_RECEIVING                  0x0010
+#define MOD_PERISHABLE                 0x0020
+#define MOD_XATTR                      0x0040
+#define MOD_MERGE_EXCLUDE              0x0080
+#define MOD_MERGE_INCLUDE              0x0100
+#define MOD_MERGE_CVSCOMPAT            0x0200
+#define MOD_MERGE_EXCLUDE_FILE         0x0400
+#define MOD_MERGE_NO_INHERIT           0x0800
+#define MOD_MERGE_WORDSPLIT            0x1000
+
+/*
+ * Modifier flag to prefix character table, terminated by a zero modifier.
+ * maybe support absolute and negate
+ * NOTE(review): 'C' is listed for both MOD_CVSEXCLUDE and
+ * MOD_MERGE_CVSCOMPAT, so a character lookup cannot tell them apart
+ * without also knowing the rule type.
+ */
+const struct modifier {
+       unsigned int            modifier;
+       char                    sopt;
+} modifiers[] = {
+       { MOD_ABSOLUTE,                 '/' },
+       { MOD_NEGATE,                   '!' },
+       { MOD_CVSEXCLUDE,               'C' },
+       { MOD_SENDING,                  's' },
+       { MOD_RECEIVING,                'r' },
+       { MOD_PERISHABLE,               'p' },
+       { MOD_XATTR,                    'x' },
+       /* for '.' and ':' types */
+       { MOD_MERGE_EXCLUDE,            '-' },
+       { MOD_MERGE_INCLUDE,            '+' },
+       { MOD_MERGE_CVSCOMPAT,          'C' },
+       { MOD_MERGE_EXCLUDE_FILE,       'e' },
+       { MOD_MERGE_NO_INHERIT,         'n' },
+       { MOD_MERGE_WORDSPLIT,          'w' },
+       { 0 }
+};
+#endif
+
+/*
+ * Reserve the next slot of the global rules array and return it.
+ * The array starts with 16 slots and doubles whenever it is full; new
+ * slots come from recallocarray().  Exits with ERR_NOMEM on failure.
+ */
+static struct rule *
+get_next_rule(void)
+{
+       struct rule *grown;
+       size_t want;
+
+       numrules++;
+       if (numrules > rulesz) {
+               want = (rulesz == 0) ? 16 : rulesz * 2;
+
+               grown = recallocarray(rules, rulesz, want, sizeof(*rules));
+               if (grown == NULL)
+                       err(ERR_NOMEM, NULL);
+
+               rulesz = want;
+               rules = grown;
+       }
+
+       return &rules[numrules - 1];
+}
+
+/*
+ * Translate the rule prefix command[0..len) into a rule type.
+ *
+ * A prefix is accepted when it equals the long name of a commands[]
+ * entry in full, or when it is the single short-option character.
+ * Returns RULE_NONE for an empty prefix, for a prefix carrying
+ * modifiers (a ',') and for anything not found in the table.
+ */
+static enum rule_type
+parse_command(const char *command, size_t len)
+{
+       const char *mod;
+       size_t  i;
+
+       /* nothing before the separator: cannot be a command */
+       if (len == 0)
+               return RULE_NONE;
+
+       mod = memchr(command, ',', len);
+       if (mod != NULL) {
+               /* XXX modifiers not yet implemented */
+               return RULE_NONE;
+       }
+
+       for (i = 0; commands[i].type != RULE_NONE; i++) {
+               /*
+                * Compare the lengths first: strncmp() alone would accept
+                * any leading part of the long name, e.g. "e" or "inc".
+                */
+               if (strlen(commands[i].lopt) == len &&
+                   strncmp(commands[i].lopt, command, len) == 0)
+                       return commands[i].type;
+               if (len == 1 && commands[i].sopt == *command)
+                       return commands[i].type;
+       }
+
+       return RULE_NONE;
+}
+
+/*
+ * Classify pattern and store a heap copy of it in r.
+ *
+ * A leading '/' is skipped and recorded as anchored; a trailing '/' or
+ * a trailing '/'+'***' is cut off in place (the caller's buffer is
+ * modified) and recorded as onlydir or leadingdir.  The remaining flags
+ * and the segment count are derived from what is left.  Exits with
+ * ERR_NOMEM if the copy cannot be allocated.
+ */
+static void
+parse_pattern(struct rule *r, char *pattern)
+{
+       const char *slash;
+       size_t plen;
+       short nseg;
+
+       /* a '/' in front anchors the pattern and is not stored */
+       if (pattern[0] == '/') {
+               r->anchored = 1;
+               pattern++;
+       }
+
+       /*
+        * Both suffix tests below use the length measured here, before
+        * anything has been cut off.
+        */
+       plen = strlen(pattern);
+       if (plen > 1 && pattern[plen - 1] == '/') {
+               pattern[plen - 1] = '\0';
+               r->onlydir = 1;
+       }
+       if (plen > 4 && strcmp(&pattern[plen - 4], "/***") == 0) {
+               pattern[plen - 4] = '\0';
+               r->leadingdir = 1;
+       }
+
+       /* one segment, plus one for every '/' still in the pattern */
+       nseg = 1;
+       for (slash = strchr(pattern, '/'); slash != NULL;
+           slash = strchr(slash + 1, '/'))
+               nseg++;
+       r->numseg = nseg;
+
+       /* a single unanchored segment is compared with the basename only */
+       if (!r->anchored && nseg == 1)
+               r->fileonly = 1;
+
+       /* "**" is flagged with -1; rules_match() then uses the full path */
+       if (strstr(pattern, "**") != NULL)
+               r->numseg = -1;
+       /* none of '*', '?', '[' present: plain string compares suffice */
+       if (strpbrk(pattern, "*?[") == NULL)
+               r->nowild = 1;
+
+       r->pattern = strdup(pattern);
+       if (r->pattern == NULL)
+               err(ERR_NOMEM, NULL);
+}
+
+/*
+ * Parse one rule line and append the result to the rule list.
+ *
+ * Lines starting with '#' or ';' and empty lines are skipped.  Otherwise
+ * the text up to the first ' ' or '_' is tried as a command; if it is
+ * not one, the whole line is taken as the pattern of a rule of type def.
+ * The line buffer may be modified by parse_pattern().
+ *
+ * Returns 0 on success (including skipped lines) and -1 on a syntax
+ * error: no command and def is RULE_NONE, a missing pattern, or a
+ * pattern following the clear command.
+ */
+int
+parse_rule(char *line, enum rule_type def)
+{
+       enum rule_type type;
+       struct rule *r;
+       char *pattern;
+       size_t len;
+
+       switch (*line) {
+       case '#':
+       case ';':
+               /* comment */
+               return 0;
+       case '\0':
+               /* ignore empty lines */
+               return 0;
+       default:
+               len = strcspn(line, " _");
+               type = parse_command(line, len);
+               if (type == RULE_NONE) {
+                       if (def == RULE_NONE)
+                               return -1;
+                       type = def;
+                       pattern = line;
+               } else {
+                       /*
+                        * Skip the separator after the command.  A bare
+                        * command such as "!" ends right here, so do not
+                        * step over the terminating NUL.
+                        */
+                       pattern = line + len;
+                       if (*pattern != '\0')
+                               pattern++;
+               }
+
+               if (*pattern == '\0' && type != RULE_CLEAR)
+                       return -1;
+               if (*pattern != '\0' && type == RULE_CLEAR)
+                       return -1;
+               break;
+       }
+
+       r = get_next_rule();
+       r->type = type;
+       parse_pattern(r, pattern);
+
+       return 0;
+}
+
+/*
+ * Read rules from file, one per entry, and add them to the rule list.
+ *
+ * file:  path of the rule file
+ * def:   rule type used for entries that carry no command prefix
+ * delim: byte that separates entries, handed to getdelim()
+ *
+ * Exits with ERR_SYNTAX if the file cannot be opened or read, or if an
+ * entry is rejected by parse_rule().
+ */
+void
+parse_file(const char *file, enum rule_type def, int delim)
+{
+       FILE *fp;
+       char *line = NULL;
+       size_t linesize = 0, linenum = 0;
+       ssize_t linelen;
+
+       if ((fp = fopen(file, "r")) == NULL)
+               err(ERR_SYNTAX, "open: %s", file);
+
+       while ((linelen = getdelim(&line, &linesize, delim, fp)) != -1) {
+               linenum++;
+               /*
+                * Strip the delimiter only when it is really there: the
+                * last entry of a file may end at EOF without one and
+                * must not lose its final character.
+                */
+               if (linelen > 0 && line[linelen - 1] == (char)delim)
+                       line[linelen - 1] = '\0';
+               if (parse_rule(line, def) == -1)
+                       errx(ERR_SYNTAX, "syntax error in %s at entry %zu",
+                           file, linenum);
+       }
+
+       free(line);
+       if (ferror(fp))
+               err(ERR_SYNTAX, "failed to parse file %s", file);
+       fclose(fp);
+}
+
+/*
+ * Build the text that precedes the pattern when rule r is sent: the
+ * single-character command, a space and, for anchored rules, the '/'
+ * that parse_pattern() removed.
+ *
+ * Returns a pointer to a static buffer that is overwritten by the next
+ * call.  Exits with ERR_SYNTAX on an unknown rule type or if the prefix
+ * would not fit into the buffer.
+ */
+static const char *
+send_command(struct rule *r)
+{
+       static char buf[16];
+       char *b = buf;
+       char *ep = buf + sizeof(buf);
+#ifdef NOTYET
+       size_t i;
+#endif
+
+       switch (r->type) {
+       case RULE_EXCLUDE:
+               *b++ = '-';
+               break;
+       case RULE_INCLUDE:
+               *b++ = '+';
+               break;
+       case RULE_CLEAR:
+               *b++ = '!';
+               break;
+#ifdef NOTYET
+       case RULE_MERGE:
+               *b++ = '.';
+               break;
+       case RULE_DIR_MERGE:
+               *b++ = ':';
+               break;
+       case RULE_SHOW:
+               *b++ = 'S';
+               break;
+       case RULE_HIDE:
+               *b++ = 'H';
+               break;
+       case RULE_PROTECT:
+               *b++ = 'P';
+               break;
+       case RULE_RISK:
+               *b++ = 'R';
+               break;
+#endif
+       default:
+               /* not an errno condition, so errx() rather than err() */
+               errx(ERR_SYNTAX, "unknown rule type %d", r->type);
+       }
+
+#ifdef NOTYET
+       for (i = 0; modifiers[i].modifier != 0; i++) {
+               if (r->modifiers & modifiers[i].modifier)
+                       *b++ = modifiers[i].sopt;
+               if (b >= ep - 3)
+                       errx(ERR_SYNTAX, "rule modifiers overflow");
+       }
+#endif
+       /* keep room for the space, the optional '/' and the NUL */
+       if (b >= ep - 3)
+               errx(ERR_SYNTAX, "rule prefix overflow");
+       *b++ = ' ';
+
+       /* include the stripped root '/' for anchored patterns */
+       if (r->anchored)
+               *b++ = '/';
+       *b++ = '\0';
+       return buf;
+}
+
+/*
+ * Return the suffix that parse_pattern() cut off the pattern of rule r,
+ * so it can be sent again after the pattern: '/'+'***' for leadingdir
+ * rules (which takes precedence), "/" for onlydir rules, otherwise the
+ * empty string.
+ */
+static const char *
+postfix_command(struct rule *r)
+{
+       if (r->leadingdir)
+               return "/***";
+       if (r->onlydir)
+               return "/";
+       return "";
+}
+
+/*
+ * Write all rules to fd.  Each rule goes out as an integer holding its
+ * total length, followed by the command prefix, the pattern and the
+ * suffix removed during parsing.  An integer 0 ends the list.
+ * Exits with ERR_SOCK_IO if a write fails.
+ */
+void
+send_rules(struct sess *sess, int fd)
+{
+       const char *prefix, *suffix;
+       struct rule *r;
+       size_t prefixlen, patlen, suffixlen, i;
+
+       for (i = 0; i < numrules; i++) {
+               r = rules + i;
+
+               prefix = send_command(r);
+               if (prefix == NULL)
+                       err(ERR_PROTOCOL,
+                           "rules are incompatible with remote rsync");
+               suffix = postfix_command(r);
+
+               prefixlen = strlen(prefix);
+               patlen = strlen(r->pattern);
+               suffixlen = strlen(suffix);
+
+               /*
+                * Length first, then the pieces in order; the suffix
+                * (the '/' or '/'+'***' stripped while parsing) is only
+                * written when there is one.
+                */
+               if (!io_write_int(sess, fd, prefixlen + patlen + suffixlen) ||
+                   !io_write_buf(sess, fd, prefix, prefixlen) ||
+                   !io_write_buf(sess, fd, r->pattern, patlen) ||
+                   (suffixlen > 0 &&
+                   !io_write_buf(sess, fd, suffix, suffixlen)))
+                       err(ERR_SOCK_IO, "send rules");
+       }
+
+       /* end of list marker */
+       if (!io_write_int(sess, fd, 0))
+               err(ERR_SOCK_IO, "send rules");
+}
+
+/*
+ * Read rules from fd until a zero length is received.  Every rule is a
+ * size followed by that many bytes of rule text, which is handed to
+ * parse_rule() with no default type, so a command prefix is required.
+ * Exits with ERR_SOCK_IO on read failure or an over-long rule and with
+ * ERR_PROTOCOL if a rule does not parse.
+ */
+void
+recv_rules(struct sess *sess, int fd)
+{
+       char buf[8192];
+       size_t sz;
+
+       for (;;) {
+               if (!io_read_size(sess, fd, &sz))
+                       err(ERR_SOCK_IO, "receive rules");
+
+               /* a zero length terminates the list */
+               if (sz == 0)
+                       break;
+               if (sz >= sizeof(buf) - 1)
+                       errx(ERR_SOCK_IO, "received rule too long");
+               if (!io_read_buf(sess, fd, buf, sz))
+                       err(ERR_SOCK_IO, "receive rules");
+               buf[sz] = '\0';
+               if (parse_rule(buf, RULE_NONE) == -1)
+                       errx(ERR_PROTOCOL, "syntax error in received rules");
+       }
+}
+
+/*
+ * Verdict for a rule whose pattern matched: -1 for an exclude rule,
+ * 1 for every other rule type.
+ */
+static inline int
+rule_matched(struct rule *r)
+{
+       /* TODO apply negation once modifiers are added */
+
+       return (r->type == RULE_EXCLUDE) ? -1 : 1;
+}
+
+/*
+ * Check path against the rule list, first matching rule wins.
+ *
+ * path:  path to test; the anchored compare assumes it has no leading '/'
+ * isdir: non-zero if path is a directory (onlydir rules are skipped
+ *        otherwise)
+ *
+ * Returns -1 if the first matching rule is an exclude rule, 1 if it is
+ * any other rule, and 0 if no rule matches.
+ */
+int
+rules_match(const char *path, int isdir)
+{
+       const char *basename, *p = NULL;
+       struct rule *r;
+       size_t i;
+
+       basename = strrchr(path, '/');
+       if (basename != NULL)
+               basename += 1;
+       else
+               basename = path;
+
+       for (i = 0; i < numrules; i++) {
+               r = &rules[i];
+
+               if (r->onlydir && !isdir)
+                       continue;
+
+               if (r->nowild) {
+                       /* fileonly and anchored are mutually exclusive */
+                       if (r->fileonly) {
+                               if (strcmp(basename, r->pattern) == 0)
+                                       return rule_matched(r);
+                       } else if (r->anchored) {
+                               /*
+                                * assumes that neither path nor pattern
+                                * start with a '/'.
+                                */
+                               if (strcmp(path, r->pattern) == 0)
+                                       return rule_matched(r);
+                       } else if (r->leadingdir) {
+                               size_t plen = strlen(r->pattern);
+                               const char *s = path;
+
+                               /*
+                                * match from start or dir boundary also
+                                * match to end or to dir boundary.
+                                * Every occurrence is tried, the first
+                                * hit may sit inside a longer name while
+                                * a later one is on a boundary.
+                                */
+                               while (plen > 0 &&
+                                   (p = strstr(s, r->pattern)) != NULL) {
+                                       if ((p == path || p[-1] == '/') &&
+                                           (p[plen] == '\0' ||
+                                           p[plen] == '/'))
+                                               return rule_matched(r);
+                                       s = p + 1;
+                               }
+                       } else {
+                               size_t len = strlen(path);
+                               size_t plen = strlen(r->pattern);
+
+                               if (len >= plen && strcmp(path + len - plen,
+                                   r->pattern) == 0) {
+                                       /* match all or start on dir boundary */
+                                       if (len == plen ||
+                                           path[len - plen - 1] == '/')
+                                               return rule_matched(r);
+                               }
+                       }
+               } else {
+                       if (r->fileonly) {
+                               p = basename;
+                       } else if (r->anchored || r->numseg == -1) {
+                               /* full path matching */
+                               p = path;
+                       } else {
+                               short nseg = 1;
+
+                               /* match against the last numseg elements */
+                               for (p = path; *p != '\0'; p++)
+                                       if (*p == '/')
+                                               nseg++;
+                               if (nseg < r->numseg) {
+                                       /* path is shorter than the pattern */
+                                       p = NULL;
+                               } else {
+                                       /* skip the surplus leading elements */
+                                       nseg -= r->numseg;
+                                       for (p = path; *p != '\0' && nseg > 0;
+                                           p++) {
+                                               if (*p == '/')
+                                                       nseg--;
+                                       }
+                               }
+                       }
+
+                       if (p != NULL) {
+                               if (rmatch(r->pattern, p, r->leadingdir) == 0)
+                                       return rule_matched(r);
+                       }
+               }
+       }
+
+       return 0;
+}
diff --git a/usr.bin/rsync/sender.c b/usr.bin/rsync/sender.c

index 014d91a..e2999aa 100644 (file)
--- a/usr.bin/rsync/sender.c
+++ b/usr.bin/rsync/sender.c
@@ -1,4 +1,4 @@
-/*     $OpenBSD: sender.c,v 1.29 2021/06/30 13:10:04 claudio Exp $ */
+/*     $OpenBSD: sender.c,v 1.30 2021/08/29 13:43:46 claudio Exp $ */
  /*
   * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
   *
@@ -358,7 +358,7 @@ rsync_sender(struct sess *sess, int fdin,
  {
         struct flist       *fl = NULL;
         const struct flist *f;
-       size_t              i, flsz = 0, phase = 0, excl;
+       size_t              i, flsz = 0, phase = 0;
         int                 rc = 0, c;
         int32_t             idx;
         struct pollfd       pfd[3];
@@ -393,12 +393,8 @@ rsync_sender(struct sess *sess, int fdin,
         }
  
         /* Client sends zero-length exclusions if deleting. */
-
-       if (!sess->opts->server && sess->opts->del &&
-           !io_write_int(sess, fdout, 0)) {
-               ERRX1("io_write_int");
-               goto out;
-       }
+       if (!sess->opts->server && sess->opts->del)
+               send_rules(sess, fdout);
  
         /*
          * Then the file list in any mode.
@@ -427,15 +423,8 @@ rsync_sender(struct sess *sess, int fdin,
          * This is always 0 for now.
          */
  
-       if (sess->opts->server) {
-               if (!io_read_size(sess, fdin, &excl)) {
-                       ERRX1("io_read_size");
-                       goto out;
-               } else if (excl != 0) {
-                       ERRX1("exclusion list is non-empty");
-                       goto out;
-               }
-       }
+       if (sess->opts->server)
+               recv_rules(sess, fdin);
  
         /*
          * Set up our poll events.
author	claudio <claudio@openbsd.org>
	Sun, 29 Aug 2021 13:43:46 +0000 (13:43 +0000)
committer	claudio <claudio@openbsd.org>
	Sun, 29 Aug 2021 13:43:46 +0000 (13:43 +0000)
usr.bin/rsync/Makefile		patch \| blob \| history
usr.bin/rsync/charclass.h	[new file with mode: 0644]	patch \| blob
usr.bin/rsync/extern.h		patch \| blob \| history
usr.bin/rsync/flist.c		patch \| blob \| history
usr.bin/rsync/main.c		patch \| blob \| history
usr.bin/rsync/receiver.c		patch \| blob \| history
usr.bin/rsync/rmatch.c	[new file with mode: 0644]	patch \| blob
usr.bin/rsync/rules.c	[new file with mode: 0644]	patch \| blob
usr.bin/rsync/sender.c		patch \| blob \| history