Implement --exclude/--exclude-from and --include/--include-from.
author claudio <claudio@openbsd.org>
Sun, 29 Aug 2021 13:43:46 +0000 (13:43 +0000)
committer claudio <claudio@openbsd.org>
Sun, 29 Aug 2021 13:43:46 +0000 (13:43 +0000)
Currently only simple includes and excludes work; the advanced filters
introduced later in rsync are not implemented. It is unclear whether the
per-directory filters are something we want to implement. They require
more modern protocols, which openrsync is not able to handle right now.

This adds a special matching function to support ** matching. It behaves
mostly like rsync's version, with the exception of how bad [] patterns are
expanded. For bad patterns openrsync follows fnmatch's behaviour rather
than the somewhat strange rsync behaviour.

Not perfect but committing now so people can test and provide feedback.

usr.bin/rsync/Makefile
usr.bin/rsync/charclass.h [new file with mode: 0644]
usr.bin/rsync/extern.h
usr.bin/rsync/flist.c
usr.bin/rsync/main.c
usr.bin/rsync/receiver.c
usr.bin/rsync/rmatch.c [new file with mode: 0644]
usr.bin/rsync/rules.c [new file with mode: 0644]
usr.bin/rsync/sender.c

index d7af8bd..f2e4d46 100644 (file)
@@ -1,9 +1,9 @@
-#      $OpenBSD: Makefile,v 1.10 2019/05/08 21:30:11 benno Exp $
+#      $OpenBSD: Makefile,v 1.11 2021/08/29 13:43:46 claudio Exp $
 
 PROG=  openrsync
 SRCS=  blocks.c client.c downloader.c fargs.c flist.c hash.c ids.c \
-       io.c log.c mkpath.c mktemp.c receiver.c sender.c server.c session.c \
-       socket.c symlinks.c uploader.c main.c misc.c
+       io.c log.c main.c misc.c mkpath.c mktemp.c receiver.c rmatch.c \
+       rules.c sender.c server.c session.c socket.c symlinks.c uploader.c
 LDADD+= -lcrypto -lm
 DPADD+= ${LIBCRYPTO} ${LIBM}
 MAN=   openrsync.1
diff --git a/usr.bin/rsync/charclass.h b/usr.bin/rsync/charclass.h
new file mode 100644 (file)
index 0000000..1c5ff7e
--- /dev/null
@@ -0,0 +1,29 @@
+/*
+ * Public domain, 2008, Todd C. Miller <millert@openbsd.org>
+ *
+ * $OpenBSD: charclass.h,v 1.1 2021/08/29 13:43:46 claudio Exp $
+ */
+
+/*
+ * POSIX character class support for fnmatch() and glob().
+ */
+static const struct cclass {
+       const char *name;       /* class name as written inside "[:" ":]" */
+       int (*isctype)(int);    /* <ctype.h> predicate for that class */
+} cclasses[] = {
+       { "alnum",      isalnum },
+       { "alpha",      isalpha },
+       { "blank",      isblank },
+       { "cntrl",      iscntrl },
+       { "digit",      isdigit },
+       { "graph",      isgraph },
+       { "lower",      islower },
+       { "print",      isprint },
+       { "punct",      ispunct },
+       { "space",      isspace },
+       { "upper",      isupper },
+       { "xdigit",     isxdigit },
+       { NULL,         NULL }  /* sentinel, not counted in NCCLASSES */
+};
+
+#define NCCLASSES      (sizeof(cclasses) / sizeof(cclasses[0]) - 1)
index d40188d..4681d47 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: extern.h,v 1.39 2021/06/30 15:24:10 claudio Exp $ */
+/*     $OpenBSD: extern.h,v 1.40 2021/08/29 13:43:46 claudio Exp $ */
 /*
  * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -131,12 +131,28 @@ struct    opts {
        int              no_motd;               /* --no-motd */
        int              numeric_ids;           /* --numeric-ids */
        int              one_file_system;       /* -x */
+       int              from0;                 /* -0 */
        char            *rsync_path;            /* --rsync-path */
        char            *ssh_prog;              /* --rsh or -e */
        char            *port;                  /* --port */
        char            *address;               /* --address */
 };
 
+enum rule_type {        /* kind of a filter rule */
+       RULE_NONE,
+       RULE_EXCLUDE,   /* '-' / "exclude" in the rules.c command table */
+       RULE_INCLUDE,   /* '+' / "include" in the rules.c command table */
+       RULE_CLEAR,     /* '!' / "clear" in the rules.c command table */
+#ifdef NOTYET
+       RULE_MERGE,
+       RULE_DIR_MERGE,
+       RULE_SHOW,
+       RULE_HIDE,
+       RULE_PROTECT,
+       RULE_RISK,
+#endif
+};
+
 /*
  * An individual block description for a file.
  * See struct blkset.
@@ -362,6 +378,14 @@ char               *mkstempnodat(int, char *, mode_t, dev_t);
 char           *mkstempsock(const char *, char *);
 int             mktemplate(char **, const char *, int);
 
+int             parse_rule(char *line, enum rule_type);
+void            parse_file(const char *, enum rule_type, int);
+void            send_rules(struct sess *, int);
+void            recv_rules(struct sess *, int);
+int             rules_match(const char *, int);
+
+int             rmatch(const char *, const char *, int);
+
 char           *symlink_read(const char *);
 char           *symlinkat_read(int, const char *);
 
index e33f51b..86cde1d 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: flist.c,v 1.32 2021/06/30 13:10:04 claudio Exp $ */
+/*     $OpenBSD: flist.c,v 1.33 2021/08/29 13:43:46 claudio Exp $ */
 /*
  * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
  * Copyright (c) 2019 Florian Obser <florian@openbsd.org>
@@ -823,6 +823,11 @@ flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,
                ERR("%s: lstat", root);
                return 0;
        } else if (S_ISREG(st.st_mode)) {
+               /* filter files */
+               if (rules_match(root, 0) == -1) {
+                       WARNX("%s: skipping excluded file", root);
+                       return 1;
+               }
                if (!flist_realloc(fl, sz, max)) {
                        ERRX1("flist_realloc");
                        return 0;
@@ -839,7 +844,13 @@ flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,
                if (!sess->opts->preserve_links) {
                        WARNX("%s: skipping symlink", root);
                        return 1;
-               } else if (!flist_realloc(fl, sz, max)) {
+               }
+               /* filter files */
+               if (rules_match(root, 0) == -1) {
+                       WARNX("%s: skipping excluded symlink", root);
+                       return 1;
+               }
+               if (!flist_realloc(fl, sz, max)) {
                        ERRX1("flist_realloc");
                        return 0;
                }
@@ -942,6 +953,15 @@ flist_gen_dirent(struct sess *sess, char *root, struct flist **fl, size_t *sz,
                        nxdev++;
                }
 
+               /* filter files */
+               if (rules_match(ent->fts_path + stripdir,
+                   (ent->fts_info == FTS_D)) == -1) {
+                       WARNX("%s: skipping excluded file",
+                           ent->fts_path + stripdir);
+                       fts_set(fts, ent, FTS_SKIP);
+                       continue;
+               }
+
                /* Allocate a new file entry. */
 
                if (!flist_realloc(fl, sz, max)) {
@@ -1073,6 +1093,11 @@ flist_gen_files(struct sess *sess, size_t argc, char **argv,
                        continue;
                }
 
+               /* filter files */
+               if (rules_match(argv[i], S_ISDIR(st.st_mode)) == -1) {
+                       WARNX("%s: skipping excluded file", argv[i]);
+                       continue;
+               }
 
                f = &fl[flsz++];
                assert(f != NULL);
@@ -1297,6 +1322,16 @@ flist_gen_dels(struct sess *sess, const char *root, struct flist **fl,
                                continue;
                }
 
+               /* filter files on delete */
+               /* TODO handle --delete-excluded */
+               if (rules_match(ent->fts_path + stripdir,
+                   (ent->fts_info == FTS_D)) == -1) {
+                       WARNX("skip excluded file %s",
+                           ent->fts_path + stripdir);
+                       fts_set(fts, ent, FTS_SKIP);
+                       continue;
+               }
+
                /* Look up in hashtable. */
 
                memset(&hent, 0, sizeof(ENTRY));
index cb4a034..3117462 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: main.c,v 1.56 2021/07/14 11:14:27 claudio Exp $ */
+/*     $OpenBSD: main.c,v 1.57 2021/08/29 13:43:46 claudio Exp $ */
 /*
  * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -276,6 +276,10 @@ static struct opts  opts;
 #define OP_RSYNCPATH   1002
 #define OP_TIMEOUT     1003
 #define OP_VERSION     1004
+#define OP_EXCLUDE     1005
+#define OP_INCLUDE     1006
+#define OP_EXCLUDE_FROM        1007
+#define OP_INCLUDE_FROM        1008
 
 const struct option     lopts[] = {
     { "address",       required_argument, NULL,                OP_ADDRESS },
@@ -286,9 +290,15 @@ const struct option         lopts[] = {
     { "devices",       no_argument,    &opts.devices,          1 },
     { "no-devices",    no_argument,    &opts.devices,          0 },
     { "dry-run",       no_argument,    &opts.dry_run,          1 },
+    { "exclude",       required_argument, NULL,                OP_EXCLUDE },
+    { "exclude-from",  required_argument, NULL,                OP_EXCLUDE_FROM },
+    { "from0",         no_argument,    NULL,                   '0' },
+    { "no-from0",      no_argument,    &opts.from0,            0 },
     { "group",         no_argument,    &opts.preserve_gids,    1 },
     { "no-group",      no_argument,    &opts.preserve_gids,    0 },
     { "help",          no_argument,    NULL,                   'h' },
+    { "include",       required_argument, NULL,                OP_INCLUDE },
+    { "include-from",  required_argument, NULL,                OP_INCLUDE_FROM },
     { "links",         no_argument,    &opts.preserve_links,   1 },
     { "no-links",      no_argument,    &opts.preserve_links,   0 },
     { "no-motd",       no_argument,    &opts.no_motd,          1 },
@@ -324,6 +334,7 @@ main(int argc, char *argv[])
        struct fargs    *fargs;
        char            **args;
        const char      *errstr;
+
        /* Global pledge. */
 
        if (pledge("stdio unix rpath wpath cpath dpath inet fattr chown dns getpw proc exec unveil",
@@ -333,6 +344,9 @@ main(int argc, char *argv[])
        while ((c = getopt_long(argc, argv, "Dae:ghlnoprtvxz", lopts, NULL))
            != -1) {
                switch (c) {
+               case '0':
+                       opts.from0 = 1;
+                       break;
                case 'D':
                        opts.devices = 1;
                        opts.specials = 1;
@@ -398,6 +412,24 @@ main(int argc, char *argv[])
                                errx(ERR_SYNTAX, "timeout is %s: %s",
                                    errstr, optarg);
                        break;
+               case OP_EXCLUDE:
+                       if (parse_rule(optarg, RULE_EXCLUDE) == -1)
+                               errx(ERR_SYNTAX, "syntax error in exclude: %s",
+                                   optarg);
+                       break;
+               case OP_INCLUDE:
+                       if (parse_rule(optarg, RULE_INCLUDE) == -1)
+                               errx(ERR_SYNTAX, "syntax error in include: %s",
+                                   optarg);
+                       break;
+               case OP_EXCLUDE_FROM:
+                       parse_file(optarg, RULE_EXCLUDE,
+                           opts.from0 ? '\0' : '\n' );
+                       break;
+               case OP_INCLUDE_FROM:
+                       parse_file(optarg, RULE_INCLUDE,
+                           opts.from0 ? '\0' : '\n' );
+                       break;
                case OP_VERSION:
                        fprintf(stderr, "openrsync: protocol version %u\n",
                            RSYNC_PROTOCOL);
index c731211..6e5b016 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: receiver.c,v 1.28 2021/06/30 13:10:04 claudio Exp $ */
+/*     $OpenBSD: receiver.c,v 1.29 2021/08/29 13:43:46 claudio Exp $ */
 
 /*
  * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
@@ -172,7 +172,7 @@ int
 rsync_receiver(struct sess *sess, int fdin, int fdout, const char *root)
 {
        struct flist    *fl = NULL, *dfl = NULL;
-       size_t           i, flsz = 0, dflsz = 0, excl;
+       size_t           i, flsz = 0, dflsz = 0;
        char            *tofree;
        int              rc = 0, dfd = -1, phase = 0, c;
        int32_t          ioerror;
@@ -184,22 +184,13 @@ rsync_receiver(struct sess *sess, int fdin, int fdout, const char *root)
        if (pledge("stdio unix rpath wpath cpath dpath fattr chown getpw unveil", NULL) == -1)
                err(ERR_IPC, "pledge");
 
-       /* Client sends zero-length exclusions. */
+       /* Client sends exclusions. */
+       if (!sess->opts->server)
+               send_rules(sess, fdout);
 
-       if (!sess->opts->server && !io_write_int(sess, fdout, 0)) {
-               ERRX1("io_write_int");
-               goto out;
-       }
-
-       if (sess->opts->server && sess->opts->del) {
-               if (!io_read_size(sess, fdin, &excl)) {
-                       ERRX1("io_read_size");
-                       goto out;
-               } else if (excl != 0) {
-                       ERRX("exclusion list is non-empty");
-                       goto out;
-               }
-       }
+       /* Server receives exclusions if delete is on. */
+       if (sess->opts->server && sess->opts->del)
+               recv_rules(sess, fdin);
 
        /*
         * Start by receiving the file list and our mystery number.
diff --git a/usr.bin/rsync/rmatch.c b/usr.bin/rsync/rmatch.c
new file mode 100644 (file)
index 0000000..b037b80
--- /dev/null
@@ -0,0 +1,395 @@
+/*     $OpenBSD: rmatch.c,v 1.1 2021/08/29 13:43:46 claudio Exp $      */
+
+/*
+ * Copyright (c) 2021 Claudio Jeker <claudio@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * Copyright (c) 1989, 1993, 1994
+ *     The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Guido van Rossum.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <string.h>
+#include <limits.h>
+
+#include "charclass.h"
+
+#define        RANGE_MATCH     1
+#define        RANGE_NOMATCH   0
+#define        RANGE_ERROR     (-1)
+
+static int /* RANGE_MATCH, RANGE_NOMATCH, or RANGE_ERROR for a bad class */
+classmatch(const char *pattern, char test, const char **ep)
+{
+       const char *mismatch = pattern; /* *ep is reset to this on error */
+       const struct cclass *cc;
+       const char *colon;
+       size_t len;
+       int rval = RANGE_NOMATCH;
+
+       if (*pattern++ != ':') {        /* a class must start with ':' */
+               *ep = mismatch;
+               return RANGE_ERROR;
+       }
+       if ((colon = strchr(pattern, ':')) == NULL || colon[1] != ']') {
+               *ep = mismatch;
+               return RANGE_ERROR;
+       }
+       *ep = colon + 2;                /* continue after the closing ":]" */
+       len = (size_t)(colon - pattern);        /* length of the class name */
+
+       for (cc = cclasses; cc->name != NULL; cc++) {
+               if (!strncmp(pattern, cc->name, len) && cc->name[len] == '\0') {
+                       if (cc->isctype((unsigned char)test))
+                               rval = RANGE_MATCH;
+                       return rval;
+               }
+       }
+
+       /* unknown character class name, caller treats it as normal text */
+       *ep = mismatch;
+       return RANGE_ERROR;
+}
+
+static int /* RANGE_MATCH, RANGE_NOMATCH or RANGE_ERROR */
+rangematch(const char **pp, char test)
+{
+       const char *pattern = *pp;      /* points just after the opening '[' */
+       int negate, ok;
+       char c, c2;
+
+       /*
+        * A bracket expression starting with an unquoted circumflex
+        * character produces unspecified results (IEEE 1003.2-1992,
+        * 3.13.2).  This implementation treats it like '!', for
+        * consistency with the regular expression syntax.
+        * J.T. Conklin (conklin@ngai.kaleida.com)
+        */
+       if ((negate = (*pattern == '!' || *pattern == '^')))
+               ++pattern;
+
+       /*
+        * A right bracket shall lose its special meaning and represent
+        * itself in a bracket expression if it occurs first in the list.
+        * -- POSIX.2 2.8.3.2
+        */
+       ok = 0;
+       c = *pattern++;         /* first char is consumed before any ']' test */
+       do {
+               if (c == '[') {
+                       switch (classmatch(pattern, test, &pattern)) {
+                       case RANGE_MATCH:
+                               ok = 1;
+                               continue;
+                       case RANGE_NOMATCH:
+                               continue;
+                       default:
+                               /* invalid character class, treat literally. */
+                               break;
+                       }
+               }
+               if (c == '\\')
+                       c = *pattern++;
+               if (c == '\0')          /* unterminated bracket expression */
+                       return RANGE_ERROR;
+               /* patterns can not match on '/' */
+               if (c == '/')
+                       return RANGE_NOMATCH;
+               if (*pattern == '-'
+                   && (c2 = *(pattern + 1)) != '\0' && c2 != ']') {
+                       pattern += 2;
+                       if (c2 == '\\')
+                               c2 = *pattern++;
+                       if (c2 == '\0')
+                               return RANGE_ERROR;
+                       if (c <= test && test <= c2)
+                               ok = 1;
+               } else if (c == test)
+                       ok = 1;
+       } while ((c = *pattern++) != ']');
+
+       *pp = pattern;          /* only updated once the closing ']' was seen */
+       return (ok == negate ? RANGE_NOMATCH : RANGE_MATCH);
+}
+
+/*
+ * Single character match, advances pattern as much as needed.
+ * Return 0 on match and !0 (aka 1) on mismatch.
+ * pp is advanced on every return, whether or not the character matched.
+ */
+static int
+matchchar(const char **pp, const char in)
+{
+       const char *pattern = *pp;
+       char c;
+       int rv = 0;
+
+       switch (c = *pattern++) {
+       case '?':       /* any single char except end of string and '/' */
+               if (in == '\0')
+                       rv = 1;
+               if (in == '/')
+                       rv = 1;
+               break;
+       case '[':
+               if (in == '\0')
+                       rv = 1;
+               if (in == '/')
+                       rv = 1;
+               if (rv == 1)
+                       break;
+
+               switch (rangematch(&pattern, in)) {
+               case RANGE_ERROR:
+                       /* not a good range, treat the '[' as normal text */
+                       goto normal;
+               case RANGE_MATCH:
+                       break;
+               case RANGE_NOMATCH:
+                       rv = 1;
+               }
+               break;
+       case '\\':
+               if ((c = *pattern++) == '\0') { /* backslash ends the pattern */
+                       c = '\\';               /* so it matches itself */
+                       --pattern;
+               }
+               /* FALLTHROUGH */
+       default:
+       normal:
+               if (c != in)
+                       rv = 1;
+               break;
+       }
+
+       *pp = pattern;
+       return rv;
+}
+
+/*
+ * Do a substring match. If wild is set then the pattern started with a '*'.
+ * The match will go until '*', '/' or '\0' is encountered in pattern or
+ * the input string is consumed up to end. On success pp and ss are updated;
+ * on failure pp is untouched but a wild retry may already have advanced ss.
+ */
+static int
+matchsub(const char **pp, const char **ss, const char *end, int wild)
+{
+       const char *pattern = *pp;
+       const char *p = pattern;
+       const char *string = *ss;
+       size_t matchlen;
+
+       /* first calculate how many characters the submatch will consume */
+       for (matchlen = 0; *p != '\0'; matchlen++) {
+               if (p[0] == '*')
+                       break;
+               /* '/' acts as barrier */
+               if (p[0] == '/' || (p[0] == '\\' && p[1] == '/')) {
+                       if (wild) {
+                               /* match needs to match up to end of segment */
+                               if (string > end - matchlen)
+                                       return 1;
+                               string = end - matchlen;
+                               wild = 0;
+                       }
+                       break;
+               }
+               /*
+                * skip forward one character in pattern by doing a
+                * dummy lookup.
+                */
+               matchchar(&p, ' ');
+       }
+
+       /* not enough chars left (NOTE(review): end - matchlen may underflow) */
+       if (string > end - matchlen)
+               return 1;
+
+       if (*p == '\0') {
+               if (wild) {
+                       /* match needs to match up to end of segment */
+                       string = end - matchlen;
+                       wild = 0;
+               }
+       }
+
+       while (*pattern != '\0' && *pattern != '*') {
+               /* eat possible escape char before '/' */
+               if (pattern[0] == '\\' && pattern[1] == '/')
+                       pattern++;
+               if (pattern[0] == '/')
+                       break;
+
+               /* check if there are still characters available to compare */
+               if (string >= end)
+                       return 1;
+               /* Compare one char at a time. */
+               if (!matchchar(&pattern, *string++))
+                       continue;
+               if (wild) {
+                       /* skip forward one char and restart match */
+                       string = ++*ss;
+                       pattern = *pp;
+                       /* can it still match? */
+                       if (string > end - matchlen)
+                               return 1;
+               } else {
+                       /* failed match */
+                       return 1;
+               }
+       }
+
+       *pp = pattern;
+       *ss = string;
+       return 0;
+}
+
+/*
+ * File matching with the addition of the special '**'. Returns 0 on match
+ * (with leading_dir set, also if string continues with '/'), !0 otherwise.
+ */
+int
+rmatch(const char *pattern, const char *string, int leading_dir)
+{
+       const char *segend, *segnext, *mismatch = NULL;
+       int wild, starstar;
+
+       while (*pattern && *string) {
+
+               /* handle leading '/' first */
+               if (pattern[0] == '\\' && pattern[1] == '/')
+                       pattern++;
+               if (*string == '/' && *pattern == '/') {
+                       string++;
+                       pattern++;
+               }
+
+               /* match to the next '/' in string */
+               segend = strchr(string, '/');
+               if (segend == NULL)
+                       segend = strchr(string, '\0');
+
+               while (*pattern) {
+                       /*
+                        * Check for '*' and '**'. For '*' reduce '*' and '?'
+                        * sequences into n-'?' and trailing '*'.
+                        * For '**' this optimisation can not be done
+                        * since '**???/' will match 'a/aa/aaa/' but not
+                        * 'a/aa/aa/'; additional '*' are still reduced.
+                        */
+                       wild = 0;
+                       starstar = 0;
+                       for ( ; *pattern == '*' || *pattern == '?'; pattern++) {
+                               if (pattern[0] == '*') {
+                                       if (pattern[1] == '*') {
+                                               starstar = 1;
+                                               pattern++;
+                                       }
+                                       wild = 1;
+                               } else if (!starstar) { /* pattern[0] == '?' */
+                                       if (string < segend && *string != '/')
+                                               string++;
+                                       else
+                                               /* no match possible */
+                                               return 1;
+                               } else
+                                       break;
+                       }
+
+                       /* pattern ends in '**' so it is a match */
+                       if (starstar && *pattern == '\0')
+                               return 0;
+
+                       if (starstar) {
+                               segnext = segend;       /* retry point, string */
+                               mismatch = pattern;     /* retry point, pattern */
+                       }
+
+                       while (string < segend) {
+                               if (matchsub(&pattern, &string, segend, wild)) {
+failed_match:
+                                       /*
+                                        * failed to match, if starstar retry
+                                        * with the next segment.
+                                        */
+                                       if (mismatch) {
+                                               pattern = mismatch;
+                                               wild = 1;
+                                               string = segnext;
+                                               if (*string == '/')
+                                                       string++;
+                                               segend = strchr(string, '/');
+                                               if (!segend)
+                                                       segend = strchr(string,
+                                                           '\0');
+                                               segnext = segend;
+                                               if (string < segend)
+                                                       continue;
+                                       }
+                                       /* no match possible */
+                                       return 1;
+                               }
+                               break;
+                       }
+
+                       /* at end of string segment, eat up any extra '*' */
+                       if (string >= segend && *pattern != '*')
+                               break;
+               }
+               /* both sides must now be at a segment boundary or at the end */
+               if (*string != '\0' && *string != '/')
+                       goto failed_match;
+               if (*pattern != '\0' && *pattern != '/')
+                       goto failed_match;
+       }
+
+       /* if both pattern and string are consumed it was a match */
+       if (*pattern == '\0' && *string == '\0')
+               return 0;
+       /* if leading_dir is set then string can also be '/' for success */
+       if (leading_dir && *pattern == '\0' && *string == '/')
+               return 0;
+       /* else failure */
+       return 1;
+}
diff --git a/usr.bin/rsync/rules.c b/usr.bin/rsync/rules.c
new file mode 100644 (file)
index 0000000..c34e7d9
--- /dev/null
@@ -0,0 +1,479 @@
+#include <err.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "extern.h"
+
+/*
+ * One parsed include/exclude rule.  The flag members are filled in by
+ * parse_pattern() and consulted by rules_match() and send_rules().
+ */
+struct rule {
+       /*
+        * strdup()ed pattern text; a leading '/' and a trailing '/' or
+        * '/***' have been stripped and are recorded in the flags below
+        */
+       char                    *pattern;
+       /* what a match of this rule means (exclude, include, ...) */
+       enum rule_type          type;
+#ifdef NOTYET
+       unsigned int            modifiers;
+#endif
+       /* number of '/' separated segments, -1 if the pattern has "**" */
+       short                    numseg;
+       /* pattern started with '/' */
+       unsigned char            anchored;
+       /* single segment and not anchored: compared with the basename */
+       unsigned char            fileonly;
+       /* pattern contains none of '*', '?' or '[' */
+       unsigned char            nowild;
+       /* pattern ended in '/': rule is skipped for non-directories */
+       unsigned char            onlydir;
+       /* pattern ended in '/***' */
+       unsigned char            leadingdir;
+};
+
+/*
+ * Array of all parsed rules in the order they were added.  It is grown
+ * on demand by get_next_rule().
+ */
+static struct rule     *rules;
+static size_t           numrules;      /* number of rules */
+static size_t           rulesz;        /* available size */
+
+/*
+ * Table of rule commands with their one character and long spelling.
+ * Up to protocol 29 filter rules only support - + ! and no modifiers.
+ * The table ends with an all-zero entry.
+ */
+const struct command {
+       enum rule_type          type;
+       char                    sopt;
+       const char              *lopt;
+} commands[] = {
+       { .type = RULE_EXCLUDE, .sopt = '-', .lopt = "exclude" },
+       { .type = RULE_INCLUDE, .sopt = '+', .lopt = "include" },
+       { .type = RULE_CLEAR, .sopt = '!', .lopt = "clear" },
+#ifdef NOTYET
+       { .type = RULE_MERGE, .sopt = '.', .lopt = "merge" },
+       { .type = RULE_DIR_MERGE, .sopt = ':', .lopt = "dir-merge" },
+       { .type = RULE_SHOW, .sopt = 'S', .lopt = "show" },
+       { .type = RULE_HIDE, .sopt = 'H', .lopt = "hide" },
+       { .type = RULE_PROTECT, .sopt = 'P', .lopt = "protect" },
+       { .type = RULE_RISK, .sopt = 'R', .lopt = "risk" },
+#endif
+       { 0 }
+};
+
+#ifdef NOTYET
+/*
+ * Rule modifier bits.  Not compiled until modifier support is added.
+ */
+#define MOD_ABSOLUTE                   0x0001
+#define MOD_NEGATE                     0x0002
+#define MOD_CVSEXCLUDE                 0x0004
+#define MOD_SENDING                    0x0008
+#define MOD_RECEIVING                  0x0010
+#define MOD_PERISHABLE                 0x0020
+#define MOD_XATTR                      0x0040
+#define MOD_MERGE_EXCLUDE              0x0080
+#define MOD_MERGE_INCLUDE              0x0100
+#define MOD_MERGE_CVSCOMPAT            0x0200
+#define MOD_MERGE_EXCLUDE_FILE         0x0400
+#define MOD_MERGE_NO_INHERIT           0x0800
+#define MOD_MERGE_WORDSPLIT            0x1000
+
+/*
+ * Maps every modifier bit to its one character spelling.  The table
+ * ends with an all-zero entry.
+ * maybe support absolute and negate
+ */
+const struct modifier {
+       unsigned int            modifier;
+       char                    sopt;
+} modifiers[] = {
+       { MOD_ABSOLUTE,                 '/' },
+       { MOD_NEGATE,                   '!' },
+       { MOD_CVSEXCLUDE,               'C' },
+       { MOD_SENDING,                  's' },
+       { MOD_RECEIVING,                'r' },
+       { MOD_PERISHABLE,               'p' },
+       { MOD_XATTR,                    'x' },
+       /* for '.' and ':' types */
+       { MOD_MERGE_EXCLUDE,            '-' },
+       { MOD_MERGE_INCLUDE,            '+' },
+       { MOD_MERGE_CVSCOMPAT,          'C' },
+       { MOD_MERGE_EXCLUDE_FILE,       'e' },
+       { MOD_MERGE_NO_INHERIT,         'n' },
+       { MOD_MERGE_WORDSPLIT,          'w' },
+       { 0 }
+};
+#endif
+
+/*
+ * Reserve the next slot of the rules array and return a pointer to it.
+ * The array starts with room for 16 rules and doubles whenever it is
+ * full.  Exits with ERR_NOMEM if the array cannot be grown.
+ */
+static struct rule *
+get_next_rule(void)
+{
+       struct rule *grown;
+       size_t want;
+
+       numrules++;
+       if (numrules <= rulesz)
+               return &rules[numrules - 1];
+
+       want = (rulesz == 0) ? 16 : rulesz * 2;
+       grown = recallocarray(rules, rulesz, want, sizeof(*rules));
+       if (grown == NULL)
+               err(ERR_NOMEM, NULL);
+       rules = grown;
+       rulesz = want;
+
+       return &rules[numrules - 1];
+}
+
+/*
+ * Map the command token at the start of a rule to its rule type.
+ * command points at the token and len is its length; the token does
+ * not need to be NUL terminated.  A token matches either the single
+ * character form (len == 1) or the complete long name of a command.
+ * Abbreviated long names and empty tokens are not commands, otherwise
+ * a plain pattern like "i_data" would be taken for "include data".
+ * Returns RULE_NONE if the token is not a known command or if it
+ * carries modifiers.
+ */
+static enum rule_type
+parse_command(const char *command, size_t len)
+{
+       const char *mod;
+       size_t  i;
+
+       /* an empty token never names a command */
+       if (len == 0)
+               return RULE_NONE;
+
+       mod = memchr(command, ',', len);
+       if (mod != NULL) {
+               /* XXX modifiers not yet implemented */
+               return RULE_NONE;
+       }
+
+       for (i = 0; commands[i].type != RULE_NONE; i++) {
+               /* the long name has to match completely */
+               if (strlen(commands[i].lopt) == len &&
+                   strncmp(commands[i].lopt, command, len) == 0)
+                       return commands[i].type;
+               if (len == 1 && commands[i].sopt == *command)
+                       return commands[i].type;
+       }
+
+       return RULE_NONE;
+}
+
+/*
+ * Analyse pattern and store the result in r.  A leading '/' and a
+ * trailing '/' or '/***' are removed from the pattern (the buffer is
+ * modified in place) and remembered as flags; what is left is copied
+ * with strdup().  Exits with ERR_NOMEM if the copy fails.
+ */
+static void
+parse_pattern(struct rule *r, char *pattern)
+{
+       const char *sep;
+       size_t len;
+       short segments;
+
+       /* a '/' at the start anchors the pattern and is not stored */
+       if (pattern[0] == '/') {
+               r->anchored = 1;
+               pattern++;
+       }
+
+       /*
+        * A '/' or '/' + '***' at the end is special as well and gets
+        * cut off, so the stored pattern neither starts nor ends with
+        * a '/'.
+        */
+       len = strlen(pattern);
+       if (len > 1 && pattern[len - 1] == '/') {
+               pattern[len - 1] = '\0';
+               r->onlydir = 1;
+       }
+       if (len > 4 && strcmp(&pattern[len - 4], "/***") == 0) {
+               pattern[len - 4] = '\0';
+               r->leadingdir = 1;
+       }
+
+       /* one segment plus one more for every '/' in the pattern */
+       segments = 1;
+       for (sep = strchr(pattern, '/'); sep != NULL;
+           sep = strchr(sep + 1, '/'))
+               segments++;
+       r->numseg = segments;
+
+       /* a lone unanchored segment is matched against the basename */
+       if (segments == 1 && !r->anchored)
+               r->fileonly = 1;
+
+       /* plain strings need no wildcard matching, "**" spans segments */
+       if (strpbrk(pattern, "*?[") == NULL)
+               r->nowild = 1;
+       else if (strstr(pattern, "**") != NULL)
+               r->numseg = -1;
+
+       if ((r->pattern = strdup(pattern)) == NULL)
+               err(ERR_NOMEM, NULL);
+}
+
+/*
+ * Parse one rule line and append the resulting rule to the rule list.
+ * A line is an optional command, separated by ' ' or '_' from the
+ * pattern.  If the line does not start with a known command the rule
+ * type def is used and the whole line is the pattern; with def set to
+ * RULE_NONE such a line is an error.  Comments (lines starting with
+ * '#' or ';') and empty lines are skipped.  line may be modified in
+ * place while the pattern is parsed.
+ * Returns 0 on success or for a skipped line and -1 on a syntax error.
+ */
+int
+parse_rule(char *line, enum rule_type def)
+{
+       enum rule_type type;
+       struct rule *r;
+       char *pattern;
+       size_t len;
+
+       switch (*line) {
+       case '#':
+       case ';':
+               /* comment */
+               return 0;
+       case '\0':
+               /* ignore empty lines */
+               return 0;
+       default:
+               len = strcspn(line, " _");
+               type = parse_command(line, len);
+               if (type == RULE_NONE) {
+                       if (def == RULE_NONE)
+                               return -1;
+                       type = def;
+                       pattern = line;
+               } else {
+                       /*
+                        * Skip the separator only if there is one.  A
+                        * command that takes up the whole line (like
+                        * "!") ends at the NUL and stepping over it
+                        * would read beyond the end of the line.
+                        */
+                       pattern = line + len;
+                       if (*pattern != '\0')
+                               pattern++;
+               }
+
+               if (*pattern == '\0' && type != RULE_CLEAR)
+                       return -1;
+               if (*pattern != '\0' && type == RULE_CLEAR)
+                       return -1;
+               break;
+       }
+
+       r = get_next_rule();
+       r->type = type;
+       parse_pattern(r, pattern);
+
+       return 0;
+}
+
+/*
+ * Read rules from file and add them with parse_rule(), using def as
+ * the rule type for entries without a command.  Entries are separated
+ * by the character delim.  Exits with ERR_SYNTAX if the file cannot
+ * be opened or read or if an entry is not a valid rule.
+ */
+void
+parse_file(const char *file, enum rule_type def, int delim)
+{
+       FILE *fp;
+       char *line = NULL;
+       size_t linesize = 0, linenum = 0;
+       ssize_t linelen;
+
+       if ((fp = fopen(file, "r")) == NULL)
+               err(ERR_SYNTAX, "open: %s", file);
+
+       while ((linelen = getdelim(&line, &linesize, delim, fp)) != -1) {
+               linenum++;
+               /*
+                * Strip the delimiter.  The last entry of a file may
+                * end at EOF without one, in which case its final
+                * character belongs to the rule and has to stay.
+                */
+               if (linelen > 0 &&
+                   (unsigned char)line[linelen - 1] == (unsigned char)delim)
+                       line[linelen - 1] = '\0';
+               if (parse_rule(line, def) == -1)
+                       errx(ERR_SYNTAX, "syntax error in %s at entry %zu",
+                           file, linenum);
+       }
+
+       free(line);
+       if (ferror(fp))
+               err(ERR_SYNTAX, "failed to parse file %s", file);
+       fclose(fp);
+}
+
+/*
+ * Build the text that precedes the pattern of rule r when it is sent
+ * to the peer: the one character command, a space and, for anchored
+ * patterns, the '/' that parse_pattern() stripped.
+ * Returns a pointer to a static buffer that is overwritten by the
+ * next call.  Exits with ERR_SYNTAX on an unknown rule type.
+ */
+static const char *
+send_command(struct rule *r)
+{
+       static char buf[16];
+       char *b = buf;
+       char *ep = buf + sizeof(buf);
+#ifdef NOTYET
+       size_t i;
+#endif
+
+       switch (r->type) {
+       case RULE_EXCLUDE:
+               *b++ = '-';
+               break;
+       case RULE_INCLUDE:
+               *b++ = '+';
+               break;
+       case RULE_CLEAR:
+               *b++ = '!';
+               break;
+#ifdef NOTYET
+       case RULE_MERGE:
+               *b++ = '.';
+               break;
+       case RULE_DIR_MERGE:
+               *b++ = ':';
+               break;
+       case RULE_SHOW:
+               *b++ = 'S';
+               break;
+       case RULE_HIDE:
+               *b++ = 'H';
+               break;
+       case RULE_PROTECT:
+               *b++ = 'P';
+               break;
+       case RULE_RISK:
+               *b++ = 'R';
+               break;
+#endif
+       default:
+               /* errno is meaningless here, so errx() and not err() */
+               errx(ERR_SYNTAX, "unknown rule type %d", r->type);
+       }
+
+#ifdef NOTYET
+       for (i = 0; modifiers[i].modifier != 0; i++) {
+               if (r->modifiers & modifiers[i].modifier)
+                       *b++ = modifiers[i].sopt;
+               if (b >= ep - 3)
+                       errx(ERR_SYNTAX, "rule modifiers overflow");
+       }
+#endif
+       /* keep room for the ' ', the optional '/' and the NUL */
+       if (b >= ep - 3)
+               errx(ERR_SYNTAX, "rule prefix overflow");
+       *b++ = ' ';
+
+       /* include the stripped root '/' for anchored patterns */
+       if (r->anchored)
+               *b++ = '/';
+       *b++ = '\0';
+       return buf;
+}
+
+/*
+ * Return the suffix that parse_pattern() removed from the pattern of
+ * rule r: "/***" for leadingdir rules, "/" for onlydir rules and the
+ * empty string otherwise.  The result lives in a static buffer that is
+ * overwritten by the next call.
+ */
+static const char *
+postfix_command(struct rule *r)
+{
+       static char buf[8];
+       const char *tail = "";
+
+       /* leadingdir wins if both flags are set */
+       if (r->onlydir)
+               tail = "/";
+       if (r->leadingdir)
+               tail = "/***";
+       strlcpy(buf, tail, sizeof(buf));
+
+       return buf;
+}
+
+/*
+ * Write all rules to fd.  Every rule is sent as an integer length
+ * followed by the command prefix, the pattern and the suffix that was
+ * stripped from the pattern.  A length of 0 ends the list.
+ * Exits if writing fails.
+ */
+void
+send_rules(struct sess *sess, int fd)
+{
+       struct rule *cur;
+       const char *prefix, *suffix;
+       size_t prefixlen, patlen, suffixlen, n;
+
+       n = 0;
+       while (n < numrules) {
+               cur = &rules[n++];
+
+               prefix = send_command(cur);
+               if (prefix == NULL)
+                       err(ERR_PROTOCOL,
+                           "rules are incompatible with remote rsync");
+               suffix = postfix_command(cur);
+
+               prefixlen = strlen(prefix);
+               patlen = strlen(cur->pattern);
+               suffixlen = strlen(suffix);
+
+               if (!io_write_int(sess, fd, prefixlen + patlen + suffixlen))
+                       err(ERR_SOCK_IO, "send rules");
+               if (!io_write_buf(sess, fd, prefix, prefixlen))
+                       err(ERR_SOCK_IO, "send rules");
+               if (!io_write_buf(sess, fd, cur->pattern, patlen))
+                       err(ERR_SOCK_IO, "send rules");
+               /* put back the '/' or '/***' cut off while parsing */
+               if (suffixlen > 0 &&
+                   !io_write_buf(sess, fd, suffix, suffixlen))
+                       err(ERR_SOCK_IO, "send rules");
+       }
+
+       /* a zero length terminates the list */
+       if (!io_write_int(sess, fd, 0))
+               err(ERR_SOCK_IO, "send rules");
+}
+
+/*
+ * Read rules from fd until a rule length of 0 is received and add each
+ * one with parse_rule().  Received rules must start with a command
+ * since no default rule type is given.  Exits on read errors, on
+ * rules that do not fit the buffer and on rules that fail to parse.
+ */
+void
+recv_rules(struct sess *sess, int fd)
+{
+       char buf[8192];
+       size_t rulelen;
+
+       for (;;) {
+               if (!io_read_size(sess, fd, &rulelen))
+                       err(ERR_SOCK_IO, "receive rules");
+
+               /* end of list */
+               if (rulelen == 0)
+                       return;
+               if (rulelen >= sizeof(buf) - 1)
+                       errx(ERR_SOCK_IO, "received rule too long");
+
+               if (!io_read_buf(sess, fd, buf, rulelen))
+                       err(ERR_SOCK_IO, "receive rules");
+               buf[rulelen] = '\0';
+
+               if (parse_rule(buf, RULE_NONE) == -1)
+                       errx(ERR_PROTOCOL, "syntax error in received rules");
+       }
+}
+
+/*
+ * Result of rules_match() for a path that matched rule r:
+ * -1 for exclude rules and 1 for every other rule type.
+ */
+static inline int
+rule_matched(struct rule *r)
+{
+       /* TODO apply negation once modifiers are added */
+
+       return (r->type == RULE_EXCLUDE) ? -1 : 1;
+}
+
+/*
+ * Check path against all rules in order; the first matching rule
+ * decides.  isdir tells if path is a directory, rules for directories
+ * only are skipped otherwise.
+ * Returns -1 if the deciding rule is an exclude rule, 1 if it is any
+ * other rule and 0 if no rule matched.
+ */
+int
+rules_match(const char *path, int isdir)
+{
+       const char *basename, *p = NULL;
+       struct rule *r;
+       size_t i;
+
+       basename = strrchr(path, '/');
+       if (basename != NULL)
+               basename += 1;
+       else
+               basename = path;
+
+       for (i = 0; i < numrules; i++) {
+               r = &rules[i];
+
+               if (r->onlydir && !isdir)
+                       continue;
+
+               if (r->nowild) {
+                       /* fileonly and anchored are mutually exclusive */
+                       if (r->fileonly) {
+                               if (strcmp(basename, r->pattern) == 0)
+                                       return rule_matched(r);
+                       } else if (r->anchored) {
+                               /*
+                                * assumes that neither path nor pattern
+                                * start with a '/'.
+                                */
+                               if (strcmp(path, r->pattern) == 0)
+                                       return rule_matched(r);
+                       } else if (r->leadingdir) {
+                               size_t plen = strlen(r->pattern);
+
+                               /*
+                                * match from start or dir boundary also
+                                * match to end or to dir boundary.
+                                * Every occurrence has to be tried, the
+                                * first one may sit inside of a name
+                                * while a later one is on a boundary.
+                                * The pattern of a leadingdir rule is
+                                * never empty, so p + 1 stays inside of
+                                * path.
+                                */
+                               for (p = strstr(path, r->pattern); p != NULL;
+                                   p = strstr(p + 1, r->pattern)) {
+                                       if ((p == path || p[-1] == '/') &&
+                                           (p[plen] == '\0' ||
+                                           p[plen] == '/'))
+                                               return rule_matched(r);
+                               }
+                       } else {
+                               size_t len = strlen(path);
+                               size_t plen = strlen(r->pattern);
+
+                               if (len >= plen && strcmp(path + len - plen,
+                                   r->pattern) == 0) {
+                                       /* match all or start on dir boundary */
+                                       if (len == plen ||
+                                           path[len - plen - 1] == '/')
+                                               return rule_matched(r);
+                               }
+                       }
+               } else {
+                       if (r->fileonly) {
+                               p = basename;
+                       } else if (r->anchored || r->numseg == -1) {
+                               /* full path matching */
+                               p = path;
+                       } else {
+                               short nseg = 1;
+
+                               /* match against the last numseg elements */
+                               for (p = path; *p != '\0'; p++)
+                                       if (*p == '/')
+                                               nseg++;
+                               if (nseg < r->numseg) {
+                                       /* path is shorter than the pattern */
+                                       p = NULL;
+                               } else {
+                                       /* skip the segments not compared */
+                                       nseg -= r->numseg;
+                                       for (p = path; *p != '\0' && nseg > 0;
+                                           p++) {
+                                               if (*p == '/')
+                                                       nseg--;
+                                       }
+                               }
+                       }
+
+                       if (p != NULL) {
+                               if (rmatch(r->pattern, p, r->leadingdir) == 0)
+                                       return rule_matched(r);
+                       }
+               }
+       }
+
+       return 0;
+}
index 014d91a..e2999aa 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: sender.c,v 1.29 2021/06/30 13:10:04 claudio Exp $ */
+/*     $OpenBSD: sender.c,v 1.30 2021/08/29 13:43:46 claudio Exp $ */
 /*
  * Copyright (c) 2019 Kristaps Dzonsons <kristaps@bsd.lv>
  *
@@ -358,7 +358,7 @@ rsync_sender(struct sess *sess, int fdin,
 {
        struct flist       *fl = NULL;
        const struct flist *f;
-       size_t              i, flsz = 0, phase = 0, excl;
+       size_t              i, flsz = 0, phase = 0;
        int                 rc = 0, c;
        int32_t             idx;
        struct pollfd       pfd[3];
@@ -393,12 +393,8 @@ rsync_sender(struct sess *sess, int fdin,
        }
 
        /* Client sends zero-length exclusions if deleting. */
-
-       if (!sess->opts->server && sess->opts->del &&
-           !io_write_int(sess, fdout, 0)) {
-               ERRX1("io_write_int");
-               goto out;
-       }
+       if (!sess->opts->server && sess->opts->del)
+               send_rules(sess, fdout);
 
        /*
         * Then the file list in any mode.
@@ -427,15 +423,8 @@ rsync_sender(struct sess *sess, int fdin,
         * This is always 0 for now.
         */
 
-       if (sess->opts->server) {
-               if (!io_read_size(sess, fdin, &excl)) {
-                       ERRX1("io_read_size");
-                       goto out;
-               } else if (excl != 0) {
-                       ERRX1("exclusion list is non-empty");
-                       goto out;
-               }
-       }
+       if (sess->opts->server)
+               recv_rules(sess, fdin);
 
        /*
         * Set up our poll events.