Add initial support for pattern matching using Lua's pattern matching code.
authorreyk <reyk@openbsd.org>
Tue, 23 Jun 2015 15:23:14 +0000 (15:23 +0000)
committerreyk <reyk@openbsd.org>
Tue, 23 Jun 2015 15:23:14 +0000 (15:23 +0000)
With important help on the pattern matcher from semarie@

OK semarie@

usr.sbin/httpd/Makefile
usr.sbin/httpd/httpd.conf.5
usr.sbin/httpd/httpd.h
usr.sbin/httpd/parse.y
usr.sbin/httpd/patterns.7 [new file with mode: 0644]
usr.sbin/httpd/patterns.c [new file with mode: 0644]
usr.sbin/httpd/patterns.h [new file with mode: 0644]
usr.sbin/httpd/server_http.c

index 885ad42..e01dec1 100644 (file)
@@ -1,4 +1,4 @@
-#      $OpenBSD: Makefile,v 1.27 2015/02/23 10:39:10 reyk Exp $
+#      $OpenBSD: Makefile,v 1.28 2015/06/23 15:23:14 reyk Exp $
 
 PROG=          httpd
 SRCS=          parse.y
@@ -6,6 +6,9 @@ SRCS+=          config.c control.c httpd.c log.c logger.c proc.c
 SRCS+=         server.c server_http.c server_file.c server_fcgi.c
 MAN=           httpd.8 httpd.conf.5
 
+SRCS+=         patterns.c
+MAN+=          patterns.7
+
 LDADD=         -levent -ltls -lssl -lcrypto -lutil
 DPADD=         ${LIBEVENT} ${LIBTLS} ${LIBSSL} ${LIBCRYPTO} ${LIBUTIL}
 #DEBUG=                -g -DDEBUG=3 -O0
index 87866d2..4fe5aef 100644 (file)
@@ -1,4 +1,4 @@
-.\"    $OpenBSD: httpd.conf.5,v 1.61 2015/05/28 19:29:40 jmc Exp $
+.\"    $OpenBSD: httpd.conf.5,v 1.62 2015/06/23 15:23:14 reyk Exp $
 .\"
 .\" Copyright (c) 2014, 2015 Reyk Floeter <reyk@openbsd.org>
 .\"
@@ -14,7 +14,7 @@
 .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 .\"
-.Dd $Mdocdate: May 28 2015 $
+.Dd $Mdocdate: June 23 2015 $
 .Dt HTTPD.CONF 5
 .Os
 .Sh NAME
@@ -131,14 +131,38 @@ The configured web servers.
 .Pp
 Each
 .Ic server
-must have a
-.Ar name
-and include one or more lines of the following syntax:
+section starts with a declaration of the server
+.Ar name :
+.Bl -tag -width Ds
+.It Ic server Ar name Brq ...
+Match the server name using shell globbing rules.
+This can be an explicit name,
+.Ar www.example.com ,
+or a name including wildcards,
+.Ar *.example.com .
+.It Ic server match Ar name Brq ...
+Match the server name using pattern matching,
+see
+.Xr patterns 7 .
+.El
+.Pp
+It is followed by a block of options that is enclosed in curly brackets:
 .Bl -tag -width Ds
 .It Ic alias Ar name
 Specify an additional alias
 .Ar name
 for this server.
+.It Ic alias match Ar name
+Like the
+.Ic alias
+option,
+but
+.Ic match
+the
+.Ar name
+using pattern matching instead of shell globbing rules,
+see
+.Xr patterns 7 .
 .It Oo Ic no Oc Ic authenticate Oo Ar realm Oc Ic with Pa htpasswd
 Authenticate a remote user for
 .Ar realm
@@ -188,6 +212,12 @@ The configured IP address of the server.
 The configured TCP server port of the server.
 .It Ic $SERVER_NAME
 The name of the server.
+.It Ic Pf % Ar n
+The capture index
+.Ar n
+of a string that was captured by the enclosing
+.Ic location match
+option.
 .El
 .It Ic connection Ar option
 Set the specified options and limits for HTTP connections.
@@ -247,6 +277,22 @@ except
 .Ic location
 and
 .Ic tcp .
+.It Ic location match Ar path Brq ...
+Like the
+.Ic location
+option,
+but
+.Ic match
+the
+.Ar path
+using pattern matching instead of shell globbing rules,
+see
+.Xr patterns 7 .
+The pattern may contain captures that can be used in the
+.Ar uri
+of an enclosed
+.Ic block return
+option.
 .It Oo Ic no Oc Ic log Op Ar option
 Set the specified logging options.
 Logging is enabled by default using the standard
@@ -516,6 +562,7 @@ server "www.example.com" {
 .Ed
 .Sh SEE ALSO
 .Xr htpasswd 1 ,
+.Xr patterns 7 ,
 .Xr httpd 8 ,
 .Xr slowcgi 8
 .Sh AUTHORS
index 1431eaa..20d75a7 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: httpd.h,v 1.83 2015/05/20 09:28:47 kettenis Exp $     */
+/*     $OpenBSD: httpd.h,v 1.84 2015/06/23 15:23:14 reyk Exp $ */
 
 /*
  * Copyright (c) 2006 - 2015 Reyk Floeter <reyk@openbsd.org>
@@ -35,6 +35,8 @@
 #include <imsg.h>
 #include <tls.h>
 
+#include "patterns.h"
+
 #define CONF_FILE              "/etc/httpd.conf"
 #define HTTPD_SOCKET           "/var/run/httpd.sock"
 #define HTTPD_USER             "www"
@@ -278,6 +280,7 @@ struct client {
        void                    *clt_srv_conf;
        u_int32_t                clt_srv_id;
        struct sockaddr_storage  clt_srv_ss;
+       struct str_match         clt_srv_match;
 
        int                      clt_s;
        in_port_t                clt_port;
@@ -341,12 +344,15 @@ SPLAY_HEAD(client_tree, client);
 #define SRVFLAG_NO_AUTH                0x00020000
 #define SRVFLAG_BLOCK          0x00040000
 #define SRVFLAG_NO_BLOCK       0x00080000
+#define SRVFLAG_LOCATION_MATCH 0x00100000
+#define SRVFLAG_SERVER_MATCH   0x00200000
 
 #define SRVFLAG_BITS                                                   \
        "\10\01INDEX\02NO_INDEX\03AUTO_INDEX\04NO_AUTO_INDEX"           \
        "\05ROOT\06LOCATION\07FCGI\10NO_FCGI\11LOG\12NO_LOG\13SOCKET"   \
        "\14SYSLOG\15NO_SYSLOG\16TLS\17ACCESS_LOG\20ERROR_LOG"          \
-       "\21AUTH\22NO_AUTH\23BLOCK\24NO_BLOCK"
+       "\21AUTH\22NO_AUTH\23BLOCK\24NO_BLOCK\25LOCATION_MATCH"         \
+       "\26SERVER_MATCH"
 
 #define TCPFLAG_NODELAY                0x01
 #define TCPFLAG_NNODELAY       0x02
index 0aae421..1ba5cf3 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: parse.y,v 1.67 2015/04/01 04:51:15 jsg Exp $  */
+/*     $OpenBSD: parse.y,v 1.68 2015/06/23 15:23:14 reyk Exp $ */
 
 /*
  * Copyright (c) 2007 - 2015 Reyk Floeter <reyk@openbsd.org>
@@ -107,7 +107,7 @@ int          host_if(const char *, struct addresslist *,
 int             host(const char *, struct addresslist *,
                    int, struct portrange *, const char *, int);
 void            host_free(struct addresslist *);
-struct server  *server_inherit(struct server *, const char *,
+struct server  *server_inherit(struct server *, struct server_config *,
                    struct server_config *);
 int             getservice(char *);
 int             is_if_in_group(const char *, const char *);
@@ -131,14 +131,14 @@ typedef struct {
 
 %token ACCESS ALIAS AUTO BACKLOG BODY BUFFER CERTIFICATE CHROOT CIPHERS COMMON
 %token COMBINED CONNECTION DHE DIRECTORY ECDHE ERR FCGI INDEX IP KEY LISTEN
-%token LOCATION LOG LOGDIR MAXIMUM NO NODELAY ON PORT PREFORK PROTOCOLS
+%token LOCATION LOG LOGDIR MATCH MAXIMUM NO NODELAY ON PORT PREFORK PROTOCOLS
 %token REQUEST REQUESTS ROOT SACK SERVER SOCKET STRIP STYLE SYSLOG TCP TIMEOUT
 %token TLS TYPES
 %token ERROR INCLUDE AUTHENTICATE WITH BLOCK DROP RETURN PASS
 %token <v.string>      STRING
 %token  <v.number>     NUMBER
 %type  <v.port>        port
-%type  <v.number>      opttls
+%type  <v.number>      opttls optmatch
 %type  <v.tv>          timeout
 %type  <v.string>      numberstring optstring
 %type  <v.auth>        authopts
@@ -200,26 +200,26 @@ main              : PREFORK NUMBER        {
                }
                ;
 
-server         : SERVER STRING         {
+server         : SERVER optmatch STRING        {
                        struct server   *s;
 
                        if (!loadcfg) {
-                               free($2);
+                               free($3);
                                YYACCEPT;
                        }
 
                        if ((s = calloc(1, sizeof (*s))) == NULL)
                                fatal("out of memory");
 
-                       if (strlcpy(s->srv_conf.name, $2,
+                       if (strlcpy(s->srv_conf.name, $3,
                            sizeof(s->srv_conf.name)) >=
                            sizeof(s->srv_conf.name)) {
                                yyerror("server name truncated");
-                               free($2);
+                               free($3);
                                free(s);
                                YYERROR;
                        }
-                       free($2);
+                       free($3);
 
                        strlcpy(s->srv_conf.root, HTTPD_DOCROOT,
                            sizeof(s->srv_conf.root));
@@ -235,7 +235,9 @@ server              : SERVER STRING         {
                        s->srv_conf.timeout.tv_sec = SERVER_TIMEOUT;
                        s->srv_conf.maxrequests = SERVER_MAXREQUESTS;
                        s->srv_conf.maxrequestbody = SERVER_MAXREQUESTBODY;
-                       s->srv_conf.flags |= SRVFLAG_LOG;
+                       s->srv_conf.flags = SRVFLAG_LOG;
+                       if ($2)
+                               s->srv_conf.flags |= SRVFLAG_SERVER_MATCH;
                        s->srv_conf.logformat = LOG_FORMAT_COMMON;
                        s->srv_conf.tls_protocols = TLS_PROTOCOLS_DEFAULT;
                        if ((s->srv_conf.tls_cert_file =
@@ -334,7 +336,7 @@ server              : SERVER STRING         {
                                                continue;
 
                                        if ((sn = server_inherit(srv,
-                                           b->name, a)) == NULL) {
+                                           b, a)) == NULL) {
                                                serverconfig_free(srv_conf);
                                                free(srv);
                                                YYABORT;
@@ -405,30 +407,35 @@ serveroptsl       : LISTEN ON STRING opttls port {
                        }
 
                        if (alias != NULL) {
+                               /* IP-based; use name match flags from parent */
+                               alias->flags = srv->srv_conf.flags;
                                TAILQ_INSERT_TAIL(&srv->srv_hosts,
                                    alias, entry);
                        }
                }
-               | ALIAS STRING          {
+               | ALIAS optmatch STRING         {
                        struct server_config    *alias;
 
                        if (parentsrv != NULL) {
                                yyerror("alias inside location");
-                               free($2);
+                               free($3);
                                YYERROR;
                        }
 
                        if ((alias = calloc(1, sizeof(*alias))) == NULL)
                                fatal("out of memory");
 
-                       if (strlcpy(alias->name, $2, sizeof(alias->name)) >=
+                       if (strlcpy(alias->name, $3, sizeof(alias->name)) >=
                            sizeof(alias->name)) {
                                yyerror("server alias truncated");
-                               free($2);
+                               free($3);
                                free(alias);
                                YYERROR;
                        }
-                       free($2);
+                       free($3);
+
+                       if ($2)
+                               alias->flags |= SRVFLAG_SERVER_MATCH;
 
                        TAILQ_INSERT_TAIL(&srv->srv_hosts, alias, entry);
                }
@@ -456,38 +463,38 @@ serveroptsl       : LISTEN ON STRING opttls port {
                | fastcgi
                | authenticate
                | filter
-               | LOCATION STRING               {
+               | LOCATION optmatch STRING      {
                        struct server   *s;
 
                        if (srv->srv_conf.ss.ss_family == AF_UNSPEC) {
                                yyerror("listen address not specified");
-                               free($2);
+                               free($3);
                                YYERROR;
                        }
 
                        if (parentsrv != NULL) {
-                               yyerror("location %s inside location", $2);
-                               free($2);
+                               yyerror("location %s inside location", $3);
+                               free($3);
                                YYERROR;
                        }
 
                        if (!loadcfg) {
-                               free($2);
+                               free($3);
                                YYACCEPT;
                        }
 
                        if ((s = calloc(1, sizeof (*s))) == NULL)
                                fatal("out of memory");
 
-                       if (strlcpy(s->srv_conf.location, $2,
+                       if (strlcpy(s->srv_conf.location, $3,
                            sizeof(s->srv_conf.location)) >=
                            sizeof(s->srv_conf.location)) {
                                yyerror("server location truncated");
-                               free($2);
+                               free($3);
                                free(s);
                                YYERROR;
                        }
-                       free($2);
+                       free($3);
 
                        if (strlcpy(s->srv_conf.name, srv->srv_conf.name,
                            sizeof(s->srv_conf.name)) >=
@@ -501,6 +508,8 @@ serveroptsl : LISTEN ON STRING opttls port {
                        /* A location entry uses the parent id */
                        s->srv_conf.parent_id = srv->srv_conf.id;
                        s->srv_conf.flags = SRVFLAG_LOCATION;
+                       if ($2)
+                               s->srv_conf.flags |= SRVFLAG_LOCATION_MATCH;
                        s->srv_s = -1;
                        memcpy(&s->srv_conf.ss, &srv->srv_conf.ss,
                            sizeof(s->srv_conf.ss));
@@ -884,6 +893,10 @@ block              : BLOCK                         {
                }
                ;
 
+optmatch       : /* empty */           { $$ = 0; }
+               | MATCH                 { $$ = 1; }
+               ;
+
 optstring      : /* empty */           { $$ = NULL; }
                | STRING                { $$ = $1; }
                ;
@@ -1108,6 +1121,7 @@ lookup(char *s)
                { "location",           LOCATION },
                { "log",                LOG },
                { "logdir",             LOGDIR },
+               { "match",              MATCH },
                { "max",                MAXIMUM },
                { "no",                 NO },
                { "nodelay",            NODELAY },
@@ -1889,7 +1903,7 @@ host_free(struct addresslist *al)
 }
 
 struct server *
-server_inherit(struct server *src, const char *name,
+server_inherit(struct server *src, struct server_config *alias,
     struct server_config *addr)
 {
        struct server   *dst, *s, *dstl;
@@ -1927,7 +1941,7 @@ server_inherit(struct server *src, const char *name,
        }
 
        /* Now set alias and listen address */
-       strlcpy(dst->srv_conf.name, name, sizeof(dst->srv_conf.name));
+       strlcpy(dst->srv_conf.name, alias->name, sizeof(dst->srv_conf.name));
        memcpy(&dst->srv_conf.ss, &addr->ss, sizeof(dst->srv_conf.ss));
        dst->srv_conf.port = addr->port;
        dst->srv_conf.prefixlen = addr->prefixlen;
@@ -1936,6 +1950,10 @@ server_inherit(struct server *src, const char *name,
        else
                dst->srv_conf.flags &= ~SRVFLAG_TLS;
 
+       /* Don't inherit the "match" option, use it from the alias */
+       dst->srv_conf.flags &= ~SRVFLAG_SERVER_MATCH;
+       dst->srv_conf.flags |= (alias->flags & SRVFLAG_SERVER_MATCH);
+
        if (server_tls_load_keypair(dst) == -1) {
                yyerror("failed to load public/private keys "
                    "for server %s", dst->srv_conf.name);
@@ -1975,7 +1993,8 @@ server_inherit(struct server *src, const char *name,
                        fatal("out of memory");
 
                memcpy(&dstl->srv_conf, &s->srv_conf, sizeof(dstl->srv_conf));
-               strlcpy(dstl->srv_conf.name, name, sizeof(dstl->srv_conf.name));
+               strlcpy(dstl->srv_conf.name, alias->name,
+                   sizeof(dstl->srv_conf.name));
 
                /* Copy the new Id and listen address */
                dstl->srv_conf.id = ++last_server_id;
diff --git a/usr.sbin/httpd/patterns.7 b/usr.sbin/httpd/patterns.7
new file mode 100644 (file)
index 0000000..1ec1592
--- /dev/null
@@ -0,0 +1,305 @@
+.\"    $OpenBSD: patterns.7,v 1.1 2015/06/23 15:23:14 reyk Exp $
+.\"
+.\" Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
+.\" Copyright (C) 1994-2015 Lua.org, PUC-Rio.
+.\"
+.\" Permission is hereby granted, free of charge, to any person obtaining
+.\" a copy of this software and associated documentation files (the
+.\" "Software"), to deal in the Software without restriction, including
+.\" without limitation the rights to use, copy, modify, merge, publish,
+.\" distribute, sublicense, and/or sell copies of the Software, and to
+.\" permit persons to whom the Software is furnished to do so, subject to
+.\" the following conditions:
+.\"
+.\" The above copyright notice and this permission notice shall be
+.\" included in all copies or substantial portions of the Software.
+.\"
+.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+.\" EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+.\" MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+.\" IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+.\" CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+.\" TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+.\" SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+.\"
+.\" Derived from section 6.4.1 in manual.html of Lua 5.3.1:
+.\" $Id: patterns.7,v 1.1 2015/06/23 15:23:14 reyk Exp $
+.\"
+.Dd $Mdocdate: June 23 2015 $
+.Dt PATTERNS 7
+.Os
+.Sh NAME
+.Nm patterns
+.Nd Lua's pattern matching rules
+.Sh DESCRIPTION
+Pattern matching in
+.Xr httpd 8
+is based on the implementation of the Lua scripting language and
+provides a simple and fast alternative to regular expressions (REs) that
+are described in
+.Xr re_format 7 .
+Patterns are described by regular strings, which are interpreted as
+patterns by the pattern-matching
+.Dq find
+and
+.Dq match
+functions.
+This document describes the syntax and the meaning (that is, what they
+match) of these strings.
+.Sh CHARACTER CLASS
+.Pp
+A character class is used to represent a set of characters.
+The following combinations are allowed in describing a character
+class:
+.Bl -tag -width Ds
+.It Ar x
+(where
+.Ar x
+is not one of the magic characters
+.Sq ^$()%.[]*+-? )
+represents the character
+.Ar x
+itself.
+.It .
+(a dot) represents all characters.
+.It %a
+represents all letters.
+.It %c
+represents all control characters.
+.It %d
+represents all digits.
+.It %g
+represents all printable characters except space.
+.It %l
+represents all lowercase letters.
+.It %p
+represents all punctuation characters.
+.It %s
+represents all space characters.
+.It %u
+represents all uppercase letters.
+.It %w
+represents all alphanumeric characters.
+.It %x
+represents all hexadecimal digits.
+.It Pf % Ar x
+(where
+.Ar x
+is any non-alphanumeric character) represents the character
+.Ar x .
+This is the standard way to escape the magic characters.
+Any non-alphanumeric character (including all punctuation characters,
+even the non-magical) can be preceded by a
+.Sq %
+when used to represent itself in a pattern.
+.It Bq Ar set
+represents the class which is the union of all
+characters in
+.Ar set .
+A range of characters can be specified by separating the end
+characters of the range, in ascending order, with a
+.Sq - .
+All classes
+.Sq Ar %x
+described above can also be used as components in
+.Ar set .
+All other characters in
+.Ar set
+represent themselves.
+For example,
+.Sq [%w_]
+(or
+.Sq [_%w] )
+represents all alphanumeric characters plus the underscore,
+.Sq [0-7]
+represents the octal digits,
+and
+.Sq [0-7%l%-]
+represents the octal digits plus the lowercase letters plus the
+.Sq -
+character.
+.Pp
+The interaction between ranges and classes is not defined.
+Therefore, patterns like
+.Sq [%a-z]
+or
+.Sq [a-%%]
+have no meaning.
+.It Bq Ar ^set
+represents the complement of
+.Ar set ,
+where
+.Ar set
+is interpreted as above.
+.El
+.Pp
+For all classes represented by single letters (
+.Sq %a ,
+.Sq %c ,
+etc.),
+the corresponding uppercase letter represents the complement of the class.
+For instance,
+.Sq %S
+represents all non-space characters.
+.Pp
+The definitions of letter, space, and other character groups depend on
+the current locale.
+In particular, the class
+.Sq [a-z]
+may not be equivalent to
+.Sq %l .
+.Sh PATTERN ITEM
+A pattern item can be:
+.Bl -bullet
+.It
+a single character class, which matches any single character in the class;
+.It
+a single character class followed by
+.Sq * ,
+which matches zero or more repetitions of characters in the class.
+These repetition items will always match the longest possible sequence;
+.It
+a single character class followed by
+.Sq + ,
+which matches one or more repetitions of characters in the class.
+These repetition items will always match the longest possible sequence;
+.It
+a single character class followed by
+.Sq - ,
+which also matches zero or more repetitions of characters in the class.
+Unlike
+.Sq * ,
+these repetition items will always match the shortest possible sequence;
+.It
+a single character class followed by
+.Sq \? ,
+which matches zero or one occurrence of a character in the class.
+It always matches one occurrence if possible;
+.It
+.Sq Pf % Ar n ,
+for
+.Ar n
+between 1 and 9;
+such item matches a substring equal to the n-th captured string (see below);
+.It
+.Sq Pf %b Ar xy ,
+where
+.Ar x
+and
+.Ar y
+are two distinct characters;
+such item matches strings that start with
+.Ar x ,
+end with
+.Ar y ,
+and where the
+.Ar x
+and
+.Ar y
+are
+.Em balanced .
+This means that, if one reads the string from left to right, counting
+.Em +1
+for an
+.Ar x
+and
+.Em -1
+for a
+.Ar y ,
+the ending
+.Ar y
+is the first
+.Ar y
+where the count reaches 0.
+For instance, the item
+.Sq %b()
+matches expressions with balanced parentheses.
+.It
+.Sq Pf %f Bq Ar set ,
+a
+.Em frontier pattern ;
+such item matches an empty string at any position such that the next
+character belongs to
+.Ar set
+and the previous character does not belong to
+.Ar set .
+The set
+.Ar set
+is interpreted as previously described.
+The beginning and the end of the subject are handled as if
+they were the character
+.Sq \e0 .
+.El
+.Sh PATTERN
+A pattern is a sequence of pattern items.
+A caret
+.Sq ^
+at the beginning of a pattern anchors the match at the beginning of
+the subject string.
+A
+.Sq \$
+at the end of a pattern anchors the match at the end of the subject string.
+At other positions,
+.Sq ^
+and
+.Sq \$
+have no special meaning and represent themselves.
+.Sh CAPTURES
+A pattern can contain sub-patterns enclosed in parentheses; they
+describe captures.
+When a match succeeds, the substrings of the subject string that match
+captures are stored (captured) for future use.
+Captures are numbered according to their left parentheses.
+For instance, in the pattern
+.Qq (a*(.)%w(%s*)) ,
+the part of the string matching
+.Qq a*(.)%w(%s*)
+is stored as the first capture (and therefore has number 1);
+the character matching
+.So \. Sc
+is captured with number 2,
+and the part matching
+.Qq %s*
+has number 3.
+.Pp
+As a special case, the empty capture
+.Sq ()
+captures the current string position (a number).
+For instance, if we apply the pattern
+.Qq ()aa()
+on the string
+.Qq flaaap ,
+there will be two captures: 3 and 5.
+.Sh SEE ALSO
+.Xr fnmatch 3 ,
+.Xr re_format 7 ,
+.Xr httpd 8
+.Rs
+.%A Roberto Ierusalimschy
+.%A Luiz Henrique de Figueiredo
+.%A Waldemar Celes
+.%Q Lua.org
+.%Q PUC-Rio
+.%D June 2015
+.%R Lua 5.3 Reference Manual
+.%T Patterns
+.%U http://www.lua.org/manual/5.3/manual.html#6.4.1
+.Re
+.Sh HISTORY
+The first implementation of the pattern rules was introduced with Lua 2.5.
+Almost twenty years later,
+an implementation based on Lua 5.3.1 appeared in
+.Ox 5.8 .
+.Sh AUTHORS
+The pattern matching is derived from the original implementation of
+the Lua scripting language, which was written by
+.An -nosplit
+.An Roberto Ierusalimschy ,
+.An Waldemar Celes ,
+and
+.An Luiz Henrique de Figueiredo
+at PUC-Rio.
+It was turned into a native C API for
+.Xr httpd 8
+by
+.An Reyk Floeter Aq Mt reyk@openbsd.org .
diff --git a/usr.sbin/httpd/patterns.c b/usr.sbin/httpd/patterns.c
new file mode 100644 (file)
index 0000000..e4e3ab4
--- /dev/null
@@ -0,0 +1,715 @@
+/*     $OpenBSD: patterns.c,v 1.1 2015/06/23 15:23:14 reyk Exp $       */
+
+/*
+ * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
+ * Copyright (C) 1994-2015 Lua.org, PUC-Rio.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * Derived from Lua 5.3.1:
+ * $Id: patterns.c,v 1.1 2015/06/23 15:23:14 reyk Exp $
+ * Standard library for string operations and pattern-matching
+ */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ctype.h>
+#include <errno.h>
+#include <assert.h>
+
+#include "patterns.h"
+
+#define uchar(c)       ((unsigned char)(c)) /* macro to 'unsign' a char */
+#define CAP_UNFINISHED (-1)
+#define CAP_POSITION   (-2)
+#define L_ESC          '%'
+#define SPECIALS       "^$*+?.([%-"
+
+struct match_state {
+       int matchdepth;         /* control for recursive depth (to avoid C
+                                * stack overflow) */
+       int repetitioncounter;  /* control the repetition items */
+       int maxcaptures;        /* configured capture limit */
+       const char *src_init;   /* init of source string */
+       const char *src_end;    /* end ('\0') of source string */
+       const char *p_end;      /* end ('\0') of pattern */
+       const char *error;      /* should be NULL */
+       int level;              /* total number of captures (finished or
+                                * unfinished) */
+       struct {
+               const char *init;
+               ptrdiff_t len;
+       } capture[MAXCAPTURES];
+};
+
+/* recursive function */
+static const char *match(struct match_state *, const char *, const char *);
+
+static int
+match_error(struct match_state *ms, const char *error)
+{
+       ms->error = ms->error == NULL ? error : ms->error;
+       return (-1);
+}
+
+static int
+check_capture(struct match_state *ms, int l)
+{
+       l -= '1';
+       if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED)
+               return match_error(ms, "invalid capture index");
+       return (l);
+}
+
+static int
+capture_to_close(struct match_state *ms)
+{
+       int level = ms->level;
+       for (level--; level >= 0; level--)
+               if (ms->capture[level].len == CAP_UNFINISHED)
+                       return (level);
+       return match_error(ms, "invalid pattern capture");
+}
+
+static const char *
+classend(struct match_state *ms, const char *p)
+{
+       switch (*p++) {
+       case L_ESC:
+               if (p == ms->p_end)
+                       match_error(ms,
+                           "malformed pattern (ends with '%%')");
+               return p + 1;
+       case '[':
+               if (*p == '^')
+                       p++;
+               do {
+                       /* look for a ']' */
+                       if (p == ms->p_end) {
+                               match_error(ms,
+                                   "malformed pattern (missing ']')");
+                               break;
+                       }
+                       if (*(p++) == L_ESC && p < ms->p_end) {
+                               /* skip escapes (e.g. '%]') */
+                               p++;
+                       }
+               } while (*p != ']');
+               return p + 1;
+       default:
+               return p;
+       }
+}
+
+static int
+match_class(int c, int cl)
+{
+       int res;
+       switch (tolower(cl)) {
+       case 'a':
+               res = isalpha(c);
+               break;
+       case 'c':
+               res = iscntrl(c);
+               break;
+       case 'd':
+               res = isdigit(c);
+               break;
+       case 'g':
+               res = isgraph(c);
+               break;
+       case 'l':
+               res = islower(c);
+               break;
+       case 'p':
+               res = ispunct(c);
+               break;
+       case 's':
+               res = isspace(c);
+               break;
+       case 'u':
+               res = isupper(c);
+               break;
+       case 'w':
+               res = isalnum(c);
+               break;
+       case 'x':
+               res = isxdigit(c);
+               break;
+       case 'z':
+               res = (c == 0);
+               break;          /* deprecated option */
+       default:
+               return (cl == c);
+       }
+       return (islower(cl) ? res : !res);
+}
+
+static int
+matchbracketclass(int c, const char *p, const char *ec)
+{
+       int sig = 1;
+       if (*(p + 1) == '^') {
+               sig = 0;
+               /* skip the '^' */
+               p++;
+       }
+       while (++p < ec) {
+               if (*p == L_ESC) {
+                       p++;
+                       if (match_class(c, uchar(*p)))
+                               return sig;
+               } else if ((*(p + 1) == '-') && (p + 2 < ec)) {
+                       p += 2;
+                       if (uchar(*(p - 2)) <= c && c <= uchar(*p))
+                               return sig;
+               } else if (uchar(*p) == c)
+                       return sig;
+       }
+       return !sig;
+}
+
+static int
+singlematch(struct match_state *ms, const char *s, const char *p,
+    const char *ep)
+{
+       if (s >= ms->src_end)
+               return 0;
+       else {
+               int c = uchar(*s);
+               switch (*p) {
+               case '.':
+                       /* matches any char */
+                       return (1);
+               case L_ESC:
+                       return match_class(c, uchar(*(p + 1)));
+               case '[':
+                       return matchbracketclass(c, p, ep - 1);
+               default:
+                       return (uchar(*p) == c);
+               }
+       }
+}
+
+/*
+ * Match a balanced run for the '%b' item.
+ *
+ * 'p' points at the two delimiter characters that follow '%b': p[0]
+ * opens and p[1] closes.  The subject must start with the opening
+ * delimiter at 's'.  Returns a pointer just past the closing delimiter
+ * that balances it, or NULL if 's' does not start with the opener or the
+ * subject ends while still unbalanced.  A pattern that ends before both
+ * delimiters are given is reported through match_error().
+ */
+static const char *
+matchbalance(struct match_state *ms, const char *s, const char *p)
+{
+       int opener, closer, depth;
+
+       if (p >= ms->p_end - 1) {
+               match_error(ms,
+                   "malformed pattern (missing arguments to '%b')");
+               return (NULL);
+       }
+       if (*s != *p)
+               return (NULL);
+
+       opener = *p;
+       closer = *(p + 1);
+       depth = 1;
+
+       /*
+        * The closer is tested first, so when both delimiters are the
+        * same character the very next occurrence ends the run.
+        */
+       for (s++; s < ms->src_end; s++) {
+               if (*s == closer) {
+                       if (--depth == 0)
+                               return s + 1;
+               } else if (*s == opener)
+                       depth++;
+       }
+
+       /* string ends out of balance */
+       return (NULL);
+}
+
+/*
+ * Greedy repetition: consume as many characters accepted by the item at
+ * 'p' as possible, then try to match the rest of the pattern (ep + 1),
+ * giving back one character at a time until it succeeds.
+ * Returns the end of the overall match or NULL if no split works.
+ */
+static const char *
+max_expand(struct match_state *ms, const char *s, const char *p, const char *ep)
+{
+       const char *res;
+       ptrdiff_t n;
+
+       /* find the longest run the single item accepts */
+       for (n = 0; singlematch(ms, s + n, p, ep); n++)
+               continue;
+
+       /* longest first; shrink the run while the remainder fails */
+       for (; n >= 0; n--) {
+               res = match(ms, (s + n), ep + 1);
+               if (res)
+                       return res;
+       }
+       return NULL;
+}
+
+/*
+ * Lazy repetition: try the rest of the pattern (ep + 1) first and only
+ * consume one more character accepted by the item at 'p' each time that
+ * fails.  Returns the end of the overall match, or NULL once the item
+ * stops accepting characters.
+ */
+static const char *
+min_expand(struct match_state *ms, const char *s, const char *p, const char *ep)
+{
+       const char *res;
+
+       while ((res = match(ms, s, ep + 1)) == NULL) {
+               /* remainder failed here; can the item take one more? */
+               if (!singlematch(ms, s, p, ep))
+                       return NULL;
+               s++;
+       }
+       return res;
+}
+
+/*
+ * Open a new capture at subject position 's' and continue matching the
+ * pattern at 'p'.  'what' is stored as the provisional length (the
+ * caller passes CAP_UNFINISHED or CAP_POSITION).
+ * Returns the result of the continued match; on failure the capture is
+ * dropped again.  Exceeding ms->maxcaptures is reported as an error.
+ */
+static const char *
+start_capture(struct match_state *ms, const char *s, const char *p, int what)
+{
+       const char *res;
+       int slot = ms->level;
+
+       if (slot >= ms->maxcaptures) {
+               match_error(ms, "too many captures");
+               return (NULL);
+       }
+
+       ms->capture[slot].init = s;
+       ms->capture[slot].len = what;
+       ms->level = slot + 1;
+
+       res = match(ms, s, p);
+       if (res == NULL) {
+               /* the rest did not match: forget this capture */
+               ms->level--;
+       }
+       return res;
+}
+
+/*
+ * Close the capture selected by capture_to_close() at subject position
+ * 's' and continue matching the pattern at 'p'.
+ * Returns the result of the continued match, or NULL if there is no
+ * capture to close; if the continuation fails the capture is marked
+ * unfinished again.
+ */
+static const char *
+end_capture(struct match_state *ms, const char *s, const char *p)
+{
+       const char *res;
+       int idx;
+
+       idx = capture_to_close(ms);
+       if (idx == -1)
+               return NULL;
+
+       /* length runs from where the capture was opened up to 's' */
+       ms->capture[idx].len = s - ms->capture[idx].init;
+
+       res = match(ms, s, p);
+       if (res == NULL) {
+               /* reopen it so that backtracking can close it elsewhere */
+               ms->capture[idx].len = CAP_UNFINISHED;
+       }
+       return res;
+}
+
+/*
+ * Match a back-reference: the subject at 's' must repeat the text of an
+ * earlier capture.  'l' is the capture designator from the pattern and
+ * is translated to a capture index by check_capture().
+ * Returns a pointer just past the repeated text, or NULL if the index is
+ * rejected, too little subject is left, or the bytes differ.
+ */
+static const char *
+match_capture(struct match_state *ms, const char *s, int l)
+{
+       size_t len;
+
+       if ((l = check_capture(ms, l)) == -1)
+               return NULL;
+
+       len = ms->capture[l].len;
+
+       /* not enough subject left to hold the captured text */
+       if ((size_t) (ms->src_end - s) < len)
+               return NULL;
+       if (memcmp(ms->capture[l].init, s, len) != 0)
+               return NULL;
+       return s + len;
+}
+
+/*
+ * Core of the pattern matcher: match the pattern starting at 'p' against
+ * the subject starting at 's'.
+ *
+ * Returns a pointer just past the matched text, or NULL if the pattern
+ * does not match at 's'.  Pattern errors (as opposed to mismatches) are
+ * recorded with match_error(); callers must consult ms->error because
+ * some error paths leave through 'break' without forcing a NULL result.
+ *
+ * Recursion is limited by the ms->matchdepth budget; continuations that
+ * need no backtracking use 'goto init' instead of a recursive call.
+ */
+static const char *
+match(struct match_state *ms, const char *s, const char *p)
+{
+       const char *ep, *res;
+       char previous;
+
+       /*
+        * A recorded error makes the whole search fail (str_find() and
+        * str_match() both check ms->error), so do not let backtracking
+        * helpers such as max_expand() start further match attempts.
+        */
+       if (ms->error != NULL)
+               return (NULL);
+
+       /*
+        * Test the budget before taking from it.  Decrementing first would
+        * leave the counter unbalanced on this error path (the matching
+        * increment at the end of the function is skipped), after which
+        * the limit would no longer be enforced.
+        */
+       if (ms->matchdepth == 0) {
+               match_error(ms, "pattern too complex");
+               return (NULL);
+       }
+       ms->matchdepth--;
+
+       /* using goto's to optimize tail recursion */
+ init:
+       /* end of pattern? */
+       if (p != ms->p_end) {
+               switch (*p) {
+               case '(':
+                       /* start capture */
+                       if (*(p + 1) == ')')
+                               /* position capture? */
+                               s = start_capture(ms, s, p + 2, CAP_POSITION);
+                       else
+                               s = start_capture(ms, s, p + 1, CAP_UNFINISHED);
+                       break;
+               case ')':
+                       /* end capture */
+                       s = end_capture(ms, s, p + 1);
+                       break;
+               case '$':
+                       /* is the '$' the last char in pattern? */
+                       if ((p + 1) != ms->p_end) {
+                               /* no; go to default */
+                               goto dflt;
+                       }
+                       /* check end of string */
+                       s = (s == ms->src_end) ? s : NULL;
+                       break;
+               case L_ESC:
+                       /* escaped sequences not in the format class[*+?-]? */
+                       switch (*(p + 1)) {
+                       case 'b':
+                               /* balanced string? */
+                               s = matchbalance(ms, s, p + 2);
+                               if (s != NULL) {
+                                       /* skip the escape, 'b' and both delimiters */
+                                       p += 4;
+                                       /* return match(ms, s, p + 4); */
+                                       goto init;
+                               } /* else fail (s == NULL) */
+                               break;
+                       case 'f':
+                               /* frontier? */
+                               p += 2;
+                               if (*p != '[') {
+                                       match_error(ms, "missing '['"
+                                           " after '%f' in pattern");
+                                       break;
+                               }
+                               /* points to what is next */
+                               ep = classend(ms, p);
+                               if (ms->error != NULL)
+                                       break;
+                               /* the start of the subject counts as '\0' */
+                               previous =
+                                   (s == ms->src_init) ? '\0' : *(s - 1);
+                               /* previous char outside the set, current inside */
+                               if (!matchbracketclass(uchar(previous),
+                                   p, ep - 1) &&
+                                   matchbracketclass(uchar(*s),
+                                   p, ep - 1)) {
+                                       p = ep;
+                                       /* return match(ms, s, ep); */
+                                       goto init;
+                               }
+                               /* match failed */
+                               s = NULL;
+                               break;
+                       case '0':
+                       case '1':
+                       case '2':
+                       case '3':
+                       case '4':
+                       case '5':
+                       case '6':
+                       case '7':
+                       case '8':
+                       case '9':
+                               /* capture results (%0-%9)? */
+                               s = match_capture(ms, s, uchar(*(p + 1)));
+                               if (s != NULL) {
+                                       p += 2;
+                                       /* return match(ms, s, p + 2) */
+                                       goto init;
+                               }
+                               break;
+                       default:
+                               goto dflt;
+                       }
+                       break;
+               default:
+
+                       /* pattern class plus optional suffix */
+       dflt:
+                       /* points to optional suffix */
+                       ep = classend(ms, p);
+                       if (ms->error != NULL)
+                               break;
+
+                       /* does not match at least once? */
+                       if (!singlematch(ms, s, p, ep)) {
+                               if (ms->repetitioncounter-- == 0) {
+                                       match_error(ms, "max repetition items");
+                                       s = NULL; /* fail */
+                               /* accept empty? */
+                               } else if
+                                   (*ep == '*' || *ep == '?' || *ep == '-') {
+                                       p = ep + 1;
+                                       /* return match(ms, s, ep + 1); */
+                                       goto init;
+                               } else {
+                                       /* '+' or no suffix */
+                                       s = NULL; /* fail */
+                               }
+                       } else {
+                               /* matched once */
+                               /* handle optional suffix */
+                               switch (*ep) {
+                               case '?':
+                                       /* optional */
+                                       if ((res =
+                                           match(ms, s + 1, ep + 1)) != NULL)
+                                               s = res;
+                                       else {
+                                               /*
+                                                * else return
+                                                *     match(ms, s, ep + 1);
+                                                */
+                                               p = ep + 1;
+                                               goto init;
+                                       }
+                                       break;
+                               case '+':
+                                       /* 1 or more repetitions */
+                                       s++; /* 1 match already done */
+                                       /* FALLTHROUGH */
+                               case '*':
+                                       /* 0 or more repetitions */
+                                       s = max_expand(ms, s, p, ep);
+                                       break;
+                               case '-':
+                                       /* 0 or more repetitions (minimum) */
+                                       s = min_expand(ms, s, p, ep);
+                                       break;
+                               default:
+                                       /* no suffix */
+                                       s++;
+                                       p = ep;
+                                       /* return match(ms, s + 1, ep); */
+                                       goto init;
+                               }
+                       }
+                       break;
+               }
+       }
+       /* give the recursion level taken on entry back to the budget */
+       ms->matchdepth++;
+       return s;
+}
+
+/*
+ * Find the first occurrence of the l2 bytes at 's2' within the l1 bytes
+ * at 's1'.  Returns a pointer to the occurrence, 's1' itself when l2 is
+ * 0, or NULL if there is none.
+ */
+static const char *
+lmemfind(const char *s1, size_t l1,
+    const char *s2, size_t l2)
+{
+       const char *hit;
+       size_t tail, span;
+
+       /* empty strings are everywhere */
+       if (l2 == 0)
+               return (s1);
+       /* the needle cannot fit; also keeps 'span' from wrapping */
+       if (l2 > l1)
+               return (NULL);
+
+       /*
+        * memchr() locates candidates for the first byte of 's2' and
+        * memcmp() verifies the 'tail' bytes that follow it.  'span' is
+        * the number of positions at which 's2' could still start.
+        */
+       tail = l2 - 1;
+       span = l1 - tail;
+       while (span > 0) {
+               hit = (const char *)memchr(s1, *s2, span);
+               if (hit == NULL)
+                       break;
+               if (memcmp(hit + 1, s2 + 1, tail) == 0)
+                       return hit;
+               /* resume the search right after this candidate */
+               span -= (hit + 1) - s1;
+               s1 = hit + 1;
+       }
+
+       /* not found */
+       return (NULL);
+}
+
+/*
+ * Store the offsets of capture 'i' in 'sm', relative to the start of
+ * the subject.  If 'i' is not below ms->level, the whole match 's'..'e'
+ * is stored instead, but only when 'i' is 0 or no captures were
+ * recorded; any other index is an error.
+ * Returns 0 on success or the value of match_error() on failure.
+ */
+static int
+push_onecapture(struct match_state *ms, int i, const char *s,
+    const char *e, struct str_find *sm)
+{
+       ptrdiff_t caplen;
+
+       if (i < ms->level) {
+               /* a recorded capture */
+               caplen = ms->capture[i].len;
+               if (caplen == CAP_UNFINISHED)
+                       return match_error(ms, "unfinished capture");
+               sm->sm_so = ms->capture[i].init - ms->src_init;
+               sm->sm_eo = sm->sm_so + caplen;
+       } else if (i == 0 || ms->level == 0) {
+               /* add whole match */
+               sm->sm_so = (off_t)(s - ms->src_init);
+               sm->sm_eo = (off_t)(e - s) + sm->sm_so;
+       } else
+               return match_error(ms, "invalid capture index");
+
+       /* never report an end offset that lies before the start */
+       if (sm->sm_eo < sm->sm_so)
+               sm->sm_eo = sm->sm_so;
+       return (0);
+}
+
+/*
+ * Fill 'sm' with the results of a match 's'..'e': one entry per
+ * recorded capture, or a single entry for the whole match when no
+ * capture was recorded.  At most 'nsm' entries are written.
+ * Returns the number of entries it set out to store; filling stops
+ * early if push_onecapture() fails, but the count is not reduced.
+ */
+static int
+push_captures(struct match_state *ms, const char *s, const char *e,
+    struct str_find *sm, size_t nsm)
+{
+       unsigned int count, idx;
+
+       if (ms->level <= 0 && s)
+               count = 1;
+       else
+               count = ms->level;
+
+       /* do not write past the caller's array */
+       if (count > nsm)
+               count = nsm;
+
+       idx = 0;
+       while (idx < count &&
+           push_onecapture(ms, idx, s, e, sm + idx) != -1)
+               idx++;
+
+       /* number of strings pushed */
+       return (count);
+}
+
+/*
+ * Check whether the pattern 'p' of length 'l' is free of the characters
+ * listed in SPECIALS.  Returns 1 if it is, 0 if one is found.
+ */
+static int
+nospecials(const char *p, size_t l)
+{
+       size_t off = 0;
+
+       for (;;) {
+               /* pattern has a special character */
+               if (strpbrk(p + off, SPECIALS) != NULL)
+                       return 0;
+
+               /* step over this segment and its \0; more may follow */
+               off += strlen(p + off) + 1;
+               if (off > l)
+                       break;
+       }
+
+       /* no special chars found */
+       return (1);
+}
+
+/*
+ * Search 'string' for 'pattern', starting at byte offset 'init', and
+ * store the resulting offsets in the 'nsm' entries at 'sm'.
+ *
+ * On a hit sm[0] is set to span the whole subject string (0 to its
+ * length); the matched text and any captures follow from sm[1] on.
+ * Returns the number of entries filled in, or 0 if nothing matched or
+ * an error was detected while matching.  If 'init' lies beyond the end
+ * of the string, the value of match_error() is returned.
+ *
+ * Callers must check ms->error: it can be set even when the return
+ * value is non-zero.
+ *
+ * NOTE(review): sm[0] is written without looking at 'nsm', so callers
+ * appear to be expected to pass nsm >= 1 -- confirm.
+ */
+static int
+str_find_aux(struct match_state *ms, const char *pattern, const char *string,
+    struct str_find *sm, size_t nsm, off_t init)
+{
+       size_t           ls = strlen(string);
+       size_t           lp = strlen(pattern);
+       const char      *s = string;
+       const char      *p = pattern;
+       const char      *s1, *s2;
+       int              anchor, i;
+
+       /* clamp a negative start; refuse one past the end of the string */
+       if (init < 0)
+               init = 0;
+       else if (init > (off_t)ls)
+               return match_error(ms, "starting after string's end");
+       s1 = s + init;
+
+       if (nospecials(p, lp)) {
+               /* do a plain search */
+               s2 = lmemfind(s1, ls - (size_t)init, p, lp);
+               if (s2 != NULL) {
+                       /* first entry: the whole subject string */
+                       i = 0;
+                       sm[i].sm_so = 0;
+                       sm[i].sm_eo = ls;
+                       /* second entry, if there is room: the text found */
+                       if (nsm > 1) {
+                               i++;
+                               sm[i].sm_so = s2 - s;
+                               sm[i].sm_eo = (s2 - s) + lp;
+                       }
+                       return (i + 1);
+               }
+               return (0);
+       }
+
+       /* a leading '^' pins the match to the start position */
+       anchor = (*p == '^');
+       if (anchor) {
+               p++;
+               lp--;   /* skip anchor character */
+       }
+       /* one entry less than the array holds: sm[0] is filled in below */
+       ms->maxcaptures = (nsm > MAXCAPTURES ? MAXCAPTURES : nsm) - 1;
+       ms->matchdepth = MAXCCALLS;
+       ms->repetitioncounter = MAXREPETITION;
+       ms->src_init = s;
+       ms->src_end = s + ls;
+       ms->p_end = p + lp;
+       /* try each start position in turn; only the first if anchored */
+       do {
+               const char *res;
+               /* forget captures left over from the previous attempt */
+               ms->level = 0;
+               if ((res = match(ms, s1, p)) != NULL) {
+                       /* whole subject first, then match and captures */
+                       sm->sm_so = 0;
+                       sm->sm_eo = ls;
+                       return push_captures(ms, s1, res, sm + 1, nsm - 1) + 1;
+
+               } else if (ms->error != NULL) {
+                       /* a pattern error, not a mismatch: stop retrying */
+                       return 0;
+               }
+       } while (s1++ < ms->src_end && !anchor);
+
+       return 0;
+}
+
+/*
+ * Find 'pattern' in 'string' and store match offsets in the 'nsm'
+ * entries at 'sm', which are cleared first.
+ *
+ * Returns the number of entries filled in by str_find_aux(), or 0 on
+ * error.  '*errstr' is set to the error message on error and to NULL
+ * otherwise.
+ */
+int
+str_find(const char *string, const char *pattern, struct str_find *sm,
+    size_t nsm, const char **errstr)
+{
+       struct match_state      ms;
+       int                     ret;
+
+       memset(&ms, 0, sizeof(ms));
+       memset(sm, 0, nsm * sizeof(*sm));
+
+       ret = str_find_aux(&ms, pattern, string, sm, nsm, 0);
+
+       /* ms.error is NULL unless the matcher recorded an error */
+       *errstr = ms.error;
+       if (ms.error != NULL) {
+               /* Return 0 on error */
+               ret = 0;
+       }
+
+       return (ret);
+}
+
+/*
+ * Match 'pattern' against 'string' and return the matched substrings as
+ * newly allocated copies in 'm', which is cleared first.
+ *
+ * Returns 0 on success; the caller releases the result with
+ * str_match_free().  Returns -1 if nothing matched or an error occurred.
+ * '*errstr' is then set to the matcher's error message (NULL for a
+ * plain mismatch) or to the strerror() text of a failed allocation.
+ */
+int
+str_match(const char *string, const char *pattern, struct str_match *m,
+    const char **errstr)
+{
+       struct str_find          sm[MAXCAPTURES];
+       struct match_state       ms;
+       char                    *copy;
+       int                      nfound, idx;
+
+       memset(&ms, 0, sizeof(ms));
+       memset(sm, 0, sizeof(sm));
+       memset(m, 0, sizeof(*m));
+
+       nfound = str_find_aux(&ms, pattern, string, sm, MAXCAPTURES, 0);
+       if (nfound == 0 || ms.error != NULL) {
+               /* Return -1 on error and store the error string */
+               *errstr = ms.error;
+               return (-1);
+       }
+
+       m->sm_match = calloc(nfound, sizeof(char *));
+       if (m->sm_match == NULL) {
+               *errstr = strerror(errno);
+               return (-1);
+       }
+       m->sm_nmatch = nfound;
+
+       /* duplicate each reported range of the subject string */
+       for (idx = 0; idx < nfound; idx++) {
+               /* an inverted range is skipped; its slot stays NULL */
+               if (sm[idx].sm_so > sm[idx].sm_eo)
+                       continue;
+               copy = strndup(string + sm[idx].sm_so,
+                   sm[idx].sm_eo - sm[idx].sm_so);
+               if (copy == NULL) {
+                       *errstr = strerror(errno);
+                       str_match_free(m);
+                       return (-1);
+               }
+               m->sm_match[idx] = copy;
+       }
+
+       *errstr = NULL;
+       return (0);
+}
+
+/*
+ * Release the strings held by 'm' and the array that holds them, and
+ * reset 'm' to the empty state.
+ *
+ * The function is safe to call more than once on the same structure,
+ * as happens when a connection is reset and later closed.
+ */
+void
+str_match_free(struct str_match *m)
+{
+       unsigned int     i;
+
+       for (i = 0; i < m->sm_nmatch; i++)
+               free(m->sm_match[i]);
+       free(m->sm_match);
+       /*
+        * Clear the pointer as well as the count: a stale pointer would
+        * be passed to free() again on the next call.
+        */
+       m->sm_match = NULL;
+       m->sm_nmatch = 0;
+}
diff --git a/usr.sbin/httpd/patterns.h b/usr.sbin/httpd/patterns.h
new file mode 100644 (file)
index 0000000..e753849
--- /dev/null
@@ -0,0 +1,47 @@
+/*     $OpenBSD: patterns.h,v 1.1 2015/06/23 15:23:14 reyk Exp $       */
+
+/*
+ * Copyright (c) 2015 Reyk Floeter <reyk@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/types.h>
+#include <sys/cdefs.h>
+
+#ifndef PATTERNS_H
+#define PATTERNS_H
+
+#define MAXCAPTURES    32      /* Max no. of allowed captures in pattern */
+#define MAXCCALLS      200     /* Max recursion depth in pattern matching */
+#define MAXREPETITION  0xfffff /* Max for repetition items */
+
+/*
+ * One result of str_find(): a range of the searched string given as
+ * byte offsets from its start.
+ */
+struct str_find {
+       off_t            sm_so; /* start offset of match */
+       off_t            sm_eo; /* end offset of match (first byte after it) */
+};
+
+/*
+ * Result of str_match(): copies of the matched substrings.
+ * Release with str_match_free().
+ */
+struct str_match {
+       char            **sm_match; /* allocated array of matched strings */
+       unsigned int     sm_nmatch; /* number of elements in array */
+};
+
+__BEGIN_DECLS
+int     str_find(const char *, const char *, struct str_find *, size_t,
+           const char **);
+int     str_match(const char *, const char *, struct str_match *,
+           const char **);
+void    str_match_free(struct str_match *);
+__END_DECLS
+
+#endif /* PATTERNS_H */
index 37555f8..146b142 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: server_http.c,v 1.82 2015/06/22 11:46:06 reyk Exp $   */
+/*     $OpenBSD: server_http.c,v 1.83 2015/06/23 15:23:14 reyk Exp $   */
 
 /*
  * Copyright (c) 2006 - 2015 Reyk Floeter <reyk@openbsd.org>
 #include <string.h>
 #include <unistd.h>
 #include <limits.h>
+#include <fnmatch.h>
 #include <stdio.h>
 #include <time.h>
 #include <resolv.h>
 #include <event.h>
-#include <fnmatch.h>
+#include <ctype.h>
 
 #include "httpd.h"
 #include "http.h"
+#include "patterns.h"
 
 static int      server_httpmethod_cmp(const void *, const void *);
 static int      server_httperror_cmp(const void *, const void *);
@@ -633,6 +635,7 @@ server_reset_http(struct client *clt)
        clt->clt_remote_user = NULL;
        clt->clt_bev->readcb = server_read_http;
        clt->clt_srv_conf = &srv->srv_conf;
+       str_match_free(&clt->clt_srv_match);
 }
 
 ssize_t
@@ -873,6 +876,8 @@ server_close_http(struct client *clt)
        clt->clt_descresp = NULL;
        free(clt->clt_remote_user);
        clt->clt_remote_user = NULL;
+
+       str_match_free(&clt->clt_srv_match);
 }
 
 char *
@@ -882,11 +887,34 @@ server_expand_http(struct client *clt, const char *val, char *buf,
        struct http_descriptor  *desc = clt->clt_descreq;
        struct server_config    *srv_conf = clt->clt_srv_conf;
        char                     ibuf[128], *str, *path, *query;
-       int                      ret;
+       const char              *errstr = NULL, *p;
+       size_t                   size;
+       int                      n, ret;
 
        if (strlcpy(buf, val, len) >= len)
                return (NULL);
 
+       /* Find previously matched substrings by index */
+       for (p = val; clt->clt_srv_match.sm_nmatch &&
+           (p = strstr(p, "%")) != NULL; p++) {
+               if (!isdigit(*(p + 1)))
+                       continue;
+
+               /* Copy number, leading '%' char and add trailing \0 */
+               size = strspn(p + 1, "0123456789") + 2;
+               if (size  >= sizeof(ibuf))
+                       return (NULL);
+               (void)strlcpy(ibuf, p, size);
+               n = strtonum(ibuf + 1, 0,
+                   clt->clt_srv_match.sm_nmatch - 1, &errstr);
+               if (errstr != NULL)
+                       return (NULL);
+
+               /* Expand variable with matched value */
+               if (expand_string(buf, len, ibuf,
+                   clt->clt_srv_match.sm_match[n]) != 0)
+                       return (NULL);
+       }
        if (strstr(val, "$DOCUMENT_URI") != NULL) {
                if ((path = url_encode(desc->http_path)) == NULL)
                        return (NULL);
@@ -999,8 +1027,10 @@ server_response(struct httpd *httpd, struct client *clt)
        struct server           *srv = clt->clt_srv;
        struct server_config    *srv_conf = &srv->srv_conf;
        struct kv               *kv, key, *host;
-       int                      portval = -1;
+       struct str_find          sm;
+       int                      portval = -1, ret;
        char                    *hostval;
+       const char              *errstr = NULL;
 
        /* Canonicalize the request path */
        if (desc->http_path == NULL ||
@@ -1060,9 +1090,17 @@ server_response(struct httpd *httpd, struct client *clt)
                                    hostname);
                        }
 #endif
-                       if ((srv_conf->flags & SRVFLAG_LOCATION) == 0 &&
-                           fnmatch(srv_conf->name, hostname,
-                           FNM_CASEFOLD) == 0 &&
+                       if (srv_conf->flags & SRVFLAG_LOCATION)
+                               continue;
+                       else if (srv_conf->flags & SRVFLAG_SERVER_MATCH) {
+                               str_find(hostname, srv_conf->name,
+                                   &sm, 1, &errstr);
+                               ret = errstr == NULL ? 0 : -1;
+                       } else {
+                               ret = fnmatch(srv_conf->name,
+                                   hostname, FNM_CASEFOLD);
+                       }
+                       if (ret == 0 &&
                            (portval == -1 ||
                            (portval != -1 && portval == srv_conf->port))) {
                                /* Replace host configuration */
@@ -1132,6 +1170,8 @@ server_getlocation(struct client *clt, const char *path)
 {
        struct server           *srv = clt->clt_srv;
        struct server_config    *srv_conf = clt->clt_srv_conf, *location;
+       const char              *errstr = NULL;
+       int                      ret;
 
        /* Now search for the location */
        TAILQ_FOREACH(location, &srv->srv_hosts, entry) {
@@ -1142,11 +1182,20 @@ server_getlocation(struct client *clt, const char *path)
                }
 #endif
                if ((location->flags & SRVFLAG_LOCATION) &&
-                   location->parent_id == srv_conf->parent_id &&
-                   fnmatch(location->location, path, FNM_CASEFOLD) == 0) {
-                       /* Replace host configuration */
-                       clt->clt_srv_conf = srv_conf = location;
-                       break;
+                   location->parent_id == srv_conf->parent_id) {
+                       errstr = NULL;
+                       if (location->flags & SRVFLAG_LOCATION_MATCH) {
+                               ret = str_match(path, location->location,
+                                   &clt->clt_srv_match, &errstr);
+                       } else {
+                               ret = fnmatch(location->location,
+                                   path, FNM_CASEFOLD);
+                       }
+                       if (ret == 0 && errstr == NULL) {
+                               /* Replace host configuration */
+                               clt->clt_srv_conf = srv_conf = location;
+                               break;
+                       }
                }
        }