From: reyk Date: Tue, 23 Jun 2015 15:23:14 +0000 (+0000) Subject: Add initial support for pattern matching using Lua's pattern matching code. X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=59355b5a31c0f61278cf322f2e739676bfa744ba;p=openbsd Add initial support for pattern matching using Lua's pattern matching code. With important help on the pattern matcher from semarie@ OK semarie@ --- diff --git a/usr.sbin/httpd/Makefile b/usr.sbin/httpd/Makefile index 885ad42c3b8..e01dec1f3a2 100644 --- a/usr.sbin/httpd/Makefile +++ b/usr.sbin/httpd/Makefile @@ -1,4 +1,4 @@ -# $OpenBSD: Makefile,v 1.27 2015/02/23 10:39:10 reyk Exp $ +# $OpenBSD: Makefile,v 1.28 2015/06/23 15:23:14 reyk Exp $ PROG= httpd SRCS= parse.y @@ -6,6 +6,9 @@ SRCS+= config.c control.c httpd.c log.c logger.c proc.c SRCS+= server.c server_http.c server_file.c server_fcgi.c MAN= httpd.8 httpd.conf.5 +SRCS+= patterns.c +MAN+= patterns.7 + LDADD= -levent -ltls -lssl -lcrypto -lutil DPADD= ${LIBEVENT} ${LIBTLS} ${LIBSSL} ${LIBCRYPTO} ${LIBUTIL} #DEBUG= -g -DDEBUG=3 -O0 diff --git a/usr.sbin/httpd/httpd.conf.5 b/usr.sbin/httpd/httpd.conf.5 index 87866d2f28e..4fe5aefc9a8 100644 --- a/usr.sbin/httpd/httpd.conf.5 +++ b/usr.sbin/httpd/httpd.conf.5 @@ -1,4 +1,4 @@ -.\" $OpenBSD: httpd.conf.5,v 1.61 2015/05/28 19:29:40 jmc Exp $ +.\" $OpenBSD: httpd.conf.5,v 1.62 2015/06/23 15:23:14 reyk Exp $ .\" .\" Copyright (c) 2014, 2015 Reyk Floeter .\" @@ -14,7 +14,7 @@ .\" ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF .\" OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. .\" -.Dd $Mdocdate: May 28 2015 $ +.Dd $Mdocdate: June 23 2015 $ .Dt HTTPD.CONF 5 .Os .Sh NAME @@ -131,14 +131,38 @@ The configured web servers. .Pp Each .Ic server -must have a -.Ar name -and include one or more lines of the following syntax: +section starts with a declaration of the server +.Ar name : +.Bl -tag -width Ds +.It Ic server Ar name Brq ... +Match the server name using shell globbing rules. 
+This can be an explicit name, +.Ar www.example.com , +or a name including wildcards, +.Ar *.example.com . +.It Ic server match Ar name Brq ... +Match the server name using pattern matching, +see +.Xr patterns 7 . +.El +.Pp +Followed by a block of options that is enclosed in curly brackets: .Bl -tag -width Ds .It Ic alias Ar name Specify an additional alias .Ar name for this server. +.It Ic alias match Ar name +Like the +.Ic alias +option, +but +.Ic match +the +.Ar name +using pattern matching instead of shell globbing rules, +see +.Xr patterns 7 . .It Oo Ic no Oc Ic authenticate Oo Ar realm Oc Ic with Pa htpasswd Authenticate a remote user for .Ar realm @@ -188,6 +212,12 @@ The configured IP address of the server. The configured TCP server port of the server. .It Ic $SERVER_NAME The name of the server. +.It Ic Pf % Ar n +The capture index +.Ar n +of a string that was captured by the enclosing +.Ic location match +option. .El .It Ic connection Ar option Set the specified options and limits for HTTP connections. @@ -247,6 +277,22 @@ except .Ic location and .Ic tcp . +.It Ic location match Ar path Brq ... +Like the +.Ic location +option, +but +.Ic match +the +.Ar path +using pattern matching instead of shell globbing rules, +see +.Xr patterns 7 . +The pattern may contain captures that can be used in the +.Ar uri +of an enclosed +.Ic block return +option. .It Oo Ic no Oc Ic log Op Ar option Set the specified logging options. 
Logging is enabled by default using the standard @@ -516,6 +562,7 @@ server "www.example.com" { .Ed .Sh SEE ALSO .Xr htpasswd 1 , +.Xr patterns 7 , .Xr httpd 8 , .Xr slowcgi 8 .Sh AUTHORS diff --git a/usr.sbin/httpd/httpd.h b/usr.sbin/httpd/httpd.h index 1431eaa2c9e..20d75a78334 100644 --- a/usr.sbin/httpd/httpd.h +++ b/usr.sbin/httpd/httpd.h @@ -1,4 +1,4 @@ -/* $OpenBSD: httpd.h,v 1.83 2015/05/20 09:28:47 kettenis Exp $ */ +/* $OpenBSD: httpd.h,v 1.84 2015/06/23 15:23:14 reyk Exp $ */ /* * Copyright (c) 2006 - 2015 Reyk Floeter @@ -35,6 +35,8 @@ #include #include +#include "patterns.h" + #define CONF_FILE "/etc/httpd.conf" #define HTTPD_SOCKET "/var/run/httpd.sock" #define HTTPD_USER "www" @@ -278,6 +280,7 @@ struct client { void *clt_srv_conf; u_int32_t clt_srv_id; struct sockaddr_storage clt_srv_ss; + struct str_match clt_srv_match; int clt_s; in_port_t clt_port; @@ -341,12 +344,15 @@ SPLAY_HEAD(client_tree, client); #define SRVFLAG_NO_AUTH 0x00020000 #define SRVFLAG_BLOCK 0x00040000 #define SRVFLAG_NO_BLOCK 0x00080000 +#define SRVFLAG_LOCATION_MATCH 0x00100000 +#define SRVFLAG_SERVER_MATCH 0x00200000 #define SRVFLAG_BITS \ "\10\01INDEX\02NO_INDEX\03AUTO_INDEX\04NO_AUTO_INDEX" \ "\05ROOT\06LOCATION\07FCGI\10NO_FCGI\11LOG\12NO_LOG\13SOCKET" \ "\14SYSLOG\15NO_SYSLOG\16TLS\17ACCESS_LOG\20ERROR_LOG" \ - "\21AUTH\22NO_AUTH\23BLOCK\24NO_BLOCK" + "\21AUTH\22NO_AUTH\23BLOCK\24NO_BLOCK\25LOCATION_MATCH" \ + "\26SERVER_MATCH" #define TCPFLAG_NODELAY 0x01 #define TCPFLAG_NNODELAY 0x02 diff --git a/usr.sbin/httpd/parse.y b/usr.sbin/httpd/parse.y index 0aae42164d2..1ba5cf33cb1 100644 --- a/usr.sbin/httpd/parse.y +++ b/usr.sbin/httpd/parse.y @@ -1,4 +1,4 @@ -/* $OpenBSD: parse.y,v 1.67 2015/04/01 04:51:15 jsg Exp $ */ +/* $OpenBSD: parse.y,v 1.68 2015/06/23 15:23:14 reyk Exp $ */ /* * Copyright (c) 2007 - 2015 Reyk Floeter @@ -107,7 +107,7 @@ int host_if(const char *, struct addresslist *, int host(const char *, struct addresslist *, int, struct portrange *, const char *, 
int); void host_free(struct addresslist *); -struct server *server_inherit(struct server *, const char *, +struct server *server_inherit(struct server *, struct server_config *, struct server_config *); int getservice(char *); int is_if_in_group(const char *, const char *); @@ -131,14 +131,14 @@ typedef struct { %token ACCESS ALIAS AUTO BACKLOG BODY BUFFER CERTIFICATE CHROOT CIPHERS COMMON %token COMBINED CONNECTION DHE DIRECTORY ECDHE ERR FCGI INDEX IP KEY LISTEN -%token LOCATION LOG LOGDIR MAXIMUM NO NODELAY ON PORT PREFORK PROTOCOLS +%token LOCATION LOG LOGDIR MATCH MAXIMUM NO NODELAY ON PORT PREFORK PROTOCOLS %token REQUEST REQUESTS ROOT SACK SERVER SOCKET STRIP STYLE SYSLOG TCP TIMEOUT %token TLS TYPES %token ERROR INCLUDE AUTHENTICATE WITH BLOCK DROP RETURN PASS %token STRING %token NUMBER %type port -%type opttls +%type opttls optmatch %type timeout %type numberstring optstring %type authopts @@ -200,26 +200,26 @@ main : PREFORK NUMBER { } ; -server : SERVER STRING { +server : SERVER optmatch STRING { struct server *s; if (!loadcfg) { - free($2); + free($3); YYACCEPT; } if ((s = calloc(1, sizeof (*s))) == NULL) fatal("out of memory"); - if (strlcpy(s->srv_conf.name, $2, + if (strlcpy(s->srv_conf.name, $3, sizeof(s->srv_conf.name)) >= sizeof(s->srv_conf.name)) { yyerror("server name truncated"); - free($2); + free($3); free(s); YYERROR; } - free($2); + free($3); strlcpy(s->srv_conf.root, HTTPD_DOCROOT, sizeof(s->srv_conf.root)); @@ -235,7 +235,9 @@ server : SERVER STRING { s->srv_conf.timeout.tv_sec = SERVER_TIMEOUT; s->srv_conf.maxrequests = SERVER_MAXREQUESTS; s->srv_conf.maxrequestbody = SERVER_MAXREQUESTBODY; - s->srv_conf.flags |= SRVFLAG_LOG; + s->srv_conf.flags = SRVFLAG_LOG; + if ($2) + s->srv_conf.flags |= SRVFLAG_SERVER_MATCH; s->srv_conf.logformat = LOG_FORMAT_COMMON; s->srv_conf.tls_protocols = TLS_PROTOCOLS_DEFAULT; if ((s->srv_conf.tls_cert_file = @@ -334,7 +336,7 @@ server : SERVER STRING { continue; if ((sn = server_inherit(srv, - b->name, a)) 
== NULL) { + b, a)) == NULL) { serverconfig_free(srv_conf); free(srv); YYABORT; @@ -405,30 +407,35 @@ serveroptsl : LISTEN ON STRING opttls port { } if (alias != NULL) { + /* IP-based; use name match flags from parent */ + alias->flags = srv->srv_conf.flags; TAILQ_INSERT_TAIL(&srv->srv_hosts, alias, entry); } } - | ALIAS STRING { + | ALIAS optmatch STRING { struct server_config *alias; if (parentsrv != NULL) { yyerror("alias inside location"); - free($2); + free($3); YYERROR; } if ((alias = calloc(1, sizeof(*alias))) == NULL) fatal("out of memory"); - if (strlcpy(alias->name, $2, sizeof(alias->name)) >= + if (strlcpy(alias->name, $3, sizeof(alias->name)) >= sizeof(alias->name)) { yyerror("server alias truncated"); - free($2); + free($3); free(alias); YYERROR; } - free($2); + free($3); + + if ($2) + alias->flags |= SRVFLAG_SERVER_MATCH; TAILQ_INSERT_TAIL(&srv->srv_hosts, alias, entry); } @@ -456,38 +463,38 @@ serveroptsl : LISTEN ON STRING opttls port { | fastcgi | authenticate | filter - | LOCATION STRING { + | LOCATION optmatch STRING { struct server *s; if (srv->srv_conf.ss.ss_family == AF_UNSPEC) { yyerror("listen address not specified"); - free($2); + free($3); YYERROR; } if (parentsrv != NULL) { - yyerror("location %s inside location", $2); - free($2); + yyerror("location %s inside location", $3); + free($3); YYERROR; } if (!loadcfg) { - free($2); + free($3); YYACCEPT; } if ((s = calloc(1, sizeof (*s))) == NULL) fatal("out of memory"); - if (strlcpy(s->srv_conf.location, $2, + if (strlcpy(s->srv_conf.location, $3, sizeof(s->srv_conf.location)) >= sizeof(s->srv_conf.location)) { yyerror("server location truncated"); - free($2); + free($3); free(s); YYERROR; } - free($2); + free($3); if (strlcpy(s->srv_conf.name, srv->srv_conf.name, sizeof(s->srv_conf.name)) >= @@ -501,6 +508,8 @@ serveroptsl : LISTEN ON STRING opttls port { /* A location entry uses the parent id */ s->srv_conf.parent_id = srv->srv_conf.id; s->srv_conf.flags = SRVFLAG_LOCATION; + if ($2) + 
s->srv_conf.flags |= SRVFLAG_LOCATION_MATCH; s->srv_s = -1; memcpy(&s->srv_conf.ss, &srv->srv_conf.ss, sizeof(s->srv_conf.ss)); @@ -884,6 +893,10 @@ block : BLOCK { } ; +optmatch : /* empty */ { $$ = 0; } + | MATCH { $$ = 1; } + ; + optstring : /* empty */ { $$ = NULL; } | STRING { $$ = $1; } ; @@ -1108,6 +1121,7 @@ lookup(char *s) { "location", LOCATION }, { "log", LOG }, { "logdir", LOGDIR }, + { "match", MATCH }, { "max", MAXIMUM }, { "no", NO }, { "nodelay", NODELAY }, @@ -1889,7 +1903,7 @@ host_free(struct addresslist *al) } struct server * -server_inherit(struct server *src, const char *name, +server_inherit(struct server *src, struct server_config *alias, struct server_config *addr) { struct server *dst, *s, *dstl; @@ -1927,7 +1941,7 @@ server_inherit(struct server *src, const char *name, } /* Now set alias and listen address */ - strlcpy(dst->srv_conf.name, name, sizeof(dst->srv_conf.name)); + strlcpy(dst->srv_conf.name, alias->name, sizeof(dst->srv_conf.name)); memcpy(&dst->srv_conf.ss, &addr->ss, sizeof(dst->srv_conf.ss)); dst->srv_conf.port = addr->port; dst->srv_conf.prefixlen = addr->prefixlen; @@ -1936,6 +1950,10 @@ server_inherit(struct server *src, const char *name, else dst->srv_conf.flags &= ~SRVFLAG_TLS; + /* Don't inherit the "match" option, use it from the alias */ + dst->srv_conf.flags &= ~SRVFLAG_SERVER_MATCH; + dst->srv_conf.flags |= (alias->flags & SRVFLAG_SERVER_MATCH); + if (server_tls_load_keypair(dst) == -1) { yyerror("failed to load public/private keys " "for server %s", dst->srv_conf.name); @@ -1975,7 +1993,8 @@ server_inherit(struct server *src, const char *name, fatal("out of memory"); memcpy(&dstl->srv_conf, &s->srv_conf, sizeof(dstl->srv_conf)); - strlcpy(dstl->srv_conf.name, name, sizeof(dstl->srv_conf.name)); + strlcpy(dstl->srv_conf.name, alias->name, + sizeof(dstl->srv_conf.name)); /* Copy the new Id and listen address */ dstl->srv_conf.id = ++last_server_id; diff --git a/usr.sbin/httpd/patterns.7 b/usr.sbin/httpd/patterns.7 
new file mode 100644 index 00000000000..1ec1592d222 --- /dev/null +++ b/usr.sbin/httpd/patterns.7 @@ -0,0 +1,305 @@ +.\" $OpenBSD: patterns.7,v 1.1 2015/06/23 15:23:14 reyk Exp $ +.\" +.\" Copyright (c) 2015 Reyk Floeter +.\" Copyright (C) 1994-2015 Lua.org, PUC-Rio. +.\" +.\" Permission is hereby granted, free of charge, to any person obtaining +.\" a copy of this software and associated documentation files (the +.\" "Software"), to deal in the Software without restriction, including +.\" without limitation the rights to use, copy, modify, merge, publish, +.\" distribute, sublicense, and/or sell copies of the Software, and to +.\" permit persons to whom the Software is furnished to do so, subject to +.\" the following conditions: +.\" +.\" The above copyright notice and this permission notice shall be +.\" included in all copies or substantial portions of the Software. +.\" +.\" THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +.\" EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +.\" MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +.\" IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +.\" CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +.\" TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +.\" SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +.\" +.\" Derived from section 6.4.1 in manual.html of Lua 5.3.1: +.\" $Id: patterns.7,v 1.1 2015/06/23 15:23:14 reyk Exp $ +.\" +.Dd $Mdocdate: June 23 2015 $ +.Dt PATTERNS 7 +.Os +.Sh NAME +.Nm patterns +.Nd Lua's pattern matching rules +.Sh DESCRIPTION +Pattern matching in +.Xr httpd 8 +is based on the implementation of the Lua scripting language and +provides a simple and fast alternative to regular expressions (REs) that +are described in +.Xr re_format 7 . +Patterns are described by regular strings, which are interpreted as +patterns by the pattern-matching +.Dq find +and +.Dq match +functions. 
+This document describes the syntax and the meaning (that is, what they +match) of these strings. +.Sh CHARACTER CLASS +.Pp +A character class is used to represent a set of characters. +The following combinations are allowed in describing a character +class: +.Bl -tag -width Ds +.It Ar x +(where +.Ar x +is not one of the magic characters +.Sq ^$()%.[]*+-? ) +represents the character +.Ar x +itself. +.It . +(a dot) represents all characters. +.It %a +represents all letters. +.It %c +represents all control characters. +.It %d +represents all digits. +.It %g +represents all printable characters except space. +.It %l +represents all lowercase letters. +.It %p +represents all punctuation characters. +.It %s +represents all space characters. +.It %u +represents all uppercase letters. +.It %w +represents all alphanumeric characters. +.It %x +represents all hexadecimal digits. +.It Pf % Ar x +(where +.Ar x +is any non-alphanumeric character) represents the character +.Ar x . +This is the standard way to escape the magic characters. +Any non-alphanumeric character (including all punctuation characters, +even the non-magical) can be preceded by a +.Sq % +when used to represent itself in a pattern. +.It Bq Ar set +represents the class which is the union of all +characters in +.Ar set . +A range of characters can be specified by separating the end +characters of the range, in ascending order, with a +.Sq - . +All classes +.Sq Ar %x +described above can also be used as components in +.Ar set . +All other characters in +.Ar set +represent themselves. +For example, +.Sq [%w_] +(or +.Sq [_%w] ) +represents all alphanumeric characters plus the underscore, +.Sq [0-7] +represents the octal digits, +and +.Sq [0-7%l%-] +represents the octal digits plus the lowercase letters plus the +.Sq - +character. +.Pp +The interaction between ranges and classes is not defined. +Therefore, patterns like +.Sq [%a-z] +or +.Sq [a-%%] +have no meaning. 
+.It Bq Ar ^set +represents the complement of +.Ar set , +where +.Ar set +is interpreted as above. +.El +.Pp +For all classes represented by single letters ( +.Sq %a , +.Sq %c , +etc.), +the corresponding uppercase letter represents the complement of the class. +For instance, +.Sq %S +represents all non-space characters. +.Pp +The definitions of letter, space, and other character groups depend on +the current locale. +In particular, the class +.Sq [a-z] +may not be equivalent to +.Sq %l . +.Sh PATTERN ITEM +A pattern item can be +.Bl -bullet +.It +a single character class, which matches any single character in the class; +.It +a single character class followed by +.Sq * , +which matches zero or more repetitions of characters in the class. +These repetition items will always match the longest possible sequence; +.It +a single character class followed by +.Sq + , +which matches one or more repetitions of characters in the class. +These repetition items will always match the longest possible sequence; +.It +a single character class followed by +.Sq - , +which also matches zero or more repetitions of characters in the class. +Unlike +.Sq * , +these repetition items will always match the shortest possible sequence; +.It +a single character class followed by +.Sq \? , +which matches zero or one occurrence of a character in the class. +It always matches one occurrence if possible; +.It +.Sq Pf % Ar n , +for +.Ar n +between 1 and 9; +such item matches a substring equal to the n-th captured string (see below); +.It +.Sq Pf %b Ar xy , +where +.Ar x +and +.Ar y +are two distinct characters; +such item matches strings that start with +.Ar x , +end with +.Ar y , +and where the +.Ar x +and +.Ar y +are +.Em balanced . +This means that, if one reads the string from left to right, counting +.Em +1 +for an +.Ar x +and +.Em -1 +for a +.Ar y , +the ending +.Ar y +is the first +.Ar y +where the count reaches 0. 
+For instance, the item +.Sq %b() +matches expressions with balanced parentheses. +.It +.Sq Pf %f Bq Ar set , +a +.Em frontier pattern ; +such item matches an empty string at any position such that the next +character belongs to +.Ar set +and the previous character does not belong to +.Ar set . +The set +.Ar set +is interpreted as previously described. +The beginning and the end of the subject are handled as if +they were the character +.Sq \e0 . +.El +.Sh PATTERN +A pattern is a sequence of pattern items. +A caret +.Sq ^ +at the beginning of a pattern anchors the match at the beginning of +the subject string. +A +.Sq \$ +at the end of a pattern anchors the match at the end of the subject string. +At other positions, +.Sq ^ +and +.Sq \$ +have no special meaning and represent themselves. +.Sh CAPTURES +A pattern can contain sub-patterns enclosed in parentheses; they +describe captures. +When a match succeeds, the substrings of the subject string that match +captures are stored (captured) for future use. +Captures are numbered according to their left parentheses. +For instance, in the pattern +.Qq (a*(.)%w(%s*)) , +the part of the string matching +.Qq a*(.)%w(%s*) +is stored as the first capture (and therefore has number 1); +the character matching +.So \. Sc +is captured with number 2, +and the part matching +.Qq %s* +has number 3. +.Pp +As a special case, the empty capture +.Sq () +captures the current string position (a number). +For instance, if we apply the pattern +.Qq ()aa() +on the string +.Qq flaaap , +there will be two captures: 3 and 5. +.Sh SEE ALSO +.Xr fnmatch 3 , +.Xr re_format 7 , +.Xr httpd 8 . +.Rs +.%A Roberto Ierusalimschy +.%A Luiz Henrique de Figueiredo +.%A Waldemar Celes +.%Q Lua.org +.%Q PUC-Rio +.%D June 2015 +.%R Lua 5.3 Reference Manual +.%T Patterns +.%U http://www.lua.org/manual/5.3/manual.html#6.4.1 +.Re +.Sh HISTORY +The first implementation of the pattern rules was introduced with Lua 2.5. 
+Almost twenty years later, +an implementation based on Lua 5.3.1 appeared in +.Ox 5.8 . +.Sh AUTHORS +The pattern matching is derived from the original implementation of +the Lua scripting language, that is written by +.An -nosplit +.An Roberto Ierusalimschy , +.An Waldemar Celes , +and +.An Luiz Henrique de Figueiredo +at PUC-Rio. +It was turned into a native C API for +.Xr httpd 8 +by +.An Reyk Floeter Aq Mt reyk@openbsd.org . diff --git a/usr.sbin/httpd/patterns.c b/usr.sbin/httpd/patterns.c new file mode 100644 index 00000000000..e4e3ab43cd8 --- /dev/null +++ b/usr.sbin/httpd/patterns.c @@ -0,0 +1,715 @@ +/* $OpenBSD: patterns.c,v 1.1 2015/06/23 15:23:14 reyk Exp $ */ + +/* + * Copyright (c) 2015 Reyk Floeter + * Copyright (C) 1994-2015 Lua.org, PUC-Rio. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ */ + +/* + * Derived from Lua 5.3.1: + * $Id: patterns.c,v 1.1 2015/06/23 15:23:14 reyk Exp $ + * Standard library for string operations and pattern-matching + */ + +#include +#include +#include +#include +#include +#include + +#include "patterns.h" + +#define uchar(c) ((unsigned char)(c)) /* macro to 'unsign' a char */ +#define CAP_UNFINISHED (-1) +#define CAP_POSITION (-2) +#define L_ESC '%' +#define SPECIALS "^$*+?.([%-" + +struct match_state { + int matchdepth; /* control for recursive depth (to avoid C + * stack overflow) */ + int repetitioncounter; /* control the repetition items */ + int maxcaptures; /* configured capture limit */ + const char *src_init; /* init of source string */ + const char *src_end; /* end ('\0') of source string */ + const char *p_end; /* end ('\0') of pattern */ + const char *error; /* should be NULL */ + int level; /* total number of captures (finished or + * unfinished) */ + struct { + const char *init; + ptrdiff_t len; + } capture[MAXCAPTURES]; +}; + +/* recursive function */ +static const char *match(struct match_state *, const char *, const char *); + +static int +match_error(struct match_state *ms, const char *error) +{ + ms->error = ms->error == NULL ? 
error : ms->error; + return (-1); +} + +static int +check_capture(struct match_state *ms, int l) +{ + l -= '1'; + if (l < 0 || l >= ms->level || ms->capture[l].len == CAP_UNFINISHED) + return match_error(ms, "invalid capture index"); + return (l); +} + +static int +capture_to_close(struct match_state *ms) +{ + int level = ms->level; + for (level--; level >= 0; level--) + if (ms->capture[level].len == CAP_UNFINISHED) + return (level); + return match_error(ms, "invalid pattern capture"); +} + +static const char * +classend(struct match_state *ms, const char *p) +{ + switch (*p++) { + case L_ESC: + if (p == ms->p_end) + match_error(ms, + "malformed pattern (ends with '%%')"); + return p + 1; + case '[': + if (*p == '^') + p++; + do { + /* look for a ']' */ + if (p == ms->p_end) { + match_error(ms, + "malformed pattern (missing ']')"); + break; + } + if (*(p++) == L_ESC && p < ms->p_end) { + /* skip escapes (e.g. '%]') */ + p++; + } + } while (*p != ']'); + return p + 1; + default: + return p; + } +} + +static int +match_class(int c, int cl) +{ + int res; + switch (tolower(cl)) { + case 'a': + res = isalpha(c); + break; + case 'c': + res = iscntrl(c); + break; + case 'd': + res = isdigit(c); + break; + case 'g': + res = isgraph(c); + break; + case 'l': + res = islower(c); + break; + case 'p': + res = ispunct(c); + break; + case 's': + res = isspace(c); + break; + case 'u': + res = isupper(c); + break; + case 'w': + res = isalnum(c); + break; + case 'x': + res = isxdigit(c); + break; + case 'z': + res = (c == 0); + break; /* deprecated option */ + default: + return (cl == c); + } + return (islower(cl) ? 
res : !res); +} + +static int +matchbracketclass(int c, const char *p, const char *ec) +{ + int sig = 1; + if (*(p + 1) == '^') { + sig = 0; + /* skip the '^' */ + p++; + } + while (++p < ec) { + if (*p == L_ESC) { + p++; + if (match_class(c, uchar(*p))) + return sig; + } else if ((*(p + 1) == '-') && (p + 2 < ec)) { + p += 2; + if (uchar(*(p - 2)) <= c && c <= uchar(*p)) + return sig; + } else if (uchar(*p) == c) + return sig; + } + return !sig; +} + +static int +singlematch(struct match_state *ms, const char *s, const char *p, + const char *ep) +{ + if (s >= ms->src_end) + return 0; + else { + int c = uchar(*s); + switch (*p) { + case '.': + /* matches any char */ + return (1); + case L_ESC: + return match_class(c, uchar(*(p + 1))); + case '[': + return matchbracketclass(c, p, ep - 1); + default: + return (uchar(*p) == c); + } + } +} + +static const char * +matchbalance(struct match_state *ms, const char *s, const char *p) +{ + if (p >= ms->p_end - 1) { + match_error(ms, + "malformed pattern (missing arguments to '%b')"); + return (NULL); + } + if (*s != *p) + return (NULL); + else { + int b = *p; + int e = *(p + 1); + int cont = 1; + while (++s < ms->src_end) { + if (*s == e) { + if (--cont == 0) + return s + 1; + } else if (*s == b) + cont++; + } + } + + /* string ends out of balance */ + return (NULL); +} + +static const char * +max_expand(struct match_state *ms, const char *s, const char *p, const char *ep) +{ + ptrdiff_t i = 0; + /* counts maximum expand for item */ + while (singlematch(ms, s + i, p, ep)) + i++; + /* keeps trying to match with the maximum repetitions */ + while (i >= 0) { + const char *res = match(ms, (s + i), ep + 1); + if (res) + return res; + /* else didn't match; reduce 1 repetition to try again */ + i--; + } + return NULL; +} + +static const char * +min_expand(struct match_state *ms, const char *s, const char *p, const char *ep) +{ + for (;;) { + const char *res = match(ms, s, ep + 1); + if (res != NULL) + return res; + else if 
(singlematch(ms, s, p, ep)) + s++; /* try with one more repetition */ + else + return NULL; + } +} + +static const char * +start_capture(struct match_state *ms, const char *s, const char *p, int what) +{ + const char *res; + + int level = ms->level; + if (level >= ms->maxcaptures) { + match_error(ms, "too many captures"); + return (NULL); + } + ms->capture[level].init = s; + ms->capture[level].len = what; + ms->level = level + 1; + /* undo capture if match failed */ + if ((res = match(ms, s, p)) == NULL) + ms->level--; + return res; +} + +static const char * +end_capture(struct match_state *ms, const char *s, const char *p) +{ + int l = capture_to_close(ms); + const char *res; + if (l == -1) + return NULL; + /* close capture */ + ms->capture[l].len = s - ms->capture[l].init; + /* undo capture if match failed */ + if ((res = match(ms, s, p)) == NULL) + ms->capture[l].len = CAP_UNFINISHED; + return res; +} + +static const char * +match_capture(struct match_state *ms, const char *s, int l) +{ + size_t len; + l = check_capture(ms, l); + if (l == -1) + return NULL; + len = ms->capture[l].len; + if ((size_t) (ms->src_end - s) >= len && + memcmp(ms->capture[l].init, s, len) == 0) + return s + len; + else + return NULL; +} + +static const char * +match(struct match_state *ms, const char *s, const char *p) +{ + const char *ep, *res; + char previous; + + if (ms->matchdepth-- == 0) { + match_error(ms, "pattern too complex"); + return (NULL); + } + + /* using goto's to optimize tail recursion */ + init: + /* end of pattern? */ + if (p != ms->p_end) { + switch (*p) { + case '(': + /* start capture */ + if (*(p + 1) == ')') + /* position capture? */ + s = start_capture(ms, s, p + 2, CAP_POSITION); + else + s = start_capture(ms, s, p + 1, CAP_UNFINISHED); + break; + case ')': + /* end capture */ + s = end_capture(ms, s, p + 1); + break; + case '$': + /* is the '$' the last char in pattern? 
*/ + if ((p + 1) != ms->p_end) { + /* no; go to default */ + goto dflt; + } + /* check end of string */ + s = (s == ms->src_end) ? s : NULL; + break; + case L_ESC: + /* escaped sequences not in the format class[*+?-]? */ + switch (*(p + 1)) { + case 'b': + /* balanced string? */ + s = matchbalance(ms, s, p + 2); + if (s != NULL) { + p += 4; + /* return match(ms, s, p + 4); */ + goto init; + } /* else fail (s == NULL) */ + break; + case 'f': + /* frontier? */ + p += 2; + if (*p != '[') { + match_error(ms, "missing '['" + " after '%f' in pattern"); + break; + } + /* points to what is next */ + ep = classend(ms, p); + if (ms->error != NULL) + break; + previous = + (s == ms->src_init) ? '\0' : *(s - 1); + if (!matchbracketclass(uchar(previous), + p, ep - 1) && + matchbracketclass(uchar(*s), + p, ep - 1)) { + p = ep; + /* return match(ms, s, ep); */ + goto init; + } + /* match failed */ + s = NULL; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + /* capture results (%0-%9)? */ + s = match_capture(ms, s, uchar(*(p + 1))); + if (s != NULL) { + p += 2; + /* return match(ms, s, p + 2) */ + goto init; + } + break; + default: + goto dflt; + } + break; + default: + + /* pattern class plus optional suffix */ + dflt: + /* points to optional suffix */ + ep = classend(ms, p); + if (ms->error != NULL) + break; + + /* does not match at least once? */ + if (!singlematch(ms, s, p, ep)) { + if (ms->repetitioncounter-- == 0) { + match_error(ms, "max repetition items"); + s = NULL; /* fail */ + /* accept empty? */ + } else if + (*ep == '*' || *ep == '?' 
|| *ep == '-') { + p = ep + 1; + /* return match(ms, s, ep + 1); */ + goto init; + } else { + /* '+' or no suffix */ + s = NULL; /* fail */ + } + } else { + /* matched once */ + /* handle optional suffix */ + switch (*ep) { + case '?': + /* optional */ + if ((res = + match(ms, s + 1, ep + 1)) != NULL) + s = res; + else { + /* + * else return + * match(ms, s, ep + 1); + */ + p = ep + 1; + goto init; + } + break; + case '+': + /* 1 or more repetitions */ + s++; /* 1 match already done */ + /* FALLTHROUGH */ + case '*': + /* 0 or more repetitions */ + s = max_expand(ms, s, p, ep); + break; + case '-': + /* 0 or more repetitions (minimum) */ + s = min_expand(ms, s, p, ep); + break; + default: + /* no suffix */ + s++; + p = ep; + /* return match(ms, s + 1, ep); */ + goto init; + } + } + break; + } + } + ms->matchdepth++; + return s; +} + +static const char * +lmemfind(const char *s1, size_t l1, + const char *s2, size_t l2) +{ + const char *init; + + if (l2 == 0) { + /* empty strings are everywhere */ + return (s1); + } else if (l2 > l1) { + /* avoids a negative 'l1' */ + return (NULL); + } else { + /* + * to search for a '*s2' inside 's1' + * - 1st char will be checked by 'memchr' + * - 's2' cannot be found after that + */ + l2--; + l1 = l1 - l2; + while (l1 > 0 && + (init = (const char *)memchr(s1, *s2, l1)) != NULL) { + /* 1st char is already checked */ + init++; + if (memcmp(init, s2 + 1, l2) == 0) + return init - 1; + else { + /* correct 'l1' and 's1' to try again */ + l1 -= init - s1; + s1 = init; + } + } + /* not found */ + return (NULL); + } +} + +static int +push_onecapture(struct match_state *ms, int i, const char *s, + const char *e, struct str_find *sm) +{ + if (i >= ms->level) { + if (i == 0 || ms->level == 0) { + /* add whole match */ + sm->sm_so = (off_t)(s - ms->src_init); + sm->sm_eo = (off_t)(e - s) + sm->sm_so; + } else + return match_error(ms, "invalid capture index"); + } else { + ptrdiff_t l = ms->capture[i].len; + if (l == CAP_UNFINISHED) + return 
match_error(ms, "unfinished capture"); + sm->sm_so = ms->capture[i].init - ms->src_init; + sm->sm_eo = sm->sm_so + l; + } + sm->sm_eo = sm->sm_eo < sm->sm_so ? sm->sm_so : sm->sm_eo; + return (0); +} + +static int +push_captures(struct match_state *ms, const char *s, const char *e, + struct str_find *sm, size_t nsm) +{ + unsigned int i; + unsigned int nlevels = (ms->level <= 0 && s) ? 1 : ms->level; + + if (nlevels > nsm) + nlevels = nsm; + for (i = 0; i < nlevels; i++) + if (push_onecapture(ms, i, s, e, sm + i) == -1) + break; + + /* number of strings pushed */ + return (nlevels); +} + +/* check whether pattern has no special characters */ +static int +nospecials(const char *p, size_t l) +{ + size_t upto = 0; + + do { + if (strpbrk(p + upto, SPECIALS)) { + /* pattern has a special character */ + return 0; + } + /* may have more after \0 */ + upto += strlen(p + upto) + 1; + } while (upto <= l); + + /* no special chars found */ + return (1); +} + +static int +str_find_aux(struct match_state *ms, const char *pattern, const char *string, + struct str_find *sm, size_t nsm, off_t init) +{ + size_t ls = strlen(string); + size_t lp = strlen(pattern); + const char *s = string; + const char *p = pattern; + const char *s1, *s2; + int anchor, i; + + if (init < 0) + init = 0; + else if (init > (off_t)ls) + return match_error(ms, "starting after string's end"); + s1 = s + init; + + if (nospecials(p, lp)) { + /* do a plain search */ + s2 = lmemfind(s1, ls - (size_t)init, p, lp); + if (s2 != NULL) { + i = 0; + sm[i].sm_so = 0; + sm[i].sm_eo = ls; + if (nsm > 1) { + i++; + sm[i].sm_so = s2 - s; + sm[i].sm_eo = (s2 - s) + lp; + } + return (i + 1); + } + return (0); + } + + anchor = (*p == '^'); + if (anchor) { + p++; + lp--; /* skip anchor character */ + } + ms->maxcaptures = (nsm > MAXCAPTURES ? 
MAXCAPTURES : nsm) - 1; + ms->matchdepth = MAXCCALLS; + ms->repetitioncounter = MAXREPETITION; + ms->src_init = s; + ms->src_end = s + ls; + ms->p_end = p + lp; + do { + const char *res; + ms->level = 0; + if ((res = match(ms, s1, p)) != NULL) { + sm->sm_so = 0; + sm->sm_eo = ls; + return push_captures(ms, s1, res, sm + 1, nsm - 1) + 1; + + } else if (ms->error != NULL) { + return 0; + } + } while (s1++ < ms->src_end && !anchor); + + return 0; +} + +int +str_find(const char *string, const char *pattern, struct str_find *sm, + size_t nsm, const char **errstr) +{ + struct match_state ms; + int ret; + + memset(&ms, 0, sizeof(ms)); + memset(sm, 0, nsm * sizeof(*sm)); + + ret = str_find_aux(&ms, pattern, string, sm, nsm, 0); + if (ms.error != NULL) { + /* Return 0 on error and store the error string */ + *errstr = ms.error; + ret = 0; + } else + *errstr = NULL; + + return (ret); +} + +int +str_match(const char *string, const char *pattern, struct str_match *m, + const char **errstr) +{ + struct str_find sm[MAXCAPTURES]; + struct match_state ms; + int ret, i; + size_t len, nsm; + + nsm = MAXCAPTURES; + memset(&ms, 0, sizeof(ms)); + memset(sm, 0, sizeof(sm)); + memset(m, 0, sizeof(*m)); + + ret = str_find_aux(&ms, pattern, string, sm, nsm, 0); + if (ret == 0 || ms.error != NULL) { + /* Return -1 on error and store the error string */ + *errstr = ms.error; + return (-1); + } + + if ((m->sm_match = calloc(ret, sizeof(char *))) == NULL) { + *errstr = strerror(errno); + return (-1); + } + m->sm_nmatch = ret; + + for (i = 0; i < ret; i++) { + if (sm[i].sm_so > sm[i].sm_eo) + continue; + len = sm[i].sm_eo - sm[i].sm_so; + if ((m->sm_match[i] = strndup(string + + sm[i].sm_so, len)) == NULL) { + *errstr = strerror(errno); + str_match_free(m); + return (-1); + } + } + + *errstr = NULL; + return (0); +} + +void +str_match_free(struct str_match *m) +{ + unsigned int i = 0; + for (i = 0; i < m->sm_nmatch; i++) + free(m->sm_match[i]); + free(m->sm_match); + m->sm_nmatch = 0; +} diff 
--git a/usr.sbin/httpd/patterns.h b/usr.sbin/httpd/patterns.h new file mode 100644 index 00000000000..e753849eaa3 --- /dev/null +++ b/usr.sbin/httpd/patterns.h @@ -0,0 +1,47 @@ +/* $OpenBSD: patterns.h,v 1.1 2015/06/23 15:23:14 reyk Exp $ */ + +/* + * Copyright (c) 2015 Reyk Floeter + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include +#include + +#ifndef PATTERNS_H +#define PATTERNS_H + +#define MAXCAPTURES 32 /* Max no. 
of allowed captures in pattern */ +#define MAXCCALLS 200 /* Max recursion depth in pattern matching */ +#define MAXREPETITION 0xfffff /* Max for repetition items */ + +struct str_find { + off_t sm_so; /* start offset of match */ + off_t sm_eo; /* end offset of match */ +}; + +struct str_match { + char **sm_match; /* allocated array of matched strings */ + unsigned int sm_nmatch; /* number of elements in array */ +}; + +__BEGIN_DECLS +int str_find(const char *, const char *, struct str_find *, size_t, + const char **); +int str_match(const char *, const char *, struct str_match *, + const char **); +void str_match_free(struct str_match *); +__END_DECLS + +#endif /* PATTERNS_H */ diff --git a/usr.sbin/httpd/server_http.c b/usr.sbin/httpd/server_http.c index 37555f84a36..146b1420350 100644 --- a/usr.sbin/httpd/server_http.c +++ b/usr.sbin/httpd/server_http.c @@ -1,4 +1,4 @@ -/* $OpenBSD: server_http.c,v 1.82 2015/06/22 11:46:06 reyk Exp $ */ +/* $OpenBSD: server_http.c,v 1.83 2015/06/23 15:23:14 reyk Exp $ */ /* * Copyright (c) 2006 - 2015 Reyk Floeter @@ -29,14 +29,16 @@ #include #include #include +#include #include #include #include #include -#include +#include #include "httpd.h" #include "http.h" +#include "patterns.h" static int server_httpmethod_cmp(const void *, const void *); static int server_httperror_cmp(const void *, const void *); @@ -633,6 +635,7 @@ server_reset_http(struct client *clt) clt->clt_remote_user = NULL; clt->clt_bev->readcb = server_read_http; clt->clt_srv_conf = &srv->srv_conf; + str_match_free(&clt->clt_srv_match); } ssize_t @@ -873,6 +876,8 @@ server_close_http(struct client *clt) clt->clt_descresp = NULL; free(clt->clt_remote_user); clt->clt_remote_user = NULL; + + str_match_free(&clt->clt_srv_match); } char * @@ -882,11 +887,34 @@ server_expand_http(struct client *clt, const char *val, char *buf, struct http_descriptor *desc = clt->clt_descreq; struct server_config *srv_conf = clt->clt_srv_conf; char ibuf[128], *str, *path, *query; - int
ret; + const char *errstr = NULL, *p; + size_t size; + int n, ret; if (strlcpy(buf, val, len) >= len) return (NULL); + /* Find previously matched substrings by index */ + for (p = val; clt->clt_srv_match.sm_nmatch && + (p = strstr(p, "%")) != NULL; p++) { + if (!isdigit(*(p + 1))) + continue; + + /* Copy number, leading '%' char and add trailing \0 */ + size = strspn(p + 1, "0123456789") + 2; + if (size >= sizeof(ibuf)) + return (NULL); + (void)strlcpy(ibuf, p, size); + n = strtonum(ibuf + 1, 0, + clt->clt_srv_match.sm_nmatch - 1, &errstr); + if (errstr != NULL) + return (NULL); + + /* Expand variable with matched value */ + if (expand_string(buf, len, ibuf, + clt->clt_srv_match.sm_match[n]) != 0) + return (NULL); + } if (strstr(val, "$DOCUMENT_URI") != NULL) { if ((path = url_encode(desc->http_path)) == NULL) return (NULL); @@ -999,8 +1027,10 @@ server_response(struct httpd *httpd, struct client *clt) struct server *srv = clt->clt_srv; struct server_config *srv_conf = &srv->srv_conf; struct kv *kv, key, *host; - int portval = -1; + struct str_find sm; + int portval = -1, ret; char *hostval; + const char *errstr = NULL; /* Canonicalize the request path */ if (desc->http_path == NULL || @@ -1060,9 +1090,17 @@ server_response(struct httpd *httpd, struct client *clt) hostname); } #endif - if ((srv_conf->flags & SRVFLAG_LOCATION) == 0 && - fnmatch(srv_conf->name, hostname, - FNM_CASEFOLD) == 0 && + if (srv_conf->flags & SRVFLAG_LOCATION) + continue; + else if (srv_conf->flags & SRVFLAG_SERVER_MATCH) { + str_find(hostname, srv_conf->name, + &sm, 1, &errstr); + ret = errstr == NULL ? 
0 : -1; + } else { + ret = fnmatch(srv_conf->name, + hostname, FNM_CASEFOLD); + } + if (ret == 0 && (portval == -1 || (portval != -1 && portval == srv_conf->port))) { /* Replace host configuration */ @@ -1132,6 +1170,8 @@ server_getlocation(struct client *clt, const char *path) { struct server *srv = clt->clt_srv; struct server_config *srv_conf = clt->clt_srv_conf, *location; + const char *errstr = NULL; + int ret; /* Now search for the location */ TAILQ_FOREACH(location, &srv->srv_hosts, entry) { @@ -1142,11 +1182,20 @@ server_getlocation(struct client *clt, const char *path) } #endif if ((location->flags & SRVFLAG_LOCATION) && - location->parent_id == srv_conf->parent_id && - fnmatch(location->location, path, FNM_CASEFOLD) == 0) { - /* Replace host configuration */ - clt->clt_srv_conf = srv_conf = location; - break; + location->parent_id == srv_conf->parent_id) { + errstr = NULL; + if (location->flags & SRVFLAG_LOCATION_MATCH) { + ret = str_match(path, location->location, + &clt->clt_srv_match, &errstr); + } else { + ret = fnmatch(location->location, + path, FNM_CASEFOLD); + } + if (ret == 0 && errstr == NULL) { + /* Replace host configuration */ + clt->clt_srv_conf = srv_conf = location; + break; + } } }