update the gre driver.
author dlg <dlg@openbsd.org>
Wed, 7 Feb 2018 22:30:59 +0000 (22:30 +0000)
committer dlg <dlg@openbsd.org>
Wed, 7 Feb 2018 22:30:59 +0000 (22:30 +0000)
the main new feature is gre keys, supported by the vnetid ioctls.
this also adds support for gre over ipv6, the use of hfsc, and
allows tx mitigation in the future.

this diff removes keepalive support, but i promised claudio@ and
patrick@ i would put it back after this goes in.

ok claudio@

sys/net/if_gre.c
sys/net/if_gre.h
sys/netinet/ip_gre.c
sys/netinet/ip_gre.h
sys/netinet6/in6_proto.c

index 07cf73c..ff4eca1 100644 (file)
@@ -1,4 +1,4 @@
-/*      $OpenBSD: if_gre.c,v 1.90 2018/02/07 01:52:15 dlg Exp $ */
+/*      $OpenBSD: if_gre.c,v 1.91 2018/02/07 22:30:59 dlg Exp $ */
 /*     $NetBSD: if_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */
 
 /*
@@ -38,9 +38,6 @@
  * Also supported: IP in IP encapsulation (proto 55) per RFC 2004.
  */
 
-#include "gre.h"
-#if NGRE > 0
-
 #include "bpfilter.h"
 #include "pf.h"
 
 #include <sys/sockio.h>
 #include <sys/kernel.h>
 #include <sys/systm.h>
-#include <sys/timeout.h>
+#include <sys/errno.h>
+#include <sys/tree.h>
 
 #include <net/if.h>
 #include <net/if_types.h>
+#include <net/if_media.h>
 #include <net/route.h>
 
 #include <netinet/in.h>
 #include <netinet/ip_var.h>
 #include <netinet/if_ether.h>
 
+#ifdef INET6
+#include <netinet/ip6.h>
+#include <netinet6/ip6_var.h>
+#endif
+
+#ifdef PIPEX
+#include <net/pipex.h>
+#endif
+
+#ifdef MPLS
+#include <netmpls/mpls.h>
+#endif /* MPLS */
+
 #if NBPFILTER > 0
 #include <net/bpf.h>
 #endif
 
 #include <net/if_gre.h>
 
-#ifndef GRE_RECURSION_LIMIT
-#define GRE_RECURSION_LIMIT    3   /* How many levels of recursion allowed */
-#endif /* GRE_RECURSION_LIMIT */
+#include <netinet/ip_gre.h>
+#include <sys/sysctl.h>
 
 /*
- * It is not easy to calculate the right value for a GRE MTU.
- * We leave this task to the admin and use the same default that
- * other vendors use.
+ * packet formats
+ */
+struct gre_header {
+       uint16_t                gre_flags;
+#define GRE_CP                         0x8000  /* Checksum Present */
+#define GRE_KP                         0x2000  /* Key Present */
+#define GRE_SP                         0x1000  /* Sequence Present */
+
+#define GRE_VERS_MASK                  0x0007
+#define GRE_VERS_0                     0x0000
+#define GRE_VERS_1                     0x0001
+
+       uint16_t                gre_proto;
+} __packed __aligned(4);
+
+struct gre_h_cksum {
+       uint16_t                gre_cksum;
+       uint16_t                gre_reserved1;
+} __packed __aligned(4);
+
+struct gre_h_key {
+       uint32_t                gre_key;
+} __packed __aligned(4);
+
+struct gre_h_seq {
+       uint32_t                gre_seq;
+} __packed __aligned(4);
+
+
+/*
+ * GRE tunnel metadata
  */
-#define GREMTU 1476
 
-int    gre_clone_create(struct if_clone *, int);
-int    gre_clone_destroy(struct ifnet *);
+struct gre_tunnel {
+       RBT_ENTRY(gre_entry)    t_entry;
 
-struct gre_softc_head gre_softc_list;
+       uint32_t                t_key_mask;
+#define GRE_KEY_NONE                   htonl(0x00000000U)
+#define GRE_KEY_ENTROPY                        htonl(0xffffff00U)
+#define GRE_KEY_MASK                   htonl(0xffffffffU)
+       uint32_t                t_key;
+
+       u_int                   t_rtableid;
+       int                     t_af;
+       uint32_t                t_src[4];
+       uint32_t                t_dst[4];
+
+       uint8_t                 t_ttl;
+};
+
+RBT_HEAD(gre_tree, gre_tunnel);
+
+static inline int
+               gre_cmp(const struct gre_tunnel *, const struct gre_tunnel *);
+
+RBT_PROTOTYPE(gre_tree, gre_tunnel, t_entry, gre_cmp);
+
+static int     gre_set_tunnel(struct gre_tunnel *, struct if_laddrreq *);
+static int     gre_get_tunnel(struct gre_tunnel *, struct if_laddrreq *);
+static int     gre_del_tunnel(struct gre_tunnel *);
+
+static int     gre_set_vnetid(struct gre_tunnel *, struct ifreq *);
+static int     gre_get_vnetid(struct gre_tunnel *, struct ifreq *);
+static int     gre_del_vnetid(struct gre_tunnel *);
+
+static int     gre_ip_output(const struct gre_tunnel *, struct mbuf *,
+                   uint8_t);
+/*
+ * layer 3 GRE tunnels
+ */
+
+struct gre_softc {
+       struct gre_tunnel       sc_tunnel; /* must be first */
+       struct ifnet            sc_if;
+};
+
+static int     gre_clone_create(struct if_clone *, int);
+static int     gre_clone_destroy(struct ifnet *);
 
 struct if_clone gre_cloner =
     IF_CLONE_INITIALIZER("gre", gre_clone_create, gre_clone_destroy);
 
+struct gre_tree gre_softcs = RBT_INITIALIZER();
+
+static int     gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+                   struct rtentry *);
+static void    gre_start(struct ifnet *);
+static int     gre_ioctl(struct ifnet *, u_long, caddr_t);
+
+static int     gre_up(struct gre_softc *);
+static int     gre_down(struct gre_softc *);
+
+static int     gre_input_key(struct mbuf **, int *, int, int,
+                   struct gre_tunnel *);
+
+static struct mbuf *
+               gre_encap(struct gre_softc *, struct mbuf *, uint8_t *);
+
+/*
+ * It is not easy to calculate the right value for a GRE MTU.
+ * We leave this task to the admin and use the same default that
+ * other vendors use.
+ */
+#define GREMTU 1476
+
 /*
  * We can control the acceptance of GRE and MobileIP packets by
  * altering the sysctl net.inet.gre.allow values
@@ -102,231 +204,460 @@ struct if_clone gre_cloner =
 int gre_allow = 0;
 int gre_wccp = 0;
 
-void gre_keepalive(void *);
-void gre_send_keepalive(void *);
-void gre_link_state(struct gre_softc *);
-
 void
 greattach(int n)
 {
-       LIST_INIT(&gre_softc_list);
        if_clone_attach(&gre_cloner);
 }
 
-int
+static int
 gre_clone_create(struct if_clone *ifc, int unit)
 {
        struct gre_softc *sc;
+       struct ifnet *ifp;
 
        sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO);
        snprintf(sc->sc_if.if_xname, sizeof sc->sc_if.if_xname, "%s%d",
            ifc->ifc_name, unit);
-       sc->sc_if.if_softc = sc;
-       sc->sc_if.if_type = IFT_TUNNEL;
-       sc->sc_if.if_hdrlen = 24; /* IP + GRE */
-       sc->sc_if.if_mtu = GREMTU;
-       sc->sc_if.if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
-       sc->sc_if.if_xflags = IFXF_CLONED;
-       sc->sc_if.if_output = gre_output;
-       sc->sc_if.if_ioctl = gre_ioctl;
-       sc->sc_if.if_rtrequest = p2p_rtrequest;
-       sc->g_dst.s_addr = sc->g_src.s_addr = INADDR_ANY;
-       sc->sc_ka_state = GRE_STATE_UKNWN;
-
-       /* GRE encapsulation */
-       sc->g_proto = IPPROTO_GRE;
-
-       timeout_set(&sc->sc_ka_hold, gre_keepalive, sc);
-       timeout_set_proc(&sc->sc_ka_snd, gre_send_keepalive, sc);
-
-       if_attach(&sc->sc_if);
-       if_alloc_sadl(&sc->sc_if);
+
+       ifp = &sc->sc_if;
+       ifp->if_softc = sc;
+       ifp->if_type = IFT_TUNNEL;
+       ifp->if_hdrlen = 24; /* IP + GRE */
+       ifp->if_mtu = GREMTU;
+       ifp->if_flags = IFF_POINTOPOINT|IFF_MULTICAST;
+       ifp->if_xflags = IFXF_CLONED;
+       ifp->if_output = gre_output;
+       ifp->if_start = gre_start;
+       ifp->if_ioctl = gre_ioctl;
+       ifp->if_rtrequest = p2p_rtrequest;
+
+       if_attach(ifp);
+       if_alloc_sadl(ifp);
 
 #if NBPFILTER > 0
-       bpfattach(&sc->sc_if.if_bpf, &sc->sc_if, DLT_LOOP, sizeof(u_int32_t));
+       bpfattach(&ifp->if_bpf, ifp, DLT_LOOP, sizeof(uint32_t));
 #endif
-       NET_LOCK();
-       LIST_INSERT_HEAD(&gre_softc_list, sc, sc_list);
-       NET_UNLOCK();
 
        return (0);
 }
 
-int
+static int
 gre_clone_destroy(struct ifnet *ifp)
 {
        struct gre_softc *sc = ifp->if_softc;
 
-       timeout_del(&sc->sc_ka_snd);
-       timeout_del(&sc->sc_ka_hold);
        NET_LOCK();
-       LIST_REMOVE(sc, sc_list);
+       if (ISSET(ifp->if_flags, IFF_RUNNING))
+               gre_down(sc);
        NET_UNLOCK();
 
        if_detach(ifp);
 
        free(sc, M_DEVBUF, sizeof(*sc));
+
        return (0);
 }
 
-/*
- * The output routine. Takes a packet and encapsulates it in the protocol
- * given by sc->g_proto. See also RFC 1701 and RFC 2004.
- */
+int
+gre_input(struct mbuf **mp, int *offp, int type, int af)
+{
+       struct mbuf *m = *mp;
+       struct gre_tunnel key;
+       struct ip *ip;
+
+       ip = mtod(m, struct ip *);
+
+       key.t_af = AF_INET;
+       key.t_src[0] = ip->ip_dst.s_addr;
+       key.t_dst[0] = ip->ip_src.s_addr;
+
+       if (gre_input_key(mp, offp, type, af, &key) == -1)
+               return (rip_input(mp, offp, type, af));
 
+       return (IPPROTO_DONE);
+}
+
+#ifdef INET6
 int
+gre_input6(struct mbuf **mp, int *offp, int type, int af)
+{
+       struct mbuf *m = *mp;
+       struct gre_tunnel key;
+       struct ip6_hdr *ip6;
+
+       ip6 = mtod(m, struct ip6_hdr *);
+
+       key.t_af = AF_INET6;
+       memcpy(key.t_src, &ip6->ip6_dst, sizeof(key.t_src));
+       memcpy(key.t_dst, &ip6->ip6_src, sizeof(key.t_dst));
+
+       if (gre_input_key(mp, offp, type, af, &key) == -1)
+               return (rip6_input(mp, offp, type, af));
+
+       return (IPPROTO_DONE);
+}
+#endif /* INET6 */
+
+/*
+ * Common GRE input path shared by gre_input() and gre_input6().
+ *
+ * The caller fills in the address family and the outer addresses in
+ * "key"; this function adds the GRE key (if present) and the routing
+ * table id, then looks the tunnel up in gre_softcs.  Only version 0
+ * headers whose sole optional field is the key are accepted.
+ *
+ * Returns IPPROTO_DONE when the mbuf has been consumed (delivered to
+ * the inner protocol input routine, handed to pipex, or freed by a
+ * failed m_pullup()).  Returns -1 when the packet is declined; in that
+ * case *mp is updated to the current mbuf so the caller can pass it on
+ * to the raw IP input routine.
+ */
+static int
+gre_input_key(struct mbuf **mp, int *offp, int type, int af,
+    struct gre_tunnel *key)
+{
+       struct mbuf *m = *mp;
+       int iphlen = *offp, hlen;
+       struct gre_softc *sc;
+       struct ifnet *ifp;
+       caddr_t buf;
+       struct gre_header *gh;
+       struct gre_h_key *gkh;
+       void (*input)(struct ifnet *, struct mbuf *);
+       int bpf_af = AF_UNSPEC; /* bpf */
+
+       if (!gre_allow)
+               goto decline;
+
+       hlen = iphlen + sizeof(*gh);
+       if (m->m_pkthdr.len < hlen)
+               goto decline;
+
+       m = m_pullup(m, hlen);
+       if (m == NULL)
+               return (IPPROTO_DONE);
+
+       buf = mtod(m, caddr_t);
+       gh = (struct gre_header *)(buf + iphlen);
+
+       /* check the version */
+       switch (gh->gre_flags & htons(GRE_VERS_MASK)) {
+       case htons(GRE_VERS_0):
+               break;
+
+       case htons(GRE_VERS_1):
+#ifdef PIPEX
+               if (pipex_enable) {
+                       struct pipex_session *session;
+
+                       session = pipex_pptp_lookup_session(m);
+                       if (session != NULL &&
+                           pipex_pptp_input(m, session) == NULL)
+                               return (IPPROTO_DONE);
+               }
+#endif
+               /* FALLTHROUGH */
+       default:
+               goto decline;
+       }
+
+       /* the only optional bit in the header is K flag */
+       if ((gh->gre_flags & htons(~(GRE_KP|GRE_VERS_MASK))) != htons(0))
+               goto decline;
+
+       if (gh->gre_flags & htons(GRE_KP)) {
+               hlen += sizeof(*gkh);
+               if (m->m_pkthdr.len < hlen)
+                       goto decline;
+
+               m = m_pullup(m, hlen);
+               if (m == NULL)
+                       return (IPPROTO_DONE);
+
+               /* the pullup may have moved the data; recompute pointers */
+               buf = mtod(m, caddr_t);
+               gh = (struct gre_header *)(buf + iphlen);
+               gkh = (struct gre_h_key *)(gh + 1);
+
+               key->t_key_mask = GRE_KEY_MASK;
+               key->t_key = gkh->gre_key;
+       } else
+               key->t_key_mask = GRE_KEY_NONE;
+
+       key->t_rtableid = m->m_pkthdr.ph_rtableid;
+
+       switch (gh->gre_proto) {
+       case htons(ETHERTYPE_IP):
+#if NBPFILTER > 0
+               bpf_af = AF_INET;
+#endif
+               input = ipv4_input;
+               break;
+#ifdef INET6
+       case htons(ETHERTYPE_IPV6):
+#if NBPFILTER > 0
+               bpf_af = AF_INET6;
+#endif
+               input = ipv6_input;
+               break;
+#endif
+#ifdef MPLS
+       case htons(ETHERTYPE_MPLS):
+       case htons(ETHERTYPE_MPLS_MCAST):
+#if NBPFILTER > 0
+               bpf_af = AF_MPLS;
+#endif
+               input = mpls_input;
+               break;
+#endif
+
+       case htons(ETHERTYPE_TRANSETHER): /* not yet */
+       default:
+               goto decline;
+       }
+
+       sc = (struct gre_softc *)RBT_FIND(gre_tree, &gre_softcs, key);
+       if (sc == NULL)
+               goto decline;
+
+       ifp = &sc->sc_if;
+
+       m_adj(m, hlen);
+
+       m->m_flags &= ~(M_MCAST|M_BCAST);
+       m->m_pkthdr.ph_ifidx = ifp->if_index;
+       m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
+
+#if NPF > 0
+       pf_pkt_addr_changed(m);
+#endif
+
+       ifp->if_ipackets++;
+       ifp->if_ibytes += m->m_pkthdr.len;
+
+#if NBPFILTER > 0
+       if (ifp->if_bpf)
+               bpf_mtap_af(ifp->if_bpf, bpf_af, m, BPF_DIRECTION_IN);
+#endif
+
+       (*input)(ifp, m);
+       return (IPPROTO_DONE);
+decline:
+       /*
+        * m_pullup() may have replaced the head of the chain, so give
+        * the caller the live mbuf rather than the stale one in *mp.
+        */
+       *mp = m;
+       return (-1);
+}
+
+static int
 gre_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
-          struct rtentry *rt)
+    struct rtentry *rt)
 {
-       int error = 0;
-       struct gre_softc *sc = (struct gre_softc *) (ifp->if_softc);
-       struct greip *gh = NULL;
-       struct ip *inp = NULL;
-       u_int8_t ip_tos = 0;
-       u_int16_t etype = 0;
        struct m_tag *mtag;
+       int error = 0;
 
-       if ((ifp->if_flags & IFF_UP) == 0 ||
-           sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY) {
-               m_freem(m);
-               error = ENETDOWN;
-               goto end;
+       if (!gre_allow) {
+               error = EACCES;
+               goto drop;
        }
 
-#ifdef DIAGNOSTIC
-       if (ifp->if_rdomain != rtable_l2(m->m_pkthdr.ph_rtableid)) {
-               printf("%s: trying to send packet on wrong domain. "
-                   "if %d vs. mbuf %d, AF %d\n", ifp->if_xname,
-                   ifp->if_rdomain, rtable_l2(m->m_pkthdr.ph_rtableid),
-                   dst->sa_family);
+       if (!ISSET(ifp->if_flags, IFF_RUNNING)) {
+               error = ENETDOWN;
+               goto drop;
        }
+
+       switch (dst->sa_family) {
+       case AF_INET:
+#ifdef INET6
+       case AF_INET6:
+#endif
+#ifdef MPLS
+       case AF_MPLS:
 #endif
+               break;
+       default:
+               error = EAFNOSUPPORT;
+               goto drop;
+       }
 
        /* Try to limit infinite recursion through misconfiguration. */
        for (mtag = m_tag_find(m, PACKET_TAG_GRE, NULL); mtag;
             mtag = m_tag_find(m, PACKET_TAG_GRE, mtag)) {
-               if (!bcmp((caddr_t)(mtag + 1), &ifp, sizeof(struct ifnet *))) {
+               if (memcmp((caddr_t)(mtag + 1), &ifp->if_index,
+                   sizeof(ifp->if_index)) == 0) {
                        m_freem(m);
                        error = EIO;
                        goto end;
                }
        }
 
-       mtag = m_tag_get(PACKET_TAG_GRE, sizeof(struct ifnet *), M_NOWAIT);
+       mtag = m_tag_get(PACKET_TAG_GRE, sizeof(ifp->if_index), M_NOWAIT);
        if (mtag == NULL) {
                m_freem(m);
                error = ENOBUFS;
                goto end;
        }
-       bcopy(&ifp, (caddr_t)(mtag + 1), sizeof(struct ifnet *));
+       memcpy((caddr_t)(mtag + 1), &ifp->if_index, sizeof(ifp->if_index));
        m_tag_prepend(m, mtag);
 
-       m->m_flags &= ~(M_BCAST|M_MCAST);
+       m->m_pkthdr.ph_family = dst->sa_family;
+
+       error = if_enqueue(ifp, m);
+end:
+       if (error)
+               ifp->if_oerrors++;
+       return (error);
 
+drop:
+       m_freem(m);
+       return (error);
+}
+
+void
+gre_start(struct ifnet *ifp)
+{
+       struct gre_softc *sc = ifp->if_softc;
+       struct mbuf *m;
+       uint8_t tos;
 #if NBPFILTER > 0
-       if (ifp->if_bpf)
-               bpf_mtap_af(ifp->if_bpf, dst->sa_family, m, BPF_DIRECTION_OUT);
+       caddr_t if_bpf;
 #endif
 
-       if (gre_allow == 0) {
-               m_freem(m);
-               error = EACCES;
-               goto end;
-       }
-
-       switch(dst->sa_family) {
-       case AF_INET:
-               if (m->m_len < sizeof(struct ip)) {
-                       m = m_pullup(m, sizeof(struct ip));
-                       if (m == NULL) {
-                               error = ENOBUFS;
-                               goto end;
-                       }
+       while ((m = ifq_dequeue(&ifp->if_snd)) != NULL) {
+#if NBPFILTER > 0
+               if_bpf = ifp->if_bpf;
+               if (if_bpf) {
+                       int af = m->m_pkthdr.ph_family;
+                       bpf_mtap_af(if_bpf, af, m, BPF_DIRECTION_OUT);
                }
+#endif
+
+               m = gre_encap(sc, m, &tos);
+               if (m == NULL || gre_ip_output(&sc->sc_tunnel, m, tos) != 0)
+                       ifp->if_oerrors++;
+       }
+}
 
-               inp = mtod(m, struct ip *);
-               ip_tos = inp->ip_tos;
-               etype = ETHERTYPE_IP;
+static struct mbuf *
+gre_encap(struct gre_softc *sc, struct mbuf *m, uint8_t *tos)
+{
+       struct gre_header *gh;
+       struct gre_h_key *gkh;
+       uint16_t proto;
+       int hlen;
+
+       *tos = 0;
+       switch (m->m_pkthdr.ph_family) {
+       case AF_INET: {
+               proto = htons(ETHERTYPE_IP);
+
+               struct ip *ip = mtod(m, struct ip *);
+               *tos = ip->ip_tos;
                break;
+       }
 #ifdef INET6
        case AF_INET6:
-               etype = ETHERTYPE_IPV6;
+               proto = htons(ETHERTYPE_IPV6);
                break;
 #endif
 #ifdef MPLS
        case AF_MPLS:
                if (m->m_flags & (M_BCAST | M_MCAST))
-                       etype = ETHERTYPE_MPLS_MCAST;
+                       proto = htons(ETHERTYPE_MPLS_MCAST);
                else
-                       etype = ETHERTYPE_MPLS;
+                       proto = htons(ETHERTYPE_MPLS);
                break;
 #endif
        default:
-               m_freem(m);
-               error = EAFNOSUPPORT;
-               goto end;
+               unhandled_af(m->m_pkthdr.ph_family);
        }
 
-       M_PREPEND(m, sizeof(struct greip), M_DONTWAIT);
+       hlen = sizeof(*gh);
+       if (sc->sc_tunnel.t_key_mask != GRE_KEY_NONE)
+               hlen += sizeof(*gkh);
 
-       if (m == NULL) {
-               error = ENOBUFS;
-               goto end;
-       }
+       m = m_prepend(m, hlen, M_DONTWAIT);
+       if (m == NULL)
+               return (NULL);
 
-       gh = mtod(m, struct greip *);
-       if (sc->g_proto == IPPROTO_GRE) {
-               /* We don't support any GRE flags for now */
+       gh = mtod(m, struct gre_header *);
+       gh->gre_flags = GRE_VERS_0;
+       gh->gre_proto = proto;
+       if (sc->sc_tunnel.t_key_mask != GRE_KEY_NONE) {
+               gh->gre_flags |= htons(GRE_KP);
 
-               bzero((void *) &gh->gi_g, sizeof(struct gre_h));
-               gh->gi_ptype = htons(etype);
+               gkh = (struct gre_h_key *)(gh + 1);
+               gkh->gre_key = sc->sc_tunnel.t_key;
        }
 
-       gh->gi_pr = sc->g_proto;
-       gh->gi_src = sc->g_src;
-       gh->gi_dst = sc->g_dst;
-       ((struct ip *) gh)->ip_hl = (sizeof(struct ip)) >> 2;
-       ((struct ip *) gh)->ip_ttl = ip_defttl;
-       ((struct ip *) gh)->ip_tos = ip_tos;
-       gh->gi_len = htons(m->m_pkthdr.len);
-
-       ifp->if_opackets++;
-       ifp->if_obytes += m->m_pkthdr.len;
-
+       return (m);
+}
 
-       m->m_pkthdr.ph_rtableid = sc->g_rtableid;
+/*
+ * Prepend the outer IPv4 or IPv6 header described by "tunnel" to an
+ * already GRE-encapsulated mbuf and queue it with ip_send()/ip6_send().
+ *
+ * "tos" is copied into the outer IPv4 header.  Returns 0 once the
+ * packet has been handed off, or ENOMEM if the header could not be
+ * prepended (the mbuf is gone in that case).  Panics on a tunnel with
+ * an unsupported address family.
+ */
+static int
+gre_ip_output(const struct gre_tunnel *tunnel, struct mbuf *m, uint8_t tos)
+{
+       m->m_flags &= ~(M_BCAST|M_MCAST);
+       m->m_pkthdr.ph_rtableid = tunnel->t_rtableid;
 
 #if NPF > 0
        pf_pkt_addr_changed(m);
 #endif
 
-       /* Send it off */
-       error = ip_output(m, NULL, &sc->route, 0, NULL, NULL, 0);
-  end:
-       if (error)
-               ifp->if_oerrors++;
-       return (error);
+       switch (tunnel->t_af) {
+       case AF_INET: {
+               struct ip *ip;
+
+               m = m_prepend(m, sizeof(*ip), M_DONTWAIT);
+               if (m == NULL)
+                       return (ENOMEM);
+
+               ip = mtod(m, struct ip *);
+               /*
+                * The prepended space is not zeroed and the output path
+                * keeps whatever DF bit it finds in ip_off, so clear it
+                * explicitly.
+                */
+               ip->ip_off = 0;
+               ip->ip_tos = tos;
+               ip->ip_len = htons(m->m_pkthdr.len);
+               ip->ip_ttl = tunnel->t_ttl;
+               ip->ip_p = IPPROTO_GRE;
+               ip->ip_src.s_addr = tunnel->t_src[0];
+               ip->ip_dst.s_addr = tunnel->t_dst[0];
+
+               ip_send(m);
+               break;
+       }
+#ifdef INET6
+       case AF_INET6: {
+               struct ip6_hdr *ip6;
+               /* payload length excludes the IPv6 header itself */
+               int len = m->m_pkthdr.len;
+
+               m = m_prepend(m, sizeof(*ip6), M_DONTWAIT);
+               if (m == NULL)
+                       return (ENOMEM);
+
+               ip6 = mtod(m, struct ip6_hdr *);
+               ip6->ip6_flow = ISSET(m->m_pkthdr.ph_flowid, M_FLOWID_VALID) ?
+                   htonl(m->m_pkthdr.ph_flowid & M_FLOWID_MASK) : 0;
+               ip6->ip6_vfc |= IPV6_VERSION;
+               ip6->ip6_plen = htons(len);
+               ip6->ip6_nxt = IPPROTO_GRE;
+               ip6->ip6_hlim = tunnel->t_ttl;
+               memcpy(&ip6->ip6_src, tunnel->t_src, sizeof(ip6->ip6_src));
+               memcpy(&ip6->ip6_dst, tunnel->t_dst, sizeof(ip6->ip6_dst));
+
+               ip6_send(m);
+               break;
+       }
+#endif /* INET6 */
+       default:
+               panic("%s: unsupported af %d in %p", __func__, tunnel->t_af,
+                   tunnel);
+       }
+
+       return (0);
 }
 
-int
+static int
 gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 {
-
        struct ifreq *ifr = (struct ifreq *)data;
-       struct if_laddrreq *lifr = (struct if_laddrreq *)data;
-       struct ifkalivereq *ikar = (struct ifkalivereq *)data;
        struct gre_softc *sc = ifp->if_softc;
-       struct sockaddr_in si;
        int error = 0;
-       struct proc *prc = curproc;             /* XXX */
 
        switch(cmd) {
        case SIOCSIFADDR:
                ifp->if_flags |= IFF_UP;
-               break;
-       case SIOCSIFDSTADDR:
-               break;
+               /* FALLTHROUGH */
        case SIOCSIFFLAGS:
+               if (ISSET(ifp->if_flags, IFF_UP)) {
+                       if (!ISSET(ifp->if_flags, IFF_RUNNING))
+                               error = gre_up(sc);
+                       else
+                               error = 0;
+               } else {
+                       if (ISSET(ifp->if_flags, IFF_RUNNING))
+                               error = gre_down(sc);
+               }
                break;
        case SIOCSIFMTU:
                if (ifr->ifr_mtu < 576) {
@@ -338,95 +669,79 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
        case SIOCADDMULTI:
        case SIOCDELMULTI:
                break;
-       case SIOCSETKALIVE:
-               if ((error = suser(prc, 0)) != 0)
-                       break;
-               if (ikar->ikar_timeo < 0 || ikar->ikar_timeo > 86400 ||
-                   ikar->ikar_cnt < 0 || ikar->ikar_cnt > 256) {
-                       error = EINVAL;
-                       break;
-               }
-               sc->sc_ka_timout = ikar->ikar_timeo;
-               sc->sc_ka_cnt = ikar->ikar_cnt;
-               if (sc->sc_ka_timout == 0 || sc->sc_ka_cnt == 0) {
-                       sc->sc_ka_timout = 0;
-                       sc->sc_ka_cnt = 0;
-                       sc->sc_ka_state = GRE_STATE_UKNWN;
-                       gre_link_state(sc);
+
+       case SIOCSVNETID:
+               if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+                       error = EBUSY;
                        break;
                }
-               if (!timeout_pending(&sc->sc_ka_snd)) {
-                       sc->sc_ka_holdmax = sc->sc_ka_cnt;
-                       timeout_add(&sc->sc_ka_snd, 1);
-                       timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timout *
-                           sc->sc_ka_cnt);
-               }
+               error = gre_set_vnetid(&sc->sc_tunnel, ifr);
                break;
-       case SIOCGETKALIVE:
-               ikar->ikar_timeo = sc->sc_ka_timout;
-               ikar->ikar_cnt = sc->sc_ka_cnt;
+
+       case SIOCGVNETID:
+               error = gre_get_vnetid(&sc->sc_tunnel, ifr);
                break;
-       case SIOCSLIFPHYADDR:
-               if ((error = suser(prc, 0)) != 0)
-                       break;
-               if (lifr->addr.ss_family != AF_INET ||
-                   lifr->dstaddr.ss_family != AF_INET) {
-                       error = EAFNOSUPPORT;
-                       break;
-               }
-               if (lifr->addr.ss_len != sizeof(si) ||
-                   lifr->dstaddr.ss_len != sizeof(si)) {
-                       error = EINVAL;
+       case SIOCDVNETID:
+               if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+                       error = EBUSY;
                        break;
                }
-               sc->g_src = ((struct sockaddr_in *)&lifr->addr)->sin_addr;
-               sc->g_dst = ((struct sockaddr_in *)&lifr->dstaddr)->sin_addr;
- recompute:
-               if ((sc->g_src.s_addr != INADDR_ANY) &&
-                   (sc->g_dst.s_addr != INADDR_ANY)) {
-                       if (sc->route.ro_rt != NULL) {
-                               rtfree(sc->route.ro_rt);
-                               sc->route.ro_rt = NULL;
-                       }
-                       /* ip_output() will do the lookup */
-                       bzero(&sc->route, sizeof(sc->route));
-                       ifp->if_flags |= IFF_UP;
-               }
+               error = gre_del_vnetid(&sc->sc_tunnel);
                break;
-       case SIOCDIFPHYADDR:
-               if ((error = suser(prc, 0)) != 0)
+
+       case SIOCSLIFPHYADDR:
+               if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+                       error = EBUSY;
                        break;
-               sc->g_src.s_addr = INADDR_ANY;
-               sc->g_dst.s_addr = INADDR_ANY;
+               }
+               error = gre_set_tunnel(&sc->sc_tunnel,
+                   (struct if_laddrreq *)data);
                break;
        case SIOCGLIFPHYADDR:
-               if (sc->g_src.s_addr == INADDR_ANY ||
-                   sc->g_dst.s_addr == INADDR_ANY) {
-                       error = EADDRNOTAVAIL;
+               error = gre_get_tunnel(&sc->sc_tunnel,
+                   (struct if_laddrreq *)data);
+               break;
+       case SIOCDIFPHYADDR:
+               if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+                       error = EBUSY;
                        break;
                }
-               bzero(&si, sizeof(si));
-               si.sin_family = AF_INET;
-               si.sin_len = sizeof(struct sockaddr_in);
-               si.sin_addr.s_addr = sc->g_src.s_addr;
-               memcpy(&lifr->addr, &si, sizeof(si));
-               si.sin_addr.s_addr = sc->g_dst.s_addr;
-               memcpy(&lifr->dstaddr, &si, sizeof(si));
+
+               error = gre_del_tunnel(&sc->sc_tunnel);
                break;
+
        case SIOCSLIFPHYRTABLE:
-               if ((error = suser(prc, 0)) != 0)
+               if (ISSET(ifp->if_flags, IFF_RUNNING)) {
+                       error = EBUSY;
                        break;
+               }
+
                if (ifr->ifr_rdomainid < 0 ||
                    ifr->ifr_rdomainid > RT_TABLEID_MAX ||
                    !rtable_exists(ifr->ifr_rdomainid)) {
                        error = EINVAL;
                        break;
                }
-               sc->g_rtableid = ifr->ifr_rdomainid;
-               goto recompute;
+               sc->sc_tunnel.t_rtableid = ifr->ifr_rdomainid;
+               break;
        case SIOCGLIFPHYRTABLE:
-               ifr->ifr_rdomainid = sc->g_rtableid;
+               ifr->ifr_rdomainid = sc->sc_tunnel.t_rtableid;
+               break;
+
+       case SIOCSLIFPHYTTL:
+               if (ifr->ifr_ttl < 0 || ifr->ifr_ttl > 0xff) {
+                       error = EINVAL;
+                       break;
+               }
+
+               /* commit */
+               sc->sc_tunnel.t_ttl = (uint8_t)ifr->ifr_ttl;
+               break;
+
+       case SIOCGLIFPHYTTL:
+               ifr->ifr_ttl = (int)sc->sc_tunnel.t_ttl;
                break;
+
        default:
                error = ENOTTY;
        }
@@ -434,151 +749,322 @@ gre_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
        return (error);
 }
 
-/*
- * do a checksum of a buffer - much like in_cksum, which operates on
- * mbufs.
- */
-u_int16_t
-gre_in_cksum(u_int16_t *p, u_int len)
+/*
+ * Mark the tunnel as running.  Fails with ENXIO while no tunnel address
+ * family is configured (t_af is AF_UNSPEC) and with EBUSY when
+ * RBT_INSERT() reports that an equal entry is already present in
+ * gre_softcs.  On success IFF_RUNNING is set on the interface.
+ */
+static int
+gre_up(struct gre_softc *sc)
 {
-       u_int32_t sum = 0;
-       int nwords = len >> 1;
-
-       while (nwords-- != 0)
-               sum += *p++;
-
-       if (len & 1) {
-               union {
-                       u_short w;
-                       u_char c[2];
-               } u;
-               u.c[0] = *(u_char *) p;
-               u.c[1] = 0;
-               sum += u.w;
-       }
+       int error = 0;
+       /* a tunnel without configured endpoints cannot be brought up */
+       if (sc->sc_tunnel.t_af == AF_UNSPEC)
+               return (ENXIO);
 
-       /* end-around-carry */
-       sum = (sum >> 16) + (sum & 0xffff);
-       sum += (sum >> 16);
-       return (~sum);
+       NET_ASSERT_LOCKED(); 
+       /* a non-NULL result means an equal tunnel is already in the tree */
+       if (RBT_INSERT(gre_tree, &gre_softcs, &sc->sc_tunnel) != NULL)
+               return (EBUSY);
+
+       SET(sc->sc_if.if_flags, IFF_RUNNING);
+
+       /* error is never modified after initialisation, so this is 0 */
+       return (error);
 }
 
-void
-gre_keepalive(void *arg)
+/*
+ * Take the tunnel down: remove it from the gre_softcs tree and clear
+ * IFF_RUNNING on the interface.  Always returns 0.
+ */
+static int
+gre_down(struct gre_softc *sc)
 {
-       struct gre_softc *sc = arg;
+       NET_ASSERT_LOCKED();
+       RBT_REMOVE(gre_tree, &gre_softcs, &sc->sc_tunnel);
 
-       if (!sc->sc_ka_timout)
-               return;
+       CLR(sc->sc_if.if_flags, IFF_RUNNING);
 
-       sc->sc_ka_state = GRE_STATE_DOWN;
-       gre_link_state(sc);
+       return (0);
 }
 
-void
-gre_send_keepalive(void *arg)
+/*
+ * Configure the tunnel endpoints from req: addr is the source and
+ * dstaddr the destination.  Both sockaddrs must have the same family
+ * and length, and neither address may be unspecified or multicast.
+ *
+ * Returns 0 on success, EINVAL for a malformed request, EAFNOSUPPORT
+ * for an unhandled address family, or the error from in6_embedscope().
+ *
+ * Nothing in *tunnel is modified unless every check has passed.  The
+ * IPv6 addresses are embedded into temporaries first, so a request that
+ * fails part way cannot clobber the addresses of an existing
+ * configuration while leaving t_af unchanged.
+ */
+static int
+gre_set_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req)
 {
-       struct gre_softc *sc = arg;
-       struct mbuf *m;
-       struct ip *ip;
-       struct gre_h *gh;
-       struct sockaddr dst;
-
-       if (sc->sc_ka_timout)
-               timeout_add_sec(&sc->sc_ka_snd, sc->sc_ka_timout);
-
-       if (sc->g_proto != IPPROTO_GRE)
-               return;
-       if ((sc->sc_if.if_flags & IFF_UP) == 0 ||
-           sc->g_src.s_addr == INADDR_ANY || sc->g_dst.s_addr == INADDR_ANY)
-               return;
-
-       MGETHDR(m, M_DONTWAIT, MT_DATA);
-       if (m == NULL) {
-               sc->sc_if.if_oerrors++;
-               return;
-       }
+       struct sockaddr *src = (struct sockaddr *)&req->addr;
+       struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
+       struct sockaddr_in *src4, *dst4;
+#ifdef INET6
+       struct sockaddr_in6 *src6, *dst6;
+       struct in6_addr tsrc6, tdst6;
+       int error;
+#endif
 
-       m->m_len = m->m_pkthdr.len = sizeof(*ip) + sizeof(*gh);
-       MH_ALIGN(m, m->m_len);
+       /* sa_family and sa_len must be equal */
+       if (src->sa_family != dst->sa_family || src->sa_len != dst->sa_len)
+               return (EINVAL);
 
-       /* use the interface's rdomain when sending keepalives. */
-       m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
+       /* validate */
+       switch (dst->sa_family) {
+       case AF_INET:
+               if (dst->sa_len != sizeof(*dst4))
+                       return (EINVAL);
 
-       /* build the ip header */
-       ip = mtod(m, struct ip *);
+               src4 = (struct sockaddr_in *)src;
+               if (in_nullhost(src4->sin_addr) ||
+                   IN_MULTICAST(src4->sin_addr.s_addr))
+                       return (EINVAL);
 
-       ip->ip_v = IPVERSION;
-       ip->ip_hl = sizeof(*ip) >> 2;
-       ip->ip_tos = IPTOS_LOWDELAY;
-       ip->ip_len = htons(m->m_pkthdr.len);
-       ip->ip_id = htons(ip_randomid());
-       ip->ip_off = htons(IP_DF);
-       ip->ip_ttl = ip_defttl;
-       ip->ip_p = IPPROTO_GRE;
-       ip->ip_src.s_addr = sc->g_dst.s_addr;
-       ip->ip_dst.s_addr = sc->g_src.s_addr;
-       ip->ip_sum = 0;
-       ip->ip_sum = in_cksum(m, sizeof(*ip));
-
-       gh = (struct gre_h *)(ip + 1);
-       /* We don't support any GRE flags for now */
-       bzero(gh, sizeof(*gh));
-
-       bzero(&dst, sizeof(dst));
-       dst.sa_family = AF_INET;
+               dst4 = (struct sockaddr_in *)dst;
+               if (in_nullhost(dst4->sin_addr) ||
+                   IN_MULTICAST(dst4->sin_addr.s_addr))
+                       return (EINVAL);
 
-       NET_LOCK();
-       /* should we care about the error? */
-       gre_output(&sc->sc_if, m, &dst, NULL);
-       NET_UNLOCK();
+               /* all checks passed, the addresses can be stored */
+               tunnel->t_src[0] = src4->sin_addr.s_addr;
+               tunnel->t_dst[0] = dst4->sin_addr.s_addr;
+
+               break;
+#ifdef INET6
+       case AF_INET6:
+               if (dst->sa_len != sizeof(*dst6))
+                       return (EINVAL);
+
+               src6 = (struct sockaddr_in6 *)src;
+               if (IN6_IS_ADDR_UNSPECIFIED(&src6->sin6_addr) ||
+                   IN6_IS_ADDR_MULTICAST(&src6->sin6_addr))
+                       return (EINVAL);
+
+               dst6 = (struct sockaddr_in6 *)dst;
+               if (IN6_IS_ADDR_UNSPECIFIED(&dst6->sin6_addr) ||
+                   IN6_IS_ADDR_MULTICAST(&dst6->sin6_addr))
+                       return (EINVAL);
+
+               /*
+                * Embed the scope into temporaries: in6_embedscope() can
+                * fail, and the tunnel must stay untouched when it does.
+                */
+               error = in6_embedscope(&tsrc6, src6, NULL);
+               if (error != 0)
+                       return (error);
+
+               error = in6_embedscope(&tdst6, dst6, NULL);
+               if (error != 0)
+                       return (error);
+
+               /*
+                * NOTE(review): assumes t_src and t_dst are large enough
+                * to hold a struct in6_addr, as the previous direct casts
+                * of these members to struct in6_addr * already did.
+                */
+               memcpy(tunnel->t_src, &tsrc6, sizeof(tsrc6));
+               memcpy(tunnel->t_dst, &tdst6, sizeof(tdst6));
+
+               break;
+#endif
+       default:
+               return (EAFNOSUPPORT);
+       }
+
+       /* commit */
+       tunnel->t_af = dst->sa_family;
+
+       return (0);
 }
 
-void
-gre_recv_keepalive(struct gre_softc *sc)
+/*
+ * Report the configured tunnel endpoints in req: addr receives the
+ * source and dstaddr the destination, written as sockaddr_in or
+ * sockaddr_in6 according to tunnel->t_af.  Returns EADDRNOTAVAIL when
+ * t_af is AF_UNSPEC, EAFNOSUPPORT for any other unhandled family, and
+ * 0 on success.
+ */
+static int
+gre_get_tunnel(struct gre_tunnel *tunnel, struct if_laddrreq *req)
 {
-       if (!sc->sc_ka_timout)
-               return;
-
-       /* link state flap dampening */
-       switch (sc->sc_ka_state) {
-       case GRE_STATE_UKNWN:
-       case GRE_STATE_DOWN:
-               sc->sc_ka_state = GRE_STATE_HOLD;
-               sc->sc_ka_holdcnt = sc->sc_ka_holdmax;
-               sc->sc_ka_holdmax = MIN(sc->sc_ka_holdmax * 2,
-                   16 * sc->sc_ka_cnt);
-               break;
-       case GRE_STATE_HOLD:
-               if (--sc->sc_ka_holdcnt < 1) {
-                       sc->sc_ka_state = GRE_STATE_UP;
-                       gre_link_state(sc);
-               }
+       struct sockaddr *src = (struct sockaddr *)&req->addr;
+       struct sockaddr *dst = (struct sockaddr *)&req->dstaddr;
+       struct sockaddr_in *sin;
+#ifdef INET6 /* ifconfig already embeds the scopeid */
+       struct sockaddr_in6 *sin6;
+#endif
+
+       switch (tunnel->t_af) {
+       case AF_UNSPEC:
+               /* no tunnel endpoints have been configured */
+               return (EADDRNOTAVAIL);
+       case AF_INET:
+               /* IPv4 addresses live in the first word of t_src/t_dst */
+               sin = (struct sockaddr_in *)src;
+               memset(sin, 0, sizeof(*sin));
+               sin->sin_family = AF_INET;
+               sin->sin_len = sizeof(*sin);
+               sin->sin_addr.s_addr = tunnel->t_src[0];
+
+               sin = (struct sockaddr_in *)dst;
+               memset(sin, 0, sizeof(*sin));
+               sin->sin_family = AF_INET;
+               sin->sin_len = sizeof(*sin);
+               sin->sin_addr.s_addr = tunnel->t_dst[0];
+
                break;
-       case GRE_STATE_UP:
-               sc->sc_ka_holdmax--;
-               sc->sc_ka_holdmax = MAX(sc->sc_ka_holdmax, sc->sc_ka_cnt);
+
+#ifdef INET6
+       case AF_INET6:
+               /*
+                * NOTE(review): the return value of in6_recoverscope()
+                * is not checked for either address.
+                */
+               sin6 = (struct sockaddr_in6 *)src;
+               memset(sin6, 0, sizeof(*sin6));
+               sin6->sin6_family = AF_INET6;
+               sin6->sin6_len = sizeof(*sin6);
+               in6_recoverscope(sin6, (struct in6_addr *)tunnel->t_src);
+
+               sin6 = (struct sockaddr_in6 *)dst;
+               memset(sin6, 0, sizeof(*sin6));
+               sin6->sin6_family = AF_INET6;
+               sin6->sin6_len = sizeof(*sin6);
+               in6_recoverscope(sin6, (struct in6_addr *)tunnel->t_dst);
+
                break;
+#endif
+       default:
+               return (EAFNOSUPPORT);
        }
 
-       /* rescedule hold timer */
-       timeout_add_sec(&sc->sc_ka_hold, sc->sc_ka_timout * sc->sc_ka_cnt);
+       return (0);
 }
 
-void
-gre_link_state(struct gre_softc *sc)
+/*
+ * Forget the tunnel endpoints by resetting the address family to
+ * AF_UNSPEC.  The stored addresses themselves are left as they are.
+ * Always returns 0.
+ */
+static int
+gre_del_tunnel(struct gre_tunnel *tunnel)
+{
+       /* AF_UNSPEC is what marks the tunnel as unconfigured */
+       tunnel->t_af = AF_UNSPEC;
+       return (0);
+}
+
+/*
+ * Set the tunnel key from ifr->ifr_vnetid.  The value must fit in 32
+ * bits, otherwise EINVAL is returned.  The key is stored in network
+ * byte order together with the GRE_KEY_MASK key mask.  Returns 0 on
+ * success.
+ */
+static int
+gre_set_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
+{
+       uint32_t nkey;
+
+       /* reject anything that cannot be represented in 32 bits */
+       if (ifr->ifr_vnetid < 0 || ifr->ifr_vnetid > 0xffffffff)
+               return (EINVAL);
+
+       nkey = htonl(ifr->ifr_vnetid);
+
+       /* only write to the tunnel when the key or mask actually changes */
+       if (tunnel->t_key_mask != GRE_KEY_MASK || tunnel->t_key != nkey) {
+               tunnel->t_key_mask = GRE_KEY_MASK;
+               tunnel->t_key = nkey;
+       }
+
+       return (0);
+}
+
+/*
+ * Report the tunnel key in ifr->ifr_vnetid, converted to host byte
+ * order.  Returns EADDRNOTAVAIL when no key is configured (the key
+ * mask is GRE_KEY_NONE), 0 otherwise.
+ */
+static int
+gre_get_vnetid(struct gre_tunnel *tunnel, struct ifreq *ifr)
+{
+       if (tunnel->t_key_mask != GRE_KEY_NONE) {
+               ifr->ifr_vnetid = (int64_t)ntohl(tunnel->t_key);
+               return (0);
+       }
+
+       /* nothing to report without a key */
+       return (EADDRNOTAVAIL);
+}
+
+/*
+ * Remove the tunnel key by resetting the key mask to GRE_KEY_NONE.
+ * The stored t_key value is left untouched.  Always returns 0.
+ */
+static int
+gre_del_vnetid(struct gre_tunnel *tunnel)
+{
+       /* an empty mask is what marks the key as absent */
+       tunnel->t_key_mask = GRE_KEY_NONE;
+       return (0);
+}
+
+/*
+ * sysctl handler for the gre integer knobs: GRECTL_ALLOW maps to
+ * gre_allow and GRECTL_WCCP to gre_wccp.  Returns ENOTDIR unless name
+ * has exactly one component, ENOPROTOOPT for an unknown name, and the
+ * result of sysctl_int() otherwise.  The variable is read or updated
+ * with the net lock held.
+ */
+int
+gre_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
+    size_t newlen)
+{
+       int *valp;
+       int error;
+
+       /* All sysctl names at this level are terminal. */
+       if (namelen != 1)
+               return (ENOTDIR);
+
+       /* pick the variable first so the locked section exists only once */
+       switch (name[0]) {
+       case GRECTL_ALLOW:
+               valp = &gre_allow;
+               break;
+       case GRECTL_WCCP:
+               valp = &gre_wccp;
+               break;
+       default:
+               return (ENOPROTOOPT);
+       }
+
+       NET_LOCK();
+       error = sysctl_int(oldp, oldlenp, newp, newlen, valp);
+       NET_UNLOCK();
+
+       return (error);
+}
+
+/*
+ * Compare two addresses held as arrays of 32-bit words.  For AF_INET6
+ * words 3, 2 and 1 are compared in that order before falling through
+ * to the AF_INET case, which compares word 0 only.  Returns 1 if a
+ * sorts after b, -1 if it sorts before, and 0 if they are equal.
+ * Panics on any other address family.
+ */
+static inline int
+gre_ip_cmp(int af, const uint32_t *a, const uint32_t *b)
 {
-       struct ifnet *ifp = &sc->sc_if;
-       int link_state = LINK_STATE_UNKNOWN;
+       switch (af) {
+#ifdef INET6
+       case AF_INET6:
+               if (a[3] > b[3])
+                       return (1);
+               if (a[3] < b[3])
+                       return (-1);
+
+               if (a[2] > b[2])
+                       return (1);
+               if (a[2] < b[2])
+                       return (-1);
+
+               if (a[1] > b[1])
+                       return (1);
+               if (a[1] < b[1])
+                       return (-1);
+
+               /* word 0 is handled by the shared AF_INET code below */
+               /* FALLTHROUGH */
+#endif /* INET6 */
+       case AF_INET:
+               if (a[0] > b[0])
+                       return (1);
+               if (a[0] < b[0])
+                       return (-1);
+               break;
+       default:
+               panic("%s: unsupported af %d\n", __func__, af);
+       }
 
-       if (sc->sc_ka_state == GRE_STATE_UP)
-               link_state = LINK_STATE_UP;
-       else if (sc->sc_ka_state != GRE_STATE_UKNWN)
-               link_state = LINK_STATE_KALIVE_DOWN;
+       /* every compared word matched */
+       return (0);
+}
 
-       if (ifp->if_link_state != link_state) {
-               ifp->if_link_state = link_state;
-               if_link_state_change(ifp);
+/*
+ * Comparison function handed to RBT_GENERATE() for gre_tree.  Tunnels
+ * are ordered by routing table, then address family, then destination
+ * address, then source address, and finally by key.  Returns 1, -1 or
+ * 0 in the usual comparator fashion.
+ */
+static inline int
+gre_cmp(const struct gre_tunnel *a, const struct gre_tunnel *b)
+{
+       uint32_t ka, kb;
+       uint32_t mask;
+       int rv;
+
+       /* sort by routing table */
+       if (a->t_rtableid > b->t_rtableid)
+               return (1);
+       if (a->t_rtableid < b->t_rtableid)
+               return (-1);
+
+       /* sort by address */
+       if (a->t_af > b->t_af)
+               return (1);
+       if (a->t_af < b->t_af)
+               return (-1);
+
+       /* the families are equal here, so a->t_af describes both sides */
+       rv = gre_ip_cmp(a->t_af, a->t_dst, b->t_dst);
+       if (rv != 0)
+               return (rv);
+
+       rv = gre_ip_cmp(a->t_af, a->t_src, b->t_src);
+       if (rv != 0)
+               return (rv);
+
+       /*
+        * is K set at all?
+        * NOTE(review): GRE_KEY_ENTROPY is defined outside this chunk;
+        * presumably it covers the key bits shared by every key mode.
+        */
+       ka = a->t_key_mask & GRE_KEY_ENTROPY;
+       kb = b->t_key_mask & GRE_KEY_ENTROPY;
+
+       /* sort by whether K is set */
+       if (ka > kb)
+               return (1);
+       if (ka < kb)
+               return (-1);
+
+       /* is K set on both? */
+       if (ka != GRE_KEY_NONE) {
+               /* get common prefix */
+               mask = a->t_key_mask & b->t_key_mask;
+
+               /* only the key bits covered by both masks are compared */
+               ka = a->t_key & mask;
+               kb = b->t_key & mask;
+
+               /* sort by common prefix */
+               if (ka > kb)
+                       return (1);
+               if (ka < kb)
+                       return (-1);
        }
+
+       return (0);
 }
-#endif
+
+/*
+ * Generate the gre_tree red-black tree functions for struct gre_tunnel,
+ * linked through its t_entry member and ordered by gre_cmp().
+ */
+RBT_GENERATE(gre_tree, gre_tunnel, t_entry, gre_cmp);
+
index b5f1550..8ade4d5 100644 (file)
@@ -1,4 +1,4 @@
-/*      $OpenBSD: if_gre.h,v 1.14 2017/01/24 10:08:30 krw Exp $ */
+/*      $OpenBSD: if_gre.h,v 1.15 2018/02/07 22:30:59 dlg Exp $ */
 /*     $NetBSD: if_gre.h,v 1.5 1999/11/19 20:41:19 thorpej Exp $ */
 
 /*
 #ifndef _NET_IF_GRE_H
 #define _NET_IF_GRE_H
 
-struct gre_softc {
-       struct ifnet            sc_if;
-       LIST_ENTRY(gre_softc)   sc_list;
-       struct timeout          sc_ka_hold;
-       struct timeout          sc_ka_snd;
-       struct in_addr          g_src;  /* source address of gre packets */
-       struct in_addr          g_dst;  /* destination address of gre packets */
-       struct route            route;  /* routing entry that determines, where
-                                          an encapsulated packet should go */
-       u_int  g_rtableid;      /* routing table used for the tunnel */
-       int                     gre_unit;
-       int                     gre_flags;
-       int                     sc_ka_timout;
-       int                     sc_ka_holdmax;
-       int                     sc_ka_holdcnt;
-       int                     sc_ka_cnt;
-       u_char                  g_proto;        /* protocol of encapsulator */
-       u_char                  sc_ka_state;
-#define GRE_STATE_UKNWN        0
-#define GRE_STATE_DOWN 1
-#define GRE_STATE_HOLD 2
-#define GRE_STATE_UP   3
-};
-
-
 struct gre_h {
        u_int16_t flags;        /* GRE flags */
        u_int16_t ptype;        /* protocol type of payload typically
@@ -138,22 +113,9 @@ struct mobip_h {
 #define MOB_H_SIZ_L            (sizeof(struct mobile_h))
 #define MOB_H_SBIT     0x0080
 
-
-/*
- * ioctls needed to manipulate the interface
- */
-
 #ifdef _KERNEL
-extern  LIST_HEAD(gre_softc_head, gre_softc) gre_softc_list;
-extern  int gre_allow;
-extern  int gre_wccp;
-extern  int ip_mobile_allow;
-
-void   greattach(int);
-int     gre_ioctl(struct ifnet *, u_long, caddr_t);
-int     gre_output(struct ifnet *, struct mbuf *, struct sockaddr *,
-           struct rtentry *);
-u_int16_t gre_in_cksum(u_int16_t *, u_int);
-void   gre_recv_keepalive(struct gre_softc *);
-#endif /* _KERNEL */
+int    gre_sysctl(int *, u_int, void *, size_t *, void *, size_t);
+int    gre_input(struct mbuf **, int *, int, int);
+int    gre_input6(struct mbuf **, int *, int, int);
+#endif
 #endif /* _NET_IF_GRE_H_ */
index 5dad1d8..147ab23 100644 (file)
@@ -1,4 +1,4 @@
-/*      $OpenBSD: ip_gre.c,v 1.70 2018/02/07 01:09:57 dlg Exp $ */
+/*      $OpenBSD: ip_gre.c,v 1.71 2018/02/07 22:30:59 dlg Exp $ */
 /*     $NetBSD: ip_gre.c,v 1.9 1999/10/25 19:18:11 drochner Exp $ */
 
 /*
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
+
 #include <net/if.h>
-#include <net/netisr.h>
 #include <net/route.h>
-#include <net/bpf.h>
 
 #include <netinet/in.h>
 #include <netinet/ip.h>
 #include <netinet/ip_var.h>
-#include <netinet/ip_gre.h>
-#include <netinet/if_ether.h>
 #include <netinet/in_pcb.h>
 
-#ifdef MPLS
-#include <netmpls/mpls.h>
-#endif
-
-#include "bpfilter.h"
-#include "pf.h"
-
-#if NPF > 0
-#include <net/pfvar.h>
-#endif
-
 #ifdef PIPEX
 #include <net/pipex.h>
 #endif
 
-/* Needs IP headers. */
-#include <net/if_gre.h>
-
-struct gre_softc *gre_lookup(struct mbuf *, u_int8_t);
-int gre_input2(struct mbuf *, int, int);
-
-/*
- * Decapsulate.
- * Does the real work and is called from gre_input() (above)
- * returns 0 if packet is not yet processed
- * and 1 if it needs no further processing
- * proto is the protocol number of the "calling" foo_input()
- * routine.
- */
-
-int
-gre_input2(struct mbuf *m, int hlen, int proto)
-{
-       struct greip *gip;
-       struct gre_softc *sc;
-       u_short flags;
-       u_int af;
-
-       if ((sc = gre_lookup(m, proto)) == NULL) {
-               /* No matching tunnel or tunnel is down. */
-               return (0);
-       }
-
-       if (m->m_len < sizeof(*gip)) {
-               m = m_pullup(m, sizeof(*gip));
-               if (m == NULL)
-                       return (ENOBUFS);
-       }
-       gip = mtod(m, struct greip *);
-
-       m->m_pkthdr.ph_ifidx = sc->sc_if.if_index;
-       m->m_pkthdr.ph_rtableid = sc->sc_if.if_rdomain;
-
-       sc->sc_if.if_ipackets++;
-       sc->sc_if.if_ibytes += m->m_pkthdr.len;
-
-       switch (proto) {
-       case IPPROTO_GRE:
-               hlen += sizeof (struct gre_h);
-
-               /* process GRE flags as packet can be of variable len */
-               flags = ntohs(gip->gi_flags);
-
-               /* Checksum & Offset are present */
-               if ((flags & GRE_CP) | (flags & GRE_RP))
-                       hlen += 4;
-
-               /* We don't support routing fields (variable length) */
-               if (flags & GRE_RP)
-                       return (0);
-
-               if (flags & GRE_KP)
-                       hlen += 4;
-
-               if (flags & GRE_SP)
-                       hlen += 4;
-
-               switch (ntohs(gip->gi_ptype)) { /* ethertypes */
-               case GREPROTO_WCCP:
-                       /* WCCP/GRE:
-                        *   So far as I can see (and test) it seems that Cisco's WCCP
-                        *   GRE tunnel is precisely a IP-in-GRE tunnel that differs
-                        *   only in its protocol number.  At least, it works for me.
-                        *
-                        *   The Internet Drafts can be found if you look for
-                        *   the following:
-                        *     draft-forster-wrec-wccp-v1-00.txt
-                        *     draft-wilson-wrec-wccp-v2-01.txt
-                        *
-                        *   So yes, we're doing a fall-through (unless, of course,
-                        *   net.inet.gre.wccp is 0).
-                        */
-                       if (!gre_wccp)
-                               return (0);
-                       /*
-                        * For WCCPv2, additionally skip the 4 byte
-                        * redirect header.
-                        */
-                       if (gre_wccp == 2) 
-                               hlen += 4;
-               case ETHERTYPE_IP:
-                       af = AF_INET;
-                       break;
-#ifdef INET6
-               case ETHERTYPE_IPV6:
-                       af = AF_INET6;
-                       break;
-#endif
-               case 0:
-                       /* keepalive reply, retrigger hold timer */
-                       gre_recv_keepalive(sc);
-                       m_freem(m);
-                       return (1);
-#ifdef MPLS
-               case ETHERTYPE_MPLS:
-               case ETHERTYPE_MPLS_MCAST:
-                       mpls_input(&sc->sc_if, m);
-                       return (1);
-#endif
-               default:           /* others not yet supported */
-                       return (0);
-               }
-               break;
-       default:
-               /* others not yet supported */
-               return (0);
-       }
-
-       if (hlen > m->m_pkthdr.len) {
-               m_freem(m);
-               return (EINVAL);
-       }
-       m_adj(m, hlen);
-
-#if NBPFILTER > 0
-        if (sc->sc_if.if_bpf)
-               bpf_mtap_af(sc->sc_if.if_bpf, af, m, BPF_DIRECTION_IN);
-#endif
-
-#if NPF > 0
-       pf_pkt_addr_changed(m);
-#endif
-
-       switch (af) {
-       case AF_INET:
-               ipv4_input(&sc->sc_if, m);
-               break;
-#ifdef INET6
-       case AF_INET6:
-               ipv6_input(&sc->sc_if, m);
-               break;
-#endif
-       default:
-               return (0);
-       }
-
-
-       return (1);     /* packet is done, no further processing needed */
-}
-
-/*
- * Decapsulate a packet and feed it back through ip_input (this
- * routine is called whenever IP gets a packet with proto type
- * IPPROTO_GRE and a local destination address).
- */
-int
-gre_input(struct mbuf **mp, int *offp, int proto, int af)
-{
-       struct mbuf *m = *mp;
-       int hlen = *offp;
-       int ret;
-
-       if (!gre_allow) {
-               m_freem(m);
-               return IPPROTO_DONE;
-       }
-
-#ifdef PIPEX
-       if (pipex_enable) {
-               struct pipex_session *session;
-
-               if ((session = pipex_pptp_lookup_session(m)) != NULL) {
-                       if (pipex_pptp_input(m, session) == NULL)
-                               return IPPROTO_DONE;
-               }
-       }
-#endif
-
-       ret = gre_input2(m, hlen, proto);
-       /*
-        * ret == 0: packet not processed, but input from here
-        * means no matching tunnel that is up is found.
-        * we inject it to raw ip socket to see if anyone picks it up.
-        * possible that we received a WCCPv1-style GRE packet
-        * but we're not set to accept them.
-        */
-       if (!ret)
-               return rip_input(mp, offp, proto, af);
-       return IPPROTO_DONE;
-}
-
-/*
- * Find the gre interface associated with our src/dst/proto set.
- */
-struct gre_softc *
-gre_lookup(struct mbuf *m, u_int8_t proto)
-{
-       struct ip *ip = mtod(m, struct ip *);
-       struct gre_softc *sc;
-
-       NET_ASSERT_LOCKED();
-       LIST_FOREACH(sc, &gre_softc_list, sc_list) {
-               if ((sc->g_dst.s_addr == ip->ip_src.s_addr) &&
-                   (sc->g_src.s_addr == ip->ip_dst.s_addr) &&
-                   (sc->g_proto == proto) &&
-                   (rtable_l2(sc->g_rtableid) ==
-                   rtable_l2(m->m_pkthdr.ph_rtableid)) &&
-                   ((sc->sc_if.if_flags & IFF_UP) != 0))
-                       return (sc);
-       }
-
-       return (NULL);
-}
-
-int
-gre_sysctl(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
-    size_t newlen)
-{
-       int error;
-
-        /* All sysctl names at this level are terminal. */
-        if (namelen != 1)
-                return (ENOTDIR);
-
-        switch (name[0]) {
-        case GRECTL_ALLOW:
-               NET_LOCK();
-               error = sysctl_int(oldp, oldlenp, newp, newlen, &gre_allow);
-               NET_UNLOCK();
-               return (error);
-        case GRECTL_WCCP:
-               NET_LOCK();
-               error = sysctl_int(oldp, oldlenp, newp, newlen, &gre_wccp);
-               NET_UNLOCK();
-               return (error);
-        default:
-                return (ENOPROTOOPT);
-        }
-        /* NOTREACHED */
-}
-
 int
 gre_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam,
     struct mbuf *control, struct proc *p)
index b072532..3a51789 100644 (file)
@@ -1,4 +1,4 @@
-/*      $OpenBSD: ip_gre.h,v 1.12 2017/04/14 20:46:31 bluhm Exp $ */
+/*      $OpenBSD: ip_gre.h,v 1.13 2018/02/07 22:30:59 dlg Exp $ */
 /*     $NetBSD: ip_gre.h,v 1.3 1998/10/07 23:33:02 thorpej Exp $ */
 
 /*
 }
 
 #ifdef _KERNEL
-int    gre_input(struct mbuf **, int *, int, int);
-int    gre_mobile_input(struct mbuf **, int *, int, int);
-int     ipmobile_sysctl(int *, u_int, void *, size_t *, void *, size_t);
-int     gre_sysctl(int *, u_int, void *, size_t *, void *, size_t);
 int     gre_usrreq(struct socket *, int, struct mbuf *, struct mbuf *, struct mbuf *, struct proc *);
 #endif /* _KERNEL */
 #endif /* _NETINET_IP_GRE_H_ */
index afe4159..1435acc 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: in6_proto.c,v 1.100 2017/11/23 13:45:46 mpi Exp $     */
+/*     $OpenBSD: in6_proto.c,v 1.101 2018/02/07 22:30:59 dlg Exp $     */
 /*     $KAME: in6_proto.c,v 1.66 2000/10/10 15:35:47 itojun Exp $      */
 
 /*
 #include <net/if_etherip.h>
 #endif
 
+#include "gre.h"
+#if NGRE > 0
+#include <net/if_gre.h>
+#endif
+
 /*
  * TCP/IP protocol family: IP6, ICMP6, UDP, TCP.
  */
@@ -313,6 +318,19 @@ const struct protosw inet6sw[] = {
   .pr_detach   = rip6_detach,
 },
 #endif /* NETHERIP */
+#if NGRE > 0
+{
+  .pr_type     = SOCK_RAW,
+  .pr_domain   = &inet6domain,
+  .pr_protocol = IPPROTO_GRE,
+  .pr_flags    = PR_ATOMIC|PR_ADDR,
+  .pr_input    = gre_input6,
+  .pr_ctloutput        = rip6_ctloutput,
+  .pr_usrreq   = rip6_usrreq,
+  .pr_attach   = rip6_attach,
+  .pr_detach   = rip6_detach,
+},
+#endif /* NGRE */
 {
   /* raw wildcard */
   .pr_type     = SOCK_RAW,