add veb(4), a Virtual Ethernet Bridge driver.
author dlg <dlg@openbsd.org>
Tue, 23 Feb 2021 03:30:04 +0000 (03:30 +0000)
committer dlg <dlg@openbsd.org>
Tue, 23 Feb 2021 03:30:04 +0000 (03:30 +0000)
my intention is to replace bridge(4), but the way it works is
different enough from bridge that a name change is justified
to distinguish them. it also makes it easier to commit it to the
tree and work on it in parallel to bridge, and allows a window of
migration.

the main difference between veb(4) and bridge(4) is how they use
interfaces as ports. veb takes over interfaces completely and only
uses them to receive and transmit ethernet packets. bridge also uses
each interface as a port to the ethernet segment it's connected to,
but also tries to continue supporting the use of the interface as
a way to talk to the network stack on the local system. supporting
the use of interfaces for both external and local communication is
where most of my confusion with bridge comes from, both when i'm
trying to operate it and also understand the code. changing this
semantic is where most of the simplification in veb comes from
compared to bridge.

because veb takes over interfaces, the ethernet network set up on
a veb is isolated from the host network stack. by default veb does
not interact with pf or the ip (and mpls) stacks. to enable pf for
ip frames going over veb ports link1 on the veb interface must be
set. to have the stack interact with a veb network, vport interfaces
must be created and added as ports to a veb.

the vport interface driver is provided as part of veb, and is handled
specially by veb. veb usually prevents the use of ports by the stack
for sending and receiving packets, but that's why vports exist, so
veb has special handling for them.

veb already supports a lot of the other features that bridge has,
including bridge rules and protected domains, but i got tired of
working out of the tree and stopped implementing them. the main
outstanding features are better address table management, the
blocknonip flag on ports, transparent ipsec interception, and
spanning tree. i may not bother with spanning tree unless someone
tells me that they actually use it.

the core ethernet learning bridge functionality is provided by the
etherbridge code that was factored out of nvgre and bpe. veb is
already (a lot) faster than bridge, and is better prepared to operate
in parallel on multiple CPUs concurrently.

thanks to hrvoje popovski for testing some earlier versions of this.
discussed with many
ok patrick@ jmatthew@

sys/conf/files
sys/net/if_veb.c [new file with mode: 0644]

index e43923b..ba16d78 100644 (file)
@@ -1,4 +1,4 @@
-#      $OpenBSD: files,v 1.697 2021/02/21 03:46:34 dlg Exp $
+#      $OpenBSD: files,v 1.698 2021/02/23 03:30:04 dlg Exp $
 #      $NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $
 
 #      @(#)files.newconf       7.5 (Berkeley) 5/10/93
@@ -557,6 +557,7 @@ pseudo-device bpfilter: ifnet
 pseudo-device enc: ifnet
 pseudo-device etherip: ifnet, ether, ifmedia
 pseudo-device bridge: ifnet, ether
+pseudo-device veb: ifnet, ether, etherbridge
 pseudo-device vlan: ifnet, ether
 pseudo-device carp: ifnet, ether
 pseudo-device sppp: ifnet
@@ -815,6 +816,7 @@ file net/if_bridge.c                        bridge                  needs-count
 file net/bridgectl.c                   bridge
 file net/bridgestp.c                   bridge
 file net/if_etherbridge.c              etherbridge
+file net/if_veb.c                      veb
 file net/if_vlan.c                     vlan                    needs-count
 file net/if_switch.c                   switch                  needs-count
 file net/switchctl.c                   switch
diff --git a/sys/net/if_veb.c b/sys/net/if_veb.c
new file mode 100644 (file)
index 0000000..739a451
--- /dev/null
@@ -0,0 +1,1742 @@
+/*     $OpenBSD: if_veb.c,v 1.1 2021/02/23 03:30:04 dlg Exp $ */
+
+/*
+ * Copyright (c) 2021 David Gwynne <dlg@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "bpfilter.h"
+#include "pf.h"
+#include "vlan.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/syslog.h>
+#include <sys/rwlock.h>
+#include <sys/percpu.h>
+#include <sys/smr.h>
+#include <sys/task.h>
+#include <sys/pool.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+
+#include <net/if_bridge.h>
+#include <net/if_etherbridge.h>
+
+#if NBPFILTER > 0
+#include <net/bpf.h>
+#endif
+
+#if NPF > 0
+#include <net/pfvar.h>
+#endif
+
+#if NVLAN > 0
+#include <net/if_vlan_var.h>
+#endif
+
+/*
+ * A single filter rule attached to a veb port.
+ *
+ * vr_flags selects the direction(s) the rule applies to and which of the
+ * match fields below are significant; vr_action is the verdict applied
+ * to a frame that matches.
+ */
+struct veb_rule {
+       /* list linkage; presumably for the port's p_vrl list of all rules */
+       TAILQ_ENTRY(veb_rule)           vr_entry;
+       /* linkage on the per-direction lists, indexed by VEB_RULE_LIST_* */
+       SMR_TAILQ_ENTRY(veb_rule)       vr_lentry[2];
+
+       uint16_t                        vr_flags;
+       /* direction and Ethernet header address matching */
+#define VEB_R_F_IN                             (1U << 0)
+#define VEB_R_F_OUT                            (1U << 1)
+#define VEB_R_F_SRC                            (1U << 2)
+#define VEB_R_F_DST                            (1U << 3)
+
+       /* ARP/RARP payload matching, see veb_rule_arp_match() */
+#define VEB_R_F_ARP                            (1U << 4)
+#define VEB_R_F_RARP                           (1U << 5)
+#define VEB_R_F_SHA                            (1U << 6)
+#define VEB_R_F_SPA                            (1U << 7)
+#define VEB_R_F_THA                            (1U << 8)
+#define VEB_R_F_TPA                            (1U << 9)
+       /* compared directly against the packet's arp_op; 0 matches any */
+       uint16_t                         vr_arp_op;
+
+       struct ether_addr                vr_src;
+       struct ether_addr                vr_dst;
+       struct ether_addr                vr_arp_sha;
+       struct ether_addr                vr_arp_tha;
+       struct in_addr                   vr_arp_spa;
+       struct in_addr                   vr_arp_tpa;
+
+       unsigned int                     vr_action;
+#define VEB_R_MATCH                            0
+#define VEB_R_PASS                             1
+#define VEB_R_BLOCK                            2
+
+       /* handed to pf_tag_packet() when a non-blocking rule matches */
+       int                              vr_pftag;
+};
+
+TAILQ_HEAD(veb_rules, veb_rule);
+SMR_TAILQ_HEAD(veb_rule_list, veb_rule);
+
+struct veb_softc;
+
+/*
+ * Per-port state: one of these exists for every interface added to a
+ * veb, either as a regular port or as a span port.
+ */
+struct veb_port {
+       /* the member interface this port wraps */
+       struct ifnet                    *p_ifp0;
+       struct refcnt                    p_refs;
+
+       /* the interface's own handlers, saved when the port is added */
+       int (*p_ioctl)(struct ifnet *, u_long, caddr_t);
+       int (*p_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
+           struct rtentry *);
+
+       /* link state change and interface detach hooks on p_ifp0 */
+       struct task                      p_ltask;
+       struct task                      p_dtask;
+
+       /* the bridge this port belongs to */
+       struct veb_softc                *p_veb;
+
+       /* registered on p_ifp0 with ether_brport_set() */
+       struct ether_brport              p_brport;
+
+       unsigned int                     p_link_state;
+       unsigned int                     p_span;
+       /* IFBIF_* flags (LEARNING, DISCOVER, SPAN) */
+       unsigned int                     p_bif_flags;
+       /* bitmask; ports sharing a bit do not forward to each other */
+       uint32_t                         p_protected;
+
+       struct veb_rules                 p_vrl;
+       unsigned int                     p_nvrl;
+       /* per-direction rule lists walked on the packet path */
+       struct veb_rule_list             p_vr_list[2];
+#define VEB_RULE_LIST_OUT                      0
+#define VEB_RULE_LIST_IN                       1
+
+       /* linkage on sc_ports or sc_spans */
+       SMR_TAILQ_ENTRY(veb_port)        p_entry;
+};
+
+/* A list of ports together with the number of entries on it. */
+struct veb_ports {
+       SMR_TAILQ_HEAD(, veb_port)       l_list;
+       unsigned int                     l_count;
+};
+
+/* Per-instance state for a veb interface. */
+struct veb_softc {
+       struct ifnet                     sc_if;
+       /* set by veb_clone_destroy(); veb_ioctl() then fails with ENXIO */
+       unsigned int                     sc_dead;
+
+       /* Ethernet address learning table */
+       struct etherbridge               sc_eb;
+
+       struct rwlock                    sc_rule_lock;
+       /* regular bridge ports */
+       struct veb_ports                 sc_ports;
+       /* span ports; these receive a copy of bridged traffic */
+       struct veb_ports                 sc_spans;
+};
+
+/*
+ * printf(9) style debug output that is only emitted while IFF_DEBUG is
+ * set on the veb interface belonging to _sc.
+ */
+#define DPRINTF(_sc, fmt...)    do { \
+       if (ISSET((_sc)->sc_if.if_flags, IFF_DEBUG)) \
+               printf(fmt); \
+} while (0)
+
+
+static int     veb_clone_create(struct if_clone *, int);
+static int     veb_clone_destroy(struct ifnet *);
+
+static int     veb_ioctl(struct ifnet *, u_long, caddr_t);
+static void    veb_input(struct ifnet *, struct mbuf *);
+static int     veb_enqueue(struct ifnet *, struct mbuf *);
+static int     veb_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+                   struct rtentry *);
+static void    veb_start(struct ifqueue *);
+
+static int     veb_up(struct veb_softc *);
+static int     veb_down(struct veb_softc *);
+static int     veb_iff(struct veb_softc *);
+
+static void    veb_p_linkch(void *);
+static void    veb_p_detach(void *);
+static int     veb_p_ioctl(struct ifnet *, u_long, caddr_t);
+static int     veb_p_output(struct ifnet *, struct mbuf *,
+                   struct sockaddr *, struct rtentry *);
+
+static void    veb_p_dtor(struct veb_softc *, struct veb_port *,
+                   const char *);
+static int     veb_add_port(struct veb_softc *,
+                   const struct ifbreq *, unsigned int);
+static int     veb_del_port(struct veb_softc *,
+                   const struct ifbreq *, unsigned int);
+static int     veb_port_list(struct veb_softc *, struct ifbifconf *);
+static int     veb_port_set_protected(struct veb_softc *,
+                   const struct ifbreq *);
+
+static int     veb_rule_add(struct veb_softc *, const struct ifbrlreq *);
+static int     veb_rule_list_flush(struct veb_softc *,
+                   const struct ifbrlreq *);
+static void    veb_rule_list_free(struct veb_rule *);
+static int     veb_rule_list_get(struct veb_softc *, struct ifbrlconf *);
+
+static int      veb_eb_port_cmp(void *, void *, void *);
+static void    *veb_eb_port_take(void *, void *);
+static void     veb_eb_port_rele(void *, void *);
+static size_t   veb_eb_port_ifname(void *, char *, size_t, void *);
+static void     veb_eb_port_sa(void *, struct sockaddr_storage *, void *);
+
+/*
+ * Callbacks handed to etherbridge_init() so the address table code can
+ * work with veb ports.  The initialiser is positional, so the order of
+ * the entries has to match the member order of struct etherbridge_ops.
+ */
+static const struct etherbridge_ops veb_etherbridge_ops = {
+       veb_eb_port_cmp,
+       veb_eb_port_take,
+       veb_eb_port_rele,
+       veb_eb_port_ifname,
+       veb_eb_port_sa,
+};
+
+static struct if_clone veb_cloner =
+    IF_CLONE_INITIALIZER("veb", veb_clone_create, veb_clone_destroy);
+
+static struct pool veb_rule_pool;
+
+static int     vport_clone_create(struct if_clone *, int);
+static int     vport_clone_destroy(struct ifnet *);
+
+/* Per-instance state for a vport interface. */
+struct vport_softc {
+       struct arpcom            sc_ac;
+       unsigned int             sc_dead;
+};
+
+static int     vport_ioctl(struct ifnet *, u_long, caddr_t);
+static int     vport_enqueue(struct ifnet *, struct mbuf *);
+static void    vport_start(struct ifqueue *);
+
+static int     vport_up(struct vport_softc *);
+static int     vport_down(struct vport_softc *);
+static int     vport_iff(struct vport_softc *);
+
+static struct if_clone vport_cloner =
+    IF_CLONE_INITIALIZER("vport", vport_clone_create, vport_clone_destroy);
+
+/*
+ * Pseudo-device attach routine: registers the veb and vport interface
+ * cloners.  The count argument is not used.
+ */
+void
+vebattach(int count)
+{
+       if_clone_attach(&veb_cloner);
+       if_clone_attach(&vport_cloner);
+}
+
+/*
+ * Create the veb interface for the given cloner unit.
+ *
+ * Allocates a zeroed softc, sets up the etherbridge address table and
+ * attaches the interface.  Returns 0 on success, ENOMEM if the softc
+ * cannot be allocated, or the error reported by etherbridge_init().
+ */
+static int
+veb_clone_create(struct if_clone *ifc, int unit)
+{
+       struct veb_softc *sc;
+       struct ifnet *ifp;
+       int error;
+
+       /*
+        * the rule pool is shared by all veb interfaces and is set up
+        * on first use; a zero pr_size is taken to mean that
+        * pool_init() has not been called on it yet.
+        */
+       if (veb_rule_pool.pr_size == 0) {
+               pool_init(&veb_rule_pool, sizeof(struct veb_rule),
+                   0, IPL_SOFTNET, 0, "vebrpl", NULL);
+       }
+
+       sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
+       if (sc == NULL)
+               return (ENOMEM);
+
+       rw_init(&sc->sc_rule_lock, "vebrlk");
+       SMR_TAILQ_INIT(&sc->sc_ports.l_list);
+       SMR_TAILQ_INIT(&sc->sc_spans.l_list);
+
+       ifp = &sc->sc_if;
+
+       /* the name is filled in first because etherbridge_init() uses it */
+       snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
+           ifc->ifc_name, unit);
+
+       error = etherbridge_init(&sc->sc_eb, ifp->if_xname,
+           &veb_etherbridge_ops, sc);
+       if (error != 0) {
+               free(sc, M_DEVBUF, sizeof(*sc));
+               return (error);
+       }
+
+       ifp->if_softc = sc;
+       ifp->if_type = IFT_BRIDGE;
+       ifp->if_hdrlen = ETHER_HDR_LEN;
+       ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
+       ifp->if_ioctl = veb_ioctl;
+       ifp->if_input = veb_input;
+       ifp->if_output = veb_output;
+       ifp->if_enqueue = veb_enqueue;
+       ifp->if_qstart = veb_start;
+       ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+       ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
+
+       if_counters_alloc(ifp);
+       if_attach(ifp);
+
+       if_alloc_sadl(ifp);
+
+#if NBPFILTER > 0
+       bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN);
+#endif
+
+       return (0);
+}
+
+/*
+ * Destroy a veb interface.
+ *
+ * The softc is marked dead so veb_ioctl() refuses further requests, the
+ * interface is brought down if it was running and then detached.  After
+ * that every regular and span port is torn down, the address table is
+ * destroyed and the softc is freed.  Always returns 0.
+ */
+static int
+veb_clone_destroy(struct ifnet *ifp)
+{
+       struct veb_softc *sc = ifp->if_softc;
+       struct veb_port *p, *np;
+
+       NET_LOCK();
+       sc->sc_dead = 1;
+
+       if (ISSET(ifp->if_flags, IFF_RUNNING))
+               veb_down(sc);
+       NET_UNLOCK();
+
+       /* if_detach() is called without the net lock held */
+       if_detach(ifp);
+
+       NET_LOCK();
+       /* the _SAFE iterators are used because each pass destroys p */
+       SMR_TAILQ_FOREACH_SAFE_LOCKED(p, &sc->sc_ports.l_list, p_entry, np)
+               veb_p_dtor(sc, p, "destroy");
+       SMR_TAILQ_FOREACH_SAFE_LOCKED(p, &sc->sc_spans.l_list, p_entry, np)
+               veb_p_dtor(sc, p, "destroy");
+       NET_UNLOCK();
+
+       etherbridge_destroy(&sc->sc_eb);
+
+       free(sc, M_DEVBUF, sizeof(*sc));
+
+       return (0);
+}
+
+/*
+ * Input handler installed on span ports by veb_add_port().  Anything
+ * received on a span port is freed; returning NULL tells the caller the
+ * mbuf has been consumed.
+ */
+static struct mbuf *
+veb_span_input(struct ifnet *ifp0, struct mbuf *m, void *brport)
+{
+       m_freem(m);
+       return (NULL);
+}
+
+/*
+ * Send a copy of m0 out of every running span port on the bridge.
+ *
+ * m0 itself is left untouched and still belongs to the caller.  A span
+ * port is silently skipped if the copy cannot be allocated.
+ */
+static void
+veb_span(struct veb_softc *sc, struct mbuf *m0)
+{
+       struct veb_port *p;
+       struct ifnet *ifp0;
+       struct mbuf *m;
+
+       /* the span list is walked inside an SMR read section */
+       smr_read_enter();
+       SMR_TAILQ_FOREACH(p, &sc->sc_spans.l_list, p_entry) {
+               ifp0 = p->p_ifp0;
+               if (!ISSET(ifp0->if_flags, IFF_RUNNING))
+                       continue;
+
+               m = m_dup_pkt(m0, max_linkhdr + ETHER_ALIGN, M_NOWAIT);
+               if (m == NULL) {
+                       /* XXX count error */
+                       continue;
+               }
+
+               if_enqueue(ifp0, m); /* XXX count error */
+       }
+       smr_read_leave();
+}
+
+/*
+ * Report whether the frame carries a VLAN or QinQ ethertype in its
+ * Ethernet header.  Returns 1 if it does and 0 otherwise.
+ */
+static int
+veb_vlan_filter(const struct mbuf *m)
+{
+       const struct ether_header *hdr = mtod(m, struct ether_header *);
+       uint16_t etype = ntohs(hdr->ether_type);
+
+       if (etype == ETHERTYPE_VLAN || etype == ETHERTYPE_QINQ)
+               return (1);
+
+       return (0);
+}
+
+/*
+ * Check the ARP specific parts of rule vr against the frame in m.
+ *
+ * Returns 1 if the frame is an Ethernet/IPv4 ARP packet that satisfies
+ * every ARP criterion selected in vr_flags, and 0 otherwise.  The frame
+ * is not modified.
+ */
+static int
+veb_rule_arp_match(const struct veb_rule *vr, struct mbuf *m)
+{
+       struct ether_header *eh;
+       struct ether_arp ea;
+
+       eh = mtod(m, struct ether_header *);
+
+       if (eh->ether_type != htons(ETHERTYPE_ARP))
+               return (0);
+       if (m->m_pkthdr.len < sizeof(*eh) + sizeof(ea))
+               return (0);
+
+       /* work on a local copy of the ARP payload that follows the header */
+       m_copydata(m, sizeof(*eh), sizeof(ea), (caddr_t)&ea);
+
+       if (ea.arp_hrd != htons(ARPHRD_ETHER) ||
+           ea.arp_pro != htons(ETHERTYPE_IP) ||
+           ea.arp_hln != ETHER_ADDR_LEN ||
+           ea.arp_pln != sizeof(struct in_addr))
+               return (0);
+
+       /*
+        * NOTE(review): a rule with both VEB_R_F_ARP and VEB_R_F_RARP
+        * set has to pass both opcode checks below; confirm whether
+        * rules are ever created with both flags.
+        */
+       if (ISSET(vr->vr_flags, VEB_R_F_ARP)) {
+               if (ea.arp_op != htons(ARPOP_REQUEST) &&
+                   ea.arp_op != htons(ARPOP_REPLY))
+                       return (0);
+       }
+       if (ISSET(vr->vr_flags, VEB_R_F_RARP)) {
+               if (ea.arp_op != htons(ARPOP_REVREQUEST) &&
+                   ea.arp_op != htons(ARPOP_REVREPLY))
+                       return (0);
+       }
+
+       /* a zero vr_arp_op means the rule does not care about the opcode */
+       if (vr->vr_arp_op != htons(0) && vr->vr_arp_op != ea.arp_op)
+               return (0);
+
+       if (ISSET(vr->vr_flags, VEB_R_F_SHA) &&
+           !ETHER_IS_EQ(&vr->vr_arp_sha, ea.arp_sha))
+               return (0);
+       if (ISSET(vr->vr_flags, VEB_R_F_THA) &&
+           !ETHER_IS_EQ(&vr->vr_arp_tha, ea.arp_tha))
+               return (0);
+       if (ISSET(vr->vr_flags, VEB_R_F_SPA) &&
+           memcmp(&vr->vr_arp_spa, ea.arp_spa, sizeof(vr->vr_arp_spa)) != 0)
+               return (0);
+       if (ISSET(vr->vr_flags, VEB_R_F_TPA) &&
+           memcmp(&vr->vr_arp_tpa, ea.arp_tpa, sizeof(vr->vr_arp_tpa)) != 0)
+               return (0);
+
+       return (1);
+}
+
+/*
+ * Run the frame in m against a rule list, starting at vr and following
+ * the links for direction dir (VEB_RULE_LIST_IN or VEB_RULE_LIST_OUT).
+ *
+ * Returns VEB_R_BLOCK as soon as a matching block rule is found.  Any
+ * other matching rule applies its pf tag to the packet; a matching pass
+ * rule then ends the walk with VEB_R_PASS.  Falling off the end of the
+ * list also yields VEB_R_PASS.  Must be called inside an SMR read
+ * section.
+ */
+static int
+veb_rule_list_test(struct veb_rule *vr, int dir, struct mbuf *m)
+{
+       struct ether_header *eh = mtod(m, struct ether_header *);
+
+       SMR_ASSERT_CRITICAL();
+
+       /*
+        * "continue" inside a do/while jumps to the loop condition, so
+        * a rule that does not match simply advances to the next one.
+        */
+       do {
+               if (ISSET(vr->vr_flags, VEB_R_F_ARP|VEB_R_F_RARP) &&
+                   !veb_rule_arp_match(vr, m))
+                       continue;
+
+               if (ISSET(vr->vr_flags, VEB_R_F_SRC) &&
+                   !ETHER_IS_EQ(&vr->vr_src, eh->ether_shost))
+                       continue;
+               if (ISSET(vr->vr_flags, VEB_R_F_DST) &&
+                   !ETHER_IS_EQ(&vr->vr_dst, eh->ether_dhost))
+                       continue;
+
+               /* the rule matched; blocked packets are not tagged */
+               if (vr->vr_action == VEB_R_BLOCK)
+                       return (VEB_R_BLOCK);
+#if NPF > 0
+               pf_tag_packet(m, vr->vr_pftag, -1);
+#endif
+               if (vr->vr_action == VEB_R_PASS)
+                       return (VEB_R_PASS);
+       } while ((vr = SMR_TAILQ_NEXT(vr, vr_lentry[dir])) != NULL);
+
+       return (VEB_R_PASS);
+}
+
+/*
+ * Apply port p's rules for direction dir to the frame in m.  Returns
+ * non-zero if the frame is to be dropped; a port without rules for
+ * that direction never drops anything.
+ */
+static inline int
+veb_rule_filter(struct veb_port *p, int dir, struct mbuf *m)
+{
+       struct veb_rule *first = SMR_TAILQ_FIRST(&p->p_vr_list[dir]);
+
+       if (first != NULL &&
+           veb_rule_list_test(first, dir, m) == VEB_R_BLOCK)
+               return (1);
+
+       return (0);
+}
+
+#if NPF > 0
+/*
+ * Run pf over an IPv4 or IPv6 frame on behalf of interface ifp0 in the
+ * given direction (PF_IN or PF_OUT).
+ *
+ * Frames on vport interfaces and frames that are neither IPv4 nor IPv6
+ * are returned untouched.  Otherwise the Ethernet header is stripped
+ * for pf_test() and put back afterwards.  Returns the (possibly
+ * different) mbuf to carry on with, or NULL if the packet was dropped
+ * or consumed, in which case the caller must not touch it again.
+ */
+static struct mbuf *
+veb_pf(struct ifnet *ifp0, int dir, struct mbuf *m)
+{
+       struct ether_header *eh, copy;
+       sa_family_t af = AF_UNSPEC;
+
+       /*
+        * pf runs on vport interfaces when they enter or leave the
+        * l3 stack, so don't confuse things (even more) by running
+        * pf again here. note that because of this exception the
+        * pf direction on vport interfaces is reversed compared to
+        * other veb ports.
+        */
+       if (ifp0->if_enqueue == vport_enqueue)
+               return (m);
+
+       eh = mtod(m, struct ether_header *);
+       switch (ntohs(eh->ether_type)) {
+       case ETHERTYPE_IP:
+               af = AF_INET;
+               break;
+       case ETHERTYPE_IPV6:
+               af = AF_INET6;
+               break;
+       default:
+               return (m);
+       }
+
+       /* keep the Ethernet header aside; pf only sees the payload */
+       copy = *eh;
+       m_adj(m, sizeof(*eh));
+
+       if (pf_test(af, dir, ifp0, &m) != PF_PASS) {
+               m_freem(m);
+               return (NULL);
+       }
+       /* pf_test() is given &m, so m is re-checked for NULL here */
+       if (m == NULL)
+               return (NULL);
+
+       m = m_prepend(m, sizeof(*eh), M_DONTWAIT);
+       if (m == NULL)
+               return (NULL);
+
+       /* checksum? */
+
+       eh = mtod(m, struct ether_header *);
+       *eh = copy;
+
+       return (m);
+}
+#endif /* NPF > 0 */
+
+/*
+ * Flood the frame in m0, received on port rp, to the other ports of
+ * the bridge.
+ *
+ * Each eligible port is handed its own copy of the frame.  A port is
+ * skipped if it is rp itself, shares a protected domain with rp, is not
+ * running, has IFBIF_DISCOVER cleared while the frame is unicast, or
+ * has an output rule that blocks the frame.  m0 is always consumed.
+ */
+static void
+veb_broadcast(struct veb_softc *sc, struct veb_port *rp, struct mbuf *m0)
+{
+       struct ifnet *ifp = &sc->sc_if;
+       struct veb_port *tp;
+       struct ifnet *ifp0;
+       struct mbuf *m;
+
+#if NPF > 0
+       /*
+        * we couldn't find a specific port to send this packet to,
+        * but pf should still have a chance to apply policy to it.
+        * let pf look at it, but use the veb interface as a proxy.
+        *
+        * veb_pf() can hand back a different mbuf than the one it was
+        * given, so its result has to replace m0. carrying on with
+        * the old pointer would use a stale mbuf.
+        */
+       if (ISSET(ifp->if_flags, IFF_LINK1) &&
+           (m0 = veb_pf(ifp, PF_OUT, m0)) == NULL)
+               return;
+#endif
+
+       counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes,
+           m0->m_pkthdr.len);
+
+       smr_read_enter();
+       SMR_TAILQ_FOREACH(tp, &sc->sc_ports.l_list, p_entry) {
+               if (rp == tp || (rp->p_protected & tp->p_protected)) {
+                       /*
+                        * don't let Ethernet packets hairpin or
+                        * move between ports in the same protected
+                        * domain(s).
+                        */
+                       continue;
+               }
+
+               ifp0 = tp->p_ifp0;
+               if (!ISSET(ifp0->if_flags, IFF_RUNNING)) {
+                       /* don't waste time */
+                       continue;
+               }
+
+               if (!ISSET(tp->p_bif_flags, IFBIF_DISCOVER) &&
+                   !ISSET(m0->m_flags, M_BCAST | M_MCAST)) {
+                       /* don't flood unknown unicast */
+                       continue;
+               }
+
+               if (veb_rule_filter(tp, VEB_RULE_LIST_OUT, m0))
+                       continue;
+
+               m = m_dup_pkt(m0, max_linkhdr + ETHER_ALIGN, M_NOWAIT);
+               if (m == NULL) {
+                       /* XXX count error? */
+                       continue;
+               }
+
+               if_enqueue(ifp0, m); /* XXX count error? */
+       }
+       smr_read_leave();
+
+       /* every port got its own copy, so the original is done with */
+       m_freem(m0);
+}
+
+/*
+ * Forward the frame in m, received on port rp, to the single port tp.
+ *
+ * If tp is NULL nothing is done and m is returned so the caller can
+ * deal with it.  In every other case the mbuf is consumed (sent,
+ * dropped by rules or pf, or refused because it would hairpin or cross
+ * a protected domain) and NULL is returned.
+ */
+static struct mbuf *
+veb_transmit(struct veb_softc *sc, struct veb_port *rp, struct veb_port *tp,
+    struct mbuf *m)
+{
+       struct ifnet *ifp = &sc->sc_if;
+       struct ifnet *ifp0;
+
+       if (tp == NULL)
+               return (m);
+
+       if (rp == tp || (rp->p_protected & tp->p_protected)) {
+               /*
+                * don't let Ethernet packets hairpin or move between
+                * ports in the same protected domain(s).
+                */
+               goto drop;
+       }
+
+       if (veb_rule_filter(tp, VEB_RULE_LIST_OUT, m))
+               goto drop;
+
+       ifp0 = tp->p_ifp0;
+
+#if NPF > 0
+       /* pf is only consulted when link1 is set on the veb interface */
+       if (ISSET(ifp->if_flags, IFF_LINK1) &&
+           (m = veb_pf(ifp0, PF_OUT, m)) == NULL)
+               return (NULL);
+#endif
+
+       counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes,
+           m->m_pkthdr.len);
+
+       if_enqueue(ifp0, m); /* XXX count error? */
+
+       return (NULL);
+drop:
+       m_freem(m);
+       return (NULL);
+}
+
+/*
+ * Input handler installed on regular veb ports by veb_add_port().
+ *
+ * ifp0 is the port interface the frame arrived on and brport is the
+ * struct veb_port registered for it.  Returns m when the frame is left
+ * for the caller to handle (M_PROTO1 was set on it, or the veb is not
+ * running), and NULL once the frame has been bridged or dropped.
+ */
+static struct mbuf *
+veb_port_input(struct ifnet *ifp0, struct mbuf *m, void *brport)
+{
+       struct veb_port *p = brport;
+       struct veb_softc *sc = p->p_veb;
+       struct ifnet *ifp = &sc->sc_if;
+       struct ether_header *eh;
+#if NBPFILTER > 0
+       caddr_t if_bpf;
+#endif
+
+       /*
+        * M_PROTO1 is set further down on every frame the bridge sends
+        * out.  A frame arriving with the flag still set is handed
+        * back to the caller with the flag cleared rather than being
+        * bridged.
+        */
+       if (ISSET(m->m_flags, M_PROTO1)) {
+               CLR(m->m_flags, M_PROTO1);
+               return (m);
+       }
+
+       if (!ISSET(ifp->if_flags, IFF_RUNNING))
+               return (m);
+
+#if NVLAN > 0
+       /*
+        * If the underlying interface removed the VLAN header itself,
+        * add it back.
+        */
+       if (ISSET(m->m_flags, M_VLANTAG)) {
+               m = vlan_inject(m, ETHERTYPE_VLAN, m->m_pkthdr.ether_vtag);
+               if (m == NULL) {
+                       counters_inc(ifp->if_counters, ifc_ierrors);
+                       goto drop;
+               }
+       }
+#endif
+
+       counters_pkt(ifp->if_counters, ifc_ipackets, ifc_ibytes,
+           m->m_pkthdr.len);
+
+       /* force packets into the one routing domain for pf */
+       m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
+
+#if NBPFILTER > 0
+       if_bpf = READ_ONCE(ifp->if_bpf);
+       if (if_bpf != NULL) {
+               if (bpf_mtap_ether(if_bpf, m, 0) != 0)
+                       goto drop;
+       }
+#endif
+
+       /* span ports get their copy before any filtering is applied */
+       veb_span(sc, m);
+
+       /* VLAN and QinQ frames are only bridged when link2 is set */
+       if (!ISSET(ifp->if_flags, IFF_LINK2) &&
+           veb_vlan_filter(m))
+               goto drop;
+
+       if (veb_rule_filter(p, VEB_RULE_LIST_IN, m))
+               goto drop;
+
+#if NPF > 0
+       if (ISSET(ifp->if_flags, IFF_LINK1) &&
+           (m = veb_pf(ifp0, PF_IN, m)) == NULL)
+               return (NULL);
+#endif
+
+       /* fetched only now because the steps above can replace m */
+       eh = mtod(m, struct ether_header *);
+
+       if (ISSET(p->p_bif_flags, IFBIF_LEARNING)) {
+               etherbridge_map(&sc->sc_eb, p,
+                   (struct ether_addr *)eh->ether_shost);
+       }
+
+       CLR(m->m_flags, M_BCAST|M_MCAST);
+       SET(m->m_flags, M_PROTO1);
+
+       if (!ETHER_IS_MULTICAST(eh->ether_dhost)) {
+               struct veb_port *tp = NULL;
+
+               /* tp is only used inside the SMR read section */
+               smr_read_enter();
+               tp = etherbridge_resolve(&sc->sc_eb,
+                   (struct ether_addr *)eh->ether_dhost);
+               m = veb_transmit(sc, p, tp, m);
+               smr_read_leave();
+
+               if (m == NULL)
+                       return (NULL);
+
+               /* unknown unicast address */
+       } else {
+               SET(m->m_flags,
+                   ETHER_IS_BROADCAST(eh->ether_dhost) ? M_BCAST : M_MCAST);
+       }
+
+       veb_broadcast(sc, p, m);
+       return (NULL);
+
+drop:
+       m_freem(m);
+       return (NULL);
+}
+
+/* if_input handler for the veb interface itself: the packet is freed. */
+static void
+veb_input(struct ifnet *ifp, struct mbuf *m)
+{
+       m_freem(m);
+}
+
+/*
+ * if_output handler for the veb interface itself: the packet is freed
+ * and ENODEV is returned.
+ */
+static int
+veb_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+    struct rtentry *rt)
+{
+       m_freem(m);
+       return (ENODEV);
+}
+
+/*
+ * if_enqueue handler for the veb interface itself: the packet is freed
+ * and ENODEV is returned.
+ */
+static int
+veb_enqueue(struct ifnet *ifp, struct mbuf *m)
+{
+       m_freem(m);
+       return (ENODEV);
+}
+
+/* if_qstart handler for the veb interface: the queue is purged. */
+static void
+veb_start(struct ifqueue *ifq)
+{
+       ifq_purge(ifq);
+}
+
+/*
+ * ioctl handler for the veb interface.
+ *
+ * Handles interface flag changes and the bridge (SIOCBRDG*) requests
+ * that veb implements.  Returns ENXIO once the interface is being
+ * destroyed, ENOTTY for requests that are not handled, and otherwise
+ * the result of the individual handler.
+ */
+static int
+veb_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+       struct veb_softc *sc = ifp->if_softc;
+       struct ifbrparam *bparam = (struct ifbrparam *)data;
+       int error = 0;
+
+       if (sc->sc_dead)
+               return (ENXIO);
+
+       switch (cmd) {
+       case SIOCSIFFLAGS:
+               /* bring RUNNING in line with the requested UP state */
+               if (ISSET(ifp->if_flags, IFF_UP)) {
+                       if (!ISSET(ifp->if_flags, IFF_RUNNING))
+                               error = veb_up(sc);
+               } else {
+                       if (ISSET(ifp->if_flags, IFF_RUNNING))
+                               error = veb_down(sc);
+               }
+               break;
+
+       /* adding and removing regular (0) and span (1) ports */
+       case SIOCBRDGADD:
+               error = suser(curproc);
+               if (error != 0)
+                       break;
+
+               error = veb_add_port(sc, (struct ifbreq *)data, 0);
+               break;
+       case SIOCBRDGADDS:
+               error = suser(curproc);
+               if (error != 0)
+                       break;
+
+               error = veb_add_port(sc, (struct ifbreq *)data, 1);
+               break;
+       case SIOCBRDGDEL:
+               error = suser(curproc);
+               if (error != 0)
+                       break;
+
+               error = veb_del_port(sc, (struct ifbreq *)data, 0);
+               break;
+       case SIOCBRDGDELS:
+               error = suser(curproc);
+               if (error != 0)
+                       break;
+
+               error = veb_del_port(sc, (struct ifbreq *)data, 1);
+               break;
+
+       /* address table size and timeout */
+       case SIOCBRDGSCACHE:
+               error = suser(curproc);
+               if (error != 0)
+                       break;
+
+               error = etherbridge_set_max(&sc->sc_eb, bparam);
+               break;
+       case SIOCBRDGGCACHE:
+               error = etherbridge_get_max(&sc->sc_eb, bparam);
+               break;
+
+       case SIOCBRDGSTO:
+               error = suser(curproc);
+               if (error != 0)
+                       break;
+
+               error = etherbridge_set_tmo(&sc->sc_eb, bparam);
+               break;
+       case SIOCBRDGGTO:
+               error = etherbridge_get_tmo(&sc->sc_eb, bparam);
+               break;
+
+       case SIOCBRDGRTS:
+               error = etherbridge_rtfind(&sc->sc_eb, (struct ifbaconf *)data);
+               break;
+       case SIOCBRDGIFS:
+               error = veb_port_list(sc, (struct ifbifconf *)data);
+               break;
+
+       /*
+        * NOTE(review): unlike the requests above, the ones below that
+        * modify state do not call suser() here; confirm the privilege
+        * check is done by the caller.
+        */
+       case SIOCBRDGSIFPROT:
+               error = veb_port_set_protected(sc, (struct ifbreq *)data);
+               break;
+
+       case SIOCBRDGARL:
+               error = veb_rule_add(sc, (struct ifbrlreq *)data);
+               break;
+       case SIOCBRDGFRL:
+               error = veb_rule_list_flush(sc, (struct ifbrlreq *)data);
+               break;
+       case SIOCBRDGGRL:
+               error = veb_rule_list_get(sc, (struct ifbrlconf *)data);
+               break;
+
+       default:
+               error = ENOTTY;
+               break;
+       }
+
+       /* a handler returning ENETRESET asks for veb_iff() to be run */
+       if (error == ENETRESET)
+               error = veb_iff(sc);
+
+       return (error);
+}
+
+/*
+ * Add the interface named in req to the bridge, as a span port if span
+ * is non-zero and as a regular port otherwise.
+ *
+ * Returns 0 on success.  Fails with EINVAL if the interface does not
+ * exist, EPROTONOSUPPORT if it is not an Ethernet interface or is the
+ * veb itself, ENOMEM if the port cannot be allocated, or with the error
+ * from ether_brport_isset() or ifpromisc().  Must be called with the
+ * net lock held.
+ */
+static int
+veb_add_port(struct veb_softc *sc, const struct ifbreq *req, unsigned int span)
+{
+       struct ifnet *ifp = &sc->sc_if;
+       struct ifnet *ifp0;
+       struct veb_ports *port_list;
+       struct veb_port *p;
+       int error;
+
+       NET_ASSERT_LOCKED();
+
+       /*
+        * the if_put() at the "put" label pairs with this lookup on
+        * every failure path; on success ifp0 stays in p->p_ifp0.
+        */
+       ifp0 = if_unit(req->ifbr_ifsname);
+       if (ifp0 == NULL)
+               return (EINVAL);
+
+       if (ifp0->if_type != IFT_ETHER) {
+               error = EPROTONOSUPPORT;
+               goto put;
+       }
+
+       if (ifp0 == ifp) {
+               error = EPROTONOSUPPORT;
+               goto put;
+       }
+
+       error = ether_brport_isset(ifp0);
+       if (error != 0)
+               goto put;
+
+       /* let's try */
+
+       p = malloc(sizeof(*p), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
+       if (p == NULL) {
+               error = ENOMEM;
+               goto put;
+       }
+
+       p->p_ifp0 = ifp0;
+       p->p_veb = sc;
+
+       refcnt_init(&p->p_refs);
+       TAILQ_INIT(&p->p_vrl);
+       SMR_TAILQ_INIT(&p->p_vr_list[0]);
+       SMR_TAILQ_INIT(&p->p_vr_list[1]);
+
+       /* remember the interface's own handlers before replacing them */
+       p->p_ioctl = ifp0->if_ioctl;
+       p->p_output = ifp0->if_output;
+
+       if (span) {
+               port_list = &sc->sc_spans;
+
+               p->p_brport.eb_input = veb_span_input;
+               p->p_bif_flags = IFBIF_SPAN;
+       } else {
+               port_list = &sc->sc_ports;
+
+               /* only regular ports are put into promiscuous mode */
+               error = ifpromisc(ifp0, 1);
+               if (error != 0)
+                       goto free;
+
+               p->p_bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
+               p->p_brport.eb_input = veb_port_input;
+       }
+
+       /* this might have changed if we slept for malloc or ifpromisc */
+       error = ether_brport_isset(ifp0);
+       if (error != 0)
+               goto unpromisc;
+
+       task_set(&p->p_ltask, veb_p_linkch, p);
+       if_linkstatehook_add(ifp0, &p->p_ltask);
+
+       task_set(&p->p_dtask, veb_p_detach, p);
+       if_detachhook_add(ifp0, &p->p_dtask);
+
+       p->p_brport.eb_port = p;
+
+       /* commit */
+       SMR_TAILQ_INSERT_TAIL_LOCKED(&port_list->l_list, p, p_entry);
+       port_list->l_count++;
+
+       ether_brport_set(ifp0, &p->p_brport);
+       if (ifp0->if_enqueue != vport_enqueue) { /* vport is special */
+               ifp0->if_ioctl = veb_p_ioctl;
+               ifp0->if_output = veb_p_output;
+       }
+
+       /* run the link state handler once for the new port */
+       veb_p_linkch(p);
+
+       return (0);
+
+       /* error unwinding, in reverse order of setup */
+unpromisc:
+       if (!span)
+               ifpromisc(ifp0, 0);
+free:
+       free(p, M_DEVBUF, sizeof(*p));
+put:
+       if_put(ifp0);
+       return (error);
+}
+
+/*
+ * Find the port whose interface is called name, searching the span
+ * ports if span is non-zero and the regular ports otherwise.  Returns
+ * the port, or NULL if there is no such port on that list.
+ */
+static struct veb_port *
+veb_trunkport(struct veb_softc *sc, const char *name, unsigned int span)
+{
+       struct veb_ports *ports = &sc->sc_ports;
+       struct veb_port *port;
+
+       if (span)
+               ports = &sc->sc_spans;
+
+       /* port is NULL after the loop if nothing matched */
+       SMR_TAILQ_FOREACH_LOCKED(port, &ports->l_list, p_entry) {
+               if (strcmp(port->p_ifp0->if_xname, name) == 0)
+                       break;
+       }
+
+       return (port);
+}
+
+/*
+ * Remove the port named in req from the bridge; span selects between
+ * the span ports and the regular ports.  Returns 0 on success or
+ * EINVAL if the bridge has no such port.  Must be called with the net
+ * lock held.
+ */
+static int
+veb_del_port(struct veb_softc *sc, const struct ifbreq *req, unsigned int span)
+{
+       struct veb_port *victim;
+
+       NET_ASSERT_LOCKED();
+
+       victim = veb_trunkport(sc, req->ifbr_ifsname, span);
+       if (victim != NULL) {
+               veb_p_dtor(sc, victim, "del");
+               return (0);
+       }
+
+       return (EINVAL);
+}
+
+/*
+ * Find a regular (non-span) port by interface name and take a
+ * reference on it.  The caller releases the reference with
+ * veb_port_put().  Returns NULL when no port matches.
+ */
+static struct veb_port *
+veb_port_get(struct veb_softc *sc, const char *name)
+{
+       struct veb_port *port;
+
+       NET_ASSERT_LOCKED();
+
+       SMR_TAILQ_FOREACH_LOCKED(port, &sc->sc_ports.l_list, p_entry) {
+               if (strncmp(port->p_ifp0->if_xname, name,
+                   sizeof(port->p_ifp0->if_xname)) != 0)
+                       continue;
+
+               refcnt_take(&port->p_refs);
+               return (port);
+       }
+
+       return (NULL);
+}
+
+/*
+ * Drop a reference taken by veb_port_get().  The softc argument is
+ * not used here.
+ */
+static void
+veb_port_put(struct veb_softc *sc, struct veb_port *p)
+{
+       /* the _wake variant is paired with refcnt_finalize() in veb_p_dtor() */
+       refcnt_rele_wake(&p->p_refs);
+}
+
+/*
+ * Store the protected domain value from the request on the named
+ * port.  Returns ESRCH when the interface is not a port of this veb.
+ */
+static int
+veb_port_set_protected(struct veb_softc *sc, const struct ifbreq *ifbr)
+{
+       struct veb_port *port = veb_port_get(sc, ifbr->ifbr_ifsname);
+
+       if (port == NULL)
+               return (ESRCH);
+
+       port->p_protected = ifbr->ifbr_protected;
+
+       veb_port_put(sc, port);
+       return (0);
+}
+
+/*
+ * Translate a bridge rule request into a veb_rule and append it to the
+ * rule lists of the port named in the request.
+ *
+ * The rule is first assembled in a template on the stack so that the
+ * request can be validated before any memory is allocated.  Returns 0
+ * on success, EINVAL for a malformed request, ESRCH if the port does
+ * not exist, ENOMEM if a tag or rule cannot be allocated, or the error
+ * from an interrupted rw_enter().
+ */
+static int
+veb_rule_add(struct veb_softc *sc, const struct ifbrlreq *ifbr)
+{
+       const struct ifbrarpf *arpf = &ifbr->ifbr_arpf;
+       struct veb_rule tmpl, *rule;
+       struct veb_port *port;
+       int error;
+
+       memset(&tmpl, 0, sizeof(tmpl));
+
+       /* XXX VEB_R_MATCH */
+       if (ifbr->ifbr_action == BRL_ACTION_BLOCK)
+               tmpl.vr_action = VEB_R_BLOCK;
+       else if (ifbr->ifbr_action == BRL_ACTION_PASS)
+               tmpl.vr_action = VEB_R_PASS;
+       else
+               return (EINVAL);
+
+       /* a rule has to apply to at least one direction */
+       if (!ISSET(ifbr->ifbr_flags, BRL_FLAG_IN|BRL_FLAG_OUT))
+               return (EINVAL);
+       if (ISSET(ifbr->ifbr_flags, BRL_FLAG_OUT))
+               SET(tmpl.vr_flags, VEB_R_F_OUT);
+       if (ISSET(ifbr->ifbr_flags, BRL_FLAG_IN))
+               SET(tmpl.vr_flags, VEB_R_F_IN);
+
+       /* optional ethernet address matches */
+       if (ISSET(ifbr->ifbr_flags, BRL_FLAG_DSTVALID)) {
+               SET(tmpl.vr_flags, VEB_R_F_DST);
+               tmpl.vr_dst = ifbr->ifbr_dst;
+       }
+       if (ISSET(ifbr->ifbr_flags, BRL_FLAG_SRCVALID)) {
+               SET(tmpl.vr_flags, VEB_R_F_SRC);
+               tmpl.vr_src = ifbr->ifbr_src;
+       }
+
+       /* optional ARP/RARP matches */
+       if (ISSET(arpf->brla_flags, BRLA_ARP|BRLA_RARP)) {
+               if (ISSET(arpf->brla_flags, BRLA_RARP))
+                       SET(tmpl.vr_flags, VEB_R_F_RARP);
+               if (ISSET(arpf->brla_flags, BRLA_ARP))
+                       SET(tmpl.vr_flags, VEB_R_F_ARP);
+
+               /* hardware addresses */
+               if (ISSET(arpf->brla_flags, BRLA_THA)) {
+                       SET(tmpl.vr_flags, VEB_R_F_THA);
+                       tmpl.vr_arp_tha = arpf->brla_tha;
+               }
+               if (ISSET(arpf->brla_flags, BRLA_SHA)) {
+                       SET(tmpl.vr_flags, VEB_R_F_SHA);
+                       tmpl.vr_arp_sha = arpf->brla_sha;
+               }
+
+               /* protocol addresses */
+               if (ISSET(arpf->brla_flags, BRLA_TPA)) {
+                       SET(tmpl.vr_flags, VEB_R_F_TPA);
+                       tmpl.vr_arp_tpa = arpf->brla_tpa;
+               }
+               if (ISSET(arpf->brla_flags, BRLA_SPA)) {
+                       SET(tmpl.vr_flags, VEB_R_F_SPA);
+                       tmpl.vr_arp_spa = arpf->brla_spa;
+               }
+
+               /* the opcode is stored in network byte order */
+               tmpl.vr_arp_op = htons(arpf->brla_op);
+       }
+
+       /* optional pf tag; from here on failures must drop the tag */
+       if (ifbr->ifbr_tagname[0] != '\0') {
+#if NPF > 0
+               tmpl.vr_pftag = pf_tagname2tag((char *)ifbr->ifbr_tagname, 1);
+               if (tmpl.vr_pftag == 0)
+                       return (ENOMEM);
+#else
+               return (EINVAL);
+#endif
+       }
+
+       port = veb_port_get(sc, ifbr->ifbr_ifsname);
+       if (port == NULL) {
+               error = ESRCH;
+               goto unref_tag;
+       }
+
+       rule = pool_get(&veb_rule_pool, PR_WAITOK|PR_LIMITFAIL|PR_ZERO);
+       if (rule == NULL) {
+               error = ENOMEM;
+               goto put_port;
+       }
+
+       *rule = tmpl;
+
+       /* there's one big lock on a veb for all ports */
+       error = rw_enter(&sc->sc_rule_lock, RW_WRITE|RW_INTR);
+       if (error != 0)
+               goto free_rule;
+
+       /* p_vrl holds every rule; p_vr_list[] is split by direction */
+       TAILQ_INSERT_TAIL(&port->p_vrl, rule, vr_entry);
+       port->p_nvrl++;
+       if (ISSET(rule->vr_flags, VEB_R_F_OUT)) {
+               SMR_TAILQ_INSERT_TAIL_LOCKED(&port->p_vr_list[0],
+                   rule, vr_lentry[0]);
+       }
+       if (ISSET(rule->vr_flags, VEB_R_F_IN)) {
+               SMR_TAILQ_INSERT_TAIL_LOCKED(&port->p_vr_list[1],
+                   rule, vr_lentry[1]);
+       }
+
+       rw_exit(&sc->sc_rule_lock);
+       veb_port_put(sc, port);
+
+       return (0);
+
+free_rule:
+       pool_put(&veb_rule_pool, rule);
+put_port:
+       veb_port_put(sc, port);
+unref_tag:
+#if NPF > 0
+       pf_tag_unref(tmpl.vr_pftag);
+#endif
+       return (error);
+}
+
+/*
+ * Return a chain of rules, linked through vr_entry and starting at
+ * `nvr`, to the rule pool.  A NULL argument is a no-op.
+ */
+static void
+veb_rule_list_free(struct veb_rule *nvr)
+{
+       struct veb_rule *vr, *next;
+
+       for (vr = nvr; vr != NULL; vr = next) {
+               /* fetch the successor before the rule is released */
+               next = TAILQ_NEXT(vr, vr_entry);
+               pool_put(&veb_rule_pool, vr);
+       }
+}
+
+/*
+ * Remove every rule from the port named in the request.  The rules
+ * are unhooked under the rule lock and only freed after an SMR
+ * barrier.  Returns ESRCH if the port does not exist, or the error
+ * from an interrupted rw_enter().
+ */
+static int
+veb_rule_list_flush(struct veb_softc *sc, const struct ifbrlreq *ifbr)
+{
+       struct veb_port *port;
+       struct veb_rule *rules = NULL;
+       int error;
+
+       port = veb_port_get(sc, ifbr->ifbr_ifsname);
+       if (port == NULL)
+               return (ESRCH);
+
+       error = rw_enter(&sc->sc_rule_lock, RW_WRITE|RW_INTR);
+       if (error == 0) {
+               /* keep hold of the chain of rules ... */
+               rules = TAILQ_FIRST(&port->p_vrl);
+
+               /* ... and leave the port with empty lists and counts */
+               TAILQ_INIT(&port->p_vrl);
+               port->p_nvrl = 0;
+               SMR_TAILQ_INIT(&port->p_vr_list[0]);
+               SMR_TAILQ_INIT(&port->p_vr_list[1]);
+
+               rw_exit(&sc->sc_rule_lock);
+       }
+
+       veb_port_put(sc, port);
+       if (error != 0)
+               return (error);
+
+       smr_barrier();
+       veb_rule_list_free(rules);
+
+       return (0);
+}
+
+/*
+ * Fill in a bridge rule request from a veb_rule; the inverse of the
+ * translation done by veb_rule_add().  Only fields selected by the
+ * rule's flags are written, and flags are OR-ed into the request, so
+ * the caller is expected to pass in a zeroed request.
+ */
+static void
+veb_rule2ifbr(struct ifbrlreq *ifbr, const struct veb_rule *vr)
+{
+       struct ifbrarpf *arpf = &ifbr->ifbr_arpf;
+
+       /* any other action leaves ifbr_action untouched */
+       if (vr->vr_action == VEB_R_PASS)
+               ifbr->ifbr_action = BRL_ACTION_PASS;
+       else if (vr->vr_action == VEB_R_BLOCK)
+               ifbr->ifbr_action = BRL_ACTION_BLOCK;
+
+       /* direction */
+       if (ISSET(vr->vr_flags, VEB_R_F_OUT))
+               SET(ifbr->ifbr_flags, BRL_FLAG_OUT);
+       if (ISSET(vr->vr_flags, VEB_R_F_IN))
+               SET(ifbr->ifbr_flags, BRL_FLAG_IN);
+
+       /* ethernet addresses */
+       if (ISSET(vr->vr_flags, VEB_R_F_DST)) {
+               SET(ifbr->ifbr_flags, BRL_FLAG_DSTVALID);
+               ifbr->ifbr_dst = vr->vr_dst;
+       }
+       if (ISSET(vr->vr_flags, VEB_R_F_SRC)) {
+               SET(ifbr->ifbr_flags, BRL_FLAG_SRCVALID);
+               ifbr->ifbr_src = vr->vr_src;
+       }
+
+       /* ARP/RARP matches */
+       if (ISSET(vr->vr_flags, VEB_R_F_ARP|VEB_R_F_RARP)) {
+               if (ISSET(vr->vr_flags, VEB_R_F_RARP))
+                       SET(arpf->brla_flags, BRLA_RARP);
+               if (ISSET(vr->vr_flags, VEB_R_F_ARP))
+                       SET(arpf->brla_flags, BRLA_ARP);
+
+               /* hardware addresses */
+               if (ISSET(vr->vr_flags, VEB_R_F_THA)) {
+                       SET(arpf->brla_flags, BRLA_THA);
+                       arpf->brla_tha = vr->vr_arp_tha;
+               }
+               if (ISSET(vr->vr_flags, VEB_R_F_SHA)) {
+                       SET(arpf->brla_flags, BRLA_SHA);
+                       arpf->brla_sha = vr->vr_arp_sha;
+               }
+
+               /* protocol addresses */
+               if (ISSET(vr->vr_flags, VEB_R_F_TPA)) {
+                       SET(arpf->brla_flags, BRLA_TPA);
+                       arpf->brla_tpa = vr->vr_arp_tpa;
+               }
+               if (ISSET(vr->vr_flags, VEB_R_F_SPA)) {
+                       SET(arpf->brla_flags, BRLA_SPA);
+                       arpf->brla_spa = vr->vr_arp_spa;
+               }
+
+               /* the rule keeps the opcode in network byte order */
+               arpf->brla_op = ntohs(vr->vr_arp_op);
+       }
+
+#if NPF > 0
+       if (vr->vr_pftag != 0)
+               pf_tag2tagname(vr->vr_pftag, ifbr->ifbr_tagname);
+#endif
+}
+
+/*
+ * Copy the rules configured on a port out to userland.
+ *
+ * If the caller passes a zero length buffer, or the port has no rules,
+ * only the required buffer size is reported in ifbrl_len.  Otherwise
+ * the rules are serialised into a temporary array under the rule lock
+ * and copied out, truncated to the caller's buffer if necessary; on
+ * success ifbrl_len is set to the full size of the rule list.
+ *
+ * Returns 0 on success, ESRCH if the port does not exist, ENOMEM if
+ * the temporary array cannot be allocated, the error from an
+ * interrupted rw_enter(), or the error from copyout().
+ */
+static int
+veb_rule_list_get(struct veb_softc *sc, struct ifbrlconf *ifbrl)
+{
+       struct veb_port *p;
+       struct veb_rule *vr;
+       struct ifbrlreq *ifbr, *ifbrs;
+       int error = 0;
+       size_t len;
+
+       p = veb_port_get(sc, ifbrl->ifbrl_ifsname);
+       if (p == NULL)
+               return (ESRCH);
+
+       len = p->p_nvrl; /* estimate, read without the rule lock */
+       if (ifbrl->ifbrl_len == 0 || len == 0) {
+               ifbrl->ifbrl_len = len * sizeof(*ifbrs);
+               goto port_put;
+       }
+
+       error = rw_enter(&sc->sc_rule_lock, RW_READ|RW_INTR);
+       if (error != 0)
+               goto port_put;
+
+       ifbrs = mallocarray(p->p_nvrl, sizeof(*ifbrs), M_TEMP,
+           M_WAITOK|M_CANFAIL|M_ZERO);
+       if (ifbrs == NULL) {
+               /*
+                * report the failure; without this the caller would
+                * be told the request succeeded with nothing copied out.
+                */
+               error = ENOMEM;
+               rw_exit(&sc->sc_rule_lock);
+               goto port_put;
+       }
+       len = p->p_nvrl * sizeof(*ifbrs);
+
+       /* the array was zeroed, so veb_rule2ifbr only fills in what's set */
+       ifbr = ifbrs;
+       TAILQ_FOREACH(vr, &p->p_vrl, vr_entry) {
+               strlcpy(ifbr->ifbr_name, sc->sc_if.if_xname,
+                   sizeof(ifbr->ifbr_name));
+               strlcpy(ifbr->ifbr_ifsname, p->p_ifp0->if_xname,
+                   sizeof(ifbr->ifbr_ifsname));
+               veb_rule2ifbr(ifbr, vr);
+
+               ifbr++;
+       }
+
+       rw_exit(&sc->sc_rule_lock);
+
+       /* copy out after the lock is dropped, truncated to the caller's buffer */
+       error = copyout(ifbrs, ifbrl->ifbrl_buf, min(len, ifbrl->ifbrl_len));
+       if (error == 0)
+               ifbrl->ifbrl_len = len;
+       free(ifbrs, M_TEMP, len);
+
+port_put:
+       veb_port_put(sc, p);
+       return (error);
+}
+
+/*
+ * Report the ports and span ports of a veb to userland.
+ *
+ * With a zero length buffer only the space needed for all ports is
+ * reported.  Otherwise one ifbreq per port is copied out, regular
+ * ports first and span ports after, until the buffer is exhausted.
+ * ifbic_len is always rewritten to the number of bytes that n entries
+ * occupy.  Returns 0 or the error from copyout().
+ */
+static int
+veb_port_list(struct veb_softc *sc, struct ifbifconf *bifc)
+{
+       struct ifnet *ifp = &sc->sc_if;
+       struct veb_port *p;
+       struct ifbreq breq;
+       int error = 0;
+       int n = 0;
+
+       NET_ASSERT_LOCKED();
+
+       if (bifc->ifbic_len == 0) {
+               /* size query only */
+               n = sc->sc_ports.l_count + sc->sc_spans.l_count;
+               goto done;
+       }
+
+       /* regular ports */
+       SMR_TAILQ_FOREACH_LOCKED(p, &sc->sc_ports.l_list, p_entry) {
+               struct ifnet *ifp0 = p->p_ifp0;
+
+               if (bifc->ifbic_len < sizeof(breq))
+                       break;
+
+               memset(&breq, 0, sizeof(breq));
+               strlcpy(breq.ifbr_name, ifp->if_xname, IFNAMSIZ);
+               strlcpy(breq.ifbr_ifsname, ifp0->if_xname, IFNAMSIZ);
+               breq.ifbr_protected = p->p_protected;
+               breq.ifbr_portno = ifp0->if_index;
+               breq.ifbr_ifsflags = p->p_bif_flags;
+
+               error = copyout(&breq, bifc->ifbic_req + n, sizeof(breq));
+               if (error != 0)
+                       goto done;
+
+               n++;
+               bifc->ifbic_len -= sizeof(breq);
+       }
+
+       /* span ports only report their names and flags */
+       SMR_TAILQ_FOREACH_LOCKED(p, &sc->sc_spans.l_list, p_entry) {
+               if (bifc->ifbic_len < sizeof(breq))
+                       break;
+
+               memset(&breq, 0, sizeof(breq));
+               strlcpy(breq.ifbr_name, ifp->if_xname, IFNAMSIZ);
+               strlcpy(breq.ifbr_ifsname, p->p_ifp0->if_xname, IFNAMSIZ);
+               breq.ifbr_ifsflags = p->p_bif_flags;
+
+               error = copyout(&breq, bifc->ifbic_req + n, sizeof(breq));
+               if (error != 0)
+                       goto done;
+
+               n++;
+               bifc->ifbic_len -= sizeof(breq);
+       }
+
+done:
+       bifc->ifbic_len = n * sizeof(breq);
+       return (error);
+}
+
+/*
+ * ioctl handler installed on an interface while it is a veb port.
+ * Address configuration is refused with EBUSY; every other request is
+ * passed to the ioctl handler saved in the port.
+ */
+static int
+veb_p_ioctl(struct ifnet *ifp0, u_long cmd, caddr_t data)
+{
+       const struct ether_brport *eb = ether_brport_get_locked(ifp0);
+       struct veb_port *p;
+
+       KASSERTMSG(eb != NULL,
+           "%s: %s called without an ether_brport set",
+           ifp0->if_xname, __func__);
+       KASSERTMSG(eb->eb_input == veb_port_input,
+           "%s: %s called, but eb_input seems wrong (%p != veb_port_input())",
+           ifp0->if_xname, __func__, eb->eb_input);
+
+       /* no addresses on an interface that is a veb port */
+       if (cmd == SIOCSIFADDR)
+               return (EBUSY);
+
+       p = eb->eb_port;
+
+       return ((*p->p_ioctl)(ifp0, cmd, data));
+}
+
+/*
+ * Output handler installed on an interface while it is a veb port.
+ * Only packets carrying a PACKET_TAG_DLT tag are let through to the
+ * output routine saved in the port; anything else is freed and
+ * rejected with EBUSY.  Returns ENXIO if the interface is no longer a
+ * veb port.
+ */
+static int
+veb_p_output(struct ifnet *ifp0, struct mbuf *m, struct sockaddr *dst,
+    struct rtentry *rt)
+{
+       int (*output)(struct ifnet *, struct mbuf *, struct sockaddr *,
+           struct rtentry *) = NULL;
+       const struct ether_brport *eb;
+       struct veb_port *p;
+
+       /* restrict transmission to bpf only */
+       if (m_tag_find(m, PACKET_TAG_DLT, NULL) == NULL) {
+               m_freem(m);
+               return (EBUSY);
+       }
+
+       smr_read_enter();
+       eb = ether_brport_get(ifp0);
+       if (eb != NULL && eb->eb_input == veb_port_input) {
+               p = eb->eb_port;
+               /* code doesn't go away, so it can be called later */
+               output = p->p_output;
+       }
+       smr_read_leave();
+
+       if (output != NULL)
+               return ((*output)(ifp0, m, dst, rt));
+
+       m_freem(m);
+       return (ENXIO);
+}
+
+/*
+ * Tear down a port: give the interface back its saved ioctl and
+ * output handlers, detach it from the bridge, wait for all users of
+ * the port to go away, and free it.  `op` names the operation that
+ * caused the teardown; it is not used in this function.
+ */
+static void
+veb_p_dtor(struct veb_softc *sc, struct veb_port *p, const char *op)
+{
+       struct ifnet *vebif = &sc->sc_if;
+       struct ifnet *ifp0 = p->p_ifp0;
+       struct veb_ports *ports;
+
+       DPRINTF(sc, "%s %s: destroying port\n",
+           vebif->if_xname, ifp0->if_xname);
+
+       /* restore the handlers saved in the port */
+       ifp0->if_output = p->p_output;
+       ifp0->if_ioctl = p->p_ioctl;
+
+       ether_brport_clr(ifp0);
+
+       if_detachhook_del(ifp0, &p->p_dtask);
+       if_linkstatehook_del(ifp0, &p->p_ltask);
+
+       if (!p->p_span) {
+               /* regular ports were made promiscuous when added */
+               if (ifpromisc(ifp0, 0) != 0) {
+                       log(LOG_WARNING, "%s %s: unable to disable promisc\n",
+                           vebif->if_xname, ifp0->if_xname);
+               }
+
+               etherbridge_detach_port(&sc->sc_eb, p);
+
+               ports = &sc->sc_ports;
+       } else
+               ports = &sc->sc_spans;
+
+       SMR_TAILQ_REMOVE_LOCKED(&ports->l_list, p, p_entry);
+       ports->l_count--;
+
+       /* wait for SMR readers and reference holders before freeing */
+       smr_barrier();
+       refcnt_finalize(&p->p_refs, "vebpdtor");
+
+       veb_rule_list_free(TAILQ_FIRST(&p->p_vrl));
+
+       if_put(ifp0);
+       free(p, M_DEVBUF, sizeof(*p));
+}
+
+/*
+ * Detach hook task: the interface under a port is going away, so
+ * destroy the port.  `arg` is the veb_port registered with the task.
+ */
+static void
+veb_p_detach(void *arg)
+{
+       struct veb_port *port = arg;
+
+       veb_p_dtor(port->p_veb, port, "detach");
+
+       /* port has been freed by now; don't touch it again */
+       NET_ASSERT_LOCKED();
+}
+
+/*
+ * A port is active when its interface is running and its link state
+ * counts as up.  Returns 1 if active, 0 otherwise.
+ */
+static int
+veb_p_active(struct veb_port *p)
+{
+       struct ifnet *ifp0 = p->p_ifp0;
+
+       if (!ISSET(ifp0->if_flags, IFF_RUNNING))
+               return (0);
+
+       return (LINK_STATE_IS_UP(ifp0->if_link_state) ? 1 : 0);
+}
+
+/*
+ * Link state hook task: cache whether the port is currently usable.
+ * `arg` is the veb_port registered with the task.
+ */
+static void
+veb_p_linkch(void *arg)
+{
+       struct veb_port *port = arg;
+
+       NET_ASSERT_LOCKED();
+
+       port->p_link_state = veb_p_active(port) ?
+           LINK_STATE_FULL_DUPLEX : LINK_STATE_DOWN;
+}
+
+/*
+ * Bring the bridge up: start the etherbridge and, if that works, mark
+ * the veb interface as running.  Returns the etherbridge_up() error.
+ */
+static int
+veb_up(struct veb_softc *sc)
+{
+       struct ifnet *ifp = &sc->sc_if;
+       int error = etherbridge_up(&sc->sc_eb);
+
+       if (error == 0) {
+               NET_ASSERT_LOCKED();
+               SET(ifp->if_flags, IFF_RUNNING);
+       }
+
+       return (error);
+}
+
+/*
+ * Interface filter update for the veb interface.  Currently a stub
+ * that always succeeds.
+ */
+static int
+veb_iff(struct veb_softc *sc)
+{
+       /* nothing to program */
+       return (0);
+}
+
+/*
+ * Take the bridge down: stop the etherbridge and, if that works,
+ * clear IFF_RUNNING on the veb interface.
+ *
+ * Returns 0 on success or the error from etherbridge_down().  The
+ * error is passed on rather than swallowed so that the caller is not
+ * told the interface went down while IFF_RUNNING is still set; this
+ * mirrors the error handling in veb_up().
+ */
+static int
+veb_down(struct veb_softc *sc)
+{
+       struct ifnet *ifp = &sc->sc_if;
+       int error;
+
+       error = etherbridge_down(&sc->sc_eb);
+       if (error != 0)
+               return (error);
+
+       NET_ASSERT_LOCKED();
+       CLR(ifp->if_flags, IFF_RUNNING);
+
+       return (0);
+}
+
+/*
+ * etherbridge port comparison callback.  Ports are compared by
+ * identity: the result is non-zero when both arguments are the same
+ * port, and zero otherwise.
+ */
+static int
+veb_eb_port_cmp(void *arg, void *a, void *b)
+{
+       return (a == b);
+}
+
+/*
+ * etherbridge callback: take a reference on a port and hand the same
+ * port back.
+ */
+static void *
+veb_eb_port_take(void *arg, void *port)
+{
+       struct veb_port *vp = port;
+
+       refcnt_take(&vp->p_refs);
+
+       return (vp);
+}
+
+/*
+ * etherbridge callback: drop a reference on a port, waking up anyone
+ * waiting for the references to drain.
+ */
+static void
+veb_eb_port_rele(void *arg, void *port)
+{
+       struct veb_port *vp = port;
+
+       refcnt_rele_wake(&vp->p_refs);
+}
+
+/*
+ * etherbridge callback: copy the name of the interface under a port
+ * into dst.  Returns whatever strlcpy() returns.
+ */
+static size_t
+veb_eb_port_ifname(void *arg, char *dst, size_t len, void *port)
+{
+       struct veb_port *vp = port;
+       const struct ifnet *ifp0 = vp->p_ifp0;
+
+       return (strlcpy(dst, ifp0->if_xname, len));
+}
+
+/*
+ * etherbridge callback: report a port's socket address.  veb always
+ * reports AF_UNSPEC and fills in nothing else.
+ */
+static void
+veb_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port)
+{
+       /* only the family is written */
+       ss->ss_family = AF_UNSPEC;
+}
+
+/*
+ * virtual ethernet bridge port
+ */
+
+/*
+ * Create a vport interface: allocate the softc, set up the ifnet as
+ * an Ethernet interface with a fake address, and attach it.  Returns
+ * ENOMEM if the softc cannot be allocated.
+ */
+static int
+vport_clone_create(struct if_clone *ifc, int unit)
+{
+       struct vport_softc *sc;
+       struct ifnet *ifp;
+
+       sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
+       if (sc == NULL)
+               return (ENOMEM);
+
+       ifp = &sc->sc_ac.ac_if;
+       ifp->if_softc = sc;
+
+       snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
+           ifc->ifc_name, unit);
+
+       /* interface properties */
+       ifp->if_type = IFT_ETHER;
+       ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
+       ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+       ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
+
+       /* driver entry points */
+       ifp->if_ioctl = vport_ioctl;
+       ifp->if_enqueue = vport_enqueue;
+       ifp->if_qstart = vport_start;
+
+       ether_fakeaddr(ifp);
+
+       if_counters_alloc(ifp);
+       if_attach(ifp);
+       ether_ifattach(ifp);
+
+       return (0);
+}
+
+/*
+ * Destroy a vport interface.  The softc is marked dead and the
+ * interface is taken down under the net lock; after that it is
+ * detached and the softc is freed.
+ */
+static int
+vport_clone_destroy(struct ifnet *ifp)
+{
+       struct vport_softc *sc = ifp->if_softc;
+
+       NET_LOCK();
+
+       /* vport_ioctl() returns ENXIO once this is set */
+       sc->sc_dead = 1;
+       if (ISSET(ifp->if_flags, IFF_RUNNING))
+               vport_down(sc);
+
+       NET_UNLOCK();
+
+       ether_ifdetach(ifp);
+       if_detach(ifp);
+
+       free(sc, M_DEVBUF, sizeof(*sc));
+
+       return (0);
+}
+
+/*
+ * ioctl handler for vport interfaces.  Flag changes bring the
+ * interface up or down, multicast changes are accepted without doing
+ * anything, and everything else goes to ether_ioctl().  Returns ENXIO
+ * once the interface is being destroyed.
+ */
+static int
+vport_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+       struct vport_softc *sc = ifp->if_softc;
+       int error = 0;
+       int up, running;
+
+       if (sc->sc_dead)
+               return (ENXIO);
+
+       switch (cmd) {
+       case SIOCSIFFLAGS:
+               /* only act when IFF_UP and IFF_RUNNING disagree */
+               up = ISSET(ifp->if_flags, IFF_UP) != 0;
+               running = ISSET(ifp->if_flags, IFF_RUNNING) != 0;
+
+               if (up && !running)
+                       error = vport_up(sc);
+               else if (!up && running)
+                       error = vport_down(sc);
+               break;
+
+       case SIOCADDMULTI:
+       case SIOCDELMULTI:
+               /* accepted, nothing to do */
+               break;
+
+       default:
+               error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
+               break;
+       }
+
+       if (error == ENETRESET)
+               error = vport_iff(sc);
+
+       return (error);
+}
+
+/*
+ * Mark a vport interface as running.  Always succeeds.
+ */
+static int
+vport_up(struct vport_softc *sc)
+{
+       NET_ASSERT_LOCKED();
+
+       SET(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
+
+       return (0);
+}
+
+/*
+ * Interface filter update for a vport interface.  Currently a stub
+ * that always succeeds.
+ */
+static int
+vport_iff(struct vport_softc *sc)
+{
+       /* nothing to program */
+       return (0);
+}
+
+/*
+ * Mark a vport interface as no longer running.  Always succeeds.
+ */
+static int
+vport_down(struct vport_softc *sc)
+{
+       NET_ASSERT_LOCKED();
+
+       CLR(sc->sc_ac.ac_if.if_flags, IFF_RUNNING);
+
+       return (0);
+}
+
+/*
+ * Enqueue handler for vport interfaces; packets pass through here in
+ * both directions.
+ *
+ * Packets flagged M_PROTO1 come from a bridge and are input on the
+ * vport.  Everything else is handed to the bridge port input handler
+ * set on the interface.  Returns ENETDOWN, after freeing the packet,
+ * if the vport has no bridge port set.
+ */
+static int
+vport_enqueue(struct ifnet *ifp, struct mbuf *m)
+{
+       struct arpcom *ac = (struct arpcom *)ifp;
+       const struct ether_brport *eb;
+#if NBPFILTER > 0
+       caddr_t if_bpf;
+#endif
+
+#if NPF > 0
+       /*
+        * the packet is about to leave the l3 stack and go into
+        * the l2 switching space, or it's coming from a switch space
+        * into the network stack. either way, there's no relationship
+        * between pf states in those different places.
+        */
+       pf_pkt_addr_changed(m);
+#endif
+
+       /* packet is coming from a bridge */
+       if (ISSET(m->m_flags, M_PROTO1)) {
+               if_vinput(ifp, m);
+               return (0);
+       }
+
+       /* packet is going to the bridge */
+       smr_read_enter();
+       eb = SMR_PTR_GET(&ac->ac_brport);
+       if (eb == NULL) {
+               /* not a port on any bridge */
+               smr_read_leave();
+               m_freem(m);
+               return (ENETDOWN);
+       }
+
+       counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes,
+           m->m_pkthdr.len);
+
+#if NBPFILTER > 0
+       if_bpf = READ_ONCE(ifp->if_bpf);
+       if (if_bpf != NULL)
+               bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
+#endif
+
+       /* the input handler returns whatever is left for us to free */
+       m = (*eb->eb_input)(ifp, m, eb->eb_port);
+       smr_read_leave();
+
+       m_freem(m);
+
+       return (0);
+}
+
+/*
+ * Queue start routine for vport interfaces.  Whatever is on the
+ * send queue is purged.
+ */
+static void
+vport_start(struct ifqueue *ifq)
+{
+       /* drop anything on the queue */
+       ifq_purge(ifq);
+}