From: dlg
Date: Tue, 23 Feb 2021 03:30:04 +0000 (+0000)
Subject: add veb(4), a Virtual Ethernet Bridge driver.
X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=aaf19835c9b046d5367ce69b1c7d70331b7e92f9;p=openbsd

add veb(4), a Virtual Ethernet Bridge driver.

my intention is to replace bridge(4), but the way it works is
different enough from bridge that a name change is justified to
distinguish them. it also makes it easier to commit it to the tree
and work on it in parallel to bridge, and allows a window of
migration.

the main difference between veb(4) and bridge(4) is how they use
interfaces as ports. veb takes over interfaces completely and only
uses them to receive and transmit ethernet packets. bridge also
uses each interface as a port to the ethernet segment it's connected
to, but also tries to continue supporting the use of the interface
as a way to talk to the network stack on the local system. supporting
the use of interfaces for both external and local communication
is where most of my confusion with bridge comes from, both when
i'm trying to operate it and when i'm trying to understand the
code. changing this semantic is where most of the simplification
in veb comes from compared to bridge.

because veb takes over interfaces, the ethernet network set up on
a veb is isolated from the host network stack. by default veb does
not interact with pf or the ip (and mpls) stacks. to enable pf for
ip frames going over veb ports, link1 must be set on the veb
interface.

to have the stack interact with a veb network, vport interfaces
must be created and added as ports to a veb. the vport interface
driver is provided as part of veb, and is handled specially by
veb. veb usually prevents the use of ports by the stack for sending
and receiving packets, but that's why vports exist, so veb has
special handling for them.

veb already supports a lot of the other features that bridge has,
including bridge rules and protected domains, but i got tired of
working out of the tree and stopped implementing them. the main
outstanding features are better address table management, the
blocknonip flag on ports, transparent ipsec interception, and
spanning tree. i may not bother with spanning tree unless someone
tells me that they actually use it.

the core ethernet learning bridge functionality is provided by the
etherbridge code that was factored out of nvgre and bpe. veb is
already (a lot) faster than bridge, and is better prepared to
operate in parallel on multiple CPUs concurrently.

thanks to hrvoje popovski for testing some earlier versions of this.
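a rough sketch of how i expect this to be used (interface names
and addresses are illustrative; veb reuses the bridge ioctls, so
ifconfig add/del work like they do for bridge(4)):

    # ifconfig veb0 create
    # ifconfig em0 up
    # ifconfig veb0 add em0
    # ifconfig vport0 create
    # ifconfig veb0 add vport0
    # ifconfig vport0 inet 192.0.2.1/24 up
    # ifconfig veb0 up

optionally, "ifconfig veb0 link1" lets pf filter ip frames crossing
the veb ports.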
discussed with many
ok patrick@ jmatthew@
---

diff --git a/sys/conf/files b/sys/conf/files
index e43923ba18a..ba16d783d77 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1,4 +1,4 @@
-#	$OpenBSD: files,v 1.697 2021/02/21 03:46:34 dlg Exp $
+#	$OpenBSD: files,v 1.698 2021/02/23 03:30:04 dlg Exp $
 #	$NetBSD: files,v 1.87 1996/05/19 17:17:50 jonathan Exp $
 #	@(#)files.newconf	7.5 (Berkeley) 5/10/93
 
@@ -557,6 +557,7 @@ pseudo-device bpfilter: ifnet
 pseudo-device enc: ifnet
 pseudo-device etherip: ifnet, ether, ifmedia
 pseudo-device bridge: ifnet, ether
+pseudo-device veb: ifnet, ether, etherbridge
 pseudo-device vlan: ifnet, ether
 pseudo-device carp: ifnet, ether
 pseudo-device sppp: ifnet
@@ -815,6 +816,7 @@ file net/if_bridge.c		bridge needs-count
 file net/bridgectl.c		bridge
 file net/bridgestp.c		bridge
 file net/if_etherbridge.c	etherbridge
+file net/if_veb.c		veb
 file net/if_vlan.c		vlan needs-count
 file net/if_switch.c		switch needs-count
 file net/switchctl.c		switch
diff --git a/sys/net/if_veb.c b/sys/net/if_veb.c
new file mode 100644
index 00000000000..739a451225a
--- /dev/null
+++ b/sys/net/if_veb.c
@@ -0,0 +1,1742 @@
+/*	$OpenBSD: if_veb.c,v 1.1 2021/02/23 03:30:04 dlg Exp $ */
+
+/*
+ * Copyright (c) 2021 David Gwynne <dlg@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include "bpfilter.h"
+#include "pf.h"
+#include "vlan.h"
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/queue.h>
+#include <sys/socket.h>
+#include <sys/sockio.h>
+#include <sys/systm.h>
+#include <sys/syslog.h>
+#include <sys/rwlock.h>
+#include <sys/percpu.h>
+#include <sys/smr.h>
+#include <sys/task.h>
+#include <sys/pool.h>
+
+#include <net/if.h>
+#include <net/if_dl.h>
+#include <net/if_types.h>
+
+#include <netinet/in.h>
+#include <netinet/if_ether.h>
+
+#include <net/if_bridge.h>
+#include <net/if_etherbridge.h>
+
+#if NBPFILTER > 0
+#include <net/bpf.h>
+#endif
+
+#if NPF > 0
+#include <net/pfvar.h>
+#endif
+
+#if NVLAN > 0
+#include <net/if_vlan_var.h>
+#endif
+
+struct veb_rule {
+	TAILQ_ENTRY(veb_rule)		vr_entry;
+	SMR_TAILQ_ENTRY(veb_rule)	vr_lentry[2];
+
+	uint16_t			vr_flags;
+#define VEB_R_F_IN			(1U << 0)
+#define VEB_R_F_OUT			(1U << 1)
+#define VEB_R_F_SRC			(1U << 2)
+#define VEB_R_F_DST			(1U << 3)
+
+#define VEB_R_F_ARP			(1U << 4)
+#define VEB_R_F_RARP			(1U << 5)
+#define VEB_R_F_SHA			(1U << 6)
+#define VEB_R_F_SPA			(1U << 7)
+#define VEB_R_F_THA			(1U << 8)
+#define VEB_R_F_TPA			(1U << 9)
+	uint16_t			vr_arp_op;
+
+	struct ether_addr		vr_src;
+	struct ether_addr		vr_dst;
+	struct ether_addr		vr_arp_sha;
+	struct ether_addr		vr_arp_tha;
+	struct in_addr			vr_arp_spa;
+	struct in_addr			vr_arp_tpa;
+
+	unsigned int			vr_action;
+#define VEB_R_MATCH			0
+#define VEB_R_PASS			1
+#define VEB_R_BLOCK			2
+
+	int				vr_pftag;
+};
+
+TAILQ_HEAD(veb_rules, veb_rule);
+SMR_TAILQ_HEAD(veb_rule_list, veb_rule);
+
+struct veb_softc;
+
+struct veb_port {
+	struct ifnet			*p_ifp0;
+	struct refcnt			 p_refs;
+
+	int (*p_ioctl)(struct ifnet *, u_long, caddr_t);
+	int (*p_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
+	    struct rtentry *);
+
+	struct task			 p_ltask;
+	struct task			 p_dtask;
+
+	struct veb_softc		*p_veb;
+
+	struct ether_brport		 p_brport;
+
+	unsigned int			 p_link_state;
+	unsigned int			 p_span;
+	unsigned int			 p_bif_flags;
+	uint32_t			 p_protected;
+
+	struct veb_rules		 p_vrl;
+	unsigned int			 p_nvrl;
+	struct veb_rule_list		 p_vr_list[2];
+#define VEB_RULE_LIST_OUT		0
+#define VEB_RULE_LIST_IN		1
+
+	SMR_TAILQ_ENTRY(veb_port)	 p_entry;
+};
+
+struct veb_ports {
+	SMR_TAILQ_HEAD(, veb_port)	l_list;
+	unsigned int			l_count;
+};
+
+struct veb_softc {
+	struct ifnet			sc_if;
+	unsigned int			sc_dead;
+
+	struct etherbridge		sc_eb;
+
+	struct rwlock			sc_rule_lock;
+	struct veb_ports		sc_ports;
+	struct veb_ports		sc_spans;
+};
+
+#define DPRINTF(_sc, fmt...)	do { \
+	if (ISSET((_sc)->sc_if.if_flags, IFF_DEBUG)) \
+		printf(fmt); \
+} while (0)
+
+
+static int	veb_clone_create(struct if_clone *, int);
+static int	veb_clone_destroy(struct ifnet *);
+
+static int	veb_ioctl(struct ifnet *, u_long, caddr_t);
+static void	veb_input(struct ifnet *, struct mbuf *);
+static int	veb_enqueue(struct ifnet *, struct mbuf *);
+static int	veb_output(struct ifnet *, struct mbuf *, struct sockaddr *,
+		    struct rtentry *);
+static void	veb_start(struct ifqueue *);
+
+static int	veb_up(struct veb_softc *);
+static int	veb_down(struct veb_softc *);
+static int	veb_iff(struct veb_softc *);
+
+static void	veb_p_linkch(void *);
+static void	veb_p_detach(void *);
+static int	veb_p_ioctl(struct ifnet *, u_long, caddr_t);
+static int	veb_p_output(struct ifnet *, struct mbuf *,
+		    struct sockaddr *, struct rtentry *);
+
+static void	veb_p_dtor(struct veb_softc *, struct veb_port *,
+		    const char *);
+static int	veb_add_port(struct veb_softc *,
+		    const struct ifbreq *, unsigned int);
+static int	veb_del_port(struct veb_softc *,
+		    const struct ifbreq *, unsigned int);
+static int	veb_port_list(struct veb_softc *, struct ifbifconf *);
+static int	veb_port_set_protected(struct veb_softc *,
+		    const struct ifbreq *);
+
+static int	veb_rule_add(struct veb_softc *, const struct ifbrlreq *);
+static int	veb_rule_list_flush(struct veb_softc *,
+		    const struct ifbrlreq *);
+static void	veb_rule_list_free(struct veb_rule *);
+static int	veb_rule_list_get(struct veb_softc *, struct ifbrlconf *);
+
+static int	 veb_eb_port_cmp(void *, void *, void *);
+static void	*veb_eb_port_take(void *, void *);
+static void	 veb_eb_port_rele(void *, void *);
+static size_t	 veb_eb_port_ifname(void *, char *, size_t, void *);
+static void	 veb_eb_port_sa(void *, struct sockaddr_storage *, void *);
+
+static const struct etherbridge_ops veb_etherbridge_ops = {
+	veb_eb_port_cmp,
+	veb_eb_port_take,
+	veb_eb_port_rele,
+	veb_eb_port_ifname,
+	veb_eb_port_sa,
+};
+
+static struct if_clone veb_cloner =
+    IF_CLONE_INITIALIZER("veb", veb_clone_create, veb_clone_destroy);
+
+static struct pool veb_rule_pool;
+
+static int	vport_clone_create(struct if_clone *, int);
+static int	vport_clone_destroy(struct ifnet *);
+
+struct vport_softc {
+	struct arpcom		sc_ac;
+	unsigned int		sc_dead;
+};
+
+static int	vport_ioctl(struct ifnet *, u_long, caddr_t);
+static int	vport_enqueue(struct ifnet *, struct mbuf *);
+static void	vport_start(struct ifqueue *);
+
+static int	vport_up(struct vport_softc *);
+static int	vport_down(struct vport_softc *);
+static int	vport_iff(struct vport_softc *);
+
+static struct if_clone vport_cloner =
+    IF_CLONE_INITIALIZER("vport", vport_clone_create, vport_clone_destroy);
+
+void
+vebattach(int count)
+{
+	if_clone_attach(&veb_cloner);
+	if_clone_attach(&vport_cloner);
+}
+
+static int
+veb_clone_create(struct if_clone *ifc, int unit)
+{
+	struct veb_softc *sc;
+	struct ifnet *ifp;
+	int error;
+
+	if (veb_rule_pool.pr_size == 0) {
+		pool_init(&veb_rule_pool, sizeof(struct veb_rule),
+		    0, IPL_SOFTNET, 0, "vebrpl", NULL);
+	}
+
+	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
+	if (sc == NULL)
+		return (ENOMEM);
+
+	rw_init(&sc->sc_rule_lock, "vebrlk");
+	SMR_TAILQ_INIT(&sc->sc_ports.l_list);
+	SMR_TAILQ_INIT(&sc->sc_spans.l_list);
+
+	ifp = &sc->sc_if;
+
+	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
+	    ifc->ifc_name, unit);
+
+	error = etherbridge_init(&sc->sc_eb, ifp->if_xname,
+	    &veb_etherbridge_ops, sc);
+	if (error != 0) {
+		free(sc, M_DEVBUF, sizeof(*sc));
+		return (error);
+	}
+
+	ifp->if_softc = sc;
+	ifp->if_type = IFT_BRIDGE;
+	ifp->if_hdrlen = ETHER_HDR_LEN;
+	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
+	ifp->if_ioctl = veb_ioctl;
+	ifp->if_input = veb_input;
+	ifp->if_output = veb_output;
+	ifp->if_enqueue = veb_enqueue;
+	ifp->if_qstart = veb_start;
+	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
+
+	if_counters_alloc(ifp);
+	if_attach(ifp);
+
+	if_alloc_sadl(ifp);
+
+#if NBPFILTER > 0
+	bpfattach(&ifp->if_bpf, ifp, DLT_EN10MB, ETHER_HDR_LEN);
+#endif
+
+	return (0);
+}
+
+static int
+veb_clone_destroy(struct ifnet *ifp)
+{
+	struct veb_softc *sc = ifp->if_softc;
+	struct veb_port *p, *np;
+
+	NET_LOCK();
+	sc->sc_dead = 1;
+
+	if (ISSET(ifp->if_flags, IFF_RUNNING))
+		veb_down(sc);
+	NET_UNLOCK();
+
+	if_detach(ifp);
+
+	NET_LOCK();
+	SMR_TAILQ_FOREACH_SAFE_LOCKED(p, &sc->sc_ports.l_list, p_entry, np)
+		veb_p_dtor(sc, p, "destroy");
+	SMR_TAILQ_FOREACH_SAFE_LOCKED(p, &sc->sc_spans.l_list, p_entry, np)
+		veb_p_dtor(sc, p, "destroy");
+	NET_UNLOCK();
+
+	etherbridge_destroy(&sc->sc_eb);
+
+	free(sc, M_DEVBUF, sizeof(*sc));
+
+	return (0);
+}
+
+static struct mbuf *
+veb_span_input(struct ifnet *ifp0, struct mbuf *m, void *brport)
+{
+	m_freem(m);
+	return (NULL);
+}
+
+static void
+veb_span(struct veb_softc *sc, struct mbuf *m0)
+{
+	struct veb_port *p;
+	struct ifnet *ifp0;
+	struct mbuf *m;
+
+	smr_read_enter();
+	SMR_TAILQ_FOREACH(p, &sc->sc_spans.l_list, p_entry) {
+		ifp0 = p->p_ifp0;
+		if (!ISSET(ifp0->if_flags, IFF_RUNNING))
+			continue;
+
+		m = m_dup_pkt(m0, max_linkhdr + ETHER_ALIGN, M_NOWAIT);
+		if (m == NULL) {
+			/* XXX count error */
+			continue;
+		}
+
+		if_enqueue(ifp0, m); /* XXX count error */
+	}
+	smr_read_leave();
+}
+
+static int
+veb_vlan_filter(const struct mbuf *m)
+{
+	const struct ether_header *eh;
+
+	eh = mtod(m, struct ether_header *);
+	switch (ntohs(eh->ether_type)) {
+	case ETHERTYPE_VLAN:
+	case ETHERTYPE_QINQ:
+		return (1);
+	default:
+		break;
+	}
+
+	return (0);
+}
+
+static int
+veb_rule_arp_match(const struct veb_rule *vr, struct mbuf *m)
+{
+	struct ether_header *eh;
+	struct ether_arp ea;
+
+	eh = mtod(m, struct ether_header *);
+
+	if (eh->ether_type != htons(ETHERTYPE_ARP))
+		return (0);
+	if (m->m_pkthdr.len < sizeof(*eh) + sizeof(ea))
+		return (0);
+
+	m_copydata(m, sizeof(*eh), sizeof(ea), (caddr_t)&ea);
+
+	if (ea.arp_hrd != htons(ARPHRD_ETHER) ||
+	    ea.arp_pro != htons(ETHERTYPE_IP) ||
+	    ea.arp_hln != ETHER_ADDR_LEN ||
+	    ea.arp_pln != sizeof(struct in_addr))
+		return (0);
+
+	if (ISSET(vr->vr_flags, VEB_R_F_ARP)) {
+		if (ea.arp_op != htons(ARPOP_REQUEST) &&
+		    ea.arp_op != htons(ARPOP_REPLY))
+			return (0);
+	}
+	if (ISSET(vr->vr_flags, VEB_R_F_RARP)) {
+		if (ea.arp_op != htons(ARPOP_REVREQUEST) &&
+		    ea.arp_op != htons(ARPOP_REVREPLY))
+			return (0);
+	}
+
+	if (vr->vr_arp_op != htons(0) && vr->vr_arp_op != ea.arp_op)
+		return (0);
+
+	if (ISSET(vr->vr_flags, VEB_R_F_SHA) &&
+	    !ETHER_IS_EQ(&vr->vr_arp_sha, ea.arp_sha))
+		return (0);
+	if (ISSET(vr->vr_flags, VEB_R_F_THA) &&
+	    !ETHER_IS_EQ(&vr->vr_arp_tha, ea.arp_tha))
+		return (0);
+	if (ISSET(vr->vr_flags, VEB_R_F_SPA) &&
+	    memcmp(&vr->vr_arp_spa, ea.arp_spa, sizeof(vr->vr_arp_spa)) != 0)
+		return (0);
+	if (ISSET(vr->vr_flags, VEB_R_F_TPA) &&
+	    memcmp(&vr->vr_arp_tpa, ea.arp_tpa, sizeof(vr->vr_arp_tpa)) != 0)
+		return (0);
+
+	return (1);
+}
+
+static int
+veb_rule_list_test(struct veb_rule *vr, int dir, struct mbuf *m)
+{
+	struct ether_header *eh = mtod(m, struct ether_header *);
+
+	SMR_ASSERT_CRITICAL();
+
+	do {
+		if (ISSET(vr->vr_flags, VEB_R_F_ARP|VEB_R_F_RARP) &&
+		    !veb_rule_arp_match(vr, m))
+			continue;
+
+		if (ISSET(vr->vr_flags, VEB_R_F_SRC) &&
+		    !ETHER_IS_EQ(&vr->vr_src, eh->ether_shost))
+			continue;
+		if (ISSET(vr->vr_flags, VEB_R_F_DST) &&
+		    !ETHER_IS_EQ(&vr->vr_dst, eh->ether_dhost))
+			continue;
+
+		if (vr->vr_action == VEB_R_BLOCK)
+			return (VEB_R_BLOCK);
+#if NPF > 0
+		pf_tag_packet(m, vr->vr_pftag, -1);
+#endif
+		if (vr->vr_action == VEB_R_PASS)
+			return (VEB_R_PASS);
+	} while ((vr = SMR_TAILQ_NEXT(vr, vr_lentry[dir])) != NULL);
+
+	return (VEB_R_PASS);
+}
+
+static inline int
+veb_rule_filter(struct veb_port *p, int dir, struct mbuf *m)
+{
+	struct veb_rule *vr;
+
+	vr = SMR_TAILQ_FIRST(&p->p_vr_list[dir]);
+	if (vr == NULL)
+		return (0);
+
+	return (veb_rule_list_test(vr, dir, m) == VEB_R_BLOCK);
+}
+
+#if NPF > 0
+static struct mbuf *
+veb_pf(struct ifnet *ifp0, int dir, struct mbuf *m)
+{
+	struct ether_header *eh, copy;
+	sa_family_t af = AF_UNSPEC;
+
+	/*
+	 * pf runs on vport interfaces when they enter or leave the
+	 * l3 stack, so don't confuse things (even more) by running
+	 * pf again here. note that because of this exception the
+	 * pf direction on vport interfaces is reversed compared to
+	 * other veb ports.
+	 */
+	if (ifp0->if_enqueue == vport_enqueue)
+		return (m);
+
+	eh = mtod(m, struct ether_header *);
+	switch (ntohs(eh->ether_type)) {
+	case ETHERTYPE_IP:
+		af = AF_INET;
+		break;
+	case ETHERTYPE_IPV6:
+		af = AF_INET6;
+		break;
+	default:
+		return (m);
+	}
+
+	copy = *eh;
+	m_adj(m, sizeof(*eh));
+
+	if (pf_test(af, dir, ifp0, &m) != PF_PASS) {
+		m_freem(m);
+		return (NULL);
+	}
+	if (m == NULL)
+		return (NULL);
+
+	m = m_prepend(m, sizeof(*eh), M_DONTWAIT);
+	if (m == NULL)
+		return (NULL);
+
+	/* checksum? */
+
+	eh = mtod(m, struct ether_header *);
+	*eh = copy;
+
+	return (m);
+}
+#endif /* NPF > 0 */
+
+static void
+veb_broadcast(struct veb_softc *sc, struct veb_port *rp, struct mbuf *m0)
+{
+	struct ifnet *ifp = &sc->sc_if;
+	struct veb_port *tp;
+	struct ifnet *ifp0;
+	struct mbuf *m;
+
+#if NPF > 0
+	/*
+	 * we couldn't find a specific port to send this packet to,
+	 * but pf should still have a chance to apply policy to it.
+	 * let pf look at it, but use the veb interface as a proxy.
+	 */
+	if (ISSET(ifp->if_flags, IFF_LINK1) &&
+	    (m = veb_pf(ifp, PF_OUT, m0)) == NULL)
+		return;
+#endif
+
+	counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes,
+	    m0->m_pkthdr.len);
+
+	smr_read_enter();
+	SMR_TAILQ_FOREACH(tp, &sc->sc_ports.l_list, p_entry) {
+		if (rp == tp || (rp->p_protected & tp->p_protected)) {
+			/*
+			 * don't let Ethernet packets hairpin or
+			 * move between ports in the same protected
+			 * domain(s).
+			 */
+			continue;
+		}
+
+		ifp0 = tp->p_ifp0;
+		if (!ISSET(ifp0->if_flags, IFF_RUNNING)) {
+			/* don't waste time */
+			continue;
+		}
+
+		if (!ISSET(tp->p_bif_flags, IFBIF_DISCOVER) &&
+		    !ISSET(m0->m_flags, M_BCAST | M_MCAST)) {
+			/* don't flood unknown unicast */
+			continue;
+		}
+
+		if (veb_rule_filter(tp, VEB_RULE_LIST_OUT, m0))
+			continue;
+
+		m = m_dup_pkt(m0, max_linkhdr + ETHER_ALIGN, M_NOWAIT);
+		if (m == NULL) {
+			/* XXX count error? */
+			continue;
+		}
+
+		if_enqueue(ifp0, m); /* XXX count error? */
+	}
+	smr_read_leave();
+
+	m_freem(m0);
+}
+
+static struct mbuf *
+veb_transmit(struct veb_softc *sc, struct veb_port *rp, struct veb_port *tp,
+    struct mbuf *m)
+{
+	struct ifnet *ifp = &sc->sc_if;
+	struct ifnet *ifp0;
+
+	if (tp == NULL)
+		return (m);
+
+	if (rp == tp || (rp->p_protected & tp->p_protected)) {
+		/*
+		 * don't let Ethernet packets hairpin or move between
+		 * ports in the same protected domain(s).
+		 */
+		goto drop;
+	}
+
+	if (veb_rule_filter(tp, VEB_RULE_LIST_OUT, m))
+		goto drop;
+
+	ifp0 = tp->p_ifp0;
+
+#if NPF > 0
+	if (ISSET(ifp->if_flags, IFF_LINK1) &&
+	    (m = veb_pf(ifp0, PF_OUT, m)) == NULL)
+		return (NULL);
+#endif
+
+	counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes,
+	    m->m_pkthdr.len);
+
+	if_enqueue(ifp0, m); /* XXX count error? */
+
+	return (NULL);
+drop:
+	m_freem(m);
+	return (NULL);
+}
+
+static struct mbuf *
+veb_port_input(struct ifnet *ifp0, struct mbuf *m, void *brport)
+{
+	struct veb_port *p = brport;
+	struct veb_softc *sc = p->p_veb;
+	struct ifnet *ifp = &sc->sc_if;
+	struct ether_header *eh;
+#if NBPFILTER > 0
+	caddr_t if_bpf;
+#endif
+
+	if (ISSET(m->m_flags, M_PROTO1)) {
+		CLR(m->m_flags, M_PROTO1);
+		return (m);
+	}
+
+	if (!ISSET(ifp->if_flags, IFF_RUNNING))
+		return (m);
+
+#if NVLAN > 0
+	/*
+	 * If the underlying interface removed the VLAN header itself,
+	 * add it back.
+	 */
+	if (ISSET(m->m_flags, M_VLANTAG)) {
+		m = vlan_inject(m, ETHERTYPE_VLAN, m->m_pkthdr.ether_vtag);
+		if (m == NULL) {
+			counters_inc(ifp->if_counters, ifc_ierrors);
+			goto drop;
+		}
+	}
+#endif
+
+	counters_pkt(ifp->if_counters, ifc_ipackets, ifc_ibytes,
+	    m->m_pkthdr.len);
+
+	/* force packets into the one routing domain for pf */
+	m->m_pkthdr.ph_rtableid = ifp->if_rdomain;
+
+#if NBPFILTER > 0
+	if_bpf = READ_ONCE(ifp->if_bpf);
+	if (if_bpf != NULL) {
+		if (bpf_mtap_ether(if_bpf, m, 0) != 0)
+			goto drop;
+	}
+#endif
+
+	veb_span(sc, m);
+
+	if (!ISSET(ifp->if_flags, IFF_LINK2) &&
+	    veb_vlan_filter(m))
+		goto drop;
+
+	if (veb_rule_filter(p, VEB_RULE_LIST_IN, m))
+		goto drop;
+
+#if NPF > 0
+	if (ISSET(ifp->if_flags, IFF_LINK1) &&
+	    (m = veb_pf(ifp0, PF_IN, m)) == NULL)
+		return (NULL);
+#endif
+
+	eh = mtod(m, struct ether_header *);
+
+	if (ISSET(p->p_bif_flags, IFBIF_LEARNING)) {
+		etherbridge_map(&sc->sc_eb, p,
+		    (struct ether_addr *)eh->ether_shost);
+	}
+
+	CLR(m->m_flags, M_BCAST|M_MCAST);
+	SET(m->m_flags, M_PROTO1);
+
+	if (!ETHER_IS_MULTICAST(eh->ether_dhost)) {
+		struct veb_port *tp = NULL;
+
+		smr_read_enter();
+		tp = etherbridge_resolve(&sc->sc_eb,
+		    (struct ether_addr *)eh->ether_dhost);
+		m = veb_transmit(sc, p, tp, m);
+		smr_read_leave();
+
+		if (m == NULL)
+			return (NULL);
+
+		/* unknown unicast address */
+	} else {
+		SET(m->m_flags,
+		    ETHER_IS_BROADCAST(eh->ether_dhost) ?
+		    M_BCAST : M_MCAST);
+	}
+
+	veb_broadcast(sc, p, m);
+	return (NULL);
+
+drop:
+	m_freem(m);
+	return (NULL);
+}
+
+static void
+veb_input(struct ifnet *ifp, struct mbuf *m)
+{
+	m_freem(m);
+}
+
+static int
+veb_output(struct ifnet *ifp, struct mbuf *m, struct sockaddr *dst,
+    struct rtentry *rt)
+{
+	m_freem(m);
+	return (ENODEV);
+}
+
+static int
+veb_enqueue(struct ifnet *ifp, struct mbuf *m)
+{
+	m_freem(m);
+	return (ENODEV);
+}
+
+static void
+veb_start(struct ifqueue *ifq)
+{
+	ifq_purge(ifq);
+}
+
+static int
+veb_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	struct veb_softc *sc = ifp->if_softc;
+	struct ifbrparam *bparam = (struct ifbrparam *)data;
+	int error = 0;
+
+	if (sc->sc_dead)
+		return (ENXIO);
+
+	switch (cmd) {
+	case SIOCSIFFLAGS:
+		if (ISSET(ifp->if_flags, IFF_UP)) {
+			if (!ISSET(ifp->if_flags, IFF_RUNNING))
+				error = veb_up(sc);
+		} else {
+			if (ISSET(ifp->if_flags, IFF_RUNNING))
+				error = veb_down(sc);
+		}
+		break;
+
+	case SIOCBRDGADD:
+		error = suser(curproc);
+		if (error != 0)
+			break;
+
+		error = veb_add_port(sc, (struct ifbreq *)data, 0);
+		break;
+	case SIOCBRDGADDS:
+		error = suser(curproc);
+		if (error != 0)
+			break;
+
+		error = veb_add_port(sc, (struct ifbreq *)data, 1);
+		break;
+	case SIOCBRDGDEL:
+		error = suser(curproc);
+		if (error != 0)
+			break;
+
+		error = veb_del_port(sc, (struct ifbreq *)data, 0);
+		break;
+	case SIOCBRDGDELS:
+		error = suser(curproc);
+		if (error != 0)
+			break;
+
+		error = veb_del_port(sc, (struct ifbreq *)data, 1);
+		break;
+
+	case SIOCBRDGSCACHE:
+		error = suser(curproc);
+		if (error != 0)
+			break;
+
+		error = etherbridge_set_max(&sc->sc_eb, bparam);
+		break;
+	case SIOCBRDGGCACHE:
+		error = etherbridge_get_max(&sc->sc_eb, bparam);
+		break;
+
+	case SIOCBRDGSTO:
+		error = suser(curproc);
+		if (error != 0)
+			break;
+
+		error = etherbridge_set_tmo(&sc->sc_eb, bparam);
+		break;
+	case SIOCBRDGGTO:
+		error = etherbridge_get_tmo(&sc->sc_eb, bparam);
+		break;
+
+	case SIOCBRDGRTS:
+		error = etherbridge_rtfind(&sc->sc_eb, (struct ifbaconf *)data);
+		break;
+	case SIOCBRDGIFS:
+		error = veb_port_list(sc, (struct ifbifconf *)data);
+		break;
+
+	case SIOCBRDGSIFPROT:
+		error = veb_port_set_protected(sc, (struct ifbreq *)data);
+		break;
+
+	case SIOCBRDGARL:
+		error = veb_rule_add(sc, (struct ifbrlreq *)data);
+		break;
+	case SIOCBRDGFRL:
+		error = veb_rule_list_flush(sc, (struct ifbrlreq *)data);
+		break;
+	case SIOCBRDGGRL:
+		error = veb_rule_list_get(sc, (struct ifbrlconf *)data);
+		break;
+
+	default:
+		error = ENOTTY;
+		break;
+	}
+
+	if (error == ENETRESET)
+		error = veb_iff(sc);
+
+	return (error);
+}
+
+static int
+veb_add_port(struct veb_softc *sc, const struct ifbreq *req, unsigned int span)
+{
+	struct ifnet *ifp = &sc->sc_if;
+	struct ifnet *ifp0;
+	struct veb_ports *port_list;
+	struct veb_port *p;
+	int error;
+
+	NET_ASSERT_LOCKED();
+
+	ifp0 = if_unit(req->ifbr_ifsname);
+	if (ifp0 == NULL)
+		return (EINVAL);
+
+	if (ifp0->if_type != IFT_ETHER) {
+		error = EPROTONOSUPPORT;
+		goto put;
+	}
+
+	if (ifp0 == ifp) {
+		error = EPROTONOSUPPORT;
+		goto put;
+	}
+
+	error = ether_brport_isset(ifp0);
+	if (error != 0)
+		goto put;
+
+	/* let's try */
+
+	p = malloc(sizeof(*p), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
+	if (p == NULL) {
+		error = ENOMEM;
+		goto put;
+	}
+
+	p->p_ifp0 = ifp0;
+	p->p_veb = sc;
+
+	refcnt_init(&p->p_refs);
+	TAILQ_INIT(&p->p_vrl);
+	SMR_TAILQ_INIT(&p->p_vr_list[0]);
+	SMR_TAILQ_INIT(&p->p_vr_list[1]);
+
+	p->p_ioctl = ifp0->if_ioctl;
+	p->p_output = ifp0->if_output;
+
+	if (span) {
+		port_list = &sc->sc_spans;
+
+		p->p_brport.eb_input = veb_span_input;
+		p->p_bif_flags = IFBIF_SPAN;
+	} else {
+		port_list = &sc->sc_ports;
+
+		error = ifpromisc(ifp0, 1);
+		if (error != 0)
+			goto free;
+
+		p->p_bif_flags = IFBIF_LEARNING | IFBIF_DISCOVER;
+		p->p_brport.eb_input = veb_port_input;
+	}
+
+	/* this might have changed if we slept for malloc or ifpromisc */
+	error = ether_brport_isset(ifp0);
+	if (error != 0)
+		goto unpromisc;
+
+	task_set(&p->p_ltask, veb_p_linkch, p);
+	if_linkstatehook_add(ifp0, &p->p_ltask);
+
+	task_set(&p->p_dtask, veb_p_detach, p);
+	if_detachhook_add(ifp0, &p->p_dtask);
+
+	p->p_brport.eb_port = p;
+
+	/* commit */
+	SMR_TAILQ_INSERT_TAIL_LOCKED(&port_list->l_list, p, p_entry);
+	port_list->l_count++;
+
+	ether_brport_set(ifp0, &p->p_brport);
+	if (ifp0->if_enqueue != vport_enqueue) {	/* vport is special */
+		ifp0->if_ioctl = veb_p_ioctl;
+		ifp0->if_output = veb_p_output;
+	}
+
+	veb_p_linkch(p);
+
+	return (0);
+
+unpromisc:
+	if (!span)
+		ifpromisc(ifp0, 0);
+free:
+	free(p, M_DEVBUF, sizeof(*p));
+put:
+	if_put(ifp0);
+	return (error);
+}
+
+static struct veb_port *
+veb_trunkport(struct veb_softc *sc, const char *name, unsigned int span)
+{
+	struct veb_ports *port_list;
+	struct veb_port *p;
+
+	port_list = span ? &sc->sc_spans : &sc->sc_ports;
+
+	SMR_TAILQ_FOREACH_LOCKED(p, &port_list->l_list, p_entry) {
+		if (strcmp(p->p_ifp0->if_xname, name) == 0)
+			return (p);
+	}
+
+	return (NULL);
+}
+
+static int
+veb_del_port(struct veb_softc *sc, const struct ifbreq *req, unsigned int span)
+{
+	struct veb_port *p;
+
+	NET_ASSERT_LOCKED();
+	p = veb_trunkport(sc, req->ifbr_ifsname, span);
+	if (p == NULL)
+		return (EINVAL);
+
+	veb_p_dtor(sc, p, "del");
+
+	return (0);
+}
+
+static struct veb_port *
+veb_port_get(struct veb_softc *sc, const char *name)
+{
+	struct veb_port *p;
+
+	NET_ASSERT_LOCKED();
+
+	SMR_TAILQ_FOREACH_LOCKED(p, &sc->sc_ports.l_list, p_entry) {
+		struct ifnet *ifp0 = p->p_ifp0;
+		if (strncmp(ifp0->if_xname, name,
+		    sizeof(ifp0->if_xname)) == 0) {
+			refcnt_take(&p->p_refs);
+			break;
+		}
+	}
+
+	return (p);
+}
+
+static void
+veb_port_put(struct veb_softc *sc, struct veb_port *p)
+{
+	refcnt_rele_wake(&p->p_refs);
+}
+
+static int
+veb_port_set_protected(struct veb_softc *sc, const struct ifbreq *ifbr)
+{
+	struct veb_port *p;
+
+	p = veb_port_get(sc, ifbr->ifbr_ifsname);
+	if (p == NULL)
+		return (ESRCH);
+
+	p->p_protected = ifbr->ifbr_protected;
+	veb_port_put(sc, p);
+
+	return (0);
+}
+
+static int
+veb_rule_add(struct veb_softc *sc, const struct ifbrlreq *ifbr)
+{
+	const struct ifbrarpf *brla = &ifbr->ifbr_arpf;
+	struct veb_rule vr, *vrp;
+	struct veb_port *p;
+	int error;
+
+	memset(&vr, 0, sizeof(vr));
+
+	switch (ifbr->ifbr_action) {
+	case BRL_ACTION_BLOCK:
+		vr.vr_action = VEB_R_BLOCK;
+		break;
+	case BRL_ACTION_PASS:
+		vr.vr_action = VEB_R_PASS;
+		break;
+	/* XXX VEB_R_MATCH */
+	default:
+		return (EINVAL);
+	}
+
+	if (!ISSET(ifbr->ifbr_flags, BRL_FLAG_IN|BRL_FLAG_OUT))
+		return (EINVAL);
+	if (ISSET(ifbr->ifbr_flags, BRL_FLAG_IN))
+		SET(vr.vr_flags, VEB_R_F_IN);
+	if (ISSET(ifbr->ifbr_flags, BRL_FLAG_OUT))
+		SET(vr.vr_flags, VEB_R_F_OUT);
+
+	if (ISSET(ifbr->ifbr_flags, BRL_FLAG_SRCVALID)) {
+		SET(vr.vr_flags, VEB_R_F_SRC);
+		vr.vr_src = ifbr->ifbr_src;
+	}
+	if (ISSET(ifbr->ifbr_flags, BRL_FLAG_DSTVALID)) {
+		SET(vr.vr_flags, VEB_R_F_DST);
+		vr.vr_dst = ifbr->ifbr_dst;
+	}
+
+	/* ARP rule */
+	if (ISSET(brla->brla_flags, BRLA_ARP|BRLA_RARP)) {
+		if (ISSET(brla->brla_flags, BRLA_ARP))
+			SET(vr.vr_flags, VEB_R_F_ARP);
+		if (ISSET(brla->brla_flags, BRLA_RARP))
+			SET(vr.vr_flags, VEB_R_F_RARP);
+
+		if (ISSET(brla->brla_flags, BRLA_SHA)) {
+			SET(vr.vr_flags, VEB_R_F_SHA);
+			vr.vr_arp_sha = brla->brla_sha;
+		}
+		if (ISSET(brla->brla_flags, BRLA_THA)) {
+			SET(vr.vr_flags, VEB_R_F_THA);
+			vr.vr_arp_tha = brla->brla_tha;
+		}
+		if (ISSET(brla->brla_flags, BRLA_SPA)) {
+			SET(vr.vr_flags, VEB_R_F_SPA);
+			vr.vr_arp_spa = brla->brla_spa;
+		}
+		if (ISSET(brla->brla_flags, BRLA_TPA)) {
+			SET(vr.vr_flags, VEB_R_F_TPA);
+			vr.vr_arp_tpa = brla->brla_tpa;
+		}
+		vr.vr_arp_op = htons(brla->brla_op);
+	}
+
+	if (ifbr->ifbr_tagname[0] != '\0') {
+#if NPF > 0
+		vr.vr_pftag = pf_tagname2tag((char *)ifbr->ifbr_tagname, 1);
+		if (vr.vr_pftag == 0)
+			return (ENOMEM);
+#else
+		return (EINVAL);
+#endif
+	}
+
+	p = veb_port_get(sc, ifbr->ifbr_ifsname);
+	if (p == NULL) {
+		error = ESRCH;
+		goto error;
+	}
+
+	vrp = pool_get(&veb_rule_pool, PR_WAITOK|PR_LIMITFAIL|PR_ZERO);
+	if (vrp == NULL) {
+		error = ENOMEM;
+		goto port_put;
+	}
+
+	*vrp = vr;
+
+	/* there's one big lock on a veb for all ports */
+	error = rw_enter(&sc->sc_rule_lock, RW_WRITE|RW_INTR);
+	if (error != 0)
+		goto rule_put;
+
+	TAILQ_INSERT_TAIL(&p->p_vrl, vrp, vr_entry);
+	p->p_nvrl++;
+	if (ISSET(vr.vr_flags, VEB_R_F_OUT)) {
+		SMR_TAILQ_INSERT_TAIL_LOCKED(&p->p_vr_list[0],
+		    vrp, vr_lentry[0]);
+	}
+	if (ISSET(vr.vr_flags, VEB_R_F_IN)) {
+		SMR_TAILQ_INSERT_TAIL_LOCKED(&p->p_vr_list[1],
+		    vrp, vr_lentry[1]);
+	}
+
+	rw_exit(&sc->sc_rule_lock);
+	veb_port_put(sc, p);
+
+	return (0);
+
+rule_put:
+	pool_put(&veb_rule_pool, vrp);
+port_put:
+	veb_port_put(sc, p);
+error:
+#if NPF > 0
+	pf_tag_unref(vr.vr_pftag);
+#endif
+	return (error);
+}
+
+static void
+veb_rule_list_free(struct veb_rule *nvr)
+{
+	struct veb_rule *vr;
+
+	while ((vr = nvr) != NULL) {
+		nvr = TAILQ_NEXT(vr, vr_entry);
+		pool_put(&veb_rule_pool, vr);
+	}
+}
+
+static int
+veb_rule_list_flush(struct veb_softc *sc, const struct ifbrlreq *ifbr)
+{
+	struct veb_port *p;
+	struct veb_rule *vr;
+	int error;
+
+	p = veb_port_get(sc, ifbr->ifbr_ifsname);
+	if (p == NULL)
+		return (ESRCH);
+
+	error = rw_enter(&sc->sc_rule_lock, RW_WRITE|RW_INTR);
+	if (error != 0) {
+		veb_port_put(sc, p);
+		return (error);
+	}
+
+	/* take all the rules away */
+	vr = TAILQ_FIRST(&p->p_vrl);
+
+	/* reset the lists and counts of rules */
+	TAILQ_INIT(&p->p_vrl);
+	p->p_nvrl = 0;
+	SMR_TAILQ_INIT(&p->p_vr_list[0]);
+	SMR_TAILQ_INIT(&p->p_vr_list[1]);
+
+	rw_exit(&sc->sc_rule_lock);
+	veb_port_put(sc, p);
+
+	smr_barrier();
+	veb_rule_list_free(vr);
+
+	return (0);
+}
+
+static void
+veb_rule2ifbr(struct ifbrlreq *ifbr, const struct veb_rule *vr)
+{
+	switch (vr->vr_action) {
+	case VEB_R_PASS:
+		ifbr->ifbr_action = BRL_ACTION_PASS;
+		break;
+	case VEB_R_BLOCK:
+		ifbr->ifbr_action = BRL_ACTION_BLOCK;
+		break;
+	}
+
+	if (ISSET(vr->vr_flags, VEB_R_F_IN))
+		SET(ifbr->ifbr_flags, BRL_FLAG_IN);
+	if (ISSET(vr->vr_flags, VEB_R_F_OUT))
+		SET(ifbr->ifbr_flags, BRL_FLAG_OUT);
+
+	if (ISSET(vr->vr_flags, VEB_R_F_SRC)) {
+		SET(ifbr->ifbr_flags, BRL_FLAG_SRCVALID);
+		ifbr->ifbr_src = vr->vr_src;
+	}
+	if (ISSET(vr->vr_flags, VEB_R_F_DST)) {
+		SET(ifbr->ifbr_flags, BRL_FLAG_DSTVALID);
+		ifbr->ifbr_dst = vr->vr_dst;
+	}
+
+	/* ARP rule */
+	if (ISSET(vr->vr_flags, VEB_R_F_ARP|VEB_R_F_RARP)) {
+		struct ifbrarpf *brla = &ifbr->ifbr_arpf;
+
+		if (ISSET(vr->vr_flags, VEB_R_F_ARP))
+			SET(brla->brla_flags, BRLA_ARP);
+		if (ISSET(vr->vr_flags, VEB_R_F_RARP))
+			SET(brla->brla_flags, BRLA_RARP);
+
+		if (ISSET(vr->vr_flags, VEB_R_F_SHA)) {
+			SET(brla->brla_flags, BRLA_SHA);
+			brla->brla_sha = vr->vr_arp_sha;
+		}
+		if (ISSET(vr->vr_flags, VEB_R_F_THA)) {
+			SET(brla->brla_flags, BRLA_THA);
+			brla->brla_tha = vr->vr_arp_tha;
+		}
+
+		if (ISSET(vr->vr_flags, VEB_R_F_SPA)) {
+			SET(brla->brla_flags, BRLA_SPA);
+			brla->brla_spa = vr->vr_arp_spa;
+		}
+		if (ISSET(vr->vr_flags, VEB_R_F_TPA)) {
+			SET(brla->brla_flags, BRLA_TPA);
+			brla->brla_tpa = vr->vr_arp_tpa;
+		}
+
+		brla->brla_op = ntohs(vr->vr_arp_op);
+	}
+
+#if NPF > 0
+	if (vr->vr_pftag != 0)
+		pf_tag2tagname(vr->vr_pftag, ifbr->ifbr_tagname);
+#endif
+}
+
+static int
+veb_rule_list_get(struct veb_softc *sc, struct ifbrlconf *ifbrl)
+{
+	struct veb_port *p;
+	struct veb_rule *vr;
+	struct ifbrlreq *ifbr, *ifbrs;
+	int error = 0;
+	size_t len;
+
+	p = veb_port_get(sc, ifbrl->ifbrl_ifsname);
+	if (p == NULL)
+		return (ESRCH);
+
+	len = p->p_nvrl; /* estimate */
+	if (ifbrl->ifbrl_len == 0 || len == 0) {
+		ifbrl->ifbrl_len = len * sizeof(*ifbrs);
+		goto port_put;
+	}
+
+	error = rw_enter(&sc->sc_rule_lock, RW_READ|RW_INTR);
+	if (error != 0)
+		goto port_put;
+
+	ifbrs = mallocarray(p->p_nvrl, sizeof(*ifbrs), M_TEMP,
+	    M_WAITOK|M_CANFAIL|M_ZERO);
+	if (ifbrs == NULL) {
+		rw_exit(&sc->sc_rule_lock);
+		goto port_put;
+	}
+	len = p->p_nvrl * sizeof(*ifbrs);
+
+	ifbr = ifbrs;
+	TAILQ_FOREACH(vr, &p->p_vrl, vr_entry) {
+		strlcpy(ifbr->ifbr_name, sc->sc_if.if_xname,
+		    sizeof(ifbr->ifbr_name));
+		strlcpy(ifbr->ifbr_ifsname, p->p_ifp0->if_xname,
+		    sizeof(ifbr->ifbr_ifsname));
+		veb_rule2ifbr(ifbr, vr);
+
+		ifbr++;
+	}
+
+	rw_exit(&sc->sc_rule_lock);
+
+	error = copyout(ifbrs, ifbrl->ifbrl_buf, min(len, ifbrl->ifbrl_len));
+	if (error == 0)
+		ifbrl->ifbrl_len = len;
+	free(ifbrs, M_TEMP, len);
+
+port_put:
+	veb_port_put(sc, p);
+	return (error);
+}
+
+static int
+veb_port_list(struct veb_softc *sc, struct ifbifconf *bifc)
+{
+	struct ifnet *ifp = &sc->sc_if;
+	struct veb_port *p;
+	struct ifnet *ifp0;
+	struct ifbreq breq;
+	int n = 0, error = 0;
+
+	NET_ASSERT_LOCKED();
+
+	if (bifc->ifbic_len == 0) {
+		n = sc->sc_ports.l_count + sc->sc_spans.l_count;
+		goto done;
+	}
+
+	SMR_TAILQ_FOREACH_LOCKED(p, &sc->sc_ports.l_list, p_entry) {
+		if (bifc->ifbic_len < sizeof(breq))
+			break;
+
+		memset(&breq, 0, sizeof(breq));
+
+		ifp0 = p->p_ifp0;
+
+		strlcpy(breq.ifbr_name, ifp->if_xname, IFNAMSIZ);
+		strlcpy(breq.ifbr_ifsname, ifp0->if_xname, IFNAMSIZ);
+
+		breq.ifbr_ifsflags = p->p_bif_flags;
+		breq.ifbr_portno = ifp0->if_index;
+		breq.ifbr_protected = p->p_protected;
+		if ((error = copyout(&breq, bifc->ifbic_req + n,
+		    sizeof(breq))) != 0)
+			goto done;
+
+		bifc->ifbic_len -= sizeof(breq);
+		n++;
+	}
+
+	SMR_TAILQ_FOREACH_LOCKED(p, &sc->sc_spans.l_list, p_entry) {
+		if (bifc->ifbic_len < sizeof(breq))
+			break;
+
+		memset(&breq, 0, sizeof(breq));
+
+		strlcpy(breq.ifbr_name, ifp->if_xname, IFNAMSIZ);
+		strlcpy(breq.ifbr_ifsname, p->p_ifp0->if_xname, IFNAMSIZ);
+
+		breq.ifbr_ifsflags = p->p_bif_flags;
+		if ((error = copyout(&breq, bifc->ifbic_req + n,
+		    sizeof(breq))) != 0)
+			goto done;
+
+		bifc->ifbic_len -= sizeof(breq);
+		n++;
+	}
+
+done:
+	bifc->ifbic_len = n * sizeof(breq);
+	return (error);
+}
+
+static int
+veb_p_ioctl(struct ifnet *ifp0, u_long cmd, caddr_t data)
+{
+	const struct ether_brport *eb = ether_brport_get_locked(ifp0);
+	struct veb_port *p;
+	int error = 0;
+
+	KASSERTMSG(eb != NULL,
+	    "%s: %s called without an ether_brport set",
+	    ifp0->if_xname, __func__);
+	KASSERTMSG(eb->eb_input == veb_port_input,
+	    "%s: %s called, but eb_input seems wrong (%p != veb_port_input())",
+	    ifp0->if_xname, __func__, eb->eb_input);
+
+	p = eb->eb_port;
+
+	switch (cmd) {
+	case SIOCSIFADDR:
+		error = EBUSY;
+		break;
+
+	default:
+		error = (*p->p_ioctl)(ifp0, cmd, data);
+		break;
+	}
+
+	return (error);
+}
+
+static int
+veb_p_output(struct ifnet *ifp0, struct mbuf *m, struct sockaddr *dst,
+    struct rtentry *rt)
+{
+	int (*p_output)(struct ifnet *, struct mbuf *, struct sockaddr *,
+	    struct rtentry *) = NULL;
+	const struct ether_brport *eb;
+
+	/* restrict transmission to bpf only */
+	if ((m_tag_find(m, PACKET_TAG_DLT, NULL) == NULL)) {
+		m_freem(m);
+		return (EBUSY);
+	}
+
+	smr_read_enter();
+	eb = ether_brport_get(ifp0);
+	if (eb != NULL && eb->eb_input == veb_port_input) {
+		struct veb_port *p = eb->eb_port;
+		p_output = p->p_output; /* code doesn't go away */
+	}
+	smr_read_leave();
+
+	if (p_output == NULL) {
+		m_freem(m);
+		return (ENXIO);
+	}
+
+	return ((*p_output)(ifp0, m, dst, rt));
+}
+
+static void
+veb_p_dtor(struct veb_softc *sc, struct veb_port *p, const char *op)
+{
+	struct ifnet *ifp = &sc->sc_if;
+	struct ifnet *ifp0 = p->p_ifp0;
+	struct veb_ports *port_list;
+
+	DPRINTF(sc, "%s %s: destroying port\n",
+	    ifp->if_xname, ifp0->if_xname);
+
+	ifp0->if_ioctl = p->p_ioctl;
+	ifp0->if_output = p->p_output;
+
+	ether_brport_clr(ifp0);
+
+	if_detachhook_del(ifp0, &p->p_dtask);
+	if_linkstatehook_del(ifp0, &p->p_ltask);
+
+	if (p->p_span) {
+		port_list = &sc->sc_spans;
+	} else {
+		if (ifpromisc(ifp0, 0) != 0) {
+			log(LOG_WARNING, "%s %s: unable to disable promisc\n",
+			    ifp->if_xname, ifp0->if_xname);
+		}
+
+		etherbridge_detach_port(&sc->sc_eb, p);
+
+		port_list = &sc->sc_ports;
+	}
+	SMR_TAILQ_REMOVE_LOCKED(&port_list->l_list, p, p_entry);
+	port_list->l_count--;
+
+	smr_barrier();
+	refcnt_finalize(&p->p_refs, "vebpdtor");
+
+	veb_rule_list_free(TAILQ_FIRST(&p->p_vrl));
+
+	if_put(ifp0);
+	free(p, M_DEVBUF, sizeof(*p));
+}
+
+static void
+veb_p_detach(void *arg)
+{
+	struct veb_port *p = arg;
+	struct veb_softc *sc = p->p_veb;
+
+	veb_p_dtor(sc, p, "detach");
+
+	NET_ASSERT_LOCKED();
+}
+
+static int
+veb_p_active(struct veb_port *p)
+{
+	struct ifnet *ifp0 = p->p_ifp0;
+
+	return (ISSET(ifp0->if_flags, IFF_RUNNING) &&
+	    LINK_STATE_IS_UP(ifp0->if_link_state));
+}
+
+static void
+veb_p_linkch(void *arg)
+{
+	struct veb_port *p = arg;
+	u_char link_state = LINK_STATE_FULL_DUPLEX;
+
+	NET_ASSERT_LOCKED();
+
+	if (!veb_p_active(p))
+		link_state = LINK_STATE_DOWN;
+
+	p->p_link_state = link_state;
+}
+
+static int
+veb_up(struct veb_softc *sc)
+{
+	struct ifnet *ifp = &sc->sc_if;
+	int error;
+
+	error = etherbridge_up(&sc->sc_eb);
+	if (error != 0)
+		return (error);
+
+	NET_ASSERT_LOCKED();
+	SET(ifp->if_flags, IFF_RUNNING);
+
+	return (0);
+}
+
+static int
+veb_iff(struct veb_softc *sc)
+{
+	return (0);
+}
+
+static int
+veb_down(struct veb_softc *sc)
+{
+	struct ifnet *ifp = &sc->sc_if;
+	int error;
+
+	error = etherbridge_down(&sc->sc_eb);
+	if (error != 0)
+		return (0);
+
+	NET_ASSERT_LOCKED();
+	CLR(ifp->if_flags, IFF_RUNNING);
+
+	return (0);
+}
+
+static int
+veb_eb_port_cmp(void *arg, void *a, void *b)
+{
+	struct veb_port *pa = a, *pb = b;
+	return (pa == pb);
+}
+
+static void *
+veb_eb_port_take(void *arg, void *port)
+{
+	struct veb_port *p = port;
+
+	refcnt_take(&p->p_refs);
+
+	return (p);
+}
+
+static void
+veb_eb_port_rele(void *arg, void *port)
+{
+	struct veb_port *p = port;
+
+	refcnt_rele_wake(&p->p_refs);
+}
+
+static size_t
+veb_eb_port_ifname(void *arg, char *dst, size_t len, void *port)
+{
+	struct veb_port *p = port;
+
+	return (strlcpy(dst, p->p_ifp0->if_xname, len));
+}
+
+static void
+veb_eb_port_sa(void *arg, struct sockaddr_storage *ss, void *port)
+{
+	ss->ss_family = AF_UNSPEC;
+}
+
+/*
+ * virtual ethernet bridge port
+ */
+
+static int
+vport_clone_create(struct if_clone *ifc, int unit)
+{
+	struct vport_softc *sc;
+	struct ifnet *ifp;
+
+	sc = malloc(sizeof(*sc), M_DEVBUF, M_WAITOK|M_ZERO|M_CANFAIL);
+	if (sc == NULL)
+		return (ENOMEM);
+
+	ifp = &sc->sc_ac.ac_if;
+
+	snprintf(ifp->if_xname, sizeof(ifp->if_xname), "%s%d",
+	    ifc->ifc_name, unit);
+
+	ifp->if_softc = sc;
+	ifp->if_type = IFT_ETHER;
+	ifp->if_hardmtu = ETHER_MAX_HARDMTU_LEN;
+	ifp->if_ioctl = vport_ioctl;
+	ifp->if_enqueue = vport_enqueue;
+	ifp->if_qstart = vport_start;
+	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
+	ifp->if_xflags = IFXF_CLONED | IFXF_MPSAFE;
+	ether_fakeaddr(ifp);
+
+	if_counters_alloc(ifp);
+	if_attach(ifp);
+	ether_ifattach(ifp);
+
+	return (0);
+}
+
+static int
+vport_clone_destroy(struct ifnet *ifp)
+{
+	struct vport_softc *sc = ifp->if_softc;
+
+	NET_LOCK();
+	sc->sc_dead = 1;
+
+	if (ISSET(ifp->if_flags, IFF_RUNNING))
+		vport_down(sc);
+	NET_UNLOCK();
+
+	ether_ifdetach(ifp);
+	if_detach(ifp);
+
+	free(sc, M_DEVBUF, sizeof(*sc));
+
+	return (0);
+}
+
+static int
+vport_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
+{
+	struct vport_softc *sc = ifp->if_softc;
+	int error = 0;
+
+	if (sc->sc_dead)
+		return (ENXIO);
+
+	switch (cmd) {
+	case SIOCSIFFLAGS:
+		if (ISSET(ifp->if_flags, IFF_UP)) {
+			if (!ISSET(ifp->if_flags, IFF_RUNNING))
+				error = vport_up(sc);
+		} else {
+			if (ISSET(ifp->if_flags, IFF_RUNNING))
+				error = vport_down(sc);
+		}
+		break;
+
+	case SIOCADDMULTI:
+	case SIOCDELMULTI:
+		break;
+
+	default:
+		error = ether_ioctl(ifp, &sc->sc_ac, cmd, data);
+		break;
+	}
+
+	if (error == ENETRESET)
+		error = vport_iff(sc);
+
+	return (error);
+}
+
+static int
+vport_up(struct vport_softc *sc)
+{
+	struct ifnet *ifp = &sc->sc_ac.ac_if;
+
+	NET_ASSERT_LOCKED();
+	SET(ifp->if_flags, IFF_RUNNING);
+
+	return (0);
+}
+
+static int
+vport_iff(struct vport_softc *sc)
+{
+	return (0);
+}
+
+static int
+vport_down(struct vport_softc *sc)
+{
+	struct ifnet *ifp = &sc->sc_ac.ac_if;
+
+	NET_ASSERT_LOCKED();
+	CLR(ifp->if_flags, IFF_RUNNING);
+
+	return (0);
+}
+
+static int
+vport_enqueue(struct ifnet *ifp, struct mbuf *m)
+{
+	struct arpcom *ac;
+	const struct ether_brport *eb;
+	int error = ENETDOWN;
+#if NBPFILTER > 0
+	caddr_t if_bpf;
+#endif
+
+#if NPF > 0
+	/*
+	 * the packet is about to leave the l3 stack and go into
+	 * the l2 switching space, or it's coming from a switch space
+	 * into the network stack. either way, there's no relationship
+	 * between pf states in those different places.
+	 */
+	pf_pkt_addr_changed(m);
+#endif
+
+	if (ISSET(m->m_flags, M_PROTO1)) {
+		/* packet is coming from a bridge */
+		if_vinput(ifp, m);
+		return (0);
+	}
+
+	/* packet is going to the bridge */
+
+	ac = (struct arpcom *)ifp;
+
+	smr_read_enter();
+	eb = SMR_PTR_GET(&ac->ac_brport);
+	if (eb != NULL) {
+		counters_pkt(ifp->if_counters, ifc_opackets, ifc_obytes,
+		    m->m_pkthdr.len);
+
+#if NBPFILTER > 0
+		if_bpf = READ_ONCE(ifp->if_bpf);
+		if (if_bpf != NULL)
+			bpf_mtap_ether(if_bpf, m, BPF_DIRECTION_OUT);
+#endif
+
+		m = (*eb->eb_input)(ifp, m, eb->eb_port);
+
+		error = 0;
+	}
+	smr_read_leave();
+
+	m_freem(m);
+
+	return (error);
+}
+
+static void
+vport_start(struct ifqueue *ifq)
+{
+	ifq_purge(ifq);
+}