From 956659a2c9191e0927b4f56584d8209ea85115f5 Mon Sep 17 00:00:00 2001
From: sashan <sashan@openbsd.org>
Date: Mon, 5 Jun 2017 22:18:28 +0000
Subject: [PATCH] - let's add PF_LOCK()   to enable PF_LOCK(), you must add
 'option WITH_PF_LOCK' to your kernel   configuration. The code does not do
 much currently it's just the very   small step towards MP.

O.K. henning@, mikeb@, mpi@
---
 sys/net/pf.c         | 47 ++++++++++++++++++++++++++++++++------------
 sys/net/pf_ioctl.c   |  8 +++++++-
 sys/net/pf_norm.c    | 45 ++++++++++++++++++++++++++++++++++++------
 sys/net/pfvar_priv.h | 36 ++++++++++++++++++++++++++++++++-
 4 files changed, 115 insertions(+), 21 deletions(-)

diff --git a/sys/net/pf.c b/sys/net/pf.c
index 578b9a0da60..b123858585e 100644
--- a/sys/net/pf.c
+++ b/sys/net/pf.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: pf.c,v 1.1033 2017/05/31 09:19:10 bluhm Exp $ */
+/*	$OpenBSD: pf.c,v 1.1034 2017/06/05 22:18:28 sashan Exp $ */
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
@@ -923,7 +923,7 @@ int
 pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw,
     struct pf_state_key **sks, struct pf_state *s)
 {
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	s->kif = kif;
 	if (*skw == *sks) {
@@ -1186,7 +1186,7 @@ pf_purge_expired_rules(void)
 {
 	struct pf_rule	*r;
 
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	if (SLIST_EMPTY(&pf_rule_gcl))
 		return;
@@ -1208,15 +1208,22 @@ pf_purge_thread(void *v)
 
 		NET_LOCK(s);
 
+		PF_LOCK();
 		/* process a fraction of the state table every second */
 		pf_purge_expired_states(1 + (pf_status.states
 		    / pf_default_rule.timeout[PFTM_INTERVAL]));
 
 		/* purge other expired types every PFTM_INTERVAL seconds */
 		if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
-			pf_purge_expired_fragments();
 			pf_purge_expired_src_nodes(0);
 			pf_purge_expired_rules();
+		}
+		PF_UNLOCK();
+		/*
+		 * Fragments don't require PF_LOCK(), they use their own lock.
+		 */
+		if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) {
+			pf_purge_expired_fragments();
 			nloops = 0;
 		}
 
@@ -1267,7 +1274,7 @@ pf_purge_expired_src_nodes(void)
 {
 	struct pf_src_node		*cur, *next;
 
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) {
 	next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur);
@@ -1303,7 +1310,7 @@ pf_src_tree_remove_state(struct pf_state *s)
 void
 pf_remove_state(struct pf_state *cur)
 {
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	/* handle load balancing related tasks */
 	pf_postprocess_addr(cur);
@@ -1350,7 +1357,7 @@ pf_free_state(struct pf_state *cur)
 {
 	struct pf_rule_item *ri;
 
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 #if NPFSYNC > 0
 	if (pfsync_state_in_use(cur))
@@ -1386,7 +1393,7 @@ pf_purge_expired_states(u_int32_t maxcheck)
 	static struct pf_state	*cur = NULL;
 	struct pf_state		*next;
 
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_LOCKED();
 
 	while (maxcheck--) {
 		/* wrap to start of list when we hit the end */
@@ -3146,13 +3153,13 @@ pf_socket_lookup(struct pf_pdesc *pd)
 	case IPPROTO_TCP:
 		sport = pd->hdr.tcp.th_sport;
 		dport = pd->hdr.tcp.th_dport;
-		NET_ASSERT_LOCKED();
+		PF_ASSERT_LOCKED();
 		tb = &tcbtable;
 		break;
 	case IPPROTO_UDP:
 		sport = pd->hdr.udp.uh_sport;
 		dport = pd->hdr.udp.uh_dport;
-		NET_ASSERT_LOCKED();
+		PF_ASSERT_LOCKED();
 		tb = &udbtable;
 		break;
 	default:
@@ -6678,6 +6685,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
 	/* if packet sits in reassembly queue, return without error */
 	if (pd.m == NULL)
 		return PF_PASS;
+
 	if (action != PF_PASS) {
 #if NPFLOG > 0
 		pd.pflog |= PF_LOG_FORCE;
@@ -6697,6 +6705,9 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
 	}
 	pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED;
 
+	/* lock the lookup/write section of pf_test() */
+	PF_LOCK();
+
 	switch (pd.virtual_proto) {
 
 	case PF_VPROTO_FRAGMENT: {
@@ -6716,7 +6727,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
 			REASON_SET(&reason, PFRES_NORM);
 			DPFPRINTF(LOG_NOTICE,
 			    "dropping IPv6 packet with ICMPv4 payload");
-			goto done;
+			goto unlock;
 		}
 		action = pf_test_state_icmp(&pd, &s, &reason);
 		if (action == PF_PASS || action == PF_AFRT) {
@@ -6741,7 +6752,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
 			REASON_SET(&reason, PFRES_NORM);
 			DPFPRINTF(LOG_NOTICE,
 			    "dropping IPv4 packet with ICMPv6 payload");
-			goto done;
+			goto unlock;
 		}
 		action = pf_test_state_icmp(&pd, &s, &reason);
 		if (action == PF_PASS || action == PF_AFRT) {
@@ -6766,7 +6777,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
 				pqid = 1;
 			action = pf_normalize_tcp(&pd);
 			if (action == PF_DROP)
-				goto done;
+				goto unlock;
 		}
 		action = pf_test_state(&pd, &s, &reason);
 		if (action == PF_PASS || action == PF_AFRT) {
@@ -6793,6 +6804,15 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0)
 		break;
 	}
 
+unlock:
+	PF_UNLOCK();
+
+	/*
+	 * At the moment, we rely on NET_LOCK() to prevent removal of items
+	 * we've collected above ('s', 'r', 'anchor' and 'ruleset').  They'll
+	 * have to be refcounted when NET_LOCK() is gone.
+	 */
+
 done:
 	if (action != PF_DROP) {
 		if (s) {
@@ -6996,6 +7016,7 @@ done:
 	}
 
 	*m0 = pd.m;
+
 	return (action);
 }
 
diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c
index 541cb72e8cc..a5cc75fb0f6 100644
--- a/sys/net/pf_ioctl.c
+++ b/sys/net/pf_ioctl.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: pf_ioctl.c,v 1.314 2017/06/01 14:38:28 patrick Exp $ */
+/*	$OpenBSD: pf_ioctl.c,v 1.315 2017/06/05 22:18:28 sashan Exp $ */
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
@@ -129,6 +129,10 @@ struct {
 TAILQ_HEAD(pf_tags, pf_tagname)	pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags),
 				pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids);
 
+#ifdef WITH_PF_LOCK
+struct rwlock		 pf_lock = RWLOCK_INITIALIZER("pf_lock");
+#endif /* WITH_PF_LOCK */
+
 #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE)
 #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE
 #endif
@@ -998,6 +1002,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 		}
 
 	NET_LOCK(s);
+	PF_LOCK();
 	switch (cmd) {
 
 	case DIOCSTART:
@@ -2437,6 +2442,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p)
 		break;
 	}
 fail:
+	PF_UNLOCK();
 	NET_UNLOCK(s);
 	return (error);
 }
diff --git a/sys/net/pf_norm.c b/sys/net/pf_norm.c
index 0a9a0df1cb0..e694b55f9b4 100644
--- a/sys/net/pf_norm.c
+++ b/sys/net/pf_norm.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: pf_norm.c,v 1.204 2017/05/15 12:26:00 mpi Exp $ */
+/*	$OpenBSD: pf_norm.c,v 1.205 2017/06/05 22:18:28 sashan Exp $ */
 
 /*
  * Copyright 2001 Niels Provos <provos@citi.umich.edu>
@@ -39,6 +39,7 @@
 #include <sys/time.h>
 #include <sys/pool.h>
 #include <sys/syslog.h>
+#include <sys/mutex.h>
 
 #include <net/if.h>
 #include <net/if_var.h>
@@ -135,6 +136,18 @@ struct pool		 pf_frent_pl, pf_frag_pl;
 struct pool		 pf_state_scrub_pl;
 int			 pf_nfrents;
 
+#ifdef WITH_PF_LOCK
+struct mutex		 pf_frag_mtx;
+
+#define PF_FRAG_LOCK_INIT()	mtx_init(&pf_frag_mtx, IPL_SOFTNET)
+#define PF_FRAG_LOCK()		mtx_enter(&pf_frag_mtx)
+#define PF_FRAG_UNLOCK()	mtx_leave(&pf_frag_mtx)
+#else /* !WITH_PF_LOCK */
+#define PF_FRAG_LOCK_INIT()	(void)(0)
+#define PF_FRAG_LOCK()		(void)(0)
+#define PF_FRAG_UNLOCK()	(void)(0)
+#endif /* WITH_PF_LOCK */
+
 void
 pf_normalize_init(void)
 {
@@ -149,6 +162,8 @@ pf_normalize_init(void)
 	pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0);
 
 	TAILQ_INIT(&pf_fragqueue);
+
+	PF_FRAG_LOCK_INIT();
 }
 
 static __inline int
@@ -176,15 +191,18 @@ pf_purge_expired_fragments(void)
 	struct pf_fragment	*frag;
 	int32_t			 expire;
 
-	NET_ASSERT_LOCKED();
+	PF_ASSERT_UNLOCKED();
 
 	expire = time_uptime - pf_default_rule.timeout[PFTM_FRAG];
+
+	PF_FRAG_LOCK();
 	while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) {
 		if (frag->fr_timeout > expire)
 			break;
 		DPFPRINTF(LOG_NOTICE, "expiring %d(%p)", frag->fr_id, frag);
 		pf_free_fragment(frag);
 	}
+	PF_FRAG_UNLOCK();
 }
 
 /*
@@ -533,14 +551,19 @@ pf_reassemble(struct mbuf **m0, int dir, u_short *reason)
 	key.fr_id = ip->ip_id;
 	key.fr_direction = dir;
 
-	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
+	PF_FRAG_LOCK();
+	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) {
+		PF_FRAG_UNLOCK();
 		return (PF_DROP);
+	}
 
 	/* The mbuf is part of the fragment entry, no direct free or access */
 	m = *m0 = NULL;
 
-	if (!pf_isfull_fragment(frag))
+	if (!pf_isfull_fragment(frag)) {
+		PF_FRAG_UNLOCK();
 		return (PF_PASS);  /* drop because *m0 is NULL, no error */
+	}
 
 	/* We have all the data */
 	frent = TAILQ_FIRST(&frag->fr_queue);
@@ -564,6 +587,7 @@ pf_reassemble(struct mbuf **m0, int dir, u_short *reason)
 	ip->ip_off &= ~(IP_MF|IP_OFFMASK);
 
 	if (hdrlen + total > IP_MAXPACKET) {
+		PF_FRAG_UNLOCK();
 		DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total);
 		ip->ip_len = 0;
 		REASON_SET(reason, PFRES_SHORT);
@@ -571,6 +595,7 @@ pf_reassemble(struct mbuf **m0, int dir, u_short *reason)
 		return (PF_DROP);
 	}
 
+	PF_FRAG_UNLOCK();
 	DPFPRINTF(LOG_INFO, "complete: %p(%d)", m, ntohs(ip->ip_len));
 	return (PF_PASS);
 }
@@ -610,14 +635,19 @@ pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr,
 	key.fr_id = fraghdr->ip6f_ident;
 	key.fr_direction = dir;
 
-	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL)
+	PF_FRAG_LOCK();
+	if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) {
+		PF_FRAG_UNLOCK();
 		return (PF_DROP);
+	}
 
 	/* The mbuf is part of the fragment entry, no direct free or access */
 	m = *m0 = NULL;
 
-	if (!pf_isfull_fragment(frag))
+	if (!pf_isfull_fragment(frag)) {
+		PF_FRAG_UNLOCK();
 		return (PF_PASS);  /* drop because *m0 is NULL, no error */
+	}
 
 	/* We have all the data */
 	extoff = frent->fe_extoff;
@@ -671,17 +701,20 @@ pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr,
 		ip6->ip6_nxt = proto;
 
 	if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) {
+		PF_FRAG_UNLOCK();
 		DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total);
 		ip6->ip6_plen = 0;
 		REASON_SET(reason, PFRES_SHORT);
 		/* PF_DROP requires a valid mbuf *m0 in pf_test6() */
 		return (PF_DROP);
 	}
+	PF_FRAG_UNLOCK();
 
 	DPFPRINTF(LOG_INFO, "complete: %p(%d)", m, ntohs(ip6->ip6_plen));
 	return (PF_PASS);
 
 fail:
+	PF_FRAG_UNLOCK();
 	REASON_SET(reason, PFRES_MEMORY);
 	/* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later */
 	return (PF_DROP);
diff --git a/sys/net/pfvar_priv.h b/sys/net/pfvar_priv.h
index 7f36bb06af3..6ea946146a9 100644
--- a/sys/net/pfvar_priv.h
+++ b/sys/net/pfvar_priv.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: pfvar_priv.h,v 1.2 2016/11/22 19:29:54 procter Exp $	*/
+/*	$OpenBSD: pfvar_priv.h,v 1.3 2017/06/05 22:18:28 sashan Exp $	*/
 
 /*
  * Copyright (c) 2001 Daniel Hartmeier
@@ -37,6 +37,10 @@
 
 #ifdef _KERNEL
 
+#include <sys/rwlock.h>
+
+extern struct rwlock pf_lock;
+
 struct pf_pdesc {
 	struct {
 		int	 done;
@@ -94,6 +98,36 @@ struct pf_pdesc {
 	} hdr;
 };
 
+#ifdef WITH_PF_LOCK
+extern struct rwlock	pf_lock;
+
+#define PF_LOCK()		do {			\
+		NET_ASSERT_LOCKED();			\
+		rw_enter_write(&pf_lock);		\
+	} while (0)
+
+#define PF_UNLOCK()		do {			\
+		PF_ASSERT_LOCKED();			\
+		rw_exit_write(&pf_lock);		\
+	} while (0)
+
+#define PF_ASSERT_LOCKED()	do {			\
+		if (rw_status(&pf_lock) != RW_WRITE)	\
+			splassert_fail(RW_WRITE,	\
+			    rw_status(&pf_lock),__func__);\
+	} while (0)
+
+#define PF_ASSERT_UNLOCKED()	do {			\
+		if (rw_status(&pf_lock) == RW_WRITE)	\
+			splassert_fail(0, rw_status(&pf_lock), __func__);\
+	} while (0)
+#else /* !WITH_PF_LOCK */
+#define PF_LOCK()		(void)(0)
+#define PF_UNLOCK()		(void)(0)
+#define PF_ASSERT_LOCKED()	(void)(0)
+#define PF_ASSERT_UNLOCKED()	(void)(0)
+#endif /* WITH_PF_LOCK */
+
 #endif /* _KERNEL */
 
 #endif /* _NET_PFVAR_PRIV_H_ */
-- 
2.20.1