From: sashan Date: Mon, 5 Jun 2017 22:18:28 +0000 (+0000) Subject: - let's add PF_LOCK() X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=956659a2c9191e0927b4f56584d8209ea85115f5;p=openbsd - let's add PF_LOCK() to enable PF_LOCK(), you must add 'option WITH_PF_LOCK' to your kernel configuration. The code does not do much currently; it's just a very small step towards MP. O.K. henning@, mikeb@, mpi@ --- diff --git a/sys/net/pf.c b/sys/net/pf.c index 578b9a0da60..b123858585e 100644 --- a/sys/net/pf.c +++ b/sys/net/pf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf.c,v 1.1033 2017/05/31 09:19:10 bluhm Exp $ */ +/* $OpenBSD: pf.c,v 1.1034 2017/06/05 22:18:28 sashan Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -923,7 +923,7 @@ int pf_state_insert(struct pfi_kif *kif, struct pf_state_key **skw, struct pf_state_key **sks, struct pf_state *s) { - NET_ASSERT_LOCKED(); + PF_ASSERT_LOCKED(); s->kif = kif; if (*skw == *sks) { @@ -1186,7 +1186,7 @@ pf_purge_expired_rules(void) { struct pf_rule *r; - NET_ASSERT_LOCKED(); + PF_ASSERT_LOCKED(); if (SLIST_EMPTY(&pf_rule_gcl)) return; @@ -1208,15 +1208,22 @@ pf_purge_thread(void *v) NET_LOCK(s); + PF_LOCK(); /* process a fraction of the state table every second */ pf_purge_expired_states(1 + (pf_status.states / pf_default_rule.timeout[PFTM_INTERVAL])); /* purge other expired types every PFTM_INTERVAL seconds */ if (++nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { - pf_purge_expired_fragments(); pf_purge_expired_src_nodes(0); pf_purge_expired_rules(); + } + PF_UNLOCK(); + /* + * Fragments don't require PF_LOCK(), they use their own lock. 
+ */ + if (nloops >= pf_default_rule.timeout[PFTM_INTERVAL]) { + pf_purge_expired_fragments(); nloops = 0; } @@ -1267,7 +1274,7 @@ pf_purge_expired_src_nodes(void) { struct pf_src_node *cur, *next; - NET_ASSERT_LOCKED(); + PF_ASSERT_LOCKED(); for (cur = RB_MIN(pf_src_tree, &tree_src_tracking); cur; cur = next) { next = RB_NEXT(pf_src_tree, &tree_src_tracking, cur); @@ -1303,7 +1310,7 @@ pf_src_tree_remove_state(struct pf_state *s) void pf_remove_state(struct pf_state *cur) { - NET_ASSERT_LOCKED(); + PF_ASSERT_LOCKED(); /* handle load balancing related tasks */ pf_postprocess_addr(cur); @@ -1350,7 +1357,7 @@ pf_free_state(struct pf_state *cur) { struct pf_rule_item *ri; - NET_ASSERT_LOCKED(); + PF_ASSERT_LOCKED(); #if NPFSYNC > 0 if (pfsync_state_in_use(cur)) @@ -1386,7 +1393,7 @@ pf_purge_expired_states(u_int32_t maxcheck) static struct pf_state *cur = NULL; struct pf_state *next; - NET_ASSERT_LOCKED(); + PF_ASSERT_LOCKED(); while (maxcheck--) { /* wrap to start of list when we hit the end */ @@ -3146,13 +3153,13 @@ pf_socket_lookup(struct pf_pdesc *pd) case IPPROTO_TCP: sport = pd->hdr.tcp.th_sport; dport = pd->hdr.tcp.th_dport; - NET_ASSERT_LOCKED(); + PF_ASSERT_LOCKED(); tb = &tcbtable; break; case IPPROTO_UDP: sport = pd->hdr.udp.uh_sport; dport = pd->hdr.udp.uh_dport; - NET_ASSERT_LOCKED(); + PF_ASSERT_LOCKED(); tb = &udbtable; break; default: @@ -6678,6 +6685,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) /* if packet sits in reassembly queue, return without error */ if (pd.m == NULL) return PF_PASS; + if (action != PF_PASS) { #if NPFLOG > 0 pd.pflog |= PF_LOG_FORCE; @@ -6697,6 +6705,9 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) } pd.m->m_pkthdr.pf.flags |= PF_TAG_PROCESSED; + /* lock the lookup/write section of pf_test() */ + PF_LOCK(); + switch (pd.virtual_proto) { case PF_VPROTO_FRAGMENT: { @@ -6716,7 +6727,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) 
REASON_SET(&reason, PFRES_NORM); DPFPRINTF(LOG_NOTICE, "dropping IPv6 packet with ICMPv4 payload"); - goto done; + goto unlock; } action = pf_test_state_icmp(&pd, &s, &reason); if (action == PF_PASS || action == PF_AFRT) { @@ -6741,7 +6752,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) REASON_SET(&reason, PFRES_NORM); DPFPRINTF(LOG_NOTICE, "dropping IPv4 packet with ICMPv6 payload"); - goto done; + goto unlock; } action = pf_test_state_icmp(&pd, &s, &reason); if (action == PF_PASS || action == PF_AFRT) { @@ -6766,7 +6777,7 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) pqid = 1; action = pf_normalize_tcp(&pd); if (action == PF_DROP) - goto done; + goto unlock; } action = pf_test_state(&pd, &s, &reason); if (action == PF_PASS || action == PF_AFRT) { @@ -6793,6 +6804,15 @@ pf_test(sa_family_t af, int fwdir, struct ifnet *ifp, struct mbuf **m0) break; } +unlock: + PF_UNLOCK(); + + /* + * At the moment, we rely on NET_LOCK() to prevent removal of items + * we've collected above ('s', 'r', 'anchor' and 'ruleset'). They'll + * have to be refcounted when NET_LOCK() is gone. 
+ */ + done: if (action != PF_DROP) { if (s) { @@ -6996,6 +7016,7 @@ done: } *m0 = pd.m; + return (action); } diff --git a/sys/net/pf_ioctl.c b/sys/net/pf_ioctl.c index 541cb72e8cc..a5cc75fb0f6 100644 --- a/sys/net/pf_ioctl.c +++ b/sys/net/pf_ioctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_ioctl.c,v 1.314 2017/06/01 14:38:28 patrick Exp $ */ +/* $OpenBSD: pf_ioctl.c,v 1.315 2017/06/05 22:18:28 sashan Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -129,6 +129,10 @@ struct { TAILQ_HEAD(pf_tags, pf_tagname) pf_tags = TAILQ_HEAD_INITIALIZER(pf_tags), pf_qids = TAILQ_HEAD_INITIALIZER(pf_qids); +#ifdef WITH_PF_LOCK +struct rwlock pf_lock = RWLOCK_INITIALIZER("pf_lock"); +#endif /* WITH_PF_LOCK */ + #if (PF_QNAME_SIZE != PF_TAG_NAME_SIZE) #error PF_QNAME_SIZE must be equal to PF_TAG_NAME_SIZE #endif @@ -998,6 +1002,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) } NET_LOCK(s); + PF_LOCK(); switch (cmd) { case DIOCSTART: @@ -2437,6 +2442,7 @@ pfioctl(dev_t dev, u_long cmd, caddr_t addr, int flags, struct proc *p) break; } fail: + PF_UNLOCK(); NET_UNLOCK(s); return (error); } diff --git a/sys/net/pf_norm.c b/sys/net/pf_norm.c index 0a9a0df1cb0..e694b55f9b4 100644 --- a/sys/net/pf_norm.c +++ b/sys/net/pf_norm.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pf_norm.c,v 1.204 2017/05/15 12:26:00 mpi Exp $ */ +/* $OpenBSD: pf_norm.c,v 1.205 2017/06/05 22:18:28 sashan Exp $ */ /* * Copyright 2001 Niels Provos @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -135,6 +136,18 @@ struct pool pf_frent_pl, pf_frag_pl; struct pool pf_state_scrub_pl; int pf_nfrents; +#ifdef WITH_PF_LOCK +struct mutex pf_frag_mtx; + +#define PF_FRAG_LOCK_INIT() mtx_init(&pf_frag_mtx, IPL_SOFTNET) +#define PF_FRAG_LOCK() mtx_enter(&pf_frag_mtx) +#define PF_FRAG_UNLOCK() mtx_leave(&pf_frag_mtx) +#else /* !WITH_PF_LOCK */ +#define PF_FRAG_LOCK_INIT() (void)(0) +#define PF_FRAG_LOCK() (void)(0) +#define PF_FRAG_UNLOCK() (void)(0) +#endif /* WITH_PF_LOCK */ + void 
pf_normalize_init(void) { @@ -149,6 +162,8 @@ pf_normalize_init(void) pool_sethardlimit(&pf_frent_pl, PFFRAG_FRENT_HIWAT, NULL, 0); TAILQ_INIT(&pf_fragqueue); + + PF_FRAG_LOCK_INIT(); } static __inline int @@ -176,15 +191,18 @@ pf_purge_expired_fragments(void) struct pf_fragment *frag; int32_t expire; - NET_ASSERT_LOCKED(); + PF_ASSERT_UNLOCKED(); expire = time_uptime - pf_default_rule.timeout[PFTM_FRAG]; + + PF_FRAG_LOCK(); while ((frag = TAILQ_LAST(&pf_fragqueue, pf_fragqueue)) != NULL) { if (frag->fr_timeout > expire) break; DPFPRINTF(LOG_NOTICE, "expiring %d(%p)", frag->fr_id, frag); pf_free_fragment(frag); } + PF_FRAG_UNLOCK(); } /* @@ -533,14 +551,19 @@ pf_reassemble(struct mbuf **m0, int dir, u_short *reason) key.fr_id = ip->ip_id; key.fr_direction = dir; - if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) + PF_FRAG_LOCK(); + if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) { + PF_FRAG_UNLOCK(); return (PF_DROP); + } /* The mbuf is part of the fragment entry, no direct free or access */ m = *m0 = NULL; - if (!pf_isfull_fragment(frag)) + if (!pf_isfull_fragment(frag)) { + PF_FRAG_UNLOCK(); return (PF_PASS); /* drop because *m0 is NULL, no error */ + } /* We have all the data */ frent = TAILQ_FIRST(&frag->fr_queue); @@ -564,6 +587,7 @@ pf_reassemble(struct mbuf **m0, int dir, u_short *reason) ip->ip_off &= ~(IP_MF|IP_OFFMASK); if (hdrlen + total > IP_MAXPACKET) { + PF_FRAG_UNLOCK(); DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total); ip->ip_len = 0; REASON_SET(reason, PFRES_SHORT); @@ -571,6 +595,7 @@ pf_reassemble(struct mbuf **m0, int dir, u_short *reason) return (PF_DROP); } + PF_FRAG_UNLOCK(); DPFPRINTF(LOG_INFO, "complete: %p(%d)", m, ntohs(ip->ip_len)); return (PF_PASS); } @@ -610,14 +635,19 @@ pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr, key.fr_id = fraghdr->ip6f_ident; key.fr_direction = dir; - if ((frag = pf_fillup_fragment(&key, frent, reason)) == NULL) + PF_FRAG_LOCK(); + if ((frag = pf_fillup_fragment(&key, 
frent, reason)) == NULL) { + PF_FRAG_UNLOCK(); return (PF_DROP); + } /* The mbuf is part of the fragment entry, no direct free or access */ m = *m0 = NULL; - if (!pf_isfull_fragment(frag)) + if (!pf_isfull_fragment(frag)) { + PF_FRAG_UNLOCK(); return (PF_PASS); /* drop because *m0 is NULL, no error */ + } /* We have all the data */ extoff = frent->fe_extoff; @@ -671,17 +701,20 @@ pf_reassemble6(struct mbuf **m0, struct ip6_frag *fraghdr, ip6->ip6_nxt = proto; if (hdrlen - sizeof(struct ip6_hdr) + total > IPV6_MAXPACKET) { + PF_FRAG_UNLOCK(); DPFPRINTF(LOG_NOTICE, "drop: too big: %d", total); ip6->ip6_plen = 0; REASON_SET(reason, PFRES_SHORT); /* PF_DROP requires a valid mbuf *m0 in pf_test6() */ return (PF_DROP); } + PF_FRAG_UNLOCK(); DPFPRINTF(LOG_INFO, "complete: %p(%d)", m, ntohs(ip6->ip6_plen)); return (PF_PASS); fail: + PF_FRAG_UNLOCK(); REASON_SET(reason, PFRES_MEMORY); /* PF_DROP requires a valid mbuf *m0 in pf_test6(), will free later */ return (PF_DROP); diff --git a/sys/net/pfvar_priv.h b/sys/net/pfvar_priv.h index 7f36bb06af3..6ea946146a9 100644 --- a/sys/net/pfvar_priv.h +++ b/sys/net/pfvar_priv.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pfvar_priv.h,v 1.2 2016/11/22 19:29:54 procter Exp $ */ +/* $OpenBSD: pfvar_priv.h,v 1.3 2017/06/05 22:18:28 sashan Exp $ */ /* * Copyright (c) 2001 Daniel Hartmeier @@ -37,6 +37,10 @@ #ifdef _KERNEL +#include + +extern struct rwlock pf_lock; + struct pf_pdesc { struct { int done; @@ -94,6 +98,36 @@ struct pf_pdesc { } hdr; }; +#ifdef WITH_PF_LOCK +extern struct rwlock pf_lock; + +#define PF_LOCK() do { \ + NET_ASSERT_LOCKED(); \ + rw_enter_write(&pf_lock); \ + } while (0) + +#define PF_UNLOCK() do { \ + PF_ASSERT_LOCKED(); \ + rw_exit_write(&pf_lock); \ + } while (0) + +#define PF_ASSERT_LOCKED() do { \ + if (rw_status(&pf_lock) != RW_WRITE) \ + splassert_fail(RW_WRITE, \ + rw_status(&pf_lock),__func__);\ + } while (0) + +#define PF_ASSERT_UNLOCKED() do { \ + if (rw_status(&pf_lock) == RW_WRITE) \ + splassert_fail(0, 
rw_status(&pf_lock), __func__);\ + } while (0) +#else /* !WITH_PF_LOCK */ +#define PF_LOCK() (void)(0) +#define PF_UNLOCK() (void)(0) +#define PF_ASSERT_LOCKED() (void)(0) +#define PF_ASSERT_UNLOCKED() (void)(0) +#endif /* WITH_PF_LOCK */ + #endif /* _KERNEL */ #endif /* _NET_PFVAR_PRIV_H_ */