From: mpi Date: Wed, 31 May 2017 05:59:09 +0000 (+0000) Subject: Move IPv4 & IPv6 incoming/forwarding path, PIPEX ppp processing and X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=4cf8e8383e7f0c0459159a8c35086c3e2c6f0084;p=openbsd Move IPv4 & IPv6 incoming/forwarding path, PIPEX ppp processing and IPv4 & IPv6 dispatch functions outside the KERNEL_LOCK(). We currently rely on the NET_LOCK() serializing access to most global data structures for that. IP input queues are no longer used in the forwarding case. They still exist as boundary between the network and transport layers because TCP/UDP & friends still need the KERNEL_LOCK(). Since we do not want to grab the NET_LOCK() for every packet, the softnet thread will do it once before processing a batch. That means the L2 processing path, which is currently running without lock, will now run with the NET_LOCK(). IPsec isn't ready to run without KERNEL_LOCK(), so the softnet thread will grab the KERNEL_LOCK() as soon as ``ipsec_in_use'' is set. Tested by Hrvoje Popovski. ok visa@, bluhm@, henning@ --- diff --git a/sys/net/if.c b/sys/net/if.c index cdeb70a2d13..c4fca1f88f3 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if.c,v 1.502 2017/05/30 07:50:37 mpi Exp $ */ +/* $OpenBSD: if.c,v 1.503 2017/05/31 05:59:09 mpi Exp $ */ /* $NetBSD: if.c,v 1.35 1996/05/07 05:26:04 thorpej Exp $ */ /* @@ -874,7 +874,10 @@ if_input_process(void *xifidx) struct ifnet *ifp; struct ifih *ifih; struct srp_ref sr; - int s; + int s, s2; +#ifdef IPSEC + int locked = 0; +#endif /* IPSEC */ ifp = if_get(ifidx); if (ifp == NULL) @@ -887,6 +890,32 @@ if_input_process(void *xifidx) if (!ISSET(ifp->if_xflags, IFXF_CLONED)) add_net_randomness(ml_len(&ml)); +#ifdef IPSEC + /* + * IPsec is not ready to run without KERNEL_LOCK(). So all + * the traffic on your machine is punished if you have IPsec + * enabled. + */ + extern int ipsec_in_use; + if (ipsec_in_use) { + KERNEL_LOCK(); + locked = 1; + } +#endif /* IPSEC */ + + /* + * We grab the NET_LOCK() before processing any packet to + * ensure there's no contention on the routing table lock. + * + * Without it we could race with a userland thread to insert + * a L2 entry in ip{6,}_output(). Such race would result in + * one of the threads sleeping *inside* the IP output path. + * + * Since we have a NET_LOCK() we also use it to serialize access + * to PF globals, pipex globals, unicast and multicast addresses + * lists. + */ + NET_LOCK(s2); s = splnet(); while ((m = ml_dequeue(&ml)) != NULL) { /* @@ -903,7 +932,12 @@ if_input_process(void *xifidx) m_freem(m); } splx(s); + NET_UNLOCK(s2); +#ifdef IPSEC + if (locked) + KERNEL_UNLOCK(); +#endif /* IPSEC */ out: if_put(ifp); } diff --git a/sys/net/if_ethersubr.c b/sys/net/if_ethersubr.c index da6f90a1e9f..8041fd2ae1a 100644 --- a/sys/net/if_ethersubr.c +++ b/sys/net/if_ethersubr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_ethersubr.c,v 1.245 2017/05/30 07:50:37 mpi Exp $ */ +/* $OpenBSD: if_ethersubr.c,v 1.246 2017/05/31 05:59:09 mpi Exp $ */ /* $NetBSD: if_ethersubr.c,v 1.19 1996/05/07 02:40:30 thorpej Exp $ */ /* @@ -416,15 +416,11 @@ decapsulate: #ifdef PIPEX if (pipex_enable) { struct pipex_session *session; - int s; - NET_LOCK(s); if ((session = pipex_pppoe_lookup_session(m)) != NULL) { pipex_pppoe_input(m, session); - NET_UNLOCK(s); return (1); } - NET_UNLOCK(s); } #endif if (etype == ETHERTYPE_PPPOEDISC) diff --git a/sys/net/if_switch.c b/sys/net/if_switch.c index b5f9256e02b..cd38a2a3c14 100644 --- a/sys/net/if_switch.c +++ b/sys/net/if_switch.c @@ -1,4 +1,4 @@ -/* $OpenBSD: if_switch.c,v 1.19 2017/05/12 13:40:29 bluhm Exp $ */ +/* $OpenBSD: if_switch.c,v 1.20 2017/05/31 05:59:09 mpi Exp $ */ /* * Copyright (c) 2016 Kazuya GODA @@ -388,9 +388,8 @@ switch_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data) struct bstp_port *bp; struct ifnet *ifs; struct switch_port *swpo; - int s, error = 0; + int error = 0; - s = splnet(); switch (cmd) { case SIOCBRDGADD: if ((error = suser(curproc, 0)) != 0) @@ -481,7 +480,6 @@ switch_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data) break; } - splx(s); return (error); } diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 93bc790a211..9c8c9ba0280 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_input.c,v 1.309 2017/05/30 12:09:27 friehm Exp $ */ +/* $OpenBSD: ip_input.c,v 1.310 2017/05/31 05:59:09 mpi Exp $ */ /* $NetBSD: ip_input.c,v 1.30 1996/03/16 23:53:58 christos Exp $ */ /* @@ -127,6 +127,7 @@ int ip_sysctl_ipstat(void *, size_t *, void *); static struct mbuf_queue ipsend_mq; void ip_ours(struct mbuf *); +void ip_local(struct mbuf *); int ip_dooptions(struct mbuf *, struct ifnet *); int in_ouraddr(struct mbuf *, struct ifnet *, struct rtentry **); @@ -207,27 +208,31 @@ ip_init(void) mq_init(&ipsend_mq, 64, IPL_SOFTNET); } +/* + * Enqueue packet for local delivery. Queuing is used as a boundary + * between the network layer (input/forward path) running without + * KERNEL_LOCK() and the transport layer still needing it. + */ void -ipv4_input(struct ifnet *ifp, struct mbuf *m) +ip_ours(struct mbuf *m) { niq_enqueue(&ipintrq, m); } +/* + * Dequeue and process locally delivered packets. + */ void ipintr(void) { struct mbuf *m; - /* - * Get next datagram off input queue and get IP header - * in first mbuf. - */ while ((m = niq_dequeue(&ipintrq)) != NULL) { -#ifdef DIAGNOSTIC +#ifdef DIAGNOSTIC if ((m->m_flags & M_PKTHDR) == 0) panic("ipintr no HDR"); #endif - ip_input(m); + ip_local(m); } } @@ -237,18 +242,13 @@ ipintr(void) * Checksum and byte swap header. Process options. Forward or deliver. */ void -ip_input(struct mbuf *m) +ipv4_input(struct ifnet *ifp, struct mbuf *m) { - struct ifnet *ifp; struct rtentry *rt = NULL; struct ip *ip; int hlen, len; in_addr_t pfrdr = 0; - ifp = if_get(m->m_pkthdr.ph_ifidx); - if (ifp == NULL) - goto bad; - ipstat_inc(ips_total); if (m->m_len < sizeof (struct ip) && (m = m_pullup(m, sizeof (struct ip))) == NULL) { @@ -463,13 +463,11 @@ ip_input(struct mbuf *m) #endif /* IPSEC */ ip_forward(m, ifp, rt, pfrdr); - if_put(ifp); return; bad: m_freem(m); out: rtfree(rt); - if_put(ifp); } /* @@ -478,13 +476,15 @@ out: * If fragmented try to reassemble. Pass to next level. */ void -ip_ours(struct mbuf *m) +ip_local(struct mbuf *m) { struct ip *ip = mtod(m, struct ip *); struct ipq *fp; struct ipqent *ipqe; int mff, hlen; + KERNEL_ASSERT_LOCKED(); + hlen = ip->ip_hl << 2; /* @@ -1681,18 +1681,37 @@ ip_send_dispatch(void *xmq) struct mbuf *m; struct mbuf_list ml; int s; +#ifdef IPSEC + int locked = 0; +#endif /* IPSEC */ mq_delist(mq, &ml); if (ml_empty(&ml)) return; - KERNEL_LOCK(); +#ifdef IPSEC + /* + * IPsec is not ready to run without KERNEL_LOCK(). So all + * the traffic on your machine is punished if you have IPsec + * enabled. + */ + extern int ipsec_in_use; + if (ipsec_in_use) { + KERNEL_LOCK(); + locked = 1; + } +#endif /* IPSEC */ + NET_LOCK(s); while ((m = ml_dequeue(&ml)) != NULL) { ip_output(m, NULL, NULL, 0, NULL, NULL, 0); } NET_UNLOCK(s); - KERNEL_UNLOCK(); + +#ifdef IPSEC + if (locked) + KERNEL_UNLOCK(); +#endif /* IPSEC */ } void diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 5bda4828dfe..737e8e214b8 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_var.h,v 1.77 2017/05/30 07:50:37 mpi Exp $ */ +/* $OpenBSD: ip_var.h,v 1.78 2017/05/31 05:59:09 mpi Exp $ */ /* $NetBSD: ip_var.h,v 1.16 1996/02/13 23:43:20 christos Exp $ */ /* @@ -248,7 +248,6 @@ int ip_sysctl(int *, u_int, void *, size_t *, void *, size_t); void ip_savecontrol(struct inpcb *, struct mbuf **, struct ip *, struct mbuf *); void ipintr(void); -void ip_input(struct mbuf *); void ip_deliver(struct mbuf **, int *, int, int); void ip_forward(struct mbuf *, struct ifnet *, struct rtentry *, int); int rip_ctloutput(int, struct socket *, int, int, struct mbuf *); diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index 8d1f2f545c7..7018444ba98 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip6_input.c,v 1.193 2017/05/30 12:09:27 friehm Exp $ */ +/* $OpenBSD: ip6_input.c,v 1.194 2017/05/31 05:59:09 mpi Exp $ */ /* $KAME: ip6_input.c,v 1.188 2001/03/29 05:34:31 itojun Exp $ */ /* @@ -119,6 +119,7 @@ struct niqueue ip6intrq = NIQUEUE_INITIALIZER(IFQ_MAXLEN, NETISR_IPV6); struct cpumem *ip6counters; void ip6_ours(struct mbuf *); +void ip6_local(struct mbuf *); int ip6_check_rh0hdr(struct mbuf *, int *); int ip6_hbhchcheck(struct mbuf *, int *, int *, int *); int ip6_hopopts_input(u_int32_t *, u_int32_t *, struct mbuf **, int *); @@ -160,28 +161,37 @@ ip6_init(void) ip6counters = counters_alloc(ip6s_ncounters); } +/* + * Enqueue packet for local delivery. Queuing is used as a boundary + * between the network layer (input/forward path) running without + * KERNEL_LOCK() and the transport layer still needing it. + */ void -ipv6_input(struct ifnet *ifp, struct mbuf *m) +ip6_ours(struct mbuf *m) { niq_enqueue(&ip6intrq, m); } /* - * IP6 input interrupt handling. Just pass the packet to ip6_input. + * Dequeue and process locally delivered packets. */ void ip6intr(void) { struct mbuf *m; - while ((m = niq_dequeue(&ip6intrq)) != NULL) - ip6_input(m); + while ((m = niq_dequeue(&ip6intrq)) != NULL) { +#ifdef DIAGNOSTIC + if ((m->m_flags & M_PKTHDR) == 0) + panic("ipintr no HDR"); +#endif + ip6_local(m); + } } void -ip6_input(struct mbuf *m) +ipv6_input(struct ifnet *ifp, struct mbuf *m) { - struct ifnet *ifp; struct ip6_hdr *ip6; struct sockaddr_in6 sin6; struct rtentry *rt = NULL; @@ -192,10 +202,6 @@ ip6_input(struct mbuf *m) #endif int srcrt = 0; - ifp = if_get(m->m_pkthdr.ph_ifidx); - if (ifp == NULL) - goto bad; - ip6stat_inc(ip6s_total); if (m->m_len < sizeof(struct ip6_hdr)) { @@ -441,8 +447,8 @@ ip6_input(struct mbuf *m) inet_ntop(AF_INET6, &ip6->ip6_dst, dst, sizeof(dst)); /* address is not ready, so discard the packet. */ nd6log((LOG_INFO, - "ip6_input: packet to an unready address %s->%s\n", - src, dst)); + "%s: packet to an unready address %s->%s\n", + __func__, src, dst)); goto bad; } else { @@ -494,17 +500,15 @@ ip6_input(struct mbuf *m) #endif /* IPSEC */ ip6_forward(m, rt, srcrt); - if_put(ifp); return; bad: m_freem(m); out: rtfree(rt); - if_put(ifp); } void -ip6_ours(struct mbuf *m) +ip6_local(struct mbuf *m) { int off, nxt; @@ -1456,18 +1460,37 @@ ip6_send_dispatch(void *xmq) struct mbuf *m; struct mbuf_list ml; int s; +#ifdef IPSEC + int locked = 0; +#endif /* IPSEC */ mq_delist(mq, &ml); if (ml_empty(&ml)) return; - KERNEL_LOCK(); +#ifdef IPSEC + /* + * IPsec is not ready to run without KERNEL_LOCK(). So all + * the traffic on your machine is punished if you have IPsec + * enabled. + */ + extern int ipsec_in_use; + if (ipsec_in_use) { + KERNEL_LOCK(); + locked = 1; + } +#endif /* IPSEC */ + NET_LOCK(s); while ((m = ml_dequeue(&ml)) != NULL) { ip6_output(m, NULL, NULL, IPV6_MINMTU, NULL, NULL); } NET_UNLOCK(s); - KERNEL_UNLOCK(); + +#ifdef IPSEC + if (locked) + KERNEL_UNLOCK(); +#endif /* IPSEC */ } void diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index ae173bb03f6..76cafe127b3 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -1,4 +1,4 @@ -/* $OpenBSD: ip6_var.h,v 1.74 2017/05/28 09:25:51 bluhm Exp $ */ +/* $OpenBSD: ip6_var.h,v 1.75 2017/05/31 05:59:09 mpi Exp $ */ /* $KAME: ip6_var.h,v 1.33 2000/06/11 14:59:20 jinmei Exp $ */ /* @@ -303,7 +303,6 @@ int icmp6_ctloutput(int, struct socket *, int, int, struct mbuf *); void ip6_init(void); void ip6intr(void); -void ip6_input(struct mbuf *); void ip6_deliver(struct mbuf **, int *, int, int); void ip6_freepcbopts(struct ip6_pktopts *); void ip6_freemoptions(struct ip6_moptions *);