From 8a7b43ff23c5924c9f444901961e4773621c2482 Mon Sep 17 00:00:00 2001 From: bluhm Date: Thu, 20 Sep 2018 18:59:10 +0000 Subject: [PATCH] As a step towards per inpcb or socket locks, remove the net lock for netstat -a. Introduce a global mutex that protects the tables and hashes for the internet PCBs. To detect detached PCB, set its inp_socket field to NULL. This has to be protected by a per PCB mutex. The protocol pointer has to be protected by the mutex as netstat uses it. Always take the kernel lock in in_pcbnotifyall() and in6_pcbnotify() before the table mutex to avoid lock ordering problems in the notify functions. OK visa@ --- sys/kern/kern_sysctl.c | 37 ++++++++++++++++++-------- sys/netinet/in_pcb.c | 55 +++++++++++++++++++++++++++++++++++---- sys/netinet/in_pcb.h | 35 ++++++++++++++++--------- sys/netinet/ip_divert.c | 4 ++- sys/netinet/raw_ip.c | 5 +++- sys/netinet/tcp_subr.c | 4 ++- sys/netinet/udp_usrreq.c | 4 ++- sys/netinet6/in6_pcb.c | 19 ++++++++++++-- sys/netinet6/ip6_divert.c | 4 ++- sys/netinet6/raw_ip6.c | 9 +++++-- 10 files changed, 138 insertions(+), 38 deletions(-) diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 7a565b2de88..f83cb70a9a6 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_sysctl.c,v 1.346 2018/07/12 01:23:38 cheloha Exp $ */ +/* $OpenBSD: kern_sysctl.c,v 1.347 2018/09/20 18:59:10 bluhm Exp $ */ /* $NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $ */ /*- @@ -1179,7 +1179,8 @@ fill_file(struct kinfo_file *kf, struct file *fp, struct filedesc *fdp, kf->inp_rtableid = inpcb->inp_rtableid; if (so->so_type == SOCK_RAW) kf->inp_proto = inpcb->inp_ip.ip_p; - if (so->so_proto->pr_protocol == IPPROTO_TCP) { + if (so->so_proto->pr_protocol == IPPROTO_TCP && + inpcb->inp_ppcb != NULL) { struct tcpcb *tcpcb = (void *)inpcb->inp_ppcb; kf->t_rcv_wnd = tcpcb->rcv_wnd; kf->t_snd_wnd = tcpcb->snd_wnd; @@ -1206,7 +1207,8 @@ fill_file(struct kinfo_file *kf, struct file *fp, struct filedesc *fdp, kf->inp_rtableid = inpcb->inp_rtableid; if (so->so_type == SOCK_RAW) kf->inp_proto = inpcb->inp_ipv6.ip6_nxt; - if (so->so_proto->pr_protocol == IPPROTO_TCP) { + if (so->so_proto->pr_protocol == IPPROTO_TCP && + inpcb->inp_ppcb != NULL) { struct tcpcb *tcpcb = (void *)inpcb->inp_ppcb; kf->t_rcv_wnd = tcpcb->rcv_wnd; kf->t_snd_wnd = tcpcb->snd_wnd; @@ -1320,10 +1322,16 @@ sysctl_file(int *name, u_int namelen, char *where, size_t *sizep, } \ needed += elem_size; \ } while (0) + #define FILLIT(fp, fdp, i, vp, pr) \ FILLIT2(fp, fdp, i, vp, pr, NULL) -#define FILLSO(so) \ - FILLIT2(NULL, NULL, 0, NULL, NULL, so) + +#define FILLINPCB(inp) do { \ + mtx_enter(&inp->inp_mtx); \ + if (inp->inp_socket != NULL) \ + FILLIT2(NULL, NULL, 0, NULL, NULL, inp->inp_socket); \ + mtx_leave(&inp->inp_mtx); \ +} while (0) switch (op) { case KERN_FILE_BYFILE: @@ -1331,19 +1339,26 @@ sysctl_file(int *name, u_int namelen, char *where, size_t *sizep, if (arg == DTYPE_SOCKET) { struct inpcb *inp; - NET_LOCK(); + /* + * The inpcb and socket fields are accessed and read + * without net lock. This may result in inconsistent + * data provided to userland. The fix will be to + * protect the socket fields with the inpcb mutex. + * XXXSMP + */ + mtx_enter(&inpcbtable_mtx); TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue) - FILLSO(inp->inp_socket); + FILLINPCB(inp); TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) - FILLSO(inp->inp_socket); + FILLINPCB(inp); TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) - FILLSO(inp->inp_socket); + FILLINPCB(inp); #ifdef INET6 TAILQ_FOREACH(inp, &rawin6pcbtable.inpt_queue, inp_queue) - FILLSO(inp->inp_socket); + FILLINPCB(inp); #endif - NET_UNLOCK(); + mtx_leave(&inpcbtable_mtx); } fp = NULL; while ((fp = fd_iterfile(fp, p)) != NULL) { diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 02a1e883095..1d5fd6ff4ee 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -1,4 +1,4 @@ -/* $OpenBSD: in_pcb.c,v 1.245 2018/09/14 12:55:17 bluhm Exp $ */ +/* $OpenBSD: in_pcb.c,v 1.246 2018/09/20 18:59:10 bluhm Exp $ */ /* $NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $ */ /* @@ -111,12 +111,16 @@ int ipport_lastauto = IPPORT_USERRESERVED; int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; int ipport_hilastauto = IPPORT_HILASTAUTO; +/* Protect PCB table queues, lookup hashes and existence of inpcb. */ +struct mutex inpcbtable_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); + struct baddynamicports baddynamicports; struct baddynamicports rootonlyports; struct pool inpcb_pool; int inpcb_pool_initialized = 0; -int in_pcbresize (struct inpcbtable *, int); +void in_pcbrehash_locked(struct inpcb *); +int in_pcbresize(struct inpcbtable *, int); #define INPCBHASH_LOADFACTOR(_x) (((_x) * 3) / 4) @@ -159,6 +163,7 @@ void in_pcbinit(struct inpcbtable *table, int hashsize) { + mtx_enter(&inpcbtable_mtx); TAILQ_INIT(&table->inpt_queue); table->inpt_hashtbl = hashinit(hashsize, M_PCB, M_NOWAIT, &table->inpt_mask); @@ -172,6 +177,7 @@ in_pcbinit(struct inpcbtable *table, int hashsize) table->inpt_size = hashsize; arc4random_buf(&table->inpt_key, sizeof(table->inpt_key)); arc4random_buf(&table->inpt_lkey, sizeof(table->inpt_lkey)); + mtx_leave(&inpcbtable_mtx); } /* @@ -246,6 +252,7 @@ in_pcballoc(struct socket *so, struct inpcbtable *table) inp->inp_cksum6 = -1; #endif /* INET6 */ + mtx_enter(&inpcbtable_mtx); if (table->inpt_count++ > INPCBHASH_LOADFACTOR(table->inpt_size)) (void)in_pcbresize(table, table->inpt_size * 2); TAILQ_INSERT_HEAD(&table->inpt_queue, inp, inp_queue); @@ -262,6 +269,8 @@ in_pcballoc(struct socket *so, struct inpcbtable *table) &inp->inp_faddr, inp->inp_fport, &inp->inp_laddr, inp->inp_lport); LIST_INSERT_HEAD(head, inp, inp_hash); + mtx_leave(&inpcbtable_mtx); + so->so_pcb = inp; return (0); @@ -545,10 +554,15 @@ in_pcbdisconnect(struct inpcb *inp) void in_pcbdetach(struct inpcb *inp) { - struct socket *so = inp->inp_socket; + struct socket *so; NET_ASSERT_LOCKED(); + mtx_enter(&inp->inp_mtx); + so = inp->inp_socket; + inp->inp_socket = NULL; + mtx_leave(&inp->inp_mtx); + so->so_pcb = NULL; /* * As long as the NET_LOCK() is the default lock for Internet @@ -575,10 +589,12 @@ in_pcbdetach(struct inpcb *inp) pf_inp_unlink(inp); } #endif + mtx_enter(&inpcbtable_mtx); LIST_REMOVE(inp, inp_lhash); LIST_REMOVE(inp, inp_hash); TAILQ_REMOVE(&inp->inp_table->inpt_queue, inp, inp_queue); inp->inp_table->inpt_count--; + mtx_leave(&inpcbtable_mtx); in_pcbunref(inp); } @@ -594,6 +610,7 @@ void in_pcbunref(struct inpcb *inp) { if (refcnt_rele(&inp->inp_refcnt)) { + KASSERT(inp->inp_socket == NULL); KASSERT((LIST_NEXT(inp, inp_hash) == NULL) || (LIST_NEXT(inp, inp_hash) == _Q_INVALID)); KASSERT((LIST_NEXT(inp, inp_lhash) == NULL) || @@ -672,6 +689,8 @@ in_pcbnotifyall(struct inpcbtable *table, struct sockaddr *dst, u_int rtable, return; rdomain = rtable_l2(rtable); + KERNEL_LOCK(); + mtx_enter(&inpcbtable_mtx); TAILQ_FOREACH_SAFE(inp, &table->inpt_queue, inp_queue, ninp) { #ifdef INET6 if (inp->inp_flags & INP_IPV6) @@ -679,12 +698,20 @@ in_pcbnotifyall(struct inpcbtable *table, struct sockaddr *dst, u_int rtable, #endif if (inp->inp_faddr.s_addr != faddr.s_addr || rtable_l2(inp->inp_rtableid) != rdomain || - inp->inp_socket == 0) { + inp->inp_socket == NULL) { continue; } + /* + * The notify functions may grab the kernel lock. Sometimes + * we already hold the kernel lock when we acquire the pcb + * mutex. So do an extra kernel lock before the mutex outside + * of this loop. XXXSMP + */ if (notify) (*notify)(inp, errno); } + mtx_leave(&inpcbtable_mtx); + KERNEL_UNLOCK(); } /* @@ -757,6 +784,7 @@ in_pcblookup_local(struct inpcbtable *table, void *laddrp, u_int lport_arg, u_int rdomain; rdomain = rtable_l2(rtable); + mtx_enter(&inpcbtable_mtx); head = in_pcblhash(table, rdomain, lport); LIST_FOREACH(inp, head, inp_lhash) { if (rtable_l2(inp->inp_rtableid) != rdomain) @@ -807,6 +835,8 @@ in_pcblookup_local(struct inpcbtable *table, void *laddrp, u_int lport_arg, break; } } + mtx_leave(&inpcbtable_mtx); + return (match); } @@ -951,11 +981,20 @@ in_pcbselsrc(struct in_addr **insrc, struct sockaddr_in *sin, void in_pcbrehash(struct inpcb *inp) +{ + mtx_enter(&inpcbtable_mtx); + in_pcbrehash_locked(inp); + mtx_leave(&inpcbtable_mtx); +} + +void +in_pcbrehash_locked(struct inpcb *inp) { struct inpcbtable *table = inp->inp_table; struct inpcbhead *head; NET_ASSERT_LOCKED(); + MUTEX_ASSERT_LOCKED(&inpcbtable_mtx); LIST_REMOVE(inp, inp_lhash); head = in_pcblhash(table, inp->inp_rtableid, inp->inp_lport); @@ -982,6 +1021,8 @@ in_pcbresize(struct inpcbtable *table, int hashsize) void *nhashtbl, *nlhashtbl, *ohashtbl, *olhashtbl; struct inpcb *inp; + MUTEX_ASSERT_LOCKED(&inpcbtable_mtx); + ohashtbl = table->inpt_hashtbl; olhashtbl = table->inpt_lhashtbl; osize = table->inpt_size; @@ -1003,7 +1044,7 @@ in_pcbresize(struct inpcbtable *table, int hashsize) arc4random_buf(&table->inpt_lkey, sizeof(table->inpt_lkey)); TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) { - in_pcbrehash(inp); + in_pcbrehash_locked(inp); } hashfree(ohashtbl, osize, M_PCB); hashfree(olhashtbl, osize, M_PCB); @@ -1034,6 +1075,7 @@ in_pcbhashlookup(struct inpcbtable *table, struct in_addr faddr, u_int rdomain; rdomain = rtable_l2(rtable); + mtx_enter(&inpcbtable_mtx); head = in_pcbhash(table, rdomain, &faddr, fport, &laddr, lport); LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 @@ -1056,6 +1098,7 @@ in_pcbhashlookup(struct inpcbtable *table, struct in_addr faddr, break; } } + mtx_leave(&inpcbtable_mtx); #ifdef DIAGNOSTIC if (inp == NULL && in_pcbnotifymiss) { printf("%s: faddr=%08x fport=%d laddr=%08x lport=%d rdom=%u\n", @@ -1117,6 +1160,7 @@ in_pcblookup_listen(struct inpcbtable *table, struct in_addr laddr, #endif rdomain = rtable_l2(rtable); + mtx_enter(&inpcbtable_mtx); head = in_pcbhash(table, rdomain, &zeroin_addr, 0, key1, lport); LIST_FOREACH(inp, head, inp_hash) { #ifdef INET6 @@ -1153,6 +1197,7 @@ in_pcblookup_listen(struct inpcbtable *table, struct in_addr laddr, LIST_REMOVE(inp, inp_hash); LIST_INSERT_HEAD(head, inp, inp_hash); } + mtx_leave(&inpcbtable_mtx); #ifdef DIAGNOSTIC if (inp == NULL && in_pcbnotifymiss) { printf("%s: laddr=%08x lport=%d rdom=%u\n", diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 8b90762d50e..4ec18dea8ac 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -1,4 +1,4 @@ -/* $OpenBSD: in_pcb.h,v 1.113 2018/09/14 12:55:17 bluhm Exp $ */ +/* $OpenBSD: in_pcb.h,v 1.114 2018/09/20 18:59:10 bluhm Exp $ */ /* $NetBSD: in_pcb.h,v 1.14 1996/02/13 23:42:00 christos Exp $ */ /* @@ -64,6 +64,7 @@ #ifndef _NETINET_IN_PCB_H_ #define _NETINET_IN_PCB_H_ +#include #include #include #include @@ -83,6 +84,12 @@ union inpaddru { } iau_a4u; }; +/* + * Locks used to protect struct members in this file: + * I immutable after creation + * t protected by internet table mutex inpcbtable_mtx + * p protected by internet PCB mutex inp_mtx + */ /* * Common structure pcb for internet protocol implementation. * Here are stored pointers to local and foreign host table @@ -91,10 +98,11 @@ union inpaddru { * control block. */ struct inpcb { - LIST_ENTRY(inpcb) inp_hash; /* local and foreign hash */ - LIST_ENTRY(inpcb) inp_lhash; /* local port hash */ - TAILQ_ENTRY(inpcb) inp_queue; /* inet PCB queue */ - struct inpcbtable *inp_table; /* inet queue/hash table */ + LIST_ENTRY(inpcb) inp_hash; /* [t] local and foreign hash */ + LIST_ENTRY(inpcb) inp_lhash; /* [t] local port hash */ + TAILQ_ENTRY(inpcb) inp_queue; /* [t] inet PCB queue */ + struct inpcbtable *inp_table; /* [I] inet queue/hash table */ + struct mutex inp_mtx; /* protect PCB and socket */ union inpaddru inp_faddru; /* Foreign address. */ union inpaddru inp_laddru; /* Local address. */ #define inp_faddr inp_faddru.iau_a4u.inaddr @@ -103,8 +111,8 @@ struct inpcb { #define inp_laddr6 inp_laddru.iau_addr6 u_int16_t inp_fport; /* foreign port */ u_int16_t inp_lport; /* local port */ - struct socket *inp_socket; /* back pointer to socket */ - caddr_t inp_ppcb; /* pointer to per-protocol pcb */ + struct socket *inp_socket; /* [p] back pointer to socket */ + caddr_t inp_ppcb; /* [p] pointer to per-protocol pcb */ union { /* Route (notice increased size). */ struct route ru_route; struct route_in6 ru_route6; @@ -150,12 +158,12 @@ struct inpcb { LIST_HEAD(inpcbhead, inpcb); struct inpcbtable { - TAILQ_HEAD(inpthead, inpcb) inpt_queue; /* inet PCB queue */ - struct inpcbhead *inpt_hashtbl; /* local and foreign hash */ - struct inpcbhead *inpt_lhashtbl; /* local port hash */ - SIPHASH_KEY inpt_key, inpt_lkey; /* secrets for hashes */ - u_long inpt_mask, inpt_lmask; /* hash masks */ - int inpt_count, inpt_size; /* queue count, hash size */ + TAILQ_HEAD(inpthead, inpcb) inpt_queue; /* [t] inet PCB queue */ + struct inpcbhead *inpt_hashtbl; /* [t] local and foreign hash */ + struct inpcbhead *inpt_lhashtbl; /* [t] local port hash */ + SIPHASH_KEY inpt_key, inpt_lkey; /* [t] secrets for hashes */ + u_long inpt_mask, inpt_lmask; /* [t] hash masks */ + int inpt_count, inpt_size; /* [t] queue count, hash size */ }; /* flags in inp_flags: */ @@ -249,6 +257,7 @@ struct baddynamicports { #ifdef _KERNEL +extern struct mutex inpcbtable_mtx; extern struct inpcbtable rawcbtable, rawin6pcbtable; extern struct baddynamicports baddynamicports; extern struct baddynamicports rootonlyports; diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index 78d7f16e3f6..48a49ee6d0e 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip_divert.c,v 1.57 2018/04/24 15:40:55 pirofti Exp $ */ +/* $OpenBSD: ip_divert.c,v 1.58 2018/09/20 18:59:10 bluhm Exp $ */ /* * Copyright (c) 2009 Michele Marchetto @@ -186,10 +186,12 @@ divert_packet(struct mbuf *m, int dir, u_int16_t divert_port) return (0); } + mtx_enter(&inpcbtable_mtx); TAILQ_FOREACH(inp, &divbtable.inpt_queue, inp_queue) { if (inp->inp_lport == divert_port) break; } + mtx_leave(&inpcbtable_mtx); memset(&addr, 0, sizeof(addr)); addr.sin_family = AF_INET; diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 20c9776e9d5..e5fc85d80d6 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -1,4 +1,4 @@ -/* $OpenBSD: raw_ip.c,v 1.112 2018/09/13 19:53:58 bluhm Exp $ */ +/* $OpenBSD: raw_ip.c,v 1.113 2018/09/20 18:59:10 bluhm Exp $ */ /* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */ /* @@ -149,6 +149,7 @@ rip_input(struct mbuf **mp, int *offp, int proto, int af) } #endif NET_ASSERT_LOCKED(); + mtx_enter(&inpcbtable_mtx); TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) { if (inp->inp_socket->so_state & SS_CANTRCVMORE) continue; @@ -188,6 +189,8 @@ rip_input(struct mbuf **mp, int *offp, int proto, int af) } last = inp; } + mtx_leave(&inpcbtable_mtx); + if (last) { if (last->inp_flags & INP_CONTROLOPTS || last->inp_socket->so_options & SO_TIMESTAMP) diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 99bf6bebc78..15548a154bb 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1,4 +1,4 @@ -/* $OpenBSD: tcp_subr.c,v 1.172 2018/06/14 01:24:08 yasuoka Exp $ */ +/* $OpenBSD: tcp_subr.c,v 1.173 2018/09/20 18:59:10 bluhm Exp $ */ /* $NetBSD: tcp_subr.c,v 1.22 1996/02/13 23:44:00 christos Exp $ */ /* @@ -520,7 +520,9 @@ tcp_close(struct tcpcb *tp) /* Free tcpcb after all pending timers have been run. */ TCP_TIMER_ARM(tp, TCPT_REAPER, 0); + mtx_enter(&inp->inp_mtx); inp->inp_ppcb = NULL; + mtx_leave(&inp->inp_mtx); soisdisconnected(so); in_pcbdetach(inp); return (NULL); diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 3f944589182..adb4881940b 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: udp_usrreq.c,v 1.251 2018/09/13 19:53:58 bluhm Exp $ */ +/* $OpenBSD: udp_usrreq.c,v 1.252 2018/09/20 18:59:10 bluhm Exp $ */ /* $NetBSD: udp_usrreq.c,v 1.28 1996/03/16 23:54:03 christos Exp $ */ /* @@ -380,6 +380,7 @@ udp_input(struct mbuf **mp, int *offp, int proto, int af) */ last = NULL; NET_ASSERT_LOCKED(); + mtx_enter(&inpcbtable_mtx); TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) { if (inp->inp_socket->so_state & SS_CANTRCVMORE) continue; @@ -455,6 +456,7 @@ udp_input(struct mbuf **mp, int *offp, int proto, int af) SO_REUSEADDR)) == 0) break; } + mtx_leave(&inpcbtable_mtx); if (last == NULL) { /* diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index 4dfc75bb246..2f56401b029 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -1,4 +1,4 @@ -/* $OpenBSD: in6_pcb.c,v 1.106 2018/09/11 21:04:03 bluhm Exp $ */ +/* $OpenBSD: in6_pcb.c,v 1.107 2018/09/20 18:59:10 bluhm Exp $ */ /* * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project. @@ -411,6 +411,8 @@ in6_pcbnotify(struct inpcbtable *table, struct sockaddr_in6 *dst, errno = inet6ctlerrmap[cmd]; rdomain = rtable_l2(rtable); + KERNEL_LOCK(); + mtx_enter(&inpcbtable_mtx); TAILQ_FOREACH_SAFE(inp, &table->inpt_queue, inp_queue, ninp) { if ((inp->inp_flags & INP_IPV6) == 0) continue; @@ -474,7 +476,7 @@ in6_pcbnotify(struct inpcbtable *table, struct sockaddr_in6 *dst, else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6, &dst->sin6_addr) || rtable_l2(inp->inp_rtableid) != rdomain || - inp->inp_socket == 0 || + inp->inp_socket == NULL || (lport && inp->inp_lport != lport) || (!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) && !IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6, @@ -484,9 +486,18 @@ in6_pcbnotify(struct inpcbtable *table, struct sockaddr_in6 *dst, } do_notify: nmatch++; + /* + * The notify functions may grab the kernel lock. Sometimes + * we already hold the kernel lock when we acquire the pcb + * mutex. So do an extra kernel lock before the mutex outside + * of this loop. XXXSMP + */ if (notify) (*notify)(inp, errno); } + mtx_leave(&inpcbtable_mtx); + KERNEL_UNLOCK(); + return (nmatch); } @@ -501,6 +512,7 @@ in6_pcbhashlookup(struct inpcbtable *table, const struct in6_addr *faddr, u_int rdomain; rdomain = rtable_l2(rtable); + mtx_enter(&inpcbtable_mtx); head = in6_pcbhash(table, rdomain, faddr, fport, laddr, lport); LIST_FOREACH(inp, head, inp_hash) { if (!(inp->inp_flags & INP_IPV6)) @@ -521,6 +533,7 @@ in6_pcbhashlookup(struct inpcbtable *table, const struct in6_addr *faddr, break; } } + mtx_leave(&inpcbtable_mtx); #ifdef DIAGNOSTIC if (inp == NULL && in_pcbnotifymiss) { printf("%s: faddr= fport=%d laddr= lport=%d rdom=%u\n", @@ -571,6 +584,7 @@ in6_pcblookup_listen(struct inpcbtable *table, struct in6_addr *laddr, #endif rdomain = rtable_l2(rtable); + mtx_enter(&inpcbtable_mtx); head = in6_pcbhash(table, rdomain, &zeroin6_addr, 0, key1, lport); LIST_FOREACH(inp, head, inp_hash) { if (!(inp->inp_flags & INP_IPV6)) @@ -603,6 +617,7 @@ in6_pcblookup_listen(struct inpcbtable *table, struct in6_addr *laddr, LIST_REMOVE(inp, inp_hash); LIST_INSERT_HEAD(head, inp, inp_hash); } + mtx_leave(&inpcbtable_mtx); #ifdef DIAGNOSTIC if (inp == NULL && in_pcbnotifymiss) { printf("%s: laddr= lport=%d rdom=%u\n", diff --git a/sys/netinet6/ip6_divert.c b/sys/netinet6/ip6_divert.c index be67d64d1a5..1c9933f2365 100644 --- a/sys/netinet6/ip6_divert.c +++ b/sys/netinet6/ip6_divert.c @@ -1,4 +1,4 @@ -/* $OpenBSD: ip6_divert.c,v 1.56 2018/04/24 15:40:55 pirofti Exp $ */ +/* $OpenBSD: ip6_divert.c,v 1.57 2018/09/20 18:59:10 bluhm Exp $ */ /* * Copyright (c) 2009 Michele Marchetto @@ -190,10 +190,12 @@ divert6_packet(struct mbuf *m, int dir, u_int16_t divert_port) return (0); } + mtx_enter(&inpcbtable_mtx); TAILQ_FOREACH(inp, &divb6table.inpt_queue, inp_queue) { if (inp->inp_lport == divert_port) break; } + mtx_leave(&inpcbtable_mtx); memset(&addr, 0, sizeof(addr)); addr.sin6_family = AF_INET6; diff --git a/sys/netinet6/raw_ip6.c b/sys/netinet6/raw_ip6.c index 8ae55b2832e..b3b140fda82 100644 --- a/sys/netinet6/raw_ip6.c +++ b/sys/netinet6/raw_ip6.c @@ -1,4 +1,4 @@ -/* $OpenBSD: raw_ip6.c,v 1.130 2018/09/13 19:53:58 bluhm Exp $ */ +/* $OpenBSD: raw_ip6.c,v 1.131 2018/09/20 18:59:10 bluhm Exp $ */ /* $KAME: raw_ip6.c,v 1.69 2001/03/04 15:55:44 itojun Exp $ */ /* @@ -158,6 +158,7 @@ rip6_input(struct mbuf **mp, int *offp, int proto, int af) } #endif NET_ASSERT_LOCKED(); + mtx_enter(&inpcbtable_mtx); TAILQ_FOREACH(in6p, &rawin6pcbtable.inpt_queue, inp_queue) { if (in6p->inp_socket->so_state & SS_CANTRCVMORE) continue; @@ -177,8 +178,10 @@ rip6_input(struct mbuf **mp, int *offp, int proto, int af) IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, *offp, sizeof(*icmp6)); - if (icmp6 == NULL) + if (icmp6 == NULL) { + mtx_leave(&inpcbtable_mtx); return IPPROTO_DONE; + } if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type, in6p->inp_icmp6filt)) continue; @@ -212,6 +215,8 @@ rip6_input(struct mbuf **mp, int *offp, int proto, int af) } last = in6p; } + mtx_leave(&inpcbtable_mtx); + if (last) { if (last->inp_flags & IN6P_CONTROLOPTS) ip6_savecontrol(last, m, &opts); -- 2.20.1