-/* $OpenBSD: kern_sysctl.c,v 1.346 2018/07/12 01:23:38 cheloha Exp $ */
+/* $OpenBSD: kern_sysctl.c,v 1.347 2018/09/20 18:59:10 bluhm Exp $ */
/* $NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $ */
/*-
kf->inp_rtableid = inpcb->inp_rtableid;
if (so->so_type == SOCK_RAW)
kf->inp_proto = inpcb->inp_ip.ip_p;
- if (so->so_proto->pr_protocol == IPPROTO_TCP) {
+ if (so->so_proto->pr_protocol == IPPROTO_TCP &&
+ inpcb->inp_ppcb != NULL) {
struct tcpcb *tcpcb = (void *)inpcb->inp_ppcb;
kf->t_rcv_wnd = tcpcb->rcv_wnd;
kf->t_snd_wnd = tcpcb->snd_wnd;
kf->inp_rtableid = inpcb->inp_rtableid;
if (so->so_type == SOCK_RAW)
kf->inp_proto = inpcb->inp_ipv6.ip6_nxt;
- if (so->so_proto->pr_protocol == IPPROTO_TCP) {
+ if (so->so_proto->pr_protocol == IPPROTO_TCP &&
+ inpcb->inp_ppcb != NULL) {
struct tcpcb *tcpcb = (void *)inpcb->inp_ppcb;
kf->t_rcv_wnd = tcpcb->rcv_wnd;
kf->t_snd_wnd = tcpcb->snd_wnd;
} \
needed += elem_size; \
} while (0)
+
#define FILLIT(fp, fdp, i, vp, pr) \
FILLIT2(fp, fdp, i, vp, pr, NULL)
-#define FILLSO(so) \
- FILLIT2(NULL, NULL, 0, NULL, NULL, so)
+
+/*
+ * Report the socket attached to an internet PCB through FILLIT2.
+ * inp_socket is read with inp_mtx held; a PCB whose socket pointer
+ * is already NULL is skipped.  The argument is parenthesized at
+ * every use so that any pointer expression may be passed.
+ * NOTE(review): FILLIT2 runs here with inp_mtx held, and the callers
+ * in this file also hold inpcbtable_mtx -- confirm it cannot sleep.
+ */
+#define FILLINPCB(inp) do { \
+ mtx_enter(&(inp)->inp_mtx); \
+ if ((inp)->inp_socket != NULL) \
+ FILLIT2(NULL, NULL, 0, NULL, NULL, (inp)->inp_socket); \
+ mtx_leave(&(inp)->inp_mtx); \
+} while (0)
switch (op) {
case KERN_FILE_BYFILE:
if (arg == DTYPE_SOCKET) {
struct inpcb *inp;
- NET_LOCK();
+ /*
+ * The inpcb and socket fields are read without the net
+ * lock. This may result in inconsistent data being
+ * provided to userland. The fix will be to protect the
+ * socket fields with the inpcb mutex.
+ * XXXSMP
+ */
+ mtx_enter(&inpcbtable_mtx);
TAILQ_FOREACH(inp, &tcbtable.inpt_queue, inp_queue)
- FILLSO(inp->inp_socket);
+ FILLINPCB(inp);
TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue)
- FILLSO(inp->inp_socket);
+ FILLINPCB(inp);
TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue)
- FILLSO(inp->inp_socket);
+ FILLINPCB(inp);
#ifdef INET6
TAILQ_FOREACH(inp, &rawin6pcbtable.inpt_queue,
inp_queue)
- FILLSO(inp->inp_socket);
+ FILLINPCB(inp);
#endif
- NET_UNLOCK();
+ mtx_leave(&inpcbtable_mtx);
}
fp = NULL;
while ((fp = fd_iterfile(fp, p)) != NULL) {
-/* $OpenBSD: in_pcb.c,v 1.245 2018/09/14 12:55:17 bluhm Exp $ */
+/* $OpenBSD: in_pcb.c,v 1.246 2018/09/20 18:59:10 bluhm Exp $ */
/* $NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $ */
/*
int ipport_hifirstauto = IPPORT_HIFIRSTAUTO;
int ipport_hilastauto = IPPORT_HILASTAUTO;
+/* Protect PCB table queues, lookup hashes and existence of inpcb. */
+struct mutex inpcbtable_mtx = MUTEX_INITIALIZER(IPL_SOFTNET);
+
struct baddynamicports baddynamicports;
struct baddynamicports rootonlyports;
struct pool inpcb_pool;
int inpcb_pool_initialized = 0;
-int in_pcbresize (struct inpcbtable *, int);
+void in_pcbrehash_locked(struct inpcb *);
+int in_pcbresize(struct inpcbtable *, int);
#define INPCBHASH_LOADFACTOR(_x) (((_x) * 3) / 4)
in_pcbinit(struct inpcbtable *table, int hashsize)
{
+ mtx_enter(&inpcbtable_mtx);
TAILQ_INIT(&table->inpt_queue);
table->inpt_hashtbl = hashinit(hashsize, M_PCB, M_NOWAIT,
&table->inpt_mask);
table->inpt_size = hashsize;
arc4random_buf(&table->inpt_key, sizeof(table->inpt_key));
arc4random_buf(&table->inpt_lkey, sizeof(table->inpt_lkey));
+ mtx_leave(&inpcbtable_mtx);
}
/*
inp->inp_cksum6 = -1;
#endif /* INET6 */
+ mtx_enter(&inpcbtable_mtx);
if (table->inpt_count++ > INPCBHASH_LOADFACTOR(table->inpt_size))
(void)in_pcbresize(table, table->inpt_size * 2);
TAILQ_INSERT_HEAD(&table->inpt_queue, inp, inp_queue);
&inp->inp_faddr, inp->inp_fport,
&inp->inp_laddr, inp->inp_lport);
LIST_INSERT_HEAD(head, inp, inp_hash);
+ mtx_leave(&inpcbtable_mtx);
+
so->so_pcb = inp;
return (0);
void
in_pcbdetach(struct inpcb *inp)
{
- struct socket *so = inp->inp_socket;
+ struct socket *so;
NET_ASSERT_LOCKED();
+ mtx_enter(&inp->inp_mtx);
+ so = inp->inp_socket;
+ inp->inp_socket = NULL;
+ mtx_leave(&inp->inp_mtx);
+
so->so_pcb = NULL;
/*
* As long as the NET_LOCK() is the default lock for Internet
pf_inp_unlink(inp);
}
#endif
+ mtx_enter(&inpcbtable_mtx);
LIST_REMOVE(inp, inp_lhash);
LIST_REMOVE(inp, inp_hash);
TAILQ_REMOVE(&inp->inp_table->inpt_queue, inp, inp_queue);
inp->inp_table->inpt_count--;
+ mtx_leave(&inpcbtable_mtx);
in_pcbunref(inp);
}
in_pcbunref(struct inpcb *inp)
{
if (refcnt_rele(&inp->inp_refcnt)) {
+ KASSERT(inp->inp_socket == NULL);
KASSERT((LIST_NEXT(inp, inp_hash) == NULL) ||
(LIST_NEXT(inp, inp_hash) == _Q_INVALID));
KASSERT((LIST_NEXT(inp, inp_lhash) == NULL) ||
return;
rdomain = rtable_l2(rtable);
+ KERNEL_LOCK();
+ mtx_enter(&inpcbtable_mtx);
TAILQ_FOREACH_SAFE(inp, &table->inpt_queue, inp_queue, ninp) {
#ifdef INET6
if (inp->inp_flags & INP_IPV6)
#endif
if (inp->inp_faddr.s_addr != faddr.s_addr ||
rtable_l2(inp->inp_rtableid) != rdomain ||
- inp->inp_socket == 0) {
+ inp->inp_socket == NULL) {
continue;
}
+ /*
+ * The notify functions may grab the kernel lock, and some
+ * callers already hold the kernel lock when they acquire the
+ * pcb table mutex. To keep a single lock order, the kernel
+ * lock is taken before the mutex, outside of this loop.
+ * XXXSMP
+ */
if (notify)
(*notify)(inp, errno);
}
+ mtx_leave(&inpcbtable_mtx);
+ KERNEL_UNLOCK();
}
/*
u_int rdomain;
rdomain = rtable_l2(rtable);
+ mtx_enter(&inpcbtable_mtx);
head = in_pcblhash(table, rdomain, lport);
LIST_FOREACH(inp, head, inp_lhash) {
if (rtable_l2(inp->inp_rtableid) != rdomain)
break;
}
}
+ mtx_leave(&inpcbtable_mtx);
+
return (match);
}
void
in_pcbrehash(struct inpcb *inp)
+{
+ mtx_enter(&inpcbtable_mtx); /* take the PCB table mutex on behalf of the caller */
+ in_pcbrehash_locked(inp); /* does the rehash; asserts inpcbtable_mtx is held */
+ mtx_leave(&inpcbtable_mtx);
+}
+
+void
+in_pcbrehash_locked(struct inpcb *inp)
{
struct inpcbtable *table = inp->inp_table;
struct inpcbhead *head;
NET_ASSERT_LOCKED();
+ MUTEX_ASSERT_LOCKED(&inpcbtable_mtx);
LIST_REMOVE(inp, inp_lhash);
head = in_pcblhash(table, inp->inp_rtableid, inp->inp_lport);
void *nhashtbl, *nlhashtbl, *ohashtbl, *olhashtbl;
struct inpcb *inp;
+ MUTEX_ASSERT_LOCKED(&inpcbtable_mtx);
+
ohashtbl = table->inpt_hashtbl;
olhashtbl = table->inpt_lhashtbl;
osize = table->inpt_size;
arc4random_buf(&table->inpt_lkey, sizeof(table->inpt_lkey));
TAILQ_FOREACH(inp, &table->inpt_queue, inp_queue) {
- in_pcbrehash(inp);
+ in_pcbrehash_locked(inp);
}
hashfree(ohashtbl, osize, M_PCB);
hashfree(olhashtbl, osize, M_PCB);
u_int rdomain;
rdomain = rtable_l2(rtable);
+ mtx_enter(&inpcbtable_mtx);
head = in_pcbhash(table, rdomain, &faddr, fport, &laddr, lport);
LIST_FOREACH(inp, head, inp_hash) {
#ifdef INET6
break;
}
}
+ mtx_leave(&inpcbtable_mtx);
#ifdef DIAGNOSTIC
if (inp == NULL && in_pcbnotifymiss) {
printf("%s: faddr=%08x fport=%d laddr=%08x lport=%d rdom=%u\n",
#endif
rdomain = rtable_l2(rtable);
+ mtx_enter(&inpcbtable_mtx);
head = in_pcbhash(table, rdomain, &zeroin_addr, 0, key1, lport);
LIST_FOREACH(inp, head, inp_hash) {
#ifdef INET6
LIST_REMOVE(inp, inp_hash);
LIST_INSERT_HEAD(head, inp, inp_hash);
}
+ mtx_leave(&inpcbtable_mtx);
#ifdef DIAGNOSTIC
if (inp == NULL && in_pcbnotifymiss) {
printf("%s: laddr=%08x lport=%d rdom=%u\n",
-/* $OpenBSD: in_pcb.h,v 1.113 2018/09/14 12:55:17 bluhm Exp $ */
+/* $OpenBSD: in_pcb.h,v 1.114 2018/09/20 18:59:10 bluhm Exp $ */
/* $NetBSD: in_pcb.h,v 1.14 1996/02/13 23:42:00 christos Exp $ */
/*
#ifndef _NETINET_IN_PCB_H_
#define _NETINET_IN_PCB_H_
+#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/refcnt.h>
#include <netinet/ip6.h>
} iau_a4u;
};
+/*
+ * Locks used to protect struct members in this file:
+ * I immutable after creation
+ * t protected by internet table mutex inpcbtable_mtx
+ * p protected by internet PCB mutex inp_mtx
+ */
/*
* Common structure pcb for internet protocol implementation.
* Here are stored pointers to local and foreign host table
* control block.
*/
struct inpcb {
- LIST_ENTRY(inpcb) inp_hash; /* local and foreign hash */
- LIST_ENTRY(inpcb) inp_lhash; /* local port hash */
- TAILQ_ENTRY(inpcb) inp_queue; /* inet PCB queue */
- struct inpcbtable *inp_table; /* inet queue/hash table */
+ LIST_ENTRY(inpcb) inp_hash; /* [t] local and foreign hash */
+ LIST_ENTRY(inpcb) inp_lhash; /* [t] local port hash */
+ TAILQ_ENTRY(inpcb) inp_queue; /* [t] inet PCB queue */
+ struct inpcbtable *inp_table; /* [I] inet queue/hash table */
+ struct mutex inp_mtx; /* protect PCB and socket */
union inpaddru inp_faddru; /* Foreign address. */
union inpaddru inp_laddru; /* Local address. */
#define inp_faddr inp_faddru.iau_a4u.inaddr
#define inp_laddr6 inp_laddru.iau_addr6
u_int16_t inp_fport; /* foreign port */
u_int16_t inp_lport; /* local port */
- struct socket *inp_socket; /* back pointer to socket */
- caddr_t inp_ppcb; /* pointer to per-protocol pcb */
+ struct socket *inp_socket; /* [p] back pointer to socket */
+ caddr_t inp_ppcb; /* [p] pointer to per-protocol pcb */
union { /* Route (notice increased size). */
struct route ru_route;
struct route_in6 ru_route6;
LIST_HEAD(inpcbhead, inpcb);
struct inpcbtable {
- TAILQ_HEAD(inpthead, inpcb) inpt_queue; /* inet PCB queue */
- struct inpcbhead *inpt_hashtbl; /* local and foreign hash */
- struct inpcbhead *inpt_lhashtbl; /* local port hash */
- SIPHASH_KEY inpt_key, inpt_lkey; /* secrets for hashes */
- u_long inpt_mask, inpt_lmask; /* hash masks */
- int inpt_count, inpt_size; /* queue count, hash size */
+ TAILQ_HEAD(inpthead, inpcb) inpt_queue; /* [t] inet PCB queue */
+ struct inpcbhead *inpt_hashtbl; /* [t] local and foreign hash */
+ struct inpcbhead *inpt_lhashtbl; /* [t] local port hash */
+ SIPHASH_KEY inpt_key, inpt_lkey; /* [t] secrets for hashes */
+ u_long inpt_mask, inpt_lmask; /* [t] hash masks */
+ int inpt_count, inpt_size; /* [t] queue count, hash size */
};
/* flags in inp_flags: */
#ifdef _KERNEL
+extern struct mutex inpcbtable_mtx;
extern struct inpcbtable rawcbtable, rawin6pcbtable;
extern struct baddynamicports baddynamicports;
extern struct baddynamicports rootonlyports;
-/* $OpenBSD: ip_divert.c,v 1.57 2018/04/24 15:40:55 pirofti Exp $ */
+/* $OpenBSD: ip_divert.c,v 1.58 2018/09/20 18:59:10 bluhm Exp $ */
/*
* Copyright (c) 2009 Michele Marchetto <michele@openbsd.org>
return (0);
}
+ mtx_enter(&inpcbtable_mtx);
TAILQ_FOREACH(inp, &divbtable.inpt_queue, inp_queue) {
if (inp->inp_lport == divert_port)
break;
}
+ mtx_leave(&inpcbtable_mtx);
memset(&addr, 0, sizeof(addr));
addr.sin_family = AF_INET;
-/* $OpenBSD: raw_ip.c,v 1.112 2018/09/13 19:53:58 bluhm Exp $ */
+/* $OpenBSD: raw_ip.c,v 1.113 2018/09/20 18:59:10 bluhm Exp $ */
/* $NetBSD: raw_ip.c,v 1.25 1996/02/18 18:58:33 christos Exp $ */
/*
}
#endif
NET_ASSERT_LOCKED();
+ mtx_enter(&inpcbtable_mtx);
TAILQ_FOREACH(inp, &rawcbtable.inpt_queue, inp_queue) {
if (inp->inp_socket->so_state & SS_CANTRCVMORE)
continue;
}
last = inp;
}
+ mtx_leave(&inpcbtable_mtx);
+
if (last) {
if (last->inp_flags & INP_CONTROLOPTS ||
last->inp_socket->so_options & SO_TIMESTAMP)
-/* $OpenBSD: tcp_subr.c,v 1.172 2018/06/14 01:24:08 yasuoka Exp $ */
+/* $OpenBSD: tcp_subr.c,v 1.173 2018/09/20 18:59:10 bluhm Exp $ */
/* $NetBSD: tcp_subr.c,v 1.22 1996/02/13 23:44:00 christos Exp $ */
/*
/* Free tcpcb after all pending timers have been run. */
TCP_TIMER_ARM(tp, TCPT_REAPER, 0);
+ mtx_enter(&inp->inp_mtx);
inp->inp_ppcb = NULL;
+ mtx_leave(&inp->inp_mtx);
soisdisconnected(so);
in_pcbdetach(inp);
return (NULL);
-/* $OpenBSD: udp_usrreq.c,v 1.251 2018/09/13 19:53:58 bluhm Exp $ */
+/* $OpenBSD: udp_usrreq.c,v 1.252 2018/09/20 18:59:10 bluhm Exp $ */
/* $NetBSD: udp_usrreq.c,v 1.28 1996/03/16 23:54:03 christos Exp $ */
/*
*/
last = NULL;
NET_ASSERT_LOCKED();
+ mtx_enter(&inpcbtable_mtx);
TAILQ_FOREACH(inp, &udbtable.inpt_queue, inp_queue) {
if (inp->inp_socket->so_state & SS_CANTRCVMORE)
continue;
SO_REUSEADDR)) == 0)
break;
}
+ mtx_leave(&inpcbtable_mtx);
if (last == NULL) {
/*
-/* $OpenBSD: in6_pcb.c,v 1.106 2018/09/11 21:04:03 bluhm Exp $ */
+/* $OpenBSD: in6_pcb.c,v 1.107 2018/09/20 18:59:10 bluhm Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
errno = inet6ctlerrmap[cmd];
rdomain = rtable_l2(rtable);
+ KERNEL_LOCK();
+ mtx_enter(&inpcbtable_mtx);
TAILQ_FOREACH_SAFE(inp, &table->inpt_queue, inp_queue, ninp) {
if ((inp->inp_flags & INP_IPV6) == 0)
continue;
else if (!IN6_ARE_ADDR_EQUAL(&inp->inp_faddr6,
&dst->sin6_addr) ||
rtable_l2(inp->inp_rtableid) != rdomain ||
- inp->inp_socket == 0 ||
+ inp->inp_socket == NULL ||
(lport && inp->inp_lport != lport) ||
(!IN6_IS_ADDR_UNSPECIFIED(&sa6_src.sin6_addr) &&
!IN6_ARE_ADDR_EQUAL(&inp->inp_laddr6,
}
do_notify:
nmatch++;
+ /*
+ * The notify functions may grab the kernel lock, and some
+ * callers already hold the kernel lock when they acquire the
+ * pcb table mutex. To keep a single lock order, the kernel
+ * lock is taken before the mutex, outside of this loop.
+ * XXXSMP
+ */
if (notify)
(*notify)(inp, errno);
}
+ mtx_leave(&inpcbtable_mtx);
+ KERNEL_UNLOCK();
+
return (nmatch);
}
u_int rdomain;
rdomain = rtable_l2(rtable);
+ mtx_enter(&inpcbtable_mtx);
head = in6_pcbhash(table, rdomain, faddr, fport, laddr, lport);
LIST_FOREACH(inp, head, inp_hash) {
if (!(inp->inp_flags & INP_IPV6))
break;
}
}
+ mtx_leave(&inpcbtable_mtx);
#ifdef DIAGNOSTIC
if (inp == NULL && in_pcbnotifymiss) {
printf("%s: faddr= fport=%d laddr= lport=%d rdom=%u\n",
#endif
rdomain = rtable_l2(rtable);
+ mtx_enter(&inpcbtable_mtx);
head = in6_pcbhash(table, rdomain, &zeroin6_addr, 0, key1, lport);
LIST_FOREACH(inp, head, inp_hash) {
if (!(inp->inp_flags & INP_IPV6))
LIST_REMOVE(inp, inp_hash);
LIST_INSERT_HEAD(head, inp, inp_hash);
}
+ mtx_leave(&inpcbtable_mtx);
#ifdef DIAGNOSTIC
if (inp == NULL && in_pcbnotifymiss) {
printf("%s: laddr= lport=%d rdom=%u\n",
-/* $OpenBSD: ip6_divert.c,v 1.56 2018/04/24 15:40:55 pirofti Exp $ */
+/* $OpenBSD: ip6_divert.c,v 1.57 2018/09/20 18:59:10 bluhm Exp $ */
/*
* Copyright (c) 2009 Michele Marchetto <michele@openbsd.org>
return (0);
}
+ mtx_enter(&inpcbtable_mtx);
TAILQ_FOREACH(inp, &divb6table.inpt_queue, inp_queue) {
if (inp->inp_lport == divert_port)
break;
}
+ mtx_leave(&inpcbtable_mtx);
memset(&addr, 0, sizeof(addr));
addr.sin6_family = AF_INET6;
-/* $OpenBSD: raw_ip6.c,v 1.130 2018/09/13 19:53:58 bluhm Exp $ */
+/* $OpenBSD: raw_ip6.c,v 1.131 2018/09/20 18:59:10 bluhm Exp $ */
/* $KAME: raw_ip6.c,v 1.69 2001/03/04 15:55:44 itojun Exp $ */
/*
}
#endif
NET_ASSERT_LOCKED();
+ mtx_enter(&inpcbtable_mtx);
TAILQ_FOREACH(in6p, &rawin6pcbtable.inpt_queue, inp_queue) {
if (in6p->inp_socket->so_state & SS_CANTRCVMORE)
continue;
IP6_EXTHDR_GET(icmp6, struct icmp6_hdr *, m, *offp,
sizeof(*icmp6));
- if (icmp6 == NULL)
+ if (icmp6 == NULL) {
+ mtx_leave(&inpcbtable_mtx);
return IPPROTO_DONE;
+ }
if (ICMP6_FILTER_WILLBLOCK(icmp6->icmp6_type,
in6p->inp_icmp6filt))
continue;
}
last = in6p;
}
+ mtx_leave(&inpcbtable_mtx);
+
if (last) {
if (last->inp_flags & IN6P_CONTROLOPTS)
ip6_savecontrol(last, m, &opts);