-/* $OpenBSD: in_pcb.c,v 1.281 2023/12/03 20:24:17 bluhm Exp $ */
+/* $OpenBSD: in_pcb.c,v 1.282 2023/12/07 16:08:30 bluhm Exp $ */
/* $NetBSD: in_pcb.c,v 1.25 1996/02/13 23:41:53 christos Exp $ */
/*
const struct in_addr *, u_short, const struct in_addr *, u_short);
uint64_t in_pcblhash(struct inpcbtable *, u_int, u_short);
+struct inpcb *in_pcblookup_lock(struct inpcbtable *, struct in_addr, u_int,
+ struct in_addr, u_int, u_int, int);
+int in_pcbaddrisavail_lock(struct inpcb *, struct sockaddr_in *, int,
+ struct proc *, int);
+int in_pcbpickport(u_int16_t *, const void *, int, const struct inpcb *,
+ struct proc *);
+
/*
* in_pcb is used for inet and inet6. in6_pcb only contains special
* IPv6 cases. So the internet initializer is used for both domains.
}
int
-in_pcbbind(struct inpcb *inp, struct mbuf *nam, struct proc *p)
+in_pcbbind_locked(struct inpcb *inp, struct mbuf *nam, struct proc *p)
{
- struct inpcbtable *table = inp->inp_table;
struct socket *so = inp->inp_socket;
u_int16_t lport = 0;
int wild = 0;
if ((error = in6_nam2sin6(nam, &sin6)))
return (error);
- if ((error = in6_pcbaddrisavail(inp, sin6, wild, p)))
+ if ((error = in6_pcbaddrisavail_lock(inp, sin6, wild,
+ p, IN_PCBLOCK_HOLD)))
return (error);
laddr = &sin6->sin6_addr;
lport = sin6->sin6_port;
if ((error = in_nam2sin(nam, &sin)))
return (error);
- if ((error = in_pcbaddrisavail(inp, sin, wild, p)))
+ if ((error = in_pcbaddrisavail_lock(inp, sin, wild,
+ p, IN_PCBLOCK_HOLD)))
return (error);
laddr = &sin->sin_addr;
lport = sin->sin_port;
inp->inp_laddr = *(struct in_addr *)laddr;
}
inp->inp_lport = lport;
- mtx_enter(&table->inpt_mtx);
in_pcbrehash(inp);
- mtx_leave(&table->inpt_mtx);
return (0);
}
int
-in_pcbaddrisavail(struct inpcb *inp, struct sockaddr_in *sin, int wild,
- struct proc *p)
+in_pcbbind(struct inpcb *inp, struct mbuf *nam, struct proc *p)
+{
+ struct inpcbtable *table = inp->inp_table;
+ int error;
+
+ /*
+ * keep lookup, modification, and rehash in sync: the table mutex
+ * is held across the whole in_pcbbind_locked() call
+ */
+ mtx_enter(&table->inpt_mtx);
+ error = in_pcbbind_locked(inp, nam, p);
+ mtx_leave(&table->inpt_mtx);
+
+ return error;
+}
+
+int
+in_pcbaddrisavail_lock(struct inpcb *inp, struct sockaddr_in *sin, int wild,
+ struct proc *p, int lock)
{
struct socket *so = inp->inp_socket;
struct inpcbtable *table = inp->inp_table;
int error = 0;
if (so->so_euid && !IN_MULTICAST(sin->sin_addr.s_addr)) {
- t = in_pcblookup_local(table, &sin->sin_addr, lport,
- INPLOOKUP_WILDCARD, inp->inp_rtableid);
+ t = in_pcblookup_local_lock(table, &sin->sin_addr,
+ lport, INPLOOKUP_WILDCARD, inp->inp_rtableid, lock);
if (t && (so->so_euid != t->inp_socket->so_euid))
error = EADDRINUSE;
- in_pcbunref(t);
+ if (lock == IN_PCBLOCK_GRAB)
+ in_pcbunref(t);
if (error)
return (error);
}
- t = in_pcblookup_local(table, &sin->sin_addr, lport,
- wild, inp->inp_rtableid);
+ t = in_pcblookup_local_lock(table, &sin->sin_addr, lport,
+ wild, inp->inp_rtableid, lock);
if (t && (reuseport & t->inp_socket->so_options) == 0)
error = EADDRINUSE;
- in_pcbunref(t);
+ if (lock == IN_PCBLOCK_GRAB)
+ in_pcbunref(t);
if (error)
return (error);
}
return (0);
}
+int
+in_pcbaddrisavail(struct inpcb *inp, struct sockaddr_in *sin, int wild,
+ struct proc *p)
+{ /* IN_PCBLOCK_GRAB: each lookup takes table->inpt_mtx itself */
+ return in_pcbaddrisavail_lock(inp, sin, wild, p, IN_PCBLOCK_GRAB);
+}
+
int
in_pcbpickport(u_int16_t *lport, const void *laddr, int wild,
const struct inpcb *inp, struct proc *p)
u_int16_t first, last, lower, higher, candidate, localport;
int count;
+ MUTEX_ASSERT_LOCKED(&table->inpt_mtx);
+
if (inp->inp_flags & INP_HIGHPORT) {
first = ipport_hifirstauto; /* sysctl */
last = ipport_hilastauto;
count = higher - lower;
candidate = lower + arc4random_uniform(count);
- t = NULL;
do {
- in_pcbunref(t);
do {
if (count-- < 0) /* completely used? */
return (EADDRNOTAVAIL);
candidate = lower;
localport = htons(candidate);
} while (in_baddynamic(candidate, so->so_proto->pr_protocol));
- t = in_pcblookup_local(table, laddr, localport, wild,
- inp->inp_rtableid);
+ t = in_pcblookup_local_lock(table, laddr, localport, wild,
+ inp->inp_rtableid, IN_PCBLOCK_HOLD);
} while (t != NULL);
*lport = localport;
if (error)
return (error);
- t = in_pcblookup(inp->inp_table, sin->sin_addr, sin->sin_port,
- ina, inp->inp_lport, inp->inp_rtableid);
+ /* keep lookup, modification, and rehash in sync */
+ mtx_enter(&table->inpt_mtx);
+
+ t = in_pcblookup_lock(inp->inp_table, sin->sin_addr, sin->sin_port,
+ ina, inp->inp_lport, inp->inp_rtableid, IN_PCBLOCK_HOLD);
if (t != NULL) {
- in_pcbunref(t);
+ mtx_leave(&table->inpt_mtx);
return (EADDRINUSE);
}
if (inp->inp_laddr.s_addr == INADDR_ANY) {
if (inp->inp_lport == 0) {
- error = in_pcbbind(inp, NULL, curproc);
- if (error)
+ error = in_pcbbind_locked(inp, NULL, curproc);
+ if (error) {
+ mtx_leave(&table->inpt_mtx);
return (error);
- t = in_pcblookup(inp->inp_table, sin->sin_addr,
+ }
+ t = in_pcblookup_lock(inp->inp_table, sin->sin_addr,
sin->sin_port, ina, inp->inp_lport,
- inp->inp_rtableid);
+ inp->inp_rtableid, IN_PCBLOCK_HOLD);
if (t != NULL) {
inp->inp_lport = 0;
- in_pcbunref(t);
+ mtx_leave(&table->inpt_mtx);
return (EADDRINUSE);
}
}
}
inp->inp_faddr = sin->sin_addr;
inp->inp_fport = sin->sin_port;
- mtx_enter(&table->inpt_mtx);
in_pcbrehash(inp);
+
mtx_leave(&table->inpt_mtx);
#if NSTOEPLITZ > 0
void
in_pcbdisconnect(struct inpcb *inp)
{
+ /*
+ * XXXSMP pf lock sleeps, so we cannot use table->inpt_mtx
+ * to keep inp_pf_sk in sync with pcb. Use net lock for now.
+ */
+ NET_ASSERT_LOCKED_EXCLUSIVE();
#if NPF > 0
if (inp->inp_pf_sk) {
pf_remove_divert_state(inp->inp_pf_sk);
} else
#endif
ip_freemoptions(inp->inp_moptions);
+
+ /*
+ * XXXSMP pf lock sleeps, so we cannot use table->inpt_mtx
+ * to keep inp_pf_sk in sync with pcb. Use net lock for now.
+ */
+ NET_ASSERT_LOCKED_EXCLUSIVE();
#if NPF > 0
if (inp->inp_pf_sk) {
pf_remove_divert_state(inp->inp_pf_sk);
}
struct inpcb *
-in_pcblookup_local(struct inpcbtable *table, const void *laddrp,
- u_int lport_arg, int flags, u_int rtable)
+in_pcblookup_local_lock(struct inpcbtable *table, const void *laddrp,
+ u_int lport_arg, int flags, u_int rtable, int lock)
{
struct inpcb *inp, *match = NULL;
int matchwild = 3, wildcard;
rdomain = rtable_l2(rtable);
lhash = in_pcblhash(table, rdomain, lport);
- mtx_enter(&table->inpt_mtx);
+ if (lock == IN_PCBLOCK_GRAB) {
+ mtx_enter(&table->inpt_mtx);
+ } else {
+ KASSERT(lock == IN_PCBLOCK_HOLD);
+ MUTEX_ASSERT_LOCKED(&table->inpt_mtx);
+ }
head = &table->inpt_lhashtbl[lhash & table->inpt_lmask];
LIST_FOREACH(inp, head, inp_lhash) {
if (rtable_l2(inp->inp_rtableid) != rdomain)
break;
}
}
- in_pcbref(match);
- mtx_leave(&table->inpt_mtx);
+ if (lock == IN_PCBLOCK_GRAB) {
+ in_pcbref(match);
+ mtx_leave(&table->inpt_mtx);
+ }
return (match);
}
void
in_pcbrehash(struct inpcb *inp)
{
- struct inpcbtable *table = inp->inp_table;
-
- MUTEX_ASSERT_LOCKED(&table->inpt_mtx);
-
LIST_REMOVE(inp, inp_lhash);
LIST_REMOVE(inp, inp_hash);
in_pcbhash_insert(inp);
* After those two lookups no other are necessary.
*/
struct inpcb *
-in_pcblookup(struct inpcbtable *table, struct in_addr faddr,
- u_int fport, struct in_addr laddr, u_int lport, u_int rtable)
+in_pcblookup_lock(struct inpcbtable *table, struct in_addr faddr,
+ u_int fport, struct in_addr laddr, u_int lport, u_int rtable, int lock)
{
struct inpcb *inp;
uint64_t hash;
rdomain = rtable_l2(rtable);
hash = in_pcbhash(table, rdomain, &faddr, fport, &laddr, lport);
- mtx_enter(&table->inpt_mtx);
+ if (lock == IN_PCBLOCK_GRAB) {
+ mtx_enter(&table->inpt_mtx);
+ } else {
+ KASSERT(lock == IN_PCBLOCK_HOLD);
+ MUTEX_ASSERT_LOCKED(&table->inpt_mtx);
+ }
inp = in_pcbhash_lookup(table, hash, rdomain,
&faddr, fport, &laddr, lport);
- in_pcbref(inp);
- mtx_leave(&table->inpt_mtx);
+ if (lock == IN_PCBLOCK_GRAB) {
+ in_pcbref(inp);
+ mtx_leave(&table->inpt_mtx);
+ }
#ifdef DIAGNOSTIC
if (inp == NULL && in_pcbnotifymiss) {
return (inp);
}
+struct inpcb *
+in_pcblookup(struct inpcbtable *table, struct in_addr faddr,
+ u_int fport, struct in_addr laddr, u_int lport, u_int rtable)
+{
+ return in_pcblookup_lock(table, faddr, fport, laddr, lport, rtable,
+ IN_PCBLOCK_GRAB); /* takes inpt_mtx, refs the pcb found */
+}
+
/*
* The in(6)_pcblookup_listen functions are used to locate listening
* sockets quickly. This are sockets with unspecified foreign address
-/* $OpenBSD: in_pcb.h,v 1.142 2023/12/03 20:24:17 bluhm Exp $ */
+/* $OpenBSD: in_pcb.h,v 1.143 2023/12/07 16:08:30 bluhm Exp $ */
/* $NetBSD: in_pcb.h,v 1.14 1996/02/13 23:42:00 christos Exp $ */
/*
#ifdef _KERNEL
+#define IN_PCBLOCK_HOLD 1 /* caller holds inpt_mtx, result not ref'd */
+#define IN_PCBLOCK_GRAB 2 /* lookup takes inpt_mtx, result is ref'd */
+
extern struct inpcbtable rawcbtable, rawin6pcbtable;
extern struct baddynamicports baddynamicports;
extern struct baddynamicports rootonlyports;
void in_init(void);
void in_losing(struct inpcb *);
int in_pcballoc(struct socket *, struct inpcbtable *, int);
+int in_pcbbind_locked(struct inpcb *, struct mbuf *, struct proc *);
int in_pcbbind(struct inpcb *, struct mbuf *, struct proc *);
int in_pcbaddrisavail(struct inpcb *, struct sockaddr_in *, int,
struct proc *);
u_short, const struct in6_addr *, u_short);
struct inpcb *
in6_pcblookup(struct inpcbtable *, const struct in6_addr *,
- u_int, const struct in6_addr *, u_int, u_int);
+ u_int, const struct in6_addr *, u_int, u_int);
struct inpcb *
in6_pcblookup_listen(struct inpcbtable *, struct in6_addr *, u_int,
struct mbuf *, u_int);
+int in6_pcbaddrisavail_lock(struct inpcb *, struct sockaddr_in6 *, int,
+ struct proc *, int);
int in6_pcbaddrisavail(struct inpcb *, struct sockaddr_in6 *, int,
struct proc *);
int in6_pcbconnect(struct inpcb *, struct mbuf *);
#endif /* INET6 */
void in_pcbinit(struct inpcbtable *, int);
struct inpcb *
- in_pcblookup_local(struct inpcbtable *, const void *, u_int, int,
- u_int);
+ in_pcblookup_local_lock(struct inpcbtable *, const void *, u_int, int,
+ u_int, int);
void in_pcbnotifyall(struct inpcbtable *, struct sockaddr *,
u_int, int, void (*)(struct inpcb *, int));
void in_pcbrehash(struct inpcb *);
u_int, const struct sockaddr_in6 *, u_int, u_int, int, void *,
void (*)(struct inpcb *, int));
int in6_selecthlim(struct inpcb *);
-int in_pcbpickport(u_int16_t *, const void *, int, const struct inpcb *,
- struct proc *);
int in_pcbset_rtableid(struct inpcb *, u_int);
void in_pcbset_laddr(struct inpcb *, const struct sockaddr *, u_int);
void in_pcbunset_faddr(struct inpcb *);
-/* $OpenBSD: in6_pcb.c,v 1.130 2023/12/03 20:36:24 bluhm Exp $ */
+/* $OpenBSD: in6_pcb.c,v 1.131 2023/12/07 16:08:30 bluhm Exp $ */
/*
* Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
struct inpcb *in6_pcbhash_lookup(struct inpcbtable *, uint64_t, u_int,
const struct in6_addr *, u_short, const struct in6_addr *, u_short);
+struct inpcb * in6_pcblookup_lock(struct inpcbtable *, const struct in6_addr *,
+ u_int, const struct in6_addr *, u_int, u_int, int);
+
uint64_t
in6_pcbhash(struct inpcbtable *table, u_int rdomain,
const struct in6_addr *faddr, u_short fport,
}
int
-in6_pcbaddrisavail(struct inpcb *inp, struct sockaddr_in6 *sin6, int wild,
- struct proc *p)
+in6_pcbaddrisavail_lock(struct inpcb *inp, struct sockaddr_in6 *sin6, int wild,
+ struct proc *p, int lock)
{
struct socket *so = inp->inp_socket;
struct inpcbtable *table = inp->inp_table;
int error = 0;
if (so->so_euid && !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr)) {
- t = in_pcblookup_local(table, &sin6->sin6_addr, lport,
- INPLOOKUP_WILDCARD | INPLOOKUP_IPV6,
- inp->inp_rtableid);
+ t = in_pcblookup_local_lock(table, &sin6->sin6_addr,
+ lport, INPLOOKUP_WILDCARD | INPLOOKUP_IPV6,
+ inp->inp_rtableid, lock);
if (t && (so->so_euid != t->inp_socket->so_euid))
error = EADDRINUSE;
- in_pcbunref(t);
+ if (lock == IN_PCBLOCK_GRAB)
+ in_pcbunref(t);
if (error)
return (error);
}
- t = in_pcblookup_local(table, &sin6->sin6_addr, lport,
- wild, inp->inp_rtableid);
+ t = in_pcblookup_local_lock(table, &sin6->sin6_addr, lport,
+ wild, inp->inp_rtableid, lock);
if (t && (reuseport & t->inp_socket->so_options) == 0)
error = EADDRINUSE;
- in_pcbunref(t);
+ if (lock == IN_PCBLOCK_GRAB)
+ in_pcbunref(t);
if (error)
return (error);
}
return (0);
}
+int
+in6_pcbaddrisavail(struct inpcb *inp, struct sockaddr_in6 *sin6, int wild,
+ struct proc *p)
+{ /* IN_PCBLOCK_GRAB: each lookup takes table->inpt_mtx itself */
+ return in6_pcbaddrisavail_lock(inp, sin6, wild, p, IN_PCBLOCK_GRAB);
+}
+
/*
* Connect from a socket to a specified address.
* Both address and port must be specified in argument sin6.
inp->inp_ipv6.ip6_hlim = (u_int8_t)in6_selecthlim(inp);
- t = in6_pcblookup(inp->inp_table, &sin6->sin6_addr, sin6->sin6_port,
+ /* keep lookup, modification, and rehash in sync */
+ mtx_enter(&table->inpt_mtx);
+
+ t = in6_pcblookup_lock(inp->inp_table, &sin6->sin6_addr,
+ sin6->sin6_port,
IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6) ? in6a : &inp->inp_laddr6,
- inp->inp_lport, inp->inp_rtableid);
+ inp->inp_lport, inp->inp_rtableid, IN_PCBLOCK_HOLD);
if (t != NULL) {
- in_pcbunref(t);
+ mtx_leave(&table->inpt_mtx);
return (EADDRINUSE);
}
if (IN6_IS_ADDR_UNSPECIFIED(&inp->inp_laddr6)) {
if (inp->inp_lport == 0) {
- error = in_pcbbind(inp, NULL, curproc);
- if (error)
+ error = in_pcbbind_locked(inp, NULL, curproc);
+ if (error) {
+ mtx_leave(&table->inpt_mtx);
return (error);
- t = in6_pcblookup(inp->inp_table, &sin6->sin6_addr,
+ }
+ t = in6_pcblookup_lock(inp->inp_table, &sin6->sin6_addr,
sin6->sin6_port, in6a, inp->inp_lport,
- inp->inp_rtableid);
+ inp->inp_rtableid, IN_PCBLOCK_HOLD);
if (t != NULL) {
inp->inp_lport = 0;
- in_pcbunref(t);
+ mtx_leave(&table->inpt_mtx);
return (EADDRINUSE);
}
}
}
inp->inp_faddr6 = sin6->sin6_addr;
inp->inp_fport = sin6->sin6_port;
- mtx_enter(&table->inpt_mtx);
in_pcbrehash(inp);
+
mtx_leave(&table->inpt_mtx);
inp->inp_flowinfo &= ~IPV6_FLOWLABEL_MASK;
}
struct inpcb *
-in6_pcblookup(struct inpcbtable *table, const struct in6_addr *faddr,
- u_int fport, const struct in6_addr *laddr, u_int lport, u_int rtable)
+in6_pcblookup_lock(struct inpcbtable *table, const struct in6_addr *faddr,
+ u_int fport, const struct in6_addr *laddr, u_int lport, u_int rtable,
+ int lock)
{
struct inpcb *inp;
uint64_t hash;
rdomain = rtable_l2(rtable);
hash = in6_pcbhash(table, rdomain, faddr, fport, laddr, lport);
- mtx_enter(&table->inpt_mtx);
+ if (lock == IN_PCBLOCK_GRAB) {
+ mtx_enter(&table->inpt_mtx);
+ } else {
+ KASSERT(lock == IN_PCBLOCK_HOLD);
+ MUTEX_ASSERT_LOCKED(&table->inpt_mtx);
+ }
inp = in6_pcbhash_lookup(table, hash, rdomain,
faddr, fport, laddr, lport);
- in_pcbref(inp);
- mtx_leave(&table->inpt_mtx);
+ if (lock == IN_PCBLOCK_GRAB) {
+ in_pcbref(inp);
+ mtx_leave(&table->inpt_mtx);
+ }
#ifdef DIAGNOSTIC
if (inp == NULL && in_pcbnotifymiss) {
return (inp);
}
+struct inpcb *
+in6_pcblookup(struct inpcbtable *table, const struct in6_addr *faddr,
+ u_int fport, const struct in6_addr *laddr, u_int lport, u_int rtable)
+{
+ return in6_pcblookup_lock(table, faddr, fport, laddr, lport, rtable,
+ IN_PCBLOCK_GRAB); /* takes inpt_mtx, refs the pcb found */
+}
+
struct inpcb *
in6_pcblookup_listen(struct inpcbtable *table, struct in6_addr *laddr,
u_int lport, struct mbuf *m, u_int rtable)