From: mvs Date: Fri, 1 Jul 2022 09:56:17 +0000 (+0000) Subject: Make fine grained unix(4) domain sockets locking. Use the per-socket X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=cf85347fcaf1b04df91f85820772f5ae84768baf;p=openbsd Make unix(4) domain socket locking fine-grained. Use the per-socket `so_lock' rwlock(9) instead of the global `unp_lock', which locks the whole layer. The PCBs of unix(4) sockets are linked to each other and we need to lock them both. This introduces a lock ordering problem: while thread (1) holds the lock on `so1' and is trying to lock `so2', thread (2) could hold the lock on `so2' and be trying to lock `so1'. To solve this we always lock sockets in a strict order. For sockets which are already accessible from userland, we always lock the socket with the smaller memory address first. Sometimes we need to unlock a socket before locking its peer, and then lock it again. We use reference counters to prevent destruction of the connected peer during the relock. We also handle the case where the peer socket was replaced by another socket. For newly connected sockets, which are not yet exported to userland by accept(2), we always lock the listening socket `head' first. This allows us to avoid an unwanted relock within the accept(2) syscall. 
ok claudio@ --- diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index fc2421e9ca1..aa856abc56a 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket.c,v 1.278 2022/06/06 14:45:41 claudio Exp $ */ +/* $OpenBSD: uipc_socket.c,v 1.279 2022/07/01 09:56:17 mvs Exp $ */ /* $NetBSD: uipc_socket.c,v 1.21 1996/02/04 02:17:52 christos Exp $ */ /* @@ -52,6 +52,7 @@ #include #include #include +#include #ifdef DDB #include @@ -146,7 +147,9 @@ soalloc(int prflags) so = pool_get(&socket_pool, prflags); if (so == NULL) return (NULL); - rw_init(&so->so_lock, "solock"); + rw_init_flags(&so->so_lock, "solock", RWL_DUPOK); + refcnt_init(&so->so_refcnt); + return (so); } @@ -247,6 +250,8 @@ solisten(struct socket *so, int backlog) void sofree(struct socket *so, int keep_lock) { + int persocket = solock_persocket(so); + soassertlocked(so); if (so->so_pcb || (so->so_state & SS_NOFDREF) == 0) { @@ -255,17 +260,54 @@ sofree(struct socket *so, int keep_lock) return; } if (so->so_head) { + struct socket *head = so->so_head; + /* * We must not decommission a socket that's on the accept(2) * queue. If we do, then accept(2) may hang after select(2) * indicated that the listening socket was ready. */ - if (!soqremque(so, 0)) { + if (so->so_onq == &head->so_q) { if (!keep_lock) sounlock(so); return; } + + if (persocket) { + /* + * Concurrent close of `head' could + * abort `so' due to re-lock. 
+ */ + soref(so); + soref(head); + sounlock(so); + solock(head); + solock(so); + + if (so->so_onq != &head->so_q0) { + sounlock(head); + sounlock(so); + sorele(head); + sorele(so); + return; + } + + sorele(head); + sorele(so); + } + + soqremque(so, 0); + + if (persocket) + sounlock(head); } + + if (persocket) { + sounlock(so); + refcnt_finalize(&so->so_refcnt, "sofinal"); + solock(so); + } + sigio_free(&so->so_sigio); klist_free(&so->so_rcv.sb_sel.si_note); klist_free(&so->so_snd.sb_sel.si_note); @@ -356,13 +398,36 @@ drop: error = error2; } if (so->so_options & SO_ACCEPTCONN) { + int persocket = solock_persocket(so); + + if (persocket) { + /* Wait concurrent sonewconn() threads. */ + while (so->so_newconn > 0) { + so->so_state |= SS_NEWCONN_WAIT; + sosleep_nsec(so, &so->so_newconn, PSOCK, + "netlck", INFSLP); + } + } + while ((so2 = TAILQ_FIRST(&so->so_q0)) != NULL) { + if (persocket) + solock(so2); (void) soqremque(so2, 0); + if (persocket) + sounlock(so); (void) soabort(so2); + if (persocket) + solock(so); } while ((so2 = TAILQ_FIRST(&so->so_q)) != NULL) { + if (persocket) + solock(so2); (void) soqremque(so2, 1); + if (persocket) + sounlock(so); (void) soabort(so2); + if (persocket) + solock(so); } } discard: @@ -430,11 +495,18 @@ soconnect(struct socket *so, struct mbuf *nam) int soconnect2(struct socket *so1, struct socket *so2) { - int error; + int persocket, error; + + if ((persocket = solock_persocket(so1))) + solock_pair(so1, so2); + else + solock(so1); - solock(so1); error = (*so1->so_proto->pr_usrreq)(so1, PRU_CONNECT2, NULL, (struct mbuf *)so2, NULL, curproc); + + if (persocket) + sounlock(so2); sounlock(so1); return (error); } diff --git a/sys/kern/uipc_socket2.c b/sys/kern/uipc_socket2.c index 6b0b36e3150..e3327e14531 100644 --- a/sys/kern/uipc_socket2.c +++ b/sys/kern/uipc_socket2.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_socket2.c,v 1.124 2022/06/26 05:20:42 visa Exp $ */ +/* $OpenBSD: uipc_socket2.c,v 1.125 2022/07/01 09:56:17 mvs Exp $ */ /* $NetBSD: 
uipc_socket2.c,v 1.11 1996/02/04 02:17:55 christos Exp $ */ /* @@ -53,8 +53,6 @@ u_long sb_max = SB_MAX; /* patchable */ extern struct pool mclpools[]; extern struct pool mbpool; -extern struct rwlock unp_lock; - /* * Procedures to manipulate state flags of socket * and do appropriate wakeups. Normal sequence from the @@ -101,10 +99,37 @@ soisconnected(struct socket *so) soassertlocked(so); so->so_state &= ~(SS_ISCONNECTING|SS_ISDISCONNECTING); so->so_state |= SS_ISCONNECTED; - if (head && soqremque(so, 0)) { + + if (head != NULL && so->so_onq == &head->so_q0) { + int persocket = solock_persocket(so); + + if (persocket) { + soref(so); + soref(head); + + sounlock(so); + solock(head); + solock(so); + + if (so->so_onq != &head->so_q0) { + sounlock(head); + sorele(head); + sorele(so); + + return; + } + + sorele(head); + sorele(so); + } + + soqremque(so, 0); soqinsque(head, so, 1); sorwakeup(head); wakeup_one(&head->so_timeo); + + if (persocket) + sounlock(head); } else { wakeup(&so->so_timeo); sorwakeup(so); @@ -146,7 +171,8 @@ struct socket * sonewconn(struct socket *head, int connstatus) { struct socket *so; - int soqueue = connstatus ? 1 : 0; + int persocket = solock_persocket(head); + int error; /* * XXXSMP as long as `so' and `head' share the same lock, we @@ -174,10 +200,18 @@ sonewconn(struct socket *head, int connstatus) so->so_rgid = head->so_rgid; so->so_cpid = head->so_cpid; + /* + * Lock order will be `head' -> `so' while these sockets are linked. + */ + if (persocket) + solock(so); + /* * Inherit watermarks but those may get clamped in low mem situations. 
*/ if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) { + if (persocket) + sounlock(so); pool_put(&socket_pool, so); return (NULL); } @@ -193,20 +227,54 @@ sonewconn(struct socket *head, int connstatus) sigio_init(&so->so_sigio); sigio_copy(&so->so_sigio, &head->so_sigio); - soqinsque(head, so, soqueue); - if ((*so->so_proto->pr_attach)(so, 0)) { - (void) soqremque(so, soqueue); + soqinsque(head, so, 0); + + /* + * We need to unlock `head' because PCB layer could release + * solock() to enforce desired lock order. + */ + if (persocket) { + head->so_newconn++; + sounlock(head); + } + + error = (*so->so_proto->pr_attach)(so, 0); + + if (persocket) { + sounlock(so); + solock(head); + solock(so); + + if ((head->so_newconn--) == 0) { + if ((head->so_state & SS_NEWCONN_WAIT) != 0) { + head->so_state &= ~SS_NEWCONN_WAIT; + wakeup(&head->so_newconn); + } + } + } + + if (error) { + soqremque(so, 0); + if (persocket) + sounlock(so); sigio_free(&so->so_sigio); klist_free(&so->so_rcv.sb_sel.si_note); klist_free(&so->so_snd.sb_sel.si_note); pool_put(&socket_pool, so); return (NULL); } + if (connstatus) { + so->so_state |= connstatus; + soqremque(so, 0); + soqinsque(head, so, 1); sorwakeup(head); wakeup(&head->so_timeo); - so->so_state |= connstatus; } + + if (persocket) + sounlock(so); + return (so); } @@ -214,6 +282,7 @@ void soqinsque(struct socket *head, struct socket *so, int q) { soassertlocked(head); + soassertlocked(so); KASSERT(so->so_onq == NULL); @@ -233,6 +302,7 @@ soqremque(struct socket *so, int q) { struct socket *head = so->so_head; + soassertlocked(so); soassertlocked(head); if (q == 0) { @@ -284,15 +354,40 @@ solock(struct socket *so) case PF_INET6: NET_LOCK(); break; - case PF_UNIX: - rw_enter_write(&unp_lock); - break; default: rw_enter_write(&so->so_lock); break; } } +int +solock_persocket(struct socket *so) +{ + switch (so->so_proto->pr_domain->dom_family) { + case PF_INET: + case PF_INET6: + return 0; + default: + return 1; + } +} + +void 
+solock_pair(struct socket *so1, struct socket *so2) +{ + KASSERT(so1 != so2); + KASSERT(so1->so_type == so2->so_type); + KASSERT(solock_persocket(so1)); + + if (so1 < so2) { + solock(so1); + solock(so2); + } else { + solock(so2); + solock(so1); + } +} + void sounlock(struct socket *so) { @@ -301,9 +396,6 @@ sounlock(struct socket *so) case PF_INET6: NET_UNLOCK(); break; - case PF_UNIX: - rw_exit_write(&unp_lock); - break; default: rw_exit_write(&so->so_lock); break; @@ -318,9 +410,6 @@ soassertlocked(struct socket *so) case PF_INET6: NET_ASSERT_LOCKED(); break; - case PF_UNIX: - rw_assert_wrlock(&unp_lock); - break; default: rw_assert_wrlock(&so->so_lock); break; @@ -338,9 +427,6 @@ sosleep_nsec(struct socket *so, void *ident, int prio, const char *wmesg, case PF_INET6: ret = rwsleep_nsec(ident, &netlock, prio, wmesg, nsecs); break; - case PF_UNIX: - ret = rwsleep_nsec(ident, &unp_lock, prio, wmesg, nsecs); - break; default: ret = rwsleep_nsec(ident, &so->so_lock, prio, wmesg, nsecs); break; diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index a2590014ae9..c99ee6116be 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_syscalls.c,v 1.195 2022/06/06 14:45:41 claudio Exp $ */ +/* $OpenBSD: uipc_syscalls.c,v 1.196 2022/07/01 09:56:17 mvs Exp $ */ /* $NetBSD: uipc_syscalls.c,v 1.19 1996/02/09 19:00:48 christos Exp $ */ /* @@ -246,7 +246,7 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen, socklen_t namelen; int error, tmpfd; struct socket *head, *so; - int cloexec, nflag; + int cloexec, nflag, persocket; cloexec = (flags & SOCK_CLOEXEC) ? 
UF_EXCLOSE : 0; @@ -269,16 +269,19 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen, head = headfp->f_data; solock(head); + + persocket = solock_persocket(head); + if (isdnssocket(head) || (head->so_options & SO_ACCEPTCONN) == 0) { error = EINVAL; - goto out; + goto out_unlock; } if ((headfp->f_flag & FNONBLOCK) && head->so_qlen == 0) { if (head->so_state & SS_CANTRCVMORE) error = ECONNABORTED; else error = EWOULDBLOCK; - goto out; + goto out_unlock; } while (head->so_qlen == 0 && head->so_error == 0) { if (head->so_state & SS_CANTRCVMORE) { @@ -288,18 +291,22 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen, error = sosleep_nsec(head, &head->so_timeo, PSOCK | PCATCH, "netcon", INFSLP); if (error) - goto out; + goto out_unlock; } if (head->so_error) { error = head->so_error; head->so_error = 0; - goto out; + goto out_unlock; } /* * Do not sleep after we have taken the socket out of the queue. */ so = TAILQ_FIRST(&head->so_q); + + if (persocket) + solock(so); + if (soqremque(so, 1) == 0) panic("accept"); @@ -310,30 +317,52 @@ doaccept(struct proc *p, int sock, struct sockaddr *name, socklen_t *anamelen, /* connection has been removed from the listen queue */ KNOTE(&head->so_rcv.sb_sel.si_note, 0); + if (persocket) + sounlock(head); + fp->f_type = DTYPE_SOCKET; fp->f_flag = FREAD | FWRITE | nflag; fp->f_ops = &socketops; fp->f_data = so; + error = soaccept(so, nam); -out: - sounlock(head); - if (!error && name != NULL) + + if (persocket) + sounlock(so); + else + sounlock(head); + + if (error) + goto out; + + if (name != NULL) { error = copyaddrout(p, nam, name, namelen, anamelen); - if (!error) { - fdplock(fdp); - fdinsert(fdp, tmpfd, cloexec, fp); - fdpunlock(fdp); - FRELE(fp, p); - *retval = tmpfd; - } else { - fdplock(fdp); - fdremove(fdp, tmpfd); - fdpunlock(fdp); - closef(fp, p); + if (error) + goto out; } + fdplock(fdp); + fdinsert(fdp, tmpfd, cloexec, fp); + fdpunlock(fdp); + FRELE(fp, p); + 
*retval = tmpfd; + m_freem(nam); FRELE(headfp, p); + + return 0; + +out_unlock: + sounlock(head); +out: + fdplock(fdp); + fdremove(fdp, tmpfd); + fdpunlock(fdp); + closef(fp, p); + + m_freem(nam); + FRELE(headfp, p); + return (error); } diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 372a473a75c..0710393d376 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_usrreq.c,v 1.165 2022/06/06 14:45:41 claudio Exp $ */ +/* $OpenBSD: uipc_usrreq.c,v 1.166 2022/07/01 09:56:17 mvs Exp $ */ /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ /* @@ -55,6 +55,7 @@ #include #include #include +#include #include "kcov.h" #if NKCOV > 0 @@ -66,9 +67,10 @@ * I immutable after creation * D unp_df_lock * G unp_gc_lock - * U unp_lock + * M unp_ino_mtx * R unp_rights_mtx * a atomic + * s socket lock */ struct rwlock unp_lock = RWLOCK_INITIALIZER("unplock"); @@ -76,6 +78,7 @@ struct rwlock unp_df_lock = RWLOCK_INITIALIZER("unpdflk"); struct rwlock unp_gc_lock = RWLOCK_INITIALIZER("unpgclk"); struct mutex unp_rights_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); +struct mutex unp_ino_mtx = MUTEX_INITIALIZER(IPL_SOFTNET); /* * Stack of sets of files that were passed over a socket but were @@ -94,6 +97,9 @@ void unp_remove_gcrefs(struct fdpass *, int); void unp_restore_gcrefs(struct fdpass *, int); void unp_scan(struct mbuf *, void (*)(struct fdpass *, int)); int unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *); +static inline void unp_ref(struct unpcb *); +static inline void unp_rele(struct unpcb *); +struct socket *unp_solock_peer(struct socket *); struct pool unpcb_pool; struct task unp_gc_task = TASK_INITIALIZER(unp_gc, NULL); @@ -127,6 +133,53 @@ unp_init(void) IPL_SOFTNET, 0, "unpcb", NULL); } +static inline void +unp_ref(struct unpcb *unp) +{ + refcnt_take(&unp->unp_refcnt); +} + +static inline void +unp_rele(struct unpcb *unp) +{ + refcnt_rele_wake(&unp->unp_refcnt); +} + +struct socket * 
+unp_solock_peer(struct socket *so) +{ + struct unpcb *unp, *unp2; + struct socket *so2; + + unp = so->so_pcb; + +again: + if ((unp2 = unp->unp_conn) == NULL) + return NULL; + + so2 = unp2->unp_socket; + + if (so < so2) + solock(so2); + else if (so > so2){ + unp_ref(unp2); + sounlock(so); + solock(so2); + solock(so); + + /* Datagram socket could be reconnected due to re-lock. */ + if (unp->unp_conn != unp2) { + sounlock(so2); + unp_rele(unp2); + goto again; + } + + unp_rele(unp2); + } + + return so2; +} + void uipc_setaddr(const struct unpcb *unp, struct mbuf *nam) { @@ -201,7 +254,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, * if it was bound and we are still connected * (our peer may have closed already!). */ + so2 = unp_solock_peer(so); uipc_setaddr(unp->unp_conn, nam); + if (so2 != NULL && so2 != so) + sounlock(so2); break; case PRU_SHUTDOWN: @@ -218,9 +274,8 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, case SOCK_STREAM: case SOCK_SEQPACKET: - if (unp->unp_conn == NULL) + if ((so2 = unp_solock_peer(so)) == NULL) break; - so2 = unp->unp_conn->unp_socket; /* * Adjust backpressure on sender * and wakeup any waiting to write. 
@@ -228,6 +283,7 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, so2->so_snd.sb_mbcnt = so->so_rcv.sb_mbcnt; so2->so_snd.sb_cc = so->so_rcv.sb_cc; sowwakeup(so2); + sounlock(so2); break; default: @@ -256,13 +312,16 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, error = unp_connect(so, nam, p); if (error) break; - } else { - if (unp->unp_conn == NULL) { + } + + if ((so2 = unp_solock_peer(so)) == NULL) { + if (nam != NULL) + error = ECONNREFUSED; + else error = ENOTCONN; - break; - } + break; } - so2 = unp->unp_conn->unp_socket; + if (unp->unp_addr) from = mtod(unp->unp_addr, struct sockaddr *); else @@ -273,6 +332,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, control = NULL; } else error = ENOBUFS; + + if (so2 != so) + sounlock(so2); + if (nam) unp_disconnect(unp); break; @@ -284,11 +347,11 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, error = EPIPE; break; } - if (unp->unp_conn == NULL) { + if ((so2 = unp_solock_peer(so)) == NULL) { error = ENOTCONN; break; } - so2 = unp->unp_conn->unp_socket; + /* * Send to paired receive port, and then raise * send buffer counts to maintain backpressure. @@ -310,6 +373,8 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, so->so_snd.sb_cc = so2->so_rcv.sb_cc; if (so2->so_rcv.sb_cc > 0) sorwakeup(so2); + + sounlock(so2); m = NULL; break; @@ -323,12 +388,7 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, case PRU_ABORT: unp_detach(unp); - /* - * As long as `unp_lock' is taken before entering - * uipc_usrreq() releasing it here would lead to a - * double unlock. 
- */ - sofree(so, 1); + sofree(so, 0); break; case PRU_SENSE: { @@ -336,8 +396,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, sb->st_blksize = so->so_snd.sb_hiwat; sb->st_dev = NODEV; + mtx_enter(&unp_ino_mtx); if (unp->unp_ino == 0) unp->unp_ino = unp_ino++; + mtx_leave(&unp_ino_mtx); sb->st_atim.tv_sec = sb->st_mtim.tv_sec = sb->st_ctim.tv_sec = unp->unp_ctime.tv_sec; @@ -358,7 +420,10 @@ uipc_usrreq(struct socket *so, int req, struct mbuf *m, struct mbuf *nam, break; case PRU_PEERADDR: + so2 = unp_solock_peer(so); uipc_setaddr(unp->unp_conn, nam); + if (so2 != NULL && so2 != so) + sounlock(so2); break; case PRU_SLOWTIMO: @@ -410,8 +475,6 @@ uipc_attach(struct socket *so, int proto) struct unpcb *unp; int error; - rw_assert_wrlock(&unp_lock); - if (so->so_pcb) return EISCONN; if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) { @@ -438,6 +501,7 @@ uipc_attach(struct socket *so, int proto) unp = pool_get(&unpcb_pool, PR_NOWAIT|PR_ZERO); if (unp == NULL) return (ENOBUFS); + refcnt_init(&unp->unp_refcnt); unp->unp_socket = so; so->so_pcb = unp; getnanotime(&unp->unp_ctime); @@ -445,12 +509,6 @@ uipc_attach(struct socket *so, int proto) /* * Enforce `unp_gc_lock' -> `solock()' lock order. */ - /* - * We also release the lock on listening socket and on our peer - * socket when called from unp_connect(). This is safe. The - * listening socket protected by vnode(9) lock. The peer socket - * has 'UNP_CONNECTING' flag set. - */ sounlock(so); rw_enter_write(&unp_gc_lock); LIST_INSERT_HEAD(&unp_head, unp, unp_link); @@ -512,14 +570,13 @@ unp_detach(struct unpcb *unp) { struct socket *so = unp->unp_socket; struct vnode *vp = unp->unp_vnode; - - rw_assert_wrlock(&unp_lock); + struct unpcb *unp2; unp->unp_vnode = NULL; /* * Enforce `unp_gc_lock' -> `solock()' lock order. - * Enforce `i_lock' -> `unp_lock' lock order. + * Enforce `i_lock' -> `solock()' lock order. 
*/ sounlock(so); @@ -538,10 +595,47 @@ unp_detach(struct unpcb *unp) solock(so); - if (unp->unp_conn) + if (unp->unp_conn != NULL) { + /* + * Datagram socket could be connected to itself. + * Such socket will be disconnected here. + */ unp_disconnect(unp); - while (!SLIST_EMPTY(&unp->unp_refs)) - unp_drop(SLIST_FIRST(&unp->unp_refs), ECONNRESET); + } + + while ((unp2 = SLIST_FIRST(&unp->unp_refs)) != NULL) { + struct socket *so2 = unp2->unp_socket; + + if (so < so2) + solock(so2); + else { + unp_ref(unp2); + sounlock(so); + solock(so2); + solock(so); + + if (unp2->unp_conn != unp) { + /* `unp2' was disconnected due to re-lock. */ + sounlock(so2); + unp_rele(unp2); + continue; + } + + unp_rele(unp2); + } + + unp2->unp_conn = NULL; + SLIST_REMOVE(&unp->unp_refs, unp2, unpcb, unp_nextref); + so2->so_error = ECONNRESET; + so2->so_state &= ~SS_ISCONNECTED; + + sounlock(so2); + } + + sounlock(so); + refcnt_finalize(&unp->unp_refcnt, "unpfinal"); + solock(so); + soisdisconnected(so); so->so_pcb = NULL; m_freem(unp->unp_addr); @@ -681,24 +775,42 @@ unp_connect(struct socket *so, struct mbuf *nam, struct proc *p) } if ((error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p)) != 0) goto put; - solock(so); so2 = vp->v_socket; if (so2 == NULL) { error = ECONNREFUSED; - goto put_locked; + goto put; } if (so->so_type != so2->so_type) { error = EPROTOTYPE; - goto put_locked; + goto put; } + if (so->so_proto->pr_flags & PR_CONNREQUIRED) { + solock(so2); + if ((so2->so_options & SO_ACCEPTCONN) == 0 || (so3 = sonewconn(so2, 0)) == NULL) { error = ECONNREFUSED; - goto put_locked; } + + sounlock(so2); + + if (error != 0) + goto put; + + /* + * Since `so2' is protected by vnode(9) lock, `so3' + * can't be PRU_ABORT'ed here. + */ + solock_pair(so, so3); + unp2 = sotounpcb(so2); unp3 = sotounpcb(so3); + + /* + * `unp_addr', `unp_connid' and 'UNP_FEIDSBIND' flag + * are immutable since we set them in unp_bind(). 
+ */ if (unp2->unp_addr) unp3->unp_addr = m_copym(unp2->unp_addr, 0, M_COPYALL, M_NOWAIT); @@ -706,15 +818,29 @@ unp_connect(struct socket *so, struct mbuf *nam, struct proc *p) unp3->unp_connid.gid = p->p_ucred->cr_gid; unp3->unp_connid.pid = p->p_p->ps_pid; unp3->unp_flags |= UNP_FEIDS; - so2 = so3; + if (unp2->unp_flags & UNP_FEIDSBIND) { unp->unp_connid = unp2->unp_connid; unp->unp_flags |= UNP_FEIDS; } + + so2 = so3; + } else { + if (so2 != so) + solock_pair(so, so2); + else + solock(so); } + error = unp_connect2(so, so2); -put_locked: + sounlock(so); + + /* + * `so2' can't be PRU_ABORT'ed concurrently + */ + if (so2 != so) + sounlock(so2); put: vput(vp); unlock: @@ -738,7 +864,8 @@ unp_connect2(struct socket *so, struct socket *so2) struct unpcb *unp = sotounpcb(so); struct unpcb *unp2; - rw_assert_wrlock(&unp_lock); + soassertlocked(so); + soassertlocked(so2); if (so2->so_type != so->so_type) return (EPROTOTYPE); @@ -767,11 +894,15 @@ unp_connect2(struct socket *so, struct socket *so2) void unp_disconnect(struct unpcb *unp) { - struct unpcb *unp2 = unp->unp_conn; + struct socket *so2; + struct unpcb *unp2; - if (unp2 == NULL) + if ((so2 = unp_solock_peer(unp->unp_socket)) == NULL) return; + + unp2 = unp->unp_conn; unp->unp_conn = NULL; + switch (unp->unp_socket->so_type) { case SOCK_DGRAM: @@ -790,35 +921,31 @@ unp_disconnect(struct unpcb *unp) soisdisconnected(unp2->unp_socket); break; } + + if (so2 != unp->unp_socket) + sounlock(so2); } void unp_shutdown(struct unpcb *unp) { - struct socket *so; + struct socket *so2; switch (unp->unp_socket->so_type) { case SOCK_STREAM: case SOCK_SEQPACKET: - if (unp->unp_conn && (so = unp->unp_conn->unp_socket)) - socantrcvmore(so); + if ((so2 = unp_solock_peer(unp->unp_socket)) == NULL) + break; + + socantrcvmore(so2); + sounlock(so2); + break; default: break; } } -void -unp_drop(struct unpcb *unp, int errno) -{ - struct socket *so = unp->unp_socket; - - rw_assert_wrlock(&unp_lock); - - so->so_error = errno; - 
unp_disconnect(unp); -} - #ifdef notdef unp_drain(void) { diff --git a/sys/miscfs/fifofs/fifo_vnops.c b/sys/miscfs/fifofs/fifo_vnops.c index ff62072ccdb..d4f5df4d2c7 100644 --- a/sys/miscfs/fifofs/fifo_vnops.c +++ b/sys/miscfs/fifofs/fifo_vnops.c @@ -1,4 +1,4 @@ -/* $OpenBSD: fifo_vnops.c,v 1.95 2022/06/26 05:20:42 visa Exp $ */ +/* $OpenBSD: fifo_vnops.c,v 1.96 2022/07/01 09:56:17 mvs Exp $ */ /* $NetBSD: fifo_vnops.c,v 1.18 1996/03/16 23:52:42 christos Exp $ */ /* @@ -176,15 +176,17 @@ fifo_open(void *v) solock(wso); wso->so_state |= SS_CANTSENDMORE; wso->so_snd.sb_lowat = PIPE_BUF; + sounlock(wso); } else { rso = fip->fi_readsock; wso = fip->fi_writesock; - solock(wso); } if (ap->a_mode & FREAD) { fip->fi_readers++; if (fip->fi_readers == 1) { + solock(wso); wso->so_state &= ~SS_CANTSENDMORE; + sounlock(wso); if (fip->fi_writers > 0) wakeup(&fip->fi_writers); } @@ -193,16 +195,16 @@ fifo_open(void *v) fip->fi_writers++; if ((ap->a_mode & O_NONBLOCK) && fip->fi_readers == 0) { error = ENXIO; - sounlock(wso); goto bad; } if (fip->fi_writers == 1) { + solock(rso); rso->so_state &= ~(SS_CANTRCVMORE|SS_ISDISCONNECTED); + sounlock(rso); if (fip->fi_readers > 0) wakeup(&fip->fi_readers); } } - sounlock(wso); if ((ap->a_mode & O_NONBLOCK) == 0) { if ((ap->a_mode & FREAD) && fip->fi_writers == 0) { VOP_UNLOCK(vp); diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index 7e899aee63b..f82c1b0ac11 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: socketvar.h,v 1.104 2022/06/26 05:20:42 visa Exp $ */ +/* $OpenBSD: socketvar.h,v 1.105 2022/07/01 09:56:17 mvs Exp $ */ /* $NetBSD: socketvar.h,v 1.18 1996/02/09 18:25:38 christos Exp $ */ /*- @@ -38,6 +38,7 @@ #include #include #include +#include #ifndef _SOCKLEN_T_DEFINED_ #define _SOCKLEN_T_DEFINED_ @@ -55,6 +56,7 @@ TAILQ_HEAD(soqhead, socket); struct socket { const struct protosw *so_proto; /* protocol handle */ struct rwlock so_lock; /* this socket lock */ + struct refcnt so_refcnt; 
/* references to this socket */ void *so_pcb; /* protocol control block */ u_int so_state; /* internal state flags SS_*, below */ short so_type; /* generic type, see socket.h */ @@ -80,6 +82,7 @@ struct socket { short so_q0len; /* partials on so_q0 */ short so_qlen; /* number of connections on so_q */ short so_qlimit; /* max number queued connections */ + u_long so_newconn; /* # of pending sonewconn() threads */ short so_timeo; /* connection timeout */ u_long so_oobmark; /* chars to oob mark */ u_int so_error; /* error affecting connection */ @@ -149,6 +152,7 @@ struct socket { #define SS_CONNECTOUT 0x1000 /* connect, not accept, at this end */ #define SS_ISSENDING 0x2000 /* hint for lower layer */ #define SS_DNS 0x4000 /* created using SOCK_DNS socket(2) */ +#define SS_NEWCONN_WAIT 0x8000 /* waiting sonewconn() relock */ #ifdef _KERNEL @@ -156,6 +160,18 @@ struct socket { void soassertlocked(struct socket *); +static inline void +soref(struct socket *so) +{ + refcnt_take(&so->so_refcnt); +} + +static inline void +sorele(struct socket *so) +{ + refcnt_rele_wake(&so->so_refcnt); +} + /* * Macros for sockets and socket buffering. 
*/ @@ -329,6 +345,8 @@ int sockargs(struct mbuf **, const void *, size_t, int); int sosleep_nsec(struct socket *, void *, int, const char *, uint64_t); void solock(struct socket *); +int solock_persocket(struct socket *); +void solock_pair(struct socket *, struct socket *); void sounlock(struct socket *); int sendit(struct proc *, int, struct msghdr *, int, register_t *); diff --git a/sys/sys/unpcb.h b/sys/sys/unpcb.h index b3641bde092..54d083b6103 100644 --- a/sys/sys/unpcb.h +++ b/sys/sys/unpcb.h @@ -1,4 +1,4 @@ -/* $OpenBSD: unpcb.h,v 1.25 2022/02/25 23:51:04 guenther Exp $ */ +/* $OpenBSD: unpcb.h,v 1.26 2022/07/01 09:56:17 mvs Exp $ */ /* $NetBSD: unpcb.h,v 1.6 1994/06/29 06:46:08 cgd Exp $ */ /* @@ -32,6 +32,8 @@ * @(#)unpcb.h 8.1 (Berkeley) 6/2/93 */ +#include + /* * Protocol control block for an active * instance of a UNIX internal protocol. @@ -60,24 +62,26 @@ * Locks used to protect struct members: * I immutable after creation * G unp_gc_lock - * U unp_lock + * s socket lock */ struct unpcb { + struct refcnt unp_refcnt; /* references to this pcb */ struct socket *unp_socket; /* [I] pointer back to socket */ - struct vnode *unp_vnode; /* [U] if associated with file */ + struct vnode *unp_vnode; /* [s] if associated with file */ struct file *unp_file; /* [G] backpointer for unp_gc() */ - struct unpcb *unp_conn; /* [U] control block of connected socket */ - ino_t unp_ino; /* [U] fake inode number */ - SLIST_HEAD(,unpcb) unp_refs; /* [U] referencing socket linked list */ - SLIST_ENTRY(unpcb) unp_nextref; /* [U] link in unp_refs list */ - struct mbuf *unp_addr; /* [U] bound address of socket */ + struct unpcb *unp_conn; /* [s] control block of connected + socket */ + ino_t unp_ino; /* [s] fake inode number */ + SLIST_HEAD(,unpcb) unp_refs; /* [s] referencing socket linked list */ + SLIST_ENTRY(unpcb) unp_nextref; /* [s] link in unp_refs list */ + struct mbuf *unp_addr; /* [s] bound address of socket */ long unp_msgcount; /* [G] references from socket rcv buf 
*/ long unp_gcrefs; /* [G] references from gc */ - int unp_flags; /* [U] this unpcb contains peer eids */ + int unp_flags; /* [s] this unpcb contains peer eids */ int unp_gcflags; /* [G] garbage collector flags */ - struct sockpeercred unp_connid;/* [U] id of peer process */ + struct sockpeercred unp_connid;/* [s] id of peer process */ struct timespec unp_ctime; /* [I] holds creation time */ LIST_ENTRY(unpcb) unp_link; /* [G] link in per-AF list of sockets */ }; @@ -114,7 +118,6 @@ int unp_connect(struct socket *, struct mbuf *, struct proc *); int unp_connect2(struct socket *, struct socket *); void unp_detach(struct unpcb *); void unp_disconnect(struct unpcb *); -void unp_drop(struct unpcb *, int); void unp_gc(void *); void unp_shutdown(struct unpcb *); int unp_externalize(struct mbuf *, socklen_t, int);