From e82b5ebce50ce92a513ef50cef3ae51dcab7c4a0 Mon Sep 17 00:00:00 2001 From: mvs Date: Sun, 26 Dec 2021 23:41:41 +0000 Subject: [PATCH] Rework garbage collector for unix(4) sockets. This time unix(4) sockets garbage collector always destroys any socket with positive "fp->f_count == unp->unp_msgcount" equation. This is wrong because unix(4) sockets within SCM_RIGHTS message but closed on sender side also have this equation positive. Such sockets are not in the loop, and if garbage collector kill them before they are received, we get kernel panic. FreeBSD already has garbage collector reworked to fix this issue [1]. The logic is pretty simple so import it to our garbage collector. 1. https://reviews.freebsd.org/D23142 ok bluhm@ --- sys/kern/uipc_usrreq.c | 165 ++++++++++++++++++++++++----------------- sys/sys/unpcb.h | 7 +- 2 files changed, 99 insertions(+), 73 deletions(-) diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 7cb3cc39494..160ff15738d 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -1,4 +1,4 @@ -/* $OpenBSD: uipc_usrreq.c,v 1.159 2021/12/07 01:19:47 mvs Exp $ */ +/* $OpenBSD: uipc_usrreq.c,v 1.160 2021/12/26 23:41:41 mvs Exp $ */ /* $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $ */ /* @@ -85,7 +85,8 @@ struct unp_deferral { void uipc_setaddr(const struct unpcb *, struct mbuf *); void unp_discard(struct fdpass *, int); -void unp_mark(struct fdpass *, int); +void unp_remove_gcrefs(struct fdpass *, int); +void unp_restore_gcrefs(struct fdpass *, int); void unp_scan(struct mbuf *, void (*)(struct fdpass *, int)); int unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *); @@ -1162,61 +1163,71 @@ unp_gc(void *arg __unused) } rw_exit_write(&unp_df_lock); + nunref = 0; + rw_enter_write(&unp_gc_lock); - unp_defer = 0; - LIST_FOREACH(unp, &unp_head, unp_link) + + /* + * Determine sockets which may be prospectively dead. Such + * sockets have their `unp_msgcount' equal to the `f_count'. + * If `unp_msgcount' is 0, the socket has not been passed + * and can't be unreferenced. + */ + LIST_FOREACH(unp, &unp_head, unp_link) { unp->unp_gcflags = 0; + + if (unp->unp_msgcount == 0) + continue; + if ((fp = unp->unp_file) == NULL) + continue; + if (fp->f_count == unp->unp_msgcount) { + unp->unp_gcflags |= UNP_GCDEAD; + unp->unp_gcrefs = unp->unp_msgcount; + nunref++; + } + } + + /* + * Scan all sockets previously marked as dead. Remove + * the `unp_gcrefs' reference each socket holds on any + * dead socket in its buffer. + */ + LIST_FOREACH(unp, &unp_head, unp_link) { + if ((unp->unp_gcflags & UNP_GCDEAD) == 0) + continue; + so = unp->unp_socket; + solock(so); + unp_scan(so->so_rcv.sb_mb, unp_remove_gcrefs); + sounlock(so, SL_LOCKED); + } + + /* + * If the dead socket has `unp_gcrefs' reference counter + * greater than 0, it can't be unreferenced. Mark it as + * alive and increment the `unp_gcrefs' reference for each + * dead socket within its buffer. Repeat this until we + * have no new alive sockets found. + */ do { - nunref = 0; + unp_defer = 0; + LIST_FOREACH(unp, &unp_head, unp_link) { - fp = unp->unp_file; - if (unp->unp_gcflags & UNP_GCDEFER) { - /* - * This socket is referenced by another - * socket which is known to be live, - * so it's certainly live. - */ - unp->unp_gcflags &= ~UNP_GCDEFER; - unp_defer--; - } else if (unp->unp_gcflags & UNP_GCMARK) { - /* marked as live in previous pass */ + if ((unp->unp_gcflags & UNP_GCDEAD) == 0) + continue; + if (unp->unp_gcrefs == 0) continue; - } else if (fp == NULL) { - /* not being passed, so can't be in loop */ - } else if (fp->f_count == 0) { - /* - * Already being closed, let normal close - * path take its course - */ - } else { - /* - * Unreferenced by other sockets so far, - * so if all the references (f_count) are - * from passing (unp_msgcount) then this - * socket is prospectively dead - */ - if (fp->f_count == unp->unp_msgcount) { - nunref++; - unp->unp_gcflags |= UNP_GCDEAD; - continue; - } - } - /* - * This is the first time we've seen this socket on - * the mark pass and known it has a live reference, - * so mark it, then scan its receive buffer for - * sockets and note them as deferred (== referenced, - * but not yet marked). - */ - unp->unp_gcflags |= UNP_GCMARK; + unp->unp_gcflags &= ~UNP_GCDEAD; so = unp->unp_socket; solock(so); - unp_scan(so->so_rcv.sb_mb, unp_mark); + unp_scan(so->so_rcv.sb_mb, unp_restore_gcrefs); sounlock(so, SL_LOCKED); + + KASSERT(nunref > 0); + nunref--; } - } while (unp_defer); + } while (unp_defer > 0); /* * If there are any unreferenced sockets, then for each dispose @@ -1238,6 +1249,7 @@ unp_gc(void *arg __unused) } } } + unp_gcing = 0; unlock: rw_exit_write(&unp_gc_lock); @@ -1281,7 +1293,25 @@ unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int)) } void -unp_mark(struct fdpass *rp, int nfds) +unp_discard(struct fdpass *rp, int nfds) +{ + struct unp_deferral *defer; + + /* copy the file pointers to a deferral structure */ + defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK); + defer->ud_n = nfds; + memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds); + memset(rp, 0, sizeof(*rp) * nfds); + + rw_enter_write(&unp_df_lock); + SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link); + rw_exit_write(&unp_df_lock); + + task_add(systqmp, &unp_gc_task); +} + +void +unp_remove_gcrefs(struct fdpass *rp, int nfds) { struct unpcb *unp; int i; @@ -1291,36 +1321,33 @@ unp_mark(struct fdpass *rp, int nfds) for (i = 0; i < nfds; i++) { if (rp[i].fp == NULL) continue; - - unp = fptounp(rp[i].fp); - if (unp == NULL) - continue; - - if (unp->unp_gcflags & (UNP_GCMARK|UNP_GCDEFER)) + if ((unp = fptounp(rp[i].fp)) == NULL) continue; - - unp_defer++; - unp->unp_gcflags |= UNP_GCDEFER; - unp->unp_gcflags &= ~UNP_GCDEAD; + if (unp->unp_gcflags & UNP_GCDEAD) { + KASSERT(unp->unp_gcrefs > 0); + unp->unp_gcrefs--; + } } } void -unp_discard(struct fdpass *rp, int nfds) +unp_restore_gcrefs(struct fdpass *rp, int nfds) { - struct unp_deferral *defer; - - /* copy the file pointers to a deferral structure */ - defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK); - defer->ud_n = nfds; - memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds); - memset(rp, 0, sizeof(*rp) * nfds); + struct unpcb *unp; + int i; - rw_enter_write(&unp_df_lock); - SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link); - rw_exit_write(&unp_df_lock); + rw_assert_wrlock(&unp_gc_lock); - task_add(systqmp, &unp_gc_task); + for (i = 0; i < nfds; i++) { + if (rp[i].fp == NULL) + continue; + if ((unp = fptounp(rp[i].fp)) == NULL) + continue; + if (unp->unp_gcflags & UNP_GCDEAD) { + unp->unp_gcrefs++; + unp_defer++; + } + } } int diff --git a/sys/sys/unpcb.h b/sys/sys/unpcb.h index 5def517fa08..27aa0ba8f48 100644 --- a/sys/sys/unpcb.h +++ b/sys/sys/unpcb.h @@ -1,4 +1,4 @@ -/* $OpenBSD: unpcb.h,v 1.21 2021/12/07 01:19:47 mvs Exp $ */ +/* $OpenBSD: unpcb.h,v 1.22 2021/12/26 23:41:41 mvs Exp $ */ /* $NetBSD: unpcb.h,v 1.6 1994/06/29 06:46:08 cgd Exp $ */ /* @@ -74,6 +74,7 @@ struct unpcb { SLIST_ENTRY(unpcb) unp_nextref; /* [U] link in unp_refs list */ struct mbuf *unp_addr; /* [U] bound address of socket */ long unp_msgcount; /* [G] references from socket rcv buf */ + long unp_gcrefs; /* [G] references from gc */ int unp_flags; /* [U] this unpcb contains peer eids */ int unp_gcflags; /* [G] garbge collector flags */ struct sockpeercred unp_connid;/* [U] id of peer process */ @@ -92,9 +93,7 @@ struct unpcb { /* * flag bits in unp_gcflags */ -#define UNP_GCMARK 0x01 /* mark during unp_gc() */ -#define UNP_GCDEFER 0x02 /* ref'd, but not marked in this pass */ -#define UNP_GCDEAD 0x04 /* unref'd in this pass */ +#define UNP_GCDEAD 0x01 /* unp could be dead */ #define sotounpcb(so) ((struct unpcb *)((so)->so_pcb)) -- 2.20.1