Rework garbage collector for unix(4) sockets.
authormvs <mvs@openbsd.org>
Sun, 26 Dec 2021 23:41:41 +0000 (23:41 +0000)
committermvs <mvs@openbsd.org>
Sun, 26 Dec 2021 23:41:41 +0000 (23:41 +0000)
This time unix(4) sockets garbage collector always destroys any socket
with positive "fp->f_count == unp->unp_msgcount" equation. This is wrong
because unix(4) sockets within SCM_RIGHTS message but closed on sender
side also have this equation positive. Such sockets are not in the loop,
and if garbage collector kill them before they are received, we get
kernel panic.

FreeBSD already has garbage collector reworked to fix this issue [1]. The
logic is pretty simple so import it to our garbage collector.

1. https://reviews.freebsd.org/D23142

ok bluhm@

sys/kern/uipc_usrreq.c
sys/sys/unpcb.h

index 7cb3cc3..160ff15 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: uipc_usrreq.c,v 1.159 2021/12/07 01:19:47 mvs Exp $   */
+/*     $OpenBSD: uipc_usrreq.c,v 1.160 2021/12/26 23:41:41 mvs Exp $   */
 /*     $NetBSD: uipc_usrreq.c,v 1.18 1996/02/09 19:00:50 christos Exp $        */
 
 /*
@@ -85,7 +85,8 @@ struct        unp_deferral {
 
 void   uipc_setaddr(const struct unpcb *, struct mbuf *);
 void   unp_discard(struct fdpass *, int);
-void   unp_mark(struct fdpass *, int);
+void   unp_remove_gcrefs(struct fdpass *, int);
+void   unp_restore_gcrefs(struct fdpass *, int);
 void   unp_scan(struct mbuf *, void (*)(struct fdpass *, int));
 int    unp_nam2sun(struct mbuf *, struct sockaddr_un **, size_t *);
 
@@ -1162,61 +1163,71 @@ unp_gc(void *arg __unused)
        }
        rw_exit_write(&unp_df_lock);
 
+       nunref = 0;
+
        rw_enter_write(&unp_gc_lock);
-       unp_defer = 0;
-       LIST_FOREACH(unp, &unp_head, unp_link)
+
+       /*
+        * Determine sockets which may be prospectively dead. Such
+        * sockets have their `unp_msgcount' equal to the `f_count'.
+        * If `unp_msgcount' is 0, the socket has not been passed
+        * and can't be unreferenced.
+        */
+       LIST_FOREACH(unp, &unp_head, unp_link) {
                unp->unp_gcflags = 0;
+
+               if (unp->unp_msgcount == 0)
+                       continue;
+               if ((fp = unp->unp_file) == NULL)
+                       continue;
+               if (fp->f_count == unp->unp_msgcount) {
+                       unp->unp_gcflags |= UNP_GCDEAD;
+                       unp->unp_gcrefs = unp->unp_msgcount;
+                       nunref++;
+               }
+       }
+
+       /*
+        * Scan all sockets previously marked as dead. Remove
+        * the `unp_gcrefs' reference each socket holds on any
+        * dead socket in its buffer.
+        */
+       LIST_FOREACH(unp, &unp_head, unp_link) {
+               if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
+                       continue;
+               so = unp->unp_socket;
+               solock(so);
+               unp_scan(so->so_rcv.sb_mb, unp_remove_gcrefs);
+               sounlock(so, SL_LOCKED);
+       }
+
+       /*
+        * If the dead socket has `unp_gcrefs' reference counter
+        * greater than 0, it can't be unreferenced. Mark it as
+        * alive and increment the `unp_gcrefs' reference for each
+        * dead socket within its buffer. Repeat this until we
+        * have no new alive sockets found.
+        */
        do {
-               nunref = 0;
+               unp_defer = 0;
+
                LIST_FOREACH(unp, &unp_head, unp_link) {
-                       fp = unp->unp_file;
-                       if (unp->unp_gcflags & UNP_GCDEFER) {
-                               /*
-                                * This socket is referenced by another
-                                * socket which is known to be live,
-                                * so it's certainly live.
-                                */
-                               unp->unp_gcflags &= ~UNP_GCDEFER;
-                               unp_defer--;
-                       } else if (unp->unp_gcflags & UNP_GCMARK) {
-                               /* marked as live in previous pass */
+                       if ((unp->unp_gcflags & UNP_GCDEAD) == 0)
+                               continue;
+                       if (unp->unp_gcrefs == 0)
                                continue;
-                       } else if (fp == NULL) {
-                               /* not being passed, so can't be in loop */
-                       } else if (fp->f_count == 0) {
-                               /*
-                                * Already being closed, let normal close
-                                * path take its course
-                                */
-                       } else {
-                               /*
-                                * Unreferenced by other sockets so far,
-                                * so if all the references (f_count) are
-                                * from passing (unp_msgcount) then this
-                                * socket is prospectively dead
-                                */
-                               if (fp->f_count == unp->unp_msgcount) {
-                                       nunref++;
-                                       unp->unp_gcflags |= UNP_GCDEAD;
-                                       continue;
-                               }
-                       }
 
-                       /*
-                        * This is the first time we've seen this socket on
-                        * the mark pass and known it has a live reference,
-                        * so mark it, then scan its receive buffer for
-                        * sockets and note them as deferred (== referenced,
-                        * but not yet marked).
-                        */
-                       unp->unp_gcflags |= UNP_GCMARK;
+                       unp->unp_gcflags &= ~UNP_GCDEAD;
 
                        so = unp->unp_socket;
                        solock(so);
-                       unp_scan(so->so_rcv.sb_mb, unp_mark);
+                       unp_scan(so->so_rcv.sb_mb, unp_restore_gcrefs);
                        sounlock(so, SL_LOCKED);
+
+                       KASSERT(nunref > 0);
+                       nunref--;
                }
-       } while (unp_defer);
+       } while (unp_defer > 0);
 
        /*
         * If there are any unreferenced sockets, then for each dispose
@@ -1238,6 +1249,7 @@ unp_gc(void *arg __unused)
                        }
                }
        }
+
        unp_gcing = 0;
 unlock:
        rw_exit_write(&unp_gc_lock);
@@ -1281,7 +1293,25 @@ unp_scan(struct mbuf *m0, void (*op)(struct fdpass *, int))
 }
 
 void
-unp_mark(struct fdpass *rp, int nfds)
+unp_discard(struct fdpass *rp, int nfds)
+{
+       struct unp_deferral *defer;
+
+       /* copy the file pointers to a deferral structure */
+       defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
+       defer->ud_n = nfds;
+       memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
+       memset(rp, 0, sizeof(*rp) * nfds);
+
+       rw_enter_write(&unp_df_lock);
+       SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);
+       rw_exit_write(&unp_df_lock);
+
+       task_add(systqmp, &unp_gc_task);
+}
+
+void
+unp_remove_gcrefs(struct fdpass *rp, int nfds)
 {
        struct unpcb *unp;
        int i;
@@ -1291,36 +1321,33 @@ unp_mark(struct fdpass *rp, int nfds)
        for (i = 0; i < nfds; i++) {
                if (rp[i].fp == NULL)
                        continue;
-
-               unp = fptounp(rp[i].fp);
-               if (unp == NULL)
-                       continue;
-
-               if (unp->unp_gcflags & (UNP_GCMARK|UNP_GCDEFER))
+               if ((unp = fptounp(rp[i].fp)) == NULL)
                        continue;
-
-               unp_defer++;
-               unp->unp_gcflags |= UNP_GCDEFER;
-               unp->unp_gcflags &= ~UNP_GCDEAD;
+               if (unp->unp_gcflags & UNP_GCDEAD) {
+                       KASSERT(unp->unp_gcrefs > 0);
+                       unp->unp_gcrefs--;
+               }
        }
 }
 
 void
-unp_discard(struct fdpass *rp, int nfds)
+unp_restore_gcrefs(struct fdpass *rp, int nfds)
 {
-       struct unp_deferral *defer;
-
-       /* copy the file pointers to a deferral structure */
-       defer = malloc(sizeof(*defer) + sizeof(*rp) * nfds, M_TEMP, M_WAITOK);
-       defer->ud_n = nfds;
-       memcpy(&defer->ud_fp[0], rp, sizeof(*rp) * nfds);
-       memset(rp, 0, sizeof(*rp) * nfds);
+       struct unpcb *unp;
+       int i;
 
-       rw_enter_write(&unp_df_lock);
-       SLIST_INSERT_HEAD(&unp_deferred, defer, ud_link);
-       rw_exit_write(&unp_df_lock);
+       rw_assert_wrlock(&unp_gc_lock);
 
-       task_add(systqmp, &unp_gc_task);
+       for (i = 0; i < nfds; i++) {
+               if (rp[i].fp == NULL)
+                       continue;
+               if ((unp = fptounp(rp[i].fp)) == NULL)
+                       continue;
+               if (unp->unp_gcflags & UNP_GCDEAD) {
+                       unp->unp_gcrefs++;
+                       unp_defer++;
+               }
+       }
 }
 
 int
index 5def517..27aa0ba 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: unpcb.h,v 1.21 2021/12/07 01:19:47 mvs Exp $  */
+/*     $OpenBSD: unpcb.h,v 1.22 2021/12/26 23:41:41 mvs Exp $  */
 /*     $NetBSD: unpcb.h,v 1.6 1994/06/29 06:46:08 cgd Exp $    */
 
 /*
@@ -74,6 +74,7 @@ struct        unpcb {
        SLIST_ENTRY(unpcb) unp_nextref; /* [U] link in unp_refs list */
        struct  mbuf *unp_addr;         /* [U] bound address of socket */
        long    unp_msgcount;           /* [G] references from socket rcv buf */
+       long    unp_gcrefs;             /* [G] references from gc */
        int     unp_flags;              /* [U] this unpcb contains peer eids */
        int     unp_gcflags;            /* [G] garbge collector flags */
        struct  sockpeercred unp_connid;/* [U] id of peer process */
@@ -92,9 +93,7 @@ struct        unpcb {
 /*
  * flag bits in unp_gcflags
  */
-#define UNP_GCMARK     0x01            /* mark during unp_gc() */
-#define UNP_GCDEFER    0x02            /* ref'd, but not marked in this pass */
-#define UNP_GCDEAD     0x04            /* unref'd in this pass */
+#define UNP_GCDEAD     0x01            /* unp could be dead */
 
 #define        sotounpcb(so)   ((struct unpcb *)((so)->so_pcb))