Our read/write lock implementation was not fair to writers. When
authorbluhm <bluhm@openbsd.org>
Tue, 10 May 2022 16:56:16 +0000 (16:56 +0000)
committerbluhm <bluhm@openbsd.org>
Tue, 10 May 2022 16:56:16 +0000 (16:56 +0000)
multiple IP forwarding threads were processing packets and holding
the shared net lock, the exclusive net lock was blocked permanently.
This could result in ping times well above 10 seconds.
Add the RWLOCK_WRWANT bit to the check mask of readers.  Then they
cannot grab the lock if a writer is also waiting.  This logic was
already present in revision 1.3, but got lost during refactoring.
When exiting the lock, there exists a race when the RWLOCK_WRWANT
bit gets deleted.  Add a comment that was present until revision
1.8 to document it.  The race itself is not easy to fix and had no
impact during testing.
OK sashan@

sys/kern/kern_rwlock.c

index d79b597..3edb80e 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: kern_rwlock.c,v 1.47 2021/02/08 08:18:45 mpi Exp $    */
+/*     $OpenBSD: kern_rwlock.c,v 1.48 2022/05/10 16:56:16 bluhm Exp $  */
 
 /*
  * Copyright (c) 2002, 2003 Artur Grabowski <art@openbsd.org>
@@ -81,7 +81,7 @@ static const struct rwlock_op {
        },
        {       /* RW_READ */
                RWLOCK_READ_INCR,
-               RWLOCK_WRLOCK,
+               RWLOCK_WRLOCK | RWLOCK_WRWANT,
                RWLOCK_WAIT,
                0,
                PLOCK
@@ -103,7 +103,7 @@ rw_enter_read(struct rwlock *rwl)
 {
        unsigned long owner = rwl->rwl_owner;
 
-       if (__predict_false((owner & RWLOCK_WRLOCK) ||
+       if (__predict_false((owner & (RWLOCK_WRLOCK | RWLOCK_WRWANT)) ||
            rw_cas(&rwl->rwl_owner, owner, owner + RWLOCK_READ_INCR)))
                rw_enter(rwl, RW_READ);
        else {
@@ -345,6 +345,10 @@ rw_do_exit(struct rwlock *rwl, unsigned long wrlock)
                else
                        set = (owner - RWLOCK_READ_INCR) &
                                ~(RWLOCK_WAIT|RWLOCK_WRWANT);
+               /*
+                * Potential MP race here.  If the owner had WRWANT set, we
+                * cleared it and a reader can sneak in before a writer.
+                */
        } while (__predict_false(rw_cas(&rwl->rwl_owner, owner, set)));
 
        if (owner & RWLOCK_WAIT)