From: gkoehler
Date: Fri, 21 May 2021 00:39:35 +0000 (+0000)
Subject: Fix __ppc_lock for page faults that recursively grab the lock.
X-Git-Url: http://artulab.com/gitweb/?a=commitdiff_plain;h=68ea07c54f6b15b1df76e0af2ff20a71c382d6cd;p=openbsd

Fix __ppc_lock for page faults that recursively grab the lock.

The macppc kernel, when running on G5, may get page faults while
executing itself.  Because we reorder our kernels, these faults happen
in different places in each kernel.  I got unlucky with a bsd.mp where
the function __ppc_lock() crossed a page boundary.  The fault handler
recursively called __ppc_lock() and caused my G5 to freeze or hang
very early during boot, while trying to map the framebuffer.

Change the lock to spin while (mpl->mpl_cpu != NULL).  Acquire the
lock with a single atomic write, by setting mpl_cpu and leaving
mpl_count at 0.  Page faults that recursively call __ppc_lock() and
__ppc_unlock() should no longer corrupt the lock.  In case we hold
the lock but get a page fault before membar_enter() or after
membar_exit(), the recursive calls now have memory barriers.

Delete some unused functions.  In the past, __ppc_lock was __mp_lock,
but today, the only __ppc_lock is PMAP_HASH_LOCK.

ok kettenis@
---

diff --git a/sys/arch/powerpc/include/mplock.h b/sys/arch/powerpc/include/mplock.h
index 6ff628bf4bc..962270e7067 100644
--- a/sys/arch/powerpc/include/mplock.h
+++ b/sys/arch/powerpc/include/mplock.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: mplock.h,v 1.4 2020/04/15 08:09:00 mpi Exp $	*/
+/*	$OpenBSD: mplock.h,v 1.5 2021/05/21 00:39:35 gkoehler Exp $	*/
 
 /*
  * Copyright (c) 2004 Niklas Hallqvist.  All rights reserved.
@@ -30,13 +30,14 @@
 #define __USE_MI_MPLOCK
 
 /*
+ * __ppc_lock exists because pte_spill_r() can't use __mp_lock.
  * Really simple spinlock implementation with recursive capabilities.
  * Correctness is paramount, no fancyness allowed.
  */
 
 struct __ppc_lock {
-	volatile struct cpu_info *mpl_cpu;
-	volatile long	mpl_count;
+	struct cpu_info *volatile mpl_cpu;
+	long	mpl_count;
 };
 
 #ifndef _LOCORE
@@ -44,10 +45,6 @@ struct __ppc_lock {
 void __ppc_lock_init(struct __ppc_lock *);
 void __ppc_lock(struct __ppc_lock *);
 void __ppc_unlock(struct __ppc_lock *);
-int __ppc_release_all(struct __ppc_lock *);
-int __ppc_release_all_but_one(struct __ppc_lock *);
-void __ppc_acquire_count(struct __ppc_lock *, int);
-int __ppc_lock_held(struct __ppc_lock *, struct cpu_info *);
 
 #endif
 
diff --git a/sys/arch/powerpc/powerpc/lock_machdep.c b/sys/arch/powerpc/powerpc/lock_machdep.c
index c7e865df1ba..8c5fcd77f9c 100644
--- a/sys/arch/powerpc/powerpc/lock_machdep.c
+++ b/sys/arch/powerpc/powerpc/lock_machdep.c
@@ -1,6 +1,7 @@
-/*	$OpenBSD: lock_machdep.c,v 1.9 2020/04/15 08:09:00 mpi Exp $	*/
+/*	$OpenBSD: lock_machdep.c,v 1.10 2021/05/21 00:39:35 gkoehler Exp $	*/
 
 /*
+ * Copyright (c) 2021 George Koehler
  * Copyright (c) 2007 Artur Grabowski
  *
  * Permission to use, copy, modify, and distribute this software for any
@@ -22,10 +23,21 @@
 
 #include 
 #include 
-#include 
 #include 
 
+/*
+ * If __ppc_lock() crosses a page boundary in the kernel text, then it
+ * may cause a page fault (on G5 with ppc_nobat), and pte_spill_r()
+ * would recursively call __ppc_lock().  The lock must be in a valid
+ * state when the page fault happens.  We acquire or release the lock
+ * with a 32-bit atomic write to mpl_cpu, so the lock is always in a
+ * valid state, before or after the write.
+ *
+ * Acquired the lock:	mpl->mpl_cpu == curcpu()
+ * Released the lock:	mpl->mpl_cpu == NULL
+ */
+
 void
 __ppc_lock_init(struct __ppc_lock *lock)
 {
@@ -46,12 +58,12 @@ static __inline void
 __ppc_lock_spin(struct __ppc_lock *mpl)
 {
 #ifndef MP_LOCKDEBUG
-	while (mpl->mpl_count != 0)
+	while (mpl->mpl_cpu != NULL)
 		CPU_BUSY_CYCLE();
 #else
 	int nticks = __mp_lock_spinout;
 
-	while (mpl->mpl_count != 0 && --nticks > 0)
+	while (mpl->mpl_cpu != NULL && --nticks > 0)
 		CPU_BUSY_CYCLE();
 
 	if (nticks == 0) {
@@ -65,32 +77,33 @@ void
 __ppc_lock(struct __ppc_lock *mpl)
 {
 	/*
-	 * Please notice that mpl_count gets incremented twice for the
-	 * first lock. This is on purpose. The way we release the lock
-	 * in mp_unlock is to decrement the mpl_count and then check if
-	 * the lock should be released. Since mpl_count is what we're
-	 * spinning on, decrementing it in mpl_unlock to 0 means that
-	 * we can't clear mpl_cpu, because we're no longer holding the
-	 * lock. In theory mpl_cpu doesn't need to be cleared, but it's
-	 * safer to clear it and besides, setting mpl_count to 2 on the
-	 * first lock makes most of this code much simpler.
+	 * Please notice that mpl_count stays at 0 for the first lock.
+	 * A page fault might recursively call __ppc_lock() after we
+	 * set mpl_cpu, but before we can increase mpl_count.
+	 *
+	 * After we acquire the lock, we need a "bc; isync" memory
+	 * barrier, but we might not reach the barrier before the next
+	 * page fault.  Then the fault's recursive __ppc_lock() must
+	 * have a barrier.  membar_enter() is just "isync" and must
+	 * come after a conditional branch for holding the lock.
 	 */
 
 	while (1) {
-		int s;
-
-		s = ppc_intr_disable();
-		if (atomic_cas_ulong(&mpl->mpl_count, 0, 1) == 0) {
+		struct cpu_info *owner = mpl->mpl_cpu;
+		struct cpu_info *ci = curcpu();
+
+		if (owner == NULL) {
+			/* Try to acquire the lock. */
+			if (atomic_cas_ptr(&mpl->mpl_cpu, NULL, ci) == NULL) {
+				membar_enter();
+				break;
+			}
+		} else if (owner == ci) {
+			/* We hold the lock, but might need a barrier. */
 			membar_enter();
-			mpl->mpl_cpu = curcpu();
-		}
-
-		if (mpl->mpl_cpu == curcpu()) {
 			mpl->mpl_count++;
-			ppc_intr_enable(s);
 			break;
 		}
-		ppc_intr_enable(s);
 
 		__ppc_lock_spin(mpl);
 	}
@@ -99,8 +112,6 @@ __ppc_lock(struct __ppc_lock *mpl)
 void
 __ppc_unlock(struct __ppc_lock *mpl)
 {
-	int s;
-
 #ifdef MP_LOCKDEBUG
 	if (mpl->mpl_cpu != curcpu()) {
 		db_printf("__ppc_unlock(%p): not held lock\n", mpl);
@@ -108,63 +119,16 @@ __ppc_unlock(struct __ppc_lock *mpl)
 	}
 #endif
 
-	s = ppc_intr_disable();
-	if (--mpl->mpl_count == 1) {
-		mpl->mpl_cpu = NULL;
+	/*
+	 * If we get a page fault after membar_exit() and before
+	 * releasing the lock, then the recursive call to
+	 * __ppc_unlock() must also membar_exit().
+	 */
+	if (mpl->mpl_count == 0) {
 		membar_exit();
-		mpl->mpl_count = 0;
-	}
-	ppc_intr_enable(s);
-}
-
-int
-__ppc_release_all(struct __ppc_lock *mpl)
-{
-	int rv = mpl->mpl_count - 1;
-	int s;
-
-#ifdef MP_LOCKDEBUG
-	if (mpl->mpl_cpu != curcpu()) {
-		db_printf("__ppc_release_all(%p): not held lock\n", mpl);
-		db_enter();
-	}
-#endif
-
-	s = ppc_intr_disable();
-	mpl->mpl_cpu = NULL;
-	membar_exit();
-	mpl->mpl_count = 0;
-	ppc_intr_enable(s);
-
-	return (rv);
-}
-
-int
-__ppc_release_all_but_one(struct __ppc_lock *mpl)
-{
-	int rv = mpl->mpl_count - 2;
-
-#ifdef MP_LOCKDEBUG
-	if (mpl->mpl_cpu != curcpu()) {
-		db_printf("__ppc_release_all_but_one(%p): not held lock\n", mpl);
-		db_enter();
+		mpl->mpl_cpu = NULL;	/* Release the lock. */
+	} else {
+		membar_exit();
+		mpl->mpl_count--;
 	}
-#endif
-
-	mpl->mpl_count = 2;
-
-	return (rv);
-}
-
-void
-__ppc_acquire_count(struct __ppc_lock *mpl, int count)
-{
-	while (count--)
-		__ppc_lock(mpl);
-}
-
-int
-__ppc_lock_held(struct __ppc_lock *mpl, struct cpu_info *ci)
-{
-	return mpl->mpl_cpu == ci;
 }
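
[Editor's note: the sketch below is a minimal userland model of the
recursion-safe lock above, for readers who want to experiment with the
algorithm outside the kernel.  It is written under stated assumptions,
not kernel code: C11 atomics stand in for atomic_cas_ptr(),
membar_enter(), and membar_exit(); struct cpu, my_cpu(), model_lock(),
and model_unlock() are hypothetical names standing in for
struct cpu_info, curcpu(), __ppc_lock(), and __ppc_unlock(); the spin
loop omits CPU_BUSY_CYCLE().  It models only the ordering trick: the
lock is acquired and released by a single atomic write to the owner
field, so a recursive caller always observes a valid lock state.]

#include <assert.h>
#include <stdatomic.h>
#include <stddef.h>

struct cpu { int id; };			/* stand-in for struct cpu_info */

struct model_lock {
	_Atomic(struct cpu *) mpl_cpu;	/* owner; NULL when released */
	long mpl_count;			/* extra recursion depth; only the
					   owner ever touches this */
};

static struct cpu cpu0;

static struct cpu *
my_cpu(void)				/* stand-in for curcpu() */
{
	return &cpu0;
}

static void
model_lock(struct model_lock *mpl)
{
	struct cpu *ci = my_cpu();
	struct cpu *owner;

	for (;;) {
		owner = atomic_load_explicit(&mpl->mpl_cpu,
		    memory_order_relaxed);
		if (owner == NULL) {
			/*
			 * Acquire with one atomic write: set mpl_cpu,
			 * leave mpl_count at 0.  A recursive caller
			 * interrupting right after this sees a valid,
			 * held lock.
			 */
			if (atomic_compare_exchange_strong_explicit(
			    &mpl->mpl_cpu, &owner, ci,
			    memory_order_acquire, memory_order_relaxed))
				return;
		} else if (owner == ci) {
			/* Recursive entry: barrier, then bump count. */
			atomic_thread_fence(memory_order_acquire);
			mpl->mpl_count++;
			return;
		}
		/* Another CPU holds the lock: spin. */
	}
}

static void
model_unlock(struct model_lock *mpl)
{
	assert(atomic_load_explicit(&mpl->mpl_cpu,
	    memory_order_relaxed) == my_cpu());

	if (mpl->mpl_count == 0) {
		/* Outermost unlock: release with one atomic write. */
		atomic_store_explicit(&mpl->mpl_cpu, NULL,
		    memory_order_release);
	} else {
		/* Recursive unlock: barrier, then drop the count. */
		atomic_thread_fence(memory_order_release);
		mpl->mpl_count--;
	}
}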
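[The nesting behavior the commit message describes can then be
smoke-tested with the same hypothetical model, single-threaded for
brevity: an inner lock/unlock pair, standing in for a recursive page
fault, leaves the outer hold intact.]

int
main(void)
{
	struct model_lock l = { NULL, 0 };

	model_lock(&l);		/* outer acquire: mpl_cpu set, count 0 */
	model_lock(&l);		/* "page fault": recursion, count 1 */
	model_unlock(&l);	/* inner release: count 0, still held */
	model_unlock(&l);	/* outermost release: mpl_cpu cleared */

	assert(l.mpl_cpu == NULL && l.mpl_count == 0);
	return 0;
}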