From 65a96b19f714255913c65f6de4ea2f0897fe7163 Mon Sep 17 00:00:00 2001 From: niklas Date: Thu, 17 Apr 1997 01:25:16 +0000 Subject: [PATCH] Removal of race conditions. Inspired by Charles Hannum's reorganization of the vm_collapse logic, although not used verbatim. We no longer collapse objects from the pagedaemon as that is not necessary anymore with the more aggressive collapses that gets done. This also increases performance of loaded systems. Much KNF too. --- sys/vm/kern_lock.c | 3 +- sys/vm/swap_pager.c | 102 +++-- sys/vm/vm_extern.h | 11 +- sys/vm/vm_fault.c | 35 +- sys/vm/vm_glue.c | 10 +- sys/vm/vm_kern.c | 3 +- sys/vm/vm_map.c | 21 +- sys/vm/vm_meter.c | 4 +- sys/vm/vm_object.c | 937 +++++++++++++++++++++++--------------------- sys/vm/vm_object.h | 52 ++- sys/vm/vm_page.c | 358 ++++++++--------- sys/vm/vm_pageout.c | 45 ++- 12 files changed, 817 insertions(+), 764 deletions(-) diff --git a/sys/vm/kern_lock.c b/sys/vm/kern_lock.c index 2d03a11c25a..d5c87beee1e 100644 --- a/sys/vm/kern_lock.c +++ b/sys/vm/kern_lock.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_lock.c,v 1.4 1996/08/02 00:05:56 niklas Exp $ */ +/* $OpenBSD: kern_lock.c,v 1.5 1997/04/17 01:25:16 niklas Exp $ */ /* $NetBSD: kern_lock.c,v 1.10 1994/10/30 19:11:09 cgd Exp $ */ /* @@ -73,6 +73,7 @@ #include #include +#include /* XXX */ #include diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index 7ace4838f1b..4774428adcb 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -1,4 +1,4 @@ -/* $OpenBSD: swap_pager.c,v 1.9 1997/03/05 12:49:38 niklas Exp $ */ +/* $OpenBSD: swap_pager.c,v 1.10 1997/04/17 01:25:16 niklas Exp $ */ /* $NetBSD: swap_pager.c,v 1.27 1996/03/16 23:15:20 christos Exp $ */ /* @@ -272,7 +272,7 @@ swap_pager_alloc(handle, size, prot, foff) */ if (vm_object_lookup(pager) == NULL) panic("swap_pager_alloc: bad object"); - return(pager); + return (pager); } } /* @@ -282,7 +282,7 @@ swap_pager_alloc(handle, size, prot, foff) waitok = handle ? 
M_WAITOK : M_NOWAIT; pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, waitok); if (pager == NULL) - return(NULL); + return (NULL); swp = (sw_pager_t)malloc(sizeof *swp, M_VMPGDATA, waitok); if (swp == NULL) { #ifdef DEBUG @@ -290,7 +290,7 @@ swap_pager_alloc(handle, size, prot, foff) printf("swpg_alloc: swpager malloc failed\n"); #endif free((caddr_t)pager, M_VMPAGER); - return(NULL); + return (NULL); } size = round_page(size); for (swt = swtab; swt->st_osize; swt++) @@ -303,9 +303,8 @@ swap_pager_alloc(handle, size, prot, foff) swp->sw_osize = size; swp->sw_bsize = swt->st_bsize; swp->sw_nblocks = (btodb(size) + swp->sw_bsize - 1) / swp->sw_bsize; - swp->sw_blocks = (sw_blk_t) - malloc(swp->sw_nblocks*sizeof(*swp->sw_blocks), - M_VMPGDATA, M_NOWAIT); + swp->sw_blocks = (sw_blk_t)malloc(swp->sw_nblocks * + sizeof(*swp->sw_blocks), M_VMPGDATA, M_NOWAIT); if (swp->sw_blocks == NULL) { free((caddr_t)swp, M_VMPGDATA); free((caddr_t)pager, M_VMPAGER); @@ -315,10 +314,10 @@ swap_pager_alloc(handle, size, prot, foff) swt->st_inuse--; swt->st_usecnt--; #endif - return(FALSE); + return (FALSE); } bzero((caddr_t)swp->sw_blocks, - swp->sw_nblocks * sizeof(*swp->sw_blocks)); + swp->sw_nblocks * sizeof(*swp->sw_blocks)); swp->sw_poip = swp->sw_cnt = 0; if (handle) { vm_object_t object; @@ -349,7 +348,7 @@ swap_pager_alloc(handle, size, prot, foff) printf("swpg_alloc: pg_data %p, %x of %x at %p\n", swp, swp->sw_nblocks, swp->sw_bsize, swp->sw_blocks); #endif - return(pager); + return (pager); } static void @@ -451,7 +450,7 @@ swap_pager_getpage(pager, mlist, npages, sync) m->flags &= ~PG_FAULTING; } #endif - return(rv); + return (rv); } static int @@ -475,8 +474,8 @@ swap_pager_putpage(pager, mlist, npages, sync) flags = B_WRITE; if (!sync) flags |= B_ASYNC; - return(swap_pager_io((sw_pager_t)pager->pg_data, - mlist, npages, flags)); + return (swap_pager_io((sw_pager_t)pager->pg_data, mlist, npages, + flags)); } static boolean_t @@ -500,7 +499,7 @@ swap_pager_haspage(pager, offset) printf("swpg_haspage: %p bad offset %lx, ix %x\n", swp->sw_blocks, offset, ix); #endif - return(FALSE); + return (FALSE); } swb = &swp->sw_blocks[ix]; if (swb->swb_block) @@ -513,8 +512,8 @@ swap_pager_haspage(pager, offset) "FT"[swb->swb_block && (swb->swb_mask & (1 << ix))]); #endif if (swb->swb_block && (swb->swb_mask & (1 << ix))) - return(TRUE); - return(FALSE); + return (TRUE); + return (FALSE); } static void @@ -603,16 +602,16 @@ swap_pager_io(swp, mlist, npages, flags) #ifdef DEBUG if ((flags & B_READ) == 0 && (swpagerdebug & SDB_ANOM)) { printf("swap_pager_io: no swap block on write\n"); - return(VM_PAGER_BAD); + return (VM_PAGER_BAD); } #endif - return(VM_PAGER_FAIL); + return (VM_PAGER_FAIL); } swb = &swp->sw_blocks[ix]; off = off % dbtob(swp->sw_bsize); if ((flags & B_READ) && (swb->swb_block == 0 || (swb->swb_mask & (1 << atop(off))) == 0)) - return(VM_PAGER_FAIL); + return (VM_PAGER_FAIL); /* * For reads (pageins) and synchronous writes, we clean up @@ -645,7 +644,7 @@ swap_pager_io(swp, mlist, npages, flags) printf("%s: no available io headers\n", "swap_pager_io"); #endif - return(VM_PAGER_AGAIN); + return (VM_PAGER_AGAIN); } } @@ -667,7 +666,7 @@ swap_pager_io(swp, mlist, npages, flags) * trying again (the pageout daemon's current response * to AGAIN) so we just return FAIL. 
*/ - return(VM_PAGER_FAIL); + return (VM_PAGER_FAIL); } #ifdef DEBUG if (swpagerdebug & (SDB_FULL|SDB_ALLOCBLK)) @@ -687,7 +686,7 @@ swap_pager_io(swp, mlist, npages, flags) printf("%s: no KVA space to map pages\n", "swap_pager_io"); #endif - return(VM_PAGER_AGAIN); + return (VM_PAGER_AGAIN); } /* @@ -795,7 +794,7 @@ swap_pager_io(swp, mlist, npages, flags) if (swpagerdebug & SDB_IO) printf("swpg_io: IO started: bp %p\n", bp); #endif - return(VM_PAGER_PEND); + return (VM_PAGER_PEND); } s = splbio(); #ifdef DEBUG @@ -836,7 +835,7 @@ swap_pager_io(swp, mlist, npages, flags) printf("swpg_io: IO error\n"); #endif vm_pager_unmap_pages(kva, npages); - return(rv); + return (rv); } static void @@ -926,8 +925,7 @@ swap_pager_clean(rw) * Done with the object, decrement the paging count * and unlock it. */ - if (--object->paging_in_progress == 0) - wakeup(object); + vm_object_paging_end(object); vm_object_unlock(object); /* @@ -1066,7 +1064,7 @@ swap_pager_remove(pager, from, to) /* Special case stupid ranges. */ if (to > 0 && from >= to) - return(0); + return (0); swp = (sw_pager_t)pager->pg_data; @@ -1079,14 +1077,14 @@ swap_pager_remove(pager, from, to) * be created without any pages put into it? */ if (swp->sw_cnt == 0) - return(0); + return (0); bsize = dbtob(swp->sw_bsize); blk = from / bsize; /* Another fast one.. no blocks in range. */ if (blk >= swp->sw_nblocks) - return(0); + return (0); bit = atop(from % bsize); /* @@ -1149,18 +1147,18 @@ swap_pager_remove(pager, from, to) if (swp->sw_cnt < 0) panic("swap_pager_remove: sw_cnt < 0"); #endif - return(cnt); + return (cnt); } /* - * swap_pager_next: + * swap_pager_next: * - * This is called via the vm_pager_next path and - * will return the offset of the next page (addresswise) - * which this pager is backing. If there are no more - * pages we will return the size of the pager's managed - * space (which by definition is larger than any page's - * offset). + * This is called via the vm_pager_next path and + * will return the offset of the next page (addresswise) + * which this pager is backing. If there are no more + * pages we will return the size of the pager's managed + * space (which by definition is larger than any page's + * offset). */ static vm_offset_t swap_pager_next(pager, offset) @@ -1179,22 +1177,22 @@ swap_pager_next(pager, offset) swp = (sw_pager_t)pager->pg_data; /* - * If we back no pages, just return our size. XXX Can - * this ever be the case? At least all remove calls - * should be through vm_object_remove_from_pager which - * also deallocates the pager when it no longer backs any - * pages. Left is the initial case: can a swap-pager - * be created without any pages put into it? + * If we back no pages, just return our size. XXX Can + * this ever be the case? At least all remove calls + * should be through vm_object_remove_from_pager which + * also deallocates the pager when it no longer backs any + * pages. Left is the initial case: can a swap-pager + * be created without any pages put into it? */ if (swp->sw_cnt == 0) - return(swp->sw_osize); + return (swp->sw_osize); bsize = dbtob(swp->sw_bsize); blk = offset / bsize; - /* Another fast one.. no blocks in range. */ + /* Another fast one.. no blocks in range. 
*/ if (blk >= swp->sw_nblocks) - return(swp->sw_osize); + return (swp->sw_osize); bit = atop(offset % bsize); to_blk = swp->sw_osize / bsize; to_bit = atop(swp->sw_osize % bsize); @@ -1219,7 +1217,7 @@ swap_pager_next(pager, offset) */ mask &= swb->swb_mask; if (mask) - return(blk * bsize + (ffs (mask) - 1) * PAGE_SIZE); + return (blk * bsize + (ffs (mask) - 1) * PAGE_SIZE); /* * If we handled the end of range now, this @@ -1233,7 +1231,7 @@ swap_pager_next(pager, offset) swb++; mask = ~0; } - return swp->sw_osize; + return (swp->sw_osize); } /* @@ -1246,7 +1244,7 @@ swap_pager_count(pager) vm_pager_t pager; { #ifndef notyet - return((sw_pager_t)pager->pg_data)->sw_cnt; + return ((sw_pager_t)pager->pg_data)->sw_cnt; #else sw_pager_t swp; sw_blk_t swb; @@ -1254,10 +1252,10 @@ swap_pager_count(pager) swp = (sw_pager_t)pager->pg_data; if (swp->sw_blocks == NULL) - return 0; + return (0); for (i = 0; i < swp->sw_nblocks; i++) cnt += count_bits(swp->sw_blocks[i].swb_mask); - return cnt; + return (cnt); #endif } @@ -1276,5 +1274,5 @@ count_bits(x) cnt += x & 1; x >>= 1; } - return(cnt); + return (cnt); } diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h index 14346cb8a05..964adb26dec 100644 --- a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_extern.h,v 1.9 1997/01/07 05:37:35 tholo Exp $ */ +/* $OpenBSD: vm_extern.h,v 1.10 1997/04/17 01:25:17 niklas Exp $ */ /* $NetBSD: vm_extern.h,v 1.20 1996/04/23 12:25:23 christos Exp $ */ /*- @@ -98,7 +98,13 @@ void swapout_threads __P((void)); int swfree __P((struct proc *, int)); void swstrategy __P((struct buf *)); void thread_block __P((void)); -void thread_sleep __P((void *, simple_lock_t, boolean_t)); +void thread_sleep_msg __P((void *, simple_lock_t, + boolean_t, char *)); + +/* backwards compatibility */ +#define thread_sleep(event, lock, ruptible) \ + thread_sleep_msg((event), (lock), (ruptible), "thrd_sleep") + /* * This define replaces a thread_wakeup prototype, as thread_wakeup * was solely a wrapper around wakeup. 
@@ -145,7 +151,6 @@ int vsunlock __P((caddr_t, u_int)); /* Machine dependent portion */ void vmapbuf __P((struct buf *, vm_size_t)); void vunmapbuf __P((struct buf *, vm_size_t)); -void remrq __P((struct proc *)); void pagemove __P((caddr_t, caddr_t, size_t)); #ifdef __FORK_BRAINDAMAGE int cpu_fork __P((struct proc *, struct proc *)); diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index e591f555d16..5a28eb128ab 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_fault.c,v 1.5 1996/11/23 21:47:14 kstailey Exp $ */ +/* $OpenBSD: vm_fault.c,v 1.6 1997/04/17 01:25:17 niklas Exp $ */ /* $NetBSD: vm_fault.c,v 1.18 1996/05/20 17:40:02 mrg Exp $ */ /* @@ -145,12 +145,12 @@ vm_fault(map, vaddr, fault_type, change_wiring) } #define UNLOCK_THINGS { \ - object->paging_in_progress--; \ + vm_object_paging_end(object); \ vm_object_unlock(object); \ if (object != first_object) { \ vm_object_lock(first_object); \ FREE_PAGE(first_m); \ - first_object->paging_in_progress--; \ + vm_object_paging_end(first_object); \ vm_object_unlock(first_object); \ } \ UNLOCK_MAP; \ @@ -191,11 +191,7 @@ vm_fault(map, vaddr, fault_type, change_wiring) vm_object_lock(first_object); first_object->ref_count++; -#ifdef DIAGNOSTIC - if (first_object->paging_in_progress == 0xdead) - panic("vm_fault: first_object deallocated"); -#endif - first_object->paging_in_progress++; + vm_object_paging_begin(first_object); /* * INVARIANTS (through entire routine): @@ -407,7 +403,7 @@ vm_fault(map, vaddr, fault_type, change_wiring) * in the top object with zeros. */ if (object != first_object) { - object->paging_in_progress--; + vm_object_paging_end(object); vm_object_unlock(object); object = first_object; @@ -425,14 +421,10 @@ vm_fault(map, vaddr, fault_type, change_wiring) else { vm_object_lock(next_object); if (object != first_object) - object->paging_in_progress--; + vm_object_paging_end(object); vm_object_unlock(object); object = next_object; -#ifdef DIAGNOSTIC - if (object->paging_in_progress == 0xdead) - panic("vm_fault: object deallocated (1)"); -#endif - object->paging_in_progress++; + vm_object_paging_begin(object); } } @@ -508,7 +500,7 @@ vm_fault(map, vaddr, fault_type, change_wiring) * We no longer need the old page or object. */ PAGE_WAKEUP(m); - object->paging_in_progress--; + vm_object_paging_end(object); vm_object_unlock(object); /* @@ -529,15 +521,10 @@ vm_fault(map, vaddr, fault_type, change_wiring) * But we have to play ugly games with * paging_in_progress to do that... 
*/ - object->paging_in_progress--; + vm_object_paging_end(object); vm_object_collapse(object); -#ifdef DIAGNOSTIC - if (object->paging_in_progress == 0xdead) - panic("vm_fault: object deallocated (2)"); -#endif - object->paging_in_progress++; - } - else { + vm_object_paging_begin(object); + } else { prot &= ~VM_PROT_WRITE; m->flags |= PG_COPYONWRITE; } diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 2851697956f..10ef84b937a 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_glue.c,v 1.22 1997/01/07 05:37:36 tholo Exp $ */ +/* $OpenBSD: vm_glue.c,v 1.23 1997/04/17 01:25:18 niklas Exp $ */ /* $NetBSD: vm_glue.c,v 1.55.4.1 1996/06/13 17:25:45 cgd Exp $ */ /* @@ -74,6 +74,7 @@ #endif #include +#include #include #include @@ -231,7 +232,7 @@ vm_fork(p1, p2) shmfork(p1, p2); #endif -#if !defined(pc532) && !defined(vax) +#if !defined(vax) /* * Allocate a wired-down (for now) pcb and kernel stack for the process */ @@ -578,10 +579,11 @@ thread_block() } void -thread_sleep(event, lock, ruptible) +thread_sleep_msg(event, lock, ruptible, msg) void *event; simple_lock_t lock; boolean_t ruptible; + char *msg; { int s = splhigh(); @@ -591,7 +593,7 @@ thread_sleep(event, lock, ruptible) curproc->p_thread = event; simple_unlock(lock); if (curproc->p_thread) - tsleep(event, PVM, "thrd_sleep", 0); + tsleep(event, PVM, msg, 0); splx(s); } diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 4ebf2bbbd3b..b15b91e71e2 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_kern.c,v 1.3 1996/08/02 00:06:01 niklas Exp $ */ +/* $OpenBSD: vm_kern.c,v 1.4 1997/04/17 01:25:18 niklas Exp $ */ /* $NetBSD: vm_kern.c,v 1.17.6.1 1996/06/13 17:21:28 cgd Exp $ */ /* @@ -74,6 +74,7 @@ #include #include +#include #include #include #include diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index f8089e268ba..af5713ea852 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_map.c,v 1.3 1996/07/23 23:54:23 deraadt Exp $ */ +/* $OpenBSD: vm_map.c,v 1.4 1997/04/17 01:25:19 niklas Exp $ */ /* $NetBSD: vm_map.c,v 1.23 1996/02/10 00:08:08 christos Exp $ */ /* @@ -1753,13 +1753,11 @@ vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) * Make a copy of the object. */ temp_object = dst_entry->object.vm_object; - vm_object_copy(src_entry->object.vm_object, - src_entry->offset, - (vm_size_t)(src_entry->end - - src_entry->start), - &dst_entry->object.vm_object, - &dst_entry->offset, - &src_needs_copy); + vm_object_copy(src_entry->object.vm_object, src_entry->offset, + (vm_size_t)(src_entry->end - src_entry->start), + &dst_entry->object.vm_object, &dst_entry->offset, + &src_needs_copy); + /* * If we didn't get a copy-object now, mark the * source map entry so that a shadow will be created @@ -1770,9 +1768,12 @@ vm_map_copy_entry(src_map, dst_map, src_entry, dst_entry) /* * The destination always needs to have a shadow - * created. + * created, unless it's a zero-fill entry. 
*/ - dst_entry->needs_copy = TRUE; + if (dst_entry->object.vm_object != NULL) + dst_entry->needs_copy = TRUE; + else + dst_entry->needs_copy = FALSE; /* * Mark the entries copy-on-write, so that write-enabling diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c index 217455c559b..3d96b889c5a 100644 --- a/sys/vm/vm_meter.c +++ b/sys/vm/vm_meter.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_meter.c,v 1.3 1996/10/23 15:38:36 deraadt Exp $ */ +/* $OpenBSD: vm_meter.c,v 1.4 1997/04/17 01:25:20 niklas Exp $ */ /* $NetBSD: vm_meter.c,v 1.18 1996/02/05 01:53:59 christos Exp $ */ /* @@ -203,7 +203,7 @@ vmtotal(totalp) entry->object.vm_object == NULL) continue; entry->object.vm_object->flags |= OBJ_ACTIVE; - paging |= entry->object.vm_object->paging_in_progress; + paging |= vm_object_paging(entry->object.vm_object); } if (paging) totalp->t_pw++; diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 183d9bb0780..951a84e0939 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -1,7 +1,37 @@ -/* $OpenBSD: vm_object.c,v 1.14 1997/03/26 18:45:31 niklas Exp $ */ -/* $NetBSD: vm_object.c,v 1.34 1996/02/28 22:35:35 gwr Exp $ */ +/* $OpenBSD: vm_object.c,v 1.15 1997/04/17 01:25:20 niklas Exp $ */ +/* $NetBSD: vm_object.c,v 1.46 1997/03/30 20:56:12 mycroft Exp $ */ -/* +/*- + * Copyright (c) 1997 Charles M. Hannum. All rights reserved. + * Copyright (c) 1997 Niklas Hallqvist. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by Charles M. Hannum. + * 4. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* * Copyright (c) 1991, 1993 * The Regents of the University of California. All rights reserved. 
* @@ -129,11 +159,12 @@ int vmdebug = VMDEBUG; #endif void _vm_object_allocate __P((vm_size_t, vm_object_t)); -int vm_object_collapse_aux __P((vm_object_t)); int vm_object_bypass __P((vm_object_t)); -void vm_object_set_shadow __P((vm_object_t, vm_object_t)); +void vm_object_collapse_internal __P((vm_object_t, vm_object_t *)); +int vm_object_overlay __P((vm_object_t)); int vm_object_remove_from_pager __P((vm_object_t, vm_offset_t, vm_offset_t)); +void vm_object_set_shadow __P((vm_object_t, vm_object_t)); /* * vm_object_init: @@ -173,8 +204,8 @@ vm_object_allocate(size) { register vm_object_t result; - result = (vm_object_t)malloc((u_long)sizeof *result, - M_VMOBJ, M_WAITOK); + result = (vm_object_t)malloc((u_long)sizeof *result, M_VMOBJ, + M_WAITOK); _vm_object_allocate(size, result); @@ -242,7 +273,7 @@ vm_object_reference(object) */ void vm_object_deallocate(object) - register vm_object_t object; + vm_object_t object; { /* * While "temp" is used for other things as well, we @@ -254,9 +285,8 @@ vm_object_deallocate(object) while (object != NULL) { /* - * The cache holds a reference (uncounted) to - * the object; we must lock it before removing - * the object. + * The cache holds a reference (uncounted) to the object; we + * must lock it before removing the object. */ vm_object_cache_lock(); @@ -266,34 +296,47 @@ vm_object_deallocate(object) */ vm_object_lock(object); if (--(object->ref_count) != 0) { + vm_object_unlock(object); + vm_object_cache_unlock(); + /* - * If this is a deallocation of a shadow - * reference (which it is unless it's the - * first time round) and this operation made - * us singly-shadowed, try to collapse us - * with our shadower. + * If this is a deallocation of a shadow reference + * (which it is unless it's the first time round) and + * this operation made us singly-shadowed, try to + * collapse us with our shadower. Otherwise we're + * ready. */ - vm_object_unlock(object); if (temp != NULL && (temp = object->shadowers.lh_first) != NULL && temp->shadowers_list.le_next == NULL) { vm_object_lock(temp); - vm_object_collapse(temp); - vm_object_unlock(temp); - } - /* - * If there are still references, then - * we are done. - */ - vm_object_cache_unlock(); - return; + /* + * This is a bit tricky: the temp object can + * go away while collapsing, check the + * vm_object_collapse_internal comments for + * details. In this case we get an object + * back to deallocate (it's done like this + * to prevent potential recursion and hence + * kernel stack overflow). In the normal case + * we won't get an object back, if so, we are + * ready and may return. + */ + vm_object_collapse_internal(temp, &object); + if (object != NULL) { + vm_object_lock(object); + vm_object_cache_lock(); + } else { + vm_object_unlock(temp); + return; + } + } else + return; } /* - * See if this object can persist. If so, enter - * it in the cache, then deactivate all of its - * pages. + * See if this object can persist. If so, enter it in the + * cache, then deactivate all of its pages. */ if (object->flags & OBJ_CANPERSIST) { @@ -315,9 +358,12 @@ vm_object_deallocate(object) vm_object_remove(object->pager); vm_object_cache_unlock(); + /* + * Deallocate the object, and move on to the backing object. 
+ */ temp = object->shadow; + vm_object_reference(temp); vm_object_terminate(object); - /* unlocks and deallocates object */ object = temp; } } @@ -337,8 +383,7 @@ vm_object_terminate(object) vm_object_t shadow_object; /* - * Setters of paging_in_progress might be interested that this object - * is going away as soon as we get a grip on it. + * Protect against simultaneous collapses. */ object->flags |= OBJ_FADING; @@ -346,10 +391,7 @@ vm_object_terminate(object) * Wait until the pageout daemon is through with the object or a * potential collapse operation is finished. */ - while (object->paging_in_progress) { - vm_object_sleep(object, object, FALSE); - vm_object_lock(object); - } + vm_object_paging_wait(object); /* * Detach the object from its shadow if we are the shadow's @@ -362,7 +404,8 @@ vm_object_terminate(object) shadow_object->copy = NULL; #if 0 else if (shadow_object->copy != NULL) - panic("vm_object_terminate: copy/shadow inconsistency"); + panic("vm_object_terminate: " + "copy/shadow inconsistency"); #endif vm_object_unlock(shadow_object); } @@ -466,10 +509,8 @@ again: /* * Wait until the pageout daemon is through with the object. */ - while (object->paging_in_progress) { - vm_object_sleep(object, object, FALSE); - vm_object_lock(object); - } + vm_object_paging_wait(object); + /* * Loop through the object page list cleaning as necessary. */ @@ -515,12 +556,7 @@ again: pmap_page_protect(VM_PAGE_TO_PHYS(p), VM_PROT_READ); if (!(p->flags & PG_CLEAN)) { p->flags |= PG_BUSY; -#ifdef DIAGNOSTIC - if (object->paging_in_progress == 0xdead) - panic("vm_object_page_clean: " - "object deallocated"); -#endif - object->paging_in_progress++; + vm_object_paging_begin(object); vm_object_unlock(object); /* * XXX if put fails we mark the page as @@ -529,12 +565,12 @@ again: */ if (vm_pager_put(object->pager, p, syncio)) { printf("%s: pager_put error\n", - "vm_object_page_clean"); + "vm_object_page_clean"); p->flags |= PG_CLEAN; noerror = FALSE; } vm_object_lock(object); - object->paging_in_progress--; + vm_object_paging_end(object); if (!de_queue && onqueue) { vm_page_lock_queues(); if (onqueue > 0) @@ -703,7 +739,7 @@ vm_object_copy(src_object, src_offset, size, (src_object->flags & OBJ_INTERNAL)) { /* - * Make another reference to the object + * Make another reference to the object. */ src_object->ref_count++; @@ -751,7 +787,7 @@ Retry1: if (!vm_object_lock_try(old_copy)) { vm_object_unlock(src_object); - /* should spin a bit here... */ + /* XXX should spin a bit here... */ vm_object_lock(src_object); goto Retry1; } @@ -815,15 +851,13 @@ Retry2: * object. Locking of new_copy not needed. We * have the only pointer. */ - src_object->ref_count--; /* remove ref. from old_copy */ vm_object_set_shadow(old_copy, new_copy); - new_copy->ref_count++; /* locking not needed - we - have the only pointer */ - vm_object_unlock(old_copy); /* done with old_copy */ + vm_object_unlock(old_copy); } - new_start = (vm_offset_t)0; /* always shadow original at 0 */ - new_end = (vm_offset_t)new_copy->size; /* for the whole object */ + /* Always shadow original at 0 for the whole object */ + new_start = (vm_offset_t)0; + new_end = (vm_offset_t)new_copy->size; /* * Point the new copy at the existing object. 
@@ -831,7 +865,6 @@ Retry2: vm_object_set_shadow(new_copy, src_object); new_copy->shadow_offset = new_start; - src_object->ref_count++; src_object->copy = new_copy; /* @@ -872,6 +905,11 @@ vm_object_shadow(object, offset, length) source = *object; +#ifdef DIAGNOSTIC + if (source == NULL) + panic("vm_object_shadow: attempt to shadow null object"); +#endif + /* * Allocate a new object with the given length */ @@ -879,14 +917,13 @@ vm_object_shadow(object, offset, length) panic("vm_object_shadow: no object for shadowing"); /* - * The new object shadows the source object, adding - * a reference to it. Our caller changes his reference - * to point to the new object, removing a reference to - * the source object. Net result: no change of reference - * count. + * The new object shadows the source object. Our caller changes his + * reference to point to the new object, removing a reference to the + * source object. */ vm_object_lock(source); vm_object_set_shadow(result, source); + source->ref_count--; vm_object_unlock(source); /* @@ -1030,7 +1067,6 @@ vm_object_remove(pager) /* * vm_object_cache_clear removes all objects from the cache. - * */ void vm_object_cache_clear() @@ -1079,7 +1115,7 @@ vm_object_remove_from_pager(object, from, to) cnt = vm_pager_remove(pager, from, to); - /* If pager became empty, remove it. */ + /* If pager became empty, remove it. */ if (cnt > 0 && vm_pager_count(pager) == 0) { vm_pager_deallocate(pager); object->pager = NULL; @@ -1087,8 +1123,15 @@ vm_object_remove_from_pager(object, from, to) return(cnt); } +#define FREE_PAGE(m) do { \ + PAGE_WAKEUP(m); \ + vm_page_lock_queues(); \ + vm_page_free(m); \ + vm_page_unlock_queues(); \ +} while(0) + /* - * vm_object_collapse_aux: + * vm_object_overlay: * * Internal function to vm_object_collapse called when * it has been shown that a collapse operation is likely @@ -1096,7 +1139,7 @@ vm_object_remove_from_pager(object, from, to) * referenced by me and that paging is not in progress. */ int -vm_object_collapse_aux(object) +vm_object_overlay(object) vm_object_t object; { vm_object_t backing_object = object->shadow; @@ -1104,35 +1147,36 @@ vm_object_collapse_aux(object) vm_size_t size = object->size; vm_offset_t offset, paged_offset; vm_page_t backing_page, page = NULL; + int rv; #ifdef DEBUG if (vmdebug & VMDEBUG_COLLAPSE) - printf("vm_object_collapse_aux(0x%x)\n", object); + printf("vm_object_overlay(0x%p)\n", object); #endif + /* + * Protect against multiple collapses. + */ + backing_object->flags |= OBJ_FADING; + /* * The algorithm used is roughly like this: - * (1) Trim a potential pager in the backing - * object so it'll only hold pages in reach. - * (2) Loop over all the resident pages in the - * shadow object and either remove them if - * they are shadowed or move them into the + * (1) Trim a potential pager in the backing object so it'll only hold + * pages in reach. + * (2) Loop over all the resident pages in the shadow object and + * either remove them if they are shadowed or move them into the * shadowing object. - * (3) Loop over the paged out pages in the - * shadow object. Start pageins on those - * that aren't shadowed, and just deallocate - * the others. In each iteration check if - * other users of these objects have caused - * pageins resulting in new resident pages. - * This can happen while we are waiting for - * a pagein of ours. If such resident pages - * turn up, restart from (2). + * (3) Loop over the paged out pages in the shadow object. 
Start + * pageins on those that aren't shadowed, and just deallocate + * the others. In each iteration check if other users of these + * objects have caused pageins resulting in new resident pages. + * This can happen while we are waiting for a page or a pagein of + * ours. If such resident pages turn up, restart from (2). */ /* - * As a first measure we know we can discard - * everything that the shadowing object doesn't - * shadow. + * As a first measure we know we can discard everything that the + * shadowing object doesn't shadow. */ if (backing_object->pager != NULL) { if (backing_offset > 0) @@ -1144,257 +1188,217 @@ vm_object_collapse_aux(object) } /* - * This is the outer loop, iterating until all resident and - * paged out pages in the shadow object are drained. + * At this point, there may still be asynchronous paging in the parent + * object. Any pages being paged in will be represented by fake pages. + * There are three cases: + * 1) The page is being paged in from the parent object's own pager. + * In this case, we just delete our copy, since it's not needed. + * 2) The page is being paged in from the backing object. We prevent + * this case by waiting for paging to complete on the backing object + * before continuing. + * 3) The page is being paged in from a backing object behind the one + * we're deleting. We'll never notice this case, because the + * backing object we're deleting won't have the page. */ - paged_offset = 0; - while (backing_object->memq.tqh_first != NULL || - backing_object->pager != NULL) { - /* - * First of all get rid of resident pages in the - * backing object. We can guarantee to remove - * every page thus we can write the while-test - * like this. - */ - while ((backing_page = backing_object->memq.tqh_first) != - NULL) { - /* - * If the page is outside the shadowing object's - * range or if the page is shadowed (either by a - * resident "non-fake" page or a paged out one) we - * can discard it right away. Otherwise we need - * to move the page to the shadowing object, - * perhaps waking up waiters for "fake" pages - * first. - */ - if (backing_page->offset < backing_offset || - (offset = backing_page->offset - backing_offset) >= - size || - ((page = vm_page_lookup(object, offset)) != NULL && - !(page->flags & PG_FAKE)) || - (object->pager != NULL && - vm_pager_has_page(object->pager, offset))) { - /* - * Just discard the page, noone needs it. - */ - vm_page_lock_queues(); - vm_page_free(backing_page); - vm_page_unlock_queues(); - } else { - /* - * If a "fake" page was found, someone may - * be waiting for it. Wake her up and - * then remove the page. - */ - if (page) { - PAGE_WAKEUP(page); - vm_page_lock_queues(); - vm_page_free(page); - vm_page_unlock_queues(); - } + vm_object_unlock(object); +retry: + vm_object_paging_wait(backing_object); - /* - * If the backing page was ever paged out, - * it was due to it being dirty at one - * point. Unless we have no pager - * allocated to the front object (thus - * will move forward the shadow's one), - * mark it dirty again so it won't be - * thrown away without being paged out to - * the front pager. - */ - if (object->pager != NULL && - vm_object_remove_from_pager(backing_object, - backing_page->offset, - backing_page->offset + PAGE_SIZE)) - backing_page->flags &= ~PG_CLEAN; + /* + * While we were asleep, the parent object might have been deleted. If + * so, the backing object will now have only one reference (the one we + * hold). 
If this happened, just deallocate the backing object and + * return failure status so vm_object_collapse() will stop. This will + * continue vm_object_deallocate() where it stopped due to our + * reference. + */ + if (backing_object->ref_count == 1) + goto fail; + vm_object_lock(object); - /* Move the page up front. */ - vm_page_rename(backing_page, object, offset); - } - } + /* + * Next, get rid of resident pages in the backing object. We can + * guarantee to remove every page thus we can write the while-test like + * this. + */ + while ((backing_page = backing_object->memq.tqh_first) != NULL) { + offset = backing_page->offset - backing_offset; - /* - * If there isn't a pager in the shadow object, we're - * ready. Take the easy way out. - */ - if (backing_object->pager == NULL) - break; +#ifdef DIAGNOSTIC + if (backing_page->flags & (PG_BUSY | PG_FAKE)) + panic("vm_object_overlay: " + "busy or fake page in backing_object"); +#endif /* - * If the shadowing object doesn't have a pager - * the easiest thing to do now is to just move the - * backing pager up front and everything is done. + * If the page is outside the shadowing object's range or if + * the page is shadowed (either by a resident page or a paged + * out one) we can discard it right away. Otherwise we need to + * move the page to the shadowing object. */ - if (object->pager == NULL) { - object->pager = backing_object->pager; - object->paging_offset = backing_object->paging_offset + - backing_offset; - backing_object->pager = NULL; - break; + if (backing_page->offset < backing_offset || offset >= size || + ((page = vm_page_lookup(object, offset)) != NULL) || + (object->pager != NULL && + vm_pager_has_page(object->pager, offset))) { + /* + * Just discard the page, noone needs it. This + * includes removing the possible backing store too. + */ + if (backing_object->pager != NULL) + vm_object_remove_from_pager(backing_object, + backing_page->offset, + backing_page->offset + PAGE_SIZE); + vm_page_lock_queues(); + vm_page_free(backing_page); + vm_page_unlock_queues(); + } else { + /* + * If the backing page was ever paged out, it was due + * to it being dirty at one point. Unless we have no + * pager allocated to the front object (thus will move + * forward the shadow's one), mark it dirty again so it + * won't be thrown away without being paged out to the + * front pager. + * + * XXX + * Should be able to move a page from one pager to + * another. + */ + if (object->pager != NULL && + vm_object_remove_from_pager(backing_object, + backing_page->offset, + backing_page->offset + PAGE_SIZE)) + backing_page->flags &= ~PG_CLEAN; + + /* Move the page up front. */ + vm_page_rename(backing_page, object, offset); } + } + + /* + * If the shadowing object doesn't have a pager the easiest + * thing to do now is to just move the backing pager up front + * and everything is done. + */ + if (object->pager == NULL && backing_object->pager != NULL) { + object->pager = backing_object->pager; + object->paging_offset = backing_object->paging_offset + + backing_offset; + backing_object->pager = NULL; + goto done; + } + + /* + * What's left to do is to find all paged out pages in the + * backing pager and either discard or move it to the front + * object. We need to recheck the resident page set as a + * pagein might have given other threads the chance to, via + * readfaults, page in another page into the resident set. In + * this case we need to retry getting rid of pages from core. 
+ */ + paged_offset = 0; + while (backing_object->pager != NULL && + (paged_offset = vm_pager_next(backing_object->pager, + paged_offset)) < backing_object->size) { + offset = paged_offset - backing_offset; /* - * What's left to do is to find all paged out - * pages in the backing pager and either discard - * or move it to the front object. We need to - * recheck the resident page set as a pagein might - * have given other threads the chance to, via - * readfaults, page in another page into the - * resident set. In this case the outer loop must - * get reentered. That is also the case if some other - * thread removes the front pager, a case that has - * been seen... + * If the parent object already has this page, delete it. + * Otherwise, start a pagein. */ - while (backing_object->memq.tqh_first == NULL && - backing_object->pager != NULL && object->pager != NULL && - (paged_offset = vm_pager_next(backing_object->pager, - paged_offset)) < backing_object->size) { + if (((page = vm_page_lookup(object, offset)) == NULL) && + (object->pager == NULL || + !vm_pager_has_page(object->pager, offset))) { + vm_object_unlock(object); + /* - * If the shadowing object has this page, get - * rid of it from the backing pager. Trust - * the loop condition to get us out of here - * quickly if we remove the last paged out page. - * - * XXX Would clustering several pages at a time - * be a win in this situation? + * First allocate a page and mark it busy so another + * thread won't try to start another pagein. */ - if (((page = vm_page_lookup(object, - paged_offset - backing_offset)) == NULL || - (page->flags & PG_FAKE)) && - !vm_pager_has_page(object->pager, - paged_offset - backing_offset)) { - /* - * If a "fake" page was found, someone - * may be waiting for it. Wake her up - * and then remove the page. - */ - if (page) { - PAGE_WAKEUP(page); - vm_page_lock_queues(); - vm_page_free(page); - vm_page_unlock_queues(); - } - /* - * Suck the page from the pager and give - * it to the shadowing object. - */ + backing_page = vm_page_alloc(backing_object, + paged_offset); + if (backing_page == NULL) { + vm_object_unlock(backing_object); + VM_WAIT; + vm_object_lock(backing_object); + goto retry; + } + backing_page->flags |= PG_BUSY; + #ifdef DEBUG - if (vmdebug & VMDEBUG_COLLAPSE_PAGEIN) - printf("vm_object_collapse_aux: " - "pagein needed\n"); + if (vmdebug & VMDEBUG_COLLAPSE_PAGEIN) + printf("vm_object_overlay: pagein needed\n"); #endif - /* - * First allocate a page and mark it - * busy so another thread won't try - * to start another pagein. - */ - for (;;) { - backing_page = - vm_page_alloc(backing_object, - paged_offset); - if (backing_page) - break; - VM_WAIT; - } - backing_page->flags |= PG_BUSY; + /* + * Second, start paging it in. If this fails, + * what can we do but punt? + */ + vm_object_paging_begin(backing_object); + vm_object_unlock(backing_object); + cnt.v_pageins++; + rv = vm_pager_get_pages(backing_object->pager, + &backing_page, 1, TRUE); + vm_object_lock(backing_object); + vm_object_paging_end(backing_object); - /* - * Second, start paging it in. If this - * fails, what can we do but punt? - * Even though the shadowing object - * isn't exactly paging we say so in - * order to not get simultaneous - * cascaded collapses. - */ - object->paging_in_progress++; - backing_object->paging_in_progress++; - if (vm_pager_get_pages(backing_object->pager, - &backing_page, 1, TRUE) != VM_PAGER_OK) { + /* + * IO error or page outside the range of the pager: + * cleanup and return an error. 
+ */ + if (rv == VM_PAGER_ERROR || rv == VM_PAGER_BAD) { + FREE_PAGE(backing_page); + goto fail; + } + + /* Handle the remaining failures. */ + if (rv != VM_PAGER_OK) { #ifdef DIAGNOSTIC - panic("vm_object_collapse_aux: " - "could not get paged out page"); + panic("vm_object_overlay: pager returned %d", + rv); +#else + FREE_PAGE(backing_page); + goto fail; #endif - return KERN_FAILURE; - } - cnt.v_pgpgin++; - - /* - * A fault might have issued other - * pagein operations. We must wait for - * them to complete, then we get to - * wakeup potential other waiters as - * well. - */ - while (backing_object->paging_in_progress != 1 - || object->paging_in_progress != 1) { - if (object->paging_in_progress != 1) { - vm_object_sleep(object, object, - FALSE); - vm_object_lock(object); - continue; - } - vm_object_sleep(backing_object, - backing_object, FALSE); - vm_object_lock(backing_object); - } - backing_object->paging_in_progress--; - object->paging_in_progress--; - thread_wakeup(backing_object); - thread_wakeup(object); - - /* - * During the pagein vm_object_terminate - * might have slept on our front object in - * order to remove it. If this is the - * case, we might as well stop all the - * collapse work right here. - */ - if (object->flags & OBJ_FADING) { - PAGE_WAKEUP(backing_page); - return KERN_FAILURE; - } - - /* - * Third, relookup in case pager changed - * page. Pager is responsible for - * disposition of old page if moved. - */ - backing_page = vm_page_lookup(backing_object, - paged_offset); + } + cnt.v_pgpgin++; - /* - * This page was once dirty, otherwise - * it hadn't been paged out in this - * shadow object. As we now remove the - * persistant store of the page, make - * sure it will be paged out in the - * front pager by dirtying it. - */ - backing_page->flags &= ~(PG_FAKE|PG_CLEAN); + /* + * Third, relookup in case pager changed page. Pager + * is responsible for disposition of old page if moved. + */ + backing_page = vm_page_lookup(backing_object, + paged_offset); - /* - * Fourth, move it up front, and wake up - * potential waiters. - */ - vm_page_rename(backing_page, object, - paged_offset - backing_offset); - PAGE_WAKEUP(backing_page); + /* + * This page was once dirty, otherwise it + * hadn't been paged out in this shadow object. + * As we now remove the persistant store of the + * page, make sure it will be paged out in the + * front pager by dirtying it. + */ + backing_page->flags &= ~(PG_FAKE | PG_CLEAN); - } - vm_object_remove_from_pager(backing_object, - paged_offset, paged_offset + PAGE_SIZE); - paged_offset += PAGE_SIZE; + /* + * Fourth, restart the process as we have slept, + * thereby letting other threads change object's + * internal structure. Don't be tempted to move it up + * front here, the parent may be gone already. + */ + PAGE_WAKEUP(backing_page); + goto retry; } + vm_object_remove_from_pager(backing_object, paged_offset, + paged_offset + PAGE_SIZE); + paged_offset += PAGE_SIZE; } +done: /* - * I've seen this condition once in an out of VM situation. - * For the moment I don't know why it occurred, although I suspect - * vm_object_page_clean can create a pager even if it won't use - * it. + * I've seen this condition once in an out of VM situation. For the + * moment I don't know why it occurred, although I suspect + * vm_object_page_clean can create a pager even if it won't use it. 
*/ if (backing_object->pager != NULL && vm_pager_count(backing_object->pager) == 0) { @@ -1404,37 +1408,32 @@ vm_object_collapse_aux(object) #ifdef DIAGNOSTIC if (backing_object->pager) - panic("vm_object_collapse_aux: backing_object->pager remains"); + panic("vm_object_overlay: backing_object->pager remains"); #endif /* * Object now shadows whatever backing_object did. - * Note that the reference to backing_object->shadow - * moves from within backing_object to within object. */ - if(backing_object->shadow) + if (backing_object->shadow) vm_object_lock(backing_object->shadow); vm_object_set_shadow(object, backing_object->shadow); - if(backing_object->shadow) { - vm_object_set_shadow(backing_object, NULL); + if (backing_object->shadow) vm_object_unlock(backing_object->shadow); - } object->shadow_offset += backing_object->shadow_offset; if (object->shadow != NULL && object->shadow->copy != NULL) - panic("vm_object_collapse_aux: we collapsed a copy-object!"); - - /* Fast cleanup is the only thing left now. */ - vm_object_unlock(backing_object); + panic("vm_object_overlay: we collapsed a copy-object!"); - simple_lock(&vm_object_list_lock); - TAILQ_REMOVE(&vm_object_list, backing_object, object_list); - vm_object_count--; - simple_unlock(&vm_object_list_lock); - - free((caddr_t)backing_object, M_VMOBJ); +#ifdef DIAGNOSTIC + if (backing_object->ref_count != 1) + panic("vm_object_overlay: backing_object still referenced"); +#endif object_collapses++; return KERN_SUCCESS; + +fail: + backing_object->flags &= ~OBJ_FADING; + return KERN_FAILURE; } /* @@ -1444,91 +1443,100 @@ vm_object_collapse_aux(object) * the object with its backing one is not allowed but there may * be an opportunity to bypass the backing object and shadow the * next object in the chain instead. + * + * If all of the pages in the backing object are shadowed by the parent + * object, the parent object no longer has to shadow the backing + * object; it can shadow the next one in the chain. */ int vm_object_bypass(object) vm_object_t object; { - register vm_object_t backing_object = object->shadow; - register vm_offset_t backing_offset = object->shadow_offset; - register vm_offset_t new_offset; - register vm_page_t p, pp; + vm_object_t backing_object = object->shadow; + vm_offset_t backing_offset = object->shadow_offset; + vm_offset_t offset, new_offset; + vm_page_t p, pp; /* - * If all of the pages in the backing object are - * shadowed by the parent object, the parent - * object no longer has to shadow the backing - * object; it can shadow the next one in the - * chain. - * - * The backing object must not be paged out - we'd - * have to check all of the paged-out pages, as - * well. + * XXX Punt if paging is going on. The issues in this case need to be + * looked into more closely. For now play it safe and return. There's + * no need to wait for it to end, as the expense will be much higher + * than the gain. */ - - if (backing_object->pager != NULL) + if (vm_object_paging(backing_object)) return KERN_FAILURE; /* - * Should have a check for a 'small' number - * of pages here. + * Should have a check for a 'small' number of pages here. */ - for (p = backing_object->memq.tqh_first; p != NULL; p = p->listq.tqe_next) { new_offset = p->offset - backing_offset; /* - * If the parent has a page here, or if - * this page falls outside the parent, - * keep going. + * If the parent has a page here, or if this page falls outside + * the parent, keep going. * - * Otherwise, the backing_object must be - * left in the chain. 
+ * Otherwise, the backing_object must be left in the chain. */ - if (p->offset >= backing_offset && new_offset < object->size && ((pp = vm_page_lookup(object, new_offset)) == NULL || - (pp->flags & PG_FAKE))) { + (pp->flags & PG_FAKE)) && + (object->pager == NULL || + !vm_pager_has_page(object->pager, new_offset))) /* * Page still needed. Can't go any further. */ return KERN_FAILURE; + } + + if (backing_object->pager) { + /* + * Should have a check for a 'small' number of pages here. + */ + for (offset = vm_pager_next(backing_object->pager, 0); + offset < backing_object->size; + offset = vm_pager_next(backing_object->pager, + offset + PAGE_SIZE)) { + new_offset = offset - backing_offset; + + /* + * If the parent has a page here, or if this page falls + * outside the parent, keep going. + * + * Otherwise, the backing_object must be left in the + * chain. + */ + if (offset >= backing_offset && + new_offset < object->size && + ((pp = vm_page_lookup(object, new_offset)) == + NULL || (pp->flags & PG_FAKE)) && + (object->pager == NULL || + !vm_pager_has_page(object->pager, new_offset))) + /* + * Page still needed. Can't go any further. + */ + return KERN_FAILURE; } } /* - * Make the parent shadow the next object - * in the chain. Deallocating backing_object - * will not remove it, since its reference - * count is at least 2. + * Object now shadows whatever backing_object did. */ - - vm_object_lock(object->shadow); if (backing_object->shadow) vm_object_lock(backing_object->shadow); vm_object_set_shadow(object, backing_object->shadow); if (backing_object->shadow) vm_object_unlock(backing_object->shadow); - vm_object_reference(object->shadow); - vm_object_unlock(object->shadow); object->shadow_offset += backing_object->shadow_offset; /* - * Backing object might have had a copy pointer - * to us. If it did, clear it. + * Backing object might have had a copy pointer to us. If it did, + * clear it. */ - if (backing_object->copy == object) backing_object->copy = NULL; - /* Drop the reference count on backing_object. - * Since its ref_count was at least 2, it - * will not vanish; so we don't need to call - * vm_object_deallocate. - */ - backing_object->ref_count--; - vm_object_unlock(backing_object); object_bypasses++; return KERN_SUCCESS; } @@ -1536,65 +1544,70 @@ vm_object_bypass(object) /* * vm_object_collapse: * - * Collapse an object with the object backing it. - * Pages in the backing object are moved into the - * parent, and the backing object is deallocated. - * - * Requires that the object be locked and the page - * queues be unlocked. + * Collapse an object with the object backing it. Pages in the backing object + * are moved into the parent, and the backing object is deallocated. * + * Requires that the object be locked and the page queues be unlocked. */ void vm_object_collapse(object) - register vm_object_t object; + vm_object_t object; +{ + vm_object_collapse_internal(object, NULL); +} + +/* + * An internal to vm_object.c entry point to the collapsing logic, used by + * vm_object_deallocate to get rid of a potential recursion case. In that case + * an object to be deallocated is fed back via the retry_object pointer. + * External users will have that parameter wired to NULL, and then we are + * allowed to do vm_object_deallocate calls that may mutually recursive call us + * again. In that case it will only get one level deep and thus not be a real + * recursion. 
+ */ +void +vm_object_collapse_internal(object, retry_object) + vm_object_t object, *retry_object; { register vm_object_t backing_object; + int rv; + + /* We'd better initialize this one if the pointer is given. */ + if (retry_object) + *retry_object = NULL; - if (!vm_object_collapse_allowed) + if (!vm_object_collapse_allowed || object == NULL) return; - while (TRUE) { + do { /* * Verify that the conditions are right for collapse: * - * The object exists and no pages in it are currently - * being paged out. + * There is a backing object, and */ - if (object == NULL || object->paging_in_progress) - return; - - /* - * There is a backing object, and - */ - if ((backing_object = object->shadow) == NULL) return; - + vm_object_lock(backing_object); + /* - * ... - * The backing object is not read_only, - * and no pages in the backing object are - * currently being paged out. - * The backing object is internal. + * ... the backing object is not read_only, is internal and is + * not already being collapsed, ... */ - - if ((backing_object->flags & OBJ_INTERNAL) == 0 || - backing_object->paging_in_progress != 0) { + if ((backing_object->flags & (OBJ_INTERNAL | OBJ_FADING)) != + OBJ_INTERNAL) { vm_object_unlock(backing_object); return; } /* - * The backing object can't be a copy-object: - * the shadow_offset for the copy-object must stay - * as 0. Furthermore (for the 'we have all the - * pages' case), if we bypass backing_object and - * just shadow the next object in the chain, old - * pages from that object would then have to be copied - * BOTH into the (former) backing_object and into the - * parent object. + * The backing object can't be a copy-object: the shadow_offset + * for the copy-object must stay as 0. Furthermore (for the + * we have all the pages' case), if we bypass backing_object + * and just shadow the next object in the chain, old pages from + * that object would then have to be copied BOTH into the + *(former) backing_object and into the parent object. */ if (backing_object->shadow != NULL && backing_object->shadow->copy != NULL) { @@ -1603,26 +1616,50 @@ vm_object_collapse(object) } /* - * If there is exactly one reference to the backing - * object, we can collapse it into the parent, - * otherwise we might be able to bypass it completely. + * Grab a reference to the backing object so that it + * can't be deallocated behind our back. */ - - if (backing_object->ref_count == 1) { - if (vm_object_collapse_aux(object) != KERN_SUCCESS) { - vm_object_unlock(backing_object); - return; - } - } else - if (vm_object_bypass(object) != KERN_SUCCESS) { - vm_object_unlock(backing_object); - return; - } + backing_object->ref_count++; + +#ifdef DIAGNOSTIC + if (backing_object->ref_count == 1) + panic("vm_object_collapse: " + "collapsing unreferenced object"); +#endif + + /* + * If there is exactly one reference to the backing object, we + * can collapse it into the parent, otherwise we might be able + * to bypass it completely. + */ + rv = backing_object->ref_count == 2 ? + vm_object_overlay(object) : vm_object_bypass(object); + + /* + * Unlock and note we're ready with the backing object. If + * we are now the last referrer this will also deallocate the + * object itself. If the backing object has been orphaned + * and still have a shadow (it is possible in case of + * KERN_FAILURE from vm_object_overlay) this might lead to a + * recursion. 
However, if we are called from + * vm_object_deallocate, retry_object is not NULL and we are + * allowed to feedback the current backing object via that + * pointer. That way the recursion case turns into an + * iteration in vm_object_deallcate instead. + */ + if (retry_object != NULL && backing_object->ref_count == 1 && + backing_object->shadow != NULL) { + *retry_object = backing_object; + vm_object_unlock(backing_object); + return; + } + vm_object_unlock(backing_object); + vm_object_deallocate(backing_object); /* * Try again with this object's new backing object. */ - } + } while (rv == KERN_SUCCESS); } /* @@ -1658,30 +1695,28 @@ vm_object_page_remove(object, start, end) /* * Routine: vm_object_coalesce * Function: Coalesces two objects backing up adjoining - * regions of memory into a single object. + * regions of memory into a single object. * * returns TRUE if objects were combined. * - * NOTE: Only works at the moment if the second object is NULL - - * if it's not, which object do we lock first? + * NOTE: Only works at the moment if the second object is NULL - + * if it's not, which object do we lock first? * * Parameters: - * prev_object First object to coalesce - * prev_offset Offset into prev_object - * next_object Second object into coalesce - * next_offset Offset into next_object + * prev_object First object to coalesce + * prev_offset Offset into prev_object + * next_object Second object into coalesce + * next_offset Offset into next_object * - * prev_size Size of reference to prev_object - * next_size Size of reference to next_object + * prev_size Size of reference to prev_object + * next_size Size of reference to next_object * * Conditions: * The object must *not* be locked. */ boolean_t -vm_object_coalesce(prev_object, next_object, - prev_offset, next_offset, - prev_size, next_size) - +vm_object_coalesce(prev_object, next_object, prev_offset, next_offset, + prev_size, next_size) register vm_object_t prev_object; vm_object_t next_object; vm_offset_t prev_offset, next_offset; @@ -1718,7 +1753,7 @@ vm_object_coalesce(prev_object, next_object, * prev_entry may be in use anyway) */ - if (prev_object->ref_count > 1 || prev_object->pager != NULL || + if (prev_object->ref_count > 1 || prev_object->pager != NULL || prev_object->shadow != NULL || prev_object->copy != NULL) { vm_object_unlock(prev_object); return(FALSE); @@ -1728,7 +1763,6 @@ vm_object_coalesce(prev_object, next_object, * Remove any pages that may still be in the object from * a previous deallocation. 
*/ - vm_object_page_remove(prev_object, prev_offset + prev_size, prev_offset + prev_size + next_size); @@ -1769,23 +1803,22 @@ _vm_object_print(object, full, pr) if (object == NULL) return; - iprintf(pr, "Object 0x%lx: size=0x%lx, res=%d, ref=%d, ", - (long)object, (long)object->size, - object->resident_page_count, object->ref_count); - (*pr)("pager=0x%lx+0x%lx, shadow=(0x%lx)+0x%lx\n", - (long)object->pager, (long)object->paging_offset, - (long)object->shadow, (long)object->shadow_offset); + iprintf(pr, "Object 0x%p: size=0x%lx, res=%d, ref=%d, ", object, + (long)object->size, object->resident_page_count, + object->ref_count); + (*pr)("pager=0x%p+0x%lx, shadow=(0x%p)+0x%lx\n", object->pager, + (long)object->paging_offset, object->shadow, + (long)object->shadow_offset); (*pr)("shadowers=("); delim = ""; for (o = object->shadowers.lh_first; o; o = o->shadowers_list.le_next) { - (*pr)("%s0x%x", delim, o); + (*pr)("%s0x%p", delim, o); delim = ", "; }; (*pr)(")\n"); - (*pr)("cache: next=0x%lx, prev=0x%lx\n", - (long)object->cached_list.tqe_next, - (long)object->cached_list.tqe_prev); + (*pr)("cache: next=0x%p, prev=0x%p\n", object->cached_list.tqe_next, + object->cached_list.tqe_prev); if (!full) return; @@ -1803,7 +1836,8 @@ _vm_object_print(object, full, pr) (*pr)(","); count++; - (*pr)("(off=0x%x,page=0x%x)", p->offset, VM_PAGE_TO_PHYS(p)); + (*pr)("(off=0x%lx,page=0x%lx)", (long)p->offset, + (long)VM_PAGE_TO_PHYS(p)); } if (count != 0) (*pr)("\n"); @@ -1813,11 +1847,10 @@ _vm_object_print(object, full, pr) /* * vm_object_set_shadow: * - * Maintain the shadow graph so that back-link consistency is - * always kept. + * Maintain the shadow graph so that back-link consistency is always kept. * - * Assumes both objects as well as the old shadow to be locked - * (unless NULL of course). + * Assumes both objects as well as the old shadow to be locked (unless NULL + * of course). 
*/ void vm_object_set_shadow(object, shadow) @@ -1827,8 +1860,8 @@ vm_object_set_shadow(object, shadow) #ifdef DEBUG if (vmdebug & VMDEBUG_SHADOW) - printf("vm_object_set_shadow(object=0x%x, shadow=0x%x) " - "old_shadow=0x%x\n", object, shadow, old_shadow); + printf("vm_object_set_shadow(object=0x%p, shadow=0x%p) " + "old_shadow=0x%p\n", object, shadow, old_shadow); if (vmdebug & VMDEBUG_SHADOW_VERBOSE) { vm_object_print(object, 0); vm_object_print(old_shadow, 0); @@ -1838,9 +1871,11 @@ vm_object_set_shadow(object, shadow) if (old_shadow == shadow) return; if (old_shadow) { + old_shadow->ref_count--; LIST_REMOVE(object, shadowers_list); } if (shadow) { + shadow->ref_count++; LIST_INSERT_HEAD(&shadow->shadowers, object, shadowers_list); } object->shadow = shadow; diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index be739e1fb11..370bab3920b 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_object.h,v 1.4 1996/12/24 20:14:32 niklas Exp $ */ +/* $OpenBSD: vm_object.h,v 1.5 1997/04/17 01:25:21 niklas Exp $ */ /* $NetBSD: vm_object.h,v 1.16 1995/03/29 22:10:28 briggs Exp $ */ /* @@ -111,6 +111,7 @@ struct vm_object { #define OBJ_INTERNAL 0x0002 /* internally created object */ #define OBJ_ACTIVE 0x0004 /* used to mark active objects */ #define OBJ_FADING 0x0008 /* tell others that the object is going away */ +#define OBJ_WAITING 0x8000 /* someone is waiting for paging to finish */ TAILQ_HEAD(vm_object_hash_head, vm_object_hash_entry); @@ -144,8 +145,53 @@ vm_object_t kmem_object; #define vm_object_lock(object) simple_lock(&(object)->Lock) #define vm_object_unlock(object) simple_unlock(&(object)->Lock) #define vm_object_lock_try(object) simple_lock_try(&(object)->Lock) -#define vm_object_sleep(event, object, interruptible) \ - thread_sleep((event), &(object)->Lock, (interruptible)) + +#define vm_object_sleep(event, object, interruptible, where) \ + do { \ + (object)->flags |= OBJ_WAITING; \ + thread_sleep_msg((event), &(object)->Lock, \ + (interruptible), (where)); \ + } while (0) + +#define vm_object_wakeup(object) \ + do { \ + if ((object)->flags & OBJ_WAITING) { \ + (object)->flags &= ~OBJ_WAITING; \ + thread_wakeup((object)); \ + } \ + } while (0) + +#define vm_object_paging(object) \ + ((object)->paging_in_progress != 0) + +#ifndef DIAGNOSTIC +#define vm_object_paging_begin(object) \ + do { \ + (object)->paging_in_progress++; \ + } while (0) +#else +#define vm_object_paging_begin(object) \ + do { \ + if ((object)->paging_in_progress == 0xdead) \ + panic("vm_object_paging_begin"); \ + (object)->paging_in_progress++; \ + } while (0) +#endif + +#define vm_object_paging_end(object) \ + do { \ + if (--((object)->paging_in_progress) == 0) \ + vm_object_wakeup((object)); \ + } while (0) + +#define vm_object_paging_wait(object) \ + do { \ + while (vm_object_paging((object))) { \ + vm_object_sleep((object), (object), FALSE, \ + "vospgw"); \ + vm_object_lock((object)); \ + } \ + } while (0) #ifdef _KERNEL vm_object_t vm_object_allocate __P((vm_size_t)); diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index 37131d3c021..c3c0bf6e460 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_page.c,v 1.4 1997/01/04 14:17:30 niklas Exp $ */ +/* $OpenBSD: vm_page.c,v 1.5 1997/04/17 01:25:21 niklas Exp $ */ /* $NetBSD: vm_page.c,v 1.28 1996/02/05 01:54:05 christos Exp $ */ /* @@ -36,7 +36,7 @@ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. 
* - * @(#)vm_page.c 8.3 (Berkeley) 3/21/94 + * @(#)vm_page.c 8.3 (Berkeley) 3/21/94 * * * Copyright (c) 1987, 1990 Carnegie-Mellon University. @@ -66,7 +66,7 @@ */ /* - * Resident memory management module. + * Resident memory management module. */ #include @@ -82,17 +82,17 @@ #ifdef MACHINE_NONCONTIG /* - * These variables record the values returned by vm_page_bootstrap, - * for debugging purposes. The implementation of pmap_steal_memory - * and pmap_startup here also uses them internally. + * These variables record the values returned by vm_page_bootstrap, + * for debugging purposes. The implementation of pmap_steal_memory + * and pmap_startup here also uses them internally. */ vm_offset_t virtual_space_start; vm_offset_t virtual_space_end; #endif /* MACHINE_NONCONTIG */ /* - * Associated with page of user-allocatable memory is a - * page structure. + * Associated with page of user-allocatable memory is a + * page structure. */ struct pglist *vm_page_buckets; /* Array of buckets */ @@ -123,13 +123,13 @@ vm_size_t page_mask; int page_shift; /* - * vm_set_page_size: + * vm_set_page_size: * - * Sets the page size, perhaps based upon the memory - * size. Must be called before any use of page-size - * dependent functions. + * Sets the page size, perhaps based upon the memory + * size. Must be called before any use of page-size + * dependent functions. * - * Sets page_shift and page_mask from cnt.v_page_size. + * Sets page_shift and page_mask from cnt.v_page_size. */ void vm_set_page_size() @@ -148,14 +148,14 @@ vm_set_page_size() #ifdef MACHINE_NONCONTIG /* - * vm_page_bootstrap: + * vm_page_bootstrap: * - * Initializes the resident memory module. + * Initializes the resident memory module. * - * Allocates memory for the page cells, and - * for the object/offset-to-page hash table headers. - * Each page cell is initialized and placed on the free list. - * Returns the range of available kernel virtual memory. + * Allocates memory for the page cells, and + * for the object/offset-to-page hash table headers. + * Each page cell is initialized and placed on the free list. + * Returns the range of available kernel virtual memory. */ void vm_page_bootstrap(startp, endp) @@ -170,55 +170,50 @@ vm_page_bootstrap(startp, endp) /* - * Initialize the locks + * Initialize the locks */ - simple_lock_init(&vm_page_queue_free_lock); simple_lock_init(&vm_page_queue_lock); /* - * Initialize the queue headers for the free queue, - * the active queue and the inactive queue. + * Initialize the queue headers for the free queue, + * the active queue and the inactive queue. */ - TAILQ_INIT(&vm_page_queue_free); TAILQ_INIT(&vm_page_queue_active); TAILQ_INIT(&vm_page_queue_inactive); /* - * Pre-allocate maps and map entries that cannot be dynamically - * allocated via malloc(). The maps include the kernel_map and - * kmem_map which must be initialized before malloc() will - * work (obviously). Also could include pager maps which would - * be allocated before kmeminit. + * Pre-allocate maps and map entries that cannot be dynamically + * allocated via malloc(). The maps include the kernel_map and + * kmem_map which must be initialized before malloc() will + * work (obviously). Also could include pager maps which would + * be allocated before kmeminit. * - * Allow some kernel map entries... this should be plenty - * since people shouldn't be cluttering up the kernel - * map (they should use their own maps). + * Allow some kernel map entries... 
this should be plenty + * since people shouldn't be cluttering up the kernel + * map (they should use their own maps). */ - kentry_data_size = round_page(MAX_KMAP*sizeof(struct vm_map) + MAX_KMAPENT*sizeof(struct vm_map_entry)); kentry_data = (vm_offset_t) pmap_steal_memory(kentry_data_size); /* - * Validate these zone addresses. + * Validate these zone addresses. */ - bzero((caddr_t) kentry_data, kentry_data_size); /* - * Allocate (and initialize) the virtual-to-physical - * table hash buckets. + * Allocate (and initialize) the virtual-to-physical + * table hash buckets. * - * The number of buckets MUST BE a power of 2, and - * the actual value is the next power of 2 greater - * than the number of physical pages in the system. + * The number of buckets MUST BE a power of 2, and + * the actual value is the next power of 2 greater + * than the number of physical pages in the system. * - * Note: - * This computation can be tweaked if desired. + * Note: + * This computation can be tweaked if desired. */ - if (vm_page_bucket_count == 0) { unsigned int npages = pmap_free_pages(); @@ -241,13 +236,12 @@ vm_page_bootstrap(startp, endp) simple_lock_init(&bucket_lock); /* - * Machine-dependent code allocates the resident page table. - * It uses VM_PAGE_INIT to initialize the page frames. - * The code also returns to us the virtual space available - * to the kernel. We don't trust the pmap module - * to get the alignment right. + * Machine-dependent code allocates the resident page table. + * It uses VM_PAGE_INIT to initialize the page frames. + * The code also returns to us the virtual space available + * to the kernel. We don't trust the pmap module + * to get the alignment right. */ - pmap_startup(&virtual_space_start, &virtual_space_end); virtual_space_start = round_page(virtual_space_start); virtual_space_end = trunc_page(virtual_space_end); @@ -261,13 +255,13 @@ vm_page_bootstrap(startp, endp) #else /* MACHINE_NONCONTIG */ /* - * vm_page_startup: + * vm_page_startup: * - * Initializes the resident memory module. + * Initializes the resident memory module. * - * Allocates memory for the page cells, and - * for the object/offset-to-page hash table headers. - * Each page cell is initialized and placed on the free list. + * Allocates memory for the page cells, and + * for the object/offset-to-page hash table headers. + * Each page cell is initialized and placed on the free list. */ void vm_page_startup(start, end) @@ -284,32 +278,29 @@ vm_page_startup(start, end) /* - * Initialize the locks + * Initialize the locks */ - simple_lock_init(&vm_page_queue_free_lock); simple_lock_init(&vm_page_queue_lock); /* - * Initialize the queue headers for the free queue, - * the active queue and the inactive queue. + * Initialize the queue headers for the free queue, + * the active queue and the inactive queue. */ - TAILQ_INIT(&vm_page_queue_free); TAILQ_INIT(&vm_page_queue_active); TAILQ_INIT(&vm_page_queue_inactive); /* - * Calculate the number of hash table buckets. + * Calculate the number of hash table buckets. * - * The number of buckets MUST BE a power of 2, and - * the actual value is the next power of 2 greater - * than the number of physical pages in the system. + * The number of buckets MUST BE a power of 2, and + * the actual value is the next power of 2 greater + * than the number of physical pages in the system. * - * Note: - * This computation can be tweaked if desired. + * Note: + * This computation can be tweaked if desired. 
*/ - if (vm_page_bucket_count == 0) { vm_page_bucket_count = 1; while (vm_page_bucket_count < atop(*end - *start)) @@ -319,7 +310,7 @@ vm_page_startup(start, end) vm_page_hash_mask = vm_page_bucket_count - 1; /* - * Allocate (and initialize) the hash table buckets. + * Allocate (and initialize) the hash table buckets. */ vm_page_buckets = (struct pglist *) pmap_bootstrap_alloc(vm_page_bucket_count * sizeof(struct pglist)); @@ -333,41 +324,37 @@ vm_page_startup(start, end) simple_lock_init(&bucket_lock); /* - * Truncate the remainder of physical memory to our page size. + * Truncate the remainder of physical memory to our page size. */ - *end = trunc_page(*end); /* - * Pre-allocate maps and map entries that cannot be dynamically - * allocated via malloc(). The maps include the kernel_map and - * kmem_map which must be initialized before malloc() will - * work (obviously). Also could include pager maps which would - * be allocated before kmeminit. + * Pre-allocate maps and map entries that cannot be dynamically + * allocated via malloc(). The maps include the kernel_map and + * kmem_map which must be initialized before malloc() will + * work (obviously). Also could include pager maps which would + * be allocated before kmeminit. * - * Allow some kernel map entries... this should be plenty - * since people shouldn't be cluttering up the kernel - * map (they should use their own maps). + * Allow some kernel map entries... this should be plenty + * since people shouldn't be cluttering up the kernel + * map (they should use their own maps). */ - kentry_data_size = round_page(MAX_KMAP*sizeof(struct vm_map) + MAX_KMAPENT*sizeof(struct vm_map_entry)); kentry_data = (vm_offset_t) pmap_bootstrap_alloc(kentry_data_size); /* - * Compute the number of pages of memory that will be - * available for use (taking into account the overhead - * of a page structure per page). + * Compute the number of pages of memory that will be + * available for use (taking into account the overhead + * of a page structure per page). */ - cnt.v_free_count = npages = (*end - *start + sizeof(struct vm_page)) / (PAGE_SIZE + sizeof(struct vm_page)); /* - * Record the extent of physical memory that the - * virtual memory system manages. + * Record the extent of physical memory that the + * virtual memory system manages. */ - first_page = *start; first_page += npages*sizeof(struct vm_page); first_page = atop(round_page(first_page)); @@ -378,17 +365,15 @@ vm_page_startup(start, end) /* - * Allocate and clear the mem entry structures. + * Allocate and clear the mem entry structures. */ - m = vm_page_array = (vm_page_t) pmap_bootstrap_alloc(npages * sizeof(struct vm_page)); /* - * Initialize the mem entry structures now, and - * put them in the free queue. + * Initialize the mem entry structures now, and + * put them in the free queue. */ - pa = first_phys_addr; while (npages--) { m->flags = 0; @@ -400,8 +385,8 @@ vm_page_startup(start, end) } /* - * Initialize vm_pages_needed lock here - don't wait for pageout - * daemon XXX + * Initialize vm_pages_needed lock here - don't wait for pageout + * daemon XXX */ simple_lock_init(&vm_pages_needed_lock); @@ -412,8 +397,8 @@ vm_page_startup(start, end) #if defined(MACHINE_NONCONTIG) && !defined(MACHINE_PAGES) /* - * We implement pmap_steal_memory and pmap_startup with the help - * of two simpler functions, pmap_virtual_space and pmap_next_page. + * We implement pmap_steal_memory and pmap_startup with the help + * of two simpler functions, pmap_virtual_space and pmap_next_page. 
*/ vm_offset_t pmap_steal_memory(size) @@ -427,22 +412,22 @@ pmap_steal_memory(size) #endif /* - * We round the size to an integer multiple. + * We round the size to an integer multiple. */ size = (size + 3) &~ 3; /* XXX */ /* - * If this is the first call to pmap_steal_memory, - * we have to initialize ourself. + * If this is the first call to pmap_steal_memory, + * we have to initialize ourself. */ if (virtual_space_start == virtual_space_end) { pmap_virtual_space(&virtual_space_start, &virtual_space_end); /* - * The initial values must be aligned properly, and - * we don't trust the pmap module to do it right. + * The initial values must be aligned properly, and + * we don't trust the pmap module to do it right. */ virtual_space_start = round_page(virtual_space_start); @@ -450,14 +435,14 @@ pmap_steal_memory(size) } /* - * Allocate virtual memory for this request. + * Allocate virtual memory for this request. */ addr = virtual_space_start; virtual_space_start += size; /* - * Allocate and map physical pages to back new virtual pages. + * Allocate and map physical pages to back new virtual pages. */ for (vaddr = round_page(addr); @@ -467,8 +452,8 @@ pmap_steal_memory(size) panic("pmap_steal_memory"); /* - * XXX Logically, these mappings should be wired, - * but some pmap modules barf if they are. + * XXX Logically, these mappings should be wired, + * but some pmap modules barf if they are. */ pmap_enter(pmap_kernel(), vaddr, paddr, @@ -491,25 +476,24 @@ pmap_startup(startp, endp) * and then allocate the page structures in one chunk. * The calculation is non-trivial. We want: * - * vmpages > (freepages - (vmpages / sizeof(vm_page_t))) + * vmpages > (freepages - (vmpages / sizeof(vm_page_t))) * * which, with some algebra, becomes: * - * vmpages > (freepages * sizeof(...) / (1 + sizeof(...))) + * vmpages > (freepages * sizeof(...) / (1 + sizeof(...))) * * The value of vm_page_count need not be exact, but must be * large enough so vm_page_array handles the index range. */ - freepages = pmap_free_pages(); /* Fudge slightly to deal with truncation error. */ freepages += 1; /* fudge */ vm_page_count = (PAGE_SIZE * freepages) / - (PAGE_SIZE + sizeof(*vm_page_array)); + (PAGE_SIZE + sizeof(*vm_page_array)); vm_page_array = (vm_page_t) - pmap_steal_memory(vm_page_count * sizeof(*vm_page_array)); + pmap_steal_memory(vm_page_count * sizeof(*vm_page_array)); #ifdef DIAGNOSTIC /* @@ -523,9 +507,9 @@ pmap_startup(startp, endp) #endif /* - * Initialize the page frames. - * Note that some page indices may not be usable - * when pmap_free_pages() counts pages in a hole. + * Initialize the page frames. + * Note that some page indices may not be usable + * when pmap_free_pages() counts pages in a hole. */ if (!pmap_next_page(&paddr)) panic("pmap_startup: can't get first page"); @@ -548,7 +532,7 @@ pmap_startup(startp, endp) /* Cannot happen; i is unsigned */ i < 0 || #endif - i >= vm_page_count) + i >= vm_page_count) panic("pmap_startup: bad i=0x%x", i); } @@ -558,22 +542,22 @@ pmap_startup(startp, endp) #endif /* MACHINE_NONCONTIG && !MACHINE_PAGES */ /* - * vm_page_hash: + * vm_page_hash: * - * Distributes the object/offset key pair among hash buckets. + * Distributes the object/offset key pair among hash buckets. * - * NOTE: This macro depends on vm_page_bucket_count being a power of 2. + * NOTE: This macro depends on vm_page_bucket_count being a power of 2. 
*/ #define vm_page_hash(object, offset) \ (((unsigned long)object+(unsigned long)atop(offset))&vm_page_hash_mask) /* - * vm_page_insert: [ internal use only ] + * vm_page_insert: [ internal use only ] * - * Inserts the given mem entry into the object/object-page - * table and object list. + * Inserts the given mem entry into the object/object-page + * table and object list. * - * The object and page must be locked. + * The object and page must be locked. */ void vm_page_insert(mem, object, offset) @@ -590,16 +574,14 @@ vm_page_insert(mem, object, offset) panic("vm_page_insert: already inserted"); /* - * Record the object/offset pair in this page + * Record the object/offset pair in this page */ - mem->object = object; mem->offset = offset; /* - * Insert it into the object_object/offset hash table + * Insert it into the object_object/offset hash table */ - bucket = &vm_page_buckets[vm_page_hash(object, offset)]; spl = splimp(); simple_lock(&bucket_lock); @@ -608,28 +590,26 @@ vm_page_insert(mem, object, offset) (void) splx(spl); /* - * Now link into the object's list of backed pages. + * Now link into the object's list of backed pages. */ - TAILQ_INSERT_TAIL(&object->memq, mem, listq); mem->flags |= PG_TABLED; /* - * And show that the object has one more resident - * page. + * And show that the object has one more resident + * page. */ - object->resident_page_count++; } /* - * vm_page_remove: [ internal use only ] - * NOTE: used by device pager as well -wfj + * vm_page_remove: [ internal use only ] + * NOTE: used by device pager as well -wfj * - * Removes the given mem entry from the object/offset-page - * table and the object page list. + * Removes the given mem entry from the object/offset-page + * table and the object page list. * - * The object and page must be locked. + * The object and page must be locked. */ void vm_page_remove(mem) @@ -649,9 +629,8 @@ vm_page_remove(mem) return; /* - * Remove from the object_object/offset hash table + * Remove from the object_object/offset hash table */ - bucket = &vm_page_buckets[vm_page_hash(mem->object, mem->offset)]; spl = splimp(); simple_lock(&bucket_lock); @@ -660,28 +639,26 @@ vm_page_remove(mem) (void) splx(spl); /* - * Now remove from the object's list of backed pages. + * Now remove from the object's list of backed pages. */ - TAILQ_REMOVE(&mem->object->memq, mem, listq); /* - * And show that the object has one fewer resident - * page. + * And show that the object has one fewer resident + * page. */ - mem->object->resident_page_count--; mem->flags &= ~PG_TABLED; } /* - * vm_page_lookup: + * vm_page_lookup: * - * Returns the page associated with the object/offset - * pair specified; if none is found, NULL is returned. + * Returns the page associated with the object/offset + * pair specified; if none is found, NULL is returned. * - * The object must be locked. No side effects. + * The object must be locked. No side effects. */ vm_page_t vm_page_lookup(object, offset) @@ -693,9 +670,8 @@ vm_page_lookup(object, offset) int spl; /* - * Search the hash table for this object/offset pair + * Search the hash table for this object/offset pair */ - bucket = &vm_page_buckets[vm_page_hash(object, offset)]; spl = splimp(); @@ -715,12 +691,12 @@ vm_page_lookup(object, offset) } /* - * vm_page_rename: + * vm_page_rename: * - * Move the given memory entry from its - * current object to the specified target object/offset. + * Move the given memory entry from its + * current object to the specified target object/offset. * - * The object must be locked. 
+ * The object must be locked. */ void vm_page_rename(mem, new_object, new_offset) @@ -739,12 +715,12 @@ vm_page_rename(mem, new_object, new_offset) } /* - * vm_page_alloc: + * vm_page_alloc: * - * Allocate and return a memory cell associated - * with this VM object/offset pair. + * Allocate and return a memory cell associated + * with this VM object/offset pair. * - * Object must be locked. + * Object must be locked. */ vm_page_t vm_page_alloc(object, offset) @@ -772,16 +748,15 @@ vm_page_alloc(object, offset) VM_PAGE_INIT(mem, object, offset); /* - * Decide if we should poke the pageout daemon. - * We do this if the free count is less than the low - * water mark, or if the free count is less than the high - * water mark (but above the low water mark) and the inactive - * count is less than its target. + * Decide if we should poke the pageout daemon. + * We do this if the free count is less than the low + * water mark, or if the free count is less than the high + * water mark (but above the low water mark) and the inactive + * count is less than its target. * - * We don't have the counts locked ... if they change a little, - * it doesn't really matter. + * We don't have the counts locked ... if they change a little, + * it doesn't really matter. */ - if (cnt.v_free_count < cnt.v_free_min || (cnt.v_free_count < cnt.v_free_target && cnt.v_inactive_count < cnt.v_inactive_target)) @@ -790,12 +765,12 @@ vm_page_alloc(object, offset) } /* - * vm_page_free: + * vm_page_free: * - * Returns the given page to the free list, - * disassociating it with any VM object. + * Returns the given page to the free list, + * disassociating it with any VM object. * - * Object and page must be locked prior to entry. + * Object and page must be locked prior to entry. */ void vm_page_free(mem) @@ -828,13 +803,13 @@ vm_page_free(mem) } /* - * vm_page_wire: + * vm_page_wire: * - * Mark this page as wired down by yet - * another map, removing it from paging queues - * as necessary. + * Mark this page as wired down by yet + * another map, removing it from paging queues + * as necessary. * - * The page queues must be locked. + * The page queues must be locked. */ void vm_page_wire(mem) @@ -859,12 +834,12 @@ vm_page_wire(mem) } /* - * vm_page_unwire: + * vm_page_unwire: * - * Release one wiring of this page, potentially - * enabling it to be paged again. + * Release one wiring of this page, potentially + * enabling it to be paged again. * - * The page queues must be locked. + * The page queues must be locked. */ void vm_page_unwire(mem) @@ -882,13 +857,13 @@ vm_page_unwire(mem) } /* - * vm_page_deactivate: + * vm_page_deactivate: * - * Returns the given page to the inactive list, - * indicating that no physical maps have access - * to this page. [Used by the physical mapping system.] + * Returns the given page to the inactive list, + * indicating that no physical maps have access + * to this page. [Used by the physical mapping system.] * - * The page queues must be locked. + * The page queues must be locked. */ void vm_page_deactivate(m) @@ -897,10 +872,9 @@ vm_page_deactivate(m) VM_PAGE_CHECK(m); /* - * Only move active pages -- ignore locked or already - * inactive ones. + * Only move active pages -- ignore locked or already + * inactive ones. */ - if (m->flags & PG_ACTIVE) { TAILQ_REMOVE(&vm_page_queue_active, m, pageq); m->flags &= ~PG_ACTIVE; @@ -923,11 +897,11 @@ vm_page_deactivate(m) } /* - * vm_page_activate: + * vm_page_activate: * - * Put the specified page on the active list (if appropriate). 
+ * Put the specified page on the active list (if appropriate). * - * The page queues must be locked. + * The page queues must be locked. */ void vm_page_activate(m) @@ -951,11 +925,11 @@ vm_page_activate(m) } /* - * vm_page_zero_fill: + * vm_page_zero_fill: * - * Zero-fill the specified page. - * Written as a standard pagein routine, to - * be used by the zero-fill object. + * Zero-fill the specified page. + * Written as a standard pagein routine, to + * be used by the zero-fill object. */ boolean_t vm_page_zero_fill(m) @@ -969,9 +943,9 @@ vm_page_zero_fill(m) } /* - * vm_page_copy: + * vm_page_copy: * - * Copy one page to another + * Copy one page to another */ void vm_page_copy(src_m, dest_m) diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index f2c9cad7509..23585120dd2 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_pageout.c,v 1.4 1996/09/18 11:57:38 deraadt Exp $ */ +/* $OpenBSD: vm_pageout.c,v 1.5 1997/04/17 01:25:22 niklas Exp $ */ /* $NetBSD: vm_pageout.c,v 1.23 1996/02/05 01:54:07 christos Exp $ */ /* @@ -72,6 +72,7 @@ #include #include #include +#include #include #include @@ -274,14 +275,22 @@ vm_pageout_page(m, object) * We must unlock the page queues first. */ vm_page_unlock_queues(); + +#if 0 + /* + * vm_object_collapse might want to sleep waiting for pages which + * is not allowed to do in this thread. Anyway, we now aggressively + * collapse object-chains as early as possible so this call ought + * to not be very useful anyhow. This is just an educated guess. + * Not doing a collapse operation is never fatal though, so we skip + * it for the time being. Later we might add some NOWAIT option for + * the collapse code to look at, if it's deemed necessary. + */ if (object->pager == NULL) vm_object_collapse(object); - -#ifdef DIAGNOSTIC - if (object->paging_in_progress == 0xdead) - panic("vm_pageout_page: object deallocated"); #endif - object->paging_in_progress++; + + vm_object_paging_begin(object); vm_object_unlock(object); /* @@ -297,7 +306,7 @@ vm_pageout_page(m, object) */ if ((pager = object->pager) == NULL) { pager = vm_pager_allocate(PG_DFLT, (caddr_t)0, object->size, - VM_PROT_ALL, (vm_offset_t)0); + VM_PROT_ALL, (vm_offset_t)0); if (pager != NULL) vm_object_setpager(object, pager, 0, FALSE); } @@ -330,8 +339,8 @@ vm_pageout_page(m, object) * shortage, so we put pause for awhile and try again. * XXX could get stuck here. */ - (void) tsleep((caddr_t)&vm_pages_needed, PZERO|PCATCH, - "pageout", 100); + (void)tsleep((caddr_t)&vm_pages_needed, PZERO|PCATCH, + "pageout", hz); break; } case VM_PAGER_FAIL: @@ -357,7 +366,7 @@ vm_pageout_page(m, object) if (pageout_status != VM_PAGER_PEND) { m->flags &= ~PG_BUSY; PAGE_WAKEUP(m); - object->paging_in_progress--; + vm_object_paging_end(object); } } @@ -381,7 +390,6 @@ vm_pageout_cluster(m, object) vm_offset_t offset, loff, hoff; vm_page_t plist[MAXPOCLUSTER], *plistp, p; int postatus, ix, count; - extern int lbolt; /* * Determine the range of pages that can be part of a cluster @@ -448,11 +456,7 @@ vm_pageout_cluster(m, object) * in case it blocks. 
*/ vm_page_unlock_queues(); -#ifdef DIAGNOSTIC - if (object->paging_in_progress == 0xdead) - panic("vm_pageout_cluster: object deallocated"); -#endif - object->paging_in_progress++; + vm_object_paging_begin(object); vm_object_unlock(object); again: thread_wakeup(&cnt.v_free_count); @@ -461,7 +465,8 @@ again: * XXX rethink this */ if (postatus == VM_PAGER_AGAIN) { - (void) tsleep((caddr_t)&lbolt, PZERO|PCATCH, "pageout", 0); + (void)tsleep((caddr_t)&vm_pages_needed, PZERO|PCATCH, + "pageout", 0); goto again; } else if (postatus == VM_PAGER_BAD) panic("vm_pageout_cluster: VM_PAGER_BAD"); @@ -501,7 +506,6 @@ again: if (postatus != VM_PAGER_PEND) { p->flags &= ~PG_BUSY; PAGE_WAKEUP(p); - } } /* @@ -509,8 +513,7 @@ again: * indicator set so that we don't attempt an object collapse. */ if (postatus != VM_PAGER_PEND) - object->paging_in_progress--; - + vm_object_paging_end(object); } #endif @@ -521,7 +524,7 @@ again: void vm_pageout() { - (void) spl0(); + (void)spl0(); /* * Initialize some paging parameters. -- 2.20.1
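
The comment added to vm_object_collapse above explains that, when called from vm_object_deallocate, the current backing object is handed back through a retry pointer so the deallocation recursion turns into iteration. Below is a minimal userspace model of that pattern; the types, names, and reference-count semantics are simplified assumptions for illustration, not the kernel code.

#include <stdio.h>
#include <stdlib.h>

struct object {
	int ref_count;
	struct object *shadow;		/* backing object, may be NULL */
};

/*
 * Drop one reference; when the last reference goes away, continue with
 * the backing object by looping instead of recursing (assumed model of
 * the retry_object feedback described in the patch).
 */
static void
deallocate(struct object *obj)
{
	struct object *next;

	while (obj != NULL) {
		if (--obj->ref_count > 0)
			return;
		next = obj->shadow;	/* plays the role of *retry_object */
		free(obj);
		obj = next;		/* iteration replaces tail recursion */
	}
}

int
main(void)
{
	struct object *a, *b, *c;

	/* a shadow chain three objects deep, one reference per link */
	c = calloc(1, sizeof(*c)); c->ref_count = 1;
	b = calloc(1, sizeof(*b)); b->ref_count = 1; b->shadow = c;
	a = calloc(1, sizeof(*a)); a->ref_count = 1; a->shadow = b;

	deallocate(a);		/* releases the whole chain iteratively */
	printf("chain released\n");
	return (0);
}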
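
The vm_object_set_shadow hunk now moves a reference along with the back-link: the old shadow loses one, the new shadow gains one, and the object migrates between shadowers lists. The compilable sketch below shows that invariant using the same <sys/queue.h> LIST macros; the reference-transfer semantics are inferred from the hunk and are only a model.

#include <stdio.h>
#include <sys/queue.h>

struct object {
	int ref_count;
	struct object *shadow;
	LIST_HEAD(, object) shadowers;		/* objects shadowing this one */
	LIST_ENTRY(object) shadowers_list;	/* link on the shadow's list */
};

/*
 * Keep the shadow graph back-link consistent: an object is always on
 * its current shadow's shadowers list, and each back-link carries one
 * reference on the shadow (assumed semantics).
 */
static void
set_shadow(struct object *object, struct object *shadow)
{
	struct object *old = object->shadow;

	if (old == shadow)
		return;
	if (old != NULL) {
		old->ref_count--;
		LIST_REMOVE(object, shadowers_list);
	}
	if (shadow != NULL) {
		shadow->ref_count++;
		LIST_INSERT_HEAD(&shadow->shadowers, object, shadowers_list);
	}
	object->shadow = shadow;
}

int
main(void)
{
	struct object a = { .ref_count = 1 };
	struct object b = { .ref_count = 1 };
	struct object o = { .ref_count = 1 };

	LIST_INIT(&a.shadowers);
	LIST_INIT(&b.shadowers);
	LIST_INIT(&o.shadowers);
	set_shadow(&o, &a);	/* o shadows a; a gains a reference */
	set_shadow(&o, &b);	/* moved: a gives it up, b gains it */
	printf("a.ref=%d b.ref=%d\n", a.ref_count, b.ref_count); /* 1 and 2 */
	return (0);
}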
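
The new vm_object_paging_begin/end/wait macros and the OBJ_WAITING flag implement a classic operations-in-progress counter: end only issues a wakeup when a waiter has announced itself, and wait loops until the count drops to zero, which is how vm_pageout_page and vm_pageout_cluster now bracket their pager I/O. The userland analogue below substitutes a pthread mutex and condition variable for the object simple lock and thread_sleep/thread_wakeup; it models the protocol only and is not the kernel interface.

#include <pthread.h>
#include <stdio.h>
#include <unistd.h>

struct object {
	pthread_mutex_t lock;
	pthread_cond_t done;
	int paging_in_progress;
	int waiting;			/* plays the role of OBJ_WAITING */
};

static void
paging_begin(struct object *o)
{
	pthread_mutex_lock(&o->lock);
	o->paging_in_progress++;
	pthread_mutex_unlock(&o->lock);
}

static void
paging_end(struct object *o)
{
	pthread_mutex_lock(&o->lock);
	/* only wake up if somebody said they were waiting */
	if (--o->paging_in_progress == 0 && o->waiting) {
		o->waiting = 0;
		pthread_cond_broadcast(&o->done);
	}
	pthread_mutex_unlock(&o->lock);
}

static void
paging_wait(struct object *o)
{
	pthread_mutex_lock(&o->lock);
	while (o->paging_in_progress != 0) {
		o->waiting = 1;
		pthread_cond_wait(&o->done, &o->lock);
	}
	pthread_mutex_unlock(&o->lock);
}

static void *
pager(void *arg)
{
	struct object *o = arg;

	usleep(10000);			/* pretend to do the page I/O */
	paging_end(o);
	return (NULL);
}

int
main(void)
{
	struct object o = {
		PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0, 0
	};
	pthread_t t;

	paging_begin(&o);		/* as done before starting pager I/O */
	pthread_create(&t, NULL, pager, &o);
	paging_wait(&o);		/* blocks until the pager calls paging_end */
	pthread_join(t, NULL);
	printf("paging finished\n");
	return (0);
}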
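
The vm_page_bootstrap/vm_page_startup comments require the bucket count to be the next power of two at or above the number of physical pages, which is what lets the vm_page_hash macro reduce the object/offset key with a mask instead of a modulo. A toy computation under an assumed page size and page count:

#include <stdio.h>

#define PAGE_SHIFT	12			/* assumed 4 KB pages */
#define atop(x)		((x) >> PAGE_SHIFT)

int
main(void)
{
	unsigned long npages = 3000;		/* physical pages (example) */
	unsigned long buckets = 1;
	unsigned long mask;
	void *object = &npages;			/* any pointer serves as a key */
	unsigned long offset = 5UL << PAGE_SHIFT; /* byte offset of page 5 */

	while (buckets < npages)
		buckets <<= 1;			/* next power of two: 4096 */
	mask = buckets - 1;

	/* same shape as vm_page_hash(): (object + page index) & mask */
	printf("buckets=%lu hash=%lu\n", buckets,
	    ((unsigned long)object + atop(offset)) & mask);
	return (0);
}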
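
The sizing comment in pmap_startup asks for the largest vmpages such that vmpages page structures plus vmpages pages of managed memory fit in the pages being stolen, i.e. vmpages * (PAGE_SIZE + sizeof(struct vm_page)) <= freepages * PAGE_SIZE, which rearranges to the expression used for vm_page_count. A quick numeric check with assumed sizes:

#include <stdio.h>

/* Illustrative sizes only; the real ones are machine-dependent. */
#define PAGE_SIZE	4096UL
#define VM_PAGE_SIZE	64UL		/* assumed sizeof(struct vm_page) */

int
main(void)
{
	unsigned long freepages = 8192;	/* pages available at bootstrap */
	unsigned long vmpages;

	/*
	 * Each managed page costs PAGE_SIZE bytes plus one struct
	 * vm_page, and both must come out of the free pages, giving
	 * the closed form below.
	 */
	vmpages = (PAGE_SIZE * freepages) / (PAGE_SIZE + VM_PAGE_SIZE);
	printf("freepages=%lu -> vm_page_count=%lu (overhead %lu pages)\n",
	    freepages, vmpages, freepages - vmpages);
	return (0);
}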