(re)Introduce locking for amaps & anons.
author     mpi <mpi@openbsd.org>    Tue, 19 Jan 2021 13:21:36 +0000 (13:21 +0000)
committer  mpi <mpi@openbsd.org>    Tue, 19 Jan 2021 13:21:36 +0000 (13:21 +0000)
A rwlock is attached to every amap and is shared with all of its anons.  The
same lock will be used by multiple amaps if they have anons in common.
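
Concretely, the invariant is that every anon currently owned by an amap has
its an_lock pointing at that amap's am_lock, so taking the amap's rwlock
serializes access to all of its anons; the diff below asserts this with
KASSERT(anon->an_lock == amap->am_lock) wherever an anon is reached through
its amap.  A minimal, illustrative sketch of the resulting usage pattern (not
part of the diff; it assumes the amap_lock()/amap_unlock() macros and the
am_lock/an_lock fields introduced below, plus a hypothetical map entry
"entry" and address "va" inside it):

	struct vm_amap *amap = entry->aref.ar_amap;
	struct vm_anon *anon;

	amap_lock(amap);		/* rw_enter_write(amap->am_lock) */
	anon = amap_lookup(&entry->aref, va - entry->start);
	if (anon != NULL) {
		/* The anon is protected by the same rwlock as its amap. */
		KASSERT(anon->an_lock == amap->am_lock);
		/* ... safe to inspect anon->an_page, anon->an_ref, ... */
	}
	amap_unlock(amap);		/* rw_exit_write(amap->am_lock) */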

This should be enough to get the upper part of the fault handler out of the
KERNEL_LOCK(), which seems to bring up to 20% improvement in build times.

This is based on/copied/adapted from the most recent work done in NetBSD,
which is an evolution of the previous simple_lock scheme.

Tested by many, thanks!

ok kettenis@, mvs@

sys/uvm/uvm_amap.c
sys/uvm/uvm_amap.h
sys/uvm/uvm_anon.c
sys/uvm/uvm_anon.h
sys/uvm/uvm_fault.c
sys/uvm/uvm_map.c
sys/uvm/uvm_page.c
sys/uvm/uvm_pager.c

diff --git a/sys/uvm/uvm_amap.c b/sys/uvm/uvm_amap.c
index e4c38a5..7eb20e6 100644
@@ -1,4 +1,4 @@
-/*     $OpenBSD: uvm_amap.c,v 1.86 2020/11/13 11:11:48 mpi Exp $       */
+/*     $OpenBSD: uvm_amap.c,v 1.87 2021/01/19 13:21:36 mpi Exp $       */
 /*     $NetBSD: uvm_amap.c,v 1.27 2000/11/25 06:27:59 chs Exp $        */
 
 /*
@@ -55,6 +55,9 @@ struct pool uvm_small_amap_pool[UVM_AMAP_CHUNK];
 struct pool uvm_amap_chunk_pool;
 
 LIST_HEAD(, vm_amap) amap_list;
+struct rwlock amap_list_lock = RWLOCK_INITIALIZER("amaplstlk");
+#define amap_lock_list()       rw_enter_write(&amap_list_lock)
+#define amap_unlock_list()     rw_exit_write(&amap_list_lock)
 
 static char amap_small_pool_names[UVM_AMAP_CHUNK][9];
 
@@ -89,13 +92,17 @@ void        amap_wiperange(struct vm_amap *, int, int);
 static inline void
 amap_list_insert(struct vm_amap *amap)
 {
+       amap_lock_list();
        LIST_INSERT_HEAD(&amap_list, amap, am_list);
+       amap_unlock_list();
 }
 
 static inline void
 amap_list_remove(struct vm_amap *amap)
-{ 
+{
+       amap_lock_list();
        LIST_REMOVE(amap, am_list);
+       amap_unlock_list();
 }
 
 /*
@@ -249,7 +256,7 @@ amap_init(void)
 
        /* Initialize the vm_amap pool. */
        pool_init(&uvm_amap_pool, sizeof(struct vm_amap),
-           0, IPL_NONE, PR_WAITOK, "amappl", NULL);
+           0, IPL_MPFLOOR, PR_WAITOK, "amappl", NULL);
        pool_sethiwat(&uvm_amap_pool, 4096);
 
        /* initialize small amap pools */
@@ -258,13 +265,13 @@ amap_init(void)
                    sizeof(amap_small_pool_names[0]), "amappl%d", i + 1);
                size = offsetof(struct vm_amap, am_small.ac_anon) +
                    (i + 1) * sizeof(struct vm_anon *);
-               pool_init(&uvm_small_amap_pool[i], size, 0,
-                   IPL_NONE, 0, amap_small_pool_names[i], NULL);
+               pool_init(&uvm_small_amap_pool[i], size, 0, IPL_MPFLOOR,
+                   PR_WAITOK, amap_small_pool_names[i], NULL);
        }
 
        pool_init(&uvm_amap_chunk_pool, sizeof(struct vm_amap_chunk) +
            UVM_AMAP_CHUNK * sizeof(struct vm_anon *),
-           0, IPL_NONE, 0, "amapchunkpl", NULL);
+           0, IPL_MPFLOOR, PR_WAITOK, "amapchunkpl", NULL);
        pool_sethiwat(&uvm_amap_chunk_pool, 4096);
 }
 
@@ -332,6 +339,7 @@ amap_alloc1(int slots, int waitf, int lazyalloc)
        if (amap == NULL)
                return(NULL);
 
+       amap->am_lock = NULL;
        amap->am_ref = 1;
        amap->am_flags = 0;
 #ifdef UVM_AMAP_PPREF
@@ -389,6 +397,12 @@ fail1:
        return (NULL);
 }
 
+static void
+amap_lock_alloc(struct vm_amap *amap)
+{
+       rw_obj_alloc(&amap->am_lock, "amaplk");
+}
+
 /*
  * amap_alloc: allocate an amap to manage "sz" bytes of anonymous VM
  *
@@ -406,8 +420,10 @@ amap_alloc(vaddr_t sz, int waitf, int lazyalloc)
                return (NULL);
 
        amap = amap_alloc1(slots, waitf, lazyalloc);
-       if (amap)
+       if (amap != NULL) {
+               amap_lock_alloc(amap);
                amap_list_insert(amap);
+       }
 
        return(amap);
 }
@@ -426,6 +442,11 @@ amap_free(struct vm_amap *amap)
        KASSERT(amap->am_ref == 0 && amap->am_nused == 0);
        KASSERT((amap->am_flags & AMAP_SWAPOFF) == 0);
 
+       if (amap->am_lock != NULL) {
+               KASSERT(amap->am_lock == NULL || !rw_write_held(amap->am_lock));
+               rw_obj_free(amap->am_lock);
+       }
+
 #ifdef UVM_AMAP_PPREF
        if (amap->am_ppref && amap->am_ppref != PPREF_NONE)
                free(amap->am_ppref, M_UVMAMAP, amap->am_nslot * sizeof(int));
@@ -447,6 +468,7 @@ amap_free(struct vm_amap *amap)
  *
  * => called from amap_unref when the final reference to an amap is
  *     discarded (i.e. when reference count == 1)
+ * => amap must be locked.
  */
 
 void
@@ -457,15 +479,16 @@ amap_wipeout(struct vm_amap *amap)
        struct vm_amap_chunk *chunk;
        struct pglist pgl;
 
+       KASSERT(rw_write_held(amap->am_lock));
        KASSERT(amap->am_ref == 0);
 
        if (__predict_false((amap->am_flags & AMAP_SWAPOFF) != 0)) {
                /* amap_swap_off will call us again. */
+               amap_unlock(amap);
                return;
        }
 
        TAILQ_INIT(&pgl);
-
        amap_list_remove(amap);
 
        AMAP_CHUNK_FOREACH(chunk, amap) {
@@ -478,6 +501,7 @@ amap_wipeout(struct vm_amap *amap)
 
                        if (anon == NULL || anon->an_ref == 0)
                                panic("amap_wipeout: corrupt amap");
+                       KASSERT(anon->an_lock == amap->am_lock);
 
                        refs = --anon->an_ref;
                        if (refs == 0) {
@@ -495,7 +519,8 @@ amap_wipeout(struct vm_amap *amap)
        /* now we free the map */
        amap->am_ref = 0;       /* ... was one */
        amap->am_nused = 0;
-       amap_free(amap);        /* will free amap */
+       amap_unlock(amap);
+       amap_free(amap);
 }
 
 /*
@@ -503,6 +528,8 @@ amap_wipeout(struct vm_amap *amap)
  *     by copying the amap if necessary.
  * 
  * => an entry with a null amap pointer will get a new (blank) one.
+ * => the map that the map entry blocks to must be locked by caller.
+ * => the amap (if any) currently attached to the entry must be unlocked.
  * => if canchunk is true, then we may clip the entry into a chunk
  * => "startva" and "endva" are used only if canchunk is true.  they are
  *     used to limit chunking (e.g. if you have a large space that you
@@ -519,6 +546,9 @@ amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
        vaddr_t chunksize;
        int i, j, k, n, srcslot;
        struct vm_amap_chunk *chunk = NULL, *srcchunk = NULL;
+       struct vm_anon *anon;
+
+       KASSERT(map != kernel_map);             /* we use sleeping locks */
 
        /* is there a map to copy?   if not, create one from scratch. */
        if (entry->aref.ar_amap == NULL) {
@@ -574,6 +604,8 @@ amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
                return;
        srcamap = entry->aref.ar_amap;
 
+       amap_lock(srcamap);
+
        /*
         * need to double check reference count now.  the reference count
         * could have changed while we were in malloc.  if the reference count
@@ -582,6 +614,7 @@ amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
         */
        if (srcamap->am_ref == 1) {             /* take it over? */
                entry->etype &= ~UVM_ET_NEEDSCOPY;
+               amap_unlock(srcamap);
                amap->am_ref--;         /* drop final reference to map */
                amap_free(amap);        /* dispose of new (unused) amap */
                return;
@@ -606,18 +639,21 @@ amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
 
                chunk = amap_chunk_get(amap, lcv, 1, PR_NOWAIT);
                if (chunk == NULL) {
+                       amap_unlock(srcamap);
                        amap->am_ref = 0;
                        amap_wipeout(amap);
                        return;
                }
 
                for (k = 0; k < n; i++, j++, k++) {
-                       chunk->ac_anon[i] = srcchunk->ac_anon[j];
-                       if (chunk->ac_anon[i] == NULL)
+                       chunk->ac_anon[i] = anon = srcchunk->ac_anon[j];
+                       if (anon == NULL)
                                continue;
 
+                       KASSERT(anon->an_lock == srcamap->am_lock);
+                       KASSERT(anon->an_ref > 0);
                        chunk->ac_usedmap |= (1 << i);
-                       chunk->ac_anon[i]->an_ref++;
+                       anon->an_ref++;
                        amap->am_nused++;
                }
        }
@@ -629,6 +665,8 @@ amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
         * the count to zero.  [and no need to worry about freeing it]
         */
        srcamap->am_ref--;
+       KASSERT(srcamap->am_ref > 0);
+
        if (srcamap->am_ref == 1 && (srcamap->am_flags & AMAP_SHARED) != 0)
                srcamap->am_flags &= ~AMAP_SHARED;   /* clear shared flag */
 #ifdef UVM_AMAP_PPREF
@@ -638,6 +676,20 @@ amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
        }
 #endif
 
+       /*
+        * If we referenced any anons, then share the source amap's lock.
+        * Otherwise, we have nothing in common, so allocate a new one.
+        */
+       KASSERT(amap->am_lock == NULL);
+       if (amap->am_nused != 0) {
+               amap->am_lock = srcamap->am_lock;
+               rw_obj_hold(amap->am_lock);
+       }
+       amap_unlock(srcamap);
+
+       if (amap->am_lock == NULL)
+               amap_lock_alloc(amap);
+
        /* install new amap. */
        entry->aref.ar_pageoff = 0;
        entry->aref.ar_amap = amap;
@@ -655,6 +707,7 @@ amap_copy(struct vm_map *map, struct vm_map_entry *entry, int waitf,
  *     so we resolve the COW here.
  *
  * => assume parent's entry was wired, thus all pages are resident.
+ * => the parent and child vm_map must both be locked.
  * => caller passes child's map/entry in to us
  * => XXXCDC: out of memory should cause fork to fail, but there is
  *     currently no easy way to do this (needs fix)
@@ -675,6 +728,7 @@ amap_cow_now(struct vm_map *map, struct vm_map_entry *entry)
         * am_anon[] array on us.
         */
 ReStart:
+       amap_lock(amap);
        AMAP_CHUNK_FOREACH(chunk, amap) {
                int i, map = chunk->ac_usedmap;
 
@@ -683,6 +737,7 @@ ReStart:
                        map ^= 1 << slot;
                        anon = chunk->ac_anon[slot];
                        pg = anon->an_page;
+                       KASSERT(anon->an_lock == amap->am_lock);
 
                        /* page must be resident since parent is wired */
                        KASSERT(pg != NULL);
@@ -700,24 +755,27 @@ ReStart:
                         */
                        if (pg->pg_flags & PG_BUSY) {
                                atomic_setbits_int(&pg->pg_flags, PG_WANTED);
-                               tsleep_nsec(pg, PVM, "cownow", INFSLP);
+                               rwsleep_nsec(pg, amap->am_lock, PVM | PNORELOCK,
+                                   "cownow", INFSLP);
                                goto ReStart;
                        }
 
                        /* ok, time to do a copy-on-write to a new anon */
                        nanon = uvm_analloc();
-                       if (nanon) {
+                       if (nanon != NULL) {
+                               /* the new anon will share the amap's lock */
+                               nanon->an_lock = amap->am_lock;
                                npg = uvm_pagealloc(NULL, 0, nanon, 0);
                        } else
                                npg = NULL;     /* XXX: quiet gcc warning */
 
                        if (nanon == NULL || npg == NULL) {
                                /* out of memory */
-                               /*
-                                * XXXCDC: we should cause fork to fail, but
-                                * we can't ...
-                                */
-                               if (nanon) {
+                               amap_unlock(amap);
+                               if (nanon != NULL) {
+                                       nanon->an_lock = NULL;
+                                       nanon->an_ref--;
+                                       KASSERT(nanon->an_ref == 0);
                                        uvm_anfree(nanon);
                                }
                                uvm_wait("cownowpage");
@@ -730,6 +788,7 @@ ReStart:
                         */
                        uvm_pagecopy(pg, npg);          /* old -> new */
                        anon->an_ref--;                 /* can't drop to zero */
+                       KASSERT(anon->an_ref > 0);
                        chunk->ac_anon[slot] = nanon;   /* replace */
 
                        /*
@@ -744,6 +803,7 @@ ReStart:
                        uvm_unlock_pageq();
                }
        }
+       amap_unlock(amap);
 }
 
 /*
@@ -757,10 +817,13 @@ amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
        struct vm_amap *amap = origref->ar_amap;
        int leftslots;
 
+       KASSERT(splitref->ar_amap == amap);
        AMAP_B2SLOT(leftslots, offset);
        if (leftslots == 0)
                panic("amap_splitref: split at zero offset");
 
+       amap_lock(amap);
+
        /* now: we have a valid am_mapped array. */
        if (amap->am_nslot - origref->ar_pageoff - leftslots <= 0)
                panic("amap_splitref: map size check failed");
@@ -775,6 +838,7 @@ amap_splitref(struct vm_aref *origref, struct vm_aref *splitref, vaddr_t offset)
        amap->am_ref++;
        splitref->ar_amap = amap;
        splitref->ar_pageoff = origref->ar_pageoff + leftslots;
+       amap_unlock(amap);
 }
 
 #ifdef UVM_AMAP_PPREF
@@ -786,6 +850,7 @@ void
 amap_pp_establish(struct vm_amap *amap)
 {
 
+       KASSERT(rw_write_held(amap->am_lock));
        amap->am_ppref = mallocarray(amap->am_nslot, sizeof(int),
            M_UVMAMAP, M_NOWAIT|M_ZERO);
 
@@ -811,6 +876,8 @@ amap_pp_adjref(struct vm_amap *amap, int curslot, vsize_t slotlen, int adjval)
        int stopslot, *ppref, lcv, prevlcv;
        int ref, len, prevref, prevlen;
 
+       KASSERT(rw_write_held(amap->am_lock));
+
        stopslot = curslot + slotlen;
        ppref = amap->am_ppref;
        prevlcv = 0;
@@ -893,6 +960,7 @@ amap_wiperange_chunk(struct vm_amap *amap, struct vm_amap_chunk *chunk,
                map ^= 1 << curslot;
                chunk->ac_usedmap ^= 1 << curslot;
                anon = chunk->ac_anon[curslot];
+               KASSERT(anon->an_lock == amap->am_lock);
 
                /* remove it from the amap */
                chunk->ac_anon[curslot] = NULL;
@@ -902,10 +970,6 @@ amap_wiperange_chunk(struct vm_amap *amap, struct vm_amap_chunk *chunk,
                /* drop anon reference count */
                refs = --anon->an_ref;
                if (refs == 0) {
-                       /*
-                        * we just eliminated the last reference to an
-                        * anon.  free it.
-                        */
                        uvm_anfree(anon);
                }
        }
@@ -921,6 +985,8 @@ amap_wiperange(struct vm_amap *amap, int slotoff, int slots)
        int bucket, startbucket, endbucket;
        struct vm_amap_chunk *chunk, *nchunk;
 
+       KASSERT(rw_write_held(amap->am_lock));
+
        startbucket = UVM_AMAP_BUCKET(amap, slotoff);
        endbucket = UVM_AMAP_BUCKET(amap, slotoff + slots - 1);
 
@@ -980,12 +1046,24 @@ amap_swap_off(int startslot, int endslot)
 {
        struct vm_amap *am;
        struct vm_amap *am_next;
+       struct vm_amap marker;
        boolean_t rv = FALSE;
 
+       amap_lock_list();
        for (am = LIST_FIRST(&amap_list); am != NULL && !rv; am = am_next) {
                int i, map;
                struct vm_amap_chunk *chunk;
 
+               amap_lock(am);
+               if (am->am_nused == 0) {
+                       amap_unlock(am);
+                       am_next = LIST_NEXT(am, am_list);
+                       continue;
+               }
+
+               LIST_INSERT_AFTER(am, &marker, am_list);
+               amap_unlock_list();
+
 again:
                AMAP_CHUNK_FOREACH(chunk, am) {
                        map = chunk->ac_usedmap;
@@ -1005,20 +1083,28 @@ again:
 
                                am->am_flags |= AMAP_SWAPOFF;
 
-                               rv = uvm_anon_pagein(anon);
+                               rv = uvm_anon_pagein(am, anon);
+                               amap_lock(am);
 
                                am->am_flags &= ~AMAP_SWAPOFF;
-                               if (rv || amap_refs(am) == 0)
+                               if (amap_refs(am) == 0) {
+                                       amap_wipeout(am);
+                                       am = NULL;
+                                       goto nextamap;
+                               }
+                               if (rv)
                                        goto nextamap;
                                goto again;
                        }
                }
-
 nextamap:
-               am_next = LIST_NEXT(am, am_list);
-               if (amap_refs(am) == 0)
-                       amap_wipeout(am);
+               if (am != NULL)
+                       amap_unlock(am);
+               amap_lock_list();
+               am_next = LIST_NEXT(&marker, am_list);
+               LIST_REMOVE(&marker, am_list);
        }
+       amap_unlock_list();
 
        return rv;
 }
@@ -1147,9 +1233,11 @@ amap_add(struct vm_aref *aref, vaddr_t offset, struct vm_anon *anon,
 void
 amap_unadd(struct vm_aref *aref, vaddr_t offset)
 {
-       int slot;
        struct vm_amap *amap = aref->ar_amap;
        struct vm_amap_chunk *chunk;
+       int slot;
+
+       KASSERT(rw_write_held(amap->am_lock));
 
        AMAP_B2SLOT(slot, offset);
        slot += aref->ar_pageoff;
@@ -1176,6 +1264,12 @@ amap_adjref_anons(struct vm_amap *amap, vaddr_t offset, vsize_t len,
     int refv, boolean_t all)
 {
 #ifdef UVM_AMAP_PPREF
+       KASSERT(rw_write_held(amap->am_lock));
+
+       /*
+        * We must establish the ppref array before changing am_ref
+        * so that the ppref values match the current amap refcount.
+        */
        if (amap->am_ppref == NULL && !all && len != amap->am_nslot) {
                amap_pp_establish(amap);
        }
@@ -1192,32 +1286,37 @@ amap_adjref_anons(struct vm_amap *amap, vaddr_t offset, vsize_t len,
                }
        }
 #endif
+       amap_unlock(amap);
 }
 
 /*
- * amap_ref: gain a reference to an amap
+ * amap_ref: gain a reference to an amap.
  *
- * => "offset" and "len" are in units of pages
- * => called at fork time to gain the child's reference
+ * => amap must not be locked (we will lock).
+ * => "offset" and "len" are in units of pages.
+ * => Called at fork time to gain the child's reference.
  */
 void
 amap_ref(struct vm_amap *amap, vaddr_t offset, vsize_t len, int flags)
 {
-
+       amap_lock(amap);
        if (flags & AMAP_SHARED)
                amap->am_flags |= AMAP_SHARED;
        amap_adjref_anons(amap, offset, len, 1, (flags & AMAP_REFALL) != 0);
 }
 
 /*
- * amap_unref: remove a reference to an amap
+ * amap_unref: remove a reference to an amap.
  *
  * => All pmap-level references to this amap must be already removed.
  * => Called from uvm_unmap_detach(); entry is already removed from the map.
+ * => We will lock amap, so it must be unlocked.
  */
 void
 amap_unref(struct vm_amap *amap, vaddr_t offset, vsize_t len, boolean_t all)
 {
+       amap_lock(amap);
+
        KASSERT(amap->am_ref > 0);
 
        if (amap->am_ref == 1) {
diff --git a/sys/uvm/uvm_amap.h b/sys/uvm/uvm_amap.h
index c0de03d..fc0c4df 100644
@@ -1,4 +1,4 @@
-/*     $OpenBSD: uvm_amap.h,v 1.32 2020/11/13 11:11:49 mpi Exp $       */
+/*     $OpenBSD: uvm_amap.h,v 1.33 2021/01/19 13:21:36 mpi Exp $       */
 /*     $NetBSD: uvm_amap.h,v 1.14 2001/02/18 21:19:08 chs Exp $        */
 
 /*
@@ -133,6 +133,7 @@ struct vm_amap_chunk {
 };
 
 struct vm_amap {
+       struct rwlock *am_lock; /* lock for all vm_amap flags */
        int am_ref;             /* reference count */
        int am_flags;           /* flags */
        int am_nslot;           /* # of slots currently in map */
@@ -261,6 +262,9 @@ struct vm_amap {
 #define amap_flags(AMAP)       ((AMAP)->am_flags)
 #define amap_refs(AMAP)                ((AMAP)->am_ref)
 
+#define amap_lock(AMAP)                rw_enter_write((AMAP)->am_lock)
+#define amap_unlock(AMAP)      rw_exit_write((AMAP)->am_lock)
+
 #endif /* _KERNEL */
 
 #endif /* _UVM_UVM_AMAP_H_ */
diff --git a/sys/uvm/uvm_anon.c b/sys/uvm/uvm_anon.c
index 11e0892..b643214 100644
@@ -1,4 +1,4 @@
-/*     $OpenBSD: uvm_anon.c,v 1.50 2020/11/24 13:49:09 mpi Exp $       */
+/*     $OpenBSD: uvm_anon.c,v 1.51 2021/01/19 13:21:36 mpi Exp $       */
 /*     $NetBSD: uvm_anon.c,v 1.10 2000/11/25 06:27:59 chs Exp $        */
 
 /*
@@ -48,7 +48,7 @@ struct pool uvm_anon_pool;
 void
 uvm_anon_init(void)
 {
-       pool_init(&uvm_anon_pool, sizeof(struct vm_anon), 0, IPL_NONE,
+       pool_init(&uvm_anon_pool, sizeof(struct vm_anon), 0, IPL_MPFLOOR,
            PR_WAITOK, "anonpl", NULL);
        pool_sethiwat(&uvm_anon_pool, uvmexp.free / 16);
 }
@@ -63,6 +63,7 @@ uvm_analloc(void)
 
        anon = pool_get(&uvm_anon_pool, PR_NOWAIT);
        if (anon) {
+               anon->an_lock = NULL;
                anon->an_ref = 1;
                anon->an_page = NULL;
                anon->an_swslot = 0;
@@ -71,25 +72,26 @@ uvm_analloc(void)
 }
 
 /*
- * uvm_anfree: free a single anon structure
+ * uvm_anfree_list: free a single anon structure
  *
- * => caller must remove anon from its amap before calling (if it was in
- *     an amap).
+ * => anon must be removed from the amap (if anon was in an amap).
+ * => amap must be locked, if anon was owned by amap.
  * => we may lock the pageq's.
  */
 void
 uvm_anfree_list(struct vm_anon *anon, struct pglist *pgl)
 {
-       struct vm_page *pg;
+       struct vm_page *pg = anon->an_page;
 
-       /* get page */
-       pg = anon->an_page;
+       KASSERT(anon->an_lock == NULL || rw_write_held(anon->an_lock));
+       KASSERT(anon->an_ref == 0);
 
        /*
-        * if we have a resident page, we must dispose of it before freeing
-        * the anon.
+        * Dispose of the page, if it is resident.
         */
-       if (pg) {
+       if (pg != NULL) {
+               KASSERT(anon->an_lock != NULL);
+
                /*
                 * if page is busy then we just mark it as released (who ever
                 * has it busy must check for this when they wake up). if the
@@ -98,6 +100,7 @@ uvm_anfree_list(struct vm_anon *anon, struct pglist *pgl)
                if ((pg->pg_flags & PG_BUSY) != 0) {
                        /* tell them to dump it when done */
                        atomic_setbits_int(&pg->pg_flags, PG_RELEASED);
+                       rw_obj_hold(anon->an_lock);
                        return;
                }
                pmap_page_protect(pg, PROT_NONE);
@@ -115,12 +118,14 @@ uvm_anfree_list(struct vm_anon *anon, struct pglist *pgl)
                        uvm_pagefree(pg);       /* bye bye */
                        uvm_unlock_pageq();     /* free the daemon */
                }
+       } else {
+               if (anon->an_swslot != 0) {
+                       /* this page is no longer only in swap. */
+                       KASSERT(uvmexp.swpgonly > 0);
+                       uvmexp.swpgonly--;
+               }
        }
-       if (pg == NULL && anon->an_swslot != 0) {
-               /* this page is no longer only in swap. */
-               KASSERT(uvmexp.swpgonly > 0);
-               uvmexp.swpgonly--;
-       }
+       anon->an_lock = NULL;
 
        /* free any swap resources. */
        uvm_anon_dropswap(anon);
@@ -135,12 +140,6 @@ uvm_anfree_list(struct vm_anon *anon, struct pglist *pgl)
        pool_put(&uvm_anon_pool, anon);
 }
 
-void
-uvm_anfree(struct vm_anon *anon)
-{
-       uvm_anfree_list(anon, NULL);
-}
-
 /*
  * uvm_anwait: wait for memory to become available to allocate an anon.
  */
@@ -154,20 +153,6 @@ uvm_anwait(void)
        pool_put(&uvm_anon_pool, anon);
 }
 
-/*
- * uvm_anon_dropswap:  release any swap resources from this anon.
- */
-void
-uvm_anon_dropswap(struct vm_anon *anon)
-{
-
-       if (anon->an_swslot == 0)
-               return;
-
-       uvm_swap_free(anon->an_swslot, 1);
-       anon->an_swslot = 0;
-}
-
 /*
  * fetch an anon's page.
  *
@@ -175,15 +160,19 @@ uvm_anon_dropswap(struct vm_anon *anon)
  */
 
 boolean_t
-uvm_anon_pagein(struct vm_anon *anon)
+uvm_anon_pagein(struct vm_amap *amap, struct vm_anon *anon)
 {
        struct vm_page *pg;
        int rv;
 
-       rv = uvmfault_anonget(NULL, NULL, anon);
+       KASSERT(rw_write_held(anon->an_lock));
+       KASSERT(anon->an_lock == amap->am_lock);
+
+       rv = uvmfault_anonget(NULL, amap, anon);
 
        switch (rv) {
        case VM_PAGER_OK:
+               KASSERT(rw_write_held(anon->an_lock));
                break;
        case VM_PAGER_ERROR:
        case VM_PAGER_REFAULT:
@@ -206,7 +195,9 @@ uvm_anon_pagein(struct vm_anon *anon)
         * mark it as dirty, clear its swslot and un-busy it.
         */
        pg = anon->an_page;
-       uvm_swap_free(anon->an_swslot, 1);
+       if (anon->an_swslot > 0) {
+               uvm_swap_free(anon->an_swslot, 1);
+       }
        anon->an_swslot = 0;
        atomic_clearbits_int(&pg->pg_flags, PG_CLEAN);
 
@@ -216,6 +207,57 @@ uvm_anon_pagein(struct vm_anon *anon)
        uvm_lock_pageq();
        uvm_pagedeactivate(pg);
        uvm_unlock_pageq();
+       rw_exit(anon->an_lock);
 
        return FALSE;
 }
+
+/*
+ * uvm_anon_dropswap:  release any swap resources from this anon.
+ *
+ * => anon must be locked or have a reference count of 0.
+ */
+void
+uvm_anon_dropswap(struct vm_anon *anon)
+{
+       KASSERT(anon->an_ref == 0 || rw_lock_held(anon->an_lock));
+
+       if (anon->an_swslot == 0)
+               return;
+
+       uvm_swap_free(anon->an_swslot, 1);
+       anon->an_swslot = 0;
+}
+
+
+/*
+ * uvm_anon_release: release an anon and its page.
+ *
+ * => anon should not have any references.
+ * => anon must be locked.
+ */
+
+void
+uvm_anon_release(struct vm_anon *anon)
+{
+       struct vm_page *pg = anon->an_page;
+       struct rwlock *lock;
+
+       KASSERT(rw_write_held(anon->an_lock));
+       KASSERT(pg != NULL);
+       KASSERT((pg->pg_flags & PG_RELEASED) != 0);
+       KASSERT((pg->pg_flags & PG_BUSY) != 0);
+       KASSERT(pg->uobject == NULL);
+       KASSERT(pg->uanon == anon);
+       KASSERT(anon->an_ref == 0);
+
+       uvm_lock_pageq();
+       uvm_pagefree(pg);
+       uvm_unlock_pageq();
+       KASSERT(anon->an_page == NULL);
+       lock = anon->an_lock;
+       uvm_anfree(anon);
+       rw_exit(lock);
+       /* Note: extra reference is held for PG_RELEASED case. */
+       rw_obj_free(lock);
+}
diff --git a/sys/uvm/uvm_anon.h b/sys/uvm/uvm_anon.h
index 50e7c17..7db12a8 100644
@@ -1,4 +1,4 @@
-/*     $OpenBSD: uvm_anon.h,v 1.21 2020/01/04 16:17:29 beck Exp $      */
+/*     $OpenBSD: uvm_anon.h,v 1.22 2021/01/19 13:21:36 mpi Exp $       */
 /*     $NetBSD: uvm_anon.h,v 1.13 2000/12/27 09:17:04 chs Exp $        */
 
 /*
@@ -38,6 +38,8 @@
  */
 
 struct vm_anon {
+       struct rwlock *an_lock;
+
        struct vm_page *an_page;        /* if in RAM */
        int an_ref;                     /* reference count */
 
@@ -78,12 +80,15 @@ struct vm_aref {
 
 #ifdef _KERNEL
 struct vm_anon *uvm_analloc(void);
-void            uvm_anfree(struct vm_anon *);
-void            uvm_anfree_list(struct vm_anon *, struct pglist *);
+void            uvm_anfree_list(struct vm_anon *, struct pglist *);
+void            uvm_anon_release(struct vm_anon *);
 void            uvm_anwait(void);
 void            uvm_anon_init(void);
 void            uvm_anon_dropswap(struct vm_anon *);
-boolean_t       uvm_anon_pagein(struct vm_anon *);
+boolean_t       uvm_anon_pagein(struct vm_amap *, struct vm_anon *);
+
+#define                uvm_anfree(an)  uvm_anfree_list((an), NULL)
+
 #endif /* _KERNEL */
 
 #endif /* _UVM_UVM_ANON_H_ */
diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c
index c2b546f..407f5d7 100644
@@ -1,4 +1,4 @@
-/*     $OpenBSD: uvm_fault.c,v 1.112 2021/01/16 18:32:47 mpi Exp $     */
+/*     $OpenBSD: uvm_fault.c,v 1.113 2021/01/19 13:21:36 mpi Exp $     */
 /*     $NetBSD: uvm_fault.c,v 1.51 2000/08/06 00:22:53 thorpej Exp $   */
 
 /*
  *    by multiple map entries, and figuring out what should wait could be
  *    complex as well...).
  *
- * given that we are not currently multiprocessor or multithreaded we might
- * as well choose alternative 2 now.   maybe alternative 3 would be useful
+ * we use alternative 2 currently.   maybe alternative 3 would be useful
  * in the future.    XXX keep in mind for future consideration//rechecking.
  */
 
@@ -181,6 +180,7 @@ uvmfault_anonflush(struct vm_anon **anons, int n)
        for (lcv = 0 ; lcv < n ; lcv++) {
                if (anons[lcv] == NULL)
                        continue;
+               KASSERT(rw_lock_held(anons[lcv]->an_lock));
                pg = anons[lcv]->an_page;
                if (pg && (pg->pg_flags & PG_BUSY) == 0) {
                        uvm_lock_pageq();
@@ -271,6 +271,9 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap,
        struct vm_page *pg;
        int result;
 
+       KASSERT(rw_lock_held(anon->an_lock));
+       KASSERT(anon->an_lock == amap->am_lock);
+
        result = 0;             /* XXX shut up gcc */
        counters_inc(uvmexp_counters, flt_anget);
         /* bump rusage counters */
@@ -302,8 +305,14 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap,
                         * the last unlock must be an atomic unlock+wait on
                         * the owner of page
                         */
-                       uvmfault_unlockall(ufi, amap, NULL);
-                       tsleep_nsec(pg, PVM, "anonget2", INFSLP);
+                       if (pg->uobject) {
+                               uvmfault_unlockall(ufi, amap, NULL);
+                               tsleep_nsec(pg, PVM, "anonget1", INFSLP);
+                       } else {
+                               uvmfault_unlockall(ufi, NULL, NULL);
+                               rwsleep_nsec(pg, anon->an_lock, PVM | PNORELOCK,
+                                   "anonget2", INFSLP);
+                       }
                        /* ready to relock and try again */
                } else {
                        /* no page, we must try and bring it in. */
@@ -340,6 +349,9 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap,
 
                /* now relock and try again */
                locked = uvmfault_relock(ufi);
+               if (locked || we_own) {
+                       rw_enter(anon->an_lock, RW_WRITE);
+               }
 
                /*
                 * if we own the page (i.e. we set PG_BUSY), then we need
@@ -367,9 +379,10 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap,
                         */
                        if (pg->pg_flags & PG_RELEASED) {
                                pmap_page_protect(pg, PROT_NONE);
-                               uvm_anfree(anon);       /* frees page for us */
+                               KASSERT(anon->an_ref == 0);
                                if (locked)
                                        uvmfault_unlockall(ufi, amap, NULL);
+                               uvm_anon_release(anon); /* frees page for us */
                                counters_inc(uvmexp_counters, flt_pgrele);
                                return (VM_PAGER_REFAULT);      /* refault! */
                        }
@@ -400,6 +413,7 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap,
 
                                if (locked)
                                        uvmfault_unlockall(ufi, amap, NULL);
+                               rw_exit(anon->an_lock);
                                return (VM_PAGER_ERROR);
                        }
 
@@ -414,8 +428,12 @@ uvmfault_anonget(struct uvm_faultinfo *ufi, struct vm_amap *amap,
                }
 
                /* we were not able to relock.   restart fault. */
-               if (!locked)
+               if (!locked) {
+                       if (we_own) {
+                               rw_exit(anon->an_lock);
+                       }
                        return (VM_PAGER_REFAULT);
+               }
 
                /* verify no one touched the amap and moved the anon on us. */
                if (ufi != NULL &&
@@ -605,6 +623,7 @@ uvm_fault_check(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
 
        /* if we've got an amap, extract current anons. */
        if (amap) {
+               amap_lock(amap);
                amap_lookups(&ufi->entry->aref,
                    flt->startva - ufi->entry->start, *ranons, flt->npages);
        } else {
@@ -625,8 +644,10 @@ uvm_fault_check(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
                        voff_t uoff;
 
                        uoff = (flt->startva - ufi->entry->start) + ufi->entry->offset;
+                       KERNEL_LOCK();
                        (void) uobj->pgops->pgo_flush(uobj, uoff, uoff +
                            ((vsize_t)nback << PAGE_SHIFT), PGO_DEACTIVATE);
+                       KERNEL_UNLOCK();
                }
 
                /* now forget about the backpages */
@@ -656,6 +677,9 @@ uvm_fault_upper(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
        struct vm_page *pg = NULL;
        int error, ret;
 
+       KASSERT(rw_write_held(amap->am_lock));
+       KASSERT(anon->an_lock == amap->am_lock);
+
        /*
         * no matter if we have case 1A or case 1B we are going to need to
         * have the anon's memory resident.   ensure that now.
@@ -687,6 +711,9 @@ uvm_fault_upper(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
 #endif
        }
 
+       KASSERT(rw_write_held(amap->am_lock));
+       KASSERT(anon->an_lock == amap->am_lock);
+
        /*
         * if we are case 1B then we will need to allocate a new blank
         * anon to transfer the data into.   note that we have a lock
@@ -705,6 +732,7 @@ uvm_fault_upper(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
                oanon = anon;           /* oanon = old */
                anon = uvm_analloc();
                if (anon) {
+                       anon->an_lock = amap->am_lock;
                        pg = uvm_pagealloc(NULL, 0, anon, 0);
                }
 
@@ -714,6 +742,8 @@ uvm_fault_upper(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
                        if (anon == NULL)
                                counters_inc(uvmexp_counters, flt_noanon);
                        else {
+                               anon->an_lock = NULL;
+                               anon->an_ref--;
                                uvm_anfree(anon);
                                counters_inc(uvmexp_counters, flt_noram);
                        }
@@ -806,7 +836,6 @@ uvm_fault_upper(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
        return 0;
 }
 
-
 /*
  * uvm_fault_upper_lookup: look up existing h/w mapping and amap.
  *
@@ -858,6 +887,7 @@ uvm_fault_upper_lookup(struct uvm_faultinfo *ufi,
                        continue;
                }
                anon = anons[lcv];
+               KASSERT(anon->an_lock == amap->am_lock);
                if (anon->an_page &&
                    (anon->an_page->pg_flags & (PG_RELEASED|PG_BUSY)) == 0) {
                        uvm_lock_pageq();
@@ -1136,6 +1166,8 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
 
                /* re-verify the state of the world.  */
                locked = uvmfault_relock(ufi);
+               if (locked && amap != NULL)
+                       amap_lock(amap);
 
                /*
                 * Re-verify that amap slot is still free. if there is
@@ -1213,6 +1245,7 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
                         * a zero'd, dirty page, so have
                         * uvm_pagealloc() do that for us.
                         */
+                       anon->an_lock = amap->am_lock;
                        pg = uvm_pagealloc(NULL, 0, anon,
                            (uobjpage == PGO_DONTCARE) ? UVM_PGA_ZERO : 0);
                }
@@ -1239,6 +1272,8 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
                        if (anon == NULL)
                                counters_inc(uvmexp_counters, flt_noanon);
                        else {
+                               anon->an_lock = NULL;
+                               anon->an_ref--;
                                uvm_anfree(anon);
                                counters_inc(uvmexp_counters, flt_noram);
                        }
@@ -1266,7 +1301,7 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
                         */
                        if ((amap_flags(amap) & AMAP_SHARED) != 0) {
                                pmap_page_protect(uobjpage, PROT_NONE);
-                       }
+                               }
 
                        /* dispose of uobjpage. drop handle to uobj as well. */
                        if (uobjpage->pg_flags & PG_WANTED)
@@ -1306,6 +1341,12 @@ uvm_fault_lower(struct uvm_faultinfo *ufi, struct uvm_faultctx *flt,
         * all resources are present.   we can now map it in and free our
         * resources.
         */
+       if (amap == NULL)
+               KASSERT(anon == NULL);
+       else {
+               KASSERT(rw_write_held(amap->am_lock));
+               KASSERT(anon == NULL || anon->an_lock == amap->am_lock);
+       }
        if (pmap_enter(ufi->orig_map->pmap, ufi->orig_rvaddr,
            VM_PAGE_TO_PHYS(pg) | flt->pa_flags, flt->enter_prot,
            flt->access_type | PMAP_CANFAIL | (flt->wired ? PMAP_WIRED : 0)) != 0) {
@@ -1491,7 +1532,8 @@ void
 uvmfault_unlockall(struct uvm_faultinfo *ufi, struct vm_amap *amap,
     struct uvm_object *uobj)
 {
-
+       if (amap != NULL)
+               amap_unlock(amap);
        uvmfault_unlockmaps(ufi, FALSE);
 }
 
diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c
index c5c40ef..931504a 100644
@@ -1,4 +1,4 @@
-/*     $OpenBSD: uvm_map.c,v 1.269 2020/10/19 08:19:46 mpi Exp $       */
+/*     $OpenBSD: uvm_map.c,v 1.270 2021/01/19 13:21:36 mpi Exp $       */
 /*     $NetBSD: uvm_map.c,v 1.86 2000/11/27 08:40:03 chs Exp $ */
 
 /*
@@ -1104,10 +1104,8 @@ uvm_mapanon(struct vm_map *map, vaddr_t *addr, vsize_t sz,
        if (flags & UVM_FLAG_CONCEAL)
                entry->etype |= UVM_ET_CONCEAL;
        if (flags & UVM_FLAG_OVERLAY) {
-               KERNEL_LOCK();
                entry->aref.ar_pageoff = 0;
                entry->aref.ar_amap = amap_alloc(sz, M_WAITOK, 0);
-               KERNEL_UNLOCK();
        }
 
        /* Update map and process statistics. */
@@ -2833,9 +2831,7 @@ uvm_map_splitentry(struct vm_map *map, struct vm_map_entry *orig,
                orig->end = next->start = split;
 
                if (next->aref.ar_amap) {
-                       KERNEL_LOCK();
                        amap_splitref(&orig->aref, &next->aref, adj);
-                       KERNEL_UNLOCK();
                }
                if (UVM_ET_ISSUBMAP(orig)) {
                        uvm_map_reference(next->object.sub_map);
@@ -4682,12 +4678,14 @@ uvm_map_clean(struct vm_map *map, vaddr_t start, vaddr_t end, int flags)
                cp_start = MAX(entry->start, start);
                cp_end = MIN(entry->end, end);
 
+               amap_lock(amap);
                for (; cp_start != cp_end; cp_start += PAGE_SIZE) {
                        anon = amap_lookup(&entry->aref,
                            cp_start - entry->start);
                        if (anon == NULL)
                                continue;
 
+                       KASSERT(anon->an_lock == amap->am_lock);
                        pg = anon->an_page;
                        if (pg == NULL) {
                                continue;
@@ -4743,6 +4741,7 @@ deactivate_it:
                                panic("uvm_map_clean: weird flags");
                        }
                }
+               amap_unlock(amap);
 
 flush_object:
                cp_start = MAX(entry->start, start);
diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c
index 222cd5c..10e8fd6 100644
@@ -1,4 +1,4 @@
-/*     $OpenBSD: uvm_page.c,v 1.154 2020/12/02 16:32:00 mpi Exp $      */
+/*     $OpenBSD: uvm_page.c,v 1.155 2021/01/19 13:21:36 mpi Exp $      */
 /*     $NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $        */
 
 /*
@@ -1050,7 +1050,8 @@ uvm_page_unbusy(struct vm_page **pgs, int npgs)
                        } else {
                                atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
                                UVM_PAGE_OWN(pg, NULL);
-                               uvm_anfree(pg->uanon);
+                               rw_enter(pg->uanon->an_lock, RW_WRITE);
+                               uvm_anon_release(pg->uanon);
                        }
                } else {
                        atomic_clearbits_int(&pg->pg_flags, PG_WANTED|PG_BUSY);
diff --git a/sys/uvm/uvm_pager.c b/sys/uvm/uvm_pager.c
index 910d7ec..f808c6e 100644
@@ -1,4 +1,4 @@
-/*     $OpenBSD: uvm_pager.c,v 1.73 2020/10/21 09:08:14 mpi Exp $      */
+/*     $OpenBSD: uvm_pager.c,v 1.74 2021/01/19 13:21:36 mpi Exp $      */
 /*     $NetBSD: uvm_pager.c,v 1.36 2000/11/27 18:26:41 chs Exp $       */
 
 /*
@@ -649,7 +649,8 @@ uvm_pager_dropcluster(struct uvm_object *uobj, struct vm_page *pg,
                                UVM_PAGE_OWN(ppsp[lcv], NULL);
 
                                /* kills anon and frees pg */
-                               uvm_anfree(ppsp[lcv]->uanon);
+                               rw_enter(ppsp[lcv]->uanon->an_lock, RW_WRITE);
+                               uvm_anon_release(ppsp[lcv]->uanon);
 
                                continue;
                } else {