Bring in some new UVM code from NetBSD (not current).
author art <art@openbsd.org>
Thu, 16 Mar 2000 22:11:02 +0000 (22:11 +0000)
committer art <art@openbsd.org>
Thu, 16 Mar 2000 22:11:02 +0000 (22:11 +0000)
 - Introduce a new type of map that is interrupt safe and never allows faults
   in it. mb_map and kmem_map are made intrsafe.
 - Add an access-protection argument to uvm_vslock (passed down to uvm_fault
   and later to pmap_enter); see the call sketch after this list.
 - madvise(2) now works.
 - various cleanups.
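
   A rough sketch of the caller-visible changes (illustrative only; it
   restates the machdep and kern_physio hunks below and adds no API beyond
   what the diff itself introduces):

	/* uvm_km_suballoc() now takes map flags instead of a pageable boolean */
	exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
	    16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL);
	mb_map = uvm_km_suballoc(kernel_map, (vaddr_t *)&mbutl, &maxaddr,
	    VM_MBUF_SIZE, VM_MAP_INTRSAFE, FALSE, NULL);

	/* uvm_vslock() now takes the access type to hand to uvm_fault() */
	uvm_vslock(p, bp->b_data, todo,
	    (flags & B_READ) ? VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ);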

30 files changed:
sys/arch/hppa/hppa/machdep.c
sys/arch/i386/i386/machdep.c
sys/arch/sparc/sparc/machdep.c
sys/kern/kern_malloc.c
sys/kern/kern_physio.c
sys/kern/kern_sysctl.c
sys/uvm/uvm_aobj.c
sys/uvm/uvm_extern.h
sys/uvm/uvm_fault.c
sys/uvm/uvm_fault.h
sys/uvm/uvm_fault_i.h
sys/uvm/uvm_glue.c
sys/uvm/uvm_km.c
sys/uvm/uvm_km.h
sys/uvm/uvm_loan.c
sys/uvm/uvm_map.c
sys/uvm/uvm_map.h
sys/uvm/uvm_map_i.h
sys/uvm/uvm_mmap.c
sys/uvm/uvm_object.h
sys/uvm/uvm_page.c
sys/uvm/uvm_page.h
sys/uvm/uvm_page_i.h
sys/uvm/uvm_pager.c
sys/uvm/uvm_pdaemon.c
sys/uvm/uvm_pdaemon.h
sys/uvm/uvm_pglist.c
sys/uvm/uvm_uio.c [new file with mode: 0644]
sys/uvm/uvm_uio.h [new file with mode: 0644]
sys/vm/vm_map.h

index 44b14c0..48facbc 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: machdep.c,v 1.22 2000/02/22 20:08:15 mickey Exp $     */
+/*     $OpenBSD: machdep.c,v 1.23 2000/03/16 22:11:03 art Exp $        */
 
 /*
  * Copyright (c) 1999-2000 Michael Shalayeff
@@ -506,13 +506,13 @@ cpu_startup()
         * limits the number of processes exec'ing at any time.
         */
        exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
-           16*NCARGS, TRUE, FALSE, NULL);
+           16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL);
 
        /*
         * Allocate a submap for physio
         */
        phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
-           VM_PHYS_SIZE, TRUE, FALSE, NULL);
+           VM_PHYS_SIZE, 0, FALSE, NULL);
 
        /*
         * Finally, allocate mbuf pool.  Since mclrefcnt is an off-size
@@ -522,7 +522,7 @@ cpu_startup()
            M_MBUF, M_NOWAIT);
        bzero(mclrefcnt, NMBCLUSTERS+CLBYTES/MCLBYTES);
        mb_map = uvm_km_suballoc(kernel_map, (vaddr_t *)&mbutl, &maxaddr,
-           VM_MBUF_SIZE, FALSE, FALSE, NULL);
+           VM_MBUF_SIZE, VM_MAP_INTRSAFE, FALSE, NULL);
 
        /*
         * Initialize callouts
index c6f67fa..18910c9 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: machdep.c,v 1.126 2000/03/02 00:15:00 niklas Exp $    */
+/*     $OpenBSD: machdep.c,v 1.127 2000/03/16 22:11:03 art Exp $       */
 /*     $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $    */
 
 /*-
@@ -355,7 +355,7 @@ cpu_startup()
         */
 #if defined(UVM)
        exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
-                                  16*NCARGS, TRUE, FALSE, NULL);
+                                  16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL);
 #else
        exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, 16*NCARGS,
            TRUE);
@@ -366,7 +366,7 @@ cpu_startup()
         */
 #if defined(UVM)
        phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
-                                  VM_PHYS_SIZE, TRUE, FALSE, NULL);
+                                  VM_PHYS_SIZE, 0, FALSE, NULL);
 #else
        phys_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, VM_PHYS_SIZE,
            TRUE);
@@ -381,7 +381,7 @@ cpu_startup()
        bzero(mclrefcnt, NMBCLUSTERS+CLBYTES/MCLBYTES);
 #if defined(UVM)
        mb_map = uvm_km_suballoc(kernel_map, (vm_offset_t *)&mbutl, &maxaddr,
-           VM_MBUF_SIZE, FALSE, FALSE, NULL);
+           VM_MBUF_SIZE, VM_MAP_INTRSAFE, FALSE, NULL);
 #else
        mb_map = kmem_suballoc(kernel_map, (vm_offset_t *)&mbutl, &maxaddr,
            VM_MBUF_SIZE, FALSE);
index cf7a130..8051d02 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: machdep.c,v 1.46 2000/02/22 19:28:01 deraadt Exp $    */
+/*     $OpenBSD: machdep.c,v 1.47 2000/03/16 22:11:02 art Exp $        */
 /*     $NetBSD: machdep.c,v 1.85 1997/09/12 08:55:02 pk Exp $ */
 
 /*
@@ -301,7 +301,7 @@ cpu_startup()
         */
 #if defined(UVM)
        exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
-                                16*NCARGS, TRUE, FALSE, NULL);
+                                16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL);
 #else
        exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
                                 16*NCARGS, TRUE);
@@ -347,7 +347,7 @@ cpu_startup()
        bzero(mclrefcnt, NMBCLUSTERS+CLBYTES/MCLBYTES);
 #if defined(UVM)
        mb_map = uvm_km_suballoc(kernel_map, (vaddr_t *)&mbutl, &maxaddr,
-                                VM_MBUF_SIZE, FALSE, FALSE, NULL);
+                                VM_MBUF_SIZE, VM_MAP_INTRSAFE, FALSE, NULL);
 #else
        mb_map = kmem_suballoc(kernel_map, (vaddr_t *)&mbutl, &maxaddr,
                               VM_MBUF_SIZE, FALSE);
index aedd351..3e1aeb3 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: kern_malloc.c,v 1.18 1999/11/25 13:41:30 art Exp $    */
+/*     $OpenBSD: kern_malloc.c,v 1.19 2000/03/16 22:11:03 art Exp $    */
 /*     $NetBSD: kern_malloc.c,v 1.15.4.2 1996/06/13 17:10:56 cgd Exp $ */
 
 /*
@@ -49,7 +49,7 @@
 #if defined(UVM)
 #include <uvm/uvm_extern.h>
 
-static struct vm_map kmem_map_store;
+static struct vm_map_intrsafe kmem_map_store;
 vm_map_t kmem_map = NULL;
 #endif
 
@@ -228,11 +228,11 @@ malloc(size, type, flags)
                int rv;
                vaddr_t addr = (vaddr_t)kbp->kb_next;
 
-               vm_map_lock_read(kmem_map);
+               vm_map_lock(kmem_map);
                rv = uvm_map_checkprot(kmem_map, addr,
                                       addr + sizeof(struct freelist),
                                       VM_PROT_WRITE);
-               vm_map_unlock_read(kmem_map);
+               vm_map_unlock(kmem_map);
 
                if (!rv)
 #else
@@ -439,7 +439,7 @@ kmeminit()
                (vsize_t)(npg * sizeof(struct kmemusage)));
        kmem_map = uvm_km_suballoc(kernel_map, (vaddr_t *)&kmembase,
                (vaddr_t *)&kmemlimit, (vsize_t)(npg * PAGE_SIZE), 
-                       FALSE, FALSE, &kmem_map_store);
+                       VM_MAP_INTRSAFE, FALSE, &kmem_map_store.vmi_map);
 #else
        kmemusage = (struct kmemusage *) kmem_alloc(kernel_map,
                (vsize_t)(npg * sizeof(struct kmemusage)));
index e454d9f..2086bab 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: kern_physio.c,v 1.7 1999/12/02 20:39:32 art Exp $     */
+/*     $OpenBSD: kern_physio.c,v 1.8 2000/03/16 22:11:04 art Exp $     */
 /*     $NetBSD: kern_physio.c,v 1.28 1997/05/19 10:43:28 pk Exp $      */
 
 /*-
@@ -184,7 +184,8 @@ physio(strategy, bp, dev, flags, minphys, uio)
                         */
                        p->p_holdcnt++;
 #if defined(UVM)
-                       uvm_vslock(p, bp->b_data, todo);
+                        uvm_vslock(p, bp->b_data, todo, (flags & B_READ) ?
+                               VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ);
 #else
                        vslock(bp->b_data, todo);
 #endif
index 1a5e37c..b7575a5 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: kern_sysctl.c,v 1.32 2000/03/03 11:46:09 art Exp $    */
+/*     $OpenBSD: kern_sysctl.c,v 1.33 2000/03/16 22:11:03 art Exp $    */
 /*     $NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $     */
 
 /*-
@@ -173,7 +173,7 @@ sys___sysctl(p, v, retval)
                memlock.sl_lock = 1;
                if (dolock)
 #if defined(UVM)
-                       uvm_vslock(p, SCARG(uap, old), oldlen);
+                       uvm_vslock(p, SCARG(uap, old), oldlen, VM_PROT_NONE);
 #else
                        vslock(SCARG(uap, old), oldlen);
 #endif
index 250662b..727b720 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_aobj.c,v 1.18 1999/03/26 17:34:15 chs Exp $        */
+/*     $NetBSD: uvm_aobj.c,v 1.20 1999/05/25 00:09:00 thorpej Exp $    */
 
 /*
  * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and
@@ -619,7 +619,7 @@ uao_reference(uobj)
         * kernel_object already has plenty of references, leave it alone.
         */
 
-       if (uobj->uo_refs == UVM_OBJ_KERN)
+       if (UVM_OBJ_IS_KERN_OBJECT(uobj))
                return;
 
        simple_lock(&uobj->vmobjlock);
@@ -646,7 +646,7 @@ uao_detach(uobj)
        /*
         * detaching from kernel_object is a noop.
         */
-       if (uobj->uo_refs == UVM_OBJ_KERN)
+       if (UVM_OBJ_IS_KERN_OBJECT(uobj))
                return;
 
        simple_lock(&uobj->vmobjlock);
index 9782f0e..f3c2a65 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_extern.h,v 1.24 1999/04/11 04:04:11 chs Exp $      */
+/*     $NetBSD: uvm_extern.h,v 1.27 1999/05/26 19:16:36 thorpej Exp $  */
 
 /*
  *
@@ -282,7 +282,8 @@ boolean_t           uvm_kernacc __P((caddr_t, size_t, int));
 __dead void            uvm_scheduler __P((void)) __attribute__((noreturn));
 void                   uvm_swapin __P((struct proc *));
 boolean_t              uvm_useracc __P((caddr_t, size_t, int));
-void                   uvm_vslock __P((struct proc *, caddr_t, size_t));
+void                   uvm_vslock __P((struct proc *, caddr_t, size_t,
+                           vm_prot_t));
 void                   uvm_vsunlock __P((struct proc *, caddr_t, size_t));
 
 
@@ -301,7 +302,7 @@ void                        uvm_km_free_wakeup __P((vm_map_t, vaddr_t,
 vaddr_t                        uvm_km_kmemalloc __P((vm_map_t, struct uvm_object *,
                                                vsize_t, int));
 struct vm_map          *uvm_km_suballoc __P((vm_map_t, vaddr_t *,
-                               vaddr_t *, vsize_t, boolean_t,
+                               vaddr_t *, vsize_t, int,
                                boolean_t, vm_map_t));
 vaddr_t                        uvm_km_valloc __P((vm_map_t, vsize_t));
 vaddr_t                        uvm_km_valloc_wait __P((vm_map_t, vsize_t));
index d05c5c3..b4002ac 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_fault.c,v 1.28 1999/04/11 04:04:11 chs Exp $       */
+/*     $NetBSD: uvm_fault.c,v 1.33 1999/06/04 23:38:41 thorpej Exp $   */
 
 /*
  *
@@ -586,6 +586,19 @@ uvm_fault(orig_map, vaddr, fault_type, access_type)
        else
                narrow = FALSE;         /* normal fault */
 
+       /*
+        * before we do anything else, if this is a fault on a kernel
+        * address, check to see if the address is managed by an
+        * interrupt-safe map.  If it is, we fail immediately.  Intrsafe
+        * maps are never pageable, and this approach avoids an evil
+        * locking mess.
+        */
+       if (orig_map == kernel_map && uvmfault_check_intrsafe(&ufi)) {
+               UVMHIST_LOG(maphist, "<- VA 0x%lx in intrsafe map %p",
+                   ufi.orig_rvaddr, ufi.map, 0, 0);
+               return (KERN_FAILURE);
+       }
+
        /*
         * "goto ReFault" means restart the page fault from ground zero.
         */
@@ -613,6 +626,17 @@ ReFault:
                return (KERN_PROTECTION_FAILURE);
        }
 
+       /*
+        * if the map is not a pageable map, a page fault always fails.
+        */
+
+       if ((ufi.map->flags & VM_MAP_PAGEABLE) == 0) {
+               UVMHIST_LOG(maphist,
+                   "<- map %p not pageable", ufi.map, 0, 0, 0);
+               uvmfault_unlockmaps(&ufi, FALSE);
+               return (KERN_FAILURE);
+       }
+
        /*
         * "enter_prot" is the protection we want to enter the page in at.
         * for certain pages (e.g. copy-on-write pages) this protection can
@@ -1689,9 +1713,10 @@ Case2:
  */
 
 int
-uvm_fault_wire(map, start, end)
+uvm_fault_wire(map, start, end, access_type)
        vm_map_t map;
        vaddr_t start, end;
+       vm_prot_t access_type;
 {
        vaddr_t va;
        pmap_t  pmap;
@@ -1713,10 +1738,10 @@ uvm_fault_wire(map, start, end)
         */
 
        for (va = start ; va < end ; va += PAGE_SIZE) {
-               rv = uvm_fault(map, va, VM_FAULT_WIRE, VM_PROT_NONE);
+               rv = uvm_fault(map, va, VM_FAULT_WIRE, access_type);
                if (rv) {
                        if (va != start) {
-                               uvm_fault_unwire(map->pmap, start, va);
+                               uvm_fault_unwire(map, start, va);
                        }
                        return (rv);
                }
@@ -1727,19 +1752,23 @@ uvm_fault_wire(map, start, end)
 
 /*
  * uvm_fault_unwire(): unwire range of virtual space.
- *
- * => caller holds reference to pmap (via its map)
  */
 
 void
-uvm_fault_unwire(pmap, start, end)
-       struct pmap *pmap;
+uvm_fault_unwire(map, start, end)
+       vm_map_t map;
        vaddr_t start, end;
 {
+       pmap_t pmap = vm_map_pmap(map);
        vaddr_t va;
        paddr_t pa;
        struct vm_page *pg;
 
+#ifdef DIAGNOSTIC
+       if (map->flags & VM_MAP_INTRSAFE)
+               panic("uvm_fault_unwire: intrsafe map");
+#endif
+
        /*
         * we assume that the area we are unwiring has actually been wired
         * in the first place.   this means that we should be able to extract
index fd8958f..6849b57 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_fault.h,v 1.7 1998/10/11 23:07:42 chuck Exp $      */
+/*     $NetBSD: uvm_fault.h,v 1.11 1999/06/04 23:38:41 thorpej Exp $   */
 
 /*
  *
@@ -72,13 +72,14 @@ struct uvm_faultinfo {
 
 int uvmfault_anonget __P((struct uvm_faultinfo *, struct vm_amap *,
                                                                                                        struct vm_anon *));
+static boolean_t uvmfault_check_intrsafe __P((struct uvm_faultinfo *));
 static boolean_t uvmfault_lookup __P((struct uvm_faultinfo *, boolean_t));
 static boolean_t uvmfault_relock __P((struct uvm_faultinfo *));
 static void uvmfault_unlockall __P((struct uvm_faultinfo *, struct vm_amap *,
                                    struct uvm_object *, struct vm_anon *));
 static void uvmfault_unlockmaps __P((struct uvm_faultinfo *, boolean_t));
 
-int uvm_fault_wire __P((vm_map_t, vaddr_t, vaddr_t));
-void uvm_fault_unwire __P((struct pmap *, vaddr_t, vaddr_t));
+int uvm_fault_wire __P((vm_map_t, vaddr_t, vaddr_t, vm_prot_t));
+void uvm_fault_unwire __P((vm_map_t, vaddr_t, vaddr_t));
 
 #endif /* _UVM_UVM_FAULT_H_ */
index 8a2c3ea..38e6447 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_fault_i.h,v 1.7 1999/01/24 23:53:15 chuck Exp $    */
+/*     $NetBSD: uvm_fault_i.h,v 1.9 1999/06/04 23:38:41 thorpej Exp $  */
 
 /*
  *
@@ -81,6 +81,39 @@ uvmfault_unlockall(ufi, amap, uobj, anon)
        uvmfault_unlockmaps(ufi, FALSE);
 }
 
+/*
+ * uvmfault_check_intrsafe: check for a virtual address managed by
+ * an interrupt-safe map.
+ *
+ * => caller must provide a uvm_faultinfo structure with the IN
+ *     params properly filled in
+ * => if we find an intrsafe VA, we fill in ufi->map, and return TRUE
+ */
+
+static __inline boolean_t
+uvmfault_check_intrsafe(ufi)
+       struct uvm_faultinfo *ufi;
+{
+       struct vm_map_intrsafe *vmi;
+       int s;
+
+       s = vmi_list_lock();
+       for (vmi = LIST_FIRST(&vmi_list); vmi != NULL;
+            vmi = LIST_NEXT(vmi, vmi_list)) {
+               if (ufi->orig_rvaddr >= vm_map_min(&vmi->vmi_map) &&
+                   ufi->orig_rvaddr < vm_map_max(&vmi->vmi_map))
+                       break;
+       }
+       vmi_list_unlock(s);
+
+       if (vmi != NULL) {
+               ufi->map = &vmi->vmi_map;
+               return (TRUE);
+       }
+
+       return (FALSE);
+}
+
 /*
  * uvmfault_lookup: lookup a virtual address in a map
  *
index fe866fd..837c158 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_glue.c,v 1.19 1999/04/30 21:23:50 thorpej Exp $    */
+/*     $NetBSD: uvm_glue.c,v 1.23 1999/05/28 20:49:51 thorpej Exp $    */
 
 /* 
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -221,13 +221,15 @@ uvm_chgkprot(addr, len, rw)
  */
 
 void
-uvm_vslock(p, addr, len)
+uvm_vslock(p, addr, len, access_type)
        struct proc *p;
        caddr_t addr;
        size_t  len;
+       vm_prot_t access_type;
 {
+
        uvm_fault_wire(&p->p_vmspace->vm_map, trunc_page(addr), 
-           round_page(addr+len));
+           round_page(addr+len), access_type);
 }
 
 /*
@@ -243,7 +245,7 @@ uvm_vsunlock(p, addr, len)
        caddr_t addr;
        size_t  len;
 {
-       uvm_fault_unwire(p->p_vmspace->vm_map.pmap, trunc_page(addr), 
+       uvm_fault_unwire(&p->p_vmspace->vm_map, trunc_page(addr), 
                round_page(addr+len));
 }
 
@@ -282,9 +284,12 @@ uvm_fork(p1, p2, shared, stack, stacksize)
         * and the kernel stack.  Wired state is stored in p->p_flag's
         * P_INMEM bit rather than in the vm_map_entry's wired count
         * to prevent kernel_map fragmentation.
+        *
+        * Note the kernel stack gets read/write accesses right off
+        * the bat.
         */
        rv = uvm_fault_wire(kernel_map, (vaddr_t)up,
-           (vaddr_t)up + USPACE);
+           (vaddr_t)up + USPACE, VM_PROT_READ | VM_PROT_WRITE);
        if (rv != KERN_SUCCESS)
                panic("uvm_fork: uvm_fault_wire failed: %d", rv);
 
@@ -373,7 +378,8 @@ uvm_swapin(p)
 
        addr = (vaddr_t)p->p_addr;
        /* make P_INMEM true */
-       uvm_fault_wire(kernel_map, addr, addr + USPACE);
+       uvm_fault_wire(kernel_map, addr, addr + USPACE,
+           VM_PROT_READ | VM_PROT_WRITE);
 
        /*
         * Some architectures need to be notified when the user area has
@@ -586,7 +592,7 @@ uvm_swapout(p)
         * Unwire the to-be-swapped process's user struct and kernel stack.
         */
        addr = (vaddr_t)p->p_addr;
-       uvm_fault_unwire(kernel_map->pmap, addr, addr + USPACE); /* !P_INMEM */
+       uvm_fault_unwire(kernel_map, addr, addr + USPACE); /* !P_INMEM */
        pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));
 
        /*
index a908f8f..a5bb21b 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_km.c,v 1.22 1999/03/26 21:58:39 mycroft Exp $      */
+/*     $NetBSD: uvm_km.c,v 1.27 1999/06/04 23:38:41 thorpej Exp $      */
 
 /* 
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
 
 vm_map_t kernel_map = NULL;
 
+struct vmi_list vmi_list;
+simple_lock_data_t vmi_list_slock;
+
 /*
  * local functions
  */
 
 static int uvm_km_get __P((struct uvm_object *, vaddr_t, 
-                                                                                                        vm_page_t *, int *, int, vm_prot_t, int, int));
+       vm_page_t *, int *, int, vm_prot_t, int, int));
+
 /*
  * local data structues
  */
@@ -414,29 +418,43 @@ uvm_km_init(start, end)
        vaddr_t base = VM_MIN_KERNEL_ADDRESS;
 
        /*
-        * first, init kernel memory objects.
+        * first, initialize the interrupt-safe map list.
+        */
+       LIST_INIT(&vmi_list);
+       simple_lock_init(&vmi_list_slock);
+
+       /*
+        * next, init kernel memory objects.
         */
 
        /* kernel_object: for pageable anonymous kernel memory */
        uvm.kernel_object = uao_create(VM_MAX_KERNEL_ADDRESS -
                                 VM_MIN_KERNEL_ADDRESS, UAO_FLAG_KERNOBJ);
 
-       /* kmem_object: for malloc'd memory (wired, protected by splimp) */
+       /*
+        * kmem_object: for use by the kernel malloc().  Memory is always
+        * wired, and this object (and the kmem_map) can be accessed at
+        * interrupt time.
+        */
        simple_lock_init(&kmem_object_store.vmobjlock);
        kmem_object_store.pgops = &km_pager;
        TAILQ_INIT(&kmem_object_store.memq);
        kmem_object_store.uo_npages = 0;
        /* we are special.  we never die */
-       kmem_object_store.uo_refs = UVM_OBJ_KERN; 
+       kmem_object_store.uo_refs = UVM_OBJ_KERN_INTRSAFE;
        uvmexp.kmem_object = &kmem_object_store;
 
-       /* mb_object: for mbuf memory (always wired, protected by splimp) */
+       /*
+        * mb_object: for mbuf cluster pages on platforms which use the
+        * mb_map.  Memory is always wired, and this object (and the mb_map)
+        * can be accessed at interrupt time.
+        */
        simple_lock_init(&mb_object_store.vmobjlock);
        mb_object_store.pgops = &km_pager;
        TAILQ_INIT(&mb_object_store.memq);
        mb_object_store.uo_npages = 0;
        /* we are special.  we never die */
-       mb_object_store.uo_refs = UVM_OBJ_KERN; 
+       mb_object_store.uo_refs = UVM_OBJ_KERN_INTRSAFE;
        uvmexp.mb_object = &mb_object_store;
 
        /*
@@ -444,7 +462,7 @@ uvm_km_init(start, end)
         * before installing.
         */
 
-       uvm_map_setup(&kernel_map_store, base, end, FALSE);
+       uvm_map_setup(&kernel_map_store, base, end, VM_MAP_PAGEABLE);
        kernel_map_store.pmap = pmap_kernel();
        if (uvm_map(&kernel_map_store, &base, start - base, NULL,
            UVM_UNKNOWN_OFFSET, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL,
@@ -469,11 +487,11 @@ uvm_km_init(start, end)
  *     alloc a new map
  */
 struct vm_map *
-uvm_km_suballoc(map, min, max, size, pageable, fixed, submap)
+uvm_km_suballoc(map, min, max, size, flags, fixed, submap)
        struct vm_map *map;
        vaddr_t *min, *max;             /* OUT, OUT */
        vsize_t size;
-       boolean_t pageable;
+       int flags;
        boolean_t fixed;
        struct vm_map *submap;
 {
@@ -503,11 +521,11 @@ uvm_km_suballoc(map, min, max, size, pageable, fixed, submap)
 
        pmap_reference(vm_map_pmap(map));
        if (submap == NULL) {
-               submap = uvm_map_create(vm_map_pmap(map), *min, *max, pageable);
+               submap = uvm_map_create(vm_map_pmap(map), *min, *max, flags);
                if (submap == NULL)
                        panic("uvm_km_suballoc: unable to create submap");
        } else {
-               uvm_map_setup(submap, *min, *max, pageable);
+               uvm_map_setup(submap, *min, *max, flags);
                submap->pmap = vm_map_pmap(map);
        }
 
@@ -535,15 +553,17 @@ uvm_km_pgremove(uobj, start, end)
        struct uvm_object *uobj;
        vaddr_t start, end;
 {
-       boolean_t by_list, is_aobj;
+       boolean_t by_list;
        struct vm_page *pp, *ppnext;
        vaddr_t curoff;
        UVMHIST_FUNC("uvm_km_pgremove"); UVMHIST_CALLED(maphist);
 
        simple_lock(&uobj->vmobjlock);          /* lock object */
 
-       /* is uobj an aobj? */
-       is_aobj = uobj->pgops == &aobj_pager;
+#ifdef DIAGNOSTIC
+       if (uobj->pgops != &aobj_pager)
+               panic("uvm_km_pgremove: object %p not an aobj", uobj);
+#endif
 
        /* choose cheapest traversal */
        by_list = (uobj->uo_npages <=
@@ -561,26 +581,24 @@ uvm_km_pgremove(uobj, start, end)
 
                UVMHIST_LOG(maphist,"  page 0x%x, busy=%d", pp,
                    pp->flags & PG_BUSY, 0, 0);
+
                /* now do the actual work */
-               if (pp->flags & PG_BUSY)
+               if (pp->flags & PG_BUSY) {
                        /* owner must check for this when done */
                        pp->flags |= PG_RELEASED;
-               else {
-                       pmap_page_protect(PMAP_PGARG(pp), VM_PROT_NONE);
+               } else {
+                       /* free the swap slot... */
+                       uao_dropswap(uobj, curoff >> PAGE_SHIFT);
 
                        /*
-                        * if this kernel object is an aobj, free the swap slot.
+                        * ...and free the page; note it may be on the
+                        * active or inactive queues.
                         */
-                       if (is_aobj) {
-                               uao_dropswap(uobj, curoff >> PAGE_SHIFT);
-                       }
-
                        uvm_lock_pageq();
                        uvm_pagefree(pp);
                        uvm_unlock_pageq();
                }
                /* done */
-
        }
        simple_unlock(&uobj->vmobjlock);
        return;
@@ -588,7 +606,6 @@ uvm_km_pgremove(uobj, start, end)
 loop_by_list:
 
        for (pp = uobj->memq.tqh_first ; pp != NULL ; pp = ppnext) {
-
                ppnext = pp->listq.tqe_next;
                if (pp->offset < start || pp->offset >= end) {
                        continue;
@@ -596,26 +613,111 @@ loop_by_list:
 
                UVMHIST_LOG(maphist,"  page 0x%x, busy=%d", pp,
                    pp->flags & PG_BUSY, 0, 0);
+
                /* now do the actual work */
-               if (pp->flags & PG_BUSY)
+               if (pp->flags & PG_BUSY) {
                        /* owner must check for this when done */
                        pp->flags |= PG_RELEASED;
-               else {
-                       pmap_page_protect(PMAP_PGARG(pp), VM_PROT_NONE);
+               } else {
+                       /* free the swap slot... */
+                       uao_dropswap(uobj, pp->offset >> PAGE_SHIFT);
 
                        /*
-                        * if this kernel object is an aobj, free the swap slot.
+                        * ...and free the page; note it may be on the
+                        * active or inactive queues.
                         */
-                       if (is_aobj) {
-                               uao_dropswap(uobj, pp->offset >> PAGE_SHIFT);
-                       }
-
                        uvm_lock_pageq();
                        uvm_pagefree(pp);
                        uvm_unlock_pageq();
                }
                /* done */
+       }
+       simple_unlock(&uobj->vmobjlock);
+       return;
+}
+
+
+/*
+ * uvm_km_pgremove_intrsafe: like uvm_km_pgremove(), but for "intrsafe"
+ *    objects
+ *
+ * => when you unmap a part of anonymous kernel memory you want to toss
+ *    the pages right away.    (this gets called from uvm_unmap_...).
+ * => none of the pages will ever be busy, and none of them will ever
+ *    be on the active or inactive queues (because these objects are
+ *    never allowed to "page").
+ */
 
+void
+uvm_km_pgremove_intrsafe(uobj, start, end)
+       struct uvm_object *uobj;
+       vaddr_t start, end;
+{
+       boolean_t by_list;
+       struct vm_page *pp, *ppnext;
+       vaddr_t curoff;
+       UVMHIST_FUNC("uvm_km_pgremove_intrsafe"); UVMHIST_CALLED(maphist);
+
+       simple_lock(&uobj->vmobjlock);          /* lock object */
+
+#ifdef DIAGNOSTIC
+       if (UVM_OBJ_IS_INTRSAFE_OBJECT(uobj) == 0)
+               panic("uvm_km_pgremove_intrsafe: object %p not intrsafe", uobj);
+#endif
+
+       /* choose cheapest traversal */
+       by_list = (uobj->uo_npages <=
+            ((end - start) >> PAGE_SHIFT) * UKM_HASH_PENALTY);
+       if (by_list)
+               goto loop_by_list;
+
+       /* by hash */
+
+       for (curoff = start ; curoff < end ; curoff += PAGE_SIZE) {
+               pp = uvm_pagelookup(uobj, curoff);
+               if (pp == NULL)
+                       continue;
+
+               UVMHIST_LOG(maphist,"  page 0x%x, busy=%d", pp,
+                   pp->flags & PG_BUSY, 0, 0);
+#ifdef DIAGNOSTIC
+               if (pp->flags & PG_BUSY)
+                       panic("uvm_km_pgremove_intrsafe: busy page");
+               if (pp->pqflags & PQ_ACTIVE)
+                       panic("uvm_km_pgremove_intrsafe: active page");
+               if (pp->pqflags & PQ_INACTIVE)
+                       panic("uvm_km_pgremove_intrsafe: inactive page");
+#endif
+
+               /* free the page */
+               uvm_pagefree(pp);
+       }
+       simple_unlock(&uobj->vmobjlock);
+       return;
+
+loop_by_list:
+
+       for (pp = uobj->memq.tqh_first ; pp != NULL ; pp = ppnext) {
+               ppnext = pp->listq.tqe_next;
+               if (pp->offset < start || pp->offset >= end) {
+                       continue;
+               }
+
+               UVMHIST_LOG(maphist,"  page 0x%x, busy=%d", pp,
+                   pp->flags & PG_BUSY, 0, 0);
+
+#ifdef DIAGNOSTIC
+               if (pp->flags & PG_BUSY)
+                       panic("uvm_km_pgremove_intrsafe: busy page");
+               if (pp->pqflags & PQ_ACTIVE)
+                       panic("uvm_km_pgremove_intrsafe: active page");
+               if (pp->pqflags & PQ_INACTIVE)
+                       panic("uvm_km_pgremove_intrsafe: inactive page");
+#endif
+
+               /* free the page */
+               uvm_pagefree(pp);
        }
        simple_unlock(&uobj->vmobjlock);
        return;
@@ -725,12 +827,18 @@ uvm_km_kmemalloc(map, obj, size, flags)
                 * (because if pmap_enter wants to allocate out of kmem_object
                 * it will need to lock it itself!)
                 */
+               if (UVM_OBJ_IS_INTRSAFE_OBJECT(obj)) {
 #if defined(PMAP_NEW)
-               pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg), VM_PROT_ALL);
+                       pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg),
+                           VM_PROT_ALL);
 #else
-               pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
-                   UVM_PROT_ALL, TRUE, VM_PROT_READ | VM_PROT_WRITE);
+                       pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
+                           UVM_PROT_ALL, TRUE, VM_PROT_READ|VM_PROT_WRITE);
 #endif
+               } else {
+                       pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
+                           UVM_PROT_ALL, TRUE, VM_PROT_READ|VM_PROT_WRITE);
+               }
                loopva += PAGE_SIZE;
                offset += PAGE_SIZE;
                size -= PAGE_SIZE;
@@ -861,8 +969,8 @@ uvm_km_alloc1(map, size, zeroit)
                 * map it in; note we're never called with an intrsafe
                 * object, so we always use regular old pmap_enter().
                 */
-                pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
-                    UVM_PROT_ALL, TRUE, VM_PROT_READ|VM_PROT_WRITE);
+               pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg),
+                   UVM_PROT_ALL, TRUE, VM_PROT_READ|VM_PROT_WRITE);
 
                loopva += PAGE_SIZE;
                offset += PAGE_SIZE;
index 7b07b0d..fb143cb 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_km.h,v 1.6 1998/08/13 02:11:01 eeh Exp $   */
+/*     $NetBSD: uvm_km.h,v 1.8 1999/05/25 20:30:09 thorpej Exp $       */
 
 /*
  *
@@ -47,5 +47,6 @@
 
 void uvm_km_init __P((vaddr_t, vaddr_t));
 void uvm_km_pgremove __P((struct uvm_object *, vaddr_t, vaddr_t));
+void uvm_km_pgremove_intrsafe __P((struct uvm_object *, vaddr_t, vaddr_t));
 
 #endif /* _UVM_UVM_KM_H_ */
index 66d81f1..f82d857 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_loan.c,v 1.14 1999/03/25 18:48:52 mrg Exp $        */
+/*     $NetBSD: uvm_loan.c,v 1.17 1999/06/03 00:05:45 thorpej Exp $    */
 
 /*
  *
@@ -59,7 +59,7 @@
  *
  * there are 3 types of loans possible:
  *  O->K  uvm_object page to wired kernel page (e.g. mbuf data area)
- *  A->K  anon page to kernel wired kernel page (e.g. mbuf data area)
+ *  A->K  anon page to wired kernel page (e.g. mbuf data area)
  *  O->A  uvm_object to anon loan (e.g. vnode page to an anon)
 * note that it is possible to have an O page loaned to both an A and K
  * at the same time.
  * a uvm_object and a vm_anon, but PQ_ANON will not be set.   this sort
  * of page is considered "owned" by the uvm_object (not the anon).
  *
- * each loan of a page to a wired kernel page bumps the pg->wire_count.
- * wired kernel mappings should be entered with pmap_kenter functions
- * so that pmap_page_protect() will not affect the kernel mappings.
- * (this requires the PMAP_NEW interface...).
+ * each loan of a page to the kernel bumps the pg->wire_count.  the
+ * kernel mappings for these pages will be read-only and wired.  since
+ * the page will also be wired, it will not be a candidate for pageout,
+ * and thus will never be pmap_page_protect()'d with VM_PROT_NONE.  a
+ * write fault in the kernel to one of these pages will not cause
+ * copy-on-write.  instead, the page fault is considered fatal.  this
+ * is because the kernel mapping will have no way to look up the
+ * object/anon which the page is owned by.  this is a good side-effect,
+ * since a kernel write to a loaned page is an error.
  *
  * owners that want to free their pages and discover that they are 
  * loaned out simply "disown" them (the page becomes an orphan).  these
  *
  * note that loaning a page causes all mappings of the page to become
  * read-only (via pmap_page_protect).   this could have an unexpected
- * effect on normal "wired" pages if one is not careful.
+ * effect on normal "wired" pages if one is not careful (XXX).
  */
 
 /*
@@ -220,6 +225,11 @@ uvm_loan(map, start, len, result, flags)
        void **output;
        int rv;
 
+#ifdef DIAGNOSTIC
+       if (map->flags & VM_MAP_INTRSAFE)
+               panic("uvm_loan: intrsafe map");
+#endif
+
        /*
         * ensure that one and only one of the flags is set
         */
index 68487e3..c7c34a8 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_map.c,v 1.39 1999/05/12 19:11:23 thorpej Exp $     */
+/*     $NetBSD: uvm_map.c,v 1.53 1999/06/07 16:31:42 thorpej Exp $     */
 
 /* 
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -186,6 +186,23 @@ static void                uvm_map_entry_unwire __P((vm_map_t, vm_map_entry_t));
  * local inlines
  */
 
+/* XXX Should not exist! */
+#define        vm_map_downgrade(map)                                           \
+       (void) lockmgr(&(map)->lock, LK_DOWNGRADE, NULL, curproc)
+
+/* XXX Should not exist! */
+#ifdef DIAGNOSTIC
+#define        vm_map_upgrade(map)                                             \
+do {                                                                   \
+       if (lockmgr(&(map)->lock, LK_UPGRADE, NULL, curproc) != 0)      \
+               panic("vm_map_upgrade: failed to upgrade lock");        \
+} while (0)
+#else
+#define        vm_map_upgrade(map)                                             \
+       (void) lockmgr(&(map)->lock, LK_UPGRADE, NULL)
+#endif /* DIAGNOSTIC */
+
+
 /*
  * uvm_mapent_alloc: allocate a map entry
  *
@@ -201,11 +218,11 @@ uvm_mapent_alloc(map)
        UVMHIST_FUNC("uvm_mapent_alloc");
        UVMHIST_CALLED(maphist);
 
-       if (map->entries_pageable) {
+       if ((map->flags & VM_MAP_INTRSAFE) == 0 &&
+           map != kernel_map && kernel_map != NULL /* XXX */) {
                me = pool_get(&uvm_map_entry_pool, PR_WAITOK);
                me->flags = 0;
                /* me can't be null, wait ok */
-
        } else {
                s = splimp();   /* protect kentry_free list with splimp */
                simple_lock(&uvm.kentry_lock);
@@ -214,14 +231,14 @@ uvm_mapent_alloc(map)
                simple_unlock(&uvm.kentry_lock);
                splx(s);
                if (!me)
-       panic("mapent_alloc: out of kernel map entries, check MAX_KMAPENT");
+       panic("mapent_alloc: out of static map entries, check MAX_KMAPENT");
                me->flags = UVM_MAP_STATIC;
        }
 
-       UVMHIST_LOG(maphist, "<- new entry=0x%x [pageable=%d]", 
-               me, map->entries_pageable, 0, 0);
+       UVMHIST_LOG(maphist, "<- new entry=0x%x [kentry=%d]", 
+               me, ((map->flags & VM_MAP_INTRSAFE) != 0 || map == kernel_map)
+               ? TRUE : FALSE, 0, 0);
        return(me);
-
 }
 
 /*
@@ -276,7 +293,7 @@ uvm_map_entry_unwire(map, entry)
        vm_map_entry_t entry;
 {
 
-       uvm_fault_unwire(map->pmap, entry->start, entry->end);
+       uvm_fault_unwire(map, entry->start, entry->end);
        entry->wired_count = 0;
 }
 
@@ -543,8 +560,9 @@ uvm_map(map, startp, size, uobj, uoffset, flags)
        } else {
                if (uoffset == UVM_UNKNOWN_OFFSET) {
 #ifdef DIAGNOSTIC
-                       if (uobj->uo_refs != UVM_OBJ_KERN)
-       panic("uvm_map: unknown offset with non-kernel object");
+                       if (UVM_OBJ_IS_KERN_OBJECT(uobj) == 0)
+                               panic("uvm_map: unknown offset with "
+                                   "non-kernel object");
 #endif
                        uoffset = *startp - vm_map_min(kernel_map);
                }
@@ -976,11 +994,11 @@ uvm_unmap_remove(map, start, end, entry_list)
                 * we want to free these pages right away...
                 */
                if (UVM_ET_ISOBJ(entry) &&
-                   entry->object.uvm_obj->uo_refs == UVM_OBJ_KERN) {
-
+                   UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) {
 #ifdef DIAGNOSTIC
                        if (vm_map_pmap(map) != pmap_kernel())
-       panic("uvm_unmap_remove: kernel object mapped by non-kernel map");
+                               panic("uvm_unmap_remove: kernel object "
+                                   "mapped by non-kernel map");
 #endif
 
                        /*
@@ -1006,40 +1024,35 @@ uvm_unmap_remove(map, start, end, entry_list)
                         *
                         * uvm_km_pgremove currently does the following: 
                         *   for pages in the kernel object in range: 
-                        *     - pmap_page_protect them out of all pmaps
+                        *     - drops the swap slot
                         *     - uvm_pagefree the page
                         *
-                        * note that in case [1] the pmap_page_protect call
-                        * in uvm_km_pgremove may very well be redundant
-                        * because we have already removed the mappings
-                        * beforehand with pmap_remove (or pmap_kremove).
-                        * in the PMAP_NEW case, the pmap_page_protect call
-                        * may not do anything, since PMAP_NEW allows the
-                        * kernel to enter/remove kernel mappings without
-                        * bothing to keep track of the mappings (e.g. via
-                        * pv_entry lists).    XXX: because of this, in the
-                        * future we should consider removing the
-                        * pmap_page_protect from uvm_km_pgremove some time
-                        * in the future.
+                        * note there is version of uvm_km_pgremove() that
+                        * is used for "intrsafe" objects.
                         */
 
                        /*
-                        * remove mappings from pmap
+                        * remove mappings from pmap and drop the pages
+                        * from the object.  offsets are always relative
+                        * to vm_map_min(kernel_map).
                         */
+                       if (UVM_OBJ_IS_INTRSAFE_OBJECT(entry->object.uvm_obj)) {
 #if defined(PMAP_NEW)
-                       pmap_kremove(entry->start, len);
+                               pmap_kremove(entry->start, len);
 #else
-                       pmap_remove(pmap_kernel(), entry->start,
-                           entry->start+len);
+                               pmap_remove(pmap_kernel(), entry->start,
+                                   entry->start + len);
 #endif
-
-                       /*
-                        * remove pages from a kernel object (offsets are
-                        * always relative to vm_map_min(kernel_map)).
-                        */
-                       uvm_km_pgremove(entry->object.uvm_obj, 
-                       entry->start - vm_map_min(kernel_map),
-                       entry->end - vm_map_min(kernel_map));
+                               uvm_km_pgremove_intrsafe(entry->object.uvm_obj,
+                                   entry->start - vm_map_min(kernel_map),
+                                   entry->end - vm_map_min(kernel_map));
+                       } else {
+                               pmap_remove(pmap_kernel(), entry->start,
+                                   entry->start + len);
+                               uvm_km_pgremove(entry->object.uvm_obj,
+                                   entry->start - vm_map_min(kernel_map),
+                                   entry->end - vm_map_min(kernel_map));
+                       }
 
                        /*
                         * null out kernel_object reference, we've just
@@ -1842,6 +1855,121 @@ uvm_map_inherit(map, start, end, new_inheritance)
        return(KERN_SUCCESS);
 }
 
+/* 
+ * uvm_map_advice: set advice code for range of addrs in map.
+ *
+ * => map must be unlocked
+ */
+
+int
+uvm_map_advice(map, start, end, new_advice)
+       vm_map_t map;
+       vaddr_t start;
+       vaddr_t end;
+       int new_advice;
+{
+       vm_map_entry_t entry, temp_entry;
+       UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist);
+       UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_adv=0x%x)",
+           map, start, end, new_advice);
+
+       vm_map_lock(map);
+       
+       VM_MAP_RANGE_CHECK(map, start, end);
+       
+       if (uvm_map_lookup_entry(map, start, &temp_entry)) {
+               entry = temp_entry;
+               UVM_MAP_CLIP_START(map, entry, start);
+       } else {
+               entry = temp_entry->next;
+       }
+       
+       while ((entry != &map->header) && (entry->start < end)) {
+               UVM_MAP_CLIP_END(map, entry, end);
+
+               switch (new_advice) {
+               case MADV_NORMAL:
+               case MADV_RANDOM:
+               case MADV_SEQUENTIAL:
+                       /* nothing special here */
+                       break;
+
+#if 0
+               case MADV_WILLNEED:
+                       /* activate all these pages */
+                       /* XXX */
+                       /*
+                        * should invent a "weak" mode for uvm_fault()
+                        * which would only do the PGO_LOCKED pgo_get().
+                        */
+                       break;
+
+               case MADV_DONTNEED:
+                       /* deactivate this page */
+                       /* XXX */
+                       /*
+                        * vm_page_t p;
+                        * uvm_lock_pageq();
+                        * for (p in each page)
+                        *      if (not_wired)
+                        *              uvm_pagedeactivate(p);
+                        * uvm_unlock_pageq();
+                        */
+                       break;
+
+               case MADV_SPACEAVAIL:
+                       /* 
+                        * XXXMRG
+                        * what is this?  i think:  "ensure that we have
+                        * allocated backing-store for these pages".  this
+                        * is going to require changes in the page daemon,
+                        * as it will free swap space allocated to pages in
+                        * core.  there's also what to do for
+                        * device/file/anonymous memory..
+                        */
+                       break;
+
+               case MADV_GARBAGE:
+                       /* pages are `empty' and can be garbage collected */
+                       /* XXX */
+                       /*
+                        * (perhaps MADV_FREE? check freebsd's MADV_FREE).
+                        * 
+                        * need to do this:
+                        *      - clear all the referenced and modified bits on
+                        *        the pages,
+                        *      - delete any backing store,
+                        *      - mark the page as `recycable'.
+                        *
+                        * So, if you start paging, the pages would be thrown out
+                        * and then zero-filled the next time they're used.
+                        * Otherwise you'd just reuse them directly.  Once the
+                        * page has been modified again, it would no longer be
+                        * recyclable.  That way, malloc() can just tell the
+                        * system when pages are `empty'; if memory is needed,
+                        * they'll be tossed; if memory is not needed, there
+                        * will be no additional overhead.
+                        */
+                       break;
+#endif
+
+               default:
+                       vm_map_unlock(map);
+                       UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0);
+                       return (KERN_INVALID_ARGUMENT);
+               }
+
+
+               entry->advice = new_advice;
+               
+               entry = entry->next;
+       }
+
+       vm_map_unlock(map);
+       UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0);
+       return (KERN_SUCCESS);
+}
+
 /*
  * uvm_map_pageable: sets the pageability of a range in a map.
  *
@@ -1864,6 +1992,11 @@ uvm_map_pageable(map, start, end, new_pageable)
        UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_pageable=0x%x)",
        map, start, end, new_pageable);
 
+#ifdef DIAGNOSTIC
+       if ((map->flags & VM_MAP_PAGEABLE) == 0)
+               panic("uvm_map_pageable: map %p not pageable", map);
+#endif
+
        vm_map_lock(map);
        VM_MAP_RANGE_CHECK(map, start, end);
 
@@ -1913,10 +2046,11 @@ uvm_map_pageable(map, start, end, new_pageable)
                 * now decrement the wiring count for each region.  if a region
                 * becomes completely unwired, unwire its physical pages and
                 * mappings.
+                *
+                * Note, uvm_fault_unwire() (called via uvm_map_entry_unwire())
+                * does not lock the map, so we don't have to do anything
+                * special regarding locking here.
                 */
-#if 0          /* not necessary: uvm_fault_unwire does not lock */
-               lock_set_recursive(&map->lock);
-#endif  /* XXXCDC */
 
                entry = start_entry;
                while ((entry != &map->header) && (entry->start < end)) {
@@ -1928,9 +2062,6 @@ uvm_map_pageable(map, start, end, new_pageable)
                        
                        entry = entry->next;
                }
-#if 0 /* XXXCDC: not necessary, see above */
-               lock_clear_recursive(&map->lock);
-#endif
                vm_map_unlock(map);
                UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0);
                return(KERN_SUCCESS);
@@ -2015,62 +2146,53 @@ uvm_map_pageable(map, start, end, new_pageable)
        /*
         * Pass 2.
         */
-       /*
-        * HACK HACK HACK HACK
-        *
-        * if we are wiring in the kernel map or a submap of it, unlock the
-        * map to avoid deadlocks.  we trust that the kernel threads are
-        * well-behaved, and therefore will not do anything destructive to
-        * this region of the map while we have it unlocked.  we cannot
-        * trust user threads to do the same.
-        *
-        * HACK HACK HACK HACK 
-        */
-       if (vm_map_pmap(map) == pmap_kernel()) {
-               vm_map_unlock(map);         /* trust me ... */
-       } else {
-               vm_map_set_recursive(&map->lock);
-               lockmgr(&map->lock, LK_DOWNGRADE, (void *)0, curproc /*XXX*/);
-       }
+
+       vm_map_downgrade(map);
 
        rv = 0;
        entry = start_entry;
        while (entry != &map->header && entry->start < end) {
-               /*
-                * if uvm_fault_wire fails for any page we need to undo what has
-                * been done.  we decrement the wiring count for those pages
-                * which have not yet been wired (now) and unwire those that
-                * have * (later).
-                *
-                * XXX this violates the locking protocol on the map, needs to
-                * be fixed.  [because we only have a read lock on map we 
-                * shouldn't be changing wired_count?]
-                */
-               if (rv) {
-                       entry->wired_count--;
-               } else if (entry->wired_count == 1) {
-                       rv = uvm_fault_wire(map, entry->start, entry->end);
+               if (entry->wired_count == 1) {
+                       rv = uvm_fault_wire(map, entry->start, entry->end,
+                           entry->protection);
                        if (rv) {
-                               failed = entry->start;
-                               entry->wired_count--;
+                               /*
+                                * wiring failed.  break out of the loop.
+                                * we'll clean up the map below, once we
+                                * have a write lock again.
+                                */
+                               break;
                        }
                }
                entry = entry->next;
        }
 
-       if (vm_map_pmap(map) == pmap_kernel()) {
-               vm_map_lock(map);     /* relock */
-       } else {
-               vm_map_clear_recursive(&map->lock);
-       } 
-
        if (rv) {        /* failed? */
+               /*
+                * Get back to an exclusive (write) lock.
+                */
+               vm_map_upgrade(map);
+
+               /*
+                * first drop the wiring count on all the entries
+                * which haven't actually been wired yet.
+                */
+               failed = entry->start;
+               while (entry != &map->header && entry->start < end) {
+                       entry->wired_count--;
+                       entry = entry->next;
+               }
+
+               /*
+                * now, unlock the map, and unwire all the pages that
+                * were successfully wired above.
+                */
                vm_map_unlock(map);
                (void) uvm_map_pageable(map, start, failed, TRUE);
                UVMHIST_LOG(maphist, "<- done (RV=%d)", rv,0,0,0);
                return(rv);
        }
-       vm_map_unlock(map);
+
+       /* We are holding a read lock here. */
+       vm_map_unlock_read(map);
        
        UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0);
        return(KERN_SUCCESS);
@@ -2255,7 +2377,7 @@ uvmspace_init(vm, pmap, min, max, pageable)
 
        bzero(vm, sizeof(*vm));
 
-       uvm_map_setup(&vm->vm_map, min, max, pageable);
+       uvm_map_setup(&vm->vm_map, min, max, pageable ? VM_MAP_PAGEABLE : 0);
 
        if (pmap)
                pmap_reference(pmap);
@@ -2366,7 +2488,7 @@ uvmspace_exec(p)
                 * for p
                 */
                nvm = uvmspace_alloc(map->min_offset, map->max_offset, 
-                        map->entries_pageable);
+                        (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE);
 
 #if (defined(i386) || defined(pc532)) && !defined(PMAP_NEW)
                /* 
@@ -2472,7 +2594,7 @@ uvmspace_fork(vm1)
        vm_map_lock(old_map);
 
        vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset,
-                     old_map->entries_pageable);
+                     (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE);
        bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy,
        (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy);
        new_map = &vm2->vm_map;           /* XXX */
@@ -2801,8 +2923,9 @@ uvm_map_printit(map, full, pr)
        vm_map_entry_t entry;
 
        (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset);
-       (*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d\n",
-           map->nentries, map->size, map->ref_count, map->timestamp);
+       (*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d, flags=0x%x\n",
+           map->nentries, map->size, map->ref_count, map->timestamp,
+           map->flags);
 #ifdef pmap_resident_count
        (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, 
            pmap_resident_count(map->pmap));
@@ -2855,7 +2978,7 @@ uvm_object_printit(uobj, full, pr)
 
        (*pr)("OBJECT %p: pgops=%p, npages=%d, ", uobj, uobj->pgops,
            uobj->uo_npages);
-       if (uobj->uo_refs == UVM_OBJ_KERN)
+       if (UVM_OBJ_IS_KERN_OBJECT(uobj))
                (*pr)("refs=<SYSTEM>\n");
        else
                (*pr)("refs=%d\n", uobj->uo_refs);
index c4ee571..b58b21a 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_map.h,v 1.11 1999/03/25 18:48:52 mrg Exp $ */
+/*     $NetBSD: uvm_map.h,v 1.14 1999/05/26 19:16:36 thorpej Exp $     */
 
 /* 
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -138,7 +138,7 @@ void                uvm_map_clip_end __P((vm_map_t, vm_map_entry_t,
                                vaddr_t));
 MAP_INLINE
 vm_map_t       uvm_map_create __P((pmap_t, vaddr_t, 
-                       vaddr_t, boolean_t));
+                       vaddr_t, int));
 int            uvm_map_extract __P((vm_map_t, vaddr_t, vsize_t, 
                        vm_map_t, vaddr_t *, int));
 vm_map_entry_t uvm_map_findspace __P((vm_map_t, vaddr_t, vsize_t,
@@ -146,6 +146,7 @@ vm_map_entry_t      uvm_map_findspace __P((vm_map_t, vaddr_t, vsize_t,
                        boolean_t));
 int            uvm_map_inherit __P((vm_map_t, vaddr_t, vaddr_t,
                        vm_inherit_t));
+int            uvm_map_advice __P((vm_map_t, vaddr_t, vaddr_t, int));
 void           uvm_map_init __P((void));
 boolean_t      uvm_map_lookup_entry __P((vm_map_t, vaddr_t, 
                        vm_map_entry_t *));
@@ -156,7 +157,7 @@ int         uvm_map_replace __P((vm_map_t, vaddr_t, vaddr_t,
 int            uvm_map_reserve __P((vm_map_t, vsize_t, vaddr_t, 
                        vaddr_t *));
 void           uvm_map_setup __P((vm_map_t, vaddr_t, 
-                       vaddr_t, boolean_t));
+                       vaddr_t, int));
 int            uvm_map_submap __P((vm_map_t, vaddr_t, 
                        vaddr_t, vm_map_t));
 MAP_INLINE
index e56ba28..85ca2a7 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_map_i.h,v 1.11 1999/03/25 18:48:53 mrg Exp $       */
+/*     $NetBSD: uvm_map_i.h,v 1.14 1999/06/04 23:38:42 thorpej Exp $   */
 
 /* 
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
  */
 
 MAP_INLINE vm_map_t
-uvm_map_create(pmap, min, max, pageable)
+uvm_map_create(pmap, min, max, flags)
        pmap_t pmap;
        vaddr_t min, max;
-       boolean_t pageable;
+       int flags;
 {
        vm_map_t result;
 
-       MALLOC(result, vm_map_t, sizeof(struct vm_map), M_VMMAP, M_WAITOK);
-       uvm_map_setup(result, min, max, pageable);
+       MALLOC(result, vm_map_t,
+           (flags & VM_MAP_INTRSAFE) ? sizeof(struct vm_map_intrsafe) :
+                                       sizeof(struct vm_map),
+           M_VMMAP, M_WAITOK);
+       uvm_map_setup(result, min, max, flags);
        result->pmap = pmap;
        return(result);
 }
@@ -104,10 +107,10 @@ uvm_map_create(pmap, min, max, pageable)
  */
 
 MAP_INLINE void
-uvm_map_setup(map, min, max, pageable)
+uvm_map_setup(map, min, max, flags)
        vm_map_t map;
        vaddr_t min, max;
-       boolean_t pageable;
+       int flags;
 {
 
        map->header.next = map->header.prev = &map->header;
@@ -116,13 +119,26 @@ uvm_map_setup(map, min, max, pageable)
        map->ref_count = 1;
        map->min_offset = min;
        map->max_offset = max;
-       map->entries_pageable = pageable;
+       map->flags = flags;
        map->first_free = &map->header;
        map->hint = &map->header;
        map->timestamp = 0;
        lockinit(&map->lock, PVM, "thrd_sleep", 0, 0);
        simple_lock_init(&map->ref_lock);
        simple_lock_init(&map->hint_lock);
+
+       /*
+        * If the map is interrupt safe, place it on the list
+        * of interrupt safe maps, for uvm_fault().
+        */
+       if (flags & VM_MAP_INTRSAFE) {
+               struct vm_map_intrsafe *vmi = (struct vm_map_intrsafe *)map;
+               int s;
+
+               s = vmi_list_lock();
+               LIST_INSERT_HEAD(&vmi_list, vmi, vmi_list);
+               vmi_list_unlock(s);
+       }
 }
 
 
index 4d78b3a..75b1d16 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_mmap.c,v 1.19 1999/03/25 18:48:53 mrg Exp $        */
+/*     $NetBSD: uvm_mmap.c,v 1.21 1999/05/23 06:27:13 mrg Exp $        */
 
 /*
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -119,28 +119,6 @@ sys_sstk(p, v, retval)
        return (ENOSYS);
 }
 
-/*
- * sys_madvise: give advice about memory usage.
- */
-
-/* ARGSUSED */
-int
-sys_madvise(p, v, retval)
-       struct proc *p;
-       void *v;
-       register_t *retval;
-{
-#if 0
-       struct sys_madvise_args /* {
-               syscallarg(caddr_t) addr;
-               syscallarg(size_t) len;
-               syscallarg(int) behav;
-       } */ *uap = v;
-#endif
-
-       return (ENOSYS);
-}
-
 /*
  * sys_mincore: determine if pages are in core or not.
  */
@@ -695,6 +673,51 @@ sys_minherit(p, v, retval)
        return (EINVAL);
 }
 
+/*
+ * sys_madvise: give advice about memory usage.
+ */
+
+/* ARGSUSED */
+int
+sys_madvise(p, v, retval)
+       struct proc *p;
+       void *v;
+       register_t *retval;
+{
+       struct sys_madvise_args /* {
+               syscallarg(caddr_t) addr;
+               syscallarg(size_t) len;
+               syscallarg(int) behav;
+       } */ *uap = v;
+       vaddr_t addr;
+       vsize_t size, pageoff;
+       int advice;
+       
+       addr = (vaddr_t)SCARG(uap, addr);
+       size = (vsize_t)SCARG(uap, len);
+       advice = SCARG(uap, behav);
+
+       /*
+        * align the address to a page boundary, and adjust the size accordingly
+        */
+       pageoff = (addr & PAGE_MASK);
+       addr -= pageoff;
+       size += pageoff;
+       size = (vsize_t) round_page(size);
+
+       if ((int)size < 0)
+               return (EINVAL);
+       
+       switch (uvm_map_advice(&p->p_vmspace->vm_map, addr, addr+size,
+                        advice)) {
+       case KERN_SUCCESS:
+               return (0);
+       case KERN_PROTECTION_FAILURE:
+               return (EACCES);
+       }
+       return (EINVAL);
+}
+
 /*
  * sys_mlock: memory lock
  */
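
With sys_madvise() above now backed by uvm_map_advice(), a userland process
can hint its access pattern.  A minimal, hedged example follows; the file
path is made up and error handling is kept to the bare minimum.

#include <sys/types.h>
#include <sys/mman.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	size_t len = 16 * 4096;
	void *p;
	int fd;

	fd = open("/var/db/example.dat", O_RDONLY);	/* hypothetical file */
	if (fd == -1)
		return (1);
	p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
	if (p == MAP_FAILED)
		return (1);
	/* tell the VM system we will read the mapping sequentially */
	if (madvise(p, len, MADV_SEQUENTIAL) == -1)
		perror("madvise");
	munmap(p, len);
	close(fd);
	return (0);
}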
index c45dd26..294e362 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_object.h,v 1.5 1998/03/09 00:58:58 mrg Exp $       */
+/*     $NetBSD: uvm_object.h,v 1.8 1999/05/25 20:30:09 thorpej Exp $   */
 
 /*
  *
@@ -64,7 +64,22 @@ struct uvm_object {
  * for kernel objects... when a kernel object is unmapped we always want
  * to free the resources associated with the mapping.   UVM_OBJ_KERN
  * allows us to decide which type of unmapping we want to do.
+ *
+ * in addition, we have kernel objects which may be used in an
+ * interrupt context.  these objects get their mappings entered
+ * with pmap_kenter*() and removed with pmap_kremove(), which
+ * are safe to call in interrupt context, and must be used ONLY
+ * for wired kernel mappings in these objects and their associated
+ * maps.
  */
-#define UVM_OBJ_KERN   (-2)
+#define UVM_OBJ_KERN           (-2)
+#define        UVM_OBJ_KERN_INTRSAFE   (-3)
+
+#define        UVM_OBJ_IS_KERN_OBJECT(uobj)                                    \
+       ((uobj)->uo_refs == UVM_OBJ_KERN ||                             \
+        (uobj)->uo_refs == UVM_OBJ_KERN_INTRSAFE)
+
+#define        UVM_OBJ_IS_INTRSAFE_OBJECT(uobj)                                \
+       ((uobj)->uo_refs == UVM_OBJ_KERN_INTRSAFE)
 
 #endif /* _UVM_UVM_OBJECT_H_ */
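
As a hedged illustration of how the new object-type macros are meant to be
used (the helper below is invented for this sketch, not part of the commit):
code that maps pages belonging to a kernel object can key off
UVM_OBJ_IS_INTRSAFE_OBJECT() to choose between pmap_kenter_pa(), which never
faults or sleeps, and an ordinary pmap_enter().  The pmap_kenter_pa() path
assumes a PMAP_NEW-style pmap, as elsewhere in this diff.

/*
 * Sketch only: choose a mapping primitive based on the object type.
 * kobj_map_page() is a hypothetical helper, not part of this commit.
 */
static void
kobj_map_page(struct uvm_object *uobj, vaddr_t va, struct vm_page *pg)
{
	if (UVM_OBJ_IS_INTRSAFE_OBJECT(uobj)) {
		/* wired, interrupt-safe mapping; never faults or sleeps */
		pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg),
		    VM_PROT_READ | VM_PROT_WRITE);
	} else {
		/* ordinary kernel mapping; may sleep for pmap resources */
		pmap_enter(pmap_kernel(), va, VM_PAGE_TO_PHYS(pg),
		    VM_PROT_READ | VM_PROT_WRITE, TRUE,
		    VM_PROT_READ | VM_PROT_WRITE);
	}
}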
index fa85122..c60017d 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_page.c,v 1.19 1999/05/20 20:07:55 thorpej Exp $    */
+/*     $NetBSD: uvm_page.c,v 1.23 1999/05/25 01:34:13 thorpej Exp $    */
 
 /* 
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -847,9 +847,7 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list)
                panic("uvm_pagealloc: obj and anon != NULL");
 #endif
 
-       s = splimp();
-
-       uvm_lock_fpageq();              /* lock free page queue */
+       s = uvm_lock_fpageq();          /* lock free page queue */
 
        /*
         * check to see if we need to generate some free pages waking
@@ -870,7 +868,7 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list)
         */
 
        use_reserve = (flags & UVM_PGA_USERESERVE) ||
-               (obj && obj->uo_refs == UVM_OBJ_KERN);
+               (obj && UVM_OBJ_IS_KERN_OBJECT(obj));
        if ((uvmexp.free <= uvmexp.reserve_kernel && !use_reserve) ||
            (uvmexp.free <= uvmexp.reserve_pagedaemon &&
             !(use_reserve && curproc == uvm.pagedaemon_proc)))
@@ -919,8 +917,7 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list)
        TAILQ_REMOVE(freeq, pg, pageq);
        uvmexp.free--;
 
-       uvm_unlock_fpageq();            /* unlock free page queue */
-       splx(s);
+       uvm_unlock_fpageq(s);           /* unlock free page queue */
 
        pg->offset = off;
        pg->uobject = obj;
@@ -945,8 +942,7 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list)
        return(pg);
 
  fail:
-       uvm_unlock_fpageq();
-       splx(s);
+       uvm_unlock_fpageq(s);
        return (NULL);
 }
 
@@ -1137,8 +1133,7 @@ struct vm_page *pg;
         * and put on free queue 
         */
 
-       s = splimp();
-       uvm_lock_fpageq();
+       s = uvm_lock_fpageq();
        TAILQ_INSERT_TAIL(&uvm.page_free[uvm_page_lookup_freelist(pg)],
            pg, pageq);
        pg->pqflags = PQ_FREE;
@@ -1148,8 +1143,7 @@ struct vm_page *pg;
        pg->uanon = (void *)0xdeadbeef;
 #endif
        uvmexp.free++;
-       uvm_unlock_fpageq();
-       splx(s);
+       uvm_unlock_fpageq(s);
 }
 
 #if defined(UVM_PAGE_TRKOWN)
index 621bb01..09b4c63 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_page.h,v 1.10 1998/08/13 02:11:02 eeh Exp $        */
+/*     $NetBSD: uvm_page.h,v 1.12 1999/05/24 19:10:57 thorpej Exp $    */
 
 /* 
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -79,8 +79,6 @@
 
 #define uvm_lock_pageq()       simple_lock(&uvm.pageqlock)
 #define uvm_unlock_pageq()     simple_unlock(&uvm.pageqlock)
-#define uvm_lock_fpageq()      simple_lock(&uvm.fpageqlock)
-#define uvm_unlock_fpageq()    simple_unlock(&uvm.fpageqlock)
 
 #define uvm_pagehash(obj,off) \
        (((unsigned long)obj+(unsigned long)atop(off)) & uvm.page_hashmask)
@@ -108,6 +106,9 @@ boolean_t uvm_page_physget __P((paddr_t *));
 #endif
 void uvm_page_rehash __P((void));
 
+PAGE_INLINE int uvm_lock_fpageq __P((void));
+PAGE_INLINE void uvm_unlock_fpageq __P((int));
+
 PAGE_INLINE void uvm_pageactivate __P((struct vm_page *));
 vaddr_t uvm_pageboot_alloc __P((vsize_t));
 PAGE_INLINE void uvm_pagecopy __P((struct vm_page *, struct vm_page *));
index 4691e08..9fc1d2d 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_page_i.h,v 1.8 1998/08/13 02:11:02 eeh Exp $       */
+/*     $NetBSD: uvm_page_i.h,v 1.10 1999/05/24 19:10:57 thorpej Exp $  */
 
 /* 
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
 
 #if defined(UVM_PAGE_INLINE) || defined(UVM_PAGE)
 
+/*
+ * uvm_lock_fpageq: lock the free page queue
+ *
+ * => free page queue can be accessed in interrupt context, so this
+ *     blocks all interrupts that can cause memory allocation, and
+ *     returns the previous interrupt level.
+ */
+
+PAGE_INLINE int
+uvm_lock_fpageq()
+{
+       int s;
+
+       s = splimp();
+       simple_lock(&uvm.fpageqlock);
+       return (s);
+}
+
+/*
+ * uvm_unlock_fpageq: unlock the free page queue
+ *
+ * => caller must supply interrupt level returned by uvm_lock_fpageq()
+ *     so that it may be restored.
+ */
+
+PAGE_INLINE void
+uvm_unlock_fpageq(s)
+       int s;
+{
+
+       simple_unlock(&uvm.fpageqlock);
+       splx(s);
+}
+
 /*
  * uvm_pagelookup: look up a page
  *
index c805098..8c057a8 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_pager.c,v 1.15 1999/03/25 18:48:55 mrg Exp $       */
+/*     $NetBSD: uvm_pager.c,v 1.20 1999/05/26 19:16:36 thorpej Exp $   */
 
 /*
  *
@@ -88,7 +88,7 @@ uvm_pager_init()
         */
 
         pager_map = uvm_km_suballoc(kernel_map, &uvm.pager_sva, &uvm.pager_eva,
-                               PAGER_MAP_SIZE, FALSE, FALSE, NULL);
+                               PAGER_MAP_SIZE, 0, FALSE, NULL);
         simple_lock_init(&pager_map_wanted_lock);
         pager_map_wanted = FALSE;
 
@@ -113,6 +113,9 @@ uvm_pager_init()
  *
  * we basically just map in a blank map entry to reserve the space in the
  * map and then use pmap_enter() to put the mappings in by hand.
+ *
+ * XXX It would be nice to know the direction of the I/O, so that we can
+ * XXX map only what is necessary.
  */
 
 vaddr_t
@@ -169,6 +172,11 @@ ReStart:
                        panic("uvm_pagermapin: page not busy");
 #endif
 
+               /*
+                * XXX VM_PROT_DEFAULT includes VM_PROT_EXEC; is that
+                * XXX really necessary?  It could lead to unnecessary
+                * XXX instruction cache flushes.
+                */
                pmap_enter(vm_map_pmap(pager_map), cva, VM_PAGE_TO_PHYS(pp),
                    VM_PROT_DEFAULT, TRUE,
                    VM_PROT_READ | VM_PROT_WRITE);
@@ -698,8 +706,6 @@ int swblk;                  /* valid if (uobj == NULL && PGO_REALLOCSWAP) */
                 * had a successful pageout update the page!
                 */
                if (flags & PGO_PDFREECLUST) {
-                       /* XXX: with PMAP_NEW ref should already be clear,
-                        * but don't trust! */
                        pmap_clear_reference(PMAP_PGARG(ppsp[lcv]));
                        pmap_clear_modify(PMAP_PGARG(ppsp[lcv]));
                        ppsp[lcv]->flags |= PG_CLEAN;
index c68355c..b3788da 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_pdaemon.c,v 1.14 1999/03/26 17:33:30 chs Exp $     */
+/*     $NetBSD: uvm_pdaemon.c,v 1.16 1999/05/24 19:10:57 thorpej Exp $ */
 
 /* 
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
@@ -365,11 +365,9 @@ uvmpd_scan_inactive(pglst)
                         * update our copy of "free" and see if we've met
                         * our target
                         */
-                       s = splimp();
-                       uvm_lock_fpageq();
+                       s = uvm_lock_fpageq();
                        free = uvmexp.free;
-                       uvm_unlock_fpageq();
-                       splx(s);
+                       uvm_unlock_fpageq(s);
 
                        if (free + uvmexp.paging >= uvmexp.freetarg << 2 ||
                            dirtyreacts == UVMPD_NUMDIRTYREACTS) {
@@ -952,11 +950,9 @@ uvmpd_scan()
        /*
         * get current "free" page count
         */
-       s = splimp();
-       uvm_lock_fpageq();
+       s = uvm_lock_fpageq();
        free = uvmexp.free;
-       uvm_unlock_fpageq();
-       splx(s);
+       uvm_unlock_fpageq(s);
 
 #ifndef __SWAP_BROKEN
        /*
index 4590f1c..56ea153 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_pdaemon.h,v 1.5 1998/02/10 14:12:28 mrg Exp $      */
+/*     $NetBSD: uvm_pdaemon.h,v 1.6 1999/03/25 18:48:56 mrg Exp $      */
 
 /* 
  * Copyright (c) 1997 Charles D. Cranor and Washington University.
index 042ab2b..c24125e 100644 (file)
@@ -1,4 +1,4 @@
-/*     $NetBSD: uvm_pglist.c,v 1.5.2.1 1998/07/30 14:04:15 eeh Exp $   */
+/*     $NetBSD: uvm_pglist.c,v 1.7 1999/05/24 19:10:58 thorpej Exp $   */
 
 #define VM_PAGE_ALLOC_MEMORY_STATS
  
@@ -136,8 +136,7 @@ uvm_pglistalloc(size, low, high, alignment, boundary, rlist, nsegs, waitok)
        /*
         * Block all memory allocation and lock the free list.
         */
-       s = splimp();
-       uvm_lock_fpageq();            /* lock free page queue */
+       s = uvm_lock_fpageq();          /* lock free page queue */
 
        /* Are there even any free pages? */
        for (idx = 0; idx < VM_NFREELIST; idx++)
@@ -238,8 +237,7 @@ uvm_pglistalloc(size, low, high, alignment, boundary, rlist, nsegs, waitok)
        error = 0;
 
 out:
-       uvm_unlock_fpageq();
-       splx(s);
+       uvm_unlock_fpageq(s);
 
        /*
         * check to see if we need to generate some free pages waking
@@ -271,8 +269,7 @@ uvm_pglistfree(list)
        /*
         * Block all memory allocation and lock the free list.
         */
-       s = splimp();
-       uvm_lock_fpageq();
+       s = uvm_lock_fpageq();
 
        while ((m = list->tqh_first) != NULL) {
 #ifdef DIAGNOSTIC
@@ -287,6 +284,5 @@ uvm_pglistfree(list)
                STAT_DECR(uvm_pglistalloc_npages);
        }
 
-       uvm_unlock_fpageq();
-       splx(s);
+       uvm_unlock_fpageq(s);
 }
diff --git a/sys/uvm/uvm_uio.c b/sys/uvm/uvm_uio.c
new file mode 100644 (file)
index 0000000..84ef108
--- /dev/null
@@ -0,0 +1,266 @@
+/*
+ * Copyright (c) 1999 Artur Grabowski <art@openbsd.org>
+ * All rights reserved. 
+ *
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions 
+ * are met: 
+ *
+ * 1. Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer. 
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in the 
+ *    documentation and/or other materials provided with the distribution. 
+ *
+ * 3. Neither the name of the author nor the names of his contributors 
+ *    may be used to endorse or promote products derived from this software 
+ *    without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
+ * SUCH DAMAGE. 
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/kernel.h>
+#include <sys/proc.h>
+#include <sys/uio.h>
+#include <sys/mbuf.h>
+
+#include <vm/vm.h>
+#include <vm/vm_page.h>
+#include <vm/vm_kern.h>
+
+#include <uvm/uvm.h>
+#include <uvm/uvm_uio.h>
+
+int uvm_uio_enable = 1;
+int uvm_uio_num_try = 0;
+int uvm_uio_num_success = 0;
+
+/*
+ * m_ext functions.
+ */
+void uvm_mbuf_free __P((struct mbuf *));
+void uvm_mbuf_ref __P((struct mbuf *));
+
+/*
+ * Returns the length of the I/O, or 0 on failure.
+ *
+ * Should not be called unless UVM_UIO_TRY(uio) has been checked first.
+ */
+size_t
+uvm_uio_to_mbuf(uio, mbuf)
+       struct uio *uio;
+       struct mbuf *mbuf;
+{
+       struct vm_map *map;
+       vaddr_t realbase, base, kva;
+       vsize_t reallen, len, offset;
+       struct vm_page **pages;
+       int npages;
+       struct iovec *iov;
+       struct uvm_mbuf *um;
+       struct mbuf *m;
+#ifndef PMAP_NEW
+       int i;
+#endif
+
+       uvm_uio_num_try++;
+
+       if ((mbuf->m_flags & M_EXT)) {
+               printf("uvm_uio_to_mbuf: fail 1\n");
+               return 0;
+       }
+
+       map = &uio->uio_procp->p_vmspace->vm_map;
+       iov = uio->uio_iov;
+
+       /*
+        * XXX - check if iov_len is bigger than max vsize_t
+        */
+
+       reallen = (vsize_t)iov->iov_len;
+       realbase = (vaddr_t)iov->iov_base;
+
+       /*
+        * Check alignment.
+        *
+        * What we really want is to somehow tell the caller by how much
+        * the uio should be adjusted, so that it can try again.
+        */
+       if ((realbase & (sizeof(long) - 1)) != 0) {
+               printf("uvm_uio_to_mbuf: not aligned\n");
+               return 0;
+       }
+
+       base = trunc_page(realbase);
+       offset = realbase - base;
+
+       /*
+        * truncate reallen here so that we won't do a huge malloc.
+        * Subtract offset so that the next round will be page aligned.
+        */
+       if (reallen > UVM_UIO_LIMIT)
+               reallen = UVM_UIO_LIMIT - offset;
+
+       len = reallen + offset;
+       len = round_page(len);
+       npages = atop(len);
+
+       if ((mbuf->m_flags & M_PKTHDR)) {
+
+               MGET(m, M_WAIT, MT_DATA);
+               mbuf->m_len = 0;
+               mbuf->m_next = m;
+       } else {
+               m = mbuf;
+               m->m_next = NULL;
+       }
+
+       MALLOC(um, struct uvm_mbuf *, sizeof(struct uvm_mbuf), M_TEMP,
+              M_WAITOK);
+
+       /*
+        * If we have no more than UVM_UIO_SMALL_PAGES pages, they fit in
+        * the pre-allocated um_pages_small array in struct uvm_mbuf.
+        */
+       if (npages > UVM_UIO_SMALL_PAGES)
+               MALLOC(pages, struct vm_page **,
+                      npages * sizeof(struct vm_page *),  M_TEMP, M_WAITOK);
+       else
+               pages = um->um_pages_small;
+
+       /*
+        * Loan the pages we want.
+        */
+       if (uvm_loan(map, base, len, (void **)pages, UVM_LOAN_TOPAGE) !=
+           KERN_SUCCESS) {
+               /*
+                * XXX - This is really ENOMEM or EFAULT.
+                */
+               printf("uvm_uio_to_mbuf: loan failed\n");
+
+               goto fail;
+       }
+
+       /*
+        * Allocate space to map pages.
+        */
+       kva = vm_map_min(kernel_map);
+       if (uvm_map(kernel_map, &kva, len, NULL, UVM_UNKNOWN_OFFSET,
+                      UVM_MAPFLAG(UVM_PROT_READ, UVM_PROT_READ, UVM_INH_NONE,
+                                  UVM_ADV_SEQUENTIAL, 0)) != KERN_SUCCESS) {
+               uvm_unloanpage(pages, npages);
+               goto fail;
+       }
+
+       /*
+        * Initialize um.
+        */
+       um->um_pages = pages;
+       um->um_npages = npages;
+       um->um_usecount = 1;
+       um->um_kva = kva;
+
+       printf("mapping: 0x%x -> 0x%x\n", kva, kva + len);
+       /*
+        * Map pages.
+        */
+#ifdef PMAP_NEW
+       pmap_kenter_pgs(kva, pages, npages);
+#else
+       for (i = 0; i < npages; i++, kva += PAGE_SIZE)
+               pmap_enter(pmap_kernel(), kva, VM_PAGE_TO_PHYS(pages[i]),
+                    VM_PROT_READ, TRUE, VM_PROT_READ);
+#endif
+
+       /*
+        * Update mbuf.
+        */
+       m->m_flags |= M_EXT | M_RONLY;
+       m->m_data = (caddr_t)(um->um_kva + offset);
+       m->m_len = reallen;
+       m->m_ext.ext_free = uvm_mbuf_free;
+       m->m_ext.ext_ref = uvm_mbuf_ref;
+       /*
+        * We lie about these two fields (ext_buf and ext_size) to avoid
+        * problems if someone tries to prepend data.
+        */
+       m->m_ext.ext_buf = (caddr_t)(um->um_kva + offset);
+       m->m_ext.ext_size = reallen;
+       m->m_ext.ext_handle = um;
+
+       /*
+        * Update uio.
+        */
+       if ((iov->iov_len -= reallen) == 0) {
+               uio->uio_iov++;
+               uio->uio_iovcnt--;
+       }
+       uio->uio_resid -= reallen;
+
+       uvm_uio_num_success++;
+
+       return reallen;
+fail:
+       if (npages > UVM_UIO_SMALL_PAGES)
+               FREE(pages, M_TEMP);
+
+       if (m != mbuf)
+               m_freem(m);
+
+       FREE(um, M_TEMP);
+
+       return 0;
+}
+
+void
+uvm_mbuf_free(mb)
+       struct mbuf *mb;
+{
+       struct uvm_mbuf *um = (struct uvm_mbuf *)mb->m_ext.ext_handle;
+       vsize_t len;
+
+       if (--um->um_usecount)
+               return;
+
+       len = ptoa(um->um_npages);
+
+       printf("unmapping: 0x%x -> 0x%x\n", um->um_kva, um->um_kva + len);
+#ifdef PMAP_NEW
+       pmap_kremove(um->um_kva, len);
+#else
+       pmap_remove(pmap_kernel(), um->um_kva, um->um_kva + len);
+#endif
+
+       uvm_unloanpage(um->um_pages, um->um_npages);
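+       /*
+        * XXX - uvm_unmap() and uvm_km_free_wakeup() below both remove the
+        * XXX   [um_kva, um_kva + len) mapping; one of the two calls is
+        * XXX   probably redundant.
+        */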
+       uvm_unmap(kernel_map, um->um_kva, um->um_kva + len);
+       uvm_km_free_wakeup(kernel_map, um->um_kva, len);
+       if (um->um_npages > UVM_UIO_SMALL_PAGES)
+               FREE(um->um_pages, M_TEMP);
+
+       FREE(um, M_TEMP);
+#ifdef DIAGNOSTIC
+       mb->m_data = NULL;
+       mb->m_ext.ext_handle = NULL;
+       mb->m_flags &= ~M_EXT;
+#endif
+}
+
+void
+uvm_mbuf_ref(mb)
+       struct mbuf *mb;
+{
+       ((struct uvm_mbuf *)mb->m_ext.ext_handle)->um_usecount++;
+}
diff --git a/sys/uvm/uvm_uio.h b/sys/uvm/uvm_uio.h
new file mode 100644 (file)
index 0000000..4a8ce97
--- /dev/null
@@ -0,0 +1,66 @@
+/*
+ * Copyright (c) 1999 Artur Grabowski <art@openbsd.org>
+ * All rights reserved. 
+ *
+ * Redistribution and use in source and binary forms, with or without 
+ * modification, are permitted provided that the following conditions 
+ * are met: 
+ *
+ * 1. Redistributions of source code must retain the above copyright 
+ *    notice, this list of conditions and the following disclaimer. 
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright 
+ *    notice, this list of conditions and the following disclaimer in the 
+ *    documentation and/or other materials provided with the distribution. 
+ *
+ * 3. Neither the name of the author nor the names of his contributors 
+ *    may be used to endorse or promote products derived from this software 
+ *    without specific prior written permission. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 
+ * SUCH DAMAGE. 
+ */
+
+#include <vm/vm.h>     /* for PAGE_SIZE */
+
+/*
+ * If the number of pages we're about to transfer does not exceed this
+ * number, we use the pre-allocated array.
+ */
+#define UVM_UIO_SMALL_PAGES 8
+
+/*
+ * Limit transfers to this number to avoid running out of memory.
+ */
+#define UVM_UIO_LIMIT (256 * PAGE_SIZE)
+
+/*
+ * m_ext structure.
+ */
+struct uvm_mbuf {
+       struct vm_page **um_pages;      /* The pages */
+       int um_npages;                  /* number of pages */
+       int um_usecount;                /* ref cnt */
+       vaddr_t um_kva;                 /* where the pages are mapped */
+       struct vm_page *um_pages_small[UVM_UIO_SMALL_PAGES];
+};
+
+extern int uvm_uio_enable;
+
+#define UVM_UIO_MINIO PAGE_SIZE                /* XXX - tweak */
+#define UVM_UIO_TRY(uio) (uvm_uio_enable && \
+                       ((uio)->uio_iov->iov_len >= UVM_UIO_MINIO) && \
+                       ((uio)->uio_procp != NULL) && \
+                       ((uio)->uio_rw == UIO_WRITE) && \
+                       ((uio)->uio_segflg == UIO_USERSPACE))
+
+size_t uvm_uio_to_mbuf __P((struct uio *, struct mbuf *));
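
A hedged sketch of how a write path might consume this interface (the caller
below is invented; this commit does not wire any driver or the socket code up
to it): check UVM_UIO_TRY() first, let uvm_uio_to_mbuf() loan and map the
user pages into a read-only external mbuf, and fall back to the ordinary copy
path when it returns 0.  example_copy() is a placeholder for whatever
uiomove()-based path the caller already has.

/*
 * Hypothetical caller, for illustration only.  "m" is assumed to be a
 * freshly allocated mbuf without M_EXT set.
 */
static int
example_send(struct uio *uio, struct mbuf *m)
{
	size_t done = 0;

	if (UVM_UIO_TRY(uio))
		done = uvm_uio_to_mbuf(uio, m);	/* loans & maps user pages */
	if (done == 0) {
		/* zero-copy declined or failed: fall back to copying */
		return (example_copy(uio, m));	/* hypothetical */
	}
	/* m (or the mbuf chained to it) now carries done bytes of ext data */
	return (0);
}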
index c2a3417..38f06bd 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: vm_map.h,v 1.10 2000/03/13 14:29:04 art Exp $ */
+/*     $OpenBSD: vm_map.h,v 1.11 2000/03/16 22:11:05 art Exp $ */
 /*     $NetBSD: vm_map.h,v 1.11 1995/03/26 20:39:10 jtc Exp $  */
 
 /* 
@@ -162,12 +162,58 @@ struct vm_map {
        vm_map_entry_t          hint;           /* hint for quick lookups */
        simple_lock_data_t      hint_lock;      /* lock for hint storage */
        vm_map_entry_t          first_free;     /* First free space hint */
+#ifdef UVM
+       int                     flags;          /* flags (read-only) */
+#else
        boolean_t               entries_pageable; /* map entries pageable?? */
+#endif
        unsigned int            timestamp;      /* Version number */
 #define        min_offset              header.start
 #define max_offset             header.end
 };
 
+#ifdef UVM
+/* vm_map flags */
+#define VM_MAP_PAGEABLE                0x01            /* entries are pageable*/
+#define VM_MAP_INTRSAFE                0x02            /* interrupt safe map */
+/*
+ *     Interrupt-safe maps must also be kept on a special list,
+ *     to assist uvm_fault() in avoiding locking problems.
+ */
+struct vm_map_intrsafe {
+       struct vm_map   vmi_map;
+       LIST_ENTRY(vm_map_intrsafe) vmi_list;
+};
+
+LIST_HEAD(vmi_list, vm_map_intrsafe);
+#ifdef _KERNEL
+extern simple_lock_data_t vmi_list_slock;
+extern struct vmi_list vmi_list;
+
+static __inline int vmi_list_lock __P((void));
+static __inline void vmi_list_unlock __P((int));
+
+static __inline int
+vmi_list_lock()
+{
+       int s;
+
+       s = splhigh();
+       simple_lock(&vmi_list_slock);
+       return (s);
+}
+
+static __inline void
+vmi_list_unlock(s)
+       int s;
+{
+
+       simple_unlock(&vmi_list_slock);
+       splx(s);
+}
+#endif /* _KERNEL */
+#endif /* UVM */
+
 #ifndef UVM    /* version handled elsewhere in uvm */
 /*
  *     Map versions are used to validate a previous lookup attempt.
@@ -185,6 +231,96 @@ typedef struct {
 } vm_map_version_t;
 #endif /* UVM */
 
+#ifdef UVM
+
+/*
+ * VM map locking operations:
+ *
+ *     These operations perform locking on the data portion of the
+ *     map.
+ *
+ *     vm_map_lock_try: try to lock a map, failing if it is already locked.
+ *
+ *     vm_map_lock: acquire an exclusive (write) lock on a map.
+ *
+ *     vm_map_lock_read: acquire a shared (read) lock on a map.
+ *
+ *     vm_map_unlock: release an exclusive lock on a map.
+ *
+ *     vm_map_unlock_read: release a shared lock on a map.
+ *
+ * Note that "intrsafe" maps use only exclusive, spin locks.  We simply
+ * use the sleep lock's interlock for this.
+ */
+
+#ifdef _KERNEL
+/* XXX: clean up later */
+#include <sys/time.h>
+#include <sys/proc.h>  /* XXX for curproc and p_pid */
+
+static __inline boolean_t vm_map_lock_try __P((vm_map_t));
+
+static __inline boolean_t
+vm_map_lock_try(map)
+       vm_map_t map;
+{
+       boolean_t rv;
+
+       if (map->flags & VM_MAP_INTRSAFE)
+               rv = simple_lock_try(&map->lock.lk_interlock);
+       else
+               rv = (lockmgr(&map->lock, LK_EXCLUSIVE|LK_NOWAIT, NULL, curproc) == 0);
+
+       if (rv)
+               map->timestamp++;
+
+       return (rv);
+}
+
+#ifdef DIAGNOSTIC
+#define        _vm_map_lock(map)                                               \
+do {                                                                   \
+       if (lockmgr(&(map)->lock, LK_EXCLUSIVE, NULL, curproc) != 0)    \
+               panic("vm_map_lock: failed to get lock");               \
+} while (0)
+#else
+#define        _vm_map_lock(map)                                               \
+       (void) lockmgr(&(map)->lock, LK_EXCLUSIVE, NULL, curproc)
+#endif
+
+#define        vm_map_lock(map)                                                \
+do {                                                                   \
+       if ((map)->flags & VM_MAP_INTRSAFE)                             \
+               simple_lock(&(map)->lock.lk_interlock);                 \
+       else                                                            \
+               _vm_map_lock((map));                                    \
+       (map)->timestamp++;                                             \
+} while (0)
+
+#ifdef DIAGNOSTIC
+#define        vm_map_lock_read(map)                                           \
+do {                                                                   \
+       if (map->flags & VM_MAP_INTRSAFE)                               \
+               panic("vm_map_lock_read: intrsafe map");                \
+       (void) lockmgr(&(map)->lock, LK_SHARED, NULL, curproc);         \
+} while (0)
+#else
+#define        vm_map_lock_read(map)                                           \
+       (void) lockmgr(&(map)->lock, LK_SHARED, NULL, curproc)
+#endif
+
+#define        vm_map_unlock(map)                                              \
+do {                                                                   \
+       if ((map)->flags & VM_MAP_INTRSAFE)                             \
+               simple_unlock(&(map)->lock.lk_interlock);               \
+       else                                                            \
+               (void) lockmgr(&(map)->lock, LK_RELEASE, NULL, curproc);\
+} while (0)
+
+#define        vm_map_unlock_read(map)                                         \
+       (void) lockmgr(&(map)->lock, LK_RELEASE, NULL, curproc)
+#endif /* _KERNEL */
+#else /* UVM */
 /*
  *     Macros:         vm_map_lock, etc.
  *     Function:
@@ -227,21 +363,7 @@ typedef struct {
        (map)->lk_flags &= ~LK_CANRECURSE; \
        simple_unlock(&(map)->lk_interlock); \
 }
-#if defined(UVM) && defined(_KERNEL)
-/* XXX: clean up later */
-static boolean_t vm_map_lock_try __P((vm_map_t));
-
-static __inline boolean_t vm_map_lock_try(map)
-
-vm_map_t map;
-
-{
-  if (lockmgr(&(map)->lock, LK_EXCLUSIVE|LK_NOWAIT, (void *)0, curproc) != 0)
-    return(FALSE);
-  map->timestamp++;
-  return(TRUE);
-}
-#endif
+#endif /* UVM */
 
 /*
  *     Functions implemented as macros
@@ -255,7 +377,11 @@ vm_map_t map;
 #define        MAX_KMAP        20
 #endif
 #ifndef        MAX_KMAPENT
-#define        MAX_KMAPENT     1000
+#if (50 + (2 * NPROC) > 1000)
+#define MAX_KMAPENT (50 + (2 * NPROC))
+#else
+#define        MAX_KMAPENT     1000  /* XXXCDC: no crash */
+#endif
 #endif
 
 #if defined(_KERNEL) && !defined(UVM)
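
To make the behaviour of the new locking macros concrete, here is a hedged
sketch of the calling idiom they imply (the function is invented, not code
from this commit): on a VM_MAP_INTRSAFE map, vm_map_lock() reduces to taking
the lockmgr interlock spin lock, so the critical section must not sleep; on
an ordinary map it is a full exclusive sleep lock, and vm_map_lock_read() is
only legal there.

/*
 * Sketch only; not code from this commit.
 */
static void
example_map_update(vm_map_t map)
{
	vm_map_lock(map);
	/*
	 * ... modify map entries here.  If map->flags has VM_MAP_INTRSAFE
	 * set, this section runs under a spin lock and must not sleep.
	 */
	vm_map_unlock(map);

	if (vm_map_lock_try(map)) {
		/* got the exclusive lock without blocking */
		vm_map_unlock(map);
	}
}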