From 7cb53682cfa1278ff20707d41fced713da20d463 Mon Sep 17 00:00:00 2001 From: art Date: Thu, 16 Mar 2000 22:11:02 +0000 Subject: [PATCH] Bring in some new UVM code from NetBSD (not current). - Introduce a new type of map that are interrupt safe and never allow faults in them. mb_map and kmem_map are made intrsafe. - Add "access protection" to uvm_vslock (to be passed down to uvm_fault and later to pmap_enter). - madvise(2) now works. - various cleanups. --- sys/arch/hppa/hppa/machdep.c | 8 +- sys/arch/i386/i386/machdep.c | 8 +- sys/arch/sparc/sparc/machdep.c | 6 +- sys/kern/kern_malloc.c | 10 +- sys/kern/kern_physio.c | 5 +- sys/kern/kern_sysctl.c | 4 +- sys/uvm/uvm_aobj.c | 6 +- sys/uvm/uvm_extern.h | 7 +- sys/uvm/uvm_fault.c | 45 ++++- sys/uvm/uvm_fault.h | 7 +- sys/uvm/uvm_fault_i.h | 35 +++- sys/uvm/uvm_glue.c | 20 ++- sys/uvm/uvm_km.c | 184 +++++++++++++++----- sys/uvm/uvm_km.h | 3 +- sys/uvm/uvm_loan.c | 24 ++- sys/uvm/uvm_map.c | 303 +++++++++++++++++++++++---------- sys/uvm/uvm_map.h | 7 +- sys/uvm/uvm_map_i.h | 32 +++- sys/uvm/uvm_mmap.c | 69 +++++--- sys/uvm/uvm_object.h | 19 ++- sys/uvm/uvm_page.c | 20 +-- sys/uvm/uvm_page.h | 7 +- sys/uvm/uvm_page_i.h | 36 +++- sys/uvm/uvm_pager.c | 14 +- sys/uvm/uvm_pdaemon.c | 14 +- sys/uvm/uvm_pdaemon.h | 2 +- sys/uvm/uvm_pglist.c | 14 +- sys/uvm/uvm_uio.c | 266 +++++++++++++++++++++++++++++ sys/uvm/uvm_uio.h | 66 +++++++ sys/vm/vm_map.h | 160 +++++++++++++++-- 30 files changed, 1127 insertions(+), 274 deletions(-) create mode 100644 sys/uvm/uvm_uio.c create mode 100644 sys/uvm/uvm_uio.h diff --git a/sys/arch/hppa/hppa/machdep.c b/sys/arch/hppa/hppa/machdep.c index 44b14c0e516..48facbcf808 100644 --- a/sys/arch/hppa/hppa/machdep.c +++ b/sys/arch/hppa/hppa/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.22 2000/02/22 20:08:15 mickey Exp $ */ +/* $OpenBSD: machdep.c,v 1.23 2000/03/16 22:11:03 art Exp $ */ /* * Copyright (c) 1999-2000 Michael Shalayeff @@ -506,13 +506,13 @@ cpu_startup() * limits the number of processes exec'ing at any time. */ exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, - 16*NCARGS, TRUE, FALSE, NULL); + 16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL); /* * Allocate a submap for physio */ phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, - VM_PHYS_SIZE, TRUE, FALSE, NULL); + VM_PHYS_SIZE, 0, FALSE, NULL); /* * Finally, allocate mbuf pool. 
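
For reference, the interface change that drives the machdep churn in this patch: uvm_km_suballoc() replaces its boolean "pageable" argument with an int flags word. A condensed sketch of the new prototype and the three call shapes used above (illustrative summary only, taken from the hunks in this patch):

    struct vm_map *uvm_km_suballoc __P((vm_map_t, vaddr_t *, vaddr_t *,
                        vsize_t, int, boolean_t, vm_map_t));

    /* pageable submap, faults allowed (exec_map) */
    exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
        16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL);

    /* wired, non-interrupt-safe submap, no flags (phys_map) */
    phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
        VM_PHYS_SIZE, 0, FALSE, NULL);

    /* interrupt-safe submap, never faults (mb_map) */
    mb_map = uvm_km_suballoc(kernel_map, (vaddr_t *)&mbutl, &maxaddr,
        VM_MBUF_SIZE, VM_MAP_INTRSAFE, FALSE, NULL);
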
Since mclrefcnt is an off-size @@ -522,7 +522,7 @@ cpu_startup() M_MBUF, M_NOWAIT); bzero(mclrefcnt, NMBCLUSTERS+CLBYTES/MCLBYTES); mb_map = uvm_km_suballoc(kernel_map, (vaddr_t *)&mbutl, &maxaddr, - VM_MBUF_SIZE, FALSE, FALSE, NULL); + VM_MBUF_SIZE, VM_MAP_INTRSAFE, FALSE, NULL); /* * Initialize callouts diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c index c6f67fadb31..18910c9448e 100644 --- a/sys/arch/i386/i386/machdep.c +++ b/sys/arch/i386/i386/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.126 2000/03/02 00:15:00 niklas Exp $ */ +/* $OpenBSD: machdep.c,v 1.127 2000/03/16 22:11:03 art Exp $ */ /* $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $ */ /*- @@ -355,7 +355,7 @@ cpu_startup() */ #if defined(UVM) exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, - 16*NCARGS, TRUE, FALSE, NULL); + 16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL); #else exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, 16*NCARGS, TRUE); @@ -366,7 +366,7 @@ cpu_startup() */ #if defined(UVM) phys_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, - VM_PHYS_SIZE, TRUE, FALSE, NULL); + VM_PHYS_SIZE, 0, FALSE, NULL); #else phys_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, VM_PHYS_SIZE, TRUE); @@ -381,7 +381,7 @@ cpu_startup() bzero(mclrefcnt, NMBCLUSTERS+CLBYTES/MCLBYTES); #if defined(UVM) mb_map = uvm_km_suballoc(kernel_map, (vm_offset_t *)&mbutl, &maxaddr, - VM_MBUF_SIZE, FALSE, FALSE, NULL); + VM_MBUF_SIZE, VM_MAP_INTRSAFE, FALSE, NULL); #else mb_map = kmem_suballoc(kernel_map, (vm_offset_t *)&mbutl, &maxaddr, VM_MBUF_SIZE, FALSE); diff --git a/sys/arch/sparc/sparc/machdep.c b/sys/arch/sparc/sparc/machdep.c index cf7a130a73c..8051d02965f 100644 --- a/sys/arch/sparc/sparc/machdep.c +++ b/sys/arch/sparc/sparc/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.46 2000/02/22 19:28:01 deraadt Exp $ */ +/* $OpenBSD: machdep.c,v 1.47 2000/03/16 22:11:02 art Exp $ */ /* $NetBSD: machdep.c,v 1.85 1997/09/12 08:55:02 pk Exp $ */ /* @@ -301,7 +301,7 @@ cpu_startup() */ #if defined(UVM) exec_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr, - 16*NCARGS, TRUE, FALSE, NULL); + 16*NCARGS, VM_MAP_PAGEABLE, FALSE, NULL); #else exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr, 16*NCARGS, TRUE); @@ -347,7 +347,7 @@ cpu_startup() bzero(mclrefcnt, NMBCLUSTERS+CLBYTES/MCLBYTES); #if defined(UVM) mb_map = uvm_km_suballoc(kernel_map, (vaddr_t *)&mbutl, &maxaddr, - VM_MBUF_SIZE, FALSE, FALSE, NULL); + VM_MBUF_SIZE, VM_MAP_INTRSAFE, FALSE, NULL); #else mb_map = kmem_suballoc(kernel_map, (vaddr_t *)&mbutl, &maxaddr, VM_MBUF_SIZE, FALSE); diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c index aedd351b27d..3e1aeb3c0a6 100644 --- a/sys/kern/kern_malloc.c +++ b/sys/kern/kern_malloc.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_malloc.c,v 1.18 1999/11/25 13:41:30 art Exp $ */ +/* $OpenBSD: kern_malloc.c,v 1.19 2000/03/16 22:11:03 art Exp $ */ /* $NetBSD: kern_malloc.c,v 1.15.4.2 1996/06/13 17:10:56 cgd Exp $ */ /* @@ -49,7 +49,7 @@ #if defined(UVM) #include -static struct vm_map kmem_map_store; +static struct vm_map_intrsafe kmem_map_store; vm_map_t kmem_map = NULL; #endif @@ -228,11 +228,11 @@ malloc(size, type, flags) int rv; vaddr_t addr = (vaddr_t)kbp->kb_next; - vm_map_lock_read(kmem_map); + vm_map_lock(kmem_map); rv = uvm_map_checkprot(kmem_map, addr, addr + sizeof(struct freelist), VM_PROT_WRITE); - vm_map_unlock_read(kmem_map); + vm_map_unlock(kmem_map); if (!rv) #else @@ -439,7 +439,7 @@ kmeminit() (vsize_t)(npg * sizeof(struct kmemusage))); kmem_map = 
uvm_km_suballoc(kernel_map, (vaddr_t *)&kmembase, (vaddr_t *)&kmemlimit, (vsize_t)(npg * PAGE_SIZE), - FALSE, FALSE, &kmem_map_store); + VM_MAP_INTRSAFE, FALSE, &kmem_map_store.vmi_map); #else kmemusage = (struct kmemusage *) kmem_alloc(kernel_map, (vsize_t)(npg * sizeof(struct kmemusage))); diff --git a/sys/kern/kern_physio.c b/sys/kern/kern_physio.c index e454d9f4be6..2086bab06a8 100644 --- a/sys/kern/kern_physio.c +++ b/sys/kern/kern_physio.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_physio.c,v 1.7 1999/12/02 20:39:32 art Exp $ */ +/* $OpenBSD: kern_physio.c,v 1.8 2000/03/16 22:11:04 art Exp $ */ /* $NetBSD: kern_physio.c,v 1.28 1997/05/19 10:43:28 pk Exp $ */ /*- @@ -184,7 +184,8 @@ physio(strategy, bp, dev, flags, minphys, uio) */ p->p_holdcnt++; #if defined(UVM) - uvm_vslock(p, bp->b_data, todo); + uvm_vslock(p, bp->b_data, todo, (flags & B_READ) ? + VM_PROT_READ | VM_PROT_WRITE : VM_PROT_READ); #else vslock(bp->b_data, todo); #endif diff --git a/sys/kern/kern_sysctl.c b/sys/kern/kern_sysctl.c index 1a5e37c1dbb..b7575a5ce2d 100644 --- a/sys/kern/kern_sysctl.c +++ b/sys/kern/kern_sysctl.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kern_sysctl.c,v 1.32 2000/03/03 11:46:09 art Exp $ */ +/* $OpenBSD: kern_sysctl.c,v 1.33 2000/03/16 22:11:03 art Exp $ */ /* $NetBSD: kern_sysctl.c,v 1.17 1996/05/20 17:49:05 mrg Exp $ */ /*- @@ -173,7 +173,7 @@ sys___sysctl(p, v, retval) memlock.sl_lock = 1; if (dolock) #if defined(UVM) - uvm_vslock(p, SCARG(uap, old), oldlen); + uvm_vslock(p, SCARG(uap, old), oldlen, VM_PROT_NONE); #else vslock(SCARG(uap, old), oldlen); #endif diff --git a/sys/uvm/uvm_aobj.c b/sys/uvm/uvm_aobj.c index 250662bece8..727b72006d7 100644 --- a/sys/uvm/uvm_aobj.c +++ b/sys/uvm/uvm_aobj.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_aobj.c,v 1.18 1999/03/26 17:34:15 chs Exp $ */ +/* $NetBSD: uvm_aobj.c,v 1.20 1999/05/25 00:09:00 thorpej Exp $ */ /* * Copyright (c) 1998 Chuck Silvers, Charles D. Cranor and @@ -619,7 +619,7 @@ uao_reference(uobj) * kernel_object already has plenty of references, leave it alone. */ - if (uobj->uo_refs == UVM_OBJ_KERN) + if (UVM_OBJ_IS_KERN_OBJECT(uobj)) return; simple_lock(&uobj->vmobjlock); @@ -646,7 +646,7 @@ uao_detach(uobj) /* * detaching from kernel_object is a noop. 
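
uvm_vslock() now carries the access protection that will be passed down to uvm_fault() and eventually pmap_enter(). A minimal sketch of a hypothetical caller that is about to write into a user buffer and therefore asks for write access (physio above is the real in-tree example; kbuf, uaddr and len are placeholders):

    /* wire and lock the user pages for the duration of the copy */
    uvm_vslock(p, uaddr, len, VM_PROT_READ | VM_PROT_WRITE);
    error = copyout(kbuf, uaddr, len);
    uvm_vsunlock(p, uaddr, len);
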
*/ - if (uobj->uo_refs == UVM_OBJ_KERN) + if (UVM_OBJ_IS_KERN_OBJECT(uobj)) return; simple_lock(&uobj->vmobjlock); diff --git a/sys/uvm/uvm_extern.h b/sys/uvm/uvm_extern.h index 9782f0e0560..f3c2a65d15f 100644 --- a/sys/uvm/uvm_extern.h +++ b/sys/uvm/uvm_extern.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_extern.h,v 1.24 1999/04/11 04:04:11 chs Exp $ */ +/* $NetBSD: uvm_extern.h,v 1.27 1999/05/26 19:16:36 thorpej Exp $ */ /* * @@ -282,7 +282,8 @@ boolean_t uvm_kernacc __P((caddr_t, size_t, int)); __dead void uvm_scheduler __P((void)) __attribute__((noreturn)); void uvm_swapin __P((struct proc *)); boolean_t uvm_useracc __P((caddr_t, size_t, int)); -void uvm_vslock __P((struct proc *, caddr_t, size_t)); +void uvm_vslock __P((struct proc *, caddr_t, size_t, + vm_prot_t)); void uvm_vsunlock __P((struct proc *, caddr_t, size_t)); @@ -301,7 +302,7 @@ void uvm_km_free_wakeup __P((vm_map_t, vaddr_t, vaddr_t uvm_km_kmemalloc __P((vm_map_t, struct uvm_object *, vsize_t, int)); struct vm_map *uvm_km_suballoc __P((vm_map_t, vaddr_t *, - vaddr_t *, vsize_t, boolean_t, + vaddr_t *, vsize_t, int, boolean_t, vm_map_t)); vaddr_t uvm_km_valloc __P((vm_map_t, vsize_t)); vaddr_t uvm_km_valloc_wait __P((vm_map_t, vsize_t)); diff --git a/sys/uvm/uvm_fault.c b/sys/uvm/uvm_fault.c index d05c5c30282..b4002aceb87 100644 --- a/sys/uvm/uvm_fault.c +++ b/sys/uvm/uvm_fault.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_fault.c,v 1.28 1999/04/11 04:04:11 chs Exp $ */ +/* $NetBSD: uvm_fault.c,v 1.33 1999/06/04 23:38:41 thorpej Exp $ */ /* * @@ -586,6 +586,19 @@ uvm_fault(orig_map, vaddr, fault_type, access_type) else narrow = FALSE; /* normal fault */ + /* + * before we do anything else, if this is a fault on a kernel + * address, check to see if the address is managed by an + * interrupt-safe map. If it is, we fail immediately. Intrsafe + * maps are never pageable, and this approach avoids an evil + * locking mess. + */ + if (orig_map == kernel_map && uvmfault_check_intrsafe(&ufi)) { + UVMHIST_LOG(maphist, "<- VA 0x%lx in intrsafe map %p", + ufi.orig_rvaddr, ufi.map, 0, 0); + return (KERN_FAILURE); + } + /* * "goto ReFault" means restart the page fault from ground zero. */ @@ -613,6 +626,17 @@ ReFault: return (KERN_PROTECTION_FAILURE); } + /* + * if the map is not a pageable map, a page fault always fails. + */ + + if ((ufi.map->flags & VM_MAP_PAGEABLE) == 0) { + UVMHIST_LOG(maphist, + "<- map %p not pageable", ufi.map, 0, 0, 0); + uvmfault_unlockmaps(&ufi, FALSE); + return (KERN_FAILURE); + } + /* * "enter_prot" is the protection we want to enter the page in at. * for certain pages (e.g. copy-on-write pages) this protection can @@ -1689,9 +1713,10 @@ Case2: */ int -uvm_fault_wire(map, start, end) +uvm_fault_wire(map, start, end, access_type) vm_map_t map; vaddr_t start, end; + vm_prot_t access_type; { vaddr_t va; pmap_t pmap; @@ -1713,10 +1738,10 @@ uvm_fault_wire(map, start, end) */ for (va = start ; va < end ; va += PAGE_SIZE) { - rv = uvm_fault(map, va, VM_FAULT_WIRE, VM_PROT_NONE); + rv = uvm_fault(map, va, VM_FAULT_WIRE, access_type); if (rv) { if (va != start) { - uvm_fault_unwire(map->pmap, start, va); + uvm_fault_unwire(map, start, va); } return (rv); } @@ -1727,19 +1752,23 @@ uvm_fault_wire(map, start, end) /* * uvm_fault_unwire(): unwire range of virtual space. 
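
The wiring primitives change in the same way: uvm_fault_wire() gains an access_type argument, and uvm_fault_unwire() now takes the map rather than its pmap. A short sketch of the pairing, modelled on the uvm_fork() and uvm_swapout() hunks later in this patch:

    rv = uvm_fault_wire(kernel_map, addr, addr + USPACE,
        VM_PROT_READ | VM_PROT_WRITE);
    if (rv != KERN_SUCCESS)
        panic("uvm_fault_wire failed: %d", rv);
    /* ... later, when the wired region is no longer needed ... */
    uvm_fault_unwire(kernel_map, addr, addr + USPACE);
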
- * - * => caller holds reference to pmap (via its map) */ void -uvm_fault_unwire(pmap, start, end) - struct pmap *pmap; +uvm_fault_unwire(map, start, end) + vm_map_t map; vaddr_t start, end; { + pmap_t pmap = vm_map_pmap(map); vaddr_t va; paddr_t pa; struct vm_page *pg; +#ifdef DIAGNOSTIC + if (map->flags & VM_MAP_INTRSAFE) + panic("uvm_fault_unwire: intrsafe map"); +#endif + /* * we assume that the area we are unwiring has actually been wired * in the first place. this means that we should be able to extract diff --git a/sys/uvm/uvm_fault.h b/sys/uvm/uvm_fault.h index fd8958f96e5..6849b5713c6 100644 --- a/sys/uvm/uvm_fault.h +++ b/sys/uvm/uvm_fault.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_fault.h,v 1.7 1998/10/11 23:07:42 chuck Exp $ */ +/* $NetBSD: uvm_fault.h,v 1.11 1999/06/04 23:38:41 thorpej Exp $ */ /* * @@ -72,13 +72,14 @@ struct uvm_faultinfo { int uvmfault_anonget __P((struct uvm_faultinfo *, struct vm_amap *, struct vm_anon *)); +static boolean_t uvmfault_check_intrsafe __P((struct uvm_faultinfo *)); static boolean_t uvmfault_lookup __P((struct uvm_faultinfo *, boolean_t)); static boolean_t uvmfault_relock __P((struct uvm_faultinfo *)); static void uvmfault_unlockall __P((struct uvm_faultinfo *, struct vm_amap *, struct uvm_object *, struct vm_anon *)); static void uvmfault_unlockmaps __P((struct uvm_faultinfo *, boolean_t)); -int uvm_fault_wire __P((vm_map_t, vaddr_t, vaddr_t)); -void uvm_fault_unwire __P((struct pmap *, vaddr_t, vaddr_t)); +int uvm_fault_wire __P((vm_map_t, vaddr_t, vaddr_t, vm_prot_t)); +void uvm_fault_unwire __P((vm_map_t, vaddr_t, vaddr_t)); #endif /* _UVM_UVM_FAULT_H_ */ diff --git a/sys/uvm/uvm_fault_i.h b/sys/uvm/uvm_fault_i.h index 8a2c3ea0fb4..38e64476a68 100644 --- a/sys/uvm/uvm_fault_i.h +++ b/sys/uvm/uvm_fault_i.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_fault_i.h,v 1.7 1999/01/24 23:53:15 chuck Exp $ */ +/* $NetBSD: uvm_fault_i.h,v 1.9 1999/06/04 23:38:41 thorpej Exp $ */ /* * @@ -81,6 +81,39 @@ uvmfault_unlockall(ufi, amap, uobj, anon) uvmfault_unlockmaps(ufi, FALSE); } +/* + * uvmfault_check_intrsafe: check for a virtual address managed by + * an interrupt-safe map. + * + * => caller must provide a uvm_faultinfo structure with the IN + * params properly filled in + * => if we find an intersafe VA, we fill in ufi->map, and return TRUE + */ + +static __inline boolean_t +uvmfault_check_intrsafe(ufi) + struct uvm_faultinfo *ufi; +{ + struct vm_map_intrsafe *vmi; + int s; + + s = vmi_list_lock(); + for (vmi = LIST_FIRST(&vmi_list); vmi != NULL; + vmi = LIST_NEXT(vmi, vmi_list)) { + if (ufi->orig_rvaddr >= vm_map_min(&vmi->vmi_map) && + ufi->orig_rvaddr < vm_map_max(&vmi->vmi_map)) + break; + } + vmi_list_unlock(s); + + if (vmi != NULL) { + ufi->map = &vmi->vmi_map; + return (TRUE); + } + + return (FALSE); +} + /* * uvmfault_lookup: lookup a virtual address in a map * diff --git a/sys/uvm/uvm_glue.c b/sys/uvm/uvm_glue.c index fe866fdc017..837c158a240 100644 --- a/sys/uvm/uvm_glue.c +++ b/sys/uvm/uvm_glue.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_glue.c,v 1.19 1999/04/30 21:23:50 thorpej Exp $ */ +/* $NetBSD: uvm_glue.c,v 1.23 1999/05/28 20:49:51 thorpej Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. 
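
uvmfault_check_intrsafe() above depends on vmi_list and its lock, which are declared in uvm_km.c but defined in the vm_map.h part of this patch, not shown in this excerpt. Judging from how they are used, the helpers (whether macros or inlines) presumably look roughly like the following; this is an assumption, not a quote from the patch:

    static __inline int
    vmi_list_lock(void)
    {
        int s;

        s = splimp();       /* the list can be walked at interrupt time */
        simple_lock(&vmi_list_slock);
        return (s);
    }

    static __inline void
    vmi_list_unlock(int s)
    {
        simple_unlock(&vmi_list_slock);
        splx(s);
    }
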
@@ -221,13 +221,15 @@ uvm_chgkprot(addr, len, rw) */ void -uvm_vslock(p, addr, len) +uvm_vslock(p, addr, len, access_type) struct proc *p; caddr_t addr; size_t len; + vm_prot_t access_type; { + uvm_fault_wire(&p->p_vmspace->vm_map, trunc_page(addr), - round_page(addr+len)); + round_page(addr+len), access_type); } /* @@ -243,7 +245,7 @@ uvm_vsunlock(p, addr, len) caddr_t addr; size_t len; { - uvm_fault_unwire(p->p_vmspace->vm_map.pmap, trunc_page(addr), + uvm_fault_unwire(&p->p_vmspace->vm_map, trunc_page(addr), round_page(addr+len)); } @@ -282,9 +284,12 @@ uvm_fork(p1, p2, shared, stack, stacksize) * and the kernel stack. Wired state is stored in p->p_flag's * P_INMEM bit rather than in the vm_map_entry's wired count * to prevent kernel_map fragmentation. + * + * Note the kernel stack gets read/write accesses right off + * the bat. */ rv = uvm_fault_wire(kernel_map, (vaddr_t)up, - (vaddr_t)up + USPACE); + (vaddr_t)up + USPACE, VM_PROT_READ | VM_PROT_WRITE); if (rv != KERN_SUCCESS) panic("uvm_fork: uvm_fault_wire failed: %d", rv); @@ -373,7 +378,8 @@ uvm_swapin(p) addr = (vaddr_t)p->p_addr; /* make P_INMEM true */ - uvm_fault_wire(kernel_map, addr, addr + USPACE); + uvm_fault_wire(kernel_map, addr, addr + USPACE, + VM_PROT_READ | VM_PROT_WRITE); /* * Some architectures need to be notified when the user area has @@ -586,7 +592,7 @@ uvm_swapout(p) * Unwire the to-be-swapped process's user struct and kernel stack. */ addr = (vaddr_t)p->p_addr; - uvm_fault_unwire(kernel_map->pmap, addr, addr + USPACE); /* !P_INMEM */ + uvm_fault_unwire(kernel_map, addr, addr + USPACE); /* !P_INMEM */ pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map)); /* diff --git a/sys/uvm/uvm_km.c b/sys/uvm/uvm_km.c index a908f8f53aa..a5bb21b2a7b 100644 --- a/sys/uvm/uvm_km.c +++ b/sys/uvm/uvm_km.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_km.c,v 1.22 1999/03/26 21:58:39 mycroft Exp $ */ +/* $NetBSD: uvm_km.c,v 1.27 1999/06/04 23:38:41 thorpej Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -156,12 +156,16 @@ vm_map_t kernel_map = NULL; +struct vmi_list vmi_list; +simple_lock_data_t vmi_list_slock; + /* * local functions */ static int uvm_km_get __P((struct uvm_object *, vaddr_t, - vm_page_t *, int *, int, vm_prot_t, int, int)); + vm_page_t *, int *, int, vm_prot_t, int, int)); + /* * local data structues */ @@ -414,29 +418,43 @@ uvm_km_init(start, end) vaddr_t base = VM_MIN_KERNEL_ADDRESS; /* - * first, init kernel memory objects. + * first, initialize the interrupt-safe map list. + */ + LIST_INIT(&vmi_list); + simple_lock_init(&vmi_list_slock); + + /* + * next, init kernel memory objects. */ /* kernel_object: for pageable anonymous kernel memory */ uvm.kernel_object = uao_create(VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS, UAO_FLAG_KERNOBJ); - /* kmem_object: for malloc'd memory (wired, protected by splimp) */ + /* + * kmem_object: for use by the kernel malloc(). Memory is always + * wired, and this object (and the kmem_map) can be accessed at + * interrupt time. + */ simple_lock_init(&kmem_object_store.vmobjlock); kmem_object_store.pgops = &km_pager; TAILQ_INIT(&kmem_object_store.memq); kmem_object_store.uo_npages = 0; /* we are special. we never die */ - kmem_object_store.uo_refs = UVM_OBJ_KERN; + kmem_object_store.uo_refs = UVM_OBJ_KERN_INTRSAFE; uvmexp.kmem_object = &kmem_object_store; - /* mb_object: for mbuf memory (always wired, protected by splimp) */ + /* + * mb_object: for mbuf cluster pages on platforms which use the + * mb_map. 
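
Marking kmem_object and mb_object with UVM_OBJ_KERN_INTRSAFE is what lets the allocation and unmap paths choose the interrupt-safe pmap primitives. Condensed from the uvm_km_kmemalloc() and uvm_unmap_remove() hunks below (the pmap_kenter_pa() call is the PMAP_NEW variant):

    if (UVM_OBJ_IS_INTRSAFE_OBJECT(uobj)) {
        /* wired, interrupt-safe mapping */
        pmap_kenter_pa(va, VM_PAGE_TO_PHYS(pg), VM_PROT_ALL);
    } else {
        pmap_enter(map->pmap, va, VM_PAGE_TO_PHYS(pg),
            UVM_PROT_ALL, TRUE, VM_PROT_READ | VM_PROT_WRITE);
    }
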
Memory is always wired, and this object (and the mb_map) + * can be accessed at interrupt time. + */ simple_lock_init(&mb_object_store.vmobjlock); mb_object_store.pgops = &km_pager; TAILQ_INIT(&mb_object_store.memq); mb_object_store.uo_npages = 0; /* we are special. we never die */ - mb_object_store.uo_refs = UVM_OBJ_KERN; + mb_object_store.uo_refs = UVM_OBJ_KERN_INTRSAFE; uvmexp.mb_object = &mb_object_store; /* @@ -444,7 +462,7 @@ uvm_km_init(start, end) * before installing. */ - uvm_map_setup(&kernel_map_store, base, end, FALSE); + uvm_map_setup(&kernel_map_store, base, end, VM_MAP_PAGEABLE); kernel_map_store.pmap = pmap_kernel(); if (uvm_map(&kernel_map_store, &base, start - base, NULL, UVM_UNKNOWN_OFFSET, UVM_MAPFLAG(UVM_PROT_ALL, UVM_PROT_ALL, @@ -469,11 +487,11 @@ uvm_km_init(start, end) * alloc a new map */ struct vm_map * -uvm_km_suballoc(map, min, max, size, pageable, fixed, submap) +uvm_km_suballoc(map, min, max, size, flags, fixed, submap) struct vm_map *map; vaddr_t *min, *max; /* OUT, OUT */ vsize_t size; - boolean_t pageable; + int flags; boolean_t fixed; struct vm_map *submap; { @@ -503,11 +521,11 @@ uvm_km_suballoc(map, min, max, size, pageable, fixed, submap) pmap_reference(vm_map_pmap(map)); if (submap == NULL) { - submap = uvm_map_create(vm_map_pmap(map), *min, *max, pageable); + submap = uvm_map_create(vm_map_pmap(map), *min, *max, flags); if (submap == NULL) panic("uvm_km_suballoc: unable to create submap"); } else { - uvm_map_setup(submap, *min, *max, pageable); + uvm_map_setup(submap, *min, *max, flags); submap->pmap = vm_map_pmap(map); } @@ -535,15 +553,17 @@ uvm_km_pgremove(uobj, start, end) struct uvm_object *uobj; vaddr_t start, end; { - boolean_t by_list, is_aobj; + boolean_t by_list; struct vm_page *pp, *ppnext; vaddr_t curoff; UVMHIST_FUNC("uvm_km_pgremove"); UVMHIST_CALLED(maphist); simple_lock(&uobj->vmobjlock); /* lock object */ - /* is uobj an aobj? */ - is_aobj = uobj->pgops == &aobj_pager; +#ifdef DIAGNOSTIC + if (uobj->pgops != &aobj_pager) + panic("uvm_km_pgremove: object %p not an aobj", uobj); +#endif /* choose cheapest traversal */ by_list = (uobj->uo_npages <= @@ -561,26 +581,24 @@ uvm_km_pgremove(uobj, start, end) UVMHIST_LOG(maphist," page 0x%x, busy=%d", pp, pp->flags & PG_BUSY, 0, 0); + /* now do the actual work */ - if (pp->flags & PG_BUSY) + if (pp->flags & PG_BUSY) { /* owner must check for this when done */ pp->flags |= PG_RELEASED; - else { - pmap_page_protect(PMAP_PGARG(pp), VM_PROT_NONE); + } else { + /* free the swap slot... */ + uao_dropswap(uobj, curoff >> PAGE_SHIFT); /* - * if this kernel object is an aobj, free the swap slot. + * ...and free the page; note it may be on the + * active or inactive queues. */ - if (is_aobj) { - uao_dropswap(uobj, curoff >> PAGE_SHIFT); - } - uvm_lock_pageq(); uvm_pagefree(pp); uvm_unlock_pageq(); } /* done */ - } simple_unlock(&uobj->vmobjlock); return; @@ -588,7 +606,6 @@ uvm_km_pgremove(uobj, start, end) loop_by_list: for (pp = uobj->memq.tqh_first ; pp != NULL ; pp = ppnext) { - ppnext = pp->listq.tqe_next; if (pp->offset < start || pp->offset >= end) { continue; @@ -596,26 +613,111 @@ loop_by_list: UVMHIST_LOG(maphist," page 0x%x, busy=%d", pp, pp->flags & PG_BUSY, 0, 0); + /* now do the actual work */ - if (pp->flags & PG_BUSY) + if (pp->flags & PG_BUSY) { /* owner must check for this when done */ pp->flags |= PG_RELEASED; - else { - pmap_page_protect(PMAP_PGARG(pp), VM_PROT_NONE); + } else { + /* free the swap slot... 
*/ + uao_dropswap(uobj, pp->offset >> PAGE_SHIFT); /* - * if this kernel object is an aobj, free the swap slot. + * ...and free the page; note it may be on the + * active or inactive queues. */ - if (is_aobj) { - uao_dropswap(uobj, pp->offset >> PAGE_SHIFT); - } - uvm_lock_pageq(); uvm_pagefree(pp); uvm_unlock_pageq(); } /* done */ + } + simple_unlock(&uobj->vmobjlock); + return; +} + + +/* + * uvm_km_pgremove_intrsafe: like uvm_km_pgremove(), but for "intrsafe" + * objects + * + * => when you unmap a part of anonymous kernel memory you want to toss + * the pages right away. (this gets called from uvm_unmap_...). + * => none of the pages will ever be busy, and none of them will ever + * be on the active or inactive queues (because these objects are + * never allowed to "page"). + */ +void +uvm_km_pgremove_intrsafe(uobj, start, end) + struct uvm_object *uobj; + vaddr_t start, end; +{ + boolean_t by_list; + struct vm_page *pp, *ppnext; + vaddr_t curoff; + UVMHIST_FUNC("uvm_km_pgremove_intrsafe"); UVMHIST_CALLED(maphist); + + simple_lock(&uobj->vmobjlock); /* lock object */ + +#ifdef DIAGNOSTIC + if (UVM_OBJ_IS_INTRSAFE_OBJECT(uobj) == 0) + panic("uvm_km_pgremove_intrsafe: object %p not intrsafe", uobj); +#endif + + /* choose cheapest traversal */ + by_list = (uobj->uo_npages <= + ((end - start) >> PAGE_SHIFT) * UKM_HASH_PENALTY); + + if (by_list) + goto loop_by_list; + + /* by hash */ + + for (curoff = start ; curoff < end ; curoff += PAGE_SIZE) { + pp = uvm_pagelookup(uobj, curoff); + if (pp == NULL) + continue; + + UVMHIST_LOG(maphist," page 0x%x, busy=%d", pp, + pp->flags & PG_BUSY, 0, 0); +#ifdef DIAGNOSTIC + if (pp->flags & PG_BUSY) + panic("uvm_km_pgremove_intrsafe: busy page"); + if (pp->pqflags & PQ_ACTIVE) + panic("uvm_km_pgremove_intrsafe: active page"); + if (pp->pqflags & PQ_INACTIVE) + panic("uvm_km_pgremove_intrsafe: inactive page"); +#endif + + /* free the page */ + uvm_pagefree(pp); + } + simple_unlock(&uobj->vmobjlock); + return; + +loop_by_list: + + for (pp = uobj->memq.tqh_first ; pp != NULL ; pp = ppnext) { + ppnext = pp->listq.tqe_next; + if (pp->offset < start || pp->offset >= end) { + continue; + } + + UVMHIST_LOG(maphist," page 0x%x, busy=%d", pp, + pp->flags & PG_BUSY, 0, 0); + +#ifdef DIAGNOSTIC + if (pp->flags & PG_BUSY) + panic("uvm_km_pgremove_intrsafe: busy page"); + if (pp->pqflags & PQ_ACTIVE) + panic("uvm_km_pgremove_intrsafe: active page"); + if (pp->pqflags & PQ_INACTIVE) + panic("uvm_km_pgremove_intrsafe: inactive page"); +#endif + + /* free the page */ + uvm_pagefree(pp); } simple_unlock(&uobj->vmobjlock); return; @@ -725,12 +827,18 @@ uvm_km_kmemalloc(map, obj, size, flags) * (because if pmap_enter wants to allocate out of kmem_object * it will need to lock it itself!) */ + if (UVM_OBJ_IS_INTRSAFE_OBJECT(obj)) { #if defined(PMAP_NEW) - pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg), VM_PROT_ALL); + pmap_kenter_pa(loopva, VM_PAGE_TO_PHYS(pg), + VM_PROT_ALL); #else - pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg), - UVM_PROT_ALL, TRUE, VM_PROT_READ | VM_PROT_WRITE); + pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg), + UVM_PROT_ALL, TRUE, VM_PROT_READ|VM_PROT_WRITE); #endif + } else { + pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg), + UVM_PROT_ALL, TRUE, VM_PROT_READ|VM_PROT_WRITE); + } loopva += PAGE_SIZE; offset += PAGE_SIZE; size -= PAGE_SIZE; @@ -861,8 +969,8 @@ uvm_km_alloc1(map, size, zeroit) * map it in; note we're never called with an intrsafe * object, so we always use regular old pmap_enter(). 
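
For callers, the difference is mainly which map and object they hand in: wired kernel memory that must be usable at interrupt time comes out of kmem_map/kmem_object and is torn down with uvm_km_pgremove_intrsafe() instead of uvm_km_pgremove(). A rough sketch of the allocation side, approximately what the kernel malloc() does (UVM_KMF_NOWAIT is from the pre-existing uvm_km interface; "canwait" is a placeholder):

    vaddr_t va;

    va = uvm_km_kmemalloc(kmem_map, uvmexp.kmem_object,
        round_page(size), canwait ? 0 : UVM_KMF_NOWAIT);
    if (va == 0)
        return (NULL);      /* out of kernel virtual space or memory */
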
*/ - pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg), - UVM_PROT_ALL, TRUE, VM_PROT_READ|VM_PROT_WRITE); + pmap_enter(map->pmap, loopva, VM_PAGE_TO_PHYS(pg), + UVM_PROT_ALL, TRUE, VM_PROT_READ|VM_PROT_WRITE); loopva += PAGE_SIZE; offset += PAGE_SIZE; diff --git a/sys/uvm/uvm_km.h b/sys/uvm/uvm_km.h index 7b07b0d74d7..fb143cbd39c 100644 --- a/sys/uvm/uvm_km.h +++ b/sys/uvm/uvm_km.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_km.h,v 1.6 1998/08/13 02:11:01 eeh Exp $ */ +/* $NetBSD: uvm_km.h,v 1.8 1999/05/25 20:30:09 thorpej Exp $ */ /* * @@ -47,5 +47,6 @@ void uvm_km_init __P((vaddr_t, vaddr_t)); void uvm_km_pgremove __P((struct uvm_object *, vaddr_t, vaddr_t)); +void uvm_km_pgremove_intrsafe __P((struct uvm_object *, vaddr_t, vaddr_t)); #endif /* _UVM_UVM_KM_H_ */ diff --git a/sys/uvm/uvm_loan.c b/sys/uvm/uvm_loan.c index 66d81f15eb0..f82d8576e0a 100644 --- a/sys/uvm/uvm_loan.c +++ b/sys/uvm/uvm_loan.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_loan.c,v 1.14 1999/03/25 18:48:52 mrg Exp $ */ +/* $NetBSD: uvm_loan.c,v 1.17 1999/06/03 00:05:45 thorpej Exp $ */ /* * @@ -59,7 +59,7 @@ * * there are 3 types of loans possible: * O->K uvm_object page to wired kernel page (e.g. mbuf data area) - * A->K anon page to kernel wired kernel page (e.g. mbuf data area) + * A->K anon page to wired kernel page (e.g. mbuf data area) * O->A uvm_object to anon loan (e.g. vnode page to an anon) * note that it possible to have an O page loaned to both an A and K * at the same time. @@ -68,10 +68,15 @@ * a uvm_object and a vm_anon, but PQ_ANON will not be set. this sort * of page is considered "owned" by the uvm_object (not the anon). * - * each loan of a page to a wired kernel page bumps the pg->wire_count. - * wired kernel mappings should be entered with pmap_kenter functions - * so that pmap_page_protect() will not affect the kernel mappings. - * (this requires the PMAP_NEW interface...). + * each loan of a page to the kernel bumps the pg->wire_count. the + * kernel mappings for these pages will be read-only and wired. since + * the page will also be wired, it will not be a candidate for pageout, + * and thus will never be pmap_page_protect()'d with VM_PROT_NONE. a + * write fault in the kernel to one of these pages will not cause + * copy-on-write. instead, the page fault is considered fatal. this + * is because the kernel mapping will have no way to look up the + * object/anon which the page is owned by. this is a good side-effect, + * since a kernel write to a loaned page is an error. * * owners that want to free their pages and discover that they are * loaned out simply "disown" them (the page becomes an orphan). these @@ -96,7 +101,7 @@ * * note that loaning a page causes all mappings of the page to become * read-only (via pmap_page_protect). this could have an unexpected - * effect on normal "wired" pages if one is not careful. + * effect on normal "wired" pages if one is not careful (XXX). */ /* @@ -220,6 +225,11 @@ uvm_loan(map, start, len, result, flags) void **output; int rv; +#ifdef DIAGNOSTIC + if (map->flags & VM_MAP_INTRSAFE) + panic("uvm_loan: intrsafe map"); +#endif + /* * ensure that one and only one of the flags is set */ diff --git a/sys/uvm/uvm_map.c b/sys/uvm/uvm_map.c index 68487e398e3..c7c34a8f9bd 100644 --- a/sys/uvm/uvm_map.c +++ b/sys/uvm/uvm_map.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_map.c,v 1.39 1999/05/12 19:11:23 thorpej Exp $ */ +/* $NetBSD: uvm_map.c,v 1.53 1999/06/07 16:31:42 thorpej Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. 
@@ -186,6 +186,23 @@ static void uvm_map_entry_unwire __P((vm_map_t, vm_map_entry_t)); * local inlines */ +/* XXX Should not exist! */ +#define vm_map_downgrade(map) \ + (void) lockmgr(&(map)->lock, LK_DOWNGRADE, NULL, curproc) + +/* XXX Should not exist! */ +#ifdef DIAGNOSTIC +#define vm_map_upgrade(map) \ +do { \ + if (lockmgr(&(map)->lock, LK_UPGRADE, NULL, curproc) != 0) \ + panic("vm_map_upgrade: failed to upgrade lock"); \ +} while (0) +#else +#define vm_map_upgrade(map) \ + (void) lockmgr(&(map)->lock, LK_UPGRADE, NULL) +#endif /* DIAGNOSTIC */ + + /* * uvm_mapent_alloc: allocate a map entry * @@ -201,11 +218,11 @@ uvm_mapent_alloc(map) UVMHIST_FUNC("uvm_mapent_alloc"); UVMHIST_CALLED(maphist); - if (map->entries_pageable) { + if ((map->flags & VM_MAP_INTRSAFE) == 0 && + map != kernel_map && kernel_map != NULL /* XXX */) { me = pool_get(&uvm_map_entry_pool, PR_WAITOK); me->flags = 0; /* me can't be null, wait ok */ - } else { s = splimp(); /* protect kentry_free list with splimp */ simple_lock(&uvm.kentry_lock); @@ -214,14 +231,14 @@ uvm_mapent_alloc(map) simple_unlock(&uvm.kentry_lock); splx(s); if (!me) - panic("mapent_alloc: out of kernel map entries, check MAX_KMAPENT"); + panic("mapent_alloc: out of static map entries, check MAX_KMAPENT"); me->flags = UVM_MAP_STATIC; } - UVMHIST_LOG(maphist, "<- new entry=0x%x [pageable=%d]", - me, map->entries_pageable, 0, 0); + UVMHIST_LOG(maphist, "<- new entry=0x%x [kentry=%d]", + me, ((map->flags & VM_MAP_INTRSAFE) != 0 || map == kernel_map) + ? TRUE : FALSE, 0, 0); return(me); - } /* @@ -276,7 +293,7 @@ uvm_map_entry_unwire(map, entry) vm_map_entry_t entry; { - uvm_fault_unwire(map->pmap, entry->start, entry->end); + uvm_fault_unwire(map, entry->start, entry->end); entry->wired_count = 0; } @@ -543,8 +560,9 @@ uvm_map(map, startp, size, uobj, uoffset, flags) } else { if (uoffset == UVM_UNKNOWN_OFFSET) { #ifdef DIAGNOSTIC - if (uobj->uo_refs != UVM_OBJ_KERN) - panic("uvm_map: unknown offset with non-kernel object"); + if (UVM_OBJ_IS_KERN_OBJECT(uobj) == 0) + panic("uvm_map: unknown offset with " + "non-kernel object"); #endif uoffset = *startp - vm_map_min(kernel_map); } @@ -976,11 +994,11 @@ uvm_unmap_remove(map, start, end, entry_list) * we want to free these pages right away... */ if (UVM_ET_ISOBJ(entry) && - entry->object.uvm_obj->uo_refs == UVM_OBJ_KERN) { - + UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj)) { #ifdef DIAGNOSTIC if (vm_map_pmap(map) != pmap_kernel()) - panic("uvm_unmap_remove: kernel object mapped by non-kernel map"); + panic("uvm_unmap_remove: kernel object " + "mapped by non-kernel map"); #endif /* @@ -1006,40 +1024,35 @@ uvm_unmap_remove(map, start, end, entry_list) * * uvm_km_pgremove currently does the following: * for pages in the kernel object in range: - * - pmap_page_protect them out of all pmaps + * - drops the swap slot * - uvm_pagefree the page * - * note that in case [1] the pmap_page_protect call - * in uvm_km_pgremove may very well be redundant - * because we have already removed the mappings - * beforehand with pmap_remove (or pmap_kremove). - * in the PMAP_NEW case, the pmap_page_protect call - * may not do anything, since PMAP_NEW allows the - * kernel to enter/remove kernel mappings without - * bothing to keep track of the mappings (e.g. via - * pv_entry lists). XXX: because of this, in the - * future we should consider removing the - * pmap_page_protect from uvm_km_pgremove some time - * in the future. + * note there is version of uvm_km_pgremove() that + * is used for "intrsafe" objects. 
*/ /* - * remove mappings from pmap + * remove mappings from pmap and drop the pages + * from the object. offsets are always relative + * to vm_map_min(kernel_map). */ + if (UVM_OBJ_IS_INTRSAFE_OBJECT(entry->object.uvm_obj)) { #if defined(PMAP_NEW) - pmap_kremove(entry->start, len); + pmap_kremove(entry->start, len); #else - pmap_remove(pmap_kernel(), entry->start, - entry->start+len); + pmap_remove(pmap_kernel(), entry->start, + entry->start + len); #endif - - /* - * remove pages from a kernel object (offsets are - * always relative to vm_map_min(kernel_map)). - */ - uvm_km_pgremove(entry->object.uvm_obj, - entry->start - vm_map_min(kernel_map), - entry->end - vm_map_min(kernel_map)); + uvm_km_pgremove_intrsafe(entry->object.uvm_obj, + entry->start - vm_map_min(kernel_map), + entry->end - vm_map_min(kernel_map)); + } else { + pmap_remove(pmap_kernel(), entry->start, + entry->start + len); + uvm_km_pgremove(entry->object.uvm_obj, + entry->start - vm_map_min(kernel_map), + entry->end - vm_map_min(kernel_map)); + } /* * null out kernel_object reference, we've just @@ -1842,6 +1855,121 @@ uvm_map_inherit(map, start, end, new_inheritance) return(KERN_SUCCESS); } +/* + * uvm_map_advice: set advice code for range of addrs in map. + * + * => map must be unlocked + */ + +int +uvm_map_advice(map, start, end, new_advice) + vm_map_t map; + vaddr_t start; + vaddr_t end; + int new_advice; +{ + vm_map_entry_t entry, temp_entry; + UVMHIST_FUNC("uvm_map_advice"); UVMHIST_CALLED(maphist); + UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_adv=0x%x)", + map, start, end, new_advice); + + vm_map_lock(map); + + VM_MAP_RANGE_CHECK(map, start, end); + + if (uvm_map_lookup_entry(map, start, &temp_entry)) { + entry = temp_entry; + UVM_MAP_CLIP_START(map, entry, start); + } else { + entry = temp_entry->next; + } + + while ((entry != &map->header) && (entry->start < end)) { + UVM_MAP_CLIP_END(map, entry, end); + + switch (new_advice) { + case MADV_NORMAL: + case MADV_RANDOM: + case MADV_SEQUENTIAL: + /* nothing special here */ + break; + +#if 0 + case MADV_WILLNEED: + /* activate all these pages */ + /* XXX */ + /* + * should invent a "weak" mode for uvm_fault() + * which would only do the PGO_LOCKED pgo_get(). + */ + break; + + case MADV_DONTNEED: + /* deactivate this page */ + /* XXX */ + /* + * vm_page_t p; + * uvm_lock_pageq(); + * for (p in each page) + * if (not_wired) + * uvm_pagedeactivate(p); + * uvm_unlock_pageq(); + */ + break; + + case MADV_SPACEAVAIL: + /* + * XXXMRG + * what is this? i think: "ensure that we have + * allocated backing-store for these pages". this + * is going to require changes in the page daemon, + * as it will free swap space allocated to pages in + * core. there's also what to do for + * device/file/anonymous memory.. + */ + break; + + case MADV_GARBAGE: + /* pages are `empty' and can be garbage collected */ + /* XXX */ + /* + * (perhaps MADV_FREE? check freebsd's MADV_FREE). + * + * need to do this: + * - clear all the referenced and modified bits on + * the pages, + * - delete any backing store, + * - mark the page as `recycable'. + * + * So, if you start paging, the pages would be thrown out + * and then zero-filled the next time they're used. + * Otherwise you'd just reuse them directly. Once the + * page has been modified again, it would no longer be + * recyclable. That way, malloc() can just tell the + * system when pages are `empty'; if memory is needed, + * they'll be tossed; if memory is not needed, there + * will be no additional overhead. 
+ */ + break; +#endif + + default: + vm_map_unlock(map); + UVMHIST_LOG(maphist,"<- done (INVALID ARG)",0,0,0,0); + return (KERN_INVALID_ARGUMENT); + } + + + entry->advice = new_advice; + + entry = entry->next; + } + + vm_map_unlock(map); + UVMHIST_LOG(maphist,"<- done (OK)",0,0,0,0); + return (KERN_SUCCESS); +} + /* * uvm_map_pageable: sets the pageability of a range in a map. * @@ -1864,6 +1992,11 @@ uvm_map_pageable(map, start, end, new_pageable) UVMHIST_LOG(maphist,"(map=0x%x,start=0x%x,end=0x%x,new_pageable=0x%x)", map, start, end, new_pageable); +#ifdef DIAGNOSTIC + if ((map->flags & VM_MAP_PAGEABLE) == 0) + panic("uvm_map_pageable: map %p not pageable", map); +#endif + vm_map_lock(map); VM_MAP_RANGE_CHECK(map, start, end); @@ -1913,10 +2046,11 @@ uvm_map_pageable(map, start, end, new_pageable) * now decrement the wiring count for each region. if a region * becomes completely unwired, unwire its physical pages and * mappings. + * + * Note, uvm_fault_unwire() (called via uvm_map_entry_unwire()) + * does not lock the map, so we don't have to do anything + * special regarding locking here. */ -#if 0 /* not necessary: uvm_fault_unwire does not lock */ - lock_set_recursive(&map->lock); -#endif /* XXXCDC */ entry = start_entry; while ((entry != &map->header) && (entry->start < end)) { @@ -1928,9 +2062,6 @@ uvm_map_pageable(map, start, end, new_pageable) entry = entry->next; } -#if 0 /* XXXCDC: not necessary, see above */ - lock_clear_recursive(&map->lock); -#endif vm_map_unlock(map); UVMHIST_LOG(maphist,"<- done (OK UNWIRE)",0,0,0,0); return(KERN_SUCCESS); @@ -2015,62 +2146,53 @@ uvm_map_pageable(map, start, end, new_pageable) /* * Pass 2. */ - /* - * HACK HACK HACK HACK - * - * if we are wiring in the kernel map or a submap of it, unlock the - * map to avoid deadlocks. we trust that the kernel threads are - * well-behaved, and therefore will not do anything destructive to - * this region of the map while we have it unlocked. we cannot - * trust user threads to do the same. - * - * HACK HACK HACK HACK - */ - if (vm_map_pmap(map) == pmap_kernel()) { - vm_map_unlock(map); /* trust me ... */ - } else { - vm_map_set_recursive(&map->lock); - lockmgr(&map->lock, LK_DOWNGRADE, (void *)0, curproc /*XXX*/); - } + + vm_map_downgrade(map); rv = 0; entry = start_entry; while (entry != &map->header && entry->start < end) { - /* - * if uvm_fault_wire fails for any page we need to undo what has - * been done. we decrement the wiring count for those pages - * which have not yet been wired (now) and unwire those that - * have * (later). - * - * XXX this violates the locking protocol on the map, needs to - * be fixed. [because we only have a read lock on map we - * shouldn't be changing wired_count?] - */ - if (rv) { - entry->wired_count--; - } else if (entry->wired_count == 1) { - rv = uvm_fault_wire(map, entry->start, entry->end); + if (entry->wired_count == 1) { + rv = uvm_fault_wire(map, entry->start, entry->end, + entry->protection); if (rv) { - failed = entry->start; - entry->wired_count--; + /* + * wiring failed. break out of the loop. + * we'll clean up the map below, once we + * have a write lock again. + */ + break; } } entry = entry->next; } - if (vm_map_pmap(map) == pmap_kernel()) { - vm_map_lock(map); /* relock */ - } else { - vm_map_clear_recursive(&map->lock); - } - if (rv) { /* failed? */ + /* + * Get back to an exclusive (write) lock. + */ + vm_map_upgrade(map); + + /* + * first drop the wiring count on all the entries + * which haven't actually been wired yet. 
+ */ + failed = entry->start; + while (entry != &map->header && entry->start < end) + entry->wired_count--; + + /* + * now, unlock the map, and unwire all the pages that + * were successfully wired above. + */ vm_map_unlock(map); (void) uvm_map_pageable(map, start, failed, TRUE); UVMHIST_LOG(maphist, "<- done (RV=%d)", rv,0,0,0); return(rv); } - vm_map_unlock(map); + + /* We are holding a read lock here. */ + vm_map_unlock_read(map); UVMHIST_LOG(maphist,"<- done (OK WIRE)",0,0,0,0); return(KERN_SUCCESS); @@ -2255,7 +2377,7 @@ uvmspace_init(vm, pmap, min, max, pageable) bzero(vm, sizeof(*vm)); - uvm_map_setup(&vm->vm_map, min, max, pageable); + uvm_map_setup(&vm->vm_map, min, max, pageable ? VM_MAP_PAGEABLE : 0); if (pmap) pmap_reference(pmap); @@ -2366,7 +2488,7 @@ uvmspace_exec(p) * for p */ nvm = uvmspace_alloc(map->min_offset, map->max_offset, - map->entries_pageable); + (map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE); #if (defined(i386) || defined(pc532)) && !defined(PMAP_NEW) /* @@ -2472,7 +2594,7 @@ uvmspace_fork(vm1) vm_map_lock(old_map); vm2 = uvmspace_alloc(old_map->min_offset, old_map->max_offset, - old_map->entries_pageable); + (old_map->flags & VM_MAP_PAGEABLE) ? TRUE : FALSE); bcopy(&vm1->vm_startcopy, &vm2->vm_startcopy, (caddr_t) (vm1 + 1) - (caddr_t) &vm1->vm_startcopy); new_map = &vm2->vm_map; /* XXX */ @@ -2801,8 +2923,9 @@ uvm_map_printit(map, full, pr) vm_map_entry_t entry; (*pr)("MAP %p: [0x%lx->0x%lx]\n", map, map->min_offset,map->max_offset); - (*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d\n", - map->nentries, map->size, map->ref_count, map->timestamp); + (*pr)("\t#ent=%d, sz=%d, ref=%d, version=%d, flags=0x%x\n", + map->nentries, map->size, map->ref_count, map->timestamp, + map->flags); #ifdef pmap_resident_count (*pr)("\tpmap=%p(resident=%d)\n", map->pmap, pmap_resident_count(map->pmap)); @@ -2855,7 +2978,7 @@ uvm_object_printit(uobj, full, pr) (*pr)("OBJECT %p: pgops=%p, npages=%d, ", uobj, uobj->pgops, uobj->uo_npages); - if (uobj->uo_refs == UVM_OBJ_KERN) + if (UVM_OBJ_IS_KERN_OBJECT(uobj)) (*pr)("refs=\n"); else (*pr)("refs=%d\n", uobj->uo_refs); diff --git a/sys/uvm/uvm_map.h b/sys/uvm/uvm_map.h index c4ee5711acb..b58b21ac459 100644 --- a/sys/uvm/uvm_map.h +++ b/sys/uvm/uvm_map.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_map.h,v 1.11 1999/03/25 18:48:52 mrg Exp $ */ +/* $NetBSD: uvm_map.h,v 1.14 1999/05/26 19:16:36 thorpej Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. 
@@ -138,7 +138,7 @@ void uvm_map_clip_end __P((vm_map_t, vm_map_entry_t, vaddr_t)); MAP_INLINE vm_map_t uvm_map_create __P((pmap_t, vaddr_t, - vaddr_t, boolean_t)); + vaddr_t, int)); int uvm_map_extract __P((vm_map_t, vaddr_t, vsize_t, vm_map_t, vaddr_t *, int)); vm_map_entry_t uvm_map_findspace __P((vm_map_t, vaddr_t, vsize_t, @@ -146,6 +146,7 @@ vm_map_entry_t uvm_map_findspace __P((vm_map_t, vaddr_t, vsize_t, boolean_t)); int uvm_map_inherit __P((vm_map_t, vaddr_t, vaddr_t, vm_inherit_t)); +int uvm_map_advice __P((vm_map_t, vaddr_t, vaddr_t, int)); void uvm_map_init __P((void)); boolean_t uvm_map_lookup_entry __P((vm_map_t, vaddr_t, vm_map_entry_t *)); @@ -156,7 +157,7 @@ int uvm_map_replace __P((vm_map_t, vaddr_t, vaddr_t, int uvm_map_reserve __P((vm_map_t, vsize_t, vaddr_t, vaddr_t *)); void uvm_map_setup __P((vm_map_t, vaddr_t, - vaddr_t, boolean_t)); + vaddr_t, int)); int uvm_map_submap __P((vm_map_t, vaddr_t, vaddr_t, vm_map_t)); MAP_INLINE diff --git a/sys/uvm/uvm_map_i.h b/sys/uvm/uvm_map_i.h index e56ba28e5e9..85ca2a72a43 100644 --- a/sys/uvm/uvm_map_i.h +++ b/sys/uvm/uvm_map_i.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_map_i.h,v 1.11 1999/03/25 18:48:53 mrg Exp $ */ +/* $NetBSD: uvm_map_i.h,v 1.14 1999/06/04 23:38:42 thorpej Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -84,15 +84,18 @@ */ MAP_INLINE vm_map_t -uvm_map_create(pmap, min, max, pageable) +uvm_map_create(pmap, min, max, flags) pmap_t pmap; vaddr_t min, max; - boolean_t pageable; + int flags; { vm_map_t result; - MALLOC(result, vm_map_t, sizeof(struct vm_map), M_VMMAP, M_WAITOK); - uvm_map_setup(result, min, max, pageable); + MALLOC(result, vm_map_t, + (flags & VM_MAP_INTRSAFE) ? sizeof(struct vm_map_intrsafe) : + sizeof(struct vm_map), + M_VMMAP, M_WAITOK); + uvm_map_setup(result, min, max, flags); result->pmap = pmap; return(result); } @@ -104,10 +107,10 @@ uvm_map_create(pmap, min, max, pageable) */ MAP_INLINE void -uvm_map_setup(map, min, max, pageable) +uvm_map_setup(map, min, max, flags) vm_map_t map; vaddr_t min, max; - boolean_t pageable; + int flags; { map->header.next = map->header.prev = &map->header; @@ -116,13 +119,26 @@ uvm_map_setup(map, min, max, pageable) map->ref_count = 1; map->min_offset = min; map->max_offset = max; - map->entries_pageable = pageable; + map->flags = flags; map->first_free = &map->header; map->hint = &map->header; map->timestamp = 0; lockinit(&map->lock, PVM, "thrd_sleep", 0, 0); simple_lock_init(&map->ref_lock); simple_lock_init(&map->hint_lock); + + /* + * If the map is interrupt safe, place it on the list + * of interrupt safe maps, for uvm_fault(). + */ + if (flags & VM_MAP_INTRSAFE) { + struct vm_map_intrsafe *vmi = (struct vm_map_intrsafe *)map; + int s; + + s = vmi_list_lock(); + LIST_INSERT_HEAD(&vmi_list, vmi, vmi_list); + vmi_list_unlock(s); + } } diff --git a/sys/uvm/uvm_mmap.c b/sys/uvm/uvm_mmap.c index 4d78b3a3993..75b1d162648 100644 --- a/sys/uvm/uvm_mmap.c +++ b/sys/uvm/uvm_mmap.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_mmap.c,v 1.19 1999/03/25 18:48:53 mrg Exp $ */ +/* $NetBSD: uvm_mmap.c,v 1.21 1999/05/23 06:27:13 mrg Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -119,28 +119,6 @@ sys_sstk(p, v, retval) return (ENOSYS); } -/* - * sys_madvise: give advice about memory usage. 
- */ - -/* ARGSUSED */ -int -sys_madvise(p, v, retval) - struct proc *p; - void *v; - register_t *retval; -{ -#if 0 - struct sys_madvise_args /* { - syscallarg(caddr_t) addr; - syscallarg(size_t) len; - syscallarg(int) behav; - } */ *uap = v; -#endif - - return (ENOSYS); -} - /* * sys_mincore: determine if pages are in core or not. */ @@ -695,6 +673,51 @@ sys_minherit(p, v, retval) return (EINVAL); } +/* + * sys_madvise: give advice about memory usage. + */ + +/* ARGSUSED */ +int +sys_madvise(p, v, retval) + struct proc *p; + void *v; + register_t *retval; +{ + struct sys_madvise_args /* { + syscallarg(caddr_t) addr; + syscallarg(size_t) len; + syscallarg(int) behav; + } */ *uap = v; + vaddr_t addr; + vsize_t size, pageoff; + int advice; + + addr = (vaddr_t)SCARG(uap, addr); + size = (vsize_t)SCARG(uap, len); + advice = SCARG(uap, behav); + + /* + * align the address to a page boundary, and adjust the size accordingly + */ + pageoff = (addr & PAGE_MASK); + addr -= pageoff; + size += pageoff; + size = (vsize_t) round_page(size); + + if ((int)size < 0) + return (EINVAL); + + switch (uvm_map_advice(&p->p_vmspace->vm_map, addr, addr+size, + advice)) { + case KERN_SUCCESS: + return (0); + case KERN_PROTECTION_FAILURE: + return (EACCES); + } + return (EINVAL); +} + /* * sys_mlock: memory lock */ diff --git a/sys/uvm/uvm_object.h b/sys/uvm/uvm_object.h index c45dd262a6b..294e3624e9e 100644 --- a/sys/uvm/uvm_object.h +++ b/sys/uvm/uvm_object.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_object.h,v 1.5 1998/03/09 00:58:58 mrg Exp $ */ +/* $NetBSD: uvm_object.h,v 1.8 1999/05/25 20:30:09 thorpej Exp $ */ /* * @@ -64,7 +64,22 @@ struct uvm_object { * for kernel objects... when a kernel object is unmapped we always want * to free the resources associated with the mapping. UVM_OBJ_KERN * allows us to decide which type of unmapping we want to do. + * + * in addition, we have kernel objects which may be used in an + * interrupt context. these objects get their mappings entered + * with pmap_kenter*() and removed with pmap_kremove(), which + * are safe to call in interrupt context, and must be used ONLY + * for wired kernel mappings in these objects and their associated + * maps. */ -#define UVM_OBJ_KERN (-2) +#define UVM_OBJ_KERN (-2) +#define UVM_OBJ_KERN_INTRSAFE (-3) + +#define UVM_OBJ_IS_KERN_OBJECT(uobj) \ + ((uobj)->uo_refs == UVM_OBJ_KERN || \ + (uobj)->uo_refs == UVM_OBJ_KERN_INTRSAFE) + +#define UVM_OBJ_IS_INTRSAFE_OBJECT(uobj) \ + ((uobj)->uo_refs == UVM_OBJ_KERN_INTRSAFE) #endif /* _UVM_UVM_OBJECT_H_ */ diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c index fa85122f307..c60017de35b 100644 --- a/sys/uvm/uvm_page.c +++ b/sys/uvm/uvm_page.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_page.c,v 1.19 1999/05/20 20:07:55 thorpej Exp $ */ +/* $NetBSD: uvm_page.c,v 1.23 1999/05/25 01:34:13 thorpej Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. 
@@ -847,9 +847,7 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list) panic("uvm_pagealloc: obj and anon != NULL"); #endif - s = splimp(); - - uvm_lock_fpageq(); /* lock free page queue */ + s = uvm_lock_fpageq(); /* lock free page queue */ /* * check to see if we need to generate some free pages waking @@ -870,7 +868,7 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list) */ use_reserve = (flags & UVM_PGA_USERESERVE) || - (obj && obj->uo_refs == UVM_OBJ_KERN); + (obj && UVM_OBJ_IS_KERN_OBJECT(obj)); if ((uvmexp.free <= uvmexp.reserve_kernel && !use_reserve) || (uvmexp.free <= uvmexp.reserve_pagedaemon && !(use_reserve && curproc == uvm.pagedaemon_proc))) @@ -919,8 +917,7 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list) TAILQ_REMOVE(freeq, pg, pageq); uvmexp.free--; - uvm_unlock_fpageq(); /* unlock free page queue */ - splx(s); + uvm_unlock_fpageq(s); /* unlock free page queue */ pg->offset = off; pg->uobject = obj; @@ -945,8 +942,7 @@ uvm_pagealloc_strat(obj, off, anon, flags, strat, free_list) return(pg); fail: - uvm_unlock_fpageq(); - splx(s); + uvm_unlock_fpageq(s); return (NULL); } @@ -1137,8 +1133,7 @@ struct vm_page *pg; * and put on free queue */ - s = splimp(); - uvm_lock_fpageq(); + s = uvm_lock_fpageq(); TAILQ_INSERT_TAIL(&uvm.page_free[uvm_page_lookup_freelist(pg)], pg, pageq); pg->pqflags = PQ_FREE; @@ -1148,8 +1143,7 @@ struct vm_page *pg; pg->uanon = (void *)0xdeadbeef; #endif uvmexp.free++; - uvm_unlock_fpageq(); - splx(s); + uvm_unlock_fpageq(s); } #if defined(UVM_PAGE_TRKOWN) diff --git a/sys/uvm/uvm_page.h b/sys/uvm/uvm_page.h index 621bb01d9a1..09b4c635a65 100644 --- a/sys/uvm/uvm_page.h +++ b/sys/uvm/uvm_page.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_page.h,v 1.10 1998/08/13 02:11:02 eeh Exp $ */ +/* $NetBSD: uvm_page.h,v 1.12 1999/05/24 19:10:57 thorpej Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -79,8 +79,6 @@ #define uvm_lock_pageq() simple_lock(&uvm.pageqlock) #define uvm_unlock_pageq() simple_unlock(&uvm.pageqlock) -#define uvm_lock_fpageq() simple_lock(&uvm.fpageqlock) -#define uvm_unlock_fpageq() simple_unlock(&uvm.fpageqlock) #define uvm_pagehash(obj,off) \ (((unsigned long)obj+(unsigned long)atop(off)) & uvm.page_hashmask) @@ -108,6 +106,9 @@ boolean_t uvm_page_physget __P((paddr_t *)); #endif void uvm_page_rehash __P((void)); +PAGE_INLINE int uvm_lock_fpageq __P((void)); +PAGE_INLINE void uvm_unlock_fpageq __P((int)); + PAGE_INLINE void uvm_pageactivate __P((struct vm_page *)); vaddr_t uvm_pageboot_alloc __P((vsize_t)); PAGE_INLINE void uvm_pagecopy __P((struct vm_page *, struct vm_page *)); diff --git a/sys/uvm/uvm_page_i.h b/sys/uvm/uvm_page_i.h index 4691e0806f8..9fc1d2da91b 100644 --- a/sys/uvm/uvm_page_i.h +++ b/sys/uvm/uvm_page_i.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_page_i.h,v 1.8 1998/08/13 02:11:02 eeh Exp $ */ +/* $NetBSD: uvm_page_i.h,v 1.10 1999/05/24 19:10:57 thorpej Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -79,6 +79,40 @@ #if defined(UVM_PAGE_INLINE) || defined(UVM_PAGE) +/* + * uvm_lock_fpageq: lock the free page queue + * + * => free page queue can be accessed in interrupt context, so this + * blocks all interrupts that can cause memory allocation, and + * returns the previous interrupt level. 
+ */ + +PAGE_INLINE int +uvm_lock_fpageq() +{ + int s; + + s = splimp(); + simple_lock(&uvm.fpageqlock); + return (s); +} + +/* + * uvm_unlock_fpageq: unlock the free page queue + * + * => caller must supply interrupt level returned by uvm_lock_fpageq() + * so that it may be restored. + */ + +PAGE_INLINE void +uvm_unlock_fpageq(s) + int s; +{ + + simple_unlock(&uvm.fpageqlock); + splx(s); +} + /* * uvm_pagelookup: look up a page * diff --git a/sys/uvm/uvm_pager.c b/sys/uvm/uvm_pager.c index c8050983dec..8c057a8bfdf 100644 --- a/sys/uvm/uvm_pager.c +++ b/sys/uvm/uvm_pager.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_pager.c,v 1.15 1999/03/25 18:48:55 mrg Exp $ */ +/* $NetBSD: uvm_pager.c,v 1.20 1999/05/26 19:16:36 thorpej Exp $ */ /* * @@ -88,7 +88,7 @@ uvm_pager_init() */ pager_map = uvm_km_suballoc(kernel_map, &uvm.pager_sva, &uvm.pager_eva, - PAGER_MAP_SIZE, FALSE, FALSE, NULL); + PAGER_MAP_SIZE, 0, FALSE, NULL); simple_lock_init(&pager_map_wanted_lock); pager_map_wanted = FALSE; @@ -113,6 +113,9 @@ uvm_pager_init() * * we basically just map in a blank map entry to reserve the space in the * map and then use pmap_enter() to put the mappings in by hand. + * + * XXX It would be nice to know the direction of the I/O, so that we can + * XXX map only what is necessary. */ vaddr_t @@ -169,6 +172,11 @@ ReStart: panic("uvm_pagermapin: page not busy"); #endif + /* + * XXX VM_PROT_DEFAULT includes VM_PROT_EXEC; is that + * XXX really necessary? It could lead to unnecessary + * XXX instruction cache flushes. + */ pmap_enter(vm_map_pmap(pager_map), cva, VM_PAGE_TO_PHYS(pp), VM_PROT_DEFAULT, TRUE, VM_PROT_READ | VM_PROT_WRITE); @@ -698,8 +706,6 @@ int swblk; /* valid if (uobj == NULL && PGO_REALLOCSWAP) */ * had a successful pageout update the page! */ if (flags & PGO_PDFREECLUST) { - /* XXX: with PMAP_NEW ref should already be clear, - * but don't trust! */ pmap_clear_reference(PMAP_PGARG(ppsp[lcv])); pmap_clear_modify(PMAP_PGARG(ppsp[lcv])); ppsp[lcv]->flags |= PG_CLEAN; diff --git a/sys/uvm/uvm_pdaemon.c b/sys/uvm/uvm_pdaemon.c index c68355c7b7b..b3788da4d09 100644 --- a/sys/uvm/uvm_pdaemon.c +++ b/sys/uvm/uvm_pdaemon.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_pdaemon.c,v 1.14 1999/03/26 17:33:30 chs Exp $ */ +/* $NetBSD: uvm_pdaemon.c,v 1.16 1999/05/24 19:10:57 thorpej Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. @@ -365,11 +365,9 @@ uvmpd_scan_inactive(pglst) * update our copy of "free" and see if we've met * our target */ - s = splimp(); - uvm_lock_fpageq(); + s = uvm_lock_fpageq(); free = uvmexp.free; - uvm_unlock_fpageq(); - splx(s); + uvm_unlock_fpageq(s); if (free + uvmexp.paging >= uvmexp.freetarg << 2 || dirtyreacts == UVMPD_NUMDIRTYREACTS) { @@ -952,11 +950,9 @@ uvmpd_scan() /* * get current "free" page count */ - s = splimp(); - uvm_lock_fpageq(); + s = uvm_lock_fpageq(); free = uvmexp.free; - uvm_unlock_fpageq(); - splx(s); + uvm_unlock_fpageq(s); #ifndef __SWAP_BROKEN /* diff --git a/sys/uvm/uvm_pdaemon.h b/sys/uvm/uvm_pdaemon.h index 4590f1cef10..56ea153296e 100644 --- a/sys/uvm/uvm_pdaemon.h +++ b/sys/uvm/uvm_pdaemon.h @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_pdaemon.h,v 1.5 1998/02/10 14:12:28 mrg Exp $ */ +/* $NetBSD: uvm_pdaemon.h,v 1.6 1999/03/25 18:48:56 mrg Exp $ */ /* * Copyright (c) 1997 Charles D. Cranor and Washington University. 
diff --git a/sys/uvm/uvm_pglist.c b/sys/uvm/uvm_pglist.c index 042ab2b8749..c24125e18ec 100644 --- a/sys/uvm/uvm_pglist.c +++ b/sys/uvm/uvm_pglist.c @@ -1,4 +1,4 @@ -/* $NetBSD: uvm_pglist.c,v 1.5.2.1 1998/07/30 14:04:15 eeh Exp $ */ +/* $NetBSD: uvm_pglist.c,v 1.7 1999/05/24 19:10:58 thorpej Exp $ */ #define VM_PAGE_ALLOC_MEMORY_STATS @@ -136,8 +136,7 @@ uvm_pglistalloc(size, low, high, alignment, boundary, rlist, nsegs, waitok) /* * Block all memory allocation and lock the free list. */ - s = splimp(); - uvm_lock_fpageq(); /* lock free page queue */ + s = uvm_lock_fpageq(); /* lock free page queue */ /* Are there even any free pages? */ for (idx = 0; idx < VM_NFREELIST; idx++) @@ -238,8 +237,7 @@ uvm_pglistalloc(size, low, high, alignment, boundary, rlist, nsegs, waitok) error = 0; out: - uvm_unlock_fpageq(); - splx(s); + uvm_unlock_fpageq(s); /* * check to see if we need to generate some free pages waking @@ -271,8 +269,7 @@ uvm_pglistfree(list) /* * Block all memory allocation and lock the free list. */ - s = splimp(); - uvm_lock_fpageq(); + s = uvm_lock_fpageq(); while ((m = list->tqh_first) != NULL) { #ifdef DIAGNOSTIC @@ -287,6 +284,5 @@ uvm_pglistfree(list) STAT_DECR(uvm_pglistalloc_npages); } - uvm_unlock_fpageq(); - splx(s); + uvm_unlock_fpageq(s); } diff --git a/sys/uvm/uvm_uio.c b/sys/uvm/uvm_uio.c new file mode 100644 index 00000000000..84ef108b2b5 --- /dev/null +++ b/sys/uvm/uvm_uio.c @@ -0,0 +1,266 @@ +/* + * Copyright (c) 1999 Artur Grabowski + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the author nor the names of his contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +int uvm_uio_enable = 1; +int uvm_uio_num_try = 0; +int uvm_uio_num_success = 0; + +/* + * m_ext functions. + */ +void uvm_mbuf_free __P((struct mbuf *)); +void uvm_mbuf_ref __P((struct mbuf *)); + +/* + * returns the length of I/O, 0 on failure. + * + * Should not be called if UVM_UIO_TRY(uio) has been checked first. 
+ */ +size_t +uvm_uio_to_mbuf(uio, mbuf) + struct uio *uio; + struct mbuf *mbuf; +{ + struct vm_map *map; + vaddr_t realbase, base, kva; + vsize_t reallen, len, offset; + struct vm_page **pages; + int npages; + struct iovec *iov; + struct uvm_mbuf *um; + struct mbuf *m; +#ifndef PMAP_NEW + int i; +#endif + + uvm_uio_num_try++; + + if ((mbuf->m_flags & M_EXT)) { + printf("uvm_uio_to_mbuf: fail 1\n"); + return 0; + } + + map = &uio->uio_procp->p_vmspace->vm_map; + iov = uio->uio_iov; + + /* + * XXX - check if iov_len is bigger than max vsize_t + */ + + reallen = (vsize_t)iov->iov_len; + realbase = (vaddr_t)iov->iov_base; + + /* + * Check alignment. + * + * What we really want is to somehow tell the caller how much the + * uios should be adjusted and try again. + */ + if ((realbase & (sizeof(long) - 1)) != 0) { + printf("uvm_uio_to_mbuf: not aligned\n"); + return 0; + } + + base = trunc_page(realbase); + offset = realbase - base; + + /* + * truncate reallen here so that we won't do a huge malloc. + * Subtract offset so that the next round will be page aligned. + */ + if (reallen > UVM_UIO_LIMIT) + reallen = UVM_UIO_LIMIT - offset; + + len = reallen + offset; + len = round_page(len); + npages = atop(len); + + if ((mbuf->m_flags & M_PKTHDR)) { + + MGET(m, M_WAIT, MT_DATA); + mbuf->m_len = 0; + mbuf->m_next = m; + } else { + m = mbuf; + m->m_next = NULL; + } + + MALLOC(um, struct uvm_mbuf *, sizeof(struct uvm_mbuf), M_TEMP, + M_WAITOK); + + /* + * If the pages we have less than UVM_UIO_SMALL_PAGES, we can fit + * them into the pages struct in uvm_uio. + */ + if (npages > UVM_UIO_SMALL_PAGES) + MALLOC(pages, struct vm_page **, + npages * sizeof(struct vm_page *), M_TEMP, M_WAITOK); + else + pages = um->um_pages_small; + + /* + * Loan the pages we want. + */ + if (uvm_loan(map, base, len, (void **)pages, UVM_LOAN_TOPAGE) != + KERN_SUCCESS) { + /* + * XXX - This is really ENOMEM or EFAULT. + */ + printf("uvm_uio_to_mbuf: loan failed\n"); + + goto fail; + } + + /* + * Allocate space to map pages. + */ + kva = vm_map_min(kernel_map); + if (uvm_map(kernel_map, &kva, len, NULL, UVM_UNKNOWN_OFFSET, + UVM_MAPFLAG(UVM_PROT_READ, UVM_PROT_READ, UVM_INH_NONE, + UVM_ADV_SEQUENTIAL, 0)) != KERN_SUCCESS) { + uvm_unloanpage(pages, npages); + goto fail; + } + + /* + * Initialize um. + */ + um->um_pages = pages; + um->um_npages = npages; + um->um_usecount = 1; + um->um_kva = kva; + + printf("mapping: 0x%x -> 0x%x\n", kva, kva + len); + /* + * Map pages. + */ +#ifdef PMAP_NEW + pmap_kenter_pgs(kva, pages, npages); +#else + for (i = 0; i < npages; i++, kva += PAGE_SIZE) + pmap_enter(pmap_kernel(), kva, VM_PAGE_TO_PHYS(pages[i]), + VM_PROT_READ, TRUE, VM_PROT_READ); +#endif + + /* + * Update mbuf. + */ + m->m_flags |= M_EXT | M_RONLY; + m->m_data = (caddr_t)(um->um_kva + offset); + m->m_len = reallen; + m->m_ext.ext_free = uvm_mbuf_free; + m->m_ext.ext_ref = uvm_mbuf_ref; + /* + * We lie about those two to avoid problems with someone trying + * to prepend data. + */ + m->m_ext.ext_buf = (caddr_t)(um->um_kva + offset); + m->m_ext.ext_size = reallen; + m->m_ext.ext_handle = um; + + /* + * Update uio. 
+ */ + if ((iov->iov_len -= reallen) == 0) { + uio->uio_iov++; + uio->uio_iovcnt--; + } + uio->uio_resid -= reallen; + + uvm_uio_num_success++; + + return reallen; +fail: + if (npages > UVM_UIO_SMALL_PAGES) + FREE(pages, M_TEMP); + + if (m != mbuf) + m_freem(m); + + FREE(um, M_TEMP); + + return 0; +} + +void +uvm_mbuf_free(mb) + struct mbuf *mb; +{ + struct uvm_mbuf *um = (struct uvm_mbuf *)mb->m_ext.ext_handle; + vsize_t len; + + if (--um->um_usecount) + return; + + len = ptoa(um->um_npages); + + printf("unmapping: 0x%x -> 0x%x\n", um->um_kva, um->um_kva + len); +#ifdef PMAP_NEW + pmap_kremove(um->um_kva, len); +#else + pmap_remove(pmap_kernel(), um->um_kva, um->um_kva + len); +#endif + + uvm_unloanpage(um->um_pages, um->um_npages); + uvm_unmap(kernel_map, um->um_kva, um->um_kva + len); + uvm_km_free_wakeup(kernel_map, um->um_kva, len); + if (um->um_npages > UVM_UIO_SMALL_PAGES) + FREE(um->um_pages, M_TEMP); + + FREE(um, M_TEMP); +#ifdef DIAGNOSTIC + mb->m_data = NULL; + mb->m_ext.ext_handle = NULL; + mb->m_flags &= ~M_EXT; +#endif +} + +void +uvm_mbuf_ref(mb) + struct mbuf *mb; +{ + ((struct uvm_mbuf *)mb->m_ext.ext_handle)->um_usecount++; +} diff --git a/sys/uvm/uvm_uio.h b/sys/uvm/uvm_uio.h new file mode 100644 index 00000000000..4a8ce974664 --- /dev/null +++ b/sys/uvm/uvm_uio.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 1999 Artur Grabowski + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the author nor the names of his contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include /* for PAGE_SIZE */ + +/* + * If the number of pages we're about to transfer is smaller than this number + * we use the pre-allocated array. + */ +#define UVM_UIO_SMALL_PAGES 8 + +/* + * Limit transfers to this number to avoid running out of memory. + */ +#define UVM_UIO_LIMIT (256 * PAGE_SIZE) + +/* + * m_ext structure. 
+ */ +struct uvm_mbuf { + struct vm_page **um_pages; /* The pages */ + int um_npages; /* number of pages */ + int um_usecount; /* ref cnt */ + vaddr_t um_kva; /* where the pages are mapped */ + struct vm_page *um_pages_small[UVM_UIO_SMALL_PAGES]; +}; + +extern int uvm_uio_enable; + +#define UVM_UIO_MINIO PAGE_SIZE /* XXX - tweak */ +#define UVM_UIO_TRY(uio) (uvm_uio_enable && \ + ((uio)->uio_iov->iov_len >= UVM_UIO_MINIO) && \ + ((uio)->uio_procp != NULL) && \ + ((uio)->uio_rw == UIO_WRITE) && \ + ((uio)->uio_segflg == UIO_USERSPACE)) + +size_t uvm_uio_to_mbuf __P((struct uio *, struct mbuf *)); diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index c2a3417cf4f..38f06bd7173 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -1,4 +1,4 @@ -/* $OpenBSD: vm_map.h,v 1.10 2000/03/13 14:29:04 art Exp $ */ +/* $OpenBSD: vm_map.h,v 1.11 2000/03/16 22:11:05 art Exp $ */ /* $NetBSD: vm_map.h,v 1.11 1995/03/26 20:39:10 jtc Exp $ */ /* @@ -162,12 +162,58 @@ struct vm_map { vm_map_entry_t hint; /* hint for quick lookups */ simple_lock_data_t hint_lock; /* lock for hint storage */ vm_map_entry_t first_free; /* First free space hint */ +#ifdef UVM + int flags; /* flags (read-only) */ +#else boolean_t entries_pageable; /* map entries pageable?? */ +#endif unsigned int timestamp; /* Version number */ #define min_offset header.start #define max_offset header.end }; +#ifdef UVM +/* vm_map flags */ +#define VM_MAP_PAGEABLE 0x01 /* entries are pageable*/ +#define VM_MAP_INTRSAFE 0x02 /* interrupt safe map */ +/* + * Interrupt-safe maps must also be kept on a special list, + * to assist uvm_fault() in avoiding locking problems. + */ +struct vm_map_intrsafe { + struct vm_map vmi_map; + LIST_ENTRY(vm_map_intrsafe) vmi_list; +}; + +LIST_HEAD(vmi_list, vm_map_intrsafe); +#ifdef _KERNEL +extern simple_lock_data_t vmi_list_slock; +extern struct vmi_list vmi_list; + +static __inline int vmi_list_lock __P((void)); +static __inline void vmi_list_unlock __P((int)); + +static __inline int +vmi_list_lock() +{ + int s; + + s = splhigh(); + simple_lock(&vmi_list_slock); + return (s); +} + +static __inline void +vmi_list_unlock(s) + int s; +{ + + simple_unlock(&vmi_list_slock); + splx(s); +} +#endif /* _KERNEL */ +#endif /* UVM */ + #ifndef UVM /* version handled elsewhere in uvm */ /* * Map versions are used to validate a previous lookup attempt. @@ -185,6 +231,96 @@ typedef struct { } vm_map_version_t; #endif /* UVM */ +#ifdef UVM + +/* + * VM map locking operations: + * + * These operations perform locking on the data portion of the + * map. + * + * vm_map_lock_try: try to lock a map, failing if it is already locked. + * + * vm_map_lock: acquire an exclusive (write) lock on a map. + * + * vm_map_lock_read: acquire a shared (read) lock on a map. + * + * vm_map_unlock: release an exclusive lock on a map. + * + * vm_map_unlock_read: release a shared lock on a map. + * + * Note that "intrsafe" maps use only exclusive, spin locks. We simply + * use the sleep lock's interlock for this. 
+ */ + +#ifdef _KERNEL +/* XXX: clean up later */ +#include +#include /* XXX for curproc and p_pid */ + +static __inline boolean_t vm_map_lock_try __P((vm_map_t)); + +static __inline boolean_t +vm_map_lock_try(map) + vm_map_t map; +{ + boolean_t rv; + + if (map->flags & VM_MAP_INTRSAFE) + rv = simple_lock_try(&map->lock.lk_interlock); + else + rv = (lockmgr(&map->lock, LK_EXCLUSIVE|LK_NOWAIT, NULL, curproc) == 0); + + if (rv) + map->timestamp++; + + return (rv); +} + +#ifdef DIAGNOSTIC +#define _vm_map_lock(map) \ +do { \ + if (lockmgr(&(map)->lock, LK_EXCLUSIVE, NULL, curproc) != 0) \ + panic("vm_map_lock: failed to get lock"); \ +} while (0) +#else +#define _vm_map_lock(map) \ + (void) lockmgr(&(map)->lock, LK_EXCLUSIVE, NULL, curproc) +#endif + +#define vm_map_lock(map) \ +do { \ + if ((map)->flags & VM_MAP_INTRSAFE) \ + simple_lock(&(map)->lock.lk_interlock); \ + else \ + _vm_map_lock((map)); \ + (map)->timestamp++; \ +} while (0) + +#ifdef DIAGNOSTIC +#define vm_map_lock_read(map) \ +do { \ + if (map->flags & VM_MAP_INTRSAFE) \ + panic("vm_map_lock_read: intrsafe map"); \ + (void) lockmgr(&(map)->lock, LK_SHARED, NULL, curproc); \ +} while (0) +#else +#define vm_map_lock_read(map) \ + (void) lockmgr(&(map)->lock, LK_SHARED, NULL, curproc) +#endif + +#define vm_map_unlock(map) \ +do { \ + if ((map)->flags & VM_MAP_INTRSAFE) \ + simple_unlock(&(map)->lock.lk_interlock); \ + else \ + (void) lockmgr(&(map)->lock, LK_RELEASE, NULL, curproc);\ +} while (0) + +#define vm_map_unlock_read(map) \ + (void) lockmgr(&(map)->lock, LK_RELEASE, NULL, curproc) +#endif /* _KERNEL */ +#else /* UVM */ /* * Macros: vm_map_lock, etc. * Function: @@ -227,21 +363,7 @@ typedef struct { (map)->lk_flags &= ~LK_CANRECURSE; \ simple_unlock(&(map)->lk_interlock); \ } -#if defined(UVM) && defined(_KERNEL) -/* XXX: clean up later */ -static boolean_t vm_map_lock_try __P((vm_map_t)); - -static __inline boolean_t vm_map_lock_try(map) - -vm_map_t map; - -{ - if (lockmgr(&(map)->lock, LK_EXCLUSIVE|LK_NOWAIT, (void *)0, curproc) != 0) - return(FALSE); - map->timestamp++; - return(TRUE); -} -#endif +#endif /* UVM */ /* * Functions implemented as macros @@ -255,7 +377,11 @@ vm_map_t map; #define MAX_KMAP 20 #endif #ifndef MAX_KMAPENT -#define MAX_KMAPENT 1000 +#if (50 + (2 * NPROC) > 1000) +#define MAX_KMAPENT (50 + (2 * NPROC)) +#else +#define MAX_KMAPENT 1000 /* XXXCDC: no crash */ +#endif #endif #if defined(_KERNEL) && !defined(UVM) -- 2.20.1
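A note on the free page queue locking used throughout the diff above: uvm_lock_fpageq() now raises the interrupt priority level itself (splimp) and returns the previous level, and uvm_unlock_fpageq() takes that level back and restores it, so callers no longer bracket the pair with splimp()/splx(). A minimal caller sketch, mirroring the uvmpd_scan() change above (the local variables are illustrative only):

	int s, free;

	s = uvm_lock_fpageq();		/* raises spl and takes uvm.fpageqlock */
	free = uvmexp.free;		/* sample counters while the queue is stable */
	uvm_unlock_fpageq(s);		/* drops the lock and restores the saved spl */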
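The VM_MAP_PAGEABLE/VM_MAP_INTRSAFE flags added to struct vm_map change what the locking macros do: on an intrsafe map vm_map_lock() takes the lockmgr lock's interlock, a spin lock, instead of sleeping in lockmgr(), and under DIAGNOSTIC vm_map_lock_read() panics on such a map. The sketch below is illustrative only; example_intrsafe_update() is a hypothetical caller, not a function in this patch:

	/*
	 * Exclusive-lock an interrupt safe map from a context that must
	 * not sleep.  The same vm_map_lock()/vm_map_unlock() calls work
	 * on ordinary maps, but there they may sleep.
	 */
	void
	example_intrsafe_update(map)
		vm_map_t map;
	{
		/* caller guarantees (map->flags & VM_MAP_INTRSAFE) != 0 */
		vm_map_lock(map);	/* spins on the interlock, never sleeps */
		/* ... modify entries; do not sleep or take faults while held ... */
		vm_map_unlock(map);
	}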
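uvm_uio_to_mbuf() is meant to be gated by UVM_UIO_TRY(): the macro requires uvm_uio_enable, an iovec of at least UVM_UIO_MINIO bytes, a process context, UIO_WRITE and UIO_USERSPACE, and the function returns 0 whenever the page loan cannot be set up, leaving the uio untouched so the caller can fall back to copying. A hypothetical call site sketch; the MGETHDR()/uiomove() fallback is an assumption about the eventual caller (for example a socket send path) and is not contained in this patch:

	struct mbuf *m;
	size_t loaned;
	int error;

	MGETHDR(m, M_WAIT, MT_DATA);
	loaned = 0;
	if (UVM_UIO_TRY(uio))
		loaned = uvm_uio_to_mbuf(uio, m);	/* loans and maps user pages */
	if (loaned == 0) {
		/* loan declined or failed: copy the data the traditional way */
		m->m_len = uio->uio_resid < MHLEN ? uio->uio_resid : MHLEN;
		error = uiomove(mtod(m, caddr_t), m->m_len, uio);
	}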