From 7282a73c3cf7a0214a746f5aa15b1a50e734fd90 Mon Sep 17 00:00:00 2001
From: dlg <dlg@openbsd.org>
Date: Mon, 10 Jul 2017 23:49:10 +0000
Subject: [PATCH] make malloc(9) mpsafe by using a mutex instead of splvm.

this is almost a straightforward swap of the spl ops for mutex ops,
except that the accounting has been shuffled around: memory is now
counted as used before an attempt is made to allocate it from uvm,
which prevents overcommitting memory. this is modelled on how pools
limit allocations.

the uvm bits have been eyeballed by kettenis@, who says they should be
safe. visa@ found some nits, which have been fixed.

tested by chris@ and amit kulkarni
ok kettenis@ visa@ mpi@
---
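Notes (below the fold, so not part of the commit message): the early
accounting is easier to follow outside of diff context. The sketch
below is a standalone userland rendition of the same pattern: reserve
against the limit under the lock, allocate outside it, and on failure
roll the reservation back and wake anyone sleeping on the limit. All
names here (limited_alloc, acct_*) are made up for illustration; the
kernel code instead uses msleep(9)/wakeup(9), and computes a "wake"
flag so it only calls wakeup() when the rollback recrosses the limit,
which the condition variable broadcast approximates here.

	#include <stdlib.h>
	#include <pthread.h>

	static pthread_mutex_t acct_mtx = PTHREAD_MUTEX_INITIALIZER;
	static pthread_cond_t acct_cv = PTHREAD_COND_INITIALIZER;
	static size_t acct_used;	/* bytes reserved by callers */
	static const size_t acct_limit = 1024 * 1024;

	void *
	limited_alloc(size_t size)
	{
		void *p;

		pthread_mutex_lock(&acct_mtx);
		/* sleep while we are over the limit, as malloc(9) does */
		while (acct_used >= acct_limit)
			pthread_cond_wait(&acct_cv, &acct_mtx);
		acct_used += size;	/* account for this early */
		pthread_mutex_unlock(&acct_mtx);

		/* the backing allocation happens outside the lock */
		p = malloc(size);
		if (p == NULL) {
			/* failed: roll back, then wake limit sleepers */
			pthread_mutex_lock(&acct_mtx);
			acct_used -= size;
			pthread_mutex_unlock(&acct_mtx);
			pthread_cond_broadcast(&acct_cv);
		}
		return (p);
	}
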
 sys/kern/kern_malloc.c       | 79 +++++++++++++++++++++----------
 sys/kern/kern_malloc_debug.c | 73 ++++++++++++++++++++------------
 2 files changed, 92 insertions(+), 60 deletions(-)

diff --git a/sys/kern/kern_malloc.c b/sys/kern/kern_malloc.c
index dff635d9ce6..60b29ad437b 100644
--- a/sys/kern/kern_malloc.c
+++ b/sys/kern/kern_malloc.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: kern_malloc.c,v 1.129 2017/06/07 13:30:36 mpi Exp $	*/
+/*	$OpenBSD: kern_malloc.c,v 1.130 2017/07/10 23:49:10 dlg Exp $	*/
 /*	$NetBSD: kern_malloc.c,v 1.15.4.2 1996/06/13 17:10:56 cgd Exp $	*/
 
 /*
@@ -39,6 +39,7 @@
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <sys/time.h>
+#include <sys/mutex.h>
 #include <sys/rwlock.h>
 
 #include <uvm/uvm_extern.h>
@@ -94,6 +95,7 @@ u_int	nkmempages_min = 0;
 #endif
 u_int	nkmempages_max = 0;
 
+struct mutex malloc_mtx = MUTEX_INITIALIZER(IPL_VM);
 struct kmembuckets bucket[MINBUCKET + 16];
 #ifdef KMEMSTATS
 struct kmemstats kmemstats[M_LAST];
@@ -151,36 +153,30 @@ malloc(size_t size, int type, int flags)
 	struct kmemusage *kup;
 	struct kmem_freelist *freep;
 	long indx, npg, allocsize;
-	int s;
 	caddr_t va, cp;
+	int s;
 #ifdef DIAGNOSTIC
 	int freshalloc;
 	char *savedtype;
 #endif
 #ifdef KMEMSTATS
 	struct kmemstats *ksp = &kmemstats[type];
+	int wake;
 
 	if (((unsigned long)type) <= 1 || ((unsigned long)type) >= M_LAST)
 		panic("malloc: bogus type %d", type);
 #endif
 
-	if (!cold)
-		KERNEL_ASSERT_LOCKED();
-
 	KASSERT(flags & (M_WAITOK | M_NOWAIT));
 
+#ifdef DIAGNOSTIC
 	if ((flags & M_NOWAIT) == 0) {
 		extern int pool_debug;
-#ifdef DIAGNOSTIC
 		assertwaitok();
 		if (pool_debug == 2)
 			yield();
-#endif
-		if (!cold && pool_debug) {
-			KERNEL_UNLOCK();
-			KERNEL_LOCK();
-		}
 	}
+#endif
 
 #ifdef MALLOC_DEBUG
 	if (debug_malloc(size, type, flags, (void **)&va)) {
@@ -193,6 +189,7 @@ malloc(size_t size, int type, int flags)
 	if (size > 65535 * PAGE_SIZE) {
 		if (flags & M_CANFAIL) {
 #ifndef SMALL_KERNEL
+			/* XXX lock */
 			if (ratecheck(&malloc_lasterr, &malloc_errintvl))
 				printf("malloc(): allocation too large, "
 				    "type = %d, size = %lu\n", type, size);
@@ -204,32 +201,36 @@ malloc(size_t size, int type, int flags)
 	}
 
 	indx = BUCKETINDX(size);
+	if (size > MAXALLOCSAVE)
+		allocsize = round_page(size);
+	else
+		allocsize = 1 << indx;
 	kbp = &bucket[indx];
-	s = splvm();
+	mtx_enter(&malloc_mtx);
 #ifdef KMEMSTATS
 	while (ksp->ks_memuse >= ksp->ks_limit) {
 		if (flags & M_NOWAIT) {
-			splx(s);
+			mtx_leave(&malloc_mtx);
 			return (NULL);
 		}
 		if (ksp->ks_limblocks < 65535)
 			ksp->ks_limblocks++;
-		tsleep(ksp, PSWP+2, memname[type], 0);
+		msleep(ksp, &malloc_mtx, PSWP+2, memname[type], 0);
 	}
+	ksp->ks_memuse += allocsize; /* account for this early */
 	ksp->ks_size |= 1 << indx;
 #endif
-	if (size > MAXALLOCSAVE)
-		allocsize = round_page(size);
-	else
-		allocsize = 1 << indx;
 	if (XSIMPLEQ_FIRST(&kbp->kb_freelist) == NULL) {
+		mtx_leave(&malloc_mtx);
 		npg = atop(round_page(allocsize));
+		s = splvm();
 		va = (caddr_t)uvm_km_kmemalloc_pla(kmem_map, NULL,
 		    (vsize_t)ptoa(npg), 0,
 		    ((flags & M_NOWAIT) ? UVM_KMF_NOWAIT : 0) |
 		    ((flags & M_CANFAIL) ? UVM_KMF_CANFAIL : 0),
 		    no_constraint.ucr_low, no_constraint.ucr_high,
 		    0, 0, 0);
+		splx(s);
 		if (va == NULL) {
 			/*
 			 * Kmem_malloc() can return NULL, even if it can
@@ -241,9 +242,20 @@ malloc(size_t size, int type, int flags)
 			 */
 			if ((flags & (M_NOWAIT|M_CANFAIL)) == 0)
 				panic("malloc: out of space in kmem_map");
-			splx(s);
+
+#ifdef KMEMSTATS
+			mtx_enter(&malloc_mtx);
+			ksp->ks_memuse -= allocsize;
+			wake = ksp->ks_memuse + allocsize >= ksp->ks_limit &&
+			    ksp->ks_memuse < ksp->ks_limit;
+			mtx_leave(&malloc_mtx);
+			if (wake)
+				wakeup(ksp);
+#endif
+
 			return (NULL);
 		}
+		mtx_enter(&malloc_mtx);
 #ifdef KMEMSTATS
 		kbp->kb_total += kbp->kb_elmpercl;
 #endif
@@ -254,9 +266,6 @@ malloc(size_t size, int type, int flags)
 #endif
 		if (allocsize > MAXALLOCSAVE) {
 			kup->ku_pagecnt = npg;
-#ifdef KMEMSTATS
-			ksp->ks_memuse += allocsize;
-#endif
 			goto out;
 		}
 #ifdef KMEMSTATS
@@ -334,7 +343,6 @@ malloc(size_t size, int type, int flags)
 			panic("malloc: lost data");
 		kup->ku_freecnt--;
 		kbp->kb_totalfree--;
-	ksp->ks_memuse += 1 << indx;
 out:
 	kbp->kb_calls++;
 	ksp->ks_inuse++;
@@ -344,7 +352,7 @@ out:
 #else
 out:
 #endif
-	splx(s);
+	mtx_leave(&malloc_mtx);
 
 	if ((flags & M_ZERO) && va != NULL)
 		memset(va, 0, size);
@@ -369,9 +377,6 @@ free(void *addr, int type, size_t freedsize)
 	struct kmemstats *ksp = &kmemstats[type];
 #endif
 
-	if (!cold)
-		KERNEL_ASSERT_LOCKED();
-
 	if (addr == NULL)
 		return;
 
@@ -391,7 +396,6 @@ free(void *addr, int type, size_t freedsize)
 	kbp = &bucket[kup->ku_indx];
 	if (size > MAXALLOCSAVE)
 		size = kup->ku_pagecnt << PAGE_SHIFT;
-	s = splvm();
 #ifdef DIAGNOSTIC
 	if (freedsize != 0 && freedsize > size)
 		panic("free: size too large %zu > %ld (%p) type %s",
@@ -412,8 +416,11 @@ free(void *addr, int type, size_t freedsize)
 		    addr, size, memname[type], alloc);
 #endif /* DIAGNOSTIC */
 	if (size > MAXALLOCSAVE) {
+		s = splvm();
 		uvm_km_free(kmem_map, (vaddr_t)addr, ptoa(kup->ku_pagecnt));
+		splx(s);
 #ifdef KMEMSTATS
+		mtx_enter(&malloc_mtx);
 		ksp->ks_memuse -= size;
 		kup->ku_indx = 0;
 		kup->ku_pagecnt = 0;
@@ -422,11 +429,12 @@ free(void *addr, int type, size_t freedsize)
 			wakeup(ksp);
 		ksp->ks_inuse--;
 		kbp->kb_total -= 1;
+		mtx_leave(&malloc_mtx);
 #endif
-		splx(s);
 		return;
 	}
 	freep = (struct kmem_freelist *)addr;
+	mtx_enter(&malloc_mtx);
 #ifdef DIAGNOSTIC
 	/*
 	 * Check for multiple frees. Use a quick check to see if
@@ -468,7 +476,7 @@ free(void *addr, int type, size_t freedsize)
 	ksp->ks_inuse--;
 #endif
 	XSIMPLEQ_INSERT_TAIL(&kbp->kb_freelist, freep, kf_flist);
-	splx(s);
+	mtx_leave(&malloc_mtx);
 }
 
 /*
@@ -578,6 +586,9 @@ sysctl_malloc(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
     size_t newlen, struct proc *p)
 {
 	struct kmembuckets kb;
+#ifdef KMEMSTATS
+	struct kmemstats km;
+#endif
 #if defined(KMEMSTATS) || defined(DIAGNOSTIC) || defined(FFS_SOFTUPDATES)
 	int error;
 #endif
@@ -606,15 +617,19 @@ sysctl_malloc(int *name, u_int namelen, void *oldp, size_t *oldlenp, void *newp,
 		return (sysctl_rdstring(oldp, oldlenp, newp,
 		    buckstring));
 	case KERN_MALLOC_BUCKET:
+		mtx_enter(&malloc_mtx);
 		memcpy(&kb, &bucket[BUCKETINDX(name[1])], sizeof(kb));
+		mtx_leave(&malloc_mtx);
 		memset(&kb.kb_freelist, 0, sizeof(kb.kb_freelist));
 		return (sysctl_rdstruct(oldp, oldlenp, newp, &kb, sizeof(kb)));
 	case KERN_MALLOC_KMEMSTATS:
 #ifdef KMEMSTATS
 		if ((name[1] < 0) || (name[1] >= M_LAST))
 			return (EINVAL);
-		return (sysctl_rdstruct(oldp, oldlenp, newp,
-		    &kmemstats[name[1]], sizeof(struct kmemstats)));
+		mtx_enter(&malloc_mtx);
+		memcpy(&km, &kmemstats[name[1]], sizeof(km));
+		mtx_leave(&malloc_mtx);
+		return (sysctl_rdstruct(oldp, oldlenp, newp, &km, sizeof(km)));
 #else
 		return (EOPNOTSUPP);
 #endif
diff --git a/sys/kern/kern_malloc_debug.c b/sys/kern/kern_malloc_debug.c
index 701aee09bae..5ccf09786ff 100644
--- a/sys/kern/kern_malloc_debug.c
+++ b/sys/kern/kern_malloc_debug.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: kern_malloc_debug.c,v 1.34 2014/11/16 12:31:00 deraadt Exp $	*/
+/*	$OpenBSD: kern_malloc_debug.c,v 1.35 2017/07/10 23:49:10 dlg Exp $	*/
 
 /*
  * Copyright (c) 1999, 2000 Artur Grabowski <art@openbsd.org>
@@ -56,8 +56,10 @@
 #include <sys/malloc.h>
 #include <sys/systm.h>
 #include <sys/pool.h>
+#include <sys/mutex.h>
 
 #include <uvm/uvm.h>
+#include <uvm/uvm_extern.h>
 
 /*
  * debug_malloc_type and debug_malloc_size define the type and size of
@@ -104,12 +106,13 @@ int debug_malloc_chunks_on_freelist;
 int debug_malloc_initialized;
 
 struct pool debug_malloc_pool;
+struct mutex debug_malloc_mtx = MUTEX_INITIALIZER(IPL_VM);
 
 int
 debug_malloc(unsigned long size, int type, int flags, void **addr)
 {
 	struct debug_malloc_entry *md = NULL;
-	int s, wait = (flags & M_NOWAIT) == 0;
+	int wait = (flags & M_NOWAIT) == 0;
 
 	/* Careful not to compare unsigned long to int -1 */
 	if (((type != debug_malloc_type && debug_malloc_type != 0) ||
@@ -123,13 +126,14 @@ debug_malloc(unsigned long size, int type, int flags, void **addr)
 	if (size > PAGE_SIZE)
 		return (0);
 
-	s = splvm();
 	if (debug_malloc_chunks_on_freelist < MALLOC_DEBUG_CHUNKS)
 		debug_malloc_allocate_free(wait);
 
+	mtx_enter(&debug_malloc_mtx);
+
 	md = TAILQ_FIRST(&debug_malloc_freelist);
 	if (md == NULL) {
-		splx(s);
+		mtx_leave(&debug_malloc_mtx);
 		return (0);
 	}
 	TAILQ_REMOVE(&debug_malloc_freelist, md, md_list);
@@ -137,14 +141,15 @@ debug_malloc(unsigned long size, int type, int flags, void **addr)
 	TAILQ_INSERT_HEAD(&debug_malloc_usedlist, md, md_list);
 	debug_malloc_allocs++;
-	splx(s);
-
-	pmap_kenter_pa(md->md_va, md->md_pa, PROT_READ | PROT_WRITE);
-	pmap_update(pmap_kernel());
 
 	md->md_size = size;
 	md->md_type = type;
+	mtx_leave(&debug_malloc_mtx);
+
+	pmap_kenter_pa(md->md_va, md->md_pa, PROT_READ | PROT_WRITE);
+	pmap_update(pmap_kernel());
+
 	/*
 	 * Align the returned addr so that it ends where the first page
 	 * ends. roundup to get decent alignment.
@@ -158,7 +163,6 @@ debug_free(void *addr, int type)
 {
 	struct debug_malloc_entry *md;
 	vaddr_t va;
-	int s;
 
 	if (type != debug_malloc_type && debug_malloc_type != 0 &&
 	    type != M_DEBUG)
@@ -169,7 +173,7 @@ debug_free(void *addr, int type)
 	 */
 	va = trunc_page((vaddr_t)addr);
 
-	s = splvm();
+	mtx_enter(&debug_malloc_mtx);
 	TAILQ_FOREACH(md, &debug_malloc_usedlist, md_list)
 		if (md->md_va == va)
 			break;
@@ -185,21 +189,24 @@ debug_free(void *addr, int type)
 		TAILQ_FOREACH(md, &debug_malloc_freelist, md_list)
 			if (md->md_va == va)
 				panic("debug_free: already free");
-		splx(s);
+		mtx_leave(&debug_malloc_mtx);
 		return (0);
 	}
 
 	debug_malloc_frees++;
 	TAILQ_REMOVE(&debug_malloc_usedlist, md, md_list);
+	mtx_leave(&debug_malloc_mtx);
 
-	TAILQ_INSERT_TAIL(&debug_malloc_freelist, md, md_list);
-	debug_malloc_chunks_on_freelist++;
 	/*
 	 * unmap the page.
 	 */
 	pmap_kremove(md->md_va, PAGE_SIZE);
 	pmap_update(pmap_kernel());
-	splx(s);
+
+	mtx_enter(&debug_malloc_mtx);
+	TAILQ_INSERT_TAIL(&debug_malloc_freelist, md, md_list);
+	debug_malloc_chunks_on_freelist++;
+	mtx_leave(&debug_malloc_mtx);
 
 	return (1);
 }
@@ -217,7 +224,7 @@ debug_malloc_init(void)
 	debug_malloc_chunks_on_freelist = 0;
 
 	pool_init(&debug_malloc_pool, sizeof(struct debug_malloc_entry),
-	    0, 0, 0, "mdbepl", NULL);
+	    0, IPL_VM, 0, "mdbepl", NULL);
 
 	debug_malloc_initialized = 1;
 }
@@ -233,19 +240,18 @@ debug_malloc_allocate_free(int wait)
 	vaddr_t va, offset;
 	struct vm_page *pg;
 	struct debug_malloc_entry *md;
-
-	splassert(IPL_VM);
+	int s;
 
 	md = pool_get(&debug_malloc_pool, wait ? PR_WAITOK : PR_NOWAIT);
 	if (md == NULL)
 		return;
 
+	s = splvm();
+
 	va = uvm_km_kmemalloc(kmem_map, NULL, PAGE_SIZE * 2,
 	    UVM_KMF_VALLOC | (wait ? 0 : UVM_KMF_NOWAIT));
-	if (va == 0) {
-		pool_put(&debug_malloc_pool, md);
-		return;
-	}
+	if (va == 0)
+		goto put;
 
 	offset = va - vm_map_min(kernel_map);
 	for (;;) {
@@ -258,20 +264,31 @@ debug_malloc_allocate_free(int wait)
 		if (pg)
 			break;
 
-		if (wait == 0) {
-			uvm_unmap(kmem_map, va, va + PAGE_SIZE * 2);
-			pool_put(&debug_malloc_pool, md);
-			return;
-		}
+		if (wait == 0)
+			goto unmap;
+
 		uvm_wait("debug_malloc");
 	}
+	splx(s);
+
 	md->md_va = va;
 	md->md_pa = VM_PAGE_TO_PHYS(pg);
+
+	mtx_enter(&debug_malloc_mtx);
 	debug_malloc_pages++;
 	TAILQ_INSERT_HEAD(&debug_malloc_freelist, md, md_list);
 	debug_malloc_chunks_on_freelist++;
+	mtx_leave(&debug_malloc_mtx);
+
+	return;
+
+unmap:
+	uvm_unmap(kmem_map, va, va + PAGE_SIZE * 2);
+put:
+	splx(s);
+	pool_put(&debug_malloc_pool, md);
+	return;
 }
 
 void
@@ -311,7 +328,7 @@ debug_malloc_printit(
 		if (addr >= md->md_va &&
 		    addr < md->md_va + 2 * PAGE_SIZE) {
 			(*pr)("Memory at address 0x%lx is in a freed "
-			    "area. type %d, size: %d\n",
+			    "area. type %d, size: %zu\n",
 			    addr, md->md_type, md->md_size);
 			return;
 		}
@@ -320,7 +337,7 @@ debug_malloc_printit(
 		if (addr >= md->md_va + PAGE_SIZE &&
 		    addr < md->md_va + 2 * PAGE_SIZE) {
 			(*pr)("Memory at address 0x%lx is just outside "
-			    "an allocated area. type %d, size: %d\n",
+			    "an allocated area. type %d, size: %zu\n",
 			    addr, md->md_type, md->md_size);
 			return;
 		}
-- 
2.20.1