From 82673a188a32931f4005a3ede8f05d97542feb17 Mon Sep 17 00:00:00 2001
From: mpi
Date: Wed, 1 May 2024 12:54:27 +0000
Subject: [PATCH] Add per-CPU caches to the pmemrange allocator.

The caches are used primarily to reduce contention on uvm_lock_fpageq()
during concurrent page faults.  For the moment only uvm_pagealloc() tries
to get a page from the current CPU's cache, so the caches also end up
being used by the pmap layer on some architectures.

Each cache is composed of two magazines, a design borrowed from Jeff
Bonwick's vmem paper; the implementation is similar to the pool_cache
code from dlg@.  However there is no depot layer and the magazines are
refilled directly by the pmemrange allocator.

This version includes splvm()/splx() dances because the buffer cache
flips buffers in interrupt context, so we have to prevent recursive
access to the per-CPU magazines.

Tested by naddy@, solene@, krw@, robert@, claudio@ and Laurence Tratt.

ok claudio@, kettenis@
---
A small user-space sketch of the two-magazine scheme is appended after
the patch.

 sys/arch/amd64/include/cpu.h |   5 +-
 sys/arch/arm64/include/cpu.h |   5 +-
 sys/arch/i386/include/cpu.h  |   5 +-
 sys/uvm/uvm_page.c           |  20 ++--
 sys/uvm/uvm_pdaemon.c        |   4 +-
 sys/uvm/uvm_percpu.h         |  48 ++++++++++
 sys/uvm/uvm_pmemrange.c      | 169 ++++++++++++++++++++++++++++++++++-
 sys/uvm/uvm_pmemrange.h      |   6 +-
 sys/uvm/uvmexp.h             |   8 +-
 usr.bin/systat/uvm.c         |  13 ++-
 usr.bin/vmstat/vmstat.c      |   7 +-
 11 files changed, 258 insertions(+), 32 deletions(-)

diff --git a/sys/arch/amd64/include/cpu.h b/sys/arch/amd64/include/cpu.h
index 998fc8edd51..318e87b5988 100644
--- a/sys/arch/amd64/include/cpu.h
+++ b/sys/arch/amd64/include/cpu.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: cpu.h,v 1.167 2024/04/19 10:22:50 mpi Exp $	*/
+/*	$OpenBSD: cpu.h,v 1.168 2024/05/01 12:54:27 mpi Exp $	*/
 /*	$NetBSD: cpu.h,v 1.1 2003/04/26 18:39:39 fvdl Exp $	*/
 
 /*-
@@ -53,6 +53,7 @@
 #include
 #include
 #include
+#include <uvm/uvm_percpu.h>
 
 #ifdef _KERNEL
 
@@ -210,6 +211,8 @@ struct cpu_info {
 
 #ifdef MULTIPROCESSOR
 	struct srp_hazard	ci_srp_hazards[SRP_HAZARD_NUM];
+#define __HAVE_UVM_PERCPU
+	struct uvm_pmr_cache	ci_uvm;		/* [o] page cache */
 #endif
 
 	struct ksensordev	ci_sensordev;
diff --git a/sys/arch/arm64/include/cpu.h b/sys/arch/arm64/include/cpu.h
index 4cc24ee7235..c4073465a7a 100644
--- a/sys/arch/arm64/include/cpu.h
+++ b/sys/arch/arm64/include/cpu.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: cpu.h,v 1.46 2024/04/29 13:01:54 jsg Exp $	*/
+/*	$OpenBSD: cpu.h,v 1.47 2024/05/01 12:54:27 mpi Exp $	*/
 /*
  * Copyright (c) 2016 Dale Rahn
  *
@@ -104,6 +104,7 @@ extern uint64_t cpu_id_aa64pfr1;
 #include
 #include
 #include
+#include <uvm/uvm_percpu.h>
 
 struct cpu_info {
 	struct device		*ci_dev; /* Device corresponding to this CPU */
@@ -157,6 +158,8 @@ struct cpu_info {
 
 #ifdef MULTIPROCESSOR
 	struct srp_hazard	ci_srp_hazards[SRP_HAZARD_NUM];
+#define __HAVE_UVM_PERCPU
+	struct uvm_pmr_cache	ci_uvm;
 	volatile int		ci_flags;
 
 	volatile int		ci_ddb_paused;
diff --git a/sys/arch/i386/include/cpu.h b/sys/arch/i386/include/cpu.h
index 5709bf5480c..60f0d044a4d 100644
--- a/sys/arch/i386/include/cpu.h
+++ b/sys/arch/i386/include/cpu.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: cpu.h,v 1.187 2024/04/19 10:22:50 mpi Exp $	*/
+/*	$OpenBSD: cpu.h,v 1.188 2024/05/01 12:54:27 mpi Exp $	*/
 /*	$NetBSD: cpu.h,v 1.35 1996/05/05 19:29:26 christos Exp $	*/
 
 /*-
@@ -69,6 +69,7 @@
 #include
 #include
 #include
+#include <uvm/uvm_percpu.h>
 
 struct intrsource;
 
@@ -99,6 +100,8 @@ struct cpu_info {
 
 #if defined(MULTIPROCESSOR)
 	struct srp_hazard	ci_srp_hazards[SRP_HAZARD_NUM];
+#define __HAVE_UVM_PERCPU
+	struct uvm_pmr_cache	ci_uvm;
 #endif
 
 	/*
diff --git a/sys/uvm/uvm_page.c b/sys/uvm/uvm_page.c
index c85ddde0142..470aa1bdd2e 100644
--- a/sys/uvm/uvm_page.c
+++ b/sys/uvm/uvm_page.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: uvm_page.c,v 1.176 2024/04/19 10:22:51 mpi Exp $	*/
+/*	$OpenBSD: uvm_page.c,v 1.177 2024/05/01 12:54:27 mpi Exp $	*/
 /*	$NetBSD: uvm_page.c,v 1.44 2000/11/27 08:40:04 chs Exp $	*/
 
 /*
@@ -877,13 +877,11 @@ uvm_pagerealloc_multi(struct uvm_object *obj, voff_t off, vsize_t size,
  * => only one of obj or anon can be non-null
  * => caller must activate/deactivate page if it is not wired.
  */
-
 struct vm_page *
 uvm_pagealloc(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
     int flags)
 {
-	struct vm_page *pg;
-	struct pglist pgl;
+	struct vm_page *pg = NULL;
 	int pmr_flags;
 
 	KASSERT(obj == NULL || anon == NULL);
@@ -906,13 +904,10 @@ uvm_pagealloc(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
 	if (flags & UVM_PGA_ZERO)
 		pmr_flags |= UVM_PLA_ZERO;
 
-	TAILQ_INIT(&pgl);
-	if (uvm_pmr_getpages(1, 0, 0, 1, 0, 1, pmr_flags, &pgl) != 0)
-		goto fail;
-
-	pg = TAILQ_FIRST(&pgl);
-	KASSERT(pg != NULL && TAILQ_NEXT(pg, pageq) == NULL);
+	pg = uvm_pmr_cache_get(pmr_flags);
+	if (pg == NULL)
+		return NULL;
 
 	uvm_pagealloc_pg(pg, obj, off, anon);
 	KASSERT((pg->pg_flags & PG_DEV) == 0);
 	if (flags & UVM_PGA_ZERO)
@@ -921,9 +916,6 @@ uvm_pagealloc(struct uvm_object *obj, voff_t off, struct vm_anon *anon,
 		atomic_setbits_int(&pg->pg_flags, PG_CLEAN);
 
 	return pg;
-
-fail:
-	return NULL;
 }
 
 /*
@@ -1025,7 +1017,7 @@ void
 uvm_pagefree(struct vm_page *pg)
 {
 	uvm_pageclean(pg);
-	uvm_pmr_freepages(pg, 1);
+	uvm_pmr_cache_put(pg);
 }
 
 /*
diff --git a/sys/uvm/uvm_pdaemon.c b/sys/uvm/uvm_pdaemon.c
index 3688034fa1c..700d4206ed2 100644
--- a/sys/uvm/uvm_pdaemon.c
+++ b/sys/uvm/uvm_pdaemon.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: uvm_pdaemon.c,v 1.113 2024/04/19 10:22:51 mpi Exp $	*/
+/*	$OpenBSD: uvm_pdaemon.c,v 1.114 2024/05/01 12:54:27 mpi Exp $	*/
 /*	$NetBSD: uvm_pdaemon.c,v 1.23 2000/08/20 10:24:14 bjh21 Exp $	*/
 
 /*
@@ -262,6 +262,8 @@ uvm_pageout(void *arg)
 #if NDRM > 0
 		drmbackoff(size * 2);
 #endif
+		uvm_pmr_cache_drain();
+
 		/*
 		 * scan if needed
 		 */
diff --git a/sys/uvm/uvm_percpu.h b/sys/uvm/uvm_percpu.h
index e69de29bb2d..ceb110a8bed 100644
--- a/sys/uvm/uvm_percpu.h
+++ b/sys/uvm/uvm_percpu.h
@@ -0,0 +1,48 @@
+/*	$OpenBSD: uvm_percpu.h,v 1.3 2024/05/01 12:54:27 mpi Exp $	*/
+
+/*
+ * Copyright (c) 2024 Martin Pieuchot
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef _UVM_UVM_PCPU_H_
+#define _UVM_UVM_PCPU_H_
+
+struct vm_page;
+
+/*
+ * The number of pages per magazine should be large enough to get rid of the
+ * contention in the pmemrange allocator during concurrent page faults and
+ * small enough to limit fragmentation.
+ */
+#define UVM_PMR_CACHEMAGSZ	8
+
+/*
+ * Magazine
+ */
+struct uvm_pmr_cache_item {
+	struct vm_page	*upci_pages[UVM_PMR_CACHEMAGSZ];
+	int		 upci_npages;	/* # of pages in magazine */
+};
+
+/*
+ * Per-CPU cache of physical pages.
+ */
+struct uvm_pmr_cache {
+	struct uvm_pmr_cache_item	upc_magz[2];	/* magazines */
+	int				upc_actv;	/* index of active magazine */
+
+};
+
+#endif /* _UVM_UVM_PCPU_H_ */
diff --git a/sys/uvm/uvm_pmemrange.c b/sys/uvm/uvm_pmemrange.c
index b88f80c3be7..04abd46fe77 100644
--- a/sys/uvm/uvm_pmemrange.c
+++ b/sys/uvm/uvm_pmemrange.c
@@ -1,6 +1,7 @@
-/*	$OpenBSD: uvm_pmemrange.c,v 1.65 2024/04/19 10:22:51 mpi Exp $	*/
+/*	$OpenBSD: uvm_pmemrange.c,v 1.66 2024/05/01 12:54:27 mpi Exp $	*/
 
 /*
+ * Copyright (c) 2024 Martin Pieuchot
  * Copyright (c) 2009, 2010 Ariane van der Steldt
  *
  * Permission to use, copy, modify, and distribute this software for any
@@ -1261,6 +1262,28 @@ out:
 	return 0;
 }
 
+/*
+ * Acquire a single page.
+ *
+ * flags: UVM_PLA_* flags
+ * result: returned page.
+ */
+struct vm_page *
+uvm_pmr_getone(int flags)
+{
+	struct vm_page *pg;
+	struct pglist pgl;
+
+	TAILQ_INIT(&pgl);
+	if (uvm_pmr_getpages(1, 0, 0, 1, 0, 1, flags, &pgl) != 0)
+		return NULL;
+
+	pg = TAILQ_FIRST(&pgl);
+	KASSERT(pg != NULL && TAILQ_NEXT(pg, pageq) == NULL);
+
+	return pg;
+}
+
 /*
  * Free a number of contig pages (invoked by uvm_page_init).
  */
@@ -2190,3 +2213,147 @@ uvm_pagezero_thread(void *arg)
 		yield();
 	}
 }
+
+#if defined(MULTIPROCESSOR) && defined(__HAVE_UVM_PERCPU)
+int
+uvm_pmr_cache_alloc(struct uvm_pmr_cache_item *upci)
+{
+	struct vm_page *pg;
+	struct pglist pgl;
+	int flags = UVM_PLA_NOWAIT|UVM_PLA_NOWAKE;
+	int npages = UVM_PMR_CACHEMAGSZ;
+
+	splassert(IPL_VM);
+	KASSERT(upci->upci_npages == 0);
+
+	TAILQ_INIT(&pgl);
+	if (uvm_pmr_getpages(npages, 0, 0, 1, 0, npages, flags, &pgl))
+		return -1;
+
+	while ((pg = TAILQ_FIRST(&pgl)) != NULL) {
+		TAILQ_REMOVE(&pgl, pg, pageq);
+		upci->upci_pages[upci->upci_npages] = pg;
+		upci->upci_npages++;
+	}
+	atomic_add_int(&uvmexp.percpucaches, npages);
+
+	return 0;
+}
+
+struct vm_page *
+uvm_pmr_cache_get(int flags)
+{
+	struct uvm_pmr_cache *upc = &curcpu()->ci_uvm;
+	struct uvm_pmr_cache_item *upci;
+	struct vm_page *pg;
+	int s;
+
+	s = splvm();
+	upci = &upc->upc_magz[upc->upc_actv];
+	if (upci->upci_npages == 0) {
+		unsigned int prev;
+
+		prev = (upc->upc_actv == 0) ?  1 : 0;
+		upci = &upc->upc_magz[prev];
+		if (upci->upci_npages == 0) {
+			atomic_inc_int(&uvmexp.pcpmiss);
+			if (uvm_pmr_cache_alloc(upci)) {
+				splx(s);
+				return uvm_pmr_getone(flags);
+			}
+		}
+		/* Swap magazines */
+		upc->upc_actv = prev;
+	} else {
+		atomic_inc_int(&uvmexp.pcphit);
+	}
+
+	atomic_dec_int(&uvmexp.percpucaches);
+	upci->upci_npages--;
+	pg = upci->upci_pages[upci->upci_npages];
+	splx(s);
+
+	if (flags & UVM_PLA_ZERO)
+		uvm_pagezero(pg);
+
+	return pg;
+}
+
+void
+uvm_pmr_cache_free(struct uvm_pmr_cache_item *upci)
+{
+	struct pglist pgl;
+	unsigned int i;
+
+	splassert(IPL_VM);
+
+	TAILQ_INIT(&pgl);
+	for (i = 0; i < upci->upci_npages; i++)
+		TAILQ_INSERT_TAIL(&pgl, upci->upci_pages[i], pageq);
+
+	uvm_pmr_freepageq(&pgl);
+
+	atomic_sub_int(&uvmexp.percpucaches, upci->upci_npages);
+	upci->upci_npages = 0;
+	memset(upci->upci_pages, 0, sizeof(upci->upci_pages));
+}
+
+void
+uvm_pmr_cache_put(struct vm_page *pg)
+{
+	struct uvm_pmr_cache *upc = &curcpu()->ci_uvm;
+	struct uvm_pmr_cache_item *upci;
+	int s;
+
+	s = splvm();
+	upci = &upc->upc_magz[upc->upc_actv];
+	if (upci->upci_npages >= UVM_PMR_CACHEMAGSZ) {
+		unsigned int prev;
+
+		prev = (upc->upc_actv == 0) ?  1 : 0;
+		upci = &upc->upc_magz[prev];
+		if (upci->upci_npages > 0)
+			uvm_pmr_cache_free(upci);
+
+		/* Swap magazines */
+		upc->upc_actv = prev;
+		KASSERT(upci->upci_npages == 0);
+	}
+
+	upci->upci_pages[upci->upci_npages] = pg;
+	upci->upci_npages++;
+	atomic_inc_int(&uvmexp.percpucaches);
+	splx(s);
+}
+
+void
+uvm_pmr_cache_drain(void)
+{
+	struct uvm_pmr_cache *upc = &curcpu()->ci_uvm;
+	int s;
+
+	s = splvm();
+	uvm_pmr_cache_free(&upc->upc_magz[0]);
+	uvm_pmr_cache_free(&upc->upc_magz[1]);
+	splx(s);
+}
+
+#else /* !(MULTIPROCESSOR && __HAVE_UVM_PERCPU) */
+
+struct vm_page *
+uvm_pmr_cache_get(int flags)
+{
+	return uvm_pmr_getone(flags);
+}
+
+void
+uvm_pmr_cache_put(struct vm_page *pg)
+{
+	uvm_pmr_freepages(pg, 1);
+}
+
+void
+uvm_pmr_cache_drain(void)
+{
+}
+#endif
diff --git a/sys/uvm/uvm_pmemrange.h b/sys/uvm/uvm_pmemrange.h
index e70a9642393..45912df968e 100644
--- a/sys/uvm/uvm_pmemrange.h
+++ b/sys/uvm/uvm_pmemrange.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: uvm_pmemrange.h,v 1.16 2024/04/19 10:22:51 mpi Exp $	*/
+/*	$OpenBSD: uvm_pmemrange.h,v 1.17 2024/05/01 12:54:27 mpi Exp $	*/
 
 /*
  * Copyright (c) 2009 Ariane van der Steldt
@@ -147,5 +147,9 @@ void	uvm_pmr_remove(struct uvm_pmemrange *,
 struct vm_page		*uvm_pmr_extract_range(struct uvm_pmemrange *,
 			    struct vm_page *, paddr_t, paddr_t,
 			    struct pglist *);
+struct vm_page		*uvm_pmr_cache_get(int);
+void			 uvm_pmr_cache_put(struct vm_page *);
+void			 uvm_pmr_cache_drain(void);
+
 
 #endif /* _UVM_UVM_PMEMRANGE_H_ */
diff --git a/sys/uvm/uvmexp.h b/sys/uvm/uvmexp.h
index d34aea71bf2..ccd87b30298 100644
--- a/sys/uvm/uvmexp.h
+++ b/sys/uvm/uvmexp.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: uvmexp.h,v 1.14 2024/04/19 10:22:51 mpi Exp $	*/
+/*	$OpenBSD: uvmexp.h,v 1.15 2024/05/01 12:54:27 mpi Exp $	*/
 
 #ifndef	_UVM_UVMEXP_
 #define	_UVM_UVMEXP_
@@ -66,7 +66,7 @@ struct uvmexp {
 	int zeropages;		/* [F] number of zero'd pages */
 	int reserve_pagedaemon; /* [I] # of pages reserved for pagedaemon */
 	int reserve_kernel;	/* [I] # of pages reserved for kernel */
-	int unused01;		/* formerly anonpages */
+	int percpucaches;	/* [a] # of pages in per-CPU caches */
 	int vnodepages;		/* XXX # of pages used by vnode page cache */
 	int vtextpages;		/* XXX # of pages used by vtext vnodes */
 
@@ -101,8 +101,8 @@ struct uvmexp {
 	int syscalls;	/* system calls */
 	int pageins;	/* [p] pagein operation count */
 			/* pageouts are in pdpageouts below */
-	int unused07;	/* formerly obsolete_swapins */
-	int unused08;	/* formerly obsolete_swapouts */
+	int pcphit;	/* [a] # of pagealloc from per-CPU cache */
+	int pcpmiss;	/* [a] # of times a per-CPU cache was empty */
 	int pgswapin;	/* pages swapped in */
 	int pgswapout;	/* pages swapped out */
 	int forks;	/* forks */
diff --git a/usr.bin/systat/uvm.c b/usr.bin/systat/uvm.c
index 811f7630067..fbf8d6e2adb 100644
--- a/usr.bin/systat/uvm.c
+++ b/usr.bin/systat/uvm.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: uvm.c,v 1.8 2024/04/19 10:22:51 mpi Exp $	*/
+/*	$OpenBSD: uvm.c,v 1.9 2024/05/01 12:54:27 mpi Exp $	*/
 /*
  * Copyright (c) 2008 Can Erkin Acar
  * Copyright (c) 2018 Kenneth R Westerback
@@ -80,11 +80,10 @@ struct uvmline uvmline[] = {
 	{ &uvmexp.zeropages, &last_uvmexp.zeropages, "zeropages",
 	  &uvmexp.pageins, &last_uvmexp.pageins, "pageins",
 	  &uvmexp.fltrelckok, &last_uvmexp.fltrelckok, "fltrelckok" },
-	{ &uvmexp.reserve_pagedaemon, &last_uvmexp.reserve_pagedaemon,
-	  "reserve_pagedaemon",
+	{ &uvmexp.percpucaches, &last_uvmexp.percpucaches, "percpucaches",
 	  &uvmexp.pgswapin, &last_uvmexp.pgswapin, "pgswapin",
 	  &uvmexp.fltanget, &last_uvmexp.fltanget, "fltanget" },
-	{ &uvmexp.reserve_kernel, &last_uvmexp.reserve_kernel, "reserve_kernel",
+	{ NULL, NULL, NULL,
 	  &uvmexp.pgswapout, &last_uvmexp.pgswapout, "pgswapout",
 	  &uvmexp.fltanretry, &last_uvmexp.fltanretry, "fltanretry" },
 	{ NULL, NULL, NULL,
@@ -143,13 +142,13 @@ struct uvmline uvmline[] = {
 	  NULL, NULL, NULL },
 	{ &uvmexp.pagesize, &last_uvmexp.pagesize, "pagesize",
 	  &uvmexp.pdpending, &last_uvmexp.pdpending, "pdpending",
-	  NULL, NULL, NULL },
+	  NULL, NULL, "Per-CPU Counters" },
 	{ &uvmexp.pagemask, &last_uvmexp.pagemask, "pagemask",
 	  &uvmexp.pddeact, &last_uvmexp.pddeact, "pddeact",
-	  NULL, NULL, NULL },
+	  &uvmexp.pcphit, &last_uvmexp.pcphit, "pcphit" },
 	{ &uvmexp.pageshift, &last_uvmexp.pageshift, "pageshift",
 	  NULL, NULL, NULL,
-	  NULL, NULL, NULL }
+	  &uvmexp.pcpmiss, &last_uvmexp.pcpmiss, "pcpmiss" }
 };
 
 field_def fields_uvm[] = {
diff --git a/usr.bin/vmstat/vmstat.c b/usr.bin/vmstat/vmstat.c
index dcc999bd05e..00bd52febb7 100644
--- a/usr.bin/vmstat/vmstat.c
+++ b/usr.bin/vmstat/vmstat.c
@@ -1,5 +1,5 @@
 /* $NetBSD: vmstat.c,v 1.29.4.1 1996/06/05 00:21:05 cgd Exp $ */
-/*	$OpenBSD: vmstat.c,v 1.157 2024/04/19 10:22:51 mpi Exp $	*/
+/*	$OpenBSD: vmstat.c,v 1.158 2024/05/01 12:54:27 mpi Exp $	*/
 
 /*
  * Copyright (c) 1980, 1986, 1991, 1993
@@ -513,7 +513,12 @@ dosum(void)
 	    uvmexp.reserve_pagedaemon);
 	(void)printf("%11u pages reserved for kernel\n",
 	    uvmexp.reserve_kernel);
+	(void)printf("%11u pages in per-cpu caches\n",
+	    uvmexp.percpucaches);
 
+	/* per-cpu cache */
+	(void)printf("%11u per-cpu cache hits\n", uvmexp.pcphit);
+	(void)printf("%11u per-cpu cache misses\n", uvmexp.pcpmiss);
 	/* swap */
 	(void)printf("%11u swap pages\n", uvmexp.swpages);
 	(void)printf("%11u swap pages in use\n", uvmexp.swpginuse);
-- 
2.20.1
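
Illustrative sketch (not part of the patch): the stand-alone, user-space C
model below shows the two-magazine lookup/refill/flush logic described in
the commit message.  All names (pcpu_cache, cache_get, cache_put,
backing_alloc, ...) are invented for illustration; the kernel code above
additionally runs at splvm(), uses uvm_pmr_getpages()/uvm_pmr_freepageq()
as the backing path and updates the uvmexp counters.

/*
 * Hypothetical user-space model of a two-magazine per-CPU page cache.
 * malloc()/free() stand in for the shared pmemrange allocator.
 */
#include <stdio.h>
#include <stdlib.h>

#define MAGSZ	8			/* cf. UVM_PMR_CACHEMAGSZ */

struct magazine {
	void	*pages[MAGSZ];
	int	 npages;
};

struct pcpu_cache {
	struct magazine	 mag[2];	/* cf. upc_magz[2] */
	int		 active;	/* cf. upc_actv */
};

/* Slow, shared path (stand-in for the pmemrange allocator). */
static void *backing_alloc(void) { return malloc(4096); }
static void  backing_free(void *p) { free(p); }

/* Refill an empty magazine from the shared allocator (cf. uvm_pmr_cache_alloc()). */
static int
magazine_refill(struct magazine *m)
{
	while (m->npages < MAGSZ) {
		void *p = backing_alloc();
		if (p == NULL)
			return -1;
		m->pages[m->npages++] = p;
	}
	return 0;
}

/* Allocation fast path (cf. uvm_pmr_cache_get()). */
static void *
cache_get(struct pcpu_cache *c)
{
	struct magazine *m = &c->mag[c->active];

	if (m->npages == 0) {
		int prev = !c->active;

		m = &c->mag[prev];
		if (m->npages == 0 && magazine_refill(m) == -1)
			return backing_alloc();	/* both magazines empty */
		c->active = prev;		/* swap magazines */
	}
	return m->pages[--m->npages];
}

/* Free fast path (cf. uvm_pmr_cache_put()). */
static void
cache_put(struct pcpu_cache *c, void *p)
{
	struct magazine *m = &c->mag[c->active];

	if (m->npages >= MAGSZ) {
		int prev = !c->active;

		m = &c->mag[prev];
		while (m->npages > 0)		/* flush the sibling magazine */
			backing_free(m->pages[--m->npages]);
		c->active = prev;		/* swap magazines */
	}
	m->pages[m->npages++] = p;
}

int
main(void)
{
	struct pcpu_cache c = { 0 };
	int i;

	for (i = 0; i < 20; i++)
		cache_put(&c, cache_get(&c));
	printf("active magazine holds %d page(s)\n",
	    c.mag[c.active].npages);
	return 0;
}

Keeping two magazines lets a CPU absorb short allocate/free bursts without
touching the shared allocator: the active magazine is only swapped with its
sibling when it is completely empty or completely full.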