From 6646b5557aa4719d0ec5465a2164c8bdd2e0c1fb Mon Sep 17 00:00:00 2001 From: mlarkin Date: Sun, 12 Apr 2015 18:37:53 +0000 Subject: [PATCH] Bring PAE code back to life, in a different form. This diff (via bluhm then to deraadt, then myself) brings the PAE pmap on i386 (not touched in any significant way for years) closer to the current non-PAE pmap and allows us to take a big next step toward better i386 W^X in the kernel (similar to what we did a few months ago on amd64). Unlike the original PAE pmap, this diff will not be supporting > 4GB physical memory on i386 - this effort is specifically geared toward providing W^X (via NX) only. There still seems to be a bug removing certain pmap entries when PAE is enabled, so I'm leaving PAE mode disabled for the moment until we can figure out what is going on, but with this diff in the tree hopefully others can help. The pmap functions now operate through function pointers, due to the need to support both non-PAE and PAE forms. My unscientific testing showed less than 0.3% (a third of a percent) slowdown with this approach during a base build. Discussed for months with guenther, kettenis, and deraadt. ok kettenis@, deraadt@ --- sys/arch/i386/conf/files.i386 | 3 +- sys/arch/i386/i386/autoconf.c | 33 +- sys/arch/i386/i386/db_memrw.c | 31 +- sys/arch/i386/i386/genassym.cf | 3 +- sys/arch/i386/i386/hibernate_machdep.c | 9 +- sys/arch/i386/i386/kgdb_machdep.c | 6 +- sys/arch/i386/i386/kvm86.c | 4 +- sys/arch/i386/i386/lapic.c | 6 +- sys/arch/i386/i386/locore.s | 59 +- sys/arch/i386/i386/machdep.c | 28 +- sys/arch/i386/i386/mptramp.s | 12 +- sys/arch/i386/i386/pmap.c | 649 +++++++++------ sys/arch/i386/i386/pmapae.c | 1011 ++++++++++-------------- sys/arch/i386/include/biosvar.h | 5 +- sys/arch/i386/include/cpu.h | 3 +- sys/arch/i386/include/pmap.h | 284 ++++--- sys/arch/i386/include/pte.h | 25 +- sys/arch/i386/pci/piixpcib.c | 7 +- 18 files changed, 1124 insertions(+), 1054 deletions(-) diff --git a/sys/arch/i386/conf/files.i386 b/sys/arch/i386/conf/files.i386 index 1962f5ed836..b4679e6bea7 100644 --- a/sys/arch/i386/conf/files.i386 +++ b/sys/arch/i386/conf/files.i386 @@ -1,4 +1,4 @@ -# $OpenBSD: files.i386,v 1.220 2014/12/10 05:42:25 jsg Exp $ +# $OpenBSD: files.i386,v 1.221 2015/04/12 18:37:53 mlarkin Exp $ # # new style config file for i386 architecture # @@ -32,6 +32,7 @@ file arch/i386/i386/k6_mem.c mtrr file arch/i386/i386/mtrr.c mtrr file arch/i386/i386/p4tcc.c !small_kernel file arch/i386/i386/pmap.c +file arch/i386/i386/pmapae.c file arch/i386/i386/powernow.c !small_kernel file arch/i386/i386/powernow-k7.c !small_kernel file arch/i386/i386/powernow-k8.c !small_kernel diff --git a/sys/arch/i386/i386/autoconf.c b/sys/arch/i386/i386/autoconf.c index bc0e2c9de80..a2f2b2958c3 100644 --- a/sys/arch/i386/i386/autoconf.c +++ b/sys/arch/i386/i386/autoconf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: autoconf.c,v 1.92 2013/11/19 09:00:43 mpi Exp $ */ +/* $OpenBSD: autoconf.c,v 1.93 2015/04/12 18:37:53 mlarkin Exp $ */ /* $NetBSD: autoconf.c,v 1.20 1996/05/03 19:41:56 christos Exp $ */ /*- @@ -73,10 +73,20 @@ #include "ioapic.h" +#include "acpi.h" + #if NIOAPIC > 0 #include #endif +#if NACPI > 0 +#include +#endif + +#ifdef MULTIPROCESSOR +#include +#endif + /* * The following several variables are related to * the configuration process, and are used in initializing @@ -117,6 +127,27 @@ cpu_configure(void) #ifdef KVM86 kvm86_init(); #endif +#ifdef notyet + pmap_bootstrap_pae(); +#endif + +#if defined(MULTIPROCESSOR) || \ + (NACPI > 0 && !defined(SMALL_KERNEL)) + /* 
install the lowmem ptp after boot args for 1:1 mappings */ + pmap_prealloc_lowmem_ptp(); +#endif + +#ifdef MULTIPROCESSOR + pmap_kenter_pa((vaddr_t)MP_TRAMPOLINE, /* virtual */ + (paddr_t)MP_TRAMPOLINE, /* physical */ + PROT_READ | PROT_WRITE | PROT_EXEC); /* protection */ +#endif + +#if NACPI > 0 && !defined(SMALL_KERNEL) + pmap_kenter_pa((vaddr_t)ACPI_TRAMPOLINE, /* virtual */ + (paddr_t)ACPI_TRAMPOLINE, /* physical */ + PROT_READ | PROT_WRITE | PROT_EXEC); /* protection */ +#endif if (config_rootfound("mainbus", NULL) == NULL) panic("cpu_configure: mainbus not configured"); diff --git a/sys/arch/i386/i386/db_memrw.c b/sys/arch/i386/i386/db_memrw.c index 025ea3861d5..3434e7b410f 100644 --- a/sys/arch/i386/i386/db_memrw.c +++ b/sys/arch/i386/i386/db_memrw.c @@ -1,4 +1,4 @@ -/* $OpenBSD: db_memrw.c,v 1.14 2014/09/14 14:17:23 jsg Exp $ */ +/* $OpenBSD: db_memrw.c,v 1.15 2015/04/12 18:37:53 mlarkin Exp $ */ /* $NetBSD: db_memrw.c,v 1.6 1999/04/12 20:38:19 pk Exp $ */ /* @@ -43,6 +43,11 @@ #include +#define PG_LGFRAME 0xffc00000 /* large (4M) page frame mask */ +#define PG_LGFRAME_PAE 0xffe00000 /* large (2M) page frame mask */ + +extern int cpu_pae; + /* * Read bytes from kernel address space for debugger. */ @@ -63,9 +68,9 @@ db_read_bytes(vaddr_t addr, size_t size, char *data) static void db_write_text(vaddr_t addr, size_t size, char *data) { - pt_entry_t *pte, oldpte, tmppte; vaddr_t pgva; size_t limit; + uint32_t bits; char *dst; if (size == 0) @@ -77,10 +82,9 @@ db_write_text(vaddr_t addr, size_t size, char *data) /* * Get the PTE for the page. */ - pte = kvtopte(addr); - oldpte = *pte; + bits = pmap_pte_bits(addr); - if ((oldpte & PG_V) == 0) { + if ((bits & PG_V) == 0) { printf(" address %p not a valid page\n", dst); return; } @@ -88,9 +92,12 @@ db_write_text(vaddr_t addr, size_t size, char *data) /* * Get the VA for the page. */ - if (oldpte & PG_PS) - pgva = (vaddr_t)dst & PG_LGFRAME; - else + if (bits & PG_PS) { + if (cpu_pae) + pgva = (vaddr_t)dst & PG_LGFRAME_PAE; + else + pgva = (vaddr_t)dst & PG_LGFRAME; + } else pgva = trunc_page((vaddr_t)dst); /* @@ -99,7 +106,7 @@ db_write_text(vaddr_t addr, size_t size, char *data) * total size. */ #ifdef NBPD_L2 - if (oldpte & PG_PS) + if (bits & PG_PS) limit = NBPD_L2 - ((vaddr_t)dst & (NBPD_L2 - 1)); else #endif @@ -108,9 +115,8 @@ db_write_text(vaddr_t addr, size_t size, char *data) limit = size; size -= limit; - tmppte = (oldpte & ~PG_KR) | PG_KW; - *pte = tmppte; pmap_update_pg(pgva); + pmap_pte_setbits(addr, PG_RW, 0); /* * Page is now writable. Do as much access as we @@ -122,9 +128,8 @@ db_write_text(vaddr_t addr, size_t size, char *data) /* * Restore the old PTE. */ - *pte = oldpte; - pmap_update_pg(pgva); + pmap_pte_setbits(addr, 0, bits); } while (size != 0); } diff --git a/sys/arch/i386/i386/genassym.cf b/sys/arch/i386/i386/genassym.cf index a89b0c0fd70..ddb355b24df 100644 --- a/sys/arch/i386/i386/genassym.cf +++ b/sys/arch/i386/i386/genassym.cf @@ -1,4 +1,4 @@ -# $OpenBSD: genassym.cf,v 1.36 2014/12/22 02:26:53 tedu Exp $ +# $OpenBSD: genassym.cf,v 1.37 2015/04/12 18:37:53 mlarkin Exp $ # # Copyright (c) 1982, 1990 The Regents of the University of California. # All rights reserved. 
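
For illustration only, a minimal sketch of the pattern db_write_text() now follows in the db_memrw.c hunks above: instead of dereferencing a PTE directly, it reads the flag bits through pmap_pte_bits() and toggles write permission through pmap_pte_setbits(), so the same source works whether PTEs are 32-bit (non-PAE) or 64-bit (PAE). The helper name db_patch_text and the simplified save/restore are assumptions for the sketch, not the committed code:

/*
 * Sketch: temporarily grant write access to a kernel text page
 * through the new PTE accessors, copy, then restore.  Large-page
 * handling and the chunking loop of db_write_text() are omitted.
 */
void
db_patch_text(vaddr_t va, size_t len, char *src)
{
	uint32_t obits;

	obits = pmap_pte_bits(va);		/* save old low bits */
	if ((obits & PG_V) == 0)
		return;				/* page not mapped */

	pmap_pte_setbits(va, PG_RW, 0);		/* set PG_RW, clear none */
	pmap_update_pg(trunc_page(va));		/* flush stale TLB entry */

	bcopy(src, (char *)va, len);

	if ((obits & PG_RW) == 0)
		pmap_pte_setbits(va, 0, PG_RW);	/* revoke write again */
	pmap_update_pg(trunc_page(va));
}
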
@@ -69,7 +69,6 @@ export PDSLOT_KERN export PDSLOT_PTE export PDSLOT_APTE export NKPTP_MIN -export NKPTP_MAX # values for virtual memory export VM_MAXUSER_ADDRESS diff --git a/sys/arch/i386/i386/hibernate_machdep.c b/sys/arch/i386/i386/hibernate_machdep.c index e7175de3c30..6b2eeb5b61a 100644 --- a/sys/arch/i386/i386/hibernate_machdep.c +++ b/sys/arch/i386/i386/hibernate_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: hibernate_machdep.c,v 1.44 2015/01/09 03:43:52 mlarkin Exp $ */ +/* $OpenBSD: hibernate_machdep.c,v 1.45 2015/04/12 18:37:53 mlarkin Exp $ */ /* * Copyright (c) 2011 Mike Larkin @@ -59,6 +59,13 @@ extern struct dumpmem dumpmem[]; extern bios_memmap_t *bios_memmap; extern struct hibernate_state *hibernate_state; +/* + * Hibernate always uses non-PAE page tables during resume, so + * redefine masks and pt_entry_t sizes in case PAE is in use. + */ +#define PAGE_MASK_L2 (NBPD - 1) +typedef uint32_t pt_entry_t; + /* * i386 MD Hibernate functions * diff --git a/sys/arch/i386/i386/kgdb_machdep.c b/sys/arch/i386/i386/kgdb_machdep.c index 1b0d5217e33..245f9bba3da 100644 --- a/sys/arch/i386/i386/kgdb_machdep.c +++ b/sys/arch/i386/i386/kgdb_machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kgdb_machdep.c,v 1.11 2014/07/05 07:18:33 jsg Exp $ */ +/* $OpenBSD: kgdb_machdep.c,v 1.12 2015/04/12 18:37:53 mlarkin Exp $ */ /* $NetBSD: kgdb_machdep.c,v 1.6 1998/08/13 21:36:03 thorpej Exp $ */ /*- @@ -83,15 +83,13 @@ int kgdb_acc(vaddr_t va, size_t len) { vaddr_t last_va; - pt_entry_t *pte; last_va = va + len; va &= ~PGOFSET; last_va &= ~PGOFSET; do { - pte = kvtopte(va); - if ((*pte & PG_V) == 0) + if ((pmap_pte_bits(va) & PG_V) == 0) return (0); va += NBPG; } while (va < last_va); diff --git a/sys/arch/i386/i386/kvm86.c b/sys/arch/i386/i386/kvm86.c index ae7656f4a44..cc0bb1b9601 100644 --- a/sys/arch/i386/i386/kvm86.c +++ b/sys/arch/i386/i386/kvm86.c @@ -1,4 +1,4 @@ -/* $OpenBSD: kvm86.c,v 1.9 2015/02/11 05:54:48 dlg Exp $ */ +/* $OpenBSD: kvm86.c,v 1.10 2015/04/12 18:37:53 mlarkin Exp $ */ /* $NetBSD: kvm86.c,v 1.10 2005/12/26 19:23:59 perry Exp $ */ /* * Copyright (c) 2002 @@ -47,6 +47,8 @@ extern void kvm86_ret(struct trapframe *, int); #define PGTABLE_SIZE ((1024 + 64) * 1024 / PAGE_SIZE) +typedef uint32_t pt_entry_t; + struct kvm86_data { pt_entry_t pgtbl[PGTABLE_SIZE]; diff --git a/sys/arch/i386/i386/lapic.c b/sys/arch/i386/i386/lapic.c index 295c27853ed..bea9e3d045f 100644 --- a/sys/arch/i386/i386/lapic.c +++ b/sys/arch/i386/i386/lapic.c @@ -1,4 +1,4 @@ -/* $OpenBSD: lapic.c,v 1.37 2014/09/21 16:14:52 sf Exp $ */ +/* $OpenBSD: lapic.c,v 1.38 2015/04/12 18:37:53 mlarkin Exp $ */ /* $NetBSD: lapic.c,v 1.1.2.8 2000/02/23 06:10:50 sommerfeld Exp $ */ /*- @@ -70,7 +70,6 @@ void lapic_map(paddr_t lapic_base) { int s; - pt_entry_t *pte; vaddr_t va = (vaddr_t)&local_apic; disable_intr(); @@ -85,8 +84,7 @@ lapic_map(paddr_t lapic_base) * might have changed the value of cpu_number().. 
*/ - pte = kvtopte(va); - *pte = lapic_base | PG_RW | PG_V | PG_N; + pmap_pte_set(va, lapic_base, PG_RW | PG_V | PG_N); invlpg(va); #ifdef MULTIPROCESSOR diff --git a/sys/arch/i386/i386/locore.s b/sys/arch/i386/i386/locore.s index e69414763e3..ed000829dc7 100644 --- a/sys/arch/i386/i386/locore.s +++ b/sys/arch/i386/i386/locore.s @@ -1,4 +1,4 @@ -/* $OpenBSD: locore.s,v 1.151 2015/04/01 19:45:21 mlarkin Exp $ */ +/* $OpenBSD: locore.s,v 1.152 2015/04/12 18:37:53 mlarkin Exp $ */ /* $NetBSD: locore.s,v 1.145 1996/05/03 19:41:19 christos Exp $ */ /*- @@ -178,6 +178,7 @@ .globl _C_LABEL(cold), _C_LABEL(cnvmem), _C_LABEL(extmem) .globl _C_LABEL(cpu_pae) .globl _C_LABEL(esym) + .globl _C_LABEL(nkptp_max) .globl _C_LABEL(boothowto), _C_LABEL(bootdev), _C_LABEL(atdevbase) .globl _C_LABEL(proc0paddr), _C_LABEL(PTDpaddr), _C_LABEL(PTDsize) .globl _C_LABEL(gdt) @@ -481,13 +482,13 @@ try586: /* Use the `cpuid' instruction. */ /* * Virtual address space of kernel: * - * text | data | bss | [syms] | proc0 stack | page dir | Sysmap - * 0 1 2 3 + * text | data | bss | [syms] | proc0 kstack | page dir | Sysmap + * 0 1 2 6 */ #define PROC0STACK ((0) * NBPG) #define PROC0PDIR (( UPAGES) * NBPG) -#define SYSMAP ((1+UPAGES) * NBPG) -#define TABLESIZE ((1+UPAGES) * NBPG) /* + _C_LABEL(nkpde) * NBPG */ +#define SYSMAP ((4+UPAGES) * NBPG) +#define TABLESIZE ((4+UPAGES) * NBPG) /* + _C_LABEL(nkpde) * NBPG */ /* Find end of kernel image. */ movl $RELOC(_C_LABEL(end)),%edi @@ -515,9 +516,9 @@ try586: /* Use the `cpuid' instruction. */ jge 1f movl $NKPTP_MIN,%ecx # set at min jmp 2f -1: cmpl $NKPTP_MAX,%ecx # larger than max? +1: cmpl RELOC(_C_LABEL(nkptp_max)),%ecx # larger than max? jle 2f - movl $NKPTP_MAX,%ecx + movl RELOC(_C_LABEL(nkptp_max)),%ecx 2: movl %ecx,RELOC(_C_LABEL(nkpde)) # and store it back /* Clear memory for bootstrap tables. */ @@ -581,9 +582,9 @@ try586: /* Use the `cpuid' instruction. */ /* * Construct a page table directory. */ - movl RELOC(_C_LABEL(nkpde)),%ecx # count of pde s, + movl RELOC(_C_LABEL(nkpde)),%ecx # count of pdes, leal (PROC0PDIR+0*4)(%esi),%ebx # where temp maps! - leal (SYSMAP+PG_V|PG_KW)(%esi),%eax # pte for KPT in proc 0 + leal (SYSMAP+PG_V|PG_KW|PG_U|PG_M)(%esi),%eax # pte for KPT in proc 0 fillkpt /* @@ -592,12 +593,14 @@ try586: /* Use the `cpuid' instruction. */ */ movl RELOC(_C_LABEL(nkpde)),%ecx # count of pde s, leal (PROC0PDIR+PDSLOT_KERN*4)(%esi),%ebx # map them high - leal (SYSMAP+PG_V|PG_KW)(%esi),%eax # pte for KPT in proc 0 + leal (SYSMAP+PG_V|PG_KW|PG_U|PG_M)(%esi),%eax # pte for KPT in proc 0 fillkpt /* Install a PDE recursively mapping page directory as a page table! */ - leal (PROC0PDIR+PG_V|PG_KW)(%esi),%eax # pte for ptd + leal (PROC0PDIR+PG_V|PG_KW|PG_U|PG_M)(%esi),%eax # pte for ptd movl %eax,(PROC0PDIR+PDSLOT_PTE*4)(%esi) # recursive PD slot + addl $NBPG, %eax # pte for ptd[1] + movl %eax,(PROC0PDIR+(PDSLOT_PTE+1)*4)(%esi) # recursive PD slot /* Save phys. addr of PTD, for libkvm. */ leal (PROC0PDIR)(%esi),%eax # phys address of ptd in proc 0 @@ -1646,6 +1649,40 @@ ENTRY(i686_pagezero) ret #endif +/* + * int cpu_paenable(void *); + */ +ENTRY(cpu_paenable) + movl $-1, %eax + testl $CPUID_PAE, _C_LABEL(cpu_feature) + jz 1f + + pushl %esi + pushl %edi + movl 12(%esp), %esi + movl %cr3, %edi + orl $0xfe0, %edi /* PDPT will be in the last four slots! */ + movl %edi, %cr3 + addl $KERNBASE, %edi /* and make it back virtual again */ + movl $8, %ecx + cld + rep + movsl + movl %cr4, %eax + orl $CR4_PAE, %eax + movl %eax, %cr4 /* BANG!!! 
*/ + movl 12(%esp), %eax + subl $KERNBASE, %eax + movl %eax, %cr3 /* reload real PDPT */ + movl $4*NBPG, %eax + movl %eax, _C_LABEL(PTDsize) + + xorl %eax, %eax + popl %edi + popl %esi +1: + ret + /* * ucas_32(volatile int32_t *uptr, int32_t old, int32_t new); */ diff --git a/sys/arch/i386/i386/machdep.c b/sys/arch/i386/i386/machdep.c index afa02af241a..5027a0c0d5a 100644 --- a/sys/arch/i386/i386/machdep.c +++ b/sys/arch/i386/i386/machdep.c @@ -1,4 +1,4 @@ -/* $OpenBSD: machdep.c,v 1.567 2015/02/08 04:41:48 deraadt Exp $ */ +/* $OpenBSD: machdep.c,v 1.568 2015/04/12 18:37:53 mlarkin Exp $ */ /* $NetBSD: machdep.c,v 1.214 1996/11/10 03:16:17 thorpej Exp $ */ /*- @@ -115,6 +115,7 @@ #include #include #include +#include #ifdef MULTIPROCESSOR #include #endif /* MULTIPROCESSOR */ @@ -2997,7 +2998,6 @@ fix_f00f(void) struct region_descriptor region; vaddr_t va; void *p; - pt_entry_t *pte; /* Allocate two new pages */ va = uvm_km_zalloc(kernel_map, NBPG*2); @@ -3012,8 +3012,7 @@ fix_f00f(void) GCODE_SEL); /* Map first page RO */ - pte = PTE_BASE + atop(va); - *pte &= ~PG_RW; + pmap_pte_setbits(va, 0, PG_RW); /* Reload idtr */ setregion(®ion, idt, sizeof(idt_region) - 1); @@ -3185,9 +3184,6 @@ init386(paddr_t first_avail) panic("no BIOS memory map supplied"); #endif - /* install the lowmem ptp after boot args for 1:1 mappings */ - pmap_prealloc_lowmem_ptp(round_page((paddr_t)(bootargv + bootargc))); - /* * account all the memory passed in the map from /boot * calculate avail_end and count the physmem. @@ -3335,24 +3331,6 @@ init386(paddr_t first_avail) printf("\n"); #endif -#if defined(MULTIPROCESSOR) || \ - (NACPI > 0 && !defined(SMALL_KERNEL)) - /* install the lowmem ptp after boot args for 1:1 mappings */ - pmap_prealloc_lowmem_ptp(PTP0_PA); -#endif - -#ifdef MULTIPROCESSOR - pmap_kenter_pa((vaddr_t)MP_TRAMPOLINE, /* virtual */ - (paddr_t)MP_TRAMPOLINE, /* physical */ - PROT_READ | PROT_WRITE | PROT_EXEC); /* protection */ -#endif - -#if NACPI > 0 && !defined(SMALL_KERNEL) - pmap_kenter_pa((vaddr_t)ACPI_TRAMPOLINE, /* virtual */ - (paddr_t)ACPI_TRAMPOLINE, /* physical */ - PROT_READ | PROT_WRITE | PROT_EXEC); /* protection */ -#endif - tlbflush(); #if 0 #if NISADMA > 0 diff --git a/sys/arch/i386/i386/mptramp.s b/sys/arch/i386/i386/mptramp.s index 10537ed2208..02efc76dfb9 100644 --- a/sys/arch/i386/i386/mptramp.s +++ b/sys/arch/i386/i386/mptramp.s @@ -1,4 +1,4 @@ -/* $OpenBSD: mptramp.s,v 1.14 2014/01/05 20:23:57 mlarkin Exp $ */ +/* $OpenBSD: mptramp.s,v 1.15 2015/04/12 18:37:53 mlarkin Exp $ */ /*- * Copyright (c) 2000 The NetBSD Foundation, Inc. @@ -159,6 +159,16 @@ _TRMP_LABEL(mp_startup) /* Load base of page directory and enable mapping. */ movl %ecx,%cr3 # load ptd addr into mmu +#ifndef SMALL_KERNEL + movl $_C_LABEL(pmap_pte_set_pae),%eax + cmpl RELOC(_C_LABEL(pmap_pte_set_p)),%eax + jne nopae + + movl %cr4,%eax + orl $CR4_PAE,%eax + movl %eax, %cr4 +nopae: +#endif movl %cr0,%eax # get control word # enable paging & NPX emulation orl $(CR0_PE|CR0_PG|CR0_NE|CR0_TS|CR0_EM|CR0_MP|CR0_WP),%eax diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c index c8697933418..330107c44a5 100644 --- a/sys/arch/i386/i386/pmap.c +++ b/sys/arch/i386/i386/pmap.c @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.c,v 1.171 2015/03/13 23:23:13 mlarkin Exp $ */ +/* $OpenBSD: pmap.c,v 1.172 2015/04/12 18:37:53 mlarkin Exp $ */ /* $NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $ */ /* @@ -321,20 +321,73 @@ * If we fail, we simply let pmap_enter() tell UVM about it. 
*/ -#define PD_MASK 0xffc00000 /* page directory address bits */ -#define PT_MASK 0x003ff000 /* page table address bits */ +#define PG_FRAME 0xfffff000 /* page frame mask */ +#define PG_LGFRAME 0xffc00000 /* large (4M) page frame mask */ + +/* + * The following defines give the virtual addresses of various MMU + * data structures: + * PTE_BASE and APTE_BASE: the base VA of the linear PTE mappings + * PTD_BASE and APTD_BASE: the base VA of the recursive mapping of the PTD + * PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP + */ +#define PTE_BASE ((pt_entry_t *) (PDSLOT_PTE * NBPD)) +#define APTE_BASE ((pt_entry_t *) (PDSLOT_APTE * NBPD)) +#define PDP_BASE ((pd_entry_t *)(((char *)PTE_BASE) + (PDSLOT_PTE * NBPG))) +#define APDP_BASE ((pd_entry_t *)(((char *)APTE_BASE) + (PDSLOT_APTE * NBPG))) +#define PDP_PDE (PDP_BASE + PDSLOT_PTE) +#define APDP_PDE (PDP_BASE + PDSLOT_APTE) /* * pdei/ptei: generate index into PDP/PTP from a VA */ -#define pdei(VA) (((VA) & PD_MASK) >> PDSHIFT) -#define ptei(VA) (((VA) & PT_MASK) >> PAGE_SHIFT) +#define PD_MASK 0xffc00000 /* page directory address bits */ +#define PT_MASK 0x003ff000 /* page table address bits */ +#define pdei(VA) (((VA) & PD_MASK) >> PDSHIFT) +#define ptei(VA) (((VA) & PT_MASK) >> PGSHIFT) + +/* + * Mach derived conversion macros + */ +#define i386_round_pdr(x) ((((unsigned)(x)) + ~PD_MASK) & PD_MASK) + +/* + * various address macros + * + * vtopte: return a pointer to the PTE mapping a VA + */ +#define vtopte(VA) (PTE_BASE + atop((vaddr_t)VA)) + + +/* + * PTP macros: + * A PTP's index is the PD index of the PDE that points to it. + * A PTP's offset is the byte-offset in the PTE space that this PTP is at. + * A PTP's VA is the first VA mapped by that PTP. + * + * Note that NBPG == number of bytes in a PTP (4096 bytes == 1024 entries) + * NBPD == number of bytes a PTP can map (4MB) + */ + +#define ptp_i2o(I) ((I) * NBPG) /* index => offset */ +#define ptp_o2i(O) ((O) / NBPG) /* offset => index */ +#define ptp_i2v(I) ((I) * NBPD) /* index => VA */ +#define ptp_v2i(V) ((V) / NBPD) /* VA => index (same as pdei) */ + +#define PDE(pm,i) (((pd_entry_t *)(pm)->pm_pdir)[(i)]) + +/* + * here we define the data types for PDEs and PTEs + */ +typedef u_int32_t pd_entry_t; /* PDE */ +typedef u_int32_t pt_entry_t; /* PTE */ /* * global data structures */ -struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ +/* the kernel's pmap (proc0) */ +struct pmap __attribute__ ((aligned (32))) kernel_pmap_store; /* * nkpde is the number of kernel PTPs allocated for the kernel at @@ -344,10 +397,13 @@ struct pmap kernel_pmap_store; /* the kernel's pmap (proc0) */ */ int nkpde = NKPTP; +int nkptp_max = 1024 - (KERNBASE / NBPD) - 1; #ifdef NKPDE #error "obsolete NKPDE: use NKPTP" #endif +extern int cpu_pae; + /* * pmap_pg_g: if our processor supports PG_G in the PTE then we * set pmap_pg_g to PG_G (otherwise it is zero). @@ -366,8 +422,17 @@ int pmap_pg_wc = PG_UCMINUS; * other data structures */ -static pt_entry_t protection_codes[8]; /* maps MI prot to i386 prot code */ -static boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */ +uint32_t protection_codes[8]; /* maps MI prot to i386 prot code */ +boolean_t pmap_initialized = FALSE; /* pmap_init done yet? */ + +/* + * pv management structures. + */ +struct pool pmap_pv_pool; + +#define PVE_LOWAT (PVE_PER_PVPAGE / 2) /* free pv_entry low water mark */ +#define PVE_HIWAT (PVE_LOWAT + (PVE_PER_PVPAGE * 2)) + /* high water mark */ /* * pv management structures. 
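
The macros above encode the classic two-level i386 walk: a VA splits into a 10-bit page directory index, a 10-bit page table index, and a 12-bit page offset, and the recursive PDE at PDSLOT_PTE is what makes vtopte()'s simple "PTE_BASE + atop(va)" arithmetic work. (The pmapae.c portion of this diff redefines PDSHIFT to 21 and shrinks both indices to 9 bits for the 64-bit PTE format.) A small standalone example of the split, assuming PDSHIFT == 22 and PGSHIFT == 12 as on classic i386:

/* Worked example of pdei()/ptei() for the non-PAE layout. */
#include <stdint.h>
#include <stdio.h>

#define PGSHIFT		12
#define PDSHIFT		22
#define PD_MASK		0xffc00000U	/* page directory address bits */
#define PT_MASK		0x003ff000U	/* page table address bits */

#define pdei(va)	(((va) & PD_MASK) >> PDSHIFT)
#define ptei(va)	(((va) & PT_MASK) >> PGSHIFT)

int
main(void)
{
	uint32_t va = 0xd0234567;	/* an arbitrary kernel VA */

	/* prints: pdei=832 ptei=564 off=0x567 */
	printf("pdei=%u ptei=%u off=0x%x\n",
	    pdei(va), ptei(va), va & 0xfff);
	return (0);
}
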
@@ -424,144 +489,62 @@ struct pool pmap_pmap_pool; * special VAs and the PTEs that map them */ -static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte, *flsh_pte; -static caddr_t csrcp, cdstp, zerop, ptpp, flshp; +pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte, *flsh_pte; +caddr_t pmap_csrcp, pmap_cdstp, pmap_zerop, pmap_ptpp, pmap_flshp; caddr_t vmmap; /* XXX: used by mem.c... it should really uvm_map_reserve it */ /* * local prototypes */ -struct vm_page *pmap_alloc_ptp(struct pmap *, int, boolean_t, pt_entry_t); -struct pv_entry *pmap_alloc_pv(struct pmap *, int); /* see codes below */ -#define ALLOCPV_NEED 0 /* need PV now */ -#define ALLOCPV_TRY 1 /* just try to allocate */ -#define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */ +struct pv_entry *pmap_alloc_pv(struct pmap *, int); /* see codes in pmap.h */ void pmap_enter_pv(struct vm_page *, struct pv_entry *, struct pmap *, vaddr_t, struct vm_page *); void pmap_free_pv(struct pmap *, struct pv_entry *); void pmap_free_pvs(struct pmap *, struct pv_entry *); -struct vm_page *pmap_get_ptp(struct pmap *, int, boolean_t); -void pmap_drop_ptp(struct pmap *, vaddr_t, struct vm_page *, - pt_entry_t *); -void pmap_sync_flags_pte(struct vm_page *, u_long); -pt_entry_t *pmap_map_ptes(struct pmap *); -struct pv_entry *pmap_remove_pv(struct vm_page *, struct pmap *, vaddr_t); -void pmap_do_remove(struct pmap *, vaddr_t, vaddr_t, int); -void pmap_remove_ptes(struct pmap *, struct vm_page *, vaddr_t, - vaddr_t, vaddr_t, int); +struct vm_page *pmap_alloc_ptp_86(struct pmap *, int, pt_entry_t); +struct vm_page *pmap_get_ptp_86(struct pmap *, int); +pt_entry_t *pmap_map_ptes_86(struct pmap *); +void pmap_unmap_ptes_86(struct pmap *); +void pmap_do_remove_86(struct pmap *, vaddr_t, vaddr_t, int); +void pmap_remove_ptes_86(struct pmap *, struct vm_page *, vaddr_t, + vaddr_t, vaddr_t, int); void *pmap_pv_page_alloc(struct pool *, int, int *); -void pmap_pv_page_free(struct pool *, void *); +void pmap_pv_page_free(struct pool *, void *); struct pool_allocator pmap_pv_page_allocator = { pmap_pv_page_alloc, pmap_pv_page_free, }; -#define PMAP_REMOVE_ALL 0 -#define PMAP_REMOVE_SKIPWIRED 1 +void pmap_sync_flags_pte_86(struct vm_page *, pt_entry_t); + +void pmap_drop_ptp(struct pmap *, vaddr_t, struct vm_page *, + pt_entry_t *); -vaddr_t pmap_tmpmap_pa(paddr_t); -void pmap_tmpunmap_pa(void); void pmap_apte_flush(void); -void pmap_unmap_ptes(struct pmap *); -void pmap_exec_account(struct pmap *, vaddr_t, pt_entry_t, +void pmap_exec_account(struct pmap *, vaddr_t, pt_entry_t, pt_entry_t); -void pmap_pinit(pmap_t); - -void pmap_zero_phys(paddr_t); - -void setcslimit(struct pmap *, struct trapframe *, struct pcb *, vaddr_t); - -/* - * p m a p i n l i n e h e l p e r f u n c t i o n s - */ - -/* - * pmap_is_active: is this pmap loaded into the specified processor's %cr3? - */ - -static __inline boolean_t -pmap_is_active(struct pmap *pmap, struct cpu_info *ci) -{ - - return (pmap == pmap_kernel() || ci->ci_curpmap == pmap); -} - -static __inline boolean_t -pmap_is_curpmap(struct pmap *pmap) -{ - return (pmap_is_active(pmap, curcpu())); -} +void setcslimit(struct pmap *, struct trapframe *, struct pcb *, + vaddr_t); +void pmap_pinit_pd_86(struct pmap *); static __inline u_int -pmap_pte2flags(u_long pte) +pmap_pte2flags(pt_entry_t pte) { return (((pte & PG_U) ? PG_PMAP_REF : 0) | ((pte & PG_M) ? PG_PMAP_MOD : 0)); } -static __inline u_int -pmap_flags2pte(u_long pte) -{ - return (((pte & PG_PMAP_REF) ? PG_U : 0) | - ((pte & PG_PMAP_MOD) ? 
PG_M : 0)); -} - void -pmap_sync_flags_pte(struct vm_page *pg, u_long pte) +pmap_sync_flags_pte_86(struct vm_page *pg, pt_entry_t pte) { if (pte & (PG_U|PG_M)) { atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(pte)); } } -/* - * pmap_tmpmap_pa: map a page in for tmp usage - */ - -vaddr_t -pmap_tmpmap_pa(paddr_t pa) -{ -#ifdef MULTIPROCESSOR - int id = cpu_number(); -#endif - pt_entry_t *ptpte = PTESLEW(ptp_pte, id); - caddr_t ptpva = VASLEW(ptpp, id); -#if defined(DIAGNOSTIC) - if (*ptpte) - panic("pmap_tmpmap_pa: ptp_pte in use?"); -#endif - *ptpte = PG_V | PG_RW | pa; /* always a new mapping */ - return((vaddr_t)ptpva); -} - -/* - * pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa) - */ - -void -pmap_tmpunmap_pa() -{ -#ifdef MULTIPROCESSOR - int id = cpu_number(); -#endif - pt_entry_t *ptpte = PTESLEW(ptp_pte, id); - caddr_t ptpva = VASLEW(ptpp, id); -#if defined(DIAGNOSTIC) - if (!pmap_valid_entry(*ptpte)) - panic("pmap_tmpunmap_pa: our pte invalid?"); -#endif - *ptpte = 0; - pmap_update_pg((vaddr_t)ptpva); -#ifdef MULTIPROCESSOR - /* - * No need for tlb shootdown here, since ptp_pte is per-CPU. - */ -#endif -} - void pmap_apte_flush(void) { @@ -577,7 +560,7 @@ pmap_apte_flush(void) */ pt_entry_t * -pmap_map_ptes(struct pmap *pmap) +pmap_map_ptes_86(struct pmap *pmap) { pd_entry_t opde; @@ -611,7 +594,7 @@ pmap_map_ptes(struct pmap *pmap) */ void -pmap_unmap_ptes(struct pmap *pmap) +pmap_unmap_ptes_86(struct pmap *pmap) { if (pmap == pmap_kernel()) return; @@ -626,7 +609,7 @@ pmap_unmap_ptes(struct pmap *pmap) void pmap_exec_account(struct pmap *pm, vaddr_t va, - pt_entry_t opte, pt_entry_t npte) + uint32_t opte, uint32_t npte) { if (pm == pmap_kernel()) return; @@ -711,6 +694,114 @@ pmap_exec_fixup(struct vm_map *map, struct trapframe *tf, struct pcb *pcb) return (1); } +u_int32_t +pmap_pte_set_86(vaddr_t va, paddr_t pa, u_int32_t bits) +{ + pt_entry_t pte, *ptep = vtopte(va); + + pa &= PMAP_PA_MASK; + + pte = i386_atomic_testset_ul(ptep, pa | bits); /* zap! */ + return (pte & ~PG_FRAME); +} + +u_int32_t +pmap_pte_setbits_86(vaddr_t va, u_int32_t set, u_int32_t clr) +{ + pt_entry_t *ptep = vtopte(va); + pt_entry_t pte = *ptep; + + *ptep = (pte | set) & ~clr; + return (pte & ~PG_FRAME); +} + +u_int32_t +pmap_pte_bits_86(vaddr_t va) +{ + pt_entry_t *ptep = vtopte(va); + + return (*ptep & ~PG_FRAME); +} + +paddr_t +pmap_pte_paddr_86(vaddr_t va) +{ + pt_entry_t *ptep = vtopte(va); + + return (*ptep & PG_FRAME); +} + +/* + * pmap_tmpmap_pa: map a page in for tmp usage + */ + +vaddr_t +pmap_tmpmap_pa(paddr_t pa) +{ +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *ptpte; + caddr_t ptpva; + + if (cpu_pae) + return pmap_tmpmap_pa_pae(pa); + + ptpte = PTESLEW(ptp_pte, id); + ptpva = VASLEW(pmap_ptpp, id); + +#if defined(DIAGNOSTIC) + if (*ptpte) + panic("pmap_tmpmap_pa: ptp_pte in use?"); +#endif + *ptpte = PG_V | PG_RW | pa; /* always a new mapping */ + return((vaddr_t)ptpva); +} + +/* + * pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa) + */ + +void +pmap_tmpunmap_pa() +{ +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *ptpte; + caddr_t ptpva; + + if (cpu_pae) { + pmap_tmpunmap_pa_pae(); + return; + } + + ptpte = PTESLEW(ptp_pte, id); + ptpva = VASLEW(pmap_ptpp, id); + +#if defined(DIAGNOSTIC) + if (!pmap_valid_entry(*ptpte)) + panic("pmap_tmpunmap_pa: our pte invalid?"); +#endif + + *ptpte = 0; + pmap_update_pg((vaddr_t)ptpva); +#ifdef MULTIPROCESSOR + /* + * No need for tlb shootdown here, since ptp_pte is per-CPU. 
+ */ +#endif +} + +paddr_t +vtophys(vaddr_t va) +{ + if (cpu_pae) + return vtophys_pae(va); + else + return ((*vtopte(va) & PG_FRAME) | (va & ~PG_FRAME)); +} + void setcslimit(struct pmap *pm, struct trapframe *tf, struct pcb *pcb, vaddr_t limit) @@ -760,20 +851,24 @@ setcslimit(struct pmap *pm, struct trapframe *tf, struct pcb *pcb, void pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot) { - pt_entry_t *pte, opte, npte; - - pte = vtopte(va); - npte = (pa & PMAP_PA_MASK) | ((prot & PROT_WRITE)? PG_RW : PG_RO) | - PG_V | PG_U | PG_M | ((pa & PMAP_NOCACHE) ? PG_N : 0) | - ((pa & PMAP_WC) ? pmap_pg_wc : 0); + uint32_t bits; + uint32_t global = 0; - /* special 1:1 mappings in the first 4MB must not be global */ - if (va >= (vaddr_t)NBPD) - npte |= pmap_pg_g; + /* special 1:1 mappings in the first large page must not be global */ + if (!cpu_pae) { + if (va >= (vaddr_t)NBPD) /* 4MB pages on non-PAE */ + global = pmap_pg_g; + } else { + if (va >= (vaddr_t)NBPD / 2) /* 2MB pages on PAE */ + global = pmap_pg_g; + } - opte = i386_atomic_testset_ul(pte, npte); - if (pmap_valid_entry(opte)) { - if (pa & PMAP_NOCACHE && (opte & PG_N) == 0) + bits = pmap_pte_set(va, pa, ((prot & PROT_WRITE) ? PG_RW : PG_RO) | + PG_V | global | PG_U | PG_M | + ((pa & PMAP_NOCACHE) ? PG_N : 0) | + ((pa & PMAP_WC) ? pmap_pg_wc : 0)); + if (pmap_valid_entry(bits)) { + if (pa & PMAP_NOCACHE && (bits & PG_N) == 0) wbinvd(); /* NB. - this should not happen. */ pmap_tlb_shootpage(pmap_kernel(), va); @@ -793,16 +888,15 @@ pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot) void pmap_kremove(vaddr_t sva, vsize_t len) { - pt_entry_t *pte, opte; + uint32_t bits; vaddr_t va, eva; eva = sva + len; for (va = sva; va != eva; va += PAGE_SIZE) { - pte = kvtopte(va); - opte = i386_atomic_testset_ul(pte, 0); + bits = pmap_pte_set(va, 0, 0); #ifdef DIAGNOSTIC - if (opte & PG_PVLIST) + if (bits & PG_PVLIST) panic("pmap_kremove: PG_PVLIST mapping for 0x%lx", va); #endif } @@ -885,8 +979,8 @@ pmap_bootstrap(vaddr_t kva_start) kpm = pmap_kernel(); uvm_objinit(&kpm->pm_obj, NULL, 1); bzero(&kpm->pm_list, sizeof(kpm->pm_list)); /* pm_list not used */ - kpm->pm_pdir = (pd_entry_t *)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE); - kpm->pm_pdirpa = (u_int32_t) proc0.p_addr->u_pcb.pcb_cr3; + kpm->pm_pdir = (vaddr_t)(proc0.p_addr->u_pcb.pcb_cr3 + KERNBASE); + kpm->pm_pdirpa = proc0.p_addr->u_pcb.pcb_cr3; kpm->pm_stats.wired_count = kpm->pm_stats.resident_count = atop(kva_start - VM_MIN_KERNEL_ADDRESS); @@ -929,32 +1023,32 @@ pmap_bootstrap(vaddr_t kva_start) * as well; we could waste less space if we knew the largest * CPU ID beforehand. 
*/ - csrcp = (caddr_t) virtual_avail; csrc_pte = pte; + pmap_csrcp = (caddr_t) virtual_avail; csrc_pte = pte; - cdstp = (caddr_t) virtual_avail+PAGE_SIZE; cdst_pte = pte+1; + pmap_cdstp = (caddr_t) virtual_avail+PAGE_SIZE; cdst_pte = pte+1; - zerop = (caddr_t) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2; + pmap_zerop = (caddr_t) virtual_avail+PAGE_SIZE*2; zero_pte = pte+2; - ptpp = (caddr_t) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3; + pmap_ptpp = (caddr_t) virtual_avail+PAGE_SIZE*3; ptp_pte = pte+3; - flshp = (caddr_t) virtual_avail+PAGE_SIZE*4; flsh_pte = pte+4; + pmap_flshp = (caddr_t) virtual_avail+PAGE_SIZE*4; flsh_pte = pte+4; virtual_avail += PAGE_SIZE * MAXCPUS * NPTECL; pte += MAXCPUS * NPTECL; #else - csrcp = (caddr_t) virtual_avail; csrc_pte = pte; /* allocate */ + pmap_csrcp = (caddr_t) virtual_avail; csrc_pte = pte; /* allocate */ virtual_avail += PAGE_SIZE; pte++; /* advance */ - cdstp = (caddr_t) virtual_avail; cdst_pte = pte; + pmap_cdstp = (caddr_t) virtual_avail; cdst_pte = pte; virtual_avail += PAGE_SIZE; pte++; - zerop = (caddr_t) virtual_avail; zero_pte = pte; + pmap_zerop = (caddr_t) virtual_avail; zero_pte = pte; virtual_avail += PAGE_SIZE; pte++; - ptpp = (caddr_t) virtual_avail; ptp_pte = pte; + pmap_ptpp = (caddr_t) virtual_avail; ptp_pte = pte; virtual_avail += PAGE_SIZE; pte++; - flshp = (caddr_t) virtual_avail; flsh_pte = pte; + pmap_flshp = (caddr_t) virtual_avail; flsh_pte = pte; virtual_avail += PAGE_SIZE; pte++; #endif @@ -984,7 +1078,7 @@ pmap_bootstrap(vaddr_t kva_start) * initialize the pmap pool. */ - pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl", + pool_init(&pmap_pmap_pool, sizeof(struct pmap), 32, 0, 0, "pmappl", &pool_allocator_nointr); pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, 0, 0, "pvpl", &pmap_pv_page_allocator); @@ -1001,14 +1095,20 @@ pmap_bootstrap(vaddr_t kva_start) * trampoline code can be entered. */ void -pmap_prealloc_lowmem_ptp(paddr_t ptppa) +pmap_prealloc_lowmem_ptp(void) { pt_entry_t *pte, npte; vaddr_t ptpva = (vaddr_t)vtopte(0); + /* If PAE, use the PAE-specific preallocator */ + if (cpu_pae) { + pmap_prealloc_lowmem_ptp_pae(); + return; + } + /* enter pa for pte 0 into recursive map */ pte = vtopte(ptpva); - npte = ptppa | PG_RW | PG_V | PG_U | PG_M; + npte = PTP0_PA | PG_RW | PG_V | PG_U | PG_M; i386_atomic_testset_ul(pte, npte); @@ -1181,14 +1281,10 @@ pmap_remove_pv(struct vm_page *pg, struct pmap *pmap, vaddr_t va) * => we use the ptp's wire_count to count the number of active mappings * in the PTP (we start it at one to prevent any chance this PTP * will ever leak onto the active/inactive queues) - * => we may need to lock pv lists if we have to steal a PTP - * => just_try: true if we want a PTP, but not enough to steal one - * from another pmap (e.g. during optional functions like pmap_copy) */ struct vm_page * -pmap_alloc_ptp(struct pmap *pmap, int pde_index, boolean_t just_try, - pt_entry_t pde_flags) +pmap_alloc_ptp_86(struct pmap *pmap, int pde_index, pt_entry_t pde_flags) { struct vm_page *ptp; @@ -1200,7 +1296,7 @@ pmap_alloc_ptp(struct pmap *pmap, int pde_index, boolean_t just_try, /* got one! 
*/ atomic_clearbits_int(&ptp->pg_flags, PG_BUSY); ptp->wire_count = 1; /* no mappings yet */ - pmap->pm_pdir[pde_index] = (pd_entry_t)(VM_PAGE_TO_PHYS(ptp) | + PDE(pmap, pde_index) = (pd_entry_t)(VM_PAGE_TO_PHYS(ptp) | PG_RW | PG_V | PG_M | PG_U | pde_flags); pmap->pm_stats.resident_count++; /* count PTP as resident */ pmap->pm_ptphint = ptp; @@ -1215,15 +1311,14 @@ pmap_alloc_ptp(struct pmap *pmap, int pde_index, boolean_t just_try, */ struct vm_page * -pmap_get_ptp(struct pmap *pmap, int pde_index, boolean_t just_try) +pmap_get_ptp_86(struct pmap *pmap, int pde_index) { struct vm_page *ptp; - if (pmap_valid_entry(pmap->pm_pdir[pde_index])) { - + if (pmap_valid_entry(PDE(pmap, pde_index))) { /* valid... check hint (saves us a PA->PG lookup) */ if (pmap->pm_ptphint && - (pmap->pm_pdir[pde_index] & PG_FRAME) == + (PDE(pmap, pde_index) & PG_FRAME) == VM_PAGE_TO_PHYS(pmap->pm_ptphint)) return(pmap->pm_ptphint); @@ -1237,14 +1332,14 @@ pmap_get_ptp(struct pmap *pmap, int pde_index, boolean_t just_try) } /* allocate a new PTP (updates ptphint) */ - return (pmap_alloc_ptp(pmap, pde_index, just_try, PG_u)); + return (pmap_alloc_ptp_86(pmap, pde_index, PG_u)); } void pmap_drop_ptp(struct pmap *pm, vaddr_t va, struct vm_page *ptp, pt_entry_t *ptes) { - i386_atomic_testset_ul(&pm->pm_pdir[pdei(va)], 0); + i386_atomic_testset_ul(&PDE(pm, pdei(va)), 0); pmap_tlb_shootpage(curcpu()->ci_curpmap, ((vaddr_t)ptes) + ptp->offset); #ifdef MULTIPROCESSOR /* @@ -1279,17 +1374,7 @@ pmap_create(void) struct pmap *pmap; pmap = pool_get(&pmap_pmap_pool, PR_WAITOK); - pmap_pinit(pmap); - return(pmap); -} -/* - * pmap_pinit: given a zero'd pmap structure, init it. - */ - -void -pmap_pinit(struct pmap *pmap) -{ /* init uvm_object */ uvm_objinit(&pmap->pm_obj, NULL, 1); pmap->pm_stats.wired_count = 0; @@ -1298,27 +1383,35 @@ pmap_pinit(struct pmap *pmap) pmap->pm_hiexec = 0; pmap->pm_flags = 0; + /* init the LDT */ + pmap->pm_ldt = NULL; + pmap->pm_ldt_len = 0; + pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); + setsegment(&pmap->pm_codeseg, 0, atop(I386_MAX_EXE_ADDR) - 1, SDT_MEMERA, SEL_UPL, 1, 1); + pmap_pinit_pd(pmap); + return (pmap); +} + +void +pmap_pinit_pd_86(struct pmap *pmap) +{ /* allocate PDP */ - pmap->pm_pdir = (pd_entry_t *) uvm_km_alloc(kernel_map, NBPG); - if (pmap->pm_pdir == NULL) + pmap->pm_pdir = uvm_km_alloc(kernel_map, NBPG); + if (pmap->pm_pdir == 0) panic("pmap_pinit: kernel_map out of virtual space!"); - (void) pmap_extract(pmap_kernel(), (vaddr_t)pmap->pm_pdir, - (paddr_t *)&pmap->pm_pdirpa); + pmap_extract(pmap_kernel(), (vaddr_t)pmap->pm_pdir, + &pmap->pm_pdirpa); + pmap->pm_pdirsize = NBPG; /* init PDP */ /* zero init area */ - bzero(pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t)); + bzero((void *)pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t)); /* put in recursive PDE to map the PTEs */ - pmap->pm_pdir[PDSLOT_PTE] = pmap->pm_pdirpa | PG_V | PG_KW | PG_U | - PG_M; - - /* init the LDT */ - pmap->pm_ldt = NULL; - pmap->pm_ldt_len = 0; - pmap->pm_ldt_sel = GSEL(GLDT_SEL, SEL_KPL); + PDE(pmap, PDSLOT_PTE) = pmap->pm_pdirpa | PG_V | PG_KW | PG_U | PG_M; + PDE(pmap, PDSLOT_PTE + 1) = 0; /* * we need to lock pmaps_lock to prevent nkpde from changing on @@ -1327,10 +1420,10 @@ pmap_pinit(struct pmap *pmap) * already allocated kernel PTPs to cover the range... 
*/ /* put in kernel VM PDEs */ - bcopy(&PDP_BASE[PDSLOT_KERN], &pmap->pm_pdir[PDSLOT_KERN], + bcopy(&PDP_BASE[PDSLOT_KERN], &PDE(pmap, PDSLOT_KERN), nkpde * sizeof(pd_entry_t)); /* zero the rest */ - bzero(&pmap->pm_pdir[PDSLOT_KERN + nkpde], + bzero(&PDE(pmap, PDSLOT_KERN + nkpde), NBPG - ((PDSLOT_KERN + nkpde) * sizeof(pd_entry_t))); LIST_INSERT_HEAD(&pmaps, pmap, pm_list); } @@ -1362,8 +1455,8 @@ pmap_destroy(struct pmap *pmap) uvm_pagefree(pg); } - uvm_km_free(kernel_map, (vaddr_t)pmap->pm_pdir, NBPG); - pmap->pm_pdir = NULL; + uvm_km_free(kernel_map, pmap->pm_pdir, pmap->pm_pdirsize); + pmap->pm_pdir = 0; #ifdef USER_LDT if (pmap->pm_flags & PMF_USER_LDT) { @@ -1524,14 +1617,14 @@ pmap_deactivate(struct proc *p) */ boolean_t -pmap_extract(struct pmap *pmap, vaddr_t va, paddr_t *pap) +pmap_extract_86(struct pmap *pmap, vaddr_t va, paddr_t *pap) { pt_entry_t *ptes, pte; - if (pmap_valid_entry(pmap->pm_pdir[pdei(va)])) { - ptes = pmap_map_ptes(pmap); + if (pmap_valid_entry(PDE(pmap, pdei(va)))) { + ptes = pmap_map_ptes_86(pmap); pte = ptes[atop(va)]; - pmap_unmap_ptes(pmap); + pmap_unmap_ptes_86(pmap); if (!pmap_valid_entry(pte)) return (FALSE); if (pap != NULL) @@ -1569,13 +1662,13 @@ pmap_zero_page(struct vm_page *pg) * initialized. */ void -pmap_zero_phys(paddr_t pa) +pmap_zero_phys_86(paddr_t pa) { #ifdef MULTIPROCESSOR int id = cpu_number(); #endif pt_entry_t *zpte = PTESLEW(zero_pte, id); - caddr_t zerova = VASLEW(zerop, id); + caddr_t zerova = VASLEW(pmap_zerop, id); #ifdef DIAGNOSTIC if (*zpte) @@ -1593,13 +1686,13 @@ pmap_zero_phys(paddr_t pa) */ boolean_t -pmap_zero_page_uncached(paddr_t pa) +pmap_zero_page_uncached_86(paddr_t pa) { #ifdef MULTIPROCESSOR int id = cpu_number(); #endif pt_entry_t *zpte = PTESLEW(zero_pte, id); - caddr_t zerova = VASLEW(zerop, id); + caddr_t zerova = VASLEW(pmap_zerop, id); #ifdef DIAGNOSTIC if (*zpte) @@ -1608,7 +1701,7 @@ pmap_zero_page_uncached(paddr_t pa) *zpte = (pa & PG_FRAME) | PG_V | PG_RW | PG_N; /* map in */ pmap_update_pg((vaddr_t)zerova); /* flush TLB */ - pagezero(zerova, PAGE_SIZE); /* zero */ + pagezero(zerova, PAGE_SIZE); /* zero */ *zpte = 0; return (TRUE); @@ -1639,10 +1732,19 @@ pmap_flush_page(paddr_t pa) #ifdef MULTIPROCESSOR int id = cpu_number(); #endif - pt_entry_t *pte = PTESLEW(flsh_pte, id); - caddr_t va = VASLEW(flshp, id); + pt_entry_t *pte; + caddr_t va; KDASSERT(PHYS_TO_VM_PAGE(pa) != NULL); + + if (cpu_pae) { + pmap_flush_page_pae(pa); + return; + } + + pte = PTESLEW(flsh_pte, id); + va = VASLEW(pmap_flshp, id); + #ifdef DIAGNOSTIC if (*pte) panic("pmap_flush_page: lock botch"); @@ -1660,7 +1762,7 @@ pmap_flush_page(paddr_t pa) */ void -pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) +pmap_copy_page_86(struct vm_page *srcpg, struct vm_page *dstpg) { paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg); paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg); @@ -1669,8 +1771,8 @@ pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) #endif pt_entry_t *spte = PTESLEW(csrc_pte, id); pt_entry_t *dpte = PTESLEW(cdst_pte, id); - caddr_t csrcva = VASLEW(csrcp, id); - caddr_t cdstva = VASLEW(cdstp, id); + caddr_t csrcva = VASLEW(pmap_csrcp, id); + caddr_t cdstva = VASLEW(pmap_cdstp, id); #ifdef DIAGNOSTIC if (*spte || *dpte) @@ -1701,7 +1803,7 @@ pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg) */ void -pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva, +pmap_remove_ptes_86(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva, vaddr_t startva, vaddr_t endva, int flags) { struct pv_entry *pv_tofree = 
NULL; /* list of pv_entrys to free */ @@ -1762,7 +1864,7 @@ pmap_remove_ptes(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva, #endif /* sync R/M bits */ - pmap_sync_flags_pte(pg, opte); + pmap_sync_flags_pte_86(pg, opte); pve = pmap_remove_pv(pg, pmap, startva); if (pve) { pve->pv_next = pv_tofree; @@ -1788,7 +1890,7 @@ pmap_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva) } void -pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags) +pmap_do_remove_86(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags) { pt_entry_t *ptes; paddr_t ptppa; @@ -1800,7 +1902,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags) TAILQ_INIT(&empty_ptps); - ptes = pmap_map_ptes(pmap); /* locks pmap */ + ptes = pmap_map_ptes_86(pmap); /* locks pmap */ /* * Decide if we want to shoot the whole tlb or just the range. @@ -1836,12 +1938,12 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags) /* XXXCDC: ugly hack to avoid freeing PDP here */ continue; - if (!pmap_valid_entry(pmap->pm_pdir[pdei(va)])) + if (!pmap_valid_entry(PDE(pmap, pdei(va)))) /* valid block? */ continue; /* PA of the PTP */ - ptppa = (pmap->pm_pdir[pdei(va)] & PG_FRAME); + ptppa = PDE(pmap, pdei(va)) & PG_FRAME; /* get PTP if non-kernel mapping */ if (pmap == pmap_kernel()) { @@ -1860,7 +1962,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags) #endif } } - pmap_remove_ptes(pmap, ptp, (vaddr_t)&ptes[atop(va)], + pmap_remove_ptes_86(pmap, ptp, (vaddr_t)&ptes[atop(va)], va, blkendva, flags); /* If PTP is no longer being used, free it. */ @@ -1877,7 +1979,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags) pmap_tlb_shoottlb(); pmap_tlb_shootwait(); - pmap_unmap_ptes(pmap); + pmap_unmap_ptes_86(pmap); while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { TAILQ_REMOVE(&empty_ptps, ptp, pageq); uvm_pagefree(ptp); @@ -1891,7 +1993,7 @@ pmap_do_remove(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags) */ void -pmap_page_remove(struct vm_page *pg) +pmap_page_remove_86(struct vm_page *pg) { struct pv_entry *pve; pt_entry_t *ptes, opte; @@ -1904,23 +2006,21 @@ pmap_page_remove(struct vm_page *pg) TAILQ_INIT(&empty_ptps); for (pve = pg->mdpage.pv_list ; pve != NULL ; pve = pve->pv_next) { - ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */ - + ptes = pmap_map_ptes_86(pve->pv_pmap); /* locks pmap */ #ifdef DIAGNOSTIC - if (pve->pv_ptp && (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] & + if (pve->pv_ptp && (PDE(pve->pv_pmap, pdei(pve->pv_va)) & PG_FRAME) != VM_PAGE_TO_PHYS(pve->pv_ptp)) { printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n", - pg, pve->pv_va, pve->pv_ptp); + pg, pve->pv_va, pve->pv_ptp); printf("pmap_page_remove: PTP's phys addr: " - "actual=%x, recorded=%lx\n", - (pve->pv_pmap->pm_pdir[pdei(pve->pv_va)] & + "actual=%x, recorded=%lx\n", + (PDE(pve->pv_pmap, pdei(pve->pv_va)) & PG_FRAME), VM_PAGE_TO_PHYS(pve->pv_ptp)); panic("pmap_page_remove: mapped managed page has " - "invalid pv_ptp field"); - } + "invalid pv_ptp field"); +} #endif - opte = i386_atomic_testset_ul(&ptes[atop(pve->pv_va)], 0); if (opte & PG_W) @@ -1928,7 +2028,7 @@ pmap_page_remove(struct vm_page *pg) pve->pv_pmap->pm_stats.resident_count--; /* sync R/M bits */ - pmap_sync_flags_pte(pg, opte); + pmap_sync_flags_pte_86(pg, opte); /* update the PTP reference count. free if last reference. 
*/ if (pve->pv_ptp && --pve->pv_ptp->wire_count <= 1) { @@ -1939,7 +2039,7 @@ pmap_page_remove(struct vm_page *pg) pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va); - pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */ + pmap_unmap_ptes_86(pve->pv_pmap); /* unlocks pmap */ } pmap_free_pvs(NULL, pg->mdpage.pv_list); pg->mdpage.pv_list = NULL; @@ -1963,7 +2063,7 @@ pmap_page_remove(struct vm_page *pg) */ boolean_t -pmap_test_attrs(struct vm_page *pg, int testbits) +pmap_test_attrs_86(struct vm_page *pg, int testbits) { struct pv_entry *pve; pt_entry_t *ptes, pte; @@ -1977,9 +2077,9 @@ pmap_test_attrs(struct vm_page *pg, int testbits) mybits = 0; for (pve = pg->mdpage.pv_list; pve != NULL && mybits == 0; pve = pve->pv_next) { - ptes = pmap_map_ptes(pve->pv_pmap); + ptes = pmap_map_ptes_86(pve->pv_pmap); pte = ptes[atop(pve->pv_va)]; - pmap_unmap_ptes(pve->pv_pmap); + pmap_unmap_ptes_86(pve->pv_pmap); mybits |= (pte & testbits); } @@ -1998,7 +2098,7 @@ pmap_test_attrs(struct vm_page *pg, int testbits) */ boolean_t -pmap_clear_attrs(struct vm_page *pg, int clearbits) +pmap_clear_attrs_86(struct vm_page *pg, int clearbits) { struct pv_entry *pve; pt_entry_t *ptes, opte; @@ -2012,10 +2112,10 @@ pmap_clear_attrs(struct vm_page *pg, int clearbits) atomic_clearbits_int(&pg->pg_flags, clearflags); for (pve = pg->mdpage.pv_list; pve != NULL; pve = pve->pv_next) { - ptes = pmap_map_ptes(pve->pv_pmap); /* locks pmap */ + ptes = pmap_map_ptes_86(pve->pv_pmap); /* locks pmap */ #ifdef DIAGNOSTIC - if (!pmap_valid_entry(pve->pv_pmap->pm_pdir[pdei(pve->pv_va)])) - panic("pmap_change_attrs: mapping without PTP " + if (!pmap_valid_entry(PDE(pve->pv_pmap, pdei(pve->pv_va)))) + panic("pmap_clear_attrs_86: mapping without PTP " "detected"); #endif @@ -2026,7 +2126,7 @@ pmap_clear_attrs(struct vm_page *pg, int clearbits) (opte & clearbits)); pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va); } - pmap_unmap_ptes(pve->pv_pmap); /* unlocks pmap */ + pmap_unmap_ptes_86(pve->pv_pmap); /* unlocks pmap */ } pmap_tlb_shootwait(); @@ -2060,7 +2160,7 @@ pmap_clear_attrs(struct vm_page *pg, int clearbits) */ void -pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, +pmap_write_protect_86(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) { pt_entry_t *ptes, *spte, *epte, npte, opte; @@ -2069,7 +2169,7 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vaddr_t va; int shootall = 0; - ptes = pmap_map_ptes(pmap); /* locks pmap */ + ptes = pmap_map_ptes_86(pmap); /* locks pmap */ /* should be ok, but just in case ... */ sva &= PG_FRAME; @@ -2097,7 +2197,7 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, continue; /* empty block? 
*/ - if (!pmap_valid_entry(pmap->pm_pdir[pdei(va)])) + if (!pmap_valid_entry(PDE(pmap, pdei(va)))) continue; md_prot = protection_codes[prot]; @@ -2132,7 +2232,7 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, pmap_tlb_shootrange(pmap, sva, eva); pmap_tlb_shootwait(); - pmap_unmap_ptes(pmap); /* unlocks pmap */ + pmap_unmap_ptes_86(pmap); /* unlocks pmap */ } /* @@ -2146,32 +2246,34 @@ pmap_write_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, */ void -pmap_unwire(struct pmap *pmap, vaddr_t va) +pmap_unwire_86(struct pmap *pmap, vaddr_t va) { pt_entry_t *ptes; - if (pmap_valid_entry(pmap->pm_pdir[pdei(va)])) { - ptes = pmap_map_ptes(pmap); /* locks pmap */ + if (pmap_valid_entry(PDE(pmap, pdei(va)))) { + ptes = pmap_map_ptes_86(pmap); /* locks pmap */ #ifdef DIAGNOSTIC if (!pmap_valid_entry(ptes[atop(va)])) - panic("pmap_unwire: invalid (unmapped) va 0x%lx", va); + panic("pmap_unwire_86: invalid (unmapped) va " + "0x%lx", va); #endif + if ((ptes[atop(va)] & PG_W) != 0) { i386_atomic_clearbits_l(&ptes[atop(va)], PG_W); pmap->pm_stats.wired_count--; } #ifdef DIAGNOSTIC else { - printf("pmap_unwire: wiring for pmap %p va 0x%lx " + printf("pmap_unwire_86: wiring for pmap %p va 0x%lx " "didn't change!\n", pmap, va); } #endif - pmap_unmap_ptes(pmap); /* unlocks map */ + pmap_unmap_ptes_86(pmap); /* unlocks map */ } #ifdef DIAGNOSTIC else { - panic("pmap_unwire: invalid PDE"); + panic("pmap_unwire_86: invalid PDE"); } #endif } @@ -2213,7 +2315,7 @@ pmap_collect(struct pmap *pmap) */ int -pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, +pmap_enter_86(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) { pt_entry_t *ptes, opte, npte; @@ -2238,7 +2340,7 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, /* sanity check: kernel PTPs should already have been pre-allocated */ if (va >= VM_MIN_KERNEL_ADDRESS && - !pmap_valid_entry(pmap->pm_pdir[pdei(va)])) + !pmap_valid_entry(PDE(pmap, pdei(va)))) panic("pmap_enter: missing kernel PTP!"); #endif if (pmap_initialized) @@ -2251,11 +2353,11 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, * map in ptes and get a pointer to our PTP (unless we are the kernel) */ - ptes = pmap_map_ptes(pmap); /* locks pmap */ + ptes = pmap_map_ptes_86(pmap); /* locks pmap */ if (pmap == pmap_kernel()) { ptp = NULL; } else { - ptp = pmap_get_ptp(pmap, pdei(va), FALSE); + ptp = pmap_get_ptp_86(pmap, pdei(va)); if (ptp == NULL) { if (flags & PMAP_CANFAIL) { error = ENOMEM; @@ -2298,12 +2400,13 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, pg = PHYS_TO_VM_PAGE(pa); #ifdef DIAGNOSTIC if (pg == NULL) - panic("pmap_enter: same pa PG_PVLIST " - "mapping with unmanaged page " - "pa = 0x%lx (0x%lx)", pa, - atop(pa)); + panic("pmap_enter_86: same pa " + "PG_PVLIST mapping with " + "unmanaged page " + "pa = 0x%lx (0x%lx)", pa, + atop(pa)); #endif - pmap_sync_flags_pte(pg, opte); + pmap_sync_flags_pte_86(pg, opte); } goto enter_now; } @@ -2321,11 +2424,11 @@ pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, pg = PHYS_TO_VM_PAGE(opte & PG_FRAME); #ifdef DIAGNOSTIC if (pg == NULL) - panic("pmap_enter: PG_PVLIST mapping with " + panic("pmap_enter_86: PG_PVLIST mapping with " "unmanaged page " "pa = 0x%lx (0x%lx)", pa, atop(pa)); #endif - pmap_sync_flags_pte(pg, opte); + pmap_sync_flags_pte_86(pg, opte); pve = pmap_remove_pv(pg, pmap, va); pg = NULL; /* This is not page we are looking for */ } @@ -2394,7 +2497,7 @@ enter_now: KASSERT(nocache == 0); wc = TRUE; } - pmap_sync_flags_pte(pg, npte); + 
pmap_sync_flags_pte_86(pg, npte); } if (wc) npte |= pmap_pg_wc; @@ -2415,7 +2518,7 @@ enter_now: error = 0; out: - pmap_unmap_ptes(pmap); + pmap_unmap_ptes_86(pmap); if (freepve) pmap_free_pv(pmap, freepve); @@ -2430,7 +2533,7 @@ out: */ vaddr_t -pmap_growkernel(vaddr_t maxkvaddr) +pmap_growkernel_86(vaddr_t maxkvaddr) { struct pmap *kpm = pmap_kernel(), *pm; int needed_kpde; /* needed number of kernel PTPs */ @@ -2460,9 +2563,9 @@ pmap_growkernel(vaddr_t maxkvaddr) if (uvm_page_physget(&ptaddr) == FALSE) panic("pmap_growkernel: out of memory"); - pmap_zero_phys(ptaddr); + pmap_zero_phys_86(ptaddr); - kpm->pm_pdir[PDSLOT_KERN + nkpde] = + PDE(kpm, PDSLOT_KERN + nkpde) = ptaddr | PG_RW | PG_V | PG_U | PG_M; /* count PTP as resident */ @@ -2476,13 +2579,13 @@ pmap_growkernel(vaddr_t maxkvaddr) * INVOKED WHILE pmap_init() IS RUNNING! */ - while (!pmap_alloc_ptp(kpm, PDSLOT_KERN + nkpde, FALSE, 0)) + while (!pmap_alloc_ptp_86(kpm, PDSLOT_KERN + nkpde, 0)) uvm_wait("pmap_growkernel"); /* distribute new kernel PTP to all active pmaps */ LIST_FOREACH(pm, &pmaps, pm_list) { - pm->pm_pdir[PDSLOT_KERN + nkpde] = - kpm->pm_pdir[PDSLOT_KERN + nkpde]; + PDE(pm, PDSLOT_KERN + nkpde) = + PDE(kpm, PDSLOT_KERN + nkpde); } } @@ -2502,7 +2605,7 @@ void pmap_dump(struct pmap *, vaddr_t, vaddr_t); */ void -pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva) +pmap_dump_86(struct pmap *pmap, vaddr_t sva, vaddr_t eva) { pt_entry_t *ptes, *pte; vaddr_t blkendva; @@ -2515,7 +2618,7 @@ pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva) if (eva > VM_MAXUSER_ADDRESS || eva <= sva) eva = VM_MAXUSER_ADDRESS; - ptes = pmap_map_ptes(pmap); /* locks pmap */ + ptes = pmap_map_ptes_86(pmap); /* locks pmap */ /* * dumping a range of pages: we dump in PTP sized blocks (4MB) @@ -2529,7 +2632,7 @@ pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva) blkendva = eva; /* valid block? 
*/ - if (!pmap_valid_entry(pmap->pm_pdir[pdei(sva)])) + if (!pmap_valid_entry(PDE(pmap, pdei(sva)))) continue; pte = &ptes[atop(sva)]; @@ -2540,7 +2643,7 @@ pmap_dump(struct pmap *pmap, vaddr_t sva, vaddr_t eva) sva, *pte, *pte & PG_FRAME); } } - pmap_unmap_ptes(pmap); + pmap_unmap_ptes_86(pmap); } #endif @@ -2755,3 +2858,31 @@ pmap_tlb_shoottlb(void) tlbflush(); } #endif /* MULTIPROCESSOR */ + +u_int32_t (*pmap_pte_set_p)(vaddr_t, paddr_t, u_int32_t) = + pmap_pte_set_86; +u_int32_t (*pmap_pte_setbits_p)(vaddr_t, u_int32_t, u_int32_t) = + pmap_pte_setbits_86; +u_int32_t (*pmap_pte_bits_p)(vaddr_t) = pmap_pte_bits_86; +paddr_t (*pmap_pte_paddr_p)(vaddr_t) = pmap_pte_paddr_86; +boolean_t (*pmap_clear_attrs_p)(struct vm_page *, int) = + pmap_clear_attrs_86; +int (*pmap_enter_p)(pmap_t, vaddr_t, paddr_t, vm_prot_t, int) = + pmap_enter_86; +boolean_t (*pmap_extract_p)(pmap_t, vaddr_t, paddr_t *) = + pmap_extract_86; +vaddr_t (*pmap_growkernel_p)(vaddr_t) = pmap_growkernel_86; +void (*pmap_page_remove_p)(struct vm_page *) = pmap_page_remove_86; +void (*pmap_do_remove_p)(struct pmap *, vaddr_t, vaddr_t, int) = + pmap_do_remove_86; +boolean_t (*pmap_test_attrs_p)(struct vm_page *, int) = + pmap_test_attrs_86; +void (*pmap_unwire_p)(struct pmap *, vaddr_t) = pmap_unwire_86; +void (*pmap_write_protect_p)(struct pmap *, vaddr_t, vaddr_t, + vm_prot_t) = pmap_write_protect_86; +void (*pmap_pinit_pd_p)(pmap_t) = pmap_pinit_pd_86; +void (*pmap_zero_phys_p)(paddr_t) = pmap_zero_phys_86; +boolean_t (*pmap_zero_page_uncached_p)(paddr_t) = + pmap_zero_page_uncached_86; +void (*pmap_copy_page_p)(struct vm_page *, struct vm_page *) = + pmap_copy_page_86; diff --git a/sys/arch/i386/i386/pmapae.c b/sys/arch/i386/i386/pmapae.c index f0636f758c4..d00328f1503 100644 --- a/sys/arch/i386/i386/pmapae.c +++ b/sys/arch/i386/i386/pmapae.c @@ -1,7 +1,7 @@ -/* $OpenBSD: pmapae.c,v 1.27 2015/02/02 09:29:53 mlarkin Exp $ */ +/* $OpenBSD: pmapae.c,v 1.28 2015/04/12 18:37:53 mlarkin Exp $ */ /* - * Copyright (c) 2006 Michael Shalayeff + * Copyright (c) 2006-2008 Michael Shalayeff * All rights reserved. * * Permission to use, copy, modify, and distribute this software for any @@ -398,16 +398,6 @@ * in the alternate PTE space (since that is determined by the * entry in the PDP). * - * - pvh_lock (per pv_head) - * this lock protects the pv_entry list which is chained off the - * pv_head structure for a specific managed PA. it is locked - * when traversing the list (e.g. adding/removing mappings, - * syncing R/M bits, etc.) - * - * - pvalloc_lock - * this lock protects the data structures which are used to manage - * the free list of pv_entry structures. - * * - pmaps_lock * this lock protects the list of active pmaps (headed by "pmaps"). * we lock it when adding or removing pmaps from this list. 
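
The commit message's function-pointer approach is visible at the bottom of pmap.c above: every mode-dependent operation gets a *_p pointer initialized to its non-PAE (_86) variant, and pmap_bootstrap_pae() repoints the whole table once PAE page tables are up. A standalone sketch of that dispatch pattern follows; the pmap_pte_bits() wrapper macro and the stub bodies are assumptions for illustration (the committed wrappers live in pmap.h):

#include <stdint.h>
#include <stdio.h>

typedef uint32_t vaddr_t;

/* stand-in for the 32-bit PTE path */
static uint32_t
pmap_pte_bits_86(vaddr_t va)
{
	(void)va;			/* unused in the stub */
	return (0x86);
}

/* stand-in for the 64-bit PTE path */
static uint32_t
pmap_pte_bits_pae(vaddr_t va)
{
	(void)va;			/* unused in the stub */
	return (0xae);
}

/* dispatch pointer, non-PAE by default, as at the end of pmap.c */
uint32_t (*pmap_pte_bits_p)(vaddr_t) = pmap_pte_bits_86;
#define pmap_pte_bits(va)	(*pmap_pte_bits_p)(va)

static void
pmap_bootstrap_pae(void)
{
	/* ...build PAE tables, set CR4_PAE... then flip the table */
	pmap_pte_bits_p = pmap_pte_bits_pae;
}

int
main(void)
{
	printf("before: %#x\n", pmap_pte_bits(0));	/* 0x86 */
	pmap_bootstrap_pae();
	printf("after:  %#x\n", pmap_pte_bits(0));	/* 0xae */
	return (0);
}

One indirect call per pmap operation is the whole runtime cost of supporting both PTE formats in a single kernel, which lines up with the under-0.3% build slowdown reported in the commit message.
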
@@ -421,15 +411,10 @@ * Redefine the PDSHIFT, NBPD */ #undef PDSHIFT -#define PD_MASK 0xffe00000 /* page directory address bits */ #define PDSHIFT 21 /* page directory address shift */ -#define PT_MASK 0x001ff000 /* page table address bits */ #undef NBPD #define NBPD (1U << PDSHIFT) /* # bytes mapped by PD (2MB) */ -/* - * - */ #undef PDSLOT_PTE #define PDSLOT_PTE (1660U) /* 1660: for recursive PDP map */ #undef PDSLOT_KERN @@ -451,19 +436,11 @@ #define PDP_PDE (PDP_BASE + PDSLOT_PTE) #define APDP_PDE (PDP_BASE + PDSLOT_APTE) -#define PTES_PER_PTP (NBPG / sizeof(pt_entry_t)) /* # of PTEs in a PTP */ - -/* - * various address macros - * - * vtopte: return a pointer to the PTE mapping a VA - * - */ -#define vtopte(VA) (PTE_BASE + atop((vaddr_t)VA)) - /* * pdei/ptei: generate index into PDP/PTP from a VA */ +#define PD_MASK 0xffe00000 /* page directory address bits */ +#define PT_MASK 0x001ff000 /* page table address bits */ #define pdei(VA) (((VA) & PD_MASK) >> PDSHIFT) #define ptei(VA) (((VA) & PT_MASK) >> PGSHIFT) @@ -472,6 +449,15 @@ */ #define i386_round_pdr(x) ((((unsigned)(x)) + ~PD_MASK) & PD_MASK) +/* + * various address macros + * + * vtopte: return a pointer to the PTE mapping a VA + * + */ +#define vtopte(VA) (PTE_BASE + atop((vaddr_t)VA)) + + /* * PTP macros: * A PTP's index is the PD index of the PDE that points to it. @@ -529,8 +515,8 @@ extern boolean_t pmap_initialized; /* pmap_init done yet? */ * special VAs and the PTEs that map them */ -static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte; -extern caddr_t pmap_csrcp, pmap_cdstp, pmap_zerop, pmap_ptpp; +static pt_entry_t *csrc_pte, *cdst_pte, *zero_pte, *ptp_pte, *flsh_pte; +extern caddr_t pmap_csrcp, pmap_cdstp, pmap_zerop, pmap_ptpp, pmap_flshp; extern int pmap_pg_g; extern struct pmap_head pmaps; @@ -538,64 +524,39 @@ extern struct pmap_head pmaps; /* * local prototypes */ - -struct vm_page *pmap_alloc_ptp_pae(struct pmap *, int, boolean_t); -#define ALLOCPV_NEED 0 /* need PV now */ -#define ALLOCPV_TRY 1 /* just try to allocate, don't steal */ -#define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */ -struct vm_page *pmap_get_ptp_pae(struct pmap *, int, boolean_t); +struct pv_entry *pmap_add_pvpage(struct pv_page *, boolean_t); +struct pv_entry *pmap_alloc_pv(struct pmap *, int); /* see codes in pmap.h */ +struct pv_entry *pmap_alloc_pvpage(struct pmap *, int); +void pmap_enter_pv(struct vm_page *, struct pv_entry *, + struct pmap *, vaddr_t, struct vm_page *); +void pmap_free_pv(struct pmap *, struct pv_entry *); +void pmap_free_pvs(struct pmap *, struct pv_entry *); +void pmap_free_pv_doit(struct pv_entry *); +void pmap_free_pvpage(void); +struct vm_page *pmap_alloc_ptp_pae(struct pmap *, int, pt_entry_t); +struct vm_page *pmap_get_ptp_pae(struct pmap *, int); pt_entry_t *pmap_map_ptes_pae(struct pmap *); +void pmap_do_remove_pae(struct pmap *, vaddr_t, vaddr_t, int); void pmap_remove_ptes_pae(struct pmap *, struct vm_page *, - vaddr_t, vaddr_t, vaddr_t, int32_t *); + vaddr_t, vaddr_t, vaddr_t, int); boolean_t pmap_remove_pte_pae(struct pmap *, struct vm_page *, - pt_entry_t *, vaddr_t, int32_t *); + pt_entry_t *, vaddr_t, int); +void pmap_sync_flags_pte_pae(struct vm_page *, pt_entry_t); void pmap_unmap_ptes_pae(struct pmap *); -vaddr_t pmap_tmpmap_pa_pae(paddr_t); -void pmap_tmpunmap_pa_pae(void); -/* - * pmap_tmpmap_pa: map a page in for tmp usage - */ - -vaddr_t -pmap_tmpmap_pa_pae(paddr_t pa) +static __inline u_int +pmap_pte2flags(pt_entry_t pte) { -#ifdef MULTIPROCESSOR - int id = cpu_number(); 
-#endif - pt_entry_t *ptpte = PTESLEW(ptp_pte, id); - caddr_t ptpva = VASLEW(pmap_ptpp, id); -#if defined(DIAGNOSTIC) - if (*ptpte) - panic("pmap_tmpmap_pa: ptp_pte in use?"); -#endif - *ptpte = PG_V | PG_RW | pa; /* always a new mapping */ - return((vaddr_t)ptpva); + return (((pte & PG_U) ? PG_PMAP_REF : 0) | + ((pte & PG_M) ? PG_PMAP_MOD : 0)); } -/* - * pmap_tmpunmap_pa: unmap a tmp use page (undoes pmap_tmpmap_pa) - */ - void -pmap_tmpunmap_pa_pae() +pmap_sync_flags_pte_pae(struct vm_page *pg, pt_entry_t pte) { -#ifdef MULTIPROCESSOR - int id = cpu_number(); -#endif - pt_entry_t *ptpte = PTESLEW(ptp_pte, id); - caddr_t ptpva = VASLEW(pmap_ptpp, id); -#if defined(DIAGNOSTIC) - if (!pmap_valid_entry(*ptpte)) - panic("pmap_tmpunmap_pa: our pte invalid?"); -#endif - *ptpte = 0; /* zap! */ - pmap_update_pg((vaddr_t)ptpva); -#ifdef MULTIPROCESSOR - /* - * No need for tlb shootdown here, since ptp_pte is per-CPU. - */ -#endif + if (pte & (PG_U|PG_M)) { + atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(pte)); + } } /* @@ -622,13 +583,17 @@ pmap_map_ptes_pae(struct pmap *pmap) /* need to load a new alternate pt space into curpmap? */ opde = *APDP_PDE; +#if defined(MULTIPROCESSOR) && defined(DIAGNOSTIC) + if (pmap_valid_entry(opde)) + panic("pmap_map_ptes: APTE valid"); +#endif if (!pmap_valid_entry(opde) || (opde & PG_FRAME) != pmap->pm_pdidx[0]) { - APDP_PDE[0] = pmap->pm_pdidx[0] | PG_RW | PG_V; - APDP_PDE[1] = pmap->pm_pdidx[1] | PG_RW | PG_V; - APDP_PDE[2] = pmap->pm_pdidx[2] | PG_RW | PG_V; - APDP_PDE[3] = pmap->pm_pdidx[3] | PG_RW | PG_V; + APDP_PDE[0] = pmap->pm_pdidx[0] | PG_RW | PG_V | PG_U | PG_M; + APDP_PDE[1] = pmap->pm_pdidx[1] | PG_RW | PG_V | PG_U | PG_M; + APDP_PDE[2] = pmap->pm_pdidx[2] | PG_RW | PG_V | PG_U | PG_M; + APDP_PDE[3] = pmap->pm_pdidx[3] | PG_RW | PG_V | PG_U | PG_M; if (pmap_valid_entry(opde)) - pmap_apte_flush(curpcb->pcb_pmap); + pmap_apte_flush(); } return(APTE_BASE); } @@ -645,11 +610,14 @@ pmap_unmap_ptes_pae(struct pmap *pmap) if (!pmap_is_curpmap(pmap)) { #if defined(MULTIPROCESSOR) + int ef = read_eflags(); + disable_intr(); APDP_PDE[0] = 0; APDP_PDE[1] = 0; APDP_PDE[2] = 0; APDP_PDE[3] = 0; - pmap_apte_flush(curpcb->pcb_pmap); + pmap_apte_flush(); + write_eflags(ef); #endif } } @@ -659,6 +627,8 @@ pmap_pte_set_pae(vaddr_t va, paddr_t pa, u_int32_t bits) { pt_entry_t pte, *ptep = vtopte(va); + pa &= PMAP_PA_MASK; + pte = i386_atomic_testset_uq(ptep, pa | bits); return (pte & ~PG_FRAME); } @@ -696,7 +666,6 @@ pmap_pte_paddr_pae(vaddr_t va) void pmap_bootstrap_pae() { - extern paddr_t avail_end, avail_end2; extern int cpu_pae, nkpde; struct pmap *kpm = pmap_kernel(); struct vm_page *ptp; @@ -705,21 +674,22 @@ pmap_bootstrap_pae() vaddr_t va, eva; int i, pn, pe; - if (!cpu_pae || avail_end >= avail_end2 || !(cpu_feature & CPUID_PAE)){ - avail_end2 = avail_end; + if (!(cpu_feature & CPUID_PAE)){ return; } + cpu_pae = 1; + va = (vaddr_t)kpm->pm_pdir; kpm->pm_pdidx[0] = (va + 0*NBPG - KERNBASE) | PG_V; kpm->pm_pdidx[1] = (va + 1*NBPG - KERNBASE) | PG_V; kpm->pm_pdidx[2] = (va + 2*NBPG - KERNBASE) | PG_V; kpm->pm_pdidx[3] = (va + 3*NBPG - KERNBASE) | PG_V; /* map pde recursively into itself */ - PDE(kpm, PDSLOT_PTE+0) = kpm->pm_pdidx[0] | PG_KW; - PDE(kpm, PDSLOT_PTE+1) = kpm->pm_pdidx[1] | PG_KW; - PDE(kpm, PDSLOT_PTE+2) = kpm->pm_pdidx[2] | PG_KW; - PDE(kpm, PDSLOT_PTE+3) = kpm->pm_pdidx[3] | PG_KW; + PDE(kpm, PDSLOT_PTE+0) = kpm->pm_pdidx[0] | PG_KW | PG_M | PG_U; + PDE(kpm, PDSLOT_PTE+1) = kpm->pm_pdidx[1] | PG_KW | PG_M | PG_U; + PDE(kpm, PDSLOT_PTE+2) = 
kpm->pm_pdidx[2] | PG_KW | PG_M | PG_U; + PDE(kpm, PDSLOT_PTE+3) = kpm->pm_pdidx[3] | PG_KW | PG_M | PG_U; /* transfer all kernel mappings over into pae tables */ for (va = KERNBASE, eva = va + (nkpde << 22); @@ -728,9 +698,10 @@ pmap_bootstrap_pae() ptp = uvm_pagealloc(&kpm->pm_obj, va, NULL, UVM_PGA_ZERO); ptaddr = VM_PAGE_TO_PHYS(ptp); - PDE(kpm, pdei(va)) = ptaddr | PG_KW | PG_V; + PDE(kpm, pdei(va)) = ptaddr | PG_KW | PG_V | + PG_U | PG_M; pmap_pte_set_86((vaddr_t)vtopte(va), - ptaddr, PG_KW | PG_V); + ptaddr, PG_KW | PG_V | PG_U | PG_M); /* count PTP as resident */ kpm->pm_stats.resident_count++; @@ -750,23 +721,22 @@ pmap_bootstrap_pae() csrc_pte = vtopte(pmap_csrcp); cdst_pte = vtopte(pmap_cdstp); zero_pte = vtopte(pmap_zerop); - ptp_pte = vtopte(pmap_ptpp); + ptp_pte = vtopte(pmap_ptpp); + flsh_pte = vtopte(pmap_flshp); nkpde *= 2; nkptp_max = 2048 - PDSLOT_KERN - 4; - vm_max_address = (PDSLOT_PTE << PDSHIFT) + - (PDSLOT_PTE << PGSHIFT); pmap_pte_set_p = pmap_pte_set_pae; pmap_pte_setbits_p = pmap_pte_setbits_pae; pmap_pte_bits_p = pmap_pte_bits_pae; pmap_pte_paddr_p = pmap_pte_paddr_pae; - pmap_change_attrs_p = pmap_change_attrs_pae; + pmap_clear_attrs_p = pmap_clear_attrs_pae; pmap_enter_p = pmap_enter_pae; pmap_extract_p = pmap_extract_pae; pmap_growkernel_p = pmap_growkernel_pae; pmap_page_remove_p = pmap_page_remove_pae; - pmap_remove_p = pmap_remove_pae; + pmap_do_remove_p = pmap_do_remove_pae; pmap_test_attrs_p = pmap_test_attrs_pae; pmap_unwire_p = pmap_unwire_pae; pmap_write_protect_p = pmap_write_protect_pae; @@ -774,7 +744,6 @@ pmap_bootstrap_pae() pmap_zero_phys_p = pmap_zero_phys_pae; pmap_zero_page_uncached_p = pmap_zero_page_uncached_pae; pmap_copy_page_p = pmap_copy_page_pae; - pmap_try_steal_pv_p = pmap_try_steal_pv_pae; bzero((void *)kpm->pm_pdir + 8, (PDSLOT_PTE-1) * 8); /* TODO also reclaim old PDPs */ @@ -790,90 +759,7 @@ pmap_bootstrap_pae() } } - uvm_page_rehash(); - } -} - -/* - * p v _ e n t r y f u n c t i o n s - */ - -/* - * pv_entry allocation functions: - * the main pv_entry allocation functions are: - * pmap_alloc_pv: allocate a pv_entry structure - * pmap_free_pv: free one pv_entry - * pmap_free_pvs: free a list of pv_entrys - * - * the rest are helper functions - */ - -/* - * pmap_try_steal_pv: try and steal a pv_entry from a pmap - * - * => return true if we did it! - */ - -boolean_t -pmap_try_steal_pv_pae(struct pv_head *pvh, struct pv_entry *cpv, - struct pv_entry *prevpv) -{ - pt_entry_t *ptep, opte; -#ifdef MULTIPROCESSOR - int32_t cpumask = 0; -#endif - - /* - * we never steal kernel mappings or mappings from pmaps we can't lock - */ - - if (cpv->pv_pmap == pmap_kernel()) - return(FALSE); - - /* - * yes, we can try and steal it. first we need to remove the - * mapping from the pmap. - */ - - ptep = pmap_tmpmap_pvepte_pae(cpv); - if (*ptep & PG_W) { - ptep = NULL; /* wired page, avoid stealing this one */ - } else { - opte = i386_atomic_testset_uq(ptep, 0); /* zap! */ -#ifdef MULTIPROCESSOR - pmap_tlb_shootdown(cpv->pv_pmap, cpv->pv_va, opte, &cpumask); - pmap_tlb_shootnow(cpumask); -#else - /* Don't bother deferring in the single CPU case. */ - if (pmap_is_curpmap(cpv->pv_pmap)) - pmap_update_pg(cpv->pv_va); -#endif - pmap_tmpunmap_pvepte_pae(cpv); } - if (ptep == NULL) { - return(FALSE); /* wired page, abort! 
*/ - } - cpv->pv_pmap->pm_stats.resident_count--; - if (cpv->pv_ptp && cpv->pv_ptp->wire_count) - /* drop PTP's wired count */ - cpv->pv_ptp->wire_count--; - - /* - * XXX: if wire_count goes to one the PTP could be freed, however, - * we'd have to lock the page queues (etc.) to do that and it could - * cause deadlock headaches. besides, the pmap we just stole from - * may want the mapping back anyway, so leave the PTP around. - */ - - /* - * now we need to remove the entry from the pvlist - */ - - if (cpv == pvh->pvh_list) - pvh->pvh_list = cpv->pv_next; - else - prevpv->pv_next = cpv->pv_next; - return(TRUE); } /* @@ -895,7 +781,7 @@ pmap_try_steal_pv_pae(struct pv_head *pvh, struct pv_entry *cpv, */ struct vm_page * -pmap_alloc_ptp_pae(struct pmap *pmap, int pde_index, boolean_t just_try) +pmap_alloc_ptp_pae(struct pmap *pmap, int pde_index, pt_entry_t pde_flags) { struct vm_page *ptp; @@ -907,8 +793,8 @@ pmap_alloc_ptp_pae(struct pmap *pmap, int pde_index, boolean_t just_try) /* got one! */ atomic_clearbits_int(&ptp->pg_flags, PG_BUSY); ptp->wire_count = 1; /* no mappings yet */ - PDE(pmap, pde_index) = - (pd_entry_t)(VM_PAGE_TO_PHYS(ptp) | PG_u | PG_RW | PG_V); + PDE(pmap, pde_index) = (pd_entry_t)(VM_PAGE_TO_PHYS(ptp) | + PG_RW | PG_V | PG_M | PG_U | pde_flags); pmap->pm_stats.resident_count++; /* count PTP as resident */ pmap->pm_ptphint = ptp; return(ptp); @@ -922,12 +808,11 @@ pmap_alloc_ptp_pae(struct pmap *pmap, int pde_index, boolean_t just_try) */ struct vm_page * -pmap_get_ptp_pae(struct pmap *pmap, int pde_index, boolean_t just_try) +pmap_get_ptp_pae(struct pmap *pmap, int pde_index) { struct vm_page *ptp; if (pmap_valid_entry(PDE(pmap, pde_index))) { - /* valid... check hint (saves us a PA->PG lookup) */ if (pmap->pm_ptphint && (PDE(pmap, pde_index) & PG_FRAME) == @@ -944,7 +829,7 @@ pmap_get_ptp_pae(struct pmap *pmap, int pde_index, boolean_t just_try) } /* allocate a new PTP (updates ptphint) */ - return (pmap_alloc_ptp_pae(pmap, pde_index, just_try)); + return (pmap_alloc_ptp_pae(pmap, pde_index, PG_u)); } /* @@ -955,19 +840,23 @@ pmap_pinit_pd_pae(struct pmap *pmap) { extern int nkpde; vaddr_t va; + paddr_t pdidx[4]; /* allocate PDP */ pmap->pm_pdir = uvm_km_alloc(kernel_map, 4 * NBPG); - if (pmap->pm_pdir == NULL) + if (pmap->pm_pdir == 0) panic("pmap_pinit_pd_pae: kernel_map out of virtual space!"); /* page index is in the pmap! 
*/ pmap_extract(pmap_kernel(), (vaddr_t)pmap, &pmap->pm_pdirpa); - /* fill out the PDPT entries */ va = (vaddr_t)pmap->pm_pdir; - pmap_extract(pmap_kernel(), va + 0*NBPG, &pmap->pm_pdidx[0]); - pmap_extract(pmap_kernel(), va + 1*NBPG, &pmap->pm_pdidx[1]); - pmap_extract(pmap_kernel(), va + 2*NBPG, &pmap->pm_pdidx[2]); - pmap_extract(pmap_kernel(), va + 3*NBPG, &pmap->pm_pdidx[3]); + pmap_extract(pmap_kernel(), va + 0*NBPG, &pdidx[0]); + pmap_extract(pmap_kernel(), va + 1*NBPG, &pdidx[1]); + pmap_extract(pmap_kernel(), va + 2*NBPG, &pdidx[2]); + pmap_extract(pmap_kernel(), va + 3*NBPG, &pdidx[3]); + pmap->pm_pdidx[0] = (uint64_t)pdidx[0]; + pmap->pm_pdidx[1] = (uint64_t)pdidx[1]; + pmap->pm_pdidx[2] = (uint64_t)pdidx[2]; + pmap->pm_pdidx[3] = (uint64_t)pdidx[3]; pmap->pm_pdidx[0] |= PG_V; pmap->pm_pdidx[1] |= PG_V; pmap->pm_pdidx[2] |= PG_V; @@ -978,10 +867,10 @@ pmap_pinit_pd_pae(struct pmap *pmap) /* zero init area */ bzero((void *)pmap->pm_pdir, PDSLOT_PTE * sizeof(pd_entry_t)); /* put in recursive PDE to map the PTEs */ - PDE(pmap, PDSLOT_PTE+0) = pmap->pm_pdidx[0] | PG_KW; - PDE(pmap, PDSLOT_PTE+1) = pmap->pm_pdidx[1] | PG_KW; - PDE(pmap, PDSLOT_PTE+2) = pmap->pm_pdidx[2] | PG_KW; - PDE(pmap, PDSLOT_PTE+3) = pmap->pm_pdidx[3] | PG_KW; + PDE(pmap, PDSLOT_PTE+0) = pmap->pm_pdidx[0] | PG_KW | PG_U | PG_M; + PDE(pmap, PDSLOT_PTE+1) = pmap->pm_pdidx[1] | PG_KW | PG_U | PG_M; + PDE(pmap, PDSLOT_PTE+2) = pmap->pm_pdidx[2] | PG_KW | PG_U | PG_M; + PDE(pmap, PDSLOT_PTE+3) = pmap->pm_pdidx[3] | PG_KW | PG_U | PG_M; /* * we need to lock pmaps_lock to prevent nkpde from changing on @@ -1009,15 +898,16 @@ pmap_pinit_pd_pae(struct pmap *pmap) boolean_t pmap_extract_pae(struct pmap *pmap, vaddr_t va, paddr_t *pap) { - paddr_t retval; - pt_entry_t *ptes; + pt_entry_t *ptes, pte; - if (PDE(pmap, pdei(va))) { + if (pmap_valid_entry(PDE(pmap, pdei(va)))) { ptes = pmap_map_ptes_pae(pmap); - retval = (paddr_t)(ptes[atop(va)] & PG_FRAME); + pte = ptes[atop(va)]; pmap_unmap_ptes_pae(pmap); + if (!pmap_valid_entry(pte)) + return (FALSE); if (pap != NULL) - *pap = retval | (va & ~PG_FRAME); + *pap = (pte & PG_FRAME) | (va & ~PG_FRAME); return (TRUE); } return (FALSE); @@ -1042,6 +932,7 @@ pmap_zero_phys_pae(paddr_t pa) if (*zpte) panic("pmap_zero_phys: lock botch"); #endif + *zpte = (pa & PG_FRAME) | PG_V | PG_RW; /* map in */ pmap_update_pg((vaddr_t)zerova); /* flush TLB */ pagezero(zerova, PAGE_SIZE); /* zero */ @@ -1066,7 +957,7 @@ pmap_zero_page_uncached_pae(paddr_t pa) panic("pmap_zero_page_uncached: lock botch"); #endif - *zpte = (pa & PG_FRAME) | PG_V | PG_RW | PG_N); /* map in */ + *zpte = (pa & PG_FRAME) | PG_V | PG_RW | PG_N; /* map in */ pmap_update_pg((vaddr_t)zerova); /* flush TLB */ pagezero(zerova, PAGE_SIZE); /* zero */ *zpte = 0; /* zap! 
*/ @@ -1079,15 +970,13 @@ pmap_zero_page_uncached_pae(paddr_t pa) */ void -pmap_copy_page_pae(struct vm_page *srcpg, struct vm_page *dstpg) +pae_copy_phys(paddr_t srcpa, paddr_t dstpa, int off, int l) { - paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg); - paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg); #ifdef MULTIPROCESSOR int id = cpu_number(); #endif - pt_entry_t *spte = PTESLEW(csrc_pte,id); - pt_entry_t *dpte = PTESLEW(cdst_pte,id); + pt_entry_t *spte = PTESLEW(csrc_pte, id); + pt_entry_t *dpte = PTESLEW(cdst_pte, id); caddr_t csrcva = VASLEW(pmap_csrcp, id); caddr_t cdstva = VASLEW(pmap_cdstp, id); @@ -1099,12 +988,22 @@ pmap_copy_page_pae(struct vm_page *srcpg, struct vm_page *dstpg) *spte = (srcpa & PG_FRAME) | PG_V | PG_RW; *dpte = (dstpa & PG_FRAME) | PG_V | PG_RW; pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva); - bcopy(csrcva, cdstva, PAGE_SIZE); + if (l > PAGE_SIZE - off) + l = PAGE_SIZE - off; + bcopy(csrcva + off, cdstva + off, l); *spte = *dpte = 0; /* zap! */ pmap_update_2pg((vaddr_t)csrcva, (vaddr_t)cdstva); -#ifdef MULTIPROCESSOR - /* Using per-cpu VA; no shootdown required here. */ -#endif +} + +void +pmap_copy_page_pae(struct vm_page *srcpg, struct vm_page *dstpg) +{ + paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg); + paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg); + int s = splhigh(); + + pae_copy_phys(srcpa, dstpa, 0, PAGE_SIZE); + splx(s); } /* @@ -1124,13 +1023,13 @@ pmap_copy_page_pae(struct vm_page *srcpg, struct vm_page *dstpg) void pmap_remove_ptes_pae(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva, - vaddr_t startva, vaddr_t endva, int32_t *cpumaskp) + vaddr_t startva, vaddr_t endva, int flags) { struct pv_entry *pv_tofree = NULL; /* list of pv_entrys to free */ struct pv_entry *pve; pt_entry_t *pte = (pt_entry_t *) ptpva; + struct vm_page *pg; pt_entry_t opte; - int bank, off; /* * note that ptpva points to the PTE that maps startva. this may @@ -1146,49 +1045,31 @@ pmap_remove_ptes_pae(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva, if (!pmap_valid_entry(*pte)) continue; /* VA not mapped */ - opte = i386_atomic_testset_uq(pte, 0); /* zap! */ + if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) + continue; + + /* atomically save the old PTE and zap! it */ + opte = i386_atomic_testset_uq(pte, 0); if (opte & PG_W) pmap->pm_stats.wired_count--; pmap->pm_stats.resident_count--; - if (opte & PG_U) - pmap_tlb_shootdown(pmap, startva, opte, cpumaskp); - - if (ptp) { + if (ptp) ptp->wire_count--; /* dropping a PTE */ - /* Make sure that the PDE is flushed */ - if ((ptp->wire_count <= 1) && !(opte & PG_U)) - pmap_tlb_shootdown(pmap, startva, opte, - cpumaskp); - } /* * if we are not on a pv_head list we are done. 
*/ + pg = PHYS_TO_VM_PAGE(opte & PG_FRAME); if ((opte & PG_PVLIST) == 0) { -#ifdef DIAGNOSTIC - if (vm_physseg_find(atop(opte & PG_FRAME), &off) - != -1) - panic("pmap_remove_ptes: managed page without " - "PG_PVLIST for 0x%lx", startva); -#endif continue; } - bank = vm_physseg_find(atop(opte & PG_FRAME), &off); -#ifdef DIAGNOSTIC - if (bank == -1) - panic("pmap_remove_ptes: unmanaged page marked " - "PG_PVLIST, va = 0x%lx, pa = 0x%lx", - startva, (u_long)(opte & PG_FRAME)); -#endif - /* sync R/M bits */ - vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); - pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap, - startva); + pmap_sync_flags_pte_pae(pg, opte); + pve = pmap_remove_pv(pg, pmap, startva); if (pve) { pve->pv_next = pv_tofree; @@ -1214,15 +1095,18 @@ pmap_remove_ptes_pae(struct pmap *pmap, struct vm_page *ptp, vaddr_t ptpva, boolean_t pmap_remove_pte_pae(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte, - vaddr_t va, int32_t *cpumaskp) + vaddr_t va, int flags) { - pt_entry_t opte; - int bank, off; struct pv_entry *pve; + struct vm_page *pg; + pt_entry_t opte; if (!pmap_valid_entry(*pte)) return(FALSE); /* VA not mapped */ + if ((flags & PMAP_REMOVE_SKIPWIRED) && (*pte & PG_W)) + return (FALSE); + opte = *pte; /* save the old PTE */ *pte = 0; /* zap! */ @@ -1230,46 +1114,28 @@ pmap_remove_pte_pae(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte, if (opte & PG_W) pmap->pm_stats.wired_count--; - pmap->pm_stats.resident_count--; - if (opte & PG_U) - pmap_tlb_shootdown(pmap, va, opte, cpumaskp); + pmap->pm_stats.resident_count--; - if (ptp) { + if (ptp) ptp->wire_count--; /* dropping a PTE */ - /* Make sure that the PDE is flushed */ - if ((ptp->wire_count <= 1) && !(opte & PG_U)) - pmap_tlb_shootdown(pmap, va, opte, cpumaskp); - } + pg = PHYS_TO_VM_PAGE(opte & PG_FRAME); + /* * if we are not on a pv_head list we are done. */ - if ((opte & PG_PVLIST) == 0) { -#ifdef DIAGNOSTIC - if (vm_physseg_find(atop(opte & PG_FRAME), &off) != -1) - panic("pmap_remove_pte: managed page without " - "PG_PVLIST for 0x%lx", va); -#endif + if ((opte & PG_PVLIST) == 0) return(TRUE); - } - - bank = vm_physseg_find(atop(opte & PG_FRAME), &off); -#ifdef DIAGNOSTIC - if (bank == -1) - panic("pmap_remove_pte: unmanaged page marked " - "PG_PVLIST, va = 0x%lx, pa = 0x%lx", va, - (u_long)(opte & PG_FRAME)); -#endif - /* sync R/M bits */ - vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); - pve = pmap_remove_pv(&vm_physmem[bank].pmseg.pvhead[off], pmap, va); + pmap_sync_flags_pte_pae(pg, opte); + pve = pmap_remove_pv(pg, pmap, va); if (pve) pmap_free_pv(pmap, pve); + return(TRUE); } @@ -1280,110 +1146,31 @@ pmap_remove_pte_pae(struct pmap *pmap, struct vm_page *ptp, pt_entry_t *pte, */ void -pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva) +pmap_do_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva, int flags) { pt_entry_t *ptes, opte; - boolean_t result; paddr_t ptppa; vaddr_t blkendva; struct vm_page *ptp; - int32_t cpumask = 0; TAILQ_HEAD(, vm_page) empty_ptps; - - /* - * we lock in the pmap => pv_head direction - */ + int shootall = 0; + vaddr_t va; TAILQ_INIT(&empty_ptps); ptes = pmap_map_ptes_pae(pmap); /* locks pmap */ + /* - * removing one page? take shortcut function. + * Decide if we want to shoot the whole tlb or just the range. + * Right now, we simply shoot everything when we remove more + * than 32 pages, but never in the kernel pmap. XXX - tune. 
*/ + if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel()) + shootall = 1; - if (sva + PAGE_SIZE == eva) { - - if (pmap_valid_entry(PDE(pmap, pdei(sva)))) { - - /* PA of the PTP */ - ptppa = PDE(pmap, pdei(sva)) & PG_FRAME; - - /* get PTP if non-kernel mapping */ - - if (pmap == pmap_kernel()) { - /* we never free kernel PTPs */ - ptp = NULL; - } else { - if (pmap->pm_ptphint && - VM_PAGE_TO_PHYS(pmap->pm_ptphint) == - ptppa) { - ptp = pmap->pm_ptphint; - } else { - ptp = PHYS_TO_VM_PAGE(ptppa); -#ifdef DIAGNOSTIC - if (ptp == NULL) - panic("pmap_remove: unmanaged " - "PTP detected"); -#endif - } - } - - /* do it! */ - result = pmap_remove_pte_pae(pmap, ptp, - &ptes[atop(sva)], sva, &cpumask); - - /* - * if mapping removed and the PTP is no longer - * being used, free it! - */ - - if (result && ptp && ptp->wire_count <= 1) { - opte = i386_atomic_testset_uq(&PDE(pmap, - pdei(sva)), 0); /* zap! */ -#ifdef MULTIPROCESSOR - /* - * XXXthorpej Redundant shootdown can happen - * here if we're using APTE space. - */ -#endif - pmap_tlb_shootdown(curpcb->pcb_pmap, - ((vaddr_t)ptes) + ptp->offset, opte, - &cpumask); -#ifdef MULTIPROCESSOR - /* - * Always shoot down the pmap's self-mapping - * of the PTP. - * XXXthorpej Redundant shootdown can happen - * here if pmap == curpcb->pcb_pmap (not APTE - * space). - */ - pmap_tlb_shootdown(pmap, - ((vaddr_t)PTE_BASE) + ptp->offset, opte, - &cpumask); -#endif - pmap->pm_stats.resident_count--; - if (pmap->pm_ptphint == ptp) - pmap->pm_ptphint = - RB_ROOT(&pmap->pm_obj.memt); - ptp->wire_count = 0; - /* Postpone free to after shootdown. */ - uvm_pagerealloc(ptp, NULL, 0); - TAILQ_INSERT_TAIL(&empty_ptps, ptp, pageq); - } - } - pmap_tlb_shootnow(cpumask); - pmap_unmap_ptes_pae(pmap); /* unlock pmap */ - while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { - TAILQ_REMOVE(&empty_ptps, ptp, pageq); - uvm_pagefree(ptp); - } - return; - } - - for (/* null */ ; sva < eva ; sva = blkendva) { - + for (va = sva ; va < eva ; va = blkendva) { /* determine range of block */ - blkendva = i386_round_pdr(sva+1); + blkendva = i386_round_pdr(va+1); if (blkendva > eva) blkendva = eva; @@ -1401,16 +1188,16 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva) * be VM_MAX_ADDRESS. */ - if (pdei(sva) == PDSLOT_PTE) + if (pdei(va) == PDSLOT_PTE) /* XXXCDC: ugly hack to avoid freeing PDP here */ continue; - if (!pmap_valid_entry(PDE(pmap, pdei(sva)))) + if (!pmap_valid_entry(PDE(pmap, pdei(va)))) /* valid block? */ continue; /* PA of the PTP */ - ptppa = PDE(pmap, pdei(sva)) & PG_FRAME; + ptppa = PDE(pmap, pdei(va)) & PG_FRAME; /* get PTP if non-kernel mapping */ if (pmap == pmap_kernel()) { @@ -1423,26 +1210,33 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva) } else { ptp = PHYS_TO_VM_PAGE(ptppa); #ifdef DIAGNOSTIC - if (ptp == NULL) + if (ptp == NULL) { + printf("pmap_remove: null PTP for ptppa 0x%lx\n", ptppa); + printf("pmap_remove: va = 0x%lx\n", va); + printf("pmap_remove: pdei(va) = 0x%lx\n", pdei(va)); + printf("pmap_remove: PDE = 0x%llx\n", PDE(pmap, pdei(va))); panic("pmap_remove: unmanaged PTP " "detected"); + } #endif } } - pmap_remove_ptes_pae(pmap, ptp, (vaddr_t)&ptes[atop(sva)], - sva, blkendva, &cpumask); + + pmap_remove_ptes_pae(pmap, ptp, (vaddr_t)&ptes[atop(va)], + va, blkendva, flags); /* if PTP is no longer being used, free it! 
*/ if (ptp && ptp->wire_count <= 1) { - opte = i386_atomic_testset_uq(&PDE(pmap, pdei(sva)),0); + + opte = i386_atomic_testset_uq(&PDE(pmap, pdei(va)), 0); #if defined(MULTIPROCESSOR) /* * XXXthorpej Redundant shootdown can happen here * if we're using APTE space. */ #endif - pmap_tlb_shootdown(curpcb->pcb_pmap, - ((vaddr_t)ptes) + ptp->offset, opte, &cpumask); + pmap_tlb_shootpage(curpcb->pcb_pmap, + ((vaddr_t)ptes) + ptp->offset); #if defined(MULTIPROCESSOR) /* * Always shoot down the pmap's self-mapping @@ -1450,21 +1244,28 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva) * XXXthorpej Redundant shootdown can happen here * if pmap == curpcb->pcb_pmap (not APTE space). */ - pmap_tlb_shootdown(pmap, - ((vaddr_t)PTE_BASE) + ptp->offset, opte, &cpumask); + pmap_tlb_shootpage(pmap, + ((vaddr_t)PTE_BASE) + ptp->offset); #endif pmap->pm_stats.resident_count--; - if (pmap->pm_ptphint == ptp) /* update hint? */ - pmap->pm_ptphint = - RB_ROOT(&pmap->pm_obj.memt); ptp->wire_count = 0; /* Postpone free to after shootdown. */ uvm_pagerealloc(ptp, NULL, 0); TAILQ_INSERT_TAIL(&empty_ptps, ptp, pageq); + if (pmap->pm_ptphint == ptp) /* update hint? */ + pmap->pm_ptphint = + RB_ROOT(&pmap->pm_obj.memt); } + + if (!shootall) + pmap_tlb_shootrange(pmap, va, blkendva); } - pmap_tlb_shootnow(cpumask); + + if (shootall) + pmap_tlb_shoottlb(); + + pmap_tlb_shootwait(); pmap_unmap_ptes_pae(pmap); while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { TAILQ_REMOVE(&empty_ptps, ptp, pageq); @@ -1475,93 +1276,51 @@ pmap_remove_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva) /* * pmap_page_remove: remove a managed vm_page from all pmaps that map it * - * => we set pv_head => pmap locking * => R/M bits are sync'd back to attrs */ void pmap_page_remove_pae(struct vm_page *pg) { - int bank, off; - struct pv_head *pvh; struct pv_entry *pve; pt_entry_t *ptes, opte; - int32_t cpumask = 0; TAILQ_HEAD(, vm_page) empty_ptps; struct vm_page *ptp; - /* XXX: vm_page should either contain pv_head or have a pointer to it */ - bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); - if (bank == -1) { - printf("pmap_page_remove: unmanaged page?\n"); + if (pg->mdpage.pv_list == NULL) return; - } - - pvh = &vm_physmem[bank].pmseg.pvhead[off]; - if (pvh->pvh_list == NULL) { - return; - } TAILQ_INIT(&empty_ptps); - for (pve = pvh->pvh_list ; pve != NULL ; pve = pve->pv_next) { - ptes = pmap_map_ptes_pae(pve->pv_pmap); /* locks pmap */ - -#ifdef DIAGNOSTIC - if (pve->pv_ptp && (PDE(pve->pv_pmap, - pdei(pve->pv_va)) & PG_FRAME) != - VM_PAGE_TO_PHYS(pve->pv_ptp)) { - printf("pmap_page_remove: pg=%p: va=%lx, pv_ptp=%p\n", - pg, pve->pv_va, pve->pv_ptp); - printf("pmap_page_remove: PTP's phys addr: " - "actual=%llx, recorded=%llx\n", - (PDE(pve->pv_pmap, pdei(pve->pv_va)) & - PG_FRAME), VM_PAGE_TO_PHYS(pve->pv_ptp)); - panic("pmap_page_remove: mapped managed page has " - "invalid pv_ptp field"); - } -#endif - - opte = ptes[atop(pve->pv_va)]; - ptes[atop(pve->pv_va)] = 0; /* zap! 
*/ + for (pve = pg->mdpage.pv_list ; pve != NULL ; pve = pve->pv_next) { + if (pve->pv_ptp == NULL) + continue; + ptes = pmap_map_ptes_pae(pve->pv_pmap); /* locks pmap */ + opte = i386_atomic_testset_uq(&ptes[atop(pve->pv_va)], 0); if (opte & PG_W) pve->pv_pmap->pm_stats.wired_count--; pve->pv_pmap->pm_stats.resident_count--; - /* Shootdown only if referenced */ - if (opte & PG_U) - pmap_tlb_shootdown(pve->pv_pmap, pve->pv_va, opte, - &cpumask); - /* sync R/M bits */ - vm_physmem[bank].pmseg.attrs[off] |= (opte & (PG_U|PG_M)); + pmap_sync_flags_pte_pae(pg, opte); /* update the PTP reference count. free if last reference. */ - if (pve->pv_ptp) { - pve->pv_ptp->wire_count--; - if (pve->pv_ptp->wire_count <= 1) { - /* - * Do we have to shootdown the page just to - * get the pte out of the TLB ? - */ - if(!(opte & PG_U)) - pmap_tlb_shootdown(pve->pv_pmap, - pve->pv_va, opte, &cpumask); + if (pve->pv_ptp && --pve->pv_ptp->wire_count <= 1) { opte = i386_atomic_testset_uq(&PDE(pve->pv_pmap, pdei(pve->pv_va)), 0); - pmap_tlb_shootdown(curpcb->pcb_pmap, - ((vaddr_t)ptes) + pve->pv_ptp->offset, - opte, &cpumask); + pmap_tlb_shootpage(curcpu()->ci_curpmap, + ((vaddr_t)ptes) + pve->pv_ptp->offset); + #if defined(MULTIPROCESSOR) /* * Always shoot down the other pmap's * self-mapping of the PTP. */ - pmap_tlb_shootdown(pve->pv_pmap, - ((vaddr_t)PTE_BASE) + pve->pv_ptp->offset, - opte, &cpumask); + pmap_tlb_shootpage(pve->pv_pmap, + ((vaddr_t)PTE_BASE) + pve->pv_ptp->offset); + #endif pve->pv_pmap->pm_stats.resident_count--; /* update hint? */ @@ -1573,13 +1332,13 @@ pmap_page_remove_pae(struct vm_page *pg) uvm_pagerealloc(pve->pv_ptp, NULL, 0); TAILQ_INSERT_TAIL(&empty_ptps, pve->pv_ptp, pageq); - } } + pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va); pmap_unmap_ptes_pae(pve->pv_pmap); /* unlocks pmap */ } - pmap_free_pvs(NULL, pvh->pvh_list); - pvh->pvh_list = NULL; - pmap_tlb_shootnow(cpumask); + pmap_free_pvs(NULL, pg->mdpage.pv_list); + pg->mdpage.pv_list = NULL; + pmap_tlb_shootwait(); while ((ptp = TAILQ_FIRST(&empty_ptps)) != NULL) { TAILQ_REMOVE(&empty_ptps, ptp, pageq); uvm_pagefree(ptp); @@ -1602,106 +1361,70 @@ pmap_page_remove_pae(struct vm_page *pg) boolean_t pmap_test_attrs_pae(struct vm_page *pg, int testbits) { - int bank, off; - char *myattrs; - struct pv_head *pvh; struct pv_entry *pve; pt_entry_t *ptes, pte; + u_long mybits, testflags; - /* XXX: vm_page should either contain pv_head or have a pointer to it */ - bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); - if (bank == -1) { - printf("pmap_test_attrs: unmanaged page?\n"); - return(FALSE); - } + testflags = pmap_pte2flags(testbits); - /* - * before locking: see if attributes are already set and if so, - * return! - */ + if (pg->pg_flags & testflags) + return (TRUE); - myattrs = &vm_physmem[bank].pmseg.attrs[off]; - if (*myattrs & testbits) - return(TRUE); + mybits = 0; + for (pve = pg->mdpage.pv_list; pve != NULL && mybits == 0; + pve = pve->pv_next) { - /* test to see if there is a list before bothering to lock */ - pvh = &vm_physmem[bank].pmseg.pvhead[off]; - if (pvh->pvh_list == NULL) { - return(FALSE); - } - - /* nope, gonna have to do it the hard way */ - for (pve = pvh->pvh_list; pve != NULL && (*myattrs & testbits) == 0; - pve = pve->pv_next) { ptes = pmap_map_ptes_pae(pve->pv_pmap); pte = ptes[atop(pve->pv_va)]; pmap_unmap_ptes_pae(pve->pv_pmap); - *myattrs |= pte; + mybits |= (pte & testbits); } - /* - * note that we will exit the for loop with a non-null pve if - * we have found the bits we are testing for. 
- */ - return((*myattrs & testbits) != 0); + if (mybits == 0) + return (FALSE); + + atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(mybits)); + + return (TRUE); } /* - * pmap_change_attrs: change a page's attributes + * pmap_clear_attrs: change a page's attributes * - * => we set pv_head => pmap locking * => we return TRUE if we cleared one of the bits we were asked to */ - boolean_t -pmap_change_attrs_pae(struct vm_page *pg, int setbits, int clearbits) +pmap_clear_attrs_pae(struct vm_page *pg, int clearbits) { - u_int32_t result; - int bank, off; - struct pv_head *pvh; struct pv_entry *pve; pt_entry_t *ptes, npte, opte; - char *myattrs; - int32_t cpumask = 0; - - /* XXX: vm_page should either contain pv_head or have a pointer to it */ - bank = vm_physseg_find(atop(VM_PAGE_TO_PHYS(pg)), &off); - if (bank == -1) { - printf("pmap_change_attrs: unmanaged page?\n"); - return(FALSE); - } - - pvh = &vm_physmem[bank].pmseg.pvhead[off]; + u_long clearflags; + int result; - myattrs = &vm_physmem[bank].pmseg.attrs[off]; - result = *myattrs & clearbits; - *myattrs = (*myattrs | setbits) & ~clearbits; - - for (pve = pvh->pvh_list; pve != NULL; pve = pve->pv_next) { -#ifdef DIAGNOSTIC - if (!pmap_valid_entry(PDE(pve->pv_pmap, pdei(pve->pv_va)))) - panic("pmap_change_attrs: mapping without PTP " - "detected"); -#endif + clearflags = pmap_pte2flags(clearbits); + result = pg->pg_flags & clearflags; + if (result) + atomic_clearbits_int(&pg->pg_flags, clearflags); + for (pve = pg->mdpage.pv_list; pve != NULL; pve = pve->pv_next) { ptes = pmap_map_ptes_pae(pve->pv_pmap); /* locks pmap */ npte = ptes[atop(pve->pv_va)]; - result |= (npte & clearbits); - npte = (npte | setbits) & ~(pt_entry_t)clearbits; - if (ptes[atop(pve->pv_va)] != npte) { - opte = i386_atomic_testset_uq(&ptes[atop(pve->pv_va)], - npte); - pmap_tlb_shootdown(pve->pv_pmap, - atop(pve->pv_va), opte, &cpumask); + if (npte & clearbits) { + result = TRUE; + npte &= ~clearbits; + opte = i386_atomic_testset_uq( + &ptes[atop(pve->pv_va)], npte); + pmap_tlb_shootpage(pve->pv_pmap, pve->pv_va); } pmap_unmap_ptes_pae(pve->pv_pmap); /* unlocks pmap */ } - pmap_tlb_shootnow(cpumask); + pmap_tlb_shootwait(); - return(result != 0); + return (result != 0); } + /* * p m a p p r o t e c t i o n f u n c t i o n s */ @@ -1726,14 +1449,16 @@ pmap_change_attrs_pae(struct vm_page *pg, int setbits, int clearbits) /* * pmap_write_protect: write-protect pages in a pmap */ + void pmap_write_protect_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) { - pt_entry_t *ptes, *spte, *epte, opte, npte; + pt_entry_t *ptes, *spte, *epte, npte; vaddr_t blockend; u_int32_t md_prot; - int32_t cpumask = 0; + vaddr_t va; + int shootall = 0; ptes = pmap_map_ptes_pae(pmap); /* locks pmap */ @@ -1741,9 +1466,11 @@ pmap_write_protect_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva, sva &= PG_FRAME; eva &= PG_FRAME; - for (/* null */ ; sva < eva ; sva = blockend) { + if ((eva - sva > 32 * PAGE_SIZE) && pmap != pmap_kernel()) + shootall = 1; - blockend = (sva & PD_MASK) + NBPD; + for (va = sva; va < eva; va = blockend) { + blockend = (va & PD_MASK) + NBPD; if (blockend > eva) blockend = eva; @@ -1757,24 +1484,24 @@ pmap_write_protect_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva, */ /* XXXCDC: ugly hack to avoid freeing PDP here */ - if (pdei(sva) == PDSLOT_PTE) + if (pdei(va) == PDSLOT_PTE) continue; /* empty block? 
*/ - if (!pmap_valid_entry(PDE(pmap, pdei(sva)))) + if (!pmap_valid_entry(PDE(pmap, pdei(va)))) continue; md_prot = protection_codes[prot]; - if (sva < VM_MAXUSER_ADDRESS) + if (va < VM_MAXUSER_ADDRESS) md_prot |= PG_u; - else if (sva < VM_MAX_ADDRESS) + else if (va < VM_MAX_ADDRESS) /* XXX: write-prot our PTES? never! */ md_prot |= (PG_u | PG_RW); - spte = &ptes[atop(sva)]; + spte = &ptes[atop(va)]; epte = &ptes[atop(blockend)]; - for (/*null */; spte < epte ; spte++, sva += PAGE_SIZE) { + for (/*null */; spte < epte ; spte++, va += PAGE_SIZE) { if (!pmap_valid_entry(*spte)) /* no mapping? */ continue; @@ -1782,15 +1509,17 @@ pmap_write_protect_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva, npte = (*spte & ~(pt_entry_t)PG_PROT) | md_prot; if (npte != *spte) { - pmap_exec_account(pmap, sva, *spte, npte); - opte = *spte; - *spte = npte; - pmap_tlb_shootdown(pmap, sva, opte, &cpumask); + pmap_exec_account(pmap, va, *spte, npte); + i386_atomic_testset_uq(spte, npte); } } } + if (shootall) + pmap_tlb_shoottlb(); + else + pmap_tlb_shootrange(pmap, sva, eva); - pmap_tlb_shootnow(cpumask); + pmap_tlb_shootwait(); pmap_unmap_ptes_pae(pmap); /* unlocks pmap */ } @@ -1850,7 +1579,6 @@ pmap_unwire_pae(struct pmap *pmap, vaddr_t va) * pmap_enter: enter a mapping into a pmap * * => must be done "now" ... no lazy-evaluation - * => we set pmap => pv_head locking */ int @@ -1859,24 +1587,18 @@ pmap_enter_pae(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, { pt_entry_t *ptes, opte, npte; struct vm_page *ptp; - struct pv_head *pvh; - struct pv_entry *pve; - int bank, off, error; + struct pv_entry *pve = NULL, *freepve; boolean_t wired = (flags & PMAP_WIRED) != 0; + struct vm_page *pg = NULL; + int error, wired_count, resident_count, ptp_count; -#ifdef DIAGNOSTIC - /* sanity check: totally out of range? */ - if (va >= VM_MAX_KERNEL_ADDRESS) - panic("pmap_enter: too big"); - - if (va == (vaddr_t) PDP_BASE || va == (vaddr_t) APDP_BASE) - panic("pmap_enter: trying to map over PDP/APDP!"); + pa &= PMAP_PA_MASK; - /* sanity check: kernel PTPs should already have been pre-allocated */ - if (va >= VM_MIN_KERNEL_ADDRESS && - !pmap_valid_entry(PDE(pmap, pdei(va)))) - panic("pmap_enter: missing kernel PTP!"); -#endif + if (pmap_initialized) + freepve = pmap_alloc_pv(pmap, ALLOCPV_NEED); + else + freepve = NULL; + wired_count = resident_count = ptp_count = 0; /* * map in ptes and get a pointer to our PTP (unless we are the kernel) @@ -1886,7 +1608,7 @@ pmap_enter_pae(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, if (pmap == pmap_kernel()) { ptp = NULL; } else { - ptp = pmap_get_ptp_pae(pmap, pdei(va), FALSE); + ptp = pmap_get_ptp_pae(pmap, pdei(va)); if (ptp == NULL) { if (flags & PMAP_CANFAIL) { error = ENOMEM; @@ -1895,6 +1617,9 @@ pmap_enter_pae(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, panic("pmap_enter: get ptp failed"); } } + /* + * not allowed to sleep after here! + */ opte = ptes[atop(va)]; /* old PTE */ /* @@ -1902,39 +1627,25 @@ pmap_enter_pae(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, */ if (pmap_valid_entry(opte)) { - /* - * first, update pm_stats. resident count will not + * first, calculate pm_stats updates. resident count will not * change since we are replacing/changing a valid * mapping. wired count might change... 
*/ - if (wired && (opte & PG_W) == 0) - pmap->pm_stats.wired_count++; + wired_count++; else if (!wired && (opte & PG_W) != 0) - pmap->pm_stats.wired_count--; + wired_count--; /* * is the currently mapped PA the same as the one we * want to map? */ - if ((opte & PG_FRAME) == pa) { - /* if this is on the PVLIST, sync R/M bit */ if (opte & PG_PVLIST) { - bank = vm_physseg_find(atop(pa), &off); -#ifdef DIAGNOSTIC - if (bank == -1) - panic("pmap_enter: same pa PG_PVLIST " - "mapping with unmanaged page " - "pa = 0x%lx (0x%lx)", pa, - atop(pa)); -#endif - pvh = &vm_physmem[bank].pmseg.pvhead[off]; - vm_physmem[bank].pmseg.attrs[off] |= opte; - } else { - pvh = NULL; /* ensure !PG_PVLIST */ + pg = PHYS_TO_VM_PAGE(pa); + pmap_sync_flags_pte_pae(pg, opte); } goto enter_now; } @@ -1949,41 +1660,25 @@ pmap_enter_pae(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, */ if (opte & PG_PVLIST) { - bank = vm_physseg_find(atop(opte & PG_FRAME), &off); -#ifdef DIAGNOSTIC - if (bank == -1) - panic("pmap_enter: PG_PVLIST mapping with " - "unmanaged page " - "pa = 0x%lx (0x%lx)", pa, atop(pa)); -#endif - pvh = &vm_physmem[bank].pmseg.pvhead[off]; - pve = pmap_remove_pv(pvh, pmap, va); - vm_physmem[bank].pmseg.attrs[off] |= opte; - } else { - pve = NULL; + pg = PHYS_TO_VM_PAGE(opte & PG_FRAME); + pmap_sync_flags_pte_pae(pg, opte); + pve = pmap_remove_pv(pg, pmap, va); + pg = NULL; } } else { /* opte not valid */ - pve = NULL; - pmap->pm_stats.resident_count++; + resident_count++; if (wired) - pmap->pm_stats.wired_count++; + wired_count++; if (ptp) - ptp->wire_count++; /* count # of valid entrys */ + ptp_count++; } - /* - * at this point pm_stats has been updated. pve is either NULL - * or points to a now-free pv_entry structure (the latter case is - * if we called pmap_remove_pv above). - * - * if this entry is to be on a pvlist, enter it now. - */ + if (pmap_initialized && pg == NULL) + pg = PHYS_TO_VM_PAGE(pa); - bank = vm_physseg_find(atop(pa), &off); - if (pmap_initialized && bank != -1) { - pvh = &vm_physmem[bank].pmseg.pvhead[off]; + if (pg != NULL) { if (pve == NULL) { - pve = pmap_alloc_pv(pmap, ALLOCPV_NEED); + pve = freepve; if (pve == NULL) { if (flags & PMAP_CANFAIL) { error = ENOMEM; @@ -1991,26 +1686,24 @@ pmap_enter_pae(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, } panic("pmap_enter: no pv entries available"); } + freepve = NULL; } - /* lock pvh when adding */ - pmap_enter_pv(pvh, pve, pmap, va, ptp); + /* lock pg when adding */ + pmap_enter_pv(pg, pve, pmap, va, ptp); } else { - /* new mapping is not PG_PVLIST. free pve if we've got one */ - pvh = NULL; /* ensure !PG_PVLIST */ - if (pve) + if (pve) { pmap_free_pv(pmap, pve); + } } enter_now: /* - * at this point pvh is !NULL if we want the PG_PVLIST bit set + * at this point pg is !NULL if we want the PG_PVLIST bit set */ npte = pa | protection_codes[prot] | PG_V; pmap_exec_account(pmap, va, opte, npte); - if (pvh) - npte |= PG_PVLIST; if (wired) npte |= PG_W; if (va < VM_MAXUSER_ADDRESS) @@ -2019,26 +1712,35 @@ enter_now: npte |= (PG_u | PG_RW); /* XXXCDC: no longer needed? */ if (pmap == pmap_kernel()) npte |= pmap_pg_g; + if (flags & PROT_READ) + npte |= PG_U; + if (flags & PROT_WRITE) + npte |= PG_M; + if (pg) { + npte |= PG_PVLIST; + pmap_sync_flags_pte_pae(pg, npte); + } - ptes[atop(va)] = npte; /* zap! 
*/ - - if ((opte & ~(pt_entry_t)(PG_M|PG_U)) != npte) { -#ifdef MULTIPROCESSOR - int32_t cpumask = 0; + opte = i386_atomic_testset_uq(&ptes[atop(va)], npte); + if (ptp) + ptp->wire_count += ptp_count; + pmap->pm_stats.resident_count += resident_count; + pmap->pm_stats.wired_count += wired_count; - pmap_tlb_shootdown(pmap, va, opte, &cpumask); - pmap_tlb_shootnow(cpumask); -#else - /* Don't bother deferring in the single CPU case. */ - if (pmap_is_curpmap(pmap)) - pmap_update_pg(va); -#endif + if (opte & PG_V) { + pmap_tlb_shootpage(pmap, va); + pmap_tlb_shootwait(); } error = 0; out: pmap_unmap_ptes_pae(pmap); + + if (freepve) { + pmap_free_pv(pmap, freepve); + } + return error; } @@ -2081,9 +1783,10 @@ pmap_growkernel_pae(vaddr_t maxkvaddr) if (uvm_page_physget(&ptaddr) == FALSE) panic("pmap_growkernel: out of memory"); - pmap_zero_phys(ptaddr); + pmap_zero_phys_pae(ptaddr); - PDE(kpm, PDSLOT_KERN + nkpde) = ptaddr | PG_RW | PG_V; + PDE(kpm, PDSLOT_KERN + nkpde) = ptaddr | PG_RW | PG_V | + PG_U | PG_M; /* count PTP as resident */ kpm->pm_stats.resident_count++; @@ -2096,12 +1799,9 @@ pmap_growkernel_pae(vaddr_t maxkvaddr) * INVOKED WHILE pmap_init() IS RUNNING! */ - while (!pmap_alloc_ptp_pae(kpm, PDSLOT_KERN + nkpde, FALSE)) + while (!pmap_alloc_ptp_pae(kpm, PDSLOT_KERN + nkpde, 0)) uvm_wait("pmap_growkernel"); - /* PG_u not for kernel */ - PDE(kpm, PDSLOT_KERN + nkpde) &= ~PG_u; - /* distribute new kernel PTP to all active pmaps */ LIST_FOREACH(pm, &pmaps, pm_list) { PDE(pm, PDSLOT_KERN + nkpde) = @@ -2115,6 +1815,99 @@ out: return (VM_MIN_KERNEL_ADDRESS + (nkpde * NBPD)); } +/* + * Pre-allocate PTP 0 for low memory, so that 1:1 mappings for various + * trampoline code can be entered. + */ +void +pmap_prealloc_lowmem_ptp_pae(void) +{ + pt_entry_t *pte, npte; + vaddr_t ptpva = (vaddr_t)vtopte(0); + + /* enter pa for pte 0 into recursive map */ + pte = vtopte(ptpva); + npte = PTP0_PA | PG_RW | PG_V | PG_U | PG_M; + + i386_atomic_testset_uq(pte, npte); + + /* make sure it is clean before using */ + memset((void *)ptpva, 0, NBPG); +} + +/* + * pmap_tmpmap_pa_pae: map a page in for tmp usage + */ + +vaddr_t +pmap_tmpmap_pa_pae(paddr_t pa) +{ +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *ptpte = PTESLEW(ptp_pte, id); + caddr_t ptpva = VASLEW(pmap_ptpp, id); +#if defined(DIAGNOSTIC) + if (*ptpte) + panic("pmap_tmpmap_pa_pae: ptp_pte in use?"); +#endif + *ptpte = PG_V | PG_RW | pa; /* always a new mapping */ + return((vaddr_t)ptpva); +} + +/* + * pmap_tmpunmap_pa_pae: unmap a tmp use page (undoes pmap_tmpmap_pa_pae) + */ + +void +pmap_tmpunmap_pa_pae() +{ +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *ptpte = PTESLEW(ptp_pte, id); + caddr_t ptpva = VASLEW(pmap_ptpp, id); +#if defined(DIAGNOSTIC) + if (!pmap_valid_entry(*ptpte)) + panic("pmap_tmpunmap_pa_pae: our pte invalid?"); +#endif + *ptpte = 0; + pmap_update_pg((vaddr_t)ptpva); +#ifdef MULTIPROCESSOR + /* + * No need for tlb shootdown here, since ptp_pte is per-CPU. 
+ */ +#endif +} + +paddr_t +vtophys_pae(vaddr_t va) +{ + return ((*vtopte(va) & PG_FRAME) | (va & ~PG_FRAME)); +} + +void +pmap_flush_page_pae(paddr_t pa) +{ +#ifdef MULTIPROCESSOR + int id = cpu_number(); +#endif + pt_entry_t *pte = PTESLEW(flsh_pte, id); + caddr_t va = VASLEW(pmap_flshp, id); + + KDASSERT(PHYS_TO_VM_PAGE(pa) != NULL); +#ifdef DIAGNOSTIC + if (*pte) + panic("pmap_flush_page_pae: lock botch"); +#endif + + *pte = (pa & PG_FRAME) | PG_V | PG_RW; + pmap_update_pg(va); + pmap_flush_cache((vaddr_t)va, PAGE_SIZE); + *pte = 0; + pmap_update_pg(va); +} + #ifdef DEBUG void pmap_dump_pae(struct pmap *, vaddr_t, vaddr_t); @@ -2138,10 +1931,6 @@ pmap_dump_pae(struct pmap *pmap, vaddr_t sva, vaddr_t eva) if (eva > VM_MAXUSER_ADDRESS || eva <= sva) eva = VM_MAXUSER_ADDRESS; - /* - * we lock in the pmap => pv_head direction - */ - ptes = pmap_map_ptes_pae(pmap); /* locks pmap */ /* diff --git a/sys/arch/i386/include/biosvar.h b/sys/arch/i386/include/biosvar.h index 1f95cdc1376..afe03ffe631 100644 --- a/sys/arch/i386/include/biosvar.h +++ b/sys/arch/i386/include/biosvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: biosvar.h,v 1.61 2013/11/02 15:02:27 kettenis Exp $ */ +/* $OpenBSD: biosvar.h,v 1.62 2015/04/12 18:37:54 mlarkin Exp $ */ /* * Copyright (c) 1997-1999 Michael Shalayeff @@ -35,9 +35,6 @@ #define BOOTBIOS_ADDR (0x7c00) #define BOOTBIOS_MAXSEC ((1 << 28) - 1) - /* physical page for ptp 0 need for various tramps */ -#define PTP0_PA (PAGE_SIZE * 3) - /* BIOS configure flags */ #define BIOSF_BIOS32 0x0001 #define BIOSF_PCIBIOS 0x0002 diff --git a/sys/arch/i386/include/cpu.h b/sys/arch/i386/include/cpu.h index 36aec9df34f..516bbe1f3ca 100644 --- a/sys/arch/i386/include/cpu.h +++ b/sys/arch/i386/include/cpu.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.h,v 1.137 2014/12/16 21:40:05 tedu Exp $ */ +/* $OpenBSD: cpu.h,v 1.138 2015/04/12 18:37:54 mlarkin Exp $ */ /* $NetBSD: cpu.h,v 1.35 1996/05/05 19:29:26 christos Exp $ */ /*- @@ -458,6 +458,7 @@ void mp_setperf_init(void); void vm86_gpfault(struct proc *, int); #endif /* VM86 */ +int cpu_paenable(void *); #endif /* _KERNEL */ /* diff --git a/sys/arch/i386/include/pmap.h b/sys/arch/i386/include/pmap.h index 4d3e0bfbc06..6aa02e3e7c0 100644 --- a/sys/arch/i386/include/pmap.h +++ b/sys/arch/i386/include/pmap.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pmap.h,v 1.72 2015/03/13 23:23:13 mlarkin Exp $ */ +/* $OpenBSD: pmap.h,v 1.73 2015/04/12 18:37:54 mlarkin Exp $ */ /* $NetBSD: pmap.h,v 1.44 2000/04/24 17:18:18 thorpej Exp $ */ /* @@ -41,14 +41,6 @@ #include #include -/* - * The following defines identify the slots used as described above. 
- */ - -#define PDSLOT_PTE ((KERNBASE/NBPD)-1) /* 831: for recursive PDP map */ -#define PDSLOT_KERN (KERNBASE/NBPD) /* 832: start of kernel space */ -#define PDSLOT_APTE ((unsigned)1023) /* 1023: alternative recursive slot */ - /* * The following defines give the virtual addresses of various MMU * data structures: @@ -57,12 +49,9 @@ * PDP_PDE and APDP_PDE: the VA of the PDE that points back to the PDP/APDP */ -#define PTE_BASE ((pt_entry_t *) (PDSLOT_PTE * NBPD) ) -#define APTE_BASE ((pt_entry_t *) (PDSLOT_APTE * NBPD) ) -#define PDP_BASE ((pd_entry_t *)(((char *)PTE_BASE) + (PDSLOT_PTE * PAGE_SIZE))) -#define APDP_BASE ((pd_entry_t *)(((char *)APTE_BASE) + (PDSLOT_APTE * PAGE_SIZE))) -#define PDP_PDE (PDP_BASE + PDSLOT_PTE) -#define APDP_PDE (PDP_BASE + PDSLOT_APTE) +#define PDSLOT_PTE ((KERNBASE/NBPD)-2) /* 830: for recursive PDP map */ +#define PDSLOT_KERN (KERNBASE/NBPD) /* 832: start of kernel space */ +#define PDSLOT_APTE ((unsigned)1022) /* 1022: alternative recursive slot */ /* * The following define determines how many PTPs should be set up for the @@ -71,48 +60,10 @@ * pmap module can add more PTPs to the kernel area on demand. */ -#ifndef NKPTP -#define NKPTP 4 /* 16MB to start */ +#ifndef NKPTP +#define NKPTP 8 /* 16/32MB to start */ #endif -#define NKPTP_MIN 4 /* smallest value we allow */ -#define NKPTP_MAX (1024 - (KERNBASE/NBPD) - 1) - /* largest value (-1 for APTP space) */ - -/* - * various address macros - * - * vtopte: return a pointer to the PTE mapping a VA - * kvtopte: same as above (takes a KVA, but doesn't matter with this pmap) - * ptetov: given a pointer to a PTE, return the VA that it maps - * vtophys: translate a VA to the PA mapped to it - * - * plus alternative versions of the above - */ - -#define vtopte(VA) (PTE_BASE + atop(VA)) -#define kvtopte(VA) vtopte(VA) -#define ptetov(PT) (ptoa(PT - PTE_BASE)) -#define vtophys(VA) ((*vtopte(VA) & PG_FRAME) | \ - ((unsigned)(VA) & ~PG_FRAME)) -#define avtopte(VA) (APTE_BASE + atop(VA)) -#define ptetoav(PT) (ptoa(PT - APTE_BASE)) -#define avtophys(VA) ((*avtopte(VA) & PG_FRAME) | \ - ((unsigned)(VA) & ~PG_FRAME)) - -/* - * PTP macros: - * A PTP's index is the PD index of the PDE that points to it. - * A PTP's offset is the byte-offset in the PTE space that this PTP is at. - * A PTP's VA is the first VA mapped by that PTP. - * - * Note that PAGE_SIZE == number of bytes in a PTP (4096 bytes == 1024 entries) - * NBPD == number of bytes a PTP can map (4MB) - */ - -#define ptp_i2o(I) ((I) * PAGE_SIZE) /* index => offset */ -#define ptp_o2i(O) ((O) / PAGE_SIZE) /* offset => index */ -#define ptp_i2v(I) ((I) * NBPD) /* index => VA */ -#define ptp_v2i(V) ((V) / NBPD) /* VA => index (same as pdei) */ +#define NKPTP_MIN 4 /* smallest value we allow */ /* * PG_AVAIL usage: we make use of the ignored bits of the PTE @@ -122,6 +73,8 @@ #define PG_PVLIST PG_AVAIL2 /* mapping has entry on pvlist */ #define PG_X PG_AVAIL3 /* executable mapping */ +#define PTP0_PA (PAGE_SIZE * 3) + #ifdef _KERNEL /* * pmap data structures: see pmap.c for details of locking. 
@@ -144,11 +97,13 @@ LIST_HEAD(pmap_head, pmap); /* struct pmap_head: head of a pmap list */ */ struct pmap { + uint64_t pm_pdidx[4]; /* PDIEs for PAE mode */ + paddr_t pm_pdirpa; /* PA of PD (read-only after create) */ + vaddr_t pm_pdir; /* VA of PD (lck by object lock) */ + int pm_pdirsize; /* PD size (4k vs 16k on PAE) */ struct uvm_object pm_obj; /* object (lck by object lock) */ #define pm_lock pm_obj.vmobjlock LIST_ENTRY(pmap) pm_list; /* list (lck by pm_list lock) */ - pd_entry_t *pm_pdir; /* VA of PD (lck by object lock) */ - paddr_t pm_pdirpa; /* PA of PD (read-only after create) */ struct vm_page *pm_ptphint; /* pointer to a PTP in our pmap */ struct pmap_statistics pm_stats; /* pmap stats (lck by object lock) */ @@ -223,18 +178,24 @@ struct pv_page { struct pv_entry pvents[PVE_PER_PVPAGE]; }; + /* - * global kernel variables + * pv_entrys are dynamically allocated in chunks from a single page. + * we keep track of how many pv_entrys are in use for each page and + * we can free pv_entry pages if needed. There is one lock for the + * entire allocation system. */ -extern pd_entry_t PTD[]; +extern char PTD[]; +extern struct pmap kernel_pmap_store; /* kernel pmap */ +extern int nkptp_max; -/* PTDpaddr: is the physical address of the kernel's PDP */ -extern u_int32_t PTDpaddr; +#define PMAP_REMOVE_ALL 0 +#define PMAP_REMOVE_SKIPWIRED 1 -extern struct pmap kernel_pmap_store; /* kernel pmap */ -extern int nkpde; /* current # of PDEs for kernel */ -extern int pmap_pg_g; /* do we support PG_G? */ +#define ALLOCPV_NEED 0 /* need PV now */ +#define ALLOCPV_TRY 1 /* just try to allocate */ +#define ALLOCPV_NONEED 2 /* don't need PV, just growing cache */ /* * Macros @@ -256,53 +217,153 @@ extern int pmap_pg_g; /* do we support PG_G? */ #define pmap_unuse_final(p) /* nothing */ #define pmap_remove_holes(vm) do { /* nothing */ } while (0) - /* * Prototypes */ -void pmap_bootstrap(vaddr_t); -boolean_t pmap_clear_attrs(struct vm_page *, int); -static void pmap_page_protect(struct vm_page *, vm_prot_t); -void pmap_page_remove(struct vm_page *); -static void pmap_protect(struct pmap *, vaddr_t, - vaddr_t, vm_prot_t); -void pmap_remove(struct pmap *, vaddr_t, vaddr_t); -boolean_t pmap_test_attrs(struct vm_page *, int); -void pmap_write_protect(struct pmap *, vaddr_t, - vaddr_t, vm_prot_t); -int pmap_exec_fixup(struct vm_map *, struct trapframe *, - struct pcb *); -void pmap_switch(struct proc *, struct proc *); - +vaddr_t pmap_tmpmap_pa(paddr_t); +void pmap_tmpunmap_pa(void); + +void pmap_bootstrap(vaddr_t); +void pmap_bootstrap_pae(void); +void pmap_virtual_space(vaddr_t *, vaddr_t *); +void pmap_init(void); +struct pmap *pmap_create(void); +void pmap_destroy(struct pmap *); +void pmap_reference(struct pmap *); +void pmap_fork(struct pmap *, struct pmap *); +void pmap_remove(struct pmap *, vaddr_t, vaddr_t); +void pmap_collect(struct pmap *); +void pmap_activate(struct proc *); +void pmap_deactivate(struct proc *); +void pmap_kenter_pa(vaddr_t, paddr_t, vm_prot_t); +void pmap_kremove(vaddr_t, vsize_t); +void pmap_zero_page(struct vm_page *); +void pmap_copy_page(struct vm_page *, struct vm_page *); +void pmap_enter_pv(struct vm_page *, struct pv_entry *, + struct pmap *, vaddr_t, struct vm_page *); +void pmap_free_pvs(struct pmap *, struct pv_entry *); +boolean_t pmap_clear_attrs(struct vm_page *, int); +static void pmap_page_protect(struct vm_page *, vm_prot_t); +void pmap_page_remove(struct vm_page *); +static void pmap_protect(struct pmap *, vaddr_t, + vaddr_t, vm_prot_t); +void 
pmap_remove(struct pmap *, vaddr_t, vaddr_t); +boolean_t pmap_test_attrs(struct vm_page *, int); +void pmap_write_protect(struct pmap *, vaddr_t, + vaddr_t, vm_prot_t); +int pmap_exec_fixup(struct vm_map *, struct trapframe *, + struct pcb *); +void pmap_exec_account(struct pmap *, vaddr_t, u_int32_t, + u_int32_t); +struct pv_entry *pmap_remove_pv(struct vm_page *, struct pmap *, vaddr_t); +void pmap_apte_flush(void); +void pmap_switch(struct proc *, struct proc *); vaddr_t reserve_dumppages(vaddr_t); /* XXX: not a pmap fn */ - -void pmap_tlb_shootpage(struct pmap *, vaddr_t); -void pmap_tlb_shootrange(struct pmap *, vaddr_t, vaddr_t); -void pmap_tlb_shoottlb(void); +paddr_t vtophys(vaddr_t va); +paddr_t vtophys_pae(vaddr_t va); + +extern u_int32_t (*pmap_pte_set_p)(vaddr_t, paddr_t, u_int32_t); +extern u_int32_t (*pmap_pte_setbits_p)(vaddr_t, u_int32_t, u_int32_t); +extern u_int32_t (*pmap_pte_bits_p)(vaddr_t); +extern paddr_t (*pmap_pte_paddr_p)(vaddr_t); +extern boolean_t (*pmap_clear_attrs_p)(struct vm_page *, int); +extern int (*pmap_enter_p)(pmap_t, vaddr_t, paddr_t, vm_prot_t, int); +extern boolean_t (*pmap_extract_p)(pmap_t, vaddr_t, paddr_t *); +extern vaddr_t (*pmap_growkernel_p)(vaddr_t); +extern void (*pmap_page_remove_p)(struct vm_page *); +extern void (*pmap_do_remove_p)(struct pmap *, vaddr_t, vaddr_t, int); +extern boolean_t (*pmap_test_attrs_p)(struct vm_page *, int); +extern void (*pmap_unwire_p)(struct pmap *, vaddr_t); +extern void (*pmap_write_protect_p)(struct pmap*, vaddr_t, vaddr_t, vm_prot_t); +extern void (*pmap_pinit_pd_p)(pmap_t); +extern void (*pmap_zero_phys_p)(paddr_t); +extern boolean_t (*pmap_zero_page_uncached_p)(paddr_t); +extern void (*pmap_copy_page_p)(struct vm_page *, struct vm_page *); + +u_int32_t pmap_pte_set_pae(vaddr_t, paddr_t, u_int32_t); +u_int32_t pmap_pte_setbits_pae(vaddr_t, u_int32_t, u_int32_t); +u_int32_t pmap_pte_bits_pae(vaddr_t); +paddr_t pmap_pte_paddr_pae(vaddr_t); +boolean_t pmap_clear_attrs_pae(struct vm_page *, int); +int pmap_enter_pae(pmap_t, vaddr_t, paddr_t, vm_prot_t, int); +boolean_t pmap_extract_pae(pmap_t, vaddr_t, paddr_t *); +vaddr_t pmap_growkernel_pae(vaddr_t); +void pmap_page_remove_pae(struct vm_page *); +void pmap_do_remove_pae(struct pmap *, vaddr_t, vaddr_t, int); +boolean_t pmap_test_attrs_pae(struct vm_page *, int); +void pmap_unwire_pae(struct pmap *, vaddr_t); +void pmap_write_protect_pae(struct pmap *, vaddr_t, vaddr_t, vm_prot_t); +void pmap_pinit_pd_pae(pmap_t); +void pmap_zero_phys_pae(paddr_t); +boolean_t pmap_zero_page_uncached_pae(paddr_t); +void pmap_copy_page_pae(struct vm_page *, struct vm_page *); +void pae_copy_phys(paddr_t, paddr_t, int, int); + +#define pmap_pte_set (*pmap_pte_set_p) +#define pmap_pte_setbits (*pmap_pte_setbits_p) +#define pmap_pte_bits (*pmap_pte_bits_p) +#define pmap_pte_paddr (*pmap_pte_paddr_p) +#define pmap_clear_attrs (*pmap_clear_attrs_p) +#define pmap_page_remove (*pmap_page_remove_p) +#define pmap_do_remove (*pmap_do_remove_p) +#define pmap_test_attrs (*pmap_test_attrs_p) +#define pmap_unwire (*pmap_unwire_p) +#define pmap_write_protect (*pmap_write_protect_p) +#define pmap_pinit_pd (*pmap_pinit_pd_p) +#define pmap_zero_phys (*pmap_zero_phys_p) +#define pmap_zero_page_uncached (*pmap_zero_page_uncached_p) +#define pmap_copy_page (*pmap_copy_page_p) + +u_int32_t pmap_pte_set_86(vaddr_t, paddr_t, u_int32_t); +u_int32_t pmap_pte_setbits_86(vaddr_t, u_int32_t, u_int32_t); +u_int32_t pmap_pte_bits_86(vaddr_t); +paddr_t pmap_pte_paddr_86(vaddr_t); +boolean_t 
pmap_clear_attrs_86(struct vm_page *, int); +int pmap_enter_86(pmap_t, vaddr_t, paddr_t, vm_prot_t, int); +boolean_t pmap_extract_86(pmap_t, vaddr_t, paddr_t *); +vaddr_t pmap_growkernel_86(vaddr_t); +void pmap_page_remove_86(struct vm_page *); +void pmap_do_remove_86(struct pmap *, vaddr_t, vaddr_t, int); +boolean_t pmap_test_attrs_86(struct vm_page *, int); +void pmap_unwire_86(struct pmap *, vaddr_t); +void pmap_write_protect_86(struct pmap *, vaddr_t, vaddr_t, vm_prot_t); +void pmap_pinit_pd_86(pmap_t); +void pmap_zero_phys_86(paddr_t); +boolean_t pmap_zero_page_uncached_86(paddr_t); +void pmap_copy_page_86(struct vm_page *, struct vm_page *); +void pmap_tlb_shootpage(struct pmap *, vaddr_t); +void pmap_tlb_shootrange(struct pmap *, vaddr_t, vaddr_t); +void pmap_tlb_shoottlb(void); #ifdef MULTIPROCESSOR -void pmap_tlb_droppmap(struct pmap *); -void pmap_tlb_shootwait(void); +void pmap_tlb_droppmap(struct pmap *); +void pmap_tlb_shootwait(void); #else #define pmap_tlb_shootwait() #endif -void pmap_prealloc_lowmem_ptp(paddr_t); +void pmap_prealloc_lowmem_ptp(); +void pmap_prealloc_lowmem_ptp_pae(); +vaddr_t pmap_tmpmap_pa(paddr_t); +void pmap_tmpunmap_pa(void); +vaddr_t pmap_tmpmap_pa_pae(paddr_t); +void pmap_tmpunmap_pa_pae(void); + /* * functions for flushing the cache for vaddrs and pages. * these functions are not part of the MI pmap interface and thus * should not be used as such. */ -void pmap_flush_cache(vaddr_t, vsize_t); -void pmap_flush_page(paddr_t); +void pmap_flush_cache(vaddr_t, vsize_t); +void pmap_flush_page(paddr_t); +void pmap_flush_page_pae(paddr_t); #define PMAP_GROWKERNEL /* turn on pmap_growkernel interface */ /* * Do idle page zero'ing uncached to avoid polluting the cache. */ -boolean_t pmap_zero_page_uncached(paddr_t); #define PMAP_PAGEIDLEZERO(pg) pmap_zero_page_uncached(VM_PAGE_TO_PHYS(pg)) /* @@ -363,6 +424,49 @@ pmap_protect(struct pmap *pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot) } } +/* + * pmap_growkernel, pmap_enter, and pmap_extract get picked up in variuos + * modules from both uvm_pmap.h and pmap.h. Since uvm_pmap.h defines these + * as functions, inline them here to suppress linker warnings. + */ +__inline static vaddr_t +pmap_growkernel(vaddr_t maxkvaddr) +{ + return (*pmap_growkernel_p)(maxkvaddr); +} + +__inline static int +pmap_enter(struct pmap *pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags) +{ + return (*pmap_enter_p)(pmap, va, pa, prot, flags); +} + +__inline static boolean_t +pmap_extract(struct pmap *pmap, vaddr_t va, paddr_t *pa) +{ + return (*pmap_extract_p)(pmap, va, pa); +} + +/* + * p m a p i n l i n e h e l p e r f u n c t i o n s + */ + +/* + * pmap_is_active: is this pmap loaded into the specified processor's %cr3? 
+ */ + +static __inline boolean_t +pmap_is_active(struct pmap *pmap, struct cpu_info *ci) +{ + return (pmap == pmap_kernel() || ci->ci_curpmap == pmap); +} + +static __inline boolean_t +pmap_is_curpmap(struct pmap *pmap) +{ + return (pmap_is_active(pmap, curcpu())); +} + #if defined(USER_LDT) void pmap_ldt_cleanup(struct proc *); #define PMAP_FORK diff --git a/sys/arch/i386/include/pte.h b/sys/arch/i386/include/pte.h index efaa89caaec..c0e1ccfb83d 100644 --- a/sys/arch/i386/include/pte.h +++ b/sys/arch/i386/include/pte.h @@ -1,4 +1,4 @@ -/* $OpenBSD: pte.h,v 1.20 2015/03/13 23:23:13 mlarkin Exp $ */ +/* $OpenBSD: pte.h,v 1.21 2015/04/12 18:37:54 mlarkin Exp $ */ /* $NetBSD: pte.h,v 1.11 1998/02/06 21:58:05 thorpej Exp $ */ /* @@ -37,33 +37,12 @@ #ifndef _MACHINE_PTE_H_ #define _MACHINE_PTE_H_ -#if !defined(_LOCORE) - -/* - * here we define the data types for PDEs and PTEs - */ - -typedef u_int32_t pd_entry_t; /* PDE */ -typedef u_int32_t pt_entry_t; /* PTE */ - -#endif - /* * now we define various for playing with virtual addresses */ #define PDSHIFT 22 /* offset of PD index in VA */ #define NBPD (1 << PDSHIFT) /* # bytes mapped by PD (4MB) */ -#define PDOFSET (NBPD-1) /* mask for non-PD part of VA */ -#if 0 /* not used? */ -#define NPTEPD (NBPD / PAGE_SIZE) /* # of PTEs in a PD */ -#else -#define PTES_PER_PTP (NBPD / PAGE_SIZE) /* # of PTEs in a PTP */ -#endif - -#define PAGE_MASK_L2 (NBPD - 1) - -#define i386_round_pdr(x) ((((unsigned)(x)) + PDOFSET) & ~PDOFSET) /* * here we define the bits of the PDE/PTE, as described above: @@ -87,8 +66,6 @@ typedef u_int32_t pt_entry_t; /* PTE */ #define PG_AVAIL2 0x00000400 /* ignored by hardware */ #define PG_AVAIL3 0x00000800 /* ignored by hardware */ #define PG_PATLG 0x00001000 /* PAT on large pages */ -#define PG_FRAME 0xfffff000 /* page frame mask */ -#define PG_LGFRAME 0xffc00000 /* large (4M) page frame mask */ /* Cacheability bits when we are using PAT */ #define PG_WB (0) /* The default */ diff --git a/sys/arch/i386/pci/piixpcib.c b/sys/arch/i386/pci/piixpcib.c index a33e4b1fc66..35199a31b05 100644 --- a/sys/arch/i386/pci/piixpcib.c +++ b/sys/arch/i386/pci/piixpcib.c @@ -1,4 +1,4 @@ -/* $OpenBSD: piixpcib.c,v 1.11 2014/09/14 14:17:23 jsg Exp $ */ +/* $OpenBSD: piixpcib.c,v 1.12 2015/04/12 18:37:54 mlarkin Exp $ */ /* * Copyright (c) 2007 Stefan Sperling @@ -115,6 +115,8 @@ extern void pcibattach(struct device *, struct device *, void *); extern void p3_update_cpuspeed(void); #endif +extern int cpu_pae; + struct cfattach piixpcib_ca = { sizeof(struct piixpcib_softc), piixpcib_match, @@ -195,6 +197,9 @@ piixpcib_configure_speedstep(struct piixpcib_softc *sc) sc->sc_command = PIIXPCIB_DEFAULT_COMMAND; sc->sc_flags = 0; + if (cpu_pae) + return ENODEV; + piixpcib_int15_gsic_call(sc); /* If signature doesn't match, bail out */ -- 2.20.1
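A few of the mechanisms in the reworked pmapae.c are easier to see outside the patch context. First, the TLB shootdown policy: instead of accumulating a cpumask per zapped PTE, pmap_do_remove_pae() and pmap_write_protect_pae() batch the invalidations and then choose between a ranged flush and a whole-TLB flush using the 32-page heuristic marked "XXX - tune" above. A compilable sketch of just that decision, with stand-in bodies for pmap_tlb_shootrange() and pmap_tlb_shoottlb():

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define SHOOT_LIMIT	(32 * PAGE_SIZE)	/* threshold from the diff */

static void
flush_range(unsigned long sva, unsigned long eva)
{
	printf("invlpg over [%#lx, %#lx)\n", sva, eva);
}

static void
flush_all(void)
{
	printf("reload %%cr3 (full TLB flush)\n");
}

static void
remove_range(unsigned long sva, unsigned long eva, bool is_kernel_pmap)
{
	/* kernel mappings are always flushed page by page */
	bool shootall = (eva - sva > SHOOT_LIMIT) && !is_kernel_pmap;

	/* ... PTEs are zapped here, with no per-page IPIs ... */

	if (shootall)
		flush_all();
	else
		flush_range(sva, eva);
	/* ... then wait for the other CPUs, as pmap_tlb_shootwait() does ... */
}

int
main(void)
{
	remove_range(0x1000, 0x5000, false);	/* small range: ranged flush */
	remove_range(0x1000, 0x100000, false);	/* large range: full flush */
	return 0;
}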
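Second, the referenced/modified bookkeeping: the vm_physseg_find() lookups and per-segment attrs[] arrays are gone, and pmap_sync_flags_pte_pae() folds the hardware-maintained PG_U/PG_M bits of an inspected or dying PTE into the page's flag word with a single atomic OR. A sketch of that translation; the flag values here are illustrative, not the kernel's.

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define PG_U		0x020u	/* hardware: accessed */
#define PG_M		0x040u	/* hardware: modified */
#define PG_PMAP_REF	0x01u	/* software: page referenced */
#define PG_PMAP_MOD	0x02u	/* software: page modified */

struct page {
	_Atomic unsigned int pg_flags;
};

static unsigned int
pte2flags(uint64_t pte)
{
	return ((pte & PG_U) ? PG_PMAP_REF : 0) |
	    ((pte & PG_M) ? PG_PMAP_MOD : 0);
}

static void
sync_flags_pte(struct page *pg, uint64_t pte)
{
	/* one atomic OR replaces the old attrs[] array update */
	if (pte & (PG_U | PG_M))
		atomic_fetch_or(&pg->pg_flags, pte2flags(pte));
}

int
main(void)
{
	struct page pg = { 0 };

	sync_flags_pte(&pg, PG_U | PG_M | 0x1000);
	printf("pg_flags %#x\n", atomic_load(&pg.pg_flags));	/* 0x3 */
	return 0;
}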
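Finally, pmap_enter_pae() now allocates its pv_entry up front, before the PTEs are mapped and sleeping becomes forbidden (the "not allowed to sleep after here!" comment), and hands the spare back if the new mapping turns out not to need one; the statistics updates are likewise accumulated in locals and applied once at the end. The allocation pattern, reduced to userland with illustrative names:

#include <stdio.h>
#include <stdlib.h>

struct pv_entry {
	int pv_dummy;		/* placeholder payload */
};

static struct pv_entry *
alloc_pv(void)
{
	return malloc(sizeof(struct pv_entry));	/* may "sleep" */
}

static void
free_pv(struct pv_entry *pve)
{
	free(pve);
}

static void
enter_mapping(int needs_pv)
{
	/* grab the pv_entry while sleeping is still allowed */
	struct pv_entry *freepve = alloc_pv();

	/* --- from here on, no sleeping --- */
	if (needs_pv && freepve != NULL) {
		/* ... link freepve onto the page's pv list ... */
		freepve = NULL;		/* consumed */
	}

	/* ... install the PTE, apply the batched stats once ... */

	if (freepve != NULL)
		free_pv(freepve);	/* not needed after all */
}

int
main(void)
{
	enter_mapping(1);
	enter_mapping(0);
	return 0;
}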