From 984d47442509d56351882771252bcf7b658a4e08 Mon Sep 17 00:00:00 2001 From: sf Date: Sun, 19 Apr 2015 19:45:21 +0000 Subject: [PATCH] Add support for x2apic mode This is currently only enabled on hypervisors because on real hardware, it requires interrupt remapping which we don't support yet. But on virtualization it reduces the number of vmexits required per IPI from 4 to 1, causing a significant speed-up for MP guests. ok kettenis@ --- sys/arch/amd64/amd64/lapic.c | 216 +++++++++++++++++++++++----- sys/arch/amd64/amd64/mptramp.S | 22 ++- sys/arch/amd64/amd64/vector.S | 22 ++- sys/arch/amd64/include/codepatch.h | 3 +- sys/arch/amd64/include/cpuvar.h | 6 +- sys/arch/amd64/include/i82093reg.h | 11 +- sys/arch/amd64/include/i82489reg.h | 7 +- sys/arch/amd64/include/i82489var.h | 22 +-- sys/arch/amd64/include/specialreg.h | 3 +- 9 files changed, 250 insertions(+), 62 deletions(-) diff --git a/sys/arch/amd64/amd64/lapic.c b/sys/arch/amd64/amd64/lapic.c index 174bffb583d..9b346c64f78 100644 --- a/sys/arch/amd64/amd64/lapic.c +++ b/sys/arch/amd64/amd64/lapic.c @@ -1,4 +1,4 @@ -/* $OpenBSD: lapic.c,v 1.38 2015/03/14 03:38:46 jsg Exp $ */ +/* $OpenBSD: lapic.c,v 1.39 2015/04/19 19:45:21 sf Exp $ */ /* $NetBSD: lapic.c,v 1.2 2003/05/08 01:04:35 fvdl Exp $ */ /*- @@ -38,6 +38,7 @@ #include +#include #include #include #include @@ -88,12 +89,120 @@ struct pic local_pic = { lapic_setup, }; +extern int x2apic_eoi; +int x2apic_enabled = 0; + +u_int32_t x2apic_readreg(int reg); +u_int32_t x2apic_cpu_number(); +void x2apic_writereg(int reg, u_int32_t val); +int x2apic_ipi(int vec, int target, int dl); + +u_int32_t i82489_readreg(int reg); +u_int32_t i82489_cpu_number(); +void i82489_writereg(int reg, u_int32_t val); +int i82489_ipi(int vec, int target, int dl); + +u_int32_t (*lapic_readreg)(int) = i82489_readreg; +void (*lapic_writereg)(int, u_int32_t) = i82489_writereg; +#ifdef MULTIPROCESSOR +int (*x86_ipi)(int vec, int target, int dl) = i82489_ipi; +#endif + +u_int32_t +i82489_readreg(int reg) +{ + return *((volatile u_int32_t *)(((volatile u_int8_t *)local_apic) + + reg)); +} + +u_int32_t +i82489_cpu_number() +{ + return i82489_readreg(LAPIC_ID) >> LAPIC_ID_SHIFT; +} + +void +i82489_writereg(int reg, u_int32_t val) +{ + *((volatile u_int32_t *)(((volatile u_int8_t *)local_apic) + reg)) = + val; +} + +u_int32_t +x2apic_readreg(int reg) +{ + return rdmsr(MSR_X2APIC_BASE + (reg >> 4)); +} + +u_int32_t +x2apic_cpu_number() +{ + return x2apic_readreg(LAPIC_ID) & X2APIC_ID_MASK; +} + +void +x2apic_writereg(int reg, u_int32_t val) +{ + wrmsr(MSR_X2APIC_BASE + (reg >> 4), val); +} + +static inline void +x2apic_writeicr(u_int32_t hi, u_int32_t lo) +{ + u_int32_t msr = MSR_X2APIC_BASE + (LAPIC_ICRLO >> 4); + __asm volatile("wrmsr" : : "a" (lo), "d" (hi), "c" (msr)); +} + +u_int32_t +lapic_cpu_number() +{ + if (x2apic_enabled) + return x2apic_cpu_number(); + return i82489_cpu_number(); +} + + void lapic_map(paddr_t lapic_base) { int s; pt_entry_t *pte; - vaddr_t va = (vaddr_t)&local_apic; + vaddr_t va; + + /* + * On real hardware, x2apic must only be enabled if interrupt remapping + * is also enabled. See 10.12.7 of the SDM vol 3. + * On hypervisors, this is not necessary. Hypervisors can implement + * x2apic support even if the host CPU does not support it. + * Until we support interrupt remapping, use x2apic only if the + * hypervisor flag is also set. + */ + if ((cpu_ecxfeature&CPUIDECX_X2APIC) && (cpu_ecxfeature&CPUIDECX_HV)) { + u_int64_t msr; + + disable_intr(); + s = lapic_tpr; + + msr = rdmsr(MSR_APICBASE); + msr |= APICBASE_ENABLE_X2APIC; + wrmsr(MSR_APICBASE, msr); + + lapic_readreg = x2apic_readreg; + lapic_writereg = x2apic_writereg; +#ifdef MULTIPROCESSOR + x86_ipi = x2apic_ipi; +#endif + x2apic_enabled = 1; + + codepatch_call(CPTAG_EOI, &x2apic_eoi); + + lapic_writereg(LAPIC_TPRI, s); + enable_intr(); + + return; + } + + va = (vaddr_t)&local_apic; disable_intr(); s = lapic_tpr; @@ -121,13 +230,13 @@ lapic_map(paddr_t lapic_base) void lapic_enable(void) { - i82489_writereg(LAPIC_SVR, LAPIC_SVR_ENABLE | LAPIC_SPURIOUS_VECTOR); + lapic_writereg(LAPIC_SVR, LAPIC_SVR_ENABLE | LAPIC_SPURIOUS_VECTOR); } void lapic_disable(void) { - i82489_writereg(LAPIC_SVR, 0); + lapic_writereg(LAPIC_SVR, 0); } void @@ -141,9 +250,9 @@ lapic_set_lvt(void) #ifdef MULTIPROCESSOR if (mp_verbose) { apic_format_redir(ci->ci_dev->dv_xname, "prelint", 0, 0, - i82489_readreg(LAPIC_LVINT0)); + lapic_readreg(LAPIC_LVINT0)); apic_format_redir(ci->ci_dev->dv_xname, "prelint", 1, 0, - i82489_readreg(LAPIC_LVINT1)); + lapic_readreg(LAPIC_LVINT1)); } #endif @@ -152,9 +261,9 @@ lapic_set_lvt(void) * Disable ExtINT by default when using I/O APICs. */ if (nioapics > 0) { - lint0 = i82489_readreg(LAPIC_LVINT0); + lint0 = lapic_readreg(LAPIC_LVINT0); lint0 |= LAPIC_LVT_MASKED; - i82489_writereg(LAPIC_LVINT0, lint0); + lapic_writereg(LAPIC_LVINT0, lint0); } #endif @@ -193,24 +302,24 @@ lapic_set_lvt(void) mpi->ioapic_pin); #endif if (mpi->ioapic_pin == 0) - i82489_writereg(LAPIC_LVINT0, mpi->redir); + lapic_writereg(LAPIC_LVINT0, mpi->redir); else - i82489_writereg(LAPIC_LVINT1, mpi->redir); + lapic_writereg(LAPIC_LVINT1, mpi->redir); } } #ifdef MULTIPROCESSOR if (mp_verbose) { apic_format_redir(ci->ci_dev->dv_xname, "timer", 0, 0, - i82489_readreg(LAPIC_LVTT)); + lapic_readreg(LAPIC_LVTT)); apic_format_redir(ci->ci_dev->dv_xname, "pcint", 0, 0, - i82489_readreg(LAPIC_PCINT)); + lapic_readreg(LAPIC_PCINT)); apic_format_redir(ci->ci_dev->dv_xname, "lint", 0, 0, - i82489_readreg(LAPIC_LVINT0)); + lapic_readreg(LAPIC_LVINT0)); apic_format_redir(ci->ci_dev->dv_xname, "lint", 1, 0, - i82489_readreg(LAPIC_LVINT1)); + lapic_readreg(LAPIC_LVINT1)); apic_format_redir(ci->ci_dev->dv_xname, "err", 0, 0, - i82489_readreg(LAPIC_LVERR)); + lapic_readreg(LAPIC_LVERR)); } #endif } @@ -253,7 +362,7 @@ lapic_boot_init(paddr_t lapic_base) static __inline u_int32_t lapic_gettick(void) { - return i82489_readreg(LAPIC_CCR_TIMER); + return lapic_readreg(LAPIC_CCR_TIMER); } #include /* for hz */ @@ -292,10 +401,10 @@ lapic_startclock(void) * then set divisor, * then unmask and set the vector. */ - i82489_writereg(LAPIC_LVTT, LAPIC_LVTT_TM|LAPIC_LVTT_M); - i82489_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1); - i82489_writereg(LAPIC_ICR_TIMER, lapic_tval); - i82489_writereg(LAPIC_LVTT, LAPIC_LVTT_TM|LAPIC_TIMER_VECTOR); + lapic_writereg(LAPIC_LVTT, LAPIC_LVTT_TM|LAPIC_LVTT_M); + lapic_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1); + lapic_writereg(LAPIC_ICR_TIMER, lapic_tval); + lapic_writereg(LAPIC_LVTT, LAPIC_LVTT_TM|LAPIC_TIMER_VECTOR); } void @@ -350,9 +459,9 @@ lapic_calibrate_timer(struct cpu_info *ci) * Configure timer to one-shot, interrupt masked, * large positive number. */ - i82489_writereg(LAPIC_LVTT, LAPIC_LVTT_M); - i82489_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1); - i82489_writereg(LAPIC_ICR_TIMER, 0x80000000); + lapic_writereg(LAPIC_LVTT, LAPIC_LVTT_M); + lapic_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1); + lapic_writereg(LAPIC_ICR_TIMER, 0x80000000); disable_intr(); @@ -390,10 +499,10 @@ lapic_calibrate_timer(struct cpu_info *ci) lapic_tval = (lapic_per_second * 2) / hz; lapic_tval = (lapic_tval / 2) + (lapic_tval & 0x1); - i82489_writereg(LAPIC_LVTT, LAPIC_LVTT_TM | LAPIC_LVTT_M | + lapic_writereg(LAPIC_LVTT, LAPIC_LVTT_TM | LAPIC_LVTT_M | LAPIC_TIMER_VECTOR); - i82489_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1); - i82489_writereg(LAPIC_ICR_TIMER, lapic_tval); + lapic_writereg(LAPIC_DCR_TIMER, LAPIC_DCRT_DIV1); + lapic_writereg(LAPIC_ICR_TIMER, lapic_tval); /* * Compute fixed-point ratios between cycles and @@ -480,7 +589,7 @@ i82489_icr_wait(void) #ifdef MULTIPROCESSOR int -x86_ipi_init(int target) +i82489_ipi_init(int target) { if ((target & LAPIC_DEST_MASK) == 0) @@ -502,7 +611,7 @@ x86_ipi_init(int target) } int -x86_ipi(int vec, int target, int dl) +i82489_ipi(int vec, int target, int dl) { int s; @@ -522,6 +631,49 @@ x86_ipi(int vec, int target, int dl) return 0; } + +int +x2apic_ipi_init(int target) +{ + u_int64_t hi = 0; + + if ((target & LAPIC_DEST_MASK) == 0) + hi = target & 0xff; + + x2apic_writeicr(hi, (target & LAPIC_DEST_MASK) | LAPIC_DLMODE_INIT | + LAPIC_LVL_ASSERT ); + + i8254_delay(10000); + + x2apic_writeicr(0, (target & LAPIC_DEST_MASK) | LAPIC_DLMODE_INIT | + LAPIC_LVL_TRIG | LAPIC_LVL_DEASSERT); + + return 0; +} + +int +x2apic_ipi(int vec, int target, int dl) +{ + u_int64_t hi = 0, lo; + + if ((target & LAPIC_DEST_MASK) == 0) + hi = target & 0xff; + + lo = (target & LAPIC_DEST_MASK) | vec | dl | LAPIC_LVL_ASSERT; + + x2apic_writeicr(hi, lo); + + return 0; +} + +int +x86_ipi_init(int target) +{ + if (x2apic_enabled) + return x2apic_ipi_init(target); + else + return i82489_ipi_init(target); +} #endif /* MULTIPROCESSOR */ @@ -542,9 +694,9 @@ lapic_hwmask(struct pic *pic, int pin) u_int32_t val; reg = LAPIC_LVTT + (pin << 4); - val = i82489_readreg(reg); + val = lapic_readreg(reg); val |= LAPIC_LVT_MASKED; - i82489_writereg(reg, val); + lapic_writereg(reg, val); } void @@ -554,9 +706,9 @@ lapic_hwunmask(struct pic *pic, int pin) u_int32_t val; reg = LAPIC_LVTT + (pin << 4); - val = i82489_readreg(reg); + val = lapic_readreg(reg); val &= ~LAPIC_LVT_MASKED; - i82489_writereg(reg, val); + lapic_writereg(reg, val); } void diff --git a/sys/arch/amd64/amd64/mptramp.S b/sys/arch/amd64/amd64/mptramp.S index 817a27ffc1e..2616c01f658 100644 --- a/sys/arch/amd64/amd64/mptramp.S +++ b/sys/arch/amd64/amd64/mptramp.S @@ -1,4 +1,4 @@ -/* $OpenBSD: mptramp.S,v 1.11 2014/12/08 07:49:17 mlarkin Exp $ */ +/* $OpenBSD: mptramp.S,v 1.12 2015/04/19 19:45:21 sf Exp $ */ /* $NetBSD: mptramp.S,v 1.1 2003/04/26 18:39:30 fvdl Exp $ */ /*- @@ -187,15 +187,31 @@ _TRMP_LABEL(mptramp_longmode) _C_LABEL(cpu_spinup_trampoline_end): #end of code copied to MP_TRAMPOLINE + .globl _C_LABEL(x2apic_enabled) + movl x2apic_enabled,%eax + testl %eax,%eax + jz 1f + + mov $MSR_APICBASE,%ecx + mov $0,%edx + rdmsr + orl $APICBASE_ENABLE_X2APIC,%eax + wrmsr + mov $MSR_X2APIC_ID,%ecx + rdmsr + andl $X2APIC_ID_MASK,%eax + jmp 2f +1: movl _C_LABEL(local_apic)+LAPIC_ID,%eax shrl $LAPIC_ID_SHIFT,%eax +2: xorq %rcx,%rcx -1: +3: movq _C_LABEL(cpu_info)(,%rcx,8),%rdi incq %rcx movl CPU_INFO_APICID(%rdi),%edx cmpl %eax,%edx - jne 1b + jne 3b movq CPU_INFO_IDLE_PCB(%rdi),%rsi diff --git a/sys/arch/amd64/amd64/vector.S b/sys/arch/amd64/amd64/vector.S index 9f87751b8ea..aa2f10f940e 100644 --- a/sys/arch/amd64/amd64/vector.S +++ b/sys/arch/amd64/amd64/vector.S @@ -1,4 +1,4 @@ -/* $OpenBSD: vector.S,v 1.36 2015/02/07 00:26:37 deraadt Exp $ */ +/* $OpenBSD: vector.S,v 1.37 2015/04/19 19:45:21 sf Exp $ */ /* $NetBSD: vector.S,v 1.5 2004/06/28 09:13:11 fvdl Exp $ */ /* @@ -77,6 +77,8 @@ #include #include #include +#include +#include #include "ioapic.h" #include "lapic.h" @@ -309,6 +311,20 @@ calltrap: /* XXX See comment in locore.s */ #define XINTR(name,num) Xintr_##name##num + .globl _C_LABEL(x2apic_eoi) +_C_LABEL(x2apic_eoi): + pushq %rax + pushq %rcx + pushq %rdx + mov $MSR_X2APIC_EOI,%ecx + mov $0,%eax + mov $0,%edx + wrmsr + popq %rdx + popq %rcx + popq %rax + ret + #if NLAPIC > 0 #ifdef MULTIPROCESSOR IDTVEC(recurse_lapic_ipi) @@ -321,7 +337,9 @@ IDTVEC(intr_lapic_ipi) pushq $0 pushq $T_ASTFLT INTRENTRY + CODEPATCH_START movl $0,_C_LABEL(local_apic)+LAPIC_EOI + CODEPATCH_END(CPTAG_EOI) movl CPUVAR(ILEVEL),%ebx cmpl $IPL_IPI,%ebx jae 2f @@ -403,7 +421,9 @@ IDTVEC(intr_lapic_ltimer) pushq $0 pushq $T_ASTFLT INTRENTRY + CODEPATCH_START movl $0,_C_LABEL(local_apic)+LAPIC_EOI + CODEPATCH_END(CPTAG_EOI) movl CPUVAR(ILEVEL),%ebx cmpl $IPL_CLOCK,%ebx jae 2f diff --git a/sys/arch/amd64/include/codepatch.h b/sys/arch/amd64/include/codepatch.h index 7ff3216ffa0..9a29b3aaf77 100644 --- a/sys/arch/amd64/include/codepatch.h +++ b/sys/arch/amd64/include/codepatch.h @@ -1,4 +1,4 @@ -/* $OpenBSD: codepatch.h,v 1.1 2015/01/16 10:17:51 sf Exp $ */ +/* $OpenBSD: codepatch.h,v 1.2 2015/04/19 19:45:21 sf Exp $ */ /* * Copyright (c) 2014-2015 Stefan Fritsch * @@ -48,5 +48,6 @@ void codepatch_call(uint16_t tag, void *func); #define CPTAG_STAC 1 #define CPTAG_CLAC 2 +#define CPTAG_EOI 3 #endif /* _MACHINE_CODEPATCH_H_ */ diff --git a/sys/arch/amd64/include/cpuvar.h b/sys/arch/amd64/include/cpuvar.h index 3b91ab5c1cd..f81ee594c00 100644 --- a/sys/arch/amd64/include/cpuvar.h +++ b/sys/arch/amd64/include/cpuvar.h @@ -1,4 +1,4 @@ -/* $OpenBSD: cpuvar.h,v 1.4 2015/01/27 20:50:46 sf Exp $ */ +/* $OpenBSD: cpuvar.h,v 1.5 2015/04/19 19:45:21 sf Exp $ */ /* $NetBSD: cpuvar.h,v 1.1 2003/03/01 18:29:28 fvdl Exp $ */ /*- @@ -87,8 +87,10 @@ struct cpu_attach_args { #ifdef _KERNEL -int x86_ipi(int,int,int); +#ifdef MULTIPROCESSOR +extern int (*x86_ipi)(int,int,int); int x86_ipi_init(int); +#endif void identifycpu(struct cpu_info *); void cpu_init(struct cpu_info *); diff --git a/sys/arch/amd64/include/i82093reg.h b/sys/arch/amd64/include/i82093reg.h index e4ab947545e..c6a7b118db2 100644 --- a/sys/arch/amd64/include/i82093reg.h +++ b/sys/arch/amd64/include/i82093reg.h @@ -1,4 +1,4 @@ -/* $OpenBSD: i82093reg.h,v 1.4 2011/06/05 19:36:25 deraadt Exp $ */ +/* $OpenBSD: i82093reg.h,v 1.5 2015/04/19 19:45:21 sf Exp $ */ /* $NetBSD: i82093reg.h,v 1.1 2003/02/26 21:26:10 fvdl Exp $ */ /*- @@ -112,8 +112,13 @@ #ifdef _KERNEL -#define ioapic_asm_ack(num) \ - movl $0,(_C_LABEL(local_apic)+LAPIC_EOI)(%rip) +#include + +#define ioapic_asm_ack(num) \ + CODEPATCH_START ;\ + movl $0,(_C_LABEL(local_apic)+LAPIC_EOI)(%rip) ;\ + CODEPATCH_END(CPTAG_EOI) + #ifdef MULTIPROCESSOR diff --git a/sys/arch/amd64/include/i82489reg.h b/sys/arch/amd64/include/i82489reg.h index 73f7015c6ed..35e50044c6d 100644 --- a/sys/arch/amd64/include/i82489reg.h +++ b/sys/arch/amd64/include/i82489reg.h @@ -1,4 +1,4 @@ -/* $OpenBSD: i82489reg.h,v 1.2 2008/06/26 05:42:09 ray Exp $ */ +/* $OpenBSD: i82489reg.h,v 1.3 2015/04/19 19:45:21 sf Exp $ */ /* $NetBSD: i82489reg.h,v 1.1 2003/02/26 21:26:10 fvdl Exp $ */ /*- @@ -141,3 +141,8 @@ #define LAPIC_BASE 0xfee00000 #define LAPIC_IRQ_MASK(i) (1 << ((i) + 1)) + +#define MSR_X2APIC_BASE 0x800 +#define MSR_X2APIC_EOI (MSR_X2APIC_BASE+0x0b) /* End Int. W */ +#define MSR_X2APIC_ID (MSR_X2APIC_BASE+0x02) /* ID. RW */ +#define X2APIC_ID_MASK 0xff diff --git a/sys/arch/amd64/include/i82489var.h b/sys/arch/amd64/include/i82489var.h index dd50af54e5f..4eb199984d2 100644 --- a/sys/arch/amd64/include/i82489var.h +++ b/sys/arch/amd64/include/i82489var.h @@ -1,4 +1,4 @@ -/* $OpenBSD: i82489var.h,v 1.14 2014/01/21 09:40:55 kettenis Exp $ */ +/* $OpenBSD: i82489var.h,v 1.15 2015/04/19 19:45:21 sf Exp $ */ /* $NetBSD: i82489var.h,v 1.1 2003/02/26 21:26:10 fvdl Exp $ */ /*- @@ -37,28 +37,14 @@ * Software definitions belonging to Local APIC driver. */ -static __inline__ u_int32_t i82489_readreg(int); -static __inline__ void i82489_writereg(int, u_int32_t); - #ifdef _KERNEL extern volatile u_int32_t local_apic[]; extern volatile u_int32_t lapic_tpr; #endif -static __inline__ u_int32_t -i82489_readreg(int reg) -{ - return *((volatile u_int32_t *)(((volatile u_int8_t *)local_apic) - + reg)); -} - -static __inline__ void -i82489_writereg(int reg, u_int32_t val) -{ - *((volatile u_int32_t *)(((volatile u_int8_t *)local_apic) + reg)) = val; -} - -#define lapic_cpu_number() (i82489_readreg(LAPIC_ID)>>LAPIC_ID_SHIFT) +extern u_int32_t (*lapic_readreg)(int); +extern void (*lapic_writereg)(int, u_int32_t); +u_int32_t lapic_cpu_number(void); /* * "spurious interrupt vector"; vector used by interrupt which was diff --git a/sys/arch/amd64/include/specialreg.h b/sys/arch/amd64/include/specialreg.h index 23a4656e4de..7cf92377f28 100644 --- a/sys/arch/amd64/include/specialreg.h +++ b/sys/arch/amd64/include/specialreg.h @@ -1,4 +1,4 @@ -/* $OpenBSD: specialreg.h,v 1.34 2015/03/25 20:59:30 kettenis Exp $ */ +/* $OpenBSD: specialreg.h,v 1.35 2015/04/19 19:45:21 sf Exp $ */ /* $NetBSD: specialreg.h,v 1.1 2003/04/26 18:39:48 fvdl Exp $ */ /* $NetBSD: x86/specialreg.h,v 1.2 2003/04/25 21:54:30 fvdl Exp $ */ @@ -275,6 +275,7 @@ #define MSR_CTR0 0x012 /* P5 only (trap on P6) */ #define MSR_CTR1 0x013 /* P5 only (trap on P6) */ #define MSR_APICBASE 0x01b +#define APICBASE_ENABLE_X2APIC 0x400 #define MSR_EBL_CR_POWERON 0x02a #define MSR_EBC_FREQUENCY_ID 0x02c /* Pentium 4 only */ #define MSR_TEST_CTL 0x033 -- 2.20.1