From a9b606aa4bbdd86485cdc27c9a828b1347506bba Mon Sep 17 00:00:00 2001
From: kettenis
Date: Sat, 30 May 2015 08:41:30 +0000
Subject: [PATCH] Native atomic operations for i386.

ok deraadt@, guenther@, dlg@
---
 sys/arch/i386/i386/acpi_machdep.c |   6 +-
 sys/arch/i386/i386/lock_machdep.c |   8 +-
 sys/arch/i386/i386/pmap.c         |  10 +-
 sys/arch/i386/include/atomic.h    | 242 +++++++++++++++++++++++++-----
 sys/arch/i386/include/lock.h      |  11 +-
 5 files changed, 217 insertions(+), 60 deletions(-)

diff --git a/sys/arch/i386/i386/acpi_machdep.c b/sys/arch/i386/i386/acpi_machdep.c
index 39c282611c4..b08ce6fdeb9 100644
--- a/sys/arch/i386/i386/acpi_machdep.c
+++ b/sys/arch/i386/i386/acpi_machdep.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: acpi_machdep.c,v 1.58 2015/03/16 20:31:47 deraadt Exp $	*/
+/*	$OpenBSD: acpi_machdep.c,v 1.59 2015/05/30 08:41:30 kettenis Exp $	*/
 /*
  * Copyright (c) 2005 Thorsten Lockert
  *
@@ -199,7 +199,7 @@ acpi_acquire_glk(uint32_t *lock)
 		new = (old & ~GL_BIT_PENDING) | GL_BIT_OWNED;
 		if ((old & GL_BIT_OWNED) != 0)
 			new |= GL_BIT_PENDING;
-	} while (i386_atomic_cas_int32(lock, old, new) != old);
+	} while (atomic_cas_uint(lock, old, new) != old);
 
 	return ((new & GL_BIT_PENDING) == 0);
 }
@@ -217,7 +217,7 @@ acpi_release_glk(uint32_t *lock)
 	do {
 		old = *lock;
 		new = old & ~(GL_BIT_PENDING | GL_BIT_OWNED);
-	} while (i386_atomic_cas_int32(lock, old, new) != old);
+	} while (atomic_cas_uint(lock, old, new) != old);
 
 	return ((old & GL_BIT_PENDING) != 0);
 }
diff --git a/sys/arch/i386/i386/lock_machdep.c b/sys/arch/i386/i386/lock_machdep.c
index c35270bf850..4d0fb1157da 100644
--- a/sys/arch/i386/i386/lock_machdep.c
+++ b/sys/arch/i386/i386/lock_machdep.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: lock_machdep.c,v 1.17 2015/02/11 05:54:48 dlg Exp $	*/
+/*	$OpenBSD: lock_machdep.c,v 1.18 2015/05/30 08:41:30 kettenis Exp $	*/
 /*	$NetBSD: lock_machdep.c,v 1.1.2.3 2000/05/03 14:40:30 sommerfeld Exp $	*/
 
 /*-
@@ -44,12 +44,6 @@
 
 #include
 
-int
-rw_cas_486(volatile unsigned long *p, unsigned long o, unsigned long n)
-{
-	return (i486_atomic_cas_int((u_int *)p, o, n) != o);
-}
-
 #ifdef MULTIPROCESSOR
 void
 __mp_lock_init(struct __mp_lock *mpl)
diff --git a/sys/arch/i386/i386/pmap.c b/sys/arch/i386/i386/pmap.c
index 0c01a348e89..a12427ee1fb 100644
--- a/sys/arch/i386/i386/pmap.c
+++ b/sys/arch/i386/i386/pmap.c
@@ -1,4 +1,4 @@
-/*	$OpenBSD: pmap.c,v 1.178 2015/04/22 06:26:23 mlarkin Exp $	*/
+/*	$OpenBSD: pmap.c,v 1.179 2015/05/30 08:41:30 kettenis Exp $	*/
 /*	$NetBSD: pmap.c,v 1.91 2000/06/02 17:46:37 thorpej Exp $	*/
 
 /*
@@ -2636,7 +2636,7 @@ pmap_tlb_shootpage(struct pmap *pm, vaddr_t va)
 
 	if (wait > 0) {
 		int s = splvm();
-		while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
+		while (atomic_cas_uint(&tlb_shoot_wait, 0, wait) != 0) {
 			while (tlb_shoot_wait != 0)
 				SPINLOCK_SPIN_HOOK;
 		}
@@ -2674,7 +2674,7 @@ pmap_tlb_shootrange(struct pmap *pm, vaddr_t sva, vaddr_t eva)
 
 	if (wait > 0) {
 		int s = splvm();
-		while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
+		while (atomic_cas_uint(&tlb_shoot_wait, 0, wait) != 0) {
 			while (tlb_shoot_wait != 0)
 				SPINLOCK_SPIN_HOOK;
 		}
@@ -2712,7 +2712,7 @@ pmap_tlb_shoottlb(void)
 
 	if (wait) {
 		int s = splvm();
-		while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
+		while (atomic_cas_uint(&tlb_shoot_wait, 0, wait) != 0) {
 			while (tlb_shoot_wait != 0)
 				SPINLOCK_SPIN_HOOK;
 		}
@@ -2748,7 +2748,7 @@ pmap_tlb_droppmap(struct pmap *pm)
 
 	if (wait) {
 		int s = splvm();
-		while (i486_atomic_cas_int(&tlb_shoot_wait, 0, wait) != 0) {
+		while (atomic_cas_uint(&tlb_shoot_wait, 0, wait) != 0) {
 			while (tlb_shoot_wait != 0)
 				SPINLOCK_SPIN_HOOK;
 		}
diff --git a/sys/arch/i386/include/atomic.h b/sys/arch/i386/include/atomic.h
index 004988b9d26..e23ca84b7b0 100644
--- a/sys/arch/i386/include/atomic.h
+++ b/sys/arch/i386/include/atomic.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: atomic.h,v 1.14 2014/10/08 19:40:28 sf Exp $	*/
+/*	$OpenBSD: atomic.h,v 1.15 2015/05/30 08:41:30 kettenis Exp $	*/
 /*	$NetBSD: atomic.h,v 1.1.2.2 2000/02/21 18:54:07 sommerfeld Exp $	*/
 
 /*-
@@ -50,65 +50,200 @@
 #define LOCK
 #endif
 
-static __inline u_int64_t
-i386_atomic_testset_uq(volatile u_int64_t *ptr, u_int64_t val)
+static inline unsigned int
+_atomic_cas_uint(volatile unsigned int *p, unsigned int e, unsigned int n)
 {
-	__asm__ volatile ("\n1:\t" LOCK " cmpxchg8b (%1); jnz 1b" : "+A" (val) :
-	    "r" (ptr), "b" ((u_int32_t)val), "c" ((u_int32_t)(val >> 32)));
-	return val;
+	__asm volatile(LOCK " cmpxchgl %2, %1"
+	    : "=a" (n), "=m" (*p)
+	    : "r" (n), "a" (e), "m" (*p));
+
+	return (n);
 }
+#define atomic_cas_uint(_p, _e, _n) _atomic_cas_uint((_p), (_e), (_n))
 
-static __inline u_int32_t
-i386_atomic_testset_ul(volatile u_int32_t *ptr, unsigned long val)
+static inline unsigned long
+_atomic_cas_ulong(volatile unsigned long *p, unsigned long e, unsigned long n)
 {
-	__asm__ volatile ("xchgl %0,(%2)" :"=r" (val):"0" (val),"r" (ptr));
-	return val;
+	__asm volatile(LOCK " cmpxchgl %2, %1"
+	    : "=a" (n), "=m" (*p)
+	    : "r" (n), "a" (e), "m" (*p));
+
+	return (n);
 }
+#define atomic_cas_ulong(_p, _e, _n) _atomic_cas_ulong((_p), (_e), (_n))
 
-static __inline int
-i386_atomic_testset_i(volatile int *ptr, unsigned long val)
+static inline void *
+_atomic_cas_ptr(volatile void *p, void *e, void *n)
 {
-	__asm__ volatile ("xchgl %0,(%2)" :"=r" (val):"0" (val),"r" (ptr));
-	return val;
+	__asm volatile(LOCK " cmpxchgl %2, %1"
+	    : "=a" (n), "=m" (*(unsigned long *)p)
+	    : "r" (n), "a" (e), "m" (*(unsigned long *)p));
+
+	return (n);
 }
+#define atomic_cas_ptr(_p, _e, _n) _atomic_cas_ptr((_p), (_e), (_n))
 
-static __inline void
-i386_atomic_setbits_l(volatile u_int32_t *ptr, unsigned long bits)
+static inline unsigned int
+_atomic_swap_uint(volatile unsigned int *p, unsigned int n)
 {
-	__asm volatile(LOCK " orl %1,%0" : "=m" (*ptr) : "ir" (bits));
+	__asm volatile("xchgl %0, %1"
+	    : "=a" (n), "=m" (*p)
+	    : "0" (n), "m" (*p));
+
+	return (n);
 }
+#define atomic_swap_uint(_p, _n) _atomic_swap_uint((_p), (_n))
+#define atomic_swap_32(_p, _n) _atomic_swap_uint((_p), (_n))
 
-static __inline void
-i386_atomic_clearbits_l(volatile u_int32_t *ptr, unsigned long bits)
+static inline unsigned long
+_atomic_swap_ulong(volatile unsigned long *p, unsigned long n)
 {
-	bits = ~bits;
-	__asm volatile(LOCK " andl %1,%0" : "=m" (*ptr) : "ir" (bits));
+	__asm volatile("xchgl %0, %1"
+	    : "=a" (n), "=m" (*p)
+	    : "0" (n), "m" (*p));
+
+	return (n);
}
+#define atomic_swap_ulong(_p, _n) _atomic_swap_ulong((_p), (_n))
 
-/*
- * cas = compare and set
- */
-static __inline int
-i486_atomic_cas_int(volatile u_int *ptr, u_int expect, u_int set)
+static inline uint64_t
+_atomic_swap_64(volatile uint64_t *p, uint64_t n)
 {
-	int res;
+	__asm volatile("xchgl %0, %1"
+	    : "=a" (n), "=m" (*p)
+	    : "0" (n), "m" (*p));
+
+	return (n);
+}
+#define atomic_swap_64(_p, _n) _atomic_swap_64((_p), (_n))
 
-	__asm volatile(LOCK " cmpxchgl %2, %1" : "=a" (res), "=m" (*ptr)
-	    : "r" (set), "a" (expect), "m" (*ptr) : "memory");
+static inline void *
+_atomic_swap_ptr(volatile void *p, void *n)
+{
+	__asm volatile("xchgl %0, %1"
+	    : "=a" (n), "=m" (*(unsigned long *)p)
+	    : "0" (n), "m" (*(unsigned long *)p));
 
-	return (res);
+	return (n);
 }
+#define atomic_swap_ptr(_p, _n) _atomic_swap_ptr((_p), (_n))
 
-static __inline int
-i386_atomic_cas_int32(volatile int32_t *ptr, int32_t expect, int32_t set)
+static inline void
+_atomic_inc_int(volatile unsigned int *p)
+{
+	__asm volatile(LOCK " incl %0"
+	    : "+m" (*p));
+}
+#define atomic_inc_int(_p) _atomic_inc_int(_p)
+
+static inline void
+_atomic_inc_long(volatile unsigned long *p)
 {
-	int res;
+	__asm volatile(LOCK " incl %0"
+	    : "+m" (*p));
+}
+#define atomic_inc_long(_p) _atomic_inc_long(_p)
 
-	__asm volatile(LOCK " cmpxchgl %2, %1" : "=a" (res), "=m" (*ptr)
-	    : "r" (set), "a" (expect), "m" (*ptr) : "memory");
+static inline void
+_atomic_dec_int(volatile unsigned int *p)
+{
+	__asm volatile(LOCK " decl %0"
+	    : "+m" (*p));
+}
+#define atomic_dec_int(_p) _atomic_dec_int(_p)
 
-	return (res);
+static inline void
+_atomic_dec_long(volatile unsigned long *p)
+{
+	__asm volatile(LOCK " decl %0"
+	    : "+m" (*p));
 }
+#define atomic_dec_long(_p) _atomic_dec_long(_p)
+
+static inline void
+_atomic_add_int(volatile unsigned int *p, unsigned int v)
+{
+	__asm volatile(LOCK " addl %1,%0"
+	    : "+m" (*p)
+	    : "a" (v));
+}
+#define atomic_add_int(_p, _v) _atomic_add_int(_p, _v)
+
+static inline void
+_atomic_add_long(volatile unsigned long *p, unsigned long v)
+{
+	__asm volatile(LOCK " addl %1,%0"
+	    : "+m" (*p)
+	    : "a" (v));
+}
+#define atomic_add_long(_p, _v) _atomic_add_long(_p, _v)
+
+static inline void
+_atomic_sub_int(volatile unsigned int *p, unsigned int v)
+{
+	__asm volatile(LOCK " subl %1,%0"
+	    : "+m" (*p)
+	    : "a" (v));
+}
+#define atomic_sub_int(_p, _v) _atomic_sub_int(_p, _v)
+
+static inline void
+_atomic_sub_long(volatile unsigned long *p, unsigned long v)
+{
+	__asm volatile(LOCK " subl %1,%0"
+	    : "+m" (*p)
+	    : "a" (v));
+}
+#define atomic_sub_long(_p, _v) _atomic_sub_long(_p, _v)
+
+
+static inline unsigned long
+_atomic_add_int_nv(volatile unsigned int *p, unsigned int v)
+{
+	unsigned int rv = v;
+
+	__asm volatile(LOCK " xaddl %0,%1"
+	    : "+a" (rv), "+m" (*p));
+
+	return (rv + v);
+}
+#define atomic_add_int_nv(_p, _v) _atomic_add_int_nv(_p, _v)
+
+static inline unsigned long
+_atomic_add_long_nv(volatile unsigned long *p, unsigned long v)
+{
+	unsigned long rv = v;
+
+	__asm volatile(LOCK " xaddl %0,%1"
+	    : "+a" (rv), "+m" (*p));
+
+	return (rv + v);
+}
+#define atomic_add_long_nv(_p, _v) _atomic_add_long_nv(_p, _v)
+
+static inline unsigned long
+_atomic_sub_int_nv(volatile unsigned int *p, unsigned int v)
+{
+	unsigned int rv = 0 - v;
+
+	__asm volatile(LOCK " xaddl %0,%1"
+	    : "+a" (rv), "+m" (*p));
+
+	return (rv - v);
+}
+#define atomic_sub_int_nv(_p, _v) _atomic_sub_int_nv(_p, _v)
+
+static inline unsigned long
+_atomic_sub_long_nv(volatile unsigned long *p, unsigned long v)
+{
+	unsigned long rv = 0 - v;
+
+	__asm volatile(LOCK " xaddl %0,%1"
+	    : "+a" (rv), "+m" (*p));
+
+	return (rv - v);
+}
+#define atomic_sub_long_nv(_p, _v) _atomic_sub_long_nv(_p, _v)
 
 /*
  * The IA-32 architecture is rather strongly ordered.  When accessing
@@ -141,6 +276,41 @@ i386_atomic_cas_int32(volatile int32_t *ptr, int32_t expect, int32_t set)
 #define virtio_membar_consumer()	__membar("")
 #define virtio_membar_sync()		__membar("lock; addl $0,0(%%esp)")
 
+static __inline u_int64_t
+i386_atomic_testset_uq(volatile u_int64_t *ptr, u_int64_t val)
+{
+	__asm__ volatile ("\n1:\t" LOCK " cmpxchg8b (%1); jnz 1b" : "+A" (val) :
+	    "r" (ptr), "b" ((u_int32_t)val), "c" ((u_int32_t)(val >> 32)));
+	return val;
+}
+
+static __inline u_int32_t
+i386_atomic_testset_ul(volatile u_int32_t *ptr, unsigned long val)
+{
+	__asm__ volatile ("xchgl %0,(%2)" :"=r" (val):"0" (val),"r" (ptr));
+	return val;
+}
+
+static __inline int
+i386_atomic_testset_i(volatile int *ptr, unsigned long val)
+{
+	__asm__ volatile ("xchgl %0,(%2)" :"=r" (val):"0" (val),"r" (ptr));
+	return val;
+}
+
+static __inline void
+i386_atomic_setbits_l(volatile u_int32_t *ptr, unsigned long bits)
+{
+	__asm volatile(LOCK " orl %1,%0" : "=m" (*ptr) : "ir" (bits));
+}
+
+static __inline void
+i386_atomic_clearbits_l(volatile u_int32_t *ptr, unsigned long bits)
+{
+	bits = ~bits;
+	__asm volatile(LOCK " andl %1,%0" : "=m" (*ptr) : "ir" (bits));
+}
+
 int ucas_32(volatile int32_t *, int32_t, int32_t);
 #define futex_atomic_ucas_int32 ucas_32
 
diff --git a/sys/arch/i386/include/lock.h b/sys/arch/i386/include/lock.h
index 63e616b0e26..2d3c84b2e7c 100644
--- a/sys/arch/i386/include/lock.h
+++ b/sys/arch/i386/include/lock.h
@@ -1,4 +1,4 @@
-/*	$OpenBSD: lock.h,v 1.10 2015/02/11 00:14:11 dlg Exp $	*/
+/*	$OpenBSD: lock.h,v 1.11 2015/05/30 08:41:30 kettenis Exp $	*/
 /*	$NetBSD: lock.h,v 1.1.2.2 2000/05/03 14:40:55 sommerfeld Exp $	*/
 
 /*-
@@ -46,13 +46,6 @@
  */
 #define __lockbarrier() __asm volatile("": : :"memory")
 
-
-#define SPINLOCK_SPIN_HOOK	__asm volatile("pause": : :"memory")
-
-#include
-
-#ifdef _KERNEL
-extern int rw_cas_486(volatile unsigned long *, unsigned long, unsigned long);
-#endif
+#define SPINLOCK_SPIN_HOOK	__asm volatile("pause": : :"memory")
 
 #endif /* _MACHINE_LOCK_H_ */
-- 
2.20.1
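Note on the new compare-and-swap interface: atomic_cas_uint(p, e, n) stores n into *p only if *p still holds e, and always returns the value it found there, so callers retry until the returned value matches what they read. The sketch below illustrates that pattern (the same one acpi_acquire_glk() uses in the patch); it is not part of the patch, and the helper name counter_inc_clamped() is made up for illustration.

	/*
	 * Illustrative sketch, not from the tree: a typical retry loop
	 * built on the new atomic_cas_uint() interface.
	 */
	static unsigned int
	counter_inc_clamped(volatile unsigned int *p, unsigned int limit)
	{
		unsigned int old, new;

		do {
			old = *p;
			if (old >= limit)
				return (old);	/* already saturated */
			new = old + 1;
			/*
			 * atomic_cas_uint() returns the value it found in
			 * *p; if that is not `old`, another CPU raced us,
			 * so read again and retry.
			 */
		} while (atomic_cas_uint(p, old, new) != old);

		return (new);
	}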
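The *_nv ("new value") variants are built on xaddl, which atomically adds a register operand to memory and returns the previous memory contents in that register; adding (or subtracting) v once more therefore yields the updated value. A hedged sketch of the reference-count release idiom this enables follows; struct obj, its refcnt member, and obj_free() are assumptions for illustration, not code from the tree.

	struct obj {
		unsigned int refcnt;
		/* ... other members ... */
	};

	void obj_free(struct obj *);	/* assumed destructor */

	void
	obj_release(struct obj *o)
	{
		/*
		 * atomic_sub_int_nv() returns the counter value *after*
		 * the decrement, so exactly one releasing CPU observes
		 * zero and frees the object.
		 */
		if (atomic_sub_int_nv(&o->refcnt, 1) == 0)
			obj_free(o);
	}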