From d55ef580b1748517027c3eabdb715316ca5b1442 Mon Sep 17 00:00:00 2001
From: robert
Date: Thu, 22 Sep 2022 04:57:07 +0000
Subject: [PATCH] use the always serializing RDTSCP instruction in tsc and usertc if available

tweaks from cheloha@; ok deraadt@, sthen@, cheloha@
---
Review notes (free-form text after the "---" separator; not part of the
commit message and not part of any hunk):

 1. Kernel: tsc_rdtsc is a new function pointer that starts out as
    rdtsc_lfence.  tsc_identify() repoints it to rdtscp, and switches
    tsc_timecounter's tc_get_timecount and tc_user to the RDTSCP variants,
    when CPUID_RDTSCP is set in ci->ci_feature_eflags.  That block sits
    after the early return visible in the hunk context, so a CPU that takes
    that return keeps the LFENCE defaults.
 2. Kernel: the direct rdtsc_lfence() calls in get_tsc_and_timecount(),
    tsc_delay(), tsc_test_ap() and tsc_test_bp() become calls through
    tsc_rdtsc.
 3. Kernel: tsc_get_timecount() is split into tsc_get_timecount_lfence()
    and tsc_get_timecount_rdtscp(); tsc_timecounter is statically
    initialised with the LFENCE variant and TC_TSC_LFENCE.
 4. Userland: TC_TSC is renamed TC_TSC_LFENCE (value still 1) and
    TC_TSC_RDTSCP (2) is added.  libc's tc_get_timecount() handles both and
    still returns -1 for any other tk_user value.
 5. Both rdtscp() helpers name "ecx" as clobbered because the instruction
    also writes ECX.  The libc helper is declared to return u_int, so the
    64-bit value it assembles is truncated to u_int on return; the kernel
    helper returns u_int64_t.
 6. NOTE(review): the subject calls RDTSCP "always serializing".  The Intel
    SDM describes it as waiting for earlier instructions to execute rather
    than as a serializing instruction.  Wording only; no code impact.
 7. NOTE(review): this copy was reflowed from a whitespace-collapsed
    extraction.  Line breaks, blank context lines (placed so that every
    hunk matches its @@ line counts) and leading tabs were restored by
    hand.  Whitespace inside a line is left as single spaces, and e-mail
    addresses appear to be missing from the From: header and from the
    copyright context lines.  Confirm against the tree before applying.

 lib/libc/arch/amd64/gen/usertc.c | 15 ++++++++++--
 sys/arch/amd64/amd64/tsc.c | 42 ++++++++++++++++++++++----------
 sys/arch/amd64/include/cpufunc.h | 11 ++++++++-
 sys/arch/amd64/include/timetc.h | 5 ++--
 4 files changed, 55 insertions(+), 18 deletions(-)

diff --git a/lib/libc/arch/amd64/gen/usertc.c b/lib/libc/arch/amd64/gen/usertc.c
index 0eef3d3f6db..3f926f06019 100644
--- a/lib/libc/arch/amd64/gen/usertc.c
+++ b/lib/libc/arch/amd64/gen/usertc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: usertc.c,v 1.4 2021/07/25 22:58:39 jca Exp $ */
+/* $OpenBSD: usertc.c,v 1.5 2022/09/22 04:57:07 robert Exp $ */
 /*
  * Copyright (c) 2020 Paul Irofti
  *
@@ -26,13 +26,24 @@ rdtsc_lfence(void)
 	return ((uint64_t)lo)|(((uint64_t)hi)<<32);
 }
 
+static inline u_int
+rdtscp(void)
+{
+	uint32_t hi, lo;
+	__asm volatile("rdtscp" : "=a"(lo), "=d"(hi) : : "ecx");
+	return ((uint64_t)lo)|(((uint64_t)hi)<<32);
+}
+
 static int
 tc_get_timecount(struct timekeep *tk, u_int *tc)
 {
 	switch (tk->tk_user) {
-	case TC_TSC:
+	case TC_TSC_LFENCE:
 		*tc = rdtsc_lfence();
 		return 0;
+	case TC_TSC_RDTSCP:
+		*tc = rdtscp();
+		return 0;
 	}
 
 	return -1;
diff --git a/sys/arch/amd64/amd64/tsc.c b/sys/arch/amd64/amd64/tsc.c
index aff10119651..7e5c414cc00 100644
--- a/sys/arch/amd64/amd64/tsc.c
+++ b/sys/arch/amd64/amd64/tsc.c
@@ -1,4 +1,4 @@
-/* $OpenBSD: tsc.c,v 1.28 2022/09/18 20:38:50 cheloha Exp $ */
+/* $OpenBSD: tsc.c,v 1.29 2022/09/22 04:57:08 robert Exp $ */
 /*
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
  * Copyright (c) 2016,2017 Reyk Floeter
@@ -36,7 +36,8 @@ int tsc_recalibrate;
 uint64_t tsc_frequency;
 int tsc_is_invariant;
 
-u_int tsc_get_timecount(struct timecounter *tc);
+u_int tsc_get_timecount_lfence(struct timecounter *tc);
+u_int tsc_get_timecount_rdtscp(struct timecounter *tc);
 void tsc_delay(int usecs);
 
 #include "lapic.h"
@@ -44,15 +45,17 @@ void tsc_delay(int usecs);
 extern u_int32_t lapic_per_second;
 #endif
 
+u_int64_t (*tsc_rdtsc)(void) = rdtsc_lfence;
+
 struct timecounter tsc_timecounter = {
-	.tc_get_timecount = tsc_get_timecount,
+	.tc_get_timecount = tsc_get_timecount_lfence,
 	.tc_poll_pps = NULL,
 	.tc_counter_mask = ~0u,
 	.tc_frequency = 0,
 	.tc_name = "tsc",
 	.tc_quality = -1000,
 	.tc_priv = NULL,
-	.tc_user = TC_TSC,
+	.tc_user = TC_TSC_LFENCE,
 };
 
 uint64_t
@@ -105,6 +108,13 @@ tsc_identify(struct cpu_info *ci)
 	    !(ci->ci_flags & CPUF_INVAR_TSC))
 		return;
 
+	/* Prefer RDTSCP where supported. */
+	if (ISSET(ci->ci_feature_eflags, CPUID_RDTSCP)) {
+		tsc_rdtsc = rdtscp;
+		tsc_timecounter.tc_get_timecount = tsc_get_timecount_rdtscp;
+		tsc_timecounter.tc_user = TC_TSC_RDTSCP;
+	}
+
 	tsc_is_invariant = 1;
 
 	tsc_frequency = tsc_freq_cpuid(ci);
@@ -119,9 +129,9 @@ get_tsc_and_timecount(struct timecounter *tc, uint64_t *tsc, uint64_t *count)
 	int i;
 
 	for (i = 0; i < RECALIBRATE_MAX_RETRIES; i++) {
-		tsc1 = rdtsc_lfence();
+		tsc1 = tsc_rdtsc();
 		n = (tc->tc_get_timecount(tc) & tc->tc_counter_mask);
-		tsc2 = rdtsc_lfence();
+		tsc2 = tsc_rdtsc();
 
 		if ((tsc2 - tsc1) < RECALIBRATE_SMI_THRESHOLD) {
 			*count = n;
@@ -227,11 +237,17 @@ cpu_recalibrate_tsc(struct timecounter *tc)
 }
 
 u_int
-tsc_get_timecount(struct timecounter *tc)
+tsc_get_timecount_lfence(struct timecounter *tc)
 {
 	return rdtsc_lfence();
 }
 
+u_int
+tsc_get_timecount_rdtscp(struct timecounter *tc)
+{
+	return rdtscp();
+}
+
 void
 tsc_timecounter_init(struct cpu_info *ci, uint64_t cpufreq)
 {
@@ -260,8 +276,8 @@ tsc_delay(int usecs)
 	uint64_t interval, start;
 
 	interval = (uint64_t)usecs * tsc_frequency / 1000000;
-	start = rdtsc_lfence();
-	while (rdtsc_lfence() - start < interval)
+	start = tsc_rdtsc();
+	while (tsc_rdtsc() - start < interval)
 		CPU_BUSY_CYCLE();
 }
 
@@ -452,7 +468,7 @@ tsc_test_ap(void)
 {
 	uint64_t ap_val, bp_val, end, lag;
 
-	ap_val = rdtsc_lfence();
+	ap_val = tsc_rdtsc();
 	end = ap_val + tsc_test_cycles;
 	while (__predict_true(ap_val < end)) {
 		/*
@@ -463,7 +479,7 @@ tsc_test_ap(void)
 		 * the BP and the counters cannot be synchronized.
 		 */
 		bp_val = tsc_bp_status.val;
-		ap_val = rdtsc_lfence();
+		ap_val = tsc_rdtsc();
 		tsc_ap_status.val = ap_val;
 
 		/*
@@ -488,11 +504,11 @@ tsc_test_bp(void)
 {
 	uint64_t ap_val, bp_val, end, lag;
 
-	bp_val = rdtsc_lfence();
+	bp_val = tsc_rdtsc();
 	end = bp_val + tsc_test_cycles;
 	while (__predict_true(bp_val < end)) {
 		ap_val = tsc_ap_status.val;
-		bp_val = rdtsc_lfence();
+		bp_val = tsc_rdtsc();
 		tsc_bp_status.val = bp_val;
 
 		if (__predict_false(bp_val < ap_val)) {
diff --git a/sys/arch/amd64/include/cpufunc.h b/sys/arch/amd64/include/cpufunc.h
index 7ca9130db77..2a8ddd34fd1 100644
--- a/sys/arch/amd64/include/cpufunc.h
+++ b/sys/arch/amd64/include/cpufunc.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: cpufunc.h,v 1.36 2020/09/13 11:53:16 jsg Exp $ */
+/* $OpenBSD: cpufunc.h,v 1.37 2022/09/22 04:57:08 robert Exp $ */
 /* $NetBSD: cpufunc.h,v 1.3 2003/05/08 10:27:43 fvdl Exp $ */
 
 /*-
@@ -302,6 +302,15 @@ rdtsc(void)
 	return (((uint64_t)hi << 32) | (uint64_t) lo);
 }
 
+static __inline u_int64_t
+rdtscp(void)
+{
+	uint32_t hi, lo;
+
+	__asm volatile("rdtscp" : "=d" (hi), "=a" (lo) : : "ecx");
+	return (((uint64_t)hi << 32) | (uint64_t) lo);
+}
+
 static __inline u_int64_t
 rdtsc_lfence(void)
 {
diff --git a/sys/arch/amd64/include/timetc.h b/sys/arch/amd64/include/timetc.h
index 48e1b57d410..733da2a9e8d 100644
--- a/sys/arch/amd64/include/timetc.h
+++ b/sys/arch/amd64/include/timetc.h
@@ -1,4 +1,4 @@
-/* $OpenBSD: timetc.h,v 1.2 2020/07/08 09:17:48 kettenis Exp $ */
+/* $OpenBSD: timetc.h,v 1.3 2022/09/22 04:57:08 robert Exp $ */
 /*
  * Copyright (c) 2020 Paul Irofti
  *
@@ -18,6 +18,7 @@
 #ifndef _MACHINE_TIMETC_H_
 #define _MACHINE_TIMETC_H_
 
-#define TC_TSC 1
+#define TC_TSC_LFENCE 1
+#define TC_TSC_RDTSCP 2
 
 #endif /* _MACHINE_TIMETC_H_ */
-- 
2.20.1