use the always serializing RDTSCP instruction in tsc and usertc if available
authorrobert <robert@openbsd.org>
Thu, 22 Sep 2022 04:57:07 +0000 (04:57 +0000)
committerrobert <robert@openbsd.org>
Thu, 22 Sep 2022 04:57:07 +0000 (04:57 +0000)
tweaks from cheloha@; ok deraadt@, sthen@, cheloha@

lib/libc/arch/amd64/gen/usertc.c
sys/arch/amd64/amd64/tsc.c
sys/arch/amd64/include/cpufunc.h
sys/arch/amd64/include/timetc.h

index 0eef3d3..3f926f0 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: usertc.c,v 1.4 2021/07/25 22:58:39 jca Exp $ */
+/*     $OpenBSD: usertc.c,v 1.5 2022/09/22 04:57:07 robert Exp $ */
 /*
  * Copyright (c) 2020 Paul Irofti <paul@irofti.net>
  *
@@ -26,13 +26,24 @@ rdtsc_lfence(void)
        return ((uint64_t)lo)|(((uint64_t)hi)<<32);
 }
 
+static inline u_int
+rdtscp(void)
+{
+       uint32_t hi, lo;
+       __asm volatile("rdtscp" : "=a"(lo), "=d"(hi) : : "ecx");
+       return ((uint64_t)lo)|(((uint64_t)hi)<<32);
+}
+
 static int
 tc_get_timecount(struct timekeep *tk, u_int *tc)
 {
        switch (tk->tk_user) {
-       case TC_TSC:
+       case TC_TSC_LFENCE:
                *tc = rdtsc_lfence();
                return 0;
+       case TC_TSC_RDTSCP:
+               *tc = rdtscp();
+               return 0;
        }
 
        return -1;
index aff1011..7e5c414 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: tsc.c,v 1.28 2022/09/18 20:38:50 cheloha Exp $        */
+/*     $OpenBSD: tsc.c,v 1.29 2022/09/22 04:57:08 robert Exp $ */
 /*
  * Copyright (c) 2008 The NetBSD Foundation, Inc.
  * Copyright (c) 2016,2017 Reyk Floeter <reyk@openbsd.org>
@@ -36,7 +36,8 @@ int           tsc_recalibrate;
 uint64_t       tsc_frequency;
 int            tsc_is_invariant;
 
-u_int          tsc_get_timecount(struct timecounter *tc);
+u_int          tsc_get_timecount_lfence(struct timecounter *tc);
+u_int          tsc_get_timecount_rdtscp(struct timecounter *tc);
 void           tsc_delay(int usecs);
 
 #include "lapic.h"
@@ -44,15 +45,17 @@ void                tsc_delay(int usecs);
 extern u_int32_t lapic_per_second;
 #endif
 
+u_int64_t (*tsc_rdtsc)(void) = rdtsc_lfence;
+
 struct timecounter tsc_timecounter = {
-       .tc_get_timecount = tsc_get_timecount,
+       .tc_get_timecount = tsc_get_timecount_lfence,
        .tc_poll_pps = NULL,
        .tc_counter_mask = ~0u,
        .tc_frequency = 0,
        .tc_name = "tsc",
        .tc_quality = -1000,
        .tc_priv = NULL,
-       .tc_user = TC_TSC,
+       .tc_user = TC_TSC_LFENCE,
 };
 
 uint64_t
@@ -105,6 +108,13 @@ tsc_identify(struct cpu_info *ci)
            !(ci->ci_flags & CPUF_INVAR_TSC))
                return;
 
+       /* Prefer RDTSCP where supported. */
+       if (ISSET(ci->ci_feature_eflags, CPUID_RDTSCP)) {
+               tsc_rdtsc = rdtscp;
+               tsc_timecounter.tc_get_timecount = tsc_get_timecount_rdtscp;
+               tsc_timecounter.tc_user = TC_TSC_RDTSCP;
+       }
+
        tsc_is_invariant = 1;
 
        tsc_frequency = tsc_freq_cpuid(ci);
@@ -119,9 +129,9 @@ get_tsc_and_timecount(struct timecounter *tc, uint64_t *tsc, uint64_t *count)
        int i;
 
        for (i = 0; i < RECALIBRATE_MAX_RETRIES; i++) {
-               tsc1 = rdtsc_lfence();
+               tsc1 = tsc_rdtsc();
                n = (tc->tc_get_timecount(tc) & tc->tc_counter_mask);
-               tsc2 = rdtsc_lfence();
+               tsc2 = tsc_rdtsc();
 
                if ((tsc2 - tsc1) < RECALIBRATE_SMI_THRESHOLD) {
                        *count = n;
@@ -227,11 +237,17 @@ cpu_recalibrate_tsc(struct timecounter *tc)
 }
 
 u_int
-tsc_get_timecount(struct timecounter *tc)
+tsc_get_timecount_lfence(struct timecounter *tc)
 {
        return rdtsc_lfence();
 }
 
+u_int
+tsc_get_timecount_rdtscp(struct timecounter *tc)
+{
+       return rdtscp();
+}
+
 void
 tsc_timecounter_init(struct cpu_info *ci, uint64_t cpufreq)
 {
@@ -260,8 +276,8 @@ tsc_delay(int usecs)
        uint64_t interval, start;
 
        interval = (uint64_t)usecs * tsc_frequency / 1000000;
-       start = rdtsc_lfence();
-       while (rdtsc_lfence() - start < interval)
+       start = tsc_rdtsc();
+       while (tsc_rdtsc() - start < interval)
                CPU_BUSY_CYCLE();
 }
 
@@ -452,7 +468,7 @@ tsc_test_ap(void)
 {
        uint64_t ap_val, bp_val, end, lag;
 
-       ap_val = rdtsc_lfence();
+       ap_val = tsc_rdtsc();
        end = ap_val + tsc_test_cycles;
        while (__predict_true(ap_val < end)) {
                /*
@@ -463,7 +479,7 @@ tsc_test_ap(void)
                 * the BP and the counters cannot be synchronized.
                 */
                bp_val = tsc_bp_status.val;
-               ap_val = rdtsc_lfence();
+               ap_val = tsc_rdtsc();
                tsc_ap_status.val = ap_val;
 
                /*
@@ -488,11 +504,11 @@ tsc_test_bp(void)
 {
        uint64_t ap_val, bp_val, end, lag;
 
-       bp_val = rdtsc_lfence();
+       bp_val = tsc_rdtsc();
        end = bp_val + tsc_test_cycles;
        while (__predict_true(bp_val < end)) {
                ap_val = tsc_ap_status.val;
-               bp_val = rdtsc_lfence();
+               bp_val = tsc_rdtsc();
                tsc_bp_status.val = bp_val;
 
                if (__predict_false(bp_val < ap_val)) {
index 7ca9130..2a8ddd3 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: cpufunc.h,v 1.36 2020/09/13 11:53:16 jsg Exp $        */
+/*     $OpenBSD: cpufunc.h,v 1.37 2022/09/22 04:57:08 robert Exp $     */
 /*     $NetBSD: cpufunc.h,v 1.3 2003/05/08 10:27:43 fvdl Exp $ */
 
 /*-
@@ -302,6 +302,15 @@ rdtsc(void)
        return (((uint64_t)hi << 32) | (uint64_t) lo);
 }
 
+static __inline u_int64_t
+rdtscp(void)
+{
+       uint32_t hi, lo;
+
+       __asm volatile("rdtscp" : "=d" (hi), "=a" (lo) : : "ecx");
+       return (((uint64_t)hi << 32) | (uint64_t) lo);
+}
+
 static __inline u_int64_t
 rdtsc_lfence(void)
 {
index 48e1b57..733da2a 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: timetc.h,v 1.2 2020/07/08 09:17:48 kettenis Exp $ */
+/*     $OpenBSD: timetc.h,v 1.3 2022/09/22 04:57:08 robert Exp $ */
 /*
  * Copyright (c) 2020 Paul Irofti <paul@irofti.net>
  *
@@ -18,6 +18,7 @@
 #ifndef _MACHINE_TIMETC_H_
 #define _MACHINE_TIMETC_H_
 
-#define TC_TSC 1
+#define TC_TSC_LFENCE  1
+#define TC_TSC_RDTSCP  2
 
 #endif /* _MACHINE_TIMETC_H_ */