From 1a8619957bf72939f00b6d60234974b173f9c70e Mon Sep 17 00:00:00 2001
From: gkoehler
Date: Sat, 22 Oct 2022 17:50:28 +0000
Subject: [PATCH] Add regress for powerpc* AltiVec with subnormal floats

Older CPUs do an AltiVec assist trap, EXC_VECAST_G4 or EXC_VECAST_G5 in
/sys/arch/powerpc/powerpc/trap.c; newer CPUs (like POWER9) don't trap.
The trap's emulation of subnormal floats should give almost the same
results as the POWER9.

This test is only for powerpc or powerpc64 with AltiVec. anton@
explained how to skip the test if we're on the wrong arch or don't
have AltiVec.

ok anton@
---
 regress/sys/Makefile             |   5 +-
 regress/sys/altivec_ast/Makefile |  23 +++
 regress/sys/altivec_ast/vecast.c | 213 +++++++++++++++++++++++++++++++
 3 files changed, 239 insertions(+), 2 deletions(-)
 create mode 100644 regress/sys/altivec_ast/Makefile
 create mode 100644 regress/sys/altivec_ast/vecast.c

diff --git a/regress/sys/Makefile b/regress/sys/Makefile
index 2586d2ef6b8..ff0075e5146 100644
--- a/regress/sys/Makefile
+++ b/regress/sys/Makefile
@@ -1,7 +1,8 @@
-#	$OpenBSD: Makefile,v 1.26 2018/12/23 11:23:21 natano Exp $
+#	$OpenBSD: Makefile,v 1.27 2022/10/22 17:50:28 gkoehler Exp $
 #	$NetBSD: Makefile,v 1.4 1995/04/20 22:41:08 cgd Exp $
 
-SUBDIR=	copy crypto dev ddb ffs fifofs fileops kern mfs_noperm \
+SUBDIR=	altivec_ast copy crypto dev ddb ffs \
+	fifofs fileops kern mfs_noperm \
 	net netinet netinet6 nfs ptrace sys uvm
 .if exists(arch/${MACHINE}/Makefile)
 SUBDIR+= arch/${MACHINE}
diff --git a/regress/sys/altivec_ast/Makefile b/regress/sys/altivec_ast/Makefile
new file mode 100644
index 00000000000..2740edd33a8
--- /dev/null
+++ b/regress/sys/altivec_ast/Makefile
@@ -0,0 +1,23 @@
+#	$OpenBSD: Makefile,v 1.1 2022/10/22 17:50:28 gkoehler Exp $
+
+# Only powerpc and powerpc64 build vecast; other arches print SKIPPED.
+.if ${MACHINE_ARCH} == "powerpc" || ${MACHINE_ARCH} == "powerpc64"
+PROG =		vecast
+CFLAGS +=	-maltivec
+LDADD =		-lm
+
+# Skip every regress target when sysctl reports machdep.altivec as 0.
+# The sysctl is not run for the clean, cleandir and obj targets.
+.if ! (make(clean) || make(cleandir) || make(obj))
+ALTIVEC !=	/sbin/sysctl -n machdep.altivec
+.if ${ALTIVEC} == 0
+REGRESS_SKIP_TARGETS =	${REGRESS_TARGETS}
+.endif
+.endif
+
+.elif make(regress) || make(all)
+regress:
+	@echo Cannot run on ${MACHINE_ARCH}.
+	@echo SKIPPED
+.endif
+.include <bsd.regress.mk>
diff --git a/regress/sys/altivec_ast/vecast.c b/regress/sys/altivec_ast/vecast.c
new file mode 100644
index 00000000000..4ff9e62f50c
--- /dev/null
+++ b/regress/sys/altivec_ast/vecast.c
@@ -0,0 +1,213 @@
+/*	$OpenBSD: vecast.c,v 1.1 2022/10/22 17:50:28 gkoehler Exp $	*/
+
+/*
+ * Copyright (c) 2022 George Koehler
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <altivec.h>
+#include <err.h>
+#include <math.h>
+#include <stdio.h>	/* NOTE(review): 4th header name was lost; confirm */
+
+struct double4 {	/* four expected values, kept as doubles */
+	double d[4];
+};
+
+union vu {		/* views one vector as its four scalar elements */
+	vector float vf;
+	vector int vi;
+	vector unsigned vu;
+	float f[4];
+	int i[4];
+	unsigned u[4];
+};
+
+#define AD(a, b, c, d)	(struct double4){a, b, c, d}
+#define VF(a, b, c, d)	(vector float)(a, b, c, d)
+#define VI(a, b, c, d)	(vector int)(a, b, c, d)
+#define VU(a, b, c, d)	(vector unsigned)(a, b, c, d)
+#define rsqrt(f)	(1.0 / sqrt(f))
+
+int fail;		/* set to 1 by any failed check; main returns it */
+
+void
+ck_equal(const char *what, vector float out, vector float answer)
+{
+	if (vec_any_ne(out, answer)) {	/* some float differs: report it */
+		union vu a, b;
+
+		a.vf = out;
+		b.vf = answer;
+		warnx("%s: {%a, %a, %a, %a} should be {%a, %a, %a, %a}",
+		    what, a.f[0], a.f[1], a.f[2], a.f[3],
+		    b.f[0], b.f[1], b.f[2], b.f[3]);
+		fail = 1;
+	}
+}
+
+void
+ck_equal_i(const char *what, vector int out, vector int answer)
+{
+	if (vec_any_ne(out, answer)) {	/* some int differs: report it */
+		union vu a, b;
+
+		a.vi = out;
+		b.vi = answer;
+		warnx("%s: {%d, %d, %d, %d} should be {%d, %d, %d, %d}",
+		    what, a.i[0], a.i[1], a.i[2], a.i[3],
+		    b.i[0], b.i[1], b.i[2], b.i[3]);
+		fail = 1;
+	}
+}
+
+void
+ck_equal_u(const char *what, vector unsigned out, vector unsigned answer)
+{
+	if (vec_any_ne(out, answer)) {	/* some unsigned differs: report it */
+		union vu a, b;
+
+		a.vu = out;	/* use the member whose type matches out */
+		b.vu = answer;
+		warnx("%s: {%u, %u, %u, %u} should be {%u, %u, %u, %u}",
+		    what, a.u[0], a.u[1], a.u[2], a.u[3],
+		    b.u[0], b.u[1], b.u[2], b.u[3]);
+		fail = 1;
+	}
+}
+
+enum error_check {REL_1_IN, ABS_1_IN};	/* relative or absolute error */
+
+/* Checks each element of out: error vs. answer is at most 1 in err_den. */
+void
+ck_estimate(const char *what, vector float out, struct double4 answer,
+    enum error_check which, double err_den)
+{
+	union vu u;
+	int i, warned = 0;	/* the summary line is printed at most once */
+
+	u.vf = out;
+	for (i = 0; i < 4; i++) {
+		double estimate = u.f[i];
+		double target = answer.d[i];
+		double error;	/* reciprocal of the error: bigger is closer */
+
+		switch (which) {
+		case REL_1_IN:	/* relative error */
+			error = fabs(target / (estimate - target));
+			break;
+		case ABS_1_IN:	/* absolute error */
+			error = fabs(1 / (estimate - target));
+			break;
+		default:
+			errx(1, "invalid check");
+		}
+
+		if (isnan(estimate) || error < err_den) { /* NaN must not pass */
+			if (!warned) {
+				warnx("%s: {%a, %a, %a, %a} should be "
+				    "near {%a, %a, %a, %a} (1/%g)",
+				    what, u.f[0], u.f[1], u.f[2], u.f[3],
+				    answer.d[0], answer.d[1], answer.d[2],
+				    answer.d[3], err_den);
+				warned = 1;
+				fail = 1;
+			}
+			warnx("%a is off %a by 1/%g", estimate, target,
+			    error);
+		}
+	}
+}
+
+/*
+ * Tries altivec with denormal or subnormal floats.
+ * These are single-precision floats f, where
+ *   0 < |f| < 2**-126 = 0x1p-126 = 0x10p-130 = 1.17549435E-38F
+ */
+int
+main(void)
+{
+	struct double4 dan;
+	volatile vector float in1, in2, in3;	/* presumably blocks folding */
+	vector float ans;
+	vector int ian;
+	vector unsigned uan;
+
+	/* in1 + in2 */
+	in1 = VF(10, 0x10p-140, 0x20p-130, -0x2000p-134);
+	in2 = VF( 4,  0x5p-140, -0x1p-130,  0x1fffp-134);
+	ans = VF(14, 0x15p-140, 0x1fp-130,    -0x1p-134);
+	ck_equal("vec_add", vec_add(in1, in2), ans);
+
+	/* in1 - in2 */
+	in1 = VF(0x4000p-134, 10, 0x10p-140,   0x3p-130);
+	in2 = VF(0x3ffep-134,  4,  0x5p-140,  0x40p-130);
+	ans = VF(   0x2p-134,  6,  0xbp-140, -0x3dp-130);
+	ck_equal("vec_sub", vec_sub(in1, in2), ans);
+
+	/* in1 * in2 + in3 */
+	in1 = VF(  0x6p-70, 0x6p-140,  6,   0x6p-100);
+	in2 = VF(  0x7p-70,   0x7p50,  7,    0x7p-30);
+	in3 = VF(        0,        0,  1, -0x20p-130);
+	ans = VF(0x2ap-140, 0x2ap-90, 43,   0xap-130);
+	ck_equal("vec_madd", vec_madd(in1, in2, in3), ans);
+
+	/* in3 - in1 * in2 */
+	in1 = VF(  0xbp-30,    0xbp-70,    0xbp44, 11);
+	in2 = VF( 0x3p-100,    0x3p-70, -0x3p-138,  3);
+	in3 = VF(0x25p-130,          0,         0, 35);
+	ans = VF( 0x4p-130, -0x21p-140,  0x21p-94,  2);
+	ck_equal("vec_nmsub", vec_nmsub(in1, in2, in3), ans);
+
+	/* 1 / in1 */
+	in1 = VF(      3,       0x3p126,       0x3p-126,  0x1p127);
+	dan = AD(1.0 / 3, 1.0 / 0x3p126, 1.0 / 0x3p-126, 0x1p-127);
+	ck_estimate("vec_re", vec_re(in1), dan, REL_1_IN, 4096);
+
+	/* 1 / sqrt(in1) */
+	in1 = VF(1,        2,        0x1p-128,        0x5p-135);
+	dan = AD(1, rsqrt(2), rsqrt(0x1p-128), rsqrt(0x5p-135));
+	ck_estimate("vec_rsqrt", vec_rsqrte(in1), dan, REL_1_IN, 4096);
+
+	/* log2(in1) */
+	in1 = VF(0x1p-130, 0x1p-149, 32, 0x1p-10);
+	dan = AD(    -130,     -149,  5,     -10);
+	ck_estimate("vec_loge", vec_loge(in1), dan, ABS_1_IN, 32);
+	in1 = VF(      0x123p-139,       0xabcp-145, 1, 1);
+	dan = AD(log2(0x123p-139), log2(0xabcp-145), 0, 0);
+	ck_estimate("vec_loge", vec_loge(in1), dan, ABS_1_IN, 32);
+
+	/* 2**in1 */
+	in1 = VF(    -149,     -138,     -127,   10);
+	ans = VF(0x1p-149, 0x1p-138, 0x1p-127, 1024);
+	ck_equal("vec_expte", vec_expte(in1), ans);
+	in1 = VF(    -10,       -145.3,       -136.9,       -127.1);
+	dan = AD(0x1p-10, exp2(-145.3), exp2(-136.9), exp2(-127.1));
+	ck_estimate("vec_expte", vec_expte(in1), dan, REL_1_IN, 16);
+
+	/* (int)(in1 * 2**exponent) */
+	in1 = VF(0x1p-127, 2.34, -0xfedp-140, -19.8);
+	ian = VI(       0,    2,           0,   -19);
+	ck_equal_i("vec_cts", vec_cts(in1, 0), ian);
+	in1 = VF(0x1p-113,    -1, -0xabcp-143, 0x1fp-10);
+	ian = VI(       0, -1024,           0,     0x1f);
+	ck_equal_i("vec_cts", vec_cts(in1, 10), ian);
+
+	/* (unsigned)(in1 * 2**exponent) */
+	in1 = VF(0x1.ap-130, 0x1.ep-140,   24000012, 0);
+	uan = VU(         0,          0, 3072001536, 0);
+	ck_equal_u("vec_ctu", vec_ctu(in1, 7), uan);
+
+	return fail;
+}
-- 
2.20.1