From 534f87a39f2f8b9fc6b98949287b67a607e25da4 Mon Sep 17 00:00:00 2001 From: tb Date: Tue, 23 Aug 2022 06:35:53 +0000 Subject: [PATCH] Rework random(6) Fix putchar(3) handling and a number of other issues in the random(6) code. In -e mode, avoid bias due to truncation of the return code to 8 bits. In default mode, actually treat the denominator as a floating point number and print the line with a probability of 1 / denominator. This allows e.g. printing 2/3 of stdin with 'random 1.5'. Includes code for uniform random floats by Taylor R. Campbell from https://mumble.net/~campbell/2014/04/28/uniform-random-float and clz64() by me. From lucic71 () bronze ! ctrl-c ! club, thanks. ok deraadt --- games/random/random.6 | 11 +-- games/random/random.c | 164 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 151 insertions(+), 24 deletions(-) diff --git a/games/random/random.6 b/games/random/random.6 index 8a006dcf3c6..74df6102a55 100644 --- a/games/random/random.6 +++ b/games/random/random.6 @@ -1,4 +1,4 @@ -.\" $OpenBSD: random.6,v 1.7 2007/05/31 19:19:18 jmc Exp $ +.\" $OpenBSD: random.6,v 1.8 2022/08/23 06:35:53 tb Exp $ .\" .\" Copyright (c) 1994 .\" The Regents of the University of California. All rights reserved. @@ -29,7 +29,7 @@ .\" .\" @(#)random.6 8.2 (Berkeley) 3/31/94 .\" -.Dd $Mdocdate: May 31 2007 $ +.Dd $Mdocdate: August 23 2022 $ .Dt RANDOM 6 .Os .Sh NAME @@ -43,9 +43,10 @@ .Nm reads lines from the standard input and copies them to the standard output with a probability of 1/denominator. -The default value for +The .Ar denominator -is 2. +must be at least 1, +its default value is 2. .Pp The options are as follows: .Bl -tag -width Ds @@ -55,7 +56,7 @@ If the option is specified, .Nm does not read or write anything, and simply exits with a random -exit value of 0 to +exit value of 0 to the minimum of 255 and .Ar denominator \&- 1, inclusive. .It Fl r diff --git a/games/random/random.c b/games/random/random.c index a1b22744d22..7da7117b430 100644 --- a/games/random/random.c +++ b/games/random/random.c @@ -1,4 +1,4 @@ -/* $OpenBSD: random.c,v 1.20 2016/03/07 12:07:56 mestre Exp $ */ +/* $OpenBSD: random.c,v 1.21 2022/08/23 06:35:53 tb Exp $ */ /* $NetBSD: random.c,v 1.3 1995/04/22 07:44:05 cgd Exp $ */ /* @@ -33,14 +33,136 @@ * SUCH DAMAGE. */ +/*- + * Copyright (c) 2014 Taylor R. Campbell + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + #include #include +#include #include #include #include __dead void usage(void); +int +clz64(uint64_t x) +{ + static const uint64_t mask[] = { + 0xffffffff00000000, 0xffff0000, 0xff00, 0xf0, 0xc, 0x2, + }; + static const int zeroes[] = { + 32, 16, 8, 4, 2, 1, + }; + int clz = 0; + int i; + + if (x == 0) + return 64; + + for (i = 0; i < 6; i++) { + if ((x & mask[i]) == 0) + clz += zeroes[i]; + else + x >>= zeroes[i]; + } + + return clz; +} + +uint64_t +random64(void) +{ + uint64_t r; + + arc4random_buf(&r, sizeof(uint64_t)); + + return r; +} + +/* + * random_real: Generate a stream of bits uniformly at random and + * interpret it as the fractional part of the binary expansion of a + * number in [0, 1], 0.00001010011111010100...; then round it. + */ +double +random_real(void) +{ + int exponent = -64; + uint64_t significand; + int shift; + + /* + * Read zeros into the exponent until we hit a one; the rest + * will go into the significand. + */ + while (__predict_false((significand = random64()) == 0)) { + exponent -= 64; + /* + * If the exponent falls below -1074 = emin + 1 - p, + * the exponent of the smallest subnormal, we are + * guaranteed the result will be rounded to zero. This + * case is so unlikely it will happen in realistic + * terms only if random64 is broken. + */ + if (__predict_false(exponent < -1074)) + return 0; + } + + /* + * There is a 1 somewhere in significand, not necessarily in + * the most significant position. If there are leading zeros, + * shift them into the exponent and refill the less-significant + * bits of the significand. Can't predict one way or another + * whether there are leading zeros: there's a fifty-fifty + * chance, if random64 is uniformly distributed. + */ + shift = clz64(significand); + if (shift != 0) { + exponent -= shift; + significand <<= shift; + significand |= (random64() >> (64 - shift)); + } + + /* + * Set the sticky bit, since there is almost surely another 1 + * in the bit stream. Otherwise, we might round what looks + * like a tie to even when, almost surely, were we to look + * further in the bit stream, there would be a 1 breaking the + * tie. + */ + significand |= 1; + + /* + * Finally, convert to double (rounding) and scale by + * 2^exponent. + */ + return ldexp((double)significand, exponent); +} + int main(int argc, char *argv[]) { @@ -77,16 +199,20 @@ main(int argc, char *argv[]) denom = strtod(*argv, &ep); if (errno == ERANGE) err(1, "%s", *argv); - if (denom == 0 || *ep != '\0') + if (denom < 1 || *ep != '\0') errx(1, "denominator is not valid."); break; default: - usage(); + usage(); } - /* Compute a random exit status between 0 and denom - 1. */ - if (random_exit) - return (arc4random_uniform(denom)); + /* Return a random exit status between 0 and min(denom - 1, 255). */ + if (random_exit) { + if (denom > 256) + denom = 256; + + return arc4random_uniform(denom); + } /* * Act as a filter, randomly choosing lines of the standard input @@ -94,25 +220,25 @@ main(int argc, char *argv[]) */ if (unbuffer_output) setvbuf(stdout, NULL, _IONBF, 0); - + /* * Select whether to print the first line. (Prime the pump.) - * We find a random number between 0 and denom - 1 and, if it's - * 0 (which has a 1 / denom chance of being true), we select the - * line. + * We find a random number between 0 and 1 and, if it's < 1 / denom, + * we select the line. */ - selected = arc4random_uniform(denom) == 0; + selected = random_real() < 1 / denom; while ((ch = getchar()) != EOF) { - if (selected) - (void)putchar(ch); - if (ch == '\n') { - /* End of that line. See if we got an error. */ - if (ferror(stdout)) - err(2, "stdout"); + int retch; - /* Now see if the next line is to be printed. */ - selected = arc4random_uniform(denom) == 0; + if (selected) { + errno = 0; + retch = putchar(ch); + if (retch == EOF && errno) + err(2, "putchar"); } + if (ch == '\n') + /* Now see if the next line is to be printed. */ + selected = random_real() < 1 / denom; } if (ferror(stdin)) err(2, "stdin"); -- 2.20.1