From 17d217fa2eec87d6eb388749b37dc3d926685a4e Mon Sep 17 00:00:00 2001
From: martynas <martynas@openbsd.org>
Date: Sun, 20 Jul 2008 13:07:27 +0000
Subject: [PATCH] get in the bug fix for pow, from fdlibm 5.3.  also adapt it
 for powf, the float version of it >     1. e_pow.c incorrect results when >  
       x is very close to -1.0 and y is very large, e.g. >        
 pow(-1.0000000000000002e+00,4.5035996273704970e+15) = 0 >        
 pow(-9.9999999999999978e-01,4.5035996273704970e+15) = 0 >         Correct
 results are close to -e and -1/e. while here merge more changes for pow,
 fixes sign in oflow/uflow cases ok millert@

---
 lib/libm/src/e_pow.c  | 77 ++++++++++++++++++++++---------------------
 lib/libm/src/e_powf.c |  2 +-
 2 files changed, 40 insertions(+), 39 deletions(-)

diff --git a/lib/libm/src/e_pow.c b/lib/libm/src/e_pow.c
index 9ee8279ea3a..4c4ad3bd112 100644
--- a/lib/libm/src/e_pow.c
+++ b/lib/libm/src/e_pow.c
@@ -5,7 +5,7 @@
  *
  * Developed at SunPro, a Sun Microsystems, Inc. business.
  * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice 
+ * software is freely granted, provided that this notice
  * is preserved.
  * ====================================================
  */
@@ -21,7 +21,7 @@ static char rcsid[] = "$NetBSD: e_pow.c,v 1.9 1995/05/12 04:57:32 jtc Exp $";
  *	1. Compute and return log2(x) in two pieces:
  *		log2(x) = w1 + w2,
  *	   where w1 has 53-24 = 29 bit trailing zeros.
- *	2. Perform y*log2(x) = n+y' by simulating muti-precision 
+ *	2. Perform y*log2(x) = n+y' by simulating multi-precision
  *	   arithmetic, where |y'|<=0.5.
  *	3. Return x**y = 2**n*exp(y'*log2)
  *
@@ -49,20 +49,20 @@ static char rcsid[] = "$NetBSD: e_pow.c,v 1.9 1995/05/12 04:57:32 jtc Exp $";
  * Accuracy:
  *	pow(x,y) returns x**y nearly rounded. In particular
  *			pow(integer,integer)
- *	always returns the correct integer provided it is 
+ *	always returns the correct integer provided it is
  *	representable.
  *
  * Constants :
- * The hexadecimal values are the intended ones for the following 
- * constants. The decimal values may be used, provided that the 
- * compiler will convert from decimal to binary accurately enough 
+ * The hexadecimal values are the intended ones for the following
+ * constants. The decimal values may be used, provided that the
+ * compiler will convert from decimal to binary accurately enough
  * to produce the hexadecimal values shown.
  */
 
 #include "math.h"
 #include "math_private.h"
 
-static const double 
+static const double
 bp[] = {1.0, 1.5,},
 dp_h[] = { 0.0, 5.84962487220764160156e-01,}, /* 0x3FE2B803, 0x40000000 */
 dp_l[] = { 0.0, 1.35003920212974897128e-08,}, /* 0x3E4CFDEB, 0x43CFD006 */
@@ -109,12 +109,12 @@ __ieee754_pow(double x, double y)
 	ix = hx&0x7fffffff;  iy = hy&0x7fffffff;
 
     /* y==zero: x**0 = 1 */
-	if((iy|ly)==0) return one; 	
+	if((iy|ly)==0) return one;
 
     /* +-NaN return x+y */
 	if(ix > 0x7ff00000 || ((ix==0x7ff00000)&&(lx!=0)) ||
-	   iy > 0x7ff00000 || ((iy==0x7ff00000)&&(ly!=0))) 
-		return x+y;	
+	   iy > 0x7ff00000 || ((iy==0x7ff00000)&&(ly!=0)))
+		return x+y;
 
     /* determine if y is an odd int when x < 0
      * yisint = 0	... y is not an integer
@@ -122,7 +122,7 @@ __ieee754_pow(double x, double y)
      * yisint = 2	... y is an even int
      */
 	yisint  = 0;
-	if(hx<0) {	
+	if(hx<0) {
 	    if(iy>=0x43400000) yisint = 2; /* even integer y */
 	    else if(iy>=0x3ff00000) {
 		k = (iy>>20)-0x3ff;	   /* exponent */
@@ -133,11 +133,11 @@ __ieee754_pow(double x, double y)
 		    j = iy>>(20-k);
 		    if((j<<(20-k))==iy) yisint = 2-(j&1);
 		}
-	    }		
-	} 
+	    }
+	}
 
     /* special value of y */
-	if(ly==0) { 	
+	if(ly==0) {
 	    if (iy==0x7ff00000) {	/* y is +-inf */
 	        if(((ix-0x3ff00000)|lx)==0)
 		    return  y - y;	/* inf**+-1 is NaN */
@@ -145,14 +145,14 @@ __ieee754_pow(double x, double y)
 		    return (hy>=0)? y: zero;
 	        else			/* (|x|<1)**-,+inf = inf,0 */
 		    return (hy<0)?-y: zero;
-	    } 
+	    }
 	    if(iy==0x3ff00000) {	/* y is  +-1 */
 		if(hy<0) return one/x; else return x;
 	    }
 	    if(hy==0x40000000) return x*x; /* y is  2 */
 	    if(hy==0x3fe00000) {	/* y is  0.5 */
 		if(hx>=0)	/* x >= +0 */
-		return __ieee754_sqrt(x);	
+		return __ieee754_sqrt(x);
 	    }
 	}
 
@@ -165,15 +165,20 @@ __ieee754_pow(double x, double y)
 		if(hx<0) {
 		    if(((ix-0x3ff00000)|yisint)==0) {
 			z = (z-z)/(z-z); /* (-1)**non-int is NaN */
-		    } else if(yisint==1) 
+		    } else if(yisint==1)
 			z = -z;		/* (x<0)**odd = -(|x|**odd) */
 		}
 		return z;
 	    }
 	}
-    
+
+	n = (hx>>31)+1;
+
     /* (x<0)**(non-int) is NaN */
-	if(((((u_int32_t)hx>>31)-1)|yisint)==0) return (x-x)/(x-x);
+	if((n|yisint)==0) return (x-x)/(x-x);
+
+	s = one; /* s (sign of result -ve**odd) = -1 else = 1 */
+	if((n|(yisint-1))==0) s = -one;/* (-ve)**(odd int) */
 
     /* |y| is huge */
 	if(iy>0x41e00000) { /* if |y| > 2**31 */
@@ -182,11 +187,11 @@ __ieee754_pow(double x, double y)
 		if(ix>=0x3ff00000) return (hy>0)? huge*huge:tiny*tiny;
 	    }
 	/* over/underflow if x is not close to one */
-	    if(ix<0x3fefffff) return (hy<0)? huge*huge:tiny*tiny;
-	    if(ix>0x3ff00000) return (hy>0)? huge*huge:tiny*tiny;
-	/* now |1-x| is tiny <= 2**-20, suffice to compute 
+	    if(ix<0x3fefffff) return (hy<0)? s*huge*huge:s*tiny*tiny;
+	    if(ix>0x3ff00000) return (hy>0)? s*huge*huge:s*tiny*tiny;
+	/* now |1-x| is tiny <= 2**-20, suffice to compute
 	   log(x) by x-x^2/2+x^3/3-x^4/4 */
-	    t = x-1;		/* t has 20 trailing zeros */
+	    t = ax-one;		/* t has 20 trailing zeros */
 	    w = (t*t)*(0.5-t*(0.3333333333333333333333-t*0.25));
 	    u = ivln2_h*t;	/* ivln2_h has 21 sig. bits */
 	    v = t*ivln2_l-w*ivln2;
@@ -194,7 +199,7 @@ __ieee754_pow(double x, double y)
 	    SET_LOW_WORD(t1,0);
 	    t2 = v-(t1-u);
 	} else {
-	    double s2,s_h,s_l,t_h,t_l;
+	    double ss,s2,s_h,s_l,t_h,t_l;
 	    n = 0;
 	/* take care subnormal number */
 	    if(ix<0x00100000)
@@ -208,11 +213,11 @@ __ieee754_pow(double x, double y)
 	    else {k=0;n+=1;ix -= 0x00100000;}
 	    SET_HIGH_WORD(ax,ix);
 
-	/* compute s = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
+	/* compute ss = s_h+s_l = (x-1)/(x+1) or (x-1.5)/(x+1.5) */
 	    u = ax-bp[k];		/* bp[0]=1.0, bp[1]=1.5 */
 	    v = one/(ax+bp[k]);
-	    s = u*v;
-	    s_h = s;
+	    ss = u*v;
+	    s_h = ss;
 	    SET_LOW_WORD(s_h,0);
 	/* t_h=ax+bp[k] High */
 	    t_h = zero;
@@ -220,33 +225,29 @@ __ieee754_pow(double x, double y)
 	    t_l = ax - (t_h-bp[k]);
 	    s_l = v*((u-s_h*t_h)-s_h*t_l);
 	/* compute log(ax) */
-	    s2 = s*s;
+	    s2 = ss*ss;
 	    r = s2*s2*(L1+s2*(L2+s2*(L3+s2*(L4+s2*(L5+s2*L6)))));
-	    r += s_l*(s_h+s);
+	    r += s_l*(s_h+ss);
 	    s2  = s_h*s_h;
 	    t_h = 3.0+s2+r;
 	    SET_LOW_WORD(t_h,0);
 	    t_l = r-((t_h-3.0)-s2);
-	/* u+v = s*(1+...) */
+	/* u+v = ss*(1+...) */
 	    u = s_h*t_h;
-	    v = s_l*t_h+t_l*s;
-	/* 2/(3log2)*(s+...) */
+	    v = s_l*t_h+t_l*ss;
+	/* 2/(3log2)*(ss+...) */
 	    p_h = u+v;
 	    SET_LOW_WORD(p_h,0);
 	    p_l = v-(p_h-u);
 	    z_h = cp_h*p_h;		/* cp_h+cp_l = 2/(3*log2) */
 	    z_l = cp_l*p_h+p_l*cp+dp_l[k];
-	/* log2(ax) = (s+..)*2/(3*log2) = n + dp_h + z_h + z_l */
+	/* log2(ax) = (ss+..)*2/(3*log2) = n + dp_h + z_h + z_l */
 	    t = (double)n;
 	    t1 = (((z_h+z_l)+dp_h[k])+t);
 	    SET_LOW_WORD(t1,0);
 	    t2 = z_l-(((t1-t)-dp_h[k])-z_h);
 	}
 
-	s = one; /* s (sign of result -ve**odd) = -1 else = 1 */
-	if(((((u_int32_t)hx>>31)-1)|(yisint-1))==0)
-	    s = -one;/* (-ve)**(odd int) */
-
     /* split up y into yy1+y2 and compute (yy1+y2)*(t1+t2) */
 	yy1  = y;
 	SET_LOW_WORD(yy1,0);
@@ -281,7 +282,7 @@ __ieee754_pow(double x, double y)
 	    n = ((n&0x000fffff)|0x00100000)>>(20-k);
 	    if(j<0) n = -n;
 	    p_h -= t;
-	} 
+	}
 	t = p_l+p_h;
 	SET_LOW_WORD(t,0);
 	u = t*lg2_h;
diff --git a/lib/libm/src/e_powf.c b/lib/libm/src/e_powf.c
index 0555bf85b0c..ec93660cc3f 100644
--- a/lib/libm/src/e_powf.c
+++ b/lib/libm/src/e_powf.c
@@ -130,7 +130,7 @@ __ieee754_powf(float x, float y)
 	    if(ix>0x3f800007) return (hy>0)? huge*huge:tiny*tiny;
 	/* now |1-x| is tiny <= 2**-20, suffice to compute 
 	   log(x) by x-x^2/2+x^3/3-x^4/4 */
-	    t = x-1;		/* t has 20 trailing zeros */
+	    t = ax-one;		/* t has 20 trailing zeros */
 	    w = (t*t)*((float)0.5-t*((float)0.333333333333-t*(float)0.25));
 	    u = ivln2_h*t;	/* ivln2_h has 16 sig. bits */
 	    v = t*ivln2_l-w*ivln2;
-- 
2.20.1