From 7b38205b70fdab5882530e8bb7b5194ce8d3e07c Mon Sep 17 00:00:00 2001 From: miod Date: Fri, 23 May 2014 16:11:55 +0000 Subject: [PATCH] Replace (sometimes conditional) use of alloca with malloc, and clearing through volatile pointers with explicit_bzero(). ok beck@ jsing@ --- lib/libcrypto/bn/bn_asm.c | 39 +++++++++++++------------------ lib/libcrypto/bn/bn_exp.c | 16 ------------- lib/libssl/src/crypto/bn/bn_asm.c | 39 +++++++++++++------------------ lib/libssl/src/crypto/bn/bn_exp.c | 16 ------------- 4 files changed, 32 insertions(+), 78 deletions(-) diff --git a/lib/libcrypto/bn/bn_asm.c b/lib/libcrypto/bn/bn_asm.c index 0eebb9824f0..742188982c8 100644 --- a/lib/libcrypto/bn/bn_asm.c +++ b/lib/libcrypto/bn/bn_asm.c @@ -888,7 +888,6 @@ bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) #ifdef OPENSSL_NO_ASM #ifdef OPENSSL_BN_ASM_MONT -#include <alloca.h> /* * This is essentially reference implementation, which may or may not * result in performance improvement. E.g. on IA-32 this routine was @@ -909,14 +908,15 @@ bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG #ifdef mul64 BN_ULONG mh; #endif - volatile BN_ULONG *vp; int i = 0, j; #if 0 /* template for platform-specific implementation */ if (ap == bp) return bn_sqr_mont(rp, ap, np, n0p, num); #endif - vp = tp = alloca((num + 2)*sizeof(BN_ULONG)); + tp = reallocarray(NULL, num + 2, sizeof(BN_ULONG)); + if (tp == NULL) + return 0; n0 = *n0p; @@ -979,15 +979,13 @@ enter: if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { c0 = bn_sub_words(rp, tp, np, num); if (tp[num] != 0 || c0 == 0) { - for (i = 0; i < num + 2; i++) - vp[i] = 0; - return 1; + goto out; } } - for (i = 0; i < num; i++) - rp[i] = tp[i], vp[i] = 0; - vp[num] = 0; - vp[num + 1] = 0; + memcpy(rp, tp, num * sizeof(BN_ULONG)); +out: + explicit_bzero(tp, (num + 2) * sizeof(BN_ULONG)); + free(tp); return 1; } #else @@ -1045,19 +1043,16 @@ bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) #ifdef OPENSSL_NO_ASM #ifdef OPENSSL_BN_ASM_MONT 
-#include <alloca.h> int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0p, int num) { BN_ULONG c0, c1, *tp, n0 = *n0p; - volatile BN_ULONG *vp; int i = 0, j; - vp = tp = alloca((num + 2) * sizeof(BN_ULONG)); - - for(i = 0; i <= num; i++) - tp[i] = 0; + tp = calloc(num + 2, sizeof(BN_ULONG)); + if (tp == NULL) + return 0; for (i = 0; i < num; i++) { c0 = bn_mul_add_words(tp, ap, num, bp[i]); @@ -1076,15 +1071,13 @@ bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { c0 = bn_sub_words(rp, tp, np, num); if (tp[num] != 0 || c0 == 0) { - for (i = 0; i < num + 2; i++) - vp[i] = 0; - return 1; + goto out; } } - for (i = 0; i < num; i++) - rp[i] = tp[i], vp[i] = 0; - vp[num] = 0; - vp[num + 1] = 0; + memcpy(rp, tp, num * sizeof(BN_ULONG)); +out: + explicit_bzero(tp, (num + 2) * sizeof(BN_ULONG)); + free(tp); return 1; } #else diff --git a/lib/libcrypto/bn/bn_exp.c b/lib/libcrypto/bn/bn_exp.c index 5d9263e01eb..a27373c97be 100644 --- a/lib/libcrypto/bn/bn_exp.c +++ b/lib/libcrypto/bn/bn_exp.c @@ -114,11 +114,6 @@ #include "bn_lcl.h" #include -#if defined(__GNUC__) -# ifndef alloca -# define alloca(s) __builtin_alloca((s)) -# endif -#endif /* maximum precomputation table size for *variable* sliding windows */ #define TABLE_SIZE 32 @@ -632,12 +627,6 @@ BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, numPowers = 1 << window; powerbufLen = sizeof(m->d[0]) * (top * numPowers + ((2*top) > numPowers ? 
(2*top) : numPowers)); -#ifdef alloca - if (powerbufLen < 3072) - powerbufFree = alloca(powerbufLen + - MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); - else -#endif if ((powerbufFree = (unsigned char*)malloc(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) goto err; @@ -645,11 +634,6 @@ BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); memset(powerbuf, 0, powerbufLen); -#ifdef alloca - if (powerbufLen < 3072) - powerbufFree = NULL; -#endif - /* lay down tmp and am right after powers table */ tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); am.d = tmp.d + top; diff --git a/lib/libssl/src/crypto/bn/bn_asm.c b/lib/libssl/src/crypto/bn/bn_asm.c index 0eebb9824f0..742188982c8 100644 --- a/lib/libssl/src/crypto/bn/bn_asm.c +++ b/lib/libssl/src/crypto/bn/bn_asm.c @@ -888,7 +888,6 @@ bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) #ifdef OPENSSL_NO_ASM #ifdef OPENSSL_BN_ASM_MONT -#include <alloca.h> /* * This is essentially reference implementation, which may or may not * result in performance improvement. E.g. 
on IA-32 this routine was @@ -909,14 +908,15 @@ bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG #ifdef mul64 BN_ULONG mh; #endif - volatile BN_ULONG *vp; int i = 0, j; #if 0 /* template for platform-specific implementation */ if (ap == bp) return bn_sqr_mont(rp, ap, np, n0p, num); #endif - vp = tp = alloca((num + 2)*sizeof(BN_ULONG)); + tp = reallocarray(NULL, num + 2, sizeof(BN_ULONG)); + if (tp == NULL) + return 0; n0 = *n0p; @@ -979,15 +979,13 @@ enter: if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { c0 = bn_sub_words(rp, tp, np, num); if (tp[num] != 0 || c0 == 0) { - for (i = 0; i < num + 2; i++) - vp[i] = 0; - return 1; + goto out; } } - for (i = 0; i < num; i++) - rp[i] = tp[i], vp[i] = 0; - vp[num] = 0; - vp[num + 1] = 0; + memcpy(rp, tp, num * sizeof(BN_ULONG)); +out: + explicit_bzero(tp, (num + 2) * sizeof(BN_ULONG)); + free(tp); return 1; } #else @@ -1045,19 +1043,16 @@ bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) #ifdef OPENSSL_NO_ASM #ifdef OPENSSL_BN_ASM_MONT -#include <alloca.h> int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, const BN_ULONG *np, const BN_ULONG *n0p, int num) { BN_ULONG c0, c1, *tp, n0 = *n0p; - volatile BN_ULONG *vp; int i = 0, j; - vp = tp = alloca((num + 2) * sizeof(BN_ULONG)); - - for(i = 0; i <= num; i++) - tp[i] = 0; + tp = calloc(num + 2, sizeof(BN_ULONG)); + if (tp == NULL) + return 0; for (i = 0; i < num; i++) { c0 = bn_mul_add_words(tp, ap, num, bp[i]); @@ -1076,15 +1071,13 @@ bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp, if (tp[num] != 0 || tp[num - 1] >= np[num - 1]) { c0 = bn_sub_words(rp, tp, np, num); if (tp[num] != 0 || c0 == 0) { - for (i = 0; i < num + 2; i++) - vp[i] = 0; - return 1; + goto out; } } - for (i = 0; i < num; i++) - rp[i] = tp[i], vp[i] = 0; - vp[num] = 0; - vp[num + 1] = 0; + memcpy(rp, tp, num * sizeof(BN_ULONG)); +out: + explicit_bzero(tp, (num + 2) * sizeof(BN_ULONG)); + free(tp); return 1; } #else diff --git 
a/lib/libssl/src/crypto/bn/bn_exp.c b/lib/libssl/src/crypto/bn/bn_exp.c index 5d9263e01eb..a27373c97be 100644 --- a/lib/libssl/src/crypto/bn/bn_exp.c +++ b/lib/libssl/src/crypto/bn/bn_exp.c @@ -114,11 +114,6 @@ #include "bn_lcl.h" #include -#if defined(__GNUC__) -# ifndef alloca -# define alloca(s) __builtin_alloca((s)) -# endif -#endif /* maximum precomputation table size for *variable* sliding windows */ #define TABLE_SIZE 32 @@ -632,12 +627,6 @@ BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, numPowers = 1 << window; powerbufLen = sizeof(m->d[0]) * (top * numPowers + ((2*top) > numPowers ? (2*top) : numPowers)); -#ifdef alloca - if (powerbufLen < 3072) - powerbufFree = alloca(powerbufLen + - MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); - else -#endif if ((powerbufFree = (unsigned char*)malloc(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) goto err; @@ -645,11 +634,6 @@ BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); memset(powerbuf, 0, powerbufLen); -#ifdef alloca - if (powerbufLen < 3072) - powerbufFree = NULL; -#endif - /* lay down tmp and am right after powers table */ tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); am.d = tmp.d + top; -- 2.20.1