From 537292fc482356e53e27fd70c0aa55773a8fb6a7 Mon Sep 17 00:00:00 2001 From: guenther Date: Tue, 20 May 2014 01:21:52 +0000 Subject: [PATCH] Bring UTF8_{getc,putc} up-to-date: it's been a decade since 5- and 6-byte encodings and encoding of surrogate pair code points were banned. Add checks for those, both to those functions and to the code decoding the BMP and UNIV encodings. ok miod@ --- lib/libcrypto/asn1/a_mbstr.c | 18 +++- lib/libcrypto/asn1/a_strex.c | 8 ++ lib/libcrypto/asn1/a_utf8.c | 144 +++++++++---------------- lib/libcrypto/asn1/asn1_locl.h | 11 ++ lib/libssl/src/crypto/asn1/a_mbstr.c | 18 +++- lib/libssl/src/crypto/asn1/a_strex.c | 8 ++ lib/libssl/src/crypto/asn1/a_utf8.c | 144 +++++++++---------------- lib/libssl/src/crypto/asn1/asn1_locl.h | 11 ++ 8 files changed, 176 insertions(+), 186 deletions(-) diff --git a/lib/libcrypto/asn1/a_mbstr.c b/lib/libcrypto/asn1/a_mbstr.c index 9945ede2acd..ebc7f2681ca 100644 --- a/lib/libcrypto/asn1/a_mbstr.c +++ b/lib/libcrypto/asn1/a_mbstr.c @@ -60,6 +60,7 @@ #include #include "cryptlib.h" #include +#include "asn1_locl.h" static int traverse_string(const unsigned char *p, int len, int inform, int (*rfunc)(unsigned long value, void *in), void *arg); @@ -232,7 +233,11 @@ ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len, case MBSTRING_UTF8: outlen = 0; - traverse_string(in, len, inform, out_utf8, &outlen); + if (traverse_string(in, len, inform, out_utf8, &outlen) < 0) { + ASN1err(ASN1_F_ASN1_MBSTRING_NCOPY, + ASN1_R_ILLEGAL_CHARACTERS); + return -1; + } cpyfunc = cpy_utf8; break; } @@ -267,12 +272,17 @@ traverse_string(const unsigned char *p, int len, int inform, } else if (inform == MBSTRING_BMP) { value = *p++ << 8; value |= *p++; + /* BMP is explictly defined to not support surrogates */ + if (UNICODE_IS_SURROGATE(value)) + return -1; len -= 2; } else if (inform == MBSTRING_UNIV) { value = ((unsigned long)*p++) << 24; value |= ((unsigned long)*p++) << 16; value |= *p++ << 8; value |= *p++; + if (value > UNICODE_MAX || UNICODE_IS_SURROGATE(value)) + return -1; len -= 4; } else { ret = UTF8_getc(p, len, &value); @@ -310,9 +320,13 @@ static int out_utf8(unsigned long value, void *arg) { int *outlen; + int ret; outlen = arg; - *outlen += UTF8_putc(NULL, -1, value); + ret = UTF8_putc(NULL, -1, value); + if (ret < 0) + return ret; + *outlen += ret; return 1; } diff --git a/lib/libcrypto/asn1/a_strex.c b/lib/libcrypto/asn1/a_strex.c index 462a4059bec..684e933c4f6 100644 --- a/lib/libcrypto/asn1/a_strex.c +++ b/lib/libcrypto/asn1/a_strex.c @@ -62,6 +62,7 @@ #include #include #include +#include "asn1_locl.h" #include "charmap.h" @@ -215,11 +216,15 @@ do_buf(unsigned char *buf, int buflen, int type, unsigned char flags, c |= ((unsigned long)*p++) << 16; c |= ((unsigned long)*p++) << 8; c |= *p++; + if (c > UNICODE_MAX || UNICODE_IS_SURROGATE(c)) + return -1; break; case 2: c = ((unsigned long)*p++) << 8; c |= *p++; + if (UNICODE_IS_SURROGATE(c)) + return -1; break; case 1: @@ -240,7 +245,10 @@ do_buf(unsigned char *buf, int buflen, int type, unsigned char flags, if (type & BUF_TYPE_CONVUTF8) { unsigned char utfbuf[6]; int utflen; + utflen = UTF8_putc(utfbuf, sizeof utfbuf, c); + if (utflen < 0) + return -1; for (i = 0; i < utflen; i++) { /* We don't need to worry about setting orflags correctly * because if utflen==1 its value will be correct anyway diff --git a/lib/libcrypto/asn1/a_utf8.c b/lib/libcrypto/asn1/a_utf8.c index c224db4c12b..f5e4bec7e08 100644 --- a/lib/libcrypto/asn1/a_utf8.c +++ b/lib/libcrypto/asn1/a_utf8.c @@ -59,11 +59,13 @@ #include #include "cryptlib.h" #include +#include "asn1_locl.h" /* UTF8 utilities */ -/* This parses a UTF8 string one character at a time. It is passed a pointer +/* + * This parses a UTF8 string one character at a time. It is passed a pointer * to the string and the length of the string. It sets 'value' to the value of * the current character. It returns the number of characters read or a * negative error code: @@ -88,6 +90,8 @@ UTF8_getc(const unsigned char *str, int len, unsigned long *val) value = *p++ & 0x7f; ret = 1; } else if ((*p & 0xe0) == 0xc0) { + if (*p < 0xc2) + return -2; if (len < 2) return -1; if ((p[1] & 0xc0) != 0x80) @@ -108,8 +112,11 @@ UTF8_getc(const unsigned char *str, int len, unsigned long *val) value |= *p++ & 0x3f; if (value < 0x800) return -4; + /* surrogate pair code points are not valid */ + if (value >= 0xd800 && value < 0xe000) + return -2; ret = 3; - } else if ((*p & 0xf8) == 0xf0) { + } else if ((*p & 0xf8) == 0xf0 && (*p < 0xf5)) { if (len < 4) return -1; if (((p[1] & 0xc0) != 0x80) || @@ -122,116 +129,71 @@ UTF8_getc(const unsigned char *str, int len, unsigned long *val) value |= *p++ & 0x3f; if (value < 0x10000) return -4; + if (value > UNICODE_MAX) + return -2; ret = 4; - } else if ((*p & 0xfc) == 0xf8) { - if (len < 5) - return -1; - if (((p[1] & 0xc0) != 0x80) || - ((p[2] & 0xc0) != 0x80) || - ((p[3] & 0xc0) != 0x80) || - ((p[4] & 0xc0) != 0x80)) - return -3; - value = ((unsigned long)(*p++ & 0x3)) << 24; - value |= ((unsigned long)(*p++ & 0x3f)) << 18; - value |= ((unsigned long)(*p++ & 0x3f)) << 12; - value |= (*p++ & 0x3f) << 6; - value |= *p++ & 0x3f; - if (value < 0x200000) - return -4; - ret = 5; - } else if ((*p & 0xfe) == 0xfc) { - if (len < 6) - return -1; - if (((p[1] & 0xc0) != 0x80) || - ((p[2] & 0xc0) != 0x80) || - ((p[3] & 0xc0) != 0x80) || - ((p[4] & 0xc0) != 0x80) || - ((p[5] & 0xc0) != 0x80)) - return -3; - value = ((unsigned long)(*p++ & 0x1)) << 30; - value |= ((unsigned long)(*p++ & 0x3f)) << 24; - value |= ((unsigned long)(*p++ & 0x3f)) << 18; - value |= ((unsigned long)(*p++ & 0x3f)) << 12; - value |= (*p++ & 0x3f) << 6; - value |= *p++ & 0x3f; - if (value < 0x4000000) - return -4; - ret = 6; - } else return -2; - *val = value; + } else + return -2; + *val = value; return ret; } -/* This takes a character 'value' and writes the UTF8 encoded value in - * 'str' where 'str' is a buffer containing 'len' characters. Returns - * the number of characters written or -1 if 'len' is too small. 'str' can - * be set to NULL in which case it just returns the number of characters. - * It will need at most 6 characters. +/* This takes a Unicode code point 'value' and writes its UTF-8 encoded form + * in 'str' where 'str' is a buffer of at least length 'len'. If 'str' + * is NULL, then nothing is written and just the return code is determined. + + * Returns less than zero on error: + * -1 if 'str' is not NULL and 'len' is too small + * -2 if 'value' is an invalid character (surrogate or out-of-range) + * + * Otherwise, returns the number of bytes in 'value's encoded form + * (i.e., the number of bytes written to 'str' when it's not NULL). + * + * It will need at most 4 characters. */ int UTF8_putc(unsigned char *str, int len, unsigned long value) { - if (!str) - len = 6; /* Maximum we will need */ - else if (len <= 0) - return -1; if (value < 0x80) { - if (str) - *str = (unsigned char)value; + if (str != NULL) { + if (len < 1) + return -1; + str[0] = (unsigned char)value; + } return 1; } if (value < 0x800) { - if (len < 2) - return -1; - if (str) { - *str++ = (unsigned char)(((value >> 6) & 0x1f) | 0xc0); - *str = (unsigned char)((value & 0x3f) | 0x80); + if (str != NULL) { + if (len < 2) + return -1; + str[0] = (unsigned char)(((value >> 6) & 0x1f) | 0xc0); + str[1] = (unsigned char)((value & 0x3f) | 0x80); } return 2; } if (value < 0x10000) { - if (len < 3) - return -1; - if (str) { - *str++ = (unsigned char)(((value >> 12) & 0xf) | 0xe0); - *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); - *str = (unsigned char)((value & 0x3f) | 0x80); + if (UNICODE_IS_SURROGATE(value)) + return -2; + if (str != NULL) { + if (len < 3) + return -1; + str[0] = (unsigned char)(((value >> 12) & 0xf) | 0xe0); + str[1] = (unsigned char)(((value >> 6) & 0x3f) | 0x80); + str[2] = (unsigned char)((value & 0x3f) | 0x80); } return 3; } - if (value < 0x200000) { - if (len < 4) - return -1; - if (str) { - *str++ = (unsigned char)(((value >> 18) & 0x7) | 0xf0); - *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80); - *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); - *str = (unsigned char)((value & 0x3f) | 0x80); + if (value <= UNICODE_MAX) { + if (str != NULL) { + if (len < 4) + return -1; + str[0] = (unsigned char)(((value >> 18) & 0x7) | 0xf0); + str[1] = (unsigned char)(((value >> 12) & 0x3f) | 0x80); + str[2] = (unsigned char)(((value >> 6) & 0x3f) | 0x80); + str[3] = (unsigned char)((value & 0x3f) | 0x80); } return 4; } - if (value < 0x4000000) { - if (len < 5) - return -1; - if (str) { - *str++ = (unsigned char)(((value >> 24) & 0x3) | 0xf8); - *str++ = (unsigned char)(((value >> 18) & 0x3f) | 0x80); - *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80); - *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); - *str = (unsigned char)((value & 0x3f) | 0x80); - } - return 5; - } - if (len < 6) - return -1; - if (str) { - *str++ = (unsigned char)(((value >> 30) & 0x1) | 0xfc); - *str++ = (unsigned char)(((value >> 24) & 0x3f) | 0x80); - *str++ = (unsigned char)(((value >> 18) & 0x3f) | 0x80); - *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80); - *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); - *str = (unsigned char)((value & 0x3f) | 0x80); - } - return 6; + return -2; } diff --git a/lib/libcrypto/asn1/asn1_locl.h b/lib/libcrypto/asn1/asn1_locl.h index 9fcf0d9530f..2c6087bf026 100644 --- a/lib/libcrypto/asn1/asn1_locl.h +++ b/lib/libcrypto/asn1/asn1_locl.h @@ -143,3 +143,14 @@ struct x509_crl_method_st ASN1_INTEGER *ser, X509_NAME *issuer); int (*crl_verify)(X509_CRL *crl, EVP_PKEY *pk); }; + +/* + * Unicode codepoint constants + */ +#define UNICODE_MAX 0x10FFFF +#define UNICODE_SURROGATE_MIN 0x00D800 +#define UNICODE_SURROGATE_MAX 0x00DFFF + +#define UNICODE_IS_SURROGATE(x) \ + ((x) >= UNICODE_SURROGATE_MIN && (x) <= UNICODE_SURROGATE_MAX) + diff --git a/lib/libssl/src/crypto/asn1/a_mbstr.c b/lib/libssl/src/crypto/asn1/a_mbstr.c index 9945ede2acd..ebc7f2681ca 100644 --- a/lib/libssl/src/crypto/asn1/a_mbstr.c +++ b/lib/libssl/src/crypto/asn1/a_mbstr.c @@ -60,6 +60,7 @@ #include #include "cryptlib.h" #include +#include "asn1_locl.h" static int traverse_string(const unsigned char *p, int len, int inform, int (*rfunc)(unsigned long value, void *in), void *arg); @@ -232,7 +233,11 @@ ASN1_mbstring_ncopy(ASN1_STRING **out, const unsigned char *in, int len, case MBSTRING_UTF8: outlen = 0; - traverse_string(in, len, inform, out_utf8, &outlen); + if (traverse_string(in, len, inform, out_utf8, &outlen) < 0) { + ASN1err(ASN1_F_ASN1_MBSTRING_NCOPY, + ASN1_R_ILLEGAL_CHARACTERS); + return -1; + } cpyfunc = cpy_utf8; break; } @@ -267,12 +272,17 @@ traverse_string(const unsigned char *p, int len, int inform, } else if (inform == MBSTRING_BMP) { value = *p++ << 8; value |= *p++; + /* BMP is explictly defined to not support surrogates */ + if (UNICODE_IS_SURROGATE(value)) + return -1; len -= 2; } else if (inform == MBSTRING_UNIV) { value = ((unsigned long)*p++) << 24; value |= ((unsigned long)*p++) << 16; value |= *p++ << 8; value |= *p++; + if (value > UNICODE_MAX || UNICODE_IS_SURROGATE(value)) + return -1; len -= 4; } else { ret = UTF8_getc(p, len, &value); @@ -310,9 +320,13 @@ static int out_utf8(unsigned long value, void *arg) { int *outlen; + int ret; outlen = arg; - *outlen += UTF8_putc(NULL, -1, value); + ret = UTF8_putc(NULL, -1, value); + if (ret < 0) + return ret; + *outlen += ret; return 1; } diff --git a/lib/libssl/src/crypto/asn1/a_strex.c b/lib/libssl/src/crypto/asn1/a_strex.c index 462a4059bec..684e933c4f6 100644 --- a/lib/libssl/src/crypto/asn1/a_strex.c +++ b/lib/libssl/src/crypto/asn1/a_strex.c @@ -62,6 +62,7 @@ #include #include #include +#include "asn1_locl.h" #include "charmap.h" @@ -215,11 +216,15 @@ do_buf(unsigned char *buf, int buflen, int type, unsigned char flags, c |= ((unsigned long)*p++) << 16; c |= ((unsigned long)*p++) << 8; c |= *p++; + if (c > UNICODE_MAX || UNICODE_IS_SURROGATE(c)) + return -1; break; case 2: c = ((unsigned long)*p++) << 8; c |= *p++; + if (UNICODE_IS_SURROGATE(c)) + return -1; break; case 1: @@ -240,7 +245,10 @@ do_buf(unsigned char *buf, int buflen, int type, unsigned char flags, if (type & BUF_TYPE_CONVUTF8) { unsigned char utfbuf[6]; int utflen; + utflen = UTF8_putc(utfbuf, sizeof utfbuf, c); + if (utflen < 0) + return -1; for (i = 0; i < utflen; i++) { /* We don't need to worry about setting orflags correctly * because if utflen==1 its value will be correct anyway diff --git a/lib/libssl/src/crypto/asn1/a_utf8.c b/lib/libssl/src/crypto/asn1/a_utf8.c index c224db4c12b..f5e4bec7e08 100644 --- a/lib/libssl/src/crypto/asn1/a_utf8.c +++ b/lib/libssl/src/crypto/asn1/a_utf8.c @@ -59,11 +59,13 @@ #include #include "cryptlib.h" #include +#include "asn1_locl.h" /* UTF8 utilities */ -/* This parses a UTF8 string one character at a time. It is passed a pointer +/* + * This parses a UTF8 string one character at a time. It is passed a pointer * to the string and the length of the string. It sets 'value' to the value of * the current character. It returns the number of characters read or a * negative error code: @@ -88,6 +90,8 @@ UTF8_getc(const unsigned char *str, int len, unsigned long *val) value = *p++ & 0x7f; ret = 1; } else if ((*p & 0xe0) == 0xc0) { + if (*p < 0xc2) + return -2; if (len < 2) return -1; if ((p[1] & 0xc0) != 0x80) @@ -108,8 +112,11 @@ UTF8_getc(const unsigned char *str, int len, unsigned long *val) value |= *p++ & 0x3f; if (value < 0x800) return -4; + /* surrogate pair code points are not valid */ + if (value >= 0xd800 && value < 0xe000) + return -2; ret = 3; - } else if ((*p & 0xf8) == 0xf0) { + } else if ((*p & 0xf8) == 0xf0 && (*p < 0xf5)) { if (len < 4) return -1; if (((p[1] & 0xc0) != 0x80) || @@ -122,116 +129,71 @@ UTF8_getc(const unsigned char *str, int len, unsigned long *val) value |= *p++ & 0x3f; if (value < 0x10000) return -4; + if (value > UNICODE_MAX) + return -2; ret = 4; - } else if ((*p & 0xfc) == 0xf8) { - if (len < 5) - return -1; - if (((p[1] & 0xc0) != 0x80) || - ((p[2] & 0xc0) != 0x80) || - ((p[3] & 0xc0) != 0x80) || - ((p[4] & 0xc0) != 0x80)) - return -3; - value = ((unsigned long)(*p++ & 0x3)) << 24; - value |= ((unsigned long)(*p++ & 0x3f)) << 18; - value |= ((unsigned long)(*p++ & 0x3f)) << 12; - value |= (*p++ & 0x3f) << 6; - value |= *p++ & 0x3f; - if (value < 0x200000) - return -4; - ret = 5; - } else if ((*p & 0xfe) == 0xfc) { - if (len < 6) - return -1; - if (((p[1] & 0xc0) != 0x80) || - ((p[2] & 0xc0) != 0x80) || - ((p[3] & 0xc0) != 0x80) || - ((p[4] & 0xc0) != 0x80) || - ((p[5] & 0xc0) != 0x80)) - return -3; - value = ((unsigned long)(*p++ & 0x1)) << 30; - value |= ((unsigned long)(*p++ & 0x3f)) << 24; - value |= ((unsigned long)(*p++ & 0x3f)) << 18; - value |= ((unsigned long)(*p++ & 0x3f)) << 12; - value |= (*p++ & 0x3f) << 6; - value |= *p++ & 0x3f; - if (value < 0x4000000) - return -4; - ret = 6; - } else return -2; - *val = value; + } else + return -2; + *val = value; return ret; } -/* This takes a character 'value' and writes the UTF8 encoded value in - * 'str' where 'str' is a buffer containing 'len' characters. Returns - * the number of characters written or -1 if 'len' is too small. 'str' can - * be set to NULL in which case it just returns the number of characters. - * It will need at most 6 characters. +/* This takes a Unicode code point 'value' and writes its UTF-8 encoded form + * in 'str' where 'str' is a buffer of at least length 'len'. If 'str' + * is NULL, then nothing is written and just the return code is determined. + + * Returns less than zero on error: + * -1 if 'str' is not NULL and 'len' is too small + * -2 if 'value' is an invalid character (surrogate or out-of-range) + * + * Otherwise, returns the number of bytes in 'value's encoded form + * (i.e., the number of bytes written to 'str' when it's not NULL). + * + * It will need at most 4 characters. */ int UTF8_putc(unsigned char *str, int len, unsigned long value) { - if (!str) - len = 6; /* Maximum we will need */ - else if (len <= 0) - return -1; if (value < 0x80) { - if (str) - *str = (unsigned char)value; + if (str != NULL) { + if (len < 1) + return -1; + str[0] = (unsigned char)value; + } return 1; } if (value < 0x800) { - if (len < 2) - return -1; - if (str) { - *str++ = (unsigned char)(((value >> 6) & 0x1f) | 0xc0); - *str = (unsigned char)((value & 0x3f) | 0x80); + if (str != NULL) { + if (len < 2) + return -1; + str[0] = (unsigned char)(((value >> 6) & 0x1f) | 0xc0); + str[1] = (unsigned char)((value & 0x3f) | 0x80); } return 2; } if (value < 0x10000) { - if (len < 3) - return -1; - if (str) { - *str++ = (unsigned char)(((value >> 12) & 0xf) | 0xe0); - *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); - *str = (unsigned char)((value & 0x3f) | 0x80); + if (UNICODE_IS_SURROGATE(value)) + return -2; + if (str != NULL) { + if (len < 3) + return -1; + str[0] = (unsigned char)(((value >> 12) & 0xf) | 0xe0); + str[1] = (unsigned char)(((value >> 6) & 0x3f) | 0x80); + str[2] = (unsigned char)((value & 0x3f) | 0x80); } return 3; } - if (value < 0x200000) { - if (len < 4) - return -1; - if (str) { - *str++ = (unsigned char)(((value >> 18) & 0x7) | 0xf0); - *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80); - *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); - *str = (unsigned char)((value & 0x3f) | 0x80); + if (value <= UNICODE_MAX) { + if (str != NULL) { + if (len < 4) + return -1; + str[0] = (unsigned char)(((value >> 18) & 0x7) | 0xf0); + str[1] = (unsigned char)(((value >> 12) & 0x3f) | 0x80); + str[2] = (unsigned char)(((value >> 6) & 0x3f) | 0x80); + str[3] = (unsigned char)((value & 0x3f) | 0x80); } return 4; } - if (value < 0x4000000) { - if (len < 5) - return -1; - if (str) { - *str++ = (unsigned char)(((value >> 24) & 0x3) | 0xf8); - *str++ = (unsigned char)(((value >> 18) & 0x3f) | 0x80); - *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80); - *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); - *str = (unsigned char)((value & 0x3f) | 0x80); - } - return 5; - } - if (len < 6) - return -1; - if (str) { - *str++ = (unsigned char)(((value >> 30) & 0x1) | 0xfc); - *str++ = (unsigned char)(((value >> 24) & 0x3f) | 0x80); - *str++ = (unsigned char)(((value >> 18) & 0x3f) | 0x80); - *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80); - *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80); - *str = (unsigned char)((value & 0x3f) | 0x80); - } - return 6; + return -2; } diff --git a/lib/libssl/src/crypto/asn1/asn1_locl.h b/lib/libssl/src/crypto/asn1/asn1_locl.h index 9fcf0d9530f..2c6087bf026 100644 --- a/lib/libssl/src/crypto/asn1/asn1_locl.h +++ b/lib/libssl/src/crypto/asn1/asn1_locl.h @@ -143,3 +143,14 @@ struct x509_crl_method_st ASN1_INTEGER *ser, X509_NAME *issuer); int (*crl_verify)(X509_CRL *crl, EVP_PKEY *pk); }; + +/* + * Unicode codepoint constants + */ +#define UNICODE_MAX 0x10FFFF +#define UNICODE_SURROGATE_MIN 0x00D800 +#define UNICODE_SURROGATE_MAX 0x00DFFF + +#define UNICODE_IS_SURROGATE(x) \ + ((x) >= UNICODE_SURROGATE_MIN && (x) <= UNICODE_SURROGATE_MAX) + -- 2.20.1