#include <ctype.h>
#include "cryptlib.h"
#include <openssl/asn1.h>
+#include "asn1_locl.h"
static int traverse_string(const unsigned char *p, int len, int inform,
int (*rfunc)(unsigned long value, void *in), void *arg);
case MBSTRING_UTF8:
outlen = 0;
- traverse_string(in, len, inform, out_utf8, &outlen);
+ if (traverse_string(in, len, inform, out_utf8, &outlen) < 0) {
+ ASN1err(ASN1_F_ASN1_MBSTRING_NCOPY,
+ ASN1_R_ILLEGAL_CHARACTERS);
+ return -1;
+ }
cpyfunc = cpy_utf8;
break;
}
} else if (inform == MBSTRING_BMP) {
value = *p++ << 8;
value |= *p++;
+ /* BMP is explictly defined to not support surrogates */
+ if (UNICODE_IS_SURROGATE(value))
+ return -1;
len -= 2;
} else if (inform == MBSTRING_UNIV) {
value = ((unsigned long)*p++) << 24;
value |= ((unsigned long)*p++) << 16;
value |= *p++ << 8;
value |= *p++;
+ if (value > UNICODE_MAX || UNICODE_IS_SURROGATE(value))
+ return -1;
len -= 4;
} else {
ret = UTF8_getc(p, len, &value);
out_utf8(unsigned long value, void *arg)
{
int *outlen;
+ int ret;
outlen = arg;
- *outlen += UTF8_putc(NULL, -1, value);
+ ret = UTF8_putc(NULL, -1, value);
+ if (ret < 0)
+ return ret;
+ *outlen += ret;
return 1;
}
#include <openssl/crypto.h>
#include <openssl/x509.h>
#include <openssl/asn1.h>
+#include "asn1_locl.h"
#include "charmap.h"
c |= ((unsigned long)*p++) << 16;
c |= ((unsigned long)*p++) << 8;
c |= *p++;
+ if (c > UNICODE_MAX || UNICODE_IS_SURROGATE(c))
+ return -1;
break;
case 2:
c = ((unsigned long)*p++) << 8;
c |= *p++;
+ if (UNICODE_IS_SURROGATE(c))
+ return -1;
break;
case 1:
if (type & BUF_TYPE_CONVUTF8) {
unsigned char utfbuf[6];
int utflen;
+
utflen = UTF8_putc(utfbuf, sizeof utfbuf, c);
+ if (utflen < 0)
+ return -1;
for (i = 0; i < utflen; i++) {
/* We don't need to worry about setting orflags correctly
* because if utflen==1 its value will be correct anyway
#include <stdio.h>
#include "cryptlib.h"
#include <openssl/asn1.h>
+#include "asn1_locl.h"
/* UTF8 utilities */
-/* This parses a UTF8 string one character at a time. It is passed a pointer
+/*
+ * This parses a UTF8 string one character at a time. It is passed a pointer
* to the string and the length of the string. It sets 'value' to the value of
* the current character. It returns the number of characters read or a
* negative error code:
value = *p++ & 0x7f;
ret = 1;
} else if ((*p & 0xe0) == 0xc0) {
+ if (*p < 0xc2)
+ return -2;
if (len < 2)
return -1;
if ((p[1] & 0xc0) != 0x80)
value |= *p++ & 0x3f;
if (value < 0x800)
return -4;
+ /* surrogate pair code points are not valid */
+ if (value >= 0xd800 && value < 0xe000)
+ return -2;
ret = 3;
- } else if ((*p & 0xf8) == 0xf0) {
+ } else if ((*p & 0xf8) == 0xf0 && (*p < 0xf5)) {
if (len < 4)
return -1;
if (((p[1] & 0xc0) != 0x80) ||
value |= *p++ & 0x3f;
if (value < 0x10000)
return -4;
+ if (value > UNICODE_MAX)
+ return -2;
ret = 4;
- } else if ((*p & 0xfc) == 0xf8) {
- if (len < 5)
- return -1;
- if (((p[1] & 0xc0) != 0x80) ||
- ((p[2] & 0xc0) != 0x80) ||
- ((p[3] & 0xc0) != 0x80) ||
- ((p[4] & 0xc0) != 0x80))
- return -3;
- value = ((unsigned long)(*p++ & 0x3)) << 24;
- value |= ((unsigned long)(*p++ & 0x3f)) << 18;
- value |= ((unsigned long)(*p++ & 0x3f)) << 12;
- value |= (*p++ & 0x3f) << 6;
- value |= *p++ & 0x3f;
- if (value < 0x200000)
- return -4;
- ret = 5;
- } else if ((*p & 0xfe) == 0xfc) {
- if (len < 6)
- return -1;
- if (((p[1] & 0xc0) != 0x80) ||
- ((p[2] & 0xc0) != 0x80) ||
- ((p[3] & 0xc0) != 0x80) ||
- ((p[4] & 0xc0) != 0x80) ||
- ((p[5] & 0xc0) != 0x80))
- return -3;
- value = ((unsigned long)(*p++ & 0x1)) << 30;
- value |= ((unsigned long)(*p++ & 0x3f)) << 24;
- value |= ((unsigned long)(*p++ & 0x3f)) << 18;
- value |= ((unsigned long)(*p++ & 0x3f)) << 12;
- value |= (*p++ & 0x3f) << 6;
- value |= *p++ & 0x3f;
- if (value < 0x4000000)
- return -4;
- ret = 6;
- } else return -2;
- *val = value;
+ } else
+ return -2;
+ *val = value;
return ret;
}
-/* This takes a character 'value' and writes the UTF8 encoded value in
- * 'str' where 'str' is a buffer containing 'len' characters. Returns
- * the number of characters written or -1 if 'len' is too small. 'str' can
- * be set to NULL in which case it just returns the number of characters.
- * It will need at most 6 characters.
+/* This takes a Unicode code point 'value' and writes its UTF-8 encoded form
+ * in 'str' where 'str' is a buffer of at least length 'len'. If 'str'
+ * is NULL, then nothing is written and just the return code is determined.
+
+ * Returns less than zero on error:
+ * -1 if 'str' is not NULL and 'len' is too small
+ * -2 if 'value' is an invalid character (surrogate or out-of-range)
+ *
+ * Otherwise, returns the number of bytes in 'value's encoded form
+ * (i.e., the number of bytes written to 'str' when it's not NULL).
+ *
+ * It will need at most 4 characters.
*/
int
UTF8_putc(unsigned char *str, int len, unsigned long value)
{
- if (!str)
- len = 6; /* Maximum we will need */
- else if (len <= 0)
- return -1;
if (value < 0x80) {
- if (str)
- *str = (unsigned char)value;
+ if (str != NULL) {
+ if (len < 1)
+ return -1;
+ str[0] = (unsigned char)value;
+ }
return 1;
}
if (value < 0x800) {
- if (len < 2)
- return -1;
- if (str) {
- *str++ = (unsigned char)(((value >> 6) & 0x1f) | 0xc0);
- *str = (unsigned char)((value & 0x3f) | 0x80);
+ if (str != NULL) {
+ if (len < 2)
+ return -1;
+ str[0] = (unsigned char)(((value >> 6) & 0x1f) | 0xc0);
+ str[1] = (unsigned char)((value & 0x3f) | 0x80);
}
return 2;
}
if (value < 0x10000) {
- if (len < 3)
- return -1;
- if (str) {
- *str++ = (unsigned char)(((value >> 12) & 0xf) | 0xe0);
- *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
- *str = (unsigned char)((value & 0x3f) | 0x80);
+ if (UNICODE_IS_SURROGATE(value))
+ return -2;
+ if (str != NULL) {
+ if (len < 3)
+ return -1;
+ str[0] = (unsigned char)(((value >> 12) & 0xf) | 0xe0);
+ str[1] = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
+ str[2] = (unsigned char)((value & 0x3f) | 0x80);
}
return 3;
}
- if (value < 0x200000) {
- if (len < 4)
- return -1;
- if (str) {
- *str++ = (unsigned char)(((value >> 18) & 0x7) | 0xf0);
- *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
- *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
- *str = (unsigned char)((value & 0x3f) | 0x80);
+ if (value <= UNICODE_MAX) {
+ if (str != NULL) {
+ if (len < 4)
+ return -1;
+ str[0] = (unsigned char)(((value >> 18) & 0x7) | 0xf0);
+ str[1] = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
+ str[2] = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
+ str[3] = (unsigned char)((value & 0x3f) | 0x80);
}
return 4;
}
- if (value < 0x4000000) {
- if (len < 5)
- return -1;
- if (str) {
- *str++ = (unsigned char)(((value >> 24) & 0x3) | 0xf8);
- *str++ = (unsigned char)(((value >> 18) & 0x3f) | 0x80);
- *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
- *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
- *str = (unsigned char)((value & 0x3f) | 0x80);
- }
- return 5;
- }
- if (len < 6)
- return -1;
- if (str) {
- *str++ = (unsigned char)(((value >> 30) & 0x1) | 0xfc);
- *str++ = (unsigned char)(((value >> 24) & 0x3f) | 0x80);
- *str++ = (unsigned char)(((value >> 18) & 0x3f) | 0x80);
- *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
- *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
- *str = (unsigned char)((value & 0x3f) | 0x80);
- }
- return 6;
+ return -2;
}
ASN1_INTEGER *ser, X509_NAME *issuer);
int (*crl_verify)(X509_CRL *crl, EVP_PKEY *pk);
};
+
+/*
+ * Unicode codepoint constants
+ */
+#define UNICODE_MAX 0x10FFFF
+#define UNICODE_SURROGATE_MIN 0x00D800
+#define UNICODE_SURROGATE_MAX 0x00DFFF
+
+#define UNICODE_IS_SURROGATE(x) \
+ ((x) >= UNICODE_SURROGATE_MIN && (x) <= UNICODE_SURROGATE_MAX)
+
#include <ctype.h>
#include "cryptlib.h"
#include <openssl/asn1.h>
+#include "asn1_locl.h"
static int traverse_string(const unsigned char *p, int len, int inform,
int (*rfunc)(unsigned long value, void *in), void *arg);
case MBSTRING_UTF8:
outlen = 0;
- traverse_string(in, len, inform, out_utf8, &outlen);
+ if (traverse_string(in, len, inform, out_utf8, &outlen) < 0) {
+ ASN1err(ASN1_F_ASN1_MBSTRING_NCOPY,
+ ASN1_R_ILLEGAL_CHARACTERS);
+ return -1;
+ }
cpyfunc = cpy_utf8;
break;
}
} else if (inform == MBSTRING_BMP) {
value = *p++ << 8;
value |= *p++;
+ /* BMP is explictly defined to not support surrogates */
+ if (UNICODE_IS_SURROGATE(value))
+ return -1;
len -= 2;
} else if (inform == MBSTRING_UNIV) {
value = ((unsigned long)*p++) << 24;
value |= ((unsigned long)*p++) << 16;
value |= *p++ << 8;
value |= *p++;
+ if (value > UNICODE_MAX || UNICODE_IS_SURROGATE(value))
+ return -1;
len -= 4;
} else {
ret = UTF8_getc(p, len, &value);
out_utf8(unsigned long value, void *arg)
{
int *outlen;
+ int ret;
outlen = arg;
- *outlen += UTF8_putc(NULL, -1, value);
+ ret = UTF8_putc(NULL, -1, value);
+ if (ret < 0)
+ return ret;
+ *outlen += ret;
return 1;
}
#include <openssl/crypto.h>
#include <openssl/x509.h>
#include <openssl/asn1.h>
+#include "asn1_locl.h"
#include "charmap.h"
c |= ((unsigned long)*p++) << 16;
c |= ((unsigned long)*p++) << 8;
c |= *p++;
+ if (c > UNICODE_MAX || UNICODE_IS_SURROGATE(c))
+ return -1;
break;
case 2:
c = ((unsigned long)*p++) << 8;
c |= *p++;
+ if (UNICODE_IS_SURROGATE(c))
+ return -1;
break;
case 1:
if (type & BUF_TYPE_CONVUTF8) {
unsigned char utfbuf[6];
int utflen;
+
utflen = UTF8_putc(utfbuf, sizeof utfbuf, c);
+ if (utflen < 0)
+ return -1;
for (i = 0; i < utflen; i++) {
/* We don't need to worry about setting orflags correctly
* because if utflen==1 its value will be correct anyway
#include <stdio.h>
#include "cryptlib.h"
#include <openssl/asn1.h>
+#include "asn1_locl.h"
/* UTF8 utilities */
-/* This parses a UTF8 string one character at a time. It is passed a pointer
+/*
+ * This parses a UTF8 string one character at a time. It is passed a pointer
* to the string and the length of the string. It sets 'value' to the value of
* the current character. It returns the number of characters read or a
* negative error code:
value = *p++ & 0x7f;
ret = 1;
} else if ((*p & 0xe0) == 0xc0) {
+ if (*p < 0xc2)
+ return -2;
if (len < 2)
return -1;
if ((p[1] & 0xc0) != 0x80)
value |= *p++ & 0x3f;
if (value < 0x800)
return -4;
+ /* surrogate pair code points are not valid */
+ if (value >= 0xd800 && value < 0xe000)
+ return -2;
ret = 3;
- } else if ((*p & 0xf8) == 0xf0) {
+ } else if ((*p & 0xf8) == 0xf0 && (*p < 0xf5)) {
if (len < 4)
return -1;
if (((p[1] & 0xc0) != 0x80) ||
value |= *p++ & 0x3f;
if (value < 0x10000)
return -4;
+ if (value > UNICODE_MAX)
+ return -2;
ret = 4;
- } else if ((*p & 0xfc) == 0xf8) {
- if (len < 5)
- return -1;
- if (((p[1] & 0xc0) != 0x80) ||
- ((p[2] & 0xc0) != 0x80) ||
- ((p[3] & 0xc0) != 0x80) ||
- ((p[4] & 0xc0) != 0x80))
- return -3;
- value = ((unsigned long)(*p++ & 0x3)) << 24;
- value |= ((unsigned long)(*p++ & 0x3f)) << 18;
- value |= ((unsigned long)(*p++ & 0x3f)) << 12;
- value |= (*p++ & 0x3f) << 6;
- value |= *p++ & 0x3f;
- if (value < 0x200000)
- return -4;
- ret = 5;
- } else if ((*p & 0xfe) == 0xfc) {
- if (len < 6)
- return -1;
- if (((p[1] & 0xc0) != 0x80) ||
- ((p[2] & 0xc0) != 0x80) ||
- ((p[3] & 0xc0) != 0x80) ||
- ((p[4] & 0xc0) != 0x80) ||
- ((p[5] & 0xc0) != 0x80))
- return -3;
- value = ((unsigned long)(*p++ & 0x1)) << 30;
- value |= ((unsigned long)(*p++ & 0x3f)) << 24;
- value |= ((unsigned long)(*p++ & 0x3f)) << 18;
- value |= ((unsigned long)(*p++ & 0x3f)) << 12;
- value |= (*p++ & 0x3f) << 6;
- value |= *p++ & 0x3f;
- if (value < 0x4000000)
- return -4;
- ret = 6;
- } else return -2;
- *val = value;
+ } else
+ return -2;
+ *val = value;
return ret;
}
-/* This takes a character 'value' and writes the UTF8 encoded value in
- * 'str' where 'str' is a buffer containing 'len' characters. Returns
- * the number of characters written or -1 if 'len' is too small. 'str' can
- * be set to NULL in which case it just returns the number of characters.
- * It will need at most 6 characters.
+/* This takes a Unicode code point 'value' and writes its UTF-8 encoded form
+ * in 'str' where 'str' is a buffer of at least length 'len'. If 'str'
+ * is NULL, then nothing is written and just the return code is determined.
+
+ * Returns less than zero on error:
+ * -1 if 'str' is not NULL and 'len' is too small
+ * -2 if 'value' is an invalid character (surrogate or out-of-range)
+ *
+ * Otherwise, returns the number of bytes in 'value's encoded form
+ * (i.e., the number of bytes written to 'str' when it's not NULL).
+ *
+ * It will need at most 4 characters.
*/
int
UTF8_putc(unsigned char *str, int len, unsigned long value)
{
- if (!str)
- len = 6; /* Maximum we will need */
- else if (len <= 0)
- return -1;
if (value < 0x80) {
- if (str)
- *str = (unsigned char)value;
+ if (str != NULL) {
+ if (len < 1)
+ return -1;
+ str[0] = (unsigned char)value;
+ }
return 1;
}
if (value < 0x800) {
- if (len < 2)
- return -1;
- if (str) {
- *str++ = (unsigned char)(((value >> 6) & 0x1f) | 0xc0);
- *str = (unsigned char)((value & 0x3f) | 0x80);
+ if (str != NULL) {
+ if (len < 2)
+ return -1;
+ str[0] = (unsigned char)(((value >> 6) & 0x1f) | 0xc0);
+ str[1] = (unsigned char)((value & 0x3f) | 0x80);
}
return 2;
}
if (value < 0x10000) {
- if (len < 3)
- return -1;
- if (str) {
- *str++ = (unsigned char)(((value >> 12) & 0xf) | 0xe0);
- *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
- *str = (unsigned char)((value & 0x3f) | 0x80);
+ if (UNICODE_IS_SURROGATE(value))
+ return -2;
+ if (str != NULL) {
+ if (len < 3)
+ return -1;
+ str[0] = (unsigned char)(((value >> 12) & 0xf) | 0xe0);
+ str[1] = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
+ str[2] = (unsigned char)((value & 0x3f) | 0x80);
}
return 3;
}
- if (value < 0x200000) {
- if (len < 4)
- return -1;
- if (str) {
- *str++ = (unsigned char)(((value >> 18) & 0x7) | 0xf0);
- *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
- *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
- *str = (unsigned char)((value & 0x3f) | 0x80);
+ if (value <= UNICODE_MAX) {
+ if (str != NULL) {
+ if (len < 4)
+ return -1;
+ str[0] = (unsigned char)(((value >> 18) & 0x7) | 0xf0);
+ str[1] = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
+ str[2] = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
+ str[3] = (unsigned char)((value & 0x3f) | 0x80);
}
return 4;
}
- if (value < 0x4000000) {
- if (len < 5)
- return -1;
- if (str) {
- *str++ = (unsigned char)(((value >> 24) & 0x3) | 0xf8);
- *str++ = (unsigned char)(((value >> 18) & 0x3f) | 0x80);
- *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
- *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
- *str = (unsigned char)((value & 0x3f) | 0x80);
- }
- return 5;
- }
- if (len < 6)
- return -1;
- if (str) {
- *str++ = (unsigned char)(((value >> 30) & 0x1) | 0xfc);
- *str++ = (unsigned char)(((value >> 24) & 0x3f) | 0x80);
- *str++ = (unsigned char)(((value >> 18) & 0x3f) | 0x80);
- *str++ = (unsigned char)(((value >> 12) & 0x3f) | 0x80);
- *str++ = (unsigned char)(((value >> 6) & 0x3f) | 0x80);
- *str = (unsigned char)((value & 0x3f) | 0x80);
- }
- return 6;
+ return -2;
}
ASN1_INTEGER *ser, X509_NAME *issuer);
int (*crl_verify)(X509_CRL *crl, EVP_PKEY *pk);
};
+
+/*
+ * Unicode codepoint constants
+ */
+#define UNICODE_MAX 0x10FFFF
+#define UNICODE_SURROGATE_MIN 0x00D800
+#define UNICODE_SURROGATE_MAX 0x00DFFF
+
+#define UNICODE_IS_SURROGATE(x) \
+ ((x) >= UNICODE_SURROGATE_MIN && (x) <= UNICODE_SURROGATE_MAX)
+