The UTF-8 decoder should not accept byte sequences which decode to Unicode
author stsp <stsp@openbsd.org>
Thu, 5 Aug 2010 17:13:53 +0000 (17:13 +0000)
committer stsp <stsp@openbsd.org>
Thu, 5 Aug 2010 17:13:53 +0000 (17:13 +0000)
code positions U+D800 to U+DFFF (UTF-16 surrogates), U+FFFE, and U+FFFF.

http://www.cl.cam.ac.uk/~mgk25/unicode.html#utf-8
http://unicode.org/faq/utf_bom.html#utf8-4

ok phessler, millert, miod, deraadt

lib/libc/citrus/citrus_utf8.c

index 0ead0c7..45d07d7 100644 (file)
@@ -1,4 +1,4 @@
-/*     $OpenBSD: citrus_utf8.c,v 1.2 2010/07/29 00:50:10 stsp Exp $ */
+/*     $OpenBSD: citrus_utf8.c,v 1.3 2010/08/05 17:13:53 stsp Exp $ */
 
 /*-
  * Copyright (c) 2002-2004 Tim J. Robbins
@@ -162,6 +162,14 @@ _citrus_utf8_ctype_mbrtowc(wchar_t * __restrict pwc,
                errno = EILSEQ;
                return ((size_t)-1);
        }
+       if ((wch >= 0xd800 && wch <= 0xdfff) ||
+           wch == 0xfffe || wch == 0xffff) {
+               /*
+                * Malformed input; invalid code points.
+                */
+               errno = EILSEQ;
+               return ((size_t)-1);
+       }
        if (pwc != NULL)
                *pwc = wch;
        us->want = 0;