Add regression test for UTF8_{getc,putc}()
author guenther <guenther@openbsd.org>
Sun, 18 May 2014 22:04:14 +0000 (22:04 +0000)
committer guenther <guenther@openbsd.org>
Sun, 18 May 2014 22:04:14 +0000 (22:04 +0000)
regress/lib/libcrypto/Makefile
regress/lib/libcrypto/utf8/Makefile [new file with mode: 0644]
regress/lib/libcrypto/utf8/utf8test.c [new file with mode: 0644]

index 6cf7191..54fcae7 100644 (file)
@@ -1,4 +1,4 @@
-#      $OpenBSD: Makefile,v 1.9 2014/05/14 14:46:35 jsing Exp $
+#      $OpenBSD: Makefile,v 1.10 2014/05/18 22:04:14 guenther Exp $
 
 SUBDIR= \
        aeswrap \
@@ -31,7 +31,8 @@ SUBDIR= \
        rmd \
        sha \
        sha1 \
-       sha2
+       sha2 \
+       utf8
 
 install:
 
diff --git a/regress/lib/libcrypto/utf8/Makefile b/regress/lib/libcrypto/utf8/Makefile
new file mode 100644 (file)
index 0000000..4940e60
--- /dev/null
@@ -0,0 +1,7 @@
+#      $OpenBSD: Makefile,v 1.1 2014/05/18 22:04:14 guenther Exp $
+
+PROG=  utf8test
+LDADD= -lcrypto
+DPADD= ${LIBCRYPTO}
+
+.include <bsd.regress.mk>
diff --git a/regress/lib/libcrypto/utf8/utf8test.c b/regress/lib/libcrypto/utf8/utf8test.c
new file mode 100644 (file)
index 0000000..5b737a5
--- /dev/null
@@ -0,0 +1,307 @@
+/*
+ * Copyright (c) 2014 Philip Guenther <guenther@openbsd.org>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * A mostly exhaustive test of UTF-8 decoder and encoder
+ */
+
+#include <stdio.h>
+#include <string.h>
+#include <err.h>
+
+#include <openssl/asn1.h>
+
+#define        UNCHANGED       0xfedcba98      /* preset to detect writes */
+/* Exit through errx(1), naming the line and expression, when x is false. */
+#define ASSERT(x)                                              \
+       do {                                                    \
+               if (!(x))                                       \
+                       errx(1, "test failed at line %d: %s",   \
+                           __LINE__, #x);                      \
+       } while (0)
+
+int
+main(void)
+{
+       unsigned char testbuf[] = "012345";
+       const unsigned char zerobuf[sizeof testbuf] = { 0 };
+       unsigned long value;
+       int i, j, k, l, ret;
+
+       /*
+        * First, verify UTF8_getc(): a zero length yields 0, value untouched
+        */
+       value = UNCHANGED;
+       ret = UTF8_getc(testbuf, 0, &value);
+       ASSERT(ret == 0);
+       ASSERT(value == UNCHANGED);
+
+       /* each ASCII byte decodes to itself, with or without a spare byte */
+       for (i = 0; i < 0x80; i++) {
+               testbuf[0] = i;
+               ret = UTF8_getc(testbuf, 1, &value);
+               ASSERT(ret == 1);
+               ASSERT(value == i);
+
+               ret = UTF8_getc(testbuf, 2, &value);
+               ASSERT(ret == 1);
+               ASSERT(value == i);
+       }
+
+       /*
+        * Verify failure (-2, value untouched) on all invalid initial bytes:
+        *      0x80 - 0xBF     following bytes only
+        *      0xC0 - 0xC1     used to be in non-shortest forms
+        *      0xF5 - 0xFD     used to be initial for 5 and 6 byte sequences
+        *      0xFE - 0xFF     have never been valid in utf-8
+        */
+       for (i = 0x80; i < 0xC2; i++) {
+               value = UNCHANGED;
+               testbuf[0] = i;
+               ret = UTF8_getc(testbuf, 1, &value);
+               ASSERT(ret == -2);
+               ASSERT(value == UNCHANGED);
+       }
+       for (i = 0xF5; i < 0x100; i++) {
+               value = UNCHANGED;
+               testbuf[0] = i;
+               ret = UTF8_getc(testbuf, 1, &value);
+               ASSERT(ret == -2);
+               ASSERT(value == UNCHANGED);
+       }
+
+       /*
+        * Verify handling of all two-byte sequences; one byte alone gives -1
+        */
+       for (i = 0xC2; i < 0xE0; i++) {
+               testbuf[0] = i;
+
+               for (j = 0; j < 0x100; j++) {
+                       testbuf[1] = j;
+
+                       value = UNCHANGED;
+                       ret = UTF8_getc(testbuf, 1, &value);
+                       ASSERT(ret == -1);
+                       ASSERT(value == UNCHANGED);
+
+                       ret = UTF8_getc(testbuf, 2, &value);
+
+                       /* second byte is not a 0x80-0xBF trailing byte */
+                       if (j < 0x80 || j > 0xBF) {
+                               ASSERT(ret == -3);
+                               ASSERT(value == UNCHANGED);
+                               continue;
+                       }
+
+                       /* valid: 5 payload bits from lead byte, 6 from trailer */
+                       ASSERT(ret == 2);
+                       ASSERT((value & 0x3F) == (j & 0x3F));
+                       ASSERT(value >> 6 == (i & 0x1F));
+               }
+       }
+
+#if 0  /* compiled out; the only code here that uses k and l */
+       /*
+        * Verify handling of all three-byte sequences
+        */
+       for (i = 0xE0; i < 0xF0; i++) {
+               testbuf[0] = i;
+
+               for (j = 0; j < 0x100; j++) {
+                       testbuf[1] = j;
+
+                       for (k = 0; k < 0x100; k++) {
+                               testbuf[2] = k;
+
+                               value = UNCHANGED;
+                               ret = UTF8_getc(testbuf, 2, &value);
+                               ASSERT(ret == -1);
+                               ASSERT(value == UNCHANGED);
+
+                               ret = UTF8_getc(testbuf, 3, &value);
+
+                               /* outside range of trailing bytes */
+                               if (j < 0x80 || j > 0xBF ||
+                                   k < 0x80 || k > 0xBF) {
+                                       ASSERT(ret == -3);
+                                       ASSERT(value == UNCHANGED);
+                                       continue;
+                               }
+
+                               /* non-shortest form */
+                               if (i == 0xE0 && j < 0xA0) {
+                                       ASSERT(ret == -4);
+                                       ASSERT(value == UNCHANGED);
+                                       continue;
+                               }
+
+                               /* surrogate pair code point */
+                               if (i == 0xED && j > 0x9F) {
+                                       ASSERT(ret == -2);
+                                       ASSERT(value == UNCHANGED);
+                                       continue;
+                               }
+
+                               ASSERT(ret == 3);
+                               ASSERT((value & 0x3F) == (k & 0x3F));
+                               ASSERT(((value >> 6) & 0x3F) == (j & 0x3F));
+                               ASSERT(value >> 12 == (i & 0x0F));
+                       }
+               }
+       }
+
+       /*
+        * Verify handling of all four-byte sequences
+        */
+       for (i = 0xF0; i < 0xF5; i++) {
+               testbuf[0] = i;
+
+               for (j = 0; j < 0x100; j++) {
+                       testbuf[1] = j;
+
+                       for (k = 0; k < 0x100; k++) {
+                               testbuf[2] = k;
+
+                               for (l = 0; l < 0x100; l++) {
+                                       testbuf[3] = l;
+
+                                       value = UNCHANGED;
+                                       ret = UTF8_getc(testbuf, 3, &value);
+                                       ASSERT(ret == -1);
+                                       ASSERT(value == UNCHANGED);
+
+                                       ret = UTF8_getc(testbuf, 4, &value);
+
+                                       /* outside range of trailing bytes */
+                                       if (j < 0x80 || j > 0xBF ||
+                                           k < 0x80 || k > 0xBF ||
+                                           l < 0x80 || l > 0xBF) {
+                                               ASSERT(ret == -3);
+                                               ASSERT(value == UNCHANGED);
+                                               continue;
+                                       }
+
+                                       /* non-shortest form */
+                                       if (i == 0xF0 && j < 0x90) {
+                                               ASSERT(ret == -4);
+                                               ASSERT(value == UNCHANGED);
+                                               continue;
+                                       }
+
+                                       /* beyond end of UCS range */
+                                       if (i == 0xF4 && j > 0x8F) {
+                                               ASSERT(ret == -2);
+                                               ASSERT(value == UNCHANGED);
+                                               continue;
+                                       }
+
+                                       ASSERT(ret == 4);
+                                       ASSERT((value & 0x3F) == (l & 0x3F));
+                                       ASSERT(((value >> 6) & 0x3F) ==
+                                                         (k & 0x3F));
+                                       ASSERT(((value >> 12) & 0x3F) ==
+                                                          (j & 0x3F));
+                                       ASSERT(value >> 18 == (i & 0x07));
+                               }
+                       }
+               }
+       }
+#endif /* 0 */
+
+
+       /*
+        * Next, verify UTF8_putc(), starting from an all-zero buffer
+        */
+       memset(testbuf, 0, sizeof testbuf);
+
+       /* single-byte sequences: a NULL buffer must still return the length */
+       for (i = 0; i < 0x80; i++) {
+               ret = UTF8_putc(NULL, 0, i);
+               ASSERT(ret == 1);
+
+               testbuf[0] = 0;
+               ret = UTF8_putc(testbuf, 0, i);
+               ASSERT(ret == -1);
+               ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0);
+
+               ret = UTF8_putc(testbuf, 1, i);
+               ASSERT(ret == 1);
+               ASSERT(testbuf[0] == i);
+               ASSERT(memcmp(testbuf+1, zerobuf, sizeof(testbuf)-1) == 0);
+       }
+
+       /* two-byte sequences: short buffer gives -1, else must round-trip */
+       for (i = 0x80; i < 0x800; i++) {
+               ret = UTF8_putc(NULL, 0, i);
+               ASSERT(ret == 2);
+
+               testbuf[0] = testbuf[1] = 0;
+               ret = UTF8_putc(testbuf, 1, i);
+               ASSERT(ret == -1);
+               ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0);
+
+               ret = UTF8_putc(testbuf, 2, i);
+               ASSERT(ret == 2);
+               ASSERT(memcmp(testbuf+2, zerobuf, sizeof(testbuf)-2) == 0);
+               ret = UTF8_getc(testbuf, 2, &value);
+               ASSERT(ret == 2);
+               ASSERT(value == i);
+       }
+
+       /* three-byte sequences: same checks with a 3-byte encoding */
+       for (i = 0x800; i < 0x10000; i++) {
+               /* XXX skip surrogate pair code points (0xD800 - 0xDFFF) */
+               if (i >= 0xD800 && i < 0xE000)
+                       continue;
+                       
+               ret = UTF8_putc(NULL, 0, i);
+               ASSERT(ret == 3);
+
+               testbuf[0] = testbuf[1] = testbuf[2] = 0;
+               ret = UTF8_putc(testbuf, 2, i);
+               ASSERT(ret == -1);
+               ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0);
+
+               ret = UTF8_putc(testbuf, 3, i);
+               ASSERT(ret == 3);
+               ASSERT(memcmp(testbuf+3, zerobuf, sizeof(testbuf)-3) == 0);
+               ret = UTF8_getc(testbuf, 3, &value);
+               ASSERT(ret == 3);
+               ASSERT(value == i);
+       }
+
+       /* four-byte sequences: same checks with a 4-byte encoding */
+       for (i = 0x10000; i < 0x110000; i++) {
+               ret = UTF8_putc(NULL, 0, i);
+               ASSERT(ret == 4);
+
+               testbuf[0] = testbuf[1] = testbuf[2] = testbuf[3] = 0;
+               ret = UTF8_putc(testbuf, 3, i);
+               ASSERT(ret == -1);
+               ASSERT(memcmp(testbuf, zerobuf, sizeof testbuf) == 0);
+
+               ret = UTF8_putc(testbuf, 4, i);
+               ASSERT(ret == 4);
+               ASSERT(memcmp(testbuf+4, zerobuf, sizeof(testbuf)-4) == 0);
+               ret = UTF8_getc(testbuf, 4, &value);
+               ASSERT(ret == 4);
+               ASSERT(value == i);
+       }
+
+       /* XXX What should UTF8_putc() do with values > 0x10FFFF? */
+
+       return 0;
+}