Add cryptox(4), a driver for the ARMv8 cryptographic extensions.
author		tobhe <tobhe@openbsd.org>
		Sun, 21 Feb 2021 14:55:16 +0000 (14:55 +0000)
committer	tobhe <tobhe@openbsd.org>
		Sun, 21 Feb 2021 14:55:16 +0000 (14:55 +0000)
The driver currently supports only AES-CBC mode, but can easily
be extended to other algorithms and modes.
The aesv8-armx.S file was generated from the CRYPTOGAMS project.

Asked to commit by and ok'd by patrick@
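
For context: nothing calls cryptox directly.  cryptox_setup() registers the
driver with the generic crypto(9) framework, and consumers such as IPsec
submit work through that framework.  A minimal sketch of the consumer side,
assuming the crypto(9) interfaces of this era (m, key, len and done_cb are
illustrative placeholders; error handling elided):

	struct cryptoini cri;
	struct cryptop *crp;
	u_int64_t sid;

	memset(&cri, 0, sizeof(cri));
	cri.cri_alg = CRYPTO_AES_CBC;
	cri.cri_klen = 128;			/* key length in bits */
	cri.cri_key = key;
	if (crypto_newsession(&sid, &cri, 0))	/* may land on cryptox */
		return;

	crp = crypto_getreq(1);			/* one descriptor */
	crp->crp_sid = sid;
	crp->crp_ilen = len;
	crp->crp_flags = CRYPTO_F_IMBUF;	/* data lives in mbuf m */
	crp->crp_buf = (caddr_t)m;
	crp->crp_desc[0].crd_alg = CRYPTO_AES_CBC;
	crp->crp_desc[0].crd_inject = 0;	/* driver writes the random IV here */
	crp->crp_desc[0].crd_skip = 16;		/* payload follows the IV */
	crp->crp_desc[0].crd_len = len - 16;
	crp->crp_desc[0].crd_flags = CRD_F_ENCRYPT;
	crp->crp_callback = done_cb;
	crypto_dispatch(crp);

cryptox_process() below picks the dispatched request up, looks up the session
under SMR and runs the CBC operation through the assembly routines.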

sys/arch/arm64/arm64/aesv8-armx.S [new file with mode: 0644]
sys/arch/arm64/arm64/autoconf.c
sys/arch/arm64/arm64/cpu.c
sys/arch/arm64/arm64/cryptox.c [new file with mode: 0644]
sys/arch/arm64/conf/files.arm64

diff --git a/sys/arch/arm64/arm64/aesv8-armx.S b/sys/arch/arm64/arm64/aesv8-armx.S
new file mode 100644
index 0000000..d62cfcd
--- /dev/null
@@ -0,0 +1,1143 @@
+/*     $OpenBSD: aesv8-armx.S,v 1.1 2021/02/21 14:55:16 tobhe Exp $    */
+/*
+ * Copyright (c) 2006, CRYPTOGAMS by <appro@openssl.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *       *     Redistributions of source code must retain copyright notices,
+ *     this list of conditions and the following disclaimer.
+ *
+ *       *     Redistributions in binary form must reproduce the above
+ *     copyright notice, this list of conditions and the following
+ *     disclaimer in the documentation and/or other materials
+ *     provided with the distribution.
+ *
+ *       *     Neither the name of the CRYPTOGAMS nor the names of its
+ *     copyright holder and contributors may be used to endorse or
+ *     promote products derived from this software without specific
+ *     prior written permission.
+ *
+ * ALTERNATIVELY, provided that this notice is retained in full, this
+ * product may be distributed under the terms of the GNU General Public
+ * License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+ * those given above.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Generated from CRYPTOGAMS aesv8-armx.pl.
+ * Changes to the original source code:
+ *
+ * - removed #include "arm_arch.h"
+ * - removed redundant __ARM_MAX_ARCH__ check
+ */
+
+.arch  armv8-a+crypto
+.text
+.align 5
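+// Round-constant table: the rcon seeds (doubled via shl each iteration), a
+// tbl byte mask that rotates and splats the last word of the previous round
+// key, and 0x1b to continue the rcon sequence once 0x80 has been passed.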
+.Lrcon:
+.long  0x01,0x01,0x01,0x01
+.long  0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d     // rotate-n-splat
+.long  0x1b,0x1b,0x1b,0x1b
+
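+// aes_v8_set_encrypt_key(key=x0, bits=w1, schedule out=x2): returns 0 on
+// success, -1 on NULL arguments, -2 on an unsupported key length; the round
+// count is stored at the end of the schedule (offset 240).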
+.globl aes_v8_set_encrypt_key
+.type  aes_v8_set_encrypt_key,%function
+.align 5
+aes_v8_set_encrypt_key:
+.Lenc_key:
+       stp     x29,x30,[sp,#-16]!
+       add     x29,sp,#0
+       mov     x3,#-1
+       cmp     x0,#0
+       b.eq    .Lenc_key_abort
+       cmp     x2,#0
+       b.eq    .Lenc_key_abort
+       mov     x3,#-2
+       cmp     w1,#128
+       b.lt    .Lenc_key_abort
+       cmp     w1,#256
+       b.gt    .Lenc_key_abort
+       tst     w1,#0x3f
+       b.ne    .Lenc_key_abort
+
+       adr     x3,.Lrcon
+       cmp     w1,#192
+
+       eor     v0.16b,v0.16b,v0.16b
+       ld1     {v3.16b},[x0],#16
+       mov     w1,#8           // reuse w1
+       ld1     {v1.4s,v2.4s},[x3],#32
+
+       b.lt    .Loop128
+       b.eq    .L192
+       b       .L256
+
+.align 4
+.Loop128:
+       tbl     v6.16b,{v3.16b},v2.16b
+       ext     v5.16b,v0.16b,v3.16b,#12
+       st1     {v3.4s},[x2],#16
+       aese    v6.16b,v0.16b
+       subs    w1,w1,#1
+
+       eor     v3.16b,v3.16b,v5.16b
+       ext     v5.16b,v0.16b,v5.16b,#12
+       eor     v3.16b,v3.16b,v5.16b
+       ext     v5.16b,v0.16b,v5.16b,#12
+       eor     v6.16b,v6.16b,v1.16b
+       eor     v3.16b,v3.16b,v5.16b
+       shl     v1.16b,v1.16b,#1
+       eor     v3.16b,v3.16b,v6.16b
+       b.ne    .Loop128
+
+       ld1     {v1.4s},[x3]
+
+       tbl     v6.16b,{v3.16b},v2.16b
+       ext     v5.16b,v0.16b,v3.16b,#12
+       st1     {v3.4s},[x2],#16
+       aese    v6.16b,v0.16b
+
+       eor     v3.16b,v3.16b,v5.16b
+       ext     v5.16b,v0.16b,v5.16b,#12
+       eor     v3.16b,v3.16b,v5.16b
+       ext     v5.16b,v0.16b,v5.16b,#12
+       eor     v6.16b,v6.16b,v1.16b
+       eor     v3.16b,v3.16b,v5.16b
+       shl     v1.16b,v1.16b,#1
+       eor     v3.16b,v3.16b,v6.16b
+
+       tbl     v6.16b,{v3.16b},v2.16b
+       ext     v5.16b,v0.16b,v3.16b,#12
+       st1     {v3.4s},[x2],#16
+       aese    v6.16b,v0.16b
+
+       eor     v3.16b,v3.16b,v5.16b
+       ext     v5.16b,v0.16b,v5.16b,#12
+       eor     v3.16b,v3.16b,v5.16b
+       ext     v5.16b,v0.16b,v5.16b,#12
+       eor     v6.16b,v6.16b,v1.16b
+       eor     v3.16b,v3.16b,v5.16b
+       eor     v3.16b,v3.16b,v6.16b
+       st1     {v3.4s},[x2]
+       add     x2,x2,#0x50
+
+       mov     w12,#10
+       b       .Ldone
+
+.align 4
+.L192:
+       ld1     {v4.8b},[x0],#8
+       movi    v6.16b,#8                       // borrow v6.16b
+       st1     {v3.4s},[x2],#16
+       sub     v2.16b,v2.16b,v6.16b    // adjust the mask
+
+.Loop192:
+       tbl     v6.16b,{v4.16b},v2.16b
+       ext     v5.16b,v0.16b,v3.16b,#12
+       st1     {v4.8b},[x2],#8
+       aese    v6.16b,v0.16b
+       subs    w1,w1,#1
+
+       eor     v3.16b,v3.16b,v5.16b
+       ext     v5.16b,v0.16b,v5.16b,#12
+       eor     v3.16b,v3.16b,v5.16b
+       ext     v5.16b,v0.16b,v5.16b,#12
+       eor     v3.16b,v3.16b,v5.16b
+
+       dup     v5.4s,v3.s[3]
+       eor     v5.16b,v5.16b,v4.16b
+       eor     v6.16b,v6.16b,v1.16b
+       ext     v4.16b,v0.16b,v4.16b,#12
+       shl     v1.16b,v1.16b,#1
+       eor     v4.16b,v4.16b,v5.16b
+       eor     v3.16b,v3.16b,v6.16b
+       eor     v4.16b,v4.16b,v6.16b
+       st1     {v3.4s},[x2],#16
+       b.ne    .Loop192
+
+       mov     w12,#12
+       add     x2,x2,#0x20
+       b       .Ldone
+
+.align 4
+.L256:
+       ld1     {v4.16b},[x0]
+       mov     w1,#7
+       mov     w12,#14
+       st1     {v3.4s},[x2],#16
+
+.Loop256:
+       tbl     v6.16b,{v4.16b},v2.16b
+       ext     v5.16b,v0.16b,v3.16b,#12
+       st1     {v4.4s},[x2],#16
+       aese    v6.16b,v0.16b
+       subs    w1,w1,#1
+
+       eor     v3.16b,v3.16b,v5.16b
+       ext     v5.16b,v0.16b,v5.16b,#12
+       eor     v3.16b,v3.16b,v5.16b
+       ext     v5.16b,v0.16b,v5.16b,#12
+       eor     v6.16b,v6.16b,v1.16b
+       eor     v3.16b,v3.16b,v5.16b
+       shl     v1.16b,v1.16b,#1
+       eor     v3.16b,v3.16b,v6.16b
+       st1     {v3.4s},[x2],#16
+       b.eq    .Ldone
+
+       dup     v6.4s,v3.s[3]           // just splat
+       ext     v5.16b,v0.16b,v4.16b,#12
+       aese    v6.16b,v0.16b
+
+       eor     v4.16b,v4.16b,v5.16b
+       ext     v5.16b,v0.16b,v5.16b,#12
+       eor     v4.16b,v4.16b,v5.16b
+       ext     v5.16b,v0.16b,v5.16b,#12
+       eor     v4.16b,v4.16b,v5.16b
+
+       eor     v4.16b,v4.16b,v6.16b
+       b       .Loop256
+
+.Ldone:
+       str     w12,[x2]
+       mov     x3,#0
+
+.Lenc_key_abort:
+       mov     x0,x3                   // return value
+       ldr     x29,[sp],#16
+       ret
+.size  aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
+
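+// aes_v8_set_decrypt_key: builds the encryption schedule via .Lenc_key, then
+// reverses the round-key order in place and applies aesimc (InvMixColumns)
+// to the inner round keys.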
+.globl aes_v8_set_decrypt_key
+.type  aes_v8_set_decrypt_key,%function
+.align 5
+aes_v8_set_decrypt_key:
+.inst  0xd503233f              // paciasp
+       stp     x29,x30,[sp,#-16]!
+       add     x29,sp,#0
+       bl      .Lenc_key
+
+       cmp     x0,#0
+       b.ne    .Ldec_key_abort
+
+       sub     x2,x2,#240              // restore original x2
+       mov     x4,#-16
+       add     x0,x2,x12,lsl#4 // end of key schedule
+
+       ld1     {v0.4s},[x2]
+       ld1     {v1.4s},[x0]
+       st1     {v0.4s},[x0],x4
+       st1     {v1.4s},[x2],#16
+
+.Loop_imc:
+       ld1     {v0.4s},[x2]
+       ld1     {v1.4s},[x0]
+       aesimc  v0.16b,v0.16b
+       aesimc  v1.16b,v1.16b
+       st1     {v0.4s},[x0],x4
+       st1     {v1.4s},[x2],#16
+       cmp     x0,x2
+       b.hi    .Loop_imc
+
+       ld1     {v0.4s},[x2]
+       aesimc  v0.16b,v0.16b
+       st1     {v0.4s},[x0]
+
+       eor     x0,x0,x0                // return value
+.Ldec_key_abort:
+       ldp     x29,x30,[sp],#16
+.inst  0xd50323bf              // autiasp
+       ret
+.size  aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
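+// aes_v8_encrypt/aes_v8_decrypt: one 16-byte block, in=x0, out=x1,
+// schedule=x2; the round count is loaded from offset 240 of the schedule.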
+.globl aes_v8_encrypt
+.type  aes_v8_encrypt,%function
+.align 5
+aes_v8_encrypt:
+       ldr     w3,[x2,#240]
+       ld1     {v0.4s},[x2],#16
+       ld1     {v2.16b},[x0]
+       sub     w3,w3,#2
+       ld1     {v1.4s},[x2],#16
+
+.Loop_enc:
+       aese    v2.16b,v0.16b
+       aesmc   v2.16b,v2.16b
+       ld1     {v0.4s},[x2],#16
+       subs    w3,w3,#2
+       aese    v2.16b,v1.16b
+       aesmc   v2.16b,v2.16b
+       ld1     {v1.4s},[x2],#16
+       b.gt    .Loop_enc
+
+       aese    v2.16b,v0.16b
+       aesmc   v2.16b,v2.16b
+       ld1     {v0.4s},[x2]
+       aese    v2.16b,v1.16b
+       eor     v2.16b,v2.16b,v0.16b
+
+       st1     {v2.16b},[x1]
+       ret
+.size  aes_v8_encrypt,.-aes_v8_encrypt
+.globl aes_v8_decrypt
+.type  aes_v8_decrypt,%function
+.align 5
+aes_v8_decrypt:
+       ldr     w3,[x2,#240]
+       ld1     {v0.4s},[x2],#16
+       ld1     {v2.16b},[x0]
+       sub     w3,w3,#2
+       ld1     {v1.4s},[x2],#16
+
+.Loop_dec:
+       aesd    v2.16b,v0.16b
+       aesimc  v2.16b,v2.16b
+       ld1     {v0.4s},[x2],#16
+       subs    w3,w3,#2
+       aesd    v2.16b,v1.16b
+       aesimc  v2.16b,v2.16b
+       ld1     {v1.4s},[x2],#16
+       b.gt    .Loop_dec
+
+       aesd    v2.16b,v0.16b
+       aesimc  v2.16b,v2.16b
+       ld1     {v0.4s},[x2]
+       aesd    v2.16b,v1.16b
+       eor     v2.16b,v2.16b,v0.16b
+
+       st1     {v2.16b},[x1]
+       ret
+.size  aes_v8_decrypt,.-aes_v8_decrypt
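+// aes_v8_cbc_encrypt(in=x0, out=x1, len=x2, schedule=x3, ivec=x4, enc=w5);
+// decryption is interleaved five, three or one block(s) at a time, since
+// CBC decryption (unlike encryption) can be parallelized.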
+.globl aes_v8_cbc_encrypt
+.type  aes_v8_cbc_encrypt,%function
+.align 5
+aes_v8_cbc_encrypt:
+       stp     x29,x30,[sp,#-16]!
+       add     x29,sp,#0
+       subs    x2,x2,#16
+       mov     x8,#16
+       b.lo    .Lcbc_abort
+       csel    x8,xzr,x8,eq
+
+       cmp     w5,#0                   // en- or decrypting?
+       ldr     w5,[x3,#240]
+       and     x2,x2,#-16
+       ld1     {v6.16b},[x4]
+       ld1     {v0.16b},[x0],x8
+
+       ld1     {v16.4s,v17.4s},[x3]            // load key schedule...
+       sub     w5,w5,#6
+       add     x7,x3,x5,lsl#4  // pointer to last 7 round keys
+       sub     w5,w5,#2
+       ld1     {v18.4s,v19.4s},[x7],#32
+       ld1     {v20.4s,v21.4s},[x7],#32
+       ld1     {v22.4s,v23.4s},[x7],#32
+       ld1     {v7.4s},[x7]
+
+       add     x7,x3,#32
+       mov     w6,w5
+       b.eq    .Lcbc_dec
+
+       cmp     w5,#2
+       eor     v0.16b,v0.16b,v6.16b
+       eor     v5.16b,v16.16b,v7.16b
+       b.eq    .Lcbc_enc128
+
+       ld1     {v2.4s,v3.4s},[x7]
+       add     x7,x3,#16
+       add     x6,x3,#16*4
+       add     x12,x3,#16*5
+       aese    v0.16b,v16.16b
+       aesmc   v0.16b,v0.16b
+       add     x14,x3,#16*6
+       add     x3,x3,#16*7
+       b       .Lenter_cbc_enc
+
+.align 4
+.Loop_cbc_enc:
+       aese    v0.16b,v16.16b
+       aesmc   v0.16b,v0.16b
+       st1     {v6.16b},[x1],#16
+.Lenter_cbc_enc:
+       aese    v0.16b,v17.16b
+       aesmc   v0.16b,v0.16b
+       aese    v0.16b,v2.16b
+       aesmc   v0.16b,v0.16b
+       ld1     {v16.4s},[x6]
+       cmp     w5,#4
+       aese    v0.16b,v3.16b
+       aesmc   v0.16b,v0.16b
+       ld1     {v17.4s},[x12]
+       b.eq    .Lcbc_enc192
+
+       aese    v0.16b,v16.16b
+       aesmc   v0.16b,v0.16b
+       ld1     {v16.4s},[x14]
+       aese    v0.16b,v17.16b
+       aesmc   v0.16b,v0.16b
+       ld1     {v17.4s},[x3]
+       nop
+
+.Lcbc_enc192:
+       aese    v0.16b,v16.16b
+       aesmc   v0.16b,v0.16b
+       subs    x2,x2,#16
+       aese    v0.16b,v17.16b
+       aesmc   v0.16b,v0.16b
+       csel    x8,xzr,x8,eq
+       aese    v0.16b,v18.16b
+       aesmc   v0.16b,v0.16b
+       aese    v0.16b,v19.16b
+       aesmc   v0.16b,v0.16b
+       ld1     {v16.16b},[x0],x8
+       aese    v0.16b,v20.16b
+       aesmc   v0.16b,v0.16b
+       eor     v16.16b,v16.16b,v5.16b
+       aese    v0.16b,v21.16b
+       aesmc   v0.16b,v0.16b
+       ld1     {v17.4s},[x7]           // re-pre-load rndkey[1]
+       aese    v0.16b,v22.16b
+       aesmc   v0.16b,v0.16b
+       aese    v0.16b,v23.16b
+       eor     v6.16b,v0.16b,v7.16b
+       b.hs    .Loop_cbc_enc
+
+       st1     {v6.16b},[x1],#16
+       b       .Lcbc_done
+
+.align 5
+.Lcbc_enc128:
+       ld1     {v2.4s,v3.4s},[x7]
+       aese    v0.16b,v16.16b
+       aesmc   v0.16b,v0.16b
+       b       .Lenter_cbc_enc128
+.Loop_cbc_enc128:
+       aese    v0.16b,v16.16b
+       aesmc   v0.16b,v0.16b
+       st1     {v6.16b},[x1],#16
+.Lenter_cbc_enc128:
+       aese    v0.16b,v17.16b
+       aesmc   v0.16b,v0.16b
+       subs    x2,x2,#16
+       aese    v0.16b,v2.16b
+       aesmc   v0.16b,v0.16b
+       csel    x8,xzr,x8,eq
+       aese    v0.16b,v3.16b
+       aesmc   v0.16b,v0.16b
+       aese    v0.16b,v18.16b
+       aesmc   v0.16b,v0.16b
+       aese    v0.16b,v19.16b
+       aesmc   v0.16b,v0.16b
+       ld1     {v16.16b},[x0],x8
+       aese    v0.16b,v20.16b
+       aesmc   v0.16b,v0.16b
+       aese    v0.16b,v21.16b
+       aesmc   v0.16b,v0.16b
+       aese    v0.16b,v22.16b
+       aesmc   v0.16b,v0.16b
+       eor     v16.16b,v16.16b,v5.16b
+       aese    v0.16b,v23.16b
+       eor     v6.16b,v0.16b,v7.16b
+       b.hs    .Loop_cbc_enc128
+
+       st1     {v6.16b},[x1],#16
+       b       .Lcbc_done
+.align 5
+.Lcbc_dec:
+       ld1     {v24.16b},[x0],#16
+       subs    x2,x2,#32               // bias
+       add     w6,w5,#2
+       orr     v3.16b,v0.16b,v0.16b
+       orr     v1.16b,v0.16b,v0.16b
+       orr     v27.16b,v24.16b,v24.16b
+       b.lo    .Lcbc_dec_tail
+
+       orr     v1.16b,v24.16b,v24.16b
+       ld1     {v24.16b},[x0],#16
+       orr     v2.16b,v0.16b,v0.16b
+       orr     v3.16b,v1.16b,v1.16b
+       orr     v27.16b,v24.16b,v24.16b
+       cmp     x2,#32
+       b.lo    .Loop3x_cbc_dec
+
+       ld1     {v25.16b},[x0],#16
+       ld1     {v26.16b},[x0],#16
+       sub     x2,x2,#32               // bias
+       mov     w6,w5
+       orr     v28.16b,v25.16b,v25.16b
+       orr     v29.16b,v26.16b,v26.16b
+
+.Loop5x_cbc_dec:
+       aesd    v0.16b,v16.16b
+       aesimc  v0.16b,v0.16b
+       aesd    v1.16b,v16.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v16.16b
+       aesimc  v24.16b,v24.16b
+       aesd    v25.16b,v16.16b
+       aesimc  v25.16b,v25.16b
+       aesd    v26.16b,v16.16b
+       aesimc  v26.16b,v26.16b
+       ld1     {v16.4s},[x7],#16
+       subs    w6,w6,#2
+       aesd    v0.16b,v17.16b
+       aesimc  v0.16b,v0.16b
+       aesd    v1.16b,v17.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v17.16b
+       aesimc  v24.16b,v24.16b
+       aesd    v25.16b,v17.16b
+       aesimc  v25.16b,v25.16b
+       aesd    v26.16b,v17.16b
+       aesimc  v26.16b,v26.16b
+       ld1     {v17.4s},[x7],#16
+       b.gt    .Loop5x_cbc_dec
+
+       aesd    v0.16b,v16.16b
+       aesimc  v0.16b,v0.16b
+       aesd    v1.16b,v16.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v16.16b
+       aesimc  v24.16b,v24.16b
+       aesd    v25.16b,v16.16b
+       aesimc  v25.16b,v25.16b
+       aesd    v26.16b,v16.16b
+       aesimc  v26.16b,v26.16b
+       cmp     x2,#0x40                // because .Lcbc_tail4x
+       sub     x2,x2,#0x50
+
+       aesd    v0.16b,v17.16b
+       aesimc  v0.16b,v0.16b
+       aesd    v1.16b,v17.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v17.16b
+       aesimc  v24.16b,v24.16b
+       aesd    v25.16b,v17.16b
+       aesimc  v25.16b,v25.16b
+       aesd    v26.16b,v17.16b
+       aesimc  v26.16b,v26.16b
+       csel    x6,xzr,x2,gt            // borrow x6, w6, "gt" is not typo
+       mov     x7,x3
+
+       aesd    v0.16b,v18.16b
+       aesimc  v0.16b,v0.16b
+       aesd    v1.16b,v18.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v18.16b
+       aesimc  v24.16b,v24.16b
+       aesd    v25.16b,v18.16b
+       aesimc  v25.16b,v25.16b
+       aesd    v26.16b,v18.16b
+       aesimc  v26.16b,v26.16b
+       add     x0,x0,x6                // x0 is adjusted in such a way that
+                                       // at exit from the loop v1.16b-v26.16b
+                                       // are loaded with the last "words"
+       add     x6,x2,#0x60             // because .Lcbc_tail4x
+
+       aesd    v0.16b,v19.16b
+       aesimc  v0.16b,v0.16b
+       aesd    v1.16b,v19.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v19.16b
+       aesimc  v24.16b,v24.16b
+       aesd    v25.16b,v19.16b
+       aesimc  v25.16b,v25.16b
+       aesd    v26.16b,v19.16b
+       aesimc  v26.16b,v26.16b
+
+       aesd    v0.16b,v20.16b
+       aesimc  v0.16b,v0.16b
+       aesd    v1.16b,v20.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v20.16b
+       aesimc  v24.16b,v24.16b
+       aesd    v25.16b,v20.16b
+       aesimc  v25.16b,v25.16b
+       aesd    v26.16b,v20.16b
+       aesimc  v26.16b,v26.16b
+
+       aesd    v0.16b,v21.16b
+       aesimc  v0.16b,v0.16b
+       aesd    v1.16b,v21.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v21.16b
+       aesimc  v24.16b,v24.16b
+       aesd    v25.16b,v21.16b
+       aesimc  v25.16b,v25.16b
+       aesd    v26.16b,v21.16b
+       aesimc  v26.16b,v26.16b
+
+       aesd    v0.16b,v22.16b
+       aesimc  v0.16b,v0.16b
+       aesd    v1.16b,v22.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v22.16b
+       aesimc  v24.16b,v24.16b
+       aesd    v25.16b,v22.16b
+       aesimc  v25.16b,v25.16b
+       aesd    v26.16b,v22.16b
+       aesimc  v26.16b,v26.16b
+
+       eor     v4.16b,v6.16b,v7.16b
+       aesd    v0.16b,v23.16b
+       eor     v5.16b,v2.16b,v7.16b
+       ld1     {v2.16b},[x0],#16
+       aesd    v1.16b,v23.16b
+       eor     v17.16b,v3.16b,v7.16b
+       ld1     {v3.16b},[x0],#16
+       aesd    v24.16b,v23.16b
+       eor     v30.16b,v27.16b,v7.16b
+       ld1     {v27.16b},[x0],#16
+       aesd    v25.16b,v23.16b
+       eor     v31.16b,v28.16b,v7.16b
+       ld1     {v28.16b},[x0],#16
+       aesd    v26.16b,v23.16b
+       orr     v6.16b,v29.16b,v29.16b
+       ld1     {v29.16b},[x0],#16
+       cbz     x6,.Lcbc_tail4x
+       ld1     {v16.4s},[x7],#16       // re-pre-load rndkey[0]
+       eor     v4.16b,v4.16b,v0.16b
+       orr     v0.16b,v2.16b,v2.16b
+       eor     v5.16b,v5.16b,v1.16b
+       orr     v1.16b,v3.16b,v3.16b
+       eor     v17.16b,v17.16b,v24.16b
+       orr     v24.16b,v27.16b,v27.16b
+       eor     v30.16b,v30.16b,v25.16b
+       orr     v25.16b,v28.16b,v28.16b
+       eor     v31.16b,v31.16b,v26.16b
+       st1     {v4.16b},[x1],#16
+       orr     v26.16b,v29.16b,v29.16b
+       st1     {v5.16b},[x1],#16
+       mov     w6,w5
+       st1     {v17.16b},[x1],#16
+       ld1     {v17.4s},[x7],#16       // re-pre-load rndkey[1]
+       st1     {v30.16b},[x1],#16
+       st1     {v31.16b},[x1],#16
+       b.hs    .Loop5x_cbc_dec
+
+       add     x2,x2,#0x50
+       cbz     x2,.Lcbc_done
+
+       add     w6,w5,#2
+       subs    x2,x2,#0x30
+       orr     v0.16b,v27.16b,v27.16b
+       orr     v2.16b,v27.16b,v27.16b
+       orr     v1.16b,v28.16b,v28.16b
+       orr     v3.16b,v28.16b,v28.16b
+       orr     v24.16b,v29.16b,v29.16b
+       orr     v27.16b,v29.16b,v29.16b
+       b.lo    .Lcbc_dec_tail
+
+       b       .Loop3x_cbc_dec
+
+.align 4
+.Lcbc_tail4x:
+       eor     v5.16b,v4.16b,v1.16b
+       eor     v17.16b,v17.16b,v24.16b
+       eor     v30.16b,v30.16b,v25.16b
+       eor     v31.16b,v31.16b,v26.16b
+       st1     {v5.16b},[x1],#16
+       st1     {v17.16b},[x1],#16
+       st1     {v30.16b},[x1],#16
+       st1     {v31.16b},[x1],#16
+
+       b       .Lcbc_done
+.align 4
+.Loop3x_cbc_dec:
+       aesd    v0.16b,v16.16b
+       aesimc  v0.16b,v0.16b
+       aesd    v1.16b,v16.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v16.16b
+       aesimc  v24.16b,v24.16b
+       ld1     {v16.4s},[x7],#16
+       subs    w6,w6,#2
+       aesd    v0.16b,v17.16b
+       aesimc  v0.16b,v0.16b
+       aesd    v1.16b,v17.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v17.16b
+       aesimc  v24.16b,v24.16b
+       ld1     {v17.4s},[x7],#16
+       b.gt    .Loop3x_cbc_dec
+
+       aesd    v0.16b,v16.16b
+       aesimc  v0.16b,v0.16b
+       aesd    v1.16b,v16.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v16.16b
+       aesimc  v24.16b,v24.16b
+       eor     v4.16b,v6.16b,v7.16b
+       subs    x2,x2,#0x30
+       eor     v5.16b,v2.16b,v7.16b
+       csel    x6,x2,x6,lo                     // x6, w6, is zero at this point
+       aesd    v0.16b,v17.16b
+       aesimc  v0.16b,v0.16b
+       aesd    v1.16b,v17.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v17.16b
+       aesimc  v24.16b,v24.16b
+       eor     v17.16b,v3.16b,v7.16b
+       add     x0,x0,x6                // x0 is adjusted in such a way that
+                                       // at exit from the loop v1.16b-v24.16b
+                                       // are loaded with the last "words"
+       orr     v6.16b,v27.16b,v27.16b
+       mov     x7,x3
+       aesd    v0.16b,v20.16b
+       aesimc  v0.16b,v0.16b
+       aesd    v1.16b,v20.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v20.16b
+       aesimc  v24.16b,v24.16b
+       ld1     {v2.16b},[x0],#16
+       aesd    v0.16b,v21.16b
+       aesimc  v0.16b,v0.16b
+       aesd    v1.16b,v21.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v21.16b
+       aesimc  v24.16b,v24.16b
+       ld1     {v3.16b},[x0],#16
+       aesd    v0.16b,v22.16b
+       aesimc  v0.16b,v0.16b
+       aesd    v1.16b,v22.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v22.16b
+       aesimc  v24.16b,v24.16b
+       ld1     {v27.16b},[x0],#16
+       aesd    v0.16b,v23.16b
+       aesd    v1.16b,v23.16b
+       aesd    v24.16b,v23.16b
+       ld1     {v16.4s},[x7],#16       // re-pre-load rndkey[0]
+       add     w6,w5,#2
+       eor     v4.16b,v4.16b,v0.16b
+       eor     v5.16b,v5.16b,v1.16b
+       eor     v24.16b,v24.16b,v17.16b
+       ld1     {v17.4s},[x7],#16       // re-pre-load rndkey[1]
+       st1     {v4.16b},[x1],#16
+       orr     v0.16b,v2.16b,v2.16b
+       st1     {v5.16b},[x1],#16
+       orr     v1.16b,v3.16b,v3.16b
+       st1     {v24.16b},[x1],#16
+       orr     v24.16b,v27.16b,v27.16b
+       b.hs    .Loop3x_cbc_dec
+
+       cmn     x2,#0x30
+       b.eq    .Lcbc_done
+       nop
+
+.Lcbc_dec_tail:
+       aesd    v1.16b,v16.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v16.16b
+       aesimc  v24.16b,v24.16b
+       ld1     {v16.4s},[x7],#16
+       subs    w6,w6,#2
+       aesd    v1.16b,v17.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v17.16b
+       aesimc  v24.16b,v24.16b
+       ld1     {v17.4s},[x7],#16
+       b.gt    .Lcbc_dec_tail
+
+       aesd    v1.16b,v16.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v16.16b
+       aesimc  v24.16b,v24.16b
+       aesd    v1.16b,v17.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v17.16b
+       aesimc  v24.16b,v24.16b
+       aesd    v1.16b,v20.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v20.16b
+       aesimc  v24.16b,v24.16b
+       cmn     x2,#0x20
+       aesd    v1.16b,v21.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v21.16b
+       aesimc  v24.16b,v24.16b
+       eor     v5.16b,v6.16b,v7.16b
+       aesd    v1.16b,v22.16b
+       aesimc  v1.16b,v1.16b
+       aesd    v24.16b,v22.16b
+       aesimc  v24.16b,v24.16b
+       eor     v17.16b,v3.16b,v7.16b
+       aesd    v1.16b,v23.16b
+       aesd    v24.16b,v23.16b
+       b.eq    .Lcbc_dec_one
+       eor     v5.16b,v5.16b,v1.16b
+       eor     v17.16b,v17.16b,v24.16b
+       orr     v6.16b,v27.16b,v27.16b
+       st1     {v5.16b},[x1],#16
+       st1     {v17.16b},[x1],#16
+       b       .Lcbc_done
+
+.Lcbc_dec_one:
+       eor     v5.16b,v5.16b,v24.16b
+       orr     v6.16b,v27.16b,v27.16b
+       st1     {v5.16b},[x1],#16
+
+.Lcbc_done:
+       st1     {v6.16b},[x4]
+.Lcbc_abort:
+       ldr     x29,[sp],#16
+       ret
+.size  aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
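+// aes_v8_ctr32_encrypt_blocks(in=x0, out=x1, blocks=x2, schedule=x3,
+// ivec=x4); only the low 32 bits of the counter, the big-endian last
+// word of ivec, are incremented between blocks.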
+.globl aes_v8_ctr32_encrypt_blocks
+.type  aes_v8_ctr32_encrypt_blocks,%function
+.align 5
+aes_v8_ctr32_encrypt_blocks:
+       stp     x29,x30,[sp,#-16]!
+       add     x29,sp,#0
+       ldr     w5,[x3,#240]
+
+       ldr     w8, [x4, #12]
+       ld1     {v0.4s},[x4]
+
+       ld1     {v16.4s,v17.4s},[x3]            // load key schedule...
+       sub     w5,w5,#4
+       mov     x12,#16
+       cmp     x2,#2
+       add     x7,x3,x5,lsl#4  // pointer to last 5 round keys
+       sub     w5,w5,#2
+       ld1     {v20.4s,v21.4s},[x7],#32
+       ld1     {v22.4s,v23.4s},[x7],#32
+       ld1     {v7.4s},[x7]
+       add     x7,x3,#32
+       mov     w6,w5
+       csel    x12,xzr,x12,lo
+#ifndef __ARMEB__
+       rev     w8, w8
+#endif
+       orr     v1.16b,v0.16b,v0.16b
+       add     w10, w8, #1
+       orr     v18.16b,v0.16b,v0.16b
+       add     w8, w8, #2
+       orr     v6.16b,v0.16b,v0.16b
+       rev     w10, w10
+       mov     v1.s[3],w10
+       b.ls    .Lctr32_tail
+       rev     w12, w8
+       sub     x2,x2,#3                // bias
+       mov     v18.s[3],w12
+       cmp     x2,#2
+       b.lo    .Loop3x_ctr32
+
+       add     w13,w8,#1
+       add     w14,w8,#2
+       orr     v24.16b,v0.16b,v0.16b
+       rev     w13,w13
+       orr     v25.16b,v0.16b,v0.16b
+       rev     w14,w14
+       mov     v24.s[3],w13
+       sub     x2,x2,#2                // bias
+       mov     v25.s[3],w14
+       add     w8,w8,#2
+       b       .Loop5x_ctr32
+
+.align 4
+.Loop5x_ctr32:
+       aese    v0.16b,v16.16b
+       aesmc   v0.16b,v0.16b
+       aese    v1.16b,v16.16b
+       aesmc   v1.16b,v1.16b
+       aese    v18.16b,v16.16b
+       aesmc   v18.16b,v18.16b
+       aese    v24.16b,v16.16b
+       aesmc   v24.16b,v24.16b
+       aese    v25.16b,v16.16b
+       aesmc   v25.16b,v25.16b
+       ld1     {v16.4s},[x7],#16
+       subs    w6,w6,#2
+       aese    v0.16b,v17.16b
+       aesmc   v0.16b,v0.16b
+       aese    v1.16b,v17.16b
+       aesmc   v1.16b,v1.16b
+       aese    v18.16b,v17.16b
+       aesmc   v18.16b,v18.16b
+       aese    v24.16b,v17.16b
+       aesmc   v24.16b,v24.16b
+       aese    v25.16b,v17.16b
+       aesmc   v25.16b,v25.16b
+       ld1     {v17.4s},[x7],#16
+       b.gt    .Loop5x_ctr32
+
+       mov     x7,x3
+       aese    v0.16b,v16.16b
+       aesmc   v0.16b,v0.16b
+       aese    v1.16b,v16.16b
+       aesmc   v1.16b,v1.16b
+       aese    v18.16b,v16.16b
+       aesmc   v18.16b,v18.16b
+       aese    v24.16b,v16.16b
+       aesmc   v24.16b,v24.16b
+       aese    v25.16b,v16.16b
+       aesmc   v25.16b,v25.16b
+       ld1     {v16.4s},[x7],#16       // re-pre-load rndkey[0]
+
+       aese    v0.16b,v17.16b
+       aesmc   v0.16b,v0.16b
+       aese    v1.16b,v17.16b
+       aesmc   v1.16b,v1.16b
+       aese    v18.16b,v17.16b
+       aesmc   v18.16b,v18.16b
+       aese    v24.16b,v17.16b
+       aesmc   v24.16b,v24.16b
+       aese    v25.16b,v17.16b
+       aesmc   v25.16b,v25.16b
+       ld1     {v17.4s},[x7],#16       // re-pre-load rndkey[1]
+
+       aese    v0.16b,v20.16b
+       aesmc   v0.16b,v0.16b
+       add     w9,w8,#1
+       add     w10,w8,#2
+       aese    v1.16b,v20.16b
+       aesmc   v1.16b,v1.16b
+       add     w12,w8,#3
+       add     w13,w8,#4
+       aese    v18.16b,v20.16b
+       aesmc   v18.16b,v18.16b
+       add     w14,w8,#5
+       rev     w9,w9
+       aese    v24.16b,v20.16b
+       aesmc   v24.16b,v24.16b
+       rev     w10,w10
+       rev     w12,w12
+       aese    v25.16b,v20.16b
+       aesmc   v25.16b,v25.16b
+       rev     w13,w13
+       rev     w14,w14
+
+       aese    v0.16b,v21.16b
+       aesmc   v0.16b,v0.16b
+       aese    v1.16b,v21.16b
+       aesmc   v1.16b,v1.16b
+       aese    v18.16b,v21.16b
+       aesmc   v18.16b,v18.16b
+       aese    v24.16b,v21.16b
+       aesmc   v24.16b,v24.16b
+       aese    v25.16b,v21.16b
+       aesmc   v25.16b,v25.16b
+
+       aese    v0.16b,v22.16b
+       aesmc   v0.16b,v0.16b
+       ld1     {v2.16b},[x0],#16
+       aese    v1.16b,v22.16b
+       aesmc   v1.16b,v1.16b
+       ld1     {v3.16b},[x0],#16
+       aese    v18.16b,v22.16b
+       aesmc   v18.16b,v18.16b
+       ld1     {v19.16b},[x0],#16
+       aese    v24.16b,v22.16b
+       aesmc   v24.16b,v24.16b
+       ld1     {v26.16b},[x0],#16
+       aese    v25.16b,v22.16b
+       aesmc   v25.16b,v25.16b
+       ld1     {v27.16b},[x0],#16
+
+       aese    v0.16b,v23.16b
+       eor     v2.16b,v2.16b,v7.16b
+       aese    v1.16b,v23.16b
+       eor     v3.16b,v3.16b,v7.16b
+       aese    v18.16b,v23.16b
+       eor     v19.16b,v19.16b,v7.16b
+       aese    v24.16b,v23.16b
+       eor     v26.16b,v26.16b,v7.16b
+       aese    v25.16b,v23.16b
+       eor     v27.16b,v27.16b,v7.16b
+
+       eor     v2.16b,v2.16b,v0.16b
+       orr     v0.16b,v6.16b,v6.16b
+       eor     v3.16b,v3.16b,v1.16b
+       orr     v1.16b,v6.16b,v6.16b
+       eor     v19.16b,v19.16b,v18.16b
+       orr     v18.16b,v6.16b,v6.16b
+       eor     v26.16b,v26.16b,v24.16b
+       orr     v24.16b,v6.16b,v6.16b
+       eor     v27.16b,v27.16b,v25.16b
+       orr     v25.16b,v6.16b,v6.16b
+
+       st1     {v2.16b},[x1],#16
+       mov     v0.s[3],w9
+       st1     {v3.16b},[x1],#16
+       mov     v1.s[3],w10
+       st1     {v19.16b},[x1],#16
+       mov     v18.s[3],w12
+       st1     {v26.16b},[x1],#16
+       mov     v24.s[3],w13
+       st1     {v27.16b},[x1],#16
+       mov     v25.s[3],w14
+
+       mov     w6,w5
+       cbz     x2,.Lctr32_done
+
+       add     w8,w8,#5
+       subs    x2,x2,#5
+       b.hs    .Loop5x_ctr32
+
+       add     x2,x2,#5
+       sub     w8,w8,#5
+
+       cmp     x2,#2
+       mov     x12,#16
+       csel    x12,xzr,x12,lo
+       b.ls    .Lctr32_tail
+
+       sub     x2,x2,#3                // bias
+       add     w8,w8,#3
+       b       .Loop3x_ctr32
+
+.align 4
+.Loop3x_ctr32:
+       aese    v0.16b,v16.16b
+       aesmc   v0.16b,v0.16b
+       aese    v1.16b,v16.16b
+       aesmc   v1.16b,v1.16b
+       aese    v18.16b,v16.16b
+       aesmc   v18.16b,v18.16b
+       ld1     {v16.4s},[x7],#16
+       subs    w6,w6,#2
+       aese    v0.16b,v17.16b
+       aesmc   v0.16b,v0.16b
+       aese    v1.16b,v17.16b
+       aesmc   v1.16b,v1.16b
+       aese    v18.16b,v17.16b
+       aesmc   v18.16b,v18.16b
+       ld1     {v17.4s},[x7],#16
+       b.gt    .Loop3x_ctr32
+
+       aese    v0.16b,v16.16b
+       aesmc   v4.16b,v0.16b
+       aese    v1.16b,v16.16b
+       aesmc   v5.16b,v1.16b
+       ld1     {v2.16b},[x0],#16
+       orr     v0.16b,v6.16b,v6.16b
+       aese    v18.16b,v16.16b
+       aesmc   v18.16b,v18.16b
+       ld1     {v3.16b},[x0],#16
+       orr     v1.16b,v6.16b,v6.16b
+       aese    v4.16b,v17.16b
+       aesmc   v4.16b,v4.16b
+       aese    v5.16b,v17.16b
+       aesmc   v5.16b,v5.16b
+       ld1     {v19.16b},[x0],#16
+       mov     x7,x3
+       aese    v18.16b,v17.16b
+       aesmc   v17.16b,v18.16b
+       orr     v18.16b,v6.16b,v6.16b
+       add     w9,w8,#1
+       aese    v4.16b,v20.16b
+       aesmc   v4.16b,v4.16b
+       aese    v5.16b,v20.16b
+       aesmc   v5.16b,v5.16b
+       eor     v2.16b,v2.16b,v7.16b
+       add     w10,w8,#2
+       aese    v17.16b,v20.16b
+       aesmc   v17.16b,v17.16b
+       eor     v3.16b,v3.16b,v7.16b
+       add     w8,w8,#3
+       aese    v4.16b,v21.16b
+       aesmc   v4.16b,v4.16b
+       aese    v5.16b,v21.16b
+       aesmc   v5.16b,v5.16b
+       eor     v19.16b,v19.16b,v7.16b
+       rev     w9,w9
+       aese    v17.16b,v21.16b
+       aesmc   v17.16b,v17.16b
+       mov     v0.s[3], w9
+       rev     w10,w10
+       aese    v4.16b,v22.16b
+       aesmc   v4.16b,v4.16b
+       aese    v5.16b,v22.16b
+       aesmc   v5.16b,v5.16b
+       mov     v1.s[3], w10
+       rev     w12,w8
+       aese    v17.16b,v22.16b
+       aesmc   v17.16b,v17.16b
+       mov     v18.s[3], w12
+       subs    x2,x2,#3
+       aese    v4.16b,v23.16b
+       aese    v5.16b,v23.16b
+       aese    v17.16b,v23.16b
+
+       eor     v2.16b,v2.16b,v4.16b
+       ld1     {v16.4s},[x7],#16       // re-pre-load rndkey[0]
+       st1     {v2.16b},[x1],#16
+       eor     v3.16b,v3.16b,v5.16b
+       mov     w6,w5
+       st1     {v3.16b},[x1],#16
+       eor     v19.16b,v19.16b,v17.16b
+       ld1     {v17.4s},[x7],#16       // re-pre-load rndkey[1]
+       st1     {v19.16b},[x1],#16
+       b.hs    .Loop3x_ctr32
+
+       adds    x2,x2,#3
+       b.eq    .Lctr32_done
+       cmp     x2,#1
+       mov     x12,#16
+       csel    x12,xzr,x12,eq
+
+.Lctr32_tail:
+       aese    v0.16b,v16.16b
+       aesmc   v0.16b,v0.16b
+       aese    v1.16b,v16.16b
+       aesmc   v1.16b,v1.16b
+       ld1     {v16.4s},[x7],#16
+       subs    w6,w6,#2
+       aese    v0.16b,v17.16b
+       aesmc   v0.16b,v0.16b
+       aese    v1.16b,v17.16b
+       aesmc   v1.16b,v1.16b
+       ld1     {v17.4s},[x7],#16
+       b.gt    .Lctr32_tail
+
+       aese    v0.16b,v16.16b
+       aesmc   v0.16b,v0.16b
+       aese    v1.16b,v16.16b
+       aesmc   v1.16b,v1.16b
+       aese    v0.16b,v17.16b
+       aesmc   v0.16b,v0.16b
+       aese    v1.16b,v17.16b
+       aesmc   v1.16b,v1.16b
+       ld1     {v2.16b},[x0],x12
+       aese    v0.16b,v20.16b
+       aesmc   v0.16b,v0.16b
+       aese    v1.16b,v20.16b
+       aesmc   v1.16b,v1.16b
+       ld1     {v3.16b},[x0]
+       aese    v0.16b,v21.16b
+       aesmc   v0.16b,v0.16b
+       aese    v1.16b,v21.16b
+       aesmc   v1.16b,v1.16b
+       eor     v2.16b,v2.16b,v7.16b
+       aese    v0.16b,v22.16b
+       aesmc   v0.16b,v0.16b
+       aese    v1.16b,v22.16b
+       aesmc   v1.16b,v1.16b
+       eor     v3.16b,v3.16b,v7.16b
+       aese    v0.16b,v23.16b
+       aese    v1.16b,v23.16b
+
+       cmp     x2,#1
+       eor     v2.16b,v2.16b,v0.16b
+       eor     v3.16b,v3.16b,v1.16b
+       st1     {v2.16b},[x1],#16
+       b.eq    .Lctr32_done
+       st1     {v3.16b},[x1]
+
+.Lctr32_done:
+       ldr     x29,[sp],#16
+       ret
+.size  aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
diff --git a/sys/arch/arm64/arm64/autoconf.c b/sys/arch/arm64/arm64/autoconf.c
index 18be08a..bda3cb3 100644
@@ -1,4 +1,4 @@
-/*     $OpenBSD: autoconf.c,v 1.11 2020/11/06 13:32:38 patrick Exp $   */
+/*     $OpenBSD: autoconf.c,v 1.12 2021/02/21 14:55:16 tobhe Exp $     */
 /*
  * Copyright (c) 2009 Miodrag Vallat.
  *
 #include <netinet/if_ether.h>
 #endif
 
+#ifdef CRYPTO
+void   cryptox_setup(void);
+extern int arm64_has_aes;
+#endif
+
 #include <machine/bootconfig.h>
 
 extern void dumpconf(void);
@@ -56,6 +61,11 @@ cpu_configure(void)
 
        unmap_startup();
 
+#ifdef CRYPTO
+       if (arm64_has_aes)
+               cryptox_setup();
+#endif
+
        cold = 0;
        spl0();
 }
diff --git a/sys/arch/arm64/arm64/cpu.c b/sys/arch/arm64/arm64/cpu.c
index dc65e10..21f841a 100644
@@ -1,4 +1,4 @@
-/*     $OpenBSD: cpu.c,v 1.47 2021/02/10 20:51:27 kettenis Exp $       */
+/*     $OpenBSD: cpu.c,v 1.48 2021/02/21 14:55:16 tobhe Exp $  */
 
 /*
  * Copyright (c) 2016 Dale Rahn <drahn@dalerahn.com>
@@ -153,6 +153,10 @@ const struct implementers {
 char cpu_model[64];
 int cpu_node;
 
+#ifdef CRYPTO
+int arm64_has_aes;
+#endif
+
 struct cpu_info *cpu_info_list = &cpu_info_primary;
 
 int    cpu_match(struct device *, void *, void *);
@@ -378,6 +382,9 @@ cpu_identify(struct cpu_info *ci)
        if (ID_AA64ISAR0_AES(id) >= ID_AA64ISAR0_AES_BASE) {
                printf("%sAES", sep);
                sep = ",";
+#ifdef CRYPTO
+               arm64_has_aes = 1;
+#endif
        }
        if (ID_AA64ISAR0_AES(id) >= ID_AA64ISAR0_AES_PMULL)
                printf("+PMULL");
diff --git a/sys/arch/arm64/arm64/cryptox.c b/sys/arch/arm64/arm64/cryptox.c
new file mode 100644
index 0000000..5542a48
--- /dev/null
@@ -0,0 +1,494 @@
+/*     $OpenBSD: cryptox.c,v 1.1 2021/02/21 14:55:17 tobhe Exp $       */
+/*
+ * Copyright (c) 2003 Jason Wright
+ * Copyright (c) 2003, 2004 Theo de Raadt
+ * Copyright (c) 2010 Thordur I. Bjornsson
+ * Copyright (c) 2010 Mike Belopuhov
+ * Copyright (c) 2020 Tobias Heider
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/atomic.h>
+#include <sys/malloc.h>
+#include <sys/pool.h>
+#include <sys/mbuf.h>
+#include <sys/smr.h>
+
+#include <crypto/cryptodev.h>
+#include <crypto/aes.h>
+#include <crypto/gmac.h>
+#include <crypto/xform.h>
+#include <crypto/cryptosoft.h>
+
+#include <machine/vfp.h>
+
+struct cryptox_aes_key {
+       uint32_t rd_key[4 * (AES_MAXROUNDS + 1)];
+       int rounds;
+};
+
+struct cryptox_session {
+       struct cryptox_aes_key   ses_ekey;
+       struct cryptox_aes_key   ses_dkey;
+       uint32_t                 ses_klen;
+       int                      ses_sid;
+       struct swcr_data        *ses_swd;
+       SMR_LIST_ENTRY(cryptox_session)
+                                ses_entries;
+       uint8_t                 *ses_buf;
+       size_t                   ses_buflen;
+       struct smr_entry         ses_smr;
+};
+
+struct cryptox_softc {
+       int32_t                  sc_cid;
+       uint32_t                 sc_sid;
+       struct mutex             sc_mtx;
+       SMR_LIST_HEAD(, cryptox_session)
+                                sc_sessions;
+} *cryptox_sc;
+
+struct pool cryptoxpl;
+
+uint32_t cryptox_ops;
+
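+/* Entry points provided by the CRYPTOGAMS assembly in aesv8-armx.S. */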
+extern int aes_v8_set_encrypt_key(const uint8_t *user_key, const int bits,
+           struct cryptox_aes_key *key);
+extern int aes_v8_set_decrypt_key(const uint8_t *user_key, const int bits,
+           struct cryptox_aes_key *key);
+extern void aes_v8_encrypt(const uint8_t *in, uint8_t *out,
+           const struct cryptox_aes_key *key);
+extern void aes_v8_decrypt(const uint8_t *in, uint8_t *out,
+           const struct cryptox_aes_key *key);
+extern void aes_v8_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
+           const struct cryptox_aes_key *key, uint8_t *ivec, const int enc);
+extern void aes_v8_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
+           size_t len, const struct cryptox_aes_key *key,
+           const uint8_t ivec[16]);
+
+void   cryptox_setup(void);
+int    cryptox_newsession(u_int32_t *, struct cryptoini *);
+int    cryptox_freesession(u_int64_t);
+int    cryptox_process(struct cryptop *);
+
+struct cryptox_session *
+       cryptox_get(uint32_t);
+void   cryptox_free(struct cryptox_session *);
+void   cryptox_free_smr(void *);
+
+int    cryptox_swauth(struct cryptop *, struct cryptodesc *, struct swcr_data *,
+           caddr_t);
+
+int    cryptox_encdec(struct cryptop *, struct cryptodesc *,
+           struct cryptox_session *);
+
+void
+cryptox_setup(void)
+{
+       int algs[CRYPTO_ALGORITHM_MAX + 1];
+
+       cryptox_sc = malloc(sizeof(*cryptox_sc), M_DEVBUF, M_NOWAIT|M_ZERO);
+       if (cryptox_sc == NULL)
+               return;
+
+       bzero(algs, sizeof(algs));
+
+       /* Encryption algorithms. */
+       algs[CRYPTO_AES_CBC] = CRYPTO_ALG_FLAG_SUPPORTED;
+
+       /* HMACs needed for IPsec; handled by software crypto. */
+       algs[CRYPTO_MD5_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+       algs[CRYPTO_SHA1_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+       algs[CRYPTO_RIPEMD160_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+       algs[CRYPTO_SHA2_256_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+       algs[CRYPTO_SHA2_384_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+       algs[CRYPTO_SHA2_512_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+
+       /* IPsec Extended Sequence Numbers. */
+       algs[CRYPTO_ESN] = CRYPTO_ALG_FLAG_SUPPORTED;
+
+       cryptox_sc->sc_cid = crypto_get_driverid(CRYPTOCAP_F_MPSAFE);
+       if (cryptox_sc->sc_cid < 0) {
+               free(cryptox_sc, M_DEVBUF, sizeof(*cryptox_sc));
+               cryptox_sc = NULL;
+               return;
+       }
+
+       pool_init(&cryptoxpl, sizeof(struct cryptox_session), 16, IPL_VM, 0,
+           "cryptox", NULL);
+       pool_setlowat(&cryptoxpl, 2);
+
+       mtx_init(&cryptox_sc->sc_mtx, IPL_VM);
+
+       crypto_register(cryptox_sc->sc_cid, algs, cryptox_newsession,
+           cryptox_freesession, cryptox_process);
+}
+
+int
+cryptox_newsession(u_int32_t *sidp, struct cryptoini *cri)
+{
+       struct cryptox_session *ses = NULL;
+       struct cryptoini *c;
+       struct auth_hash *axf;
+       struct swcr_data *swd;
+       int i;
+
+       if (sidp == NULL || cri == NULL)
+               return (EINVAL);
+
+       ses = pool_get(&cryptoxpl, PR_NOWAIT | PR_ZERO);
+       if (!ses)
+               return (ENOMEM);
+       smr_init(&ses->ses_smr);
+
+       ses->ses_buf = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
+       if (ses->ses_buf != NULL)
+               ses->ses_buflen = PAGE_SIZE;
+
+       for (c = cri; c != NULL; c = c->cri_next) {
+               switch (c->cri_alg) {
+               case CRYPTO_AES_CBC:
+                       ses->ses_klen = c->cri_klen / 8;
+                       vfp_kernel_enter();
+                       aes_v8_set_encrypt_key(c->cri_key, c->cri_klen, &ses->ses_ekey);
+                       aes_v8_set_decrypt_key(c->cri_key, c->cri_klen, &ses->ses_dkey);
+                       vfp_kernel_exit();
+                       break;
+
+               case CRYPTO_MD5_HMAC:
+                       axf = &auth_hash_hmac_md5_96;
+                       goto authcommon;
+               case CRYPTO_SHA1_HMAC:
+                       axf = &auth_hash_hmac_sha1_96;
+                       goto authcommon;
+               case CRYPTO_RIPEMD160_HMAC:
+                       axf = &auth_hash_hmac_ripemd_160_96;
+                       goto authcommon;
+               case CRYPTO_SHA2_256_HMAC:
+                       axf = &auth_hash_hmac_sha2_256_128;
+                       goto authcommon;
+               case CRYPTO_SHA2_384_HMAC:
+                       axf = &auth_hash_hmac_sha2_384_192;
+                       goto authcommon;
+               case CRYPTO_SHA2_512_HMAC:
+                       axf = &auth_hash_hmac_sha2_512_256;
+               authcommon:
+                       swd = malloc(sizeof(struct swcr_data), M_CRYPTO_DATA,
+                           M_NOWAIT|M_ZERO);
+                       if (swd == NULL) {
+                               cryptox_free(ses);
+                               return (ENOMEM);
+                       }
+                       ses->ses_swd = swd;
+
+                       swd->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA,
+                           M_NOWAIT);
+                       if (swd->sw_ictx == NULL) {
+                               cryptox_free(ses);
+                               return (ENOMEM);
+                       }
+
+                       swd->sw_octx = malloc(axf->ctxsize, M_CRYPTO_DATA,
+                           M_NOWAIT);
+                       if (swd->sw_octx == NULL) {
+                               cryptox_free(ses);
+                               return (ENOMEM);
+                       }
+
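+                       /*
+                        * Standard HMAC key setup: precompute the inner
+                        * and outer digest contexts over key^ipad and
+                        * key^opad.
+                        */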
+                       for (i = 0; i < c->cri_klen / 8; i++)
+                               c->cri_key[i] ^= HMAC_IPAD_VAL;
+
+                       axf->Init(swd->sw_ictx);
+                       axf->Update(swd->sw_ictx, c->cri_key, c->cri_klen / 8);
+                       axf->Update(swd->sw_ictx, hmac_ipad_buffer,
+                           axf->blocksize - (c->cri_klen / 8));
+
+                       for (i = 0; i < c->cri_klen / 8; i++)
+                               c->cri_key[i] ^= (HMAC_IPAD_VAL ^
+                                   HMAC_OPAD_VAL);
+
+                       axf->Init(swd->sw_octx);
+                       axf->Update(swd->sw_octx, c->cri_key, c->cri_klen / 8);
+                       axf->Update(swd->sw_octx, hmac_opad_buffer,
+                           axf->blocksize - (c->cri_klen / 8));
+
+                       for (i = 0; i < c->cri_klen / 8; i++)
+                               c->cri_key[i] ^= HMAC_OPAD_VAL;
+
+                       swd->sw_axf = axf;
+                       swd->sw_alg = c->cri_alg;
+
+                       break;
+
+               case CRYPTO_ESN:
+                       /* nothing to do */
+                       break;
+
+               default:
+                       cryptox_free(ses);
+                       return (EINVAL);
+               }
+       }
+
+       mtx_enter(&cryptox_sc->sc_mtx);
+       ses->ses_sid = ++cryptox_sc->sc_sid;
+       SMR_LIST_INSERT_HEAD_LOCKED(&cryptox_sc->sc_sessions, ses, ses_entries);
+       mtx_leave(&cryptox_sc->sc_mtx);
+
+       *sidp = ses->ses_sid;
+       return (0);
+}
+
+int
+cryptox_freesession(u_int64_t tid)
+{
+       struct cryptox_session *ses;
+       u_int32_t sid = (u_int32_t)tid;
+
+       mtx_enter(&cryptox_sc->sc_mtx);
+       SMR_LIST_FOREACH_LOCKED(ses, &cryptox_sc->sc_sessions, ses_entries) {
+               if (ses->ses_sid == sid) {
+                       SMR_LIST_REMOVE_LOCKED(ses, ses_entries);
+                       break;
+               }
+       }
+       mtx_leave(&cryptox_sc->sc_mtx);
+
+       if (ses == NULL)
+               return (EINVAL);
+
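+       /* Defer freeing until all current SMR read sections have drained. */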
+       smr_call(&ses->ses_smr, cryptox_free_smr, ses);
+
+       return (0);
+}
+
+void
+cryptox_free(struct cryptox_session *ses)
+{
+       struct swcr_data *swd;
+       struct auth_hash *axf;
+
+       if (ses->ses_swd) {
+               swd = ses->ses_swd;
+               axf = swd->sw_axf;
+
+               if (swd->sw_ictx) {
+                       explicit_bzero(swd->sw_ictx, axf->ctxsize);
+                       free(swd->sw_ictx, M_CRYPTO_DATA, axf->ctxsize);
+               }
+               if (swd->sw_octx) {
+                       explicit_bzero(swd->sw_octx, axf->ctxsize);
+                       free(swd->sw_octx, M_CRYPTO_DATA, axf->ctxsize);
+               }
+               free(swd, M_CRYPTO_DATA, sizeof(*swd));
+       }
+
+       if (ses->ses_buf) {
+               explicit_bzero(ses->ses_buf, ses->ses_buflen);
+               free(ses->ses_buf, M_DEVBUF, ses->ses_buflen);
+       }
+
+       explicit_bzero(ses, sizeof (*ses));
+       pool_put(&cryptoxpl, ses);
+}
+
+void
+cryptox_free_smr(void *arg)
+{
+       struct cryptox_session *ses = arg;
+
+       cryptox_free(ses);
+}
+
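+/*
+ * Look up a session by id; the caller must be inside an SMR read section.
+ */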
+struct cryptox_session *
+cryptox_get(uint32_t sid)
+{
+       struct cryptox_session *ses = NULL;
+
+       SMR_ASSERT_CRITICAL();
+       SMR_LIST_FOREACH(ses, &cryptox_sc->sc_sessions, ses_entries) {
+               if (ses->ses_sid == sid)
+                       break;
+       }
+       return (ses);
+}
+
+int
+cryptox_swauth(struct cryptop *crp, struct cryptodesc *crd,
+    struct swcr_data *sw, caddr_t buf)
+{
+       int type;
+
+       if (crp->crp_flags & CRYPTO_F_IMBUF)
+               type = CRYPTO_BUF_MBUF;
+       else
+               type = CRYPTO_BUF_IOV;
+
+       return (swcr_authcompute(crp, crd, sw, buf, type));
+}
+
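+/*
+ * Run one encryption descriptor through a per-session bounce buffer,
+ * growing the buffer on demand.
+ */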
+int
+cryptox_encdec(struct cryptop *crp, struct cryptodesc *crd,
+    struct cryptox_session *ses)
+{
+       int err, ivlen, iskip, oskip, rlen;
+       uint8_t iv[EALG_MAX_BLOCK_LEN];
+       uint8_t *buf = ses->ses_buf;
+
+       rlen = err = iskip = oskip = 0;
+
+       if (crd->crd_len > ses->ses_buflen) {
+               if (buf != NULL) {
+                       explicit_bzero(buf, ses->ses_buflen);
+                       free(buf, M_DEVBUF, ses->ses_buflen);
+               }
+
+               ses->ses_buflen = 0;
+               rlen = roundup(crd->crd_len, EALG_MAX_BLOCK_LEN);
+               ses->ses_buf = buf = malloc(rlen, M_DEVBUF, M_NOWAIT |
+                   M_ZERO);
+               if (buf == NULL)
+                       return (ENOMEM);
+               ses->ses_buflen = rlen;
+       }
+
+       /* AES-CBC uses a 16-byte IV */
+       ivlen = 16;
+
+       /* Initialize the IV */
+       if (crd->crd_flags & CRD_F_ENCRYPT) {
+               if (crd->crd_flags & CRD_F_IV_EXPLICIT)
+                       memcpy(iv, crd->crd_iv, ivlen);
+               else
+                       arc4random_buf(iv, ivlen);
+
+               /* Do we need to write the IV */
+               if ((crd->crd_flags & CRD_F_IV_PRESENT) == 0) {
+                       if (crp->crp_flags & CRYPTO_F_IMBUF) {
+                               if (m_copyback((struct mbuf *)crp->crp_buf,
+                                   crd->crd_inject, ivlen, iv, M_NOWAIT)) {
+                                       err = ENOMEM;
+                                       goto out;
+                               }
+                       } else
+                               cuio_copyback((struct uio *)crp->crp_buf,
+                                   crd->crd_inject, ivlen, iv);
+               }
+       } else {
+               if (crd->crd_flags & CRD_F_IV_EXPLICIT)
+                       memcpy(iv, crd->crd_iv, ivlen);
+               else {
+                       if (crp->crp_flags & CRYPTO_F_IMBUF)
+                               m_copydata((struct mbuf *)crp->crp_buf,
+                                   crd->crd_inject, ivlen, iv);
+                       else
+                               cuio_copydata((struct uio *)crp->crp_buf,
+                                   crd->crd_inject, ivlen, iv);
+               }
+       }
+
+       /* Copy data to be processed to the buffer */
+       if (crp->crp_flags & CRYPTO_F_IMBUF)
+               m_copydata((struct mbuf *)crp->crp_buf, crd->crd_skip,
+                   crd->crd_len, buf);
+       else
+               cuio_copydata((struct uio *)crp->crp_buf, crd->crd_skip,
+                   crd->crd_len, buf);
+
+       /* Apply cipher */
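+       /* vfp_kernel_enter() claims the FPU so the kernel may use NEON. */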
+       vfp_kernel_enter();
+       switch (crd->crd_alg) {
+       case CRYPTO_AES_CBC:
+               if (crd->crd_flags & CRD_F_ENCRYPT)
+                       aes_v8_cbc_encrypt(buf, buf, crd->crd_len, &ses->ses_ekey, iv, 1);
+               else
+                       aes_v8_cbc_encrypt(buf, buf, crd->crd_len, &ses->ses_dkey, iv, 0);
+               break;
+       }
+       vfp_kernel_exit();
+
+       cryptox_ops++;
+
+       /* Copy back the result */
+       if (crp->crp_flags & CRYPTO_F_IMBUF) {
+               if (m_copyback((struct mbuf *)crp->crp_buf, crd->crd_skip,
+                   crd->crd_len, buf, M_NOWAIT)) {
+                       err = ENOMEM;
+                       goto out;
+               }
+       } else
+               cuio_copyback((struct uio *)crp->crp_buf, crd->crd_skip,
+                   crd->crd_len, buf);
+
+out:
+       explicit_bzero(buf, roundup(crd->crd_len, EALG_MAX_BLOCK_LEN));
+       return (err);
+}
+
+int
+cryptox_process(struct cryptop *crp)
+{
+       struct cryptox_session *ses;
+       struct cryptodesc *crd, *crde;
+       int err = 0;
+       int i;
+
+       if (crp == NULL || crp->crp_callback == NULL)
+               return (EINVAL);
+       if (crp->crp_ndesc < 1)
+               return (EINVAL);
+
+       smr_read_enter();
+       ses = cryptox_get(crp->crp_sid & 0xffffffff);
+       if (!ses) {
+               err = EINVAL;
+               goto out;
+       }
+
+       crde = NULL;
+       for (i = 0; i < crp->crp_ndesc; i++) {
+               crd = &crp->crp_desc[i];
+               switch (crd->crd_alg) {
+               case CRYPTO_AES_CBC:
+                       err = cryptox_encdec(crp, crd, ses);
+                       if (err != 0)
+                               goto out;
+                       break;
+               case CRYPTO_MD5_HMAC:
+               case CRYPTO_SHA1_HMAC:
+               case CRYPTO_RIPEMD160_HMAC:
+               case CRYPTO_SHA2_256_HMAC:
+               case CRYPTO_SHA2_384_HMAC:
+               case CRYPTO_SHA2_512_HMAC:
+                       err = cryptox_swauth(crp, crd, ses->ses_swd,
+                           crp->crp_buf);
+                       if (err != 0)
+                               goto out;
+                       break;
+
+               default:
+                       err = EINVAL;
+                       goto out;
+               }
+       }
+
+out:
+       smr_read_leave();
+       crp->crp_etype = err;
+       crypto_done(crp);
+       return (err);
+}
diff --git a/sys/arch/arm64/conf/files.arm64 b/sys/arch/arm64/conf/files.arm64
index de03add..90c3677 100644
@@ -1,4 +1,4 @@
-# $OpenBSD: files.arm64,v 1.32 2020/07/25 12:26:09 tobhe Exp $
+# $OpenBSD: files.arm64,v 1.33 2021/02/21 14:55:17 tobhe Exp $
 
 maxpartitions  16
 maxusers       2 8 128
@@ -40,6 +40,9 @@ file  arch/arm64/arm64/bus_dma.c
 file   arch/arm64/dev/arm64_bus_space.c
 file   arch/arm64/dev/pci_machdep.c
 
+file   arch/arm64/arm64/cryptox.c              crypto
+file   arch/arm64/arm64/aesv8-armx.S           crypto
+
 file   arch/arm64/arm64/db_disasm.c            ddb
 file   arch/arm64/arm64/db_interface.c         ddb
 file   arch/arm64/arm64/db_trace.c             ddb