--- /dev/null
+/* $OpenBSD: aesv8-armx.S,v 1.1 2021/02/21 14:55:16 tobhe Exp $ */
+/*
+ * Copyright (c) 2006, CRYPTOGAMS by <appro@openssl.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain copyright notices,
+ * this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials
+ * provided with the distribution.
+ *
+ * * Neither the name of the CRYPTOGAMS nor the names of its
+ * copyright holder and contributors may be used to endorse or
+ * promote products derived from this software without specific
+ * prior written permission.
+ *
+ * ALTERNATIVELY, provided that this notice is retained in full, this
+ * product may be distributed under the terms of the GNU General Public
+ * License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+ * those given above.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Generated from CRYPTOGAMS aesv8-armx.pl.
+ * Changes to the original source code:
+ *
+ * - removed #include "arm_arch.h"
+ * - removed redundant __ARM_MAX_ARCH__ check
+ */
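+
+/*
+ * Entry points and their AAPCS64 argument registers, matching the
+ * prototypes declared in cryptox.c:
+ *
+ * aes_v8_set_encrypt_key(userKey x0, bits w1, key schedule x2)
+ * aes_v8_set_decrypt_key(userKey x0, bits w1, key schedule x2)
+ * aes_v8_encrypt(in x0, out x1, key schedule x2)
+ * aes_v8_decrypt(in x0, out x1, key schedule x2)
+ * aes_v8_cbc_encrypt(in x0, out x1, length x2, key x3, iv x4, enc w5)
+ * aes_v8_ctr32_encrypt_blocks(in x0, out x1, blocks x2, key x3, iv x4)
+ *
+ * The key setup routines return 0 on success, -1 for a NULL pointer and
+ * -2 for an unsupported bit length.
+ */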
+
+.arch armv8-a+crypto
+.text
+.align 5
+.Lrcon:
+.long 0x01,0x01,0x01,0x01
+.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat
+.long 0x1b,0x1b,0x1b,0x1b
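+// The first row is the initial round constant, doubled in-register
+// (shl #1) at every expansion step; the last row reloads 0x1b once the
+// plain doubling would overflow a byte. The middle row is a tbl index
+// vector that copies bytes 13,14,15,12 of the previous round key into
+// every word, i.e. RotWord of the last word splatted across the lanes;
+// the aese with an all-zero round key that follows then applies SubBytes
+// (ShiftRows is a no-op because all four words are equal).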
+
+.globl aes_v8_set_encrypt_key
+.type aes_v8_set_encrypt_key,%function
+.align 5
+aes_v8_set_encrypt_key:
+.Lenc_key:
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+ mov x3,#-1
+ cmp x0,#0
+ b.eq .Lenc_key_abort
+ cmp x2,#0
+ b.eq .Lenc_key_abort
+ mov x3,#-2
+ cmp w1,#128
+ b.lt .Lenc_key_abort
+ cmp w1,#256
+ b.gt .Lenc_key_abort
+ tst w1,#0x3f
+ b.ne .Lenc_key_abort
+
+ adr x3,.Lrcon
+ cmp w1,#192
+
+ eor v0.16b,v0.16b,v0.16b
+ ld1 {v3.16b},[x0],#16
+ mov w1,#8 // reuse w1
+ ld1 {v1.4s,v2.4s},[x3],#32
+
+ b.lt .Loop128
+ b.eq .L192
+ b .L256
+
+.align 4
+.Loop128:
+ tbl v6.16b,{v3.16b},v2.16b
+ ext v5.16b,v0.16b,v3.16b,#12
+ st1 {v3.4s},[x2],#16
+ aese v6.16b,v0.16b
+ subs w1,w1,#1
+
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v6.16b,v6.16b,v1.16b
+ eor v3.16b,v3.16b,v5.16b
+ shl v1.16b,v1.16b,#1
+ eor v3.16b,v3.16b,v6.16b
+ b.ne .Loop128
+
+ ld1 {v1.4s},[x3]
+
+ tbl v6.16b,{v3.16b},v2.16b
+ ext v5.16b,v0.16b,v3.16b,#12
+ st1 {v3.4s},[x2],#16
+ aese v6.16b,v0.16b
+
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v6.16b,v6.16b,v1.16b
+ eor v3.16b,v3.16b,v5.16b
+ shl v1.16b,v1.16b,#1
+ eor v3.16b,v3.16b,v6.16b
+
+ tbl v6.16b,{v3.16b},v2.16b
+ ext v5.16b,v0.16b,v3.16b,#12
+ st1 {v3.4s},[x2],#16
+ aese v6.16b,v0.16b
+
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v6.16b,v6.16b,v1.16b
+ eor v3.16b,v3.16b,v5.16b
+ eor v3.16b,v3.16b,v6.16b
+ st1 {v3.4s},[x2]
+ add x2,x2,#0x50
+
+ mov w12,#10
+ b .Ldone
+
+.align 4
+.L192:
+ ld1 {v4.8b},[x0],#8
+ movi v6.16b,#8 // borrow v6.16b
+ st1 {v3.4s},[x2],#16
+ sub v2.16b,v2.16b,v6.16b // adjust the mask
+
+.Loop192:
+ tbl v6.16b,{v4.16b},v2.16b
+ ext v5.16b,v0.16b,v3.16b,#12
+ st1 {v4.8b},[x2],#8
+ aese v6.16b,v0.16b
+ subs w1,w1,#1
+
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v3.16b,v3.16b,v5.16b
+
+ dup v5.4s,v3.s[3]
+ eor v5.16b,v5.16b,v4.16b
+ eor v6.16b,v6.16b,v1.16b
+ ext v4.16b,v0.16b,v4.16b,#12
+ shl v1.16b,v1.16b,#1
+ eor v4.16b,v4.16b,v5.16b
+ eor v3.16b,v3.16b,v6.16b
+ eor v4.16b,v4.16b,v6.16b
+ st1 {v3.4s},[x2],#16
+ b.ne .Loop192
+
+ mov w12,#12
+ add x2,x2,#0x20
+ b .Ldone
+
+.align 4
+.L256:
+ ld1 {v4.16b},[x0]
+ mov w1,#7
+ mov w12,#14
+ st1 {v3.4s},[x2],#16
+
+.Loop256:
+ tbl v6.16b,{v4.16b},v2.16b
+ ext v5.16b,v0.16b,v3.16b,#12
+ st1 {v4.4s},[x2],#16
+ aese v6.16b,v0.16b
+ subs w1,w1,#1
+
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v3.16b,v3.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v6.16b,v6.16b,v1.16b
+ eor v3.16b,v3.16b,v5.16b
+ shl v1.16b,v1.16b,#1
+ eor v3.16b,v3.16b,v6.16b
+ st1 {v3.4s},[x2],#16
+ b.eq .Ldone
+
+ dup v6.4s,v3.s[3] // just splat
+ ext v5.16b,v0.16b,v4.16b,#12
+ aese v6.16b,v0.16b
+
+ eor v4.16b,v4.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v4.16b,v4.16b,v5.16b
+ ext v5.16b,v0.16b,v5.16b,#12
+ eor v4.16b,v4.16b,v5.16b
+
+ eor v4.16b,v4.16b,v6.16b
+ b .Loop256
+
+.Ldone:
+ str w12,[x2]
+ mov x3,#0
+
+.Lenc_key_abort:
+ mov x0,x3 // return value
+ ldr x29,[sp],#16
+ ret
+.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key
+
+.globl aes_v8_set_decrypt_key
+.type aes_v8_set_decrypt_key,%function
+.align 5
+aes_v8_set_decrypt_key:
+.inst 0xd503233f // paciasp
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+ bl .Lenc_key
+
+ cmp x0,#0
+ b.ne .Ldec_key_abort
+
+ sub x2,x2,#240 // restore original x2
+ mov x4,#-16
+ add x0,x2,x12,lsl#4 // end of key schedule
+
+ ld1 {v0.4s},[x2]
+ ld1 {v1.4s},[x0]
+ st1 {v0.4s},[x0],x4
+ st1 {v1.4s},[x2],#16
+
+.Loop_imc:
+ ld1 {v0.4s},[x2]
+ ld1 {v1.4s},[x0]
+ aesimc v0.16b,v0.16b
+ aesimc v1.16b,v1.16b
+ st1 {v0.4s},[x0],x4
+ st1 {v1.4s},[x2],#16
+ cmp x0,x2
+ b.hi .Loop_imc
+
+ ld1 {v0.4s},[x2]
+ aesimc v0.16b,v0.16b
+ st1 {v0.4s},[x0]
+
+ eor x0,x0,x0 // return value
+.Ldec_key_abort:
+ ldp x29,x30,[sp],#16
+.inst 0xd50323bf // autiasp
+ ret
+.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key
+.globl aes_v8_encrypt
+.type aes_v8_encrypt,%function
+.align 5
+aes_v8_encrypt:
+ ldr w3,[x2,#240]
+ ld1 {v0.4s},[x2],#16
+ ld1 {v2.16b},[x0]
+ sub w3,w3,#2
+ ld1 {v1.4s},[x2],#16
+
+.Loop_enc:
+ aese v2.16b,v0.16b
+ aesmc v2.16b,v2.16b
+ ld1 {v0.4s},[x2],#16
+ subs w3,w3,#2
+ aese v2.16b,v1.16b
+ aesmc v2.16b,v2.16b
+ ld1 {v1.4s},[x2],#16
+ b.gt .Loop_enc
+
+ aese v2.16b,v0.16b
+ aesmc v2.16b,v2.16b
+ ld1 {v0.4s},[x2]
+ aese v2.16b,v1.16b
+ eor v2.16b,v2.16b,v0.16b
+
+ st1 {v2.16b},[x1]
+ ret
+.size aes_v8_encrypt,.-aes_v8_encrypt
+.globl aes_v8_decrypt
+.type aes_v8_decrypt,%function
+.align 5
+aes_v8_decrypt:
+ ldr w3,[x2,#240]
+ ld1 {v0.4s},[x2],#16
+ ld1 {v2.16b},[x0]
+ sub w3,w3,#2
+ ld1 {v1.4s},[x2],#16
+
+.Loop_dec:
+ aesd v2.16b,v0.16b
+ aesimc v2.16b,v2.16b
+ ld1 {v0.4s},[x2],#16
+ subs w3,w3,#2
+ aesd v2.16b,v1.16b
+ aesimc v2.16b,v2.16b
+ ld1 {v1.4s},[x2],#16
+ b.gt .Loop_dec
+
+ aesd v2.16b,v0.16b
+ aesimc v2.16b,v2.16b
+ ld1 {v0.4s},[x2]
+ aesd v2.16b,v1.16b
+ eor v2.16b,v2.16b,v0.16b
+
+ st1 {v2.16b},[x1]
+ ret
+.size aes_v8_decrypt,.-aes_v8_decrypt
+.globl aes_v8_cbc_encrypt
+.type aes_v8_cbc_encrypt,%function
+.align 5
+aes_v8_cbc_encrypt:
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+ subs x2,x2,#16
+ mov x8,#16
+ b.lo .Lcbc_abort
+ csel x8,xzr,x8,eq
+
+ cmp w5,#0 // en- or decrypting?
+ ldr w5,[x3,#240]
+ and x2,x2,#-16
+ ld1 {v6.16b},[x4]
+ ld1 {v0.16b},[x0],x8
+
+ ld1 {v16.4s,v17.4s},[x3] // load key schedule...
+ sub w5,w5,#6
+ add x7,x3,x5,lsl#4 // pointer to last 7 round keys
+ sub w5,w5,#2
+ ld1 {v18.4s,v19.4s},[x7],#32
+ ld1 {v20.4s,v21.4s},[x7],#32
+ ld1 {v22.4s,v23.4s},[x7],#32
+ ld1 {v7.4s},[x7]
+
+ add x7,x3,#32
+ mov w6,w5
+ b.eq .Lcbc_dec
+
+ cmp w5,#2
+ eor v0.16b,v0.16b,v6.16b
+ eor v5.16b,v16.16b,v7.16b
+ b.eq .Lcbc_enc128
+
+ ld1 {v2.4s,v3.4s},[x7]
+ add x7,x3,#16
+ add x6,x3,#16*4
+ add x12,x3,#16*5
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ add x14,x3,#16*6
+ add x3,x3,#16*7
+ b .Lenter_cbc_enc
+
+.align 4
+.Loop_cbc_enc:
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ st1 {v6.16b},[x1],#16
+.Lenter_cbc_enc:
+ aese v0.16b,v17.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v2.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v16.4s},[x6]
+ cmp w5,#4
+ aese v0.16b,v3.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v17.4s},[x12]
+ b.eq .Lcbc_enc192
+
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v16.4s},[x14]
+ aese v0.16b,v17.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v17.4s},[x3]
+ nop
+
+.Lcbc_enc192:
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ subs x2,x2,#16
+ aese v0.16b,v17.16b
+ aesmc v0.16b,v0.16b
+ csel x8,xzr,x8,eq
+ aese v0.16b,v18.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v19.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v16.16b},[x0],x8
+ aese v0.16b,v20.16b
+ aesmc v0.16b,v0.16b
+ eor v16.16b,v16.16b,v5.16b
+ aese v0.16b,v21.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v17.4s},[x7] // re-pre-load rndkey[1]
+ aese v0.16b,v22.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v23.16b
+ eor v6.16b,v0.16b,v7.16b
+ b.hs .Loop_cbc_enc
+
+ st1 {v6.16b},[x1],#16
+ b .Lcbc_done
+
+.align 5
+.Lcbc_enc128:
+ ld1 {v2.4s,v3.4s},[x7]
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ b .Lenter_cbc_enc128
+.Loop_cbc_enc128:
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ st1 {v6.16b},[x1],#16
+.Lenter_cbc_enc128:
+ aese v0.16b,v17.16b
+ aesmc v0.16b,v0.16b
+ subs x2,x2,#16
+ aese v0.16b,v2.16b
+ aesmc v0.16b,v0.16b
+ csel x8,xzr,x8,eq
+ aese v0.16b,v3.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v18.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v19.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v16.16b},[x0],x8
+ aese v0.16b,v20.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v21.16b
+ aesmc v0.16b,v0.16b
+ aese v0.16b,v22.16b
+ aesmc v0.16b,v0.16b
+ eor v16.16b,v16.16b,v5.16b
+ aese v0.16b,v23.16b
+ eor v6.16b,v0.16b,v7.16b
+ b.hs .Loop_cbc_enc128
+
+ st1 {v6.16b},[x1],#16
+ b .Lcbc_done
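+
+// CBC decryption has no chaining dependency between blocks, so the code
+// below works on 5 blocks (then 3, then a 1-2 block tail) at a time to
+// hide the aesd/aesimc latency; CBC encryption above is necessarily
+// serial, one block at a time.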
+.align 5
+.Lcbc_dec:
+ ld1 {v24.16b},[x0],#16
+ subs x2,x2,#32 // bias
+ add w6,w5,#2
+ orr v3.16b,v0.16b,v0.16b
+ orr v1.16b,v0.16b,v0.16b
+ orr v27.16b,v24.16b,v24.16b
+ b.lo .Lcbc_dec_tail
+
+ orr v1.16b,v24.16b,v24.16b
+ ld1 {v24.16b},[x0],#16
+ orr v2.16b,v0.16b,v0.16b
+ orr v3.16b,v1.16b,v1.16b
+ orr v27.16b,v24.16b,v24.16b
+ cmp x2,#32
+ b.lo .Loop3x_cbc_dec
+
+ ld1 {v25.16b},[x0],#16
+ ld1 {v26.16b},[x0],#16
+ sub x2,x2,#32 // bias
+ mov w6,w5
+ orr v28.16b,v25.16b,v25.16b
+ orr v29.16b,v26.16b,v26.16b
+
+.Loop5x_cbc_dec:
+ aesd v0.16b,v16.16b
+ aesimc v0.16b,v0.16b
+ aesd v1.16b,v16.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v16.16b
+ aesimc v24.16b,v24.16b
+ aesd v25.16b,v16.16b
+ aesimc v25.16b,v25.16b
+ aesd v26.16b,v16.16b
+ aesimc v26.16b,v26.16b
+ ld1 {v16.4s},[x7],#16
+ subs w6,w6,#2
+ aesd v0.16b,v17.16b
+ aesimc v0.16b,v0.16b
+ aesd v1.16b,v17.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v17.16b
+ aesimc v24.16b,v24.16b
+ aesd v25.16b,v17.16b
+ aesimc v25.16b,v25.16b
+ aesd v26.16b,v17.16b
+ aesimc v26.16b,v26.16b
+ ld1 {v17.4s},[x7],#16
+ b.gt .Loop5x_cbc_dec
+
+ aesd v0.16b,v16.16b
+ aesimc v0.16b,v0.16b
+ aesd v1.16b,v16.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v16.16b
+ aesimc v24.16b,v24.16b
+ aesd v25.16b,v16.16b
+ aesimc v25.16b,v25.16b
+ aesd v26.16b,v16.16b
+ aesimc v26.16b,v26.16b
+ cmp x2,#0x40 // because .Lcbc_tail4x
+ sub x2,x2,#0x50
+
+ aesd v0.16b,v17.16b
+ aesimc v0.16b,v0.16b
+ aesd v1.16b,v17.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v17.16b
+ aesimc v24.16b,v24.16b
+ aesd v25.16b,v17.16b
+ aesimc v25.16b,v25.16b
+ aesd v26.16b,v17.16b
+ aesimc v26.16b,v26.16b
+ csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not a typo
+ mov x7,x3
+
+ aesd v0.16b,v18.16b
+ aesimc v0.16b,v0.16b
+ aesd v1.16b,v18.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v18.16b
+ aesimc v24.16b,v24.16b
+ aesd v25.16b,v18.16b
+ aesimc v25.16b,v25.16b
+ aesd v26.16b,v18.16b
+ aesimc v26.16b,v26.16b
+ add x0,x0,x6 // x0 is adjusted in such a way that
+ // at exit from the loop v1.16b-v26.16b
+ // are loaded with the last "words"
+ add x6,x2,#0x60 // because .Lcbc_tail4x
+
+ aesd v0.16b,v19.16b
+ aesimc v0.16b,v0.16b
+ aesd v1.16b,v19.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v19.16b
+ aesimc v24.16b,v24.16b
+ aesd v25.16b,v19.16b
+ aesimc v25.16b,v25.16b
+ aesd v26.16b,v19.16b
+ aesimc v26.16b,v26.16b
+
+ aesd v0.16b,v20.16b
+ aesimc v0.16b,v0.16b
+ aesd v1.16b,v20.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v20.16b
+ aesimc v24.16b,v24.16b
+ aesd v25.16b,v20.16b
+ aesimc v25.16b,v25.16b
+ aesd v26.16b,v20.16b
+ aesimc v26.16b,v26.16b
+
+ aesd v0.16b,v21.16b
+ aesimc v0.16b,v0.16b
+ aesd v1.16b,v21.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v21.16b
+ aesimc v24.16b,v24.16b
+ aesd v25.16b,v21.16b
+ aesimc v25.16b,v25.16b
+ aesd v26.16b,v21.16b
+ aesimc v26.16b,v26.16b
+
+ aesd v0.16b,v22.16b
+ aesimc v0.16b,v0.16b
+ aesd v1.16b,v22.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v22.16b
+ aesimc v24.16b,v24.16b
+ aesd v25.16b,v22.16b
+ aesimc v25.16b,v25.16b
+ aesd v26.16b,v22.16b
+ aesimc v26.16b,v26.16b
+
+ eor v4.16b,v6.16b,v7.16b
+ aesd v0.16b,v23.16b
+ eor v5.16b,v2.16b,v7.16b
+ ld1 {v2.16b},[x0],#16
+ aesd v1.16b,v23.16b
+ eor v17.16b,v3.16b,v7.16b
+ ld1 {v3.16b},[x0],#16
+ aesd v24.16b,v23.16b
+ eor v30.16b,v27.16b,v7.16b
+ ld1 {v27.16b},[x0],#16
+ aesd v25.16b,v23.16b
+ eor v31.16b,v28.16b,v7.16b
+ ld1 {v28.16b},[x0],#16
+ aesd v26.16b,v23.16b
+ orr v6.16b,v29.16b,v29.16b
+ ld1 {v29.16b},[x0],#16
+ cbz x6,.Lcbc_tail4x
+ ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
+ eor v4.16b,v4.16b,v0.16b
+ orr v0.16b,v2.16b,v2.16b
+ eor v5.16b,v5.16b,v1.16b
+ orr v1.16b,v3.16b,v3.16b
+ eor v17.16b,v17.16b,v24.16b
+ orr v24.16b,v27.16b,v27.16b
+ eor v30.16b,v30.16b,v25.16b
+ orr v25.16b,v28.16b,v28.16b
+ eor v31.16b,v31.16b,v26.16b
+ st1 {v4.16b},[x1],#16
+ orr v26.16b,v29.16b,v29.16b
+ st1 {v5.16b},[x1],#16
+ mov w6,w5
+ st1 {v17.16b},[x1],#16
+ ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
+ st1 {v30.16b},[x1],#16
+ st1 {v31.16b},[x1],#16
+ b.hs .Loop5x_cbc_dec
+
+ add x2,x2,#0x50
+ cbz x2,.Lcbc_done
+
+ add w6,w5,#2
+ subs x2,x2,#0x30
+ orr v0.16b,v27.16b,v27.16b
+ orr v2.16b,v27.16b,v27.16b
+ orr v1.16b,v28.16b,v28.16b
+ orr v3.16b,v28.16b,v28.16b
+ orr v24.16b,v29.16b,v29.16b
+ orr v27.16b,v29.16b,v29.16b
+ b.lo .Lcbc_dec_tail
+
+ b .Loop3x_cbc_dec
+
+.align 4
+.Lcbc_tail4x:
+ eor v5.16b,v4.16b,v1.16b
+ eor v17.16b,v17.16b,v24.16b
+ eor v30.16b,v30.16b,v25.16b
+ eor v31.16b,v31.16b,v26.16b
+ st1 {v5.16b},[x1],#16
+ st1 {v17.16b},[x1],#16
+ st1 {v30.16b},[x1],#16
+ st1 {v31.16b},[x1],#16
+
+ b .Lcbc_done
+.align 4
+.Loop3x_cbc_dec:
+ aesd v0.16b,v16.16b
+ aesimc v0.16b,v0.16b
+ aesd v1.16b,v16.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v16.16b
+ aesimc v24.16b,v24.16b
+ ld1 {v16.4s},[x7],#16
+ subs w6,w6,#2
+ aesd v0.16b,v17.16b
+ aesimc v0.16b,v0.16b
+ aesd v1.16b,v17.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v17.16b
+ aesimc v24.16b,v24.16b
+ ld1 {v17.4s},[x7],#16
+ b.gt .Loop3x_cbc_dec
+
+ aesd v0.16b,v16.16b
+ aesimc v0.16b,v0.16b
+ aesd v1.16b,v16.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v16.16b
+ aesimc v24.16b,v24.16b
+ eor v4.16b,v6.16b,v7.16b
+ subs x2,x2,#0x30
+ eor v5.16b,v2.16b,v7.16b
+ csel x6,x2,x6,lo // x6, w6, is zero at this point
+ aesd v0.16b,v17.16b
+ aesimc v0.16b,v0.16b
+ aesd v1.16b,v17.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v17.16b
+ aesimc v24.16b,v24.16b
+ eor v17.16b,v3.16b,v7.16b
+ add x0,x0,x6 // x0 is adjusted in such a way that
+ // at exit from the loop v1.16b-v24.16b
+ // are loaded with the last "words"
+ orr v6.16b,v27.16b,v27.16b
+ mov x7,x3
+ aesd v0.16b,v20.16b
+ aesimc v0.16b,v0.16b
+ aesd v1.16b,v20.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v20.16b
+ aesimc v24.16b,v24.16b
+ ld1 {v2.16b},[x0],#16
+ aesd v0.16b,v21.16b
+ aesimc v0.16b,v0.16b
+ aesd v1.16b,v21.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v21.16b
+ aesimc v24.16b,v24.16b
+ ld1 {v3.16b},[x0],#16
+ aesd v0.16b,v22.16b
+ aesimc v0.16b,v0.16b
+ aesd v1.16b,v22.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v22.16b
+ aesimc v24.16b,v24.16b
+ ld1 {v27.16b},[x0],#16
+ aesd v0.16b,v23.16b
+ aesd v1.16b,v23.16b
+ aesd v24.16b,v23.16b
+ ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
+ add w6,w5,#2
+ eor v4.16b,v4.16b,v0.16b
+ eor v5.16b,v5.16b,v1.16b
+ eor v24.16b,v24.16b,v17.16b
+ ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
+ st1 {v4.16b},[x1],#16
+ orr v0.16b,v2.16b,v2.16b
+ st1 {v5.16b},[x1],#16
+ orr v1.16b,v3.16b,v3.16b
+ st1 {v24.16b},[x1],#16
+ orr v24.16b,v27.16b,v27.16b
+ b.hs .Loop3x_cbc_dec
+
+ cmn x2,#0x30
+ b.eq .Lcbc_done
+ nop
+
+.Lcbc_dec_tail:
+ aesd v1.16b,v16.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v16.16b
+ aesimc v24.16b,v24.16b
+ ld1 {v16.4s},[x7],#16
+ subs w6,w6,#2
+ aesd v1.16b,v17.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v17.16b
+ aesimc v24.16b,v24.16b
+ ld1 {v17.4s},[x7],#16
+ b.gt .Lcbc_dec_tail
+
+ aesd v1.16b,v16.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v16.16b
+ aesimc v24.16b,v24.16b
+ aesd v1.16b,v17.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v17.16b
+ aesimc v24.16b,v24.16b
+ aesd v1.16b,v20.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v20.16b
+ aesimc v24.16b,v24.16b
+ cmn x2,#0x20
+ aesd v1.16b,v21.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v21.16b
+ aesimc v24.16b,v24.16b
+ eor v5.16b,v6.16b,v7.16b
+ aesd v1.16b,v22.16b
+ aesimc v1.16b,v1.16b
+ aesd v24.16b,v22.16b
+ aesimc v24.16b,v24.16b
+ eor v17.16b,v3.16b,v7.16b
+ aesd v1.16b,v23.16b
+ aesd v24.16b,v23.16b
+ b.eq .Lcbc_dec_one
+ eor v5.16b,v5.16b,v1.16b
+ eor v17.16b,v17.16b,v24.16b
+ orr v6.16b,v27.16b,v27.16b
+ st1 {v5.16b},[x1],#16
+ st1 {v17.16b},[x1],#16
+ b .Lcbc_done
+
+.Lcbc_dec_one:
+ eor v5.16b,v5.16b,v24.16b
+ orr v6.16b,v27.16b,v27.16b
+ st1 {v5.16b},[x1],#16
+
+.Lcbc_done:
+ st1 {v6.16b},[x4]
+.Lcbc_abort:
+ ldr x29,[sp],#16
+ ret
+.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt
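+
+// CTR mode: the counter is the big-endian 32-bit word in bytes 12-15 of
+// the IV. It is byte-swapped on little-endian, incremented once per
+// block, and blocks are processed 5 (then 3, then 1-2) at a time.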
+.globl aes_v8_ctr32_encrypt_blocks
+.type aes_v8_ctr32_encrypt_blocks,%function
+.align 5
+aes_v8_ctr32_encrypt_blocks:
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+ ldr w5,[x3,#240]
+
+ ldr w8, [x4, #12]
+ ld1 {v0.4s},[x4]
+
+ ld1 {v16.4s,v17.4s},[x3] // load key schedule...
+ sub w5,w5,#4
+ mov x12,#16
+ cmp x2,#2
+ add x7,x3,x5,lsl#4 // pointer to last 5 round keys
+ sub w5,w5,#2
+ ld1 {v20.4s,v21.4s},[x7],#32
+ ld1 {v22.4s,v23.4s},[x7],#32
+ ld1 {v7.4s},[x7]
+ add x7,x3,#32
+ mov w6,w5
+ csel x12,xzr,x12,lo
+#ifndef __ARMEB__
+ rev w8, w8
+#endif
+ orr v1.16b,v0.16b,v0.16b
+ add w10, w8, #1
+ orr v18.16b,v0.16b,v0.16b
+ add w8, w8, #2
+ orr v6.16b,v0.16b,v0.16b
+ rev w10, w10
+ mov v1.s[3],w10
+ b.ls .Lctr32_tail
+ rev w12, w8
+ sub x2,x2,#3 // bias
+ mov v18.s[3],w12
+ cmp x2,#2
+ b.lo .Loop3x_ctr32
+
+ add w13,w8,#1
+ add w14,w8,#2
+ orr v24.16b,v0.16b,v0.16b
+ rev w13,w13
+ orr v25.16b,v0.16b,v0.16b
+ rev w14,w14
+ mov v24.s[3],w13
+ sub x2,x2,#2 // bias
+ mov v25.s[3],w14
+ add w8,w8,#2
+ b .Loop5x_ctr32
+
+.align 4
+.Loop5x_ctr32:
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v16.16b
+ aesmc v1.16b,v1.16b
+ aese v18.16b,v16.16b
+ aesmc v18.16b,v18.16b
+ aese v24.16b,v16.16b
+ aesmc v24.16b,v24.16b
+ aese v25.16b,v16.16b
+ aesmc v25.16b,v25.16b
+ ld1 {v16.4s},[x7],#16
+ subs w6,w6,#2
+ aese v0.16b,v17.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v17.16b
+ aesmc v1.16b,v1.16b
+ aese v18.16b,v17.16b
+ aesmc v18.16b,v18.16b
+ aese v24.16b,v17.16b
+ aesmc v24.16b,v24.16b
+ aese v25.16b,v17.16b
+ aesmc v25.16b,v25.16b
+ ld1 {v17.4s},[x7],#16
+ b.gt .Loop5x_ctr32
+
+ mov x7,x3
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v16.16b
+ aesmc v1.16b,v1.16b
+ aese v18.16b,v16.16b
+ aesmc v18.16b,v18.16b
+ aese v24.16b,v16.16b
+ aesmc v24.16b,v24.16b
+ aese v25.16b,v16.16b
+ aesmc v25.16b,v25.16b
+ ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
+
+ aese v0.16b,v17.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v17.16b
+ aesmc v1.16b,v1.16b
+ aese v18.16b,v17.16b
+ aesmc v18.16b,v18.16b
+ aese v24.16b,v17.16b
+ aesmc v24.16b,v24.16b
+ aese v25.16b,v17.16b
+ aesmc v25.16b,v25.16b
+ ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
+
+ aese v0.16b,v20.16b
+ aesmc v0.16b,v0.16b
+ add w9,w8,#1
+ add w10,w8,#2
+ aese v1.16b,v20.16b
+ aesmc v1.16b,v1.16b
+ add w12,w8,#3
+ add w13,w8,#4
+ aese v18.16b,v20.16b
+ aesmc v18.16b,v18.16b
+ add w14,w8,#5
+ rev w9,w9
+ aese v24.16b,v20.16b
+ aesmc v24.16b,v24.16b
+ rev w10,w10
+ rev w12,w12
+ aese v25.16b,v20.16b
+ aesmc v25.16b,v25.16b
+ rev w13,w13
+ rev w14,w14
+
+ aese v0.16b,v21.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v21.16b
+ aesmc v1.16b,v1.16b
+ aese v18.16b,v21.16b
+ aesmc v18.16b,v18.16b
+ aese v24.16b,v21.16b
+ aesmc v24.16b,v24.16b
+ aese v25.16b,v21.16b
+ aesmc v25.16b,v25.16b
+
+ aese v0.16b,v22.16b
+ aesmc v0.16b,v0.16b
+ ld1 {v2.16b},[x0],#16
+ aese v1.16b,v22.16b
+ aesmc v1.16b,v1.16b
+ ld1 {v3.16b},[x0],#16
+ aese v18.16b,v22.16b
+ aesmc v18.16b,v18.16b
+ ld1 {v19.16b},[x0],#16
+ aese v24.16b,v22.16b
+ aesmc v24.16b,v24.16b
+ ld1 {v26.16b},[x0],#16
+ aese v25.16b,v22.16b
+ aesmc v25.16b,v25.16b
+ ld1 {v27.16b},[x0],#16
+
+ aese v0.16b,v23.16b
+ eor v2.16b,v2.16b,v7.16b
+ aese v1.16b,v23.16b
+ eor v3.16b,v3.16b,v7.16b
+ aese v18.16b,v23.16b
+ eor v19.16b,v19.16b,v7.16b
+ aese v24.16b,v23.16b
+ eor v26.16b,v26.16b,v7.16b
+ aese v25.16b,v23.16b
+ eor v27.16b,v27.16b,v7.16b
+
+ eor v2.16b,v2.16b,v0.16b
+ orr v0.16b,v6.16b,v6.16b
+ eor v3.16b,v3.16b,v1.16b
+ orr v1.16b,v6.16b,v6.16b
+ eor v19.16b,v19.16b,v18.16b
+ orr v18.16b,v6.16b,v6.16b
+ eor v26.16b,v26.16b,v24.16b
+ orr v24.16b,v6.16b,v6.16b
+ eor v27.16b,v27.16b,v25.16b
+ orr v25.16b,v6.16b,v6.16b
+
+ st1 {v2.16b},[x1],#16
+ mov v0.s[3],w9
+ st1 {v3.16b},[x1],#16
+ mov v1.s[3],w10
+ st1 {v19.16b},[x1],#16
+ mov v18.s[3],w12
+ st1 {v26.16b},[x1],#16
+ mov v24.s[3],w13
+ st1 {v27.16b},[x1],#16
+ mov v25.s[3],w14
+
+ mov w6,w5
+ cbz x2,.Lctr32_done
+
+ add w8,w8,#5
+ subs x2,x2,#5
+ b.hs .Loop5x_ctr32
+
+ add x2,x2,#5
+ sub w8,w8,#5
+
+ cmp x2,#2
+ mov x12,#16
+ csel x12,xzr,x12,lo
+ b.ls .Lctr32_tail
+
+ sub x2,x2,#3 // bias
+ add w8,w8,#3
+ b .Loop3x_ctr32
+
+.align 4
+.Loop3x_ctr32:
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v16.16b
+ aesmc v1.16b,v1.16b
+ aese v18.16b,v16.16b
+ aesmc v18.16b,v18.16b
+ ld1 {v16.4s},[x7],#16
+ subs w6,w6,#2
+ aese v0.16b,v17.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v17.16b
+ aesmc v1.16b,v1.16b
+ aese v18.16b,v17.16b
+ aesmc v18.16b,v18.16b
+ ld1 {v17.4s},[x7],#16
+ b.gt .Loop3x_ctr32
+
+ aese v0.16b,v16.16b
+ aesmc v4.16b,v0.16b
+ aese v1.16b,v16.16b
+ aesmc v5.16b,v1.16b
+ ld1 {v2.16b},[x0],#16
+ orr v0.16b,v6.16b,v6.16b
+ aese v18.16b,v16.16b
+ aesmc v18.16b,v18.16b
+ ld1 {v3.16b},[x0],#16
+ orr v1.16b,v6.16b,v6.16b
+ aese v4.16b,v17.16b
+ aesmc v4.16b,v4.16b
+ aese v5.16b,v17.16b
+ aesmc v5.16b,v5.16b
+ ld1 {v19.16b},[x0],#16
+ mov x7,x3
+ aese v18.16b,v17.16b
+ aesmc v17.16b,v18.16b
+ orr v18.16b,v6.16b,v6.16b
+ add w9,w8,#1
+ aese v4.16b,v20.16b
+ aesmc v4.16b,v4.16b
+ aese v5.16b,v20.16b
+ aesmc v5.16b,v5.16b
+ eor v2.16b,v2.16b,v7.16b
+ add w10,w8,#2
+ aese v17.16b,v20.16b
+ aesmc v17.16b,v17.16b
+ eor v3.16b,v3.16b,v7.16b
+ add w8,w8,#3
+ aese v4.16b,v21.16b
+ aesmc v4.16b,v4.16b
+ aese v5.16b,v21.16b
+ aesmc v5.16b,v5.16b
+ eor v19.16b,v19.16b,v7.16b
+ rev w9,w9
+ aese v17.16b,v21.16b
+ aesmc v17.16b,v17.16b
+ mov v0.s[3], w9
+ rev w10,w10
+ aese v4.16b,v22.16b
+ aesmc v4.16b,v4.16b
+ aese v5.16b,v22.16b
+ aesmc v5.16b,v5.16b
+ mov v1.s[3], w10
+ rev w12,w8
+ aese v17.16b,v22.16b
+ aesmc v17.16b,v17.16b
+ mov v18.s[3], w12
+ subs x2,x2,#3
+ aese v4.16b,v23.16b
+ aese v5.16b,v23.16b
+ aese v17.16b,v23.16b
+
+ eor v2.16b,v2.16b,v4.16b
+ ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0]
+ st1 {v2.16b},[x1],#16
+ eor v3.16b,v3.16b,v5.16b
+ mov w6,w5
+ st1 {v3.16b},[x1],#16
+ eor v19.16b,v19.16b,v17.16b
+ ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1]
+ st1 {v19.16b},[x1],#16
+ b.hs .Loop3x_ctr32
+
+ adds x2,x2,#3
+ b.eq .Lctr32_done
+ cmp x2,#1
+ mov x12,#16
+ csel x12,xzr,x12,eq
+
+.Lctr32_tail:
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v16.16b
+ aesmc v1.16b,v1.16b
+ ld1 {v16.4s},[x7],#16
+ subs w6,w6,#2
+ aese v0.16b,v17.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v17.16b
+ aesmc v1.16b,v1.16b
+ ld1 {v17.4s},[x7],#16
+ b.gt .Lctr32_tail
+
+ aese v0.16b,v16.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v16.16b
+ aesmc v1.16b,v1.16b
+ aese v0.16b,v17.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v17.16b
+ aesmc v1.16b,v1.16b
+ ld1 {v2.16b},[x0],x12
+ aese v0.16b,v20.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v20.16b
+ aesmc v1.16b,v1.16b
+ ld1 {v3.16b},[x0]
+ aese v0.16b,v21.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v21.16b
+ aesmc v1.16b,v1.16b
+ eor v2.16b,v2.16b,v7.16b
+ aese v0.16b,v22.16b
+ aesmc v0.16b,v0.16b
+ aese v1.16b,v22.16b
+ aesmc v1.16b,v1.16b
+ eor v3.16b,v3.16b,v7.16b
+ aese v0.16b,v23.16b
+ aese v1.16b,v23.16b
+
+ cmp x2,#1
+ eor v2.16b,v2.16b,v0.16b
+ eor v3.16b,v3.16b,v1.16b
+ st1 {v2.16b},[x1],#16
+ b.eq .Lctr32_done
+ st1 {v3.16b},[x1]
+
+.Lctr32_done:
+ ldr x29,[sp],#16
+ ret
+.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks
--- /dev/null
+/* $OpenBSD: cryptox.c,v 1.1 2021/02/21 14:55:17 tobhe Exp $ */
+/*
+ * Copyright (c) 2003 Jason Wright
+ * Copyright (c) 2003, 2004 Theo de Raadt
+ * Copyright (c) 2010 Thordur I. Bjornsson
+ * Copyright (c) 2010 Mike Belopuhov
+ * Copyright (c) 2020 Tobias Heider
+ * All rights reserved.
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include <sys/param.h>
+#include <sys/systm.h>
+#include <sys/atomic.h>
+#include <sys/malloc.h>
+#include <sys/pool.h>
+#include <sys/mbuf.h>
+#include <sys/smr.h>
+
+#include <crypto/cryptodev.h>
+#include <crypto/aes.h>
+#include <crypto/gmac.h>
+#include <crypto/xform.h>
+#include <crypto/cryptosoft.h>
+
+#include <machine/vfp.h>
+
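+/*
+ * Layout expected by aesv8-armx.S: 4 * (AES_MAXROUNDS + 1) = 60 words of
+ * round keys (240 bytes with AES_MAXROUNDS == 14) followed directly by
+ * the round count, which the assembly stores at the end of key setup and
+ * reads back with "ldr wN,[xN,#240]".
+ */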
+struct cryptox_aes_key {
+ uint32_t rd_key[4 *(AES_MAXROUNDS + 1)];
+ int rounds;
+};
+
+struct cryptox_session {
+ struct cryptox_aes_key ses_ekey;
+ struct cryptox_aes_key ses_dkey;
+ uint32_t ses_klen;
+ int ses_sid;
+ struct swcr_data *ses_swd;
+ SMR_LIST_ENTRY(cryptox_session)
+ ses_entries;
+ uint8_t *ses_buf;
+ size_t ses_buflen;
+ struct smr_entry ses_smr;
+};
+
+struct cryptox_softc {
+ int32_t sc_cid;
+ uint32_t sc_sid;
+ struct mutex sc_mtx;
+ SMR_LIST_HEAD(, cryptox_session)
+ sc_sessions;
+} *cryptox_sc;
+
+struct pool cryptoxpl;
+
+uint32_t cryptox_ops;
+
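+/*
+ * AES primitives implemented in aesv8-armx.S with the ARMv8 Crypto
+ * Extensions. They run on the SIMD register file, so every call below
+ * is bracketed by vfp_kernel_enter()/vfp_kernel_exit().
+ */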
+extern int aes_v8_set_encrypt_key(const uint8_t *user_key, const int bits,
+ struct cryptox_aes_key *key);
+extern int aes_v8_set_decrypt_key(const uint8_t *user_key, const int bits,
+ struct cryptox_aes_key *key);
+extern void aes_v8_encrypt(const uint8_t *in, uint8_t *out,
+ const struct cryptox_aes_key *key);
+extern void aes_v8_decrypt(const uint8_t *in, uint8_t *out,
+ const struct cryptox_aes_key *key);
+extern void aes_v8_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length,
+ const struct cryptox_aes_key *key, uint8_t *ivec, const int enc);
+extern void aes_v8_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out,
+ size_t len, const struct cryptox_aes_key *key,
+ const uint8_t ivec[16]);
+
+void cryptox_setup(void);
+int cryptox_newsession(u_int32_t *, struct cryptoini *);
+int cryptox_freesession(u_int64_t);
+int cryptox_process(struct cryptop *);
+
+struct cryptox_session *
+ cryptox_get(uint32_t);
+void cryptox_free(struct cryptox_session *);
+void cryptox_free_smr(void *);
+
+int cryptox_swauth(struct cryptop *, struct cryptodesc *, struct swcr_data *,
+ caddr_t);
+
+int cryptox_encdec(struct cryptop *, struct cryptodesc *,
+ struct cryptox_session *);
+
+void
+cryptox_setup(void)
+{
+ int algs[CRYPTO_ALGORITHM_MAX + 1];
+
+ cryptox_sc = malloc(sizeof(*cryptox_sc), M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (cryptox_sc == NULL)
+ return;
+
+ bzero(algs, sizeof(algs));
+
+ /* Encryption algorithms. */
+ algs[CRYPTO_AES_CBC] = CRYPTO_ALG_FLAG_SUPPORTED;
+
+ /* HMACs needed for IPsec, handled by software crypto. */
+ algs[CRYPTO_MD5_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+ algs[CRYPTO_SHA1_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+ algs[CRYPTO_RIPEMD160_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+ algs[CRYPTO_SHA2_256_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+ algs[CRYPTO_SHA2_384_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+ algs[CRYPTO_SHA2_512_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED;
+
+ /* IPsec Extended Sequence Numbers. */
+ algs[CRYPTO_ESN] = CRYPTO_ALG_FLAG_SUPPORTED;
+
+ cryptox_sc->sc_cid = crypto_get_driverid(CRYPTOCAP_F_MPSAFE);
+ if (cryptox_sc->sc_cid < 0) {
+ free(cryptox_sc, M_DEVBUF, sizeof(*cryptox_sc));
+ cryptox_sc = NULL;
+ return;
+ }
+
+ pool_init(&cryptoxpl, sizeof(struct cryptox_session), 16, IPL_VM, 0,
+ "cryptox", NULL);
+ pool_setlowat(&cryptoxpl, 2);
+
+ mtx_init(&cryptox_sc->sc_mtx, IPL_VM);
+
+ crypto_register(cryptox_sc->sc_cid, algs, cryptox_newsession,
+ cryptox_freesession, cryptox_process);
+}
+
+int
+cryptox_newsession(u_int32_t *sidp, struct cryptoini *cri)
+{
+ struct cryptox_session *ses = NULL;
+ struct cryptoini *c;
+ struct auth_hash *axf;
+ struct swcr_data *swd;
+ int i;
+
+ if (sidp == NULL || cri == NULL)
+ return (EINVAL);
+
+ ses = pool_get(&cryptoxpl, PR_NOWAIT | PR_ZERO);
+ if (!ses)
+ return (ENOMEM);
+ smr_init(&ses->ses_smr);
+
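+ /*
+ * Preallocate a PAGE_SIZE bounce buffer; cryptox_encdec() replaces
+ * it with a larger one if a request does not fit.
+ */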
+ ses->ses_buf = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO);
+ if (ses->ses_buf != NULL)
+ ses->ses_buflen = PAGE_SIZE;
+
+ for (c = cri; c != NULL; c = c->cri_next) {
+ switch (c->cri_alg) {
+ case CRYPTO_AES_CBC:
+ ses->ses_klen = c->cri_klen / 8;
+ vfp_kernel_enter();
+ aes_v8_set_encrypt_key(c->cri_key, c->cri_klen, &ses->ses_ekey);
+ aes_v8_set_decrypt_key(c->cri_key, c->cri_klen, &ses->ses_dkey);
+ vfp_kernel_exit();
+ break;
+
+ case CRYPTO_MD5_HMAC:
+ axf = &auth_hash_hmac_md5_96;
+ goto authcommon;
+ case CRYPTO_SHA1_HMAC:
+ axf = &auth_hash_hmac_sha1_96;
+ goto authcommon;
+ case CRYPTO_RIPEMD160_HMAC:
+ axf = &auth_hash_hmac_ripemd_160_96;
+ goto authcommon;
+ case CRYPTO_SHA2_256_HMAC:
+ axf = &auth_hash_hmac_sha2_256_128;
+ goto authcommon;
+ case CRYPTO_SHA2_384_HMAC:
+ axf = &auth_hash_hmac_sha2_384_192;
+ goto authcommon;
+ case CRYPTO_SHA2_512_HMAC:
+ axf = &auth_hash_hmac_sha2_512_256;
+ authcommon:
+ swd = malloc(sizeof(struct swcr_data), M_CRYPTO_DATA,
+ M_NOWAIT|M_ZERO);
+ if (swd == NULL) {
+ cryptox_free(ses);
+ return (ENOMEM);
+ }
+ ses->ses_swd = swd;
+
+ swd->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA,
+ M_NOWAIT);
+ if (swd->sw_ictx == NULL) {
+ cryptox_free(ses);
+ return (ENOMEM);
+ }
+
+ swd->sw_octx = malloc(axf->ctxsize, M_CRYPTO_DATA,
+ M_NOWAIT);
+ if (swd->sw_octx == NULL) {
+ cryptox_free(ses);
+ return (ENOMEM);
+ }
+
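+ /*
+ * Precompute the HMAC contexts: hash (key ^ IPAD) into sw_ictx
+ * and (key ^ OPAD) into sw_octx, padded to the hash block size;
+ * the final XOR pass restores the caller's key.
+ */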
+ for (i = 0; i < c->cri_klen / 8; i++)
+ c->cri_key[i] ^= HMAC_IPAD_VAL;
+
+ axf->Init(swd->sw_ictx);
+ axf->Update(swd->sw_ictx, c->cri_key, c->cri_klen / 8);
+ axf->Update(swd->sw_ictx, hmac_ipad_buffer,
+ axf->blocksize - (c->cri_klen / 8));
+
+ for (i = 0; i < c->cri_klen / 8; i++)
+ c->cri_key[i] ^= (HMAC_IPAD_VAL ^
+ HMAC_OPAD_VAL);
+
+ axf->Init(swd->sw_octx);
+ axf->Update(swd->sw_octx, c->cri_key, c->cri_klen / 8);
+ axf->Update(swd->sw_octx, hmac_opad_buffer,
+ axf->blocksize - (c->cri_klen / 8));
+
+ for (i = 0; i < c->cri_klen / 8; i++)
+ c->cri_key[i] ^= HMAC_OPAD_VAL;
+
+ swd->sw_axf = axf;
+ swd->sw_alg = c->cri_alg;
+
+ break;
+
+ case CRYPTO_ESN:
+ /* nothing to do */
+ break;
+
+ default:
+ cryptox_free(ses);
+ return (EINVAL);
+ }
+ }
+
+ mtx_enter(&cryptox_sc->sc_mtx);
+ ses->ses_sid = ++cryptox_sc->sc_sid;
+ SMR_LIST_INSERT_HEAD_LOCKED(&cryptox_sc->sc_sessions, ses, ses_entries);
+ mtx_leave(&cryptox_sc->sc_mtx);
+
+ *sidp = ses->ses_sid;
+ return (0);
+}
+
+int
+cryptox_freesession(u_int64_t tid)
+{
+ struct cryptox_session *ses;
+ u_int32_t sid = (u_int32_t)tid;
+
+ mtx_enter(&cryptox_sc->sc_mtx);
+ SMR_LIST_FOREACH_LOCKED(ses, &cryptox_sc->sc_sessions, ses_entries) {
+ if (ses->ses_sid == sid) {
+ SMR_LIST_REMOVE_LOCKED(ses, ses_entries);
+ break;
+ }
+ }
+ mtx_leave(&cryptox_sc->sc_mtx);
+
+ if (ses == NULL)
+ return (EINVAL);
+
+ smr_call(&ses->ses_smr, cryptox_free_smr, ses);
+
+ return (0);
+}
+
+void
+cryptox_free(struct cryptox_session *ses)
+{
+ struct swcr_data *swd;
+ struct auth_hash *axf;
+
+ if (ses->ses_swd) {
+ swd = ses->ses_swd;
+ axf = swd->sw_axf;
+
+ if (swd->sw_ictx) {
+ explicit_bzero(swd->sw_ictx, axf->ctxsize);
+ free(swd->sw_ictx, M_CRYPTO_DATA, axf->ctxsize);
+ }
+ if (swd->sw_octx) {
+ explicit_bzero(swd->sw_octx, axf->ctxsize);
+ free(swd->sw_octx, M_CRYPTO_DATA, axf->ctxsize);
+ }
+ free(swd, M_CRYPTO_DATA, sizeof(*swd));
+ }
+
+ if (ses->ses_buf) {
+ explicit_bzero(ses->ses_buf, ses->ses_buflen);
+ free(ses->ses_buf, M_DEVBUF, ses->ses_buflen);
+ }
+
+ explicit_bzero(ses, sizeof (*ses));
+ pool_put(&cryptoxpl, ses);
+}
+
+void
+cryptox_free_smr(void *arg)
+{
+ struct cryptox_session *ses = arg;
+
+ cryptox_free(ses);
+}
+
+struct cryptox_session *
+cryptox_get(uint32_t sid)
+{
+ struct cryptox_session *ses = NULL;
+
+ SMR_ASSERT_CRITICAL();
+ SMR_LIST_FOREACH(ses, &cryptox_sc->sc_sessions, ses_entries) {
+ if (ses->ses_sid == sid)
+ break;
+ }
+ return (ses);
+}
+
+int
+cryptox_swauth(struct cryptop *crp, struct cryptodesc *crd,
+ struct swcr_data *sw, caddr_t buf)
+{
+ int type;
+
+ if (crp->crp_flags & CRYPTO_F_IMBUF)
+ type = CRYPTO_BUF_MBUF;
+ else
+ type = CRYPTO_BUF_IOV;
+
+ return (swcr_authcompute(crp, crd, sw, buf, type));
+}
+
+int
+cryptox_encdec(struct cryptop *crp, struct cryptodesc *crd,
+ struct cryptox_session *ses)
+{
+ int err, ivlen, iskip, oskip, rlen;
+ uint8_t iv[EALG_MAX_BLOCK_LEN];
+ uint8_t *buf = ses->ses_buf;
+
+ rlen = err = iskip = oskip = 0;
+
+ if (crd->crd_len > ses->ses_buflen) {
+ if (buf != NULL) {
+ explicit_bzero(buf, ses->ses_buflen);
+ free(buf, M_DEVBUF, ses->ses_buflen);
+ }
+
+ ses->ses_buflen = 0;
+ rlen = roundup(crd->crd_len, EALG_MAX_BLOCK_LEN);
+ ses->ses_buf = buf = malloc(rlen, M_DEVBUF, M_NOWAIT |
+ M_ZERO);
+ if (buf == NULL)
+ return (ENOMEM);
+ ses->ses_buflen = rlen;
+ }
+
+ /* AES-CBC uses a 16 byte IV */
+ ivlen = 16;
+
+ /* Initialize the IV */
+ if (crd->crd_flags & CRD_F_ENCRYPT) {
+ if (crd->crd_flags & CRD_F_IV_EXPLICIT)
+ memcpy(iv, crd->crd_iv, ivlen);
+ else
+ arc4random_buf(iv, ivlen);
+
+ /* Do we need to write the IV? */
+ if ((crd->crd_flags & CRD_F_IV_PRESENT) == 0) {
+ if (crp->crp_flags & CRYPTO_F_IMBUF) {
+ if (m_copyback((struct mbuf *)crp->crp_buf,
+ crd->crd_inject, ivlen, iv, M_NOWAIT)) {
+ err = ENOMEM;
+ goto out;
+ }
+ } else
+ cuio_copyback((struct uio *)crp->crp_buf,
+ crd->crd_inject, ivlen, iv);
+ }
+ } else {
+ if (crd->crd_flags & CRD_F_IV_EXPLICIT)
+ memcpy(iv, crd->crd_iv, ivlen);
+ else {
+ if (crp->crp_flags & CRYPTO_F_IMBUF)
+ m_copydata((struct mbuf *)crp->crp_buf,
+ crd->crd_inject, ivlen, iv);
+ else
+ cuio_copydata((struct uio *)crp->crp_buf,
+ crd->crd_inject, ivlen, iv);
+ }
+ }
+
+ /* Copy data to be processed to the buffer */
+ if (crp->crp_flags & CRYPTO_F_IMBUF)
+ m_copydata((struct mbuf *)crp->crp_buf, crd->crd_skip,
+ crd->crd_len, buf);
+ else
+ cuio_copydata((struct uio *)crp->crp_buf, crd->crd_skip,
+ crd->crd_len, buf);
+
+ /* Apply cipher */
+ vfp_kernel_enter();
+ switch (crd->crd_alg) {
+ case CRYPTO_AES_CBC:
+ if (crd->crd_flags & CRD_F_ENCRYPT)
+ aes_v8_cbc_encrypt(buf, buf, crd->crd_len, &ses->ses_ekey, iv, 1);
+ else
+ aes_v8_cbc_encrypt(buf, buf, crd->crd_len, &ses->ses_dkey, iv, 0);
+ break;
+ }
+ vfp_kernel_exit();
+
+ cryptox_ops++;
+
+ /* Copy back the result */
+ if (crp->crp_flags & CRYPTO_F_IMBUF) {
+ if (m_copyback((struct mbuf *)crp->crp_buf, crd->crd_skip,
+ crd->crd_len, buf, M_NOWAIT)) {
+ err = ENOMEM;
+ goto out;
+ }
+ } else
+ cuio_copyback((struct uio *)crp->crp_buf, crd->crd_skip,
+ crd->crd_len, buf);
+
+out:
+ explicit_bzero(buf, roundup(crd->crd_len, EALG_MAX_BLOCK_LEN));
+ return (err);
+}
+
+int
+cryptox_process(struct cryptop *crp)
+{
+ struct cryptox_session *ses;
+ struct cryptodesc *crd, *crde;
+ int err = 0;
+ int i;
+
+ if (crp == NULL || crp->crp_callback == NULL)
+ return (EINVAL);
+ if (crp->crp_ndesc < 1)
+ return (EINVAL);
+
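+ /*
+ * Session lookup runs under SMR: cryptox_get() requires the
+ * read-side critical section and cryptox_freesession() defers
+ * the actual free through smr_call().
+ */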
+ smr_read_enter();
+ ses = cryptox_get(crp->crp_sid & 0xffffffff);
+ if (!ses) {
+ err = EINVAL;
+ goto out;
+ }
+
+ crde = NULL;
+ for (i = 0; i < crp->crp_ndesc; i++) {
+ crd = &crp->crp_desc[i];
+ switch (crd->crd_alg) {
+ case CRYPTO_AES_CBC:
+ err = cryptox_encdec(crp, crd, ses);
+ if (err != 0)
+ goto out;
+ break;
+ case CRYPTO_MD5_HMAC:
+ case CRYPTO_SHA1_HMAC:
+ case CRYPTO_RIPEMD160_HMAC:
+ case CRYPTO_SHA2_256_HMAC:
+ case CRYPTO_SHA2_384_HMAC:
+ case CRYPTO_SHA2_512_HMAC:
+ err = cryptox_swauth(crp, crd, ses->ses_swd,
+ crp->crp_buf);
+ if (err != 0)
+ goto out;
+ break;
+
+ default:
+ err = EINVAL;
+ goto out;
+ }
+ }
+
+out:
+ smr_read_leave();
+ crp->crp_etype = err;
+ crypto_done(crp);
+ return (err);
+}