From ba15a1a9e085e512fc4d901b11aea9dd4157e3e7 Mon Sep 17 00:00:00 2001 From: tobhe Date: Sun, 21 Feb 2021 14:55:16 +0000 Subject: [PATCH] Add cryptox(4), a driver for armv8 cryptographic extensions. The driver currently only supports AES-CBC mode but can easily be extended to other algorithms and modes. The aesv8-armx.S file was generated from the CRYPTOGAMS project. Asked to commit by and ok patrick@ --- sys/arch/arm64/arm64/aesv8-armx.S | 1143 +++++++++++++++++++++++++++++ sys/arch/arm64/arm64/autoconf.c | 12 +- sys/arch/arm64/arm64/cpu.c | 9 +- sys/arch/arm64/arm64/cryptox.c | 494 +++++++++++++ sys/arch/arm64/conf/files.arm64 | 5 +- 5 files changed, 1660 insertions(+), 3 deletions(-) create mode 100644 sys/arch/arm64/arm64/aesv8-armx.S create mode 100644 sys/arch/arm64/arm64/cryptox.c diff --git a/sys/arch/arm64/arm64/aesv8-armx.S b/sys/arch/arm64/arm64/aesv8-armx.S new file mode 100644 index 00000000000..d62cfcdd40f --- /dev/null +++ b/sys/arch/arm64/arm64/aesv8-armx.S @@ -0,0 +1,1143 @@ +/* $OpenBSD: aesv8-armx.S,v 1.1 2021/02/21 14:55:16 tobhe Exp $ */ +/* + * Copyright (c) 2006, CRYPTOGAMS by + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain copyright notices, + * this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials + * provided with the distribution. + * + * * Neither the name of the CRYPTOGAMS nor the names of its + * copyright holder and contributors may be used to endorse or + * promote products derived from this software without specific + * prior written permission. + * + * ALTERNATIVELY, provided that this notice is retained in full, this + * product may be distributed under the terms of the GNU General Public + * License (GPL), in which case the provisions of the GPL apply INSTEAD OF + * those given above. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Generated from CRYPTOGAMS aesv8-armx.pl. + * Changes to the original source code: + * + * - removed #include "arm_arch.h" + * - removed redundant __ARM_MAX_ARCH__ check + */ + +.arch armv8-a+crypto +.text +.align 5 +.Lrcon: +.long 0x01,0x01,0x01,0x01 +.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d // rotate-n-splat +.long 0x1b,0x1b,0x1b,0x1b + +.globl aes_v8_set_encrypt_key +.type aes_v8_set_encrypt_key,%function +.align 5 +aes_v8_set_encrypt_key: +.Lenc_key: + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + mov x3,#-1 + cmp x0,#0 + b.eq .Lenc_key_abort + cmp x2,#0 + b.eq .Lenc_key_abort + mov x3,#-2 + cmp w1,#128 + b.lt .Lenc_key_abort + cmp w1,#256 + b.gt .Lenc_key_abort + tst w1,#0x3f + b.ne .Lenc_key_abort + + adr x3,.Lrcon + cmp w1,#192 + + eor v0.16b,v0.16b,v0.16b + ld1 {v3.16b},[x0],#16 + mov w1,#8 // reuse w1 + ld1 {v1.4s,v2.4s},[x3],#32 + + b.lt .Loop128 + b.eq .L192 + b .L256 + +.align 4 +.Loop128: + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + b.ne .Loop128 + + ld1 {v1.4s},[x3] + + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + + tbl v6.16b,{v3.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v3.4s},[x2],#16 + aese v6.16b,v0.16b + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + st1 {v3.4s},[x2] + add x2,x2,#0x50 + + mov w12,#10 + b .Ldone + +.align 4 +.L192: + ld1 {v4.8b},[x0],#8 + movi v6.16b,#8 // borrow v6.16b + st1 {v3.4s},[x2],#16 + sub v2.16b,v2.16b,v6.16b // adjust the mask + +.Loop192: + tbl v6.16b,{v4.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v4.8b},[x2],#8 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + + dup v5.4s,v3.s[3] + eor v5.16b,v5.16b,v4.16b + eor v6.16b,v6.16b,v1.16b + ext v4.16b,v0.16b,v4.16b,#12 + shl v1.16b,v1.16b,#1 + eor v4.16b,v4.16b,v5.16b + eor v3.16b,v3.16b,v6.16b + eor v4.16b,v4.16b,v6.16b + st1 {v3.4s},[x2],#16 + b.ne .Loop192 + + mov w12,#12 + add x2,x2,#0x20 + b .Ldone + +.align 4 +.L256: + ld1 {v4.16b},[x0] + mov w1,#7 + mov w12,#14 + st1 {v3.4s},[x2],#16 + +.Loop256: + tbl v6.16b,{v4.16b},v2.16b + ext v5.16b,v0.16b,v3.16b,#12 + st1 {v4.4s},[x2],#16 + aese v6.16b,v0.16b + subs w1,w1,#1 + + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v3.16b,v3.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v6.16b,v6.16b,v1.16b + eor v3.16b,v3.16b,v5.16b + shl v1.16b,v1.16b,#1 + eor v3.16b,v3.16b,v6.16b + st1 {v3.4s},[x2],#16 + b.eq .Ldone + + dup v6.4s,v3.s[3] // just splat + ext v5.16b,v0.16b,v4.16b,#12 + aese v6.16b,v0.16b + + eor v4.16b,v4.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v4.16b,v4.16b,v5.16b + ext v5.16b,v0.16b,v5.16b,#12 + eor v4.16b,v4.16b,v5.16b + + eor v4.16b,v4.16b,v6.16b + b .Loop256 + +.Ldone: + str w12,[x2] + mov x3,#0 + +.Lenc_key_abort: + mov x0,x3 // return value + ldr x29,[sp],#16 + ret +.size aes_v8_set_encrypt_key,.-aes_v8_set_encrypt_key + +.globl aes_v8_set_decrypt_key +.type aes_v8_set_decrypt_key,%function +.align 5 +aes_v8_set_decrypt_key: +.inst 0xd503233f // paciasp + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + bl .Lenc_key + + cmp x0,#0 + b.ne .Ldec_key_abort + + sub x2,x2,#240 // restore original x2 + mov x4,#-16 + add x0,x2,x12,lsl#4 // end of key schedule + + ld1 {v0.4s},[x2] + ld1 {v1.4s},[x0] + st1 {v0.4s},[x0],x4 + st1 {v1.4s},[x2],#16 + +.Loop_imc: + ld1 {v0.4s},[x2] + ld1 {v1.4s},[x0] + aesimc v0.16b,v0.16b + aesimc v1.16b,v1.16b + st1 {v0.4s},[x0],x4 + st1 {v1.4s},[x2],#16 + cmp x0,x2 + b.hi .Loop_imc + + ld1 {v0.4s},[x2] + aesimc v0.16b,v0.16b + st1 {v0.4s},[x0] + + eor x0,x0,x0 // return value +.Ldec_key_abort: + ldp x29,x30,[sp],#16 +.inst 0xd50323bf // autiasp + ret +.size aes_v8_set_decrypt_key,.-aes_v8_set_decrypt_key +.globl aes_v8_encrypt +.type aes_v8_encrypt,%function +.align 5 +aes_v8_encrypt: + ldr w3,[x2,#240] + ld1 {v0.4s},[x2],#16 + ld1 {v2.16b},[x0] + sub w3,w3,#2 + ld1 {v1.4s},[x2],#16 + +.Loop_enc: + aese v2.16b,v0.16b + aesmc v2.16b,v2.16b + ld1 {v0.4s},[x2],#16 + subs w3,w3,#2 + aese v2.16b,v1.16b + aesmc v2.16b,v2.16b + ld1 {v1.4s},[x2],#16 + b.gt .Loop_enc + + aese v2.16b,v0.16b + aesmc v2.16b,v2.16b + ld1 {v0.4s},[x2] + aese v2.16b,v1.16b + eor v2.16b,v2.16b,v0.16b + + st1 {v2.16b},[x1] + ret +.size aes_v8_encrypt,.-aes_v8_encrypt +.globl aes_v8_decrypt +.type aes_v8_decrypt,%function +.align 5 +aes_v8_decrypt: + ldr w3,[x2,#240] + ld1 {v0.4s},[x2],#16 + ld1 {v2.16b},[x0] + sub w3,w3,#2 + ld1 {v1.4s},[x2],#16 + +.Loop_dec: + aesd v2.16b,v0.16b + aesimc v2.16b,v2.16b + ld1 {v0.4s},[x2],#16 + subs w3,w3,#2 + aesd v2.16b,v1.16b + aesimc v2.16b,v2.16b + ld1 {v1.4s},[x2],#16 + b.gt .Loop_dec + + aesd v2.16b,v0.16b + aesimc v2.16b,v2.16b + ld1 {v0.4s},[x2] + aesd v2.16b,v1.16b + eor v2.16b,v2.16b,v0.16b + + st1 {v2.16b},[x1] + ret +.size aes_v8_decrypt,.-aes_v8_decrypt +.globl aes_v8_cbc_encrypt +.type aes_v8_cbc_encrypt,%function +.align 5 +aes_v8_cbc_encrypt: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + subs x2,x2,#16 + mov x8,#16 + b.lo .Lcbc_abort + csel x8,xzr,x8,eq + + cmp w5,#0 // en- or decrypting? + ldr w5,[x3,#240] + and x2,x2,#-16 + ld1 {v6.16b},[x4] + ld1 {v0.16b},[x0],x8 + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... 
+ sub w5,w5,#6 + add x7,x3,x5,lsl#4 // pointer to last 7 round keys + sub w5,w5,#2 + ld1 {v18.4s,v19.4s},[x7],#32 + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + + add x7,x3,#32 + mov w6,w5 + b.eq .Lcbc_dec + + cmp w5,#2 + eor v0.16b,v0.16b,v6.16b + eor v5.16b,v16.16b,v7.16b + b.eq .Lcbc_enc128 + + ld1 {v2.4s,v3.4s},[x7] + add x7,x3,#16 + add x6,x3,#16*4 + add x12,x3,#16*5 + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + add x14,x3,#16*6 + add x3,x3,#16*7 + b .Lenter_cbc_enc + +.align 4 +.Loop_cbc_enc: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + st1 {v6.16b},[x1],#16 +.Lenter_cbc_enc: + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v0.16b,v2.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x6] + cmp w5,#4 + aese v0.16b,v3.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x12] + b.eq .Lcbc_enc192 + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + ld1 {v16.4s},[x14] + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x3] + nop + +.Lcbc_enc192: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v16.16b},[x0],x8 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + eor v16.16b,v16.16b,v5.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + ld1 {v17.4s},[x7] // re-pre-load rndkey[1] + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v0.16b,v23.16b + eor v6.16b,v0.16b,v7.16b + b.hs .Loop_cbc_enc + + st1 {v6.16b},[x1],#16 + b .Lcbc_done + +.align 5 +.Lcbc_enc128: + ld1 {v2.4s,v3.4s},[x7] + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + b .Lenter_cbc_enc128 +.Loop_cbc_enc128: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + st1 {v6.16b},[x1],#16 +.Lenter_cbc_enc128: + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + subs x2,x2,#16 + aese v0.16b,v2.16b + aesmc v0.16b,v0.16b + csel x8,xzr,x8,eq + aese v0.16b,v3.16b + aesmc v0.16b,v0.16b + aese v0.16b,v18.16b + aesmc v0.16b,v0.16b + aese v0.16b,v19.16b + aesmc v0.16b,v0.16b + ld1 {v16.16b},[x0],x8 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + eor v16.16b,v16.16b,v5.16b + aese v0.16b,v23.16b + eor v6.16b,v0.16b,v7.16b + b.hs .Loop_cbc_enc128 + + st1 {v6.16b},[x1],#16 + b .Lcbc_done +.align 5 +.Lcbc_dec: + ld1 {v24.16b},[x0],#16 + subs x2,x2,#32 // bias + add w6,w5,#2 + orr v3.16b,v0.16b,v0.16b + orr v1.16b,v0.16b,v0.16b + orr v27.16b,v24.16b,v24.16b + b.lo .Lcbc_dec_tail + + orr v1.16b,v24.16b,v24.16b + ld1 {v24.16b},[x0],#16 + orr v2.16b,v0.16b,v0.16b + orr v3.16b,v1.16b,v1.16b + orr v27.16b,v24.16b,v24.16b + cmp x2,#32 + b.lo .Loop3x_cbc_dec + + ld1 {v25.16b},[x0],#16 + ld1 {v26.16b},[x0],#16 + sub x2,x2,#32 // bias + mov w6,w5 + orr v28.16b,v25.16b,v25.16b + orr v29.16b,v26.16b,v26.16b + +.Loop5x_cbc_dec: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v16.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v16.16b + aesimc v26.16b,v26.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v17.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v17.16b + aesimc v26.16b,v26.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop5x_cbc_dec + + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + 
aesd v25.16b,v16.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v16.16b + aesimc v26.16b,v26.16b + cmp x2,#0x40 // because .Lcbc_tail4x + sub x2,x2,#0x50 + + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v17.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v17.16b + aesimc v26.16b,v26.16b + csel x6,xzr,x2,gt // borrow x6, w6, "gt" is not typo + mov x7,x3 + + aesd v0.16b,v18.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v18.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v18.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v18.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v18.16b + aesimc v26.16b,v26.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v26.16b + // are loaded with last "words" + add x6,x2,#0x60 // because .Lcbc_tail4x + + aesd v0.16b,v19.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v19.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v19.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v19.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v19.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v20.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v20.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v20.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v21.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v21.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v21.16b + aesimc v26.16b,v26.16b + + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v22.16b + aesimc v24.16b,v24.16b + aesd v25.16b,v22.16b + aesimc v25.16b,v25.16b + aesd v26.16b,v22.16b + aesimc v26.16b,v26.16b + + eor v4.16b,v6.16b,v7.16b + aesd v0.16b,v23.16b + eor v5.16b,v2.16b,v7.16b + ld1 {v2.16b},[x0],#16 + aesd v1.16b,v23.16b + eor v17.16b,v3.16b,v7.16b + ld1 {v3.16b},[x0],#16 + aesd v24.16b,v23.16b + eor v30.16b,v27.16b,v7.16b + ld1 {v27.16b},[x0],#16 + aesd v25.16b,v23.16b + eor v31.16b,v28.16b,v7.16b + ld1 {v28.16b},[x0],#16 + aesd v26.16b,v23.16b + orr v6.16b,v29.16b,v29.16b + ld1 {v29.16b},[x0],#16 + cbz x6,.Lcbc_tail4x + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + eor v4.16b,v4.16b,v0.16b + orr v0.16b,v2.16b,v2.16b + eor v5.16b,v5.16b,v1.16b + orr v1.16b,v3.16b,v3.16b + eor v17.16b,v17.16b,v24.16b + orr v24.16b,v27.16b,v27.16b + eor v30.16b,v30.16b,v25.16b + orr v25.16b,v28.16b,v28.16b + eor v31.16b,v31.16b,v26.16b + st1 {v4.16b},[x1],#16 + orr v26.16b,v29.16b,v29.16b + st1 {v5.16b},[x1],#16 + mov w6,w5 + st1 {v17.16b},[x1],#16 + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v30.16b},[x1],#16 + st1 {v31.16b},[x1],#16 + b.hs .Loop5x_cbc_dec + + add x2,x2,#0x50 + cbz x2,.Lcbc_done + + add w6,w5,#2 + subs x2,x2,#0x30 + orr v0.16b,v27.16b,v27.16b + orr v2.16b,v27.16b,v27.16b + orr v1.16b,v28.16b,v28.16b + orr v3.16b,v28.16b,v28.16b + orr v24.16b,v29.16b,v29.16b + orr v27.16b,v29.16b,v29.16b + b.lo .Lcbc_dec_tail + + b .Loop3x_cbc_dec + +.align 4 +.Lcbc_tail4x: + eor v5.16b,v4.16b,v1.16b + eor v17.16b,v17.16b,v24.16b + eor v30.16b,v30.16b,v25.16b + eor v31.16b,v31.16b,v26.16b + st1 {v5.16b},[x1],#16 + st1 {v17.16b},[x1],#16 + st1 {v30.16b},[x1],#16 + st1 {v31.16b},[x1],#16 + + b .Lcbc_done +.align 4 +.Loop3x_cbc_dec: + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd 
v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop3x_cbc_dec + + aesd v0.16b,v16.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + eor v4.16b,v6.16b,v7.16b + subs x2,x2,#0x30 + eor v5.16b,v2.16b,v7.16b + csel x6,x2,x6,lo // x6, w6, is zero at this point + aesd v0.16b,v17.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + eor v17.16b,v3.16b,v7.16b + add x0,x0,x6 // x0 is adjusted in such way that + // at exit from the loop v1.16b-v24.16b + // are loaded with last "words" + orr v6.16b,v27.16b,v27.16b + mov x7,x3 + aesd v0.16b,v20.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v20.16b + aesimc v24.16b,v24.16b + ld1 {v2.16b},[x0],#16 + aesd v0.16b,v21.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v21.16b + aesimc v24.16b,v24.16b + ld1 {v3.16b},[x0],#16 + aesd v0.16b,v22.16b + aesimc v0.16b,v0.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v22.16b + aesimc v24.16b,v24.16b + ld1 {v27.16b},[x0],#16 + aesd v0.16b,v23.16b + aesd v1.16b,v23.16b + aesd v24.16b,v23.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + add w6,w5,#2 + eor v4.16b,v4.16b,v0.16b + eor v5.16b,v5.16b,v1.16b + eor v24.16b,v24.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v4.16b},[x1],#16 + orr v0.16b,v2.16b,v2.16b + st1 {v5.16b},[x1],#16 + orr v1.16b,v3.16b,v3.16b + st1 {v24.16b},[x1],#16 + orr v24.16b,v27.16b,v27.16b + b.hs .Loop3x_cbc_dec + + cmn x2,#0x30 + b.eq .Lcbc_done + nop + +.Lcbc_dec_tail: + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + ld1 {v17.4s},[x7],#16 + b.gt .Lcbc_dec_tail + + aesd v1.16b,v16.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v16.16b + aesimc v24.16b,v24.16b + aesd v1.16b,v17.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v17.16b + aesimc v24.16b,v24.16b + aesd v1.16b,v20.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v20.16b + aesimc v24.16b,v24.16b + cmn x2,#0x20 + aesd v1.16b,v21.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v21.16b + aesimc v24.16b,v24.16b + eor v5.16b,v6.16b,v7.16b + aesd v1.16b,v22.16b + aesimc v1.16b,v1.16b + aesd v24.16b,v22.16b + aesimc v24.16b,v24.16b + eor v17.16b,v3.16b,v7.16b + aesd v1.16b,v23.16b + aesd v24.16b,v23.16b + b.eq .Lcbc_dec_one + eor v5.16b,v5.16b,v1.16b + eor v17.16b,v17.16b,v24.16b + orr v6.16b,v27.16b,v27.16b + st1 {v5.16b},[x1],#16 + st1 {v17.16b},[x1],#16 + b .Lcbc_done + +.Lcbc_dec_one: + eor v5.16b,v5.16b,v24.16b + orr v6.16b,v27.16b,v27.16b + st1 {v5.16b},[x1],#16 + +.Lcbc_done: + st1 {v6.16b},[x4] +.Lcbc_abort: + ldr x29,[sp],#16 + ret +.size aes_v8_cbc_encrypt,.-aes_v8_cbc_encrypt +.globl aes_v8_ctr32_encrypt_blocks +.type aes_v8_ctr32_encrypt_blocks,%function +.align 5 +aes_v8_ctr32_encrypt_blocks: + stp x29,x30,[sp,#-16]! + add x29,sp,#0 + ldr w5,[x3,#240] + + ldr w8, [x4, #12] + ld1 {v0.4s},[x4] + + ld1 {v16.4s,v17.4s},[x3] // load key schedule... 
+ sub w5,w5,#4 + mov x12,#16 + cmp x2,#2 + add x7,x3,x5,lsl#4 // pointer to last 5 round keys + sub w5,w5,#2 + ld1 {v20.4s,v21.4s},[x7],#32 + ld1 {v22.4s,v23.4s},[x7],#32 + ld1 {v7.4s},[x7] + add x7,x3,#32 + mov w6,w5 + csel x12,xzr,x12,lo +#ifndef __ARMEB__ + rev w8, w8 +#endif + orr v1.16b,v0.16b,v0.16b + add w10, w8, #1 + orr v18.16b,v0.16b,v0.16b + add w8, w8, #2 + orr v6.16b,v0.16b,v0.16b + rev w10, w10 + mov v1.s[3],w10 + b.ls .Lctr32_tail + rev w12, w8 + sub x2,x2,#3 // bias + mov v18.s[3],w12 + cmp x2,#2 + b.lo .Loop3x_ctr32 + + add w13,w8,#1 + add w14,w8,#2 + orr v24.16b,v0.16b,v0.16b + rev w13,w13 + orr v25.16b,v0.16b,v0.16b + rev w14,w14 + mov v24.s[3],w13 + sub x2,x2,#2 // bias + mov v25.s[3],w14 + add w8,w8,#2 + b .Loop5x_ctr32 + +.align 4 +.Loop5x_ctr32: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + aese v25.16b,v16.16b + aesmc v25.16b,v25.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v18.16b,v17.16b + aesmc v18.16b,v18.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + aese v25.16b,v17.16b + aesmc v25.16b,v25.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop5x_ctr32 + + mov x7,x3 + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b + aese v24.16b,v16.16b + aesmc v24.16b,v24.16b + aese v25.16b,v16.16b + aesmc v25.16b,v25.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v18.16b,v17.16b + aesmc v18.16b,v18.16b + aese v24.16b,v17.16b + aesmc v24.16b,v24.16b + aese v25.16b,v17.16b + aesmc v25.16b,v25.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + add w9,w8,#1 + add w10,w8,#2 + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + add w12,w8,#3 + add w13,w8,#4 + aese v18.16b,v20.16b + aesmc v18.16b,v18.16b + add w14,w8,#5 + rev w9,w9 + aese v24.16b,v20.16b + aesmc v24.16b,v24.16b + rev w10,w10 + rev w12,w12 + aese v25.16b,v20.16b + aesmc v25.16b,v25.16b + rev w13,w13 + rev w14,w14 + + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + aese v18.16b,v21.16b + aesmc v18.16b,v18.16b + aese v24.16b,v21.16b + aesmc v24.16b,v24.16b + aese v25.16b,v21.16b + aesmc v25.16b,v25.16b + + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + ld1 {v2.16b},[x0],#16 + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + ld1 {v3.16b},[x0],#16 + aese v18.16b,v22.16b + aesmc v18.16b,v18.16b + ld1 {v19.16b},[x0],#16 + aese v24.16b,v22.16b + aesmc v24.16b,v24.16b + ld1 {v26.16b},[x0],#16 + aese v25.16b,v22.16b + aesmc v25.16b,v25.16b + ld1 {v27.16b},[x0],#16 + + aese v0.16b,v23.16b + eor v2.16b,v2.16b,v7.16b + aese v1.16b,v23.16b + eor v3.16b,v3.16b,v7.16b + aese v18.16b,v23.16b + eor v19.16b,v19.16b,v7.16b + aese v24.16b,v23.16b + eor v26.16b,v26.16b,v7.16b + aese v25.16b,v23.16b + eor v27.16b,v27.16b,v7.16b + + eor v2.16b,v2.16b,v0.16b + orr v0.16b,v6.16b,v6.16b + eor v3.16b,v3.16b,v1.16b + orr v1.16b,v6.16b,v6.16b + eor v19.16b,v19.16b,v18.16b + orr v18.16b,v6.16b,v6.16b + eor v26.16b,v26.16b,v24.16b + orr v24.16b,v6.16b,v6.16b + eor v27.16b,v27.16b,v25.16b + orr v25.16b,v6.16b,v6.16b + + st1 {v2.16b},[x1],#16 + mov v0.s[3],w9 + st1 {v3.16b},[x1],#16 + mov v1.s[3],w10 + st1 {v19.16b},[x1],#16 + mov v18.s[3],w12 + st1 {v26.16b},[x1],#16 + 
mov v24.s[3],w13 + st1 {v27.16b},[x1],#16 + mov v25.s[3],w14 + + mov w6,w5 + cbz x2,.Lctr32_done + + add w8,w8,#5 + subs x2,x2,#5 + b.hs .Loop5x_ctr32 + + add x2,x2,#5 + sub w8,w8,#5 + + cmp x2,#2 + mov x12,#16 + csel x12,xzr,x12,lo + b.ls .Lctr32_tail + + sub x2,x2,#3 // bias + add w8,w8,#3 + b .Loop3x_ctr32 + +.align 4 +.Loop3x_ctr32: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + aese v18.16b,v17.16b + aesmc v18.16b,v18.16b + ld1 {v17.4s},[x7],#16 + b.gt .Loop3x_ctr32 + + aese v0.16b,v16.16b + aesmc v4.16b,v0.16b + aese v1.16b,v16.16b + aesmc v5.16b,v1.16b + ld1 {v2.16b},[x0],#16 + orr v0.16b,v6.16b,v6.16b + aese v18.16b,v16.16b + aesmc v18.16b,v18.16b + ld1 {v3.16b},[x0],#16 + orr v1.16b,v6.16b,v6.16b + aese v4.16b,v17.16b + aesmc v4.16b,v4.16b + aese v5.16b,v17.16b + aesmc v5.16b,v5.16b + ld1 {v19.16b},[x0],#16 + mov x7,x3 + aese v18.16b,v17.16b + aesmc v17.16b,v18.16b + orr v18.16b,v6.16b,v6.16b + add w9,w8,#1 + aese v4.16b,v20.16b + aesmc v4.16b,v4.16b + aese v5.16b,v20.16b + aesmc v5.16b,v5.16b + eor v2.16b,v2.16b,v7.16b + add w10,w8,#2 + aese v17.16b,v20.16b + aesmc v17.16b,v17.16b + eor v3.16b,v3.16b,v7.16b + add w8,w8,#3 + aese v4.16b,v21.16b + aesmc v4.16b,v4.16b + aese v5.16b,v21.16b + aesmc v5.16b,v5.16b + eor v19.16b,v19.16b,v7.16b + rev w9,w9 + aese v17.16b,v21.16b + aesmc v17.16b,v17.16b + mov v0.s[3], w9 + rev w10,w10 + aese v4.16b,v22.16b + aesmc v4.16b,v4.16b + aese v5.16b,v22.16b + aesmc v5.16b,v5.16b + mov v1.s[3], w10 + rev w12,w8 + aese v17.16b,v22.16b + aesmc v17.16b,v17.16b + mov v18.s[3], w12 + subs x2,x2,#3 + aese v4.16b,v23.16b + aese v5.16b,v23.16b + aese v17.16b,v23.16b + + eor v2.16b,v2.16b,v4.16b + ld1 {v16.4s},[x7],#16 // re-pre-load rndkey[0] + st1 {v2.16b},[x1],#16 + eor v3.16b,v3.16b,v5.16b + mov w6,w5 + st1 {v3.16b},[x1],#16 + eor v19.16b,v19.16b,v17.16b + ld1 {v17.4s},[x7],#16 // re-pre-load rndkey[1] + st1 {v19.16b},[x1],#16 + b.hs .Loop3x_ctr32 + + adds x2,x2,#3 + b.eq .Lctr32_done + cmp x2,#1 + mov x12,#16 + csel x12,xzr,x12,eq + +.Lctr32_tail: + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + ld1 {v16.4s},[x7],#16 + subs w6,w6,#2 + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + ld1 {v17.4s},[x7],#16 + b.gt .Lctr32_tail + + aese v0.16b,v16.16b + aesmc v0.16b,v0.16b + aese v1.16b,v16.16b + aesmc v1.16b,v1.16b + aese v0.16b,v17.16b + aesmc v0.16b,v0.16b + aese v1.16b,v17.16b + aesmc v1.16b,v1.16b + ld1 {v2.16b},[x0],x12 + aese v0.16b,v20.16b + aesmc v0.16b,v0.16b + aese v1.16b,v20.16b + aesmc v1.16b,v1.16b + ld1 {v3.16b},[x0] + aese v0.16b,v21.16b + aesmc v0.16b,v0.16b + aese v1.16b,v21.16b + aesmc v1.16b,v1.16b + eor v2.16b,v2.16b,v7.16b + aese v0.16b,v22.16b + aesmc v0.16b,v0.16b + aese v1.16b,v22.16b + aesmc v1.16b,v1.16b + eor v3.16b,v3.16b,v7.16b + aese v0.16b,v23.16b + aese v1.16b,v23.16b + + cmp x2,#1 + eor v2.16b,v2.16b,v0.16b + eor v3.16b,v3.16b,v1.16b + st1 {v2.16b},[x1],#16 + b.eq .Lctr32_done + st1 {v3.16b},[x1] + +.Lctr32_done: + ldr x29,[sp],#16 + ret +.size aes_v8_ctr32_encrypt_blocks,.-aes_v8_ctr32_encrypt_blocks diff --git a/sys/arch/arm64/arm64/autoconf.c b/sys/arch/arm64/arm64/autoconf.c index 18be08a483b..bda3cb3f6b0 100644 --- a/sys/arch/arm64/arm64/autoconf.c +++ b/sys/arch/arm64/arm64/autoconf.c @@ -1,4 +1,4 @@ -/* $OpenBSD: 
autoconf.c,v 1.11 2020/11/06 13:32:38 patrick Exp $ */ +/* $OpenBSD: autoconf.c,v 1.12 2021/02/21 14:55:16 tobhe Exp $ */ /* * Copyright (c) 2009 Miodrag Vallat. * @@ -30,6 +30,11 @@ #include #endif +#ifdef CRYPTO +void cryptox_setup(void); +extern int arm64_has_aes; +#endif + #include extern void dumpconf(void); @@ -56,6 +61,11 @@ cpu_configure(void) unmap_startup(); +#ifdef CRYPTO + if (arm64_has_aes) + cryptox_setup(); +#endif + cold = 0; spl0(); } diff --git a/sys/arch/arm64/arm64/cpu.c b/sys/arch/arm64/arm64/cpu.c index dc65e10a011..21f841a7e25 100644 --- a/sys/arch/arm64/arm64/cpu.c +++ b/sys/arch/arm64/arm64/cpu.c @@ -1,4 +1,4 @@ -/* $OpenBSD: cpu.c,v 1.47 2021/02/10 20:51:27 kettenis Exp $ */ +/* $OpenBSD: cpu.c,v 1.48 2021/02/21 14:55:16 tobhe Exp $ */ /* * Copyright (c) 2016 Dale Rahn @@ -153,6 +153,10 @@ const struct implementers { char cpu_model[64]; int cpu_node; +#ifdef CRYPTO +int arm64_has_aes; +#endif + struct cpu_info *cpu_info_list = &cpu_info_primary; int cpu_match(struct device *, void *, void *); @@ -378,6 +382,9 @@ cpu_identify(struct cpu_info *ci) if (ID_AA64ISAR0_AES(id) >= ID_AA64ISAR0_AES_BASE) { printf("%sAES", sep); sep = ","; +#ifdef CRYPTO + arm64_has_aes = 1; +#endif } if (ID_AA64ISAR0_AES(id) >= ID_AA64ISAR0_AES_PMULL) printf("+PMULL"); diff --git a/sys/arch/arm64/arm64/cryptox.c b/sys/arch/arm64/arm64/cryptox.c new file mode 100644 index 00000000000..5542a485f16 --- /dev/null +++ b/sys/arch/arm64/arm64/cryptox.c @@ -0,0 +1,494 @@ +/* $OpenBSD: cryptox.c,v 1.1 2021/02/21 14:55:17 tobhe Exp $ */ +/* + * Copyright (c) 2003 Jason Wright + * Copyright (c) 2003, 2004 Theo de Raadt + * Copyright (c) 2010 Thordur I. Bjornsson + * Copyright (c) 2010 Mike Belopuhov + * Copyright (c) 2020 Tobias Heider + * All rights reserved. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +struct cryptox_aes_key { + uint32_t rd_key[4 *(AES_MAXROUNDS + 1)]; + int rounds; +}; + +struct cryptox_session { + struct cryptox_aes_key ses_ekey; + struct cryptox_aes_key ses_dkey; + uint32_t ses_klen; + int ses_sid; + struct swcr_data *ses_swd; + SMR_LIST_ENTRY(cryptox_session) + ses_entries; + uint8_t *ses_buf; + size_t ses_buflen; + struct smr_entry ses_smr; +}; + +struct cryptox_softc { + int32_t sc_cid; + uint32_t sc_sid; + struct mutex sc_mtx; + SMR_LIST_HEAD(, cryptox_session) + sc_sessions; +} *cryptox_sc; + +struct pool cryptoxpl; + +uint32_t cryptox_ops; + +extern int aes_v8_set_encrypt_key(const uint8_t *user_key, const int bits, + struct cryptox_aes_key *key); +extern int aes_v8_set_decrypt_key(const uint8_t *user_key, const int bits, + struct cryptox_aes_key *key); +extern void aes_v8_encrypt(const uint8_t *in, uint8_t *out, + const struct cryptox_aes_key *key); +extern void aes_v8_decrypt(const uint8_t *in, uint8_t *out, + const struct cryptox_aes_key *key); +extern void aes_v8_cbc_encrypt(const uint8_t *in, uint8_t *out, size_t length, + const struct cryptox_aes_key *key, uint8_t *ivec, const int enc); +extern void aes_v8_ctr32_encrypt_blocks(const uint8_t *in, uint8_t *out, + size_t len, const struct cryptox_aes_key *key, + const uint8_t ivec[16]); + +void cryptox_setup(void); +int cryptox_newsession(u_int32_t *, struct cryptoini *); +int cryptox_freesession(u_int64_t); +int cryptox_process(struct cryptop *); + +struct cryptox_session * + cryptox_get(uint32_t); +void cryptox_free(struct cryptox_session *); +void cryptox_free_smr(void *); + +int cryptox_swauth(struct cryptop *, struct cryptodesc *, struct swcr_data *, + caddr_t); + +int cryptox_encdec(struct cryptop *, struct cryptodesc *, + struct cryptox_session *); + +void +cryptox_setup(void) +{ + int algs[CRYPTO_ALGORITHM_MAX + 1]; + + cryptox_sc = malloc(sizeof(*cryptox_sc), M_DEVBUF, M_NOWAIT|M_ZERO); + if (cryptox_sc == NULL) + return; + + bzero(algs, sizeof(algs)); + + /* Encryption algorithms. */ + algs[CRYPTO_AES_CBC] = CRYPTO_ALG_FLAG_SUPPORTED; + + /* HMACs needed for IPsec, uses software crypto. */ + algs[CRYPTO_MD5_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED; + algs[CRYPTO_SHA1_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED; + algs[CRYPTO_RIPEMD160_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED; + algs[CRYPTO_SHA2_256_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED; + algs[CRYPTO_SHA2_384_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED; + algs[CRYPTO_SHA2_512_HMAC] = CRYPTO_ALG_FLAG_SUPPORTED; + + /* IPsec Extended Sequence Numbers. 
*/ + algs[CRYPTO_ESN] = CRYPTO_ALG_FLAG_SUPPORTED; + + cryptox_sc->sc_cid = crypto_get_driverid(CRYPTOCAP_F_MPSAFE); + if (cryptox_sc->sc_cid < 0) { + free(cryptox_sc, M_DEVBUF, sizeof(*cryptox_sc)); + cryptox_sc = NULL; + return; + } + + pool_init(&cryptoxpl, sizeof(struct cryptox_session), 16, IPL_VM, 0, + "cryptox", NULL); + pool_setlowat(&cryptoxpl, 2); + + mtx_init(&cryptox_sc->sc_mtx, IPL_VM); + + crypto_register(cryptox_sc->sc_cid, algs, cryptox_newsession, + cryptox_freesession, cryptox_process); +} + +int +cryptox_newsession(u_int32_t *sidp, struct cryptoini *cri) +{ + struct cryptox_session *ses = NULL; + struct cryptoini *c; + struct auth_hash *axf; + struct swcr_data *swd; + int i; + + if (sidp == NULL || cri == NULL) + return (EINVAL); + + ses = pool_get(&cryptoxpl, PR_NOWAIT | PR_ZERO); + if (!ses) + return (ENOMEM); + smr_init(&ses->ses_smr); + + ses->ses_buf = malloc(PAGE_SIZE, M_DEVBUF, M_NOWAIT|M_ZERO); + if (ses->ses_buf != NULL) + ses->ses_buflen = PAGE_SIZE; + + for (c = cri; c != NULL; c = c->cri_next) { + switch (c->cri_alg) { + case CRYPTO_AES_CBC: + ses->ses_klen = c->cri_klen / 8; + vfp_kernel_enter(); + aes_v8_set_encrypt_key(c->cri_key, c->cri_klen, &ses->ses_ekey); + aes_v8_set_decrypt_key(c->cri_key, c->cri_klen, &ses->ses_dkey); + vfp_kernel_exit(); + break; + + case CRYPTO_MD5_HMAC: + axf = &auth_hash_hmac_md5_96; + goto authcommon; + case CRYPTO_SHA1_HMAC: + axf = &auth_hash_hmac_sha1_96; + goto authcommon; + case CRYPTO_RIPEMD160_HMAC: + axf = &auth_hash_hmac_ripemd_160_96; + goto authcommon; + case CRYPTO_SHA2_256_HMAC: + axf = &auth_hash_hmac_sha2_256_128; + goto authcommon; + case CRYPTO_SHA2_384_HMAC: + axf = &auth_hash_hmac_sha2_384_192; + goto authcommon; + case CRYPTO_SHA2_512_HMAC: + axf = &auth_hash_hmac_sha2_512_256; + authcommon: + swd = malloc(sizeof(struct swcr_data), M_CRYPTO_DATA, + M_NOWAIT|M_ZERO); + if (swd == NULL) { + cryptox_free(ses); + return (ENOMEM); + } + ses->ses_swd = swd; + + swd->sw_ictx = malloc(axf->ctxsize, M_CRYPTO_DATA, + M_NOWAIT); + if (swd->sw_ictx == NULL) { + cryptox_free(ses); + return (ENOMEM); + } + + swd->sw_octx = malloc(axf->ctxsize, M_CRYPTO_DATA, + M_NOWAIT); + if (swd->sw_octx == NULL) { + cryptox_free(ses); + return (ENOMEM); + } + + for (i = 0; i < c->cri_klen / 8; i++) + c->cri_key[i] ^= HMAC_IPAD_VAL; + + axf->Init(swd->sw_ictx); + axf->Update(swd->sw_ictx, c->cri_key, c->cri_klen / 8); + axf->Update(swd->sw_ictx, hmac_ipad_buffer, + axf->blocksize - (c->cri_klen / 8)); + + for (i = 0; i < c->cri_klen / 8; i++) + c->cri_key[i] ^= (HMAC_IPAD_VAL ^ + HMAC_OPAD_VAL); + + axf->Init(swd->sw_octx); + axf->Update(swd->sw_octx, c->cri_key, c->cri_klen / 8); + axf->Update(swd->sw_octx, hmac_opad_buffer, + axf->blocksize - (c->cri_klen / 8)); + + for (i = 0; i < c->cri_klen / 8; i++) + c->cri_key[i] ^= HMAC_OPAD_VAL; + + swd->sw_axf = axf; + swd->sw_alg = c->cri_alg; + + break; + + case CRYPTO_ESN: + /* nothing to do */ + break; + + default: + cryptox_free(ses); + return (EINVAL); + } + } + + mtx_enter(&cryptox_sc->sc_mtx); + ses->ses_sid = ++cryptox_sc->sc_sid; + SMR_LIST_INSERT_HEAD_LOCKED(&cryptox_sc->sc_sessions, ses, ses_entries); + mtx_leave(&cryptox_sc->sc_mtx); + + *sidp = ses->ses_sid; + return (0); +} + +int +cryptox_freesession(u_int64_t tid) +{ + struct cryptox_session *ses; + u_int32_t sid = (u_int32_t)tid; + + mtx_enter(&cryptox_sc->sc_mtx); + SMR_LIST_FOREACH_LOCKED(ses, &cryptox_sc->sc_sessions, ses_entries) { + if (ses->ses_sid == sid) { + SMR_LIST_REMOVE_LOCKED(ses, ses_entries); + break; + } + } 
+ mtx_leave(&cryptox_sc->sc_mtx); + + if (ses == NULL) + return (EINVAL); + + smr_call(&ses->ses_smr, cryptox_free_smr, ses); + + return (0); +} + +void +cryptox_free(struct cryptox_session *ses) +{ + struct swcr_data *swd; + struct auth_hash *axf; + + if (ses->ses_swd) { + swd = ses->ses_swd; + axf = swd->sw_axf; + + if (swd->sw_ictx) { + explicit_bzero(swd->sw_ictx, axf->ctxsize); + free(swd->sw_ictx, M_CRYPTO_DATA, axf->ctxsize); + } + if (swd->sw_octx) { + explicit_bzero(swd->sw_octx, axf->ctxsize); + free(swd->sw_octx, M_CRYPTO_DATA, axf->ctxsize); + } + free(swd, M_CRYPTO_DATA, sizeof(*swd)); + } + + if (ses->ses_buf) { + explicit_bzero(ses->ses_buf, ses->ses_buflen); + free(ses->ses_buf, M_DEVBUF, ses->ses_buflen); + } + + explicit_bzero(ses, sizeof (*ses)); + pool_put(&cryptoxpl, ses); +} + +void +cryptox_free_smr(void *arg) +{ + struct cryptox_session *ses = arg; + + cryptox_free(ses); +} + +struct cryptox_session * +cryptox_get(uint32_t sid) +{ + struct cryptox_session *ses = NULL; + + SMR_ASSERT_CRITICAL(); + SMR_LIST_FOREACH(ses, &cryptox_sc->sc_sessions, ses_entries) { + if (ses->ses_sid == sid) + break; + } + return (ses); +} + +int +cryptox_swauth(struct cryptop *crp, struct cryptodesc *crd, + struct swcr_data *sw, caddr_t buf) +{ + int type; + + if (crp->crp_flags & CRYPTO_F_IMBUF) + type = CRYPTO_BUF_MBUF; + else + type = CRYPTO_BUF_IOV; + + return (swcr_authcompute(crp, crd, sw, buf, type)); +} + +int +cryptox_encdec(struct cryptop *crp, struct cryptodesc *crd, + struct cryptox_session *ses) +{ + int err, ivlen, iskip, oskip, rlen; + uint8_t iv[EALG_MAX_BLOCK_LEN]; + uint8_t *buf = ses->ses_buf; + + rlen = err = iskip = oskip = 0; + + if (crd->crd_len > ses->ses_buflen) { + if (buf != NULL) { + explicit_bzero(buf, ses->ses_buflen); + free(buf, M_DEVBUF, ses->ses_buflen); + } + + ses->ses_buflen = 0; + rlen = roundup(crd->crd_len, EALG_MAX_BLOCK_LEN); + ses->ses_buf = buf = malloc(rlen, M_DEVBUF, M_NOWAIT | + M_ZERO); + if (buf == NULL) + return (ENOMEM); + ses->ses_buflen = rlen; + } + + /* CBC uses 16 */ + ivlen = 16; + + /* Initialize the IV */ + if (crd->crd_flags & CRD_F_ENCRYPT) { + if (crd->crd_flags & CRD_F_IV_EXPLICIT) + memcpy(iv, crd->crd_iv, ivlen); + else + arc4random_buf(iv, ivlen); + + /* Do we need to write the IV */ + if ((crd->crd_flags & CRD_F_IV_PRESENT) == 0) { + if (crp->crp_flags & CRYPTO_F_IMBUF) { + if (m_copyback((struct mbuf *)crp->crp_buf, + crd->crd_inject, ivlen, iv, M_NOWAIT)) { + err = ENOMEM; + goto out; + } + } else + cuio_copyback((struct uio *)crp->crp_buf, + crd->crd_inject, ivlen, iv); + } + } else { + if (crd->crd_flags & CRD_F_IV_EXPLICIT) + memcpy(iv, crd->crd_iv, ivlen); + else { + if (crp->crp_flags & CRYPTO_F_IMBUF) + m_copydata((struct mbuf *)crp->crp_buf, + crd->crd_inject, ivlen, iv); + else + cuio_copydata((struct uio *)crp->crp_buf, + crd->crd_inject, ivlen, iv); + } + } + + /* Copy data to be processed to the buffer */ + if (crp->crp_flags & CRYPTO_F_IMBUF) + m_copydata((struct mbuf *)crp->crp_buf, crd->crd_skip, + crd->crd_len, buf); + else + cuio_copydata((struct uio *)crp->crp_buf, crd->crd_skip, + crd->crd_len, buf); + + /* Apply cipher */ + vfp_kernel_enter(); + switch (crd->crd_alg) { + case CRYPTO_AES_CBC: + if (crd->crd_flags & CRD_F_ENCRYPT) + aes_v8_cbc_encrypt(buf, buf, crd->crd_len, &ses->ses_ekey, iv, 1); + else + aes_v8_cbc_encrypt(buf, buf, crd->crd_len, &ses->ses_dkey, iv, 0); + break; + } + vfp_kernel_exit(); + + cryptox_ops++; + + /* Copy back the result */ + if (crp->crp_flags & CRYPTO_F_IMBUF) { + if 
(m_copyback((struct mbuf *)crp->crp_buf, crd->crd_skip, + crd->crd_len, buf, M_NOWAIT)) { + err = ENOMEM; + goto out; + } + } else + cuio_copyback((struct uio *)crp->crp_buf, crd->crd_skip, + crd->crd_len, buf); + +out: + explicit_bzero(buf, roundup(crd->crd_len, EALG_MAX_BLOCK_LEN)); + return (err); +} + +int +cryptox_process(struct cryptop *crp) +{ + struct cryptox_session *ses; + struct cryptodesc *crd, *crde; + int err = 0; + int i; + + if (crp == NULL || crp->crp_callback == NULL) + return (EINVAL); + if (crp->crp_ndesc < 1) + return (EINVAL); + + smr_read_enter(); + ses = cryptox_get(crp->crp_sid & 0xffffffff); + if (!ses) { + err = EINVAL; + goto out; + } + + crde = NULL; + for (i = 0; i < crp->crp_ndesc; i++) { + crd = &crp->crp_desc[i]; + switch (crd->crd_alg) { + case CRYPTO_AES_CBC: + err = cryptox_encdec(crp, crd, ses); + if (err != 0) + goto out; + break; + case CRYPTO_MD5_HMAC: + case CRYPTO_SHA1_HMAC: + case CRYPTO_RIPEMD160_HMAC: + case CRYPTO_SHA2_256_HMAC: + case CRYPTO_SHA2_384_HMAC: + case CRYPTO_SHA2_512_HMAC: + err = cryptox_swauth(crp, crd, ses->ses_swd, + crp->crp_buf); + if (err != 0) + goto out; + break; + + default: + err = EINVAL; + goto out; + } + } + +out: + smr_read_leave(); + crp->crp_etype = err; + crypto_done(crp); + return (err); +} diff --git a/sys/arch/arm64/conf/files.arm64 b/sys/arch/arm64/conf/files.arm64 index de03add89a8..90c36777c23 100644 --- a/sys/arch/arm64/conf/files.arm64 +++ b/sys/arch/arm64/conf/files.arm64 @@ -1,4 +1,4 @@ -# $OpenBSD: files.arm64,v 1.32 2020/07/25 12:26:09 tobhe Exp $ +# $OpenBSD: files.arm64,v 1.33 2021/02/21 14:55:17 tobhe Exp $ maxpartitions 16 maxusers 2 8 128 @@ -40,6 +40,9 @@ file arch/arm64/arm64/bus_dma.c file arch/arm64/dev/arm64_bus_space.c file arch/arm64/dev/pci_machdep.c +file arch/arm64/arm64/cryptox.c crypto +file arch/arm64/arm64/aesv8-armx.S crypto + file arch/arm64/arm64/db_disasm.c ddb file arch/arm64/arm64/db_interface.c ddb file arch/arm64/arm64/db_trace.c ddb -- 2.20.1
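
For reference, the aes_v8_cbc_encrypt() entry point used by cryptox_encdec() follows the usual CBC chaining contract: it consumes whole 16-byte blocks, XORs each plaintext block with the previous ciphertext block (the IV standing in for block -1) before encryption, and writes the running IV back through the ivec pointer on exit (the final st1 {v6.16b},[x4]). The C sketch below only illustrates that chaining logic; cbc_sketch(), aes_encrypt_block() and aes_decrypt_block() are hypothetical placeholders for the aese/aesd-based code, not existing kernel functions, and the argument order simply mirrors the assembly entry point.

/*
 * Minimal sketch of the CBC chaining performed by aes_v8_cbc_encrypt().
 * aes_encrypt_block()/aes_decrypt_block() are hypothetical single-block
 * primitives standing in for the hardware-accelerated code; only the
 * per-block chaining is meant to be illustrative.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define AES_BLOCK_LEN	16

void aes_encrypt_block(const uint8_t in[16], uint8_t out[16], const void *key);
void aes_decrypt_block(const uint8_t in[16], uint8_t out[16], const void *key);

static void
cbc_sketch(const uint8_t *in, uint8_t *out, size_t len, const void *key,
    uint8_t ivec[AES_BLOCK_LEN], int enc)
{
	uint8_t tmp[AES_BLOCK_LEN];
	size_t i;

	/* Like the assembly, only whole blocks are processed. */
	for (; len >= AES_BLOCK_LEN; len -= AES_BLOCK_LEN,
	    in += AES_BLOCK_LEN, out += AES_BLOCK_LEN) {
		if (enc) {
			/* C[i] = E(P[i] ^ C[i-1]); the IV acts as C[-1]. */
			for (i = 0; i < AES_BLOCK_LEN; i++)
				tmp[i] = in[i] ^ ivec[i];
			aes_encrypt_block(tmp, out, key);
			memcpy(ivec, out, AES_BLOCK_LEN);
		} else {
			/* P[i] = D(C[i]) ^ C[i-1]; keep C[i] for the chain. */
			memcpy(tmp, in, AES_BLOCK_LEN);
			aes_decrypt_block(in, out, key);
			for (i = 0; i < AES_BLOCK_LEN; i++)
				out[i] ^= ivec[i];
			memcpy(ivec, tmp, AES_BLOCK_LEN);
		}
	}
	/* The updated ivec is written back, as the assembly does on exit. */
}

Note that in-place operation (in == out) works for both directions, which is why cryptox_encdec() can copy the request into ses_buf and run the cipher over that buffer directly; the vfp_kernel_enter()/vfp_kernel_exit() bracket around the call is required because the AES instructions operate on the SIMD register file.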