.type _x86_64_AES_encrypt,\@abi-omnipotent
.align 16
_x86_64_AES_encrypt:
+ endbr64
xor 0($key),$s0 # xor with key
xor 4($key),$s1
xor 8($key),$s2
.type _x86_64_AES_encrypt_compact,\@abi-omnipotent
.align 16
_x86_64_AES_encrypt_compact:
+ endbr64
lea 128($sbox),$inp # size optimization
mov 0-128($inp),$acc1 # prefetch Te4
mov 32-128($inp),$acc2
.hidden asm_AES_encrypt
asm_AES_encrypt:
AES_encrypt:
+ endbr64
push %rbx
push %rbp
push %r12
.type _x86_64_AES_decrypt,\@abi-omnipotent
.align 16
_x86_64_AES_decrypt:
+ endbr64
xor 0($key),$s0 # xor with key
xor 4($key),$s1
xor 8($key),$s2
.type _x86_64_AES_decrypt_compact,\@abi-omnipotent
.align 16
_x86_64_AES_decrypt_compact:
+ endbr64
lea 128($sbox),$inp # size optimization
mov 0-128($inp),$acc1 # prefetch Td4
mov 32-128($inp),$acc2
.hidden asm_AES_decrypt
asm_AES_decrypt:
AES_decrypt:
+ endbr64
push %rbx
push %rbp
push %r12
.type AES_set_encrypt_key,\@function,3
.align 16
AES_set_encrypt_key:
+ endbr64
push %rbx
push %rbp
push %r12 # redundant, but allows to share
.type _x86_64_AES_set_encrypt_key,\@abi-omnipotent
.align 16
_x86_64_AES_set_encrypt_key:
+ endbr64
mov %esi,%ecx # %ecx=bits
mov %rdi,%rsi # %rsi=userKey
mov %rdx,%rdi # %rdi=key
.type AES_set_decrypt_key,\@function,3
.align 16
AES_set_decrypt_key:
+ endbr64
push %rbx
push %rbp
push %r12
.hidden asm_AES_cbc_encrypt
asm_AES_cbc_encrypt:
AES_cbc_encrypt:
+ endbr64
cmp \$0,%rdx # check length
je .Lcbc_epilogue
pushfq
.type block_se_handler,\@abi-omnipotent
.align 16
block_se_handler:
+ endbr64
push %rsi
push %rdi
push %rbx
.type key_se_handler,\@abi-omnipotent
.align 16
key_se_handler:
+ endbr64
push %rsi
push %rdi
push %rbx
.type cbc_se_handler,\@abi-omnipotent
.align 16
cbc_se_handler:
+ endbr64
push %rsi
push %rdi
push %rbx
.type aesni_cbc_sha1_enc,\@abi-omnipotent
.align 16
aesni_cbc_sha1_enc:
+ endbr64
# caller should check for SSSE3 and AES-NI bits
mov OPENSSL_ia32cap_P+0(%rip),%r10d
mov OPENSSL_ia32cap_P+4(%rip),%r11d
.type aesni_cbc_sha1_enc_ssse3,\@function,6
.align 16
aesni_cbc_sha1_enc_ssse3:
+ endbr64
mov `($win64?56:8)`(%rsp),$inp # load 7th argument
#shr \$6,$len # debugging artefact
#jz .Lepilogue_ssse3 # debugging artefact
.type aesni_cbc_sha1_enc_avx,\@function,6
.align 16
aesni_cbc_sha1_enc_avx:
+ endbr64
mov `($win64?56:8)`(%rsp),$inp # load 7th argument
#shr \$6,$len # debugging artefact
#jz .Lepilogue_avx # debugging artefact
.type ssse3_handler,\@abi-omnipotent
.align 16
ssse3_handler:
+ endbr64
push %rsi
push %rdi
push %rbx
.type ${PREFIX}_encrypt,\@abi-omnipotent
.align 16
${PREFIX}_encrypt:
+ endbr64
movups ($inp),$inout0 # load input
mov 240($key),$rounds # key->rounds
___
.type ${PREFIX}_decrypt,\@abi-omnipotent
.align 16
${PREFIX}_decrypt:
+ endbr64
movups ($inp),$inout0 # load input
mov 240($key),$rounds # key->rounds
___
.type _aesni_${dir}rypt3,\@abi-omnipotent
.align 16
_aesni_${dir}rypt3:
+ endbr64
$movkey ($key),$rndkey0
shr \$1,$rounds
$movkey 16($key),$rndkey1
.type _aesni_${dir}rypt4,\@abi-omnipotent
.align 16
_aesni_${dir}rypt4:
+ endbr64
$movkey ($key),$rndkey0
shr \$1,$rounds
$movkey 16($key),$rndkey1
.type _aesni_${dir}rypt6,\@abi-omnipotent
.align 16
_aesni_${dir}rypt6:
+ endbr64
$movkey ($key),$rndkey0
shr \$1,$rounds
$movkey 16($key),$rndkey1
.type _aesni_${dir}rypt8,\@abi-omnipotent
.align 16
_aesni_${dir}rypt8:
+ endbr64
$movkey ($key),$rndkey0
shr \$1,$rounds
$movkey 16($key),$rndkey1
.type aesni_ecb_encrypt,\@function,5
.align 16
aesni_ecb_encrypt:
+ endbr64
and \$-16,$len
jz .Lecb_ret
.type aesni_ccm64_encrypt_blocks,\@function,6
.align 16
aesni_ccm64_encrypt_blocks:
+ endbr64
___
$code.=<<___ if ($win64);
lea -0x58(%rsp),%rsp
.type ${PREFIX}_set_decrypt_key,\@abi-omnipotent
.align 16
${PREFIX}_set_decrypt_key:
+ endbr64
sub \$8,%rsp
call __aesni_set_encrypt_key
shl \$4,$bits # rounds-1 after _aesni_set_encrypt_key
.type ${PREFIX}_set_encrypt_key,\@abi-omnipotent
.align 16
${PREFIX}_set_encrypt_key:
+ endbr64
__aesni_set_encrypt_key:
sub \$8,%rsp
mov \$-1,%rax
.type ecb_se_handler,\@abi-omnipotent
.align 16
ecb_se_handler:
+ endbr64
push %rsi
push %rdi
push %rbx
.type ccm64_se_handler,\@abi-omnipotent
.align 16
ccm64_se_handler:
+ endbr64
push %rsi
push %rdi
push %rbx
.type ctr32_se_handler,\@abi-omnipotent
.align 16
ctr32_se_handler:
+ endbr64
push %rsi
push %rdi
push %rbx
.type xts_se_handler,\@abi-omnipotent
.align 16
xts_se_handler:
+ endbr64
push %rsi
push %rdi
push %rbx
.type cbc_se_handler,\@abi-omnipotent
.align 16
cbc_se_handler:
+ endbr64
push %rsi
push %rdi
push %rbx
.type _bsaes_encrypt8,\@abi-omnipotent
.align 64
_bsaes_encrypt8:
+ endbr64
lea .LBS0(%rip), $const # constants table
movdqa ($key), @XMM[9] # round 0 key
.type _bsaes_decrypt8,\@abi-omnipotent
.align 64
_bsaes_decrypt8:
+ endbr64
lea .LBS0(%rip), $const # constants table
movdqa ($key), @XMM[9] # round 0 key
.type _bsaes_key_convert,\@abi-omnipotent
.align 16
_bsaes_key_convert:
+ endbr64
lea .Lmasks(%rip), $const
movdqu ($inp), %xmm7 # load round 0 key
lea 0x10($inp), $inp
.type bsaes_enc_key_convert,\@function,2
.align 16
bsaes_enc_key_convert:
+ endbr64
mov 240($inp),%r10d # pass rounds
mov $inp,%rcx # pass key
mov $out,%rax # pass key schedule
.align 16
bsaes_encrypt_128:
+ endbr64 # CET/IBT landing pad at function entry — before the loop label, so it is not re-executed each iteration (matches bsaes_decrypt_128)
.Lenc128_loop:
movdqu 0x00($inp), @XMM[0] # load input
movdqu 0x10($inp), @XMM[1]
movdqu 0x20($inp), @XMM[2]
.type bsaes_dec_key_convert,\@function,2
.align 16
bsaes_dec_key_convert:
+ endbr64
mov 240($inp),%r10d # pass rounds
mov $inp,%rcx # pass key
mov $out,%rax # pass key schedule
.type bsaes_decrypt_128,\@function,4
.align 16
bsaes_decrypt_128:
+ endbr64
.Ldec128_loop:
movdqu 0x00($inp), @XMM[0] # load input
movdqu 0x10($inp), @XMM[1]
.type bsaes_ecb_encrypt_blocks,\@abi-omnipotent
.align 16
bsaes_ecb_encrypt_blocks:
+ endbr64
mov %rsp, %rax
.Lecb_enc_prologue:
push %rbp
.type bsaes_ecb_decrypt_blocks,\@abi-omnipotent
.align 16
bsaes_ecb_decrypt_blocks:
+ endbr64
mov %rsp, %rax
.Lecb_dec_prologue:
push %rbp
.type bsaes_cbc_encrypt,\@abi-omnipotent
.align 16
bsaes_cbc_encrypt:
+ endbr64
___
$code.=<<___ if ($win64);
mov 48(%rsp),$arg6 # pull direction flag
.type bsaes_ctr32_encrypt_blocks,\@abi-omnipotent
.align 16
bsaes_ctr32_encrypt_blocks:
+ endbr64
mov %rsp, %rax
.Lctr_enc_prologue:
push %rbp
.type bsaes_xts_encrypt,\@abi-omnipotent
.align 16
bsaes_xts_encrypt:
+ endbr64
mov %rsp, %rax
.Lxts_enc_prologue:
push %rbp
.type bsaes_xts_decrypt,\@abi-omnipotent
.align 16
bsaes_xts_decrypt:
+ endbr64
mov %rsp, %rax
.Lxts_dec_prologue:
push %rbp
.type se_handler,\@abi-omnipotent
.align 16
se_handler:
+ endbr64
push %rsi
push %rdi
push %rbx
.type _vpaes_encrypt_core,\@abi-omnipotent
.align 16
_vpaes_encrypt_core:
+ endbr64
mov %rdx, %r9
mov \$16, %r11
mov 240(%rdx),%eax
.type _vpaes_decrypt_core,\@abi-omnipotent
.align 16
_vpaes_decrypt_core:
+ endbr64
mov %rdx, %r9 # load key
mov 240(%rdx),%eax
movdqa %xmm9, %xmm1
.type _vpaes_schedule_core,\@abi-omnipotent
.align 16
_vpaes_schedule_core:
+ endbr64
# rdi = key
# rsi = size in bits
# rdx = buffer
.type _vpaes_schedule_192_smear,\@abi-omnipotent
.align 16
_vpaes_schedule_192_smear:
+ endbr64
pshufd \$0x80, %xmm6, %xmm0 # d c 0 0 -> c 0 0 0
pxor %xmm0, %xmm6 # -> c+d c 0 0
pshufd \$0xFE, %xmm7, %xmm0 # b a _ _ -> b b b a
.type _vpaes_schedule_round,\@abi-omnipotent
.align 16
_vpaes_schedule_round:
+ endbr64
# extract rcon from xmm8
pxor %xmm1, %xmm1
palignr \$15, %xmm8, %xmm1
.type _vpaes_schedule_transform,\@abi-omnipotent
.align 16
_vpaes_schedule_transform:
+ endbr64
movdqa %xmm9, %xmm1
pandn %xmm0, %xmm1
psrld \$4, %xmm1
.type _vpaes_schedule_mangle,\@abi-omnipotent
.align 16
_vpaes_schedule_mangle:
+ endbr64
movdqa %xmm0, %xmm4 # save xmm0 for later
movdqa .Lk_mc_forward(%rip),%xmm5
test %rcx, %rcx
.type ${PREFIX}_set_encrypt_key,\@function,3
.align 16
${PREFIX}_set_encrypt_key:
+ endbr64
___
$code.=<<___ if ($win64);
lea -0xb8(%rsp),%rsp
.type ${PREFIX}_set_decrypt_key,\@function,3
.align 16
${PREFIX}_set_decrypt_key:
+ endbr64
___
$code.=<<___ if ($win64);
lea -0xb8(%rsp),%rsp
.type ${PREFIX}_encrypt,\@function,3
.align 16
${PREFIX}_encrypt:
+ endbr64
___
$code.=<<___ if ($win64);
lea -0xb8(%rsp),%rsp
.type ${PREFIX}_decrypt,\@function,3
.align 16
${PREFIX}_decrypt:
+ endbr64
___
$code.=<<___ if ($win64);
lea -0xb8(%rsp),%rsp
.type ${PREFIX}_cbc_encrypt,\@function,6
.align 16
${PREFIX}_cbc_encrypt:
+ endbr64
xchg $key,$len
___
($len,$key)=($key,$len);
.type _vpaes_preheat,\@abi-omnipotent
.align 16
_vpaes_preheat:
+ endbr64
lea .Lk_s0F(%rip), %r10
movdqa -0x20(%r10), %xmm10 # .Lk_inv
movdqa -0x10(%r10), %xmm11 # .Lk_inv+16
.type se_handler,\@abi-omnipotent
.align 16
se_handler:
+ endbr64
push %rsi
push %rdi
push %rbx
S2N_BN_SYMBOL(bignum_add):
+ endbr64
#if WINDOWS_ABI
push rdi
S2N_BN_SYMBOL(bignum_cmadd):
+ endbr64
#if WINDOWS_ABI
push rdi
S2N_BN_SYMBOL(bignum_cmul):
+ endbr64
#if WINDOWS_ABI
push rdi
S2N_BN_SYMBOL(bignum_mul):
+ endbr64
#if WINDOWS_ABI
push rdi
adc h, rdx
S2N_BN_SYMBOL(bignum_mul_4_8_alt):
+ endbr64
#if WINDOWS_ABI
push rdi
adc h, rdx
S2N_BN_SYMBOL(bignum_mul_8_16_alt):
+ endbr64
#if WINDOWS_ABI
push rdi
#define llshort ebp
S2N_BN_SYMBOL(bignum_sqr):
+ endbr64
#if WINDOWS_ABI
push rdi
adc c, 0
S2N_BN_SYMBOL(bignum_sqr_4_8_alt):
+ endbr64
#if WINDOWS_ABI
push rdi
adc c, 0
S2N_BN_SYMBOL(bignum_sqr_8_16_alt):
+ endbr64
#if WINDOWS_ABI
push rdi
S2N_BN_SYMBOL(bignum_sub):
+ endbr64
#if WINDOWS_ABI
push rdi
.text
S2N_BN_SYMBOL(word_clz):
+ endbr64
#if WINDOWS_ABI
push rdi
.type MULADD_128x512,\@abi-omnipotent
.align 16
MULADD_128x512:
+ endbr64
___
&MULSTEP_512([map("%r$_",(8..15))], "(+8*0)(%rcx)", "%rsi", "%rbp", "%rbx");
$code.=<<___;
.type mont_reduce,\@abi-omnipotent
.align 16
mont_reduce:
+ endbr64
___
my $STACK_DEPTH = 8;
.type mont_mul_a3b,\@abi-omnipotent
.align 16
mont_mul_a3b:
+ endbr64
#
# multiply tmp = src1 * src2
# For multiply: dst = rcx, src1 = rdi, src2 = rsi
.type sqr_reduce,\@abi-omnipotent
.align 16
sqr_reduce:
+ endbr64
mov (+$pResult_offset+8)(%rsp), %rcx
___
&SQR_512("%rsp+$tmp16_offset+8", "%rcx", [map("%r$_",(10..15,8..9))], "%rbx", "%rbp", "%rsi", "%rdi");
.globl mod_exp_512
.type mod_exp_512,\@function,4
mod_exp_512:
+ endbr64
push %rbp
push %rbx
push %r12
.type bn_mul_mont,\@function,6
.align 16
bn_mul_mont:
+ endbr64
test \$3,${num}d
jnz .Lmul_enter
cmp \$8,${num}d
.align 16
bn_mul4x_mont:
.Lmul4x_enter:
+ endbr64
push %rbx
push %rbp
push %r12
.align 16
bn_sqr4x_mont:
.Lsqr4x_enter:
+ endbr64
push %rbx
push %rbp
push %r12
.type bn_mul_mont_gather5,\@function,6
.align 64
bn_mul_mont_gather5:
+ endbr64
test \$3,${num}d
jnz .Lmul_enter
cmp \$8,${num}d
.type bn_mul4x_mont_gather5,\@function,6
.align 16
bn_mul4x_mont_gather5:
+ endbr64
.Lmul4x_enter:
mov ${num}d,${num}d
movd `($win64?56:8)`(%rsp),%xmm5 # load 7th argument
.type bn_scatter5,\@abi-omnipotent
.align 16
bn_scatter5:
+ endbr64
cmp \$0, $num
jz .Lscatter_epilogue
lea ($tbl,$idx,8),$tbl
.type bn_gather5,\@abi-omnipotent
.align 16
bn_gather5:
+ endbr64
.LSEH_begin_bn_gather5: # Win64 thing, but harmless in other cases
# I can't trust assembler to use specific encoding:-(
.byte 0x4c,0x8d,0x14,0x24 # lea (%rsp),%r10
.type mul_handler,\@abi-omnipotent
.align 16
mul_handler:
+ endbr64
push %rsi
push %rdi
push %rbx
.type Camellia_EncryptBlock,\@abi-omnipotent
.align 16
Camellia_EncryptBlock:
+ endbr64
movl \$128,%eax
subl $arg0d,%eax
movl \$3,$arg0d
.align 16
.Lenc_rounds:
Camellia_EncryptBlock_Rounds:
+ endbr64
push %rbx
push %rbp
push %r13
.type _x86_64_Camellia_encrypt,\@abi-omnipotent
.align 16
_x86_64_Camellia_encrypt:
+ endbr64
xor 0($key),@S[1]
xor 4($key),@S[0] # ^=key[0-3]
xor 8($key),@S[3]
.type Camellia_DecryptBlock,\@abi-omnipotent
.align 16
Camellia_DecryptBlock:
+ endbr64
movl \$128,%eax
subl $arg0d,%eax
movl \$3,$arg0d
.align 16
.Ldec_rounds:
Camellia_DecryptBlock_Rounds:
+ endbr64
push %rbx
push %rbp
push %r13
.type _x86_64_Camellia_decrypt,\@abi-omnipotent
.align 16
_x86_64_Camellia_decrypt:
+ endbr64
xor 0($key),@S[1]
xor 4($key),@S[0] # ^=key[0-3]
xor 8($key),@S[3]
.type Camellia_Ekeygen,\@function,3
.align 16
Camellia_Ekeygen:
+ endbr64
push %rbx
push %rbp
push %r13
.type Camellia_cbc_encrypt,\@function,6
.align 16
Camellia_cbc_encrypt:
+ endbr64
cmp \$0,%rdx
je .Lcbc_abort
push %rbx
.globl md5_block_asm_data_order
.type md5_block_asm_data_order,\@function,3
md5_block_asm_data_order:
+ endbr64
push %rbp
push %rbx
push %r12
.type gcm_init_clmul,\@abi-omnipotent
.align 16
gcm_init_clmul:
+ endbr64
movdqu ($Xip),$Hkey
pshufd \$0b01001110,$Hkey,$Hkey # dword swap
.type gcm_gmult_clmul,\@abi-omnipotent
.align 16
gcm_gmult_clmul:
+ endbr64
movdqu ($Xip),$Xi
movdqa .Lbswap_mask(%rip),$T3
movdqu ($Htbl),$Hkey
.type gcm_ghash_clmul,\@abi-omnipotent
.align 16
gcm_ghash_clmul:
+ endbr64
___
$code.=<<___ if ($win64);
.LSEH_begin_gcm_ghash_clmul:
.type se_handler,\@abi-omnipotent
.align 16
se_handler:
+ endbr64
push %rsi
push %rdi
push %rbx
.globl $func
.type $func,\@function,$nargs
$func:
+ endbr64
cmp \$0,$len
je .Labort
push %rbx
.type RC4_set_key,\@function,3
.align 16
RC4_set_key:
+ endbr64
lea 8($dat),$dat
lea ($inp,$len),$inp
neg $len
.type RC4_options,\@abi-omnipotent
.align 16
RC4_options:
+ endbr64
lea .Lopts(%rip),%rax
ret
.align 64
.globl RC4
.type RC4,\@function,4
.align 16
-RC4: or $len,$len
+RC4:
+ endbr64
+ or $len,$len
jne .Lentry
ret
.Lentry:
.type RC4_set_key,\@function,3
.align 16
RC4_set_key:
+ endbr64
lea 8($dat),$dat
lea ($inp,$len),$inp
neg $len
.type RC4_options,\@abi-omnipotent
.align 16
RC4_options:
+ endbr64
lea .Lopts(%rip),%rax
mov OPENSSL_ia32cap_P(%rip),%edx
bt \$IA32CAP_BIT0_INTELP4,%edx
.type sha1_block_data_order,\@function,3
.align 16
sha1_block_data_order:
+ endbr64
mov OPENSSL_ia32cap_P+0(%rip),%r9d
mov OPENSSL_ia32cap_P+4(%rip),%r8d
test \$IA32CAP_MASK1_SSSE3,%r8d # check SSSE3 bit
.align 16
sha1_block_data_order_ssse3:
_ssse3_shortcut:
+ endbr64
push %rbx
push %rbp
push %r12
.align 16
sha1_block_data_order_avx:
_avx_shortcut:
+ endbr64
push %rbx
push %rbp
push %r12
.type se_handler,\@abi-omnipotent
.align 16
se_handler:
+ endbr64
push %rsi
push %rdi
push %rbx
.type $func,\@function,4
.align 16
$func:
+ endbr64
push %rbx
push %rbp
push %r12
.type $func,\@function,3
.align 16
$func:
+ endbr64
push %rbx
push %rbp
push %r12
.extern OPENSSL_cpuid_setup
.hidden OPENSSL_cpuid_setup
.section .init
+ endbr64
call OPENSSL_cpuid_setup
.extern OPENSSL_ia32cap_P
.type OPENSSL_ia32_cpuid,\@abi-omnipotent
.align 16
OPENSSL_ia32_cpuid:
+ endbr64
mov %rbx,%r8 # save %rbx
xor %eax,%eax