responsible for getting the proper address of those blocks.
ok tb@ jsing@
&xor ($s3,&DWP(12,$key));
&ret ();
+&function_end_B("_x86_AES_encrypt");
-&set_label("AES_Te",64); # Yes! I keep it in the code segment!
+ &rodataseg();
+&set_label("AES_Te",64);
&_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
&_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
&_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56);
&data_word(0x00000010, 0x00000020, 0x00000040, 0x00000080);
&data_word(0x0000001b, 0x00000036, 0x00000000, 0x00000000);
&data_word(0x00000000, 0x00000000, 0x00000000, 0x00000000);
-&function_end_B("_x86_AES_encrypt");
+ &previous();
# void AES_encrypt (const void *inp,void *out,const AES_KEY *key);
&function_begin("AES_encrypt");
&add ("esp",4); # 4 is reserved for caller's return address
&mov ($_esp,$s0); # save stack pointer
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tbl);
- &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if (!$x86only);
- &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl));
+ &picsetup($tbl);
+ &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl);
+ &picsymbol($tbl, &label("AES_Te"), $tbl);
# pick Te4 copy which can't "overlap" with stack frame or key schedule
&lea ($s1,&DWP(768-4,"esp"));
&xor ($s3,&DWP(12,$key));
&ret ();
+&function_end_B("_x86_AES_decrypt");
-&set_label("AES_Td",64); # Yes! I keep it in the code segment!
+ &rodataseg();
+&set_label("AES_Td",64);
&_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
&_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
&_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5);
&data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
&data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
&data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
-&function_end_B("_x86_AES_decrypt");
+ &previous();
# void AES_decrypt (const void *inp,void *out,const AES_KEY *key);
&function_begin("AES_decrypt");
&add ("esp",4); # 4 is reserved for caller's return address
&mov ($_esp,$s0); # save stack pointer
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tbl);
- &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if(!$x86only);
- &lea ($tbl,&DWP(&label("AES_Td")."-".&label("pic_point"),$tbl));
+ &picsetup($tbl);
+ &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl);
+ &picsymbol($tbl, &label("AES_Td"), $tbl);
# pick Td4 copy which can't "overlap" with stack frame or key schedule
&lea ($s1,&DWP(768-4,"esp"));
&cmp ($s2,0);
&je (&label("drop_out"));
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tbl);
- &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if(!$x86only);
-
+ &picsetup($tbl);
+ &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl);
+ &picsymbol($tbl, &label("AES_Te"), $tbl);
&cmp (&wparam(5),0);
- &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl));
&jne (&label("picked_te"));
&lea ($tbl,&DWP(&label("AES_Td")."-".&label("AES_Te"),$tbl));
&set_label("picked_te");
&test ("edi",-1);
&jz (&label("badpointer"));
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($tbl);
- &lea ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl));
+ &picsetup($tbl);
+ &picsymbol($tbl, &label("AES_Te"), $tbl);
+
&lea ($tbl,&DWP(2048+128,$tbl));
# prefetch Te4
&xor ("eax","eax"); # return success
&function_end("AES_set_decrypt_key");
-&asciz("AES for x86, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
&set_label("dec_key_ret");
&ret ();
&function_end_B("${PREFIX}_set_decrypt_key");
-&asciz("AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
my ($round, $base, $magic, $key, $const, $inp, $out)=
("eax", "ebx", "ecx", "edx","ebp", "esi","edi");
+ &rodataseg();
&static_label("_vpaes_consts");
&static_label("_vpaes_schedule_low_round");
$k_dsbo=0x2c0; # decryption sbox final output
&data_word(0x7EF94000,0x1387EA53,0xD4943E2D,0xC7AA6DB9);
&data_word(0x93441D00,0x12D7560F,0xD8C58E9C,0xCA4B8159);
-&asciz ("Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)");
-&align (64);
+ &previous();
&function_begin_B("_vpaes_preheat");
&add ($const,&DWP(0,"esp"));
&mov ($magic,0x30);
&mov ($out,0);
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+ &picsetup($const);
+ &picsymbol($const, &label("_vpaes_consts"), $const);
+ # $const = _vpaes_consts+0x30, the same bias the replaced lea applied
+ &lea ($const,&DWP(0x30,$const));
+
&call ("_vpaes_schedule_core");
-&set_label("pic_point");
&mov ("esp",&DWP(48,"esp"));
&xor ("eax","eax");
&and ($magic,32);
&xor ($magic,32); # nbist==192?0:32;
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+ &picsetup($const);
+ &picsymbol($const, &label("_vpaes_consts"), $const);
+ # $const = _vpaes_consts+0x30, the same bias the replaced lea applied
+ &lea ($const,&DWP(0x30,$const));
+
&call ("_vpaes_schedule_core");
-&set_label("pic_point");
&mov ("esp",&DWP(48,"esp"));
&xor ("eax","eax");
&function_end("${PREFIX}_set_decrypt_key");
&function_begin("${PREFIX}_encrypt");
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+ &picsetup($const);
+ &picsymbol($const, &label("_vpaes_consts"), $const);
+ # $const = _vpaes_consts+0x30, the same bias the replaced lea applied
+ &lea ($const,&DWP(0x30,$const));
+
&call ("_vpaes_preheat");
-&set_label("pic_point");
&mov ($inp,&wparam(0)); # inp
&lea ($base,&DWP(-56,"esp"));
&mov ($out,&wparam(1)); # out
&function_end("${PREFIX}_encrypt");
&function_begin("${PREFIX}_decrypt");
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+ &picsetup($const);
+ &picsymbol($const, &label("_vpaes_consts"), $const);
+ # $const = _vpaes_consts+0x30, the same bias the replaced lea applied
+ &lea ($const,&DWP(0x30,$const));
+
&call ("_vpaes_preheat");
-&set_label("pic_point");
&mov ($inp,&wparam(0)); # inp
&lea ($base,&DWP(-56,"esp"));
&mov ($out,&wparam(1)); # out
&mov (&DWP(8,"esp"),$const); # save ivp
&mov ($out,$round); # $out works as $len
- &lea ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+ &picsetup($const);
+ &picsymbol($const, &label("_vpaes_consts"), $const);
+ # $const = _vpaes_consts+0x30, the same bias the replaced lea applied
+ &lea ($const,&DWP(0x30,$const));
+
&call ("_vpaes_preheat");
-&set_label("pic_point");
&cmp ($magic,0);
&je (&label("cbc_dec_loop"));
&jmp (&label("cbc_enc_loop"));
$c="ecx";
if ($sse2) {
- &picmeup("eax","OPENSSL_ia32cap_P");
+ &picsetup("eax");
+ &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
&bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
&jnc(&label("maw_non_sse2"));
$c="ecx";
if ($sse2) {
- &picmeup("eax","OPENSSL_ia32cap_P");
+ &picsetup("eax");
+ &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
&bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
&jnc(&label("mw_non_sse2"));
$c="ecx";
if ($sse2) {
- &picmeup("eax","OPENSSL_ia32cap_P");
+ &picsetup("eax");
+ &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
&bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
&jnc(&label("sqr_non_sse2"));
# void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0);
&function_begin_B("bn_GF2m_mul_2x2");
if (!$x86only) {
- &picmeup("edx","OPENSSL_ia32cap_P");
+ &picsetup("edx");
+ &picsymbol("edx", "OPENSSL_ia32cap_P", "edx");
&mov ("eax",&DWP(0,"edx"));
&mov ("edx",&DWP(4,"edx"));
&test ("eax","\$IA32CAP_MASK0_MMX"); # check MMX bit
&ret ();
&function_end_B("bn_GF2m_mul_2x2");
-&asciz ("GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
-
&asm_finish();
$temp="mm6";
$mask="mm7";
- &picmeup("eax","OPENSSL_ia32cap_P");
+ &picsetup("eax");
+ &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
&bt (&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
&jnc (&label("non_sse2"));
&set_label("just_leave");
&function_end("bn_mul_mont");
-&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
-
&asm_finish();
&mov ($_esp,"ebx"); # save %esp
&mov ($_end,"eax"); # save keyEnd
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($Tbl);
- &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+ &picsetup($Tbl);
+ &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
&mov (@T[0],&DWP(0,$idx)); # load plaintext
&mov (@T[1],&DWP(4,$idx));
&mov ($_esp,"ebx"); # save %esp
&mov ($_end,"eax"); # save keyEnd
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($Tbl);
- &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+ &picsetup($Tbl);
+ &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
&mov (@T[0],&DWP(0,$idx)); # load plaintext
&mov (@T[1],&DWP(4,$idx));
&lea ($key,&DWP(0,$key,"eax"));
&mov (&DWP(5*4,"esp"),"ebx");# save %esp
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($Tbl);
- &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+ &picsetup($Tbl);
+ &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
&mov (@T[0],&DWP(0,$idx)); # load ciphertext
&mov (@T[1],&DWP(4,$idx));
&lea ($key,&DWP(0,$key,"eax"));
&mov (&DWP(5*4,"esp"),"ebx");# save %esp
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($Tbl);
- &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+ &picsetup($Tbl);
+ &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
&mov (@T[0],&DWP(0,$idx)); # load ciphertext
&mov (@T[1],&DWP(4,$idx));
&xor (@T[3],&DWP(1*8+4,$key));
&set_label("1st128",4);
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($Tbl);
- &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+ &picsetup($Tbl);
+ &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
&lea ($key,&DWP(&label("Camellia_SIGMA")."-".&label("Camellia_SBOX"),$Tbl));
&mov ($idx,&DWP($step*8,$key)); # prefetch SIGMA[0]
sub S0222 { my $i=shift; $i=@SBOX[$i]; $i=($i<<1|$i>>7)&0xff; return $i<<16|$i<<8|$i; }
sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; return $i<<24|$i<<8|$i; }
+ &rodataseg();
&set_label("Camellia_SIGMA",64);
&data_word(
0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2,
# tables are interleaved, remember?
for ($i=0;$i<256;$i++) { &data_word(&S1110($i),&S4404($i)); }
for ($i=0;$i<256;$i++) { &data_word(&S0222($i),&S3033($i)); }
+ &previous();
# void Camellia_cbc_encrypt (const void char *inp, unsigned char *out,
# size_t length, const CAMELLIA_KEY *key,
&mov ($_key,$s3); # save copy of key
&mov ($_ivp,$Tbl); # save copy of ivp
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($Tbl);
- &lea ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+ &picsetup($Tbl);
+ &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
&mov ($idx,32);
&set_label("prefetch_sbox",4);
&function_end("Camellia_cbc_encrypt");
}
-&asciz("Camellia for x86 by <appro\@openssl.org>");
-
&asm_finish();
&rotl($L,3);
}
- # PIC-ification:-)
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop($trans);
- &lea ($trans,&DWP(&label("DES_SPtrans")."-".&label("pic_point"),$trans));
+ &picsetup($trans);
+ &picsymbol($trans, &label("DES_SPtrans"), $trans);
&mov( "ecx", &wparam(1) );
sub DES_SPtrans
{
+ &rodataseg();
&set_label("DES_SPtrans",64);
&data_word(0x02080800, 0x00080000, 0x02000002, 0x02080802);
&data_word(0x02000000, 0x00080802, 0x00080002, 0x02000002);
&data_word(0x00820000, 0x00020080, 0x20020080, 0x20800000);
&data_word(0x00000080, 0x20820000, 0x00820080, 0x00000000);
&data_word(0x20000000, 0x20800080, 0x00020000, 0x00820080);
+ &previous();
}
&mov ($inp,&wparam(0)); # load Xi
&mov ($Htbl,&wparam(1)); # load Htable
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop("eax");
- &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax"));
+ &picsetup("eax");
+ &picsymbol("eax", &label("rem_4bit"), "eax");
&movz ($Zll,&BP(15,$inp));
&mov ($inp,&wparam(2)); # load in
&mov ($Zlh,&wparam(3)); # load len
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop("eax");
- &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax"));
+ &picsetup("eax");
+ &picsymbol("eax", &label("rem_4bit"), "eax");
&add ($Zlh,$inp);
&mov (&wparam(3),$Zlh); # len to point at the end of input
&mov ($inp,&wparam(0)); # load Xi
&mov ($Htbl,&wparam(1)); # load Htable
- &call (&label("pic_point"));
- &set_label("pic_point");
- &blindpop("eax");
- &lea ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax"));
+ &picsetup("eax");
+ &picsymbol("eax", &label("rem_4bit"), "eax");
&movz ($Zll,&BP(15,$inp));
&mov ("ecx",&wparam(2)); # inp
&mov ("edx",&wparam(3)); # len
&mov ("ebp","esp"); # original %esp
- &call (&label("pic_point"));
- &set_label ("pic_point");
- &blindpop ($rem_8bit);
- &lea ($rem_8bit,&DWP(&label("rem_8bit")."-".&label("pic_point"),$rem_8bit));
+
+ &picsetup($rem_8bit);
+ &picsymbol($rem_8bit, &label("rem_8bit"), $rem_8bit);
&sub ("esp",512+16+16); # allocate stack frame...
&and ("esp",-64); # ...and align it
&mov ($Htbl,&wparam(0));
&mov ($Xip,&wparam(1));
- &call (&label("pic"));
-&set_label("pic");
- &blindpop ($const);
- &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+ &picsetup($const);
+ &picsymbol($const, &label("bswap"), $const);
&movdqu ($Hkey,&QWP(0,$Xip));
&pshufd ($Hkey,$Hkey,0b01001110);# dword swap
&mov ($Xip,&wparam(0));
&mov ($Htbl,&wparam(1));
- &call (&label("pic"));
-&set_label("pic");
- &blindpop ($const);
- &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+ &picsetup($const);
+ &picsymbol($const, &label("bswap"), $const);
&movdqu ($Xi,&QWP(0,$Xip));
&movdqa ($T3,&QWP(0,$const));
&mov ($inp,&wparam(2));
&mov ($len,&wparam(3));
- &call (&label("pic"));
-&set_label("pic");
- &blindpop ($const);
- &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+ &picsetup($const);
+ &picsymbol($const, &label("bswap"), $const);
&movdqu ($Xi,&QWP(0,$Xip));
&movdqa ($T3,&QWP(0,$const));
&mov ($Htbl,&wparam(0));
&mov ($Xip,&wparam(1));
- &call (&label("pic"));
-&set_label("pic");
- &blindpop ($const);
- &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+ &picsetup($const);
+ &picsymbol($const, &label("bswap"), $const);
&movdqu ($Hkey,&QWP(0,$Xip));
&pshufd ($Hkey,$Hkey,0b01001110);# dword swap
&mov ($Xip,&wparam(0));
&mov ($Htbl,&wparam(1));
- &call (&label("pic"));
-&set_label("pic");
- &blindpop ($const);
- &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+ &picsetup($const);
+ &picsymbol($const, &label("bswap"), $const);
&movdqu ($Xi,&QWP(0,$Xip));
&movdqa ($Xn,&QWP(0,$const));
&mov ($inp,&wparam(2));
&mov ($len,&wparam(3));
- &call (&label("pic"));
-&set_label("pic");
- &blindpop ($const);
- &lea ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+ &picsetup($const);
+ &picsymbol($const, &label("bswap"), $const);
&movdqu ($Xi,&QWP(0,$Xip));
&movdqa ($T3,&QWP(0,$const));
}
\f
+ &rodataseg();
&set_label("bswap",64);
&data_byte(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
&data_byte(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2); # 0x1c2_polynomial
+ &previous();
}} # $sse2
+ &rodataseg();
&set_label("rem_4bit",64);
&data_word(0,0x0000<<$S,0,0x1C20<<$S,0,0x3840<<$S,0,0x2460<<$S);
&data_word(0,0x7080<<$S,0,0x6CA0<<$S,0,0x48C0<<$S,0,0x54E0<<$S);
&data_short(0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E);
&data_short(0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE);
&data_short(0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE);
+ &previous();
}}} # !$x86only
-&asciz("GHASH for x86, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
# A question was risen about choice of vanilla MMX. Or rather why wasn't
# p1,p2,p3 are the offsets for parameters to be passed to the
# underlying calls.
+&static_label("cbc_enc_jmp_table_".$name);
+&static_label("ej1_".$name);
+&static_label("ej2_".$name);
+&static_label("ej3_".$name);
+&static_label("ej4_".$name);
+&static_label("ej5_".$name);
+&static_label("ej6_".$name);
+&static_label("ej7_".$name);
+
&function_begin_B($name,"");
&comment("");
&mov($count, &wparam(2)); # length
&and($count, 7);
&jz(&label("finish"));
- &call(&label("PIC_point"));
-&set_label("PIC_point");
- &blindpop("edx");
- &lea("ecx",&DWP(&label("cbc_enc_jmp_table")."-".&label("PIC_point"),"edx"));
+
+ &picsetup("edx");
+ # "ecx" receives the jump table address; "edx" keeps the picsetup base
+ # because the picadjust call that follows still needs it
+ &picsymbol("ecx", &label("cbc_enc_jmp_table_".$name), "edx");
&mov($count,&DWP(0,"ecx",$count,4));
- &add($count,"edx");
+ &picadjust($count, "edx");
+
&xor("ecx","ecx");
&xor("edx","edx");
- #&mov($count,&DWP(&label("cbc_enc_jmp_table"),"",$count,4));
&jmp_ptr($count);
-&set_label("ej7");
+&set_label("ej7_".$name);
&movb(&HB("edx"), &BP(6,$in,"",0));
&shl("edx",8);
-&set_label("ej6");
+&set_label("ej6_".$name);
&movb(&HB("edx"), &BP(5,$in,"",0));
-&set_label("ej5");
+&set_label("ej5_".$name);
&movb(&LB("edx"), &BP(4,$in,"",0));
-&set_label("ej4");
+&set_label("ej4_".$name);
&mov("ecx", &DWP(0,$in,"",0));
&jmp(&label("ejend"));
-&set_label("ej3");
+&set_label("ej3_".$name);
&movb(&HB("ecx"), &BP(2,$in,"",0));
&shl("ecx",8);
-&set_label("ej2");
+&set_label("ej2_".$name);
&movb(&HB("ecx"), &BP(1,$in,"",0));
-&set_label("ej1");
+&set_label("ej1_".$name);
&movb(&LB("ecx"), &BP(0,$in,"",0));
&set_label("ejend");
&mov("eax", &DWP(0,$in,"",0)); # get old cipher text,
&mov("ebx", &DWP(4,$in,"",0)); # next iv actually
-&set_label("dj7");
&rotr("edx", 16);
&movb(&BP(6,$out,"",0), &LB("edx"));
&shr("edx",16);
-&set_label("dj6");
&movb(&BP(5,$out,"",0), &HB("edx"));
-&set_label("dj5");
&movb(&BP(4,$out,"",0), &LB("edx"));
-&set_label("dj4");
&mov(&DWP(0,$out,"",0), "ecx");
- &jmp(&label("djend"));
-&set_label("dj3");
- &rotr("ecx", 16);
- &movb(&BP(2,$out,"",0), &LB("ecx"));
- &shl("ecx",16);
-&set_label("dj2");
- &movb(&BP(1,$in,"",0), &HB("ecx"));
-&set_label("dj1");
- &movb(&BP(0,$in,"",0), &LB("ecx"));
-&set_label("djend");
# final iv is still in eax:ebx
- &jmp(&label("finish"));
-
############################ FINISH #######################3
&set_label("finish",1);
&mov(&DWP(4,"ecx","",0), "ebx"); # save iv
&function_end_A($name);
+ &function_end_B($name);
+ &rodataseg();
&align(64);
- &set_label("cbc_enc_jmp_table");
+ &set_label("cbc_enc_jmp_table_".$name);
&data_word("0");
- &data_word(&label("ej1")."-".&label("PIC_point"));
- &data_word(&label("ej2")."-".&label("PIC_point"));
- &data_word(&label("ej3")."-".&label("PIC_point"));
- &data_word(&label("ej4")."-".&label("PIC_point"));
- &data_word(&label("ej5")."-".&label("PIC_point"));
- &data_word(&label("ej6")."-".&label("PIC_point"));
- &data_word(&label("ej7")."-".&label("PIC_point"));
- # not used
- #&set_label("cbc_dec_jmp_table",1);
- #&data_word("0");
- #&data_word(&label("dj1")."-".&label("PIC_point"));
- #&data_word(&label("dj2")."-".&label("PIC_point"));
- #&data_word(&label("dj3")."-".&label("PIC_point"));
- #&data_word(&label("dj4")."-".&label("PIC_point"));
- #&data_word(&label("dj5")."-".&label("PIC_point"));
- #&data_word(&label("dj6")."-".&label("PIC_point"));
- #&data_word(&label("dj7")."-".&label("PIC_point"));
- &align(64);
+ &data_word(&code_sym(&label("ej1_".$name)));
+ &data_word(&code_sym(&label("ej2_".$name)));
+ &data_word(&code_sym(&label("ej3_".$name)));
+ &data_word(&code_sym(&label("ej4_".$name)));
+ &data_word(&code_sym(&label("ej5_".$name)));
+ &data_word(&code_sym(&label("ej6_".$name)));
+ &data_word(&code_sym(&label("ej7_".$name)));
+ &previous();
- &function_end_B($name);
-
}
1;
push(@out,".align\t$val\n");
}
-sub ::picmeup
-{ my($dst,$sym,$base,$reflabel)=@_;
-
- if ($::openbsd)
- { &::emitraw("#if defined(PIC) || defined(__PIC__)");
- &::emitraw("PIC_PROLOGUE");
- &::mov($dst, &::DWP("PIC_GOT($sym)"));
- &::emitraw("PIC_EPILOGUE");
- &::emitraw("#else /* PIC */");
- &::lea($dst,&::DWP($sym));
- &::emitraw("#endif /* PIC */");
- }
- elsif (($::pic && ($::elf || $::aout)) || $::macosx)
- { if (!defined($base))
- { &::call(&::label("PIC_me_up"));
- &::set_label("PIC_me_up");
- &::blindpop($dst);
- $base=$dst;
- $reflabel=&::label("PIC_me_up");
- }
+#
+# PIC data access wrappers
+#
+# Usage:
+# picsetup($base)
+# - only allowed once per function (because of hardcoded label name),
+# sets up pic access, uses $base register as temporary
+# picsymbol($dst, $sym, $base)
+# - loads the address of symbol $sym into $dst with the help of $base
+# initialized by picsetup
+# picadjust($sym, $base)
+# - adjusts a code pointer read from a code_sym table with the help of
+# $base initialized by picsetup
+# code_sym($sym)
+# - emits a pointer to the given code symbol, relative to the GOT if
+# PIC. This pointer will need to be adjusted with picadjust above
+# before use.
+
+# Emits the call/pop sequence that leaves the address of the local
+# PIC_setup label in $base. Emits nothing for non-PIC builds.
+sub ::picsetup
+{ my($base)=@_;
+
+ if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx)
+ {
+ &::call(&::label("PIC_setup"));
+ &::set_label("PIC_setup");
+ &::blindpop($base);
+ }
+}
+
+# Loads the address of $sym into $dst with the help of $base as set up by
+# picsetup. Non-PIC builds fall back to a plain lea of $sym.
+sub ::picsymbol
+{ my($dst,$sym,$base)=@_;
+
+ if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx)
+ {
+ my $reflabel=&::label("PIC_setup");
+ # the non-lazy pointer label is derived from $sym, which only this
+ # sub receives, so it has to be created and recorded here
if ($::macosx)
{ my $indirect=&::static_label("$nmdecor$sym\$non_lazy_ptr");
&::mov($dst,&::DWP("$indirect-$reflabel",$base));
$non_lazy_ptr{"$nmdecor$sym"}=$indirect;
}
else
{ &::lea($dst,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]",
- $base));
+ $base));
&::mov($dst,&::DWP("$sym\@GOT",$dst));
}
}
else
{ &::lea($dst,&::DWP($sym)); }
}
+# Rebases the table entry held in $sym onto the GOT address derived from
+# $base (as left by picsetup). Emits nothing for non-PIC builds.
+sub ::picadjust
+{ my($sym,$base)=@_;
+ my $pic_build=($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx;
+
+ if ($pic_build)
+ { my $anchor=&::label("PIC_setup");
+ my $got_disp="_GLOBAL_OFFSET_TABLE_+[.-$anchor]";
+ &::lea($sym,&::DWP($got_disp,$base,$sym));
+ }
+}
+
+# Returns the operand text for a code symbol table entry: $sym with an
+# "@GOTOFF" suffix for PIC builds, plain $sym otherwise.
+sub ::code_sym
+{ my($sym)=@_;
+ my $pic_build=($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx;
+
+ return $pic_build ? $sym."\@GOTOFF" : $sym;
+}
+
sub ::initseg
{ my $f=$nmdecor.shift;
sub ::dataseg
{ push(@out,".data\n"); }
+# Appends the ".rodata" directive line to the generated output.
+sub ::rodataseg
+{ push @out, ".rodata\n";
+}
+
+# Appends the ".previous" directive line to the generated output.
+sub ::previous
+{ push @out, ".previous\n";
+}
+
1;
&mov (&wparam(3),$out); # $out as accumulator in these loops
&jz (&label("go4loop4"));
- &picmeup($out,"OPENSSL_ia32cap_P");
+ &picsetup($out);
+ &picsymbol($out, "OPENSSL_ia32cap_P", $out);
# check SSE2 bit [could have been MMX]
&bt (&DWP(0,$out),"\$IA32CAP_BIT0_SSE2");
&jnc (&label("go4loop4"));
&mov ($out,&wparam(0)); # load key
&mov ($idi,&wparam(1)); # load len
&mov ($inp,&wparam(2)); # load data
- &picmeup($idx,"OPENSSL_ia32cap_P");
+
+ &picsetup($idx);
+ &picsymbol($idx, "OPENSSL_ia32cap_P", $idx);
&lea ($out,&DWP(2*4,$out)); # &key->data
&lea ($inp,&DWP(0,$inp,$idi)); # $inp to point at the end
&function_end("RC4_set_key");
# const char *RC4_options(void);
+&static_label("opts");
&function_begin_B("RC4_options");
- &call (&label("pic_point"));
-&set_label("pic_point");
- &blindpop("eax");
- &lea ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax"));
- &picmeup("edx","OPENSSL_ia32cap_P");
+ &picsetup("edx");
+ # resolve opts first: the second picsymbol overwrites the base in "edx"
+ &picsymbol("eax", &label("opts"), "edx");
+ &picsymbol("edx", "OPENSSL_ia32cap_P", "edx");
+
&mov ("edx",&DWP(0,"edx"));
&bt ("edx","\$IA32CAP_BIT0_INTELP4");
&jc (&label("1xchar"));
&add ("eax",12);
&set_label("ret");
&ret ();
-&set_label("opts",64);
+&function_end_B("RC4_options");
+
+ &rodataseg();
+&set_label("opts");
&asciz ("rc4(4x,int)");
&asciz ("rc4(1x,char)");
&asciz ("rc4(8x,mmx)");
-&asciz ("RC4 for x86, CRYPTOGAMS by <appro\@openssl.org>");
-&align (64);
-&function_end_B("RC4_options");
+ &previous();
&asm_finish();
&static_label("avx_shortcut") if ($ymm);
&static_label("K_XX_XX");
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tmp1);
- &picmeup($T,"OPENSSL_ia32cap_P",$tmp1,&label("pic_point"));
- &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
+ &picsetup($tmp1);
+ &picsymbol($T, "OPENSSL_ia32cap_P", $tmp1);
+ &picsymbol($tmp1, &label("K_XX_XX"), $tmp1);
&mov ($A,&DWP(0,$T));
&mov ($D,&DWP(4,$T));
my $_ror=sub { &ror(@_) };
&function_begin("_sha1_block_data_order_ssse3");
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tmp1);
- &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
+ &picsetup($tmp1);
+ &picsymbol($tmp1, &label("K_XX_XX"), $tmp1);
+
&set_label("ssse3_shortcut");
&movdqa (@X[3],&QWP(0,$tmp1)); # K_00_19
my $_ror=sub { &shrd(@_[0],@_) };
&function_begin("_sha1_block_data_order_avx");
- &call (&label("pic_point")); # make it PIC!
- &set_label("pic_point");
- &blindpop($tmp1);
- &lea ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
+ &picsetup($tmp1);
+ &picsymbol($tmp1, &label("K_XX_XX"), $tmp1);
+
&set_label("avx_shortcut");
&vzeroall();
&mov (&DWP(16,@T[1]),$E);
&function_end("_sha1_block_data_order_avx");
}
+
+ &rodataseg();
&set_label("K_XX_XX",64);
&data_word(0x5a827999,0x5a827999,0x5a827999,0x5a827999); # K_00_19
&data_word(0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1); # K_20_39
&data_word(0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc); # K_40_59
&data_word(0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6); # K_60_79
&data_word(0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f); # pbswap mask
+ &previous();
}
-&asciz("SHA1 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
&asm_finish();
&add ($A,"esi"); # h += K256[i]
}
+&static_label("K256");
&function_begin("sha256_block_data_order");
&mov ("esi",wparam(0)); # ctx
&mov ("edi",wparam(1)); # inp
&mov ("eax",wparam(2)); # num
&mov ("ebx","esp"); # saved sp
- &call (&label("pic_point")); # make it PIC!
-&set_label("pic_point");
- &blindpop($K256);
- &lea ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256));
+ &picsetup($K256);
+ &picsymbol($K256, &label("K256"), $K256);
&sub ("esp",16);
&and ("esp",-64);
&mov ("esp",&DWP(12,"esp")); # restore sp
&function_end_A();
+&function_end_B("sha256_block_data_order");
-&set_label("K256",64); # Yes! I keep it in the code segment!
+ &rodataseg();
+&set_label("K256",64);
&data_word(0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5);
&data_word(0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5);
&data_word(0xd807aa98,0x12835b01,0x243185be,0x550c7dc3);
&data_word(0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3);
&data_word(0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208);
&data_word(0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2);
-&function_end_B("sha256_block_data_order");
-&asciz("SHA256 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
+ &previous();
&asm_finish();
}
+&static_label("K512");
&function_begin("sha512_block_data_order");
&mov ("esi",wparam(0)); # ctx
&mov ("edi",wparam(1)); # inp
&mov ("eax",wparam(2)); # num
&mov ("ebx","esp"); # saved sp
- &call (&label("pic_point")); # make it PIC!
-&set_label("pic_point");
- &blindpop($K512);
- &lea ($K512,&DWP(&label("K512")."-".&label("pic_point"),$K512));
+ &picsetup($K512);
+if ($sse2) {
+ &picsymbol("edx", "OPENSSL_ia32cap_P", $K512);
+}
+ &picsymbol($K512, &label("K512"), $K512);
&sub ("esp",16);
&and ("esp",-64);
&mov (&DWP(12,"esp"),"ebx"); # saved sp
if ($sse2) {
- &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512"));
&bt (&DWP(0,"edx"),"\$IA32CAP_BIT0_SSE2");
&jnc (&label("loop_x86"));
&mov ("esp",&DWP(12,"esp")); # restore sp
&function_end_A();
+&function_end_B("sha512_block_data_order");
-&set_label("K512",64); # Yes! I keep it in the code segment!
+ &rodataseg();
+&set_label("K512",64);
&data_word(0xd728ae22,0x428a2f98); # u64
&data_word(0x23ef65cd,0x71374491); # u64
&data_word(0xec4d3b2f,0xb5c0fbcf); # u64
&data_word(0xfc657e2a,0x597f299c); # u64
&data_word(0x3ad6faec,0x5fcb6fab); # u64
&data_word(0x4a475817,0x6c44198c); # u64
-&function_end_B("sha512_block_data_order");
-&asciz("SHA512 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
+ &previous();
&asm_finish();
$tbl="ebp";
@mm=("mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7");
+&static_label("table");
+
&function_begin_B("whirlpool_block_mmx");
&push ("ebp");
&push ("ebx");
&mov (&DWP(8,"ebx"),"ebp");
&mov (&DWP(16,"ebx"),"eax"); # saved stack pointer
- &call (&label("pic_point"));
-&set_label("pic_point");
- &blindpop($tbl);
- &lea ($tbl,&DWP(&label("table")."-".&label("pic_point"),$tbl));
+ &picsetup($tbl);
+ &picsymbol($tbl, &label("table"), $tbl);
&xor ("ecx","ecx");
&xor ("edx","edx");
&pop ("ebx");
&pop ("ebp");
&ret ();
+&function_end_B("whirlpool_block_mmx");
+ &rodataseg();
&align(64);
&set_label("table");
&LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8);
&L(0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8);
&L(0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e);
&L(0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33);
+ &previous();
-&function_end_B("whirlpool_block_mmx");
&asm_finish();