Move all data blocks from .text to .rodata and clean up and homogenize code
author miod <miod@openbsd.org>
Wed, 1 Feb 2023 20:45:04 +0000 (20:45 +0000)
committer miod <miod@openbsd.org>
Wed, 1 Feb 2023 20:45:04 +0000 (20:45 +0000)
responsible for getting the proper address of those blocks.

ok tb@ jsing@

16 files changed:
lib/libcrypto/aes/asm/aes-586.pl
lib/libcrypto/aes/asm/aesni-x86.pl
lib/libcrypto/aes/asm/vpaes-x86.pl
lib/libcrypto/bn/asm/bn-586.pl
lib/libcrypto/bn/asm/x86-gf2m.pl
lib/libcrypto/bn/asm/x86-mont.pl
lib/libcrypto/camellia/asm/cmll-x86.pl
lib/libcrypto/des/asm/des-586.pl
lib/libcrypto/modes/asm/ghash-x86.pl
lib/libcrypto/perlasm/cbc.pl
lib/libcrypto/perlasm/x86gas.pl
lib/libcrypto/rc4/asm/rc4-586.pl
lib/libcrypto/sha/asm/sha1-586.pl
lib/libcrypto/sha/asm/sha256-586.pl
lib/libcrypto/sha/asm/sha512-586.pl
lib/libcrypto/whrlpool/asm/wp-mmx.pl

index c5ae3f6..4e0f34c 100644 (file)
@@ -950,8 +950,10 @@ sub enclast()
        &xor    ($s3,&DWP(12,$key));
 
        &ret    ();
+&function_end_B("_x86_AES_encrypt");
 
-&set_label("AES_Te",64);       # Yes! I keep it in the code segment!
+       &rodataseg();
+&set_label("AES_Te",64);
        &_data_word(0xa56363c6, 0x847c7cf8, 0x997777ee, 0x8d7b7bf6);
        &_data_word(0x0df2f2ff, 0xbd6b6bd6, 0xb16f6fde, 0x54c5c591);
        &_data_word(0x50303060, 0x03010102, 0xa96767ce, 0x7d2b2b56);
@@ -1154,7 +1156,7 @@ sub enclast()
        &data_word(0x00000010, 0x00000020, 0x00000040, 0x00000080);
        &data_word(0x0000001b, 0x00000036, 0x00000000, 0x00000000);
        &data_word(0x00000000, 0x00000000, 0x00000000, 0x00000000);
-&function_end_B("_x86_AES_encrypt");
+       &previous();
 
 # void AES_encrypt (const void *inp,void *out,const AES_KEY *key);
 &function_begin("AES_encrypt");
@@ -1174,11 +1176,9 @@ sub enclast()
        &add    ("esp",4);      # 4 is reserved for caller's return address
        &mov    ($_esp,$s0);                    # save stack pointer
 
-       &call   (&label("pic_point"));          # make it PIC!
-       &set_label("pic_point");
-       &blindpop($tbl);
-       &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if (!$x86only);
-       &lea    ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl));
+       &picsetup($tbl);
+       &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl);
+       &picsymbol($tbl, &label("AES_Te"), $tbl);
 
        # pick Te4 copy which can't "overlap" with stack frame or key schedule
        &lea    ($s1,&DWP(768-4,"esp"));
@@ -1744,8 +1744,10 @@ sub declast()
        &xor    ($s3,&DWP(12,$key));
 
        &ret    ();
+&function_end_B("_x86_AES_decrypt");
 
-&set_label("AES_Td",64);       # Yes! I keep it in the code segment!
+       &rodataseg();
+&set_label("AES_Td",64);
        &_data_word(0x50a7f451, 0x5365417e, 0xc3a4171a, 0x965e273a);
        &_data_word(0xcb6bab3b, 0xf1459d1f, 0xab58faac, 0x9303e34b);
        &_data_word(0x55fa3020, 0xf66d76ad, 0x9176cc88, 0x254c02f5);
@@ -1943,7 +1945,7 @@ sub declast()
        &data_byte(0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61);
        &data_byte(0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26);
        &data_byte(0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d);
-&function_end_B("_x86_AES_decrypt");
+       &previous();
 
 # void AES_decrypt (const void *inp,void *out,const AES_KEY *key);
 &function_begin("AES_decrypt");
@@ -1963,11 +1965,9 @@ sub declast()
        &add    ("esp",4);      # 4 is reserved for caller's return address
        &mov    ($_esp,$s0);    # save stack pointer
 
-       &call   (&label("pic_point"));          # make it PIC!
-       &set_label("pic_point");
-       &blindpop($tbl);
-       &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if(!$x86only);
-       &lea    ($tbl,&DWP(&label("AES_Td")."-".&label("pic_point"),$tbl));
+       &picsetup($tbl);
+       &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl);
+       &picsymbol($tbl, &label("AES_Td"), $tbl);
 
        # pick Td4 copy which can't "overlap" with stack frame or key schedule
        &lea    ($s1,&DWP(768-4,"esp"));
@@ -2034,13 +2034,10 @@ my $mark=&DWP(76+240,"esp");    # copy of aes_key->rounds
        &cmp    ($s2,0);
        &je     (&label("drop_out"));
 
-       &call   (&label("pic_point"));          # make it PIC!
-       &set_label("pic_point");
-       &blindpop($tbl);
-       &picmeup($s0,"OPENSSL_ia32cap_P",$tbl,&label("pic_point")) if(!$x86only);
-
+       &picsetup($tbl);
+       &picsymbol($s0, "OPENSSL_ia32cap_P", $tbl);
+       &picsymbol($tbl, &label("AES_Te"), $tbl);
        &cmp    (&wparam(5),0);
-       &lea    ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl));
        &jne    (&label("picked_te"));
        &lea    ($tbl,&DWP(&label("AES_Td")."-".&label("AES_Te"),$tbl));
        &set_label("picked_te");
@@ -2659,10 +2656,9 @@ sub enckey()
        &test   ("edi",-1);
        &jz     (&label("badpointer"));
 
-       &call   (&label("pic_point"));
-       &set_label("pic_point");
-       &blindpop($tbl);
-       &lea    ($tbl,&DWP(&label("AES_Te")."-".&label("pic_point"),$tbl));
+       &picsetup($tbl);
+       &picsymbol($tbl, &label("AES_Te"), $tbl);
+
        &lea    ($tbl,&DWP(2048+128,$tbl));
 
        # prefetch Te4
@@ -2975,6 +2971,5 @@ sub deckey()
 
        &xor    ("eax","eax");                  # return success
 &function_end("AES_set_decrypt_key");
-&asciz("AES for x86, CRYPTOGAMS by <appro\@openssl.org>");
 
 &asm_finish();
index 8c1d0b5..ff44415 100644 (file)
@@ -2184,6 +2184,5 @@ if ($PREFIX eq "aesni") {
 &set_label("dec_key_ret");
        &ret    ();
 &function_end_B("${PREFIX}_set_decrypt_key");
-&asciz("AES for Intel AES-NI, CRYPTOGAMS by <appro\@openssl.org>");
 
 &asm_finish();
index 1533e2c..38cef61 100644 (file)
@@ -57,6 +57,7 @@ $PREFIX="vpaes";
 my  ($round, $base, $magic, $key, $const, $inp, $out)=
     ("eax",  "ebx", "ecx",  "edx","ebp",  "esi","edi");
 
+       &rodataseg();
 &static_label("_vpaes_consts");
 &static_label("_vpaes_schedule_low_round");
 
@@ -153,8 +154,7 @@ $k_dsbe=0x2a0;              # decryption sbox output *E*u, *E*t
 $k_dsbo=0x2c0;         # decryption sbox final output
        &data_word(0x7EF94000,0x1387EA53,0xD4943E2D,0xC7AA6DB9);
        &data_word(0x93441D00,0x12D7560F,0xD8C58E9C,0xCA4B8159);
-&asciz ("Vector Permutation AES for x86/SSSE3, Mike Hamburg (Stanford University)");
-&align (64);
+       &previous();
 
 &function_begin_B("_vpaes_preheat");
        &add    ($const,&DWP(0,"esp"));
@@ -762,9 +762,11 @@ $k_dsbo=0x2c0;             # decryption sbox final output
        &mov    ($magic,0x30);
        &mov    ($out,0);
 
-       &lea    ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+       &picsetup($const);
+       &picsymbol($const, &label("_vpaes_consts"), $const);
+       &lea    ($const,&DWP(0x30,$const));     # $const = _vpaes_consts + 0x30
+
        &call   ("_vpaes_schedule_core");
-&set_label("pic_point");
 
        &mov    ("esp",&DWP(48,"esp"));
        &xor    ("eax","eax");
@@ -792,18 +794,22 @@ $k_dsbo=0x2c0;            # decryption sbox final output
        &and    ($magic,32);
        &xor    ($magic,32);                    # nbist==192?0:32;
 
-       &lea    ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+       &picsetup($const);
+       &picsymbol($const, &label("_vpaes_consts"), $const);
+       &lea    ($const,&DWP(0x30,$const));     # $const = _vpaes_consts + 0x30
+
        &call   ("_vpaes_schedule_core");
-&set_label("pic_point");
 
        &mov    ("esp",&DWP(48,"esp"));
        &xor    ("eax","eax");
 &function_end("${PREFIX}_set_decrypt_key");
 
 &function_begin("${PREFIX}_encrypt");
-       &lea    ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+       &picsetup($const);
+       &picsymbol($const, &label("_vpaes_consts"), $const);
+       &lea    ($const,&DWP(0x30,$const));     # $const = _vpaes_consts + 0x30
+
        &call   ("_vpaes_preheat");
-&set_label("pic_point");
        &mov    ($inp,&wparam(0));              # inp
        &lea    ($base,&DWP(-56,"esp"));
        &mov    ($out,&wparam(1));              # out
@@ -820,9 +826,11 @@ $k_dsbo=0x2c0;             # decryption sbox final output
 &function_end("${PREFIX}_encrypt");
 
 &function_begin("${PREFIX}_decrypt");
-       &lea    ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+       &picsetup($const);
+       &picsymbol($const, &label("_vpaes_consts"), $const);
+       &lea    ($const,&DWP(0x30,$const));     # $const = _vpaes_consts + 0x30
+
        &call   ("_vpaes_preheat");
-&set_label("pic_point");
        &mov    ($inp,&wparam(0));              # inp
        &lea    ($base,&DWP(-56,"esp"));
        &mov    ($out,&wparam(1));              # out
@@ -859,9 +867,11 @@ $k_dsbo=0x2c0;             # decryption sbox final output
        &mov    (&DWP(8,"esp"),$const);         # save ivp
        &mov    ($out,$round);                  # $out works as $len
 
-       &lea    ($const,&DWP(&label("_vpaes_consts")."+0x30-".&label("pic_point")));
+       &picsetup($const);
+       &picsymbol($const, &label("_vpaes_consts"), $const);
+       &lea    ($const,&DWP(0x30,$const));     # $const = _vpaes_consts + 0x30
+
        &call   ("_vpaes_preheat");
-&set_label("pic_point");
        &cmp    ($magic,0);
        &je     (&label("cbc_dec_loop"));
        &jmp    (&label("cbc_enc_loop"));
index c4e2baa..b502fe6 100644 (file)
@@ -32,7 +32,8 @@ sub bn_mul_add_words
        $c="ecx";
 
        if ($sse2) {
-               &picmeup("eax","OPENSSL_ia32cap_P");
+               &picsetup("eax");
+               &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
                &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
                &jnc(&label("maw_non_sse2"));
 
@@ -218,7 +219,8 @@ sub bn_mul_words
        $c="ecx";
 
        if ($sse2) {
-               &picmeup("eax","OPENSSL_ia32cap_P");
+               &picsetup("eax");
+               &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
                &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
                &jnc(&label("mw_non_sse2"));
 
@@ -329,7 +331,8 @@ sub bn_sqr_words
        $c="ecx";
 
        if ($sse2) {
-               &picmeup("eax","OPENSSL_ia32cap_P");
+               &picsetup("eax");
+               &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
                &bt(&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
                &jnc(&label("sqr_non_sse2"));
 
index 9715b21..cb2f2a5 100644 (file)
@@ -200,7 +200,8 @@ $R="mm0";
 # void bn_GF2m_mul_2x2(BN_ULONG *r, BN_ULONG a1, BN_ULONG a0, BN_ULONG b1, BN_ULONG b0);
 &function_begin_B("bn_GF2m_mul_2x2");
 if (!$x86only) {
-       &picmeup("edx","OPENSSL_ia32cap_P");
+       &picsetup("edx");
+       &picsymbol("edx", "OPENSSL_ia32cap_P", "edx");
        &mov    ("eax",&DWP(0,"edx"));
        &mov    ("edx",&DWP(4,"edx"));
        &test   ("eax","\$IA32CAP_MASK0_MMX");  # check MMX bit
@@ -308,6 +309,4 @@ if ($sse2) {
        &ret    ();
 &function_end_B("bn_GF2m_mul_2x2");
 
-&asciz ("GF(2^m) Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
-
 &asm_finish();
index e6c0473..6524651 100755 (executable)
@@ -113,7 +113,8 @@ $mul1="mm5";
 $temp="mm6";
 $mask="mm7";
 
-       &picmeup("eax","OPENSSL_ia32cap_P");
+       &picsetup("eax");
+       &picsymbol("eax", "OPENSSL_ia32cap_P", "eax");
        &bt     (&DWP(0,"eax"),"\$IA32CAP_BIT0_SSE2");
        &jnc    (&label("non_sse2"));
 
@@ -588,6 +589,4 @@ $sbit=$num;
 &set_label("just_leave");
 &function_end("bn_mul_mont");
 
-&asciz("Montgomery Multiplication for x86, CRYPTOGAMS by <appro\@openssl.org>");
-
 &asm_finish();
index 027302a..a4ab11e 100644 (file)
@@ -141,10 +141,8 @@ my $t0=@T[($j)%4],$t1=@T[($j+1)%4],$t2=@T[($j+2)%4],$t3=@T[($j+3)%4];
        &mov    ($_esp,"ebx");  # save %esp
        &mov    ($_end,"eax");  # save keyEnd
 
-       &call   (&label("pic_point"));
-       &set_label("pic_point");
-       &blindpop($Tbl);
-       &lea    ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+       &picsetup($Tbl);
+       &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
 
        &mov    (@T[0],&DWP(0,$idx));   # load plaintext
        &mov    (@T[1],&DWP(4,$idx));
@@ -206,10 +204,8 @@ if ($OPENSSL) {
        &mov    ($_esp,"ebx");  # save %esp
        &mov    ($_end,"eax");  # save keyEnd
 
-       &call   (&label("pic_point"));
-       &set_label("pic_point");
-       &blindpop($Tbl);
-       &lea    ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+       &picsetup($Tbl);
+       &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
 
        &mov    (@T[0],&DWP(0,$idx));   # load plaintext
        &mov    (@T[1],&DWP(4,$idx));
@@ -316,10 +312,8 @@ if ($OPENSSL) {
        &lea    ($key,&DWP(0,$key,"eax"));
        &mov    (&DWP(5*4,"esp"),"ebx");# save %esp
 
-       &call   (&label("pic_point"));
-       &set_label("pic_point");
-       &blindpop($Tbl);
-       &lea    ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+       &picsetup($Tbl);
+       &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
 
        &mov    (@T[0],&DWP(0,$idx));   # load ciphertext
        &mov    (@T[1],&DWP(4,$idx));
@@ -381,10 +375,8 @@ if ($OPENSSL) {
        &lea    ($key,&DWP(0,$key,"eax"));
        &mov    (&DWP(5*4,"esp"),"ebx");# save %esp
 
-       &call   (&label("pic_point"));
-       &set_label("pic_point");
-       &blindpop($Tbl);
-       &lea    ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+       &picsetup($Tbl);
+       &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
 
        &mov    (@T[0],&DWP(0,$idx));   # load ciphertext
        &mov    (@T[1],&DWP(4,$idx));
@@ -594,10 +586,8 @@ my $bias=int(@T[0])?shift(@T):0;
        &xor    (@T[3],&DWP(1*8+4,$key));
 
 &set_label("1st128",4);
-       &call   (&label("pic_point"));
-       &set_label("pic_point");
-       &blindpop($Tbl);
-       &lea    ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+       &picsetup($Tbl);
+       &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
        &lea    ($key,&DWP(&label("Camellia_SIGMA")."-".&label("Camellia_SBOX"),$Tbl));
 
        &mov    ($idx,&DWP($step*8,$key));      # prefetch SIGMA[0]
@@ -786,6 +776,7 @@ sub S4404 { my $i=shift; $i=($i<<1|$i>>7)&0xff; $i=@SBOX[$i]; return $i<<24|$i<<
 sub S0222 { my $i=shift; $i=@SBOX[$i]; $i=($i<<1|$i>>7)&0xff; return $i<<16|$i<<8|$i; }        
 sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; return $i<<24|$i<<8|$i; }        
 
+       &rodataseg();
 &set_label("Camellia_SIGMA",64);
 &data_word(
     0xa09e667f, 0x3bcc908b, 0xb67ae858, 0x4caa73b2,
@@ -796,6 +787,7 @@ sub S3033 { my $i=shift; $i=@SBOX[$i]; $i=($i>>1|$i<<7)&0xff; return $i<<24|$i<<
 # tables are interleaved, remember?
 for ($i=0;$i<256;$i++) { &data_word(&S1110($i),&S4404($i)); }
 for ($i=0;$i<256;$i++) { &data_word(&S0222($i),&S3033($i)); }
+       &previous();
 
 # void Camellia_cbc_encrypt (const void char *inp, unsigned char *out,
 #                      size_t length, const CAMELLIA_KEY *key,
@@ -856,10 +848,8 @@ my ($s0,$s1,$s2,$s3) = @T;
        &mov    ($_key,$s3);            # save copy of key
        &mov    ($_ivp,$Tbl);           # save copy of ivp
 
-       &call   (&label("pic_point"));  # make it PIC!
-       &set_label("pic_point");
-       &blindpop($Tbl);
-       &lea    ($Tbl,&DWP(&label("Camellia_SBOX")."-".&label("pic_point"),$Tbl));
+       &picsetup($Tbl);
+       &picsymbol($Tbl, &label("Camellia_SBOX"), $Tbl);
 
        &mov    ($idx,32);
        &set_label("prefetch_sbox",4);
@@ -1133,6 +1123,4 @@ my ($s0,$s1,$s2,$s3) = @T;
 &function_end("Camellia_cbc_encrypt");
 }
 
-&asciz("Camellia for x86 by <appro\@openssl.org>");
-
 &asm_finish();
index 5b5f39c..e11b2ef 100644 (file)
@@ -154,11 +154,8 @@ sub DES_encrypt
                &rotl($L,3);
                }
 
-       # PIC-ification:-)
-       &call   (&label("pic_point"));
-       &set_label("pic_point");
-       &blindpop($trans);
-       &lea    ($trans,&DWP(&label("DES_SPtrans")."-".&label("pic_point"),$trans));
+       &picsetup($trans);
+       &picsymbol($trans, &label("DES_SPtrans"), $trans);
 
        &mov(   "ecx",  &wparam(1)      );
 
@@ -314,6 +311,7 @@ sub FP_new
 
 sub DES_SPtrans
        {
+       &rodataseg();
        &set_label("DES_SPtrans",64);
        &data_word(0x02080800, 0x00080000, 0x02000002, 0x02080802);
        &data_word(0x02000000, 0x00080802, 0x00080002, 0x02000002);
@@ -450,4 +448,5 @@ sub DES_SPtrans
        &data_word(0x00820000, 0x00020080, 0x20020080, 0x20800000);
        &data_word(0x00000080, 0x20820000, 0x00820080, 0x00000000);
        &data_word(0x20000000, 0x20800080, 0x00020000, 0x00820080);
+       &previous();
        }
index 2749259..5e868a4 100644 (file)
@@ -411,10 +411,8 @@ $S=12;             # shift factor for rem_4bit
        &mov    ($inp,&wparam(0));      # load Xi
        &mov    ($Htbl,&wparam(1));     # load Htable
 
-       &call   (&label("pic_point"));
-       &set_label("pic_point");
-       &blindpop("eax");
-       &lea    ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax"));
+       &picsetup("eax");
+       &picsymbol("eax", &label("rem_4bit"), "eax");
 
        &movz   ($Zll,&BP(15,$inp));
 
@@ -436,10 +434,8 @@ $S=12;             # shift factor for rem_4bit
        &mov    ($inp,&wparam(2));      # load in
        &mov    ($Zlh,&wparam(3));      # load len
 
-       &call   (&label("pic_point"));
-       &set_label("pic_point");
-       &blindpop("eax");
-       &lea    ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax"));
+       &picsetup("eax");
+       &picsymbol("eax", &label("rem_4bit"), "eax");
 
        &add    ($Zlh,$inp);
        &mov    (&wparam(3),$Zlh);      # len to point at the end of input
@@ -584,10 +580,8 @@ sub mmx_loop() {
        &mov    ($inp,&wparam(0));      # load Xi
        &mov    ($Htbl,&wparam(1));     # load Htable
 
-       &call   (&label("pic_point"));
-       &set_label("pic_point");
-       &blindpop("eax");
-       &lea    ("eax",&DWP(&label("rem_4bit")."-".&label("pic_point"),"eax"));
+       &picsetup("eax");
+       &picsymbol("eax", &label("rem_4bit"), "eax");
 
        &movz   ($Zll,&BP(15,$inp));
 
@@ -618,10 +612,9 @@ sub mmx_loop() {
     &mov       ("ecx",&wparam(2));             # inp
     &mov       ("edx",&wparam(3));             # len
     &mov       ("ebp","esp");                  # original %esp
-    &call      (&label("pic_point"));
-    &set_label ("pic_point");
-    &blindpop  ($rem_8bit);
-    &lea       ($rem_8bit,&DWP(&label("rem_8bit")."-".&label("pic_point"),$rem_8bit));
+
+    &picsetup($rem_8bit);
+    &picsymbol($rem_8bit, &label("rem_8bit"), $rem_8bit);
 
     &sub       ("esp",512+16+16);              # allocate stack frame...
     &and       ("esp",-64);                    # ...and align it
@@ -910,10 +903,8 @@ my ($Xhi,$Xi) = @_;
        &mov            ($Htbl,&wparam(0));
        &mov            ($Xip,&wparam(1));
 
-       &call           (&label("pic"));
-&set_label("pic");
-       &blindpop       ($const);
-       &lea            ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+       &picsetup($const);
+       &picsymbol($const, &label("bswap"), $const);
 
        &movdqu         ($Hkey,&QWP(0,$Xip));
        &pshufd         ($Hkey,$Hkey,0b01001110);# dword swap
@@ -947,10 +938,8 @@ my ($Xhi,$Xi) = @_;
        &mov            ($Xip,&wparam(0));
        &mov            ($Htbl,&wparam(1));
 
-       &call           (&label("pic"));
-&set_label("pic");
-       &blindpop       ($const);
-       &lea            ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+       &picsetup($const);
+       &picsymbol($const, &label("bswap"), $const);
 
        &movdqu         ($Xi,&QWP(0,$Xip));
        &movdqa         ($T3,&QWP(0,$const));
@@ -972,10 +961,8 @@ my ($Xhi,$Xi) = @_;
        &mov            ($inp,&wparam(2));
        &mov            ($len,&wparam(3));
 
-       &call           (&label("pic"));
-&set_label("pic");
-       &blindpop       ($const);
-       &lea            ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+       &picsetup($const);
+       &picsymbol($const, &label("bswap"), $const);
 
        &movdqu         ($Xi,&QWP(0,$Xip));
        &movdqa         ($T3,&QWP(0,$const));
@@ -1138,10 +1125,8 @@ my ($Xhi,$Xi)=@_;
        &mov            ($Htbl,&wparam(0));
        &mov            ($Xip,&wparam(1));
 
-       &call           (&label("pic"));
-&set_label("pic");
-       &blindpop       ($const);
-       &lea            ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+       &picsetup($const);
+       &picsymbol($const, &label("bswap"), $const);
 
        &movdqu         ($Hkey,&QWP(0,$Xip));
        &pshufd         ($Hkey,$Hkey,0b01001110);# dword swap
@@ -1161,10 +1146,8 @@ my ($Xhi,$Xi)=@_;
        &mov            ($Xip,&wparam(0));
        &mov            ($Htbl,&wparam(1));
 
-       &call           (&label("pic"));
-&set_label("pic");
-       &blindpop       ($const);
-       &lea            ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+       &picsetup($const);
+       &picsymbol($const, &label("bswap"), $const);
 
        &movdqu         ($Xi,&QWP(0,$Xip));
        &movdqa         ($Xn,&QWP(0,$const));
@@ -1186,10 +1169,8 @@ my ($Xhi,$Xi)=@_;
        &mov            ($inp,&wparam(2));
        &mov            ($len,&wparam(3));
 
-       &call           (&label("pic"));
-&set_label("pic");
-       &blindpop       ($const);
-       &lea            ($const,&DWP(&label("bswap")."-".&label("pic"),$const));
+       &picsetup($const);
+       &picsymbol($const, &label("bswap"), $const);
 
        &movdqu         ($Xi,&QWP(0,$Xip));
        &movdqa         ($T3,&QWP(0,$const));
@@ -1270,11 +1251,14 @@ my ($Xhi,$Xi)=@_;
 
 }
 \f
+       &rodataseg();
 &set_label("bswap",64);
        &data_byte(15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0);
        &data_byte(1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0xc2); # 0x1c2_polynomial
+       &previous();
 }}     # $sse2
 
+       &rodataseg();
 &set_label("rem_4bit",64);
        &data_word(0,0x0000<<$S,0,0x1C20<<$S,0,0x3840<<$S,0,0x2460<<$S);
        &data_word(0,0x7080<<$S,0,0x6CA0<<$S,0,0x48C0<<$S,0,0x54E0<<$S);
@@ -1313,9 +1297,9 @@ my ($Xhi,$Xi)=@_;
        &data_short(0xA7D0,0xA612,0xA454,0xA596,0xA0D8,0xA11A,0xA35C,0xA29E);
        &data_short(0xB5E0,0xB422,0xB664,0xB7A6,0xB2E8,0xB32A,0xB16C,0xB0AE);
        &data_short(0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE);
+       &previous();
 }}}    # !$x86only
 
-&asciz("GHASH for x86, CRYPTOGAMS by <appro\@openssl.org>");
 &asm_finish();
 
 # A question was risen about choice of vanilla MMX. Or rather why wasn't
index 24561e7..392f23e 100644 (file)
@@ -34,6 +34,15 @@ sub cbc
        # p1,p2,p3 are the offsets for parameters to be passed to the
        # underlying calls.
 
+&static_label("cbc_enc_jmp_table_".$name);
+&static_label("ej1_".$name);
+&static_label("ej2_".$name);
+&static_label("ej3_".$name);
+&static_label("ej4_".$name);
+&static_label("ej5_".$name);
+&static_label("ej6_".$name);
+&static_label("ej7_".$name);
+
        &function_begin_B($name,"");
        &comment("");
 
@@ -146,33 +155,32 @@ sub cbc
        &mov($count,    &wparam(2));    # length
        &and($count,    7);
        &jz(&label("finish"));
-       &call(&label("PIC_point"));
-&set_label("PIC_point");
-       &blindpop("edx");
-       &lea("ecx",&DWP(&label("cbc_enc_jmp_table")."-".&label("PIC_point"),"edx"));
+
+       &picsetup("edx");
+       &picsymbol("ecx", &label("cbc_enc_jmp_table_".$name), "edx");
        &mov($count,&DWP(0,"ecx",$count,4));
-       &add($count,"edx");
+       &picadjust($count, "edx");
+
        &xor("ecx","ecx");
        &xor("edx","edx");
-       #&mov($count,&DWP(&label("cbc_enc_jmp_table"),"",$count,4));
        &jmp_ptr($count);
 
-&set_label("ej7");
+&set_label("ej7_".$name);
        &movb(&HB("edx"),       &BP(6,$in,"",0));
        &shl("edx",8);
-&set_label("ej6");
+&set_label("ej6_".$name);
        &movb(&HB("edx"),       &BP(5,$in,"",0));
-&set_label("ej5");
+&set_label("ej5_".$name);
        &movb(&LB("edx"),       &BP(4,$in,"",0));
-&set_label("ej4");
+&set_label("ej4_".$name);
        &mov("ecx",             &DWP(0,$in,"",0));
        &jmp(&label("ejend"));
-&set_label("ej3");
+&set_label("ej3_".$name);
        &movb(&HB("ecx"),       &BP(2,$in,"",0));
        &shl("ecx",8);
-&set_label("ej2");
+&set_label("ej2_".$name);
        &movb(&HB("ecx"),       &BP(1,$in,"",0));
-&set_label("ej1");
+&set_label("ej1_".$name);
        &movb(&LB("ecx"),       &BP(0,$in,"",0));
 &set_label("ejend");
 
@@ -279,30 +287,14 @@ sub cbc
        &mov("eax",     &DWP(0,$in,"",0));      # get old cipher text,
        &mov("ebx",     &DWP(4,$in,"",0));      # next iv actually
 
-&set_label("dj7");
        &rotr("edx",    16);
        &movb(&BP(6,$out,"",0), &LB("edx"));
        &shr("edx",16);
-&set_label("dj6");
        &movb(&BP(5,$out,"",0), &HB("edx"));
-&set_label("dj5");
        &movb(&BP(4,$out,"",0), &LB("edx"));
-&set_label("dj4");
        &mov(&DWP(0,$out,"",0), "ecx");
-       &jmp(&label("djend"));
-&set_label("dj3");
-       &rotr("ecx",    16);
-       &movb(&BP(2,$out,"",0), &LB("ecx"));
-       &shl("ecx",16);
-&set_label("dj2");
-       &movb(&BP(1,$in,"",0),  &HB("ecx"));
-&set_label("dj1");
-       &movb(&BP(0,$in,"",0),  &LB("ecx"));
-&set_label("djend");
 
        # final iv is still in eax:ebx
-       &jmp(&label("finish"));
-
 
 ############################ FINISH #######################3
        &set_label("finish",1);
@@ -319,31 +311,21 @@ sub cbc
        &mov(&DWP(4,"ecx","",0),        "ebx"); # save iv
 
        &function_end_A($name);
+       &function_end_B($name);
 
+       &rodataseg();
        &align(64);
-       &set_label("cbc_enc_jmp_table");
+       &set_label("cbc_enc_jmp_table_".$name);
        &data_word("0");
-       &data_word(&label("ej1")."-".&label("PIC_point"));
-       &data_word(&label("ej2")."-".&label("PIC_point"));
-       &data_word(&label("ej3")."-".&label("PIC_point"));
-       &data_word(&label("ej4")."-".&label("PIC_point"));
-       &data_word(&label("ej5")."-".&label("PIC_point"));
-       &data_word(&label("ej6")."-".&label("PIC_point"));
-       &data_word(&label("ej7")."-".&label("PIC_point"));
-       # not used
-       #&set_label("cbc_dec_jmp_table",1);
-       #&data_word("0");
-       #&data_word(&label("dj1")."-".&label("PIC_point"));
-       #&data_word(&label("dj2")."-".&label("PIC_point"));
-       #&data_word(&label("dj3")."-".&label("PIC_point"));
-       #&data_word(&label("dj4")."-".&label("PIC_point"));
-       #&data_word(&label("dj5")."-".&label("PIC_point"));
-       #&data_word(&label("dj6")."-".&label("PIC_point"));
-       #&data_word(&label("dj7")."-".&label("PIC_point"));
-       &align(64);
+       &data_word(&code_sym(&label("ej1_".$name)));
+       &data_word(&code_sym(&label("ej2_".$name)));
+       &data_word(&code_sym(&label("ej3_".$name)));
+       &data_word(&code_sym(&label("ej4_".$name)));
+       &data_word(&code_sym(&label("ej5_".$name)));
+       &data_word(&code_sym(&label("ej6_".$name)));
+       &data_word(&code_sym(&label("ej7_".$name)));
+       &previous();
 
-       &function_end_B($name);
-       
        }
 
 1;
index ca644ba..f28a590 100644 (file)
@@ -177,34 +177,52 @@ sub ::align
     push(@out,".align\t$val\n");
 }
 
-sub ::picmeup
-{ my($dst,$sym,$base,$reflabel)=@_;
-
-    if ($::openbsd)
-    {  &::emitraw("#if defined(PIC) || defined(__PIC__)");
-       &::emitraw("PIC_PROLOGUE");
-       &::mov($dst, &::DWP("PIC_GOT($sym)"));
-       &::emitraw("PIC_EPILOGUE");
-       &::emitraw("#else /* PIC */");
-       &::lea($dst,&::DWP($sym));
-       &::emitraw("#endif /* PIC */");
-    }
-    elsif (($::pic && ($::elf || $::aout)) || $::macosx)
-    {  if (!defined($base))
-       {   &::call(&::label("PIC_me_up"));
-           &::set_label("PIC_me_up");
-           &::blindpop($dst);
-           $base=$dst;
-           $reflabel=&::label("PIC_me_up");
-       }
+#
+# PIC data access wrappers
+#
+# Usage:
+#   picsetup($base)
+#      - only allowed once per function (because of hardcoded label name),
+#        sets up pic access, uses $base register as temporary
+#   picsymbol($dst, $sym, $base)
+#      - loads the address of symbol $sym into $dst with the help of $base
+#        initialized by picsetup
+#   picadjust($sym, $base)
+#      - adjusts a code pointer read from a code_sym table with the help of
+#        $base initialized by picsetup
+#   code_sym($sym)
+#      - emits a pointer to the given code symbol, relative to the GOT if
+#        PIC. This pointer will need to be adjusted with picadjust above
+#        before use.
+
+sub ::picsetup
+{ my($base)=@_;
+
+    if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx)
+    {
+       &::call(&::label("PIC_setup"));
+       &::set_label("PIC_setup");
+       &::blindpop($base);
+    }
+}
+
+sub ::picsymbol
+{ my($dst,$sym,$base)=@_;
+
+    if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx)
+    {
+       my $reflabel=&::label("PIC_setup");
+       # macosx: create and record the non-lazy pointer label for $sym
+       # here, where $sym is actually known, then load the symbol's
+       # address through it relative to the PIC_setup label.
        if ($::macosx)
        {   my $indirect=&::static_label("$nmdecor$sym\$non_lazy_ptr");
            &::mov($dst,&::DWP("$indirect-$reflabel",$base));
            $non_lazy_ptr{"$nmdecor$sym"}=$indirect;
        }
        else
        {   &::lea($dst,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]",
-                           $base));
+               $base));
            &::mov($dst,&::DWP("$sym\@GOT",$dst));
        }
     }
@@ -212,6 +230,30 @@ sub ::picmeup
     {  &::lea($dst,&::DWP($sym));      }
 }
 
+sub ::picadjust
+{ my($sym,$base)=@_;
+
+    if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx)
+    {
+       my $reflabel=&::label("PIC_setup");
+       &::lea($sym,&::DWP("_GLOBAL_OFFSET_TABLE_+[.-$reflabel]",
+               $base,$sym));
+    }
+}
+
+sub ::code_sym
+{ my($sym)=@_;
+
+    if (($::pic && ($::openbsd || $::elf || $::aout)) || $::macosx)
+    {
+       $sym."\@GOTOFF";
+    }
+    else
+    {
+       $sym;
+    }
+}
+
 sub ::initseg
 { my $f=$nmdecor.shift;
 
@@ -264,4 +306,10 @@ ___
 sub ::dataseg
 {   push(@out,".data\n");   }
 
+sub ::rodataseg
+{   push(@out,".rodata\n");   }
+
+sub ::previous
+{   push(@out,".previous\n");   }
+
 1;
index f3c3e11..4991c37 100644 (file)
@@ -188,7 +188,8 @@ if ($alt=0) {
        &mov    (&wparam(3),$out);      # $out as accumulator in these loops
        &jz     (&label("go4loop4"));
 
-       &picmeup($out,"OPENSSL_ia32cap_P");
+       &picsetup($out);
+       &picsymbol($out, "OPENSSL_ia32cap_P", $out);
        # check SSE2 bit [could have been MMX]
        &bt     (&DWP(0,$out),"\$IA32CAP_BIT0_SSE2");
        &jnc    (&label("go4loop4"));
@@ -305,7 +306,9 @@ $idx="edx";
        &mov    ($out,&wparam(0));              # load key
        &mov    ($idi,&wparam(1));              # load len
        &mov    ($inp,&wparam(2));              # load data
-       &picmeup($idx,"OPENSSL_ia32cap_P");
+
+       &picsetup($idx);
+       &picsymbol($idx, "OPENSSL_ia32cap_P", $idx);
 
        &lea    ($out,&DWP(2*4,$out));          # &key->data
        &lea    ($inp,&DWP(0,$inp,$idi));       # $inp to point at the end
@@ -382,12 +385,12 @@ $idx="edx";
 &function_end("RC4_set_key");
 
 # const char *RC4_options(void);
+&static_label("opts");
 &function_begin_B("RC4_options");
-       &call   (&label("pic_point"));
-&set_label("pic_point");
-       &blindpop("eax");
-       &lea    ("eax",&DWP(&label("opts")."-".&label("pic_point"),"eax"));
-       &picmeup("edx","OPENSSL_ia32cap_P");
+       &picsetup("edx");
+       &picsymbol("eax", &label("opts"), "edx");
+       &picsymbol("edx", "OPENSSL_ia32cap_P", "edx");
+
        &mov    ("edx",&DWP(0,"edx"));
        &bt     ("edx","\$IA32CAP_BIT0_INTELP4");
        &jc     (&label("1xchar"));
@@ -399,13 +402,14 @@ $idx="edx";
        &add    ("eax",12);
 &set_label("ret");
        &ret    ();
-&set_label("opts",64);
+&function_end_B("RC4_options");
+
+       &rodataseg();
+&set_label("opts");
 &asciz ("rc4(4x,int)");
 &asciz ("rc4(1x,char)");
 &asciz ("rc4(8x,mmx)");
-&asciz ("RC4 for x86, CRYPTOGAMS by <appro\@openssl.org>");
-&align (64);
-&function_end_B("RC4_options");
+       &previous();
 
 &asm_finish();
 
index 1de5e26..5928e08 100644 (file)
@@ -295,11 +295,9 @@ if ($xmm) {
   &static_label("avx_shortcut")                if ($ymm);
   &static_label("K_XX_XX");
 
-       &call   (&label("pic_point"));  # make it PIC!
-  &set_label("pic_point");
-       &blindpop($tmp1);
-       &picmeup($T,"OPENSSL_ia32cap_P",$tmp1,&label("pic_point"));
-       &lea    ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
+       &picsetup($tmp1);                               # replaces the old call/pop pic_point sequence; $tmp1 presumably holds the PIC base
+       &picsymbol($T, "OPENSSL_ia32cap_P", $tmp1);     # looked up first, while $tmp1 is still the base
+       &picsymbol($tmp1, &label("K_XX_XX"), $tmp1);    # overwrites $tmp1: from here on it refers to K_XX_XX
 
        &mov    ($A,&DWP(0,$T));
        &mov    ($D,&DWP(4,$T));
@@ -419,10 +417,9 @@ my $_rol=sub { &rol(@_) };
 my $_ror=sub { &ror(@_) };
 
 &function_begin("_sha1_block_data_order_ssse3");
-       &call   (&label("pic_point"));  # make it PIC!
-       &set_label("pic_point");
-       &blindpop($tmp1);
-       &lea    ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
+       &picsetup($tmp1);
+       &picsymbol($tmp1, &label("K_XX_XX"), $tmp1);
+
 &set_label("ssse3_shortcut");
 
        &movdqa (@X[3],&QWP(0,$tmp1));          # K_00_19
@@ -861,10 +858,9 @@ my $_rol=sub { &shld(@_[0],@_) };
 my $_ror=sub { &shrd(@_[0],@_) };
 
 &function_begin("_sha1_block_data_order_avx");
-       &call   (&label("pic_point"));  # make it PIC!
-       &set_label("pic_point");
-       &blindpop($tmp1);
-       &lea    ($tmp1,&DWP(&label("K_XX_XX")."-".&label("pic_point"),$tmp1));
+       &picsetup($tmp1);
+       &picsymbol($tmp1, &label("K_XX_XX"), $tmp1);
+
 &set_label("avx_shortcut");
        &vzeroall();
 
@@ -1213,13 +1209,15 @@ sub Xtail_avx()
        &mov    (&DWP(16,@T[1]),$E);
 &function_end("_sha1_block_data_order_avx");
 }
+
+       &rodataseg();
 &set_label("K_XX_XX",64);
 &data_word(0x5a827999,0x5a827999,0x5a827999,0x5a827999);       # K_00_19
 &data_word(0x6ed9eba1,0x6ed9eba1,0x6ed9eba1,0x6ed9eba1);       # K_20_39
 &data_word(0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc,0x8f1bbcdc);       # K_40_59
 &data_word(0xca62c1d6,0xca62c1d6,0xca62c1d6,0xca62c1d6);       # K_60_79
 &data_word(0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f);       # pbswap mask
+       &previous();
 }
-&asciz("SHA1 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
 
 &asm_finish();
index 928ec53..2b05c96 100644 (file)
@@ -96,16 +96,15 @@ sub BODY_00_15() {
        &add    ($A,"esi");     # h += K256[i]
 }
 
+&static_label("K256");
 &function_begin("sha256_block_data_order");
        &mov    ("esi",wparam(0));      # ctx
        &mov    ("edi",wparam(1));      # inp
        &mov    ("eax",wparam(2));      # num
        &mov    ("ebx","esp");          # saved sp
 
-       &call   (&label("pic_point"));  # make it PIC!
-&set_label("pic_point");
-       &blindpop($K256);
-       &lea    ($K256,&DWP(&label("K256")."-".&label("pic_point"),$K256));
+       &picsetup($K256);
+       &picsymbol($K256, &label("K256"), $K256);
 
        &sub    ("esp",16);
        &and    ("esp",-64);
@@ -225,8 +224,10 @@ sub BODY_00_15() {
 
        &mov    ("esp",&DWP(12,"esp"));         # restore sp
 &function_end_A();
+&function_end_B("sha256_block_data_order");
 
-&set_label("K256",64); # Yes! I keep it in the code segment!
+       &rodataseg();
+&set_label("K256",64);
        &data_word(0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5);
        &data_word(0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5);
        &data_word(0xd807aa98,0x12835b01,0x243185be,0x550c7dc3);
@@ -243,7 +244,6 @@ sub BODY_00_15() {
        &data_word(0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3);
        &data_word(0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208);
        &data_word(0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2);
-&function_end_B("sha256_block_data_order");
-&asciz("SHA256 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
+       &previous();
 
 &asm_finish();
index 163361e..c1d0684 100644 (file)
@@ -261,16 +261,18 @@ sub BODY_00_15_x86 {
 }
 
 
+&static_label("K512");
 &function_begin("sha512_block_data_order");
        &mov    ("esi",wparam(0));      # ctx
        &mov    ("edi",wparam(1));      # inp
        &mov    ("eax",wparam(2));      # num
        &mov    ("ebx","esp");          # saved sp
 
-       &call   (&label("pic_point"));  # make it PIC!
-&set_label("pic_point");
-       &blindpop($K512);
-       &lea    ($K512,&DWP(&label("K512")."-".&label("pic_point"),$K512));
+       &picsetup($K512);                               # replaces the old call/pop pic_point sequence; $K512 presumably holds the PIC base
+if ($sse2) {
+       &picsymbol("edx", "OPENSSL_ia32cap_P", $K512);  # hoisted here (was a picmeup further down) so it runs before $K512 is overwritten
+}
+       &picsymbol($K512, &label("K512"), $K512);       # overwrites the base: from here on $K512 refers to the K512 table
 
        &sub    ("esp",16);
        &and    ("esp",-64);
@@ -283,7 +285,6 @@ sub BODY_00_15_x86 {
        &mov    (&DWP(12,"esp"),"ebx"); # saved sp
 
 if ($sse2) {
-       &picmeup("edx","OPENSSL_ia32cap_P",$K512,&label("K512"));
        &bt     (&DWP(0,"edx"),"\$IA32CAP_BIT0_SSE2");
        &jnc    (&label("loop_x86"));
 
@@ -556,8 +557,10 @@ if ($sse2) {
 
        &mov    ("esp",&DWP(12,"esp"));         # restore sp
 &function_end_A();
+&function_end_B("sha512_block_data_order");
 
-&set_label("K512",64); # Yes! I keep it in the code segment!
+       &rodataseg();
+&set_label("K512",64);
        &data_word(0xd728ae22,0x428a2f98);      # u64
        &data_word(0x23ef65cd,0x71374491);      # u64
        &data_word(0xec4d3b2f,0xb5c0fbcf);      # u64
@@ -638,7 +641,6 @@ if ($sse2) {
        &data_word(0xfc657e2a,0x597f299c);      # u64
        &data_word(0x3ad6faec,0x5fcb6fab);      # u64
        &data_word(0x4a475817,0x6c44198c);      # u64
-&function_end_B("sha512_block_data_order");
-&asciz("SHA512 block transform for x86, CRYPTOGAMS by <appro\@openssl.org>");
+       &previous();
 
 &asm_finish();
index 0ff8e5b..a54d702 100644 (file)
@@ -77,6 +77,8 @@ sub row()
 $tbl="ebp";
 @mm=("mm0","mm1","mm2","mm3","mm4","mm5","mm6","mm7");
 
+&static_label("table");
+
 &function_begin_B("whirlpool_block_mmx");
        &push   ("ebp");
        &push   ("ebx");
@@ -97,10 +99,8 @@ $tbl="ebp";
        &mov    (&DWP(8,"ebx"),"ebp");
        &mov    (&DWP(16,"ebx"),"eax");         # saved stack pointer
 
-       &call   (&label("pic_point"));
-&set_label("pic_point");
-       &blindpop($tbl);
-       &lea    ($tbl,&DWP(&label("table")."-".&label("pic_point"),$tbl));
+       &picsetup($tbl);
+       &picsymbol($tbl, &label("table"), $tbl);
 
        &xor    ("ecx","ecx");
        &xor    ("edx","edx");
@@ -218,7 +218,9 @@ for($i=0;$i<8;$i++) {
        &pop    ("ebx");
        &pop    ("ebp");
        &ret    ();
+&function_end_B("whirlpool_block_mmx");
 
+       &rodataseg();
 &align(64);
 &set_label("table");
        &LL(0x18,0x18,0x60,0x18,0xc0,0x78,0x30,0xd8);
@@ -488,6 +490,6 @@ for($i=0;$i<8;$i++) {
        &L(0xe4,0x27,0x41,0x8b,0xa7,0x7d,0x95,0xd8);
        &L(0xfb,0xee,0x7c,0x66,0xdd,0x17,0x47,0x9e);
        &L(0xca,0x2d,0xbf,0x07,0xad,0x5a,0x83,0x33);
+       &previous();
 
-&function_end_B("whirlpool_block_mmx");
 &asm_finish();