+++ /dev/null
-#!/usr/bin/env perl
-
-# ====================================================================
-# Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL
-# project. The module is, however, dual licensed under OpenSSL and
-# CRYPTOGAMS licenses depending on where you obtain it. For further
-# details see http://www.openssl.org/~appro/cryptogams/.
-# ====================================================================
-
-# RC4 for PA-RISC.
-
-# June 2009.
-#
-# Performance is 33% better than gcc 3.2 generated code on PA-7100LC.
-# For reference, [4x] unrolled loop is >40% faster than folded one.
-# It's possible to unroll loop 8 times on PA-RISC 2.0, but improvement
-# is believed to be not sufficient to justify the effort...
-#
-# Special thanks to polarhome.com for providing HP-UX account.
-
-$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
-
-$flavour = shift;
-$output = shift;
-open STDOUT,">$output";
-
-if ($flavour =~ /64/) {
- $LEVEL ="2.0W";
- $SIZE_T =8;
- $FRAME_MARKER =80;
- $SAVED_RP =16;
- $PUSH ="std";
- $PUSHMA ="std,ma";
- $POP ="ldd";
- $POPMB ="ldd,mb";
-} else {
- $LEVEL ="1.0";
- $SIZE_T =4;
- $FRAME_MARKER =48;
- $SAVED_RP =20;
- $PUSH ="stw";
- $PUSHMA ="stwm";
- $POP ="ldw";
- $POPMB ="ldwm";
-}
-
-$FRAME=4*$SIZE_T+$FRAME_MARKER; # 4 saved regs + frame marker
- # [+ argument transfer]
-$SZ=1; # defaults to RC4_CHAR
-if (open CONF,"<${dir}../../opensslconf.h") {
- while(<CONF>) {
- if (m/#\s*define\s+RC4_INT\s+(.*)/) {
- $SZ = ($1=~/char$/) ? 1 : 4;
- last;
- }
- }
- close CONF;
-}
-
-if ($SZ==1) { # RC4_CHAR
- $LD="ldb";
- $LDX="ldbx";
- $MKX="addl";
- $ST="stb";
-} else { # RC4_INT (~5% faster than RC4_CHAR on PA-7100LC)
- $LD="ldw";
- $LDX="ldwx,s";
- $MKX="sh2addl";
- $ST="stw";
-}
-
-$key="%r26";
-$len="%r25";
-$inp="%r24";
-$out="%r23";
-
-@XX=("%r19","%r20");
-@TX=("%r21","%r22");
-$YY="%r28";
-$TY="%r29";
-
-$acc="%r1";
-$ix="%r2";
-$iy="%r3";
-$dat0="%r4";
-$dat1="%r5";
-$rem="%r6";
-$mask="%r31";
-
-sub unrolledloopbody {
-for ($i=0;$i<4;$i++) {
-$code.=<<___;
- ldo 1($XX[0]),$XX[1]
- `sprintf("$LDX %$TY(%$key),%$dat1") if ($i>0)`
- and $mask,$XX[1],$XX[1]
- $LDX $YY($key),$TY
- $MKX $YY,$key,$ix
- $LDX $XX[1]($key),$TX[1]
- $MKX $XX[0],$key,$iy
- $ST $TX[0],0($ix)
- comclr,<> $XX[1],$YY,%r0 ; conditional
- copy $TX[0],$TX[1] ; move
- `sprintf("%sdep %$dat1,%d,8,%$acc",$i==1?"z":"",8*($i-1)+7) if ($i>0)`
- $ST $TY,0($iy)
- addl $TX[0],$TY,$TY
- addl $TX[1],$YY,$YY
- and $mask,$TY,$TY
- and $mask,$YY,$YY
-___
-push(@TX,shift(@TX)); push(@XX,shift(@XX)); # "rotate" registers
-} }
-
-sub foldedloop {
-my ($label,$count)=@_;
-$code.=<<___;
-$label
- $MKX $YY,$key,$iy
- $LDX $YY($key),$TY
- $MKX $XX[0],$key,$ix
- $ST $TX[0],0($iy)
- ldo 1($XX[0]),$XX[0]
- $ST $TY,0($ix)
- addl $TX[0],$TY,$TY
- ldbx $inp($out),$dat1
- and $mask,$TY,$TY
- and $mask,$XX[0],$XX[0]
- $LDX $TY($key),$acc
- $LDX $XX[0]($key),$TX[0]
- ldo 1($out),$out
- xor $dat1,$acc,$acc
- addl $TX[0],$YY,$YY
- stb $acc,-1($out)
- addib,<> -1,$count,$label ; $count is always small
- and $mask,$YY,$YY
-___
-}
-
-$code=<<___;
- .LEVEL $LEVEL
- .text
-
- .EXPORT RC4,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR,ARGW3=GR
-RC4
- .PROC
- .CALLINFO FRAME=`$FRAME-4*$SIZE_T`,NO_CALLS,SAVE_RP,ENTRY_GR=6
- .ENTRY
- $PUSH %r2,-$SAVED_RP(%sp) ; standard prologue
- $PUSHMA %r3,$FRAME(%sp)
- $PUSH %r4,`-$FRAME+1*$SIZE_T`(%sp)
- $PUSH %r5,`-$FRAME+2*$SIZE_T`(%sp)
- $PUSH %r6,`-$FRAME+3*$SIZE_T`(%sp)
-
- cmpib,*= 0,$len,L\$abort
- sub $inp,$out,$inp ; distance between $inp and $out
-
- $LD `0*$SZ`($key),$XX[0]
- $LD `1*$SZ`($key),$YY
- ldo `2*$SZ`($key),$key
-
- ldi 0xff,$mask
- ldi 3,$dat0
-
- ldo 1($XX[0]),$XX[0] ; warm up loop
- and $mask,$XX[0],$XX[0]
- $LDX $XX[0]($key),$TX[0]
- addl $TX[0],$YY,$YY
- cmpib,*>>= 6,$len,L\$oop1 ; is $len large enough to bother?
- and $mask,$YY,$YY
-
- and,<> $out,$dat0,$rem ; is $out aligned?
- b L\$alignedout
- subi 4,$rem,$rem
- sub $len,$rem,$len
-___
-&foldedloop("L\$alignout",$rem); # process till $out is aligned
-
-$code.=<<___;
-L\$alignedout ; $len is at least 4 here
- and,<> $inp,$dat0,$acc ; is $inp aligned?
- b L\$oop4
- sub $inp,$acc,$rem ; align $inp
-
- sh3addl $acc,%r0,$acc
- subi 32,$acc,$acc
- mtctl $acc,%cr11 ; load %sar with vshd align factor
- ldwx $rem($out),$dat0
- ldo 4($rem),$rem
-L\$oop4misalignedinp
-___
-&unrolledloopbody();
-$code.=<<___;
- $LDX $TY($key),$ix
- ldwx $rem($out),$dat1
- ldo -4($len),$len
- or $ix,$acc,$acc ; last piece, no need to dep
- vshd $dat0,$dat1,$iy ; align data
- copy $dat1,$dat0
- xor $iy,$acc,$acc
- stw $acc,0($out)
- cmpib,*<< 3,$len,L\$oop4misalignedinp
- ldo 4($out),$out
- cmpib,*= 0,$len,L\$done
- nop
- b L\$oop1
- nop
-
- .ALIGN 8
-L\$oop4
-___
-&unrolledloopbody();
-$code.=<<___;
- $LDX $TY($key),$ix
- ldwx $inp($out),$dat0
- ldo -4($len),$len
- or $ix,$acc,$acc ; last piece, no need to dep
- xor $dat0,$acc,$acc
- stw $acc,0($out)
- cmpib,*<< 3,$len,L\$oop4
- ldo 4($out),$out
- cmpib,*= 0,$len,L\$done
- nop
-___
-&foldedloop("L\$oop1",$len);
-$code.=<<___;
-L\$done
- $POP `-$FRAME-$SAVED_RP`(%sp),%r2
- ldo -1($XX[0]),$XX[0] ; chill out loop
- sub $YY,$TX[0],$YY
- and $mask,$XX[0],$XX[0]
- and $mask,$YY,$YY
- $ST $XX[0],`-2*$SZ`($key)
- $ST $YY,`-1*$SZ`($key)
- $POP `-$FRAME+1*$SIZE_T`(%sp),%r4
- $POP `-$FRAME+2*$SIZE_T`(%sp),%r5
- $POP `-$FRAME+3*$SIZE_T`(%sp),%r6
-L\$abort
- bv (%r2)
- .EXIT
- $POPMB -$FRAME(%sp),%r3
- .PROCEND
-___
-
-$code.=<<___;
-
- .EXPORT RC4_set_key,ENTRY,ARGW0=GR,ARGW1=GR,ARGW2=GR
- .ALIGN 8
-RC4_set_key
- .PROC
- .CALLINFO NO_CALLS
- .ENTRY
- $ST %r0,`0*$SZ`($key)
- $ST %r0,`1*$SZ`($key)
- ldo `2*$SZ`($key),$key
- copy %r0,@XX[0]
-L\$1st
- $ST @XX[0],0($key)
- ldo 1(@XX[0]),@XX[0]
- bb,>= @XX[0],`31-8`,L\$1st ; @XX[0]<256
- ldo $SZ($key),$key
-
- ldo `-256*$SZ`($key),$key ; rewind $key
- addl $len,$inp,$inp ; $inp to point at the end
- sub %r0,$len,%r23 ; inverse index
- copy %r0,@XX[0]
- copy %r0,@XX[1]
- ldi 0xff,$mask
-
-L\$2nd
- $LDX @XX[0]($key),@TX[0]
- ldbx %r23($inp),@TX[1]
- addi,nuv 1,%r23,%r23 ; increment and conditional
- sub %r0,$len,%r23 ; inverse index
- addl @TX[0],@XX[1],@XX[1]
- addl @TX[1],@XX[1],@XX[1]
- and $mask,@XX[1],@XX[1]
- $MKX @XX[0],$key,$TY
- $LDX @XX[1]($key),@TX[1]
- $MKX @XX[1],$key,$YY
- ldo 1(@XX[0]),@XX[0]
- $ST @TX[0],0($YY)
- bb,>= @XX[0],`31-8`,L\$2nd ; @XX[0]<256
- $ST @TX[1],0($TY)
-
- bv,n (%r2)
- .EXIT
- nop
- .PROCEND
-___
-$code =~ s/\`([^\`]*)\`/eval $1/gem;
-$code =~ s/cmpib,\*/comib,/gm if ($SIZE_T==4);
-$code =~ s/\bbv\b/bve/gm if ($SIZE_T==8);
-
-print $code;
-close STDOUT;