crypto: x86/sha256-avx2 - Fix RBP usage
Using RBP as a temporary register breaks frame pointer convention and breaks stack traces when unwinding from an interrupt in the crypto code.

There's no need to use RBP as a temporary register for the TBL value, because it always stores the same value: the address of the K256 table. Instead just reference the address of K256 directly.

Reported-by: Eric Biggers <ebiggers@google.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>

hifive-unleashed-5.1
parent
673ac6fbc7
commit
d3dfbfe2e6
|
@@ -98,8 +98,6 @@ d = %r8d
|
||||||
e = %edx # clobbers NUM_BLKS
|
e = %edx # clobbers NUM_BLKS
|
||||||
y3 = %esi # clobbers INP
|
y3 = %esi # clobbers INP
|
||||||
|
|
||||||
|
|
||||||
TBL = %rbp
|
|
||||||
SRND = CTX # SRND is same register as CTX
|
SRND = CTX # SRND is same register as CTX
|
||||||
|
|
||||||
a = %eax
|
a = %eax
|
||||||
|
@@ -531,7 +529,6 @@ STACK_SIZE = _RSP + _RSP_SIZE
|
||||||
ENTRY(sha256_transform_rorx)
|
ENTRY(sha256_transform_rorx)
|
||||||
.align 32
|
.align 32
|
||||||
pushq %rbx
|
pushq %rbx
|
||||||
pushq %rbp
|
|
||||||
pushq %r12
|
pushq %r12
|
||||||
pushq %r13
|
pushq %r13
|
||||||
pushq %r14
|
pushq %r14
|
||||||
|
@@ -568,8 +565,6 @@ ENTRY(sha256_transform_rorx)
|
||||||
mov CTX, _CTX(%rsp)
|
mov CTX, _CTX(%rsp)
|
||||||
|
|
||||||
loop0:
|
loop0:
|
||||||
lea K256(%rip), TBL
|
|
||||||
|
|
||||||
## Load first 16 dwords from two blocks
|
## Load first 16 dwords from two blocks
|
||||||
VMOVDQ 0*32(INP),XTMP0
|
VMOVDQ 0*32(INP),XTMP0
|
||||||
VMOVDQ 1*32(INP),XTMP1
|
VMOVDQ 1*32(INP),XTMP1
|
||||||
|
@@ -597,19 +592,19 @@ last_block_enter:
|
||||||
|
|
||||||
.align 16
|
.align 16
|
||||||
loop1:
|
loop1:
|
||||||
vpaddd 0*32(TBL, SRND), X0, XFER
|
vpaddd K256+0*32(SRND), X0, XFER
|
||||||
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
|
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
|
||||||
FOUR_ROUNDS_AND_SCHED _XFER + 0*32
|
FOUR_ROUNDS_AND_SCHED _XFER + 0*32
|
||||||
|
|
||||||
vpaddd 1*32(TBL, SRND), X0, XFER
|
vpaddd K256+1*32(SRND), X0, XFER
|
||||||
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
|
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
|
||||||
FOUR_ROUNDS_AND_SCHED _XFER + 1*32
|
FOUR_ROUNDS_AND_SCHED _XFER + 1*32
|
||||||
|
|
||||||
vpaddd 2*32(TBL, SRND), X0, XFER
|
vpaddd K256+2*32(SRND), X0, XFER
|
||||||
vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
|
vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
|
||||||
FOUR_ROUNDS_AND_SCHED _XFER + 2*32
|
FOUR_ROUNDS_AND_SCHED _XFER + 2*32
|
||||||
|
|
||||||
vpaddd 3*32(TBL, SRND), X0, XFER
|
vpaddd K256+3*32(SRND), X0, XFER
|
||||||
vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
|
vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
|
||||||
FOUR_ROUNDS_AND_SCHED _XFER + 3*32
|
FOUR_ROUNDS_AND_SCHED _XFER + 3*32
|
||||||
|
|
||||||
|
@@ -619,10 +614,11 @@ loop1:
|
||||||
|
|
||||||
loop2:
|
loop2:
|
||||||
## Do last 16 rounds with no scheduling
|
## Do last 16 rounds with no scheduling
|
||||||
vpaddd 0*32(TBL, SRND), X0, XFER
|
vpaddd K256+0*32(SRND), X0, XFER
|
||||||
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
|
vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
|
||||||
DO_4ROUNDS _XFER + 0*32
|
DO_4ROUNDS _XFER + 0*32
|
||||||
vpaddd 1*32(TBL, SRND), X1, XFER
|
|
||||||
|
vpaddd K256+1*32(SRND), X1, XFER
|
||||||
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
|
vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
|
||||||
DO_4ROUNDS _XFER + 1*32
|
DO_4ROUNDS _XFER + 1*32
|
||||||
add $2*32, SRND
|
add $2*32, SRND
|
||||||
|
@@ -676,9 +672,6 @@ loop3:
|
||||||
ja done_hash
|
ja done_hash
|
||||||
|
|
||||||
do_last_block:
|
do_last_block:
|
||||||
#### do last block
|
|
||||||
lea K256(%rip), TBL
|
|
||||||
|
|
||||||
VMOVDQ 0*16(INP),XWORD0
|
VMOVDQ 0*16(INP),XWORD0
|
||||||
VMOVDQ 1*16(INP),XWORD1
|
VMOVDQ 1*16(INP),XWORD1
|
||||||
VMOVDQ 2*16(INP),XWORD2
|
VMOVDQ 2*16(INP),XWORD2
|
||||||
|
@@ -718,7 +711,6 @@ done_hash:
|
||||||
popq %r14
|
popq %r14
|
||||||
popq %r13
|
popq %r13
|
||||||
popq %r12
|
popq %r12
|
||||||
popq %rbp
|
|
||||||
popq %rbx
|
popq %rbx
|
||||||
ret
|
ret
|
||||||
ENDPROC(sha256_transform_rorx)
|
ENDPROC(sha256_transform_rorx)
|
||||||
|
|
Loading…
Reference in New Issue