1
0
Fork 0

crypto: sha512-avx2 - Fix RBP usage

Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Mix things up a little bit to get rid of the RBP usage, without hurting
performance too much.  Use RDI instead of RBP for the TBL pointer.  That
will clobber CTX, so spill CTX onto the stack and use R12 to read it in
the outer loop.  R12 is used as a non-persistent temporary variable
elsewhere, so it's safe to use.

Also remove the unused y4 variable.

Reported-by: Eric Biggers <ebiggers3@gmail.com>
Reported-by: Peter Zijlstra <peterz@infradead.org>
Tested-by: Eric Biggers <ebiggers@google.com>
Acked-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Josh Poimboeuf <jpoimboe@redhat.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
hifive-unleashed-5.1
Josh Poimboeuf 2017-09-18 14:42:10 -05:00 committed by Herbert Xu
parent 539012dcbd
commit ca04c82376
1 changed files with 39 additions and 36 deletions

View File

@ -69,8 +69,9 @@ XFER = YTMP0
BYTE_FLIP_MASK = %ymm9
# 1st arg
CTX = %rdi
# 1st arg is %rdi, which is saved to the stack and accessed later via %r12
CTX1 = %rdi
CTX2 = %r12
# 2nd arg
INP = %rsi
# 3rd arg
@ -81,7 +82,7 @@ d = %r8
e = %rdx
y3 = %rsi
TBL = %rbp
TBL = %rdi # clobbers CTX1
a = %rax
b = %rbx
@ -91,26 +92,26 @@ g = %r10
h = %r11
old_h = %r11
T1 = %r12
T1 = %r12 # clobbers CTX2
y0 = %r13
y1 = %r14
y2 = %r15
y4 = %r12
# Local variables (stack frame)
XFER_SIZE = 4*8
SRND_SIZE = 1*8
INP_SIZE = 1*8
INPEND_SIZE = 1*8
CTX_SIZE = 1*8
RSPSAVE_SIZE = 1*8
GPRSAVE_SIZE = 6*8
GPRSAVE_SIZE = 5*8
frame_XFER = 0
frame_SRND = frame_XFER + XFER_SIZE
frame_INP = frame_SRND + SRND_SIZE
frame_INPEND = frame_INP + INP_SIZE
frame_RSPSAVE = frame_INPEND + INPEND_SIZE
frame_CTX = frame_INPEND + INPEND_SIZE
frame_RSPSAVE = frame_CTX + CTX_SIZE
frame_GPRSAVE = frame_RSPSAVE + RSPSAVE_SIZE
frame_size = frame_GPRSAVE + GPRSAVE_SIZE
@ -576,12 +577,11 @@ ENTRY(sha512_transform_rorx)
mov %rax, frame_RSPSAVE(%rsp)
# Save GPRs
mov %rbp, frame_GPRSAVE(%rsp)
mov %rbx, 8*1+frame_GPRSAVE(%rsp)
mov %r12, 8*2+frame_GPRSAVE(%rsp)
mov %r13, 8*3+frame_GPRSAVE(%rsp)
mov %r14, 8*4+frame_GPRSAVE(%rsp)
mov %r15, 8*5+frame_GPRSAVE(%rsp)
mov %rbx, 8*0+frame_GPRSAVE(%rsp)
mov %r12, 8*1+frame_GPRSAVE(%rsp)
mov %r13, 8*2+frame_GPRSAVE(%rsp)
mov %r14, 8*3+frame_GPRSAVE(%rsp)
mov %r15, 8*4+frame_GPRSAVE(%rsp)
shl $7, NUM_BLKS # convert to bytes
jz done_hash
@ -589,14 +589,17 @@ ENTRY(sha512_transform_rorx)
mov NUM_BLKS, frame_INPEND(%rsp)
## load initial digest
mov 8*0(CTX),a
mov 8*1(CTX),b
mov 8*2(CTX),c
mov 8*3(CTX),d
mov 8*4(CTX),e
mov 8*5(CTX),f
mov 8*6(CTX),g
mov 8*7(CTX),h
mov 8*0(CTX1), a
mov 8*1(CTX1), b
mov 8*2(CTX1), c
mov 8*3(CTX1), d
mov 8*4(CTX1), e
mov 8*5(CTX1), f
mov 8*6(CTX1), g
mov 8*7(CTX1), h
# save %rdi (CTX) before it gets clobbered
mov %rdi, frame_CTX(%rsp)
vmovdqa PSHUFFLE_BYTE_FLIP_MASK(%rip), BYTE_FLIP_MASK
@ -652,14 +655,15 @@ loop2:
subq $1, frame_SRND(%rsp)
jne loop2
addm 8*0(CTX),a
addm 8*1(CTX),b
addm 8*2(CTX),c
addm 8*3(CTX),d
addm 8*4(CTX),e
addm 8*5(CTX),f
addm 8*6(CTX),g
addm 8*7(CTX),h
mov frame_CTX(%rsp), CTX2
addm 8*0(CTX2), a
addm 8*1(CTX2), b
addm 8*2(CTX2), c
addm 8*3(CTX2), d
addm 8*4(CTX2), e
addm 8*5(CTX2), f
addm 8*6(CTX2), g
addm 8*7(CTX2), h
mov frame_INP(%rsp), INP
add $128, INP
@ -669,12 +673,11 @@ loop2:
done_hash:
# Restore GPRs
mov frame_GPRSAVE(%rsp) ,%rbp
mov 8*1+frame_GPRSAVE(%rsp) ,%rbx
mov 8*2+frame_GPRSAVE(%rsp) ,%r12
mov 8*3+frame_GPRSAVE(%rsp) ,%r13
mov 8*4+frame_GPRSAVE(%rsp) ,%r14
mov 8*5+frame_GPRSAVE(%rsp) ,%r15
mov 8*0+frame_GPRSAVE(%rsp), %rbx
mov 8*1+frame_GPRSAVE(%rsp), %r12
mov 8*2+frame_GPRSAVE(%rsp), %r13
mov 8*3+frame_GPRSAVE(%rsp), %r14
mov 8*4+frame_GPRSAVE(%rsp), %r15
# Restore Stack Pointer
mov frame_RSPSAVE(%rsp), %rsp