1
0
Fork 0

s390: optimize memset implementation

As with the memset16/32/64 variants, avoid having subsequent mvc
instructions depend on each other, since that might have a negative
performance impact.

This patch is currently of little relevance, since at least gcc 7.1
generates only inline memset code and not a single memset call.
However, there is no reason not to provide an optimized version
in case gcc generates memset calls again, as it did in
the past.

Signed-off-by: Heiko Carstens <heiko.carstens@de.ibm.com>
Signed-off-by: Martin Schwidefsky <schwidefsky@de.ibm.com>
hifive-unleashed-5.1
Heiko Carstens 2017-10-04 19:27:08 +02:00 committed by Martin Schwidefsky
parent 41879ff65d
commit 993fef95b9
1 changed files with 12 additions and 8 deletions

View File

@ -78,21 +78,25 @@ ENTRY(memset)
ex %r4,0(%r3)
br %r14
.Lmemset_fill:
stc %r3,0(%r2)
cghi %r4,1
lgr %r1,%r2
ber %r14
je .Lmemset_fill_exit
aghi %r4,-2
srlg %r3,%r4,8
ltgr %r3,%r3
srlg %r5,%r4,8
ltgr %r5,%r5
jz .Lmemset_fill_remainder
.Lmemset_fill_loop:
mvc 1(256,%r1),0(%r1)
stc %r3,0(%r1)
mvc 1(255,%r1),0(%r1)
la %r1,256(%r1)
brctg %r3,.Lmemset_fill_loop
brctg %r5,.Lmemset_fill_loop
.Lmemset_fill_remainder:
larl %r3,.Lmemset_mvc
ex %r4,0(%r3)
stc %r3,0(%r1)
larl %r5,.Lmemset_mvc
ex %r4,0(%r5)
br %r14
.Lmemset_fill_exit:
stc %r3,0(%r1)
br %r14
.Lmemset_xc:
xc 0(1,%r1),0(%r1)