powerpc: inline ip_fast_csum()

On several architectures, ip_fast_csum() is inlined.
There are functions like ip_send_check() which do little
more than call ip_fast_csum().
Inlining ip_fast_csum() allows the compiler to optimise better.

Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Christophe Leroy <christophe.leroy@c-s.fr>
[scottwood: whitespace and cast fixes]
Signed-off-by: Scott Wood <oss@buserror.net>
This commit is contained in:
Christophe Leroy 2015-09-22 16:34:25 +02:00 committed by Scott Wood
parent 03bc8b0fc8
commit 37e08cad8f
4 changed files with 38 additions and 56 deletions

View file

@@ -9,16 +9,9 @@
* 2 of the License, or (at your option) any later version.
*/
/*
* This is a version of ip_compute_csum() optimized for IP headers,
* which always checksum on 4 octet boundaries. ihl is the number
* of 32-bit words and is always >= 5.
*/
#ifdef CONFIG_GENERIC_CSUM
#include <asm-generic/checksum.h>
#else
extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
/*
* computes the checksum of a memory block at buff, length len,
* and adds in "sum" (32-bit)
@@ -137,6 +130,44 @@ static inline __wsum csum_add(__wsum csum, __wsum addend)
#endif
}
/*
* This is a version of ip_compute_csum() optimized for IP headers,
* which always checksum on 4 octet boundaries. ihl is the number
* of 32-bit words and is always >= 5.
*/
/*
 * Sum the 32-bit words of an IP header, without the final fold down to
 * 16 bits (the caller applies csum_fold()).
 *
 * @iph: pointer to the header; the u32 accesses assume 32-bit alignment
 * @ihl: header length in 32-bit words (>= 5 per the comment above)
 *
 * Returns the 32-bit partial one's-complement sum as a __wsum.
 */
static inline __wsum ip_fast_csum_nofold(const void *iph, unsigned int ihl)
{
/* ptr starts at the second word; word 0 seeds the accumulator below */
const u32 *ptr = (const u32 *)iph + 1;
#ifdef __powerpc64__
/* 64-bit: accumulate in a u64 so per-word adds cannot lose carries */
unsigned int i;
u64 s = *(const u32 *)iph;
for (i = 0; i < ihl - 1; i++, ptr++)
s += *ptr;
/*
 * Fold the carries collected in the high 32 bits back into the low
 * 32 bits.  NOTE(review): this add can itself carry into bit 32, and
 * that carry is dropped by the truncating cast below -- presumably
 * tolerated because ihl is small, but worth confirming.
 */
s += (s >> 32);
return (__force __wsum)s;
#else
/* 32-bit: use the carrying add instructions (addc/adde/addze). */
__wsum sum, tmp;
asm("mtctr %3;"			/* CTR = ihl - 2 words left after the first two */
"addc %0,%4,%5;"		/* sum = word0 + word1, carry into XER[CA] */
"1: lwzu %1, 4(%2);"		/* load next word, advancing ptr */
"adde %0,%0,%1;"		/* add it plus the carry from the previous add */
"bdnz 1b;"			/* decrement CTR, loop while non-zero */
"addze %0,%0;"			/* fold the final carry into the sum */
: "=r" (sum), "=r" (tmp), "+b" (ptr)
: "r" (ihl - 2), "r" (*(const u32 *)iph), "r" (*ptr)
: "ctr", "xer", "memory");	/* "memory": the asm reads words via ptr */
return sum;
#endif
}
static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
{
return csum_fold(ip_fast_csum_nofold(iph, ihl));
}
#endif
#endif /* __KERNEL__ */
#endif

View file

@@ -19,27 +19,6 @@
.text
/*
* ip_fast_csum(buf, len) -- Optimized for IP header
* len is in words and is always >= 5.
*/
/*
 * 32-bit one's-complement checksum of an IP header.
 * In:  r3 = buf, r4 = len in 32-bit words (>= 5 per the comment above).
 * Out: r3 = folded, complemented 16-bit checksum.
 */
_GLOBAL(ip_fast_csum)
	lwz r0,0(r3)		/* r0 = first word */
	lwzu r5,4(r3)		/* r5 = second word, r3 advances */
	addic. r4,r4,-2		/* two words consumed; sets CR0 for blelr */
	addc r0,r0,r5		/* sum the first two words, carry into CA */
	mtctr r4		/* CTR = remaining word count */
	blelr-			/* len <= 2: nothing left to add (NOTE(review):
				 * returns without folding -- presumably
				 * unreachable since callers pass len >= 5) */
1:	lwzu r4,4(r3)		/* load next word */
	adde r0,r0,r4		/* add it plus the carry */
	bdnz 1b			/* loop CTR times */
	addze r0,r0 /* add in final carry */
	rlwinm r3,r0,16,0,31 /* fold two halves together */
	add r3,r0,r3
	not r3,r3		/* one's complement */
	srwi r3,r3,16		/* result in the low 16 bits of r3 */
	blr
/*
* computes the checksum of a memory block at buff, length len,
* and adds in "sum" (32-bit)

View file

@@ -17,33 +17,6 @@
#include <asm/errno.h>
#include <asm/ppc_asm.h>
/*
* ip_fast_csum(r3=buf, r4=len) -- Optimized for IP header
* len is in words and is always >= 5.
*
* In practice len == 5, but this is not guaranteed. So this code does not
* attempt to use doubleword instructions.
*/
/*
 * 64-bit one's-complement checksum of an IP header, using only word
 * (32-bit) loads as documented above, since len may be odd.
 * In:  r3 = buf, r4 = len in 32-bit words (>= 5 per the comment above).
 * Out: r3 = folded, complemented 16-bit checksum.
 */
_GLOBAL(ip_fast_csum)
	lwz r0,0(r3)		/* r0 = first word */
	lwzu r5,4(r3)		/* r5 = second word, r3 advances */
	addic. r4,r4,-2		/* two words consumed; sets CR0 for blelr */
	addc r0,r0,r5		/* sum the first two words, carry into CA */
	mtctr r4		/* CTR = remaining word count */
	blelr-			/* len <= 2: nothing left to add (NOTE(review):
				 * returns without folding -- presumably
				 * unreachable since callers pass len >= 5) */
1:	lwzu r4,4(r3)		/* load next word */
	adde r0,r0,r4		/* add it plus the carry */
	bdnz 1b			/* loop CTR times */
	addze r0,r0 /* add in final carry */
	rldicl r4,r0,32,0 /* fold two 32-bit halves together */
	add r0,r0,r4
	srdi r0,r0,32		/* 32-bit sum with carries, in the low word */
	rlwinm r3,r0,16,0,31 /* fold two halves together */
	add r3,r0,r3
	not r3,r3		/* one's complement */
	srwi r3,r3,16		/* result in the low 16 bits of r3 */
	blr
/*
* Computes the checksum of a memory block at buff, length len,
* and adds in "sum" (32-bit).

View file

@@ -19,7 +19,6 @@ EXPORT_SYMBOL(strncmp);
/* The out-of-line checksum routines only exist when the generic C
 * implementation is not selected. */
#ifndef CONFIG_GENERIC_CSUM
EXPORT_SYMBOL(csum_partial);
EXPORT_SYMBOL(csum_partial_copy_generic);
EXPORT_SYMBOL(ip_fast_csum);
#endif
EXPORT_SYMBOL(__copy_tofrom_user);