1
0
Fork 0
remarkable-linux/arch/parisc/include/asm/futex.h

125 lines
2.6 KiB
C
Raw Normal View History

#ifndef _ASM_PARISC_FUTEX_H
#define _ASM_PARISC_FUTEX_H
[PATCH] FUTEX_WAKE_OP: pthread_cond_signal() speedup ATM pthread_cond_signal is unnecessarily slow, because it wakes one waiter (which at least on UP usually means an immediate context switch to one of the waiter threads). This waiter wakes up and after a few instructions it attempts to acquire the cv internal lock, but that lock is still held by the thread calling pthread_cond_signal. So it goes to sleep and eventually the signalling thread is scheduled in, unlocks the internal lock and wakes the waiter again. Now, before 2003-09-21 NPTL was using FUTEX_REQUEUE in pthread_cond_signal to avoid this performance issue, but it was removed when locks were redesigned to the 3 state scheme (unlocked, locked uncontended, locked contended). Following scenario shows why simply using FUTEX_REQUEUE in pthread_cond_signal together with using lll_mutex_unlock_force in place of lll_mutex_unlock is not enough and probably why it has been disabled at that time: The number is value in cv->__data.__lock. thr1 thr2 thr3 0 pthread_cond_wait 1 lll_mutex_lock (cv->__data.__lock) 0 lll_mutex_unlock (cv->__data.__lock) 0 lll_futex_wait (&cv->__data.__futex, futexval) 0 pthread_cond_signal 1 lll_mutex_lock (cv->__data.__lock) 1 pthread_cond_signal 2 lll_mutex_lock (cv->__data.__lock) 2 lll_futex_wait (&cv->__data.__lock, 2) 2 lll_futex_requeue (&cv->__data.__futex, 0, 1, &cv->__data.__lock) # FUTEX_REQUEUE, not FUTEX_CMP_REQUEUE 2 lll_mutex_unlock_force (cv->__data.__lock) 0 cv->__data.__lock = 0 0 lll_futex_wake (&cv->__data.__lock, 1) 1 lll_mutex_lock (cv->__data.__lock) 0 lll_mutex_unlock (cv->__data.__lock) # Here, lll_mutex_unlock doesn't know there are threads waiting # on the internal cv's lock Now, I believe it is possible to use FUTEX_REQUEUE in pthread_cond_signal, but it will cost us not one, but 2 extra syscalls and, what's worse, one of these extra syscalls will be done for every single waiting loop in pthread_cond_*wait. We would need to use lll_mutex_unlock_force in pthread_cond_signal after requeue and lll_mutex_cond_lock in pthread_cond_*wait after lll_futex_wait. Another alternative is to do the unlocking pthread_cond_signal needs to do (the lock can't be unlocked before lll_futex_wake, as that is racy) in the kernel. I have implemented both variants, futex-requeue-glibc.patch is the first one and futex-wake_op{,-glibc}.patch is the unlocking inside of the kernel. The kernel interface allows userland to specify how exactly an unlocking operation should look like (some atomic arithmetic operation with optional constant argument and comparison of the previous futex value with another constant). It has been implemented just for ppc*, x86_64 and i?86, for other architectures I'm including just a stub header which can be used as a starting point by maintainers to write support for their arches and ATM will just return -ENOSYS for FUTEX_WAKE_OP. The requeue patch has been (lightly) tested just on x86_64, the wake_op patch on ppc64 kernel running 32-bit and 64-bit NPTL and x86_64 kernel running 32-bit and 64-bit NPTL. With the following benchmark on UP x86-64 I get: for i in nptl-orig nptl-requeue nptl-wake_op; do echo time elf/ld.so --library-path .:$i /tmp/bench; \ for j in 1 2; do echo ( time elf/ld.so --library-path .:$i /tmp/bench ) 2>&1; done; done time elf/ld.so --library-path .:nptl-orig /tmp/bench real 0m0.655s user 0m0.253s sys 0m0.403s real 0m0.657s user 0m0.269s sys 0m0.388s time elf/ld.so --library-path .:nptl-requeue /tmp/bench real 0m0.496s user 0m0.225s sys 0m0.271s real 0m0.531s user 0m0.242s sys 0m0.288s time elf/ld.so --library-path .:nptl-wake_op /tmp/bench real 0m0.380s user 0m0.176s sys 0m0.204s real 0m0.382s user 0m0.175s sys 0m0.207s The benchmark is at: http://sourceware.org/ml/libc-alpha/2005-03/txt00001.txt Older futex-requeue-glibc.patch version is at: http://sourceware.org/ml/libc-alpha/2005-03/txt00002.txt Older futex-wake_op-glibc.patch version is at: http://sourceware.org/ml/libc-alpha/2005-03/txt00003.txt Will post a new version (just x86-64 fixes so that the patch applies against pthread_cond_signal.S) to libc-hacker ml soon. Attached is the kernel FUTEX_WAKE_OP patch as well as a simple-minded testcase that will not test the atomicity of the operation, but at least check if the threads that should have been woken up are woken up and whether the arithmetic operation in the kernel gave the expected results. Acked-by: Ingo Molnar <mingo@redhat.com> Cc: Ulrich Drepper <drepper@redhat.com> Cc: Jamie Lokier <jamie@shareable.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Signed-off-by: Yoichi Yuasa <yuasa@hh.iij4u.or.jp> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-06 16:16:25 -06:00
#ifdef __KERNEL__
[PATCH] FUTEX_WAKE_OP: pthread_cond_signal() speedup ATM pthread_cond_signal is unnecessarily slow, because it wakes one waiter (which at least on UP usually means an immediate context switch to one of the waiter threads). This waiter wakes up and after a few instructions it attempts to acquire the cv internal lock, but that lock is still held by the thread calling pthread_cond_signal. So it goes to sleep and eventually the signalling thread is scheduled in, unlocks the internal lock and wakes the waiter again. Now, before 2003-09-21 NPTL was using FUTEX_REQUEUE in pthread_cond_signal to avoid this performance issue, but it was removed when locks were redesigned to the 3 state scheme (unlocked, locked uncontended, locked contended). Following scenario shows why simply using FUTEX_REQUEUE in pthread_cond_signal together with using lll_mutex_unlock_force in place of lll_mutex_unlock is not enough and probably why it has been disabled at that time: The number is value in cv->__data.__lock. thr1 thr2 thr3 0 pthread_cond_wait 1 lll_mutex_lock (cv->__data.__lock) 0 lll_mutex_unlock (cv->__data.__lock) 0 lll_futex_wait (&cv->__data.__futex, futexval) 0 pthread_cond_signal 1 lll_mutex_lock (cv->__data.__lock) 1 pthread_cond_signal 2 lll_mutex_lock (cv->__data.__lock) 2 lll_futex_wait (&cv->__data.__lock, 2) 2 lll_futex_requeue (&cv->__data.__futex, 0, 1, &cv->__data.__lock) # FUTEX_REQUEUE, not FUTEX_CMP_REQUEUE 2 lll_mutex_unlock_force (cv->__data.__lock) 0 cv->__data.__lock = 0 0 lll_futex_wake (&cv->__data.__lock, 1) 1 lll_mutex_lock (cv->__data.__lock) 0 lll_mutex_unlock (cv->__data.__lock) # Here, lll_mutex_unlock doesn't know there are threads waiting # on the internal cv's lock Now, I believe it is possible to use FUTEX_REQUEUE in pthread_cond_signal, but it will cost us not one, but 2 extra syscalls and, what's worse, one of these extra syscalls will be done for every single waiting loop in pthread_cond_*wait. We would need to use lll_mutex_unlock_force in pthread_cond_signal after requeue and lll_mutex_cond_lock in pthread_cond_*wait after lll_futex_wait. Another alternative is to do the unlocking pthread_cond_signal needs to do (the lock can't be unlocked before lll_futex_wake, as that is racy) in the kernel. I have implemented both variants, futex-requeue-glibc.patch is the first one and futex-wake_op{,-glibc}.patch is the unlocking inside of the kernel. The kernel interface allows userland to specify how exactly an unlocking operation should look like (some atomic arithmetic operation with optional constant argument and comparison of the previous futex value with another constant). It has been implemented just for ppc*, x86_64 and i?86, for other architectures I'm including just a stub header which can be used as a starting point by maintainers to write support for their arches and ATM will just return -ENOSYS for FUTEX_WAKE_OP. The requeue patch has been (lightly) tested just on x86_64, the wake_op patch on ppc64 kernel running 32-bit and 64-bit NPTL and x86_64 kernel running 32-bit and 64-bit NPTL. With the following benchmark on UP x86-64 I get: for i in nptl-orig nptl-requeue nptl-wake_op; do echo time elf/ld.so --library-path .:$i /tmp/bench; \ for j in 1 2; do echo ( time elf/ld.so --library-path .:$i /tmp/bench ) 2>&1; done; done time elf/ld.so --library-path .:nptl-orig /tmp/bench real 0m0.655s user 0m0.253s sys 0m0.403s real 0m0.657s user 0m0.269s sys 0m0.388s time elf/ld.so --library-path .:nptl-requeue /tmp/bench real 0m0.496s user 0m0.225s sys 0m0.271s real 0m0.531s user 0m0.242s sys 0m0.288s time elf/ld.so --library-path .:nptl-wake_op /tmp/bench real 0m0.380s user 0m0.176s sys 0m0.204s real 0m0.382s user 0m0.175s sys 0m0.207s The benchmark is at: http://sourceware.org/ml/libc-alpha/2005-03/txt00001.txt Older futex-requeue-glibc.patch version is at: http://sourceware.org/ml/libc-alpha/2005-03/txt00002.txt Older futex-wake_op-glibc.patch version is at: http://sourceware.org/ml/libc-alpha/2005-03/txt00003.txt Will post a new version (just x86-64 fixes so that the patch applies against pthread_cond_signal.S) to libc-hacker ml soon. Attached is the kernel FUTEX_WAKE_OP patch as well as a simple-minded testcase that will not test the atomicity of the operation, but at least check if the threads that should have been woken up are woken up and whether the arithmetic operation in the kernel gave the expected results. Acked-by: Ingo Molnar <mingo@redhat.com> Cc: Ulrich Drepper <drepper@redhat.com> Cc: Jamie Lokier <jamie@shareable.org> Cc: Rusty Russell <rusty@rustcorp.com.au> Signed-off-by: Yoichi Yuasa <yuasa@hh.iij4u.or.jp> Signed-off-by: Andrew Morton <akpm@osdl.org> Signed-off-by: Linus Torvalds <torvalds@osdl.org>
2005-09-06 16:16:25 -06:00
#include <linux/futex.h>
#include <linux/uaccess.h>
#include <asm/atomic.h>
#include <asm/errno.h>
/* The following has to match the LWS code in syscall.S. We have
sixteen four-word locks. */
static inline void
_futex_spin_lock_irqsave(u32 __user *uaddr, unsigned long int *flags)
{
extern u32 lws_lock_start[];
long index = ((long)uaddr & 0xf0) >> 2;
arch_spinlock_t *s = (arch_spinlock_t *)&lws_lock_start[index];
local_irq_save(*flags);
arch_spin_lock(s);
}
static inline void
_futex_spin_unlock_irqrestore(u32 __user *uaddr, unsigned long int *flags)
{
extern u32 lws_lock_start[];
long index = ((long)uaddr & 0xf0) >> 2;
arch_spinlock_t *s = (arch_spinlock_t *)&lws_lock_start[index];
arch_spin_unlock(s);
local_irq_restore(*flags);
}
static inline int
futex: Remove duplicated code and fix undefined behaviour There is code duplicated over all architecture's headers for futex_atomic_op_inuser. Namely op decoding, access_ok check for uaddr, and comparison of the result. Remove this duplication and leave up to the arches only the needed assembly which is now in arch_futex_atomic_op_inuser. This effectively distributes the Will Deacon's arm64 fix for undefined behaviour reported by UBSAN to all architectures. The fix was done in commit 5f16a046f8e1 (arm64: futex: Fix undefined behaviour with FUTEX_OP_OPARG_SHIFT usage). Look there for an example dump. And as suggested by Thomas, check for negative oparg too, because it was also reported to cause undefined behaviour report. Note that s390 removed access_ok check in d12a29703 ("s390/uaccess: remove pointless access_ok() checks") as access_ok there returns true. We introduce it back to the helper for the sake of simplicity (it gets optimized away anyway). Signed-off-by: Jiri Slaby <jslaby@suse.cz> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Russell King <rmk+kernel@armlinux.org.uk> Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc) Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> [s390] Acked-by: Chris Metcalf <cmetcalf@mellanox.com> [for tile] Reviewed-by: Darren Hart (VMware) <dvhart@infradead.org> Reviewed-by: Will Deacon <will.deacon@arm.com> [core/arm64] Cc: linux-mips@linux-mips.org Cc: Rich Felker <dalias@libc.org> Cc: linux-ia64@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: peterz@infradead.org Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Paul Mackerras <paulus@samba.org> Cc: sparclinux@vger.kernel.org Cc: Jonas Bonn <jonas@southpole.se> Cc: linux-s390@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: linux-hexagon@vger.kernel.org Cc: Helge Deller <deller@gmx.de> Cc: "James E.J. Bottomley" <jejb@parisc-linux.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Matt Turner <mattst88@gmail.com> Cc: linux-snps-arc@lists.infradead.org Cc: Fenghua Yu <fenghua.yu@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: linux-xtensa@linux-xtensa.org Cc: Stefan Kristiansson <stefan.kristiansson@saunalahti.fi> Cc: openrisc@lists.librecores.org Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Stafford Horne <shorne@gmail.com> Cc: linux-arm-kernel@lists.infradead.org Cc: Richard Henderson <rth@twiddle.net> Cc: Chris Zankel <chris@zankel.net> Cc: Michal Simek <monstr@monstr.eu> Cc: Tony Luck <tony.luck@intel.com> Cc: linux-parisc@vger.kernel.org Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Richard Kuo <rkuo@codeaurora.org> Cc: linux-alpha@vger.kernel.org Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: linuxppc-dev@lists.ozlabs.org Cc: "David S. Miller" <davem@davemloft.net> Link: http://lkml.kernel.org/r/20170824073105.3901-1-jslaby@suse.cz
2017-08-24 01:31:05 -06:00
arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
{
unsigned long int flags;
int oldval, ret;
u32 tmp;
_futex_spin_lock_irqsave(uaddr, &flags);
pagefault_disable();
ret = -EFAULT;
if (unlikely(get_user(oldval, uaddr) != 0))
goto out_pagefault_enable;
ret = 0;
tmp = oldval;
switch (op) {
case FUTEX_OP_SET:
tmp = oparg;
break;
case FUTEX_OP_ADD:
tmp += oparg;
break;
case FUTEX_OP_OR:
tmp |= oparg;
break;
case FUTEX_OP_ANDN:
tmp &= ~oparg;
break;
case FUTEX_OP_XOR:
tmp ^= oparg;
break;
default:
ret = -ENOSYS;
}
if (ret == 0 && unlikely(put_user(tmp, uaddr) != 0))
ret = -EFAULT;
out_pagefault_enable:
pagefault_enable();
_futex_spin_unlock_irqrestore(uaddr, &flags);
futex: Remove duplicated code and fix undefined behaviour There is code duplicated over all architecture's headers for futex_atomic_op_inuser. Namely op decoding, access_ok check for uaddr, and comparison of the result. Remove this duplication and leave up to the arches only the needed assembly which is now in arch_futex_atomic_op_inuser. This effectively distributes the Will Deacon's arm64 fix for undefined behaviour reported by UBSAN to all architectures. The fix was done in commit 5f16a046f8e1 (arm64: futex: Fix undefined behaviour with FUTEX_OP_OPARG_SHIFT usage). Look there for an example dump. And as suggested by Thomas, check for negative oparg too, because it was also reported to cause undefined behaviour report. Note that s390 removed access_ok check in d12a29703 ("s390/uaccess: remove pointless access_ok() checks") as access_ok there returns true. We introduce it back to the helper for the sake of simplicity (it gets optimized away anyway). Signed-off-by: Jiri Slaby <jslaby@suse.cz> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Acked-by: Russell King <rmk+kernel@armlinux.org.uk> Acked-by: Michael Ellerman <mpe@ellerman.id.au> (powerpc) Acked-by: Heiko Carstens <heiko.carstens@de.ibm.com> [s390] Acked-by: Chris Metcalf <cmetcalf@mellanox.com> [for tile] Reviewed-by: Darren Hart (VMware) <dvhart@infradead.org> Reviewed-by: Will Deacon <will.deacon@arm.com> [core/arm64] Cc: linux-mips@linux-mips.org Cc: Rich Felker <dalias@libc.org> Cc: linux-ia64@vger.kernel.org Cc: linux-sh@vger.kernel.org Cc: peterz@infradead.org Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org> Cc: Max Filippov <jcmvbkbc@gmail.com> Cc: Paul Mackerras <paulus@samba.org> Cc: sparclinux@vger.kernel.org Cc: Jonas Bonn <jonas@southpole.se> Cc: linux-s390@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: Yoshinori Sato <ysato@users.sourceforge.jp> Cc: linux-hexagon@vger.kernel.org Cc: Helge Deller <deller@gmx.de> Cc: "James E.J. Bottomley" <jejb@parisc-linux.org> Cc: Catalin Marinas <catalin.marinas@arm.com> Cc: Matt Turner <mattst88@gmail.com> Cc: linux-snps-arc@lists.infradead.org Cc: Fenghua Yu <fenghua.yu@intel.com> Cc: Arnd Bergmann <arnd@arndb.de> Cc: linux-xtensa@linux-xtensa.org Cc: Stefan Kristiansson <stefan.kristiansson@saunalahti.fi> Cc: openrisc@lists.librecores.org Cc: Ivan Kokshaysky <ink@jurassic.park.msu.ru> Cc: Stafford Horne <shorne@gmail.com> Cc: linux-arm-kernel@lists.infradead.org Cc: Richard Henderson <rth@twiddle.net> Cc: Chris Zankel <chris@zankel.net> Cc: Michal Simek <monstr@monstr.eu> Cc: Tony Luck <tony.luck@intel.com> Cc: linux-parisc@vger.kernel.org Cc: Vineet Gupta <vgupta@synopsys.com> Cc: Ralf Baechle <ralf@linux-mips.org> Cc: Richard Kuo <rkuo@codeaurora.org> Cc: linux-alpha@vger.kernel.org Cc: Martin Schwidefsky <schwidefsky@de.ibm.com> Cc: linuxppc-dev@lists.ozlabs.org Cc: "David S. Miller" <davem@davemloft.net> Link: http://lkml.kernel.org/r/20170824073105.3901-1-jslaby@suse.cz
2017-08-24 01:31:05 -06:00
if (!ret)
*oval = oldval;
return ret;
}
static inline int
futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
u32 oldval, u32 newval)
{
u32 val;
unsigned long flags;
/* futex.c wants to do a cmpxchg_inatomic on kernel NULL, which is
* our gateway page, and causes no end of trouble...
*/
if (uaccess_kernel() && !uaddr)
return -EFAULT;
if (!access_ok(VERIFY_WRITE, uaddr, sizeof(u32)))
return -EFAULT;
/* HPPA has no cmpxchg in hardware and therefore the
* best we can do here is use an array of locks. The
* lock selected is based on a hash of the userspace
* address. This should scale to a couple of CPUs.
*/
_futex_spin_lock_irqsave(uaddr, &flags);
if (unlikely(get_user(val, uaddr) != 0)) {
_futex_spin_unlock_irqrestore(uaddr, &flags);
return -EFAULT;
}
if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) {
_futex_spin_unlock_irqrestore(uaddr, &flags);
return -EFAULT;
}
*uval = val;
_futex_spin_unlock_irqrestore(uaddr, &flags);
return 0;
}
#endif /*__KERNEL__*/
#endif /*_ASM_PARISC_FUTEX_H*/