From 784bdf3bb694b256fcd6120b93e8947a84249a3a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Jul 2016 16:32:30 +0200 Subject: [PATCH 1/4] futex: Assume all mappings are private on !MMU systems To quote Rick why there is no need for shared mapping on !MMU systems: |With MMU, shared futex keys need to identify the physical backing for |a memory address because it may be mapped at different addresses in |different processes (or even multiple times in the same process). |Without MMU this cannot happen. You only have physical addresses. So |the "private futex" behavior of using the virtual address as the key |is always correct (for both shared and private cases) on nommu |systems. This patch disables the FLAGS_SHARED in a way that allows the compiler to remove that code. [bigeasy: Added changelog ] Reported-by: Rich Felker Signed-off-by: Thomas Gleixner Signed-off-by: Sebastian Andrzej Siewior Cc: Andrew Morton Link: http://lkml.kernel.org/r/20160729143230.GA21715@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/futex.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/kernel/futex.c b/kernel/futex.c index 33664f70e2d2..46cb3a301bc1 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -179,7 +179,15 @@ int __read_mostly futex_cmpxchg_enabled; * Futex flags used to encode options to functions and preserve them across * restarts. */ -#define FLAGS_SHARED 0x01 +#ifdef CONFIG_MMU +# define FLAGS_SHARED 0x01 +#else +/* + * NOMMU does not have per process address space. Let the compiler optimize + * code away. + */ +# define FLAGS_SHARED 0x00 +#endif #define FLAGS_CLOCKRT 0x02 #define FLAGS_HAS_TIMEOUT 0x04 @@ -405,6 +413,16 @@ static void get_futex_key_refs(union futex_key *key) if (!key->both.ptr) return; + /* + * On MMU less systems futexes are always "private" as there is no per + * process address space. We need the smp wmb nevertheless - yes, + * arch/blackfin has MMU less SMP ... + */ + if (!IS_ENABLED(CONFIG_MMU)) { + smp_mb(); /* explicit smp_mb(); (B) */ + return; + } + switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: ihold(key->shared.inode); /* implies smp_mb(); (B) */ @@ -436,6 +454,9 @@ static void drop_futex_key_refs(union futex_key *key) return; } + if (!IS_ENABLED(CONFIG_MMU)) + return; + switch (key->both.offset & (FUT_OFF_INODE|FUT_OFF_MMSHARED)) { case FUT_OFF_INODE: iput(key->shared.inode); From 2db34e8bf9a22f4e38b29deccee57457bc0e7d74 Mon Sep 17 00:00:00 2001 From: pan xinhui Date: Mon, 18 Jul 2016 17:47:39 +0800 Subject: [PATCH 2/4] locking/qrwlock: Fix write unlock bug on big endian systems This patch aims to get rid of endianness in queued_write_unlock(). We want to set __qrwlock->wmode to NULL, however the address is not &lock->cnts in big endian machine. That causes queued_write_unlock() write NULL to the wrong field of __qrwlock. So implement __qrwlock_write_byte() which returns the correct __qrwlock->wmode address. Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Pan Xinhui Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman.Long@hpe.com Cc: arnd@arndb.de Cc: boqun.feng@gmail.com Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/1468835259-4486-1-git-send-email-xinhui.pan@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/asm-generic/qrwlock.h | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h index 54a8e65e18b6..7d026bf27713 100644 --- a/include/asm-generic/qrwlock.h +++ b/include/asm-generic/qrwlock.h @@ -25,7 +25,20 @@ #include /* - * Writer states & reader shift and bias + * Writer states & reader shift and bias. + * + * | +0 | +1 | +2 | +3 | + * ----+----+----+----+----+ + * LE | 78 | 56 | 34 | 12 | 0x12345678 + * ----+----+----+----+----+ + * | wr | rd | + * +----+----+----+----+ + * + * ----+----+----+----+----+ + * BE | 12 | 34 | 56 | 78 | 0x12345678 + * ----+----+----+----+----+ + * | rd | wr | + * +----+----+----+----+ */ #define _QW_WAITING 1 /* A writer is waiting */ #define _QW_LOCKED 0xff /* A writer holds the lock */ @@ -133,13 +146,23 @@ static inline void queued_read_unlock(struct qrwlock *lock) (void)atomic_sub_return_release(_QR_BIAS, &lock->cnts); } +/** + * __qrwlock_write_byte - retrieve the write byte address of a queue rwlock + * @lock : Pointer to queue rwlock structure + * Return: the write byte address of a queue rwlock + */ +static inline u8 *__qrwlock_write_byte(struct qrwlock *lock) +{ + return (u8 *)lock + 3 * IS_BUILTIN(CONFIG_CPU_BIG_ENDIAN); +} + /** * queued_write_unlock - release write lock of a queue rwlock * @lock : Pointer to queue rwlock structure */ static inline void queued_write_unlock(struct qrwlock *lock) { - smp_store_release((u8 *)&lock->cnts, 0); + smp_store_release(__qrwlock_write_byte(lock), 0); } /* From 229ce631574761870a2ac938845fadbd07f35caa Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 14 Jul 2016 16:15:56 +0800 Subject: [PATCH 3/4] locking/pvqspinlock: Fix double hash race When the lock holder vCPU is racing with the queue head: CPU 0 (lock holder) CPU1 (queue head) =================== ================= spin_lock(); spin_lock(); pv_kick_node(): pv_wait_head_or_lock(): if (!lp) { lp = pv_hash(lock, pn); xchg(&l->locked, _Q_SLOW_VAL); } WRITE_ONCE(pn->state, vcpu_halted); cmpxchg(&pn->state, vcpu_halted, vcpu_hashed); WRITE_ONCE(l->locked, _Q_SLOW_VAL); (void)pv_hash(lock, pn); In this case, lock holder inserts the pv_node of queue head into the hash table and set _Q_SLOW_VAL unnecessary. This patch avoids it by restoring/setting vcpu_hashed state after failing adaptive locking spinning. Signed-off-by: Wanpeng Li Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Pan Xinhui Cc: Andrew Morton Cc: Davidlohr Bueso Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Waiman Long Link: http://lkml.kernel.org/r/1468484156-4521-1-git-send-email-wanpeng.li@hotmail.com Signed-off-by: Ingo Molnar --- kernel/locking/qspinlock_paravirt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 37649e69056c..8a99abf58080 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h @@ -450,7 +450,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) goto gotlock; } } - WRITE_ONCE(pn->state, vcpu_halted); + WRITE_ONCE(pn->state, vcpu_hashed); qstat_inc(qstat_pv_wait_head, true); qstat_inc(qstat_pv_wait_again, waitcnt); pv_wait(&l->locked, _Q_SLOW_VAL); From c2ace36b884de9330c4149064ae8d212d2e0d9ee Mon Sep 17 00:00:00 2001 From: Pan Xinhui Date: Wed, 13 Jul 2016 18:23:34 +0800 Subject: [PATCH 4/4] locking/pvqspinlock: Fix a bug in qstat_read() It's obviously wrong to set stat to NULL. So lets remove it. Otherwise it is always zero when we check the latency of kick/wake. Signed-off-by: Pan Xinhui Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Waiman Long Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1468405414-3700-1-git-send-email-xinhui.pan@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- kernel/locking/qspinlock_stat.h | 1 - 1 file changed, 1 deletion(-) diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index 22e025309845..b9d031516254 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h @@ -153,7 +153,6 @@ static ssize_t qstat_read(struct file *file, char __user *user_buf, */ if ((counter == qstat_pv_latency_kick) || (counter == qstat_pv_latency_wake)) { - stat = 0; if (kicks) stat = DIV_ROUND_CLOSEST_ULL(stat, kicks); }