From 6687659568e2ec5b3ac24b39c5d26ce8b9d90434 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 17 Apr 2016 23:31:41 -0700 Subject: [PATCH 1/4] locking/pvqspinlock: Fix division by zero in qstat_read() While playing with the qstat statistics (in /qlockstat/) I ran into the following splat on a VM when opening pv_hash_hops: divide error: 0000 [#1] SMP ... RIP: 0010:[] [] qstat_read+0x12e/0x1e0 ... Call Trace: [] ? mem_cgroup_commit_charge+0x6c/0xd0 [] ? page_add_new_anon_rmap+0x8c/0xd0 [] ? handle_mm_fault+0x1439/0x1b40 [] ? do_mmap+0x449/0x550 [] ? __vfs_read+0x23/0xd0 [] ? rw_verify_area+0x52/0xd0 [] ? vfs_read+0x81/0x120 [] ? SyS_read+0x42/0xa0 [] ? entry_SYSCALL_64_fastpath+0x1e/0xa8 Fix this by verifying that qstat_pv_kick_unlock is in fact non-zero, similarly to what the qstat_pv_latency_wake case does, as if nothing else, this can come from resetting the statistics, thus having 0 kicks should be quite valid in this context. Signed-off-by: Davidlohr Bueso Reviewed-by: Waiman Long Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: dave@stgolabs.net Cc: waiman.long@hpe.com Link: http://lkml.kernel.org/r/1460961103-24953-1-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- kernel/locking/qspinlock_stat.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index eb2a2c9bc3fc..d734b7502001 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h @@ -136,10 +136,12 @@ static ssize_t qstat_read(struct file *file, char __user *user_buf, } if (counter == qstat_pv_hash_hops) { - u64 frac; + u64 frac = 0; - frac = 100ULL * do_div(stat, kicks); - frac = DIV_ROUND_CLOSEST_ULL(frac, kicks); + if (kicks) { + frac = 100ULL * do_div(stat, kicks); + frac = DIV_ROUND_CLOSEST_ULL(frac, kicks); + } /* * Return a X.XX decimal number From 89e9e66ba1b3bde9d8ea90566c2aee20697ad681 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 15 Apr 2016 14:35:39 +0200 Subject: [PATCH 2/4] futex: Handle unlock_pi race gracefully If userspace calls UNLOCK_PI unconditionally without trying the TID -> 0 transition in user space first then the user space value might not have the waiters bit set. This opens the following race: CPU0 CPU1 uval = get_user(futex) lock(hb) lock(hb) futex |= FUTEX_WAITERS .... unlock(hb) cmpxchg(futex, uval, newval) So the cmpxchg fails and returns -EINVAL to user space, which is wrong because the futex value is valid. To handle this (yes, yet another) corner case gracefully, check for a flag change and retry. [ tglx: Massaged changelog and slightly reworked implementation ] Fixes: ccf9e6a80d9e ("futex: Make unlock_pi more robust") Signed-off-by: Sebastian Andrzej Siewior Cc: stable@vger.kernel.org Cc: Davidlohr Bueso Cc: Darren Hart Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1460723739-5195-1-git-send-email-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- kernel/futex.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/kernel/futex.c b/kernel/futex.c index a5d2e74c89e0..fd204e1670c9 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1295,10 +1295,20 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_q *this, if (unlikely(should_fail_futex(true))) ret = -EFAULT; - if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) + if (cmpxchg_futex_value_locked(&curval, uaddr, uval, newval)) { ret = -EFAULT; - else if (curval != uval) - ret = -EINVAL; + } else if (curval != uval) { + /* + * If a unconditional UNLOCK_PI operation (user space did not + * try the TID->0 transition) raced with a waiter setting the + * FUTEX_WAITERS flag between get_user() and locking the hash + * bucket lock, retry the operation. + */ + if ((FUTEX_TID_MASK & curval) == uval) + ret = -EAGAIN; + else + ret = -EINVAL; + } if (ret) { raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock); return ret; @@ -2622,6 +2632,15 @@ retry: */ if (ret == -EFAULT) goto pi_faulted; + /* + * A unconditional UNLOCK_PI op raced against a waiter + * setting the FUTEX_WAITERS bit. Try again. + */ + if (ret == -EAGAIN) { + spin_unlock(&hb->lock); + put_futex_key(&key); + goto retry; + } /* * wake_futex_pi has detected invalid state. Tell user * space. From fe1bce9e2107ba3a8faffe572483b6974201a0e6 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 20 Apr 2016 20:09:24 -0700 Subject: [PATCH 3/4] futex: Acknowledge a new waiter in counter before plist Otherwise an incoming waker on the dest hash bucket can miss the waiter adding itself to the plist during the lockless check optimization (small window but still the correct way of doing this); similarly to the decrement counterpart. Suggested-by: Peter Zijlstra Signed-off-by: Davidlohr Bueso Cc: Davidlohr Bueso Cc: bigeasy@linutronix.de Cc: dvhart@infradead.org Cc: stable@kernel.org Link: http://lkml.kernel.org/r/1461208164-29150-1-git-send-email-dave@stgolabs.net Signed-off-by: Thomas Gleixner --- kernel/futex.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/futex.c b/kernel/futex.c index fd204e1670c9..c20f06f38ef3 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -1535,8 +1535,8 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1, if (likely(&hb1->chain != &hb2->chain)) { plist_del(&q->list, &hb1->chain); hb_waiters_dec(hb1); - plist_add(&q->list, &hb2->chain); hb_waiters_inc(hb2); + plist_add(&q->list, &hb2->chain); q->lock_ptr = &hb2->lock; } get_futex_key_refs(key2); From fba7cd681b6155e2d93e7862fcd6f970336b83c3 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Thu, 14 Apr 2016 15:36:03 +0200 Subject: [PATCH 4/4] asm-generic/futex: Re-enable preemption in futex_atomic_cmpxchg_inatomic() The recent decoupling of pagefault disable and preempt disable added an explicit preempt_disable/enable() pair to the futex_atomic_cmpxchg_inatomic() implementation in asm-generic/futex.h. But it forgot to add preempt_enable() calls to the error handling code pathes, which results in a preemption count imbalance. This is observable on boot when the test for atomic_cmpxchg() is calling futex_atomic_cmpxchg_inatomic() on a NULL pointer. Add the missing preempt_enable() calls to the error handling code pathes. [ tglx: Massaged changelog ] Fixes: d9b9ff8c1889 ("sched/preempt, futex: Disable preemption in UP futex_atomic_cmpxchg_inatomic() explicitly") Signed-off-by: Romain Perier Cc: linux-arch@vger.kernel.org Cc: Thomas Petazzoni Cc: Arnd Bergmann Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1460640963-690-1-git-send-email-romain.perier@free-electrons.com Signed-off-by: Thomas Gleixner --- include/asm-generic/futex.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index e56272c919b5..bf2d34c9d804 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -108,11 +108,15 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 val; preempt_disable(); - if (unlikely(get_user(val, uaddr) != 0)) + if (unlikely(get_user(val, uaddr) != 0)) { + preempt_enable(); return -EFAULT; + } - if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) + if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) { + preempt_enable(); return -EFAULT; + } *uval = val; preempt_enable();