A set of PI futex fixes:

- Address a longstanding issue where the user space part of the PI futex
     is not writeable. The kernel returns with inconsistent state which can
     in the worst case result in a UAF of a tasks kernel stack. The solution
     is to establish consistent kernel state which makes future operations
     on the futex fail because user space and kernel space state are
     inconsistent. Not a problem as PI futexes fundamentaly require a
     functional RW mapping and if user space pulls the rug under it, then it
     can keep the pieces it asked for.
 
   - Address an issue where the return value is incorrect in case that the
     futex was acquired after a timeout/signal made the waiter drop out of
     the rtmutex wait. In one of the corner cases the kernel returned an
     error code despite having successfully acquired the futex.
 -----BEGIN PGP SIGNATURE-----
 
 iQJHBAABCgAxFiEEQp8+kY+LLUocC4bMphj1TA10mKEFAmATCSYTHHRnbHhAbGlu
 dXRyb25peC5kZQAKCRCmGPVMDXSYoT8zEADDFWxDp9SwgMKBf98ZJqfPvPbIbmqW
 xhJr1R+RLBNBmYEahQKFPWqlPf0JMDzIjYeu8fxkwPVwsTBzSgJ7Lka1acDI/TQG
 X6kXTIgX/SNdkYZ534b+FcPutYW+KFrpprebIF5U8NLII2xnlNrcVaVzeUvs5qgx
 ssjP79eJAWFSCXl2CJFVneMIcbrU+7czgjW0Cr84Z6O0GanJqPWVzngzgts2bvZN
 K29BGQrWcQhq0W641f5toLYoF+kq98cgtMlwY49quZW9pbNToKeUXDrOro6y8fkg
 J8dYdzFH5KS9SHxACP19y4ILXqCi3GIkDs+RsUtcyY1ZoY20DLV4rEvG26pX9azO
 5b6MeWsWHwHHxnp1apYenq0QjvfKKuJZdet1xctC+vivl+4RX4m2IvFONJLDQ++o
 fJ2j1m/39szg8gw3oObtL0zIPMczFMKgh7rCNkkTedghXevynMoi88RE643ELq8d
 MFr0C0n2ThHFbewtdyOZtZ6zERAKmPynx57iIETC63sChxiUfwmhFkxIuFUQkkwg
 G239O4dIVLGT4JKkOgyftRyog8JOzvdQb7qh1aYoj6tJNevVXIfuFOo6gRFp2SXp
 T2Rot+3TJaUS+f/GqsyHreJYhpLK7KSesjdYS1xYA9oWVtgt0AKVcY1AbSpyBifk
 ACXMtP8cjU0KgA==
 =wzqT
 -----END PGP SIGNATURE-----

Merge tag 'locking-urgent-2021-01-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull locking fixes from Thomas Gleixner:
 "A set of PI futex fixes:

   - Address a longstanding issue where the user space part of the PI
     futex is not writeable. The kernel returns with inconsistent state
     which can in the worst case result in a UAF of a tasks kernel
     stack.

     The solution is to establish consistent kernel state which makes
     future operations on the futex fail because user space and kernel
     space state are inconsistent. Not a problem as PI futexes
     fundamentaly require a functional RW mapping and if user space
     pulls the rug under it, then it can keep the pieces it asked for.

   - Address an issue where the return value is incorrect in case that
     the futex was acquired after a timeout/signal made the waiter drop
     out of the rtmutex wait.

     In one of the corner cases the kernel returned an error code
     despite having successfully acquired the futex"

* tag 'locking-urgent-2021-01-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  futex: Handle faults correctly for PI futexes
  futex: Simplify fixup_pi_state_owner()
  futex: Use pi_state_update_owner() in put_pi_state()
  rtmutex: Remove unused argument from rt_mutex_proxy_unlock()
  futex: Provide and use pi_state_update_owner()
  futex: Replace pointless printk in fixup_owner()
  futex: Ensure the correct return value from futex_lock_pi()
This commit is contained in:
Linus Torvalds 2021-01-28 11:18:43 -08:00
commit c64396cc36
3 changed files with 98 additions and 127 deletions

View file

@ -763,6 +763,29 @@ static struct futex_pi_state *alloc_pi_state(void)
return pi_state;
}
static void pi_state_update_owner(struct futex_pi_state *pi_state,
struct task_struct *new_owner)
{
struct task_struct *old_owner = pi_state->owner;
lockdep_assert_held(&pi_state->pi_mutex.wait_lock);
if (old_owner) {
raw_spin_lock(&old_owner->pi_lock);
WARN_ON(list_empty(&pi_state->list));
list_del_init(&pi_state->list);
raw_spin_unlock(&old_owner->pi_lock);
}
if (new_owner) {
raw_spin_lock(&new_owner->pi_lock);
WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &new_owner->pi_state_list);
pi_state->owner = new_owner;
raw_spin_unlock(&new_owner->pi_lock);
}
}
static void get_pi_state(struct futex_pi_state *pi_state)
{
WARN_ON_ONCE(!refcount_inc_not_zero(&pi_state->refcount));
@ -785,17 +808,11 @@ static void put_pi_state(struct futex_pi_state *pi_state)
* and has cleaned up the pi_state already
*/
if (pi_state->owner) {
struct task_struct *owner;
unsigned long flags;
raw_spin_lock_irqsave(&pi_state->pi_mutex.wait_lock, flags);
owner = pi_state->owner;
if (owner) {
raw_spin_lock(&owner->pi_lock);
list_del_init(&pi_state->list);
raw_spin_unlock(&owner->pi_lock);
}
rt_mutex_proxy_unlock(&pi_state->pi_mutex, owner);
pi_state_update_owner(pi_state, NULL);
rt_mutex_proxy_unlock(&pi_state->pi_mutex);
raw_spin_unlock_irqrestore(&pi_state->pi_mutex.wait_lock, flags);
}
@ -941,7 +958,8 @@ static inline void exit_pi_state_list(struct task_struct *curr) { }
* FUTEX_OWNER_DIED bit. See [4]
*
* [10] There is no transient state which leaves owner and user space
* TID out of sync.
* TID out of sync. Except one error case where the kernel is denied
* write access to the user address, see fixup_pi_state_owner().
*
*
* Serialization and lifetime rules:
@ -1521,26 +1539,15 @@ static int wake_futex_pi(u32 __user *uaddr, u32 uval, struct futex_pi_state *pi_
ret = -EINVAL;
}
if (ret)
goto out_unlock;
/*
* This is a point of no return; once we modify the uval there is no
* going back and subsequent operations must not fail.
*/
raw_spin_lock(&pi_state->owner->pi_lock);
WARN_ON(list_empty(&pi_state->list));
list_del_init(&pi_state->list);
raw_spin_unlock(&pi_state->owner->pi_lock);
raw_spin_lock(&new_owner->pi_lock);
WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &new_owner->pi_state_list);
pi_state->owner = new_owner;
raw_spin_unlock(&new_owner->pi_lock);
postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
if (!ret) {
/*
* This is a point of no return; once we modified the uval
* there is no going back and subsequent operations must
* not fail.
*/
pi_state_update_owner(pi_state, new_owner);
postunlock = __rt_mutex_futex_unlock(&pi_state->pi_mutex, &wake_q);
}
out_unlock:
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
@ -2323,18 +2330,13 @@ static void unqueue_me_pi(struct futex_q *q)
spin_unlock(q->lock_ptr);
}
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
struct task_struct *argowner)
static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
struct task_struct *argowner)
{
struct futex_pi_state *pi_state = q->pi_state;
u32 uval, curval, newval;
struct task_struct *oldowner, *newowner;
u32 newtid;
int ret, err = 0;
lockdep_assert_held(q->lock_ptr);
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
u32 uval, curval, newval, newtid;
int err = 0;
oldowner = pi_state->owner;
@ -2368,14 +2370,12 @@ retry:
* We raced against a concurrent self; things are
* already fixed up. Nothing to do.
*/
ret = 0;
goto out_unlock;
return 0;
}
if (__rt_mutex_futex_trylock(&pi_state->pi_mutex)) {
/* We got the lock after all, nothing to fix. */
ret = 0;
goto out_unlock;
/* We got the lock. pi_state is correct. Tell caller. */
return 1;
}
/*
@ -2402,8 +2402,7 @@ retry:
* We raced against a concurrent self; things are
* already fixed up. Nothing to do.
*/
ret = 0;
goto out_unlock;
return 1;
}
newowner = argowner;
}
@ -2433,22 +2432,9 @@ retry:
* We fixed up user space. Now we need to fix the pi_state
* itself.
*/
if (pi_state->owner != NULL) {
raw_spin_lock(&pi_state->owner->pi_lock);
WARN_ON(list_empty(&pi_state->list));
list_del_init(&pi_state->list);
raw_spin_unlock(&pi_state->owner->pi_lock);
}
pi_state_update_owner(pi_state, newowner);
pi_state->owner = newowner;
raw_spin_lock(&newowner->pi_lock);
WARN_ON(!list_empty(&pi_state->list));
list_add(&pi_state->list, &newowner->pi_state_list);
raw_spin_unlock(&newowner->pi_lock);
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
return 0;
return argowner == current;
/*
* In order to reschedule or handle a page fault, we need to drop the
@ -2469,17 +2455,16 @@ handle_err:
switch (err) {
case -EFAULT:
ret = fault_in_user_writeable(uaddr);
err = fault_in_user_writeable(uaddr);
break;
case -EAGAIN:
cond_resched();
ret = 0;
err = 0;
break;
default:
WARN_ON_ONCE(1);
ret = err;
break;
}
@ -2489,17 +2474,44 @@ handle_err:
/*
* Check if someone else fixed it for us:
*/
if (pi_state->owner != oldowner) {
ret = 0;
goto out_unlock;
}
if (pi_state->owner != oldowner)
return argowner == current;
if (ret)
goto out_unlock;
/* Retry if err was -EAGAIN or the fault in succeeded */
if (!err)
goto retry;
goto retry;
/*
* fault_in_user_writeable() failed so user state is immutable. At
* best we can make the kernel state consistent but user state will
* be most likely hosed and any subsequent unlock operation will be
* rejected due to PI futex rule [10].
*
* Ensure that the rtmutex owner is also the pi_state owner despite
* the user space value claiming something different. There is no
* point in unlocking the rtmutex if current is the owner as it
* would need to wait until the next waiter has taken the rtmutex
* to guarantee consistent state. Keep it simple. Userspace asked
* for this wreckaged state.
*
* The rtmutex has an owner - either current or some other
* task. See the EAGAIN loop above.
*/
pi_state_update_owner(pi_state, rt_mutex_owner(&pi_state->pi_mutex));
out_unlock:
return err;
}
static int fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
struct task_struct *argowner)
{
struct futex_pi_state *pi_state = q->pi_state;
int ret;
lockdep_assert_held(q->lock_ptr);
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
ret = __fixup_pi_state_owner(uaddr, q, argowner);
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
return ret;
}
@ -2523,8 +2535,6 @@ static long futex_wait_restart(struct restart_block *restart);
*/
static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
{
int ret = 0;
if (locked) {
/*
* Got the lock. We might not be the anticipated owner if we
@ -2535,8 +2545,8 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
* stable state, anything else needs more attention.
*/
if (q->pi_state->owner != current)
ret = fixup_pi_state_owner(uaddr, q, current);
return ret ? ret : locked;
return fixup_pi_state_owner(uaddr, q, current);
return 1;
}
/*
@ -2547,23 +2557,17 @@ static int fixup_owner(u32 __user *uaddr, struct futex_q *q, int locked)
* Another speculative read; pi_state->owner == current is unstable
* but needs our attention.
*/
if (q->pi_state->owner == current) {
ret = fixup_pi_state_owner(uaddr, q, NULL);
return ret;
}
if (q->pi_state->owner == current)
return fixup_pi_state_owner(uaddr, q, NULL);
/*
* Paranoia check. If we did not take the lock, then we should not be
* the owner of the rt_mutex.
* the owner of the rt_mutex. Warn and establish consistent state.
*/
if (rt_mutex_owner(&q->pi_state->pi_mutex) == current) {
printk(KERN_ERR "fixup_owner: ret = %d pi-mutex: %p "
"pi-state %p\n", ret,
q->pi_state->pi_mutex.owner,
q->pi_state->owner);
}
if (WARN_ON_ONCE(rt_mutex_owner(&q->pi_state->pi_mutex) == current))
return fixup_pi_state_owner(uaddr, q, current);
return ret;
return 0;
}
/**
@ -2771,7 +2775,6 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
ktime_t *time, int trylock)
{
struct hrtimer_sleeper timeout, *to;
struct futex_pi_state *pi_state = NULL;
struct task_struct *exiting = NULL;
struct rt_mutex_waiter rt_waiter;
struct futex_hash_bucket *hb;
@ -2907,23 +2910,8 @@ no_block:
if (res)
ret = (res < 0) ? res : 0;
/*
* If fixup_owner() faulted and was unable to handle the fault, unlock
* it and return the fault to userspace.
*/
if (ret && (rt_mutex_owner(&q.pi_state->pi_mutex) == current)) {
pi_state = q.pi_state;
get_pi_state(pi_state);
}
/* Unqueue and drop the lock */
unqueue_me_pi(&q);
if (pi_state) {
rt_mutex_futex_unlock(&pi_state->pi_mutex);
put_pi_state(pi_state);
}
goto out;
out_unlock_put_key:
@ -3183,7 +3171,6 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
u32 __user *uaddr2)
{
struct hrtimer_sleeper timeout, *to;
struct futex_pi_state *pi_state = NULL;
struct rt_mutex_waiter rt_waiter;
struct futex_hash_bucket *hb;
union futex_key key2 = FUTEX_KEY_INIT;
@ -3261,16 +3248,17 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (q.pi_state && (q.pi_state->owner != current)) {
spin_lock(q.lock_ptr);
ret = fixup_pi_state_owner(uaddr2, &q, current);
if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
pi_state = q.pi_state;
get_pi_state(pi_state);
}
/*
* Drop the reference to the pi state which
* the requeue_pi() code acquired for us.
*/
put_pi_state(q.pi_state);
spin_unlock(q.lock_ptr);
/*
* Adjust the return value. It's either -EFAULT or
* success (1) but the caller expects 0 for success.
*/
ret = ret < 0 ? ret : 0;
}
} else {
struct rt_mutex *pi_mutex;
@ -3301,25 +3289,10 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
if (res)
ret = (res < 0) ? res : 0;
/*
* If fixup_pi_state_owner() faulted and was unable to handle
* the fault, unlock the rt_mutex and return the fault to
* userspace.
*/
if (ret && rt_mutex_owner(&q.pi_state->pi_mutex) == current) {
pi_state = q.pi_state;
get_pi_state(pi_state);
}
/* Unqueue and drop the lock. */
unqueue_me_pi(&q);
}
if (pi_state) {
rt_mutex_futex_unlock(&pi_state->pi_mutex);
put_pi_state(pi_state);
}
if (ret == -EINTR) {
/*
* We've already been requeued, but cannot restart by calling

View file

@ -1716,8 +1716,7 @@ void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
* possible because it belongs to the pi_state which is about to be freed
* and it is not longer visible to other tasks.
*/
void rt_mutex_proxy_unlock(struct rt_mutex *lock,
struct task_struct *proxy_owner)
void rt_mutex_proxy_unlock(struct rt_mutex *lock)
{
debug_rt_mutex_proxy_unlock(lock);
rt_mutex_set_owner(lock, NULL);

View file

@ -133,8 +133,7 @@ enum rtmutex_chainwalk {
extern struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock);
extern void rt_mutex_init_proxy_locked(struct rt_mutex *lock,
struct task_struct *proxy_owner);
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock,
struct task_struct *proxy_owner);
extern void rt_mutex_proxy_unlock(struct rt_mutex *lock);
extern void rt_mutex_init_waiter(struct rt_mutex_waiter *waiter);
extern int __rt_mutex_start_proxy_lock(struct rt_mutex *lock,
struct rt_mutex_waiter *waiter,