Merge branch 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler fixes from Ingo Molnar:

 - Apply a number of membarrier related fixes and cleanups, which fix a use-after-free race in the membarrier code

 - Introduce proper RCU protection for tasks on the runqueue - to get rid of the subtle task_rcu_dereference() interface that was easy to get wrong

 - Misc fixes, but also an EAS speedup

* 'sched-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  sched/fair: Avoid redundant EAS calculation
  sched/core: Remove double update_max_interval() call on CPU startup
  sched/core: Fix preempt_schedule() interrupt return comment
  sched/fair: Fix -Wunused-but-set-variable warnings
  sched/core: Fix migration to invalid CPU in __set_cpus_allowed_ptr()
  sched/membarrier: Return -ENOMEM to userspace on memory allocation failure
  sched/membarrier: Skip IPIs when mm->mm_users == 1
  selftests, sched/membarrier: Add multi-threaded test
  sched/membarrier: Fix p->mm->membarrier_state racy load
  sched/membarrier: Call sync_core only before usermode for same mm
  sched/membarrier: Remove redundant check
  sched/membarrier: Fix private expedited registration check
  tasks, sched/core: RCUify the assignment of rq->curr
  tasks, sched/core: With a grace period after finish_task_switch(), remove unnecessary code
  tasks, sched/core: Ensure tasks are available for a grace period after leaving the runqueue
  tasks: Add a count of task RCU users
  sched/core: Convert vcpu_is_preempted() from macro to an inline function
  sched/fair: Remove unused cfs_rq_clock_task() function
This commit is contained in:
commit
9c5efe9ae7
|
@ -1033,6 +1033,7 @@ static int exec_mmap(struct mm_struct *mm)
|
||||||
}
|
}
|
||||||
task_lock(tsk);
|
task_lock(tsk);
|
||||||
active_mm = tsk->active_mm;
|
active_mm = tsk->active_mm;
|
||||||
|
membarrier_exec_mmap(mm);
|
||||||
tsk->mm = mm;
|
tsk->mm = mm;
|
||||||
tsk->active_mm = mm;
|
tsk->active_mm = mm;
|
||||||
activate_mm(active_mm, mm);
|
activate_mm(active_mm, mm);
|
||||||
|
@ -1825,7 +1826,6 @@ static int __do_execve_file(int fd, struct filename *filename,
|
||||||
/* execve succeeded */
|
/* execve succeeded */
|
||||||
current->fs->in_exec = 0;
|
current->fs->in_exec = 0;
|
||||||
current->in_execve = 0;
|
current->in_execve = 0;
|
||||||
membarrier_execve(current);
|
|
||||||
rseq_execve(current);
|
rseq_execve(current);
|
||||||
acct_update_integrals(current);
|
acct_update_integrals(current);
|
||||||
task_numa_free(current, false);
|
task_numa_free(current, false);
|
||||||
|
|
|
@ -383,6 +383,16 @@ struct mm_struct {
|
||||||
unsigned long highest_vm_end; /* highest vma end address */
|
unsigned long highest_vm_end; /* highest vma end address */
|
||||||
pgd_t * pgd;
|
pgd_t * pgd;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MEMBARRIER
|
||||||
|
/**
|
||||||
|
* @membarrier_state: Flags controlling membarrier behavior.
|
||||||
|
*
|
||||||
|
* This field is close to @pgd to hopefully fit in the same
|
||||||
|
* cache-line, which needs to be touched by switch_mm().
|
||||||
|
*/
|
||||||
|
atomic_t membarrier_state;
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* @mm_users: The number of users including userspace.
|
* @mm_users: The number of users including userspace.
|
||||||
*
|
*
|
||||||
|
@ -452,9 +462,7 @@ struct mm_struct {
|
||||||
unsigned long flags; /* Must use atomic bitops to access */
|
unsigned long flags; /* Must use atomic bitops to access */
|
||||||
|
|
||||||
struct core_state *core_state; /* coredumping support */
|
struct core_state *core_state; /* coredumping support */
|
||||||
#ifdef CONFIG_MEMBARRIER
|
|
||||||
atomic_t membarrier_state;
|
|
||||||
#endif
|
|
||||||
#ifdef CONFIG_AIO
|
#ifdef CONFIG_AIO
|
||||||
spinlock_t ioctx_lock;
|
spinlock_t ioctx_lock;
|
||||||
struct kioctx_table __rcu *ioctx_table;
|
struct kioctx_table __rcu *ioctx_table;
|
||||||
|
|
|
@ -6,16 +6,11 @@
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* rcuwait provides a way of blocking and waking up a single
|
* rcuwait provides a way of blocking and waking up a single
|
||||||
* task in an rcu-safe manner; where it is forbidden to use
|
* task in an rcu-safe manner.
|
||||||
* after exit_notify(). task_struct is not properly rcu protected,
|
|
||||||
* unless dealing with rcu-aware lists, ie: find_task_by_*().
|
|
||||||
*
|
*
|
||||||
* Alternatively we have task_rcu_dereference(), but the return
|
* The only time @task is non-nil is when a user is blocked (or
|
||||||
* semantics have different implications which would break the
|
* checking if it needs to) on a condition, and reset as soon as we
|
||||||
* wakeup side. The only time @task is non-nil is when a user is
|
* know that the condition has succeeded and are awoken.
|
||||||
* blocked (or checking if it needs to) on a condition, and reset
|
|
||||||
* as soon as we know that the condition has succeeded and are
|
|
||||||
* awoken.
|
|
||||||
*/
|
*/
|
||||||
struct rcuwait {
|
struct rcuwait {
|
||||||
struct task_struct __rcu *task;
|
struct task_struct __rcu *task;
|
||||||
|
@ -37,13 +32,6 @@ extern void rcuwait_wake_up(struct rcuwait *w);
|
||||||
*/
|
*/
|
||||||
#define rcuwait_wait_event(w, condition) \
|
#define rcuwait_wait_event(w, condition) \
|
||||||
({ \
|
({ \
|
||||||
/* \
|
|
||||||
* Complain if we are called after do_exit()/exit_notify(), \
|
|
||||||
* as we cannot rely on the rcu critical region for the \
|
|
||||||
* wakeup side. \
|
|
||||||
*/ \
|
|
||||||
WARN_ON(current->exit_state); \
|
|
||||||
\
|
|
||||||
rcu_assign_pointer((w)->task, current); \
|
rcu_assign_pointer((w)->task, current); \
|
||||||
for (;;) { \
|
for (;;) { \
|
||||||
/* \
|
/* \
|
||||||
|
|
|
@ -1130,7 +1130,10 @@ struct task_struct {
|
||||||
|
|
||||||
struct tlbflush_unmap_batch tlb_ubc;
|
struct tlbflush_unmap_batch tlb_ubc;
|
||||||
|
|
||||||
|
union {
|
||||||
|
refcount_t rcu_users;
|
||||||
struct rcu_head rcu;
|
struct rcu_head rcu;
|
||||||
|
};
|
||||||
|
|
||||||
/* Cache last used pipe for splice(): */
|
/* Cache last used pipe for splice(): */
|
||||||
struct pipe_inode_info *splice_pipe;
|
struct pipe_inode_info *splice_pipe;
|
||||||
|
@ -1839,7 +1842,10 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
|
||||||
* running or not.
|
* running or not.
|
||||||
*/
|
*/
|
||||||
#ifndef vcpu_is_preempted
|
#ifndef vcpu_is_preempted
|
||||||
# define vcpu_is_preempted(cpu) false
|
static inline bool vcpu_is_preempted(int cpu)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
|
extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
|
||||||
|
|
|
@ -362,16 +362,16 @@ enum {
|
||||||
|
|
||||||
static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
|
static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
|
||||||
{
|
{
|
||||||
|
if (current->mm != mm)
|
||||||
|
return;
|
||||||
if (likely(!(atomic_read(&mm->membarrier_state) &
|
if (likely(!(atomic_read(&mm->membarrier_state) &
|
||||||
MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE)))
|
MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE)))
|
||||||
return;
|
return;
|
||||||
sync_core_before_usermode();
|
sync_core_before_usermode();
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void membarrier_execve(struct task_struct *t)
|
extern void membarrier_exec_mmap(struct mm_struct *mm);
|
||||||
{
|
|
||||||
atomic_set(&t->mm->membarrier_state, 0);
|
|
||||||
}
|
|
||||||
#else
|
#else
|
||||||
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
|
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
|
||||||
static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
|
static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
|
||||||
|
@ -380,7 +380,7 @@ static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
static inline void membarrier_execve(struct task_struct *t)
|
static inline void membarrier_exec_mmap(struct mm_struct *mm)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
|
static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
|
||||||
|
|
|
@ -119,7 +119,7 @@ static inline void put_task_struct(struct task_struct *t)
|
||||||
__put_task_struct(t);
|
__put_task_struct(t);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct task_struct *task_rcu_dereference(struct task_struct **ptask);
|
void put_task_struct_rcu_user(struct task_struct *task);
|
||||||
|
|
||||||
#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
|
#ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT
|
||||||
extern int arch_task_struct_size __read_mostly;
|
extern int arch_task_struct_size __read_mostly;
|
||||||
|
|
|
@ -182,6 +182,11 @@ static void delayed_put_task_struct(struct rcu_head *rhp)
|
||||||
put_task_struct(tsk);
|
put_task_struct(tsk);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void put_task_struct_rcu_user(struct task_struct *task)
|
||||||
|
{
|
||||||
|
if (refcount_dec_and_test(&task->rcu_users))
|
||||||
|
call_rcu(&task->rcu, delayed_put_task_struct);
|
||||||
|
}
|
||||||
|
|
||||||
void release_task(struct task_struct *p)
|
void release_task(struct task_struct *p)
|
||||||
{
|
{
|
||||||
|
@ -222,76 +227,13 @@ repeat:
|
||||||
|
|
||||||
write_unlock_irq(&tasklist_lock);
|
write_unlock_irq(&tasklist_lock);
|
||||||
release_thread(p);
|
release_thread(p);
|
||||||
call_rcu(&p->rcu, delayed_put_task_struct);
|
put_task_struct_rcu_user(p);
|
||||||
|
|
||||||
p = leader;
|
p = leader;
|
||||||
if (unlikely(zap_leader))
|
if (unlikely(zap_leader))
|
||||||
goto repeat;
|
goto repeat;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Note that if this function returns a valid task_struct pointer (!NULL)
|
|
||||||
* task->usage must remain >0 for the duration of the RCU critical section.
|
|
||||||
*/
|
|
||||||
struct task_struct *task_rcu_dereference(struct task_struct **ptask)
|
|
||||||
{
|
|
||||||
struct sighand_struct *sighand;
|
|
||||||
struct task_struct *task;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We need to verify that release_task() was not called and thus
|
|
||||||
* delayed_put_task_struct() can't run and drop the last reference
|
|
||||||
* before rcu_read_unlock(). We check task->sighand != NULL,
|
|
||||||
* but we can read the already freed and reused memory.
|
|
||||||
*/
|
|
||||||
retry:
|
|
||||||
task = rcu_dereference(*ptask);
|
|
||||||
if (!task)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
probe_kernel_address(&task->sighand, sighand);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Pairs with atomic_dec_and_test() in put_task_struct(). If this task
|
|
||||||
* was already freed we can not miss the preceding update of this
|
|
||||||
* pointer.
|
|
||||||
*/
|
|
||||||
smp_rmb();
|
|
||||||
if (unlikely(task != READ_ONCE(*ptask)))
|
|
||||||
goto retry;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* We've re-checked that "task == *ptask", now we have two different
|
|
||||||
* cases:
|
|
||||||
*
|
|
||||||
* 1. This is actually the same task/task_struct. In this case
|
|
||||||
* sighand != NULL tells us it is still alive.
|
|
||||||
*
|
|
||||||
* 2. This is another task which got the same memory for task_struct.
|
|
||||||
* We can't know this of course, and we can not trust
|
|
||||||
* sighand != NULL.
|
|
||||||
*
|
|
||||||
* In this case we actually return a random value, but this is
|
|
||||||
* correct.
|
|
||||||
*
|
|
||||||
* If we return NULL - we can pretend that we actually noticed that
|
|
||||||
* *ptask was updated when the previous task has exited. Or pretend
|
|
||||||
* that probe_slab_address(&sighand) reads NULL.
|
|
||||||
*
|
|
||||||
* If we return the new task (because sighand is not NULL for any
|
|
||||||
* reason) - this is fine too. This (new) task can't go away before
|
|
||||||
* another gp pass.
|
|
||||||
*
|
|
||||||
* And note: We could even eliminate the false positive if re-read
|
|
||||||
* task->sighand once again to avoid the falsely NULL. But this case
|
|
||||||
* is very unlikely so we don't care.
|
|
||||||
*/
|
|
||||||
if (!sighand)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
return task;
|
|
||||||
}
|
|
||||||
|
|
||||||
void rcuwait_wake_up(struct rcuwait *w)
|
void rcuwait_wake_up(struct rcuwait *w)
|
||||||
{
|
{
|
||||||
struct task_struct *task;
|
struct task_struct *task;
|
||||||
|
@ -311,10 +253,6 @@ void rcuwait_wake_up(struct rcuwait *w)
|
||||||
*/
|
*/
|
||||||
smp_mb(); /* (B) */
|
smp_mb(); /* (B) */
|
||||||
|
|
||||||
/*
|
|
||||||
* Avoid using task_rcu_dereference() magic as long as we are careful,
|
|
||||||
* see comment in rcuwait_wait_event() regarding ->exit_state.
|
|
||||||
*/
|
|
||||||
task = rcu_dereference(w->task);
|
task = rcu_dereference(w->task);
|
||||||
if (task)
|
if (task)
|
||||||
wake_up_process(task);
|
wake_up_process(task);
|
||||||
|
|
|
@ -915,10 +915,12 @@ static struct task_struct *dup_task_struct(struct task_struct *orig, int node)
|
||||||
tsk->cpus_ptr = &tsk->cpus_mask;
|
tsk->cpus_ptr = &tsk->cpus_mask;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* One for us, one for whoever does the "release_task()" (usually
|
* One for the user space visible state that goes away when reaped.
|
||||||
* parent)
|
* One for the scheduler.
|
||||||
*/
|
*/
|
||||||
refcount_set(&tsk->usage, 2);
|
refcount_set(&tsk->rcu_users, 2);
|
||||||
|
/* One for the rcu users */
|
||||||
|
refcount_set(&tsk->usage, 1);
|
||||||
#ifdef CONFIG_BLK_DEV_IO_TRACE
|
#ifdef CONFIG_BLK_DEV_IO_TRACE
|
||||||
tsk->btrace_seq = 0;
|
tsk->btrace_seq = 0;
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1656,7 +1656,8 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
|
||||||
if (cpumask_equal(p->cpus_ptr, new_mask))
|
if (cpumask_equal(p->cpus_ptr, new_mask))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
if (!cpumask_intersects(new_mask, cpu_valid_mask)) {
|
dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
|
||||||
|
if (dest_cpu >= nr_cpu_ids) {
|
||||||
ret = -EINVAL;
|
ret = -EINVAL;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
@ -1677,7 +1678,6 @@ static int __set_cpus_allowed_ptr(struct task_struct *p,
|
||||||
if (cpumask_test_cpu(task_cpu(p), new_mask))
|
if (cpumask_test_cpu(task_cpu(p), new_mask))
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
dest_cpu = cpumask_any_and(cpu_valid_mask, new_mask);
|
|
||||||
if (task_running(rq, p) || p->state == TASK_WAKING) {
|
if (task_running(rq, p) || p->state == TASK_WAKING) {
|
||||||
struct migration_arg arg = { p, dest_cpu };
|
struct migration_arg arg = { p, dest_cpu };
|
||||||
/* Need help from migration thread: drop lock and wait. */
|
/* Need help from migration thread: drop lock and wait. */
|
||||||
|
@ -3254,7 +3254,7 @@ static struct rq *finish_task_switch(struct task_struct *prev)
|
||||||
/* Task is done with its stack. */
|
/* Task is done with its stack. */
|
||||||
put_task_stack(prev);
|
put_task_stack(prev);
|
||||||
|
|
||||||
put_task_struct(prev);
|
put_task_struct_rcu_user(prev);
|
||||||
}
|
}
|
||||||
|
|
||||||
tick_nohz_task_switch();
|
tick_nohz_task_switch();
|
||||||
|
@ -3358,15 +3358,15 @@ context_switch(struct rq *rq, struct task_struct *prev,
|
||||||
else
|
else
|
||||||
prev->active_mm = NULL;
|
prev->active_mm = NULL;
|
||||||
} else { // to user
|
} else { // to user
|
||||||
|
membarrier_switch_mm(rq, prev->active_mm, next->mm);
|
||||||
/*
|
/*
|
||||||
* sys_membarrier() requires an smp_mb() between setting
|
* sys_membarrier() requires an smp_mb() between setting
|
||||||
* rq->curr and returning to userspace.
|
* rq->curr / membarrier_switch_mm() and returning to userspace.
|
||||||
*
|
*
|
||||||
* The below provides this either through switch_mm(), or in
|
* The below provides this either through switch_mm(), or in
|
||||||
* case 'prev->active_mm == next->mm' through
|
* case 'prev->active_mm == next->mm' through
|
||||||
* finish_task_switch()'s mmdrop().
|
* finish_task_switch()'s mmdrop().
|
||||||
*/
|
*/
|
||||||
|
|
||||||
switch_mm_irqs_off(prev->active_mm, next->mm, next);
|
switch_mm_irqs_off(prev->active_mm, next->mm, next);
|
||||||
|
|
||||||
if (!prev->mm) { // from kernel
|
if (!prev->mm) { // from kernel
|
||||||
|
@ -4042,7 +4042,11 @@ static void __sched notrace __schedule(bool preempt)
|
||||||
|
|
||||||
if (likely(prev != next)) {
|
if (likely(prev != next)) {
|
||||||
rq->nr_switches++;
|
rq->nr_switches++;
|
||||||
rq->curr = next;
|
/*
|
||||||
|
* RCU users of rcu_dereference(rq->curr) may not see
|
||||||
|
* changes to task_struct made by pick_next_task().
|
||||||
|
*/
|
||||||
|
RCU_INIT_POINTER(rq->curr, next);
|
||||||
/*
|
/*
|
||||||
* The membarrier system call requires each architecture
|
* The membarrier system call requires each architecture
|
||||||
* to have a full memory barrier after updating
|
* to have a full memory barrier after updating
|
||||||
|
@ -4223,9 +4227,8 @@ static void __sched notrace preempt_schedule_common(void)
|
||||||
|
|
||||||
#ifdef CONFIG_PREEMPTION
|
#ifdef CONFIG_PREEMPTION
|
||||||
/*
|
/*
|
||||||
* this is the entry point to schedule() from in-kernel preemption
|
* This is the entry point to schedule() from in-kernel preemption
|
||||||
* off of preempt_enable. Kernel preemptions off return from interrupt
|
* off of preempt_enable.
|
||||||
* occur there and call schedule directly.
|
|
||||||
*/
|
*/
|
||||||
asmlinkage __visible void __sched notrace preempt_schedule(void)
|
asmlinkage __visible void __sched notrace preempt_schedule(void)
|
||||||
{
|
{
|
||||||
|
@ -4296,7 +4299,7 @@ EXPORT_SYMBOL_GPL(preempt_schedule_notrace);
|
||||||
#endif /* CONFIG_PREEMPTION */
|
#endif /* CONFIG_PREEMPTION */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* this is the entry point to schedule() from kernel preemption
|
* This is the entry point to schedule() from kernel preemption
|
||||||
* off of irq context.
|
* off of irq context.
|
||||||
* Note that this is called and returns with irqs disabled. This will
|
* Note that this is called and returns with irqs disabled. This will
|
||||||
* protect us against recursive calling from irq.
|
* protect us against recursive calling from irq.
|
||||||
|
@ -6069,7 +6072,8 @@ void init_idle(struct task_struct *idle, int cpu)
|
||||||
__set_task_cpu(idle, cpu);
|
__set_task_cpu(idle, cpu);
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
|
|
||||||
rq->curr = rq->idle = idle;
|
rq->idle = idle;
|
||||||
|
rcu_assign_pointer(rq->curr, idle);
|
||||||
idle->on_rq = TASK_ON_RQ_QUEUED;
|
idle->on_rq = TASK_ON_RQ_QUEUED;
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
idle->on_cpu = 1;
|
idle->on_cpu = 1;
|
||||||
|
@ -6430,8 +6434,6 @@ int sched_cpu_activate(unsigned int cpu)
|
||||||
}
|
}
|
||||||
rq_unlock_irqrestore(rq, &rf);
|
rq_unlock_irqrestore(rq, &rf);
|
||||||
|
|
||||||
update_max_interval();
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -749,7 +749,6 @@ void init_entity_runnable_average(struct sched_entity *se)
|
||||||
/* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
|
/* when this task enqueue'ed, it will contribute to its cfs_rq's load_avg */
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq);
|
|
||||||
static void attach_entity_cfs_rq(struct sched_entity *se);
|
static void attach_entity_cfs_rq(struct sched_entity *se);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1603,7 +1602,7 @@ static void task_numa_compare(struct task_numa_env *env,
|
||||||
return;
|
return;
|
||||||
|
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
cur = task_rcu_dereference(&dst_rq->curr);
|
cur = rcu_dereference(dst_rq->curr);
|
||||||
if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
|
if (cur && ((cur->flags & PF_EXITING) || is_idle_task(cur)))
|
||||||
cur = NULL;
|
cur = NULL;
|
||||||
|
|
||||||
|
@ -4354,20 +4353,15 @@ static inline u64 sched_cfs_bandwidth_slice(void)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Replenish runtime according to assigned quota and update expiration time.
|
* Replenish runtime according to assigned quota. We use sched_clock_cpu
|
||||||
* We use sched_clock_cpu directly instead of rq->clock to avoid adding
|
* directly instead of rq->clock to avoid adding additional synchronization
|
||||||
* additional synchronization around rq->lock.
|
* around rq->lock.
|
||||||
*
|
*
|
||||||
* requires cfs_b->lock
|
* requires cfs_b->lock
|
||||||
*/
|
*/
|
||||||
void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
|
void __refill_cfs_bandwidth_runtime(struct cfs_bandwidth *cfs_b)
|
||||||
{
|
{
|
||||||
u64 now;
|
if (cfs_b->quota != RUNTIME_INF)
|
||||||
|
|
||||||
if (cfs_b->quota == RUNTIME_INF)
|
|
||||||
return;
|
|
||||||
|
|
||||||
now = sched_clock_cpu(smp_processor_id());
|
|
||||||
cfs_b->runtime = cfs_b->quota;
|
cfs_b->runtime = cfs_b->quota;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -4376,15 +4370,6 @@ static inline struct cfs_bandwidth *tg_cfs_bandwidth(struct task_group *tg)
|
||||||
return &tg->cfs_bandwidth;
|
return &tg->cfs_bandwidth;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* rq->task_clock normalized against any time this cfs_rq has spent throttled */
|
|
||||||
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
|
|
||||||
{
|
|
||||||
if (unlikely(cfs_rq->throttle_count))
|
|
||||||
return cfs_rq->throttled_clock_task - cfs_rq->throttled_clock_task_time;
|
|
||||||
|
|
||||||
return rq_clock_task(rq_of(cfs_rq)) - cfs_rq->throttled_clock_task_time;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* returns 0 on failure to allocate runtime */
|
/* returns 0 on failure to allocate runtime */
|
||||||
static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
|
static int assign_cfs_rq_runtime(struct cfs_rq *cfs_rq)
|
||||||
{
|
{
|
||||||
|
@ -4476,7 +4461,6 @@ static int tg_unthrottle_up(struct task_group *tg, void *data)
|
||||||
|
|
||||||
cfs_rq->throttle_count--;
|
cfs_rq->throttle_count--;
|
||||||
if (!cfs_rq->throttle_count) {
|
if (!cfs_rq->throttle_count) {
|
||||||
/* adjust cfs_rq_clock_task() */
|
|
||||||
cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
|
cfs_rq->throttled_clock_task_time += rq_clock_task(rq) -
|
||||||
cfs_rq->throttled_clock_task;
|
cfs_rq->throttled_clock_task;
|
||||||
|
|
||||||
|
@ -4994,15 +4978,13 @@ static void init_cfs_rq_runtime(struct cfs_rq *cfs_rq)
|
||||||
|
|
||||||
void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
|
void start_cfs_bandwidth(struct cfs_bandwidth *cfs_b)
|
||||||
{
|
{
|
||||||
u64 overrun;
|
|
||||||
|
|
||||||
lockdep_assert_held(&cfs_b->lock);
|
lockdep_assert_held(&cfs_b->lock);
|
||||||
|
|
||||||
if (cfs_b->period_active)
|
if (cfs_b->period_active)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
cfs_b->period_active = 1;
|
cfs_b->period_active = 1;
|
||||||
overrun = hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
|
hrtimer_forward_now(&cfs_b->period_timer, cfs_b->period);
|
||||||
hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
|
hrtimer_start_expires(&cfs_b->period_timer, HRTIMER_MODE_ABS_PINNED);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5080,11 +5062,6 @@ static inline bool cfs_bandwidth_used(void)
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline u64 cfs_rq_clock_task(struct cfs_rq *cfs_rq)
|
|
||||||
{
|
|
||||||
return rq_clock_task(rq_of(cfs_rq));
|
|
||||||
}
|
|
||||||
|
|
||||||
static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {}
|
static void account_cfs_rq_runtime(struct cfs_rq *cfs_rq, u64 delta_exec) {}
|
||||||
static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; }
|
static bool check_cfs_rq_runtime(struct cfs_rq *cfs_rq) { return false; }
|
||||||
static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
|
static void check_enqueue_throttle(struct cfs_rq *cfs_rq) {}
|
||||||
|
@ -6412,7 +6389,7 @@ static int find_energy_efficient_cpu(struct task_struct *p, int prev_cpu)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Evaluate the energy impact of using this CPU. */
|
/* Evaluate the energy impact of using this CPU. */
|
||||||
if (max_spare_cap_cpu >= 0) {
|
if (max_spare_cap_cpu >= 0 && max_spare_cap_cpu != prev_cpu) {
|
||||||
cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
|
cur_delta = compute_energy(p, max_spare_cap_cpu, pd);
|
||||||
cur_delta -= base_energy_pd;
|
cur_delta -= base_energy_pd;
|
||||||
if (cur_delta < best_delta) {
|
if (cur_delta < best_delta) {
|
||||||
|
|
|
@ -30,10 +30,42 @@ static void ipi_mb(void *info)
|
||||||
smp_mb(); /* IPIs should be serializing but paranoid. */
|
smp_mb(); /* IPIs should be serializing but paranoid. */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void ipi_sync_rq_state(void *info)
|
||||||
|
{
|
||||||
|
struct mm_struct *mm = (struct mm_struct *) info;
|
||||||
|
|
||||||
|
if (current->mm != mm)
|
||||||
|
return;
|
||||||
|
this_cpu_write(runqueues.membarrier_state,
|
||||||
|
atomic_read(&mm->membarrier_state));
|
||||||
|
/*
|
||||||
|
* Issue a memory barrier after setting
|
||||||
|
* MEMBARRIER_STATE_GLOBAL_EXPEDITED in the current runqueue to
|
||||||
|
* guarantee that no memory access following registration is reordered
|
||||||
|
* before registration.
|
||||||
|
*/
|
||||||
|
smp_mb();
|
||||||
|
}
|
||||||
|
|
||||||
|
void membarrier_exec_mmap(struct mm_struct *mm)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* Issue a memory barrier before clearing membarrier_state to
|
||||||
|
* guarantee that no memory access prior to exec is reordered after
|
||||||
|
* clearing this state.
|
||||||
|
*/
|
||||||
|
smp_mb();
|
||||||
|
atomic_set(&mm->membarrier_state, 0);
|
||||||
|
/*
|
||||||
|
* Keep the runqueue membarrier_state in sync with this mm
|
||||||
|
* membarrier_state.
|
||||||
|
*/
|
||||||
|
this_cpu_write(runqueues.membarrier_state, 0);
|
||||||
|
}
|
||||||
|
|
||||||
static int membarrier_global_expedited(void)
|
static int membarrier_global_expedited(void)
|
||||||
{
|
{
|
||||||
int cpu;
|
int cpu;
|
||||||
bool fallback = false;
|
|
||||||
cpumask_var_t tmpmask;
|
cpumask_var_t tmpmask;
|
||||||
|
|
||||||
if (num_online_cpus() == 1)
|
if (num_online_cpus() == 1)
|
||||||
|
@ -45,17 +77,11 @@ static int membarrier_global_expedited(void)
|
||||||
*/
|
*/
|
||||||
smp_mb(); /* system call entry is not a mb. */
|
smp_mb(); /* system call entry is not a mb. */
|
||||||
|
|
||||||
/*
|
if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
|
||||||
* Expedited membarrier commands guarantee that they won't
|
return -ENOMEM;
|
||||||
* block, hence the GFP_NOWAIT allocation flag and fallback
|
|
||||||
* implementation.
|
|
||||||
*/
|
|
||||||
if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
|
|
||||||
/* Fallback for OOM. */
|
|
||||||
fallback = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
cpus_read_lock();
|
cpus_read_lock();
|
||||||
|
rcu_read_lock();
|
||||||
for_each_online_cpu(cpu) {
|
for_each_online_cpu(cpu) {
|
||||||
struct task_struct *p;
|
struct task_struct *p;
|
||||||
|
|
||||||
|
@ -70,23 +96,28 @@ static int membarrier_global_expedited(void)
|
||||||
if (cpu == raw_smp_processor_id())
|
if (cpu == raw_smp_processor_id())
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
rcu_read_lock();
|
if (!(READ_ONCE(cpu_rq(cpu)->membarrier_state) &
|
||||||
p = task_rcu_dereference(&cpu_rq(cpu)->curr);
|
MEMBARRIER_STATE_GLOBAL_EXPEDITED))
|
||||||
if (p && p->mm && (atomic_read(&p->mm->membarrier_state) &
|
continue;
|
||||||
MEMBARRIER_STATE_GLOBAL_EXPEDITED)) {
|
|
||||||
if (!fallback)
|
/*
|
||||||
|
* Skip the CPU if it runs a kernel thread. The scheduler
|
||||||
|
* leaves the prior task mm in place as an optimization when
|
||||||
|
* scheduling a kthread.
|
||||||
|
*/
|
||||||
|
p = rcu_dereference(cpu_rq(cpu)->curr);
|
||||||
|
if (p->flags & PF_KTHREAD)
|
||||||
|
continue;
|
||||||
|
|
||||||
__cpumask_set_cpu(cpu, tmpmask);
|
__cpumask_set_cpu(cpu, tmpmask);
|
||||||
else
|
|
||||||
smp_call_function_single(cpu, ipi_mb, NULL, 1);
|
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
}
|
|
||||||
if (!fallback) {
|
|
||||||
preempt_disable();
|
preempt_disable();
|
||||||
smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
|
smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
|
||||||
preempt_enable();
|
preempt_enable();
|
||||||
|
|
||||||
free_cpumask_var(tmpmask);
|
free_cpumask_var(tmpmask);
|
||||||
}
|
|
||||||
cpus_read_unlock();
|
cpus_read_unlock();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -101,22 +132,22 @@ static int membarrier_global_expedited(void)
|
||||||
static int membarrier_private_expedited(int flags)
|
static int membarrier_private_expedited(int flags)
|
||||||
{
|
{
|
||||||
int cpu;
|
int cpu;
|
||||||
bool fallback = false;
|
|
||||||
cpumask_var_t tmpmask;
|
cpumask_var_t tmpmask;
|
||||||
|
struct mm_struct *mm = current->mm;
|
||||||
|
|
||||||
if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
|
if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
|
||||||
if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
|
if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
if (!(atomic_read(&current->mm->membarrier_state) &
|
if (!(atomic_read(&mm->membarrier_state) &
|
||||||
MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
|
MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY))
|
||||||
return -EPERM;
|
return -EPERM;
|
||||||
} else {
|
} else {
|
||||||
if (!(atomic_read(&current->mm->membarrier_state) &
|
if (!(atomic_read(&mm->membarrier_state) &
|
||||||
MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
|
MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY))
|
||||||
return -EPERM;
|
return -EPERM;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (num_online_cpus() == 1)
|
if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1)
|
||||||
return 0;
|
return 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -125,17 +156,11 @@ static int membarrier_private_expedited(int flags)
|
||||||
*/
|
*/
|
||||||
smp_mb(); /* system call entry is not a mb. */
|
smp_mb(); /* system call entry is not a mb. */
|
||||||
|
|
||||||
/*
|
if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
|
||||||
* Expedited membarrier commands guarantee that they won't
|
return -ENOMEM;
|
||||||
* block, hence the GFP_NOWAIT allocation flag and fallback
|
|
||||||
* implementation.
|
|
||||||
*/
|
|
||||||
if (!zalloc_cpumask_var(&tmpmask, GFP_NOWAIT)) {
|
|
||||||
/* Fallback for OOM. */
|
|
||||||
fallback = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
cpus_read_lock();
|
cpus_read_lock();
|
||||||
|
rcu_read_lock();
|
||||||
for_each_online_cpu(cpu) {
|
for_each_online_cpu(cpu) {
|
||||||
struct task_struct *p;
|
struct task_struct *p;
|
||||||
|
|
||||||
|
@ -150,21 +175,17 @@ static int membarrier_private_expedited(int flags)
|
||||||
if (cpu == raw_smp_processor_id())
|
if (cpu == raw_smp_processor_id())
|
||||||
continue;
|
continue;
|
||||||
rcu_read_lock();
|
rcu_read_lock();
|
||||||
p = task_rcu_dereference(&cpu_rq(cpu)->curr);
|
p = rcu_dereference(cpu_rq(cpu)->curr);
|
||||||
if (p && p->mm == current->mm) {
|
if (p && p->mm == mm)
|
||||||
if (!fallback)
|
|
||||||
__cpumask_set_cpu(cpu, tmpmask);
|
__cpumask_set_cpu(cpu, tmpmask);
|
||||||
else
|
|
||||||
smp_call_function_single(cpu, ipi_mb, NULL, 1);
|
|
||||||
}
|
}
|
||||||
rcu_read_unlock();
|
rcu_read_unlock();
|
||||||
}
|
|
||||||
if (!fallback) {
|
|
||||||
preempt_disable();
|
preempt_disable();
|
||||||
smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
|
smp_call_function_many(tmpmask, ipi_mb, NULL, 1);
|
||||||
preempt_enable();
|
preempt_enable();
|
||||||
|
|
||||||
free_cpumask_var(tmpmask);
|
free_cpumask_var(tmpmask);
|
||||||
}
|
|
||||||
cpus_read_unlock();
|
cpus_read_unlock();
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -177,32 +198,78 @@ static int membarrier_private_expedited(int flags)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int sync_runqueues_membarrier_state(struct mm_struct *mm)
|
||||||
|
{
|
||||||
|
int membarrier_state = atomic_read(&mm->membarrier_state);
|
||||||
|
cpumask_var_t tmpmask;
|
||||||
|
int cpu;
|
||||||
|
|
||||||
|
if (atomic_read(&mm->mm_users) == 1 || num_online_cpus() == 1) {
|
||||||
|
this_cpu_write(runqueues.membarrier_state, membarrier_state);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For single mm user, we can simply issue a memory barrier
|
||||||
|
* after setting MEMBARRIER_STATE_GLOBAL_EXPEDITED in the
|
||||||
|
* mm and in the current runqueue to guarantee that no memory
|
||||||
|
* access following registration is reordered before
|
||||||
|
* registration.
|
||||||
|
*/
|
||||||
|
smp_mb();
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!zalloc_cpumask_var(&tmpmask, GFP_KERNEL))
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For mm with multiple users, we need to ensure all future
|
||||||
|
* scheduler executions will observe @mm's new membarrier
|
||||||
|
* state.
|
||||||
|
*/
|
||||||
|
synchronize_rcu();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For each cpu runqueue, if the task's mm match @mm, ensure that all
|
||||||
|
* @mm's membarrier state set bits are also set in the runqueue's
|
||||||
|
* membarrier state. This ensures that a runqueue scheduling
|
||||||
|
* between threads which are users of @mm has its membarrier state
|
||||||
|
* updated.
|
||||||
|
*/
|
||||||
|
cpus_read_lock();
|
||||||
|
rcu_read_lock();
|
||||||
|
for_each_online_cpu(cpu) {
|
||||||
|
struct rq *rq = cpu_rq(cpu);
|
||||||
|
struct task_struct *p;
|
||||||
|
|
||||||
|
p = rcu_dereference(rq->curr);
|
||||||
|
if (p && p->mm == mm)
|
||||||
|
__cpumask_set_cpu(cpu, tmpmask);
|
||||||
|
}
|
||||||
|
rcu_read_unlock();
|
||||||
|
|
||||||
|
preempt_disable();
|
||||||
|
smp_call_function_many(tmpmask, ipi_sync_rq_state, mm, 1);
|
||||||
|
preempt_enable();
|
||||||
|
|
||||||
|
free_cpumask_var(tmpmask);
|
||||||
|
cpus_read_unlock();
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int membarrier_register_global_expedited(void)
|
static int membarrier_register_global_expedited(void)
|
||||||
{
|
{
|
||||||
struct task_struct *p = current;
|
struct task_struct *p = current;
|
||||||
struct mm_struct *mm = p->mm;
|
struct mm_struct *mm = p->mm;
|
||||||
|
int ret;
|
||||||
|
|
||||||
if (atomic_read(&mm->membarrier_state) &
|
if (atomic_read(&mm->membarrier_state) &
|
||||||
MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY)
|
MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY)
|
||||||
return 0;
|
return 0;
|
||||||
atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state);
|
atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED, &mm->membarrier_state);
|
||||||
if (atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1) {
|
ret = sync_runqueues_membarrier_state(mm);
|
||||||
/*
|
if (ret)
|
||||||
* For single mm user, single threaded process, we can
|
return ret;
|
||||||
* simply issue a memory barrier after setting
|
|
||||||
* MEMBARRIER_STATE_GLOBAL_EXPEDITED to guarantee that
|
|
||||||
* no memory access following registration is reordered
|
|
||||||
* before registration.
|
|
||||||
*/
|
|
||||||
smp_mb();
|
|
||||||
} else {
|
|
||||||
/*
|
|
||||||
* For multi-mm user threads, we need to ensure all
|
|
||||||
* future scheduler executions will observe the new
|
|
||||||
* thread flag state for this mm.
|
|
||||||
*/
|
|
||||||
synchronize_rcu();
|
|
||||||
}
|
|
||||||
atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
|
atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY,
|
||||||
&mm->membarrier_state);
|
&mm->membarrier_state);
|
||||||
|
|
||||||
|
@ -213,12 +280,15 @@ static int membarrier_register_private_expedited(int flags)
|
||||||
{
|
{
|
||||||
struct task_struct *p = current;
|
struct task_struct *p = current;
|
||||||
struct mm_struct *mm = p->mm;
|
struct mm_struct *mm = p->mm;
|
||||||
int state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY;
|
int ready_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_READY,
|
||||||
|
set_state = MEMBARRIER_STATE_PRIVATE_EXPEDITED,
|
||||||
|
ret;
|
||||||
|
|
||||||
if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
|
if (flags & MEMBARRIER_FLAG_SYNC_CORE) {
|
||||||
if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
|
if (!IS_ENABLED(CONFIG_ARCH_HAS_MEMBARRIER_SYNC_CORE))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
state = MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
|
ready_state =
|
||||||
|
MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE_READY;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -226,20 +296,15 @@ static int membarrier_register_private_expedited(int flags)
|
||||||
* groups, which use the same mm. (CLONE_VM but not
|
* groups, which use the same mm. (CLONE_VM but not
|
||||||
* CLONE_THREAD).
|
* CLONE_THREAD).
|
||||||
*/
|
*/
|
||||||
if (atomic_read(&mm->membarrier_state) & state)
|
if ((atomic_read(&mm->membarrier_state) & ready_state) == ready_state)
|
||||||
return 0;
|
return 0;
|
||||||
atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED, &mm->membarrier_state);
|
|
||||||
if (flags & MEMBARRIER_FLAG_SYNC_CORE)
|
if (flags & MEMBARRIER_FLAG_SYNC_CORE)
|
||||||
atomic_or(MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE,
|
set_state |= MEMBARRIER_STATE_PRIVATE_EXPEDITED_SYNC_CORE;
|
||||||
&mm->membarrier_state);
|
atomic_or(set_state, &mm->membarrier_state);
|
||||||
if (!(atomic_read(&mm->mm_users) == 1 && get_nr_threads(p) == 1)) {
|
ret = sync_runqueues_membarrier_state(mm);
|
||||||
/*
|
if (ret)
|
||||||
* Ensure all future scheduler executions will observe the
|
return ret;
|
||||||
* new thread flag state for this process.
|
atomic_or(ready_state, &mm->membarrier_state);
|
||||||
*/
|
|
||||||
synchronize_rcu();
|
|
||||||
}
|
|
||||||
atomic_or(state, &mm->membarrier_state);
|
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -253,8 +318,10 @@ static int membarrier_register_private_expedited(int flags)
|
||||||
* command specified does not exist, not available on the running
|
* command specified does not exist, not available on the running
|
||||||
* kernel, or if the command argument is invalid, this system call
|
* kernel, or if the command argument is invalid, this system call
|
||||||
* returns -EINVAL. For a given command, with flags argument set to 0,
|
* returns -EINVAL. For a given command, with flags argument set to 0,
|
||||||
* this system call is guaranteed to always return the same value until
|
* if this system call returns -ENOSYS or -EINVAL, it is guaranteed to
|
||||||
* reboot.
|
* always return the same value until reboot. In addition, it can return
|
||||||
|
* -ENOMEM if there is not enough memory available to perform the system
|
||||||
|
* call.
|
||||||
*
|
*
|
||||||
* All memory accesses performed in program order from each targeted thread
|
* All memory accesses performed in program order from each targeted thread
|
||||||
* are guaranteed to be ordered with respect to sys_membarrier(). If we use
|
* are guaranteed to be ordered with respect to sys_membarrier(). If we use
|
||||||
|
|
|
@ -911,6 +911,10 @@ struct rq {
|
||||||
|
|
||||||
atomic_t nr_iowait;
|
atomic_t nr_iowait;
|
||||||
|
|
||||||
|
#ifdef CONFIG_MEMBARRIER
|
||||||
|
int membarrier_state;
|
||||||
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
struct root_domain *rd;
|
struct root_domain *rd;
|
||||||
struct sched_domain __rcu *sd;
|
struct sched_domain __rcu *sd;
|
||||||
|
@ -2438,3 +2442,33 @@ static inline bool sched_energy_enabled(void)
|
||||||
static inline bool sched_energy_enabled(void) { return false; }
|
static inline bool sched_energy_enabled(void) { return false; }
|
||||||
|
|
||||||
#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
|
#endif /* CONFIG_ENERGY_MODEL && CONFIG_CPU_FREQ_GOV_SCHEDUTIL */
|
||||||
|
|
||||||
|
#ifdef CONFIG_MEMBARRIER
|
||||||
|
/*
|
||||||
|
* The scheduler provides memory barriers required by membarrier between:
|
||||||
|
* - prior user-space memory accesses and store to rq->membarrier_state,
|
||||||
|
* - store to rq->membarrier_state and following user-space memory accesses.
|
||||||
|
* In the same way it provides those guarantees around store to rq->curr.
|
||||||
|
*/
|
||||||
|
static inline void membarrier_switch_mm(struct rq *rq,
|
||||||
|
struct mm_struct *prev_mm,
|
||||||
|
struct mm_struct *next_mm)
|
||||||
|
{
|
||||||
|
int membarrier_state;
|
||||||
|
|
||||||
|
if (prev_mm == next_mm)
|
||||||
|
return;
|
||||||
|
|
||||||
|
membarrier_state = atomic_read(&next_mm->membarrier_state);
|
||||||
|
if (READ_ONCE(rq->membarrier_state) == membarrier_state)
|
||||||
|
return;
|
||||||
|
|
||||||
|
WRITE_ONCE(rq->membarrier_state, membarrier_state);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static inline void membarrier_switch_mm(struct rq *rq,
|
||||||
|
struct mm_struct *prev_mm,
|
||||||
|
struct mm_struct *next_mm)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
|
@ -1 +1,2 @@
|
||||||
membarrier_test
|
membarrier_test_multi_thread
|
||||||
|
membarrier_test_single_thread
|
||||||
|
|
|
@ -1,7 +1,8 @@
|
||||||
# SPDX-License-Identifier: GPL-2.0-only
|
# SPDX-License-Identifier: GPL-2.0-only
|
||||||
CFLAGS += -g -I../../../../usr/include/
|
CFLAGS += -g -I../../../../usr/include/
|
||||||
|
LDLIBS += -lpthread
|
||||||
|
|
||||||
TEST_GEN_PROGS := membarrier_test
|
TEST_GEN_PROGS := membarrier_test_single_thread \
|
||||||
|
membarrier_test_multi_thread
|
||||||
|
|
||||||
include ../lib.mk
|
include ../lib.mk
|
||||||
|
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
// SPDX-License-Identifier: GPL-2.0
|
/* SPDX-License-Identifier: GPL-2.0 */
|
||||||
#define _GNU_SOURCE
|
#define _GNU_SOURCE
|
||||||
#include <linux/membarrier.h>
|
#include <linux/membarrier.h>
|
||||||
#include <syscall.h>
|
#include <syscall.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
|
||||||
#include "../kselftest.h"
|
#include "../kselftest.h"
|
||||||
|
|
||||||
|
@ -223,7 +224,7 @@ static int test_membarrier_global_expedited_success(void)
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int test_membarrier(void)
|
static int test_membarrier_fail(void)
|
||||||
{
|
{
|
||||||
int status;
|
int status;
|
||||||
|
|
||||||
|
@ -233,10 +234,27 @@ static int test_membarrier(void)
|
||||||
status = test_membarrier_flags_fail();
|
status = test_membarrier_flags_fail();
|
||||||
if (status)
|
if (status)
|
||||||
return status;
|
return status;
|
||||||
status = test_membarrier_global_success();
|
status = test_membarrier_private_expedited_fail();
|
||||||
if (status)
|
if (status)
|
||||||
return status;
|
return status;
|
||||||
status = test_membarrier_private_expedited_fail();
|
status = sys_membarrier(MEMBARRIER_CMD_QUERY, 0);
|
||||||
|
if (status < 0) {
|
||||||
|
ksft_test_result_fail("sys_membarrier() failed\n");
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
|
||||||
|
status = test_membarrier_private_expedited_sync_core_fail();
|
||||||
|
if (status)
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int test_membarrier_success(void)
|
||||||
|
{
|
||||||
|
int status;
|
||||||
|
|
||||||
|
status = test_membarrier_global_success();
|
||||||
if (status)
|
if (status)
|
||||||
return status;
|
return status;
|
||||||
status = test_membarrier_register_private_expedited_success();
|
status = test_membarrier_register_private_expedited_success();
|
||||||
|
@ -251,9 +269,6 @@ static int test_membarrier(void)
|
||||||
return status;
|
return status;
|
||||||
}
|
}
|
||||||
if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
|
if (status & MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE) {
|
||||||
status = test_membarrier_private_expedited_sync_core_fail();
|
|
||||||
if (status)
|
|
||||||
return status;
|
|
||||||
status = test_membarrier_register_private_expedited_sync_core_success();
|
status = test_membarrier_register_private_expedited_sync_core_success();
|
||||||
if (status)
|
if (status)
|
||||||
return status;
|
return status;
|
||||||
|
@ -300,14 +315,3 @@ static int test_membarrier_query(void)
|
||||||
ksft_test_result_pass("sys_membarrier available\n");
|
ksft_test_result_pass("sys_membarrier available\n");
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(int argc, char **argv)
|
|
||||||
{
|
|
||||||
ksft_print_header();
|
|
||||||
ksft_set_plan(13);
|
|
||||||
|
|
||||||
test_membarrier_query();
|
|
||||||
test_membarrier();
|
|
||||||
|
|
||||||
return ksft_exit_pass();
|
|
||||||
}
|
|
|
@ -0,0 +1,73 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include <linux/membarrier.h>
|
||||||
|
#include <syscall.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
|
||||||
|
#include "membarrier_test_impl.h"
|
||||||
|
|
||||||
|
static int thread_ready, thread_quit;
|
||||||
|
static pthread_mutex_t test_membarrier_thread_mutex =
|
||||||
|
PTHREAD_MUTEX_INITIALIZER;
|
||||||
|
static pthread_cond_t test_membarrier_thread_cond =
|
||||||
|
PTHREAD_COND_INITIALIZER;
|
||||||
|
|
||||||
|
void *test_membarrier_thread(void *arg)
|
||||||
|
{
|
||||||
|
pthread_mutex_lock(&test_membarrier_thread_mutex);
|
||||||
|
thread_ready = 1;
|
||||||
|
pthread_cond_broadcast(&test_membarrier_thread_cond);
|
||||||
|
pthread_mutex_unlock(&test_membarrier_thread_mutex);
|
||||||
|
|
||||||
|
pthread_mutex_lock(&test_membarrier_thread_mutex);
|
||||||
|
while (!thread_quit)
|
||||||
|
pthread_cond_wait(&test_membarrier_thread_cond,
|
||||||
|
&test_membarrier_thread_mutex);
|
||||||
|
pthread_mutex_unlock(&test_membarrier_thread_mutex);
|
||||||
|
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int test_mt_membarrier(void)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
pthread_t test_thread;
|
||||||
|
|
||||||
|
pthread_create(&test_thread, NULL,
|
||||||
|
test_membarrier_thread, NULL);
|
||||||
|
|
||||||
|
pthread_mutex_lock(&test_membarrier_thread_mutex);
|
||||||
|
while (!thread_ready)
|
||||||
|
pthread_cond_wait(&test_membarrier_thread_cond,
|
||||||
|
&test_membarrier_thread_mutex);
|
||||||
|
pthread_mutex_unlock(&test_membarrier_thread_mutex);
|
||||||
|
|
||||||
|
test_membarrier_fail();
|
||||||
|
|
||||||
|
test_membarrier_success();
|
||||||
|
|
||||||
|
pthread_mutex_lock(&test_membarrier_thread_mutex);
|
||||||
|
thread_quit = 1;
|
||||||
|
pthread_cond_broadcast(&test_membarrier_thread_cond);
|
||||||
|
pthread_mutex_unlock(&test_membarrier_thread_mutex);
|
||||||
|
|
||||||
|
pthread_join(test_thread, NULL);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
ksft_print_header();
|
||||||
|
ksft_set_plan(13);
|
||||||
|
|
||||||
|
test_membarrier_query();
|
||||||
|
|
||||||
|
/* Multi-threaded */
|
||||||
|
test_mt_membarrier();
|
||||||
|
|
||||||
|
return ksft_exit_pass();
|
||||||
|
}
|
|
@ -0,0 +1,24 @@
|
||||||
|
// SPDX-License-Identifier: GPL-2.0
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include <linux/membarrier.h>
|
||||||
|
#include <syscall.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <pthread.h>
|
||||||
|
|
||||||
|
#include "membarrier_test_impl.h"
|
||||||
|
|
||||||
|
int main(int argc, char **argv)
|
||||||
|
{
|
||||||
|
ksft_print_header();
|
||||||
|
ksft_set_plan(13);
|
||||||
|
|
||||||
|
test_membarrier_query();
|
||||||
|
|
||||||
|
test_membarrier_fail();
|
||||||
|
|
||||||
|
test_membarrier_success();
|
||||||
|
|
||||||
|
return ksft_exit_pass();
|
||||||
|
}
|
Loading…
Reference in a new issue