From 142d106d5e62ff2cf0dfd2dfe1adfcaff1c2ed85 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 29 Nov 2018 09:15:54 -0800 Subject: [PATCH] rcu: Determine expedited-GP IPI handler at build time Back when there could be multiple RCU flavors running in the same kernel at the same time, it was necessary to specify the expedited grace-period IPI handler at runtime. Now that there is only one RCU flavor, the IPI handler can be determined at build time. There is therefore no longer any reason for the RCU-preempt and RCU-sched IPI handlers to have different names, nor is there any reason to pass these handlers in function arguments and in the data structures enclosing workqueues. This commit therefore makes all these changes, pushing the specification of the expedited grace-period IPI handler down to the point of use. Signed-off-by: Paul E. McKenney --- .../Expedited-Grace-Periods/ExpSchedFlow.svg | 18 ++++++----- .../Expedited-Grace-Periods.html | 26 ++++++++-------- kernel/rcu/tree.h | 1 - kernel/rcu/tree_exp.h | 30 +++++++++---------- 4 files changed, 38 insertions(+), 37 deletions(-) diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/ExpSchedFlow.svg b/Documentation/RCU/Design/Expedited-Grace-Periods/ExpSchedFlow.svg index e4233ac93c2b..6189ffcc6aff 100644 --- a/Documentation/RCU/Design/Expedited-Grace-Periods/ExpSchedFlow.svg +++ b/Documentation/RCU/Design/Expedited-Grace-Periods/ExpSchedFlow.svg @@ -328,13 +328,13 @@ inkscape:window-height="1148" id="namedview90" showgrid="true" - inkscape:zoom="0.80021373" - inkscape:cx="462.49289" - inkscape:cy="473.6718" + inkscape:zoom="0.69092787" + inkscape:cx="476.34085" + inkscape:cy="712.80957" inkscape:window-x="770" inkscape:window-y="24" inkscape:window-maximized="0" - inkscape:current-layer="g4114-9-3-9" + inkscape:current-layer="g4" inkscape:snap-grids="false" fit-margin-top="5" fit-margin-right="5" @@ -813,14 +813,18 @@ reched_cpu() + id="tspan4925-1-2-4-5">Requestcontext switch diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html index 8e4f873b979f..19e7a5fb6b73 100644 --- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html +++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html @@ -72,10 +72,10 @@ will ignore it because idle and offline CPUs are already residing in quiescent states. Otherwise, the expedited grace period will use smp_call_function_single() to send the CPU an IPI, which -is handled by sync_rcu_exp_handler(). +is handled by rcu_exp_handler().

-However, because this is preemptible RCU, sync_rcu_exp_handler() +However, because this is preemptible RCU, rcu_exp_handler() can check to see if the CPU is currently running in an RCU read-side critical section. If not, the handler can immediately report a quiescent state. @@ -145,19 +145,18 @@ expedited grace period is shown in the following diagram:

ExpSchedFlow.svg

-As with RCU-preempt's synchronize_rcu_expedited(), +As with RCU-preempt, RCU-sched's synchronize_sched_expedited() ignores offline and idle CPUs, again because they are in remotely detectable quiescent states. -However, the synchronize_rcu_expedited() handler -is sync_sched_exp_handler(), and because the +However, because the rcu_read_lock_sched() and rcu_read_unlock_sched() leave no trace of their invocation, in general it is not possible to tell whether or not the current CPU is in an RCU read-side critical section. -The best that sync_sched_exp_handler() can do is to check +The best that RCU-sched's rcu_exp_handler() can do is to check for idle, on the off-chance that the CPU went idle while the IPI was in flight. -If the CPU is idle, then sync_sched_exp_handler() reports +If the CPU is idle, then rcu_exp_handler() reports the quiescent state.

Otherwise, the handler forces a future context switch by setting the @@ -298,19 +297,18 @@ Instead, the task pushing the grace period forward will include the idle CPUs in the mask passed to rcu_report_exp_cpu_mult().

-For RCU-sched, there is an additional check for idle in the IPI -handler, sync_sched_exp_handler(). +For RCU-sched, there is an additional check: If the IPI has interrupted the idle loop, then -sync_sched_exp_handler() invokes rcu_report_exp_rdp() +rcu_exp_handler() invokes rcu_report_exp_rdp() to report the corresponding quiescent state.

For RCU-preempt, there is no specific check for idle in the -IPI handler (sync_rcu_exp_handler()), but because +IPI handler (rcu_exp_handler()), but because RCU read-side critical sections are not permitted within the -idle loop, if sync_rcu_exp_handler() sees that the CPU is within +idle loop, if rcu_exp_handler() sees that the CPU is within RCU read-side critical section, the CPU cannot possibly be idle. -Otherwise, sync_rcu_exp_handler() invokes +Otherwise, rcu_exp_handler() invokes rcu_report_exp_rdp() to report the corresponding quiescent state, regardless of whether or not that quiescent state was due to the CPU being idle. @@ -625,6 +623,8 @@ checks, but only during the mid-boot dead zone.

With this refinement, synchronous grace periods can now be used from task context pretty much any time during the life of the kernel. +That is, aside from some points in the suspend, hibernate, or shutdown +code path.

Summary

diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index bcfd684a5c57..50bb41cdc5fb 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -36,7 +36,6 @@ /* Communicate arguments to a workqueue handler. */ struct rcu_exp_work { - smp_call_func_t rew_func; unsigned long rew_s; struct work_struct rew_work; }; diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 6d4eb4694b6f..7f5cb4228b59 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -22,6 +22,8 @@ #include +static void rcu_exp_handler(void *unused); + /* * Record the start of an expedited grace period. */ @@ -344,7 +346,6 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp) { int cpu; unsigned long flags; - smp_call_func_t func; unsigned long mask_ofl_test; unsigned long mask_ofl_ipi; int ret; @@ -352,7 +353,6 @@ static void sync_rcu_exp_select_node_cpus(struct work_struct *wp) container_of(wp, struct rcu_exp_work, rew_work); struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew); - func = rewp->rew_func; raw_spin_lock_irqsave_rcu_node(rnp, flags); /* Each pass checks a CPU for identity, offline, and idle. */ @@ -396,7 +396,7 @@ retry_ipi: mask_ofl_test |= mask; continue; } - ret = smp_call_function_single(cpu, func, NULL, 0); + ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0); if (!ret) { mask_ofl_ipi &= ~mask; continue; @@ -426,7 +426,7 @@ retry_ipi: * Select the nodes that the upcoming expedited grace period needs * to wait for. */ -static void sync_rcu_exp_select_cpus(smp_call_func_t func) +static void sync_rcu_exp_select_cpus(void) { int cpu; struct rcu_node *rnp; @@ -440,7 +440,6 @@ static void sync_rcu_exp_select_cpus(smp_call_func_t func) rnp->exp_need_flush = false; if (!READ_ONCE(rnp->expmask)) continue; /* Avoid early boot non-existent wq. */ - rnp->rew.rew_func = func; if (!READ_ONCE(rcu_par_gp_wq) || rcu_scheduler_active != RCU_SCHEDULER_RUNNING || rcu_is_last_leaf_node(rnp)) { @@ -580,10 +579,10 @@ static void rcu_exp_wait_wake(unsigned long s) * Common code to drive an expedited grace period forward, used by * workqueues and mid-boot-time tasks. */ -static void rcu_exp_sel_wait_wake(smp_call_func_t func, unsigned long s) +static void rcu_exp_sel_wait_wake(unsigned long s) { /* Initialize the rcu_node tree in preparation for the wait. */ - sync_rcu_exp_select_cpus(func); + sync_rcu_exp_select_cpus(); /* Wait and clean up, including waking everyone. */ rcu_exp_wait_wake(s); @@ -597,14 +596,14 @@ static void wait_rcu_exp_gp(struct work_struct *wp) struct rcu_exp_work *rewp; rewp = container_of(wp, struct rcu_exp_work, rew_work); - rcu_exp_sel_wait_wake(rewp->rew_func, rewp->rew_s); + rcu_exp_sel_wait_wake(rewp->rew_s); } /* * Given a smp_call_function() handler, kick off the specified * implementation of expedited grace period. */ -static void _synchronize_rcu_expedited(smp_call_func_t func) +static void _synchronize_rcu_expedited(void) { struct rcu_data *rdp; struct rcu_exp_work rew; @@ -625,10 +624,9 @@ static void _synchronize_rcu_expedited(smp_call_func_t func) /* Ensure that load happens before action based on it. */ if (unlikely(rcu_scheduler_active == RCU_SCHEDULER_INIT)) { /* Direct call during scheduler init and early_initcalls(). */ - rcu_exp_sel_wait_wake(func, s); + rcu_exp_sel_wait_wake(s); } else { /* Marshall arguments & schedule the expedited grace period. */ - rew.rew_func = func; rew.rew_s = s; INIT_WORK_ONSTACK(&rew.rew_work, wait_rcu_exp_gp); queue_work(rcu_gp_wq, &rew.rew_work); @@ -654,7 +652,7 @@ static void _synchronize_rcu_expedited(smp_call_func_t func) * ->expmask fields in the rcu_node tree. Otherwise, immediately * report the quiescent state. */ -static void sync_rcu_exp_handler(void *unused) +static void rcu_exp_handler(void *unused) { unsigned long flags; struct rcu_data *rdp = this_cpu_ptr(&rcu_data); @@ -760,14 +758,14 @@ void synchronize_rcu_expedited(void) if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) return; - _synchronize_rcu_expedited(sync_rcu_exp_handler); + _synchronize_rcu_expedited(); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited); #else /* #ifdef CONFIG_PREEMPT_RCU */ /* Invoked on each online non-idle CPU for expedited quiescent state. */ -static void sync_sched_exp_handler(void *unused) +static void rcu_exp_handler(void *unused) { struct rcu_data *rdp; struct rcu_node *rnp; @@ -799,7 +797,7 @@ static void sync_sched_exp_online_cleanup(int cpu) rnp = rdp->mynode; if (!(READ_ONCE(rnp->expmask) & rdp->grpmask)) return; - ret = smp_call_function_single(cpu, sync_sched_exp_handler, NULL, 0); + ret = smp_call_function_single(cpu, rcu_exp_handler, NULL, 0); WARN_ON_ONCE(ret); } @@ -835,7 +833,7 @@ void synchronize_rcu_expedited(void) if (rcu_blocking_is_gp()) return; - _synchronize_rcu_expedited(sync_sched_exp_handler); + _synchronize_rcu_expedited(); } EXPORT_SYMBOL_GPL(synchronize_rcu_expedited);