rcu: Extend expedited funnel locking to rcu_data structure
The strictly rcu_node-based funnel-locking scheme works well in many cases, but systems with CONFIG_RCU_FANOUT_LEAF=64 won't necessarily get all that much concurrency. This commit therefore extends the funnel locking into the per-CPU rcu_data structure, providing concurrency equal to the number of CPUs.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
This commit is contained in:
parent
704dd435ac
commit
2cd6ffafec
|
@ -3312,11 +3312,14 @@ static bool rcu_exp_gp_seq_done(struct rcu_state *rsp, unsigned long s)
|
||||||
|
|
||||||
/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
|
/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
|
||||||
static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
|
static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
|
||||||
|
struct rcu_data *rdp,
|
||||||
atomic_long_t *stat, unsigned long s)
|
atomic_long_t *stat, unsigned long s)
|
||||||
{
|
{
|
||||||
if (rcu_exp_gp_seq_done(rsp, s)) {
|
if (rcu_exp_gp_seq_done(rsp, s)) {
|
||||||
if (rnp)
|
if (rnp)
|
||||||
mutex_unlock(&rnp->exp_funnel_mutex);
|
mutex_unlock(&rnp->exp_funnel_mutex);
|
||||||
|
else if (rdp)
|
||||||
|
mutex_unlock(&rdp->exp_funnel_mutex);
|
||||||
/* Ensure test happens before caller kfree(). */
|
/* Ensure test happens before caller kfree(). */
|
||||||
smp_mb__before_atomic(); /* ^^^ */
|
smp_mb__before_atomic(); /* ^^^ */
|
||||||
atomic_long_inc(stat);
|
atomic_long_inc(stat);
|
||||||
|
@ -3332,6 +3335,7 @@ static bool sync_exp_work_done(struct rcu_state *rsp, struct rcu_node *rnp,
|
||||||
*/
|
*/
|
||||||
static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
|
static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
|
||||||
{
|
{
|
||||||
|
struct rcu_data *rdp;
|
||||||
struct rcu_node *rnp0;
|
struct rcu_node *rnp0;
|
||||||
struct rcu_node *rnp1 = NULL;
|
struct rcu_node *rnp1 = NULL;
|
||||||
|
|
||||||
|
@ -3343,16 +3347,24 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
|
||||||
* can be inexact, as it is just promoting locality and is not
|
* can be inexact, as it is just promoting locality and is not
|
||||||
* strictly needed for correctness.
|
* strictly needed for correctness.
|
||||||
*/
|
*/
|
||||||
rnp0 = per_cpu_ptr(rsp->rda, raw_smp_processor_id())->mynode;
|
rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
|
||||||
|
if (sync_exp_work_done(rsp, NULL, NULL, &rsp->expedited_workdone1, s))
|
||||||
|
return NULL;
|
||||||
|
mutex_lock(&rdp->exp_funnel_mutex);
|
||||||
|
rnp0 = rdp->mynode;
|
||||||
for (; rnp0 != NULL; rnp0 = rnp0->parent) {
|
for (; rnp0 != NULL; rnp0 = rnp0->parent) {
|
||||||
if (sync_exp_work_done(rsp, rnp1, &rsp->expedited_workdone1, s))
|
if (sync_exp_work_done(rsp, rnp1, rdp,
|
||||||
|
&rsp->expedited_workdone2, s))
|
||||||
return NULL;
|
return NULL;
|
||||||
mutex_lock(&rnp0->exp_funnel_mutex);
|
mutex_lock(&rnp0->exp_funnel_mutex);
|
||||||
if (rnp1)
|
if (rnp1)
|
||||||
mutex_unlock(&rnp1->exp_funnel_mutex);
|
mutex_unlock(&rnp1->exp_funnel_mutex);
|
||||||
|
else
|
||||||
|
mutex_unlock(&rdp->exp_funnel_mutex);
|
||||||
rnp1 = rnp0;
|
rnp1 = rnp0;
|
||||||
}
|
}
|
||||||
if (sync_exp_work_done(rsp, rnp1, &rsp->expedited_workdone2, s))
|
if (sync_exp_work_done(rsp, rnp1, rdp,
|
||||||
|
&rsp->expedited_workdone3, s))
|
||||||
return NULL;
|
return NULL;
|
||||||
return rnp1;
|
return rnp1;
|
||||||
}
|
}
|
||||||
|
@ -3733,6 +3745,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
|
||||||
WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
|
WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
|
||||||
rdp->cpu = cpu;
|
rdp->cpu = cpu;
|
||||||
rdp->rsp = rsp;
|
rdp->rsp = rsp;
|
||||||
|
mutex_init(&rdp->exp_funnel_mutex);
|
||||||
rcu_boot_init_nocb_percpu_data(rdp);
|
rcu_boot_init_nocb_percpu_data(rdp);
|
||||||
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
raw_spin_unlock_irqrestore(&rnp->lock, flags);
|
||||||
}
|
}
|
||||||
|
|
|
@ -364,11 +364,12 @@ struct rcu_data {
|
||||||
unsigned long n_rp_nocb_defer_wakeup;
|
unsigned long n_rp_nocb_defer_wakeup;
|
||||||
unsigned long n_rp_need_nothing;
|
unsigned long n_rp_need_nothing;
|
||||||
|
|
||||||
/* 6) _rcu_barrier() and OOM callbacks. */
|
/* 6) _rcu_barrier(), OOM callbacks, and expediting. */
|
||||||
struct rcu_head barrier_head;
|
struct rcu_head barrier_head;
|
||||||
#ifdef CONFIG_RCU_FAST_NO_HZ
|
#ifdef CONFIG_RCU_FAST_NO_HZ
|
||||||
struct rcu_head oom_head;
|
struct rcu_head oom_head;
|
||||||
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
|
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
|
||||||
|
struct mutex exp_funnel_mutex;
|
||||||
|
|
||||||
/* 7) Callback offloading. */
|
/* 7) Callback offloading. */
|
||||||
#ifdef CONFIG_RCU_NOCB_CPU
|
#ifdef CONFIG_RCU_NOCB_CPU
|
||||||
|
@ -494,6 +495,7 @@ struct rcu_state {
|
||||||
atomic_long_t expedited_tryfail; /* # acquisition failures. */
|
atomic_long_t expedited_tryfail; /* # acquisition failures. */
|
||||||
atomic_long_t expedited_workdone1; /* # done by others #1. */
|
atomic_long_t expedited_workdone1; /* # done by others #1. */
|
||||||
atomic_long_t expedited_workdone2; /* # done by others #2. */
|
atomic_long_t expedited_workdone2; /* # done by others #2. */
|
||||||
|
atomic_long_t expedited_workdone3; /* # done by others #3. */
|
||||||
atomic_long_t expedited_normal; /* # fallbacks to normal. */
|
atomic_long_t expedited_normal; /* # fallbacks to normal. */
|
||||||
atomic_t expedited_need_qs; /* # CPUs left to check in. */
|
atomic_t expedited_need_qs; /* # CPUs left to check in. */
|
||||||
wait_queue_head_t expedited_wq; /* Wait for check-ins. */
|
wait_queue_head_t expedited_wq; /* Wait for check-ins. */
|
||||||
|
|
|
@ -185,11 +185,12 @@ static int show_rcuexp(struct seq_file *m, void *v)
|
||||||
{
|
{
|
||||||
struct rcu_state *rsp = (struct rcu_state *)m->private;
|
struct rcu_state *rsp = (struct rcu_state *)m->private;
|
||||||
|
|
||||||
seq_printf(m, "t=%lu tf=%lu wd1=%lu wd2=%lu n=%lu enq=%d sc=%lu\n",
|
seq_printf(m, "t=%lu tf=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
|
||||||
rsp->expedited_sequence,
|
rsp->expedited_sequence,
|
||||||
atomic_long_read(&rsp->expedited_tryfail),
|
atomic_long_read(&rsp->expedited_tryfail),
|
||||||
atomic_long_read(&rsp->expedited_workdone1),
|
atomic_long_read(&rsp->expedited_workdone1),
|
||||||
atomic_long_read(&rsp->expedited_workdone2),
|
atomic_long_read(&rsp->expedited_workdone2),
|
||||||
|
atomic_long_read(&rsp->expedited_workdone3),
|
||||||
atomic_long_read(&rsp->expedited_normal),
|
atomic_long_read(&rsp->expedited_normal),
|
||||||
atomic_read(&rsp->expedited_need_qs),
|
atomic_read(&rsp->expedited_need_qs),
|
||||||
rsp->expedited_sequence / 2);
|
rsp->expedited_sequence / 2);
|
||||||
|
|
Loading…
Reference in a new issue