
rcu: Remove expedited GP funnel-lock bypass

Commit #cdacbe1f91264 ("rcu: Add fastpath bypassing funnel locking")
turns out to be a pessimization at high load because it forces a tree
full of tasks to wait for an expedited grace period that they probably
do not need.  This commit therefore removes this optimization.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Paul E. McKenney 2016-01-30 17:23:19 -08:00
parent 4f41530245
commit e2fd9d3584
4 changed files with 8 additions and 29 deletions
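
For readers unfamiliar with the pattern being removed: funnel locking has contending tasks acquire mutexes leaf-to-root in the rcu_node tree, and the removed fastpath let one lucky task trylock the root directly. The following is a minimal userspace model of the two paths using pthreads — illustrative names only, not the kernel code:

	#include <pthread.h>
	#include <stdbool.h>
	#include <stddef.h>

	struct node {
		pthread_mutex_t lock;
		struct node *parent;		/* NULL at the root */
	};

	/* Model of the removed bypass: take the root directly if it appears free. */
	static bool try_root_bypass(struct node *root)
	{
		return pthread_mutex_trylock(&root->lock) == 0;
	}

	/* Model of the retained slowpath: funnel from a leaf up to the root. */
	static struct node *funnel_lock(struct node *leaf)
	{
		struct node *np = leaf;

		pthread_mutex_lock(&np->lock);
		while (np->parent != NULL) {
			pthread_mutex_lock(&np->parent->lock);
			pthread_mutex_unlock(&np->lock);	/* hand-over-hand */
			np = np->parent;
		}
		return np;	/* caller now holds only the root lock */
	}

Per the commit message above, the trouble with the bypass shows up under heavy load: the task that wins the root trylock starts a fresh expedited grace period while the tasks queued in the funnel are left waiting for it, even though a grace period already in flight would likely have satisfied them.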

--- a/Documentation/RCU/trace.txt
+++ b/Documentation/RCU/trace.txt

@@ -237,17 +237,17 @@ o "ktl" is the low-order 16 bits (in hexadecimal) of the count of
 The output of "cat rcu/rcu_preempt/rcuexp" looks as follows:
 
-s=21872 wd0=0 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
+s=21872 wd1=0 wd2=0 wd3=5 n=0 enq=0 sc=21872
 
 These fields are as follows:
 
 o	"s" is the sequence number, with an odd number indicating that
 	an expedited grace period is in progress.
 
-o	"wd0", "wd1", "wd2", and "wd3" are the number of times that an
-	attempt to start an expedited grace period found that someone
-	else had completed an expedited grace period that satisfies the
-	attempted request.  "Our work is done."
+o	"wd1", "wd2", and "wd3" are the number of times that an attempt
+	to start an expedited grace period found that someone else had
+	completed an expedited grace period that satisfies the attempted
+	request.  "Our work is done."
 
 o	"n" is number of times that a concurrent CPU-hotplug operation
 	forced a fallback to a normal grace period.
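
As a concrete illustration of the "s" encoding — a sketch based only on the semantics stated above (odd means in progress) and the "sc = expedited_sequence / 2" computation in the tree_trace.c hunk below, not kernel code:

	#include <stdbool.h>
	#include <stdio.h>

	/* Decode a sampled "s" value from the rcuexp output line. */
	static bool exp_gp_in_progress(unsigned long s)
	{
		return s & 1;	/* odd sequence number => expedited GP in flight */
	}

	static unsigned long exp_gps_completed(unsigned long s)
	{
		return s / 2;	/* matches the "sc" field */
	}

	int main(void)
	{
		unsigned long s = 21872;	/* value from the example line above */

		printf("in progress: %d, completed: %lu\n",
		       exp_gp_in_progress(s), exp_gps_completed(s));
		return 0;
	}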

--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c

@@ -3616,25 +3616,6 @@ static struct rcu_node *exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
 	struct rcu_node *rnp0;
 	struct rcu_node *rnp1 = NULL;
 
-	/*
-	 * First try directly acquiring the root lock in order to reduce
-	 * latency in the common case where expedited grace periods are
-	 * rare.  We check mutex_is_locked() to avoid pathological levels of
-	 * memory contention on ->exp_funnel_mutex in the heavy-load case.
-	 */
-	rnp0 = rcu_get_root(rsp);
-	if (!mutex_is_locked(&rnp0->exp_funnel_mutex)) {
-		if (mutex_trylock(&rnp0->exp_funnel_mutex)) {
-			trace_rcu_exp_funnel_lock(rsp->name, rnp0->level,
-						  rnp0->grplo, rnp0->grphi,
-						  TPS("acq"));
-			if (sync_exp_work_done(rsp, rnp0, NULL,
-					       &rdp->expedited_workdone0, s))
-				return NULL;
-			return rnp0;
-		}
-	}
-
 	/*
 	 * Each pass through the following loop works its way
 	 * up the rcu_node tree, returning if others have done the
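
The funnel loop that remains relies on sync_exp_work_done() to notice that a concurrent expedited grace period has already satisfied the caller's request. A hedged sketch of the underlying sequence-snapshot idea, with hypothetical names and counter wraparound (which the kernel handles) ignored:

	#include <stdatomic.h>
	#include <stdbool.h>

	static atomic_ulong exp_seq;	/* even: idle, odd: GP in progress */

	/* Value exp_seq must reach before the caller's request is satisfied. */
	static unsigned long exp_seq_snap(void)
	{
		/* Round up past any GP already in flight; result stays even. */
		return (atomic_load(&exp_seq) + 3) & ~0x1UL;
	}

	/* True once a grace period covering the snapshot "s" has completed. */
	static bool exp_seq_done(unsigned long s)
	{
		return atomic_load(&exp_seq) >= s;
	}

A task snapshots the sequence before entering the funnel; if exp_seq_done() becomes true while it waits at any level, its work has been done for it and it can return without starting a grace period of its own.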

--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h

@@ -388,7 +388,6 @@ struct rcu_data {
 	struct rcu_head oom_head;
 #endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
 	struct mutex exp_funnel_mutex;
-	atomic_long_t expedited_workdone0;	/* # done by others #0. */
 	atomic_long_t expedited_workdone1;	/* # done by others #1. */
 	atomic_long_t expedited_workdone2;	/* # done by others #2. */
 	atomic_long_t expedited_workdone3;	/* # done by others #3. */

--- a/kernel/rcu/tree_trace.c
+++ b/kernel/rcu/tree_trace.c

@@ -185,17 +185,16 @@ static int show_rcuexp(struct seq_file *m, void *v)
 	int cpu;
 	struct rcu_state *rsp = (struct rcu_state *)m->private;
 	struct rcu_data *rdp;
-	unsigned long s0 = 0, s1 = 0, s2 = 0, s3 = 0;
+	unsigned long s1 = 0, s2 = 0, s3 = 0;
 
 	for_each_possible_cpu(cpu) {
 		rdp = per_cpu_ptr(rsp->rda, cpu);
-		s0 += atomic_long_read(&rdp->expedited_workdone0);
 		s1 += atomic_long_read(&rdp->expedited_workdone1);
 		s2 += atomic_long_read(&rdp->expedited_workdone2);
 		s3 += atomic_long_read(&rdp->expedited_workdone3);
 	}
-	seq_printf(m, "s=%lu wd0=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
-		   rsp->expedited_sequence, s0, s1, s2, s3,
+	seq_printf(m, "s=%lu wd1=%lu wd2=%lu wd3=%lu n=%lu enq=%d sc=%lu\n",
+		   rsp->expedited_sequence, s1, s2, s3,
 		   atomic_long_read(&rsp->expedited_normal),
 		   atomic_read(&rsp->expedited_need_qs),
 		   rsp->expedited_sequence / 2);
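
The counters summed here follow the usual per-CPU pattern: each CPU increments only its own counter, keeping the update path contention-free, and the debugfs reader aggregates them on demand. A minimal userspace analogue with C11 atomics — illustrative names, not the kernel's per-CPU API:

	#include <stdatomic.h>
	#include <stdio.h>

	#define NR_CPUS 4

	static atomic_ulong workdone1[NR_CPUS];	/* one counter per "CPU" */

	/* Reader side: sum all per-CPU contributions, as show_rcuexp() does. */
	static unsigned long sum_workdone1(void)
	{
		unsigned long s1 = 0;
		int cpu;

		for (cpu = 0; cpu < NR_CPUS; cpu++)
			s1 += atomic_load(&workdone1[cpu]);
		return s1;
	}

	int main(void)
	{
		atomic_fetch_add(&workdone1[0], 5);	/* simulate some hits */
		printf("wd1=%lu\n", sum_workdone1());
		return 0;
	}

With the bypass gone there is no expedited_workdone0 counter left to sum, so the output line drops its wd0 field, matching the documentation hunk above.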