rcu: limit rcu_node leaf-level fanout

Some recent benchmarks have indicated possible lock contention on the
leaf-level rcu_node locks.  This commit therefore limits the number of
CPUs per leaf-level rcu_node structure to 16 (or to CONFIG_RCU_FANOUT,
if that is smaller); in other words, at most 16 rcu_data structures can
fan into a given leaf rcu_node structure.  Prior to this commit, the
limit was 32 on 32-bit systems and 64 on 64-bit systems.

Note that the fanout of non-leaf rcu_node structures is unchanged.
Accesses to the rcu_node tree are organized so that non-leaf rcu_node
structures are referenced much less frequently than the leaf
structures, so lock contention is much less of a concern there.

Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
This commit is contained in:
Paul E. McKenney 2010-12-14 16:07:52 -08:00
parent 121dfc4b3e
commit 0209f6490b
2 changed files with 26 additions and 20 deletions

View file

@ -1869,8 +1869,9 @@ static void __init rcu_init_levelspread(struct rcu_state *rsp)
{ {
int i; int i;
for (i = NUM_RCU_LVLS - 1; i >= 0; i--) for (i = NUM_RCU_LVLS - 1; i > 0; i--)
rsp->levelspread[i] = CONFIG_RCU_FANOUT; rsp->levelspread[i] = CONFIG_RCU_FANOUT;
rsp->levelspread[0] = RCU_FANOUT_LEAF;
} }
#else /* #ifdef CONFIG_RCU_FANOUT_EXACT */ #else /* #ifdef CONFIG_RCU_FANOUT_EXACT */
static void __init rcu_init_levelspread(struct rcu_state *rsp) static void __init rcu_init_levelspread(struct rcu_state *rsp)

View file

@ -31,46 +31,51 @@
/* /*
* Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT.
* In theory, it should be possible to add more levels straightforwardly. * In theory, it should be possible to add more levels straightforwardly.
* In practice, this has not been tested, so there is probably some * In practice, this did work well going from three levels to four.
* bug somewhere. * Of course, your mileage may vary.
*/ */
#define MAX_RCU_LVLS 4 #define MAX_RCU_LVLS 4
#define RCU_FANOUT (CONFIG_RCU_FANOUT) #if CONFIG_RCU_FANOUT > 16
#define RCU_FANOUT_SQ (RCU_FANOUT * RCU_FANOUT) #define RCU_FANOUT_LEAF 16
#define RCU_FANOUT_CUBE (RCU_FANOUT_SQ * RCU_FANOUT) #else /* #if CONFIG_RCU_FANOUT > 16 */
#define RCU_FANOUT_FOURTH (RCU_FANOUT_CUBE * RCU_FANOUT) #define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT)
#endif /* #else #if CONFIG_RCU_FANOUT > 16 */
#define RCU_FANOUT_1 (RCU_FANOUT_LEAF)
#define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT)
#define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT)
#define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT)
#if NR_CPUS <= RCU_FANOUT #if NR_CPUS <= RCU_FANOUT_1
# define NUM_RCU_LVLS 1 # define NUM_RCU_LVLS 1
# define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 (NR_CPUS) # define NUM_RCU_LVL_1 (NR_CPUS)
# define NUM_RCU_LVL_2 0 # define NUM_RCU_LVL_2 0
# define NUM_RCU_LVL_3 0 # define NUM_RCU_LVL_3 0
# define NUM_RCU_LVL_4 0 # define NUM_RCU_LVL_4 0
#elif NR_CPUS <= RCU_FANOUT_SQ #elif NR_CPUS <= RCU_FANOUT_2
# define NUM_RCU_LVLS 2 # define NUM_RCU_LVLS 2
# define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
# define NUM_RCU_LVL_2 (NR_CPUS) # define NUM_RCU_LVL_2 (NR_CPUS)
# define NUM_RCU_LVL_3 0 # define NUM_RCU_LVL_3 0
# define NUM_RCU_LVL_4 0 # define NUM_RCU_LVL_4 0
#elif NR_CPUS <= RCU_FANOUT_CUBE #elif NR_CPUS <= RCU_FANOUT_3
# define NUM_RCU_LVLS 3 # define NUM_RCU_LVLS 3
# define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ) # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
# define NUM_RCU_LVL_3 NR_CPUS # define NUM_RCU_LVL_3 (NR_CPUS)
# define NUM_RCU_LVL_4 0 # define NUM_RCU_LVL_4 0
#elif NR_CPUS <= RCU_FANOUT_FOURTH #elif NR_CPUS <= RCU_FANOUT_4
# define NUM_RCU_LVLS 4 # define NUM_RCU_LVLS 4
# define NUM_RCU_LVL_0 1 # define NUM_RCU_LVL_0 1
# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_CUBE) # define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3)
# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_SQ) # define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2)
# define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT) # define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1)
# define NUM_RCU_LVL_4 NR_CPUS # define NUM_RCU_LVL_4 (NR_CPUS)
#else #else
# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS"
#endif /* #if (NR_CPUS) <= RCU_FANOUT */ #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */
#define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4) #define RCU_SUM (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3 + NUM_RCU_LVL_4)
#define NUM_RCU_NODES (RCU_SUM - NR_CPUS) #define NUM_RCU_NODES (RCU_SUM - NR_CPUS)