From 09ed79d6d75f06cc963a78f25463251b0a758dc7 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 7 May 2019 10:01:47 -0700 Subject: [PATCH 1/4] percpu_ref: introduce PERCPU_REF_ALLOW_REINIT flag In most cases percpu reference counters are not switched to the percpu mode after they reach the atomic mode. Some obvious exceptions are reference counters which are initialized into the atomic mode (using PERCPU_REF_INIT_ATOMIC and PERCPU_REF_INIT_DEAD flags), and there are a few other exceptions. But in most cases there is no way back, and once the reference counter is switched to the atomic mode, there is no reason to wait for percpu_ref_exit() to release the percpu memory. Of course, the size of a single counter is not so big, but because it can pin the whole percpu block in memory, the memory footprint can be noticeable (e.g. on my 32-CPU machine a percpu block is 8MB in size). To release the percpu memory as early as possible, let's introduce the PERCPU_REF_ALLOW_REINIT flag with the following semantics: it has to be set in order to switch a percpu reference counter to the percpu mode after the initialization. PERCPU_REF_INIT_ATOMIC and PERCPU_REF_INIT_DEAD flags will implicitly assume PERCPU_REF_ALLOW_REINIT. To avoid regressions, this patch doesn't introduce any functional change. The functional change will be made later in the patchset, after adjusting all call sites which revive percpu counters. Signed-off-by: Roman Gushchin Acked-by: Tejun Heo Signed-off-by: Dennis Zhou --- include/linux/percpu-refcount.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index b297cd1cd4f1..0f0240af8520 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -75,14 +75,21 @@ enum { * operation using percpu_ref_switch_to_percpu(). If initialized * with this flag, the ref will stay in atomic mode until * percpu_ref_switch_to_percpu() is invoked on it. 
+ * Implies ALLOW_REINIT. */ PERCPU_REF_INIT_ATOMIC = 1 << 0, /* * Start dead w/ ref == 0 in atomic mode. Must be revived with - * percpu_ref_reinit() before used. Implies INIT_ATOMIC. + * percpu_ref_reinit() before used. Implies INIT_ATOMIC and + * ALLOW_REINIT. */ PERCPU_REF_INIT_DEAD = 1 << 1, + + /* + * Allow switching from atomic mode to percpu mode. + */ + PERCPU_REF_ALLOW_REINIT = 1 << 2, }; struct percpu_ref { From 214828962dead0c698f92b60ef97ce3c5fc2c8fe Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 7 May 2019 10:01:48 -0700 Subject: [PATCH 2/4] io_uring: initialize percpu refcounters using PERCPU_REF_ALLOW_REINIT Percpu reference counters should now be initialized with the PERCPU_REF_ALLOW_REINIT flag in order to allow switching them to the percpu mode from the atomic mode. This is exactly what percpu_ref_reinit() called from __io_uring_register() is supposed to do. So let's initialize percpu refcounters with the PERCPU_REF_ALLOW_REINIT flag. Signed-off-by: Roman Gushchin Acked-by: Tejun Heo Signed-off-by: Dennis Zhou --- fs/io_uring.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fs/io_uring.c b/fs/io_uring.c index 84efb8956734..083c5dd95452 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -389,7 +389,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p) if (!ctx) return NULL; - if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free, 0, GFP_KERNEL)) { + if (percpu_ref_init(&ctx->refs, io_ring_ctx_ref_free, + PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) { kfree(ctx); return NULL; } From ddde2af747ad79010f14691f381522987fbcb860 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 7 May 2019 10:01:49 -0700 Subject: [PATCH 3/4] md: initialize percpu refcounters using PERCPU_REF_ALLOW_REINIT Percpu reference counters should now be initialized with the PERCPU_REF_ALLOW_REINIT flag in order to allow switching them to the percpu mode from the atomic mode. 
To make the percpu_ref_switch_to_percpu() call in set_in_sync() succeed, let's initialize percpu refcounters with the PERCPU_REF_ALLOW_REINIT flag. Signed-off-by: Roman Gushchin Acked-by: Tejun Heo Signed-off-by: Dennis Zhou --- drivers/md/md.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/md/md.c b/drivers/md/md.c index 05ffffb8b769..16e034747a86 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -5249,7 +5249,8 @@ int mddev_init_writes_pending(struct mddev *mddev) { if (mddev->writes_pending.percpu_count_ptr) return 0; - if (percpu_ref_init(&mddev->writes_pending, no_op, 0, GFP_KERNEL) < 0) + if (percpu_ref_init(&mddev->writes_pending, no_op, + PERCPU_REF_ALLOW_REINIT, GFP_KERNEL) < 0) return -ENOMEM; /* We want to start with the refcount at zero */ percpu_ref_put(&mddev->writes_pending); From 7d9ab9b6adffd9c474c1274acb5f6208f9a09cf3 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Tue, 7 May 2019 10:01:50 -0700 Subject: [PATCH 4/4] percpu_ref: release percpu memory early without PERCPU_REF_ALLOW_REINIT Release percpu memory after finishing the switch to the atomic mode, but only if PERCPU_REF_ALLOW_REINIT isn't set. 
Signed-off-by: Roman Gushchin Acked-by: Tejun Heo Signed-off-by: Dennis Zhou --- include/linux/percpu-refcount.h | 1 + lib/percpu-refcount.c | 13 +++++++++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 0f0240af8520..7aef0abc194a 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -102,6 +102,7 @@ struct percpu_ref { percpu_ref_func_t *release; percpu_ref_func_t *confirm_switch; bool force_atomic:1; + bool allow_reinit:1; struct rcu_head rcu; }; diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 9877682e49c7..501b517bd3db 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -69,11 +69,14 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, return -ENOMEM; ref->force_atomic = flags & PERCPU_REF_INIT_ATOMIC; + ref->allow_reinit = flags & PERCPU_REF_ALLOW_REINIT; - if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD)) + if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD)) { ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC; - else + ref->allow_reinit = true; + } else { start_count += PERCPU_COUNT_BIAS; + } if (flags & PERCPU_REF_INIT_DEAD) ref->percpu_count_ptr |= __PERCPU_REF_DEAD; @@ -119,6 +122,9 @@ static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu) ref->confirm_switch = NULL; wake_up_all(&percpu_ref_switch_waitq); + if (!ref->allow_reinit) + percpu_ref_exit(ref); + /* drop ref from percpu_ref_switch_to_atomic() */ percpu_ref_put(ref); } @@ -194,6 +200,9 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC)) return; + if (WARN_ON_ONCE(!ref->allow_reinit)) + return; + atomic_long_add(PERCPU_COUNT_BIAS, &ref->count); /*