From 9467f85960a31d56f95371516e55e210e1e3d51c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 22 Sep 2016 08:05:17 -0600 Subject: [PATCH 1/3] blk-mq/cpu-notif: Convert to new hotplug state machine Replace the block-mq notifier list management with the multi instance facility in the cpu hotplug state machine. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: linux-block@vger.kernel.org Cc: rt@linutronix.de Cc: Christoph Hellwing Signed-off-by: Jens Axboe --- block/Makefile | 2 +- block/blk-mq-cpu.c | 67 ------------------------------------------ block/blk-mq.c | 36 ++++++++--------------- block/blk-mq.h | 7 ----- include/linux/blk-mq.h | 8 +---- 5 files changed, 15 insertions(+), 105 deletions(-) delete mode 100644 block/blk-mq-cpu.c diff --git a/block/Makefile b/block/Makefile index 9eda2322b2d4..ec9789745274 100644 --- a/block/Makefile +++ b/block/Makefile @@ -6,7 +6,7 @@ obj-$(CONFIG_BLOCK) := bio.o elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ blk-lib.o blk-mq.o blk-mq-tag.o \ - blk-mq-sysfs.o blk-mq-cpu.o blk-mq-cpumap.o ioctl.o \ + blk-mq-sysfs.o blk-mq-cpumap.o ioctl.o \ genhd.o scsi_ioctl.o partition-generic.o ioprio.o \ badblocks.o partitions/ diff --git a/block/blk-mq-cpu.c b/block/blk-mq-cpu.c deleted file mode 100644 index bb3ed488f7b5..000000000000 --- a/block/blk-mq-cpu.c +++ /dev/null @@ -1,67 +0,0 @@ -/* - * CPU notifier helper code for blk-mq - * - * Copyright (C) 2013-2014 Jens Axboe - */ -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include "blk-mq.h" - -static LIST_HEAD(blk_mq_cpu_notify_list); -static DEFINE_RAW_SPINLOCK(blk_mq_cpu_notify_lock); - -static int blk_mq_main_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) -{ - unsigned int cpu = (unsigned long) hcpu; - struct blk_mq_cpu_notifier *notify; - int ret = NOTIFY_OK; - - raw_spin_lock(&blk_mq_cpu_notify_lock); - - list_for_each_entry(notify, &blk_mq_cpu_notify_list, list) { - ret = notify->notify(notify->data, action, cpu); - if (ret != NOTIFY_OK) - break; - } - - raw_spin_unlock(&blk_mq_cpu_notify_lock); - return ret; -} - -void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier) -{ - BUG_ON(!notifier->notify); - - raw_spin_lock(&blk_mq_cpu_notify_lock); - list_add_tail(¬ifier->list, &blk_mq_cpu_notify_list); - raw_spin_unlock(&blk_mq_cpu_notify_lock); -} - -void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier) -{ - raw_spin_lock(&blk_mq_cpu_notify_lock); - list_del(¬ifier->list); - raw_spin_unlock(&blk_mq_cpu_notify_lock); -} - -void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, - int (*fn)(void *, unsigned long, unsigned int), - void *data) -{ - notifier->notify = fn; - notifier->data = data; -} - -void __init blk_mq_cpu_init(void) -{ - hotcpu_notifier(blk_mq_main_cpu_notify, 0); -} diff --git a/block/blk-mq.c b/block/blk-mq.c index e0a69daddbd8..e46e7ca92f40 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1593,11 +1593,13 @@ fail: * software queue to the hw queue dispatch list, and ensure that it * gets run. */ -static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu) +static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node) { + struct blk_mq_hw_ctx *hctx; struct blk_mq_ctx *ctx; LIST_HEAD(tmp); + hctx = hlist_entry_safe(node, struct blk_mq_hw_ctx, cpuhp_dead); ctx = __blk_mq_get_ctx(hctx->queue, cpu); spin_lock(&ctx->lock); @@ -1608,30 +1610,20 @@ static int blk_mq_hctx_cpu_offline(struct blk_mq_hw_ctx *hctx, int cpu) spin_unlock(&ctx->lock); if (list_empty(&tmp)) - return NOTIFY_OK; + return 0; spin_lock(&hctx->lock); list_splice_tail_init(&tmp, &hctx->dispatch); spin_unlock(&hctx->lock); blk_mq_run_hw_queue(hctx, true); - return NOTIFY_OK; + return 0; } -static int blk_mq_hctx_notify(void *data, unsigned long action, - unsigned int cpu) +static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx) { - struct blk_mq_hw_ctx *hctx = data; - - if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) - return blk_mq_hctx_cpu_offline(hctx, cpu); - - /* - * In case of CPU online, tags may be reallocated - * in blk_mq_map_swqueue() after mapping is updated. - */ - - return NOTIFY_OK; + cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD, + &hctx->cpuhp_dead); } /* hctx->ctxs will be freed in queue's release handler */ @@ -1651,7 +1643,7 @@ static void blk_mq_exit_hctx(struct request_queue *q, if (set->ops->exit_hctx) set->ops->exit_hctx(hctx, hctx_idx); - blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); + blk_mq_remove_cpuhp(hctx); blk_free_flush_queue(hctx->fq); sbitmap_free(&hctx->ctx_map); } @@ -1698,9 +1690,7 @@ static int blk_mq_init_hctx(struct request_queue *q, hctx->queue_num = hctx_idx; hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED; - blk_mq_init_cpu_notifier(&hctx->cpu_notifier, - blk_mq_hctx_notify, hctx); - blk_mq_register_cpu_notifier(&hctx->cpu_notifier); + cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead); hctx->tags = set->tags[hctx_idx]; @@ -1745,8 +1735,7 @@ static int blk_mq_init_hctx(struct request_queue *q, free_ctxs: kfree(hctx->ctxs); unregister_cpu_notifier: - blk_mq_unregister_cpu_notifier(&hctx->cpu_notifier); - + blk_mq_remove_cpuhp(hctx); return -1; } @@ -2399,7 +2388,8 @@ void blk_mq_enable_hotplug(void) static int __init blk_mq_init(void) { - blk_mq_cpu_init(); + cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL, + blk_mq_hctx_notify_dead); hotcpu_notifier(blk_mq_queue_reinit_notify, 0); diff --git a/block/blk-mq.h b/block/blk-mq.h index 9b15d2ef7f7b..8fd3cc4fb715 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -32,13 +32,6 @@ void blk_mq_wake_waiters(struct request_queue *q); /* * CPU hotplug helpers */ -struct blk_mq_cpu_notifier; -void blk_mq_init_cpu_notifier(struct blk_mq_cpu_notifier *notifier, - int (*fn)(void *, unsigned long, unsigned int), - void *data); -void blk_mq_register_cpu_notifier(struct blk_mq_cpu_notifier *notifier); -void blk_mq_unregister_cpu_notifier(struct blk_mq_cpu_notifier *notifier); -void blk_mq_cpu_init(void); void blk_mq_enable_hotplug(void); void blk_mq_disable_hotplug(void); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index fbcfdf323243..b3d1a7f4b5f2 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -7,12 +7,6 @@ struct blk_mq_tags; struct blk_flush_queue; -struct blk_mq_cpu_notifier { - struct list_head list; - void *data; - int (*notify)(void *data, unsigned long action, unsigned int cpu); -}; - struct blk_mq_hw_ctx { struct { spinlock_t lock; @@ -53,7 +47,7 @@ struct blk_mq_hw_ctx { struct delayed_work delay_work; - struct blk_mq_cpu_notifier cpu_notifier; + struct hlist_node cpuhp_dead; struct kobject kobj; unsigned long poll_considered; From 65d5291eee667b9b310123991234f2fb18e51548 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 22 Sep 2016 08:05:19 -0600 Subject: [PATCH 2/3] blk-mq: Convert to new hotplug state machine Install the callbacks via the state machine so we can phase out the cpu hotplug notifiers mess. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: linux-block@vger.kernel.org Cc: rt@linutronix.de Cc: Christoph Hellwing Link: http://lkml.kernel.org/r/20160919212601.180033814@linutronix.de Signed-off-by: Thomas Gleixner Signed-off-by: Jens Axboe --- block/blk-mq.c | 87 +++++++++++++++++++++++++------------------------- 1 file changed, 43 insertions(+), 44 deletions(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index e46e7ca92f40..65347cb7d7e1 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2116,50 +2116,18 @@ static void blk_mq_queue_reinit(struct request_queue *q, blk_mq_sysfs_register(q); } -static int blk_mq_queue_reinit_notify(struct notifier_block *nb, - unsigned long action, void *hcpu) +/* + * New online cpumask which is going to be set in this hotplug event. + * Declare this cpumasks as global as cpu-hotplug operation is invoked + * one-by-one and dynamically allocating this could result in a failure. + */ +static struct cpumask cpuhp_online_new; + +static void blk_mq_queue_reinit_work(void) { struct request_queue *q; - int cpu = (unsigned long)hcpu; - /* - * New online cpumask which is going to be set in this hotplug event. - * Declare this cpumasks as global as cpu-hotplug operation is invoked - * one-by-one and dynamically allocating this could result in a failure. - */ - static struct cpumask online_new; - - /* - * Before hotadded cpu starts handling requests, new mappings must - * be established. Otherwise, these requests in hw queue might - * never be dispatched. - * - * For example, there is a single hw queue (hctx) and two CPU queues - * (ctx0 for CPU0, and ctx1 for CPU1). - * - * Now CPU1 is just onlined and a request is inserted into - * ctx1->rq_list and set bit0 in pending bitmap as ctx1->index_hw is - * still zero. - * - * And then while running hw queue, flush_busy_ctxs() finds bit0 is - * set in pending bitmap and tries to retrieve requests in - * hctx->ctxs[0]->rq_list. But htx->ctxs[0] is a pointer to ctx0, - * so the request in ctx1->rq_list is ignored. - */ - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_DEAD: - case CPU_UP_CANCELED: - cpumask_copy(&online_new, cpu_online_mask); - break; - case CPU_UP_PREPARE: - cpumask_copy(&online_new, cpu_online_mask); - cpumask_set_cpu(cpu, &online_new); - break; - default: - return NOTIFY_OK; - } mutex_lock(&all_q_mutex); - /* * We need to freeze and reinit all existing queues. Freezing * involves synchronous wait for an RCU grace period and doing it @@ -2180,13 +2148,43 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, } list_for_each_entry(q, &all_q_list, all_q_node) - blk_mq_queue_reinit(q, &online_new); + blk_mq_queue_reinit(q, &cpuhp_online_new); list_for_each_entry(q, &all_q_list, all_q_node) blk_mq_unfreeze_queue(q); mutex_unlock(&all_q_mutex); - return NOTIFY_OK; +} + +static int blk_mq_queue_reinit_dead(unsigned int cpu) +{ + cpumask_clear_cpu(cpu, &cpuhp_online_new); + blk_mq_queue_reinit_work(); + return 0; +} + +/* + * Before hotadded cpu starts handling requests, new mappings must be + * established. Otherwise, these requests in hw queue might never be + * dispatched. + * + * For example, there is a single hw queue (hctx) and two CPU queues (ctx0 + * for CPU0, and ctx1 for CPU1). + * + * Now CPU1 is just onlined and a request is inserted into ctx1->rq_list + * and set bit0 in pending bitmap as ctx1->index_hw is still zero. + * + * And then while running hw queue, flush_busy_ctxs() finds bit0 is set in + * pending bitmap and tries to retrieve requests in hctx->ctxs[0]->rq_list. + * But htx->ctxs[0] is a pointer to ctx0, so the request in ctx1->rq_list + * is ignored. + */ +static int blk_mq_queue_reinit_prepare(unsigned int cpu) +{ + cpumask_copy(&cpuhp_online_new, cpu_online_mask); + cpumask_set_cpu(cpu, &cpuhp_online_new); + blk_mq_queue_reinit_work(); + return 0; } static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) @@ -2391,8 +2389,9 @@ static int __init blk_mq_init(void) cpuhp_setup_state_multi(CPUHP_BLK_MQ_DEAD, "block/mq:dead", NULL, blk_mq_hctx_notify_dead); - hotcpu_notifier(blk_mq_queue_reinit_notify, 0); - + cpuhp_setup_state_nocalls(CPUHP_BLK_MQ_PREPARE, "block/mq:prepare", + blk_mq_queue_reinit_prepare, + blk_mq_queue_reinit_dead); return 0; } subsys_initcall(blk_mq_init); From 97a32864e6de5944c6356049f60569de01e9ba1f Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 23 Sep 2016 15:02:38 +0200 Subject: [PATCH 3/3] blk-mq: fixup "Convert to new hotplug state machine" The "blk_mq_queue_reinit_dead()" just cleared the cpumask instead doing a copy. Since we might never had an online callback we could end up with a ZERO mask which in turn leads to crash as test robot demonstarted. Fixes: 65d5291eee66 ("blk-mq: Convert to new hotplug state machine") Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Jens Axboe --- block/blk-mq.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/block/blk-mq.c b/block/blk-mq.c index 65347cb7d7e1..8c0f80198e52 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -2158,7 +2158,7 @@ static void blk_mq_queue_reinit_work(void) static int blk_mq_queue_reinit_dead(unsigned int cpu) { - cpumask_clear_cpu(cpu, &cpuhp_online_new); + cpumask_copy(&cpuhp_online_new, cpu_online_mask); blk_mq_queue_reinit_work(); return 0; }