From 8d354f133e86dd03ea7885a91df398c55ff699ff Mon Sep 17 00:00:00 2001
From: Jens Axboe
Date: Thu, 25 Aug 2016 08:00:28 -0600
Subject: [PATCH] blk-mq: improve layout of blk_mq_hw_ctx

Various cache line optimizations:

- Move delay_work towards the end. It's huge, and we don't use it
  a lot (only SCSI).

- Move the atomic state into the same cacheline as the dispatch
  list and lock.

- Rearrange a few members to pack it better.

- Shrink the max-order for dispatch accounting from 10 to 7. This
  means that ->dispatched[] and ->run now take up their own
  cacheline.

This shrinks struct blk_mq_hw_ctx down to 8 cachelines.

Signed-off-by: Jens Axboe
---
 include/linux/blk-mq.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index d579252e6463..e1544f0f8c21 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -22,11 +22,10 @@ struct blk_mq_hw_ctx {
 	struct {
 		spinlock_t		lock;
 		struct list_head	dispatch;
+		unsigned long		state;		/* BLK_MQ_S_* flags */
 	} ____cacheline_aligned_in_smp;
 
-	unsigned long		state;		/* BLK_MQ_S_* flags */
 	struct work_struct	run_work;
-	struct delayed_work	delay_work;
 	cpumask_var_t		cpumask;
 	int			next_cpu;
 	int			next_cpu_batch;
@@ -40,8 +39,8 @@ struct blk_mq_hw_ctx {
 
 	struct blk_mq_ctxmap	ctx_map;
 
-	unsigned int		nr_ctx;
 	struct blk_mq_ctx	**ctxs;
+	unsigned int		nr_ctx;
 
 	atomic_t		wait_index;
 
@@ -49,7 +48,7 @@ struct blk_mq_hw_ctx {
 
 	unsigned long		queued;
 	unsigned long		run;
-#define BLK_MQ_MAX_DISPATCH_ORDER	10
+#define BLK_MQ_MAX_DISPATCH_ORDER	7
 	unsigned long		dispatched[BLK_MQ_MAX_DISPATCH_ORDER];
 
 	unsigned int		numa_node;
@@ -57,6 +56,8 @@ struct blk_mq_hw_ctx {
 
 	atomic_t		nr_active;
 
+	struct delayed_work	delay_work;
+
 	struct blk_mq_cpu_notifier	cpu_notifier;
 	struct kobject		kobj;
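
For readers unfamiliar with the technique: ____cacheline_aligned_in_smp
starts the member group that follows it on a cache line boundary, so
fields that the hot path touches together (here the lock, the dispatch
list, and the state flags) are fetched with a single line fill, while
large, rarely used members (delay_work) are pushed to the end of the
struct. Below is a minimal userspace sketch of the same idea. It is not
kernel code: the names hot_cold, dispatch, and delay_work[] are
illustrative stand-ins for the real blk_mq_hw_ctx members, 64 bytes is
an assumed line size, and C11 alignas stands in for the kernel's
L1_CACHE_BYTES-based ____cacheline_aligned_in_smp macro.

/*
 * Minimal userspace sketch (not kernel code) of the packing idea in
 * this patch: keep members used together on one cache line, push the
 * big cold member to the end. All names are illustrative.
 */
#include <assert.h>
#include <stdalign.h>
#include <stddef.h>
#include <stdio.h>

#define CACHELINE 64			/* assumed L1 line size */

struct hot_cold {
	/* Hot group: members used together in the dispatch path share
	 * one line, mirroring lock + dispatch + state in the patch. */
	alignas(CACHELINE) struct {
		int		lock;		/* stand-in for spinlock_t */
		void		*dispatch;	/* stand-in for list_head */
		unsigned long	state;
	} hot;

	/* Cold group: a large, rarely used member starts on its own
	 * line, mirroring the move of delay_work to the end. */
	alignas(CACHELINE) unsigned char delay_work[128];
};

/* The cold member must not share the hot group's cache line. */
static_assert(offsetof(struct hot_cold, delay_work) % CACHELINE == 0,
	      "cold data starts on a cache line boundary");

int main(void)
{
	printf("hot at %zu, cold at %zu, total %zu bytes (%zu lines)\n",
	       offsetof(struct hot_cold, hot),
	       offsetof(struct hot_cold, delay_work),
	       sizeof(struct hot_cold),
	       sizeof(struct hot_cold) / CACHELINE);
	return 0;
}

In the kernel itself, running pahole -C blk_mq_hw_ctx against a built
vmlinux is the usual way to verify the resulting member offsets, holes,
and per-cacheline usage after a layout change like this one.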