From b6dccf7fae4331b0ea41cf087e3f02d5db9161dc Mon Sep 17 00:00:00 2001 From: Arnav Dawn Date: Wed, 12 Jul 2017 16:10:40 +0530 Subject: [PATCH 01/35] nvme: add support for FW activation without reset This patch adds support for handling Fw activation without reset On completion of FW-activation-starting AER, all queues are paused till CSTS.PP is cleared or timed out (exceeds max time for fw activtion MTFA). If device fails to clear CSTS.PP within MTFA, driver issues reset controller. Signed-off-by: Arnav Dawn Reviewed-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 75 ++++++++++++++++++++++++++++++++++++++++ drivers/nvme/host/nvme.h | 2 ++ include/linux/nvme.h | 9 +++++ 3 files changed, 86 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index c596dd3c58b1..1a1dedbac39b 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -76,6 +76,11 @@ static DEFINE_SPINLOCK(dev_list_lock); static struct class *nvme_class; +static __le32 nvme_get_log_dw10(u8 lid, size_t size) +{ + return cpu_to_le32((((size / 4) - 1) << 16) | lid); +} + int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { if (!nvme_change_ctrl_state(ctrl, NVME_CTRL_RESETTING)) @@ -2534,6 +2539,71 @@ static void nvme_async_event_work(struct work_struct *work) spin_unlock_irq(&ctrl->lock); } +static bool nvme_ctrl_pp_status(struct nvme_ctrl *ctrl) +{ + + u32 csts; + + if (ctrl->ops->reg_read32(ctrl, NVME_REG_CSTS, &csts)) + return false; + + if (csts == ~0) + return false; + + return ((ctrl->ctrl_config & NVME_CC_ENABLE) && (csts & NVME_CSTS_PP)); +} + +static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl) +{ + struct nvme_command c = { }; + struct nvme_fw_slot_info_log *log; + + log = kmalloc(sizeof(*log), GFP_KERNEL); + if (!log) + return; + + c.common.opcode = nvme_admin_get_log_page; + c.common.nsid = cpu_to_le32(0xffffffff); + c.common.cdw10[0] = nvme_get_log_dw10(NVME_LOG_FW_SLOT, sizeof(*log)); + + if (!nvme_submit_sync_cmd(ctrl->admin_q, &c, log, sizeof(*log))) + dev_warn(ctrl->device, + "Get FW SLOT INFO log error\n"); + kfree(log); +} + +static void nvme_fw_act_work(struct work_struct *work) +{ + struct nvme_ctrl *ctrl = container_of(work, + struct nvme_ctrl, fw_act_work); + unsigned long fw_act_timeout; + + if (ctrl->mtfa) + fw_act_timeout = jiffies + + msecs_to_jiffies(ctrl->mtfa * 100); + else + fw_act_timeout = jiffies + + msecs_to_jiffies(admin_timeout * 1000); + + nvme_stop_queues(ctrl); + while (nvme_ctrl_pp_status(ctrl)) { + if (time_after(jiffies, fw_act_timeout)) { + dev_warn(ctrl->device, + "Fw activation timeout, reset controller\n"); + nvme_reset_ctrl(ctrl); + break; + } + msleep(100); + } + + if (ctrl->state != NVME_CTRL_LIVE) + return; + + nvme_start_queues(ctrl); + /* read FW slot informationi to clear the AER*/ + nvme_get_fw_slot_info(ctrl); +} + void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, union nvme_result *res) { @@ -2560,6 +2630,9 @@ void nvme_complete_async_event(struct nvme_ctrl *ctrl, __le16 status, dev_info(ctrl->device, "rescanning\n"); nvme_queue_scan(ctrl); break; + case NVME_AER_NOTICE_FW_ACT_STARTING: + schedule_work(&ctrl->fw_act_work); + break; default: dev_warn(ctrl->device, "async event result %08x\n", result); } @@ -2607,6 +2680,7 @@ void nvme_stop_ctrl(struct nvme_ctrl *ctrl) nvme_stop_keep_alive(ctrl); flush_work(&ctrl->async_event_work); flush_work(&ctrl->scan_work); + cancel_work_sync(&ctrl->fw_act_work); } EXPORT_SYMBOL_GPL(nvme_stop_ctrl); @@ -2670,6 +2744,7 @@ int nvme_init_ctrl(struct nvme_ctrl *ctrl, struct device *dev, ctrl->quirks = quirks; INIT_WORK(&ctrl->scan_work, nvme_scan_work); INIT_WORK(&ctrl->async_event_work, nvme_async_event_work); + INIT_WORK(&ctrl->fw_act_work, nvme_fw_act_work); ret = nvme_set_instance(ctrl); if (ret) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 8f2a168ddc01..b40b9af4564f 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -142,6 +142,7 @@ struct nvme_ctrl { u16 cntlid; u32 ctrl_config; + u16 mtfa; u32 queue_count; u64 cap; @@ -167,6 +168,7 @@ struct nvme_ctrl { struct work_struct scan_work; struct work_struct async_event_work; struct delayed_work ka_work; + struct work_struct fw_act_work; /* Power saving configuration */ u64 ps_max_latency_us; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 25d8225dbd04..5d0f2f4fc3b8 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -146,6 +146,7 @@ enum { NVME_CSTS_RDY = 1 << 0, NVME_CSTS_CFS = 1 << 1, NVME_CSTS_NSSRO = 1 << 4, + NVME_CSTS_PP = 1 << 5, NVME_CSTS_SHST_NORMAL = 0 << 2, NVME_CSTS_SHST_OCCUR = 1 << 2, NVME_CSTS_SHST_CMPLT = 2 << 2, @@ -376,6 +377,13 @@ struct nvme_smart_log { __u8 rsvd216[296]; }; +struct nvme_fw_slot_info_log { + __u8 afi; + __u8 rsvd1[7]; + __le64 frs[7]; + __u8 rsvd64[448]; +}; + enum { NVME_SMART_CRIT_SPARE = 1 << 0, NVME_SMART_CRIT_TEMPERATURE = 1 << 1, @@ -386,6 +394,7 @@ enum { enum { NVME_AER_NOTICE_NS_CHANGED = 0x0002, + NVME_AER_NOTICE_FW_ACT_STARTING = 0x0102, }; struct nvme_lba_range_type { From 62346eaeb2f1a0524b35eaa2f479596f40491165 Mon Sep 17 00:00:00 2001 From: Arnav Dawn Date: Wed, 12 Jul 2017 16:11:53 +0530 Subject: [PATCH 02/35] nvme: define NVME_NSID_ALL Define the constant "0xffffffff" (used as nsid for all namespaces) as NVME_NSID_ALL. Signed-off-by: Arnav Dawn Signed-off-by: Sagi Grimberg --- drivers/nvme/host/core.c | 6 +++--- include/linux/nvme.h | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 1a1dedbac39b..a6afeedc009a 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -312,7 +312,7 @@ static int nvme_toggle_streams(struct nvme_ctrl *ctrl, bool enable) memset(&c, 0, sizeof(c)); c.directive.opcode = nvme_admin_directive_send; - c.directive.nsid = cpu_to_le32(0xffffffff); + c.directive.nsid = cpu_to_le32(NVME_NSID_ALL); c.directive.doper = NVME_DIR_SND_ID_OP_ENABLE; c.directive.dtype = NVME_DIR_IDENTIFY; c.directive.tdtype = NVME_DIR_STREAMS; @@ -362,7 +362,7 @@ static int nvme_configure_directives(struct nvme_ctrl *ctrl) if (ret) return ret; - ret = nvme_get_stream_params(ctrl, &s, 0xffffffff); + ret = nvme_get_stream_params(ctrl, &s, NVME_NSID_ALL); if (ret) return ret; @@ -2563,7 +2563,7 @@ static void nvme_get_fw_slot_info(struct nvme_ctrl *ctrl) return; c.common.opcode = nvme_admin_get_log_page; - c.common.nsid = cpu_to_le32(0xffffffff); + c.common.nsid = cpu_to_le32(NVME_NSID_ALL); c.common.cdw10[0] = nvme_get_log_dw10(NVME_LOG_FW_SLOT, sizeof(*log)); if (!nvme_submit_sync_cmd(ctrl->admin_q, &c, log, sizeof(*log))) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 5d0f2f4fc3b8..1d79046bf9d4 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -32,6 +32,8 @@ #define NVME_RDMA_IP_PORT 4420 +#define NVME_NSID_ALL 0xffffffff + enum nvme_subsys_type { NVME_NQN_DISC = 1, /* Discovery type target subsystem */ NVME_NQN_NVME = 2, /* NVME type target subsystem */ From dbf86b39005d26b21c52a23720e15fb850d71cdc Mon Sep 17 00:00:00 2001 From: Jon Derrick Date: Wed, 16 Aug 2017 09:51:29 +0200 Subject: [PATCH 03/35] nvme: add support for NVMe 1.3 Timestamp Feature NVME's Timestamp feature allows controllers to be aware of the epoch time in milliseconds. This patch adds the set features hook for various transports through the identify path, so that resets and resumes can update the controller as necessary. Signed-off-by: Jon Derrick [hch: rebased on top of nvme-4.13 error handling changes, changed nvme_configure_timestamp to return the status] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 21 +++++++++++++++++++++ include/linux/nvme.h | 2 ++ 2 files changed, 23 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a6afeedc009a..0b979e16655e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1507,6 +1507,23 @@ static void nvme_set_queue_limits(struct nvme_ctrl *ctrl, blk_queue_write_cache(q, vwc, vwc); } +static int nvme_configure_timestamp(struct nvme_ctrl *ctrl) +{ + __le64 ts; + int ret; + + if (!(ctrl->oncs & NVME_CTRL_ONCS_TIMESTAMP)) + return 0; + + ts = cpu_to_le64(ktime_to_ms(ktime_get_real())); + ret = nvme_set_features(ctrl, NVME_FEAT_TIMESTAMP, 0, &ts, sizeof(ts), + NULL); + if (ret) + dev_warn_once(ctrl->device, + "could not set timestamp (%d)\n", ret); + return ret; +} + static int nvme_configure_apst(struct nvme_ctrl *ctrl) { /* @@ -1861,6 +1878,10 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ret = nvme_configure_apst(ctrl); if (ret < 0) return ret; + + ret = nvme_configure_timestamp(ctrl); + if (ret < 0) + return ret; ret = nvme_configure_directives(ctrl); if (ret < 0) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 1d79046bf9d4..a12b47073273 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -254,6 +254,7 @@ enum { NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, NVME_CTRL_ONCS_DSM = 1 << 2, NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, + NVME_CTRL_ONCS_TIMESTAMP = 1 << 6, NVME_CTRL_VWC_PRESENT = 1 << 0, NVME_CTRL_OACS_SEC_SUPP = 1 << 0, NVME_CTRL_OACS_DIRECTIVES = 1 << 5, @@ -688,6 +689,7 @@ enum { NVME_FEAT_ASYNC_EVENT = 0x0b, NVME_FEAT_AUTO_PST = 0x0c, NVME_FEAT_HOST_MEM_BUF = 0x0d, + NVME_FEAT_TIMESTAMP = 0x0e, NVME_FEAT_KATO = 0x0f, NVME_FEAT_SW_PROGRESS = 0x80, NVME_FEAT_HOST_ID = 0x81, From 1645d5036f9201bd0925aff099fa92fbe7afaff4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 18 Jul 2017 19:46:36 +0200 Subject: [PATCH 04/35] nvmet: use NVME_NSID_ALL Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Max Gurtovoy --- drivers/nvme/target/admin-cmd.c | 2 +- drivers/nvme/target/configfs.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index a53bb6635b83..dffd8064163b 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -100,7 +100,7 @@ static u16 nvmet_get_smart_log(struct nvmet_req *req, u16 status; WARN_ON(req == NULL || slog == NULL); - if (req->cmd->get_log_page.nsid == cpu_to_le32(0xFFFFFFFF)) + if (req->cmd->get_log_page.nsid == cpu_to_le32(NVME_NSID_ALL)) status = nvmet_get_smart_log_all(req, slog); else status = nvmet_get_smart_log_nsid(req, slog); diff --git a/drivers/nvme/target/configfs.c b/drivers/nvme/target/configfs.c index 0a0067e771f5..b6aeb1d70951 100644 --- a/drivers/nvme/target/configfs.c +++ b/drivers/nvme/target/configfs.c @@ -444,7 +444,7 @@ static struct config_group *nvmet_ns_make(struct config_group *group, goto out; ret = -EINVAL; - if (nsid == 0 || nsid == 0xffffffff) + if (nsid == 0 || nsid == NVME_NSID_ALL) goto out; ret = -ENOMEM; From 130c24b5be80512be973e3a614abda75f5896b42 Mon Sep 17 00:00:00 2001 From: Guan Junxiong Date: Fri, 4 Aug 2017 17:27:47 +0800 Subject: [PATCH 05/35] nvmet: fix the return error code of target if host is not allowed nvmf target shall return NVME_SC_CONNECT_INVALID_HOST instead of the gereal code INVALID_PARAM when the given host nqn is not allowed to connect. Refer to the 2.2.1 section of the NVMe over Fabrics Spec. Signed-off-by: Guan Junxiong Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index f4b02bb4a1a8..ea7eb00dcb87 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -749,6 +749,7 @@ u16 nvmet_alloc_ctrl(const char *subsysnqn, const char *hostnqn, hostnqn, subsysnqn); req->rsp->result.u32 = IPO_IATTR_CONNECT_DATA(hostnqn); up_read(&nvmet_config_sem); + status = NVME_SC_CONNECT_INVALID_HOST | NVME_SC_DNR; goto out_put_subsystem; } up_read(&nvmet_config_sem); From 1c35be8c8a13fa714c1c783f14653ca9d3e3721f Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 3 Aug 2017 11:28:48 +0200 Subject: [PATCH 06/35] nvmet-fcloop: remove ALL_OPTS define ALL_OPTS isn't used anywhere, remove it. Signed-off-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fcloop.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 1bb9d5b311b1..1cb9847ec261 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -193,9 +193,6 @@ out_free_options: #define TGTPORT_OPTS (NVMF_OPT_WWNN | NVMF_OPT_WWPN) -#define ALL_OPTS (NVMF_OPT_WWNN | NVMF_OPT_WWPN | NVMF_OPT_ROLES | \ - NVMF_OPT_FCADDR | NVMF_OPT_LPWWNN | NVMF_OPT_LPWWPN) - static DEFINE_SPINLOCK(fcloop_lock); static LIST_HEAD(fcloop_lports); From 4897ad4e0812c358c8bdd7aa25fdc2201ae2de0b Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Thu, 3 Aug 2017 11:28:47 +0200 Subject: [PATCH 07/35] nvme-rdma: remove NVME_RDMA_MAX_SEGMENT_SIZE NVME_RDMA_MAX_SEGMENT_SIZE is not used anywhere, zap it. Signed-off-by: Johannes Thumshirn Reviewed-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 9ff0eb3a625e..ffd73890d077 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -36,8 +36,6 @@ #define NVME_RDMA_CONNECT_TIMEOUT_MS 3000 /* 3 second */ -#define NVME_RDMA_MAX_SEGMENT_SIZE 0xffffff /* 24-bit SGL field */ - #define NVME_RDMA_MAX_SEGMENTS 256 #define NVME_RDMA_MAX_INLINE_SEGMENTS 1 From 90af35123d3be8b011a8a3f69ce46fd431c55b25 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 10 Jul 2017 09:22:28 +0300 Subject: [PATCH 08/35] nvme-rdma: move nvme_rdma_configure_admin_queue code location We will call it from other places so avoid having to forward declare it. Also move it next to nvme_rdma_destroy_admin_queue. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 185 ++++++++++++++++++++------------------- 1 file changed, 94 insertions(+), 91 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index ffd73890d077..3cecb087ee3a 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -149,6 +149,9 @@ static int nvme_rdma_cm_handler(struct rdma_cm_id *cm_id, struct rdma_cm_event *event); static void nvme_rdma_recv_done(struct ib_cq *cq, struct ib_wc *wc); +static const struct blk_mq_ops nvme_rdma_mq_ops; +static const struct blk_mq_ops nvme_rdma_admin_mq_ops; + /* XXX: really should move to a generic header sooner or later.. */ static inline void put_unaligned_le24(u32 val, u8 *p) { @@ -653,6 +656,97 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl) nvme_rdma_dev_put(ctrl->device); } +static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) +{ + int error; + + error = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH); + if (error) + return error; + + ctrl->device = ctrl->queues[0].device; + + /* + * We need a reference on the device as long as the tag_set is alive, + * as the MRs in the request structures need a valid ib_device. + */ + error = -EINVAL; + if (!nvme_rdma_dev_get(ctrl->device)) + goto out_free_queue; + + ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS, + ctrl->device->dev->attrs.max_fast_reg_page_list_len); + + memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); + ctrl->admin_tag_set.ops = &nvme_rdma_admin_mq_ops; + ctrl->admin_tag_set.queue_depth = NVME_RDMA_AQ_BLKMQ_DEPTH; + ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */ + ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; + ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_rdma_request) + + SG_CHUNK_SIZE * sizeof(struct scatterlist); + ctrl->admin_tag_set.driver_data = ctrl; + ctrl->admin_tag_set.nr_hw_queues = 1; + ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; + + error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); + if (error) + goto out_put_dev; + + ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); + if (IS_ERR(ctrl->ctrl.admin_q)) { + error = PTR_ERR(ctrl->ctrl.admin_q); + goto out_free_tagset; + } + + error = nvmf_connect_admin_queue(&ctrl->ctrl); + if (error) + goto out_cleanup_queue; + + set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); + + error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, + &ctrl->ctrl.cap); + if (error) { + dev_err(ctrl->ctrl.device, + "prop_get NVME_REG_CAP failed\n"); + goto out_cleanup_queue; + } + + ctrl->ctrl.sqsize = + min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize); + + error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); + if (error) + goto out_cleanup_queue; + + ctrl->ctrl.max_hw_sectors = + (ctrl->max_fr_pages - 1) << (PAGE_SHIFT - 9); + + error = nvme_init_identify(&ctrl->ctrl); + if (error) + goto out_cleanup_queue; + + error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev, + &ctrl->async_event_sqe, sizeof(struct nvme_command), + DMA_TO_DEVICE); + if (error) + goto out_cleanup_queue; + + return 0; + +out_cleanup_queue: + blk_cleanup_queue(ctrl->ctrl.admin_q); +out_free_tagset: + /* disconnect and drain the queue before freeing the tagset */ + nvme_rdma_stop_queue(&ctrl->queues[0]); + blk_mq_free_tag_set(&ctrl->admin_tag_set); +out_put_dev: + nvme_rdma_dev_put(ctrl->device); +out_free_queue: + nvme_rdma_free_queue(&ctrl->queues[0]); + return error; +} + static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); @@ -1517,97 +1611,6 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { .timeout = nvme_rdma_timeout, }; -static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) -{ - int error; - - error = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH); - if (error) - return error; - - ctrl->device = ctrl->queues[0].device; - - /* - * We need a reference on the device as long as the tag_set is alive, - * as the MRs in the request structures need a valid ib_device. - */ - error = -EINVAL; - if (!nvme_rdma_dev_get(ctrl->device)) - goto out_free_queue; - - ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS, - ctrl->device->dev->attrs.max_fast_reg_page_list_len); - - memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); - ctrl->admin_tag_set.ops = &nvme_rdma_admin_mq_ops; - ctrl->admin_tag_set.queue_depth = NVME_RDMA_AQ_BLKMQ_DEPTH; - ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */ - ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; - ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_rdma_request) + - SG_CHUNK_SIZE * sizeof(struct scatterlist); - ctrl->admin_tag_set.driver_data = ctrl; - ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; - - error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); - if (error) - goto out_put_dev; - - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - error = PTR_ERR(ctrl->ctrl.admin_q); - goto out_free_tagset; - } - - error = nvmf_connect_admin_queue(&ctrl->ctrl); - if (error) - goto out_cleanup_queue; - - set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); - - error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, - &ctrl->ctrl.cap); - if (error) { - dev_err(ctrl->ctrl.device, - "prop_get NVME_REG_CAP failed\n"); - goto out_cleanup_queue; - } - - ctrl->ctrl.sqsize = - min_t(int, NVME_CAP_MQES(ctrl->ctrl.cap), ctrl->ctrl.sqsize); - - error = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); - if (error) - goto out_cleanup_queue; - - ctrl->ctrl.max_hw_sectors = - (ctrl->max_fr_pages - 1) << (PAGE_SHIFT - 9); - - error = nvme_init_identify(&ctrl->ctrl); - if (error) - goto out_cleanup_queue; - - error = nvme_rdma_alloc_qe(ctrl->queues[0].device->dev, - &ctrl->async_event_sqe, sizeof(struct nvme_command), - DMA_TO_DEVICE); - if (error) - goto out_cleanup_queue; - - return 0; - -out_cleanup_queue: - blk_cleanup_queue(ctrl->ctrl.admin_q); -out_free_tagset: - /* disconnect and drain the queue before freeing the tagset */ - nvme_rdma_stop_queue(&ctrl->queues[0]); - blk_mq_free_tag_set(&ctrl->admin_tag_set); -out_put_dev: - nvme_rdma_dev_put(ctrl->device); -out_free_queue: - nvme_rdma_free_queue(&ctrl->queues[0]); - return error; -} - static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl) { cancel_work_sync(&ctrl->err_work); From 34b6c2315eb66e6411261aa440f6e3c4cded3506 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 10 Jul 2017 09:22:29 +0300 Subject: [PATCH 09/35] nvme: Add admin_tagset pointer to nvme_ctrl Will be used when we centralize control flows. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 1 + drivers/nvme/host/nvme.h | 1 + drivers/nvme/host/pci.c | 1 + drivers/nvme/host/rdma.c | 1 + drivers/nvme/target/loop.c | 1 + 5 files changed, 5 insertions(+) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 1438be649866..1912df412692 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -2731,6 +2731,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts, ret = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); if (ret) goto out_free_queues; + ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set; ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); if (IS_ERR(ctrl->ctrl.admin_q)) { diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index b40b9af4564f..2c8a02be46fd 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -125,6 +125,7 @@ struct nvme_ctrl { struct kref kref; int instance; struct blk_mq_tag_set *tagset; + struct blk_mq_tag_set *admin_tagset; struct list_head namespaces; struct mutex namespaces_mutex; struct device *device; /* char device */ diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 925467b31a33..e6283745ecd2 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1376,6 +1376,7 @@ static int nvme_alloc_admin_tags(struct nvme_dev *dev) if (blk_mq_alloc_tag_set(&dev->admin_tagset)) return -ENOMEM; + dev->ctrl.admin_tagset = &dev->admin_tagset; dev->ctrl.admin_q = blk_mq_init_queue(&dev->admin_tagset); if (IS_ERR(dev->ctrl.admin_q)) { diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3cecb087ee3a..6ef56500fc9c 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -691,6 +691,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); if (error) goto out_put_dev; + ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set; ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); if (IS_ERR(ctrl->ctrl.admin_q)) { diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 717ed7ddb2f6..92628c432926 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -375,6 +375,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl) error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); if (error) goto out_free_sq; + ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set; ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); if (IS_ERR(ctrl->ctrl.admin_q)) { From b28a308ee7774341312de28405e53a4a30cb7d31 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 10 Jul 2017 09:22:30 +0300 Subject: [PATCH 10/35] nvme-rdma: move tagset allocation to a dedicated routine We always pair tagset allocation with rdma device reference and it shares some code, centralize it with an argument if its an admin or IO tagset. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 131 ++++++++++++++++++++++----------------- 1 file changed, 75 insertions(+), 56 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 6ef56500fc9c..3f580c198100 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -646,14 +646,79 @@ out_free_queues: return ret; } +static void nvme_rdma_free_tagset(struct nvme_ctrl *nctrl, bool admin) +{ + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); + struct blk_mq_tag_set *set = admin ? + &ctrl->admin_tag_set : &ctrl->tag_set; + + blk_mq_free_tag_set(set); + nvme_rdma_dev_put(ctrl->device); +} + +static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl, + bool admin) +{ + struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); + struct blk_mq_tag_set *set; + int ret; + + if (admin) { + set = &ctrl->admin_tag_set; + memset(set, 0, sizeof(*set)); + set->ops = &nvme_rdma_admin_mq_ops; + set->queue_depth = NVME_RDMA_AQ_BLKMQ_DEPTH; + set->reserved_tags = 2; /* connect + keep-alive */ + set->numa_node = NUMA_NO_NODE; + set->cmd_size = sizeof(struct nvme_rdma_request) + + SG_CHUNK_SIZE * sizeof(struct scatterlist); + set->driver_data = ctrl; + set->nr_hw_queues = 1; + set->timeout = ADMIN_TIMEOUT; + } else { + set = &ctrl->tag_set; + memset(set, 0, sizeof(*set)); + set->ops = &nvme_rdma_mq_ops; + set->queue_depth = nctrl->opts->queue_size; + set->reserved_tags = 1; /* fabric connect */ + set->numa_node = NUMA_NO_NODE; + set->flags = BLK_MQ_F_SHOULD_MERGE; + set->cmd_size = sizeof(struct nvme_rdma_request) + + SG_CHUNK_SIZE * sizeof(struct scatterlist); + set->driver_data = ctrl; + set->nr_hw_queues = nctrl->queue_count - 1; + set->timeout = NVME_IO_TIMEOUT; + } + + ret = blk_mq_alloc_tag_set(set); + if (ret) + goto out; + + /* + * We need a reference on the device as long as the tag_set is alive, + * as the MRs in the request structures need a valid ib_device. + */ + ret = nvme_rdma_dev_get(ctrl->device); + if (!ret) { + ret = -EINVAL; + goto out_free_tagset; + } + + return set; + +out_free_tagset: + blk_mq_free_tag_set(set); +out: + return ERR_PTR(ret); +} + static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl) { nvme_rdma_free_qe(ctrl->queues[0].device->dev, &ctrl->async_event_sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); nvme_rdma_stop_and_free_queue(&ctrl->queues[0]); blk_cleanup_queue(ctrl->ctrl.admin_q); - blk_mq_free_tag_set(&ctrl->admin_tag_set); - nvme_rdma_dev_put(ctrl->device); + nvme_rdma_free_tagset(&ctrl->ctrl, true); } static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) @@ -666,32 +731,12 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) ctrl->device = ctrl->queues[0].device; - /* - * We need a reference on the device as long as the tag_set is alive, - * as the MRs in the request structures need a valid ib_device. - */ - error = -EINVAL; - if (!nvme_rdma_dev_get(ctrl->device)) - goto out_free_queue; - ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS, ctrl->device->dev->attrs.max_fast_reg_page_list_len); - memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set)); - ctrl->admin_tag_set.ops = &nvme_rdma_admin_mq_ops; - ctrl->admin_tag_set.queue_depth = NVME_RDMA_AQ_BLKMQ_DEPTH; - ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */ - ctrl->admin_tag_set.numa_node = NUMA_NO_NODE; - ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_rdma_request) + - SG_CHUNK_SIZE * sizeof(struct scatterlist); - ctrl->admin_tag_set.driver_data = ctrl; - ctrl->admin_tag_set.nr_hw_queues = 1; - ctrl->admin_tag_set.timeout = ADMIN_TIMEOUT; - - error = blk_mq_alloc_tag_set(&ctrl->admin_tag_set); - if (error) - goto out_put_dev; - ctrl->ctrl.admin_tagset = &ctrl->admin_tag_set; + ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); + if (IS_ERR(ctrl->ctrl.admin_tagset)) + goto out_free_queue; ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); if (IS_ERR(ctrl->ctrl.admin_q)) { @@ -740,9 +785,7 @@ out_cleanup_queue: out_free_tagset: /* disconnect and drain the queue before freeing the tagset */ nvme_rdma_stop_queue(&ctrl->queues[0]); - blk_mq_free_tag_set(&ctrl->admin_tag_set); -out_put_dev: - nvme_rdma_dev_put(ctrl->device); + nvme_rdma_free_tagset(&ctrl->ctrl, true); out_free_queue: nvme_rdma_free_queue(&ctrl->queues[0]); return error; @@ -1644,8 +1687,7 @@ static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) nvme_uninit_ctrl(&ctrl->ctrl); if (ctrl->ctrl.tagset) { blk_cleanup_queue(ctrl->ctrl.connect_q); - blk_mq_free_tag_set(&ctrl->tag_set); - nvme_rdma_dev_put(ctrl->device); + nvme_rdma_free_tagset(&ctrl->ctrl, false); } nvme_put_ctrl(&ctrl->ctrl); @@ -1765,31 +1807,10 @@ static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl) if (ret) return ret; - /* - * We need a reference on the device as long as the tag_set is alive, - * as the MRs in the request structures need a valid ib_device. - */ - ret = -EINVAL; - if (!nvme_rdma_dev_get(ctrl->device)) + ctrl->ctrl.tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); + if (IS_ERR(ctrl->ctrl.tagset)) goto out_free_io_queues; - memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set)); - ctrl->tag_set.ops = &nvme_rdma_mq_ops; - ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size; - ctrl->tag_set.reserved_tags = 1; /* fabric connect */ - ctrl->tag_set.numa_node = NUMA_NO_NODE; - ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; - ctrl->tag_set.cmd_size = sizeof(struct nvme_rdma_request) + - SG_CHUNK_SIZE * sizeof(struct scatterlist); - ctrl->tag_set.driver_data = ctrl; - ctrl->tag_set.nr_hw_queues = ctrl->ctrl.queue_count - 1; - ctrl->tag_set.timeout = NVME_IO_TIMEOUT; - - ret = blk_mq_alloc_tag_set(&ctrl->tag_set); - if (ret) - goto out_put_dev; - ctrl->ctrl.tagset = &ctrl->tag_set; - ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); if (IS_ERR(ctrl->ctrl.connect_q)) { ret = PTR_ERR(ctrl->ctrl.connect_q); @@ -1805,9 +1826,7 @@ static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl) out_cleanup_connect_q: blk_cleanup_queue(ctrl->ctrl.connect_q); out_free_tag_set: - blk_mq_free_tag_set(&ctrl->tag_set); -out_put_dev: - nvme_rdma_dev_put(ctrl->device); + nvme_rdma_free_tagset(&ctrl->ctrl, false); out_free_io_queues: nvme_rdma_free_io_queues(ctrl); return ret; From 18398af2dcca3044c74fd2697f1d5b624b7848fe Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 10 Jul 2017 09:22:31 +0300 Subject: [PATCH 11/35] nvme-rdma: disable the controller on resets Mimic the pci driver as a controller disable might be more lightweight than a shutdown. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 3f580c198100..b1e67cd41c81 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1655,7 +1655,7 @@ static const struct blk_mq_ops nvme_rdma_admin_mq_ops = { .timeout = nvme_rdma_timeout, }; -static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl) +static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) { cancel_work_sync(&ctrl->err_work); cancel_delayed_work_sync(&ctrl->reconnect_work); @@ -1667,8 +1667,10 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl) nvme_rdma_free_io_queues(ctrl); } - if (test_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags)) + if (shutdown) nvme_shutdown_ctrl(&ctrl->ctrl); + else + nvme_disable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); blk_mq_quiesce_queue(ctrl->ctrl.admin_q); blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, @@ -1682,7 +1684,7 @@ static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) nvme_stop_ctrl(&ctrl->ctrl); nvme_remove_namespaces(&ctrl->ctrl); if (shutdown) - nvme_rdma_shutdown_ctrl(ctrl); + nvme_rdma_shutdown_ctrl(ctrl, shutdown); nvme_uninit_ctrl(&ctrl->ctrl); if (ctrl->ctrl.tagset) { @@ -1746,7 +1748,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) bool changed; nvme_stop_ctrl(&ctrl->ctrl); - nvme_rdma_shutdown_ctrl(ctrl); + nvme_rdma_shutdown_ctrl(ctrl, false); ret = nvme_rdma_configure_admin_queue(ctrl); if (ret) { From 3f02fffb74ae7486232fef6ca4341c5e9719c759 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 10 Jul 2017 09:22:32 +0300 Subject: [PATCH 12/35] nvme-rdma: don't free tagset on resets We're not supposed to do that. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 49 ++++++++++++++++++++++++---------------- 1 file changed, 30 insertions(+), 19 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index b1e67cd41c81..b8c07f2f2204 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -712,16 +712,20 @@ out: return ERR_PTR(ret); } -static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl) +static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, + bool remove) { nvme_rdma_free_qe(ctrl->queues[0].device->dev, &ctrl->async_event_sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); nvme_rdma_stop_and_free_queue(&ctrl->queues[0]); - blk_cleanup_queue(ctrl->ctrl.admin_q); - nvme_rdma_free_tagset(&ctrl->ctrl, true); + if (remove) { + blk_cleanup_queue(ctrl->ctrl.admin_q); + nvme_rdma_free_tagset(&ctrl->ctrl, true); + } } -static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) +static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, + bool new) { int error; @@ -734,14 +738,21 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) ctrl->max_fr_pages = min_t(u32, NVME_RDMA_MAX_SEGMENTS, ctrl->device->dev->attrs.max_fast_reg_page_list_len); - ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); - if (IS_ERR(ctrl->ctrl.admin_tagset)) - goto out_free_queue; + if (new) { + ctrl->ctrl.admin_tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); + if (IS_ERR(ctrl->ctrl.admin_tagset)) + goto out_free_queue; - ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); - if (IS_ERR(ctrl->ctrl.admin_q)) { - error = PTR_ERR(ctrl->ctrl.admin_q); - goto out_free_tagset; + ctrl->ctrl.admin_q = blk_mq_init_queue(&ctrl->admin_tag_set); + if (IS_ERR(ctrl->ctrl.admin_q)) { + error = PTR_ERR(ctrl->ctrl.admin_q); + goto out_free_tagset; + } + } else { + error = blk_mq_reinit_tagset(&ctrl->admin_tag_set, + nvme_rdma_reinit_request); + if (error) + goto out_free_queue; } error = nvmf_connect_admin_queue(&ctrl->ctrl); @@ -781,11 +792,11 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl) return 0; out_cleanup_queue: - blk_cleanup_queue(ctrl->ctrl.admin_q); + if (new) + blk_cleanup_queue(ctrl->ctrl.admin_q); out_free_tagset: - /* disconnect and drain the queue before freeing the tagset */ - nvme_rdma_stop_queue(&ctrl->queues[0]); - nvme_rdma_free_tagset(&ctrl->ctrl, true); + if (new) + nvme_rdma_free_tagset(&ctrl->ctrl, true); out_free_queue: nvme_rdma_free_queue(&ctrl->queues[0]); return error; @@ -1676,7 +1687,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) blk_mq_tagset_busy_iter(&ctrl->admin_tag_set, nvme_cancel_request, &ctrl->ctrl); blk_mq_unquiesce_queue(ctrl->ctrl.admin_q); - nvme_rdma_destroy_admin_queue(ctrl); + nvme_rdma_destroy_admin_queue(ctrl, shutdown); } static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) @@ -1750,7 +1761,7 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) nvme_stop_ctrl(&ctrl->ctrl); nvme_rdma_shutdown_ctrl(ctrl, false); - ret = nvme_rdma_configure_admin_queue(ctrl); + ret = nvme_rdma_configure_admin_queue(ctrl, false); if (ret) { /* ctrl is already shutdown, just remove the ctrl */ INIT_WORK(&ctrl->delete_work, nvme_rdma_remove_ctrl_work); @@ -1891,7 +1902,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, if (!ctrl->queues) goto out_uninit_ctrl; - ret = nvme_rdma_configure_admin_queue(ctrl); + ret = nvme_rdma_configure_admin_queue(ctrl, true); if (ret) goto out_kfree_queues; @@ -1948,7 +1959,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, return &ctrl->ctrl; out_remove_admin_queue: - nvme_rdma_destroy_admin_queue(ctrl); + nvme_rdma_destroy_admin_queue(ctrl, true); out_kfree_queues: kfree(ctrl->queues); out_uninit_ctrl: From 31fdf18401703df246e20fc0382ed3594e6cf8d8 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 28 Aug 2017 21:40:06 +0200 Subject: [PATCH 13/35] nvme-rdma: reuse configure/destroy_admin_queue No need to open-code it. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index b8c07f2f2204..cdf14226bf07 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -857,24 +857,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) goto requeue; } - nvme_rdma_stop_and_free_queue(&ctrl->queues[0]); - - ret = blk_mq_reinit_tagset(&ctrl->admin_tag_set, - nvme_rdma_reinit_request); - if (ret) - goto requeue; - - ret = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH); - if (ret) - goto requeue; - - ret = nvmf_connect_admin_queue(&ctrl->ctrl); - if (ret) - goto requeue; - - set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); - - ret = nvme_enable_ctrl(&ctrl->ctrl, ctrl->ctrl.cap); + nvme_rdma_destroy_admin_queue(ctrl, false); + ret = nvme_rdma_configure_admin_queue(ctrl, false); if (ret) goto requeue; From a57bd54122232b32414748fc8b14634bfd74a7ff Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 28 Aug 2017 21:41:10 +0200 Subject: [PATCH 14/35] nvme-rdma: introduce configure/destroy io queues Make a symmetrical handling with admin queue. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 166 +++++++++++++++++++-------------------- 1 file changed, 83 insertions(+), 83 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index cdf14226bf07..b5fdd2b009c0 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -562,22 +562,20 @@ out_destroy_cm_id: static void nvme_rdma_stop_queue(struct nvme_rdma_queue *queue) { + if (!test_and_clear_bit(NVME_RDMA_Q_LIVE, &queue->flags)) + return; + rdma_disconnect(queue->cm_id); ib_drain_qp(queue->qp); } static void nvme_rdma_free_queue(struct nvme_rdma_queue *queue) -{ - nvme_rdma_destroy_queue_ib(queue); - rdma_destroy_id(queue->cm_id); -} - -static void nvme_rdma_stop_and_free_queue(struct nvme_rdma_queue *queue) { if (test_and_set_bit(NVME_RDMA_Q_DELETING, &queue->flags)) return; - nvme_rdma_stop_queue(queue); - nvme_rdma_free_queue(queue); + + nvme_rdma_destroy_queue_ib(queue); + rdma_destroy_id(queue->cm_id); } static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl) @@ -585,7 +583,15 @@ static void nvme_rdma_free_io_queues(struct nvme_rdma_ctrl *ctrl) int i; for (i = 1; i < ctrl->ctrl.queue_count; i++) - nvme_rdma_stop_and_free_queue(&ctrl->queues[i]); + nvme_rdma_free_queue(&ctrl->queues[i]); +} + +static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl) +{ + int i; + + for (i = 1; i < ctrl->ctrl.queue_count; i++) + nvme_rdma_stop_queue(&ctrl->queues[i]); } static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl) @@ -597,15 +603,15 @@ static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl) if (ret) { dev_info(ctrl->ctrl.device, "failed to connect i/o queue: %d\n", ret); - goto out_free_queues; + goto out_stop_queues; } set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); } return 0; -out_free_queues: - nvme_rdma_free_io_queues(ctrl); +out_stop_queues: + nvme_rdma_stop_io_queues(ctrl); return ret; } @@ -641,7 +647,7 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) out_free_queues: for (i--; i >= 1; i--) - nvme_rdma_stop_and_free_queue(&ctrl->queues[i]); + nvme_rdma_free_queue(&ctrl->queues[i]); return ret; } @@ -717,11 +723,12 @@ static void nvme_rdma_destroy_admin_queue(struct nvme_rdma_ctrl *ctrl, { nvme_rdma_free_qe(ctrl->queues[0].device->dev, &ctrl->async_event_sqe, sizeof(struct nvme_command), DMA_TO_DEVICE); - nvme_rdma_stop_and_free_queue(&ctrl->queues[0]); + nvme_rdma_stop_queue(&ctrl->queues[0]); if (remove) { blk_cleanup_queue(ctrl->ctrl.admin_q); nvme_rdma_free_tagset(&ctrl->ctrl, true); } + nvme_rdma_free_queue(&ctrl->queues[0]); } static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, @@ -802,6 +809,62 @@ out_free_queue: return error; } +static void nvme_rdma_destroy_io_queues(struct nvme_rdma_ctrl *ctrl, + bool remove) +{ + nvme_rdma_stop_io_queues(ctrl); + if (remove) { + blk_cleanup_queue(ctrl->ctrl.connect_q); + nvme_rdma_free_tagset(&ctrl->ctrl, false); + } + nvme_rdma_free_io_queues(ctrl); +} + +static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) +{ + int ret; + + ret = nvme_rdma_init_io_queues(ctrl); + if (ret) + return ret; + + if (new) { + ctrl->ctrl.tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, false); + if (IS_ERR(ctrl->ctrl.tagset)) + goto out_free_io_queues; + + ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); + if (IS_ERR(ctrl->ctrl.connect_q)) { + ret = PTR_ERR(ctrl->ctrl.connect_q); + goto out_free_tag_set; + } + } else { + ret = blk_mq_reinit_tagset(&ctrl->tag_set, + nvme_rdma_reinit_request); + if (ret) + goto out_free_io_queues; + + blk_mq_update_nr_hw_queues(&ctrl->tag_set, + ctrl->ctrl.queue_count - 1); + } + + ret = nvme_rdma_connect_io_queues(ctrl); + if (ret) + goto out_cleanup_connect_q; + + return 0; + +out_cleanup_connect_q: + if (new) + blk_cleanup_queue(ctrl->ctrl.connect_q); +out_free_tag_set: + if (new) + nvme_rdma_free_tagset(&ctrl->ctrl, false); +out_free_io_queues: + nvme_rdma_free_io_queues(ctrl); + return ret; +} + static void nvme_rdma_free_ctrl(struct nvme_ctrl *nctrl) { struct nvme_rdma_ctrl *ctrl = to_rdma_ctrl(nctrl); @@ -848,14 +911,8 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) ++ctrl->ctrl.nr_reconnects; - if (ctrl->ctrl.queue_count > 1) { - nvme_rdma_free_io_queues(ctrl); - - ret = blk_mq_reinit_tagset(&ctrl->tag_set, - nvme_rdma_reinit_request); - if (ret) - goto requeue; - } + if (ctrl->ctrl.queue_count > 1) + nvme_rdma_destroy_io_queues(ctrl, false); nvme_rdma_destroy_admin_queue(ctrl, false); ret = nvme_rdma_configure_admin_queue(ctrl, false); @@ -863,16 +920,9 @@ static void nvme_rdma_reconnect_ctrl_work(struct work_struct *work) goto requeue; if (ctrl->ctrl.queue_count > 1) { - ret = nvme_rdma_init_io_queues(ctrl); + ret = nvme_rdma_configure_io_queues(ctrl, false); if (ret) goto requeue; - - ret = nvme_rdma_connect_io_queues(ctrl); - if (ret) - goto requeue; - - blk_mq_update_nr_hw_queues(&ctrl->tag_set, - ctrl->ctrl.queue_count - 1); } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); @@ -1659,7 +1709,7 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) nvme_stop_queues(&ctrl->ctrl); blk_mq_tagset_busy_iter(&ctrl->tag_set, nvme_cancel_request, &ctrl->ctrl); - nvme_rdma_free_io_queues(ctrl); + nvme_rdma_destroy_io_queues(ctrl, shutdown); } if (shutdown) @@ -1682,11 +1732,6 @@ static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) nvme_rdma_shutdown_ctrl(ctrl, shutdown); nvme_uninit_ctrl(&ctrl->ctrl); - if (ctrl->ctrl.tagset) { - blk_cleanup_queue(ctrl->ctrl.connect_q); - nvme_rdma_free_tagset(&ctrl->ctrl, false); - } - nvme_put_ctrl(&ctrl->ctrl); } @@ -1753,21 +1798,9 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) } if (ctrl->ctrl.queue_count > 1) { - ret = blk_mq_reinit_tagset(&ctrl->tag_set, - nvme_rdma_reinit_request); + ret = nvme_rdma_configure_io_queues(ctrl, false); if (ret) goto del_dead_ctrl; - - ret = nvme_rdma_init_io_queues(ctrl); - if (ret) - goto del_dead_ctrl; - - ret = nvme_rdma_connect_io_queues(ctrl); - if (ret) - goto del_dead_ctrl; - - blk_mq_update_nr_hw_queues(&ctrl->tag_set, - ctrl->ctrl.queue_count - 1); } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); @@ -1796,39 +1829,6 @@ static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { .get_address = nvmf_get_address, }; -static int nvme_rdma_create_io_queues(struct nvme_rdma_ctrl *ctrl) -{ - int ret; - - ret = nvme_rdma_init_io_queues(ctrl); - if (ret) - return ret; - - ctrl->ctrl.tagset = nvme_rdma_alloc_tagset(&ctrl->ctrl, true); - if (IS_ERR(ctrl->ctrl.tagset)) - goto out_free_io_queues; - - ctrl->ctrl.connect_q = blk_mq_init_queue(&ctrl->tag_set); - if (IS_ERR(ctrl->ctrl.connect_q)) { - ret = PTR_ERR(ctrl->ctrl.connect_q); - goto out_free_tag_set; - } - - ret = nvme_rdma_connect_io_queues(ctrl); - if (ret) - goto out_cleanup_connect_q; - - return 0; - -out_cleanup_connect_q: - blk_cleanup_queue(ctrl->ctrl.connect_q); -out_free_tag_set: - nvme_rdma_free_tagset(&ctrl->ctrl, false); -out_free_io_queues: - nvme_rdma_free_io_queues(ctrl); - return ret; -} - static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, struct nvmf_ctrl_options *opts) { @@ -1921,7 +1921,7 @@ static struct nvme_ctrl *nvme_rdma_create_ctrl(struct device *dev, } if (opts->nr_io_queues) { - ret = nvme_rdma_create_io_queues(ctrl); + ret = nvme_rdma_configure_io_queues(ctrl, true); if (ret) goto out_remove_admin_queue; } From 148b4e7ff31e4bb90cf7851ad1bcd305c292be2c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 10 Jul 2017 09:22:35 +0300 Subject: [PATCH 15/35] nvme-rdma: stop queues instead of simply flipping their state If we move the queues from LIVE state, we might as well stop them (drain for rdma). Do it after we stop the request queues to prevent a stray request sneaking in .queue_rq after we stop the queue. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index b5fdd2b009c0..34d3ed8b249e 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -945,16 +945,15 @@ static void nvme_rdma_error_recovery_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, err_work); - int i; nvme_stop_ctrl(&ctrl->ctrl); - for (i = 0; i < ctrl->ctrl.queue_count; i++) - clear_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); - - if (ctrl->ctrl.queue_count > 1) + if (ctrl->ctrl.queue_count > 1) { nvme_stop_queues(&ctrl->ctrl); + nvme_rdma_stop_io_queues(ctrl); + } blk_mq_quiesce_queue(ctrl->ctrl.admin_q); + nvme_rdma_stop_queue(&ctrl->queues[0]); /* We must take care of fastfail/requeue all our inflight requests */ if (ctrl->ctrl.queue_count > 1) From 41e8cfa117cee46a732436dd303de4772c60965c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 10 Jul 2017 09:22:36 +0300 Subject: [PATCH 16/35] nvme-rdma: rename nvme_rdma_init_queue to nvme_rdma_alloc_queue Give it a name symmetric to nvme_rdma_free_queue. Also pass in the ctrl sqsize+1 and not the opts queue_size. And suppress a superflous failure message. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 34d3ed8b249e..d87874e4902d 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -504,7 +504,7 @@ out_put_dev: return ret; } -static int nvme_rdma_init_queue(struct nvme_rdma_ctrl *ctrl, +static int nvme_rdma_alloc_queue(struct nvme_rdma_ctrl *ctrl, int idx, size_t queue_size) { struct nvme_rdma_queue *queue; @@ -615,7 +615,7 @@ out_stop_queues: return ret; } -static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) +static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl) { struct nvmf_ctrl_options *opts = ctrl->ctrl.opts; unsigned int nr_io_queues; @@ -634,13 +634,10 @@ static int nvme_rdma_init_io_queues(struct nvme_rdma_ctrl *ctrl) "creating %d I/O queues.\n", nr_io_queues); for (i = 1; i < ctrl->ctrl.queue_count; i++) { - ret = nvme_rdma_init_queue(ctrl, i, - ctrl->ctrl.opts->queue_size); - if (ret) { - dev_info(ctrl->ctrl.device, - "failed to initialize i/o queue: %d\n", ret); + ret = nvme_rdma_alloc_queue(ctrl, i, + ctrl->ctrl.sqsize + 1); + if (ret) goto out_free_queues; - } } return 0; @@ -736,7 +733,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, { int error; - error = nvme_rdma_init_queue(ctrl, 0, NVME_AQ_DEPTH); + error = nvme_rdma_alloc_queue(ctrl, 0, NVME_AQ_DEPTH); if (error) return error; @@ -824,7 +821,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) { int ret; - ret = nvme_rdma_init_io_queues(ctrl); + ret = nvme_rdma_alloc_io_queues(ctrl); if (ret) return ret; From 68e16fcfaf9bbde573e89f783cf1ca60acb49cf5 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 10 Jul 2017 09:22:37 +0300 Subject: [PATCH 17/35] nvme-rdma: introduce nvme_rdma_start_queue This should pair with nvme_rdma_stop_queue. While this is not a complete inverse, it still pairs up pretty well because in fabrics we don't have a disconnect capsule (yet) but we simply teardown the transport association. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index d87874e4902d..09c4ea8332d7 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -594,24 +594,38 @@ static void nvme_rdma_stop_io_queues(struct nvme_rdma_ctrl *ctrl) nvme_rdma_stop_queue(&ctrl->queues[i]); } -static int nvme_rdma_connect_io_queues(struct nvme_rdma_ctrl *ctrl) +static int nvme_rdma_start_queue(struct nvme_rdma_ctrl *ctrl, int idx) +{ + int ret; + + if (idx) + ret = nvmf_connect_io_queue(&ctrl->ctrl, idx); + else + ret = nvmf_connect_admin_queue(&ctrl->ctrl); + + if (!ret) + set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[idx].flags); + else + dev_info(ctrl->ctrl.device, + "failed to connect queue: %d ret=%d\n", idx, ret); + return ret; +} + +static int nvme_rdma_start_io_queues(struct nvme_rdma_ctrl *ctrl) { int i, ret = 0; for (i = 1; i < ctrl->ctrl.queue_count; i++) { - ret = nvmf_connect_io_queue(&ctrl->ctrl, i); - if (ret) { - dev_info(ctrl->ctrl.device, - "failed to connect i/o queue: %d\n", ret); + ret = nvme_rdma_start_queue(ctrl, i); + if (ret) goto out_stop_queues; - } - set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[i].flags); } return 0; out_stop_queues: - nvme_rdma_stop_io_queues(ctrl); + for (i--; i >= 1; i--) + nvme_rdma_stop_queue(&ctrl->queues[i]); return ret; } @@ -759,12 +773,10 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, goto out_free_queue; } - error = nvmf_connect_admin_queue(&ctrl->ctrl); + error = nvme_rdma_start_queue(ctrl, 0); if (error) goto out_cleanup_queue; - set_bit(NVME_RDMA_Q_LIVE, &ctrl->queues[0].flags); - error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); if (error) { @@ -845,7 +857,7 @@ static int nvme_rdma_configure_io_queues(struct nvme_rdma_ctrl *ctrl, bool new) ctrl->ctrl.queue_count - 1); } - ret = nvme_rdma_connect_io_queues(ctrl); + ret = nvme_rdma_start_io_queues(ctrl); if (ret) goto out_cleanup_connect_q; From 370ae6e450260c8bd3e1ea75c1390f746d3c71ee Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 10 Jul 2017 09:22:38 +0300 Subject: [PATCH 18/35] nvme-rdma: cleanup error path in controller reset No need to queue an extra work to indirect controller removal, just call the ctrl remove routine. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 32 +++++++++----------------------- 1 file changed, 9 insertions(+), 23 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 09c4ea8332d7..1bf7c3a55c96 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1732,13 +1732,10 @@ static void nvme_rdma_shutdown_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) nvme_rdma_destroy_admin_queue(ctrl, shutdown); } -static void __nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl, bool shutdown) +static void nvme_rdma_remove_ctrl(struct nvme_rdma_ctrl *ctrl) { - nvme_stop_ctrl(&ctrl->ctrl); nvme_remove_namespaces(&ctrl->ctrl); - if (shutdown) - nvme_rdma_shutdown_ctrl(ctrl, shutdown); - + nvme_rdma_shutdown_ctrl(ctrl, true); nvme_uninit_ctrl(&ctrl->ctrl); nvme_put_ctrl(&ctrl->ctrl); } @@ -1748,7 +1745,8 @@ static void nvme_rdma_del_ctrl_work(struct work_struct *work) struct nvme_rdma_ctrl *ctrl = container_of(work, struct nvme_rdma_ctrl, delete_work); - __nvme_rdma_remove_ctrl(ctrl, true); + nvme_stop_ctrl(&ctrl->ctrl); + nvme_rdma_remove_ctrl(ctrl); } static int __nvme_rdma_del_ctrl(struct nvme_rdma_ctrl *ctrl) @@ -1780,14 +1778,6 @@ static int nvme_rdma_del_ctrl(struct nvme_ctrl *nctrl) return ret; } -static void nvme_rdma_remove_ctrl_work(struct work_struct *work) -{ - struct nvme_rdma_ctrl *ctrl = container_of(work, - struct nvme_rdma_ctrl, delete_work); - - __nvme_rdma_remove_ctrl(ctrl, false); -} - static void nvme_rdma_reset_ctrl_work(struct work_struct *work) { struct nvme_rdma_ctrl *ctrl = @@ -1799,16 +1789,13 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) nvme_rdma_shutdown_ctrl(ctrl, false); ret = nvme_rdma_configure_admin_queue(ctrl, false); - if (ret) { - /* ctrl is already shutdown, just remove the ctrl */ - INIT_WORK(&ctrl->delete_work, nvme_rdma_remove_ctrl_work); - goto del_dead_ctrl; - } + if (ret) + goto out_fail; if (ctrl->ctrl.queue_count > 1) { ret = nvme_rdma_configure_io_queues(ctrl, false); if (ret) - goto del_dead_ctrl; + goto out_fail; } changed = nvme_change_ctrl_state(&ctrl->ctrl, NVME_CTRL_LIVE); @@ -1818,10 +1805,9 @@ static void nvme_rdma_reset_ctrl_work(struct work_struct *work) return; -del_dead_ctrl: - /* Deleting this dead controller... */ +out_fail: dev_warn(ctrl->ctrl.device, "Removing after reset failure\n"); - WARN_ON(!queue_work(nvme_wq, &ctrl->delete_work)); + nvme_rdma_remove_ctrl(ctrl); } static const struct nvme_ctrl_ops nvme_rdma_ctrl_ops = { From 09fdc23b29618127709711606d4fa439a6839852 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 10 Jul 2017 09:22:39 +0300 Subject: [PATCH 19/35] nvme-rdma: call ops->reg_read64 instead of nvmf_reg_read64 To make the nvme_rdma_configure_admin_queue generic in preparation of moving it to common code. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index 1bf7c3a55c96..b51e7df63df5 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -777,7 +777,7 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl, if (error) goto out_cleanup_queue; - error = nvmf_reg_read64(&ctrl->ctrl, NVME_REG_CAP, + error = ctrl->ctrl.ops->reg_read64(&ctrl->ctrl, NVME_REG_CAP, &ctrl->ctrl.cap); if (error) { dev_err(ctrl->ctrl.device, From 9b483da15dd0c94b62ae4e789b37dfff4410f45e Mon Sep 17 00:00:00 2001 From: Guan Junxiong Date: Thu, 3 Aug 2017 21:40:26 +0800 Subject: [PATCH 20/35] nvme-fabrics: log a warning if hostid is invalid This helps users to quickly spot the reason of why connection fails if the hostid is not compliant with the uuid format. Signed-off-by: Guan Junxiong Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fabrics.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/nvme/host/fabrics.c b/drivers/nvme/host/fabrics.c index 5f5cd306f76d..fc3b6552f467 100644 --- a/drivers/nvme/host/fabrics.c +++ b/drivers/nvme/host/fabrics.c @@ -735,6 +735,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts, goto out; } if (uuid_parse(p, &hostid)) { + pr_err("Invalid hostid %s\n", p); ret = -EINVAL; goto out; } From caaa15c5097d58545075a8bbdf208078b87d5f28 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 15 Aug 2017 12:24:05 +0300 Subject: [PATCH 21/35] nvme: fix identify namespace logging Use ctrl->device and lose the func name. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 0b979e16655e..b2daeafbeadd 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1167,7 +1167,7 @@ static void nvme_config_discard(struct nvme_ns *ns) static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id) { if (nvme_identify_ns(ns->ctrl, ns->ns_id, id)) { - dev_warn(ns->ctrl->dev, "%s: Identify failure\n", __func__); + dev_warn(ns->ctrl->device, "Identify namespace failed\n"); return -ENODEV; } From ad4e05b24c428d6125f6f10bd300600cae5079d4 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 13 Aug 2017 19:21:06 +0300 Subject: [PATCH 22/35] nvme: add symbolic constants for CC identifiers Signed-off-by: Max Gurtovoy Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/core.c | 14 +++++++------- include/linux/nvme.h | 24 +++++++++++++++--------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index ea7eb00dcb87..7c23eaf8e563 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -538,37 +538,37 @@ EXPORT_SYMBOL_GPL(nvmet_req_uninit); static inline bool nvmet_cc_en(u32 cc) { - return cc & 0x1; + return (cc >> NVME_CC_EN_SHIFT) & 0x1; } static inline u8 nvmet_cc_css(u32 cc) { - return (cc >> 4) & 0x7; + return (cc >> NVME_CC_CSS_SHIFT) & 0x7; } static inline u8 nvmet_cc_mps(u32 cc) { - return (cc >> 7) & 0xf; + return (cc >> NVME_CC_MPS_SHIFT) & 0xf; } static inline u8 nvmet_cc_ams(u32 cc) { - return (cc >> 11) & 0x7; + return (cc >> NVME_CC_AMS_SHIFT) & 0x7; } static inline u8 nvmet_cc_shn(u32 cc) { - return (cc >> 14) & 0x3; + return (cc >> NVME_CC_SHN_SHIFT) & 0x3; } static inline u8 nvmet_cc_iosqes(u32 cc) { - return (cc >> 16) & 0xf; + return (cc >> NVME_CC_IOSQES_SHIFT) & 0xf; } static inline u8 nvmet_cc_iocqes(u32 cc) { - return (cc >> 20) & 0xf; + return (cc >> NVME_CC_IOCQES_SHIFT) & 0xf; } static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl) diff --git a/include/linux/nvme.h b/include/linux/nvme.h index a12b47073273..7b4322dc2975 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -135,16 +135,22 @@ enum { enum { NVME_CC_ENABLE = 1 << 0, NVME_CC_CSS_NVM = 0 << 4, + NVME_CC_EN_SHIFT = 0, + NVME_CC_CSS_SHIFT = 4, NVME_CC_MPS_SHIFT = 7, - NVME_CC_ARB_RR = 0 << 11, - NVME_CC_ARB_WRRU = 1 << 11, - NVME_CC_ARB_VS = 7 << 11, - NVME_CC_SHN_NONE = 0 << 14, - NVME_CC_SHN_NORMAL = 1 << 14, - NVME_CC_SHN_ABRUPT = 2 << 14, - NVME_CC_SHN_MASK = 3 << 14, - NVME_CC_IOSQES = NVME_NVM_IOSQES << 16, - NVME_CC_IOCQES = NVME_NVM_IOCQES << 20, + NVME_CC_AMS_SHIFT = 11, + NVME_CC_SHN_SHIFT = 14, + NVME_CC_IOSQES_SHIFT = 16, + NVME_CC_IOCQES_SHIFT = 20, + NVME_CC_ARB_RR = 0 << NVME_CC_AMS_SHIFT, + NVME_CC_ARB_WRRU = 1 << NVME_CC_AMS_SHIFT, + NVME_CC_ARB_VS = 7 << NVME_CC_AMS_SHIFT, + NVME_CC_SHN_NONE = 0 << NVME_CC_SHN_SHIFT, + NVME_CC_SHN_NORMAL = 1 << NVME_CC_SHN_SHIFT, + NVME_CC_SHN_ABRUPT = 2 << NVME_CC_SHN_SHIFT, + NVME_CC_SHN_MASK = 3 << NVME_CC_SHN_SHIFT, + NVME_CC_IOSQES = NVME_NVM_IOSQES << NVME_CC_IOSQES_SHIFT, + NVME_CC_IOCQES = NVME_NVM_IOCQES << NVME_CC_IOCQES_SHIFT, NVME_CSTS_RDY = 1 << 0, NVME_CSTS_CFS = 1 << 1, NVME_CSTS_NSSRO = 1 << 4, From 60b43f627a71aaf233ef5af90f72e207c29781b4 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Sun, 13 Aug 2017 19:21:07 +0300 Subject: [PATCH 23/35] nvme: rename AMS symbolic constants to fit specification Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 2 +- include/linux/nvme.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b2daeafbeadd..1db8de0bee87 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1445,7 +1445,7 @@ int nvme_enable_ctrl(struct nvme_ctrl *ctrl, u64 cap) ctrl->ctrl_config = NVME_CC_CSS_NVM; ctrl->ctrl_config |= (page_shift - 12) << NVME_CC_MPS_SHIFT; - ctrl->ctrl_config |= NVME_CC_ARB_RR | NVME_CC_SHN_NONE; + ctrl->ctrl_config |= NVME_CC_AMS_RR | NVME_CC_SHN_NONE; ctrl->ctrl_config |= NVME_CC_IOSQES | NVME_CC_IOCQES; ctrl->ctrl_config |= NVME_CC_ENABLE; diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 7b4322dc2975..05560c2ecc83 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -142,9 +142,9 @@ enum { NVME_CC_SHN_SHIFT = 14, NVME_CC_IOSQES_SHIFT = 16, NVME_CC_IOCQES_SHIFT = 20, - NVME_CC_ARB_RR = 0 << NVME_CC_AMS_SHIFT, - NVME_CC_ARB_WRRU = 1 << NVME_CC_AMS_SHIFT, - NVME_CC_ARB_VS = 7 << NVME_CC_AMS_SHIFT, + NVME_CC_AMS_RR = 0 << NVME_CC_AMS_SHIFT, + NVME_CC_AMS_WRRU = 1 << NVME_CC_AMS_SHIFT, + NVME_CC_AMS_VS = 7 << NVME_CC_AMS_SHIFT, NVME_CC_SHN_NONE = 0 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_NORMAL = 1 << NVME_CC_SHN_SHIFT, NVME_CC_SHN_ABRUPT = 2 << NVME_CC_SHN_SHIFT, From 5533d42480d6ced6765401c55a3622b4c437d7eb Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 31 Jul 2017 13:20:30 -0700 Subject: [PATCH 24/35] nvme-fc: Reattach to localports on re-registration If the LLDD resets or detaches from an fc port, the LLDD will deregister all remoteports seen by the fc port and deregister the localport associated with the fc port. The teardown of the localport structure will be held off due to reference counting until all the remoteports are removed (and they are held off until all controllers/associations to terminated). Currently, if the fc port is reinit/reattached and registered again as a localport it is treated as an independent entity from the prior localport and all prior remoteports and controllers cannot be revived. They are created as new and separate entities. This patch changes the localport registration to look at the known localports that are waiting to be torndown. If they are the same port based on wwn's, the local port is transitioned out of the teardown state. This allows the remote ports and controller connections to be reestablished and resumed as long as the localport can also be reregistered within the timeout windows. The patch adds a new routine nvme_fc_attach_to_unreg_lport() with the functionality and moves the lport get/put routines to avoid forward references. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/host/fc.c | 144 ++++++++++++++++++++++++++++++----------- 1 file changed, 106 insertions(+), 38 deletions(-) diff --git a/drivers/nvme/host/fc.c b/drivers/nvme/host/fc.c index 1912df412692..d2e882c0f496 100644 --- a/drivers/nvme/host/fc.c +++ b/drivers/nvme/host/fc.c @@ -220,6 +220,90 @@ static int __nvme_fc_del_ctrl(struct nvme_fc_ctrl *); static void __nvme_fc_delete_hw_queue(struct nvme_fc_ctrl *, struct nvme_fc_queue *, unsigned int); +static void +nvme_fc_free_lport(struct kref *ref) +{ + struct nvme_fc_lport *lport = + container_of(ref, struct nvme_fc_lport, ref); + unsigned long flags; + + WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED); + WARN_ON(!list_empty(&lport->endp_list)); + + /* remove from transport list */ + spin_lock_irqsave(&nvme_fc_lock, flags); + list_del(&lport->port_list); + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + /* let the LLDD know we've finished tearing it down */ + lport->ops->localport_delete(&lport->localport); + + ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num); + ida_destroy(&lport->endp_cnt); + + put_device(lport->dev); + + kfree(lport); +} + +static void +nvme_fc_lport_put(struct nvme_fc_lport *lport) +{ + kref_put(&lport->ref, nvme_fc_free_lport); +} + +static int +nvme_fc_lport_get(struct nvme_fc_lport *lport) +{ + return kref_get_unless_zero(&lport->ref); +} + + +static struct nvme_fc_lport * +nvme_fc_attach_to_unreg_lport(struct nvme_fc_port_info *pinfo) +{ + struct nvme_fc_lport *lport; + unsigned long flags; + + spin_lock_irqsave(&nvme_fc_lock, flags); + + list_for_each_entry(lport, &nvme_fc_lport_list, port_list) { + if (lport->localport.node_name != pinfo->node_name || + lport->localport.port_name != pinfo->port_name) + continue; + + if (lport->localport.port_state != FC_OBJSTATE_DELETED) { + lport = ERR_PTR(-EEXIST); + goto out_done; + } + + if (!nvme_fc_lport_get(lport)) { + /* + * fails if ref cnt already 0. If so, + * act as if lport already deleted + */ + lport = NULL; + goto out_done; + } + + /* resume the lport */ + + lport->localport.port_role = pinfo->port_role; + lport->localport.port_id = pinfo->port_id; + lport->localport.port_state = FC_OBJSTATE_ONLINE; + + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + return lport; + } + + lport = NULL; + +out_done: + spin_unlock_irqrestore(&nvme_fc_lock, flags); + + return lport; +} /** * nvme_fc_register_localport - transport entry point called by an @@ -257,6 +341,28 @@ nvme_fc_register_localport(struct nvme_fc_port_info *pinfo, goto out_reghost_failed; } + /* + * look to see if there is already a localport that had been + * deregistered and in the process of waiting for all the + * references to fully be removed. If the references haven't + * expired, we can simply re-enable the localport. Remoteports + * and controller reconnections should resume naturally. + */ + newrec = nvme_fc_attach_to_unreg_lport(pinfo); + + /* found an lport, but something about its state is bad */ + if (IS_ERR(newrec)) { + ret = PTR_ERR(newrec); + goto out_reghost_failed; + + /* found existing lport, which was resumed */ + } else if (newrec) { + *portptr = &newrec->localport; + return 0; + } + + /* nothing found - allocate a new localport struct */ + newrec = kmalloc((sizeof(*newrec) + template->local_priv_sz), GFP_KERNEL); if (!newrec) { @@ -310,44 +416,6 @@ out_reghost_failed: } EXPORT_SYMBOL_GPL(nvme_fc_register_localport); -static void -nvme_fc_free_lport(struct kref *ref) -{ - struct nvme_fc_lport *lport = - container_of(ref, struct nvme_fc_lport, ref); - unsigned long flags; - - WARN_ON(lport->localport.port_state != FC_OBJSTATE_DELETED); - WARN_ON(!list_empty(&lport->endp_list)); - - /* remove from transport list */ - spin_lock_irqsave(&nvme_fc_lock, flags); - list_del(&lport->port_list); - spin_unlock_irqrestore(&nvme_fc_lock, flags); - - /* let the LLDD know we've finished tearing it down */ - lport->ops->localport_delete(&lport->localport); - - ida_simple_remove(&nvme_fc_local_port_cnt, lport->localport.port_num); - ida_destroy(&lport->endp_cnt); - - put_device(lport->dev); - - kfree(lport); -} - -static void -nvme_fc_lport_put(struct nvme_fc_lport *lport) -{ - kref_put(&lport->ref, nvme_fc_free_lport); -} - -static int -nvme_fc_lport_get(struct nvme_fc_lport *lport) -{ - return kref_get_unless_zero(&lport->ref); -} - /** * nvme_fc_unregister_localport - transport entry point called by an * LLDD to deregister/remove a previously From 48fa362b6c3f4d69bdb6310b46626049092475e0 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 31 Jul 2017 13:21:14 -0700 Subject: [PATCH 25/35] nvmet-fc: simplify sg list handling The existing nvmet_fc sg list handling has 2 faults: a) the request between LLDD and transport has too large of an sg list (256 elements), which is normally 256k (64 elements). b) sglist handling doesn't optimize on the fact that each element is a page. This patch removes the static sg list in the request and uses the dynamic list already present in the nvmet_fc transport. It also simplies the handling of the sg list on multiple sequences to take advantage of the per-page divisions. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- drivers/nvme/target/fc.c | 48 +++++++--------------------------- include/linux/nvme-fc-driver.h | 2 +- 2 files changed, 11 insertions(+), 39 deletions(-) diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 309c84aa7595..421e43bf1dd7 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -58,7 +58,8 @@ struct nvmet_fc_ls_iod { struct work_struct work; } __aligned(sizeof(unsigned long long)); -#define NVMET_FC_MAX_KB_PER_XFR 256 +#define NVMET_FC_MAX_SEQ_LENGTH (256 * 1024) +#define NVMET_FC_MAX_XFR_SGENTS (NVMET_FC_MAX_SEQ_LENGTH / PAGE_SIZE) enum nvmet_fcp_datadir { NVMET_FCP_NODATA, @@ -74,9 +75,7 @@ struct nvmet_fc_fcp_iod { struct nvme_fc_ersp_iu rspiubuf; dma_addr_t rspdma; struct scatterlist *data_sg; - struct scatterlist *next_sg; int data_sg_cnt; - u32 next_sg_offset; u32 total_length; u32 offset; enum nvmet_fcp_datadir io_dir; @@ -112,6 +111,7 @@ struct nvmet_fc_tgtport { struct ida assoc_cnt; struct nvmet_port *port; struct kref ref; + u32 max_sg_cnt; }; struct nvmet_fc_defer_fcp_req { @@ -994,6 +994,8 @@ nvmet_fc_register_targetport(struct nvmet_fc_port_info *pinfo, INIT_LIST_HEAD(&newrec->assoc_list); kref_init(&newrec->ref); ida_init(&newrec->assoc_cnt); + newrec->max_sg_cnt = min_t(u32, NVMET_FC_MAX_XFR_SGENTS, + template->max_sgl_segments); ret = nvmet_fc_alloc_ls_iodlist(newrec); if (ret) { @@ -1866,51 +1868,23 @@ nvmet_fc_transfer_fcp_data(struct nvmet_fc_tgtport *tgtport, struct nvmet_fc_fcp_iod *fod, u8 op) { struct nvmefc_tgt_fcp_req *fcpreq = fod->fcpreq; - struct scatterlist *sg, *datasg; unsigned long flags; - u32 tlen, sg_off; + u32 tlen; int ret; fcpreq->op = op; fcpreq->offset = fod->offset; fcpreq->timeout = NVME_FC_TGTOP_TIMEOUT_SEC; - tlen = min_t(u32, (NVMET_FC_MAX_KB_PER_XFR * 1024), + + tlen = min_t(u32, tgtport->max_sg_cnt * PAGE_SIZE, (fod->total_length - fod->offset)); - tlen = min_t(u32, tlen, NVME_FC_MAX_SEGMENTS * PAGE_SIZE); - tlen = min_t(u32, tlen, fod->tgtport->ops->max_sgl_segments - * PAGE_SIZE); fcpreq->transfer_length = tlen; fcpreq->transferred_length = 0; fcpreq->fcp_error = 0; fcpreq->rsplen = 0; - fcpreq->sg_cnt = 0; - - datasg = fod->next_sg; - sg_off = fod->next_sg_offset; - - for (sg = fcpreq->sg ; tlen; sg++) { - *sg = *datasg; - if (sg_off) { - sg->offset += sg_off; - sg->length -= sg_off; - sg->dma_address += sg_off; - sg_off = 0; - } - if (tlen < sg->length) { - sg->length = tlen; - fod->next_sg = datasg; - fod->next_sg_offset += tlen; - } else if (tlen == sg->length) { - fod->next_sg_offset = 0; - fod->next_sg = sg_next(datasg); - } else { - fod->next_sg_offset = 0; - datasg = sg_next(datasg); - } - tlen -= sg->length; - fcpreq->sg_cnt++; - } + fcpreq->sg = &fod->data_sg[fod->offset / PAGE_SIZE]; + fcpreq->sg_cnt = DIV_ROUND_UP(tlen, PAGE_SIZE); /* * If the last READDATA request: check if LLDD supports @@ -2225,8 +2199,6 @@ nvmet_fc_handle_fcp_rqst(struct nvmet_fc_tgtport *tgtport, fod->req.sg = fod->data_sg; fod->req.sg_cnt = fod->data_sg_cnt; fod->offset = 0; - fod->next_sg = fod->data_sg; - fod->next_sg_offset = 0; if (fod->io_dir == NVMET_FCP_WRITE) { /* pull the data over before invoking nvmet layer */ diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 2591878c1d48..9c5cb4480806 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -624,7 +624,7 @@ struct nvmefc_tgt_fcp_req { u32 timeout; u32 transfer_length; struct fc_ba_rjt ba_rjt; - struct scatterlist sg[NVME_FC_MAX_SEGMENTS]; + struct scatterlist *sg; int sg_cnt; void *rspaddr; dma_addr_t rspdma; From 01f33c336e2d298ea5d4ce5d6e5bcd12865cc30f Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Mon, 14 Aug 2017 22:12:38 +0200 Subject: [PATCH 26/35] string.h: add memcpy_and_pad() This helper function is useful for the nvme subsystem, and maybe others. Note: the warnings reported by the kbuild test robot for this patch are actually generated by the use of CONFIG_PROFILE_ALL_BRANCHES together with __FORTIFY_INLINE. Signed-off-by: Martin Wilck Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/string.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/include/linux/string.h b/include/linux/string.h index a467e617eeb0..0bec4151b0eb 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -200,6 +200,7 @@ static inline const char *kbasename(const char *path) void fortify_panic(const char *name) __noreturn __cold; void __read_overflow(void) __compiletime_error("detected read beyond size of object passed as 1st parameter"); void __read_overflow2(void) __compiletime_error("detected read beyond size of object passed as 2nd parameter"); +void __read_overflow3(void) __compiletime_error("detected read beyond size of object passed as 3rd parameter"); void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter"); #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE) @@ -395,4 +396,33 @@ __FORTIFY_INLINE char *strcpy(char *p, const char *q) #endif +/** + * memcpy_and_pad - Copy one buffer to another with padding + * @dest: Where to copy to + * @dest_len: The destination buffer size + * @src: Where to copy from + * @count: The number of bytes to copy + * @pad: Character to use for padding if space is left in destination. + */ +__FORTIFY_INLINE void memcpy_and_pad(void *dest, size_t dest_len, + const void *src, size_t count, int pad) +{ + size_t dest_size = __builtin_object_size(dest, 0); + size_t src_size = __builtin_object_size(src, 0); + + if (__builtin_constant_p(dest_len) && __builtin_constant_p(count)) { + if (dest_size < dest_len && dest_size < count) + __write_overflow(); + else if (src_size < dest_len && src_size < count) + __read_overflow3(); + } + if (dest_size < dest_len) + fortify_panic(__func__); + if (dest_len > count) { + memcpy(dest, src, count); + memset(dest + count, pad, dest_len - count); + } else + memcpy(dest, src, dest_len); +} + #endif /* _LINUX_STRING_H_ */ From 17c39d053a46b300fee786857458857086a4844e Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Mon, 14 Aug 2017 22:12:39 +0200 Subject: [PATCH 27/35] nvmet: use memcpy_and_pad for identify sn/fr This changes the earlier patch "nvmet: don't report 0-bytes in serial number" to use the memcpy_and_pad() helper introduced in a previous patch. Signed-off-by: Martin Wilck Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- drivers/nvme/target/admin-cmd.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index dffd8064163b..9496c71d2257 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -168,15 +168,6 @@ out: nvmet_req_complete(req, status); } -static void copy_and_pad(char *dst, int dst_len, const char *src, int src_len) -{ - int len = min(src_len, dst_len); - - memcpy(dst, src, len); - if (dst_len > len) - memset(dst + len, ' ', dst_len - len); -} - static void nvmet_execute_identify_ctrl(struct nvmet_req *req) { struct nvmet_ctrl *ctrl = req->sq->ctrl; @@ -196,8 +187,9 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req) bin2hex(id->sn, &ctrl->subsys->serial, min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2)); - copy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1); - copy_and_pad(id->fr, sizeof(id->fr), UTS_RELEASE, strlen(UTS_RELEASE)); + memcpy_and_pad(id->mn, sizeof(id->mn), model, sizeof(model) - 1, ' '); + memcpy_and_pad(id->fr, sizeof(id->fr), + UTS_RELEASE, strlen(UTS_RELEASE), ' '); id->rab = 6; From a7b7c7a105a528e6c2a0a2581b814a5acacb4c38 Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Mon, 14 Aug 2017 15:29:26 +0300 Subject: [PATCH 28/35] nvme-rdma: Use unlikely macro in the fast path This patch slightly improves performance (mainly for small block sizes). Signed-off-by: Max Gurtovoy Signed-off-by: Christoph Hellwig --- drivers/nvme/host/rdma.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index b51e7df63df5..6a7682620d87 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -1047,7 +1047,7 @@ static void nvme_rdma_unmap_data(struct nvme_rdma_queue *queue, if (req->mr->need_inval) { res = nvme_rdma_inv_rkey(queue, req); - if (res < 0) { + if (unlikely(res < 0)) { dev_err(ctrl->ctrl.device, "Queueing INV WR for rkey %#x failed (%d)\n", req->mr->rkey, res); @@ -1112,7 +1112,7 @@ static int nvme_rdma_map_sg_fr(struct nvme_rdma_queue *queue, int nr; nr = ib_map_mr_sg(req->mr, req->sg_table.sgl, count, NULL, PAGE_SIZE); - if (nr < count) { + if (unlikely(nr < count)) { if (nr < 0) return nr; return -EINVAL; @@ -1248,7 +1248,7 @@ static int nvme_rdma_post_send(struct nvme_rdma_queue *queue, first = ≀ ret = ib_post_send(queue->qp, first, &bad_wr); - if (ret) { + if (unlikely(ret)) { dev_err(queue->ctrl->ctrl.device, "%s failed with error code %d\n", __func__, ret); } @@ -1274,7 +1274,7 @@ static int nvme_rdma_post_recv(struct nvme_rdma_queue *queue, wr.num_sge = 1; ret = ib_post_recv(queue->qp, &wr, &bad_wr); - if (ret) { + if (unlikely(ret)) { dev_err(queue->ctrl->ctrl.device, "%s failed with error code %d\n", __func__, ret); } @@ -1634,7 +1634,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, blk_mq_start_request(rq); err = nvme_rdma_map_data(queue, rq, c); - if (err < 0) { + if (unlikely(err < 0)) { dev_err(queue->ctrl->ctrl.device, "Failed to map data (%d)\n", err); nvme_cleanup_cmd(rq); @@ -1648,7 +1648,7 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx, flush = true; err = nvme_rdma_post_send(queue, sqe, req->sge, req->num_sge, req->mr->need_inval ? &req->reg_wr.wr : NULL, flush); - if (err) { + if (unlikely(err)) { nvme_rdma_unmap_data(queue, rq); goto err; } From 5228b3280b9bb8fa6aef59f891cca64a028e9b36 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Sun, 27 Aug 2017 15:56:37 +0200 Subject: [PATCH 29/35] nvme: fix uninitialized prp2 value on small transfers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The value of iod->first_dma ends up as prp2 in NVMe commands. In case there is not enough data to cross a page boundary, iod->first_dma is never initialized and contains random data. Comply with the NVMe specification and fill in 0 in that case. Signed-off-by: Jan H. Schönherr Signed-off-by: Christoph Hellwig --- drivers/nvme/host/pci.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index e6283745ecd2..544805a2421b 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -555,8 +555,10 @@ static blk_status_t nvme_setup_prps(struct nvme_dev *dev, struct request *req) int nprps, i; length -= (page_size - offset); - if (length <= 0) + if (length <= 0) { + iod->first_dma = 0; return BLK_STS_OK; + } dma_len -= (page_size - offset); if (dma_len) { From 07fbd32a6b215d8b2fc01ccc89622207b9b782fd Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 25 Aug 2017 19:14:50 -0400 Subject: [PATCH 30/35] nvme: honor RTD3 Entry Latency for shutdowns If an NVMe controller reports RTD3 Entry Latency larger than shutdown_timeout, up to a maximum of 60 seconds, use that value to set the shutdown timer. Otherwise fall back to the module parameter which defaults to 5 seconds. Signed-off-by: Martin K. Petersen Reviewed-by: Sagi Grimberg [hch: removed do_div, made transition time local scope] Signed-off-by: Christoph Hellwig --- drivers/nvme/host/core.c | 16 +++++++++++++++- drivers/nvme/host/nvme.h | 1 + 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 1db8de0bee87..745058905da6 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1458,7 +1458,7 @@ EXPORT_SYMBOL_GPL(nvme_enable_ctrl); int nvme_shutdown_ctrl(struct nvme_ctrl *ctrl) { - unsigned long timeout = jiffies + (shutdown_timeout * HZ); + unsigned long timeout = jiffies + (ctrl->shutdown_timeout * HZ); u32 csts; int ret; @@ -1826,6 +1826,20 @@ int nvme_init_identify(struct nvme_ctrl *ctrl) ctrl->sgls = le32_to_cpu(id->sgls); ctrl->kas = le16_to_cpu(id->kas); + if (id->rtd3e) { + /* us -> s */ + u32 transition_time = le32_to_cpu(id->rtd3e) / 1000000; + + ctrl->shutdown_timeout = clamp_t(unsigned int, transition_time, + shutdown_timeout, 60); + + if (ctrl->shutdown_timeout != shutdown_timeout) + dev_warn(ctrl->device, + "Shutdown timeout set to %u seconds\n", + ctrl->shutdown_timeout); + } else + ctrl->shutdown_timeout = shutdown_timeout; + ctrl->npss = id->npss; ctrl->apsta = id->apsta; prev_apst_enabled = ctrl->apst_enabled; diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 2c8a02be46fd..9b959ee18cb6 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -162,6 +162,7 @@ struct nvme_ctrl { u16 kas; u8 npss; u8 apsta; + unsigned int shutdown_timeout; unsigned int kato; bool subsystem; unsigned long quirks; From a751da33945a2c94a0449d1005c3c973e5acfefb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 Aug 2017 10:17:03 +0200 Subject: [PATCH 31/35] nvme: report more detailed status codes to the block layer Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg --- drivers/nvme/host/core.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 745058905da6..4c49ec4349bc 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -113,7 +113,16 @@ static blk_status_t nvme_error_status(struct request *req) case NVME_SC_WRITE_FAULT: case NVME_SC_READ_ERROR: case NVME_SC_UNWRITTEN_BLOCK: + case NVME_SC_ACCESS_DENIED: + case NVME_SC_READ_ONLY: return BLK_STS_MEDIUM; + case NVME_SC_GUARD_CHECK: + case NVME_SC_APPTAG_CHECK: + case NVME_SC_REFTAG_CHECK: + case NVME_SC_INVALID_PI: + return BLK_STS_PROTECTION; + case NVME_SC_RESERVATION_CONFLICT: + return BLK_STS_NEXUS; default: return BLK_STS_IOERR; } From 0a72bbba493bebd53fa78e6741b86a3003f070d6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 22 Aug 2017 11:42:24 +0200 Subject: [PATCH 32/35] nvme: allow calling nvme_change_ctrl_state from irq context Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg --- drivers/nvme/host/core.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 4c49ec4349bc..d63e1fcf4437 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -176,9 +176,10 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, enum nvme_ctrl_state new_state) { enum nvme_ctrl_state old_state; + unsigned long flags; bool changed = false; - spin_lock_irq(&ctrl->lock); + spin_lock_irqsave(&ctrl->lock, flags); old_state = ctrl->state; switch (new_state) { @@ -239,7 +240,7 @@ bool nvme_change_ctrl_state(struct nvme_ctrl *ctrl, if (changed) ctrl->state = new_state; - spin_unlock_irq(&ctrl->lock); + spin_unlock_irqrestore(&ctrl->lock, flags); return changed; } From 57eeaf8ec67d5c5dee0ee9bb5b19866b8eef780d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Aug 2017 15:47:37 +0200 Subject: [PATCH 33/35] nvme: remove unused struct nvme_ns fields And move the flags for the flags field near that field while touching this area. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg --- drivers/nvme/host/nvme.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 9b959ee18cb6..936c4056d98e 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -211,13 +211,9 @@ struct nvme_ns { bool ext; u8 pi_type; unsigned long flags; - u16 noiob; - #define NVME_NS_REMOVING 0 #define NVME_NS_DEAD 1 - - u64 mode_select_num_blocks; - u32 mode_select_block_len; + u16 noiob; }; struct nvme_ctrl_ops { From cdbff4f26bd9fab11bfac22097f836892a5c3612 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 16 Aug 2017 16:14:47 +0200 Subject: [PATCH 34/35] nvme: remove nvme_revalidate_ns The function is used in two places, and the shared code for those will diverge later in this series. Instead factor out a new helper to get the ids for a namespace, simplify the calling conventions for nvme_identify_ns and just open code the sequence. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg --- drivers/nvme/host/core.c | 100 +++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 47 deletions(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index d63e1fcf4437..b87cf3a6e9ac 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -783,7 +783,8 @@ static int nvme_identify_ctrl(struct nvme_ctrl *dev, struct nvme_id_ctrl **id) return error; } -static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid) +static int nvme_identify_ns_descs(struct nvme_ctrl *ctrl, unsigned nsid, + u8 *eui64, u8 *nguid, uuid_t *uuid) { struct nvme_command c = { }; int status; @@ -799,7 +800,7 @@ static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid) if (!data) return -ENOMEM; - status = nvme_submit_sync_cmd(ns->ctrl->admin_q, &c, data, + status = nvme_submit_sync_cmd(ctrl->admin_q, &c, data, NVME_IDENTIFY_DATA_SIZE); if (status) goto free_data; @@ -813,33 +814,33 @@ static int nvme_identify_ns_descs(struct nvme_ns *ns, unsigned nsid) switch (cur->nidt) { case NVME_NIDT_EUI64: if (cur->nidl != NVME_NIDT_EUI64_LEN) { - dev_warn(ns->ctrl->device, + dev_warn(ctrl->device, "ctrl returned bogus length: %d for NVME_NIDT_EUI64\n", cur->nidl); goto free_data; } len = NVME_NIDT_EUI64_LEN; - memcpy(ns->eui, data + pos + sizeof(*cur), len); + memcpy(eui64, data + pos + sizeof(*cur), len); break; case NVME_NIDT_NGUID: if (cur->nidl != NVME_NIDT_NGUID_LEN) { - dev_warn(ns->ctrl->device, + dev_warn(ctrl->device, "ctrl returned bogus length: %d for NVME_NIDT_NGUID\n", cur->nidl); goto free_data; } len = NVME_NIDT_NGUID_LEN; - memcpy(ns->nguid, data + pos + sizeof(*cur), len); + memcpy(nguid, data + pos + sizeof(*cur), len); break; case NVME_NIDT_UUID: if (cur->nidl != NVME_NIDT_UUID_LEN) { - dev_warn(ns->ctrl->device, + dev_warn(ctrl->device, "ctrl returned bogus length: %d for NVME_NIDT_UUID\n", cur->nidl); goto free_data; } len = NVME_NIDT_UUID_LEN; - uuid_copy(&ns->uuid, data + pos + sizeof(*cur)); + uuid_copy(uuid, data + pos + sizeof(*cur)); break; default: /* Skip unnkown types */ @@ -864,9 +865,10 @@ static int nvme_identify_ns_list(struct nvme_ctrl *dev, unsigned nsid, __le32 *n return nvme_submit_sync_cmd(dev->admin_q, &c, ns_list, 0x1000); } -static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, - struct nvme_id_ns **id) +static struct nvme_id_ns *nvme_identify_ns(struct nvme_ctrl *ctrl, + unsigned nsid) { + struct nvme_id_ns *id; struct nvme_command c = { }; int error; @@ -875,15 +877,18 @@ static int nvme_identify_ns(struct nvme_ctrl *dev, unsigned nsid, c.identify.nsid = cpu_to_le32(nsid); c.identify.cns = NVME_ID_CNS_NS; - *id = kmalloc(sizeof(struct nvme_id_ns), GFP_KERNEL); - if (!*id) - return -ENOMEM; + id = kmalloc(sizeof(*id), GFP_KERNEL); + if (!id) + return NULL; - error = nvme_submit_sync_cmd(dev->admin_q, &c, *id, - sizeof(struct nvme_id_ns)); - if (error) - kfree(*id); - return error; + error = nvme_submit_sync_cmd(ctrl->admin_q, &c, id, sizeof(*id)); + if (error) { + dev_warn(ctrl->device, "Identify namespace failed\n"); + kfree(id); + return NULL; + } + + return id; } static int nvme_set_features(struct nvme_ctrl *dev, unsigned fid, unsigned dword11, @@ -1174,32 +1179,21 @@ static void nvme_config_discard(struct nvme_ns *ns) blk_queue_max_write_zeroes_sectors(ns->queue, UINT_MAX); } -static int nvme_revalidate_ns(struct nvme_ns *ns, struct nvme_id_ns **id) +static void nvme_report_ns_ids(struct nvme_ctrl *ctrl, unsigned int nsid, + struct nvme_id_ns *id, u8 *eui64, u8 *nguid, uuid_t *uuid) { - if (nvme_identify_ns(ns->ctrl, ns->ns_id, id)) { - dev_warn(ns->ctrl->device, "Identify namespace failed\n"); - return -ENODEV; - } - - if ((*id)->ncap == 0) { - kfree(*id); - return -ENODEV; - } - - if (ns->ctrl->vs >= NVME_VS(1, 1, 0)) - memcpy(ns->eui, (*id)->eui64, sizeof(ns->eui)); - if (ns->ctrl->vs >= NVME_VS(1, 2, 0)) - memcpy(ns->nguid, (*id)->nguid, sizeof(ns->nguid)); - if (ns->ctrl->vs >= NVME_VS(1, 3, 0)) { + if (ctrl->vs >= NVME_VS(1, 1, 0)) + memcpy(eui64, id->eui64, sizeof(id->eui64)); + if (ctrl->vs >= NVME_VS(1, 2, 0)) + memcpy(nguid, id->nguid, sizeof(id->nguid)); + if (ctrl->vs >= NVME_VS(1, 3, 0)) { /* Don't treat error as fatal we potentially * already have a NGUID or EUI-64 */ - if (nvme_identify_ns_descs(ns, ns->ns_id)) - dev_warn(ns->ctrl->device, + if (nvme_identify_ns_descs(ctrl, nsid, eui64, nguid, uuid)) + dev_warn(ctrl->device, "%s: Identify Descriptors failed\n", __func__); } - - return 0; } static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) @@ -1240,22 +1234,28 @@ static void __nvme_revalidate_disk(struct gendisk *disk, struct nvme_id_ns *id) static int nvme_revalidate_disk(struct gendisk *disk) { struct nvme_ns *ns = disk->private_data; - struct nvme_id_ns *id = NULL; - int ret; + struct nvme_ctrl *ctrl = ns->ctrl; + struct nvme_id_ns *id; + int ret = 0; if (test_bit(NVME_NS_DEAD, &ns->flags)) { set_capacity(disk, 0); return -ENODEV; } - ret = nvme_revalidate_ns(ns, &id); - if (ret) - return ret; + id = nvme_identify_ns(ctrl, ns->ns_id); + if (!id) + return -ENODEV; - __nvme_revalidate_disk(disk, id); + if (id->ncap == 0) { + ret = -ENODEV; + goto out; + } + + nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid); +out: kfree(id); - - return 0; + return ret; } static char nvme_pr_type(enum pr_type type) @@ -2361,9 +2361,15 @@ static void nvme_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid) sprintf(disk_name, "nvme%dn%d", ctrl->instance, ns->instance); - if (nvme_revalidate_ns(ns, &id)) + id = nvme_identify_ns(ctrl, nsid); + if (!id) goto out_free_queue; + if (id->ncap == 0) + goto out_free_id; + + nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid); + if (nvme_nvm_ns_supported(ns, id) && nvme_nvm_register(ns, disk_name, node)) { dev_warn(ctrl->device, "%s: LightNVM init failure\n", __func__); From 1d5df6af8c7469f9ae3e66e7bed0782cfe4f95db Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 17 Aug 2017 14:10:00 +0200 Subject: [PATCH 35/35] nvme: don't blindly overwrite identifiers on disk revalidate Instead validate that these identifiers do not change, as that is prohibited by the specification. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch --- drivers/nvme/host/core.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index b87cf3a6e9ac..b0dd58db110e 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -1236,6 +1236,8 @@ static int nvme_revalidate_disk(struct gendisk *disk) struct nvme_ns *ns = disk->private_data; struct nvme_ctrl *ctrl = ns->ctrl; struct nvme_id_ns *id; + u8 eui64[8] = { 0 }, nguid[16] = { 0 }; + uuid_t uuid = uuid_null; int ret = 0; if (test_bit(NVME_NS_DEAD, &ns->flags)) { @@ -1252,7 +1254,15 @@ static int nvme_revalidate_disk(struct gendisk *disk) goto out; } - nvme_report_ns_ids(ctrl, ns->ns_id, id, ns->eui, ns->nguid, &ns->uuid); + nvme_report_ns_ids(ctrl, ns->ns_id, id, eui64, nguid, &uuid); + if (!uuid_equal(&ns->uuid, &uuid) || + memcmp(&ns->nguid, &nguid, sizeof(ns->nguid)) || + memcmp(&ns->eui, &eui64, sizeof(ns->eui))) { + dev_err(ctrl->device, + "identifiers changed for nsid %d\n", ns->ns_id); + ret = -ENODEV; + } + out: kfree(id); return ret;