
Merge branch 'nvme-5.2-rc-next' of git://git.infradead.org/nvme into for-linus

Pull NVMe fixes from Sagi.

* 'nvme-5.2-rc-next' of git://git.infradead.org/nvme:
  nvme-rdma: use dynamic dma mapping per command
  nvme: Fix u32 overflow in the number of namespace list calculation
  nvmet: fix data_len to 0 for bdev-backed write_zeroes
  nvme-tcp: fix queue mapping when queue count is limited
  nvme-rdma: fix queue mapping when queue count is limited
Jens Axboe 2019-06-07 14:04:28 -06:00
commit 6c70f899b8
4 changed files with 150 additions and 63 deletions

drivers/nvme/host/core.c

@@ -3400,7 +3400,8 @@ static int nvme_scan_ns_list(struct nvme_ctrl *ctrl, unsigned nn)
 {
 	struct nvme_ns *ns;
 	__le32 *ns_list;
-	unsigned i, j, nsid, prev = 0, num_lists = DIV_ROUND_UP(nn, 1024);
+	unsigned i, j, nsid, prev = 0;
+	unsigned num_lists = DIV_ROUND_UP_ULL((u64)nn, 1024);
 	int ret = 0;
 
 	ns_list = kzalloc(NVME_IDENTIFY_DATA_SIZE, GFP_KERNEL);
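
The overflow here is subtle: DIV_ROUND_UP(nn, 1024) expands to ((nn) + 1024 - 1) / 1024, and since nn is a 32-bit namespace count the addition can wrap before the division, yielding zero lists to scan. A minimal userspace sketch of the failure mode (the macro copy and the sample value are illustrative, not taken from the driver):

#include <stdint.h>
#include <stdio.h>

/* Simplified copy of the kernel macro, for illustration only. */
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	uint32_t nn = 0xffffffe0;	/* a namespace count near U32_MAX */

	/* 32-bit arithmetic: nn + 1023 wraps around, so the result is 0. */
	printf("u32: %u\n", DIV_ROUND_UP(nn, 1024u));

	/* Widening to 64 bits first, as the patch does, gives 4194304. */
	printf("u64: %llu\n",
	       (unsigned long long)DIV_ROUND_UP((uint64_t)nn, 1024));
	return 0;
}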

drivers/nvme/host/rdma.c

@@ -213,6 +213,11 @@ static struct nvme_rdma_qe *nvme_rdma_alloc_ring(struct ib_device *ibdev,
 	if (!ring)
 		return NULL;
 
+	/*
+	 * Bind the CQEs (post recv buffers) DMA mapping to the RDMA queue
+	 * lifetime. It's safe, since any change in the underlying RDMA device
+	 * will issue error recovery and queue re-creation.
+	 */
 	for (i = 0; i < ib_queue_size; i++) {
 		if (nvme_rdma_alloc_qe(ibdev, &ring[i], capsule_size, dir))
 			goto out_free_ring;
@@ -274,14 +279,9 @@ static int nvme_rdma_create_qp(struct nvme_rdma_queue *queue, const int factor)
 static void nvme_rdma_exit_request(struct blk_mq_tag_set *set,
 		struct request *rq, unsigned int hctx_idx)
 {
-	struct nvme_rdma_ctrl *ctrl = set->driver_data;
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
-	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
-	struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
-	struct nvme_rdma_device *dev = queue->device;
 
-	nvme_rdma_free_qe(dev->dev, &req->sqe, sizeof(struct nvme_command),
-			DMA_TO_DEVICE);
+	kfree(req->sqe.data);
 }
 
 static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
@@ -292,15 +292,11 @@ static int nvme_rdma_init_request(struct blk_mq_tag_set *set,
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
 	int queue_idx = (set == &ctrl->tag_set) ? hctx_idx + 1 : 0;
 	struct nvme_rdma_queue *queue = &ctrl->queues[queue_idx];
-	struct nvme_rdma_device *dev = queue->device;
-	struct ib_device *ibdev = dev->dev;
-	int ret;
 
 	nvme_req(rq)->ctrl = &ctrl->ctrl;
-	ret = nvme_rdma_alloc_qe(ibdev, &req->sqe, sizeof(struct nvme_command),
-			DMA_TO_DEVICE);
-	if (ret)
-		return ret;
+	req->sqe.data = kzalloc(sizeof(struct nvme_command), GFP_KERNEL);
+	if (!req->sqe.data)
+		return -ENOMEM;
 
 	req->queue = queue;
@@ -641,34 +637,16 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
 {
 	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
 	struct ib_device *ibdev = ctrl->device->dev;
-	unsigned int nr_io_queues;
+	unsigned int nr_io_queues, nr_default_queues;
+	unsigned int nr_read_queues, nr_poll_queues;
 	int i, ret;
 
-	nr_io_queues = min(opts->nr_io_queues, num_online_cpus());
-
-	/*
-	 * we map queues according to the device irq vectors for
-	 * optimal locality so we don't need more queues than
-	 * completion vectors.
-	 */
-	nr_io_queues = min_t(unsigned int, nr_io_queues,
-				ibdev->num_comp_vectors);
-
-	if (opts->nr_write_queues) {
-		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
-			min(opts->nr_write_queues, nr_io_queues);
-		nr_io_queues += ctrl->io_queues[HCTX_TYPE_DEFAULT];
-	} else {
-		ctrl->io_queues[HCTX_TYPE_DEFAULT] = nr_io_queues;
-	}
-
-	ctrl->io_queues[HCTX_TYPE_READ] = nr_io_queues;
-
-	if (opts->nr_poll_queues) {
-		ctrl->io_queues[HCTX_TYPE_POLL] =
-			min(opts->nr_poll_queues, num_online_cpus());
-		nr_io_queues += ctrl->io_queues[HCTX_TYPE_POLL];
-	}
+	nr_read_queues = min_t(unsigned int, ibdev->num_comp_vectors,
+				min(opts->nr_io_queues, num_online_cpus()));
+	nr_default_queues = min_t(unsigned int, ibdev->num_comp_vectors,
+				min(opts->nr_write_queues, num_online_cpus()));
+	nr_poll_queues = min(opts->nr_poll_queues, num_online_cpus());
+	nr_io_queues = nr_read_queues + nr_default_queues + nr_poll_queues;
 
 	ret = nvme_set_queue_count(&ctrl->ctrl, &nr_io_queues);
 	if (ret)
@@ -681,6 +659,34 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
 	dev_info(ctrl->ctrl.device,
 		"creating %d I/O queues.\n", nr_io_queues);
 
+	if (opts->nr_write_queues && nr_read_queues < nr_io_queues) {
+		/*
+		 * separate read/write queues
+		 * hand out dedicated default queues only after we have
+		 * sufficient read queues.
+		 */
+		ctrl->io_queues[HCTX_TYPE_READ] = nr_read_queues;
+		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ];
+		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
+			min(nr_default_queues, nr_io_queues);
+		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
+	} else {
+		/*
+		 * shared read/write queues
+		 * either no write queues were requested, or we don't have
+		 * sufficient queue count to have dedicated default queues.
+		 */
+		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
+			min(nr_read_queues, nr_io_queues);
+		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
+	}
+
+	if (opts->nr_poll_queues && nr_io_queues) {
+		/* map dedicated poll queues only if we have queues left */
+		ctrl->io_queues[HCTX_TYPE_POLL] =
+			min(nr_poll_queues, nr_io_queues);
+	}
+
 	for (i = 1; i < ctrl->ctrl.queue_count; i++) {
 		ret = nvme_rdma_alloc_queue(ctrl, i,
 				ctrl->ctrl.sqsize + 1);
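
The net effect of this accounting is a strict priority order when the controller grants fewer queues than requested: read queues are satisfied first, default (write) queues next, and poll queues take whatever remains. A rough userspace sketch of the distribution, assuming illustrative request counts (the values and the min_u helper are hypothetical):

#include <stdio.h>

static unsigned int min_u(unsigned int a, unsigned int b)
{
	return a < b ? a : b;
}

int main(void)
{
	/* requested: 4 read, 4 default, 2 poll; controller grants only 6 */
	unsigned int nr_read = 4, nr_default = 4, nr_poll = 2;
	unsigned int granted = 6;
	unsigned int q_read, q_default, q_poll = 0;

	q_read = min_u(nr_read, granted);	/* reads first: 4 */
	granted -= q_read;
	q_default = min_u(nr_default, granted);	/* defaults next: 2 */
	granted -= q_default;
	if (granted)				/* nothing left for poll */
		q_poll = min_u(nr_poll, granted);

	printf("read=%u default=%u poll=%u\n", q_read, q_default, q_poll);
	return 0;
}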
@@ -769,6 +775,11 @@ static int nvme_rdma_configure_admin_queue(struct nvme_rdma_ctrl *ctrl,
 	ctrl->max_fr_pages = nvme_rdma_get_max_fr_pages(ctrl->device->dev);
 
+	/*
+	 * Bind the async event SQE DMA mapping to the admin queue lifetime.
+	 * It's safe, since any change in the underlying RDMA device will issue
+	 * error recovery and queue re-creation.
+	 */
 	error = nvme_rdma_alloc_qe(ctrl->device->dev, &ctrl->async_event_sqe,
 			sizeof(struct nvme_command), DMA_TO_DEVICE);
 	if (error)
@@ -1709,12 +1720,20 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 		return nvmf_fail_nonready_command(&queue->ctrl->ctrl, rq);
 
 	dev = queue->device->dev;
+
+	req->sqe.dma = ib_dma_map_single(dev, req->sqe.data,
+					 sizeof(struct nvme_command),
+					 DMA_TO_DEVICE);
+	err = ib_dma_mapping_error(dev, req->sqe.dma);
+	if (unlikely(err))
+		return BLK_STS_RESOURCE;
+
 	ib_dma_sync_single_for_cpu(dev, sqe->dma,
 			sizeof(struct nvme_command), DMA_TO_DEVICE);
 
 	ret = nvme_setup_cmd(ns, rq, c);
 	if (ret)
-		return ret;
+		goto unmap_qe;
 
 	blk_mq_start_request(rq);
@@ -1739,10 +1758,16 @@ static blk_status_t nvme_rdma_queue_rq(struct blk_mq_hw_ctx *hctx,
 	}
 
 	return BLK_STS_OK;
+
 err:
 	if (err == -ENOMEM || err == -EAGAIN)
-		return BLK_STS_RESOURCE;
-	return BLK_STS_IOERR;
+		ret = BLK_STS_RESOURCE;
+	else
+		ret = BLK_STS_IOERR;
+unmap_qe:
+	ib_dma_unmap_single(dev, req->sqe.dma, sizeof(struct nvme_command),
+			    DMA_TO_DEVICE);
+	return ret;
 }
 
 static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx)
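
Taken together, these queue_rq changes (and the complete_rq change below) replace a DMA mapping created once at request initialization with a map/unmap pair around each command, so no mapping outlives the underlying RDMA device across removal and re-creation. A userspace model of the new SQE buffer lifecycle; map_buf()/unmap_buf() are hypothetical stand-ins for ib_dma_map_single()/ib_dma_unmap_single():

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct sqe {
	void	*data;	/* allocated once at init_request, freed at exit_request */
	uint64_t dma;	/* valid only while a command is in flight */
};

/* Hypothetical stand-ins for ib_dma_map_single()/ib_dma_unmap_single(). */
static uint64_t map_buf(void *p)  { return (uint64_t)(uintptr_t)p; }
static void unmap_buf(uint64_t d) { (void)d; }

int main(void)
{
	struct sqe sqe;

	sqe.data = calloc(1, 64);		/* init_request: kzalloc() */
	if (!sqe.data)
		return 1;

	sqe.dma = map_buf(sqe.data);		/* queue_rq: map per command */
	printf("in flight, dma=%#llx\n", (unsigned long long)sqe.dma);
	unmap_buf(sqe.dma);			/* complete_rq: unmap */

	free(sqe.data);				/* exit_request: kfree() */
	return 0;
}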
@@ -1755,25 +1780,36 @@ static int nvme_rdma_poll(struct blk_mq_hw_ctx *hctx)
 static void nvme_rdma_complete_rq(struct request *rq)
 {
 	struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
+	struct nvme_rdma_queue *queue = req->queue;
+	struct ib_device *ibdev = queue->device->dev;
 
-	nvme_rdma_unmap_data(req->queue, rq);
+	nvme_rdma_unmap_data(queue, rq);
+	ib_dma_unmap_single(ibdev, req->sqe.dma, sizeof(struct nvme_command),
+			    DMA_TO_DEVICE);
 	nvme_complete_rq(rq);
 }
 
 static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
 {
 	struct nvme_rdma_ctrl *ctrl = set->driver_data;
+	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
 
-	set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
-	set->map[HCTX_TYPE_DEFAULT].nr_queues =
-		ctrl->io_queues[HCTX_TYPE_DEFAULT];
-	set->map[HCTX_TYPE_READ].nr_queues = ctrl->io_queues[HCTX_TYPE_READ];
-	if (ctrl->ctrl.opts->nr_write_queues) {
+	if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
 		/* separate read/write queues */
+		set->map[HCTX_TYPE_DEFAULT].nr_queues =
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
+		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+		set->map[HCTX_TYPE_READ].nr_queues =
+			ctrl->io_queues[HCTX_TYPE_READ];
 		set->map[HCTX_TYPE_READ].queue_offset =
 			ctrl->io_queues[HCTX_TYPE_DEFAULT];
 	} else {
-		/* mixed read/write queues */
+		/* shared read/write queues */
+		set->map[HCTX_TYPE_DEFAULT].nr_queues =
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
+		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+		set->map[HCTX_TYPE_READ].nr_queues =
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
 		set->map[HCTX_TYPE_READ].queue_offset = 0;
 	}
 	blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_DEFAULT],
@@ -1781,16 +1817,22 @@ static int nvme_rdma_map_queues(struct blk_mq_tag_set *set)
 	blk_mq_rdma_map_queues(&set->map[HCTX_TYPE_READ],
 			ctrl->device->dev, 0);
 
-	if (ctrl->ctrl.opts->nr_poll_queues) {
+	if (opts->nr_poll_queues && ctrl->io_queues[HCTX_TYPE_POLL]) {
+		/* map dedicated poll queues only if we have queues left */
 		set->map[HCTX_TYPE_POLL].nr_queues =
 				ctrl->io_queues[HCTX_TYPE_POLL];
 		set->map[HCTX_TYPE_POLL].queue_offset =
-				ctrl->io_queues[HCTX_TYPE_DEFAULT];
-		if (ctrl->ctrl.opts->nr_write_queues)
-			set->map[HCTX_TYPE_POLL].queue_offset +=
-				ctrl->io_queues[HCTX_TYPE_READ];
+			ctrl->io_queues[HCTX_TYPE_DEFAULT] +
+			ctrl->io_queues[HCTX_TYPE_READ];
 		blk_mq_map_queues(&set->map[HCTX_TYPE_POLL]);
 	}
+
+	dev_info(ctrl->ctrl.device,
+		"mapped %d/%d/%d default/read/poll queues.\n",
+		ctrl->io_queues[HCTX_TYPE_DEFAULT],
+		ctrl->io_queues[HCTX_TYPE_READ],
+		ctrl->io_queues[HCTX_TYPE_POLL]);
+
 	return 0;
 }
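
With the fixed offsets, the three queue groups are simply stacked back to back in the tag set: defaults start at 0, reads follow the defaults, and poll queues follow both (previously the poll offset skipped the read queues only when write queues were requested). A small sketch of the resulting layout, with hypothetical counts:

#include <stdio.h>

int main(void)
{
	unsigned int nr_default = 2, nr_read = 4, nr_poll = 2;
	unsigned int off_default = 0;
	unsigned int off_read = off_default + nr_default;	/* 2 */
	unsigned int off_poll = off_read + nr_read;		/* 6 */

	printf("default: hw queues %u..%u\n",
	       off_default, off_default + nr_default - 1);
	printf("read:    hw queues %u..%u\n",
	       off_read, off_read + nr_read - 1);
	printf("poll:    hw queues %u..%u\n",
	       off_poll, off_poll + nr_poll - 1);
	return 0;
}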

drivers/nvme/host/tcp.c

@@ -111,6 +111,7 @@ struct nvme_tcp_ctrl {
 	struct work_struct err_work;
 	struct delayed_work connect_work;
 	struct nvme_tcp_request async_req;
+	u32 io_queues[HCTX_MAX_TYPES];
 };
 
 static LIST_HEAD(nvme_tcp_ctrl_list);
@@ -1564,6 +1565,35 @@ static unsigned int nvme_tcp_nr_io_queues(struct nvme_ctrl *ctrl)
 	return nr_io_queues;
 }
 
+static void nvme_tcp_set_io_queues(struct nvme_ctrl *nctrl,
+		unsigned int nr_io_queues)
+{
+	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
+	struct nvmf_ctrl_options *opts = nctrl->opts;
+
+	if (opts->nr_write_queues && opts->nr_io_queues < nr_io_queues) {
+		/*
+		 * separate read/write queues
+		 * hand out dedicated default queues only after we have
+		 * sufficient read queues.
+		 */
+		ctrl->io_queues[HCTX_TYPE_READ] = opts->nr_io_queues;
+		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_READ];
+		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
+			min(opts->nr_write_queues, nr_io_queues);
+		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
+	} else {
+		/*
+		 * shared read/write queues
+		 * either no write queues were requested, or we don't have
+		 * sufficient queue count to have dedicated default queues.
+		 */
+		ctrl->io_queues[HCTX_TYPE_DEFAULT] =
+			min(opts->nr_io_queues, nr_io_queues);
+		nr_io_queues -= ctrl->io_queues[HCTX_TYPE_DEFAULT];
+	}
+}
+
 static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
 {
 	unsigned int nr_io_queues;
@@ -1581,6 +1611,8 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
 	dev_info(ctrl->device,
 		"creating %d I/O queues.\n", nr_io_queues);
 
+	nvme_tcp_set_io_queues(ctrl, nr_io_queues);
+
 	return __nvme_tcp_alloc_io_queues(ctrl);
 }
@@ -2089,23 +2121,34 @@ static blk_status_t nvme_tcp_queue_rq(struct blk_mq_hw_ctx *hctx,
 static int nvme_tcp_map_queues(struct blk_mq_tag_set *set)
 {
 	struct nvme_tcp_ctrl *ctrl = set->driver_data;
+	struct nvmf_ctrl_options *opts = ctrl->ctrl.opts;
 
-	set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
-	set->map[HCTX_TYPE_READ].nr_queues = ctrl->ctrl.opts->nr_io_queues;
-	if (ctrl->ctrl.opts->nr_write_queues) {
+	if (opts->nr_write_queues && ctrl->io_queues[HCTX_TYPE_READ]) {
 		/* separate read/write queues */
 		set->map[HCTX_TYPE_DEFAULT].nr_queues =
-				ctrl->ctrl.opts->nr_write_queues;
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
+		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+		set->map[HCTX_TYPE_READ].nr_queues =
+			ctrl->io_queues[HCTX_TYPE_READ];
 		set->map[HCTX_TYPE_READ].queue_offset =
-				ctrl->ctrl.opts->nr_write_queues;
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
 	} else {
-		/* mixed read/write queues */
+		/* shared read/write queues */
 		set->map[HCTX_TYPE_DEFAULT].nr_queues =
-				ctrl->ctrl.opts->nr_io_queues;
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
+		set->map[HCTX_TYPE_DEFAULT].queue_offset = 0;
+		set->map[HCTX_TYPE_READ].nr_queues =
+			ctrl->io_queues[HCTX_TYPE_DEFAULT];
 		set->map[HCTX_TYPE_READ].queue_offset = 0;
 	}
 	blk_mq_map_queues(&set->map[HCTX_TYPE_DEFAULT]);
 	blk_mq_map_queues(&set->map[HCTX_TYPE_READ]);
+
+	dev_info(ctrl->ctrl.device,
+		"mapped %d/%d default/read queues.\n",
+		ctrl->io_queues[HCTX_TYPE_DEFAULT],
+		ctrl->io_queues[HCTX_TYPE_READ]);
+
 	return 0;
 }

drivers/nvme/target/io-cmd-bdev.c

@@ -293,6 +293,7 @@ u16 nvmet_bdev_parse_io_cmd(struct nvmet_req *req)
 		return 0;
 	case nvme_cmd_write_zeroes:
 		req->execute = nvmet_bdev_execute_write_zeroes;
+		req->data_len = 0;
 		return 0;
 	default:
 		pr_err("unhandled cmd %d on qid %d\n", cmd->common.opcode,
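
Write Zeroes encodes its target range entirely inside the 64-byte command (starting LBA plus a 0's-based block count), so the host transfers no payload; without data_len explicitly set to 0, the target's transfer-length check could reject the command. A small illustration of what the command itself encodes (the values are hypothetical):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t nlb0 = 7;			/* 0's-based: 8 logical blocks */
	unsigned int blksize_shift = 12;	/* 4 KiB logical blocks */
	uint64_t zeroed = ((uint64_t)nlb0 + 1) << blksize_shift;

	printf("zeroes %llu bytes on disk, transfers 0 bytes from the host\n",
	       (unsigned long long)zeroed);
	return 0;
}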