block: virtio-blk: support multi virt queues per virtio-blk device
Firstly this patch supports more than one virtual queues for virtio-blk device. Secondly this patch maps the virtual queue to blk-mq's hardware queue. With this approach, both scalability and performance can be improved. Signed-off-by: Ming Lei <ming.lei@canonical.com> Acked-by: Michael S. Tsirkin <mst@redhat.com> Signed-off-by: Jens Axboe <axboe@fb.com>hifive-unleashed-5.1
parent
cb553215d5
commit
6a27b656fc
|
@ -15,17 +15,22 @@
|
||||||
#include <linux/numa.h>
|
#include <linux/numa.h>
|
||||||
|
|
||||||
#define PART_BITS 4
|
#define PART_BITS 4
|
||||||
|
#define VQ_NAME_LEN 16
|
||||||
|
|
||||||
static int major;
|
static int major;
|
||||||
static DEFINE_IDA(vd_index_ida);
|
static DEFINE_IDA(vd_index_ida);
|
||||||
|
|
||||||
static struct workqueue_struct *virtblk_wq;
|
static struct workqueue_struct *virtblk_wq;
|
||||||
|
|
||||||
|
struct virtio_blk_vq {
|
||||||
|
struct virtqueue *vq;
|
||||||
|
spinlock_t lock;
|
||||||
|
char name[VQ_NAME_LEN];
|
||||||
|
} ____cacheline_aligned_in_smp;
|
||||||
|
|
||||||
struct virtio_blk
|
struct virtio_blk
|
||||||
{
|
{
|
||||||
struct virtio_device *vdev;
|
struct virtio_device *vdev;
|
||||||
struct virtqueue *vq;
|
|
||||||
spinlock_t vq_lock;
|
|
||||||
|
|
||||||
/* The disk structure for the kernel. */
|
/* The disk structure for the kernel. */
|
||||||
struct gendisk *disk;
|
struct gendisk *disk;
|
||||||
|
@ -47,6 +52,10 @@ struct virtio_blk
|
||||||
|
|
||||||
/* Ida index - used to track minor number allocations. */
|
/* Ida index - used to track minor number allocations. */
|
||||||
int index;
|
int index;
|
||||||
|
|
||||||
|
/* num of vqs */
|
||||||
|
int num_vqs;
|
||||||
|
struct virtio_blk_vq *vqs;
|
||||||
};
|
};
|
||||||
|
|
||||||
struct virtblk_req
|
struct virtblk_req
|
||||||
|
@ -133,14 +142,15 @@ static void virtblk_done(struct virtqueue *vq)
|
||||||
{
|
{
|
||||||
struct virtio_blk *vblk = vq->vdev->priv;
|
struct virtio_blk *vblk = vq->vdev->priv;
|
||||||
bool req_done = false;
|
bool req_done = false;
|
||||||
|
int qid = vq->index;
|
||||||
struct virtblk_req *vbr;
|
struct virtblk_req *vbr;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
unsigned int len;
|
unsigned int len;
|
||||||
|
|
||||||
spin_lock_irqsave(&vblk->vq_lock, flags);
|
spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
|
||||||
do {
|
do {
|
||||||
virtqueue_disable_cb(vq);
|
virtqueue_disable_cb(vq);
|
||||||
while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
|
while ((vbr = virtqueue_get_buf(vblk->vqs[qid].vq, &len)) != NULL) {
|
||||||
blk_mq_complete_request(vbr->req);
|
blk_mq_complete_request(vbr->req);
|
||||||
req_done = true;
|
req_done = true;
|
||||||
}
|
}
|
||||||
|
@ -151,7 +161,7 @@ static void virtblk_done(struct virtqueue *vq)
|
||||||
/* In case queue is stopped waiting for more buffers. */
|
/* In case queue is stopped waiting for more buffers. */
|
||||||
if (req_done)
|
if (req_done)
|
||||||
blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
|
blk_mq_start_stopped_hw_queues(vblk->disk->queue, true);
|
||||||
spin_unlock_irqrestore(&vblk->vq_lock, flags);
|
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
|
static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
|
||||||
|
@ -160,6 +170,7 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
|
||||||
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
|
struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
unsigned int num;
|
unsigned int num;
|
||||||
|
int qid = hctx->queue_num;
|
||||||
const bool last = (req->cmd_flags & REQ_END) != 0;
|
const bool last = (req->cmd_flags & REQ_END) != 0;
|
||||||
int err;
|
int err;
|
||||||
bool notify = false;
|
bool notify = false;
|
||||||
|
@ -202,12 +213,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
|
||||||
vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
|
vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
|
||||||
}
|
}
|
||||||
|
|
||||||
spin_lock_irqsave(&vblk->vq_lock, flags);
|
spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
|
||||||
err = __virtblk_add_req(vblk->vq, vbr, vbr->sg, num);
|
err = __virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
|
||||||
if (err) {
|
if (err) {
|
||||||
virtqueue_kick(vblk->vq);
|
virtqueue_kick(vblk->vqs[qid].vq);
|
||||||
blk_mq_stop_hw_queue(hctx);
|
blk_mq_stop_hw_queue(hctx);
|
||||||
spin_unlock_irqrestore(&vblk->vq_lock, flags);
|
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
|
||||||
/* Out of mem doesn't actually happen, since we fall back
|
/* Out of mem doesn't actually happen, since we fall back
|
||||||
* to direct descriptors */
|
* to direct descriptors */
|
||||||
if (err == -ENOMEM || err == -ENOSPC)
|
if (err == -ENOMEM || err == -ENOSPC)
|
||||||
|
@ -215,12 +226,12 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
|
||||||
return BLK_MQ_RQ_QUEUE_ERROR;
|
return BLK_MQ_RQ_QUEUE_ERROR;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (last && virtqueue_kick_prepare(vblk->vq))
|
if (last && virtqueue_kick_prepare(vblk->vqs[qid].vq))
|
||||||
notify = true;
|
notify = true;
|
||||||
spin_unlock_irqrestore(&vblk->vq_lock, flags);
|
spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
|
||||||
|
|
||||||
if (notify)
|
if (notify)
|
||||||
virtqueue_notify(vblk->vq);
|
virtqueue_notify(vblk->vqs[qid].vq);
|
||||||
return BLK_MQ_RQ_QUEUE_OK;
|
return BLK_MQ_RQ_QUEUE_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -377,12 +388,64 @@ static void virtblk_config_changed(struct virtio_device *vdev)
|
||||||
static int init_vq(struct virtio_blk *vblk)
|
static int init_vq(struct virtio_blk *vblk)
|
||||||
{
|
{
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
int i;
|
||||||
|
vq_callback_t **callbacks;
|
||||||
|
const char **names;
|
||||||
|
struct virtqueue **vqs;
|
||||||
|
unsigned short num_vqs;
|
||||||
|
struct virtio_device *vdev = vblk->vdev;
|
||||||
|
|
||||||
/* We expect one virtqueue, for output. */
|
err = virtio_cread_feature(vdev, VIRTIO_BLK_F_MQ,
|
||||||
vblk->vq = virtio_find_single_vq(vblk->vdev, virtblk_done, "requests");
|
struct virtio_blk_config, num_queues,
|
||||||
if (IS_ERR(vblk->vq))
|
&num_vqs);
|
||||||
err = PTR_ERR(vblk->vq);
|
if (err)
|
||||||
|
num_vqs = 1;
|
||||||
|
|
||||||
|
vblk->vqs = kmalloc(sizeof(*vblk->vqs) * num_vqs, GFP_KERNEL);
|
||||||
|
if (!vblk->vqs) {
|
||||||
|
err = -ENOMEM;
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
|
||||||
|
names = kmalloc(sizeof(*names) * num_vqs, GFP_KERNEL);
|
||||||
|
if (!names)
|
||||||
|
goto err_names;
|
||||||
|
|
||||||
|
callbacks = kmalloc(sizeof(*callbacks) * num_vqs, GFP_KERNEL);
|
||||||
|
if (!callbacks)
|
||||||
|
goto err_callbacks;
|
||||||
|
|
||||||
|
vqs = kmalloc(sizeof(*vqs) * num_vqs, GFP_KERNEL);
|
||||||
|
if (!vqs)
|
||||||
|
goto err_vqs;
|
||||||
|
|
||||||
|
for (i = 0; i < num_vqs; i++) {
|
||||||
|
callbacks[i] = virtblk_done;
|
||||||
|
snprintf(vblk->vqs[i].name, VQ_NAME_LEN, "req.%d", i);
|
||||||
|
names[i] = vblk->vqs[i].name;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Discover virtqueues and write information to configuration. */
|
||||||
|
err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
|
||||||
|
if (err)
|
||||||
|
goto err_find_vqs;
|
||||||
|
|
||||||
|
for (i = 0; i < num_vqs; i++) {
|
||||||
|
spin_lock_init(&vblk->vqs[i].lock);
|
||||||
|
vblk->vqs[i].vq = vqs[i];
|
||||||
|
}
|
||||||
|
vblk->num_vqs = num_vqs;
|
||||||
|
|
||||||
|
err_find_vqs:
|
||||||
|
kfree(vqs);
|
||||||
|
err_vqs:
|
||||||
|
kfree(callbacks);
|
||||||
|
err_callbacks:
|
||||||
|
kfree(names);
|
||||||
|
err_names:
|
||||||
|
if (err)
|
||||||
|
kfree(vblk->vqs);
|
||||||
|
out:
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -551,7 +614,6 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||||
err = init_vq(vblk);
|
err = init_vq(vblk);
|
||||||
if (err)
|
if (err)
|
||||||
goto out_free_vblk;
|
goto out_free_vblk;
|
||||||
spin_lock_init(&vblk->vq_lock);
|
|
||||||
|
|
||||||
/* FIXME: How many partitions? How long is a piece of string? */
|
/* FIXME: How many partitions? How long is a piece of string? */
|
||||||
vblk->disk = alloc_disk(1 << PART_BITS);
|
vblk->disk = alloc_disk(1 << PART_BITS);
|
||||||
|
@ -562,7 +624,7 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||||
|
|
||||||
/* Default queue sizing is to fill the ring. */
|
/* Default queue sizing is to fill the ring. */
|
||||||
if (!virtblk_queue_depth) {
|
if (!virtblk_queue_depth) {
|
||||||
virtblk_queue_depth = vblk->vq->num_free;
|
virtblk_queue_depth = vblk->vqs[0].vq->num_free;
|
||||||
/* ... but without indirect descs, we use 2 descs per req */
|
/* ... but without indirect descs, we use 2 descs per req */
|
||||||
if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
|
if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
|
||||||
virtblk_queue_depth /= 2;
|
virtblk_queue_depth /= 2;
|
||||||
|
@ -570,7 +632,6 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||||
|
|
||||||
memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
|
memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
|
||||||
vblk->tag_set.ops = &virtio_mq_ops;
|
vblk->tag_set.ops = &virtio_mq_ops;
|
||||||
vblk->tag_set.nr_hw_queues = 1;
|
|
||||||
vblk->tag_set.queue_depth = virtblk_queue_depth;
|
vblk->tag_set.queue_depth = virtblk_queue_depth;
|
||||||
vblk->tag_set.numa_node = NUMA_NO_NODE;
|
vblk->tag_set.numa_node = NUMA_NO_NODE;
|
||||||
vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
|
vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
|
||||||
|
@ -578,6 +639,7 @@ static int virtblk_probe(struct virtio_device *vdev)
|
||||||
sizeof(struct virtblk_req) +
|
sizeof(struct virtblk_req) +
|
||||||
sizeof(struct scatterlist) * sg_elems;
|
sizeof(struct scatterlist) * sg_elems;
|
||||||
vblk->tag_set.driver_data = vblk;
|
vblk->tag_set.driver_data = vblk;
|
||||||
|
vblk->tag_set.nr_hw_queues = vblk->num_vqs;
|
||||||
|
|
||||||
err = blk_mq_alloc_tag_set(&vblk->tag_set);
|
err = blk_mq_alloc_tag_set(&vblk->tag_set);
|
||||||
if (err)
|
if (err)
|
||||||
|
@ -727,6 +789,7 @@ static void virtblk_remove(struct virtio_device *vdev)
|
||||||
refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
|
refc = atomic_read(&disk_to_dev(vblk->disk)->kobj.kref.refcount);
|
||||||
put_disk(vblk->disk);
|
put_disk(vblk->disk);
|
||||||
vdev->config->del_vqs(vdev);
|
vdev->config->del_vqs(vdev);
|
||||||
|
kfree(vblk->vqs);
|
||||||
kfree(vblk);
|
kfree(vblk);
|
||||||
|
|
||||||
/* Only free device id if we don't have any users */
|
/* Only free device id if we don't have any users */
|
||||||
|
@ -777,7 +840,8 @@ static const struct virtio_device_id id_table[] = {
|
||||||
static unsigned int features[] = {
|
static unsigned int features[] = {
|
||||||
VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
|
VIRTIO_BLK_F_SEG_MAX, VIRTIO_BLK_F_SIZE_MAX, VIRTIO_BLK_F_GEOMETRY,
|
||||||
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
|
VIRTIO_BLK_F_RO, VIRTIO_BLK_F_BLK_SIZE, VIRTIO_BLK_F_SCSI,
|
||||||
VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE
|
VIRTIO_BLK_F_WCE, VIRTIO_BLK_F_TOPOLOGY, VIRTIO_BLK_F_CONFIG_WCE,
|
||||||
|
VIRTIO_BLK_F_MQ,
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct virtio_driver virtio_blk = {
|
static struct virtio_driver virtio_blk = {
|
||||||
|
|
Loading…
Reference in New Issue