Lots of virtio work which wasn't quite ready for last merge window. Plus
I dived into lguest again, reworking the pagetable code so we can move the switcher page: our fixmaps sometimes take more than 2MB now... Cheers, Rusty. -----BEGIN PGP SIGNATURE----- Version: GnuPG v1.4.12 (GNU/Linux) iQIcBAABAgAGBQJRga7lAAoJENkgDmzRrbjx/yIQAKpqIBtxOJeYH3SY+Uoe7Cfp toNYcpJEldvb0UcWN8M2cSZpHoxl1SUoq9djwcM29tcKa7EZAjHaGtb/Q1qMTDgv +B3WAfiGU2pmXFxLAkbrlLNGnysy24JspqJQ5hcYV84EiBxQdZp+nCYgOphd+GMK ww16vo9ya8jFjzt3GeRp/Heb3vEzV4Cp6BC3i0m8A3WNpEpbRb66pqXNk5o8ggJO SxQOKSXmUM+0m+jKSul5xn3e2Ls2LOrZZ8/DIHA+gW66N4Zab7n2/j1Q9VRxb4lh FqnR7KwgBX8OCh9IsBDqQYS7MohvMYge6eUdLtFrq84jvMleMEhrC8q9v2tucFUb 5t18CLwvyK7Gdg6UCKiZ7YSPcuURAILO16al9bh5IseeBDsuX+43VsvQoBmFn9k6 cLOVTZ6BlOmahK5PyRYFSvLa9Rxzr/05Mr7oYq9UgshD9io78dnqczFYIORF53rW zD7C4HuTZfYJFfNd0wAJ0RfVXnf8QvDlMdo7zPC26DSXNWqj8OexCY0qqSWUB+2F vcfJP6NkV4fZB8aawWIFUVwc64yqtt2uPVLa7ATZWqk16PgKrchGewmw3tiEwOgu 1l7xgffTRRUIJsqaCZoXdgw3yezcKRjuUBcOxL09lDAAhc+NxWNvzZBsKp66DwDk yZQKn0OdXnuf0CeEOfFf =1tYL -----END PGP SIGNATURE----- Merge tag 'virtio-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux Pull virtio & lguest updates from Rusty Russell: "Lots of virtio work which wasn't quite ready for last merge window. Plus I dived into lguest again, reworking the pagetable code so we can move the switcher page: our fixmaps sometimes take more than 2MB now..." Ugh. Annoying conflicts with the tcm_vhost -> vhost_scsi rename. Hopefully correctly resolved. * tag 'virtio-next-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/rusty/linux: (57 commits) caif_virtio: Remove bouncing email addresses lguest: improve code readability in lg_cpu_start. virtio-net: fill only rx queues which are being used lguest: map Switcher below fixmap. lguest: cache last cpu we ran on. lguest: map Switcher text whenever we allocate a new pagetable. lguest: don't share Switcher PTE pages between guests. lguest: expost switcher_pages array (as lg_switcher_pages). lguest: extract shadow PTE walking / allocating. 
lguest: make check_gpte et. al return bool. lguest: assume Switcher text is a single page. lguest: rename switcher_page to switcher_pages. lguest: remove RESERVE_MEM constant. lguest: check vaddr not pgd for Switcher protection. lguest: prepare to make SWITCHER_ADDR a variable. virtio: console: replace EMFILE with EBUSY for already-open port virtio-scsi: reset virtqueue affinity when doing cpu hotplug virtio-scsi: introduce multiqueue support virtio-scsi: push vq lock/unlock into virtscsi_vq_done virtio-scsi: pass struct virtio_scsi to virtqueue completion function ...
hifive-unleashed-5.1
commit
736a2dd257
|
@ -6,6 +6,3 @@ kvm/
|
||||||
- Kernel Virtual Machine. See also http://linux-kvm.org
|
- Kernel Virtual Machine. See also http://linux-kvm.org
|
||||||
uml/
|
uml/
|
||||||
- User Mode Linux, builds/runs Linux kernel as a userspace program.
|
- User Mode Linux, builds/runs Linux kernel as a userspace program.
|
||||||
virtio.txt
|
|
||||||
- Text version of draft virtio spec.
|
|
||||||
See http://ozlabs.org/~rusty/virtio-spec
|
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -8743,6 +8743,7 @@ F: drivers/virtio/
|
||||||
F: drivers/net/virtio_net.c
|
F: drivers/net/virtio_net.c
|
||||||
F: drivers/block/virtio_blk.c
|
F: drivers/block/virtio_blk.c
|
||||||
F: include/linux/virtio_*.h
|
F: include/linux/virtio_*.h
|
||||||
|
F: include/uapi/linux/virtio_*.h
|
||||||
|
|
||||||
VIRTIO HOST (VHOST)
|
VIRTIO HOST (VHOST)
|
||||||
M: "Michael S. Tsirkin" <mst@redhat.com>
|
M: "Michael S. Tsirkin" <mst@redhat.com>
|
||||||
|
|
|
@ -11,18 +11,11 @@
|
||||||
|
|
||||||
#define GUEST_PL 1
|
#define GUEST_PL 1
|
||||||
|
|
||||||
/* Every guest maps the core switcher code. */
|
/* Page for Switcher text itself, then two pages per cpu */
|
||||||
#define SHARED_SWITCHER_PAGES \
|
#define TOTAL_SWITCHER_PAGES (1 + 2 * nr_cpu_ids)
|
||||||
DIV_ROUND_UP(end_switcher_text - start_switcher_text, PAGE_SIZE)
|
|
||||||
/* Pages for switcher itself, then two pages per cpu */
|
|
||||||
#define TOTAL_SWITCHER_PAGES (SHARED_SWITCHER_PAGES + 2 * nr_cpu_ids)
|
|
||||||
|
|
||||||
/* We map at -4M (-2M for PAE) for ease of mapping (one PTE page). */
|
/* Where we map the Switcher, in both Host and Guest. */
|
||||||
#ifdef CONFIG_X86_PAE
|
extern unsigned long switcher_addr;
|
||||||
#define SWITCHER_ADDR 0xFFE00000
|
|
||||||
#else
|
|
||||||
#define SWITCHER_ADDR 0xFFC00000
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Found in switcher.S */
|
/* Found in switcher.S */
|
||||||
extern unsigned long default_idt_entries[];
|
extern unsigned long default_idt_entries[];
|
||||||
|
|
|
@ -110,7 +110,7 @@ new_segment:
|
||||||
if (!sg)
|
if (!sg)
|
||||||
sg = sglist;
|
sg = sglist;
|
||||||
else {
|
else {
|
||||||
sg->page_link &= ~0x02;
|
sg_unmark_end(sg);
|
||||||
sg = sg_next(sg);
|
sg = sg_next(sg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -143,7 +143,7 @@ new_segment:
|
||||||
* termination bit to avoid doing a full
|
* termination bit to avoid doing a full
|
||||||
* sg_init_table() in drivers for each command.
|
* sg_init_table() in drivers for each command.
|
||||||
*/
|
*/
|
||||||
(*sg)->page_link &= ~0x02;
|
sg_unmark_end(*sg);
|
||||||
*sg = sg_next(*sg);
|
*sg = sg_next(*sg);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -124,7 +124,7 @@ obj-$(CONFIG_PPC_PS3) += ps3/
|
||||||
obj-$(CONFIG_OF) += of/
|
obj-$(CONFIG_OF) += of/
|
||||||
obj-$(CONFIG_SSB) += ssb/
|
obj-$(CONFIG_SSB) += ssb/
|
||||||
obj-$(CONFIG_BCMA) += bcma/
|
obj-$(CONFIG_BCMA) += bcma/
|
||||||
obj-$(CONFIG_VHOST_NET) += vhost/
|
obj-$(CONFIG_VHOST_RING) += vhost/
|
||||||
obj-$(CONFIG_VLYNQ) += vlynq/
|
obj-$(CONFIG_VLYNQ) += vlynq/
|
||||||
obj-$(CONFIG_STAGING) += staging/
|
obj-$(CONFIG_STAGING) += staging/
|
||||||
obj-y += platform/
|
obj-y += platform/
|
||||||
|
|
|
@ -100,96 +100,103 @@ static inline struct virtblk_req *virtblk_alloc_req(struct virtio_blk *vblk,
|
||||||
return vbr;
|
return vbr;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void virtblk_add_buf_wait(struct virtio_blk *vblk,
|
static int __virtblk_add_req(struct virtqueue *vq,
|
||||||
struct virtblk_req *vbr,
|
struct virtblk_req *vbr,
|
||||||
unsigned long out,
|
struct scatterlist *data_sg,
|
||||||
unsigned long in)
|
bool have_data)
|
||||||
{
|
{
|
||||||
DEFINE_WAIT(wait);
|
struct scatterlist hdr, status, cmd, sense, inhdr, *sgs[6];
|
||||||
|
unsigned int num_out = 0, num_in = 0;
|
||||||
|
int type = vbr->out_hdr.type & ~VIRTIO_BLK_T_OUT;
|
||||||
|
|
||||||
for (;;) {
|
sg_init_one(&hdr, &vbr->out_hdr, sizeof(vbr->out_hdr));
|
||||||
|
sgs[num_out++] = &hdr;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If this is a packet command we need a couple of additional headers.
|
||||||
|
* Behind the normal outhdr we put a segment with the scsi command
|
||||||
|
* block, and before the normal inhdr we put the sense data and the
|
||||||
|
* inhdr with additional status information.
|
||||||
|
*/
|
||||||
|
if (type == VIRTIO_BLK_T_SCSI_CMD) {
|
||||||
|
sg_init_one(&cmd, vbr->req->cmd, vbr->req->cmd_len);
|
||||||
|
sgs[num_out++] = &cmd;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (have_data) {
|
||||||
|
if (vbr->out_hdr.type & VIRTIO_BLK_T_OUT)
|
||||||
|
sgs[num_out++] = data_sg;
|
||||||
|
else
|
||||||
|
sgs[num_out + num_in++] = data_sg;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (type == VIRTIO_BLK_T_SCSI_CMD) {
|
||||||
|
sg_init_one(&sense, vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
|
||||||
|
sgs[num_out + num_in++] = &sense;
|
||||||
|
sg_init_one(&inhdr, &vbr->in_hdr, sizeof(vbr->in_hdr));
|
||||||
|
sgs[num_out + num_in++] = &inhdr;
|
||||||
|
}
|
||||||
|
|
||||||
|
sg_init_one(&status, &vbr->status, sizeof(vbr->status));
|
||||||
|
sgs[num_out + num_in++] = &status;
|
||||||
|
|
||||||
|
return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void virtblk_add_req(struct virtblk_req *vbr, bool have_data)
|
||||||
|
{
|
||||||
|
struct virtio_blk *vblk = vbr->vblk;
|
||||||
|
DEFINE_WAIT(wait);
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
spin_lock_irq(vblk->disk->queue->queue_lock);
|
||||||
|
while (unlikely((ret = __virtblk_add_req(vblk->vq, vbr, vbr->sg,
|
||||||
|
have_data)) < 0)) {
|
||||||
prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
|
prepare_to_wait_exclusive(&vblk->queue_wait, &wait,
|
||||||
TASK_UNINTERRUPTIBLE);
|
TASK_UNINTERRUPTIBLE);
|
||||||
|
|
||||||
spin_lock_irq(vblk->disk->queue->queue_lock);
|
|
||||||
if (virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
|
|
||||||
GFP_ATOMIC) < 0) {
|
|
||||||
spin_unlock_irq(vblk->disk->queue->queue_lock);
|
|
||||||
io_schedule();
|
|
||||||
} else {
|
|
||||||
virtqueue_kick(vblk->vq);
|
|
||||||
spin_unlock_irq(vblk->disk->queue->queue_lock);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
finish_wait(&vblk->queue_wait, &wait);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void virtblk_add_req(struct virtblk_req *vbr,
|
|
||||||
unsigned int out, unsigned int in)
|
|
||||||
{
|
|
||||||
struct virtio_blk *vblk = vbr->vblk;
|
|
||||||
|
|
||||||
spin_lock_irq(vblk->disk->queue->queue_lock);
|
|
||||||
if (unlikely(virtqueue_add_buf(vblk->vq, vbr->sg, out, in, vbr,
|
|
||||||
GFP_ATOMIC) < 0)) {
|
|
||||||
spin_unlock_irq(vblk->disk->queue->queue_lock);
|
spin_unlock_irq(vblk->disk->queue->queue_lock);
|
||||||
virtblk_add_buf_wait(vblk, vbr, out, in);
|
io_schedule();
|
||||||
return;
|
spin_lock_irq(vblk->disk->queue->queue_lock);
|
||||||
|
|
||||||
|
finish_wait(&vblk->queue_wait, &wait);
|
||||||
}
|
}
|
||||||
|
|
||||||
virtqueue_kick(vblk->vq);
|
virtqueue_kick(vblk->vq);
|
||||||
spin_unlock_irq(vblk->disk->queue->queue_lock);
|
spin_unlock_irq(vblk->disk->queue->queue_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int virtblk_bio_send_flush(struct virtblk_req *vbr)
|
static void virtblk_bio_send_flush(struct virtblk_req *vbr)
|
||||||
{
|
{
|
||||||
unsigned int out = 0, in = 0;
|
|
||||||
|
|
||||||
vbr->flags |= VBLK_IS_FLUSH;
|
vbr->flags |= VBLK_IS_FLUSH;
|
||||||
vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
|
vbr->out_hdr.type = VIRTIO_BLK_T_FLUSH;
|
||||||
vbr->out_hdr.sector = 0;
|
vbr->out_hdr.sector = 0;
|
||||||
vbr->out_hdr.ioprio = 0;
|
vbr->out_hdr.ioprio = 0;
|
||||||
sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
|
|
||||||
sg_set_buf(&vbr->sg[out + in++], &vbr->status, sizeof(vbr->status));
|
|
||||||
|
|
||||||
virtblk_add_req(vbr, out, in);
|
virtblk_add_req(vbr, false);
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static int virtblk_bio_send_data(struct virtblk_req *vbr)
|
static void virtblk_bio_send_data(struct virtblk_req *vbr)
|
||||||
{
|
{
|
||||||
struct virtio_blk *vblk = vbr->vblk;
|
struct virtio_blk *vblk = vbr->vblk;
|
||||||
unsigned int num, out = 0, in = 0;
|
|
||||||
struct bio *bio = vbr->bio;
|
struct bio *bio = vbr->bio;
|
||||||
|
bool have_data;
|
||||||
|
|
||||||
vbr->flags &= ~VBLK_IS_FLUSH;
|
vbr->flags &= ~VBLK_IS_FLUSH;
|
||||||
vbr->out_hdr.type = 0;
|
vbr->out_hdr.type = 0;
|
||||||
vbr->out_hdr.sector = bio->bi_sector;
|
vbr->out_hdr.sector = bio->bi_sector;
|
||||||
vbr->out_hdr.ioprio = bio_prio(bio);
|
vbr->out_hdr.ioprio = bio_prio(bio);
|
||||||
|
|
||||||
sg_set_buf(&vbr->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
|
if (blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg)) {
|
||||||
|
have_data = true;
|
||||||
num = blk_bio_map_sg(vblk->disk->queue, bio, vbr->sg + out);
|
if (bio->bi_rw & REQ_WRITE)
|
||||||
|
|
||||||
sg_set_buf(&vbr->sg[num + out + in++], &vbr->status,
|
|
||||||
sizeof(vbr->status));
|
|
||||||
|
|
||||||
if (num) {
|
|
||||||
if (bio->bi_rw & REQ_WRITE) {
|
|
||||||
vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
|
vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
|
||||||
out += num;
|
else
|
||||||
} else {
|
|
||||||
vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
|
vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
|
||||||
in += num;
|
} else
|
||||||
}
|
have_data = false;
|
||||||
}
|
|
||||||
|
|
||||||
virtblk_add_req(vbr, out, in);
|
virtblk_add_req(vbr, have_data);
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void virtblk_bio_send_data_work(struct work_struct *work)
|
static void virtblk_bio_send_data_work(struct work_struct *work)
|
||||||
|
@ -298,7 +305,7 @@ static void virtblk_done(struct virtqueue *vq)
|
||||||
static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
|
static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
|
||||||
struct request *req)
|
struct request *req)
|
||||||
{
|
{
|
||||||
unsigned long num, out = 0, in = 0;
|
unsigned int num;
|
||||||
struct virtblk_req *vbr;
|
struct virtblk_req *vbr;
|
||||||
|
|
||||||
vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
|
vbr = virtblk_alloc_req(vblk, GFP_ATOMIC);
|
||||||
|
@ -335,40 +342,15 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
sg_set_buf(&vblk->sg[out++], &vbr->out_hdr, sizeof(vbr->out_hdr));
|
num = blk_rq_map_sg(q, vbr->req, vblk->sg);
|
||||||
|
|
||||||
/*
|
|
||||||
* If this is a packet command we need a couple of additional headers.
|
|
||||||
* Behind the normal outhdr we put a segment with the scsi command
|
|
||||||
* block, and before the normal inhdr we put the sense data and the
|
|
||||||
* inhdr with additional status information before the normal inhdr.
|
|
||||||
*/
|
|
||||||
if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC)
|
|
||||||
sg_set_buf(&vblk->sg[out++], vbr->req->cmd, vbr->req->cmd_len);
|
|
||||||
|
|
||||||
num = blk_rq_map_sg(q, vbr->req, vblk->sg + out);
|
|
||||||
|
|
||||||
if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) {
|
|
||||||
sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE);
|
|
||||||
sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr,
|
|
||||||
sizeof(vbr->in_hdr));
|
|
||||||
}
|
|
||||||
|
|
||||||
sg_set_buf(&vblk->sg[num + out + in++], &vbr->status,
|
|
||||||
sizeof(vbr->status));
|
|
||||||
|
|
||||||
if (num) {
|
if (num) {
|
||||||
if (rq_data_dir(vbr->req) == WRITE) {
|
if (rq_data_dir(vbr->req) == WRITE)
|
||||||
vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
|
vbr->out_hdr.type |= VIRTIO_BLK_T_OUT;
|
||||||
out += num;
|
else
|
||||||
} else {
|
|
||||||
vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
|
vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
|
||||||
in += num;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (virtqueue_add_buf(vblk->vq, vblk->sg, out, in, vbr,
|
if (__virtblk_add_req(vblk->vq, vbr, vblk->sg, num) < 0) {
|
||||||
GFP_ATOMIC) < 0) {
|
|
||||||
mempool_free(vbr, vblk->pool);
|
mempool_free(vbr, vblk->pool);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -539,6 +521,7 @@ static void virtblk_config_changed_work(struct work_struct *work)
|
||||||
struct virtio_device *vdev = vblk->vdev;
|
struct virtio_device *vdev = vblk->vdev;
|
||||||
struct request_queue *q = vblk->disk->queue;
|
struct request_queue *q = vblk->disk->queue;
|
||||||
char cap_str_2[10], cap_str_10[10];
|
char cap_str_2[10], cap_str_10[10];
|
||||||
|
char *envp[] = { "RESIZE=1", NULL };
|
||||||
u64 capacity, size;
|
u64 capacity, size;
|
||||||
|
|
||||||
mutex_lock(&vblk->config_lock);
|
mutex_lock(&vblk->config_lock);
|
||||||
|
@ -568,6 +551,7 @@ static void virtblk_config_changed_work(struct work_struct *work)
|
||||||
|
|
||||||
set_capacity(vblk->disk, capacity);
|
set_capacity(vblk->disk, capacity);
|
||||||
revalidate_disk(vblk->disk);
|
revalidate_disk(vblk->disk);
|
||||||
|
kobject_uevent_env(&disk_to_dev(vblk->disk)->kobj, KOBJ_CHANGE, envp);
|
||||||
done:
|
done:
|
||||||
mutex_unlock(&vblk->config_lock);
|
mutex_unlock(&vblk->config_lock);
|
||||||
}
|
}
|
||||||
|
|
|
@ -47,7 +47,7 @@ static void register_buffer(u8 *buf, size_t size)
|
||||||
sg_init_one(&sg, buf, size);
|
sg_init_one(&sg, buf, size);
|
||||||
|
|
||||||
/* There should always be room for one buffer. */
|
/* There should always be room for one buffer. */
|
||||||
if (virtqueue_add_buf(vq, &sg, 0, 1, buf, GFP_KERNEL) < 0)
|
if (virtqueue_add_inbuf(vq, &sg, 1, buf, GFP_KERNEL) < 0)
|
||||||
BUG();
|
BUG();
|
||||||
|
|
||||||
virtqueue_kick(vq);
|
virtqueue_kick(vq);
|
||||||
|
|
|
@ -78,8 +78,8 @@ struct ports_driver_data {
|
||||||
};
|
};
|
||||||
static struct ports_driver_data pdrvdata;
|
static struct ports_driver_data pdrvdata;
|
||||||
|
|
||||||
DEFINE_SPINLOCK(pdrvdata_lock);
|
static DEFINE_SPINLOCK(pdrvdata_lock);
|
||||||
DECLARE_COMPLETION(early_console_added);
|
static DECLARE_COMPLETION(early_console_added);
|
||||||
|
|
||||||
/* This struct holds information that's relevant only for console ports */
|
/* This struct holds information that's relevant only for console ports */
|
||||||
struct console {
|
struct console {
|
||||||
|
@ -503,7 +503,7 @@ static int add_inbuf(struct virtqueue *vq, struct port_buffer *buf)
|
||||||
|
|
||||||
sg_init_one(sg, buf->buf, buf->size);
|
sg_init_one(sg, buf->buf, buf->size);
|
||||||
|
|
||||||
ret = virtqueue_add_buf(vq, sg, 0, 1, buf, GFP_ATOMIC);
|
ret = virtqueue_add_inbuf(vq, sg, 1, buf, GFP_ATOMIC);
|
||||||
virtqueue_kick(vq);
|
virtqueue_kick(vq);
|
||||||
if (!ret)
|
if (!ret)
|
||||||
ret = vq->num_free;
|
ret = vq->num_free;
|
||||||
|
@ -572,7 +572,7 @@ static ssize_t __send_control_msg(struct ports_device *portdev, u32 port_id,
|
||||||
sg_init_one(sg, &cpkt, sizeof(cpkt));
|
sg_init_one(sg, &cpkt, sizeof(cpkt));
|
||||||
|
|
||||||
spin_lock(&portdev->c_ovq_lock);
|
spin_lock(&portdev->c_ovq_lock);
|
||||||
if (virtqueue_add_buf(vq, sg, 1, 0, &cpkt, GFP_ATOMIC) == 0) {
|
if (virtqueue_add_outbuf(vq, sg, 1, &cpkt, GFP_ATOMIC) == 0) {
|
||||||
virtqueue_kick(vq);
|
virtqueue_kick(vq);
|
||||||
while (!virtqueue_get_buf(vq, &len))
|
while (!virtqueue_get_buf(vq, &len))
|
||||||
cpu_relax();
|
cpu_relax();
|
||||||
|
@ -622,7 +622,7 @@ static ssize_t __send_to_port(struct port *port, struct scatterlist *sg,
|
||||||
|
|
||||||
reclaim_consumed_buffers(port);
|
reclaim_consumed_buffers(port);
|
||||||
|
|
||||||
err = virtqueue_add_buf(out_vq, sg, nents, 0, data, GFP_ATOMIC);
|
err = virtqueue_add_outbuf(out_vq, sg, nents, data, GFP_ATOMIC);
|
||||||
|
|
||||||
/* Tell Host to go! */
|
/* Tell Host to go! */
|
||||||
virtqueue_kick(out_vq);
|
virtqueue_kick(out_vq);
|
||||||
|
@ -1040,7 +1040,7 @@ static int port_fops_open(struct inode *inode, struct file *filp)
|
||||||
spin_lock_irq(&port->inbuf_lock);
|
spin_lock_irq(&port->inbuf_lock);
|
||||||
if (port->guest_connected) {
|
if (port->guest_connected) {
|
||||||
spin_unlock_irq(&port->inbuf_lock);
|
spin_unlock_irq(&port->inbuf_lock);
|
||||||
ret = -EMFILE;
|
ret = -EBUSY;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1202,7 +1202,7 @@ int __init virtio_cons_early_init(int (*put_chars)(u32, const char *, int))
|
||||||
return hvc_instantiate(0, 0, &hv_ops);
|
return hvc_instantiate(0, 0, &hv_ops);
|
||||||
}
|
}
|
||||||
|
|
||||||
int init_port_console(struct port *port)
|
static int init_port_console(struct port *port)
|
||||||
{
|
{
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
|
|
@ -5,10 +5,9 @@ config LGUEST
|
||||||
---help---
|
---help---
|
||||||
This is a very simple module which allows you to run
|
This is a very simple module which allows you to run
|
||||||
multiple instances of the same Linux kernel, using the
|
multiple instances of the same Linux kernel, using the
|
||||||
"lguest" command found in the Documentation/virtual/lguest
|
"lguest" command found in the tools/lguest directory.
|
||||||
directory.
|
|
||||||
|
|
||||||
Note that "lguest" is pronounced to rhyme with "fell quest",
|
Note that "lguest" is pronounced to rhyme with "fell quest",
|
||||||
not "rustyvisor". See Documentation/virtual/lguest/lguest.txt.
|
not "rustyvisor". See tools/lguest/lguest.txt.
|
||||||
|
|
||||||
If unsure, say N. If curious, say M. If masochistic, say Y.
|
If unsure, say N. If curious, say M. If masochistic, say Y.
|
||||||
|
|
|
@ -20,9 +20,9 @@
|
||||||
#include <asm/asm-offsets.h>
|
#include <asm/asm-offsets.h>
|
||||||
#include "lg.h"
|
#include "lg.h"
|
||||||
|
|
||||||
|
unsigned long switcher_addr;
|
||||||
|
struct page **lg_switcher_pages;
|
||||||
static struct vm_struct *switcher_vma;
|
static struct vm_struct *switcher_vma;
|
||||||
static struct page **switcher_page;
|
|
||||||
|
|
||||||
/* This One Big lock protects all inter-guest data structures. */
|
/* This One Big lock protects all inter-guest data structures. */
|
||||||
DEFINE_MUTEX(lguest_lock);
|
DEFINE_MUTEX(lguest_lock);
|
||||||
|
@ -52,13 +52,21 @@ static __init int map_switcher(void)
|
||||||
* easy.
|
* easy.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
/* We assume Switcher text fits into a single page. */
|
||||||
|
if (end_switcher_text - start_switcher_text > PAGE_SIZE) {
|
||||||
|
printk(KERN_ERR "lguest: switcher text too large (%zu)\n",
|
||||||
|
end_switcher_text - start_switcher_text);
|
||||||
|
return -EINVAL;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We allocate an array of struct page pointers. map_vm_area() wants
|
* We allocate an array of struct page pointers. map_vm_area() wants
|
||||||
* this, rather than just an array of pages.
|
* this, rather than just an array of pages.
|
||||||
*/
|
*/
|
||||||
switcher_page = kmalloc(sizeof(switcher_page[0])*TOTAL_SWITCHER_PAGES,
|
lg_switcher_pages = kmalloc(sizeof(lg_switcher_pages[0])
|
||||||
GFP_KERNEL);
|
* TOTAL_SWITCHER_PAGES,
|
||||||
if (!switcher_page) {
|
GFP_KERNEL);
|
||||||
|
if (!lg_switcher_pages) {
|
||||||
err = -ENOMEM;
|
err = -ENOMEM;
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
@ -68,32 +76,29 @@ static __init int map_switcher(void)
|
||||||
* so we make sure they're zeroed.
|
* so we make sure they're zeroed.
|
||||||
*/
|
*/
|
||||||
for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) {
|
for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) {
|
||||||
switcher_page[i] = alloc_page(GFP_KERNEL|__GFP_ZERO);
|
lg_switcher_pages[i] = alloc_page(GFP_KERNEL|__GFP_ZERO);
|
||||||
if (!switcher_page[i]) {
|
if (!lg_switcher_pages[i]) {
|
||||||
err = -ENOMEM;
|
err = -ENOMEM;
|
||||||
goto free_some_pages;
|
goto free_some_pages;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* First we check that the Switcher won't overlap the fixmap area at
|
* We place the Switcher underneath the fixmap area, which is the
|
||||||
* the top of memory. It's currently nowhere near, but it could have
|
* highest virtual address we can get. This is important, since we
|
||||||
* very strange effects if it ever happened.
|
* tell the Guest it can't access this memory, so we want its ceiling
|
||||||
|
* as high as possible.
|
||||||
*/
|
*/
|
||||||
if (SWITCHER_ADDR + (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE > FIXADDR_START){
|
switcher_addr = FIXADDR_START - (TOTAL_SWITCHER_PAGES+1)*PAGE_SIZE;
|
||||||
err = -ENOMEM;
|
|
||||||
printk("lguest: mapping switcher would thwack fixmap\n");
|
|
||||||
goto free_pages;
|
|
||||||
}
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Now we reserve the "virtual memory area" we want: 0xFFC00000
|
* Now we reserve the "virtual memory area" we want. We might
|
||||||
* (SWITCHER_ADDR). We might not get it in theory, but in practice
|
* not get it in theory, but in practice it's worked so far.
|
||||||
* it's worked so far. The end address needs +1 because __get_vm_area
|
* The end address needs +1 because __get_vm_area allocates an
|
||||||
* allocates an extra guard page, so we need space for that.
|
* extra guard page, so we need space for that.
|
||||||
*/
|
*/
|
||||||
switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE,
|
switcher_vma = __get_vm_area(TOTAL_SWITCHER_PAGES * PAGE_SIZE,
|
||||||
VM_ALLOC, SWITCHER_ADDR, SWITCHER_ADDR
|
VM_ALLOC, switcher_addr, switcher_addr
|
||||||
+ (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE);
|
+ (TOTAL_SWITCHER_PAGES+1) * PAGE_SIZE);
|
||||||
if (!switcher_vma) {
|
if (!switcher_vma) {
|
||||||
err = -ENOMEM;
|
err = -ENOMEM;
|
||||||
|
@ -103,12 +108,12 @@ static __init int map_switcher(void)
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This code actually sets up the pages we've allocated to appear at
|
* This code actually sets up the pages we've allocated to appear at
|
||||||
* SWITCHER_ADDR. map_vm_area() takes the vma we allocated above, the
|
* switcher_addr. map_vm_area() takes the vma we allocated above, the
|
||||||
* kind of pages we're mapping (kernel pages), and a pointer to our
|
* kind of pages we're mapping (kernel pages), and a pointer to our
|
||||||
* array of struct pages. It increments that pointer, but we don't
|
* array of struct pages. It increments that pointer, but we don't
|
||||||
* care.
|
* care.
|
||||||
*/
|
*/
|
||||||
pagep = switcher_page;
|
pagep = lg_switcher_pages;
|
||||||
err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep);
|
err = map_vm_area(switcher_vma, PAGE_KERNEL_EXEC, &pagep);
|
||||||
if (err) {
|
if (err) {
|
||||||
printk("lguest: map_vm_area failed: %i\n", err);
|
printk("lguest: map_vm_area failed: %i\n", err);
|
||||||
|
@ -133,8 +138,8 @@ free_pages:
|
||||||
i = TOTAL_SWITCHER_PAGES;
|
i = TOTAL_SWITCHER_PAGES;
|
||||||
free_some_pages:
|
free_some_pages:
|
||||||
for (--i; i >= 0; i--)
|
for (--i; i >= 0; i--)
|
||||||
__free_pages(switcher_page[i], 0);
|
__free_pages(lg_switcher_pages[i], 0);
|
||||||
kfree(switcher_page);
|
kfree(lg_switcher_pages);
|
||||||
out:
|
out:
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
@ -149,8 +154,8 @@ static void unmap_switcher(void)
|
||||||
vunmap(switcher_vma->addr);
|
vunmap(switcher_vma->addr);
|
||||||
/* Now we just need to free the pages we copied the switcher into */
|
/* Now we just need to free the pages we copied the switcher into */
|
||||||
for (i = 0; i < TOTAL_SWITCHER_PAGES; i++)
|
for (i = 0; i < TOTAL_SWITCHER_PAGES; i++)
|
||||||
__free_pages(switcher_page[i], 0);
|
__free_pages(lg_switcher_pages[i], 0);
|
||||||
kfree(switcher_page);
|
kfree(lg_switcher_pages);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*H:032
|
/*H:032
|
||||||
|
@ -323,15 +328,10 @@ static int __init init(void)
|
||||||
if (err)
|
if (err)
|
||||||
goto out;
|
goto out;
|
||||||
|
|
||||||
/* Now we set up the pagetable implementation for the Guests. */
|
|
||||||
err = init_pagetables(switcher_page, SHARED_SWITCHER_PAGES);
|
|
||||||
if (err)
|
|
||||||
goto unmap;
|
|
||||||
|
|
||||||
/* We might need to reserve an interrupt vector. */
|
/* We might need to reserve an interrupt vector. */
|
||||||
err = init_interrupts();
|
err = init_interrupts();
|
||||||
if (err)
|
if (err)
|
||||||
goto free_pgtables;
|
goto unmap;
|
||||||
|
|
||||||
/* /dev/lguest needs to be registered. */
|
/* /dev/lguest needs to be registered. */
|
||||||
err = lguest_device_init();
|
err = lguest_device_init();
|
||||||
|
@ -346,8 +346,6 @@ static int __init init(void)
|
||||||
|
|
||||||
free_interrupts:
|
free_interrupts:
|
||||||
free_interrupts();
|
free_interrupts();
|
||||||
free_pgtables:
|
|
||||||
free_pagetables();
|
|
||||||
unmap:
|
unmap:
|
||||||
unmap_switcher();
|
unmap_switcher();
|
||||||
out:
|
out:
|
||||||
|
@ -359,7 +357,6 @@ static void __exit fini(void)
|
||||||
{
|
{
|
||||||
lguest_device_remove();
|
lguest_device_remove();
|
||||||
free_interrupts();
|
free_interrupts();
|
||||||
free_pagetables();
|
|
||||||
unmap_switcher();
|
unmap_switcher();
|
||||||
|
|
||||||
lguest_arch_host_fini();
|
lguest_arch_host_fini();
|
||||||
|
|
|
@ -14,11 +14,10 @@
|
||||||
|
|
||||||
#include <asm/lguest.h>
|
#include <asm/lguest.h>
|
||||||
|
|
||||||
void free_pagetables(void);
|
|
||||||
int init_pagetables(struct page **switcher_page, unsigned int pages);
|
|
||||||
|
|
||||||
struct pgdir {
|
struct pgdir {
|
||||||
unsigned long gpgdir;
|
unsigned long gpgdir;
|
||||||
|
bool switcher_mapped;
|
||||||
|
int last_host_cpu;
|
||||||
pgd_t *pgdir;
|
pgd_t *pgdir;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -124,6 +123,7 @@ bool lguest_address_ok(const struct lguest *lg,
|
||||||
unsigned long addr, unsigned long len);
|
unsigned long addr, unsigned long len);
|
||||||
void __lgread(struct lg_cpu *, void *, unsigned long, unsigned);
|
void __lgread(struct lg_cpu *, void *, unsigned long, unsigned);
|
||||||
void __lgwrite(struct lg_cpu *, unsigned long, const void *, unsigned);
|
void __lgwrite(struct lg_cpu *, unsigned long, const void *, unsigned);
|
||||||
|
extern struct page **lg_switcher_pages;
|
||||||
|
|
||||||
/*H:035
|
/*H:035
|
||||||
* Using memory-copy operations like that is usually inconvient, so we
|
* Using memory-copy operations like that is usually inconvient, so we
|
||||||
|
|
|
@ -250,13 +250,13 @@ static ssize_t read(struct file *file, char __user *user, size_t size,loff_t*o)
|
||||||
*/
|
*/
|
||||||
static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip)
|
static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip)
|
||||||
{
|
{
|
||||||
/* We have a limited number the number of CPUs in the lguest struct. */
|
/* We have a limited number of CPUs in the lguest struct. */
|
||||||
if (id >= ARRAY_SIZE(cpu->lg->cpus))
|
if (id >= ARRAY_SIZE(cpu->lg->cpus))
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
|
||||||
/* Set up this CPU's id, and pointer back to the lguest struct. */
|
/* Set up this CPU's id, and pointer back to the lguest struct. */
|
||||||
cpu->id = id;
|
cpu->id = id;
|
||||||
cpu->lg = container_of((cpu - id), struct lguest, cpus[0]);
|
cpu->lg = container_of(cpu, struct lguest, cpus[id]);
|
||||||
cpu->lg->nr_cpus++;
|
cpu->lg->nr_cpus++;
|
||||||
|
|
||||||
/* Each CPU has a timer it can set. */
|
/* Each CPU has a timer it can set. */
|
||||||
|
@ -270,7 +270,7 @@ static int lg_cpu_start(struct lg_cpu *cpu, unsigned id, unsigned long start_ip)
|
||||||
if (!cpu->regs_page)
|
if (!cpu->regs_page)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
/* We actually put the registers at the bottom of the page. */
|
/* We actually put the registers at the end of the page. */
|
||||||
cpu->regs = (void *)cpu->regs_page + PAGE_SIZE - sizeof(*cpu->regs);
|
cpu->regs = (void *)cpu->regs_page + PAGE_SIZE - sizeof(*cpu->regs);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -7,7 +7,7 @@
|
||||||
* converted Guest pages when running the Guest.
|
* converted Guest pages when running the Guest.
|
||||||
:*/
|
:*/
|
||||||
|
|
||||||
/* Copyright (C) Rusty Russell IBM Corporation 2006.
|
/* Copyright (C) Rusty Russell IBM Corporation 2013.
|
||||||
* GPL v2 and any later version */
|
* GPL v2 and any later version */
|
||||||
#include <linux/mm.h>
|
#include <linux/mm.h>
|
||||||
#include <linux/gfp.h>
|
#include <linux/gfp.h>
|
||||||
|
@ -62,22 +62,11 @@
|
||||||
* will need the last pmd entry of the last pmd page.
|
* will need the last pmd entry of the last pmd page.
|
||||||
*/
|
*/
|
||||||
#ifdef CONFIG_X86_PAE
|
#ifdef CONFIG_X86_PAE
|
||||||
#define SWITCHER_PMD_INDEX (PTRS_PER_PMD - 1)
|
|
||||||
#define RESERVE_MEM 2U
|
|
||||||
#define CHECK_GPGD_MASK _PAGE_PRESENT
|
#define CHECK_GPGD_MASK _PAGE_PRESENT
|
||||||
#else
|
#else
|
||||||
#define RESERVE_MEM 4U
|
|
||||||
#define CHECK_GPGD_MASK _PAGE_TABLE
|
#define CHECK_GPGD_MASK _PAGE_TABLE
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
|
||||||
* We actually need a separate PTE page for each CPU. Remember that after the
|
|
||||||
* Switcher code itself comes two pages for each CPU, and we don't want this
|
|
||||||
* CPU's guest to see the pages of any other CPU.
|
|
||||||
*/
|
|
||||||
static DEFINE_PER_CPU(pte_t *, switcher_pte_pages);
|
|
||||||
#define switcher_pte_page(cpu) per_cpu(switcher_pte_pages, cpu)
|
|
||||||
|
|
||||||
/*H:320
|
/*H:320
|
||||||
* The page table code is curly enough to need helper functions to keep it
|
* The page table code is curly enough to need helper functions to keep it
|
||||||
* clear and clean. The kernel itself provides many of them; one advantage
|
* clear and clean. The kernel itself provides many of them; one advantage
|
||||||
|
@ -95,13 +84,6 @@ static pgd_t *spgd_addr(struct lg_cpu *cpu, u32 i, unsigned long vaddr)
|
||||||
{
|
{
|
||||||
unsigned int index = pgd_index(vaddr);
|
unsigned int index = pgd_index(vaddr);
|
||||||
|
|
||||||
#ifndef CONFIG_X86_PAE
|
|
||||||
/* We kill any Guest trying to touch the Switcher addresses. */
|
|
||||||
if (index >= SWITCHER_PGD_INDEX) {
|
|
||||||
kill_guest(cpu, "attempt to access switcher pages");
|
|
||||||
index = 0;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
/* Return a pointer to the index'th pgd entry for the i'th page table. */
|
/* Return a pointer to the index'th pgd entry for the i'th page table. */
|
||||||
return &cpu->lg->pgdirs[i].pgdir[index];
|
return &cpu->lg->pgdirs[i].pgdir[index];
|
||||||
}
|
}
|
||||||
|
@ -117,13 +99,6 @@ static pmd_t *spmd_addr(struct lg_cpu *cpu, pgd_t spgd, unsigned long vaddr)
|
||||||
unsigned int index = pmd_index(vaddr);
|
unsigned int index = pmd_index(vaddr);
|
||||||
pmd_t *page;
|
pmd_t *page;
|
||||||
|
|
||||||
/* We kill any Guest trying to touch the Switcher addresses. */
|
|
||||||
if (pgd_index(vaddr) == SWITCHER_PGD_INDEX &&
|
|
||||||
index >= SWITCHER_PMD_INDEX) {
|
|
||||||
kill_guest(cpu, "attempt to access switcher pages");
|
|
||||||
index = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* You should never call this if the PGD entry wasn't valid */
|
/* You should never call this if the PGD entry wasn't valid */
|
||||||
BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));
|
BUG_ON(!(pgd_flags(spgd) & _PAGE_PRESENT));
|
||||||
page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
|
page = __va(pgd_pfn(spgd) << PAGE_SHIFT);
|
||||||
|
@ -275,29 +250,120 @@ static void release_pte(pte_t pte)
|
||||||
}
|
}
|
||||||
/*:*/
|
/*:*/
|
||||||
|
|
||||||
static void check_gpte(struct lg_cpu *cpu, pte_t gpte)
|
static bool check_gpte(struct lg_cpu *cpu, pte_t gpte)
|
||||||
{
|
{
|
||||||
if ((pte_flags(gpte) & _PAGE_PSE) ||
|
if ((pte_flags(gpte) & _PAGE_PSE) ||
|
||||||
pte_pfn(gpte) >= cpu->lg->pfn_limit)
|
pte_pfn(gpte) >= cpu->lg->pfn_limit) {
|
||||||
kill_guest(cpu, "bad page table entry");
|
kill_guest(cpu, "bad page table entry");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void check_gpgd(struct lg_cpu *cpu, pgd_t gpgd)
|
static bool check_gpgd(struct lg_cpu *cpu, pgd_t gpgd)
|
||||||
{
|
{
|
||||||
if ((pgd_flags(gpgd) & ~CHECK_GPGD_MASK) ||
|
if ((pgd_flags(gpgd) & ~CHECK_GPGD_MASK) ||
|
||||||
(pgd_pfn(gpgd) >= cpu->lg->pfn_limit))
|
(pgd_pfn(gpgd) >= cpu->lg->pfn_limit)) {
|
||||||
kill_guest(cpu, "bad page directory entry");
|
kill_guest(cpu, "bad page directory entry");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_X86_PAE
|
#ifdef CONFIG_X86_PAE
|
||||||
static void check_gpmd(struct lg_cpu *cpu, pmd_t gpmd)
|
static bool check_gpmd(struct lg_cpu *cpu, pmd_t gpmd)
|
||||||
{
|
{
|
||||||
if ((pmd_flags(gpmd) & ~_PAGE_TABLE) ||
|
if ((pmd_flags(gpmd) & ~_PAGE_TABLE) ||
|
||||||
(pmd_pfn(gpmd) >= cpu->lg->pfn_limit))
|
(pmd_pfn(gpmd) >= cpu->lg->pfn_limit)) {
|
||||||
kill_guest(cpu, "bad page middle directory entry");
|
kill_guest(cpu, "bad page middle directory entry");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/*H:331
|
||||||
|
* This is the core routine to walk the shadow page tables and find the page
|
||||||
|
* table entry for a specific address.
|
||||||
|
*
|
||||||
|
* If allocate is set, then we allocate any missing levels, setting the flags
|
||||||
|
* on the new page directory and mid-level directories using the arguments
|
||||||
|
* (which are copied from the Guest's page table entries).
|
||||||
|
*/
|
||||||
|
static pte_t *find_spte(struct lg_cpu *cpu, unsigned long vaddr, bool allocate,
|
||||||
|
int pgd_flags, int pmd_flags)
|
||||||
|
{
|
||||||
|
pgd_t *spgd;
|
||||||
|
/* Mid level for PAE. */
|
||||||
|
#ifdef CONFIG_X86_PAE
|
||||||
|
pmd_t *spmd;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Get top level entry. */
|
||||||
|
spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
|
||||||
|
if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) {
|
||||||
|
/* No shadow entry: allocate a new shadow PTE page. */
|
||||||
|
unsigned long ptepage;
|
||||||
|
|
||||||
|
/* If they didn't want us to allocate anything, stop. */
|
||||||
|
if (!allocate)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
ptepage = get_zeroed_page(GFP_KERNEL);
|
||||||
|
/*
|
||||||
|
* This is not really the Guest's fault, but killing it is
|
||||||
|
* simple for this corner case.
|
||||||
|
*/
|
||||||
|
if (!ptepage) {
|
||||||
|
kill_guest(cpu, "out of memory allocating pte page");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
/*
|
||||||
|
* And we copy the flags to the shadow PGD entry. The page
|
||||||
|
* number in the shadow PGD is the page we just allocated.
|
||||||
|
*/
|
||||||
|
set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags));
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Intel's Physical Address Extension actually uses three levels of
|
||||||
|
* page tables, so we need to look in the mid-level.
|
||||||
|
*/
|
||||||
|
#ifdef CONFIG_X86_PAE
|
||||||
|
/* Now look at the mid-level shadow entry. */
|
||||||
|
spmd = spmd_addr(cpu, *spgd, vaddr);
|
||||||
|
|
||||||
|
if (!(pmd_flags(*spmd) & _PAGE_PRESENT)) {
|
||||||
|
/* No shadow entry: allocate a new shadow PTE page. */
|
||||||
|
unsigned long ptepage;
|
||||||
|
|
||||||
|
/* If they didn't want us to allocate anything, stop. */
|
||||||
|
if (!allocate)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
ptepage = get_zeroed_page(GFP_KERNEL);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This is not really the Guest's fault, but killing it is
|
||||||
|
* simple for this corner case.
|
||||||
|
*/
|
||||||
|
if (!ptepage) {
|
||||||
|
kill_guest(cpu, "out of memory allocating pmd page");
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* And we copy the flags to the shadow PMD entry. The page
|
||||||
|
* number in the shadow PMD is the page we just allocated.
|
||||||
|
*/
|
||||||
|
set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Get the pointer to the shadow PTE entry we're going to set. */
|
||||||
|
return spte_addr(cpu, *spgd, vaddr);
|
||||||
|
}
|
||||||
|
|
||||||
/*H:330
|
/*H:330
|
||||||
* (i) Looking up a page table entry when the Guest faults.
|
* (i) Looking up a page table entry when the Guest faults.
|
||||||
*
|
*
|
||||||
|
@ -311,17 +377,15 @@ static void check_gpmd(struct lg_cpu *cpu, pmd_t gpmd)
|
||||||
*/
|
*/
|
||||||
bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
|
bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
|
||||||
{
|
{
|
||||||
pgd_t gpgd;
|
|
||||||
pgd_t *spgd;
|
|
||||||
unsigned long gpte_ptr;
|
unsigned long gpte_ptr;
|
||||||
pte_t gpte;
|
pte_t gpte;
|
||||||
pte_t *spte;
|
pte_t *spte;
|
||||||
|
|
||||||
/* Mid level for PAE. */
|
|
||||||
#ifdef CONFIG_X86_PAE
|
|
||||||
pmd_t *spmd;
|
|
||||||
pmd_t gpmd;
|
pmd_t gpmd;
|
||||||
#endif
|
pgd_t gpgd;
|
||||||
|
|
||||||
|
/* We never demand page the Switcher, so trying is a mistake. */
|
||||||
|
if (vaddr >= switcher_addr)
|
||||||
|
return false;
|
||||||
|
|
||||||
/* First step: get the top-level Guest page table entry. */
|
/* First step: get the top-level Guest page table entry. */
|
||||||
if (unlikely(cpu->linear_pages)) {
|
if (unlikely(cpu->linear_pages)) {
|
||||||
|
@ -332,65 +396,31 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
|
||||||
/* Toplevel not present? We can't map it in. */
|
/* Toplevel not present? We can't map it in. */
|
||||||
if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
|
if (!(pgd_flags(gpgd) & _PAGE_PRESENT))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This kills the Guest if it has weird flags or tries to
|
||||||
|
* refer to a "physical" address outside the bounds.
|
||||||
|
*/
|
||||||
|
if (!check_gpgd(cpu, gpgd))
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Now look at the matching shadow entry. */
|
/* This "mid-level" entry is only used for non-linear, PAE mode. */
|
||||||
spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
|
gpmd = __pmd(_PAGE_TABLE);
|
||||||
if (!(pgd_flags(*spgd) & _PAGE_PRESENT)) {
|
|
||||||
/* No shadow entry: allocate a new shadow PTE page. */
|
|
||||||
unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
|
|
||||||
/*
|
|
||||||
* This is not really the Guest's fault, but killing it is
|
|
||||||
* simple for this corner case.
|
|
||||||
*/
|
|
||||||
if (!ptepage) {
|
|
||||||
kill_guest(cpu, "out of memory allocating pte page");
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
/* We check that the Guest pgd is OK. */
|
|
||||||
check_gpgd(cpu, gpgd);
|
|
||||||
/*
|
|
||||||
* And we copy the flags to the shadow PGD entry. The page
|
|
||||||
* number in the shadow PGD is the page we just allocated.
|
|
||||||
*/
|
|
||||||
set_pgd(spgd, __pgd(__pa(ptepage) | pgd_flags(gpgd)));
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef CONFIG_X86_PAE
|
#ifdef CONFIG_X86_PAE
|
||||||
if (unlikely(cpu->linear_pages)) {
|
if (likely(!cpu->linear_pages)) {
|
||||||
/* Faking up a linear mapping. */
|
|
||||||
gpmd = __pmd(_PAGE_TABLE);
|
|
||||||
} else {
|
|
||||||
gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
|
gpmd = lgread(cpu, gpmd_addr(gpgd, vaddr), pmd_t);
|
||||||
/* Middle level not present? We can't map it in. */
|
/* Middle level not present? We can't map it in. */
|
||||||
if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
|
if (!(pmd_flags(gpmd) & _PAGE_PRESENT))
|
||||||
return false;
|
return false;
|
||||||
}
|
|
||||||
|
|
||||||
/* Now look at the matching shadow entry. */
|
/*
|
||||||
spmd = spmd_addr(cpu, *spgd, vaddr);
|
* This kills the Guest if it has weird flags or tries to
|
||||||
|
* refer to a "physical" address outside the bounds.
|
||||||
if (!(pmd_flags(*spmd) & _PAGE_PRESENT)) {
|
|
||||||
/* No shadow entry: allocate a new shadow PTE page. */
|
|
||||||
unsigned long ptepage = get_zeroed_page(GFP_KERNEL);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* This is not really the Guest's fault, but killing it is
|
|
||||||
* simple for this corner case.
|
|
||||||
*/
|
*/
|
||||||
if (!ptepage) {
|
if (!check_gpmd(cpu, gpmd))
|
||||||
kill_guest(cpu, "out of memory allocating pte page");
|
|
||||||
return false;
|
return false;
|
||||||
}
|
|
||||||
|
|
||||||
/* We check that the Guest pmd is OK. */
|
|
||||||
check_gpmd(cpu, gpmd);
|
|
||||||
|
|
||||||
/*
|
|
||||||
* And we copy the flags to the shadow PMD entry. The page
|
|
||||||
* number in the shadow PMD is the page we just allocated.
|
|
||||||
*/
|
|
||||||
set_pmd(spmd, __pmd(__pa(ptepage) | pmd_flags(gpmd)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -433,7 +463,8 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
|
||||||
* Check that the Guest PTE flags are OK, and the page number is below
|
* Check that the Guest PTE flags are OK, and the page number is below
|
||||||
* the pfn_limit (ie. not mapping the Launcher binary).
|
* the pfn_limit (ie. not mapping the Launcher binary).
|
||||||
*/
|
*/
|
||||||
check_gpte(cpu, gpte);
|
if (!check_gpte(cpu, gpte))
|
||||||
|
return false;
|
||||||
|
|
||||||
/* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */
|
/* Add the _PAGE_ACCESSED and (for a write) _PAGE_DIRTY flag */
|
||||||
gpte = pte_mkyoung(gpte);
|
gpte = pte_mkyoung(gpte);
|
||||||
|
@ -441,7 +472,9 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
|
||||||
gpte = pte_mkdirty(gpte);
|
gpte = pte_mkdirty(gpte);
|
||||||
|
|
||||||
/* Get the pointer to the shadow PTE entry we're going to set. */
|
/* Get the pointer to the shadow PTE entry we're going to set. */
|
||||||
spte = spte_addr(cpu, *spgd, vaddr);
|
spte = find_spte(cpu, vaddr, true, pgd_flags(gpgd), pmd_flags(gpmd));
|
||||||
|
if (!spte)
|
||||||
|
return false;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If there was a valid shadow PTE entry here before, we release it.
|
* If there was a valid shadow PTE entry here before, we release it.
|
||||||
|
@ -493,29 +526,23 @@ bool demand_page(struct lg_cpu *cpu, unsigned long vaddr, int errcode)
|
||||||
*/
|
*/
|
||||||
static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr)
|
static bool page_writable(struct lg_cpu *cpu, unsigned long vaddr)
|
||||||
{
|
{
|
||||||
pgd_t *spgd;
|
pte_t *spte;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
|
|
||||||
#ifdef CONFIG_X86_PAE
|
/* You can't put your stack in the Switcher! */
|
||||||
pmd_t *spmd;
|
if (vaddr >= switcher_addr)
|
||||||
#endif
|
|
||||||
/* Look at the current top level entry: is it present? */
|
|
||||||
spgd = spgd_addr(cpu, cpu->cpu_pgd, vaddr);
|
|
||||||
if (!(pgd_flags(*spgd) & _PAGE_PRESENT))
|
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
#ifdef CONFIG_X86_PAE
|
/* If there's no shadow PTE, it's not writable. */
|
||||||
spmd = spmd_addr(cpu, *spgd, vaddr);
|
spte = find_spte(cpu, vaddr, false, 0, 0);
|
||||||
if (!(pmd_flags(*spmd) & _PAGE_PRESENT))
|
if (!spte)
|
||||||
return false;
|
return false;
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check the flags on the pte entry itself: it must be present and
|
* Check the flags on the pte entry itself: it must be present and
|
||||||
* writable.
|
* writable.
|
||||||
*/
|
*/
|
||||||
flags = pte_flags(*(spte_addr(cpu, *spgd, vaddr)));
|
flags = pte_flags(*spte);
|
||||||
|
|
||||||
return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
|
return (flags & (_PAGE_PRESENT|_PAGE_RW)) == (_PAGE_PRESENT|_PAGE_RW);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -678,9 +705,6 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
|
||||||
int *blank_pgdir)
|
int *blank_pgdir)
|
||||||
{
|
{
|
||||||
unsigned int next;
|
unsigned int next;
|
||||||
#ifdef CONFIG_X86_PAE
|
|
||||||
pmd_t *pmd_table;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We pick one entry at random to throw out. Choosing the Least
|
* We pick one entry at random to throw out. Choosing the Least
|
||||||
|
@ -695,29 +719,11 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
|
||||||
if (!cpu->lg->pgdirs[next].pgdir)
|
if (!cpu->lg->pgdirs[next].pgdir)
|
||||||
next = cpu->cpu_pgd;
|
next = cpu->cpu_pgd;
|
||||||
else {
|
else {
|
||||||
#ifdef CONFIG_X86_PAE
|
|
||||||
/*
|
/*
|
||||||
* In PAE mode, allocate a pmd page and populate the
|
* This is a blank page, so there are no kernel
|
||||||
* last pgd entry.
|
* mappings: caller must map the stack!
|
||||||
*/
|
*/
|
||||||
pmd_table = (pmd_t *)get_zeroed_page(GFP_KERNEL);
|
|
||||||
if (!pmd_table) {
|
|
||||||
free_page((long)cpu->lg->pgdirs[next].pgdir);
|
|
||||||
set_pgd(cpu->lg->pgdirs[next].pgdir, __pgd(0));
|
|
||||||
next = cpu->cpu_pgd;
|
|
||||||
} else {
|
|
||||||
set_pgd(cpu->lg->pgdirs[next].pgdir +
|
|
||||||
SWITCHER_PGD_INDEX,
|
|
||||||
__pgd(__pa(pmd_table) | _PAGE_PRESENT));
|
|
||||||
/*
|
|
||||||
* This is a blank page, so there are no kernel
|
|
||||||
* mappings: caller must map the stack!
|
|
||||||
*/
|
|
||||||
*blank_pgdir = 1;
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
*blank_pgdir = 1;
|
*blank_pgdir = 1;
|
||||||
#endif
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
/* Record which Guest toplevel this shadows. */
|
/* Record which Guest toplevel this shadows. */
|
||||||
|
@ -725,9 +731,50 @@ static unsigned int new_pgdir(struct lg_cpu *cpu,
|
||||||
/* Release all the non-kernel mappings. */
|
/* Release all the non-kernel mappings. */
|
||||||
flush_user_mappings(cpu->lg, next);
|
flush_user_mappings(cpu->lg, next);
|
||||||
|
|
||||||
|
/* This hasn't run on any CPU at all. */
|
||||||
|
cpu->lg->pgdirs[next].last_host_cpu = -1;
|
||||||
|
|
||||||
return next;
|
return next;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*H:501
|
||||||
|
* We do need the Switcher code mapped at all times, so we allocate that
|
||||||
|
* part of the Guest page table here. We map the Switcher code immediately,
|
||||||
|
* but defer mapping of the guest register page and IDT/LDT etc page until
|
||||||
|
* just before we run the guest in map_switcher_in_guest().
|
||||||
|
*
|
||||||
|
* We *could* do this setup in map_switcher_in_guest(), but at that point
|
||||||
|
* we've interrupts disabled, and allocating pages like that is fraught: we
|
||||||
|
* can't sleep if we need to free up some memory.
|
||||||
|
*/
|
||||||
|
static bool allocate_switcher_mapping(struct lg_cpu *cpu)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < TOTAL_SWITCHER_PAGES; i++) {
|
||||||
|
pte_t *pte = find_spte(cpu, switcher_addr + i * PAGE_SIZE, true,
|
||||||
|
CHECK_GPGD_MASK, _PAGE_TABLE);
|
||||||
|
if (!pte)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Map the switcher page if not already there. It might
|
||||||
|
* already be there because we call allocate_switcher_mapping()
|
||||||
|
* in guest_set_pgd() just in case it did discard our Switcher
|
||||||
|
* mapping, but it probably didn't.
|
||||||
|
*/
|
||||||
|
if (i == 0 && !(pte_flags(*pte) & _PAGE_PRESENT)) {
|
||||||
|
/* Get a reference to the Switcher page. */
|
||||||
|
get_page(lg_switcher_pages[0]);
|
||||||
|
/* Create a read-only, executable, kernel-style PTE */
|
||||||
|
set_pte(pte,
|
||||||
|
mk_pte(lg_switcher_pages[0], PAGE_KERNEL_RX));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
cpu->lg->pgdirs[cpu->cpu_pgd].switcher_mapped = true;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
/*H:470
|
/*H:470
|
||||||
* Finally, a routine which throws away everything: all PGD entries in all
|
* Finally, a routine which throws away everything: all PGD entries in all
|
||||||
* the shadow page tables, including the Guest's kernel mappings. This is used
|
* the shadow page tables, including the Guest's kernel mappings. This is used
|
||||||
|
@ -738,28 +785,16 @@ static void release_all_pagetables(struct lguest *lg)
|
||||||
unsigned int i, j;
|
unsigned int i, j;
|
||||||
|
|
||||||
/* Every shadow pagetable this Guest has */
|
/* Every shadow pagetable this Guest has */
|
||||||
for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++)
|
for (i = 0; i < ARRAY_SIZE(lg->pgdirs); i++) {
|
||||||
if (lg->pgdirs[i].pgdir) {
|
if (!lg->pgdirs[i].pgdir)
|
||||||
#ifdef CONFIG_X86_PAE
|
continue;
|
||||||
pgd_t *spgd;
|
|
||||||
pmd_t *pmdpage;
|
|
||||||
unsigned int k;
|
|
||||||
|
|
||||||
/* Get the last pmd page. */
|
/* Every PGD entry. */
|
||||||
spgd = lg->pgdirs[i].pgdir + SWITCHER_PGD_INDEX;
|
for (j = 0; j < PTRS_PER_PGD; j++)
|
||||||
pmdpage = __va(pgd_pfn(*spgd) << PAGE_SHIFT);
|
release_pgd(lg->pgdirs[i].pgdir + j);
|
||||||
|
lg->pgdirs[i].switcher_mapped = false;
|
||||||
/*
|
lg->pgdirs[i].last_host_cpu = -1;
|
||||||
* And release the pmd entries of that pmd page,
|
}
|
||||||
* except for the switcher pmd.
|
|
||||||
*/
|
|
||||||
for (k = 0; k < SWITCHER_PMD_INDEX; k++)
|
|
||||||
release_pmd(&pmdpage[k]);
|
|
||||||
#endif
|
|
||||||
/* Every PGD entry except the Switcher at the top */
|
|
||||||
for (j = 0; j < SWITCHER_PGD_INDEX; j++)
|
|
||||||
release_pgd(lg->pgdirs[i].pgdir + j);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -773,6 +808,9 @@ void guest_pagetable_clear_all(struct lg_cpu *cpu)
|
||||||
release_all_pagetables(cpu->lg);
|
release_all_pagetables(cpu->lg);
|
||||||
/* We need the Guest kernel stack mapped again. */
|
/* We need the Guest kernel stack mapped again. */
|
||||||
pin_stack_pages(cpu);
|
pin_stack_pages(cpu);
|
||||||
|
/* And we need Switcher allocated. */
|
||||||
|
if (!allocate_switcher_mapping(cpu))
|
||||||
|
kill_guest(cpu, "Cannot populate switcher mapping");
|
||||||
}
|
}
|
||||||
|
|
||||||
/*H:430
|
/*H:430
|
||||||
|
@ -808,9 +846,17 @@ void guest_new_pagetable(struct lg_cpu *cpu, unsigned long pgtable)
|
||||||
newpgdir = new_pgdir(cpu, pgtable, &repin);
|
newpgdir = new_pgdir(cpu, pgtable, &repin);
|
||||||
/* Change the current pgd index to the new one. */
|
/* Change the current pgd index to the new one. */
|
||||||
cpu->cpu_pgd = newpgdir;
|
cpu->cpu_pgd = newpgdir;
|
||||||
/* If it was completely blank, we map in the Guest kernel stack */
|
/*
|
||||||
|
* If it was completely blank, we map in the Guest kernel stack and
|
||||||
|
* the Switcher.
|
||||||
|
*/
|
||||||
if (repin)
|
if (repin)
|
||||||
pin_stack_pages(cpu);
|
pin_stack_pages(cpu);
|
||||||
|
|
||||||
|
if (!cpu->lg->pgdirs[cpu->cpu_pgd].switcher_mapped) {
|
||||||
|
if (!allocate_switcher_mapping(cpu))
|
||||||
|
kill_guest(cpu, "Cannot populate switcher mapping");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
/*:*/
|
/*:*/
|
||||||
|
|
||||||
|
@ -865,7 +911,8 @@ static void do_set_pte(struct lg_cpu *cpu, int idx,
|
||||||
* micro-benchmark.
|
* micro-benchmark.
|
||||||
*/
|
*/
|
||||||
if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
|
if (pte_flags(gpte) & (_PAGE_DIRTY | _PAGE_ACCESSED)) {
|
||||||
check_gpte(cpu, gpte);
|
if (!check_gpte(cpu, gpte))
|
||||||
|
return;
|
||||||
set_pte(spte,
|
set_pte(spte,
|
||||||
gpte_to_spte(cpu, gpte,
|
gpte_to_spte(cpu, gpte,
|
||||||
pte_flags(gpte) & _PAGE_DIRTY));
|
pte_flags(gpte) & _PAGE_DIRTY));
|
||||||
|
@ -897,6 +944,12 @@ static void do_set_pte(struct lg_cpu *cpu, int idx,
|
||||||
void guest_set_pte(struct lg_cpu *cpu,
|
void guest_set_pte(struct lg_cpu *cpu,
|
||||||
unsigned long gpgdir, unsigned long vaddr, pte_t gpte)
|
unsigned long gpgdir, unsigned long vaddr, pte_t gpte)
|
||||||
{
|
{
|
||||||
|
/* We don't let you remap the Switcher; we need it to get back! */
|
||||||
|
if (vaddr >= switcher_addr) {
|
||||||
|
kill_guest(cpu, "attempt to set pte into Switcher pages");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Kernel mappings must be changed on all top levels. Slow, but doesn't
|
* Kernel mappings must be changed on all top levels. Slow, but doesn't
|
||||||
* happen often.
|
* happen often.
|
||||||
|
@ -933,14 +986,23 @@ void guest_set_pgd(struct lguest *lg, unsigned long gpgdir, u32 idx)
|
||||||
{
|
{
|
||||||
int pgdir;
|
int pgdir;
|
||||||
|
|
||||||
if (idx >= SWITCHER_PGD_INDEX)
|
if (idx > PTRS_PER_PGD) {
|
||||||
|
kill_guest(&lg->cpus[0], "Attempt to set pgd %u/%u",
|
||||||
|
idx, PTRS_PER_PGD);
|
||||||
return;
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/* If they're talking about a page table we have a shadow for... */
|
/* If they're talking about a page table we have a shadow for... */
|
||||||
pgdir = find_pgdir(lg, gpgdir);
|
pgdir = find_pgdir(lg, gpgdir);
|
||||||
if (pgdir < ARRAY_SIZE(lg->pgdirs))
|
if (pgdir < ARRAY_SIZE(lg->pgdirs)) {
|
||||||
/* ... throw it away. */
|
/* ... throw it away. */
|
||||||
release_pgd(lg->pgdirs[pgdir].pgdir + idx);
|
release_pgd(lg->pgdirs[pgdir].pgdir + idx);
|
||||||
|
/* That might have been the Switcher mapping, remap it. */
|
||||||
|
if (!allocate_switcher_mapping(&lg->cpus[0])) {
|
||||||
|
kill_guest(&lg->cpus[0],
|
||||||
|
"Cannot populate switcher mapping");
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_X86_PAE
|
#ifdef CONFIG_X86_PAE
|
||||||
|
@ -958,6 +1020,9 @@ void guest_set_pmd(struct lguest *lg, unsigned long pmdp, u32 idx)
|
||||||
* we will populate on future faults. The Guest doesn't have any actual
|
* we will populate on future faults. The Guest doesn't have any actual
|
||||||
* pagetables yet, so we set linear_pages to tell demand_page() to fake it
|
* pagetables yet, so we set linear_pages to tell demand_page() to fake it
|
||||||
* for the moment.
|
* for the moment.
|
||||||
|
*
|
||||||
|
* We do need the Switcher to be mapped at all times, so we allocate that
|
||||||
|
* part of the Guest page table here.
|
||||||
*/
|
*/
|
||||||
int init_guest_pagetable(struct lguest *lg)
|
int init_guest_pagetable(struct lguest *lg)
|
||||||
{
|
{
|
||||||
|
@ -971,21 +1036,34 @@ int init_guest_pagetable(struct lguest *lg)
|
||||||
|
|
||||||
/* We start with a linear mapping until the Guest initializes. */
|
/* We start with a linear mapping until the Guest initializes. */
|
||||||
cpu->linear_pages = true;
|
cpu->linear_pages = true;
|
||||||
|
|
||||||
|
/* Allocate the page tables for the Switcher. */
|
||||||
|
if (!allocate_switcher_mapping(cpu)) {
|
||||||
|
release_all_pagetables(lg);
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*H:508 When the Guest calls LHCALL_LGUEST_INIT we do more setup. */
|
/*H:508 When the Guest calls LHCALL_LGUEST_INIT we do more setup. */
|
||||||
void page_table_guest_data_init(struct lg_cpu *cpu)
|
void page_table_guest_data_init(struct lg_cpu *cpu)
|
||||||
{
|
{
|
||||||
|
/*
|
||||||
|
* We tell the Guest that it can't use the virtual addresses
|
||||||
|
* used by the Switcher. This trick is equivalent to 4GB -
|
||||||
|
* switcher_addr.
|
||||||
|
*/
|
||||||
|
u32 top = ~switcher_addr + 1;
|
||||||
|
|
||||||
/* We get the kernel address: above this is all kernel memory. */
|
/* We get the kernel address: above this is all kernel memory. */
|
||||||
if (get_user(cpu->lg->kernel_address,
|
if (get_user(cpu->lg->kernel_address,
|
||||||
&cpu->lg->lguest_data->kernel_address)
|
&cpu->lg->lguest_data->kernel_address)
|
||||||
/*
|
/*
|
||||||
* We tell the Guest that it can't use the top 2 or 4 MB
|
* We tell the Guest that it can't use the top virtual
|
||||||
* of virtual addresses used by the Switcher.
|
* addresses (used by the Switcher).
|
||||||
*/
|
*/
|
||||||
|| put_user(RESERVE_MEM * 1024 * 1024,
|
|| put_user(top, &cpu->lg->lguest_data->reserve_mem)) {
|
||||||
&cpu->lg->lguest_data->reserve_mem)) {
|
|
||||||
kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
|
kill_guest(cpu, "bad guest page %p", cpu->lg->lguest_data);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -995,12 +1073,7 @@ void page_table_guest_data_init(struct lg_cpu *cpu)
|
||||||
* "pgd_index(lg->kernel_address)". This assumes it won't hit the
|
* "pgd_index(lg->kernel_address)". This assumes it won't hit the
|
||||||
* Switcher mappings, so check that now.
|
* Switcher mappings, so check that now.
|
||||||
*/
|
*/
|
||||||
#ifdef CONFIG_X86_PAE
|
if (cpu->lg->kernel_address >= switcher_addr)
|
||||||
if (pgd_index(cpu->lg->kernel_address) == SWITCHER_PGD_INDEX &&
|
|
||||||
pmd_index(cpu->lg->kernel_address) == SWITCHER_PMD_INDEX)
|
|
||||||
#else
|
|
||||||
if (pgd_index(cpu->lg->kernel_address) >= SWITCHER_PGD_INDEX)
|
|
||||||
#endif
|
|
||||||
kill_guest(cpu, "bad kernel address %#lx",
|
kill_guest(cpu, "bad kernel address %#lx",
|
||||||
cpu->lg->kernel_address);
|
cpu->lg->kernel_address);
|
||||||
}
|
}
|
||||||
|
@ -1017,102 +1090,96 @@ void free_guest_pagetable(struct lguest *lg)
|
||||||
free_page((long)lg->pgdirs[i].pgdir);
|
free_page((long)lg->pgdirs[i].pgdir);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*H:481
|
||||||
|
* This clears the Switcher mappings for cpu #i.
|
||||||
|
*/
|
||||||
|
static void remove_switcher_percpu_map(struct lg_cpu *cpu, unsigned int i)
|
||||||
|
{
|
||||||
|
unsigned long base = switcher_addr + PAGE_SIZE + i * PAGE_SIZE*2;
|
||||||
|
pte_t *pte;
|
||||||
|
|
||||||
|
/* Clear the mappings for both pages. */
|
||||||
|
pte = find_spte(cpu, base, false, 0, 0);
|
||||||
|
release_pte(*pte);
|
||||||
|
set_pte(pte, __pte(0));
|
||||||
|
|
||||||
|
pte = find_spte(cpu, base + PAGE_SIZE, false, 0, 0);
|
||||||
|
release_pte(*pte);
|
||||||
|
set_pte(pte, __pte(0));
|
||||||
|
}
|
||||||
|
|
||||||
/*H:480
|
/*H:480
|
||||||
* (vi) Mapping the Switcher when the Guest is about to run.
|
* (vi) Mapping the Switcher when the Guest is about to run.
|
||||||
*
|
*
|
||||||
* The Switcher and the two pages for this CPU need to be visible in the
|
* The Switcher and the two pages for this CPU need to be visible in the Guest
|
||||||
* Guest (and not the pages for other CPUs). We have the appropriate PTE pages
|
* (and not the pages for other CPUs).
|
||||||
* for each CPU already set up, we just need to hook them in now we know which
|
*
|
||||||
* Guest is about to run on this CPU.
|
* The pages for the pagetables have all been allocated before: we just need
|
||||||
|
* to make sure the actual PTEs are up-to-date for the CPU we're about to run
|
||||||
|
* on.
|
||||||
*/
|
*/
|
||||||
void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
|
void map_switcher_in_guest(struct lg_cpu *cpu, struct lguest_pages *pages)
|
||||||
{
|
{
|
||||||
pte_t *switcher_pte_page = __this_cpu_read(switcher_pte_pages);
|
unsigned long base;
|
||||||
pte_t regs_pte;
|
struct page *percpu_switcher_page, *regs_page;
|
||||||
|
pte_t *pte;
|
||||||
|
struct pgdir *pgdir = &cpu->lg->pgdirs[cpu->cpu_pgd];
|
||||||
|
|
||||||
#ifdef CONFIG_X86_PAE
|
/* Switcher page should always be mapped by now! */
|
||||||
pmd_t switcher_pmd;
|
BUG_ON(!pgdir->switcher_mapped);
|
||||||
pmd_t *pmd_table;
|
|
||||||
|
|
||||||
switcher_pmd = pfn_pmd(__pa(switcher_pte_page) >> PAGE_SHIFT,
|
/*
|
||||||
PAGE_KERNEL_EXEC);
|
* Remember that we have two pages for each Host CPU, so we can run a
|
||||||
|
* Guest on each CPU without them interfering. We need to make sure
|
||||||
/* Figure out where the pmd page is, by reading the PGD, and converting
|
* those pages are mapped correctly in the Guest, but since we usually
|
||||||
* it to a virtual address. */
|
* run on the same CPU, we cache that, and only update the mappings
|
||||||
pmd_table = __va(pgd_pfn(cpu->lg->
|
* when we move.
|
||||||
pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX])
|
|
||||||
<< PAGE_SHIFT);
|
|
||||||
/* Now write it into the shadow page table. */
|
|
||||||
set_pmd(&pmd_table[SWITCHER_PMD_INDEX], switcher_pmd);
|
|
||||||
#else
|
|
||||||
pgd_t switcher_pgd;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* Make the last PGD entry for this Guest point to the Switcher's PTE
|
|
||||||
* page for this CPU (with appropriate flags).
|
|
||||||
*/
|
*/
|
||||||
switcher_pgd = __pgd(__pa(switcher_pte_page) | __PAGE_KERNEL_EXEC);
|
if (pgdir->last_host_cpu == raw_smp_processor_id())
|
||||||
|
return;
|
||||||
|
|
||||||
cpu->lg->pgdirs[cpu->cpu_pgd].pgdir[SWITCHER_PGD_INDEX] = switcher_pgd;
|
/* -1 means unknown so we remove everything. */
|
||||||
|
if (pgdir->last_host_cpu == -1) {
|
||||||
#endif
|
unsigned int i;
|
||||||
/*
|
for_each_possible_cpu(i)
|
||||||
* We also change the Switcher PTE page. When we're running the Guest,
|
remove_switcher_percpu_map(cpu, i);
|
||||||
* we want the Guest's "regs" page to appear where the first Switcher
|
} else {
|
||||||
* page for this CPU is. This is an optimization: when the Switcher
|
/* We know exactly what CPU mapping to remove. */
|
||||||
* saves the Guest registers, it saves them into the first page of this
|
remove_switcher_percpu_map(cpu, pgdir->last_host_cpu);
|
||||||
* CPU's "struct lguest_pages": if we make sure the Guest's register
|
|
||||||
* page is already mapped there, we don't have to copy them out
|
|
||||||
* again.
|
|
||||||
*/
|
|
||||||
regs_pte = pfn_pte(__pa(cpu->regs_page) >> PAGE_SHIFT, PAGE_KERNEL);
|
|
||||||
set_pte(&switcher_pte_page[pte_index((unsigned long)pages)], regs_pte);
|
|
||||||
}
|
|
||||||
/*:*/
|
|
||||||
|
|
||||||
static void free_switcher_pte_pages(void)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
for_each_possible_cpu(i)
|
|
||||||
free_page((long)switcher_pte_page(i));
|
|
||||||
}
|
|
||||||
|
|
||||||
/*H:520
|
|
||||||
* Setting up the Switcher PTE page for given CPU is fairly easy, given
|
|
||||||
* the CPU number and the "struct page"s for the Switcher code itself.
|
|
||||||
*
|
|
||||||
* Currently the Switcher is less than a page long, so "pages" is always 1.
|
|
||||||
*/
|
|
||||||
static __init void populate_switcher_pte_page(unsigned int cpu,
|
|
||||||
struct page *switcher_page[],
|
|
||||||
unsigned int pages)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
pte_t *pte = switcher_pte_page(cpu);
|
|
||||||
|
|
||||||
/* The first entries are easy: they map the Switcher code. */
|
|
||||||
for (i = 0; i < pages; i++) {
|
|
||||||
set_pte(&pte[i], mk_pte(switcher_page[i],
|
|
||||||
__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The only other thing we map is this CPU's pair of pages. */
|
/*
|
||||||
i = pages + cpu*2;
|
* When we're running the Guest, we want the Guest's "regs" page to
|
||||||
|
* appear where the first Switcher page for this CPU is. This is an
|
||||||
/* First page (Guest registers) is writable from the Guest */
|
* optimization: when the Switcher saves the Guest registers, it saves
|
||||||
set_pte(&pte[i], pfn_pte(page_to_pfn(switcher_page[i]),
|
* them into the first page of this CPU's "struct lguest_pages": if we
|
||||||
__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_RW)));
|
* make sure the Guest's register page is already mapped there, we
|
||||||
|
* don't have to copy them out again.
|
||||||
|
*/
|
||||||
|
/* Find the shadow PTE for this regs page. */
|
||||||
|
base = switcher_addr + PAGE_SIZE
|
||||||
|
+ raw_smp_processor_id() * sizeof(struct lguest_pages);
|
||||||
|
pte = find_spte(cpu, base, false, 0, 0);
|
||||||
|
regs_page = pfn_to_page(__pa(cpu->regs_page) >> PAGE_SHIFT);
|
||||||
|
get_page(regs_page);
|
||||||
|
set_pte(pte, mk_pte(regs_page, __pgprot(__PAGE_KERNEL & ~_PAGE_GLOBAL)));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The second page contains the "struct lguest_ro_state", and is
|
* We map the second page of the struct lguest_pages read-only in
|
||||||
* read-only.
|
* the Guest: the IDT, GDT and other things it's not supposed to
|
||||||
|
* change.
|
||||||
*/
|
*/
|
||||||
set_pte(&pte[i+1], pfn_pte(page_to_pfn(switcher_page[i+1]),
|
pte = find_spte(cpu, base + PAGE_SIZE, false, 0, 0);
|
||||||
__pgprot(_PAGE_PRESENT|_PAGE_ACCESSED)));
|
percpu_switcher_page
|
||||||
|
= lg_switcher_pages[1 + raw_smp_processor_id()*2 + 1];
|
||||||
|
get_page(percpu_switcher_page);
|
||||||
|
set_pte(pte, mk_pte(percpu_switcher_page,
|
||||||
|
__pgprot(__PAGE_KERNEL_RO & ~_PAGE_GLOBAL)));
|
||||||
|
|
||||||
|
pgdir->last_host_cpu = raw_smp_processor_id();
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*H:490
|
||||||
* We've made it through the page table code. Perhaps our tired brains are
|
* We've made it through the page table code. Perhaps our tired brains are
|
||||||
* still processing the details, or perhaps we're simply glad it's over.
|
* still processing the details, or perhaps we're simply glad it's over.
|
||||||
*
|
*
|
||||||
|
@ -1124,29 +1191,3 @@ static __init void populate_switcher_pte_page(unsigned int cpu,
|
||||||
*
|
*
|
||||||
* There is just one file remaining in the Host.
|
* There is just one file remaining in the Host.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*H:510
|
|
||||||
* At boot or module load time, init_pagetables() allocates and populates
|
|
||||||
* the Switcher PTE page for each CPU.
|
|
||||||
*/
|
|
||||||
__init int init_pagetables(struct page **switcher_page, unsigned int pages)
|
|
||||||
{
|
|
||||||
unsigned int i;
|
|
||||||
|
|
||||||
for_each_possible_cpu(i) {
|
|
||||||
switcher_pte_page(i) = (pte_t *)get_zeroed_page(GFP_KERNEL);
|
|
||||||
if (!switcher_pte_page(i)) {
|
|
||||||
free_switcher_pte_pages();
|
|
||||||
return -ENOMEM;
|
|
||||||
}
|
|
||||||
populate_switcher_pte_page(i, switcher_page, pages);
|
|
||||||
}
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
/*:*/
|
|
||||||
|
|
||||||
/* Cleaning up simply involves freeing the PTE page for each CPU. */
|
|
||||||
void free_pagetables(void)
|
|
||||||
{
|
|
||||||
free_switcher_pte_pages();
|
|
||||||
}
|
|
||||||
|
|
|
@ -59,14 +59,13 @@ static struct {
|
||||||
/* Offset from where switcher.S was compiled to where we've copied it */
|
/* Offset from where switcher.S was compiled to where we've copied it */
|
||||||
static unsigned long switcher_offset(void)
|
static unsigned long switcher_offset(void)
|
||||||
{
|
{
|
||||||
return SWITCHER_ADDR - (unsigned long)start_switcher_text;
|
return switcher_addr - (unsigned long)start_switcher_text;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* This cpu's struct lguest_pages. */
|
/* This cpu's struct lguest_pages (after the Switcher text page) */
|
||||||
static struct lguest_pages *lguest_pages(unsigned int cpu)
|
static struct lguest_pages *lguest_pages(unsigned int cpu)
|
||||||
{
|
{
|
||||||
return &(((struct lguest_pages *)
|
return &(((struct lguest_pages *)(switcher_addr + PAGE_SIZE))[cpu]);
|
||||||
(SWITCHER_ADDR + SHARED_SWITCHER_PAGES*PAGE_SIZE))[cpu]);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static DEFINE_PER_CPU(struct lg_cpu *, lg_last_cpu);
|
static DEFINE_PER_CPU(struct lg_cpu *, lg_last_cpu);
|
||||||
|
|
|
@ -40,3 +40,17 @@ config CAIF_HSI
|
||||||
The caif low level driver for CAIF over HSI.
|
The caif low level driver for CAIF over HSI.
|
||||||
Be aware that if you enable this then you also need to
|
Be aware that if you enable this then you also need to
|
||||||
enable a low-level HSI driver.
|
enable a low-level HSI driver.
|
||||||
|
|
||||||
|
config CAIF_VIRTIO
|
||||||
|
tristate "CAIF virtio transport driver"
|
||||||
|
depends on CAIF
|
||||||
|
select VHOST_RING
|
||||||
|
select VIRTIO
|
||||||
|
select GENERIC_ALLOCATOR
|
||||||
|
default n
|
||||||
|
---help---
|
||||||
|
The caif driver for CAIF over Virtio.
|
||||||
|
|
||||||
|
if CAIF_VIRTIO
|
||||||
|
source "drivers/vhost/Kconfig"
|
||||||
|
endif
|
||||||
|
|
|
@ -9,3 +9,6 @@ obj-$(CONFIG_CAIF_SPI_SLAVE) += cfspi_slave.o
|
||||||
|
|
||||||
# HSI interface
|
# HSI interface
|
||||||
obj-$(CONFIG_CAIF_HSI) += caif_hsi.o
|
obj-$(CONFIG_CAIF_HSI) += caif_hsi.o
|
||||||
|
|
||||||
|
# Virtio interface
|
||||||
|
obj-$(CONFIG_CAIF_VIRTIO) += caif_virtio.o
|
||||||
|
|
|
@ -0,0 +1,790 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) ST-Ericsson AB 2013
|
||||||
|
* Authors: Vicram Arv
|
||||||
|
* Dmitry Tarnyagin <dmitry.tarnyagin@lockless.no>
|
||||||
|
* Sjur Brendeland
|
||||||
|
* License terms: GNU General Public License (GPL) version 2
|
||||||
|
*/
|
||||||
|
#include <linux/module.h>
|
||||||
|
#include <linux/if_arp.h>
|
||||||
|
#include <linux/virtio.h>
|
||||||
|
#include <linux/vringh.h>
|
||||||
|
#include <linux/debugfs.h>
|
||||||
|
#include <linux/spinlock.h>
|
||||||
|
#include <linux/genalloc.h>
|
||||||
|
#include <linux/interrupt.h>
|
||||||
|
#include <linux/netdevice.h>
|
||||||
|
#include <linux/rtnetlink.h>
|
||||||
|
#include <linux/virtio_ids.h>
|
||||||
|
#include <linux/virtio_caif.h>
|
||||||
|
#include <linux/virtio_ring.h>
|
||||||
|
#include <linux/dma-mapping.h>
|
||||||
|
#include <net/caif/caif_dev.h>
|
||||||
|
#include <linux/virtio_config.h>
|
||||||
|
|
||||||
|
MODULE_LICENSE("GPL v2");
|
||||||
|
MODULE_AUTHOR("Vicram Arv");
|
||||||
|
MODULE_AUTHOR("Sjur Brendeland");
|
||||||
|
MODULE_DESCRIPTION("Virtio CAIF Driver");
|
||||||
|
|
||||||
|
/* NAPI schedule quota */
|
||||||
|
#define CFV_DEFAULT_QUOTA 32
|
||||||
|
|
||||||
|
/* Defaults used if virtio config space is unavailable */
|
||||||
|
#define CFV_DEF_MTU_SIZE 4096
|
||||||
|
#define CFV_DEF_HEADROOM 32
|
||||||
|
#define CFV_DEF_TAILROOM 32
|
||||||
|
|
||||||
|
/* Required IP header alignment */
|
||||||
|
#define IP_HDR_ALIGN 4
|
||||||
|
|
||||||
|
/* struct cfv_napi_contxt - NAPI context info
|
||||||
|
* @riov: IOV holding data read from the ring. Note that riov may
|
||||||
|
* still hold data when cfv_rx_poll() returns.
|
||||||
|
* @head: Last descriptor ID we received from vringh_getdesc_kern.
|
||||||
|
* We use this to put descriptor back on the used ring. USHRT_MAX is
|
||||||
|
* used to indicate invalid head-id.
|
||||||
|
*/
|
||||||
|
struct cfv_napi_context {
|
||||||
|
struct vringh_kiov riov;
|
||||||
|
unsigned short head;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* struct cfv_stats - statistics for debugfs
|
||||||
|
* @rx_napi_complete: Number of NAPI completions (RX)
|
||||||
|
* @rx_napi_resched: Number of calls where the full quota was used (RX)
|
||||||
|
* @rx_nomem: Number of SKB alloc failures (RX)
|
||||||
|
* @rx_kicks: Number of RX kicks
|
||||||
|
* @tx_full_ring: Number times TX ring was full
|
||||||
|
* @tx_no_mem: Number of times TX went out of memory
|
||||||
|
* @tx_flow_on: Number of flow on (TX)
|
||||||
|
* @tx_kicks: Number of TX kicks
|
||||||
|
*/
|
||||||
|
struct cfv_stats {
|
||||||
|
u32 rx_napi_complete;
|
||||||
|
u32 rx_napi_resched;
|
||||||
|
u32 rx_nomem;
|
||||||
|
u32 rx_kicks;
|
||||||
|
u32 tx_full_ring;
|
||||||
|
u32 tx_no_mem;
|
||||||
|
u32 tx_flow_on;
|
||||||
|
u32 tx_kicks;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* struct cfv_info - Caif Virtio control structure
|
||||||
|
* @cfdev: caif common header
|
||||||
|
* @vdev: Associated virtio device
|
||||||
|
* @vr_rx: rx/downlink host vring
|
||||||
|
* @vq_tx: tx/uplink virtqueue
|
||||||
|
* @ndev: CAIF link layer device
|
||||||
|
* @watermark_tx: indicates number of free descriptors we need
|
||||||
|
* to reopen the tx-queues after overload.
|
||||||
|
* @tx_lock: protects vq_tx from concurrent use
|
||||||
|
* @tx_release_tasklet: Tasklet for freeing consumed TX buffers
|
||||||
|
* @napi: Napi context used in cfv_rx_poll()
|
||||||
|
* @ctx: Context data used in cfv_rx_poll()
|
||||||
|
* @tx_hr: transmit headroom
|
||||||
|
* @rx_hr: receive headroom
|
||||||
|
* @tx_tr: transmit tail room
|
||||||
|
* @rx_tr: receive tail room
|
||||||
|
* @mtu: transmit max size
|
||||||
|
* @mru: receive max size
|
||||||
|
* @allocsz: size of dma memory reserved for TX buffers
|
||||||
|
* @alloc_addr: virtual address to dma memory for TX buffers
|
||||||
|
* @alloc_dma: dma address to dma memory for TX buffers
|
||||||
|
* @genpool: Gen Pool used for allocating TX buffers
|
||||||
|
* @reserved_mem: Pointer to memory reserve allocated from genpool
|
||||||
|
* @reserved_size: Size of memory reserve allocated from genpool
|
||||||
|
* @stats: Statistics exposed in sysfs
|
||||||
|
* @debugfs: Debugfs dentry for statistic counters
|
||||||
|
*/
|
||||||
|
struct cfv_info {
|
||||||
|
struct caif_dev_common cfdev;
|
||||||
|
struct virtio_device *vdev;
|
||||||
|
struct vringh *vr_rx;
|
||||||
|
struct virtqueue *vq_tx;
|
||||||
|
struct net_device *ndev;
|
||||||
|
unsigned int watermark_tx;
|
||||||
|
/* Protect access to vq_tx */
|
||||||
|
spinlock_t tx_lock;
|
||||||
|
struct tasklet_struct tx_release_tasklet;
|
||||||
|
struct napi_struct napi;
|
||||||
|
struct cfv_napi_context ctx;
|
||||||
|
u16 tx_hr;
|
||||||
|
u16 rx_hr;
|
||||||
|
u16 tx_tr;
|
||||||
|
u16 rx_tr;
|
||||||
|
u32 mtu;
|
||||||
|
u32 mru;
|
||||||
|
size_t allocsz;
|
||||||
|
void *alloc_addr;
|
||||||
|
dma_addr_t alloc_dma;
|
||||||
|
struct gen_pool *genpool;
|
||||||
|
unsigned long reserved_mem;
|
||||||
|
size_t reserved_size;
|
||||||
|
struct cfv_stats stats;
|
||||||
|
struct dentry *debugfs;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* struct buf_info - maintains transmit buffer data handle
|
||||||
|
* @size: size of transmit buffer
|
||||||
|
* @dma_handle: handle to allocated dma device memory area
|
||||||
|
* @vaddr: virtual address mapping to allocated memory area
|
||||||
|
*/
|
||||||
|
struct buf_info {
|
||||||
|
size_t size;
|
||||||
|
u8 *vaddr;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Called from virtio device, in IRQ context */
|
||||||
|
static void cfv_release_cb(struct virtqueue *vq_tx)
|
||||||
|
{
|
||||||
|
struct cfv_info *cfv = vq_tx->vdev->priv;
|
||||||
|
|
||||||
|
++cfv->stats.tx_kicks;
|
||||||
|
tasklet_schedule(&cfv->tx_release_tasklet);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void free_buf_info(struct cfv_info *cfv, struct buf_info *buf_info)
|
||||||
|
{
|
||||||
|
if (!buf_info)
|
||||||
|
return;
|
||||||
|
gen_pool_free(cfv->genpool, (unsigned long) buf_info->vaddr,
|
||||||
|
buf_info->size);
|
||||||
|
kfree(buf_info);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* This is invoked whenever the remote processor completed processing
|
||||||
|
* a TX msg we just sent, and the buffer is put back to the used ring.
|
||||||
|
*/
|
||||||
|
static void cfv_release_used_buf(struct virtqueue *vq_tx)
|
||||||
|
{
|
||||||
|
struct cfv_info *cfv = vq_tx->vdev->priv;
|
||||||
|
unsigned long flags;
|
||||||
|
|
||||||
|
BUG_ON(vq_tx != cfv->vq_tx);
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
unsigned int len;
|
||||||
|
struct buf_info *buf_info;
|
||||||
|
|
||||||
|
/* Get used buffer from used ring to recycle used descriptors */
|
||||||
|
spin_lock_irqsave(&cfv->tx_lock, flags);
|
||||||
|
buf_info = virtqueue_get_buf(vq_tx, &len);
|
||||||
|
spin_unlock_irqrestore(&cfv->tx_lock, flags);
|
||||||
|
|
||||||
|
/* Stop looping if there are no more buffers to free */
|
||||||
|
if (!buf_info)
|
||||||
|
break;
|
||||||
|
|
||||||
|
free_buf_info(cfv, buf_info);
|
||||||
|
|
||||||
|
/* watermark_tx indicates if we previously stopped the tx
|
||||||
|
* queues. If we have enough free stots in the virtio ring,
|
||||||
|
* re-establish memory reserved and open up tx queues.
|
||||||
|
*/
|
||||||
|
if (cfv->vq_tx->num_free <= cfv->watermark_tx)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Re-establish memory reserve */
|
||||||
|
if (cfv->reserved_mem == 0 && cfv->genpool)
|
||||||
|
cfv->reserved_mem =
|
||||||
|
gen_pool_alloc(cfv->genpool,
|
||||||
|
cfv->reserved_size);
|
||||||
|
|
||||||
|
/* Open up the tx queues */
|
||||||
|
if (cfv->reserved_mem) {
|
||||||
|
cfv->watermark_tx =
|
||||||
|
virtqueue_get_vring_size(cfv->vq_tx);
|
||||||
|
netif_tx_wake_all_queues(cfv->ndev);
|
||||||
|
/* Buffers are recycled in cfv_netdev_tx, so
|
||||||
|
* disable notifications when queues are opened.
|
||||||
|
*/
|
||||||
|
virtqueue_disable_cb(cfv->vq_tx);
|
||||||
|
++cfv->stats.tx_flow_on;
|
||||||
|
} else {
|
||||||
|
/* if no memory reserve, wait for more free slots */
|
||||||
|
WARN_ON(cfv->watermark_tx >
|
||||||
|
virtqueue_get_vring_size(cfv->vq_tx));
|
||||||
|
cfv->watermark_tx +=
|
||||||
|
virtqueue_get_vring_size(cfv->vq_tx) / 4;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Allocate a SKB and copy packet data to it */
|
||||||
|
static struct sk_buff *cfv_alloc_and_copy_skb(int *err,
|
||||||
|
struct cfv_info *cfv,
|
||||||
|
u8 *frm, u32 frm_len)
|
||||||
|
{
|
||||||
|
struct sk_buff *skb;
|
||||||
|
u32 cfpkt_len, pad_len;
|
||||||
|
|
||||||
|
*err = 0;
|
||||||
|
/* Verify that packet size with down-link header and mtu size */
|
||||||
|
if (frm_len > cfv->mru || frm_len <= cfv->rx_hr + cfv->rx_tr) {
|
||||||
|
netdev_err(cfv->ndev,
|
||||||
|
"Invalid frmlen:%u mtu:%u hr:%d tr:%d\n",
|
||||||
|
frm_len, cfv->mru, cfv->rx_hr,
|
||||||
|
cfv->rx_tr);
|
||||||
|
*err = -EPROTO;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
cfpkt_len = frm_len - (cfv->rx_hr + cfv->rx_tr);
|
||||||
|
pad_len = (unsigned long)(frm + cfv->rx_hr) & (IP_HDR_ALIGN - 1);
|
||||||
|
|
||||||
|
skb = netdev_alloc_skb(cfv->ndev, frm_len + pad_len);
|
||||||
|
if (!skb) {
|
||||||
|
*err = -ENOMEM;
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
skb_reserve(skb, cfv->rx_hr + pad_len);
|
||||||
|
|
||||||
|
memcpy(skb_put(skb, cfpkt_len), frm + cfv->rx_hr, cfpkt_len);
|
||||||
|
return skb;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Get packets from the host vring */
|
||||||
|
static int cfv_rx_poll(struct napi_struct *napi, int quota)
|
||||||
|
{
|
||||||
|
struct cfv_info *cfv = container_of(napi, struct cfv_info, napi);
|
||||||
|
int rxcnt = 0;
|
||||||
|
int err = 0;
|
||||||
|
void *buf;
|
||||||
|
struct sk_buff *skb;
|
||||||
|
struct vringh_kiov *riov = &cfv->ctx.riov;
|
||||||
|
unsigned int skb_len;
|
||||||
|
|
||||||
|
again:
|
||||||
|
do {
|
||||||
|
skb = NULL;
|
||||||
|
|
||||||
|
/* Put the previous iovec back on the used ring and
|
||||||
|
* fetch a new iovec if we have processed all elements.
|
||||||
|
*/
|
||||||
|
if (riov->i == riov->used) {
|
||||||
|
if (cfv->ctx.head != USHRT_MAX) {
|
||||||
|
vringh_complete_kern(cfv->vr_rx,
|
||||||
|
cfv->ctx.head,
|
||||||
|
0);
|
||||||
|
cfv->ctx.head = USHRT_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
err = vringh_getdesc_kern(
|
||||||
|
cfv->vr_rx,
|
||||||
|
riov,
|
||||||
|
NULL,
|
||||||
|
&cfv->ctx.head,
|
||||||
|
GFP_ATOMIC);
|
||||||
|
|
||||||
|
if (err <= 0)
|
||||||
|
goto exit;
|
||||||
|
}
|
||||||
|
|
||||||
|
buf = phys_to_virt((unsigned long) riov->iov[riov->i].iov_base);
|
||||||
|
/* TODO: Add check on valid buffer address */
|
||||||
|
|
||||||
|
skb = cfv_alloc_and_copy_skb(&err, cfv, buf,
|
||||||
|
riov->iov[riov->i].iov_len);
|
||||||
|
if (unlikely(err))
|
||||||
|
goto exit;
|
||||||
|
|
||||||
|
/* Push received packet up the stack. */
|
||||||
|
skb_len = skb->len;
|
||||||
|
skb->protocol = htons(ETH_P_CAIF);
|
||||||
|
skb_reset_mac_header(skb);
|
||||||
|
skb->dev = cfv->ndev;
|
||||||
|
err = netif_receive_skb(skb);
|
||||||
|
if (unlikely(err)) {
|
||||||
|
++cfv->ndev->stats.rx_dropped;
|
||||||
|
} else {
|
||||||
|
++cfv->ndev->stats.rx_packets;
|
||||||
|
cfv->ndev->stats.rx_bytes += skb_len;
|
||||||
|
}
|
||||||
|
|
||||||
|
++riov->i;
|
||||||
|
++rxcnt;
|
||||||
|
} while (rxcnt < quota);
|
||||||
|
|
||||||
|
++cfv->stats.rx_napi_resched;
|
||||||
|
goto out;
|
||||||
|
|
||||||
|
exit:
|
||||||
|
switch (err) {
|
||||||
|
case 0:
|
||||||
|
++cfv->stats.rx_napi_complete;
|
||||||
|
|
||||||
|
/* Really out of patckets? (stolen from virtio_net)*/
|
||||||
|
napi_complete(napi);
|
||||||
|
if (unlikely(!vringh_notify_enable_kern(cfv->vr_rx)) &&
|
||||||
|
napi_schedule_prep(napi)) {
|
||||||
|
vringh_notify_disable_kern(cfv->vr_rx);
|
||||||
|
__napi_schedule(napi);
|
||||||
|
goto again;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case -ENOMEM:
|
||||||
|
++cfv->stats.rx_nomem;
|
||||||
|
dev_kfree_skb(skb);
|
||||||
|
/* Stop NAPI poll on OOM, we hope to be polled later */
|
||||||
|
napi_complete(napi);
|
||||||
|
vringh_notify_enable_kern(cfv->vr_rx);
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
/* We're doomed, any modem fault is fatal */
|
||||||
|
netdev_warn(cfv->ndev, "Bad ring, disable device\n");
|
||||||
|
cfv->ndev->stats.rx_dropped = riov->used - riov->i;
|
||||||
|
napi_complete(napi);
|
||||||
|
vringh_notify_disable_kern(cfv->vr_rx);
|
||||||
|
netif_carrier_off(cfv->ndev);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
out:
|
||||||
|
if (rxcnt && vringh_need_notify_kern(cfv->vr_rx) > 0)
|
||||||
|
vringh_notify(cfv->vr_rx);
|
||||||
|
return rxcnt;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void cfv_recv(struct virtio_device *vdev, struct vringh *vr_rx)
|
||||||
|
{
|
||||||
|
struct cfv_info *cfv = vdev->priv;
|
||||||
|
|
||||||
|
++cfv->stats.rx_kicks;
|
||||||
|
vringh_notify_disable_kern(cfv->vr_rx);
|
||||||
|
napi_schedule(&cfv->napi);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void cfv_destroy_genpool(struct cfv_info *cfv)
|
||||||
|
{
|
||||||
|
if (cfv->alloc_addr)
|
||||||
|
dma_free_coherent(cfv->vdev->dev.parent->parent,
|
||||||
|
cfv->allocsz, cfv->alloc_addr,
|
||||||
|
cfv->alloc_dma);
|
||||||
|
|
||||||
|
if (!cfv->genpool)
|
||||||
|
return;
|
||||||
|
gen_pool_free(cfv->genpool, cfv->reserved_mem,
|
||||||
|
cfv->reserved_size);
|
||||||
|
gen_pool_destroy(cfv->genpool);
|
||||||
|
cfv->genpool = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int cfv_create_genpool(struct cfv_info *cfv)
|
||||||
|
{
|
||||||
|
int err;
|
||||||
|
|
||||||
|
/* dma_alloc can only allocate whole pages, and we need a more
|
||||||
|
* fine graned allocation so we use genpool. We ask for space needed
|
||||||
|
* by IP and a full ring. If the dma allcoation fails we retry with a
|
||||||
|
* smaller allocation size.
|
||||||
|
*/
|
||||||
|
err = -ENOMEM;
|
||||||
|
cfv->allocsz = (virtqueue_get_vring_size(cfv->vq_tx) *
|
||||||
|
(ETH_DATA_LEN + cfv->tx_hr + cfv->tx_tr) * 11)/10;
|
||||||
|
if (cfv->allocsz <= (num_possible_cpus() + 1) * cfv->ndev->mtu)
|
||||||
|
return -EINVAL;
|
||||||
|
|
||||||
|
for (;;) {
|
||||||
|
if (cfv->allocsz <= num_possible_cpus() * cfv->ndev->mtu) {
|
||||||
|
netdev_info(cfv->ndev, "Not enough device memory\n");
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
|
cfv->alloc_addr = dma_alloc_coherent(
|
||||||
|
cfv->vdev->dev.parent->parent,
|
||||||
|
cfv->allocsz, &cfv->alloc_dma,
|
||||||
|
GFP_ATOMIC);
|
||||||
|
if (cfv->alloc_addr)
|
||||||
|
break;
|
||||||
|
|
||||||
|
cfv->allocsz = (cfv->allocsz * 3) >> 2;
|
||||||
|
}
|
||||||
|
|
||||||
|
netdev_dbg(cfv->ndev, "Allocated %zd bytes from dma-memory\n",
|
||||||
|
cfv->allocsz);
|
||||||
|
|
||||||
|
/* Allocate on 128 bytes boundaries (1 << 7)*/
|
||||||
|
cfv->genpool = gen_pool_create(7, -1);
|
||||||
|
if (!cfv->genpool)
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
err = gen_pool_add_virt(cfv->genpool, (unsigned long)cfv->alloc_addr,
|
||||||
|
(phys_addr_t)virt_to_phys(cfv->alloc_addr),
|
||||||
|
cfv->allocsz, -1);
|
||||||
|
if (err)
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
/* Reserve some memory for low memory situations. If we hit the roof
|
||||||
|
* in the memory pool, we stop TX flow and release the reserve.
|
||||||
|
*/
|
||||||
|
cfv->reserved_size = num_possible_cpus() * cfv->ndev->mtu;
|
||||||
|
cfv->reserved_mem = gen_pool_alloc(cfv->genpool,
|
||||||
|
cfv->reserved_size);
|
||||||
|
if (!cfv->reserved_mem) {
|
||||||
|
err = -ENOMEM;
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
cfv->watermark_tx = virtqueue_get_vring_size(cfv->vq_tx);
|
||||||
|
return 0;
|
||||||
|
err:
|
||||||
|
cfv_destroy_genpool(cfv);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Enable the CAIF interface and allocate the memory-pool */
|
||||||
|
static int cfv_netdev_open(struct net_device *netdev)
|
||||||
|
{
|
||||||
|
struct cfv_info *cfv = netdev_priv(netdev);
|
||||||
|
|
||||||
|
if (cfv_create_genpool(cfv))
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
netif_carrier_on(netdev);
|
||||||
|
napi_enable(&cfv->napi);
|
||||||
|
|
||||||
|
/* Schedule NAPI to read any pending packets */
|
||||||
|
napi_schedule(&cfv->napi);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Disable the CAIF interface and free the memory-pool */
|
||||||
|
static int cfv_netdev_close(struct net_device *netdev)
|
||||||
|
{
|
||||||
|
struct cfv_info *cfv = netdev_priv(netdev);
|
||||||
|
unsigned long flags;
|
||||||
|
struct buf_info *buf_info;
|
||||||
|
|
||||||
|
/* Disable interrupts, queues and NAPI polling */
|
||||||
|
netif_carrier_off(netdev);
|
||||||
|
virtqueue_disable_cb(cfv->vq_tx);
|
||||||
|
vringh_notify_disable_kern(cfv->vr_rx);
|
||||||
|
napi_disable(&cfv->napi);
|
||||||
|
|
||||||
|
/* Release any TX buffers on both used and avilable rings */
|
||||||
|
cfv_release_used_buf(cfv->vq_tx);
|
||||||
|
spin_lock_irqsave(&cfv->tx_lock, flags);
|
||||||
|
while ((buf_info = virtqueue_detach_unused_buf(cfv->vq_tx)))
|
||||||
|
free_buf_info(cfv, buf_info);
|
||||||
|
spin_unlock_irqrestore(&cfv->tx_lock, flags);
|
||||||
|
|
||||||
|
/* Release all dma allocated memory and destroy the pool */
|
||||||
|
cfv_destroy_genpool(cfv);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Allocate a buffer in dma-memory and copy skb to it */
|
||||||
|
static struct buf_info *cfv_alloc_and_copy_to_shm(struct cfv_info *cfv,
|
||||||
|
struct sk_buff *skb,
|
||||||
|
struct scatterlist *sg)
|
||||||
|
{
|
||||||
|
struct caif_payload_info *info = (void *)&skb->cb;
|
||||||
|
struct buf_info *buf_info = NULL;
|
||||||
|
u8 pad_len, hdr_ofs;
|
||||||
|
|
||||||
|
if (!cfv->genpool)
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
if (unlikely(cfv->tx_hr + skb->len + cfv->tx_tr > cfv->mtu)) {
|
||||||
|
netdev_warn(cfv->ndev, "Invalid packet len (%d > %d)\n",
|
||||||
|
cfv->tx_hr + skb->len + cfv->tx_tr, cfv->mtu);
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
buf_info = kmalloc(sizeof(struct buf_info), GFP_ATOMIC);
|
||||||
|
if (unlikely(!buf_info))
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
/* Make the IP header aligned in tbe buffer */
|
||||||
|
hdr_ofs = cfv->tx_hr + info->hdr_len;
|
||||||
|
pad_len = hdr_ofs & (IP_HDR_ALIGN - 1);
|
||||||
|
buf_info->size = cfv->tx_hr + skb->len + cfv->tx_tr + pad_len;
|
||||||
|
|
||||||
|
/* allocate dma memory buffer */
|
||||||
|
buf_info->vaddr = (void *)gen_pool_alloc(cfv->genpool, buf_info->size);
|
||||||
|
if (unlikely(!buf_info->vaddr))
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
/* copy skbuf contents to send buffer */
|
||||||
|
skb_copy_bits(skb, 0, buf_info->vaddr + cfv->tx_hr + pad_len, skb->len);
|
||||||
|
sg_init_one(sg, buf_info->vaddr + pad_len,
|
||||||
|
skb->len + cfv->tx_hr + cfv->rx_hr);
|
||||||
|
|
||||||
|
return buf_info;
|
||||||
|
err:
|
||||||
|
kfree(buf_info);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Put the CAIF packet on the virtio ring and kick the receiver */
|
||||||
|
static int cfv_netdev_tx(struct sk_buff *skb, struct net_device *netdev)
|
||||||
|
{
|
||||||
|
struct cfv_info *cfv = netdev_priv(netdev);
|
||||||
|
struct buf_info *buf_info;
|
||||||
|
struct scatterlist sg;
|
||||||
|
unsigned long flags;
|
||||||
|
bool flow_off = false;
|
||||||
|
int ret;
|
||||||
|
|
||||||
|
/* garbage collect released buffers */
|
||||||
|
cfv_release_used_buf(cfv->vq_tx);
|
||||||
|
spin_lock_irqsave(&cfv->tx_lock, flags);
|
||||||
|
|
||||||
|
/* Flow-off check takes into account number of cpus to make sure
|
||||||
|
* virtqueue will not be overfilled in any possible smp conditions.
|
||||||
|
*
|
||||||
|
* Flow-on is triggered when sufficient buffers are freed
|
||||||
|
*/
|
||||||
|
if (unlikely(cfv->vq_tx->num_free <= num_present_cpus())) {
|
||||||
|
flow_off = true;
|
||||||
|
cfv->stats.tx_full_ring++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If we run out of memory, we release the memory reserve and retry
|
||||||
|
* allocation.
|
||||||
|
*/
|
||||||
|
buf_info = cfv_alloc_and_copy_to_shm(cfv, skb, &sg);
|
||||||
|
if (unlikely(!buf_info)) {
|
||||||
|
cfv->stats.tx_no_mem++;
|
||||||
|
flow_off = true;
|
||||||
|
|
||||||
|
if (cfv->reserved_mem && cfv->genpool) {
|
||||||
|
gen_pool_free(cfv->genpool, cfv->reserved_mem,
|
||||||
|
cfv->reserved_size);
|
||||||
|
cfv->reserved_mem = 0;
|
||||||
|
buf_info = cfv_alloc_and_copy_to_shm(cfv, skb, &sg);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely(flow_off)) {
|
||||||
|
/* Turn flow on when a 1/4 of the descriptors are released */
|
||||||
|
cfv->watermark_tx = virtqueue_get_vring_size(cfv->vq_tx) / 4;
|
||||||
|
/* Enable notifications of recycled TX buffers */
|
||||||
|
virtqueue_enable_cb(cfv->vq_tx);
|
||||||
|
netif_tx_stop_all_queues(netdev);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (unlikely(!buf_info)) {
|
||||||
|
/* If the memory reserve does it's job, this shouldn't happen */
|
||||||
|
netdev_warn(cfv->ndev, "Out of gen_pool memory\n");
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
ret = virtqueue_add_outbuf(cfv->vq_tx, &sg, 1, buf_info, GFP_ATOMIC);
|
||||||
|
if (unlikely((ret < 0))) {
|
||||||
|
/* If flow control works, this shouldn't happen */
|
||||||
|
netdev_warn(cfv->ndev, "Failed adding buffer to TX vring:%d\n",
|
||||||
|
ret);
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* update netdev statistics */
|
||||||
|
cfv->ndev->stats.tx_packets++;
|
||||||
|
cfv->ndev->stats.tx_bytes += skb->len;
|
||||||
|
spin_unlock_irqrestore(&cfv->tx_lock, flags);
|
||||||
|
|
||||||
|
/* tell the remote processor it has a pending message to read */
|
||||||
|
virtqueue_kick(cfv->vq_tx);
|
||||||
|
|
||||||
|
dev_kfree_skb(skb);
|
||||||
|
return NETDEV_TX_OK;
|
||||||
|
err:
|
||||||
|
spin_unlock_irqrestore(&cfv->tx_lock, flags);
|
||||||
|
cfv->ndev->stats.tx_dropped++;
|
||||||
|
free_buf_info(cfv, buf_info);
|
||||||
|
dev_kfree_skb(skb);
|
||||||
|
return NETDEV_TX_OK;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void cfv_tx_release_tasklet(unsigned long drv)
|
||||||
|
{
|
||||||
|
struct cfv_info *cfv = (struct cfv_info *)drv;
|
||||||
|
cfv_release_used_buf(cfv->vq_tx);
|
||||||
|
}
|
||||||
|
|
||||||
|
static const struct net_device_ops cfv_netdev_ops = {
|
||||||
|
.ndo_open = cfv_netdev_open,
|
||||||
|
.ndo_stop = cfv_netdev_close,
|
||||||
|
.ndo_start_xmit = cfv_netdev_tx,
|
||||||
|
};
|
||||||
|
|
||||||
|
static void cfv_netdev_setup(struct net_device *netdev)
|
||||||
|
{
|
||||||
|
netdev->netdev_ops = &cfv_netdev_ops;
|
||||||
|
netdev->type = ARPHRD_CAIF;
|
||||||
|
netdev->tx_queue_len = 100;
|
||||||
|
netdev->flags = IFF_POINTOPOINT | IFF_NOARP;
|
||||||
|
netdev->mtu = CFV_DEF_MTU_SIZE;
|
||||||
|
netdev->destructor = free_netdev;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Create debugfs counters for the device */
|
||||||
|
static inline void debugfs_init(struct cfv_info *cfv)
|
||||||
|
{
|
||||||
|
cfv->debugfs =
|
||||||
|
debugfs_create_dir(netdev_name(cfv->ndev), NULL);
|
||||||
|
|
||||||
|
if (IS_ERR(cfv->debugfs))
|
||||||
|
return;
|
||||||
|
|
||||||
|
debugfs_create_u32("rx-napi-complete", S_IRUSR, cfv->debugfs,
|
||||||
|
&cfv->stats.rx_napi_complete);
|
||||||
|
debugfs_create_u32("rx-napi-resched", S_IRUSR, cfv->debugfs,
|
||||||
|
&cfv->stats.rx_napi_resched);
|
||||||
|
debugfs_create_u32("rx-nomem", S_IRUSR, cfv->debugfs,
|
||||||
|
&cfv->stats.rx_nomem);
|
||||||
|
debugfs_create_u32("rx-kicks", S_IRUSR, cfv->debugfs,
|
||||||
|
&cfv->stats.rx_kicks);
|
||||||
|
debugfs_create_u32("tx-full-ring", S_IRUSR, cfv->debugfs,
|
||||||
|
&cfv->stats.tx_full_ring);
|
||||||
|
debugfs_create_u32("tx-no-mem", S_IRUSR, cfv->debugfs,
|
||||||
|
&cfv->stats.tx_no_mem);
|
||||||
|
debugfs_create_u32("tx-kicks", S_IRUSR, cfv->debugfs,
|
||||||
|
&cfv->stats.tx_kicks);
|
||||||
|
debugfs_create_u32("tx-flow-on", S_IRUSR, cfv->debugfs,
|
||||||
|
&cfv->stats.tx_flow_on);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Setup CAIF for the a virtio device */
|
||||||
|
static int cfv_probe(struct virtio_device *vdev)
|
||||||
|
{
|
||||||
|
vq_callback_t *vq_cbs = cfv_release_cb;
|
||||||
|
vrh_callback_t *vrh_cbs = cfv_recv;
|
||||||
|
const char *names = "output";
|
||||||
|
const char *cfv_netdev_name = "cfvrt";
|
||||||
|
struct net_device *netdev;
|
||||||
|
struct cfv_info *cfv;
|
||||||
|
int err = -EINVAL;
|
||||||
|
|
||||||
|
netdev = alloc_netdev(sizeof(struct cfv_info), cfv_netdev_name,
|
||||||
|
cfv_netdev_setup);
|
||||||
|
if (!netdev)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
cfv = netdev_priv(netdev);
|
||||||
|
cfv->vdev = vdev;
|
||||||
|
cfv->ndev = netdev;
|
||||||
|
|
||||||
|
spin_lock_init(&cfv->tx_lock);
|
||||||
|
|
||||||
|
/* Get the RX virtio ring. This is a "host side vring". */
|
||||||
|
err = -ENODEV;
|
||||||
|
if (!vdev->vringh_config || !vdev->vringh_config->find_vrhs)
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
err = vdev->vringh_config->find_vrhs(vdev, 1, &cfv->vr_rx, &vrh_cbs);
|
||||||
|
if (err)
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
/* Get the TX virtio ring. This is a "guest side vring". */
|
||||||
|
err = vdev->config->find_vqs(vdev, 1, &cfv->vq_tx, &vq_cbs, &names);
|
||||||
|
if (err)
|
||||||
|
goto err;
|
||||||
|
|
||||||
|
/* Get the CAIF configuration from virtio config space, if available */
|
||||||
|
#define GET_VIRTIO_CONFIG_OPS(_v, _var, _f) \
|
||||||
|
((_v)->config->get(_v, offsetof(struct virtio_caif_transf_config, _f), \
|
||||||
|
&_var, \
|
||||||
|
FIELD_SIZEOF(struct virtio_caif_transf_config, _f)))
|
||||||
|
|
||||||
|
if (vdev->config->get) {
|
||||||
|
GET_VIRTIO_CONFIG_OPS(vdev, cfv->tx_hr, headroom);
|
||||||
|
GET_VIRTIO_CONFIG_OPS(vdev, cfv->rx_hr, headroom);
|
||||||
|
GET_VIRTIO_CONFIG_OPS(vdev, cfv->tx_tr, tailroom);
|
||||||
|
GET_VIRTIO_CONFIG_OPS(vdev, cfv->rx_tr, tailroom);
|
||||||
|
GET_VIRTIO_CONFIG_OPS(vdev, cfv->mtu, mtu);
|
||||||
|
GET_VIRTIO_CONFIG_OPS(vdev, cfv->mru, mtu);
|
||||||
|
} else {
|
||||||
|
cfv->tx_hr = CFV_DEF_HEADROOM;
|
||||||
|
cfv->rx_hr = CFV_DEF_HEADROOM;
|
||||||
|
cfv->tx_tr = CFV_DEF_TAILROOM;
|
||||||
|
cfv->rx_tr = CFV_DEF_TAILROOM;
|
||||||
|
cfv->mtu = CFV_DEF_MTU_SIZE;
|
||||||
|
cfv->mru = CFV_DEF_MTU_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
netdev->needed_headroom = cfv->tx_hr;
|
||||||
|
netdev->needed_tailroom = cfv->tx_tr;
|
||||||
|
|
||||||
|
/* Disable buffer release interrupts unless we have stopped TX queues */
|
||||||
|
virtqueue_disable_cb(cfv->vq_tx);
|
||||||
|
|
||||||
|
netdev->mtu = cfv->mtu - cfv->tx_tr;
|
||||||
|
vdev->priv = cfv;
|
||||||
|
|
||||||
|
/* Initialize NAPI poll context data */
|
||||||
|
vringh_kiov_init(&cfv->ctx.riov, NULL, 0);
|
||||||
|
cfv->ctx.head = USHRT_MAX;
|
||||||
|
netif_napi_add(netdev, &cfv->napi, cfv_rx_poll, CFV_DEFAULT_QUOTA);
|
||||||
|
|
||||||
|
tasklet_init(&cfv->tx_release_tasklet,
|
||||||
|
cfv_tx_release_tasklet,
|
||||||
|
(unsigned long)cfv);
|
||||||
|
|
||||||
|
/* Carrier is off until netdevice is opened */
|
||||||
|
netif_carrier_off(netdev);
|
||||||
|
|
||||||
|
/* register Netdev */
|
||||||
|
err = register_netdev(netdev);
|
||||||
|
if (err) {
|
||||||
|
dev_err(&vdev->dev, "Unable to register netdev (%d)\n", err);
|
||||||
|
goto err;
|
||||||
|
}
|
||||||
|
|
||||||
|
debugfs_init(cfv);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
err:
|
||||||
|
netdev_warn(cfv->ndev, "CAIF Virtio probe failed:%d\n", err);
|
||||||
|
|
||||||
|
if (cfv->vr_rx)
|
||||||
|
vdev->vringh_config->del_vrhs(cfv->vdev);
|
||||||
|
if (cfv->vdev)
|
||||||
|
vdev->config->del_vqs(cfv->vdev);
|
||||||
|
free_netdev(netdev);
|
||||||
|
return err;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void cfv_remove(struct virtio_device *vdev)
|
||||||
|
{
|
||||||
|
struct cfv_info *cfv = vdev->priv;
|
||||||
|
|
||||||
|
rtnl_lock();
|
||||||
|
dev_close(cfv->ndev);
|
||||||
|
rtnl_unlock();
|
||||||
|
|
||||||
|
tasklet_kill(&cfv->tx_release_tasklet);
|
||||||
|
debugfs_remove_recursive(cfv->debugfs);
|
||||||
|
|
||||||
|
vringh_kiov_cleanup(&cfv->ctx.riov);
|
||||||
|
vdev->config->reset(vdev);
|
||||||
|
vdev->vringh_config->del_vrhs(cfv->vdev);
|
||||||
|
cfv->vr_rx = NULL;
|
||||||
|
vdev->config->del_vqs(cfv->vdev);
|
||||||
|
unregister_netdev(cfv->ndev);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct virtio_device_id id_table[] = {
|
||||||
|
{ VIRTIO_ID_CAIF, VIRTIO_DEV_ANY_ID },
|
||||||
|
{ 0 },
|
||||||
|
};
|
||||||
|
|
||||||
|
static unsigned int features[] = {
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct virtio_driver caif_virtio_driver = {
|
||||||
|
.feature_table = features,
|
||||||
|
.feature_table_size = ARRAY_SIZE(features),
|
||||||
|
.driver.name = KBUILD_MODNAME,
|
||||||
|
.driver.owner = THIS_MODULE,
|
||||||
|
.id_table = id_table,
|
||||||
|
.probe = cfv_probe,
|
||||||
|
.remove = cfv_remove,
|
||||||
|
};
|
||||||
|
|
||||||
|
module_virtio_driver(caif_virtio_driver);
|
||||||
|
MODULE_DEVICE_TABLE(virtio, id_table);
|
|
@ -39,7 +39,6 @@ module_param(gso, bool, 0444);
|
||||||
#define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
|
#define MAX_PACKET_LEN (ETH_HLEN + VLAN_HLEN + ETH_DATA_LEN)
|
||||||
#define GOOD_COPY_LEN 128
|
#define GOOD_COPY_LEN 128
|
||||||
|
|
||||||
#define VIRTNET_SEND_COMMAND_SG_MAX 2
|
|
||||||
#define VIRTNET_DRIVER_VERSION "1.0.0"
|
#define VIRTNET_DRIVER_VERSION "1.0.0"
|
||||||
|
|
||||||
struct virtnet_stats {
|
struct virtnet_stats {
|
||||||
|
@ -444,7 +443,7 @@ static int add_recvbuf_small(struct receive_queue *rq, gfp_t gfp)
|
||||||
|
|
||||||
skb_to_sgvec(skb, rq->sg + 1, 0, skb->len);
|
skb_to_sgvec(skb, rq->sg + 1, 0, skb->len);
|
||||||
|
|
||||||
err = virtqueue_add_buf(rq->vq, rq->sg, 0, 2, skb, gfp);
|
err = virtqueue_add_inbuf(rq->vq, rq->sg, 2, skb, gfp);
|
||||||
if (err < 0)
|
if (err < 0)
|
||||||
dev_kfree_skb(skb);
|
dev_kfree_skb(skb);
|
||||||
|
|
||||||
|
@ -489,8 +488,8 @@ static int add_recvbuf_big(struct receive_queue *rq, gfp_t gfp)
|
||||||
|
|
||||||
/* chain first in list head */
|
/* chain first in list head */
|
||||||
first->private = (unsigned long)list;
|
first->private = (unsigned long)list;
|
||||||
err = virtqueue_add_buf(rq->vq, rq->sg, 0, MAX_SKB_FRAGS + 2,
|
err = virtqueue_add_inbuf(rq->vq, rq->sg, MAX_SKB_FRAGS + 2,
|
||||||
first, gfp);
|
first, gfp);
|
||||||
if (err < 0)
|
if (err < 0)
|
||||||
give_pages(rq, first);
|
give_pages(rq, first);
|
||||||
|
|
||||||
|
@ -508,7 +507,7 @@ static int add_recvbuf_mergeable(struct receive_queue *rq, gfp_t gfp)
|
||||||
|
|
||||||
sg_init_one(rq->sg, page_address(page), PAGE_SIZE);
|
sg_init_one(rq->sg, page_address(page), PAGE_SIZE);
|
||||||
|
|
||||||
err = virtqueue_add_buf(rq->vq, rq->sg, 0, 1, page, gfp);
|
err = virtqueue_add_inbuf(rq->vq, rq->sg, 1, page, gfp);
|
||||||
if (err < 0)
|
if (err < 0)
|
||||||
give_pages(rq, page);
|
give_pages(rq, page);
|
||||||
|
|
||||||
|
@ -582,7 +581,7 @@ static void refill_work(struct work_struct *work)
|
||||||
bool still_empty;
|
bool still_empty;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < vi->max_queue_pairs; i++) {
|
for (i = 0; i < vi->curr_queue_pairs; i++) {
|
||||||
struct receive_queue *rq = &vi->rq[i];
|
struct receive_queue *rq = &vi->rq[i];
|
||||||
|
|
||||||
napi_disable(&rq->napi);
|
napi_disable(&rq->napi);
|
||||||
|
@ -637,7 +636,7 @@ static int virtnet_open(struct net_device *dev)
|
||||||
struct virtnet_info *vi = netdev_priv(dev);
|
struct virtnet_info *vi = netdev_priv(dev);
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
for (i = 0; i < vi->max_queue_pairs; i++) {
|
for (i = 0; i < vi->curr_queue_pairs; i++) {
|
||||||
/* Make sure we have some buffers: if oom use wq. */
|
/* Make sure we have some buffers: if oom use wq. */
|
||||||
if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
|
if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
|
||||||
schedule_delayed_work(&vi->refill, 0);
|
schedule_delayed_work(&vi->refill, 0);
|
||||||
|
@ -711,8 +710,7 @@ static int xmit_skb(struct send_queue *sq, struct sk_buff *skb)
|
||||||
sg_set_buf(sq->sg, &hdr->hdr, sizeof hdr->hdr);
|
sg_set_buf(sq->sg, &hdr->hdr, sizeof hdr->hdr);
|
||||||
|
|
||||||
num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
|
num_sg = skb_to_sgvec(skb, sq->sg + 1, 0, skb->len) + 1;
|
||||||
return virtqueue_add_buf(sq->vq, sq->sg, num_sg,
|
return virtqueue_add_outbuf(sq->vq, sq->sg, num_sg, skb, GFP_ATOMIC);
|
||||||
0, skb, GFP_ATOMIC);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
|
static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
|
||||||
|
@ -767,32 +765,35 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev)
|
||||||
* never fail unless improperly formated.
|
* never fail unless improperly formated.
|
||||||
*/
|
*/
|
||||||
static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
|
static bool virtnet_send_command(struct virtnet_info *vi, u8 class, u8 cmd,
|
||||||
struct scatterlist *data, int out, int in)
|
struct scatterlist *out,
|
||||||
|
struct scatterlist *in)
|
||||||
{
|
{
|
||||||
struct scatterlist *s, sg[VIRTNET_SEND_COMMAND_SG_MAX + 2];
|
struct scatterlist *sgs[4], hdr, stat;
|
||||||
struct virtio_net_ctrl_hdr ctrl;
|
struct virtio_net_ctrl_hdr ctrl;
|
||||||
virtio_net_ctrl_ack status = ~0;
|
virtio_net_ctrl_ack status = ~0;
|
||||||
unsigned int tmp;
|
unsigned out_num = 0, in_num = 0, tmp;
|
||||||
int i;
|
|
||||||
|
|
||||||
/* Caller should know better */
|
/* Caller should know better */
|
||||||
BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ) ||
|
BUG_ON(!virtio_has_feature(vi->vdev, VIRTIO_NET_F_CTRL_VQ));
|
||||||
(out + in > VIRTNET_SEND_COMMAND_SG_MAX));
|
|
||||||
|
|
||||||
out++; /* Add header */
|
|
||||||
in++; /* Add return status */
|
|
||||||
|
|
||||||
ctrl.class = class;
|
ctrl.class = class;
|
||||||
ctrl.cmd = cmd;
|
ctrl.cmd = cmd;
|
||||||
|
/* Add header */
|
||||||
|
sg_init_one(&hdr, &ctrl, sizeof(ctrl));
|
||||||
|
sgs[out_num++] = &hdr;
|
||||||
|
|
||||||
sg_init_table(sg, out + in);
|
if (out)
|
||||||
|
sgs[out_num++] = out;
|
||||||
|
if (in)
|
||||||
|
sgs[out_num + in_num++] = in;
|
||||||
|
|
||||||
sg_set_buf(&sg[0], &ctrl, sizeof(ctrl));
|
/* Add return status. */
|
||||||
for_each_sg(data, s, out + in - 2, i)
|
sg_init_one(&stat, &status, sizeof(status));
|
||||||
sg_set_buf(&sg[i + 1], sg_virt(s), s->length);
|
sgs[out_num + in_num++] = &stat;
|
||||||
sg_set_buf(&sg[out + in - 1], &status, sizeof(status));
|
|
||||||
|
|
||||||
BUG_ON(virtqueue_add_buf(vi->cvq, sg, out, in, vi, GFP_ATOMIC) < 0);
|
BUG_ON(out_num + in_num > ARRAY_SIZE(sgs));
|
||||||
|
BUG_ON(virtqueue_add_sgs(vi->cvq, sgs, out_num, in_num, vi, GFP_ATOMIC)
|
||||||
|
< 0);
|
||||||
|
|
||||||
virtqueue_kick(vi->cvq);
|
virtqueue_kick(vi->cvq);
|
||||||
|
|
||||||
|
@ -821,7 +822,7 @@ static int virtnet_set_mac_address(struct net_device *dev, void *p)
|
||||||
sg_init_one(&sg, addr->sa_data, dev->addr_len);
|
sg_init_one(&sg, addr->sa_data, dev->addr_len);
|
||||||
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
|
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
|
||||||
VIRTIO_NET_CTRL_MAC_ADDR_SET,
|
VIRTIO_NET_CTRL_MAC_ADDR_SET,
|
||||||
&sg, 1, 0)) {
|
&sg, NULL)) {
|
||||||
dev_warn(&vdev->dev,
|
dev_warn(&vdev->dev,
|
||||||
"Failed to set mac address by vq command.\n");
|
"Failed to set mac address by vq command.\n");
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
|
@ -889,8 +890,7 @@ static void virtnet_ack_link_announce(struct virtnet_info *vi)
|
||||||
{
|
{
|
||||||
rtnl_lock();
|
rtnl_lock();
|
||||||
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
|
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_ANNOUNCE,
|
||||||
VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL,
|
VIRTIO_NET_CTRL_ANNOUNCE_ACK, NULL, NULL))
|
||||||
0, 0))
|
|
||||||
dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
|
dev_warn(&vi->dev->dev, "Failed to ack link announce.\n");
|
||||||
rtnl_unlock();
|
rtnl_unlock();
|
||||||
}
|
}
|
||||||
|
@ -900,6 +900,7 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
|
||||||
struct scatterlist sg;
|
struct scatterlist sg;
|
||||||
struct virtio_net_ctrl_mq s;
|
struct virtio_net_ctrl_mq s;
|
||||||
struct net_device *dev = vi->dev;
|
struct net_device *dev = vi->dev;
|
||||||
|
int i;
|
||||||
|
|
||||||
if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
|
if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ))
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -908,12 +909,16 @@ static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs)
|
||||||
sg_init_one(&sg, &s, sizeof(s));
|
sg_init_one(&sg, &s, sizeof(s));
|
||||||
|
|
||||||
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
|
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ,
|
||||||
VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg, 1, 0)){
|
VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg, NULL)) {
|
||||||
dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n",
|
dev_warn(&dev->dev, "Fail to set num of queue pairs to %d\n",
|
||||||
queue_pairs);
|
queue_pairs);
|
||||||
return -EINVAL;
|
return -EINVAL;
|
||||||
} else
|
} else {
|
||||||
|
for (i = vi->curr_queue_pairs; i < queue_pairs; i++)
|
||||||
|
if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
|
||||||
|
schedule_delayed_work(&vi->refill, 0);
|
||||||
vi->curr_queue_pairs = queue_pairs;
|
vi->curr_queue_pairs = queue_pairs;
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -955,7 +960,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
|
||||||
|
|
||||||
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
|
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
|
||||||
VIRTIO_NET_CTRL_RX_PROMISC,
|
VIRTIO_NET_CTRL_RX_PROMISC,
|
||||||
sg, 1, 0))
|
sg, NULL))
|
||||||
dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
|
dev_warn(&dev->dev, "Failed to %sable promisc mode.\n",
|
||||||
promisc ? "en" : "dis");
|
promisc ? "en" : "dis");
|
||||||
|
|
||||||
|
@ -963,7 +968,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
|
||||||
|
|
||||||
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
|
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_RX,
|
||||||
VIRTIO_NET_CTRL_RX_ALLMULTI,
|
VIRTIO_NET_CTRL_RX_ALLMULTI,
|
||||||
sg, 1, 0))
|
sg, NULL))
|
||||||
dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
|
dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n",
|
||||||
allmulti ? "en" : "dis");
|
allmulti ? "en" : "dis");
|
||||||
|
|
||||||
|
@ -1000,7 +1005,7 @@ static void virtnet_set_rx_mode(struct net_device *dev)
|
||||||
|
|
||||||
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
|
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MAC,
|
||||||
VIRTIO_NET_CTRL_MAC_TABLE_SET,
|
VIRTIO_NET_CTRL_MAC_TABLE_SET,
|
||||||
sg, 2, 0))
|
sg, NULL))
|
||||||
dev_warn(&dev->dev, "Failed to set MAC fitler table.\n");
|
dev_warn(&dev->dev, "Failed to set MAC fitler table.\n");
|
||||||
|
|
||||||
kfree(buf);
|
kfree(buf);
|
||||||
|
@ -1015,7 +1020,7 @@ static int virtnet_vlan_rx_add_vid(struct net_device *dev,
|
||||||
sg_init_one(&sg, &vid, sizeof(vid));
|
sg_init_one(&sg, &vid, sizeof(vid));
|
||||||
|
|
||||||
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
|
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
|
||||||
VIRTIO_NET_CTRL_VLAN_ADD, &sg, 1, 0))
|
VIRTIO_NET_CTRL_VLAN_ADD, &sg, NULL))
|
||||||
dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
|
dev_warn(&dev->dev, "Failed to add VLAN ID %d.\n", vid);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -1029,7 +1034,7 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev,
|
||||||
sg_init_one(&sg, &vid, sizeof(vid));
|
sg_init_one(&sg, &vid, sizeof(vid));
|
||||||
|
|
||||||
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
|
if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN,
|
||||||
VIRTIO_NET_CTRL_VLAN_DEL, &sg, 1, 0))
|
VIRTIO_NET_CTRL_VLAN_DEL, &sg, NULL))
|
||||||
dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
|
dev_warn(&dev->dev, "Failed to kill VLAN ID %d.\n", vid);
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -1570,7 +1575,7 @@ static int virtnet_probe(struct virtio_device *vdev)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Last of all, set up some receive buffers. */
|
/* Last of all, set up some receive buffers. */
|
||||||
for (i = 0; i < vi->max_queue_pairs; i++) {
|
for (i = 0; i < vi->curr_queue_pairs; i++) {
|
||||||
try_fill_recv(&vi->rq[i], GFP_KERNEL);
|
try_fill_recv(&vi->rq[i], GFP_KERNEL);
|
||||||
|
|
||||||
/* If we didn't even get one input buffer, we're useless. */
|
/* If we didn't even get one input buffer, we're useless. */
|
||||||
|
@ -1694,7 +1699,7 @@ static int virtnet_restore(struct virtio_device *vdev)
|
||||||
|
|
||||||
netif_device_attach(vi->dev);
|
netif_device_attach(vi->dev);
|
||||||
|
|
||||||
for (i = 0; i < vi->max_queue_pairs; i++)
|
for (i = 0; i < vi->curr_queue_pairs; i++)
|
||||||
if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
|
if (!try_fill_recv(&vi->rq[i], GFP_KERNEL))
|
||||||
schedule_delayed_work(&vi->refill, 0);
|
schedule_delayed_work(&vi->refill, 0);
|
||||||
|
|
||||||
|
|
|
@ -757,14 +757,14 @@ int rpmsg_send_offchannel_raw(struct rpmsg_channel *rpdev, u32 src, u32 dst,
|
||||||
mutex_lock(&vrp->tx_lock);
|
mutex_lock(&vrp->tx_lock);
|
||||||
|
|
||||||
/* add message to the remote processor's virtqueue */
|
/* add message to the remote processor's virtqueue */
|
||||||
err = virtqueue_add_buf(vrp->svq, &sg, 1, 0, msg, GFP_KERNEL);
|
err = virtqueue_add_outbuf(vrp->svq, &sg, 1, msg, GFP_KERNEL);
|
||||||
if (err) {
|
if (err) {
|
||||||
/*
|
/*
|
||||||
* need to reclaim the buffer here, otherwise it's lost
|
* need to reclaim the buffer here, otherwise it's lost
|
||||||
* (memory won't leak, but rpmsg won't use it again for TX).
|
* (memory won't leak, but rpmsg won't use it again for TX).
|
||||||
* this will wait for a buffer management overhaul.
|
* this will wait for a buffer management overhaul.
|
||||||
*/
|
*/
|
||||||
dev_err(dev, "virtqueue_add_buf failed: %d\n", err);
|
dev_err(dev, "virtqueue_add_outbuf failed: %d\n", err);
|
||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -839,7 +839,7 @@ static void rpmsg_recv_done(struct virtqueue *rvq)
|
||||||
sg_init_one(&sg, msg, RPMSG_BUF_SIZE);
|
sg_init_one(&sg, msg, RPMSG_BUF_SIZE);
|
||||||
|
|
||||||
/* add the buffer back to the remote processor's virtqueue */
|
/* add the buffer back to the remote processor's virtqueue */
|
||||||
err = virtqueue_add_buf(vrp->rvq, &sg, 0, 1, msg, GFP_KERNEL);
|
err = virtqueue_add_inbuf(vrp->rvq, &sg, 1, msg, GFP_KERNEL);
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
dev_err(dev, "failed to add a virtqueue buffer: %d\n", err);
|
dev_err(dev, "failed to add a virtqueue buffer: %d\n", err);
|
||||||
return;
|
return;
|
||||||
|
@ -972,7 +972,7 @@ static int rpmsg_probe(struct virtio_device *vdev)
|
||||||
|
|
||||||
sg_init_one(&sg, cpu_addr, RPMSG_BUF_SIZE);
|
sg_init_one(&sg, cpu_addr, RPMSG_BUF_SIZE);
|
||||||
|
|
||||||
err = virtqueue_add_buf(vrp->rvq, &sg, 0, 1, cpu_addr,
|
err = virtqueue_add_inbuf(vrp->rvq, &sg, 1, cpu_addr,
|
||||||
GFP_KERNEL);
|
GFP_KERNEL);
|
||||||
WARN_ON(err); /* sanity check; this can't really happen */
|
WARN_ON(err); /* sanity check; this can't really happen */
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,6 +13,8 @@
|
||||||
*
|
*
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
|
||||||
|
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/mempool.h>
|
#include <linux/mempool.h>
|
||||||
|
@ -20,12 +22,14 @@
|
||||||
#include <linux/virtio_ids.h>
|
#include <linux/virtio_ids.h>
|
||||||
#include <linux/virtio_config.h>
|
#include <linux/virtio_config.h>
|
||||||
#include <linux/virtio_scsi.h>
|
#include <linux/virtio_scsi.h>
|
||||||
|
#include <linux/cpu.h>
|
||||||
#include <scsi/scsi_host.h>
|
#include <scsi/scsi_host.h>
|
||||||
#include <scsi/scsi_device.h>
|
#include <scsi/scsi_device.h>
|
||||||
#include <scsi/scsi_cmnd.h>
|
#include <scsi/scsi_cmnd.h>
|
||||||
|
|
||||||
#define VIRTIO_SCSI_MEMPOOL_SZ 64
|
#define VIRTIO_SCSI_MEMPOOL_SZ 64
|
||||||
#define VIRTIO_SCSI_EVENT_LEN 8
|
#define VIRTIO_SCSI_EVENT_LEN 8
|
||||||
|
#define VIRTIO_SCSI_VQ_BASE 2
|
||||||
|
|
||||||
/* Command queue element */
|
/* Command queue element */
|
||||||
struct virtio_scsi_cmd {
|
struct virtio_scsi_cmd {
|
||||||
|
@ -57,27 +61,61 @@ struct virtio_scsi_vq {
|
||||||
struct virtqueue *vq;
|
struct virtqueue *vq;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Per-target queue state */
|
/*
|
||||||
|
* Per-target queue state.
|
||||||
|
*
|
||||||
|
* This struct holds the data needed by the queue steering policy. When a
|
||||||
|
* target is sent multiple requests, we need to drive them to the same queue so
|
||||||
|
* that FIFO processing order is kept. However, if a target was idle, we can
|
||||||
|
* choose a queue arbitrarily. In this case the queue is chosen according to
|
||||||
|
* the current VCPU, so the driver expects the number of request queues to be
|
||||||
|
* equal to the number of VCPUs. This makes it easy and fast to select the
|
||||||
|
* queue, and also lets the driver optimize the IRQ affinity for the virtqueues
|
||||||
|
* (each virtqueue's affinity is set to the CPU that "owns" the queue).
|
||||||
|
*
|
||||||
|
* An interesting effect of this policy is that only writes to req_vq need to
|
||||||
|
* take the tgt_lock. Read can be done outside the lock because:
|
||||||
|
*
|
||||||
|
* - writes of req_vq only occur when atomic_inc_return(&tgt->reqs) returns 1.
|
||||||
|
* In that case, no other CPU is reading req_vq: even if they were in
|
||||||
|
* virtscsi_queuecommand_multi, they would be spinning on tgt_lock.
|
||||||
|
*
|
||||||
|
* - reads of req_vq only occur when the target is not idle (reqs != 0).
|
||||||
|
* A CPU that enters virtscsi_queuecommand_multi will not modify req_vq.
|
||||||
|
*
|
||||||
|
* Similarly, decrements of reqs are never concurrent with writes of req_vq.
|
||||||
|
* Thus they can happen outside the tgt_lock, provided of course we make reqs
|
||||||
|
* an atomic_t.
|
||||||
|
*/
|
||||||
struct virtio_scsi_target_state {
|
struct virtio_scsi_target_state {
|
||||||
/* Protects sg. Lock hierarchy is tgt_lock -> vq_lock. */
|
/* This spinlock never held at the same time as vq_lock. */
|
||||||
spinlock_t tgt_lock;
|
spinlock_t tgt_lock;
|
||||||
|
|
||||||
/* For sglist construction when adding commands to the virtqueue. */
|
/* Count of outstanding requests. */
|
||||||
struct scatterlist sg[];
|
atomic_t reqs;
|
||||||
|
|
||||||
|
/* Currently active virtqueue for requests sent to this target. */
|
||||||
|
struct virtio_scsi_vq *req_vq;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Driver instance state */
|
/* Driver instance state */
|
||||||
struct virtio_scsi {
|
struct virtio_scsi {
|
||||||
struct virtio_device *vdev;
|
struct virtio_device *vdev;
|
||||||
|
|
||||||
struct virtio_scsi_vq ctrl_vq;
|
|
||||||
struct virtio_scsi_vq event_vq;
|
|
||||||
struct virtio_scsi_vq req_vq;
|
|
||||||
|
|
||||||
/* Get some buffers ready for event vq */
|
/* Get some buffers ready for event vq */
|
||||||
struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN];
|
struct virtio_scsi_event_node event_list[VIRTIO_SCSI_EVENT_LEN];
|
||||||
|
|
||||||
struct virtio_scsi_target_state *tgt[];
|
u32 num_queues;
|
||||||
|
|
||||||
|
/* If the affinity hint is set for virtqueues */
|
||||||
|
bool affinity_hint_set;
|
||||||
|
|
||||||
|
/* CPU hotplug notifier */
|
||||||
|
struct notifier_block nb;
|
||||||
|
|
||||||
|
struct virtio_scsi_vq ctrl_vq;
|
||||||
|
struct virtio_scsi_vq event_vq;
|
||||||
|
struct virtio_scsi_vq req_vqs[];
|
||||||
};
|
};
|
||||||
|
|
||||||
static struct kmem_cache *virtscsi_cmd_cache;
|
static struct kmem_cache *virtscsi_cmd_cache;
|
||||||
|
@ -107,11 +145,13 @@ static void virtscsi_compute_resid(struct scsi_cmnd *sc, u32 resid)
|
||||||
*
|
*
|
||||||
* Called with vq_lock held.
|
* Called with vq_lock held.
|
||||||
*/
|
*/
|
||||||
static void virtscsi_complete_cmd(void *buf)
|
static void virtscsi_complete_cmd(struct virtio_scsi *vscsi, void *buf)
|
||||||
{
|
{
|
||||||
struct virtio_scsi_cmd *cmd = buf;
|
struct virtio_scsi_cmd *cmd = buf;
|
||||||
struct scsi_cmnd *sc = cmd->sc;
|
struct scsi_cmnd *sc = cmd->sc;
|
||||||
struct virtio_scsi_cmd_resp *resp = &cmd->resp.cmd;
|
struct virtio_scsi_cmd_resp *resp = &cmd->resp.cmd;
|
||||||
|
struct virtio_scsi_target_state *tgt =
|
||||||
|
scsi_target(sc->device)->hostdata;
|
||||||
|
|
||||||
dev_dbg(&sc->device->sdev_gendev,
|
dev_dbg(&sc->device->sdev_gendev,
|
||||||
"cmd %p response %u status %#02x sense_len %u\n",
|
"cmd %p response %u status %#02x sense_len %u\n",
|
||||||
|
@ -166,32 +206,71 @@ static void virtscsi_complete_cmd(void *buf)
|
||||||
|
|
||||||
mempool_free(cmd, virtscsi_cmd_pool);
|
mempool_free(cmd, virtscsi_cmd_pool);
|
||||||
sc->scsi_done(sc);
|
sc->scsi_done(sc);
|
||||||
|
|
||||||
|
atomic_dec(&tgt->reqs);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void virtscsi_vq_done(struct virtqueue *vq, void (*fn)(void *buf))
|
static void virtscsi_vq_done(struct virtio_scsi *vscsi,
|
||||||
|
struct virtio_scsi_vq *virtscsi_vq,
|
||||||
|
void (*fn)(struct virtio_scsi *vscsi, void *buf))
|
||||||
{
|
{
|
||||||
void *buf;
|
void *buf;
|
||||||
unsigned int len;
|
unsigned int len;
|
||||||
|
unsigned long flags;
|
||||||
|
struct virtqueue *vq = virtscsi_vq->vq;
|
||||||
|
|
||||||
|
spin_lock_irqsave(&virtscsi_vq->vq_lock, flags);
|
||||||
do {
|
do {
|
||||||
virtqueue_disable_cb(vq);
|
virtqueue_disable_cb(vq);
|
||||||
while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
|
while ((buf = virtqueue_get_buf(vq, &len)) != NULL)
|
||||||
fn(buf);
|
fn(vscsi, buf);
|
||||||
} while (!virtqueue_enable_cb(vq));
|
} while (!virtqueue_enable_cb(vq));
|
||||||
|
spin_unlock_irqrestore(&virtscsi_vq->vq_lock, flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void virtscsi_req_done(struct virtqueue *vq)
|
static void virtscsi_req_done(struct virtqueue *vq)
|
||||||
{
|
{
|
||||||
struct Scsi_Host *sh = virtio_scsi_host(vq->vdev);
|
struct Scsi_Host *sh = virtio_scsi_host(vq->vdev);
|
||||||
struct virtio_scsi *vscsi = shost_priv(sh);
|
struct virtio_scsi *vscsi = shost_priv(sh);
|
||||||
unsigned long flags;
|
int index = vq->index - VIRTIO_SCSI_VQ_BASE;
|
||||||
|
struct virtio_scsi_vq *req_vq = &vscsi->req_vqs[index];
|
||||||
|
|
||||||
spin_lock_irqsave(&vscsi->req_vq.vq_lock, flags);
|
/*
|
||||||
virtscsi_vq_done(vq, virtscsi_complete_cmd);
|
* Read req_vq before decrementing the reqs field in
|
||||||
spin_unlock_irqrestore(&vscsi->req_vq.vq_lock, flags);
|
* virtscsi_complete_cmd.
|
||||||
|
*
|
||||||
|
* With barriers:
|
||||||
|
*
|
||||||
|
* CPU #0 virtscsi_queuecommand_multi (CPU #1)
|
||||||
|
* ------------------------------------------------------------
|
||||||
|
* lock vq_lock
|
||||||
|
* read req_vq
|
||||||
|
* read reqs (reqs = 1)
|
||||||
|
* write reqs (reqs = 0)
|
||||||
|
* increment reqs (reqs = 1)
|
||||||
|
* write req_vq
|
||||||
|
*
|
||||||
|
* Possible reordering without barriers:
|
||||||
|
*
|
||||||
|
* CPU #0 virtscsi_queuecommand_multi (CPU #1)
|
||||||
|
* ------------------------------------------------------------
|
||||||
|
* lock vq_lock
|
||||||
|
* read reqs (reqs = 1)
|
||||||
|
* write reqs (reqs = 0)
|
||||||
|
* increment reqs (reqs = 1)
|
||||||
|
* write req_vq
|
||||||
|
* read (wrong) req_vq
|
||||||
|
*
|
||||||
|
* We do not need a full smp_rmb, because req_vq is required to get
|
||||||
|
* to tgt->reqs: tgt is &vscsi->tgt[sc->device->id], where sc is stored
|
||||||
|
* in the virtqueue as the user token.
|
||||||
|
*/
|
||||||
|
smp_read_barrier_depends();
|
||||||
|
|
||||||
|
virtscsi_vq_done(vscsi, req_vq, virtscsi_complete_cmd);
|
||||||
};
|
};
|
||||||
|
|
||||||
static void virtscsi_complete_free(void *buf)
|
static void virtscsi_complete_free(struct virtio_scsi *vscsi, void *buf)
|
||||||
{
|
{
|
||||||
struct virtio_scsi_cmd *cmd = buf;
|
struct virtio_scsi_cmd *cmd = buf;
|
||||||
|
|
||||||
|
@ -205,11 +284,8 @@ static void virtscsi_ctrl_done(struct virtqueue *vq)
|
||||||
{
|
{
|
||||||
struct Scsi_Host *sh = virtio_scsi_host(vq->vdev);
|
struct Scsi_Host *sh = virtio_scsi_host(vq->vdev);
|
||||||
struct virtio_scsi *vscsi = shost_priv(sh);
|
struct virtio_scsi *vscsi = shost_priv(sh);
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
spin_lock_irqsave(&vscsi->ctrl_vq.vq_lock, flags);
|
virtscsi_vq_done(vscsi, &vscsi->ctrl_vq, virtscsi_complete_free);
|
||||||
virtscsi_vq_done(vq, virtscsi_complete_free);
|
|
||||||
spin_unlock_irqrestore(&vscsi->ctrl_vq.vq_lock, flags);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static int virtscsi_kick_event(struct virtio_scsi *vscsi,
|
static int virtscsi_kick_event(struct virtio_scsi *vscsi,
|
||||||
|
@ -223,8 +299,8 @@ static int virtscsi_kick_event(struct virtio_scsi *vscsi,
|
||||||
|
|
||||||
spin_lock_irqsave(&vscsi->event_vq.vq_lock, flags);
|
spin_lock_irqsave(&vscsi->event_vq.vq_lock, flags);
|
||||||
|
|
||||||
err = virtqueue_add_buf(vscsi->event_vq.vq, &sg, 0, 1, event_node,
|
err = virtqueue_add_inbuf(vscsi->event_vq.vq, &sg, 1, event_node,
|
||||||
GFP_ATOMIC);
|
GFP_ATOMIC);
|
||||||
if (!err)
|
if (!err)
|
||||||
virtqueue_kick(vscsi->event_vq.vq);
|
virtqueue_kick(vscsi->event_vq.vq);
|
||||||
|
|
||||||
|
@ -254,7 +330,7 @@ static void virtscsi_cancel_event_work(struct virtio_scsi *vscsi)
|
||||||
}
|
}
|
||||||
|
|
||||||
static void virtscsi_handle_transport_reset(struct virtio_scsi *vscsi,
|
static void virtscsi_handle_transport_reset(struct virtio_scsi *vscsi,
|
||||||
struct virtio_scsi_event *event)
|
struct virtio_scsi_event *event)
|
||||||
{
|
{
|
||||||
struct scsi_device *sdev;
|
struct scsi_device *sdev;
|
||||||
struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
|
struct Scsi_Host *shost = virtio_scsi_host(vscsi->vdev);
|
||||||
|
@ -332,7 +408,7 @@ static void virtscsi_handle_event(struct work_struct *work)
|
||||||
virtscsi_kick_event(vscsi, event_node);
|
virtscsi_kick_event(vscsi, event_node);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void virtscsi_complete_event(void *buf)
|
static void virtscsi_complete_event(struct virtio_scsi *vscsi, void *buf)
|
||||||
{
|
{
|
||||||
struct virtio_scsi_event_node *event_node = buf;
|
struct virtio_scsi_event_node *event_node = buf;
|
||||||
|
|
||||||
|
@ -344,82 +420,65 @@ static void virtscsi_event_done(struct virtqueue *vq)
|
||||||
{
|
{
|
||||||
struct Scsi_Host *sh = virtio_scsi_host(vq->vdev);
|
struct Scsi_Host *sh = virtio_scsi_host(vq->vdev);
|
||||||
struct virtio_scsi *vscsi = shost_priv(sh);
|
struct virtio_scsi *vscsi = shost_priv(sh);
|
||||||
unsigned long flags;
|
|
||||||
|
|
||||||
spin_lock_irqsave(&vscsi->event_vq.vq_lock, flags);
|
virtscsi_vq_done(vscsi, &vscsi->event_vq, virtscsi_complete_event);
|
||||||
virtscsi_vq_done(vq, virtscsi_complete_event);
|
|
||||||
spin_unlock_irqrestore(&vscsi->event_vq.vq_lock, flags);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
static void virtscsi_map_sgl(struct scatterlist *sg, unsigned int *p_idx,
|
|
||||||
struct scsi_data_buffer *sdb)
|
|
||||||
{
|
|
||||||
struct sg_table *table = &sdb->table;
|
|
||||||
struct scatterlist *sg_elem;
|
|
||||||
unsigned int idx = *p_idx;
|
|
||||||
int i;
|
|
||||||
|
|
||||||
for_each_sg(table->sgl, sg_elem, table->nents, i)
|
|
||||||
sg[idx++] = *sg_elem;
|
|
||||||
|
|
||||||
*p_idx = idx;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* virtscsi_map_cmd - map a scsi_cmd to a virtqueue scatterlist
|
* virtscsi_add_cmd - add a virtio_scsi_cmd to a virtqueue
|
||||||
* @vscsi : virtio_scsi state
|
* @vq : the struct virtqueue we're talking about
|
||||||
* @cmd : command structure
|
* @cmd : command structure
|
||||||
* @out_num : number of read-only elements
|
|
||||||
* @in_num : number of write-only elements
|
|
||||||
* @req_size : size of the request buffer
|
* @req_size : size of the request buffer
|
||||||
* @resp_size : size of the response buffer
|
* @resp_size : size of the response buffer
|
||||||
*
|
* @gfp : flags to use for memory allocations
|
||||||
* Called with tgt_lock held.
|
|
||||||
*/
|
*/
|
||||||
static void virtscsi_map_cmd(struct virtio_scsi_target_state *tgt,
|
static int virtscsi_add_cmd(struct virtqueue *vq,
|
||||||
struct virtio_scsi_cmd *cmd,
|
struct virtio_scsi_cmd *cmd,
|
||||||
unsigned *out_num, unsigned *in_num,
|
size_t req_size, size_t resp_size, gfp_t gfp)
|
||||||
size_t req_size, size_t resp_size)
|
|
||||||
{
|
{
|
||||||
struct scsi_cmnd *sc = cmd->sc;
|
struct scsi_cmnd *sc = cmd->sc;
|
||||||
struct scatterlist *sg = tgt->sg;
|
struct scatterlist *sgs[4], req, resp;
|
||||||
unsigned int idx = 0;
|
struct sg_table *out, *in;
|
||||||
|
unsigned out_num = 0, in_num = 0;
|
||||||
|
|
||||||
|
out = in = NULL;
|
||||||
|
|
||||||
|
if (sc && sc->sc_data_direction != DMA_NONE) {
|
||||||
|
if (sc->sc_data_direction != DMA_FROM_DEVICE)
|
||||||
|
out = &scsi_out(sc)->table;
|
||||||
|
if (sc->sc_data_direction != DMA_TO_DEVICE)
|
||||||
|
in = &scsi_in(sc)->table;
|
||||||
|
}
|
||||||
|
|
||||||
/* Request header. */
|
/* Request header. */
|
||||||
sg_set_buf(&sg[idx++], &cmd->req, req_size);
|
sg_init_one(&req, &cmd->req, req_size);
|
||||||
|
sgs[out_num++] = &req;
|
||||||
|
|
||||||
/* Data-out buffer. */
|
/* Data-out buffer. */
|
||||||
if (sc && sc->sc_data_direction != DMA_FROM_DEVICE)
|
if (out)
|
||||||
virtscsi_map_sgl(sg, &idx, scsi_out(sc));
|
sgs[out_num++] = out->sgl;
|
||||||
|
|
||||||
*out_num = idx;
|
|
||||||
|
|
||||||
/* Response header. */
|
/* Response header. */
|
||||||
sg_set_buf(&sg[idx++], &cmd->resp, resp_size);
|
sg_init_one(&resp, &cmd->resp, resp_size);
|
||||||
|
sgs[out_num + in_num++] = &resp;
|
||||||
|
|
||||||
/* Data-in buffer */
|
/* Data-in buffer */
|
||||||
if (sc && sc->sc_data_direction != DMA_TO_DEVICE)
|
if (in)
|
||||||
virtscsi_map_sgl(sg, &idx, scsi_in(sc));
|
sgs[out_num + in_num++] = in->sgl;
|
||||||
|
|
||||||
*in_num = idx - *out_num;
|
return virtqueue_add_sgs(vq, sgs, out_num, in_num, cmd, gfp);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int virtscsi_kick_cmd(struct virtio_scsi_target_state *tgt,
|
static int virtscsi_kick_cmd(struct virtio_scsi_vq *vq,
|
||||||
struct virtio_scsi_vq *vq,
|
|
||||||
struct virtio_scsi_cmd *cmd,
|
struct virtio_scsi_cmd *cmd,
|
||||||
size_t req_size, size_t resp_size, gfp_t gfp)
|
size_t req_size, size_t resp_size, gfp_t gfp)
|
||||||
{
|
{
|
||||||
unsigned int out_num, in_num;
|
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
int err;
|
int err;
|
||||||
bool needs_kick = false;
|
bool needs_kick = false;
|
||||||
|
|
||||||
spin_lock_irqsave(&tgt->tgt_lock, flags);
|
spin_lock_irqsave(&vq->vq_lock, flags);
|
||||||
virtscsi_map_cmd(tgt, cmd, &out_num, &in_num, req_size, resp_size);
|
err = virtscsi_add_cmd(vq->vq, cmd, req_size, resp_size, gfp);
|
||||||
|
|
||||||
spin_lock(&vq->vq_lock);
|
|
||||||
err = virtqueue_add_buf(vq->vq, tgt->sg, out_num, in_num, cmd, gfp);
|
|
||||||
spin_unlock(&tgt->tgt_lock);
|
|
||||||
if (!err)
|
if (!err)
|
||||||
needs_kick = virtqueue_kick_prepare(vq->vq);
|
needs_kick = virtqueue_kick_prepare(vq->vq);
|
||||||
|
|
||||||
|
@ -430,10 +489,10 @@ static int virtscsi_kick_cmd(struct virtio_scsi_target_state *tgt,
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int virtscsi_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc)
|
static int virtscsi_queuecommand(struct virtio_scsi *vscsi,
|
||||||
|
struct virtio_scsi_vq *req_vq,
|
||||||
|
struct scsi_cmnd *sc)
|
||||||
{
|
{
|
||||||
struct virtio_scsi *vscsi = shost_priv(sh);
|
|
||||||
struct virtio_scsi_target_state *tgt = vscsi->tgt[sc->device->id];
|
|
||||||
struct virtio_scsi_cmd *cmd;
|
struct virtio_scsi_cmd *cmd;
|
||||||
int ret;
|
int ret;
|
||||||
|
|
||||||
|
@ -467,7 +526,7 @@ static int virtscsi_queuecommand(struct Scsi_Host *sh, struct scsi_cmnd *sc)
|
||||||
BUG_ON(sc->cmd_len > VIRTIO_SCSI_CDB_SIZE);
|
BUG_ON(sc->cmd_len > VIRTIO_SCSI_CDB_SIZE);
|
||||||
memcpy(cmd->req.cmd.cdb, sc->cmnd, sc->cmd_len);
|
memcpy(cmd->req.cmd.cdb, sc->cmnd, sc->cmd_len);
|
||||||
|
|
||||||
if (virtscsi_kick_cmd(tgt, &vscsi->req_vq, cmd,
|
if (virtscsi_kick_cmd(req_vq, cmd,
|
||||||
sizeof cmd->req.cmd, sizeof cmd->resp.cmd,
|
sizeof cmd->req.cmd, sizeof cmd->resp.cmd,
|
||||||
GFP_ATOMIC) == 0)
|
GFP_ATOMIC) == 0)
|
||||||
ret = 0;
|
ret = 0;
|
||||||
|
@ -478,14 +537,62 @@ out:
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int virtscsi_queuecommand_single(struct Scsi_Host *sh,
|
||||||
|
struct scsi_cmnd *sc)
|
||||||
|
{
|
||||||
|
struct virtio_scsi *vscsi = shost_priv(sh);
|
||||||
|
struct virtio_scsi_target_state *tgt =
|
||||||
|
scsi_target(sc->device)->hostdata;
|
||||||
|
|
||||||
|
atomic_inc(&tgt->reqs);
|
||||||
|
return virtscsi_queuecommand(vscsi, &vscsi->req_vqs[0], sc);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct virtio_scsi_vq *virtscsi_pick_vq(struct virtio_scsi *vscsi,
|
||||||
|
struct virtio_scsi_target_state *tgt)
|
||||||
|
{
|
||||||
|
struct virtio_scsi_vq *vq;
|
||||||
|
unsigned long flags;
|
||||||
|
u32 queue_num;
|
||||||
|
|
||||||
|
spin_lock_irqsave(&tgt->tgt_lock, flags);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The memory barrier after atomic_inc_return matches
|
||||||
|
* the smp_read_barrier_depends() in virtscsi_req_done.
|
||||||
|
*/
|
||||||
|
if (atomic_inc_return(&tgt->reqs) > 1)
|
||||||
|
vq = ACCESS_ONCE(tgt->req_vq);
|
||||||
|
else {
|
||||||
|
queue_num = smp_processor_id();
|
||||||
|
while (unlikely(queue_num >= vscsi->num_queues))
|
||||||
|
queue_num -= vscsi->num_queues;
|
||||||
|
|
||||||
|
tgt->req_vq = vq = &vscsi->req_vqs[queue_num];
|
||||||
|
}
|
||||||
|
|
||||||
|
spin_unlock_irqrestore(&tgt->tgt_lock, flags);
|
||||||
|
return vq;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int virtscsi_queuecommand_multi(struct Scsi_Host *sh,
|
||||||
|
struct scsi_cmnd *sc)
|
||||||
|
{
|
||||||
|
struct virtio_scsi *vscsi = shost_priv(sh);
|
||||||
|
struct virtio_scsi_target_state *tgt =
|
||||||
|
scsi_target(sc->device)->hostdata;
|
||||||
|
struct virtio_scsi_vq *req_vq = virtscsi_pick_vq(vscsi, tgt);
|
||||||
|
|
||||||
|
return virtscsi_queuecommand(vscsi, req_vq, sc);
|
||||||
|
}
|
||||||
|
|
||||||
static int virtscsi_tmf(struct virtio_scsi *vscsi, struct virtio_scsi_cmd *cmd)
|
static int virtscsi_tmf(struct virtio_scsi *vscsi, struct virtio_scsi_cmd *cmd)
|
||||||
{
|
{
|
||||||
DECLARE_COMPLETION_ONSTACK(comp);
|
DECLARE_COMPLETION_ONSTACK(comp);
|
||||||
struct virtio_scsi_target_state *tgt = vscsi->tgt[cmd->sc->device->id];
|
|
||||||
int ret = FAILED;
|
int ret = FAILED;
|
||||||
|
|
||||||
cmd->comp = &comp;
|
cmd->comp = &comp;
|
||||||
if (virtscsi_kick_cmd(tgt, &vscsi->ctrl_vq, cmd,
|
if (virtscsi_kick_cmd(&vscsi->ctrl_vq, cmd,
|
||||||
sizeof cmd->req.tmf, sizeof cmd->resp.tmf,
|
sizeof cmd->req.tmf, sizeof cmd->resp.tmf,
|
||||||
GFP_NOIO) < 0)
|
GFP_NOIO) < 0)
|
||||||
goto out;
|
goto out;
|
||||||
|
@ -547,18 +654,57 @@ static int virtscsi_abort(struct scsi_cmnd *sc)
|
||||||
return virtscsi_tmf(vscsi, cmd);
|
return virtscsi_tmf(vscsi, cmd);
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct scsi_host_template virtscsi_host_template = {
|
static int virtscsi_target_alloc(struct scsi_target *starget)
|
||||||
|
{
|
||||||
|
struct virtio_scsi_target_state *tgt =
|
||||||
|
kmalloc(sizeof(*tgt), GFP_KERNEL);
|
||||||
|
if (!tgt)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
spin_lock_init(&tgt->tgt_lock);
|
||||||
|
atomic_set(&tgt->reqs, 0);
|
||||||
|
tgt->req_vq = NULL;
|
||||||
|
|
||||||
|
starget->hostdata = tgt;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void virtscsi_target_destroy(struct scsi_target *starget)
|
||||||
|
{
|
||||||
|
struct virtio_scsi_target_state *tgt = starget->hostdata;
|
||||||
|
kfree(tgt);
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct scsi_host_template virtscsi_host_template_single = {
|
||||||
.module = THIS_MODULE,
|
.module = THIS_MODULE,
|
||||||
.name = "Virtio SCSI HBA",
|
.name = "Virtio SCSI HBA",
|
||||||
.proc_name = "virtio_scsi",
|
.proc_name = "virtio_scsi",
|
||||||
.queuecommand = virtscsi_queuecommand,
|
|
||||||
.this_id = -1,
|
.this_id = -1,
|
||||||
|
.queuecommand = virtscsi_queuecommand_single,
|
||||||
.eh_abort_handler = virtscsi_abort,
|
.eh_abort_handler = virtscsi_abort,
|
||||||
.eh_device_reset_handler = virtscsi_device_reset,
|
.eh_device_reset_handler = virtscsi_device_reset,
|
||||||
|
|
||||||
.can_queue = 1024,
|
.can_queue = 1024,
|
||||||
.dma_boundary = UINT_MAX,
|
.dma_boundary = UINT_MAX,
|
||||||
.use_clustering = ENABLE_CLUSTERING,
|
.use_clustering = ENABLE_CLUSTERING,
|
||||||
|
.target_alloc = virtscsi_target_alloc,
|
||||||
|
.target_destroy = virtscsi_target_destroy,
|
||||||
|
};
|
||||||
|
|
||||||
|
static struct scsi_host_template virtscsi_host_template_multi = {
|
||||||
|
.module = THIS_MODULE,
|
||||||
|
.name = "Virtio SCSI HBA",
|
||||||
|
.proc_name = "virtio_scsi",
|
||||||
|
.this_id = -1,
|
||||||
|
.queuecommand = virtscsi_queuecommand_multi,
|
||||||
|
.eh_abort_handler = virtscsi_abort,
|
||||||
|
.eh_device_reset_handler = virtscsi_device_reset,
|
||||||
|
|
||||||
|
.can_queue = 1024,
|
||||||
|
.dma_boundary = UINT_MAX,
|
||||||
|
.use_clustering = ENABLE_CLUSTERING,
|
||||||
|
.target_alloc = virtscsi_target_alloc,
|
||||||
|
.target_destroy = virtscsi_target_destroy,
|
||||||
};
|
};
|
||||||
|
|
||||||
#define virtscsi_config_get(vdev, fld) \
|
#define virtscsi_config_get(vdev, fld) \
|
||||||
|
@ -578,6 +724,64 @@ static struct scsi_host_template virtscsi_host_template = {
|
||||||
&__val, sizeof(__val)); \
|
&__val, sizeof(__val)); \
|
||||||
})
|
})
|
||||||
|
|
||||||
|
static void __virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity)
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
int cpu;
|
||||||
|
|
||||||
|
/* In multiqueue mode, when the number of cpu is equal
|
||||||
|
* to the number of request queues, we let the queues
|
||||||
|
* to be private to one cpu by setting the affinity hint
|
||||||
|
* to eliminate the contention.
|
||||||
|
*/
|
||||||
|
if ((vscsi->num_queues == 1 ||
|
||||||
|
vscsi->num_queues != num_online_cpus()) && affinity) {
|
||||||
|
if (vscsi->affinity_hint_set)
|
||||||
|
affinity = false;
|
||||||
|
else
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (affinity) {
|
||||||
|
i = 0;
|
||||||
|
for_each_online_cpu(cpu) {
|
||||||
|
virtqueue_set_affinity(vscsi->req_vqs[i].vq, cpu);
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
|
||||||
|
vscsi->affinity_hint_set = true;
|
||||||
|
} else {
|
||||||
|
for (i = 0; i < vscsi->num_queues - VIRTIO_SCSI_VQ_BASE; i++)
|
||||||
|
virtqueue_set_affinity(vscsi->req_vqs[i].vq, -1);
|
||||||
|
|
||||||
|
vscsi->affinity_hint_set = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void virtscsi_set_affinity(struct virtio_scsi *vscsi, bool affinity)
|
||||||
|
{
|
||||||
|
get_online_cpus();
|
||||||
|
__virtscsi_set_affinity(vscsi, affinity);
|
||||||
|
put_online_cpus();
|
||||||
|
}
|
||||||
|
|
||||||
|
static int virtscsi_cpu_callback(struct notifier_block *nfb,
|
||||||
|
unsigned long action, void *hcpu)
|
||||||
|
{
|
||||||
|
struct virtio_scsi *vscsi = container_of(nfb, struct virtio_scsi, nb);
|
||||||
|
switch(action) {
|
||||||
|
case CPU_ONLINE:
|
||||||
|
case CPU_ONLINE_FROZEN:
|
||||||
|
case CPU_DEAD:
|
||||||
|
case CPU_DEAD_FROZEN:
|
||||||
|
__virtscsi_set_affinity(vscsi, true);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
return NOTIFY_OK;
|
||||||
|
}
|
||||||
|
|
||||||
static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq,
|
static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq,
|
||||||
struct virtqueue *vq)
|
struct virtqueue *vq)
|
||||||
{
|
{
|
||||||
|
@ -585,24 +789,6 @@ static void virtscsi_init_vq(struct virtio_scsi_vq *virtscsi_vq,
|
||||||
virtscsi_vq->vq = vq;
|
virtscsi_vq->vq = vq;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct virtio_scsi_target_state *virtscsi_alloc_tgt(
|
|
||||||
struct virtio_device *vdev, int sg_elems)
|
|
||||||
{
|
|
||||||
struct virtio_scsi_target_state *tgt;
|
|
||||||
gfp_t gfp_mask = GFP_KERNEL;
|
|
||||||
|
|
||||||
/* We need extra sg elements at head and tail. */
|
|
||||||
tgt = kmalloc(sizeof(*tgt) + sizeof(tgt->sg[0]) * (sg_elems + 2),
|
|
||||||
gfp_mask);
|
|
||||||
|
|
||||||
if (!tgt)
|
|
||||||
return NULL;
|
|
||||||
|
|
||||||
spin_lock_init(&tgt->tgt_lock);
|
|
||||||
sg_init_table(tgt->sg, sg_elems + 2);
|
|
||||||
return tgt;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void virtscsi_scan(struct virtio_device *vdev)
|
static void virtscsi_scan(struct virtio_device *vdev)
|
||||||
{
|
{
|
||||||
struct Scsi_Host *shost = (struct Scsi_Host *)vdev->priv;
|
struct Scsi_Host *shost = (struct Scsi_Host *)vdev->priv;
|
||||||
|
@ -614,46 +800,56 @@ static void virtscsi_remove_vqs(struct virtio_device *vdev)
|
||||||
{
|
{
|
||||||
struct Scsi_Host *sh = virtio_scsi_host(vdev);
|
struct Scsi_Host *sh = virtio_scsi_host(vdev);
|
||||||
struct virtio_scsi *vscsi = shost_priv(sh);
|
struct virtio_scsi *vscsi = shost_priv(sh);
|
||||||
u32 i, num_targets;
|
|
||||||
|
virtscsi_set_affinity(vscsi, false);
|
||||||
|
|
||||||
/* Stop all the virtqueues. */
|
/* Stop all the virtqueues. */
|
||||||
vdev->config->reset(vdev);
|
vdev->config->reset(vdev);
|
||||||
|
|
||||||
num_targets = sh->max_id;
|
|
||||||
for (i = 0; i < num_targets; i++) {
|
|
||||||
kfree(vscsi->tgt[i]);
|
|
||||||
vscsi->tgt[i] = NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
vdev->config->del_vqs(vdev);
|
vdev->config->del_vqs(vdev);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int virtscsi_init(struct virtio_device *vdev,
|
static int virtscsi_init(struct virtio_device *vdev,
|
||||||
struct virtio_scsi *vscsi, int num_targets)
|
struct virtio_scsi *vscsi)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
struct virtqueue *vqs[3];
|
u32 i;
|
||||||
u32 i, sg_elems;
|
u32 num_vqs;
|
||||||
|
vq_callback_t **callbacks;
|
||||||
|
const char **names;
|
||||||
|
struct virtqueue **vqs;
|
||||||
|
|
||||||
vq_callback_t *callbacks[] = {
|
num_vqs = vscsi->num_queues + VIRTIO_SCSI_VQ_BASE;
|
||||||
virtscsi_ctrl_done,
|
vqs = kmalloc(num_vqs * sizeof(struct virtqueue *), GFP_KERNEL);
|
||||||
virtscsi_event_done,
|
callbacks = kmalloc(num_vqs * sizeof(vq_callback_t *), GFP_KERNEL);
|
||||||
virtscsi_req_done
|
names = kmalloc(num_vqs * sizeof(char *), GFP_KERNEL);
|
||||||
};
|
|
||||||
const char *names[] = {
|
if (!callbacks || !vqs || !names) {
|
||||||
"control",
|
err = -ENOMEM;
|
||||||
"event",
|
goto out;
|
||||||
"request"
|
}
|
||||||
};
|
|
||||||
|
callbacks[0] = virtscsi_ctrl_done;
|
||||||
|
callbacks[1] = virtscsi_event_done;
|
||||||
|
names[0] = "control";
|
||||||
|
names[1] = "event";
|
||||||
|
for (i = VIRTIO_SCSI_VQ_BASE; i < num_vqs; i++) {
|
||||||
|
callbacks[i] = virtscsi_req_done;
|
||||||
|
names[i] = "request";
|
||||||
|
}
|
||||||
|
|
||||||
/* Discover virtqueues and write information to configuration. */
|
/* Discover virtqueues and write information to configuration. */
|
||||||
err = vdev->config->find_vqs(vdev, 3, vqs, callbacks, names);
|
err = vdev->config->find_vqs(vdev, num_vqs, vqs, callbacks, names);
|
||||||
if (err)
|
if (err)
|
||||||
return err;
|
goto out;
|
||||||
|
|
||||||
virtscsi_init_vq(&vscsi->ctrl_vq, vqs[0]);
|
virtscsi_init_vq(&vscsi->ctrl_vq, vqs[0]);
|
||||||
virtscsi_init_vq(&vscsi->event_vq, vqs[1]);
|
virtscsi_init_vq(&vscsi->event_vq, vqs[1]);
|
||||||
virtscsi_init_vq(&vscsi->req_vq, vqs[2]);
|
for (i = VIRTIO_SCSI_VQ_BASE; i < num_vqs; i++)
|
||||||
|
virtscsi_init_vq(&vscsi->req_vqs[i - VIRTIO_SCSI_VQ_BASE],
|
||||||
|
vqs[i]);
|
||||||
|
|
||||||
|
virtscsi_set_affinity(vscsi, true);
|
||||||
|
|
||||||
virtscsi_config_set(vdev, cdb_size, VIRTIO_SCSI_CDB_SIZE);
|
virtscsi_config_set(vdev, cdb_size, VIRTIO_SCSI_CDB_SIZE);
|
||||||
virtscsi_config_set(vdev, sense_size, VIRTIO_SCSI_SENSE_SIZE);
|
virtscsi_config_set(vdev, sense_size, VIRTIO_SCSI_SENSE_SIZE);
|
||||||
|
@ -661,19 +857,12 @@ static int virtscsi_init(struct virtio_device *vdev,
|
||||||
if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG))
|
if (virtio_has_feature(vdev, VIRTIO_SCSI_F_HOTPLUG))
|
||||||
virtscsi_kick_event_all(vscsi);
|
virtscsi_kick_event_all(vscsi);
|
||||||
|
|
||||||
/* We need to know how many segments before we allocate. */
|
|
||||||
sg_elems = virtscsi_config_get(vdev, seg_max) ?: 1;
|
|
||||||
|
|
||||||
for (i = 0; i < num_targets; i++) {
|
|
||||||
vscsi->tgt[i] = virtscsi_alloc_tgt(vdev, sg_elems);
|
|
||||||
if (!vscsi->tgt[i]) {
|
|
||||||
err = -ENOMEM;
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
err = 0;
|
err = 0;
|
||||||
|
|
||||||
out:
|
out:
|
||||||
|
kfree(names);
|
||||||
|
kfree(callbacks);
|
||||||
|
kfree(vqs);
|
||||||
if (err)
|
if (err)
|
||||||
virtscsi_remove_vqs(vdev);
|
virtscsi_remove_vqs(vdev);
|
||||||
return err;
|
return err;
|
||||||
|
@ -686,13 +875,21 @@ static int virtscsi_probe(struct virtio_device *vdev)
|
||||||
int err;
|
int err;
|
||||||
u32 sg_elems, num_targets;
|
u32 sg_elems, num_targets;
|
||||||
u32 cmd_per_lun;
|
u32 cmd_per_lun;
|
||||||
|
u32 num_queues;
|
||||||
|
struct scsi_host_template *hostt;
|
||||||
|
|
||||||
|
/* We need to know how many queues before we allocate. */
|
||||||
|
num_queues = virtscsi_config_get(vdev, num_queues) ? : 1;
|
||||||
|
|
||||||
/* Allocate memory and link the structs together. */
|
|
||||||
num_targets = virtscsi_config_get(vdev, max_target) + 1;
|
num_targets = virtscsi_config_get(vdev, max_target) + 1;
|
||||||
shost = scsi_host_alloc(&virtscsi_host_template,
|
|
||||||
sizeof(*vscsi)
|
|
||||||
+ num_targets * sizeof(struct virtio_scsi_target_state));
|
|
||||||
|
|
||||||
|
if (num_queues == 1)
|
||||||
|
hostt = &virtscsi_host_template_single;
|
||||||
|
else
|
||||||
|
hostt = &virtscsi_host_template_multi;
|
||||||
|
|
||||||
|
shost = scsi_host_alloc(hostt,
|
||||||
|
sizeof(*vscsi) + sizeof(vscsi->req_vqs[0]) * num_queues);
|
||||||
if (!shost)
|
if (!shost)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
|
@ -700,12 +897,20 @@ static int virtscsi_probe(struct virtio_device *vdev)
|
||||||
shost->sg_tablesize = sg_elems;
|
shost->sg_tablesize = sg_elems;
|
||||||
vscsi = shost_priv(shost);
|
vscsi = shost_priv(shost);
|
||||||
vscsi->vdev = vdev;
|
vscsi->vdev = vdev;
|
||||||
|
vscsi->num_queues = num_queues;
|
||||||
vdev->priv = shost;
|
vdev->priv = shost;
|
||||||
|
|
||||||
err = virtscsi_init(vdev, vscsi, num_targets);
|
err = virtscsi_init(vdev, vscsi);
|
||||||
if (err)
|
if (err)
|
||||||
goto virtscsi_init_failed;
|
goto virtscsi_init_failed;
|
||||||
|
|
||||||
|
vscsi->nb.notifier_call = &virtscsi_cpu_callback;
|
||||||
|
err = register_hotcpu_notifier(&vscsi->nb);
|
||||||
|
if (err) {
|
||||||
|
pr_err("registering cpu notifier failed\n");
|
||||||
|
goto scsi_add_host_failed;
|
||||||
|
}
|
||||||
|
|
||||||
cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1;
|
cmd_per_lun = virtscsi_config_get(vdev, cmd_per_lun) ?: 1;
|
||||||
shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue);
|
shost->cmd_per_lun = min_t(u32, cmd_per_lun, shost->can_queue);
|
||||||
shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0xFFFF;
|
shost->max_sectors = virtscsi_config_get(vdev, max_sectors) ?: 0xFFFF;
|
||||||
|
@ -743,6 +948,8 @@ static void virtscsi_remove(struct virtio_device *vdev)
|
||||||
|
|
||||||
scsi_remove_host(shost);
|
scsi_remove_host(shost);
|
||||||
|
|
||||||
|
unregister_hotcpu_notifier(&vscsi->nb);
|
||||||
|
|
||||||
virtscsi_remove_vqs(vdev);
|
virtscsi_remove_vqs(vdev);
|
||||||
scsi_host_put(shost);
|
scsi_host_put(shost);
|
||||||
}
|
}
|
||||||
|
@ -759,7 +966,7 @@ static int virtscsi_restore(struct virtio_device *vdev)
|
||||||
struct Scsi_Host *sh = virtio_scsi_host(vdev);
|
struct Scsi_Host *sh = virtio_scsi_host(vdev);
|
||||||
struct virtio_scsi *vscsi = shost_priv(sh);
|
struct virtio_scsi *vscsi = shost_priv(sh);
|
||||||
|
|
||||||
return virtscsi_init(vdev, vscsi, sh->max_id);
|
return virtscsi_init(vdev, vscsi);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -794,8 +1001,7 @@ static int __init init(void)
|
||||||
|
|
||||||
virtscsi_cmd_cache = KMEM_CACHE(virtio_scsi_cmd, 0);
|
virtscsi_cmd_cache = KMEM_CACHE(virtio_scsi_cmd, 0);
|
||||||
if (!virtscsi_cmd_cache) {
|
if (!virtscsi_cmd_cache) {
|
||||||
printk(KERN_ERR "kmem_cache_create() for "
|
pr_err("kmem_cache_create() for virtscsi_cmd_cache failed\n");
|
||||||
"virtscsi_cmd_cache failed\n");
|
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -804,8 +1010,7 @@ static int __init init(void)
|
||||||
mempool_create_slab_pool(VIRTIO_SCSI_MEMPOOL_SZ,
|
mempool_create_slab_pool(VIRTIO_SCSI_MEMPOOL_SZ,
|
||||||
virtscsi_cmd_cache);
|
virtscsi_cmd_cache);
|
||||||
if (!virtscsi_cmd_pool) {
|
if (!virtscsi_cmd_pool) {
|
||||||
printk(KERN_ERR "mempool_create() for"
|
pr_err("mempool_create() for virtscsi_cmd_pool failed\n");
|
||||||
"virtscsi_cmd_pool failed\n");
|
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
ret = register_virtio_driver(&virtio_scsi_driver);
|
ret = register_virtio_driver(&virtio_scsi_driver);
|
||||||
|
|
|
@ -1,6 +1,7 @@
|
||||||
config VHOST_NET
|
config VHOST_NET
|
||||||
tristate "Host kernel accelerator for virtio net"
|
tristate "Host kernel accelerator for virtio net"
|
||||||
depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP)
|
depends on NET && EVENTFD && (TUN || !TUN) && (MACVTAP || !MACVTAP)
|
||||||
|
select VHOST_RING
|
||||||
---help---
|
---help---
|
||||||
This kernel module can be loaded in host kernel to accelerate
|
This kernel module can be loaded in host kernel to accelerate
|
||||||
guest networking with virtio_net. Not to be confused with virtio_net
|
guest networking with virtio_net. Not to be confused with virtio_net
|
||||||
|
@ -12,7 +13,14 @@ config VHOST_NET
|
||||||
config VHOST_SCSI
|
config VHOST_SCSI
|
||||||
tristate "VHOST_SCSI TCM fabric driver"
|
tristate "VHOST_SCSI TCM fabric driver"
|
||||||
depends on TARGET_CORE && EVENTFD && m
|
depends on TARGET_CORE && EVENTFD && m
|
||||||
|
select VHOST_RING
|
||||||
default n
|
default n
|
||||||
---help---
|
---help---
|
||||||
Say M here to enable the vhost_scsi TCM fabric module
|
Say M here to enable the vhost_scsi TCM fabric module
|
||||||
for use with virtio-scsi guests
|
for use with virtio-scsi guests
|
||||||
|
|
||||||
|
config VHOST_RING
|
||||||
|
tristate
|
||||||
|
---help---
|
||||||
|
This option is selected by any driver which needs to access
|
||||||
|
the host side of a virtio ring.
|
||||||
|
|
|
@ -3,3 +3,5 @@ vhost_net-y := vhost.o net.o
|
||||||
|
|
||||||
obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o
|
obj-$(CONFIG_VHOST_SCSI) += vhost_scsi.o
|
||||||
vhost_scsi-y := scsi.o
|
vhost_scsi-y := scsi.o
|
||||||
|
|
||||||
|
obj-$(CONFIG_VHOST_RING) += vringh.o
|
||||||
|
|
|
@ -282,7 +282,9 @@ static long vhost_test_ioctl(struct file *f, unsigned int ioctl,
|
||||||
return vhost_test_reset_owner(n);
|
return vhost_test_reset_owner(n);
|
||||||
default:
|
default:
|
||||||
mutex_lock(&n->dev.mutex);
|
mutex_lock(&n->dev.mutex);
|
||||||
r = vhost_dev_ioctl(&n->dev, ioctl, arg);
|
r = vhost_dev_ioctl(&n->dev, ioctl, argp);
|
||||||
|
if (r == -ENOIOCTLCMD)
|
||||||
|
r = vhost_vring_ioctl(&n->dev, ioctl, argp);
|
||||||
vhost_test_flush(n);
|
vhost_test_flush(n);
|
||||||
mutex_unlock(&n->dev.mutex);
|
mutex_unlock(&n->dev.mutex);
|
||||||
return r;
|
return r;
|
||||||
|
|
File diff suppressed because it is too large
Load Diff
|
@ -108,7 +108,7 @@ static void tell_host(struct virtio_balloon *vb, struct virtqueue *vq)
|
||||||
sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
|
sg_init_one(&sg, vb->pfns, sizeof(vb->pfns[0]) * vb->num_pfns);
|
||||||
|
|
||||||
/* We should always be able to add one buffer to an empty queue. */
|
/* We should always be able to add one buffer to an empty queue. */
|
||||||
if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0)
|
if (virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL) < 0)
|
||||||
BUG();
|
BUG();
|
||||||
virtqueue_kick(vq);
|
virtqueue_kick(vq);
|
||||||
|
|
||||||
|
@ -256,7 +256,7 @@ static void stats_handle_request(struct virtio_balloon *vb)
|
||||||
if (!virtqueue_get_buf(vq, &len))
|
if (!virtqueue_get_buf(vq, &len))
|
||||||
return;
|
return;
|
||||||
sg_init_one(&sg, vb->stats, sizeof(vb->stats));
|
sg_init_one(&sg, vb->stats, sizeof(vb->stats));
|
||||||
if (virtqueue_add_buf(vq, &sg, 1, 0, vb, GFP_KERNEL) < 0)
|
if (virtqueue_add_outbuf(vq, &sg, 1, vb, GFP_KERNEL) < 0)
|
||||||
BUG();
|
BUG();
|
||||||
virtqueue_kick(vq);
|
virtqueue_kick(vq);
|
||||||
}
|
}
|
||||||
|
@ -341,7 +341,7 @@ static int init_vqs(struct virtio_balloon *vb)
|
||||||
* use it to signal us later.
|
* use it to signal us later.
|
||||||
*/
|
*/
|
||||||
sg_init_one(&sg, vb->stats, sizeof vb->stats);
|
sg_init_one(&sg, vb->stats, sizeof vb->stats);
|
||||||
if (virtqueue_add_buf(vb->stats_vq, &sg, 1, 0, vb, GFP_KERNEL)
|
if (virtqueue_add_outbuf(vb->stats_vq, &sg, 1, vb, GFP_KERNEL)
|
||||||
< 0)
|
< 0)
|
||||||
BUG();
|
BUG();
|
||||||
virtqueue_kick(vb->stats_vq);
|
virtqueue_kick(vb->stats_vq);
|
||||||
|
|
|
@ -24,27 +24,6 @@
|
||||||
#include <linux/module.h>
|
#include <linux/module.h>
|
||||||
#include <linux/hrtimer.h>
|
#include <linux/hrtimer.h>
|
||||||
|
|
||||||
/* virtio guest is communicating with a virtual "device" that actually runs on
|
|
||||||
* a host processor. Memory barriers are used to control SMP effects. */
|
|
||||||
#ifdef CONFIG_SMP
|
|
||||||
/* Where possible, use SMP barriers which are more lightweight than mandatory
|
|
||||||
* barriers, because mandatory barriers control MMIO effects on accesses
|
|
||||||
* through relaxed memory I/O windows (which virtio-pci does not use). */
|
|
||||||
#define virtio_mb(vq) \
|
|
||||||
do { if ((vq)->weak_barriers) smp_mb(); else mb(); } while(0)
|
|
||||||
#define virtio_rmb(vq) \
|
|
||||||
do { if ((vq)->weak_barriers) smp_rmb(); else rmb(); } while(0)
|
|
||||||
#define virtio_wmb(vq) \
|
|
||||||
do { if ((vq)->weak_barriers) smp_wmb(); else wmb(); } while(0)
|
|
||||||
#else
|
|
||||||
/* We must force memory ordering even if guest is UP since host could be
|
|
||||||
* running on another CPU, but SMP barriers are defined to barrier() in that
|
|
||||||
* configuration. So fall back to mandatory barriers instead. */
|
|
||||||
#define virtio_mb(vq) mb()
|
|
||||||
#define virtio_rmb(vq) rmb()
|
|
||||||
#define virtio_wmb(vq) wmb()
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
/* For development, we want to crash whenever the ring is screwed. */
|
/* For development, we want to crash whenever the ring is screwed. */
|
||||||
#define BAD_RING(_vq, fmt, args...) \
|
#define BAD_RING(_vq, fmt, args...) \
|
||||||
|
@ -119,16 +98,36 @@ struct vring_virtqueue
|
||||||
|
|
||||||
#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
|
#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
|
||||||
|
|
||||||
|
static inline struct scatterlist *sg_next_chained(struct scatterlist *sg,
|
||||||
|
unsigned int *count)
|
||||||
|
{
|
||||||
|
return sg_next(sg);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct scatterlist *sg_next_arr(struct scatterlist *sg,
|
||||||
|
unsigned int *count)
|
||||||
|
{
|
||||||
|
if (--(*count) == 0)
|
||||||
|
return NULL;
|
||||||
|
return sg + 1;
|
||||||
|
}
|
||||||
|
|
||||||
/* Set up an indirect table of descriptors and add it to the queue. */
|
/* Set up an indirect table of descriptors and add it to the queue. */
|
||||||
static int vring_add_indirect(struct vring_virtqueue *vq,
|
static inline int vring_add_indirect(struct vring_virtqueue *vq,
|
||||||
struct scatterlist sg[],
|
struct scatterlist *sgs[],
|
||||||
unsigned int out,
|
struct scatterlist *(*next)
|
||||||
unsigned int in,
|
(struct scatterlist *, unsigned int *),
|
||||||
gfp_t gfp)
|
unsigned int total_sg,
|
||||||
|
unsigned int total_out,
|
||||||
|
unsigned int total_in,
|
||||||
|
unsigned int out_sgs,
|
||||||
|
unsigned int in_sgs,
|
||||||
|
gfp_t gfp)
|
||||||
{
|
{
|
||||||
struct vring_desc *desc;
|
struct vring_desc *desc;
|
||||||
unsigned head;
|
unsigned head;
|
||||||
int i;
|
struct scatterlist *sg;
|
||||||
|
int i, n;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We require lowmem mappings for the descriptors because
|
* We require lowmem mappings for the descriptors because
|
||||||
|
@ -137,25 +136,31 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
|
||||||
*/
|
*/
|
||||||
gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH);
|
gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH);
|
||||||
|
|
||||||
desc = kmalloc((out + in) * sizeof(struct vring_desc), gfp);
|
desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
|
||||||
if (!desc)
|
if (!desc)
|
||||||
return -ENOMEM;
|
return -ENOMEM;
|
||||||
|
|
||||||
/* Transfer entries from the sg list into the indirect page */
|
/* Transfer entries from the sg lists into the indirect page */
|
||||||
for (i = 0; i < out; i++) {
|
i = 0;
|
||||||
desc[i].flags = VRING_DESC_F_NEXT;
|
for (n = 0; n < out_sgs; n++) {
|
||||||
desc[i].addr = sg_phys(sg);
|
for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
|
||||||
desc[i].len = sg->length;
|
desc[i].flags = VRING_DESC_F_NEXT;
|
||||||
desc[i].next = i+1;
|
desc[i].addr = sg_phys(sg);
|
||||||
sg++;
|
desc[i].len = sg->length;
|
||||||
|
desc[i].next = i+1;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
for (; i < (out + in); i++) {
|
for (; n < (out_sgs + in_sgs); n++) {
|
||||||
desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
|
for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
|
||||||
desc[i].addr = sg_phys(sg);
|
desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
|
||||||
desc[i].len = sg->length;
|
desc[i].addr = sg_phys(sg);
|
||||||
desc[i].next = i+1;
|
desc[i].len = sg->length;
|
||||||
sg++;
|
desc[i].next = i+1;
|
||||||
|
i++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
BUG_ON(i != total_sg);
|
||||||
|
|
||||||
/* Last one doesn't continue. */
|
/* Last one doesn't continue. */
|
||||||
desc[i-1].flags &= ~VRING_DESC_F_NEXT;
|
desc[i-1].flags &= ~VRING_DESC_F_NEXT;
|
||||||
|
@ -176,6 +181,120 @@ static int vring_add_indirect(struct vring_virtqueue *vq,
|
||||||
return head;
|
return head;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static inline int virtqueue_add(struct virtqueue *_vq,
|
||||||
|
struct scatterlist *sgs[],
|
||||||
|
struct scatterlist *(*next)
|
||||||
|
(struct scatterlist *, unsigned int *),
|
||||||
|
unsigned int total_out,
|
||||||
|
unsigned int total_in,
|
||||||
|
unsigned int out_sgs,
|
||||||
|
unsigned int in_sgs,
|
||||||
|
void *data,
|
||||||
|
gfp_t gfp)
|
||||||
|
{
|
||||||
|
struct vring_virtqueue *vq = to_vvq(_vq);
|
||||||
|
struct scatterlist *sg;
|
||||||
|
unsigned int i, n, avail, uninitialized_var(prev), total_sg;
|
||||||
|
int head;
|
||||||
|
|
||||||
|
START_USE(vq);
|
||||||
|
|
||||||
|
BUG_ON(data == NULL);
|
||||||
|
|
||||||
|
#ifdef DEBUG
|
||||||
|
{
|
||||||
|
ktime_t now = ktime_get();
|
||||||
|
|
||||||
|
/* No kick or get, with .1 second between? Warn. */
|
||||||
|
if (vq->last_add_time_valid)
|
||||||
|
WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
|
||||||
|
> 100);
|
||||||
|
vq->last_add_time = now;
|
||||||
|
vq->last_add_time_valid = true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
total_sg = total_in + total_out;
|
||||||
|
|
||||||
|
/* If the host supports indirect descriptor tables, and we have multiple
|
||||||
|
* buffers, then go indirect. FIXME: tune this threshold */
|
||||||
|
if (vq->indirect && total_sg > 1 && vq->vq.num_free) {
|
||||||
|
head = vring_add_indirect(vq, sgs, next, total_sg, total_out,
|
||||||
|
total_in,
|
||||||
|
out_sgs, in_sgs, gfp);
|
||||||
|
if (likely(head >= 0))
|
||||||
|
goto add_head;
|
||||||
|
}
|
||||||
|
|
||||||
|
BUG_ON(total_sg > vq->vring.num);
|
||||||
|
BUG_ON(total_sg == 0);
|
||||||
|
|
||||||
|
if (vq->vq.num_free < total_sg) {
|
||||||
|
pr_debug("Can't add buf len %i - avail = %i\n",
|
||||||
|
total_sg, vq->vq.num_free);
|
||||||
|
/* FIXME: for historical reasons, we force a notify here if
|
||||||
|
* there are outgoing parts to the buffer. Presumably the
|
||||||
|
* host should service the ring ASAP. */
|
||||||
|
if (out_sgs)
|
||||||
|
vq->notify(&vq->vq);
|
||||||
|
END_USE(vq);
|
||||||
|
return -ENOSPC;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* We're about to use some buffers from the free list. */
|
||||||
|
vq->vq.num_free -= total_sg;
|
||||||
|
|
||||||
|
head = i = vq->free_head;
|
||||||
|
for (n = 0; n < out_sgs; n++) {
|
||||||
|
for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
|
||||||
|
vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
|
||||||
|
vq->vring.desc[i].addr = sg_phys(sg);
|
||||||
|
vq->vring.desc[i].len = sg->length;
|
||||||
|
prev = i;
|
||||||
|
i = vq->vring.desc[i].next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (; n < (out_sgs + in_sgs); n++) {
|
||||||
|
for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
|
||||||
|
vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
|
||||||
|
vq->vring.desc[i].addr = sg_phys(sg);
|
||||||
|
vq->vring.desc[i].len = sg->length;
|
||||||
|
prev = i;
|
||||||
|
i = vq->vring.desc[i].next;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/* Last one doesn't continue. */
|
||||||
|
vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;
|
||||||
|
|
||||||
|
/* Update free pointer */
|
||||||
|
vq->free_head = i;
|
||||||
|
|
||||||
|
add_head:
|
||||||
|
/* Set token. */
|
||||||
|
vq->data[head] = data;
|
||||||
|
|
||||||
|
/* Put entry in available array (but don't update avail->idx until they
|
||||||
|
* do sync). */
|
||||||
|
avail = (vq->vring.avail->idx & (vq->vring.num-1));
|
||||||
|
vq->vring.avail->ring[avail] = head;
|
||||||
|
|
||||||
|
/* Descriptors and available array need to be set before we expose the
|
||||||
|
* new available array entries. */
|
||||||
|
virtio_wmb(vq->weak_barriers);
|
||||||
|
vq->vring.avail->idx++;
|
||||||
|
vq->num_added++;
|
||||||
|
|
||||||
|
/* This is very unlikely, but theoretically possible. Kick
|
||||||
|
* just in case. */
|
||||||
|
if (unlikely(vq->num_added == (1 << 16) - 1))
|
||||||
|
virtqueue_kick(_vq);
|
||||||
|
|
||||||
|
pr_debug("Added buffer head %i to %p\n", head, vq);
|
||||||
|
END_USE(vq);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* virtqueue_add_buf - expose buffer to other end
|
* virtqueue_add_buf - expose buffer to other end
|
||||||
* @vq: the struct virtqueue we're talking about.
|
* @vq: the struct virtqueue we're talking about.
|
||||||
|
@ -197,101 +316,99 @@ int virtqueue_add_buf(struct virtqueue *_vq,
|
||||||
void *data,
|
void *data,
|
||||||
gfp_t gfp)
|
gfp_t gfp)
|
||||||
{
|
{
|
||||||
struct vring_virtqueue *vq = to_vvq(_vq);
|
struct scatterlist *sgs[2];
|
||||||
unsigned int i, avail, uninitialized_var(prev);
|
|
||||||
int head;
|
|
||||||
|
|
||||||
START_USE(vq);
|
sgs[0] = sg;
|
||||||
|
sgs[1] = sg + out;
|
||||||
|
|
||||||
BUG_ON(data == NULL);
|
return virtqueue_add(_vq, sgs, sg_next_arr,
|
||||||
|
out, in, out ? 1 : 0, in ? 1 : 0, data, gfp);
|
||||||
#ifdef DEBUG
|
|
||||||
{
|
|
||||||
ktime_t now = ktime_get();
|
|
||||||
|
|
||||||
/* No kick or get, with .1 second between? Warn. */
|
|
||||||
if (vq->last_add_time_valid)
|
|
||||||
WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
|
|
||||||
> 100);
|
|
||||||
vq->last_add_time = now;
|
|
||||||
vq->last_add_time_valid = true;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* If the host supports indirect descriptor tables, and we have multiple
|
|
||||||
* buffers, then go indirect. FIXME: tune this threshold */
|
|
||||||
if (vq->indirect && (out + in) > 1 && vq->vq.num_free) {
|
|
||||||
head = vring_add_indirect(vq, sg, out, in, gfp);
|
|
||||||
if (likely(head >= 0))
|
|
||||||
goto add_head;
|
|
||||||
}
|
|
||||||
|
|
||||||
BUG_ON(out + in > vq->vring.num);
|
|
||||||
BUG_ON(out + in == 0);
|
|
||||||
|
|
||||||
if (vq->vq.num_free < out + in) {
|
|
||||||
pr_debug("Can't add buf len %i - avail = %i\n",
|
|
||||||
out + in, vq->vq.num_free);
|
|
||||||
/* FIXME: for historical reasons, we force a notify here if
|
|
||||||
* there are outgoing parts to the buffer. Presumably the
|
|
||||||
* host should service the ring ASAP. */
|
|
||||||
if (out)
|
|
||||||
vq->notify(&vq->vq);
|
|
||||||
END_USE(vq);
|
|
||||||
return -ENOSPC;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* We're about to use some buffers from the free list. */
|
|
||||||
vq->vq.num_free -= out + in;
|
|
||||||
|
|
||||||
head = vq->free_head;
|
|
||||||
for (i = vq->free_head; out; i = vq->vring.desc[i].next, out--) {
|
|
||||||
vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
|
|
||||||
vq->vring.desc[i].addr = sg_phys(sg);
|
|
||||||
vq->vring.desc[i].len = sg->length;
|
|
||||||
prev = i;
|
|
||||||
sg++;
|
|
||||||
}
|
|
||||||
for (; in; i = vq->vring.desc[i].next, in--) {
|
|
||||||
vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
|
|
||||||
vq->vring.desc[i].addr = sg_phys(sg);
|
|
||||||
vq->vring.desc[i].len = sg->length;
|
|
||||||
prev = i;
|
|
||||||
sg++;
|
|
||||||
}
|
|
||||||
/* Last one doesn't continue. */
|
|
||||||
vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;
|
|
||||||
|
|
||||||
/* Update free pointer */
|
|
||||||
vq->free_head = i;
|
|
||||||
|
|
||||||
add_head:
|
|
||||||
/* Set token. */
|
|
||||||
vq->data[head] = data;
|
|
||||||
|
|
||||||
/* Put entry in available array (but don't update avail->idx until they
|
|
||||||
* do sync). */
|
|
||||||
avail = (vq->vring.avail->idx & (vq->vring.num-1));
|
|
||||||
vq->vring.avail->ring[avail] = head;
|
|
||||||
|
|
||||||
/* Descriptors and available array need to be set before we expose the
|
|
||||||
* new available array entries. */
|
|
||||||
virtio_wmb(vq);
|
|
||||||
vq->vring.avail->idx++;
|
|
||||||
vq->num_added++;
|
|
||||||
|
|
||||||
/* This is very unlikely, but theoretically possible. Kick
|
|
||||||
* just in case. */
|
|
||||||
if (unlikely(vq->num_added == (1 << 16) - 1))
|
|
||||||
virtqueue_kick(_vq);
|
|
||||||
|
|
||||||
pr_debug("Added buffer head %i to %p\n", head, vq);
|
|
||||||
END_USE(vq);
|
|
||||||
|
|
||||||
return 0;
|
|
||||||
}
|
}
|
||||||
EXPORT_SYMBOL_GPL(virtqueue_add_buf);
|
EXPORT_SYMBOL_GPL(virtqueue_add_buf);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* virtqueue_add_sgs - expose buffers to other end
|
||||||
|
* @vq: the struct virtqueue we're talking about.
|
||||||
|
* @sgs: array of terminated scatterlists.
|
||||||
|
* @out_sgs: the number of scatterlists readable by other side
|
||||||
|
* @in_sgs: the number of scatterlists which are writable (after readable ones)
|
||||||
|
* @data: the token identifying the buffer.
|
||||||
|
* @gfp: how to do memory allocations (if necessary).
|
||||||
|
*
|
||||||
|
* Caller must ensure we don't call this with other virtqueue operations
|
||||||
|
* at the same time (except where noted).
|
||||||
|
*
|
||||||
|
* Returns zero or a negative error (ie. ENOSPC, ENOMEM).
|
||||||
|
*/
|
||||||
|
int virtqueue_add_sgs(struct virtqueue *_vq,
|
||||||
|
struct scatterlist *sgs[],
|
||||||
|
unsigned int out_sgs,
|
||||||
|
unsigned int in_sgs,
|
||||||
|
void *data,
|
||||||
|
gfp_t gfp)
|
||||||
|
{
|
||||||
|
unsigned int i, total_out, total_in;
|
||||||
|
|
||||||
|
/* Count them first. */
|
||||||
|
for (i = total_out = total_in = 0; i < out_sgs; i++) {
|
||||||
|
struct scatterlist *sg;
|
||||||
|
for (sg = sgs[i]; sg; sg = sg_next(sg))
|
||||||
|
total_out++;
|
||||||
|
}
|
||||||
|
for (; i < out_sgs + in_sgs; i++) {
|
||||||
|
struct scatterlist *sg;
|
||||||
|
for (sg = sgs[i]; sg; sg = sg_next(sg))
|
||||||
|
total_in++;
|
||||||
|
}
|
||||||
|
return virtqueue_add(_vq, sgs, sg_next_chained,
|
||||||
|
total_out, total_in, out_sgs, in_sgs, data, gfp);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* virtqueue_add_outbuf - expose output buffers to other end
|
||||||
|
* @vq: the struct virtqueue we're talking about.
|
||||||
|
* @sg: array of scatterlists (need not be terminated!)
|
||||||
|
* @num: the number of scatterlists readable by other side
|
||||||
|
* @data: the token identifying the buffer.
|
||||||
|
* @gfp: how to do memory allocations (if necessary).
|
||||||
|
*
|
||||||
|
* Caller must ensure we don't call this with other virtqueue operations
|
||||||
|
* at the same time (except where noted).
|
||||||
|
*
|
||||||
|
* Returns zero or a negative error (ie. ENOSPC, ENOMEM).
|
||||||
|
*/
|
||||||
|
int virtqueue_add_outbuf(struct virtqueue *vq,
|
||||||
|
struct scatterlist sg[], unsigned int num,
|
||||||
|
void *data,
|
||||||
|
gfp_t gfp)
|
||||||
|
{
|
||||||
|
return virtqueue_add(vq, &sg, sg_next_arr, num, 0, 1, 0, data, gfp);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* virtqueue_add_inbuf - expose input buffers to other end
|
||||||
|
* @vq: the struct virtqueue we're talking about.
|
||||||
|
* @sg: array of scatterlists (need not be terminated!)
|
||||||
|
* @num: the number of scatterlists writable by other side
|
||||||
|
* @data: the token identifying the buffer.
|
||||||
|
* @gfp: how to do memory allocations (if necessary).
|
||||||
|
*
|
||||||
|
* Caller must ensure we don't call this with other virtqueue operations
|
||||||
|
* at the same time (except where noted).
|
||||||
|
*
|
||||||
|
* Returns zero or a negative error (ie. ENOSPC, ENOMEM).
|
||||||
|
*/
|
||||||
|
int virtqueue_add_inbuf(struct virtqueue *vq,
|
||||||
|
struct scatterlist sg[], unsigned int num,
|
||||||
|
void *data,
|
||||||
|
gfp_t gfp)
|
||||||
|
{
|
||||||
|
return virtqueue_add(vq, &sg, sg_next_arr, 0, num, 0, 1, data, gfp);
|
||||||
|
}
|
||||||
|
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* virtqueue_kick_prepare - first half of split virtqueue_kick call.
|
* virtqueue_kick_prepare - first half of split virtqueue_kick call.
|
||||||
* @vq: the struct virtqueue
|
* @vq: the struct virtqueue
|
||||||
|
@ -312,7 +429,7 @@ bool virtqueue_kick_prepare(struct virtqueue *_vq)
|
||||||
START_USE(vq);
|
START_USE(vq);
|
||||||
/* We need to expose available array entries before checking avail
|
/* We need to expose available array entries before checking avail
|
||||||
* event. */
|
* event. */
|
||||||
virtio_mb(vq);
|
virtio_mb(vq->weak_barriers);
|
||||||
|
|
||||||
old = vq->vring.avail->idx - vq->num_added;
|
old = vq->vring.avail->idx - vq->num_added;
|
||||||
new = vq->vring.avail->idx;
|
new = vq->vring.avail->idx;
|
||||||
|
@ -436,7 +553,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Only get used array entries after they have been exposed by host. */
|
/* Only get used array entries after they have been exposed by host. */
|
||||||
virtio_rmb(vq);
|
virtio_rmb(vq->weak_barriers);
|
||||||
|
|
||||||
last_used = (vq->last_used_idx & (vq->vring.num - 1));
|
last_used = (vq->last_used_idx & (vq->vring.num - 1));
|
||||||
i = vq->vring.used->ring[last_used].id;
|
i = vq->vring.used->ring[last_used].id;
|
||||||
|
@ -460,7 +577,7 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
|
||||||
* the read in the next get_buf call. */
|
* the read in the next get_buf call. */
|
||||||
if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
|
if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
|
||||||
vring_used_event(&vq->vring) = vq->last_used_idx;
|
vring_used_event(&vq->vring) = vq->last_used_idx;
|
||||||
virtio_mb(vq);
|
virtio_mb(vq->weak_barriers);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef DEBUG
|
#ifdef DEBUG
|
||||||
|
@ -513,7 +630,7 @@ bool virtqueue_enable_cb(struct virtqueue *_vq)
|
||||||
* entry. Always do both to keep code simple. */
|
* entry. Always do both to keep code simple. */
|
||||||
vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
|
vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
|
||||||
vring_used_event(&vq->vring) = vq->last_used_idx;
|
vring_used_event(&vq->vring) = vq->last_used_idx;
|
||||||
virtio_mb(vq);
|
virtio_mb(vq->weak_barriers);
|
||||||
if (unlikely(more_used(vq))) {
|
if (unlikely(more_used(vq))) {
|
||||||
END_USE(vq);
|
END_USE(vq);
|
||||||
return false;
|
return false;
|
||||||
|
@ -553,7 +670,7 @@ bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
|
||||||
/* TODO: tune this threshold */
|
/* TODO: tune this threshold */
|
||||||
bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
|
bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
|
||||||
vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
|
vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
|
||||||
virtio_mb(vq);
|
virtio_mb(vq->weak_barriers);
|
||||||
if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
|
if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
|
||||||
END_USE(vq);
|
END_USE(vq);
|
||||||
return false;
|
return false;
|
||||||
|
|
|
@ -171,6 +171,22 @@ static inline void sg_mark_end(struct scatterlist *sg)
|
||||||
sg->page_link &= ~0x01;
|
sg->page_link &= ~0x01;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* sg_unmark_end - Undo setting the end of the scatterlist
|
||||||
|
* @sg: SG entry
|
||||||
|
*
|
||||||
|
* Description:
|
||||||
|
* Removes the termination marker from the given entry of the scatterlist.
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
static inline void sg_unmark_end(struct scatterlist *sg)
|
||||||
|
{
|
||||||
|
#ifdef CONFIG_DEBUG_SG
|
||||||
|
BUG_ON(sg->sg_magic != SG_MAGIC);
|
||||||
|
#endif
|
||||||
|
sg->page_link &= ~0x02;
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* sg_phys - Return physical address of an sg entry
|
* sg_phys - Return physical address of an sg entry
|
||||||
* @sg: SG entry
|
* @sg: SG entry
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
#include <linux/device.h>
|
#include <linux/device.h>
|
||||||
#include <linux/mod_devicetable.h>
|
#include <linux/mod_devicetable.h>
|
||||||
#include <linux/gfp.h>
|
#include <linux/gfp.h>
|
||||||
|
#include <linux/vringh.h>
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* virtqueue - a queue to register buffers for sending or receiving.
|
* virtqueue - a queue to register buffers for sending or receiving.
|
||||||
|
@ -40,6 +41,23 @@ int virtqueue_add_buf(struct virtqueue *vq,
|
||||||
void *data,
|
void *data,
|
||||||
gfp_t gfp);
|
gfp_t gfp);
|
||||||
|
|
||||||
|
int virtqueue_add_outbuf(struct virtqueue *vq,
|
||||||
|
struct scatterlist sg[], unsigned int num,
|
||||||
|
void *data,
|
||||||
|
gfp_t gfp);
|
||||||
|
|
||||||
|
int virtqueue_add_inbuf(struct virtqueue *vq,
|
||||||
|
struct scatterlist sg[], unsigned int num,
|
||||||
|
void *data,
|
||||||
|
gfp_t gfp);
|
||||||
|
|
||||||
|
int virtqueue_add_sgs(struct virtqueue *vq,
|
||||||
|
struct scatterlist *sgs[],
|
||||||
|
unsigned int out_sgs,
|
||||||
|
unsigned int in_sgs,
|
||||||
|
void *data,
|
||||||
|
gfp_t gfp);
|
||||||
|
|
||||||
void virtqueue_kick(struct virtqueue *vq);
|
void virtqueue_kick(struct virtqueue *vq);
|
||||||
|
|
||||||
bool virtqueue_kick_prepare(struct virtqueue *vq);
|
bool virtqueue_kick_prepare(struct virtqueue *vq);
|
||||||
|
@ -64,6 +82,7 @@ unsigned int virtqueue_get_vring_size(struct virtqueue *vq);
|
||||||
* @dev: underlying device.
|
* @dev: underlying device.
|
||||||
* @id: the device type identification (used to match it with a driver).
|
* @id: the device type identification (used to match it with a driver).
|
||||||
* @config: the configuration ops for this device.
|
* @config: the configuration ops for this device.
|
||||||
|
* @vringh_config: configuration ops for host vrings.
|
||||||
* @vqs: the list of virtqueues for this device.
|
* @vqs: the list of virtqueues for this device.
|
||||||
* @features: the features supported by both driver and device.
|
* @features: the features supported by both driver and device.
|
||||||
* @priv: private pointer for the driver's use.
|
* @priv: private pointer for the driver's use.
|
||||||
|
@ -73,6 +92,7 @@ struct virtio_device {
|
||||||
struct device dev;
|
struct device dev;
|
||||||
struct virtio_device_id id;
|
struct virtio_device_id id;
|
||||||
const struct virtio_config_ops *config;
|
const struct virtio_config_ops *config;
|
||||||
|
const struct vringh_config_ops *vringh_config;
|
||||||
struct list_head vqs;
|
struct list_head vqs;
|
||||||
/* Note that this is a Linux set_bit-style bitmap. */
|
/* Note that this is a Linux set_bit-style bitmap. */
|
||||||
unsigned long features[1];
|
unsigned long features[1];
|
||||||
|
|
|
@ -0,0 +1,24 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) ST-Ericsson AB 2012
|
||||||
|
* Author: Sjur Brændeland <sjur.brandeland@stericsson.com>
|
||||||
|
*
|
||||||
|
* This header is BSD licensed so
|
||||||
|
* anyone can use the definitions to implement compatible remote processors
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef VIRTIO_CAIF_H
|
||||||
|
#define VIRTIO_CAIF_H
|
||||||
|
|
||||||
|
#include <linux/types.h>
|
||||||
|
struct virtio_caif_transf_config {
|
||||||
|
u16 headroom;
|
||||||
|
u16 tailroom;
|
||||||
|
u32 mtu;
|
||||||
|
u8 reserved[4];
|
||||||
|
};
|
||||||
|
|
||||||
|
struct virtio_caif_config {
|
||||||
|
struct virtio_caif_transf_config uplink, downlink;
|
||||||
|
u8 reserved[8];
|
||||||
|
};
|
||||||
|
#endif
|
|
@ -4,6 +4,63 @@
|
||||||
#include <linux/irqreturn.h>
|
#include <linux/irqreturn.h>
|
||||||
#include <uapi/linux/virtio_ring.h>
|
#include <uapi/linux/virtio_ring.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Barriers in virtio are tricky. Non-SMP virtio guests can't assume
|
||||||
|
* they're not on an SMP host system, so they need to assume real
|
||||||
|
* barriers. Non-SMP virtio hosts could skip the barriers, but does
|
||||||
|
* anyone care?
|
||||||
|
*
|
||||||
|
* For virtio_pci on SMP, we don't need to order with respect to MMIO
|
||||||
|
* accesses through relaxed memory I/O windows, so smp_mb() et al are
|
||||||
|
* sufficient.
|
||||||
|
*
|
||||||
|
* For using virtio to talk to real devices (eg. other heterogeneous
|
||||||
|
* CPUs) we do need real barriers. In theory, we could be using both
|
||||||
|
* kinds of virtio, so it's a runtime decision, and the branch is
|
||||||
|
* actually quite cheap.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifdef CONFIG_SMP
|
||||||
|
static inline void virtio_mb(bool weak_barriers)
|
||||||
|
{
|
||||||
|
if (weak_barriers)
|
||||||
|
smp_mb();
|
||||||
|
else
|
||||||
|
mb();
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void virtio_rmb(bool weak_barriers)
|
||||||
|
{
|
||||||
|
if (weak_barriers)
|
||||||
|
smp_rmb();
|
||||||
|
else
|
||||||
|
rmb();
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void virtio_wmb(bool weak_barriers)
|
||||||
|
{
|
||||||
|
if (weak_barriers)
|
||||||
|
smp_wmb();
|
||||||
|
else
|
||||||
|
wmb();
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static inline void virtio_mb(bool weak_barriers)
|
||||||
|
{
|
||||||
|
mb();
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void virtio_rmb(bool weak_barriers)
|
||||||
|
{
|
||||||
|
rmb();
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void virtio_wmb(bool weak_barriers)
|
||||||
|
{
|
||||||
|
wmb();
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
struct virtio_device;
|
struct virtio_device;
|
||||||
struct virtqueue;
|
struct virtqueue;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,225 @@
|
||||||
|
/*
|
||||||
|
* Linux host-side vring helpers; for when the kernel needs to access
|
||||||
|
* someone else's vring.
|
||||||
|
*
|
||||||
|
* Copyright IBM Corporation, 2013.
|
||||||
|
* Parts taken from drivers/vhost/vhost.c Copyright 2009 Red Hat, Inc.
|
||||||
|
*
|
||||||
|
* This program is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
|
||||||
|
*
|
||||||
|
* Written by: Rusty Russell <rusty@rustcorp.com.au>
|
||||||
|
*/
|
||||||
|
#ifndef _LINUX_VRINGH_H
|
||||||
|
#define _LINUX_VRINGH_H
|
||||||
|
#include <uapi/linux/virtio_ring.h>
|
||||||
|
#include <linux/uio.h>
|
||||||
|
#include <linux/slab.h>
|
||||||
|
#include <asm/barrier.h>
|
||||||
|
|
||||||
|
/* virtio_ring with information needed for host access. */
|
||||||
|
struct vringh {
|
||||||
|
/* Guest publishes used event idx (note: we always do). */
|
||||||
|
bool event_indices;
|
||||||
|
|
||||||
|
/* Can we get away with weak barriers? */
|
||||||
|
bool weak_barriers;
|
||||||
|
|
||||||
|
/* Last available index we saw (ie. where we're up to). */
|
||||||
|
u16 last_avail_idx;
|
||||||
|
|
||||||
|
/* Last index we used. */
|
||||||
|
u16 last_used_idx;
|
||||||
|
|
||||||
|
/* How many descriptors we've completed since last need_notify(). */
|
||||||
|
u32 completed;
|
||||||
|
|
||||||
|
/* The vring (note: it may contain user pointers!) */
|
||||||
|
struct vring vring;
|
||||||
|
|
||||||
|
/* The function to call to notify the guest about added buffers */
|
||||||
|
void (*notify)(struct vringh *);
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct vringh_config_ops - ops for creating a host vring from a virtio driver
|
||||||
|
* @find_vrhs: find the host vrings and instantiate them
|
||||||
|
* vdev: the virtio_device
|
||||||
|
* nhvrs: the number of host vrings to find
|
||||||
|
* vrhs: on success, includes new host vrings
|
||||||
|
* callbacks: array of driver callbacks, for each host vring
|
||||||
|
* include a NULL entry for vqs that do not need a callback
|
||||||
|
* Returns 0 on success or error status
|
||||||
|
* @del_vrhs: free the host vrings found by find_vrhs().
|
||||||
|
*/
|
||||||
|
struct virtio_device;
|
||||||
|
typedef void vrh_callback_t(struct virtio_device *, struct vringh *);
|
||||||
|
struct vringh_config_ops {
|
||||||
|
int (*find_vrhs)(struct virtio_device *vdev, unsigned nhvrs,
|
||||||
|
struct vringh *vrhs[], vrh_callback_t *callbacks[]);
|
||||||
|
void (*del_vrhs)(struct virtio_device *vdev);
|
||||||
|
};
|
||||||
|
|
||||||
|
/* The memory the vring can access, and what offset to apply. */
|
||||||
|
struct vringh_range {
|
||||||
|
u64 start, end_incl;
|
||||||
|
u64 offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct vringh_iov - iovec mangler.
|
||||||
|
*
|
||||||
|
* Mangles iovec in place, and restores it.
|
||||||
|
* Remaining data is iov + i, of used - i elements.
|
||||||
|
*/
|
||||||
|
struct vringh_iov {
|
||||||
|
struct iovec *iov;
|
||||||
|
size_t consumed; /* Within iov[i] */
|
||||||
|
unsigned i, used, max_num;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct vringh_kiov - kvec mangler.
|
||||||
|
*
|
||||||
|
* Mangles kvec in place, and restores it.
|
||||||
|
* Remaining data is iov + i, of used - i elements.
|
||||||
|
*/
|
||||||
|
struct vringh_kiov {
|
||||||
|
struct kvec *iov;
|
||||||
|
size_t consumed; /* Within iov[i] */
|
||||||
|
unsigned i, used, max_num;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Flag on max_num to indicate we're kmalloced. */
|
||||||
|
#define VRINGH_IOV_ALLOCATED 0x8000000
|
||||||
|
|
||||||
|
/* Helpers for userspace vrings. */
|
||||||
|
int vringh_init_user(struct vringh *vrh, u32 features,
|
||||||
|
unsigned int num, bool weak_barriers,
|
||||||
|
struct vring_desc __user *desc,
|
||||||
|
struct vring_avail __user *avail,
|
||||||
|
struct vring_used __user *used);
|
||||||
|
|
||||||
|
static inline void vringh_iov_init(struct vringh_iov *iov,
|
||||||
|
struct iovec *iovec, unsigned num)
|
||||||
|
{
|
||||||
|
iov->used = iov->i = 0;
|
||||||
|
iov->consumed = 0;
|
||||||
|
iov->max_num = num;
|
||||||
|
iov->iov = iovec;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void vringh_iov_reset(struct vringh_iov *iov)
|
||||||
|
{
|
||||||
|
iov->iov[iov->i].iov_len += iov->consumed;
|
||||||
|
iov->iov[iov->i].iov_base -= iov->consumed;
|
||||||
|
iov->consumed = 0;
|
||||||
|
iov->i = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void vringh_iov_cleanup(struct vringh_iov *iov)
|
||||||
|
{
|
||||||
|
if (iov->max_num & VRINGH_IOV_ALLOCATED)
|
||||||
|
kfree(iov->iov);
|
||||||
|
iov->max_num = iov->used = iov->i = iov->consumed = 0;
|
||||||
|
iov->iov = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Convert a descriptor into iovecs. */
|
||||||
|
int vringh_getdesc_user(struct vringh *vrh,
|
||||||
|
struct vringh_iov *riov,
|
||||||
|
struct vringh_iov *wiov,
|
||||||
|
bool (*getrange)(struct vringh *vrh,
|
||||||
|
u64 addr, struct vringh_range *r),
|
||||||
|
u16 *head);
|
||||||
|
|
||||||
|
/* Copy bytes from readable vsg, consuming it (and incrementing riov->i). */
|
||||||
|
ssize_t vringh_iov_pull_user(struct vringh_iov *riov, void *dst, size_t len);
|
||||||
|
|
||||||
|
/* Copy bytes into writable vsg, consuming it (and incrementing wiov->i). */
|
||||||
|
ssize_t vringh_iov_push_user(struct vringh_iov *wiov,
|
||||||
|
const void *src, size_t len);
|
||||||
|
|
||||||
|
/* Mark a descriptor as used. */
|
||||||
|
int vringh_complete_user(struct vringh *vrh, u16 head, u32 len);
|
||||||
|
int vringh_complete_multi_user(struct vringh *vrh,
|
||||||
|
const struct vring_used_elem used[],
|
||||||
|
unsigned num_used);
|
||||||
|
|
||||||
|
/* Pretend we've never seen descriptor (for easy error handling). */
|
||||||
|
void vringh_abandon_user(struct vringh *vrh, unsigned int num);
|
||||||
|
|
||||||
|
/* Do we need to fire the eventfd to notify the other side? */
|
||||||
|
int vringh_need_notify_user(struct vringh *vrh);
|
||||||
|
|
||||||
|
bool vringh_notify_enable_user(struct vringh *vrh);
|
||||||
|
void vringh_notify_disable_user(struct vringh *vrh);
|
||||||
|
|
||||||
|
/* Helpers for kernelspace vrings. */
|
||||||
|
int vringh_init_kern(struct vringh *vrh, u32 features,
|
||||||
|
unsigned int num, bool weak_barriers,
|
||||||
|
struct vring_desc *desc,
|
||||||
|
struct vring_avail *avail,
|
||||||
|
struct vring_used *used);
|
||||||
|
|
||||||
|
static inline void vringh_kiov_init(struct vringh_kiov *kiov,
|
||||||
|
struct kvec *kvec, unsigned num)
|
||||||
|
{
|
||||||
|
kiov->used = kiov->i = 0;
|
||||||
|
kiov->consumed = 0;
|
||||||
|
kiov->max_num = num;
|
||||||
|
kiov->iov = kvec;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void vringh_kiov_reset(struct vringh_kiov *kiov)
|
||||||
|
{
|
||||||
|
kiov->iov[kiov->i].iov_len += kiov->consumed;
|
||||||
|
kiov->iov[kiov->i].iov_base -= kiov->consumed;
|
||||||
|
kiov->consumed = 0;
|
||||||
|
kiov->i = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void vringh_kiov_cleanup(struct vringh_kiov *kiov)
|
||||||
|
{
|
||||||
|
if (kiov->max_num & VRINGH_IOV_ALLOCATED)
|
||||||
|
kfree(kiov->iov);
|
||||||
|
kiov->max_num = kiov->used = kiov->i = kiov->consumed = 0;
|
||||||
|
kiov->iov = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
int vringh_getdesc_kern(struct vringh *vrh,
|
||||||
|
struct vringh_kiov *riov,
|
||||||
|
struct vringh_kiov *wiov,
|
||||||
|
u16 *head,
|
||||||
|
gfp_t gfp);
|
||||||
|
|
||||||
|
ssize_t vringh_iov_pull_kern(struct vringh_kiov *riov, void *dst, size_t len);
|
||||||
|
ssize_t vringh_iov_push_kern(struct vringh_kiov *wiov,
|
||||||
|
const void *src, size_t len);
|
||||||
|
void vringh_abandon_kern(struct vringh *vrh, unsigned int num);
|
||||||
|
int vringh_complete_kern(struct vringh *vrh, u16 head, u32 len);
|
||||||
|
|
||||||
|
bool vringh_notify_enable_kern(struct vringh *vrh);
|
||||||
|
void vringh_notify_disable_kern(struct vringh *vrh);
|
||||||
|
|
||||||
|
int vringh_need_notify_kern(struct vringh *vrh);
|
||||||
|
|
||||||
|
/* Notify the guest about buffers added to the used ring */
|
||||||
|
static inline void vringh_notify(struct vringh *vrh)
|
||||||
|
{
|
||||||
|
if (vrh->notify)
|
||||||
|
vrh->notify(vrh);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif /* _LINUX_VRINGH_H */
|
|
@ -52,8 +52,8 @@ struct virtio_balloon_config
|
||||||
#define VIRTIO_BALLOON_S_NR 6
|
#define VIRTIO_BALLOON_S_NR 6
|
||||||
|
|
||||||
struct virtio_balloon_stat {
|
struct virtio_balloon_stat {
|
||||||
u16 tag;
|
__u16 tag;
|
||||||
u64 val;
|
__u64 val;
|
||||||
} __attribute__((packed));
|
} __attribute__((packed));
|
||||||
|
|
||||||
#endif /* _LINUX_VIRTIO_BALLOON_H */
|
#endif /* _LINUX_VIRTIO_BALLOON_H */
|
||||||
|
|
|
@ -38,5 +38,6 @@
|
||||||
#define VIRTIO_ID_SCSI 8 /* virtio scsi */
|
#define VIRTIO_ID_SCSI 8 /* virtio scsi */
|
||||||
#define VIRTIO_ID_9P 9 /* 9p virtio console */
|
#define VIRTIO_ID_9P 9 /* 9p virtio console */
|
||||||
#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
|
#define VIRTIO_ID_RPROC_SERIAL 11 /* virtio remoteproc serial link */
|
||||||
|
#define VIRTIO_ID_CAIF 12 /* Virtio caif */
|
||||||
|
|
||||||
#endif /* _LINUX_VIRTIO_IDS_H */
|
#endif /* _LINUX_VIRTIO_IDS_H */
|
||||||
|
|
|
@ -194,11 +194,14 @@ static int pack_sg_list(struct scatterlist *sg, int start,
|
||||||
if (s > count)
|
if (s > count)
|
||||||
s = count;
|
s = count;
|
||||||
BUG_ON(index > limit);
|
BUG_ON(index > limit);
|
||||||
|
/* Make sure we don't terminate early. */
|
||||||
|
sg_unmark_end(&sg[index]);
|
||||||
sg_set_buf(&sg[index++], data, s);
|
sg_set_buf(&sg[index++], data, s);
|
||||||
count -= s;
|
count -= s;
|
||||||
data += s;
|
data += s;
|
||||||
}
|
}
|
||||||
|
if (index-start)
|
||||||
|
sg_mark_end(&sg[index - 1]);
|
||||||
return index-start;
|
return index-start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -236,12 +239,17 @@ pack_sg_list_p(struct scatterlist *sg, int start, int limit,
|
||||||
s = rest_of_page(data);
|
s = rest_of_page(data);
|
||||||
if (s > count)
|
if (s > count)
|
||||||
s = count;
|
s = count;
|
||||||
|
/* Make sure we don't terminate early. */
|
||||||
|
sg_unmark_end(&sg[index]);
|
||||||
sg_set_page(&sg[index++], pdata[i++], s, data_off);
|
sg_set_page(&sg[index++], pdata[i++], s, data_off);
|
||||||
data_off = 0;
|
data_off = 0;
|
||||||
data += s;
|
data += s;
|
||||||
count -= s;
|
count -= s;
|
||||||
nr_pages--;
|
nr_pages--;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (index-start)
|
||||||
|
sg_mark_end(&sg[index - 1]);
|
||||||
return index - start;
|
return index - start;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -256,9 +264,10 @@ static int
|
||||||
p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
|
p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
int in, out;
|
int in, out, out_sgs, in_sgs;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
struct virtio_chan *chan = client->trans;
|
struct virtio_chan *chan = client->trans;
|
||||||
|
struct scatterlist *sgs[2];
|
||||||
|
|
||||||
p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n");
|
p9_debug(P9_DEBUG_TRANS, "9p debug: virtio request\n");
|
||||||
|
|
||||||
|
@ -266,14 +275,19 @@ p9_virtio_request(struct p9_client *client, struct p9_req_t *req)
|
||||||
req_retry:
|
req_retry:
|
||||||
spin_lock_irqsave(&chan->lock, flags);
|
spin_lock_irqsave(&chan->lock, flags);
|
||||||
|
|
||||||
|
out_sgs = in_sgs = 0;
|
||||||
/* Handle out VirtIO ring buffers */
|
/* Handle out VirtIO ring buffers */
|
||||||
out = pack_sg_list(chan->sg, 0,
|
out = pack_sg_list(chan->sg, 0,
|
||||||
VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
|
VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
|
||||||
|
if (out)
|
||||||
|
sgs[out_sgs++] = chan->sg;
|
||||||
|
|
||||||
in = pack_sg_list(chan->sg, out,
|
in = pack_sg_list(chan->sg, out,
|
||||||
VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
|
VIRTQUEUE_NUM, req->rc->sdata, req->rc->capacity);
|
||||||
|
if (in)
|
||||||
|
sgs[out_sgs + in_sgs++] = chan->sg + out;
|
||||||
|
|
||||||
err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc,
|
err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc,
|
||||||
GFP_ATOMIC);
|
GFP_ATOMIC);
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
if (err == -ENOSPC) {
|
if (err == -ENOSPC) {
|
||||||
|
@ -289,7 +303,7 @@ req_retry:
|
||||||
} else {
|
} else {
|
||||||
spin_unlock_irqrestore(&chan->lock, flags);
|
spin_unlock_irqrestore(&chan->lock, flags);
|
||||||
p9_debug(P9_DEBUG_TRANS,
|
p9_debug(P9_DEBUG_TRANS,
|
||||||
"virtio rpc add_buf returned failure\n");
|
"virtio rpc add_sgs returned failure\n");
|
||||||
return -EIO;
|
return -EIO;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -351,11 +365,12 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
|
||||||
char *uidata, char *uodata, int inlen,
|
char *uidata, char *uodata, int inlen,
|
||||||
int outlen, int in_hdr_len, int kern_buf)
|
int outlen, int in_hdr_len, int kern_buf)
|
||||||
{
|
{
|
||||||
int in, out, err;
|
int in, out, err, out_sgs, in_sgs;
|
||||||
unsigned long flags;
|
unsigned long flags;
|
||||||
int in_nr_pages = 0, out_nr_pages = 0;
|
int in_nr_pages = 0, out_nr_pages = 0;
|
||||||
struct page **in_pages = NULL, **out_pages = NULL;
|
struct page **in_pages = NULL, **out_pages = NULL;
|
||||||
struct virtio_chan *chan = client->trans;
|
struct virtio_chan *chan = client->trans;
|
||||||
|
struct scatterlist *sgs[4];
|
||||||
|
|
||||||
p9_debug(P9_DEBUG_TRANS, "virtio request\n");
|
p9_debug(P9_DEBUG_TRANS, "virtio request\n");
|
||||||
|
|
||||||
|
@ -396,13 +411,22 @@ p9_virtio_zc_request(struct p9_client *client, struct p9_req_t *req,
|
||||||
req->status = REQ_STATUS_SENT;
|
req->status = REQ_STATUS_SENT;
|
||||||
req_retry_pinned:
|
req_retry_pinned:
|
||||||
spin_lock_irqsave(&chan->lock, flags);
|
spin_lock_irqsave(&chan->lock, flags);
|
||||||
|
|
||||||
|
out_sgs = in_sgs = 0;
|
||||||
|
|
||||||
/* out data */
|
/* out data */
|
||||||
out = pack_sg_list(chan->sg, 0,
|
out = pack_sg_list(chan->sg, 0,
|
||||||
VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
|
VIRTQUEUE_NUM, req->tc->sdata, req->tc->size);
|
||||||
|
|
||||||
if (out_pages)
|
if (out)
|
||||||
|
sgs[out_sgs++] = chan->sg;
|
||||||
|
|
||||||
|
if (out_pages) {
|
||||||
|
sgs[out_sgs++] = chan->sg + out;
|
||||||
out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
|
out += pack_sg_list_p(chan->sg, out, VIRTQUEUE_NUM,
|
||||||
out_pages, out_nr_pages, uodata, outlen);
|
out_pages, out_nr_pages, uodata, outlen);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Take care of in data
|
* Take care of in data
|
||||||
* For example TREAD have 11.
|
* For example TREAD have 11.
|
||||||
|
@ -412,11 +436,17 @@ req_retry_pinned:
|
||||||
*/
|
*/
|
||||||
in = pack_sg_list(chan->sg, out,
|
in = pack_sg_list(chan->sg, out,
|
||||||
VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
|
VIRTQUEUE_NUM, req->rc->sdata, in_hdr_len);
|
||||||
if (in_pages)
|
if (in)
|
||||||
|
sgs[out_sgs + in_sgs++] = chan->sg + out;
|
||||||
|
|
||||||
|
if (in_pages) {
|
||||||
|
sgs[out_sgs + in_sgs++] = chan->sg + out + in;
|
||||||
in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
|
in += pack_sg_list_p(chan->sg, out + in, VIRTQUEUE_NUM,
|
||||||
in_pages, in_nr_pages, uidata, inlen);
|
in_pages, in_nr_pages, uidata, inlen);
|
||||||
|
}
|
||||||
|
|
||||||
err = virtqueue_add_buf(chan->vq, chan->sg, out, in, req->tc,
|
BUG_ON(out_sgs + in_sgs > ARRAY_SIZE(sgs));
|
||||||
|
err = virtqueue_add_sgs(chan->vq, sgs, out_sgs, in_sgs, req->tc,
|
||||||
GFP_ATOMIC);
|
GFP_ATOMIC);
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
if (err == -ENOSPC) {
|
if (err == -ENOSPC) {
|
||||||
|
@ -432,7 +462,7 @@ req_retry_pinned:
|
||||||
} else {
|
} else {
|
||||||
spin_unlock_irqrestore(&chan->lock, flags);
|
spin_unlock_irqrestore(&chan->lock, flags);
|
||||||
p9_debug(P9_DEBUG_TRANS,
|
p9_debug(P9_DEBUG_TRANS,
|
||||||
"virtio rpc add_buf returned failure\n");
|
"virtio rpc add_sgs returned failure\n");
|
||||||
err = -EIO;
|
err = -EIO;
|
||||||
goto err_out;
|
goto err_out;
|
||||||
}
|
}
|
||||||
|
|
|
@ -70,7 +70,7 @@ Running Lguest:
|
||||||
|
|
||||||
- Run an lguest as root:
|
- Run an lguest as root:
|
||||||
|
|
||||||
Documentation/virtual/lguest/lguest 64 vmlinux --tunnet=192.168.19.1 \
|
tools/lguest/lguest 64 vmlinux --tunnet=192.168.19.1 \
|
||||||
--block=rootfile root=/dev/vda
|
--block=rootfile root=/dev/vda
|
||||||
|
|
||||||
Explanation:
|
Explanation:
|
||||||
|
|
|
@ -1,12 +1,14 @@
|
||||||
all: test mod
|
all: test mod
|
||||||
test: virtio_test
|
test: virtio_test vringh_test
|
||||||
virtio_test: virtio_ring.o virtio_test.o
|
virtio_test: virtio_ring.o virtio_test.o
|
||||||
CFLAGS += -g -O2 -Wall -I. -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -MMD
|
vringh_test: vringh_test.o vringh.o virtio_ring.o
|
||||||
vpath %.c ../../drivers/virtio
|
|
||||||
|
CFLAGS += -g -O2 -Wall -I. -I ../../usr/include/ -Wno-pointer-sign -fno-strict-overflow -fno-strict-aliasing -fno-common -MMD -U_FORTIFY_SOURCE
|
||||||
|
vpath %.c ../../drivers/virtio ../../drivers/vhost
|
||||||
mod:
|
mod:
|
||||||
${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test
|
${MAKE} -C `pwd`/../.. M=`pwd`/vhost_test
|
||||||
.PHONY: all test mod clean
|
.PHONY: all test mod clean
|
||||||
clean:
|
clean:
|
||||||
${RM} *.o vhost_test/*.o vhost_test/.*.cmd \
|
${RM} *.o vringh_test virtio_test vhost_test/*.o vhost_test/.*.cmd \
|
||||||
vhost_test/Module.symvers vhost_test/modules.order *.d
|
vhost_test/Module.symvers vhost_test/modules.order *.d
|
||||||
-include *.d
|
-include *.d
|
||||||
|
|
|
@ -0,0 +1,14 @@
|
||||||
|
#if defined(__i386__) || defined(__x86_64__)
|
||||||
|
#define barrier() asm volatile("" ::: "memory")
|
||||||
|
#define mb() __sync_synchronize()
|
||||||
|
|
||||||
|
#define smp_mb() mb()
|
||||||
|
# define smp_rmb() barrier()
|
||||||
|
# define smp_wmb() barrier()
|
||||||
|
/* Weak barriers should be used. If not - it's a bug */
|
||||||
|
# define rmb() abort()
|
||||||
|
# define wmb() abort()
|
||||||
|
#else
|
||||||
|
#error Please fill in barrier macros
|
||||||
|
#endif
|
||||||
|
|
|
@ -0,0 +1,10 @@
|
||||||
|
#ifndef BUG_H
|
||||||
|
#define BUG_H
|
||||||
|
|
||||||
|
#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))
|
||||||
|
|
||||||
|
#define BUILD_BUG_ON(x)
|
||||||
|
|
||||||
|
#define BUG() abort()
|
||||||
|
|
||||||
|
#endif /* BUG_H */
|
|
@ -0,0 +1,26 @@
|
||||||
|
#ifndef ERR_H
|
||||||
|
#define ERR_H
|
||||||
|
#define MAX_ERRNO 4095
|
||||||
|
|
||||||
|
#define IS_ERR_VALUE(x) unlikely((x) >= (unsigned long)-MAX_ERRNO)
|
||||||
|
|
||||||
|
static inline void * __must_check ERR_PTR(long error)
|
||||||
|
{
|
||||||
|
return (void *) error;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline long __must_check PTR_ERR(const void *ptr)
|
||||||
|
{
|
||||||
|
return (long) ptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline long __must_check IS_ERR(const void *ptr)
|
||||||
|
{
|
||||||
|
return IS_ERR_VALUE((unsigned long)ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline long __must_check IS_ERR_OR_NULL(const void *ptr)
|
||||||
|
{
|
||||||
|
return !ptr || IS_ERR_VALUE((unsigned long)ptr);
|
||||||
|
}
|
||||||
|
#endif /* ERR_H */
|
|
@ -0,0 +1,5 @@
|
||||||
|
#define EXPORT_SYMBOL(sym)
|
||||||
|
#define EXPORT_SYMBOL_GPL(sym)
|
||||||
|
#define EXPORT_SYMBOL_GPL_FUTURE(sym)
|
||||||
|
#define EXPORT_UNUSED_SYMBOL(sym)
|
||||||
|
#define EXPORT_UNUSED_SYMBOL_GPL(sym)
|
|
@ -0,0 +1 @@
|
||||||
|
#include "../../../include/linux/irqreturn.h"
|
|
@ -0,0 +1,112 @@
|
||||||
|
#ifndef KERNEL_H
|
||||||
|
#define KERNEL_H
|
||||||
|
#include <stdbool.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <stddef.h>
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <string.h>
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdarg.h>
|
||||||
|
|
||||||
|
#include <linux/types.h>
|
||||||
|
#include <linux/printk.h>
|
||||||
|
#include <linux/bug.h>
|
||||||
|
#include <errno.h>
|
||||||
|
#include <unistd.h>
|
||||||
|
#include <asm/barrier.h>
|
||||||
|
|
||||||
|
#define CONFIG_SMP
|
||||||
|
|
||||||
|
#define PAGE_SIZE getpagesize()
|
||||||
|
#define PAGE_MASK (~(PAGE_SIZE-1))
|
||||||
|
|
||||||
|
typedef unsigned long long dma_addr_t;
|
||||||
|
typedef size_t __kernel_size_t;
|
||||||
|
|
||||||
|
struct page {
|
||||||
|
unsigned long long dummy;
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
 * Physical == Virtual: addresses are passed through unchanged.
 * Every macro argument is parenthesized so that expression arguments such
 * as "buf + 1" are evaluated as a pointer expression before the cast.
 */
#define virt_to_phys(p) ((unsigned long)(p))
#define phys_to_virt(a) ((void *)(unsigned long)(a))
/*
 * Page address: the virtual address rounded down to a page boundary
 * (masked with PAGE_MASK), carried around as a "struct page *".
 */
#define page_to_phys(p) ((dma_addr_t)(unsigned long)(p))
#define virt_to_page(p) ((struct page *)((unsigned long)(p) & PAGE_MASK))

/* Byte offset of an address within its page. */
#define offset_in_page(p) (((unsigned long)(p)) % PAGE_SIZE)
|
||||||
|
|
||||||
|
#define __printf(a,b) __attribute__((format(printf,a,b)))
|
||||||
|
|
||||||
|
typedef enum {
|
||||||
|
GFP_KERNEL,
|
||||||
|
GFP_ATOMIC,
|
||||||
|
__GFP_HIGHMEM,
|
||||||
|
__GFP_HIGH
|
||||||
|
} gfp_t;
|
||||||
|
|
||||||
|
#define ARRAY_SIZE(x) (sizeof(x)/sizeof(x[0]))
|
||||||
|
|
||||||
|
extern void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
|
||||||
|
/*
 * Userspace stand-in for kmalloc().
 * Returns __kmalloc_fake whenever that pointer is non-NULL; otherwise the
 * request is forwarded to malloc(s). The gfp argument is not used.
 */
static inline void *kmalloc(size_t s, gfp_t gfp)
|
||||||
|
{
|
||||||
|
if (__kmalloc_fake)
|
||||||
|
return __kmalloc_fake;
|
||||||
|
return malloc(s);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Userspace stand-in for kfree().
 * Pointers that fall inside [__kfree_ignore_start, __kfree_ignore_end) are
 * left alone; any other pointer is handed to free().
 */
static inline void kfree(void *p)
|
||||||
|
{
|
||||||
|
if (p >= __kfree_ignore_start && p < __kfree_ignore_end)
|
||||||
|
return;
|
||||||
|
free(p);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void *krealloc(void *p, size_t s, gfp_t gfp)
|
||||||
|
{
|
||||||
|
return realloc(p, s);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*
 * Userspace stand-in for __get_free_page(): allocates one PAGE_SIZE-aligned
 * buffer of PAGE_SIZE bytes with posix_memalign(). The gfp argument is not
 * used.
 *
 * Returns the buffer address as an unsigned long, or 0 if the allocation
 * fails. posix_memalign() leaves the output pointer unspecified on error,
 * so its return code must be checked before the pointer is used.
 */
static inline unsigned long __get_free_page(gfp_t gfp)
{
	void *p;

	if (posix_memalign(&p, PAGE_SIZE, PAGE_SIZE) != 0)
		return 0;

	return (unsigned long)p;
}
|
||||||
|
|
||||||
|
static inline void free_page(unsigned long addr)
|
||||||
|
{
|
||||||
|
free((void *)addr);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define container_of(ptr, type, member) ({ \
|
||||||
|
const typeof( ((type *)0)->member ) *__mptr = (ptr); \
|
||||||
|
(type *)( (char *)__mptr - offsetof(type,member) );})
|
||||||
|
|
||||||
|
#define uninitialized_var(x) x = x
|
||||||
|
|
||||||
|
# ifndef likely
|
||||||
|
# define likely(x) (__builtin_expect(!!(x), 1))
|
||||||
|
# endif
|
||||||
|
# ifndef unlikely
|
||||||
|
# define unlikely(x) (__builtin_expect(!!(x), 0))
|
||||||
|
# endif
|
||||||
|
|
||||||
|
#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
|
||||||
|
#ifdef DEBUG
|
||||||
|
#define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
|
||||||
|
#else
|
||||||
|
#define pr_debug(format, ...) do {} while (0)
|
||||||
|
#endif
|
||||||
|
#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
|
||||||
|
#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
|
||||||
|
|
||||||
|
#define min(x, y) ({ \
|
||||||
|
typeof(x) _min1 = (x); \
|
||||||
|
typeof(y) _min2 = (y); \
|
||||||
|
(void) (&_min1 == &_min2); \
|
||||||
|
_min1 < _min2 ? _min1 : _min2; })
|
||||||
|
|
||||||
|
#endif /* KERNEL_H */
|
|
@ -0,0 +1 @@
|
||||||
|
#include <linux/export.h>
|
|
@ -0,0 +1,4 @@
|
||||||
|
#include "../../../include/linux/kern_levels.h"
|
||||||
|
|
||||||
|
#define printk printf
|
||||||
|
#define vprintk vprintf
|
|
@ -0,0 +1,4 @@
|
||||||
|
#define DEFINE_RATELIMIT_STATE(name, interval_init, burst_init) int name = 0
|
||||||
|
|
||||||
|
#define __ratelimit(x) (*(x))
|
||||||
|
|
|
@ -0,0 +1,189 @@
|
||||||
|
#ifndef SCATTERLIST_H
|
||||||
|
#define SCATTERLIST_H
|
||||||
|
#include <linux/kernel.h>
|
||||||
|
|
||||||
|
struct scatterlist {
|
||||||
|
unsigned long page_link;
|
||||||
|
unsigned int offset;
|
||||||
|
unsigned int length;
|
||||||
|
dma_addr_t dma_address;
|
||||||
|
};
|
||||||
|
|
||||||
|
/* Scatterlist helpers, stolen from linux/scatterlist.h */
|
||||||
|
#define sg_is_chain(sg) ((sg)->page_link & 0x01)
|
||||||
|
#define sg_is_last(sg) ((sg)->page_link & 0x02)
|
||||||
|
#define sg_chain_ptr(sg) \
|
||||||
|
((struct scatterlist *) ((sg)->page_link & ~0x03))
|
||||||
|
|
||||||
|
/**
|
||||||
|
* sg_assign_page - Assign a given page to an SG entry
|
||||||
|
* @sg: SG entry
|
||||||
|
* @page: The page
|
||||||
|
*
|
||||||
|
* Description:
|
||||||
|
* Assign page to sg entry. Also see sg_set_page(), the most commonly used
|
||||||
|
* variant.
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
|
||||||
|
{
|
||||||
|
unsigned long page_link = sg->page_link & 0x3;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* In order for the low bit stealing approach to work, pages
|
||||||
|
* must be aligned at a 32-bit boundary as a minimum.
|
||||||
|
*/
|
||||||
|
BUG_ON((unsigned long) page & 0x03);
|
||||||
|
#ifdef CONFIG_DEBUG_SG
|
||||||
|
BUG_ON(sg->sg_magic != SG_MAGIC);
|
||||||
|
BUG_ON(sg_is_chain(sg));
|
||||||
|
#endif
|
||||||
|
sg->page_link = page_link | (unsigned long) page;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* sg_set_page - Set sg entry to point at given page
|
||||||
|
* @sg: SG entry
|
||||||
|
* @page: The page
|
||||||
|
* @len: Length of data
|
||||||
|
* @offset: Offset into page
|
||||||
|
*
|
||||||
|
* Description:
|
||||||
|
* Use this function to set an sg entry pointing at a page, never assign
|
||||||
|
* the page directly. We encode sg table information in the lower bits
|
||||||
|
* of the page pointer. See sg_page() for looking up the page belonging
|
||||||
|
* to an sg entry.
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
static inline void sg_set_page(struct scatterlist *sg, struct page *page,
|
||||||
|
unsigned int len, unsigned int offset)
|
||||||
|
{
|
||||||
|
sg_assign_page(sg, page);
|
||||||
|
sg->offset = offset;
|
||||||
|
sg->length = len;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct page *sg_page(struct scatterlist *sg)
|
||||||
|
{
|
||||||
|
#ifdef CONFIG_DEBUG_SG
|
||||||
|
BUG_ON(sg->sg_magic != SG_MAGIC);
|
||||||
|
BUG_ON(sg_is_chain(sg));
|
||||||
|
#endif
|
||||||
|
return (struct page *)((sg)->page_link & ~0x3);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Loop over each sg element, following the pointer to a new list if necessary
|
||||||
|
*/
|
||||||
|
#define for_each_sg(sglist, sg, nr, __i) \
|
||||||
|
for (__i = 0, sg = (sglist); __i < (nr); __i++, sg = sg_next(sg))
|
||||||
|
|
||||||
|
/**
|
||||||
|
* sg_chain - Chain two sglists together
|
||||||
|
* @prv: First scatterlist
|
||||||
|
* @prv_nents: Number of entries in prv
|
||||||
|
* @sgl: Second scatterlist
|
||||||
|
*
|
||||||
|
* Description:
|
||||||
|
* Links @prv@ and @sgl@ together, to form a longer scatterlist.
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
|
||||||
|
struct scatterlist *sgl)
|
||||||
|
{
|
||||||
|
/*
|
||||||
|
* offset and length are unused for chain entry. Clear them.
|
||||||
|
*/
|
||||||
|
prv[prv_nents - 1].offset = 0;
|
||||||
|
prv[prv_nents - 1].length = 0;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Set lowest bit to indicate a link pointer, and make sure to clear
|
||||||
|
* the termination bit if it happens to be set.
|
||||||
|
*/
|
||||||
|
prv[prv_nents - 1].page_link = ((unsigned long) sgl | 0x01) & ~0x02;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* sg_mark_end - Mark the end of the scatterlist
|
||||||
|
* @sg: SG entry
|
||||||
|
*
|
||||||
|
* Description:
|
||||||
|
* Marks the passed in sg entry as the termination point for the sg
|
||||||
|
* table. A call to sg_next() on this entry will return NULL.
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
static inline void sg_mark_end(struct scatterlist *sg)
|
||||||
|
{
|
||||||
|
#ifdef CONFIG_DEBUG_SG
|
||||||
|
BUG_ON(sg->sg_magic != SG_MAGIC);
|
||||||
|
#endif
|
||||||
|
/*
|
||||||
|
* Set termination bit, clear potential chain bit
|
||||||
|
*/
|
||||||
|
sg->page_link |= 0x02;
|
||||||
|
sg->page_link &= ~0x01;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* sg_unmark_end - Undo setting the end of the scatterlist
|
||||||
|
* @sg: SG entry
|
||||||
|
*
|
||||||
|
* Description:
|
||||||
|
* Removes the termination marker from the given entry of the scatterlist.
|
||||||
|
*
|
||||||
|
**/
|
||||||
|
static inline void sg_unmark_end(struct scatterlist *sg)
|
||||||
|
{
|
||||||
|
#ifdef CONFIG_DEBUG_SG
|
||||||
|
BUG_ON(sg->sg_magic != SG_MAGIC);
|
||||||
|
#endif
|
||||||
|
sg->page_link &= ~0x02;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline struct scatterlist *sg_next(struct scatterlist *sg)
|
||||||
|
{
|
||||||
|
#ifdef CONFIG_DEBUG_SG
|
||||||
|
BUG_ON(sg->sg_magic != SG_MAGIC);
|
||||||
|
#endif
|
||||||
|
if (sg_is_last(sg))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
sg++;
|
||||||
|
if (unlikely(sg_is_chain(sg)))
|
||||||
|
sg = sg_chain_ptr(sg);
|
||||||
|
|
||||||
|
return sg;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
|
||||||
|
{
|
||||||
|
memset(sgl, 0, sizeof(*sgl) * nents);
|
||||||
|
#ifdef CONFIG_DEBUG_SG
|
||||||
|
{
|
||||||
|
unsigned int i;
|
||||||
|
for (i = 0; i < nents; i++)
|
||||||
|
sgl[i].sg_magic = SG_MAGIC;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
sg_mark_end(&sgl[nents - 1]);
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline dma_addr_t sg_phys(struct scatterlist *sg)
|
||||||
|
{
|
||||||
|
return page_to_phys(sg_page(sg)) + sg->offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
|
||||||
|
unsigned int buflen)
|
||||||
|
{
|
||||||
|
sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline void sg_init_one(struct scatterlist *sg,
|
||||||
|
const void *buf, unsigned int buflen)
|
||||||
|
{
|
||||||
|
sg_init_table(sg, 1);
|
||||||
|
sg_set_buf(sg, buf, buflen);
|
||||||
|
}
|
||||||
|
#endif /* SCATTERLIST_H */
|
|
@ -0,0 +1,28 @@
|
||||||
|
#ifndef TYPES_H
|
||||||
|
#define TYPES_H
|
||||||
|
#include <stdint.h>
|
||||||
|
|
||||||
|
#define __force
|
||||||
|
#define __user
|
||||||
|
#define __must_check
|
||||||
|
#define __cold
|
||||||
|
|
||||||
|
typedef uint64_t u64;
|
||||||
|
typedef int64_t s64;
|
||||||
|
typedef uint32_t u32;
|
||||||
|
typedef int32_t s32;
|
||||||
|
typedef uint16_t u16;
|
||||||
|
typedef int16_t s16;
|
||||||
|
typedef uint8_t u8;
|
||||||
|
typedef int8_t s8;
|
||||||
|
|
||||||
|
typedef uint64_t __u64;
|
||||||
|
typedef int64_t __s64;
|
||||||
|
typedef uint32_t __u32;
|
||||||
|
typedef int32_t __s32;
|
||||||
|
typedef uint16_t __u16;
|
||||||
|
typedef int16_t __s16;
|
||||||
|
typedef uint8_t __u8;
|
||||||
|
typedef int8_t __s8;
|
||||||
|
|
||||||
|
#endif /* TYPES_H */
|
|
@ -0,0 +1,50 @@
|
||||||
|
#ifndef UACCESS_H
|
||||||
|
#define UACCESS_H
|
||||||
|
extern void *__user_addr_min, *__user_addr_max;
|
||||||
|
|
||||||
|
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
|
||||||
|
|
||||||
|
static inline void __chk_user_ptr(const volatile void *p, size_t size)
|
||||||
|
{
|
||||||
|
assert(p >= __user_addr_min && p + size <= __user_addr_max);
|
||||||
|
}
|
||||||
|
|
||||||
|
#define put_user(x, ptr) \
|
||||||
|
({ \
|
||||||
|
typeof(ptr) __pu_ptr = (ptr); \
|
||||||
|
__chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \
|
||||||
|
ACCESS_ONCE(*(__pu_ptr)) = x; \
|
||||||
|
0; \
|
||||||
|
})
|
||||||
|
|
||||||
|
#define get_user(x, ptr) \
|
||||||
|
({ \
|
||||||
|
typeof(ptr) __pu_ptr = (ptr); \
|
||||||
|
__chk_user_ptr(__pu_ptr, sizeof(*__pu_ptr)); \
|
||||||
|
x = ACCESS_ONCE(*(__pu_ptr)); \
|
||||||
|
0; \
|
||||||
|
})
|
||||||
|
|
||||||
|
/*
 * Copy n bytes from 'from' to 'to', one byte at a time, through
 * volatile-qualified pointers. Copying zero bytes is a no-op.
 *
 * Declared static inline like the other helpers in this header, since it
 * is defined in a header and only called from copy_from_user() and
 * copy_to_user().
 */
static inline void volatile_memcpy(volatile char *to, const volatile char *from,
				   unsigned long n)
{
	while (n--)
		*(to++) = *(from++);
}
|
||||||
|
|
||||||
|
static inline int copy_from_user(void *to, const void __user volatile *from,
|
||||||
|
unsigned long n)
|
||||||
|
{
|
||||||
|
__chk_user_ptr(from, n);
|
||||||
|
volatile_memcpy(to, from, n);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static inline int copy_to_user(void __user volatile *to, const void *from,
|
||||||
|
unsigned long n)
|
||||||
|
{
|
||||||
|
__chk_user_ptr(to, n);
|
||||||
|
volatile_memcpy(to, from, n);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif /* UACCESS_H */
|
|
@ -0,0 +1,3 @@
|
||||||
|
#include <linux/kernel.h>
|
||||||
|
|
||||||
|
#include "../../../include/linux/uio.h"
|
|
@ -1,127 +1,7 @@
|
||||||
#ifndef LINUX_VIRTIO_H
|
#ifndef LINUX_VIRTIO_H
|
||||||
#define LINUX_VIRTIO_H
|
#define LINUX_VIRTIO_H
|
||||||
|
#include <linux/scatterlist.h>
|
||||||
#include <stdbool.h>
|
#include <linux/kernel.h>
|
||||||
#include <stdlib.h>
|
|
||||||
#include <stddef.h>
|
|
||||||
#include <stdio.h>
|
|
||||||
#include <string.h>
|
|
||||||
#include <assert.h>
|
|
||||||
|
|
||||||
#include <linux/types.h>
|
|
||||||
#include <errno.h>
|
|
||||||
|
|
||||||
typedef unsigned long long dma_addr_t;
|
|
||||||
|
|
||||||
struct scatterlist {
|
|
||||||
unsigned long page_link;
|
|
||||||
unsigned int offset;
|
|
||||||
unsigned int length;
|
|
||||||
dma_addr_t dma_address;
|
|
||||||
};
|
|
||||||
|
|
||||||
struct page {
|
|
||||||
unsigned long long dummy;
|
|
||||||
};
|
|
||||||
|
|
||||||
#define BUG_ON(__BUG_ON_cond) assert(!(__BUG_ON_cond))
|
|
||||||
|
|
||||||
/* Physical == Virtual */
|
|
||||||
#define virt_to_phys(p) ((unsigned long)p)
|
|
||||||
#define phys_to_virt(a) ((void *)(unsigned long)(a))
|
|
||||||
/* Page address: Virtual / 4K */
|
|
||||||
#define virt_to_page(p) ((struct page*)((virt_to_phys(p) / 4096) * \
|
|
||||||
sizeof(struct page)))
|
|
||||||
#define offset_in_page(p) (((unsigned long)p) % 4096)
|
|
||||||
#define sg_phys(sg) ((sg->page_link & ~0x3) / sizeof(struct page) * 4096 + \
|
|
||||||
sg->offset)
|
|
||||||
static inline void sg_mark_end(struct scatterlist *sg)
|
|
||||||
{
|
|
||||||
/*
|
|
||||||
* Set termination bit, clear potential chain bit
|
|
||||||
*/
|
|
||||||
sg->page_link |= 0x02;
|
|
||||||
sg->page_link &= ~0x01;
|
|
||||||
}
|
|
||||||
static inline void sg_init_table(struct scatterlist *sgl, unsigned int nents)
|
|
||||||
{
|
|
||||||
memset(sgl, 0, sizeof(*sgl) * nents);
|
|
||||||
sg_mark_end(&sgl[nents - 1]);
|
|
||||||
}
|
|
||||||
static inline void sg_assign_page(struct scatterlist *sg, struct page *page)
|
|
||||||
{
|
|
||||||
unsigned long page_link = sg->page_link & 0x3;
|
|
||||||
|
|
||||||
/*
|
|
||||||
* In order for the low bit stealing approach to work, pages
|
|
||||||
* must be aligned at a 32-bit boundary as a minimum.
|
|
||||||
*/
|
|
||||||
BUG_ON((unsigned long) page & 0x03);
|
|
||||||
sg->page_link = page_link | (unsigned long) page;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void sg_set_page(struct scatterlist *sg, struct page *page,
|
|
||||||
unsigned int len, unsigned int offset)
|
|
||||||
{
|
|
||||||
sg_assign_page(sg, page);
|
|
||||||
sg->offset = offset;
|
|
||||||
sg->length = len;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
|
|
||||||
unsigned int buflen)
|
|
||||||
{
|
|
||||||
sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf));
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void sg_init_one(struct scatterlist *sg, const void *buf, unsigned int buflen)
|
|
||||||
{
|
|
||||||
sg_init_table(sg, 1);
|
|
||||||
sg_set_buf(sg, buf, buflen);
|
|
||||||
}
|
|
||||||
|
|
||||||
typedef __u16 u16;
|
|
||||||
|
|
||||||
typedef enum {
|
|
||||||
GFP_KERNEL,
|
|
||||||
GFP_ATOMIC,
|
|
||||||
} gfp_t;
|
|
||||||
typedef enum {
|
|
||||||
IRQ_NONE,
|
|
||||||
IRQ_HANDLED
|
|
||||||
} irqreturn_t;
|
|
||||||
|
|
||||||
static inline void *kmalloc(size_t s, gfp_t gfp)
|
|
||||||
{
|
|
||||||
return malloc(s);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void kfree(void *p)
|
|
||||||
{
|
|
||||||
free(p);
|
|
||||||
}
|
|
||||||
|
|
||||||
#define container_of(ptr, type, member) ({ \
|
|
||||||
const typeof( ((type *)0)->member ) *__mptr = (ptr); \
|
|
||||||
(type *)( (char *)__mptr - offsetof(type,member) );})
|
|
||||||
|
|
||||||
#define uninitialized_var(x) x = x
|
|
||||||
|
|
||||||
# ifndef likely
|
|
||||||
# define likely(x) (__builtin_expect(!!(x), 1))
|
|
||||||
# endif
|
|
||||||
# ifndef unlikely
|
|
||||||
# define unlikely(x) (__builtin_expect(!!(x), 0))
|
|
||||||
# endif
|
|
||||||
|
|
||||||
#define pr_err(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
|
|
||||||
#ifdef DEBUG
|
|
||||||
#define pr_debug(format, ...) fprintf (stderr, format, ## __VA_ARGS__)
|
|
||||||
#else
|
|
||||||
#define pr_debug(format, ...) do {} while (0)
|
|
||||||
#endif
|
|
||||||
#define dev_err(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
|
|
||||||
#define dev_warn(dev, format, ...) fprintf (stderr, format, ## __VA_ARGS__)
|
|
||||||
|
|
||||||
/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
|
/* TODO: empty stubs for now. Broken but enough for virtio_ring.c */
|
||||||
#define list_add_tail(a, b) do {} while (0)
|
#define list_add_tail(a, b) do {} while (0)
|
||||||
|
@ -131,6 +11,7 @@ static inline void kfree(void *p)
|
||||||
#define BITS_PER_BYTE 8
|
#define BITS_PER_BYTE 8
|
||||||
#define BITS_PER_LONG (sizeof(long) * BITS_PER_BYTE)
|
#define BITS_PER_LONG (sizeof(long) * BITS_PER_BYTE)
|
||||||
#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
|
#define BIT_MASK(nr) (1UL << ((nr) % BITS_PER_LONG))
|
||||||
|
|
||||||
/* TODO: Not atomic as it should be:
|
/* TODO: Not atomic as it should be:
|
||||||
* we don't use this for anything important. */
|
* we don't use this for anything important. */
|
||||||
static inline void clear_bit(int nr, volatile unsigned long *addr)
|
static inline void clear_bit(int nr, volatile unsigned long *addr)
|
||||||
|
@ -145,10 +26,6 @@ static inline int test_bit(int nr, const volatile unsigned long *addr)
|
||||||
{
|
{
|
||||||
return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
|
return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
|
||||||
}
|
}
|
||||||
|
|
||||||
/* The only feature we care to support */
|
|
||||||
#define virtio_has_feature(dev, feature) \
|
|
||||||
test_bit((feature), (dev)->features)
|
|
||||||
/* end of stubs */
|
/* end of stubs */
|
||||||
|
|
||||||
struct virtio_device {
|
struct virtio_device {
|
||||||
|
@ -163,39 +40,32 @@ struct virtqueue {
|
||||||
void (*callback)(struct virtqueue *vq);
|
void (*callback)(struct virtqueue *vq);
|
||||||
const char *name;
|
const char *name;
|
||||||
struct virtio_device *vdev;
|
struct virtio_device *vdev;
|
||||||
|
unsigned int index;
|
||||||
|
unsigned int num_free;
|
||||||
void *priv;
|
void *priv;
|
||||||
};
|
};
|
||||||
|
|
||||||
#define EXPORT_SYMBOL_GPL(__EXPORT_SYMBOL_GPL_name) \
|
|
||||||
void __EXPORT_SYMBOL_GPL##__EXPORT_SYMBOL_GPL_name() { \
|
|
||||||
}
|
|
||||||
#define MODULE_LICENSE(__MODULE_LICENSE_value) \
|
#define MODULE_LICENSE(__MODULE_LICENSE_value) \
|
||||||
const char *__MODULE_LICENSE_name = __MODULE_LICENSE_value
|
const char *__MODULE_LICENSE_name = __MODULE_LICENSE_value
|
||||||
|
|
||||||
#define CONFIG_SMP
|
|
||||||
|
|
||||||
#if defined(__i386__) || defined(__x86_64__)
|
|
||||||
#define barrier() asm volatile("" ::: "memory")
|
|
||||||
#define mb() __sync_synchronize()
|
|
||||||
|
|
||||||
#define smp_mb() mb()
|
|
||||||
# define smp_rmb() barrier()
|
|
||||||
# define smp_wmb() barrier()
|
|
||||||
/* Weak barriers should be used. If not - it's a bug */
|
|
||||||
# define rmb() abort()
|
|
||||||
# define wmb() abort()
|
|
||||||
#else
|
|
||||||
#error Please fill in barrier macros
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Interfaces exported by virtio_ring. */
|
/* Interfaces exported by virtio_ring. */
|
||||||
int virtqueue_add_buf(struct virtqueue *vq,
|
int virtqueue_add_sgs(struct virtqueue *vq,
|
||||||
struct scatterlist sg[],
|
struct scatterlist *sgs[],
|
||||||
unsigned int out_num,
|
unsigned int out_sgs,
|
||||||
unsigned int in_num,
|
unsigned int in_sgs,
|
||||||
void *data,
|
void *data,
|
||||||
gfp_t gfp);
|
gfp_t gfp);
|
||||||
|
|
||||||
|
int virtqueue_add_outbuf(struct virtqueue *vq,
|
||||||
|
struct scatterlist sg[], unsigned int num,
|
||||||
|
void *data,
|
||||||
|
gfp_t gfp);
|
||||||
|
|
||||||
|
int virtqueue_add_inbuf(struct virtqueue *vq,
|
||||||
|
struct scatterlist sg[], unsigned int num,
|
||||||
|
void *data,
|
||||||
|
gfp_t gfp);
|
||||||
|
|
||||||
void virtqueue_kick(struct virtqueue *vq);
|
void virtqueue_kick(struct virtqueue *vq);
|
||||||
|
|
||||||
void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
|
void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len);
|
||||||
|
@ -206,7 +76,8 @@ bool virtqueue_enable_cb(struct virtqueue *vq);
|
||||||
bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
|
bool virtqueue_enable_cb_delayed(struct virtqueue *vq);
|
||||||
|
|
||||||
void *virtqueue_detach_unused_buf(struct virtqueue *vq);
|
void *virtqueue_detach_unused_buf(struct virtqueue *vq);
|
||||||
struct virtqueue *vring_new_virtqueue(unsigned int num,
|
struct virtqueue *vring_new_virtqueue(unsigned int index,
|
||||||
|
unsigned int num,
|
||||||
unsigned int vring_align,
|
unsigned int vring_align,
|
||||||
struct virtio_device *vdev,
|
struct virtio_device *vdev,
|
||||||
bool weak_barriers,
|
bool weak_barriers,
|
||||||
|
|
|
@ -0,0 +1,6 @@
|
||||||
|
#define VIRTIO_TRANSPORT_F_START 28
|
||||||
|
#define VIRTIO_TRANSPORT_F_END 32
|
||||||
|
|
||||||
|
#define virtio_has_feature(dev, feature) \
|
||||||
|
test_bit((feature), (dev)->features)
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
#include "../../../include/linux/virtio_ring.h"
|
|
@ -0,0 +1 @@
|
||||||
|
#include "../../../include/linux/vringh.h"
|
|
@ -0,0 +1 @@
|
||||||
|
#include <sys/uio.h>
|
|
@ -0,0 +1 @@
|
||||||
|
#include "../../../../include/uapi/linux/virtio_config.h"
|
|
@ -0,0 +1,4 @@
|
||||||
|
#ifndef VIRTIO_RING_H
|
||||||
|
#define VIRTIO_RING_H
|
||||||
|
#include "../../../../include/uapi/linux/virtio_ring.h"
|
||||||
|
#endif /* VIRTIO_RING_H */
|
|
@ -10,11 +10,15 @@
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
#include <fcntl.h>
|
#include <fcntl.h>
|
||||||
|
#include <stdbool.h>
|
||||||
#include <linux/vhost.h>
|
#include <linux/vhost.h>
|
||||||
#include <linux/virtio.h>
|
#include <linux/virtio.h>
|
||||||
#include <linux/virtio_ring.h>
|
#include <linux/virtio_ring.h>
|
||||||
#include "../../drivers/vhost/test.h"
|
#include "../../drivers/vhost/test.h"
|
||||||
|
|
||||||
|
/* Unused */
|
||||||
|
void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
|
||||||
|
|
||||||
struct vq_info {
|
struct vq_info {
|
||||||
int kick;
|
int kick;
|
||||||
int call;
|
int call;
|
||||||
|
@ -92,7 +96,8 @@ static void vq_info_add(struct vdev_info *dev, int num)
|
||||||
assert(r >= 0);
|
assert(r >= 0);
|
||||||
memset(info->ring, 0, vring_size(num, 4096));
|
memset(info->ring, 0, vring_size(num, 4096));
|
||||||
vring_init(&info->vring, num, info->ring, 4096);
|
vring_init(&info->vring, num, info->ring, 4096);
|
||||||
info->vq = vring_new_virtqueue(info->vring.num, 4096, &dev->vdev,
|
info->vq = vring_new_virtqueue(info->idx,
|
||||||
|
info->vring.num, 4096, &dev->vdev,
|
||||||
true, info->ring,
|
true, info->ring,
|
||||||
vq_notify, vq_callback, "test");
|
vq_notify, vq_callback, "test");
|
||||||
assert(info->vq);
|
assert(info->vq);
|
||||||
|
@ -161,9 +166,9 @@ static void run_test(struct vdev_info *dev, struct vq_info *vq,
|
||||||
do {
|
do {
|
||||||
if (started < bufs) {
|
if (started < bufs) {
|
||||||
sg_init_one(&sl, dev->buf, dev->buf_size);
|
sg_init_one(&sl, dev->buf, dev->buf_size);
|
||||||
r = virtqueue_add_buf(vq->vq, &sl, 1, 0,
|
r = virtqueue_add_outbuf(vq->vq, &sl, 1,
|
||||||
dev->buf + started,
|
dev->buf + started,
|
||||||
GFP_ATOMIC);
|
GFP_ATOMIC);
|
||||||
if (likely(r == 0)) {
|
if (likely(r == 0)) {
|
||||||
++started;
|
++started;
|
||||||
virtqueue_kick(vq->vq);
|
virtqueue_kick(vq->vq);
|
||||||
|
|
|
@ -0,0 +1,741 @@
|
||||||
|
/* Simple test of virtio code, entirely in userspace. */
|
||||||
|
#define _GNU_SOURCE
|
||||||
|
#include <sched.h>
|
||||||
|
#include <err.h>
|
||||||
|
#include <linux/kernel.h>
|
||||||
|
#include <linux/err.h>
|
||||||
|
#include <linux/virtio.h>
|
||||||
|
#include <linux/vringh.h>
|
||||||
|
#include <linux/virtio_ring.h>
|
||||||
|
#include <linux/uaccess.h>
|
||||||
|
#include <sys/types.h>
|
||||||
|
#include <sys/stat.h>
|
||||||
|
#include <sys/mman.h>
|
||||||
|
#include <sys/wait.h>
|
||||||
|
#include <fcntl.h>
|
||||||
|
|
||||||
|
#define USER_MEM (1024*1024)
|
||||||
|
void *__user_addr_min, *__user_addr_max;
|
||||||
|
void *__kmalloc_fake, *__kfree_ignore_start, *__kfree_ignore_end;
|
||||||
|
static u64 user_addr_offset;
|
||||||
|
|
||||||
|
#define RINGSIZE 256
|
||||||
|
#define ALIGN 4096
|
||||||
|
|
||||||
|
/*
 * Host-notification hook for tests in which the guest must never kick
 * the host: reaching this function aborts the program.
 */
static void never_notify_host(struct virtqueue *vq)
{
	(void)vq;	/* unused: any call at all is fatal */
	abort();
}
|
||||||
|
|
||||||
|
/*
 * Guest callback hook for tests in which the guest must never be
 * called back: reaching this function aborts the program.
 */
static void never_callback_guest(struct virtqueue *vq)
{
	(void)vq;	/* unused: any call at all is fatal */
	abort();
}
|
||||||
|
|
||||||
|
/*
 * Range lookup that reports the entire user window as one range.
 *
 * The window is [__user_addr_min, __user_addr_max) shifted down by
 * user_addr_offset.  Returns false when @addr lies outside it; otherwise
 * fills @r with the window bounds and the offset and returns true.
 * @vrh is not used.
 */
static bool getrange_iov(struct vringh *vrh, u64 addr, struct vringh_range *r)
{
	const u64 window_lo = (u64)(unsigned long)__user_addr_min - user_addr_offset;
	const u64 window_hi = (u64)(unsigned long)__user_addr_max - user_addr_offset;

	if (addr < window_lo || addr >= window_hi)
		return false;

	r->start = window_lo;
	r->end_incl = window_hi - 1;	/* end is inclusive */
	r->offset = user_addr_offset;
	return true;
}
|
||||||
|
|
||||||
|
/* We return single byte ranges. */
|
||||||
|
static bool getrange_slow(struct vringh *vrh, u64 addr, struct vringh_range *r)
|
||||||
|
{
|
||||||
|
if (addr < (u64)(unsigned long)__user_addr_min - user_addr_offset)
|
||||||
|
return false;
|
||||||
|
if (addr >= (u64)(unsigned long)__user_addr_max - user_addr_offset)
|
||||||
|
return false;
|
||||||
|
|
||||||
|
r->start = addr;
|
||||||
|
r->end_incl = r->start;
|
||||||
|
r->offset = user_addr_offset;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
struct guest_virtio_device {
|
||||||
|
struct virtio_device vdev;
|
||||||
|
int to_host_fd;
|
||||||
|
unsigned long notifies;
|
||||||
|
};
|
||||||
|
|
||||||
|
static void parallel_notify_host(struct virtqueue *vq)
|
||||||
|
{
|
||||||
|
struct guest_virtio_device *gvdev;
|
||||||
|
|
||||||
|
gvdev = container_of(vq->vdev, struct guest_virtio_device, vdev);
|
||||||
|
write(gvdev->to_host_fd, "", 1);
|
||||||
|
gvdev->notifies++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Notify hook that deliberately does nothing. */
static void no_notify_host(struct virtqueue *vq)
{
	(void)vq;
}
|
||||||
|
|
||||||
|
#define NUM_XFERS (10000000)
|
||||||
|
|
||||||
|
/* We aim for two "distant" cpus. */
|
||||||
|
static void find_cpus(unsigned int *first, unsigned int *last)
|
||||||
|
{
|
||||||
|
unsigned int i;
|
||||||
|
|
||||||
|
*first = -1U;
|
||||||
|
*last = 0;
|
||||||
|
for (i = 0; i < 4096; i++) {
|
||||||
|
cpu_set_t set;
|
||||||
|
CPU_ZERO(&set);
|
||||||
|
CPU_SET(i, &set);
|
||||||
|
if (sched_setaffinity(getpid(), sizeof(set), &set) == 0) {
|
||||||
|
if (i < *first)
|
||||||
|
*first = i;
|
||||||
|
if (i > *last)
|
||||||
|
*last = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Opencoded version for fast mode */
|
||||||
|
static inline int vringh_get_head(struct vringh *vrh, u16 *head)
|
||||||
|
{
|
||||||
|
u16 avail_idx, i;
|
||||||
|
int err;
|
||||||
|
|
||||||
|
err = get_user(avail_idx, &vrh->vring.avail->idx);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
|
||||||
|
if (vrh->last_avail_idx == avail_idx)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
/* Only get avail ring entries after they have been exposed by guest. */
|
||||||
|
virtio_rmb(vrh->weak_barriers);
|
||||||
|
|
||||||
|
i = vrh->last_avail_idx & (vrh->vring.num - 1);
|
||||||
|
|
||||||
|
err = get_user(*head, &vrh->vring.avail->ring[i]);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
|
||||||
|
vrh->last_avail_idx++;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int parallel_test(unsigned long features,
|
||||||
|
bool (*getrange)(struct vringh *vrh,
|
||||||
|
u64 addr, struct vringh_range *r),
|
||||||
|
bool fast_vringh)
|
||||||
|
{
|
||||||
|
void *host_map, *guest_map;
|
||||||
|
int fd, mapsize, to_guest[2], to_host[2];
|
||||||
|
unsigned long xfers = 0, notifies = 0, receives = 0;
|
||||||
|
unsigned int first_cpu, last_cpu;
|
||||||
|
cpu_set_t cpu_set;
|
||||||
|
char buf[128];
|
||||||
|
|
||||||
|
/* Create real file to mmap. */
|
||||||
|
fd = open("/tmp/vringh_test-file", O_RDWR|O_CREAT|O_TRUNC, 0600);
|
||||||
|
if (fd < 0)
|
||||||
|
err(1, "Opening /tmp/vringh_test-file");
|
||||||
|
|
||||||
|
/* Extra room at the end for some data, and indirects */
|
||||||
|
mapsize = vring_size(RINGSIZE, ALIGN)
|
||||||
|
+ RINGSIZE * 2 * sizeof(int)
|
||||||
|
+ RINGSIZE * 6 * sizeof(struct vring_desc);
|
||||||
|
mapsize = (mapsize + getpagesize() - 1) & ~(getpagesize() - 1);
|
||||||
|
ftruncate(fd, mapsize);
|
||||||
|
|
||||||
|
/* Parent and child use separate addresses, to check our mapping logic! */
|
||||||
|
host_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
|
||||||
|
guest_map = mmap(NULL, mapsize, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
|
||||||
|
|
||||||
|
pipe(to_guest);
|
||||||
|
pipe(to_host);
|
||||||
|
|
||||||
|
CPU_ZERO(&cpu_set);
|
||||||
|
find_cpus(&first_cpu, &last_cpu);
|
||||||
|
printf("Using CPUS %u and %u\n", first_cpu, last_cpu);
|
||||||
|
fflush(stdout);
|
||||||
|
|
||||||
|
if (fork() != 0) {
|
||||||
|
struct vringh vrh;
|
||||||
|
int status, err, rlen = 0;
|
||||||
|
char rbuf[5];
|
||||||
|
|
||||||
|
/* We are the host: never access guest addresses! */
|
||||||
|
munmap(guest_map, mapsize);
|
||||||
|
|
||||||
|
__user_addr_min = host_map;
|
||||||
|
__user_addr_max = __user_addr_min + mapsize;
|
||||||
|
user_addr_offset = host_map - guest_map;
|
||||||
|
assert(user_addr_offset);
|
||||||
|
|
||||||
|
close(to_guest[0]);
|
||||||
|
close(to_host[1]);
|
||||||
|
|
||||||
|
vring_init(&vrh.vring, RINGSIZE, host_map, ALIGN);
|
||||||
|
vringh_init_user(&vrh, features, RINGSIZE, true,
|
||||||
|
vrh.vring.desc, vrh.vring.avail, vrh.vring.used);
|
||||||
|
CPU_SET(first_cpu, &cpu_set);
|
||||||
|
if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set))
|
||||||
|
errx(1, "Could not set affinity to cpu %u", first_cpu);
|
||||||
|
|
||||||
|
while (xfers < NUM_XFERS) {
|
||||||
|
struct iovec host_riov[2], host_wiov[2];
|
||||||
|
struct vringh_iov riov, wiov;
|
||||||
|
u16 head, written;
|
||||||
|
|
||||||
|
if (fast_vringh) {
|
||||||
|
for (;;) {
|
||||||
|
err = vringh_get_head(&vrh, &head);
|
||||||
|
if (err != 0)
|
||||||
|
break;
|
||||||
|
err = vringh_need_notify_user(&vrh);
|
||||||
|
if (err < 0)
|
||||||
|
errx(1, "vringh_need_notify_user: %i",
|
||||||
|
err);
|
||||||
|
if (err) {
|
||||||
|
write(to_guest[1], "", 1);
|
||||||
|
notifies++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (err != 1)
|
||||||
|
errx(1, "vringh_get_head");
|
||||||
|
written = 0;
|
||||||
|
goto complete;
|
||||||
|
} else {
|
||||||
|
vringh_iov_init(&riov,
|
||||||
|
host_riov,
|
||||||
|
ARRAY_SIZE(host_riov));
|
||||||
|
vringh_iov_init(&wiov,
|
||||||
|
host_wiov,
|
||||||
|
ARRAY_SIZE(host_wiov));
|
||||||
|
|
||||||
|
err = vringh_getdesc_user(&vrh, &riov, &wiov,
|
||||||
|
getrange, &head);
|
||||||
|
}
|
||||||
|
if (err == 0) {
|
||||||
|
err = vringh_need_notify_user(&vrh);
|
||||||
|
if (err < 0)
|
||||||
|
errx(1, "vringh_need_notify_user: %i",
|
||||||
|
err);
|
||||||
|
if (err) {
|
||||||
|
write(to_guest[1], "", 1);
|
||||||
|
notifies++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!vringh_notify_enable_user(&vrh))
|
||||||
|
continue;
|
||||||
|
|
||||||
|
/* Swallow all notifies at once. */
|
||||||
|
if (read(to_host[0], buf, sizeof(buf)) < 1)
|
||||||
|
break;
|
||||||
|
|
||||||
|
vringh_notify_disable_user(&vrh);
|
||||||
|
receives++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (err != 1)
|
||||||
|
errx(1, "vringh_getdesc_user: %i", err);
|
||||||
|
|
||||||
|
/* We simply copy bytes. */
|
||||||
|
if (riov.used) {
|
||||||
|
rlen = vringh_iov_pull_user(&riov, rbuf,
|
||||||
|
sizeof(rbuf));
|
||||||
|
if (rlen != 4)
|
||||||
|
errx(1, "vringh_iov_pull_user: %i",
|
||||||
|
rlen);
|
||||||
|
assert(riov.i == riov.used);
|
||||||
|
written = 0;
|
||||||
|
} else {
|
||||||
|
err = vringh_iov_push_user(&wiov, rbuf, rlen);
|
||||||
|
if (err != rlen)
|
||||||
|
errx(1, "vringh_iov_push_user: %i",
|
||||||
|
err);
|
||||||
|
assert(wiov.i == wiov.used);
|
||||||
|
written = err;
|
||||||
|
}
|
||||||
|
complete:
|
||||||
|
xfers++;
|
||||||
|
|
||||||
|
err = vringh_complete_user(&vrh, head, written);
|
||||||
|
if (err != 0)
|
||||||
|
errx(1, "vringh_complete_user: %i", err);
|
||||||
|
}
|
||||||
|
|
||||||
|
err = vringh_need_notify_user(&vrh);
|
||||||
|
if (err < 0)
|
||||||
|
errx(1, "vringh_need_notify_user: %i", err);
|
||||||
|
if (err) {
|
||||||
|
write(to_guest[1], "", 1);
|
||||||
|
notifies++;
|
||||||
|
}
|
||||||
|
wait(&status);
|
||||||
|
if (!WIFEXITED(status))
|
||||||
|
errx(1, "Child died with signal %i?", WTERMSIG(status));
|
||||||
|
if (WEXITSTATUS(status) != 0)
|
||||||
|
errx(1, "Child exited %i?", WEXITSTATUS(status));
|
||||||
|
printf("Host: notified %lu, pinged %lu\n", notifies, receives);
|
||||||
|
return 0;
|
||||||
|
} else {
|
||||||
|
struct guest_virtio_device gvdev;
|
||||||
|
struct virtqueue *vq;
|
||||||
|
unsigned int *data;
|
||||||
|
struct vring_desc *indirects;
|
||||||
|
unsigned int finished = 0;
|
||||||
|
|
||||||
|
/* We pass sg[]s pointing into here, but we need RINGSIZE+1 */
|
||||||
|
data = guest_map + vring_size(RINGSIZE, ALIGN);
|
||||||
|
indirects = (void *)data + (RINGSIZE + 1) * 2 * sizeof(int);
|
||||||
|
|
||||||
|
/* We are the guest. */
|
||||||
|
munmap(host_map, mapsize);
|
||||||
|
|
||||||
|
close(to_guest[1]);
|
||||||
|
close(to_host[0]);
|
||||||
|
|
||||||
|
gvdev.vdev.features[0] = features;
|
||||||
|
gvdev.to_host_fd = to_host[1];
|
||||||
|
gvdev.notifies = 0;
|
||||||
|
|
||||||
|
CPU_SET(first_cpu, &cpu_set);
|
||||||
|
if (sched_setaffinity(getpid(), sizeof(cpu_set), &cpu_set))
|
||||||
|
err(1, "Could not set affinity to cpu %u", first_cpu);
|
||||||
|
|
||||||
|
vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &gvdev.vdev, true,
|
||||||
|
guest_map, fast_vringh ? no_notify_host
|
||||||
|
: parallel_notify_host,
|
||||||
|
never_callback_guest, "guest vq");
|
||||||
|
|
||||||
|
/* Don't kfree indirects. */
|
||||||
|
__kfree_ignore_start = indirects;
|
||||||
|
__kfree_ignore_end = indirects + RINGSIZE * 6;
|
||||||
|
|
||||||
|
while (xfers < NUM_XFERS) {
|
||||||
|
struct scatterlist sg[4];
|
||||||
|
unsigned int num_sg, len;
|
||||||
|
int *dbuf, err;
|
||||||
|
bool output = !(xfers % 2);
|
||||||
|
|
||||||
|
/* Consume bufs. */
|
||||||
|
while ((dbuf = virtqueue_get_buf(vq, &len)) != NULL) {
|
||||||
|
if (len == 4)
|
||||||
|
assert(*dbuf == finished - 1);
|
||||||
|
else if (!fast_vringh)
|
||||||
|
assert(*dbuf == finished);
|
||||||
|
finished++;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Produce a buffer. */
|
||||||
|
dbuf = data + (xfers % (RINGSIZE + 1));
|
||||||
|
|
||||||
|
if (output)
|
||||||
|
*dbuf = xfers;
|
||||||
|
else
|
||||||
|
*dbuf = -1;
|
||||||
|
|
||||||
|
switch ((xfers / sizeof(*dbuf)) % 4) {
|
||||||
|
case 0:
|
||||||
|
/* Nasty three-element sg list. */
|
||||||
|
sg_init_table(sg, num_sg = 3);
|
||||||
|
sg_set_buf(&sg[0], (void *)dbuf, 1);
|
||||||
|
sg_set_buf(&sg[1], (void *)dbuf + 1, 2);
|
||||||
|
sg_set_buf(&sg[2], (void *)dbuf + 3, 1);
|
||||||
|
break;
|
||||||
|
case 1:
|
||||||
|
sg_init_table(sg, num_sg = 2);
|
||||||
|
sg_set_buf(&sg[0], (void *)dbuf, 1);
|
||||||
|
sg_set_buf(&sg[1], (void *)dbuf + 1, 3);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
sg_init_table(sg, num_sg = 1);
|
||||||
|
sg_set_buf(&sg[0], (void *)dbuf, 4);
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
sg_init_table(sg, num_sg = 4);
|
||||||
|
sg_set_buf(&sg[0], (void *)dbuf, 1);
|
||||||
|
sg_set_buf(&sg[1], (void *)dbuf + 1, 1);
|
||||||
|
sg_set_buf(&sg[2], (void *)dbuf + 2, 1);
|
||||||
|
sg_set_buf(&sg[3], (void *)dbuf + 3, 1);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* May allocate an indirect, so force it to allocate
|
||||||
|
* user addr */
|
||||||
|
__kmalloc_fake = indirects + (xfers % RINGSIZE) * 4;
|
||||||
|
if (output)
|
||||||
|
err = virtqueue_add_outbuf(vq, sg, num_sg, dbuf,
|
||||||
|
GFP_KERNEL);
|
||||||
|
else
|
||||||
|
err = virtqueue_add_inbuf(vq, sg, num_sg,
|
||||||
|
dbuf, GFP_KERNEL);
|
||||||
|
|
||||||
|
if (err == -ENOSPC) {
|
||||||
|
if (!virtqueue_enable_cb_delayed(vq))
|
||||||
|
continue;
|
||||||
|
/* Swallow all notifies at once. */
|
||||||
|
if (read(to_guest[0], buf, sizeof(buf)) < 1)
|
||||||
|
break;
|
||||||
|
|
||||||
|
receives++;
|
||||||
|
virtqueue_disable_cb(vq);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (err)
|
||||||
|
errx(1, "virtqueue_add_in/outbuf: %i", err);
|
||||||
|
|
||||||
|
xfers++;
|
||||||
|
virtqueue_kick(vq);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Any extra? */
|
||||||
|
while (finished != xfers) {
|
||||||
|
int *dbuf;
|
||||||
|
unsigned int len;
|
||||||
|
|
||||||
|
/* Consume bufs. */
|
||||||
|
dbuf = virtqueue_get_buf(vq, &len);
|
||||||
|
if (dbuf) {
|
||||||
|
if (len == 4)
|
||||||
|
assert(*dbuf == finished - 1);
|
||||||
|
else
|
||||||
|
assert(len == 0);
|
||||||
|
finished++;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!virtqueue_enable_cb_delayed(vq))
|
||||||
|
continue;
|
||||||
|
if (read(to_guest[0], buf, sizeof(buf)) < 1)
|
||||||
|
break;
|
||||||
|
|
||||||
|
receives++;
|
||||||
|
virtqueue_disable_cb(vq);
|
||||||
|
}
|
||||||
|
|
||||||
|
printf("Guest: notified %lu, pinged %lu\n",
|
||||||
|
gvdev.notifies, receives);
|
||||||
|
vring_del_virtqueue(vq);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char *argv[])
|
||||||
|
{
|
||||||
|
struct virtio_device vdev;
|
||||||
|
struct virtqueue *vq;
|
||||||
|
struct vringh vrh;
|
||||||
|
struct scatterlist guest_sg[RINGSIZE], *sgs[2];
|
||||||
|
struct iovec host_riov[2], host_wiov[2];
|
||||||
|
struct vringh_iov riov, wiov;
|
||||||
|
struct vring_used_elem used[RINGSIZE];
|
||||||
|
char buf[28];
|
||||||
|
u16 head;
|
||||||
|
int err;
|
||||||
|
unsigned i;
|
||||||
|
void *ret;
|
||||||
|
bool (*getrange)(struct vringh *vrh, u64 addr, struct vringh_range *r);
|
||||||
|
bool fast_vringh = false, parallel = false;
|
||||||
|
|
||||||
|
getrange = getrange_iov;
|
||||||
|
vdev.features[0] = 0;
|
||||||
|
|
||||||
|
while (argv[1]) {
|
||||||
|
if (strcmp(argv[1], "--indirect") == 0)
|
||||||
|
vdev.features[0] |= (1 << VIRTIO_RING_F_INDIRECT_DESC);
|
||||||
|
else if (strcmp(argv[1], "--eventidx") == 0)
|
||||||
|
vdev.features[0] |= (1 << VIRTIO_RING_F_EVENT_IDX);
|
||||||
|
else if (strcmp(argv[1], "--slow-range") == 0)
|
||||||
|
getrange = getrange_slow;
|
||||||
|
else if (strcmp(argv[1], "--fast-vringh") == 0)
|
||||||
|
fast_vringh = true;
|
||||||
|
else if (strcmp(argv[1], "--parallel") == 0)
|
||||||
|
parallel = true;
|
||||||
|
else
|
||||||
|
errx(1, "Unknown arg %s", argv[1]);
|
||||||
|
argv++;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (parallel)
|
||||||
|
return parallel_test(vdev.features[0], getrange, fast_vringh);
|
||||||
|
|
||||||
|
if (posix_memalign(&__user_addr_min, PAGE_SIZE, USER_MEM) != 0)
|
||||||
|
abort();
|
||||||
|
__user_addr_max = __user_addr_min + USER_MEM;
|
||||||
|
memset(__user_addr_min, 0, vring_size(RINGSIZE, ALIGN));
|
||||||
|
|
||||||
|
/* Set up guest side. */
|
||||||
|
vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
|
||||||
|
__user_addr_min,
|
||||||
|
never_notify_host, never_callback_guest,
|
||||||
|
"guest vq");
|
||||||
|
|
||||||
|
/* Set up host side. */
|
||||||
|
vring_init(&vrh.vring, RINGSIZE, __user_addr_min, ALIGN);
|
||||||
|
vringh_init_user(&vrh, vdev.features[0], RINGSIZE, true,
|
||||||
|
vrh.vring.desc, vrh.vring.avail, vrh.vring.used);
|
||||||
|
|
||||||
|
/* No descriptor to get yet... */
|
||||||
|
err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
|
||||||
|
if (err != 0)
|
||||||
|
errx(1, "vringh_getdesc_user: %i", err);
|
||||||
|
|
||||||
|
/* Guest puts in a descriptor. */
|
||||||
|
memcpy(__user_addr_max - 1, "a", 1);
|
||||||
|
sg_init_table(guest_sg, 1);
|
||||||
|
sg_set_buf(&guest_sg[0], __user_addr_max - 1, 1);
|
||||||
|
sg_init_table(guest_sg+1, 1);
|
||||||
|
sg_set_buf(&guest_sg[1], __user_addr_max - 3, 2);
|
||||||
|
sgs[0] = &guest_sg[0];
|
||||||
|
sgs[1] = &guest_sg[1];
|
||||||
|
|
||||||
|
/* May allocate an indirect, so force it to allocate user addr */
|
||||||
|
__kmalloc_fake = __user_addr_min + vring_size(RINGSIZE, ALIGN);
|
||||||
|
err = virtqueue_add_sgs(vq, sgs, 1, 1, &err, GFP_KERNEL);
|
||||||
|
if (err)
|
||||||
|
errx(1, "virtqueue_add_sgs: %i", err);
|
||||||
|
__kmalloc_fake = NULL;
|
||||||
|
|
||||||
|
/* Host retreives it. */
|
||||||
|
vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
|
||||||
|
vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
|
||||||
|
|
||||||
|
err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
|
||||||
|
if (err != 1)
|
||||||
|
errx(1, "vringh_getdesc_user: %i", err);
|
||||||
|
|
||||||
|
assert(riov.used == 1);
|
||||||
|
assert(riov.iov[0].iov_base == __user_addr_max - 1);
|
||||||
|
assert(riov.iov[0].iov_len == 1);
|
||||||
|
if (getrange != getrange_slow) {
|
||||||
|
assert(wiov.used == 1);
|
||||||
|
assert(wiov.iov[0].iov_base == __user_addr_max - 3);
|
||||||
|
assert(wiov.iov[0].iov_len == 2);
|
||||||
|
} else {
|
||||||
|
assert(wiov.used == 2);
|
||||||
|
assert(wiov.iov[0].iov_base == __user_addr_max - 3);
|
||||||
|
assert(wiov.iov[0].iov_len == 1);
|
||||||
|
assert(wiov.iov[1].iov_base == __user_addr_max - 2);
|
||||||
|
assert(wiov.iov[1].iov_len == 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
err = vringh_iov_pull_user(&riov, buf, 5);
|
||||||
|
if (err != 1)
|
||||||
|
errx(1, "vringh_iov_pull_user: %i", err);
|
||||||
|
assert(buf[0] == 'a');
|
||||||
|
assert(riov.i == 1);
|
||||||
|
assert(vringh_iov_pull_user(&riov, buf, 5) == 0);
|
||||||
|
|
||||||
|
memcpy(buf, "bcdef", 5);
|
||||||
|
err = vringh_iov_push_user(&wiov, buf, 5);
|
||||||
|
if (err != 2)
|
||||||
|
errx(1, "vringh_iov_push_user: %i", err);
|
||||||
|
assert(memcmp(__user_addr_max - 3, "bc", 2) == 0);
|
||||||
|
assert(wiov.i == wiov.used);
|
||||||
|
assert(vringh_iov_push_user(&wiov, buf, 5) == 0);
|
||||||
|
|
||||||
|
/* Host is done. */
|
||||||
|
err = vringh_complete_user(&vrh, head, err);
|
||||||
|
if (err != 0)
|
||||||
|
errx(1, "vringh_complete_user: %i", err);
|
||||||
|
|
||||||
|
/* Guest should see used token now. */
|
||||||
|
__kfree_ignore_start = __user_addr_min + vring_size(RINGSIZE, ALIGN);
|
||||||
|
__kfree_ignore_end = __kfree_ignore_start + 1;
|
||||||
|
ret = virtqueue_get_buf(vq, &i);
|
||||||
|
if (ret != &err)
|
||||||
|
errx(1, "virtqueue_get_buf: %p", ret);
|
||||||
|
assert(i == 2);
|
||||||
|
|
||||||
|
/* Guest puts in a huge descriptor. */
|
||||||
|
sg_init_table(guest_sg, RINGSIZE);
|
||||||
|
for (i = 0; i < RINGSIZE; i++) {
|
||||||
|
sg_set_buf(&guest_sg[i],
|
||||||
|
__user_addr_max - USER_MEM/4, USER_MEM/4);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Fill contents with recognisable garbage. */
|
||||||
|
for (i = 0; i < USER_MEM/4; i++)
|
||||||
|
((char *)__user_addr_max - USER_MEM/4)[i] = i;
|
||||||
|
|
||||||
|
/* This will allocate an indirect, so force it to allocate user addr */
|
||||||
|
__kmalloc_fake = __user_addr_min + vring_size(RINGSIZE, ALIGN);
|
||||||
|
err = virtqueue_add_outbuf(vq, guest_sg, RINGSIZE, &err, GFP_KERNEL);
|
||||||
|
if (err)
|
||||||
|
errx(1, "virtqueue_add_outbuf (large): %i", err);
|
||||||
|
__kmalloc_fake = NULL;
|
||||||
|
|
||||||
|
/* Host picks it up (allocates new iov). */
|
||||||
|
vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
|
||||||
|
vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
|
||||||
|
|
||||||
|
err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
|
||||||
|
if (err != 1)
|
||||||
|
errx(1, "vringh_getdesc_user: %i", err);
|
||||||
|
|
||||||
|
assert(riov.max_num & VRINGH_IOV_ALLOCATED);
|
||||||
|
assert(riov.iov != host_riov);
|
||||||
|
if (getrange != getrange_slow)
|
||||||
|
assert(riov.used == RINGSIZE);
|
||||||
|
else
|
||||||
|
assert(riov.used == RINGSIZE * USER_MEM/4);
|
||||||
|
|
||||||
|
assert(!(wiov.max_num & VRINGH_IOV_ALLOCATED));
|
||||||
|
assert(wiov.used == 0);
|
||||||
|
|
||||||
|
/* Pull data back out (in odd chunks), should be as expected. */
|
||||||
|
for (i = 0; i < RINGSIZE * USER_MEM/4; i += 3) {
|
||||||
|
err = vringh_iov_pull_user(&riov, buf, 3);
|
||||||
|
if (err != 3 && i + err != RINGSIZE * USER_MEM/4)
|
||||||
|
errx(1, "vringh_iov_pull_user large: %i", err);
|
||||||
|
assert(buf[0] == (char)i);
|
||||||
|
assert(err < 2 || buf[1] == (char)(i + 1));
|
||||||
|
assert(err < 3 || buf[2] == (char)(i + 2));
|
||||||
|
}
|
||||||
|
assert(riov.i == riov.used);
|
||||||
|
vringh_iov_cleanup(&riov);
|
||||||
|
vringh_iov_cleanup(&wiov);
|
||||||
|
|
||||||
|
/* Complete using multi interface, just because we can. */
|
||||||
|
used[0].id = head;
|
||||||
|
used[0].len = 0;
|
||||||
|
err = vringh_complete_multi_user(&vrh, used, 1);
|
||||||
|
if (err)
|
||||||
|
errx(1, "vringh_complete_multi_user(1): %i", err);
|
||||||
|
|
||||||
|
/* Free up those descriptors. */
|
||||||
|
ret = virtqueue_get_buf(vq, &i);
|
||||||
|
if (ret != &err)
|
||||||
|
errx(1, "virtqueue_get_buf: %p", ret);
|
||||||
|
|
||||||
|
/* Add lots of descriptors. */
|
||||||
|
sg_init_table(guest_sg, 1);
|
||||||
|
sg_set_buf(&guest_sg[0], __user_addr_max - 1, 1);
|
||||||
|
for (i = 0; i < RINGSIZE; i++) {
|
||||||
|
err = virtqueue_add_outbuf(vq, guest_sg, 1, &err, GFP_KERNEL);
|
||||||
|
if (err)
|
||||||
|
errx(1, "virtqueue_add_outbuf (multiple): %i", err);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Now get many, and consume them all at once. */
|
||||||
|
vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
|
||||||
|
vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
|
||||||
|
|
||||||
|
for (i = 0; i < RINGSIZE; i++) {
|
||||||
|
err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
|
||||||
|
if (err != 1)
|
||||||
|
errx(1, "vringh_getdesc_user: %i", err);
|
||||||
|
used[i].id = head;
|
||||||
|
used[i].len = 0;
|
||||||
|
}
|
||||||
|
/* Make sure it wraps around ring, to test! */
|
||||||
|
assert(vrh.vring.used->idx % RINGSIZE != 0);
|
||||||
|
err = vringh_complete_multi_user(&vrh, used, RINGSIZE);
|
||||||
|
if (err)
|
||||||
|
errx(1, "vringh_complete_multi_user: %i", err);
|
||||||
|
|
||||||
|
/* Free those buffers. */
|
||||||
|
for (i = 0; i < RINGSIZE; i++) {
|
||||||
|
unsigned len;
|
||||||
|
assert(virtqueue_get_buf(vq, &len) != NULL);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Test weird (but legal!) indirect. */
|
||||||
|
if (vdev.features[0] & (1 << VIRTIO_RING_F_INDIRECT_DESC)) {
|
||||||
|
char *data = __user_addr_max - USER_MEM/4;
|
||||||
|
struct vring_desc *d = __user_addr_max - USER_MEM/2;
|
||||||
|
struct vring vring;
|
||||||
|
|
||||||
|
/* Force creation of direct, which we modify. */
|
||||||
|
vdev.features[0] &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
|
||||||
|
vq = vring_new_virtqueue(0, RINGSIZE, ALIGN, &vdev, true,
|
||||||
|
__user_addr_min,
|
||||||
|
never_notify_host,
|
||||||
|
never_callback_guest,
|
||||||
|
"guest vq");
|
||||||
|
|
||||||
|
sg_init_table(guest_sg, 4);
|
||||||
|
sg_set_buf(&guest_sg[0], d, sizeof(*d)*2);
|
||||||
|
sg_set_buf(&guest_sg[1], d + 2, sizeof(*d)*1);
|
||||||
|
sg_set_buf(&guest_sg[2], data + 6, 4);
|
||||||
|
sg_set_buf(&guest_sg[3], d + 3, sizeof(*d)*3);
|
||||||
|
|
||||||
|
err = virtqueue_add_outbuf(vq, guest_sg, 4, &err, GFP_KERNEL);
|
||||||
|
if (err)
|
||||||
|
errx(1, "virtqueue_add_outbuf (indirect): %i", err);
|
||||||
|
|
||||||
|
vring_init(&vring, RINGSIZE, __user_addr_min, ALIGN);
|
||||||
|
|
||||||
|
/* They're used in order, but double-check... */
|
||||||
|
assert(vring.desc[0].addr == (unsigned long)d);
|
||||||
|
assert(vring.desc[1].addr == (unsigned long)(d+2));
|
||||||
|
assert(vring.desc[2].addr == (unsigned long)data + 6);
|
||||||
|
assert(vring.desc[3].addr == (unsigned long)(d+3));
|
||||||
|
vring.desc[0].flags |= VRING_DESC_F_INDIRECT;
|
||||||
|
vring.desc[1].flags |= VRING_DESC_F_INDIRECT;
|
||||||
|
vring.desc[3].flags |= VRING_DESC_F_INDIRECT;
|
||||||
|
|
||||||
|
/* First indirect */
|
||||||
|
d[0].addr = (unsigned long)data;
|
||||||
|
d[0].len = 1;
|
||||||
|
d[0].flags = VRING_DESC_F_NEXT;
|
||||||
|
d[0].next = 1;
|
||||||
|
d[1].addr = (unsigned long)data + 1;
|
||||||
|
d[1].len = 2;
|
||||||
|
d[1].flags = 0;
|
||||||
|
|
||||||
|
/* Second indirect */
|
||||||
|
d[2].addr = (unsigned long)data + 3;
|
||||||
|
d[2].len = 3;
|
||||||
|
d[2].flags = 0;
|
||||||
|
|
||||||
|
/* Third indirect */
|
||||||
|
d[3].addr = (unsigned long)data + 10;
|
||||||
|
d[3].len = 5;
|
||||||
|
d[3].flags = VRING_DESC_F_NEXT;
|
||||||
|
d[3].next = 1;
|
||||||
|
d[4].addr = (unsigned long)data + 15;
|
||||||
|
d[4].len = 6;
|
||||||
|
d[4].flags = VRING_DESC_F_NEXT;
|
||||||
|
d[4].next = 2;
|
||||||
|
d[5].addr = (unsigned long)data + 21;
|
||||||
|
d[5].len = 7;
|
||||||
|
d[5].flags = 0;
|
||||||
|
|
||||||
|
/* Host picks it up (allocates new iov). */
|
||||||
|
vringh_iov_init(&riov, host_riov, ARRAY_SIZE(host_riov));
|
||||||
|
vringh_iov_init(&wiov, host_wiov, ARRAY_SIZE(host_wiov));
|
||||||
|
|
||||||
|
err = vringh_getdesc_user(&vrh, &riov, &wiov, getrange, &head);
|
||||||
|
if (err != 1)
|
||||||
|
errx(1, "vringh_getdesc_user: %i", err);
|
||||||
|
|
||||||
|
if (head != 0)
|
||||||
|
errx(1, "vringh_getdesc_user: head %i not 0", head);
|
||||||
|
|
||||||
|
assert(riov.max_num & VRINGH_IOV_ALLOCATED);
|
||||||
|
if (getrange != getrange_slow)
|
||||||
|
assert(riov.used == 7);
|
||||||
|
else
|
||||||
|
assert(riov.used == 28);
|
||||||
|
err = vringh_iov_pull_user(&riov, buf, 29);
|
||||||
|
assert(err == 28);
|
||||||
|
|
||||||
|
/* Data should be linear. */
|
||||||
|
for (i = 0; i < err; i++)
|
||||||
|
assert(buf[i] == i);
|
||||||
|
vringh_iov_cleanup(&riov);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Don't leak memory... */
|
||||||
|
vring_del_virtqueue(vq);
|
||||||
|
free(__user_addr_min);
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
Loading…
Reference in New Issue