diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 5438a2d7907f..d9a203b058f1 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -999,7 +999,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted. With this option on every unmap_single operation will result in a hardware IOTLB flush operation as opposed to batching them for performance. - + sp_off [Default Off] + By default, super page will be supported if Intel IOMMU + has the capability. With this option, super page will + not be supported. intremap= [X86-64, Intel-IOMMU] Format: { on (default) | off | nosid } on enable Interrupt Remapping (default) diff --git a/Documentation/virtual/lguest/Makefile b/Documentation/virtual/lguest/Makefile index bebac6b4f332..0ac34206f7a7 100644 --- a/Documentation/virtual/lguest/Makefile +++ b/Documentation/virtual/lguest/Makefile @@ -1,5 +1,5 @@ # This creates the demonstration utility "lguest" which runs a Linux guest. -# Missing headers? Add "-I../../include -I../../arch/x86/include" +# Missing headers? Add "-I../../../include -I../../../arch/x86/include" CFLAGS:=-m32 -Wall -Wmissing-declarations -Wmissing-prototypes -O3 -U_FORTIFY_SOURCE all: lguest diff --git a/Documentation/virtual/lguest/lguest.c b/Documentation/virtual/lguest/lguest.c index d9da7e148538..cd9d6af61d07 100644 --- a/Documentation/virtual/lguest/lguest.c +++ b/Documentation/virtual/lguest/lguest.c @@ -49,7 +49,7 @@ #include #include #include -#include "../../include/linux/lguest_launcher.h" +#include "../../../include/linux/lguest_launcher.h" /*L:110 * We can ignore the 42 include files we need for this program, but I do want * to draw attention to the use of kernel-style types. @@ -135,9 +135,6 @@ struct device { /* Is it operational */ bool running; - /* Does Guest want an intrrupt on empty? */ - bool irq_on_empty; - /* Device-specific data. */ void *priv; }; @@ -637,10 +634,7 @@ static void trigger_irq(struct virtqueue *vq) /* If they don't want an interrupt, don't send one... */ if (vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT) { - /* ... unless they've asked us to force one on empty. */ - if (!vq->dev->irq_on_empty - || lg_last_avail(vq) != vq->vring.avail->idx) - return; + return; } /* Send the Guest an interrupt tell them we used something up. */ @@ -1057,15 +1051,6 @@ static void create_thread(struct virtqueue *vq) close(vq->eventfd); } -static bool accepted_feature(struct device *dev, unsigned int bit) -{ - const u8 *features = get_feature_bits(dev) + dev->feature_len; - - if (dev->feature_len < bit / CHAR_BIT) - return false; - return features[bit / CHAR_BIT] & (1 << (bit % CHAR_BIT)); -} - static void start_device(struct device *dev) { unsigned int i; @@ -1079,8 +1064,6 @@ static void start_device(struct device *dev) verbose(" %02x", get_feature_bits(dev) [dev->feature_len+i]); - dev->irq_on_empty = accepted_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY); - for (vq = dev->vq; vq; vq = vq->next) { if (vq->service) create_thread(vq); @@ -1564,7 +1547,6 @@ static void setup_tun_net(char *arg) /* Set up the tun device. */ configure_device(ipfd, tapif, ip); - add_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY); /* Expect Guest to handle everything except UFO */ add_feature(dev, VIRTIO_NET_F_CSUM); add_feature(dev, VIRTIO_NET_F_GUEST_CSUM); diff --git a/arch/ia64/include/asm/unistd.h b/arch/ia64/include/asm/unistd.h index 1cf0f496f744..7c928da35b17 100644 --- a/arch/ia64/include/asm/unistd.h +++ b/arch/ia64/include/asm/unistd.h @@ -320,11 +320,12 @@ #define __NR_clock_adjtime 1328 #define __NR_syncfs 1329 #define __NR_setns 1330 +#define __NR_sendmmsg 1331 #ifdef __KERNEL__ -#define NR_syscalls 307 /* length of syscall table */ +#define NR_syscalls 308 /* length of syscall table */ /* * The following defines stop scripts/checksyscalls.sh from complaining about diff --git a/arch/ia64/kernel/entry.S b/arch/ia64/kernel/entry.S index 9ca80193cd4e..97dd2abdeb1a 100644 --- a/arch/ia64/kernel/entry.S +++ b/arch/ia64/kernel/entry.S @@ -1776,6 +1776,7 @@ sys_call_table: data8 sys_clock_adjtime data8 sys_syncfs data8 sys_setns // 1330 + data8 sys_sendmmsg .org sys_call_table + 8*NR_syscalls // guard against failures to increase NR_syscalls #endif /* __IA64_ASM_PARAVIRTUALIZED_NATIVE */ diff --git a/arch/powerpc/platforms/powermac/pic.c b/arch/powerpc/platforms/powermac/pic.c index 9089b0421191..7667db448aa7 100644 --- a/arch/powerpc/platforms/powermac/pic.c +++ b/arch/powerpc/platforms/powermac/pic.c @@ -715,7 +715,8 @@ static struct syscore_ops pmacpic_syscore_ops = { static int __init init_pmacpic_syscore(void) { - register_syscore_ops(&pmacpic_syscore_ops); + if (pmac_irq_hw[0]) + register_syscore_ops(&pmacpic_syscore_ops); return 0; } diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index f5abe3a245b8..90b06d4daee2 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -8,6 +8,7 @@ CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) ifdef CONFIG_FUNCTION_TRACER # Do not profile debug and lowlevel utilities +CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg CFLAGS_REMOVE_paravirt-spinlocks.o = -pg CFLAGS_REMOVE_pvclock.o = -pg @@ -28,6 +29,7 @@ CFLAGS_paravirt.o := $(nostackp) GCOV_PROFILE_vsyscall_64.o := n GCOV_PROFILE_hpet.o := n GCOV_PROFILE_tsc.o := n +GCOV_PROFILE_vread_tsc_64.o := n GCOV_PROFILE_paravirt.o := n # vread_tsc_64 is hot and should be fully optimized: diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 426a5b66f7e4..2e4928d45a2d 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -642,7 +642,7 @@ static int __init idle_setup(char *str) boot_option_idle_override = IDLE_POLL; } else if (!strcmp(str, "mwait")) { boot_option_idle_override = IDLE_FORCE_MWAIT; - WARN_ONCE(1, "\idle=mwait\" will be removed in 2012\"\n"); + WARN_ONCE(1, "\"idle=mwait\" will be removed in 2012\n"); } else if (!strcmp(str, "halt")) { /* * When the boot option of idle=halt is added, halt is diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index eefd96765e79..33a0c11797de 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1332,7 +1332,7 @@ static inline void mwait_play_dead(void) void *mwait_ptr; struct cpuinfo_x86 *c = __this_cpu_ptr(&cpu_info); - if (!this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c)) + if (!(this_cpu_has(X86_FEATURE_MWAIT) && mwait_usable(c))) return; if (!this_cpu_has(X86_FEATURE_CLFLSH)) return; diff --git a/arch/x86/lguest/boot.c b/arch/x86/lguest/boot.c index e191c096ab90..db832fd65ecb 100644 --- a/arch/x86/lguest/boot.c +++ b/arch/x86/lguest/boot.c @@ -993,6 +993,7 @@ static void lguest_time_irq(unsigned int irq, struct irq_desc *desc) static void lguest_time_init(void) { /* Set up the timer interrupt (0) to go to our simple timer routine */ + lguest_setup_irq(0); irq_set_handler(0, lguest_time_irq); clocksource_register_hz(&lguest_clock, NSEC_PER_SEC); diff --git a/drivers/block/paride/pcd.c b/drivers/block/paride/pcd.c index a0aabd904a51..46b8136c31bb 100644 --- a/drivers/block/paride/pcd.c +++ b/drivers/block/paride/pcd.c @@ -321,7 +321,6 @@ static void pcd_init_units(void) strcpy(disk->disk_name, cd->name); /* umm... */ disk->fops = &pcd_bdops; disk->flags = GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; - disk->events = DISK_EVENT_MEDIA_CHANGE; } } diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6ecf89cdf006..079c08808d8a 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -6,10 +6,13 @@ #include #include #include +#include +#include #define PART_BITS 4 static int major, index; +struct workqueue_struct *virtblk_wq; struct virtio_blk { @@ -26,6 +29,9 @@ struct virtio_blk mempool_t *pool; + /* Process context for config space updates */ + struct work_struct config_work; + /* What host tells us, plus 2 for header & tailer. */ unsigned int sg_elems; @@ -141,7 +147,7 @@ static bool do_req(struct request_queue *q, struct virtio_blk *vblk, num = blk_rq_map_sg(q, vbr->req, vblk->sg + out); if (vbr->req->cmd_type == REQ_TYPE_BLOCK_PC) { - sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, 96); + sg_set_buf(&vblk->sg[num + out + in++], vbr->req->sense, SCSI_SENSE_BUFFERSIZE); sg_set_buf(&vblk->sg[num + out + in++], &vbr->in_hdr, sizeof(vbr->in_hdr)); } @@ -291,6 +297,46 @@ static ssize_t virtblk_serial_show(struct device *dev, } DEVICE_ATTR(serial, S_IRUGO, virtblk_serial_show, NULL); +static void virtblk_config_changed_work(struct work_struct *work) +{ + struct virtio_blk *vblk = + container_of(work, struct virtio_blk, config_work); + struct virtio_device *vdev = vblk->vdev; + struct request_queue *q = vblk->disk->queue; + char cap_str_2[10], cap_str_10[10]; + u64 capacity, size; + + /* Host must always specify the capacity. */ + vdev->config->get(vdev, offsetof(struct virtio_blk_config, capacity), + &capacity, sizeof(capacity)); + + /* If capacity is too big, truncate with warning. */ + if ((sector_t)capacity != capacity) { + dev_warn(&vdev->dev, "Capacity %llu too large: truncating\n", + (unsigned long long)capacity); + capacity = (sector_t)-1; + } + + size = capacity * queue_logical_block_size(q); + string_get_size(size, STRING_UNITS_2, cap_str_2, sizeof(cap_str_2)); + string_get_size(size, STRING_UNITS_10, cap_str_10, sizeof(cap_str_10)); + + dev_notice(&vdev->dev, + "new size: %llu %d-byte logical blocks (%s/%s)\n", + (unsigned long long)capacity, + queue_logical_block_size(q), + cap_str_10, cap_str_2); + + set_capacity(vblk->disk, capacity); +} + +static void virtblk_config_changed(struct virtio_device *vdev) +{ + struct virtio_blk *vblk = vdev->priv; + + queue_work(virtblk_wq, &vblk->config_work); +} + static int __devinit virtblk_probe(struct virtio_device *vdev) { struct virtio_blk *vblk; @@ -327,6 +373,7 @@ static int __devinit virtblk_probe(struct virtio_device *vdev) vblk->vdev = vdev; vblk->sg_elems = sg_elems; sg_init_table(vblk->sg, vblk->sg_elems); + INIT_WORK(&vblk->config_work, virtblk_config_changed_work); /* We expect one virtqueue, for output. */ vblk->vq = virtio_find_single_vq(vdev, blk_done, "requests"); @@ -477,6 +524,8 @@ static void __devexit virtblk_remove(struct virtio_device *vdev) { struct virtio_blk *vblk = vdev->priv; + flush_work(&vblk->config_work); + /* Nothing should be pending. */ BUG_ON(!list_empty(&vblk->reqs)); @@ -508,27 +557,47 @@ static unsigned int features[] = { * Use __refdata to avoid this warning. */ static struct virtio_driver __refdata virtio_blk = { - .feature_table = features, - .feature_table_size = ARRAY_SIZE(features), - .driver.name = KBUILD_MODNAME, - .driver.owner = THIS_MODULE, - .id_table = id_table, - .probe = virtblk_probe, - .remove = __devexit_p(virtblk_remove), + .feature_table = features, + .feature_table_size = ARRAY_SIZE(features), + .driver.name = KBUILD_MODNAME, + .driver.owner = THIS_MODULE, + .id_table = id_table, + .probe = virtblk_probe, + .remove = __devexit_p(virtblk_remove), + .config_changed = virtblk_config_changed, }; static int __init init(void) { + int error; + + virtblk_wq = alloc_workqueue("virtio-blk", 0, 0); + if (!virtblk_wq) + return -ENOMEM; + major = register_blkdev(0, "virtblk"); - if (major < 0) - return major; - return register_virtio_driver(&virtio_blk); + if (major < 0) { + error = major; + goto out_destroy_workqueue; + } + + error = register_virtio_driver(&virtio_blk); + if (error) + goto out_unregister_blkdev; + return 0; + +out_unregister_blkdev: + unregister_blkdev(major, "virtblk"); +out_destroy_workqueue: + destroy_workqueue(virtblk_wq); + return error; } static void __exit fini(void) { unregister_blkdev(major, "virtblk"); unregister_virtio_driver(&virtio_blk); + destroy_workqueue(virtblk_wq); } module_init(init); module_exit(fini); diff --git a/drivers/cdrom/viocd.c b/drivers/cdrom/viocd.c index ae15a4ddaa9b..7878da89d29e 100644 --- a/drivers/cdrom/viocd.c +++ b/drivers/cdrom/viocd.c @@ -627,7 +627,6 @@ static int viocd_probe(struct vio_dev *vdev, const struct vio_device_id *id) gendisk->fops = &viocd_fops; gendisk->flags = GENHD_FL_CD | GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; - gendisk->events = DISK_EVENT_MEDIA_CHANGE; set_capacity(gendisk, 0); gendisk->private_data = d; d->viocd_disk = gendisk; diff --git a/drivers/char/virtio_console.c b/drivers/char/virtio_console.c index 838568a7dbf5..fb68b1295373 100644 --- a/drivers/char/virtio_console.c +++ b/drivers/char/virtio_console.c @@ -1677,17 +1677,12 @@ static int __devinit virtcons_probe(struct virtio_device *vdev) portdev->config.max_nr_ports = 1; if (virtio_has_feature(vdev, VIRTIO_CONSOLE_F_MULTIPORT)) { multiport = true; - vdev->features[0] |= 1 << VIRTIO_CONSOLE_F_MULTIPORT; - vdev->config->get(vdev, offsetof(struct virtio_console_config, max_nr_ports), &portdev->config.max_nr_ports, sizeof(portdev->config.max_nr_ports)); } - /* Let the Host know we support multiple ports.*/ - vdev->config->finalize_features(vdev); - err = init_vqs(portdev); if (err < 0) { dev_err(&vdev->dev, "Error %d initializing vqs\n", err); diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index 6e5123b1d341..144d27261e43 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -1782,7 +1782,6 @@ static int ide_cd_probe(ide_drive_t *drive) ide_cd_read_toc(drive, &sense); g->fops = &idecd_ops; g->flags |= GENHD_FL_REMOVABLE | GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE; - g->events = DISK_EVENT_MEDIA_CHANGE; add_disk(g); return 0; diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index b0c56313dbbb..8cebec5e85ee 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -304,7 +304,10 @@ static int check_and_rewind_pc(char *put_str, char *arg) return 1; } /* Readjust the instruction pointer if needed */ - instruction_pointer_set(&kgdbts_regs, ip + offset); + ip += offset; +#ifdef GDB_ADJUSTS_BREAK_OFFSET + instruction_pointer_set(&kgdbts_regs, ip); +#endif return 0; } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0cb0b0632672..f6853247a620 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -609,7 +609,7 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) * before it gets out of hand. Naturally, this wastes entries. */ if (capacity < 2+MAX_SKB_FRAGS) { netif_stop_queue(dev); - if (unlikely(!virtqueue_enable_cb(vi->svq))) { + if (unlikely(!virtqueue_enable_cb_delayed(vi->svq))) { /* More just got used, free them then recheck. */ capacity += free_old_xmit_skbs(vi); if (capacity >= 2+MAX_SKB_FRAGS) { diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 12e02bf92c4a..3dc9befa5aec 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -698,12 +698,7 @@ int __init detect_intel_iommu(void) { #ifdef CONFIG_INTR_REMAP struct acpi_table_dmar *dmar; - /* - * for now we will disable dma-remapping when interrupt - * remapping is enabled. - * When support for queued invalidation for IOTLB invalidation - * is added, we will not need this any more. - */ + dmar = (struct acpi_table_dmar *) dmar_tbl; if (ret && cpu_has_x2apic && dmar->flags & 0x1) printk(KERN_INFO diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 6af6b628175b..59f17acf7f68 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -47,6 +47,8 @@ #define ROOT_SIZE VTD_PAGE_SIZE #define CONTEXT_SIZE VTD_PAGE_SIZE +#define IS_BRIDGE_HOST_DEVICE(pdev) \ + ((pdev->class >> 8) == PCI_CLASS_BRIDGE_HOST) #define IS_GFX_DEVICE(pdev) ((pdev->class >> 16) == PCI_BASE_CLASS_DISPLAY) #define IS_ISA_DEVICE(pdev) ((pdev->class >> 8) == PCI_CLASS_BRIDGE_ISA) #define IS_AZALIA(pdev) ((pdev)->vendor == 0x8086 && (pdev)->device == 0x3a3e) @@ -116,6 +118,11 @@ static inline unsigned long align_to_level(unsigned long pfn, int level) return (pfn + level_size(level) - 1) & level_mask(level); } +static inline unsigned long lvl_to_nr_pages(unsigned int lvl) +{ + return 1 << ((lvl - 1) * LEVEL_STRIDE); +} + /* VT-d pages must always be _smaller_ than MM pages. Otherwise things are never going to work. */ static inline unsigned long dma_to_mm_pfn(unsigned long dma_pfn) @@ -142,6 +149,12 @@ static struct intel_iommu **g_iommus; static void __init check_tylersburg_isoch(void); static int rwbf_quirk; +/* + * set to 1 to panic kernel if can't successfully enable VT-d + * (used when kernel is launched w/ TXT) + */ +static int force_on = 0; + /* * 0: Present * 1-11: Reserved @@ -338,6 +351,9 @@ struct dmar_domain { int iommu_coherency;/* indicate coherency of iommu access */ int iommu_snooping; /* indicate snooping control feature*/ int iommu_count; /* reference count of iommu */ + int iommu_superpage;/* Level of superpages supported: + 0 == 4KiB (no superpages), 1 == 2MiB, + 2 == 1GiB, 3 == 512GiB, 4 == 1TiB */ spinlock_t iommu_lock; /* protect iommu set in domain */ u64 max_addr; /* maximum mapped address */ }; @@ -387,6 +403,7 @@ int dmar_disabled = 1; static int dmar_map_gfx = 1; static int dmar_forcedac; static int intel_iommu_strict; +static int intel_iommu_superpage = 1; #define DUMMY_DEVICE_DOMAIN_INFO ((struct device_domain_info *)(-1)) static DEFINE_SPINLOCK(device_domain_lock); @@ -417,6 +434,10 @@ static int __init intel_iommu_setup(char *str) printk(KERN_INFO "Intel-IOMMU: disable batched IOTLB flush\n"); intel_iommu_strict = 1; + } else if (!strncmp(str, "sp_off", 6)) { + printk(KERN_INFO + "Intel-IOMMU: disable supported super page\n"); + intel_iommu_superpage = 0; } str += strcspn(str, ","); @@ -555,11 +576,32 @@ static void domain_update_iommu_snooping(struct dmar_domain *domain) } } +static void domain_update_iommu_superpage(struct dmar_domain *domain) +{ + int i, mask = 0xf; + + if (!intel_iommu_superpage) { + domain->iommu_superpage = 0; + return; + } + + domain->iommu_superpage = 4; /* 1TiB */ + + for_each_set_bit(i, &domain->iommu_bmp, g_num_of_iommus) { + mask |= cap_super_page_val(g_iommus[i]->cap); + if (!mask) { + break; + } + } + domain->iommu_superpage = fls(mask); +} + /* Some capabilities may be different across iommus */ static void domain_update_iommu_cap(struct dmar_domain *domain) { domain_update_iommu_coherency(domain); domain_update_iommu_snooping(domain); + domain_update_iommu_superpage(domain); } static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) @@ -689,23 +731,31 @@ out: } static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, - unsigned long pfn) + unsigned long pfn, int large_level) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; struct dma_pte *parent, *pte = NULL; int level = agaw_to_level(domain->agaw); - int offset; + int offset, target_level; BUG_ON(!domain->pgd); BUG_ON(addr_width < BITS_PER_LONG && pfn >> addr_width); parent = domain->pgd; + /* Search pte */ + if (!large_level) + target_level = 1; + else + target_level = large_level; + while (level > 0) { void *tmp_page; offset = pfn_level_offset(pfn, level); pte = &parent[offset]; - if (level == 1) + if (!large_level && (pte->val & DMA_PTE_LARGE_PAGE)) + break; + if (level == target_level) break; if (!dma_pte_present(pte)) { @@ -733,10 +783,11 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, return pte; } + /* return address's pte at specific level */ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, unsigned long pfn, - int level) + int level, int *large_page) { struct dma_pte *parent, *pte = NULL; int total = agaw_to_level(domain->agaw); @@ -749,8 +800,16 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, if (level == total) return pte; - if (!dma_pte_present(pte)) + if (!dma_pte_present(pte)) { + *large_page = total; break; + } + + if (pte->val & DMA_PTE_LARGE_PAGE) { + *large_page = total; + return pte; + } + parent = phys_to_virt(dma_pte_addr(pte)); total--; } @@ -763,6 +822,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain, unsigned long last_pfn) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + unsigned int large_page = 1; struct dma_pte *first_pte, *pte; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); @@ -771,14 +831,15 @@ static void dma_pte_clear_range(struct dmar_domain *domain, /* we don't need lock here; nobody else touches the iova range */ do { - first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1); + large_page = 1; + first_pte = pte = dma_pfn_level_pte(domain, start_pfn, 1, &large_page); if (!pte) { - start_pfn = align_to_level(start_pfn + 1, 2); + start_pfn = align_to_level(start_pfn + 1, large_page + 1); continue; } - do { + do { dma_clear_pte(pte); - start_pfn++; + start_pfn += lvl_to_nr_pages(large_page); pte++; } while (start_pfn <= last_pfn && !first_pte_in_page(pte)); @@ -798,6 +859,7 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, int total = agaw_to_level(domain->agaw); int level; unsigned long tmp; + int large_page = 2; BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); @@ -813,7 +875,10 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, return; do { - first_pte = pte = dma_pfn_level_pte(domain, tmp, level); + large_page = level; + first_pte = pte = dma_pfn_level_pte(domain, tmp, level, &large_page); + if (large_page > level) + level = large_page + 1; if (!pte) { tmp = align_to_level(tmp + 1, level + 1); continue; @@ -1397,6 +1462,7 @@ static int domain_init(struct dmar_domain *domain, int guest_width) else domain->iommu_snooping = 0; + domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); domain->iommu_count = 1; domain->nid = iommu->node; @@ -1417,6 +1483,10 @@ static void domain_exit(struct dmar_domain *domain) if (!domain) return; + /* Flush any lazy unmaps that may reference this domain */ + if (!intel_iommu_strict) + flush_unmaps_timeout(0); + domain_remove_dev_info(domain); /* destroy iovas */ put_iova_domain(&domain->iovad); @@ -1648,6 +1718,34 @@ static inline unsigned long aligned_nrpages(unsigned long host_addr, return PAGE_ALIGN(host_addr + size) >> VTD_PAGE_SHIFT; } +/* Return largest possible superpage level for a given mapping */ +static inline int hardware_largepage_caps(struct dmar_domain *domain, + unsigned long iov_pfn, + unsigned long phy_pfn, + unsigned long pages) +{ + int support, level = 1; + unsigned long pfnmerge; + + support = domain->iommu_superpage; + + /* To use a large page, the virtual *and* physical addresses + must be aligned to 2MiB/1GiB/etc. Lower bits set in either + of them will mean we have to use smaller pages. So just + merge them and check both at once. */ + pfnmerge = iov_pfn | phy_pfn; + + while (support && !(pfnmerge & ~VTD_STRIDE_MASK)) { + pages >>= VTD_STRIDE_SHIFT; + if (!pages) + break; + pfnmerge >>= VTD_STRIDE_SHIFT; + level++; + support--; + } + return level; +} + static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, struct scatterlist *sg, unsigned long phys_pfn, unsigned long nr_pages, int prot) @@ -1656,6 +1754,8 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, phys_addr_t uninitialized_var(pteval); int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; unsigned long sg_res; + unsigned int largepage_lvl = 0; + unsigned long lvl_pages = 0; BUG_ON(addr_width < BITS_PER_LONG && (iov_pfn + nr_pages - 1) >> addr_width); @@ -1671,7 +1771,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, pteval = ((phys_addr_t)phys_pfn << VTD_PAGE_SHIFT) | prot; } - while (nr_pages--) { + while (nr_pages > 0) { uint64_t tmp; if (!sg_res) { @@ -1679,11 +1779,21 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, sg->dma_address = ((dma_addr_t)iov_pfn << VTD_PAGE_SHIFT) + sg->offset; sg->dma_length = sg->length; pteval = page_to_phys(sg_page(sg)) | prot; + phys_pfn = pteval >> VTD_PAGE_SHIFT; } + if (!pte) { - first_pte = pte = pfn_to_dma_pte(domain, iov_pfn); + largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res); + + first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl); if (!pte) return -ENOMEM; + /* It is large page*/ + if (largepage_lvl > 1) + pteval |= DMA_PTE_LARGE_PAGE; + else + pteval &= ~(uint64_t)DMA_PTE_LARGE_PAGE; + } /* We don't need lock here, nobody else * touches the iova range @@ -1699,16 +1809,38 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, } WARN_ON(1); } + + lvl_pages = lvl_to_nr_pages(largepage_lvl); + + BUG_ON(nr_pages < lvl_pages); + BUG_ON(sg_res < lvl_pages); + + nr_pages -= lvl_pages; + iov_pfn += lvl_pages; + phys_pfn += lvl_pages; + pteval += lvl_pages * VTD_PAGE_SIZE; + sg_res -= lvl_pages; + + /* If the next PTE would be the first in a new page, then we + need to flush the cache on the entries we've just written. + And then we'll need to recalculate 'pte', so clear it and + let it get set again in the if (!pte) block above. + + If we're done (!nr_pages) we need to flush the cache too. + + Also if we've been setting superpages, we may need to + recalculate 'pte' and switch back to smaller pages for the + end of the mapping, if the trailing size is not enough to + use another superpage (i.e. sg_res < lvl_pages). */ pte++; - if (!nr_pages || first_pte_in_page(pte)) { + if (!nr_pages || first_pte_in_page(pte) || + (largepage_lvl > 1 && sg_res < lvl_pages)) { domain_flush_cache(domain, first_pte, (void *)pte - (void *)first_pte); pte = NULL; } - iov_pfn++; - pteval += VTD_PAGE_SIZE; - sg_res--; - if (!sg_res) + + if (!sg_res && nr_pages) sg = sg_next(sg); } return 0; @@ -2016,7 +2148,7 @@ static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) return 0; return iommu_prepare_identity_map(pdev, rmrr->base_address, - rmrr->end_address + 1); + rmrr->end_address); } #ifdef CONFIG_DMAR_FLOPPY_WA @@ -2030,7 +2162,7 @@ static inline void iommu_prepare_isa(void) return; printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); - ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024); + ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1); if (ret) printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " @@ -2106,10 +2238,10 @@ static int identity_mapping(struct pci_dev *pdev) if (likely(!iommu_identity_mapping)) return 0; + info = pdev->dev.archdata.iommu; + if (info && info != DUMMY_DEVICE_DOMAIN_INFO) + return (info->domain == si_domain); - list_for_each_entry(info, &si_domain->devices, link) - if (info->dev == pdev) - return 1; return 0; } @@ -2187,8 +2319,19 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup) * Assume that they will -- if they turn out not to be, then we can * take them out of the 1:1 domain later. */ - if (!startup) - return pdev->dma_mask > DMA_BIT_MASK(32); + if (!startup) { + /* + * If the device's dma_mask is less than the system's memory + * size then this is not a candidate for identity mapping. + */ + u64 dma_mask = pdev->dma_mask; + + if (pdev->dev.coherent_dma_mask && + pdev->dev.coherent_dma_mask < dma_mask) + dma_mask = pdev->dev.coherent_dma_mask; + + return dma_mask >= dma_get_required_mask(&pdev->dev); + } return 1; } @@ -2203,6 +2346,9 @@ static int __init iommu_prepare_static_identity_mapping(int hw) return -EFAULT; for_each_pci_dev(pdev) { + /* Skip Host/PCI Bridge devices */ + if (IS_BRIDGE_HOST_DEVICE(pdev)) + continue; if (iommu_should_identity_map(pdev, 1)) { printk(KERN_INFO "IOMMU: %s identity mapping for device %s\n", hw ? "hardware" : "software", pci_name(pdev)); @@ -2218,7 +2364,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw) return 0; } -static int __init init_dmars(int force_on) +static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; struct dmar_rmrr_unit *rmrr; @@ -2592,8 +2738,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, iommu = domain_get_iommu(domain); size = aligned_nrpages(paddr, size); - iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), - pdev->dma_mask); + iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask); if (!iova) goto error; @@ -3118,7 +3263,17 @@ static int init_iommu_hw(void) if (iommu->qi) dmar_reenable_qi(iommu); - for_each_active_iommu(iommu, drhd) { + for_each_iommu(iommu, drhd) { + if (drhd->ignored) { + /* + * we always have to disable PMRs or DMA may fail on + * this device + */ + if (force_on) + iommu_disable_protect_mem_regions(iommu); + continue; + } + iommu_flush_write_buffer(iommu); iommu_set_root_entry(iommu); @@ -3127,7 +3282,8 @@ static int init_iommu_hw(void) DMA_CCMD_GLOBAL_INVL); iommu->flush.flush_iotlb(iommu, 0, 0, 0, DMA_TLB_GLOBAL_FLUSH); - iommu_enable_translation(iommu); + if (iommu_enable_translation(iommu)) + return 1; iommu_disable_protect_mem_regions(iommu); } @@ -3194,7 +3350,10 @@ static void iommu_resume(void) unsigned long flag; if (init_iommu_hw()) { - WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); + if (force_on) + panic("tboot: IOMMU setup failed, DMAR can not resume!\n"); + else + WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); return; } @@ -3271,7 +3430,6 @@ static struct notifier_block device_nb = { int __init intel_iommu_init(void) { int ret = 0; - int force_on = 0; /* VT-d is required for a TXT/tboot launch, so enforce that */ force_on = tboot_force_iommu(); @@ -3309,7 +3467,7 @@ int __init intel_iommu_init(void) init_no_remapping_devices(); - ret = init_dmars(force_on); + ret = init_dmars(); if (ret) { if (force_on) panic("tboot: Failed to initialize DMARs\n"); @@ -3380,8 +3538,8 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, spin_lock_irqsave(&device_domain_lock, flags); list_for_each_safe(entry, tmp, &domain->devices) { info = list_entry(entry, struct device_domain_info, link); - /* No need to compare PCI domain; it has to be the same */ - if (info->bus == pdev->bus->number && + if (info->segment == pci_domain_nr(pdev->bus) && + info->bus == pdev->bus->number && info->devfn == pdev->devfn) { list_del(&info->link); list_del(&info->global); @@ -3419,10 +3577,13 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, domain_update_iommu_cap(domain); spin_unlock_irqrestore(&domain->iommu_lock, tmp_flags); - spin_lock_irqsave(&iommu->lock, tmp_flags); - clear_bit(domain->id, iommu->domain_ids); - iommu->domains[domain->id] = NULL; - spin_unlock_irqrestore(&iommu->lock, tmp_flags); + if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && + !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY)) { + spin_lock_irqsave(&iommu->lock, tmp_flags); + clear_bit(domain->id, iommu->domain_ids); + iommu->domains[domain->id] = NULL; + spin_unlock_irqrestore(&iommu->lock, tmp_flags); + } } spin_unlock_irqrestore(&device_domain_lock, flags); @@ -3505,6 +3666,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) domain->iommu_count = 0; domain->iommu_coherency = 0; domain->iommu_snooping = 0; + domain->iommu_superpage = 0; domain->max_addr = 0; domain->nid = -1; @@ -3720,7 +3882,7 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, struct dma_pte *pte; u64 phys = 0; - pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT); + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0); if (pte) phys = dma_pte_addr(pte); diff --git a/drivers/pci/iova.c b/drivers/pci/iova.c index 9606e599a475..c5c274ab5c5a 100644 --- a/drivers/pci/iova.c +++ b/drivers/pci/iova.c @@ -63,8 +63,16 @@ __cached_rbnode_delete_update(struct iova_domain *iovad, struct iova *free) curr = iovad->cached32_node; cached_iova = container_of(curr, struct iova, node); - if (free->pfn_lo >= cached_iova->pfn_lo) - iovad->cached32_node = rb_next(&free->node); + if (free->pfn_lo >= cached_iova->pfn_lo) { + struct rb_node *node = rb_next(&free->node); + struct iova *iova = container_of(node, struct iova, node); + + /* only cache if it's below 32bit pfn */ + if (node && iova->pfn_lo < iovad->dma_32bit_pfn) + iovad->cached32_node = node; + else + iovad->cached32_node = NULL; + } } /* Computes the padding size required, to make the diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 2f7c76a85e53..e224a92baa16 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -144,7 +144,7 @@ static void handle_tx(struct vhost_net *net) } mutex_lock(&vq->mutex); - vhost_disable_notify(vq); + vhost_disable_notify(&net->dev, vq); if (wmem < sock->sk->sk_sndbuf / 2) tx_poll_stop(net); @@ -166,8 +166,8 @@ static void handle_tx(struct vhost_net *net) set_bit(SOCK_ASYNC_NOSPACE, &sock->flags); break; } - if (unlikely(vhost_enable_notify(vq))) { - vhost_disable_notify(vq); + if (unlikely(vhost_enable_notify(&net->dev, vq))) { + vhost_disable_notify(&net->dev, vq); continue; } break; @@ -315,7 +315,7 @@ static void handle_rx(struct vhost_net *net) return; mutex_lock(&vq->mutex); - vhost_disable_notify(vq); + vhost_disable_notify(&net->dev, vq); vhost_hlen = vq->vhost_hlen; sock_hlen = vq->sock_hlen; @@ -334,10 +334,10 @@ static void handle_rx(struct vhost_net *net) break; /* OK, now we need to know about added descriptors. */ if (!headcount) { - if (unlikely(vhost_enable_notify(vq))) { + if (unlikely(vhost_enable_notify(&net->dev, vq))) { /* They have slipped one in as we were * doing that: check again. */ - vhost_disable_notify(vq); + vhost_disable_notify(&net->dev, vq); continue; } /* Nothing new? Wait for eventfd to tell us diff --git a/drivers/vhost/test.c b/drivers/vhost/test.c index 099f30230d06..734e1d74ad80 100644 --- a/drivers/vhost/test.c +++ b/drivers/vhost/test.c @@ -49,7 +49,7 @@ static void handle_vq(struct vhost_test *n) return; mutex_lock(&vq->mutex); - vhost_disable_notify(vq); + vhost_disable_notify(&n->dev, vq); for (;;) { head = vhost_get_vq_desc(&n->dev, vq, vq->iov, @@ -61,8 +61,8 @@ static void handle_vq(struct vhost_test *n) break; /* Nothing new? Wait for eventfd to tell us they refilled. */ if (head == vq->num) { - if (unlikely(vhost_enable_notify(vq))) { - vhost_disable_notify(vq); + if (unlikely(vhost_enable_notify(&n->dev, vq))) { + vhost_disable_notify(&n->dev, vq); continue; } break; diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 7aa4eea930f1..ea966b356352 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -37,6 +37,9 @@ enum { VHOST_MEMORY_F_LOG = 0x1, }; +#define vhost_used_event(vq) ((u16 __user *)&vq->avail->ring[vq->num]) +#define vhost_avail_event(vq) ((u16 __user *)&vq->used->ring[vq->num]) + static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, poll_table *pt) { @@ -161,6 +164,8 @@ static void vhost_vq_reset(struct vhost_dev *dev, vq->last_avail_idx = 0; vq->avail_idx = 0; vq->last_used_idx = 0; + vq->signalled_used = 0; + vq->signalled_used_valid = false; vq->used_flags = 0; vq->log_used = false; vq->log_addr = -1ull; @@ -489,16 +494,17 @@ static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, return 1; } -static int vq_access_ok(unsigned int num, +static int vq_access_ok(struct vhost_dev *d, unsigned int num, struct vring_desc __user *desc, struct vring_avail __user *avail, struct vring_used __user *used) { + size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; return access_ok(VERIFY_READ, desc, num * sizeof *desc) && access_ok(VERIFY_READ, avail, - sizeof *avail + num * sizeof *avail->ring) && + sizeof *avail + num * sizeof *avail->ring + s) && access_ok(VERIFY_WRITE, used, - sizeof *used + num * sizeof *used->ring); + sizeof *used + num * sizeof *used->ring + s); } /* Can we log writes? */ @@ -514,9 +520,11 @@ int vhost_log_access_ok(struct vhost_dev *dev) /* Verify access for write logging. */ /* Caller should have vq mutex and device mutex */ -static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) +static int vq_log_access_ok(struct vhost_dev *d, struct vhost_virtqueue *vq, + void __user *log_base) { struct vhost_memory *mp; + size_t s = vhost_has_feature(d, VIRTIO_RING_F_EVENT_IDX) ? 2 : 0; mp = rcu_dereference_protected(vq->dev->memory, lockdep_is_held(&vq->mutex)); @@ -524,15 +532,15 @@ static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) && (!vq->log_used || log_access_ok(log_base, vq->log_addr, sizeof *vq->used + - vq->num * sizeof *vq->used->ring)); + vq->num * sizeof *vq->used->ring + s)); } /* Can we start vq? */ /* Caller should have vq mutex and device mutex */ int vhost_vq_access_ok(struct vhost_virtqueue *vq) { - return vq_access_ok(vq->num, vq->desc, vq->avail, vq->used) && - vq_log_access_ok(vq, vq->log_base); + return vq_access_ok(vq->dev, vq->num, vq->desc, vq->avail, vq->used) && + vq_log_access_ok(vq->dev, vq, vq->log_base); } static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) @@ -577,6 +585,7 @@ static int init_used(struct vhost_virtqueue *vq, if (r) return r; + vq->signalled_used_valid = false; return get_user(vq->last_used_idx, &used->idx); } @@ -674,7 +683,7 @@ static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) * If it is not, we don't as size might not have been setup. * We will verify when backend is configured. */ if (vq->private_data) { - if (!vq_access_ok(vq->num, + if (!vq_access_ok(d, vq->num, (void __user *)(unsigned long)a.desc_user_addr, (void __user *)(unsigned long)a.avail_user_addr, (void __user *)(unsigned long)a.used_user_addr)) { @@ -818,7 +827,7 @@ long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg) vq = d->vqs + i; mutex_lock(&vq->mutex); /* If ring is inactive, will check when it's enabled. */ - if (vq->private_data && !vq_log_access_ok(vq, base)) + if (vq->private_data && !vq_log_access_ok(d, vq, base)) r = -EFAULT; else vq->log_base = base; @@ -1219,6 +1228,10 @@ int vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, /* On success, increment avail index. */ vq->last_avail_idx++; + + /* Assume notifications from guest are disabled at this point, + * if they aren't we would need to update avail_event index. */ + BUG_ON(!(vq->used_flags & VRING_USED_F_NO_NOTIFY)); return head; } @@ -1267,6 +1280,12 @@ int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) eventfd_signal(vq->log_ctx, 1); } vq->last_used_idx++; + /* If the driver never bothers to signal in a very long while, + * used index might wrap around. If that happens, invalidate + * signalled_used index we stored. TODO: make sure driver + * signals at least once in 2^16 and remove this. */ + if (unlikely(vq->last_used_idx == vq->signalled_used)) + vq->signalled_used_valid = false; return 0; } @@ -1275,6 +1294,7 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, unsigned count) { struct vring_used_elem __user *used; + u16 old, new; int start; start = vq->last_used_idx % vq->num; @@ -1292,7 +1312,14 @@ static int __vhost_add_used_n(struct vhost_virtqueue *vq, ((void __user *)used - (void __user *)vq->used), count * sizeof *used); } - vq->last_used_idx += count; + old = vq->last_used_idx; + new = (vq->last_used_idx += count); + /* If the driver never bothers to signal in a very long while, + * used index might wrap around. If that happens, invalidate + * signalled_used index we stored. TODO: make sure driver + * signals at least once in 2^16 and remove this. */ + if (unlikely((u16)(new - vq->signalled_used) < (u16)(new - old))) + vq->signalled_used_valid = false; return 0; } @@ -1331,29 +1358,47 @@ int vhost_add_used_n(struct vhost_virtqueue *vq, struct vring_used_elem *heads, return r; } -/* This actually signals the guest, using eventfd. */ -void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) +static bool vhost_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { - __u16 flags; - + __u16 old, new, event; + bool v; /* Flush out used index updates. This is paired * with the barrier that the Guest executes when enabling * interrupts. */ smp_mb(); - if (__get_user(flags, &vq->avail->flags)) { - vq_err(vq, "Failed to get flags"); - return; + if (vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) && + unlikely(vq->avail_idx == vq->last_avail_idx)) + return true; + + if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + __u16 flags; + if (__get_user(flags, &vq->avail->flags)) { + vq_err(vq, "Failed to get flags"); + return true; + } + return !(flags & VRING_AVAIL_F_NO_INTERRUPT); } + old = vq->signalled_used; + v = vq->signalled_used_valid; + new = vq->signalled_used = vq->last_used_idx; + vq->signalled_used_valid = true; - /* If they don't want an interrupt, don't signal, unless empty. */ - if ((flags & VRING_AVAIL_F_NO_INTERRUPT) && - (vq->avail_idx != vq->last_avail_idx || - !vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY))) - return; + if (unlikely(!v)) + return true; + if (get_user(event, vhost_used_event(vq))) { + vq_err(vq, "Failed to get used event idx"); + return true; + } + return vring_need_event(event, new, old); +} + +/* This actually signals the guest, using eventfd. */ +void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) +{ /* Signal the Guest tell them we used something up. */ - if (vq->call_ctx) + if (vq->call_ctx && vhost_notify(dev, vq)) eventfd_signal(vq->call_ctx, 1); } @@ -1376,7 +1421,7 @@ void vhost_add_used_and_signal_n(struct vhost_dev *dev, } /* OK, now we need to know about added descriptors. */ -bool vhost_enable_notify(struct vhost_virtqueue *vq) +bool vhost_enable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { u16 avail_idx; int r; @@ -1384,11 +1429,34 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq) if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) return false; vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; - r = put_user(vq->used_flags, &vq->used->flags); - if (r) { - vq_err(vq, "Failed to enable notification at %p: %d\n", - &vq->used->flags, r); - return false; + if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + r = put_user(vq->used_flags, &vq->used->flags); + if (r) { + vq_err(vq, "Failed to enable notification at %p: %d\n", + &vq->used->flags, r); + return false; + } + } else { + r = put_user(vq->avail_idx, vhost_avail_event(vq)); + if (r) { + vq_err(vq, "Failed to update avail event index at %p: %d\n", + vhost_avail_event(vq), r); + return false; + } + } + if (unlikely(vq->log_used)) { + void __user *used; + /* Make sure data is seen before log. */ + smp_wmb(); + used = vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX) ? + &vq->used->flags : vhost_avail_event(vq); + /* Log used flags or event index entry write. Both are 16 bit + * fields. */ + log_write(vq->log_base, vq->log_addr + + (used - (void __user *)vq->used), + sizeof(u16)); + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); } /* They could have slipped one in as we were doing that: make * sure it's written, then check again. */ @@ -1404,15 +1472,17 @@ bool vhost_enable_notify(struct vhost_virtqueue *vq) } /* We don't need to be notified again. */ -void vhost_disable_notify(struct vhost_virtqueue *vq) +void vhost_disable_notify(struct vhost_dev *dev, struct vhost_virtqueue *vq) { int r; if (vq->used_flags & VRING_USED_F_NO_NOTIFY) return; vq->used_flags |= VRING_USED_F_NO_NOTIFY; - r = put_user(vq->used_flags, &vq->used->flags); - if (r) - vq_err(vq, "Failed to enable notification at %p: %d\n", - &vq->used->flags, r); + if (!vhost_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) { + r = put_user(vq->used_flags, &vq->used->flags); + if (r) + vq_err(vq, "Failed to enable notification at %p: %d\n", + &vq->used->flags, r); + } } diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h index b3363ae38518..8e03379dd30f 100644 --- a/drivers/vhost/vhost.h +++ b/drivers/vhost/vhost.h @@ -84,6 +84,12 @@ struct vhost_virtqueue { /* Used flags */ u16 used_flags; + /* Last used index value we have signalled on */ + u16 signalled_used; + + /* Last used index value we have signalled on */ + bool signalled_used_valid; + /* Log writes to used structure. */ bool log_used; u64 log_addr; @@ -149,8 +155,8 @@ void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *, void vhost_add_used_and_signal_n(struct vhost_dev *, struct vhost_virtqueue *, struct vring_used_elem *heads, unsigned count); void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *); -void vhost_disable_notify(struct vhost_virtqueue *); -bool vhost_enable_notify(struct vhost_virtqueue *); +void vhost_disable_notify(struct vhost_dev *, struct vhost_virtqueue *); +bool vhost_enable_notify(struct vhost_dev *, struct vhost_virtqueue *); int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, unsigned int log_num, u64 len); @@ -162,11 +168,12 @@ int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, } while (0) enum { - VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) | - (1 << VIRTIO_RING_F_INDIRECT_DESC) | - (1 << VHOST_F_LOG_ALL) | - (1 << VHOST_NET_F_VIRTIO_NET_HDR) | - (1 << VIRTIO_NET_F_MRG_RXBUF), + VHOST_FEATURES = (1ULL << VIRTIO_F_NOTIFY_ON_EMPTY) | + (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | + (1ULL << VIRTIO_RING_F_EVENT_IDX) | + (1ULL << VHOST_F_LOG_ALL) | + (1ULL << VHOST_NET_F_VIRTIO_NET_HDR) | + (1ULL << VIRTIO_NET_F_MRG_RXBUF), }; static inline int vhost_has_feature(struct vhost_dev *dev, int bit) diff --git a/drivers/virtio/virtio_balloon.c b/drivers/virtio/virtio_balloon.c index 0f1da45ba47d..e058ace2a4ad 100644 --- a/drivers/virtio/virtio_balloon.c +++ b/drivers/virtio/virtio_balloon.c @@ -40,9 +40,6 @@ struct virtio_balloon /* Waiting for host to ack the pages we released. */ struct completion acked; - /* Do we have to tell Host *before* we reuse pages? */ - bool tell_host_first; - /* The pages we've told the Host we're not using. */ unsigned int num_pages; struct list_head pages; @@ -151,13 +148,14 @@ static void leak_balloon(struct virtio_balloon *vb, size_t num) vb->num_pages--; } - if (vb->tell_host_first) { - tell_host(vb, vb->deflate_vq); - release_pages_by_pfn(vb->pfns, vb->num_pfns); - } else { - release_pages_by_pfn(vb->pfns, vb->num_pfns); - tell_host(vb, vb->deflate_vq); - } + + /* + * Note that if + * virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); + * is true, we *have* to do it in this order + */ + tell_host(vb, vb->deflate_vq); + release_pages_by_pfn(vb->pfns, vb->num_pfns); } static inline void update_stat(struct virtio_balloon *vb, int idx, @@ -325,9 +323,6 @@ static int virtballoon_probe(struct virtio_device *vdev) goto out_del_vqs; } - vb->tell_host_first - = virtio_has_feature(vdev, VIRTIO_BALLOON_F_MUST_TELL_HOST); - return 0; out_del_vqs: diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index b0043fb26a4d..68b9136847af 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -82,6 +82,9 @@ struct vring_virtqueue /* Host supports indirect buffers */ bool indirect; + /* Host publishes avail event idx */ + bool event; + /* Number of free buffers */ unsigned int num_free; /* Head of free buffer list. */ @@ -237,18 +240,22 @@ EXPORT_SYMBOL_GPL(virtqueue_add_buf_gfp); void virtqueue_kick(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); + u16 new, old; START_USE(vq); /* Descriptors and available array need to be set before we expose the * new available array entries. */ virtio_wmb(); - vq->vring.avail->idx += vq->num_added; + old = vq->vring.avail->idx; + new = vq->vring.avail->idx = old + vq->num_added; vq->num_added = 0; /* Need to update avail index before checking if we should notify */ virtio_mb(); - if (!(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) + if (vq->event ? + vring_need_event(vring_avail_event(&vq->vring), new, old) : + !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY)) /* Prod other side to tell it about changes. */ vq->notify(&vq->vq); @@ -324,6 +331,14 @@ void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len) ret = vq->data[i]; detach_buf(vq, i); vq->last_used_idx++; + /* If we expect an interrupt for the next entry, tell host + * by writing event index and flush out the write before + * the read in the next get_buf call. */ + if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) { + vring_used_event(&vq->vring) = vq->last_used_idx; + virtio_mb(); + } + END_USE(vq); return ret; } @@ -345,7 +360,11 @@ bool virtqueue_enable_cb(struct virtqueue *_vq) /* We optimistically turn back on interrupts, then check if there was * more to do. */ + /* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to + * either clear the flags bit or point the event index at the next + * entry. Always do both to keep code simple. */ vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; + vring_used_event(&vq->vring) = vq->last_used_idx; virtio_mb(); if (unlikely(more_used(vq))) { END_USE(vq); @@ -357,6 +376,33 @@ bool virtqueue_enable_cb(struct virtqueue *_vq) } EXPORT_SYMBOL_GPL(virtqueue_enable_cb); +bool virtqueue_enable_cb_delayed(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + u16 bufs; + + START_USE(vq); + + /* We optimistically turn back on interrupts, then check if there was + * more to do. */ + /* Depending on the VIRTIO_RING_F_USED_EVENT_IDX feature, we need to + * either clear the flags bit or point the event index at the next + * entry. Always do both to keep code simple. */ + vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT; + /* TODO: tune this threshold */ + bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4; + vring_used_event(&vq->vring) = vq->last_used_idx + bufs; + virtio_mb(); + if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) { + END_USE(vq); + return false; + } + + END_USE(vq); + return true; +} +EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed); + void *virtqueue_detach_unused_buf(struct virtqueue *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -438,6 +484,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int num, #endif vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC); + vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX); /* No callback? Tell other side not to bother us. */ if (!callback) @@ -472,6 +519,8 @@ void vring_transport_features(struct virtio_device *vdev) switch (i) { case VIRTIO_RING_F_INDIRECT_DESC: break; + case VIRTIO_RING_F_EVENT_IDX: + break; default: /* We don't understand this bit. */ clear_bit(i, vdev->features); diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 87d95a8cddbc..f55ae23b137e 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -583,8 +583,6 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry) if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN)) return -EACCES; - dentry_unhash(dentry); - if (atomic_dec_and_test(&ino->count)) { p_ino = autofs4_dentry_ino(dentry->d_parent); if (p_ino && dentry->d_parent != dentry) diff --git a/fs/namei.c b/fs/namei.c index 1ab641f2e78e..e2e4e8d032ee 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2579,6 +2579,7 @@ int vfs_rmdir(struct inode *dir, struct dentry *dentry) if (error) goto out; + shrink_dcache_parent(dentry); error = dir->i_op->rmdir(dir, dentry); if (error) goto out; @@ -2993,6 +2994,8 @@ static int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry, if (d_mountpoint(old_dentry) || d_mountpoint(new_dentry)) goto out; + if (target) + shrink_dcache_parent(new_dentry); error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry); if (error) goto out; diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 5619f8522738..bbd8661b3473 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -9,8 +9,12 @@ #define VTD_PAGE_MASK (((u64)-1) << VTD_PAGE_SHIFT) #define VTD_PAGE_ALIGN(addr) (((addr) + VTD_PAGE_SIZE - 1) & VTD_PAGE_MASK) +#define VTD_STRIDE_SHIFT (9) +#define VTD_STRIDE_MASK (((u64)-1) << VTD_STRIDE_SHIFT) + #define DMA_PTE_READ (1) #define DMA_PTE_WRITE (2) +#define DMA_PTE_LARGE_PAGE (1 << 7) #define DMA_PTE_SNP (1 << 11) #define CONTEXT_TT_MULTI_LEVEL 0 diff --git a/include/linux/mtd/physmap.h b/include/linux/mtd/physmap.h index d40bfa1d9c91..e5f21d293c70 100644 --- a/include/linux/mtd/physmap.h +++ b/include/linux/mtd/physmap.h @@ -19,6 +19,7 @@ #include struct map_info; +struct platform_device; struct physmap_flash_data { unsigned int width; diff --git a/include/linux/virtio.h b/include/linux/virtio.h index aff5b4f74041..710885749605 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -51,6 +51,13 @@ struct virtqueue { * This re-enables callbacks; it returns "false" if there are pending * buffers in the queue, to detect a possible race between the driver * checking for more work, and enabling callbacks. + * virtqueue_enable_cb_delayed: restart callbacks after disable_cb. + * vq: the struct virtqueue we're talking about. + * This re-enables callbacks but hints to the other side to delay + * interrupts until most of the available buffers have been processed; + * it returns "false" if there are many pending buffers in the queue, + * to detect a possible race between the driver checking for more work, + * and enabling callbacks. * virtqueue_detach_unused_buf: detach first unused buffer * vq: the struct virtqueue we're talking about. * Returns NULL or the "data" token handed to add_buf @@ -86,6 +93,8 @@ void virtqueue_disable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq); +bool virtqueue_enable_cb_delayed(struct virtqueue *vq); + void *virtqueue_detach_unused_buf(struct virtqueue *vq); /** diff --git a/include/linux/virtio_9p.h b/include/linux/virtio_9p.h index e68b439b2860..277c4ad44e84 100644 --- a/include/linux/virtio_9p.h +++ b/include/linux/virtio_9p.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_9P_H #define _LINUX_VIRTIO_9P_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include #include diff --git a/include/linux/virtio_balloon.h b/include/linux/virtio_balloon.h index a50ecd1b81a2..652dc8bea921 100644 --- a/include/linux/virtio_balloon.h +++ b/include/linux/virtio_balloon.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_BALLOON_H #define _LINUX_VIRTIO_BALLOON_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include diff --git a/include/linux/virtio_blk.h b/include/linux/virtio_blk.h index 167720d695ed..e0edb40ca7aa 100644 --- a/include/linux/virtio_blk.h +++ b/include/linux/virtio_blk.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_BLK_H #define _LINUX_VIRTIO_BLK_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include #include diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 800617b4ddd5..39c88c5ad19d 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_CONFIG_H #define _LINUX_VIRTIO_CONFIG_H /* This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers. */ + * anyone can use the definitions to implement compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ /* Virtio devices use a standardized configuration space to define their * features and pass configuration information, but each implementation can diff --git a/include/linux/virtio_console.h b/include/linux/virtio_console.h index e4d333543a33..bdf4b0034739 100644 --- a/include/linux/virtio_console.h +++ b/include/linux/virtio_console.h @@ -5,7 +5,31 @@ #include /* * This header, excluding the #ifdef __KERNEL__ part, is BSD licensed so - * anyone can use the definitions to implement compatible drivers/servers. + * anyone can use the definitions to implement compatible drivers/servers: + * + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. * * Copyright (C) Red Hat, Inc., 2009, 2010, 2011 * Copyright (C) Amit Shah , 2009, 2010, 2011 diff --git a/include/linux/virtio_ids.h b/include/linux/virtio_ids.h index 06660c0a78d7..85bb0bb66ffc 100644 --- a/include/linux/virtio_ids.h +++ b/include/linux/virtio_ids.h @@ -5,7 +5,29 @@ * * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. - */ + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #define VIRTIO_ID_NET 1 /* virtio net */ #define VIRTIO_ID_BLOCK 2 /* virtio block */ diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 085e42298ce5..136040bba3e3 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -1,7 +1,30 @@ #ifndef _LINUX_VIRTIO_NET_H #define _LINUX_VIRTIO_NET_H /* This header is BSD licensed so anyone can use the definitions to implement - * compatible drivers/servers. */ + * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #include #include #include diff --git a/include/linux/virtio_pci.h b/include/linux/virtio_pci.h index 9a3d7c48c622..ea66f3f60d63 100644 --- a/include/linux/virtio_pci.h +++ b/include/linux/virtio_pci.h @@ -11,6 +11,29 @@ * * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. */ #ifndef _LINUX_VIRTIO_PCI_H diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index e4d144b132b5..4a32cb6da425 100644 --- a/include/linux/virtio_ring.h +++ b/include/linux/virtio_ring.h @@ -7,6 +7,29 @@ * This header is BSD licensed so anyone can use the definitions to implement * compatible drivers/servers. * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the name of IBM nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL IBM OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * * Copyright Rusty Russell IBM Corporation 2007. */ #include @@ -29,6 +52,12 @@ /* We support indirect buffer descriptors */ #define VIRTIO_RING_F_INDIRECT_DESC 28 +/* The Guest publishes the used index for which it expects an interrupt + * at the end of the avail ring. Host should ignore the avail->flags field. */ +/* The Host publishes the avail index for which it expects a kick + * at the end of the used ring. Guest should ignore the used->flags field. */ +#define VIRTIO_RING_F_EVENT_IDX 29 + /* Virtio ring descriptors: 16 bytes. These can chain together via "next". */ struct vring_desc { /* Address (guest-physical). */ @@ -83,6 +112,7 @@ struct vring { * __u16 avail_flags; * __u16 avail_idx; * __u16 available[num]; + * __u16 used_event_idx; * * // Padding to the next align boundary. * char pad[]; @@ -91,8 +121,14 @@ struct vring { * __u16 used_flags; * __u16 used_idx; * struct vring_used_elem used[num]; + * __u16 avail_event_idx; * }; */ +/* We publish the used event index at the end of the available ring, and vice + * versa. They are at the end for backwards compatibility. */ +#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) +#define vring_avail_event(vr) (*(__u16 *)&(vr)->used->ring[(vr)->num]) + static inline void vring_init(struct vring *vr, unsigned int num, void *p, unsigned long align) { @@ -107,7 +143,21 @@ static inline unsigned vring_size(unsigned int num, unsigned long align) { return ((sizeof(struct vring_desc) * num + sizeof(__u16) * (2 + num) + align - 1) & ~(align - 1)) - + sizeof(__u16) * 2 + sizeof(struct vring_used_elem) * num; + + sizeof(__u16) * 3 + sizeof(struct vring_used_elem) * num; +} + +/* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ +/* Assuming a given event_idx value from the other size, if + * we have just incremented index from old to new_idx, + * should we trigger an event? */ +static inline int vring_need_event(__u16 event_idx, __u16 new_idx, __u16 old) +{ + /* Note: Xen has similar logic for notification hold-off + * in include/xen/interface/io/ring.h with req_event and req_prod + * corresponding to event_idx + 1 and new_idx respectively. + * Note also that req_event and req_prod in Xen start at 1, + * event indexes in virtio start at 0. */ + return (__u16)(new_idx - event_idx - 1) < (__u16)(new_idx - old); } #ifdef __KERNEL__ diff --git a/kernel/events/core.c b/kernel/events/core.c index cc5d57d1d0b6..ba89f40abe6a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7388,26 +7388,12 @@ static int __perf_cgroup_move(void *info) return 0; } -static void perf_cgroup_move(struct task_struct *task) +static void +perf_cgroup_attach_task(struct cgroup *cgrp, struct task_struct *task) { task_function_call(task, __perf_cgroup_move, task); } -static void perf_cgroup_attach(struct cgroup_subsys *ss, struct cgroup *cgrp, - struct cgroup *old_cgrp, struct task_struct *task, - bool threadgroup) -{ - perf_cgroup_move(task); - if (threadgroup) { - struct task_struct *c; - rcu_read_lock(); - list_for_each_entry_rcu(c, &task->thread_group, thread_group) { - perf_cgroup_move(c); - } - rcu_read_unlock(); - } -} - static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, struct cgroup *old_cgrp, struct task_struct *task) { @@ -7419,7 +7405,7 @@ static void perf_cgroup_exit(struct cgroup_subsys *ss, struct cgroup *cgrp, if (!(task->flags & PF_EXITING)) return; - perf_cgroup_move(task); + perf_cgroup_attach_task(cgrp, task); } struct cgroup_subsys perf_subsys = { @@ -7428,6 +7414,6 @@ struct cgroup_subsys perf_subsys = { .create = perf_cgroup_create, .destroy = perf_cgroup_destroy, .exit = perf_cgroup_exit, - .attach = perf_cgroup_attach, + .attach_task = perf_cgroup_attach_task, }; #endif /* CONFIG_CGROUP_PERF */ diff --git a/kernel/rcutree.c b/kernel/rcutree.c index 77a7671dd147..89419ff92e99 100644 --- a/kernel/rcutree.c +++ b/kernel/rcutree.c @@ -1648,7 +1648,6 @@ static int __cpuinit rcu_spawn_one_cpu_kthread(int cpu) if (IS_ERR(t)) return PTR_ERR(t); kthread_bind(t, cpu); - set_task_state(t, TASK_INTERRUPTIBLE); per_cpu(rcu_cpu_kthread_cpu, cpu) = cpu; WARN_ON_ONCE(per_cpu(rcu_cpu_kthread_task, cpu) != NULL); per_cpu(rcu_cpu_kthread_task, cpu) = t; @@ -1756,7 +1755,6 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, if (IS_ERR(t)) return PTR_ERR(t); raw_spin_lock_irqsave(&rnp->lock, flags); - set_task_state(t, TASK_INTERRUPTIBLE); rnp->node_kthread_task = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); sp.sched_priority = 99; @@ -1765,6 +1763,8 @@ static int __cpuinit rcu_spawn_one_node_kthread(struct rcu_state *rsp, return rcu_spawn_one_boost_kthread(rsp, rnp, rnp_index); } +static void rcu_wake_one_boost_kthread(struct rcu_node *rnp); + /* * Spawn all kthreads -- called as soon as the scheduler is running. */ @@ -1772,18 +1772,30 @@ static int __init rcu_spawn_kthreads(void) { int cpu; struct rcu_node *rnp; + struct task_struct *t; rcu_kthreads_spawnable = 1; for_each_possible_cpu(cpu) { per_cpu(rcu_cpu_has_work, cpu) = 0; - if (cpu_online(cpu)) + if (cpu_online(cpu)) { (void)rcu_spawn_one_cpu_kthread(cpu); + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (t) + wake_up_process(t); + } } rnp = rcu_get_root(rcu_state); (void)rcu_spawn_one_node_kthread(rcu_state, rnp); + if (rnp->node_kthread_task) + wake_up_process(rnp->node_kthread_task); if (NUM_RCU_NODES > 1) { - rcu_for_each_leaf_node(rcu_state, rnp) + rcu_for_each_leaf_node(rcu_state, rnp) { (void)rcu_spawn_one_node_kthread(rcu_state, rnp); + t = rnp->node_kthread_task; + if (t) + wake_up_process(t); + rcu_wake_one_boost_kthread(rnp); + } } return 0; } @@ -2188,14 +2200,14 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible) raw_spin_unlock_irqrestore(&rsp->onofflock, flags); } -static void __cpuinit rcu_online_cpu(int cpu) +static void __cpuinit rcu_prepare_cpu(int cpu) { rcu_init_percpu_data(cpu, &rcu_sched_state, 0); rcu_init_percpu_data(cpu, &rcu_bh_state, 0); rcu_preempt_init_percpu_data(cpu); } -static void __cpuinit rcu_online_kthreads(int cpu) +static void __cpuinit rcu_prepare_kthreads(int cpu) { struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); struct rcu_node *rnp = rdp->mynode; @@ -2208,6 +2220,31 @@ static void __cpuinit rcu_online_kthreads(int cpu) } } +/* + * kthread_create() creates threads in TASK_UNINTERRUPTIBLE state, + * but the RCU threads are woken on demand, and if demand is low this + * could be a while triggering the hung task watchdog. + * + * In order to avoid this, poke all tasks once the CPU is fully + * up and running. + */ +static void __cpuinit rcu_online_kthreads(int cpu) +{ + struct rcu_data *rdp = per_cpu_ptr(rcu_state->rda, cpu); + struct rcu_node *rnp = rdp->mynode; + struct task_struct *t; + + t = per_cpu(rcu_cpu_kthread_task, cpu); + if (t) + wake_up_process(t); + + t = rnp->node_kthread_task; + if (t) + wake_up_process(t); + + rcu_wake_one_boost_kthread(rnp); +} + /* * Handle CPU online/offline notification events. */ @@ -2221,10 +2258,11 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self, switch (action) { case CPU_UP_PREPARE: case CPU_UP_PREPARE_FROZEN: - rcu_online_cpu(cpu); - rcu_online_kthreads(cpu); + rcu_prepare_cpu(cpu); + rcu_prepare_kthreads(cpu); break; case CPU_ONLINE: + rcu_online_kthreads(cpu); case CPU_DOWN_FAILED: rcu_node_kthread_setaffinity(rnp, -1); rcu_cpu_kthread_setrt(cpu, 1); diff --git a/kernel/rcutree_plugin.h b/kernel/rcutree_plugin.h index a767b7dac365..c8bff3099a89 100644 --- a/kernel/rcutree_plugin.h +++ b/kernel/rcutree_plugin.h @@ -1295,7 +1295,6 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, if (IS_ERR(t)) return PTR_ERR(t); raw_spin_lock_irqsave(&rnp->lock, flags); - set_task_state(t, TASK_INTERRUPTIBLE); rnp->boost_kthread_task = t; raw_spin_unlock_irqrestore(&rnp->lock, flags); sp.sched_priority = RCU_KTHREAD_PRIO; @@ -1303,6 +1302,12 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, return 0; } +static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) +{ + if (rnp->boost_kthread_task) + wake_up_process(rnp->boost_kthread_task); +} + #else /* #ifdef CONFIG_RCU_BOOST */ static void rcu_initiate_boost(struct rcu_node *rnp, unsigned long flags) @@ -1326,6 +1331,10 @@ static int __cpuinit rcu_spawn_one_boost_kthread(struct rcu_state *rsp, return 0; } +static void __cpuinit rcu_wake_one_boost_kthread(struct rcu_node *rnp) +{ +} + #endif /* #else #ifdef CONFIG_RCU_BOOST */ #ifndef CONFIG_SMP diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 4fc92445a29c..f175d98bd355 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -938,6 +938,12 @@ static struct ctl_table kern_table[] = { }, #endif #ifdef CONFIG_PERF_EVENTS + /* + * User-space scripts rely on the existence of this file + * as a feature check for perf_events being enabled. + * + * So it's an ABI, do not remove! + */ { .procname = "perf_event_paranoid", .data = &sysctl_perf_event_paranoid, diff --git a/mm/page_alloc.c b/mm/page_alloc.c index a4e1db3f1981..4e8985acdab8 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -2247,10 +2247,6 @@ __alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order, if (should_fail_alloc_page(gfp_mask, order)) return NULL; -#ifndef CONFIG_ZONE_DMA - if (WARN_ON_ONCE(gfp_mask & __GFP_DMA)) - return NULL; -#endif /* * Check the zones suitable for the gfp_mask contain at least one diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index ae3a698415e6..ec1bcecf2cda 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -593,7 +593,8 @@ static int apparmor_setprocattr(struct task_struct *task, char *name, sa.aad.op = OP_SETPROCATTR; sa.aad.info = name; sa.aad.error = -EINVAL; - return aa_audit(AUDIT_APPARMOR_DENIED, NULL, GFP_KERNEL, + return aa_audit(AUDIT_APPARMOR_DENIED, + __aa_current_profile(), GFP_KERNEL, &sa, NULL); } } else if (strcmp(name, "exec") == 0) { diff --git a/tools/virtio/virtio_test.c b/tools/virtio/virtio_test.c index df0c6d2c3860..74d3331bdaf9 100644 --- a/tools/virtio/virtio_test.c +++ b/tools/virtio/virtio_test.c @@ -197,6 +197,14 @@ const struct option longopts[] = { .name = "help", .val = 'h', }, + { + .name = "event-idx", + .val = 'E', + }, + { + .name = "no-event-idx", + .val = 'e', + }, { .name = "indirect", .val = 'I', @@ -211,13 +219,17 @@ const struct option longopts[] = { static void help() { - fprintf(stderr, "Usage: virtio_test [--help] [--no-indirect]\n"); + fprintf(stderr, "Usage: virtio_test [--help]" + " [--no-indirect]" + " [--no-event-idx]" + "\n"); } int main(int argc, char **argv) { struct vdev_info dev; - unsigned long long features = 1ULL << VIRTIO_RING_F_INDIRECT_DESC; + unsigned long long features = (1ULL << VIRTIO_RING_F_INDIRECT_DESC) | + (1ULL << VIRTIO_RING_F_EVENT_IDX); int o; for (;;) { @@ -228,6 +240,9 @@ int main(int argc, char **argv) case '?': help(); exit(2); + case 'e': + features &= ~(1ULL << VIRTIO_RING_F_EVENT_IDX); + break; case 'h': help(); goto done;