From 76a10b86785c5e3fc49bcee355502d035b07e47a Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 22 Jul 2016 16:20:37 +0000 Subject: [PATCH 01/14] KVM: api: Pass the devid in the msi routing entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On ARM, the MSI msg (address and data) comes along with out-of-band device ID information. The device ID encodes the device that writes the MSI msg. Let's convey the device id in kvm_irq_routing_msi and use KVM_MSI_VALID_DEVID flag value in kvm_irq_routing_entry to indicate the msi devid is populated. Signed-off-by: Eric Auger Reviewed-by: Andre Przywara Acked-by: Radim Krčmář Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 19 +++++++++++++++++-- include/uapi/linux/kvm.h | 5 ++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 07049eadb124..415cde1647e9 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1468,7 +1468,11 @@ struct kvm_irq_routing_entry { #define KVM_IRQ_ROUTING_S390_ADAPTER 3 #define KVM_IRQ_ROUTING_HV_SINT 4 -No flags are specified so far, the corresponding field must be set to zero. +flags: +- KVM_MSI_VALID_DEVID: used along with KVM_IRQ_ROUTING_MSI + routing entry type, specifies that the devid field contains + a valid value. +- zero otherwise struct kvm_irq_routing_irqchip { __u32 irqchip; @@ -1479,9 +1483,20 @@ struct kvm_irq_routing_msi { __u32 address_lo; __u32 address_hi; __u32 data; - __u32 pad; + union { + __u32 pad; + __u32 devid; + }; }; +devid: If KVM_MSI_VALID_DEVID is set, contains a unique device identifier + for the device that wrote the MSI message. + For PCI, this is usually a BFD identifier in the lower 16 bits. + +The per-VM KVM_CAP_MSI_DEVID capability advertises the requirement to +provide the device ID. If this capability is not set, userland cannot +rely on the kernel to allow the KVM_MSI_VALID_DEVID flag being set. + struct kvm_irq_routing_s390_adapter { __u64 ind_addr; __u64 summary_addr; diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index d8c4c324cfae..eb2220895c6e 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -879,7 +879,10 @@ struct kvm_irq_routing_msi { __u32 address_lo; __u32 address_hi; __u32 data; - __u32 pad; + union { + __u32 pad; + __u32 devid; + }; }; struct kvm_irq_routing_s390_adapter { From 0455e72c9ae9b7e9589f2cc5ba5bc7804be83342 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 22 Jul 2016 16:20:38 +0000 Subject: [PATCH 02/14] KVM: Add devid in kvm_kernel_irq_routing_entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enhance kvm_kernel_irq_routing_entry to transport the device id field, devid. A new flags field makes possible to indicate the devid is valid. Those additions are used for ARM GICv3 ITS MSI injection. The original struct msi_msg msi field is replaced by a new custom structure that embeds the new fields. Signed-off-by: Eric Auger Suggested-by: Radim Krčmář Acked-by: Radim Krčmář Signed-off-by: Marc Zyngier --- include/linux/kvm_host.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 614a98137c5f..a15828fe845c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -317,7 +317,13 @@ struct kvm_kernel_irq_routing_entry { unsigned irqchip; unsigned pin; } irqchip; - struct msi_msg msi; + struct { + u32 address_lo; + u32 address_hi; + u32 data; + u32 flags; + u32 devid; + } msi; struct kvm_s390_adapter_int adapter; struct kvm_hv_sint hv_sint; }; From ebe915259c731ee75e94ca86774c3d61df734d28 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 22 Jul 2016 16:20:39 +0000 Subject: [PATCH 03/14] KVM: irqchip: Convey devid to kvm_set_msi MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit on ARM, a devid field is populated in kvm_msi struct in case the flag is set to KVM_MSI_VALID_DEVID. Let's propagate both flags and devid field in kvm_kernel_irq_routing_entry. Signed-off-by: Eric Auger Reviewed-by: Andre Przywara Acked-by: Christoffer Dall Acked-by: Radim Krčmář Signed-off-by: Marc Zyngier --- virt/kvm/irqchip.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 8db197bb6c7a..0c000546aedc 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -62,12 +62,14 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) { struct kvm_kernel_irq_routing_entry route; - if (!irqchip_in_kernel(kvm) || msi->flags != 0) + if (!irqchip_in_kernel(kvm) || (msi->flags & ~KVM_MSI_VALID_DEVID)) return -EINVAL; route.msi.address_lo = msi->address_lo; route.msi.address_hi = msi->address_hi; route.msi.data = msi->data; + route.msi.flags = msi->flags; + route.msi.devid = msi->devid; return kvm_set_msi(&route, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); } From d9565a7399d665fa7313122504778cb3d5ef3e19 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 22 Jul 2016 16:20:40 +0000 Subject: [PATCH 04/14] KVM: Move kvm_setup_default/empty_irq_routing declaration in arch specific header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm_setup_default_irq_routing and kvm_setup_empty_irq_routing are not used by generic code. So let's move the declarations in x86 irq.h header instead of kvm_host.h. Signed-off-by: Eric Auger Suggested-by: Andre Przywara Acked-by: Radim Krčmář Signed-off-by: Marc Zyngier --- arch/x86/kvm/irq.h | 3 +++ include/linux/kvm_host.h | 2 -- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 61ebdc13a29a..035731eb3897 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -120,4 +120,7 @@ void __kvm_migrate_timers(struct kvm_vcpu *vcpu); int apic_has_pending_timer(struct kvm_vcpu *vcpu); +int kvm_setup_default_irq_routing(struct kvm *kvm); +int kvm_setup_empty_irq_routing(struct kvm *kvm); + #endif diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a15828fe845c..a7eb5c48251e 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1052,8 +1052,6 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) #define KVM_MAX_IRQ_ROUTES 1024 #endif -int kvm_setup_default_irq_routing(struct kvm *kvm); -int kvm_setup_empty_irq_routing(struct kvm *kvm); int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, From 180ae7b1182344ca617d8b5200306b02a6b5075d Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 22 Jul 2016 16:20:41 +0000 Subject: [PATCH 05/14] KVM: arm/arm64: Enable irqchip routing This patch adds compilation and link against irqchip. Main motivation behind using irqchip code is to enable MSI routing code. In the future irqchip routing may also be useful when targeting multiple irqchips. Routing standard callbacks now are implemented in vgic-irqfd: - kvm_set_routing_entry - kvm_set_irq - kvm_set_msi They only are supported with new_vgic code. Both HAVE_KVM_IRQCHIP and HAVE_KVM_IRQ_ROUTING are defined. KVM_CAP_IRQ_ROUTING is advertised and KVM_SET_GSI_ROUTING is allowed. So from now on IRQCHIP routing is enabled and a routing table entry must exist for irqfd injection to succeed for a given SPI. This patch builds a default flat irqchip routing table (gsi=irqchip.pin) covering all the VGIC SPI indexes. This routing table is overwritten by the first first user-space call to KVM_SET_GSI_ROUTING ioctl. MSI routing setup is not yet allowed. Signed-off-by: Eric Auger Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 12 ++-- arch/arm/kvm/Kconfig | 2 + arch/arm/kvm/Makefile | 1 + arch/arm/kvm/irq.h | 19 ++++++ arch/arm64/kvm/Kconfig | 2 + arch/arm64/kvm/Makefile | 1 + arch/arm64/kvm/irq.h | 19 ++++++ include/kvm/arm_vgic.h | 7 ++ virt/kvm/arm/vgic/vgic-init.c | 4 ++ virt/kvm/arm/vgic/vgic-irqfd.c | 106 ++++++++++++++++++++++++------ virt/kvm/arm/vgic/vgic.c | 7 -- 11 files changed, 148 insertions(+), 32 deletions(-) create mode 100644 arch/arm/kvm/irq.h create mode 100644 arch/arm64/kvm/irq.h diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 415cde1647e9..7e5f9afcc693 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -1433,13 +1433,16 @@ KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed. 4.52 KVM_SET_GSI_ROUTING Capability: KVM_CAP_IRQ_ROUTING -Architectures: x86 s390 +Architectures: x86 s390 arm arm64 Type: vm ioctl Parameters: struct kvm_irq_routing (in) Returns: 0 on success, -1 on error Sets the GSI routing table entries, overwriting any previously set entries. +On arm/arm64, GSI routing has the following limitation: +- GSI routing does not apply to KVM_IRQ_LINE but only to KVM_IRQFD. + struct kvm_irq_routing { __u32 nr; __u32 flags; @@ -2374,9 +2377,10 @@ Note that closing the resamplefd is not sufficient to disable the irqfd. The KVM_IRQFD_FLAG_RESAMPLE is only necessary on assignment and need not be specified with KVM_IRQFD_FLAG_DEASSIGN. -On ARM/ARM64, the gsi field in the kvm_irqfd struct specifies the Shared -Peripheral Interrupt (SPI) index, such that the GIC interrupt ID is -given by gsi + 32. +On arm/arm64, gsi routing being supported, the following can happen: +- in case no routing entry is associated to this gsi, injection fails +- in case the gsi is associated to an irqchip routing entry, + irqchip.pin + 32 corresponds to the injected SPI ID. 4.76 KVM_PPC_ALLOCATE_HTAB diff --git a/arch/arm/kvm/Kconfig b/arch/arm/kvm/Kconfig index 95a000515e43..3e1cd0452d67 100644 --- a/arch/arm/kvm/Kconfig +++ b/arch/arm/kvm/Kconfig @@ -32,6 +32,8 @@ config KVM select KVM_VFIO select HAVE_KVM_EVENTFD select HAVE_KVM_IRQFD + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER ---help--- Support hosting virtualized guest machines. diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 5e28df80dca7..10d77a66cad5 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -29,4 +29,5 @@ obj-y += $(KVM)/arm/vgic/vgic-v2.o obj-y += $(KVM)/arm/vgic/vgic-mmio.o obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o +obj-y += $(KVM)/irqchip.o obj-y += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/irq.h b/arch/arm/kvm/irq.h new file mode 100644 index 000000000000..b74099b905fd --- /dev/null +++ b/arch/arm/kvm/irq.h @@ -0,0 +1,19 @@ +/* + * irq.h: in kernel interrupt controller related definitions + * Copyright (c) 2016 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This header is included by irqchip.c. However, on ARM, interrupt + * controller declarations are located in include/kvm/arm_vgic.h since + * they are mostly shared between arm and arm64. + */ + +#ifndef __IRQ_H +#define __IRQ_H + +#include + +#endif diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 9d2eff0b3ad3..9c9edc98d271 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -37,6 +37,8 @@ config KVM select KVM_ARM_VGIC_V3 select KVM_ARM_PMU if HW_PERF_EVENTS select HAVE_KVM_MSI + select HAVE_KVM_IRQCHIP + select HAVE_KVM_IRQ_ROUTING ---help--- Support hosting virtualized guest machines. We don't support KVM with 16K page tables yet, due to the multiple diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index a5b96642a9cb..695eb3c7ef41 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -30,5 +30,6 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-kvm-device.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-its.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/irqchip.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/arch_timer.o kvm-$(CONFIG_KVM_ARM_PMU) += $(KVM)/arm/pmu.o diff --git a/arch/arm64/kvm/irq.h b/arch/arm64/kvm/irq.h new file mode 100644 index 000000000000..b74099b905fd --- /dev/null +++ b/arch/arm64/kvm/irq.h @@ -0,0 +1,19 @@ +/* + * irq.h: in kernel interrupt controller related definitions + * Copyright (c) 2016 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This header is included by irqchip.c. However, on ARM, interrupt + * controller declarations are located in include/kvm/arm_vgic.h since + * they are mostly shared between arm and arm64. + */ + +#ifndef __IRQ_H +#define __IRQ_H + +#include + +#endif diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 540da5149ba7..19b698ef3336 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -34,6 +34,7 @@ #define VGIC_MAX_SPI 1019 #define VGIC_MAX_RESERVED 1023 #define VGIC_MIN_LPI 8192 +#define KVM_IRQCHIP_NUM_PINS (1020 - 32) enum vgic_type { VGIC_V2, /* Good ol' GICv2 */ @@ -314,4 +315,10 @@ static inline int kvm_vgic_get_max_vcpus(void) int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); +/** + * kvm_vgic_setup_default_irq_routing: + * Setup a default flat gsi routing table mapping all SPIs + */ +int kvm_vgic_setup_default_irq_routing(struct kvm *kvm); + #endif /* __KVM_ARM_VGIC_H */ diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 01a60dcd05d6..1aba785cd498 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -264,6 +264,10 @@ int vgic_init(struct kvm *kvm) kvm_for_each_vcpu(i, vcpu, kvm) kvm_vgic_vcpu_init(vcpu); + ret = kvm_vgic_setup_default_irq_routing(kvm); + if (ret) + goto out; + dist->initialized = true; out: return ret; diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c index c675513270bb..6e84d530d9f7 100644 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ b/virt/kvm/arm/vgic/vgic-irqfd.c @@ -17,36 +17,100 @@ #include #include #include +#include +#include "vgic.h" -int kvm_irq_map_gsi(struct kvm *kvm, - struct kvm_kernel_irq_routing_entry *entries, - int gsi) +/** + * vgic_irqfd_set_irq: inject the IRQ corresponding to the + * irqchip routing entry + * + * This is the entry point for irqfd IRQ injection + */ +static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status) { - return 0; + unsigned int spi_id = e->irqchip.pin + VGIC_NR_PRIVATE_IRQS; + + if (!vgic_valid_spi(kvm, spi_id)) + return -EINVAL; + return kvm_vgic_inject_irq(kvm, 0, spi_id, level); } -int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned int irqchip, - unsigned int pin) +/** + * kvm_set_routing_entry: populate a kvm routing entry + * from a user routing entry + * + * @e: kvm kernel routing entry handle + * @ue: user api routing entry handle + * return 0 on success, -EINVAL on errors. + */ +int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) { - return pin; + int r = -EINVAL; + + switch (ue->type) { + case KVM_IRQ_ROUTING_IRQCHIP: + e->set = vgic_irqfd_set_irq; + e->irqchip.irqchip = ue->u.irqchip.irqchip; + e->irqchip.pin = ue->u.irqchip.pin; + if ((e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS) || + (e->irqchip.irqchip >= KVM_NR_IRQCHIPS)) + goto out; + break; + default: + goto out; + } + r = 0; +out: + return r; } -int kvm_set_irq(struct kvm *kvm, int irq_source_id, - u32 irq, int level, bool line_status) -{ - unsigned int spi = irq + VGIC_NR_PRIVATE_IRQS; - - trace_kvm_set_irq(irq, level, irq_source_id); - - BUG_ON(!vgic_initialized(kvm)); - - return kvm_vgic_inject_irq(kvm, 0, spi, level); -} - -/* MSI not implemented yet */ +/** + * kvm_set_msi: inject the MSI corresponding to the + * MSI routing entry + * + * This is the entry point for irqfd MSI injection + * and userspace MSI injection. + */ int kvm_set_msi(struct kvm_kernel_irq_routing_entry *e, struct kvm *kvm, int irq_source_id, int level, bool line_status) { - return 0; + struct kvm_msi msi; + + msi.address_lo = e->msi.address_lo; + msi.address_hi = e->msi.address_hi; + msi.data = e->msi.data; + msi.flags = e->msi.flags; + msi.devid = e->msi.devid; + + if (!vgic_has_its(kvm)) + return -ENODEV; + + return vgic_its_inject_msi(kvm, &msi); +} + +int kvm_vgic_setup_default_irq_routing(struct kvm *kvm) +{ + struct kvm_irq_routing_entry *entries; + struct vgic_dist *dist = &kvm->arch.vgic; + u32 nr = dist->nr_spis; + int i, ret; + + entries = kcalloc(nr, sizeof(struct kvm_kernel_irq_routing_entry), + GFP_KERNEL); + if (!entries) + return -ENOMEM; + + for (i = 0; i < nr; i++) { + entries[i].gsi = i; + entries[i].type = KVM_IRQ_ROUTING_IRQCHIP; + entries[i].u.irqchip.irqchip = 0; + entries[i].u.irqchip.pin = i; + } + ret = kvm_set_irq_routing(kvm, entries, nr, 0); + kfree(entries); + return ret; } diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 39f3358c6d91..e7aeac719e09 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -711,10 +711,3 @@ bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq) return map_is_active; } -int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi) -{ - if (vgic_has_its(kvm)) - return vgic_its_inject_msi(kvm, msi); - else - return -ENODEV; -} From 995a0ee9809b6948bb7bfea77f129fe1d5157cc1 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 22 Jul 2016 16:20:42 +0000 Subject: [PATCH 06/14] KVM: arm/arm64: Enable MSI routing Up to now, only irqchip routing entries could be set. This patch adds the capability to insert MSI routing entries. For ARM64, let's also increase KVM_MAX_IRQ_ROUTES to 4096: this include SPI irqchip routes plus MSI routes. In the future this might be extended. Signed-off-by: Eric Auger Reviewed-by: Andre Przywara Signed-off-by: Marc Zyngier --- Documentation/virtual/kvm/api.txt | 3 +++ include/linux/kvm_host.h | 2 ++ virt/kvm/arm/vgic/vgic-irqfd.c | 8 ++++++++ virt/kvm/irqchip.c | 24 +++++++++++++++--------- 4 files changed, 28 insertions(+), 9 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index 7e5f9afcc693..7a04216d7278 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2381,6 +2381,9 @@ On arm/arm64, gsi routing being supported, the following can happen: - in case no routing entry is associated to this gsi, injection fails - in case the gsi is associated to an irqchip routing entry, irqchip.pin + 32 corresponds to the injected SPI ID. +- in case the gsi is associated to an MSI routing entry, the MSI + message and device ID are translated into an LPI (support restricted + to GICv3 ITS in-kernel emulation). 4.76 KVM_PPC_ALLOCATE_HTAB diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a7eb5c48251e..a318c3b21608 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1048,6 +1048,8 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) #ifdef CONFIG_S390 #define KVM_MAX_IRQ_ROUTES 4096 //FIXME: we can have more than that... +#elif defined(CONFIG_ARM64) +#define KVM_MAX_IRQ_ROUTES 4096 #else #define KVM_MAX_IRQ_ROUTES 1024 #endif diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c index 6e84d530d9f7..683a589711b0 100644 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ b/virt/kvm/arm/vgic/vgic-irqfd.c @@ -59,6 +59,14 @@ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, (e->irqchip.irqchip >= KVM_NR_IRQCHIPS)) goto out; break; + case KVM_IRQ_ROUTING_MSI: + e->set = kvm_set_msi; + e->msi.address_lo = ue->u.msi.address_lo; + e->msi.address_hi = ue->u.msi.address_hi; + e->msi.data = ue->u.msi.data; + e->msi.flags = ue->flags; + e->msi.devid = ue->u.msi.devid; + break; default: goto out; } diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 0c000546aedc..c6202199e505 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -178,6 +178,7 @@ int kvm_set_irq_routing(struct kvm *kvm, unsigned flags) { struct kvm_irq_routing_table *new, *old; + struct kvm_kernel_irq_routing_entry *e; u32 i, j, nr_rt_entries = 0; int r; @@ -201,23 +202,25 @@ int kvm_set_irq_routing(struct kvm *kvm, new->chip[i][j] = -1; for (i = 0; i < nr; ++i) { - struct kvm_kernel_irq_routing_entry *e; - r = -ENOMEM; e = kzalloc(sizeof(*e), GFP_KERNEL); if (!e) goto out; r = -EINVAL; - if (ue->flags) { - kfree(e); - goto out; + switch (ue->type) { + case KVM_IRQ_ROUTING_MSI: + if (ue->flags & ~KVM_MSI_VALID_DEVID) + goto free_entry; + break; + default: + if (ue->flags) + goto free_entry; + break; } r = setup_routing_entry(new, e, ue); - if (r) { - kfree(e); - goto out; - } + if (r) + goto free_entry; ++ue; } @@ -234,7 +237,10 @@ int kvm_set_irq_routing(struct kvm *kvm, new = old; r = 0; + goto out; +free_entry: + kfree(e); out: free_irq_routing_table(new); From 3f312db6b65baf393bee02f6dfdfaa94d786dd0f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 24 Jul 2016 10:34:08 +0100 Subject: [PATCH 07/14] KVM: arm: vgic-irqfd: Workaround changing kvm_set_routing_entry prototype kvm_set_routing_entry is changing in -next, and causes things to explode. Add a temporary workaround that should be dropped when we hit 4.8-rc1 Signed-off-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-irqfd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c index 683a589711b0..b31a51a14efb 100644 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ b/virt/kvm/arm/vgic/vgic-irqfd.c @@ -41,12 +41,20 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, * kvm_set_routing_entry: populate a kvm routing entry * from a user routing entry * + * @kvm: the VM this entry is applied to * @e: kvm kernel routing entry handle * @ue: user api routing entry handle * return 0 on success, -EINVAL on errors. */ +#ifdef KVM_CAP_X2APIC_API +int kvm_set_routing_entry(struct kvm *kvm, + struct kvm_kernel_irq_routing_entry *e, + const struct kvm_irq_routing_entry *ue) +#else +/* Remove this version and the ifdefery once merged into 4.8 */ int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) +#endif { int r = -EINVAL; From 89581f06b2bc225f0c9822fa52e714aa2e3810dd Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Fri, 22 Jul 2016 10:38:46 -0400 Subject: [PATCH 08/14] arm64: KVM: Set cpsr before spsr on fault injection We need to set cpsr before determining the spsr bank, as the bank depends on the target exception level of the injection, not the current mode of the vcpu. Normally this is one in the same (EL1), but not when we manage to trap an EL0 fault. It still doesn't really matter for the 64-bit EL0 case though, as vcpu_spsr() unconditionally uses the EL1 bank for that. However the 32-bit EL0 case gets fun, as that path will lead to the BUG() in vcpu_spsr32(). This patch fixes the assignment order and also modifies some white space in order to better group pairs of lines that have strict order. Cc: stable@vger.kernel.org # v4.5 Signed-off-by: Andrew Jones Signed-off-by: Marc Zyngier --- arch/arm64/kvm/inject_fault.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index e9e0e6db73f6..898c0e6aedd4 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -132,16 +132,14 @@ static u64 get_except_vector(struct kvm_vcpu *vcpu, enum exception_type type) static void inject_abt64(struct kvm_vcpu *vcpu, bool is_iabt, unsigned long addr) { unsigned long cpsr = *vcpu_cpsr(vcpu); - bool is_aarch32; + bool is_aarch32 = vcpu_mode_is_32bit(vcpu); u32 esr = 0; - is_aarch32 = vcpu_mode_is_32bit(vcpu); - - *vcpu_spsr(vcpu) = cpsr; *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); - *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); + *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_spsr(vcpu) = cpsr; vcpu_sys_reg(vcpu, FAR_EL1) = addr; @@ -172,11 +170,11 @@ static void inject_undef64(struct kvm_vcpu *vcpu) unsigned long cpsr = *vcpu_cpsr(vcpu); u32 esr = (ESR_ELx_EC_UNKNOWN << ESR_ELx_EC_SHIFT); - *vcpu_spsr(vcpu) = cpsr; *vcpu_elr_el1(vcpu) = *vcpu_pc(vcpu); - *vcpu_pc(vcpu) = get_except_vector(vcpu, except_type_sync); + *vcpu_cpsr(vcpu) = PSTATE_FAULT_BITS_64; + *vcpu_spsr(vcpu) = cpsr; /* * Build an unknown exception, depending on the instruction From 3aed64f6d341cdb62bb2d6232589fb13448ce063 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 9 Jun 2016 13:06:08 +0200 Subject: [PATCH 09/14] pvclock: introduce seqcount-like API The version field in struct pvclock_vcpu_time_info basically implements a seqcount. Wrap it with the usual read_begin and read_retry functions, and use these APIs instead of peppering the code with smp_rmb()s. While at it, change it to the more pedantically correct virt_rmb(). With this change, __pvclock_read_cycles can be simplified noticeably. Signed-off-by: Paolo Bonzini --- arch/x86/entry/vdso/vclock_gettime.c | 9 ++----- arch/x86/include/asm/pvclock.h | 39 ++++++++++++++++------------ arch/x86/kernel/pvclock.c | 17 +++++------- 3 files changed, 31 insertions(+), 34 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index 2f02d23a05ef..db1e3b4c3693 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -123,9 +123,7 @@ static notrace cycle_t vread_pvclock(int *mode) */ do { - version = pvti->version; - - smp_rmb(); + version = pvclock_read_begin(pvti); if (unlikely(!(pvti->flags & PVCLOCK_TSC_STABLE_BIT))) { *mode = VCLOCK_NONE; @@ -137,10 +135,7 @@ static notrace cycle_t vread_pvclock(int *mode) pvti_tsc_shift = pvti->tsc_shift; pvti_system_time = pvti->system_time; pvti_tsc = pvti->tsc_timestamp; - - /* Make sure that the version double-check is last. */ - smp_rmb(); - } while (unlikely((version & 1) || version != pvti->version)); + } while (pvclock_read_retry(pvti, version)); delta = tsc - pvti_tsc; ret = pvti_system_time + diff --git a/arch/x86/include/asm/pvclock.h b/arch/x86/include/asm/pvclock.h index 7c1c89598688..d019f0cc80ec 100644 --- a/arch/x86/include/asm/pvclock.h +++ b/arch/x86/include/asm/pvclock.h @@ -25,6 +25,24 @@ void pvclock_resume(void); void pvclock_touch_watchdogs(void); +static __always_inline +unsigned pvclock_read_begin(const struct pvclock_vcpu_time_info *src) +{ + unsigned version = src->version & ~1; + /* Make sure that the version is read before the data. */ + virt_rmb(); + return version; +} + +static __always_inline +bool pvclock_read_retry(const struct pvclock_vcpu_time_info *src, + unsigned version) +{ + /* Make sure that the version is re-read after the data. */ + virt_rmb(); + return unlikely(version != src->version); +} + /* * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction, * yielding a 64-bit result. @@ -69,23 +87,12 @@ static inline u64 pvclock_scale_delta(u64 delta, u32 mul_frac, int shift) } static __always_inline -unsigned __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src, - cycle_t *cycles, u8 *flags) +cycle_t __pvclock_read_cycles(const struct pvclock_vcpu_time_info *src) { - unsigned version; - cycle_t offset; - u64 delta; - - version = src->version; - /* Make the latest version visible */ - smp_rmb(); - - delta = rdtsc_ordered() - src->tsc_timestamp; - offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, - src->tsc_shift); - *cycles = src->system_time + offset; - *flags = src->flags; - return version; + u64 delta = rdtsc_ordered() - src->tsc_timestamp; + cycle_t offset = pvclock_scale_delta(delta, src->tsc_to_system_mul, + src->tsc_shift); + return src->system_time + offset; } struct pvclock_vsyscall_time_info { diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 06c58ce46762..3599404e3089 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -64,14 +64,9 @@ u8 pvclock_read_flags(struct pvclock_vcpu_time_info *src) u8 flags; do { - version = src->version; - /* Make the latest version visible */ - smp_rmb(); - + version = pvclock_read_begin(src); flags = src->flags; - /* Make sure that the version double-check is last. */ - smp_rmb(); - } while ((src->version & 1) || version != src->version); + } while (pvclock_read_retry(src, version)); return flags & valid_flags; } @@ -84,10 +79,10 @@ cycle_t pvclock_clocksource_read(struct pvclock_vcpu_time_info *src) u8 flags; do { - version = __pvclock_read_cycles(src, &ret, &flags); - /* Make sure that the version double-check is last. */ - smp_rmb(); - } while ((src->version & 1) || version != src->version); + version = pvclock_read_begin(src); + ret = __pvclock_read_cycles(src); + flags = src->flags; + } while (pvclock_read_retry(src, version)); if (unlikely((flags & PVCLOCK_GUEST_STOPPED) != 0)) { src->flags &= ~PVCLOCK_GUEST_STOPPED; From abe9efa79be02cf2ba27f643b214b07877bb050b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 9 Jun 2016 20:12:34 +0200 Subject: [PATCH 10/14] x86: vdso: use __pvclock_read_cycles The new simplified __pvclock_read_cycles does the same computation as vread_pvclock, except that (because it takes the pvclock_vcpu_time_info pointer) it has to be moved inside the loop. Since the loop is expected to never roll, this makes no difference. Acked-by: Andy Lutomirski Signed-off-by: Paolo Bonzini --- arch/x86/entry/vdso/vclock_gettime.c | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/arch/x86/entry/vdso/vclock_gettime.c b/arch/x86/entry/vdso/vclock_gettime.c index db1e3b4c3693..94d54d0defa7 100644 --- a/arch/x86/entry/vdso/vclock_gettime.c +++ b/arch/x86/entry/vdso/vclock_gettime.c @@ -96,9 +96,8 @@ static notrace cycle_t vread_pvclock(int *mode) { const struct pvclock_vcpu_time_info *pvti = &get_pvti0()->pvti; cycle_t ret; - u64 tsc, pvti_tsc; - u64 last, delta, pvti_system_time; - u32 version, pvti_tsc_to_system_mul, pvti_tsc_shift; + u64 last; + u32 version; /* * Note: The kernel and hypervisor must guarantee that cpu ID @@ -130,18 +129,9 @@ static notrace cycle_t vread_pvclock(int *mode) return 0; } - tsc = rdtsc_ordered(); - pvti_tsc_to_system_mul = pvti->tsc_to_system_mul; - pvti_tsc_shift = pvti->tsc_shift; - pvti_system_time = pvti->system_time; - pvti_tsc = pvti->tsc_timestamp; + ret = __pvclock_read_cycles(pvti); } while (pvclock_read_retry(pvti, version)); - delta = tsc - pvti_tsc; - ret = pvti_system_time + - pvclock_scale_delta(delta, pvti_tsc_to_system_mul, - pvti_tsc_shift); - /* refer to vread_tsc() comment for rationale */ last = gtod->cycle_last; From 055b6ae95e14326a514334a0a8528a40cd629d10 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 4 Aug 2016 14:01:05 +0200 Subject: [PATCH 11/14] KVM: documentation: fix KVM_CAP_X2APIC_API information The KVM_X2APIC_API_USE_32BIT_IDS feature applies to both KVM_SET_GSI_ROUTING and KVM_SIGNAL_MSI, but was not mentioned in the documentation for the latter ioctl. Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index da3c395ed174..739db9ab16b2 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2224,9 +2224,10 @@ If KVM_MSI_VALID_DEVID is set, devid contains a unique device identifier for the device that wrote the MSI message. For PCI, this is usually a BFD identifier in the lower 16 bits. -On x86, address_hi is ignored unless the KVM_CAP_X2APIC_API capability is -enabled. If it is enabled, address_hi bits 31-8 provide bits 31-8 of the -destination id. Bits 7-0 of address_hi must be zero. +On x86, address_hi is ignored unless the KVM_X2APIC_API_USE_32BIT_IDS +feature of KVM_CAP_X2APIC_API capability is enabled. If it is enabled, +address_hi bits 31-8 provide bits 31-8 of the destination id. Bits 7-0 of +address_hi must be zero. 4.71 KVM_CREATE_PIT2 From 910053002ef1fdc1d42b7015d5b2400172f43e42 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Wed, 3 Aug 2016 12:04:12 +0800 Subject: [PATCH 12/14] KVM: lapic: fix access preemption timer stuff even if kernel_irqchip=off MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BUG: unable to handle kernel NULL pointer dereference at 000000000000008c IP: [] kvm_lapic_hv_timer_in_use+0x10/0x20 [kvm] PGD 0 Oops: 0000 [#1] SMP Call Trace: kvm_arch_vcpu_load+0x86/0x260 [kvm] vcpu_load+0x46/0x60 [kvm] kvm_vcpu_ioctl+0x79/0x7c0 [kvm] ? __lock_is_held+0x54/0x70 do_vfs_ioctl+0x96/0x6a0 ? __fget_light+0x2a/0x90 SyS_ioctl+0x79/0x90 do_syscall_64+0x7c/0x1e0 entry_SYSCALL64_slow_path+0x25/0x25 RIP [] kvm_lapic_hv_timer_in_use+0x10/0x20 [kvm] RSP CR2: 000000000000008c ---[ end trace a55fb79d2b3b4ee8 ]--- This can be reproduced steadily by kernel_irqchip=off. We should not access preemption timer stuff if lapic is emulated in userspace. This patch fix it by avoiding access preemption timer stuff when kernel_irqchip=off. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Yunhong Jiang Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 730cf174090a..b62c85229711 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -1349,6 +1349,9 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic) bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu) { + if (!lapic_in_kernel(vcpu)) + return false; + return vcpu->arch.apic->lapic_timer.hv_timer_in_use; } EXPORT_SYMBOL_GPL(kvm_lapic_hv_timer_in_use); From 03331b4b8bd1f0fef34539afa6bb6c45ec25c47b Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 2 Aug 2016 16:32:35 -0400 Subject: [PATCH 13/14] nvmx: remove comment about missing nested vpid support Nested vpid is already supported and both single/global modes are advertised to the guest Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bc354f003ce1..b26c222ebeef 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2945,7 +2945,6 @@ static int vmx_get_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata) vmx->nested.nested_vmx_secondary_ctls_high); break; case MSR_IA32_VMX_EPT_VPID_CAP: - /* Currently, no nested vpid support */ *pdata = vmx->nested.nested_vmx_ept_caps | ((u64)vmx->nested.nested_vmx_vpid_caps << 32); break; From 45e11817d5703eceb65a673927a8bc74dc1286d6 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 2 Aug 2016 16:32:36 -0400 Subject: [PATCH 14/14] nvmx: mark ept single context invalidation as supported Commit 4b855078601f ("KVM: nVMX: Don't advertise single context invalidation for invept") removed advertising single context invalidation since the spec does not mandate it. However, some hypervisors (such as ESX) require it to be present before willing to use ept in a nested environment. Advertise it and fallback to the global case. Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index b26c222ebeef..a45d8580f91e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2809,12 +2809,8 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXECUTE_ONLY_BIT; vmx->nested.nested_vmx_ept_caps &= vmx_capability.ept; - /* - * For nested guests, we don't do anything specific - * for single context invalidation. Hence, only advertise - * support for global context invalidation. - */ - vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT; + vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT | + VMX_EPT_EXTENT_CONTEXT_BIT; } else vmx->nested.nested_vmx_ept_caps = 0; @@ -7608,12 +7604,16 @@ static int handle_invept(struct kvm_vcpu *vcpu) switch (type) { case VMX_EPT_EXTENT_GLOBAL: + /* + * TODO: track mappings and invalidate + * single context requests appropriately + */ + case VMX_EPT_EXTENT_CONTEXT: kvm_mmu_sync_roots(vcpu); kvm_make_request(KVM_REQ_TLB_FLUSH, vcpu); nested_vmx_succeed(vcpu); break; default: - /* Trap single context invalidation invept calls */ BUG_ON(1); break; }