
Trimmed second batch of KVM changes for Linux 4.15

* GICv4 Support for KVM/ARM
 
 All ARM patches were in next-20171113.  I have postponed most x86 fixes
 to 4.15-rc2 and UMIP to 4.16, but there are fixes that would be good to
 have already in 4.15-rc1:
 
 * re-introduce support for CPUs without virtual NMI (cc stable)
   and allow testing of KVM without virtual NMI on available CPUs
 
 * fix long-standing performance issues with assigned devices on AMD
   (cc stable)
 -----BEGIN PGP SIGNATURE-----
 
 iQEcBAABCAAGBQJaGECGAAoJEED/6hsPKofoT08H/AuaMi8qprw2BNpVBbQxWRWM
 O4WPk7yz1zB4SkdRNrPzCMBy+qoK7FcV/3BpsFPuQS4NHQ+GvQ87N/7tUbouVyl6
 CuPGJMCnNzMQ8GvLOJgB1/sz+uW5W/ph3y8kv1UP3/hNCZU4fqukoUeLroOH/wr6
 N3bSY8bok7ycdpgybHmbUHY0Yk4IUk3m0RXWY9U5Jl3sjoNEwCw3pWdrq9Swfs/6
 W8QJRdE4Z6KHPqW5sRnPj24IpoUpCxu+IT+gPuGlDUCN/h3sfhYvMS6GgDrCjiiZ
 2z1TwaIAo+wGjlBQzGmyTUjUPjbGew+f3ixBlf2BtmNutX+tX2qsVfl1NKXYTto=
 =GGge
 -----END PGP SIGNATURE-----

Merge tag 'kvm-4.15-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Radim Krčmář:
 "Trimmed second batch of KVM changes for Linux 4.15:

   - GICv4 Support for KVM/ARM

   - re-introduce support for CPUs without virtual NMI (cc stable) and
     allow testing of KVM without virtual NMI on available CPUs

   - fix long-standing performance issues with assigned devices on AMD
     (cc stable)"

* tag 'kvm-4.15-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (30 commits)
  kvm: vmx: Allow disabling virtual NMI support
  kvm: vmx: Reinstate support for CPUs without virtual NMI
  KVM: SVM: obey guest PAT
  KVM: arm/arm64: Don't queue VLPIs on INV/INVALL
  KVM: arm/arm64: Fix GICv4 ITS initialization issues
  KVM: arm/arm64: GICv4: Theory of operations
  KVM: arm/arm64: GICv4: Enable VLPI support
  KVM: arm/arm64: GICv4: Prevent userspace from changing doorbell affinity
  KVM: arm/arm64: GICv4: Prevent a VM using GICv4 from being saved
  KVM: arm/arm64: GICv4: Enable virtual cpuif if VLPIs can be delivered
  KVM: arm/arm64: GICv4: Hook vPE scheduling into vgic flush/sync
  KVM: arm/arm64: GICv4: Use the doorbell interrupt as an unblocking source
  KVM: arm/arm64: GICv4: Add doorbell interrupt handling
  KVM: arm/arm64: GICv4: Use pending_last as a scheduling hint
  KVM: arm/arm64: GICv4: Handle INVALL applied to a vPE
  KVM: arm/arm64: GICv4: Propagate property updates to VLPIs
  KVM: arm/arm64: GICv4: Handle MOVALL applied to a vPE
  KVM: arm/arm64: GICv4: Handle CLEAR applied to a VLPI
  KVM: arm/arm64: GICv4: Propagate affinity changes to the physical ITS
  KVM: arm/arm64: GICv4: Unmap VLPI when freeing an LPI
  ...
Linus Torvalds 2017-11-24 19:44:25 -10:00
commit 7753ea0964
19 changed files with 819 additions and 158 deletions


@@ -1890,6 +1890,10 @@
 			[KVM,ARM] Trap guest accesses to GICv3 common
 			system registers

+	kvm-arm.vgic_v4_enable=
+			[KVM,ARM] Allow use of GICv4 for direct injection of
+			LPIs.
+
 	kvm-intel.ept=	[KVM,Intel] Disable extended page tables
 			(virtualized MMU) support on capable Intel chips.
 			Default is 1 (enabled)


@@ -64,6 +64,8 @@ Groups:
     -EINVAL: Inconsistent restored data
     -EFAULT: Invalid guest ram access
     -EBUSY: One or more VCPUS are running
+    -EACCES: The virtual ITS is backed by a physical GICv4 ITS, and the
+	     state is not available

   KVM_DEV_ARM_VGIC_GRP_ITS_REGS
   Attributes:


@@ -4,6 +4,7 @@
 #

 source "virt/kvm/Kconfig"
+source "virt/lib/Kconfig"

 menuconfig VIRTUALIZATION
 	bool "Virtualization"

@@ -23,6 +24,8 @@ config KVM
 	select PREEMPT_NOTIFIERS
 	select ANON_INODES
 	select ARM_GIC
+	select ARM_GIC_V3
+	select ARM_GIC_V3_ITS
 	select HAVE_KVM_CPU_RELAX_INTERCEPT
 	select HAVE_KVM_ARCH_TLB_FLUSH_ALL
 	select KVM_MMIO

@@ -36,6 +39,8 @@ config KVM
 	select HAVE_KVM_IRQCHIP
 	select HAVE_KVM_IRQ_ROUTING
 	select HAVE_KVM_MSI
+	select IRQ_BYPASS_MANAGER
+	select HAVE_KVM_IRQ_BYPASS
 	depends on ARM_VIRT_EXT && ARM_LPAE && ARM_ARCH_TIMER
 	---help---
 	  Support hosting virtualized guest machines.


@@ -32,6 +32,7 @@ obj-y += $(KVM)/arm/vgic/vgic-init.o
 obj-y += $(KVM)/arm/vgic/vgic-irqfd.o
 obj-y += $(KVM)/arm/vgic/vgic-v2.o
 obj-y += $(KVM)/arm/vgic/vgic-v3.o
+obj-y += $(KVM)/arm/vgic/vgic-v4.o
 obj-y += $(KVM)/arm/vgic/vgic-mmio.o
 obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o
 obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o


@@ -4,6 +4,7 @@
 #

 source "virt/kvm/Kconfig"
+source "virt/lib/Kconfig"

 menuconfig VIRTUALIZATION
 	bool "Virtualization"

@@ -36,6 +37,8 @@ config KVM
 	select HAVE_KVM_MSI
 	select HAVE_KVM_IRQCHIP
 	select HAVE_KVM_IRQ_ROUTING
+	select IRQ_BYPASS_MANAGER
+	select HAVE_KVM_IRQ_BYPASS
 	---help---
 	  Support hosting virtualized guest machines.
 	  We don't support KVM with 16K page tables yet, due to the multiple


@@ -27,6 +27,7 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-irqfd.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v2.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v3.o
+kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-v4.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v2.o
 kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-mmio-v3.o


@@ -3671,6 +3671,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	u32 ecx = msr->index;
 	u64 data = msr->data;

 	switch (ecx) {
+	case MSR_IA32_CR_PAT:
+		if (!kvm_mtrr_valid(vcpu, MSR_IA32_CR_PAT, data))
+			return 1;
+		vcpu->arch.pat = data;
+		svm->vmcb->save.g_pat = data;
+		mark_dirty(svm->vmcb, VMCB_NPT);
+		break;
 	case MSR_IA32_TSC:
 		kvm_write_tsc(vcpu, msr);
 		break;


@@ -70,6 +70,9 @@ MODULE_DEVICE_TABLE(x86cpu, vmx_cpu_id);
 static bool __read_mostly enable_vpid = 1;
 module_param_named(vpid, enable_vpid, bool, 0444);

+static bool __read_mostly enable_vnmi = 1;
+module_param_named(vnmi, enable_vnmi, bool, S_IRUGO);
+
 static bool __read_mostly flexpriority_enabled = 1;
 module_param_named(flexpriority, flexpriority_enabled, bool, S_IRUGO);

@@ -202,6 +205,10 @@ struct loaded_vmcs {
 	bool nmi_known_unmasked;
 	unsigned long vmcs_host_cr3;	/* May not match real cr3 */
 	unsigned long vmcs_host_cr4;	/* May not match real cr4 */
+	/* Support for vnmi-less CPUs */
+	int soft_vnmi_blocked;
+	ktime_t entry_time;
+	s64 vnmi_blocked_time;
 	struct list_head loaded_vmcss_on_cpu_link;
 };

@@ -1291,6 +1298,11 @@ static inline bool cpu_has_vmx_invpcid(void)
 		SECONDARY_EXEC_ENABLE_INVPCID;
 }

+static inline bool cpu_has_virtual_nmis(void)
+{
+	return vmcs_config.pin_based_exec_ctrl & PIN_BASED_VIRTUAL_NMIS;
+}
+
 static inline bool cpu_has_vmx_wbinvd_exit(void)
 {
 	return vmcs_config.cpu_based_2nd_exec_ctrl &

@@ -1348,11 +1360,6 @@ static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
 		(vmcs12->secondary_vm_exec_control & bit);
 }

-static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12)
-{
-	return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
-}
-
 static inline bool nested_cpu_has_preemption_timer(struct vmcs12 *vmcs12)
 {
 	return vmcs12->pin_based_vm_exec_control &

@@ -3712,9 +3719,9 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
 				&_vmexit_control) < 0)
 		return -EIO;

-	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING |
-		PIN_BASED_VIRTUAL_NMIS;
-	opt = PIN_BASED_POSTED_INTR | PIN_BASED_VMX_PREEMPTION_TIMER;
+	min = PIN_BASED_EXT_INTR_MASK | PIN_BASED_NMI_EXITING;
+	opt = PIN_BASED_VIRTUAL_NMIS | PIN_BASED_POSTED_INTR |
+		PIN_BASED_VMX_PREEMPTION_TIMER;
 	if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PINBASED_CTLS,
 				&_pin_based_exec_control) < 0)
 		return -EIO;

@@ -5232,6 +5239,10 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)

 	if (!kvm_vcpu_apicv_active(&vmx->vcpu))
 		pin_based_exec_ctrl &= ~PIN_BASED_POSTED_INTR;
+
+	if (!enable_vnmi)
+		pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
+
 	/* Enable the preemption timer dynamically */
 	pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
 	return pin_based_exec_ctrl;

@@ -5666,7 +5677,8 @@ static void enable_irq_window(struct kvm_vcpu *vcpu)

 static void enable_nmi_window(struct kvm_vcpu *vcpu)
 {
-	if (vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
+	if (!enable_vnmi ||
+	    vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) {
 		enable_irq_window(vcpu);
 		return;
 	}

@@ -5706,6 +5718,19 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);

+	if (!enable_vnmi) {
+		/*
+		 * Tracking the NMI-blocked state in software is built upon
+		 * finding the next open IRQ window. This, in turn, depends on
+		 * well-behaving guests: They have to keep IRQs disabled at
+		 * least as long as the NMI handler runs. Otherwise we may
+		 * cause NMI nesting, maybe breaking the guest. But as this is
+		 * highly unlikely, we can live with the residual risk.
+		 */
+		vmx->loaded_vmcs->soft_vnmi_blocked = 1;
+		vmx->loaded_vmcs->vnmi_blocked_time = 0;
+	}
+
 	++vcpu->stat.nmi_injections;
 	vmx->loaded_vmcs->nmi_known_unmasked = false;

@@ -5724,6 +5749,8 @@ static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	bool masked;

+	if (!enable_vnmi)
+		return vmx->loaded_vmcs->soft_vnmi_blocked;
 	if (vmx->loaded_vmcs->nmi_known_unmasked)
 		return false;
 	masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI;

@@ -5735,13 +5762,20 @@ static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked)
 {
 	struct vcpu_vmx *vmx = to_vmx(vcpu);

-	vmx->loaded_vmcs->nmi_known_unmasked = !masked;
-	if (masked)
-		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
-			      GUEST_INTR_STATE_NMI);
-	else
-		vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
-				GUEST_INTR_STATE_NMI);
+	if (!enable_vnmi) {
+		if (vmx->loaded_vmcs->soft_vnmi_blocked != masked) {
+			vmx->loaded_vmcs->soft_vnmi_blocked = masked;
+			vmx->loaded_vmcs->vnmi_blocked_time = 0;
+		}
+	} else {
+		vmx->loaded_vmcs->nmi_known_unmasked = !masked;
+		if (masked)
+			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+				      GUEST_INTR_STATE_NMI);
+		else
+			vmcs_clear_bits(GUEST_INTERRUPTIBILITY_INFO,
+					GUEST_INTR_STATE_NMI);
+	}
 }

 static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)

@@ -5749,6 +5783,10 @@ static int vmx_nmi_allowed(struct kvm_vcpu *vcpu)
 	if (to_vmx(vcpu)->nested.nested_run_pending)
 		return 0;

+	if (!enable_vnmi &&
+	    to_vmx(vcpu)->loaded_vmcs->soft_vnmi_blocked)
+		return 0;
+
 	return !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) &
 		  (GUEST_INTR_STATE_MOV_SS | GUEST_INTR_STATE_STI
 		   | GUEST_INTR_STATE_NMI));

@@ -6476,6 +6514,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 	 * AAK134, BY25.
 	 */
 	if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+			enable_vnmi &&
 			(exit_qualification & INTR_INFO_UNBLOCK_NMI))
 		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI);

@@ -6535,6 +6574,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)

 static int handle_nmi_window(struct kvm_vcpu *vcpu)
 {
+	WARN_ON_ONCE(!enable_vnmi);
 	vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
 			CPU_BASED_VIRTUAL_NMI_PENDING);
 	++vcpu->stat.nmi_window_exits;

@@ -6758,6 +6798,9 @@ static __init int hardware_setup(void)
 	if (!cpu_has_vmx_flexpriority())
 		flexpriority_enabled = 0;

+	if (!cpu_has_virtual_nmis())
+		enable_vnmi = 0;
+
 	/*
 	 * set_apic_access_page_addr() is used to reload apic access
 	 * page upon invalidation. No need to do anything if not

@@ -6962,7 +7005,7 @@ static struct loaded_vmcs *nested_get_current_vmcs02(struct vcpu_vmx *vmx)
 	}

 	/* Create a new VMCS */
-	item = kmalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
+	item = kzalloc(sizeof(struct vmcs02_list), GFP_KERNEL);
 	if (!item)
 		return NULL;
 	item->vmcs02.vmcs = alloc_vmcs();

@@ -7979,6 +8022,7 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
 	 * "blocked by NMI" bit has to be set before next VM entry.
 	 */
 	if (!(to_vmx(vcpu)->idt_vectoring_info & VECTORING_INFO_VALID_MASK) &&
+			enable_vnmi &&
 			(exit_qualification & INTR_INFO_UNBLOCK_NMI))
 		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
 				GUEST_INTR_STATE_NMI);

@@ -8823,6 +8867,25 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 		return 0;
 	}

+	if (unlikely(!enable_vnmi &&
+		     vmx->loaded_vmcs->soft_vnmi_blocked)) {
+		if (vmx_interrupt_allowed(vcpu)) {
+			vmx->loaded_vmcs->soft_vnmi_blocked = 0;
+		} else if (vmx->loaded_vmcs->vnmi_blocked_time > 1000000000LL &&
+			   vcpu->arch.nmi_pending) {
+			/*
+			 * This CPU don't support us in finding the end of an
+			 * NMI-blocked window if the guest runs with IRQs
+			 * disabled. So we pull the trigger after 1 s of
+			 * futile waiting, but inform the user about this.
+			 */
+			printk(KERN_WARNING "%s: Breaking out of NMI-blocked "
+			       "state on VCPU %d after 1 s timeout\n",
+			       __func__, vcpu->vcpu_id);
+			vmx->loaded_vmcs->soft_vnmi_blocked = 0;
+		}
+	}
+
 	if (exit_reason < kvm_vmx_max_exit_handlers
 	    && kvm_vmx_exit_handlers[exit_reason])
 		return kvm_vmx_exit_handlers[exit_reason](vcpu);

@@ -9105,33 +9168,38 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx)

 	idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK;

-	if (vmx->loaded_vmcs->nmi_known_unmasked)
-		return;
-	/*
-	 * Can't use vmx->exit_intr_info since we're not sure what
-	 * the exit reason is.
-	 */
-	exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-	unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
-	vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
-	/*
-	 * SDM 3: 27.7.1.2 (September 2008)
-	 * Re-set bit "block by NMI" before VM entry if vmexit caused by
-	 * a guest IRET fault.
-	 * SDM 3: 23.2.2 (September 2008)
-	 * Bit 12 is undefined in any of the following cases:
-	 *  If the VM exit sets the valid bit in the IDT-vectoring
-	 *   information field.
-	 *  If the VM exit is due to a double fault.
-	 */
-	if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
-	    vector != DF_VECTOR && !idtv_info_valid)
-		vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
-			      GUEST_INTR_STATE_NMI);
-	else
-		vmx->loaded_vmcs->nmi_known_unmasked =
-			!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
-			  & GUEST_INTR_STATE_NMI);
+	if (enable_vnmi) {
+		if (vmx->loaded_vmcs->nmi_known_unmasked)
+			return;
+		/*
+		 * Can't use vmx->exit_intr_info since we're not sure what
+		 * the exit reason is.
+		 */
+		exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+		unblock_nmi = (exit_intr_info & INTR_INFO_UNBLOCK_NMI) != 0;
+		vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
+		/*
+		 * SDM 3: 27.7.1.2 (September 2008)
+		 * Re-set bit "block by NMI" before VM entry if vmexit caused by
+		 * a guest IRET fault.
+		 * SDM 3: 23.2.2 (September 2008)
+		 * Bit 12 is undefined in any of the following cases:
+		 *  If the VM exit sets the valid bit in the IDT-vectoring
+		 *   information field.
+		 *  If the VM exit is due to a double fault.
+		 */
+		if ((exit_intr_info & INTR_INFO_VALID_MASK) && unblock_nmi &&
+		    vector != DF_VECTOR && !idtv_info_valid)
+			vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO,
+				      GUEST_INTR_STATE_NMI);
+		else
+			vmx->loaded_vmcs->nmi_known_unmasked =
+				!(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO)
+				  & GUEST_INTR_STATE_NMI);
+	} else if (unlikely(vmx->loaded_vmcs->soft_vnmi_blocked))
+		vmx->loaded_vmcs->vnmi_blocked_time +=
+			ktime_to_ns(ktime_sub(ktime_get(),
+					      vmx->loaded_vmcs->entry_time));
 }

 static void __vmx_complete_interrupts(struct kvm_vcpu *vcpu,

@@ -9248,6 +9316,11 @@ static void __noclone vmx_vcpu_run(struct kvm_vcpu *vcpu)
 	struct vcpu_vmx *vmx = to_vmx(vcpu);
 	unsigned long debugctlmsr, cr3, cr4;

+	/* Record the guest's net vcpu time for enforced NMI injections. */
+	if (unlikely(!enable_vnmi &&
+		     vmx->loaded_vmcs->soft_vnmi_blocked))
+		vmx->loaded_vmcs->entry_time = ktime_get();
+
 	/* Don't enter VMX if guest state is invalid, let the exit handler
 	   start emulation until we arrive back to a valid state */
 	if (vmx->emulation_required)


@@ -26,6 +26,8 @@
 #include <linux/list.h>
 #include <linux/jump_label.h>

+#include <linux/irqchip/arm-gic-v4.h>
+
 #define VGIC_V3_MAX_CPUS	255
 #define VGIC_V2_MAX_CPUS	8
 #define VGIC_NR_IRQS_LEGACY	256

@@ -73,6 +75,9 @@ struct vgic_global {
 	/* Only needed for the legacy KVM_CREATE_IRQCHIP */
 	bool			can_emulate_gicv2;

+	/* Hardware has GICv4? */
+	bool			has_gicv4;
+
 	/* GIC system register CPU interface */
 	struct static_key_false gicv3_cpuif;

@@ -116,6 +121,7 @@ struct vgic_irq {
 	bool hw;			/* Tied to HW IRQ */
 	struct kref refcount;		/* Used for LPIs */
 	u32 hwintid;			/* HW INTID number */
+	unsigned int host_irq;		/* linux irq corresponding to hwintid */
 	union {
 		u8 targets;			/* GICv2 target VCPUs mask */
 		u32 mpidr;			/* GICv3 target VCPU */

@@ -232,6 +238,15 @@ struct vgic_dist {

 	/* used by vgic-debug */
 	struct vgic_state_iter *iter;
+
+	/*
+	 * GICv4 ITS per-VM data, containing the IRQ domain, the VPE
+	 * array, the property table pointer as well as allocation
+	 * data. This essentially ties the Linux IRQ core and ITS
+	 * together, and avoids leaking KVM's data structures anywhere
+	 * else.
+	 */
+	struct its_vm		its_vm;
 };

 struct vgic_v2_cpu_if {

@@ -250,6 +265,14 @@ struct vgic_v3_cpu_if {
 	u32		vgic_ap0r[4];
 	u32		vgic_ap1r[4];
 	u64		vgic_lr[VGIC_V3_MAX_LRS];
+
+	/*
+	 * GICv4 ITS per-VPE data, containing the doorbell IRQ, the
+	 * pending table pointer, the its_vm pointer and a few other
+	 * HW specific things. As for the its_vm structure, this is
+	 * linking the Linux IRQ subsystem and the ITS together.
+	 */
+	struct its_vpe	its_vpe;
 };

 struct vgic_cpu {

@@ -307,9 +330,10 @@ void kvm_vgic_init_cpu_hardware(void);

 int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
 			bool level, void *owner);
-int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq);
-int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq);
-bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq);
+int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
+			  u32 vintid);
+int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid);
+bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid);

 int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu);

@@ -349,4 +373,15 @@ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm);

 int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner);

+struct kvm_kernel_irq_routing_entry;
+
+int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq,
+			       struct kvm_kernel_irq_routing_entry *irq_entry);
+
+int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
+				 struct kvm_kernel_irq_routing_entry *irq_entry);
+
+void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu);
+void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu);
+
 #endif /* __KVM_ARM_VGIC_H */


@@ -817,9 +817,6 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
 	struct arch_timer_context *vtimer = vcpu_vtimer(vcpu);
-	struct irq_desc *desc;
-	struct irq_data *data;
-	int phys_irq;
 	int ret;

 	if (timer->enabled)

@@ -837,26 +834,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu)
 		return -EINVAL;
 	}

-	/*
-	 * Find the physical IRQ number corresponding to the host_vtimer_irq
-	 */
-	desc = irq_to_desc(host_vtimer_irq);
-	if (!desc) {
-		kvm_err("%s: no interrupt descriptor\n", __func__);
-		return -EINVAL;
-	}
-
-	data = irq_desc_get_irq_data(desc);
-	while (data->parent_data)
-		data = data->parent_data;
-
-	phys_irq = data->hwirq;
-
-	/*
-	 * Tell the VGIC that the virtual interrupt is tied to a
-	 * physical interrupt. We do that once per VCPU.
-	 */
-	ret = kvm_vgic_map_phys_irq(vcpu, vtimer->irq.irq, phys_irq);
+	ret = kvm_vgic_map_phys_irq(vcpu, host_vtimer_irq, vtimer->irq.irq);
 	if (ret)
 		return ret;


@@ -27,6 +27,8 @@
 #include <linux/mman.h>
 #include <linux/sched.h>
 #include <linux/kvm.h>
+#include <linux/kvm_irqfd.h>
+#include <linux/irqbypass.h>
 #include <trace/events/kvm.h>
 #include <kvm/arm_pmu.h>

@@ -175,6 +177,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 {
 	int i;

+	kvm_vgic_destroy(kvm);
+
 	free_percpu(kvm->arch.last_vcpu_ran);
 	kvm->arch.last_vcpu_ran = NULL;

@@ -184,8 +188,6 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
 			kvm->vcpus[i] = NULL;
 		}
 	}
-
-	kvm_vgic_destroy(kvm);
 }

 int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)

@@ -313,11 +315,13 @@ int kvm_cpu_has_pending_timer(struct kvm_vcpu *vcpu)
 void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
 {
 	kvm_timer_schedule(vcpu);
+	kvm_vgic_v4_enable_doorbell(vcpu);
 }

 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 {
 	kvm_timer_unschedule(vcpu);
+	kvm_vgic_v4_disable_doorbell(vcpu);
 }

 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)

@@ -1450,6 +1454,46 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr)
 	return NULL;
 }

+bool kvm_arch_has_irq_bypass(void)
+{
+	return true;
+}
+
+int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *cons,
+				      struct irq_bypass_producer *prod)
+{
+	struct kvm_kernel_irqfd *irqfd =
+		container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+	return kvm_vgic_v4_set_forwarding(irqfd->kvm, prod->irq,
+					  &irqfd->irq_entry);
+}
+
+void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *cons,
+				      struct irq_bypass_producer *prod)
+{
+	struct kvm_kernel_irqfd *irqfd =
+		container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+	kvm_vgic_v4_unset_forwarding(irqfd->kvm, prod->irq,
+				     &irqfd->irq_entry);
+}
+
+void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *cons)
+{
+	struct kvm_kernel_irqfd *irqfd =
+		container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+	kvm_arm_halt_guest(irqfd->kvm);
+}
+
+void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *cons)
+{
+	struct kvm_kernel_irqfd *irqfd =
+		container_of(cons, struct kvm_kernel_irqfd, consumer);
+
+	kvm_arm_resume_guest(irqfd->kvm);
+}
+
 /**
  * Initialize Hyp-mode and memory mappings on all CPUs.
  */


@@ -258,7 +258,8 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu)
 			cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0);
 		}
 	} else {
-		if (static_branch_unlikely(&vgic_v3_cpuif_trap))
+		if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
+		    cpu_if->its_vpe.its_vm)
 			write_gicreg(0, ICH_HCR_EL2);

 		cpu_if->vgic_elrsr = 0xffff;

@@ -337,9 +338,11 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu)
 		/*
 		 * If we need to trap system registers, we must write
 		 * ICH_HCR_EL2 anyway, even if no interrupts are being
-		 * injected,
+		 * injected. Same thing if GICv4 is used, as VLPI
+		 * delivery is gated by ICH_HCR_EL2.En.
 		 */
-		if (static_branch_unlikely(&vgic_v3_cpuif_trap))
+		if (static_branch_unlikely(&vgic_v3_cpuif_trap) ||
+		    cpu_if->its_vpe.its_vm)
 			write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2);
 	}
 }


@@ -285,6 +285,10 @@ int vgic_init(struct kvm *kvm)
 	if (ret)
 		goto out;

+	ret = vgic_v4_init(kvm);
+	if (ret)
+		goto out;
+
 	kvm_for_each_vcpu(i, vcpu, kvm)
 		kvm_vgic_vcpu_enable(vcpu);

@@ -320,6 +324,9 @@ static void kvm_vgic_dist_destroy(struct kvm *kvm)

 	kfree(dist->spis);
 	dist->nr_spis = 0;
+
+	if (vgic_supports_direct_msis(kvm))
+		vgic_v4_teardown(kvm);
 }

 void kvm_vgic_vcpu_destroy(struct kvm_vcpu *vcpu)


@@ -38,7 +38,7 @@ static int vgic_its_save_tables_v0(struct vgic_its *its);
 static int vgic_its_restore_tables_v0(struct vgic_its *its);
 static int vgic_its_commit_v0(struct vgic_its *its);
 static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
-			     struct kvm_vcpu *filter_vcpu);
+			     struct kvm_vcpu *filter_vcpu, bool needs_inv);

 /*
  * Creates a new (reference to a) struct vgic_irq for a given LPI.

@@ -106,7 +106,7 @@ out_unlock:
 	 * However we only have those structs for mapped IRQs, so we read in
 	 * the respective config data from memory here upon mapping the LPI.
 	 */
-	ret = update_lpi_config(kvm, irq, NULL);
+	ret = update_lpi_config(kvm, irq, NULL, false);
 	if (ret)
 		return ERR_PTR(ret);

@@ -273,7 +273,7 @@ static struct its_collection *find_collection(struct vgic_its *its, int coll_id)
  * VCPU. Unconditionally applies if filter_vcpu is NULL.
  */
 static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
-			     struct kvm_vcpu *filter_vcpu)
+			     struct kvm_vcpu *filter_vcpu, bool needs_inv)
 {
 	u64 propbase = GICR_PROPBASER_ADDRESS(kvm->arch.vgic.propbaser);
 	u8 prop;

@@ -292,11 +292,17 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq,
 		irq->priority = LPI_PROP_PRIORITY(prop);
 		irq->enabled = LPI_PROP_ENABLE_BIT(prop);

-		vgic_queue_irq_unlock(kvm, irq, flags);
-	} else {
-		spin_unlock_irqrestore(&irq->irq_lock, flags);
+		if (!irq->hw) {
+			vgic_queue_irq_unlock(kvm, irq, flags);
+			return 0;
+		}
 	}

+	spin_unlock_irqrestore(&irq->irq_lock, flags);
+
+	if (irq->hw)
+		return its_prop_update_vlpi(irq->host_irq, prop, needs_inv);
+
 	return 0;
 }

@@ -336,6 +342,29 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr)
 	return i;
 }

+static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
+{
+	int ret = 0;
+
+	spin_lock(&irq->irq_lock);
+	irq->target_vcpu = vcpu;
+	spin_unlock(&irq->irq_lock);
+
+	if (irq->hw) {
+		struct its_vlpi_map map;
+
+		ret = its_get_vlpi(irq->host_irq, &map);
+		if (ret)
+			return ret;
+
+		map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+
+		ret = its_map_vlpi(irq->host_irq, &map);
+	}
+
+	return ret;
+}
+
 /*
  * Promotes the ITS view of affinity of an ITTE (which redistributor this LPI
  * is targeting) to the VGIC's view, which deals with target VCPUs.

@@ -350,10 +379,7 @@ static void update_affinity_ite(struct kvm *kvm, struct its_ite *ite)
 		return;

 	vcpu = kvm_get_vcpu(kvm, ite->collection->target_addr);
-
-	spin_lock(&ite->irq->irq_lock);
-	ite->irq->target_vcpu = vcpu;
-	spin_unlock(&ite->irq->irq_lock);
+	update_affinity(ite->irq, vcpu);
 }

 /*

@@ -505,19 +531,11 @@ static unsigned long vgic_mmio_read_its_idregs(struct kvm *kvm,
 	return 0;
 }

-/*
- * Find the target VCPU and the LPI number for a given devid/eventid pair
- * and make this IRQ pending, possibly injecting it.
- * Must be called with the its_lock mutex held.
- * Returns 0 on success, a positive error value for any ITS mapping
- * related errors and negative error values for generic errors.
- */
-static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
-				u32 devid, u32 eventid)
+int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
+			 u32 devid, u32 eventid, struct vgic_irq **irq)
 {
 	struct kvm_vcpu *vcpu;
 	struct its_ite *ite;
-	unsigned long flags;

 	if (!its->enabled)
 		return -EBUSY;

@@ -533,26 +551,65 @@ static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
 	if (!vcpu->arch.vgic_cpu.lpis_enabled)
 		return -EBUSY;

-	spin_lock_irqsave(&ite->irq->irq_lock, flags);
-	ite->irq->pending_latch = true;
-	vgic_queue_irq_unlock(kvm, ite->irq, flags);
-
+	*irq = ite->irq;
 	return 0;
 }

-static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev)
+struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi)
 {
+	u64 address;
+	struct kvm_io_device *kvm_io_dev;
 	struct vgic_io_device *iodev;

-	if (dev->ops != &kvm_io_gic_ops)
-		return NULL;
+	if (!vgic_has_its(kvm))
+		return ERR_PTR(-ENODEV);

-	iodev = container_of(dev, struct vgic_io_device, dev);
+	if (!(msi->flags & KVM_MSI_VALID_DEVID))
+		return ERR_PTR(-EINVAL);

+	address = (u64)msi->address_hi << 32 | msi->address_lo;
+
+	kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
+	if (!kvm_io_dev)
+		return ERR_PTR(-EINVAL);
+
+	if (kvm_io_dev->ops != &kvm_io_gic_ops)
+		return ERR_PTR(-EINVAL);
+
+	iodev = container_of(kvm_io_dev, struct vgic_io_device, dev);
 	if (iodev->iodev_type != IODEV_ITS)
-		return NULL;
+		return ERR_PTR(-EINVAL);

-	return iodev;
+	return iodev->its;
+}
+
+/*
+ * Find the target VCPU and the LPI number for a given devid/eventid pair
+ * and make this IRQ pending, possibly injecting it.
+ * Must be called with the its_lock mutex held.
+ * Returns 0 on success, a positive error value for any ITS mapping
+ * related errors and negative error values for generic errors.
+ */
+static int vgic_its_trigger_msi(struct kvm *kvm, struct vgic_its *its,
+				u32 devid, u32 eventid)
+{
+	struct vgic_irq *irq = NULL;
+	unsigned long flags;
+	int err;
+
+	err = vgic_its_resolve_lpi(kvm, its, devid, eventid, &irq);
+	if (err)
+		return err;
+
+	if (irq->hw)
+		return irq_set_irqchip_state(irq->host_irq,
+					     IRQCHIP_STATE_PENDING, true);
+
+	spin_lock_irqsave(&irq->irq_lock, flags);
+	irq->pending_latch = true;
+	vgic_queue_irq_unlock(kvm, irq, flags);
+
+	return 0;
 }

 /*

@@ -563,30 +620,16 @@ static struct vgic_io_device *vgic_get_its_iodev(struct kvm_io_device *dev)
  */
 int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi)
 {
-	u64 address;
-	struct kvm_io_device *kvm_io_dev;
-	struct vgic_io_device *iodev;
+	struct vgic_its *its;
 	int ret;

-	if (!vgic_has_its(kvm))
-		return -ENODEV;
-
-	if (!(msi->flags & KVM_MSI_VALID_DEVID))
-		return -EINVAL;
-
-	address = (u64)msi->address_hi << 32 | msi->address_lo;
-
-	kvm_io_dev = kvm_io_bus_get_dev(kvm, KVM_MMIO_BUS, address);
-	if (!kvm_io_dev)
-		return -EINVAL;
-
-	iodev = vgic_get_its_iodev(kvm_io_dev);
-	if (!iodev)
-		return -EINVAL;
-
-	mutex_lock(&iodev->its->its_lock);
-	ret = vgic_its_trigger_msi(kvm, iodev->its, msi->devid, msi->data);
-	mutex_unlock(&iodev->its->its_lock);
+	its = vgic_msi_to_its(kvm, msi);
+	if (IS_ERR(its))
+		return PTR_ERR(its);
+
+	mutex_lock(&its->its_lock);
+	ret = vgic_its_trigger_msi(kvm, its, msi->devid, msi->data);
+	mutex_unlock(&its->its_lock);

 	if (ret < 0)
 		return ret;

@@ -608,8 +651,12 @@ static void its_free_ite(struct kvm *kvm, struct its_ite *ite)
 	list_del(&ite->ite_list);

 	/* This put matches the get in vgic_add_lpi. */
-	if (ite->irq)
+	if (ite->irq) {
+		if (ite->irq->hw)
+			WARN_ON(its_unmap_vlpi(ite->irq->host_irq));
+
 		vgic_put_irq(kvm, ite->irq);
+	}

 	kfree(ite);
 }

@@ -683,11 +730,7 @@ static int vgic_its_cmd_handle_movi(struct kvm *kvm, struct vgic_its *its,
 	ite->collection = collection;
 	vcpu = kvm_get_vcpu(kvm, collection->target_addr);

-	spin_lock(&ite->irq->irq_lock);
-	ite->irq->target_vcpu = vcpu;
-	spin_unlock(&ite->irq->irq_lock);
-
-	return 0;
+	return update_affinity(ite->irq, vcpu);
 }

 /*

@@ -1054,6 +1097,10 @@ static int vgic_its_cmd_handle_clear(struct kvm *kvm, struct vgic_its *its,

 	ite->irq->pending_latch = false;

+	if (ite->irq->hw)
+		return irq_set_irqchip_state(ite->irq->host_irq,
+					     IRQCHIP_STATE_PENDING, false);
+
 	return 0;
 }

@@ -1073,7 +1120,7 @@ static int vgic_its_cmd_handle_inv(struct kvm *kvm, struct vgic_its *its,
 	if (!ite)
 		return E_ITS_INV_UNMAPPED_INTERRUPT;

-	return update_lpi_config(kvm, ite->irq, NULL);
+	return update_lpi_config(kvm, ite->irq, NULL, true);
 }

 /*

@@ -1108,12 +1155,15 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
 		irq = vgic_get_irq(kvm, NULL, intids[i]);
 		if (!irq)
 			continue;
-		update_lpi_config(kvm, irq, vcpu);
+		update_lpi_config(kvm, irq, vcpu, false);
 		vgic_put_irq(kvm, irq);
 	}

 	kfree(intids);

+	if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.its_vm)
+		its_invall_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe);
+
 	return 0;
 }

@@ -1128,11 +1178,12 @@ static int vgic_its_cmd_handle_invall(struct kvm *kvm, struct vgic_its *its,
 static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
 				      u64 *its_cmd)
 {
-	struct vgic_dist *dist = &kvm->arch.vgic;
 	u32 target1_addr = its_cmd_get_target_addr(its_cmd);
 	u32 target2_addr = its_cmd_mask_field(its_cmd, 3, 16, 32);
 	struct kvm_vcpu *vcpu1, *vcpu2;
 	struct vgic_irq *irq;
+	u32 *intids;
+	int irq_count, i;

 	if (target1_addr >= atomic_read(&kvm->online_vcpus) ||
 	    target2_addr >= atomic_read(&kvm->online_vcpus))

@@ -1144,19 +1195,19 @@ static int vgic_its_cmd_handle_movall(struct kvm *kvm, struct vgic_its *its,
 	vcpu1 = kvm_get_vcpu(kvm, target1_addr);
 	vcpu2 = kvm_get_vcpu(kvm, target2_addr);

-	spin_lock(&dist->lpi_list_lock);
+	irq_count = vgic_copy_lpi_list(vcpu1, &intids);
+	if (irq_count < 0)
+		return irq_count;

-	list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) {
-		spin_lock(&irq->irq_lock);
+	for (i = 0; i < irq_count; i++) {
+		irq = vgic_get_irq(kvm, NULL, intids[i]);

-		if (irq->target_vcpu == vcpu1)
-			irq->target_vcpu = vcpu2;
+		update_affinity(irq, vcpu2);

-		spin_unlock(&irq->irq_lock);
+		vgic_put_irq(kvm, irq);
 	}

-	spin_unlock(&dist->lpi_list_lock);
+	kfree(intids);

 	return 0;
 }

@@ -1634,6 +1685,14 @@ static int vgic_its_create(struct kvm_device *dev, u32 type)
 	if (!its)
 		return -ENOMEM;

+	if (vgic_initialized(dev->kvm)) {
+		int ret = vgic_v4_init(dev->kvm);
+
+		if (ret < 0) {
+			kfree(its);
+			return ret;
+		}
+	}
+
 	mutex_init(&its->its_lock);
 	mutex_init(&its->cmd_lock);

@@ -1946,6 +2005,15 @@ static int vgic_its_save_itt(struct vgic_its *its, struct its_device *device)
 	list_for_each_entry(ite, &device->itt_head, ite_list) {
 		gpa_t gpa = base + ite->event_id * ite_esz;

+		/*
+		 * If an LPI carries the HW bit, this means that this
+		 * interrupt is controlled by GICv4, and we do not
+		 * have direct access to that state. Let's simply fail
+		 * the save operation...
+		 */
+		if (ite->irq->hw)
+			return -EACCES;
+
 		ret = vgic_its_save_ite(its, device, ite, gpa, ite_esz);
 		if (ret)
 			return ret;


@@ -54,6 +54,11 @@ bool vgic_has_its(struct kvm *kvm)
 	return dist->has_its;
 }

+bool vgic_supports_direct_msis(struct kvm *kvm)
+{
+	return kvm_vgic_global_state.has_gicv4 && vgic_has_its(kvm);
+}
+
 static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu,
 					    gpa_t addr, unsigned int len)
 {


@@ -24,6 +24,7 @@
 static bool group0_trap;
 static bool group1_trap;
 static bool common_trap;
+static bool gicv4_enable;

 void vgic_v3_set_underflow(struct kvm_vcpu *vcpu)
 {

@@ -461,6 +462,12 @@ static int __init early_common_trap_cfg(char *buf)
 }
 early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg);

+static int __init early_gicv4_enable(char *buf)
+{
+	return strtobool(buf, &gicv4_enable);
+}
+early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
+
 /**
  * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT
  * @node: pointer to the DT node

@@ -480,6 +487,13 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
 	kvm_vgic_global_state.can_emulate_gicv2 = false;
 	kvm_vgic_global_state.ich_vtr_el2 = ich_vtr_el2;

+	/* GICv4 support? */
+	if (info->has_v4) {
+		kvm_vgic_global_state.has_gicv4 = gicv4_enable;
+		kvm_info("GICv4 support %sabled\n",
+			 gicv4_enable ? "en" : "dis");
+	}
+
 	if (!info->vcpu.start) {
 		kvm_info("GICv3: no GICV resource entry\n");
 		kvm_vgic_global_state.vcpu_base = 0;


@@ -0,0 +1,364 @@
/*
* Copyright (C) 2017 ARM Ltd.
* Author: Marc Zyngier <marc.zyngier@arm.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <linux/interrupt.h>
#include <linux/irq.h>
#include <linux/irqdomain.h>
#include <linux/kvm_host.h>
#include <linux/irqchip/arm-gic-v3.h>
#include "vgic.h"
/*
* How KVM uses GICv4 (insert rude comments here):
*
* The vgic-v4 layer acts as a bridge between several entities:
* - The GICv4 ITS representation offered by the ITS driver
* - VFIO, which is in charge of the PCI endpoint
* - The virtual ITS, which is the only thing the guest sees
*
* The configuration of VLPIs is triggered by a callback from VFIO,
* instructing KVM that a PCI device has been configured to deliver
* MSIs to a vITS.
*
* kvm_vgic_v4_set_forwarding() is thus called with the routing entry,
* and this is used to find the corresponding vITS data structures
* (ITS instance, device, event and irq) using a process that is
* extremely similar to the injection of an MSI.
*
* At this stage, we can link the guest's view of an LPI (uniquely
* identified by the routing entry) and the host irq, using the GICv4
* driver mapping operation. Should the mapping succeed, we've then
* successfully upgraded the guest's LPI to a VLPI. We can then start
* with updating GICv4's view of the property table and generating an
* INValidation in order to kickstart the delivery of this VLPI to the
* guest directly, without software intervention. Well, almost.
*
* When the PCI endpoint is deconfigured, this operation is reversed
* with VFIO calling kvm_vgic_v4_unset_forwarding().
*
* Once the VLPI has been mapped, it needs to follow any change the
* guest performs on its LPI through the vITS. For that, a number of
* command handlers have hooks to communicate these changes to the HW:
* - Any invalidation triggers a call to its_prop_update_vlpi()
* - The INT command results in a irq_set_irqchip_state(), which
* generates an INT on the corresponding VLPI.
* - The CLEAR command results in a irq_set_irqchip_state(), which
* generates an CLEAR on the corresponding VLPI.
* - DISCARD translates into an unmap, similar to a call to
* kvm_vgic_v4_unset_forwarding().
* - MOVI is translated by an update of the existing mapping, changing
* the target vcpu, resulting in a VMOVI being generated.
* - MOVALL is translated by a string of mapping updates (similar to
* the handling of MOVI). MOVALL is horrible.
*
* Note that a DISCARD/MAPTI sequence emitted from the guest without
* reprogramming the PCI endpoint after MAPTI does not result in a
* VLPI being mapped, as there is no callback from VFIO (the guest
* will get the interrupt via the normal SW injection). Fixing this is
* not trivial, and requires some horrible messing with the VFIO
* internals. Not fun. Don't do that.
*
* Then there is the scheduling. Each time a vcpu is about to run on a
* physical CPU, KVM must tell the corresponding redistributor about
* it. And if we've migrated our vcpu from one CPU to another, we must
* tell the ITS (so that the messages reach the right redistributor).
* This is done in two steps: first issue a irq_set_affinity() on the
* irq corresponding to the vcpu, then call its_schedule_vpe(). You
* must be in a non-preemptible context. On exit, another call to
* its_schedule_vpe() tells the redistributor that we're done with the
* vcpu.
*
* Finally, the doorbell handling: Each vcpu is allocated an interrupt
* which will fire each time a VLPI is made pending whilst the vcpu is
* not running. Each time the vcpu gets blocked, the doorbell
* interrupt gets enabled. When the vcpu is unblocked (for whatever
* reason), the doorbell interrupt is disabled.
*/
#define DB_IRQ_FLAGS (IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY | IRQ_NO_BALANCING)
static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
{
struct kvm_vcpu *vcpu = info;
vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
kvm_vcpu_kick(vcpu);
return IRQ_HANDLED;
}
/**
* vgic_v4_init - Initialize the GICv4 data structures
* @kvm: Pointer to the VM being initialized
*
* We may be called each time a vITS is created, or when the
* vgic is initialized. This relies on kvm->lock to be
* held. In both cases, the number of vcpus should now be
* fixed.
*/
int vgic_v4_init(struct kvm *kvm)
{
struct vgic_dist *dist = &kvm->arch.vgic;
struct kvm_vcpu *vcpu;
int i, nr_vcpus, ret;
if (!vgic_supports_direct_msis(kvm))
return 0; /* Nothing to see here... move along. */
if (dist->its_vm.vpes)
return 0;
nr_vcpus = atomic_read(&kvm->online_vcpus);
dist->its_vm.vpes = kzalloc(sizeof(*dist->its_vm.vpes) * nr_vcpus,
GFP_KERNEL);
if (!dist->its_vm.vpes)
return -ENOMEM;
dist->its_vm.nr_vpes = nr_vcpus;
kvm_for_each_vcpu(i, vcpu, kvm)
dist->its_vm.vpes[i] = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
ret = its_alloc_vcpu_irqs(&dist->its_vm);
if (ret < 0) {
kvm_err("VPE IRQ allocation failure\n");
kfree(dist->its_vm.vpes);
dist->its_vm.nr_vpes = 0;
dist->its_vm.vpes = NULL;
return ret;
}
kvm_for_each_vcpu(i, vcpu, kvm) {
int irq = dist->its_vm.vpes[i]->irq;
/*
* Don't automatically enable the doorbell, as we're
* flipping it back and forth when the vcpu gets
* blocked. Also disable the lazy disabling, as the
* doorbell could kick us out of the guest too
* early...
*/
irq_set_status_flags(irq, DB_IRQ_FLAGS);
ret = request_irq(irq, vgic_v4_doorbell_handler,
0, "vcpu", vcpu);
if (ret) {
kvm_err("failed to allocate vcpu IRQ%d\n", irq);
/*
* Trick: adjust the number of vpes so we know
* how many to nuke on teardown...
*/
dist->its_vm.nr_vpes = i;
break;
}
}
if (ret)
vgic_v4_teardown(kvm);
return ret;
}
/**
* vgic_v4_teardown - Free the GICv4 data structures
* @kvm: Pointer to the VM being destroyed
*
* Relies on kvm->lock to be held.
*/
void vgic_v4_teardown(struct kvm *kvm)
{
struct its_vm *its_vm = &kvm->arch.vgic.its_vm;
int i;
if (!its_vm->vpes)
return;
for (i = 0; i < its_vm->nr_vpes; i++) {
struct kvm_vcpu *vcpu = kvm_get_vcpu(kvm, i);
int irq = its_vm->vpes[i]->irq;
irq_clear_status_flags(irq, DB_IRQ_FLAGS);
free_irq(irq, vcpu);
}
its_free_vcpu_irqs(its_vm);
kfree(its_vm->vpes);
its_vm->nr_vpes = 0;
its_vm->vpes = NULL;
}
int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu)
{
if (!vgic_supports_direct_msis(vcpu->kvm))
return 0;
return its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, false);
}
int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
{
int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
int err;
if (!vgic_supports_direct_msis(vcpu->kvm))
return 0;
/*
* Before making the VPE resident, make sure the redistributor
* corresponding to our current CPU expects us here. See the
* doc in drivers/irqchip/irq-gic-v4.c to understand how this
* turns into a VMOVP command at the ITS level.
*/
err = irq_set_affinity(irq, cpumask_of(smp_processor_id()));
if (err)
return err;
err = its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, true);
if (err)
return err;
/*
* Now that the VPE is resident, let's get rid of a potential
* doorbell interrupt that would still be pending.
*/
err = irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, false);
return err;
}
static struct vgic_its *vgic_get_its(struct kvm *kvm,
struct kvm_kernel_irq_routing_entry *irq_entry)
{
struct kvm_msi msi = (struct kvm_msi) {
.address_lo = irq_entry->msi.address_lo,
.address_hi = irq_entry->msi.address_hi,
.data = irq_entry->msi.data,
.flags = irq_entry->msi.flags,
.devid = irq_entry->msi.devid,
};
return vgic_msi_to_its(kvm, &msi);
}
int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,
struct kvm_kernel_irq_routing_entry *irq_entry)
{
struct vgic_its *its;
struct vgic_irq *irq;
struct its_vlpi_map map;
int ret;
if (!vgic_supports_direct_msis(kvm))
return 0;
/*
* Get the ITS, and escape early on error (not a valid
* doorbell for any of our vITSs).
*/
its = vgic_get_its(kvm, irq_entry);
if (IS_ERR(its))
return 0;
mutex_lock(&its->its_lock);
/* Perform the actual DevID/EventID -> LPI translation. */
ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
irq_entry->msi.data, &irq);
if (ret)
goto out;
/*
* Emit the mapping request. If it fails, the ITS probably
* isn't v4 compatible, so let's silently bail out. Holding
* the ITS lock should ensure that nothing can modify the
* target vcpu.
*/
map = (struct its_vlpi_map) {
.vm = &kvm->arch.vgic.its_vm,
.vpe = &irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe,
.vintid = irq->intid,
.properties = ((irq->priority & 0xfc) |
(irq->enabled ? LPI_PROP_ENABLED : 0) |
LPI_PROP_GROUP1),
.db_enabled = true,
};
ret = its_map_vlpi(virq, &map);
if (ret)
goto out;
irq->hw = true;
irq->host_irq = virq;
out:
mutex_unlock(&its->its_lock);
return ret;
}
int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,
struct kvm_kernel_irq_routing_entry *irq_entry)
{
struct vgic_its *its;
struct vgic_irq *irq;
int ret;
if (!vgic_supports_direct_msis(kvm))
return 0;
/*
* Get the ITS, and escape early on error (not a valid
* doorbell for any of our vITSs).
*/
its = vgic_get_its(kvm, irq_entry);
if (IS_ERR(its))
return 0;
mutex_lock(&its->its_lock);
ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
irq_entry->msi.data, &irq);
if (ret)
goto out;
WARN_ON(!(irq->hw && irq->host_irq == virq));
irq->hw = false;
ret = its_unmap_vlpi(virq);
out:
mutex_unlock(&its->its_lock);
return ret;
}
void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu)
{
if (vgic_supports_direct_msis(vcpu->kvm)) {
int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
if (irq)
enable_irq(irq);
}
}
void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu)
{
if (vgic_supports_direct_msis(vcpu->kvm)) {
int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
if (irq)
disable_irq(irq);
}
}


@@ -17,6 +17,8 @@
 #include <linux/kvm.h>
 #include <linux/kvm_host.h>
 #include <linux/list_sort.h>
+#include <linux/interrupt.h>
+#include <linux/irq.h>

 #include "vgic.h"

@@ -409,25 +411,56 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid,
 	return 0;
 }

-int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq)
+/* @irq->irq_lock must be held */
+static int kvm_vgic_map_irq(struct kvm_vcpu *vcpu, struct vgic_irq *irq,
+			    unsigned int host_irq)
 {
-	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
+	struct irq_desc *desc;
+	struct irq_data *data;
+
+	/*
+	 * Find the physical IRQ number corresponding to @host_irq
+	 */
+	desc = irq_to_desc(host_irq);
+	if (!desc) {
+		kvm_err("%s: no interrupt descriptor\n", __func__);
+		return -EINVAL;
+	}
+	data = irq_desc_get_irq_data(desc);
+	while (data->parent_data)
+		data = data->parent_data;
+
+	irq->hw = true;
+	irq->host_irq = host_irq;
+	irq->hwintid = data->hwirq;
+	return 0;
+}
+
+/* @irq->irq_lock must be held */
+static inline void kvm_vgic_unmap_irq(struct vgic_irq *irq)
+{
+	irq->hw = false;
+	irq->hwintid = 0;
+}
+
+int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, unsigned int host_irq,
+			  u32 vintid)
+{
+	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 	unsigned long flags;
+	int ret;

 	BUG_ON(!irq);

 	spin_lock_irqsave(&irq->irq_lock, flags);
-
-	irq->hw = true;
-	irq->hwintid = phys_irq;
-
+	ret = kvm_vgic_map_irq(vcpu, irq, host_irq);
 	spin_unlock_irqrestore(&irq->irq_lock, flags);
 	vgic_put_irq(vcpu->kvm, irq);

-	return 0;
+	return ret;
 }

-int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
+int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int vintid)
 {
 	struct vgic_irq *irq;
 	unsigned long flags;

@@ -435,14 +468,11 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq)
 	if (!vgic_initialized(vcpu->kvm))
 		return -EAGAIN;

-	irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
+	irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 	BUG_ON(!irq);

 	spin_lock_irqsave(&irq->irq_lock, flags);
-
-	irq->hw = false;
-	irq->hwintid = 0;
-
+	kvm_vgic_unmap_irq(irq);
 	spin_unlock_irqrestore(&irq->irq_lock, flags);
 	vgic_put_irq(vcpu->kvm, irq);

@@ -688,6 +718,8 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;

+	WARN_ON(vgic_v4_sync_hwstate(vcpu));
+
 	/* An empty ap_list_head implies used_lrs == 0 */
 	if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
 		return;

@@ -700,6 +732,8 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 /* Flush our emulation state into the GIC hardware before entering the guest. */
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 {
+	WARN_ON(vgic_v4_flush_hwstate(vcpu));
+
 	/*
 	 * If there are no virtual interrupts active or pending for this
 	 * VCPU, then there is no work to do and we can bail out without

@@ -751,6 +785,9 @@ int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu)
 	if (!vcpu->kvm->arch.vgic.enabled)
 		return false;

+	if (vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last)
+		return true;
+
 	spin_lock_irqsave(&vgic_cpu->ap_list_lock, flags);

 	list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) {

@@ -784,9 +821,9 @@ void vgic_kick_vcpus(struct kvm *kvm)
 	}
 }

-bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq)
+bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int vintid)
 {
-	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, virt_irq);
+	struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, vintid);
 	bool map_is_active;
 	unsigned long flags;


@@ -237,4 +237,14 @@ static inline int vgic_v3_max_apr_idx(struct kvm_vcpu *vcpu)
 	}
 }

+int vgic_its_resolve_lpi(struct kvm *kvm, struct vgic_its *its,
+			 u32 devid, u32 eventid, struct vgic_irq **irq);
+struct vgic_its *vgic_msi_to_its(struct kvm *kvm, struct kvm_msi *msi);
+bool vgic_supports_direct_msis(struct kvm *kvm);
+int vgic_v4_init(struct kvm *kvm);
+void vgic_v4_teardown(struct kvm *kvm);
+int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu);
+int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu);
+
 #endif