From 01d035d796fec0ab23dc3f3a3a9f58bbe034fc5b Mon Sep 17 00:00:00 2001
From: Christian Borntraeger
Date: Sun, 27 Oct 2019 09:19:50 +0100
Subject: [PATCH 1/9] KVM: arm/arm64: Show halt poll counters in debugfs

ARM/ARM64 has the counters halt_successful_poll, halt_attempted_poll,
halt_poll_invalid, and halt_wakeup, but never exposed them in debugfs.

Signed-off-by: Christian Borntraeger
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/1572164390-5851-1-git-send-email-borntraeger@de.ibm.com
---
 arch/arm/kvm/guest.c   | 4 ++++
 arch/arm64/kvm/guest.c | 4 ++++
 2 files changed, 8 insertions(+)

diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c
index 684cf64b4033..66964642cd42 100644
--- a/arch/arm/kvm/guest.c
+++ b/arch/arm/kvm/guest.c
@@ -21,6 +21,10 @@
 #define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }

 struct kvm_stats_debugfs_item debugfs_entries[] = {
+	VCPU_STAT(halt_successful_poll),
+	VCPU_STAT(halt_attempted_poll),
+	VCPU_STAT(halt_poll_invalid),
+	VCPU_STAT(halt_wakeup),
 	VCPU_STAT(hvc_exit_stat),
 	VCPU_STAT(wfe_exit_stat),
 	VCPU_STAT(wfi_exit_stat),
diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c
index dfd626447482..260ea3158682 100644
--- a/arch/arm64/kvm/guest.c
+++ b/arch/arm64/kvm/guest.c
@@ -34,6 +34,10 @@
 #define VCPU_STAT(x) { #x, offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU }

 struct kvm_stats_debugfs_item debugfs_entries[] = {
+	VCPU_STAT(halt_successful_poll),
+	VCPU_STAT(halt_attempted_poll),
+	VCPU_STAT(halt_poll_invalid),
+	VCPU_STAT(halt_wakeup),
 	VCPU_STAT(hvc_exit_stat),
 	VCPU_STAT(wfe_exit_stat),
 	VCPU_STAT(wfi_exit_stat),

From 5c401308017f256ae9de804b4a1c65be1d390571 Mon Sep 17 00:00:00 2001
From: Christoffer Dall
Date: Mon, 28 Oct 2019 14:05:41 +0100
Subject: [PATCH 2/9] KVM: arm64: Don't set HCR_EL2.TVM when S2FWB is supported

On CPUs that support S2FWB (Armv8.4+), KVM configures the stage 2 page
tables to override the memory attributes of memory accesses, regardless
of the stage 1 page table configurations, and also when the stage 1 MMU
is turned off.  This results in all memory accesses to RAM being
cacheable, including during early boot of the guest.

On CPUs without this feature, memory accesses were non-cacheable during
boot until the guest turned on the stage 1 MMU, and we had to detect
when the guest turned on the MMU, such that we could invalidate all
cache entries and ensure a consistent view of memory with the MMU
turned on.  When the guest turned on the caches, we would call
stage2_flush_vm() from kvm_toggle_cache().

However, stage2_flush_vm() walks all the stage 2 tables, and calls
__kvm_flush_dcache_pte, which on a system with S2FWB does ...
absolutely nothing.

We can avoid that whole song and dance, and simply not set TVM when
creating a VM on a system that has S2FWB.
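To illustrate the dance this removes, here is a minimal standalone model
(not the kernel code; the SCTLR bits, the handler name and the flush hook
are all stand-ins) of what the TVM trap buys us on non-FWB hardware:

    #include <stdbool.h>
    #include <stdio.h>

    #define SCTLR_M (1UL << 0)  /* stand-in: stage 1 MMU enable */
    #define SCTLR_C (1UL << 2)  /* stand-in: data cache enable */

    static bool caches_on(unsigned long sctlr)
    {
        return (sctlr & (SCTLR_M | SCTLR_C)) == (SCTLR_M | SCTLR_C);
    }

    /* invoked on each trapped SCTLR_EL1 write while TVM is set */
    static void handle_sctlr_write(unsigned long *sctlr, unsigned long val)
    {
        bool was_on = caches_on(*sctlr);

        *sctlr = val;
        if (!was_on && caches_on(val))
            printf("MMU+caches now on: flush stage 2\n");
    }

    int main(void)
    {
        unsigned long sctlr = 0;

        handle_sctlr_write(&sctlr, SCTLR_M);            /* no flush yet */
        handle_sctlr_write(&sctlr, SCTLR_M | SCTLR_C);  /* flush here */
        return 0;
    }

With S2FWB the flush is a no-op, so the trap (and the exit it costs on
every VM-ops register write during boot) buys nothing.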
Signed-off-by: Christoffer Dall
Signed-off-by: Marc Zyngier
Reviewed-by: Mark Rutland
Link: https://lore.kernel.org/r/20191028130541.30536-1-christoffer.dall@arm.com
---
 arch/arm64/include/asm/kvm_arm.h     |  3 +--
 arch/arm64/include/asm/kvm_emulate.h | 12 +++++++++++-
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index ddf9d762ac62..6e5d839f42b5 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -61,7 +61,6 @@
  * RW:		64bit by default, can be overridden for 32bit VMs
  * TAC:		Trap ACTLR
  * TSC:		Trap SMC
- * TVM:		Trap VM ops (until M+C set in SCTLR_EL1)
  * TSW:		Trap cache operations by set/way
  * TWE:		Trap WFE
  * TWI:		Trap WFI
@@ -74,7 +73,7 @@
  * SWIO:	Turn set/way invalidates into set/way clean+invalidate
  */
 #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \
-			 HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \
+			 HCR_BSU_IS | HCR_FB | HCR_TAC | \
			 HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW | HCR_TLOR | \
			 HCR_FMO | HCR_IMO)
 #define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF)
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index d69c1efc63e7..6e92f6c7b1e4 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -53,8 +53,18 @@ static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu)
 		/* trap error record accesses */
 		vcpu->arch.hcr_el2 |= HCR_TERR;
 	}
-	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB))
+
+	if (cpus_have_const_cap(ARM64_HAS_STAGE2_FWB)) {
 		vcpu->arch.hcr_el2 |= HCR_FWB;
+	} else {
+		/*
+		 * For non-FWB CPUs, we trap VM ops (HCR_EL2.TVM) until M+C
+		 * get set in SCTLR_EL1 such that we can detect when the guest
+		 * MMU gets turned on and do the necessary cache maintenance
+		 * then.
+		 */
+		vcpu->arch.hcr_el2 |= HCR_TVM;
+	}

 	if (test_bit(KVM_ARM_VCPU_EL1_32BIT, vcpu->arch.features))
 		vcpu->arch.hcr_el2 &= ~HCR_RW;

From 8e01d9a396e6db153d94a6004e6473d9ff251a6a Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Sun, 27 Oct 2019 14:41:59 +0000
Subject: [PATCH 3/9] KVM: arm64: vgic-v4: Move the GICv4 residency flow to be driven by vcpu_load/put

When the VHE code was reworked, a lot of the vgic stuff was moved
around, but the GICv4 residency code did stay untouched, meaning that
we come in and out of residency on each flush/sync, which is obviously
suboptimal.

To address this, let's move things around a bit:

- Residency entry (flush) moves to vcpu_load
- Residency exit (sync) moves to vcpu_put
- On blocking (entry to WFI), we "put"
- On unblocking (exit from WFI), we "load"

Because these can nest (load/block/put/load/unblock/put, for example),
we now have per-VPE tracking of the residency state.

Additionally, vgic_v4_put gains a "need doorbell" parameter, which only
gets set to true when blocking because of a WFI.  This allows a finer
control of the doorbell, which now also gets disabled as soon as it
gets signaled.
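For example, the residency flag is what keeps those nested sequences
balanced; here is a minimal standalone model (not the kernel code) of
the idempotent load/put pair:

    #include <stdbool.h>
    #include <stdio.h>

    struct vpe { bool resident; };

    static void vpe_load(struct vpe *v)
    {
        if (v->resident)
            return;                 /* already resident: no-op */
        v->resident = true;
        printf("SCHEDULE_VPE\n");
    }

    static void vpe_put(struct vpe *v)
    {
        if (!v->resident)
            return;                 /* already non-resident: no-op */
        v->resident = false;
        printf("DESCHEDULE_VPE\n");
    }

    int main(void)
    {
        struct vpe v = { .resident = false };

        vpe_load(&v);  /* vcpu_load */
        vpe_put(&v);   /* blocking on WFI */
        vpe_put(&v);   /* vcpu_put while blocked: no-op */
        vpe_load(&v);  /* vcpu_load on the wake-up path */
        vpe_load(&v);  /* unblocking: no-op */
        vpe_put(&v);   /* final vcpu_put */
        return 0;
    }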
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20191027144234.8395-2-maz@kernel.org
---
 drivers/irqchip/irq-gic-v4.c       |  7 +++-
 include/kvm/arm_vgic.h             |  4 +--
 include/linux/irqchip/arm-gic-v4.h |  2 ++
 virt/kvm/arm/arm.c                 | 12 ++++---
 virt/kvm/arm/vgic/vgic-v3.c        |  4 +++
 virt/kvm/arm/vgic/vgic-v4.c        | 55 ++++++++++++++----------------
 virt/kvm/arm/vgic/vgic.c           |  4 ---
 virt/kvm/arm/vgic/vgic.h           |  2 --
 8 files changed, 48 insertions(+), 42 deletions(-)

diff --git a/drivers/irqchip/irq-gic-v4.c b/drivers/irqchip/irq-gic-v4.c
index 563e87ed0766..45969927cc81 100644
--- a/drivers/irqchip/irq-gic-v4.c
+++ b/drivers/irqchip/irq-gic-v4.c
@@ -141,12 +141,17 @@ static int its_send_vpe_cmd(struct its_vpe *vpe, struct its_cmd_info *info)
 int its_schedule_vpe(struct its_vpe *vpe, bool on)
 {
 	struct its_cmd_info info;
+	int ret;

 	WARN_ON(preemptible());

 	info.cmd_type = on ? SCHEDULE_VPE : DESCHEDULE_VPE;

-	return its_send_vpe_cmd(vpe, &info);
+	ret = its_send_vpe_cmd(vpe, &info);
+	if (!ret)
+		vpe->resident = on;
+
+	return ret;
 }

 int its_invall_vpe(struct its_vpe *vpe)
diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index af4f09c02bf1..4dc58d7a0010 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -396,7 +396,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int irq,
 int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int irq,
				  struct kvm_kernel_irq_routing_entry *irq_entry);

-void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu);
-void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu);
+int vgic_v4_load(struct kvm_vcpu *vcpu);
+int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db);

 #endif /* __KVM_ARM_VGIC_H */
diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
index e6b155713b47..ab1396afe08a 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -35,6 +35,8 @@ struct its_vpe {
 	/* Doorbell interrupt */
 	int irq;
 	irq_hw_number_t vpe_db_lpi;
+	/* VPE resident */
+	bool resident;
 	/* VPE proxy mapping */
 	int vpe_proxy_event;
 	/*
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index 86c6aa1cb58e..bd2afcf9a13f 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -322,20 +322,24 @@ void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu)
 	/*
 	 * If we're about to block (most likely because we've just hit a
 	 * WFI), we need to sync back the state of the GIC CPU interface
-	 * so that we have the lastest PMR and group enables. This ensures
+	 * so that we have the latest PMR and group enables. This ensures
 	 * that kvm_arch_vcpu_runnable has up-to-date data to decide
 	 * whether we have pending interrupts.
+	 *
+	 * For the same reason, we want to tell GICv4 that we need
+	 * doorbells to be signalled, should an interrupt become pending.
 	 */
 	preempt_disable();
 	kvm_vgic_vmcr_sync(vcpu);
+	vgic_v4_put(vcpu, true);
 	preempt_enable();
-
-	kvm_vgic_v4_enable_doorbell(vcpu);
 }

 void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu)
 {
-	kvm_vgic_v4_disable_doorbell(vcpu);
+	preempt_disable();
+	vgic_v4_load(vcpu);
+	preempt_enable();
 }

 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 8d69f007dd0c..48307a9eb1d8 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -664,6 +664,8 @@ void vgic_v3_load(struct kvm_vcpu *vcpu)

 	if (has_vhe())
 		__vgic_v3_activate_traps(vcpu);
+
+	WARN_ON(vgic_v4_load(vcpu));
 }

 void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)
@@ -676,6 +678,8 @@ void vgic_v3_vmcr_sync(struct kvm_vcpu *vcpu)

 void vgic_v3_put(struct kvm_vcpu *vcpu)
 {
+	WARN_ON(vgic_v4_put(vcpu, false));
+
 	vgic_v3_vmcr_sync(vcpu);

 	kvm_call_hyp(__vgic_v3_save_aprs, vcpu);
diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 477af6aebb97..7e1f3202968a 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -85,6 +85,10 @@ static irqreturn_t vgic_v4_doorbell_handler(int irq, void *info)
 {
 	struct kvm_vcpu *vcpu = info;

+	/* We got the message, no need to fire again */
+	if (!irqd_irq_disabled(&irq_to_desc(irq)->irq_data))
+		disable_irq_nosync(irq);
+
 	vcpu->arch.vgic_cpu.vgic_v3.its_vpe.pending_last = true;
 	kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu);
 	kvm_vcpu_kick(vcpu);
@@ -192,20 +196,30 @@ void vgic_v4_teardown(struct kvm *kvm)
 	its_vm->vpes = NULL;
 }

-int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu)
+int vgic_v4_put(struct kvm_vcpu *vcpu, bool need_db)
 {
-	if (!vgic_supports_direct_msis(vcpu->kvm))
+	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+	struct irq_desc *desc = irq_to_desc(vpe->irq);
+
+	if (!vgic_supports_direct_msis(vcpu->kvm) || !vpe->resident)
 		return 0;

-	return its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, false);
+	/*
+	 * If blocking, a doorbell is required. Undo the nested
+	 * disable_irq() calls...
+	 */
+	while (need_db && irqd_irq_disabled(&desc->irq_data))
+		enable_irq(vpe->irq);
+
+	return its_schedule_vpe(vpe, false);
 }

-int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
+int vgic_v4_load(struct kvm_vcpu *vcpu)
 {
-	int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
+	struct its_vpe *vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
 	int err;

-	if (!vgic_supports_direct_msis(vcpu->kvm))
+	if (!vgic_supports_direct_msis(vcpu->kvm) || vpe->resident)
 		return 0;

 	/*
@@ -214,11 +228,14 @@ int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu)
 	 * doc in drivers/irqchip/irq-gic-v4.c to understand how this
 	 * turns into a VMOVP command at the ITS level.
 	 */
-	err = irq_set_affinity(irq, cpumask_of(smp_processor_id()));
+	err = irq_set_affinity(vpe->irq, cpumask_of(smp_processor_id()));
 	if (err)
 		return err;

-	err = its_schedule_vpe(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe, true);
+	/* Disable the doorbell, as we're about to enter the guest */
+	disable_irq_nosync(vpe->irq);
+
+	err = its_schedule_vpe(vpe, true);
 	if (err)
 		return err;

@@ -226,9 +243,7 @@
 	 * Now that the VPE is resident, let's get rid of a potential
 	 * doorbell interrupt that would still be pending.
 	 */
-	err = irq_set_irqchip_state(irq, IRQCHIP_STATE_PENDING, false);
-
-	return err;
+	return irq_set_irqchip_state(vpe->irq, IRQCHIP_STATE_PENDING, false);
 }

 static struct vgic_its *vgic_get_its(struct kvm *kvm,
@@ -335,21 +350,3 @@ out:
 	mutex_unlock(&its->its_lock);
 	return ret;
 }
-
-void kvm_vgic_v4_enable_doorbell(struct kvm_vcpu *vcpu)
-{
-	if (vgic_supports_direct_msis(vcpu->kvm)) {
-		int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
-		if (irq)
-			enable_irq(irq);
-	}
-}
-
-void kvm_vgic_v4_disable_doorbell(struct kvm_vcpu *vcpu)
-{
-	if (vgic_supports_direct_msis(vcpu->kvm)) {
-		int irq = vcpu->arch.vgic_cpu.vgic_v3.its_vpe.irq;
-		if (irq)
-			disable_irq(irq);
-	}
-}
diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c
index 45a870cb63f5..99b02ca730a8 100644
--- a/virt/kvm/arm/vgic/vgic.c
+++ b/virt/kvm/arm/vgic/vgic.c
@@ -857,8 +857,6 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu)
 {
 	struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu;

-	WARN_ON(vgic_v4_sync_hwstate(vcpu));
-
 	/* An empty ap_list_head implies used_lrs == 0 */
 	if (list_empty(&vcpu->arch.vgic_cpu.ap_list_head))
 		return;
@@ -882,8 +880,6 @@ static inline void vgic_restore_state(struct kvm_vcpu *vcpu)
 /* Flush our emulation state into the GIC hardware before entering the guest. */
 void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu)
 {
-	WARN_ON(vgic_v4_flush_hwstate(vcpu));
-
 	/*
 	 * If there are no virtual interrupts active or pending for this
 	 * VCPU, then there is no work to do and we can bail out without
diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h
index 83066a81b16a..c7fefd6b1c80 100644
--- a/virt/kvm/arm/vgic/vgic.h
+++ b/virt/kvm/arm/vgic/vgic.h
@@ -316,7 +316,5 @@ void vgic_its_invalidate_cache(struct kvm *kvm);
 bool vgic_supports_direct_msis(struct kvm *kvm);
 int vgic_v4_init(struct kvm *kvm);
 void vgic_v4_teardown(struct kvm *kvm);
-int vgic_v4_sync_hwstate(struct kvm_vcpu *vcpu);
-int vgic_v4_flush_hwstate(struct kvm_vcpu *vcpu);

 #endif

From 9ff624cdbff4466a356892500699aea9318d584e Mon Sep 17 00:00:00 2001
From: Zenghui Yu
Date: Tue, 29 Oct 2019 15:19:17 +0800
Subject: [PATCH 4/9] KVM: arm/arm64: vgic: Remove the declaration of kvm_send_userspace_msi()

The callsite of kvm_send_userspace_msi() is currently arch agnostic.
There seems to be no reason to keep an extra declaration of it in
arm_vgic.h (we already have one in include/linux/kvm_host.h).

Remove it.

Signed-off-by: Zenghui Yu
Signed-off-by: Marc Zyngier
Reviewed-by: Eric Auger
Link: https://lore.kernel.org/r/20191029071919.177-2-yuzenghui@huawei.com
---
 include/kvm/arm_vgic.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index 4dc58d7a0010..f66a02dac8b0 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -378,8 +378,6 @@ static inline int kvm_vgic_get_max_vcpus(void)
 	return kvm_vgic_global_state.max_gic_vcpus;
 }

-int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi);
-
 /**
  * kvm_vgic_setup_default_irq_routing:
  * Setup a default flat gsi routing table mapping all SPIs

From bad36e4e8cdc9048948490293efefdbd85c40ecc Mon Sep 17 00:00:00 2001
From: Zenghui Yu
Date: Tue, 29 Oct 2019 15:19:18 +0800
Subject: [PATCH 5/9] KVM: arm/arm64: vgic: Fix some comment typos

Fix various comments, including wrong function names, grammar mistakes
and specification references.
Signed-off-by: Zenghui Yu
Signed-off-by: Marc Zyngier
Link: https://lore.kernel.org/r/20191029071919.177-3-yuzenghui@huawei.com
---
 include/kvm/arm_vgic.h      | 2 +-
 virt/kvm/arm/vgic/vgic-v3.c | 2 +-
 virt/kvm/arm/vgic/vgic-v4.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h
index f66a02dac8b0..9d53f545a3d5 100644
--- a/include/kvm/arm_vgic.h
+++ b/include/kvm/arm_vgic.h
@@ -240,7 +240,7 @@ struct vgic_dist {
 	 * Contains the attributes and gpa of the LPI configuration table.
 	 * Since we report GICR_TYPER.CommonLPIAff as 0b00, we can share
 	 * one address across all redistributors.
-	 * GICv3 spec: 6.1.2 "LPI Configuration tables"
+	 * GICv3 spec: IHI 0069E 6.1.1 "LPI Configuration tables"
 	 */
 	u64			propbaser;

diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index 48307a9eb1d8..e69c538a24ca 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -357,7 +357,7 @@ retry:
 }

 /**
- * vgic_its_save_pending_tables - Save the pending tables into guest RAM
+ * vgic_v3_save_pending_tables - Save the pending tables into guest RAM
  * kvm lock and all vcpu lock must be held
  */
 int vgic_v3_save_pending_tables(struct kvm *kvm)
diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 7e1f3202968a..0965fb0c427a 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -281,7 +281,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,

 	mutex_lock(&its->its_lock);

-	/* Perform then actual DevID/EventID -> LPI translation. */
+	/* Perform the actual DevID/EventID -> LPI translation. */
 	ret = vgic_its_resolve_lpi(kvm, its, irq_entry->msi.devid,
				    irq_entry->msi.data, &irq);
 	if (ret)

From ca185b260951d3b55108c0b95e188682d8a507b7 Mon Sep 17 00:00:00 2001
From: Zenghui Yu
Date: Tue, 29 Oct 2019 15:19:19 +0800
Subject: [PATCH 6/9] KVM: arm/arm64: vgic: Don't rely on the wrong pending table

It's possible for two LPIs to reside in the same "byte_offset" but
target two different vcpus, in which case their pending status is
indicated by two different pending tables.  In such a scenario, using
the last_byte_offset optimization leads KVM to rely on the wrong
pending table entry.

Let us use last_ptr instead, which can be treated as a byte index into
a pending table and, unlike byte_offset, is vcpu specific.
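A worked example (standalone and illustrative only; the pending table
bases and intids below are made up) of the aliasing being fixed:

    #include <stdint.h>
    #include <stdio.h>

    #define BITS_PER_BYTE 8

    int main(void)
    {
        /* per-vcpu GICR_PENDBASER values (illustrative) */
        uint64_t pendbase0 = 0x80000000ULL; /* vcpu0's pending table */
        uint64_t pendbase1 = 0x80010000ULL; /* vcpu1's pending table */

        uint32_t intid0 = 8192; /* LPI targeting vcpu0 */
        uint32_t intid1 = 8193; /* LPI targeting vcpu1 */

        uint64_t off0 = intid0 / BITS_PER_BYTE; /* 1024 */
        uint64_t off1 = intid1 / BITS_PER_BYTE; /* 1024: same byte_offset */

        /*
         * Old cache key (byte_offset): off0 == off1, so the walk skips
         * re-reading and reuses 'val' from vcpu0's table for vcpu1.
         * New cache key (ptr): the pointers differ, forcing a re-read.
         */
        printf("byte_offset: %llu vs %llu\n",
               (unsigned long long)off0, (unsigned long long)off1);
        printf("ptr: %#llx vs %#llx\n",
               (unsigned long long)(pendbase0 + off0),
               (unsigned long long)(pendbase1 + off1));
        return 0;
    }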
Fixes: 280771252c1b ("KVM: arm64: vgic-v3: KVM_DEV_ARM_VGIC_SAVE_PENDING_TABLES")
Cc: stable@vger.kernel.org
Signed-off-by: Zenghui Yu
Signed-off-by: Marc Zyngier
Acked-by: Eric Auger
Link: https://lore.kernel.org/r/20191029071919.177-4-yuzenghui@huawei.com
---
 virt/kvm/arm/vgic/vgic-v3.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c
index e69c538a24ca..f45635a6f0ec 100644
--- a/virt/kvm/arm/vgic/vgic-v3.c
+++ b/virt/kvm/arm/vgic/vgic-v3.c
@@ -363,8 +363,8 @@ retry:
 int vgic_v3_save_pending_tables(struct kvm *kvm)
 {
 	struct vgic_dist *dist = &kvm->arch.vgic;
-	int last_byte_offset = -1;
 	struct vgic_irq *irq;
+	gpa_t last_ptr = ~(gpa_t)0;
 	int ret;
 	u8 val;

@@ -384,11 +384,11 @@ int vgic_v3_save_pending_tables(struct kvm *kvm)
 		bit_nr = irq->intid % BITS_PER_BYTE;
 		ptr = pendbase + byte_offset;

-		if (byte_offset != last_byte_offset) {
+		if (ptr != last_ptr) {
 			ret = kvm_read_guest_lock(kvm, ptr, &val, 1);
 			if (ret)
 				return ret;
-			last_byte_offset = byte_offset;
+			last_ptr = ptr;
 		}

 		stored = val & (1U << bit_nr);

From 9090825fa99740f0c794f94b9cbd57ad79101228 Mon Sep 17 00:00:00 2001
From: Thomas Gleixner
Date: Thu, 7 Nov 2019 10:54:24 +0100
Subject: [PATCH 7/9] KVM: arm/arm64: Let the timer expire in hardirq context on RT

The timers are canceled from a preempt notifier which is invoked with
preemption disabled, which is not allowed on PREEMPT_RT.  The timer
callback is short, so it can be invoked in hard-IRQ context on -RT.

Let the timer expire in hard-IRQ context even on -RT.

Signed-off-by: Thomas Gleixner
Signed-off-by: Sebastian Andrzej Siewior
Signed-off-by: Marc Zyngier
Tested-by: Julien Grall
Acked-by: Marc Zyngier
Link: https://lore.kernel.org/r/20191107095424.16647-1-bigeasy@linutronix.de
---
 virt/kvm/arm/arch_timer.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index e2bb5bd60227..f182b2380345 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -80,7 +80,7 @@ static inline bool userspace_irqchip(struct kvm *kvm)
 static void soft_timer_start(struct hrtimer *hrt, u64 ns)
 {
 	hrtimer_start(hrt, ktime_add_ns(ktime_get(), ns),
-		      HRTIMER_MODE_ABS);
+		      HRTIMER_MODE_ABS_HARD);
 }

 static void soft_timer_cancel(struct hrtimer *hrt)
@@ -697,11 +697,11 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu)
 	update_vtimer_cntvoff(vcpu, kvm_phys_timer_read());
 	ptimer->cntvoff = 0;

-	hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&timer->bg_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
 	timer->bg_timer.function = kvm_bg_timer_expire;

-	hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
-	hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	hrtimer_init(&vtimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
+	hrtimer_init(&ptimer->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_HARD);
 	vtimer->hrtimer.function = kvm_hrtimer_expire;
 	ptimer->hrtimer.function = kvm_hrtimer_expire;

From 5bd90b0989731520f2cdcfbbe467f1271f3cc803 Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 7 Nov 2019 16:04:11 +0000
Subject: [PATCH 8/9] KVM: vgic-v4: Track the number of VLPIs per vcpu

In order to find out whether a vcpu is likely to be the target of
VLPIs (and to further optimize the way we deal with those), let's
track the number of VLPIs a vcpu can receive.

This gets implemented with an atomic variable that gets incremented or
decremented on map, unmap and move of a VLPI.
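The bookkeeping is simple enough to model standalone (illustrative
only; the two-vcpu array and function names below are made up):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int vlpi_count[2];    /* one counter per (model) vcpu */

    static void map_vlpi(int vcpu)   { atomic_fetch_add(&vlpi_count[vcpu], 1); }
    static void unmap_vlpi(int vcpu) { atomic_fetch_sub(&vlpi_count[vcpu], 1); }

    /* a retarget decrements the old vcpu's count, increments the new one's */
    static void move_vlpi(int from, int to)
    {
        unmap_vlpi(from);
        map_vlpi(to);
    }

    int main(void)
    {
        map_vlpi(0);        /* device LPI forwarded to vcpu0 */
        move_vlpi(0, 1);    /* guest retargets it to vcpu1 */
        printf("vcpu0=%d vcpu1=%d\n",
               atomic_load(&vlpi_count[0]), atomic_load(&vlpi_count[1]));
        return 0;
    }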
Signed-off-by: Marc Zyngier
Reviewed-by: Zenghui Yu
Reviewed-by: Christoffer Dall
Link: https://lore.kernel.org/r/20191107160412.30301-2-maz@kernel.org
---
 include/linux/irqchip/arm-gic-v4.h | 2 ++
 virt/kvm/arm/vgic/vgic-init.c      | 1 +
 virt/kvm/arm/vgic/vgic-its.c       | 3 +++
 virt/kvm/arm/vgic/vgic-v4.c        | 2 ++
 4 files changed, 8 insertions(+)

diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h
index ab1396afe08a..5dbcfc65f21e 100644
--- a/include/linux/irqchip/arm-gic-v4.h
+++ b/include/linux/irqchip/arm-gic-v4.h
@@ -32,6 +32,8 @@ struct its_vm {
 struct its_vpe {
 	struct page *vpt_page;
 	struct its_vm *its_vm;
+	/* per-vPE VLPI tracking */
+	atomic_t vlpi_count;
 	/* Doorbell interrupt */
 	int irq;
 	irq_hw_number_t vpe_db_lpi;
diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c
index 6f50c429196d..b3c5de48064c 100644
--- a/virt/kvm/arm/vgic/vgic-init.c
+++ b/virt/kvm/arm/vgic/vgic-init.c
@@ -203,6 +203,7 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu)
 	INIT_LIST_HEAD(&vgic_cpu->ap_list_head);
 	raw_spin_lock_init(&vgic_cpu->ap_list_lock);
+	atomic_set(&vgic_cpu->vgic_v3.its_vpe.vlpi_count, 0);

 	/*
 	 * Enable and configure all SGIs to be edge-triggered and
diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c
index 2be6b66b3856..98c7360d9fb7 100644
--- a/virt/kvm/arm/vgic/vgic-its.c
+++ b/virt/kvm/arm/vgic/vgic-its.c
@@ -360,7 +360,10 @@ static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu)
 		if (ret)
 			return ret;

+		if (map.vpe)
+			atomic_dec(&map.vpe->vlpi_count);
 		map.vpe = &vcpu->arch.vgic_cpu.vgic_v3.its_vpe;
+		atomic_inc(&map.vpe->vlpi_count);
 		ret = its_map_vlpi(irq->host_irq, &map);
 	}
diff --git a/virt/kvm/arm/vgic/vgic-v4.c b/virt/kvm/arm/vgic/vgic-v4.c
index 0965fb0c427a..46f875589c47 100644
--- a/virt/kvm/arm/vgic/vgic-v4.c
+++ b/virt/kvm/arm/vgic/vgic-v4.c
@@ -309,6 +309,7 @@ int kvm_vgic_v4_set_forwarding(struct kvm *kvm, int virq,

 	irq->hw = true;
 	irq->host_irq = virq;
+	atomic_inc(&map.vpe->vlpi_count);

 out:
 	mutex_unlock(&its->its_lock);
@@ -342,6 +343,7 @@ int kvm_vgic_v4_unset_forwarding(struct kvm *kvm, int virq,

 	WARN_ON(!(irq->hw && irq->host_irq == virq));
 	if (irq->hw) {
+		atomic_dec(&irq->target_vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count);
 		irq->hw = false;
 		ret = its_unmap_vlpi(virq);
 	}

From ef2e78ddadbb939ce79553b10dee0131d65d8f3e Mon Sep 17 00:00:00 2001
From: Marc Zyngier
Date: Thu, 7 Nov 2019 16:04:12 +0000
Subject: [PATCH 9/9] KVM: arm64: Opportunistically turn off WFI trapping when using direct LPI injection

Just like we do for WFE trapping, it can be useful to turn off WFI
trapping when the physical CPU is not oversubscribed (that is, the
vcpu is the only runnable process on this CPU) *and* we're using
direct injection of interrupts.

The conditions are reevaluated on each vcpu_load(), ensuring that we
don't switch to this mode on a busy system.

On a GICv4 system, this has the effect of reducing the generation of
doorbell interrupts to zero when the right conditions are met, which
is a huge improvement over the current situation (where the doorbells
are screaming if the CPU ever hits a blocking WFI).
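The resulting policy, reevaluated at each vcpu_load(), fits in a small
standalone truth table (a sketch, not the kernel code; the helper below
is a stand-in for the vcpu_{set,clear}_wfx_traps pair):

    #include <stdbool.h>
    #include <stdio.h>

    static void trap_policy(bool single_task, int vlpi_count,
                            bool *trap_wfe, bool *trap_wfi)
    {
        /* WFE trapping is dropped whenever the CPU isn't oversubscribed */
        *trap_wfe = !single_task;
        /* WFI trapping is also dropped if VLPIs can be delivered directly */
        *trap_wfi = !(single_task && vlpi_count > 0);
    }

    int main(void)
    {
        for (int s = 0; s <= 1; s++) {
            for (int v = 0; v <= 1; v++) {
                bool wfe, wfi;

                trap_policy(s, v, &wfe, &wfi);
                printf("single_task=%d vlpis=%d -> trap WFE=%d WFI=%d\n",
                       s, v, wfe, wfi);
            }
        }
        return 0;
    }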
Signed-off-by: Marc Zyngier
Reviewed-by: Zenghui Yu
Reviewed-by: Christoffer Dall
Link: https://lore.kernel.org/r/20191107160412.30301-3-maz@kernel.org
---
 arch/arm/include/asm/kvm_emulate.h   | 4 ++--
 arch/arm64/include/asm/kvm_emulate.h | 9 +++++++--
 virt/kvm/arm/arm.c                   | 4 ++--
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h
index 40002416efec..023c01cad2b1 100644
--- a/arch/arm/include/asm/kvm_emulate.h
+++ b/arch/arm/include/asm/kvm_emulate.h
@@ -95,12 +95,12 @@ static inline unsigned long *vcpu_hcr(const struct kvm_vcpu *vcpu)
 	return (unsigned long *)&vcpu->arch.hcr;
 }

-static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hcr &= ~HCR_TWE;
 }

-static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hcr |= HCR_TWE;
 }
diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h
index 6e92f6c7b1e4..5a542d801f07 100644
--- a/arch/arm64/include/asm/kvm_emulate.h
+++ b/arch/arm64/include/asm/kvm_emulate.h
@@ -87,14 +87,19 @@ static inline unsigned long *vcpu_hcr(struct kvm_vcpu *vcpu)
 	return (unsigned long *)&vcpu->arch.hcr_el2;
 }

-static inline void vcpu_clear_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_clear_wfx_traps(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hcr_el2 &= ~HCR_TWE;
+	if (atomic_read(&vcpu->arch.vgic_cpu.vgic_v3.its_vpe.vlpi_count))
+		vcpu->arch.hcr_el2 &= ~HCR_TWI;
+	else
+		vcpu->arch.hcr_el2 |= HCR_TWI;
 }

-static inline void vcpu_set_wfe_traps(struct kvm_vcpu *vcpu)
+static inline void vcpu_set_wfx_traps(struct kvm_vcpu *vcpu)
 {
 	vcpu->arch.hcr_el2 |= HCR_TWE;
+	vcpu->arch.hcr_el2 |= HCR_TWI;
 }

 static inline void vcpu_ptrauth_enable(struct kvm_vcpu *vcpu)
diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c
index bd2afcf9a13f..dac96e355f69 100644
--- a/virt/kvm/arm/arm.c
+++ b/virt/kvm/arm/arm.c
@@ -386,9 +386,9 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
 	kvm_vcpu_pmu_restore_guest(vcpu);

 	if (single_task_running())
-		vcpu_clear_wfe_traps(vcpu);
+		vcpu_clear_wfx_traps(vcpu);
 	else
-		vcpu_set_wfe_traps(vcpu);
+		vcpu_set_wfx_traps(vcpu);

 	vcpu_ptrauth_setup_lazy(vcpu);
 }