diff --git a/Documentation/virtual/kvm/devices/arm-vgic-its.txt b/Documentation/virtual/kvm/devices/arm-vgic-its.txt new file mode 100644 index 000000000000..6081a5b7fc1e --- /dev/null +++ b/Documentation/virtual/kvm/devices/arm-vgic-its.txt @@ -0,0 +1,38 @@ +ARM Virtual Interrupt Translation Service (ITS) +=============================================== + +Device types supported: + KVM_DEV_TYPE_ARM_VGIC_ITS ARM Interrupt Translation Service Controller + +The ITS allows MSI(-X) interrupts to be injected into guests. This extension is +optional. Creating a virtual ITS controller also requires a host GICv3 (see +arm-vgic-v3.txt), but does not depend on having physical ITS controllers. + +There can be multiple ITS controllers per guest, each of them has to have +a separate, non-overlapping MMIO region. + + +Groups: + KVM_DEV_ARM_VGIC_GRP_ADDR + Attributes: + KVM_VGIC_ITS_ADDR_TYPE (rw, 64-bit) + Base address in the guest physical address space of the GICv3 ITS + control register frame. + This address needs to be 64K aligned and the region covers 128K. + Errors: + -E2BIG: Address outside of addressable IPA range + -EINVAL: Incorrectly aligned address + -EEXIST: Address already configured + -EFAULT: Invalid user pointer for attr->addr. + -ENODEV: Incorrect attribute or the ITS is not supported. + + + KVM_DEV_ARM_VGIC_GRP_CTRL + Attributes: + KVM_DEV_ARM_VGIC_CTRL_INIT + request the initialization of the ITS, no additional parameter in + kvm_device_attr.addr. + Errors: + -ENXIO: ITS not properly configured as required prior to setting + this attribute + -ENOMEM: Memory shortage when allocating ITS internal data diff --git a/Documentation/virtual/kvm/devices/arm-vgic-v3.txt b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt new file mode 100644 index 000000000000..9348b3caccd7 --- /dev/null +++ b/Documentation/virtual/kvm/devices/arm-vgic-v3.txt @@ -0,0 +1,206 @@ +ARM Virtual Generic Interrupt Controller v3 and later (VGICv3) +============================================================== + + +Device types supported: + KVM_DEV_TYPE_ARM_VGIC_V3 ARM Generic Interrupt Controller v3.0 + +Only one VGIC instance may be instantiated through this API. The created VGIC +will act as the VM interrupt controller, requiring emulated user-space devices +to inject interrupts to the VGIC instead of directly to CPUs. It is not +possible to create both a GICv3 and GICv2 on the same VM. + +Creating a guest GICv3 device requires a host GICv3 as well. + + +Groups: + KVM_DEV_ARM_VGIC_GRP_ADDR + Attributes: + KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit) + Base address in the guest physical address space of the GICv3 distributor + register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3. + This address needs to be 64K aligned and the region covers 64 KByte. + + KVM_VGIC_V3_ADDR_TYPE_REDIST (rw, 64-bit) + Base address in the guest physical address space of the GICv3 + redistributor register mappings. There are two 64K pages for each + VCPU and all of the redistributor pages are contiguous. + Only valid for KVM_DEV_TYPE_ARM_VGIC_V3. + This address needs to be 64K aligned. + Errors: + -E2BIG: Address outside of addressable IPA range + -EINVAL: Incorrectly aligned address + -EEXIST: Address already configured + -ENXIO: The group or attribute is unknown/unsupported for this device + or hardware support is missing. + -EFAULT: Invalid user pointer for attr->addr. + + + + KVM_DEV_ARM_VGIC_GRP_DIST_REGS + KVM_DEV_ARM_VGIC_GRP_REDIST_REGS + Attributes: + The attr field of kvm_device_attr encodes two values: + bits: | 63 .... 32 | 31 .... 0 | + values: | mpidr | offset | + + All distributor regs are (rw, 32-bit) and kvm_device_attr.addr points to a + __u32 value. 64-bit registers must be accessed by separately accessing the + lower and higher word. + + Writes to read-only registers are ignored by the kernel. + + KVM_DEV_ARM_VGIC_GRP_DIST_REGS accesses the main distributor registers. + KVM_DEV_ARM_VGIC_GRP_REDIST_REGS accesses the redistributor of the CPU + specified by the mpidr. + + The offset is relative to the "[Re]Distributor base address" as defined + in the GICv3/4 specs. Getting or setting such a register has the same + effect as reading or writing the register on real hardware, except for the + following registers: GICD_STATUSR, GICR_STATUSR, GICD_ISPENDR, + GICR_ISPENDR0, GICD_ICPENDR, and GICR_ICPENDR0. These registers behave + differently when accessed via this interface compared to their + architecturally defined behavior to allow software a full view of the + VGIC's internal state. + + The mpidr field is used to specify which + redistributor is accessed. The mpidr is ignored for the distributor. + + The mpidr encoding is based on the affinity information in the + architecture defined MPIDR, and the field is encoded as follows: + | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 | + | Aff3 | Aff2 | Aff1 | Aff0 | + + Note that distributor fields are not banked, but return the same value + regardless of the mpidr used to access the register. + + The GICD_STATUSR and GICR_STATUSR registers are architecturally defined such + that a write of a clear bit has no effect, whereas a write with a set bit + clears that value. To allow userspace to freely set the values of these two + registers, setting the attributes with the register offsets for these two + registers simply sets the non-reserved bits to the value written. + + + Accesses (reads and writes) to the GICD_ISPENDR register region and + GICR_ISPENDR0 registers get/set the value of the latched pending state for + the interrupts. + + This is identical to the value returned by a guest read from ISPENDR for an + edge triggered interrupt, but may differ for level triggered interrupts. + For edge triggered interrupts, once an interrupt becomes pending (whether + because of an edge detected on the input line or because of a guest write + to ISPENDR) this state is "latched", and only cleared when either the + interrupt is activated or when the guest writes to ICPENDR. A level + triggered interrupt may be pending either because the level input is held + high by a device, or because of a guest write to the ISPENDR register. Only + ISPENDR writes are latched; if the device lowers the line level then the + interrupt is no longer pending unless the guest also wrote to ISPENDR, and + conversely writes to ICPENDR or activations of the interrupt do not clear + the pending status if the line level is still being held high. (These + rules are documented in the GICv3 specification descriptions of the ICPENDR + and ISPENDR registers.) For a level triggered interrupt the value accessed + here is that of the latch which is set by ISPENDR and cleared by ICPENDR or + interrupt activation, whereas the value returned by a guest read from + ISPENDR is the logical OR of the latch value and the input line level. + + Raw access to the latch state is provided to userspace so that it can save + and restore the entire GIC internal state (which is defined by the + combination of the current input line level and the latch state, and cannot + be deduced from purely the line level and the value of the ISPENDR + registers). + + Accesses to GICD_ICPENDR register region and GICR_ICPENDR0 registers have + RAZ/WI semantics, meaning that reads always return 0 and writes are always + ignored. + + Errors: + -ENXIO: Getting or setting this register is not yet supported + -EBUSY: One or more VCPUs are running + + + KVM_DEV_ARM_VGIC_CPU_SYSREGS + Attributes: + The attr field of kvm_device_attr encodes two values: + bits: | 63 .... 32 | 31 .... 16 | 15 .... 0 | + values: | mpidr | RES | instr | + + The mpidr field encodes the CPU ID based on the affinity information in the + architecture defined MPIDR, and the field is encoded as follows: + | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 | + | Aff3 | Aff2 | Aff1 | Aff0 | + + The instr field encodes the system register to access based on the fields + defined in the A64 instruction set encoding for system register access + (RES means the bits are reserved for future use and should be zero): + + | 15 ... 14 | 13 ... 11 | 10 ... 7 | 6 ... 3 | 2 ... 0 | + | Op 0 | Op1 | CRn | CRm | Op2 | + + All system regs accessed through this API are (rw, 64-bit) and + kvm_device_attr.addr points to a __u64 value. + + KVM_DEV_ARM_VGIC_CPU_SYSREGS accesses the CPU interface registers for the + CPU specified by the mpidr field. + + Errors: + -ENXIO: Getting or setting this register is not yet supported + -EBUSY: VCPU is running + -EINVAL: Invalid mpidr supplied + + + KVM_DEV_ARM_VGIC_GRP_NR_IRQS + Attributes: + A value describing the number of interrupts (SGI, PPI and SPI) for + this GIC instance, ranging from 64 to 1024, in increments of 32. + + kvm_device_attr.addr points to a __u32 value. + + Errors: + -EINVAL: Value set is out of the expected range + -EBUSY: Value has already be set. + + + KVM_DEV_ARM_VGIC_GRP_CTRL + Attributes: + KVM_DEV_ARM_VGIC_CTRL_INIT + request the initialization of the VGIC, no additional parameter in + kvm_device_attr.addr. + Errors: + -ENXIO: VGIC not properly configured as required prior to calling + this attribute + -ENODEV: no online VCPU + -ENOMEM: memory shortage when allocating vgic internal data + + + KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO + Attributes: + The attr field of kvm_device_attr encodes the following values: + bits: | 63 .... 32 | 31 .... 10 | 9 .... 0 | + values: | mpidr | info | vINTID | + + The vINTID specifies which set of IRQs is reported on. + + The info field specifies which information userspace wants to get or set + using this interface. Currently we support the following info values: + + VGIC_LEVEL_INFO_LINE_LEVEL: + Get/Set the input level of the IRQ line for a set of 32 contiguously + numbered interrupts. + vINTID must be a multiple of 32. + + kvm_device_attr.addr points to a __u32 value which will contain a + bitmap where a set bit means the interrupt level is asserted. + + Bit[n] indicates the status for interrupt vINTID + n. + + SGIs and any interrupt with a higher ID than the number of interrupts + supported, will be RAZ/WI. LPIs are always edge-triggered and are + therefore not supported by this interface. + + PPIs are reported per VCPU as specified in the mpidr field, and SPIs are + reported with the same value regardless of the mpidr specified. + + The mpidr field encodes the CPU ID based on the affinity information in the + architecture defined MPIDR, and the field is encoded as follows: + | 63 .... 56 | 55 .... 48 | 47 .... 40 | 39 .... 32 | + | Aff3 | Aff2 | Aff1 | Aff0 | diff --git a/Documentation/virtual/kvm/devices/arm-vgic.txt b/Documentation/virtual/kvm/devices/arm-vgic.txt index 89182f80cc7f..76e61c883347 100644 --- a/Documentation/virtual/kvm/devices/arm-vgic.txt +++ b/Documentation/virtual/kvm/devices/arm-vgic.txt @@ -1,24 +1,19 @@ -ARM Virtual Generic Interrupt Controller (VGIC) -=============================================== +ARM Virtual Generic Interrupt Controller v2 (VGIC) +================================================== Device types supported: KVM_DEV_TYPE_ARM_VGIC_V2 ARM Generic Interrupt Controller v2.0 - KVM_DEV_TYPE_ARM_VGIC_V3 ARM Generic Interrupt Controller v3.0 - KVM_DEV_TYPE_ARM_VGIC_ITS ARM Interrupt Translation Service Controller -Only one VGIC instance of the V2/V3 types above may be instantiated through -either this API or the legacy KVM_CREATE_IRQCHIP api. The created VGIC will -act as the VM interrupt controller, requiring emulated user-space devices to -inject interrupts to the VGIC instead of directly to CPUs. +Only one VGIC instance may be instantiated through either this API or the +legacy KVM_CREATE_IRQCHIP API. The created VGIC will act as the VM interrupt +controller, requiring emulated user-space devices to inject interrupts to the +VGIC instead of directly to CPUs. -Creating a guest GICv3 device requires a host GICv3 as well. -GICv3 implementations with hardware compatibility support allow a guest GICv2 -as well. +GICv3 implementations with hardware compatibility support allow creating a +guest GICv2 through this interface. For information on creating a guest GICv3 +device and guest ITS devices, see arm-vgic-v3.txt. It is not possible to +create both a GICv3 and GICv2 device on the same VM. -Creating a virtual ITS controller requires a host GICv3 (but does not depend -on having physical ITS controllers). -There can be multiple ITS controllers per guest, each of them has to have -a separate, non-overlapping MMIO region. Groups: KVM_DEV_ARM_VGIC_GRP_ADDR @@ -32,26 +27,13 @@ Groups: Base address in the guest physical address space of the GIC virtual cpu interface register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V2. This address needs to be 4K aligned and the region covers 4 KByte. - - KVM_VGIC_V3_ADDR_TYPE_DIST (rw, 64-bit) - Base address in the guest physical address space of the GICv3 distributor - register mappings. Only valid for KVM_DEV_TYPE_ARM_VGIC_V3. - This address needs to be 64K aligned and the region covers 64 KByte. - - KVM_VGIC_V3_ADDR_TYPE_REDIST (rw, 64-bit) - Base address in the guest physical address space of the GICv3 - redistributor register mappings. There are two 64K pages for each - VCPU and all of the redistributor pages are contiguous. - Only valid for KVM_DEV_TYPE_ARM_VGIC_V3. - This address needs to be 64K aligned. - - KVM_VGIC_V3_ADDR_TYPE_ITS (rw, 64-bit) - Base address in the guest physical address space of the GICv3 ITS - control register frame. The ITS allows MSI(-X) interrupts to be - injected into guests. This extension is optional. If the kernel - does not support the ITS, the call returns -ENODEV. - Only valid for KVM_DEV_TYPE_ARM_VGIC_ITS. - This address needs to be 64K aligned and the region covers 128K. + Errors: + -E2BIG: Address outside of addressable IPA range + -EINVAL: Incorrectly aligned address + -EEXIST: Address already configured + -ENXIO: The group or attribute is unknown/unsupported for this device + or hardware support is missing. + -EFAULT: Invalid user pointer for attr->addr. KVM_DEV_ARM_VGIC_GRP_DIST_REGS Attributes: diff --git a/Documentation/virtual/kvm/devices/vcpu.txt b/Documentation/virtual/kvm/devices/vcpu.txt index c04165868faf..02f50686c418 100644 --- a/Documentation/virtual/kvm/devices/vcpu.txt +++ b/Documentation/virtual/kvm/devices/vcpu.txt @@ -30,4 +30,6 @@ Returns: -ENODEV: PMUv3 not supported attribute -EBUSY: PMUv3 already initialized -Request the initialization of the PMUv3. +Request the initialization of the PMUv3. This must be done after creating the +in-kernel irqchip. Creating a PMU with a userspace irqchip is currently not +supported. diff --git a/arch/arm/include/asm/arch_gicv3.h b/arch/arm/include/asm/arch_gicv3.h index e08d15184056..1fee657d3827 100644 --- a/arch/arm/include/asm/arch_gicv3.h +++ b/arch/arm/include/asm/arch_gicv3.h @@ -22,9 +22,7 @@ #include #include - -#define __ACCESS_CP15(CRn, Op1, CRm, Op2) p15, Op1, %0, CRn, CRm, Op2 -#define __ACCESS_CP15_64(Op1, CRm) p15, Op1, %Q0, %R0, CRm +#include #define ICC_EOIR1 __ACCESS_CP15(c12, 0, c12, 1) #define ICC_DIR __ACCESS_CP15(c12, 0, c11, 1) @@ -98,65 +96,135 @@ #define ICH_AP1R2 __AP1Rx(2) #define ICH_AP1R3 __AP1Rx(3) +/* A32-to-A64 mappings used by VGIC save/restore */ + +#define CPUIF_MAP(a32, a64) \ +static inline void write_ ## a64(u32 val) \ +{ \ + write_sysreg(val, a32); \ +} \ +static inline u32 read_ ## a64(void) \ +{ \ + return read_sysreg(a32); \ +} \ + +#define CPUIF_MAP_LO_HI(a32lo, a32hi, a64) \ +static inline void write_ ## a64(u64 val) \ +{ \ + write_sysreg(lower_32_bits(val), a32lo);\ + write_sysreg(upper_32_bits(val), a32hi);\ +} \ +static inline u64 read_ ## a64(void) \ +{ \ + u64 val = read_sysreg(a32lo); \ + \ + val |= (u64)read_sysreg(a32hi) << 32; \ + \ + return val; \ +} + +CPUIF_MAP(ICH_HCR, ICH_HCR_EL2) +CPUIF_MAP(ICH_VTR, ICH_VTR_EL2) +CPUIF_MAP(ICH_MISR, ICH_MISR_EL2) +CPUIF_MAP(ICH_EISR, ICH_EISR_EL2) +CPUIF_MAP(ICH_ELSR, ICH_ELSR_EL2) +CPUIF_MAP(ICH_VMCR, ICH_VMCR_EL2) +CPUIF_MAP(ICH_AP0R3, ICH_AP0R3_EL2) +CPUIF_MAP(ICH_AP0R2, ICH_AP0R2_EL2) +CPUIF_MAP(ICH_AP0R1, ICH_AP0R1_EL2) +CPUIF_MAP(ICH_AP0R0, ICH_AP0R0_EL2) +CPUIF_MAP(ICH_AP1R3, ICH_AP1R3_EL2) +CPUIF_MAP(ICH_AP1R2, ICH_AP1R2_EL2) +CPUIF_MAP(ICH_AP1R1, ICH_AP1R1_EL2) +CPUIF_MAP(ICH_AP1R0, ICH_AP1R0_EL2) +CPUIF_MAP(ICC_HSRE, ICC_SRE_EL2) +CPUIF_MAP(ICC_SRE, ICC_SRE_EL1) + +CPUIF_MAP_LO_HI(ICH_LR15, ICH_LRC15, ICH_LR15_EL2) +CPUIF_MAP_LO_HI(ICH_LR14, ICH_LRC14, ICH_LR14_EL2) +CPUIF_MAP_LO_HI(ICH_LR13, ICH_LRC13, ICH_LR13_EL2) +CPUIF_MAP_LO_HI(ICH_LR12, ICH_LRC12, ICH_LR12_EL2) +CPUIF_MAP_LO_HI(ICH_LR11, ICH_LRC11, ICH_LR11_EL2) +CPUIF_MAP_LO_HI(ICH_LR10, ICH_LRC10, ICH_LR10_EL2) +CPUIF_MAP_LO_HI(ICH_LR9, ICH_LRC9, ICH_LR9_EL2) +CPUIF_MAP_LO_HI(ICH_LR8, ICH_LRC8, ICH_LR8_EL2) +CPUIF_MAP_LO_HI(ICH_LR7, ICH_LRC7, ICH_LR7_EL2) +CPUIF_MAP_LO_HI(ICH_LR6, ICH_LRC6, ICH_LR6_EL2) +CPUIF_MAP_LO_HI(ICH_LR5, ICH_LRC5, ICH_LR5_EL2) +CPUIF_MAP_LO_HI(ICH_LR4, ICH_LRC4, ICH_LR4_EL2) +CPUIF_MAP_LO_HI(ICH_LR3, ICH_LRC3, ICH_LR3_EL2) +CPUIF_MAP_LO_HI(ICH_LR2, ICH_LRC2, ICH_LR2_EL2) +CPUIF_MAP_LO_HI(ICH_LR1, ICH_LRC1, ICH_LR1_EL2) +CPUIF_MAP_LO_HI(ICH_LR0, ICH_LRC0, ICH_LR0_EL2) + +#define read_gicreg(r) read_##r() +#define write_gicreg(v, r) write_##r(v) + /* Low-level accessors */ static inline void gic_write_eoir(u32 irq) { - asm volatile("mcr " __stringify(ICC_EOIR1) : : "r" (irq)); + write_sysreg(irq, ICC_EOIR1); isb(); } static inline void gic_write_dir(u32 val) { - asm volatile("mcr " __stringify(ICC_DIR) : : "r" (val)); + write_sysreg(val, ICC_DIR); isb(); } static inline u32 gic_read_iar(void) { - u32 irqstat; + u32 irqstat = read_sysreg(ICC_IAR1); - asm volatile("mrc " __stringify(ICC_IAR1) : "=r" (irqstat)); dsb(sy); + return irqstat; } static inline void gic_write_pmr(u32 val) { - asm volatile("mcr " __stringify(ICC_PMR) : : "r" (val)); + write_sysreg(val, ICC_PMR); } static inline void gic_write_ctlr(u32 val) { - asm volatile("mcr " __stringify(ICC_CTLR) : : "r" (val)); + write_sysreg(val, ICC_CTLR); isb(); } static inline void gic_write_grpen1(u32 val) { - asm volatile("mcr " __stringify(ICC_IGRPEN1) : : "r" (val)); + write_sysreg(val, ICC_IGRPEN1); isb(); } static inline void gic_write_sgi1r(u64 val) { - asm volatile("mcrr " __stringify(ICC_SGI1R) : : "r" (val)); + write_sysreg(val, ICC_SGI1R); } static inline u32 gic_read_sre(void) { - u32 val; - - asm volatile("mrc " __stringify(ICC_SRE) : "=r" (val)); - return val; + return read_sysreg(ICC_SRE); } static inline void gic_write_sre(u32 val) { - asm volatile("mcr " __stringify(ICC_SRE) : : "r" (val)); + write_sysreg(val, ICC_SRE); isb(); } +static inline void gic_write_bpr1(u32 val) +{ +#if defined(__write_sysreg) && defined(ICC_BPR1) + write_sysreg(val, ICC_BPR1); +#else + asm volatile("mcr " __stringify(ICC_BPR1) : : "r" (val)); +#endif +} + /* * Even in 32bit systems that use LPAE, there is no guarantee that the I/O * interface provides true 64bit atomic accesses, so using strd/ldrd doesn't diff --git a/arch/arm/include/asm/cp15.h b/arch/arm/include/asm/cp15.h index c3f11524f10c..dbdbce1b3a72 100644 --- a/arch/arm/include/asm/cp15.h +++ b/arch/arm/include/asm/cp15.h @@ -49,6 +49,21 @@ #ifdef CONFIG_CPU_CP15 +#define __ACCESS_CP15(CRn, Op1, CRm, Op2) \ + "mrc", "mcr", __stringify(p15, Op1, %0, CRn, CRm, Op2), u32 +#define __ACCESS_CP15_64(Op1, CRm) \ + "mrrc", "mcrr", __stringify(p15, Op1, %Q0, %R0, CRm), u64 + +#define __read_sysreg(r, w, c, t) ({ \ + t __val; \ + asm volatile(r " " c : "=r" (__val)); \ + __val; \ +}) +#define read_sysreg(...) __read_sysreg(__VA_ARGS__) + +#define __write_sysreg(v, r, w, c, t) asm volatile(w " " c : : "r" ((t)(v))) +#define write_sysreg(v, ...) __write_sysreg(v, __VA_ARGS__) + extern unsigned long cr_alignment; /* defined in entry-armv.S */ static inline unsigned long get_cr(void) diff --git a/arch/arm/include/asm/cputype.h b/arch/arm/include/asm/cputype.h index 1ee94c716a7f..e2d94c1b07b8 100644 --- a/arch/arm/include/asm/cputype.h +++ b/arch/arm/include/asm/cputype.h @@ -55,6 +55,7 @@ #define MPIDR_LEVEL_BITS 8 #define MPIDR_LEVEL_MASK ((1 << MPIDR_LEVEL_BITS) - 1) +#define MPIDR_LEVEL_SHIFT(level) (MPIDR_LEVEL_BITS * level) #define MPIDR_AFFINITY_LEVEL(mpidr, level) \ ((mpidr >> (MPIDR_LEVEL_BITS * level)) & MPIDR_LEVEL_MASK) diff --git a/arch/arm/include/asm/kvm_asm.h b/arch/arm/include/asm/kvm_asm.h index 58faff5f1eb2..d7ea6bcb29bf 100644 --- a/arch/arm/include/asm/kvm_asm.h +++ b/arch/arm/include/asm/kvm_asm.h @@ -21,6 +21,10 @@ #include +#define ARM_EXIT_WITH_ABORT_BIT 31 +#define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_ABORT_BIT)) +#define ARM_ABORT_PENDING(x) !!((x) & (1U << ARM_EXIT_WITH_ABORT_BIT)) + #define ARM_EXCEPTION_RESET 0 #define ARM_EXCEPTION_UNDEFINED 1 #define ARM_EXCEPTION_SOFTWARE 2 @@ -68,6 +72,9 @@ extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu); extern void __init_stage2_translation(void); extern void __kvm_hyp_reset(unsigned long); + +extern u64 __vgic_v3_get_ich_vtr_el2(void); +extern void __vgic_v3_init_lrs(void); #endif #endif /* __ARM_KVM_ASM_H__ */ diff --git a/arch/arm/include/asm/kvm_emulate.h b/arch/arm/include/asm/kvm_emulate.h index ee5328fc4b06..9a8a45aaf19a 100644 --- a/arch/arm/include/asm/kvm_emulate.h +++ b/arch/arm/include/asm/kvm_emulate.h @@ -40,18 +40,29 @@ static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, u8 reg_num, *vcpu_reg(vcpu, reg_num) = val; } -bool kvm_condition_valid(struct kvm_vcpu *vcpu); -void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr); +bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); +void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr); void kvm_inject_undefined(struct kvm_vcpu *vcpu); +void kvm_inject_vabt(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); +static inline bool kvm_condition_valid(const struct kvm_vcpu *vcpu) +{ + return kvm_condition_valid32(vcpu); +} + +static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) +{ + kvm_skip_instr32(vcpu, is_wide_instr); +} + static inline void vcpu_reset_hcr(struct kvm_vcpu *vcpu) { vcpu->arch.hcr = HCR_GUEST_MASK; } -static inline unsigned long vcpu_get_hcr(struct kvm_vcpu *vcpu) +static inline unsigned long vcpu_get_hcr(const struct kvm_vcpu *vcpu) { return vcpu->arch.hcr; } @@ -61,7 +72,7 @@ static inline void vcpu_set_hcr(struct kvm_vcpu *vcpu, unsigned long hcr) vcpu->arch.hcr = hcr; } -static inline bool vcpu_mode_is_32bit(struct kvm_vcpu *vcpu) +static inline bool vcpu_mode_is_32bit(const struct kvm_vcpu *vcpu) { return 1; } @@ -71,9 +82,9 @@ static inline unsigned long *vcpu_pc(struct kvm_vcpu *vcpu) return &vcpu->arch.ctxt.gp_regs.usr_regs.ARM_pc; } -static inline unsigned long *vcpu_cpsr(struct kvm_vcpu *vcpu) +static inline unsigned long *vcpu_cpsr(const struct kvm_vcpu *vcpu) { - return &vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr; + return (unsigned long *)&vcpu->arch.ctxt.gp_regs.usr_regs.ARM_cpsr; } static inline void vcpu_set_thumb(struct kvm_vcpu *vcpu) @@ -93,11 +104,21 @@ static inline bool vcpu_mode_priv(struct kvm_vcpu *vcpu) return cpsr_mode > USR_MODE;; } -static inline u32 kvm_vcpu_get_hsr(struct kvm_vcpu *vcpu) +static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu) { return vcpu->arch.fault.hsr; } +static inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) +{ + u32 hsr = kvm_vcpu_get_hsr(vcpu); + + if (hsr & HSR_CV) + return (hsr & HSR_COND) >> HSR_COND_SHIFT; + + return -1; +} + static inline unsigned long kvm_vcpu_get_hfar(struct kvm_vcpu *vcpu) { return vcpu->arch.fault.hxfar; diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 6ad21f04a922..2d19e02d03fd 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -39,7 +39,12 @@ #include + +#ifdef CONFIG_ARM_GIC_V3 +#define KVM_MAX_VCPUS VGIC_V3_MAX_CPUS +#else #define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS +#endif #define KVM_REQ_VCPU_EXIT 8 diff --git a/arch/arm/include/asm/kvm_hyp.h b/arch/arm/include/asm/kvm_hyp.h index 6eaff28f2ff3..343135ede5fa 100644 --- a/arch/arm/include/asm/kvm_hyp.h +++ b/arch/arm/include/asm/kvm_hyp.h @@ -20,28 +20,15 @@ #include #include +#include #include #include #define __hyp_text __section(.hyp.text) notrace -#define __ACCESS_CP15(CRn, Op1, CRm, Op2) \ - "mrc", "mcr", __stringify(p15, Op1, %0, CRn, CRm, Op2), u32 -#define __ACCESS_CP15_64(Op1, CRm) \ - "mrrc", "mcrr", __stringify(p15, Op1, %Q0, %R0, CRm), u64 #define __ACCESS_VFP(CRn) \ "mrc", "mcr", __stringify(p10, 7, %0, CRn, cr0, 0), u32 -#define __write_sysreg(v, r, w, c, t) asm volatile(w " " c : : "r" ((t)(v))) -#define write_sysreg(v, ...) __write_sysreg(v, __VA_ARGS__) - -#define __read_sysreg(r, w, c, t) ({ \ - t __val; \ - asm volatile(r " " c : "=r" (__val)); \ - __val; \ -}) -#define read_sysreg(...) __read_sysreg(__VA_ARGS__) - #define write_special(v, r) \ asm volatile("msr " __stringify(r) ", %0" : : "r" (v)) #define read_special(r) ({ \ @@ -119,6 +106,9 @@ void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); void __sysreg_save_state(struct kvm_cpu_context *ctxt); void __sysreg_restore_state(struct kvm_cpu_context *ctxt); +void __vgic_v3_save_state(struct kvm_vcpu *vcpu); +void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); + void asmlinkage __vfp_save_state(struct vfp_hard_struct *vfp); void asmlinkage __vfp_restore_state(struct vfp_hard_struct *vfp); static inline bool __vfp_enabled(void) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 3bb803d6814b..74a44727f8e1 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -63,37 +63,13 @@ void kvm_clear_hyp_idmap(void); static inline void kvm_set_pmd(pmd_t *pmd, pmd_t new_pmd) { *pmd = new_pmd; - flush_pmd_entry(pmd); + dsb(ishst); } static inline void kvm_set_pte(pte_t *pte, pte_t new_pte) { *pte = new_pte; - /* - * flush_pmd_entry just takes a void pointer and cleans the necessary - * cache entries, so we can reuse the function for ptes. - */ - flush_pmd_entry(pte); -} - -static inline void kvm_clean_pgd(pgd_t *pgd) -{ - clean_dcache_area(pgd, PTRS_PER_S2_PGD * sizeof(pgd_t)); -} - -static inline void kvm_clean_pmd(pmd_t *pmd) -{ - clean_dcache_area(pmd, PTRS_PER_PMD * sizeof(pmd_t)); -} - -static inline void kvm_clean_pmd_entry(pmd_t *pmd) -{ - clean_pmd_entry(pmd); -} - -static inline void kvm_clean_pte(pte_t *pte) -{ - clean_pte_table(pte); + dsb(ishst); } static inline pte_t kvm_s2pte_mkwrite(pte_t pte) diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index a2b3eb313a25..b38c10c73579 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -84,6 +84,13 @@ struct kvm_regs { #define KVM_VGIC_V2_DIST_SIZE 0x1000 #define KVM_VGIC_V2_CPU_SIZE 0x2000 +/* Supported VGICv3 address types */ +#define KVM_VGIC_V3_ADDR_TYPE_DIST 2 +#define KVM_VGIC_V3_ADDR_TYPE_REDIST 3 + +#define KVM_VGIC_V3_DIST_SIZE SZ_64K +#define KVM_VGIC_V3_REDIST_SIZE (2 * SZ_64K) + #define KVM_ARM_VCPU_POWER_OFF 0 /* CPU is started in OFF state */ #define KVM_ARM_VCPU_PSCI_0_2 1 /* CPU uses PSCI v0.2 */ diff --git a/arch/arm/kvm/Makefile b/arch/arm/kvm/Makefile index 10d77a66cad5..f19842ea5418 100644 --- a/arch/arm/kvm/Makefile +++ b/arch/arm/kvm/Makefile @@ -21,13 +21,16 @@ obj-$(CONFIG_KVM_ARM_HOST) += hyp/ obj-y += kvm-arm.o init.o interrupts.o obj-y += arm.o handle_exit.o guest.o mmu.o emulate.o reset.o obj-y += coproc.o coproc_a15.o coproc_a7.o mmio.o psci.o perf.o +obj-y += $(KVM)/arm/aarch32.o obj-y += $(KVM)/arm/vgic/vgic.o obj-y += $(KVM)/arm/vgic/vgic-init.o obj-y += $(KVM)/arm/vgic/vgic-irqfd.o obj-y += $(KVM)/arm/vgic/vgic-v2.o +obj-y += $(KVM)/arm/vgic/vgic-v3.o obj-y += $(KVM)/arm/vgic/vgic-mmio.o obj-y += $(KVM)/arm/vgic/vgic-mmio-v2.o +obj-y += $(KVM)/arm/vgic/vgic-mmio-v3.o obj-y += $(KVM)/arm/vgic/vgic-kvm-device.o obj-y += $(KVM)/irqchip.o obj-y += $(KVM)/arm/arch_timer.o diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index c638935baad6..8a4a5637fab1 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -1188,6 +1188,10 @@ static int init_common_resources(void) return -ENOMEM; } + /* set size of VMID supported by CPU */ + kvm_vmid_bits = kvm_get_vmid_bits(); + kvm_info("%d-bit VMID\n", kvm_vmid_bits); + return 0; } @@ -1253,10 +1257,6 @@ static void teardown_hyp_mode(void) static int init_vhe_mode(void) { - /* set size of VMID supported by CPU */ - kvm_vmid_bits = kvm_get_vmid_bits(); - kvm_info("%d-bit VMID\n", kvm_vmid_bits); - kvm_info("VHE mode initialized successfully\n"); return 0; } @@ -1340,10 +1340,6 @@ static int init_hyp_mode(void) } } - /* set size of VMID supported by CPU */ - kvm_vmid_bits = kvm_get_vmid_bits(); - kvm_info("%d-bit VMID\n", kvm_vmid_bits); - kvm_info("Hyp mode initialized successfully\n"); return 0; diff --git a/arch/arm/kvm/coproc.c b/arch/arm/kvm/coproc.c index 1bb2b79c01ff..3e5e4194ef86 100644 --- a/arch/arm/kvm/coproc.c +++ b/arch/arm/kvm/coproc.c @@ -228,6 +228,35 @@ bool access_vm_reg(struct kvm_vcpu *vcpu, return true; } +static bool access_gic_sgi(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + u64 reg; + + if (!p->is_write) + return read_from_write_only(vcpu, p); + + reg = (u64)*vcpu_reg(vcpu, p->Rt2) << 32; + reg |= *vcpu_reg(vcpu, p->Rt1) ; + + vgic_v3_dispatch_sgi(vcpu, reg); + + return true; +} + +static bool access_gic_sre(struct kvm_vcpu *vcpu, + const struct coproc_params *p, + const struct coproc_reg *r) +{ + if (p->is_write) + return ignore_write(vcpu, p); + + *vcpu_reg(vcpu, p->Rt1) = vcpu->arch.vgic_cpu.vgic_v3.vgic_sre; + + return true; +} + /* * We could trap ID_DFR0 and tell the guest we don't support performance * monitoring. Unfortunately the patch to make the kernel check ID_DFR0 was @@ -361,10 +390,16 @@ static const struct coproc_reg cp15_regs[] = { { CRn(10), CRm( 3), Op1( 0), Op2( 1), is32, access_vm_reg, reset_unknown, c10_AMAIR1}, + /* ICC_SGI1R */ + { CRm64(12), Op1( 0), is64, access_gic_sgi}, + /* VBAR: swapped by interrupt.S. */ { CRn(12), CRm( 0), Op1( 0), Op2( 0), is32, NULL, reset_val, c12_VBAR, 0x00000000 }, + /* ICC_SRE */ + { CRn(12), CRm(12), Op1( 0), Op2(5), is32, access_gic_sre }, + /* CONTEXTIDR/TPIDRURW/TPIDRURO/TPIDRPRW: swapped by interrupt.S. */ { CRn(13), CRm( 0), Op1( 0), Op2( 1), is32, access_vm_reg, reset_val, c13_CID, 0x00000000 }, diff --git a/arch/arm/kvm/emulate.c b/arch/arm/kvm/emulate.c index af93e3ffc9f3..0064b86a2c87 100644 --- a/arch/arm/kvm/emulate.c +++ b/arch/arm/kvm/emulate.c @@ -161,105 +161,6 @@ unsigned long *vcpu_spsr(struct kvm_vcpu *vcpu) } } -/* - * A conditional instruction is allowed to trap, even though it - * wouldn't be executed. So let's re-implement the hardware, in - * software! - */ -bool kvm_condition_valid(struct kvm_vcpu *vcpu) -{ - unsigned long cpsr, cond, insn; - - /* - * Exception Code 0 can only happen if we set HCR.TGE to 1, to - * catch undefined instructions, and then we won't get past - * the arm_exit_handlers test anyway. - */ - BUG_ON(!kvm_vcpu_trap_get_class(vcpu)); - - /* Top two bits non-zero? Unconditional. */ - if (kvm_vcpu_get_hsr(vcpu) >> 30) - return true; - - cpsr = *vcpu_cpsr(vcpu); - - /* Is condition field valid? */ - if ((kvm_vcpu_get_hsr(vcpu) & HSR_CV) >> HSR_CV_SHIFT) - cond = (kvm_vcpu_get_hsr(vcpu) & HSR_COND) >> HSR_COND_SHIFT; - else { - /* This can happen in Thumb mode: examine IT state. */ - unsigned long it; - - it = ((cpsr >> 8) & 0xFC) | ((cpsr >> 25) & 0x3); - - /* it == 0 => unconditional. */ - if (it == 0) - return true; - - /* The cond for this insn works out as the top 4 bits. */ - cond = (it >> 4); - } - - /* Shift makes it look like an ARM-mode instruction */ - insn = cond << 28; - return arm_check_condition(insn, cpsr) != ARM_OPCODE_CONDTEST_FAIL; -} - -/** - * adjust_itstate - adjust ITSTATE when emulating instructions in IT-block - * @vcpu: The VCPU pointer - * - * When exceptions occur while instructions are executed in Thumb IF-THEN - * blocks, the ITSTATE field of the CPSR is not advanced (updated), so we have - * to do this little bit of work manually. The fields map like this: - * - * IT[7:0] -> CPSR[26:25],CPSR[15:10] - */ -static void kvm_adjust_itstate(struct kvm_vcpu *vcpu) -{ - unsigned long itbits, cond; - unsigned long cpsr = *vcpu_cpsr(vcpu); - bool is_arm = !(cpsr & PSR_T_BIT); - - BUG_ON(is_arm && (cpsr & PSR_IT_MASK)); - - if (!(cpsr & PSR_IT_MASK)) - return; - - cond = (cpsr & 0xe000) >> 13; - itbits = (cpsr & 0x1c00) >> (10 - 2); - itbits |= (cpsr & (0x3 << 25)) >> 25; - - /* Perform ITAdvance (see page A-52 in ARM DDI 0406C) */ - if ((itbits & 0x7) == 0) - itbits = cond = 0; - else - itbits = (itbits << 1) & 0x1f; - - cpsr &= ~PSR_IT_MASK; - cpsr |= cond << 13; - cpsr |= (itbits & 0x1c) << (10 - 2); - cpsr |= (itbits & 0x3) << 25; - *vcpu_cpsr(vcpu) = cpsr; -} - -/** - * kvm_skip_instr - skip a trapped instruction and proceed to the next - * @vcpu: The vcpu pointer - */ -void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr) -{ - bool is_thumb; - - is_thumb = !!(*vcpu_cpsr(vcpu) & PSR_T_BIT); - if (is_thumb && !is_wide_instr) - *vcpu_pc(vcpu) += 2; - else - *vcpu_pc(vcpu) += 4; - kvm_adjust_itstate(vcpu); -} - - /****************************************************************************** * Inject exceptions into the guest */ @@ -402,3 +303,15 @@ void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr) { inject_abt(vcpu, true, addr); } + +/** + * kvm_inject_vabt - inject an async abort / SError into the guest + * @vcpu: The VCPU to receive the exception + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. + */ +void kvm_inject_vabt(struct kvm_vcpu *vcpu) +{ + vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VA); +} diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 3f1ef0dbc899..4e40d1955e35 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -28,14 +28,6 @@ typedef int (*exit_handle_fn)(struct kvm_vcpu *, struct kvm_run *); -static int handle_svc_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - /* SVC called from Hyp mode should never get here */ - kvm_debug("SVC called from Hyp mode shouldn't go here\n"); - BUG(); - return -EINVAL; /* Squash warning */ -} - static int handle_hvc(struct kvm_vcpu *vcpu, struct kvm_run *run) { int ret; @@ -59,22 +51,6 @@ static int handle_smc(struct kvm_vcpu *vcpu, struct kvm_run *run) return 1; } -static int handle_pabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - /* The hypervisor should never cause aborts */ - kvm_err("Prefetch Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n", - kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu)); - return -EFAULT; -} - -static int handle_dabt_hyp(struct kvm_vcpu *vcpu, struct kvm_run *run) -{ - /* This is either an error in the ws. code or an external abort */ - kvm_err("Data Abort taken from Hyp mode at %#08lx (HSR: %#08x)\n", - kvm_vcpu_get_hfar(vcpu), kvm_vcpu_get_hsr(vcpu)); - return -EFAULT; -} - /** * kvm_handle_wfx - handle a WFI or WFE instructions trapped in guests * @vcpu: the vcpu pointer @@ -112,13 +88,10 @@ static exit_handle_fn arm_exit_handlers[] = { [HSR_EC_CP14_64] = kvm_handle_cp14_access, [HSR_EC_CP_0_13] = kvm_handle_cp_0_13_access, [HSR_EC_CP10_ID] = kvm_handle_cp10_id, - [HSR_EC_SVC_HYP] = handle_svc_hyp, [HSR_EC_HVC] = handle_hvc, [HSR_EC_SMC] = handle_smc, [HSR_EC_IABT] = kvm_handle_guest_abort, - [HSR_EC_IABT_HYP] = handle_pabt_hyp, [HSR_EC_DABT] = kvm_handle_guest_abort, - [HSR_EC_DABT_HYP] = handle_dabt_hyp, }; static exit_handle_fn kvm_get_exit_handler(struct kvm_vcpu *vcpu) @@ -144,6 +117,25 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, { exit_handle_fn exit_handler; + if (ARM_ABORT_PENDING(exception_index)) { + u8 hsr_ec = kvm_vcpu_trap_get_class(vcpu); + + /* + * HVC/SMC already have an adjusted PC, which we need + * to correct in order to return to after having + * injected the abort. + */ + if (hsr_ec == HSR_EC_HVC || hsr_ec == HSR_EC_SMC) { + u32 adj = kvm_vcpu_trap_il_is32bit(vcpu) ? 4 : 2; + *vcpu_pc(vcpu) -= adj; + } + + kvm_inject_vabt(vcpu); + return 1; + } + + exception_index = ARM_EXCEPTION_CODE(exception_index); + switch (exception_index) { case ARM_EXCEPTION_IRQ: return 1; @@ -160,6 +152,9 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, exit_handler = kvm_get_exit_handler(vcpu); return exit_handler(vcpu, run); + case ARM_EXCEPTION_DATA_ABORT: + kvm_inject_vabt(vcpu); + return 1; default: kvm_pr_unimpl("Unsupported exception type: %d", exception_index); diff --git a/arch/arm/kvm/hyp/Makefile b/arch/arm/kvm/hyp/Makefile index 8dfa5f7f9290..3023bb530edf 100644 --- a/arch/arm/kvm/hyp/Makefile +++ b/arch/arm/kvm/hyp/Makefile @@ -5,6 +5,7 @@ KVM=../../../../virt/kvm obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o obj-$(CONFIG_KVM_ARM_HOST) += tlb.o diff --git a/arch/arm/kvm/hyp/entry.S b/arch/arm/kvm/hyp/entry.S index 21c238871c9e..60783f3b57cc 100644 --- a/arch/arm/kvm/hyp/entry.S +++ b/arch/arm/kvm/hyp/entry.S @@ -18,6 +18,7 @@ #include #include #include +#include .arch_extension virt @@ -63,6 +64,36 @@ ENTRY(__guest_exit) ldr lr, [r0, #4] mov r0, r1 + mrs r1, SPSR + mrs r2, ELR_hyp + mrc p15, 4, r3, c5, c2, 0 @ HSR + + /* + * Force loads and stores to complete before unmasking aborts + * and forcing the delivery of the exception. This gives us a + * single instruction window, which the handler will try to + * match. + */ + dsb sy + cpsie a + + .global abort_guest_exit_start +abort_guest_exit_start: + + isb + + .global abort_guest_exit_end +abort_guest_exit_end: + + /* + * If we took an abort, r0[31] will be set, and cmp will set + * the N bit in PSTATE. + */ + cmp r0, #0 + msrmi SPSR_cxsf, r1 + msrmi ELR_hyp, r2 + mcrmi p15, 4, r3, c5, c2, 0 @ HSR + bx lr ENDPROC(__guest_exit) diff --git a/arch/arm/kvm/hyp/hyp-entry.S b/arch/arm/kvm/hyp/hyp-entry.S index 78091383a5d9..96beb53934c9 100644 --- a/arch/arm/kvm/hyp/hyp-entry.S +++ b/arch/arm/kvm/hyp/hyp-entry.S @@ -81,7 +81,6 @@ __kvm_hyp_vector: invalid_vector hyp_undef ARM_EXCEPTION_UNDEFINED invalid_vector hyp_svc ARM_EXCEPTION_SOFTWARE invalid_vector hyp_pabt ARM_EXCEPTION_PREF_ABORT - invalid_vector hyp_dabt ARM_EXCEPTION_DATA_ABORT invalid_vector hyp_fiq ARM_EXCEPTION_FIQ ENTRY(__hyp_do_panic) @@ -164,6 +163,21 @@ hyp_irq: load_vcpu r0 @ Load VCPU pointer to r0 b __guest_exit +hyp_dabt: + push {r0, r1} + mrs r0, ELR_hyp + ldr r1, =abort_guest_exit_start +THUMB( add r1, r1, #1) + cmp r0, r1 + ldrne r1, =abort_guest_exit_end +THUMB( addne r1, r1, #1) + cmpne r0, r1 + pop {r0, r1} + bne __hyp_panic + + orr r0, r0, #(1 << ARM_EXIT_WITH_ABORT_BIT) + eret + .ltorg .popsection diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c index b13caa90cd44..92678b7bd046 100644 --- a/arch/arm/kvm/hyp/switch.c +++ b/arch/arm/kvm/hyp/switch.c @@ -14,6 +14,7 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ +#include #include #include @@ -54,6 +55,15 @@ static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) { u32 val; + /* + * If we pended a virtual abort, preserve it until it gets + * cleared. See B1.9.9 (Virtual Abort exception) for details, + * but the crucial bit is the zeroing of HCR.VA in the + * pseudocode. + */ + if (vcpu->arch.hcr & HCR_VA) + vcpu->arch.hcr = read_sysreg(HCR); + write_sysreg(0, HCR); write_sysreg(0, HSTR); val = read_sysreg(HDCR); @@ -74,14 +84,21 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) write_sysreg(read_sysreg(MIDR), VPIDR); } + static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) { - __vgic_v2_save_state(vcpu); + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + __vgic_v3_save_state(vcpu); + else + __vgic_v2_save_state(vcpu); } static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) { - __vgic_v2_restore_state(vcpu); + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + __vgic_v3_restore_state(vcpu); + else + __vgic_v2_restore_state(vcpu); } static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) @@ -134,7 +151,7 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) return true; } -static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) +int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; @@ -191,8 +208,6 @@ again: return exit_code; } -__alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu); - static const char * const __hyp_panic_string[] = { [ARM_EXCEPTION_RESET] = "\nHYP panic: RST PC:%08x CPSR:%08x", [ARM_EXCEPTION_UNDEFINED] = "\nHYP panic: UNDEF PC:%08x CPSR:%08x", diff --git a/arch/arm/kvm/hyp/tlb.c b/arch/arm/kvm/hyp/tlb.c index a2636001e616..729652854f90 100644 --- a/arch/arm/kvm/hyp/tlb.c +++ b/arch/arm/kvm/hyp/tlb.c @@ -34,7 +34,7 @@ * As v7 does not support flushing per IPA, just nuke the whole TLB * instead, ignoring the ipa value. */ -static void __hyp_text __tlb_flush_vmid(struct kvm *kvm) +void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) { dsb(ishst); @@ -50,21 +50,14 @@ static void __hyp_text __tlb_flush_vmid(struct kvm *kvm) write_sysreg(0, VTTBR); } -__alias(__tlb_flush_vmid) void __kvm_tlb_flush_vmid(struct kvm *kvm); - -static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { - __tlb_flush_vmid(kvm); + __kvm_tlb_flush_vmid(kvm); } -__alias(__tlb_flush_vmid_ipa) void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, - phys_addr_t ipa); - -static void __hyp_text __tlb_flush_vm_context(void) +void __hyp_text __kvm_flush_vm_context(void) { write_sysreg(0, TLBIALLNSNHIS); write_sysreg(0, ICIALLUIS); dsb(ish); } - -__alias(__tlb_flush_vm_context) void __kvm_flush_vm_context(void); diff --git a/arch/arm/kvm/mmio.c b/arch/arm/kvm/mmio.c index 10f80a6c797a..b6e715fd3c90 100644 --- a/arch/arm/kvm/mmio.c +++ b/arch/arm/kvm/mmio.c @@ -126,12 +126,6 @@ static int decode_hsr(struct kvm_vcpu *vcpu, bool *is_write, int *len) int access_size; bool sign_extend; - if (kvm_vcpu_dabt_isextabt(vcpu)) { - /* cache operation on I/O addr, tell guest unsupported */ - kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); - return 1; - } - if (kvm_vcpu_dabt_iss1tw(vcpu)) { /* page table accesses IO mem: tell guest to fix its TTBR */ kvm_inject_dabt(vcpu, kvm_vcpu_get_hfar(vcpu)); diff --git a/arch/arm/kvm/mmu.c b/arch/arm/kvm/mmu.c index 29d0b23af2a9..60e0c1ac86e8 100644 --- a/arch/arm/kvm/mmu.c +++ b/arch/arm/kvm/mmu.c @@ -744,7 +744,6 @@ int kvm_alloc_stage2_pgd(struct kvm *kvm) if (!pgd) return -ENOMEM; - kvm_clean_pgd(pgd); kvm->arch.pgd = pgd; return 0; } @@ -936,7 +935,6 @@ static int stage2_set_pte(struct kvm *kvm, struct kvm_mmu_memory_cache *cache, if (!cache) return 0; /* ignore calls from kvm_set_spte_hva */ pte = mmu_memory_cache_alloc(cache); - kvm_clean_pte(pte); pmd_populate_kernel(NULL, pmd, pte); get_page(virt_to_page(pmd)); } @@ -1434,6 +1432,11 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) int ret, idx; is_iabt = kvm_vcpu_trap_is_iabt(vcpu); + if (unlikely(!is_iabt && kvm_vcpu_dabt_isextabt(vcpu))) { + kvm_inject_vabt(vcpu); + return 1; + } + fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 8ec88e5b290f..ae7dbd79e257 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -79,6 +79,19 @@ #include #include +#define read_gicreg(r) \ + ({ \ + u64 reg; \ + asm volatile("mrs_s %0, " __stringify(r) : "=r" (reg)); \ + reg; \ + }) + +#define write_gicreg(v,r) \ + do { \ + u64 __val = (v); \ + asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\ + } while (0) + /* * Low-level accessors * diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 4b5c977af465..2a2752b5b6aa 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -50,7 +50,7 @@ #define HCR_BSU (3 << 10) #define HCR_BSU_IS (UL(1) << 10) #define HCR_FB (UL(1) << 9) -#define HCR_VA (UL(1) << 8) +#define HCR_VSE (UL(1) << 8) #define HCR_VI (UL(1) << 7) #define HCR_VF (UL(1) << 6) #define HCR_AMO (UL(1) << 5) @@ -80,7 +80,7 @@ #define HCR_GUEST_FLAGS (HCR_TSC | HCR_TSW | HCR_TWE | HCR_TWI | HCR_VM | \ HCR_TVM | HCR_BSU_IS | HCR_FB | HCR_TAC | \ HCR_AMO | HCR_SWIO | HCR_TIDCP | HCR_RW) -#define HCR_VIRT_EXCP_MASK (HCR_VA | HCR_VI | HCR_VF) +#define HCR_VIRT_EXCP_MASK (HCR_VSE | HCR_VI | HCR_VF) #define HCR_INT_OVERRIDE (HCR_FMO | HCR_IMO) #define HCR_HOST_VHE_FLAGS (HCR_RW | HCR_TGE | HCR_E2H) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index 7561f63f1c28..18f746551bf6 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -20,10 +20,15 @@ #include +#define ARM_EXIT_WITH_SERROR_BIT 31 +#define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT)) +#define ARM_SERROR_PENDING(x) !!((x) & (1U << ARM_EXIT_WITH_SERROR_BIT)) + #define ARM_EXCEPTION_IRQ 0 -#define ARM_EXCEPTION_TRAP 1 +#define ARM_EXCEPTION_EL1_SERROR 1 +#define ARM_EXCEPTION_TRAP 2 /* The hyp-stub will return this for any kvm_call_hyp() call */ -#define ARM_EXCEPTION_HYP_GONE 2 +#define ARM_EXCEPTION_HYP_GONE 3 #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) diff --git a/arch/arm64/include/asm/kvm_emulate.h b/arch/arm64/include/asm/kvm_emulate.h index 4cdeae3b17c6..fd9d5fd788f5 100644 --- a/arch/arm64/include/asm/kvm_emulate.h +++ b/arch/arm64/include/asm/kvm_emulate.h @@ -38,6 +38,7 @@ bool kvm_condition_valid32(const struct kvm_vcpu *vcpu); void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr); void kvm_inject_undefined(struct kvm_vcpu *vcpu); +void kvm_inject_vabt(struct kvm_vcpu *vcpu); void kvm_inject_dabt(struct kvm_vcpu *vcpu, unsigned long addr); void kvm_inject_pabt(struct kvm_vcpu *vcpu, unsigned long addr); @@ -147,6 +148,16 @@ static inline u32 kvm_vcpu_get_hsr(const struct kvm_vcpu *vcpu) return vcpu->arch.fault.esr_el2; } +static inline int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) +{ + u32 esr = kvm_vcpu_get_hsr(vcpu); + + if (esr & ESR_ELx_CV) + return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT; + + return -1; +} + static inline unsigned long kvm_vcpu_get_hfar(const struct kvm_vcpu *vcpu) { return vcpu->arch.fault.far_el2; diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index cff510574fae..b18e852d27e8 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -123,6 +123,7 @@ typeof(orig) * __hyp_text fname(void) \ void __vgic_v2_save_state(struct kvm_vcpu *vcpu); void __vgic_v2_restore_state(struct kvm_vcpu *vcpu); +int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index b6bb83400cd8..8f99ab625fc4 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -166,12 +166,6 @@ void kvm_clear_hyp_idmap(void); #define kvm_set_pte(ptep, pte) set_pte(ptep, pte) #define kvm_set_pmd(pmdp, pmd) set_pmd(pmdp, pmd) -static inline void kvm_clean_pgd(pgd_t *pgd) {} -static inline void kvm_clean_pmd(pmd_t *pmd) {} -static inline void kvm_clean_pmd_entry(pmd_t *pmd) {} -static inline void kvm_clean_pte(pte_t *pte) {} -static inline void kvm_clean_pte_entry(pte_t *pte) {} - static inline pte_t kvm_s2pte_mkwrite(pte_t pte) { pte_val(pte) |= PTE_S2_RDWR; diff --git a/arch/arm64/kvm/Kconfig b/arch/arm64/kvm/Kconfig index 9c9edc98d271..6eaf12c1d627 100644 --- a/arch/arm64/kvm/Kconfig +++ b/arch/arm64/kvm/Kconfig @@ -16,7 +16,7 @@ menuconfig VIRTUALIZATION if VIRTUALIZATION -config KVM_ARM_VGIC_V3 +config KVM_ARM_VGIC_V3_ITS bool config KVM @@ -34,7 +34,7 @@ config KVM select KVM_VFIO select HAVE_KVM_EVENTFD select HAVE_KVM_IRQFD - select KVM_ARM_VGIC_V3 + select KVM_ARM_VGIC_V3_ITS select KVM_ARM_PMU if HW_PERF_EVENTS select HAVE_KVM_MSI select HAVE_KVM_IRQCHIP diff --git a/arch/arm64/kvm/Makefile b/arch/arm64/kvm/Makefile index 695eb3c7ef41..d50a82a16ff6 100644 --- a/arch/arm64/kvm/Makefile +++ b/arch/arm64/kvm/Makefile @@ -16,9 +16,10 @@ kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o $(KVM)/e kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/arm.o $(ARM)/mmu.o $(ARM)/mmio.o kvm-$(CONFIG_KVM_ARM_HOST) += $(ARM)/psci.o $(ARM)/perf.o -kvm-$(CONFIG_KVM_ARM_HOST) += emulate.o inject_fault.o regmap.o +kvm-$(CONFIG_KVM_ARM_HOST) += inject_fault.o regmap.o kvm-$(CONFIG_KVM_ARM_HOST) += hyp.o hyp-init.o handle_exit.o kvm-$(CONFIG_KVM_ARM_HOST) += guest.o debug.o reset.o sys_regs.o sys_regs_generic_v8.o +kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/aarch32.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic.o kvm-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/vgic/vgic-init.o diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index fa96fe2bd469..a204adf29f0a 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -170,9 +170,32 @@ int handle_exit(struct kvm_vcpu *vcpu, struct kvm_run *run, { exit_handle_fn exit_handler; + if (ARM_SERROR_PENDING(exception_index)) { + u8 hsr_ec = ESR_ELx_EC(kvm_vcpu_get_hsr(vcpu)); + + /* + * HVC/SMC already have an adjusted PC, which we need + * to correct in order to return to after having + * injected the SError. + */ + if (hsr_ec == ESR_ELx_EC_HVC32 || hsr_ec == ESR_ELx_EC_HVC64 || + hsr_ec == ESR_ELx_EC_SMC32 || hsr_ec == ESR_ELx_EC_SMC64) { + u32 adj = kvm_vcpu_trap_il_is32bit(vcpu) ? 4 : 2; + *vcpu_pc(vcpu) -= adj; + } + + kvm_inject_vabt(vcpu); + return 1; + } + + exception_index = ARM_EXCEPTION_CODE(exception_index); + switch (exception_index) { case ARM_EXCEPTION_IRQ: return 1; + case ARM_EXCEPTION_EL1_SERROR: + kvm_inject_vabt(vcpu); + return 1; case ARM_EXCEPTION_TRAP: /* * See ARM ARM B1.14.1: "Hyp traps on instructions diff --git a/arch/arm64/kvm/hyp/Makefile b/arch/arm64/kvm/hyp/Makefile index 0c85febcc1eb..aaf42ae8d8c3 100644 --- a/arch/arm64/kvm/hyp/Makefile +++ b/arch/arm64/kvm/hyp/Makefile @@ -5,9 +5,9 @@ KVM=../../../../virt/kvm obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v2-sr.o +obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += $(KVM)/arm/hyp/timer-sr.o -obj-$(CONFIG_KVM_ARM_HOST) += vgic-v3-sr.o obj-$(CONFIG_KVM_ARM_HOST) += sysreg-sr.o obj-$(CONFIG_KVM_ARM_HOST) += debug-sr.o obj-$(CONFIG_KVM_ARM_HOST) += entry.o diff --git a/arch/arm64/kvm/hyp/debug-sr.c b/arch/arm64/kvm/hyp/debug-sr.c index 33342a776ec7..4ba5c9095d03 100644 --- a/arch/arm64/kvm/hyp/debug-sr.c +++ b/arch/arm64/kvm/hyp/debug-sr.c @@ -131,9 +131,7 @@ void __hyp_text __debug_cond_restore_host_state(struct kvm_vcpu *vcpu) vcpu->arch.debug_flags &= ~KVM_ARM64_DEBUG_DIRTY; } -static u32 __hyp_text __debug_read_mdcr_el2(void) +u32 __hyp_text __kvm_get_mdcr_el2(void) { return read_sysreg(mdcr_el2); } - -__alias(__debug_read_mdcr_el2) u32 __kvm_get_mdcr_el2(void); diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index ce9e5e5f28cf..12ee62d6d410 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -55,79 +55,111 @@ */ ENTRY(__guest_enter) // x0: vcpu - // x1: host/guest context - // x2-x18: clobbered by macros + // x1: host context + // x2-x17: clobbered by macros + // x18: guest context // Store the host regs save_callee_saved_regs x1 - // Preserve vcpu & host_ctxt for use at exit time - stp x0, x1, [sp, #-16]! + // Store the host_ctxt for use at exit time + str x1, [sp, #-16]! - add x1, x0, #VCPU_CONTEXT + add x18, x0, #VCPU_CONTEXT - // Prepare x0-x1 for later restore by pushing them onto the stack - ldp x2, x3, [x1, #CPU_XREG_OFFSET(0)] - stp x2, x3, [sp, #-16]! + // Restore guest regs x0-x17 + ldp x0, x1, [x18, #CPU_XREG_OFFSET(0)] + ldp x2, x3, [x18, #CPU_XREG_OFFSET(2)] + ldp x4, x5, [x18, #CPU_XREG_OFFSET(4)] + ldp x6, x7, [x18, #CPU_XREG_OFFSET(6)] + ldp x8, x9, [x18, #CPU_XREG_OFFSET(8)] + ldp x10, x11, [x18, #CPU_XREG_OFFSET(10)] + ldp x12, x13, [x18, #CPU_XREG_OFFSET(12)] + ldp x14, x15, [x18, #CPU_XREG_OFFSET(14)] + ldp x16, x17, [x18, #CPU_XREG_OFFSET(16)] - // x2-x18 - ldp x2, x3, [x1, #CPU_XREG_OFFSET(2)] - ldp x4, x5, [x1, #CPU_XREG_OFFSET(4)] - ldp x6, x7, [x1, #CPU_XREG_OFFSET(6)] - ldp x8, x9, [x1, #CPU_XREG_OFFSET(8)] - ldp x10, x11, [x1, #CPU_XREG_OFFSET(10)] - ldp x12, x13, [x1, #CPU_XREG_OFFSET(12)] - ldp x14, x15, [x1, #CPU_XREG_OFFSET(14)] - ldp x16, x17, [x1, #CPU_XREG_OFFSET(16)] - ldr x18, [x1, #CPU_XREG_OFFSET(18)] + // Restore guest regs x19-x29, lr + restore_callee_saved_regs x18 - // x19-x29, lr - restore_callee_saved_regs x1 - - // Last bits of the 64bit state - ldp x0, x1, [sp], #16 + // Restore guest reg x18 + ldr x18, [x18, #CPU_XREG_OFFSET(18)] // Do not touch any register after this! eret ENDPROC(__guest_enter) ENTRY(__guest_exit) - // x0: vcpu - // x1: return code - // x2-x3: free - // x4-x29,lr: vcpu regs - // vcpu x0-x3 on the stack + // x0: return code + // x1: vcpu + // x2-x29,lr: vcpu regs + // vcpu x0-x1 on the stack - add x2, x0, #VCPU_CONTEXT + add x1, x1, #VCPU_CONTEXT - stp x4, x5, [x2, #CPU_XREG_OFFSET(4)] - stp x6, x7, [x2, #CPU_XREG_OFFSET(6)] - stp x8, x9, [x2, #CPU_XREG_OFFSET(8)] - stp x10, x11, [x2, #CPU_XREG_OFFSET(10)] - stp x12, x13, [x2, #CPU_XREG_OFFSET(12)] - stp x14, x15, [x2, #CPU_XREG_OFFSET(14)] - stp x16, x17, [x2, #CPU_XREG_OFFSET(16)] - str x18, [x2, #CPU_XREG_OFFSET(18)] + ALTERNATIVE(nop, SET_PSTATE_PAN(1), ARM64_HAS_PAN, CONFIG_ARM64_PAN) - ldp x6, x7, [sp], #16 // x2, x3 - ldp x4, x5, [sp], #16 // x0, x1 + // Store the guest regs x2 and x3 + stp x2, x3, [x1, #CPU_XREG_OFFSET(2)] - stp x4, x5, [x2, #CPU_XREG_OFFSET(0)] - stp x6, x7, [x2, #CPU_XREG_OFFSET(2)] + // Retrieve the guest regs x0-x1 from the stack + ldp x2, x3, [sp], #16 // x0, x1 - save_callee_saved_regs x2 + // Store the guest regs x0-x1 and x4-x18 + stp x2, x3, [x1, #CPU_XREG_OFFSET(0)] + stp x4, x5, [x1, #CPU_XREG_OFFSET(4)] + stp x6, x7, [x1, #CPU_XREG_OFFSET(6)] + stp x8, x9, [x1, #CPU_XREG_OFFSET(8)] + stp x10, x11, [x1, #CPU_XREG_OFFSET(10)] + stp x12, x13, [x1, #CPU_XREG_OFFSET(12)] + stp x14, x15, [x1, #CPU_XREG_OFFSET(14)] + stp x16, x17, [x1, #CPU_XREG_OFFSET(16)] + str x18, [x1, #CPU_XREG_OFFSET(18)] + + // Store the guest regs x19-x29, lr + save_callee_saved_regs x1 + + // Restore the host_ctxt from the stack + ldr x2, [sp], #16 - // Restore vcpu & host_ctxt from the stack - // (preserving return code in x1) - ldp x0, x2, [sp], #16 // Now restore the host regs restore_callee_saved_regs x2 - mov x0, x1 - ret + // If we have a pending asynchronous abort, now is the + // time to find out. From your VAXorcist book, page 666: + // "Threaten me not, oh Evil one! For I speak with + // the power of DEC, and I command thee to show thyself!" + mrs x2, elr_el2 + mrs x3, esr_el2 + mrs x4, spsr_el2 + mov x5, x0 + + dsb sy // Synchronize against in-flight ld/st + msr daifclr, #4 // Unmask aborts + + // This is our single instruction exception window. A pending + // SError is guaranteed to occur at the earliest when we unmask + // it, and at the latest just after the ISB. + .global abort_guest_exit_start +abort_guest_exit_start: + + isb + + .global abort_guest_exit_end +abort_guest_exit_end: + + // If the exception took place, restore the EL1 exception + // context so that we can report some information. + // Merge the exception code with the SError pending bit. + tbz x0, #ARM_EXIT_WITH_SERROR_BIT, 1f + msr elr_el2, x2 + msr esr_el2, x3 + msr spsr_el2, x4 + orr x0, x0, x5 +1: ret ENDPROC(__guest_exit) ENTRY(__fpsimd_guest_restore) + stp x2, x3, [sp, #-16]! stp x4, lr, [sp, #-16]! alternative_if_not ARM64_HAS_VIRT_HOST_EXTN diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index f6d9694ae3b1..4e92399f7105 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -27,16 +27,6 @@ .text .pushsection .hyp.text, "ax" -.macro save_x0_to_x3 - stp x0, x1, [sp, #-16]! - stp x2, x3, [sp, #-16]! -.endm - -.macro restore_x0_to_x3 - ldp x2, x3, [sp], #16 - ldp x0, x1, [sp], #16 -.endm - .macro do_el2_call /* * Shuffle the parameters before calling the function @@ -79,23 +69,23 @@ ENTRY(__kvm_hyp_teardown) ENDPROC(__kvm_hyp_teardown) el1_sync: // Guest trapped into EL2 - save_x0_to_x3 + stp x0, x1, [sp, #-16]! alternative_if_not ARM64_HAS_VIRT_HOST_EXTN mrs x1, esr_el2 alternative_else mrs x1, esr_el1 alternative_endif - lsr x2, x1, #ESR_ELx_EC_SHIFT + lsr x0, x1, #ESR_ELx_EC_SHIFT - cmp x2, #ESR_ELx_EC_HVC64 + cmp x0, #ESR_ELx_EC_HVC64 b.ne el1_trap - mrs x3, vttbr_el2 // If vttbr is valid, the 64bit guest - cbnz x3, el1_trap // called HVC + mrs x1, vttbr_el2 // If vttbr is valid, the 64bit guest + cbnz x1, el1_trap // called HVC /* Here, we're pretty sure the host called HVC. */ - restore_x0_to_x3 + ldp x0, x1, [sp], #16 cmp x0, #HVC_GET_VECTORS b.ne 1f @@ -113,24 +103,51 @@ alternative_endif el1_trap: /* - * x1: ESR - * x2: ESR_EC + * x0: ESR_EC */ /* Guest accessed VFP/SIMD registers, save host, restore Guest */ - cmp x2, #ESR_ELx_EC_FP_ASIMD + cmp x0, #ESR_ELx_EC_FP_ASIMD b.eq __fpsimd_guest_restore - mrs x0, tpidr_el2 - mov x1, #ARM_EXCEPTION_TRAP + mrs x1, tpidr_el2 + mov x0, #ARM_EXCEPTION_TRAP b __guest_exit el1_irq: - save_x0_to_x3 - mrs x0, tpidr_el2 - mov x1, #ARM_EXCEPTION_IRQ + stp x0, x1, [sp, #-16]! + mrs x1, tpidr_el2 + mov x0, #ARM_EXCEPTION_IRQ b __guest_exit +el1_error: + stp x0, x1, [sp, #-16]! + mrs x1, tpidr_el2 + mov x0, #ARM_EXCEPTION_EL1_SERROR + b __guest_exit + +el2_error: + /* + * Only two possibilities: + * 1) Either we come from the exit path, having just unmasked + * PSTATE.A: change the return code to an EL2 fault, and + * carry on, as we're already in a sane state to handle it. + * 2) Or we come from anywhere else, and that's a bug: we panic. + * + * For (1), x0 contains the original return code and x1 doesn't + * contain anything meaningful at that stage. We can reuse them + * as temp registers. + * For (2), who cares? + */ + mrs x0, elr_el2 + adr x1, abort_guest_exit_start + cmp x0, x1 + adr x1, abort_guest_exit_end + ccmp x0, x1, #4, ne + b.ne __hyp_panic + mov x0, #(1 << ARM_EXIT_WITH_SERROR_BIT) + eret + ENTRY(__hyp_do_panic) mov lr, #(PSR_F_BIT | PSR_I_BIT | PSR_A_BIT | PSR_D_BIT |\ PSR_MODE_EL1h) @@ -155,11 +172,9 @@ ENDPROC(\label) invalid_vector el2h_sync_invalid invalid_vector el2h_irq_invalid invalid_vector el2h_fiq_invalid - invalid_vector el2h_error_invalid invalid_vector el1_sync_invalid invalid_vector el1_irq_invalid invalid_vector el1_fiq_invalid - invalid_vector el1_error_invalid .ltorg @@ -174,15 +189,15 @@ ENTRY(__kvm_hyp_vector) ventry el2h_sync_invalid // Synchronous EL2h ventry el2h_irq_invalid // IRQ EL2h ventry el2h_fiq_invalid // FIQ EL2h - ventry el2h_error_invalid // Error EL2h + ventry el2_error // Error EL2h ventry el1_sync // Synchronous 64-bit EL1 ventry el1_irq // IRQ 64-bit EL1 ventry el1_fiq_invalid // FIQ 64-bit EL1 - ventry el1_error_invalid // Error 64-bit EL1 + ventry el1_error // Error 64-bit EL1 ventry el1_sync // Synchronous 32-bit EL1 ventry el1_irq // IRQ 32-bit EL1 ventry el1_fiq_invalid // FIQ 32-bit EL1 - ventry el1_error_invalid // Error 32-bit EL1 + ventry el1_error // Error 32-bit EL1 ENDPROC(__kvm_hyp_vector) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 5a84b4562603..83037cd62d01 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -16,7 +16,10 @@ */ #include +#include + #include +#include #include static bool __hyp_text __fpsimd_enabled_nvhe(void) @@ -109,6 +112,15 @@ static hyp_alternate_select(__deactivate_traps_arch, static void __hyp_text __deactivate_traps(struct kvm_vcpu *vcpu) { + /* + * If we pended a virtual abort, preserve it until it gets + * cleared. See D1.14.3 (Virtual Interrupts) for details, but + * the crucial bit is "On taking a vSError interrupt, + * HCR_EL2.VSE is cleared to 0." + */ + if (vcpu->arch.hcr_el2 & HCR_VSE) + vcpu->arch.hcr_el2 = read_sysreg(hcr_el2); + __deactivate_traps_arch()(); write_sysreg(0, hstr_el2); write_sysreg(read_sysreg(mdcr_el2) & MDCR_EL2_HPMN_MASK, mdcr_el2); @@ -126,17 +138,13 @@ static void __hyp_text __deactivate_vm(struct kvm_vcpu *vcpu) write_sysreg(0, vttbr_el2); } -static hyp_alternate_select(__vgic_call_save_state, - __vgic_v2_save_state, __vgic_v3_save_state, - ARM64_HAS_SYSREG_GIC_CPUIF); - -static hyp_alternate_select(__vgic_call_restore_state, - __vgic_v2_restore_state, __vgic_v3_restore_state, - ARM64_HAS_SYSREG_GIC_CPUIF); - static void __hyp_text __vgic_save_state(struct kvm_vcpu *vcpu) { - __vgic_call_save_state()(vcpu); + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + __vgic_v3_save_state(vcpu); + else + __vgic_v2_save_state(vcpu); + write_sysreg(read_sysreg(hcr_el2) & ~HCR_INT_OVERRIDE, hcr_el2); } @@ -149,7 +157,10 @@ static void __hyp_text __vgic_restore_state(struct kvm_vcpu *vcpu) val |= vcpu->arch.irq_lines; write_sysreg(val, hcr_el2); - __vgic_call_restore_state()(vcpu); + if (static_branch_unlikely(&kvm_vgic_global_state.gicv3_cpuif)) + __vgic_v3_restore_state(vcpu); + else + __vgic_v2_restore_state(vcpu); } static bool __hyp_text __true_value(void) @@ -232,7 +243,22 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu) return true; } -static int __hyp_text __guest_run(struct kvm_vcpu *vcpu) +static void __hyp_text __skip_instr(struct kvm_vcpu *vcpu) +{ + *vcpu_pc(vcpu) = read_sysreg_el2(elr); + + if (vcpu_mode_is_32bit(vcpu)) { + vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(spsr); + kvm_skip_instr32(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); + write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, spsr); + } else { + *vcpu_pc(vcpu) += 4; + } + + write_sysreg_el2(*vcpu_pc(vcpu), elr); +} + +int __hyp_text __kvm_vcpu_run(struct kvm_vcpu *vcpu) { struct kvm_cpu_context *host_ctxt; struct kvm_cpu_context *guest_ctxt; @@ -267,9 +293,43 @@ again: exit_code = __guest_enter(vcpu, host_ctxt); /* And we're baaack! */ + /* + * We're using the raw exception code in order to only process + * the trap if no SError is pending. We will come back to the + * same PC once the SError has been injected, and replay the + * trapping instruction. + */ if (exit_code == ARM_EXCEPTION_TRAP && !__populate_fault_info(vcpu)) goto again; + if (static_branch_unlikely(&vgic_v2_cpuif_trap) && + exit_code == ARM_EXCEPTION_TRAP) { + bool valid; + + valid = kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_DABT_LOW && + kvm_vcpu_trap_get_fault_type(vcpu) == FSC_FAULT && + kvm_vcpu_dabt_isvalid(vcpu) && + !kvm_vcpu_dabt_isextabt(vcpu) && + !kvm_vcpu_dabt_iss1tw(vcpu); + + if (valid) { + int ret = __vgic_v2_perform_cpuif_access(vcpu); + + if (ret == 1) { + __skip_instr(vcpu); + goto again; + } + + if (ret == -1) { + /* Promote an illegal access to an SError */ + __skip_instr(vcpu); + exit_code = ARM_EXCEPTION_EL1_SERROR; + } + + /* 0 falls through to be handler out of EL2 */ + } + } + fp_enabled = __fpsimd_enabled(); __sysreg_save_guest_state(guest_ctxt); @@ -293,8 +353,6 @@ again: return exit_code; } -__alias(__guest_run) int __kvm_vcpu_run(struct kvm_vcpu *vcpu); - static const char __hyp_panic_string[] = "HYP panic:\nPS:%08llx PC:%016llx ESR:%08llx\nFAR:%016llx HPFAR:%016llx PAR:%016llx\nVCPU:%p\n"; static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par) diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index be8177cdd3bf..9cc0ea784ae6 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -17,7 +17,7 @@ #include -static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) +void __hyp_text __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) { dsb(ishst); @@ -48,10 +48,7 @@ static void __hyp_text __tlb_flush_vmid_ipa(struct kvm *kvm, phys_addr_t ipa) write_sysreg(0, vttbr_el2); } -__alias(__tlb_flush_vmid_ipa) void __kvm_tlb_flush_vmid_ipa(struct kvm *kvm, - phys_addr_t ipa); - -static void __hyp_text __tlb_flush_vmid(struct kvm *kvm) +void __hyp_text __kvm_tlb_flush_vmid(struct kvm *kvm) { dsb(ishst); @@ -67,14 +64,10 @@ static void __hyp_text __tlb_flush_vmid(struct kvm *kvm) write_sysreg(0, vttbr_el2); } -__alias(__tlb_flush_vmid) void __kvm_tlb_flush_vmid(struct kvm *kvm); - -static void __hyp_text __tlb_flush_vm_context(void) +void __hyp_text __kvm_flush_vm_context(void) { dsb(ishst); asm volatile("tlbi alle1is \n" "ic ialluis ": : ); dsb(ish); } - -__alias(__tlb_flush_vm_context) void __kvm_flush_vm_context(void); diff --git a/arch/arm64/kvm/inject_fault.c b/arch/arm64/kvm/inject_fault.c index 898c0e6aedd4..da6a8cfa54a0 100644 --- a/arch/arm64/kvm/inject_fault.c +++ b/arch/arm64/kvm/inject_fault.c @@ -231,3 +231,15 @@ void kvm_inject_undefined(struct kvm_vcpu *vcpu) else inject_undef64(vcpu); } + +/** + * kvm_inject_vabt - inject an async abort / SError into the guest + * @vcpu: The VCPU to receive the exception + * + * It is assumed that this code is called from the VCPU thread and that the + * VCPU therefore is not currently executing guest code. + */ +void kvm_inject_vabt(struct kvm_vcpu *vcpu) +{ + vcpu_set_hcr(vcpu, vcpu_get_hcr(vcpu) | HCR_VSE); +} diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 19b698ef3336..002f0922cd92 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -20,9 +20,11 @@ #include #include #include +#include #include #include #include +#include #define VGIC_V3_MAX_CPUS 255 #define VGIC_V2_MAX_CPUS 8 @@ -49,6 +51,9 @@ struct vgic_global { /* Physical address of vgic virtual cpu interface */ phys_addr_t vcpu_base; + /* GICV mapping */ + void __iomem *vcpu_base_va; + /* virtual control interface mapping */ void __iomem *vctrl_base; @@ -63,6 +68,9 @@ struct vgic_global { /* Only needed for the legacy KVM_CREATE_IRQCHIP */ bool can_emulate_gicv2; + + /* GIC system register CPU interface */ + struct static_key_false gicv3_cpuif; }; extern struct vgic_global kvm_vgic_global_state; @@ -217,7 +225,6 @@ struct vgic_v2_cpu_if { }; struct vgic_v3_cpu_if { -#ifdef CONFIG_KVM_ARM_VGIC_V3 u32 vgic_hcr; u32 vgic_vmcr; u32 vgic_sre; /* Restored only, change ignored */ @@ -227,7 +234,6 @@ struct vgic_v3_cpu_if { u32 vgic_ap0r[4]; u32 vgic_ap1r[4]; u64 vgic_lr[VGIC_V3_MAX_LRS]; -#endif }; struct vgic_cpu { @@ -265,6 +271,8 @@ struct vgic_cpu { bool lpis_enabled; }; +extern struct static_key_false vgic_v2_cpuif_trap; + int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); void kvm_vgic_early_init(struct kvm *kvm); int kvm_vgic_create(struct kvm *kvm, u32 type); @@ -294,13 +302,7 @@ bool kvm_vcpu_has_pending_irqs(struct kvm_vcpu *vcpu); void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu); void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu); -#ifdef CONFIG_KVM_ARM_VGIC_V3 void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg); -#else -static inline void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) -{ -} -#endif /** * kvm_vgic_get_max_vcpus - Get the maximum number of VCPUs allowed by HW diff --git a/arch/arm64/kvm/emulate.c b/virt/kvm/arm/aarch32.c similarity index 90% rename from arch/arm64/kvm/emulate.c rename to virt/kvm/arm/aarch32.c index f87d8fbaa48d..528af4b2d09e 100644 --- a/arch/arm64/kvm/emulate.c +++ b/virt/kvm/arm/aarch32.c @@ -22,8 +22,13 @@ */ #include -#include #include +#include + +#ifndef CONFIG_ARM64 +#define COMPAT_PSR_T_BIT PSR_T_BIT +#define COMPAT_PSR_IT_MASK PSR_IT_MASK +#endif /* * stolen from arch/arm/kernel/opcodes.c @@ -52,16 +57,6 @@ static const unsigned short cc_map[16] = { 0 /* NV */ }; -static int kvm_vcpu_get_condition(const struct kvm_vcpu *vcpu) -{ - u32 esr = kvm_vcpu_get_hsr(vcpu); - - if (esr & ESR_ELx_CV) - return (esr & ESR_ELx_COND_MASK) >> ESR_ELx_COND_SHIFT; - - return -1; -} - /* * Check if a trapped instruction should have been executed or not. */ @@ -114,15 +109,13 @@ bool kvm_condition_valid32(const struct kvm_vcpu *vcpu) * * IT[7:0] -> CPSR[26:25],CPSR[15:10] */ -static void kvm_adjust_itstate(struct kvm_vcpu *vcpu) +static void __hyp_text kvm_adjust_itstate(struct kvm_vcpu *vcpu) { unsigned long itbits, cond; unsigned long cpsr = *vcpu_cpsr(vcpu); bool is_arm = !(cpsr & COMPAT_PSR_T_BIT); - BUG_ON(is_arm && (cpsr & COMPAT_PSR_IT_MASK)); - - if (!(cpsr & COMPAT_PSR_IT_MASK)) + if (is_arm || !(cpsr & COMPAT_PSR_IT_MASK)) return; cond = (cpsr & 0xe000) >> 13; @@ -146,7 +139,7 @@ static void kvm_adjust_itstate(struct kvm_vcpu *vcpu) * kvm_skip_instr - skip a trapped instruction and proceed to the next * @vcpu: The vcpu pointer */ -void kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) +void __hyp_text kvm_skip_instr32(struct kvm_vcpu *vcpu, bool is_wide_instr) { bool is_thumb; diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 4309b60ebf17..27a1f6341d41 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -445,7 +445,7 @@ int kvm_timer_hyp_init(void) if (err) { kvm_err("kvm_arch_timer: can't request interrupt %d (%d)\n", host_vtimer_irq, err); - goto out; + return err; } kvm_info("virtual timer IRQ%d\n", host_vtimer_irq); @@ -453,10 +453,6 @@ int kvm_timer_hyp_init(void) cpuhp_setup_state(CPUHP_AP_KVM_ARM_TIMER_STARTING, "AP_KVM_ARM_TIMER_STARTING", kvm_timer_starting_cpu, kvm_timer_dying_cpu); - goto out; -out_free: - free_percpu_irq(host_vtimer_irq, kvm_get_running_vcpus()); -out: return err; } diff --git a/virt/kvm/arm/hyp/vgic-v2-sr.c b/virt/kvm/arm/hyp/vgic-v2-sr.c index 7cffd9338c49..c8aeb7b91ec8 100644 --- a/virt/kvm/arm/hyp/vgic-v2-sr.c +++ b/virt/kvm/arm/hyp/vgic-v2-sr.c @@ -19,6 +19,7 @@ #include #include +#include #include static void __hyp_text save_maint_int_state(struct kvm_vcpu *vcpu, @@ -167,3 +168,59 @@ void __hyp_text __vgic_v2_restore_state(struct kvm_vcpu *vcpu) writel_relaxed(cpu_if->vgic_vmcr, base + GICH_VMCR); vcpu->arch.vgic_cpu.live_lrs = live_lrs; } + +#ifdef CONFIG_ARM64 +/* + * __vgic_v2_perform_cpuif_access -- perform a GICV access on behalf of the + * guest. + * + * @vcpu: the offending vcpu + * + * Returns: + * 1: GICV access successfully performed + * 0: Not a GICV access + * -1: Illegal GICV access + */ +int __hyp_text __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu) +{ + struct kvm *kvm = kern_hyp_va(vcpu->kvm); + struct vgic_dist *vgic = &kvm->arch.vgic; + phys_addr_t fault_ipa; + void __iomem *addr; + int rd; + + /* Build the full address */ + fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); + fault_ipa |= kvm_vcpu_get_hfar(vcpu) & GENMASK(11, 0); + + /* If not for GICV, move on */ + if (fault_ipa < vgic->vgic_cpu_base || + fault_ipa >= (vgic->vgic_cpu_base + KVM_VGIC_V2_CPU_SIZE)) + return 0; + + /* Reject anything but a 32bit access */ + if (kvm_vcpu_dabt_get_as(vcpu) != sizeof(u32)) + return -1; + + /* Not aligned? Don't bother */ + if (fault_ipa & 3) + return -1; + + rd = kvm_vcpu_dabt_get_rd(vcpu); + addr = kern_hyp_va((kern_hyp_va(&kvm_vgic_global_state))->vcpu_base_va); + addr += fault_ipa - vgic->vgic_cpu_base; + + if (kvm_vcpu_dabt_iswrite(vcpu)) { + u32 data = vcpu_data_guest_to_host(vcpu, + vcpu_get_reg(vcpu, rd), + sizeof(u32)); + writel_relaxed(data, addr); + } else { + u32 data = readl_relaxed(addr); + vcpu_set_reg(vcpu, rd, vcpu_data_host_to_guest(vcpu, data, + sizeof(u32))); + } + + return 1; +} +#endif diff --git a/arch/arm64/kvm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c similarity index 94% rename from arch/arm64/kvm/hyp/vgic-v3-sr.c rename to virt/kvm/arm/hyp/vgic-v3-sr.c index 5f8f80b4a224..3947095cc0a1 100644 --- a/arch/arm64/kvm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -24,19 +24,6 @@ #define vtr_to_max_lr_idx(v) ((v) & 0xf) #define vtr_to_nr_pri_bits(v) (((u32)(v) >> 29) + 1) -#define read_gicreg(r) \ - ({ \ - u64 reg; \ - asm volatile("mrs_s %0, " __stringify(r) : "=r" (reg)); \ - reg; \ - }) - -#define write_gicreg(v,r) \ - do { \ - u64 __val = (v); \ - asm volatile("msr_s " __stringify(r) ", %0" : : "r" (__val));\ - } while (0) - static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) { switch (lr & 0xf) { @@ -335,9 +322,7 @@ void __hyp_text __vgic_v3_init_lrs(void) __gic_v3_set_lr(0, i); } -static u64 __hyp_text __vgic_v3_read_ich_vtr_el2(void) +u64 __hyp_text __vgic_v3_get_ich_vtr_el2(void) { return read_gicreg(ICH_VTR_EL2); } - -__alias(__vgic_v3_read_ich_vtr_el2) u64 __vgic_v3_get_ich_vtr_el2(void); diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index a027569facfa..6e9c40eea208 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -423,6 +423,14 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) if (!kvm_arm_support_pmu_v3()) return -ENODEV; + /* + * We currently require an in-kernel VGIC to use the PMU emulation, + * because we do not support forwarding PMU overflow interrupts to + * userspace yet. + */ + if (!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm)) + return -ENODEV; + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features) || !kvm_arm_pmu_irq_initialized(vcpu)) return -ENXIO; diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index 83777c1cbae0..8cebfbc19e90 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -405,6 +405,10 @@ int kvm_vgic_hyp_init(void) break; case GIC_V3: ret = vgic_v3_probe(gic_kvm_info); + if (!ret) { + static_branch_enable(&kvm_vgic_global_state.gicv3_cpuif); + kvm_info("GIC system register CPU interface enabled\n"); + } break; default: ret = -ENODEV; diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c index b31a51a14efb..d918dcf26a5a 100644 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ b/virt/kvm/arm/vgic/vgic-irqfd.c @@ -46,15 +46,9 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, * @ue: user api routing entry handle * return 0 on success, -EINVAL on errors. */ -#ifdef KVM_CAP_X2APIC_API int kvm_set_routing_entry(struct kvm *kvm, struct kvm_kernel_irq_routing_entry *e, const struct kvm_irq_routing_entry *ue) -#else -/* Remove this version and the ifdefery once merged into 4.8 */ -int kvm_set_routing_entry(struct kvm_kernel_irq_routing_entry *e, - const struct kvm_irq_routing_entry *ue) -#endif { int r = -EINVAL; diff --git a/virt/kvm/arm/vgic/vgic-kvm-device.c b/virt/kvm/arm/vgic/vgic-kvm-device.c index 1813f93b5cde..ce1f4ed9daf4 100644 --- a/virt/kvm/arm/vgic/vgic-kvm-device.c +++ b/virt/kvm/arm/vgic/vgic-kvm-device.c @@ -71,7 +71,6 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) addr_ptr = &vgic->vgic_cpu_base; alignment = SZ_4K; break; -#ifdef CONFIG_KVM_ARM_VGIC_V3 case KVM_VGIC_V3_ADDR_TYPE_DIST: type_needed = KVM_DEV_TYPE_ARM_VGIC_V3; addr_ptr = &vgic->vgic_dist_base; @@ -82,7 +81,6 @@ int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write) addr_ptr = &vgic->vgic_redist_base; alignment = SZ_64K; break; -#endif default: r = -ENODEV; goto out; @@ -219,52 +217,65 @@ int kvm_register_vgic_device(unsigned long type) ret = kvm_register_device_ops(&kvm_arm_vgic_v2_ops, KVM_DEV_TYPE_ARM_VGIC_V2); break; -#ifdef CONFIG_KVM_ARM_VGIC_V3 case KVM_DEV_TYPE_ARM_VGIC_V3: ret = kvm_register_device_ops(&kvm_arm_vgic_v3_ops, KVM_DEV_TYPE_ARM_VGIC_V3); + +#ifdef CONFIG_KVM_ARM_VGIC_V3_ITS if (ret) break; ret = kvm_vgic_register_its_device(); - break; #endif + break; } return ret; } -/** vgic_attr_regs_access: allows user space to read/write VGIC registers - * - * @dev: kvm device handle - * @attr: kvm device attribute - * @reg: address the value is read or written - * @is_write: write flag - * - */ -static int vgic_attr_regs_access(struct kvm_device *dev, - struct kvm_device_attr *attr, - u32 *reg, bool is_write) -{ +struct vgic_reg_attr { + struct kvm_vcpu *vcpu; gpa_t addr; - int cpuid, ret, c; - struct kvm_vcpu *vcpu, *tmp_vcpu; - int vcpu_lock_idx = -1; +}; + +static int parse_vgic_v2_attr(struct kvm_device *dev, + struct kvm_device_attr *attr, + struct vgic_reg_attr *reg_attr) +{ + int cpuid; cpuid = (attr->attr & KVM_DEV_ARM_VGIC_CPUID_MASK) >> KVM_DEV_ARM_VGIC_CPUID_SHIFT; - vcpu = kvm_get_vcpu(dev->kvm, cpuid); - addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; - mutex_lock(&dev->kvm->lock); + if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) + return -EINVAL; - ret = vgic_init(dev->kvm); - if (ret) - goto out; + reg_attr->vcpu = kvm_get_vcpu(dev->kvm, cpuid); + reg_attr->addr = attr->attr & KVM_DEV_ARM_VGIC_OFFSET_MASK; - if (cpuid >= atomic_read(&dev->kvm->online_vcpus)) { - ret = -EINVAL; - goto out; + return 0; +} + +/* unlocks vcpus from @vcpu_lock_idx and smaller */ +static void unlock_vcpus(struct kvm *kvm, int vcpu_lock_idx) +{ + struct kvm_vcpu *tmp_vcpu; + + for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { + tmp_vcpu = kvm_get_vcpu(kvm, vcpu_lock_idx); + mutex_unlock(&tmp_vcpu->mutex); } +} + +static void unlock_all_vcpus(struct kvm *kvm) +{ + unlock_vcpus(kvm, atomic_read(&kvm->online_vcpus) - 1); +} + +/* Returns true if all vcpus were locked, false otherwise */ +static bool lock_all_vcpus(struct kvm *kvm) +{ + struct kvm_vcpu *tmp_vcpu; + int c; /* * Any time a vcpu is run, vcpu_load is called which tries to grab the @@ -272,11 +283,49 @@ static int vgic_attr_regs_access(struct kvm_device *dev, * that no other VCPUs are run and fiddle with the vgic state while we * access it. */ - ret = -EBUSY; - kvm_for_each_vcpu(c, tmp_vcpu, dev->kvm) { - if (!mutex_trylock(&tmp_vcpu->mutex)) - goto out; - vcpu_lock_idx = c; + kvm_for_each_vcpu(c, tmp_vcpu, kvm) { + if (!mutex_trylock(&tmp_vcpu->mutex)) { + unlock_vcpus(kvm, c - 1); + return false; + } + } + + return true; +} + +/** + * vgic_attr_regs_access_v2 - allows user space to access VGIC v2 state + * + * @dev: kvm device handle + * @attr: kvm device attribute + * @reg: address the value is read or written + * @is_write: true if userspace is writing a register + */ +static int vgic_attr_regs_access_v2(struct kvm_device *dev, + struct kvm_device_attr *attr, + u32 *reg, bool is_write) +{ + struct vgic_reg_attr reg_attr; + gpa_t addr; + struct kvm_vcpu *vcpu; + int ret; + + ret = parse_vgic_v2_attr(dev, attr, ®_attr); + if (ret) + return ret; + + vcpu = reg_attr.vcpu; + addr = reg_attr.addr; + + mutex_lock(&dev->kvm->lock); + + ret = vgic_init(dev->kvm); + if (ret) + goto out; + + if (!lock_all_vcpus(dev->kvm)) { + ret = -EBUSY; + goto out; } switch (attr->group) { @@ -291,18 +340,12 @@ static int vgic_attr_regs_access(struct kvm_device *dev, break; } + unlock_all_vcpus(dev->kvm); out: - for (; vcpu_lock_idx >= 0; vcpu_lock_idx--) { - tmp_vcpu = kvm_get_vcpu(dev->kvm, vcpu_lock_idx); - mutex_unlock(&tmp_vcpu->mutex); - } - mutex_unlock(&dev->kvm->lock); return ret; } -/* V2 ops */ - static int vgic_v2_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { @@ -321,7 +364,7 @@ static int vgic_v2_set_attr(struct kvm_device *dev, if (get_user(reg, uaddr)) return -EFAULT; - return vgic_attr_regs_access(dev, attr, ®, true); + return vgic_attr_regs_access_v2(dev, attr, ®, true); } } @@ -343,7 +386,7 @@ static int vgic_v2_get_attr(struct kvm_device *dev, u32 __user *uaddr = (u32 __user *)(long)attr->addr; u32 reg = 0; - ret = vgic_attr_regs_access(dev, attr, ®, false); + ret = vgic_attr_regs_access_v2(dev, attr, ®, false); if (ret) return ret; return put_user(reg, uaddr); @@ -387,10 +430,6 @@ struct kvm_device_ops kvm_arm_vgic_v2_ops = { .has_attr = vgic_v2_has_attr, }; -/* V3 ops */ - -#ifdef CONFIG_KVM_ARM_VGIC_V3 - static int vgic_v3_set_attr(struct kvm_device *dev, struct kvm_device_attr *attr) { @@ -433,5 +472,3 @@ struct kvm_device_ops kvm_arm_vgic_v3_ops = { .get_attr = vgic_v3_get_attr, .has_attr = vgic_v3_has_attr, }; - -#endif /* CONFIG_KVM_ARM_VGIC_V3 */ diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 90d81811fdda..0d3c76a4208b 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -23,7 +23,7 @@ #include "vgic-mmio.h" /* extract @num bytes at @offset bytes offset in data */ -unsigned long extract_bytes(unsigned long data, unsigned int offset, +unsigned long extract_bytes(u64 data, unsigned int offset, unsigned int num) { return (data >> (offset * 8)) & GENMASK_ULL(num * 8 - 1, 0); @@ -42,6 +42,7 @@ u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len, return reg | ((u64)val << lower); } +#ifdef CONFIG_KVM_ARM_VGIC_V3_ITS bool vgic_has_its(struct kvm *kvm) { struct vgic_dist *dist = &kvm->arch.vgic; @@ -51,6 +52,7 @@ bool vgic_has_its(struct kvm *kvm) return dist->has_its; } +#endif static unsigned long vgic_mmio_read_v3_misc(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) @@ -179,7 +181,7 @@ static unsigned long vgic_mmio_read_v3r_typer(struct kvm_vcpu *vcpu, int target_vcpu_id = vcpu->vcpu_id; u64 value; - value = (mpidr & GENMASK(23, 0)) << 32; + value = (u64)(mpidr & GENMASK(23, 0)) << 32; value |= ((target_vcpu_id & 0xffff) << 8); if (target_vcpu_id == atomic_read(&vcpu->kvm->online_vcpus) - 1) value |= GICR_TYPER_LAST; @@ -609,7 +611,7 @@ void vgic_v3_dispatch_sgi(struct kvm_vcpu *vcpu, u64 reg) bool broadcast; sgi = (reg & ICC_SGI1R_SGI_ID_MASK) >> ICC_SGI1R_SGI_ID_SHIFT; - broadcast = reg & BIT(ICC_SGI1R_IRQ_ROUTING_MODE_BIT); + broadcast = reg & BIT_ULL(ICC_SGI1R_IRQ_ROUTING_MODE_BIT); target_cpus = (reg & ICC_SGI1R_TARGET_LIST_MASK) >> ICC_SGI1R_TARGET_LIST_SHIFT; mpidr = SGI_AFFINITY_LEVEL(reg, 3); mpidr |= SGI_AFFINITY_LEVEL(reg, 2); diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 3bad3c5ed431..e18b30ddcdce 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -550,11 +550,9 @@ int vgic_register_dist_iodev(struct kvm *kvm, gpa_t dist_base_address, case VGIC_V2: len = vgic_v2_init_dist_iodev(io_device); break; -#ifdef CONFIG_KVM_ARM_VGIC_V3 case VGIC_V3: len = vgic_v3_init_dist_iodev(io_device); break; -#endif default: BUG_ON(1); } diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index 0b3ecf9d100e..4c34d39d44a0 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -96,7 +96,7 @@ unsigned long vgic_data_mmio_bus_to_host(const void *val, unsigned int len); void vgic_data_host_to_mmio_bus(void *buf, unsigned int len, unsigned long data); -unsigned long extract_bytes(unsigned long data, unsigned int offset, +unsigned long extract_bytes(u64 data, unsigned int offset, unsigned int num); u64 update_64bit_reg(u64 reg, unsigned int offset, unsigned int len, @@ -162,12 +162,10 @@ unsigned int vgic_v2_init_dist_iodev(struct vgic_io_device *dev); unsigned int vgic_v3_init_dist_iodev(struct vgic_io_device *dev); -#ifdef CONFIG_KVM_ARM_VGIC_V3 u64 vgic_sanitise_outer_cacheability(u64 reg); u64 vgic_sanitise_inner_cacheability(u64 reg); u64 vgic_sanitise_shareability(u64 reg); u64 vgic_sanitise_field(u64 reg, u64 field_mask, int field_shift, u64 (*sanitise_fn)(u64)); -#endif #endif diff --git a/virt/kvm/arm/vgic/vgic-v2.c b/virt/kvm/arm/vgic/vgic-v2.c index 0bf6709d1006..0a063af40565 100644 --- a/virt/kvm/arm/vgic/vgic-v2.c +++ b/virt/kvm/arm/vgic/vgic-v2.c @@ -278,12 +278,14 @@ int vgic_v2_map_resources(struct kvm *kvm) goto out; } - ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, - kvm_vgic_global_state.vcpu_base, - KVM_VGIC_V2_CPU_SIZE, true); - if (ret) { - kvm_err("Unable to remap VGIC CPU to VCPU\n"); - goto out; + if (!static_branch_unlikely(&vgic_v2_cpuif_trap)) { + ret = kvm_phys_addr_ioremap(kvm, dist->vgic_cpu_base, + kvm_vgic_global_state.vcpu_base, + KVM_VGIC_V2_CPU_SIZE, true); + if (ret) { + kvm_err("Unable to remap VGIC CPU to VCPU\n"); + goto out; + } } dist->ready = true; @@ -294,6 +296,8 @@ out: return ret; } +DEFINE_STATIC_KEY_FALSE(vgic_v2_cpuif_trap); + /** * vgic_v2_probe - probe for a GICv2 compatible interrupt controller in DT * @node: pointer to the DT node @@ -310,45 +314,51 @@ int vgic_v2_probe(const struct gic_kvm_info *info) return -ENXIO; } - if (!PAGE_ALIGNED(info->vcpu.start)) { - kvm_err("GICV physical address 0x%llx not page aligned\n", - (unsigned long long)info->vcpu.start); - return -ENXIO; - } + if (!PAGE_ALIGNED(info->vcpu.start) || + !PAGE_ALIGNED(resource_size(&info->vcpu))) { + kvm_info("GICV region size/alignment is unsafe, using trapping (reduced performance)\n"); + kvm_vgic_global_state.vcpu_base_va = ioremap(info->vcpu.start, + resource_size(&info->vcpu)); + if (!kvm_vgic_global_state.vcpu_base_va) { + kvm_err("Cannot ioremap GICV\n"); + return -ENOMEM; + } - if (!PAGE_ALIGNED(resource_size(&info->vcpu))) { - kvm_err("GICV size 0x%llx not a multiple of page size 0x%lx\n", - (unsigned long long)resource_size(&info->vcpu), - PAGE_SIZE); - return -ENXIO; + ret = create_hyp_io_mappings(kvm_vgic_global_state.vcpu_base_va, + kvm_vgic_global_state.vcpu_base_va + resource_size(&info->vcpu), + info->vcpu.start); + if (ret) { + kvm_err("Cannot map GICV into hyp\n"); + goto out; + } + + static_branch_enable(&vgic_v2_cpuif_trap); } kvm_vgic_global_state.vctrl_base = ioremap(info->vctrl.start, resource_size(&info->vctrl)); if (!kvm_vgic_global_state.vctrl_base) { kvm_err("Cannot ioremap GICH\n"); - return -ENOMEM; + ret = -ENOMEM; + goto out; } vtr = readl_relaxed(kvm_vgic_global_state.vctrl_base + GICH_VTR); kvm_vgic_global_state.nr_lr = (vtr & 0x3f) + 1; - ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); - if (ret) { - kvm_err("Cannot register GICv2 KVM device\n"); - iounmap(kvm_vgic_global_state.vctrl_base); - return ret; - } - ret = create_hyp_io_mappings(kvm_vgic_global_state.vctrl_base, kvm_vgic_global_state.vctrl_base + resource_size(&info->vctrl), info->vctrl.start); if (ret) { kvm_err("Cannot map VCTRL into hyp\n"); - kvm_unregister_device_ops(KVM_DEV_TYPE_ARM_VGIC_V2); - iounmap(kvm_vgic_global_state.vctrl_base); - return ret; + goto out; + } + + ret = kvm_register_vgic_device(KVM_DEV_TYPE_ARM_VGIC_V2); + if (ret) { + kvm_err("Cannot register GICv2 KVM device\n"); + goto out; } kvm_vgic_global_state.can_emulate_gicv2 = true; @@ -359,4 +369,11 @@ int vgic_v2_probe(const struct gic_kvm_info *info) kvm_info("vgic-v2@%llx\n", info->vctrl.start); return 0; +out: + if (kvm_vgic_global_state.vctrl_base) + iounmap(kvm_vgic_global_state.vctrl_base); + if (kvm_vgic_global_state.vcpu_base_va) + iounmap(kvm_vgic_global_state.vcpu_base_va); + + return ret; } diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index e83b7fe4baae..2893d5ba523a 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -29,7 +29,7 @@ #define DEBUG_SPINLOCK_BUG_ON(p) #endif -struct vgic_global __section(.hyp.text) kvm_vgic_global_state; +struct vgic_global __section(.hyp.text) kvm_vgic_global_state = {.gicv3_cpuif = STATIC_KEY_FALSE_INIT,}; /* * Locking order is always: @@ -645,6 +645,9 @@ next: /* Sync back the hardware VGIC state into our emulation after a guest's run. */ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { + if (unlikely(!vgic_initialized(vcpu->kvm))) + return; + vgic_process_maintenance_interrupt(vcpu); vgic_fold_lr_state(vcpu); vgic_prune_ap_list(vcpu); @@ -653,6 +656,9 @@ void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) /* Flush our emulation state into the GIC hardware before entering the guest. */ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) { + if (unlikely(!vgic_initialized(vcpu->kvm))) + return; + spin_lock(&vcpu->arch.vgic_cpu.ap_list_lock); vgic_flush_lr_state(vcpu); spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); diff --git a/virt/kvm/arm/vgic/vgic.h b/virt/kvm/arm/vgic/vgic.h index 6c4625c46368..9d9e014765a2 100644 --- a/virt/kvm/arm/vgic/vgic.h +++ b/virt/kvm/arm/vgic/vgic.h @@ -72,7 +72,6 @@ static inline void vgic_get_irq_kref(struct vgic_irq *irq) kref_get(&irq->refcount); } -#ifdef CONFIG_KVM_ARM_VGIC_V3 void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu); void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu); void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr); @@ -84,63 +83,14 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu); int vgic_v3_probe(const struct gic_kvm_info *info); int vgic_v3_map_resources(struct kvm *kvm); int vgic_register_redist_iodevs(struct kvm *kvm, gpa_t dist_base_address); + +#ifdef CONFIG_KVM_ARM_VGIC_V3_ITS int vgic_register_its_iodevs(struct kvm *kvm); bool vgic_has_its(struct kvm *kvm); int kvm_vgic_register_its_device(void); void vgic_enable_lpis(struct kvm_vcpu *vcpu); int vgic_its_inject_msi(struct kvm *kvm, struct kvm_msi *msi); #else -static inline void vgic_v3_process_maintenance(struct kvm_vcpu *vcpu) -{ -} - -static inline void vgic_v3_fold_lr_state(struct kvm_vcpu *vcpu) -{ -} - -static inline void vgic_v3_populate_lr(struct kvm_vcpu *vcpu, - struct vgic_irq *irq, int lr) -{ -} - -static inline void vgic_v3_clear_lr(struct kvm_vcpu *vcpu, int lr) -{ -} - -static inline void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) -{ -} - -static inline -void vgic_v3_set_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) -{ -} - -static inline -void vgic_v3_get_vmcr(struct kvm_vcpu *vcpu, struct vgic_vmcr *vmcr) -{ -} - -static inline void vgic_v3_enable(struct kvm_vcpu *vcpu) -{ -} - -static inline int vgic_v3_probe(const struct gic_kvm_info *info) -{ - return -ENODEV; -} - -static inline int vgic_v3_map_resources(struct kvm *kvm) -{ - return -ENODEV; -} - -static inline int vgic_register_redist_iodevs(struct kvm *kvm, - gpa_t dist_base_address) -{ - return -ENODEV; -} - static inline int vgic_register_its_iodevs(struct kvm *kvm) { return -ENODEV;