1
0
Fork 0
* support for chained PMU counters in guests
 * improved SError handling
 * handle Neoverse N1 erratum #1349291
 * allow side-channel mitigation status to be migrated
 * standardise most AArch64 system register accesses to msr_s/mrs_s
 * fix host MPIDR corruption on 32bit
 * selftests ckleanups
 
 x86:
 * PMU event {white,black}listing
 * ability for the guest to disable host-side interrupt polling
 * fixes for enlightened VMCS (Hyper-V pv nested virtualization),
 * new hypercall to yield to IPI target
 * support for passing cstate MSRs through to the guest
 * lots of cleanups and optimizations
 
 Generic:
 * Some txt->rST conversions for the documentation
 -----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.22 (GNU/Linux)
 
 iQEcBAABAgAGBQJdJzdIAAoJEL/70l94x66DQDoH/i83/8kX4I8AWDlushPru4ts
 Q4lCE5VAPha+o4pLb1dtfFL3gTmSbsB1N++JSlqK3JOo6LphIOy6b0wBjQBbAa6U
 3CT1dJaHJoScLLj09vyBlvClGUH2ZKEQTWOiquCCf7JfPofxwPUA6vJ7TYsdkckx
 zR3ygbADWmnfS7hFfiqN3JzuYh9eoooGNWSU+Giq6VF41SiL3IqhBGZhWS0zE9c2
 2c5lpqqdeHmAYNBqsyzNiDRKp7+zLFSmZ7Z5/0L755L8KYwR6F5beTnmBMHvb4lA
 PWH/SWOC8EYR+PEowfrH+TxKZwp0gMn1kcAKjilHk0uCRwG1IzuHAr2jlNxICCk=
 =t/Oq
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Paolo Bonzini:
 "ARM:
   - support for chained PMU counters in guests
   - improved SError handling
   - handle Neoverse N1 erratum #1349291
   - allow side-channel mitigation status to be migrated
   - standardise most AArch64 system register accesses to msr_s/mrs_s
   - fix host MPIDR corruption on 32bit
   - selftests ckleanups

  x86:
   - PMU event {white,black}listing
   - ability for the guest to disable host-side interrupt polling
   - fixes for enlightened VMCS (Hyper-V pv nested virtualization),
   - new hypercall to yield to IPI target
   - support for passing cstate MSRs through to the guest
   - lots of cleanups and optimizations

  Generic:
   - Some txt->rST conversions for the documentation"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (128 commits)
  Documentation: virtual: Add toctree hooks
  Documentation: kvm: Convert cpuid.txt to .rst
  Documentation: virtual: Convert paravirt_ops.txt to .rst
  KVM: x86: Unconditionally enable irqs in guest context
  KVM: x86: PMU Event Filter
  kvm: x86: Fix -Wmissing-prototypes warnings
  KVM: Properly check if "page" is valid in kvm_vcpu_unmap
  KVM: arm/arm64: Initialise host's MPIDRs by reading the actual register
  KVM: LAPIC: Retry tune per-vCPU timer_advance_ns if adaptive tuning goes insane
  kvm: LAPIC: write down valid APIC registers
  KVM: arm64: Migrate _elx sysreg accessors to msr_s/mrs_s
  KVM: doc: Add API documentation on the KVM_REG_ARM_WORKAROUNDS register
  KVM: arm/arm64: Add save/restore support for firmware workaround state
  arm64: KVM: Propagate full Spectre v2 workaround state to KVM guests
  KVM: arm/arm64: Support chained PMU counters
  KVM: arm/arm64: Remove pmc->bitmask
  KVM: arm/arm64: Re-create event when setting counter value
  KVM: arm/arm64: Extract duplicated code to own function
  KVM: arm/arm64: Rename kvm_pmu_{enable/disable}_counter functions
  KVM: LAPIC: ARBPRI is a reserved register for x2APIC
  ...
alistair/sunxi64-5.4-dsi
Linus Torvalds 2019-07-12 15:35:14 -07:00
commit 39d7530d74
92 changed files with 2717 additions and 1456 deletions

View File

@ -86,6 +86,8 @@ stable kernels.
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1188873,1418040| ARM64_ERRATUM_1418040 |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | Neoverse-N1 | #1349291 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
| ARM | MMU-500 | #841119,826419 | N/A |
+----------------+-----------------+-----------------+-----------------------------+
+----------------+-----------------+-----------------+-----------------------------+

View File

@ -0,0 +1,18 @@
.. SPDX-License-Identifier: GPL-2.0
============================
Linux Virtualization Support
============================
.. toctree::
:maxdepth: 2
kvm/index
paravirt_ops
.. only:: html and subproject
Indices
=======
* :ref:`genindex`

View File

@ -4081,6 +4081,32 @@ KVM_ARM_VCPU_FINALIZE call.
See KVM_ARM_VCPU_INIT for details of vcpu features that require finalization
using this ioctl.
4.120 KVM_SET_PMU_EVENT_FILTER
Capability: KVM_CAP_PMU_EVENT_FILTER
Architectures: x86
Type: vm ioctl
Parameters: struct kvm_pmu_event_filter (in)
Returns: 0 on success, -1 on error
struct kvm_pmu_event_filter {
__u32 action;
__u32 nevents;
__u64 events[0];
};
This ioctl restricts the set of PMU events that the guest can program.
The argument holds a list of events which will be allowed or denied.
The eventsel+umask of each event the guest attempts to program is compared
against the events field to determine whether the guest should have access.
This only affects general purpose counters; fixed purpose counters can
be disabled by changing the perfmon CPUID leaf.
Valid values for 'action':
#define KVM_PMU_EVENT_ALLOW 0
#define KVM_PMU_EVENT_DENY 1
5. The kvm_run structure
------------------------
@ -4909,6 +4935,8 @@ Valid bits in args[0] are
#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
Enabling this capability on a VM provides userspace with a way to no
longer intercept some instructions for improved latency in some

View File

@ -28,3 +28,34 @@ The following register is defined:
- Allows any PSCI version implemented by KVM and compatible with
v0.2 to be set with SET_ONE_REG
- Affects the whole VM (even if the register view is per-vcpu)
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
Holds the state of the firmware support to mitigate CVE-2017-5715, as
offered by KVM to the guest via a HVC call. The workaround is described
under SMCCC_ARCH_WORKAROUND_1 in [1].
Accepted values are:
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL: KVM does not offer
firmware support for the workaround. The mitigation status for the
guest is unknown.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL: The workaround HVC call is
available to the guest and required for the mitigation.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED: The workaround HVC call
is available to the guest, but it is not needed on this VCPU.
* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
Holds the state of the firmware support to mitigate CVE-2018-3639, as
offered by KVM to the guest via a HVC call. The workaround is described
under SMCCC_ARCH_WORKAROUND_2 in [1].
Accepted values are:
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL: A workaround is not
available. KVM does not offer firmware support for the workaround.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN: The workaround state is
unknown. KVM does not offer firmware support for the workaround.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL: The workaround is available,
and can be disabled by a vCPU. If
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED is set, it is active for
this vCPU.
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED: The workaround is
always active on this vCPU or it is not needed.
[1] https://developer.arm.com/-/media/developer/pdf/ARM_DEN_0070A_Firmware_interfaces_for_mitigating_CVE-2017-5715.pdf

View File

@ -0,0 +1,107 @@
.. SPDX-License-Identifier: GPL-2.0
==============
KVM CPUID bits
==============
:Author: Glauber Costa <glommer@gmail.com>
A guest running on a kvm host, can check some of its features using
cpuid. This is not always guaranteed to work, since userspace can
mask-out some, or even all KVM-related cpuid features before launching
a guest.
KVM cpuid functions are:
function: KVM_CPUID_SIGNATURE (0x40000000)
returns::
eax = 0x40000001
ebx = 0x4b4d564b
ecx = 0x564b4d56
edx = 0x4d
Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM".
The value in eax corresponds to the maximum cpuid function present in this leaf,
and will be updated if more functions are added in the future.
Note also that old hosts set eax value to 0x0. This should
be interpreted as if the value was 0x40000001.
This function queries the presence of KVM cpuid leafs.
function: define KVM_CPUID_FEATURES (0x40000001)
returns::
ebx, ecx
eax = an OR'ed group of (1 << flag)
where ``flag`` is defined as below:
================================= =========== ================================
flag value meaning
================================= =========== ================================
KVM_FEATURE_CLOCKSOURCE 0 kvmclock available at msrs
0x11 and 0x12
KVM_FEATURE_NOP_IO_DELAY 1 not necessary to perform delays
on PIO operations
KVM_FEATURE_MMU_OP 2 deprecated
KVM_FEATURE_CLOCKSOURCE2 3 kvmclock available at msrs
0x4b564d00 and 0x4b564d01
KVM_FEATURE_ASYNC_PF 4 async pf can be enabled by
writing to msr 0x4b564d02
KVM_FEATURE_STEAL_TIME 5 steal time can be enabled by
writing to msr 0x4b564d03
KVM_FEATURE_PV_EOI 6 paravirtualized end of interrupt
handler can be enabled by
writing to msr 0x4b564d04
KVM_FEATURE_PV_UNHAULT 7 guest checks this feature bit
before enabling paravirtualized
spinlock support
KVM_FEATURE_PV_TLB_FLUSH 9 guest checks this feature bit
before enabling paravirtualized
tlb flush
KVM_FEATURE_ASYNC_PF_VMEXIT 10 paravirtualized async PF VM EXIT
can be enabled by setting bit 2
when writing to msr 0x4b564d02
KVM_FEATURE_PV_SEND_IPI 11 guest checks this feature bit
before enabling paravirtualized
sebd IPIs
KVM_FEATURE_PV_POLL_CONTROL 12 host-side polling on HLT can
be disabled by writing
to msr 0x4b564d05.
KVM_FEATURE_PV_SCHED_YIELD 13 guest checks this feature bit
before using paravirtualized
sched yield.
KVM_FEATURE_CLOCSOURCE_STABLE_BIT 24 host will warn if no guest-side
per-cpu warps are expeced in
kvmclock
================================= =========== ================================
::
edx = an OR'ed group of (1 << flag)
Where ``flag`` here is defined as below:
================== ============ =================================
flag value meaning
================== ============ =================================
KVM_HINTS_REALTIME 0 guest checks this feature bit to
determine that vCPUs are never
preempted for an unlimited time
allowing optimizations
================== ============ =================================

View File

@ -1,83 +0,0 @@
KVM CPUID bits
Glauber Costa <glommer@redhat.com>, Red Hat Inc, 2010
=====================================================
A guest running on a kvm host, can check some of its features using
cpuid. This is not always guaranteed to work, since userspace can
mask-out some, or even all KVM-related cpuid features before launching
a guest.
KVM cpuid functions are:
function: KVM_CPUID_SIGNATURE (0x40000000)
returns : eax = 0x40000001,
ebx = 0x4b4d564b,
ecx = 0x564b4d56,
edx = 0x4d.
Note that this value in ebx, ecx and edx corresponds to the string "KVMKVMKVM".
The value in eax corresponds to the maximum cpuid function present in this leaf,
and will be updated if more functions are added in the future.
Note also that old hosts set eax value to 0x0. This should
be interpreted as if the value was 0x40000001.
This function queries the presence of KVM cpuid leafs.
function: define KVM_CPUID_FEATURES (0x40000001)
returns : ebx, ecx
eax = an OR'ed group of (1 << flag), where each flags is:
flag || value || meaning
=============================================================================
KVM_FEATURE_CLOCKSOURCE || 0 || kvmclock available at msrs
|| || 0x11 and 0x12.
------------------------------------------------------------------------------
KVM_FEATURE_NOP_IO_DELAY || 1 || not necessary to perform delays
|| || on PIO operations.
------------------------------------------------------------------------------
KVM_FEATURE_MMU_OP || 2 || deprecated.
------------------------------------------------------------------------------
KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs
|| || 0x4b564d00 and 0x4b564d01
------------------------------------------------------------------------------
KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by
|| || writing to msr 0x4b564d02
------------------------------------------------------------------------------
KVM_FEATURE_STEAL_TIME || 5 || steal time can be enabled by
|| || writing to msr 0x4b564d03.
------------------------------------------------------------------------------
KVM_FEATURE_PV_EOI || 6 || paravirtualized end of interrupt
|| || handler can be enabled by writing
|| || to msr 0x4b564d04.
------------------------------------------------------------------------------
KVM_FEATURE_PV_UNHALT || 7 || guest checks this feature bit
|| || before enabling paravirtualized
|| || spinlock support.
------------------------------------------------------------------------------
KVM_FEATURE_PV_TLB_FLUSH || 9 || guest checks this feature bit
|| || before enabling paravirtualized
|| || tlb flush.
------------------------------------------------------------------------------
KVM_FEATURE_ASYNC_PF_VMEXIT || 10 || paravirtualized async PF VM exit
|| || can be enabled by setting bit 2
|| || when writing to msr 0x4b564d02
------------------------------------------------------------------------------
KVM_FEATURE_PV_SEND_IPI || 11 || guest checks this feature bit
|| || before using paravirtualized
|| || send IPIs.
------------------------------------------------------------------------------
KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side
|| || per-cpu warps are expected in
|| || kvmclock.
------------------------------------------------------------------------------
edx = an OR'ed group of (1 << flag), where each flags is:
flag || value || meaning
==================================================================================
KVM_HINTS_REALTIME || 0 || guest checks this feature bit to
|| || determine that vCPUs are never
|| || preempted for an unlimited time,
|| || allowing optimizations
----------------------------------------------------------------------------------

View File

@ -141,3 +141,14 @@ a0 corresponds to the APIC ID in the third argument (a2), bit 1
corresponds to the APIC ID a2+1, and so on.
Returns the number of CPUs to which the IPIs were delivered successfully.
7. KVM_HC_SCHED_YIELD
------------------------
Architecture: x86
Status: active
Purpose: Hypercall used to yield if the IPI target vCPU is preempted
a0: destination APIC ID
Usage example: When sending a call-function IPI-many to vCPUs, yield if
any of the IPI target vCPUs was preempted.

View File

@ -0,0 +1,11 @@
.. SPDX-License-Identifier: GPL-2.0
===
KVM
===
.. toctree::
:maxdepth: 2
amd-memory-encryption
cpuid

View File

@ -15,8 +15,6 @@ The acquisition orders for mutexes are as follows:
On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock.
For spinlocks, kvm_lock is taken outside kvm->mmu_lock.
Everything else is a leaf: no other lock is taken inside the critical
sections.
@ -169,7 +167,7 @@ which time it will be set using the Dirty tracking mechanism described above.
------------
Name: kvm_lock
Type: spinlock_t
Type: mutex
Arch: any
Protects: - vm_list

View File

@ -273,3 +273,12 @@ MSR_KVM_EOI_EN: 0x4b564d04
guest must both read the least significant bit in the memory area and
clear it using a single CPU instruction, such as test and clear, or
compare and exchange.
MSR_KVM_POLL_CONTROL: 0x4b564d05
Control host-side polling.
data: Bit 0 enables (1) or disables (0) host-side HLT polling logic.
KVM guests can request the host not to poll on HLT, for example if
they are performing polling themselves.

View File

@ -1,3 +1,6 @@
.. SPDX-License-Identifier: GPL-2.0
============
Paravirt_ops
============
@ -18,15 +21,15 @@ at boot time.
pv_ops operations are classified into three categories:
- simple indirect call
These operations correspond to high level functionality where it is
known that the overhead of indirect call isn't very important.
These operations correspond to high level functionality where it is
known that the overhead of indirect call isn't very important.
- indirect call which allows optimization with binary patch
Usually these operations correspond to low level critical instructions. They
are called frequently and are performance critical. The overhead is
very important.
Usually these operations correspond to low level critical instructions. They
are called frequently and are performance critical. The overhead is
very important.
- a set of macros for hand written assembly code
Hand written assembly codes (.S files) also need paravirtualization
because they include sensitive instructions or some of code paths in
them are very performance critical.
Hand written assembly codes (.S files) also need paravirtualization
because they include sensitive instructions or some of code paths in
them are very performance critical.

View File

@ -271,6 +271,16 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
return vcpu_cp15(vcpu, c0_MPIDR) & MPIDR_HWID_BITMASK;
}
static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
{
return false;
}
static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
bool flag)
{
}
static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
{
*vcpu_cpsr(vcpu) |= PSR_E_BIT;

View File

@ -15,7 +15,6 @@
#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
#include <asm/fpstate.h>
#include <asm/smp_plat.h>
#include <kvm/arm_arch_timer.h>
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
@ -147,11 +146,10 @@ struct kvm_host_data {
typedef struct kvm_host_data kvm_host_data_t;
static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt,
int cpu)
static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
{
/* The host's MPIDR is immutable, so let's set it up at boot time */
cpu_ctxt->cp15[c0_MPIDR] = cpu_logical_map(cpu);
cpu_ctxt->cp15[c0_MPIDR] = read_cpuid_mpidr();
}
struct vcpu_reset_state {
@ -362,7 +360,11 @@ static inline void kvm_vcpu_pmu_restore_host(struct kvm_vcpu *vcpu) {}
static inline void kvm_arm_vhe_guest_enter(void) {}
static inline void kvm_arm_vhe_guest_exit(void) {}
static inline bool kvm_arm_harden_branch_predictor(void)
#define KVM_BP_HARDEN_UNKNOWN -1
#define KVM_BP_HARDEN_WA_NEEDED 0
#define KVM_BP_HARDEN_NOT_REQUIRED 1
static inline int kvm_arm_harden_branch_predictor(void)
{
switch(read_cpuid_part()) {
#ifdef CONFIG_HARDEN_BRANCH_PREDICTOR
@ -370,10 +372,12 @@ static inline bool kvm_arm_harden_branch_predictor(void)
case ARM_CPU_PART_CORTEX_A12:
case ARM_CPU_PART_CORTEX_A15:
case ARM_CPU_PART_CORTEX_A17:
return true;
return KVM_BP_HARDEN_WA_NEEDED;
#endif
case ARM_CPU_PART_CORTEX_A7:
return KVM_BP_HARDEN_NOT_REQUIRED;
default:
return false;
return KVM_BP_HARDEN_UNKNOWN;
}
}

View File

@ -82,13 +82,14 @@
#define VFP_FPEXC __ACCESS_VFP(FPEXC)
/* AArch64 compatibility macros, only for the timer so far */
#define read_sysreg_el0(r) read_sysreg(r##_el0)
#define write_sysreg_el0(v, r) write_sysreg(v, r##_el0)
#define read_sysreg_el0(r) read_sysreg(r##_EL0)
#define write_sysreg_el0(v, r) write_sysreg(v, r##_EL0)
#define SYS_CNTP_CTL_EL0 CNTP_CTL
#define SYS_CNTP_CVAL_EL0 CNTP_CVAL
#define SYS_CNTV_CTL_EL0 CNTV_CTL
#define SYS_CNTV_CVAL_EL0 CNTV_CVAL
#define cntp_ctl_el0 CNTP_CTL
#define cntp_cval_el0 CNTP_CVAL
#define cntv_ctl_el0 CNTV_CTL
#define cntv_cval_el0 CNTV_CVAL
#define cntvoff_el2 CNTVOFF
#define cnthctl_el2 CNTHCTL

View File

@ -214,6 +214,18 @@ struct kvm_vcpu_events {
#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM | KVM_REG_SIZE_U64 | \
KVM_REG_ARM_FW | ((r) & 0xffff))
#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0)
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 KVM_REG_ARM_FW_REG(1)
/* Higher values mean better protection. */
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2)
/* Higher values mean better protection. */
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL 2
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4)
/* Device Control API: ARM VGIC */
#define KVM_DEV_ARM_VGIC_GRP_ADDR 0

View File

@ -96,7 +96,11 @@
* RAS Error Synchronization barrier
*/
.macro esb
#ifdef CONFIG_ARM64_RAS_EXTN
hint #16
#else
nop
#endif
.endm
/*

View File

@ -620,6 +620,12 @@ static inline bool system_has_prio_mask_debugging(void)
system_uses_irq_prio_masking();
}
#define ARM64_BP_HARDEN_UNKNOWN -1
#define ARM64_BP_HARDEN_WA_NEEDED 0
#define ARM64_BP_HARDEN_NOT_REQUIRED 1
int get_spectre_v2_workaround_state(void);
#define ARM64_SSBD_UNKNOWN -1
#define ARM64_SSBD_FORCE_DISABLE 0
#define ARM64_SSBD_KERNEL 1

View File

@ -30,6 +30,12 @@
{ARM_EXCEPTION_TRAP, "TRAP" }, \
{ARM_EXCEPTION_HYP_GONE, "HYP_GONE" }
/*
* Size of the HYP vectors preamble. kvm_patch_vector_branch() generates code
* that jumps over this.
*/
#define KVM_VECTOR_PREAMBLE (2 * AARCH64_INSN_SIZE)
#ifndef __ASSEMBLY__
#include <linux/mm.h>

View File

@ -126,7 +126,7 @@ static inline unsigned long *__vcpu_elr_el1(const struct kvm_vcpu *vcpu)
static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu)
{
if (vcpu->arch.sysregs_loaded_on_cpu)
return read_sysreg_el1(elr);
return read_sysreg_el1(SYS_ELR);
else
return *__vcpu_elr_el1(vcpu);
}
@ -134,7 +134,7 @@ static inline unsigned long vcpu_read_elr_el1(const struct kvm_vcpu *vcpu)
static inline void vcpu_write_elr_el1(const struct kvm_vcpu *vcpu, unsigned long v)
{
if (vcpu->arch.sysregs_loaded_on_cpu)
write_sysreg_el1(v, elr);
write_sysreg_el1(v, SYS_ELR);
else
*__vcpu_elr_el1(vcpu) = v;
}
@ -186,7 +186,7 @@ static inline unsigned long vcpu_read_spsr(const struct kvm_vcpu *vcpu)
return vcpu_read_spsr32(vcpu);
if (vcpu->arch.sysregs_loaded_on_cpu)
return read_sysreg_el1(spsr);
return read_sysreg_el1(SYS_SPSR);
else
return vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1];
}
@ -199,7 +199,7 @@ static inline void vcpu_write_spsr(struct kvm_vcpu *vcpu, unsigned long v)
}
if (vcpu->arch.sysregs_loaded_on_cpu)
write_sysreg_el1(v, spsr);
write_sysreg_el1(v, SYS_SPSR);
else
vcpu_gp_regs(vcpu)->spsr[KVM_SPSR_EL1] = v;
}
@ -353,6 +353,20 @@ static inline unsigned long kvm_vcpu_get_mpidr_aff(struct kvm_vcpu *vcpu)
return vcpu_read_sys_reg(vcpu, MPIDR_EL1) & MPIDR_HWID_BITMASK;
}
static inline bool kvm_arm_get_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu)
{
return vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG;
}
static inline void kvm_arm_set_vcpu_workaround_2_flag(struct kvm_vcpu *vcpu,
bool flag)
{
if (flag)
vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG;
else
vcpu->arch.workaround_flags &= ~VCPU_WORKAROUND_2_FLAG;
}
static inline void kvm_vcpu_set_be(struct kvm_vcpu *vcpu)
{
if (vcpu_mode_is_32bit(vcpu)) {
@ -451,13 +465,13 @@ static inline void kvm_skip_instr(struct kvm_vcpu *vcpu, bool is_wide_instr)
*/
static inline void __hyp_text __kvm_skip_instr(struct kvm_vcpu *vcpu)
{
*vcpu_pc(vcpu) = read_sysreg_el2(elr);
vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(spsr);
*vcpu_pc(vcpu) = read_sysreg_el2(SYS_ELR);
vcpu->arch.ctxt.gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR);
kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu));
write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, spsr);
write_sysreg_el2(*vcpu_pc(vcpu), elr);
write_sysreg_el2(vcpu->arch.ctxt.gp_regs.regs.pstate, SYS_SPSR);
write_sysreg_el2(*vcpu_pc(vcpu), SYS_ELR);
}
#endif /* __ARM64_KVM_EMULATE_H__ */

View File

@ -19,12 +19,12 @@
#include <asm/arch_gicv3.h>
#include <asm/barrier.h>
#include <asm/cpufeature.h>
#include <asm/cputype.h>
#include <asm/daifflags.h>
#include <asm/fpsimd.h>
#include <asm/kvm.h>
#include <asm/kvm_asm.h>
#include <asm/kvm_mmio.h>
#include <asm/smp_plat.h>
#include <asm/thread_info.h>
#define __KVM_HAVE_ARCH_INTC_INITIALIZED
@ -484,11 +484,10 @@ struct kvm_vcpu *kvm_mpidr_to_vcpu(struct kvm *kvm, unsigned long mpidr);
DECLARE_PER_CPU(kvm_host_data_t, kvm_host_data);
static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt,
int cpu)
static inline void kvm_init_host_cpu_context(struct kvm_cpu_context *cpu_ctxt)
{
/* The host's MPIDR is immutable, so let's set it up at boot time */
cpu_ctxt->sys_regs[MPIDR_EL1] = cpu_logical_map(cpu);
cpu_ctxt->sys_regs[MPIDR_EL1] = read_cpuid_mpidr();
}
void __kvm_enable_ssbs(void);
@ -621,9 +620,21 @@ static inline void kvm_arm_vhe_guest_exit(void)
isb();
}
static inline bool kvm_arm_harden_branch_predictor(void)
#define KVM_BP_HARDEN_UNKNOWN -1
#define KVM_BP_HARDEN_WA_NEEDED 0
#define KVM_BP_HARDEN_NOT_REQUIRED 1
static inline int kvm_arm_harden_branch_predictor(void)
{
return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR);
switch (get_spectre_v2_workaround_state()) {
case ARM64_BP_HARDEN_WA_NEEDED:
return KVM_BP_HARDEN_WA_NEEDED;
case ARM64_BP_HARDEN_NOT_REQUIRED:
return KVM_BP_HARDEN_NOT_REQUIRED;
case ARM64_BP_HARDEN_UNKNOWN:
default:
return KVM_BP_HARDEN_UNKNOWN;
}
}
#define KVM_SSBD_UNKNOWN -1

View File

@ -18,7 +18,7 @@
#define read_sysreg_elx(r,nvh,vh) \
({ \
u64 reg; \
asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##nvh),\
asm volatile(ALTERNATIVE(__mrs_s("%0", r##nvh), \
__mrs_s("%0", r##vh), \
ARM64_HAS_VIRT_HOST_EXTN) \
: "=r" (reg)); \
@ -28,7 +28,7 @@
#define write_sysreg_elx(v,r,nvh,vh) \
do { \
u64 __val = (u64)(v); \
asm volatile(ALTERNATIVE("msr " __stringify(r##nvh) ", %x0",\
asm volatile(ALTERNATIVE(__msr_s(r##nvh, "%x0"), \
__msr_s(r##vh, "%x0"), \
ARM64_HAS_VIRT_HOST_EXTN) \
: : "rZ" (__val)); \
@ -37,55 +37,15 @@
/*
* Unified accessors for registers that have a different encoding
* between VHE and non-VHE. They must be specified without their "ELx"
* encoding.
* encoding, but with the SYS_ prefix, as defined in asm/sysreg.h.
*/
#define read_sysreg_el2(r) \
({ \
u64 reg; \
asm volatile(ALTERNATIVE("mrs %0, " __stringify(r##_EL2),\
"mrs %0, " __stringify(r##_EL1),\
ARM64_HAS_VIRT_HOST_EXTN) \
: "=r" (reg)); \
reg; \
})
#define write_sysreg_el2(v,r) \
do { \
u64 __val = (u64)(v); \
asm volatile(ALTERNATIVE("msr " __stringify(r##_EL2) ", %x0",\
"msr " __stringify(r##_EL1) ", %x0",\
ARM64_HAS_VIRT_HOST_EXTN) \
: : "rZ" (__val)); \
} while (0)
#define read_sysreg_el0(r) read_sysreg_elx(r, _EL0, _EL02)
#define write_sysreg_el0(v,r) write_sysreg_elx(v, r, _EL0, _EL02)
#define read_sysreg_el1(r) read_sysreg_elx(r, _EL1, _EL12)
#define write_sysreg_el1(v,r) write_sysreg_elx(v, r, _EL1, _EL12)
/* The VHE specific system registers and their encoding */
#define sctlr_EL12 sys_reg(3, 5, 1, 0, 0)
#define cpacr_EL12 sys_reg(3, 5, 1, 0, 2)
#define ttbr0_EL12 sys_reg(3, 5, 2, 0, 0)
#define ttbr1_EL12 sys_reg(3, 5, 2, 0, 1)
#define tcr_EL12 sys_reg(3, 5, 2, 0, 2)
#define afsr0_EL12 sys_reg(3, 5, 5, 1, 0)
#define afsr1_EL12 sys_reg(3, 5, 5, 1, 1)
#define esr_EL12 sys_reg(3, 5, 5, 2, 0)
#define far_EL12 sys_reg(3, 5, 6, 0, 0)
#define mair_EL12 sys_reg(3, 5, 10, 2, 0)
#define amair_EL12 sys_reg(3, 5, 10, 3, 0)
#define vbar_EL12 sys_reg(3, 5, 12, 0, 0)
#define contextidr_EL12 sys_reg(3, 5, 13, 0, 1)
#define cntkctl_EL12 sys_reg(3, 5, 14, 1, 0)
#define cntp_tval_EL02 sys_reg(3, 5, 14, 2, 0)
#define cntp_ctl_EL02 sys_reg(3, 5, 14, 2, 1)
#define cntp_cval_EL02 sys_reg(3, 5, 14, 2, 2)
#define cntv_tval_EL02 sys_reg(3, 5, 14, 3, 0)
#define cntv_ctl_EL02 sys_reg(3, 5, 14, 3, 1)
#define cntv_cval_EL02 sys_reg(3, 5, 14, 3, 2)
#define spsr_EL12 sys_reg(3, 5, 4, 0, 0)
#define elr_EL12 sys_reg(3, 5, 4, 0, 1)
#define read_sysreg_el2(r) read_sysreg_elx(r, _EL2, _EL1)
#define write_sysreg_el2(v,r) write_sysreg_elx(v, r, _EL2, _EL1)
/**
* hyp_alternate_select - Generates patchable code sequences that are

View File

@ -191,6 +191,9 @@
#define SYS_APGAKEYLO_EL1 sys_reg(3, 0, 2, 3, 0)
#define SYS_APGAKEYHI_EL1 sys_reg(3, 0, 2, 3, 1)
#define SYS_SPSR_EL1 sys_reg(3, 0, 4, 0, 0)
#define SYS_ELR_EL1 sys_reg(3, 0, 4, 0, 1)
#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0)
#define SYS_AFSR0_EL1 sys_reg(3, 0, 5, 1, 0)
@ -382,6 +385,9 @@
#define SYS_CNTP_CTL_EL0 sys_reg(3, 3, 14, 2, 1)
#define SYS_CNTP_CVAL_EL0 sys_reg(3, 3, 14, 2, 2)
#define SYS_CNTV_CTL_EL0 sys_reg(3, 3, 14, 3, 1)
#define SYS_CNTV_CVAL_EL0 sys_reg(3, 3, 14, 3, 2)
#define SYS_AARCH32_CNTP_TVAL sys_reg(0, 0, 14, 2, 0)
#define SYS_AARCH32_CNTP_CTL sys_reg(0, 0, 14, 2, 1)
#define SYS_AARCH32_CNTP_CVAL sys_reg(0, 2, 0, 14, 0)
@ -392,14 +398,17 @@
#define __TYPER_CRm(n) (0xc | (((n) >> 3) & 0x3))
#define SYS_PMEVTYPERn_EL0(n) sys_reg(3, 3, 14, __TYPER_CRm(n), __PMEV_op2(n))
#define SYS_PMCCFILTR_EL0 sys_reg (3, 3, 14, 15, 7)
#define SYS_PMCCFILTR_EL0 sys_reg(3, 3, 14, 15, 7)
#define SYS_ZCR_EL2 sys_reg(3, 4, 1, 2, 0)
#define SYS_DACR32_EL2 sys_reg(3, 4, 3, 0, 0)
#define SYS_SPSR_EL2 sys_reg(3, 4, 4, 0, 0)
#define SYS_ELR_EL2 sys_reg(3, 4, 4, 0, 1)
#define SYS_IFSR32_EL2 sys_reg(3, 4, 5, 0, 1)
#define SYS_ESR_EL2 sys_reg(3, 4, 5, 2, 0)
#define SYS_VSESR_EL2 sys_reg(3, 4, 5, 2, 3)
#define SYS_FPEXC32_EL2 sys_reg(3, 4, 5, 3, 0)
#define SYS_FAR_EL2 sys_reg(3, 4, 6, 0, 0)
#define SYS_VDISR_EL2 sys_reg(3, 4, 12, 1, 1)
#define __SYS__AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x)
@ -444,7 +453,29 @@
#define SYS_ICH_LR15_EL2 __SYS__LR8_EL2(7)
/* VHE encodings for architectural EL0/1 system registers */
#define SYS_SCTLR_EL12 sys_reg(3, 5, 1, 0, 0)
#define SYS_CPACR_EL12 sys_reg(3, 5, 1, 0, 2)
#define SYS_ZCR_EL12 sys_reg(3, 5, 1, 2, 0)
#define SYS_TTBR0_EL12 sys_reg(3, 5, 2, 0, 0)
#define SYS_TTBR1_EL12 sys_reg(3, 5, 2, 0, 1)
#define SYS_TCR_EL12 sys_reg(3, 5, 2, 0, 2)
#define SYS_SPSR_EL12 sys_reg(3, 5, 4, 0, 0)
#define SYS_ELR_EL12 sys_reg(3, 5, 4, 0, 1)
#define SYS_AFSR0_EL12 sys_reg(3, 5, 5, 1, 0)
#define SYS_AFSR1_EL12 sys_reg(3, 5, 5, 1, 1)
#define SYS_ESR_EL12 sys_reg(3, 5, 5, 2, 0)
#define SYS_FAR_EL12 sys_reg(3, 5, 6, 0, 0)
#define SYS_MAIR_EL12 sys_reg(3, 5, 10, 2, 0)
#define SYS_AMAIR_EL12 sys_reg(3, 5, 10, 3, 0)
#define SYS_VBAR_EL12 sys_reg(3, 5, 12, 0, 0)
#define SYS_CONTEXTIDR_EL12 sys_reg(3, 5, 13, 0, 1)
#define SYS_CNTKCTL_EL12 sys_reg(3, 5, 14, 1, 0)
#define SYS_CNTP_TVAL_EL02 sys_reg(3, 5, 14, 2, 0)
#define SYS_CNTP_CTL_EL02 sys_reg(3, 5, 14, 2, 1)
#define SYS_CNTP_CVAL_EL02 sys_reg(3, 5, 14, 2, 2)
#define SYS_CNTV_TVAL_EL02 sys_reg(3, 5, 14, 3, 0)
#define SYS_CNTV_CTL_EL02 sys_reg(3, 5, 14, 3, 1)
#define SYS_CNTV_CVAL_EL02 sys_reg(3, 5, 14, 3, 2)
/* Common SCTLR_ELx flags. */
#define SCTLR_ELx_DSSBS (_BITUL(44))

View File

@ -229,6 +229,16 @@ struct kvm_vcpu_events {
#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
KVM_REG_ARM_FW | ((r) & 0xffff))
#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0)
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 KVM_REG_ARM_FW_REG(1)
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL 0
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL 1
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED 2
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 KVM_REG_ARM_FW_REG(2)
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL 0
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN 1
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL 2
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED 3
#define KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED (1U << 4)
/* SVE registers */
#define KVM_REG_ARM64_SVE (0x15 << KVM_REG_ARM_COPROC_SHIFT)

View File

@ -554,6 +554,17 @@ cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused)
static bool __hardenbp_enab = true;
static bool __spectrev2_safe = true;
int get_spectre_v2_workaround_state(void)
{
if (__spectrev2_safe)
return ARM64_BP_HARDEN_NOT_REQUIRED;
if (!__hardenbp_enab)
return ARM64_BP_HARDEN_UNKNOWN;
return ARM64_BP_HARDEN_WA_NEEDED;
}
/*
* List of CPUs that do not need any Spectre-v2 mitigation at all.
*/
@ -854,13 +865,15 @@ ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr,
ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr,
char *buf)
{
if (__spectrev2_safe)
switch (get_spectre_v2_workaround_state()) {
case ARM64_BP_HARDEN_NOT_REQUIRED:
return sprintf(buf, "Not affected\n");
if (__hardenbp_enab)
case ARM64_BP_HARDEN_WA_NEEDED:
return sprintf(buf, "Mitigation: Branch predictor hardening\n");
return sprintf(buf, "Vulnerable\n");
case ARM64_BP_HARDEN_UNKNOWN:
default:
return sprintf(buf, "Vulnerable\n");
}
}
ssize_t cpu_show_spec_store_bypass(struct device *dev,

View File

@ -871,6 +871,10 @@ bool arm64_is_fatal_ras_serror(struct pt_regs *regs, unsigned int esr)
/*
* The CPU can't make progress. The exception may have
* been imprecise.
*
* Neoverse-N1 #1349291 means a non-KVM SError reported as
* Unrecoverable should be treated as Uncontainable. We
* call arm64_serror_panic() in both cases.
*/
return true;

View File

@ -6,6 +6,7 @@
#include <linux/linkage.h>
#include <asm/alternative.h>
#include <asm/asm-offsets.h>
#include <asm/assembler.h>
#include <asm/fpsimdmacros.h>
@ -52,6 +53,20 @@ ENTRY(__guest_enter)
// Store the host regs
save_callee_saved_regs x1
// Now the host state is stored if we have a pending RAS SError it must
// affect the host. If any asynchronous exception is pending we defer
// the guest entry. The DSB isn't necessary before v8.2 as any SError
// would be fatal.
alternative_if ARM64_HAS_RAS_EXTN
dsb nshst
isb
alternative_else_nop_endif
mrs x1, isr_el1
cbz x1, 1f
mov x0, #ARM_EXCEPTION_IRQ
ret
1:
add x18, x0, #VCPU_CONTEXT
// Macro ptrauth_switch_to_guest format:
@ -127,8 +142,8 @@ ENTRY(__guest_exit)
alternative_if ARM64_HAS_RAS_EXTN
// If we have the RAS extensions we can consume a pending error
// without an unmask-SError and isb.
esb
// without an unmask-SError and isb. The ESB-instruction consumed any
// pending guest error when we took the exception from the guest.
mrs_s x2, SYS_DISR_EL1
str x2, [x1, #(VCPU_FAULT_DISR - VCPU_CONTEXT)]
cbz x2, 1f
@ -136,8 +151,16 @@ alternative_if ARM64_HAS_RAS_EXTN
orr x0, x0, #(1<<ARM_EXIT_WITH_SERROR_BIT)
1: ret
alternative_else
// If we have a pending asynchronous abort, now is the
// time to find out. From your VAXorcist book, page 666:
dsb sy // Synchronize against in-flight ld/st
isb // Prevent an early read of side-effect free ISR
mrs x2, isr_el1
tbnz x2, #8, 2f // ISR_EL1.A
ret
nop
2:
alternative_endif
// We know we have a pending asynchronous abort, now is the
// time to flush it out. From your VAXorcist book, page 666:
// "Threaten me not, oh Evil one! For I speak with
// the power of DEC, and I command thee to show thyself!"
mrs x2, elr_el2
@ -145,10 +168,7 @@ alternative_else
mrs x4, spsr_el2
mov x5, x0
dsb sy // Synchronize against in-flight ld/st
nop
msr daifclr, #4 // Unmask aborts
alternative_endif
// This is our single instruction exception window. A pending
// SError is guaranteed to occur at the earliest when we unmask
@ -161,6 +181,8 @@ abort_guest_exit_start:
.global abort_guest_exit_end
abort_guest_exit_end:
msr daifset, #4 // Mask aborts
// If the exception took place, restore the EL1 exception
// context so that we can report some information.
// Merge the exception code with the SError pending bit.

View File

@ -216,17 +216,34 @@ ENDPROC(\label)
.align 11
.macro check_preamble_length start, end
/* kvm_patch_vector_branch() generates code that jumps over the preamble. */
.if ((\end-\start) != KVM_VECTOR_PREAMBLE)
.error "KVM vector preamble length mismatch"
.endif
.endm
.macro valid_vect target
.align 7
661:
esb
stp x0, x1, [sp, #-16]!
662:
b \target
check_preamble_length 661b, 662b
.endm
.macro invalid_vect target
.align 7
661:
b \target
nop
662:
ldp x0, x1, [sp], #16
b \target
check_preamble_length 661b, 662b
.endm
ENTRY(__kvm_hyp_vector)
@ -254,13 +271,14 @@ ENDPROC(__kvm_hyp_vector)
#ifdef CONFIG_KVM_INDIRECT_VECTORS
.macro hyp_ventry
.align 7
1: .rept 27
1: esb
.rept 26
nop
.endr
/*
* The default sequence is to directly branch to the KVM vectors,
* using the computed offset. This applies for VHE as well as
* !ARM64_HARDEN_EL2_VECTORS.
* !ARM64_HARDEN_EL2_VECTORS. The first vector must always run the preamble.
*
* For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced
* with:
@ -271,12 +289,13 @@ ENDPROC(__kvm_hyp_vector)
* movk x0, #((addr >> 32) & 0xffff), lsl #32
* br x0
*
* Where addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + 4.
* Where:
* addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + KVM_VECTOR_PREAMBLE.
* See kvm_patch_vector_branch for details.
*/
alternative_cb kvm_patch_vector_branch
b __kvm_hyp_vector + (1b - 0b)
nop
stp x0, x1, [sp, #-16]!
b __kvm_hyp_vector + (1b - 0b + KVM_VECTOR_PREAMBLE)
nop
nop
nop
@ -301,6 +320,7 @@ ENTRY(__bp_harden_hyp_vecs_end)
.popsection
ENTRY(__smccc_workaround_1_smc_start)
esb
sub sp, sp, #(8 * 4)
stp x2, x3, [sp, #(8 * 0)]
stp x0, x1, [sp, #(8 * 2)]

View File

@ -284,7 +284,7 @@ static bool __hyp_text __populate_fault_info(struct kvm_vcpu *vcpu)
if (ec != ESR_ELx_EC_DABT_LOW && ec != ESR_ELx_EC_IABT_LOW)
return true;
far = read_sysreg_el2(far);
far = read_sysreg_el2(SYS_FAR);
/*
* The HPFAR can be invalid if the stage 2 fault did not
@ -401,7 +401,7 @@ static bool __hyp_text __hyp_handle_fpsimd(struct kvm_vcpu *vcpu)
static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code)
{
if (ARM_EXCEPTION_CODE(*exit_code) != ARM_EXCEPTION_IRQ)
vcpu->arch.fault.esr_el2 = read_sysreg_el2(esr);
vcpu->arch.fault.esr_el2 = read_sysreg_el2(SYS_ESR);
/*
* We're using the raw exception code in order to only process
@ -697,8 +697,8 @@ static void __hyp_text __hyp_call_panic_nvhe(u64 spsr, u64 elr, u64 par,
asm volatile("ldr %0, =__hyp_panic_string" : "=r" (str_va));
__hyp_do_panic(str_va,
spsr, elr,
read_sysreg(esr_el2), read_sysreg_el2(far),
spsr, elr,
read_sysreg(esr_el2), read_sysreg_el2(SYS_FAR),
read_sysreg(hpfar_el2), par, vcpu);
}
@ -713,15 +713,15 @@ static void __hyp_call_panic_vhe(u64 spsr, u64 elr, u64 par,
panic(__hyp_panic_string,
spsr, elr,
read_sysreg_el2(esr), read_sysreg_el2(far),
read_sysreg_el2(SYS_ESR), read_sysreg_el2(SYS_FAR),
read_sysreg(hpfar_el2), par, vcpu);
}
NOKPROBE_SYMBOL(__hyp_call_panic_vhe);
void __hyp_text __noreturn hyp_panic(struct kvm_cpu_context *host_ctxt)
{
u64 spsr = read_sysreg_el2(spsr);
u64 elr = read_sysreg_el2(elr);
u64 spsr = read_sysreg_el2(SYS_SPSR);
u64 elr = read_sysreg_el2(SYS_ELR);
u64 par = read_sysreg(par_el1);
if (!has_vhe())

View File

@ -43,33 +43,33 @@ static void __hyp_text __sysreg_save_user_state(struct kvm_cpu_context *ctxt)
static void __hyp_text __sysreg_save_el1_state(struct kvm_cpu_context *ctxt)
{
ctxt->sys_regs[CSSELR_EL1] = read_sysreg(csselr_el1);
ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(sctlr);
ctxt->sys_regs[SCTLR_EL1] = read_sysreg_el1(SYS_SCTLR);
ctxt->sys_regs[ACTLR_EL1] = read_sysreg(actlr_el1);
ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(cpacr);
ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(ttbr0);
ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(ttbr1);
ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(tcr);
ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(esr);
ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(afsr0);
ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(afsr1);
ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(far);
ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(mair);
ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(vbar);
ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(contextidr);
ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(amair);
ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(cntkctl);
ctxt->sys_regs[CPACR_EL1] = read_sysreg_el1(SYS_CPACR);
ctxt->sys_regs[TTBR0_EL1] = read_sysreg_el1(SYS_TTBR0);
ctxt->sys_regs[TTBR1_EL1] = read_sysreg_el1(SYS_TTBR1);
ctxt->sys_regs[TCR_EL1] = read_sysreg_el1(SYS_TCR);
ctxt->sys_regs[ESR_EL1] = read_sysreg_el1(SYS_ESR);
ctxt->sys_regs[AFSR0_EL1] = read_sysreg_el1(SYS_AFSR0);
ctxt->sys_regs[AFSR1_EL1] = read_sysreg_el1(SYS_AFSR1);
ctxt->sys_regs[FAR_EL1] = read_sysreg_el1(SYS_FAR);
ctxt->sys_regs[MAIR_EL1] = read_sysreg_el1(SYS_MAIR);
ctxt->sys_regs[VBAR_EL1] = read_sysreg_el1(SYS_VBAR);
ctxt->sys_regs[CONTEXTIDR_EL1] = read_sysreg_el1(SYS_CONTEXTIDR);
ctxt->sys_regs[AMAIR_EL1] = read_sysreg_el1(SYS_AMAIR);
ctxt->sys_regs[CNTKCTL_EL1] = read_sysreg_el1(SYS_CNTKCTL);
ctxt->sys_regs[PAR_EL1] = read_sysreg(par_el1);
ctxt->sys_regs[TPIDR_EL1] = read_sysreg(tpidr_el1);
ctxt->gp_regs.sp_el1 = read_sysreg(sp_el1);
ctxt->gp_regs.elr_el1 = read_sysreg_el1(elr);
ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(spsr);
ctxt->gp_regs.elr_el1 = read_sysreg_el1(SYS_ELR);
ctxt->gp_regs.spsr[KVM_SPSR_EL1]= read_sysreg_el1(SYS_SPSR);
}
static void __hyp_text __sysreg_save_el2_return_state(struct kvm_cpu_context *ctxt)
{
ctxt->gp_regs.regs.pc = read_sysreg_el2(elr);
ctxt->gp_regs.regs.pstate = read_sysreg_el2(spsr);
ctxt->gp_regs.regs.pc = read_sysreg_el2(SYS_ELR);
ctxt->gp_regs.regs.pstate = read_sysreg_el2(SYS_SPSR);
if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
ctxt->sys_regs[DISR_EL1] = read_sysreg_s(SYS_VDISR_EL2);
@ -109,35 +109,35 @@ static void __hyp_text __sysreg_restore_common_state(struct kvm_cpu_context *ctx
static void __hyp_text __sysreg_restore_user_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
write_sysreg(ctxt->sys_regs[TPIDR_EL0], tpidr_el0);
write_sysreg(ctxt->sys_regs[TPIDRRO_EL0], tpidrro_el0);
}
static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt)
{
write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2);
write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1);
write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], sctlr);
write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], cpacr);
write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], ttbr0);
write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], ttbr1);
write_sysreg_el1(ctxt->sys_regs[TCR_EL1], tcr);
write_sysreg_el1(ctxt->sys_regs[ESR_EL1], esr);
write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], afsr0);
write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], afsr1);
write_sysreg_el1(ctxt->sys_regs[FAR_EL1], far);
write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], mair);
write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], vbar);
write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],contextidr);
write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], amair);
write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], cntkctl);
write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR);
write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1);
write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR);
write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0);
write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1);
write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR);
write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR);
write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0);
write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1);
write_sysreg_el1(ctxt->sys_regs[FAR_EL1], SYS_FAR);
write_sysreg_el1(ctxt->sys_regs[MAIR_EL1], SYS_MAIR);
write_sysreg_el1(ctxt->sys_regs[VBAR_EL1], SYS_VBAR);
write_sysreg_el1(ctxt->sys_regs[CONTEXTIDR_EL1],SYS_CONTEXTIDR);
write_sysreg_el1(ctxt->sys_regs[AMAIR_EL1], SYS_AMAIR);
write_sysreg_el1(ctxt->sys_regs[CNTKCTL_EL1], SYS_CNTKCTL);
write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1);
write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1);
write_sysreg(ctxt->gp_regs.sp_el1, sp_el1);
write_sysreg_el1(ctxt->gp_regs.elr_el1, elr);
write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],spsr);
write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR);
write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR);
}
static void __hyp_text
@ -160,8 +160,8 @@ __sysreg_restore_el2_return_state(struct kvm_cpu_context *ctxt)
if (!(mode & PSR_MODE32_BIT) && mode >= PSR_MODE_EL2t)
pstate = PSR_MODE_EL2h | PSR_IL_BIT;
write_sysreg_el2(ctxt->gp_regs.regs.pc, elr);
write_sysreg_el2(pstate, spsr);
write_sysreg_el2(ctxt->gp_regs.regs.pc, SYS_ELR);
write_sysreg_el2(pstate, SYS_SPSR);
if (cpus_have_const_cap(ARM64_HAS_RAS_EXTN))
write_sysreg_s(ctxt->sys_regs[DISR_EL1], SYS_VDISR_EL2);

View File

@ -33,12 +33,12 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm,
* in the TCR_EL1 register. We also need to prevent it to
* allocate IPA->PA walks, so we enable the S1 MMU...
*/
val = cxt->tcr = read_sysreg_el1(tcr);
val = cxt->tcr = read_sysreg_el1(SYS_TCR);
val |= TCR_EPD1_MASK | TCR_EPD0_MASK;
write_sysreg_el1(val, tcr);
val = cxt->sctlr = read_sysreg_el1(sctlr);
write_sysreg_el1(val, SYS_TCR);
val = cxt->sctlr = read_sysreg_el1(SYS_SCTLR);
val |= SCTLR_ELx_M;
write_sysreg_el1(val, sctlr);
write_sysreg_el1(val, SYS_SCTLR);
}
/*
@ -85,8 +85,8 @@ static void __hyp_text __tlb_switch_to_host_vhe(struct kvm *kvm,
if (cpus_have_const_cap(ARM64_WORKAROUND_1165522)) {
/* Restore the registers to what they were */
write_sysreg_el1(cxt->tcr, tcr);
write_sysreg_el1(cxt->sctlr, sctlr);
write_sysreg_el1(cxt->tcr, SYS_TCR);
write_sysreg_el1(cxt->sctlr, SYS_SCTLR);
}
local_irq_restore(cxt->flags);

View File

@ -16,7 +16,7 @@
static bool __hyp_text __is_be(struct kvm_vcpu *vcpu)
{
if (vcpu_mode_is_32bit(vcpu))
return !!(read_sysreg_el2(spsr) & PSR_AA32_E_BIT);
return !!(read_sysreg_el2(SYS_SPSR) & PSR_AA32_E_BIT);
return !!(read_sysreg(SCTLR_EL1) & SCTLR_ELx_EE);
}

View File

@ -152,7 +152,7 @@ unsigned long vcpu_read_spsr32(const struct kvm_vcpu *vcpu)
switch (spsr_idx) {
case KVM_SPSR_SVC:
return read_sysreg_el1(spsr);
return read_sysreg_el1(SYS_SPSR);
case KVM_SPSR_ABT:
return read_sysreg(spsr_abt);
case KVM_SPSR_UND:
@ -177,7 +177,7 @@ void vcpu_write_spsr32(struct kvm_vcpu *vcpu, unsigned long v)
switch (spsr_idx) {
case KVM_SPSR_SVC:
write_sysreg_el1(v, spsr);
write_sysreg_el1(v, SYS_SPSR);
case KVM_SPSR_ABT:
write_sysreg(v, spsr_abt);
case KVM_SPSR_UND:

View File

@ -81,24 +81,24 @@ u64 vcpu_read_sys_reg(const struct kvm_vcpu *vcpu, int reg)
*/
switch (reg) {
case CSSELR_EL1: return read_sysreg_s(SYS_CSSELR_EL1);
case SCTLR_EL1: return read_sysreg_s(sctlr_EL12);
case SCTLR_EL1: return read_sysreg_s(SYS_SCTLR_EL12);
case ACTLR_EL1: return read_sysreg_s(SYS_ACTLR_EL1);
case CPACR_EL1: return read_sysreg_s(cpacr_EL12);
case TTBR0_EL1: return read_sysreg_s(ttbr0_EL12);
case TTBR1_EL1: return read_sysreg_s(ttbr1_EL12);
case TCR_EL1: return read_sysreg_s(tcr_EL12);
case ESR_EL1: return read_sysreg_s(esr_EL12);
case AFSR0_EL1: return read_sysreg_s(afsr0_EL12);
case AFSR1_EL1: return read_sysreg_s(afsr1_EL12);
case FAR_EL1: return read_sysreg_s(far_EL12);
case MAIR_EL1: return read_sysreg_s(mair_EL12);
case VBAR_EL1: return read_sysreg_s(vbar_EL12);
case CONTEXTIDR_EL1: return read_sysreg_s(contextidr_EL12);
case CPACR_EL1: return read_sysreg_s(SYS_CPACR_EL12);
case TTBR0_EL1: return read_sysreg_s(SYS_TTBR0_EL12);
case TTBR1_EL1: return read_sysreg_s(SYS_TTBR1_EL12);
case TCR_EL1: return read_sysreg_s(SYS_TCR_EL12);
case ESR_EL1: return read_sysreg_s(SYS_ESR_EL12);
case AFSR0_EL1: return read_sysreg_s(SYS_AFSR0_EL12);
case AFSR1_EL1: return read_sysreg_s(SYS_AFSR1_EL12);
case FAR_EL1: return read_sysreg_s(SYS_FAR_EL12);
case MAIR_EL1: return read_sysreg_s(SYS_MAIR_EL12);
case VBAR_EL1: return read_sysreg_s(SYS_VBAR_EL12);
case CONTEXTIDR_EL1: return read_sysreg_s(SYS_CONTEXTIDR_EL12);
case TPIDR_EL0: return read_sysreg_s(SYS_TPIDR_EL0);
case TPIDRRO_EL0: return read_sysreg_s(SYS_TPIDRRO_EL0);
case TPIDR_EL1: return read_sysreg_s(SYS_TPIDR_EL1);
case AMAIR_EL1: return read_sysreg_s(amair_EL12);
case CNTKCTL_EL1: return read_sysreg_s(cntkctl_EL12);
case AMAIR_EL1: return read_sysreg_s(SYS_AMAIR_EL12);
case CNTKCTL_EL1: return read_sysreg_s(SYS_CNTKCTL_EL12);
case PAR_EL1: return read_sysreg_s(SYS_PAR_EL1);
case DACR32_EL2: return read_sysreg_s(SYS_DACR32_EL2);
case IFSR32_EL2: return read_sysreg_s(SYS_IFSR32_EL2);
@ -124,24 +124,24 @@ void vcpu_write_sys_reg(struct kvm_vcpu *vcpu, u64 val, int reg)
*/
switch (reg) {
case CSSELR_EL1: write_sysreg_s(val, SYS_CSSELR_EL1); return;
case SCTLR_EL1: write_sysreg_s(val, sctlr_EL12); return;
case SCTLR_EL1: write_sysreg_s(val, SYS_SCTLR_EL12); return;
case ACTLR_EL1: write_sysreg_s(val, SYS_ACTLR_EL1); return;
case CPACR_EL1: write_sysreg_s(val, cpacr_EL12); return;
case TTBR0_EL1: write_sysreg_s(val, ttbr0_EL12); return;
case TTBR1_EL1: write_sysreg_s(val, ttbr1_EL12); return;
case TCR_EL1: write_sysreg_s(val, tcr_EL12); return;
case ESR_EL1: write_sysreg_s(val, esr_EL12); return;
case AFSR0_EL1: write_sysreg_s(val, afsr0_EL12); return;
case AFSR1_EL1: write_sysreg_s(val, afsr1_EL12); return;
case FAR_EL1: write_sysreg_s(val, far_EL12); return;
case MAIR_EL1: write_sysreg_s(val, mair_EL12); return;
case VBAR_EL1: write_sysreg_s(val, vbar_EL12); return;
case CONTEXTIDR_EL1: write_sysreg_s(val, contextidr_EL12); return;
case CPACR_EL1: write_sysreg_s(val, SYS_CPACR_EL12); return;
case TTBR0_EL1: write_sysreg_s(val, SYS_TTBR0_EL12); return;
case TTBR1_EL1: write_sysreg_s(val, SYS_TTBR1_EL12); return;
case TCR_EL1: write_sysreg_s(val, SYS_TCR_EL12); return;
case ESR_EL1: write_sysreg_s(val, SYS_ESR_EL12); return;
case AFSR0_EL1: write_sysreg_s(val, SYS_AFSR0_EL12); return;
case AFSR1_EL1: write_sysreg_s(val, SYS_AFSR1_EL12); return;
case FAR_EL1: write_sysreg_s(val, SYS_FAR_EL12); return;
case MAIR_EL1: write_sysreg_s(val, SYS_MAIR_EL12); return;
case VBAR_EL1: write_sysreg_s(val, SYS_VBAR_EL12); return;
case CONTEXTIDR_EL1: write_sysreg_s(val, SYS_CONTEXTIDR_EL12); return;
case TPIDR_EL0: write_sysreg_s(val, SYS_TPIDR_EL0); return;
case TPIDRRO_EL0: write_sysreg_s(val, SYS_TPIDRRO_EL0); return;
case TPIDR_EL1: write_sysreg_s(val, SYS_TPIDR_EL1); return;
case AMAIR_EL1: write_sysreg_s(val, amair_EL12); return;
case CNTKCTL_EL1: write_sysreg_s(val, cntkctl_EL12); return;
case AMAIR_EL1: write_sysreg_s(val, SYS_AMAIR_EL12); return;
case CNTKCTL_EL1: write_sysreg_s(val, SYS_CNTKCTL_EL12); return;
case PAR_EL1: write_sysreg_s(val, SYS_PAR_EL1); return;
case DACR32_EL2: write_sysreg_s(val, SYS_DACR32_EL2); return;
case IFSR32_EL2: write_sysreg_s(val, SYS_IFSR32_EL2); return;
@ -865,12 +865,12 @@ static bool access_pmcnten(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
if (r->Op2 & 0x1) {
/* accessing PMCNTENSET_EL0 */
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) |= val;
kvm_pmu_enable_counter(vcpu, val);
kvm_pmu_enable_counter_mask(vcpu, val);
kvm_vcpu_pmu_restore_guest(vcpu);
} else {
/* accessing PMCNTENCLR_EL0 */
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) &= ~val;
kvm_pmu_disable_counter(vcpu, val);
kvm_pmu_disable_counter_mask(vcpu, val);
}
} else {
p->regval = __vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask;

View File

@ -170,11 +170,10 @@ void kvm_patch_vector_branch(struct alt_instr *alt,
addr |= ((u64)origptr & GENMASK_ULL(10, 7));
/*
* Branch to the second instruction in the vectors in order to
* avoid the initial store on the stack (which we already
* perform in the hardening vectors).
* Branch over the preamble in order to avoid the initial store on
* the stack (which we already perform in the hardening vectors).
*/
addr += AARCH64_INSN_SIZE;
addr += KVM_VECTOR_PREAMBLE;
/* stp x0, x1, [sp, #-16]! */
insn = aarch64_insn_gen_load_store_pair(AARCH64_INSN_REG_0,

View File

@ -123,9 +123,9 @@ int kvm_arch_hardware_setup(void)
return 0;
}
void kvm_arch_check_processor_compat(void *rtn)
int kvm_arch_check_processor_compat(void)
{
*(int *)rtn = 0;
return 0;
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)

View File

@ -414,9 +414,9 @@ int kvm_arch_hardware_setup(void)
return 0;
}
void kvm_arch_check_processor_compat(void *rtn)
int kvm_arch_check_processor_compat(void)
{
*(int *)rtn = kvmppc_core_check_processor_compat();
return kvmppc_core_check_processor_compat();
}
int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)

View File

@ -912,7 +912,6 @@ extern int kvm_s390_gisc_register(struct kvm *kvm, u32 gisc);
extern int kvm_s390_gisc_unregister(struct kvm *kvm, u32 gisc);
static inline void kvm_arch_hardware_disable(void) {}
static inline void kvm_arch_check_processor_compat(void *rtn) {}
static inline void kvm_arch_sync_events(struct kvm *kvm) {}
static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {}
static inline void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) {}

View File

@ -227,6 +227,11 @@ int kvm_arch_hardware_enable(void)
return 0;
}
int kvm_arch_check_processor_compat(void)
{
return 0;
}
static void kvm_gmap_notifier(struct gmap *gmap, unsigned long start,
unsigned long end);
@ -2418,13 +2423,13 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags);
if (!kvm->arch.sca)
goto out_err;
spin_lock(&kvm_lock);
mutex_lock(&kvm_lock);
sca_offset += 16;
if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE)
sca_offset = 0;
kvm->arch.sca = (struct bsca_block *)
((char *) kvm->arch.sca + sca_offset);
spin_unlock(&kvm_lock);
mutex_unlock(&kvm_lock);
sprintf(debug_name, "kvm-%u", current->pid);

View File

@ -686,6 +686,7 @@ struct kvm_vcpu_arch {
u32 virtual_tsc_mult;
u32 virtual_tsc_khz;
s64 ia32_tsc_adjust_msr;
u64 msr_ia32_power_ctl;
u64 tsc_scaling_ratio;
atomic_t nmi_queued; /* unprocessed asynchronous NMIs */
@ -752,6 +753,8 @@ struct kvm_vcpu_arch {
struct gfn_to_hva_cache data;
} pv_eoi;
u64 msr_kvm_poll_control;
/*
* Indicate whether the access faults on its page table in guest
* which is set when fix page fault and used to detect unhandeable
@ -879,6 +882,7 @@ struct kvm_arch {
bool mwait_in_guest;
bool hlt_in_guest;
bool pause_in_guest;
bool cstate_in_guest;
unsigned long irq_sources_bitmap;
s64 kvmclock_offset;
@ -926,6 +930,8 @@ struct kvm_arch {
bool guest_can_read_msr_platform_info;
bool exception_payload_enabled;
struct kvm_pmu_event_filter *pmu_event_filter;
};
struct kvm_vm_stat {
@ -996,7 +1002,7 @@ struct kvm_x86_ops {
int (*disabled_by_bios)(void); /* __init */
int (*hardware_enable)(void);
void (*hardware_disable)(void);
void (*check_processor_compatibility)(void *rtn);
int (*check_processor_compatibility)(void);/* __init */
int (*hardware_setup)(void); /* __init */
void (*hardware_unsetup)(void); /* __exit */
bool (*cpu_has_accelerated_tpr)(void);
@ -1110,7 +1116,7 @@ struct kvm_x86_ops {
int (*check_intercept)(struct kvm_vcpu *vcpu,
struct x86_instruction_info *info,
enum x86_intercept_stage stage);
void (*handle_external_intr)(struct kvm_vcpu *vcpu);
void (*handle_exit_irqoff)(struct kvm_vcpu *vcpu);
bool (*mpx_supported)(void);
bool (*xsaves_supported)(void);
bool (*umip_emulated)(void);
@ -1529,7 +1535,6 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
unsigned long ipi_bitmap_high, u32 min,
unsigned long icr, int op_64_bit);
u64 kvm_get_arch_capabilities(void);
void kvm_define_shared_msr(unsigned index, u32 msr);
int kvm_set_shared_msr(unsigned index, u64 val, u64 mask);

View File

@ -378,10 +378,11 @@ struct kvm_sync_regs {
struct kvm_vcpu_events events;
};
#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
#define KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT (1 << 4)
#define KVM_STATE_NESTED_FORMAT_VMX 0
#define KVM_STATE_NESTED_FORMAT_SVM 1 /* unused */
@ -432,4 +433,14 @@ struct kvm_nested_state {
} data;
};
/* for KVM_CAP_PMU_EVENT_FILTER */
struct kvm_pmu_event_filter {
__u32 action;
__u32 nevents;
__u64 events[0];
};
#define KVM_PMU_EVENT_ALLOW 0
#define KVM_PMU_EVENT_DENY 1
#endif /* _ASM_X86_KVM_H */

View File

@ -29,6 +29,8 @@
#define KVM_FEATURE_PV_TLB_FLUSH 9
#define KVM_FEATURE_ASYNC_PF_VMEXIT 10
#define KVM_FEATURE_PV_SEND_IPI 11
#define KVM_FEATURE_POLL_CONTROL 12
#define KVM_FEATURE_PV_SCHED_YIELD 13
#define KVM_HINTS_REALTIME 0
@ -47,6 +49,7 @@
#define MSR_KVM_ASYNC_PF_EN 0x4b564d02
#define MSR_KVM_STEAL_TIME 0x4b564d03
#define MSR_KVM_PV_EOI_EN 0x4b564d04
#define MSR_KVM_POLL_CONTROL 0x4b564d05
struct kvm_steal_time {
__u64 steal;

View File

@ -146,7 +146,6 @@
#define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1
#define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2
#define VMX_ABORT_VMCS_CORRUPTED 3
#define VMX_ABORT_LOAD_HOST_MSR_FAIL 4
#endif /* _UAPIVMX_H */

View File

@ -527,6 +527,21 @@ static void kvm_setup_pv_ipi(void)
pr_info("KVM setup pv IPIs\n");
}
static void kvm_smp_send_call_func_ipi(const struct cpumask *mask)
{
int cpu;
native_send_call_func_ipi(mask);
/* Make sure other vCPUs get a chance to run if they need to. */
for_each_cpu(cpu, mask) {
if (vcpu_is_preempted(cpu)) {
kvm_hypercall1(KVM_HC_SCHED_YIELD, per_cpu(x86_cpu_to_apicid, cpu));
break;
}
}
}
static void __init kvm_smp_prepare_cpus(unsigned int max_cpus)
{
native_smp_prepare_cpus(max_cpus);
@ -638,6 +653,12 @@ static void __init kvm_guest_init(void)
#ifdef CONFIG_SMP
smp_ops.smp_prepare_cpus = kvm_smp_prepare_cpus;
smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
if (kvm_para_has_feature(KVM_FEATURE_PV_SCHED_YIELD) &&
!kvm_para_has_hint(KVM_HINTS_REALTIME) &&
kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) {
smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
pr_info("KVM setup pv sched yield\n");
}
if (cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "x86/kvm:online",
kvm_cpu_online, kvm_cpu_down_prepare) < 0)
pr_err("kvm_guest: Failed to install cpu hotplug callbacks\n");

View File

@ -41,6 +41,7 @@ config KVM
select PERF_EVENTS
select HAVE_KVM_MSI
select HAVE_KVM_CPU_RELAX_INTERCEPT
select HAVE_KVM_NO_POLL
select KVM_GENERIC_DIRTYLOG_READ_PROTECT
select KVM_VFIO
select SRCU

View File

@ -134,6 +134,16 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
(best->eax & (1 << KVM_FEATURE_PV_UNHALT)))
best->eax &= ~(1 << KVM_FEATURE_PV_UNHALT);
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT)) {
best = kvm_find_cpuid_entry(vcpu, 0x1, 0);
if (best) {
if (vcpu->arch.ia32_misc_enable_msr & MSR_IA32_MISC_ENABLE_MWAIT)
best->ecx |= F(MWAIT);
else
best->ecx &= ~F(MWAIT);
}
}
/* Update physical-address width */
vcpu->arch.maxphyaddr = cpuid_query_maxphyaddr(vcpu);
kvm_mmu_reset_context(vcpu);
@ -276,19 +286,38 @@ static void cpuid_mask(u32 *word, int wordnum)
*word &= boot_cpu_data.x86_capability[wordnum];
}
static void do_cpuid_1_ent(struct kvm_cpuid_entry2 *entry, u32 function,
static void do_host_cpuid(struct kvm_cpuid_entry2 *entry, u32 function,
u32 index)
{
entry->function = function;
entry->index = index;
entry->flags = 0;
cpuid_count(entry->function, entry->index,
&entry->eax, &entry->ebx, &entry->ecx, &entry->edx);
entry->flags = 0;
switch (function) {
case 2:
entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
break;
case 4:
case 7:
case 0xb:
case 0xd:
case 0x14:
case 0x8000001d:
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
break;
}
}
static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
u32 func, u32 index, int *nent, int maxnent)
static int __do_cpuid_func_emulated(struct kvm_cpuid_entry2 *entry,
u32 func, int *nent, int maxnent)
{
entry->function = func;
entry->index = 0;
entry->flags = 0;
switch (func) {
case 0:
entry->eax = 7;
@ -300,21 +329,83 @@ static int __do_cpuid_ent_emulated(struct kvm_cpuid_entry2 *entry,
break;
case 7:
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
if (index == 0)
entry->ecx = F(RDPID);
entry->eax = 0;
entry->ecx = F(RDPID);
++*nent;
default:
break;
}
entry->function = func;
entry->index = index;
return 0;
}
static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
u32 index, int *nent, int maxnent)
static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
{
unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
unsigned f_la57;
/* cpuid 7.0.ebx */
const u32 kvm_cpuid_7_0_ebx_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt;
/* cpuid 7.0.ecx*/
const u32 kvm_cpuid_7_0_ecx_x86_features =
F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
F(MD_CLEAR);
switch (index) {
case 0:
entry->eax = 0;
entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
/* TSC_ADJUST is emulated */
entry->ebx |= F(TSC_ADJUST);
entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
f_la57 = entry->ecx & F(LA57);
cpuid_mask(&entry->ecx, CPUID_7_ECX);
/* Set LA57 based on hardware capability. */
entry->ecx |= f_la57;
entry->ecx |= f_umip;
/* PKU is not yet implemented for shadow paging. */
if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
entry->ecx &= ~F(PKU);
entry->edx &= kvm_cpuid_7_0_edx_x86_features;
cpuid_mask(&entry->edx, CPUID_7_EDX);
/*
* We emulate ARCH_CAPABILITIES in software even
* if the host doesn't support it.
*/
entry->edx |= F(ARCH_CAPABILITIES);
break;
default:
WARN_ON_ONCE(1);
entry->eax = 0;
entry->ebx = 0;
entry->ecx = 0;
entry->edx = 0;
break;
}
}
static inline int __do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 function,
int *nent, int maxnent)
{
int r;
unsigned f_nx = is_efer_nx() ? F(NX) : 0;
@ -327,12 +418,8 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
unsigned f_lm = 0;
#endif
unsigned f_rdtscp = kvm_x86_ops->rdtscp_supported() ? F(RDTSCP) : 0;
unsigned f_invpcid = kvm_x86_ops->invpcid_supported() ? F(INVPCID) : 0;
unsigned f_mpx = kvm_mpx_supported() ? F(MPX) : 0;
unsigned f_xsaves = kvm_x86_ops->xsaves_supported() ? F(XSAVES) : 0;
unsigned f_umip = kvm_x86_ops->umip_emulated() ? F(UMIP) : 0;
unsigned f_intel_pt = kvm_x86_ops->pt_supported() ? F(INTEL_PT) : 0;
unsigned f_la57 = 0;
/* cpuid 1.edx */
const u32 kvm_cpuid_1_edx_x86_features =
@ -377,7 +464,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
/* cpuid 0x80000008.ebx */
const u32 kvm_cpuid_8000_0008_ebx_x86_features =
F(WBNOINVD) | F(AMD_IBPB) | F(AMD_IBRS) | F(AMD_SSBD) | F(VIRT_SSBD) |
F(AMD_SSB_NO) | F(AMD_STIBP);
F(AMD_SSB_NO) | F(AMD_STIBP) | F(AMD_STIBP_ALWAYS_ON);
/* cpuid 0xC0000001.edx */
const u32 kvm_cpuid_C000_0001_edx_x86_features =
@ -385,31 +472,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
F(PMM) | F(PMM_EN);
/* cpuid 7.0.ebx */
const u32 kvm_cpuid_7_0_ebx_x86_features =
F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
F(BMI2) | F(ERMS) | f_invpcid | F(RTM) | f_mpx | F(RDSEED) |
F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | f_intel_pt;
/* cpuid 0xD.1.eax */
const u32 kvm_cpuid_D_1_eax_x86_features =
F(XSAVEOPT) | F(XSAVEC) | F(XGETBV1) | f_xsaves;
/* cpuid 7.0.ecx*/
const u32 kvm_cpuid_7_0_ecx_x86_features =
F(AVX512VBMI) | F(LA57) | F(PKU) | 0 /*OSPKE*/ |
F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B);
/* cpuid 7.0.edx*/
const u32 kvm_cpuid_7_0_edx_x86_features =
F(AVX512_4VNNIW) | F(AVX512_4FMAPS) | F(SPEC_CTRL) |
F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
F(MD_CLEAR);
/* all calls to cpuid_count() should be made on the same cpu */
get_cpu();
@ -418,12 +484,13 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
if (*nent >= maxnent)
goto out;
do_cpuid_1_ent(entry, function, index);
do_host_cpuid(entry, function, 0);
++*nent;
switch (function) {
case 0:
entry->eax = min(entry->eax, (u32)(f_intel_pt ? 0x14 : 0xd));
/* Limited to the highest leaf implemented in KVM. */
entry->eax = min(entry->eax, 0x1fU);
break;
case 1:
entry->edx &= kvm_cpuid_1_edx_x86_features;
@ -441,14 +508,12 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
case 2: {
int t, times = entry->eax & 0xff;
entry->flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
entry->flags |= KVM_CPUID_FLAG_STATE_READ_NEXT;
for (t = 1; t < times; ++t) {
if (*nent >= maxnent)
goto out;
do_cpuid_1_ent(&entry[t], function, 0);
entry[t].flags |= KVM_CPUID_FLAG_STATEFUL_FUNC;
do_host_cpuid(&entry[t], function, 0);
++*nent;
}
break;
@ -458,7 +523,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
case 0x8000001d: {
int i, cache_type;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until cache_type is zero */
for (i = 1; ; ++i) {
if (*nent >= maxnent)
@ -467,9 +531,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
cache_type = entry[i - 1].eax & 0x1f;
if (!cache_type)
break;
do_cpuid_1_ent(&entry[i], function, i);
entry[i].flags |=
KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
do_host_cpuid(&entry[i], function, i);
++*nent;
}
break;
@ -480,36 +542,21 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ecx = 0;
entry->edx = 0;
break;
/* function 7 has additional index. */
case 7: {
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* Mask ebx against host capability word 9 */
if (index == 0) {
entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
// TSC_ADJUST is emulated
entry->ebx |= F(TSC_ADJUST);
entry->ecx &= kvm_cpuid_7_0_ecx_x86_features;
f_la57 = entry->ecx & F(LA57);
cpuid_mask(&entry->ecx, CPUID_7_ECX);
/* Set LA57 based on hardware capability. */
entry->ecx |= f_la57;
entry->ecx |= f_umip;
/* PKU is not yet implemented for shadow paging. */
if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE))
entry->ecx &= ~F(PKU);
entry->edx &= kvm_cpuid_7_0_edx_x86_features;
cpuid_mask(&entry->edx, CPUID_7_EDX);
/*
* We emulate ARCH_CAPABILITIES in software even
* if the host doesn't support it.
*/
entry->edx |= F(ARCH_CAPABILITIES);
} else {
entry->ebx = 0;
entry->ecx = 0;
entry->edx = 0;
int i;
for (i = 0; ; ) {
do_cpuid_7_mask(&entry[i], i);
if (i == entry->eax)
break;
if (*nent >= maxnent)
goto out;
++i;
do_host_cpuid(&entry[i], function, i);
++*nent;
}
entry->eax = 0;
break;
}
case 9:
@ -543,11 +590,14 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->edx = edx.full;
break;
}
/* function 0xb has additional index. */
/*
* Per Intel's SDM, the 0x1f is a superset of 0xb,
* thus they can be handled by common code.
*/
case 0x1f:
case 0xb: {
int i, level_type;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
/* read more entries until level_type is zero */
for (i = 1; ; ++i) {
if (*nent >= maxnent)
@ -556,9 +606,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
level_type = entry[i - 1].ecx & 0xff00;
if (!level_type)
break;
do_cpuid_1_ent(&entry[i], function, i);
entry[i].flags |=
KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
do_host_cpuid(&entry[i], function, i);
++*nent;
}
break;
@ -571,7 +619,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
entry->ebx = xstate_required_size(supported, false);
entry->ecx = entry->ebx;
entry->edx &= supported >> 32;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
if (!supported)
break;
@ -580,7 +627,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
if (*nent >= maxnent)
goto out;
do_cpuid_1_ent(&entry[i], function, idx);
do_host_cpuid(&entry[i], function, idx);
if (idx == 1) {
entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
cpuid_mask(&entry[i].eax, CPUID_D_1_EAX);
@ -597,8 +644,6 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
}
entry[i].ecx = 0;
entry[i].edx = 0;
entry[i].flags |=
KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
++*nent;
++i;
}
@ -611,12 +656,10 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
if (!f_intel_pt)
break;
entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
for (t = 1; t <= times; ++t) {
if (*nent >= maxnent)
goto out;
do_cpuid_1_ent(&entry[t], function, t);
entry[t].flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
do_host_cpuid(&entry[t], function, t);
++*nent;
}
break;
@ -640,7 +683,9 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
(1 << KVM_FEATURE_PV_UNHALT) |
(1 << KVM_FEATURE_PV_TLB_FLUSH) |
(1 << KVM_FEATURE_ASYNC_PF_VMEXIT) |
(1 << KVM_FEATURE_PV_SEND_IPI);
(1 << KVM_FEATURE_PV_SEND_IPI) |
(1 << KVM_FEATURE_POLL_CONTROL) |
(1 << KVM_FEATURE_PV_SCHED_YIELD);
if (sched_info_on())
entry->eax |= (1 << KVM_FEATURE_STEAL_TIME);
@ -730,21 +775,19 @@ out:
return r;
}
static int do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 func,
u32 idx, int *nent, int maxnent, unsigned int type)
static int do_cpuid_func(struct kvm_cpuid_entry2 *entry, u32 func,
int *nent, int maxnent, unsigned int type)
{
if (type == KVM_GET_EMULATED_CPUID)
return __do_cpuid_ent_emulated(entry, func, idx, nent, maxnent);
return __do_cpuid_func_emulated(entry, func, nent, maxnent);
return __do_cpuid_ent(entry, func, idx, nent, maxnent);
return __do_cpuid_func(entry, func, nent, maxnent);
}
#undef F
struct kvm_cpuid_param {
u32 func;
u32 idx;
bool has_leaf_count;
bool (*qualifier)(const struct kvm_cpuid_param *param);
};
@ -788,11 +831,10 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
int limit, nent = 0, r = -E2BIG, i;
u32 func;
static const struct kvm_cpuid_param param[] = {
{ .func = 0, .has_leaf_count = true },
{ .func = 0x80000000, .has_leaf_count = true },
{ .func = 0xC0000000, .qualifier = is_centaur_cpu, .has_leaf_count = true },
{ .func = 0 },
{ .func = 0x80000000 },
{ .func = 0xC0000000, .qualifier = is_centaur_cpu },
{ .func = KVM_CPUID_SIGNATURE },
{ .func = KVM_CPUID_FEATURES },
};
if (cpuid->nent < 1)
@ -816,19 +858,16 @@ int kvm_dev_ioctl_get_cpuid(struct kvm_cpuid2 *cpuid,
if (ent->qualifier && !ent->qualifier(ent))
continue;
r = do_cpuid_ent(&cpuid_entries[nent], ent->func, ent->idx,
&nent, cpuid->nent, type);
r = do_cpuid_func(&cpuid_entries[nent], ent->func,
&nent, cpuid->nent, type);
if (r)
goto out_free;
if (!ent->has_leaf_count)
continue;
limit = cpuid_entries[nent - 1].eax;
for (func = ent->func + 1; func <= limit && nent < cpuid->nent && r == 0; ++func)
r = do_cpuid_ent(&cpuid_entries[nent], func, ent->idx,
&nent, cpuid->nent, type);
r = do_cpuid_func(&cpuid_entries[nent], func,
&nent, cpuid->nent, type);
if (r)
goto out_free;

View File

@ -4258,7 +4258,7 @@ static int check_dr_read(struct x86_emulate_ctxt *ctxt)
ulong dr6;
ctxt->ops->get_dr(ctxt, 6, &dr6);
dr6 &= ~15;
dr6 &= ~DR_TRAP_BITS;
dr6 |= DR6_BD | DR6_RTM;
ctxt->ops->set_dr(ctxt, 6, dr6);
return emulate_db(ctxt);

View File

@ -102,7 +102,6 @@ static inline int irqchip_in_kernel(struct kvm *kvm)
return mode != KVM_IRQCHIP_NONE;
}
bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
void kvm_inject_pending_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu);
void kvm_apic_nmi_wd_deliver(struct kvm_vcpu *vcpu);

View File

@ -75,7 +75,7 @@ int kvm_irq_delivery_to_apic(struct kvm *kvm, struct kvm_lapic *src,
if (r < 0)
r = 0;
r += kvm_apic_set_irq(vcpu, irq, dest_map);
} else if (kvm_lapic_enabled(vcpu)) {
} else if (kvm_apic_sw_enabled(vcpu->arch.apic)) {
if (!kvm_vector_hashing_enabled()) {
if (!lowest)
lowest = vcpu;

View File

@ -69,6 +69,7 @@
#define X2APIC_BROADCAST 0xFFFFFFFFul
#define LAPIC_TIMER_ADVANCE_ADJUST_DONE 100
#define LAPIC_TIMER_ADVANCE_ADJUST_INIT 1000
/* step-by-step approximation to mitigate fluctuation */
#define LAPIC_TIMER_ADVANCE_ADJUST_STEP 8
@ -85,11 +86,6 @@ bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector)
apic_test_vector(vector, apic->regs + APIC_IRR);
}
static inline void apic_clear_vector(int vec, void *bitmap)
{
clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}
static inline int __apic_test_and_set_vector(int vec, void *bitmap)
{
return __test_and_set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@ -443,12 +439,12 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic)
if (unlikely(vcpu->arch.apicv_active)) {
/* need to update RVI */
apic_clear_vector(vec, apic->regs + APIC_IRR);
kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
kvm_x86_ops->hwapic_irr_update(vcpu,
apic_find_highest_irr(apic));
} else {
apic->irr_pending = false;
apic_clear_vector(vec, apic->regs + APIC_IRR);
kvm_lapic_clear_vector(vec, apic->regs + APIC_IRR);
if (apic_search_irr(apic) != -1)
apic->irr_pending = true;
}
@ -1053,9 +1049,11 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
if (apic_test_vector(vector, apic->regs + APIC_TMR) != !!trig_mode) {
if (trig_mode)
kvm_lapic_set_vector(vector, apic->regs + APIC_TMR);
kvm_lapic_set_vector(vector,
apic->regs + APIC_TMR);
else
apic_clear_vector(vector, apic->regs + APIC_TMR);
kvm_lapic_clear_vector(vector,
apic->regs + APIC_TMR);
}
if (vcpu->arch.apicv_active)
@ -1313,21 +1311,45 @@ static inline struct kvm_lapic *to_lapic(struct kvm_io_device *dev)
return container_of(dev, struct kvm_lapic, dev);
}
#define APIC_REG_MASK(reg) (1ull << ((reg) >> 4))
#define APIC_REGS_MASK(first, count) \
(APIC_REG_MASK(first) * ((1ull << (count)) - 1))
int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
void *data)
{
unsigned char alignment = offset & 0xf;
u32 result;
/* this bitmask has a bit cleared for each reserved register */
static const u64 rmask = 0x43ff01ffffffe70cULL;
u64 valid_reg_mask =
APIC_REG_MASK(APIC_ID) |
APIC_REG_MASK(APIC_LVR) |
APIC_REG_MASK(APIC_TASKPRI) |
APIC_REG_MASK(APIC_PROCPRI) |
APIC_REG_MASK(APIC_LDR) |
APIC_REG_MASK(APIC_DFR) |
APIC_REG_MASK(APIC_SPIV) |
APIC_REGS_MASK(APIC_ISR, APIC_ISR_NR) |
APIC_REGS_MASK(APIC_TMR, APIC_ISR_NR) |
APIC_REGS_MASK(APIC_IRR, APIC_ISR_NR) |
APIC_REG_MASK(APIC_ESR) |
APIC_REG_MASK(APIC_ICR) |
APIC_REG_MASK(APIC_ICR2) |
APIC_REG_MASK(APIC_LVTT) |
APIC_REG_MASK(APIC_LVTTHMR) |
APIC_REG_MASK(APIC_LVTPC) |
APIC_REG_MASK(APIC_LVT0) |
APIC_REG_MASK(APIC_LVT1) |
APIC_REG_MASK(APIC_LVTERR) |
APIC_REG_MASK(APIC_TMICT) |
APIC_REG_MASK(APIC_TMCCT) |
APIC_REG_MASK(APIC_TDCR);
if ((alignment + len) > 4) {
apic_debug("KVM_APIC_READ: alignment error %x %d\n",
offset, len);
return 1;
}
/* ARBPRI is not valid on x2APIC */
if (!apic_x2apic_mode(apic))
valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
if (offset > 0x3f0 || !(rmask & (1ULL << (offset >> 4)))) {
if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) {
apic_debug("KVM_APIC_READ: read reserved register %x\n",
offset);
return 1;
@ -1499,11 +1521,40 @@ static inline void __wait_lapic_expire(struct kvm_vcpu *vcpu, u64 guest_cycles)
}
}
void wait_lapic_expire(struct kvm_vcpu *vcpu)
static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
s64 advance_expire_delta)
{
struct kvm_lapic *apic = vcpu->arch.apic;
u32 timer_advance_ns = apic->lapic_timer.timer_advance_ns;
u64 guest_tsc, tsc_deadline, ns;
u64 ns;
/* too early */
if (advance_expire_delta < 0) {
ns = -advance_expire_delta * 1000000ULL;
do_div(ns, vcpu->arch.virtual_tsc_khz);
timer_advance_ns -= min((u32)ns,
timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
} else {
/* too late */
ns = advance_expire_delta * 1000000ULL;
do_div(ns, vcpu->arch.virtual_tsc_khz);
timer_advance_ns += min((u32)ns,
timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
}
if (abs(advance_expire_delta) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
apic->lapic_timer.timer_advance_adjust_done = true;
if (unlikely(timer_advance_ns > 5000)) {
timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
apic->lapic_timer.timer_advance_adjust_done = false;
}
apic->lapic_timer.timer_advance_ns = timer_advance_ns;
}
void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
{
struct kvm_lapic *apic = vcpu->arch.apic;
u64 guest_tsc, tsc_deadline;
if (apic->lapic_timer.expired_tscdeadline == 0)
return;
@ -1514,34 +1565,15 @@ void wait_lapic_expire(struct kvm_vcpu *vcpu)
tsc_deadline = apic->lapic_timer.expired_tscdeadline;
apic->lapic_timer.expired_tscdeadline = 0;
guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
trace_kvm_wait_lapic_expire(vcpu->vcpu_id, guest_tsc - tsc_deadline);
apic->lapic_timer.advance_expire_delta = guest_tsc - tsc_deadline;
if (guest_tsc < tsc_deadline)
__wait_lapic_expire(vcpu, tsc_deadline - guest_tsc);
if (!apic->lapic_timer.timer_advance_adjust_done) {
/* too early */
if (guest_tsc < tsc_deadline) {
ns = (tsc_deadline - guest_tsc) * 1000000ULL;
do_div(ns, vcpu->arch.virtual_tsc_khz);
timer_advance_ns -= min((u32)ns,
timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
} else {
/* too late */
ns = (guest_tsc - tsc_deadline) * 1000000ULL;
do_div(ns, vcpu->arch.virtual_tsc_khz);
timer_advance_ns += min((u32)ns,
timer_advance_ns / LAPIC_TIMER_ADVANCE_ADJUST_STEP);
}
if (abs(guest_tsc - tsc_deadline) < LAPIC_TIMER_ADVANCE_ADJUST_DONE)
apic->lapic_timer.timer_advance_adjust_done = true;
if (unlikely(timer_advance_ns > 5000)) {
timer_advance_ns = 0;
apic->lapic_timer.timer_advance_adjust_done = true;
}
apic->lapic_timer.timer_advance_ns = timer_advance_ns;
}
if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
}
EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
static void start_sw_tscdeadline(struct kvm_lapic *apic)
{
@ -2014,7 +2046,7 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
apic_debug("%s: offset 0x%x with length 0x%x, and value is "
"0x%x\n", __func__, offset, len, val);
kvm_lapic_reg_write(apic, offset & 0xff0, val);
kvm_lapic_reg_write(apic, offset, val);
return 0;
}
@ -2311,7 +2343,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
HRTIMER_MODE_ABS_PINNED);
apic->lapic_timer.timer.function = apic_timer_fn;
if (timer_advance_ns == -1) {
apic->lapic_timer.timer_advance_ns = 1000;
apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
apic->lapic_timer.timer_advance_adjust_done = false;
} else {
apic->lapic_timer.timer_advance_ns = timer_advance_ns;
@ -2321,7 +2353,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
/*
* APIC is created enabled. This will prevent kvm_lapic_set_base from
* thinking that APIC satet has changed.
* thinking that APIC state has changed.
*/
vcpu->arch.apic_base = MSR_IA32_APICBASE_ENABLE;
static_key_slow_inc(&apic_sw_disabled.key); /* sw disabled at reset */
@ -2330,6 +2362,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
return 0;
nomem_free_apic:
kfree(apic);
vcpu->arch.apic = NULL;
nomem:
return -ENOMEM;
}

View File

@ -32,6 +32,7 @@ struct kvm_timer {
u64 tscdeadline;
u64 expired_tscdeadline;
u32 timer_advance_ns;
s64 advance_expire_delta;
atomic_t pending; /* accumulated triggered timers */
bool hv_timer_in_use;
bool timer_advance_adjust_done;
@ -129,6 +130,11 @@ void kvm_lapic_exit(void);
#define VEC_POS(v) ((v) & (32 - 1))
#define REG_POS(v) (((v) >> 5) << 4)
static inline void kvm_lapic_clear_vector(int vec, void *bitmap)
{
clear_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
}
static inline void kvm_lapic_set_vector(int vec, void *bitmap)
{
set_bit(VEC_POS(vec), (bitmap) + REG_POS(vec));
@ -219,7 +225,7 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu)
bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector);
void wait_lapic_expire(struct kvm_vcpu *vcpu);
void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu);
bool kvm_intr_is_single_vcpu_fast(struct kvm *kvm, struct kvm_lapic_irq *irq,
struct kvm_vcpu **dest_vcpu);

View File

@ -140,9 +140,6 @@ module_param(dbg, bool, 0644);
#include <trace/events/kvm.h>
#define CREATE_TRACE_POINTS
#include "mmutrace.h"
#define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT)
#define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1))
@ -259,11 +256,20 @@ static const u64 shadow_nonpresent_or_rsvd_mask_len = 5;
*/
static u64 __read_mostly shadow_nonpresent_or_rsvd_lower_gfn_mask;
/*
* The number of non-reserved physical address bits irrespective of features
* that repurpose legal bits, e.g. MKTME.
*/
static u8 __read_mostly shadow_phys_bits;
static void mmu_spte_set(u64 *sptep, u64 spte);
static bool is_executable_pte(u64 spte);
static union kvm_mmu_page_role
kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu);
#define CREATE_TRACE_POINTS
#include "mmutrace.h"
static inline bool kvm_available_flush_tlb_with_range(void)
{
@ -468,6 +474,21 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
}
EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
static u8 kvm_get_shadow_phys_bits(void)
{
/*
* boot_cpu_data.x86_phys_bits is reduced when MKTME is detected
* in CPU detection code, but MKTME treats those reduced bits as
* 'keyID' thus they are not reserved bits. Therefore for MKTME
* we should still return physical address bits reported by CPUID.
*/
if (!boot_cpu_has(X86_FEATURE_TME) ||
WARN_ON_ONCE(boot_cpu_data.extended_cpuid_level < 0x80000008))
return boot_cpu_data.x86_phys_bits;
return cpuid_eax(0x80000008) & 0xff;
}
static void kvm_mmu_reset_all_pte_masks(void)
{
u8 low_phys_bits;
@ -481,6 +502,8 @@ static void kvm_mmu_reset_all_pte_masks(void)
shadow_present_mask = 0;
shadow_acc_track_mask = 0;
shadow_phys_bits = kvm_get_shadow_phys_bits();
/*
* If the CPU has 46 or less physical address bits, then set an
* appropriate mask to guard against L1TF attacks. Otherwise, it is
@ -1073,10 +1096,16 @@ static gfn_t kvm_mmu_page_get_gfn(struct kvm_mmu_page *sp, int index)
static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn)
{
if (sp->role.direct)
BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index));
else
if (!sp->role.direct) {
sp->gfns[index] = gfn;
return;
}
if (WARN_ON(gfn != kvm_mmu_page_get_gfn(sp, index)))
pr_err_ratelimited("gfn mismatch under direct page %llx "
"(expected %llx, got %llx)\n",
sp->gfn,
kvm_mmu_page_get_gfn(sp, index), gfn);
}
/*
@ -3055,10 +3084,7 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
ret = RET_PF_EMULATE;
pgprintk("%s: setting spte %llx\n", __func__, *sptep);
pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n",
is_large_pte(*sptep)? "2MB" : "4kB",
*sptep & PT_WRITABLE_MASK ? "RW" : "R", gfn,
*sptep, sptep);
trace_kvm_mmu_set_spte(level, gfn, sptep);
if (!was_rmapped && is_large_pte(*sptep))
++vcpu->kvm->stat.lpages;
@ -3070,8 +3096,6 @@ static int mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access,
}
}
kvm_release_pfn_clean(pfn);
return ret;
}
@ -3106,9 +3130,11 @@ static int direct_pte_prefetch_many(struct kvm_vcpu *vcpu,
if (ret <= 0)
return -1;
for (i = 0; i < ret; i++, gfn++, start++)
for (i = 0; i < ret; i++, gfn++, start++) {
mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn,
page_to_pfn(pages[i]), true, true);
put_page(pages[i]);
}
return 0;
}
@ -3156,40 +3182,40 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
__direct_pte_prefetch(vcpu, sp, sptep);
}
static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable,
int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault)
static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write,
int map_writable, int level, kvm_pfn_t pfn,
bool prefault)
{
struct kvm_shadow_walk_iterator iterator;
struct kvm_shadow_walk_iterator it;
struct kvm_mmu_page *sp;
int emulate = 0;
gfn_t pseudo_gfn;
int ret;
gfn_t gfn = gpa >> PAGE_SHIFT;
gfn_t base_gfn = gfn;
if (!VALID_PAGE(vcpu->arch.mmu->root_hpa))
return 0;
return RET_PF_RETRY;
for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) {
if (iterator.level == level) {
emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL,
write, level, gfn, pfn, prefault,
map_writable);
direct_pte_prefetch(vcpu, iterator.sptep);
++vcpu->stat.pf_fixed;
trace_kvm_mmu_spte_requested(gpa, level, pfn);
for_each_shadow_entry(vcpu, gpa, it) {
base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
if (it.level == level)
break;
}
drop_large_spte(vcpu, iterator.sptep);
if (!is_shadow_present_pte(*iterator.sptep)) {
u64 base_addr = iterator.addr;
drop_large_spte(vcpu, it.sptep);
if (!is_shadow_present_pte(*it.sptep)) {
sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr,
it.level - 1, true, ACC_ALL);
base_addr &= PT64_LVL_ADDR_MASK(iterator.level);
pseudo_gfn = base_addr >> PAGE_SHIFT;
sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr,
iterator.level - 1, 1, ACC_ALL);
link_shadow_page(vcpu, iterator.sptep, sp);
link_shadow_page(vcpu, it.sptep, sp);
}
}
return emulate;
ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL,
write, level, base_gfn, pfn, prefault,
map_writable);
direct_pte_prefetch(vcpu, it.sptep);
++vcpu->stat.pf_fixed;
return ret;
}
static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk)
@ -3216,11 +3242,10 @@ static int kvm_handle_bad_page(struct kvm_vcpu *vcpu, gfn_t gfn, kvm_pfn_t pfn)
}
static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
gfn_t *gfnp, kvm_pfn_t *pfnp,
gfn_t gfn, kvm_pfn_t *pfnp,
int *levelp)
{
kvm_pfn_t pfn = *pfnp;
gfn_t gfn = *gfnp;
int level = *levelp;
/*
@ -3247,8 +3272,6 @@ static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu,
mask = KVM_PAGES_PER_HPAGE(level) - 1;
VM_BUG_ON((gfn & mask) != (pfn & mask));
if (pfn & mask) {
gfn &= ~mask;
*gfnp = gfn;
kvm_release_pfn_clean(pfn);
pfn &= ~mask;
kvm_get_pfn(pfn);
@ -3505,22 +3528,19 @@ static int nonpaging_map(struct kvm_vcpu *vcpu, gva_t v, u32 error_code,
if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r))
return r;
r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
if (make_mmu_pages_available(vcpu) < 0)
goto out_unlock;
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
spin_unlock(&vcpu->kvm->mmu_lock);
return r;
transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
return RET_PF_RETRY;
return r;
}
static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa,
@ -4015,19 +4035,6 @@ static int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
return kvm_setup_async_pf(vcpu, gva, kvm_vcpu_gfn_to_hva(vcpu, gfn), &arch);
}
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
{
if (unlikely(!lapic_in_kernel(vcpu) ||
kvm_event_needs_reinjection(vcpu) ||
vcpu->arch.exception.pending))
return false;
if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
return false;
return kvm_x86_ops->interrupt_allowed(vcpu);
}
static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
gva_t gva, kvm_pfn_t *pfn, bool write, bool *writable)
{
@ -4147,22 +4154,19 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa, u32 error_code,
if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r))
return r;
r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
if (make_mmu_pages_available(vcpu) < 0)
goto out_unlock;
if (likely(!force_pt_level))
transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level);
r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault);
spin_unlock(&vcpu->kvm->mmu_lock);
return r;
transparent_hugepage_adjust(vcpu, gfn, &pfn, &level);
r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault);
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
return RET_PF_RETRY;
return r;
}
static void nonpaging_init_context(struct kvm_vcpu *vcpu,
@ -4494,7 +4498,7 @@ reset_shadow_zero_bits_mask(struct kvm_vcpu *vcpu, struct kvm_mmu *context)
*/
shadow_zero_check = &context->shadow_zero_check;
__reset_rsvds_bits_mask(vcpu, shadow_zero_check,
boot_cpu_data.x86_phys_bits,
shadow_phys_bits,
context->shadow_root_level, uses_nx,
guest_cpuid_has(vcpu, X86_FEATURE_GBPAGES),
is_pse(vcpu), true);
@ -4531,13 +4535,13 @@ reset_tdp_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
if (boot_cpu_is_amd())
__reset_rsvds_bits_mask(vcpu, shadow_zero_check,
boot_cpu_data.x86_phys_bits,
shadow_phys_bits,
context->shadow_root_level, false,
boot_cpu_has(X86_FEATURE_GBPAGES),
true, true);
else
__reset_rsvds_bits_mask_ept(shadow_zero_check,
boot_cpu_data.x86_phys_bits,
shadow_phys_bits,
false);
if (!shadow_me_mask)
@ -4558,7 +4562,7 @@ reset_ept_shadow_zero_bits_mask(struct kvm_vcpu *vcpu,
struct kvm_mmu *context, bool execonly)
{
__reset_rsvds_bits_mask_ept(&context->shadow_zero_check,
boot_cpu_data.x86_phys_bits, execonly);
shadow_phys_bits, execonly);
}
#define BYTE_MASK(access) \
@ -5935,7 +5939,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
int nr_to_scan = sc->nr_to_scan;
unsigned long freed = 0;
spin_lock(&kvm_lock);
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
int idx;
@ -5977,7 +5981,7 @@ mmu_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
break;
}
spin_unlock(&kvm_lock);
mutex_unlock(&kvm_lock);
return freed;
}
@ -5999,6 +6003,34 @@ static void mmu_destroy_caches(void)
kmem_cache_destroy(mmu_page_header_cache);
}
static void kvm_set_mmio_spte_mask(void)
{
u64 mask;
/*
* Set the reserved bits and the present bit of an paging-structure
* entry to generate page fault with PFER.RSV = 1.
*/
/*
* Mask the uppermost physical address bit, which would be reserved as
* long as the supported physical address width is less than 52.
*/
mask = 1ull << 51;
/* Set the present bit. */
mask |= 1ull;
/*
* If reserved bit is not supported, clear the present bit to disable
* mmio page fault.
*/
if (IS_ENABLED(CONFIG_X86_64) && shadow_phys_bits == 52)
mask &= ~1ull;
kvm_mmu_set_mmio_spte_mask(mask, mask);
}
int kvm_mmu_module_init(void)
{
int ret = -ENOMEM;
@ -6015,6 +6047,8 @@ int kvm_mmu_module_init(void)
kvm_mmu_reset_all_pte_masks();
kvm_set_mmio_spte_mask();
pte_list_desc_cache = kmem_cache_create("pte_list_desc",
sizeof(struct pte_list_desc),
0, SLAB_ACCOUNT, NULL);

View File

@ -301,6 +301,65 @@ TRACE_EVENT(
__entry->kvm_gen == __entry->spte_gen
)
);
TRACE_EVENT(
kvm_mmu_set_spte,
TP_PROTO(int level, gfn_t gfn, u64 *sptep),
TP_ARGS(level, gfn, sptep),
TP_STRUCT__entry(
__field(u64, gfn)
__field(u64, spte)
__field(u64, sptep)
__field(u8, level)
/* These depend on page entry type, so compute them now. */
__field(bool, r)
__field(bool, x)
__field(u8, u)
),
TP_fast_assign(
__entry->gfn = gfn;
__entry->spte = *sptep;
__entry->sptep = virt_to_phys(sptep);
__entry->level = level;
__entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK);
__entry->x = is_executable_pte(__entry->spte);
__entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1;
),
TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx",
__entry->gfn, __entry->spte,
__entry->r ? "r" : "-",
__entry->spte & PT_WRITABLE_MASK ? "w" : "-",
__entry->x ? "x" : "-",
__entry->u == -1 ? "" : (__entry->u ? "u" : "-"),
__entry->level, __entry->sptep
)
);
TRACE_EVENT(
kvm_mmu_spte_requested,
TP_PROTO(gpa_t addr, int level, kvm_pfn_t pfn),
TP_ARGS(addr, level, pfn),
TP_STRUCT__entry(
__field(u64, gfn)
__field(u64, pfn)
__field(u8, level)
),
TP_fast_assign(
__entry->gfn = addr >> PAGE_SHIFT;
__entry->pfn = pfn | (__entry->gfn & (KVM_PAGES_PER_HPAGE(level) - 1));
__entry->level = level;
),
TP_printk("gfn %llx pfn %llx level %d",
__entry->gfn, __entry->pfn, __entry->level
)
);
#endif /* _TRACE_KVMMMU_H */
#undef TRACE_INCLUDE_PATH

View File

@ -540,6 +540,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn,
true, true);
kvm_release_pfn_clean(pfn);
return true;
}
@ -619,6 +620,7 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
struct kvm_shadow_walk_iterator it;
unsigned direct_access, access = gw->pt_access;
int top_level, ret;
gfn_t base_gfn;
direct_access = gw->pte_access;
@ -663,35 +665,34 @@ static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr,
link_shadow_page(vcpu, it.sptep, sp);
}
for (;
shadow_walk_okay(&it) && it.level > hlevel;
shadow_walk_next(&it)) {
gfn_t direct_gfn;
base_gfn = gw->gfn;
trace_kvm_mmu_spte_requested(addr, gw->level, pfn);
for (; shadow_walk_okay(&it); shadow_walk_next(&it)) {
clear_sp_write_flooding_count(it.sptep);
base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
if (it.level == hlevel)
break;
validate_direct_spte(vcpu, it.sptep, direct_access);
drop_large_spte(vcpu, it.sptep);
if (is_shadow_present_pte(*it.sptep))
continue;
direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1);
sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1,
true, direct_access);
link_shadow_page(vcpu, it.sptep, sp);
if (!is_shadow_present_pte(*it.sptep)) {
sp = kvm_mmu_get_page(vcpu, base_gfn, addr,
it.level - 1, true, direct_access);
link_shadow_page(vcpu, it.sptep, sp);
}
}
clear_sp_write_flooding_count(it.sptep);
ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault,
it.level, gw->gfn, pfn, prefault, map_writable);
it.level, base_gfn, pfn, prefault, map_writable);
FNAME(pte_prefetch)(vcpu, gw, it.sptep);
++vcpu->stat.pf_fixed;
return ret;
out_gpte_changed:
kvm_release_pfn_clean(pfn);
return RET_PF_RETRY;
}
@ -839,6 +840,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
walker.pte_access &= ~ACC_EXEC_MASK;
}
r = RET_PF_RETRY;
spin_lock(&vcpu->kvm->mmu_lock);
if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
goto out_unlock;
@ -847,19 +849,15 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr, u32 error_code,
if (make_mmu_pages_available(vcpu) < 0)
goto out_unlock;
if (!force_pt_level)
transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level);
transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level);
r = FNAME(fetch)(vcpu, addr, &walker, write_fault,
level, pfn, map_writable, prefault);
++vcpu->stat.pf_fixed;
kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT);
spin_unlock(&vcpu->kvm->mmu_lock);
return r;
out_unlock:
spin_unlock(&vcpu->kvm->mmu_lock);
kvm_release_pfn_clean(pfn);
return RET_PF_RETRY;
return r;
}
static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp)

View File

@ -19,6 +19,9 @@
#include "lapic.h"
#include "pmu.h"
/* This keeps the total size of the filter under 4k. */
#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 63
/* NOTE:
* - Each perf counter is defined as "struct kvm_pmc";
* - There are two types of perf counters: general purpose (gp) and fixed.
@ -141,6 +144,10 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
{
unsigned config, type = PERF_TYPE_RAW;
u8 event_select, unit_mask;
struct kvm *kvm = pmc->vcpu->kvm;
struct kvm_pmu_event_filter *filter;
int i;
bool allow_event = true;
if (eventsel & ARCH_PERFMON_EVENTSEL_PIN_CONTROL)
printk_once("kvm pmu: pin control bit is ignored\n");
@ -152,6 +159,22 @@ void reprogram_gp_counter(struct kvm_pmc *pmc, u64 eventsel)
if (!(eventsel & ARCH_PERFMON_EVENTSEL_ENABLE) || !pmc_is_enabled(pmc))
return;
filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
if (filter) {
for (i = 0; i < filter->nevents; i++)
if (filter->events[i] ==
(eventsel & AMD64_RAW_EVENT_MASK_NB))
break;
if (filter->action == KVM_PMU_EVENT_ALLOW &&
i == filter->nevents)
allow_event = false;
if (filter->action == KVM_PMU_EVENT_DENY &&
i < filter->nevents)
allow_event = false;
}
if (!allow_event)
return;
event_select = eventsel & ARCH_PERFMON_EVENTSEL_EVENT;
unit_mask = (eventsel & ARCH_PERFMON_EVENTSEL_UMASK) >> 8;
@ -348,3 +371,43 @@ void kvm_pmu_destroy(struct kvm_vcpu *vcpu)
{
kvm_pmu_reset(vcpu);
}
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
{
struct kvm_pmu_event_filter tmp, *filter;
size_t size;
int r;
if (copy_from_user(&tmp, argp, sizeof(tmp)))
return -EFAULT;
if (tmp.action != KVM_PMU_EVENT_ALLOW &&
tmp.action != KVM_PMU_EVENT_DENY)
return -EINVAL;
if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
return -E2BIG;
size = struct_size(filter, events, tmp.nevents);
filter = kmalloc(size, GFP_KERNEL_ACCOUNT);
if (!filter)
return -ENOMEM;
r = -EFAULT;
if (copy_from_user(filter, argp, size))
goto cleanup;
/* Ensure nevents can't be changed between the user copies. */
*filter = tmp;
mutex_lock(&kvm->lock);
rcu_swap_protected(kvm->arch.pmu_event_filter, filter,
mutex_is_locked(&kvm->lock));
mutex_unlock(&kvm->lock);
synchronize_srcu_expedited(&kvm->srcu);
r = 0;
cleanup:
kfree(filter);
return r;
}

View File

@ -118,6 +118,7 @@ void kvm_pmu_refresh(struct kvm_vcpu *vcpu);
void kvm_pmu_reset(struct kvm_vcpu *vcpu);
void kvm_pmu_init(struct kvm_vcpu *vcpu);
void kvm_pmu_destroy(struct kvm_vcpu *vcpu);
int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp);
bool is_vmware_backdoor_pmc(u32 pmc_idx);

View File

@ -364,6 +364,10 @@ static int avic;
module_param(avic, int, S_IRUGO);
#endif
/* enable/disable Next RIP Save */
static int nrips = true;
module_param(nrips, int, 0444);
/* enable/disable Virtual VMLOAD VMSAVE */
static int vls = true;
module_param(vls, int, 0444);
@ -770,7 +774,7 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
{
struct vcpu_svm *svm = to_svm(vcpu);
if (svm->vmcb->control.next_rip != 0) {
if (nrips && svm->vmcb->control.next_rip != 0) {
WARN_ON_ONCE(!static_cpu_has(X86_FEATURE_NRIPS));
svm->next_rip = svm->vmcb->control.next_rip;
}
@ -807,7 +811,7 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu)
kvm_deliver_exception_payload(&svm->vcpu);
if (nr == BP_VECTOR && !static_cpu_has(X86_FEATURE_NRIPS)) {
if (nr == BP_VECTOR && !nrips) {
unsigned long rip, old_rip = kvm_rip_read(&svm->vcpu);
/*
@ -1364,6 +1368,11 @@ static __init int svm_hardware_setup(void)
} else
kvm_disable_tdp();
if (nrips) {
if (!boot_cpu_has(X86_FEATURE_NRIPS))
nrips = false;
}
if (avic) {
if (!npt_enabled ||
!boot_cpu_has(X86_FEATURE_AVIC) ||
@ -3290,7 +3299,7 @@ static int nested_svm_vmexit(struct vcpu_svm *svm)
vmcb->control.exit_int_info_err,
KVM_ISA_SVM);
rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(svm->nested.vmcb), &map);
rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb), &map);
if (rc) {
if (rc == -EINVAL)
kvm_inject_gp(&svm->vcpu, 0);
@ -3580,7 +3589,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
vmcb_gpa = svm->vmcb->save.rax;
rc = kvm_vcpu_map(&svm->vcpu, gfn_to_gpa(vmcb_gpa), &map);
rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb_gpa), &map);
if (rc) {
if (rc == -EINVAL)
kvm_inject_gp(&svm->vcpu, 0);
@ -3935,7 +3944,7 @@ static int rdpmc_interception(struct vcpu_svm *svm)
{
int err;
if (!static_cpu_has(X86_FEATURE_NRIPS))
if (!nrips)
return emulate_on_interception(svm);
err = kvm_rdpmc(&svm->vcpu);
@ -5160,10 +5169,13 @@ static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec)
kvm_lapic_set_irr(vec, vcpu->arch.apic);
smp_mb__after_atomic();
if (avic_vcpu_is_running(vcpu))
wrmsrl(SVM_AVIC_DOORBELL,
kvm_cpu_get_apicid(vcpu->cpu));
else
if (avic_vcpu_is_running(vcpu)) {
int cpuid = vcpu->cpu;
if (cpuid != get_cpu())
wrmsrl(SVM_AVIC_DOORBELL, kvm_cpu_get_apicid(cpuid));
put_cpu();
} else
kvm_vcpu_wake_up(vcpu);
}
@ -5640,6 +5652,10 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
clgi();
kvm_load_guest_xcr0(vcpu);
if (lapic_in_kernel(vcpu) &&
vcpu->arch.apic->lapic_timer.timer_advance_ns)
kvm_wait_lapic_expire(vcpu);
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
* it's non-zero. Since vmentry is serialising on affected CPUs, there
@ -5861,9 +5877,9 @@ svm_patch_hypercall(struct kvm_vcpu *vcpu, unsigned char *hypercall)
hypercall[2] = 0xd9;
}
static void svm_check_processor_compat(void *rtn)
static int __init svm_check_processor_compat(void)
{
*(int *)rtn = 0;
return 0;
}
static bool svm_cpu_has_accelerated_tpr(void)
@ -5875,6 +5891,7 @@ static bool svm_has_emulated_msr(int index)
{
switch (index) {
case MSR_IA32_MCG_EXT_CTL:
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
return false;
default:
break;
@ -6162,15 +6179,9 @@ out:
return ret;
}
static void svm_handle_external_intr(struct kvm_vcpu *vcpu)
static void svm_handle_exit_irqoff(struct kvm_vcpu *vcpu)
{
local_irq_enable();
/*
* We must have an instruction with interrupts enabled, so
* the timer interrupt isn't delayed by the interrupt shadow.
*/
asm("nop");
local_irq_disable();
}
static void svm_sched_in(struct kvm_vcpu *vcpu, int cpu)
@ -7256,7 +7267,7 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = {
.set_tdp_cr3 = set_tdp_cr3,
.check_intercept = svm_check_intercept,
.handle_external_intr = svm_handle_external_intr,
.handle_exit_irqoff = svm_handle_exit_irqoff,
.request_immediate_exit = __kvm_request_immediate_exit,

View File

@ -1365,7 +1365,7 @@ TRACE_EVENT(kvm_hv_timer_state,
__entry->vcpu_id = vcpu_id;
__entry->hv_timer_in_use = hv_timer_in_use;
),
TP_printk("vcpu_id %x hv_timer %x\n",
TP_printk("vcpu_id %x hv_timer %x",
__entry->vcpu_id,
__entry->hv_timer_in_use)
);

View File

@ -3,6 +3,7 @@
#include <linux/errno.h>
#include <linux/smp.h>
#include "../hyperv.h"
#include "evmcs.h"
#include "vmcs.h"
#include "vmx.h"
@ -313,6 +314,23 @@ void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf)
}
#endif
bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa)
{
struct hv_vp_assist_page assist_page;
*evmcs_gpa = -1ull;
if (unlikely(!kvm_hv_get_assist_page(vcpu, &assist_page)))
return false;
if (unlikely(!assist_page.enlighten_vmentry))
return false;
*evmcs_gpa = assist_page.current_nested_vmcs;
return true;
}
uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);

View File

@ -195,6 +195,7 @@ static inline void evmcs_sanitize_exec_ctrls(struct vmcs_config *vmcs_conf) {}
static inline void evmcs_touch_msr_bitmap(void) {}
#endif /* IS_ENABLED(CONFIG_HYPERV) */
bool nested_enlightened_vmentry(struct kvm_vcpu *vcpu, u64 *evmcs_gpa);
uint16_t nested_get_evmcs_version(struct kvm_vcpu *vcpu);
int nested_enable_evmcs(struct kvm_vcpu *vcpu,
uint16_t *vmcs_version);

File diff suppressed because it is too large Load Diff

View File

@ -17,11 +17,11 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry);
bool nested_vmx_exit_reflected(struct kvm_vcpu *vcpu, u32 exit_reason);
void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason,
u32 exit_intr_info, unsigned long exit_qualification);
void nested_sync_from_vmcs12(struct kvm_vcpu *vcpu);
void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu);
int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data);
int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata);
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
u32 vmx_instruction_info, bool wr, gva_t *ret);
u32 vmx_instruction_info, bool wr, int len, gva_t *ret);
static inline struct vmcs12 *get_vmcs12(struct kvm_vcpu *vcpu)
{

View File

@ -146,7 +146,6 @@ static __always_inline void vmcs_write64(unsigned long field, u64 value)
__vmcs_writel(field, value);
#ifndef CONFIG_X86_64
asm volatile ("");
__vmcs_writel(field+1, value >> 32);
#endif
}

View File

@ -42,6 +42,14 @@ struct vmcs_host_state {
#endif
};
struct vmcs_controls_shadow {
u32 vm_entry;
u32 vm_exit;
u32 pin;
u32 exec;
u32 secondary_exec;
};
/*
* Track a VMCS that may be loaded on a certain CPU. If it is (cpu!=-1), also
* remember whether it was VMLAUNCHed, and maintain a linked list of all VMCSs
@ -53,7 +61,7 @@ struct loaded_vmcs {
int cpu;
bool launched;
bool nmi_known_unmasked;
bool hv_timer_armed;
bool hv_timer_soft_disabled;
/* Support for vnmi-less CPUs */
int soft_vnmi_blocked;
ktime_t entry_time;
@ -61,6 +69,7 @@ struct loaded_vmcs {
unsigned long *msr_bitmap;
struct list_head loaded_vmcss_on_cpu_link;
struct vmcs_host_state host_state;
struct vmcs_controls_shadow controls_shadow;
};
static inline bool is_exception_n(u32 intr_info, u8 vector)
@ -115,6 +124,12 @@ static inline bool is_nmi(u32 intr_info)
== (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
}
static inline bool is_external_intr(u32 intr_info)
{
return (intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR);
}
enum vmcs_field_width {
VMCS_FIELD_WIDTH_U16 = 0,
VMCS_FIELD_WIDTH_U64 = 1,

View File

@ -395,69 +395,48 @@ static inline short vmcs_field_to_offset(unsigned long field)
#undef ROL16
/*
* Read a vmcs12 field. Since these can have varying lengths and we return
* one type, we chose the biggest type (u64) and zero-extend the return value
* to that size. Note that the caller, handle_vmread, might need to use only
* some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
* 64-bit fields are to be returned).
*/
static inline int vmcs12_read_any(struct vmcs12 *vmcs12,
unsigned long field, u64 *ret)
static inline u64 vmcs12_read_any(struct vmcs12 *vmcs12, unsigned long field,
u16 offset)
{
short offset = vmcs_field_to_offset(field);
char *p;
if (offset < 0)
return offset;
p = (char *)vmcs12 + offset;
char *p = (char *)vmcs12 + offset;
switch (vmcs_field_width(field)) {
case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
*ret = *((natural_width *)p);
return 0;
return *((natural_width *)p);
case VMCS_FIELD_WIDTH_U16:
*ret = *((u16 *)p);
return 0;
return *((u16 *)p);
case VMCS_FIELD_WIDTH_U32:
*ret = *((u32 *)p);
return 0;
return *((u32 *)p);
case VMCS_FIELD_WIDTH_U64:
*ret = *((u64 *)p);
return 0;
return *((u64 *)p);
default:
WARN_ON(1);
return -ENOENT;
WARN_ON_ONCE(1);
return -1;
}
}
static inline int vmcs12_write_any(struct vmcs12 *vmcs12,
unsigned long field, u64 field_value){
short offset = vmcs_field_to_offset(field);
static inline void vmcs12_write_any(struct vmcs12 *vmcs12, unsigned long field,
u16 offset, u64 field_value)
{
char *p = (char *)vmcs12 + offset;
if (offset < 0)
return offset;
switch (vmcs_field_width(field)) {
case VMCS_FIELD_WIDTH_U16:
*(u16 *)p = field_value;
return 0;
break;
case VMCS_FIELD_WIDTH_U32:
*(u32 *)p = field_value;
return 0;
break;
case VMCS_FIELD_WIDTH_U64:
*(u64 *)p = field_value;
return 0;
break;
case VMCS_FIELD_WIDTH_NATURAL_WIDTH:
*(natural_width *)p = field_value;
return 0;
break;
default:
WARN_ON(1);
return -ENOENT;
WARN_ON_ONCE(1);
break;
}
}
#endif /* __KVM_X86_VMX_VMCS12_H */

View File

@ -1,8 +1,12 @@
#if !defined(SHADOW_FIELD_RO) && !defined(SHADOW_FIELD_RW)
BUILD_BUG_ON(1)
#endif
#ifndef SHADOW_FIELD_RO
#define SHADOW_FIELD_RO(x)
#define SHADOW_FIELD_RO(x, y)
#endif
#ifndef SHADOW_FIELD_RW
#define SHADOW_FIELD_RW(x)
#define SHADOW_FIELD_RW(x, y)
#endif
/*
@ -28,47 +32,48 @@
*/
/* 16-bits */
SHADOW_FIELD_RW(GUEST_INTR_STATUS)
SHADOW_FIELD_RW(GUEST_PML_INDEX)
SHADOW_FIELD_RW(HOST_FS_SELECTOR)
SHADOW_FIELD_RW(HOST_GS_SELECTOR)
SHADOW_FIELD_RW(GUEST_INTR_STATUS, guest_intr_status)
SHADOW_FIELD_RW(GUEST_PML_INDEX, guest_pml_index)
SHADOW_FIELD_RW(HOST_FS_SELECTOR, host_fs_selector)
SHADOW_FIELD_RW(HOST_GS_SELECTOR, host_gs_selector)
/* 32-bits */
SHADOW_FIELD_RO(VM_EXIT_REASON)
SHADOW_FIELD_RO(VM_EXIT_INTR_INFO)
SHADOW_FIELD_RO(VM_EXIT_INSTRUCTION_LEN)
SHADOW_FIELD_RO(IDT_VECTORING_INFO_FIELD)
SHADOW_FIELD_RO(IDT_VECTORING_ERROR_CODE)
SHADOW_FIELD_RO(VM_EXIT_INTR_ERROR_CODE)
SHADOW_FIELD_RW(CPU_BASED_VM_EXEC_CONTROL)
SHADOW_FIELD_RW(EXCEPTION_BITMAP)
SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE)
SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD)
SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN)
SHADOW_FIELD_RW(TPR_THRESHOLD)
SHADOW_FIELD_RW(GUEST_CS_AR_BYTES)
SHADOW_FIELD_RW(GUEST_SS_AR_BYTES)
SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO)
SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE)
SHADOW_FIELD_RO(VM_EXIT_REASON, vm_exit_reason)
SHADOW_FIELD_RO(VM_EXIT_INTR_INFO, vm_exit_intr_info)
SHADOW_FIELD_RO(VM_EXIT_INSTRUCTION_LEN, vm_exit_instruction_len)
SHADOW_FIELD_RO(IDT_VECTORING_INFO_FIELD, idt_vectoring_info_field)
SHADOW_FIELD_RO(IDT_VECTORING_ERROR_CODE, idt_vectoring_error_code)
SHADOW_FIELD_RO(VM_EXIT_INTR_ERROR_CODE, vm_exit_intr_error_code)
SHADOW_FIELD_RO(GUEST_CS_AR_BYTES, guest_cs_ar_bytes)
SHADOW_FIELD_RO(GUEST_SS_AR_BYTES, guest_ss_ar_bytes)
SHADOW_FIELD_RW(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control)
SHADOW_FIELD_RW(PIN_BASED_VM_EXEC_CONTROL, pin_based_vm_exec_control)
SHADOW_FIELD_RW(EXCEPTION_BITMAP, exception_bitmap)
SHADOW_FIELD_RW(VM_ENTRY_EXCEPTION_ERROR_CODE, vm_entry_exception_error_code)
SHADOW_FIELD_RW(VM_ENTRY_INTR_INFO_FIELD, vm_entry_intr_info_field)
SHADOW_FIELD_RW(VM_ENTRY_INSTRUCTION_LEN, vm_entry_instruction_len)
SHADOW_FIELD_RW(TPR_THRESHOLD, tpr_threshold)
SHADOW_FIELD_RW(GUEST_INTERRUPTIBILITY_INFO, guest_interruptibility_info)
SHADOW_FIELD_RW(VMX_PREEMPTION_TIMER_VALUE, vmx_preemption_timer_value)
/* Natural width */
SHADOW_FIELD_RO(EXIT_QUALIFICATION)
SHADOW_FIELD_RO(GUEST_LINEAR_ADDRESS)
SHADOW_FIELD_RW(GUEST_RIP)
SHADOW_FIELD_RW(GUEST_RSP)
SHADOW_FIELD_RW(GUEST_CR0)
SHADOW_FIELD_RW(GUEST_CR3)
SHADOW_FIELD_RW(GUEST_CR4)
SHADOW_FIELD_RW(GUEST_RFLAGS)
SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK)
SHADOW_FIELD_RW(CR0_READ_SHADOW)
SHADOW_FIELD_RW(CR4_READ_SHADOW)
SHADOW_FIELD_RW(HOST_FS_BASE)
SHADOW_FIELD_RW(HOST_GS_BASE)
SHADOW_FIELD_RO(EXIT_QUALIFICATION, exit_qualification)
SHADOW_FIELD_RO(GUEST_LINEAR_ADDRESS, guest_linear_address)
SHADOW_FIELD_RW(GUEST_RIP, guest_rip)
SHADOW_FIELD_RW(GUEST_RSP, guest_rsp)
SHADOW_FIELD_RW(GUEST_CR0, guest_cr0)
SHADOW_FIELD_RW(GUEST_CR3, guest_cr3)
SHADOW_FIELD_RW(GUEST_CR4, guest_cr4)
SHADOW_FIELD_RW(GUEST_RFLAGS, guest_rflags)
SHADOW_FIELD_RW(CR0_GUEST_HOST_MASK, cr0_guest_host_mask)
SHADOW_FIELD_RW(CR0_READ_SHADOW, cr0_read_shadow)
SHADOW_FIELD_RW(CR4_READ_SHADOW, cr4_read_shadow)
SHADOW_FIELD_RW(HOST_FS_BASE, host_fs_base)
SHADOW_FIELD_RW(HOST_GS_BASE, host_gs_base)
/* 64-bit */
SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS)
SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH)
SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS, guest_physical_address)
SHADOW_FIELD_RO(GUEST_PHYSICAL_ADDRESS_HIGH, guest_physical_address)
#undef SHADOW_FIELD_RO
#undef SHADOW_FIELD_RW

View File

@ -389,6 +389,7 @@ static const struct kvm_vmx_segment_field {
};
u64 host_efer;
static unsigned long host_idt_base;
/*
* Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
@ -1035,6 +1036,33 @@ static void pt_guest_exit(struct vcpu_vmx *vmx)
wrmsrl(MSR_IA32_RTIT_CTL, vmx->pt_desc.host.ctl);
}
void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
unsigned long fs_base, unsigned long gs_base)
{
if (unlikely(fs_sel != host->fs_sel)) {
if (!(fs_sel & 7))
vmcs_write16(HOST_FS_SELECTOR, fs_sel);
else
vmcs_write16(HOST_FS_SELECTOR, 0);
host->fs_sel = fs_sel;
}
if (unlikely(gs_sel != host->gs_sel)) {
if (!(gs_sel & 7))
vmcs_write16(HOST_GS_SELECTOR, gs_sel);
else
vmcs_write16(HOST_GS_SELECTOR, 0);
host->gs_sel = gs_sel;
}
if (unlikely(fs_base != host->fs_base)) {
vmcs_writel(HOST_FS_BASE, fs_base);
host->fs_base = fs_base;
}
if (unlikely(gs_base != host->gs_base)) {
vmcs_writel(HOST_GS_BASE, gs_base);
host->gs_base = gs_base;
}
}
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@ -1053,20 +1081,18 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
* when guest state is loaded. This happens when guest transitions
* to/from long-mode by setting MSR_EFER.LMA.
*/
if (!vmx->loaded_cpu_state || vmx->guest_msrs_dirty) {
vmx->guest_msrs_dirty = false;
if (!vmx->guest_msrs_ready) {
vmx->guest_msrs_ready = true;
for (i = 0; i < vmx->save_nmsrs; ++i)
kvm_set_shared_msr(vmx->guest_msrs[i].index,
vmx->guest_msrs[i].data,
vmx->guest_msrs[i].mask);
}
if (vmx->loaded_cpu_state)
if (vmx->guest_state_loaded)
return;
vmx->loaded_cpu_state = vmx->loaded_vmcs;
host_state = &vmx->loaded_cpu_state->host_state;
host_state = &vmx->loaded_vmcs->host_state;
/*
* Set host fs and gs selectors. Unfortunately, 22.2.3 does not
@ -1100,42 +1126,20 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
gs_base = segment_base(gs_sel);
#endif
if (unlikely(fs_sel != host_state->fs_sel)) {
if (!(fs_sel & 7))
vmcs_write16(HOST_FS_SELECTOR, fs_sel);
else
vmcs_write16(HOST_FS_SELECTOR, 0);
host_state->fs_sel = fs_sel;
}
if (unlikely(gs_sel != host_state->gs_sel)) {
if (!(gs_sel & 7))
vmcs_write16(HOST_GS_SELECTOR, gs_sel);
else
vmcs_write16(HOST_GS_SELECTOR, 0);
host_state->gs_sel = gs_sel;
}
if (unlikely(fs_base != host_state->fs_base)) {
vmcs_writel(HOST_FS_BASE, fs_base);
host_state->fs_base = fs_base;
}
if (unlikely(gs_base != host_state->gs_base)) {
vmcs_writel(HOST_GS_BASE, gs_base);
host_state->gs_base = gs_base;
}
vmx_set_host_fs_gs(host_state, fs_sel, gs_sel, fs_base, gs_base);
vmx->guest_state_loaded = true;
}
static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
{
struct vmcs_host_state *host_state;
if (!vmx->loaded_cpu_state)
if (!vmx->guest_state_loaded)
return;
WARN_ON_ONCE(vmx->loaded_cpu_state != vmx->loaded_vmcs);
host_state = &vmx->loaded_cpu_state->host_state;
host_state = &vmx->loaded_vmcs->host_state;
++vmx->vcpu.stat.host_state_reload;
vmx->loaded_cpu_state = NULL;
#ifdef CONFIG_X86_64
rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
@ -1161,13 +1165,15 @@ static void vmx_prepare_switch_to_host(struct vcpu_vmx *vmx)
wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base);
#endif
load_fixmap_gdt(raw_smp_processor_id());
vmx->guest_state_loaded = false;
vmx->guest_msrs_ready = false;
}
#ifdef CONFIG_X86_64
static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
{
preempt_disable();
if (vmx->loaded_cpu_state)
if (vmx->guest_state_loaded)
rdmsrl(MSR_KERNEL_GS_BASE, vmx->msr_guest_kernel_gs_base);
preempt_enable();
return vmx->msr_guest_kernel_gs_base;
@ -1176,7 +1182,7 @@ static u64 vmx_read_guest_kernel_gs_base(struct vcpu_vmx *vmx)
static void vmx_write_guest_kernel_gs_base(struct vcpu_vmx *vmx, u64 data)
{
preempt_disable();
if (vmx->loaded_cpu_state)
if (vmx->guest_state_loaded)
wrmsrl(MSR_KERNEL_GS_BASE, data);
preempt_enable();
vmx->msr_guest_kernel_gs_base = data;
@ -1225,11 +1231,7 @@ static void vmx_vcpu_pi_load(struct kvm_vcpu *vcpu, int cpu)
pi_set_on(pi_desc);
}
/*
* Switches to specified vcpu, until a matching vcpu_put(), but assumes
* vcpu mutex is already taken.
*/
void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
bool already_loaded = vmx->loaded_vmcs->cpu == cpu;
@ -1290,8 +1292,20 @@ void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
if (kvm_has_tsc_control &&
vmx->current_tsc_ratio != vcpu->arch.tsc_scaling_ratio)
decache_tsc_multiplier(vmx);
}
/*
* Switches to specified vcpu, until a matching vcpu_put(), but assumes
* vcpu mutex is already taken.
*/
void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
vmx_vcpu_load_vmcs(vcpu, cpu);
vmx_vcpu_pi_load(vcpu, cpu);
vmx->host_pkru = read_pkru();
vmx->host_debugctlmsr = get_debugctlmsr();
}
@ -1310,7 +1324,7 @@ static void vmx_vcpu_pi_put(struct kvm_vcpu *vcpu)
pi_set_sn(pi_desc);
}
void vmx_vcpu_put(struct kvm_vcpu *vcpu)
static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
{
vmx_vcpu_pi_put(vcpu);
@ -1579,7 +1593,7 @@ static void setup_msrs(struct vcpu_vmx *vmx)
move_msr_up(vmx, index, save_nmsrs++);
vmx->save_nmsrs = save_nmsrs;
vmx->guest_msrs_dirty = true;
vmx->guest_msrs_ready = false;
if (cpu_has_vmx_msr_bitmap())
vmx_update_msr_bitmap(&vmx->vcpu);
@ -1692,9 +1706,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_IA32_SYSENTER_ESP:
msr_info->data = vmcs_readl(GUEST_SYSENTER_ESP);
break;
case MSR_IA32_POWER_CTL:
msr_info->data = vmx->msr_ia32_power_ctl;
break;
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
(!msr_info->host_initiated &&
@ -1718,7 +1729,10 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return vmx_get_vmx_msr(&vmx->nested.msrs, msr_info->index,
&msr_info->data);
case MSR_IA32_XSS:
if (!vmx_xsaves_supported())
if (!vmx_xsaves_supported() ||
(!msr_info->host_initiated &&
!(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
return 1;
msr_info->data = vcpu->arch.ia32_xss;
break;
@ -1817,17 +1831,28 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
break;
#endif
case MSR_IA32_SYSENTER_CS:
if (is_guest_mode(vcpu))
get_vmcs12(vcpu)->guest_sysenter_cs = data;
vmcs_write32(GUEST_SYSENTER_CS, data);
break;
case MSR_IA32_SYSENTER_EIP:
if (is_guest_mode(vcpu))
get_vmcs12(vcpu)->guest_sysenter_eip = data;
vmcs_writel(GUEST_SYSENTER_EIP, data);
break;
case MSR_IA32_SYSENTER_ESP:
if (is_guest_mode(vcpu))
get_vmcs12(vcpu)->guest_sysenter_esp = data;
vmcs_writel(GUEST_SYSENTER_ESP, data);
break;
case MSR_IA32_POWER_CTL:
vmx->msr_ia32_power_ctl = data;
case MSR_IA32_DEBUGCTLMSR:
if (is_guest_mode(vcpu) && get_vmcs12(vcpu)->vm_exit_controls &
VM_EXIT_SAVE_DEBUG_CONTROLS)
get_vmcs12(vcpu)->guest_ia32_debugctl = data;
ret = kvm_set_msr_common(vcpu, msr_info);
break;
case MSR_IA32_BNDCFGS:
if (!kvm_mpx_supported() ||
(!msr_info->host_initiated &&
@ -1896,9 +1921,14 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
MSR_TYPE_W);
break;
case MSR_IA32_CR_PAT:
if (!kvm_pat_valid(data))
return 1;
if (is_guest_mode(vcpu) &&
get_vmcs12(vcpu)->vm_exit_controls & VM_EXIT_SAVE_IA32_PAT)
get_vmcs12(vcpu)->guest_ia32_pat = data;
if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
if (!kvm_pat_valid(data))
return 1;
vmcs_write64(GUEST_IA32_PAT, data);
vcpu->arch.pat = data;
break;
@ -1932,7 +1962,10 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
return vmx_set_vmx_msr(vcpu, msr_index, data);
case MSR_IA32_XSS:
if (!vmx_xsaves_supported())
if (!vmx_xsaves_supported() ||
(!msr_info->host_initiated &&
!(guest_cpuid_has(vcpu, X86_FEATURE_XSAVE) &&
guest_cpuid_has(vcpu, X86_FEATURE_XSAVES))))
return 1;
/*
* The only supported bit as of Skylake is bit 8, but
@ -2435,6 +2468,7 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
return -ENOMEM;
loaded_vmcs->shadow_vmcs = NULL;
loaded_vmcs->hv_timer_soft_disabled = false;
loaded_vmcs_init(loaded_vmcs);
if (cpu_has_vmx_msr_bitmap()) {
@ -2455,6 +2489,8 @@ int alloc_loaded_vmcs(struct loaded_vmcs *loaded_vmcs)
}
memset(&loaded_vmcs->host_state, 0, sizeof(struct vmcs_host_state));
memset(&loaded_vmcs->controls_shadow, 0,
sizeof(struct vmcs_controls_shadow));
return 0;
@ -2737,7 +2773,7 @@ static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
(unsigned long *)&vcpu->arch.regs_dirty))
return;
if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
if (is_pae_paging(vcpu)) {
vmcs_write64(GUEST_PDPTR0, mmu->pdptrs[0]);
vmcs_write64(GUEST_PDPTR1, mmu->pdptrs[1]);
vmcs_write64(GUEST_PDPTR2, mmu->pdptrs[2]);
@ -2749,7 +2785,7 @@ void ept_save_pdptrs(struct kvm_vcpu *vcpu)
{
struct kvm_mmu *mmu = vcpu->arch.walk_mmu;
if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
if (is_pae_paging(vcpu)) {
mmu->pdptrs[0] = vmcs_read64(GUEST_PDPTR0);
mmu->pdptrs[1] = vmcs_read64(GUEST_PDPTR1);
mmu->pdptrs[2] = vmcs_read64(GUEST_PDPTR2);
@ -2766,22 +2802,20 @@ static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
unsigned long cr0,
struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
if (!test_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail))
vmx_decache_cr3(vcpu);
if (!(cr0 & X86_CR0_PG)) {
/* From paging/starting to nonpaging */
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) |
(CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING));
exec_controls_setbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING);
vcpu->arch.cr0 = cr0;
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
} else if (!is_paging(vcpu)) {
/* From nonpaging to paging */
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
vmcs_read32(CPU_BASED_VM_EXEC_CONTROL) &
~(CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING));
exec_controls_clearbit(vmx, CPU_BASED_CR3_LOAD_EXITING |
CPU_BASED_CR3_STORE_EXITING);
vcpu->arch.cr0 = cr0;
vmx_set_cr4(vcpu, kvm_read_cr4(vcpu));
}
@ -2881,6 +2915,7 @@ void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
/*
* Pass through host's Machine Check Enable value to hw_cr4, which
* is in force while we are in guest mode. Do not let guests control
@ -2891,20 +2926,19 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
hw_cr4 = (cr4_read_shadow() & X86_CR4_MCE) | (cr4 & ~X86_CR4_MCE);
if (enable_unrestricted_guest)
hw_cr4 |= KVM_VM_CR4_ALWAYS_ON_UNRESTRICTED_GUEST;
else if (to_vmx(vcpu)->rmode.vm86_active)
else if (vmx->rmode.vm86_active)
hw_cr4 |= KVM_RMODE_VM_CR4_ALWAYS_ON;
else
hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON;
if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) {
if (cr4 & X86_CR4_UMIP) {
vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_DESC);
secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_DESC);
hw_cr4 &= ~X86_CR4_UMIP;
} else if (!is_guest_mode(vcpu) ||
!nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC))
vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
SECONDARY_EXEC_DESC);
!nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) {
secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_DESC);
}
}
if (cr4 & X86_CR4_VMXE) {
@ -2919,7 +2953,7 @@ int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
return 1;
}
if (to_vmx(vcpu)->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
if (vmx->nested.vmxon && !nested_cr4_valid(vcpu, cr4))
return 1;
vcpu->arch.cr4 = cr4;
@ -3537,7 +3571,7 @@ static u8 vmx_msr_bitmap_mode(struct kvm_vcpu *vcpu)
u8 mode = 0;
if (cpu_has_secondary_exec_ctrls() &&
(vmcs_read32(SECONDARY_VM_EXEC_CONTROL) &
(secondary_exec_controls_get(to_vmx(vcpu)) &
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE)) {
mode |= MSR_BITMAP_MODE_X2APIC;
if (enable_apicv && kvm_vcpu_apicv_active(vcpu))
@ -3731,7 +3765,6 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
{
u32 low32, high32;
unsigned long tmpl;
struct desc_ptr dt;
unsigned long cr0, cr3, cr4;
cr0 = read_cr0();
@ -3767,9 +3800,7 @@ void vmx_set_constant_host_state(struct vcpu_vmx *vmx)
vmcs_write16(HOST_SS_SELECTOR, __KERNEL_DS); /* 22.2.4 */
vmcs_write16(HOST_TR_SELECTOR, GDT_ENTRY_TSS*8); /* 22.2.4 */
store_idt(&dt);
vmcs_writel(HOST_IDTR_BASE, dt.address); /* 22.2.4 */
vmx->host_idt_base = dt.address;
vmcs_writel(HOST_IDTR_BASE, host_idt_base); /* 22.2.4 */
vmcs_writel(HOST_RIP, (unsigned long)vmx_vmexit); /* 22.2.5 */
@ -3798,7 +3829,7 @@ void set_cr4_guest_host_mask(struct vcpu_vmx *vmx)
vmcs_writel(CR4_GUEST_HOST_MASK, ~vmx->vcpu.arch.cr4_guest_owned_bits);
}
static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
{
u32 pin_based_exec_ctrl = vmcs_config.pin_based_exec_ctrl;
@ -3808,8 +3839,9 @@ static u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx)
if (!enable_vnmi)
pin_based_exec_ctrl &= ~PIN_BASED_VIRTUAL_NMIS;
/* Enable the preemption timer dynamically */
pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
if (!enable_preemption_timer)
pin_based_exec_ctrl &= ~PIN_BASED_VMX_PREEMPTION_TIMER;
return pin_based_exec_ctrl;
}
@ -3817,14 +3849,14 @@ static void vmx_refresh_apicv_exec_ctrl(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
if (cpu_has_secondary_exec_ctrls()) {
if (kvm_vcpu_apicv_active(vcpu))
vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL,
secondary_exec_controls_setbit(vmx,
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
else
vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL,
secondary_exec_controls_clearbit(vmx,
SECONDARY_EXEC_APIC_REGISTER_VIRT |
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY);
}
@ -4015,15 +4047,14 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
vmcs_write64(VMCS_LINK_POINTER, -1ull); /* 22.3.1.5 */
/* Control */
vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_ctrl(vmx));
pin_controls_set(vmx, vmx_pin_based_exec_ctrl(vmx));
vmx->hv_deadline_tsc = -1;
vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, vmx_exec_control(vmx));
exec_controls_set(vmx, vmx_exec_control(vmx));
if (cpu_has_secondary_exec_ctrls()) {
vmx_compute_secondary_exec_control(vmx);
vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
vmx->secondary_exec_control);
secondary_exec_controls_set(vmx, vmx->secondary_exec_control);
}
if (kvm_vcpu_apicv_active(&vmx->vcpu)) {
@ -4081,10 +4112,10 @@ static void vmx_vcpu_setup(struct vcpu_vmx *vmx)
++vmx->nmsrs;
}
vm_exit_controls_init(vmx, vmx_vmexit_ctrl());
vm_exit_controls_set(vmx, vmx_vmexit_ctrl());
/* 22.2.1, 20.8.1 */
vm_entry_controls_init(vmx, vmx_vmentry_ctrl());
vm_entry_controls_set(vmx, vmx_vmentry_ctrl());
vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS;
vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS);
@ -4208,8 +4239,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
static void enable_irq_window(struct kvm_vcpu *vcpu)
{
vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_VIRTUAL_INTR_PENDING);
exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
}
static void enable_nmi_window(struct kvm_vcpu *vcpu)
@ -4220,8 +4250,7 @@ static void enable_nmi_window(struct kvm_vcpu *vcpu)
return;
}
vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_VIRTUAL_NMI_PENDING);
exec_controls_setbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING);
}
static void vmx_inject_irq(struct kvm_vcpu *vcpu)
@ -4442,11 +4471,11 @@ static void kvm_machine_check(void)
static int handle_machine_check(struct kvm_vcpu *vcpu)
{
/* already handled by vcpu_run */
/* handled by vmx_vcpu_run() */
return 1;
}
static int handle_exception(struct kvm_vcpu *vcpu)
static int handle_exception_nmi(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
struct kvm_run *kvm_run = vcpu->run;
@ -4458,11 +4487,8 @@ static int handle_exception(struct kvm_vcpu *vcpu)
vect_info = vmx->idt_vectoring_info;
intr_info = vmx->exit_intr_info;
if (is_machine_check(intr_info))
return handle_machine_check(vcpu);
if (is_nmi(intr_info))
return 1; /* already handled by vmx_vcpu_run() */
if (is_machine_check(intr_info) || is_nmi(intr_info))
return 1; /* handled by handle_exception_nmi_irqoff() */
if (is_invalid_opcode(intr_info))
return handle_ud(vcpu);
@ -4518,7 +4544,7 @@ static int handle_exception(struct kvm_vcpu *vcpu)
dr6 = vmcs_readl(EXIT_QUALIFICATION);
if (!(vcpu->guest_debug &
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))) {
vcpu->arch.dr6 &= ~15;
vcpu->arch.dr6 &= ~DR_TRAP_BITS;
vcpu->arch.dr6 |= dr6 | DR6_RTM;
if (is_icebp(intr_info))
skip_emulated_instruction(vcpu);
@ -4763,7 +4789,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
vcpu->run->exit_reason = KVM_EXIT_DEBUG;
return 0;
} else {
vcpu->arch.dr6 &= ~15;
vcpu->arch.dr6 &= ~DR_TRAP_BITS;
vcpu->arch.dr6 |= DR6_BD | DR6_RTM;
kvm_queue_exception(vcpu, DB_VECTOR);
return 1;
@ -4771,8 +4797,7 @@ static int handle_dr(struct kvm_vcpu *vcpu)
}
if (vcpu->guest_debug == 0) {
vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_MOV_DR_EXITING);
exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
/*
* No more DR vmexits; force a reload of the debug registers
@ -4816,7 +4841,7 @@ static void vmx_sync_dirty_debug_regs(struct kvm_vcpu *vcpu)
vcpu->arch.dr7 = vmcs_readl(GUEST_DR7);
vcpu->arch.switch_db_regs &= ~KVM_DEBUGREG_WONT_EXIT;
vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, CPU_BASED_MOV_DR_EXITING);
exec_controls_setbit(to_vmx(vcpu), CPU_BASED_MOV_DR_EXITING);
}
static void vmx_set_dr7(struct kvm_vcpu *vcpu, unsigned long val)
@ -4876,8 +4901,7 @@ static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu)
static int handle_interrupt_window(struct kvm_vcpu *vcpu)
{
vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_VIRTUAL_INTR_PENDING);
exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_INTR_PENDING);
kvm_make_request(KVM_REQ_EVENT, vcpu);
@ -5131,8 +5155,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu)
static int handle_nmi_window(struct kvm_vcpu *vcpu)
{
WARN_ON_ONCE(!enable_vnmi);
vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL,
CPU_BASED_VIRTUAL_NMI_PENDING);
exec_controls_clearbit(to_vmx(vcpu), CPU_BASED_VIRTUAL_NMI_PENDING);
++vcpu->stat.nmi_window_exits;
kvm_make_request(KVM_REQ_EVENT, vcpu);
@ -5144,7 +5167,6 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
struct vcpu_vmx *vmx = to_vmx(vcpu);
enum emulation_result err = EMULATE_DONE;
int ret = 1;
u32 cpu_exec_ctrl;
bool intr_window_requested;
unsigned count = 130;
@ -5155,8 +5177,8 @@ static int handle_invalid_guest_state(struct kvm_vcpu *vcpu)
*/
WARN_ON_ONCE(vmx->emulation_required && vmx->nested.nested_run_pending);
cpu_exec_ctrl = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL);
intr_window_requested = cpu_exec_ctrl & CPU_BASED_VIRTUAL_INTR_PENDING;
intr_window_requested = exec_controls_get(vmx) &
CPU_BASED_VIRTUAL_INTR_PENDING;
while (vmx->emulation_required && count-- != 0) {
if (intr_window_requested && vmx_interrupt_allowed(vcpu))
@ -5342,7 +5364,8 @@ static int handle_invpcid(struct kvm_vcpu *vcpu)
* is read even if it isn't needed (e.g., for type==all)
*/
if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
vmx_instruction_info, false, &gva))
vmx_instruction_info, false,
sizeof(operand), &gva))
return 1;
if (kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e)) {
@ -5437,8 +5460,12 @@ static int handle_pml_full(struct kvm_vcpu *vcpu)
static int handle_preemption_timer(struct kvm_vcpu *vcpu)
{
if (!to_vmx(vcpu)->req_immediate_exit)
struct vcpu_vmx *vmx = to_vmx(vcpu);
if (!vmx->req_immediate_exit &&
!unlikely(vmx->loaded_vmcs->hv_timer_soft_disabled))
kvm_lapic_expired_hv_timer(vcpu);
return 1;
}
@ -5469,7 +5496,7 @@ static int handle_encls(struct kvm_vcpu *vcpu)
* to be done to userspace and return 0.
*/
static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu) = {
[EXIT_REASON_EXCEPTION_NMI] = handle_exception,
[EXIT_REASON_EXCEPTION_NMI] = handle_exception_nmi,
[EXIT_REASON_EXTERNAL_INTERRUPT] = handle_external_interrupt,
[EXIT_REASON_TRIPLE_FAULT] = handle_triple_fault,
[EXIT_REASON_NMI_WINDOW] = handle_nmi_window,
@ -5952,6 +5979,7 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 sec_exec_control;
if (!lapic_in_kernel(vcpu))
@ -5963,11 +5991,11 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
/* Postpone execution until vmcs01 is the current VMCS. */
if (is_guest_mode(vcpu)) {
to_vmx(vcpu)->nested.change_vmcs01_virtual_apic_mode = true;
vmx->nested.change_vmcs01_virtual_apic_mode = true;
return;
}
sec_exec_control = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
sec_exec_control = secondary_exec_controls_get(vmx);
sec_exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE);
@ -5989,7 +6017,7 @@ void vmx_set_virtual_apic_mode(struct kvm_vcpu *vcpu)
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE;
break;
}
vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control);
secondary_exec_controls_set(vmx, sec_exec_control);
vmx_update_msr_bitmap(vcpu);
}
@ -6107,76 +6135,81 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir));
}
static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
static void handle_exception_nmi_irqoff(struct vcpu_vmx *vmx)
{
u32 exit_intr_info = 0;
u16 basic_exit_reason = (u16)vmx->exit_reason;
if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
|| basic_exit_reason == EXIT_REASON_EXCEPTION_NMI))
return;
if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
vmx->exit_intr_info = exit_intr_info;
vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
/* if exit due to PF check for async PF */
if (is_page_fault(exit_intr_info))
if (is_page_fault(vmx->exit_intr_info))
vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
/* Handle machine checks before interrupts are enabled */
if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY ||
is_machine_check(exit_intr_info))
if (is_machine_check(vmx->exit_intr_info))
kvm_machine_check();
/* We need to handle NMIs before interrupts are enabled */
if (is_nmi(exit_intr_info)) {
if (is_nmi(vmx->exit_intr_info)) {
kvm_before_interrupt(&vmx->vcpu);
asm("int $2");
kvm_after_interrupt(&vmx->vcpu);
}
}
static void vmx_handle_external_intr(struct kvm_vcpu *vcpu)
static void handle_external_interrupt_irqoff(struct kvm_vcpu *vcpu)
{
u32 exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
unsigned int vector;
unsigned long entry;
#ifdef CONFIG_X86_64
unsigned long tmp;
#endif
gate_desc *desc;
u32 intr_info;
if ((exit_intr_info & (INTR_INFO_VALID_MASK | INTR_INFO_INTR_TYPE_MASK))
== (INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR)) {
unsigned int vector;
unsigned long entry;
gate_desc *desc;
struct vcpu_vmx *vmx = to_vmx(vcpu);
#ifdef CONFIG_X86_64
unsigned long tmp;
#endif
intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
if (WARN_ONCE(!is_external_intr(intr_info),
"KVM: unexpected VM-Exit interrupt info: 0x%x", intr_info))
return;
vector = exit_intr_info & INTR_INFO_VECTOR_MASK;
desc = (gate_desc *)vmx->host_idt_base + vector;
entry = gate_offset(desc);
asm volatile(
vector = intr_info & INTR_INFO_VECTOR_MASK;
desc = (gate_desc *)host_idt_base + vector;
entry = gate_offset(desc);
kvm_before_interrupt(vcpu);
asm volatile(
#ifdef CONFIG_X86_64
"mov %%" _ASM_SP ", %[sp]\n\t"
"and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
"push $%c[ss]\n\t"
"push %[sp]\n\t"
"mov %%" _ASM_SP ", %[sp]\n\t"
"and $0xfffffffffffffff0, %%" _ASM_SP "\n\t"
"push $%c[ss]\n\t"
"push %[sp]\n\t"
#endif
"pushf\n\t"
__ASM_SIZE(push) " $%c[cs]\n\t"
CALL_NOSPEC
:
"pushf\n\t"
__ASM_SIZE(push) " $%c[cs]\n\t"
CALL_NOSPEC
:
#ifdef CONFIG_X86_64
[sp]"=&r"(tmp),
[sp]"=&r"(tmp),
#endif
ASM_CALL_CONSTRAINT
:
THUNK_TARGET(entry),
[ss]"i"(__KERNEL_DS),
[cs]"i"(__KERNEL_CS)
);
}
ASM_CALL_CONSTRAINT
:
THUNK_TARGET(entry),
[ss]"i"(__KERNEL_DS),
[cs]"i"(__KERNEL_CS)
);
kvm_after_interrupt(vcpu);
}
STACK_FRAME_NON_STANDARD(handle_external_interrupt_irqoff);
static void vmx_handle_exit_irqoff(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
if (vmx->exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT)
handle_external_interrupt_irqoff(vcpu);
else if (vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI)
handle_exception_nmi_irqoff(vmx);
}
STACK_FRAME_NON_STANDARD(vmx_handle_external_intr);
static bool vmx_has_emulated_msr(int index)
{
@ -6187,6 +6220,8 @@ static bool vmx_has_emulated_msr(int index)
* real mode.
*/
return enable_unrestricted_guest || emulate_invalid_guest_state;
case MSR_IA32_VMX_BASIC ... MSR_IA32_VMX_VMFUNC:
return nested;
case MSR_AMD64_VIRT_SPEC_CTRL:
/* This is AMD only. */
return false;
@ -6332,15 +6367,6 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
msrs[i].host, false);
}
static void vmx_arm_hv_timer(struct vcpu_vmx *vmx, u32 val)
{
vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, val);
if (!vmx->loaded_vmcs->hv_timer_armed)
vmcs_set_bits(PIN_BASED_VM_EXEC_CONTROL,
PIN_BASED_VMX_PREEMPTION_TIMER);
vmx->loaded_vmcs->hv_timer_armed = true;
}
static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
{
struct vcpu_vmx *vmx = to_vmx(vcpu);
@ -6348,11 +6374,9 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
u32 delta_tsc;
if (vmx->req_immediate_exit) {
vmx_arm_hv_timer(vmx, 0);
return;
}
if (vmx->hv_deadline_tsc != -1) {
vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, 0);
vmx->loaded_vmcs->hv_timer_soft_disabled = false;
} else if (vmx->hv_deadline_tsc != -1) {
tscl = rdtsc();
if (vmx->hv_deadline_tsc > tscl)
/* set_hv_timer ensures the delta fits in 32-bits */
@ -6361,14 +6385,12 @@ static void vmx_update_hv_timer(struct kvm_vcpu *vcpu)
else
delta_tsc = 0;
vmx_arm_hv_timer(vmx, delta_tsc);
return;
vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, delta_tsc);
vmx->loaded_vmcs->hv_timer_soft_disabled = false;
} else if (!vmx->loaded_vmcs->hv_timer_soft_disabled) {
vmcs_write32(VMX_PREEMPTION_TIMER_VALUE, -1);
vmx->loaded_vmcs->hv_timer_soft_disabled = true;
}
if (vmx->loaded_vmcs->hv_timer_armed)
vmcs_clear_bits(PIN_BASED_VM_EXEC_CONTROL,
PIN_BASED_VMX_PREEMPTION_TIMER);
vmx->loaded_vmcs->hv_timer_armed = false;
}
void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp)
@ -6401,8 +6423,8 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmcs_write32(PLE_WINDOW, vmx->ple_window);
}
if (vmx->nested.need_vmcs12_sync)
nested_sync_from_vmcs12(vcpu);
if (vmx->nested.need_vmcs12_to_shadow_sync)
nested_sync_vmcs12_to_shadow(vcpu);
if (test_bit(VCPU_REGS_RSP, (unsigned long *)&vcpu->arch.regs_dirty))
vmcs_writel(GUEST_RSP, vcpu->arch.regs[VCPU_REGS_RSP]);
@ -6440,7 +6462,12 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
atomic_switch_perf_msrs(vmx);
vmx_update_hv_timer(vcpu);
if (enable_preemption_timer)
vmx_update_hv_timer(vcpu);
if (lapic_in_kernel(vcpu) &&
vcpu->arch.apic->lapic_timer.timer_advance_ns)
kvm_wait_lapic_expire(vcpu);
/*
* If this vCPU has touched SPEC_CTRL, restore the guest's value if
@ -6533,13 +6560,15 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
vmx->idt_vectoring_info = 0;
vmx->exit_reason = vmx->fail ? 0xdead : vmcs_read32(VM_EXIT_REASON);
if ((u16)vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY)
kvm_machine_check();
if (vmx->fail || (vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
return;
vmx->loaded_vmcs->launched = 1;
vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
vmx_complete_atomic_exit(vmx);
vmx_recover_nmi_blocking(vmx);
vmx_complete_interrupts(vmx);
}
@ -6630,6 +6659,12 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_CS, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_ESP, MSR_TYPE_RW);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_IA32_SYSENTER_EIP, MSR_TYPE_RW);
if (kvm_cstate_in_guest(kvm)) {
vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C1_RES, MSR_TYPE_R);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C3_RESIDENCY, MSR_TYPE_R);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C6_RESIDENCY, MSR_TYPE_R);
vmx_disable_intercept_for_msr(msr_bitmap, MSR_CORE_C7_RESIDENCY, MSR_TYPE_R);
}
vmx->msr_bitmap_mode = 0;
vmx->loaded_vmcs = &vmx->vmcs01;
@ -6726,22 +6761,22 @@ static int vmx_vm_init(struct kvm *kvm)
return 0;
}
static void __init vmx_check_processor_compat(void *rtn)
static int __init vmx_check_processor_compat(void)
{
struct vmcs_config vmcs_conf;
struct vmx_capability vmx_cap;
*(int *)rtn = 0;
if (setup_vmcs_config(&vmcs_conf, &vmx_cap) < 0)
*(int *)rtn = -EIO;
return -EIO;
if (nested)
nested_vmx_setup_ctls_msrs(&vmcs_conf.nested, vmx_cap.ept,
enable_apicv);
if (memcmp(&vmcs_config, &vmcs_conf, sizeof(struct vmcs_config)) != 0) {
printk(KERN_ERR "kvm: CPU %d feature inconsistency!\n",
smp_processor_id());
*(int *)rtn = -EIO;
return -EIO;
}
return 0;
}
static u64 vmx_get_mt_mask(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio)
@ -6795,7 +6830,7 @@ static int vmx_get_lpage_level(void)
return PT_PDPE_LEVEL;
}
static void vmcs_set_secondary_exec_control(u32 new_ctl)
static void vmcs_set_secondary_exec_control(struct vcpu_vmx *vmx)
{
/*
* These bits in the secondary execution controls field
@ -6809,10 +6844,10 @@ static void vmcs_set_secondary_exec_control(u32 new_ctl)
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
SECONDARY_EXEC_DESC;
u32 cur_ctl = vmcs_read32(SECONDARY_VM_EXEC_CONTROL);
u32 new_ctl = vmx->secondary_exec_control;
u32 cur_ctl = secondary_exec_controls_get(vmx);
vmcs_write32(SECONDARY_VM_EXEC_CONTROL,
(new_ctl & ~mask) | (cur_ctl & mask));
secondary_exec_controls_set(vmx, (new_ctl & ~mask) | (cur_ctl & mask));
}
/*
@ -6950,7 +6985,7 @@ static void vmx_cpuid_update(struct kvm_vcpu *vcpu)
if (cpu_has_secondary_exec_ctrls()) {
vmx_compute_secondary_exec_control(vmx);
vmcs_set_secondary_exec_control(vmx->secondary_exec_control);
vmcs_set_secondary_exec_control(vmx);
}
if (nested_vmx_allowed(vcpu))
@ -7424,10 +7459,14 @@ static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
static __init int hardware_setup(void)
{
unsigned long host_bndcfgs;
struct desc_ptr dt;
int r, i;
rdmsrl_safe(MSR_EFER, &host_efer);
store_idt(&dt);
host_idt_base = dt.address;
for (i = 0; i < ARRAY_SIZE(vmx_msr_index); ++i)
kvm_define_shared_msr(i, vmx_msr_index[i]);
@ -7531,17 +7570,33 @@ static __init int hardware_setup(void)
}
if (!cpu_has_vmx_preemption_timer())
kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
enable_preemption_timer = false;
if (cpu_has_vmx_preemption_timer() && enable_preemption_timer) {
if (enable_preemption_timer) {
u64 use_timer_freq = 5000ULL * 1000 * 1000;
u64 vmx_msr;
rdmsrl(MSR_IA32_VMX_MISC, vmx_msr);
cpu_preemption_timer_multi =
vmx_msr & VMX_MISC_PREEMPTION_TIMER_RATE_MASK;
} else {
if (tsc_khz)
use_timer_freq = (u64)tsc_khz * 1000;
use_timer_freq >>= cpu_preemption_timer_multi;
/*
* KVM "disables" the preemption timer by setting it to its max
* value. Don't use the timer if it might cause spurious exits
* at a rate faster than 0.1 Hz (of uninterrupted guest time).
*/
if (use_timer_freq > 0xffffffffu / 10)
enable_preemption_timer = false;
}
if (!enable_preemption_timer) {
kvm_x86_ops->set_hv_timer = NULL;
kvm_x86_ops->cancel_hv_timer = NULL;
kvm_x86_ops->request_immediate_exit = __kvm_request_immediate_exit;
}
kvm_set_posted_intr_wakeup_handler(wakeup_handler);
@ -7683,7 +7738,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = {
.set_tdp_cr3 = vmx_set_cr3,
.check_intercept = vmx_check_intercept,
.handle_external_intr = vmx_handle_external_intr,
.handle_exit_irqoff = vmx_handle_exit_irqoff,
.mpx_supported = vmx_mpx_supported,
.xsaves_supported = vmx_xsaves_supported,
.umip_emulated = vmx_umip_emulated,

View File

@ -109,13 +109,20 @@ struct nested_vmx {
* to guest memory during VM exit.
*/
struct vmcs12 *cached_shadow_vmcs12;
/*
* Indicates if the shadow vmcs or enlightened vmcs must be updated
* with the data held by struct vmcs12.
*/
bool need_vmcs12_sync;
bool need_vmcs12_to_shadow_sync;
bool dirty_vmcs12;
/*
* Indicates lazily loaded guest state has not yet been decached from
* vmcs02.
*/
bool need_sync_vmcs02_to_vmcs12_rare;
/*
* vmcs02 has been initialized, i.e. state that is constant for
* vmcs02 has been written to the backing VMCS. Initialization
@ -180,14 +187,24 @@ struct vcpu_vmx {
struct kvm_vcpu vcpu;
u8 fail;
u8 msr_bitmap_mode;
/*
* If true, host state has been stored in vmx->loaded_vmcs for
* the CPU registers that only need to be switched when transitioning
* to/from the kernel, and the registers have been loaded with guest
* values. If false, host state is loaded in the CPU registers
* and vmx->loaded_vmcs->host_state is invalid.
*/
bool guest_state_loaded;
u32 exit_intr_info;
u32 idt_vectoring_info;
ulong rflags;
struct shared_msr_entry *guest_msrs;
int nmsrs;
int save_nmsrs;
bool guest_msrs_dirty;
unsigned long host_idt_base;
bool guest_msrs_ready;
#ifdef CONFIG_X86_64
u64 msr_host_kernel_gs_base;
u64 msr_guest_kernel_gs_base;
@ -195,21 +212,15 @@ struct vcpu_vmx {
u64 spec_ctrl;
u32 vm_entry_controls_shadow;
u32 vm_exit_controls_shadow;
u32 secondary_exec_control;
/*
* loaded_vmcs points to the VMCS currently used in this vcpu. For a
* non-nested (L1) guest, it always points to vmcs01. For a nested
* guest (L2), it points to a different VMCS. loaded_cpu_state points
* to the VMCS whose state is loaded into the CPU registers that only
* need to be switched when transitioning to/from the kernel; a NULL
* value indicates that host state is loaded.
* guest (L2), it points to a different VMCS.
*/
struct loaded_vmcs vmcs01;
struct loaded_vmcs *loaded_vmcs;
struct loaded_vmcs *loaded_cpu_state;
struct msr_autoload {
struct vmx_msrs guest;
@ -260,8 +271,6 @@ struct vcpu_vmx {
unsigned long host_debugctlmsr;
u64 msr_ia32_power_ctl;
/*
* Only bits masked by msr_ia32_feature_control_valid_bits can be set in
* msr_ia32_feature_control. FEATURE_CONTROL_LOCKED is always included
@ -292,12 +301,14 @@ struct kvm_vmx {
};
bool nested_vmx_allowed(struct kvm_vcpu *vcpu);
void vmx_vcpu_load_vmcs(struct kvm_vcpu *vcpu, int cpu);
void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
void vmx_vcpu_put(struct kvm_vcpu *vcpu);
int allocate_vpid(void);
void free_vpid(int vpid);
void vmx_set_constant_host_state(struct vcpu_vmx *vmx);
void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu);
void vmx_set_host_fs_gs(struct vmcs_host_state *host, u16 fs_sel, u16 gs_sel,
unsigned long fs_base, unsigned long gs_base);
int vmx_get_cpl(struct kvm_vcpu *vcpu);
unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu);
void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
@ -376,69 +387,31 @@ static inline u8 vmx_get_rvi(void)
return vmcs_read16(GUEST_INTR_STATUS) & 0xff;
}
static inline void vm_entry_controls_reset_shadow(struct vcpu_vmx *vmx)
{
vmx->vm_entry_controls_shadow = vmcs_read32(VM_ENTRY_CONTROLS);
}
static inline void vm_entry_controls_init(struct vcpu_vmx *vmx, u32 val)
{
vmcs_write32(VM_ENTRY_CONTROLS, val);
vmx->vm_entry_controls_shadow = val;
}
static inline void vm_entry_controls_set(struct vcpu_vmx *vmx, u32 val)
{
if (vmx->vm_entry_controls_shadow != val)
vm_entry_controls_init(vmx, val);
}
static inline u32 vm_entry_controls_get(struct vcpu_vmx *vmx)
{
return vmx->vm_entry_controls_shadow;
}
static inline void vm_entry_controls_setbit(struct vcpu_vmx *vmx, u32 val)
{
vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) | val);
}
static inline void vm_entry_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
{
vm_entry_controls_set(vmx, vm_entry_controls_get(vmx) & ~val);
}
static inline void vm_exit_controls_reset_shadow(struct vcpu_vmx *vmx)
{
vmx->vm_exit_controls_shadow = vmcs_read32(VM_EXIT_CONTROLS);
}
static inline void vm_exit_controls_init(struct vcpu_vmx *vmx, u32 val)
{
vmcs_write32(VM_EXIT_CONTROLS, val);
vmx->vm_exit_controls_shadow = val;
}
static inline void vm_exit_controls_set(struct vcpu_vmx *vmx, u32 val)
{
if (vmx->vm_exit_controls_shadow != val)
vm_exit_controls_init(vmx, val);
}
static inline u32 vm_exit_controls_get(struct vcpu_vmx *vmx)
{
return vmx->vm_exit_controls_shadow;
}
static inline void vm_exit_controls_setbit(struct vcpu_vmx *vmx, u32 val)
{
vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) | val);
}
static inline void vm_exit_controls_clearbit(struct vcpu_vmx *vmx, u32 val)
{
vm_exit_controls_set(vmx, vm_exit_controls_get(vmx) & ~val);
#define BUILD_CONTROLS_SHADOW(lname, uname) \
static inline void lname##_controls_set(struct vcpu_vmx *vmx, u32 val) \
{ \
if (vmx->loaded_vmcs->controls_shadow.lname != val) { \
vmcs_write32(uname, val); \
vmx->loaded_vmcs->controls_shadow.lname = val; \
} \
} \
static inline u32 lname##_controls_get(struct vcpu_vmx *vmx) \
{ \
return vmx->loaded_vmcs->controls_shadow.lname; \
} \
static inline void lname##_controls_setbit(struct vcpu_vmx *vmx, u32 val) \
{ \
lname##_controls_set(vmx, lname##_controls_get(vmx) | val); \
} \
static inline void lname##_controls_clearbit(struct vcpu_vmx *vmx, u32 val) \
{ \
lname##_controls_set(vmx, lname##_controls_get(vmx) & ~val); \
}
BUILD_CONTROLS_SHADOW(vm_entry, VM_ENTRY_CONTROLS)
BUILD_CONTROLS_SHADOW(vm_exit, VM_EXIT_CONTROLS)
BUILD_CONTROLS_SHADOW(pin, PIN_BASED_VM_EXEC_CONTROL)
BUILD_CONTROLS_SHADOW(exec, CPU_BASED_VM_EXEC_CONTROL)
BUILD_CONTROLS_SHADOW(secondary_exec, SECONDARY_VM_EXEC_CONTROL)
static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
{
@ -468,6 +441,7 @@ static inline u32 vmx_vmexit_ctrl(void)
}
u32 vmx_exec_control(struct vcpu_vmx *vmx);
u32 vmx_pin_based_exec_ctrl(struct vcpu_vmx *vmx);
static inline struct kvm_vmx *to_kvm_vmx(struct kvm *kvm)
{

View File

@ -717,7 +717,7 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu)
gfn_t gfn;
int r;
if (is_long_mode(vcpu) || !is_pae(vcpu) || !is_paging(vcpu))
if (!is_pae_paging(vcpu))
return false;
if (!test_bit(VCPU_EXREG_PDPTR,
@ -960,8 +960,8 @@ int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
if (is_long_mode(vcpu) &&
(cr3 & rsvd_bits(cpuid_maxphyaddr(vcpu), 63)))
return 1;
else if (is_pae(vcpu) && is_paging(vcpu) &&
!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
else if (is_pae_paging(vcpu) &&
!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
return 1;
kvm_mmu_new_cr3(vcpu, cr3, skip_tlb_flush);
@ -1174,7 +1174,28 @@ static u32 emulated_msrs[] = {
MSR_AMD64_VIRT_SPEC_CTRL,
MSR_IA32_POWER_CTL,
/*
* The following list leaves out MSRs whose values are determined
* by arch/x86/kvm/vmx/nested.c based on CPUID or other MSRs.
* We always support the "true" VMX control MSRs, even if the host
* processor does not, so I am putting these registers here rather
* than in msrs_to_save.
*/
MSR_IA32_VMX_BASIC,
MSR_IA32_VMX_TRUE_PINBASED_CTLS,
MSR_IA32_VMX_TRUE_PROCBASED_CTLS,
MSR_IA32_VMX_TRUE_EXIT_CTLS,
MSR_IA32_VMX_TRUE_ENTRY_CTLS,
MSR_IA32_VMX_MISC,
MSR_IA32_VMX_CR0_FIXED0,
MSR_IA32_VMX_CR4_FIXED0,
MSR_IA32_VMX_VMCS_ENUM,
MSR_IA32_VMX_PROCBASED_CTLS2,
MSR_IA32_VMX_EPT_VPID_CAP,
MSR_IA32_VMX_VMFUNC,
MSR_K7_HWCR,
MSR_KVM_POLL_CONTROL,
};
static unsigned num_emulated_msrs;
@ -1210,11 +1231,12 @@ static u32 msr_based_features[] = {
static unsigned int num_msr_based_features;
u64 kvm_get_arch_capabilities(void)
static u64 kvm_get_arch_capabilities(void)
{
u64 data;
u64 data = 0;
rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data);
if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES))
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, data);
/*
* If we're doing cache flushes (either "always" or "cond")
@ -1230,7 +1252,6 @@ u64 kvm_get_arch_capabilities(void)
return data;
}
EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities);
static int kvm_get_msr_feature(struct kvm_msr_entry *msr)
{
@ -2545,13 +2566,24 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
}
break;
case MSR_IA32_MISC_ENABLE:
vcpu->arch.ia32_misc_enable_msr = data;
if (!kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT) &&
((vcpu->arch.ia32_misc_enable_msr ^ data) & MSR_IA32_MISC_ENABLE_MWAIT)) {
if (!guest_cpuid_has(vcpu, X86_FEATURE_XMM3))
return 1;
vcpu->arch.ia32_misc_enable_msr = data;
kvm_update_cpuid(vcpu);
} else {
vcpu->arch.ia32_misc_enable_msr = data;
}
break;
case MSR_IA32_SMBASE:
if (!msr_info->host_initiated)
return 1;
vcpu->arch.smbase = data;
break;
case MSR_IA32_POWER_CTL:
vcpu->arch.msr_ia32_power_ctl = data;
break;
case MSR_IA32_TSC:
kvm_write_tsc(vcpu, msr_info);
break;
@ -2626,6 +2658,14 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
break;
case MSR_KVM_POLL_CONTROL:
/* only enable bit supported */
if (data & (-1ULL << 1))
return 1;
vcpu->arch.msr_kvm_poll_control = data;
break;
case MSR_IA32_MCG_CTL:
case MSR_IA32_MCG_STATUS:
case MSR_IA32_MC0_CTL ... MSR_IA32_MCx_CTL(KVM_MAX_MCE_BANKS) - 1:
@ -2803,6 +2843,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
return 1;
msr_info->data = vcpu->arch.arch_capabilities;
break;
case MSR_IA32_POWER_CTL:
msr_info->data = vcpu->arch.msr_ia32_power_ctl;
break;
case MSR_IA32_TSC:
msr_info->data = kvm_scale_tsc(vcpu, rdtsc()) + vcpu->arch.tsc_offset;
break;
@ -2875,6 +2918,9 @@ int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
case MSR_KVM_PV_EOI_EN:
msr_info->data = vcpu->arch.pv_eoi.msr_val;
break;
case MSR_KVM_POLL_CONTROL:
msr_info->data = vcpu->arch.msr_kvm_poll_control;
break;
case MSR_IA32_P5_MC_ADDR:
case MSR_IA32_P5_MC_TYPE:
case MSR_IA32_MCG_CAP:
@ -3084,6 +3130,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_SET_BOOT_CPU_ID:
case KVM_CAP_SPLIT_IRQCHIP:
case KVM_CAP_IMMEDIATE_EXIT:
case KVM_CAP_PMU_EVENT_FILTER:
case KVM_CAP_GET_MSR_FEATURES:
case KVM_CAP_MSR_PLATFORM_INFO:
case KVM_CAP_EXCEPTION_PAYLOAD:
@ -3096,7 +3143,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
r = KVM_CLOCK_TSC_STABLE;
break;
case KVM_CAP_X86_DISABLE_EXITS:
r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE;
r |= KVM_X86_DISABLE_EXITS_HLT | KVM_X86_DISABLE_EXITS_PAUSE |
KVM_X86_DISABLE_EXITS_CSTATE;
if(kvm_can_mwait_in_guest())
r |= KVM_X86_DISABLE_EXITS_MWAIT;
break;
@ -4613,6 +4661,8 @@ split_irqchip_unlock:
kvm->arch.hlt_in_guest = true;
if (cap->args[0] & KVM_X86_DISABLE_EXITS_PAUSE)
kvm->arch.pause_in_guest = true;
if (cap->args[0] & KVM_X86_DISABLE_EXITS_CSTATE)
kvm->arch.cstate_in_guest = true;
r = 0;
break;
case KVM_CAP_MSR_PLATFORM_INFO:
@ -4927,6 +4977,9 @@ set_identity_unlock:
r = kvm_vm_ioctl_hv_eventfd(kvm, &hvevfd);
break;
}
case KVM_SET_PMU_EVENT_FILTER:
r = kvm_vm_ioctl_set_pmu_event_filter(kvm, argp);
break;
default:
r = -ENOTTY;
}
@ -6379,7 +6432,7 @@ static bool kvm_vcpu_check_breakpoint(struct kvm_vcpu *vcpu, int *r)
vcpu->arch.db);
if (dr6 != 0) {
vcpu->arch.dr6 &= ~15;
vcpu->arch.dr6 &= ~DR_TRAP_BITS;
vcpu->arch.dr6 |= dr6 | DR6_RTM;
kvm_queue_exception(vcpu, DB_VECTOR);
*r = EMULATE_DONE;
@ -6706,7 +6759,7 @@ static void kvm_hyperv_tsc_notifier(void)
struct kvm_vcpu *vcpu;
int cpu;
spin_lock(&kvm_lock);
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_make_mclock_inprogress_request(kvm);
@ -6732,7 +6785,7 @@ static void kvm_hyperv_tsc_notifier(void)
spin_unlock(&ka->pvclock_gtod_sync_lock);
}
spin_unlock(&kvm_lock);
mutex_unlock(&kvm_lock);
}
#endif
@ -6783,17 +6836,17 @@ static void __kvmclock_cpufreq_notifier(struct cpufreq_freqs *freq, int cpu)
smp_call_function_single(cpu, tsc_khz_changed, freq, 1);
spin_lock(&kvm_lock);
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
kvm_for_each_vcpu(i, vcpu, kvm) {
if (vcpu->cpu != cpu)
continue;
kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
if (vcpu->cpu != smp_processor_id())
if (vcpu->cpu != raw_smp_processor_id())
send_ipi = 1;
}
}
spin_unlock(&kvm_lock);
mutex_unlock(&kvm_lock);
if (freq->old < freq->new && send_ipi) {
/*
@ -6908,35 +6961,6 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
.handle_intel_pt_intr = kvm_handle_intel_pt_intr,
};
static void kvm_set_mmio_spte_mask(void)
{
u64 mask;
int maxphyaddr = boot_cpu_data.x86_phys_bits;
/*
* Set the reserved bits and the present bit of an paging-structure
* entry to generate page fault with PFER.RSV = 1.
*/
/*
* Mask the uppermost physical address bit, which would be reserved as
* long as the supported physical address width is less than 52.
*/
mask = 1ull << 51;
/* Set the present bit. */
mask |= 1ull;
/*
* If reserved bit is not supported, clear the present bit to disable
* mmio page fault.
*/
if (IS_ENABLED(CONFIG_X86_64) && maxphyaddr == 52)
mask &= ~1ull;
kvm_mmu_set_mmio_spte_mask(mask, mask);
}
#ifdef CONFIG_X86_64
static void pvclock_gtod_update_fn(struct work_struct *work)
{
@ -6945,12 +6969,12 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
struct kvm_vcpu *vcpu;
int i;
spin_lock(&kvm_lock);
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list)
kvm_for_each_vcpu(i, vcpu, kvm)
kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
atomic_set(&kvm_guest_has_master_clock, 0);
spin_unlock(&kvm_lock);
mutex_unlock(&kvm_lock);
}
static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
@ -7033,8 +7057,6 @@ int kvm_arch_init(void *opaque)
if (r)
goto out_free_percpu;
kvm_set_mmio_spte_mask();
kvm_x86_ops = ops;
kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
@ -7173,6 +7195,23 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu)
kvm_x86_ops->refresh_apicv_exec_ctrl(vcpu);
}
static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
{
struct kvm_vcpu *target = NULL;
struct kvm_apic_map *map;
rcu_read_lock();
map = rcu_dereference(kvm->arch.apic_map);
if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
target = map->phys_map[dest_id]->vcpu;
rcu_read_unlock();
if (target)
kvm_vcpu_yield_to(target);
}
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
{
unsigned long nr, a0, a1, a2, a3, ret;
@ -7219,6 +7258,10 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
case KVM_HC_SEND_IPI:
ret = kvm_pv_send_ipi(vcpu->kvm, a0, a1, a2, a3, op_64_bit);
break;
case KVM_HC_SCHED_YIELD:
kvm_sched_yield(vcpu->kvm, a0);
ret = 0;
break;
default:
ret = -KVM_ENOSYS;
break;
@ -7951,9 +7994,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
}
trace_kvm_entry(vcpu->vcpu_id);
if (lapic_in_kernel(vcpu) &&
vcpu->arch.apic->lapic_timer.timer_advance_ns)
wait_lapic_expire(vcpu);
guest_enter_irqoff();
fpregs_assert_state_consistent();
@ -8002,13 +8042,29 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
vcpu->mode = OUTSIDE_GUEST_MODE;
smp_wmb();
kvm_x86_ops->handle_exit_irqoff(vcpu);
/*
* Consume any pending interrupts, including the possible source of
* VM-Exit on SVM and any ticks that occur between VM-Exit and now.
* An instruction is required after local_irq_enable() to fully unblock
* interrupts on processors that implement an interrupt shadow, the
* stat.exits increment will do nicely.
*/
kvm_before_interrupt(vcpu);
kvm_x86_ops->handle_external_intr(vcpu);
local_irq_enable();
++vcpu->stat.exits;
local_irq_disable();
kvm_after_interrupt(vcpu);
++vcpu->stat.exits;
guest_exit_irqoff();
if (lapic_in_kernel(vcpu)) {
s64 delta = vcpu->arch.apic->lapic_timer.advance_expire_delta;
if (delta != S64_MIN) {
trace_kvm_wait_lapic_expire(vcpu->vcpu_id, delta);
vcpu->arch.apic->lapic_timer.advance_expire_delta = S64_MIN;
}
}
local_irq_enable();
preempt_enable();
@ -8594,7 +8650,7 @@ static int __set_sregs(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
kvm_update_cpuid(vcpu);
idx = srcu_read_lock(&vcpu->kvm->srcu);
if (!is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu)) {
if (is_pae_paging(vcpu)) {
load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
mmu_reset_needed = 1;
}
@ -8875,6 +8931,10 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
msr.host_initiated = true;
kvm_write_tsc(vcpu, &msr);
vcpu_put(vcpu);
/* poll control enabled by default */
vcpu->arch.msr_kvm_poll_control = 1;
mutex_unlock(&vcpu->mutex);
if (!kvmclock_periodic_sync)
@ -9107,9 +9167,9 @@ void kvm_arch_hardware_unsetup(void)
kvm_x86_ops->hardware_unsetup();
}
void kvm_arch_check_processor_compat(void *rtn)
int kvm_arch_check_processor_compat(void)
{
kvm_x86_ops->check_processor_compatibility(rtn);
return kvm_x86_ops->check_processor_compatibility();
}
bool kvm_vcpu_is_reset_bsp(struct kvm_vcpu *vcpu)
@ -9381,6 +9441,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
kvm_ioapic_destroy(kvm);
kvm_free_vcpus(kvm);
kvfree(rcu_dereference_check(kvm->arch.apic_map, 1));
kfree(srcu_dereference_check(kvm->arch.pmu_event_filter, &kvm->srcu, 1));
kvm_mmu_uninit_vm(kvm);
kvm_page_track_cleanup(kvm);
kvm_hv_destroy_vm(kvm);
@ -9789,6 +9850,36 @@ static int apf_get_user(struct kvm_vcpu *vcpu, u32 *val)
sizeof(u32));
}
static bool kvm_can_deliver_async_pf(struct kvm_vcpu *vcpu)
{
if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
return false;
if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
(vcpu->arch.apf.send_user_only &&
kvm_x86_ops->get_cpl(vcpu) == 0))
return false;
return true;
}
bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
{
if (unlikely(!lapic_in_kernel(vcpu) ||
kvm_event_needs_reinjection(vcpu) ||
vcpu->arch.exception.pending))
return false;
if (kvm_hlt_in_guest(vcpu->kvm) && !kvm_can_deliver_async_pf(vcpu))
return false;
/*
* If interrupts are off we cannot even use an artificial
* halt state.
*/
return kvm_x86_ops->interrupt_allowed(vcpu);
}
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
struct kvm_async_pf *work)
{
@ -9797,11 +9888,8 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
trace_kvm_async_pf_not_present(work->arch.token, work->gva);
kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
if (!(vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) ||
(vcpu->arch.apf.send_user_only &&
kvm_x86_ops->get_cpl(vcpu) == 0))
kvm_make_request(KVM_REQ_APF_HALT, vcpu);
else if (!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
if (kvm_can_deliver_async_pf(vcpu) &&
!apf_put_user(vcpu, KVM_PV_REASON_PAGE_NOT_PRESENT)) {
fault.vector = PF_VECTOR;
fault.error_code_valid = true;
fault.error_code = 0;
@ -9809,6 +9897,16 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
fault.address = work->arch.token;
fault.async_page_fault = true;
kvm_inject_page_fault(vcpu, &fault);
} else {
/*
* It is not possible to deliver a paravirtualized asynchronous
* page fault, but putting the guest in an artificial halt state
* can be beneficial nevertheless: if an interrupt arrives, we
* can deliver it timely and perhaps the guest will schedule
* another process. When the instruction that triggered a page
* fault is retried, hopefully the page will be ready in the host.
*/
kvm_make_request(KVM_REQ_APF_HALT, vcpu);
}
}
@ -9949,6 +10047,13 @@ bool kvm_vector_hashing_enabled(void)
}
EXPORT_SYMBOL_GPL(kvm_vector_hashing_enabled);
bool kvm_arch_no_poll(struct kvm_vcpu *vcpu)
{
return (vcpu->arch.msr_kvm_poll_control & 1) == 0;
}
EXPORT_SYMBOL_GPL(kvm_arch_no_poll);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_fast_mmio);
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);

View File

@ -139,6 +139,11 @@ static inline int is_paging(struct kvm_vcpu *vcpu)
return likely(kvm_read_cr0_bits(vcpu, X86_CR0_PG));
}
static inline bool is_pae_paging(struct kvm_vcpu *vcpu)
{
return !is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu);
}
static inline u32 bit(int bitno)
{
return 1 << (bitno & 31);
@ -333,6 +338,11 @@ static inline bool kvm_pause_in_guest(struct kvm *kvm)
return kvm->arch.pause_in_guest;
}
static inline bool kvm_cstate_in_guest(struct kvm *kvm)
{
return kvm->arch.cstate_in_guest;
}
DECLARE_PER_CPU(struct kvm_vcpu *, current_vcpu);
static inline void kvm_before_interrupt(struct kvm_vcpu *vcpu)

View File

@ -11,18 +11,19 @@
#include <asm/perf_event.h>
#define ARMV8_PMU_CYCLE_IDX (ARMV8_PMU_MAX_COUNTERS - 1)
#define ARMV8_PMU_MAX_COUNTER_PAIRS ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1)
#ifdef CONFIG_KVM_ARM_PMU
struct kvm_pmc {
u8 idx; /* index into the pmu->pmc array */
struct perf_event *perf_event;
u64 bitmask;
};
struct kvm_pmu {
int irq_num;
struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS];
DECLARE_BITMAP(chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
bool ready;
bool created;
bool irq_level;
@ -35,8 +36,8 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val);
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu);
void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu);
void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu);
void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val);
void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val);
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val);
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val);
void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu);
void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu);
bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu);
@ -72,8 +73,8 @@ static inline u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
}
static inline void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu) {}
static inline void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu) {}
static inline void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val) {}
static inline void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val) {}
static inline void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val) {}
static inline void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val) {}
static inline void kvm_pmu_flush_hwstate(struct kvm_vcpu *vcpu) {}
static inline void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu) {}
static inline bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu)

View File

@ -159,7 +159,7 @@ static inline bool is_error_page(struct page *page)
extern struct kmem_cache *kvm_vcpu_cache;
extern spinlock_t kvm_lock;
extern struct mutex kvm_lock;
extern struct list_head vm_list;
struct kvm_io_range {
@ -867,7 +867,7 @@ int kvm_arch_hardware_enable(void);
void kvm_arch_hardware_disable(void);
int kvm_arch_hardware_setup(void);
void kvm_arch_hardware_unsetup(void);
void kvm_arch_check_processor_compat(void *rtn);
int kvm_arch_check_processor_compat(void);
int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu);
bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu);
int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu);
@ -990,6 +990,7 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm,
struct kvm_irq_ack_notifier *kian);
int kvm_request_irq_source_id(struct kvm *kvm);
void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id);
bool kvm_arch_irqfd_allowed(struct kvm *kvm, struct kvm_irqfd *args);
/*
* search_memslots() and __gfn_to_memslot() are here because they are

View File

@ -696,9 +696,11 @@ struct kvm_ioeventfd {
#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \
KVM_X86_DISABLE_EXITS_HLT | \
KVM_X86_DISABLE_EXITS_PAUSE)
KVM_X86_DISABLE_EXITS_PAUSE | \
KVM_X86_DISABLE_EXITS_CSTATE)
/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {
@ -993,6 +995,7 @@ struct kvm_ppc_resize_hpt {
#define KVM_CAP_ARM_SVE 170
#define KVM_CAP_ARM_PTRAUTH_ADDRESS 171
#define KVM_CAP_ARM_PTRAUTH_GENERIC 172
#define KVM_CAP_PMU_EVENT_FILTER 173
#ifdef KVM_CAP_IRQ_ROUTING
@ -1327,6 +1330,8 @@ struct kvm_s390_ucas_mapping {
#define KVM_PPC_GET_RMMU_INFO _IOW(KVMIO, 0xb0, struct kvm_ppc_rmmu_info)
/* Available with KVM_CAP_PPC_GET_CPU_CHAR */
#define KVM_PPC_GET_CPU_CHAR _IOR(KVMIO, 0xb1, struct kvm_ppc_cpu_char)
/* Available with KVM_CAP_PMU_EVENT_FILTER */
#define KVM_SET_PMU_EVENT_FILTER _IOW(KVMIO, 0xb2, struct kvm_pmu_event_filter)
/* ioctl for vm fd */
#define KVM_CREATE_DEVICE _IOWR(KVMIO, 0xe0, struct kvm_create_device)

View File

@ -28,6 +28,7 @@
#define KVM_HC_MIPS_CONSOLE_OUTPUT 8
#define KVM_HC_CLOCK_PAIRING 9
#define KVM_HC_SEND_IPI 10
#define KVM_HC_SCHED_YIELD 11
/*
* hypercalls use architecture specific

View File

@ -696,9 +696,11 @@ struct kvm_ioeventfd {
#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0)
#define KVM_X86_DISABLE_EXITS_HLT (1 << 1)
#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2)
#define KVM_X86_DISABLE_EXITS_CSTATE (1 << 3)
#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \
KVM_X86_DISABLE_EXITS_HLT | \
KVM_X86_DISABLE_EXITS_PAUSE)
KVM_X86_DISABLE_EXITS_PAUSE | \
KVM_X86_DISABLE_EXITS_CSTATE)
/* for KVM_ENABLE_CAP */
struct kvm_enable_cap {

View File

@ -121,7 +121,6 @@ static void *vcpu_worker(void *data)
uint64_t *guest_array;
uint64_t pages_count = 0;
struct kvm_run *run;
struct ucall uc;
run = vcpu_state(vm, VCPU_ID);
@ -132,7 +131,7 @@ static void *vcpu_worker(void *data)
/* Let the guest dirty the random pages */
ret = _vcpu_run(vm, VCPU_ID);
TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
if (get_ucall(vm, VCPU_ID, &uc) == UCALL_SYNC) {
if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
pages_count += TEST_PAGES_PER_LOOP;
generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
} else {

View File

@ -52,4 +52,8 @@ static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint
vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, &reg);
}
void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init);
void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_vcpu_init *init, void *guest_code);
#endif /* SELFTEST_KVM_PROCESSOR_H */

View File

@ -86,8 +86,7 @@ int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
void *arg);
void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot,
int gdt_memslot);
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
uint32_t data_memslot, uint32_t pgd_memslot);
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,

View File

@ -235,28 +235,21 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
return vm;
}
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init)
{
size_t stack_size = vm->page_size == 4096 ?
DEFAULT_STACK_PGS * vm->page_size :
vm->page_size;
uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0);
vm_vcpu_add(vm, vcpuid, 0, 0);
set_reg(vm, vcpuid, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
set_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
}
void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
{
struct kvm_vcpu_init init;
struct kvm_vcpu_init default_init = { .target = -1, };
uint64_t sctlr_el1, tcr_el1;
memset(&init, 0, sizeof(init));
init.target = KVM_ARM_TARGET_GENERIC_V8;
vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_INIT, &init);
if (!init)
init = &default_init;
if (init->target == -1) {
struct kvm_vcpu_init preferred;
vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &preferred);
init->target = preferred.target;
}
vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_INIT, init);
/*
* Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
@ -316,3 +309,24 @@ void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n",
indent, "", pstate, pc);
}
void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
struct kvm_vcpu_init *init, void *guest_code)
{
size_t stack_size = vm->page_size == 4096 ?
DEFAULT_STACK_PGS * vm->page_size :
vm->page_size;
uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0);
vm_vcpu_add(vm, vcpuid);
aarch64_vcpu_setup(vm, vcpuid, init);
set_reg(vm, vcpuid, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
set_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
}
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
aarch64_vcpu_add_default(vm, vcpuid, NULL, guest_code);
}

View File

@ -763,11 +763,10 @@ static int vcpu_mmap_sz(void)
*
* Return: None
*
* Creates and adds to the VM specified by vm and virtual CPU with
* the ID given by vcpuid.
* Adds a virtual CPU to the VM specified by vm with the ID given by vcpuid.
* No additional VCPU setup is done.
*/
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot,
int gdt_memslot)
void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
{
struct vcpu *vcpu;
@ -801,8 +800,6 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid, int pgd_memslot,
vm->vcpu_head->prev = vcpu;
vcpu->next = vm->vcpu_head;
vm->vcpu_head = vcpu;
vcpu_setup(vm, vcpuid, pgd_memslot, gdt_memslot);
}
/*

View File

@ -64,8 +64,6 @@ struct kvm_vm {
};
struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot,
int gdt_memslot);
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent);
void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent);

View File

@ -125,16 +125,16 @@ void ucall(uint64_t cmd, int nargs, ...)
uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
{
struct kvm_run *run = vcpu_state(vm, vcpu_id);
memset(uc, 0, sizeof(*uc));
struct ucall ucall = {};
bool got_ucall = false;
#ifdef __x86_64__
if (ucall_type == UCALL_PIO && run->exit_reason == KVM_EXIT_IO &&
run->io.port == UCALL_PIO_PORT) {
struct kvm_regs regs;
vcpu_regs_get(vm, vcpu_id, &regs);
memcpy(uc, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), sizeof(*uc));
return uc->cmd;
memcpy(&ucall, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi), sizeof(ucall));
got_ucall = true;
}
#endif
if (ucall_type == UCALL_MMIO && run->exit_reason == KVM_EXIT_MMIO &&
@ -143,8 +143,15 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
"Unexpected ucall exit mmio address access");
memcpy(&gva, run->mmio.data, sizeof(gva));
memcpy(uc, addr_gva2hva(vm, gva), sizeof(*uc));
memcpy(&ucall, addr_gva2hva(vm, gva), sizeof(ucall));
got_ucall = true;
}
return uc->cmd;
if (got_ucall) {
vcpu_run_complete_io(vm, vcpu_id);
if (uc)
memcpy(uc, &ucall, sizeof(ucall));
}
return ucall.cmd;
}

View File

@ -609,7 +609,7 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
kvm_seg_fill_gdt_64bit(vm, segp);
}
void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
{
struct kvm_sregs sregs;
@ -655,7 +655,8 @@ void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
/* Create VCPU */
vm_vcpu_add(vm, vcpuid, 0, 0);
vm_vcpu_add(vm, vcpuid);
vcpu_setup(vm, vcpuid, 0, 0);
/* Setup guest general purpose registers */
vcpu_regs_get(vm, vcpuid, &regs);

View File

@ -144,7 +144,7 @@ int main(int argc, char *argv[])
/* Restore state in a new VM. */
kvm_vm_restart(vm, O_RDWR);
vm_vcpu_add(vm, VCPU_ID, 0, 0);
vm_vcpu_add(vm, VCPU_ID);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
vcpu_ioctl(vm, VCPU_ID, KVM_ENABLE_CAP, &enable_evmcs_cap);
vcpu_load_state(vm, VCPU_ID, state);

View File

@ -33,7 +33,7 @@ void test_vcpu_creation(int first_vcpu_id, int num_vcpus)
int vcpu_id = first_vcpu_id + i;
/* This asserts that the vCPU was created. */
vm_vcpu_add(vm, vcpu_id, 0, 0);
vm_vcpu_add(vm, vcpu_id);
}
kvm_vm_free(vm);

View File

@ -144,7 +144,7 @@ int main(int argc, char *argv[])
state = vcpu_save_state(vm, VCPU_ID);
kvm_vm_release(vm);
kvm_vm_restart(vm, O_RDWR);
vm_vcpu_add(vm, VCPU_ID, 0, 0);
vm_vcpu_add(vm, VCPU_ID);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);

View File

@ -176,7 +176,7 @@ int main(int argc, char *argv[])
/* Restore state in a new VM. */
kvm_vm_restart(vm, O_RDWR);
vm_vcpu_add(vm, VCPU_ID, 0, 0);
vm_vcpu_add(vm, VCPU_ID);
vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
vcpu_load_state(vm, VCPU_ID, state);
run = vcpu_state(vm, VCPU_ID);

View File

@ -237,10 +237,10 @@ static bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx)
switch (index) {
case TIMER_VTIMER:
cnt_ctl = read_sysreg_el0(cntv_ctl);
cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
break;
case TIMER_PTIMER:
cnt_ctl = read_sysreg_el0(cntp_ctl);
cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
break;
case NR_KVM_TIMERS:
/* GCC is braindead */
@ -350,20 +350,20 @@ static void timer_save_state(struct arch_timer_context *ctx)
switch (index) {
case TIMER_VTIMER:
ctx->cnt_ctl = read_sysreg_el0(cntv_ctl);
ctx->cnt_cval = read_sysreg_el0(cntv_cval);
ctx->cnt_ctl = read_sysreg_el0(SYS_CNTV_CTL);
ctx->cnt_cval = read_sysreg_el0(SYS_CNTV_CVAL);
/* Disable the timer */
write_sysreg_el0(0, cntv_ctl);
write_sysreg_el0(0, SYS_CNTV_CTL);
isb();
break;
case TIMER_PTIMER:
ctx->cnt_ctl = read_sysreg_el0(cntp_ctl);
ctx->cnt_cval = read_sysreg_el0(cntp_cval);
ctx->cnt_ctl = read_sysreg_el0(SYS_CNTP_CTL);
ctx->cnt_cval = read_sysreg_el0(SYS_CNTP_CVAL);
/* Disable the timer */
write_sysreg_el0(0, cntp_ctl);
write_sysreg_el0(0, SYS_CNTP_CTL);
isb();
break;
@ -429,14 +429,14 @@ static void timer_restore_state(struct arch_timer_context *ctx)
switch (index) {
case TIMER_VTIMER:
write_sysreg_el0(ctx->cnt_cval, cntv_cval);
write_sysreg_el0(ctx->cnt_cval, SYS_CNTV_CVAL);
isb();
write_sysreg_el0(ctx->cnt_ctl, cntv_ctl);
write_sysreg_el0(ctx->cnt_ctl, SYS_CNTV_CTL);
break;
case TIMER_PTIMER:
write_sysreg_el0(ctx->cnt_cval, cntp_cval);
write_sysreg_el0(ctx->cnt_cval, SYS_CNTP_CVAL);
isb();
write_sysreg_el0(ctx->cnt_ctl, cntp_ctl);
write_sysreg_el0(ctx->cnt_ctl, SYS_CNTP_CTL);
break;
case NR_KVM_TIMERS:
BUG();

View File

@ -93,9 +93,9 @@ int kvm_arch_hardware_setup(void)
return 0;
}
void kvm_arch_check_processor_compat(void *rtn)
int kvm_arch_check_processor_compat(void)
{
*(int *)rtn = 0;
return 0;
}
@ -1332,6 +1332,8 @@ static void cpu_hyp_reset(void)
static void cpu_hyp_reinit(void)
{
kvm_init_host_cpu_context(&this_cpu_ptr(&kvm_host_data)->host_ctxt);
cpu_hyp_reset();
if (is_kernel_in_hyp_mode())
@ -1569,7 +1571,6 @@ static int init_hyp_mode(void)
kvm_host_data_t *cpu_data;
cpu_data = per_cpu_ptr(&kvm_host_data, cpu);
kvm_init_host_cpu_context(&cpu_data->host_ctxt, cpu);
err = create_hyp_mappings(cpu_data, cpu_data + 1, PAGE_HYP);
if (err) {

View File

@ -13,6 +13,123 @@
#include <kvm/arm_pmu.h>
#include <kvm/arm_vgic.h>
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx);
#define PERF_ATTR_CFG1_KVM_PMU_CHAINED 0x1
/**
* kvm_pmu_idx_is_64bit - determine if select_idx is a 64bit counter
* @vcpu: The vcpu pointer
* @select_idx: The counter index
*/
static bool kvm_pmu_idx_is_64bit(struct kvm_vcpu *vcpu, u64 select_idx)
{
return (select_idx == ARMV8_PMU_CYCLE_IDX &&
__vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_LC);
}
static struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu;
struct kvm_vcpu_arch *vcpu_arch;
pmc -= pmc->idx;
pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
return container_of(vcpu_arch, struct kvm_vcpu, arch);
}
/**
* kvm_pmu_pmc_is_chained - determine if the pmc is chained
* @pmc: The PMU counter pointer
*/
static bool kvm_pmu_pmc_is_chained(struct kvm_pmc *pmc)
{
struct kvm_vcpu *vcpu = kvm_pmc_to_vcpu(pmc);
return test_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}
/**
* kvm_pmu_idx_is_high_counter - determine if select_idx is a high/low counter
* @select_idx: The counter index
*/
static bool kvm_pmu_idx_is_high_counter(u64 select_idx)
{
return select_idx & 0x1;
}
/**
* kvm_pmu_get_canonical_pmc - obtain the canonical pmc
* @pmc: The PMU counter pointer
*
* When a pair of PMCs are chained together we use the low counter (canonical)
* to hold the underlying perf event.
*/
static struct kvm_pmc *kvm_pmu_get_canonical_pmc(struct kvm_pmc *pmc)
{
if (kvm_pmu_pmc_is_chained(pmc) &&
kvm_pmu_idx_is_high_counter(pmc->idx))
return pmc - 1;
return pmc;
}
/**
* kvm_pmu_idx_has_chain_evtype - determine if the event type is chain
* @vcpu: The vcpu pointer
* @select_idx: The counter index
*/
static bool kvm_pmu_idx_has_chain_evtype(struct kvm_vcpu *vcpu, u64 select_idx)
{
u64 eventsel, reg;
select_idx |= 0x1;
if (select_idx == ARMV8_PMU_CYCLE_IDX)
return false;
reg = PMEVTYPER0_EL0 + select_idx;
eventsel = __vcpu_sys_reg(vcpu, reg) & ARMV8_PMU_EVTYPE_EVENT;
return eventsel == ARMV8_PMUV3_PERFCTR_CHAIN;
}
/**
* kvm_pmu_get_pair_counter_value - get PMU counter value
* @vcpu: The vcpu pointer
* @pmc: The PMU counter pointer
*/
static u64 kvm_pmu_get_pair_counter_value(struct kvm_vcpu *vcpu,
struct kvm_pmc *pmc)
{
u64 counter, counter_high, reg, enabled, running;
if (kvm_pmu_pmc_is_chained(pmc)) {
pmc = kvm_pmu_get_canonical_pmc(pmc);
reg = PMEVCNTR0_EL0 + pmc->idx;
counter = __vcpu_sys_reg(vcpu, reg);
counter_high = __vcpu_sys_reg(vcpu, reg + 1);
counter = lower_32_bits(counter) | (counter_high << 32);
} else {
reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
counter = __vcpu_sys_reg(vcpu, reg);
}
/*
* The real counter value is equal to the value of counter register plus
* the value perf event counts.
*/
if (pmc->perf_event)
counter += perf_event_read_value(pmc->perf_event, &enabled,
&running);
return counter;
}
/**
* kvm_pmu_get_counter_value - get PMU counter value
* @vcpu: The vcpu pointer
@ -20,22 +137,20 @@
*/
u64 kvm_pmu_get_counter_value(struct kvm_vcpu *vcpu, u64 select_idx)
{
u64 counter, reg, enabled, running;
u64 counter;
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc = &pmu->pmc[select_idx];
reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
counter = __vcpu_sys_reg(vcpu, reg);
counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
/* The real counter value is equal to the value of counter register plus
* the value perf event counts.
*/
if (pmc->perf_event)
counter += perf_event_read_value(pmc->perf_event, &enabled,
&running);
if (kvm_pmu_pmc_is_chained(pmc) &&
kvm_pmu_idx_is_high_counter(select_idx))
counter = upper_32_bits(counter);
return counter & pmc->bitmask;
else if (!kvm_pmu_idx_is_64bit(vcpu, select_idx))
counter = lower_32_bits(counter);
return counter;
}
/**
@ -51,6 +166,23 @@ void kvm_pmu_set_counter_value(struct kvm_vcpu *vcpu, u64 select_idx, u64 val)
reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
? PMCCNTR_EL0 : PMEVCNTR0_EL0 + select_idx;
__vcpu_sys_reg(vcpu, reg) += (s64)val - kvm_pmu_get_counter_value(vcpu, select_idx);
/* Recreate the perf event to reflect the updated sample_period */
kvm_pmu_create_perf_event(vcpu, select_idx);
}
/**
* kvm_pmu_release_perf_event - remove the perf event
* @pmc: The PMU counter pointer
*/
static void kvm_pmu_release_perf_event(struct kvm_pmc *pmc)
{
pmc = kvm_pmu_get_canonical_pmc(pmc);
if (pmc->perf_event) {
perf_event_disable(pmc->perf_event);
perf_event_release_kernel(pmc->perf_event);
pmc->perf_event = NULL;
}
}
/**
@ -63,15 +195,23 @@ static void kvm_pmu_stop_counter(struct kvm_vcpu *vcpu, struct kvm_pmc *pmc)
{
u64 counter, reg;
if (pmc->perf_event) {
counter = kvm_pmu_get_counter_value(vcpu, pmc->idx);
pmc = kvm_pmu_get_canonical_pmc(pmc);
if (!pmc->perf_event)
return;
counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
if (kvm_pmu_pmc_is_chained(pmc)) {
reg = PMEVCNTR0_EL0 + pmc->idx;
__vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter);
__vcpu_sys_reg(vcpu, reg + 1) = upper_32_bits(counter);
} else {
reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
? PMCCNTR_EL0 : PMEVCNTR0_EL0 + pmc->idx;
__vcpu_sys_reg(vcpu, reg) = counter;
perf_event_disable(pmc->perf_event);
perf_event_release_kernel(pmc->perf_event);
pmc->perf_event = NULL;
__vcpu_sys_reg(vcpu, reg) = lower_32_bits(counter);
}
kvm_pmu_release_perf_event(pmc);
}
/**
@ -87,8 +227,9 @@ void kvm_pmu_vcpu_reset(struct kvm_vcpu *vcpu)
for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
kvm_pmu_stop_counter(vcpu, &pmu->pmc[i]);
pmu->pmc[i].idx = i;
pmu->pmc[i].bitmask = 0xffffffffUL;
}
bitmap_zero(vcpu->arch.pmu.chained, ARMV8_PMU_MAX_COUNTER_PAIRS);
}
/**
@ -101,15 +242,8 @@ void kvm_pmu_vcpu_destroy(struct kvm_vcpu *vcpu)
int i;
struct kvm_pmu *pmu = &vcpu->arch.pmu;
for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++) {
struct kvm_pmc *pmc = &pmu->pmc[i];
if (pmc->perf_event) {
perf_event_disable(pmc->perf_event);
perf_event_release_kernel(pmc->perf_event);
pmc->perf_event = NULL;
}
}
for (i = 0; i < ARMV8_PMU_MAX_COUNTERS; i++)
kvm_pmu_release_perf_event(&pmu->pmc[i]);
}
u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
@ -124,13 +258,13 @@ u64 kvm_pmu_valid_counter_mask(struct kvm_vcpu *vcpu)
}
/**
* kvm_pmu_enable_counter - enable selected PMU counter
* kvm_pmu_enable_counter_mask - enable selected PMU counters
* @vcpu: The vcpu pointer
* @val: the value guest writes to PMCNTENSET register
*
* Call perf_event_enable to start counting the perf event
*/
void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val)
void kvm_pmu_enable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
int i;
struct kvm_pmu *pmu = &vcpu->arch.pmu;
@ -144,6 +278,18 @@ void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val)
continue;
pmc = &pmu->pmc[i];
/*
* For high counters of chained events we must recreate the
* perf event with the long (64bit) attribute set.
*/
if (kvm_pmu_pmc_is_chained(pmc) &&
kvm_pmu_idx_is_high_counter(i)) {
kvm_pmu_create_perf_event(vcpu, i);
continue;
}
/* At this point, pmc must be the canonical */
if (pmc->perf_event) {
perf_event_enable(pmc->perf_event);
if (pmc->perf_event->state != PERF_EVENT_STATE_ACTIVE)
@ -153,13 +299,13 @@ void kvm_pmu_enable_counter(struct kvm_vcpu *vcpu, u64 val)
}
/**
* kvm_pmu_disable_counter - disable selected PMU counter
* kvm_pmu_disable_counter_mask - disable selected PMU counters
* @vcpu: The vcpu pointer
* @val: the value guest writes to PMCNTENCLR register
*
* Call perf_event_disable to stop counting the perf event
*/
void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val)
void kvm_pmu_disable_counter_mask(struct kvm_vcpu *vcpu, u64 val)
{
int i;
struct kvm_pmu *pmu = &vcpu->arch.pmu;
@ -173,6 +319,18 @@ void kvm_pmu_disable_counter(struct kvm_vcpu *vcpu, u64 val)
continue;
pmc = &pmu->pmc[i];
/*
* For high counters of chained events we must recreate the
* perf event with the long (64bit) attribute unset.
*/
if (kvm_pmu_pmc_is_chained(pmc) &&
kvm_pmu_idx_is_high_counter(i)) {
kvm_pmu_create_perf_event(vcpu, i);
continue;
}
/* At this point, pmc must be the canonical */
if (pmc->perf_event)
perf_event_disable(pmc->perf_event);
}
@ -262,17 +420,6 @@ void kvm_pmu_sync_hwstate(struct kvm_vcpu *vcpu)
kvm_pmu_update_state(vcpu);
}
static inline struct kvm_vcpu *kvm_pmc_to_vcpu(struct kvm_pmc *pmc)
{
struct kvm_pmu *pmu;
struct kvm_vcpu_arch *vcpu_arch;
pmc -= pmc->idx;
pmu = container_of(pmc, struct kvm_pmu, pmc[0]);
vcpu_arch = container_of(pmu, struct kvm_vcpu_arch, pmu);
return container_of(vcpu_arch, struct kvm_vcpu, arch);
}
/**
* When the perf event overflows, set the overflow status and inform the vcpu.
*/
@ -329,17 +476,15 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val)
*/
void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
{
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc;
u64 mask;
int i;
mask = kvm_pmu_valid_counter_mask(vcpu);
if (val & ARMV8_PMU_PMCR_E) {
kvm_pmu_enable_counter(vcpu,
kvm_pmu_enable_counter_mask(vcpu,
__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & mask);
} else {
kvm_pmu_disable_counter(vcpu, mask);
kvm_pmu_disable_counter_mask(vcpu, mask);
}
if (val & ARMV8_PMU_PMCR_C)
@ -349,11 +494,6 @@ void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val)
for (i = 0; i < ARMV8_PMU_CYCLE_IDX; i++)
kvm_pmu_set_counter_value(vcpu, i, 0);
}
if (val & ARMV8_PMU_PMCR_LC) {
pmc = &pmu->pmc[ARMV8_PMU_CYCLE_IDX];
pmc->bitmask = 0xffffffffffffffffUL;
}
}
static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
@ -362,6 +502,112 @@ static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
(__vcpu_sys_reg(vcpu, PMCNTENSET_EL0) & BIT(select_idx));
}
/**
* kvm_pmu_create_perf_event - create a perf event for a counter
* @vcpu: The vcpu pointer
* @select_idx: The number of selected counter
*/
static void kvm_pmu_create_perf_event(struct kvm_vcpu *vcpu, u64 select_idx)
{
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc;
struct perf_event *event;
struct perf_event_attr attr;
u64 eventsel, counter, reg, data;
/*
* For chained counters the event type and filtering attributes are
* obtained from the low/even counter. We also use this counter to
* determine if the event is enabled/disabled.
*/
pmc = kvm_pmu_get_canonical_pmc(&pmu->pmc[select_idx]);
reg = (pmc->idx == ARMV8_PMU_CYCLE_IDX)
? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + pmc->idx;
data = __vcpu_sys_reg(vcpu, reg);
kvm_pmu_stop_counter(vcpu, pmc);
eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
/* Software increment event does't need to be backed by a perf event */
if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
pmc->idx != ARMV8_PMU_CYCLE_IDX)
return;
memset(&attr, 0, sizeof(struct perf_event_attr));
attr.type = PERF_TYPE_RAW;
attr.size = sizeof(attr);
attr.pinned = 1;
attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, pmc->idx);
attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
attr.exclude_hv = 1; /* Don't count EL2 events */
attr.exclude_host = 1; /* Don't count host events */
attr.config = (pmc->idx == ARMV8_PMU_CYCLE_IDX) ?
ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
counter = kvm_pmu_get_pair_counter_value(vcpu, pmc);
if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
/**
* The initial sample period (overflow count) of an event. For
* chained counters we only support overflow interrupts on the
* high counter.
*/
attr.sample_period = (-counter) & GENMASK(63, 0);
event = perf_event_create_kernel_counter(&attr, -1, current,
kvm_pmu_perf_overflow,
pmc + 1);
if (kvm_pmu_counter_is_enabled(vcpu, pmc->idx + 1))
attr.config1 |= PERF_ATTR_CFG1_KVM_PMU_CHAINED;
} else {
/* The initial sample period (overflow count) of an event. */
if (kvm_pmu_idx_is_64bit(vcpu, pmc->idx))
attr.sample_period = (-counter) & GENMASK(63, 0);
else
attr.sample_period = (-counter) & GENMASK(31, 0);
event = perf_event_create_kernel_counter(&attr, -1, current,
kvm_pmu_perf_overflow, pmc);
}
if (IS_ERR(event)) {
pr_err_once("kvm: pmu event creation failed %ld\n",
PTR_ERR(event));
return;
}
pmc->perf_event = event;
}
/**
* kvm_pmu_update_pmc_chained - update chained bitmap
* @vcpu: The vcpu pointer
* @select_idx: The number of selected counter
*
* Update the chained bitmap based on the event type written in the
* typer register.
*/
static void kvm_pmu_update_pmc_chained(struct kvm_vcpu *vcpu, u64 select_idx)
{
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc = &pmu->pmc[select_idx];
if (kvm_pmu_idx_has_chain_evtype(vcpu, pmc->idx)) {
/*
* During promotion from !chained to chained we must ensure
* the adjacent counter is stopped and its event destroyed
*/
if (!kvm_pmu_pmc_is_chained(pmc))
kvm_pmu_stop_counter(vcpu, pmc);
set_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
} else {
clear_bit(pmc->idx >> 1, vcpu->arch.pmu.chained);
}
}
/**
* kvm_pmu_set_counter_event_type - set selected counter to monitor some event
* @vcpu: The vcpu pointer
@ -375,45 +621,15 @@ static bool kvm_pmu_counter_is_enabled(struct kvm_vcpu *vcpu, u64 select_idx)
void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
u64 select_idx)
{
struct kvm_pmu *pmu = &vcpu->arch.pmu;
struct kvm_pmc *pmc = &pmu->pmc[select_idx];
struct perf_event *event;
struct perf_event_attr attr;
u64 eventsel, counter;
u64 reg, event_type = data & ARMV8_PMU_EVTYPE_MASK;
kvm_pmu_stop_counter(vcpu, pmc);
eventsel = data & ARMV8_PMU_EVTYPE_EVENT;
reg = (select_idx == ARMV8_PMU_CYCLE_IDX)
? PMCCFILTR_EL0 : PMEVTYPER0_EL0 + select_idx;
/* Software increment event does't need to be backed by a perf event */
if (eventsel == ARMV8_PMUV3_PERFCTR_SW_INCR &&
select_idx != ARMV8_PMU_CYCLE_IDX)
return;
__vcpu_sys_reg(vcpu, reg) = event_type;
memset(&attr, 0, sizeof(struct perf_event_attr));
attr.type = PERF_TYPE_RAW;
attr.size = sizeof(attr);
attr.pinned = 1;
attr.disabled = !kvm_pmu_counter_is_enabled(vcpu, select_idx);
attr.exclude_user = data & ARMV8_PMU_EXCLUDE_EL0 ? 1 : 0;
attr.exclude_kernel = data & ARMV8_PMU_EXCLUDE_EL1 ? 1 : 0;
attr.exclude_hv = 1; /* Don't count EL2 events */
attr.exclude_host = 1; /* Don't count host events */
attr.config = (select_idx == ARMV8_PMU_CYCLE_IDX) ?
ARMV8_PMUV3_PERFCTR_CPU_CYCLES : eventsel;
counter = kvm_pmu_get_counter_value(vcpu, select_idx);
/* The initial sample period (overflow count) of an event. */
attr.sample_period = (-counter) & pmc->bitmask;
event = perf_event_create_kernel_counter(&attr, -1, current,
kvm_pmu_perf_overflow, pmc);
if (IS_ERR(event)) {
pr_err_once("kvm: pmu event creation failed %ld\n",
PTR_ERR(event));
return;
}
pmc->perf_event = event;
kvm_pmu_update_pmc_chained(vcpu, select_idx);
kvm_pmu_create_perf_event(vcpu, select_idx);
}
bool kvm_arm_support_pmu_v3(void)

View File

@ -401,8 +401,16 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
feature = smccc_get_arg1(vcpu);
switch(feature) {
case ARM_SMCCC_ARCH_WORKAROUND_1:
if (kvm_arm_harden_branch_predictor())
switch (kvm_arm_harden_branch_predictor()) {
case KVM_BP_HARDEN_UNKNOWN:
break;
case KVM_BP_HARDEN_WA_NEEDED:
val = SMCCC_RET_SUCCESS;
break;
case KVM_BP_HARDEN_NOT_REQUIRED:
val = SMCCC_RET_NOT_REQUIRED;
break;
}
break;
case ARM_SMCCC_ARCH_WORKAROUND_2:
switch (kvm_arm_have_ssbd()) {
@ -430,42 +438,103 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu)
int kvm_arm_get_fw_num_regs(struct kvm_vcpu *vcpu)
{
return 1; /* PSCI version */
return 3; /* PSCI version and two workaround registers */
}
int kvm_arm_copy_fw_reg_indices(struct kvm_vcpu *vcpu, u64 __user *uindices)
{
if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices))
if (put_user(KVM_REG_ARM_PSCI_VERSION, uindices++))
return -EFAULT;
if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1, uindices++))
return -EFAULT;
if (put_user(KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2, uindices++))
return -EFAULT;
return 0;
}
int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
#define KVM_REG_FEATURE_LEVEL_WIDTH 4
#define KVM_REG_FEATURE_LEVEL_MASK (BIT(KVM_REG_FEATURE_LEVEL_WIDTH) - 1)
/*
* Convert the workaround level into an easy-to-compare number, where higher
* values mean better protection.
*/
static int get_kernel_wa_level(u64 regid)
{
if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
void __user *uaddr = (void __user *)(long)reg->addr;
u64 val;
val = kvm_psci_version(vcpu, vcpu->kvm);
if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
return -EFAULT;
return 0;
switch (regid) {
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
switch (kvm_arm_harden_branch_predictor()) {
case KVM_BP_HARDEN_UNKNOWN:
return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
case KVM_BP_HARDEN_WA_NEEDED:
return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_AVAIL;
case KVM_BP_HARDEN_NOT_REQUIRED:
return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_REQUIRED;
}
return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1_NOT_AVAIL;
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
switch (kvm_arm_have_ssbd()) {
case KVM_SSBD_FORCE_DISABLE:
return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_AVAIL;
case KVM_SSBD_KERNEL:
return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL;
case KVM_SSBD_FORCE_ENABLE:
case KVM_SSBD_MITIGATED:
return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED;
case KVM_SSBD_UNKNOWN:
default:
return KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_UNKNOWN;
}
}
return -EINVAL;
}
int kvm_arm_get_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
void __user *uaddr = (void __user *)(long)reg->addr;
u64 val;
switch (reg->id) {
case KVM_REG_ARM_PSCI_VERSION:
val = kvm_psci_version(vcpu, vcpu->kvm);
break;
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
break;
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
val = get_kernel_wa_level(reg->id) & KVM_REG_FEATURE_LEVEL_MASK;
if (val == KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL &&
kvm_arm_get_vcpu_workaround_2_flag(vcpu))
val |= KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED;
break;
default:
return -ENOENT;
}
if (copy_to_user(uaddr, &val, KVM_REG_SIZE(reg->id)))
return -EFAULT;
return 0;
}
int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
{
if (reg->id == KVM_REG_ARM_PSCI_VERSION) {
void __user *uaddr = (void __user *)(long)reg->addr;
bool wants_02;
u64 val;
void __user *uaddr = (void __user *)(long)reg->addr;
u64 val;
int wa_level;
if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
return -EFAULT;
if (copy_from_user(&val, uaddr, KVM_REG_SIZE(reg->id)))
return -EFAULT;
switch (reg->id) {
case KVM_REG_ARM_PSCI_VERSION:
{
bool wants_02;
wants_02 = test_bit(KVM_ARM_VCPU_PSCI_0_2, vcpu->arch.features);
@ -482,6 +551,54 @@ int kvm_arm_set_fw_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg)
vcpu->kvm->arch.psci_version = val;
return 0;
}
break;
}
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1:
if (val & ~KVM_REG_FEATURE_LEVEL_MASK)
return -EINVAL;
if (get_kernel_wa_level(reg->id) < val)
return -EINVAL;
return 0;
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2:
if (val & ~(KVM_REG_FEATURE_LEVEL_MASK |
KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED))
return -EINVAL;
wa_level = val & KVM_REG_FEATURE_LEVEL_MASK;
if (get_kernel_wa_level(reg->id) < wa_level)
return -EINVAL;
/* The enabled bit must not be set unless the level is AVAIL. */
if (wa_level != KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL &&
wa_level != val)
return -EINVAL;
/* Are we finished or do we need to check the enable bit ? */
if (kvm_arm_have_ssbd() != KVM_SSBD_KERNEL)
return 0;
/*
* If this kernel supports the workaround to be switched on
* or off, make sure it matches the requested setting.
*/
switch (wa_level) {
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_AVAIL:
kvm_arm_set_vcpu_workaround_2_flag(vcpu,
val & KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_ENABLED);
break;
case KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2_NOT_REQUIRED:
kvm_arm_set_vcpu_workaround_2_flag(vcpu, true);
break;
}
return 0;
default:
return -ENOENT;
}
return -EINVAL;

View File

@ -184,9 +184,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
nr_rt_entries += 1;
new = kzalloc(sizeof(*new) + (nr_rt_entries * sizeof(struct hlist_head)),
GFP_KERNEL_ACCOUNT);
new = kzalloc(struct_size(new, map, nr_rt_entries), GFP_KERNEL_ACCOUNT);
if (!new)
return -ENOMEM;

View File

@ -95,7 +95,7 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink);
* kvm->lock --> kvm->slots_lock --> kvm->irq_lock
*/
DEFINE_SPINLOCK(kvm_lock);
DEFINE_MUTEX(kvm_lock);
static DEFINE_RAW_SPINLOCK(kvm_count_lock);
LIST_HEAD(vm_list);
@ -680,9 +680,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
if (r)
goto out_err;
spin_lock(&kvm_lock);
mutex_lock(&kvm_lock);
list_add(&kvm->vm_list, &vm_list);
spin_unlock(&kvm_lock);
mutex_unlock(&kvm_lock);
preempt_notifier_inc();
@ -728,9 +728,9 @@ static void kvm_destroy_vm(struct kvm *kvm)
kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
kvm_destroy_vm_debugfs(kvm);
kvm_arch_sync_events(kvm);
spin_lock(&kvm_lock);
mutex_lock(&kvm_lock);
list_del(&kvm->vm_list);
spin_unlock(&kvm_lock);
mutex_unlock(&kvm_lock);
kvm_free_irq_routing(kvm);
for (i = 0; i < KVM_NR_BUSES; i++) {
struct kvm_io_bus *bus = kvm_get_bus(kvm, i);
@ -1790,7 +1790,7 @@ void kvm_vcpu_unmap(struct kvm_vcpu *vcpu, struct kvm_host_map *map,
if (!map->hva)
return;
if (map->page)
if (map->page != KVM_UNMAPPED_PAGE)
kunmap(map->page);
#ifdef CONFIG_HAS_IOMEM
else
@ -4031,13 +4031,13 @@ static int vm_stat_get(void *_offset, u64 *val)
u64 tmp_val;
*val = 0;
spin_lock(&kvm_lock);
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
stat_tmp.kvm = kvm;
vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
*val += tmp_val;
}
spin_unlock(&kvm_lock);
mutex_unlock(&kvm_lock);
return 0;
}
@ -4050,12 +4050,12 @@ static int vm_stat_clear(void *_offset, u64 val)
if (val)
return -EINVAL;
spin_lock(&kvm_lock);
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
stat_tmp.kvm = kvm;
vm_stat_clear_per_vm((void *)&stat_tmp, 0);
}
spin_unlock(&kvm_lock);
mutex_unlock(&kvm_lock);
return 0;
}
@ -4070,13 +4070,13 @@ static int vcpu_stat_get(void *_offset, u64 *val)
u64 tmp_val;
*val = 0;
spin_lock(&kvm_lock);
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
stat_tmp.kvm = kvm;
vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val);
*val += tmp_val;
}
spin_unlock(&kvm_lock);
mutex_unlock(&kvm_lock);
return 0;
}
@ -4089,12 +4089,12 @@ static int vcpu_stat_clear(void *_offset, u64 val)
if (val)
return -EINVAL;
spin_lock(&kvm_lock);
mutex_lock(&kvm_lock);
list_for_each_entry(kvm, &vm_list, vm_list) {
stat_tmp.kvm = kvm;
vcpu_stat_clear_per_vm((void *)&stat_tmp, 0);
}
spin_unlock(&kvm_lock);
mutex_unlock(&kvm_lock);
return 0;
}
@ -4115,7 +4115,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
if (!kvm_dev.this_device || !kvm)
return;
spin_lock(&kvm_lock);
mutex_lock(&kvm_lock);
if (type == KVM_EVENT_CREATE_VM) {
kvm_createvm_count++;
kvm_active_vms++;
@ -4124,7 +4124,7 @@ static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
}
created = kvm_createvm_count;
active = kvm_active_vms;
spin_unlock(&kvm_lock);
mutex_unlock(&kvm_lock);
env = kzalloc(sizeof(*env), GFP_KERNEL_ACCOUNT);
if (!env)
@ -4221,6 +4221,11 @@ static void kvm_sched_out(struct preempt_notifier *pn,
kvm_arch_vcpu_put(vcpu);
}
static void check_processor_compat(void *rtn)
{
*(int *)rtn = kvm_arch_check_processor_compat();
}
int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
struct module *module)
{
@ -4252,9 +4257,7 @@ int kvm_init(void *opaque, unsigned vcpu_size, unsigned vcpu_align,
goto out_free_0a;
for_each_online_cpu(cpu) {
smp_call_function_single(cpu,
kvm_arch_check_processor_compat,
&r, 1);
smp_call_function_single(cpu, check_processor_compat, &r, 1);
if (r < 0)
goto out_free_1;
}