Merge tag 'kvm-4.13-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more KVM updates from Radim Krčmář:
 "Second batch of KVM updates for v4.13

  Common:
   - add uevents for VM creation/destruction
   - annotate and properly access RCU-protected objects

  s390:
   - rename IOCTL added in the first v4.13 merge

  x86:
   - emulate VMLOAD VMSAVE feature in SVM
   - support paravirtual asynchronous page fault while nested
   - add Hyper-V userspace interfaces for better migration
   - improve master clock corner cases
   - extend internal error reporting after EPT misconfig
   - correct single-stepping of emulated instructions in SVM
   - handle MCE during VM entry
   - fix nVMX VM entry checks and nVMX VMCS shadowing"

* tag 'kvm-4.13-2' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (28 commits)
  kvm: x86: hyperv: make VP_INDEX managed by userspace
  KVM: async_pf: Let guest support delivery of async_pf from guest mode
  KVM: async_pf: Force a nested vmexit if the injected #PF is async_pf
  KVM: async_pf: Add L1 guest async_pf #PF vmexit handler
  KVM: x86: Simplify kvm_x86_ops->queue_exception parameter list
  kvm: x86: hyperv: add KVM_CAP_HYPERV_SYNIC2
  KVM: x86: make backwards_tsc_observed a per-VM variable
  KVM: trigger uevents when creating or destroying a VM
  KVM: SVM: Enable Virtual VMLOAD VMSAVE feature
  KVM: SVM: Add Virtual VMLOAD VMSAVE feature definition
  KVM: SVM: Rename lbr_ctl field in the vmcb control area
  KVM: SVM: Prepare for new bit definition in lbr_ctl
  KVM: SVM: handle singlestep exception when skipping emulated instructions
  KVM: x86: take slots_lock in kvm_free_pit
  KVM: s390: Fix KVM_S390_GET_CMMA_BITS ioctl definition
  kvm: vmx: Properly handle machine check during VM-entry
  KVM: x86: update master clock before computing kvmclock_offset
  kvm: nVMX: Shadow "high" parts of shadowed 64-bit VMCS fields
  kvm: nVMX: Fix nested_vmx_check_msr_bitmap_controls
  kvm: nVMX: Validate the I/O bitmaps on nested VM-entry
  ...
Linus Torvalds 2017-07-15 10:18:16 -07:00
commit e37a07e0c2
21 changed files with 470 additions and 207 deletions

@@ -4329,3 +4329,21 @@ Querying this capability returns a bitmap indicating the possible
 virtual SMT modes that can be set using KVM_CAP_PPC_SMT. If bit N
 (counting from the right) is set, then a virtual SMT mode of 2^N is
 available.
+
+8.11 KVM_CAP_HYPERV_SYNIC2
+
+Architectures: x86
+
+This capability enables a newer version of Hyper-V Synthetic interrupt
+controller (SynIC). The only difference with KVM_CAP_HYPERV_SYNIC is that KVM
+doesn't clear SynIC message and event flags pages when they are enabled by
+writing to the respective MSRs.
+
+8.12 KVM_CAP_HYPERV_VP_INDEX
+
+Architectures: x86
+
+This capability indicates that userspace can load HV_X64_MSR_VP_INDEX msr. Its
+value is used to denote the target vcpu for a SynIC interrupt. For
+compatibilty, KVM initializes this msr to KVM's internal vcpu index. When this
+capability is absent, userspace can still query this msr's value.
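
For context, and not part of the patch above: a minimal userspace sketch of how a VMM might consume these two new capabilities on the destination side of a migration. The HV_X64_MSR_VP_INDEX constant and the hv_restore_vcpu() helper are illustrative assumptions rather than code from this series; the ioctls used are the standard KVM_ENABLE_CAP and KVM_SET_MSRS vCPU ioctls.

    /* Hypothetical sketch: enable SynIC v2 and restore a migrated VP index.
     * Assumes vcpu_fd is an open KVM vCPU fd and that the running kernel
     * advertises KVM_CAP_HYPERV_SYNIC2 / KVM_CAP_HYPERV_VP_INDEX. */
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    #define HV_X64_MSR_VP_INDEX 0x40000002  /* Hyper-V MSR number, per the TLFS */

    static int hv_restore_vcpu(int vcpu_fd, __u64 saved_vp_index)
    {
            struct kvm_enable_cap cap = { .cap = KVM_CAP_HYPERV_SYNIC2 };
            struct {
                    struct kvm_msrs hdr;
                    struct kvm_msr_entry entry;
            } msrs = {
                    .hdr   = { .nmsrs = 1 },
                    .entry = { .index = HV_X64_MSR_VP_INDEX,
                               .data  = saved_vp_index },
            };

            /* SynIC v2: KVM will not zero the message/event flags pages when
             * the guest re-enables them, so migrated contents survive. */
            if (ioctl(vcpu_fd, KVM_ENABLE_CAP, &cap) < 0)
                    return -1;

            /* Host-initiated MSR write: loads the VP index captured on the
             * migration source instead of KVM's internal vcpu index. */
            return ioctl(vcpu_fd, KVM_SET_MSRS, &msrs) == 1 ? 0 : -1;
    }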

@@ -166,10 +166,11 @@ MSR_KVM_SYSTEM_TIME: 0x12
 MSR_KVM_ASYNC_PF_EN: 0x4b564d02
     data: Bits 63-6 hold 64-byte aligned physical address of a
     64 byte memory area which must be in guest RAM and must be
-    zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1
+    zeroed. Bits 5-3 are reserved and should be zero. Bit 0 is 1
     when asynchronous page faults are enabled on the vcpu 0 when
     disabled. Bit 1 is 1 if asynchronous page faults can be injected
-    when vcpu is in cpl == 0.
+    when vcpu is in cpl == 0. Bit 2 is 1 if asynchronous page faults
+    are delivered to L1 as #PF vmexits.
 
     First 4 byte of 64 byte memory location will be written to by
     the hypervisor at the time of asynchronous page fault (APF)
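
Not part of the patch, but to make the new bit layout concrete: a small sketch of how a guest could compose the MSR value, with bit 2 asking that async page faults taken while running a nested guest be delivered to L1 as #PF vmexits. The apf_msr_value() helper and its arguments are illustrative only; the in-tree guest code (see the kvm_guest_cpu_init() hunk further down) does the equivalent with wrmsr_safe() so the bit stays optional.

    /* Illustrative only: composing MSR_KVM_ASYNC_PF_EN. 'apf_area_pa' is
     * assumed to be the guest-physical address of a zeroed, 64-byte-aligned
     * APF data area. */
    #define MSR_KVM_ASYNC_PF_EN                     0x4b564d02
    #define KVM_ASYNC_PF_ENABLED                    (1 << 0)
    #define KVM_ASYNC_PF_SEND_ALWAYS                (1 << 1)
    #define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT      (1 << 2)

    static inline unsigned long long apf_msr_value(unsigned long long apf_area_pa,
                                                   int nested_as_pf_vmexit)
    {
            unsigned long long val = apf_area_pa | KVM_ASYNC_PF_ENABLED;

            if (nested_as_pf_vmexit)        /* new in this series: bit 2 */
                    val |= KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
            return val;                     /* written with wrmsr/wrmsr_safe */
    }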

@@ -286,6 +286,7 @@
 #define X86_FEATURE_PAUSEFILTER (15*32+10) /* filtered pause intercept */
 #define X86_FEATURE_PFTHRESHOLD (15*32+12) /* pause filter threshold */
 #define X86_FEATURE_AVIC        (15*32+13) /* Virtual Interrupt Controller */
+#define X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE (15*32+15) /* Virtual VMLOAD VMSAVE */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
 #define X86_FEATURE_AVX512VBMI  (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/

@@ -23,6 +23,7 @@ struct x86_exception {
     u16 error_code;
     bool nested_page_fault;
     u64 address; /* cr2 or nested page fault gpa */
+    u8 async_page_fault;
 };
 
 /*

@@ -462,10 +462,12 @@ struct kvm_vcpu_hv_synic {
     DECLARE_BITMAP(auto_eoi_bitmap, 256);
     DECLARE_BITMAP(vec_bitmap, 256);
     bool active;
+    bool dont_zero_synic_pages;
 };
 
 /* Hyper-V per vcpu emulation context */
 struct kvm_vcpu_hv {
+    u32 vp_index;
     u64 hv_vapic;
     s64 runtime_offset;
     struct kvm_vcpu_hv_synic synic;
@@ -549,6 +551,7 @@ struct kvm_vcpu_arch {
         bool reinject;
         u8 nr;
         u32 error_code;
+        u8 nested_apf;
     } exception;
 
     struct kvm_queued_interrupt {
@@ -649,6 +652,9 @@ struct kvm_vcpu_arch {
         u64 msr_val;
         u32 id;
         bool send_user_only;
+        u32 host_apf_reason;
+        unsigned long nested_apf_token;
+        bool delivery_as_pf_vmexit;
     } apf;
 
     /* OSVW MSRs (AMD only) */
@@ -803,6 +809,7 @@ struct kvm_arch {
     int audit_point;
 #endif
 
+    bool backwards_tsc_observed;
     bool boot_vcpu_runs_old_kvmclock;
     u32 bsp_vcpu_id;
@@ -952,9 +959,7 @@ struct kvm_x86_ops {
                             unsigned char *hypercall_addr);
     void (*set_irq)(struct kvm_vcpu *vcpu);
     void (*set_nmi)(struct kvm_vcpu *vcpu);
-    void (*queue_exception)(struct kvm_vcpu *vcpu, unsigned nr,
-                            bool has_error_code, u32 error_code,
-                            bool reinject);
+    void (*queue_exception)(struct kvm_vcpu *vcpu);
     void (*cancel_injection)(struct kvm_vcpu *vcpu);
     int (*interrupt_allowed)(struct kvm_vcpu *vcpu);
     int (*nmi_allowed)(struct kvm_vcpu *vcpu);

@@ -83,7 +83,7 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
     u32 event_inj;
     u32 event_inj_err;
     u64 nested_cr3;
-    u64 lbr_ctl;
+    u64 virt_ext;
     u32 clean;
     u32 reserved_5;
     u64 next_rip;
@@ -119,6 +119,9 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
 #define AVIC_ENABLE_SHIFT 31
 #define AVIC_ENABLE_MASK (1 << AVIC_ENABLE_SHIFT)
 
+#define LBR_CTL_ENABLE_MASK BIT_ULL(0)
+#define VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK BIT_ULL(1)
+
 #define SVM_INTERRUPT_SHADOW_MASK 1
 
 #define SVM_IOIO_STR_SHIFT 2

@@ -67,6 +67,7 @@ struct kvm_clock_pairing {
 #define KVM_ASYNC_PF_ENABLED                    (1 << 0)
 #define KVM_ASYNC_PF_SEND_ALWAYS                (1 << 1)
+#define KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT      (1 << 2)
 
 /* Operations for KVM_HC_MMU_OP */
 #define KVM_MMU_OP_WRITE_PTE            1

@@ -330,7 +330,12 @@ static void kvm_guest_cpu_init(void)
 #ifdef CONFIG_PREEMPT
         pa |= KVM_ASYNC_PF_SEND_ALWAYS;
 #endif
-        wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED);
+        pa |= KVM_ASYNC_PF_ENABLED;
+
+        /* Async page fault support for L1 hypervisor is optional */
+        if (wrmsr_safe(MSR_KVM_ASYNC_PF_EN,
+                (pa | KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT) & 0xffffffff, pa >> 32) < 0)
+            wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
         __this_cpu_write(apf_reason.enabled, 1);
         printk(KERN_INFO"KVM setup async PF for cpu %d\n",
                smp_processor_id());

@@ -106,14 +106,27 @@ static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
     return 0;
 }
 
-static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vcpu_id)
+static struct kvm_vcpu *get_vcpu_by_vpidx(struct kvm *kvm, u32 vpidx)
+{
+    struct kvm_vcpu *vcpu = NULL;
+    int i;
+
+    if (vpidx < KVM_MAX_VCPUS)
+        vcpu = kvm_get_vcpu(kvm, vpidx);
+    if (vcpu && vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
+        return vcpu;
+    kvm_for_each_vcpu(i, vcpu, kvm)
+        if (vcpu_to_hv_vcpu(vcpu)->vp_index == vpidx)
+            return vcpu;
+    return NULL;
+}
+
+static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
 {
     struct kvm_vcpu *vcpu;
     struct kvm_vcpu_hv_synic *synic;
 
-    if (vcpu_id >= atomic_read(&kvm->online_vcpus))
-        return NULL;
-    vcpu = kvm_get_vcpu(kvm, vcpu_id);
+    vcpu = get_vcpu_by_vpidx(kvm, vpidx);
     if (!vcpu)
         return NULL;
     synic = vcpu_to_synic(vcpu);
@@ -221,7 +234,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
         synic->version = data;
         break;
     case HV_X64_MSR_SIEFP:
-        if (data & HV_SYNIC_SIEFP_ENABLE)
+        if ((data & HV_SYNIC_SIEFP_ENABLE) && !host &&
+            !synic->dont_zero_synic_pages)
             if (kvm_clear_guest(vcpu->kvm,
                                 data & PAGE_MASK, PAGE_SIZE)) {
                 ret = 1;
@@ -232,7 +246,8 @@ static int synic_set_msr(struct kvm_vcpu_hv_synic *synic,
         synic_exit(synic, msr);
         break;
     case HV_X64_MSR_SIMP:
-        if (data & HV_SYNIC_SIMP_ENABLE)
+        if ((data & HV_SYNIC_SIMP_ENABLE) && !host &&
+            !synic->dont_zero_synic_pages)
             if (kvm_clear_guest(vcpu->kvm,
                                 data & PAGE_MASK, PAGE_SIZE)) {
                 ret = 1;
@@ -318,11 +333,11 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint)
     return ret;
 }
 
-int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint)
+int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vpidx, u32 sint)
 {
     struct kvm_vcpu_hv_synic *synic;
 
-    synic = synic_get(kvm, vcpu_id);
+    synic = synic_get(kvm, vpidx);
     if (!synic)
         return -EINVAL;
@@ -341,11 +356,11 @@ void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector)
         kvm_hv_notify_acked_sint(vcpu, i);
 }
 
-static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vcpu_id, u32 sint, int gsi)
+static int kvm_hv_set_sint_gsi(struct kvm *kvm, u32 vpidx, u32 sint, int gsi)
 {
     struct kvm_vcpu_hv_synic *synic;
 
-    synic = synic_get(kvm, vcpu_id);
+    synic = synic_get(kvm, vpidx);
     if (!synic)
         return -EINVAL;
@@ -687,14 +702,24 @@ void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu)
         stimer_init(&hv_vcpu->stimer[i], i);
 }
 
-int kvm_hv_activate_synic(struct kvm_vcpu *vcpu)
+void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu)
+{
+    struct kvm_vcpu_hv *hv_vcpu = vcpu_to_hv_vcpu(vcpu);
+
+    hv_vcpu->vp_index = kvm_vcpu_get_idx(vcpu);
+}
+
+int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages)
 {
+    struct kvm_vcpu_hv_synic *synic = vcpu_to_synic(vcpu);
+
     /*
      * Hyper-V SynIC auto EOI SINT's are
      * not compatible with APICV, so deactivate APICV
      */
     kvm_vcpu_deactivate_apicv(vcpu);
-    vcpu_to_synic(vcpu)->active = true;
+    synic->active = true;
+    synic->dont_zero_synic_pages = dont_zero_synic_pages;
     return 0;
 }
@@ -978,6 +1003,11 @@ static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host)
     struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
 
     switch (msr) {
+    case HV_X64_MSR_VP_INDEX:
+        if (!host)
+            return 1;
+        hv->vp_index = (u32)data;
+        break;
     case HV_X64_MSR_APIC_ASSIST_PAGE: {
         u64 gfn;
         unsigned long addr;
@@ -1089,18 +1119,9 @@ static int kvm_hv_get_msr(struct kvm_vcpu *vcpu, u32 msr, u64 *pdata)
     struct kvm_vcpu_hv *hv = &vcpu->arch.hyperv;
 
     switch (msr) {
-    case HV_X64_MSR_VP_INDEX: {
-        int r;
-        struct kvm_vcpu *v;
-
-        kvm_for_each_vcpu(r, v, vcpu->kvm) {
-            if (v == vcpu) {
-                data = r;
-                break;
-            }
-        }
+    case HV_X64_MSR_VP_INDEX:
+        data = hv->vp_index;
         break;
-    }
     case HV_X64_MSR_EOI:
         return kvm_hv_vapic_msr_read(vcpu, APIC_EOI, pdata);
     case HV_X64_MSR_ICR:

@@ -56,9 +56,10 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu);
 void kvm_hv_irq_routing_update(struct kvm *kvm);
 int kvm_hv_synic_set_irq(struct kvm *kvm, u32 vcpu_id, u32 sint);
 void kvm_hv_synic_send_eoi(struct kvm_vcpu *vcpu, int vector);
-int kvm_hv_activate_synic(struct kvm_vcpu *vcpu);
+int kvm_hv_activate_synic(struct kvm_vcpu *vcpu, bool dont_zero_synic_pages);
 
 void kvm_hv_vcpu_init(struct kvm_vcpu *vcpu);
+void kvm_hv_vcpu_postcreate(struct kvm_vcpu *vcpu);
 void kvm_hv_vcpu_uninit(struct kvm_vcpu *vcpu);
 
 static inline struct kvm_vcpu_hv_stimer *vcpu_to_stimer(struct kvm_vcpu *vcpu,

@@ -724,8 +724,10 @@ void kvm_free_pit(struct kvm *kvm)
     struct kvm_pit *pit = kvm->arch.vpit;
 
     if (pit) {
+        mutex_lock(&kvm->slots_lock);
         kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->dev);
         kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &pit->speaker_dev);
+        mutex_unlock(&kvm->slots_lock);
         kvm_pit_set_reinject(pit, false);
         hrtimer_cancel(&pit->pit_state.timer);
         kthread_destroy_worker(pit->worker);

@@ -46,6 +46,7 @@
 #include <asm/io.h>
 #include <asm/vmx.h>
 #include <asm/kvm_page_track.h>
+#include "trace.h"
 
 /*
  * When setting this variable to true it enables Two-Dimensional-Paging
@@ -3748,7 +3749,7 @@ bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu)
              kvm_event_needs_reinjection(vcpu)))
         return false;
 
-    if (is_guest_mode(vcpu))
+    if (!vcpu->arch.apf.delivery_as_pf_vmexit && is_guest_mode(vcpu))
         return false;
 
     return kvm_x86_ops->interrupt_allowed(vcpu);
@@ -3780,6 +3781,38 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
     return false;
 }
 
+int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
+                          u64 fault_address, char *insn, int insn_len,
+                          bool need_unprotect)
+{
+    int r = 1;
+
+    switch (vcpu->arch.apf.host_apf_reason) {
+    default:
+        trace_kvm_page_fault(fault_address, error_code);
+
+        if (need_unprotect && kvm_event_needs_reinjection(vcpu))
+            kvm_mmu_unprotect_page_virt(vcpu, fault_address);
+        r = kvm_mmu_page_fault(vcpu, fault_address, error_code, insn,
+                               insn_len);
+        break;
+    case KVM_PV_REASON_PAGE_NOT_PRESENT:
+        vcpu->arch.apf.host_apf_reason = 0;
+        local_irq_disable();
+        kvm_async_pf_task_wait(fault_address);
+        local_irq_enable();
+        break;
+    case KVM_PV_REASON_PAGE_READY:
+        vcpu->arch.apf.host_apf_reason = 0;
+        local_irq_disable();
+        kvm_async_pf_task_wake(fault_address);
+        local_irq_enable();
+        break;
+    }
+    return r;
+}
+EXPORT_SYMBOL_GPL(kvm_handle_page_fault);
+
 static bool
 check_hugepage_cache_consistency(struct kvm_vcpu *vcpu, gfn_t gfn, int level)
 {

@@ -77,6 +77,9 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
 void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
                              bool accessed_dirty);
 bool kvm_can_do_async_pf(struct kvm_vcpu *vcpu);
+int kvm_handle_page_fault(struct kvm_vcpu *vcpu, u64 error_code,
+                          u64 fault_address, char *insn, int insn_len,
+                          bool need_unprotect);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {

@@ -194,7 +194,6 @@ struct vcpu_svm {
     unsigned int3_injected;
     unsigned long int3_rip;
-    u32 apf_reason;
 
     /* cached guest cpuid flags for faster access */
     bool nrips_enabled : 1;
@@ -277,6 +276,10 @@ static int avic;
 module_param(avic, int, S_IRUGO);
 #endif
 
+/* enable/disable Virtual VMLOAD VMSAVE */
+static int vls = true;
+module_param(vls, int, 0444);
+
 /* AVIC VM ID bit masks and lock */
 static DECLARE_BITMAP(avic_vm_id_bitmap, AVIC_VM_ID_NR);
 static DEFINE_SPINLOCK(avic_vm_id_lock);
@@ -633,11 +636,13 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
     svm_set_interrupt_shadow(vcpu, 0);
 }
 
-static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
-                                bool has_error_code, u32 error_code,
-                                bool reinject)
+static void svm_queue_exception(struct kvm_vcpu *vcpu)
 {
     struct vcpu_svm *svm = to_svm(vcpu);
+    unsigned nr = vcpu->arch.exception.nr;
+    bool has_error_code = vcpu->arch.exception.has_error_code;
+    bool reinject = vcpu->arch.exception.reinject;
+    u32 error_code = vcpu->arch.exception.error_code;
 
     /*
      * If we are within a nested VM we'd better #VMEXIT and let the guest
@@ -947,7 +952,7 @@ static void svm_enable_lbrv(struct vcpu_svm *svm)
 {
     u32 *msrpm = svm->msrpm;
 
-    svm->vmcb->control.lbr_ctl = 1;
+    svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
     set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
     set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
     set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
@@ -958,7 +963,7 @@ static void svm_disable_lbrv(struct vcpu_svm *svm)
 {
     u32 *msrpm = svm->msrpm;
 
-    svm->vmcb->control.lbr_ctl = 0;
+    svm->vmcb->control.virt_ext &= ~LBR_CTL_ENABLE_MASK;
     set_msr_interception(msrpm, MSR_IA32_LASTBRANCHFROMIP, 0, 0);
     set_msr_interception(msrpm, MSR_IA32_LASTBRANCHTOIP, 0, 0);
     set_msr_interception(msrpm, MSR_IA32_LASTINTFROMIP, 0, 0);
@@ -1093,6 +1098,16 @@ static __init int svm_hardware_setup(void)
         }
     }
 
+    if (vls) {
+        if (!npt_enabled ||
+            !boot_cpu_has(X86_FEATURE_VIRTUAL_VMLOAD_VMSAVE) ||
+            !IS_ENABLED(CONFIG_X86_64)) {
+            vls = false;
+        } else {
+            pr_info("Virtual VMLOAD VMSAVE supported\n");
+        }
+    }
+
     return 0;
 
 err:
@@ -1280,6 +1295,16 @@ static void init_vmcb(struct vcpu_svm *svm)
     if (avic)
         avic_init_vmcb(svm);
 
+    /*
+     * If hardware supports Virtual VMLOAD VMSAVE then enable it
+     * in VMCB and clear intercepts to avoid #VMEXIT.
+     */
+    if (vls) {
+        clr_intercept(svm, INTERCEPT_VMLOAD);
+        clr_intercept(svm, INTERCEPT_VMSAVE);
+        svm->vmcb->control.virt_ext |= VIRTUAL_VMLOAD_VMSAVE_ENABLE_MASK;
+    }
+
     mark_all_dirty(svm->vmcb);
 
     enable_gif(svm);
@@ -2096,34 +2121,11 @@ static void svm_set_dr7(struct kvm_vcpu *vcpu, unsigned long value)
 static int pf_interception(struct vcpu_svm *svm)
 {
     u64 fault_address = svm->vmcb->control.exit_info_2;
-    u64 error_code;
-    int r = 1;
+    u64 error_code = svm->vmcb->control.exit_info_1;
 
-    switch (svm->apf_reason) {
-    default:
-        error_code = svm->vmcb->control.exit_info_1;
-
-        trace_kvm_page_fault(fault_address, error_code);
-        if (!npt_enabled && kvm_event_needs_reinjection(&svm->vcpu))
-            kvm_mmu_unprotect_page_virt(&svm->vcpu, fault_address);
-        r = kvm_mmu_page_fault(&svm->vcpu, fault_address, error_code,
-                               svm->vmcb->control.insn_bytes,
-                               svm->vmcb->control.insn_len);
-        break;
-    case KVM_PV_REASON_PAGE_NOT_PRESENT:
-        svm->apf_reason = 0;
-        local_irq_disable();
-        kvm_async_pf_task_wait(fault_address);
-        local_irq_enable();
-        break;
-    case KVM_PV_REASON_PAGE_READY:
-        svm->apf_reason = 0;
-        local_irq_disable();
-        kvm_async_pf_task_wake(fault_address);
-        local_irq_enable();
-        break;
-    }
-    return r;
+    return kvm_handle_page_fault(&svm->vcpu, error_code, fault_address,
+                                 svm->vmcb->control.insn_bytes,
+                                 svm->vmcb->control.insn_len, !npt_enabled);
 }
 
 static int db_interception(struct vcpu_svm *svm)
@@ -2267,7 +2269,7 @@ static int io_interception(struct vcpu_svm *svm)
 {
     struct kvm_vcpu *vcpu = &svm->vcpu;
     u32 io_info = svm->vmcb->control.exit_info_1; /* address size bug? */
-    int size, in, string;
+    int size, in, string, ret;
     unsigned port;
 
     ++svm->vcpu.stat.io_exits;
@@ -2279,10 +2281,16 @@ static int io_interception(struct vcpu_svm *svm)
     port = io_info >> 16;
     size = (io_info & SVM_IOIO_SIZE_MASK) >> SVM_IOIO_SIZE_SHIFT;
     svm->next_rip = svm->vmcb->control.exit_info_2;
-    skip_emulated_instruction(&svm->vcpu);
+    ret = kvm_skip_emulated_instruction(&svm->vcpu);
 
-    return in ? kvm_fast_pio_in(vcpu, size, port)
-              : kvm_fast_pio_out(vcpu, size, port);
+    /*
+     * TODO: we might be squashing a KVM_GUESTDBG_SINGLESTEP-triggered
+     * KVM_EXIT_DEBUG here.
+     */
+    if (in)
+        return kvm_fast_pio_in(vcpu, size, port) && ret;
+    else
+        return kvm_fast_pio_out(vcpu, size, port) && ret;
 }
 
 static int nmi_interception(struct vcpu_svm *svm)
@@ -2415,15 +2423,19 @@ static int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
     if (!is_guest_mode(&svm->vcpu))
         return 0;
 
+    vmexit = nested_svm_intercept(svm);
+    if (vmexit != NESTED_EXIT_DONE)
+        return 0;
+
     svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + nr;
     svm->vmcb->control.exit_code_hi = 0;
     svm->vmcb->control.exit_info_1 = error_code;
-    svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
+    if (svm->vcpu.arch.exception.nested_apf)
+        svm->vmcb->control.exit_info_2 = svm->vcpu.arch.apf.nested_apf_token;
+    else
+        svm->vmcb->control.exit_info_2 = svm->vcpu.arch.cr2;
 
-    vmexit = nested_svm_intercept(svm);
-    if (vmexit == NESTED_EXIT_DONE)
-        svm->nested.exit_required = true;
+    svm->nested.exit_required = true;
 
     return vmexit;
 }
@@ -2598,7 +2610,7 @@ static int nested_svm_exit_special(struct vcpu_svm *svm)
         break;
     case SVM_EXIT_EXCP_BASE + PF_VECTOR:
         /* When we're shadowing, trap PFs, but not async PF */
-        if (!npt_enabled && svm->apf_reason == 0)
+        if (!npt_enabled && svm->vcpu.arch.apf.host_apf_reason == 0)
             return NESTED_EXIT_HOST;
         break;
     default:
@@ -2645,7 +2657,7 @@ static int nested_svm_intercept(struct vcpu_svm *svm)
         }
         /* async page fault always cause vmexit */
         else if ((exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR) &&
-                 svm->apf_reason != 0)
+                 svm->vcpu.arch.exception.nested_apf != 0)
             vmexit = NESTED_EXIT_DONE;
         break;
     }
@@ -2702,7 +2714,7 @@ static inline void copy_vmcb_control_area(struct vmcb *dst_vmcb, struct vmcb *fr
     dst->event_inj = from->event_inj;
     dst->event_inj_err = from->event_inj_err;
     dst->nested_cr3 = from->nested_cr3;
-    dst->lbr_ctl = from->lbr_ctl;
+    dst->virt_ext = from->virt_ext;
 }
 
 static int nested_svm_vmexit(struct vcpu_svm *svm)
@@ -3008,7 +3020,7 @@ static bool nested_svm_vmrun(struct vcpu_svm *svm)
     /* We don't want to see VMMCALLs from a nested guest */
     clr_intercept(svm, INTERCEPT_VMMCALL);
 
-    svm->vmcb->control.lbr_ctl = nested_vmcb->control.lbr_ctl;
+    svm->vmcb->control.virt_ext = nested_vmcb->control.virt_ext;
     svm->vmcb->control.int_vector = nested_vmcb->control.int_vector;
     svm->vmcb->control.int_state = nested_vmcb->control.int_state;
     svm->vmcb->control.tsc_offset += nested_vmcb->control.tsc_offset;
@@ -3055,6 +3067,7 @@ static int vmload_interception(struct vcpu_svm *svm)
 {
     struct vmcb *nested_vmcb;
     struct page *page;
+    int ret;
 
     if (nested_svm_check_permissions(svm))
         return 1;
@@ -3064,18 +3077,19 @@ static int vmload_interception(struct vcpu_svm *svm)
         return 1;
 
     svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-    skip_emulated_instruction(&svm->vcpu);
+    ret = kvm_skip_emulated_instruction(&svm->vcpu);
 
     nested_svm_vmloadsave(nested_vmcb, svm->vmcb);
     nested_svm_unmap(page);
 
-    return 1;
+    return ret;
 }
 
 static int vmsave_interception(struct vcpu_svm *svm)
 {
     struct vmcb *nested_vmcb;
     struct page *page;
+    int ret;
 
     if (nested_svm_check_permissions(svm))
         return 1;
@@ -3085,12 +3099,12 @@ static int vmsave_interception(struct vcpu_svm *svm)
         return 1;
 
     svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-    skip_emulated_instruction(&svm->vcpu);
+    ret = kvm_skip_emulated_instruction(&svm->vcpu);
 
     nested_svm_vmloadsave(svm->vmcb, nested_vmcb);
     nested_svm_unmap(page);
 
-    return 1;
+    return ret;
 }
 
 static int vmrun_interception(struct vcpu_svm *svm)
@@ -3123,25 +3137,29 @@ failed:
 
 static int stgi_interception(struct vcpu_svm *svm)
 {
+    int ret;
+
     if (nested_svm_check_permissions(svm))
         return 1;
 
     svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-    skip_emulated_instruction(&svm->vcpu);
+    ret = kvm_skip_emulated_instruction(&svm->vcpu);
     kvm_make_request(KVM_REQ_EVENT, &svm->vcpu);
 
     enable_gif(svm);
 
-    return 1;
+    return ret;
 }
 
 static int clgi_interception(struct vcpu_svm *svm)
 {
+    int ret;
+
     if (nested_svm_check_permissions(svm))
         return 1;
 
     svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-    skip_emulated_instruction(&svm->vcpu);
+    ret = kvm_skip_emulated_instruction(&svm->vcpu);
     disable_gif(svm);
@@ -3152,7 +3170,7 @@ static int clgi_interception(struct vcpu_svm *svm)
         mark_dirty(svm->vmcb, VMCB_INTR);
     }
 
-    return 1;
+    return ret;
 }
 
 static int invlpga_interception(struct vcpu_svm *svm)
@@ -3166,8 +3184,7 @@ static int invlpga_interception(struct vcpu_svm *svm)
     kvm_mmu_invlpg(vcpu, kvm_register_read(&svm->vcpu, VCPU_REGS_RAX));
 
     svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-    skip_emulated_instruction(&svm->vcpu);
-    return 1;
+    return kvm_skip_emulated_instruction(&svm->vcpu);
 }
 
 static int skinit_interception(struct vcpu_svm *svm)
@@ -3190,7 +3207,7 @@ static int xsetbv_interception(struct vcpu_svm *svm)
     if (kvm_set_xcr(&svm->vcpu, index, new_bv) == 0) {
         svm->next_rip = kvm_rip_read(&svm->vcpu) + 3;
-        skip_emulated_instruction(&svm->vcpu);
+        return kvm_skip_emulated_instruction(&svm->vcpu);
     }
 
     return 1;
@@ -3286,8 +3303,7 @@ static int invlpg_interception(struct vcpu_svm *svm)
         return emulate_instruction(&svm->vcpu, 0) == EMULATE_DONE;
 
     kvm_mmu_invlpg(&svm->vcpu, svm->vmcb->control.exit_info_1);
-    skip_emulated_instruction(&svm->vcpu);
-    return 1;
+    return kvm_skip_emulated_instruction(&svm->vcpu);
 }
 
 static int emulate_on_interception(struct vcpu_svm *svm)
@@ -3437,9 +3453,7 @@ static int dr_interception(struct vcpu_svm *svm)
         kvm_register_write(&svm->vcpu, reg, val);
     }
 
-    skip_emulated_instruction(&svm->vcpu);
-
-    return 1;
+    return kvm_skip_emulated_instruction(&svm->vcpu);
 }
 
 static int cr8_write_interception(struct vcpu_svm *svm)
@@ -3562,6 +3576,7 @@ static int rdmsr_interception(struct vcpu_svm *svm)
     if (svm_get_msr(&svm->vcpu, &msr_info)) {
         trace_kvm_msr_read_ex(ecx);
         kvm_inject_gp(&svm->vcpu, 0);
+        return 1;
     } else {
         trace_kvm_msr_read(ecx, msr_info.data);
 
@@ -3570,9 +3585,8 @@ static int rdmsr_interception(struct vcpu_svm *svm)
         kvm_register_write(&svm->vcpu, VCPU_REGS_RDX,
                            msr_info.data >> 32);
         svm->next_rip = kvm_rip_read(&svm->vcpu) + 2;
-        skip_emulated_instruction(&svm->vcpu);
+        return kvm_skip_emulated_instruction(&svm->vcpu);
     }
-    return 1;
 }
 
 static int svm_set_vm_cr(struct kvm_vcpu *vcpu, u64 data)
@@ -3698,11 +3712,11 @@ static int wrmsr_interception(struct vcpu_svm *svm)
     if (kvm_set_msr(&svm->vcpu, &msr)) {
         trace_kvm_msr_write_ex(ecx, data);
         kvm_inject_gp(&svm->vcpu, 0);
+        return 1;
     } else {
         trace_kvm_msr_write(ecx, data);
-        skip_emulated_instruction(&svm->vcpu);
+        return kvm_skip_emulated_instruction(&svm->vcpu);
     }
-    return 1;
 }
 
 static int msr_interception(struct vcpu_svm *svm)
@@ -3731,8 +3745,7 @@ static int pause_interception(struct vcpu_svm *svm)
 
 static int nop_interception(struct vcpu_svm *svm)
 {
-    skip_emulated_instruction(&(svm->vcpu));
-    return 1;
+    return kvm_skip_emulated_instruction(&(svm->vcpu));
 }
 
 static int monitor_interception(struct vcpu_svm *svm)
@@ -4117,7 +4130,7 @@ static void dump_vmcb(struct kvm_vcpu *vcpu)
     pr_err("%-20s%016llx\n", "avic_vapic_bar:", control->avic_vapic_bar);
     pr_err("%-20s%08x\n", "event_inj:", control->event_inj);
     pr_err("%-20s%08x\n", "event_inj_err:", control->event_inj_err);
-    pr_err("%-20s%lld\n", "lbr_ctl:", control->lbr_ctl);
+    pr_err("%-20s%lld\n", "virt_ext:", control->virt_ext);
     pr_err("%-20s%016llx\n", "next_rip:", control->next_rip);
     pr_err("%-20s%016llx\n", "avic_backing_page:", control->avic_backing_page);
    pr_err("%-20s%016llx\n", "avic_logical_id:", control->avic_logical_id);
@@ -4965,7 +4978,7 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 
     /* if exit due to PF check for async PF */
     if (svm->vmcb->control.exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR)
-        svm->apf_reason = kvm_read_and_reset_pf_reason();
+        svm->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
 
     if (npt_enabled) {
         vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);

@@ -2422,28 +2422,41 @@ static void skip_emulated_instruction(struct kvm_vcpu *vcpu)
  * KVM wants to inject page-faults which it got to the guest. This function
  * checks whether in a nested guest, we need to inject them to L1 or L2.
  */
-static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned nr)
+static int nested_vmx_check_exception(struct kvm_vcpu *vcpu)
 {
     struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
+    unsigned int nr = vcpu->arch.exception.nr;
 
-    if (!(vmcs12->exception_bitmap & (1u << nr)))
+    if (!((vmcs12->exception_bitmap & (1u << nr)) ||
+          (nr == PF_VECTOR && vcpu->arch.exception.nested_apf)))
         return 0;
 
+    if (vcpu->arch.exception.nested_apf) {
+        vmcs_write32(VM_EXIT_INTR_ERROR_CODE, vcpu->arch.exception.error_code);
+        nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
+                          PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
+                          INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
+                          vcpu->arch.apf.nested_apf_token);
+        return 1;
+    }
+
     nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
                       vmcs_read32(VM_EXIT_INTR_INFO),
                       vmcs_readl(EXIT_QUALIFICATION));
     return 1;
 }
 
-static void vmx_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
-                                bool has_error_code, u32 error_code,
-                                bool reinject)
+static void vmx_queue_exception(struct kvm_vcpu *vcpu)
 {
     struct vcpu_vmx *vmx = to_vmx(vcpu);
+    unsigned nr = vcpu->arch.exception.nr;
+    bool has_error_code = vcpu->arch.exception.has_error_code;
+    bool reinject = vcpu->arch.exception.reinject;
+    u32 error_code = vcpu->arch.exception.error_code;
     u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
     if (!reinject && is_guest_mode(vcpu) &&
-        nested_vmx_check_exception(vcpu, nr))
+        nested_vmx_check_exception(vcpu))
         return;
 
     if (has_error_code) {
@@ -3764,6 +3777,25 @@ static void free_kvm_area(void)
     }
 }
 
+enum vmcs_field_type {
+    VMCS_FIELD_TYPE_U16 = 0,
+    VMCS_FIELD_TYPE_U64 = 1,
+    VMCS_FIELD_TYPE_U32 = 2,
+    VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
+};
+
+static inline int vmcs_field_type(unsigned long field)
+{
+    if (0x1 & field)    /* the *_HIGH fields are all 32 bit */
+        return VMCS_FIELD_TYPE_U32;
+    return (field >> 13) & 0x3 ;
+}
+
+static inline int vmcs_field_readonly(unsigned long field)
+{
+    return (((field >> 10) & 0x3) == 1);
+}
+
 static void init_vmcs_shadow_fields(void)
 {
     int i, j;
@@ -3789,14 +3821,22 @@ static void init_vmcs_shadow_fields(void)
     /* shadowed fields guest access without vmexit */
     for (i = 0; i < max_shadow_read_write_fields; i++) {
-        clear_bit(shadow_read_write_fields[i],
-                  vmx_vmwrite_bitmap);
-        clear_bit(shadow_read_write_fields[i],
-                  vmx_vmread_bitmap);
+        unsigned long field = shadow_read_write_fields[i];
+
+        clear_bit(field, vmx_vmwrite_bitmap);
+        clear_bit(field, vmx_vmread_bitmap);
+        if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64) {
+            clear_bit(field + 1, vmx_vmwrite_bitmap);
+            clear_bit(field + 1, vmx_vmread_bitmap);
+        }
+    }
+    for (i = 0; i < max_shadow_read_only_fields; i++) {
+        unsigned long field = shadow_read_only_fields[i];
+
+        clear_bit(field, vmx_vmread_bitmap);
+        if (vmcs_field_type(field) == VMCS_FIELD_TYPE_U64)
+            clear_bit(field + 1, vmx_vmread_bitmap);
     }
-    for (i = 0; i < max_shadow_read_only_fields; i++)
-        clear_bit(shadow_read_only_fields[i],
-                  vmx_vmread_bitmap);
 }
 
 static __init int alloc_kvm_area(void)
@@ -4634,6 +4674,11 @@ static bool guest_state_valid(struct kvm_vcpu *vcpu)
     return true;
 }
 
+static bool page_address_valid(struct kvm_vcpu *vcpu, gpa_t gpa)
+{
+    return PAGE_ALIGNED(gpa) && !(gpa >> cpuid_maxphyaddr(vcpu));
+}
+
 static int init_rmode_tss(struct kvm *kvm)
 {
     gfn_t fn;
@@ -5664,14 +5709,11 @@ static int handle_exception(struct kvm_vcpu *vcpu)
     }
 
     if (is_page_fault(intr_info)) {
-        /* EPT won't cause page fault directly */
-        BUG_ON(enable_ept);
         cr2 = vmcs_readl(EXIT_QUALIFICATION);
-        trace_kvm_page_fault(cr2, error_code);
-
-        if (kvm_event_needs_reinjection(vcpu))
-            kvm_mmu_unprotect_page_virt(vcpu, cr2);
-        return kvm_mmu_page_fault(vcpu, cr2, error_code, NULL, 0);
+        /* EPT won't cause page fault directly */
+        WARN_ON_ONCE(!vcpu->arch.apf.host_apf_reason && enable_ept);
+        return kvm_handle_page_fault(vcpu, error_code, cr2, NULL, 0,
+                                     true);
     }
 
     ex_no = intr_info & INTR_INFO_VECTOR_MASK;
@@ -7214,25 +7256,6 @@ static int handle_vmresume(struct kvm_vcpu *vcpu)
     return nested_vmx_run(vcpu, false);
 }
 
-enum vmcs_field_type {
-    VMCS_FIELD_TYPE_U16 = 0,
-    VMCS_FIELD_TYPE_U64 = 1,
-    VMCS_FIELD_TYPE_U32 = 2,
-    VMCS_FIELD_TYPE_NATURAL_WIDTH = 3
-};
-
-static inline int vmcs_field_type(unsigned long field)
-{
-    if (0x1 & field)    /* the *_HIGH fields are all 32 bit */
-        return VMCS_FIELD_TYPE_U32;
-    return (field >> 13) & 0x3 ;
-}
-
-static inline int vmcs_field_readonly(unsigned long field)
-{
-    return (((field >> 10) & 0x3) == 1);
-}
-
 /*
  * Read a vmcs12 field. Since these can have varying lengths and we return
  * one type, we chose the biggest type (u64) and zero-extend the return value
@@ -8014,7 +8037,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
         if (is_nmi(intr_info))
             return false;
         else if (is_page_fault(intr_info))
-            return enable_ept;
+            return !vmx->vcpu.arch.apf.host_apf_reason && enable_ept;
         else if (is_no_device(intr_info) &&
                  !(vmcs12->guest_cr0 & X86_CR0_TS))
             return false;
@@ -8418,9 +8441,15 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
             exit_reason != EXIT_REASON_TASK_SWITCH)) {
         vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
         vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_DELIVERY_EV;
-        vcpu->run->internal.ndata = 2;
+        vcpu->run->internal.ndata = 3;
         vcpu->run->internal.data[0] = vectoring_info;
         vcpu->run->internal.data[1] = exit_reason;
+        vcpu->run->internal.data[2] = vcpu->arch.exit_qualification;
+        if (exit_reason == EXIT_REASON_EPT_MISCONFIG) {
+            vcpu->run->internal.ndata++;
+            vcpu->run->internal.data[3] =
+                vmcs_read64(GUEST_PHYSICAL_ADDRESS);
+        }
         return 0;
     }
 
@@ -8611,17 +8640,24 @@ static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu)
 
 static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx)
 {
-    u32 exit_intr_info;
+    u32 exit_intr_info = 0;
+    u16 basic_exit_reason = (u16)vmx->exit_reason;
 
-    if (!(vmx->exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
-          || vmx->exit_reason == EXIT_REASON_EXCEPTION_NMI))
+    if (!(basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY
+          || basic_exit_reason == EXIT_REASON_EXCEPTION_NMI))
         return;
 
-    vmx->exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
-    exit_intr_info = vmx->exit_intr_info;
+    if (!(vmx->exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY))
+        exit_intr_info = vmcs_read32(VM_EXIT_INTR_INFO);
+    vmx->exit_intr_info = exit_intr_info;
+
+    /* if exit due to PF check for async PF */
+    if (is_page_fault(exit_intr_info))
+        vmx->vcpu.arch.apf.host_apf_reason = kvm_read_and_reset_pf_reason();
 
     /* Handle machine checks before interrupts are enabled */
-    if (is_machine_check(exit_intr_info))
+    if (basic_exit_reason == EXIT_REASON_MCE_DURING_VMENTRY ||
        is_machine_check(exit_intr_info))
         kvm_machine_check();
 
     /* We need to handle NMIs before interrupts are enabled */
@@ -9589,23 +9625,26 @@ static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu)
                   ns_to_ktime(preemption_timeout), HRTIMER_MODE_REL);
 }
 
+static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
+                                               struct vmcs12 *vmcs12)
+{
+    if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
+        return 0;
+
+    if (!page_address_valid(vcpu, vmcs12->io_bitmap_a) ||
+        !page_address_valid(vcpu, vmcs12->io_bitmap_b))
+        return -EINVAL;
+
+    return 0;
+}
+
 static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
                                                 struct vmcs12 *vmcs12)
 {
-    int maxphyaddr;
-    u64 addr;
-
     if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
         return 0;
 
-    if (vmcs12_read_any(vcpu, MSR_BITMAP, &addr)) {
-        WARN_ON(1);
-        return -EINVAL;
-    }
-    maxphyaddr = cpuid_maxphyaddr(vcpu);
-
-    if (!PAGE_ALIGNED(vmcs12->msr_bitmap) ||
-        ((addr + PAGE_SIZE) >> maxphyaddr))
+    if (!page_address_valid(vcpu, vmcs12->msr_bitmap))
         return -EINVAL;
 
     return 0;
@@ -10293,6 +10332,9 @@ static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
         vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT)
         return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
 
+    if (nested_vmx_check_io_bitmap_controls(vcpu, vmcs12))
+        return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
+
     if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12))
         return VMXERR_ENTRY_INVALID_CONTROL_FIELD;
@@ -10429,8 +10471,6 @@ static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry)
         return 1;
     }
 
-    vmcs12->launch_state = 1;
-
     /*
      * Note no nested_vmx_succeed or nested_vmx_fail here. At this point
      * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
@@ -10804,6 +10844,8 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
     vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
 
     if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
+        vmcs12->launch_state = 1;
+
         /* vm_entry_intr_info_field is cleared on exit. Emulate this
          * instead of reading the real value. */
         vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;

@@ -134,8 +134,6 @@ module_param(lapic_timer_advance_ns, uint, S_IRUGO | S_IWUSR);
 static bool __read_mostly vector_hashing = true;
 module_param(vector_hashing, bool, S_IRUGO);
 
-static bool __read_mostly backwards_tsc_observed = false;
-
 #define KVM_NR_SHARED_MSRS 16
 
 struct kvm_shared_msrs_global {
@@ -452,7 +450,12 @@ EXPORT_SYMBOL_GPL(kvm_complete_insn_gp);
 void kvm_inject_page_fault(struct kvm_vcpu *vcpu, struct x86_exception *fault)
 {
     ++vcpu->stat.pf_guest;
-    vcpu->arch.cr2 = fault->address;
+    vcpu->arch.exception.nested_apf =
+        is_guest_mode(vcpu) && fault->async_page_fault;
+    if (vcpu->arch.exception.nested_apf)
+        vcpu->arch.apf.nested_apf_token = fault->address;
+    else
+        vcpu->arch.cr2 = fault->address;
     kvm_queue_exception_e(vcpu, PF_VECTOR, fault->error_code);
 }
 EXPORT_SYMBOL_GPL(kvm_inject_page_fault);
@@ -1719,7 +1722,7 @@ static void pvclock_update_vm_gtod_copy(struct kvm *kvm)
                                 &ka->master_cycle_now);
 
     ka->use_master_clock = host_tsc_clocksource && vcpus_matched
-                           && !backwards_tsc_observed
+                           && !ka->backwards_tsc_observed
                            && !ka->boot_vcpu_runs_old_kvmclock;
 
     if (ka->use_master_clock)
@@ -2060,8 +2063,8 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
 {
     gpa_t gpa = data & ~0x3f;
 
-    /* Bits 2:5 are reserved, Should be zero */
-    if (data & 0x3c)
+    /* Bits 3:5 are reserved, Should be zero */
+    if (data & 0x38)
         return 1;
 
     vcpu->arch.apf.msr_val = data;
@@ -2077,6 +2080,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data)
         return 1;
 
     vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS);
+    vcpu->arch.apf.delivery_as_pf_vmexit = data & KVM_ASYNC_PF_DELIVERY_AS_PF_VMEXIT;
     kvm_async_pf_wakeup_all(vcpu);
     return 0;
 }
@@ -2661,6 +2665,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
     case KVM_CAP_HYPERV_VAPIC:
     case KVM_CAP_HYPERV_SPIN:
     case KVM_CAP_HYPERV_SYNIC:
+    case KVM_CAP_HYPERV_SYNIC2:
+    case KVM_CAP_HYPERV_VP_INDEX:
     case KVM_CAP_PCI_SEGMENT:
     case KVM_CAP_DEBUGREGS:
     case KVM_CAP_X86_ROBUST_SINGLESTEP:
@@ -3384,10 +3390,14 @@ static int kvm_vcpu_ioctl_enable_cap(struct kvm_vcpu *vcpu,
         return -EINVAL;
 
     switch (cap->cap) {
+    case KVM_CAP_HYPERV_SYNIC2:
+        if (cap->args[0])
+            return -EINVAL;
     case KVM_CAP_HYPERV_SYNIC:
         if (!irqchip_in_kernel(vcpu->kvm))
             return -EINVAL;
-        return kvm_hv_activate_synic(vcpu);
+        return kvm_hv_activate_synic(vcpu, cap->cap ==
+                                     KVM_CAP_HYPERV_SYNIC2);
     default:
         return -EINVAL;
     }
@@ -4188,9 +4198,15 @@ long kvm_arch_vm_ioctl(struct file *filp,
             goto out;
 
         r = 0;
+        /*
+         * TODO: userspace has to take care of races with VCPU_RUN, so
+         * kvm_gen_update_masterclock() can be cut down to locked
+         * pvclock_update_vm_gtod_copy().
+         */
+        kvm_gen_update_masterclock(kvm);
         now_ns = get_kvmclock_ns(kvm);
         kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
-        kvm_gen_update_masterclock(kvm);
+        kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
         break;
     }
     case KVM_GET_CLOCK: {
@@ -6347,10 +6363,7 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win)
             kvm_update_dr7(vcpu);
         }
 
-        kvm_x86_ops->queue_exception(vcpu, vcpu->arch.exception.nr,
-                                     vcpu->arch.exception.has_error_code,
-                                     vcpu->arch.exception.error_code,
-                                     vcpu->arch.exception.reinject);
+        kvm_x86_ops->queue_exception(vcpu);
         return 0;
     }
 
@@ -7676,6 +7689,8 @@ void kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu)
     struct msr_data msr;
     struct kvm *kvm = vcpu->kvm;
 
+    kvm_hv_vcpu_postcreate(vcpu);
+
     if (vcpu_load(vcpu))
         return;
     msr.data = 0x0;
@@ -7829,8 +7844,8 @@ int kvm_arch_hardware_enable(void)
     */
    if (backwards_tsc) {
        u64 delta_cyc = max_tsc - local_tsc;
-       backwards_tsc_observed = true;
        list_for_each_entry(kvm, &vm_list, vm_list) {
+           kvm->arch.backwards_tsc_observed = true;
            kvm_for_each_vcpu(i, vcpu, kvm) {
                vcpu->arch.tsc_offset_adjustment += delta_cyc;
                vcpu->arch.last_host_tsc = local_tsc;
@@ -8576,6 +8591,7 @@ void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
        fault.error_code = 0;
        fault.nested_page_fault = false;
        fault.address = work->arch.token;
+       fault.async_page_fault = true;
        kvm_inject_page_fault(vcpu, &fault);
    }
 }
@@ -8598,6 +8614,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
        fault.error_code = 0;
        fault.nested_page_fault = false;
        fault.address = work->arch.token;
+       fault.async_page_fault = true;
        kvm_inject_page_fault(vcpu, &fault);
    }
    vcpu->arch.apf.halted = false;

@@ -234,7 +234,7 @@ struct kvm_vcpu {
     int guest_fpu_loaded, guest_xcr0_loaded;
     struct swait_queue_head wq;
-    struct pid *pid;
+    struct pid __rcu *pid;
     int sigset_active;
     sigset_t sigset;
     struct kvm_vcpu_stat stat;
@@ -390,7 +390,7 @@ struct kvm {
     spinlock_t mmu_lock;
     struct mutex slots_lock;
     struct mm_struct *mm; /* userspace tied to this vm */
-    struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM];
+    struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM];
     struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
 
     /*
@@ -404,7 +404,7 @@ struct kvm {
     int last_boosted_vcpu;
     struct list_head vm_list;
     struct mutex lock;
-    struct kvm_io_bus *buses[KVM_NR_BUSES];
+    struct kvm_io_bus __rcu *buses[KVM_NR_BUSES];
 #ifdef CONFIG_HAVE_KVM_EVENTFD
     struct {
         spinlock_t lock;
@@ -473,6 +473,12 @@ struct kvm {
 #define vcpu_err(vcpu, fmt, ...)					\
     kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__)
 
+static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx)
+{
+    return srcu_dereference_check(kvm->buses[idx], &kvm->srcu,
+                                  lockdep_is_held(&kvm->slots_lock));
+}
+
 static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i)
 {
     /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case
@@ -562,9 +568,8 @@ void kvm_put_kvm(struct kvm *kvm);
 
 static inline struct kvm_memslots *__kvm_memslots(struct kvm *kvm, int as_id)
 {
-    return rcu_dereference_check(kvm->memslots[as_id],
-            srcu_read_lock_held(&kvm->srcu)
-            || lockdep_is_held(&kvm->slots_lock));
+    return srcu_dereference_check(kvm->memslots[as_id], &kvm->srcu,
+            lockdep_is_held(&kvm->slots_lock));
 }
 
 static inline struct kvm_memslots *kvm_memslots(struct kvm *kvm)

@@ -927,6 +927,8 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_S390_CMMA_MIGRATION 145
 #define KVM_CAP_PPC_FWNMI 146
 #define KVM_CAP_PPC_SMT_POSSIBLE 147
+#define KVM_CAP_HYPERV_SYNIC2 148
+#define KVM_CAP_HYPERV_VP_INDEX 149
 
 #ifdef KVM_CAP_IRQ_ROUTING
@@ -1351,7 +1353,7 @@ struct kvm_s390_ucas_mapping {
 /* Available with KVM_CAP_X86_SMM */
 #define KVM_SMI                   _IO(KVMIO,   0xb7)
 /* Available with KVM_CAP_S390_CMMA_MIGRATION */
-#define KVM_S390_GET_CMMA_BITS      _IOW(KVMIO, 0xb8, struct kvm_s390_cmma_log)
+#define KVM_S390_GET_CMMA_BITS      _IOWR(KVMIO, 0xb8, struct kvm_s390_cmma_log)
 #define KVM_S390_SET_CMMA_BITS      _IOW(KVMIO, 0xb9, struct kvm_s390_cmma_log)
 
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0)
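
An aside, not part of the patch: the switch from _IOW to _IOWR matters to callers because the kernel both reads the request fields and writes results back into the same struct, and the direction bits are encoded in the ioctl number itself. A rough illustration of a caller follows; the kvm_s390_cmma_log field names and the KVM_S390_CMMA_PEEK flag are assumed from the CMMA migration series rather than shown in this diff, and error handling is trimmed.

    /* Illustrative caller of KVM_S390_GET_CMMA_BITS on a KVM VM fd. */
    #include <stdint.h>
    #include <sys/ioctl.h>
    #include <linux/kvm.h>

    static int get_cmma_chunk(int vm_fd, uint64_t start_gfn, uint8_t *buf,
                              uint32_t count)
    {
            struct kvm_s390_cmma_log log = {
                    .start_gfn = start_gfn,                /* read by the kernel */
                    .count     = count,                    /* read by the kernel */
                    .flags     = KVM_S390_CMMA_PEEK,       /* assumed flag name */
                    .values    = (uint64_t)(uintptr_t)buf, /* output buffer */
            };

            /* The kernel updates start_gfn, count and the remaining/mask union
             * in place, which is why the ioctl must be declared _IOWR. */
            return ioctl(vm_fd, KVM_S390_GET_CMMA_BITS, &log);
    }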

@@ -825,7 +825,7 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm,
     if (ret < 0)
         goto unlock_fail;
 
-    kvm->buses[bus_idx]->ioeventfd_count++;
+    kvm_get_bus(kvm, bus_idx)->ioeventfd_count++;
     list_add_tail(&p->list, &kvm->ioeventfds);
 
     mutex_unlock(&kvm->slots_lock);
@@ -848,6 +848,7 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
 {
     struct _ioeventfd *p, *tmp;
     struct eventfd_ctx *eventfd;
+    struct kvm_io_bus *bus;
     int ret = -ENOENT;
 
     eventfd = eventfd_ctx_fdget(args->fd);
@@ -870,8 +871,9 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx,
             continue;
 
         kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev);
-        if (kvm->buses[bus_idx])
-            kvm->buses[bus_idx]->ioeventfd_count--;
+        bus = kvm_get_bus(kvm, bus_idx);
+        if (bus)
+            bus->ioeventfd_count--;
         ioeventfd_release(p);
         ret = 0;
         break;

View File

@@ -230,7 +230,7 @@ int kvm_set_irq_routing(struct kvm *kvm,
         }
 
         mutex_lock(&kvm->irq_lock);
-        old = kvm->irq_routing;
+        old = rcu_dereference_protected(kvm->irq_routing, 1);
         rcu_assign_pointer(kvm->irq_routing, new);
         kvm_irq_routing_update(kvm);
         kvm_arch_irq_routing_update(kvm);

View File

@@ -130,6 +130,12 @@ EXPORT_SYMBOL_GPL(kvm_rebooting);
 
 static bool largepages_enabled = true;
 
+#define KVM_EVENT_CREATE_VM 0
+#define KVM_EVENT_DESTROY_VM 1
+static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm);
+static unsigned long long kvm_createvm_count;
+static unsigned long long kvm_active_vms;
+
 bool kvm_is_reserved_pfn(kvm_pfn_t pfn)
 {
         if (pfn_valid(pfn))
@@ -187,12 +193,23 @@ static void ack_flush(void *_completed)
 {
 }
 
+static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait)
+{
+        if (unlikely(!cpus))
+                cpus = cpu_online_mask;
+
+        if (cpumask_empty(cpus))
+                return false;
+
+        smp_call_function_many(cpus, ack_flush, NULL, wait);
+        return true;
+}
+
 bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 {
         int i, cpu, me;
         cpumask_var_t cpus;
-        bool called = true;
-        bool wait = req & KVM_REQUEST_WAIT;
+        bool called;
         struct kvm_vcpu *vcpu;
 
         zalloc_cpumask_var(&cpus, GFP_ATOMIC);
@@ -207,14 +224,9 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req)
 
                 if (cpus != NULL && cpu != -1 && cpu != me &&
                     kvm_request_needs_ipi(vcpu, req))
-                        cpumask_set_cpu(cpu, cpus);
+                        __cpumask_set_cpu(cpu, cpus);
         }
-        if (unlikely(cpus == NULL))
-                smp_call_function_many(cpu_online_mask, ack_flush, NULL, wait);
-        else if (!cpumask_empty(cpus))
-                smp_call_function_many(cpus, ack_flush, NULL, wait);
-        else
-                called = false;
+        called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT));
         put_cpu();
         free_cpumask_var(cpus);
         return called;
@@ -293,7 +305,12 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_init);
 
 void kvm_vcpu_uninit(struct kvm_vcpu *vcpu)
 {
-        put_pid(vcpu->pid);
+        /*
+         * no need for rcu_read_lock as VCPU_RUN is the only place that
+         * will change the vcpu->pid pointer and on uninit all file
+         * descriptors are already gone.
+         */
+        put_pid(rcu_dereference_protected(vcpu->pid, 1));
         kvm_arch_vcpu_uninit(vcpu);
         free_page((unsigned long)vcpu->run);
 }
@@ -674,8 +691,8 @@ static struct kvm *kvm_create_vm(unsigned long type)
         if (init_srcu_struct(&kvm->irq_srcu))
                 goto out_err_no_irq_srcu;
         for (i = 0; i < KVM_NR_BUSES; i++) {
-                kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus),
-                                        GFP_KERNEL);
+                rcu_assign_pointer(kvm->buses[i],
+                        kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL));
                 if (!kvm->buses[i])
                         goto out_err;
         }
@@ -700,9 +717,10 @@ out_err_no_srcu:
         hardware_disable_all();
 out_err_no_disable:
         for (i = 0; i < KVM_NR_BUSES; i++)
-                kfree(kvm->buses[i]);
+                kfree(rcu_access_pointer(kvm->buses[i]));
         for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-                kvm_free_memslots(kvm, kvm->memslots[i]);
+                kvm_free_memslots(kvm,
+                        rcu_dereference_protected(kvm->memslots[i], 1));
         kvm_arch_free_vm(kvm);
         mmdrop(current->mm);
         return ERR_PTR(r);
@@ -728,6 +746,7 @@ static void kvm_destroy_vm(struct kvm *kvm)
         int i;
         struct mm_struct *mm = kvm->mm;
 
+        kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm);
         kvm_destroy_vm_debugfs(kvm);
         kvm_arch_sync_events(kvm);
         spin_lock(&kvm_lock);
@@ -735,8 +754,11 @@ static void kvm_destroy_vm(struct kvm *kvm)
         spin_unlock(&kvm_lock);
         kvm_free_irq_routing(kvm);
         for (i = 0; i < KVM_NR_BUSES; i++) {
-                if (kvm->buses[i])
-                        kvm_io_bus_destroy(kvm->buses[i]);
+                struct kvm_io_bus *bus;
+
+                bus = rcu_dereference_protected(kvm->buses[i], 1);
+                if (bus)
+                        kvm_io_bus_destroy(bus);
                 kvm->buses[i] = NULL;
         }
         kvm_coalesced_mmio_free(kvm);
@@ -748,7 +770,8 @@ static void kvm_destroy_vm(struct kvm *kvm)
         kvm_arch_destroy_vm(kvm);
         kvm_destroy_devices(kvm);
         for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
-                kvm_free_memslots(kvm, kvm->memslots[i]);
+                kvm_free_memslots(kvm,
+                        rcu_dereference_protected(kvm->memslots[i], 1));
         cleanup_srcu_struct(&kvm->irq_srcu);
         cleanup_srcu_struct(&kvm->srcu);
         kvm_arch_free_vm(kvm);
@@ -2551,13 +2574,14 @@ static long kvm_vcpu_ioctl(struct file *filp,
         if (r)
                 return r;
         switch (ioctl) {
-        case KVM_RUN:
+        case KVM_RUN: {
+                struct pid *oldpid;
                 r = -EINVAL;
                 if (arg)
                         goto out;
-                if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) {
+                oldpid = rcu_access_pointer(vcpu->pid);
+                if (unlikely(oldpid != current->pids[PIDTYPE_PID].pid)) {
                         /* The thread running this VCPU changed. */
-                        struct pid *oldpid = vcpu->pid;
                         struct pid *newpid = get_task_pid(current, PIDTYPE_PID);
 
                         rcu_assign_pointer(vcpu->pid, newpid);
@@ -2568,6 +2592,7 @@ static long kvm_vcpu_ioctl(struct file *filp,
                 r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run);
                 trace_kvm_userspace_exit(vcpu->run->exit_reason, r);
                 break;
+        }
         case KVM_GET_REGS: {
                 struct kvm_regs *kvm_regs;
 
@@ -3202,6 +3227,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type)
                 fput(file);
                 return -ENOMEM;
         }
+        kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm);
 
         fd_install(r, file);
         return r;
@@ -3563,7 +3589,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr,
 {
         struct kvm_io_bus *new_bus, *bus;
 
-        bus = kvm->buses[bus_idx];
+        bus = kvm_get_bus(kvm, bus_idx);
         if (!bus)
                 return -ENOMEM;
 
@@ -3592,7 +3618,7 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx,
         int i;
         struct kvm_io_bus *new_bus, *bus;
 
-        bus = kvm->buses[bus_idx];
+        bus = kvm_get_bus(kvm, bus_idx);
         if (!bus)
                 return;
 
@@ -3854,6 +3880,67 @@ static const struct file_operations *stat_fops[] = {
         [KVM_STAT_VM] = &vm_stat_fops,
 };
 
+static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm)
+{
+        struct kobj_uevent_env *env;
+        char *tmp, *pathbuf = NULL;
+        unsigned long long created, active;
+
+        if (!kvm_dev.this_device || !kvm)
+                return;
+
+        spin_lock(&kvm_lock);
+        if (type == KVM_EVENT_CREATE_VM) {
+                kvm_createvm_count++;
+                kvm_active_vms++;
+        } else if (type == KVM_EVENT_DESTROY_VM) {
+                kvm_active_vms--;
+        }
+        created = kvm_createvm_count;
+        active = kvm_active_vms;
+        spin_unlock(&kvm_lock);
+
+        env = kzalloc(sizeof(*env), GFP_KERNEL);
+        if (!env)
+                return;
+
+        add_uevent_var(env, "CREATED=%llu", created);
+        add_uevent_var(env, "COUNT=%llu", active);
+
+        if (type == KVM_EVENT_CREATE_VM)
+                add_uevent_var(env, "EVENT=create");
+        else if (type == KVM_EVENT_DESTROY_VM)
+                add_uevent_var(env, "EVENT=destroy");
+
+        if (kvm->debugfs_dentry) {
+                char p[ITOA_MAX_LEN];
+
+                snprintf(p, sizeof(p), "%s", kvm->debugfs_dentry->d_name.name);
+                tmp = strchrnul(p + 1, '-');
+                *tmp = '\0';
+                add_uevent_var(env, "PID=%s", p);
+                pathbuf = kmalloc(PATH_MAX, GFP_KERNEL);
+                if (pathbuf) {
+                        /* sizeof counts the final '\0' */
+                        int len = sizeof("STATS_PATH=") - 1;
+                        const char *pvar = "STATS_PATH=";
+
+                        tmp = dentry_path_raw(kvm->debugfs_dentry,
+                                              pathbuf + len,
+                                              PATH_MAX - len);
+                        if (!IS_ERR(tmp)) {
+                                memcpy(tmp - len, pvar, len);
+                                env->envp[env->envp_idx++] = tmp - len;
+                        }
+                }
+        }
+        /* no need for checks, since we are adding at most only 5 keys */
+        env->envp[env->envp_idx++] = NULL;
+        kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp);
+        kfree(env);
+        kfree(pathbuf);
+}
+
 static int kvm_init_debug(void)
 {
         int r = -EEXIST;
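
Note: the uevents added above are emitted as KOBJ_CHANGE events on the kvm misc device, carrying EVENT, CREATED and COUNT variables, plus PID and STATS_PATH when the per-VM debugfs directory exists. They can be watched with `udevadm monitor --property`. The sketch below is a hypothetical stand-alone listener on the kernel uevent netlink group; it assumes the usual /devices/virtual/misc/kvm devpath and is not part of the patch.

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <linux/netlink.h>

int main(void)
{
        struct sockaddr_nl addr = {
                .nl_family = AF_NETLINK,
                .nl_groups = 1,         /* kernel uevent multicast group */
        };
        char buf[4096];
        int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_KOBJECT_UEVENT);

        if (fd < 0 || bind(fd, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
                perror("uevent netlink socket");
                return 1;
        }

        for (;;) {
                ssize_t len = recv(fd, buf, sizeof(buf) - 1, 0);

                if (len <= 0)
                        continue;
                buf[len] = '\0';

                /* First string is "ACTION@DEVPATH"; keep only the kvm device. */
                if (!strstr(buf, "/misc/kvm"))
                        continue;

                /* The rest is a NUL-separated list of KEY=VALUE pairs. */
                for (char *p = buf; p < buf + len; p += strlen(p) + 1)
                        if (!strncmp(p, "EVENT=", 6) ||
                            !strncmp(p, "CREATED=", 8) ||
                            !strncmp(p, "COUNT=", 6) ||
                            !strncmp(p, "PID=", 4) ||
                            !strncmp(p, "STATS_PATH=", 11))
                                printf("%s\n", p);
        }
}

If the bind is refused on a locked-down system, run the listener as root.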