

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull more KVM updates from Paolo Bonzini:
 "Mostly bugfixes, but also:

   - s390 support for KVM selftests

   - LAPIC timer offloading to housekeeping CPUs

   - Extend an s390 optimization for overcommitted hosts to all
     architectures

   - Debugging cleanups and improvements"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (25 commits)
  KVM: x86: Add fixed counters to PMU filter
  KVM: nVMX: do not use dangling shadow VMCS after guest reset
  KVM: VMX: dump VMCS on failed entry
  KVM: x86/vPMU: refine kvm_pmu err msg when event creation failed
  KVM: s390: Use kvm_vcpu_wake_up in kvm_s390_vcpu_wakeup
  KVM: Boost vCPUs that are delivering interrupts
  KVM: selftests: Remove superfluous define from vmx.c
  KVM: SVM: Fix detection of AMD Errata 1096
  KVM: LAPIC: Inject timer interrupt via posted interrupt
  KVM: LAPIC: Make lapic timer unpinned
  KVM: x86/vPMU: reset pmc->counter to 0 for pmu fixed_counters
  KVM: nVMX: Ignore segment base for VMX memory operand when segment not FS or GS
  kvm: x86: ioapic and apic debug macros cleanup
  kvm: x86: some tsc debug cleanup
  kvm: vmx: fix coccinelle warnings
  x86: kvm: avoid constant-conversion warning
  x86: kvm: avoid -Wsometimes-uninitized warning
  KVM: x86: expose AVX512_BF16 feature to guest
  KVM: selftests: enable pgste option for the linker on s390
  KVM: selftests: Move kvm_create_max_vcpus test to generic code
  ...
Linus Torvalds 2019-07-20 10:20:27 -07:00
commit 07ab9d5bc5
31 changed files with 723 additions and 232 deletions

Documentation/virtual/kvm/api.txt

@@ -4090,17 +4090,22 @@ Parameters: struct kvm_pmu_event_filter (in)
 Returns: 0 on success, -1 on error
 
 struct kvm_pmu_event_filter {
 	__u32 action;
 	__u32 nevents;
+	__u32 fixed_counter_bitmap;
+	__u32 flags;
+	__u32 pad[4];
 	__u64 events[0];
 };
 
 This ioctl restricts the set of PMU events that the guest can program.
 The argument holds a list of events which will be allowed or denied.
 The eventsel+umask of each event the guest attempts to program is compared
 against the events field to determine whether the guest should have access.
-This only affects general purpose counters; fixed purpose counters can
-be disabled by changing the perfmon CPUID leaf.
+The events field only controls general purpose counters; fixed purpose
+counters are controlled by the fixed_counter_bitmap.
+
+No flags are defined yet, the field must be zero.
 
 Valid values for 'action':
 #define KVM_PMU_EVENT_ALLOW 0
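To make the extended API concrete, here is a minimal userspace sketch of issuing the ioctl with the new fields (not part of the patch; the helper name, event encodings, and error handling are illustrative assumptions). It allows two general purpose events and only fixed counter 0:

	#include <stdlib.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* A sketch: filter on an already-open KVM VM file descriptor. */
	static int set_pmu_filter(int vm_fd)
	{
		struct kvm_pmu_event_filter *f;
		size_t sz = sizeof(*f) + 2 * sizeof(__u64);
		int ret;

		f = calloc(1, sz);
		if (!f)
			return -1;
		f->action = KVM_PMU_EVENT_ALLOW;
		f->nevents = 2;
		f->fixed_counter_bitmap = 0x1;	/* fixed counter 0 only */
		f->flags = 0;			/* must be zero, no flags defined yet */
		f->events[0] = 0x003c;		/* eventsel|umask: unhalted core cycles */
		f->events[1] = 0x00c0;		/* instructions retired */

		ret = ioctl(vm_fd, KVM_SET_PMU_EVENT_FILTER, f);
		free(f);
		return ret;
	}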

MAINTAINERS

@@ -8878,6 +8878,8 @@ F: arch/s390/include/asm/gmap.h
 F:	arch/s390/include/asm/kvm*
 F:	arch/s390/kvm/
 F:	arch/s390/mm/gmap.c
+F:	tools/testing/selftests/kvm/s390x/
+F:	tools/testing/selftests/kvm/*/s390x/
 
 KERNEL VIRTUAL MACHINE FOR X86 (KVM/x86)
 M:	Paolo Bonzini <pbonzini@redhat.com>

arch/s390/kvm/interrupt.c

@@ -1224,28 +1224,11 @@ no_timer:
 
 void kvm_s390_vcpu_wakeup(struct kvm_vcpu *vcpu)
 {
-	/*
-	 * We cannot move this into the if, as the CPU might be already
-	 * in kvm_vcpu_block without having the waitqueue set (polling)
-	 */
 	vcpu->valid_wakeup = true;
+	kvm_vcpu_wake_up(vcpu);
+
 	/*
-	 * This is mostly to document, that the read in swait_active could
-	 * be moved before other stores, leading to subtle races.
-	 * All current users do not store or use an atomic like update
-	 */
-	smp_mb__after_atomic();
-	if (swait_active(&vcpu->wq)) {
-		/*
-		 * The vcpu gave up the cpu voluntarily, mark it as a good
-		 * yield-candidate.
-		 */
-		vcpu->preempted = true;
-		swake_up_one(&vcpu->wq);
-		vcpu->stat.halt_wakeup++;
-	}
-	/*
-	 * The VCPU might not be sleeping but is executing the VSIE. Let's
+	 * The VCPU might not be sleeping but rather executing VSIE. Let's
 	 * kick it, so it leaves the SIE to process the request.
 	 */
 	kvm_s390_vsie_kick(vcpu);

arch/x86/include/uapi/asm/kvm.h

@@ -435,9 +435,12 @@ struct kvm_nested_state {
 /* for KVM_CAP_PMU_EVENT_FILTER */
 struct kvm_pmu_event_filter {
 	__u32 action;
 	__u32 nevents;
+	__u32 fixed_counter_bitmap;
+	__u32 flags;
+	__u32 pad[4];
 	__u64 events[0];
 };
 
 #define KVM_PMU_EVENT_ALLOW 0

arch/x86/kvm/cpuid.c

@@ -368,9 +368,13 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
 		F(SPEC_CTRL_SSBD) | F(ARCH_CAPABILITIES) | F(INTEL_STIBP) |
 		F(MD_CLEAR);
 
+	/* cpuid 7.1.eax */
+	const u32 kvm_cpuid_7_1_eax_x86_features =
+		F(AVX512_BF16);
+
 	switch (index) {
 	case 0:
-		entry->eax = 0;
+		entry->eax = min(entry->eax, 1u);
 		entry->ebx &= kvm_cpuid_7_0_ebx_x86_features;
 		cpuid_mask(&entry->ebx, CPUID_7_0_EBX);
 		/* TSC_ADJUST is emulated */
@@ -394,6 +398,12 @@ static inline void do_cpuid_7_mask(struct kvm_cpuid_entry2 *entry, int index)
 		 */
 		entry->edx |= F(ARCH_CAPABILITIES);
 		break;
+	case 1:
+		entry->eax &= kvm_cpuid_7_1_eax_x86_features;
+		entry->ebx = 0;
+		entry->ecx = 0;
+		entry->edx = 0;
+		break;
 	default:
 		WARN_ON_ONCE(1);
 		entry->eax = 0;

arch/x86/kvm/hyperv.c

@@ -1594,7 +1594,7 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 {
 	u64 param, ingpa, outgpa, ret = HV_STATUS_SUCCESS;
 	uint16_t code, rep_idx, rep_cnt;
-	bool fast, longmode, rep;
+	bool fast, rep;
 
 	/*
 	 * hypercall generates UD from non zero cpl and real mode
@@ -1605,9 +1605,14 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 		return 1;
 	}
 
-	longmode = is_64_bit_mode(vcpu);
-
-	if (!longmode) {
+#ifdef CONFIG_X86_64
+	if (is_64_bit_mode(vcpu)) {
+		param = kvm_rcx_read(vcpu);
+		ingpa = kvm_rdx_read(vcpu);
+		outgpa = kvm_r8_read(vcpu);
+	} else
+#endif
+	{
 		param = ((u64)kvm_rdx_read(vcpu) << 32) |
 			(kvm_rax_read(vcpu) & 0xffffffff);
 		ingpa = ((u64)kvm_rbx_read(vcpu) << 32) |
@@ -1615,13 +1620,6 @@ int kvm_hv_hypercall(struct kvm_vcpu *vcpu)
 		outgpa = ((u64)kvm_rdi_read(vcpu) << 32) |
 			(kvm_rsi_read(vcpu) & 0xffffffff);
 	}
-#ifdef CONFIG_X86_64
-	else {
-		param = kvm_rcx_read(vcpu);
-		ingpa = kvm_rdx_read(vcpu);
-		outgpa = kvm_r8_read(vcpu);
-	}
-#endif
 
 	code = param & 0xffff;
 	fast = !!(param & HV_HYPERCALL_FAST_BIT);

arch/x86/kvm/ioapic.c

@@ -45,11 +45,6 @@
 #include "lapic.h"
 #include "irq.h"
 
-#if 0
-#define ioapic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg)
-#else
-#define ioapic_debug(fmt, arg...)
-#endif
 static int ioapic_service(struct kvm_ioapic *vioapic, int irq,
 		bool line_status);
@@ -294,7 +289,6 @@ static void ioapic_write_indirect(struct kvm_ioapic *ioapic, u32 val)
 	default:
 		index = (ioapic->ioregsel - 0x10) >> 1;
 
-		ioapic_debug("change redir index %x val %x\n", index, val);
 		if (index >= IOAPIC_NUM_PINS)
 			return;
 		e = &ioapic->redirtbl[index];
@@ -343,12 +337,6 @@ static int ioapic_service(struct kvm_ioapic *ioapic, int irq, bool line_status)
 	    entry->fields.remote_irr))
 		return -1;
 
-	ioapic_debug("dest=%x dest_mode=%x delivery_mode=%x "
-		     "vector=%x trig_mode=%x\n",
-		     entry->fields.dest_id, entry->fields.dest_mode,
-		     entry->fields.delivery_mode, entry->fields.vector,
-		     entry->fields.trig_mode);
-
 	irqe.dest_id = entry->fields.dest_id;
 	irqe.vector = entry->fields.vector;
 	irqe.dest_mode = entry->fields.dest_mode;
@@ -515,7 +503,6 @@ static int ioapic_mmio_read(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
 	if (!ioapic_in_range(ioapic, addr))
 		return -EOPNOTSUPP;
 
-	ioapic_debug("addr %lx\n", (unsigned long)addr);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
 	addr &= 0xff;
@@ -558,8 +545,6 @@ static int ioapic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
 	if (!ioapic_in_range(ioapic, addr))
 		return -EOPNOTSUPP;
 
-	ioapic_debug("ioapic_mmio_write addr=%p len=%d val=%p\n",
-		     (void*)addr, len, val);
 	ASSERT(!(addr & 0xf));	/* check alignment */
 
 	switch (len) {

arch/x86/kvm/lapic.c

@@ -52,9 +52,6 @@
 #define PRIu64 "u"
 #define PRIo64 "o"
 
-/* #define apic_debug(fmt,arg...) printk(KERN_WARNING fmt,##arg) */
-#define apic_debug(fmt, arg...) do {} while (0)
-
 /* 14 is the version for Xeon and Pentium 8.4.8*/
 #define APIC_VERSION			(0x14UL | ((KVM_APIC_LVT_NUM - 1) << 16))
 #define LAPIC_MMIO_LENGTH		(1 << 12)
@@ -121,6 +118,17 @@ static inline u32 kvm_x2apic_id(struct kvm_lapic *apic)
 	return apic->vcpu->vcpu_id;
 }
 
+bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu)
+{
+	return pi_inject_timer && kvm_vcpu_apicv_active(vcpu);
+}
+EXPORT_SYMBOL_GPL(kvm_can_post_timer_interrupt);
+
+static bool kvm_use_posted_timer_interrupt(struct kvm_vcpu *vcpu)
+{
+	return kvm_can_post_timer_interrupt(vcpu) && vcpu->mode == IN_GUEST_MODE;
+}
+
 static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map,
 		u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) {
 	switch (map->mode) {
@@ -627,7 +635,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
 {
 	u8 val;
 	if (pv_eoi_get_user(vcpu, &val) < 0)
-		apic_debug("Can't read EOI MSR value: 0x%llx\n",
+		printk(KERN_WARNING "Can't read EOI MSR value: 0x%llx\n",
 			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 	return val & 0x1;
 }
@@ -635,7 +643,7 @@ static bool pv_eoi_get_pending(struct kvm_vcpu *vcpu)
 static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
 {
 	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_ENABLED) < 0) {
-		apic_debug("Can't set EOI MSR value: 0x%llx\n",
+		printk(KERN_WARNING "Can't set EOI MSR value: 0x%llx\n",
 			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 		return;
 	}
@@ -645,7 +653,7 @@ static void pv_eoi_set_pending(struct kvm_vcpu *vcpu)
 static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu)
 {
 	if (pv_eoi_put_user(vcpu, KVM_PV_EOI_DISABLED) < 0) {
-		apic_debug("Can't clear EOI MSR value: 0x%llx\n",
+		printk(KERN_WARNING "Can't clear EOI MSR value: 0x%llx\n",
 			   (unsigned long long)vcpu->arch.pv_eoi.msr_val);
 		return;
 	}
@@ -679,9 +687,6 @@ static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr)
 	else
 		ppr = isrv & 0xf0;
 
-	apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x",
-		   apic, ppr, isr, isrv);
-
 	*new_ppr = ppr;
 	if (old_ppr != ppr)
 		kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr);
@@ -758,8 +763,6 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda)
 		return ((logical_id >> 4) == (mda >> 4))
 		       && (logical_id & mda & 0xf) != 0;
 	default:
-		apic_debug("Bad DFR vcpu %d: %08x\n",
-			   apic->vcpu->vcpu_id, kvm_lapic_get_reg(apic, APIC_DFR));
 		return false;
 	}
 }
@@ -798,10 +801,6 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 	struct kvm_lapic *target = vcpu->arch.apic;
 	u32 mda = kvm_apic_mda(vcpu, dest, source, target);
 
-	apic_debug("target %p, source %p, dest 0x%x, "
-		   "dest_mode 0x%x, short_hand 0x%x\n",
-		   target, source, dest, dest_mode, short_hand);
-
 	ASSERT(target);
 	switch (short_hand) {
 	case APIC_DEST_NOSHORT:
@@ -816,8 +815,6 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source,
 	case APIC_DEST_ALLBUT:
 		return target != source;
 	default:
-		apic_debug("kvm: apic: Bad dest shorthand value %x\n",
-			   short_hand);
 		return false;
 	}
 }
@@ -1095,15 +1092,10 @@ static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode,
 			smp_wmb();
 			kvm_make_request(KVM_REQ_EVENT, vcpu);
 			kvm_vcpu_kick(vcpu);
-		} else {
-			apic_debug("Ignoring de-assert INIT to vcpu %d\n",
-				   vcpu->vcpu_id);
 		}
 		break;
 
 	case APIC_DM_STARTUP:
-		apic_debug("SIPI to vcpu %d vector 0x%02x\n",
-			   vcpu->vcpu_id, vector);
 		result = 1;
 		apic->sipi_vector = vector;
 		/* make sure sipi_vector is visible for the receiver */
@@ -1221,14 +1213,6 @@ static void apic_send_ipi(struct kvm_lapic *apic)
 
 	trace_kvm_apic_ipi(icr_low, irq.dest_id);
 
-	apic_debug("icr_high 0x%x, icr_low 0x%x, "
-		   "short_hand 0x%x, dest 0x%x, trig_mode 0x%x, level 0x%x, "
-		   "dest_mode 0x%x, delivery_mode 0x%x, vector 0x%x, "
-		   "msi_redir_hint 0x%x\n",
-		   icr_high, icr_low, irq.shorthand, irq.dest_id,
-		   irq.trig_mode, irq.level, irq.dest_mode, irq.delivery_mode,
-		   irq.vector, irq.msi_redir_hint);
-
 	kvm_irq_delivery_to_apic(apic->vcpu->kvm, apic, &irq, NULL);
 }
@@ -1282,7 +1266,6 @@ static u32 __apic_read(struct kvm_lapic *apic, unsigned int offset)
 
 	switch (offset) {
 	case APIC_ARBPRI:
-		apic_debug("Access APIC ARBPRI register which is for P6\n");
 		break;
 
 	case APIC_TMCCT:	/* Timer CCR */
@@ -1349,11 +1332,8 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len,
 	if (!apic_x2apic_mode(apic))
 		valid_reg_mask |= APIC_REG_MASK(APIC_ARBPRI);
 
-	if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset))) {
-		apic_debug("KVM_APIC_READ: read reserved register %x\n",
-			   offset);
+	if (offset > 0x3f0 || !(valid_reg_mask & APIC_REG_MASK(offset)))
 		return 1;
-	}
 
 	result = __apic_read(apic, offset & ~0xf);
@@ -1411,9 +1391,6 @@ static void update_divide_count(struct kvm_lapic *apic)
 	tmp1 = tdcr & 0xf;
 	tmp2 = ((tmp1 & 0x3) | ((tmp1 & 0x8) >> 1)) + 1;
 	apic->divide_count = 0x1 << (tmp2 & 0x7);
-
-	apic_debug("timer divide count is 0x%x\n",
-		   apic->divide_count);
 }
 
 static void limit_periodic_timer_frequency(struct kvm_lapic *apic)
@@ -1455,29 +1432,6 @@ static void apic_update_lvtt(struct kvm_lapic *apic)
 	}
 }
 
-static void apic_timer_expired(struct kvm_lapic *apic)
-{
-	struct kvm_vcpu *vcpu = apic->vcpu;
-	struct swait_queue_head *q = &vcpu->wq;
-	struct kvm_timer *ktimer = &apic->lapic_timer;
-
-	if (atomic_read(&apic->lapic_timer.pending))
-		return;
-
-	atomic_inc(&apic->lapic_timer.pending);
-	kvm_set_pending_timer(vcpu);
-
-	/*
-	 * For x86, the atomic_inc() is serialized, thus
-	 * using swait_active() is safe.
-	 */
-	if (swait_active(q))
-		swake_up_one(q);
-
-	if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
-		ktimer->expired_tscdeadline = ktimer->tscdeadline;
-}
-
 /*
  * On APICv, this test will cause a busy wait
  * during a higher-priority task.
@@ -1551,7 +1505,7 @@ static inline void adjust_lapic_timer_advance(struct kvm_vcpu *vcpu,
 	apic->lapic_timer.timer_advance_ns = timer_advance_ns;
 }
 
-void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
+static void __kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
 {
 	struct kvm_lapic *apic = vcpu->arch.apic;
 	u64 guest_tsc, tsc_deadline;
@@ -1559,9 +1513,6 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
 	if (apic->lapic_timer.expired_tscdeadline == 0)
 		return;
 
-	if (!lapic_timer_int_injected(vcpu))
-		return;
-
 	tsc_deadline = apic->lapic_timer.expired_tscdeadline;
 	apic->lapic_timer.expired_tscdeadline = 0;
 	guest_tsc = kvm_read_l1_tsc(vcpu, rdtsc());
@@ -1573,8 +1524,57 @@ void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
 	if (unlikely(!apic->lapic_timer.timer_advance_adjust_done))
 		adjust_lapic_timer_advance(vcpu, apic->lapic_timer.advance_expire_delta);
 }
+
+void kvm_wait_lapic_expire(struct kvm_vcpu *vcpu)
+{
+	if (lapic_timer_int_injected(vcpu))
+		__kvm_wait_lapic_expire(vcpu);
+}
 EXPORT_SYMBOL_GPL(kvm_wait_lapic_expire);
 
+static void kvm_apic_inject_pending_timer_irqs(struct kvm_lapic *apic)
+{
+	struct kvm_timer *ktimer = &apic->lapic_timer;
+
+	kvm_apic_local_deliver(apic, APIC_LVTT);
+	if (apic_lvtt_tscdeadline(apic))
+		ktimer->tscdeadline = 0;
+	if (apic_lvtt_oneshot(apic)) {
+		ktimer->tscdeadline = 0;
+		ktimer->target_expiration = 0;
+	}
+}
+
+static void apic_timer_expired(struct kvm_lapic *apic)
+{
+	struct kvm_vcpu *vcpu = apic->vcpu;
+	struct swait_queue_head *q = &vcpu->wq;
+	struct kvm_timer *ktimer = &apic->lapic_timer;
+
+	if (atomic_read(&apic->lapic_timer.pending))
+		return;
+
+	if (apic_lvtt_tscdeadline(apic) || ktimer->hv_timer_in_use)
+		ktimer->expired_tscdeadline = ktimer->tscdeadline;
+
+	if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
+		if (apic->lapic_timer.timer_advance_ns)
+			__kvm_wait_lapic_expire(vcpu);
+		kvm_apic_inject_pending_timer_irqs(apic);
+		return;
+	}
+
+	atomic_inc(&apic->lapic_timer.pending);
+	kvm_set_pending_timer(vcpu);
+
+	/*
+	 * For x86, the atomic_inc() is serialized, thus
+	 * using swait_active() is safe.
+	 */
+	if (swait_active(q))
+		swake_up_one(q);
+}
+
 static void start_sw_tscdeadline(struct kvm_lapic *apic)
 {
 	struct kvm_timer *ktimer = &apic->lapic_timer;
@@ -1601,7 +1601,7 @@ static void start_sw_tscdeadline(struct kvm_lapic *apic)
 	    likely(ns > apic->lapic_timer.timer_advance_ns)) {
 		expire = ktime_add_ns(now, ns);
 		expire = ktime_sub_ns(expire, ktimer->timer_advance_ns);
-		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start(&ktimer->timer, expire, HRTIMER_MODE_ABS);
 	} else
 		apic_timer_expired(apic);
@@ -1648,16 +1648,6 @@ static bool set_target_expiration(struct kvm_lapic *apic)
 
 	limit_periodic_timer_frequency(apic);
 
-	apic_debug("%s: bus cycle is %" PRId64 "ns, now 0x%016"
-		   PRIx64 ", "
-		   "timer initial count 0x%x, period %lldns, "
-		   "expire @ 0x%016" PRIx64 ".\n", __func__,
-		   APIC_BUS_CYCLE_NS, ktime_to_ns(now),
-		   kvm_lapic_get_reg(apic, APIC_TMICT),
-		   apic->lapic_timer.period,
-		   ktime_to_ns(ktime_add_ns(now,
-				apic->lapic_timer.period)));
-
 	apic->lapic_timer.tscdeadline = kvm_read_l1_tsc(apic->vcpu, tscl) +
 		nsec_to_cycles(apic->vcpu, apic->lapic_timer.period);
 	apic->lapic_timer.target_expiration = ktime_add_ns(now, apic->lapic_timer.period);
@@ -1703,7 +1693,7 @@ static void start_sw_period(struct kvm_lapic *apic)
 
 	hrtimer_start(&apic->lapic_timer.timer,
 		apic->lapic_timer.target_expiration,
-		HRTIMER_MODE_ABS_PINNED);
+		HRTIMER_MODE_ABS);
 }
 
 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu)
@@ -1860,8 +1850,6 @@ static void apic_manage_nmi_watchdog(struct kvm_lapic *apic, u32 lvt0_val)
 	if (apic->lvt0_in_nmi_mode != lvt0_in_nmi_mode) {
 		apic->lvt0_in_nmi_mode = lvt0_in_nmi_mode;
 		if (lvt0_in_nmi_mode) {
-			apic_debug("Receive NMI setting on APIC_LVT0 "
-				   "for cpu %d\n", apic->vcpu->vcpu_id);
 			atomic_inc(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
 		} else
 			atomic_dec(&apic->vcpu->kvm->arch.vapics_in_nmi_mode);
@@ -1975,8 +1963,6 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 	case APIC_TDCR: {
 		uint32_t old_divisor = apic->divide_count;
 
-		if (val & 4)
-			apic_debug("KVM_WRITE:TDCR %x\n", val);
 		kvm_lapic_set_reg(apic, APIC_TDCR, val);
 		update_divide_count(apic);
 		if (apic->divide_count != old_divisor &&
@@ -1988,10 +1974,8 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 		break;
 	}
 	case APIC_ESR:
-		if (apic_x2apic_mode(apic) && val != 0) {
-			apic_debug("KVM_WRITE:ESR not zero %x\n", val);
+		if (apic_x2apic_mode(apic) && val != 0)
 			ret = 1;
-		}
 		break;
 
 	case APIC_SELF_IPI:
@@ -2004,8 +1988,7 @@ int kvm_lapic_reg_write(struct kvm_lapic *apic, u32 reg, u32 val)
 		ret = 1;
 		break;
 	}
-	if (ret)
-		apic_debug("Local APIC Write to read-only register %x\n", reg);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(kvm_lapic_reg_write);
@@ -2033,20 +2016,12 @@ static int apic_mmio_write(struct kvm_vcpu *vcpu, struct kvm_io_device *this,
 	 * 32/64/128 bits registers must be accessed thru 32 bits.
 	 * Refer SDM 8.4.1
 	 */
-	if (len != 4 || (offset & 0xf)) {
-		/* Don't shout loud, $infamous_os would cause only noise. */
-		apic_debug("apic write: bad size=%d %lx\n", len, (long)address);
+	if (len != 4 || (offset & 0xf))
 		return 0;
-	}
 
 	val = *(u32*)data;
 
-	/* too common printing */
-	if (offset != APIC_EOI)
-		apic_debug("%s: offset 0x%x with length 0x%x, and value is "
-			   "0x%x\n", __func__, offset, len, val);
-
-	kvm_lapic_reg_write(apic, offset, val);
+	kvm_lapic_reg_write(apic, offset & 0xff0, val);
 
 	return 0;
 }
@@ -2178,11 +2153,6 @@ void kvm_lapic_set_base(struct kvm_vcpu *vcpu, u64 value)
 	if ((value & MSR_IA32_APICBASE_ENABLE) &&
 	     apic->base_address != APIC_DEFAULT_PHYS_BASE)
 		pr_warn_once("APIC base relocation is unsupported by KVM");
-
-	/* with FSB delivery interrupt, we can restart APIC functionality */
-	apic_debug("apic base msr is 0x%016" PRIx64 ", and base address is "
-		   "0x%lx.\n", apic->vcpu->arch.apic_base, apic->base_address);
-
 }
 
 void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
@@ -2193,8 +2163,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 	if (!apic)
 		return;
 
-	apic_debug("%s\n", __func__);
-
 	/* Stop the timer in case it's a reset to an active apic */
 	hrtimer_cancel(&apic->lapic_timer.timer);
@@ -2247,11 +2215,6 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event)
 
 	vcpu->arch.apic_arb_prio = 0;
 	vcpu->arch.apic_attention = 0;
-
-	apic_debug("%s: vcpu=%p, id=0x%x, base_msr="
-		   "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__,
-		   vcpu, kvm_lapic_get_reg(apic, APIC_ID),
-		   vcpu->arch.apic_base, apic->base_address);
 }
 
 /*
@@ -2323,7 +2286,6 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 	struct kvm_lapic *apic;
 
 	ASSERT(vcpu != NULL);
-	apic_debug("apic_init %d\n", vcpu->vcpu_id);
 
 	apic = kzalloc(sizeof(*apic), GFP_KERNEL_ACCOUNT);
 	if (!apic)
@@ -2340,7 +2302,7 @@ int kvm_create_lapic(struct kvm_vcpu *vcpu, int timer_advance_ns)
 	apic->vcpu = vcpu;
 
 	hrtimer_init(&apic->lapic_timer.timer, CLOCK_MONOTONIC,
-		     HRTIMER_MODE_ABS_PINNED);
+		     HRTIMER_MODE_ABS);
 	apic->lapic_timer.timer.function = apic_timer_fn;
 	if (timer_advance_ns == -1) {
 		apic->lapic_timer.timer_advance_ns = LAPIC_TIMER_ADVANCE_ADJUST_INIT;
@@ -2397,13 +2359,7 @@ void kvm_inject_apic_timer_irqs(struct kvm_vcpu *vcpu)
 	struct kvm_lapic *apic = vcpu->arch.apic;
 
 	if (atomic_read(&apic->lapic_timer.pending) > 0) {
-		kvm_apic_local_deliver(apic, APIC_LVTT);
-		if (apic_lvtt_tscdeadline(apic))
-			apic->lapic_timer.tscdeadline = 0;
-		if (apic_lvtt_oneshot(apic)) {
-			apic->lapic_timer.tscdeadline = 0;
-			apic->lapic_timer.target_expiration = 0;
-		}
+		kvm_apic_inject_pending_timer_irqs(apic);
 		atomic_set(&apic->lapic_timer.pending, 0);
 	}
 }
@@ -2525,12 +2481,13 @@ void __kvm_migrate_apic_timer(struct kvm_vcpu *vcpu)
 {
 	struct hrtimer *timer;
 
-	if (!lapic_in_kernel(vcpu))
+	if (!lapic_in_kernel(vcpu) ||
+		kvm_can_post_timer_interrupt(vcpu))
 		return;
 
 	timer = &vcpu->arch.apic->lapic_timer.timer;
 	if (hrtimer_cancel(timer))
-		hrtimer_start_expires(timer, HRTIMER_MODE_ABS_PINNED);
+		hrtimer_start_expires(timer, HRTIMER_MODE_ABS);
 }
 
 /*
@@ -2678,11 +2635,8 @@ int kvm_x2apic_msr_read(struct kvm_vcpu *vcpu, u32 msr, u64 *data)
 	if (!lapic_in_kernel(vcpu) || !apic_x2apic_mode(apic))
 		return 1;
 
-	if (reg == APIC_DFR || reg == APIC_ICR2) {
-		apic_debug("KVM_APIC_READ: read x2apic reserved register %x\n",
-			   reg);
+	if (reg == APIC_DFR || reg == APIC_ICR2)
 		return 1;
-	}
 
 	if (kvm_lapic_reg_read(apic, reg, 4, &low))
 		return 1;
@@ -2780,8 +2734,6 @@ void kvm_apic_accept_events(struct kvm_vcpu *vcpu)
 		/* evaluate pending_events before reading the vector */
 		smp_rmb();
 		sipi_vector = apic->sipi_vector;
-		apic_debug("vcpu %d received sipi with vector # %x\n",
-			   vcpu->vcpu_id, sipi_vector);
 		kvm_vcpu_deliver_sipi_vector(vcpu, sipi_vector);
 		vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 	}

arch/x86/kvm/lapic.h

@@ -236,6 +236,7 @@ void kvm_lapic_switch_to_hv_timer(struct kvm_vcpu *vcpu);
 void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu);
 bool kvm_lapic_hv_timer_in_use(struct kvm_vcpu *vcpu);
 void kvm_lapic_restart_hv_timer(struct kvm_vcpu *vcpu);
+bool kvm_can_post_timer_interrupt(struct kvm_vcpu *vcpu);
 
 static inline enum lapic_mode kvm_apic_mode(u64 apic_base)
 {

arch/x86/kvm/mmu.c

@@ -4597,11 +4597,11 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu,
 		 */
 
 		/* Faults from writes to non-writable pages */
-		u8 wf = (pfec & PFERR_WRITE_MASK) ? ~w : 0;
+		u8 wf = (pfec & PFERR_WRITE_MASK) ? (u8)~w : 0;
 		/* Faults from user mode accesses to supervisor pages */
-		u8 uf = (pfec & PFERR_USER_MASK) ? ~u : 0;
+		u8 uf = (pfec & PFERR_USER_MASK) ? (u8)~u : 0;
 		/* Faults from fetches of non-executable pages*/
-		u8 ff = (pfec & PFERR_FETCH_MASK) ? ~x : 0;
+		u8 ff = (pfec & PFERR_FETCH_MASK) ? (u8)~x : 0;
 		/* Faults from kernel mode fetches of user pages */
 		u8 smepf = 0;
 		/* Faults from kernel mode accesses of user pages */

arch/x86/kvm/pmu.c

@@ -19,8 +19,8 @@
 #include "lapic.h"
 #include "pmu.h"
 
-/* This keeps the total size of the filter under 4k. */
-#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 63
+/* This is enough to filter the vast majority of currently defined events. */
+#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
 
 /* NOTE:
  * - Each perf counter is defined as "struct kvm_pmc";
@@ -131,8 +131,8 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type,
 						 intr ? kvm_perf_overflow_intr :
 						 kvm_perf_overflow, pmc);
 	if (IS_ERR(event)) {
-		printk_once("kvm_pmu: event creation failed %ld\n",
-			    PTR_ERR(event));
+		pr_debug_ratelimited("kvm_pmu: event creation failed %ld for pmc->idx = %d\n",
+			    PTR_ERR(event), pmc->idx);
 		return;
 	}
@@ -206,12 +206,24 @@ void reprogram_fixed_counter(struct kvm_pmc *pmc, u8 ctrl, int idx)
 {
 	unsigned en_field = ctrl & 0x3;
 	bool pmi = ctrl & 0x8;
+	struct kvm_pmu_event_filter *filter;
+	struct kvm *kvm = pmc->vcpu->kvm;
 
 	pmc_stop_counter(pmc);
 
 	if (!en_field || !pmc_is_enabled(pmc))
 		return;
 
+	filter = srcu_dereference(kvm->arch.pmu_event_filter, &kvm->srcu);
+	if (filter) {
+		if (filter->action == KVM_PMU_EVENT_DENY &&
+		    test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
+			return;
+		if (filter->action == KVM_PMU_EVENT_ALLOW &&
+		    !test_bit(idx, (ulong *)&filter->fixed_counter_bitmap))
+			return;
+	}
+
 	pmc_reprogram_counter(pmc, PERF_TYPE_HARDWARE,
 			      kvm_x86_ops->pmu_ops->find_fixed_event(idx),
 			      !(en_field & 0x2), /* exclude user */
@@ -385,6 +397,9 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
 	    tmp.action != KVM_PMU_EVENT_DENY)
 		return -EINVAL;
 
+	if (tmp.flags != 0)
+		return -EINVAL;
+
 	if (tmp.nevents > KVM_PMU_EVENT_FILTER_MAX_EVENTS)
 		return -E2BIG;
@@ -406,8 +421,8 @@ int kvm_vm_ioctl_set_pmu_event_filter(struct kvm *kvm, void __user *argp)
 	mutex_unlock(&kvm->lock);
 
 	synchronize_srcu_expedited(&kvm->srcu);
 	r = 0;
 cleanup:
 	kfree(filter);
 	return r;
 }

arch/x86/kvm/svm.c

@@ -7128,13 +7128,41 @@ static int nested_enable_evmcs(struct kvm_vcpu *vcpu,
 
 static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
 {
-	bool is_user, smap;
-
-	is_user = svm_get_cpl(vcpu) == 3;
-	smap = !kvm_read_cr4_bits(vcpu, X86_CR4_SMAP);
+	unsigned long cr4 = kvm_read_cr4(vcpu);
+	bool smep = cr4 & X86_CR4_SMEP;
+	bool smap = cr4 & X86_CR4_SMAP;
+	bool is_user = svm_get_cpl(vcpu) == 3;
 
 	/*
-	 * Detect and workaround Errata 1096 Fam_17h_00_0Fh
+	 * Detect and workaround Errata 1096 Fam_17h_00_0Fh.
+	 *
+	 * Errata:
+	 * When CPU raise #NPF on guest data access and vCPU CR4.SMAP=1, it is
+	 * possible that CPU microcode implementing DecodeAssist will fail
+	 * to read bytes of instruction which caused #NPF. In this case,
+	 * GuestIntrBytes field of the VMCB on a VMEXIT will incorrectly
+	 * return 0 instead of the correct guest instruction bytes.
+	 *
+	 * This happens because CPU microcode reading instruction bytes
+	 * uses a special opcode which attempts to read data using CPL=0
+	 * priviledges. The microcode reads CS:RIP and if it hits a SMAP
+	 * fault, it gives up and returns no instruction bytes.
+	 *
+	 * Detection:
+	 * We reach here in case CPU supports DecodeAssist, raised #NPF and
+	 * returned 0 in GuestIntrBytes field of the VMCB.
+	 * First, errata can only be triggered in case vCPU CR4.SMAP=1.
+	 * Second, if vCPU CR4.SMEP=1, errata could only be triggered
+	 * in case vCPU CPL==3 (Because otherwise guest would have triggered
+	 * a SMEP fault instead of #NPF).
+	 * Otherwise, vCPU CR4.SMEP=0, errata could be triggered by any vCPU CPL.
+	 * As most guests enable SMAP if they have also enabled SMEP, use above
+	 * logic in order to attempt minimize false-positive of detecting errata
+	 * while still preserving all cases semantic correctness.
+	 *
+	 * Workaround:
+	 * To determine what instruction the guest was executing, the hypervisor
+	 * will have to decode the instruction at the instruction pointer.
 	 *
 	 * In non SEV guest, hypervisor will be able to read the guest
 	 * memory to decode the instruction pointer when insn_len is zero
@@ -7145,11 +7173,11 @@ static bool svm_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
 	 * instruction pointer so we will not able to workaround it. Lets
 	 * print the error and request to kill the guest.
 	 */
-	if (is_user && smap) {
+	if (smap && (!smep || is_user)) {
 		if (!sev_guest(vcpu->kvm))
 			return true;
 
-		pr_err_ratelimited("KVM: Guest triggered AMD Erratum 1096\n");
+		pr_err_ratelimited("KVM: SEV Guest triggered AMD Erratum 1096\n");
 		kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
 	}

arch/x86/kvm/vmx/nested.c

@@ -194,6 +194,7 @@ static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
 {
 	secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
 	vmcs_write64(VMCS_LINK_POINTER, -1ull);
+	vmx->nested.need_vmcs12_to_shadow_sync = false;
 }
 
 static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
@@ -1341,6 +1342,9 @@ static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
 	unsigned long val;
 	int i;
 
+	if (WARN_ON(!shadow_vmcs))
+		return;
+
 	preempt_disable();
 
 	vmcs_load(shadow_vmcs);
@@ -1373,6 +1377,9 @@ static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
 	unsigned long val;
 	int i, q;
 
+	if (WARN_ON(!shadow_vmcs))
+		return;
+
 	vmcs_load(shadow_vmcs);
 
 	for (q = 0; q < ARRAY_SIZE(fields); q++) {
@@ -4194,7 +4201,10 @@ int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
 	 * mode, e.g. a 32-bit address size can yield a 64-bit virtual
 	 * address when using FS/GS with a non-zero base.
 	 */
-	*ret = s.base + off;
+	if (seg_reg == VCPU_SREG_FS || seg_reg == VCPU_SREG_GS)
+		*ret = s.base + off;
+	else
+		*ret = off;
 
 	/* Long mode: #GP(0)/#SS(0) if the memory address is in a
 	 * non-canonical form. This is the only check on the memory
@@ -4433,7 +4443,6 @@ static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
 		/* copy to memory all shadowed fields in case
 		   they were modified */
 		copy_shadow_to_vmcs12(vmx);
-		vmx->nested.need_vmcs12_to_shadow_sync = false;
 		vmx_disable_shadow_vmcs(vmx);
 	}
 	vmx->nested.posted_intr_nv = -1;

arch/x86/kvm/vmx/pmu_intel.c

@@ -337,17 +337,22 @@ static void intel_pmu_init(struct kvm_vcpu *vcpu)
 static void intel_pmu_reset(struct kvm_vcpu *vcpu)
 {
 	struct kvm_pmu *pmu = vcpu_to_pmu(vcpu);
+	struct kvm_pmc *pmc = NULL;
 	int i;
 
 	for (i = 0; i < INTEL_PMC_MAX_GENERIC; i++) {
-		struct kvm_pmc *pmc = &pmu->gp_counters[i];
+		pmc = &pmu->gp_counters[i];
 
 		pmc_stop_counter(pmc);
 		pmc->counter = pmc->eventsel = 0;
 	}
 
-	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++)
-		pmc_stop_counter(&pmu->fixed_counters[i]);
+	for (i = 0; i < INTEL_PMC_MAX_FIXED; i++) {
+		pmc = &pmu->fixed_counters[i];
+
+		pmc_stop_counter(pmc);
+		pmc->counter = 0;
+	}
 
 	pmu->fixed_ctr_ctrl = pmu->global_ctrl = pmu->global_status =
 		pmu->global_ovf_ctrl = 0;

arch/x86/kvm/vmx/vmx.c

@@ -5829,6 +5829,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu)
 	}
 
 	if (unlikely(vmx->fail)) {
+		dump_vmcs();
 		vcpu->run->exit_reason = KVM_EXIT_FAIL_ENTRY;
 		vcpu->run->fail_entry.hardware_entry_failure_reason
 			= vmcs_read32(VM_INSTRUCTION_ERROR);
@@ -7064,7 +7065,8 @@ static int vmx_set_hv_timer(struct kvm_vcpu *vcpu, u64 guest_deadline_tsc,
 	u64 tscl, guest_tscl, delta_tsc, lapic_timer_advance_cycles;
 	struct kvm_timer *ktimer = &vcpu->arch.apic->lapic_timer;
 
-	if (kvm_mwait_in_guest(vcpu->kvm))
+	if (kvm_mwait_in_guest(vcpu->kvm) ||
+		kvm_can_post_timer_interrupt(vcpu))
 		return -EOPNOTSUPP;
 
 	vmx = to_vmx(vcpu);
@@ -7453,7 +7455,7 @@ static int enable_smi_window(struct kvm_vcpu *vcpu)
 
 static bool vmx_need_emulation_on_page_fault(struct kvm_vcpu *vcpu)
 {
-	return 0;
+	return false;
 }
 
 static __init int hardware_setup(void)

arch/x86/kvm/x86.c

@@ -51,6 +51,7 @@
 #include <linux/kvm_irqfd.h>
 #include <linux/irqbypass.h>
 #include <linux/sched/stat.h>
+#include <linux/sched/isolation.h>
 #include <linux/mem_encrypt.h>
 
 #include <trace/events/kvm.h>
@@ -153,6 +154,9 @@ EXPORT_SYMBOL_GPL(enable_vmware_backdoor);
 static bool __read_mostly force_emulation_prefix = false;
 module_param(force_emulation_prefix, bool, S_IRUGO);
 
+int __read_mostly pi_inject_timer = -1;
+module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
+
 #define KVM_NR_SHARED_MSRS 16
 
 struct kvm_shared_msrs_global {
@@ -1456,12 +1460,8 @@ static void update_pvclock_gtod(struct timekeeper *tk)
 
 void kvm_set_pending_timer(struct kvm_vcpu *vcpu)
 {
-	/*
-	 * Note: KVM_REQ_PENDING_TIMER is implicitly checked in
-	 * vcpu_enter_guest. This function is only called from
-	 * the physical CPU that is running vcpu.
-	 */
 	kvm_make_request(KVM_REQ_PENDING_TIMER, vcpu);
+	kvm_vcpu_kick(vcpu);
 }
 
 static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock)
@@ -1540,9 +1540,6 @@ static void kvm_get_time_scale(uint64_t scaled_hz, uint64_t base_hz,
 
 	*pshift = shift;
 	*pmultiplier = div_frac(scaled64, tps32);
-
-	pr_debug("%s: base_hz %llu => %llu, shift %d, mul %u\n",
-		 __func__, base_hz, scaled_hz, shift, *pmultiplier);
 }
 
 #ifdef CONFIG_X86_64
@@ -1785,12 +1782,10 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 	    vcpu->arch.virtual_tsc_khz == kvm->arch.last_tsc_khz) {
 		if (!kvm_check_tsc_unstable()) {
 			offset = kvm->arch.cur_tsc_offset;
-			pr_debug("kvm: matched tsc offset for %llu\n", data);
 		} else {
 			u64 delta = nsec_to_cycles(vcpu, elapsed);
 			data += delta;
 			offset = kvm_compute_tsc_offset(vcpu, data);
-			pr_debug("kvm: adjusted tsc offset by %llu\n", delta);
 		}
 		matched = true;
 		already_matched = (vcpu->arch.this_tsc_generation == kvm->arch.cur_tsc_generation);
@@ -1809,8 +1804,6 @@ void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr)
 		kvm->arch.cur_tsc_write = data;
 		kvm->arch.cur_tsc_offset = offset;
 		matched = false;
-		pr_debug("kvm: new tsc generation %llu, clock %llu\n",
-			 kvm->arch.cur_tsc_generation, data);
 	}
 
 	/*
@@ -6911,7 +6904,6 @@ static void kvm_timer_init(void)
 		cpufreq_register_notifier(&kvmclock_cpufreq_notifier_block,
 					  CPUFREQ_TRANSITION_NOTIFIER);
 	}
-	pr_debug("kvm: max_tsc_khz = %ld\n", max_tsc_khz);
 
 	cpuhp_setup_state(CPUHP_AP_X86_KVM_CLK_ONLINE, "x86/kvm/clk:online",
 			  kvmclock_cpu_online, kvmclock_cpu_down_prep);
@@ -7070,6 +7062,8 @@ int kvm_arch_init(void *opaque)
 		host_xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
 
 	kvm_lapic_init();
+	if (pi_inject_timer == -1)
+		pi_inject_timer = housekeeping_enabled(HK_FLAG_TIMER);
 #ifdef CONFIG_X86_64
 	pvclock_gtod_register_notifier(&pvclock_gtod_notifier);

arch/x86/kvm/x86.h

@@ -301,6 +301,8 @@ extern unsigned int min_timer_period_us;
 
 extern bool enable_vmware_backdoor;
 
+extern int pi_inject_timer;
+
 extern struct static_key kvm_no_apic_vcpu;
 
 static inline u64 nsec_to_cycles(struct kvm_vcpu *vcpu, u64 nsec)

include/linux/kvm_host.h

@@ -318,6 +318,7 @@ struct kvm_vcpu {
 	} spin_loop;
 #endif
 	bool preempted;
+	bool ready;
 	struct kvm_vcpu_arch arch;
 	struct dentry *debugfs_dentry;
 };

include/linux/sched/isolation.h

@@ -19,6 +19,7 @@ enum hk_flags {
 DECLARE_STATIC_KEY_FALSE(housekeeping_overridden);
 extern int housekeeping_any_cpu(enum hk_flags flags);
 extern const struct cpumask *housekeeping_cpumask(enum hk_flags flags);
+extern bool housekeeping_enabled(enum hk_flags flags);
 extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags);
 extern bool housekeeping_test_cpu(int cpu, enum hk_flags flags);
 extern void __init housekeeping_init(void);
@@ -35,6 +36,11 @@ static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags)
 	return cpu_possible_mask;
 }
 
+static inline bool housekeeping_enabled(enum hk_flags flags)
+{
+	return false;
+}
+
 static inline void housekeeping_affine(struct task_struct *t,
 				       enum hk_flags flags) { }
 static inline void housekeeping_init(void) { }

kernel/sched/isolation.c

@@ -14,6 +14,12 @@ EXPORT_SYMBOL_GPL(housekeeping_overridden);
 static cpumask_var_t housekeeping_mask;
 static unsigned int housekeeping_flags;
 
+bool housekeeping_enabled(enum hk_flags flags)
+{
+	return !!(housekeeping_flags & flags);
+}
+EXPORT_SYMBOL_GPL(housekeeping_enabled);
+
 int housekeeping_any_cpu(enum hk_flags flags)
 {
 	if (static_branch_unlikely(&housekeeping_overridden))

tools/testing/selftests/kvm/Makefile

@@ -10,11 +10,11 @@ UNAME_M := $(shell uname -m)
 LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/ucall.c lib/sparsebit.c
 LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c
 LIBKVM_aarch64 = lib/aarch64/processor.c
+LIBKVM_s390x = lib/s390x/processor.c
 
 TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
 TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
-TEST_GEN_PROGS_x86_64 += x86_64/kvm_create_max_vcpus
 TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
 TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
 TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
@@ -26,9 +26,14 @@ TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
 TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
 TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
+TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
 
 TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
 TEST_GEN_PROGS_aarch64 += dirty_log_test
+TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
+
+TEST_GEN_PROGS_s390x += s390x/sync_regs_test
+TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
 
 TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
 LIBKVM += $(LIBKVM_$(UNAME_M))
@@ -43,7 +48,12 @@ CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
 no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
         $(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
 
-LDFLAGS += -pthread $(no-pie-option)
+# On s390, build the testcases KVM-enabled
+pgste-option = $(call try-run, echo 'int main() { return 0; }' | \
+	$(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste)
+
+
+LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
 
 # After inclusion, $(OUTPUT) is defined and
 # $(TEST_GEN_PROGS) starts with $(OUTPUT)/

tools/testing/selftests/kvm/include/kvm_util.h

@@ -41,6 +41,12 @@ enum vm_guest_mode {
 	NUM_VM_MODES,
 };
 
+#ifdef __aarch64__
+#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+#else
+#define VM_MODE_DEFAULT VM_MODE_P52V48_4K
+#endif
+
 #define vm_guest_mode_string(m) vm_guest_mode_string[m]
 extern const char * const vm_guest_mode_string[];
@@ -111,10 +117,12 @@ void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
 		    struct kvm_sregs *sregs);
 int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
 		    struct kvm_sregs *sregs);
+#ifdef __KVM_HAVE_VCPU_EVENTS
 void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
 		     struct kvm_vcpu_events *events);
 void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
 		     struct kvm_vcpu_events *events);
+#endif
 #ifdef __x86_64__
 void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
 			   struct kvm_nested_state *state);

tools/testing/selftests/kvm/include/s390x/processor.h

@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * s390x processor specific defines
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+/* Bits in the region/segment table entry */
+#define REGION_ENTRY_ORIGIN	~0xfffUL /* region/segment table origin */
+#define REGION_ENTRY_PROTECT	0x200	/* region protection bit */
+#define REGION_ENTRY_NOEXEC	0x100	/* region no-execute bit */
+#define REGION_ENTRY_OFFSET	0xc0	/* region table offset */
+#define REGION_ENTRY_INVALID	0x20	/* invalid region table entry */
+#define REGION_ENTRY_TYPE	0x0c	/* region/segment table type mask */
+#define REGION_ENTRY_LENGTH	0x03	/* region third length */
+
+/* Bits in the page table entry */
+#define PAGE_INVALID	0x400	/* HW invalid bit */
+#define PAGE_PROTECT	0x200	/* HW read-only bit */
+#define PAGE_NOEXEC	0x100	/* HW no-execute bit */
+
+#endif

tools/testing/selftests/kvm/kvm_create_max_vcpus.c

@@ -27,7 +27,7 @@ void test_vcpu_creation(int first_vcpu_id, int num_vcpus)
 	printf("Testing creating %d vCPUs, with IDs %d...%d.\n",
 	       num_vcpus, first_vcpu_id, first_vcpu_id + num_vcpus - 1);
 
-	vm = vm_create(VM_MODE_P52V48_4K, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
 
 	for (i = 0; i < num_vcpus; i++) {
 		int vcpu_id = first_vcpu_id + i;

tools/testing/selftests/kvm/lib/aarch64/processor.c

@@ -227,7 +227,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
 	uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2;
 	struct kvm_vm *vm;
 
-	vm = vm_create(VM_MODE_P40V48_4K, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
+	vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
 
 	kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
 	vm_vcpu_add_default(vm, vcpuid, guest_code);

tools/testing/selftests/kvm/lib/kvm_util.c

@@ -556,6 +556,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 	int ret;
 	struct userspace_mem_region *region;
 	size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
+	size_t alignment;
 
 	TEST_ASSERT((guest_paddr % vm->page_size) == 0, "Guest physical "
 		"address not on a page boundary.\n"
@@ -605,9 +606,20 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 	TEST_ASSERT(region != NULL, "Insufficient Memory");
 	region->mmap_size = npages * vm->page_size;
 
-	/* Enough memory to align up to a huge page. */
+#ifdef __s390x__
+	/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
+	alignment = 0x100000;
+#else
+	alignment = 1;
+#endif
+
 	if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
-		region->mmap_size += huge_page_size;
+		alignment = max(huge_page_size, alignment);
+
+	/* Add enough memory to align up if necessary */
+	if (alignment > 1)
+		region->mmap_size += alignment;
+
 	region->mmap_start = mmap(NULL, region->mmap_size,
 				  PROT_READ | PROT_WRITE,
 				  MAP_PRIVATE | MAP_ANONYMOUS
@@ -617,9 +629,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
 		    "test_malloc failed, mmap_start: %p errno: %i",
 		    region->mmap_start, errno);
 
-	/* Align THP allocation up to start of a huge page. */
-	region->host_mem = align(region->mmap_start,
-				 src_type == VM_MEM_SRC_ANONYMOUS_THP ? huge_page_size : 1);
+	/* Align host address */
+	region->host_mem = align(region->mmap_start, alignment);
 
 	/* As needed perform madvise */
 	if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
@@ -1218,6 +1229,7 @@ void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
 		ret, errno);
 }
 
+#ifdef __KVM_HAVE_VCPU_EVENTS
 void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
 		     struct kvm_vcpu_events *events)
 {
@@ -1243,6 +1255,7 @@ void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
 	TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
 		    ret, errno);
 }
+#endif
 
 #ifdef __x86_64__
 void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,

tools/testing/selftests/kvm/lib/s390x/processor.c

@ -0,0 +1,278 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* KVM selftest s390x library code - CPU-related functions (page tables...)
*
* Copyright (C) 2019, Red Hat, Inc.
*/
#define _GNU_SOURCE /* for program_invocation_name */
#include "processor.h"
#include "kvm_util.h"
#include "../kvm_util_internal.h"
#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
#define PAGES_PER_REGION 4
void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
{
vm_paddr_t paddr;
TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
vm->page_size);
if (vm->pgd_created)
return;
paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
vm->pgd = paddr;
vm->pgd_created = true;
}
/*
* Allocate 4 pages for a region/segment table (ri < 4), or one page for
* a page table (ri == 4). Returns a suitable region/segment table entry
* which points to the freshly allocated pages.
*/
static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
{
uint64_t taddr;
taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1,
KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
	memset(addr_gpa2hva(vm, taddr), 0xff,
	       (ri < 4 ? PAGES_PER_REGION : 1) * vm->page_size);
return (taddr & REGION_ENTRY_ORIGIN)
| (((4 - ri) << 2) & REGION_ENTRY_TYPE)
| ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
}
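/*
 * Worked example of the entry layout composed above (values are
 * illustrative only): a region-first table (ri == 1) allocated at guest
 * physical 0x180000 yields
 *
 *     0x180000   table origin           (REGION_ENTRY_ORIGIN)
 *   | 0x00000c   type = (4 - 1) << 2    (REGION_ENTRY_TYPE)
 *   | 0x000003   length = 4 pages - 1   (REGION_ENTRY_LENGTH)
 *   = 0x18000f
 *
 * whereas the segment-table entry created for a page table (ri == 4)
 * carries type 0 and length 0.
 */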
/*
* VM Virtual Page Map
*
* Input Args:
* vm - Virtual Machine
* gva - VM Virtual Address
* gpa - VM Physical Address
* memslot - Memory region slot for new virtual translation tables
*
* Output Args: None
*
* Return: None
*
 * Within the VM given by vm, creates a virtual translation for the page
 * starting at gva to the page starting at gpa.
*/
void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
uint32_t memslot)
{
int ri, idx;
uint64_t *entry;
TEST_ASSERT((gva % vm->page_size) == 0,
"Virtual address not on page boundary,\n"
" vaddr: 0x%lx vm->page_size: 0x%x",
gva, vm->page_size);
TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
(gva >> vm->page_shift)),
"Invalid virtual address, vaddr: 0x%lx",
gva);
	TEST_ASSERT((gpa % vm->page_size) == 0,
		"Physical address not on page boundary,\n"
		"  paddr: 0x%lx vm->page_size: 0x%x",
		gpa, vm->page_size);
	TEST_ASSERT((gpa >> vm->page_shift) <= vm->max_gfn,
		"Physical address beyond maximum supported,\n"
		"  paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
		gpa, vm->max_gfn, vm->page_size);
/* Walk through region and segment tables */
entry = addr_gpa2hva(vm, vm->pgd);
for (ri = 1; ri <= 4; ri++) {
idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
if (entry[idx] & REGION_ENTRY_INVALID)
entry[idx] = virt_alloc_region(vm, ri, memslot);
entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
}
/* Fill in page table entry */
idx = (gva >> 12) & 0x0ffu; /* page index */
if (!(entry[idx] & PAGE_INVALID))
fprintf(stderr,
"WARNING: PTE for gpa=0x%"PRIx64" already set!\n", gpa);
entry[idx] = gpa;
}
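/*
 * For reference, the index math above follows the regular s390 DAT
 * geometry: (gva >> (64 - 11 * ri)) & 0x7ff selects
 *
 *   ri == 1: bits 63..53  region-first index
 *   ri == 2: bits 52..42  region-second index
 *   ri == 3: bits 41..31  region-third index
 *   ri == 4: bits 30..20  segment index
 *
 * and (gva >> 12) & 0xff picks one of the 256 page-table entries.
 */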
/*
* Address Guest Virtual to Guest Physical
*
* Input Args:
 * vm - Virtual Machine
 * gva - VM virtual address
 *
 * Output Args: None
 *
 * Return:
 * Equivalent VM physical address
 *
 * Translates the VM virtual address given by gva to a VM physical
 * address, within the VM given by vm, by walking the guest's region,
 * segment and page tables.
 * A TEST_ASSERT failure occurs if no mapping exists for the given
 * VM virtual address.
*/
vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
int ri, idx;
uint64_t *entry;
TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
vm->page_size);
entry = addr_gpa2hva(vm, vm->pgd);
for (ri = 1; ri <= 4; ri++) {
idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
TEST_ASSERT(!(entry[idx] & REGION_ENTRY_INVALID),
"No region mapping for vm virtual address 0x%lx",
gva);
entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
}
idx = (gva >> 12) & 0x0ffu; /* page index */
TEST_ASSERT(!(entry[idx] & PAGE_INVALID),
"No page mapping for vm virtual address 0x%lx", gva);
return (entry[idx] & ~0xffful) + (gva & 0xffful);
}
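/*
 * A minimal sketch of how this translation is typically consumed: the
 * generic selftest library chains it with the gpa-to-hva lookup so that
 * host code can dereference guest-virtual addresses (assuming
 * addr_gva2hva is this simple composition):
 */
void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
	return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}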
static void virt_dump_ptes(FILE *stream, struct kvm_vm *vm, uint8_t indent,
uint64_t ptea_start)
{
uint64_t *pte, ptea;
for (ptea = ptea_start; ptea < ptea_start + 0x100 * 8; ptea += 8) {
pte = addr_gpa2hva(vm, ptea);
if (*pte & PAGE_INVALID)
continue;
fprintf(stream, "%*spte @ 0x%lx: 0x%016lx\n",
indent, "", ptea, *pte);
}
}
static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent,
uint64_t reg_tab_addr)
{
uint64_t addr, *entry;
	for (addr = reg_tab_addr; addr < reg_tab_addr + 0x800 * 8; addr += 8) {
entry = addr_gpa2hva(vm, addr);
if (*entry & REGION_ENTRY_INVALID)
continue;
fprintf(stream, "%*srt%lde @ 0x%lx: 0x%016lx\n",
indent, "", 4 - ((*entry & REGION_ENTRY_TYPE) >> 2),
addr, *entry);
if (*entry & REGION_ENTRY_TYPE) {
virt_dump_region(stream, vm, indent + 2,
*entry & REGION_ENTRY_ORIGIN);
} else {
virt_dump_ptes(stream, vm, indent + 2,
*entry & REGION_ENTRY_ORIGIN);
}
}
}
void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
if (!vm->pgd_created)
return;
virt_dump_region(stream, vm, indent, vm->pgd);
}
/*
* Create a VM with reasonable defaults
*
* Input Args:
* vcpuid - The id of the single VCPU to add to the VM.
 * extra_mem_pages - The number of extra memory pages to add (this also
 *                   determines how much extra space is reserved for
 *                   setting up the page tables in mem slot 0)
* guest_code - The vCPU's entry point
*
* Output Args: None
*
* Return:
* Pointer to opaque structure that describes the created VM.
*/
struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
void *guest_code)
{
/*
* The additional amount of pages required for the page tables is:
* 1 * n / 256 + 4 * (n / 256) / 2048 + 4 * (n / 256) / 2048^2 + ...
* which is definitely smaller than (n / 256) * 2.
*/
uint64_t extra_pg_pages = extra_mem_pages / 256 * 2;
struct kvm_vm *vm;
vm = vm_create(VM_MODE_DEFAULT,
DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
vm_vcpu_add_default(vm, vcpuid, guest_code);
return vm;
}
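/*
 * Numeric check of the bound in the comment above (illustrative,
 * n = 524288 extra 4K pages, i.e. 2 GiB of guest memory):
 *   page tables:    n / 256                = 2048 pages
 *   segment tables: 4 * (n / 256) / 2048   =    4 pages
 *   region tables:  4 * (n / 256) / 2048^2 and higher levels: ~0 pages
 *   total ~= 2052, comfortably below (n / 256) * 2 = 4096.
 */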
/*
* Adds a vCPU with reasonable defaults (i.e. a stack and initial PSW)
*
* Input Args:
* vcpuid - The id of the VCPU to add to the VM.
* guest_code - The vCPU's entry point
*/
void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
{
size_t stack_size = DEFAULT_STACK_PGS * getpagesize();
uint64_t stack_vaddr;
struct kvm_regs regs;
struct kvm_sregs sregs;
struct kvm_run *run;
TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
vm->page_size);
stack_vaddr = vm_vaddr_alloc(vm, stack_size,
DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
vm_vcpu_add(vm, vcpuid);
/* Setup guest registers */
vcpu_regs_get(vm, vcpuid, &regs);
	/* Stack grows down; leave room for the 160-byte s390x register save area */
	regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160;
vcpu_regs_set(vm, vcpuid, &regs);
vcpu_sregs_get(vm, vcpuid, &sregs);
sregs.crs[0] |= 0x00040000; /* Enable floating point regs */
sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */
vcpu_sregs_set(vm, vcpuid, &sregs);
run = vcpu_state(vm, vcpuid);
run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */
run->psw_addr = (uintptr_t)guest_code;
}
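/*
 * Decoding the initial PSW mask set above (bit numbers counted from the
 * left, per the s390 architecture):
 *   0x0400000000000000  bit 5  - DAT on
 *   0x0000000100000000  bit 31 - extended addressing (EA)
 *   0x0000000080000000  bit 32 - basic addressing (BA)
 * EA = BA = 1 selects 64-bit addressing mode.
 */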
void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
{
struct vcpu *vcpu = vm->vcpu_head;
fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr);
}

View File

@ -821,7 +821,7 @@ struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
 	uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;

 	/* Create VM */
-	vm = vm_create(VM_MODE_P52V48_4K,
+	vm = vm_create(VM_MODE_DEFAULT,
 		       DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
 		       O_RDWR);

View File

@ -5,8 +5,6 @@
  * Copyright (C) 2018, Google LLC.
  */

-#define _GNU_SOURCE /* for program_invocation_name */
-
 #include "test_util.h"
 #include "kvm_util.h"
 #include "processor.h"

View File

@ -0,0 +1,151 @@
// SPDX-License-Identifier: GPL-2.0-only
/*
* Test for s390x KVM_CAP_SYNC_REGS
*
* Based on the same test for x86:
* Copyright (C) 2018, Google LLC.
*
 * Adaptations for s390x:
* Copyright (C) 2019, Red Hat, Inc.
*
* Test expected behavior of the KVM_CAP_SYNC_REGS functionality.
*/
#define _GNU_SOURCE /* for program_invocation_short_name */
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include "test_util.h"
#include "kvm_util.h"
#define VCPU_ID 5
static void guest_code(void)
{
for (;;) {
asm volatile ("diag 0,0,0x501");
asm volatile ("ahi 11,1");
}
}
#define REG_COMPARE(reg) \
TEST_ASSERT(left->reg == right->reg, \
"Register " #reg \
" values did not match: 0x%llx, 0x%llx\n", \
left->reg, right->reg)
static void compare_regs(struct kvm_regs *left, struct kvm_sync_regs *right)
{
int i;
for (i = 0; i < 16; i++)
REG_COMPARE(gprs[i]);
}
static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
{
int i;
for (i = 0; i < 16; i++)
REG_COMPARE(acrs[i]);
for (i = 0; i < 16; i++)
REG_COMPARE(crs[i]);
}
#undef REG_COMPARE
#define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS)
#define INVALID_SYNC_FIELD 0x80000000
int main(int argc, char *argv[])
{
struct kvm_vm *vm;
struct kvm_run *run;
struct kvm_regs regs;
struct kvm_sregs sregs;
int rv, cap;
/* Tell stdout not to buffer its content */
setbuf(stdout, NULL);
cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
if (!cap) {
fprintf(stderr, "CAP_SYNC_REGS not supported, skipping test\n");
exit(KSFT_SKIP);
}
/* Create VM */
vm = vm_create_default(VCPU_ID, 0, guest_code);
run = vcpu_state(vm, VCPU_ID);
/* Request and verify all valid register sets. */
run->kvm_valid_regs = TEST_SYNC_FIELDS;
rv = _vcpu_run(vm, VCPU_ID);
TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
"Unexpected exit reason: %u (%s)\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
TEST_ASSERT(run->s390_sieic.icptcode == 4 &&
(run->s390_sieic.ipa >> 8) == 0x83 &&
(run->s390_sieic.ipb >> 16) == 0x501,
"Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x\n",
run->s390_sieic.icptcode, run->s390_sieic.ipa,
run->s390_sieic.ipb);
vcpu_regs_get(vm, VCPU_ID, &regs);
compare_regs(&regs, &run->s.regs);
vcpu_sregs_get(vm, VCPU_ID, &sregs);
compare_sregs(&sregs, &run->s.regs);
/* Set and verify various register values */
run->s.regs.gprs[11] = 0xBAD1DEA;
run->s.regs.acrs[0] = 1 << 11;
run->kvm_valid_regs = TEST_SYNC_FIELDS;
run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS;
rv = _vcpu_run(vm, VCPU_ID);
TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
"Unexpected exit reason: %u (%s)\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1,
"r11 sync regs value incorrect 0x%llx.",
run->s.regs.gprs[11]);
TEST_ASSERT(run->s.regs.acrs[0] == 1 << 11,
"acr0 sync regs value incorrect 0x%llx.",
run->s.regs.acrs[0]);
vcpu_regs_get(vm, VCPU_ID, &regs);
compare_regs(&regs, &run->s.regs);
vcpu_sregs_get(vm, VCPU_ID, &sregs);
compare_sregs(&sregs, &run->s.regs);
/* Clear kvm_dirty_regs bits, verify new s.regs values are
* overwritten with existing guest values.
*/
run->kvm_valid_regs = TEST_SYNC_FIELDS;
run->kvm_dirty_regs = 0;
run->s.regs.gprs[11] = 0xDEADBEEF;
rv = _vcpu_run(vm, VCPU_ID);
TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
"Unexpected exit reason: %u (%s)\n",
run->exit_reason,
exit_reason_str(run->exit_reason));
TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
"r11 sync regs value incorrect 0x%llx.",
run->s.regs.gprs[11]);
kvm_vm_free(vm);
return 0;
}

View File

@ -314,6 +314,7 @@ int kvm_vcpu_init(struct kvm_vcpu *vcpu, struct kvm *kvm, unsigned id)
 	kvm_vcpu_set_in_spin_loop(vcpu, false);
 	kvm_vcpu_set_dy_eligible(vcpu, false);
 	vcpu->preempted = false;
+	vcpu->ready = false;

 	r = kvm_arch_vcpu_init(vcpu);
 	if (r < 0)
@ -2387,6 +2388,7 @@ bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu)
 	wqp = kvm_arch_vcpu_wq(vcpu);
 	if (swq_has_sleeper(wqp)) {
 		swake_up_one(wqp);
+		WRITE_ONCE(vcpu->ready, true);
 		++vcpu->stat.halt_wakeup;
 		return true;
 	}
@ -2500,7 +2502,7 @@ void kvm_vcpu_on_spin(struct kvm_vcpu *me, bool yield_to_kernel_mode)
 				continue;
 			} else if (pass && i > last_boosted_vcpu)
 				break;
-			if (!READ_ONCE(vcpu->preempted))
+			if (!READ_ONCE(vcpu->ready))
 				continue;
 			if (vcpu == me)
 				continue;
@ -4203,8 +4205,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu)
 {
 	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);

-	if (vcpu->preempted)
-		vcpu->preempted = false;
+	vcpu->preempted = false;
+	WRITE_ONCE(vcpu->ready, false);

 	kvm_arch_sched_in(vcpu, cpu);
@ -4216,8 +4218,10 @@ static void kvm_sched_out(struct preempt_notifier *pn,
 {
 	struct kvm_vcpu *vcpu = preempt_notifier_to_vcpu(pn);

-	if (current->state == TASK_RUNNING)
+	if (current->state == TASK_RUNNING) {
 		vcpu->preempted = true;
+		WRITE_ONCE(vcpu->ready, true);
+	}

 	kvm_arch_vcpu_put(vcpu);
 }
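Taken together, these hunks widen the directed-yield heuristic from "was
preempted" to "is ready to run": vcpu->ready is set both when a runnable
vCPU is scheduled out and when a sleeping vCPU is woken (e.g. to take an
interrupt), and cleared once the vCPU runs again, so kvm_vcpu_on_spin()
now also boosts interrupt recipients. A condensed map of the lifecycle
(paraphrased, not literal kernel code):

	/*
	 * kvm_sched_out()    (still TASK_RUNNING) -> preempted = true,  ready = true
	 * kvm_vcpu_wake_up() (sleeper woken)      ->                    ready = true
	 * kvm_sched_in()     (runs again)         -> preempted = false, ready = false
	 * kvm_vcpu_on_spin() skips any candidate with !READ_ONCE(vcpu->ready)
	 */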
} }