diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h index d213ec5c3766..f0b0c90dd398 100644 --- a/arch/x86/include/uapi/asm/vmx.h +++ b/arch/x86/include/uapi/asm/vmx.h @@ -146,7 +146,6 @@ #define VMX_ABORT_SAVE_GUEST_MSR_FAIL 1 #define VMX_ABORT_LOAD_HOST_PDPTE_FAIL 2 -#define VMX_ABORT_VMCS_CORRUPTED 3 #define VMX_ABORT_LOAD_HOST_MSR_FAIL 4 #endif /* _UAPIVMX_H */ diff --git a/arch/x86/kvm/vmx/nested.c b/arch/x86/kvm/vmx/nested.c index 118d185764ec..d125304ae2c9 100644 --- a/arch/x86/kvm/vmx/nested.c +++ b/arch/x86/kvm/vmx/nested.c @@ -2978,6 +2978,25 @@ int nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) !(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)) vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); + /* + * Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and* + * nested early checks are disabled. In the event of a "late" VM-Fail, + * i.e. a VM-Fail detected by hardware but not KVM, KVM must unwind its + * software model to the pre-VMEntry host state. When EPT is disabled, + * GUEST_CR3 holds KVM's shadow CR3, not L1's "real" CR3, which causes + * nested_vmx_restore_host_state() to corrupt vcpu->arch.cr3. Stuffing + * vmcs01.GUEST_CR3 results in the unwind naturally setting arch.cr3 to + * the correct value. Smashing vmcs01.GUEST_CR3 is safe because nested + * VM-Exits, and the unwind, reset KVM's MMU, i.e. vmcs01.GUEST_CR3 is + * guaranteed to be overwritten with a shadow CR3 prior to re-entering + * L1. Don't stuff vmcs01.GUEST_CR3 when using nested early checks as + * KVM modifies vcpu->arch.cr3 if and only if the early hardware checks + * pass, and early VM-Fails do not reset KVM's MMU, i.e. the VM-Fail + * path would need to manually save/restore vmcs01.GUEST_CR3. + */ + if (!enable_ept && !nested_early_check) + vmcs_writel(GUEST_CR3, vcpu->arch.cr3); + vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02); prepare_vmcs02_early(vmx, vmcs12); @@ -3869,18 +3888,8 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW)); nested_ept_uninit_mmu_context(vcpu); - - /* - * This is only valid if EPT is in use, otherwise the vmcs01 GUEST_CR3 - * points to shadow pages! Fortunately we only get here after a WARN_ON - * if EPT is disabled, so a VMabort is perfectly fine. - */ - if (enable_ept) { - vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); - __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); - } else { - nested_vmx_abort(vcpu, VMX_ABORT_VMCS_CORRUPTED); - } + vcpu->arch.cr3 = vmcs_readl(GUEST_CR3); + __set_bit(VCPU_EXREG_CR3, (ulong *)&vcpu->arch.regs_avail); /* * Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs @@ -3888,7 +3897,8 @@ static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu) * VMFail, like everything else we just need to ensure our * software model is up-to-date. */ - ept_save_pdptrs(vcpu); + if (enable_ept) + ept_save_pdptrs(vcpu); kvm_mmu_reset_context(vcpu); @@ -5889,14 +5899,6 @@ __init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *)) { int i; - /* - * Without EPT it is not possible to restore L1's CR3 and PDPTR on - * VMfail, because they are not available in vmcs01. Just always - * use hardware checks. - */ - if (!enable_ept) - nested_early_check = 1; - if (!cpu_has_vmx_shadow_vmcs()) enable_shadow_vmcs = 0; if (enable_shadow_vmcs) {