diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index 74ef58c8ff53..7dbb8d622683 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -343,9 +343,10 @@ struct kvm_mmu {
 	void (*update_pte)(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp,
 			   u64 *spte, const void *pte);
 	hpa_t root_hpa;
-	int root_level;
-	int shadow_root_level;
 	union kvm_mmu_page_role base_role;
+	u8 root_level;
+	u8 shadow_root_level;
+	u8 ept_ad;
 	bool direct_map;
 
 	/*
diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h
index cc54b7026567..dffe8d68fb27 100644
--- a/arch/x86/include/asm/vmx.h
+++ b/arch/x86/include/asm/vmx.h
@@ -516,12 +516,14 @@ struct vmx_msr_entry {
 #define EPT_VIOLATION_READABLE_BIT	3
 #define EPT_VIOLATION_WRITABLE_BIT	4
 #define EPT_VIOLATION_EXECUTABLE_BIT	5
+#define EPT_VIOLATION_GVA_TRANSLATED_BIT 8
 #define EPT_VIOLATION_ACC_READ		(1 << EPT_VIOLATION_ACC_READ_BIT)
 #define EPT_VIOLATION_ACC_WRITE		(1 << EPT_VIOLATION_ACC_WRITE_BIT)
 #define EPT_VIOLATION_ACC_INSTR		(1 << EPT_VIOLATION_ACC_INSTR_BIT)
 #define EPT_VIOLATION_READABLE		(1 << EPT_VIOLATION_READABLE_BIT)
 #define EPT_VIOLATION_WRITABLE		(1 << EPT_VIOLATION_WRITABLE_BIT)
 #define EPT_VIOLATION_EXECUTABLE	(1 << EPT_VIOLATION_EXECUTABLE_BIT)
+#define EPT_VIOLATION_GVA_TRANSLATED	(1 << EPT_VIOLATION_GVA_TRANSLATED_BIT)
 
 /*
  * VM-instruction error numbers
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index ac7810513d0e..558676538fca 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -4340,7 +4340,8 @@ void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu)
 }
 EXPORT_SYMBOL_GPL(kvm_init_shadow_mmu);
 
-void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
+			     bool accessed_dirty)
 {
 	struct kvm_mmu *context = &vcpu->arch.mmu;
 
@@ -4349,6 +4350,7 @@ void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly)
 	context->shadow_root_level = kvm_x86_ops->get_tdp_level();
 
 	context->nx = true;
+	context->ept_ad = accessed_dirty;
 	context->page_fault = ept_page_fault;
 	context->gva_to_gpa = ept_gva_to_gpa;
 	context->sync_page = ept_sync_page;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index ddc56e91f2e4..d8ccb32f7308 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -74,7 +74,8 @@ enum {
 
 int handle_mmio_page_fault(struct kvm_vcpu *vcpu, u64 addr, bool direct);
 void kvm_init_shadow_mmu(struct kvm_vcpu *vcpu);
-void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly);
+void kvm_init_shadow_ept_mmu(struct kvm_vcpu *vcpu, bool execonly,
+			     bool accessed_dirty);
 
 static inline unsigned int kvm_mmu_available_pages(struct kvm *kvm)
 {
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 3e20f7b33892..314d2071b337 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -23,13 +23,6 @@
  * so the code in this file is compiled twice, once per pte size.
  */
 
-/*
- * This is used to catch non optimized PT_GUEST_(DIRTY|ACCESS)_SHIFT macro
- * uses for EPT without A/D paging type.
- */
-extern u64 __pure __using_nonexistent_pte_bit(void)
-	       __compiletime_error("wrong use of PT_GUEST_(DIRTY|ACCESS)_SHIFT");
-
 #if PTTYPE == 64
 	#define pt_element_t u64
 	#define guest_walker guest_walker64
@@ -39,8 +32,6 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
 	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
 	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
 	#define PT_LEVEL_BITS PT64_LEVEL_BITS
-	#define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK
-	#define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK
 	#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
 	#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
 	#define PT_HAVE_ACCESSED_DIRTY(mmu) true
@@ -61,8 +52,6 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
 	#define PT_INDEX(addr, level) PT32_INDEX(addr, level)
 	#define PT_LEVEL_BITS PT32_LEVEL_BITS
 	#define PT_MAX_FULL_LEVELS 2
-	#define PT_GUEST_ACCESSED_MASK PT_ACCESSED_MASK
-	#define PT_GUEST_DIRTY_MASK PT_DIRTY_MASK
 	#define PT_GUEST_DIRTY_SHIFT PT_DIRTY_SHIFT
 	#define PT_GUEST_ACCESSED_SHIFT PT_ACCESSED_SHIFT
 	#define PT_HAVE_ACCESSED_DIRTY(mmu) true
@@ -76,17 +65,18 @@ extern u64 __pure __using_nonexistent_pte_bit(void)
 	#define PT_LVL_OFFSET_MASK(lvl) PT64_LVL_OFFSET_MASK(lvl)
 	#define PT_INDEX(addr, level) PT64_INDEX(addr, level)
 	#define PT_LEVEL_BITS PT64_LEVEL_BITS
-	#define PT_GUEST_ACCESSED_MASK 0
-	#define PT_GUEST_DIRTY_MASK 0
-	#define PT_GUEST_DIRTY_SHIFT __using_nonexistent_pte_bit()
-	#define PT_GUEST_ACCESSED_SHIFT __using_nonexistent_pte_bit()
-	#define PT_HAVE_ACCESSED_DIRTY(mmu) false
+	#define PT_GUEST_DIRTY_SHIFT 9
+	#define PT_GUEST_ACCESSED_SHIFT 8
+	#define PT_HAVE_ACCESSED_DIRTY(mmu) ((mmu)->ept_ad)
 	#define CMPXCHG cmpxchg64
 	#define PT_MAX_FULL_LEVELS 4
 #else
 	#error Invalid PTTYPE value
 #endif
 
+#define PT_GUEST_DIRTY_MASK    (1 << PT_GUEST_DIRTY_SHIFT)
+#define PT_GUEST_ACCESSED_MASK (1 << PT_GUEST_ACCESSED_SHIFT)
+
 #define gpte_to_gfn_lvl FNAME(gpte_to_gfn_lvl)
 #define gpte_to_gfn(pte) gpte_to_gfn_lvl((pte), PT_PAGE_TABLE_LEVEL)
 
@@ -290,6 +280,7 @@ static int FNAME(walk_addr_generic)(struct guest_walker *walker,
 	pt_element_t __user *uninitialized_var(ptep_user);
 	gfn_t table_gfn;
 	unsigned index, pt_access, pte_access, accessed_dirty, pte_pkey;
+	unsigned nested_access;
 	gpa_t pte_gpa;
 	bool have_ad;
 	int offset;
@@ -319,6 +310,14 @@ retry_walk:
 	ASSERT(!(is_long_mode(vcpu) && !is_pae(vcpu)));
 
 	accessed_dirty = have_ad ? PT_GUEST_ACCESSED_MASK : 0;
+
+	/*
+	 * FIXME: on Intel processors, loads of the PDPTE registers for PAE paging
+	 * by the MOV to CR instruction are treated as reads and do not cause the
+	 * processor to set the dirty flag in any EPT paging-structure entry.
+	 */
+	nested_access = (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
+
 	pt_access = pte_access = ACC_ALL;
 	++walker->level;
 
@@ -338,7 +337,7 @@ retry_walk:
 		walker->pte_gpa[walker->level - 1] = pte_gpa;
 
 		real_gfn = mmu->translate_gpa(vcpu, gfn_to_gpa(table_gfn),
-					      PFERR_USER_MASK|PFERR_WRITE_MASK,
+					      nested_access,
 					      &walker->fault);
 
 		/*
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1c372600a962..5556cce99749 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -2767,6 +2767,8 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx)
 		vmx->nested.nested_vmx_ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
 			VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
 			VMX_EPT_1GB_PAGE_BIT;
+		if (enable_ept_ad_bits)
+			vmx->nested.nested_vmx_ept_caps |= VMX_EPT_AD_BIT;
 	} else
 		vmx->nested.nested_vmx_ept_caps = 0;
 
@@ -6211,6 +6213,17 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu)
 
 	exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
 
+	if (is_guest_mode(vcpu)
+	    && !(exit_qualification & EPT_VIOLATION_GVA_TRANSLATED)) {
+		/*
+		 * Fix up exit_qualification according to whether guest
+		 * page table accesses are reads or writes.
+		 */
+		u64 eptp = nested_ept_get_cr3(vcpu);
+		if (eptp & VMX_EPT_AD_ENABLE_BIT)
+			exit_qualification &= ~EPT_VIOLATION_ACC_WRITE;
+	}
+
 	/*
 	 * EPT violation happened while executing iret from NMI,
 	 * "blocked by NMI" bit has to be set before next VM entry.
@@ -9416,17 +9429,26 @@ static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
 	return get_vmcs12(vcpu)->ept_pointer;
 }
 
-static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
+static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
 {
+	u64 eptp;
+
 	WARN_ON(mmu_is_nested(vcpu));
+	eptp = nested_ept_get_cr3(vcpu);
+	if ((eptp & VMX_EPT_AD_ENABLE_BIT) && !enable_ept_ad_bits)
+		return 1;
+
+	kvm_mmu_unload(vcpu);
 	kvm_init_shadow_ept_mmu(vcpu,
 			to_vmx(vcpu)->nested.nested_vmx_ept_caps &
-			VMX_EPT_EXECUTE_ONLY_BIT);
+			VMX_EPT_EXECUTE_ONLY_BIT,
+			eptp & VMX_EPT_AD_ENABLE_BIT);
 	vcpu->arch.mmu.set_cr3 = vmx_set_cr3;
 	vcpu->arch.mmu.get_cr3 = nested_ept_get_cr3;
 	vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
 
 	vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
+	return 0;
 }
 
 static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
@@ -10188,8 +10210,10 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
 	}
 
 	if (nested_cpu_has_ept(vmcs12)) {
-		kvm_mmu_unload(vcpu);
-		nested_ept_init_mmu_context(vcpu);
+		if (nested_ept_init_mmu_context(vcpu)) {
+			*entry_failure_code = ENTRY_FAIL_DEFAULT;
+			return 1;
+		}
 	} else if (nested_cpu_has2(vmcs12,
 				   SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
 		vmx_flush_tlb_ept_only(vcpu);
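
(Reviewer note appended after the patch, not part of it.) As background for the hunks above: EPTP bit 6 (VMX_EPT_AD_ENABLE_BIT) is what turns on accessed/dirty tracking for EPT, and the A/D flags themselves sit in bits 8 and 9 of each EPT paging-structure entry, which is where the new PT_GUEST_ACCESSED_SHIFT/PT_GUEST_DIRTY_SHIFT values come from. Below is a minimal, standalone C sketch of those constants and of the nested_access computation added to walk_addr_generic(); nested_access_for() is a helper invented for this note, not a KVM function, and the PFERR_* values are simplified stand-ins.

/*
 * Standalone userspace illustration (not kernel code) of the bit layout
 * and the nested-access decision used by the patch above.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VMX_EPT_AD_ENABLE_BIT   (1ULL << 6)  /* EPTP bit 6: enable EPT A/D flags */
#define PT_GUEST_ACCESSED_SHIFT 8            /* accessed flag: EPT PTE bit 8 */
#define PT_GUEST_DIRTY_SHIFT    9            /* dirty flag:    EPT PTE bit 9 */
#define PT_GUEST_ACCESSED_MASK  (1ULL << PT_GUEST_ACCESSED_SHIFT)
#define PT_GUEST_DIRTY_MASK     (1ULL << PT_GUEST_DIRTY_SHIFT)

/* Simplified stand-ins for KVM's permission-fault bits. */
#define PFERR_WRITE_MASK (1U << 1)
#define PFERR_USER_MASK  (1U << 2)

/*
 * Mirrors the walk_addr_generic() change: if the tables being walked use
 * A/D bits, the walker may have to write accessed/dirty flags into them,
 * so the nested (table GPA -> host) translation must be checked as a
 * write; otherwise a user-mode read check is enough.
 */
static unsigned nested_access_for(bool have_ad)
{
	return (have_ad ? PFERR_WRITE_MASK : 0) | PFERR_USER_MASK;
}

int main(void)
{
	uint64_t eptp = 0x5000 | VMX_EPT_AD_ENABLE_BIT;  /* hypothetical EPTP; only the A/D bit matters here */
	bool have_ad = eptp & VMX_EPT_AD_ENABLE_BIT;

	printf("EPT A/D enabled: %d, nested_access = %#x\n",
	       have_ad, nested_access_for(have_ad));
	printf("accessed mask = %#llx, dirty mask = %#llx\n",
	       (unsigned long long)PT_GUEST_ACCESSED_MASK,
	       (unsigned long long)PT_GUEST_DIRTY_MASK);
	return 0;
}

The same bit also explains the new check in nested_ept_init_mmu_context(): when enable_ept_ad_bits is off, VMX_EPT_AD_BIT is not advertised to L1, so an EPTP that sets the A/D-enable bit anyway makes the nested VM entry fail instead of silently running without A/D emulation.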