From a4fa16353108431e7cfdfc3ecf683bac21b50755 Mon Sep 17 00:00:00 2001
From: Gleb Natapov
Date: Thu, 3 May 2012 11:36:39 +0300
Subject: [PATCH 1/4] KVM: ensure async PF event wakes up vcpu from halt

If a vcpu executes the hlt instruction while an async PF is waiting to be
delivered, the vcpu can block and deliver the async PF only after another
event wakes it up. This happens because kvm_check_async_pf_completion()
removes the completion event from vcpu->async_pf.done before entering
kvm_vcpu_block(), and this makes kvm_arch_vcpu_runnable() return false.
The solution is to make the vcpu runnable when processing a completion.

Signed-off-by: Gleb Natapov
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/x86.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 91a5e989abcf..185a2b823a2d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6581,6 +6581,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 		kvm_inject_page_fault(vcpu, &fault);
 	}
 	vcpu->arch.apf.halted = false;
+	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 }
 
 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)

From 62c49cc976af84cb0ffcb5ec07ee88da1a94e222 Mon Sep 17 00:00:00 2001
From: Gleb Natapov
Date: Wed, 2 May 2012 15:04:02 +0300
Subject: [PATCH 2/4] KVM: Do not take reference to mm during async #PF

It turned out to be totally unneeded. The reason the code was introduced
is so that KVM can prefault the swapped-in page, but the prefault can fail
even if the mm is pinned, since the page table can change anyway. KVM
handles this situation correctly though and does not inject spurious page
faults.

Fixes: "INFO: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected" warning
while running LTP inside a KVM guest using the recent -next kernel.

Reported-by: Sasha Levin
Signed-off-by: Gleb Natapov
Signed-off-by: Avi Kivity
---
 arch/x86/kernel/kvm.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b8ba6e4a27e4..e554e5ad2fe8 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -79,7 +79,6 @@ struct kvm_task_sleep_node {
 	u32 token;
 	int cpu;
 	bool halted;
-	struct mm_struct *mm;
 };
 
 static struct kvm_task_sleep_head {
@@ -126,9 +125,7 @@ void kvm_async_pf_task_wait(u32 token)
 
 	n.token = token;
 	n.cpu = smp_processor_id();
-	n.mm = current->active_mm;
 	n.halted = idle || preempt_count() > 1;
-	atomic_inc(&n.mm->mm_count);
 	init_waitqueue_head(&n.wq);
 	hlist_add_head(&n.link, &b->list);
 	spin_unlock(&b->lock);
@@ -161,9 +158,6 @@ EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
 static void apf_task_wake_one(struct kvm_task_sleep_node *n)
 {
 	hlist_del_init(&n->link);
-	if (!n->mm)
-		return;
-	mmdrop(n->mm);
 	if (n->halted)
 		smp_send_reschedule(n->cpu);
 	else if (waitqueue_active(&n->wq))
@@ -207,7 +201,7 @@ again:
 		 * async PF was not yet handled.
 		 * Add dummy entry for the token.
 		 */
-		n = kmalloc(sizeof(*n), GFP_ATOMIC);
+		n = kzalloc(sizeof(*n), GFP_ATOMIC);
 		if (!n) {
 			/*
 			 * Allocation failed! Busy wait while other cpu
@@ -219,7 +213,6 @@
 		}
 		n->token = token;
 		n->cpu = smp_processor_id();
-		n->mm = NULL;
 		init_waitqueue_head(&n->wq);
 		hlist_add_head(&n->link, &b->list);
 	} else
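Both patches above adjust the async PF bookkeeping, and the race closed by
patch 1 is easy to model in isolation: the completion check consumes the
wakeup event before the vcpu blocks, so the runnability test that
kvm_vcpu_block() relies on sees nothing pending. Below is a minimal
userspace sketch of that ordering; the vcpu struct and the function names
are simplified stand-ins echoing the kernel code, not the real KVM
implementation.

/*
 * Toy userspace model of the halt/async-PF ordering fixed by patch 1.
 * The names echo kvm_check_async_pf_completion() and
 * kvm_arch_vcpu_runnable(), but all types and logic here are
 * simplified stand-ins, not the real KVM code.
 */
#include <stdbool.h>
#include <stdio.h>

enum mp_state { MP_STATE_RUNNABLE, MP_STATE_HALTED };

struct vcpu {
	enum mp_state mp_state;
	int pending_completions;	/* stands in for vcpu->async_pf.done */
};

/* Consumes one completion, as kvm_check_async_pf_completion() does. */
static void check_async_pf_completion(struct vcpu *v, bool apply_fix)
{
	if (v->pending_completions > 0) {
		v->pending_completions--;	/* event removed from .done */
		if (apply_fix)
			v->mp_state = MP_STATE_RUNNABLE;	/* the one-line fix */
	}
}

/* Mirrors the runnability test made before blocking in kvm_vcpu_block(). */
static bool vcpu_runnable(struct vcpu *v)
{
	return v->mp_state == MP_STATE_RUNNABLE || v->pending_completions > 0;
}

int main(void)
{
	/* halted vcpu with one completion queued */
	struct vcpu v = { MP_STATE_HALTED, 1 };

	check_async_pf_completion(&v, false);
	printf("without fix: runnable=%d (vcpu blocks until another event)\n",
	       vcpu_runnable(&v));

	v = (struct vcpu){ MP_STATE_HALTED, 1 };
	check_async_pf_completion(&v, true);
	printf("with fix:    runnable=%d (vcpu wakes from halt)\n",
	       vcpu_runnable(&v));
	return 0;
}

Without the fix the first case prints runnable=0: the event is gone but the
state still says halted, which is exactly what kvm_arch_vcpu_runnable() saw.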
From de6c0b02d4d7bdf2587e679a6ddbb71b7d68bb89 Mon Sep 17 00:00:00 2001
From: David Gibson
Date: Tue, 8 May 2012 20:24:08 +1000
Subject: [PATCH 3/4] KVM: PPC: Book3S HV: Fix refcounting of hugepages

The H_REGISTER_VPA hcall implementation in HV Power KVM needs to pin some
guest memory pages into host memory so that they can be safely accessed
from usermode. It does this using get_user_pages_fast(). When the VPA is
unregistered, or the VCPUs are cleaned up, these pages are released using
put_page().

However, get_user_pages() is invoked on the specific memory area of the
VPA, which could lie within hugepages. In case the pinned page is huge,
we explicitly find the head page of the compound page before calling
put_page() on it.

At least with the latest kernel, this is not correct. put_page() already
handles finding the correct head page of a compound, and also deals with
various counts on the individual tail page which are important for
transparent huge pages. We don't support transparent hugepages on Power,
but even so, bypassing this count maintenance can lead (when the VM ends)
to a hugepage being released back to the pool with a non-zero mapcount on
one of the tail pages. This can then lead to a bad_page() when the page
is released from the hugepage pool.

This removes the explicit compound_head() call to correct this bug.

Signed-off-by: David Gibson
Signed-off-by: Paul Mackerras
Acked-by: Alexander Graf
Signed-off-by: Avi Kivity
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 22 +++++++++++++---------
 arch/powerpc/kvm/book3s_hv.c        |  2 --
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index ddc485a529f2..c3beaeef3f60 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -258,6 +258,8 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
 		    !(memslot->userspace_addr & (s - 1))) {
 			start &= ~(s - 1);
 			pgsize = s;
+			get_page(hpage);
+			put_page(page);
 			page = hpage;
 		}
 	}
@@ -281,11 +283,8 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
 	err = 0;
 
  out:
-	if (got) {
-		if (PageHuge(page))
-			page = compound_head(page);
+	if (got)
 		put_page(page);
-	}
 	return err;
 
  up_err:
@@ -678,8 +677,15 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		SetPageDirty(page);
 
  out_put:
-	if (page)
-		put_page(page);
+	if (page) {
+		/*
+		 * We drop pages[0] here, not page because page might
+		 * have been set to the head page of a compound, but
+		 * we have to drop the reference on the correct tail
+		 * page to match the get inside gup()
+		 */
+		put_page(pages[0]);
+	}
 	return ret;
 
  out_unlock:
@@ -979,6 +985,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 			pa = *physp;
 		}
 		page = pfn_to_page(pa >> PAGE_SHIFT);
+		get_page(page);
 	} else {
 		hva = gfn_to_hva_memslot(memslot, gfn);
 		npages = get_user_pages_fast(hva, 1, 1, pages);
@@ -991,8 +998,6 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 		page = compound_head(page);
 		psize <<= compound_order(page);
 	}
-	if (!kvm->arch.using_mmu_notifiers)
-		get_page(page);
 	offset = gpa & (psize - 1);
 	if (nb_ret)
 		*nb_ret = psize - offset;
@@ -1003,7 +1008,6 @@
 void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
 {
 	struct page *page = virt_to_page(va);
 
-	page = compound_head(page);
 	put_page(page);
 }

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 01294a5099dd..108d1f580177 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1192,8 +1192,6 @@ static void unpin_slot(struct kvm *kvm, int slot_id)
 			continue;
 		pfn = physp[j] >> PAGE_SHIFT;
 		page = pfn_to_page(pfn);
-		if (PageHuge(page))
-			page = compound_head(page);
 		SetPageDirty(page);
 		put_page(page);
 	}
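The refcounting rule that patch 3 leans on can be shown with a toy model:
put_page() itself locates the compound head and maintains the per-tail
counts, so a caller that passes compound_head(page) instead of the page
gup() returned skips the tail bookkeeping. The sketch below is a loose
stand-alone analogy, not the real mm code; this struct page is a
three-field hypothetical stand-in.

/*
 * Toy model of the hugepage refcounting bug fixed by patch 3. A
 * compound page is reduced to a head with a refcount plus a tail that
 * forwards to it; put_page_model() loosely mimics put_page() finding
 * the head itself while also maintaining per-tail state.
 */
#include <stdio.h>

struct page {
	struct page *head;	/* NULL for a head page, else the compound head */
	int refcount;		/* meaningful on the head only */
	int tail_mapcount;	/* per-tail state put_page() must maintain */
};

static void put_page_model(struct page *p)
{
	if (p->head) {
		p->tail_mapcount--;	/* tail bookkeeping the old callers skipped */
		p = p->head;
	}
	p->refcount--;
}

int main(void)
{
	struct page head = { NULL, 1, 0 };
	struct page tail = { &head, 0, 1 };	/* state as left by gup() */

	/* Old code: caller did compound_head() first, bypassing the tail. */
	put_page_model(&head);
	printf("old: head ref=%d, tail mapcount=%d (non-zero -> bad_page())\n",
	       head.refcount, tail.tail_mapcount);

	head = (struct page){ NULL, 1, 0 };
	tail = (struct page){ &head, 0, 1 };

	/* Fixed code: pass the page gup() returned and let put_page() cope. */
	put_page_model(&tail);
	printf("new: head ref=%d, tail mapcount=%d (clean release)\n",
	       head.refcount, tail.tail_mapcount);
	return 0;
}

The old path leaves the tail's count at 1 when the page goes back to the
hugepage pool, which is the non-zero mapcount the commit message describes.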
From 331b646d60b0c3885208e1e02bd9f40319953efc Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Wed, 18 Apr 2012 19:23:50 +0300
Subject: [PATCH 4/4] KVM: ia64: fix build due to typo

s/kcm/kvm/.

Signed-off-by: Marcelo Tosatti
Signed-off-by: Avi Kivity
---
 arch/ia64/kvm/kvm-ia64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index f5104b7c52cd..463fb3bbe11e 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1174,7 +1174,7 @@ out:
 
 bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
 {
-	return irqchip_in_kernel(vcpu->kcm) == (vcpu->arch.apic != NULL);
+	return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
 }
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)