From a4fa16353108431e7cfdfc3ecf683bac21b50755 Mon Sep 17 00:00:00 2001
From: Gleb Natapov
Date: Thu, 3 May 2012 11:36:39 +0300
Subject: [PATCH 1/4] KVM: ensure async PF event wakes up vcpu from halt

If a vcpu executes the hlt instruction while an async PF is waiting to be
delivered, the vcpu can block and deliver the async PF only after another
event wakes it up. This happens because kvm_check_async_pf_completion()
removes the completion event from vcpu->async_pf.done before entering
kvm_vcpu_block(), and this makes kvm_arch_vcpu_runnable() return false.
The solution is to make the vcpu runnable when processing a completion.

Signed-off-by: Gleb Natapov
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/x86.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 91a5e989abcf..185a2b823a2d 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -6581,6 +6581,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
 		kvm_inject_page_fault(vcpu, &fault);
 	}
 	vcpu->arch.apf.halted = false;
+	vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
 }
 
 bool kvm_arch_can_inject_async_page_present(struct kvm_vcpu *vcpu)

From 62c49cc976af84cb0ffcb5ec07ee88da1a94e222 Mon Sep 17 00:00:00 2001
From: Gleb Natapov
Date: Wed, 2 May 2012 15:04:02 +0300
Subject: [PATCH 2/4] KVM: Do not take reference to mm during async #PF

It turned out to be totally unneeded. The reason the code was introduced
is so that KVM can prefault the swapped-in page, but the prefault can fail
even if the mm is pinned, since the page table can change anyway. KVM
handles this situation correctly though and does not inject spurious page
faults.

Fixes: "INFO: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected" warning
while running LTP inside a KVM guest using the recent -next kernel.

Reported-by: Sasha Levin
Signed-off-by: Gleb Natapov
Signed-off-by: Avi Kivity
---
 arch/x86/kernel/kvm.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index b8ba6e4a27e4..e554e5ad2fe8 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -79,7 +79,6 @@ struct kvm_task_sleep_node {
 	u32 token;
 	int cpu;
 	bool halted;
-	struct mm_struct *mm;
 };
 
 static struct kvm_task_sleep_head {
@@ -126,9 +125,7 @@ void kvm_async_pf_task_wait(u32 token)
 
 	n.token = token;
 	n.cpu = smp_processor_id();
-	n.mm = current->active_mm;
 	n.halted = idle || preempt_count() > 1;
-	atomic_inc(&n.mm->mm_count);
 	init_waitqueue_head(&n.wq);
 	hlist_add_head(&n.link, &b->list);
 	spin_unlock(&b->lock);
@@ -161,9 +158,6 @@ EXPORT_SYMBOL_GPL(kvm_async_pf_task_wait);
 static void apf_task_wake_one(struct kvm_task_sleep_node *n)
 {
 	hlist_del_init(&n->link);
-	if (!n->mm)
-		return;
-	mmdrop(n->mm);
 	if (n->halted)
 		smp_send_reschedule(n->cpu);
 	else if (waitqueue_active(&n->wq))
@@ -207,7 +201,7 @@ again:
 		 * async PF was not yet handled.
 		 * Add dummy entry for the token.
 		 */
-		n = kmalloc(sizeof(*n), GFP_ATOMIC);
+		n = kzalloc(sizeof(*n), GFP_ATOMIC);
 		if (!n) {
 			/*
 			 * Allocation failed! Busy wait while other cpu
@@ -219,7 +213,6 @@
 		}
 		n->token = token;
 		n->cpu = smp_processor_id();
-		n->mm = NULL;
 		init_waitqueue_head(&n->wq);
 		hlist_add_head(&n->link, &b->list);
 	} else
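Both patches above adjust the async PF bookkeeping, and the race closed by
patch 1 is easy to model in isolation: the completion check consumes the
wakeup event before the vcpu blocks, so the runnability test that
kvm_vcpu_block() relies on sees nothing pending. Below is a minimal
userspace sketch of that ordering; the vcpu struct and the function names
are simplified stand-ins echoing the kernel code, not the real KVM
implementation.

/*
 * Toy userspace model of the halt/async-PF ordering fixed by patch 1.
 * The names echo kvm_check_async_pf_completion() and
 * kvm_arch_vcpu_runnable(), but all types and logic here are
 * simplified stand-ins, not the real KVM code.
 */
#include <stdbool.h>
#include <stdio.h>

enum mp_state { MP_STATE_RUNNABLE, MP_STATE_HALTED };

struct vcpu {
	enum mp_state mp_state;
	int pending_completions;	/* stands in for vcpu->async_pf.done */
};

/* Consumes one completion, as kvm_check_async_pf_completion() does. */
static void check_async_pf_completion(struct vcpu *v, bool apply_fix)
{
	if (v->pending_completions > 0) {
		v->pending_completions--;	/* event removed from .done */
		if (apply_fix)
			v->mp_state = MP_STATE_RUNNABLE;	/* the one-line fix */
	}
}

/* Mirrors the runnability test made before blocking in kvm_vcpu_block(). */
static bool vcpu_runnable(struct vcpu *v)
{
	return v->mp_state == MP_STATE_RUNNABLE || v->pending_completions > 0;
}

int main(void)
{
	/* halted vcpu with one completion queued */
	struct vcpu v = { MP_STATE_HALTED, 1 };

	check_async_pf_completion(&v, false);
	printf("without fix: runnable=%d (vcpu blocks until another event)\n",
	       vcpu_runnable(&v));

	v = (struct vcpu){ MP_STATE_HALTED, 1 };
	check_async_pf_completion(&v, true);
	printf("with fix:    runnable=%d (vcpu wakes from halt)\n",
	       vcpu_runnable(&v));
	return 0;
}

Without the fix the first case prints runnable=0: the event is gone but the
state still says halted, which is exactly what kvm_arch_vcpu_runnable() saw.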
From de6c0b02d4d7bdf2587e679a6ddbb71b7d68bb89 Mon Sep 17 00:00:00 2001
From: David Gibson
Date: Tue, 8 May 2012 20:24:08 +1000
Subject: [PATCH 3/4] KVM: PPC: Book3S HV: Fix refcounting of hugepages

The H_REGISTER_VPA hcall implementation in HV Power KVM needs to pin some
guest memory pages into host memory so that they can be safely accessed
from usermode. It does this using get_user_pages_fast(). When the VPA is
unregistered, or the VCPUs are cleaned up, these pages are released using
put_page().

However, get_user_pages() is invoked on the specific memory area of the
VPA, which could lie within hugepages. In case the pinned page is huge,
we explicitly find the head page of the compound page before calling
put_page() on it.

At least with the latest kernel, this is not correct. put_page() already
handles finding the correct head page of a compound, and also deals with
various counts on the individual tail page which are important for
transparent huge pages. We don't support transparent hugepages on Power,
but even so, bypassing this count maintenance can lead (when the VM ends)
to a hugepage being released back to the pool with a non-zero mapcount on
one of the tail pages. This can then lead to a bad_page() when the page
is released from the hugepage pool.

This removes the explicit compound_head() call to correct this bug.

Signed-off-by: David Gibson
Signed-off-by: Paul Mackerras
Acked-by: Alexander Graf
Signed-off-by: Avi Kivity
---
 arch/powerpc/kvm/book3s_64_mmu_hv.c | 22 +++++++++++++---------
 arch/powerpc/kvm/book3s_hv.c        |  2 --
 2 files changed, 13 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index ddc485a529f2..c3beaeef3f60 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -258,6 +258,8 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
 		    !(memslot->userspace_addr & (s - 1))) {
 			start &= ~(s - 1);
 			pgsize = s;
+			get_page(hpage);
+			put_page(page);
 			page = hpage;
 		}
 	}
@@ -281,11 +283,8 @@ static long kvmppc_get_guest_page(struct kvm *kvm, unsigned long gfn,
 	err = 0;
 
  out:
-	if (got) {
-		if (PageHuge(page))
-			page = compound_head(page);
+	if (got)
 		put_page(page);
-	}
 	return err;
 
  up_err:
@@ -678,8 +677,15 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
 		SetPageDirty(page);
 
  out_put:
-	if (page)
-		put_page(page);
+	if (page) {
+		/*
+		 * We drop pages[0] here, not page because page might
+		 * have been set to the head page of a compound, but
+		 * we have to drop the reference on the correct tail
+		 * page to match the get inside gup()
+		 */
+		put_page(pages[0]);
+	}
 	return ret;
 
  out_unlock:
@@ -979,6 +985,7 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 			pa = *physp;
 		}
 		page = pfn_to_page(pa >> PAGE_SHIFT);
+		get_page(page);
 	} else {
 		hva = gfn_to_hva_memslot(memslot, gfn);
 		npages = get_user_pages_fast(hva, 1, 1, pages);
@@ -991,8 +998,6 @@ void *kvmppc_pin_guest_page(struct kvm *kvm, unsigned long gpa,
 		page = compound_head(page);
 		psize <<= compound_order(page);
 	}
-	if (!kvm->arch.using_mmu_notifiers)
-		get_page(page);
 	offset = gpa & (psize - 1);
 	if (nb_ret)
 		*nb_ret = psize - offset;
@@ -1003,7 +1008,6 @@
 void kvmppc_unpin_guest_page(struct kvm *kvm, void *va)
 {
 	struct page *page = virt_to_page(va);
 
-	page = compound_head(page);
 	put_page(page);
 }

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 01294a5099dd..108d1f580177 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -1192,8 +1192,6 @@ static void unpin_slot(struct kvm *kvm, int slot_id)
 			continue;
 		pfn = physp[j] >> PAGE_SHIFT;
 		page = pfn_to_page(pfn);
-		if (PageHuge(page))
-			page = compound_head(page);
 		SetPageDirty(page);
 		put_page(page);
 	}
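The refcounting rule that patch 3 leans on can be shown with a toy model:
put_page() itself locates the compound head and maintains the per-tail
counts, so a caller that passes compound_head(page) instead of the page
gup() returned skips the tail bookkeeping. The sketch below is a loose
stand-alone analogy, not the real mm code; this struct page is a
three-field hypothetical stand-in.

/*
 * Toy model of the hugepage refcounting bug fixed by patch 3. A
 * compound page is reduced to a head with a refcount plus a tail that
 * forwards to it; put_page_model() loosely mimics put_page() finding
 * the head itself while also maintaining per-tail state.
 */
#include <stdio.h>

struct page {
	struct page *head;	/* NULL for a head page, else the compound head */
	int refcount;		/* meaningful on the head only */
	int tail_mapcount;	/* per-tail state put_page() must maintain */
};

static void put_page_model(struct page *p)
{
	if (p->head) {
		p->tail_mapcount--;	/* tail bookkeeping the old callers skipped */
		p = p->head;
	}
	p->refcount--;
}

int main(void)
{
	struct page head = { NULL, 1, 0 };
	struct page tail = { &head, 0, 1 };	/* state as left by gup() */

	/* Old code: caller did compound_head() first, bypassing the tail. */
	put_page_model(&head);
	printf("old: head ref=%d, tail mapcount=%d (non-zero -> bad_page())\n",
	       head.refcount, tail.tail_mapcount);

	head = (struct page){ NULL, 1, 0 };
	tail = (struct page){ &head, 0, 1 };

	/* Fixed code: pass the page gup() returned and let put_page() cope. */
	put_page_model(&tail);
	printf("new: head ref=%d, tail mapcount=%d (clean release)\n",
	       head.refcount, tail.tail_mapcount);
	return 0;
}

The old path leaves the tail's count at 1 when the page goes back to the
hugepage pool, which is the non-zero mapcount the commit message describes.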
From 331b646d60b0c3885208e1e02bd9f40319953efc Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Wed, 18 Apr 2012 19:23:50 +0300
Subject: [PATCH 4/4] KVM: ia64: fix build due to typo

s/kcm/kvm/.

Signed-off-by: Marcelo Tosatti
Signed-off-by: Avi Kivity
---
 arch/ia64/kvm/kvm-ia64.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index f5104b7c52cd..463fb3bbe11e 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -1174,7 +1174,7 @@ out:
 
 bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
 {
-	return irqchip_in_kernel(vcpu->kcm) == (vcpu->arch.apic != NULL);
+	return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
 }
 
 int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)