From 6a14c2222419daa7ecebc7f566841b9b4d63b397 Mon Sep 17 00:00:00 2001 From: Tudor Laurentiu Date: Wed, 23 Sep 2015 18:06:22 +0300 Subject: [PATCH 1/9] powerpc/e6500: add TMCFG0 register definition The register is not currently used in the base kernel but will be in a forthcoming kvm patch. Signed-off-by: Laurentiu Tudor Acked-by: Scott Wood Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/reg_booke.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/powerpc/include/asm/reg_booke.h b/arch/powerpc/include/asm/reg_booke.h index 16547efa2d5a..2fef74b474f0 100644 --- a/arch/powerpc/include/asm/reg_booke.h +++ b/arch/powerpc/include/asm/reg_booke.h @@ -742,6 +742,12 @@ #define MMUBE1_VBE4 0x00000002 #define MMUBE1_VBE5 0x00000001 +#define TMRN_TMCFG0 16 /* Thread Management Configuration Register 0 */ +#define TMRN_TMCFG0_NPRIBITS 0x003f0000 /* Bits of thread priority */ +#define TMRN_TMCFG0_NPRIBITS_SHIFT 16 +#define TMRN_TMCFG0_NATHRD 0x00003f00 /* Number of active threads */ +#define TMRN_TMCFG0_NATHRD_SHIFT 8 +#define TMRN_TMCFG0_NTHRD 0x0000003f /* Number of threads */ #define TMRN_IMSR0 0x120 /* Initial MSR Register 0 (e6500) */ #define TMRN_IMSR1 0x121 /* Initial MSR Register 1 (e6500) */ #define TMRN_INIA0 0x140 /* Next Instruction Address Register 0 */ From d4cd4f9586f87a5fc828b4c4698aa4faf56c96fc Mon Sep 17 00:00:00 2001 From: Andrzej Hajda Date: Thu, 24 Sep 2015 16:00:23 +0200 Subject: [PATCH 2/9] KVM: PPC: e500: fix handling local_sid_lookup result The function can return negative value. The problem has been detected using proposed semantic patch scripts/coccinelle/tests/assign_signed_to_unsigned.cocci [1]. [1]: http://permalink.gmane.org/gmane.linux.kernel/2046107 Signed-off-by: Andrzej Hajda Acked-by: Scott Wood Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/e500.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c index b29ce752c7d6..32fdab57d604 100644 --- a/arch/powerpc/kvm/e500.c +++ b/arch/powerpc/kvm/e500.c @@ -237,7 +237,8 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, struct kvm_book3e_206_tlb_entry *gtlbe) { struct vcpu_id_table *idt = vcpu_e500->idt; - unsigned int pr, tid, ts, pid; + unsigned int pr, tid, ts; + int pid; u32 val, eaddr; unsigned long flags; From 2daab50e17997424af57d1ab14042a51e9a368ab Mon Sep 17 00:00:00 2001 From: Tudor Laurentiu Date: Fri, 25 Sep 2015 18:02:23 +0300 Subject: [PATCH 3/9] KVM: PPC: e500: Emulate TMCFG0 TMRN register Emulate TMCFG0 TMRN register exposing one HW thread per vcpu. Signed-off-by: Mihai Caraman [Laurentiu.Tudor@freescale.com: rebased on latest kernel, use define instead of hardcoded value, moved code in own function] Signed-off-by: Laurentiu Tudor Acked-by: Scott Wood Signed-off-by: Paul Mackerras --- arch/powerpc/include/asm/disassemble.h | 5 +++++ arch/powerpc/kvm/e500_emulate.c | 19 +++++++++++++++++++ 2 files changed, 24 insertions(+) diff --git a/arch/powerpc/include/asm/disassemble.h b/arch/powerpc/include/asm/disassemble.h index 6330a61b875a..4852e849128b 100644 --- a/arch/powerpc/include/asm/disassemble.h +++ b/arch/powerpc/include/asm/disassemble.h @@ -42,6 +42,11 @@ static inline unsigned int get_dcrn(u32 inst) return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); } +static inline unsigned int get_tmrn(u32 inst) +{ + return ((inst >> 16) & 0x1f) | ((inst >> 6) & 0x3e0); +} + static inline unsigned int get_rt(u32 inst) { return (inst >> 21) & 0x1f; diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index ce7291c79f6c..990db69a1d0b 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "booke.h" #include "e500.h" @@ -22,6 +23,7 @@ #define XOP_DCBTLS 166 #define XOP_MSGSND 206 #define XOP_MSGCLR 238 +#define XOP_MFTMR 366 #define XOP_TLBIVAX 786 #define XOP_TLBSX 914 #define XOP_TLBRE 946 @@ -113,6 +115,19 @@ static int kvmppc_e500_emul_dcbtls(struct kvm_vcpu *vcpu) return EMULATE_DONE; } +static int kvmppc_e500_emul_mftmr(struct kvm_vcpu *vcpu, unsigned int inst, + int rt) +{ + /* Expose one thread per vcpu */ + if (get_tmrn(inst) == TMRN_TMCFG0) { + kvmppc_set_gpr(vcpu, rt, + 1 | (1 << TMRN_TMCFG0_NATHRD_SHIFT)); + return EMULATE_DONE; + } + + return EMULATE_FAIL; +} + int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { @@ -165,6 +180,10 @@ int kvmppc_core_emulate_op_e500(struct kvm_run *run, struct kvm_vcpu *vcpu, emulated = kvmppc_e500_emul_tlbivax(vcpu, ea); break; + case XOP_MFTMR: + emulated = kvmppc_e500_emul_mftmr(vcpu, inst, rt); + break; + case XOP_EHPRIV: emulated = kvmppc_e500_emul_ehpriv(run, vcpu, inst, advance); From 224f363246c3668452ec0ab5a0ff51824822f3fd Mon Sep 17 00:00:00 2001 From: Tudor Laurentiu Date: Thu, 1 Oct 2015 15:58:03 +0300 Subject: [PATCH 4/9] KVM: PPC: e500: fix couple of shift operations on 64 bits Fix couple of cases where we shift left a 32-bit value thus might get truncated results on 64-bit targets. Signed-off-by: Laurentiu Tudor Suggested-by: Scott Wood Acked-by: Scott Wood Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/e500_mmu_host.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 4d33e199edcc..5e2102c19586 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -406,7 +406,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, for (; tsize > BOOK3E_PAGESZ_4K; tsize -= 2) { unsigned long gfn_start, gfn_end; - tsize_pages = 1 << (tsize - 2); + tsize_pages = 1UL << (tsize - 2); gfn_start = gfn & ~(tsize_pages - 1); gfn_end = gfn_start + tsize_pages; @@ -447,7 +447,7 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, } if (likely(!pfnmap)) { - tsize_pages = 1 << (tsize + 10 - PAGE_SHIFT); + tsize_pages = 1UL << (tsize + 10 - PAGE_SHIFT); pfn = gfn_to_pfn_memslot(slot, gfn); if (is_error_noslot_pfn(pfn)) { if (printk_ratelimit()) From 966d713e86db1916a0b45a324a935d009737316e Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Mon, 23 Mar 2015 22:24:45 +0530 Subject: [PATCH 5/9] KVM: PPC: Book3S HV: Deliver machine check with MSR(RI=0) to guest as MCE For the machine check interrupt that happens while we are in the guest, kvm layer attempts the recovery, and then delivers the machine check interrupt directly to the guest if recovery fails. On successful recovery we go back to normal functioning of the guest. But there can be cases where a machine check interrupt can happen with MSR(RI=0) while we are in the guest. This means MC interrupt is unrecoverable and we have to deliver a machine check to the guest since the machine check interrupt might have trashed valid values in SRR0/1. The current implementation do not handle this case, causing guest to crash with Bad kernel stack pointer instead of machine check oops message. [26281.490060] Bad kernel stack pointer 3fff9ccce5b0 at c00000000000490c [26281.490434] Oops: Bad kernel stack pointer, sig: 6 [#1] [26281.490472] SMP NR_CPUS=2048 NUMA pSeries This patch fixes this issue by checking MSR(RI=0) in KVM layer and forwarding unrecoverable interrupt to guest which then panics with proper machine check Oops message. Signed-off-by: Mahesh Salgaonkar Acked-by: Paul Mackerras Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index b98889e9851d..fb8d29650612 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -2377,7 +2377,6 @@ machine_check_realmode: mr r3, r9 /* get vcpu pointer */ bl kvmppc_realmode_machine_check nop - cmpdi r3, 0 /* Did we handle MCE ? */ ld r9, HSTATE_KVM_VCPU(r13) li r12, BOOK3S_INTERRUPT_MACHINE_CHECK /* @@ -2390,13 +2389,18 @@ machine_check_realmode: * The old code used to return to host for unhandled errors which * was causing guest to hang with soft lockups inside guest and * makes it difficult to recover guest instance. + * + * if we receive machine check with MSR(RI=0) then deliver it to + * guest as machine check causing guest to crash. */ - ld r10, VCPU_PC(r9) ld r11, VCPU_MSR(r9) + andi. r10, r11, MSR_RI /* check for unrecoverable exception */ + beq 1f /* Deliver a machine check to guest */ + ld r10, VCPU_PC(r9) + cmpdi r3, 0 /* Did we handle MCE ? */ bne 2f /* Continue guest execution. */ /* If not, deliver a machine check. SRR0/1 are already set */ - li r10, BOOK3S_INTERRUPT_MACHINE_CHECK - ld r11, VCPU_MSR(r9) +1: li r10, BOOK3S_INTERRUPT_MACHINE_CHECK bl kvmppc_msr_interrupt 2: b fast_interrupt_c_return From 572abd563befd56bee918316c4f7ab144d2decf5 Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Tue, 29 Sep 2015 17:15:45 +1000 Subject: [PATCH 6/9] KVM: PPC: Book3S HV: Don't fall back to smaller HPT size in allocation ioctl Currently the KVM_PPC_ALLOCATE_HTAB will try to allocate the requested size of HPT, and if that is not possible, then try to allocate smaller sizes (by factors of 2) until either a minimum is reached or the allocation succeeds. This is not ideal for userspace, particularly in migration scenarios, where the destination VM really does require the size requested. Also, the minimum HPT size of 256kB may be insufficient for the guest to run successfully. This removes the fallback to smaller sizes on allocation failure for the KVM_PPC_ALLOCATE_HTAB ioctl. The fallback still exists for the case where the HPT is allocated at the time the first VCPU is run, if no HPT has been allocated by ioctl by that time. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_64_mmu_hv.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 1f9c0a17f445..10722b1e38b5 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -70,7 +70,8 @@ long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) } /* Lastly try successively smaller sizes from the page allocator */ - while (!hpt && order > PPC_MIN_HPT_ORDER) { + /* Only do this if userspace didn't specify a size via ioctl */ + while (!hpt && order > PPC_MIN_HPT_ORDER && !htab_orderp) { hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| __GFP_NOWARN, order - PAGE_SHIFT); if (!hpt) From c64dfe2af3de448f9afa2e542983015b31c1f91c Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 18 May 2015 14:10:54 +1000 Subject: [PATCH 7/9] KVM: PPC: Book3S HV: Make H_REMOVE return correct HPTE value for absent HPTEs This fixes a bug where the old HPTE value returned by H_REMOVE has the valid bit clear if the HPTE was an absent HPTE, as happens for HPTEs for emulated MMIO pages and for RAM pages that have been paged out by the host. If the absent bit is set, we clear it and set the valid bit, because from the guest's point of view, the HPTE is valid. Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index c1df9bb1e413..97e7f8c853d8 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -470,6 +470,8 @@ long kvmppc_do_h_remove(struct kvm *kvm, unsigned long flags, note_hpte_modification(kvm, rev); unlock_hpte(hpte, 0); + if (v & HPTE_V_ABSENT) + v = (v & ~HPTE_V_ABSENT) | HPTE_V_VALID; hpret[0] = v; hpret[1] = r; return H_SUCCESS; From bfec5c2cc0adad3b343281eb4f9b94222c5f594f Mon Sep 17 00:00:00 2001 From: Nikunj A Dadhania Date: Fri, 16 Oct 2015 10:27:53 +0530 Subject: [PATCH 8/9] KVM: PPC: Implement extension to report number of memslots QEMU assumes 32 memslots if this extension is not implemented. Although, current value of KVM_USER_MEM_SLOTS is 32, once KVM_USER_MEM_SLOTS changes QEMU would take a wrong value. Signed-off-by: Nikunj A Dadhania Reviewed-by: Thomas Huth Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/powerpc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2e51289610e4..6fd2405c7f4a 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -559,6 +559,9 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) else r = num_online_cpus(); break; + case KVM_CAP_NR_MEMSLOTS: + r = KVM_USER_MEM_SLOTS; + break; case KVM_CAP_MAX_VCPUS: r = KVM_MAX_VCPUS; break; From 70aa3961a196ac32baf54032b2051bac9a941118 Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Thu, 15 Oct 2015 11:29:58 +0530 Subject: [PATCH 9/9] KVM: PPC: Book3S HV: Handle H_DOORBELL on the guest exit path Currently a CPU running a guest can receive a H_DOORBELL in the following two cases: 1) When the CPU is napping due to CEDE or there not being a guest vcpu. 2) The CPU is running the guest vcpu. Case 1), the doorbell message is not cleared since we were waking up from nap. Hence when the EE bit gets set on transition from guest to host, the H_DOORBELL interrupt is delivered to the host and the corresponding handler is invoked. However in Case 2), the message gets cleared by the action of taking the H_DOORBELL interrupt. Since the CPU was running a guest, instead of invoking the doorbell handler, the code invokes the second-level interrupt handler to switch the context from the guest to the host. At this point the setting of the EE bit doesn't result in the CPU getting the doorbell interrupt since it has already been delivered once. So, the handler for this doorbell is never invoked! This causes softlockups if the missed DOORBELL was an IPI sent from a sibling subcore on the same CPU. This patch fixes it by explitly invoking the doorbell handler on the exit path if the exit reason is H_DOORBELL similar to the way an EXTERNAL interrupt is handled. Since this will also handle Case 1), we can unconditionally clear the doorbell message in kvmppc_check_wake_reason. Signed-off-by: Gautham R. Shenoy Signed-off-by: Paul Mackerras --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index fb8d29650612..b1dab8d1d885 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -150,6 +150,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) cmpwi cr1, r12, BOOK3S_INTERRUPT_MACHINE_CHECK cmpwi r12, BOOK3S_INTERRUPT_EXTERNAL beq 11f + cmpwi r12, BOOK3S_INTERRUPT_H_DOORBELL + beq 15f /* Invoke the H_DOORBELL handler */ cmpwi cr2, r12, BOOK3S_INTERRUPT_HMI beq cr2, 14f /* HMI check */ @@ -174,6 +176,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mtspr SPRN_HSRR1, r7 b hmi_exception_after_realmode +15: mtspr SPRN_HSRR0, r8 + mtspr SPRN_HSRR1, r7 + ba 0xe80 + kvmppc_primary_no_guest: /* We handle this much like a ceded vcpu */ /* put the HDEC into the DEC, since HDEC interrupts don't wake us */ @@ -2440,14 +2446,19 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) /* hypervisor doorbell */ 3: li r12, BOOK3S_INTERRUPT_H_DOORBELL + + /* + * Clear the doorbell as we will invoke the handler + * explicitly in the guest exit path. + */ + lis r6, (PPC_DBELL_SERVER << (63-36))@h + PPC_MSGCLR(6) /* see if it's a host IPI */ li r3, 1 lbz r0, HSTATE_HOST_IPI(r13) cmpwi r0, 0 bnelr - /* if not, clear it and return -1 */ - lis r6, (PPC_DBELL_SERVER << (63-36))@h - PPC_MSGCLR(6) + /* if not, return -1 */ li r3, -1 blr