diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h index 77529a3e3811..5b4023c616f7 100644 --- a/arch/powerpc/include/asm/book3s/64/mmu.h +++ b/arch/powerpc/include/asm/book3s/64/mmu.h @@ -59,13 +59,14 @@ extern struct patb_entry *partition_tb; #define PRTS_MASK 0x1f /* process table size field */ #define PRTB_MASK 0x0ffffffffffff000UL -/* - * Limit process table to PAGE_SIZE table. This - * also limit the max pid we can support. - * MAX_USER_CONTEXT * 16 bytes of space. - */ -#define PRTB_SIZE_SHIFT (CONTEXT_BITS + 4) -#define PRTB_ENTRIES (1ul << CONTEXT_BITS) +/* Number of supported PID bits */ +extern unsigned int mmu_pid_bits; + +/* Base PID to allocate from */ +extern unsigned int mmu_base_pid; + +#define PRTB_SIZE_SHIFT (mmu_pid_bits + 4) +#define PRTB_ENTRIES (1ul << mmu_pid_bits) /* * Power9 currently only support 64K partition table size. diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index da7e9432fa8f..0c76675394c5 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -45,7 +45,7 @@ extern void set_context(unsigned long id, pgd_t *pgd); #ifdef CONFIG_PPC_BOOK3S_64 extern void radix__switch_mmu_context(struct mm_struct *prev, - struct mm_struct *next); + struct mm_struct *next); static inline void switch_mmu_context(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) @@ -67,6 +67,12 @@ extern void __destroy_context(unsigned long context_id); extern void mmu_context_init(void); #endif +#if defined(CONFIG_KVM_BOOK3S_HV_POSSIBLE) && defined(CONFIG_PPC_RADIX_MMU) +extern void radix_kvm_prefetch_workaround(struct mm_struct *mm); +#else +static inline void radix_kvm_prefetch_workaround(struct mm_struct *mm) { } +#endif + extern void switch_cop(struct mm_struct *next); extern int use_cop(unsigned long acop, struct mm_struct *mm); extern void drop_cop(unsigned long acop, struct mm_struct *mm); @@ -79,9 +85,13 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next, struct task_struct *tsk) { + bool new_on_cpu = false; + /* Mark this context has been used on the new CPU */ - if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) + if (!cpumask_test_cpu(smp_processor_id(), mm_cpumask(next))) { cpumask_set_cpu(smp_processor_id(), mm_cpumask(next)); + new_on_cpu = true; + } /* 32-bit keeps track of the current PGDIR in the thread struct */ #ifdef CONFIG_PPC32 @@ -109,6 +119,10 @@ static inline void switch_mm_irqs_off(struct mm_struct *prev, if (cpu_has_feature(CPU_FTR_ALTIVEC)) asm volatile ("dssall"); #endif /* CONFIG_ALTIVEC */ + + if (new_on_cpu) + radix_kvm_prefetch_workaround(next); + /* * The actual HW switching method differs between the various * sub architectures. Out of line for now diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index cb44065e2946..c52184a8efdf 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -1443,12 +1443,14 @@ mc_cont: ori r6,r6,1 mtspr SPRN_CTRLT,r6 4: - /* Read the guest SLB and save it away */ + /* Check if we are running hash or radix and store it in cr2 */ ld r5, VCPU_KVM(r9) lbz r0, KVM_RADIX(r5) - cmpwi r0, 0 + cmpwi cr2,r0,0 + + /* Read the guest SLB and save it away */ li r5, 0 - bne 3f /* for radix, save 0 entries */ + bne cr2, 3f /* for radix, save 0 entries */ lwz r0,VCPU_SLB_NR(r9) /* number of entries in SLB */ mtctr r0 li r6,0 @@ -1712,11 +1714,6 @@ BEGIN_FTR_SECTION_NESTED(96) END_FTR_SECTION_NESTED(CPU_FTR_ARCH_300, 0, 96) END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) 22: - /* Clear out SLB */ - li r5,0 - slbmte r5,r5 - slbia - ptesync /* Restore host values of some registers */ BEGIN_FTR_SECTION @@ -1737,10 +1734,56 @@ BEGIN_FTR_SECTION mtspr SPRN_PID, r7 mtspr SPRN_IAMR, r8 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + +#ifdef CONFIG_PPC_RADIX_MMU + /* + * Are we running hash or radix ? + */ + beq cr2,3f + + /* Radix: Handle the case where the guest used an illegal PID */ + LOAD_REG_ADDR(r4, mmu_base_pid) + lwz r3, VCPU_GUEST_PID(r9) + lwz r5, 0(r4) + cmpw cr0,r3,r5 + blt 2f + + /* + * Illegal PID, the HW might have prefetched and cached in the TLB + * some translations for the LPID 0 / guest PID combination which + * Linux doesn't know about, so we need to flush that PID out of + * the TLB. First we need to set LPIDR to 0 so tlbiel applies to + * the right context. + */ + li r0,0 + mtspr SPRN_LPID,r0 + isync + + /* Then do a congruence class local flush */ + ld r6,VCPU_KVM(r9) + lwz r0,KVM_TLB_SETS(r6) + mtctr r0 + li r7,0x400 /* IS field = 0b01 */ + ptesync + sldi r0,r3,32 /* RS has PID */ +1: PPC_TLBIEL(7,0,2,1,1) /* RIC=2, PRS=1, R=1 */ + addi r7,r7,0x1000 + bdnz 1b + ptesync + +2: /* Flush the ERAT on radix P9 DD1 guest exit */ BEGIN_FTR_SECTION PPC_INVALIDATE_ERAT END_FTR_SECTION_IFSET(CPU_FTR_POWER9_DD1) + b 4f +#endif /* CONFIG_PPC_RADIX_MMU */ + /* Hash: clear out SLB */ +3: li r5,0 + slbmte r5,r5 + slbia + ptesync +4: /* * POWER7/POWER8 guest -> host partition switch code. * We don't have to lock against tlbies but we do diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index abed1fe6992f..a75f63833284 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -126,9 +126,10 @@ static int hash__init_new_context(struct mm_struct *mm) static int radix__init_new_context(struct mm_struct *mm) { unsigned long rts_field; - int index; + int index, max_id; - index = alloc_context_id(1, PRTB_ENTRIES - 1); + max_id = (1 << mmu_pid_bits) - 1; + index = alloc_context_id(mmu_base_pid, max_id); if (index < 0) return index; diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 5cc50d47ce3f..671a45d86c18 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -25,6 +25,9 @@ #include +unsigned int mmu_pid_bits; +unsigned int mmu_base_pid; + static int native_register_process_table(unsigned long base, unsigned long pg_sz, unsigned long table_size) { @@ -261,11 +264,34 @@ static void __init radix_init_pgtable(void) for_each_memblock(memory, reg) WARN_ON(create_physical_mapping(reg->base, reg->base + reg->size)); + + /* Find out how many PID bits are supported */ + if (cpu_has_feature(CPU_FTR_HVMODE)) { + if (!mmu_pid_bits) + mmu_pid_bits = 20; +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + /* + * When KVM is possible, we only use the top half of the + * PID space to avoid collisions between host and guest PIDs + * which can cause problems due to prefetch when exiting the + * guest with AIL=3 + */ + mmu_base_pid = 1 << (mmu_pid_bits - 1); +#else + mmu_base_pid = 1; +#endif + } else { + /* The guest uses the bottom half of the PID space */ + if (!mmu_pid_bits) + mmu_pid_bits = 19; + mmu_base_pid = 1; + } + /* * Allocate Partition table and process table for the * host. */ - BUILD_BUG_ON_MSG((PRTB_SIZE_SHIFT > 36), "Process table size too large."); + BUG_ON(PRTB_SIZE_SHIFT > 36); process_tb = early_alloc_pgtable(1UL << PRTB_SIZE_SHIFT); /* * Fill in the process table. @@ -339,6 +365,12 @@ static int __init radix_dt_scan_page_sizes(unsigned long node, if (type == NULL || strcmp(type, "cpu") != 0) return 0; + /* Find MMU PID size */ + prop = of_get_flat_dt_prop(node, "ibm,mmu-pid-bits", &size); + if (prop && size == 4) + mmu_pid_bits = be32_to_cpup(prop); + + /* Grab page size encodings */ prop = of_get_flat_dt_prop(node, "ibm,processor-radix-AP-encodings", &size); if (!prop) return 0; diff --git a/arch/powerpc/mm/tlb-radix.c b/arch/powerpc/mm/tlb-radix.c index 744e0164ecf5..16ae1bbe13f0 100644 --- a/arch/powerpc/mm/tlb-radix.c +++ b/arch/powerpc/mm/tlb-radix.c @@ -12,12 +12,12 @@ #include #include #include -#include +#include #include #include #include - +#include #define RIC_FLUSH_TLB 0 #define RIC_FLUSH_PWC 1 @@ -454,3 +454,44 @@ void radix__flush_tlb_pte_p9_dd1(unsigned long old_pte, struct mm_struct *mm, else radix__flush_tlb_page_psize(mm, address, mmu_virtual_psize); } + +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +extern void radix_kvm_prefetch_workaround(struct mm_struct *mm) +{ + unsigned int pid = mm->context.id; + + if (unlikely(pid == MMU_NO_CONTEXT)) + return; + + /* + * If this context hasn't run on that CPU before and KVM is + * around, there's a slim chance that the guest on another + * CPU just brought in obsolete translation into the TLB of + * this CPU due to a bad prefetch using the guest PID on + * the way into the hypervisor. + * + * We work around this here. If KVM is possible, we check if + * any sibling thread is in KVM. If it is, the window may exist + * and thus we flush that PID from the core. + * + * A potential future improvement would be to mark which PIDs + * have never been used on the system and avoid it if the PID + * is new and the process has no other cpumask bit set. + */ + if (cpu_has_feature(CPU_FTR_HVMODE) && radix_enabled()) { + int cpu = smp_processor_id(); + int sib = cpu_first_thread_sibling(cpu); + bool flush = false; + + for (; sib <= cpu_last_thread_sibling(cpu) && !flush; sib++) { + if (sib == cpu) + continue; + if (paca[sib].kvm_hstate.kvm_vcpu) + flush = true; + } + if (flush) + _tlbiel_pid(pid, RIC_FLUSH_ALL); + } +} +EXPORT_SYMBOL_GPL(radix_kvm_prefetch_workaround); +#endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */