diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 0ce513f2926f..36fc7bfe9e11 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -91,6 +91,7 @@ static inline int hash__pgd_bad(pgd_t pgd) } #ifdef CONFIG_STRICT_KERNEL_RWX extern void hash__mark_rodata_ro(void); +extern void hash__mark_initmem_nx(void); #endif extern void hpte_need_flush(struct mm_struct *mm, unsigned long addr, diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index c0737c86a362..d1da415e283c 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1192,5 +1192,6 @@ static inline const int pud_pfn(pud_t pud) BUILD_BUG(); return 0; } + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ */ diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 487709ff6875..544440b5aff3 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -118,6 +118,7 @@ #ifdef CONFIG_STRICT_KERNEL_RWX extern void radix__mark_rodata_ro(void); +extern void radix__mark_initmem_nx(void); #endif static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr, diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h index dd01212935ac..afae9a336136 100644 --- a/arch/powerpc/include/asm/pgtable.h +++ b/arch/powerpc/include/asm/pgtable.h @@ -80,6 +80,13 @@ unsigned long vmalloc_to_phys(void *vmalloc_addr); void pgtable_cache_add(unsigned shift, void (*ctor)(void *)); void pgtable_cache_init(void); + +#ifdef CONFIG_STRICT_KERNEL_RWX +void mark_initmem_nx(void); +#else +static inline void mark_initmem_nx(void) { } +#endif + #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_PGTABLE_H */ diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S index e6d8354d79ef..9029afd1fa2a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -824,7 +824,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) * r3 volatile parameter and return value for status * r4-r10 volatile input and output value * r11 volatile hypercall number and output value - * r12 volatile + * r12 volatile input and output value * r13-r31 nonvolatile * LR nonvolatile * CTR volatile @@ -834,25 +834,26 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) * Other registers nonvolatile * * The intersection of volatile registers that don't contain possible - * inputs is: r12, cr0, xer, ctr. We may use these as scratch regs - * upon entry without saving. + * inputs is: cr0, xer, ctr. We may use these as scratch regs upon entry + * without saving, though xer is not a good idea to use, as hardware may + * interpret some bits so it may be costly to change them. */ #ifdef CONFIG_KVM_BOOK3S_64_HANDLER /* * There is a little bit of juggling to get syscall and hcall - * working well. Save r10 in ctr to be restored in case it is a - * hcall. + * working well. Save r13 in ctr to avoid using SPRG scratch + * register. * * Userspace syscalls have already saved the PPR, hcalls must save * it before setting HMT_MEDIUM. */ #define SYSCALL_KVMTEST \ - mr r12,r13; \ + mtctr r13; \ GET_PACA(r13); \ - mtctr r10; \ + std r10,PACA_EXGEN+EX_R10(r13); \ KVMTEST_PR(0xc00); /* uses r10, branch to do_kvm_0xc00_system_call */ \ HMT_MEDIUM; \ - mr r9,r12; \ + mfctr r9; #else #define SYSCALL_KVMTEST \ @@ -935,8 +936,8 @@ EXC_VIRT_END(system_call, 0x4c00, 0x100) * This is a hcall, so register convention is as above, with these * differences: * r13 = PACA - * r12 = orig r13 - * ctr = orig r10 + * ctr = orig r13 + * orig r10 saved in PACA */ TRAMP_KVM_BEGIN(do_kvm_0xc00) /* @@ -944,14 +945,13 @@ TRAMP_KVM_BEGIN(do_kvm_0xc00) * HMT_MEDIUM. 
That allows the KVM code to save that value into the * guest state (it is the guest's PPR value). */ - OPT_GET_SPR(r0, SPRN_PPR, CPU_FTR_HAS_PPR) + OPT_GET_SPR(r10, SPRN_PPR, CPU_FTR_HAS_PPR) HMT_MEDIUM - OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r0, CPU_FTR_HAS_PPR) + OPT_SAVE_REG_TO_PACA(PACA_EXGEN+EX_PPR, r10, CPU_FTR_HAS_PPR) mfctr r10 - SET_SCRATCH0(r12) + SET_SCRATCH0(r10) std r9,PACA_EXGEN+EX_R9(r13) mfcr r9 - std r10,PACA_EXGEN+EX_R10(r13) KVM_HANDLER(PACA_EXGEN, EXC_STD, 0xc00) #endif diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 5adb390e773b..516ebef905c0 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -30,6 +30,7 @@ * Use unused space in the interrupt stack to save and restore * registers for winkle support. */ +#define _MMCR0 GPR0 #define _SDR1 GPR3 #define _PTCR GPR3 #define _RPR GPR4 @@ -272,6 +273,14 @@ power_enter_stop: b pnv_wakeup_noloss .Lhandle_esl_ec_set: + /* + * POWER9 DD2 can incorrectly set PMAO when waking up after a + * state-loss idle. Saving and restoring MMCR0 over idle is a + * workaround. + */ + mfspr r4,SPRN_MMCR0 + std r4,_MMCR0(r1) + /* * Check if the requested state is a deep idle state. */ @@ -450,10 +459,14 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_ARCH_300) pnv_restore_hyp_resource_arch300: /* * Workaround for POWER9, if we lost resources, the ERAT - * might have been mixed up and needs flushing. + * might have been mixed up and needs flushing. We also need + * to reload MMCR0 (see comment above). */ blt cr3,1f PPC_INVALIDATE_ERAT + ld r1,PACAR1(r13) + ld r4,_MMCR0(r1) + mtspr SPRN_MMCR0,r4 1: /* * POWER ISA 3. 
Use PSSCR to determine if we diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 8541f18694a4..46b4e67d2372 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -402,6 +402,7 @@ void __init mem_init(void) void free_initmem(void) { ppc_md.progress = ppc_printk_progress; + mark_initmem_nx(); free_initmem_default(POISON_FREE_INITMEM); } diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c index 188b4107584d..443a2c66a304 100644 --- a/arch/powerpc/mm/pgtable-hash64.c +++ b/arch/powerpc/mm/pgtable-hash64.c @@ -425,33 +425,51 @@ int hash__has_transparent_hugepage(void) #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #ifdef CONFIG_STRICT_KERNEL_RWX -void hash__mark_rodata_ro(void) +static bool hash__change_memory_range(unsigned long start, unsigned long end, + unsigned long newpp) { - unsigned long start = (unsigned long)_stext; - unsigned long end = (unsigned long)__init_begin; unsigned long idx; unsigned int step, shift; - unsigned long newpp = PP_RXXX; shift = mmu_psize_defs[mmu_linear_psize].shift; step = 1 << shift; - start = ((start + step - 1) >> shift) << shift; - end = (end >> shift) << shift; + start = ALIGN_DOWN(start, step); + end = ALIGN(end, step); // aligns up - pr_devel("marking ro start %lx, end %lx, step %x\n", - start, end, step); + if (start >= end) + return false; - if (start == end) { - pr_warn("could not set rodata ro, relocate the start" - " of the kernel to a 0x%x boundary\n", step); - return; - } + pr_debug("Changing page protection on range 0x%lx-0x%lx, to 0x%lx, step 0x%x\n", + start, end, newpp, step); for (idx = start; idx < end; idx += step) /* Not sure if we can do much with the return value */ mmu_hash_ops.hpte_updateboltedpp(newpp, idx, mmu_linear_psize, mmu_kernel_ssize); + return true; +} + +void hash__mark_rodata_ro(void) +{ + unsigned long start, end; + + start = (unsigned long)_stext; + end = (unsigned long)__init_begin; + + WARN_ON(!hash__change_memory_range(start, end, PP_RXXX)); +} + 
+void hash__mark_initmem_nx(void) +{ + unsigned long start, end, pp; + + start = (unsigned long)__init_begin; + end = (unsigned long)__init_end; + + pp = htab_convert_pte_flags(pgprot_val(PAGE_KERNEL)); + + WARN_ON(!hash__change_memory_range(start, end, pp)); } #endif diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index 8c13e4282308..5cc50d47ce3f 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -112,10 +112,13 @@ set_the_pte: } #ifdef CONFIG_STRICT_KERNEL_RWX -void radix__mark_rodata_ro(void) +/* + * Clear the PTE bits given in 'clear' on each mapped kernel page of the + * page-aligned range covering [start, end), then flush that TLB range. + */ +static void radix__change_memory_range(unsigned long start, unsigned long end, + unsigned long clear) { - unsigned long start = (unsigned long)_stext; - unsigned long end = (unsigned long)__init_begin; unsigned long idx; pgd_t *pgdp; pud_t *pudp; @@ -125,7 +128,8 @@ void radix__mark_rodata_ro(void) start = ALIGN_DOWN(start, PAGE_SIZE); end = PAGE_ALIGN(end); // aligns up - pr_devel("marking ro start %lx, end %lx\n", start, end); + pr_debug("Changing flags on range %lx-%lx removing 0x%lx\n", + start, end, clear); for (idx = start; idx < end; idx += PAGE_SIZE) { pgdp = pgd_offset_k(idx); @@ -147,11 +151,29 @@ void radix__mark_rodata_ro(void) if (!ptep) continue; update_the_pte: - radix__pte_update(&init_mm, idx, ptep, _PAGE_WRITE, 0, 0); + radix__pte_update(&init_mm, idx, ptep, clear, 0, 0); } radix__flush_tlb_kernel_range(start, end); } + +void radix__mark_rodata_ro(void) +{ + unsigned long start, end; + + start = (unsigned long)_stext; + end = (unsigned long)__init_begin; + + radix__change_memory_range(start, end, _PAGE_WRITE); +} + +void radix__mark_initmem_nx(void) +{ + unsigned long start = (unsigned long)__init_begin; + unsigned long end = (unsigned long)__init_end; + + radix__change_memory_range(start, end, _PAGE_EXEC); +} #endif /* CONFIG_STRICT_KERNEL_RWX */ static inline void __meminit print_mapping(unsigned long start, diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index 
5c0b795d656c..0736e94c7615 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -505,4 +505,12 @@ void mark_rodata_ro(void) else hash__mark_rodata_ro(); } + +void mark_initmem_nx(void) +{ + if (radix_enabled()) + radix__mark_initmem_nx(); + else + hash__mark_initmem_nx(); +} #endif diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 9b87abb178f0..cad6b57ce494 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -78,7 +78,7 @@ void opal_configure_cores(void) * ie. Host hash supports hash guests * Host radix supports hash/radix guests */ - if (cpu_has_feature(CPU_FTR_ARCH_300)) { + if (early_cpu_has_feature(CPU_FTR_ARCH_300)) { reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH; if (early_radix_enabled()) reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX;