From cee3536d24a1d5db66b9f68c3ece0af128187ab4 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 22 Jul 2019 22:26:56 +1000 Subject: [PATCH 1/6] powerpc: Wire up clone3 syscall Wire up the new clone3 syscall added in commit 7f192e3cd316 ("fork: add clone3"). This requires a ppc_clone3 wrapper, in order to save the non-volatile GPRs before calling into the generic syscall code. Otherwise we hit the BUG_ON in CHECK_FULL_REGS in copy_thread(). Lightly tested using Christian's test code on a Power8 LE VM. Signed-off-by: Michael Ellerman Acked-by: Christian Brauner Link: https://lore.kernel.org/r/20190724140259.23554-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/unistd.h | 1 + arch/powerpc/kernel/entry_32.S | 8 ++++++++ arch/powerpc/kernel/entry_64.S | 5 +++++ arch/powerpc/kernel/syscalls/syscall.tbl | 2 +- 4 files changed, 15 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/unistd.h b/arch/powerpc/include/asm/unistd.h index 68473c3c471c..b0720c7c3fcf 100644 --- a/arch/powerpc/include/asm/unistd.h +++ b/arch/powerpc/include/asm/unistd.h @@ -49,6 +49,7 @@ #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK #define __ARCH_WANT_SYS_CLONE +#define __ARCH_WANT_SYS_CLONE3 #endif /* __ASSEMBLY__ */ #endif /* _ASM_POWERPC_UNISTD_H_ */ diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index 85fdb6d879f1..54fab22c9a43 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -597,6 +597,14 @@ ppc_clone: stw r0,_TRAP(r1) /* register set saved */ b sys_clone + .globl ppc_clone3 +ppc_clone3: + SAVE_NVGPRS(r1) + lwz r0,_TRAP(r1) + rlwinm r0,r0,0,0,30 /* clear LSB to indicate full */ + stw r0,_TRAP(r1) /* register set saved */ + b sys_clone3 + .globl ppc_swapcontext ppc_swapcontext: SAVE_NVGPRS(r1) diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index d9105fcf4021..0a0b5310f54a 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -487,6 +487,11 @@ _GLOBAL(ppc_clone) bl sys_clone b .Lsyscall_exit +_GLOBAL(ppc_clone3) + bl save_nvgprs + bl sys_clone3 + b .Lsyscall_exit + _GLOBAL(ppc32_swapcontext) bl save_nvgprs bl compat_sys_swapcontext diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 3331749aab20..43f736ed47f2 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -516,4 +516,4 @@ 432 common fsmount sys_fsmount 433 common fspick sys_fspick 434 common pidfd_open sys_pidfd_open -# 435 reserved for clone3 +435 nospu clone3 ppc_clone3 From 705d0abbcc7adf04743d04ea8754acbcdf21c326 Mon Sep 17 00:00:00 2001 From: Santosh Sivaraj Date: Mon, 29 Jul 2019 11:25:36 +0530 Subject: [PATCH 2/6] powerpc/kvm: Fall through switch case explicitly Implicit fallthrough warning was enabled globally which broke the build. Make it explicit with a `fall through` comment. Signed-off-by: Santosh Sivaraj Reviewed-by: Stephen Rothwell Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190729055536.25591-1-santosh@fossix.org --- arch/powerpc/kvm/book3s_32_mmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/book3s_32_mmu.c b/arch/powerpc/kvm/book3s_32_mmu.c index 653936177857..18f244aad7aa 100644 --- a/arch/powerpc/kvm/book3s_32_mmu.c +++ b/arch/powerpc/kvm/book3s_32_mmu.c @@ -239,6 +239,7 @@ static int kvmppc_mmu_book3s_32_xlate_pte(struct kvm_vcpu *vcpu, gva_t eaddr, case 2: case 6: pte->may_write = true; + /* fall through */ case 3: case 5: case 7: From da1115fdbd6e86c62185cdd2b4bf7add39f2f82b Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 29 Jul 2019 15:21:28 +0530 Subject: [PATCH 3/6] powerpc/nvdimm: Pick nearby online node if the device node is not online MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, nvdimm subsystem expects the device numa node for SCM device to be an online node. It also doesn't try to bring the device numa node online. Hence if we use a non-online numa node as device node we hit crashes like below. This is because we try to access uninitialized NODE_DATA in different code paths. cpu 0x0: Vector: 300 (Data Access) at [c0000000fac53170] pc: c0000000004bbc50: ___slab_alloc+0x120/0xca0 lr: c0000000004bc834: __slab_alloc+0x64/0xc0 sp: c0000000fac53400 msr: 8000000002009033 dar: 73e8 dsisr: 80000 current = 0xc0000000fabb6d80 paca = 0xc000000003870000 irqmask: 0x03 irq_happened: 0x01 pid = 7, comm = kworker/u16:0 Linux version 5.2.0-06234-g76bd729b2644 (kvaneesh@ltc-boston123) (gcc version 7.4.0 (Ubuntu 7.4.0-1ubuntu1~18.04.1)) #135 SMP Thu Jul 11 05:36:30 CDT 2019 enter ? for help [link register ] c0000000004bc834 __slab_alloc+0x64/0xc0 [c0000000fac53400] c0000000fac53480 (unreliable) [c0000000fac53500] c0000000004bc818 __slab_alloc+0x48/0xc0 [c0000000fac53560] c0000000004c30a0 __kmalloc_node_track_caller+0x3c0/0x6b0 [c0000000fac535d0] c000000000cfafe4 devm_kmalloc+0x74/0xc0 [c0000000fac53600] c000000000d69434 nd_region_activate+0x144/0x560 [c0000000fac536d0] c000000000d6b19c nd_region_probe+0x17c/0x370 [c0000000fac537b0] c000000000d6349c nvdimm_bus_probe+0x10c/0x230 [c0000000fac53840] c000000000cf3cc4 really_probe+0x254/0x4e0 [c0000000fac538d0] c000000000cf429c driver_probe_device+0x16c/0x1e0 [c0000000fac53950] c000000000cf0b44 bus_for_each_drv+0x94/0x130 [c0000000fac539b0] c000000000cf392c __device_attach+0xdc/0x200 [c0000000fac53a50] c000000000cf231c bus_probe_device+0x4c/0xf0 [c0000000fac53a90] c000000000ced268 device_add+0x528/0x810 [c0000000fac53b60] c000000000d62a58 nd_async_device_register+0x28/0xa0 [c0000000fac53bd0] c0000000001ccb8c async_run_entry_fn+0xcc/0x1f0 [c0000000fac53c50] c0000000001bcd9c process_one_work+0x46c/0x860 [c0000000fac53d20] c0000000001bd4f4 worker_thread+0x364/0x5f0 [c0000000fac53db0] c0000000001c7260 kthread+0x1b0/0x1c0 [c0000000fac53e20] c00000000000b954 ret_from_kernel_thread+0x5c/0x68 The patch tries to fix this by picking the nearest online node as the SCM node. This does have a problem of us losing the information that SCM node is equidistant from two other online nodes. If applications need to understand these fine-grained details we should express then like x86 does via /sys/devices/system/node/nodeX/accessY/initiators/ With the patch we get # numactl -H available: 2 nodes (0-1) node 0 cpus: node 0 size: 0 MB node 0 free: 0 MB node 1 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 node 1 size: 130865 MB node 1 free: 129130 MB node distances: node 0 1 0: 10 20 1: 20 10 # cat /sys/bus/nd/devices/region0/numa_node 0 # dmesg | grep papr_scm [ 91.332305] papr_scm ibm,persistent-memory:ibm,pmemory@44104001: Region registered with target node 2 and online node 0 Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190729095128.23707-1-aneesh.kumar@linux.ibm.com --- arch/powerpc/platforms/pseries/papr_scm.c | 29 +++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 2c07908359b2..a5ac371a3f06 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -275,12 +275,32 @@ static const struct attribute_group *papr_scm_dimm_groups[] = { NULL, }; +static inline int papr_scm_node(int node) +{ + int min_dist = INT_MAX, dist; + int nid, min_node; + + if ((node == NUMA_NO_NODE) || node_online(node)) + return node; + + min_node = first_online_node; + for_each_online_node(nid) { + dist = node_distance(node, nid); + if (dist < min_dist) { + min_dist = dist; + min_node = nid; + } + } + return min_node; +} + static int papr_scm_nvdimm_init(struct papr_scm_priv *p) { struct device *dev = &p->pdev->dev; struct nd_mapping_desc mapping; struct nd_region_desc ndr_desc; unsigned long dimm_flags; + int target_nid, online_nid; p->bus_desc.ndctl = papr_scm_ndctl; p->bus_desc.module = THIS_MODULE; @@ -319,8 +339,10 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) memset(&ndr_desc, 0, sizeof(ndr_desc)); ndr_desc.attr_groups = region_attr_groups; - ndr_desc.numa_node = dev_to_node(&p->pdev->dev); - ndr_desc.target_node = ndr_desc.numa_node; + target_nid = dev_to_node(&p->pdev->dev); + online_nid = papr_scm_node(target_nid); + ndr_desc.numa_node = online_nid; + ndr_desc.target_node = target_nid; ndr_desc.res = &p->res; ndr_desc.of_node = p->dn; ndr_desc.provider_data = p; @@ -338,6 +360,9 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) ndr_desc.res, p->dn); goto err; } + if (target_nid != online_nid) + dev_info(dev, "Region registered with target node %d and online node %d", + target_nid, online_nid); return 0; From 7db57e77586744af46c8bbf8f831bb2b941b7afc Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 31 Jul 2019 00:00:15 +1000 Subject: [PATCH 4/6] powerpc/spe: Mark expected switch fall-throughs Mark switch cases where we are expecting to fall through. Fixes errors such as below, seen with mpc85xx_defconfig: arch/powerpc/kernel/align.c: In function 'emulate_spe': arch/powerpc/kernel/align.c:178:8: error: this statement may fall through ret |= __get_user_inatomic(temp.v[3], p++); ^~ Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190730141917.21817-1-mpe@ellerman.id.au --- arch/powerpc/kernel/align.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index 7107ad86de65..92045ed64976 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -176,9 +176,11 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, ret |= __get_user_inatomic(temp.v[1], p++); ret |= __get_user_inatomic(temp.v[2], p++); ret |= __get_user_inatomic(temp.v[3], p++); + /* fall through */ case 4: ret |= __get_user_inatomic(temp.v[4], p++); ret |= __get_user_inatomic(temp.v[5], p++); + /* fall through */ case 2: ret |= __get_user_inatomic(temp.v[6], p++); ret |= __get_user_inatomic(temp.v[7], p++); @@ -259,9 +261,11 @@ static int emulate_spe(struct pt_regs *regs, unsigned int reg, ret |= __put_user_inatomic(data.v[1], p++); ret |= __put_user_inatomic(data.v[2], p++); ret |= __put_user_inatomic(data.v[3], p++); + /* fall through */ case 4: ret |= __put_user_inatomic(data.v[4], p++); ret |= __put_user_inatomic(data.v[5], p++); + /* fall through */ case 2: ret |= __put_user_inatomic(data.v[6], p++); ret |= __put_user_inatomic(data.v[7], p++); From 7440ea8b2a4430eef5120d0a7faac6c39304ae6d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 30 Jul 2019 14:37:04 +1000 Subject: [PATCH 5/6] drivers/macintosh/smu.c: Mark expected switch fall-through Mark switch cases where we are expecting to fall through. This patch fixes the following warning (Building: powerpc): drivers/macintosh/smu.c: In function 'smu_queue_i2c': drivers/macintosh/smu.c:854:21: warning: this statement may fall through [-Wimplicit-fallthrough=] cmd->info.devaddr &= 0xfe; ~~~~~~~~~~~~~~~~~~^~~~~~~ drivers/macintosh/smu.c:855:2: note: here case SMU_I2C_TRANSFER_STDSUB: ^~~~ Fixes: 0365ba7fb1fa ("[PATCH] ppc64: SMU driver update & i2c support") Signed-off-by: Stephen Rothwell Reviewed-by: Kees Cook Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20190730143704.060a2606@canb.auug.org.au --- drivers/macintosh/smu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 276065c888bc..23f1f41c8602 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -852,6 +852,7 @@ int smu_queue_i2c(struct smu_i2c_cmd *cmd) break; case SMU_I2C_TRANSFER_COMBINED: cmd->info.devaddr &= 0xfe; + /* fall through */ case SMU_I2C_TRANSFER_STDSUB: if (cmd->info.sublen > 3) return -EINVAL; From d7e23b887f67178c4f840781be7a6aa6aeb52ab1 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 31 Jul 2019 06:01:42 +0000 Subject: [PATCH 6/6] powerpc/kasan: fix early boot failure on PPC32 Due to commit 4a6d8cf90017 ("powerpc/mm: don't use pte_alloc_kernel() until slab is available on PPC32"), pte_alloc_kernel() cannot be used during early KASAN init. Fix it by using memblock_alloc() instead. Fixes: 2edb16efc899 ("powerpc/32: Add KASAN support") Cc: stable@vger.kernel.org # v5.2+ Reported-by: Erhard F. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/da89670093651437f27d2975224712e0a130b055.1564552796.git.christophe.leroy@c-s.fr --- arch/powerpc/mm/kasan/kasan_init_32.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/mm/kasan/kasan_init_32.c b/arch/powerpc/mm/kasan/kasan_init_32.c index 0d62be3cba47..74f4555a62ba 100644 --- a/arch/powerpc/mm/kasan/kasan_init_32.c +++ b/arch/powerpc/mm/kasan/kasan_init_32.c @@ -21,7 +21,7 @@ static void kasan_populate_pte(pte_t *ptep, pgprot_t prot) __set_pte_at(&init_mm, va, ptep, pfn_pte(PHYS_PFN(pa), prot), 0); } -static int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end) +static int __ref kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_end) { pmd_t *pmd; unsigned long k_cur, k_next; @@ -35,7 +35,10 @@ static int kasan_init_shadow_page_tables(unsigned long k_start, unsigned long k_ if ((void *)pmd_page_vaddr(*pmd) != kasan_early_shadow_pte) continue; - new = pte_alloc_one_kernel(&init_mm); + if (slab_is_available()) + new = pte_alloc_one_kernel(&init_mm); + else + new = memblock_alloc(PTE_FRAG_SIZE, PTE_FRAG_SIZE); if (!new) return -ENOMEM;