From 42c184ade43a79d36f50a0b3394b3326633f53f5 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 6 Sep 2019 22:36:44 +0800 Subject: [PATCH 01/93] perf: hisi: use devm_platform_ioremap_resource() to simplify code Use devm_platform_ioremap_resource() to simplify the code a bit. This is detected by coccinelle. Signed-off-by: YueHaibing Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c | 5 +---- drivers/perf/hisilicon/hisi_uncore_hha_pmu.c | 4 +--- drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c | 4 +--- 3 files changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c index e42d4464c2cf..453f1c6a16ca 100644 --- a/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_ddrc_pmu.c @@ -243,8 +243,6 @@ MODULE_DEVICE_TABLE(acpi, hisi_ddrc_pmu_acpi_match); static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *ddrc_pmu) { - struct resource *res; - /* * Use the SCCL_ID and DDRC channel ID to identify the * DDRC PMU, while SCCL_ID is in MPIDR[aff2]. @@ -263,8 +261,7 @@ static int hisi_ddrc_pmu_init_data(struct platform_device *pdev, /* DDRC PMUs only share the same SCCL */ ddrc_pmu->ccl_id = -1; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ddrc_pmu->base = devm_ioremap_resource(&pdev->dev, res); + ddrc_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ddrc_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for ddrc_pmu resource\n"); return PTR_ERR(ddrc_pmu->base); diff --git a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c index f28063873e11..6a1dd72d8abb 100644 --- a/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_hha_pmu.c @@ -234,7 +234,6 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *hha_pmu) { unsigned long long id; - struct resource *res; acpi_status status; status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev), @@ -256,8 +255,7 @@ static int hisi_hha_pmu_init_data(struct platform_device *pdev, /* HHA PMUs only share the same SCCL */ hha_pmu->ccl_id = -1; - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - hha_pmu->base = devm_ioremap_resource(&pdev->dev, res); + hha_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(hha_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for hha_pmu resource\n"); return PTR_ERR(hha_pmu->base); diff --git a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c index 078b8dc57250..1151e99b241c 100644 --- a/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_l3c_pmu.c @@ -233,7 +233,6 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev, struct hisi_pmu *l3c_pmu) { unsigned long long id; - struct resource *res; acpi_status status; status = acpi_evaluate_integer(ACPI_HANDLE(&pdev->dev), @@ -259,8 +258,7 @@ static int hisi_l3c_pmu_init_data(struct platform_device *pdev, return -EINVAL; } - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - l3c_pmu->base = devm_ioremap_resource(&pdev->dev, res); + l3c_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(l3c_pmu->base)) { dev_err(&pdev->dev, "ioremap failed for l3c_pmu resource\n"); return PTR_ERR(l3c_pmu->base); From 7fdd7f7c33d2451cf4865b95b24d57785c0ff515 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 6 Sep 2019 22:37:35 +0800 Subject: [PATCH 02/93] perf: xgene: use devm_platform_ioremap_resource() to simplify code Use devm_platform_ioremap_resource() to simplify the code a bit. This is detected by coccinelle. Signed-off-by: YueHaibing Signed-off-by: Will Deacon --- drivers/perf/xgene_pmu.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) diff --git a/drivers/perf/xgene_pmu.c b/drivers/perf/xgene_pmu.c index 7e328d6385c3..46ee6807d533 100644 --- a/drivers/perf/xgene_pmu.c +++ b/drivers/perf/xgene_pmu.c @@ -1282,25 +1282,21 @@ static int acpi_pmu_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu, struct platform_device *pdev) { void __iomem *csw_csr, *mcba_csr, *mcbb_csr; - struct resource *res; unsigned int reg; - res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - csw_csr = devm_ioremap_resource(&pdev->dev, res); + csw_csr = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(csw_csr)) { dev_err(&pdev->dev, "ioremap failed for CSW CSR resource\n"); return PTR_ERR(csw_csr); } - res = platform_get_resource(pdev, IORESOURCE_MEM, 2); - mcba_csr = devm_ioremap_resource(&pdev->dev, res); + mcba_csr = devm_platform_ioremap_resource(pdev, 2); if (IS_ERR(mcba_csr)) { dev_err(&pdev->dev, "ioremap failed for MCBA CSR resource\n"); return PTR_ERR(mcba_csr); } - res = platform_get_resource(pdev, IORESOURCE_MEM, 3); - mcbb_csr = devm_ioremap_resource(&pdev->dev, res); + mcbb_csr = devm_platform_ioremap_resource(pdev, 3); if (IS_ERR(mcbb_csr)) { dev_err(&pdev->dev, "ioremap failed for MCBB CSR resource\n"); return PTR_ERR(mcbb_csr); @@ -1332,13 +1328,11 @@ static int acpi_pmu_v3_probe_active_mcb_mcu_l3c(struct xgene_pmu *xgene_pmu, struct platform_device *pdev) { void __iomem *csw_csr; - struct resource *res; unsigned int reg; u32 mcb0routing; u32 mcb1routing; - res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - csw_csr = devm_ioremap_resource(&pdev->dev, res); + csw_csr = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(csw_csr)) { dev_err(&pdev->dev, "ioremap failed for CSW CSR resource\n"); return PTR_ERR(csw_csr); From 1c8d96b41d8b05ed9dbc2bea03edf878f544f93e Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 6 Sep 2019 22:39:28 +0800 Subject: [PATCH 03/93] perf/arm-ccn: use devm_platform_ioremap_resource() to simplify code Use devm_platform_ioremap_resource() to simplify the code a bit. This is detected by coccinelle. Signed-off-by: YueHaibing Signed-off-by: Will Deacon --- drivers/perf/arm-ccn.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index 6fc0273b6129..fee5cb2e8afb 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -1477,8 +1477,7 @@ static int arm_ccn_probe(struct platform_device *pdev) ccn->dev = &pdev->dev; platform_set_drvdata(pdev, ccn); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - ccn->base = devm_ioremap_resource(ccn->dev, res); + ccn->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(ccn->base)) return PTR_ERR(ccn->base); From 504db0f82660898cbb76478cb2eaee46817c85b8 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Fri, 6 Sep 2019 22:40:14 +0800 Subject: [PATCH 04/93] perf/arm-cci: use devm_platform_ioremap_resource() to simplify code Use devm_platform_ioremap_resource() to simplify the code a bit. This is detected by coccinelle. Signed-off-by: YueHaibing Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 8f8606b9bc9e..1b8e337a29ca 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1642,7 +1642,6 @@ static struct cci_pmu *cci_pmu_alloc(struct device *dev) static int cci_pmu_probe(struct platform_device *pdev) { - struct resource *res; struct cci_pmu *cci_pmu; int i, ret, irq; @@ -1650,8 +1649,7 @@ static int cci_pmu_probe(struct platform_device *pdev) if (IS_ERR(cci_pmu)) return PTR_ERR(cci_pmu); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - cci_pmu->base = devm_ioremap_resource(&pdev->dev, res); + cci_pmu->base = devm_platform_ioremap_resource(pdev, 0); if (IS_ERR(cci_pmu->base)) return -ENOMEM; From c8b0de762e0b7cf9f51ad13b19bd279809317f37 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Mon, 9 Sep 2019 09:22:57 +0800 Subject: [PATCH 05/93] perf/smmuv3: use devm_platform_ioremap_resource() to simplify code Use devm_platform_ioremap_resource() to simplify the code a bit. This is detected by coccinelle. Signed-off-by: YueHaibing Signed-off-by: Will Deacon --- drivers/perf/arm_smmuv3_pmu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/perf/arm_smmuv3_pmu.c b/drivers/perf/arm_smmuv3_pmu.c index abcf54f7d19c..773128f411f1 100644 --- a/drivers/perf/arm_smmuv3_pmu.c +++ b/drivers/perf/arm_smmuv3_pmu.c @@ -727,7 +727,7 @@ static void smmu_pmu_get_acpi_options(struct smmu_pmu *smmu_pmu) static int smmu_pmu_probe(struct platform_device *pdev) { struct smmu_pmu *smmu_pmu; - struct resource *res_0, *res_1; + struct resource *res_0; u32 cfgr, reg_size; u64 ceid_64[2]; int irq, err; @@ -764,8 +764,7 @@ static int smmu_pmu_probe(struct platform_device *pdev) /* Determine if page 1 is present */ if (cfgr & SMMU_PMCG_CFGR_RELOC_CTRS) { - res_1 = platform_get_resource(pdev, IORESOURCE_MEM, 1); - smmu_pmu->reloc_base = devm_ioremap_resource(dev, res_1); + smmu_pmu->reloc_base = devm_platform_ioremap_resource(pdev, 1); if (IS_ERR(smmu_pmu->reloc_base)) return PTR_ERR(smmu_pmu->reloc_base); } else { From 3a25e46c99e9c8b294b89df7a13a4638bf722af8 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Thu, 3 Oct 2019 12:12:09 +0100 Subject: [PATCH 06/93] docs/arm64: elf_hwcaps: sort the HWCAP{, 2} documentation by ascending value Part of the hardware capabilities documented in elf_hwcap.rst are ordered following the definition in the header arch/arm64/include/uapi/asm/hwcap.h but others seems to be documented in random order. To make easier to match against the definition in the header, they are now sorted in the same order as they are defined in header. I.e., HWCAP first by ascending value, and then HWCAP2 in the similar fashion. Acked-by: Will Deacon Signed-off-by: Julien Grall Signed-off-by: Catalin Marinas --- Documentation/arm64/elf_hwcaps.rst | 64 +++++++++++++++--------------- 1 file changed, 32 insertions(+), 32 deletions(-) diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index 91f79529c58c..9ee7f8ff1fae 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -119,10 +119,6 @@ HWCAP_LRCPC HWCAP_DCPOP Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0001. -HWCAP2_DCPODP - - Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010. - HWCAP_SHA3 Functionality implied by ID_AA64ISAR0_EL1.SHA3 == 0b0001. @@ -141,6 +137,38 @@ HWCAP_SHA512 HWCAP_SVE Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001. +HWCAP_ASIMDFHM + Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001. + +HWCAP_DIT + Functionality implied by ID_AA64PFR0_EL1.DIT == 0b0001. + +HWCAP_USCAT + Functionality implied by ID_AA64MMFR2_EL1.AT == 0b0001. + +HWCAP_ILRCPC + Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0010. + +HWCAP_FLAGM + Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001. + +HWCAP_SSBS + Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010. + +HWCAP_PACA + Functionality implied by ID_AA64ISAR1_EL1.APA == 0b0001 or + ID_AA64ISAR1_EL1.API == 0b0001, as described by + Documentation/arm64/pointer-authentication.rst. + +HWCAP_PACG + Functionality implied by ID_AA64ISAR1_EL1.GPA == 0b0001 or + ID_AA64ISAR1_EL1.GPI == 0b0001, as described by + Documentation/arm64/pointer-authentication.rst. + +HWCAP2_DCPODP + + Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010. + HWCAP2_SVE2 Functionality implied by ID_AA64ZFR0_EL1.SVEVer == 0b0001. @@ -165,38 +193,10 @@ HWCAP2_SVESM4 Functionality implied by ID_AA64ZFR0_EL1.SM4 == 0b0001. -HWCAP_ASIMDFHM - Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001. - -HWCAP_DIT - Functionality implied by ID_AA64PFR0_EL1.DIT == 0b0001. - -HWCAP_USCAT - Functionality implied by ID_AA64MMFR2_EL1.AT == 0b0001. - -HWCAP_ILRCPC - Functionality implied by ID_AA64ISAR1_EL1.LRCPC == 0b0010. - -HWCAP_FLAGM - Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0001. - HWCAP2_FLAGM2 Functionality implied by ID_AA64ISAR0_EL1.TS == 0b0010. -HWCAP_SSBS - Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010. - -HWCAP_PACA - Functionality implied by ID_AA64ISAR1_EL1.APA == 0b0001 or - ID_AA64ISAR1_EL1.API == 0b0001, as described by - Documentation/arm64/pointer-authentication.rst. - -HWCAP_PACG - Functionality implied by ID_AA64ISAR1_EL1.GPA == 0b0001 or - ID_AA64ISAR1_EL1.GPI == 0b0001, as described by - Documentation/arm64/pointer-authentication.rst. - HWCAP2_FRINT Functionality implied by ID_AA64ISAR1_EL1.FRINTTS == 0b0001. From 0f6e4c40164d4283b8bbe4ed80bf54424b756bc7 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Thu, 3 Oct 2019 12:12:10 +0100 Subject: [PATCH 07/93] docs/arm64: elf_hwcaps: Document HWCAP_SB All the hardware capabilities but HWCAP_SB is not documented in elf_hwcaps.rst. So document it. Acked-by: Will Deacon Signed-off-by: Julien Grall Signed-off-by: Catalin Marinas --- Documentation/arm64/elf_hwcaps.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/arm64/elf_hwcaps.rst b/Documentation/arm64/elf_hwcaps.rst index 9ee7f8ff1fae..7fa3d215ae6a 100644 --- a/Documentation/arm64/elf_hwcaps.rst +++ b/Documentation/arm64/elf_hwcaps.rst @@ -155,6 +155,9 @@ HWCAP_FLAGM HWCAP_SSBS Functionality implied by ID_AA64PFR1_EL1.SSBS == 0b0010. +HWCAP_SB + Functionality implied by ID_AA64ISAR1_EL1.SB == 0b0001. + HWCAP_PACA Functionality implied by ID_AA64ISAR1_EL1.APA == 0b0001 or ID_AA64ISAR1_EL1.API == 0b0001, as described by From a8613e7070e771cea90d93eb1e8397246883065a Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Thu, 3 Oct 2019 12:12:11 +0100 Subject: [PATCH 08/93] docs/arm64: cpu-feature-registers: Documents missing visible fields A couple of fields visible to userspace are not described in the documentation. So update it. Acked-by: Will Deacon Signed-off-by: Julien Grall Signed-off-by: Catalin Marinas --- Documentation/arm64/cpu-feature-registers.rst | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst index 2955287e9acc..ffcf4e2c71ef 100644 --- a/Documentation/arm64/cpu-feature-registers.rst +++ b/Documentation/arm64/cpu-feature-registers.rst @@ -193,6 +193,10 @@ infrastructure: +------------------------------+---------+---------+ | Name | bits | visible | +------------------------------+---------+---------+ + | SB | [36-39] | y | + +------------------------------+---------+---------+ + | FRINTTS | [32-35] | y | + +------------------------------+---------+---------+ | GPI | [31-28] | y | +------------------------------+---------+---------+ | GPA | [27-24] | y | From ce87de45b3243d7023e8a4a76ba004002a7ec087 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 13 Sep 2019 13:55:50 +0100 Subject: [PATCH 09/93] arm64: simplify syscall wrapper ifdeffery Back in commit: 4378a7d4be30ec69 ("arm64: implement syscall wrappers") ... I implemented the arm64 syscall wrapper glue following the approach taken on x86. While doing so, I also copied across some ifdeffery that isn't necessary on arm64. On arm64 we don't share any of the native wrappers with compat tasks, and unlike x86 we don't have alternative implementations of SYSCALL_DEFINE0(), COND_SYSCALL(), or SYS_NI() defined when AArch32 compat support is enabled. Thus we don't need to prevent multiple definitions of these macros, and can remove the #ifndef ... #endif guards protecting them. If any of these had been previously defined elsewhere, syscalls are unlikely to work correctly, and we'd want the compiler to warn about the multiple definitions. Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/syscall_wrapper.h | 6 ------ 1 file changed, 6 deletions(-) diff --git a/arch/arm64/include/asm/syscall_wrapper.h b/arch/arm64/include/asm/syscall_wrapper.h index 06d880b3526c..b383b4802a7b 100644 --- a/arch/arm64/include/asm/syscall_wrapper.h +++ b/arch/arm64/include/asm/syscall_wrapper.h @@ -66,24 +66,18 @@ struct pt_regs; } \ static inline long __do_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__)) -#ifndef SYSCALL_DEFINE0 #define SYSCALL_DEFINE0(sname) \ SYSCALL_METADATA(_##sname, 0); \ asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused); \ ALLOW_ERROR_INJECTION(__arm64_sys_##sname, ERRNO); \ asmlinkage long __arm64_sys_##sname(const struct pt_regs *__unused) -#endif -#ifndef COND_SYSCALL #define COND_SYSCALL(name) \ asmlinkage long __weak __arm64_sys_##name(const struct pt_regs *regs) \ { \ return sys_ni_syscall(); \ } -#endif -#ifndef SYS_NI #define SYS_NI(name) SYSCALL_ALIAS(__arm64_sys_##name, sys_ni_posix_timers); -#endif #endif /* __ASM_SYSCALL_WRAPPER_H */ From 6b7fe77c334ae59fed9500140e08f4f896b36871 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Aug 2019 14:22:40 +0100 Subject: [PATCH 10/93] arm/arm64: smccc/psci: add arm_smccc_1_1_get_conduit() SMCCC callers are currently amassing a collection of enums for the SMCCC conduit, and are having to dig into the PSCI driver's internals in order to figure out what to do. Let's clean this up, with common SMCCC_CONDUIT_* definitions, and an arm_smccc_1_1_get_conduit() helper that abstracts the PSCI driver's internal state. We can kill off the PSCI_CONDUIT_* definitions once we've migrated users over to the new interface. Signed-off-by: Mark Rutland Acked-by: Lorenzo Pieralisi Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- drivers/firmware/psci/psci.c | 15 +++++++++++++++ include/linux/arm-smccc.h | 16 ++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index 84f4ff351c62..eb797081d159 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -57,6 +57,21 @@ struct psci_operations psci_ops = { .smccc_version = SMCCC_VERSION_1_0, }; +enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void) +{ + if (psci_ops.smccc_version < SMCCC_VERSION_1_1) + return SMCCC_CONDUIT_NONE; + + switch (psci_ops.conduit) { + case PSCI_CONDUIT_SMC: + return SMCCC_CONDUIT_SMC; + case PSCI_CONDUIT_HVC: + return SMCCC_CONDUIT_HVC; + default: + return SMCCC_CONDUIT_NONE; + } +} + typedef unsigned long (psci_fn)(unsigned long, unsigned long, unsigned long, unsigned long); static psci_fn *invoke_psci_fn; diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index 080012a6f025..df01a8579034 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -80,6 +80,22 @@ #include #include + +enum arm_smccc_conduit { + SMCCC_CONDUIT_NONE, + SMCCC_CONDUIT_SMC, + SMCCC_CONDUIT_HVC, +}; + +/** + * arm_smccc_1_1_get_conduit() + * + * Returns the conduit to be used for SMCCCv1.1 or later. + * + * When SMCCCv1.1 is not present, returns SMCCC_CONDUIT_NONE. + */ +enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void); + /** * struct arm_smccc_res - Result from SMC/HVC call * @a0-a3 result values from registers 0 to 3 From c98bd29917281a5023f71a3148f538ad2709c0f0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Aug 2019 14:22:41 +0100 Subject: [PATCH 11/93] arm64: errata: use arm_smccc_1_1_get_conduit() Now that we have arm_smccc_1_1_get_conduit(), we can hide the PSCI implementation details from the arm64 cpu errata code, so let's do so. As arm_smccc_1_1_get_conduit() implicitly checks that the SMCCC version is at least SMCCC_VERSION_1_1, we no longer need to check this explicitly where switch statements have a default case, e.g. in has_ssbd_mitigation(). There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Lorenzo Pieralisi Cc: Will Deacon Cc: Marc Zyngier Cc: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpu_errata.c | 37 +++++++++++----------------------- 1 file changed, 12 insertions(+), 25 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index f593f4cffc0d..9c0b011eee20 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -6,7 +6,6 @@ */ #include -#include #include #include #include @@ -166,9 +165,7 @@ static void install_bp_hardening_cb(bp_hardening_cb_t fn, } #endif /* CONFIG_KVM_INDIRECT_VECTORS */ -#include #include -#include static void call_smc_arch_workaround_1(void) { @@ -212,11 +209,8 @@ static int detect_harden_bp_fw(void) struct arm_smccc_res res; u32 midr = read_cpuid_id(); - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) - return -1; - - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); switch ((int)res.a0) { @@ -234,7 +228,7 @@ static int detect_harden_bp_fw(void) } break; - case PSCI_CONDUIT_SMC: + case SMCCC_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); switch ((int)res.a0) { @@ -308,11 +302,11 @@ void __init arm64_update_smccc_conduit(struct alt_instr *alt, BUG_ON(nr_inst != 1); - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: insn = aarch64_insn_get_hvc_value(); break; - case PSCI_CONDUIT_SMC: + case SMCCC_CONDUIT_SMC: insn = aarch64_insn_get_smc_value(); break; default: @@ -351,12 +345,12 @@ void arm64_set_ssbd_mitigation(bool state) return; } - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); break; - case PSCI_CONDUIT_SMC: + case SMCCC_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); break; @@ -390,20 +384,13 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, goto out_printmsg; } - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { - ssbd_state = ARM64_SSBD_UNKNOWN; - if (!this_cpu_safe) - __ssb_safe = false; - return false; - } - - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_2, &res); break; - case PSCI_CONDUIT_SMC: + case SMCCC_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_2, &res); break; From 6848253ddeae9fa44680bab6212599283f9d4ef2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Aug 2019 14:22:42 +0100 Subject: [PATCH 12/93] arm: spectre-v2: use arm_smccc_1_1_get_conduit() Now that we have arm_smccc_1_1_get_conduit(), we can hide the PSCI implementation details from the arm spectre-v2 code, so let's do so. As arm_smccc_1_1_get_conduit() implicitly checks that the SMCCC version is at least SMCCC_VERSION_1_1, we no longer need to check this explicitly where switch statements have a default case. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Cc: Marc Zyngier Cc: Russell King Signed-off-by: Catalin Marinas --- arch/arm/mm/proc-v7-bugs.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/arch/arm/mm/proc-v7-bugs.c b/arch/arm/mm/proc-v7-bugs.c index 9a07916af8dd..54d87506d3b5 100644 --- a/arch/arm/mm/proc-v7-bugs.c +++ b/arch/arm/mm/proc-v7-bugs.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include #include #include @@ -75,11 +74,8 @@ static void cpu_v7_spectre_init(void) case ARM_CPU_PART_CORTEX_A72: { struct arm_smccc_res res; - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) - break; - - switch (psci_ops.conduit) { - case PSCI_CONDUIT_HVC: + switch (arm_smccc_1_1_get_conduit()) { + case SMCCC_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) @@ -90,7 +86,7 @@ static void cpu_v7_spectre_init(void) spectre_v2_method = "hypervisor"; break; - case PSCI_CONDUIT_SMC: + case SMCCC_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_1, &res); if ((int)res.a0 != 0) From a5520eac4d2dafb7a48c1b0f1c486afcebd6fe0d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Aug 2019 14:22:43 +0100 Subject: [PATCH 13/93] firmware/psci: use common SMCCC_CONDUIT_* Now that we have common SMCCC_CONDUIT_* definitions, migrate the PSCI code over to them, and kill off the old PSCI_CONDUIT_* definitions. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Acked-by: Lorenzo Pieralisi Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- drivers/firmware/psci/psci.c | 25 +++++++++---------------- include/linux/psci.h | 9 ++------- 2 files changed, 11 insertions(+), 23 deletions(-) diff --git a/drivers/firmware/psci/psci.c b/drivers/firmware/psci/psci.c index eb797081d159..b3b6c15e7b36 100644 --- a/drivers/firmware/psci/psci.c +++ b/drivers/firmware/psci/psci.c @@ -53,7 +53,7 @@ bool psci_tos_resident_on(int cpu) } struct psci_operations psci_ops = { - .conduit = PSCI_CONDUIT_NONE, + .conduit = SMCCC_CONDUIT_NONE, .smccc_version = SMCCC_VERSION_1_0, }; @@ -62,14 +62,7 @@ enum arm_smccc_conduit arm_smccc_1_1_get_conduit(void) if (psci_ops.smccc_version < SMCCC_VERSION_1_1) return SMCCC_CONDUIT_NONE; - switch (psci_ops.conduit) { - case PSCI_CONDUIT_SMC: - return SMCCC_CONDUIT_SMC; - case PSCI_CONDUIT_HVC: - return SMCCC_CONDUIT_HVC; - default: - return SMCCC_CONDUIT_NONE; - } + return psci_ops.conduit; } typedef unsigned long (psci_fn)(unsigned long, unsigned long, @@ -227,13 +220,13 @@ static unsigned long psci_migrate_info_up_cpu(void) 0, 0, 0); } -static void set_conduit(enum psci_conduit conduit) +static void set_conduit(enum arm_smccc_conduit conduit) { switch (conduit) { - case PSCI_CONDUIT_HVC: + case SMCCC_CONDUIT_HVC: invoke_psci_fn = __invoke_psci_fn_hvc; break; - case PSCI_CONDUIT_SMC: + case SMCCC_CONDUIT_SMC: invoke_psci_fn = __invoke_psci_fn_smc; break; default: @@ -255,9 +248,9 @@ static int get_set_conduit_method(struct device_node *np) } if (!strcmp("hvc", method)) { - set_conduit(PSCI_CONDUIT_HVC); + set_conduit(SMCCC_CONDUIT_HVC); } else if (!strcmp("smc", method)) { - set_conduit(PSCI_CONDUIT_SMC); + set_conduit(SMCCC_CONDUIT_SMC); } else { pr_warn("invalid \"method\" property: %s\n", method); return -EINVAL; @@ -598,9 +591,9 @@ int __init psci_acpi_init(void) pr_info("probing for conduit method from ACPI.\n"); if (acpi_psci_use_hvc()) - set_conduit(PSCI_CONDUIT_HVC); + set_conduit(SMCCC_CONDUIT_HVC); else - set_conduit(PSCI_CONDUIT_SMC); + set_conduit(SMCCC_CONDUIT_SMC); return psci_probe(); } diff --git a/include/linux/psci.h b/include/linux/psci.h index e2bacc6fd2f2..ebe0a881d13d 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -7,6 +7,7 @@ #ifndef __LINUX_PSCI_H #define __LINUX_PSCI_H +#include #include #include @@ -18,12 +19,6 @@ bool psci_tos_resident_on(int cpu); int psci_cpu_suspend_enter(u32 state); bool psci_power_state_is_valid(u32 state); -enum psci_conduit { - PSCI_CONDUIT_NONE, - PSCI_CONDUIT_SMC, - PSCI_CONDUIT_HVC, -}; - enum smccc_version { SMCCC_VERSION_1_0, SMCCC_VERSION_1_1, @@ -38,7 +33,7 @@ struct psci_operations { int (*affinity_info)(unsigned long target_affinity, unsigned long lowest_affinity_level); int (*migrate_info_type)(void); - enum psci_conduit conduit; + enum arm_smccc_conduit conduit; enum smccc_version smccc_version; }; From e6ea46511b1ae8c4491904c79411fcd29139af14 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 9 Aug 2019 14:22:44 +0100 Subject: [PATCH 14/93] firmware: arm_sdei: use common SMCCC_CONDUIT_* Now that we have common definitions for SMCCC conduits, move the SDEI code over to them, and remove the SDEI-specific definitions. There should be no functional change as a result of this patch. Signed-off-by: Mark Rutland Acked-by: Lorenzo Pieralisi Acked-by: James Morse Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/sdei.c | 3 ++- drivers/firmware/arm_sdei.c | 12 ++++++------ include/linux/arm_sdei.h | 6 ------ 3 files changed, 8 insertions(+), 13 deletions(-) diff --git a/arch/arm64/kernel/sdei.c b/arch/arm64/kernel/sdei.c index ea94cf8f9dc6..d6259dac62b6 100644 --- a/arch/arm64/kernel/sdei.c +++ b/arch/arm64/kernel/sdei.c @@ -2,6 +2,7 @@ // Copyright (C) 2017 Arm Ltd. #define pr_fmt(fmt) "sdei: " fmt +#include #include #include #include @@ -161,7 +162,7 @@ unsigned long sdei_arch_get_entry_point(int conduit) return 0; } - sdei_exit_mode = (conduit == CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC; + sdei_exit_mode = (conduit == SMCCC_CONDUIT_HVC) ? SDEI_EXIT_HVC : SDEI_EXIT_SMC; #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 if (arm64_kernel_unmapped_at_el0()) { diff --git a/drivers/firmware/arm_sdei.c b/drivers/firmware/arm_sdei.c index 9cd70d1a5622..a479023fa036 100644 --- a/drivers/firmware/arm_sdei.c +++ b/drivers/firmware/arm_sdei.c @@ -967,29 +967,29 @@ static int sdei_get_conduit(struct platform_device *pdev) if (np) { if (of_property_read_string(np, "method", &method)) { pr_warn("missing \"method\" property\n"); - return CONDUIT_INVALID; + return SMCCC_CONDUIT_NONE; } if (!strcmp("hvc", method)) { sdei_firmware_call = &sdei_smccc_hvc; - return CONDUIT_HVC; + return SMCCC_CONDUIT_HVC; } else if (!strcmp("smc", method)) { sdei_firmware_call = &sdei_smccc_smc; - return CONDUIT_SMC; + return SMCCC_CONDUIT_SMC; } pr_warn("invalid \"method\" property: %s\n", method); } else if (IS_ENABLED(CONFIG_ACPI) && !acpi_disabled) { if (acpi_psci_use_hvc()) { sdei_firmware_call = &sdei_smccc_hvc; - return CONDUIT_HVC; + return SMCCC_CONDUIT_HVC; } else { sdei_firmware_call = &sdei_smccc_smc; - return CONDUIT_SMC; + return SMCCC_CONDUIT_SMC; } } - return CONDUIT_INVALID; + return SMCCC_CONDUIT_NONE; } static int sdei_probe(struct platform_device *pdev) diff --git a/include/linux/arm_sdei.h b/include/linux/arm_sdei.h index 3305ea7f9dc7..0a241c5c911d 100644 --- a/include/linux/arm_sdei.h +++ b/include/linux/arm_sdei.h @@ -5,12 +5,6 @@ #include -enum sdei_conduit_types { - CONDUIT_INVALID = 0, - CONDUIT_SMC, - CONDUIT_HVC, -}; - #include #ifdef CONFIG_ARM_SDE_INTERFACE From ae970dc096b2d39f65f2e18d142e3978dc9ee1c7 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 11 Sep 2019 20:25:43 +0200 Subject: [PATCH 15/93] arm64: mm: use arm64_dma_phys_limit instead of calling max_zone_dma_phys() By the time we call zones_sizes_init() arm64_dma_phys_limit already contains the result of max_zone_dma_phys(). We use the variable instead of calling the function directly to save some precious cpu time. Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 45c00a54909c..098c0f5bedf6 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -187,7 +187,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(max_zone_dma_phys()); + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma_phys_limit); #endif max_zone_pfns[ZONE_NORMAL] = max; From a573cdd7973dedd87e62196c400332896bb236c8 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 11 Sep 2019 20:25:44 +0200 Subject: [PATCH 16/93] arm64: rename variables used to calculate ZONE_DMA32's size Let the name indicate that they are used to calculate ZONE_DMA32's size as opposed to ZONE_DMA. Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 098c0f5bedf6..8e9bc64c5878 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -56,7 +56,7 @@ EXPORT_SYMBOL(physvirt_offset); struct page *vmemmap __ro_after_init; EXPORT_SYMBOL(vmemmap); -phys_addr_t arm64_dma_phys_limit __ro_after_init; +phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE /* @@ -174,7 +174,7 @@ static void __init reserve_elfcorehdr(void) * currently assumes that for memory starting above 4G, 32-bit devices will * use a DMA offset. */ -static phys_addr_t __init max_zone_dma_phys(void) +static phys_addr_t __init max_zone_dma32_phys(void) { phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32); return min(offset + (1ULL << 32), memblock_end_of_DRAM()); @@ -187,7 +187,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; #ifdef CONFIG_ZONE_DMA32 - max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma_phys_limit); + max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit); #endif max_zone_pfns[ZONE_NORMAL] = max; @@ -200,16 +200,16 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) { struct memblock_region *reg; unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; - unsigned long max_dma = min; + unsigned long max_dma32 = min; memset(zone_size, 0, sizeof(zone_size)); /* 4GB maximum for 32-bit only capable devices */ #ifdef CONFIG_ZONE_DMA32 - max_dma = PFN_DOWN(arm64_dma_phys_limit); - zone_size[ZONE_DMA32] = max_dma - min; + max_dma32 = PFN_DOWN(arm64_dma32_phys_limit); + zone_size[ZONE_DMA32] = max_dma32 - min; #endif - zone_size[ZONE_NORMAL] = max - max_dma; + zone_size[ZONE_NORMAL] = max - max_dma32; memcpy(zhole_size, zone_size, sizeof(zhole_size)); @@ -221,14 +221,14 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) continue; #ifdef CONFIG_ZONE_DMA32 - if (start < max_dma) { - unsigned long dma_end = min(end, max_dma); + if (start < max_dma32) { + unsigned long dma_end = min(end, max_dma32); zhole_size[ZONE_DMA32] -= dma_end - start; } #endif - if (end > max_dma) { + if (end > max_dma32) { unsigned long normal_end = min(end, max); - unsigned long normal_start = max(start, max_dma); + unsigned long normal_start = max(start, max_dma32); zhole_size[ZONE_NORMAL] -= normal_end - normal_start; } } @@ -420,9 +420,9 @@ void __init arm64_memblock_init(void) /* 4GB maximum for 32-bit only capable devices */ if (IS_ENABLED(CONFIG_ZONE_DMA32)) - arm64_dma_phys_limit = max_zone_dma_phys(); + arm64_dma32_phys_limit = max_zone_dma32_phys(); else - arm64_dma_phys_limit = PHYS_MASK + 1; + arm64_dma32_phys_limit = PHYS_MASK + 1; reserve_crashkernel(); @@ -430,7 +430,7 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; - dma_contiguous_reserve(arm64_dma_phys_limit); + dma_contiguous_reserve(arm64_dma32_phys_limit); } void __init bootmem_init(void) @@ -534,7 +534,7 @@ static void __init free_unused_memmap(void) void __init mem_init(void) { if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) + max_pfn > (arm64_dma32_phys_limit >> PAGE_SHIFT)) swiotlb_init(1); else swiotlb_force = SWIOTLB_NO_FORCE; From 1a8e1cef7603e218339ac63cb3178b25554524e5 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 11 Sep 2019 20:25:45 +0200 Subject: [PATCH 17/93] arm64: use both ZONE_DMA and ZONE_DMA32 So far all arm64 devices have supported 32 bit DMA masks for their peripherals. This is not true anymore for the Raspberry Pi 4 as most of it's peripherals can only address the first GB of memory on a total of up to 4 GB. This goes against ZONE_DMA32's intent, as it's expected for ZONE_DMA32 to be addressable with a 32 bit mask. So it was decided to re-introduce ZONE_DMA in arm64. ZONE_DMA will contain the lower 1G of memory, which is currently the memory area addressable by any peripheral on an arm64 device. ZONE_DMA32 will contain the rest of the 32 bit addressable memory. Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 4 +++ arch/arm64/include/asm/page.h | 2 ++ arch/arm64/mm/init.c | 54 +++++++++++++++++++++++++---------- 3 files changed, 45 insertions(+), 15 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 950a56b71ff0..1b6ea5a9d1a6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -266,6 +266,10 @@ config GENERIC_CSUM config GENERIC_CALIBRATE_DELAY def_bool y +config ZONE_DMA + bool "Support DMA zone" if EXPERT + default y + config ZONE_DMA32 bool "Support DMA32 zone" if EXPERT default y diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index d39ddb258a04..7b8c98830101 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -38,4 +38,6 @@ extern int pfn_valid(unsigned long); #include +#define ARCH_ZONE_DMA_BITS 30 + #endif diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 8e9bc64c5878..44f07fdf7a59 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -56,6 +56,13 @@ EXPORT_SYMBOL(physvirt_offset); struct page *vmemmap __ro_after_init; EXPORT_SYMBOL(vmemmap); +/* + * We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of + * memory as some devices, namely the Raspberry Pi 4, have peripherals with + * this limited view of the memory. ZONE_DMA32 will cover the rest of the 32 + * bit addressable memory area. + */ +phys_addr_t arm64_dma_phys_limit __ro_after_init; phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE @@ -169,15 +176,16 @@ static void __init reserve_elfcorehdr(void) { } #endif /* CONFIG_CRASH_DUMP */ + /* - * Return the maximum physical address for ZONE_DMA32 (DMA_BIT_MASK(32)). It - * currently assumes that for memory starting above 4G, 32-bit devices will - * use a DMA offset. + * Return the maximum physical address for a zone with a given address size + * limit. It currently assumes that for memory starting above 4G, 32-bit + * devices will use a DMA offset. */ -static phys_addr_t __init max_zone_dma32_phys(void) +static phys_addr_t __init max_zone_phys(unsigned int zone_bits) { - phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, 32); - return min(offset + (1ULL << 32), memblock_end_of_DRAM()); + phys_addr_t offset = memblock_start_of_DRAM() & GENMASK_ULL(63, zone_bits); + return min(offset + (1ULL << zone_bits), memblock_end_of_DRAM()); } #ifdef CONFIG_NUMA @@ -186,6 +194,9 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) { unsigned long max_zone_pfns[MAX_NR_ZONES] = {0}; +#ifdef CONFIG_ZONE_DMA + max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit); +#endif #ifdef CONFIG_ZONE_DMA32 max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit); #endif @@ -201,13 +212,18 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) struct memblock_region *reg; unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; unsigned long max_dma32 = min; + unsigned long max_dma = min; memset(zone_size, 0, sizeof(zone_size)); - /* 4GB maximum for 32-bit only capable devices */ +#ifdef CONFIG_ZONE_DMA + max_dma = PFN_DOWN(arm64_dma_phys_limit); + zone_size[ZONE_DMA] = max_dma - min; + max_dma32 = max_dma; +#endif #ifdef CONFIG_ZONE_DMA32 max_dma32 = PFN_DOWN(arm64_dma32_phys_limit); - zone_size[ZONE_DMA32] = max_dma32 - min; + zone_size[ZONE_DMA32] = max_dma32 - max_dma; #endif zone_size[ZONE_NORMAL] = max - max_dma32; @@ -219,11 +235,17 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) if (start >= max) continue; - +#ifdef CONFIG_ZONE_DMA + if (start < max_dma) { + unsigned long dma_end = min_not_zero(end, max_dma); + zhole_size[ZONE_DMA] -= dma_end - start; + } +#endif #ifdef CONFIG_ZONE_DMA32 if (start < max_dma32) { - unsigned long dma_end = min(end, max_dma32); - zhole_size[ZONE_DMA32] -= dma_end - start; + unsigned long dma32_end = min(end, max_dma32); + unsigned long dma32_start = max(start, max_dma); + zhole_size[ZONE_DMA32] -= dma32_end - dma32_start; } #endif if (end > max_dma32) { @@ -418,9 +440,11 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); - /* 4GB maximum for 32-bit only capable devices */ + if (IS_ENABLED(CONFIG_ZONE_DMA)) + arm64_dma_phys_limit = max_zone_phys(ARCH_ZONE_DMA_BITS); + if (IS_ENABLED(CONFIG_ZONE_DMA32)) - arm64_dma32_phys_limit = max_zone_dma32_phys(); + arm64_dma32_phys_limit = max_zone_phys(32); else arm64_dma32_phys_limit = PHYS_MASK + 1; @@ -430,7 +454,7 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; - dma_contiguous_reserve(arm64_dma32_phys_limit); + dma_contiguous_reserve(arm64_dma_phys_limit ? : arm64_dma32_phys_limit); } void __init bootmem_init(void) @@ -534,7 +558,7 @@ static void __init free_unused_memmap(void) void __init mem_init(void) { if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > (arm64_dma32_phys_limit >> PAGE_SHIFT)) + max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit)) swiotlb_init(1); else swiotlb_force = SWIOTLB_NO_FORCE; From 734f9246e791d8da278957b2c326d7709b2a97c0 Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Wed, 11 Sep 2019 20:25:46 +0200 Subject: [PATCH 18/93] mm: refresh ZONE_DMA and ZONE_DMA32 comments in 'enum zone_type' These zones usage has evolved with time and the comments were outdated. This joins both ZONE_DMA and ZONE_DMA32 explanation and gives up to date examples on how they are used on different architectures. Signed-off-by: Nicolas Saenz Julienne Reviewed-by: Christoph Hellwig Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- include/linux/mmzone.h | 45 ++++++++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index bda20282746b..b0a36d1580b6 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -359,33 +359,40 @@ struct per_cpu_nodestat { #endif /* !__GENERATING_BOUNDS.H */ enum zone_type { -#ifdef CONFIG_ZONE_DMA /* - * ZONE_DMA is used when there are devices that are not able - * to do DMA to all of addressable memory (ZONE_NORMAL). Then we - * carve out the portion of memory that is needed for these devices. - * The range is arch specific. + * ZONE_DMA and ZONE_DMA32 are used when there are peripherals not able + * to DMA to all of the addressable memory (ZONE_NORMAL). + * On architectures where this area covers the whole 32 bit address + * space ZONE_DMA32 is used. ZONE_DMA is left for the ones with smaller + * DMA addressing constraints. This distinction is important as a 32bit + * DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit + * platforms may need both zones as they support peripherals with + * different DMA addressing limitations. * - * Some examples + * Some examples: * - * Architecture Limit - * --------------------------- - * parisc, ia64, sparc <4G - * s390, powerpc <2G - * arm Various - * alpha Unlimited or 0-16MB. + * - i386 and x86_64 have a fixed 16M ZONE_DMA and ZONE_DMA32 for the + * rest of the lower 4G. * - * i386, x86_64 and multiple other arches - * <16M. + * - arm only uses ZONE_DMA, the size, up to 4G, may vary depending on + * the specific device. + * + * - arm64 has a fixed 1G ZONE_DMA and ZONE_DMA32 for the rest of the + * lower 4G. + * + * - powerpc only uses ZONE_DMA, the size, up to 2G, may vary + * depending on the specific device. + * + * - s390 uses ZONE_DMA fixed to the lower 2G. + * + * - ia64 and riscv only use ZONE_DMA32. + * + * - parisc uses neither. */ +#ifdef CONFIG_ZONE_DMA ZONE_DMA, #endif #ifdef CONFIG_ZONE_DMA32 - /* - * x86_64 needs two ZONE_DMAs because it supports devices that are - * only able to do DMA to the lower 16M but also 32 bit devices that - * can only do DMA areas below 4G. - */ ZONE_DMA32, #endif /* From f226650494c6aa87526d12135b7de8b8c074f3de Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 2 Oct 2019 10:06:12 +0100 Subject: [PATCH 19/93] arm64: Relax ICC_PMR_EL1 accesses when ICC_CTLR_EL1.PMHE is clear The GICv3 architecture specification is incredibly misleading when it comes to PMR and the requirement for a DSB. It turns out that this DSB is only required if the CPU interface sends an Upstream Control message to the redistributor in order to update the RD's view of PMR. This message is only sent when ICC_CTLR_EL1.PMHE is set, which isn't the case in Linux. It can still be set from EL3, so some special care is required. But the upshot is that in the (hopefuly large) majority of the cases, we can drop the DSB altogether. This relies on a new static key being set if the boot CPU has PMHE set. The drawback is that this static key has to be exported to modules. Cc: Will Deacon Cc: James Morse Cc: Julien Thierry Cc: Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/barrier.h | 12 ++++++++++++ arch/arm64/include/asm/daifflags.h | 3 ++- arch/arm64/include/asm/irqflags.h | 19 ++++++++++--------- arch/arm64/include/asm/kvm_host.h | 3 +-- arch/arm64/kernel/entry.S | 6 ++++-- arch/arm64/kvm/hyp/switch.c | 4 ++-- drivers/irqchip/irq-gic-v3.c | 20 ++++++++++++++++++++ include/linux/irqchip/arm-gic-v3.h | 2 ++ 8 files changed, 53 insertions(+), 16 deletions(-) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index e0e2b1946f42..7d9cc5ec4971 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -29,6 +29,18 @@ SB_BARRIER_INSN"nop\n", \ ARM64_HAS_SB)) +#ifdef CONFIG_ARM64_PSEUDO_NMI +#define pmr_sync() \ + do { \ + extern struct static_key_false gic_pmr_sync; \ + \ + if (static_branch_unlikely(&gic_pmr_sync)) \ + dsb(sy); \ + } while(0) +#else +#define pmr_sync() do {} while (0) +#endif + #define mb() dsb(sy) #define rmb() dsb(ld) #define wmb() dsb(st) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 063c964af705..53cd5fab79a8 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -8,6 +8,7 @@ #include #include +#include #include #define DAIF_PROCCTX 0 @@ -65,7 +66,7 @@ static inline void local_daif_restore(unsigned long flags) if (system_uses_irq_prio_masking()) { gic_write_pmr(GIC_PRIO_IRQON); - dsb(sy); + pmr_sync(); } } else if (system_uses_irq_prio_masking()) { u64 pmr; diff --git a/arch/arm64/include/asm/irqflags.h b/arch/arm64/include/asm/irqflags.h index 1a59f0ed1ae3..aa4b6521ef14 100644 --- a/arch/arm64/include/asm/irqflags.h +++ b/arch/arm64/include/asm/irqflags.h @@ -6,6 +6,7 @@ #define __ASM_IRQFLAGS_H #include +#include #include #include @@ -34,14 +35,14 @@ static inline void arch_local_irq_enable(void) } asm volatile(ALTERNATIVE( - "msr daifclr, #2 // arch_local_irq_enable\n" - "nop", - __msr_s(SYS_ICC_PMR_EL1, "%0") - "dsb sy", + "msr daifclr, #2 // arch_local_irq_enable", + __msr_s(SYS_ICC_PMR_EL1, "%0"), ARM64_HAS_IRQ_PRIO_MASKING) : : "r" ((unsigned long) GIC_PRIO_IRQON) : "memory"); + + pmr_sync(); } static inline void arch_local_irq_disable(void) @@ -116,14 +117,14 @@ static inline unsigned long arch_local_irq_save(void) static inline void arch_local_irq_restore(unsigned long flags) { asm volatile(ALTERNATIVE( - "msr daif, %0\n" - "nop", - __msr_s(SYS_ICC_PMR_EL1, "%0") - "dsb sy", - ARM64_HAS_IRQ_PRIO_MASKING) + "msr daif, %0", + __msr_s(SYS_ICC_PMR_EL1, "%0"), + ARM64_HAS_IRQ_PRIO_MASKING) : : "r" (flags) : "memory"); + + pmr_sync(); } #endif /* __ASM_IRQFLAGS_H */ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index f656169db8c3..5ecb091c8576 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -600,8 +600,7 @@ static inline void kvm_arm_vhe_guest_enter(void) * local_daif_mask() already sets GIC_PRIO_PSR_I_SET, we just need a * dsb to ensure the redistributor is forwards EL2 IRQs to the CPU. */ - if (system_uses_irq_prio_masking()) - dsb(sy); + pmr_sync(); } static inline void kvm_arm_vhe_guest_exit(void) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e304fe04b098..0a44f21bf087 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -269,8 +269,10 @@ alternative_else_nop_endif alternative_if ARM64_HAS_IRQ_PRIO_MASKING ldr x20, [sp, #S_PMR_SAVE] msr_s SYS_ICC_PMR_EL1, x20 - /* Ensure priority change is seen by redistributor */ - dsb sy + mrs_s x21, SYS_ICC_CTLR_EL1 + tbz x21, #6, .L__skip_pmr_sync\@ // Check for ICC_CTLR_EL1.PMHE + dsb sy // Ensure priority change is seen by redistributor +.L__skip_pmr_sync\@: alternative_else_nop_endif ldp x21, x22, [sp, #S_PC] // load ELR, SPSR diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 3d3815020e36..402f18664f25 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -12,7 +12,7 @@ #include -#include +#include #include #include #include @@ -592,7 +592,7 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) */ if (system_uses_irq_prio_masking()) { gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); - dsb(sy); + pmr_sync(); } vcpu = kern_hyp_va(vcpu); diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 422664ac5f53..0abc5a13adaa 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -87,6 +87,15 @@ static DEFINE_STATIC_KEY_TRUE(supports_deactivate_key); */ static DEFINE_STATIC_KEY_FALSE(supports_pseudo_nmis); +/* + * Global static key controlling whether an update to PMR allowing more + * interrupts requires to be propagated to the redistributor (DSB SY). + * And this needs to be exported for modules to be able to enable + * interrupts... + */ +DEFINE_STATIC_KEY_FALSE(gic_pmr_sync); +EXPORT_SYMBOL(gic_pmr_sync); + /* ppi_nmi_refs[n] == number of cpus having ppi[n + 16] set as NMI */ static refcount_t *ppi_nmi_refs; @@ -1502,6 +1511,17 @@ static void gic_enable_nmi_support(void) for (i = 0; i < gic_data.ppi_nr; i++) refcount_set(&ppi_nmi_refs[i], 0); + /* + * Linux itself doesn't use 1:N distribution, so has no need to + * set PMHE. The only reason to have it set is if EL3 requires it + * (and we can't change it). + */ + if (gic_read_ctlr() & ICC_CTLR_EL1_PMHE_MASK) + static_branch_enable(&gic_pmr_sync); + + pr_info("%s ICC_PMR_EL1 synchronisation\n", + static_branch_unlikely(&gic_pmr_sync) ? "Forcing" : "Relaxing"); + static_branch_enable(&supports_pseudo_nmis); if (static_branch_likely(&supports_deactivate_key)) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 5cc10cf7cb3e..a0bde9e12efa 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -487,6 +487,8 @@ #define ICC_CTLR_EL1_EOImode_MASK (1 << ICC_CTLR_EL1_EOImode_SHIFT) #define ICC_CTLR_EL1_CBPR_SHIFT 0 #define ICC_CTLR_EL1_CBPR_MASK (1 << ICC_CTLR_EL1_CBPR_SHIFT) +#define ICC_CTLR_EL1_PMHE_SHIFT 6 +#define ICC_CTLR_EL1_PMHE_MASK (1 << ICC_CTLR_EL1_PMHE_SHIFT) #define ICC_CTLR_EL1_PRI_BITS_SHIFT 8 #define ICC_CTLR_EL1_PRI_BITS_MASK (0x7 << ICC_CTLR_EL1_PRI_BITS_SHIFT) #define ICC_CTLR_EL1_ID_BITS_SHIFT 11 From 7e3a57fa6ca831fa232a7cd4659eaed674236810 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 2 Oct 2019 10:06:13 +0100 Subject: [PATCH 20/93] arm64: Document ICC_CTLR_EL3.PMHE setting requirements It goes without saying, but better saying it: the kernel expects ICC_CTLR_EL3.PMHE to have the same value across all CPUs, and for that setting not to change during the lifetime of the kernel. Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- Documentation/arm64/booting.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/Documentation/arm64/booting.rst b/Documentation/arm64/booting.rst index d3f3a60fbf25..5d78a6f5b0ae 100644 --- a/Documentation/arm64/booting.rst +++ b/Documentation/arm64/booting.rst @@ -213,6 +213,9 @@ Before jumping into the kernel, the following conditions must be met: - ICC_SRE_EL3.Enable (bit 3) must be initialiased to 0b1. - ICC_SRE_EL3.SRE (bit 0) must be initialised to 0b1. + - ICC_CTLR_EL3.PMHE (bit 6) must be set to the same value across + all CPUs the kernel is executing on, and must stay constant + for the lifetime of the kernel. - If the kernel is entered at EL1: From 899ee4afe5eb262236717188ccdaa0192c00dc5a Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Sat, 28 Sep 2019 11:02:26 +0300 Subject: [PATCH 21/93] arm64: use generic free_initrd_mem() arm64 calls memblock_free() for the initrd area in its implementation of free_initrd_mem(), but this call has no actual effect that late in the boot process. By the time initrd is freed, all the reserved memory is managed by the page allocator and the memblock.reserved is unused, so the only purpose of the memblock_free() call is to keep track of initrd memory for debugging and accounting. Without the memblock_free() call the only difference between arm64 and the generic versions of free_initrd_mem() is the memory poisoning. Move memblock_free() call to the generic code, enable it there for the architectures that define ARCH_KEEP_MEMBLOCK and use the generic implementation of free_initrd_mem() on arm64. Tested-by: Anshuman Khandual #arm64 Reviewed-by: Anshuman Khandual Acked-by: Will Deacon Signed-off-by: Mike Rapoport Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 12 ------------ init/initramfs.c | 8 ++++++++ 2 files changed, 8 insertions(+), 12 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 45c00a54909c..87a0e3b6c146 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -580,18 +580,6 @@ void free_initmem(void) unmap_kernel_range((u64)__init_begin, (u64)(__init_end - __init_begin)); } -#ifdef CONFIG_BLK_DEV_INITRD -void __init free_initrd_mem(unsigned long start, unsigned long end) -{ - unsigned long aligned_start, aligned_end; - - aligned_start = __virt_to_phys(start) & PAGE_MASK; - aligned_end = PAGE_ALIGN(__virt_to_phys(end)); - memblock_free(aligned_start, aligned_end - aligned_start); - free_reserved_area((void *)start, (void *)end, 0, "initrd"); -} -#endif - /* * Dump out memory limit information on panic. */ diff --git a/init/initramfs.c b/init/initramfs.c index c47dad0884f7..8ec1be4d7d51 100644 --- a/init/initramfs.c +++ b/init/initramfs.c @@ -10,6 +10,7 @@ #include #include #include +#include static ssize_t __init xwrite(int fd, const char *p, size_t count) { @@ -529,6 +530,13 @@ extern unsigned long __initramfs_size; void __weak free_initrd_mem(unsigned long start, unsigned long end) { +#ifdef CONFIG_ARCH_KEEP_MEMBLOCK + unsigned long aligned_start = ALIGN_DOWN(start, PAGE_SIZE); + unsigned long aligned_end = ALIGN(end, PAGE_SIZE); + + memblock_free(__pa(aligned_start), aligned_end - aligned_start); +#endif + free_reserved_area((void *)start, (void *)end, POISON_FREE_INITMEM, "initrd"); } From 6ec939f8b809cb06ba7802e17ef7024d1bc0ee84 Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 4 Oct 2019 09:53:58 +0530 Subject: [PATCH 22/93] arm64/mm: Poison initmem while freeing with free_reserved_area() Platform implementation for free_initmem() should poison the memory while freeing it up. Hence pass across POISON_FREE_INITMEM while calling into free_reserved_area(). The same is being followed in the generic fallback for free_initmem() and some other platforms overriding it. Cc: Mark Rutland Cc: linux-kernel@vger.kernel.org Reviewed-by: Steven Price Acked-by: Will Deacon Signed-off-by: Anshuman Khandual Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 87a0e3b6c146..7c225d0132b8 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -571,7 +571,7 @@ void free_initmem(void) { free_reserved_area(lm_alias(__init_begin), lm_alias(__init_end), - 0, "unused kernel"); + POISON_FREE_INITMEM, "unused kernel"); /* * Unmap the __init region but leave the VM area in place. This * prevents the region from being reused for kernel modules, which From 4399d430700d3974ed6c5a1b1380bc6527f17e99 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 16 Oct 2019 07:47:14 -0700 Subject: [PATCH 23/93] arm64: mm: Fix unused variable warning in zone_sizes_init When building arm64 allnoconfig, CONFIG_ZONE_DMA and CONFIG_ZONE_DMA32 get disabled so there is a warning about max_dma being unused. ../arch/arm64/mm/init.c:215:16: warning: unused variable 'max_dma' [-Wunused-variable] unsigned long max_dma = min; ^ 1 warning generated. Add __maybe_unused to make this clear to the compiler. Fixes: 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32") Reviewed-by: Nicolas Saenz Julienne Signed-off-by: Nathan Chancellor Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 44f07fdf7a59..71b45c58218b 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -212,7 +212,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max) struct memblock_region *reg; unsigned long zone_size[MAX_NR_ZONES], zhole_size[MAX_NR_ZONES]; unsigned long max_dma32 = min; - unsigned long max_dma = min; + unsigned long __maybe_unused max_dma = min; memset(zone_size, 0, sizeof(zone_size)); From 47d7b15b88f96a90694cfc607d0717d62dff6c45 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 11 Oct 2019 22:09:36 +0800 Subject: [PATCH 24/93] arm64: cpufeature: introduce helper cpu_has_hw_af() We unconditionally set the HW_AFDBM capability and only enable it on CPUs which really have the feature. But sometimes we need to know whether this cpu has the capability of HW AF. So decouple AF from DBM by a new helper cpu_has_hw_af(). If later we noticed a potential performance issue on this path, we can turn it into a static label as with other CPU features. Signed-off-by: Jia He Suggested-by: Suzuki Poulose Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 9cde5d2e768f..4261d55e8506 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -659,6 +659,20 @@ static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange) default: return CONFIG_ARM64_PA_BITS; } } + +/* Check whether hardware update of the Access flag is supported */ +static inline bool cpu_has_hw_af(void) +{ + u64 mmfr1; + + if (!IS_ENABLED(CONFIG_ARM64_HW_AFDBM)) + return false; + + mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); + return cpuid_feature_extract_unsigned_field(mmfr1, + ID_AA64MMFR1_HADBS_SHIFT); +} + #endif /* __ASSEMBLY__ */ #endif From 6af31226d0394691f5562eca0134262bb935fa9c Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 11 Oct 2019 22:09:37 +0800 Subject: [PATCH 25/93] arm64: mm: implement arch_faults_on_old_pte() on arm64 On arm64 without hardware Access Flag, copying from user will fail because the pte is old and cannot be marked young. So we always end up with zeroed page after fork() + CoW for pfn mappings. We don't always have a hardware-managed Access Flag on arm64. Hence implement arch_faults_on_old_pte on arm64 to indicate that it might cause page fault when accessing old pte. Signed-off-by: Jia He Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7576df00eb50..e96fb82f62de 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -885,6 +885,20 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #define phys_to_ttbr(addr) (addr) #endif +/* + * On arm64 without hardware Access Flag, copying from user will fail because + * the pte is old and cannot be marked young. So we always end up with zeroed + * page after fork() + CoW for pfn mappings. We don't always have a + * hardware-managed access flag on arm64. + */ +static inline bool arch_faults_on_old_pte(void) +{ + WARN_ON(preemptible()); + + return !cpu_has_hw_af(); +} +#define arch_faults_on_old_pte arch_faults_on_old_pte + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_PGTABLE_H */ From f2c4e5970cece75a895fcc45f0cd66b5a5ec0819 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 11 Oct 2019 22:09:38 +0800 Subject: [PATCH 26/93] x86/mm: implement arch_faults_on_old_pte() stub on x86 arch_faults_on_old_pte is a helper to indicate that it might cause page fault when accessing old pte. But on x86, there is feature to setting pte access flag by hardware. Hence implement an overriding stub which always returns false. Signed-off-by: Jia He Suggested-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/x86/include/asm/pgtable.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 0bc530c4eb13..ad97dc155195 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -1463,6 +1463,12 @@ static inline bool arch_has_pfn_modify_check(void) return boot_cpu_has_bug(X86_BUG_L1TF); } +#define arch_faults_on_old_pte arch_faults_on_old_pte +static inline bool arch_faults_on_old_pte(void) +{ + return false; +} + #include #endif /* __ASSEMBLY__ */ From 83d116c53058d505ddef051e90ab27f57015b025 Mon Sep 17 00:00:00 2001 From: Jia He Date: Fri, 11 Oct 2019 22:09:39 +0800 Subject: [PATCH 27/93] mm: fix double page fault on arm64 if PTE_AF is cleared When we tested pmdk unit test [1] vmmalloc_fork TEST3 on arm64 guest, there will be a double page fault in __copy_from_user_inatomic of cow_user_page. To reproduce the bug, the cmd is as follows after you deployed everything: make -C src/test/vmmalloc_fork/ TEST_TIME=60m check Below call trace is from arm64 do_page_fault for debugging purpose: [ 110.016195] Call trace: [ 110.016826] do_page_fault+0x5a4/0x690 [ 110.017812] do_mem_abort+0x50/0xb0 [ 110.018726] el1_da+0x20/0xc4 [ 110.019492] __arch_copy_from_user+0x180/0x280 [ 110.020646] do_wp_page+0xb0/0x860 [ 110.021517] __handle_mm_fault+0x994/0x1338 [ 110.022606] handle_mm_fault+0xe8/0x180 [ 110.023584] do_page_fault+0x240/0x690 [ 110.024535] do_mem_abort+0x50/0xb0 [ 110.025423] el0_da+0x20/0x24 The pte info before __copy_from_user_inatomic is (PTE_AF is cleared): [ffff9b007000] pgd=000000023d4f8003, pud=000000023da9b003, pmd=000000023d4b3003, pte=360000298607bd3 As told by Catalin: "On arm64 without hardware Access Flag, copying from user will fail because the pte is old and cannot be marked young. So we always end up with zeroed page after fork() + CoW for pfn mappings. we don't always have a hardware-managed access flag on arm64." This patch fixes it by calling pte_mkyoung. Also, the parameter is changed because vmf should be passed to cow_user_page() Add a WARN_ON_ONCE when __copy_from_user_inatomic() returns error in case there can be some obscure use-case (by Kirill). [1] https://github.com/pmem/pmdk/tree/master/src/test/vmmalloc_fork Signed-off-by: Jia He Reported-by: Yibo Cai Reviewed-by: Catalin Marinas Acked-by: Kirill A. Shutemov Signed-off-by: Catalin Marinas --- mm/memory.c | 104 ++++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 89 insertions(+), 15 deletions(-) diff --git a/mm/memory.c b/mm/memory.c index b1ca51a079f2..b6a5d6a08438 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -118,6 +118,18 @@ int randomize_va_space __read_mostly = 2; #endif +#ifndef arch_faults_on_old_pte +static inline bool arch_faults_on_old_pte(void) +{ + /* + * Those arches which don't have hw access flag feature need to + * implement their own helper. By default, "true" means pagefault + * will be hit on old pte. + */ + return true; +} +#endif + static int __init disable_randmaps(char *s) { randomize_va_space = 0; @@ -2145,32 +2157,82 @@ static inline int pte_unmap_same(struct mm_struct *mm, pmd_t *pmd, return same; } -static inline void cow_user_page(struct page *dst, struct page *src, unsigned long va, struct vm_area_struct *vma) +static inline bool cow_user_page(struct page *dst, struct page *src, + struct vm_fault *vmf) { + bool ret; + void *kaddr; + void __user *uaddr; + bool force_mkyoung; + struct vm_area_struct *vma = vmf->vma; + struct mm_struct *mm = vma->vm_mm; + unsigned long addr = vmf->address; + debug_dma_assert_idle(src); + if (likely(src)) { + copy_user_highpage(dst, src, addr, vma); + return true; + } + /* * If the source page was a PFN mapping, we don't have * a "struct page" for it. We do a best-effort copy by * just copying from the original user address. If that * fails, we just zero-fill it. Live with it. */ - if (unlikely(!src)) { - void *kaddr = kmap_atomic(dst); - void __user *uaddr = (void __user *)(va & PAGE_MASK); + kaddr = kmap_atomic(dst); + uaddr = (void __user *)(addr & PAGE_MASK); + /* + * On architectures with software "accessed" bits, we would + * take a double page fault, so mark it accessed here. + */ + force_mkyoung = arch_faults_on_old_pte() && !pte_young(vmf->orig_pte); + if (force_mkyoung) { + pte_t entry; + + vmf->pte = pte_offset_map_lock(mm, vmf->pmd, addr, &vmf->ptl); + if (!likely(pte_same(*vmf->pte, vmf->orig_pte))) { + /* + * Other thread has already handled the fault + * and we don't need to do anything. If it's + * not the case, the fault will be triggered + * again on the same address. + */ + ret = false; + goto pte_unlock; + } + + entry = pte_mkyoung(vmf->orig_pte); + if (ptep_set_access_flags(vma, addr, vmf->pte, entry, 0)) + update_mmu_cache(vma, addr, vmf->pte); + } + + /* + * This really shouldn't fail, because the page is there + * in the page tables. But it might just be unreadable, + * in which case we just give up and fill the result with + * zeroes. + */ + if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) { /* - * This really shouldn't fail, because the page is there - * in the page tables. But it might just be unreadable, - * in which case we just give up and fill the result with - * zeroes. + * Give a warn in case there can be some obscure + * use-case */ - if (__copy_from_user_inatomic(kaddr, uaddr, PAGE_SIZE)) - clear_page(kaddr); - kunmap_atomic(kaddr); - flush_dcache_page(dst); - } else - copy_user_highpage(dst, src, va, vma); + WARN_ON_ONCE(1); + clear_page(kaddr); + } + + ret = true; + +pte_unlock: + if (force_mkyoung) + pte_unmap_unlock(vmf->pte, vmf->ptl); + kunmap_atomic(kaddr); + flush_dcache_page(dst); + + return ret; } static gfp_t __get_fault_gfp_mask(struct vm_area_struct *vma) @@ -2327,7 +2389,19 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf) vmf->address); if (!new_page) goto oom; - cow_user_page(new_page, old_page, vmf->address, vma); + + if (!cow_user_page(new_page, old_page, vmf)) { + /* + * COW failed, if the fault was solved by other, + * it's fine. If not, userspace would re-fault on + * the same address and we will handle the fault + * from the second attempt. + */ + put_page(new_page); + if (old_page) + put_page(old_page); + return 0; + } } if (mem_cgroup_try_charge_delay(new_page, mm, GFP_KERNEL, &memcg, false)) From f75e2294a4415621b223150065c8d1e823896da5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 23 Nov 2018 17:25:52 +0000 Subject: [PATCH 28/93] arm64: Add ARM64_WORKAROUND_1319367 for all A57 and A72 versions Rework the EL2 vector hardening that is only selected for A57 and A72 so that the table can also be used for ARM64_WORKAROUND_1319367. Acked-by: Catalin Marinas Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier --- arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/kernel/cpu_errata.c | 13 ++++++++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index f19fe4b9acc4..277e37b2a513 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -52,7 +52,8 @@ #define ARM64_HAS_IRQ_PRIO_MASKING 42 #define ARM64_HAS_DCPODP 43 #define ARM64_WORKAROUND_1463225 44 +#define ARM64_WORKAROUND_1319367 45 -#define ARM64_NCAPS 45 +#define ARM64_NCAPS 46 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index f593f4cffc0d..a7afa6d4a58f 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -623,9 +623,9 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) return (need_wa > 0); } -#ifdef CONFIG_HARDEN_EL2_VECTORS +#if defined(CONFIG_HARDEN_EL2_VECTORS) || defined(CONFIG_ARM64_ERRATUM_1319367) -static const struct midr_range arm64_harden_el2_vectors[] = { +static const struct midr_range ca57_a72[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), {}, @@ -819,7 +819,7 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .desc = "EL2 vector hardening", .capability = ARM64_HARDEN_EL2_VECTORS, - ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors), + ERRATA_MIDR_RANGE_LIST(ca57_a72), }, #endif { @@ -851,6 +851,13 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = has_cortex_a76_erratum_1463225, }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_1319367 + { + .desc = "ARM erratum 1319367", + .capability = ARM64_WORKAROUND_1319367, + ERRATA_MIDR_RANGE_LIST(ca57_a72), + }, #endif { } From 7db3e57e6a95435cef5b33f2a90efcac5ce577da Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Fri, 25 Oct 2019 14:32:06 +0800 Subject: [PATCH 29/93] arm64: cpufeature: Fix typos in comment Fix up one typos: CTR_E0 -> CTR_EL0 Cc: Will Deacon Acked-by: Suzuki K Poulose Signed-off-by: Shaokun Zhang Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpuinfo.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 05933c065732..56bba746da1c 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -329,7 +329,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_cntfrq = arch_timer_get_cntfrq(); /* * Use the effective value of the CTR_EL0 than the raw value - * exposed by the CPU. CTR_E0.IDC field value must be interpreted + * exposed by the CPU. CTR_EL0.IDC field value must be interpreted * with the CLIDR_EL1 fields to avoid triggering false warnings * when there is a mismatch across the CPUs. Keep track of the * effective value of the CTR_EL0 in our internal records for From ade12b8631d91b9c2849facb0a1dc3af317ecbb3 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Wed, 23 Oct 2019 18:52:22 +0100 Subject: [PATCH 30/93] arm64: docs: cpu-feature-registers: Document ID_AA64PFR1_EL1 Commit d71be2b6c0e1 ("arm64: cpufeature: Detect SSBS and advertise to userspace") exposes ID_AA64PFR1_EL1 to userspace, but didn't update the documentation to match. Add it. Acked-by: Will Deacon Signed-off-by: Dave Martin Signed-off-by: Mark Brown Signed-off-by: Catalin Marinas --- Documentation/arm64/cpu-feature-registers.rst | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst index 2955287e9acc..b86828f86e39 100644 --- a/Documentation/arm64/cpu-feature-registers.rst +++ b/Documentation/arm64/cpu-feature-registers.rst @@ -168,8 +168,15 @@ infrastructure: +------------------------------+---------+---------+ - 3) MIDR_EL1 - Main ID Register + 3) ID_AA64PFR1_EL1 - Processor Feature Register 1 + +------------------------------+---------+---------+ + | Name | bits | visible | + +------------------------------+---------+---------+ + | SSBS | [7-4] | y | + +------------------------------+---------+---------+ + + 4) MIDR_EL1 - Main ID Register +------------------------------+---------+---------+ | Name | bits | visible | +------------------------------+---------+---------+ @@ -188,7 +195,7 @@ infrastructure: as available on the CPU where it is fetched and is not a system wide safe value. - 4) ID_AA64ISAR1_EL1 - Instruction set attribute register 1 + 5) ID_AA64ISAR1_EL1 - Instruction set attribute register 1 +------------------------------+---------+---------+ | Name | bits | visible | @@ -210,7 +217,7 @@ infrastructure: | DPB | [3-0] | y | +------------------------------+---------+---------+ - 5) ID_AA64MMFR2_EL1 - Memory model feature register 2 + 6) ID_AA64MMFR2_EL1 - Memory model feature register 2 +------------------------------+---------+---------+ | Name | bits | visible | @@ -218,7 +225,7 @@ infrastructure: | AT | [35-32] | y | +------------------------------+---------+---------+ - 6) ID_AA64ZFR0_EL1 - SVE feature ID register 0 + 7) ID_AA64ZFR0_EL1 - SVE feature ID register 0 +------------------------------+---------+---------+ | Name | bits | visible | From a5315819c5e7e50b2b457b60aaf2cc61d76888a2 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 24 Oct 2019 13:01:43 +0100 Subject: [PATCH 31/93] arm64: pgtable: Correct typo in comment vmmemmap -> vmemmap Signed-off-by: Mark Brown Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 7576df00eb50..4438a23f969c 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -17,7 +17,7 @@ * VMALLOC range. * * VMALLOC_START: beginning of the kernel vmalloc space - * VMALLOC_END: extends to the available space below vmmemmap, PCI I/O space + * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space * and fixed mappings */ #define VMALLOC_START (MODULES_END) From bfcef4ab1d7ee8921bc322109b1692036cc6cbe0 Mon Sep 17 00:00:00 2001 From: Yunfeng Ye Date: Mon, 21 Oct 2019 19:31:21 +0800 Subject: [PATCH 32/93] arm64: psci: Reduce the waiting time for cpu_psci_cpu_kill() In cases like suspend-to-disk and suspend-to-ram, a large number of CPU cores need to be shut down. At present, the CPU hotplug operation is serialised, and the CPU cores can only be shut down one by one. In this process, if PSCI affinity_info() does not return LEVEL_OFF quickly, cpu_psci_cpu_kill() needs to wait for 10ms. If hundreds of CPU cores need to be shut down, it will take a long time. Normally, there is no need to wait 10ms in cpu_psci_cpu_kill(). So change the wait interval from 10 ms to max 1 ms and use usleep_range() instead of msleep() for more accurate timer. In addition, reducing the time interval will increase the messages output, so remove the "Retry ..." message, instead, track time and output to the the sucessful message. Signed-off-by: Yunfeng Ye Reviewed-by: Sudeep Holla Signed-off-by: Catalin Marinas --- arch/arm64/kernel/psci.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index c9f72b2665f1..43ae4e0c968f 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -81,7 +81,8 @@ static void cpu_psci_cpu_die(unsigned int cpu) static int cpu_psci_cpu_kill(unsigned int cpu) { - int err, i; + int err; + unsigned long start, end; if (!psci_ops.affinity_info) return 0; @@ -91,16 +92,18 @@ static int cpu_psci_cpu_kill(unsigned int cpu) * while it is dying. So, try again a few times. */ - for (i = 0; i < 10; i++) { + start = jiffies; + end = start + msecs_to_jiffies(100); + do { err = psci_ops.affinity_info(cpu_logical_map(cpu), 0); if (err == PSCI_0_2_AFFINITY_LEVEL_OFF) { - pr_info("CPU%d killed.\n", cpu); + pr_info("CPU%d killed (polled %d ms)\n", cpu, + jiffies_to_msecs(jiffies - start)); return 0; } - msleep(10); - pr_info("Retrying again to check for CPU kill\n"); - } + usleep_range(100, 1000); + } while (time_before(jiffies, end)); pr_warn("CPU%d may not have shut down cleanly (AFFINITY_INFO reports %d)\n", cpu, err); From 05460849c3b51180d5ada3373d0449aea19075e4 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 17 Oct 2019 18:42:58 +0100 Subject: [PATCH 33/93] arm64: errata: Hide CTR_EL0.DIC on systems affected by Neoverse-N1 #1542419 Cores affected by Neoverse-N1 #1542419 could execute a stale instruction when a branch is updated to point to freshly generated instructions. To workaround this issue we need user-space to issue unnecessary icache maintenance that we can trap. Start by hiding CTR_EL0.DIC. Reviewed-by: Suzuki K Poulose Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- Documentation/arm64/silicon-errata.rst | 2 ++ arch/arm64/Kconfig | 16 +++++++++++++ arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/kernel/cpu_errata.c | 32 +++++++++++++++++++++++++- arch/arm64/kernel/traps.c | 3 +++ 5 files changed, 54 insertions(+), 2 deletions(-) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 17ea3fecddaa..d0d480dac361 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -88,6 +88,8 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Neoverse-N1 | #1349291 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Neoverse-N1 | #1542419 | ARM64_ERRATUM_1542419 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | MMU-500 | #841119,826419 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +----------------+-----------------+-----------------+-----------------------------+ diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 950a56b71ff0..7c3e3e334421 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -558,6 +558,22 @@ config ARM64_ERRATUM_1463225 If unsure, say Y. +config ARM64_ERRATUM_1542419 + bool "Neoverse-N1: workaround mis-ordering of instruction fetches" + default y + help + This option adds a workaround for ARM Neoverse-N1 erratum + 1542419. + + Affected Neoverse-N1 cores could execute a stale instruction when + modified by another CPU. The workaround depends on a firmware + counterpart. + + Workaround the issue by hiding the DIC feature from EL0. This + forces user-space to perform cache maintenance. + + If unsure, say Y. + config CAVIUM_ERRATUM_22375 bool "Cavium erratum 22375, 24313" default y diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index f19fe4b9acc4..f05afaec18cd 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -52,7 +52,8 @@ #define ARM64_HAS_IRQ_PRIO_MASKING 42 #define ARM64_HAS_DCPODP 43 #define ARM64_WORKAROUND_1463225 44 +#define ARM64_WORKAROUND_1542419 45 -#define ARM64_NCAPS 45 +#define ARM64_NCAPS 46 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index f593f4cffc0d..3ae9b78b6d94 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -87,13 +87,21 @@ has_mismatched_cache_type(const struct arm64_cpu_capabilities *entry, } static void -cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *__unused) +cpu_enable_trap_ctr_access(const struct arm64_cpu_capabilities *cap) { u64 mask = arm64_ftr_reg_ctrel0.strict_mask; + bool enable_uct_trap = false; /* Trap CTR_EL0 access on this CPU, only if it has a mismatch */ if ((read_cpuid_cachetype() & mask) != (arm64_ftr_reg_ctrel0.sys_val & mask)) + enable_uct_trap = true; + + /* ... or if the system is affected by an erratum */ + if (cap->capability == ARM64_WORKAROUND_1542419) + enable_uct_trap = true; + + if (enable_uct_trap) sysreg_clear_set(sctlr_el1, SCTLR_EL1_UCT, 0); } @@ -623,6 +631,18 @@ check_branch_predictor(const struct arm64_cpu_capabilities *entry, int scope) return (need_wa > 0); } +static bool __maybe_unused +has_neoverse_n1_erratum_1542419(const struct arm64_cpu_capabilities *entry, + int scope) +{ + u32 midr = read_cpuid_id(); + bool has_dic = read_cpuid_cachetype() & BIT(CTR_DIC_SHIFT); + const struct midr_range range = MIDR_ALL_VERSIONS(MIDR_NEOVERSE_N1); + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + return is_midr_in_range(midr, &range) && has_dic; +} + #ifdef CONFIG_HARDEN_EL2_VECTORS static const struct midr_range arm64_harden_el2_vectors[] = { @@ -851,6 +871,16 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, .matches = has_cortex_a76_erratum_1463225, }, +#endif +#ifdef CONFIG_ARM64_ERRATUM_1542419 + { + /* we depend on the firmware portion for correctness */ + .desc = "ARM erratum 1542419 (kernel portion)", + .capability = ARM64_WORKAROUND_1542419, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = has_neoverse_n1_erratum_1542419, + .cpu_enable = cpu_enable_trap_ctr_access, + }, #endif { } diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 34739e80211b..465f0a0f8f0a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -470,6 +470,9 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) int rt = ESR_ELx_SYS64_ISS_RT(esr); unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); + if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) + val &= ~BIT(CTR_DIC_SHIFT); + pt_regs_write_reg(regs, rt, val); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); From ee9d90be9ddace01b7fb126567e4b539fbe1f82f Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 17 Oct 2019 18:42:59 +0100 Subject: [PATCH 34/93] arm64: Fake the IminLine size on systems affected by Neoverse-N1 #1542419 Systems affected by Neoverse-N1 #1542419 support DIC so do not need to perform icache maintenance once new instructions are cleaned to the PoU. For the errata workaround, the kernel hides DIC from user-space, so that the unnecessary cache maintenance can be trapped by firmware. To reduce the number of traps, produce a fake IminLine value based on PAGE_SIZE. Signed-off-by: James Morse Reviewed-by: Suzuki K Poulose Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cache.h | 3 ++- arch/arm64/kernel/traps.c | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 43da6dd29592..806e9dc2a852 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -11,6 +11,7 @@ #define CTR_L1IP_MASK 3 #define CTR_DMINLINE_SHIFT 16 #define CTR_IMINLINE_SHIFT 0 +#define CTR_IMINLINE_MASK 0xf #define CTR_ERG_SHIFT 20 #define CTR_CWG_SHIFT 24 #define CTR_CWG_MASK 15 @@ -18,7 +19,7 @@ #define CTR_DIC_SHIFT 29 #define CTR_CACHE_MINLINE_MASK \ - (0xf << CTR_DMINLINE_SHIFT | 0xf << CTR_IMINLINE_SHIFT) + (0xf << CTR_DMINLINE_SHIFT | CTR_IMINLINE_MASK << CTR_IMINLINE_SHIFT) #define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 465f0a0f8f0a..4e3e9d9c8151 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -470,9 +470,15 @@ static void ctr_read_handler(unsigned int esr, struct pt_regs *regs) int rt = ESR_ELx_SYS64_ISS_RT(esr); unsigned long val = arm64_ftr_reg_user_value(&arm64_ftr_reg_ctrel0); - if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) + if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) { + /* Hide DIC so that we can trap the unnecessary maintenance...*/ val &= ~BIT(CTR_DIC_SHIFT); + /* ... and fake IminLine to reduce the number of traps. */ + val &= ~CTR_IMINLINE_MASK; + val |= (PAGE_SHIFT - 2) & CTR_IMINLINE_MASK; + } + pt_regs_write_reg(regs, rt, val); arm64_skip_faulting_instruction(regs, AARCH64_INSN_SIZE); From 222fc0c8503d98cec3cb2bac2780cdd21a6e31c0 Mon Sep 17 00:00:00 2001 From: James Morse Date: Thu, 17 Oct 2019 18:43:00 +0100 Subject: [PATCH 35/93] arm64: compat: Workaround Neoverse-N1 #1542419 for compat user-space Compat user-space is unable to perform ICIMVAU instructions from user-space. Instead it uses a compat-syscall. Add the workaround for Neoverse-N1 #1542419 to this code path. Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/kernel/sys_compat.c | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index f1cb64959427..c9fb02927d3e 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include @@ -17,6 +18,7 @@ #include #include +#include #include static long @@ -30,6 +32,15 @@ __do_compat_cache_op(unsigned long start, unsigned long end) if (fatal_signal_pending(current)) return 0; + if (cpus_have_const_cap(ARM64_WORKAROUND_1542419)) { + /* + * The workaround requires an inner-shareable tlbi. + * We pick the reserved-ASID to minimise the impact. + */ + __tlbi(aside1is, 0); + dsb(ish); + } + ret = __flush_cache_user_range(start, start + chunk); if (ret) return ret; From 1d8cd06af548bb1ba29e16eec78c0862e799a731 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 9 Jan 2019 14:46:23 +0000 Subject: [PATCH 36/93] arm64: KVM: Reorder system register restoration and stage-2 activation In order to prepare for handling erratum 1319367, we need to make sure that all system registers (and most importantly the registers configuring the virtual memory) are set before we enable stage-2 translation. This results in a minor reorganisation of the load sequence, without any functional change. Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/switch.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 3d3815020e36..69e10b29cbd0 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -605,18 +605,23 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) __sysreg_save_state_nvhe(host_ctxt); + /* + * We must restore the 32-bit state before the sysregs, thanks + * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). + * + * Also, and in order to be able to deal with erratum #1319537 (A57) + * and #1319367 (A72), we must ensure that all VM-related sysreg are + * restored before we enable S2 translation. + */ + __sysreg32_restore_state(vcpu); + __sysreg_restore_state_nvhe(guest_ctxt); + __activate_vm(kern_hyp_va(vcpu->kvm)); __activate_traps(vcpu); __hyp_vgic_restore_state(vcpu); __timer_enable_traps(vcpu); - /* - * We must restore the 32-bit state before the sysregs, thanks - * to erratum #852523 (Cortex-A57) or #853709 (Cortex-A72). - */ - __sysreg32_restore_state(vcpu); - __sysreg_restore_state_nvhe(guest_ctxt); __debug_switch_to_guest(vcpu); __set_guest_arch_workaround_state(vcpu); From 37553941c670c3ad160b25843e6cdcbee2b3c6eb Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 30 Jul 2019 10:50:38 +0100 Subject: [PATCH 37/93] arm64: KVM: Disable EL1 PTW when invalidating S2 TLBs When erratum 1319367 is being worked around, special care must be taken not to allow the page table walker to populate TLBs while we have the stage-2 translation enabled (which would otherwise result in a bizare mix of the host S1 and the guest S2). We enforce this by setting TCR_EL1.EPD{0,1} before restoring the S2 configuration, and clear the same bits after having disabled S2. Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/tlb.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/arch/arm64/kvm/hyp/tlb.c b/arch/arm64/kvm/hyp/tlb.c index eb0efc5557f3..c2bc17ca6430 100644 --- a/arch/arm64/kvm/hyp/tlb.c +++ b/arch/arm64/kvm/hyp/tlb.c @@ -63,6 +63,22 @@ static void __hyp_text __tlb_switch_to_guest_vhe(struct kvm *kvm, static void __hyp_text __tlb_switch_to_guest_nvhe(struct kvm *kvm, struct tlb_inv_context *cxt) { + if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + u64 val; + + /* + * For CPUs that are affected by ARM 1319367, we need to + * avoid a host Stage-1 walk while we have the guest's + * VMID set in the VTTBR in order to invalidate TLBs. + * We're guaranteed that the S1 MMU is enabled, so we can + * simply set the EPD bits to avoid any further TLB fill. + */ + val = cxt->tcr = read_sysreg_el1(SYS_TCR); + val |= TCR_EPD1_MASK | TCR_EPD0_MASK; + write_sysreg_el1(val, SYS_TCR); + isb(); + } + __load_guest_stage2(kvm); isb(); } @@ -100,6 +116,13 @@ static void __hyp_text __tlb_switch_to_host_nvhe(struct kvm *kvm, struct tlb_inv_context *cxt) { write_sysreg(0, vttbr_el2); + + if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + /* Ensure write of the host VMID */ + isb(); + /* Restore the host's TCR_EL1 */ + write_sysreg_el1(cxt->tcr, SYS_TCR); + } } static void __hyp_text __tlb_switch_to_host(struct kvm *kvm, From bd227553ad5077f21ddb382dcd910ba46181805a Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 30 Jul 2019 11:15:31 +0100 Subject: [PATCH 38/93] arm64: KVM: Prevent speculative S1 PTW when restoring vcpu context When handling erratum 1319367, we must ensure that the page table walker cannot parse the S1 page tables while the guest is in an inconsistent state. This is done as follows: On guest entry: - TCR_EL1.EPD{0,1} are set, ensuring that no PTW can occur - all system registers are restored, except for TCR_EL1 and SCTLR_EL1 - stage-2 is restored - SCTLR_EL1 and TCR_EL1 are restored On guest exit: - SCTLR_EL1.M and TCR_EL1.EPD{0,1} are set, ensuring that no PTW can occur - stage-2 is disabled - All host system registers are restored Reviewed-by: James Morse Signed-off-by: Marc Zyngier --- arch/arm64/kvm/hyp/switch.c | 31 ++++++++++++++++++++++++++++++ arch/arm64/kvm/hyp/sysreg-sr.c | 35 ++++++++++++++++++++++++++++++++-- 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 69e10b29cbd0..5765b17c38c7 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -118,6 +118,20 @@ static void __hyp_text __activate_traps_nvhe(struct kvm_vcpu *vcpu) } write_sysreg(val, cptr_el2); + + if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + struct kvm_cpu_context *ctxt = &vcpu->arch.ctxt; + + isb(); + /* + * At this stage, and thanks to the above isb(), S2 is + * configured and enabled. We can now restore the guest's S1 + * configuration: SCTLR, and only then TCR. + */ + write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + isb(); + write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); + } } static void __hyp_text __activate_traps(struct kvm_vcpu *vcpu) @@ -156,6 +170,23 @@ static void __hyp_text __deactivate_traps_nvhe(void) { u64 mdcr_el2 = read_sysreg(mdcr_el2); + if (cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + u64 val; + + /* + * Set the TCR and SCTLR registers in the exact opposite + * sequence as __activate_traps_nvhe (first prevent walks, + * then force the MMU on). A generous sprinkling of isb() + * ensure that things happen in this exact order. + */ + val = read_sysreg_el1(SYS_TCR); + write_sysreg_el1(val | TCR_EPD1_MASK | TCR_EPD0_MASK, SYS_TCR); + isb(); + val = read_sysreg_el1(SYS_SCTLR); + write_sysreg_el1(val | SCTLR_ELx_M, SYS_SCTLR); + isb(); + } + __deactivate_traps_common(); mdcr_el2 &= MDCR_EL2_HPMN_MASK; diff --git a/arch/arm64/kvm/hyp/sysreg-sr.c b/arch/arm64/kvm/hyp/sysreg-sr.c index 7ddbc849b580..22b8128d19f6 100644 --- a/arch/arm64/kvm/hyp/sysreg-sr.c +++ b/arch/arm64/kvm/hyp/sysreg-sr.c @@ -117,12 +117,26 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) { write_sysreg(ctxt->sys_regs[MPIDR_EL1], vmpidr_el2); write_sysreg(ctxt->sys_regs[CSSELR_EL1], csselr_el1); - write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + + if (!cpus_have_const_cap(ARM64_WORKAROUND_1319367)) { + write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); + } else if (!ctxt->__hyp_running_vcpu) { + /* + * Must only be done for guest registers, hence the context + * test. We're coming from the host, so SCTLR.M is already + * set. Pairs with __activate_traps_nvhe(). + */ + write_sysreg_el1((ctxt->sys_regs[TCR_EL1] | + TCR_EPD1_MASK | TCR_EPD0_MASK), + SYS_TCR); + isb(); + } + write_sysreg(ctxt->sys_regs[ACTLR_EL1], actlr_el1); write_sysreg_el1(ctxt->sys_regs[CPACR_EL1], SYS_CPACR); write_sysreg_el1(ctxt->sys_regs[TTBR0_EL1], SYS_TTBR0); write_sysreg_el1(ctxt->sys_regs[TTBR1_EL1], SYS_TTBR1); - write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); write_sysreg_el1(ctxt->sys_regs[ESR_EL1], SYS_ESR); write_sysreg_el1(ctxt->sys_regs[AFSR0_EL1], SYS_AFSR0); write_sysreg_el1(ctxt->sys_regs[AFSR1_EL1], SYS_AFSR1); @@ -135,6 +149,23 @@ static void __hyp_text __sysreg_restore_el1_state(struct kvm_cpu_context *ctxt) write_sysreg(ctxt->sys_regs[PAR_EL1], par_el1); write_sysreg(ctxt->sys_regs[TPIDR_EL1], tpidr_el1); + if (cpus_have_const_cap(ARM64_WORKAROUND_1319367) && + ctxt->__hyp_running_vcpu) { + /* + * Must only be done for host registers, hence the context + * test. Pairs with __deactivate_traps_nvhe(). + */ + isb(); + /* + * At this stage, and thanks to the above isb(), S2 is + * deconfigured and disabled. We can now restore the host's + * S1 configuration: SCTLR, and only then TCR. + */ + write_sysreg_el1(ctxt->sys_regs[SCTLR_EL1], SYS_SCTLR); + isb(); + write_sysreg_el1(ctxt->sys_regs[TCR_EL1], SYS_TCR); + } + write_sysreg(ctxt->gp_regs.sp_el1, sp_el1); write_sysreg_el1(ctxt->gp_regs.elr_el1, SYS_ELR); write_sysreg_el1(ctxt->gp_regs.spsr[KVM_SPSR_EL1],SYS_SPSR); From c2cc62d831863151fd0cb7da7ac9a0c324aab871 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 9 Jan 2019 14:36:34 +0000 Subject: [PATCH 39/93] arm64: Enable and document ARM errata 1319367 and 1319537 Now that everything is in place, let's get the ball rolling by allowing the corresponding config option to be selected. Also add the required information to silicon_errata.rst. Acked-by: Catalin Marinas Signed-off-by: Marc Zyngier --- Documentation/arm64/silicon-errata.rst | 4 ++++ arch/arm64/Kconfig | 10 ++++++++++ 2 files changed, 14 insertions(+) diff --git a/Documentation/arm64/silicon-errata.rst b/Documentation/arm64/silicon-errata.rst index 17ea3fecddaa..0808be134fce 100644 --- a/Documentation/arm64/silicon-errata.rst +++ b/Documentation/arm64/silicon-errata.rst @@ -70,8 +70,12 @@ stable kernels. +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A57 | #834220 | ARM64_ERRATUM_834220 | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A57 | #1319537 | ARM64_ERRATUM_1319367 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A72 | #853709 | N/A | +----------------+-----------------+-----------------+-----------------------------+ +| ARM | Cortex-A72 | #1319367 | ARM64_ERRATUM_1319367 | ++----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A73 | #858921 | ARM64_ERRATUM_858921 | +----------------+-----------------+-----------------+-----------------------------+ | ARM | Cortex-A55 | #1024718 | ARM64_ERRATUM_1024718 | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 950a56b71ff0..b2877ed09307 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -538,6 +538,16 @@ config ARM64_ERRATUM_1286807 invalidated has been observed by other observers. The workaround repeats the TLBI+DSB operation. +config ARM64_ERRATUM_1319367 + bool "Cortex-A57/A72: Speculative AT instruction using out-of-context translation regime could cause subsequent request to generate an incorrect translation" + default y + help + This option adds work arounds for ARM Cortex-A57 erratum 1319537 + and A72 erratum 1319367 + + Cortex-A57 and A72 cores could end-up with corrupted TLBs by + speculating an AT instruction during a guest context switch. + If unsure, say Y. config ARM64_ERRATUM_1463225 From 27a22fbdeedd6c5c451cf5f830d51782bf50c3a2 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 28 Oct 2019 09:08:34 +0000 Subject: [PATCH 40/93] arm64: Silence clang warning on mismatched value/register sizes Clang reports a warning on the __tlbi(aside1is, 0) macro expansion since the value size does not match the register size specified in the inline asm. Construct the ASID value using the __TLBI_VADDR() macro. Fixes: 222fc0c8503d ("arm64: compat: Workaround Neoverse-N1 #1542419 for compat user-space") Reported-by: Nathan Chancellor Cc: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/kernel/sys_compat.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index c9fb02927d3e..3c18c2454089 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -37,7 +37,7 @@ __do_compat_cache_op(unsigned long start, unsigned long end) * The workaround requires an inner-shareable tlbi. * We pick the reserved-ASID to minimise the impact. */ - __tlbi(aside1is, 0); + __tlbi(aside1is, __TLBI_VADDR(0, 0)); dsb(ish); } From b6e43c0e3129ffe87e65c85f20fcbdf0eb86fba0 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 25 Oct 2019 17:42:10 +0100 Subject: [PATCH 41/93] arm64: remove __exception annotations Since commit 732674980139 ("arm64: unwind: reference pt_regs via embedded stack frame") arm64 has not used the __exception annotation to dump the pt_regs during stack tracing. in_exception_text() has no callers. This annotation is only used to blacklist kprobes, it means the same as __kprobes. Section annotations like this require the functions to be grouped together between the start/end markers, and placed according to the linker script. For kprobes we also have NOKPROBE_SYMBOL() which logs the symbol address in a section that kprobes parses and blacklists at boot. Using NOKPROBE_SYMBOL() instead lets kprobes publish the list of blacklisted symbols, and saves us from having an arm64 specific spelling of __kprobes. do_debug_exception() already has a NOKPROBE_SYMBOL() annotation. Signed-off-by: James Morse Acked-by: Mark Rutland Acked-by: Masami Hiramatsu Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/exception.h | 4 ++-- arch/arm64/include/asm/traps.h | 10 --------- arch/arm64/kernel/probes/kprobes.c | 4 ---- arch/arm64/kernel/traps.c | 10 ++++++--- arch/arm64/kernel/vmlinux.lds.S | 3 --- arch/arm64/mm/fault.c | 34 +++++++++++++++--------------- 6 files changed, 26 insertions(+), 39 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index a17393ff6677..b0b3ba56e919 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -8,14 +8,14 @@ #define __ASM_EXCEPTION_H #include +#include #include -#define __exception __attribute__((section(".exception.text"))) #ifdef CONFIG_FUNCTION_GRAPH_TRACER #define __exception_irq_entry __irq_entry #else -#define __exception_irq_entry __exception +#define __exception_irq_entry __kprobes #endif static inline u32 disr_to_esr(u64 disr) diff --git a/arch/arm64/include/asm/traps.h b/arch/arm64/include/asm/traps.h index 59690613ac31..cee5928e1b7d 100644 --- a/arch/arm64/include/asm/traps.h +++ b/arch/arm64/include/asm/traps.h @@ -42,16 +42,6 @@ static inline int __in_irqentry_text(unsigned long ptr) ptr < (unsigned long)&__irqentry_text_end; } -static inline int in_exception_text(unsigned long ptr) -{ - int in; - - in = ptr >= (unsigned long)&__exception_text_start && - ptr < (unsigned long)&__exception_text_end; - - return in ? : __in_irqentry_text(ptr); -} - static inline int in_entry_text(unsigned long ptr) { return ptr >= (unsigned long)&__entry_text_start && diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index c4452827419b..d1c95dcf1d78 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -455,10 +455,6 @@ int __init arch_populate_kprobe_blacklist(void) (unsigned long)__irqentry_text_end); if (ret) return ret; - ret = kprobe_add_area_blacklist((unsigned long)__exception_text_start, - (unsigned long)__exception_text_end); - if (ret) - return ret; ret = kprobe_add_area_blacklist((unsigned long)__idmap_text_start, (unsigned long)__idmap_text_end); if (ret) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 34739e80211b..ba1a571a7774 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -393,7 +394,7 @@ void arm64_notify_segfault(unsigned long addr) force_signal_inject(SIGSEGV, code, addr); } -asmlinkage void __exception do_undefinstr(struct pt_regs *regs) +asmlinkage void do_undefinstr(struct pt_regs *regs) { /* check for AArch32 breakpoint instructions */ if (!aarch32_break_handler(regs)) @@ -405,6 +406,7 @@ asmlinkage void __exception do_undefinstr(struct pt_regs *regs) BUG_ON(!user_mode(regs)); force_signal_inject(SIGILL, ILL_ILLOPC, regs->pc); } +NOKPROBE_SYMBOL(do_undefinstr); #define __user_cache_maint(insn, address, res) \ if (address >= user_addr_max()) { \ @@ -667,7 +669,7 @@ static const struct sys64_hook cp15_64_hooks[] = { {}, }; -asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) +asmlinkage void do_cp15instr(unsigned int esr, struct pt_regs *regs) { const struct sys64_hook *hook, *hook_base; @@ -705,9 +707,10 @@ asmlinkage void __exception do_cp15instr(unsigned int esr, struct pt_regs *regs) */ do_undefinstr(regs); } +NOKPROBE_SYMBOL(do_cp15instr); #endif -asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) +asmlinkage void do_sysinstr(unsigned int esr, struct pt_regs *regs) { const struct sys64_hook *hook; @@ -724,6 +727,7 @@ asmlinkage void __exception do_sysinstr(unsigned int esr, struct pt_regs *regs) */ do_undefinstr(regs); } +NOKPROBE_SYMBOL(do_sysinstr); static const char *esr_class_str[] = { [0 ... ESR_ELx_EC_MAX] = "UNRECOGNIZED EC", diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index aa76f7259668..009057517bdd 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -111,9 +111,6 @@ SECTIONS } .text : { /* Real text segment */ _stext = .; /* Text and read-only data */ - __exception_text_start = .; - *(.exception.text) - __exception_text_end = .; IRQENTRY_TEXT SOFTIRQENTRY_TEXT ENTRY_TEXT diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 855f2a7954e6..844cd2535826 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -732,8 +733,8 @@ static const struct fault_info fault_info[] = { { do_bad, SIGKILL, SI_KERNEL, "unknown 63" }, }; -asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, - struct pt_regs *regs) +asmlinkage void do_mem_abort(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { const struct fault_info *inf = esr_to_fault_info(esr); @@ -749,16 +750,17 @@ asmlinkage void __exception do_mem_abort(unsigned long addr, unsigned int esr, arm64_notify_die(inf->name, regs, inf->sig, inf->code, (void __user *)addr, esr); } +NOKPROBE_SYMBOL(do_mem_abort); -asmlinkage void __exception do_el0_irq_bp_hardening(void) +asmlinkage void do_el0_irq_bp_hardening(void) { /* PC has already been checked in entry.S */ arm64_apply_bp_hardening(); } +NOKPROBE_SYMBOL(do_el0_irq_bp_hardening); -asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, - unsigned int esr, - struct pt_regs *regs) +asmlinkage void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { /* * We've taken an instruction abort from userspace and not yet @@ -771,11 +773,10 @@ asmlinkage void __exception do_el0_ia_bp_hardening(unsigned long addr, local_daif_restore(DAIF_PROCCTX); do_mem_abort(addr, esr, regs); } +NOKPROBE_SYMBOL(do_el0_ia_bp_hardening); - -asmlinkage void __exception do_sp_pc_abort(unsigned long addr, - unsigned int esr, - struct pt_regs *regs) +asmlinkage void do_sp_pc_abort(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { if (user_mode(regs)) { if (!is_ttbr0_addr(instruction_pointer(regs))) @@ -786,6 +787,7 @@ asmlinkage void __exception do_sp_pc_abort(unsigned long addr, arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN, (void __user *)addr, esr); } +NOKPROBE_SYMBOL(do_sp_pc_abort); int __init early_brk64(unsigned long addr, unsigned int esr, struct pt_regs *regs); @@ -868,8 +870,7 @@ NOKPROBE_SYMBOL(debug_exception_exit); #ifdef CONFIG_ARM64_ERRATUM_1463225 DECLARE_PER_CPU(int, __in_cortex_a76_erratum_1463225_wa); -static int __exception -cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) +static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) { if (user_mode(regs)) return 0; @@ -888,16 +889,15 @@ cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) return 1; } #else -static int __exception -cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) +static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) { return 0; } #endif /* CONFIG_ARM64_ERRATUM_1463225 */ +NOKPROBE_SYMBOL(cortex_a76_erratum_1463225_debug_handler); -asmlinkage void __exception do_debug_exception(unsigned long addr_if_watchpoint, - unsigned int esr, - struct pt_regs *regs) +asmlinkage void do_debug_exception(unsigned long addr_if_watchpoint, + unsigned int esr, struct pt_regs *regs) { const struct fault_info *inf = esr_to_debug_fault_info(esr); unsigned long pc = instruction_pointer(regs); From e540e0a7fa1ff889e37ca9af44eb44ec3d2c8a01 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 25 Oct 2019 17:42:11 +0100 Subject: [PATCH 42/93] arm64: Add prototypes for functions called by entry.S Functions that are only called by assembly don't always have a C header file prototype. Add the prototypes before moving the assembly callers to C. Signed-off-by: James Morse Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/exception.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index b0b3ba56e919..a9e376623ecf 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -9,6 +9,7 @@ #include #include +#include #include @@ -31,5 +32,26 @@ static inline u32 disr_to_esr(u64 disr) } asmlinkage void enter_from_user_mode(void); +asmlinkage void do_mem_abort(unsigned long addr, unsigned int esr, + struct pt_regs *regs); +asmlinkage void do_sp_pc_abort(unsigned long addr, unsigned int esr, + struct pt_regs *regs); +asmlinkage void do_undefinstr(struct pt_regs *regs); +asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr); +asmlinkage void do_debug_exception(unsigned long addr_if_watchpoint, + unsigned int esr, struct pt_regs *regs); +asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs); +asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs); +asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs); +asmlinkage void do_sysinstr(unsigned int esr, struct pt_regs *regs); +asmlinkage void do_sp_pc_abort(unsigned long addr, unsigned int esr, + struct pt_regs *regs); +asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, + unsigned int esr); +asmlinkage void do_cp15instr(unsigned int esr, struct pt_regs *regs); +asmlinkage void el0_svc_handler(struct pt_regs *regs); +asmlinkage void el0_svc_compat_handler(struct pt_regs *regs); +asmlinkage void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, + struct pt_regs *regs); #endif /* __ASM_EXCEPTION_H */ From 51077e03b8cef2a24d6582b8c54b718fced6878c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 25 Oct 2019 17:42:12 +0100 Subject: [PATCH 43/93] arm64: add local_daif_inherit() Some synchronous exceptions can be taken from a number of contexts, e.g. where IRQs may or may not be masked. In the entry assembly for these exceptions, we use the inherit_daif assembly macro to ensure that we only mask those exceptions which were masked when the exception was taken. So that we can do the same from C code, this patch adds a new local_daif_inherit() function, following the existing local_daif_*() naming scheme. Signed-off-by: Mark Rutland [moved away from local_daif_restore()] Signed-off-by: James Morse Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/daifflags.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/arm64/include/asm/daifflags.h b/arch/arm64/include/asm/daifflags.h index 063c964af705..9207cd5aa39e 100644 --- a/arch/arm64/include/asm/daifflags.h +++ b/arch/arm64/include/asm/daifflags.h @@ -9,6 +9,7 @@ #include #include +#include #define DAIF_PROCCTX 0 #define DAIF_PROCCTX_NOIRQ PSR_I_BIT @@ -109,4 +110,19 @@ static inline void local_daif_restore(unsigned long flags) trace_hardirqs_off(); } +/* + * Called by synchronous exception handlers to restore the DAIF bits that were + * modified by taking an exception. + */ +static inline void local_daif_inherit(struct pt_regs *regs) +{ + unsigned long flags = regs->pstate & DAIF_MASK; + + /* + * We can't use local_daif_restore(regs->pstate) here as + * system_has_prio_mask_debugging() won't restore the I bit if it can + * use the pmr instead. + */ + write_sysreg(flags, daif); +} #endif From ed3768db588291ddb5dc794daed12cc751373566 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 25 Oct 2019 17:42:13 +0100 Subject: [PATCH 44/93] arm64: entry: convert el1_sync to C This patch converts the EL1 sync entry assembly logic to C code. Doing this will allow us to make changes in a slightly more readable way. A case in point is supporting kernel-first RAS. do_sea() should be called on the CPU that took the fault. Largely the assembly code is converted to C in a relatively straightforward manner. Since all sync sites share a common asm entry point, the ASM_BUG() instances are no longer required for effective backtraces back to assembly, and we don't need similar BUG() entries. The ESR_ELx.EC codes for all (supported) debug exceptions are now checked in the el1_sync_handler's switch statement, which renders the check in el1_dbg redundant. This both simplifies the el1_dbg handler, and makes the EL1 exception handling more robust to currently-unallocated ESR_ELx.EC encodings. Signed-off-by: Mark Rutland [split out of a bigger series, added nokprobes, moved prototypes] Signed-off-by: James Morse Reviewed-by: Mark Rutland Cc: Julien Thierry Signed-off-by: Catalin Marinas --- arch/arm64/kernel/Makefile | 6 +- arch/arm64/kernel/entry-common.c | 98 ++++++++++++++++++++++++++++++++ arch/arm64/kernel/entry.S | 69 +--------------------- 3 files changed, 102 insertions(+), 71 deletions(-) create mode 100644 arch/arm64/kernel/entry-common.c diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 478491f07b4f..fc6488660f64 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -13,9 +13,9 @@ CFLAGS_REMOVE_return_address.o = $(CC_FLAGS_FTRACE) # Object file lists. obj-y := debug-monitors.o entry.o irq.o fpsimd.o \ - entry-fpsimd.o process.o ptrace.o setup.o signal.o \ - sys.o stacktrace.o time.o traps.o io.o vdso.o \ - hyp-stub.o psci.o cpu_ops.o insn.o \ + entry-common.o entry-fpsimd.o process.o ptrace.o \ + setup.o signal.o sys.o stacktrace.o time.o traps.o \ + io.o vdso.o hyp-stub.o psci.o cpu_ops.o insn.o \ return_address.o cpuinfo.o cpu_errata.o \ cpufeature.o alternative.o cacheinfo.o \ smp.o smp_spin_table.o topology.o smccc-call.o \ diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c new file mode 100644 index 000000000000..e726d1f4b9e9 --- /dev/null +++ b/arch/arm64/kernel/entry-common.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Exception handling code + * + * Copyright (C) 2019 ARM Ltd. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +static void notrace el1_abort(struct pt_regs *regs, unsigned long esr) +{ + unsigned long far = read_sysreg(far_el1); + + local_daif_inherit(regs); + far = untagged_addr(far); + do_mem_abort(far, esr, regs); +} +NOKPROBE_SYMBOL(el1_abort); + +static void notrace el1_pc(struct pt_regs *regs, unsigned long esr) +{ + unsigned long far = read_sysreg(far_el1); + + local_daif_inherit(regs); + do_sp_pc_abort(far, esr, regs); +} +NOKPROBE_SYMBOL(el1_pc); + +static void el1_undef(struct pt_regs *regs) +{ + local_daif_inherit(regs); + do_undefinstr(regs); +} +NOKPROBE_SYMBOL(el1_undef); + +static void el1_inv(struct pt_regs *regs, unsigned long esr) +{ + local_daif_inherit(regs); + bad_mode(regs, 0, esr); +} +NOKPROBE_SYMBOL(el1_inv); + +static void notrace el1_dbg(struct pt_regs *regs, unsigned long esr) +{ + unsigned long far = read_sysreg(far_el1); + + /* + * The CPU masked interrupts, and we are leaving them masked during + * do_debug_exception(). Update PMR as if we had called + * local_mask_daif(). + */ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + + do_debug_exception(far, esr, regs); +} +NOKPROBE_SYMBOL(el1_dbg); + +asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) +{ + unsigned long esr = read_sysreg(esr_el1); + + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_DABT_CUR: + case ESR_ELx_EC_IABT_CUR: + el1_abort(regs, esr); + break; + /* + * We don't handle ESR_ELx_EC_SP_ALIGN, since we will have hit a + * recursive exception when trying to push the initial pt_regs. + */ + case ESR_ELx_EC_PC_ALIGN: + el1_pc(regs, esr); + break; + case ESR_ELx_EC_SYS64: + case ESR_ELx_EC_UNKNOWN: + el1_undef(regs); + break; + case ESR_ELx_EC_BREAKPT_CUR: + case ESR_ELx_EC_SOFTSTP_CUR: + case ESR_ELx_EC_WATCHPT_CUR: + case ESR_ELx_EC_BRK64: + el1_dbg(regs, esr); + break; + default: + el1_inv(regs, esr); + }; +} +NOKPROBE_SYMBOL(el1_sync_handler); diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e304fe04b098..5d7f42eb0e89 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -578,76 +578,9 @@ ENDPROC(el1_error_invalid) .align 6 el1_sync: kernel_entry 1 - mrs x1, esr_el1 // read the syndrome register - lsr x24, x1, #ESR_ELx_EC_SHIFT // exception class - cmp x24, #ESR_ELx_EC_DABT_CUR // data abort in EL1 - b.eq el1_da - cmp x24, #ESR_ELx_EC_IABT_CUR // instruction abort in EL1 - b.eq el1_ia - cmp x24, #ESR_ELx_EC_SYS64 // configurable trap - b.eq el1_undef - cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el1_pc - cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL1 - b.eq el1_undef - cmp x24, #ESR_ELx_EC_BREAKPT_CUR // debug exception in EL1 - b.ge el1_dbg - b el1_inv - -el1_ia: - /* - * Fall through to the Data abort case - */ -el1_da: - /* - * Data abort handling - */ - mrs x3, far_el1 - inherit_daif pstate=x23, tmp=x2 - clear_address_tag x0, x3 - mov x2, sp // struct pt_regs - bl do_mem_abort - - kernel_exit 1 -el1_pc: - /* - * PC alignment exception handling. We don't handle SP alignment faults, - * since we will have hit a recursive exception when trying to push the - * initial pt_regs. - */ - mrs x0, far_el1 - inherit_daif pstate=x23, tmp=x2 - mov x2, sp - bl do_sp_pc_abort - ASM_BUG() -el1_undef: - /* - * Undefined instruction - */ - inherit_daif pstate=x23, tmp=x2 mov x0, sp - bl do_undefinstr + bl el1_sync_handler kernel_exit 1 -el1_dbg: - /* - * Debug exception handling - */ - cmp x24, #ESR_ELx_EC_BRK64 // if BRK64 - cinc x24, x24, eq // set bit '0' - tbz x24, #0, el1_inv // EL1 only - gic_prio_kentry_setup tmp=x3 - mrs x0, far_el1 - mov x2, sp // struct pt_regs - bl do_debug_exception - kernel_exit 1 -el1_inv: - // TODO: add support for undefined instructions in kernel mode - inherit_daif pstate=x23, tmp=x2 - mov x0, sp - mov x2, x1 - mov x1, #BAD_SYNC - bl bad_mode - ASM_BUG() ENDPROC(el1_sync) .align 6 From 582f95835a8fc812cd38dce0447fe9386b78913e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 25 Oct 2019 17:42:14 +0100 Subject: [PATCH 45/93] arm64: entry: convert el0_sync to C This is largely a 1-1 conversion of asm to C, with a couple of caveats. The el0_sync{_compat} switches explicitly handle all the EL0 debug cases, so el0_dbg doesn't have to try to bail out for unexpected EL1 debug ESR values. This also means that an unexpected vector catch from AArch32 is routed to el0_inv. We *could* merge the native and compat switches, which would make the diffstat negative, but I've tried to stay as close to the existing assembly as possible for the moment. Signed-off-by: Mark Rutland [split out of a bigger series, added nokprobes. removed irq trace calls as the C helpers do this. renamed el0_dbg's use of FAR] Signed-off-by: James Morse Reviewed-by: Mark Rutland Cc: Julien Thierry Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/asm-uaccess.h | 10 -- arch/arm64/kernel/entry-common.c | 222 +++++++++++++++++++++++++++ arch/arm64/kernel/entry.S | 206 +------------------------ 3 files changed, 227 insertions(+), 211 deletions(-) diff --git a/arch/arm64/include/asm/asm-uaccess.h b/arch/arm64/include/asm/asm-uaccess.h index f74909ba29bd..a70575edae8e 100644 --- a/arch/arm64/include/asm/asm-uaccess.h +++ b/arch/arm64/include/asm/asm-uaccess.h @@ -74,14 +74,4 @@ alternative_if ARM64_ALT_PAN_NOT_UAO SET_PSTATE_PAN(0) alternative_else_nop_endif .endm - -/* - * Remove the address tag from a virtual address, if present. - */ - .macro clear_address_tag, dst, addr - tst \addr, #(1 << 55) - bic \dst, \addr, #(0xff << 56) - csel \dst, \dst, \addr, eq - .endm - #endif diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index e726d1f4b9e9..2c318e41d84b 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -96,3 +96,225 @@ asmlinkage void notrace el1_sync_handler(struct pt_regs *regs) }; } NOKPROBE_SYMBOL(el1_sync_handler); + +static void notrace el0_da(struct pt_regs *regs, unsigned long esr) +{ + unsigned long far = read_sysreg(far_el1); + + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + far = untagged_addr(far); + do_mem_abort(far, esr, regs); +} +NOKPROBE_SYMBOL(el0_da); + +static void notrace el0_ia(struct pt_regs *regs, unsigned long esr) +{ + unsigned long far = read_sysreg(far_el1); + + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX_NOIRQ); + do_el0_ia_bp_hardening(far, esr, regs); +} +NOKPROBE_SYMBOL(el0_ia); + +static void notrace el0_fpsimd_acc(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_fpsimd_acc(esr, regs); +} +NOKPROBE_SYMBOL(el0_fpsimd_acc); + +static void notrace el0_sve_acc(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_sve_acc(esr, regs); +} +NOKPROBE_SYMBOL(el0_sve_acc); + +static void notrace el0_fpsimd_exc(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_fpsimd_exc(esr, regs); +} +NOKPROBE_SYMBOL(el0_fpsimd_exc); + +static void notrace el0_sys(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_sysinstr(esr, regs); +} +NOKPROBE_SYMBOL(el0_sys); + +static void notrace el0_pc(struct pt_regs *regs, unsigned long esr) +{ + unsigned long far = read_sysreg(far_el1); + + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX_NOIRQ); + do_sp_pc_abort(far, esr, regs); +} +NOKPROBE_SYMBOL(el0_pc); + +static void notrace el0_sp(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX_NOIRQ); + do_sp_pc_abort(regs->sp, esr, regs); +} +NOKPROBE_SYMBOL(el0_sp); + +static void notrace el0_undef(struct pt_regs *regs) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_undefinstr(regs); +} +NOKPROBE_SYMBOL(el0_undef); + +static void notrace el0_inv(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + bad_el0_sync(regs, 0, esr); +} +NOKPROBE_SYMBOL(el0_inv); + +static void notrace el0_dbg(struct pt_regs *regs, unsigned long esr) +{ + /* Only watchpoints write FAR_EL1, otherwise its UNKNOWN */ + unsigned long far = read_sysreg(far_el1); + + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + + user_exit_irqoff(); + do_debug_exception(far, esr, regs); + local_daif_restore(DAIF_PROCCTX_NOIRQ); +} +NOKPROBE_SYMBOL(el0_dbg); + +static void notrace el0_svc(struct pt_regs *regs) +{ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + + el0_svc_handler(regs); +} +NOKPROBE_SYMBOL(el0_svc); + +asmlinkage void notrace el0_sync_handler(struct pt_regs *regs) +{ + unsigned long esr = read_sysreg(esr_el1); + + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_SVC64: + el0_svc(regs); + break; + case ESR_ELx_EC_DABT_LOW: + el0_da(regs, esr); + break; + case ESR_ELx_EC_IABT_LOW: + el0_ia(regs, esr); + break; + case ESR_ELx_EC_FP_ASIMD: + el0_fpsimd_acc(regs, esr); + break; + case ESR_ELx_EC_SVE: + el0_sve_acc(regs, esr); + break; + case ESR_ELx_EC_FP_EXC64: + el0_fpsimd_exc(regs, esr); + break; + case ESR_ELx_EC_SYS64: + case ESR_ELx_EC_WFx: + el0_sys(regs, esr); + break; + case ESR_ELx_EC_SP_ALIGN: + el0_sp(regs, esr); + break; + case ESR_ELx_EC_PC_ALIGN: + el0_pc(regs, esr); + break; + case ESR_ELx_EC_UNKNOWN: + el0_undef(regs); + break; + case ESR_ELx_EC_BREAKPT_LOW: + case ESR_ELx_EC_SOFTSTP_LOW: + case ESR_ELx_EC_WATCHPT_LOW: + case ESR_ELx_EC_BRK64: + el0_dbg(regs, esr); + break; + default: + el0_inv(regs, esr); + } +} +NOKPROBE_SYMBOL(el0_sync_handler); + +#ifdef CONFIG_COMPAT +static void notrace el0_cp15(struct pt_regs *regs, unsigned long esr) +{ + user_exit_irqoff(); + local_daif_restore(DAIF_PROCCTX); + do_cp15instr(esr, regs); +} +NOKPROBE_SYMBOL(el0_cp15); + +static void notrace el0_svc_compat(struct pt_regs *regs) +{ + if (system_uses_irq_prio_masking()) + gic_write_pmr(GIC_PRIO_IRQON | GIC_PRIO_PSR_I_SET); + + el0_svc_compat_handler(regs); +} +NOKPROBE_SYMBOL(el0_svc_compat); + +asmlinkage void notrace el0_sync_compat_handler(struct pt_regs *regs) +{ + unsigned long esr = read_sysreg(esr_el1); + + switch (ESR_ELx_EC(esr)) { + case ESR_ELx_EC_SVC32: + el0_svc_compat(regs); + break; + case ESR_ELx_EC_DABT_LOW: + el0_da(regs, esr); + break; + case ESR_ELx_EC_IABT_LOW: + el0_ia(regs, esr); + break; + case ESR_ELx_EC_FP_ASIMD: + el0_fpsimd_acc(regs, esr); + break; + case ESR_ELx_EC_FP_EXC32: + el0_fpsimd_exc(regs, esr); + break; + case ESR_ELx_EC_PC_ALIGN: + el0_pc(regs, esr); + break; + case ESR_ELx_EC_UNKNOWN: + case ESR_ELx_EC_CP14_MR: + case ESR_ELx_EC_CP14_LS: + case ESR_ELx_EC_CP14_64: + el0_undef(regs); + break; + case ESR_ELx_EC_CP15_32: + case ESR_ELx_EC_CP15_64: + el0_cp15(regs, esr); + break; + case ESR_ELx_EC_BREAKPT_LOW: + case ESR_ELx_EC_SOFTSTP_LOW: + case ESR_ELx_EC_WATCHPT_LOW: + case ESR_ELx_EC_BKPT32: + el0_dbg(regs, esr); + break; + default: + el0_inv(regs, esr); + } +} +NOKPROBE_SYMBOL(el0_sync_compat_handler); +#endif /* CONFIG_COMPAT */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 5d7f42eb0e89..15822a0fe37f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -647,71 +647,18 @@ ENDPROC(el1_irq) .align 6 el0_sync: kernel_entry 0 - mrs x25, esr_el1 // read the syndrome register - lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class - cmp x24, #ESR_ELx_EC_SVC64 // SVC in 64-bit state - b.eq el0_svc - cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0 - b.eq el0_da - cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0 - b.eq el0_ia - cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access - b.eq el0_fpsimd_acc - cmp x24, #ESR_ELx_EC_SVE // SVE access - b.eq el0_sve_acc - cmp x24, #ESR_ELx_EC_FP_EXC64 // FP/ASIMD exception - b.eq el0_fpsimd_exc - cmp x24, #ESR_ELx_EC_SYS64 // configurable trap - ccmp x24, #ESR_ELx_EC_WFx, #4, ne - b.eq el0_sys - cmp x24, #ESR_ELx_EC_SP_ALIGN // stack alignment exception - b.eq el0_sp - cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el0_pc - cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 - b.eq el0_undef - cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0 - b.ge el0_dbg - b el0_inv + mov x0, sp + bl el0_sync_handler + b ret_to_user #ifdef CONFIG_COMPAT .align 6 el0_sync_compat: kernel_entry 0, 32 - mrs x25, esr_el1 // read the syndrome register - lsr x24, x25, #ESR_ELx_EC_SHIFT // exception class - cmp x24, #ESR_ELx_EC_SVC32 // SVC in 32-bit state - b.eq el0_svc_compat - cmp x24, #ESR_ELx_EC_DABT_LOW // data abort in EL0 - b.eq el0_da - cmp x24, #ESR_ELx_EC_IABT_LOW // instruction abort in EL0 - b.eq el0_ia - cmp x24, #ESR_ELx_EC_FP_ASIMD // FP/ASIMD access - b.eq el0_fpsimd_acc - cmp x24, #ESR_ELx_EC_FP_EXC32 // FP/ASIMD exception - b.eq el0_fpsimd_exc - cmp x24, #ESR_ELx_EC_PC_ALIGN // pc alignment exception - b.eq el0_pc - cmp x24, #ESR_ELx_EC_UNKNOWN // unknown exception in EL0 - b.eq el0_undef - cmp x24, #ESR_ELx_EC_CP15_32 // CP15 MRC/MCR trap - b.eq el0_cp15 - cmp x24, #ESR_ELx_EC_CP15_64 // CP15 MRRC/MCRR trap - b.eq el0_cp15 - cmp x24, #ESR_ELx_EC_CP14_MR // CP14 MRC/MCR trap - b.eq el0_undef - cmp x24, #ESR_ELx_EC_CP14_LS // CP14 LDC/STC trap - b.eq el0_undef - cmp x24, #ESR_ELx_EC_CP14_64 // CP14 MRRC/MCRR trap - b.eq el0_undef - cmp x24, #ESR_ELx_EC_BREAKPT_LOW // debug exception in EL0 - b.ge el0_dbg - b el0_inv -el0_svc_compat: - gic_prio_kentry_setup tmp=x1 mov x0, sp - bl el0_svc_compat_handler + bl el0_sync_compat_handler b ret_to_user +ENDPROC(el0_sync) .align 6 el0_irq_compat: @@ -721,140 +668,8 @@ el0_irq_compat: el0_error_compat: kernel_entry 0, 32 b el0_error_naked - -el0_cp15: - /* - * Trapped CP15 (MRC, MCR, MRRC, MCRR) instructions - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_cp15instr - b ret_to_user #endif -el0_da: - /* - * Data abort handling - */ - mrs x26, far_el1 - ct_user_exit_irqoff - enable_daif - clear_address_tag x0, x26 - mov x1, x25 - mov x2, sp - bl do_mem_abort - b ret_to_user -el0_ia: - /* - * Instruction abort handling - */ - mrs x26, far_el1 - gic_prio_kentry_setup tmp=x0 - ct_user_exit_irqoff - enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - mov x0, x26 - mov x1, x25 - mov x2, sp - bl do_el0_ia_bp_hardening - b ret_to_user -el0_fpsimd_acc: - /* - * Floating Point or Advanced SIMD access - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_fpsimd_acc - b ret_to_user -el0_sve_acc: - /* - * Scalable Vector Extension access - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_sve_acc - b ret_to_user -el0_fpsimd_exc: - /* - * Floating Point, Advanced SIMD or SVE exception - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_fpsimd_exc - b ret_to_user -el0_sp: - ldr x26, [sp, #S_SP] - b el0_sp_pc -el0_pc: - mrs x26, far_el1 -el0_sp_pc: - /* - * Stack or PC alignment exception handling - */ - gic_prio_kentry_setup tmp=x0 - ct_user_exit_irqoff - enable_da_f -#ifdef CONFIG_TRACE_IRQFLAGS - bl trace_hardirqs_off -#endif - mov x0, x26 - mov x1, x25 - mov x2, sp - bl do_sp_pc_abort - b ret_to_user -el0_undef: - /* - * Undefined instruction - */ - ct_user_exit_irqoff - enable_daif - mov x0, sp - bl do_undefinstr - b ret_to_user -el0_sys: - /* - * System instructions, for trapped cache maintenance instructions - */ - ct_user_exit_irqoff - enable_daif - mov x0, x25 - mov x1, sp - bl do_sysinstr - b ret_to_user -el0_dbg: - /* - * Debug exception handling - */ - tbnz x24, #0, el0_inv // EL0 only - mrs x24, far_el1 - gic_prio_kentry_setup tmp=x3 - ct_user_exit_irqoff - mov x0, x24 - mov x1, x25 - mov x2, sp - bl do_debug_exception - enable_da_f - b ret_to_user -el0_inv: - ct_user_exit_irqoff - enable_daif - mov x0, sp - mov x1, #BAD_SYNC - mov x2, x25 - bl bad_el0_sync - b ret_to_user -ENDPROC(el0_sync) - .align 6 el0_irq: kernel_entry 0 @@ -932,17 +747,6 @@ finish_ret_to_user: kernel_exit 0 ENDPROC(ret_to_user) -/* - * SVC handler. - */ - .align 6 -el0_svc: - gic_prio_kentry_setup tmp=x1 - mov x0, sp - bl el0_svc_handler - b ret_to_user -ENDPROC(el0_svc) - .popsection // .entry.text #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 From afa7c0e5b965cdb945ad8a2e2973c6d7e19969f9 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 25 Oct 2019 17:42:15 +0100 Subject: [PATCH 46/93] arm64: Remove asmlinkage from updated functions Now that the callers of these functions have moved into C, they no longer need the asmlinkage annotation. Remove it. Signed-off-by: James Morse Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/exception.h | 36 +++++++++++++----------------- arch/arm64/kernel/fpsimd.c | 6 ++--- arch/arm64/kernel/syscall.c | 4 ++-- arch/arm64/kernel/traps.c | 8 +++---- arch/arm64/mm/fault.c | 16 ++++++------- 5 files changed, 32 insertions(+), 38 deletions(-) diff --git a/arch/arm64/include/asm/exception.h b/arch/arm64/include/asm/exception.h index a9e376623ecf..4d5f3b5f50cd 100644 --- a/arch/arm64/include/asm/exception.h +++ b/arch/arm64/include/asm/exception.h @@ -32,26 +32,22 @@ static inline u32 disr_to_esr(u64 disr) } asmlinkage void enter_from_user_mode(void); -asmlinkage void do_mem_abort(unsigned long addr, unsigned int esr, - struct pt_regs *regs); -asmlinkage void do_sp_pc_abort(unsigned long addr, unsigned int esr, - struct pt_regs *regs); -asmlinkage void do_undefinstr(struct pt_regs *regs); +void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); +void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); +void do_undefinstr(struct pt_regs *regs); asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr); -asmlinkage void do_debug_exception(unsigned long addr_if_watchpoint, - unsigned int esr, struct pt_regs *regs); -asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs); -asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs); -asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs); -asmlinkage void do_sysinstr(unsigned int esr, struct pt_regs *regs); -asmlinkage void do_sp_pc_abort(unsigned long addr, unsigned int esr, - struct pt_regs *regs); -asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, - unsigned int esr); -asmlinkage void do_cp15instr(unsigned int esr, struct pt_regs *regs); -asmlinkage void el0_svc_handler(struct pt_regs *regs); -asmlinkage void el0_svc_compat_handler(struct pt_regs *regs); -asmlinkage void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, - struct pt_regs *regs); +void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, + struct pt_regs *regs); +void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs); +void do_sve_acc(unsigned int esr, struct pt_regs *regs); +void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs); +void do_sysinstr(unsigned int esr, struct pt_regs *regs); +void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs); +void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr); +void do_cp15instr(unsigned int esr, struct pt_regs *regs); +void el0_svc_handler(struct pt_regs *regs); +void el0_svc_compat_handler(struct pt_regs *regs); +void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, + struct pt_regs *regs); #endif /* __ASM_EXCEPTION_H */ diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 37d3912cfe06..3eb338f14386 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -920,7 +920,7 @@ void fpsimd_release_task(struct task_struct *dead_task) * would have disabled the SVE access trap for userspace during * ret_to_user, making an SVE access trap impossible in that case. */ -asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) +void do_sve_acc(unsigned int esr, struct pt_regs *regs) { /* Even if we chose not to use SVE, the hardware could still trap: */ if (unlikely(!system_supports_sve()) || WARN_ON(is_compat_task())) { @@ -947,7 +947,7 @@ asmlinkage void do_sve_acc(unsigned int esr, struct pt_regs *regs) /* * Trapped FP/ASIMD access. */ -asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) +void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) { /* TODO: implement lazy context saving/restoring */ WARN_ON(1); @@ -956,7 +956,7 @@ asmlinkage void do_fpsimd_acc(unsigned int esr, struct pt_regs *regs) /* * Raise a SIGFPE for the current process. */ -asmlinkage void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) +void do_fpsimd_exc(unsigned int esr, struct pt_regs *regs) { unsigned int si_code = FPE_FLTUNK; diff --git a/arch/arm64/kernel/syscall.c b/arch/arm64/kernel/syscall.c index 871c739f060a..9a9d98a443fc 100644 --- a/arch/arm64/kernel/syscall.c +++ b/arch/arm64/kernel/syscall.c @@ -154,14 +154,14 @@ static inline void sve_user_discard(void) sve_user_disable(); } -asmlinkage void el0_svc_handler(struct pt_regs *regs) +void el0_svc_handler(struct pt_regs *regs) { sve_user_discard(); el0_svc_common(regs, regs->regs[8], __NR_syscalls, sys_call_table); } #ifdef CONFIG_COMPAT -asmlinkage void el0_svc_compat_handler(struct pt_regs *regs) +void el0_svc_compat_handler(struct pt_regs *regs) { el0_svc_common(regs, regs->regs[7], __NR_compat_syscalls, compat_sys_call_table); diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index ba1a571a7774..54ebe24ef4b1 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -394,7 +394,7 @@ void arm64_notify_segfault(unsigned long addr) force_signal_inject(SIGSEGV, code, addr); } -asmlinkage void do_undefinstr(struct pt_regs *regs) +void do_undefinstr(struct pt_regs *regs) { /* check for AArch32 breakpoint instructions */ if (!aarch32_break_handler(regs)) @@ -669,7 +669,7 @@ static const struct sys64_hook cp15_64_hooks[] = { {}, }; -asmlinkage void do_cp15instr(unsigned int esr, struct pt_regs *regs) +void do_cp15instr(unsigned int esr, struct pt_regs *regs) { const struct sys64_hook *hook, *hook_base; @@ -710,7 +710,7 @@ asmlinkage void do_cp15instr(unsigned int esr, struct pt_regs *regs) NOKPROBE_SYMBOL(do_cp15instr); #endif -asmlinkage void do_sysinstr(unsigned int esr, struct pt_regs *regs) +void do_sysinstr(unsigned int esr, struct pt_regs *regs) { const struct sys64_hook *hook; @@ -797,7 +797,7 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) * bad_el0_sync handles unexpected, but potentially recoverable synchronous * exceptions taken from EL0. Unlike bad_mode, this returns. */ -asmlinkage void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) +void bad_el0_sync(struct pt_regs *regs, int reason, unsigned int esr) { void __user *pc = (void __user *)instruction_pointer(regs); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 844cd2535826..cb13f4daa878 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -733,8 +733,7 @@ static const struct fault_info fault_info[] = { { do_bad, SIGKILL, SI_KERNEL, "unknown 63" }, }; -asmlinkage void do_mem_abort(unsigned long addr, unsigned int esr, - struct pt_regs *regs) +void do_mem_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) { const struct fault_info *inf = esr_to_fault_info(esr); @@ -752,15 +751,15 @@ asmlinkage void do_mem_abort(unsigned long addr, unsigned int esr, } NOKPROBE_SYMBOL(do_mem_abort); -asmlinkage void do_el0_irq_bp_hardening(void) +void do_el0_irq_bp_hardening(void) { /* PC has already been checked in entry.S */ arm64_apply_bp_hardening(); } NOKPROBE_SYMBOL(do_el0_irq_bp_hardening); -asmlinkage void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, - struct pt_regs *regs) +void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, + struct pt_regs *regs) { /* * We've taken an instruction abort from userspace and not yet @@ -775,8 +774,7 @@ asmlinkage void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, } NOKPROBE_SYMBOL(do_el0_ia_bp_hardening); -asmlinkage void do_sp_pc_abort(unsigned long addr, unsigned int esr, - struct pt_regs *regs) +void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) { if (user_mode(regs)) { if (!is_ttbr0_addr(instruction_pointer(regs))) @@ -896,8 +894,8 @@ static int cortex_a76_erratum_1463225_debug_handler(struct pt_regs *regs) #endif /* CONFIG_ARM64_ERRATUM_1463225 */ NOKPROBE_SYMBOL(cortex_a76_erratum_1463225_debug_handler); -asmlinkage void do_debug_exception(unsigned long addr_if_watchpoint, - unsigned int esr, struct pt_regs *regs) +void do_debug_exception(unsigned long addr_if_watchpoint, unsigned int esr, + struct pt_regs *regs) { const struct fault_info *inf = esr_to_debug_fault_info(esr); unsigned long pc = instruction_pointer(regs); From bfe298745afc9548ad9344a9a3f26c81fd1a76c4 Mon Sep 17 00:00:00 2001 From: James Morse Date: Fri, 25 Oct 2019 17:42:16 +0100 Subject: [PATCH 47/93] arm64: entry-common: don't touch daif before bp-hardening The previous patches mechanically transformed the assembly version of entry.S to entry-common.c for synchronous exceptions. The C version of local_daif_restore() doesn't quite do the same thing as the assembly versions if pseudo-NMI is in use. In particular, | local_daif_restore(DAIF_PROCCTX_NOIRQ) will still allow pNMI to be delivered. This is not the behaviour do_el0_ia_bp_hardening() and do_sp_pc_abort() want as it should not be possible for the PMU handler to run as an NMI until the bp-hardening sequence has run. The bp-hardening calls were placed where they are because this was the first C code to run after the relevant exceptions. As we've now moved that point earlier, move the checks and calls earlier too. This makes it clearer that this stuff runs before any kind of exception, and saves modifying PSTATE twice. Signed-off-by: James Morse Reviewed-by: Mark Rutland Cc: Julien Thierry Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 14 ++++++++++++ arch/arm64/kernel/entry-common.c | 18 ++++++++++++--- arch/arm64/mm/fault.c | 36 +----------------------------- 3 files changed, 30 insertions(+), 38 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 5623685c7d13..8899d26f73ff 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -26,10 +26,12 @@ #include #include #include +#include #include #include #include +#include #include #include #include @@ -214,6 +216,18 @@ static inline void start_thread(struct pt_regs *regs, unsigned long pc, regs->sp = sp; } +static inline bool is_ttbr0_addr(unsigned long addr) +{ + /* entry assembly clears tags for TTBR0 addrs */ + return addr < TASK_SIZE; +} + +static inline bool is_ttbr1_addr(unsigned long addr) +{ + /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ + return arch_kasan_reset_tag(addr) >= PAGE_OFFSET; +} + #ifdef CONFIG_COMPAT static inline void compat_start_thread(struct pt_regs *regs, unsigned long pc, unsigned long sp) diff --git a/arch/arm64/kernel/entry-common.c b/arch/arm64/kernel/entry-common.c index 2c318e41d84b..5dce5e56995a 100644 --- a/arch/arm64/kernel/entry-common.c +++ b/arch/arm64/kernel/entry-common.c @@ -14,6 +14,7 @@ #include #include #include +#include #include static void notrace el1_abort(struct pt_regs *regs, unsigned long esr) @@ -112,9 +113,17 @@ static void notrace el0_ia(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); + /* + * We've taken an instruction abort from userspace and not yet + * re-enabled IRQs. If the address is a kernel address, apply + * BP hardening prior to enabling IRQs and pre-emption. + */ + if (!is_ttbr0_addr(far)) + arm64_apply_bp_hardening(); + user_exit_irqoff(); - local_daif_restore(DAIF_PROCCTX_NOIRQ); - do_el0_ia_bp_hardening(far, esr, regs); + local_daif_restore(DAIF_PROCCTX); + do_mem_abort(far, esr, regs); } NOKPROBE_SYMBOL(el0_ia); @@ -154,8 +163,11 @@ static void notrace el0_pc(struct pt_regs *regs, unsigned long esr) { unsigned long far = read_sysreg(far_el1); + if (!is_ttbr0_addr(instruction_pointer(regs))) + arm64_apply_bp_hardening(); + user_exit_irqoff(); - local_daif_restore(DAIF_PROCCTX_NOIRQ); + local_daif_restore(DAIF_PROCCTX); do_sp_pc_abort(far, esr, regs); } NOKPROBE_SYMBOL(el0_pc); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index cb13f4daa878..1bb2e3737e51 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -32,8 +32,8 @@ #include #include #include -#include #include +#include #include #include #include @@ -102,18 +102,6 @@ static void mem_abort_decode(unsigned int esr) data_abort_decode(esr); } -static inline bool is_ttbr0_addr(unsigned long addr) -{ - /* entry assembly clears tags for TTBR0 addrs */ - return addr < TASK_SIZE; -} - -static inline bool is_ttbr1_addr(unsigned long addr) -{ - /* TTBR1 addresses may have a tag if KASAN_SW_TAGS is in use */ - return arch_kasan_reset_tag(addr) >= PAGE_OFFSET; -} - static inline unsigned long mm_to_pgd_phys(struct mm_struct *mm) { /* Either init_pg_dir or swapper_pg_dir */ @@ -758,30 +746,8 @@ void do_el0_irq_bp_hardening(void) } NOKPROBE_SYMBOL(do_el0_irq_bp_hardening); -void do_el0_ia_bp_hardening(unsigned long addr, unsigned int esr, - struct pt_regs *regs) -{ - /* - * We've taken an instruction abort from userspace and not yet - * re-enabled IRQs. If the address is a kernel address, apply - * BP hardening prior to enabling IRQs and pre-emption. - */ - if (!is_ttbr0_addr(addr)) - arm64_apply_bp_hardening(); - - local_daif_restore(DAIF_PROCCTX); - do_mem_abort(addr, esr, regs); -} -NOKPROBE_SYMBOL(do_el0_ia_bp_hardening); - void do_sp_pc_abort(unsigned long addr, unsigned int esr, struct pt_regs *regs) { - if (user_mode(regs)) { - if (!is_ttbr0_addr(instruction_pointer(regs))) - arm64_apply_bp_hardening(); - local_daif_restore(DAIF_PROCCTX); - } - arm64_notify_die("SP/PC alignment exception", regs, SIGBUS, BUS_ADRALN, (void __user *)addr, esr); } From 126b0a1700c5c2688d9319286d84db7f75627810 Mon Sep 17 00:00:00 2001 From: Marek Bykowski Date: Wed, 16 Oct 2019 11:57:39 +0200 Subject: [PATCH 48/93] perf: arm-ccn: Enable stats for CCN-512 interconnect Add compatible string for the ARM CCN-512 interconnect Acked-by: Pawel Moll Signed-off-by: Marek Bykowski Signed-off-by: Boleslaw Malecki Signed-off-by: Will Deacon --- drivers/perf/arm-ccn.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index fee5cb2e8afb..fea354d6fb29 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -1536,6 +1536,7 @@ static int arm_ccn_remove(struct platform_device *pdev) static const struct of_device_id arm_ccn_match[] = { { .compatible = "arm,ccn-502", }, { .compatible = "arm,ccn-504", }, + { .compatible = "arm,ccn-512", }, {}, }; MODULE_DEVICE_TABLE(of, arm_ccn_match); From 05daff069f8f29436673e8f417b2d0ac805b6f69 Mon Sep 17 00:00:00 2001 From: Marek Bykowski Date: Mon, 7 Oct 2019 15:21:15 +0200 Subject: [PATCH 49/93] Documentation: Add documentation for CCN-512 DTS binding Indicate the arm-ccn perf back-end supports now ccn-512. Acked-by: Rob Herring Signed-off-by: Marek Bykowski Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/perf/arm-ccn.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/perf/arm-ccn.txt b/Documentation/devicetree/bindings/perf/arm-ccn.txt index 43b5a71a5a9d..1c53b5aa3317 100644 --- a/Documentation/devicetree/bindings/perf/arm-ccn.txt +++ b/Documentation/devicetree/bindings/perf/arm-ccn.txt @@ -6,6 +6,7 @@ Required properties: "arm,ccn-502" "arm,ccn-504" "arm,ccn-508" + "arm,ccn-512" - reg: (standard registers property) physical address and size (16MB) of the configuration registers block From 4686da5140c18c84ca01a8ab994571d832c63398 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 28 Oct 2019 16:45:07 +0000 Subject: [PATCH 50/93] arm64: Make arm64_dma32_phys_limit static This variable is only used in the arch/arm64/mm/init.c file for ZONE_DMA32 initialisation, no need to expose it. Reported-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 71b45c58218b..39fc69873b18 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -63,7 +63,7 @@ EXPORT_SYMBOL(vmemmap); * bit addressable memory area. */ phys_addr_t arm64_dma_phys_limit __ro_after_init; -phys_addr_t arm64_dma32_phys_limit __ro_after_init; +static phys_addr_t arm64_dma32_phys_limit __ro_after_init; #ifdef CONFIG_KEXEC_CORE /* From 030f6f84e556759ecffbdd560713c3bfac4483e4 Mon Sep 17 00:00:00 2001 From: Ganapatrao Prabhakerrao Kulkarni Date: Wed, 16 Oct 2019 09:36:59 +0000 Subject: [PATCH 51/93] Documentation: perf: Update documentation for ThunderX2 PMU uncore driver Add documentation for Cavium Coherent Processor Interconnect (CCPI2) PMU. Signed-off-by: Ganapatrao Prabhakerrao Kulkarni Signed-off-by: Will Deacon --- .../admin-guide/perf/thunderx2-pmu.rst | 20 ++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/Documentation/admin-guide/perf/thunderx2-pmu.rst b/Documentation/admin-guide/perf/thunderx2-pmu.rst index 08e33675853a..01f158238ae1 100644 --- a/Documentation/admin-guide/perf/thunderx2-pmu.rst +++ b/Documentation/admin-guide/perf/thunderx2-pmu.rst @@ -3,24 +3,26 @@ Cavium ThunderX2 SoC Performance Monitoring Unit (PMU UNCORE) ============================================================= The ThunderX2 SoC PMU consists of independent, system-wide, per-socket -PMUs such as the Level 3 Cache (L3C) and DDR4 Memory Controller (DMC). +PMUs such as the Level 3 Cache (L3C), DDR4 Memory Controller (DMC) and +Cavium Coherent Processor Interconnect (CCPI2). The DMC has 8 interleaved channels and the L3C has 16 interleaved tiles. Events are counted for the default channel (i.e. channel 0) and prorated to the total number of channels/tiles. -The DMC and L3C support up to 4 counters. Counters are independently -programmable and can be started and stopped individually. Each counter -can be set to a different event. Counters are 32-bit and do not support -an overflow interrupt; they are read every 2 seconds. +The DMC and L3C support up to 4 counters, while the CCPI2 supports up to 8 +counters. Counters are independently programmable to different events and +can be started and stopped individually. None of the counters support an +overflow interrupt. DMC and L3C counters are 32-bit and read every 2 seconds. +The CCPI2 counters are 64-bit and assumed not to overflow in normal operation. PMU UNCORE (perf) driver: The thunderx2_pmu driver registers per-socket perf PMUs for the DMC and -L3C devices. Each PMU can be used to count up to 4 events -simultaneously. The PMUs provide a description of their available events -and configuration options under sysfs, see -/sys/devices/uncore_; S is the socket id. +L3C devices. Each PMU can be used to count up to 4 (DMC/L3C) or up to 8 +(CCPI2) events simultaneously. The PMUs provide a description of their +available events and configuration options under sysfs, see +/sys/devices/uncore_; S is the socket id. The driver does not support sampling, therefore "perf record" will not work. Per-task perf sessions are also not supported. From 5e2c27e833bb92487fdf49ff6d358361d00521c9 Mon Sep 17 00:00:00 2001 From: Ganapatrao Prabhakerrao Kulkarni Date: Wed, 16 Oct 2019 09:37:00 +0000 Subject: [PATCH 52/93] drivers/perf: Add CCPI2 PMU support in ThunderX2 UNCORE driver. CCPI2 is a low-latency high-bandwidth serial interface for inter socket connectivity of ThunderX2 processors. CCPI2 PMU supports up to 8 counters per socket. Counters are independently programmable to different events and can be started and stopped individually. The CCPI2 counters are 64-bit and do not overflow in normal operation. Signed-off-by: Ganapatrao Prabhakerrao Kulkarni Signed-off-by: Will Deacon --- drivers/perf/thunderx2_pmu.c | 267 ++++++++++++++++++++++++++++++----- 1 file changed, 234 insertions(+), 33 deletions(-) diff --git a/drivers/perf/thunderx2_pmu.c b/drivers/perf/thunderx2_pmu.c index 43d76c85da56..51b31d6ff2c4 100644 --- a/drivers/perf/thunderx2_pmu.c +++ b/drivers/perf/thunderx2_pmu.c @@ -16,23 +16,36 @@ * they need to be sampled before overflow(i.e, at every 2 seconds). */ -#define TX2_PMU_MAX_COUNTERS 4 +#define TX2_PMU_DMC_L3C_MAX_COUNTERS 4 +#define TX2_PMU_CCPI2_MAX_COUNTERS 8 +#define TX2_PMU_MAX_COUNTERS TX2_PMU_CCPI2_MAX_COUNTERS + + #define TX2_PMU_DMC_CHANNELS 8 #define TX2_PMU_L3_TILES 16 #define TX2_PMU_HRTIMER_INTERVAL (2 * NSEC_PER_SEC) -#define GET_EVENTID(ev) ((ev->hw.config) & 0x1f) -#define GET_COUNTERID(ev) ((ev->hw.idx) & 0x3) +#define GET_EVENTID(ev, mask) ((ev->hw.config) & mask) +#define GET_COUNTERID(ev, mask) ((ev->hw.idx) & mask) /* 1 byte per counter(4 counters). * Event id is encoded in bits [5:1] of a byte, */ #define DMC_EVENT_CFG(idx, val) ((val) << (((idx) * 8) + 1)) +/* bits[3:0] to select counters, are indexed from 8 to 15. */ +#define CCPI2_COUNTER_OFFSET 8 + #define L3C_COUNTER_CTL 0xA8 #define L3C_COUNTER_DATA 0xAC #define DMC_COUNTER_CTL 0x234 #define DMC_COUNTER_DATA 0x240 +#define CCPI2_PERF_CTL 0x108 +#define CCPI2_COUNTER_CTL 0x10C +#define CCPI2_COUNTER_SEL 0x12c +#define CCPI2_COUNTER_DATA_L 0x130 +#define CCPI2_COUNTER_DATA_H 0x134 + /* L3C event IDs */ #define L3_EVENT_READ_REQ 0xD #define L3_EVENT_WRITEBACK_REQ 0xE @@ -51,15 +64,28 @@ #define DMC_EVENT_READ_TXNS 0xF #define DMC_EVENT_MAX 0x10 +#define CCPI2_EVENT_REQ_PKT_SENT 0x3D +#define CCPI2_EVENT_SNOOP_PKT_SENT 0x65 +#define CCPI2_EVENT_DATA_PKT_SENT 0x105 +#define CCPI2_EVENT_GIC_PKT_SENT 0x12D +#define CCPI2_EVENT_MAX 0x200 + +#define CCPI2_PERF_CTL_ENABLE BIT(0) +#define CCPI2_PERF_CTL_START BIT(1) +#define CCPI2_PERF_CTL_RESET BIT(4) +#define CCPI2_EVENT_LEVEL_RISING_EDGE BIT(10) +#define CCPI2_EVENT_TYPE_EDGE_SENSITIVE BIT(11) + enum tx2_uncore_type { PMU_TYPE_L3C, PMU_TYPE_DMC, + PMU_TYPE_CCPI2, PMU_TYPE_INVALID, }; /* - * pmu on each socket has 2 uncore devices(dmc and l3c), - * each device has 4 counters. + * Each socket has 3 uncore devices associated with a PMU. The DMC and + * L3C have 4 32-bit counters and the CCPI2 has 8 64-bit counters. */ struct tx2_uncore_pmu { struct hlist_node hpnode; @@ -69,8 +95,10 @@ struct tx2_uncore_pmu { int node; int cpu; u32 max_counters; + u32 counters_mask; u32 prorate_factor; u32 max_events; + u32 events_mask; u64 hrtimer_interval; void __iomem *base; DECLARE_BITMAP(active_counters, TX2_PMU_MAX_COUNTERS); @@ -79,6 +107,7 @@ struct tx2_uncore_pmu { struct hrtimer hrtimer; const struct attribute_group **attr_groups; enum tx2_uncore_type type; + enum hrtimer_restart (*hrtimer_callback)(struct hrtimer *cb); void (*init_cntr_base)(struct perf_event *event, struct tx2_uncore_pmu *tx2_pmu); void (*stop_event)(struct perf_event *event); @@ -92,7 +121,21 @@ static inline struct tx2_uncore_pmu *pmu_to_tx2_pmu(struct pmu *pmu) return container_of(pmu, struct tx2_uncore_pmu, pmu); } -PMU_FORMAT_ATTR(event, "config:0-4"); +#define TX2_PMU_FORMAT_ATTR(_var, _name, _format) \ +static ssize_t \ +__tx2_pmu_##_var##_show(struct device *dev, \ + struct device_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ + \ +static struct device_attribute format_attr_##_var = \ + __ATTR(_name, 0444, __tx2_pmu_##_var##_show, NULL) + +TX2_PMU_FORMAT_ATTR(event, event, "config:0-4"); +TX2_PMU_FORMAT_ATTR(event_ccpi2, event, "config:0-9"); static struct attribute *l3c_pmu_format_attrs[] = { &format_attr_event.attr, @@ -104,6 +147,11 @@ static struct attribute *dmc_pmu_format_attrs[] = { NULL, }; +static struct attribute *ccpi2_pmu_format_attrs[] = { + &format_attr_event_ccpi2.attr, + NULL, +}; + static const struct attribute_group l3c_pmu_format_attr_group = { .name = "format", .attrs = l3c_pmu_format_attrs, @@ -114,6 +162,11 @@ static const struct attribute_group dmc_pmu_format_attr_group = { .attrs = dmc_pmu_format_attrs, }; +static const struct attribute_group ccpi2_pmu_format_attr_group = { + .name = "format", + .attrs = ccpi2_pmu_format_attrs, +}; + /* * sysfs event attributes */ @@ -164,6 +217,19 @@ static struct attribute *dmc_pmu_events_attrs[] = { NULL, }; +TX2_EVENT_ATTR(req_pktsent, CCPI2_EVENT_REQ_PKT_SENT); +TX2_EVENT_ATTR(snoop_pktsent, CCPI2_EVENT_SNOOP_PKT_SENT); +TX2_EVENT_ATTR(data_pktsent, CCPI2_EVENT_DATA_PKT_SENT); +TX2_EVENT_ATTR(gic_pktsent, CCPI2_EVENT_GIC_PKT_SENT); + +static struct attribute *ccpi2_pmu_events_attrs[] = { + &tx2_pmu_event_attr_req_pktsent.attr.attr, + &tx2_pmu_event_attr_snoop_pktsent.attr.attr, + &tx2_pmu_event_attr_data_pktsent.attr.attr, + &tx2_pmu_event_attr_gic_pktsent.attr.attr, + NULL, +}; + static const struct attribute_group l3c_pmu_events_attr_group = { .name = "events", .attrs = l3c_pmu_events_attrs, @@ -174,6 +240,11 @@ static const struct attribute_group dmc_pmu_events_attr_group = { .attrs = dmc_pmu_events_attrs, }; +static const struct attribute_group ccpi2_pmu_events_attr_group = { + .name = "events", + .attrs = ccpi2_pmu_events_attrs, +}; + /* * sysfs cpumask attributes */ @@ -213,6 +284,13 @@ static const struct attribute_group *dmc_pmu_attr_groups[] = { NULL }; +static const struct attribute_group *ccpi2_pmu_attr_groups[] = { + &ccpi2_pmu_format_attr_group, + &pmu_cpumask_attr_group, + &ccpi2_pmu_events_attr_group, + NULL +}; + static inline u32 reg_readl(unsigned long addr) { return readl((void __iomem *)addr); @@ -245,33 +323,58 @@ static void init_cntr_base_l3c(struct perf_event *event, struct tx2_uncore_pmu *tx2_pmu) { struct hw_perf_event *hwc = &event->hw; + u32 cmask; + + tx2_pmu = pmu_to_tx2_pmu(event->pmu); + cmask = tx2_pmu->counters_mask; /* counter ctrl/data reg offset at 8 */ hwc->config_base = (unsigned long)tx2_pmu->base - + L3C_COUNTER_CTL + (8 * GET_COUNTERID(event)); + + L3C_COUNTER_CTL + (8 * GET_COUNTERID(event, cmask)); hwc->event_base = (unsigned long)tx2_pmu->base - + L3C_COUNTER_DATA + (8 * GET_COUNTERID(event)); + + L3C_COUNTER_DATA + (8 * GET_COUNTERID(event, cmask)); } static void init_cntr_base_dmc(struct perf_event *event, struct tx2_uncore_pmu *tx2_pmu) { struct hw_perf_event *hwc = &event->hw; + u32 cmask; + + tx2_pmu = pmu_to_tx2_pmu(event->pmu); + cmask = tx2_pmu->counters_mask; hwc->config_base = (unsigned long)tx2_pmu->base + DMC_COUNTER_CTL; /* counter data reg offset at 0xc */ hwc->event_base = (unsigned long)tx2_pmu->base - + DMC_COUNTER_DATA + (0xc * GET_COUNTERID(event)); + + DMC_COUNTER_DATA + (0xc * GET_COUNTERID(event, cmask)); +} + +static void init_cntr_base_ccpi2(struct perf_event *event, + struct tx2_uncore_pmu *tx2_pmu) +{ + struct hw_perf_event *hwc = &event->hw; + u32 cmask; + + cmask = tx2_pmu->counters_mask; + + hwc->config_base = (unsigned long)tx2_pmu->base + + CCPI2_COUNTER_CTL + (4 * GET_COUNTERID(event, cmask)); + hwc->event_base = (unsigned long)tx2_pmu->base; } static void uncore_start_event_l3c(struct perf_event *event, int flags) { - u32 val; + u32 val, emask; struct hw_perf_event *hwc = &event->hw; + struct tx2_uncore_pmu *tx2_pmu; + + tx2_pmu = pmu_to_tx2_pmu(event->pmu); + emask = tx2_pmu->events_mask; /* event id encoded in bits [07:03] */ - val = GET_EVENTID(event) << 3; + val = GET_EVENTID(event, emask) << 3; reg_writel(val, hwc->config_base); local64_set(&hwc->prev_count, 0); reg_writel(0, hwc->event_base); @@ -284,10 +387,17 @@ static inline void uncore_stop_event_l3c(struct perf_event *event) static void uncore_start_event_dmc(struct perf_event *event, int flags) { - u32 val; + u32 val, cmask, emask; struct hw_perf_event *hwc = &event->hw; - int idx = GET_COUNTERID(event); - int event_id = GET_EVENTID(event); + struct tx2_uncore_pmu *tx2_pmu; + int idx, event_id; + + tx2_pmu = pmu_to_tx2_pmu(event->pmu); + cmask = tx2_pmu->counters_mask; + emask = tx2_pmu->events_mask; + + idx = GET_COUNTERID(event, cmask); + event_id = GET_EVENTID(event, emask); /* enable and start counters. * 8 bits for each counter, bits[05:01] of a counter to set event type. @@ -302,9 +412,14 @@ static void uncore_start_event_dmc(struct perf_event *event, int flags) static void uncore_stop_event_dmc(struct perf_event *event) { - u32 val; + u32 val, cmask; struct hw_perf_event *hwc = &event->hw; - int idx = GET_COUNTERID(event); + struct tx2_uncore_pmu *tx2_pmu; + int idx; + + tx2_pmu = pmu_to_tx2_pmu(event->pmu); + cmask = tx2_pmu->counters_mask; + idx = GET_COUNTERID(event, cmask); /* clear event type(bits[05:01]) to stop counter */ val = reg_readl(hwc->config_base); @@ -312,27 +427,72 @@ static void uncore_stop_event_dmc(struct perf_event *event) reg_writel(val, hwc->config_base); } +static void uncore_start_event_ccpi2(struct perf_event *event, int flags) +{ + u32 emask; + struct hw_perf_event *hwc = &event->hw; + struct tx2_uncore_pmu *tx2_pmu; + + tx2_pmu = pmu_to_tx2_pmu(event->pmu); + emask = tx2_pmu->events_mask; + + /* Bit [09:00] to set event id. + * Bits [10], set level to rising edge. + * Bits [11], set type to edge sensitive. + */ + reg_writel((CCPI2_EVENT_TYPE_EDGE_SENSITIVE | + CCPI2_EVENT_LEVEL_RISING_EDGE | + GET_EVENTID(event, emask)), hwc->config_base); + + /* reset[4], enable[0] and start[1] counters */ + reg_writel(CCPI2_PERF_CTL_RESET | + CCPI2_PERF_CTL_START | + CCPI2_PERF_CTL_ENABLE, + hwc->event_base + CCPI2_PERF_CTL); + local64_set(&event->hw.prev_count, 0ULL); +} + +static void uncore_stop_event_ccpi2(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* disable and stop counter */ + reg_writel(0, hwc->event_base + CCPI2_PERF_CTL); +} + static void tx2_uncore_event_update(struct perf_event *event) { - s64 prev, delta, new = 0; + u64 prev, delta, new = 0; struct hw_perf_event *hwc = &event->hw; struct tx2_uncore_pmu *tx2_pmu; enum tx2_uncore_type type; u32 prorate_factor; + u32 cmask, emask; tx2_pmu = pmu_to_tx2_pmu(event->pmu); type = tx2_pmu->type; + cmask = tx2_pmu->counters_mask; + emask = tx2_pmu->events_mask; prorate_factor = tx2_pmu->prorate_factor; - - new = reg_readl(hwc->event_base); - prev = local64_xchg(&hwc->prev_count, new); - - /* handles rollover of 32 bit counter */ - delta = (u32)(((1UL << 32) - prev) + new); + if (type == PMU_TYPE_CCPI2) { + reg_writel(CCPI2_COUNTER_OFFSET + + GET_COUNTERID(event, cmask), + hwc->event_base + CCPI2_COUNTER_SEL); + new = reg_readl(hwc->event_base + CCPI2_COUNTER_DATA_H); + new = (new << 32) + + reg_readl(hwc->event_base + CCPI2_COUNTER_DATA_L); + prev = local64_xchg(&hwc->prev_count, new); + delta = new - prev; + } else { + new = reg_readl(hwc->event_base); + prev = local64_xchg(&hwc->prev_count, new); + /* handles rollover of 32 bit counter */ + delta = (u32)(((1UL << 32) - prev) + new); + } /* DMC event data_transfers granularity is 16 Bytes, convert it to 64 */ if (type == PMU_TYPE_DMC && - GET_EVENTID(event) == DMC_EVENT_DATA_TRANSFERS) + GET_EVENTID(event, emask) == DMC_EVENT_DATA_TRANSFERS) delta = delta/4; /* L3C and DMC has 16 and 8 interleave channels respectively. @@ -351,6 +511,7 @@ static enum tx2_uncore_type get_tx2_pmu_type(struct acpi_device *adev) } devices[] = { {"CAV901D", PMU_TYPE_L3C}, {"CAV901F", PMU_TYPE_DMC}, + {"CAV901E", PMU_TYPE_CCPI2}, {"", PMU_TYPE_INVALID} }; @@ -380,7 +541,8 @@ static bool tx2_uncore_validate_event(struct pmu *pmu, * Make sure the group of events can be scheduled at once * on the PMU. */ -static bool tx2_uncore_validate_event_group(struct perf_event *event) +static bool tx2_uncore_validate_event_group(struct perf_event *event, + int max_counters) { struct perf_event *sibling, *leader = event->group_leader; int counters = 0; @@ -403,7 +565,7 @@ static bool tx2_uncore_validate_event_group(struct perf_event *event) * If the group requires more counters than the HW has, * it cannot ever be scheduled. */ - return counters <= TX2_PMU_MAX_COUNTERS; + return counters <= max_counters; } @@ -439,7 +601,7 @@ static int tx2_uncore_event_init(struct perf_event *event) hwc->config = event->attr.config; /* Validate the group */ - if (!tx2_uncore_validate_event_group(event)) + if (!tx2_uncore_validate_event_group(event, tx2_pmu->max_counters)) return -EINVAL; return 0; @@ -456,6 +618,10 @@ static void tx2_uncore_event_start(struct perf_event *event, int flags) tx2_pmu->start_event(event, flags); perf_event_update_userpage(event); + /* No hrtimer needed for CCPI2, 64-bit counters */ + if (!tx2_pmu->hrtimer_callback) + return; + /* Start timer for first event */ if (bitmap_weight(tx2_pmu->active_counters, tx2_pmu->max_counters) == 1) { @@ -510,15 +676,23 @@ static void tx2_uncore_event_del(struct perf_event *event, int flags) { struct tx2_uncore_pmu *tx2_pmu = pmu_to_tx2_pmu(event->pmu); struct hw_perf_event *hwc = &event->hw; + u32 cmask; + cmask = tx2_pmu->counters_mask; tx2_uncore_event_stop(event, PERF_EF_UPDATE); /* clear the assigned counter */ - free_counter(tx2_pmu, GET_COUNTERID(event)); + free_counter(tx2_pmu, GET_COUNTERID(event, cmask)); perf_event_update_userpage(event); tx2_pmu->events[hwc->idx] = NULL; hwc->idx = -1; + + if (!tx2_pmu->hrtimer_callback) + return; + + if (bitmap_empty(tx2_pmu->active_counters, tx2_pmu->max_counters)) + hrtimer_cancel(&tx2_pmu->hrtimer); } static void tx2_uncore_event_read(struct perf_event *event) @@ -580,8 +754,12 @@ static int tx2_uncore_pmu_add_dev(struct tx2_uncore_pmu *tx2_pmu) cpu_online_mask); tx2_pmu->cpu = cpu; - hrtimer_init(&tx2_pmu->hrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - tx2_pmu->hrtimer.function = tx2_hrtimer_callback; + + if (tx2_pmu->hrtimer_callback) { + hrtimer_init(&tx2_pmu->hrtimer, + CLOCK_MONOTONIC, HRTIMER_MODE_REL); + tx2_pmu->hrtimer.function = tx2_pmu->hrtimer_callback; + } ret = tx2_uncore_pmu_register(tx2_pmu); if (ret) { @@ -653,10 +831,13 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev, switch (tx2_pmu->type) { case PMU_TYPE_L3C: - tx2_pmu->max_counters = TX2_PMU_MAX_COUNTERS; + tx2_pmu->max_counters = TX2_PMU_DMC_L3C_MAX_COUNTERS; + tx2_pmu->counters_mask = 0x3; tx2_pmu->prorate_factor = TX2_PMU_L3_TILES; tx2_pmu->max_events = L3_EVENT_MAX; + tx2_pmu->events_mask = 0x1f; tx2_pmu->hrtimer_interval = TX2_PMU_HRTIMER_INTERVAL; + tx2_pmu->hrtimer_callback = tx2_hrtimer_callback; tx2_pmu->attr_groups = l3c_pmu_attr_groups; tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL, "uncore_l3c_%d", tx2_pmu->node); @@ -665,10 +846,13 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev, tx2_pmu->stop_event = uncore_stop_event_l3c; break; case PMU_TYPE_DMC: - tx2_pmu->max_counters = TX2_PMU_MAX_COUNTERS; + tx2_pmu->max_counters = TX2_PMU_DMC_L3C_MAX_COUNTERS; + tx2_pmu->counters_mask = 0x3; tx2_pmu->prorate_factor = TX2_PMU_DMC_CHANNELS; tx2_pmu->max_events = DMC_EVENT_MAX; + tx2_pmu->events_mask = 0x1f; tx2_pmu->hrtimer_interval = TX2_PMU_HRTIMER_INTERVAL; + tx2_pmu->hrtimer_callback = tx2_hrtimer_callback; tx2_pmu->attr_groups = dmc_pmu_attr_groups; tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL, "uncore_dmc_%d", tx2_pmu->node); @@ -676,6 +860,21 @@ static struct tx2_uncore_pmu *tx2_uncore_pmu_init_dev(struct device *dev, tx2_pmu->start_event = uncore_start_event_dmc; tx2_pmu->stop_event = uncore_stop_event_dmc; break; + case PMU_TYPE_CCPI2: + /* CCPI2 has 8 counters */ + tx2_pmu->max_counters = TX2_PMU_CCPI2_MAX_COUNTERS; + tx2_pmu->counters_mask = 0x7; + tx2_pmu->prorate_factor = 1; + tx2_pmu->max_events = CCPI2_EVENT_MAX; + tx2_pmu->events_mask = 0x1ff; + tx2_pmu->attr_groups = ccpi2_pmu_attr_groups; + tx2_pmu->name = devm_kasprintf(dev, GFP_KERNEL, + "uncore_ccpi2_%d", tx2_pmu->node); + tx2_pmu->init_cntr_base = init_cntr_base_ccpi2; + tx2_pmu->start_event = uncore_start_event_ccpi2; + tx2_pmu->stop_event = uncore_stop_event_ccpi2; + tx2_pmu->hrtimer_callback = NULL; + break; case PMU_TYPE_INVALID: devm_kfree(dev, tx2_pmu); return NULL; @@ -744,7 +943,9 @@ static int tx2_uncore_pmu_offline_cpu(unsigned int cpu, if (cpu != tx2_pmu->cpu) return 0; - hrtimer_cancel(&tx2_pmu->hrtimer); + if (tx2_pmu->hrtimer_callback) + hrtimer_cancel(&tx2_pmu->hrtimer); + cpumask_copy(&cpu_online_mask_temp, cpu_online_mask); cpumask_clear_cpu(cpu, &cpu_online_mask_temp); new_cpu = cpumask_any_and( From e44ec4a35dbdf3f3fe772f176fab3b8be7e02b0f Mon Sep 17 00:00:00 2001 From: Xiang Zheng Date: Tue, 29 Oct 2019 20:41:31 +0800 Subject: [PATCH 53/93] arm64: print additional fault message when executing non-exec memory When attempting to executing non-executable memory, the fault message shows: Unable to handle kernel read from unreadable memory at virtual address ffff802dac469000 This may confuse someone, so add a new fault message for instruction abort. Acked-by: Will Deacon Signed-off-by: Xiang Zheng Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 855f2a7954e6..d46a2bb90f54 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -314,6 +314,8 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, if (is_el1_permission_fault(addr, esr, regs)) { if (esr & ESR_ELx_WNR) msg = "write to read-only memory"; + else if (is_el1_instruction_abort(esr)) + msg = "execute from non-executable memory"; else msg = "read from unreadable memory"; } else if (addr < PAGE_SIZE) { From 8b5369ea580964dbc982781bfb9fb93459fc5e8d Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Mon, 14 Oct 2019 20:31:03 +0200 Subject: [PATCH 54/93] dma/direct: turn ARCH_ZONE_DMA_BITS into a variable Some architectures, notably ARM, are interested in tweaking this depending on their runtime DMA addressing limitations. Acked-by: Christoph Hellwig Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/page.h | 2 -- arch/arm64/mm/init.c | 9 +++++++-- arch/powerpc/include/asm/page.h | 9 --------- arch/powerpc/mm/mem.c | 20 +++++++++++++++----- arch/s390/include/asm/page.h | 2 -- arch/s390/mm/init.c | 1 + include/linux/dma-direct.h | 2 ++ kernel/dma/direct.c | 13 ++++++------- 8 files changed, 31 insertions(+), 27 deletions(-) diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 7b8c98830101..d39ddb258a04 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -38,6 +38,4 @@ extern int pfn_valid(unsigned long); #include -#define ARCH_ZONE_DMA_BITS 30 - #endif diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 39fc69873b18..35f27b839101 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -41,6 +42,8 @@ #include #include +#define ARM64_ZONE_DMA_BITS 30 + /* * We need to be able to catch inadvertent references to memstart_addr * that occur (potentially in generic code) before arm64_memblock_init() @@ -440,8 +443,10 @@ void __init arm64_memblock_init(void) early_init_fdt_scan_reserved_mem(); - if (IS_ENABLED(CONFIG_ZONE_DMA)) - arm64_dma_phys_limit = max_zone_phys(ARCH_ZONE_DMA_BITS); + if (IS_ENABLED(CONFIG_ZONE_DMA)) { + zone_dma_bits = ARM64_ZONE_DMA_BITS; + arm64_dma_phys_limit = max_zone_phys(ARM64_ZONE_DMA_BITS); + } if (IS_ENABLED(CONFIG_ZONE_DMA32)) arm64_dma32_phys_limit = max_zone_phys(32); diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h index c8bb14ff4713..f6c562acc3f8 100644 --- a/arch/powerpc/include/asm/page.h +++ b/arch/powerpc/include/asm/page.h @@ -329,13 +329,4 @@ struct vm_area_struct; #endif /* __ASSEMBLY__ */ #include -/* - * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks. - */ -#ifdef CONFIG_PPC32 -#define ARCH_ZONE_DMA_BITS 30 -#else -#define ARCH_ZONE_DMA_BITS 31 -#endif - #endif /* _ASM_POWERPC_PAGE_H */ diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index be941d382c8d..c95b7fe9f298 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include @@ -201,10 +202,10 @@ static int __init mark_nonram_nosave(void) * everything else. GFP_DMA32 page allocations automatically fall back to * ZONE_DMA. * - * By using 31-bit unconditionally, we can exploit ARCH_ZONE_DMA_BITS to - * inform the generic DMA mapping code. 32-bit only devices (if not handled - * by an IOMMU anyway) will take a first dip into ZONE_NORMAL and get - * otherwise served by ZONE_DMA. + * By using 31-bit unconditionally, we can exploit zone_dma_bits to inform the + * generic DMA mapping code. 32-bit only devices (if not handled by an IOMMU + * anyway) will take a first dip into ZONE_NORMAL and get otherwise served by + * ZONE_DMA. */ static unsigned long max_zone_pfns[MAX_NR_ZONES]; @@ -237,9 +238,18 @@ void __init paging_init(void) printk(KERN_DEBUG "Memory hole size: %ldMB\n", (long int)((top_of_ram - total_ram) >> 20)); + /* + * Allow 30-bit DMA for very limited Broadcom wifi chips on many + * powerbooks. + */ + if (IS_ENABLED(CONFIG_PPC32)) + zone_dma_bits = 30; + else + zone_dma_bits = 31; + #ifdef CONFIG_ZONE_DMA max_zone_pfns[ZONE_DMA] = min(max_low_pfn, - 1UL << (ARCH_ZONE_DMA_BITS - PAGE_SHIFT)); + 1UL << (zone_dma_bits - PAGE_SHIFT)); #endif max_zone_pfns[ZONE_NORMAL] = max_low_pfn; #ifdef CONFIG_HIGHMEM diff --git a/arch/s390/include/asm/page.h b/arch/s390/include/asm/page.h index 823578c6b9e2..a4d38092530a 100644 --- a/arch/s390/include/asm/page.h +++ b/arch/s390/include/asm/page.h @@ -177,8 +177,6 @@ static inline int devmem_is_allowed(unsigned long pfn) #define VM_DATA_DEFAULT_FLAGS (VM_READ | VM_WRITE | \ VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC) -#define ARCH_ZONE_DMA_BITS 31 - #include #include diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c index a124f19f7b3c..f0ce22220565 100644 --- a/arch/s390/mm/init.c +++ b/arch/s390/mm/init.c @@ -118,6 +118,7 @@ void __init paging_init(void) sparse_memory_present_with_active_regions(MAX_NUMNODES); sparse_init(); + zone_dma_bits = 31; memset(max_zone_pfns, 0, sizeof(max_zone_pfns)); max_zone_pfns[ZONE_DMA] = PFN_DOWN(MAX_DMA_ADDRESS); max_zone_pfns[ZONE_NORMAL] = max_low_pfn; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index adf993a3bd58..d03af3605460 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -5,6 +5,8 @@ #include #include +extern unsigned int zone_dma_bits; + #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include #else diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 8402b29c280f..0b67c04e531b 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -16,12 +16,11 @@ #include /* - * Most architectures use ZONE_DMA for the first 16 Megabytes, but - * some use it for entirely different regions: + * Most architectures use ZONE_DMA for the first 16 Megabytes, but some use it + * it for entirely different regions. In that case the arch code needs to + * override the variable below for dma-direct to work properly. */ -#ifndef ARCH_ZONE_DMA_BITS -#define ARCH_ZONE_DMA_BITS 24 -#endif +unsigned int zone_dma_bits __ro_after_init = 24; static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size) { @@ -69,7 +68,7 @@ static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask, * Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding * zones. */ - if (*phys_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) + if (*phys_mask <= DMA_BIT_MASK(zone_dma_bits)) return GFP_DMA; if (*phys_mask <= DMA_BIT_MASK(32)) return GFP_DMA32; @@ -395,7 +394,7 @@ int dma_direct_supported(struct device *dev, u64 mask) u64 min_mask; if (IS_ENABLED(CONFIG_ZONE_DMA)) - min_mask = DMA_BIT_MASK(ARCH_ZONE_DMA_BITS); + min_mask = DMA_BIT_MASK(zone_dma_bits); else min_mask = DMA_BIT_MASK(32); From 9ef8567ccf2eb00473b1280d0911caf3f413dc67 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Wed, 30 Oct 2019 11:46:17 +0800 Subject: [PATCH 55/93] arm64: perf: Simplify the ARMv8 PMUv3 event attributes For each PMU event, there is a ARMV8_EVENT_ATTR(xx, XX) and &armv8_event_attr_xx.attr.attr. Let's redefine the ARMV8_EVENT_ATTR to simplify the armv8_pmuv3_event_attrs. Cc: Will Deacon Cc: Mark Rutland Signed-off-by: Shaokun Zhang [will: Dropped unnecessary array syntax] Signed-off-by: Will Deacon --- arch/arm64/kernel/perf_event.c | 191 ++++++++++++--------------------- 1 file changed, 66 insertions(+), 125 deletions(-) diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index a0b4f1bca491..e40b65645c86 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -158,133 +158,74 @@ armv8pmu_events_sysfs_show(struct device *dev, return sprintf(page, "event=0x%03llx\n", pmu_attr->id); } -#define ARMV8_EVENT_ATTR(name, config) \ - PMU_EVENT_ATTR(name, armv8_event_attr_##name, \ - config, armv8pmu_events_sysfs_show) - -ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR); -ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL); -ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL); -ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL); -ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE); -ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL); -ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED); -ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED); -ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED); -ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN); -ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN); -ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED); -ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED); -ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED); -ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED); -ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED); -ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED); -ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES); -ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED); -ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS); -ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE); -ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB); -ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE); -ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL); -ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB); -ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS); -ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR); -ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC); -ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED); -ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES); -/* Don't expose the chain event in /sys, since it's useless in isolation */ -ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE); -ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE); -ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED); -ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED); -ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND); -ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND); -ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB); -ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB); -ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE); -ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL); -ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE); -ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL); -ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE); -ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB); -ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL); -ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL); -ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB); -ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB); -ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS); -ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE); -ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS); -ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK); -ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK); -ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD); -ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD); -ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD); -ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP); -ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED); -ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE); -ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION); +#define ARMV8_EVENT_ATTR(name, config) \ + (&((struct perf_pmu_events_attr) { \ + .attr = __ATTR(name, 0444, armv8pmu_events_sysfs_show, NULL), \ + .id = config, \ + }).attr.attr) static struct attribute *armv8_pmuv3_event_attrs[] = { - &armv8_event_attr_sw_incr.attr.attr, - &armv8_event_attr_l1i_cache_refill.attr.attr, - &armv8_event_attr_l1i_tlb_refill.attr.attr, - &armv8_event_attr_l1d_cache_refill.attr.attr, - &armv8_event_attr_l1d_cache.attr.attr, - &armv8_event_attr_l1d_tlb_refill.attr.attr, - &armv8_event_attr_ld_retired.attr.attr, - &armv8_event_attr_st_retired.attr.attr, - &armv8_event_attr_inst_retired.attr.attr, - &armv8_event_attr_exc_taken.attr.attr, - &armv8_event_attr_exc_return.attr.attr, - &armv8_event_attr_cid_write_retired.attr.attr, - &armv8_event_attr_pc_write_retired.attr.attr, - &armv8_event_attr_br_immed_retired.attr.attr, - &armv8_event_attr_br_return_retired.attr.attr, - &armv8_event_attr_unaligned_ldst_retired.attr.attr, - &armv8_event_attr_br_mis_pred.attr.attr, - &armv8_event_attr_cpu_cycles.attr.attr, - &armv8_event_attr_br_pred.attr.attr, - &armv8_event_attr_mem_access.attr.attr, - &armv8_event_attr_l1i_cache.attr.attr, - &armv8_event_attr_l1d_cache_wb.attr.attr, - &armv8_event_attr_l2d_cache.attr.attr, - &armv8_event_attr_l2d_cache_refill.attr.attr, - &armv8_event_attr_l2d_cache_wb.attr.attr, - &armv8_event_attr_bus_access.attr.attr, - &armv8_event_attr_memory_error.attr.attr, - &armv8_event_attr_inst_spec.attr.attr, - &armv8_event_attr_ttbr_write_retired.attr.attr, - &armv8_event_attr_bus_cycles.attr.attr, - &armv8_event_attr_l1d_cache_allocate.attr.attr, - &armv8_event_attr_l2d_cache_allocate.attr.attr, - &armv8_event_attr_br_retired.attr.attr, - &armv8_event_attr_br_mis_pred_retired.attr.attr, - &armv8_event_attr_stall_frontend.attr.attr, - &armv8_event_attr_stall_backend.attr.attr, - &armv8_event_attr_l1d_tlb.attr.attr, - &armv8_event_attr_l1i_tlb.attr.attr, - &armv8_event_attr_l2i_cache.attr.attr, - &armv8_event_attr_l2i_cache_refill.attr.attr, - &armv8_event_attr_l3d_cache_allocate.attr.attr, - &armv8_event_attr_l3d_cache_refill.attr.attr, - &armv8_event_attr_l3d_cache.attr.attr, - &armv8_event_attr_l3d_cache_wb.attr.attr, - &armv8_event_attr_l2d_tlb_refill.attr.attr, - &armv8_event_attr_l2i_tlb_refill.attr.attr, - &armv8_event_attr_l2d_tlb.attr.attr, - &armv8_event_attr_l2i_tlb.attr.attr, - &armv8_event_attr_remote_access.attr.attr, - &armv8_event_attr_ll_cache.attr.attr, - &armv8_event_attr_ll_cache_miss.attr.attr, - &armv8_event_attr_dtlb_walk.attr.attr, - &armv8_event_attr_itlb_walk.attr.attr, - &armv8_event_attr_ll_cache_rd.attr.attr, - &armv8_event_attr_ll_cache_miss_rd.attr.attr, - &armv8_event_attr_remote_access_rd.attr.attr, - &armv8_event_attr_sample_pop.attr.attr, - &armv8_event_attr_sample_feed.attr.attr, - &armv8_event_attr_sample_filtrate.attr.attr, - &armv8_event_attr_sample_collision.attr.attr, + ARMV8_EVENT_ATTR(sw_incr, ARMV8_PMUV3_PERFCTR_SW_INCR), + ARMV8_EVENT_ATTR(l1i_cache_refill, ARMV8_PMUV3_PERFCTR_L1I_CACHE_REFILL), + ARMV8_EVENT_ATTR(l1i_tlb_refill, ARMV8_PMUV3_PERFCTR_L1I_TLB_REFILL), + ARMV8_EVENT_ATTR(l1d_cache_refill, ARMV8_PMUV3_PERFCTR_L1D_CACHE_REFILL), + ARMV8_EVENT_ATTR(l1d_cache, ARMV8_PMUV3_PERFCTR_L1D_CACHE), + ARMV8_EVENT_ATTR(l1d_tlb_refill, ARMV8_PMUV3_PERFCTR_L1D_TLB_REFILL), + ARMV8_EVENT_ATTR(ld_retired, ARMV8_PMUV3_PERFCTR_LD_RETIRED), + ARMV8_EVENT_ATTR(st_retired, ARMV8_PMUV3_PERFCTR_ST_RETIRED), + ARMV8_EVENT_ATTR(inst_retired, ARMV8_PMUV3_PERFCTR_INST_RETIRED), + ARMV8_EVENT_ATTR(exc_taken, ARMV8_PMUV3_PERFCTR_EXC_TAKEN), + ARMV8_EVENT_ATTR(exc_return, ARMV8_PMUV3_PERFCTR_EXC_RETURN), + ARMV8_EVENT_ATTR(cid_write_retired, ARMV8_PMUV3_PERFCTR_CID_WRITE_RETIRED), + ARMV8_EVENT_ATTR(pc_write_retired, ARMV8_PMUV3_PERFCTR_PC_WRITE_RETIRED), + ARMV8_EVENT_ATTR(br_immed_retired, ARMV8_PMUV3_PERFCTR_BR_IMMED_RETIRED), + ARMV8_EVENT_ATTR(br_return_retired, ARMV8_PMUV3_PERFCTR_BR_RETURN_RETIRED), + ARMV8_EVENT_ATTR(unaligned_ldst_retired, ARMV8_PMUV3_PERFCTR_UNALIGNED_LDST_RETIRED), + ARMV8_EVENT_ATTR(br_mis_pred, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED), + ARMV8_EVENT_ATTR(cpu_cycles, ARMV8_PMUV3_PERFCTR_CPU_CYCLES), + ARMV8_EVENT_ATTR(br_pred, ARMV8_PMUV3_PERFCTR_BR_PRED), + ARMV8_EVENT_ATTR(mem_access, ARMV8_PMUV3_PERFCTR_MEM_ACCESS), + ARMV8_EVENT_ATTR(l1i_cache, ARMV8_PMUV3_PERFCTR_L1I_CACHE), + ARMV8_EVENT_ATTR(l1d_cache_wb, ARMV8_PMUV3_PERFCTR_L1D_CACHE_WB), + ARMV8_EVENT_ATTR(l2d_cache, ARMV8_PMUV3_PERFCTR_L2D_CACHE), + ARMV8_EVENT_ATTR(l2d_cache_refill, ARMV8_PMUV3_PERFCTR_L2D_CACHE_REFILL), + ARMV8_EVENT_ATTR(l2d_cache_wb, ARMV8_PMUV3_PERFCTR_L2D_CACHE_WB), + ARMV8_EVENT_ATTR(bus_access, ARMV8_PMUV3_PERFCTR_BUS_ACCESS), + ARMV8_EVENT_ATTR(memory_error, ARMV8_PMUV3_PERFCTR_MEMORY_ERROR), + ARMV8_EVENT_ATTR(inst_spec, ARMV8_PMUV3_PERFCTR_INST_SPEC), + ARMV8_EVENT_ATTR(ttbr_write_retired, ARMV8_PMUV3_PERFCTR_TTBR_WRITE_RETIRED), + ARMV8_EVENT_ATTR(bus_cycles, ARMV8_PMUV3_PERFCTR_BUS_CYCLES), + /* Don't expose the chain event in /sys, since it's useless in isolation */ + ARMV8_EVENT_ATTR(l1d_cache_allocate, ARMV8_PMUV3_PERFCTR_L1D_CACHE_ALLOCATE), + ARMV8_EVENT_ATTR(l2d_cache_allocate, ARMV8_PMUV3_PERFCTR_L2D_CACHE_ALLOCATE), + ARMV8_EVENT_ATTR(br_retired, ARMV8_PMUV3_PERFCTR_BR_RETIRED), + ARMV8_EVENT_ATTR(br_mis_pred_retired, ARMV8_PMUV3_PERFCTR_BR_MIS_PRED_RETIRED), + ARMV8_EVENT_ATTR(stall_frontend, ARMV8_PMUV3_PERFCTR_STALL_FRONTEND), + ARMV8_EVENT_ATTR(stall_backend, ARMV8_PMUV3_PERFCTR_STALL_BACKEND), + ARMV8_EVENT_ATTR(l1d_tlb, ARMV8_PMUV3_PERFCTR_L1D_TLB), + ARMV8_EVENT_ATTR(l1i_tlb, ARMV8_PMUV3_PERFCTR_L1I_TLB), + ARMV8_EVENT_ATTR(l2i_cache, ARMV8_PMUV3_PERFCTR_L2I_CACHE), + ARMV8_EVENT_ATTR(l2i_cache_refill, ARMV8_PMUV3_PERFCTR_L2I_CACHE_REFILL), + ARMV8_EVENT_ATTR(l3d_cache_allocate, ARMV8_PMUV3_PERFCTR_L3D_CACHE_ALLOCATE), + ARMV8_EVENT_ATTR(l3d_cache_refill, ARMV8_PMUV3_PERFCTR_L3D_CACHE_REFILL), + ARMV8_EVENT_ATTR(l3d_cache, ARMV8_PMUV3_PERFCTR_L3D_CACHE), + ARMV8_EVENT_ATTR(l3d_cache_wb, ARMV8_PMUV3_PERFCTR_L3D_CACHE_WB), + ARMV8_EVENT_ATTR(l2d_tlb_refill, ARMV8_PMUV3_PERFCTR_L2D_TLB_REFILL), + ARMV8_EVENT_ATTR(l2i_tlb_refill, ARMV8_PMUV3_PERFCTR_L2I_TLB_REFILL), + ARMV8_EVENT_ATTR(l2d_tlb, ARMV8_PMUV3_PERFCTR_L2D_TLB), + ARMV8_EVENT_ATTR(l2i_tlb, ARMV8_PMUV3_PERFCTR_L2I_TLB), + ARMV8_EVENT_ATTR(remote_access, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS), + ARMV8_EVENT_ATTR(ll_cache, ARMV8_PMUV3_PERFCTR_LL_CACHE), + ARMV8_EVENT_ATTR(ll_cache_miss, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS), + ARMV8_EVENT_ATTR(dtlb_walk, ARMV8_PMUV3_PERFCTR_DTLB_WALK), + ARMV8_EVENT_ATTR(itlb_walk, ARMV8_PMUV3_PERFCTR_ITLB_WALK), + ARMV8_EVENT_ATTR(ll_cache_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_RD), + ARMV8_EVENT_ATTR(ll_cache_miss_rd, ARMV8_PMUV3_PERFCTR_LL_CACHE_MISS_RD), + ARMV8_EVENT_ATTR(remote_access_rd, ARMV8_PMUV3_PERFCTR_REMOTE_ACCESS_RD), + ARMV8_EVENT_ATTR(sample_pop, ARMV8_SPE_PERFCTR_SAMPLE_POP), + ARMV8_EVENT_ATTR(sample_feed, ARMV8_SPE_PERFCTR_SAMPLE_FEED), + ARMV8_EVENT_ATTR(sample_filtrate, ARMV8_SPE_PERFCTR_SAMPLE_FILTRATE), + ARMV8_EVENT_ATTR(sample_collision, ARMV8_SPE_PERFCTR_SAMPLE_COLLISION), NULL, }; From 478016c3839d53bd4c89af1f095195be543fa1a3 Mon Sep 17 00:00:00 2001 From: Julien Grall Date: Fri, 1 Nov 2019 15:20:22 +0000 Subject: [PATCH 56/93] docs/arm64: cpu-feature-registers: Rewrite bitfields that don't follow [e, s] Commit "docs/arm64: cpu-feature-registers: Documents missing visible fields" added bitfields following the convention [s, e]. However, the documentation is following [s, e] and so does the Arm ARM. Rewrite the bitfields to match the format [s, e]. Fixes: a8613e7070e7 ("docs/arm64: cpu-feature-registers: Documents missing visible fields") Signed-off-by: Julien Grall Signed-off-by: Catalin Marinas --- Documentation/arm64/cpu-feature-registers.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Documentation/arm64/cpu-feature-registers.rst b/Documentation/arm64/cpu-feature-registers.rst index ffcf4e2c71ef..7c40e4581bae 100644 --- a/Documentation/arm64/cpu-feature-registers.rst +++ b/Documentation/arm64/cpu-feature-registers.rst @@ -193,9 +193,9 @@ infrastructure: +------------------------------+---------+---------+ | Name | bits | visible | +------------------------------+---------+---------+ - | SB | [36-39] | y | + | SB | [39-36] | y | +------------------------------+---------+---------+ - | FRINTTS | [32-35] | y | + | FRINTTS | [35-32] | y | +------------------------------+---------+---------+ | GPI | [31-28] | y | +------------------------------+---------+---------+ From 76d835fcd429615b61fccbe65de4f53360a89ca7 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Fri, 1 Nov 2019 08:36:10 +0000 Subject: [PATCH 57/93] docs/perf: Add explanation for DDR_CAP_AXI_ID_FILTER_ENHANCED quirk Add explanation for DDR_CAP_AXI_ID_FILTER_ENHANCED quirk. Signed-off-by: Joakim Zhang [will: Simplified wording] Signed-off-by: Will Deacon --- Documentation/admin-guide/perf/imx-ddr.rst | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/admin-guide/perf/imx-ddr.rst b/Documentation/admin-guide/perf/imx-ddr.rst index 517a205abad6..66988b481217 100644 --- a/Documentation/admin-guide/perf/imx-ddr.rst +++ b/Documentation/admin-guide/perf/imx-ddr.rst @@ -50,3 +50,8 @@ in the driver. axi_id to monitor a specific id, rather than having to specify axi_mask. e.g.:: perf stat -a -e imx8_ddr0/axid-read,axi_id=0x12/ cmd, which will monitor ARID=0x12 + +* With DDR_CAP_AXI_ID_FILTER_ENHANCED quirk. + This is an extension to the DDR_CAP_AXI_ID_FILTER quirk which permits + counting the number of bytes (as opposed to the number of bursts) from DDR + read and write transactions concurrently with another set of data counters. From 1178addaca66292b309b3cbbdc12421cef4894b2 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Fri, 1 Nov 2019 08:36:13 +0000 Subject: [PATCH 58/93] bindings: perf: imx-ddr: Add new compatible string Add new compatible string for i.MX8MPlus DDR PMU core. Signed-off-by: Joakim Zhang Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt index d77e3f26f9e6..7822a806ea0a 100644 --- a/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt +++ b/Documentation/devicetree/bindings/perf/fsl-imx-ddr.txt @@ -5,6 +5,7 @@ Required properties: - compatible: should be one of: "fsl,imx8-ddr-pmu" "fsl,imx8m-ddr-pmu" + "fsl,imx8mp-ddr-pmu" - reg: physical address and size From 44f8bd014a94ed679ddb77d0b92350d4ac4f23a5 Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Fri, 1 Nov 2019 08:36:16 +0000 Subject: [PATCH 59/93] perf/imx_ddr: Add enhanced AXI ID filter support With DDR_CAP_AXI_ID_FILTER quirk, indicating HW supports AXI ID filter which only can get bursts from DDR transaction, i.e. DDR read/write requests. This patch add DDR_CAP_AXI_ID_ENHANCED_FILTER quirk, indicating HW supports AXI ID filter which can get bursts and bytes from DDR transaction at the same time. We hope PMU always return bytes in the driver due to it is more meaningful for users. Signed-off-by: Joakim Zhang Signed-off-by: Will Deacon --- drivers/perf/fsl_imx8_ddr_perf.c | 63 +++++++++++++++++++++----------- 1 file changed, 42 insertions(+), 21 deletions(-) diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index ce7345745b42..2a3966d059e7 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -45,7 +45,8 @@ static DEFINE_IDA(ddr_ida); /* DDR Perf hardware feature */ -#define DDR_CAP_AXI_ID_FILTER 0x1 /* support AXI ID filter */ +#define DDR_CAP_AXI_ID_FILTER 0x1 /* support AXI ID filter */ +#define DDR_CAP_AXI_ID_FILTER_ENHANCED 0x3 /* support enhanced AXI ID filter */ struct fsl_ddr_devtype_data { unsigned int quirks; /* quirks needed for different DDR Perf core */ @@ -178,6 +179,36 @@ static const struct attribute_group *attr_groups[] = { NULL, }; +static bool ddr_perf_is_filtered(struct perf_event *event) +{ + return event->attr.config == 0x41 || event->attr.config == 0x42; +} + +static u32 ddr_perf_filter_val(struct perf_event *event) +{ + return event->attr.config1; +} + +static bool ddr_perf_filters_compatible(struct perf_event *a, + struct perf_event *b) +{ + if (!ddr_perf_is_filtered(a)) + return true; + if (!ddr_perf_is_filtered(b)) + return true; + return ddr_perf_filter_val(a) == ddr_perf_filter_val(b); +} + +static bool ddr_perf_is_enhanced_filtered(struct perf_event *event) +{ + unsigned int filt; + struct ddr_pmu *pmu = to_ddr_pmu(event->pmu); + + filt = pmu->devtype_data->quirks & DDR_CAP_AXI_ID_FILTER_ENHANCED; + return (filt == DDR_CAP_AXI_ID_FILTER_ENHANCED) && + ddr_perf_is_filtered(event); +} + static u32 ddr_perf_alloc_counter(struct ddr_pmu *pmu, int event) { int i; @@ -209,27 +240,17 @@ static void ddr_perf_free_counter(struct ddr_pmu *pmu, int counter) static u32 ddr_perf_read_counter(struct ddr_pmu *pmu, int counter) { - return readl_relaxed(pmu->base + COUNTER_READ + counter * 4); -} + struct perf_event *event = pmu->events[counter]; + void __iomem *base = pmu->base; -static bool ddr_perf_is_filtered(struct perf_event *event) -{ - return event->attr.config == 0x41 || event->attr.config == 0x42; -} - -static u32 ddr_perf_filter_val(struct perf_event *event) -{ - return event->attr.config1; -} - -static bool ddr_perf_filters_compatible(struct perf_event *a, - struct perf_event *b) -{ - if (!ddr_perf_is_filtered(a)) - return true; - if (!ddr_perf_is_filtered(b)) - return true; - return ddr_perf_filter_val(a) == ddr_perf_filter_val(b); + /* + * return bytes instead of bursts from ddr transaction for + * axid-read and axid-write event if PMU core supports enhanced + * filter. + */ + base += ddr_perf_is_enhanced_filtered(event) ? COUNTER_DPCR1 : + COUNTER_READ; + return readl_relaxed(base + counter * 4); } static int ddr_perf_event_init(struct perf_event *event) From d3eeece9a8ab87f98fa60664fd1ce817661f39cb Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Fri, 1 Nov 2019 08:36:20 +0000 Subject: [PATCH 60/93] perf/imx_ddr: Add driver for DDR PMU in i.MX8MPlus Add driver for DDR PMU in i.MX8MPlus. Signed-off-by: Joakim Zhang Signed-off-by: Will Deacon --- drivers/perf/fsl_imx8_ddr_perf.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 2a3966d059e7..3bbf806209a6 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -58,9 +58,14 @@ static const struct fsl_ddr_devtype_data imx8m_devtype_data = { .quirks = DDR_CAP_AXI_ID_FILTER, }; +static const struct fsl_ddr_devtype_data imx8mp_devtype_data = { + .quirks = DDR_CAP_AXI_ID_FILTER_ENHANCED, +}; + static const struct of_device_id imx_ddr_pmu_dt_ids[] = { { .compatible = "fsl,imx8-ddr-pmu", .data = &imx8_devtype_data}, { .compatible = "fsl,imx8m-ddr-pmu", .data = &imx8m_devtype_data}, + { .compatible = "fsl,imx8mp-ddr-pmu", .data = &imx8mp_devtype_data}, { /* sentinel */ } }; MODULE_DEVICE_TABLE(of, imx_ddr_pmu_dt_ids); From ed0207a33adda11db88c9c3b6066f0c6051119fe Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 4 Nov 2019 07:09:20 +0000 Subject: [PATCH 61/93] docs/perf: Add AXI ID filter capabilities information Add capabilities information for AXI ID filter. Signed-off-by: Joakim Zhang Signed-off-by: Will Deacon --- Documentation/admin-guide/perf/imx-ddr.rst | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Documentation/admin-guide/perf/imx-ddr.rst b/Documentation/admin-guide/perf/imx-ddr.rst index 66988b481217..90056e4e8859 100644 --- a/Documentation/admin-guide/perf/imx-ddr.rst +++ b/Documentation/admin-guide/perf/imx-ddr.rst @@ -17,7 +17,8 @@ The "format" directory describes format of the config (event ID) and config1 (AXI filtering) fields of the perf_event_attr structure, see /sys/bus/event_source/ devices/imx8_ddr0/format/. The "events" directory describes the events types hardware supported that can be used with perf tool, see /sys/bus/event_source/ -devices/imx8_ddr0/events/. +devices/imx8_ddr0/events/. The "caps" directory describes filter features implemented +in DDR PMU, see /sys/bus/events_source/devices/imx8_ddr0/caps/. e.g.:: perf stat -a -e imx8_ddr0/cycles/ cmd perf stat -a -e imx8_ddr0/read/,imx8_ddr0/write/ cmd @@ -25,9 +26,12 @@ devices/imx8_ddr0/events/. AXI filtering is only used by CSV modes 0x41 (axid-read) and 0x42 (axid-write) to count reading or writing matches filter setting. Filter setting is various from different DRAM controller implementations, which is distinguished by quirks -in the driver. +in the driver. You also can dump info from userspace, filter in "caps" directory +indicates whether PMU supports AXI ID filter or not; enhanced_filter indicates +whether PMU supports enhanced AXI ID filter or not. Value 0 for un-supported, and +value 1 for supported. -* With DDR_CAP_AXI_ID_FILTER quirk. +* With DDR_CAP_AXI_ID_FILTER quirk(filter: 1, enhanced_filter: 0). Filter is defined with two configuration parts: --AXI_ID defines AxID matching value. --AXI_MASKING defines which bits of AxID are meaningful for the matching. @@ -51,7 +55,7 @@ in the driver. e.g.:: perf stat -a -e imx8_ddr0/axid-read,axi_id=0x12/ cmd, which will monitor ARID=0x12 -* With DDR_CAP_AXI_ID_FILTER_ENHANCED quirk. +* With DDR_CAP_AXI_ID_FILTER_ENHANCED quirk(filter: 1, enhanced_filter: 1). This is an extension to the DDR_CAP_AXI_ID_FILTER quirk which permits counting the number of bytes (as opposed to the number of bursts) from DDR read and write transactions concurrently with another set of data counters. From f1d303a1b5dd75d18734a2ca56ca90691639a79d Mon Sep 17 00:00:00 2001 From: Joakim Zhang Date: Mon, 4 Nov 2019 07:09:24 +0000 Subject: [PATCH 62/93] perf/imx_ddr: Dump AXI ID filter info to userspace caps/filter indicates whether HW supports AXI ID filter or not. caps/enhanced_filter indicates whether HW supports enhanced AXI ID filter or not. Users can check filter features from userspace with these attributions. Suggested-by: Will Deacon Signed-off-by: Joakim Zhang [will: reworked cap switch to be less error-prone] Signed-off-by: Will Deacon --- drivers/perf/fsl_imx8_ddr_perf.c | 56 ++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/drivers/perf/fsl_imx8_ddr_perf.c b/drivers/perf/fsl_imx8_ddr_perf.c index 3bbf806209a6..55083c67b2bb 100644 --- a/drivers/perf/fsl_imx8_ddr_perf.c +++ b/drivers/perf/fsl_imx8_ddr_perf.c @@ -84,6 +84,61 @@ struct ddr_pmu { int id; }; +enum ddr_perf_filter_capabilities { + PERF_CAP_AXI_ID_FILTER = 0, + PERF_CAP_AXI_ID_FILTER_ENHANCED, + PERF_CAP_AXI_ID_FEAT_MAX, +}; + +static u32 ddr_perf_filter_cap_get(struct ddr_pmu *pmu, int cap) +{ + u32 quirks = pmu->devtype_data->quirks; + + switch (cap) { + case PERF_CAP_AXI_ID_FILTER: + return !!(quirks & DDR_CAP_AXI_ID_FILTER); + case PERF_CAP_AXI_ID_FILTER_ENHANCED: + quirks &= DDR_CAP_AXI_ID_FILTER_ENHANCED; + return quirks == DDR_CAP_AXI_ID_FILTER_ENHANCED; + default: + WARN(1, "unknown filter cap %d\n", cap); + } + + return 0; +} + +static ssize_t ddr_perf_filter_cap_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct ddr_pmu *pmu = dev_get_drvdata(dev); + struct dev_ext_attribute *ea = + container_of(attr, struct dev_ext_attribute, attr); + int cap = (long)ea->var; + + return snprintf(buf, PAGE_SIZE, "%u\n", + ddr_perf_filter_cap_get(pmu, cap)); +} + +#define PERF_EXT_ATTR_ENTRY(_name, _func, _var) \ + (&((struct dev_ext_attribute) { \ + __ATTR(_name, 0444, _func, NULL), (void *)_var \ + }).attr.attr) + +#define PERF_FILTER_EXT_ATTR_ENTRY(_name, _var) \ + PERF_EXT_ATTR_ENTRY(_name, ddr_perf_filter_cap_show, _var) + +static struct attribute *ddr_perf_filter_cap_attr[] = { + PERF_FILTER_EXT_ATTR_ENTRY(filter, PERF_CAP_AXI_ID_FILTER), + PERF_FILTER_EXT_ATTR_ENTRY(enhanced_filter, PERF_CAP_AXI_ID_FILTER_ENHANCED), + NULL, +}; + +static struct attribute_group ddr_perf_filter_cap_attr_group = { + .name = "caps", + .attrs = ddr_perf_filter_cap_attr, +}; + static ssize_t ddr_perf_cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) { @@ -181,6 +236,7 @@ static const struct attribute_group *attr_groups[] = { &ddr_perf_events_attr_group, &ddr_perf_format_attr_group, &ddr_perf_cpumask_attr_group, + &ddr_perf_filter_cap_attr_group, NULL, }; From 32d1870877ba7675c642e903e5ef71c82a245325 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Sun, 3 Nov 2019 21:35:58 +0900 Subject: [PATCH 63/93] arm64: mm: simplify the page end calculation in __create_pgd_mapping() Calculate the page-aligned end address more simply. The local variable, "length" is unneeded. Reviewed-by: Mark Rutland Signed-off-by: Masahiro Yamada Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index 60c929f3683b..a9f541912289 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -338,7 +338,7 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, phys_addr_t (*pgtable_alloc)(int), int flags) { - unsigned long addr, length, end, next; + unsigned long addr, end, next; pgd_t *pgdp = pgd_offset_raw(pgdir, virt); /* @@ -350,9 +350,8 @@ static void __create_pgd_mapping(pgd_t *pgdir, phys_addr_t phys, phys &= PAGE_MASK; addr = virt & PAGE_MASK; - length = PAGE_ALIGN(size + (virt & ~PAGE_MASK)); + end = PAGE_ALIGN(virt + size); - end = addr + length; do { next = pgd_addr_end(addr, end); alloc_init_pud(pgdp, addr, next, phys, prot, pgtable_alloc, From 218564b164ad9d283d3cb3d5367705726123a610 Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Tue, 5 Nov 2019 03:26:46 +0530 Subject: [PATCH 64/93] arm64: mm: Remove MAX_USER_VA_BITS definition commit 9b31cf493ffa ("arm64: mm: Introduce MAX_USER_VA_BITS definition") introduced the MAX_USER_VA_BITS definition, which was used to support the arm64 mm use-cases where the user-space could use 52-bit virtual addresses whereas the kernel-space would still could a maximum of 48-bit virtual addressing. But, now with commit b6d00d47e81a ("arm64: mm: Introduce 52-bit Kernel VAs"), we removed the 52-bit user/48-bit kernel kconfig option and hence there is no longer any scenario where user VA != kernel VA size (even with CONFIG_ARM64_FORCE_52BIT enabled, the same is true). Hence we can do away with the MAX_USER_VA_BITS macro as it is equal to VA_BITS (maximum VA space size) in all possible use-cases. Note that even though the 'vabits_actual' value would be 48 for arm64 hardware which don't support LVA-8.2 extension (even when CONFIG_ARM64_VA_BITS_52 is enabled), VA_BITS would still be set to a value 52. Hence this change would be safe in all possible VA address space combinations. Cc: James Morse Cc: Will Deacon Cc: Steve Capper Cc: Ard Biesheuvel Cc: linux-kernel@vger.kernel.org Cc: kexec@lists.infradead.org Reviewed-by: Mark Rutland Signed-off-by: Bhupesh Sharma Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/memory.h | 6 ------ arch/arm64/include/asm/pgtable-hwdef.h | 2 +- arch/arm64/include/asm/processor.h | 2 +- 3 files changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index b61b50bf68b1..4867e58dbc9c 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -69,12 +69,6 @@ #define KERNEL_START _text #define KERNEL_END _end -#ifdef CONFIG_ARM64_VA_BITS_52 -#define MAX_USER_VA_BITS 52 -#else -#define MAX_USER_VA_BITS VA_BITS -#endif - /* * Generic and tag-based KASAN require 1/8th and 1/16th of the kernel virtual * address space for the shadow region respectively. They can bloat the stack diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 3df60f97da1f..d9fbd433cc17 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -69,7 +69,7 @@ #define PGDIR_SHIFT ARM64_HW_PGTABLE_LEVEL_SHIFT(4 - CONFIG_PGTABLE_LEVELS) #define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -#define PTRS_PER_PGD (1 << (MAX_USER_VA_BITS - PGDIR_SHIFT)) +#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT)) /* * Section address mask and size definitions. diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 5623685c7d13..586fcd4b1965 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -9,7 +9,7 @@ #define __ASM_PROCESSOR_H #define KERNEL_DS UL(-1) -#define USER_DS ((UL(1) << MAX_USER_VA_BITS) - 1) +#define USER_DS ((UL(1) << VA_BITS) - 1) /* * On arm64 systems, unaligned accesses by the CPU are cheap, and so there is From 918e1946c8ac2c0473eefc1dc910780178490e95 Mon Sep 17 00:00:00 2001 From: Rich Wiley Date: Tue, 5 Nov 2019 10:45:10 -0800 Subject: [PATCH 65/93] arm64: kpti: Add NVIDIA's Carmel core to the KPTI whitelist NVIDIA Carmel CPUs don't implement ID_AA64PFR0_EL1.CSV3 but aren't susceptible to Meltdown, so add Carmel to kpti_safe_list[]. Signed-off-by: Rich Wiley Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpufeature.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index cabebf1a7976..b3eea965c930 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -977,6 +977,7 @@ static bool unmap_kernel_at_el0(const struct arm64_cpu_capabilities *entry, MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), MIDR_ALL_VERSIONS(MIDR_CORTEX_A73), MIDR_ALL_VERSIONS(MIDR_HISI_TSV110), + MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL), { /* sentinel */ } }; char const *str = "kpti command line option"; From fbf6c73c5b264c25484fa9f449b5546569fe11f0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Oct 2019 17:51:10 +0100 Subject: [PATCH 66/93] ftrace: add ftrace_init_nop() Architectures may need to perform special initialization of ftrace callsites, and today they do so by special-casing ftrace_make_nop() when the expected branch address is MCOUNT_ADDR. In some cases (e.g. for patchable-function-entry), we don't have an mcount-like symbol and don't want a synthetic MCOUNT_ADDR, but we may need to perform some initialization of callsites. To make it possible to separate initialization from runtime modification, and to handle cases without an mcount-like symbol, this patch adds an optional ftrace_init_nop() function that architectures can implement, which does not pass a branch address. Where an architecture does not provide ftrace_init_nop(), we will fall back to the existing behaviour of calling ftrace_make_nop() with MCOUNT_ADDR. At the same time, ftrace_code_disable() is renamed to ftrace_nop_initialize() to make it clearer that it is intended to intialize a callsite into a disabled state, and is not for disabling a callsite that has been runtime enabled. The kerneldoc description of rec arguments is updated to cover non-mcount callsites. Signed-off-by: Mark Rutland Reviewed-by: Amit Daniel Kachhap Reviewed-by: Ard Biesheuvel Reviewed-by: Miroslav Benes Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Torsten Duwe Tested-by: Amit Daniel Kachhap Tested-by: Sven Schnelle Tested-by: Torsten Duwe Cc: Ingo Molnar --- include/linux/ftrace.h | 35 ++++++++++++++++++++++++++++++++--- kernel/trace/ftrace.c | 6 +++--- 2 files changed, 35 insertions(+), 6 deletions(-) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 8a8cb3c401b2..9867d90d635e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -499,7 +499,7 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; } /** * ftrace_make_nop - convert code into nop * @mod: module structure if called by module load initialization - * @rec: the mcount call site record + * @rec: the call site record (e.g. mcount/fentry) * @addr: the address that the call site should be calling * * This is a very sensitive operation and great care needs @@ -520,9 +520,38 @@ static inline int ftrace_disable_ftrace_graph_caller(void) { return 0; } extern int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr); + +/** + * ftrace_init_nop - initialize a nop call site + * @mod: module structure if called by module load initialization + * @rec: the call site record (e.g. mcount/fentry) + * + * This is a very sensitive operation and great care needs + * to be taken by the arch. The operation should carefully + * read the location, check to see if what is read is indeed + * what we expect it to be, and then on success of the compare, + * it should write to the location. + * + * The code segment at @rec->ip should contain the contents created by + * the compiler + * + * Return must be: + * 0 on success + * -EFAULT on error reading the location + * -EINVAL on a failed compare of the contents + * -EPERM on error writing to the location + * Any other value will be considered a failure. + */ +#ifndef ftrace_init_nop +static inline int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) +{ + return ftrace_make_nop(mod, rec, MCOUNT_ADDR); +} +#endif + /** * ftrace_make_call - convert a nop call site into a call to addr - * @rec: the mcount call site record + * @rec: the call site record (e.g. mcount/fentry) * @addr: the address that the call site should call * * This is a very sensitive operation and great care needs @@ -545,7 +574,7 @@ extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr); #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS /** * ftrace_modify_call - convert from one addr to another (no nop) - * @rec: the mcount call site record + * @rec: the call site record (e.g. mcount/fentry) * @old_addr: the address expected to be currently called to * @addr: the address to change to * diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index f296d89be757..5259d4dea675 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -2494,14 +2494,14 @@ struct dyn_ftrace *ftrace_rec_iter_record(struct ftrace_rec_iter *iter) } static int -ftrace_code_disable(struct module *mod, struct dyn_ftrace *rec) +ftrace_nop_initialize(struct module *mod, struct dyn_ftrace *rec) { int ret; if (unlikely(ftrace_disabled)) return 0; - ret = ftrace_make_nop(mod, rec, MCOUNT_ADDR); + ret = ftrace_init_nop(mod, rec); if (ret) { ftrace_bug_type = FTRACE_BUG_INIT; ftrace_bug(ret, rec); @@ -2943,7 +2943,7 @@ static int ftrace_update_code(struct module *mod, struct ftrace_page *new_pgs) * to the NOP instructions. */ if (!__is_defined(CC_USING_NOP_MCOUNT) && - !ftrace_code_disable(mod, p)) + !ftrace_nop_initialize(mod, p)) break; update_cnt++; From a1326b17ac03a9012cb3d01e434aacb4d67a416c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Oct 2019 18:17:11 +0100 Subject: [PATCH 67/93] module/ftrace: handle patchable-function-entry When using patchable-function-entry, the compiler will record the callsites into a section named "__patchable_function_entries" rather than "__mcount_loc". Let's abstract this difference behind a new FTRACE_CALLSITE_SECTION, so that architectures don't have to handle this explicitly (e.g. with custom module linker scripts). As parisc currently handles this explicitly, it is fixed up accordingly, with its custom linker script removed. Since FTRACE_CALLSITE_SECTION is only defined when DYNAMIC_FTRACE is selected, the parisc module loading code is updated to only use the definition in that case. When DYNAMIC_FTRACE is not selected, modules shouldn't have this section, so this removes some redundant work in that case. To make sure that this is keep up-to-date for modules and the main kernel, a comment is added to vmlinux.lds.h, with the existing ifdeffery simplified for legibility. I built parisc generic-{32,64}bit_defconfig with DYNAMIC_FTRACE enabled, and verified that the section made it into the .ko files for modules. Signed-off-by: Mark Rutland Acked-by: Helge Deller Acked-by: Steven Rostedt (VMware) Reviewed-by: Ard Biesheuvel Reviewed-by: Torsten Duwe Tested-by: Amit Daniel Kachhap Tested-by: Sven Schnelle Tested-by: Torsten Duwe Cc: Ingo Molnar Cc: James E.J. Bottomley Cc: Jessica Yu Cc: linux-parisc@vger.kernel.org --- arch/parisc/Makefile | 1 - arch/parisc/kernel/module.c | 10 +++++++--- arch/parisc/kernel/module.lds | 7 ------- include/asm-generic/vmlinux.lds.h | 14 +++++++------- include/linux/ftrace.h | 5 +++++ kernel/module.c | 2 +- 6 files changed, 20 insertions(+), 19 deletions(-) delete mode 100644 arch/parisc/kernel/module.lds diff --git a/arch/parisc/Makefile b/arch/parisc/Makefile index 36b834f1c933..dca8f2de8cf5 100644 --- a/arch/parisc/Makefile +++ b/arch/parisc/Makefile @@ -60,7 +60,6 @@ KBUILD_CFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY=1 \ -DFTRACE_PATCHABLE_FUNCTION_SIZE=$(NOP_COUNT) CC_FLAGS_FTRACE := -fpatchable-function-entry=$(NOP_COUNT),$(shell echo $$(($(NOP_COUNT)-1))) -KBUILD_LDS_MODULE += $(srctree)/arch/parisc/kernel/module.lds endif OBJCOPY_FLAGS =-O binary -R .note -R .comment -S diff --git a/arch/parisc/kernel/module.c b/arch/parisc/kernel/module.c index ac5f34993b53..1c50093e2ebe 100644 --- a/arch/parisc/kernel/module.c +++ b/arch/parisc/kernel/module.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include @@ -862,7 +863,7 @@ int module_finalize(const Elf_Ehdr *hdr, const char *strtab = NULL; const Elf_Shdr *s; char *secstrings; - int err, symindex = -1; + int symindex = -1; Elf_Sym *newptr, *oldptr; Elf_Shdr *symhdr = NULL; #ifdef DEBUG @@ -946,11 +947,13 @@ int module_finalize(const Elf_Ehdr *hdr, /* patch .altinstructions */ apply_alternatives(aseg, aseg + s->sh_size, me->name); +#ifdef CONFIG_DYNAMIC_FTRACE /* For 32 bit kernels we're compiling modules with * -ffunction-sections so we must relocate the addresses in the - *__mcount_loc section. + * ftrace callsite section. */ - if (symindex != -1 && !strcmp(secname, "__mcount_loc")) { + if (symindex != -1 && !strcmp(secname, FTRACE_CALLSITE_SECTION)) { + int err; if (s->sh_type == SHT_REL) err = apply_relocate((Elf_Shdr *)sechdrs, strtab, symindex, @@ -962,6 +965,7 @@ int module_finalize(const Elf_Ehdr *hdr, if (err) return err; } +#endif } return 0; } diff --git a/arch/parisc/kernel/module.lds b/arch/parisc/kernel/module.lds deleted file mode 100644 index 1a9a92aca5c8..000000000000 --- a/arch/parisc/kernel/module.lds +++ /dev/null @@ -1,7 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ - -SECTIONS { - __mcount_loc : { - *(__patchable_function_entries) - } -} diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index dae64600ccbf..a9c4e4721434 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -110,17 +110,17 @@ #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD -#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY -#define MCOUNT_REC() . = ALIGN(8); \ - __start_mcount_loc = .; \ - KEEP(*(__patchable_function_entries)) \ - __stop_mcount_loc = .; -#else +/* + * The ftrace call sites are logged to a section whose name depends on the + * compiler option used. A given kernel image will only use one, AKA + * FTRACE_CALLSITE_SECTION. We capture all of them here to avoid header + * dependencies for FTRACE_CALLSITE_SECTION's definition. + */ #define MCOUNT_REC() . = ALIGN(8); \ __start_mcount_loc = .; \ KEEP(*(__mcount_loc)) \ + KEEP(*(__patchable_function_entries)) \ __stop_mcount_loc = .; -#endif #else #define MCOUNT_REC() #endif diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 9867d90d635e..9141f2263286 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -738,6 +738,11 @@ static inline unsigned long get_lock_parent_ip(void) #ifdef CONFIG_FTRACE_MCOUNT_RECORD extern void ftrace_init(void); +#ifdef CC_USING_PATCHABLE_FUNCTION_ENTRY +#define FTRACE_CALLSITE_SECTION "__patchable_function_entries" +#else +#define FTRACE_CALLSITE_SECTION "__mcount_loc" +#endif #else static inline void ftrace_init(void) { } #endif diff --git a/kernel/module.c b/kernel/module.c index ff2d7359a418..acf7962936c4 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3222,7 +3222,7 @@ static int find_module_sections(struct module *mod, struct load_info *info) #endif #ifdef CONFIG_FTRACE_MCOUNT_RECORD /* sechdrs[0].sh_size is always zero */ - mod->ftrace_callsites = section_objs(info, "__mcount_loc", + mod->ftrace_callsites = section_objs(info, FTRACE_CALLSITE_SECTION, sizeof(*mod->ftrace_callsites), &mod->num_ftrace_callsites); #endif From bd8b21d3dd661658addc1cd4cc869bab11d28596 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 17 Oct 2019 14:03:26 +0100 Subject: [PATCH 68/93] arm64: module: rework special section handling When we load a module, we have to perform some special work for a couple of named sections. To do this, we iterate over all of the module's sections, and perform work for each section we recognize. To make it easier to handle the unexpected absence of a section, and to make the section-specific logic easer to read, let's factor the section search into a helper. Similar is already done in the core module loader, and other architectures (and ideally we'd unify these in future). If we expect a module to have an ftrace trampoline section, but it doesn't have one, we'll now reject loading the module. When ARM64_MODULE_PLTS is selected, any correctly built module should have one (and this is assumed by arm64's ftrace PLT code) and the absence of such a section implies something has gone wrong at build time. Subsequent patches will make use of the new helper. Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Reviewed-by: Torsten Duwe Tested-by: Amit Daniel Kachhap Tested-by: Torsten Duwe Cc: Catalin Marinas Cc: James Morse Cc: Will Deacon --- arch/arm64/kernel/module.c | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 03ff15bffbb6..763a86d52fef 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -470,22 +470,39 @@ overflow: return -ENOEXEC; } -int module_finalize(const Elf_Ehdr *hdr, - const Elf_Shdr *sechdrs, - struct module *me) +static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + const char *name) { const Elf_Shdr *s, *se; const char *secstrs = (void *)hdr + sechdrs[hdr->e_shstrndx].sh_offset; for (s = sechdrs, se = sechdrs + hdr->e_shnum; s < se; s++) { - if (strcmp(".altinstructions", secstrs + s->sh_name) == 0) - apply_alternatives_module((void *)s->sh_addr, s->sh_size); -#ifdef CONFIG_ARM64_MODULE_PLTS - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE) && - !strcmp(".text.ftrace_trampoline", secstrs + s->sh_name)) - me->arch.ftrace_trampoline = (void *)s->sh_addr; -#endif + if (strcmp(name, secstrs + s->sh_name) == 0) + return s; } + return NULL; +} + +int module_finalize(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *me) +{ + const Elf_Shdr *s; + + s = find_section(hdr, sechdrs, ".altinstructions"); + if (s) + apply_alternatives_module((void *)s->sh_addr, s->sh_size); + +#ifdef CONFIG_ARM64_MODULE_PLTS + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE)) { + s = find_section(hdr, sechdrs, ".text.ftrace_trampoline"); + if (!s) + return -ENOEXEC; + me->arch.ftrace_trampoline = (void *)s->sh_addr; + } +#endif + return 0; } From f1a54ae9af0da4d76239256ed640a93ab3aadac0 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 17 Oct 2019 15:26:38 +0100 Subject: [PATCH 69/93] arm64: module/ftrace: intialize PLT at load time Currently we lazily-initialize a module's ftrace PLT at runtime when we install the first ftrace call. To do so we have to apply a number of sanity checks, transiently mark the module text as RW, and perform an IPI as part of handling Neoverse-N1 erratum #1542419. We only expect the ftrace trampoline to point at ftrace_caller() (AKA FTRACE_ADDR), so let's simplify all of this by intializing the PLT at module load time, before the module loader marks the module RO and performs the intial I-cache maintenance for the module. Thus we can rely on the module having been correctly intialized, and can simplify the runtime work necessary to install an ftrace call in a module. This will also allow for the removal of module_disable_ro(). Tested by forcing ftrace_make_call() to use the module PLT, and then loading up a module after setting up ftrace with: | echo ":mod:" > set_ftrace_filter; | echo function > current_tracer; | modprobe Since FTRACE_ADDR is only defined when CONFIG_DYNAMIC_FTRACE is selected, we wrap its use along with most of module_init_ftrace_plt() with ifdeffery rather than using IS_ENABLED(). Signed-off-by: Mark Rutland Reviewed-by: Amit Daniel Kachhap Reviewed-by: Ard Biesheuvel Reviewed-by: Torsten Duwe Tested-by: Amit Daniel Kachhap Tested-by: Torsten Duwe Cc: Catalin Marinas Cc: James Morse Cc: Peter Zijlstra Cc: Will Deacon --- arch/arm64/kernel/ftrace.c | 55 ++++++++++---------------------------- arch/arm64/kernel/module.c | 32 ++++++++++++++-------- 2 files changed, 35 insertions(+), 52 deletions(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 06e56b470315..822718eafdb4 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -73,9 +73,21 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (offset < -SZ_128M || offset >= SZ_128M) { #ifdef CONFIG_ARM64_MODULE_PLTS - struct plt_entry trampoline, *dst; struct module *mod; + /* + * There is only one ftrace trampoline per module. For now, + * this is not a problem since on arm64, all dynamic ftrace + * invocations are routed via ftrace_caller(). This will need + * to be revisited if support for multiple ftrace entry points + * is added in the future, but for now, the pr_err() below + * deals with a theoretical issue only. + */ + if (addr != FTRACE_ADDR) { + pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); + return -EINVAL; + } + /* * On kernels that support module PLTs, the offset between the * branch instruction and its target may legally exceed the @@ -93,46 +105,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (WARN_ON(!mod)) return -EINVAL; - /* - * There is only one ftrace trampoline per module. For now, - * this is not a problem since on arm64, all dynamic ftrace - * invocations are routed via ftrace_caller(). This will need - * to be revisited if support for multiple ftrace entry points - * is added in the future, but for now, the pr_err() below - * deals with a theoretical issue only. - * - * Note that PLTs are place relative, and plt_entries_equal() - * checks whether they point to the same target. Here, we need - * to check if the actual opcodes are in fact identical, - * regardless of the offset in memory so use memcmp() instead. - */ - dst = mod->arch.ftrace_trampoline; - trampoline = get_plt_entry(addr, dst); - if (memcmp(dst, &trampoline, sizeof(trampoline))) { - if (plt_entry_is_initialized(dst)) { - pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); - return -EINVAL; - } - - /* point the trampoline to our ftrace entry point */ - module_disable_ro(mod); - *dst = trampoline; - module_enable_ro(mod, true); - - /* - * Ensure updated trampoline is visible to instruction - * fetch before we patch in the branch. Although the - * architecture doesn't require an IPI in this case, - * Neoverse-N1 erratum #1542419 does require one - * if the TLB maintenance in module_enable_ro() is - * skipped due to rodata_enabled. It doesn't seem worth - * it to make it conditional given that this is - * certainly not a fast-path. - */ - flush_icache_range((unsigned long)&dst[0], - (unsigned long)&dst[1]); - } - addr = (unsigned long)dst; + addr = (unsigned long)mod->arch.ftrace_trampoline; #else /* CONFIG_ARM64_MODULE_PLTS */ return -EINVAL; #endif /* CONFIG_ARM64_MODULE_PLTS */ diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index 763a86d52fef..d0692ecb99bb 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include @@ -485,24 +486,33 @@ static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, return NULL; } +static int module_init_ftrace_plt(const Elf_Ehdr *hdr, + const Elf_Shdr *sechdrs, + struct module *mod) +{ +#if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE) + const Elf_Shdr *s; + struct plt_entry *plt; + + s = find_section(hdr, sechdrs, ".text.ftrace_trampoline"); + if (!s) + return -ENOEXEC; + + plt = (void *)s->sh_addr; + *plt = get_plt_entry(FTRACE_ADDR, plt); + mod->arch.ftrace_trampoline = plt; +#endif + return 0; +} + int module_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *me) { const Elf_Shdr *s; - s = find_section(hdr, sechdrs, ".altinstructions"); if (s) apply_alternatives_module((void *)s->sh_addr, s->sh_size); -#ifdef CONFIG_ARM64_MODULE_PLTS - if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE)) { - s = find_section(hdr, sechdrs, ".text.ftrace_trampoline"); - if (!s) - return -ENOEXEC; - me->arch.ftrace_trampoline = (void *)s->sh_addr; - } -#endif - - return 0; + return module_init_ftrace_plt(hdr, sechdrs, me); } From e3bf8a67f759b498e09999804c3837688e03b304 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 18 Oct 2019 11:25:26 +0100 Subject: [PATCH 70/93] arm64: insn: add encoder for MOV (register) For FTRACE_WITH_REGS, we're going to want to generate a MOV (register) instruction as part of the callsite intialization. As MOV (register) is an alias for ORR (shifted register), we can generate this with aarch64_insn_gen_logical_shifted_reg(), but it's somewhat verbose and difficult to read in-context. Add a aarch64_insn_gen_move_reg() wrapper for this case so that we can write callers in a more straightforward way. Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Reviewed-by: Torsten Duwe Tested-by: Amit Daniel Kachhap Tested-by: Torsten Duwe Cc: Catalin Marinas Cc: Will Deacon --- arch/arm64/include/asm/insn.h | 3 +++ arch/arm64/kernel/insn.c | 13 +++++++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/arm64/include/asm/insn.h b/arch/arm64/include/asm/insn.h index 39e7780bedd6..bb313dde58a4 100644 --- a/arch/arm64/include/asm/insn.h +++ b/arch/arm64/include/asm/insn.h @@ -440,6 +440,9 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst, int shift, enum aarch64_insn_variant variant, enum aarch64_insn_logic_type type); +u32 aarch64_insn_gen_move_reg(enum aarch64_insn_register dst, + enum aarch64_insn_register src, + enum aarch64_insn_variant variant); u32 aarch64_insn_gen_logical_immediate(enum aarch64_insn_logic_type type, enum aarch64_insn_variant variant, enum aarch64_insn_register Rn, diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index d801a7094076..513b29c3e735 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -1268,6 +1268,19 @@ u32 aarch64_insn_gen_logical_shifted_reg(enum aarch64_insn_register dst, return aarch64_insn_encode_immediate(AARCH64_INSN_IMM_6, insn, shift); } +/* + * MOV (register) is architecturally an alias of ORR (shifted register) where + * MOV <*d>, <*m> is equivalent to ORR <*d>, <*ZR>, <*m> + */ +u32 aarch64_insn_gen_move_reg(enum aarch64_insn_register dst, + enum aarch64_insn_register src, + enum aarch64_insn_variant variant) +{ + return aarch64_insn_gen_logical_shifted_reg(dst, AARCH64_INSN_REG_ZR, + src, 0, variant, + AARCH64_INSN_LOGIC_ORR); +} + u32 aarch64_insn_gen_adr(unsigned long pc, unsigned long addr, enum aarch64_insn_register reg, enum aarch64_insn_adr_type type) From 1f377e043b3b8ef68caffe47bdad794f4e2cb030 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 18 Oct 2019 16:37:47 +0100 Subject: [PATCH 71/93] arm64: asm-offsets: add S_FP So that assembly code can more easily manipulate the FP (x29) within a pt_regs, add an S_FP asm-offsets definition. Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Reviewed-by: Torsten Duwe Tested-by: Amit Daniel Kachhap Tested-by: Torsten Duwe Cc: Catalin Marinas Cc: Will Deacon --- arch/arm64/kernel/asm-offsets.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 214685760e1c..a5bdce8af65b 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -56,6 +56,7 @@ int main(void) DEFINE(S_X24, offsetof(struct pt_regs, regs[24])); DEFINE(S_X26, offsetof(struct pt_regs, regs[26])); DEFINE(S_X28, offsetof(struct pt_regs, regs[28])); + DEFINE(S_FP, offsetof(struct pt_regs, regs[29])); DEFINE(S_LR, offsetof(struct pt_regs, regs[30])); DEFINE(S_SP, offsetof(struct pt_regs, sp)); DEFINE(S_PSTATE, offsetof(struct pt_regs, pstate)); From 3b23e4991fb66f6d152f9055ede271a726ef9f21 Mon Sep 17 00:00:00 2001 From: Torsten Duwe Date: Fri, 8 Feb 2019 16:10:19 +0100 Subject: [PATCH 72/93] arm64: implement ftrace with regs This patch implements FTRACE_WITH_REGS for arm64, which allows a traced function's arguments (and some other registers) to be captured into a struct pt_regs, allowing these to be inspected and/or modified. This is a building block for live-patching, where a function's arguments may be forwarded to another function. This is also necessary to enable ftrace and in-kernel pointer authentication at the same time, as it allows the LR value to be captured and adjusted prior to signing. Using GCC's -fpatchable-function-entry=N option, we can have the compiler insert a configurable number of NOPs between the function entry point and the usual prologue. This also ensures functions are AAPCS compliant (e.g. disabling inter-procedural register allocation). For example, with -fpatchable-function-entry=2, GCC 8.1.0 compiles the following: | unsigned long bar(void); | | unsigned long foo(void) | { | return bar() + 1; | } ... to: | : | nop | nop | stp x29, x30, [sp, #-16]! | mov x29, sp | bl 0 | add x0, x0, #0x1 | ldp x29, x30, [sp], #16 | ret This patch builds the kernel with -fpatchable-function-entry=2, prefixing each function with two NOPs. To trace a function, we replace these NOPs with a sequence that saves the LR into a GPR, then calls an ftrace entry assembly function which saves this and other relevant registers: | mov x9, x30 | bl Since patchable functions are AAPCS compliant (and the kernel does not use x18 as a platform register), x9-x18 can be safely clobbered in the patched sequence and the ftrace entry code. There are now two ftrace entry functions, ftrace_regs_entry (which saves all GPRs), and ftrace_entry (which saves the bare minimum). A PLT is allocated for each within modules. Signed-off-by: Torsten Duwe [Mark: rework asm, comments, PLTs, initialization, commit message] Signed-off-by: Mark Rutland Reviewed-by: Amit Daniel Kachhap Reviewed-by: Ard Biesheuvel Reviewed-by: Torsten Duwe Tested-by: Amit Daniel Kachhap Tested-by: Torsten Duwe Cc: AKASHI Takahiro Cc: Catalin Marinas Cc: Josh Poimboeuf Cc: Julien Thierry Cc: Will Deacon --- arch/arm64/Kconfig | 2 + arch/arm64/Makefile | 5 ++ arch/arm64/include/asm/ftrace.h | 23 +++++ arch/arm64/include/asm/module.h | 2 +- arch/arm64/kernel/entry-ftrace.S | 140 +++++++++++++++++++++++++++++-- arch/arm64/kernel/ftrace.c | 84 +++++++++++++++---- arch/arm64/kernel/module-plts.c | 3 +- arch/arm64/kernel/module.c | 18 +++- 8 files changed, 252 insertions(+), 25 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 950a56b71ff0..0ffb8596b8a1 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -143,6 +143,8 @@ config ARM64 select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_CONTIGUOUS select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS \ + if $(cc-option,-fpatchable-function-entry=2) select HAVE_EFFICIENT_UNALIGNED_ACCESS select HAVE_FAST_GUP select HAVE_FTRACE_MCOUNT_RECORD diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 2c0238ce0551..1fbe24d4fdb6 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -95,6 +95,11 @@ ifeq ($(CONFIG_ARM64_MODULE_PLTS),y) KBUILD_LDS_MODULE += $(srctree)/arch/arm64/kernel/module.lds endif +ifeq ($(CONFIG_DYNAMIC_FTRACE_WITH_REGS),y) + KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY + CC_FLAGS_FTRACE := -fpatchable-function-entry=2 +endif + # Default value head-y := arch/arm64/kernel/head.o diff --git a/arch/arm64/include/asm/ftrace.h b/arch/arm64/include/asm/ftrace.h index d48667b04c41..91fa4baa1a93 100644 --- a/arch/arm64/include/asm/ftrace.h +++ b/arch/arm64/include/asm/ftrace.h @@ -11,9 +11,20 @@ #include #define HAVE_FUNCTION_GRAPH_FP_TEST + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +#define ARCH_SUPPORTS_FTRACE_OPS 1 +#else #define MCOUNT_ADDR ((unsigned long)_mcount) +#endif + +/* The BL at the callsite's adjusted rec->ip */ #define MCOUNT_INSN_SIZE AARCH64_INSN_SIZE +#define FTRACE_PLT_IDX 0 +#define FTRACE_REGS_PLT_IDX 1 +#define NR_FTRACE_PLTS 2 + /* * Currently, gcc tends to save the link register after the local variables * on the stack. This causes the max stack tracer to report the function @@ -43,6 +54,12 @@ extern void return_to_handler(void); static inline unsigned long ftrace_call_adjust(unsigned long addr) { + /* + * Adjust addr to point at the BL in the callsite. + * See ftrace_init_nop() for the callsite sequence. + */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + return addr + AARCH64_INSN_SIZE; /* * addr is the address of the mcount call instruction. * recordmcount does the necessary offset calculation. @@ -50,6 +67,12 @@ static inline unsigned long ftrace_call_adjust(unsigned long addr) return addr; } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +struct dyn_ftrace; +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec); +#define ftrace_init_nop ftrace_init_nop +#endif + #define ftrace_return_address(n) return_address(n) /* diff --git a/arch/arm64/include/asm/module.h b/arch/arm64/include/asm/module.h index f80e13cbf8ec..1e93de68c044 100644 --- a/arch/arm64/include/asm/module.h +++ b/arch/arm64/include/asm/module.h @@ -21,7 +21,7 @@ struct mod_arch_specific { struct mod_plt_sec init; /* for CONFIG_DYNAMIC_FTRACE */ - struct plt_entry *ftrace_trampoline; + struct plt_entry *ftrace_trampolines; }; #endif diff --git a/arch/arm64/kernel/entry-ftrace.S b/arch/arm64/kernel/entry-ftrace.S index 33d003d80121..4fe1514fcbfd 100644 --- a/arch/arm64/kernel/entry-ftrace.S +++ b/arch/arm64/kernel/entry-ftrace.S @@ -7,10 +7,137 @@ */ #include +#include #include #include #include +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +/* + * Due to -fpatchable-function-entry=2, the compiler has placed two NOPs before + * the regular function prologue. For an enabled callsite, ftrace_init_nop() and + * ftrace_make_call() have patched those NOPs to: + * + * MOV X9, LR + * BL + * + * ... where is either ftrace_caller or ftrace_regs_caller. + * + * Each instrumented function follows the AAPCS, so here x0-x8 and x19-x30 are + * live, and x9-x18 are safe to clobber. + * + * We save the callsite's context into a pt_regs before invoking any ftrace + * callbacks. So that we can get a sensible backtrace, we create a stack record + * for the callsite and the ftrace entry assembly. This is not sufficient for + * reliable stacktrace: until we create the callsite stack record, its caller + * is missing from the LR and existing chain of frame records. + */ + .macro ftrace_regs_entry, allregs=0 + /* Make room for pt_regs, plus a callee frame */ + sub sp, sp, #(S_FRAME_SIZE + 16) + + /* Save function arguments (and x9 for simplicity) */ + stp x0, x1, [sp, #S_X0] + stp x2, x3, [sp, #S_X2] + stp x4, x5, [sp, #S_X4] + stp x6, x7, [sp, #S_X6] + stp x8, x9, [sp, #S_X8] + + /* Optionally save the callee-saved registers, always save the FP */ + .if \allregs == 1 + stp x10, x11, [sp, #S_X10] + stp x12, x13, [sp, #S_X12] + stp x14, x15, [sp, #S_X14] + stp x16, x17, [sp, #S_X16] + stp x18, x19, [sp, #S_X18] + stp x20, x21, [sp, #S_X20] + stp x22, x23, [sp, #S_X22] + stp x24, x25, [sp, #S_X24] + stp x26, x27, [sp, #S_X26] + stp x28, x29, [sp, #S_X28] + .else + str x29, [sp, #S_FP] + .endif + + /* Save the callsite's SP and LR */ + add x10, sp, #(S_FRAME_SIZE + 16) + stp x9, x10, [sp, #S_LR] + + /* Save the PC after the ftrace callsite */ + str x30, [sp, #S_PC] + + /* Create a frame record for the callsite above pt_regs */ + stp x29, x9, [sp, #S_FRAME_SIZE] + add x29, sp, #S_FRAME_SIZE + + /* Create our frame record within pt_regs. */ + stp x29, x30, [sp, #S_STACKFRAME] + add x29, sp, #S_STACKFRAME + .endm + +ENTRY(ftrace_regs_caller) + ftrace_regs_entry 1 + b ftrace_common +ENDPROC(ftrace_regs_caller) + +ENTRY(ftrace_caller) + ftrace_regs_entry 0 + b ftrace_common +ENDPROC(ftrace_caller) + +ENTRY(ftrace_common) + sub x0, x30, #AARCH64_INSN_SIZE // ip (callsite's BL insn) + mov x1, x9 // parent_ip (callsite's LR) + ldr_l x2, function_trace_op // op + mov x3, sp // regs + +GLOBAL(ftrace_call) + bl ftrace_stub + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +GLOBAL(ftrace_graph_call) // ftrace_graph_caller(); + nop // If enabled, this will be replaced + // "b ftrace_graph_caller" +#endif + +/* + * At the callsite x0-x8 and x19-x30 were live. Any C code will have preserved + * x19-x29 per the AAPCS, and we created frame records upon entry, so we need + * to restore x0-x8, x29, and x30. + */ +ftrace_common_return: + /* Restore function arguments */ + ldp x0, x1, [sp] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + ldr x8, [sp, #S_X8] + + /* Restore the callsite's FP, LR, PC */ + ldr x29, [sp, #S_FP] + ldr x30, [sp, #S_LR] + ldr x9, [sp, #S_PC] + + /* Restore the callsite's SP */ + add sp, sp, #S_FRAME_SIZE + 16 + + ret x9 +ENDPROC(ftrace_common) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER +ENTRY(ftrace_graph_caller) + ldr x0, [sp, #S_PC] + sub x0, x0, #AARCH64_INSN_SIZE // ip (callsite's BL insn) + add x1, sp, #S_LR // parent_ip (callsite's LR) + ldr x2, [sp, #S_FRAME_SIZE] // parent fp (callsite's FP) + bl prepare_ftrace_return + b ftrace_common_return +ENDPROC(ftrace_graph_caller) +#else +#endif + +#else /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ + /* * Gcc with -pg will put the following code in the beginning of each function: * mov x0, x30 @@ -160,11 +287,6 @@ GLOBAL(ftrace_graph_call) // ftrace_graph_caller(); mcount_exit ENDPROC(ftrace_caller) -#endif /* CONFIG_DYNAMIC_FTRACE */ - -ENTRY(ftrace_stub) - ret -ENDPROC(ftrace_stub) #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* @@ -184,7 +306,15 @@ ENTRY(ftrace_graph_caller) mcount_exit ENDPROC(ftrace_graph_caller) +#endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +#endif /* CONFIG_DYNAMIC_FTRACE */ +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */ +ENTRY(ftrace_stub) + ret +ENDPROC(ftrace_stub) + +#ifdef CONFIG_FUNCTION_GRAPH_TRACER /* * void return_to_handler(void) * diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index 822718eafdb4..aea652c33a38 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -62,6 +62,19 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ftrace_modify_code(pc, 0, new, false); } +#ifdef CONFIG_ARM64_MODULE_PLTS +static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) +{ + struct plt_entry *plt = mod->arch.ftrace_trampolines; + + if (addr == FTRACE_ADDR) + return &plt[FTRACE_PLT_IDX]; + if (addr == FTRACE_REGS_ADDR && IS_ENABLED(CONFIG_FTRACE_WITH_REGS)) + return &plt[FTRACE_REGS_PLT_IDX]; + return NULL; +} +#endif + /* * Turn on the call to ftrace_caller() in instrumented function */ @@ -74,19 +87,7 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (offset < -SZ_128M || offset >= SZ_128M) { #ifdef CONFIG_ARM64_MODULE_PLTS struct module *mod; - - /* - * There is only one ftrace trampoline per module. For now, - * this is not a problem since on arm64, all dynamic ftrace - * invocations are routed via ftrace_caller(). This will need - * to be revisited if support for multiple ftrace entry points - * is added in the future, but for now, the pr_err() below - * deals with a theoretical issue only. - */ - if (addr != FTRACE_ADDR) { - pr_err("ftrace: far branches to multiple entry points unsupported inside a single module\n"); - return -EINVAL; - } + struct plt_entry *plt; /* * On kernels that support module PLTs, the offset between the @@ -105,7 +106,13 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (WARN_ON(!mod)) return -EINVAL; - addr = (unsigned long)mod->arch.ftrace_trampoline; + plt = get_ftrace_plt(mod, addr); + if (!plt) { + pr_err("ftrace: no module PLT for %ps\n", (void *)addr); + return -EINVAL; + } + + addr = (unsigned long)plt; #else /* CONFIG_ARM64_MODULE_PLTS */ return -EINVAL; #endif /* CONFIG_ARM64_MODULE_PLTS */ @@ -117,6 +124,55 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return ftrace_modify_code(pc, old, new, true); } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS +int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, + unsigned long addr) +{ + unsigned long pc = rec->ip; + u32 old, new; + + old = aarch64_insn_gen_branch_imm(pc, old_addr, + AARCH64_INSN_BRANCH_LINK); + new = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); + + return ftrace_modify_code(pc, old, new, true); +} + +/* + * The compiler has inserted two NOPs before the regular function prologue. + * All instrumented functions follow the AAPCS, so x0-x8 and x19-x30 are live, + * and x9-x18 are free for our use. + * + * At runtime we want to be able to swing a single NOP <-> BL to enable or + * disable the ftrace call. The BL requires us to save the original LR value, + * so here we insert a over the first NOP so the instructions + * before the regular prologue are: + * + * | Compiled | Disabled | Enabled | + * +----------+------------+------------+ + * | NOP | MOV X9, LR | MOV X9, LR | + * | NOP | NOP | BL | + * + * The LR value will be recovered by ftrace_regs_entry, and restored into LR + * before returning to the regular function prologue. When a function is not + * being traced, the MOV is not harmful given x9 is not live per the AAPCS. + * + * Note: ftrace_process_locs() has pre-adjusted rec->ip to be the address of + * the BL. + */ +int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) +{ + unsigned long pc = rec->ip - AARCH64_INSN_SIZE; + u32 old, new; + + old = aarch64_insn_gen_nop(); + new = aarch64_insn_gen_move_reg(AARCH64_INSN_REG_9, + AARCH64_INSN_REG_LR, + AARCH64_INSN_VARIANT_64BIT); + return ftrace_modify_code(pc, old, new, true); +} +#endif + /* * Turn off the call to ftrace_caller() in instrumented function */ diff --git a/arch/arm64/kernel/module-plts.c b/arch/arm64/kernel/module-plts.c index b182442b87a3..65b08a74aec6 100644 --- a/arch/arm64/kernel/module-plts.c +++ b/arch/arm64/kernel/module-plts.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include @@ -330,7 +331,7 @@ int module_frob_arch_sections(Elf_Ehdr *ehdr, Elf_Shdr *sechdrs, tramp->sh_type = SHT_NOBITS; tramp->sh_flags = SHF_EXECINSTR | SHF_ALLOC; tramp->sh_addralign = __alignof__(struct plt_entry); - tramp->sh_size = sizeof(struct plt_entry); + tramp->sh_size = NR_FTRACE_PLTS * sizeof(struct plt_entry); } return 0; diff --git a/arch/arm64/kernel/module.c b/arch/arm64/kernel/module.c index d0692ecb99bb..1cd1a4d0ed30 100644 --- a/arch/arm64/kernel/module.c +++ b/arch/arm64/kernel/module.c @@ -486,21 +486,31 @@ static const Elf_Shdr *find_section(const Elf_Ehdr *hdr, return NULL; } +static inline void __init_plt(struct plt_entry *plt, unsigned long addr) +{ + *plt = get_plt_entry(addr, plt); +} + static int module_init_ftrace_plt(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs, struct module *mod) { #if defined(CONFIG_ARM64_MODULE_PLTS) && defined(CONFIG_DYNAMIC_FTRACE) const Elf_Shdr *s; - struct plt_entry *plt; + struct plt_entry *plts; s = find_section(hdr, sechdrs, ".text.ftrace_trampoline"); if (!s) return -ENOEXEC; - plt = (void *)s->sh_addr; - *plt = get_plt_entry(FTRACE_ADDR, plt); - mod->arch.ftrace_trampoline = plt; + plts = (void *)s->sh_addr; + + __init_plt(&plts[FTRACE_PLT_IDX], FTRACE_ADDR); + + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS)) + __init_plt(&plts[FTRACE_REGS_PLT_IDX], FTRACE_REGS_ADDR); + + mod->arch.ftrace_trampolines = plts; #endif return 0; } From 7f08ae53a7e3ac2a2f86175226ee19f0117d5b6c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 21 Oct 2019 15:05:52 +0100 Subject: [PATCH 73/93] arm64: ftrace: minimize ifdeffery Now that we no longer refer to mod->arch.ftrace_trampolines in the body of ftrace_make_call(), we can use IS_ENABLED() rather than ifdeffery, and make the code easier to follow. Likewise in ftrace_make_nop(). Let's do so. Signed-off-by: Mark Rutland Reviewed-by: Ard Biesheuvel Reviewed-by: Torsten Duwe Tested-by: Amit Daniel Kachhap Tested-by: Torsten Duwe Cc: Catalin Marinas Cc: Will Deacon --- arch/arm64/kernel/ftrace.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/ftrace.c b/arch/arm64/kernel/ftrace.c index aea652c33a38..8618faa82e6d 100644 --- a/arch/arm64/kernel/ftrace.c +++ b/arch/arm64/kernel/ftrace.c @@ -62,18 +62,18 @@ int ftrace_update_ftrace_func(ftrace_func_t func) return ftrace_modify_code(pc, 0, new, false); } -#ifdef CONFIG_ARM64_MODULE_PLTS static struct plt_entry *get_ftrace_plt(struct module *mod, unsigned long addr) { +#ifdef CONFIG_ARM64_MODULE_PLTS struct plt_entry *plt = mod->arch.ftrace_trampolines; if (addr == FTRACE_ADDR) return &plt[FTRACE_PLT_IDX]; if (addr == FTRACE_REGS_ADDR && IS_ENABLED(CONFIG_FTRACE_WITH_REGS)) return &plt[FTRACE_REGS_PLT_IDX]; +#endif return NULL; } -#endif /* * Turn on the call to ftrace_caller() in instrumented function @@ -85,10 +85,12 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) long offset = (long)pc - (long)addr; if (offset < -SZ_128M || offset >= SZ_128M) { -#ifdef CONFIG_ARM64_MODULE_PLTS struct module *mod; struct plt_entry *plt; + if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) + return -EINVAL; + /* * On kernels that support module PLTs, the offset between the * branch instruction and its target may legally exceed the @@ -113,9 +115,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) } addr = (unsigned long)plt; -#else /* CONFIG_ARM64_MODULE_PLTS */ - return -EINVAL; -#endif /* CONFIG_ARM64_MODULE_PLTS */ } old = aarch64_insn_gen_nop(); @@ -185,9 +184,11 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, long offset = (long)pc - (long)addr; if (offset < -SZ_128M || offset >= SZ_128M) { -#ifdef CONFIG_ARM64_MODULE_PLTS u32 replaced; + if (!IS_ENABLED(CONFIG_ARM64_MODULE_PLTS)) + return -EINVAL; + /* * 'mod' is only set at module load time, but if we end up * dealing with an out-of-range condition, we can assume it @@ -218,9 +219,6 @@ int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, return -EINVAL; validate = false; -#else /* CONFIG_ARM64_MODULE_PLTS */ - return -EINVAL; -#endif /* CONFIG_ARM64_MODULE_PLTS */ } else { old = aarch64_insn_gen_branch_imm(pc, addr, AARCH64_INSN_BRANCH_LINK); From bff3b04460a80f425442fe8e5c6ee8c3ebef611f Mon Sep 17 00:00:00 2001 From: Nicolas Saenz Julienne Date: Thu, 7 Nov 2019 10:56:11 +0100 Subject: [PATCH 74/93] arm64: mm: reserve CMA and crashkernel in ZONE_DMA32 With the introduction of ZONE_DMA in arm64 we moved the default CMA and crashkernel reservation into that area. This caused a regression on big machines that need big CMA and crashkernel reservations. Note that ZONE_DMA is only 1GB big. Restore the previous behavior as the wide majority of devices are OK with reserving these in ZONE_DMA32. The ones that need them in ZONE_DMA will configure it explicitly. Fixes: 1a8e1cef7603 ("arm64: use both ZONE_DMA and ZONE_DMA32") Reported-by: Qian Cai Signed-off-by: Nicolas Saenz Julienne Signed-off-by: Catalin Marinas --- arch/arm64/mm/init.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index 35f27b839101..d933589c48e8 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -91,7 +91,7 @@ static void __init reserve_crashkernel(void) if (crash_base == 0) { /* Current arm64 boot protocol requires 2MB alignment */ - crash_base = memblock_find_in_range(0, ARCH_LOW_ADDRESS_LIMIT, + crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit, crash_size, SZ_2M); if (crash_base == 0) { pr_warn("cannot allocate crashkernel (size:0x%llx)\n", @@ -459,7 +459,7 @@ void __init arm64_memblock_init(void) high_memory = __va(memblock_end_of_DRAM() - 1) + 1; - dma_contiguous_reserve(arm64_dma_phys_limit ? : arm64_dma32_phys_limit); + dma_contiguous_reserve(arm64_dma32_phys_limit); } void __init bootmem_init(void) From 8703317ae576c9bf3e07e5b97275e3f957e8d74b Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Thu, 7 Nov 2019 15:56:04 +0800 Subject: [PATCH 75/93] drivers/perf: hisi: update the sccl_id/ccl_id for certain HiSilicon platform For some HiSilicon platform, the originally designed SCCL_ID and CCL_ID are not satisfied with much rich topology when the MT is set, so we extend the SCCL_ID to MPIDR[aff3] and CCL_ID to MPIDR[aff2]. Let's update this for HiSilicon uncore PMU driver. Cc: John Garry Cc: Hanjun Guo Cc: Mark Rutland Cc: Will Deacon Signed-off-by: Shaokun Zhang Signed-off-by: Will Deacon --- drivers/perf/hisilicon/hisi_uncore_pmu.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/drivers/perf/hisilicon/hisi_uncore_pmu.c b/drivers/perf/hisilicon/hisi_uncore_pmu.c index 79f76f8dda8e..96183e31b96a 100644 --- a/drivers/perf/hisilicon/hisi_uncore_pmu.c +++ b/drivers/perf/hisilicon/hisi_uncore_pmu.c @@ -15,6 +15,7 @@ #include #include +#include #include #include "hisi_uncore_pmu.h" @@ -338,8 +339,10 @@ void hisi_uncore_pmu_disable(struct pmu *pmu) /* * Read Super CPU cluster and CPU cluster ID from MPIDR_EL1. - * If multi-threading is supported, CCL_ID is the low 3-bits in MPIDR[Aff2] - * and SCCL_ID is the upper 5-bits of Aff2 field; if not, SCCL_ID + * If multi-threading is supported, On Huawei Kunpeng 920 SoC whose cpu + * core is tsv110, CCL_ID is the low 3-bits in MPIDR[Aff2] and SCCL_ID + * is the upper 5-bits of Aff2 field; while for other cpu types, SCCL_ID + * is in MPIDR[Aff3] and CCL_ID is in MPIDR[Aff2], if not, SCCL_ID * is in MPIDR[Aff2] and CCL_ID is in MPIDR[Aff1]. */ static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id) @@ -347,12 +350,19 @@ static void hisi_read_sccl_and_ccl_id(int *sccl_id, int *ccl_id) u64 mpidr = read_cpuid_mpidr(); if (mpidr & MPIDR_MT_BITMASK) { - int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); + if (read_cpuid_part_number() == HISI_CPU_PART_TSV110) { + int aff2 = MPIDR_AFFINITY_LEVEL(mpidr, 2); - if (sccl_id) - *sccl_id = aff2 >> 3; - if (ccl_id) - *ccl_id = aff2 & 0x7; + if (sccl_id) + *sccl_id = aff2 >> 3; + if (ccl_id) + *ccl_id = aff2 & 0x7; + } else { + if (sccl_id) + *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 3); + if (ccl_id) + *ccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); + } } else { if (sccl_id) *sccl_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); From 313a4db7f3387608a3ab4531ac8c0509a3d7617f Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:06 +0100 Subject: [PATCH 76/93] kselftest: arm64: extend toplevel skeleton Makefile Modify KSFT arm64 toplevel Makefile to maintain arm64 kselftests organized by subsystem, keeping them into distinct subdirectories under arm64 custom KSFT directory: tools/testing/selftests/arm64/ Add to such toplevel Makefile a mechanism to guess the effective location of Kernel headers as installed by KSFT framework. Fit existing arm64 tags kselftest into this new schema moving them into their own subdirectory (arm64/tags). Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- tools/testing/selftests/Makefile | 1 + tools/testing/selftests/arm64/Makefile | 64 +++++++++++++++++-- tools/testing/selftests/arm64/README | 25 ++++++++ .../selftests/arm64/{ => tags}/.gitignore | 0 tools/testing/selftests/arm64/tags/Makefile | 7 ++ .../arm64/{ => tags}/run_tags_test.sh | 0 .../selftests/arm64/{ => tags}/tags_test.c | 0 7 files changed, 92 insertions(+), 5 deletions(-) create mode 100644 tools/testing/selftests/arm64/README rename tools/testing/selftests/arm64/{ => tags}/.gitignore (100%) create mode 100644 tools/testing/selftests/arm64/tags/Makefile rename tools/testing/selftests/arm64/{ => tags}/run_tags_test.sh (100%) rename tools/testing/selftests/arm64/{ => tags}/tags_test.c (100%) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 4cdbae6f4e61..a740198e07d7 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -1,5 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 TARGETS = android +TARGETS += arm64 TARGETS += bpf TARGETS += breakpoints TARGETS += capabilities diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index f9f79fb272f0..cd27ca689224 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -1,12 +1,66 @@ # SPDX-License-Identifier: GPL-2.0 -# ARCH can be overridden by the user for cross compiling +# When ARCH not overridden for crosscompiling, lookup machine ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -CFLAGS += -I../../../../usr/include/ -TEST_GEN_PROGS := tags_test -TEST_PROGS := run_tags_test.sh +ARM64_SUBTARGETS ?= tags +else +ARM64_SUBTARGETS := endif -include ../lib.mk +CFLAGS := -Wall -O2 -g + +# A proper top_srcdir is needed by KSFT(lib.mk) +top_srcdir = $(realpath ../../../../) + +# Additional include paths needed by kselftest.h and local headers +CFLAGS += -I$(top_srcdir)/tools/testing/selftests/ + +# Guessing where the Kernel headers could have been installed +# depending on ENV config +ifeq ($(KBUILD_OUTPUT),) +khdr_dir = $(top_srcdir)/usr/include +else +# the KSFT preferred location when KBUILD_OUTPUT is set +khdr_dir = $(KBUILD_OUTPUT)/kselftest/usr/include +endif + +CFLAGS += -I$(khdr_dir) + +export CFLAGS +export top_srcdir + +all: + @for DIR in $(ARM64_SUBTARGETS); do \ + BUILD_TARGET=$(OUTPUT)/$$DIR; \ + mkdir -p $$BUILD_TARGET; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ + done + +install: all + @for DIR in $(ARM64_SUBTARGETS); do \ + BUILD_TARGET=$(OUTPUT)/$$DIR; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ + done + +run_tests: all + @for DIR in $(ARM64_SUBTARGETS); do \ + BUILD_TARGET=$(OUTPUT)/$$DIR; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ + done + +# Avoid any output on non arm64 on emit_tests +emit_tests: all + @for DIR in $(ARM64_SUBTARGETS); do \ + BUILD_TARGET=$(OUTPUT)/$$DIR; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ + done + +clean: + @for DIR in $(ARM64_SUBTARGETS); do \ + BUILD_TARGET=$(OUTPUT)/$$DIR; \ + make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \ + done + +.PHONY: all clean install run_tests emit_tests diff --git a/tools/testing/selftests/arm64/README b/tools/testing/selftests/arm64/README new file mode 100644 index 000000000000..a1badd882102 --- /dev/null +++ b/tools/testing/selftests/arm64/README @@ -0,0 +1,25 @@ +KSelfTest ARM64 +=============== + +- These tests are arm64 specific and so not built or run but just skipped + completely when env-variable ARCH is found to be different than 'arm64' + and `uname -m` reports other than 'aarch64'. + +- Holding true the above, ARM64 KSFT tests can be run within the KSelfTest + framework using standard Linux top-level-makefile targets: + + $ make TARGETS=arm64 kselftest-clean + $ make TARGETS=arm64 kselftest + + or + + $ make -C tools/testing/selftests TARGETS=arm64 \ + INSTALL_PATH= install + + or, alternatively, only specific arm64/ subtargets can be picked: + + $ make -C tools/testing/selftests TARGETS=arm64 ARM64_SUBTARGETS="tags signal" \ + INSTALL_PATH= install + + Further details on building and running KFST can be found in: + Documentation/dev-tools/kselftest.rst diff --git a/tools/testing/selftests/arm64/.gitignore b/tools/testing/selftests/arm64/tags/.gitignore similarity index 100% rename from tools/testing/selftests/arm64/.gitignore rename to tools/testing/selftests/arm64/tags/.gitignore diff --git a/tools/testing/selftests/arm64/tags/Makefile b/tools/testing/selftests/arm64/tags/Makefile new file mode 100644 index 000000000000..41cb75070511 --- /dev/null +++ b/tools/testing/selftests/arm64/tags/Makefile @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-2.0 + +CFLAGS += -I../../../../../usr/include/ +TEST_GEN_PROGS := tags_test +TEST_PROGS := run_tags_test.sh + +include ../../lib.mk diff --git a/tools/testing/selftests/arm64/run_tags_test.sh b/tools/testing/selftests/arm64/tags/run_tags_test.sh similarity index 100% rename from tools/testing/selftests/arm64/run_tags_test.sh rename to tools/testing/selftests/arm64/tags/run_tags_test.sh diff --git a/tools/testing/selftests/arm64/tags_test.c b/tools/testing/selftests/arm64/tags/tags_test.c similarity index 100% rename from tools/testing/selftests/arm64/tags_test.c rename to tools/testing/selftests/arm64/tags/tags_test.c From f96bf43403165e4478942b0998931b14621ec207 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:07 +0100 Subject: [PATCH 77/93] kselftest: arm64: mangle_pstate_invalid_compat_toggle and common utils Add some arm64/signal specific boilerplate and utility code to help further testcases' development. Introduce also one simple testcase mangle_pstate_invalid_compat_toggle and some related helpers: it is a simple mangle testcase which messes with the ucontext_t from within the signal handler, trying to toggle PSTATE state bits to switch the system between 32bit/64bit execution state. Expects SIGSEGV on test PASS. Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/Makefile | 2 +- .../testing/selftests/arm64/signal/.gitignore | 3 + tools/testing/selftests/arm64/signal/Makefile | 32 ++ tools/testing/selftests/arm64/signal/README | 59 ++++ .../selftests/arm64/signal/test_signals.c | 29 ++ .../selftests/arm64/signal/test_signals.h | 93 ++++++ .../arm64/signal/test_signals_utils.c | 283 ++++++++++++++++++ .../arm64/signal/test_signals_utils.h | 19 ++ .../mangle_pstate_invalid_compat_toggle.c | 31 ++ .../arm64/signal/testcases/testcases.c | 150 ++++++++++ .../arm64/signal/testcases/testcases.h | 100 +++++++ 11 files changed, 800 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/arm64/signal/.gitignore create mode 100644 tools/testing/selftests/arm64/signal/Makefile create mode 100644 tools/testing/selftests/arm64/signal/README create mode 100644 tools/testing/selftests/arm64/signal/test_signals.c create mode 100644 tools/testing/selftests/arm64/signal/test_signals.h create mode 100644 tools/testing/selftests/arm64/signal/test_signals_utils.c create mode 100644 tools/testing/selftests/arm64/signal/test_signals_utils.h create mode 100644 tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c create mode 100644 tools/testing/selftests/arm64/signal/testcases/testcases.c create mode 100644 tools/testing/selftests/arm64/signal/testcases/testcases.h diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile index cd27ca689224..93b567d23c8b 100644 --- a/tools/testing/selftests/arm64/Makefile +++ b/tools/testing/selftests/arm64/Makefile @@ -4,7 +4,7 @@ ARCH ?= $(shell uname -m 2>/dev/null || echo not) ifneq (,$(filter $(ARCH),aarch64 arm64)) -ARM64_SUBTARGETS ?= tags +ARM64_SUBTARGETS ?= tags signal else ARM64_SUBTARGETS := endif diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore new file mode 100644 index 000000000000..3c5b4e8ff894 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/.gitignore @@ -0,0 +1,3 @@ +mangle_* +fake_sigreturn_* +!*.[ch] diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile new file mode 100644 index 000000000000..f78f5190e3d4 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/Makefile @@ -0,0 +1,32 @@ +# SPDX-License-Identifier: GPL-2.0 +# Copyright (C) 2019 ARM Limited + +# Additional include paths needed by kselftest.h and local headers +CFLAGS += -D_GNU_SOURCE -std=gnu99 -I. + +SRCS := $(filter-out testcases/testcases.c,$(wildcard testcases/*.c)) +PROGS := $(patsubst %.c,%,$(SRCS)) + +# Generated binaries to be installed by top KSFT script +TEST_GEN_PROGS := $(notdir $(PROGS)) + +# Get Kernel headers installed and use them. +KSFT_KHDR_INSTALL := 1 + +# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list +# to account for any OUTPUT target-dirs optionally provided by +# the toplevel makefile +include ../../lib.mk + +$(TEST_GEN_PROGS): $(PROGS) + cp $(PROGS) $(OUTPUT)/ + +clean: + $(CLEAN) + rm -f $(PROGS) + +# Common test-unit targets to build common-layout test-cases executables +# Needs secondary expansion to properly include the testcase c-file in pre-reqs +.SECONDEXPANSION: +$(PROGS): test_signals.c test_signals_utils.c testcases/testcases.c $$@.c test_signals.h test_signals_utils.h testcases/testcases.h + $(CC) $(CFLAGS) $^ -o $@ diff --git a/tools/testing/selftests/arm64/signal/README b/tools/testing/selftests/arm64/signal/README new file mode 100644 index 000000000000..967a531b245c --- /dev/null +++ b/tools/testing/selftests/arm64/signal/README @@ -0,0 +1,59 @@ +KSelfTest arm64/signal/ +======================= + +Signals Tests ++++++++++++++ + +- Tests are built around a common main compilation unit: such shared main + enforces a standard sequence of operations needed to perform a single + signal-test (setup/trigger/run/result/cleanup) + +- The above mentioned ops are configurable on a test-by-test basis: each test + is described (and configured) using the descriptor signals.h::struct tdescr + +- Each signal testcase is compiled into its own executable: a separate + executable is used for each test since many tests complete successfully + by receiving some kind of fatal signal from the Kernel, so it's safer + to run each test unit in its own standalone process, so as to start each + test from a clean slate. + +- New tests can be simply defined in testcases/ dir providing a proper struct + tdescr overriding all the defaults we wish to change (as of now providing a + custom run method is mandatory though) + +- Signals' test-cases hereafter defined belong currently to two + principal families: + + - 'mangle_' tests: a real signal (SIGUSR1) is raised and used as a trigger + and then the test case code modifies the signal frame from inside the + signal handler itself. + + - 'fake_sigreturn_' tests: a brand new custom artificial sigframe structure + is placed on the stack and a sigreturn syscall is called to simulate a + real signal return. This kind of tests does not use a trigger usually and + they are just fired using some simple included assembly trampoline code. + + - Most of these tests are successfully passing if the process gets killed by + some fatal signal: usually SIGSEGV or SIGBUS. Since while writing this + kind of tests it is extremely easy in fact to end-up injecting other + unrelated SEGV bugs in the testcases, it becomes extremely tricky to + be really sure that the tests are really addressing what they are meant + to address and they are not instead falling apart due to unplanned bugs + in the test code. + In order to alleviate the misery of the life of such test-developer, a few + helpers are provided: + + - a couple of ASSERT_BAD/GOOD_CONTEXT() macros to easily parse a ucontext_t + and verify if it is indeed GOOD or BAD (depending on what we were + expecting), using the same logic/perspective as in the arm64 Kernel signals + routines. + + - a sanity mechanism to be used in 'fake_sigreturn_'-alike tests: enabled by + default it takes care to verify that the test-execution had at least + successfully progressed up to the stage of triggering the fake sigreturn + call. + + In both cases test results are expected in terms of: + - some fatal signal sent by the Kernel to the test process + or + - analyzing some final regs state diff --git a/tools/testing/selftests/arm64/signal/test_signals.c b/tools/testing/selftests/arm64/signal/test_signals.c new file mode 100644 index 000000000000..cb970346b280 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/test_signals.c @@ -0,0 +1,29 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Generic test wrapper for arm64 signal tests. + * + * Each test provides its own tde struct tdescr descriptor to link with + * this wrapper. Framework provides common helpers. + */ +#include + +#include "test_signals.h" +#include "test_signals_utils.h" + +struct tdescr *current; + +int main(int argc, char *argv[]) +{ + current = &tde; + + ksft_print_msg("%s :: %s\n", current->name, current->descr); + if (test_setup(current)) { + test_run(current); + test_result(current); + test_cleanup(current); + } + + return current->pass ? KSFT_PASS : KSFT_FAIL; +} diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h new file mode 100644 index 000000000000..d730e9041da9 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/test_signals.h @@ -0,0 +1,93 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 ARM Limited */ + +#ifndef __TEST_SIGNALS_H__ +#define __TEST_SIGNALS_H__ + +#include +#include +#include + +/* + * Using ARCH specific and sanitized Kernel headers installed by KSFT + * framework since we asked for it by setting flag KSFT_KHDR_INSTALL + * in our Makefile. + */ +#include +#include + +#define __stringify_1(x...) #x +#define __stringify(x...) __stringify_1(x) + +#define get_regval(regname, out) \ +{ \ + asm volatile("mrs %0, " __stringify(regname) \ + : "=r" (out) \ + : \ + : "memory"); \ +} + +/* + * Feature flags used in tdescr.feats_required to specify + * any feature by the test + */ +enum { + FSSBS_BIT, + FMAX_END +}; + +#define FEAT_SSBS (1UL << FSSBS_BIT) + +/* + * A descriptor used to describe and configure a test case. + * Fields with a non-trivial meaning are described inline in the following. + */ +struct tdescr { + /* KEEP THIS FIELD FIRST for easier lookup from assembly */ + void *token; + /* when disabled token based sanity checking is skipped in handler */ + bool sanity_disabled; + /* just a name for the test-case; manadatory field */ + char *name; + char *descr; + unsigned long feats_required; + /* bitmask of effectively supported feats: populated at run-time */ + unsigned long feats_supported; + bool initialized; + unsigned int minsigstksz; + /* signum used as a test trigger. Zero if no trigger-signal is used */ + int sig_trig; + /* + * signum considered as a successful test completion. + * Zero when no signal is expected on success + */ + int sig_ok; + /* signum expected on unsupported CPU features. */ + int sig_unsupp; + /* a timeout in second for test completion */ + unsigned int timeout; + bool triggered; + bool pass; + /* optional sa_flags for the installed handler */ + int sa_flags; + ucontext_t saved_uc; + /* optional test private data */ + void *priv; + + /* a custom setup function to be called before test starts */ + int (*setup)(struct tdescr *td); + /* a custom cleanup function called before test exits */ + void (*cleanup)(struct tdescr *td); + /* an optional function to be used as a trigger for test starting */ + int (*trigger)(struct tdescr *td); + /* + * the actual test-core: invoked differently depending on the + * presence of the trigger function above; this is mandatory + */ + int (*run)(struct tdescr *td, siginfo_t *si, ucontext_t *uc); + /* an optional function for custom results' processing */ + void (*check_result)(struct tdescr *td); +}; + +extern struct tdescr tde; +#endif diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c new file mode 100644 index 000000000000..fbce41750590 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -0,0 +1,283 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019 ARM Limited */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "test_signals.h" +#include "test_signals_utils.h" +#include "testcases/testcases.h" + +extern struct tdescr *current; + +static char const *const feats_names[FMAX_END] = { + " SSBS ", +}; + +#define MAX_FEATS_SZ 128 +static char feats_string[MAX_FEATS_SZ]; + +static inline char *feats_to_string(unsigned long feats) +{ + size_t flen = MAX_FEATS_SZ - 1; + + for (int i = 0; i < FMAX_END; i++) { + if (feats & (1UL << i)) { + size_t tlen = strlen(feats_names[i]); + + assert(flen > tlen); + flen -= tlen; + strncat(feats_string, feats_names[i], flen); + } + } + + return feats_string; +} + +static void unblock_signal(int signum) +{ + sigset_t sset; + + sigemptyset(&sset); + sigaddset(&sset, signum); + sigprocmask(SIG_UNBLOCK, &sset, NULL); +} + +static void default_result(struct tdescr *td, bool force_exit) +{ + if (td->pass) + fprintf(stderr, "==>> completed. PASS(1)\n"); + else + fprintf(stdout, "==>> completed. FAIL(0)\n"); + if (force_exit) + exit(td->pass ? EXIT_SUCCESS : EXIT_FAILURE); +} + +/* + * The following handle_signal_* helpers are used by main default_handler + * and are meant to return true when signal is handled successfully: + * when false is returned instead, it means that the signal was somehow + * unexpected in that context and it was NOT handled; default_handler will + * take care of such unexpected situations. + */ + +static bool handle_signal_unsupported(struct tdescr *td, + siginfo_t *si, void *uc) +{ + if (feats_ok(td)) + return false; + + /* Mangling PC to avoid loops on original SIGILL */ + ((ucontext_t *)uc)->uc_mcontext.pc += 4; + + if (!td->initialized) { + fprintf(stderr, + "Got SIG_UNSUPP @test_init. Ignore.\n"); + } else { + fprintf(stderr, + "-- RX SIG_UNSUPP on unsupported feat...OK\n"); + td->pass = 1; + default_result(current, 1); + } + + return true; +} + +static bool handle_signal_trigger(struct tdescr *td, + siginfo_t *si, void *uc) +{ + td->triggered = 1; + /* ->run was asserted NON-NULL in test_setup() already */ + td->run(td, si, uc); + + return true; +} + +static bool handle_signal_ok(struct tdescr *td, + siginfo_t *si, void *uc) +{ + /* + * it's a bug in the test code when this assert fail: + * if sig_trig was defined, it must have been used before getting here. + */ + assert(!td->sig_trig || td->triggered); + fprintf(stderr, + "SIG_OK -- SP:0x%llX si_addr@:%p si_code:%d token@:%p offset:%ld\n", + ((ucontext_t *)uc)->uc_mcontext.sp, + si->si_addr, si->si_code, td->token, td->token - si->si_addr); + /* + * fake_sigreturn tests, which have sanity_enabled=1, set, at the very + * last time, the token field to the SP address used to place the fake + * sigframe: so token==0 means we never made it to the end, + * segfaulting well-before, and the test is possibly broken. + */ + if (!td->sanity_disabled && !td->token) { + fprintf(stdout, + "current->token ZEROED...test is probably broken!\n"); + abort(); + } + /* + * Trying to narrow down the SEGV to the ones generated by Kernel itself + * via arm64_notify_segfault(). This is a best-effort check anyway, and + * the si_code check may need to change if this aspect of the kernel + * ABI changes. + */ + if (td->sig_ok == SIGSEGV && si->si_code != SEGV_ACCERR) { + fprintf(stdout, + "si_code != SEGV_ACCERR...test is probably broken!\n"); + abort(); + } + td->pass = 1; + /* + * Some tests can lead to SEGV loops: in such a case we want to + * terminate immediately exiting straight away; some others are not + * supposed to outlive the signal handler code, due to the content of + * the fake sigframe which caused the signal itself. + */ + default_result(current, 1); + + return true; +} + +static void default_handler(int signum, siginfo_t *si, void *uc) +{ + if (current->sig_unsupp && signum == current->sig_unsupp && + handle_signal_unsupported(current, si, uc)) { + fprintf(stderr, "Handled SIG_UNSUPP\n"); + } else if (current->sig_trig && signum == current->sig_trig && + handle_signal_trigger(current, si, uc)) { + fprintf(stderr, "Handled SIG_TRIG\n"); + } else if (current->sig_ok && signum == current->sig_ok && + handle_signal_ok(current, si, uc)) { + fprintf(stderr, "Handled SIG_OK\n"); + } else { + if (signum == SIGALRM && current->timeout) { + fprintf(stderr, "-- Timeout !\n"); + } else { + fprintf(stderr, + "-- RX UNEXPECTED SIGNAL: %d\n", signum); + } + default_result(current, 1); + } +} + +static int default_setup(struct tdescr *td) +{ + struct sigaction sa; + + sa.sa_sigaction = default_handler; + sa.sa_flags = SA_SIGINFO | SA_RESTART; + sa.sa_flags |= td->sa_flags; + sigemptyset(&sa.sa_mask); + /* uncatchable signals naturally skipped ... */ + for (int sig = 1; sig < 32; sig++) + sigaction(sig, &sa, NULL); + /* + * RT Signals default disposition is Term but they cannot be + * generated by the Kernel in response to our tests; so just catch + * them all and report them as UNEXPECTED signals. + */ + for (int sig = SIGRTMIN; sig <= SIGRTMAX; sig++) + sigaction(sig, &sa, NULL); + + /* just in case...unblock explicitly all we need */ + if (td->sig_trig) + unblock_signal(td->sig_trig); + if (td->sig_ok) + unblock_signal(td->sig_ok); + if (td->sig_unsupp) + unblock_signal(td->sig_unsupp); + + if (td->timeout) { + unblock_signal(SIGALRM); + alarm(td->timeout); + } + fprintf(stderr, "Registered handlers for all signals.\n"); + + return 1; +} + +static inline int default_trigger(struct tdescr *td) +{ + return !raise(td->sig_trig); +} + +static int test_init(struct tdescr *td) +{ + td->minsigstksz = getauxval(AT_MINSIGSTKSZ); + if (!td->minsigstksz) + td->minsigstksz = MINSIGSTKSZ; + fprintf(stderr, "Detected MINSTKSIGSZ:%d\n", td->minsigstksz); + + if (td->feats_required) { + td->feats_supported = 0; + /* + * Checking for CPU required features using both the + * auxval and the arm64 MRS Emulation to read sysregs. + */ + if (getauxval(AT_HWCAP) & HWCAP_SSBS) + td->feats_supported |= FEAT_SSBS; + if (feats_ok(td)) + fprintf(stderr, + "Required Features: [%s] supported\n", + feats_to_string(td->feats_required & + td->feats_supported)); + else + fprintf(stderr, + "Required Features: [%s] NOT supported\n", + feats_to_string(td->feats_required & + ~td->feats_supported)); + } + + td->initialized = 1; + return 1; +} + +int test_setup(struct tdescr *td) +{ + /* assert core invariants symptom of a rotten testcase */ + assert(current); + assert(td); + assert(td->name); + assert(td->run); + + if (!test_init(td)) + return 0; + + if (td->setup) + return td->setup(td); + else + return default_setup(td); +} + +int test_run(struct tdescr *td) +{ + if (td->sig_trig) { + if (td->trigger) + return td->trigger(td); + else + return default_trigger(td); + } else { + return td->run(td, NULL, NULL); + } +} + +void test_result(struct tdescr *td) +{ + if (td->check_result) + td->check_result(td); + default_result(td, 0); +} + +void test_cleanup(struct tdescr *td) +{ + if (td->cleanup) + td->cleanup(td); +} diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h new file mode 100644 index 000000000000..47a7592b7c53 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -0,0 +1,19 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 ARM Limited */ + +#ifndef __TEST_SIGNALS_UTILS_H__ +#define __TEST_SIGNALS_UTILS_H__ + +#include "test_signals.h" + +int test_setup(struct tdescr *td); +void test_cleanup(struct tdescr *td); +int test_run(struct tdescr *td); +void test_result(struct tdescr *td); + +static inline bool feats_ok(struct tdescr *td) +{ + return (td->feats_required & td->feats_supported) == td->feats_required; +} + +#endif diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c new file mode 100644 index 000000000000..2cb118b0ba05 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_compat_toggle.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the execution state bit: this attempt must be spotted by Kernel and + * the test case is expected to be terminated via SEGV. + */ + +#include "test_signals_utils.h" +#include "testcases.h" + +static int mangle_invalid_pstate_run(struct tdescr *td, siginfo_t *si, + ucontext_t *uc) +{ + ASSERT_GOOD_CONTEXT(uc); + + /* This config should trigger a SIGSEGV by Kernel */ + uc->uc_mcontext.pstate ^= PSR_MODE32_BIT; + + return 1; +} + +struct tdescr tde = { + .sanity_disabled = true, + .name = "MANGLE_PSTATE_INVALID_STATE_TOGGLE", + .descr = "Mangling uc_mcontext with INVALID STATE_TOGGLE", + .sig_trig = SIGUSR1, + .sig_ok = SIGSEGV, + .run = mangle_invalid_pstate_run, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c new file mode 100644 index 000000000000..1914a01222a1 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019 ARM Limited */ +#include "testcases.h" + +struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, + size_t resv_sz, size_t *offset) +{ + size_t offs = 0; + struct _aarch64_ctx *found = NULL; + + if (!head || resv_sz < HDR_SZ) + return found; + + while (offs <= resv_sz - HDR_SZ && + head->magic != magic && head->magic) { + offs += head->size; + head = GET_RESV_NEXT_HEAD(head); + } + if (head->magic == magic) { + found = head; + if (offset) + *offset = offs; + } + + return found; +} + +bool validate_extra_context(struct extra_context *extra, char **err) +{ + struct _aarch64_ctx *term; + + if (!extra || !err) + return false; + + fprintf(stderr, "Validating EXTRA...\n"); + term = GET_RESV_NEXT_HEAD(extra); + if (!term || term->magic || term->size) { + *err = "Missing terminator after EXTRA context"; + return false; + } + if (extra->datap & 0x0fUL) + *err = "Extra DATAP misaligned"; + else if (extra->size & 0x0fUL) + *err = "Extra SIZE misaligned"; + else if (extra->datap != (uint64_t)term + sizeof(*term)) + *err = "Extra DATAP misplaced (not contiguos)"; + if (*err) + return false; + + return true; +} + +bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) +{ + bool terminated = false; + size_t offs = 0; + int flags = 0; + struct extra_context *extra = NULL; + struct _aarch64_ctx *head = + (struct _aarch64_ctx *)uc->uc_mcontext.__reserved; + + if (!err) + return false; + /* Walk till the end terminator verifying __reserved contents */ + while (head && !terminated && offs < resv_sz) { + if ((uint64_t)head & 0x0fUL) { + *err = "Misaligned HEAD"; + return false; + } + + switch (head->magic) { + case 0: + if (head->size) + *err = "Bad size for terminator"; + else + terminated = true; + break; + case FPSIMD_MAGIC: + if (flags & FPSIMD_CTX) + *err = "Multiple FPSIMD_MAGIC"; + else if (head->size != + sizeof(struct fpsimd_context)) + *err = "Bad size for fpsimd_context"; + flags |= FPSIMD_CTX; + break; + case ESR_MAGIC: + if (head->size != sizeof(struct esr_context)) + *err = "Bad size for esr_context"; + break; + case SVE_MAGIC: + if (flags & SVE_CTX) + *err = "Multiple SVE_MAGIC"; + else if (head->size != + sizeof(struct sve_context)) + *err = "Bad size for sve_context"; + flags |= SVE_CTX; + break; + case EXTRA_MAGIC: + if (flags & EXTRA_CTX) + *err = "Multiple EXTRA_MAGIC"; + else if (head->size != + sizeof(struct extra_context)) + *err = "Bad size for extra_context"; + flags |= EXTRA_CTX; + extra = (struct extra_context *)head; + break; + case KSFT_BAD_MAGIC: + /* + * This is a BAD magic header defined + * artificially by a testcase and surely + * unknown to the Kernel parse_user_sigframe(). + * It MUST cause a Kernel induced SEGV + */ + *err = "BAD MAGIC !"; + break; + default: + /* + * A still unknown Magic: potentially freshly added + * to the Kernel code and still unknown to the + * tests. + */ + fprintf(stdout, + "SKIP Unknown MAGIC: 0x%X - Is KSFT arm64/signal up to date ?\n", + head->magic); + break; + } + + if (*err) + return false; + + offs += head->size; + if (resv_sz < offs + sizeof(*head)) { + *err = "HEAD Overrun"; + return false; + } + + if (flags & EXTRA_CTX) + if (!validate_extra_context(extra, err)) + return false; + + head = GET_RESV_NEXT_HEAD(head); + } + + if (terminated && !(flags & FPSIMD_CTX)) { + *err = "Missing FPSIMD"; + return false; + } + + return true; +} diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h new file mode 100644 index 000000000000..04987f7870bc --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h @@ -0,0 +1,100 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 ARM Limited */ +#ifndef __TESTCASES_H__ +#define __TESTCASES_H__ + +#include +#include +#include +#include +#include +#include +#include + +/* Architecture specific sigframe definitions */ +#include + +#define FPSIMD_CTX (1 << 0) +#define SVE_CTX (1 << 1) +#define EXTRA_CTX (1 << 2) + +#define KSFT_BAD_MAGIC 0xdeadbeef + +#define HDR_SZ \ + sizeof(struct _aarch64_ctx) + +#define GET_SF_RESV_HEAD(sf) \ + (struct _aarch64_ctx *)(&(sf).uc.uc_mcontext.__reserved) + +#define GET_SF_RESV_SIZE(sf) \ + sizeof((sf).uc.uc_mcontext.__reserved) + +#define GET_UCP_RESV_SIZE(ucp) \ + sizeof((ucp)->uc_mcontext.__reserved) + +#define ASSERT_BAD_CONTEXT(uc) do { \ + char *err = NULL; \ + if (!validate_reserved((uc), GET_UCP_RESV_SIZE((uc)), &err)) { \ + if (err) \ + fprintf(stderr, \ + "Using badly built context - ERR: %s\n",\ + err); \ + } else { \ + abort(); \ + } \ +} while (0) + +#define ASSERT_GOOD_CONTEXT(uc) do { \ + char *err = NULL; \ + if (!validate_reserved((uc), GET_UCP_RESV_SIZE((uc)), &err)) { \ + if (err) \ + fprintf(stderr, \ + "Detected BAD context - ERR: %s\n", err);\ + abort(); \ + } else { \ + fprintf(stderr, "uc context validated.\n"); \ + } \ +} while (0) + +/* + * A simple record-walker for __reserved area: it walks through assuming + * only to find a proper struct __aarch64_ctx header descriptor. + * + * Instead it makes no assumptions on the content and ordering of the + * records, any needed bounds checking must be enforced by the caller + * if wanted: this way can be used by caller on any maliciously built bad + * contexts. + * + * head->size accounts both for payload and header _aarch64_ctx size ! + */ +#define GET_RESV_NEXT_HEAD(h) \ + (struct _aarch64_ctx *)((char *)(h) + (h)->size) + +struct fake_sigframe { + siginfo_t info; + ucontext_t uc; +}; + + +bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err); + +bool validate_extra_context(struct extra_context *extra, char **err); + +struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic, + size_t resv_sz, size_t *offset); + +static inline struct _aarch64_ctx *get_terminator(struct _aarch64_ctx *head, + size_t resv_sz, + size_t *offset) +{ + return get_header(head, 0, resv_sz, offset); +} + +static inline void write_terminator_record(struct _aarch64_ctx *tail) +{ + if (tail) { + tail->magic = 0; + tail->size = 0; + } +} +#endif From 0fc89f08df8cf9878eb0a1a957f5948f831fbb8c Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:08 +0100 Subject: [PATCH 78/93] kselftest: arm64: mangle_pstate_invalid_daif_bits Add a simple mangle testcase which messes with the ucontext_t from within the signal handler, trying to set PSTATE DAIF bits to an invalid value (masking everything). Expects SIGSEGV on test PASS. Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- .../mangle_pstate_invalid_daif_bits.c | 35 +++++++++++++++++++ 1 file changed, 35 insertions(+) create mode 100644 tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c new file mode 100644 index 000000000000..434b82597007 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_daif_bits.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, mangling the + * DAIF bits in an illegal manner: this attempt must be spotted by Kernel + * and the test case is expected to be terminated via SEGV. + * + */ + +#include "test_signals_utils.h" +#include "testcases.h" + +static int mangle_invalid_pstate_run(struct tdescr *td, siginfo_t *si, + ucontext_t *uc) +{ + ASSERT_GOOD_CONTEXT(uc); + + /* + * This config should trigger a SIGSEGV by Kernel when it checks + * the sigframe consistency in valid_user_regs() routine. + */ + uc->uc_mcontext.pstate |= PSR_D_BIT | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT; + + return 1; +} + +struct tdescr tde = { + .sanity_disabled = true, + .name = "MANGLE_PSTATE_INVALID_DAIF_BITS", + .descr = "Mangling uc_mcontext with INVALID DAIF_BITS", + .sig_trig = SIGUSR1, + .sig_ok = SIGSEGV, + .run = mangle_invalid_pstate_run, +}; From c2820987047c08d813396b2b1d8ac2894ba0bd5f Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:09 +0100 Subject: [PATCH 79/93] kselftest: arm64: mangle_pstate_invalid_mode_el[123][ht] Add 6 simple mangle testcases that mess with the ucontext_t from within the signal handler, trying to toggle PSTATE mode bits to trick the system into switching to EL1/EL2/EL3 using both SP_EL0(t) and SP_ELx(h). Expects SIGSEGV on test PASS. Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- .../mangle_pstate_invalid_mode_el1h.c | 15 ++++++++++ .../mangle_pstate_invalid_mode_el1t.c | 15 ++++++++++ .../mangle_pstate_invalid_mode_el2h.c | 15 ++++++++++ .../mangle_pstate_invalid_mode_el2t.c | 15 ++++++++++ .../mangle_pstate_invalid_mode_el3h.c | 15 ++++++++++ .../mangle_pstate_invalid_mode_el3t.c | 15 ++++++++++ .../mangle_pstate_invalid_mode_template.h | 28 +++++++++++++++++++ 7 files changed, 118 insertions(+) create mode 100644 tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c create mode 100644 tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c create mode 100644 tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c create mode 100644 tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c create mode 100644 tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c create mode 100644 tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c create mode 100644 tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c new file mode 100644 index 000000000000..95f821abdf46 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1h.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the mode bit to escalate exception level: this attempt must be spotted + * by Kernel and the test case is expected to be termninated via SEGV. + */ + +#include "test_signals_utils.h" +#include "testcases.h" + +#include "mangle_pstate_invalid_mode_template.h" + +DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(1h); diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c new file mode 100644 index 000000000000..cc222d8a618a --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el1t.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the mode bit to escalate exception level: this attempt must be spotted + * by Kernel and the test case is expected to be termninated via SEGV. + */ + +#include "test_signals_utils.h" +#include "testcases.h" + +#include "mangle_pstate_invalid_mode_template.h" + +DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(1t); diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c new file mode 100644 index 000000000000..2188add7d28c --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2h.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the mode bit to escalate exception level: this attempt must be spotted + * by Kernel and the test case is expected to be termninated via SEGV. + */ + +#include "test_signals_utils.h" +#include "testcases.h" + +#include "mangle_pstate_invalid_mode_template.h" + +DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(2h); diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c new file mode 100644 index 000000000000..df32dd5a479c --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el2t.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the mode bit to escalate exception level: this attempt must be spotted + * by Kernel and the test case is expected to be termninated via SEGV. + */ + +#include "test_signals_utils.h" +#include "testcases.h" + +#include "mangle_pstate_invalid_mode_template.h" + +DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(2t); diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c new file mode 100644 index 000000000000..9e6829b7e5db --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3h.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the mode bit to escalate exception level: this attempt must be spotted + * by Kernel and the test case is expected to be termninated via SEGV. + */ + +#include "test_signals_utils.h" +#include "testcases.h" + +#include "mangle_pstate_invalid_mode_template.h" + +DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(3h); diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c new file mode 100644 index 000000000000..5685a4f10d06 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_el3t.c @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Try to mangle the ucontext from inside a signal handler, toggling + * the mode bit to escalate exception level: this attempt must be spotted + * by Kernel and the test case is expected to be termninated via SEGV. + */ + +#include "test_signals_utils.h" +#include "testcases.h" + +#include "mangle_pstate_invalid_mode_template.h" + +DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(3t); diff --git a/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h new file mode 100644 index 000000000000..f5bf1804d858 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/mangle_pstate_invalid_mode_template.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Copyright (C) 2019 ARM Limited + * + * Utility macro to ease definition of testcases toggling mode EL + */ + +#define DEFINE_TESTCASE_MANGLE_PSTATE_INVALID_MODE(_mode) \ + \ +static int mangle_invalid_pstate_run(struct tdescr *td, siginfo_t *si, \ + ucontext_t *uc) \ +{ \ + ASSERT_GOOD_CONTEXT(uc); \ + \ + uc->uc_mcontext.pstate &= ~PSR_MODE_MASK; \ + uc->uc_mcontext.pstate |= PSR_MODE_EL ## _mode; \ + \ + return 1; \ +} \ + \ +struct tdescr tde = { \ + .sanity_disabled = true, \ + .name = "MANGLE_PSTATE_INVALID_MODE_EL"#_mode, \ + .descr = "Mangling uc_mcontext INVALID MODE EL"#_mode, \ + .sig_trig = SIGUSR1, \ + .sig_ok = SIGSEGV, \ + .run = mangle_invalid_pstate_run, \ +} From 837387a2cbc719667822856beabac127921a36c4 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:10 +0100 Subject: [PATCH 80/93] kselftest: arm64: extend test_init functionalities Extend signal testing framework to allow the definition of a custom per test initialization function to be run at the end of the common test_init after test setup phase has completed and before test-run routine. This custom per-test initialization function also enables the test writer to decide on its own when forcibly skip the test itself using standard KSFT mechanism. Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- .../selftests/arm64/signal/test_signals.c | 6 ++-- .../selftests/arm64/signal/test_signals.h | 7 +++-- .../arm64/signal/test_signals_utils.c | 30 ++++++++++++++----- .../arm64/signal/test_signals_utils.h | 1 + 4 files changed, 31 insertions(+), 13 deletions(-) diff --git a/tools/testing/selftests/arm64/signal/test_signals.c b/tools/testing/selftests/arm64/signal/test_signals.c index cb970346b280..416b1ff43199 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.c +++ b/tools/testing/selftests/arm64/signal/test_signals.c @@ -19,11 +19,11 @@ int main(int argc, char *argv[]) current = &tde; ksft_print_msg("%s :: %s\n", current->name, current->descr); - if (test_setup(current)) { + if (test_setup(current) && test_init(current)) { test_run(current); - test_result(current); test_cleanup(current); } + test_result(current); - return current->pass ? KSFT_PASS : KSFT_FAIL; + return current->result; } diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h index d730e9041da9..c431e7b3e46c 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.h +++ b/tools/testing/selftests/arm64/signal/test_signals.h @@ -68,17 +68,20 @@ struct tdescr { unsigned int timeout; bool triggered; bool pass; + unsigned int result; /* optional sa_flags for the installed handler */ int sa_flags; ucontext_t saved_uc; /* optional test private data */ void *priv; - /* a custom setup function to be called before test starts */ + /* a custom setup: called alternatively to default_setup */ int (*setup)(struct tdescr *td); + /* a custom init: called by default test init after test_setup */ + bool (*init)(struct tdescr *td); /* a custom cleanup function called before test exits */ void (*cleanup)(struct tdescr *td); - /* an optional function to be used as a trigger for test starting */ + /* an optional function to be used as a trigger for starting test */ int (*trigger)(struct tdescr *td); /* * the actual test-core: invoked differently depending on the diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index fbce41750590..76eaa505a789 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -11,6 +11,8 @@ #include #include +#include + #include "test_signals.h" #include "test_signals_utils.h" #include "testcases/testcases.h" @@ -52,12 +54,18 @@ static void unblock_signal(int signum) static void default_result(struct tdescr *td, bool force_exit) { - if (td->pass) + if (td->result == KSFT_SKIP) { + fprintf(stderr, "==>> completed. SKIP.\n"); + } else if (td->pass) { fprintf(stderr, "==>> completed. PASS(1)\n"); - else + td->result = KSFT_PASS; + } else { fprintf(stdout, "==>> completed. FAIL(0)\n"); + td->result = KSFT_FAIL; + } + if (force_exit) - exit(td->pass ? EXIT_SUCCESS : EXIT_FAILURE); + exit(td->result); } /* @@ -209,7 +217,7 @@ static inline int default_trigger(struct tdescr *td) return !raise(td->sig_trig); } -static int test_init(struct tdescr *td) +int test_init(struct tdescr *td) { td->minsigstksz = getauxval(AT_MINSIGSTKSZ); if (!td->minsigstksz) @@ -236,7 +244,14 @@ static int test_init(struct tdescr *td) ~td->feats_supported)); } + /* Perform test specific additional initialization */ + if (td->init && !td->init(td)) { + fprintf(stderr, "FAILED Testcase initialization.\n"); + return 0; + } td->initialized = 1; + fprintf(stderr, "Testcase initialized.\n"); + return 1; } @@ -248,9 +263,8 @@ int test_setup(struct tdescr *td) assert(td->name); assert(td->run); - if (!test_init(td)) - return 0; - + /* Default result is FAIL if test setup fails */ + td->result = KSFT_FAIL; if (td->setup) return td->setup(td); else @@ -271,7 +285,7 @@ int test_run(struct tdescr *td) void test_result(struct tdescr *td) { - if (td->check_result) + if (td->initialized && td->result != KSFT_SKIP && td->check_result) td->check_result(td); default_result(td, 0); } diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h index 47a7592b7c53..5e3a2b7aaa8b 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.h +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -6,6 +6,7 @@ #include "test_signals.h" +int test_init(struct tdescr *td); int test_setup(struct tdescr *td); void test_cleanup(struct tdescr *td); int test_run(struct tdescr *td); From 34306b05d3106447c87d29d262e824ca4a30c569 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:11 +0100 Subject: [PATCH 81/93] kselftest: arm64: add helper get_current_context Introduce a new common utility function get_current_context() which can be used to grab a ucontext without the help of libc, and also to detect if such ucontext has been successfully used by placing it on the stack as a fake sigframe. Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- .../selftests/arm64/signal/test_signals.h | 6 +- .../arm64/signal/test_signals_utils.c | 31 ++++++ .../arm64/signal/test_signals_utils.h | 98 +++++++++++++++++++ 3 files changed, 134 insertions(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h index c431e7b3e46c..f96baf1cef1a 100644 --- a/tools/testing/selftests/arm64/signal/test_signals.h +++ b/tools/testing/selftests/arm64/signal/test_signals.h @@ -72,8 +72,12 @@ struct tdescr { /* optional sa_flags for the installed handler */ int sa_flags; ucontext_t saved_uc; + /* used by get_current_ctx() */ + size_t live_sz; + ucontext_t *live_uc; + volatile sig_atomic_t live_uc_valid; /* optional test private data */ - void *priv; + void *priv; /* a custom setup: called alternatively to default_setup */ int (*setup)(struct tdescr *td); diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c index 76eaa505a789..2de6e5ed5e25 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.c +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c @@ -11,14 +11,19 @@ #include #include +#include + #include #include "test_signals.h" #include "test_signals_utils.h" #include "testcases/testcases.h" + extern struct tdescr *current; +static int sig_copyctx = SIGTRAP; + static char const *const feats_names[FMAX_END] = { " SSBS ", }; @@ -154,6 +159,20 @@ static bool handle_signal_ok(struct tdescr *td, return true; } +static bool handle_signal_copyctx(struct tdescr *td, + siginfo_t *si, void *uc) +{ + /* Mangling PC to avoid loops on original BRK instr */ + ((ucontext_t *)uc)->uc_mcontext.pc += 4; + memcpy(td->live_uc, uc, td->live_sz); + ASSERT_GOOD_CONTEXT(td->live_uc); + td->live_uc_valid = 1; + fprintf(stderr, + "GOOD CONTEXT grabbed from sig_copyctx handler\n"); + + return true; +} + static void default_handler(int signum, siginfo_t *si, void *uc) { if (current->sig_unsupp && signum == current->sig_unsupp && @@ -165,6 +184,9 @@ static void default_handler(int signum, siginfo_t *si, void *uc) } else if (current->sig_ok && signum == current->sig_ok && handle_signal_ok(current, si, uc)) { fprintf(stderr, "Handled SIG_OK\n"); + } else if (signum == sig_copyctx && current->live_uc && + handle_signal_copyctx(current, si, uc)) { + fprintf(stderr, "Handled SIG_COPYCTX\n"); } else { if (signum == SIGALRM && current->timeout) { fprintf(stderr, "-- Timeout !\n"); @@ -219,6 +241,15 @@ static inline int default_trigger(struct tdescr *td) int test_init(struct tdescr *td) { + if (td->sig_trig == sig_copyctx) { + fprintf(stdout, + "Signal %d is RESERVED, cannot be used as a trigger. Aborting\n", + sig_copyctx); + return 0; + } + /* just in case */ + unblock_signal(sig_copyctx); + td->minsigstksz = getauxval(AT_MINSIGSTKSZ); if (!td->minsigstksz) td->minsigstksz = MINSIGSTKSZ; diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h index 5e3a2b7aaa8b..fd67b1f23c41 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.h +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -4,6 +4,10 @@ #ifndef __TEST_SIGNALS_UTILS_H__ #define __TEST_SIGNALS_UTILS_H__ +#include +#include +#include + #include "test_signals.h" int test_init(struct tdescr *td); @@ -17,4 +21,98 @@ static inline bool feats_ok(struct tdescr *td) return (td->feats_required & td->feats_supported) == td->feats_required; } +/* + * Obtaining a valid and full-blown ucontext_t from userspace is tricky: + * libc getcontext does() not save all the regs and messes with some of + * them (pstate value in particular is not reliable). + * + * Here we use a service signal to grab the ucontext_t from inside a + * dedicated signal handler, since there, it is populated by Kernel + * itself in setup_sigframe(). The grabbed context is then stored and + * made available in td->live_uc. + * + * As service-signal is used a SIGTRAP induced by a 'brk' instruction, + * because here we have to avoid syscalls to trigger the signal since + * they would cause any SVE sigframe content (if any) to be removed. + * + * Anyway this function really serves a dual purpose: + * + * 1. grab a valid sigcontext into td->live_uc for result analysis: in + * such case it returns 1. + * + * 2. detect if, somehow, a previously grabbed live_uc context has been + * used actively with a sigreturn: in such a case the execution would have + * magically resumed in the middle of this function itself (seen_already==1): + * in such a case return 0, since in fact we have not just simply grabbed + * the context. + * + * This latter case is useful to detect when a fake_sigreturn test-case has + * unexpectedly survived without hitting a SEGV. + * + * Note that the case of runtime dynamically sized sigframes (like in SVE + * context) is still NOT addressed: sigframe size is supposed to be fixed + * at sizeof(ucontext_t). + */ +static __always_inline bool get_current_context(struct tdescr *td, + ucontext_t *dest_uc) +{ + static volatile bool seen_already; + + assert(td && dest_uc); + /* it's a genuine invocation..reinit */ + seen_already = 0; + td->live_uc_valid = 0; + td->live_sz = sizeof(*dest_uc); + memset(dest_uc, 0x00, td->live_sz); + td->live_uc = dest_uc; + /* + * Grab ucontext_t triggering a SIGTRAP. + * + * Note that: + * - live_uc_valid is declared volatile sig_atomic_t in + * struct tdescr since it will be changed inside the + * sig_copyctx handler + * - the additional 'memory' clobber is there to avoid possible + * compiler's assumption on live_uc_valid and the content + * pointed by dest_uc, which are all changed inside the signal + * handler + * - BRK causes a debug exception which is handled by the Kernel + * and finally causes the SIGTRAP signal to be delivered to this + * test thread. Since such delivery happens on the ret_to_user() + * /do_notify_resume() debug exception return-path, we are sure + * that the registered SIGTRAP handler has been run to completion + * before the execution path is restored here: as a consequence + * we can be sure that the volatile sig_atomic_t live_uc_valid + * carries a meaningful result. Being in a single thread context + * we'll also be sure that any access to memory modified by the + * handler (namely ucontext_t) will be visible once returned. + * - note that since we are using a breakpoint instruction here + * to cause a SIGTRAP, the ucontext_t grabbed from the signal + * handler would naturally contain a PC pointing exactly to this + * BRK line, which means that, on return from the signal handler, + * or if we place the ucontext_t on the stack to fake a sigreturn, + * we'll end up in an infinite loop of BRK-SIGTRAP-handler. + * For this reason we take care to artificially move forward the + * PC to the next instruction while inside the signal handler. + */ + asm volatile ("brk #666" + : "+m" (*dest_uc) + : + : "memory"); + + /* + * If we get here with seen_already==1 it implies the td->live_uc + * context has been used to get back here....this probably means + * a test has failed to cause a SEGV...anyway live_uc does not + * point to a just acquired copy of ucontext_t...so return 0 + */ + if (seen_already) { + fprintf(stdout, + "Unexpected successful sigreturn detected: live_uc is stale !\n"); + return 0; + } + seen_already = 1; + + return td->live_uc_valid; +} #endif From 6c2aa4284513585e9cc0c25b125ab4d57ef4ce76 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:12 +0100 Subject: [PATCH 82/93] kselftest: arm64: fake_sigreturn_bad_magic Add a simple fake_sigreturn testcase which builds a ucontext_t with a bad magic header and place it onto the stack. Expects a SIGSEGV on test PASS. Introduce a common utility assembly trampoline function to invoke a sigreturn while placing the provided sigframe at wanted alignment and also an helper to make space when needed inside the sigframe reserved area. Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/signal/Makefile | 2 +- .../testing/selftests/arm64/signal/signals.S | 64 +++++++++++++++++++ .../arm64/signal/test_signals_utils.h | 2 + .../testcases/fake_sigreturn_bad_magic.c | 52 +++++++++++++++ .../arm64/signal/testcases/testcases.c | 46 +++++++++++++ .../arm64/signal/testcases/testcases.h | 4 ++ 6 files changed, 169 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/arm64/signal/signals.S create mode 100644 tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile index f78f5190e3d4..b497cfea4643 100644 --- a/tools/testing/selftests/arm64/signal/Makefile +++ b/tools/testing/selftests/arm64/signal/Makefile @@ -28,5 +28,5 @@ clean: # Common test-unit targets to build common-layout test-cases executables # Needs secondary expansion to properly include the testcase c-file in pre-reqs .SECONDEXPANSION: -$(PROGS): test_signals.c test_signals_utils.c testcases/testcases.c $$@.c test_signals.h test_signals_utils.h testcases/testcases.h +$(PROGS): test_signals.c test_signals_utils.c testcases/testcases.c signals.S $$@.c test_signals.h test_signals_utils.h testcases/testcases.h $(CC) $(CFLAGS) $^ -o $@ diff --git a/tools/testing/selftests/arm64/signal/signals.S b/tools/testing/selftests/arm64/signal/signals.S new file mode 100644 index 000000000000..9f8c1aefc3b9 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/signals.S @@ -0,0 +1,64 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019 ARM Limited */ + +#include + +.section .rodata, "a" +call_fmt: + .asciz "Calling sigreturn with fake sigframe sized:%zd at SP @%08lX\n" + +.text + +.globl fake_sigreturn + +/* fake_sigreturn x0:&sigframe, x1:sigframe_size, x2:misalign_bytes */ +fake_sigreturn: + stp x29, x30, [sp, #-16]! + mov x29, sp + + mov x20, x0 + mov x21, x1 + mov x22, x2 + + /* create space on the stack for fake sigframe 16 bytes-aligned */ + add x0, x21, x22 + add x0, x0, #15 + bic x0, x0, #15 /* round_up(sigframe_size + misalign_bytes, 16) */ + sub sp, sp, x0 + add x23, sp, x22 /* new sigframe base with misaligment if any */ + + ldr x0, =call_fmt + mov x1, x21 + mov x2, x23 + bl printf + + /* memcpy the provided content, while still keeping SP aligned */ + mov x0, x23 + mov x1, x20 + mov x2, x21 + bl memcpy + + /* + * Here saving a last minute SP to current->token acts as a marker: + * if we got here, we are successfully faking a sigreturn; in other + * words we are sure no bad fatal signal has been raised till now + * for unrelated reasons, so we should consider the possibly observed + * fatal signal like SEGV coming from Kernel restore_sigframe() and + * triggered as expected from our test-case. + * For simplicity this assumes that current field 'token' is laid out + * as first in struct tdescr + */ + ldr x0, current + str x23, [x0] + /* finally move SP to misaligned address...if any requested */ + mov sp, x23 + + mov x8, #__NR_rt_sigreturn + svc #0 + + /* + * Above sigreturn should not return...looping here leads to a timeout + * and ensure proper and clean test failure, instead of jumping around + * on a potentially corrupted stack. + */ + b . diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h index fd67b1f23c41..6772b5c8d274 100644 --- a/tools/testing/selftests/arm64/signal/test_signals_utils.h +++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h @@ -115,4 +115,6 @@ static __always_inline bool get_current_context(struct tdescr *td, return td->live_uc_valid; } + +int fake_sigreturn(void *sigframe, size_t sz, int misalign_bytes); #endif diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c new file mode 100644 index 000000000000..8dc600a7d4fd --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Place a fake sigframe on the stack including a BAD Unknown magic + * record: on sigreturn Kernel must spot this attempt and the test + * case is expected to be terminated via SEGV. + */ + +#include +#include + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; + +static int fake_sigreturn_bad_magic_run(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head; + + /* just to fill the ucontext_t with something real */ + if (!get_current_context(td, &sf.uc)) + return 1; + + /* need at least 2*HDR_SZ space: KSFT_BAD_MAGIC + terminator. */ + head = get_starting_head(shead, HDR_SZ * 2, GET_SF_RESV_SIZE(sf), NULL); + if (!head) + return 0; + + /* + * use a well known NON existent bad magic...something + * we should pretty sure won't be ever defined in Kernel + */ + head->magic = KSFT_BAD_MAGIC; + head->size = HDR_SZ; + write_terminator_record(GET_RESV_NEXT_HEAD(head)); + + ASSERT_BAD_CONTEXT(&sf.uc); + fake_sigreturn(&sf, sizeof(sf), 0); + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_BAD_MAGIC", + .descr = "Trigger a sigreturn with a sigframe with a bad magic", + .sig_ok = SIGSEGV, + .timeout = 3, + .run = fake_sigreturn_bad_magic_run, +}; diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c index 1914a01222a1..e3521949b800 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.c +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -148,3 +148,49 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err) return true; } + +/* + * This function walks through the records inside the provided reserved area + * trying to find enough space to fit @need_sz bytes: if not enough space is + * available and an extra_context record is present, it throws away the + * extra_context record. + * + * It returns a pointer to a new header where it is possible to start storing + * our need_sz bytes. + * + * @shead: points to the start of reserved area + * @need_sz: needed bytes + * @resv_sz: reserved area size in bytes + * @offset: if not null, this will be filled with the offset of the return + * head pointer from @shead + * + * @return: pointer to a new head where to start storing need_sz bytes, or + * NULL if space could not be made available. + */ +struct _aarch64_ctx *get_starting_head(struct _aarch64_ctx *shead, + size_t need_sz, size_t resv_sz, + size_t *offset) +{ + size_t offs = 0; + struct _aarch64_ctx *head; + + head = get_terminator(shead, resv_sz, &offs); + /* not found a terminator...no need to update offset if any */ + if (!head) + return head; + if (resv_sz - offs < need_sz) { + fprintf(stderr, "Low on space:%zd. Discarding extra_context.\n", + resv_sz - offs); + head = get_header(shead, EXTRA_MAGIC, resv_sz, &offs); + if (!head || resv_sz - offs < need_sz) { + fprintf(stderr, + "Failed to reclaim space on sigframe.\n"); + return NULL; + } + } + + fprintf(stderr, "Available space:%zd\n", resv_sz - offs); + if (offset) + *offset = offs; + return head; +} diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h index 04987f7870bc..ad884c135314 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.h +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h @@ -97,4 +97,8 @@ static inline void write_terminator_record(struct _aarch64_ctx *tail) tail->size = 0; } } + +struct _aarch64_ctx *get_starting_head(struct _aarch64_ctx *shead, + size_t need_sz, size_t resv_sz, + size_t *offset); #endif From 4c94a0ba02b20068a8e3f80b471dfcedb5c8800a Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:13 +0100 Subject: [PATCH 83/93] kselftest: arm64: fake_sigreturn_bad_size_for_magic0 Add a simple fake_sigreturn testcase which builds a ucontext_t with a badly sized terminator record and place it onto the stack. Expects a SIGSEGV on test PASS. Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- .../fake_sigreturn_bad_size_for_magic0.c | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c new file mode 100644 index 000000000000..a44b88bfc81a --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Place a fake sigframe on the stack including a badly sized terminator + * record: on sigreturn Kernel must spot this attempt and the test case + * is expected to be terminated via SEGV. + */ + +#include +#include + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; + +static int fake_sigreturn_bad_size_for_magic0_run(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head; + + /* just to fill the ucontext_t with something real */ + if (!get_current_context(td, &sf.uc)) + return 1; + + /* at least HDR_SZ for the badly sized terminator. */ + head = get_starting_head(shead, HDR_SZ, GET_SF_RESV_SIZE(sf), NULL); + if (!head) + return 0; + + head->magic = 0; + head->size = HDR_SZ; + ASSERT_BAD_CONTEXT(&sf.uc); + fake_sigreturn(&sf, sizeof(sf), 0); + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_BAD_SIZE_FOR_TERMINATOR", + .descr = "Trigger a sigreturn using non-zero size terminator", + .sig_ok = SIGSEGV, + .timeout = 3, + .run = fake_sigreturn_bad_size_for_magic0_run, +}; From 8aa9d08fcb5368777f64d4a7c94899159249f0a0 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:14 +0100 Subject: [PATCH 84/93] kselftest: arm64: fake_sigreturn_missing_fpsimd Add a simple fake_sigreturn testcase which builds a ucontext_t without the required fpsimd_context and place it onto the stack. Expects a SIGSEGV on test PASS. Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- .../testcases/fake_sigreturn_missing_fpsimd.c | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c new file mode 100644 index 000000000000..08ecd8073a1a --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Place a fake sigframe on the stack missing the mandatory FPSIMD + * record: on sigreturn Kernel must spot this attempt and the test + * case is expected to be terminated via SEGV. + */ + +#include +#include +#include + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; + +static int fake_sigreturn_missing_fpsimd_run(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + size_t resv_sz, offset; + struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf); + + /* just to fill the ucontext_t with something real */ + if (!get_current_context(td, &sf.uc)) + return 1; + + resv_sz = GET_SF_RESV_SIZE(sf); + head = get_header(head, FPSIMD_MAGIC, resv_sz, &offset); + if (head && resv_sz - offset >= HDR_SZ) { + fprintf(stderr, "Mangling template header. Spare space:%zd\n", + resv_sz - offset); + /* Just overwrite fpsmid_context */ + write_terminator_record(head); + + ASSERT_BAD_CONTEXT(&sf.uc); + fake_sigreturn(&sf, sizeof(sf), 0); + } + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_MISSING_FPSIMD", + .descr = "Triggers a sigreturn with a missing fpsimd_context", + .sig_ok = SIGSEGV, + .timeout = 3, + .run = fake_sigreturn_missing_fpsimd_run, +}; From 46185cd1241b46c94933752137cde18ecfaf3766 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:15 +0100 Subject: [PATCH 85/93] kselftest: arm64: fake_sigreturn_duplicated_fpsimd Add a simple fake_sigreturn testcase which builds a ucontext_t with an anomalous additional fpsimd_context and place it onto the stack. Expects a SIGSEGV on test PASS. Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- .../fake_sigreturn_duplicated_fpsimd.c | 50 +++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c new file mode 100644 index 000000000000..afe8915f0998 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Place a fake sigframe on the stack including an additional FPSIMD + * record: on sigreturn Kernel must spot this attempt and the test + * case is expected to be terminated via SEGV. + */ + +#include +#include + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; + +static int fake_sigreturn_duplicated_fpsimd_run(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head; + + /* just to fill the ucontext_t with something real */ + if (!get_current_context(td, &sf.uc)) + return 1; + + head = get_starting_head(shead, sizeof(struct fpsimd_context) + HDR_SZ, + GET_SF_RESV_SIZE(sf), NULL); + if (!head) + return 0; + + /* Add a spurious fpsimd_context */ + head->magic = FPSIMD_MAGIC; + head->size = sizeof(struct fpsimd_context); + /* and terminate */ + write_terminator_record(GET_RESV_NEXT_HEAD(head)); + + ASSERT_BAD_CONTEXT(&sf.uc); + fake_sigreturn(&sf, sizeof(sf), 0); + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_DUPLICATED_FPSIMD", + .descr = "Triggers a sigreturn including two fpsimd_context", + .sig_ok = SIGSEGV, + .timeout = 3, + .run = fake_sigreturn_duplicated_fpsimd_run, +}; From 49978aa8f079633192928cb966dcde39a3ff0747 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:16 +0100 Subject: [PATCH 86/93] kselftest: arm64: fake_sigreturn_bad_size Add a simple fake_sigreturn testcase which builds a ucontext_t with a badly sized header that causes a overrun in the __reserved area and place it onto the stack. Expects a SIGSEGV on test PASS. Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- .../testcases/fake_sigreturn_bad_size.c | 77 +++++++++++++++++++ 1 file changed, 77 insertions(+) create mode 100644 tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c new file mode 100644 index 000000000000..b3c362100666 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c @@ -0,0 +1,77 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Place a fake sigframe on the stack including a bad record overflowing + * the __reserved space: on sigreturn Kernel must spot this attempt and + * the test case is expected to be terminated via SEGV. + */ + +#include +#include + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; + +#define MIN_SZ_ALIGN 16 + +static int fake_sigreturn_bad_size_run(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + size_t resv_sz, need_sz, offset; + struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head; + + /* just to fill the ucontext_t with something real */ + if (!get_current_context(td, &sf.uc)) + return 1; + + resv_sz = GET_SF_RESV_SIZE(sf); + /* at least HDR_SZ + bad sized esr_context needed */ + need_sz = sizeof(struct esr_context) + HDR_SZ; + head = get_starting_head(shead, need_sz, resv_sz, &offset); + if (!head) + return 0; + + /* + * Use an esr_context to build a fake header with a + * size greater then the free __reserved area minus HDR_SZ; + * using ESR_MAGIC here since it is not checked for size nor + * is limited to one instance. + * + * At first inject an additional normal esr_context + */ + head->magic = ESR_MAGIC; + head->size = sizeof(struct esr_context); + /* and terminate properly */ + write_terminator_record(GET_RESV_NEXT_HEAD(head)); + ASSERT_GOOD_CONTEXT(&sf.uc); + + /* + * now mess with fake esr_context size: leaving less space than + * needed while keeping size value 16-aligned + * + * It must trigger a SEGV from Kernel on: + * + * resv_sz - offset < sizeof(*head) + */ + /* at first set the maximum good 16-aligned size */ + head->size = (resv_sz - offset - need_sz + MIN_SZ_ALIGN) & ~0xfUL; + /* plus a bit more of 16-aligned sized stuff */ + head->size += MIN_SZ_ALIGN; + /* and terminate properly */ + write_terminator_record(GET_RESV_NEXT_HEAD(head)); + ASSERT_BAD_CONTEXT(&sf.uc); + fake_sigreturn(&sf, sizeof(sf), 0); + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_BAD_SIZE", + .descr = "Triggers a sigreturn with a overrun __reserved area", + .sig_ok = SIGSEGV, + .timeout = 3, + .run = fake_sigreturn_bad_size_run, +}; From 3f484ce3750f7a29c3be806e891de99aa5c4ca43 Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Fri, 25 Oct 2019 18:57:17 +0100 Subject: [PATCH 87/93] kselftest: arm64: fake_sigreturn_misaligned_sp Add a simple fake_sigreturn testcase which places a valid sigframe on a non-16 bytes aligned SP. Expects a SIGSEGV on test PASS. Reviewed-by: Dave Martin Signed-off-by: Cristian Marussi Signed-off-by: Catalin Marinas --- .../testcases/fake_sigreturn_misaligned_sp.c | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c new file mode 100644 index 000000000000..1e089e66f9f3 --- /dev/null +++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2019 ARM Limited + * + * Place a fake sigframe on the stack at a misaligned SP: on sigreturn + * Kernel must spot this attempt and the test case is expected to be + * terminated via SEGV. + */ + +#include +#include + +#include "test_signals_utils.h" +#include "testcases.h" + +struct fake_sigframe sf; + +static int fake_sigreturn_misaligned_run(struct tdescr *td, + siginfo_t *si, ucontext_t *uc) +{ + /* just to fill the ucontext_t with something real */ + if (!get_current_context(td, &sf.uc)) + return 1; + + /* Forcing sigframe on misaligned SP (16 + 3) */ + fake_sigreturn(&sf, sizeof(sf), 3); + + return 1; +} + +struct tdescr tde = { + .name = "FAKE_SIGRETURN_MISALIGNED_SP", + .descr = "Triggers a sigreturn with a misaligned sigframe", + .sig_ok = SIGSEGV, + .timeout = 3, + .run = fake_sigreturn_misaligned_run, +}; From 294a9ddde6cdbf931a28b8c8c928d3f799b61cb5 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 8 Nov 2019 17:12:43 +0000 Subject: [PATCH 88/93] arm64: kaslr: Announce KASLR status on boot Currently the KASLR code is silent at boot unless it forces on KPTI in which case a message will be printed for that. This can lead to users incorrectly believing their system has the feature enabled when it in fact does not, and if they notice the problem the lack of any diagnostics makes it harder to understand the problem. Add an initcall which prints a message showing the status of KASLR during boot to make the status clear. This is particularly useful in cases where we don't have a seed. It seems to be a relatively common error for system integrators and administrators to enable KASLR in their configuration but not provide the seed at runtime, often due to seed provisioning breaking at some later point after it is initially enabled and verified. Signed-off-by: Mark Brown Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/kernel/kaslr.c | 41 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 416f537bf614..0039dc50e556 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -19,6 +19,14 @@ #include #include +enum kaslr_status { + KASLR_ENABLED, + KASLR_DISABLED_CMDLINE, + KASLR_DISABLED_NO_SEED, + KASLR_DISABLED_FDT_REMAP, +}; + +enum kaslr_status __ro_after_init kaslr_status; u64 __ro_after_init module_alloc_base; u16 __initdata memstart_offset_seed; @@ -91,15 +99,19 @@ u64 __init kaslr_early_init(u64 dt_phys) */ early_fixmap_init(); fdt = fixmap_remap_fdt(dt_phys, &size, PAGE_KERNEL); - if (!fdt) + if (!fdt) { + kaslr_status = KASLR_DISABLED_FDT_REMAP; return 0; + } /* * Retrieve (and wipe) the seed from the FDT */ seed = get_kaslr_seed(fdt); - if (!seed) + if (!seed) { + kaslr_status = KASLR_DISABLED_NO_SEED; return 0; + } /* * Check if 'nokaslr' appears on the command line, and @@ -107,8 +119,10 @@ u64 __init kaslr_early_init(u64 dt_phys) */ cmdline = kaslr_get_cmdline(fdt); str = strstr(cmdline, "nokaslr"); - if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) + if (str == cmdline || (str > cmdline && *(str - 1) == ' ')) { + kaslr_status = KASLR_DISABLED_CMDLINE; return 0; + } /* * OK, so we are proceeding with KASLR enabled. Calculate a suitable @@ -170,3 +184,24 @@ u64 __init kaslr_early_init(u64 dt_phys) return offset; } + +static int __init kaslr_init(void) +{ + switch (kaslr_status) { + case KASLR_ENABLED: + pr_info("KASLR enabled\n"); + break; + case KASLR_DISABLED_CMDLINE: + pr_info("KASLR disabled on command line\n"); + break; + case KASLR_DISABLED_NO_SEED: + pr_warn("KASLR disabled due to lack of seed\n"); + break; + case KASLR_DISABLED_FDT_REMAP: + pr_warn("KASLR disabled due to FDT remapping failure\n"); + break; + } + + return 0; +} +core_initcall(kaslr_init) From 2203e1adb936a92ab2fd8f705e888af322462736 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 8 Nov 2019 17:12:44 +0000 Subject: [PATCH 89/93] arm64: kaslr: Check command line before looking for a seed Now that we print diagnostics at boot the reason why we do not initialise KASLR matters. Currently we check for a seed before we check if the user has explicitly disabled KASLR on the command line which will result in misleading diagnostics so reverse the order of those checks. We still parse the seed from the DT early so that if the user has both provided a seed and disabled KASLR on the command line we still mask the seed on the command line. Signed-off-by: Mark Brown Acked-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/kernel/kaslr.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/kaslr.c b/arch/arm64/kernel/kaslr.c index 0039dc50e556..2a11a962e571 100644 --- a/arch/arm64/kernel/kaslr.c +++ b/arch/arm64/kernel/kaslr.c @@ -26,7 +26,7 @@ enum kaslr_status { KASLR_DISABLED_FDT_REMAP, }; -enum kaslr_status __ro_after_init kaslr_status; +static enum kaslr_status __initdata kaslr_status; u64 __ro_after_init module_alloc_base; u16 __initdata memstart_offset_seed; @@ -108,10 +108,6 @@ u64 __init kaslr_early_init(u64 dt_phys) * Retrieve (and wipe) the seed from the FDT */ seed = get_kaslr_seed(fdt); - if (!seed) { - kaslr_status = KASLR_DISABLED_NO_SEED; - return 0; - } /* * Check if 'nokaslr' appears on the command line, and @@ -124,6 +120,11 @@ u64 __init kaslr_early_init(u64 dt_phys) return 0; } + if (!seed) { + kaslr_status = KASLR_DISABLED_NO_SEED; + return 0; + } + /* * OK, so we are proceeding with KASLR enabled. Calculate a suitable * kernel image offset from the seed. Let's place the kernel in the From ba603c2f0794a5c44516ad42f1064008048329ba Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 8 Nov 2019 14:46:54 +0000 Subject: [PATCH 90/93] MAINTAINERS: Add arm64 selftests to the ARM64 PORT entry Since these are tests specific to the arm64 architecture, it makes sense for the arm64 maintainers to gatekeep the corresponding changes. Cc: Shuah Khan Cc: Will Deacon Signed-off-by: Catalin Marinas --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index a69e6db80c79..553c64308060 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2609,6 +2609,7 @@ S: Maintained F: arch/arm64/ X: arch/arm64/boot/dts/ F: Documentation/arm64/ +F: tools/testing/selftests/arm64/ AS3645A LED FLASH CONTROLLER DRIVER M: Sakari Ailus From f70c08e46d55126efa6d23e149f8472c178686f2 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Mon, 11 Nov 2019 09:59:56 +0100 Subject: [PATCH 91/93] arm64: Kconfig: make CMDLINE_FORCE depend on CMDLINE When building allmodconfig KCONFIG_ALLCONFIG=$(pwd)/arch/arm64/configs/defconfig CONFIG_CMDLINE_FORCE gets enabled. Which forces the user to pass the full cmdline to CONFIG_CMDLINE="...". Rework so that CONFIG_CMDLINE_FORCE gets set only if CONFIG_CMDLINE is set to something except an empty string. Suggested-by: John Garry Acked-by: Will Deacon Signed-off-by: Anders Roxell Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3e53441ee067..8a0800e5be9d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1629,6 +1629,7 @@ config CMDLINE config CMDLINE_FORCE bool "Always use the default kernel command string" + depends on CMDLINE != "" help Always use the default kernel command string, even if the boot loader passes other arguments to the kernel. From 32667745cab91cda458fade64d591136dff1422b Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 11 Nov 2019 09:12:36 +0000 Subject: [PATCH 92/93] kselftest: arm64: fix spelling mistake "contiguos" -> "contiguous" There is a spelling mistake in an error message literal string. Fix it. Fixes: f96bf4340316 ("kselftest: arm64: mangle_pstate_invalid_compat_toggle and common utils") Signed-off-by: Colin Ian King Signed-off-by: Catalin Marinas --- tools/testing/selftests/arm64/signal/testcases/testcases.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c index e3521949b800..61ebcdf63831 100644 --- a/tools/testing/selftests/arm64/signal/testcases/testcases.c +++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c @@ -43,7 +43,7 @@ bool validate_extra_context(struct extra_context *extra, char **err) else if (extra->size & 0x0fUL) *err = "Extra SIZE misaligned"; else if (extra->datap != (uint64_t)term + sizeof(*term)) - *err = "Extra DATAP misplaced (not contiguos)"; + *err = "Extra DATAP misplaced (not contiguous)"; if (*err) return false; From d8e85e144bbe12e8d82c6b05d690a34da62cc991 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 13 Nov 2019 10:26:52 +0100 Subject: [PATCH 93/93] arm64: Kconfig: add a choice for endianness When building allmodconfig KCONFIG_ALLCONFIG=$(pwd)/arch/arm64/configs/defconfig CONFIG_CPU_BIG_ENDIAN gets enabled. Which tends not to be what most people want. Another concern that has come up is that ACPI isn't built for an allmodconfig kernel today since that also depends on !CPU_BIG_ENDIAN. Rework so that we introduce a 'choice' and default the choice to CPU_LITTLE_ENDIAN. That means that when we build an allmodconfig kernel it will default to CPU_LITTLE_ENDIAN that most people tends to want. Reviewed-by: John Garry Acked-by: Will Deacon Signed-off-by: Anders Roxell Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8a0800e5be9d..d66a9727344d 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -877,10 +877,26 @@ config ARM64_PA_BITS default 48 if ARM64_PA_BITS_48 default 52 if ARM64_PA_BITS_52 +choice + prompt "Endianness" + default CPU_LITTLE_ENDIAN + help + Select the endianness of data accesses performed by the CPU. Userspace + applications will need to be compiled and linked for the endianness + that is selected here. + config CPU_BIG_ENDIAN bool "Build big-endian kernel" help - Say Y if you plan on running a kernel in big-endian mode. + Say Y if you plan on running a kernel with a big-endian userspace. + +config CPU_LITTLE_ENDIAN + bool "Build little-endian kernel" + help + Say Y if you plan on running a kernel with a little-endian userspace. + This is usually the case for distributions targeting arm64. + +endchoice config SCHED_MC bool "Multi-core scheduler support"