From d51d0af43b30dcae1ca13ea67fd717e03b37f153 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 30 Jun 2014 16:01:30 +0100
Subject: [PATCH 01/55] irqchip: gic: Move some bits of GICv2 to a library-type file

A few GICv2 low-level functions are actually very useful to GICv3, and
it makes some sense to share them across the two drivers. They end up
in their own file, with an additional parameter used to ensure an
optional synchronization (unused on GICv2).

Cc: Thomas Gleixner
Cc: Jason Cooper
Acked-by: Christoffer Dall
Signed-off-by: Marc Zyngier
Link: https://lkml.kernel.org/r/1404140510-5382-2-git-send-email-marc.zyngier@arm.com
Signed-off-by: Jason Cooper
---
 drivers/irqchip/Makefile         |   2 +-
 drivers/irqchip/irq-gic-common.c | 115 +++++++++++++++++++++++++++++++
 drivers/irqchip/irq-gic-common.h |  29 ++++++++
 drivers/irqchip/irq-gic.c        |  59 ++--------------
 4 files changed, 149 insertions(+), 56 deletions(-)
 create mode 100644 drivers/irqchip/irq-gic-common.c
 create mode 100644 drivers/irqchip/irq-gic-common.h

diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile
index 62a13e5ef98f..9b9505c8e774 100644
--- a/drivers/irqchip/Makefile
+++ b/drivers/irqchip/Makefile
@@ -15,7 +15,7 @@ obj-$(CONFIG_ORION_IRQCHIP)		+= irq-orion.o
 obj-$(CONFIG_ARCH_SUNXI)		+= irq-sun4i.o
 obj-$(CONFIG_ARCH_SUNXI)		+= irq-sunxi-nmi.o
 obj-$(CONFIG_ARCH_SPEAR3XX)		+= spear-shirq.o
-obj-$(CONFIG_ARM_GIC)			+= irq-gic.o
+obj-$(CONFIG_ARM_GIC)			+= irq-gic.o irq-gic-common.o
 obj-$(CONFIG_ARM_NVIC)			+= irq-nvic.o
 obj-$(CONFIG_ARM_VIC)			+= irq-vic.o
 obj-$(CONFIG_IMGPDC_IRQ)		+= irq-imgpdc.o
diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c
new file mode 100644
index 000000000000..60ac704d2090
--- /dev/null
+++ b/drivers/irqchip/irq-gic-common.c
@@ -0,0 +1,115 @@
+/*
+ * Copyright (C) 2002 ARM Limited, All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/irq.h>
+#include <linux/irqchip/arm-gic.h>
+
+#include "irq-gic-common.h"
+
+void gic_configure_irq(unsigned int irq, unsigned int type,
+		       void __iomem *base, void (*sync_access)(void))
+{
+	u32 enablemask = 1 << (irq % 32);
+	u32 enableoff = (irq / 32) * 4;
+	u32 confmask = 0x2 << ((irq % 16) * 2);
+	u32 confoff = (irq / 16) * 4;
+	bool enabled = false;
+	u32 val;
+
+	/*
+	 * Read current configuration register, and insert the config
+	 * for "irq", depending on "type".
+	 */
+	val = readl_relaxed(base + GIC_DIST_CONFIG + confoff);
+	if (type == IRQ_TYPE_LEVEL_HIGH)
+		val &= ~confmask;
+	else if (type == IRQ_TYPE_EDGE_RISING)
+		val |= confmask;
+
+	/*
+	 * As recommended by the spec, disable the interrupt before changing
+	 * the configuration
+	 */
+	if (readl_relaxed(base + GIC_DIST_ENABLE_SET + enableoff) & enablemask) {
+		writel_relaxed(enablemask, base + GIC_DIST_ENABLE_CLEAR + enableoff);
+		if (sync_access)
+			sync_access();
+		enabled = true;
+	}
+
+	/*
+	 * Write back the new configuration, and possibly re-enable
+	 * the interrupt.
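+	 *
+	 * For example, for interrupt 42 the values computed above select:
+	 *   enableoff = (42 / 32) * 4 = 4, enablemask = 1 << (42 % 32) = 1 << 10,
+	 *   confoff = (42 / 16) * 4 = 8, confmask = 0x2 << ((42 % 16) * 2) = 0x2 << 20,
+	 * i.e. bit 10 of the second enable register and the 2-bit field of
+	 * interrupt 42 in the third GIC_DIST_CONFIG register.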
+ */ + writel_relaxed(val, base + GIC_DIST_CONFIG + confoff); + + if (enabled) + writel_relaxed(enablemask, base + GIC_DIST_ENABLE_SET + enableoff); + + if (sync_access) + sync_access(); +} + +void __init gic_dist_config(void __iomem *base, int gic_irqs, + void (*sync_access)(void)) +{ + unsigned int i; + + /* + * Set all global interrupts to be level triggered, active low. + */ + for (i = 32; i < gic_irqs; i += 16) + writel_relaxed(0, base + GIC_DIST_CONFIG + i / 4); + + /* + * Set priority on all global interrupts. + */ + for (i = 32; i < gic_irqs; i += 4) + writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i); + + /* + * Disable all interrupts. Leave the PPI and SGIs alone + * as they are enabled by redistributor registers. + */ + for (i = 32; i < gic_irqs; i += 32) + writel_relaxed(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i / 8); + + if (sync_access) + sync_access(); +} + +void gic_cpu_config(void __iomem *base, void (*sync_access)(void)) +{ + int i; + + /* + * Deal with the banked PPI and SGI interrupts - disable all + * PPI interrupts, ensure all SGI interrupts are enabled. + */ + writel_relaxed(0xffff0000, base + GIC_DIST_ENABLE_CLEAR); + writel_relaxed(0x0000ffff, base + GIC_DIST_ENABLE_SET); + + /* + * Set priority on PPI and SGI interrupts + */ + for (i = 0; i < 32; i += 4) + writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4); + + if (sync_access) + sync_access(); +} diff --git a/drivers/irqchip/irq-gic-common.h b/drivers/irqchip/irq-gic-common.h new file mode 100644 index 000000000000..b41f02481c3a --- /dev/null +++ b/drivers/irqchip/irq-gic-common.h @@ -0,0 +1,29 @@ +/* + * Copyright (C) 2002 ARM Limited, All Rights Reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ + +#ifndef _IRQ_GIC_COMMON_H +#define _IRQ_GIC_COMMON_H + +#include +#include + +void gic_configure_irq(unsigned int irq, unsigned int type, + void __iomem *base, void (*sync_access)(void)); +void gic_dist_config(void __iomem *base, int gic_irqs, + void (*sync_access)(void)); +void gic_cpu_config(void __iomem *base, void (*sync_access)(void)); + +#endif /* _IRQ_GIC_COMMON_H */ diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 7e11c9d6ae8c..508b81536b8a 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -46,6 +46,7 @@ #include #include +#include "irq-gic-common.h" #include "irqchip.h" union gic_base { @@ -188,12 +189,6 @@ static int gic_set_type(struct irq_data *d, unsigned int type) { void __iomem *base = gic_dist_base(d); unsigned int gicirq = gic_irq(d); - u32 enablemask = 1 << (gicirq % 32); - u32 enableoff = (gicirq / 32) * 4; - u32 confmask = 0x2 << ((gicirq % 16) * 2); - u32 confoff = (gicirq / 16) * 4; - bool enabled = false; - u32 val; /* Interrupt configuration for SGIs can't be changed */ if (gicirq < 16) @@ -207,25 +202,7 @@ static int gic_set_type(struct irq_data *d, unsigned int type) if (gic_arch_extn.irq_set_type) gic_arch_extn.irq_set_type(d, type); - val = readl_relaxed(base + GIC_DIST_CONFIG + confoff); - if (type == IRQ_TYPE_LEVEL_HIGH) - val &= ~confmask; - else if (type == IRQ_TYPE_EDGE_RISING) - val |= confmask; - - /* - * As recommended by the spec, disable the interrupt before changing - * the configuration - */ - if (readl_relaxed(base + GIC_DIST_ENABLE_SET + enableoff) & enablemask) { - writel_relaxed(enablemask, base + GIC_DIST_ENABLE_CLEAR + enableoff); - enabled = true; - } - - writel_relaxed(val, base + GIC_DIST_CONFIG + confoff); - - if (enabled) - writel_relaxed(enablemask, base + GIC_DIST_ENABLE_SET + enableoff); + gic_configure_irq(gicirq, type, base, NULL); raw_spin_unlock(&irq_controller_lock); @@ -386,12 +363,6 @@ static void __init gic_dist_init(struct gic_chip_data *gic) writel_relaxed(0, base + GIC_DIST_CTRL); - /* - * Set all global interrupts to be level triggered, active low. - */ - for (i = 32; i < gic_irqs; i += 16) - writel_relaxed(0, base + GIC_DIST_CONFIG + i * 4 / 16); - /* * Set all global interrupts to this CPU only. */ @@ -401,18 +372,7 @@ static void __init gic_dist_init(struct gic_chip_data *gic) for (i = 32; i < gic_irqs; i += 4) writel_relaxed(cpumask, base + GIC_DIST_TARGET + i * 4 / 4); - /* - * Set priority on all global interrupts. - */ - for (i = 32; i < gic_irqs; i += 4) - writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4); - - /* - * Disable all interrupts. Leave the PPI and SGIs alone - * as these enables are banked registers. - */ - for (i = 32; i < gic_irqs; i += 32) - writel_relaxed(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i * 4 / 32); + gic_dist_config(base, gic_irqs, NULL); writel_relaxed(1, base + GIC_DIST_CTRL); } @@ -439,18 +399,7 @@ static void gic_cpu_init(struct gic_chip_data *gic) if (i != cpu) gic_cpu_map[i] &= ~cpu_mask; - /* - * Deal with the banked PPI and SGI interrupts - disable all - * PPI interrupts, ensure all SGI interrupts are enabled. 
- */
-	writel_relaxed(0xffff0000, dist_base + GIC_DIST_ENABLE_CLEAR);
-	writel_relaxed(0x0000ffff, dist_base + GIC_DIST_ENABLE_SET);
-
-	/*
-	 * Set priority on PPI and SGI interrupts
-	 */
-	for (i = 0; i < 32; i += 4)
-		writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4 / 4);
+	gic_cpu_config(dist_base, NULL);
 
 	writel_relaxed(0xf0, base + GIC_CPU_PRIMASK);
 	writel_relaxed(1, base + GIC_CPU_CTRL);

From 021f653791ad17e03f98aaa7fb933816ae16f161 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Mon, 30 Jun 2014 16:01:31 +0100
Subject: [PATCH 02/55] irqchip: gic-v3: Initial support for GICv3

The Generic Interrupt Controller (version 3) offers services that are
similar to GICv2, with a number of additional features:
- Affinity routing based on the CPU MPIDR (ARE)
- System registers for the CPU interfaces (SRE)
- Support for more than 8 CPUs
- Locality-specific Peripheral Interrupts (LPIs)
- Interrupt Translation Services (ITS)

This patch adds preliminary support for GICv3 with ARE and SRE,
non-secure mode only. It relies on higher exception levels to grant ARE
and SRE access.

Support for LPI and ITS will be added at a later time.

Cc: Thomas Gleixner
Cc: Jason Cooper
Reviewed-by: Zi Shen Lim
Reviewed-by: Christoffer Dall
Reviewed-by: Tirumalesh Chalamarla
Reviewed-by: Yun Wu
Reviewed-by: Zhen Lei
Tested-by: Tirumalesh Chalamarla
Tested-by: Radha Mohan Chintakuntla
Acked-by: Radha Mohan Chintakuntla
Acked-by: Catalin Marinas
Signed-off-by: Marc Zyngier
Reviewed-by: Mark Rutland
Link: https://lkml.kernel.org/r/1404140510-5382-3-git-send-email-marc.zyngier@arm.com
Signed-off-by: Jason Cooper
---
 arch/arm64/Kconfig                 |   1 +
 arch/arm64/kernel/head.S           |  18 +
 arch/arm64/kernel/hyp-stub.S       |   1 +
 drivers/irqchip/Kconfig            |   5 +
 drivers/irqchip/Makefile           |   1 +
 drivers/irqchip/irq-gic-v3.c       | 692 +++++++++++++++++++++++++++++
 include/linux/irqchip/arm-gic-v3.h | 198 +++++++++
 7 files changed, 916 insertions(+)
 create mode 100644 drivers/irqchip/irq-gic-v3.c
 create mode 100644 include/linux/irqchip/arm-gic-v3.h

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 7295419165e1..be52492c2291 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -9,6 +9,7 @@ config ARM64
 	select ARM_AMBA
 	select ARM_ARCH_TIMER
 	select ARM_GIC
+	select ARM_GIC_V3
 	select BUILDTIME_EXTABLE_SORT
 	select CLONE_BACKWARDS
 	select COMMON_CLK
diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index a96d3a6a63f6..96623502519c 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -22,6 +22,7 @@
 
 #include <linux/linkage.h>
 #include <linux/init.h>
+#include <linux/irqchip/arm-gic-v3.h>
 
 #include <asm/assembler.h>
 #include <asm/ptrace.h>
@@ -296,6 +297,23 @@ CPU_LE(	bic	x0, x0, #(3 << 24)	)	// Clear the EE and E0E bits for EL1
 	msr	cnthctl_el2, x0
 	msr	cntvoff_el2, xzr		// Clear virtual offset
 
+#ifdef CONFIG_ARM_GIC_V3
+	/* GICv3 system register access */
+	mrs	x0, id_aa64pfr0_el1
+	ubfx	x0, x0, #24, #4			// Extract ID_AA64PFR0_EL1.GIC
+	cmp	x0, #1				// 1: system registers implemented
+	b.ne	3f
+
+	mrs	x0, ICC_SRE_EL2
+	orr	x0, x0, #ICC_SRE_EL2_SRE	// Set ICC_SRE_EL2.SRE==1
+	orr	x0, x0, #ICC_SRE_EL2_ENABLE	// Set ICC_SRE_EL2.Enable==1
+	msr	ICC_SRE_EL2, x0
+	isb					// Make sure SRE is now set
+	msr	ICH_HCR_EL2, xzr		// Reset ICC_HCR_EL2 to defaults
+
+3:
+#endif
+
 	/* Populate ID registers.
*/ mrs x0, midr_el1 mrs x1, mpidr_el1 diff --git a/arch/arm64/kernel/hyp-stub.S b/arch/arm64/kernel/hyp-stub.S index 0959611d9ff1..a272f335c289 100644 --- a/arch/arm64/kernel/hyp-stub.S +++ b/arch/arm64/kernel/hyp-stub.S @@ -19,6 +19,7 @@ #include #include +#include #include #include diff --git a/drivers/irqchip/Kconfig b/drivers/irqchip/Kconfig index bbb746e35500..7f0c2a30267b 100644 --- a/drivers/irqchip/Kconfig +++ b/drivers/irqchip/Kconfig @@ -10,6 +10,11 @@ config ARM_GIC config GIC_NON_BANKED bool +config ARM_GIC_V3 + bool + select IRQ_DOMAIN + select MULTI_IRQ_HANDLER + config ARM_NVIC bool select IRQ_DOMAIN diff --git a/drivers/irqchip/Makefile b/drivers/irqchip/Makefile index 9b9505c8e774..c57e642700d4 100644 --- a/drivers/irqchip/Makefile +++ b/drivers/irqchip/Makefile @@ -16,6 +16,7 @@ obj-$(CONFIG_ARCH_SUNXI) += irq-sun4i.o obj-$(CONFIG_ARCH_SUNXI) += irq-sunxi-nmi.o obj-$(CONFIG_ARCH_SPEAR3XX) += spear-shirq.o obj-$(CONFIG_ARM_GIC) += irq-gic.o irq-gic-common.o +obj-$(CONFIG_ARM_GIC_V3) += irq-gic-v3.o irq-gic-common.o obj-$(CONFIG_ARM_NVIC) += irq-nvic.o obj-$(CONFIG_ARM_VIC) += irq-vic.o obj-$(CONFIG_IMGPDC_IRQ) += irq-imgpdc.o diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c new file mode 100644 index 000000000000..81519bae0453 --- /dev/null +++ b/drivers/irqchip/irq-gic-v3.c @@ -0,0 +1,692 @@ +/* + * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include "irq-gic-common.h" +#include "irqchip.h" + +struct gic_chip_data { + void __iomem *dist_base; + void __iomem **redist_base; + void __percpu __iomem **rdist; + struct irq_domain *domain; + u64 redist_stride; + u32 redist_regions; + unsigned int irq_nr; +}; + +static struct gic_chip_data gic_data __read_mostly; + +#define gic_data_rdist() (this_cpu_ptr(gic_data.rdist)) +#define gic_data_rdist_rd_base() (*gic_data_rdist()) +#define gic_data_rdist_sgi_base() (gic_data_rdist_rd_base() + SZ_64K) + +/* Our default, arbitrary priority value. Linux only uses one anyway. */ +#define DEFAULT_PMR_VALUE 0xf0 + +static inline unsigned int gic_irq(struct irq_data *d) +{ + return d->hwirq; +} + +static inline int gic_irq_in_rdist(struct irq_data *d) +{ + return gic_irq(d) < 32; +} + +static inline void __iomem *gic_dist_base(struct irq_data *d) +{ + if (gic_irq_in_rdist(d)) /* SGI+PPI -> SGI_base for this CPU */ + return gic_data_rdist_sgi_base(); + + if (d->hwirq <= 1023) /* SPI -> dist_base */ + return gic_data.dist_base; + + if (d->hwirq >= 8192) + BUG(); /* LPI Detected!!! */ + + return NULL; +} + +static void gic_do_wait_for_rwp(void __iomem *base) +{ + u32 count = 1000000; /* 1s! 
*/ + + while (readl_relaxed(base + GICD_CTLR) & GICD_CTLR_RWP) { + count--; + if (!count) { + pr_err_ratelimited("RWP timeout, gone fishing\n"); + return; + } + cpu_relax(); + udelay(1); + }; +} + +/* Wait for completion of a distributor change */ +static void gic_dist_wait_for_rwp(void) +{ + gic_do_wait_for_rwp(gic_data.dist_base); +} + +/* Wait for completion of a redistributor change */ +static void gic_redist_wait_for_rwp(void) +{ + gic_do_wait_for_rwp(gic_data_rdist_rd_base()); +} + +/* Low level accessors */ +static u64 gic_read_iar(void) +{ + u64 irqstat; + + asm volatile("mrs %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat)); + return irqstat; +} + +static void gic_write_pmr(u64 val) +{ + asm volatile("msr " __stringify(ICC_PMR_EL1) ", %0" : : "r" (val)); +} + +static void gic_write_ctlr(u64 val) +{ + asm volatile("msr " __stringify(ICC_CTLR_EL1) ", %0" : : "r" (val)); + isb(); +} + +static void gic_write_grpen1(u64 val) +{ + asm volatile("msr " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" (val)); + isb(); +} + +static void gic_write_sgi1r(u64 val) +{ + asm volatile("msr " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val)); +} + +static void gic_enable_sre(void) +{ + u64 val; + + asm volatile("mrs %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + val |= ICC_SRE_EL1_SRE; + asm volatile("msr " __stringify(ICC_SRE_EL1) ", %0" : : "r" (val)); + isb(); + + /* + * Need to check that the SRE bit has actually been set. If + * not, it means that SRE is disabled at EL2. We're going to + * die painfully, and there is nothing we can do about it. + * + * Kindly inform the luser. + */ + asm volatile("mrs %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + if (!(val & ICC_SRE_EL1_SRE)) + pr_err("GIC: unable to set SRE (disabled at EL2), panic ahead\n"); +} + +static void gic_enable_redist(void) +{ + void __iomem *rbase; + u32 count = 1000000; /* 1s! 
*/ + u32 val; + + rbase = gic_data_rdist_rd_base(); + + /* Wake up this CPU redistributor */ + val = readl_relaxed(rbase + GICR_WAKER); + val &= ~GICR_WAKER_ProcessorSleep; + writel_relaxed(val, rbase + GICR_WAKER); + + while (readl_relaxed(rbase + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) { + count--; + if (!count) { + pr_err_ratelimited("redist didn't wake up...\n"); + return; + } + cpu_relax(); + udelay(1); + }; +} + +/* + * Routines to disable, enable, EOI and route interrupts + */ +static void gic_poke_irq(struct irq_data *d, u32 offset) +{ + u32 mask = 1 << (gic_irq(d) % 32); + void (*rwp_wait)(void); + void __iomem *base; + + if (gic_irq_in_rdist(d)) { + base = gic_data_rdist_sgi_base(); + rwp_wait = gic_redist_wait_for_rwp; + } else { + base = gic_data.dist_base; + rwp_wait = gic_dist_wait_for_rwp; + } + + writel_relaxed(mask, base + offset + (gic_irq(d) / 32) * 4); + rwp_wait(); +} + +static int gic_peek_irq(struct irq_data *d, u32 offset) +{ + u32 mask = 1 << (gic_irq(d) % 32); + void __iomem *base; + + if (gic_irq_in_rdist(d)) + base = gic_data_rdist_sgi_base(); + else + base = gic_data.dist_base; + + return !!(readl_relaxed(base + offset + (gic_irq(d) / 32) * 4) & mask); +} + +static void gic_mask_irq(struct irq_data *d) +{ + gic_poke_irq(d, GICD_ICENABLER); +} + +static void gic_unmask_irq(struct irq_data *d) +{ + gic_poke_irq(d, GICD_ISENABLER); +} + +static void gic_eoi_irq(struct irq_data *d) +{ + gic_write_eoir(gic_irq(d)); +} + +static int gic_set_type(struct irq_data *d, unsigned int type) +{ + unsigned int irq = gic_irq(d); + void (*rwp_wait)(void); + void __iomem *base; + + /* Interrupt configuration for SGIs can't be changed */ + if (irq < 16) + return -EINVAL; + + if (type != IRQ_TYPE_LEVEL_HIGH && type != IRQ_TYPE_EDGE_RISING) + return -EINVAL; + + if (gic_irq_in_rdist(d)) { + base = gic_data_rdist_sgi_base(); + rwp_wait = gic_redist_wait_for_rwp; + } else { + base = gic_data.dist_base; + rwp_wait = gic_dist_wait_for_rwp; + } + + gic_configure_irq(irq, type, base, rwp_wait); + + return 0; +} + +static u64 gic_mpidr_to_affinity(u64 mpidr) +{ + u64 aff; + + aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 32 | + MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 | + MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | + MPIDR_AFFINITY_LEVEL(mpidr, 0)); + + return aff; +} + +static asmlinkage void __exception_irq_entry gic_handle_irq(struct pt_regs *regs) +{ + u64 irqnr; + + do { + irqnr = gic_read_iar(); + + if (likely(irqnr > 15 && irqnr < 1020)) { + u64 irq = irq_find_mapping(gic_data.domain, irqnr); + if (likely(irq)) { + handle_IRQ(irq, regs); + continue; + } + + WARN_ONCE(true, "Unexpected SPI received!\n"); + gic_write_eoir(irqnr); + } + if (irqnr < 16) { + gic_write_eoir(irqnr); +#ifdef CONFIG_SMP + handle_IPI(irqnr, regs); +#else + WARN_ONCE(true, "Unexpected SGI received!\n"); +#endif + continue; + } + } while (irqnr != ICC_IAR1_EL1_SPURIOUS); +} + +static void __init gic_dist_init(void) +{ + unsigned int i; + u64 affinity; + void __iomem *base = gic_data.dist_base; + + /* Disable the distributor */ + writel_relaxed(0, base + GICD_CTLR); + gic_dist_wait_for_rwp(); + + gic_dist_config(base, gic_data.irq_nr, gic_dist_wait_for_rwp); + + /* Enable distributor with ARE, Group1 */ + writel_relaxed(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A | GICD_CTLR_ENABLE_G1, + base + GICD_CTLR); + + /* + * Set all global interrupts to the boot CPU only. ARE must be + * enabled. 
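+ *
+ * gic_mpidr_to_affinity() packs the MPIDR fields as Aff3<<32 |
+ * Aff2<<16 | Aff1<<8 | Aff0, so a boot CPU with Aff1 = 1 and
+ * Aff0 = 2 makes every GICD_IROUTERn below read back as 0x102.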
+ */ + affinity = gic_mpidr_to_affinity(cpu_logical_map(smp_processor_id())); + for (i = 32; i < gic_data.irq_nr; i++) + writeq_relaxed(affinity, base + GICD_IROUTER + i * 8); +} + +static int gic_populate_rdist(void) +{ + u64 mpidr = cpu_logical_map(smp_processor_id()); + u64 typer; + u32 aff; + int i; + + /* + * Convert affinity to a 32bit value that can be matched to + * GICR_TYPER bits [63:32]. + */ + aff = (MPIDR_AFFINITY_LEVEL(mpidr, 3) << 24 | + MPIDR_AFFINITY_LEVEL(mpidr, 2) << 16 | + MPIDR_AFFINITY_LEVEL(mpidr, 1) << 8 | + MPIDR_AFFINITY_LEVEL(mpidr, 0)); + + for (i = 0; i < gic_data.redist_regions; i++) { + void __iomem *ptr = gic_data.redist_base[i]; + u32 reg; + + reg = readl_relaxed(ptr + GICR_PIDR2) & GIC_PIDR2_ARCH_MASK; + if (reg != GIC_PIDR2_ARCH_GICv3 && + reg != GIC_PIDR2_ARCH_GICv4) { /* We're in trouble... */ + pr_warn("No redistributor present @%p\n", ptr); + break; + } + + do { + typer = readq_relaxed(ptr + GICR_TYPER); + if ((typer >> 32) == aff) { + gic_data_rdist_rd_base() = ptr; + pr_info("CPU%d: found redistributor %llx @%p\n", + smp_processor_id(), + (unsigned long long)mpidr, ptr); + return 0; + } + + if (gic_data.redist_stride) { + ptr += gic_data.redist_stride; + } else { + ptr += SZ_64K * 2; /* Skip RD_base + SGI_base */ + if (typer & GICR_TYPER_VLPIS) + ptr += SZ_64K * 2; /* Skip VLPI_base + reserved page */ + } + } while (!(typer & GICR_TYPER_LAST)); + } + + /* We couldn't even deal with ourselves... */ + WARN(true, "CPU%d: mpidr %llx has no re-distributor!\n", + smp_processor_id(), (unsigned long long)mpidr); + return -ENODEV; +} + +static void gic_cpu_init(void) +{ + void __iomem *rbase; + + /* Register ourselves with the rest of the world */ + if (gic_populate_rdist()) + return; + + gic_enable_redist(); + + rbase = gic_data_rdist_sgi_base(); + + gic_cpu_config(rbase, gic_redist_wait_for_rwp); + + /* Enable system registers */ + gic_enable_sre(); + + /* Set priority mask register */ + gic_write_pmr(DEFAULT_PMR_VALUE); + + /* EOI deactivates interrupt too (mode 0) */ + gic_write_ctlr(ICC_CTLR_EL1_EOImode_drop_dir); + + /* ... and let's hit the road... */ + gic_write_grpen1(1); +} + +#ifdef CONFIG_SMP +static int gic_secondary_init(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + if (action == CPU_STARTING || action == CPU_STARTING_FROZEN) + gic_cpu_init(); + return NOTIFY_OK; +} + +/* + * Notifier for enabling the GIC CPU interface. Set an arbitrarily high + * priority because the GIC needs to be up before the ARM generic timers. + */ +static struct notifier_block gic_cpu_notifier = { + .notifier_call = gic_secondary_init, + .priority = 100, +}; + +static u16 gic_compute_target_list(int *base_cpu, const struct cpumask *mask, + u64 cluster_id) +{ + int cpu = *base_cpu; + u64 mpidr = cpu_logical_map(cpu); + u16 tlist = 0; + + while (cpu < nr_cpu_ids) { + /* + * If we ever get a cluster of more than 16 CPUs, just + * scream and skip that CPU. 
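+ *
+ * (The limit comes from the ICC_SGI1R_EL1 format: its TargetList
+ * field has one bit per Aff0 value, and can only encode Aff0 0-15.)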
+	 */
+		if (WARN_ON((mpidr & 0xff) >= 16))
+			goto out;
+
+		tlist |= 1 << (mpidr & 0xf);
+
+		cpu = cpumask_next(cpu, mask);
+		if (cpu == nr_cpu_ids)
+			goto out;
+
+		mpidr = cpu_logical_map(cpu);
+
+		if (cluster_id != (mpidr & ~0xffUL)) {
+			cpu--;
+			goto out;
+		}
+	}
+out:
+	*base_cpu = cpu;
+	return tlist;
+}
+
+static void gic_send_sgi(u64 cluster_id, u16 tlist, unsigned int irq)
+{
+	u64 val;
+
+	val = (MPIDR_AFFINITY_LEVEL(cluster_id, 3) << 48	|
+	       MPIDR_AFFINITY_LEVEL(cluster_id, 2) << 32	|
+	       irq << 24					|
+	       MPIDR_AFFINITY_LEVEL(cluster_id, 1) << 16	|
+	       tlist);
+
+	pr_debug("CPU%d: ICC_SGI1R_EL1 %llx\n", smp_processor_id(), val);
+	gic_write_sgi1r(val);
+}
+
+static void gic_raise_softirq(const struct cpumask *mask, unsigned int irq)
+{
+	int cpu;
+
+	if (WARN_ON(irq >= 16))
+		return;
+
+	/*
+	 * Ensure that stores to Normal memory are visible to the
+	 * other CPUs before issuing the IPI.
+	 */
+	smp_wmb();
+
+	for_each_cpu_mask(cpu, *mask) {
+		u64 cluster_id = cpu_logical_map(cpu) & ~0xffUL;
+		u16 tlist;
+
+		tlist = gic_compute_target_list(&cpu, mask, cluster_id);
+		gic_send_sgi(cluster_id, tlist, irq);
+	}
+
+	/* Force the above writes to ICC_SGI1R_EL1 to be executed */
+	isb();
+}
+
+static void gic_smp_init(void)
+{
+	set_smp_cross_call(gic_raise_softirq);
+	register_cpu_notifier(&gic_cpu_notifier);
+}
+
+static int gic_set_affinity(struct irq_data *d, const struct cpumask *mask_val,
+			    bool force)
+{
+	unsigned int cpu = cpumask_any_and(mask_val, cpu_online_mask);
+	void __iomem *reg;
+	int enabled;
+	u64 val;
+
+	if (gic_irq_in_rdist(d))
+		return -EINVAL;
+
+	/* If interrupt was enabled, disable it first */
+	enabled = gic_peek_irq(d, GICD_ISENABLER);
+	if (enabled)
+		gic_mask_irq(d);
+
+	reg = gic_dist_base(d) + GICD_IROUTER + (gic_irq(d) * 8);
+	val = gic_mpidr_to_affinity(cpu_logical_map(cpu));
+
+	writeq_relaxed(val, reg);
+
+	/*
+	 * If the interrupt was enabled, enable it again. Otherwise,
+	 * just wait for the distributor to have digested our changes.
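+	 *
+	 * As a concrete example, moving SPI 42 writes the new affinity
+	 * value to GICD_IROUTER + 42 * 8 = 0x6150.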
+ */ + if (enabled) + gic_unmask_irq(d); + else + gic_dist_wait_for_rwp(); + + return IRQ_SET_MASK_OK; +} +#else +#define gic_set_affinity NULL +#define gic_smp_init() do { } while(0) +#endif + +static struct irq_chip gic_chip = { + .name = "GICv3", + .irq_mask = gic_mask_irq, + .irq_unmask = gic_unmask_irq, + .irq_eoi = gic_eoi_irq, + .irq_set_type = gic_set_type, + .irq_set_affinity = gic_set_affinity, +}; + +static int gic_irq_domain_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hw) +{ + /* SGIs are private to the core kernel */ + if (hw < 16) + return -EPERM; + /* PPIs */ + if (hw < 32) { + irq_set_percpu_devid(irq); + irq_set_chip_and_handler(irq, &gic_chip, + handle_percpu_devid_irq); + set_irq_flags(irq, IRQF_VALID | IRQF_NOAUTOEN); + } + /* SPIs */ + if (hw >= 32 && hw < gic_data.irq_nr) { + irq_set_chip_and_handler(irq, &gic_chip, + handle_fasteoi_irq); + set_irq_flags(irq, IRQF_VALID | IRQF_PROBE); + } + irq_set_chip_data(irq, d->host_data); + return 0; +} + +static int gic_irq_domain_xlate(struct irq_domain *d, + struct device_node *controller, + const u32 *intspec, unsigned int intsize, + unsigned long *out_hwirq, unsigned int *out_type) +{ + if (d->of_node != controller) + return -EINVAL; + if (intsize < 3) + return -EINVAL; + + switch(intspec[0]) { + case 0: /* SPI */ + *out_hwirq = intspec[1] + 32; + break; + case 1: /* PPI */ + *out_hwirq = intspec[1] + 16; + break; + default: + return -EINVAL; + } + + *out_type = intspec[2] & IRQ_TYPE_SENSE_MASK; + return 0; +} + +static const struct irq_domain_ops gic_irq_domain_ops = { + .map = gic_irq_domain_map, + .xlate = gic_irq_domain_xlate, +}; + +static int __init gic_of_init(struct device_node *node, struct device_node *parent) +{ + void __iomem *dist_base; + void __iomem **redist_base; + u64 redist_stride; + u32 redist_regions; + u32 reg; + int gic_irqs; + int err; + int i; + + dist_base = of_iomap(node, 0); + if (!dist_base) { + pr_err("%s: unable to map gic dist registers\n", + node->full_name); + return -ENXIO; + } + + reg = readl_relaxed(dist_base + GICD_PIDR2) & GIC_PIDR2_ARCH_MASK; + if (reg != GIC_PIDR2_ARCH_GICv3 && reg != GIC_PIDR2_ARCH_GICv4) { + pr_err("%s: no distributor detected, giving up\n", + node->full_name); + err = -ENODEV; + goto out_unmap_dist; + } + + if (of_property_read_u32(node, "#redistributor-regions", &redist_regions)) + redist_regions = 1; + + redist_base = kzalloc(sizeof(*redist_base) * redist_regions, GFP_KERNEL); + if (!redist_base) { + err = -ENOMEM; + goto out_unmap_dist; + } + + for (i = 0; i < redist_regions; i++) { + redist_base[i] = of_iomap(node, 1 + i); + if (!redist_base[i]) { + pr_err("%s: couldn't map region %d\n", + node->full_name, i); + err = -ENODEV; + goto out_unmap_rdist; + } + } + + if (of_property_read_u64(node, "redistributor-stride", &redist_stride)) + redist_stride = 0; + + gic_data.dist_base = dist_base; + gic_data.redist_base = redist_base; + gic_data.redist_regions = redist_regions; + gic_data.redist_stride = redist_stride; + + /* + * Find out how many interrupts are supported. 
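+ * GICD_TYPER.ITLinesNumber (bits [4:0]) encodes this as
+ * 32 * (value + 1), which is what the computation below implements.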
+ * The GIC only supports up to 1020 interrupt sources (SGI+PPI+SPI) + */ + gic_irqs = readl_relaxed(gic_data.dist_base + GICD_TYPER) & 0x1f; + gic_irqs = (gic_irqs + 1) * 32; + if (gic_irqs > 1020) + gic_irqs = 1020; + gic_data.irq_nr = gic_irqs; + + gic_data.domain = irq_domain_add_tree(node, &gic_irq_domain_ops, + &gic_data); + gic_data.rdist = alloc_percpu(typeof(*gic_data.rdist)); + + if (WARN_ON(!gic_data.domain) || WARN_ON(!gic_data.rdist)) { + err = -ENOMEM; + goto out_free; + } + + set_handle_irq(gic_handle_irq); + + gic_smp_init(); + gic_dist_init(); + gic_cpu_init(); + + return 0; + +out_free: + if (gic_data.domain) + irq_domain_remove(gic_data.domain); + free_percpu(gic_data.rdist); +out_unmap_rdist: + for (i = 0; i < redist_regions; i++) + if (redist_base[i]) + iounmap(redist_base[i]); + kfree(redist_base); +out_unmap_dist: + iounmap(dist_base); + return err; +} + +IRQCHIP_DECLARE(gic_v3, "arm,gic-v3", gic_of_init); diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h new file mode 100644 index 000000000000..30cb7556d43f --- /dev/null +++ b/include/linux/irqchip/arm-gic-v3.h @@ -0,0 +1,198 @@ +/* + * Copyright (C) 2013, 2014 ARM Limited, All Rights Reserved. + * Author: Marc Zyngier + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __LINUX_IRQCHIP_ARM_GIC_V3_H +#define __LINUX_IRQCHIP_ARM_GIC_V3_H + +/* + * Distributor registers. We assume we're running non-secure, with ARE + * being set. Secure-only and non-ARE registers are not described. 
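+ *
+ * With ARE in effect, the banked SGI/PPI state is handled by the
+ * redistributors, which is why the SGI_base offsets further down
+ * simply reuse the equivalent GICD_* values.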
+ */ +#define GICD_CTLR 0x0000 +#define GICD_TYPER 0x0004 +#define GICD_IIDR 0x0008 +#define GICD_STATUSR 0x0010 +#define GICD_SETSPI_NSR 0x0040 +#define GICD_CLRSPI_NSR 0x0048 +#define GICD_SETSPI_SR 0x0050 +#define GICD_CLRSPI_SR 0x0058 +#define GICD_SEIR 0x0068 +#define GICD_ISENABLER 0x0100 +#define GICD_ICENABLER 0x0180 +#define GICD_ISPENDR 0x0200 +#define GICD_ICPENDR 0x0280 +#define GICD_ISACTIVER 0x0300 +#define GICD_ICACTIVER 0x0380 +#define GICD_IPRIORITYR 0x0400 +#define GICD_ICFGR 0x0C00 +#define GICD_IROUTER 0x6000 +#define GICD_PIDR2 0xFFE8 + +#define GICD_CTLR_RWP (1U << 31) +#define GICD_CTLR_ARE_NS (1U << 4) +#define GICD_CTLR_ENABLE_G1A (1U << 1) +#define GICD_CTLR_ENABLE_G1 (1U << 0) + +#define GICD_IROUTER_SPI_MODE_ONE (0U << 31) +#define GICD_IROUTER_SPI_MODE_ANY (1U << 31) + +#define GIC_PIDR2_ARCH_MASK 0xf0 +#define GIC_PIDR2_ARCH_GICv3 0x30 +#define GIC_PIDR2_ARCH_GICv4 0x40 + +/* + * Re-Distributor registers, offsets from RD_base + */ +#define GICR_CTLR GICD_CTLR +#define GICR_IIDR 0x0004 +#define GICR_TYPER 0x0008 +#define GICR_STATUSR GICD_STATUSR +#define GICR_WAKER 0x0014 +#define GICR_SETLPIR 0x0040 +#define GICR_CLRLPIR 0x0048 +#define GICR_SEIR GICD_SEIR +#define GICR_PROPBASER 0x0070 +#define GICR_PENDBASER 0x0078 +#define GICR_INVLPIR 0x00A0 +#define GICR_INVALLR 0x00B0 +#define GICR_SYNCR 0x00C0 +#define GICR_MOVLPIR 0x0100 +#define GICR_MOVALLR 0x0110 +#define GICR_PIDR2 GICD_PIDR2 + +#define GICR_WAKER_ProcessorSleep (1U << 1) +#define GICR_WAKER_ChildrenAsleep (1U << 2) + +/* + * Re-Distributor registers, offsets from SGI_base + */ +#define GICR_ISENABLER0 GICD_ISENABLER +#define GICR_ICENABLER0 GICD_ICENABLER +#define GICR_ISPENDR0 GICD_ISPENDR +#define GICR_ICPENDR0 GICD_ICPENDR +#define GICR_ISACTIVER0 GICD_ISACTIVER +#define GICR_ICACTIVER0 GICD_ICACTIVER +#define GICR_IPRIORITYR0 GICD_IPRIORITYR +#define GICR_ICFGR0 GICD_ICFGR + +#define GICR_TYPER_VLPIS (1U << 1) +#define GICR_TYPER_LAST (1U << 4) + +/* + * CPU interface registers + */ +#define ICC_CTLR_EL1_EOImode_drop_dir (0U << 1) +#define ICC_CTLR_EL1_EOImode_drop (1U << 1) +#define ICC_SRE_EL1_SRE (1U << 0) + +/* + * Hypervisor interface registers (SRE only) + */ +#define ICH_LR_VIRTUAL_ID_MASK ((1UL << 32) - 1) + +#define ICH_LR_EOI (1UL << 41) +#define ICH_LR_GROUP (1UL << 60) +#define ICH_LR_STATE (3UL << 62) +#define ICH_LR_PENDING_BIT (1UL << 62) +#define ICH_LR_ACTIVE_BIT (1UL << 63) + +#define ICH_MISR_EOI (1 << 0) +#define ICH_MISR_U (1 << 1) + +#define ICH_HCR_EN (1 << 0) +#define ICH_HCR_UIE (1 << 1) + +#define ICH_VMCR_CTLR_SHIFT 0 +#define ICH_VMCR_CTLR_MASK (0x21f << ICH_VMCR_CTLR_SHIFT) +#define ICH_VMCR_BPR1_SHIFT 18 +#define ICH_VMCR_BPR1_MASK (7 << ICH_VMCR_BPR1_SHIFT) +#define ICH_VMCR_BPR0_SHIFT 21 +#define ICH_VMCR_BPR0_MASK (7 << ICH_VMCR_BPR0_SHIFT) +#define ICH_VMCR_PMR_SHIFT 24 +#define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) + +#define ICC_EOIR1_EL1 S3_0_C12_C12_1 +#define ICC_IAR1_EL1 S3_0_C12_C12_0 +#define ICC_SGI1R_EL1 S3_0_C12_C11_5 +#define ICC_PMR_EL1 S3_0_C4_C6_0 +#define ICC_CTLR_EL1 S3_0_C12_C12_4 +#define ICC_SRE_EL1 S3_0_C12_C12_5 +#define ICC_GRPEN1_EL1 S3_0_C12_C12_7 + +#define ICC_IAR1_EL1_SPURIOUS 0x3ff + +#define ICC_SRE_EL2 S3_4_C12_C9_5 + +#define ICC_SRE_EL2_SRE (1 << 0) +#define ICC_SRE_EL2_ENABLE (1 << 3) + +/* + * System register definitions + */ +#define ICH_VSEIR_EL2 S3_4_C12_C9_4 +#define ICH_HCR_EL2 S3_4_C12_C11_0 +#define ICH_VTR_EL2 S3_4_C12_C11_1 +#define ICH_MISR_EL2 S3_4_C12_C11_2 +#define ICH_EISR_EL2 S3_4_C12_C11_3 +#define 
ICH_ELSR_EL2 S3_4_C12_C11_5 +#define ICH_VMCR_EL2 S3_4_C12_C11_7 + +#define __LR0_EL2(x) S3_4_C12_C12_ ## x +#define __LR8_EL2(x) S3_4_C12_C13_ ## x + +#define ICH_LR0_EL2 __LR0_EL2(0) +#define ICH_LR1_EL2 __LR0_EL2(1) +#define ICH_LR2_EL2 __LR0_EL2(2) +#define ICH_LR3_EL2 __LR0_EL2(3) +#define ICH_LR4_EL2 __LR0_EL2(4) +#define ICH_LR5_EL2 __LR0_EL2(5) +#define ICH_LR6_EL2 __LR0_EL2(6) +#define ICH_LR7_EL2 __LR0_EL2(7) +#define ICH_LR8_EL2 __LR8_EL2(0) +#define ICH_LR9_EL2 __LR8_EL2(1) +#define ICH_LR10_EL2 __LR8_EL2(2) +#define ICH_LR11_EL2 __LR8_EL2(3) +#define ICH_LR12_EL2 __LR8_EL2(4) +#define ICH_LR13_EL2 __LR8_EL2(5) +#define ICH_LR14_EL2 __LR8_EL2(6) +#define ICH_LR15_EL2 __LR8_EL2(7) + +#define __AP0Rx_EL2(x) S3_4_C12_C8_ ## x +#define ICH_AP0R0_EL2 __AP0Rx_EL2(0) +#define ICH_AP0R1_EL2 __AP0Rx_EL2(1) +#define ICH_AP0R2_EL2 __AP0Rx_EL2(2) +#define ICH_AP0R3_EL2 __AP0Rx_EL2(3) + +#define __AP1Rx_EL2(x) S3_4_C12_C9_ ## x +#define ICH_AP1R0_EL2 __AP1Rx_EL2(0) +#define ICH_AP1R1_EL2 __AP1Rx_EL2(1) +#define ICH_AP1R2_EL2 __AP1Rx_EL2(2) +#define ICH_AP1R3_EL2 __AP1Rx_EL2(3) + +#ifndef __ASSEMBLY__ + +#include + +static inline void gic_write_eoir(u64 irq) +{ + asm volatile("msr " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" (irq)); + isb(); +} + +#endif + +#endif From 4e6f7084096c08e37f909d7b075a91b72580405f Mon Sep 17 00:00:00 2001 From: Zi Shen Lim Date: Sat, 7 Jun 2014 01:55:27 +0100 Subject: [PATCH 03/55] arm64: topology: add MPIDR-based detection Create cpu topology based on MPIDR. When hardware sets MPIDR to sane values, this method will always work. Therefore it should also work well as the fallback method. [1] When we have multiple processing elements in the system, we create the cpu topology by mapping each affinity level (from lowest to highest) to threads (if they exist), cores, and clusters. [1] http://www.spinics.net/lists/arm-kernel/msg317445.html Acked-by: Lorenzo Pieralisi Signed-off-by: Zi Shen Lim Signed-off-by: Mark Brown Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 2 ++ arch/arm64/kernel/topology.c | 47 ++++++++++++++++++++++---------- 2 files changed, 35 insertions(+), 14 deletions(-) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index 27f54a7cc81b..ed48a3a7836a 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -18,6 +18,8 @@ #define INVALID_HWID ULONG_MAX +#define MPIDR_UP_BITMASK (0x1 << 30) +#define MPIDR_MT_BITMASK (0x1 << 24) #define MPIDR_HWID_BITMASK 0xff00ffffff #define MPIDR_LEVEL_BITS_SHIFT 3 diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 43514f905916..b6ee26b0939a 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -20,6 +20,7 @@ #include #include +#include #include static int __init get_cpu_for_node(struct device_node *node) @@ -188,13 +189,9 @@ static int __init parse_dt_topology(void) * Check that all cores are in the topology; the SMP code will * only mark cores described in the DT as possible. */ - for_each_possible_cpu(cpu) { - if (cpu_topology[cpu].cluster_id == -1) { - pr_err("CPU%d: No topology information specified\n", - cpu); + for_each_possible_cpu(cpu) + if (cpu_topology[cpu].cluster_id == -1) ret = -EINVAL; - } - } out_map: of_node_put(map); @@ -219,14 +216,6 @@ static void update_siblings_masks(unsigned int cpuid) struct cpu_topology *cpu_topo, *cpuid_topo = &cpu_topology[cpuid]; int cpu; - if (cpuid_topo->cluster_id == -1) { - /* - * DT does not contain topology information for this cpu. 
- */ - pr_debug("CPU%u: No topology information configured\n", cpuid); - return; - } - /* update core and thread sibling masks */ for_each_possible_cpu(cpu) { cpu_topo = &cpu_topology[cpu]; @@ -249,6 +238,36 @@ static void update_siblings_masks(unsigned int cpuid) void store_cpu_topology(unsigned int cpuid) { + struct cpu_topology *cpuid_topo = &cpu_topology[cpuid]; + u64 mpidr; + + if (cpuid_topo->cluster_id != -1) + goto topology_populated; + + mpidr = read_cpuid_mpidr(); + + /* Uniprocessor systems can rely on default topology values */ + if (mpidr & MPIDR_UP_BITMASK) + return; + + /* Create cpu topology mapping based on MPIDR. */ + if (mpidr & MPIDR_MT_BITMASK) { + /* Multiprocessor system : Multi-threads per core */ + cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); + cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2); + } else { + /* Multiprocessor system : Single-thread per core */ + cpuid_topo->thread_id = -1; + cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); + cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); + } + + pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n", + cpuid, cpuid_topo->cluster_id, cpuid_topo->core_id, + cpuid_topo->thread_id, mpidr); + +topology_populated: update_siblings_masks(cpuid); } From c0c264ae5112d1cdb7d37d4e208b7a7e766a7418 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Wed, 25 Jun 2014 23:55:03 +0100 Subject: [PATCH 04/55] arm64: Add CONFIG_CC_STACKPROTECTOR arm64 currently lacks support for -fstack-protector. Add similar functionality to arm to detect stack corruption. Acked-by: Will Deacon Acked-by: Kees Cook Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/stackprotector.h | 38 +++++++++++++++++++++++++ arch/arm64/kernel/process.c | 6 ++++ 3 files changed, 45 insertions(+) create mode 100644 arch/arm64/include/asm/stackprotector.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index a474de346be6..f865436096b6 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -32,6 +32,7 @@ config ARM64 select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK select HAVE_C_RECORDMCOUNT + select HAVE_CC_STACKPROTECTOR select HAVE_DEBUG_BUGVERBOSE select HAVE_DEBUG_KMEMLEAK select HAVE_DMA_API_DEBUG diff --git a/arch/arm64/include/asm/stackprotector.h b/arch/arm64/include/asm/stackprotector.h new file mode 100644 index 000000000000..fe5e287dc56b --- /dev/null +++ b/arch/arm64/include/asm/stackprotector.h @@ -0,0 +1,38 @@ +/* + * GCC stack protector support. + * + * Stack protector works by putting predefined pattern at the start of + * the stack frame and verifying that it hasn't been overwritten when + * returning from the function. The pattern is called stack canary + * and gcc expects it to be defined by a global variable called + * "__stack_chk_guard" on ARM. This unfortunately means that on SMP + * we cannot have a different canary value per task. + */ + +#ifndef __ASM_STACKPROTECTOR_H +#define __ASM_STACKPROTECTOR_H + +#include +#include + +extern unsigned long __stack_chk_guard; + +/* + * Initialize the stackprotector canary value. + * + * NOTE: this must only be called from functions that never return, + * and it must always be inlined. + */ +static __always_inline void boot_init_stack_canary(void) +{ + unsigned long canary; + + /* Try to get a semi random initial value. 
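+ * get_random_bytes() may not be fully seeded this early in boot, so
+ * the value is additionally mixed with LINUX_VERSION_CODE below.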
*/ + get_random_bytes(&canary, sizeof(canary)); + canary ^= LINUX_VERSION_CODE; + + current->stack_canary = canary; + __stack_chk_guard = current->stack_canary; +} + +#endif /* _ASM_STACKPROTECTOR_H */ diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 43b7c34f92cb..1309d64aa926 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -51,6 +51,12 @@ #include #include +#ifdef CONFIG_CC_STACKPROTECTOR +#include +unsigned long __stack_chk_guard __read_mostly; +EXPORT_SYMBOL(__stack_chk_guard); +#endif + static void setup_restart(void) { /* From 6ab6463aeb5fbc75fa3227befb508fc33b34dbf1 Mon Sep 17 00:00:00 2001 From: Larry Bassel Date: Fri, 30 May 2014 20:34:14 +0100 Subject: [PATCH 05/55] arm64: adjust el0_sync so that a function can be called To implement the context tracker properly on arm64, a function call needs to be made after debugging and interrupts are turned on, but before the lr is changed to point to ret_to_user(). If the function call is made after the lr is changed the function will not return to the correct place. For similar reasons, defer the setting of x0 so that it doesn't need to be saved around the function call (save far_el1 in x26 temporarily instead). Acked-by: Will Deacon Reviewed-by: Kevin Hilman Tested-by: Kevin Hilman Signed-off-by: Larry Bassel Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/entry.S | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 9ce04ba6bcb0..d7230bf68ad1 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -353,7 +353,6 @@ el0_sync: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC64 // SVC in 64-bit state b.eq el0_svc - adr lr, ret_to_user cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eq el0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -382,7 +381,6 @@ el0_sync_compat: lsr x24, x25, #ESR_EL1_EC_SHIFT // exception class cmp x24, #ESR_EL1_EC_SVC32 // SVC in 32-bit state b.eq el0_svc_compat - adr lr, ret_to_user cmp x24, #ESR_EL1_EC_DABT_EL0 // data abort in EL0 b.eq el0_da cmp x24, #ESR_EL1_EC_IABT_EL0 // instruction abort in EL0 @@ -425,22 +423,25 @@ el0_da: /* * Data abort handling */ - mrs x0, far_el1 - bic x0, x0, #(0xff << 56) + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + bic x0, x26, #(0xff << 56) mov x1, x25 mov x2, sp + adr lr, ret_to_user b do_mem_abort el0_ia: /* * Instruction abort handling */ - mrs x0, far_el1 + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp + adr lr, ret_to_user b do_mem_abort el0_fpsimd_acc: /* @@ -449,6 +450,7 @@ el0_fpsimd_acc: enable_dbg mov x0, x25 mov x1, sp + adr lr, ret_to_user b do_fpsimd_acc el0_fpsimd_exc: /* @@ -457,16 +459,19 @@ el0_fpsimd_exc: enable_dbg mov x0, x25 mov x1, sp + adr lr, ret_to_user b do_fpsimd_exc el0_sp_pc: /* * Stack or PC alignment exception handling */ - mrs x0, far_el1 + mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + mov x0, x26 mov x1, x25 mov x2, sp + adr lr, ret_to_user b do_sp_pc_abort el0_undef: /* @@ -475,6 +480,7 @@ el0_undef: // enable interrupts before calling the main handler enable_dbg_and_irq mov x0, sp + adr lr, ret_to_user b do_undefinstr el0_dbg: /* @@ -492,6 +498,7 @@ el0_inv: mov x0, sp mov x1, 
#BAD_SYNC mrs x2, esr_el1 + adr lr, ret_to_user b bad_mode ENDPROC(el0_sync) From 6c81fe7925cc4c42de49e17be21eb86d1173c3a7 Mon Sep 17 00:00:00 2001 From: Larry Bassel Date: Fri, 30 May 2014 12:34:15 -0700 Subject: [PATCH 06/55] arm64: enable context tracking Make calls to ct_user_enter when the kernel is exited and ct_user_exit when the kernel is entered (in el0_da, el0_ia, el0_svc, el0_irq and all of the "error" paths). These macros expand to function calls which will only work properly if el0_sync and related code has been rearranged (in a previous patch of this series). The calls to ct_user_exit are made after hw debugging has been enabled (enable_dbg_and_irq). The call to ct_user_enter is made at the beginning of the kernel_exit macro. This patch is based on earlier work by Kevin Hilman. Save/restore optimizations were also done by Kevin. Acked-by: Will Deacon Reviewed-by: Kevin Hilman Tested-by: Kevin Hilman Signed-off-by: Larry Bassel Signed-off-by: Kevin Hilman Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + arch/arm64/include/asm/thread_info.h | 5 +++- arch/arm64/kernel/entry.S | 36 ++++++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index f865436096b6..1cb806529393 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -63,6 +63,7 @@ config ARM64 select RTC_LIB select SPARSE_IRQ select SYSCTL_EXCEPTION_TRACE + select HAVE_CONTEXT_TRACKING help ARM 64-bit (AArch64) Linux support. diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index e40b6d06d515..45108d802f5e 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -103,6 +103,7 @@ static inline struct thread_info *current_thread_info(void) #define TIF_NEED_RESCHED 1 #define TIF_NOTIFY_RESUME 2 /* callback before returning to user */ #define TIF_FOREIGN_FPSTATE 3 /* CPU's FP state is not current's */ +#define TIF_NOHZ 7 #define TIF_SYSCALL_TRACE 8 #define TIF_SYSCALL_AUDIT 9 #define TIF_SYSCALL_TRACEPOINT 10 @@ -118,6 +119,7 @@ static inline struct thread_info *current_thread_info(void) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) #define _TIF_NOTIFY_RESUME (1 << TIF_NOTIFY_RESUME) #define _TIF_FOREIGN_FPSTATE (1 << TIF_FOREIGN_FPSTATE) +#define _TIF_NOHZ (1 << TIF_NOHZ) #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SYSCALL_AUDIT (1 << TIF_SYSCALL_AUDIT) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) @@ -128,7 +130,8 @@ static inline struct thread_info *current_thread_info(void) _TIF_NOTIFY_RESUME | _TIF_FOREIGN_FPSTATE) #define _TIF_SYSCALL_WORK (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \ - _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP) + _TIF_SYSCALL_TRACEPOINT | _TIF_SECCOMP | \ + _TIF_NOHZ) #endif /* __KERNEL__ */ #endif /* __ASM_THREAD_INFO_H */ diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index d7230bf68ad1..93ac58dbf07c 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -29,6 +29,32 @@ #include #include +/* + * Context tracking subsystem. Used to instrument transitions + * between user and kernel mode. + */ + .macro ct_user_exit, syscall = 0 +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_exit + .if \syscall == 1 + /* + * Save/restore needed during syscalls. Restore syscall arguments from + * the values already saved on stack during kernel_entry. 
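+ * context_tracking_user_exit() is a C function and may clobber
+ * x0-x7, the caller-saved registers holding the syscall arguments
+ * at this point.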
+ */ + ldp x0, x1, [sp] + ldp x2, x3, [sp, #S_X2] + ldp x4, x5, [sp, #S_X4] + ldp x6, x7, [sp, #S_X6] + .endif +#endif + .endm + + .macro ct_user_enter +#ifdef CONFIG_CONTEXT_TRACKING + bl context_tracking_user_enter +#endif + .endm + /* * Bad Abort numbers *----------------- @@ -91,6 +117,7 @@ .macro kernel_exit, el, ret = 0 ldp x21, x22, [sp, #S_PC] // load ELR, SPSR .if \el == 0 + ct_user_enter ldr x23, [sp, #S_SP] // load return stack pointer .endif .if \ret @@ -426,6 +453,7 @@ el0_da: mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit bic x0, x26, #(0xff << 56) mov x1, x25 mov x2, sp @@ -438,6 +466,7 @@ el0_ia: mrs x26, far_el1 // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit mov x0, x26 orr x1, x25, #1 << 24 // use reserved ISS bit for instruction aborts mov x2, sp @@ -448,6 +477,7 @@ el0_fpsimd_acc: * Floating Point or Advanced SIMD access */ enable_dbg + ct_user_exit mov x0, x25 mov x1, sp adr lr, ret_to_user @@ -457,6 +487,7 @@ el0_fpsimd_exc: * Floating Point or Advanced SIMD exception */ enable_dbg + ct_user_exit mov x0, x25 mov x1, sp adr lr, ret_to_user @@ -479,6 +510,7 @@ el0_undef: */ // enable interrupts before calling the main handler enable_dbg_and_irq + ct_user_exit mov x0, sp adr lr, ret_to_user b do_undefinstr @@ -492,9 +524,11 @@ el0_dbg: mov x2, sp bl do_debug_exception enable_dbg + ct_user_exit b ret_to_user el0_inv: enable_dbg + ct_user_exit mov x0, sp mov x1, #BAD_SYNC mrs x2, esr_el1 @@ -511,6 +545,7 @@ el0_irq_naked: bl trace_hardirqs_off #endif + ct_user_exit irq_handler #ifdef CONFIG_TRACE_IRQFLAGS @@ -615,6 +650,7 @@ el0_svc: el0_svc_naked: // compat entry point stp x0, scno, [sp, #S_ORIG_X0] // save the original x0 and syscall number enable_dbg_and_irq + ct_user_exit 1 ldr x16, [tsk, #TI_FLAGS] // check for syscall hooks tst x16, #_TIF_SYSCALL_WORK From f3e5c847ec3d12b4de7898662024ee25622b25d7 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 30 Jan 2014 17:56:56 +0000 Subject: [PATCH 07/55] arm64: Add __NR_* definitions for compat syscalls This patch adds __NR_* definitions to asm/unistd32.h, moves the __NR_compat_* definitions to asm/unistd.h and removes all the explicit unistd32.h includes apart from the one building the compat syscall table. The aim is to have the compat __NR_* definitions available but without colliding with the native syscall definitions (required by lib/compat_audit.c to avoid duplicating the audit header files between native and compat). Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/unistd.h | 17 + arch/arm64/include/asm/unistd32.h | 1166 +++++++++++++++++++---------- arch/arm64/kernel/entry.S | 1 - arch/arm64/kernel/kuser32.S | 2 +- arch/arm64/kernel/signal32.c | 2 +- arch/arm64/kernel/sys_compat.c | 2 +- 6 files changed, 786 insertions(+), 404 deletions(-) diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index e5f47df00c24..4bc95d27e063 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -26,7 +26,24 @@ #define __ARCH_WANT_COMPAT_SYS_SENDFILE #define __ARCH_WANT_SYS_FORK #define __ARCH_WANT_SYS_VFORK + +/* + * Compat syscall numbers used by the AArch64 kernel. + */ +#define __NR_compat_restart_syscall 0 +#define __NR_compat_sigreturn 119 +#define __NR_compat_rt_sigreturn 173 + +/* + * The following SVCs are ARM private. 
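+ * They sit above the regular syscall number range (base 0x0f0000)
+ * and are dispatched outside the table generated from unistd32.h.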
+ */ +#define __ARM_NR_COMPAT_BASE 0x0f0000 +#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) +#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) + +#define __NR_compat_syscalls 383 #endif + #define __ARCH_WANT_SYS_CLONE #include diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index c8d8fc17bd5a..e242600c4046 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -21,403 +21,769 @@ #define __SYSCALL(x, y) #endif -__SYSCALL(0, sys_restart_syscall) -__SYSCALL(1, sys_exit) -__SYSCALL(2, sys_fork) -__SYSCALL(3, sys_read) -__SYSCALL(4, sys_write) -__SYSCALL(5, compat_sys_open) -__SYSCALL(6, sys_close) -__SYSCALL(7, sys_ni_syscall) /* 7 was sys_waitpid */ -__SYSCALL(8, sys_creat) -__SYSCALL(9, sys_link) -__SYSCALL(10, sys_unlink) -__SYSCALL(11, compat_sys_execve) -__SYSCALL(12, sys_chdir) -__SYSCALL(13, sys_ni_syscall) /* 13 was sys_time */ -__SYSCALL(14, sys_mknod) -__SYSCALL(15, sys_chmod) -__SYSCALL(16, sys_lchown16) -__SYSCALL(17, sys_ni_syscall) /* 17 was sys_break */ -__SYSCALL(18, sys_ni_syscall) /* 18 was sys_stat */ -__SYSCALL(19, compat_sys_lseek) -__SYSCALL(20, sys_getpid) -__SYSCALL(21, compat_sys_mount) -__SYSCALL(22, sys_ni_syscall) /* 22 was sys_umount */ -__SYSCALL(23, sys_setuid16) -__SYSCALL(24, sys_getuid16) -__SYSCALL(25, sys_ni_syscall) /* 25 was sys_stime */ -__SYSCALL(26, compat_sys_ptrace) -__SYSCALL(27, sys_ni_syscall) /* 27 was sys_alarm */ -__SYSCALL(28, sys_ni_syscall) /* 28 was sys_fstat */ -__SYSCALL(29, sys_pause) -__SYSCALL(30, sys_ni_syscall) /* 30 was sys_utime */ -__SYSCALL(31, sys_ni_syscall) /* 31 was sys_stty */ -__SYSCALL(32, sys_ni_syscall) /* 32 was sys_gtty */ -__SYSCALL(33, sys_access) -__SYSCALL(34, sys_nice) -__SYSCALL(35, sys_ni_syscall) /* 35 was sys_ftime */ -__SYSCALL(36, sys_sync) -__SYSCALL(37, sys_kill) -__SYSCALL(38, sys_rename) -__SYSCALL(39, sys_mkdir) -__SYSCALL(40, sys_rmdir) -__SYSCALL(41, sys_dup) -__SYSCALL(42, sys_pipe) -__SYSCALL(43, compat_sys_times) -__SYSCALL(44, sys_ni_syscall) /* 44 was sys_prof */ -__SYSCALL(45, sys_brk) -__SYSCALL(46, sys_setgid16) -__SYSCALL(47, sys_getgid16) -__SYSCALL(48, sys_ni_syscall) /* 48 was sys_signal */ -__SYSCALL(49, sys_geteuid16) -__SYSCALL(50, sys_getegid16) -__SYSCALL(51, sys_acct) -__SYSCALL(52, sys_umount) -__SYSCALL(53, sys_ni_syscall) /* 53 was sys_lock */ -__SYSCALL(54, compat_sys_ioctl) -__SYSCALL(55, compat_sys_fcntl) -__SYSCALL(56, sys_ni_syscall) /* 56 was sys_mpx */ -__SYSCALL(57, sys_setpgid) -__SYSCALL(58, sys_ni_syscall) /* 58 was sys_ulimit */ -__SYSCALL(59, sys_ni_syscall) /* 59 was sys_olduname */ -__SYSCALL(60, sys_umask) -__SYSCALL(61, sys_chroot) -__SYSCALL(62, compat_sys_ustat) -__SYSCALL(63, sys_dup2) -__SYSCALL(64, sys_getppid) -__SYSCALL(65, sys_getpgrp) -__SYSCALL(66, sys_setsid) -__SYSCALL(67, compat_sys_sigaction) -__SYSCALL(68, sys_ni_syscall) /* 68 was sys_sgetmask */ -__SYSCALL(69, sys_ni_syscall) /* 69 was sys_ssetmask */ -__SYSCALL(70, sys_setreuid16) -__SYSCALL(71, sys_setregid16) -__SYSCALL(72, sys_sigsuspend) -__SYSCALL(73, compat_sys_sigpending) -__SYSCALL(74, sys_sethostname) -__SYSCALL(75, compat_sys_setrlimit) -__SYSCALL(76, sys_ni_syscall) /* 76 was compat_sys_getrlimit */ -__SYSCALL(77, compat_sys_getrusage) -__SYSCALL(78, compat_sys_gettimeofday) -__SYSCALL(79, compat_sys_settimeofday) -__SYSCALL(80, sys_getgroups16) -__SYSCALL(81, sys_setgroups16) -__SYSCALL(82, sys_ni_syscall) /* 82 was compat_sys_select */ -__SYSCALL(83, sys_symlink) -__SYSCALL(84, sys_ni_syscall) /* 
84 was sys_lstat */ -__SYSCALL(85, sys_readlink) -__SYSCALL(86, sys_uselib) -__SYSCALL(87, sys_swapon) -__SYSCALL(88, sys_reboot) -__SYSCALL(89, sys_ni_syscall) /* 89 was sys_readdir */ -__SYSCALL(90, sys_ni_syscall) /* 90 was sys_mmap */ -__SYSCALL(91, sys_munmap) -__SYSCALL(92, compat_sys_truncate) -__SYSCALL(93, compat_sys_ftruncate) -__SYSCALL(94, sys_fchmod) -__SYSCALL(95, sys_fchown16) -__SYSCALL(96, sys_getpriority) -__SYSCALL(97, sys_setpriority) -__SYSCALL(98, sys_ni_syscall) /* 98 was sys_profil */ -__SYSCALL(99, compat_sys_statfs) -__SYSCALL(100, compat_sys_fstatfs) -__SYSCALL(101, sys_ni_syscall) /* 101 was sys_ioperm */ -__SYSCALL(102, sys_ni_syscall) /* 102 was sys_socketcall */ -__SYSCALL(103, sys_syslog) -__SYSCALL(104, compat_sys_setitimer) -__SYSCALL(105, compat_sys_getitimer) -__SYSCALL(106, compat_sys_newstat) -__SYSCALL(107, compat_sys_newlstat) -__SYSCALL(108, compat_sys_newfstat) -__SYSCALL(109, sys_ni_syscall) /* 109 was sys_uname */ -__SYSCALL(110, sys_ni_syscall) /* 110 was sys_iopl */ -__SYSCALL(111, sys_vhangup) -__SYSCALL(112, sys_ni_syscall) /* 112 was sys_idle */ -__SYSCALL(113, sys_ni_syscall) /* 113 was sys_syscall */ -__SYSCALL(114, compat_sys_wait4) -__SYSCALL(115, sys_swapoff) -__SYSCALL(116, compat_sys_sysinfo) -__SYSCALL(117, sys_ni_syscall) /* 117 was sys_ipc */ -__SYSCALL(118, sys_fsync) -__SYSCALL(119, compat_sys_sigreturn_wrapper) -__SYSCALL(120, sys_clone) -__SYSCALL(121, sys_setdomainname) -__SYSCALL(122, sys_newuname) -__SYSCALL(123, sys_ni_syscall) /* 123 was sys_modify_ldt */ -__SYSCALL(124, compat_sys_adjtimex) -__SYSCALL(125, sys_mprotect) -__SYSCALL(126, compat_sys_sigprocmask) -__SYSCALL(127, sys_ni_syscall) /* 127 was sys_create_module */ -__SYSCALL(128, sys_init_module) -__SYSCALL(129, sys_delete_module) -__SYSCALL(130, sys_ni_syscall) /* 130 was sys_get_kernel_syms */ -__SYSCALL(131, sys_quotactl) -__SYSCALL(132, sys_getpgid) -__SYSCALL(133, sys_fchdir) -__SYSCALL(134, sys_bdflush) -__SYSCALL(135, sys_sysfs) -__SYSCALL(136, sys_personality) -__SYSCALL(137, sys_ni_syscall) /* 137 was sys_afs_syscall */ -__SYSCALL(138, sys_setfsuid16) -__SYSCALL(139, sys_setfsgid16) -__SYSCALL(140, sys_llseek) -__SYSCALL(141, compat_sys_getdents) -__SYSCALL(142, compat_sys_select) -__SYSCALL(143, sys_flock) -__SYSCALL(144, sys_msync) -__SYSCALL(145, compat_sys_readv) -__SYSCALL(146, compat_sys_writev) -__SYSCALL(147, sys_getsid) -__SYSCALL(148, sys_fdatasync) -__SYSCALL(149, compat_sys_sysctl) -__SYSCALL(150, sys_mlock) -__SYSCALL(151, sys_munlock) -__SYSCALL(152, sys_mlockall) -__SYSCALL(153, sys_munlockall) -__SYSCALL(154, sys_sched_setparam) -__SYSCALL(155, sys_sched_getparam) -__SYSCALL(156, sys_sched_setscheduler) -__SYSCALL(157, sys_sched_getscheduler) -__SYSCALL(158, sys_sched_yield) -__SYSCALL(159, sys_sched_get_priority_max) -__SYSCALL(160, sys_sched_get_priority_min) -__SYSCALL(161, compat_sys_sched_rr_get_interval) -__SYSCALL(162, compat_sys_nanosleep) -__SYSCALL(163, sys_mremap) -__SYSCALL(164, sys_setresuid16) -__SYSCALL(165, sys_getresuid16) -__SYSCALL(166, sys_ni_syscall) /* 166 was sys_vm86 */ -__SYSCALL(167, sys_ni_syscall) /* 167 was sys_query_module */ -__SYSCALL(168, sys_poll) -__SYSCALL(169, sys_ni_syscall) -__SYSCALL(170, sys_setresgid16) -__SYSCALL(171, sys_getresgid16) -__SYSCALL(172, sys_prctl) -__SYSCALL(173, compat_sys_rt_sigreturn_wrapper) -__SYSCALL(174, compat_sys_rt_sigaction) -__SYSCALL(175, compat_sys_rt_sigprocmask) -__SYSCALL(176, compat_sys_rt_sigpending) -__SYSCALL(177, compat_sys_rt_sigtimedwait) -__SYSCALL(178, 
compat_sys_rt_sigqueueinfo) -__SYSCALL(179, compat_sys_rt_sigsuspend) -__SYSCALL(180, compat_sys_pread64_wrapper) -__SYSCALL(181, compat_sys_pwrite64_wrapper) -__SYSCALL(182, sys_chown16) -__SYSCALL(183, sys_getcwd) -__SYSCALL(184, sys_capget) -__SYSCALL(185, sys_capset) -__SYSCALL(186, compat_sys_sigaltstack) -__SYSCALL(187, compat_sys_sendfile) -__SYSCALL(188, sys_ni_syscall) /* 188 reserved */ -__SYSCALL(189, sys_ni_syscall) /* 189 reserved */ -__SYSCALL(190, sys_vfork) -__SYSCALL(191, compat_sys_getrlimit) /* SuS compliant getrlimit */ -__SYSCALL(192, sys_mmap_pgoff) -__SYSCALL(193, compat_sys_truncate64_wrapper) -__SYSCALL(194, compat_sys_ftruncate64_wrapper) -__SYSCALL(195, sys_stat64) -__SYSCALL(196, sys_lstat64) -__SYSCALL(197, sys_fstat64) -__SYSCALL(198, sys_lchown) -__SYSCALL(199, sys_getuid) -__SYSCALL(200, sys_getgid) -__SYSCALL(201, sys_geteuid) -__SYSCALL(202, sys_getegid) -__SYSCALL(203, sys_setreuid) -__SYSCALL(204, sys_setregid) -__SYSCALL(205, sys_getgroups) -__SYSCALL(206, sys_setgroups) -__SYSCALL(207, sys_fchown) -__SYSCALL(208, sys_setresuid) -__SYSCALL(209, sys_getresuid) -__SYSCALL(210, sys_setresgid) -__SYSCALL(211, sys_getresgid) -__SYSCALL(212, sys_chown) -__SYSCALL(213, sys_setuid) -__SYSCALL(214, sys_setgid) -__SYSCALL(215, sys_setfsuid) -__SYSCALL(216, sys_setfsgid) -__SYSCALL(217, compat_sys_getdents64) -__SYSCALL(218, sys_pivot_root) -__SYSCALL(219, sys_mincore) -__SYSCALL(220, sys_madvise) -__SYSCALL(221, compat_sys_fcntl64) -__SYSCALL(222, sys_ni_syscall) /* 222 for tux */ -__SYSCALL(223, sys_ni_syscall) /* 223 is unused */ -__SYSCALL(224, sys_gettid) -__SYSCALL(225, compat_sys_readahead_wrapper) -__SYSCALL(226, sys_setxattr) -__SYSCALL(227, sys_lsetxattr) -__SYSCALL(228, sys_fsetxattr) -__SYSCALL(229, sys_getxattr) -__SYSCALL(230, sys_lgetxattr) -__SYSCALL(231, sys_fgetxattr) -__SYSCALL(232, sys_listxattr) -__SYSCALL(233, sys_llistxattr) -__SYSCALL(234, sys_flistxattr) -__SYSCALL(235, sys_removexattr) -__SYSCALL(236, sys_lremovexattr) -__SYSCALL(237, sys_fremovexattr) -__SYSCALL(238, sys_tkill) -__SYSCALL(239, sys_sendfile64) -__SYSCALL(240, compat_sys_futex) -__SYSCALL(241, compat_sys_sched_setaffinity) -__SYSCALL(242, compat_sys_sched_getaffinity) -__SYSCALL(243, compat_sys_io_setup) -__SYSCALL(244, sys_io_destroy) -__SYSCALL(245, compat_sys_io_getevents) -__SYSCALL(246, compat_sys_io_submit) -__SYSCALL(247, sys_io_cancel) -__SYSCALL(248, sys_exit_group) -__SYSCALL(249, compat_sys_lookup_dcookie) -__SYSCALL(250, sys_epoll_create) -__SYSCALL(251, sys_epoll_ctl) -__SYSCALL(252, sys_epoll_wait) -__SYSCALL(253, sys_remap_file_pages) -__SYSCALL(254, sys_ni_syscall) /* 254 for set_thread_area */ -__SYSCALL(255, sys_ni_syscall) /* 255 for get_thread_area */ -__SYSCALL(256, sys_set_tid_address) -__SYSCALL(257, compat_sys_timer_create) -__SYSCALL(258, compat_sys_timer_settime) -__SYSCALL(259, compat_sys_timer_gettime) -__SYSCALL(260, sys_timer_getoverrun) -__SYSCALL(261, sys_timer_delete) -__SYSCALL(262, compat_sys_clock_settime) -__SYSCALL(263, compat_sys_clock_gettime) -__SYSCALL(264, compat_sys_clock_getres) -__SYSCALL(265, compat_sys_clock_nanosleep) -__SYSCALL(266, compat_sys_statfs64_wrapper) -__SYSCALL(267, compat_sys_fstatfs64_wrapper) -__SYSCALL(268, sys_tgkill) -__SYSCALL(269, compat_sys_utimes) -__SYSCALL(270, compat_sys_fadvise64_64_wrapper) -__SYSCALL(271, sys_pciconfig_iobase) -__SYSCALL(272, sys_pciconfig_read) -__SYSCALL(273, sys_pciconfig_write) -__SYSCALL(274, compat_sys_mq_open) -__SYSCALL(275, sys_mq_unlink) -__SYSCALL(276, 
compat_sys_mq_timedsend) -__SYSCALL(277, compat_sys_mq_timedreceive) -__SYSCALL(278, compat_sys_mq_notify) -__SYSCALL(279, compat_sys_mq_getsetattr) -__SYSCALL(280, compat_sys_waitid) -__SYSCALL(281, sys_socket) -__SYSCALL(282, sys_bind) -__SYSCALL(283, sys_connect) -__SYSCALL(284, sys_listen) -__SYSCALL(285, sys_accept) -__SYSCALL(286, sys_getsockname) -__SYSCALL(287, sys_getpeername) -__SYSCALL(288, sys_socketpair) -__SYSCALL(289, sys_send) -__SYSCALL(290, sys_sendto) -__SYSCALL(291, compat_sys_recv) -__SYSCALL(292, compat_sys_recvfrom) -__SYSCALL(293, sys_shutdown) -__SYSCALL(294, compat_sys_setsockopt) -__SYSCALL(295, compat_sys_getsockopt) -__SYSCALL(296, compat_sys_sendmsg) -__SYSCALL(297, compat_sys_recvmsg) -__SYSCALL(298, sys_semop) -__SYSCALL(299, sys_semget) -__SYSCALL(300, compat_sys_semctl) -__SYSCALL(301, compat_sys_msgsnd) -__SYSCALL(302, compat_sys_msgrcv) -__SYSCALL(303, sys_msgget) -__SYSCALL(304, compat_sys_msgctl) -__SYSCALL(305, compat_sys_shmat) -__SYSCALL(306, sys_shmdt) -__SYSCALL(307, sys_shmget) -__SYSCALL(308, compat_sys_shmctl) -__SYSCALL(309, sys_add_key) -__SYSCALL(310, sys_request_key) -__SYSCALL(311, compat_sys_keyctl) -__SYSCALL(312, compat_sys_semtimedop) -__SYSCALL(313, sys_ni_syscall) -__SYSCALL(314, sys_ioprio_set) -__SYSCALL(315, sys_ioprio_get) -__SYSCALL(316, sys_inotify_init) -__SYSCALL(317, sys_inotify_add_watch) -__SYSCALL(318, sys_inotify_rm_watch) -__SYSCALL(319, compat_sys_mbind) -__SYSCALL(320, compat_sys_get_mempolicy) -__SYSCALL(321, compat_sys_set_mempolicy) -__SYSCALL(322, compat_sys_openat) -__SYSCALL(323, sys_mkdirat) -__SYSCALL(324, sys_mknodat) -__SYSCALL(325, sys_fchownat) -__SYSCALL(326, compat_sys_futimesat) -__SYSCALL(327, sys_fstatat64) -__SYSCALL(328, sys_unlinkat) -__SYSCALL(329, sys_renameat) -__SYSCALL(330, sys_linkat) -__SYSCALL(331, sys_symlinkat) -__SYSCALL(332, sys_readlinkat) -__SYSCALL(333, sys_fchmodat) -__SYSCALL(334, sys_faccessat) -__SYSCALL(335, compat_sys_pselect6) -__SYSCALL(336, compat_sys_ppoll) -__SYSCALL(337, sys_unshare) -__SYSCALL(338, compat_sys_set_robust_list) -__SYSCALL(339, compat_sys_get_robust_list) -__SYSCALL(340, sys_splice) -__SYSCALL(341, compat_sys_sync_file_range2_wrapper) -__SYSCALL(342, sys_tee) -__SYSCALL(343, compat_sys_vmsplice) -__SYSCALL(344, compat_sys_move_pages) -__SYSCALL(345, sys_getcpu) -__SYSCALL(346, compat_sys_epoll_pwait) -__SYSCALL(347, compat_sys_kexec_load) -__SYSCALL(348, compat_sys_utimensat) -__SYSCALL(349, compat_sys_signalfd) -__SYSCALL(350, sys_timerfd_create) -__SYSCALL(351, sys_eventfd) -__SYSCALL(352, compat_sys_fallocate_wrapper) -__SYSCALL(353, compat_sys_timerfd_settime) -__SYSCALL(354, compat_sys_timerfd_gettime) -__SYSCALL(355, compat_sys_signalfd4) -__SYSCALL(356, sys_eventfd2) -__SYSCALL(357, sys_epoll_create1) -__SYSCALL(358, sys_dup3) -__SYSCALL(359, sys_pipe2) -__SYSCALL(360, sys_inotify_init1) -__SYSCALL(361, compat_sys_preadv) -__SYSCALL(362, compat_sys_pwritev) -__SYSCALL(363, compat_sys_rt_tgsigqueueinfo) -__SYSCALL(364, sys_perf_event_open) -__SYSCALL(365, compat_sys_recvmmsg) -__SYSCALL(366, sys_accept4) -__SYSCALL(367, sys_fanotify_init) -__SYSCALL(368, compat_sys_fanotify_mark) -__SYSCALL(369, sys_prlimit64) -__SYSCALL(370, sys_name_to_handle_at) -__SYSCALL(371, compat_sys_open_by_handle_at) -__SYSCALL(372, compat_sys_clock_adjtime) -__SYSCALL(373, sys_syncfs) -__SYSCALL(374, compat_sys_sendmmsg) -__SYSCALL(375, sys_setns) -__SYSCALL(376, compat_sys_process_vm_readv) -__SYSCALL(377, compat_sys_process_vm_writev) -__SYSCALL(378, sys_kcmp) 
-__SYSCALL(379, sys_finit_module) -__SYSCALL(380, sys_sched_setattr) -__SYSCALL(381, sys_sched_getattr) -__SYSCALL(382, sys_renameat2) - -#define __NR_compat_syscalls 383 - -/* - * Compat syscall numbers used by the AArch64 kernel. - */ -#define __NR_compat_restart_syscall 0 -#define __NR_compat_sigreturn 119 -#define __NR_compat_rt_sigreturn 173 - - -/* - * The following SVCs are ARM private. - */ -#define __ARM_NR_COMPAT_BASE 0x0f0000 -#define __ARM_NR_compat_cacheflush (__ARM_NR_COMPAT_BASE+2) -#define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE+5) +#define __NR_restart_syscall 0 +__SYSCALL(__NR_restart_syscall, sys_restart_syscall) +#define __NR_exit 1 +__SYSCALL(__NR_exit, sys_exit) +#define __NR_fork 2 +__SYSCALL(__NR_fork, sys_fork) +#define __NR_read 3 +__SYSCALL(__NR_read, sys_read) +#define __NR_write 4 +__SYSCALL(__NR_write, sys_write) +#define __NR_open 5 +__SYSCALL(__NR_open, compat_sys_open) +#define __NR_close 6 +__SYSCALL(__NR_close, sys_close) + /* 7 was sys_waitpid */ +__SYSCALL(7, sys_ni_syscall) +#define __NR_creat 8 +__SYSCALL(__NR_creat, sys_creat) +#define __NR_link 9 +__SYSCALL(__NR_link, sys_link) +#define __NR_unlink 10 +__SYSCALL(__NR_unlink, sys_unlink) +#define __NR_execve 11 +__SYSCALL(__NR_execve, compat_sys_execve) +#define __NR_chdir 12 +__SYSCALL(__NR_chdir, sys_chdir) + /* 13 was sys_time */ +__SYSCALL(13, sys_ni_syscall) +#define __NR_mknod 14 +__SYSCALL(__NR_mknod, sys_mknod) +#define __NR_chmod 15 +__SYSCALL(__NR_chmod, sys_chmod) +#define __NR_lchown 16 +__SYSCALL(__NR_lchown, sys_lchown16) + /* 17 was sys_break */ +__SYSCALL(17, sys_ni_syscall) + /* 18 was sys_stat */ +__SYSCALL(18, sys_ni_syscall) +#define __NR_lseek 19 +__SYSCALL(__NR_lseek, compat_sys_lseek) +#define __NR_getpid 20 +__SYSCALL(__NR_getpid, sys_getpid) +#define __NR_mount 21 +__SYSCALL(__NR_mount, compat_sys_mount) + /* 22 was sys_umount */ +__SYSCALL(22, sys_ni_syscall) +#define __NR_setuid 23 +__SYSCALL(__NR_setuid, sys_setuid16) +#define __NR_getuid 24 +__SYSCALL(__NR_getuid, sys_getuid16) + /* 25 was sys_stime */ +__SYSCALL(25, sys_ni_syscall) +#define __NR_ptrace 26 +__SYSCALL(__NR_ptrace, compat_sys_ptrace) + /* 27 was sys_alarm */ +__SYSCALL(27, sys_ni_syscall) + /* 28 was sys_fstat */ +__SYSCALL(28, sys_ni_syscall) +#define __NR_pause 29 +__SYSCALL(__NR_pause, sys_pause) + /* 30 was sys_utime */ +__SYSCALL(30, sys_ni_syscall) + /* 31 was sys_stty */ +__SYSCALL(31, sys_ni_syscall) + /* 32 was sys_gtty */ +__SYSCALL(32, sys_ni_syscall) +#define __NR_access 33 +__SYSCALL(__NR_access, sys_access) +#define __NR_nice 34 +__SYSCALL(__NR_nice, sys_nice) + /* 35 was sys_ftime */ +__SYSCALL(35, sys_ni_syscall) +#define __NR_sync 36 +__SYSCALL(__NR_sync, sys_sync) +#define __NR_kill 37 +__SYSCALL(__NR_kill, sys_kill) +#define __NR_rename 38 +__SYSCALL(__NR_rename, sys_rename) +#define __NR_mkdir 39 +__SYSCALL(__NR_mkdir, sys_mkdir) +#define __NR_rmdir 40 +__SYSCALL(__NR_rmdir, sys_rmdir) +#define __NR_dup 41 +__SYSCALL(__NR_dup, sys_dup) +#define __NR_pipe 42 +__SYSCALL(__NR_pipe, sys_pipe) +#define __NR_times 43 +__SYSCALL(__NR_times, compat_sys_times) + /* 44 was sys_prof */ +__SYSCALL(44, sys_ni_syscall) +#define __NR_brk 45 +__SYSCALL(__NR_brk, sys_brk) +#define __NR_setgid 46 +__SYSCALL(__NR_setgid, sys_setgid16) +#define __NR_getgid 47 +__SYSCALL(__NR_getgid, sys_getgid16) + /* 48 was sys_signal */ +__SYSCALL(48, sys_ni_syscall) +#define __NR_geteuid 49 +__SYSCALL(__NR_geteuid, sys_geteuid16) +#define __NR_getegid 50 +__SYSCALL(__NR_getegid, sys_getegid16) +#define __NR_acct 51 
+__SYSCALL(__NR_acct, sys_acct) +#define __NR_umount2 52 +__SYSCALL(__NR_umount2, sys_umount) + /* 53 was sys_lock */ +__SYSCALL(53, sys_ni_syscall) +#define __NR_ioctl 54 +__SYSCALL(__NR_ioctl, compat_sys_ioctl) +#define __NR_fcntl 55 +__SYSCALL(__NR_fcntl, compat_sys_fcntl) + /* 56 was sys_mpx */ +__SYSCALL(56, sys_ni_syscall) +#define __NR_setpgid 57 +__SYSCALL(__NR_setpgid, sys_setpgid) + /* 58 was sys_ulimit */ +__SYSCALL(58, sys_ni_syscall) + /* 59 was sys_olduname */ +__SYSCALL(59, sys_ni_syscall) +#define __NR_umask 60 +__SYSCALL(__NR_umask, sys_umask) +#define __NR_chroot 61 +__SYSCALL(__NR_chroot, sys_chroot) +#define __NR_ustat 62 +__SYSCALL(__NR_ustat, compat_sys_ustat) +#define __NR_dup2 63 +__SYSCALL(__NR_dup2, sys_dup2) +#define __NR_getppid 64 +__SYSCALL(__NR_getppid, sys_getppid) +#define __NR_getpgrp 65 +__SYSCALL(__NR_getpgrp, sys_getpgrp) +#define __NR_setsid 66 +__SYSCALL(__NR_setsid, sys_setsid) +#define __NR_sigaction 67 +__SYSCALL(__NR_sigaction, compat_sys_sigaction) + /* 68 was sys_sgetmask */ +__SYSCALL(68, sys_ni_syscall) + /* 69 was sys_ssetmask */ +__SYSCALL(69, sys_ni_syscall) +#define __NR_setreuid 70 +__SYSCALL(__NR_setreuid, sys_setreuid16) +#define __NR_setregid 71 +__SYSCALL(__NR_setregid, sys_setregid16) +#define __NR_sigsuspend 72 +__SYSCALL(__NR_sigsuspend, sys_sigsuspend) +#define __NR_sigpending 73 +__SYSCALL(__NR_sigpending, compat_sys_sigpending) +#define __NR_sethostname 74 +__SYSCALL(__NR_sethostname, sys_sethostname) +#define __NR_setrlimit 75 +__SYSCALL(__NR_setrlimit, compat_sys_setrlimit) + /* 76 was compat_sys_getrlimit */ +__SYSCALL(76, sys_ni_syscall) +#define __NR_getrusage 77 +__SYSCALL(__NR_getrusage, compat_sys_getrusage) +#define __NR_gettimeofday 78 +__SYSCALL(__NR_gettimeofday, compat_sys_gettimeofday) +#define __NR_settimeofday 79 +__SYSCALL(__NR_settimeofday, compat_sys_settimeofday) +#define __NR_getgroups 80 +__SYSCALL(__NR_getgroups, sys_getgroups16) +#define __NR_setgroups 81 +__SYSCALL(__NR_setgroups, sys_setgroups16) + /* 82 was compat_sys_select */ +__SYSCALL(82, sys_ni_syscall) +#define __NR_symlink 83 +__SYSCALL(__NR_symlink, sys_symlink) + /* 84 was sys_lstat */ +__SYSCALL(84, sys_ni_syscall) +#define __NR_readlink 85 +__SYSCALL(__NR_readlink, sys_readlink) +#define __NR_uselib 86 +__SYSCALL(__NR_uselib, sys_uselib) +#define __NR_swapon 87 +__SYSCALL(__NR_swapon, sys_swapon) +#define __NR_reboot 88 +__SYSCALL(__NR_reboot, sys_reboot) + /* 89 was sys_readdir */ +__SYSCALL(89, sys_ni_syscall) + /* 90 was sys_mmap */ +__SYSCALL(90, sys_ni_syscall) +#define __NR_munmap 91 +__SYSCALL(__NR_munmap, sys_munmap) +#define __NR_truncate 92 +__SYSCALL(__NR_truncate, compat_sys_truncate) +#define __NR_ftruncate 93 +__SYSCALL(__NR_ftruncate, compat_sys_ftruncate) +#define __NR_fchmod 94 +__SYSCALL(__NR_fchmod, sys_fchmod) +#define __NR_fchown 95 +__SYSCALL(__NR_fchown, sys_fchown16) +#define __NR_getpriority 96 +__SYSCALL(__NR_getpriority, sys_getpriority) +#define __NR_setpriority 97 +__SYSCALL(__NR_setpriority, sys_setpriority) + /* 98 was sys_profil */ +__SYSCALL(98, sys_ni_syscall) +#define __NR_statfs 99 +__SYSCALL(__NR_statfs, compat_sys_statfs) +#define __NR_fstatfs 100 +__SYSCALL(__NR_fstatfs, compat_sys_fstatfs) + /* 101 was sys_ioperm */ +__SYSCALL(101, sys_ni_syscall) + /* 102 was sys_socketcall */ +__SYSCALL(102, sys_ni_syscall) +#define __NR_syslog 103 +__SYSCALL(__NR_syslog, sys_syslog) +#define __NR_setitimer 104 +__SYSCALL(__NR_setitimer, compat_sys_setitimer) +#define __NR_getitimer 105 +__SYSCALL(__NR_getitimer, 
compat_sys_getitimer) +#define __NR_stat 106 +__SYSCALL(__NR_stat, compat_sys_newstat) +#define __NR_lstat 107 +__SYSCALL(__NR_lstat, compat_sys_newlstat) +#define __NR_fstat 108 +__SYSCALL(__NR_fstat, compat_sys_newfstat) + /* 109 was sys_uname */ +__SYSCALL(109, sys_ni_syscall) + /* 110 was sys_iopl */ +__SYSCALL(110, sys_ni_syscall) +#define __NR_vhangup 111 +__SYSCALL(__NR_vhangup, sys_vhangup) + /* 112 was sys_idle */ +__SYSCALL(112, sys_ni_syscall) + /* 113 was sys_syscall */ +__SYSCALL(113, sys_ni_syscall) +#define __NR_wait4 114 +__SYSCALL(__NR_wait4, compat_sys_wait4) +#define __NR_swapoff 115 +__SYSCALL(__NR_swapoff, sys_swapoff) +#define __NR_sysinfo 116 +__SYSCALL(__NR_sysinfo, compat_sys_sysinfo) + /* 117 was sys_ipc */ +__SYSCALL(117, sys_ni_syscall) +#define __NR_fsync 118 +__SYSCALL(__NR_fsync, sys_fsync) +#define __NR_sigreturn 119 +__SYSCALL(__NR_sigreturn, compat_sys_sigreturn_wrapper) +#define __NR_clone 120 +__SYSCALL(__NR_clone, sys_clone) +#define __NR_setdomainname 121 +__SYSCALL(__NR_setdomainname, sys_setdomainname) +#define __NR_uname 122 +__SYSCALL(__NR_uname, sys_newuname) + /* 123 was sys_modify_ldt */ +__SYSCALL(123, sys_ni_syscall) +#define __NR_adjtimex 124 +__SYSCALL(__NR_adjtimex, compat_sys_adjtimex) +#define __NR_mprotect 125 +__SYSCALL(__NR_mprotect, sys_mprotect) +#define __NR_sigprocmask 126 +__SYSCALL(__NR_sigprocmask, compat_sys_sigprocmask) + /* 127 was sys_create_module */ +__SYSCALL(127, sys_ni_syscall) +#define __NR_init_module 128 +__SYSCALL(__NR_init_module, sys_init_module) +#define __NR_delete_module 129 +__SYSCALL(__NR_delete_module, sys_delete_module) + /* 130 was sys_get_kernel_syms */ +__SYSCALL(130, sys_ni_syscall) +#define __NR_quotactl 131 +__SYSCALL(__NR_quotactl, sys_quotactl) +#define __NR_getpgid 132 +__SYSCALL(__NR_getpgid, sys_getpgid) +#define __NR_fchdir 133 +__SYSCALL(__NR_fchdir, sys_fchdir) +#define __NR_bdflush 134 +__SYSCALL(__NR_bdflush, sys_bdflush) +#define __NR_sysfs 135 +__SYSCALL(__NR_sysfs, sys_sysfs) +#define __NR_personality 136 +__SYSCALL(__NR_personality, sys_personality) + /* 137 was sys_afs_syscall */ +__SYSCALL(137, sys_ni_syscall) +#define __NR_setfsuid 138 +__SYSCALL(__NR_setfsuid, sys_setfsuid16) +#define __NR_setfsgid 139 +__SYSCALL(__NR_setfsgid, sys_setfsgid16) +#define __NR__llseek 140 +__SYSCALL(__NR__llseek, sys_llseek) +#define __NR_getdents 141 +__SYSCALL(__NR_getdents, compat_sys_getdents) +#define __NR__newselect 142 +__SYSCALL(__NR__newselect, compat_sys_select) +#define __NR_flock 143 +__SYSCALL(__NR_flock, sys_flock) +#define __NR_msync 144 +__SYSCALL(__NR_msync, sys_msync) +#define __NR_readv 145 +__SYSCALL(__NR_readv, compat_sys_readv) +#define __NR_writev 146 +__SYSCALL(__NR_writev, compat_sys_writev) +#define __NR_getsid 147 +__SYSCALL(__NR_getsid, sys_getsid) +#define __NR_fdatasync 148 +__SYSCALL(__NR_fdatasync, sys_fdatasync) +#define __NR__sysctl 149 +__SYSCALL(__NR__sysctl, compat_sys_sysctl) +#define __NR_mlock 150 +__SYSCALL(__NR_mlock, sys_mlock) +#define __NR_munlock 151 +__SYSCALL(__NR_munlock, sys_munlock) +#define __NR_mlockall 152 +__SYSCALL(__NR_mlockall, sys_mlockall) +#define __NR_munlockall 153 +__SYSCALL(__NR_munlockall, sys_munlockall) +#define __NR_sched_setparam 154 +__SYSCALL(__NR_sched_setparam, sys_sched_setparam) +#define __NR_sched_getparam 155 +__SYSCALL(__NR_sched_getparam, sys_sched_getparam) +#define __NR_sched_setscheduler 156 +__SYSCALL(__NR_sched_setscheduler, sys_sched_setscheduler) +#define __NR_sched_getscheduler 157 +__SYSCALL(__NR_sched_getscheduler, 
sys_sched_getscheduler) +#define __NR_sched_yield 158 +__SYSCALL(__NR_sched_yield, sys_sched_yield) +#define __NR_sched_get_priority_max 159 +__SYSCALL(__NR_sched_get_priority_max, sys_sched_get_priority_max) +#define __NR_sched_get_priority_min 160 +__SYSCALL(__NR_sched_get_priority_min, sys_sched_get_priority_min) +#define __NR_sched_rr_get_interval 161 +__SYSCALL(__NR_sched_rr_get_interval, compat_sys_sched_rr_get_interval) +#define __NR_nanosleep 162 +__SYSCALL(__NR_nanosleep, compat_sys_nanosleep) +#define __NR_mremap 163 +__SYSCALL(__NR_mremap, sys_mremap) +#define __NR_setresuid 164 +__SYSCALL(__NR_setresuid, sys_setresuid16) +#define __NR_getresuid 165 +__SYSCALL(__NR_getresuid, sys_getresuid16) + /* 166 was sys_vm86 */ +__SYSCALL(166, sys_ni_syscall) + /* 167 was sys_query_module */ +__SYSCALL(167, sys_ni_syscall) +#define __NR_poll 168 +__SYSCALL(__NR_poll, sys_poll) +#define __NR_nfsservctl 169 +__SYSCALL(__NR_nfsservctl, sys_ni_syscall) +#define __NR_setresgid 170 +__SYSCALL(__NR_setresgid, sys_setresgid16) +#define __NR_getresgid 171 +__SYSCALL(__NR_getresgid, sys_getresgid16) +#define __NR_prctl 172 +__SYSCALL(__NR_prctl, sys_prctl) +#define __NR_rt_sigreturn 173 +__SYSCALL(__NR_rt_sigreturn, compat_sys_rt_sigreturn_wrapper) +#define __NR_rt_sigaction 174 +__SYSCALL(__NR_rt_sigaction, compat_sys_rt_sigaction) +#define __NR_rt_sigprocmask 175 +__SYSCALL(__NR_rt_sigprocmask, compat_sys_rt_sigprocmask) +#define __NR_rt_sigpending 176 +__SYSCALL(__NR_rt_sigpending, compat_sys_rt_sigpending) +#define __NR_rt_sigtimedwait 177 +__SYSCALL(__NR_rt_sigtimedwait, compat_sys_rt_sigtimedwait) +#define __NR_rt_sigqueueinfo 178 +__SYSCALL(__NR_rt_sigqueueinfo, compat_sys_rt_sigqueueinfo) +#define __NR_rt_sigsuspend 179 +__SYSCALL(__NR_rt_sigsuspend, compat_sys_rt_sigsuspend) +#define __NR_pread64 180 +__SYSCALL(__NR_pread64, compat_sys_pread64_wrapper) +#define __NR_pwrite64 181 +__SYSCALL(__NR_pwrite64, compat_sys_pwrite64_wrapper) +#define __NR_chown 182 +__SYSCALL(__NR_chown, sys_chown16) +#define __NR_getcwd 183 +__SYSCALL(__NR_getcwd, sys_getcwd) +#define __NR_capget 184 +__SYSCALL(__NR_capget, sys_capget) +#define __NR_capset 185 +__SYSCALL(__NR_capset, sys_capset) +#define __NR_sigaltstack 186 +__SYSCALL(__NR_sigaltstack, compat_sys_sigaltstack) +#define __NR_sendfile 187 +__SYSCALL(__NR_sendfile, compat_sys_sendfile) + /* 188 reserved */ +__SYSCALL(188, sys_ni_syscall) + /* 189 reserved */ +__SYSCALL(189, sys_ni_syscall) +#define __NR_vfork 190 +__SYSCALL(__NR_vfork, sys_vfork) +#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ +__SYSCALL(__NR_ugetrlimit, compat_sys_getrlimit) /* SuS compliant getrlimit */ +#define __NR_mmap2 192 +__SYSCALL(__NR_mmap2, sys_mmap_pgoff) +#define __NR_truncate64 193 +__SYSCALL(__NR_truncate64, compat_sys_truncate64_wrapper) +#define __NR_ftruncate64 194 +__SYSCALL(__NR_ftruncate64, compat_sys_ftruncate64_wrapper) +#define __NR_stat64 195 +__SYSCALL(__NR_stat64, sys_stat64) +#define __NR_lstat64 196 +__SYSCALL(__NR_lstat64, sys_lstat64) +#define __NR_fstat64 197 +__SYSCALL(__NR_fstat64, sys_fstat64) +#define __NR_lchown32 198 +__SYSCALL(__NR_lchown32, sys_lchown) +#define __NR_getuid32 199 +__SYSCALL(__NR_getuid32, sys_getuid) +#define __NR_getgid32 200 +__SYSCALL(__NR_getgid32, sys_getgid) +#define __NR_geteuid32 201 +__SYSCALL(__NR_geteuid32, sys_geteuid) +#define __NR_getegid32 202 +__SYSCALL(__NR_getegid32, sys_getegid) +#define __NR_setreuid32 203 +__SYSCALL(__NR_setreuid32, sys_setreuid) +#define __NR_setregid32 204 
+__SYSCALL(__NR_setregid32, sys_setregid) +#define __NR_getgroups32 205 +__SYSCALL(__NR_getgroups32, sys_getgroups) +#define __NR_setgroups32 206 +__SYSCALL(__NR_setgroups32, sys_setgroups) +#define __NR_fchown32 207 +__SYSCALL(__NR_fchown32, sys_fchown) +#define __NR_setresuid32 208 +__SYSCALL(__NR_setresuid32, sys_setresuid) +#define __NR_getresuid32 209 +__SYSCALL(__NR_getresuid32, sys_getresuid) +#define __NR_setresgid32 210 +__SYSCALL(__NR_setresgid32, sys_setresgid) +#define __NR_getresgid32 211 +__SYSCALL(__NR_getresgid32, sys_getresgid) +#define __NR_chown32 212 +__SYSCALL(__NR_chown32, sys_chown) +#define __NR_setuid32 213 +__SYSCALL(__NR_setuid32, sys_setuid) +#define __NR_setgid32 214 +__SYSCALL(__NR_setgid32, sys_setgid) +#define __NR_setfsuid32 215 +__SYSCALL(__NR_setfsuid32, sys_setfsuid) +#define __NR_setfsgid32 216 +__SYSCALL(__NR_setfsgid32, sys_setfsgid) +#define __NR_getdents64 217 +__SYSCALL(__NR_getdents64, compat_sys_getdents64) +#define __NR_pivot_root 218 +__SYSCALL(__NR_pivot_root, sys_pivot_root) +#define __NR_mincore 219 +__SYSCALL(__NR_mincore, sys_mincore) +#define __NR_madvise 220 +__SYSCALL(__NR_madvise, sys_madvise) +#define __NR_fcntl64 221 +__SYSCALL(__NR_fcntl64, compat_sys_fcntl64) + /* 222 for tux */ +__SYSCALL(222, sys_ni_syscall) + /* 223 is unused */ +__SYSCALL(223, sys_ni_syscall) +#define __NR_gettid 224 +__SYSCALL(__NR_gettid, sys_gettid) +#define __NR_readahead 225 +__SYSCALL(__NR_readahead, compat_sys_readahead_wrapper) +#define __NR_setxattr 226 +__SYSCALL(__NR_setxattr, sys_setxattr) +#define __NR_lsetxattr 227 +__SYSCALL(__NR_lsetxattr, sys_lsetxattr) +#define __NR_fsetxattr 228 +__SYSCALL(__NR_fsetxattr, sys_fsetxattr) +#define __NR_getxattr 229 +__SYSCALL(__NR_getxattr, sys_getxattr) +#define __NR_lgetxattr 230 +__SYSCALL(__NR_lgetxattr, sys_lgetxattr) +#define __NR_fgetxattr 231 +__SYSCALL(__NR_fgetxattr, sys_fgetxattr) +#define __NR_listxattr 232 +__SYSCALL(__NR_listxattr, sys_listxattr) +#define __NR_llistxattr 233 +__SYSCALL(__NR_llistxattr, sys_llistxattr) +#define __NR_flistxattr 234 +__SYSCALL(__NR_flistxattr, sys_flistxattr) +#define __NR_removexattr 235 +__SYSCALL(__NR_removexattr, sys_removexattr) +#define __NR_lremovexattr 236 +__SYSCALL(__NR_lremovexattr, sys_lremovexattr) +#define __NR_fremovexattr 237 +__SYSCALL(__NR_fremovexattr, sys_fremovexattr) +#define __NR_tkill 238 +__SYSCALL(__NR_tkill, sys_tkill) +#define __NR_sendfile64 239 +__SYSCALL(__NR_sendfile64, sys_sendfile64) +#define __NR_futex 240 +__SYSCALL(__NR_futex, compat_sys_futex) +#define __NR_sched_setaffinity 241 +__SYSCALL(__NR_sched_setaffinity, compat_sys_sched_setaffinity) +#define __NR_sched_getaffinity 242 +__SYSCALL(__NR_sched_getaffinity, compat_sys_sched_getaffinity) +#define __NR_io_setup 243 +__SYSCALL(__NR_io_setup, compat_sys_io_setup) +#define __NR_io_destroy 244 +__SYSCALL(__NR_io_destroy, sys_io_destroy) +#define __NR_io_getevents 245 +__SYSCALL(__NR_io_getevents, compat_sys_io_getevents) +#define __NR_io_submit 246 +__SYSCALL(__NR_io_submit, compat_sys_io_submit) +#define __NR_io_cancel 247 +__SYSCALL(__NR_io_cancel, sys_io_cancel) +#define __NR_exit_group 248 +__SYSCALL(__NR_exit_group, sys_exit_group) +#define __NR_lookup_dcookie 249 +__SYSCALL(__NR_lookup_dcookie, compat_sys_lookup_dcookie) +#define __NR_epoll_create 250 +__SYSCALL(__NR_epoll_create, sys_epoll_create) +#define __NR_epoll_ctl 251 +__SYSCALL(__NR_epoll_ctl, sys_epoll_ctl) +#define __NR_epoll_wait 252 +__SYSCALL(__NR_epoll_wait, sys_epoll_wait) +#define __NR_remap_file_pages 253 
+__SYSCALL(__NR_remap_file_pages, sys_remap_file_pages) + /* 254 for set_thread_area */ +__SYSCALL(254, sys_ni_syscall) + /* 255 for get_thread_area */ +__SYSCALL(255, sys_ni_syscall) +#define __NR_set_tid_address 256 +__SYSCALL(__NR_set_tid_address, sys_set_tid_address) +#define __NR_timer_create 257 +__SYSCALL(__NR_timer_create, compat_sys_timer_create) +#define __NR_timer_settime 258 +__SYSCALL(__NR_timer_settime, compat_sys_timer_settime) +#define __NR_timer_gettime 259 +__SYSCALL(__NR_timer_gettime, compat_sys_timer_gettime) +#define __NR_timer_getoverrun 260 +__SYSCALL(__NR_timer_getoverrun, sys_timer_getoverrun) +#define __NR_timer_delete 261 +__SYSCALL(__NR_timer_delete, sys_timer_delete) +#define __NR_clock_settime 262 +__SYSCALL(__NR_clock_settime, compat_sys_clock_settime) +#define __NR_clock_gettime 263 +__SYSCALL(__NR_clock_gettime, compat_sys_clock_gettime) +#define __NR_clock_getres 264 +__SYSCALL(__NR_clock_getres, compat_sys_clock_getres) +#define __NR_clock_nanosleep 265 +__SYSCALL(__NR_clock_nanosleep, compat_sys_clock_nanosleep) +#define __NR_statfs64 266 +__SYSCALL(__NR_statfs64, compat_sys_statfs64_wrapper) +#define __NR_fstatfs64 267 +__SYSCALL(__NR_fstatfs64, compat_sys_fstatfs64_wrapper) +#define __NR_tgkill 268 +__SYSCALL(__NR_tgkill, sys_tgkill) +#define __NR_utimes 269 +__SYSCALL(__NR_utimes, compat_sys_utimes) +#define __NR_arm_fadvise64_64 270 +__SYSCALL(__NR_arm_fadvise64_64, compat_sys_fadvise64_64_wrapper) +#define __NR_pciconfig_iobase 271 +__SYSCALL(__NR_pciconfig_iobase, sys_pciconfig_iobase) +#define __NR_pciconfig_read 272 +__SYSCALL(__NR_pciconfig_read, sys_pciconfig_read) +#define __NR_pciconfig_write 273 +__SYSCALL(__NR_pciconfig_write, sys_pciconfig_write) +#define __NR_mq_open 274 +__SYSCALL(__NR_mq_open, compat_sys_mq_open) +#define __NR_mq_unlink 275 +__SYSCALL(__NR_mq_unlink, sys_mq_unlink) +#define __NR_mq_timedsend 276 +__SYSCALL(__NR_mq_timedsend, compat_sys_mq_timedsend) +#define __NR_mq_timedreceive 277 +__SYSCALL(__NR_mq_timedreceive, compat_sys_mq_timedreceive) +#define __NR_mq_notify 278 +__SYSCALL(__NR_mq_notify, compat_sys_mq_notify) +#define __NR_mq_getsetattr 279 +__SYSCALL(__NR_mq_getsetattr, compat_sys_mq_getsetattr) +#define __NR_waitid 280 +__SYSCALL(__NR_waitid, compat_sys_waitid) +#define __NR_socket 281 +__SYSCALL(__NR_socket, sys_socket) +#define __NR_bind 282 +__SYSCALL(__NR_bind, sys_bind) +#define __NR_connect 283 +__SYSCALL(__NR_connect, sys_connect) +#define __NR_listen 284 +__SYSCALL(__NR_listen, sys_listen) +#define __NR_accept 285 +__SYSCALL(__NR_accept, sys_accept) +#define __NR_getsockname 286 +__SYSCALL(__NR_getsockname, sys_getsockname) +#define __NR_getpeername 287 +__SYSCALL(__NR_getpeername, sys_getpeername) +#define __NR_socketpair 288 +__SYSCALL(__NR_socketpair, sys_socketpair) +#define __NR_send 289 +__SYSCALL(__NR_send, sys_send) +#define __NR_sendto 290 +__SYSCALL(__NR_sendto, sys_sendto) +#define __NR_recv 291 +__SYSCALL(__NR_recv, compat_sys_recv) +#define __NR_recvfrom 292 +__SYSCALL(__NR_recvfrom, compat_sys_recvfrom) +#define __NR_shutdown 293 +__SYSCALL(__NR_shutdown, sys_shutdown) +#define __NR_setsockopt 294 +__SYSCALL(__NR_setsockopt, compat_sys_setsockopt) +#define __NR_getsockopt 295 +__SYSCALL(__NR_getsockopt, compat_sys_getsockopt) +#define __NR_sendmsg 296 +__SYSCALL(__NR_sendmsg, compat_sys_sendmsg) +#define __NR_recvmsg 297 +__SYSCALL(__NR_recvmsg, compat_sys_recvmsg) +#define __NR_semop 298 +__SYSCALL(__NR_semop, sys_semop) +#define __NR_semget 299 +__SYSCALL(__NR_semget, sys_semget) 
+#define __NR_semctl 300 +__SYSCALL(__NR_semctl, compat_sys_semctl) +#define __NR_msgsnd 301 +__SYSCALL(__NR_msgsnd, compat_sys_msgsnd) +#define __NR_msgrcv 302 +__SYSCALL(__NR_msgrcv, compat_sys_msgrcv) +#define __NR_msgget 303 +__SYSCALL(__NR_msgget, sys_msgget) +#define __NR_msgctl 304 +__SYSCALL(__NR_msgctl, compat_sys_msgctl) +#define __NR_shmat 305 +__SYSCALL(__NR_shmat, compat_sys_shmat) +#define __NR_shmdt 306 +__SYSCALL(__NR_shmdt, sys_shmdt) +#define __NR_shmget 307 +__SYSCALL(__NR_shmget, sys_shmget) +#define __NR_shmctl 308 +__SYSCALL(__NR_shmctl, compat_sys_shmctl) +#define __NR_add_key 309 +__SYSCALL(__NR_add_key, sys_add_key) +#define __NR_request_key 310 +__SYSCALL(__NR_request_key, sys_request_key) +#define __NR_keyctl 311 +__SYSCALL(__NR_keyctl, compat_sys_keyctl) +#define __NR_semtimedop 312 +__SYSCALL(__NR_semtimedop, compat_sys_semtimedop) +#define __NR_vserver 313 +__SYSCALL(__NR_vserver, sys_ni_syscall) +#define __NR_ioprio_set 314 +__SYSCALL(__NR_ioprio_set, sys_ioprio_set) +#define __NR_ioprio_get 315 +__SYSCALL(__NR_ioprio_get, sys_ioprio_get) +#define __NR_inotify_init 316 +__SYSCALL(__NR_inotify_init, sys_inotify_init) +#define __NR_inotify_add_watch 317 +__SYSCALL(__NR_inotify_add_watch, sys_inotify_add_watch) +#define __NR_inotify_rm_watch 318 +__SYSCALL(__NR_inotify_rm_watch, sys_inotify_rm_watch) +#define __NR_mbind 319 +__SYSCALL(__NR_mbind, compat_sys_mbind) +#define __NR_get_mempolicy 320 +__SYSCALL(__NR_get_mempolicy, compat_sys_get_mempolicy) +#define __NR_set_mempolicy 321 +__SYSCALL(__NR_set_mempolicy, compat_sys_set_mempolicy) +#define __NR_openat 322 +__SYSCALL(__NR_openat, compat_sys_openat) +#define __NR_mkdirat 323 +__SYSCALL(__NR_mkdirat, sys_mkdirat) +#define __NR_mknodat 324 +__SYSCALL(__NR_mknodat, sys_mknodat) +#define __NR_fchownat 325 +__SYSCALL(__NR_fchownat, sys_fchownat) +#define __NR_futimesat 326 +__SYSCALL(__NR_futimesat, compat_sys_futimesat) +#define __NR_fstatat64 327 +__SYSCALL(__NR_fstatat64, sys_fstatat64) +#define __NR_unlinkat 328 +__SYSCALL(__NR_unlinkat, sys_unlinkat) +#define __NR_renameat 329 +__SYSCALL(__NR_renameat, sys_renameat) +#define __NR_linkat 330 +__SYSCALL(__NR_linkat, sys_linkat) +#define __NR_symlinkat 331 +__SYSCALL(__NR_symlinkat, sys_symlinkat) +#define __NR_readlinkat 332 +__SYSCALL(__NR_readlinkat, sys_readlinkat) +#define __NR_fchmodat 333 +__SYSCALL(__NR_fchmodat, sys_fchmodat) +#define __NR_faccessat 334 +__SYSCALL(__NR_faccessat, sys_faccessat) +#define __NR_pselect6 335 +__SYSCALL(__NR_pselect6, compat_sys_pselect6) +#define __NR_ppoll 336 +__SYSCALL(__NR_ppoll, compat_sys_ppoll) +#define __NR_unshare 337 +__SYSCALL(__NR_unshare, sys_unshare) +#define __NR_set_robust_list 338 +__SYSCALL(__NR_set_robust_list, compat_sys_set_robust_list) +#define __NR_get_robust_list 339 +__SYSCALL(__NR_get_robust_list, compat_sys_get_robust_list) +#define __NR_splice 340 +__SYSCALL(__NR_splice, sys_splice) +#define __NR_sync_file_range2 341 +__SYSCALL(__NR_sync_file_range2, compat_sys_sync_file_range2_wrapper) +#define __NR_tee 342 +__SYSCALL(__NR_tee, sys_tee) +#define __NR_vmsplice 343 +__SYSCALL(__NR_vmsplice, compat_sys_vmsplice) +#define __NR_move_pages 344 +__SYSCALL(__NR_move_pages, compat_sys_move_pages) +#define __NR_getcpu 345 +__SYSCALL(__NR_getcpu, sys_getcpu) +#define __NR_epoll_pwait 346 +__SYSCALL(__NR_epoll_pwait, compat_sys_epoll_pwait) +#define __NR_kexec_load 347 +__SYSCALL(__NR_kexec_load, compat_sys_kexec_load) +#define __NR_utimensat 348 +__SYSCALL(__NR_utimensat, compat_sys_utimensat) +#define 
__NR_signalfd 349 +__SYSCALL(__NR_signalfd, compat_sys_signalfd) +#define __NR_timerfd_create 350 +__SYSCALL(__NR_timerfd_create, sys_timerfd_create) +#define __NR_eventfd 351 +__SYSCALL(__NR_eventfd, sys_eventfd) +#define __NR_fallocate 352 +__SYSCALL(__NR_fallocate, compat_sys_fallocate_wrapper) +#define __NR_timerfd_settime 353 +__SYSCALL(__NR_timerfd_settime, compat_sys_timerfd_settime) +#define __NR_timerfd_gettime 354 +__SYSCALL(__NR_timerfd_gettime, compat_sys_timerfd_gettime) +#define __NR_signalfd4 355 +__SYSCALL(__NR_signalfd4, compat_sys_signalfd4) +#define __NR_eventfd2 356 +__SYSCALL(__NR_eventfd2, sys_eventfd2) +#define __NR_epoll_create1 357 +__SYSCALL(__NR_epoll_create1, sys_epoll_create1) +#define __NR_dup3 358 +__SYSCALL(__NR_dup3, sys_dup3) +#define __NR_pipe2 359 +__SYSCALL(__NR_pipe2, sys_pipe2) +#define __NR_inotify_init1 360 +__SYSCALL(__NR_inotify_init1, sys_inotify_init1) +#define __NR_preadv 361 +__SYSCALL(__NR_preadv, compat_sys_preadv) +#define __NR_pwritev 362 +__SYSCALL(__NR_pwritev, compat_sys_pwritev) +#define __NR_rt_tgsigqueueinfo 363 +__SYSCALL(__NR_rt_tgsigqueueinfo, compat_sys_rt_tgsigqueueinfo) +#define __NR_perf_event_open 364 +__SYSCALL(__NR_perf_event_open, sys_perf_event_open) +#define __NR_recvmmsg 365 +__SYSCALL(__NR_recvmmsg, compat_sys_recvmmsg) +#define __NR_accept4 366 +__SYSCALL(__NR_accept4, sys_accept4) +#define __NR_fanotify_init 367 +__SYSCALL(__NR_fanotify_init, sys_fanotify_init) +#define __NR_fanotify_mark 368 +__SYSCALL(__NR_fanotify_mark, compat_sys_fanotify_mark) +#define __NR_prlimit64 369 +__SYSCALL(__NR_prlimit64, sys_prlimit64) +#define __NR_name_to_handle_at 370 +__SYSCALL(__NR_name_to_handle_at, sys_name_to_handle_at) +#define __NR_open_by_handle_at 371 +__SYSCALL(__NR_open_by_handle_at, compat_sys_open_by_handle_at) +#define __NR_clock_adjtime 372 +__SYSCALL(__NR_clock_adjtime, compat_sys_clock_adjtime) +#define __NR_syncfs 373 +__SYSCALL(__NR_syncfs, sys_syncfs) +#define __NR_sendmmsg 374 +__SYSCALL(__NR_sendmmsg, compat_sys_sendmmsg) +#define __NR_setns 375 +__SYSCALL(__NR_setns, sys_setns) +#define __NR_process_vm_readv 376 +__SYSCALL(__NR_process_vm_readv, compat_sys_process_vm_readv) +#define __NR_process_vm_writev 377 +__SYSCALL(__NR_process_vm_writev, compat_sys_process_vm_writev) +#define __NR_kcmp 378 +__SYSCALL(__NR_kcmp, sys_kcmp) +#define __NR_finit_module 379 +__SYSCALL(__NR_finit_module, sys_finit_module) +#define __NR_sched_setattr 380 +__SYSCALL(__NR_sched_setattr, sys_sched_setattr) +#define __NR_sched_getattr 381 +__SYSCALL(__NR_sched_getattr, sys_sched_getattr) +#define __NR_renameat2 382 +__SYSCALL(__NR_renameat2, sys_renameat2) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 93ac58dbf07c..f0b5e5120a87 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -27,7 +27,6 @@ #include #include #include -#include /* * Context tracking subsystem. Used to instrument transitions diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 7787208e8cc6..997e6b27ff6a 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -28,7 +28,7 @@ * See Documentation/arm/kernel_user_helpers.txt for formal definitions. 
*/ -#include +#include .align 5 .globl __kuser_helper_start diff --git a/arch/arm64/kernel/signal32.c b/arch/arm64/kernel/signal32.c index 3491c638f172..c5ee208321c3 100644 --- a/arch/arm64/kernel/signal32.c +++ b/arch/arm64/kernel/signal32.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include struct compat_sigcontext { /* We always set these two fields to 0 */ diff --git a/arch/arm64/kernel/sys_compat.c b/arch/arm64/kernel/sys_compat.c index 26e9c4eeaba8..de2b0226e06d 100644 --- a/arch/arm64/kernel/sys_compat.c +++ b/arch/arm64/kernel/sys_compat.c @@ -26,7 +26,7 @@ #include #include -#include +#include static inline void do_compat_cache_op(unsigned long start, unsigned long end, int flags) From 5701ede884c2221e6ebbb54aec83dc433287bc50 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 4 Jul 2014 08:28:31 +0100 Subject: [PATCH 08/55] arm64: audit: Add audit hook in syscall_trace_enter/exit() This patch adds auditing functions on entry to or exit from every system call invocation. Acked-by: Richard Guy Briggs Acked-by: Will Deacon Signed-off-by: AKASHI Takahiro Signed-off-by: Catalin Marinas --- arch/arm64/kernel/ptrace.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 9fde010c945f..70526cfda056 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -19,6 +19,7 @@ * along with this program. If not, see . */ +#include #include #include #include @@ -39,6 +40,7 @@ #include #include #include +#include #include #include @@ -1113,11 +1115,16 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs) if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, regs->syscallno); + audit_syscall_entry(syscall_get_arch(), regs->syscallno, + regs->orig_x0, regs->regs[1], regs->regs[2], regs->regs[3]); + return regs->syscallno; } asmlinkage void syscall_trace_exit(struct pt_regs *regs) { + audit_syscall_exit(regs); + if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_exit(regs, regs_return_value(regs)); From 875cbf3e4614cfdcc7f65033e25292aec80f09c0 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 4 Jul 2014 08:28:30 +0100 Subject: [PATCH 09/55] arm64: Add audit support On AArch64, audit is supported through generic lib/audit.c and compat_audit.c, and so this patch adds the arch-specific definitions required.
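For illustration only (this sketch is not part of the patch), the AUDIT_ARCH_AARCH64 token defined in the hunk below is simply the AArch64 ELF machine code tagged with word-size and endianness bits; a compat (AArch32) task reports AUDIT_ARCH_ARM instead, which is why syscall_get_arch() branches on is_compat_task(). Assuming only EM_AARCH64 = 183 from the ELF machine registry:

#include <stdio.h>

/* Constants copied from the audit.h hunk below; EM_AARCH64 (183)
 * is an assumption taken from the ELF machine registry. */
#define __AUDIT_ARCH_64BIT	0x80000000U
#define __AUDIT_ARCH_LE		0x40000000U
#define EM_AARCH64		183

int main(void)
{
	unsigned int arch = EM_AARCH64 | __AUDIT_ARCH_64BIT | __AUDIT_ARCH_LE;

	/* Prints 0xc00000b7: 64-bit, little-endian, machine 183. */
	printf("AUDIT_ARCH_AARCH64 = 0x%08x\n", arch);
	return 0;
}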
Acked-by: Will Deacon Acked-by: Richard Guy Briggs Signed-off-by: AKASHI Takahiro Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 2 ++ arch/arm64/include/asm/syscall.h | 14 ++++++++++++++ include/uapi/linux/audit.h | 1 + 3 files changed, 17 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1cb806529393..ce6e733e0c05 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -10,6 +10,7 @@ config ARM64 select ARM_AMBA select ARM_ARCH_TIMER select ARM_GIC + select AUDIT_ARCH_COMPAT_GENERIC select BUILDTIME_EXTABLE_SORT select CLONE_BACKWARDS select COMMON_CLK @@ -28,6 +29,7 @@ config ARM64 select GENERIC_STRNLEN_USER select GENERIC_TIME_VSYSCALL select HARDIRQS_SW_RESEND + select HAVE_ARCH_AUDITSYSCALL select HAVE_ARCH_JUMP_LABEL select HAVE_ARCH_KGDB select HAVE_ARCH_TRACEHOOK diff --git a/arch/arm64/include/asm/syscall.h b/arch/arm64/include/asm/syscall.h index 383771eb0b87..709a574468f0 100644 --- a/arch/arm64/include/asm/syscall.h +++ b/arch/arm64/include/asm/syscall.h @@ -16,6 +16,8 @@ #ifndef __ASM_SYSCALL_H #define __ASM_SYSCALL_H +#include +#include #include extern const void *sys_call_table[]; @@ -105,4 +107,16 @@ static inline void syscall_set_arguments(struct task_struct *task, memcpy(&regs->regs[i], args, n * sizeof(args[0])); } +/* + * We don't care about endianness (__AUDIT_ARCH_LE bit) here because + * AArch64 has the same system calls both on little- and big- endian. + */ +static inline int syscall_get_arch(void) +{ + if (is_compat_task()) + return AUDIT_ARCH_ARM; + + return AUDIT_ARCH_AARCH64; +} + #endif /* __ASM_SYSCALL_H */ diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h index cf6714752b69..3b9ff33e1768 100644 --- a/include/uapi/linux/audit.h +++ b/include/uapi/linux/audit.h @@ -342,6 +342,7 @@ enum { #define __AUDIT_ARCH_64BIT 0x80000000 #define __AUDIT_ARCH_LE 0x40000000 +#define AUDIT_ARCH_AARCH64 (EM_AARCH64|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_ALPHA (EM_ALPHA|__AUDIT_ARCH_64BIT|__AUDIT_ARCH_LE) #define AUDIT_ARCH_ARM (EM_ARM|__AUDIT_ARCH_LE) #define AUDIT_ARCH_ARMEB (EM_ARM) From ebe6152e722b9df6b487a0d9464aeff216b6d364 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 10 Jul 2014 11:37:40 +0100 Subject: [PATCH 10/55] arm64: Cast KSTK_(EIP|ESP) to unsigned long This is for similarity with thread_saved_(pc|sp) and to avoid some compiler warnings in the audit code.
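The warnings in question are format-string mismatches: the underlying pt_regs fields are u64, so printing them with "%lx" is only clean when the macro hands back a plain unsigned long. A minimal userspace sketch of the failure mode, with hypothetical names and assuming a u64 typedef of unsigned long long:

#include <stdio.h>

typedef unsigned long long u64;	/* assumption for illustration */

struct fake_regs { u64 pc, sp; };

/* Casting inside the macro, as the patch does, fixes every caller. */
#define FAKE_KSTK_EIP(r)	((unsigned long)(r)->pc)
#define FAKE_KSTK_ESP(r)	((unsigned long)(r)->sp)

int main(void)
{
	struct fake_regs r = { 0x400000ULL, 0x7ff0000000ULL };

	/* Without the casts, "%lx" against a u64 warns under -Wformat. */
	printf("pc: %lx sp: %lx\n", FAKE_KSTK_EIP(&r), FAKE_KSTK_ESP(&r));
	return 0;
}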
Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/processor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 34de2a8f7d93..77712454486b 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -137,8 +137,8 @@ extern struct task_struct *cpu_switch_to(struct task_struct *prev, #define task_pt_regs(p) \ ((struct pt_regs *)(THREAD_START_SP + task_stack_page(p)) - 1) -#define KSTK_EIP(tsk) task_pt_regs(tsk)->pc -#define KSTK_ESP(tsk) task_pt_regs(tsk)->sp +#define KSTK_EIP(tsk) ((unsigned long)task_pt_regs(tsk)->pc) +#define KSTK_ESP(tsk) ((unsigned long)task_pt_regs(tsk)->sp) /* * Prefetching support From 909a4069da65a5cfca8c968edf9f0d99f694d2f3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 24 Jun 2014 16:51:34 +0100 Subject: [PATCH 11/55] arm64: head.S: remove unnecessary function alignment Currently __turn_mmu_on is aligned to 64 bytes to ensure that it doesn't span any page boundary, which simplifies the idmap and spares us requiring an additional page table to map half of the function. In keeping with other important requirements in architecture code, this fact is undocumented. Additionally, as the function consists of three instructions totalling 12 bytes with no literal pool data, a smaller alignment of 16 bytes would be sufficient. This patch reduces the alignment to 16 bytes and documents the underlying reason for the alignment. This reduces the required alignment of the entire .head.text section from 64 bytes to 16 bytes, though it may still be aligned to a larger value depending on TEXT_OFFSET. Signed-off-by: Mark Rutland Tested-by: Laura Abbott Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/head.S | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index a2c1195abb7f..6dfb21e6e105 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -455,8 +455,13 @@ ENDPROC(__enable_mmu) * x27 = *virtual* address to jump to upon completion * * other registers depend on the function called upon completion + * + * We align the entire function to the smallest power of two larger than it to + * ensure it fits within a single block map entry. Otherwise were PHYS_OFFSET + * close to the end of a 512MB or 1GB block we might require an additional + * table to map the entire function. */ - .align 6 + .align 4 __turn_mmu_on: msr sctlr_el1, x0 isb From bd00cd5f8c8c3c282bb1e1eac6a6679a4f808091 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 24 Jun 2014 16:51:35 +0100 Subject: [PATCH 12/55] arm64: place initial page tables above the kernel Currently we place swapper_pg_dir and idmap_pg_dir below the kernel image, between PHYS_OFFSET and (PHYS_OFFSET + TEXT_OFFSET). However, bootloaders may use portions of this memory below the kernel and we do not parse the memory reservation list until after the MMU has been enabled. As such we may clobber some memory a bootloader wishes to have preserved. To enable the use of all of this memory by bootloaders (when the required memory reservations are communicated to the kernel) it is necessary to move our initial page tables elsewhere. As we currently have an effectively unbound requirement for memory at the end of the kernel image for .bss, we can place the page tables here. This patch moves the initial page table to the end of the kernel image, after the BSS. 
As they do not consist of any initialised data they will be stripped from the kernel Image as with the BSS. The BSS clearing routine is updated to stop at __bss_stop rather than _end so as to not clobber the page tables, and memory reservations made redundant by the new organisation are removed. Signed-off-by: Mark Rutland Tested-by: Laura Abbott Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/page.h | 9 +++++++++ arch/arm64/kernel/head.S | 28 ++++++++-------------------- arch/arm64/kernel/vmlinux.lds.S | 7 +++++++ arch/arm64/mm/init.c | 12 ++++-------- 4 files changed, 28 insertions(+), 28 deletions(-) diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 46bf66628b6a..a6331e6a92b5 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -31,6 +31,15 @@ /* We do define AT_SYSINFO_EHDR but don't use the gate mechanism */ #define __HAVE_ARCH_GATE_AREA 1 +/* + * The idmap and swapper page tables need some space reserved in the kernel + * image. The idmap only requires a pgd and a next level table to (section) map + * the kernel, while the swapper also maps the FDT and requires an additional + * table to map an early UART. See __create_page_tables for more information. + */ +#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) +#define IDMAP_DIR_SIZE (2 * PAGE_SIZE) + #ifndef __ASSEMBLY__ #ifdef CONFIG_ARM64_64K_PAGES diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 6dfb21e6e105..15d3c02a38d0 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -35,29 +35,17 @@ #include #include -/* - * swapper_pg_dir is the virtual address of the initial page table. We place - * the page tables 3 * PAGE_SIZE below KERNEL_RAM_VADDR. The idmap_pg_dir has - * 2 pages and is placed below swapper_pg_dir. - */ #define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) #if (KERNEL_RAM_VADDR & 0xfffff) != 0x80000 #error KERNEL_RAM_VADDR must start at 0xXXX80000 #endif -#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) -#define IDMAP_DIR_SIZE (2 * PAGE_SIZE) - - .globl swapper_pg_dir - .equ swapper_pg_dir, KERNEL_RAM_VADDR - SWAPPER_DIR_SIZE - - .globl idmap_pg_dir - .equ idmap_pg_dir, swapper_pg_dir - IDMAP_DIR_SIZE - - .macro pgtbl, ttb0, ttb1, phys - add \ttb1, \phys, #TEXT_OFFSET - SWAPPER_DIR_SIZE - sub \ttb0, \ttb1, #IDMAP_DIR_SIZE + .macro pgtbl, ttb0, ttb1, virt_to_phys + ldr \ttb1, =swapper_pg_dir + ldr \ttb0, =idmap_pg_dir + add \ttb1, \ttb1, \virt_to_phys + add \ttb0, \ttb0, \virt_to_phys .endm #ifdef CONFIG_ARM64_64K_PAGES @@ -413,7 +401,7 @@ ENTRY(secondary_startup) mov x23, x0 // x23=current cpu_table cbz x23, __error_p // invalid processor (x23=0)? 
- pgtbl x25, x26, x24 // x25=TTBR0, x26=TTBR1 + pgtbl x25, x26, x28 // x25=TTBR0, x26=TTBR1 ldr x12, [x23, #CPU_INFO_SETUP] add x12, x12, x28 // __virt_to_phys blr x12 // initialise processor @@ -527,7 +515,7 @@ ENDPROC(__calc_phys_offset) * - pgd entry for fixed mappings (TTBR1) */ __create_page_tables: - pgtbl x25, x26, x24 // idmap_pg_dir and swapper_pg_dir addresses + pgtbl x25, x26, x28 // idmap_pg_dir and swapper_pg_dir addresses mov x27, lr /* @@ -616,7 +604,7 @@ ENDPROC(__create_page_tables) __switch_data: .quad __mmap_switched .quad __bss_start // x6 - .quad _end // x7 + .quad __bss_stop // x7 .quad processor_id // x4 .quad __fdt_pointer // x5 .quad memstart_addr // x6 diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index f1e6d5c032e1..c6648d301adf 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -104,6 +104,13 @@ SECTIONS _edata = .; BSS_SECTION(0, 0, 0) + + . = ALIGN(PAGE_SIZE); + idmap_pg_dir = .; + . += IDMAP_DIR_SIZE; + swapper_pg_dir = .; + . += SWAPPER_DIR_SIZE; + _end = .; STABS_DEBUG diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c index f43db8a69262..7f68804814a1 100644 --- a/arch/arm64/mm/init.c +++ b/arch/arm64/mm/init.c @@ -128,20 +128,16 @@ void __init arm64_memblock_init(void) { phys_addr_t dma_phys_limit = 0; - /* Register the kernel text, kernel data and initrd with memblock */ + /* + * Register the kernel text, kernel data, initrd, and initial + * pagetables with memblock. + */ memblock_reserve(__pa(_text), _end - _text); #ifdef CONFIG_BLK_DEV_INITRD if (initrd_start) memblock_reserve(__virt_to_phys(initrd_start), initrd_end - initrd_start); #endif - /* - * Reserve the page tables. These are already in use, - * and can only be in node 0. - */ - memblock_reserve(__pa(swapper_pg_dir), SWAPPER_DIR_SIZE); - memblock_reserve(__pa(idmap_pg_dir), IDMAP_DIR_SIZE); - early_init_fdt_scan_reserved_mem(); /* 4GB maximum for 32-bit only capable devices */ From a2c1d73b94ed49f5fac12e95052d7b140783f800 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 24 Jun 2014 16:51:36 +0100 Subject: [PATCH 13/55] arm64: Update the Image header Currently the kernel Image is stripped of everything past the initial stack, and at runtime the memory is initialised and used by the kernel. This makes the effective minimum memory footprint of the kernel larger than the size of the loaded binary, though bootloaders have no mechanism to identify how large this minimum memory footprint is. This makes it difficult to choose safe locations to place both the kernel and other binaries required at boot (DTB, initrd, etc), such that the kernel won't clobber said binaries or other reserved memory during initialisation. Additionally when big endian support was added the image load offset was overlooked, and is currently of an arbitrary endianness, which makes it difficult for bootloaders to make use of it. It seems that bootloaders aren't respecting the image load offset at present anyway, and are assuming that offset 0x80000 will always be correct. This patch adds an effective image size to the kernel header which describes the amount of memory from the start of the kernel Image binary which the kernel expects to use before detecting memory and handling any memory reservations. This can be used by bootloaders to choose suitable locations to load the kernel and/or other binaries such that the kernel will not clobber any memory unexpectedly. 
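To make the new header concrete before continuing: a bootloader-side sketch of consuming it could look like the following (the struct layout mirrors the booting.txt hunk below; le64_to_host() is an invented helper standing in for whatever byte-swap primitive the bootloader has):

#include <stdint.h>
#include <string.h>

struct arm64_image_header {	/* 64 bytes, matching the layout below */
	uint32_t code0, code1;
	uint64_t text_offset;	/* little-endian */
	uint64_t image_size;	/* little-endian; zero on older kernels */
	uint64_t flags;		/* little-endian; bit 0: 1 if BE kernel */
	uint64_t res2, res3, res4;
	uint32_t magic;		/* 0x644d5241, "ARM\x64" */
	uint32_t res5;
};

extern uint64_t le64_to_host(uint64_t v);	/* assumed helper */

static int parse_arm64_header(const void *image, uint64_t *text_offset,
			      uint64_t *image_size)
{
	struct arm64_image_header h;

	memcpy(&h, image, sizeof(h));
	if (memcmp(&h.magic, "ARM\x64", 4))	/* endian-safe magic check */
		return -1;
	*image_size = le64_to_host(h.image_size);
	/* image_size == 0 marks an older image: text_offset is 0x80000. */
	*text_offset = *image_size ? le64_to_host(h.text_offset) : 0x80000;
	return 0;
}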
As before, memory reservations are required to prevent the kernel from clobbering these locations later. Both the image load offset and the effective image size are forced to be little-endian regardless of the native endianness of the kernel to enable bootloaders to load a kernel of arbitrary endianness. Bootloaders which wish to make use of the load offset can inspect the effective image size field for a non-zero value to determine if the offset is of a known endianness. To enable software to determine the endianness of the kernel as may be required for certain use-cases, a new flags field (also little-endian) is added to the kernel header to export this information. The documentation is updated to clarify these details. To discourage future assumptions regarding the value of text_offset, the value at this point in time is removed from the main flow of the documentation (though kept as a compatibility note). Some minor formatting issues in the documentation are also corrected. Signed-off-by: Mark Rutland Acked-by: Tom Rini Cc: Geoff Levand Cc: Kevin Hilman Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- Documentation/arm64/booting.txt | 43 ++++++++++++++++++----- arch/arm64/kernel/head.S | 6 ++-- arch/arm64/kernel/image.h | 62 +++++++++++++++++++++++++++++++++ arch/arm64/kernel/vmlinux.lds.S | 4 +++ 4 files changed, 104 insertions(+), 11 deletions(-) create mode 100644 arch/arm64/kernel/image.h diff --git a/Documentation/arm64/booting.txt b/Documentation/arm64/booting.txt index 37fc4f632176..85af34d55cee 100644 --- a/Documentation/arm64/booting.txt +++ b/Documentation/arm64/booting.txt @@ -72,27 +72,54 @@ The decompressed kernel image contains a 64-byte header as follows: u32 code0; /* Executable code */ u32 code1; /* Executable code */ - u64 text_offset; /* Image load offset */ - u64 res0 = 0; /* reserved */ - u64 res1 = 0; /* reserved */ + u64 text_offset; /* Image load offset, little endian */ + u64 image_size; /* Effective Image size, little endian */ + u64 flags; /* kernel flags, little endian */ u64 res2 = 0; /* reserved */ u64 res3 = 0; /* reserved */ u64 res4 = 0; /* reserved */ u32 magic = 0x644d5241; /* Magic number, little endian, "ARM\x64" */ - u32 res5 = 0; /* reserved */ + u32 res5; /* reserved (used for PE COFF offset) */ Header notes: +- As of v3.17, all fields are little endian unless stated otherwise. + - code0/code1 are responsible for branching to stext. + - when booting through EFI, code0/code1 are initially skipped. res5 is an offset to the PE header and the PE header has the EFI - entry point (efi_stub_entry). When the stub has done its work, it + entry point (efi_stub_entry). When the stub has done its work, it jumps to code0 to resume the normal boot process. -The image must be placed at the specified offset (currently 0x80000) -from the start of the system RAM and called there. The start of the -system RAM must be aligned to 2MB. +- Prior to v3.17, the endianness of text_offset was not specified. In + these cases image_size is zero and text_offset is 0x80000 in the + endianness of the kernel. Where image_size is non-zero image_size is + little-endian and must be respected. Where image_size is zero, + text_offset can be assumed to be 0x80000. + +- The flags field (introduced in v3.17) is a little-endian 64-bit field + composed as follows: + Bit 0: Kernel endianness. 1 if BE, 0 if LE. + Bits 1-63: Reserved.
+ +- When image_size is zero, a bootloader should attempt to keep as much + memory as possible free for use by the kernel immediately after the + end of the kernel image. The amount of space required will vary + depending on selected features, and is effectively unbound. + +The Image must be placed text_offset bytes from a 2MB aligned base +address near the start of usable system RAM and called there. Memory +below that base address is currently unusable by Linux, and therefore it +is strongly recommended that this location is the start of system RAM. +At least image_size bytes from the start of the image must be free for +use by the kernel. + +Any memory described to the kernel (even that below the 2MB aligned base +address) which is not marked as reserved from the kernel (e.g. with a +memreserve region in the device tree) will be considered as available to +the kernel. Before jumping into the kernel, the following conditions must be met: diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 15d3c02a38d0..3ba0fc02c0de 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -108,9 +108,9 @@ efi_head: b stext // branch to kernel start, magic .long 0 // reserved #endif - .quad TEXT_OFFSET // Image load offset from start of RAM - .quad 0 // reserved - .quad 0 // reserved + .quad _kernel_offset_le // Image load offset from start of RAM, little-endian + .quad _kernel_size_le // Effective size of kernel image, little-endian + .quad _kernel_flags_le // Informative flags, little-endian .quad 0 // reserved .quad 0 // reserved .quad 0 // reserved diff --git a/arch/arm64/kernel/image.h b/arch/arm64/kernel/image.h new file mode 100644 index 000000000000..8fae0756e175 --- /dev/null +++ b/arch/arm64/kernel/image.h @@ -0,0 +1,62 @@ +/* + * Linker script macros to generate Image header fields. + * + * Copyright (C) 2014 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __ASM_IMAGE_H +#define __ASM_IMAGE_H + +#ifndef LINKER_SCRIPT +#error This file should only be included in vmlinux.lds.S +#endif + +/* + * There aren't any ELF relocations we can use to endian-swap values known only + * at link time (e.g. the subtraction of two symbol addresses), so we must get + * the linker to endian-swap certain values before emitting them.
+ */ +#ifdef CONFIG_CPU_BIG_ENDIAN +#define DATA_LE64(data) \ + ((((data) & 0x00000000000000ff) << 56) | \ + (((data) & 0x000000000000ff00) << 40) | \ + (((data) & 0x0000000000ff0000) << 24) | \ + (((data) & 0x00000000ff000000) << 8) | \ + (((data) & 0x000000ff00000000) >> 8) | \ + (((data) & 0x0000ff0000000000) >> 24) | \ + (((data) & 0x00ff000000000000) >> 40) | \ + (((data) & 0xff00000000000000) >> 56)) +#else +#define DATA_LE64(data) ((data) & 0xffffffffffffffff) +#endif + +#ifdef CONFIG_CPU_BIG_ENDIAN +#define __HEAD_FLAG_BE 1 +#else +#define __HEAD_FLAG_BE 0 +#endif + +#define __HEAD_FLAGS (__HEAD_FLAG_BE << 0) + +/* + * These will output as part of the Image header, which should be little-endian + * regardless of the endianness of the kernel. While constant values could be + * endian swapped in head.S, all are done here for consistency. + */ +#define HEAD_SYMBOLS \ + _kernel_size_le = DATA_LE64(_end - _text); \ + _kernel_offset_le = DATA_LE64(TEXT_OFFSET); \ + _kernel_flags_le = DATA_LE64(__HEAD_FLAGS); + +#endif /* __ASM_IMAGE_H */ diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index c6648d301adf..a814768ae148 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -9,6 +9,8 @@ #include #include +#include "image.h" + #define ARM_EXIT_KEEP(x) #define ARM_EXIT_DISCARD(x) x @@ -114,6 +116,8 @@ SECTIONS _end = .; STABS_DEBUG + + HEAD_SYMBOLS } /* From da57a369d3bc5cd61db90f7e9555840381db9b09 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 24 Jun 2014 16:51:37 +0100 Subject: [PATCH 14/55] arm64: Enable TEXT_OFFSET fuzzing The arm64 Image header contains a text_offset field which bootloaders are supposed to read to determine the offset (from a 2MB aligned "start of memory" per booting.txt) at which to load the kernel. The offset is not well respected by bootloaders at present, and due to the lack of variation there is little incentive to support it. This is unfortunate for the sake of future kernels where we may wish to vary the text offset (even zeroing it). This patch adds options to arm64 to enable fuzz-testing of text_offset. CONFIG_ARM64_RANDOMIZE_TEXT_OFFSET forces the text offset to a random 16-byte aligned value in the range [0..2MB) upon a build of the kernel. It is recommended that distribution kernels enable randomization to test bootloaders such that any compliance issues can be fixed early. Signed-off-by: Mark Rutland Acked-by: Tom Rini Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig.debug | 15 +++++++++++++++ arch/arm64/Makefile | 4 ++++ arch/arm64/kernel/head.S | 8 ++++++-- arch/arm64/kernel/vmlinux.lds.S | 5 +++++ 4 files changed, 30 insertions(+), 2 deletions(-) diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index 1c1b75629842..4ee8e90b7a45 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -28,4 +28,19 @@ config PID_IN_CONTEXTIDR instructions during context switch. Say Y here only if you are planning to use hardware trace tools with this kernel. +config ARM64_RANDOMIZE_TEXT_OFFSET + bool "Randomize TEXT_OFFSET at build time" + help + Say Y here if you want the image load offset (AKA TEXT_OFFSET) + of the kernel to be randomized at build-time. When selected, + this option will cause TEXT_OFFSET to be randomized upon any + build of the kernel, and the offset will be reflected in the + text_offset field of the resulting Image. This can be used to + fuzz-test bootloaders which respect text_offset.
+ + This option is intended for bootloader and/or kernel testing + only. Bootloaders must make no assumptions regarding the value + of TEXT_OFFSET and platforms must not require a specific + value. + endmenu diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile index 8185a913c5ed..e8d025c1459e 100644 --- a/arch/arm64/Makefile +++ b/arch/arm64/Makefile @@ -38,7 +38,11 @@ CHECKFLAGS += -D__aarch64__ head-y := arch/arm64/kernel/head.o # The byte offset of the kernel image in RAM from the start of RAM. +ifeq ($(CONFIG_ARM64_RANDOMIZE_TEXT_OFFSET), y) +TEXT_OFFSET := $(shell awk 'BEGIN {srand(); printf "0x%04x0\n", int(65535 * rand())}') +else TEXT_OFFSET := 0x00080000 +endif export TEXT_OFFSET GZFLAGS diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 3ba0fc02c0de..69dafe9621fd 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -37,8 +37,12 @@ #define KERNEL_RAM_VADDR (PAGE_OFFSET + TEXT_OFFSET) -#if (KERNEL_RAM_VADDR & 0xfffff) != 0x80000 -#error KERNEL_RAM_VADDR must start at 0xXXX80000 +#if (TEXT_OFFSET & 0xf) != 0 +#error TEXT_OFFSET must be at least 16B aligned +#elif (PAGE_OFFSET & 0xfffff) != 0 +#error PAGE_OFFSET must be at least 2MB aligned +#elif TEXT_OFFSET > 0xfffff +#error TEXT_OFFSET must be less than 2MB #endif .macro pgtbl, ttb0, ttb1, virt_to_phys diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index a814768ae148..97f0c0429dfa 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -125,3 +125,8 @@ SECTIONS */ ASSERT(((__hyp_idmap_text_start + PAGE_SIZE) > __hyp_idmap_text_end), "HYP init code too big") + +/* + * If padding is applied before .head.text, virt<->phys conversions will fail. + */ +ASSERT(_text == (PAGE_OFFSET + TEXT_OFFSET), "HEAD is misaligned") From ac7b406c1a9d50ddbf5e5cbce8ca4d68d36ac2db Mon Sep 17 00:00:00 2001 From: Jungseok Lee Date: Mon, 12 May 2014 10:40:30 +0100 Subject: [PATCH 15/55] arm64: Use pr_* instead of printk This patch fixes the following checkpatch complaint by using pr_* instead of printk.
WARNING: printk() should include KERN_ facility level Signed-off-by: Jungseok Lee Reviewed-by: Sungjinn Chung Acked-by: Kukjin Kim Signed-off-by: Catalin Marinas --- arch/arm64/kernel/traps.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index c43cfa9b8304..506f7814e305 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -156,7 +156,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk) frame.pc = thread_saved_pc(tsk); } - printk("Call trace:\n"); + pr_emerg("Call trace:\n"); while (1) { unsigned long where = frame.pc; int ret; @@ -331,17 +331,17 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr) void __pte_error(const char *file, int line, unsigned long val) { - printk("%s:%d: bad pte %016lx.\n", file, line, val); + pr_crit("%s:%d: bad pte %016lx.\n", file, line, val); } void __pmd_error(const char *file, int line, unsigned long val) { - printk("%s:%d: bad pmd %016lx.\n", file, line, val); + pr_crit("%s:%d: bad pmd %016lx.\n", file, line, val); } void __pgd_error(const char *file, int line, unsigned long val) { - printk("%s:%d: bad pgd %016lx.\n", file, line, val); + pr_crit("%s:%d: bad pgd %016lx.\n", file, line, val); } void __init trap_init(void) From b2f8c07bcb7d1a3575f41444d2d8048d0c922762 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 15 Jul 2014 15:46:02 +0100 Subject: [PATCH 16/55] arm64: Remove duplicate (SWAPPER|IDMAP)_DIR_SIZE definitions Just keep the asm/page.h definition as this is included in vmlinux.lds.S as well. Signed-off-by: Catalin Marinas Acked-by: Mark Rutland --- arch/arm64/include/asm/pgtable.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index e0ccceb317d9..d7455fa83bc7 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -383,9 +383,6 @@ static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot) extern pgd_t swapper_pg_dir[PTRS_PER_PGD]; extern pgd_t idmap_pg_dir[PTRS_PER_PGD]; -#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) -#define IDMAP_DIR_SIZE (2 * PAGE_SIZE) - /* * Encode and decode a swap entry: * bits 0-1: present (must be zero) From 8715493852783358ef8656a0054a14bf822509cf Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Jul 2014 19:22:11 +0100 Subject: [PATCH 17/55] arm64: vdso: put vdso datapage in a separate vma The VDSO datapage doesn't need to be executable (no code there) or CoW-able (the kernel writes the page, so a private copy is totally useless). This patch moves the datapage into its own VMA, identified as "[vvar]" in /proc/<pid>/maps.
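As an aside (illustrative only, not part of the patch), the new split is easy to observe from userspace by scanning the process's own maps file for the special mapping names:

        /* Hypothetical check: print the vdso-related VMAs of this process. */
        #include <stdio.h>
        #include <string.h>

        int main(void)
        {
                char line[256];
                FILE *f = fopen("/proc/self/maps", "r");

                if (!f)
                        return 1;
                while (fgets(line, sizeof(line), f))
                        if (strstr(line, "[vdso]") || strstr(line, "[vvar]"))
                                fputs(line, stdout);
                fclose(f);
                return 0;
        }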
Cc: Andy Lutomirski Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso.c | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 50384fec56c4..84cafbc3eb54 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -138,11 +138,12 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; - unsigned long vdso_base, vdso_mapping_len; + unsigned long vdso_base, vdso_text_len, vdso_mapping_len; int ret; + vdso_text_len = vdso_pages << PAGE_SHIFT; /* Be sure to map the data page */ - vdso_mapping_len = (vdso_pages + 1) << PAGE_SHIFT; + vdso_mapping_len = vdso_text_len + PAGE_SIZE; down_write(&mm->mmap_sem); vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); @@ -152,35 +153,52 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, } mm->context.vdso = (void *)vdso_base; - ret = install_special_mapping(mm, vdso_base, vdso_mapping_len, + ret = install_special_mapping(mm, vdso_base, vdso_text_len, VM_READ|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, vdso_pagelist); - if (ret) { - mm->context.vdso = NULL; + if (ret) goto up_fail; - } + + vdso_base += vdso_text_len; + ret = install_special_mapping(mm, vdso_base, PAGE_SIZE, + VM_READ|VM_MAYREAD, + vdso_pagelist + vdso_pages); + if (ret) + goto up_fail; + + up_write(&mm->mmap_sem); + return 0; up_fail: + mm->context.vdso = NULL; up_write(&mm->mmap_sem); - return ret; } const char *arch_vma_name(struct vm_area_struct *vma) { + unsigned long vdso_text; + + if (!vma->vm_mm) + return NULL; + + vdso_text = (unsigned long)vma->vm_mm->context.vdso; + /* * We can re-use the vdso pointer in mm_context_t for identifying * the vectors page for compat applications. The vDSO will always * sit above TASK_UNMAPPED_BASE and so we don't need to worry about * it conflicting with the vectors base. */ - if (vma->vm_mm && vma->vm_start == (long)vma->vm_mm->context.vdso) { + if (vma->vm_start == vdso_text) { #ifdef CONFIG_COMPAT if (vma->vm_start == AARCH32_VECTORS_BASE) return "[vectors]"; #endif return "[vdso]"; + } else if (vma->vm_start == (vdso_text + (vdso_pages << PAGE_SHIFT))) { + return "[vvar]"; } return NULL; From 2fea7f6c98f5957e539eb8aa0ce849729b900342 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Jul 2014 19:22:12 +0100 Subject: [PATCH 18/55] arm64: vdso: move to _install_special_mapping and remove arch_vma_name _install_special_mapping replaces install_special_mapping and removes the need to detect special VMA in arch_vma_name. This patch moves the vdso and compat vectors page code over to the new API. Cc: Andy Lutomirski Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso.c | 80 ++++++++++++++++++---------------------- 1 file changed, 35 insertions(+), 45 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 84cafbc3eb54..60ae12087d9f 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -88,22 +88,29 @@ int aarch32_setup_vectors_page(struct linux_binprm *bprm, int uses_interp) { struct mm_struct *mm = current->mm; unsigned long addr = AARCH32_VECTORS_BASE; - int ret; + static struct vm_special_mapping spec = { + .name = "[vectors]", + .pages = vectors_page, + + }; + void *ret; down_write(&mm->mmap_sem); current->mm->context.vdso = (void *)addr; /* Map vectors page at the high address. 
*/ - ret = install_special_mapping(mm, addr, PAGE_SIZE, - VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC, - vectors_page); + ret = _install_special_mapping(mm, addr, PAGE_SIZE, + VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYEXEC, + &spec); up_write(&mm->mmap_sem); - return ret; + return PTR_ERR_OR_ZERO(ret); } #endif /* CONFIG_COMPAT */ +static struct vm_special_mapping vdso_spec[2]; + static int __init vdso_init(void) { int i; @@ -130,6 +137,17 @@ static int __init vdso_init(void) /* Grab the vDSO data page. */ vdso_pagelist[i] = virt_to_page(vdso_data); + /* Populate the special mapping structures */ + vdso_spec[0] = (struct vm_special_mapping) { + .name = "[vdso]", + .pages = vdso_pagelist, + }; + + vdso_spec[1] = (struct vm_special_mapping) { + .name = "[vvar]", + .pages = vdso_pagelist + vdso_pages, + }; + return 0; } arch_initcall(vdso_init); @@ -139,7 +157,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, { struct mm_struct *mm = current->mm; unsigned long vdso_base, vdso_text_len, vdso_mapping_len; - int ret; + void *ret; vdso_text_len = vdso_pages << PAGE_SHIFT; /* Be sure to map the data page */ @@ -148,23 +166,23 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, down_write(&mm->mmap_sem); vdso_base = get_unmapped_area(NULL, 0, vdso_mapping_len, 0, 0); if (IS_ERR_VALUE(vdso_base)) { - ret = vdso_base; + ret = ERR_PTR(vdso_base); goto up_fail; } mm->context.vdso = (void *)vdso_base; - ret = install_special_mapping(mm, vdso_base, vdso_text_len, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - vdso_pagelist); - if (ret) + ret = _install_special_mapping(mm, vdso_base, vdso_text_len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + &vdso_spec[0]); + if (IS_ERR(ret)) goto up_fail; vdso_base += vdso_text_len; - ret = install_special_mapping(mm, vdso_base, PAGE_SIZE, - VM_READ|VM_MAYREAD, - vdso_pagelist + vdso_pages); - if (ret) + ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, + VM_READ|VM_MAYREAD, + &vdso_spec[1]); + if (IS_ERR(ret)) goto up_fail; up_write(&mm->mmap_sem); @@ -173,35 +191,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, up_fail: mm->context.vdso = NULL; up_write(&mm->mmap_sem); - return ret; -} - -const char *arch_vma_name(struct vm_area_struct *vma) -{ - unsigned long vdso_text; - - if (!vma->vm_mm) - return NULL; - - vdso_text = (unsigned long)vma->vm_mm->context.vdso; - - /* - * We can re-use the vdso pointer in mm_context_t for identifying - * the vectors page for compat applications. The vDSO will always - * sit above TASK_UNMAPPED_BASE and so we don't need to worry about - * it conflicting with the vectors base. - */ - if (vma->vm_start == vdso_text) { -#ifdef CONFIG_COMPAT - if (vma->vm_start == AARCH32_VECTORS_BASE) - return "[vectors]"; -#endif - return "[vdso]"; - } else if (vma->vm_start == (vdso_text + (vdso_pages << PAGE_SHIFT))) { - return "[vvar]"; - } - - return NULL; + return PTR_ERR(ret); } /* From 601255ae3c98fdeeee3a8bb4696425e4f868b4f1 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 9 Jul 2014 19:22:13 +0100 Subject: [PATCH 19/55] arm64: vdso: move data page before code pages Andy pointed out that binutils generates additional sections in the vdso image (e.g. section string table) which, if our .text section gets big enough, could cross a page boundary and end up screwing up the location where the kernel expects to put the data page. This patch solves the issue in the same manner as x86_32, by moving the data page before the code pages. 
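A sketch of the address arithmetic this enables (assuming 4K pages; illustrative, not taken from the patch): with the data page placed immediately before the vdso text, the vdso can find its data at a link-time constant offset, which is what the PROVIDE(_vdso_data = . - PAGE_SIZE) line in the linker script below expresses.

        #include <stdint.h>

        #define PAGE_SIZE 4096UL

        /* The [vvar] data page sits one page below the [vdso] text. */
        static inline uintptr_t vvar_addr(uintptr_t vdso_text_base)
        {
                return vdso_text_base - PAGE_SIZE;
        }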
Cc: Andy Lutomirski Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso.c | 34 +++++++++++++++---------------- arch/arm64/kernel/vdso/vdso.lds.S | 4 +--- 2 files changed, 18 insertions(+), 20 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 60ae12087d9f..24f2e8c62479 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -121,8 +121,8 @@ static int __init vdso_init(void) } vdso_pages = (&vdso_end - &vdso_start) >> PAGE_SHIFT; - pr_info("vdso: %ld pages (%ld code, %ld data) at base %p\n", - vdso_pages + 1, vdso_pages, 1L, &vdso_start); + pr_info("vdso: %ld pages (%ld code @ %p, %ld data @ %p)\n", + vdso_pages + 1, vdso_pages, &vdso_start, 1L, vdso_data); /* Allocate the vDSO pagelist, plus a page for the data. */ vdso_pagelist = kcalloc(vdso_pages + 1, sizeof(struct page *), @@ -130,22 +130,22 @@ static int __init vdso_init(void) if (vdso_pagelist == NULL) return -ENOMEM; + /* Grab the vDSO data page. */ + vdso_pagelist[0] = virt_to_page(vdso_data); + /* Grab the vDSO code pages. */ for (i = 0; i < vdso_pages; i++) - vdso_pagelist[i] = virt_to_page(&vdso_start + i * PAGE_SIZE); - - /* Grab the vDSO data page. */ - vdso_pagelist[i] = virt_to_page(vdso_data); + vdso_pagelist[i + 1] = virt_to_page(&vdso_start + i * PAGE_SIZE); /* Populate the special mapping structures */ vdso_spec[0] = (struct vm_special_mapping) { - .name = "[vdso]", + .name = "[vvar]", .pages = vdso_pagelist, }; vdso_spec[1] = (struct vm_special_mapping) { - .name = "[vvar]", - .pages = vdso_pagelist + vdso_pages, + .name = "[vdso]", + .pages = &vdso_pagelist[1], }; return 0; @@ -169,22 +169,22 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, ret = ERR_PTR(vdso_base); goto up_fail; } - mm->context.vdso = (void *)vdso_base; - - ret = _install_special_mapping(mm, vdso_base, vdso_text_len, - VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, + ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, + VM_READ|VM_MAYREAD, &vdso_spec[0]); if (IS_ERR(ret)) goto up_fail; - vdso_base += vdso_text_len; - ret = _install_special_mapping(mm, vdso_base, PAGE_SIZE, - VM_READ|VM_MAYREAD, + vdso_base += PAGE_SIZE; + mm->context.vdso = (void *)vdso_base; + ret = _install_special_mapping(mm, vdso_base, vdso_text_len, + VM_READ|VM_EXEC| + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, &vdso_spec[1]); if (IS_ERR(ret)) goto up_fail; + up_write(&mm->mmap_sem); return 0; diff --git a/arch/arm64/kernel/vdso/vdso.lds.S b/arch/arm64/kernel/vdso/vdso.lds.S index 8154b8d1c826..beca249bc2f3 100644 --- a/arch/arm64/kernel/vdso/vdso.lds.S +++ b/arch/arm64/kernel/vdso/vdso.lds.S @@ -28,6 +28,7 @@ OUTPUT_ARCH(aarch64) SECTIONS { + PROVIDE(_vdso_data = . - PAGE_SIZE); . = VDSO_LBASE + SIZEOF_HEADERS; .hash : { *(.hash) } :text @@ -57,9 +58,6 @@ SECTIONS _end = .; PROVIDE(end = .); - . 
= ALIGN(PAGE_SIZE); - PROVIDE(_vdso_data = .); - /DISCARD/ : { *(.note.GNU-stack) *(.data .data.* .gnu.linkonce.d.* .sdata*) From ad789ba5f7086138461420d2156478d33fb61077 Mon Sep 17 00:00:00 2001 From: Ian Campbell Date: Tue, 15 Jul 2014 08:38:08 +0100 Subject: [PATCH 20/55] arm64: Align the kbuild output for VDSOL and VDSOA Signed-off-by: Ian Campbell Cc: Catalin Marinas Cc: Will Deacon Cc: Michal Marek Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kbuild@vger.kernel.org Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso/Makefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 6d20b7d162d8..84b942612051 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -47,9 +47,9 @@ $(obj-vdso): %.o: %.S $(call if_changed_dep,vdsoas) # Actual build commands -quiet_cmd_vdsold = VDSOL $@ +quiet_cmd_vdsold = VDSOL $@ cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@ -quiet_cmd_vdsoas = VDSOA $@ +quiet_cmd_vdsoas = VDSOA $@ cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< # Install commands for the unstripped file From 5959e25729a521097856d71528504deda1e11584 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 10 Jul 2014 12:40:09 +0100 Subject: [PATCH 21/55] arm64: fpsimd: avoid restoring fpcr if the contents haven't changed Writing to the FPCR is commonly implemented as a self-synchronising operation in the CPU, so avoid writing to the register when the saved value matches that in the hardware already. Cc: Ard Biesheuvel Reviewed-by: Lorenzo Pieralisi Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimdmacros.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index 768414d55e64..007618b8188c 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -40,6 +40,19 @@ str w\tmpnr, [\state, #16 * 2 + 4] .endm +.macro fpsimd_restore_fpcr state, tmp + /* + * Writes to fpcr may be self-synchronising, so avoid restoring + * the register if it hasn't changed. + */ + mrs \tmp, fpcr + cmp \tmp, \state + b.eq 9999f + msr fpcr, \state +9999: +.endm + +/* Clobbers \state */ .macro fpsimd_restore state, tmpnr ldp q0, q1, [\state, #16 * 0] ldp q2, q3, [\state, #16 * 2] @@ -60,7 +73,7 @@ ldr w\tmpnr, [\state, #16 * 2] msr fpsr, x\tmpnr ldr w\tmpnr, [\state, #16 * 2 + 4] - msr fpcr, x\tmpnr + fpsimd_restore_fpcr x\tmpnr, \state .endm .altmacro @@ -84,7 +97,7 @@ .macro fpsimd_restore_partial state, tmpnr1, tmpnr2 ldp w\tmpnr1, w\tmpnr2, [\state] msr fpsr, x\tmpnr1 - msr fpcr, x\tmpnr2 + fpsimd_restore_fpcr x\tmpnr2, x\tmpnr1 adr x\tmpnr1, 0f ldr w\tmpnr2, [\state, #8] add \state, \state, x\tmpnr2, lsl #4 From 756854d9b99a735f86bc3b86df5c19be12e8746e Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 17 Jul 2014 18:19:18 +0100 Subject: [PATCH 22/55] arm64: kernel: enable PSCI cpu operations on UP systems PSCI CPU operations have to be enabled on UP kernels so that calls like eg cpu_suspend can be made functional on UP too. This patch reworks the PSCI CPU operations so that they can be enabled on UP systems. 
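The net effect, sketched for orientation (the real hunks follow below): only the spin-table entry, which is meaningful solely for secondary CPU bring-up, stays under CONFIG_SMP, while cpu_psci_ops becomes visible to UP kernels as well.

        /* Sketch of the end state, not the literal patch: */
        static const struct cpu_operations *supported_cpu_ops[] __initconst = {
        #ifdef CONFIG_SMP
                &smp_spin_table_ops,
        #endif
                &cpu_psci_ops,          /* now registered on UP kernels too */
                NULL,
        };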
Acked-by: Mark Rutland Signed-off-by: Lorenzo Pieralisi Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpu_ops.c | 2 +- arch/arm64/kernel/psci.c | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/cpu_ops.c b/arch/arm64/kernel/cpu_ops.c index d62d12fb36c8..cce952440c64 100644 --- a/arch/arm64/kernel/cpu_ops.c +++ b/arch/arm64/kernel/cpu_ops.c @@ -30,8 +30,8 @@ const struct cpu_operations *cpu_ops[NR_CPUS]; static const struct cpu_operations *supported_cpu_ops[] __initconst = { #ifdef CONFIG_SMP &smp_spin_table_ops, - &cpu_psci_ops, #endif + &cpu_psci_ops, NULL, }; diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index 9e9798f91172..a623c44a6bc4 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -434,9 +434,11 @@ static int cpu_psci_cpu_kill(unsigned int cpu) return 0; } #endif +#endif const struct cpu_operations cpu_psci_ops = { .name = "psci", +#ifdef CONFIG_SMP .cpu_init = cpu_psci_cpu_init, .cpu_prepare = cpu_psci_cpu_prepare, .cpu_boot = cpu_psci_cpu_boot, @@ -445,6 +447,6 @@ const struct cpu_operations cpu_psci_ops = { .cpu_die = cpu_psci_cpu_die, .cpu_kill = cpu_psci_cpu_kill, #endif +#endif }; -#endif From b9e97ef93c630404f305350d88d09391d1a55648 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 17 Jul 2014 18:19:19 +0100 Subject: [PATCH 23/55] arm64: kernel: add __init marker to PSCI init functions PSCI init functions must be marked as __init so that they are freed by the kernel upon boot. This patch marks the PSCI init functions as such since they need not be persistent in the kernel address space after the kernel has booted. Signed-off-by: Lorenzo Pieralisi Signed-off-by: Catalin Marinas --- arch/arm64/kernel/psci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/psci.c b/arch/arm64/kernel/psci.c index a623c44a6bc4..553954771a67 100644 --- a/arch/arm64/kernel/psci.c +++ b/arch/arm64/kernel/psci.c @@ -235,7 +235,7 @@ static void psci_sys_poweroff(void) * PSCI Function IDs for v0.2+ are well defined so use * standard values. */ -static int psci_0_2_init(struct device_node *np) +static int __init psci_0_2_init(struct device_node *np) { int err, ver; @@ -296,7 +296,7 @@ out_put_node: /* * PSCI < v0.2 get PSCI Function IDs via DT. */ -static int psci_0_1_init(struct device_node *np) +static int __init psci_0_1_init(struct device_node *np) { u32 id; int err; From 18ab7db6b749ac27aac08d572afbbd2f4d937934 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 17 Jul 2014 18:19:20 +0100 Subject: [PATCH 24/55] arm64: kernel: add missing __init section marker to cpu_suspend_init Suspend init function must be marked as __init, since it is not needed after the kernel has booted. This patch moves the cpu_suspend_init() function to the __init section. 
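As background (a generic sketch, not from this patch): __init places a function in .init.text, which the kernel frees once boot completes, so such functions may only be called during initialisation, typically via an initcall.

        #include <linux/init.h>

        static int __init example_setup(void)
        {
                /* Runs once at boot; its memory is reclaimed afterwards. */
                return 0;
        }
        early_initcall(example_setup);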
Signed-off-by: Lorenzo Pieralisi Signed-off-by: Catalin Marinas --- arch/arm64/kernel/suspend.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index 1fa9ce4afd8f..55a99b9a97e0 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -119,7 +119,7 @@ int cpu_suspend(unsigned long arg) extern struct sleep_save_sp sleep_save_sp; extern phys_addr_t sleep_idmap_phys; -static int cpu_suspend_init(void) +static int __init cpu_suspend_init(void) { void *ctx_ptr; From 89c4a306e7631bcb71cc537c8a029172af6047fe Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Jul 2014 16:32:43 +0100 Subject: [PATCH 25/55] arm64: add MIDR_EL1 field accessors The MIDR_EL1 register is composed of a number of bitfields, and uses of the fields have so far involved open-coding of the shifts and masks required. This patch adds shifts and masks for each of the MIDR_EL1 subfields, and also provides accessors built atop of these. Existing uses within cputype.h are updated to use these accessors. The read_cpuid_part_number macro is modified to return the extracted bitfield rather than returning the value in-place with all other fields (including revision) masked out, to better match the other accessors. As the value is only used in comparison with the *_CPU_PART_* macros which are similarly updated, and these values are never exposed to userspace, this change should not affect any functionality. Signed-off-by: Mark Rutland Acked-by: Will Deacon Reviewed-by: Will Deacon Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cputype.h | 33 +++++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 7 deletions(-) diff --git a/arch/arm64/include/asm/cputype.h b/arch/arm64/include/asm/cputype.h index ed48a3a7836a..379d0b874328 100644 --- a/arch/arm64/include/asm/cputype.h +++ b/arch/arm64/include/asm/cputype.h @@ -38,15 +38,34 @@ __val; \ }) +#define MIDR_REVISION_MASK 0xf +#define MIDR_REVISION(midr) ((midr) & MIDR_REVISION_MASK) +#define MIDR_PARTNUM_SHIFT 4 +#define MIDR_PARTNUM_MASK (0xfff << MIDR_PARTNUM_SHIFT) +#define MIDR_PARTNUM(midr) \ + (((midr) & MIDR_PARTNUM_MASK) >> MIDR_PARTNUM_SHIFT) +#define MIDR_ARCHITECTURE_SHIFT 16 +#define MIDR_ARCHITECTURE_MASK (0xf << MIDR_ARCHITECTURE_SHIFT) +#define MIDR_ARCHITECTURE(midr) \ + (((midr) & MIDR_ARCHITECTURE_MASK) >> MIDR_ARCHITECTURE_SHIFT) +#define MIDR_VARIANT_SHIFT 20 +#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT) +#define MIDR_VARIANT(midr) \ + (((midr) & MIDR_VARIANT_MASK) >> MIDR_VARIANT_SHIFT) +#define MIDR_IMPLEMENTOR_SHIFT 24 +#define MIDR_IMPLEMENTOR_MASK (0xff << MIDR_IMPLEMENTOR_SHIFT) +#define MIDR_IMPLEMENTOR(midr) \ + (((midr) & MIDR_IMPLEMENTOR_MASK) >> MIDR_IMPLEMENTOR_SHIFT) + #define ARM_CPU_IMP_ARM 0x41 #define ARM_CPU_IMP_APM 0x50 -#define ARM_CPU_PART_AEM_V8 0xD0F0 -#define ARM_CPU_PART_FOUNDATION 0xD000 -#define ARM_CPU_PART_CORTEX_A53 0xD030 -#define ARM_CPU_PART_CORTEX_A57 0xD070 +#define ARM_CPU_PART_AEM_V8 0xD0F +#define ARM_CPU_PART_FOUNDATION 0xD00 +#define ARM_CPU_PART_CORTEX_A57 0xD07 +#define ARM_CPU_PART_CORTEX_A53 0xD03 -#define APM_CPU_PART_POTENZA 0x0000 +#define APM_CPU_PART_POTENZA 0x000 #ifndef __ASSEMBLY__ @@ -67,12 +86,12 @@ static inline u64 __attribute_const__ read_cpuid_mpidr(void) static inline unsigned int __attribute_const__ read_cpuid_implementor(void) { - return (read_cpuid_id() & 0xFF000000) >> 24; + return MIDR_IMPLEMENTOR(read_cpuid_id()); } static inline unsigned int
__attribute_const__ read_cpuid_part_number(void) { - return (read_cpuid_id() & 0xFFF0); + return MIDR_PARTNUM(read_cpuid_id()); } static inline u32 __attribute_const__ read_cpuid_cachetype(void) From df857416a13734ed9356f6e4f0152d55e4fb748a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Jul 2014 16:32:44 +0100 Subject: [PATCH 26/55] arm64: cpuinfo: record cpu system register values Several kernel subsystems need to know details about CPU system register values, sometimes for CPUs other than the one they are executing on. Rather than hard-coding system register accesses and cross-calls for these cases, this patch adds logic to record various system register values at boot-time. This may be used for feature reporting, firmware bug detection, etc. Separate hooks are added for the boot and hotplug paths to enable one-time initialisation and cold/warm boot value mismatch detection in later patches. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpu.h | 59 +++++++++++++++++++++++++++++ arch/arm64/kernel/Makefile | 3 +- arch/arm64/kernel/cpuinfo.c | 73 ++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/setup.c | 7 ++-- arch/arm64/kernel/smp.c | 6 +++ 5 files changed, 144 insertions(+), 4 deletions(-) create mode 100644 arch/arm64/include/asm/cpu.h create mode 100644 arch/arm64/kernel/cpuinfo.c diff --git a/arch/arm64/include/asm/cpu.h b/arch/arm64/include/asm/cpu.h new file mode 100644 index 000000000000..056443086019 --- /dev/null +++ b/arch/arm64/include/asm/cpu.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2014 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __ASM_CPU_H +#define __ASM_CPU_H + +#include +#include +#include + +/* + * Records attributes of an individual CPU.
+ */ +struct cpuinfo_arm64 { + struct cpu cpu; + u32 reg_ctr; + u32 reg_cntfrq; + u32 reg_dczid; + u32 reg_midr; + + u64 reg_id_aa64isar0; + u64 reg_id_aa64isar1; + u64 reg_id_aa64mmfr0; + u64 reg_id_aa64mmfr1; + u64 reg_id_aa64pfr0; + u64 reg_id_aa64pfr1; + + u32 reg_id_isar0; + u32 reg_id_isar1; + u32 reg_id_isar2; + u32 reg_id_isar3; + u32 reg_id_isar4; + u32 reg_id_isar5; + u32 reg_id_mmfr0; + u32 reg_id_mmfr1; + u32 reg_id_mmfr2; + u32 reg_id_mmfr3; + u32 reg_id_pfr0; + u32 reg_id_pfr1; +}; + +DECLARE_PER_CPU(struct cpuinfo_arm64, cpu_data); + +void cpuinfo_store_cpu(void); +void __init cpuinfo_store_boot_cpu(void); + +#endif /* __ASM_CPU_H */ diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index cdaedad3afe5..27c72ef4fd7a 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -15,7 +15,8 @@ CFLAGS_REMOVE_return_address.o = -pg arm64-obj-y := cputable.o debug-monitors.o entry.o irq.o fpsimd.o \ entry-fpsimd.o process.o ptrace.o setup.o signal.o \ sys.o stacktrace.o time.o traps.o io.o vdso.o \ - hyp-stub.o psci.o cpu_ops.o insn.o return_address.o + hyp-stub.o psci.o cpu_ops.o insn.o return_address.o \ + cpuinfo.o arm64-obj-$(CONFIG_COMPAT) += sys32.o kuser32.o signal32.o \ sys_compat.o diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c new file mode 100644 index 000000000000..1f350fe5b318 --- /dev/null +++ b/arch/arm64/kernel/cpuinfo.c @@ -0,0 +1,73 @@ +/* + * Record and handle CPU attributes. + * + * Copyright (C) 2014 ARM Ltd. + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#include +#include +#include +#include + +#include +#include + +/* + * In case the boot CPU is hotpluggable, we record its initial state and + * current state separately. Certain system registers may contain different + * values depending on configuration at or after reset. 
+ */ +DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); +static struct cpuinfo_arm64 boot_cpu_data; + +static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) +{ + info->reg_cntfrq = arch_timer_get_cntfrq(); + info->reg_ctr = read_cpuid_cachetype(); + info->reg_dczid = read_cpuid(DCZID_EL0); + info->reg_midr = read_cpuid_id(); + + info->reg_id_aa64isar0 = read_cpuid(ID_AA64ISAR0_EL1); + info->reg_id_aa64isar1 = read_cpuid(ID_AA64ISAR1_EL1); + info->reg_id_aa64mmfr0 = read_cpuid(ID_AA64MMFR0_EL1); + info->reg_id_aa64mmfr1 = read_cpuid(ID_AA64MMFR1_EL1); + info->reg_id_aa64pfr0 = read_cpuid(ID_AA64PFR0_EL1); + info->reg_id_aa64pfr1 = read_cpuid(ID_AA64PFR1_EL1); + + info->reg_id_isar0 = read_cpuid(ID_ISAR0_EL1); + info->reg_id_isar1 = read_cpuid(ID_ISAR1_EL1); + info->reg_id_isar2 = read_cpuid(ID_ISAR2_EL1); + info->reg_id_isar3 = read_cpuid(ID_ISAR3_EL1); + info->reg_id_isar4 = read_cpuid(ID_ISAR4_EL1); + info->reg_id_isar5 = read_cpuid(ID_ISAR5_EL1); + info->reg_id_mmfr0 = read_cpuid(ID_MMFR0_EL1); + info->reg_id_mmfr1 = read_cpuid(ID_MMFR1_EL1); + info->reg_id_mmfr2 = read_cpuid(ID_MMFR2_EL1); + info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); + info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); + info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); +} + +void cpuinfo_store_cpu(void) +{ + struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data); + __cpuinfo_store_cpu(info); +} + +void __init cpuinfo_store_boot_cpu(void) +{ + struct cpuinfo_arm64 *info = &per_cpu(cpu_data, 0); + __cpuinfo_store_cpu(info); + + boot_cpu_data = *info; +} diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 46d1125571f6..edb146d01857 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -45,6 +45,7 @@ #include #include +#include #include #include #include @@ -219,6 +220,8 @@ static void __init setup_processor(void) sprintf(init_utsname()->machine, ELF_PLATFORM); elf_hwcap = 0; + cpuinfo_store_boot_cpu(); + /* * Check for sane CTR_EL0.CWG value. */ @@ -417,14 +420,12 @@ static int __init arm64_device_init(void) } arch_initcall_sync(arm64_device_init); -static DEFINE_PER_CPU(struct cpu, cpu_data); - static int __init topology_init(void) { int i; for_each_possible_cpu(i) { - struct cpu *cpu = &per_cpu(cpu_data, i); + struct cpu *cpu = &per_cpu(cpu_data.cpu, i); cpu->hotpluggable = 1; register_cpu(cpu, i); } diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 40f38f46c8e0..3e2f5ebbf63e 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -39,6 +39,7 @@ #include #include +#include #include #include #include @@ -154,6 +155,11 @@ asmlinkage void secondary_start_kernel(void) if (cpu_ops[cpu]->cpu_postboot) cpu_ops[cpu]->cpu_postboot(); + /* + * Log the CPU info before it is marked online and might get read. + */ + cpuinfo_store_cpu(); + /* * Enable GIC and timers. */ From 59ccc0d41b7a60c82c636cd056021f522c30557e Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Jul 2014 16:32:45 +0100 Subject: [PATCH 27/55] arm64: cachetype: report weakest cache policy In big.LITTLE systems, the I-cache policy may differ across CPUs, and thus we must always meet the most stringent maintenance requirements of any I-cache in the system when performing maintenance to ensure correctness. Unfortunately this requirement is not met as we always look at the current CPU's cache type register to determine the maintenance requirements. This patch causes the I-cache policy of all CPUs to be taken into account for icache_is_aliasing and icache_is_aivivt. 
If any I-cache in the system is aliasing or AIVIVT, the respective function will return true. At boot each CPU may set flags to identify that at least one I-cache in the system is aliasing and/or AIVIVT. The now unused and potentially misleading icache_policy function is removed. Signed-off-by: Mark Rutland Acked-by: Will Deacon Reviewed-by: Will Deacon Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cachetype.h | 16 ++++++++++------ arch/arm64/kernel/cpuinfo.c | 26 ++++++++++++++++++++++++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/cachetype.h b/arch/arm64/include/asm/cachetype.h index 4b23e758d5e0..7a2e0762cb40 100644 --- a/arch/arm64/include/asm/cachetype.h +++ b/arch/arm64/include/asm/cachetype.h @@ -30,10 +30,14 @@ #ifndef __ASSEMBLY__ -static inline u32 icache_policy(void) -{ - return (read_cpuid_cachetype() >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK; -} +#include + +#define CTR_L1IP(ctr) (((ctr) >> CTR_L1IP_SHIFT) & CTR_L1IP_MASK) + +#define ICACHEF_ALIASING BIT(0) +#define ICACHEF_AIVIVT BIT(1) + +extern unsigned long __icache_flags; /* * Whilst the D-side always behaves as PIPT on AArch64, aliasing is @@ -41,12 +45,12 @@ static inline u32 icache_policy(void) */ static inline int icache_is_aliasing(void) { - return icache_policy() != ICACHE_POLICY_PIPT; + return test_bit(ICACHEF_ALIASING, &__icache_flags); } static inline int icache_is_aivivt(void) { - return icache_policy() == ICACHE_POLICY_AIVIVT; + return test_bit(ICACHEF_AIVIVT, &__icache_flags); } static inline u32 cache_type_cwg(void) diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 1f350fe5b318..3ce99fc1fde1 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -19,7 +19,9 @@ #include #include +#include #include +#include #include /* @@ -30,6 +32,28 @@ DEFINE_PER_CPU(struct cpuinfo_arm64, cpu_data); static struct cpuinfo_arm64 boot_cpu_data; +static char *icache_policy_str[] = { + [ICACHE_POLICY_RESERVED] = "RESERVED/UNKNOWN", + [ICACHE_POLICY_AIVIVT] = "AIVIVT", + [ICACHE_POLICY_VIPT] = "VIPT", + [ICACHE_POLICY_PIPT] = "PIPT", +}; + +unsigned long __icache_flags; + +static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) +{ + unsigned int cpu = smp_processor_id(); + u32 l1ip = CTR_L1IP(info->reg_ctr); + + if (l1ip != ICACHE_POLICY_PIPT) + set_bit(ICACHEF_ALIASING, &__icache_flags); + if (l1ip == ICACHE_POLICY_AIVIVT) + set_bit(ICACHEF_AIVIVT, &__icache_flags); + + pr_info("Detected %s I-cache on CPU%d", icache_policy_str[l1ip], cpu); +} + static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) { info->reg_cntfrq = arch_timer_get_cntfrq(); @@ -56,6 +80,8 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) info->reg_id_mmfr3 = read_cpuid(ID_MMFR3_EL1); info->reg_id_pfr0 = read_cpuid(ID_PFR0_EL1); info->reg_id_pfr1 = read_cpuid(ID_PFR1_EL1); + + cpuinfo_detect_icache_policy(info); } void cpuinfo_store_cpu(void) From 127161aaf0fcd37680984f79e1451b870b3e310a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 16 Jul 2014 16:32:46 +0100 Subject: [PATCH 28/55] arm64: add runtime system sanity checks Unexpected variation in certain system register values across CPUs is an indicator of potential problems with a system. The kernel expects CPUs to be mostly identical in terms of supported features, even in systems with heterogeneous CPUs, with uniform instruction set support being critical for the correct operation of userspace.
To help detect issues early where hardware violates the expectations of the kernel, this patch adds simple runtime sanity checks on important ID registers in the bring up path of each CPU. Where CPUs are fundamentally mismatched, set TAINT_CPU_OUT_OF_SPEC. Given that the kernel assumes CPUs are identical feature wise, let's not pretend that we expect such configurations to work. Supporting such configurations would require massive rework, and hopefully they will never exist. Signed-off-by: Mark Rutland Reviewed-by: Will Deacon Reviewed-by: Catalin Marinas Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpuinfo.c | 93 +++++++++++++++++++++++++++++++++++++ 1 file changed, 93 insertions(+) diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c index 3ce99fc1fde1..f82f7d1c468e 100644 --- a/arch/arm64/kernel/cpuinfo.c +++ b/arch/arm64/kernel/cpuinfo.c @@ -21,6 +21,7 @@ #include #include +#include #include #include @@ -54,6 +55,97 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info) pr_info("Detected %s I-cache on CPU%d", icache_policy_str[l1ip], cpu); } +static int check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu) +{ + if ((boot & mask) == (cur & mask)) + return 0; + + pr_warn("SANITY CHECK: Unexpected variation in %s. Boot CPU: %#016lx, CPU%d: %#016lx\n", + name, (unsigned long)boot, cpu, (unsigned long)cur); + + return 1; +} + +#define CHECK_MASK(field, mask, boot, cur, cpu) \ + check_reg_mask(#field, mask, (boot)->reg_ ## field, (cur)->reg_ ## field, cpu) + +#define CHECK(field, boot, cur, cpu) \ + CHECK_MASK(field, ~0ULL, boot, cur, cpu) + +/* + * Verify that CPUs don't have unexpected differences that will cause problems. + */ +static void cpuinfo_sanity_check(struct cpuinfo_arm64 *cur) +{ + unsigned int cpu = smp_processor_id(); + struct cpuinfo_arm64 *boot = &boot_cpu_data; + unsigned int diff = 0; + + /* + * The kernel can handle differing I-cache policies, but otherwise + * caches should look identical. Userspace JITs will make use of + * *minLine. + */ + diff |= CHECK_MASK(ctr, 0xffff3fff, boot, cur, cpu); + + /* + * Userspace may perform DC ZVA instructions. Mismatched block sizes + * could result in too much or too little memory being zeroed if a + * process is preempted and migrated between CPUs. + */ + diff |= CHECK(dczid, boot, cur, cpu); + + /* If different, timekeeping will be broken (especially with KVM) */ + diff |= CHECK(cntfrq, boot, cur, cpu); + + /* + * Even in big.LITTLE, processors should be identical instruction-set + * wise. + */ + diff |= CHECK(id_aa64isar0, boot, cur, cpu); + diff |= CHECK(id_aa64isar1, boot, cur, cpu); + + /* + * Differing PARange support is fine as long as all peripherals and + * memory are mapped within the minimum PARange of all CPUs. + * Linux should not care about secure memory. + * ID_AA64MMFR1 is currently RES0. + */ + diff |= CHECK_MASK(id_aa64mmfr0, 0xffffffffffff0ff0, boot, cur, cpu); + diff |= CHECK(id_aa64mmfr1, boot, cur, cpu); + + /* + * EL3 is not our concern. + * ID_AA64PFR1 is currently RES0. + */ + diff |= CHECK_MASK(id_aa64pfr0, 0xffffffffffff0fff, boot, cur, cpu); + diff |= CHECK(id_aa64pfr1, boot, cur, cpu); + + /* + * If we have AArch32, we care about 32-bit features for compat. These + * registers should be RES0 otherwise. 
+ */ + diff |= CHECK(id_isar0, boot, cur, cpu); + diff |= CHECK(id_isar1, boot, cur, cpu); + diff |= CHECK(id_isar2, boot, cur, cpu); + diff |= CHECK(id_isar3, boot, cur, cpu); + diff |= CHECK(id_isar4, boot, cur, cpu); + diff |= CHECK(id_isar5, boot, cur, cpu); + diff |= CHECK(id_mmfr0, boot, cur, cpu); + diff |= CHECK(id_mmfr1, boot, cur, cpu); + diff |= CHECK(id_mmfr2, boot, cur, cpu); + diff |= CHECK(id_mmfr3, boot, cur, cpu); + diff |= CHECK(id_pfr0, boot, cur, cpu); + diff |= CHECK(id_pfr1, boot, cur, cpu); + + /* + * Mismatched CPU features are a recipe for disaster. Don't even + * pretend to support them. + */ + WARN_TAINT_ONCE(diff, TAINT_CPU_OUT_OF_SPEC, + "Unsupported CPU feature variation."); +} + static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info) { info->reg_cntfrq = arch_timer_get_cntfrq(); @@ -88,6 +180,7 @@ void cpuinfo_store_cpu(void) { struct cpuinfo_arm64 *info = this_cpu_ptr(&cpu_data); __cpuinfo_store_cpu(info); + cpuinfo_sanity_check(info); } void __init cpuinfo_store_boot_cpu(void) From d7a49086f263164a2c4c178eb76412d48cd671d7 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 18 Jul 2014 14:57:38 +0100 Subject: [PATCH 29/55] arm64: cpuinfo: print info for all CPUs Currently reading /proc/cpuinfo will result in information being read out of the MIDR_EL1 of the current CPU, and the information is not associated with any particular logical CPU number. This is problematic for systems with heterogeneous CPUs (e.g. big.LITTLE) where MIDR fields will vary across CPUs, and the output will differ depending on the executing CPU. This patch reorganises the code responsible for /proc/cpuinfo to print information per-cpu. In the process, we perform several cleanups: * Property names are coerced to lower-case (to match "processor" as per glibc's expectations). * Property names are simplified and made to match the MIDR field names. * Revision is changed to hex as with every other field. * The meaningless Architecture property is removed. * The ripe-for-abuse Machine field is removed. The features field (a human-readable representation of the hwcaps) remains printed once, as this is expected to remain in use as the globally supported CPU features. To enable the possibility of the addition of per-cpu HW feature information later, this is printed before any CPU-specific information. Comments are added to guide userspace developers in the right direction (using the hwcaps provided in auxval). Hopefully where userspace applications parse /proc/cpuinfo rather than using the readily available hwcaps, they limit themselves to reading said first line. If CPU features differ from each other, the previously installed sanity checks will give us some advance notice with warnings and TAINT_CPU_OUT_OF_SPEC. If we are lucky, we will never see such systems. Rework will be required in many places to support such systems anyway.
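The recommended userspace approach mentioned above can be sketched as follows (illustrative only): read the hwcaps from the auxiliary vector instead of parsing the features line.

        #include <stdio.h>
        #include <sys/auxv.h>

        int main(void)
        {
                unsigned long hwcap = getauxval(AT_HWCAP);

                printf("AT_HWCAP = %#lx\n", hwcap);
                return 0;
        }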
Signed-off-by: Mark Rutland Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Marcus Shawcroft Cc: Peter Maydell Acked-by: Will Deacon [catalin.marinas@arm.com: remove machine_name as it is no longer reported] Signed-off-by: Catalin Marinas --- arch/arm64/kernel/setup.c | 40 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 22 deletions(-) diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index edb146d01857..f6f0ccf35ae6 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -78,7 +78,6 @@ unsigned int compat_elf_hwcap2 __read_mostly; #endif static const char *cpu_name; -static const char *machine_name; phys_addr_t __fdt_pointer __initdata; /* @@ -310,8 +309,6 @@ static void __init setup_machine_fdt(phys_addr_t dt_phys) while (true) cpu_relax(); } - - machine_name = of_flat_dt_get_machine_name(); } /* @@ -450,10 +447,21 @@ static int c_show(struct seq_file *m, void *v) { int i; - seq_printf(m, "Processor\t: %s rev %d (%s)\n", - cpu_name, read_cpuid_id() & 15, ELF_PLATFORM); + /* + * Dump out the common processor features in a single line. Userspace + * should read the hwcaps with getauxval(AT_HWCAP) rather than + * attempting to parse this. + */ + seq_puts(m, "features\t:"); + for (i = 0; hwcap_str[i]; i++) + if (elf_hwcap & (1 << i)) + seq_printf(m, " %s", hwcap_str[i]); + seq_puts(m, "\n\n"); for_each_online_cpu(i) { + struct cpuinfo_arm64 *cpuinfo = &per_cpu(cpu_data, i); + u32 midr = cpuinfo->reg_midr; + /* * glibc reads /proc/cpuinfo to determine the number of * online processors, looking for lines beginning with @@ -462,25 +470,13 @@ static int c_show(struct seq_file *m, void *v) #ifdef CONFIG_SMP seq_printf(m, "processor\t: %d\n", i); #endif + seq_printf(m, "implementer\t: 0x%02x\n", + MIDR_IMPLEMENTOR(midr)); + seq_printf(m, "variant\t\t: 0x%x\n", MIDR_VARIANT(midr)); + seq_printf(m, "partnum\t\t: 0x%03x\n", MIDR_PARTNUM(midr)); + seq_printf(m, "revision\t: 0x%x\n\n", MIDR_REVISION(midr)); } - /* dump out the processor features */ - seq_puts(m, "Features\t: "); - - for (i = 0; hwcap_str[i]; i++) - if (elf_hwcap & (1 << i)) - seq_printf(m, "%s ", hwcap_str[i]); - - seq_printf(m, "\nCPU implementer\t: 0x%02x\n", read_cpuid_id() >> 24); - seq_printf(m, "CPU architecture: AArch64\n"); - seq_printf(m, "CPU variant\t: 0x%x\n", (read_cpuid_id() >> 20) & 15); - seq_printf(m, "CPU part\t: 0x%03x\n", (read_cpuid_id() >> 4) & 0xfff); - seq_printf(m, "CPU revision\t: %d\n", read_cpuid_id() & 15); - - seq_puts(m, "\n"); - - seq_printf(m, "Hardware\t: %s\n", machine_name); - return 0; } From a28e3f4b90543f7c249a956e3ca518e243a04618 Mon Sep 17 00:00:00 2001 From: Yi Li Date: Fri, 11 Jul 2014 12:46:50 +0100 Subject: [PATCH 30/55] arm64: dmi: Add SMBIOS/DMI support SMBIOS is important for server hardware vendors. It implements a spec for providing descriptive information about the platform. Things like serial numbers, physical layout of the ports, build configuration data, and the like. This has been tested by dmidecode and lshw tools.
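Once dmi_scan_machine() has run (see the setup.c hunk below), other kernel code can query the recorded identity strings through the generic DMI API; a hedged in-kernel sketch:

        #include <linux/dmi.h>
        #include <linux/printk.h>

        static void example_report_platform(void)
        {
                const char *vendor = dmi_get_system_info(DMI_SYS_VENDOR);
                const char *product = dmi_get_system_info(DMI_PRODUCT_NAME);

                pr_info("DMI: %s %s\n", vendor ?: "unknown", product ?: "unknown");
        }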
Signed-off-by: Yi Li Signed-off-by: Ard Biesheuvel Reviewed-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 11 ++++++++++ arch/arm64/include/asm/dmi.h | 41 ++++++++++++++++++++++++++++++++++++ arch/arm64/kernel/setup.c | 2 ++ 3 files changed, 54 insertions(+) create mode 100644 arch/arm64/include/asm/dmi.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index ce6e733e0c05..62a71e829770 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -311,6 +311,17 @@ config EFI allow the kernel to be booted as an EFI application. This is only useful on systems that have UEFI firmware. +config DMI + bool "Enable support for SMBIOS (DMI) tables" + depends on EFI + default y + help + This enables SMBIOS/DMI feature for systems. + + This option is only useful on systems that have UEFI firmware. + However, even with this option, the resultant kernel should + continue to boot on existing non-UEFI platforms. + endmenu menu "Userspace binary formats" diff --git a/arch/arm64/include/asm/dmi.h b/arch/arm64/include/asm/dmi.h new file mode 100644 index 000000000000..b0882a8620e1 --- /dev/null +++ b/arch/arm64/include/asm/dmi.h @@ -0,0 +1,41 @@ +/* + * arch/arm64/include/asm/dmi.h + * + * Copyright (C) 2013 Linaro Limited. + * Written by: Yi Li (yi.li@linaro.org) + * + * based on arch/ia64/include/asm/dmi.h + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + */ + +#ifndef __ASM_DMI_H +#define __ASM_DMI_H + +#include +#include + +static inline void __iomem *dmi_remap(u64 phys, u64 size) +{ + void __iomem *p = efi_lookup_mapped_addr(phys); + + /* + * If the mapping spans multiple pages, do a minimal check to ensure + * that the mapping returned by efi_lookup_mapped_addr() covers the + * whole requested range (but ignore potential holes) + */ + if ((phys & ~PAGE_MASK) + size > PAGE_SIZE + && (p + size - 1) != efi_lookup_mapped_addr(phys + size - 1)) + return NULL; + return p; +} + +/* Reuse existing UEFI mappings for DMI */ +#define dmi_alloc(l) kzalloc(l, GFP_KERNEL) +#define dmi_early_remap(x, l) dmi_remap(x, l) +#define dmi_early_unmap(x, l) +#define dmi_unmap(x) + +#endif diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index f6f0ccf35ae6..35339a0e1592 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include @@ -413,6 +414,7 @@ void __init setup_arch(char **cmdline_p) static int __init arm64_device_init(void) { of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL); + dmi_scan_machine(); return 0; } arch_initcall_sync(arm64_device_init); From affeafbb84776b8527591aa83c16be67dfa5c985 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 21 Jul 2014 18:44:20 +0100 Subject: [PATCH 31/55] arm64: Remove stray ARCH_HAS_OPP reference A reference to ARCH_HAS_OPP was added in commit 333d17e56 (arm64: add ARCH_HAS_OPP to allow enabling OPP library) however this symbol is no longer needed after commit 049d595a4db3b3a (PM / OPP: Make OPP invisible to users in Kconfig). 
Signed-off-by: Mark Brown Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 62a71e829770..95cda2be0695 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1,7 +1,6 @@ config ARM64 def_bool y select ARCH_HAS_ATOMIC64_DEC_IF_POSITIVE - select ARCH_HAS_OPP select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF select ARCH_WANT_OPTIONAL_GPIOLIB From 7edd88ad7e59c2b7b49da0e00f251884fb785d4f Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 16 Jul 2014 12:07:17 +0100 Subject: [PATCH 32/55] arm64: Do not initialise the fixmap page tables in head.S The early_ioremap_init() function already handles fixmap pte initialisation, so upgrade this to cover all of pud/pmd/pte and remove one page from swapper_pg_dir. Signed-off-by: Catalin Marinas Tested-by: Jungseok Lee --- arch/arm64/include/asm/page.h | 8 ++++---- arch/arm64/kernel/head.S | 7 ------- arch/arm64/mm/ioremap.c | 26 ++++++++++++++++++-------- 3 files changed, 22 insertions(+), 19 deletions(-) diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index a6331e6a92b5..e84ca637af6b 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -33,11 +33,11 @@ /* * The idmap and swapper page tables need some space reserved in the kernel - * image. The idmap only requires a pgd and a next level table to (section) map - * the kernel, while the swapper also maps the FDT and requires an additional - * table to map an early UART. See __create_page_tables for more information. + * image. Both require a pgd and a next level table to (section) map the + * kernel. The swapper also maps the FDT (see __create_page_tables for + * more information). */ -#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) +#define SWAPPER_DIR_SIZE (2 * PAGE_SIZE) #define IDMAP_DIR_SIZE (2 * PAGE_SIZE) #ifndef __ASSEMBLY__ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 69dafe9621fd..fa3b7fb8a77a 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -582,13 +582,6 @@ __create_page_tables: sub x6, x6, #1 // inclusive range create_block_map x0, x7, x3, x5, x6 1: - /* - * Create the pgd entry for the fixed mappings.
- */ - ldr x5, =FIXADDR_TOP // Fixed mapping virtual address - add x0, x26, #2 * PAGE_SIZE // section table address - create_pgd_entry x26, x0, x5, x6, x7 - /* * Since the page tables have been populated with non-cacheable * accesses (MMU disabled), invalidate the idmap and swapper page diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 7ec328392ae0..69000efa015e 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -103,19 +103,25 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) } EXPORT_SYMBOL(ioremap_cache); -#ifndef CONFIG_ARM64_64K_PAGES static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; +#ifndef CONFIG_ARM64_64K_PAGES +static pmd_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; #endif -static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) +static inline pud_t * __init early_ioremap_pud(unsigned long addr) { pgd_t *pgd; - pud_t *pud; pgd = pgd_offset_k(addr); BUG_ON(pgd_none(*pgd) || pgd_bad(*pgd)); - pud = pud_offset(pgd, addr); + return pud_offset(pgd, addr); +} + +static inline pmd_t * __init early_ioremap_pmd(unsigned long addr) +{ + pud_t *pud = early_ioremap_pud(addr); + BUG_ON(pud_none(*pud) || pud_bad(*pud)); return pmd_offset(pud, addr); @@ -132,13 +138,17 @@ static inline pte_t * __init early_ioremap_pte(unsigned long addr) void __init early_ioremap_init(void) { + pgd_t *pgd; + pud_t *pud; pmd_t *pmd; + unsigned long addr = fix_to_virt(FIX_BTMAP_BEGIN); - pmd = early_ioremap_pmd(fix_to_virt(FIX_BTMAP_BEGIN)); -#ifndef CONFIG_ARM64_64K_PAGES - /* need to populate pmd for 4k pagesize only */ + pgd = pgd_offset_k(addr); + pud = pud_offset(pgd, addr); + pud_populate(&init_mm, pud, bm_pmd); + pmd = pmd_offset(pud, addr); pmd_populate_kernel(&init_mm, pmd, bm_pte); -#endif + /* * The boot-ioremap range spans multiple pmds, for which * we are not prepared: From e41ceed035966d593ae34c3de33924965a6b9fba Mon Sep 17 00:00:00 2001 From: Jungseok Lee Date: Mon, 12 May 2014 10:40:38 +0100 Subject: [PATCH 33/55] arm64: Introduce VA_BITS and translation level options This patch adds virtual address space size and a level of translation tables to kernel configuration. It facilitates introduction of different MMU options, such as 4KB + 4 levels, 16KB + 4 levels and 64KB + 3 levels, easily. The idea is based on the discussion with Catalin Marinas: http://www.spinics.net/linux/lists/arm-kernel/msg319552.html Signed-off-by: Jungseok Lee Reviewed-by: Sungjinn Chung Acked-by: Kukjin Kim Reviewed-by: Christoffer Dall Signed-off-by: Catalin Marinas Tested-by: Jungseok Lee --- arch/arm64/Kconfig | 45 +++++++++++++++++++++++++- arch/arm64/include/asm/memory.h | 6 +--- arch/arm64/include/asm/page.h | 2 +- arch/arm64/include/asm/pgalloc.h | 4 +-- arch/arm64/include/asm/pgtable-hwdef.h | 2 +- arch/arm64/include/asm/pgtable.h | 8 ++--- arch/arm64/include/asm/tlb.h | 2 +- 7 files changed, 54 insertions(+), 15 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 95cda2be0695..94ba45b198ff 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -157,14 +157,57 @@ endmenu menu "Kernel Features" +choice + prompt "Page size" + default ARM64_4K_PAGES + help + Page size (translation granule) configuration. + +config ARM64_4K_PAGES + bool "4KB" + help + This feature enables 4KB pages support. + config ARM64_64K_PAGES - bool "Enable 64KB pages support" + bool "64KB" help This feature enables 64KB pages support (4KB by default) allowing only two levels of page tables and faster TLB look-up.
AArch32 emulation is not available when this feature is enabled. +endchoice + +choice + prompt "Virtual address space size" + default ARM64_VA_BITS_39 if ARM64_4K_PAGES + default ARM64_VA_BITS_42 if ARM64_64K_PAGES + help + Allows choosing one of multiple possible virtual address + space sizes. The level of translation table is determined by + a combination of page size and virtual address space size. + +config ARM64_VA_BITS_39 + bool "39-bit" + depends on ARM64_4K_PAGES + +config ARM64_VA_BITS_42 + bool "42-bit" + depends on ARM64_64K_PAGES + +endchoice + +config ARM64_VA_BITS + int + default 39 if ARM64_VA_BITS_39 + default 42 if ARM64_VA_BITS_42 + +config ARM64_2_LEVELS + def_bool y if ARM64_64K_PAGES && ARM64_VA_BITS_42 + +config ARM64_3_LEVELS + def_bool y if ARM64_4K_PAGES && ARM64_VA_BITS_39 + config CPU_BIG_ENDIAN bool "Build big-endian kernel" help diff --git a/arch/arm64/include/asm/memory.h b/arch/arm64/include/asm/memory.h index 993bce527b85..45ad6cf678dd 100644 --- a/arch/arm64/include/asm/memory.h +++ b/arch/arm64/include/asm/memory.h @@ -41,11 +41,7 @@ * The module space lives between the addresses given by TASK_SIZE * and PAGE_OFFSET - it must be within 128MB of the kernel text. */ -#ifdef CONFIG_ARM64_64K_PAGES -#define VA_BITS (42) -#else -#define VA_BITS (39) -#endif +#define VA_BITS (CONFIG_ARM64_VA_BITS) #define PAGE_OFFSET (UL(0xffffffffffffffff) << (VA_BITS - 1)) #define MODULES_END (PAGE_OFFSET) #define MODULES_VADDR (MODULES_END - SZ_64M) diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index e84ca637af6b..1cbde2773c4f 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -42,7 +42,7 @@ #ifndef __ASSEMBLY__ -#ifdef CONFIG_ARM64_64K_PAGES +#ifdef CONFIG_ARM64_2_LEVELS #include #else #include diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 9bea6e74a001..48298376e46a 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -26,7 +26,7 @@ #define check_pgt_cache() do { } while (0) -#ifndef CONFIG_ARM64_64K_PAGES +#ifndef CONFIG_ARM64_2_LEVELS static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -44,7 +44,7 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE)); } -#endif /* CONFIG_ARM64_64K_PAGES */ +#endif /* CONFIG_ARM64_2_LEVELS */ extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index 955e8c5f0afb..c7c603b489b8 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -16,7 +16,7 @@ #ifndef __ASM_PGTABLE_HWDEF_H #define __ASM_PGTABLE_HWDEF_H -#ifdef CONFIG_ARM64_64K_PAGES +#ifdef CONFIG_ARM64_2_LEVELS #include #else #include diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index d7455fa83bc7..6d5854972a77 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -47,7 +47,7 @@ extern void __pmd_error(const char *file, int line, unsigned long val); extern void __pgd_error(const char *file, int line, unsigned long val); #define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) -#ifndef CONFIG_ARM64_64K_PAGES +#ifndef CONFIG_ARM64_2_LEVELS #define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) #endif #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) @@ 
-323,7 +323,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) */ #define mk_pte(page,prot) pfn_pte(page_to_pfn(page),prot) -#ifndef CONFIG_ARM64_64K_PAGES +#ifndef CONFIG_ARM64_2_LEVELS #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & 2)) @@ -345,7 +345,7 @@ static inline pmd_t *pud_page_vaddr(pud_t pud) return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK); } -#endif /* CONFIG_ARM64_64K_PAGES */ +#endif /* CONFIG_ARM64_2_LEVELS */ /* to find an entry in a page-table-directory */ #define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) @@ -356,7 +356,7 @@ static inline pmd_t *pud_page_vaddr(pud_t pud) #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) /* Find an entry in the second-level page table.. */ -#ifndef CONFIG_ARM64_64K_PAGES +#ifndef CONFIG_ARM64_2_LEVELS #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) { diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 80e2c08900d6..bc19101edaeb 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -91,7 +91,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, tlb_remove_page(tlb, pte); } -#ifndef CONFIG_ARM64_64K_PAGES +#ifndef CONFIG_ARM64_2_LEVELS static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { From 4edae01e89100821d167076dec6ecdd40318b7c1 Mon Sep 17 00:00:00 2001 From: Jungseok Lee Date: Mon, 12 May 2014 10:40:44 +0100 Subject: [PATCH 34/55] arm64: Add a description on 48-bit address space with 4KB pages This patch adds memory layout and translation lookup information about 48-bit address space with 4K pages. The description is based on 4 levels of translation tables. Signed-off-by: Jungseok Lee Reviewed-by: Sungjinn Chung Acked-by: Kukjin Kim Acked-by: Christoffer Dall Signed-off-by: Catalin Marinas Tested-by: Jungseok Lee --- Documentation/arm64/memory.txt | 59 +++++++++++++++++++++++++++++----- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt index d50fa618371b..4c720d698e8e 100644 --- a/Documentation/arm64/memory.txt +++ b/Documentation/arm64/memory.txt @@ -8,10 +8,11 @@ This document describes the virtual memory layout used by the AArch64 Linux kernel. The architecture allows up to 4 levels of translation tables with a 4KB page size and up to 3 levels with a 64KB page size. -AArch64 Linux uses 3 levels of translation tables with the 4KB page -configuration, allowing 39-bit (512GB) virtual addresses for both user -and kernel. With 64KB pages, only 2 levels of translation tables are -used but the memory layout is the same. +AArch64 Linux uses either 3 levels or 4 levels of translation tables with +the 4KB page configuration, allowing 39-bit (512GB) or 48-bit (256TB) +virtual addresses, respectively, for both user and kernel. With 64KB +pages, only 2 levels of translation tables, allowing 42-bit (4TB) +virtual address, are used but the memory layout is the same. User addresses have bits 63:39 set to 0 while the kernel addresses have the same bits set to 1. TTBRx selection is given by bit 63 of the @@ -21,7 +22,7 @@ The swapper_pgd_dir address is written to TTBR1 and never written to TTBR0. 
-AArch64 Linux memory layout with 4KB pages: +AArch64 Linux memory layout with 4KB pages + 3 levels: Start End Size Use ----------------------------------------------------------------------- @@ -48,7 +49,34 @@ ffffffbffc000000 ffffffbfffffffff 64MB modules ffffffc000000000 ffffffffffffffff 256GB kernel logical memory map -AArch64 Linux memory layout with 64KB pages: +AArch64 Linux memory layout with 4KB pages + 4 levels: + +Start End Size Use +----------------------------------------------------------------------- +0000000000000000 0000ffffffffffff 256TB user + +ffff000000000000 ffff7bfffffeffff ~124TB vmalloc + +ffff7bffffff0000 ffff7bffffffffff 64KB [guard page] + +ffff7c0000000000 ffff7dffffffffff 2TB vmemmap + +ffff7e0000000000 ffff7ffffbbfffff ~2TB [guard, future vmmemap] + +ffff7ffffa000000 ffff7ffffaffffff 16MB PCI I/O space + +ffff7ffffb000000 ffff7ffffbbfffff 12MB [guard] + +ffff7ffffbc00000 ffff7ffffbdfffff 2MB fixed mappings + +ffff7ffffbe00000 ffff7ffffbffffff 2MB [guard] + +ffff7ffffc000000 ffff7fffffffffff 64MB modules + +ffff800000000000 ffffffffffffffff 128TB kernel logical memory map + + +AArch64 Linux memory layout with 64KB pages + 2 levels: Start End Size Use ----------------------------------------------------------------------- @@ -75,7 +103,7 @@ fffffdfffc000000 fffffdffffffffff 64MB modules fffffe0000000000 ffffffffffffffff 2TB kernel logical memory map -Translation table lookup with 4KB pages: +Translation table lookup with 4KB pages + 3 levels: +--------+--------+--------+--------+--------+--------+--------+--------+ |63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0| @@ -90,7 +118,22 @@ Translation table lookup with 4KB pages: +-------------------------------------------------> [63] TTBR0/1 -Translation table lookup with 64KB pages: +Translation table lookup with 4KB pages + 4 levels: + ++--------+--------+--------+--------+--------+--------+--------+--------+ +|63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0| ++--------+--------+--------+--------+--------+--------+--------+--------+ + | | | | | | + | | | | | v + | | | | | [11:0] in-page offset + | | | | +-> [20:12] L3 index + | | | +-----------> [29:21] L2 index + | | +---------------------> [38:30] L1 index + | +-------------------------------> [47:39] L0 index + +-------------------------------------------------> [63] TTBR0/1 + + +Translation table lookup with 64KB pages + 2 levels: +--------+--------+--------+--------+--------+--------+--------+--------+ |63 56|55 48|47 40|39 32|31 24|23 16|15 8|7 0| From 57e0139041a978c0cfa4d2366c96ea3418e7d553 Mon Sep 17 00:00:00 2001 From: Jungseok Lee Date: Mon, 12 May 2014 10:40:47 +0100 Subject: [PATCH 35/55] arm64: Add 4 levels of page tables definition with 4KB pages This patch adds hardware definition and types for 4 levels of translation tables with 4KB pages. 
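For a concrete feel of the 4KB + 4-level geometry these definitions describe, here is a stand-alone user-space C sketch (illustration only, not part of the patch; the macro names merely mirror pgtable-4level-hwdef.h) that splits a 48-bit virtual address into the four table indices and the in-page offset:

#include <stdio.h>
#include <stdint.h>

/* Constants mirrored from pgtable-4level-hwdef.h (4KB granule). */
#define PAGE_SHIFT	12
#define PMD_SHIFT	21
#define PUD_SHIFT	30
#define PGDIR_SHIFT	39
#define TABLE_MASK	511	/* 512 entries of 8 bytes per 4KB table page */

int main(void)
{
	uint64_t va = 0x0000aabbccddeeffULL;	/* arbitrary 48-bit example address */

	printf("L0 (pgd) index: %llu\n", (unsigned long long)((va >> PGDIR_SHIFT) & TABLE_MASK));
	printf("L1 (pud) index: %llu\n", (unsigned long long)((va >> PUD_SHIFT) & TABLE_MASK));
	printf("L2 (pmd) index: %llu\n", (unsigned long long)((va >> PMD_SHIFT) & TABLE_MASK));
	printf("L3 (pte) index: %llu\n", (unsigned long long)((va >> PAGE_SHIFT) & TABLE_MASK));
	printf("page offset:    0x%llx\n", (unsigned long long)(va & ((1ULL << PAGE_SHIFT) - 1)));
	return 0;
}

The printed split corresponds to the [47:39] L0 / [38:30] L1 / [29:21] L2 / [20:12] L3 / [11:0] offset diagram added to memory.txt in the previous patch.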
Signed-off-by: Jungseok Lee Reviewed-by: Sungjinn Chung Acked-by: Kukjin Kim Reviewed-by: Christoffer Dall Signed-off-by: Catalin Marinas Tested-by: Jungseok Lee --- arch/arm64/include/asm/pgtable-4level-hwdef.h | 50 +++++++++++++ arch/arm64/include/asm/pgtable-4level-types.h | 71 +++++++++++++++++++ 2 files changed, 121 insertions(+) create mode 100644 arch/arm64/include/asm/pgtable-4level-hwdef.h create mode 100644 arch/arm64/include/asm/pgtable-4level-types.h diff --git a/arch/arm64/include/asm/pgtable-4level-hwdef.h b/arch/arm64/include/asm/pgtable-4level-hwdef.h new file mode 100644 index 000000000000..0ec84e2274a3 --- /dev/null +++ b/arch/arm64/include/asm/pgtable-4level-hwdef.h @@ -0,0 +1,50 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __ASM_PGTABLE_4LEVEL_HWDEF_H +#define __ASM_PGTABLE_4LEVEL_HWDEF_H + +#define PTRS_PER_PTE 512 +#define PTRS_PER_PMD 512 +#define PTRS_PER_PUD 512 +#define PTRS_PER_PGD 512 + +/* + * PGDIR_SHIFT determines the size a top-level page table entry can map. + */ +#define PGDIR_SHIFT 39 +#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) + +/* + * PUD_SHIFT determines the size the second level page table entry can map. + */ +#define PUD_SHIFT 30 +#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + +/* + * PMD_SHIFT determines the size the third level page table entry can map. + */ +#define PMD_SHIFT 21 +#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) + +/* + * section address mask and size definitions. + */ +#define SECTION_SHIFT 21 +#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT) +#define SECTION_MASK (~(SECTION_SIZE-1)) + +#endif diff --git a/arch/arm64/include/asm/pgtable-4level-types.h b/arch/arm64/include/asm/pgtable-4level-types.h new file mode 100644 index 000000000000..7ad8dd257ea1 --- /dev/null +++ b/arch/arm64/include/asm/pgtable-4level-types.h @@ -0,0 +1,71 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __ASM_PGTABLE_4LEVEL_TYPES_H +#define __ASM_PGTABLE_4LEVEL_TYPES_H + +#include + +typedef u64 pteval_t; +typedef u64 pmdval_t; +typedef u64 pudval_t; +typedef u64 pgdval_t; + +#undef STRICT_MM_TYPECHECKS + +#ifdef STRICT_MM_TYPECHECKS + +/* + * These are used to make use of C type-checking.. 
+ */ +typedef struct { pteval_t pte; } pte_t; +typedef struct { pmdval_t pmd; } pmd_t; +typedef struct { pudval_t pud; } pud_t; +typedef struct { pgdval_t pgd; } pgd_t; +typedef struct { pteval_t pgprot; } pgprot_t; + +#define pte_val(x) ((x).pte) +#define pmd_val(x) ((x).pmd) +#define pud_val(x) ((x).pud) +#define pgd_val(x) ((x).pgd) +#define pgprot_val(x) ((x).pgprot) + +#define __pte(x) ((pte_t) { (x) } ) +#define __pmd(x) ((pmd_t) { (x) } ) +#define __pud(x) ((pud_t) { (x) } ) +#define __pgd(x) ((pgd_t) { (x) } ) +#define __pgprot(x) ((pgprot_t) { (x) } ) + +#else /* !STRICT_MM_TYPECHECKS */ + +typedef pteval_t pte_t; +typedef pmdval_t pmd_t; +typedef pudval_t pud_t; +typedef pgdval_t pgd_t; +typedef pteval_t pgprot_t; + +#define pte_val(x) (x) +#define pmd_val(x) (x) +#define pud_val(x) (x) +#define pgd_val(x) (x) +#define pgprot_val(x) (x) + +#define __pte(x) (x) +#define __pmd(x) (x) +#define __pud(x) (x) +#define __pgd(x) (x) +#define __pgprot(x) (x) + +#endif /* STRICT_MM_TYPECHECKS */ + +#endif /* __ASM_PGTABLE_4LEVEL_TYPES_H */ From c79b954bf6c006f2d3dd9d01f231abeead13a410 Mon Sep 17 00:00:00 2001 From: Jungseok Lee Date: Mon, 12 May 2014 18:40:51 +0900 Subject: [PATCH 36/55] arm64: mm: Implement 4 levels of translation tables This patch implements 4 levels of translation tables because 3 levels of page tables with 4KB pages cannot support the 40-bit physical address space described in [1], due to the following restriction: the kernel logical memory map with 4KB + 3 levels (0xffffffc000000000-0xffffffffffffffff) cannot cover the RAM region from 544GB to 1024GB defined in [1]. Specifically, the ARM64 kernel fails to create a mapping for this region in the map_mem function, since __phys_to_virt for this region overflows the address space. If an SoC design follows [1], RAM beyond the first 32GB is placed from 544GB upwards; even a 64GB system is expected to use the region from 544GB to 576GB for just 32GB of RAM. The natural fix is to enable 4 levels of page tables instead of hacking __virt_to_phys and __phys_to_virt. However, it is recommended that 4 levels of page tables only be enabled if the memory map is too sparse or there is roughly 512GB of RAM.
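The overflow described above is easy to reproduce in user space. The following is a sketch with assumed example addresses (phys_offset and phys are illustrative values loosely based on [1]; the kernel's real __phys_to_virt involves the same arithmetic but is not reproduced here):

#include <stdio.h>
#include <stdint.h>

#define VA_BITS		39
#define PAGE_OFFSET	(UINT64_MAX << (VA_BITS - 1))	/* 0xffffffc000000000 */
#define GB		(1ULL << 30)

int main(void)
{
	uint64_t phys_offset = 544 * GB;	/* assumed: RAM starts at 544GB */
	uint64_t phys        = 900 * GB;	/* assumed: a RAM page at 900GB */

	/* __phys_to_virt(x) is essentially x - PHYS_OFFSET + PAGE_OFFSET */
	uint64_t virt = phys - phys_offset + PAGE_OFFSET;

	/* The page is 356GB above PHYS_OFFSET, but the 39-bit linear map is
	 * only 256GB wide: the addition wraps past 2^64 and yields
	 * 0x0000001900000000, which is not a kernel address at all. */
	printf("virt = 0x%016llx\n", (unsigned long long)virt);
	return 0;
}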
References ---------- [1]: Principles of ARM Memory Maps, White Paper, Issue C Signed-off-by: Jungseok Lee Reviewed-by: Sungjinn Chung Acked-by: Kukjin Kim Reviewed-by: Christoffer Dall Reviewed-by: Steve Capper [catalin.marinas@arm.com: MEMBLOCK_INITIAL_LIMIT removed, same as PUD_SIZE] [catalin.marinas@arm.com: early_ioremap_init() updated for 4 levels] [catalin.marinas@arm.com: 48-bit VA depends on BROKEN until KVM is fixed] Signed-off-by: Catalin Marinas Tested-by: Jungseok Lee --- arch/arm64/Kconfig | 8 +++++ arch/arm64/include/asm/page.h | 13 ++++++-- arch/arm64/include/asm/pgalloc.h | 20 ++++++++++++ arch/arm64/include/asm/pgtable-hwdef.h | 6 ++-- arch/arm64/include/asm/pgtable.h | 40 ++++++++++++++++++++++++ arch/arm64/include/asm/tlb.h | 9 ++++++ arch/arm64/kernel/head.S | 42 +++++++++++++++++++++----- arch/arm64/kernel/traps.c | 5 +++ arch/arm64/mm/fault.c | 1 + arch/arm64/mm/ioremap.c | 6 +++- arch/arm64/mm/mmu.c | 14 +++++++-- 11 files changed, 147 insertions(+), 17 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 94ba45b198ff..cf07cc7295bb 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -195,12 +195,17 @@ config ARM64_VA_BITS_42 bool "42-bit" depends on ARM64_64K_PAGES +config ARM64_VA_BITS_48 + bool "48-bit" + depends on BROKEN + endchoice config ARM64_VA_BITS int default 39 if ARM64_VA_BITS_39 default 42 if ARM64_VA_BITS_42 + default 48 if ARM64_VA_BITS_48 config ARM64_2_LEVELS def_bool y if ARM64_64K_PAGES && ARM64_VA_BITS_42 @@ -208,6 +213,9 @@ config ARM64_2_LEVELS config ARM64_3_LEVELS def_bool y if ARM64_4K_PAGES && ARM64_VA_BITS_39 +config ARM64_4_LEVELS + def_bool y if ARM64_4K_PAGES && ARM64_VA_BITS_48 + config CPU_BIG_ENDIAN bool "Build big-endian kernel" help diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 1cbde2773c4f..cf9afa0366b6 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -33,19 +33,26 @@ /* * The idmap and swapper page tables need some space reserved in the kernel - * image. Both require a pgd and a next level table to (section) map the - * kernel. The the swapper also maps the FDT (see __create_page_tables for + * image. Both require pgd, pud (4 levels only) and pmd tables to (section) + * map the kernel. The swapper also maps the FDT (see __create_page_tables for * more information). 
*/ +#ifdef CONFIG_ARM64_4_LEVELS +#define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) +#define IDMAP_DIR_SIZE (3 * PAGE_SIZE) +#else #define SWAPPER_DIR_SIZE (2 * PAGE_SIZE) #define IDMAP_DIR_SIZE (2 * PAGE_SIZE) +#endif #ifndef __ASSEMBLY__ #ifdef CONFIG_ARM64_2_LEVELS #include -#else +#elif defined(CONFIG_ARM64_3_LEVELS) #include +#else +#include #endif extern void __cpu_clear_user_page(void *p, unsigned long user); diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 48298376e46a..7deb5750a945 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -46,6 +46,26 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) #endif /* CONFIG_ARM64_2_LEVELS */ +#ifdef CONFIG_ARM64_4_LEVELS + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return (pud_t *)get_zeroed_page(GFP_KERNEL | __GFP_REPEAT); +} + +static inline void pud_free(struct mm_struct *mm, pud_t *pud) +{ + BUG_ON((unsigned long)pud & (PAGE_SIZE-1)); + free_page((unsigned long)pud); +} + +static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) +{ + set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE)); +} + +#endif /* CONFIG_ARM64_4_LEVELS */ + extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index c7c603b489b8..fddcc3efa569 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -18,8 +18,10 @@ #ifdef CONFIG_ARM64_2_LEVELS #include -#else +#elif defined(CONFIG_ARM64_3_LEVELS) #include +#else +#include #endif /* @@ -27,7 +29,7 @@ * * Level 1 descriptor (PUD). */ - +#define PUD_TYPE_TABLE (_AT(pudval_t, 3) << 0) #define PUD_TABLE_BIT (_AT(pgdval_t, 1) << 1) #define PUD_TYPE_MASK (_AT(pgdval_t, 3) << 0) #define PUD_TYPE_SECT (_AT(pgdval_t, 1) << 0) diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index 6d5854972a77..d9b23efdaded 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -35,7 +35,11 @@ * VMALLOC and SPARSEMEM_VMEMMAP ranges. 
*/ #define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS) +#ifndef CONFIG_ARM64_4_LEVELS #define VMALLOC_END (PAGE_OFFSET - UL(0x400000000) - SZ_64K) +#else +#define VMALLOC_END (PAGE_OFFSET - UL(0x40000000000) - SZ_64K) +#endif #define vmemmap ((struct page *)(VMALLOC_END + SZ_64K)) @@ -44,12 +48,16 @@ #ifndef __ASSEMBLY__ extern void __pte_error(const char *file, int line, unsigned long val); extern void __pmd_error(const char *file, int line, unsigned long val); +extern void __pud_error(const char *file, int line, unsigned long val); extern void __pgd_error(const char *file, int line, unsigned long val); #define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) #ifndef CONFIG_ARM64_2_LEVELS #define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) #endif +#ifdef CONFIG_ARM64_4_LEVELS +#define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud)) +#endif #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) #ifdef CONFIG_SMP @@ -347,6 +355,30 @@ static inline pmd_t *pud_page_vaddr(pud_t pud) #endif /* CONFIG_ARM64_2_LEVELS */ +#ifdef CONFIG_ARM64_4_LEVELS + +#define pgd_none(pgd) (!pgd_val(pgd)) +#define pgd_bad(pgd) (!(pgd_val(pgd) & 2)) +#define pgd_present(pgd) (pgd_val(pgd)) + +static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + *pgdp = pgd; + dsb(ishst); +} + +static inline void pgd_clear(pgd_t *pgdp) +{ + set_pgd(pgdp, __pgd(0)); +} + +static inline pud_t *pgd_page_vaddr(pgd_t pgd) +{ + return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK); +} + +#endif /* CONFIG_ARM64_4_LEVELS */ + /* to find an entry in a page-table-directory */ #define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) @@ -355,6 +387,14 @@ static inline pmd_t *pud_page_vaddr(pud_t pud) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) +#ifdef CONFIG_ARM64_4_LEVELS +#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) +{ + return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr); +} +#endif + /* Find an entry in the second-level page table.. */ #ifndef CONFIG_ARM64_2_LEVELS #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index bc19101edaeb..49dc8f03362f 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -100,6 +100,15 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, } #endif +#ifdef CONFIG_ARM64_4_LEVELS +static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, + unsigned long addr) +{ + tlb_add_flush(tlb, addr); + tlb_remove_page(tlb, virt_to_page(pudp)); +} +#endif + static inline void __tlb_remove_pmd_tlb_entry(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long address) { diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index fa3b7fb8a77a..847b99daad79 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -476,16 +476,42 @@ ENDPROC(__calc_phys_offset) .quad PAGE_OFFSET /* - * Macro to populate the PGD for the corresponding block entry in the next - * level (tbl) for the given virtual address. + * Macro to populate the PUD for the corresponding block entry in the next + * level (tbl) for the given virtual address in case of 4 levels. 
* - * Preserves: pgd, tbl, virt - * Corrupts: tmp1, tmp2 + * Preserves: pgd, virt + * Corrupts: tbl, tmp1, tmp2 + * Returns: pud */ - .macro create_pgd_entry, pgd, tbl, virt, tmp1, tmp2 + .macro create_pud_entry, pgd, tbl, virt, pud, tmp1, tmp2 +#ifdef CONFIG_ARM64_4_LEVELS + add \tbl, \tbl, #PAGE_SIZE // bump tbl 1 page up. + // to make room for pud + add \pud, \pgd, #PAGE_SIZE // pgd points to pud which + // follows pgd + lsr \tmp1, \virt, #PUD_SHIFT + and \tmp1, \tmp1, #PTRS_PER_PUD - 1 // PUD index + orr \tmp2, \tbl, #3 // PUD entry table type + str \tmp2, [\pud, \tmp1, lsl #3] +#else + mov \pud, \tbl +#endif + .endm + +/* + * Macro to populate the PGD (and possibily PUD) for the corresponding + * block entry in the next level (tbl) for the given virtual address. + * + * Preserves: pgd, virt + * Corrupts: tmp1, tmp2, tmp3 + * Returns: tbl -> page where block mappings can be placed + * (changed to make room for pud with 4 levels, preserved otherwise) + */ + .macro create_pgd_entry, pgd, tbl, virt, tmp1, tmp2, tmp3 + create_pud_entry \pgd, \tbl, \virt, \tmp3, \tmp1, \tmp2 lsr \tmp1, \virt, #PGDIR_SHIFT and \tmp1, \tmp1, #PTRS_PER_PGD - 1 // PGD index - orr \tmp2, \tbl, #3 // PGD entry table type + orr \tmp2, \tmp3, #3 // PGD entry table type str \tmp2, [\pgd, \tmp1, lsl #3] .endm @@ -550,7 +576,7 @@ __create_page_tables: add x0, x25, #PAGE_SIZE // section table address ldr x3, =KERNEL_START add x3, x3, x28 // __pa(KERNEL_START) - create_pgd_entry x25, x0, x3, x5, x6 + create_pgd_entry x25, x0, x3, x1, x5, x6 ldr x6, =KERNEL_END mov x5, x3 // __pa(KERNEL_START) add x6, x6, x28 // __pa(KERNEL_END) @@ -561,7 +587,7 @@ __create_page_tables: */ add x0, x26, #PAGE_SIZE // section table address mov x5, #PAGE_OFFSET - create_pgd_entry x26, x0, x5, x3, x6 + create_pgd_entry x26, x0, x5, x1, x3, x6 ldr x6, =KERNEL_END mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 506f7814e305..02cd3f023e9a 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -339,6 +339,11 @@ void __pmd_error(const char *file, int line, unsigned long val) pr_crit("%s:%d: bad pmd %016lx.\n", file, line, val); } +void __pud_error(const char *file, int line, unsigned long val) +{ + pr_crit("%s:%d: bad pud %016lx.\n", file, line, val); +} + void __pgd_error(const char *file, int line, unsigned long val) { pr_crit("%s:%d: bad pgd %016lx.\n", file, line, val); diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index bcc965e2cce1..41cb6d3d6075 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -62,6 +62,7 @@ void show_pte(struct mm_struct *mm, unsigned long addr) break; pud = pud_offset(pgd, addr); + printk(", *pud=%016llx", pud_val(*pud)); if (pud_none(*pud) || pud_bad(*pud)) break; diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index 69000efa015e..d5e969e7b576 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -104,9 +104,12 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) EXPORT_SYMBOL(ioremap_cache); static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; -#ifndef CONFIG_ARM64_64K_PAGES +#ifndef CONFIG_ARM64_2_LEVELS static pte_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; #endif +#ifdef CONFIG_ARM64_4_LEVELS +static pte_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; +#endif static inline pud_t * __init early_ioremap_pud(unsigned long addr) { @@ -144,6 +147,7 @@ void __init early_ioremap_init(void) unsigned long addr = fix_to_virt(FIX_BTMAP_BEGIN); pgd = 
pgd_offset_k(addr); + pgd_populate(&init_mm, pgd, bm_pud); pud = pud_offset(pgd, addr); pud_populate(&init_mm, pud, bm_pmd); pmd = pmd_offset(pud, addr); diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index c43f1dd19489..c55567283cde 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include "mm.h" @@ -204,9 +205,16 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, unsigned long end, unsigned long phys, int map_io) { - pud_t *pud = pud_offset(pgd, addr); + pud_t *pud; unsigned long next; + if (pgd_none(*pgd)) { + pud = early_alloc(PTRS_PER_PUD * sizeof(pud_t)); + pgd_populate(&init_mm, pgd, pud); + } + BUG_ON(pgd_bad(*pgd)); + + pud = pud_offset(pgd, addr); do { next = pud_addr_end(addr, end); @@ -290,10 +298,10 @@ static void __init map_mem(void) * memory addressable from the initial direct kernel mapping. * * The initial direct kernel mapping, located at swapper_pg_dir, - * gives us PGDIR_SIZE memory starting from PHYS_OFFSET (which must be + * gives us PUD_SIZE memory starting from PHYS_OFFSET (which must be * aligned to 2MB as per Documentation/arm64/booting.txt). */ - limit = PHYS_OFFSET + PGDIR_SIZE; + limit = PHYS_OFFSET + PUD_SIZE; memblock_set_current_limit(limit); /* map all the memory banks */ From abe669d7e1a8f9163eb7e8e153e7257d38c1ba3e Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 15 Jul 2014 15:37:21 +0100 Subject: [PATCH 37/55] arm64: Convert bool ARM64_x_LEVELS to int ARM64_PGTABLE_LEVELS Rather than having several Kconfig options, define int ARM64_PGTABLE_LEVELS which will be also useful in converting some of the pgtable macros. Signed-off-by: Catalin Marinas Tested-by: Jungseok Lee --- arch/arm64/Kconfig | 13 +++++-------- arch/arm64/include/asm/page.h | 6 +++--- arch/arm64/include/asm/pgalloc.h | 8 ++++---- arch/arm64/include/asm/pgtable-hwdef.h | 4 ++-- arch/arm64/include/asm/pgtable.h | 18 +++++++++--------- arch/arm64/include/asm/tlb.h | 4 ++-- arch/arm64/kernel/head.S | 2 +- arch/arm64/mm/ioremap.c | 4 ++-- 8 files changed, 28 insertions(+), 31 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index cf07cc7295bb..8cdb7f36bfc8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -207,14 +207,11 @@ config ARM64_VA_BITS default 42 if ARM64_VA_BITS_42 default 48 if ARM64_VA_BITS_48 -config ARM64_2_LEVELS - def_bool y if ARM64_64K_PAGES && ARM64_VA_BITS_42 - -config ARM64_3_LEVELS - def_bool y if ARM64_4K_PAGES && ARM64_VA_BITS_39 - -config ARM64_4_LEVELS - def_bool y if ARM64_4K_PAGES && ARM64_VA_BITS_48 +config ARM64_PGTABLE_LEVELS + int + default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42 + default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39 + default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48 config CPU_BIG_ENDIAN bool "Build big-endian kernel" diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index cf9afa0366b6..a998ff478777 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -37,7 +37,7 @@ * map the kernel. The swapper also maps the FDT (see __create_page_tables for * more information). 
*/ -#ifdef CONFIG_ARM64_4_LEVELS +#if CONFIG_ARM64_PGTABLE_LEVELS == 4 #define SWAPPER_DIR_SIZE (3 * PAGE_SIZE) #define IDMAP_DIR_SIZE (3 * PAGE_SIZE) #else @@ -47,9 +47,9 @@ #ifndef __ASSEMBLY__ -#ifdef CONFIG_ARM64_2_LEVELS +#if CONFIG_ARM64_PGTABLE_LEVELS == 2 #include -#elif defined(CONFIG_ARM64_3_LEVELS) +#elif CONFIG_ARM64_PGTABLE_LEVELS == 3 #include #else #include diff --git a/arch/arm64/include/asm/pgalloc.h b/arch/arm64/include/asm/pgalloc.h index 7deb5750a945..d5bed02073d6 100644 --- a/arch/arm64/include/asm/pgalloc.h +++ b/arch/arm64/include/asm/pgalloc.h @@ -26,7 +26,7 @@ #define check_pgt_cache() do { } while (0) -#ifndef CONFIG_ARM64_2_LEVELS +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -44,9 +44,9 @@ static inline void pud_populate(struct mm_struct *mm, pud_t *pud, pmd_t *pmd) set_pud(pud, __pud(__pa(pmd) | PMD_TYPE_TABLE)); } -#endif /* CONFIG_ARM64_2_LEVELS */ +#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 2 */ -#ifdef CONFIG_ARM64_4_LEVELS +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) { @@ -64,7 +64,7 @@ static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, pud_t *pud) set_pgd(pgd, __pgd(__pa(pud) | PUD_TYPE_TABLE)); } -#endif /* CONFIG_ARM64_4_LEVELS */ +#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 3 */ extern pgd_t *pgd_alloc(struct mm_struct *mm); extern void pgd_free(struct mm_struct *mm, pgd_t *pgd); diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index fddcc3efa569..d453e8bfef06 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -16,9 +16,9 @@ #ifndef __ASM_PGTABLE_HWDEF_H #define __ASM_PGTABLE_HWDEF_H -#ifdef CONFIG_ARM64_2_LEVELS +#if CONFIG_ARM64_PGTABLE_LEVELS == 2 #include -#elif defined(CONFIG_ARM64_3_LEVELS) +#elif CONFIG_ARM64_PGTABLE_LEVELS == 3 #include #else #include diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h index d9b23efdaded..9f862e6e9286 100644 --- a/arch/arm64/include/asm/pgtable.h +++ b/arch/arm64/include/asm/pgtable.h @@ -35,7 +35,7 @@ * VMALLOC and SPARSEMEM_VMEMMAP ranges. 
*/ #define VMALLOC_START (UL(0xffffffffffffffff) << VA_BITS) -#ifndef CONFIG_ARM64_4_LEVELS +#if CONFIG_ARM64_PGTABLE_LEVELS != 4 #define VMALLOC_END (PAGE_OFFSET - UL(0x400000000) - SZ_64K) #else #define VMALLOC_END (PAGE_OFFSET - UL(0x40000000000) - SZ_64K) @@ -52,10 +52,10 @@ extern void __pud_error(const char *file, int line, unsigned long val); extern void __pgd_error(const char *file, int line, unsigned long val); #define pte_ERROR(pte) __pte_error(__FILE__, __LINE__, pte_val(pte)) -#ifndef CONFIG_ARM64_2_LEVELS +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 #define pmd_ERROR(pmd) __pmd_error(__FILE__, __LINE__, pmd_val(pmd)) #endif -#ifdef CONFIG_ARM64_4_LEVELS +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 #define pud_ERROR(pud) __pud_error(__FILE__, __LINE__, pud_val(pud)) #endif #define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) @@ -331,7 +331,7 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd) */ #define mk_pte(page,prot) pfn_pte(page_to_pfn(page),prot) -#ifndef CONFIG_ARM64_2_LEVELS +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 #define pud_none(pud) (!pud_val(pud)) #define pud_bad(pud) (!(pud_val(pud) & 2)) @@ -353,9 +353,9 @@ static inline pmd_t *pud_page_vaddr(pud_t pud) return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK); } -#endif /* CONFIG_ARM64_2_LEVELS */ +#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 2 */ -#ifdef CONFIG_ARM64_4_LEVELS +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 #define pgd_none(pgd) (!pgd_val(pgd)) #define pgd_bad(pgd) (!(pgd_val(pgd) & 2)) @@ -377,7 +377,7 @@ static inline pud_t *pgd_page_vaddr(pgd_t pgd) return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK); } -#endif /* CONFIG_ARM64_4_LEVELS */ +#endif /* CONFIG_ARM64_PGTABLE_LEVELS > 3 */ /* to find an entry in a page-table-directory */ #define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) @@ -387,7 +387,7 @@ static inline pud_t *pgd_page_vaddr(pgd_t pgd) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) -#ifdef CONFIG_ARM64_4_LEVELS +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) { @@ -396,7 +396,7 @@ static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) #endif /* Find an entry in the second-level page table.. 
*/ -#ifndef CONFIG_ARM64_2_LEVELS +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 #define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) { diff --git a/arch/arm64/include/asm/tlb.h b/arch/arm64/include/asm/tlb.h index 49dc8f03362f..62731ef9749a 100644 --- a/arch/arm64/include/asm/tlb.h +++ b/arch/arm64/include/asm/tlb.h @@ -91,7 +91,7 @@ static inline void __pte_free_tlb(struct mmu_gather *tlb, pgtable_t pte, tlb_remove_page(tlb, pte); } -#ifndef CONFIG_ARM64_2_LEVELS +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, unsigned long addr) { @@ -100,7 +100,7 @@ static inline void __pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmdp, } #endif -#ifdef CONFIG_ARM64_4_LEVELS +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pudp, unsigned long addr) { diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 847b99daad79..019f81d9f1d5 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -484,7 +484,7 @@ ENDPROC(__calc_phys_offset) * Returns: pud */ .macro create_pud_entry, pgd, tbl, virt, pud, tmp1, tmp2 -#ifdef CONFIG_ARM64_4_LEVELS +#if CONFIG_ARM64_PGTABLE_LEVELS == 4 add \tbl, \tbl, #PAGE_SIZE // bump tbl 1 page up. // to make room for pud add \pud, \pgd, #PAGE_SIZE // pgd points to pud which diff --git a/arch/arm64/mm/ioremap.c b/arch/arm64/mm/ioremap.c index d5e969e7b576..fa324bd5a5c4 100644 --- a/arch/arm64/mm/ioremap.c +++ b/arch/arm64/mm/ioremap.c @@ -104,10 +104,10 @@ void __iomem *ioremap_cache(phys_addr_t phys_addr, size_t size) EXPORT_SYMBOL(ioremap_cache); static pte_t bm_pte[PTRS_PER_PTE] __page_aligned_bss; -#ifndef CONFIG_ARM64_2_LEVELS +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 static pte_t bm_pmd[PTRS_PER_PMD] __page_aligned_bss; #endif -#ifdef CONFIG_ARM64_4_LEVELS +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 static pte_t bm_pud[PTRS_PER_PUD] __page_aligned_bss; #endif From 6b4fee241dd7c4b11cae4432bfa899a386d71f26 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 15 Jul 2014 16:35:38 +0100 Subject: [PATCH 38/55] arm64: Remove asm/pgtable-*level-hwdef.h files The macros in these files can easily be computed based on PAGE_SHIFT and VA_BITS, so just remove them and add the corresponding macros to asm/pgtable-hwdef.h Signed-off-by: Catalin Marinas Tested-by: Jungseok Lee --- arch/arm64/include/asm/pgtable-2level-hwdef.h | 43 ---------------- arch/arm64/include/asm/pgtable-3level-hwdef.h | 50 ------------------- arch/arm64/include/asm/pgtable-4level-hwdef.h | 50 ------------------- arch/arm64/include/asm/pgtable-hwdef.h | 42 +++++++++++++--- 4 files changed, 36 insertions(+), 149 deletions(-) delete mode 100644 arch/arm64/include/asm/pgtable-2level-hwdef.h delete mode 100644 arch/arm64/include/asm/pgtable-3level-hwdef.h delete mode 100644 arch/arm64/include/asm/pgtable-4level-hwdef.h diff --git a/arch/arm64/include/asm/pgtable-2level-hwdef.h b/arch/arm64/include/asm/pgtable-2level-hwdef.h deleted file mode 100644 index 2593b490c56a..000000000000 --- a/arch/arm64/include/asm/pgtable-2level-hwdef.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef __ASM_PGTABLE_2LEVEL_HWDEF_H -#define __ASM_PGTABLE_2LEVEL_HWDEF_H - -/* - * With LPAE and 64KB pages, there are 2 levels of page tables. Each level has - * 8192 entries of 8 bytes each, occupying a 64KB page. Levels 0 and 1 are not - * used. The 2nd level table (PGD for Linux) can cover a range of 4TB, each - * entry representing 512MB. The user and kernel address spaces are limited to - * 4TB in the 64KB page configuration. - */ -#define PTRS_PER_PTE 8192 -#define PTRS_PER_PGD 8192 - -/* - * PGDIR_SHIFT determines the size a top-level page table entry can map. - */ -#define PGDIR_SHIFT 29 -#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) - -/* - * section address mask and size definitions. - */ -#define SECTION_SHIFT 29 -#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT) -#define SECTION_MASK (~(SECTION_SIZE-1)) - -#endif diff --git a/arch/arm64/include/asm/pgtable-3level-hwdef.h b/arch/arm64/include/asm/pgtable-3level-hwdef.h deleted file mode 100644 index 3dbf941d7767..000000000000 --- a/arch/arm64/include/asm/pgtable-3level-hwdef.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef __ASM_PGTABLE_3LEVEL_HWDEF_H -#define __ASM_PGTABLE_3LEVEL_HWDEF_H - -/* - * With LPAE and 4KB pages, there are 3 levels of page tables. Each level has - * 512 entries of 8 bytes each, occupying a 4K page. The first level table - * covers a range of 512GB, each entry representing 1GB. The user and kernel - * address spaces are limited to 512GB each. - */ -#define PTRS_PER_PTE 512 -#define PTRS_PER_PMD 512 -#define PTRS_PER_PGD 512 - -/* - * PGDIR_SHIFT determines the size a top-level page table entry can map. - */ -#define PGDIR_SHIFT 30 -#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) - -/* - * PMD_SHIFT determines the size a middle-level page table entry can map. - */ -#define PMD_SHIFT 21 -#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) - -/* - * section address mask and size definitions. 
- */ -#define SECTION_SHIFT 21 -#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT) -#define SECTION_MASK (~(SECTION_SIZE-1)) - -#endif diff --git a/arch/arm64/include/asm/pgtable-4level-hwdef.h b/arch/arm64/include/asm/pgtable-4level-hwdef.h deleted file mode 100644 index 0ec84e2274a3..000000000000 --- a/arch/arm64/include/asm/pgtable-4level-hwdef.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef __ASM_PGTABLE_4LEVEL_HWDEF_H -#define __ASM_PGTABLE_4LEVEL_HWDEF_H - -#define PTRS_PER_PTE 512 -#define PTRS_PER_PMD 512 -#define PTRS_PER_PUD 512 -#define PTRS_PER_PGD 512 - -/* - * PGDIR_SHIFT determines the size a top-level page table entry can map. - */ -#define PGDIR_SHIFT 39 -#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) -#define PGDIR_MASK (~(PGDIR_SIZE-1)) - -/* - * PUD_SHIFT determines the size the second level page table entry can map. - */ -#define PUD_SHIFT 30 -#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) -#define PUD_MASK (~(PUD_SIZE-1)) - -/* - * PMD_SHIFT determines the size the third level page table entry can map. - */ -#define PMD_SHIFT 21 -#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) - -/* - * section address mask and size definitions. - */ -#define SECTION_SHIFT 21 -#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT) -#define SECTION_MASK (~(SECTION_SIZE-1)) - -#endif diff --git a/arch/arm64/include/asm/pgtable-hwdef.h b/arch/arm64/include/asm/pgtable-hwdef.h index d453e8bfef06..88174e0bfafe 100644 --- a/arch/arm64/include/asm/pgtable-hwdef.h +++ b/arch/arm64/include/asm/pgtable-hwdef.h @@ -16,14 +16,44 @@ #ifndef __ASM_PGTABLE_HWDEF_H #define __ASM_PGTABLE_HWDEF_H -#if CONFIG_ARM64_PGTABLE_LEVELS == 2 -#include -#elif CONFIG_ARM64_PGTABLE_LEVELS == 3 -#include -#else -#include +#define PTRS_PER_PTE (1 << (PAGE_SHIFT - 3)) + +/* + * PMD_SHIFT determines the size a level 2 page table entry can map. + */ +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +#define PMD_SHIFT ((PAGE_SHIFT - 3) * 2 + 3) +#define PMD_SIZE (_AC(1, UL) << PMD_SHIFT) +#define PMD_MASK (~(PMD_SIZE-1)) +#define PTRS_PER_PMD PTRS_PER_PTE #endif +/* + * PUD_SHIFT determines the size a level 1 page table entry can map. + */ +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +#define PUD_SHIFT ((PAGE_SHIFT - 3) * 3 + 3) +#define PUD_SIZE (_AC(1, UL) << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) +#define PTRS_PER_PUD PTRS_PER_PTE +#endif + +/* + * PGDIR_SHIFT determines the size a top-level page table entry can map + * (depending on the configuration, this level can be 0, 1 or 2). + */ +#define PGDIR_SHIFT ((PAGE_SHIFT - 3) * CONFIG_ARM64_PGTABLE_LEVELS + 3) +#define PGDIR_SIZE (_AC(1, UL) << PGDIR_SHIFT) +#define PGDIR_MASK (~(PGDIR_SIZE-1)) +#define PTRS_PER_PGD (1 << (VA_BITS - PGDIR_SHIFT)) + +/* + * Section address mask and size definitions. + */ +#define SECTION_SHIFT PMD_SHIFT +#define SECTION_SIZE (_AC(1, UL) << SECTION_SHIFT) +#define SECTION_MASK (~(SECTION_SIZE-1)) + /* * Hardware page table definitions. 
* From 0f1740252bea82c972e432f4d0fb76e974d53bd3 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 15 Jul 2014 16:35:38 +0100 Subject: [PATCH 39/55] arm64: Remove asm/pgtable-*level-types.h files The macros and typedefs in these files are already duplicated, so just use a single pgtable-types.h file with the corresponding #ifdefs. Signed-off-by: Catalin Marinas Tested-by: Jungseok Lee --- arch/arm64/include/asm/page.h | 8 +-- arch/arm64/include/asm/pgtable-2level-types.h | 62 ---------------- arch/arm64/include/asm/pgtable-3level-types.h | 68 ------------------ ...pgtable-4level-types.h => pgtable-types.h} | 72 ++++++++++++------- 4 files changed, 49 insertions(+), 161 deletions(-) delete mode 100644 arch/arm64/include/asm/pgtable-2level-types.h delete mode 100644 arch/arm64/include/asm/pgtable-3level-types.h rename arch/arm64/include/asm/{pgtable-4level-types.h => pgtable-types.h} (73%) diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index a998ff478777..2502754d1921 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -47,13 +47,7 @@ #ifndef __ASSEMBLY__ -#if CONFIG_ARM64_PGTABLE_LEVELS == 2 -#include -#elif CONFIG_ARM64_PGTABLE_LEVELS == 3 -#include -#else -#include -#endif +#include extern void __cpu_clear_user_page(void *p, unsigned long user); extern void __cpu_copy_user_page(void *to, const void *from, diff --git a/arch/arm64/include/asm/pgtable-2level-types.h b/arch/arm64/include/asm/pgtable-2level-types.h deleted file mode 100644 index 5f101e63dfc1..000000000000 --- a/arch/arm64/include/asm/pgtable-2level-types.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef __ASM_PGTABLE_2LEVEL_TYPES_H -#define __ASM_PGTABLE_2LEVEL_TYPES_H - -#include - -typedef u64 pteval_t; -typedef u64 pgdval_t; -typedef pgdval_t pmdval_t; - -#undef STRICT_MM_TYPECHECKS - -#ifdef STRICT_MM_TYPECHECKS - -/* - * These are used to make use of C type-checking.. - */ -typedef struct { pteval_t pte; } pte_t; -typedef struct { pgdval_t pgd; } pgd_t; -typedef struct { pteval_t pgprot; } pgprot_t; - -#define pte_val(x) ((x).pte) -#define pgd_val(x) ((x).pgd) -#define pgprot_val(x) ((x).pgprot) - -#define __pte(x) ((pte_t) { (x) } ) -#define __pgd(x) ((pgd_t) { (x) } ) -#define __pgprot(x) ((pgprot_t) { (x) } ) - -#else /* !STRICT_MM_TYPECHECKS */ - -typedef pteval_t pte_t; -typedef pgdval_t pgd_t; -typedef pteval_t pgprot_t; - -#define pte_val(x) (x) -#define pgd_val(x) (x) -#define pgprot_val(x) (x) - -#define __pte(x) (x) -#define __pgd(x) (x) -#define __pgprot(x) (x) - -#endif /* STRICT_MM_TYPECHECKS */ - -#include - -#endif /* __ASM_PGTABLE_2LEVEL_TYPES_H */ diff --git a/arch/arm64/include/asm/pgtable-3level-types.h b/arch/arm64/include/asm/pgtable-3level-types.h deleted file mode 100644 index 4e94424938a4..000000000000 --- a/arch/arm64/include/asm/pgtable-3level-types.h +++ /dev/null @@ -1,68 +0,0 @@ -/* - * Copyright (C) 2012 ARM Ltd. 
- * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ -#ifndef __ASM_PGTABLE_3LEVEL_TYPES_H -#define __ASM_PGTABLE_3LEVEL_TYPES_H - -#include - -typedef u64 pteval_t; -typedef u64 pmdval_t; -typedef u64 pgdval_t; - -#undef STRICT_MM_TYPECHECKS - -#ifdef STRICT_MM_TYPECHECKS - -/* - * These are used to make use of C type-checking.. - */ -typedef struct { pteval_t pte; } pte_t; -typedef struct { pmdval_t pmd; } pmd_t; -typedef struct { pgdval_t pgd; } pgd_t; -typedef struct { pteval_t pgprot; } pgprot_t; - -#define pte_val(x) ((x).pte) -#define pmd_val(x) ((x).pmd) -#define pgd_val(x) ((x).pgd) -#define pgprot_val(x) ((x).pgprot) - -#define __pte(x) ((pte_t) { (x) } ) -#define __pmd(x) ((pmd_t) { (x) } ) -#define __pgd(x) ((pgd_t) { (x) } ) -#define __pgprot(x) ((pgprot_t) { (x) } ) - -#else /* !STRICT_MM_TYPECHECKS */ - -typedef pteval_t pte_t; -typedef pmdval_t pmd_t; -typedef pgdval_t pgd_t; -typedef pteval_t pgprot_t; - -#define pte_val(x) (x) -#define pmd_val(x) (x) -#define pgd_val(x) (x) -#define pgprot_val(x) (x) - -#define __pte(x) (x) -#define __pmd(x) (x) -#define __pgd(x) (x) -#define __pgprot(x) (x) - -#endif /* STRICT_MM_TYPECHECKS */ - -#include - -#endif /* __ASM_PGTABLE_3LEVEL_TYPES_H */ diff --git a/arch/arm64/include/asm/pgtable-4level-types.h b/arch/arm64/include/asm/pgtable-types.h similarity index 73% rename from arch/arm64/include/asm/pgtable-4level-types.h rename to arch/arm64/include/asm/pgtable-types.h index 7ad8dd257ea1..ca9df80af896 100644 --- a/arch/arm64/include/asm/pgtable-4level-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -1,5 +1,10 @@ /* - * This program is free software; you can redistribute it and/or modify + * Page table types definitions. + * + * Copyright (C) 2014 ARM Ltd. + * Author: Catalin Marinas + * + * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. * @@ -11,8 +16,9 @@ * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ -#ifndef __ASM_PGTABLE_4LEVEL_TYPES_H -#define __ASM_PGTABLE_4LEVEL_TYPES_H + +#ifndef __ASM_PGTABLE_TYPES_H +#define __ASM_PGTABLE_TYPES_H #include @@ -29,43 +35,61 @@ typedef u64 pgdval_t; * These are used to make use of C type-checking.. 
*/ typedef struct { pteval_t pte; } pte_t; -typedef struct { pmdval_t pmd; } pmd_t; -typedef struct { pudval_t pud; } pud_t; -typedef struct { pgdval_t pgd; } pgd_t; -typedef struct { pteval_t pgprot; } pgprot_t; - #define pte_val(x) ((x).pte) -#define pmd_val(x) ((x).pmd) -#define pud_val(x) ((x).pud) -#define pgd_val(x) ((x).pgd) -#define pgprot_val(x) ((x).pgprot) - #define __pte(x) ((pte_t) { (x) } ) + +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +typedef struct { pmdval_t pmd; } pmd_t; +#define pmd_val(x) ((x).pmd) #define __pmd(x) ((pmd_t) { (x) } ) +#endif + +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +typedef struct { pudval_t pud; } pud_t; +#define pud_val(x) ((x).pud) #define __pud(x) ((pud_t) { (x) } ) +#endif + +typedef struct { pgdval_t pgd; } pgd_t; +#define pgd_val(x) ((x).pgd) #define __pgd(x) ((pgd_t) { (x) } ) + +typedef struct { pteval_t pgprot; } pgprot_t; +#define pgprot_val(x) ((x).pgprot) #define __pgprot(x) ((pgprot_t) { (x) } ) #else /* !STRICT_MM_TYPECHECKS */ typedef pteval_t pte_t; -typedef pmdval_t pmd_t; -typedef pudval_t pud_t; -typedef pgdval_t pgd_t; -typedef pteval_t pgprot_t; - #define pte_val(x) (x) -#define pmd_val(x) (x) -#define pud_val(x) (x) -#define pgd_val(x) (x) -#define pgprot_val(x) (x) - #define __pte(x) (x) + +#if CONFIG_ARM64_PGTABLE_LEVELS > 2 +typedef pmdval_t pmd_t; +#define pmd_val(x) (x) #define __pmd(x) (x) +#endif + +#if CONFIG_ARM64_PGTABLE_LEVELS > 3 +typedef pudval_t pud_t; +#define pud_val(x) (x) #define __pud(x) (x) +#endif + +typedef pgdval_t pgd_t; +#define pgd_val(x) (x) #define __pgd(x) (x) + +typedef pteval_t pgprot_t; +#define pgprot_val(x) (x) #define __pgprot(x) (x) #endif /* STRICT_MM_TYPECHECKS */ -#endif /* __ASM_PGTABLE_4LEVEL_TYPES_H */ +#if CONFIG_ARM64_PGTABLE_LEVELS == 2 +#include +#elif CONFIG_ARM64_PGTABLE_LEVELS == 3 +#include +#endif + +#endif /* __ASM_PGTABLE_TYPES_H */ From b4a0d8b37797663b0e523c102dbb3e4b712a720c Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 16 Jul 2014 12:10:33 +0100 Subject: [PATCH 40/55] arm64: Clean up the initial page table creation in head.S This patch adds a create_table_entry macro which is used to populate pgd and pud entries, also reducing the number of arguments for create_pgd_entry. Signed-off-by: Catalin Marinas Tested-by: Jungseok Lee --- arch/arm64/kernel/head.S | 59 ++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 32 deletions(-) diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index 019f81d9f1d5..a6db505411bc 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -476,43 +476,38 @@ ENDPROC(__calc_phys_offset) .quad PAGE_OFFSET /* - * Macro to populate the PUD for the corresponding block entry in the next - * level (tbl) for the given virtual address in case of 4 levels. + * Macro to create a table entry to the next page. * - * Preserves: pgd, virt - * Corrupts: tbl, tmp1, tmp2 - * Returns: pud + * tbl: page table address + * virt: virtual address + * shift: #imm page table shift + * ptrs: #imm pointers per table page + * + * Preserves: virt + * Corrupts: tmp1, tmp2 + * Returns: tbl -> next level table page address */ - .macro create_pud_entry, pgd, tbl, virt, pud, tmp1, tmp2 -#if CONFIG_ARM64_PGTABLE_LEVELS == 4 - add \tbl, \tbl, #PAGE_SIZE // bump tbl 1 page up. 
- // to make room for pud - add \pud, \pgd, #PAGE_SIZE // pgd points to pud which - // follows pgd - lsr \tmp1, \virt, #PUD_SHIFT - and \tmp1, \tmp1, #PTRS_PER_PUD - 1 // PUD index - orr \tmp2, \tbl, #3 // PUD entry table type - str \tmp2, [\pud, \tmp1, lsl #3] -#else - mov \pud, \tbl -#endif + .macro create_table_entry, tbl, virt, shift, ptrs, tmp1, tmp2 + lsr \tmp1, \virt, #\shift + and \tmp1, \tmp1, #\ptrs - 1 // table index + add \tmp2, \tbl, #PAGE_SIZE + orr \tmp2, \tmp2, #PMD_TYPE_TABLE // address of next table and entry type + str \tmp2, [\tbl, \tmp1, lsl #3] + add \tbl, \tbl, #PAGE_SIZE // next level table page .endm /* * Macro to populate the PGD (and possibily PUD) for the corresponding * block entry in the next level (tbl) for the given virtual address. * - * Preserves: pgd, virt - * Corrupts: tmp1, tmp2, tmp3 - * Returns: tbl -> page where block mappings can be placed - * (changed to make room for pud with 4 levels, preserved otherwise) + * Preserves: tbl, next, virt + * Corrupts: tmp1, tmp2 */ - .macro create_pgd_entry, pgd, tbl, virt, tmp1, tmp2, tmp3 - create_pud_entry \pgd, \tbl, \virt, \tmp3, \tmp1, \tmp2 - lsr \tmp1, \virt, #PGDIR_SHIFT - and \tmp1, \tmp1, #PTRS_PER_PGD - 1 // PGD index - orr \tmp2, \tmp3, #3 // PGD entry table type - str \tmp2, [\pgd, \tmp1, lsl #3] + .macro create_pgd_entry, tbl, virt, tmp1, tmp2 + create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2 +#if CONFIG_ARM64_PGTABLE_LEVELS == 4 + create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2 +#endif .endm /* @@ -573,10 +568,10 @@ __create_page_tables: /* * Create the identity mapping. */ - add x0, x25, #PAGE_SIZE // section table address + mov x0, x25 // idmap_pg_dir ldr x3, =KERNEL_START add x3, x3, x28 // __pa(KERNEL_START) - create_pgd_entry x25, x0, x3, x1, x5, x6 + create_pgd_entry x0, x3, x5, x6 ldr x6, =KERNEL_END mov x5, x3 // __pa(KERNEL_START) add x6, x6, x28 // __pa(KERNEL_END) @@ -585,9 +580,9 @@ __create_page_tables: /* * Map the kernel image (starting with PHYS_OFFSET). */ - add x0, x26, #PAGE_SIZE // section table address + mov x0, x26 // swapper_pg_dir mov x5, #PAGE_OFFSET - create_pgd_entry x26, x0, x5, x1, x3, x6 + create_pgd_entry x0, x5, x3, x6 ldr x6, =KERNEL_END mov x3, x24 // phys offset create_block_map x0, x7, x3, x5, x6 From 08375198b01001c0e43bdd580104b16b019a3754 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 16 Jul 2014 17:42:43 +0100 Subject: [PATCH 41/55] arm64: Determine the vmalloc/vmemmap space at build time based on VA_BITS Rather than guessing what the maximum vmemmap space should be, this patch calculates it from VA_BITS and sizeof(struct page). The vmalloc space extends to the beginning of the vmemmap space. Since the virtual kernel memory layout now depends on the build configuration, this patch removes the detailed description in Documentation/arm64/memory.txt in favour of information printed during kernel booting.
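The new calculation is simple enough to check outside the kernel. Below is a sketch assuming a 4KB granule, a 48-bit VA and a 64-byte struct page (the real sizeof(struct page) depends on the kernel configuration):

#include <stdio.h>
#include <stdint.h>

#define VA_BITS		48
#define PAGE_SHIFT	12
#define PUD_SIZE	(1ULL << 30)	/* 4KB granule: one PUD entry maps 1GB */
#define ALIGN(x, a)	(((x) + (a) - 1) & ~((a) - 1))

int main(void)
{
	uint64_t struct_page_size = 64;	/* assumption, varies by config */

	/* VMEMMAP_SIZE: a struct page for every page of the whole VA space,
	 * rounded up to PUD_SIZE, as in the new asm/pgtable.h definition. */
	uint64_t vmemmap_size = ALIGN((1ULL << (VA_BITS - PAGE_SHIFT)) *
				      struct_page_size, PUD_SIZE);

	printf("VMEMMAP_SIZE = %llu GB\n", (unsigned long long)(vmemmap_size >> 30));
	return 0;
}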
Signed-off-by: Catalin Marinas Tested-by: Jungseok Lee --- Documentation/arm64/memory.txt | 98 +++++--------------------------- arch/arm64/include/asm/pgtable.h | 13 +++-- arch/arm64/mm/init.c | 22 ++++--- 3 files changed, 38 insertions(+), 95 deletions(-) diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt index 4c720d698e8e..8845d0847a66 100644 --- a/Documentation/arm64/memory.txt +++ b/Documentation/arm64/memory.txt @@ -2,19 +2,18 @@ ============================== Author: Catalin Marinas -Date : 20 February 2012 This document describes the virtual memory layout used by the AArch64 Linux kernel. The architecture allows up to 4 levels of translation tables with a 4KB page size and up to 3 levels with a 64KB page size. -AArch64 Linux uses either 3 levels or 4 levels of translation tables with -the 4KB page configuration, allowing 39-bit (512GB) or 48-bit (256TB) -virtual addresses, respectively, for both user and kernel. With 64KB -pages, only 2 levels of translation tables, allowing 42-bit (4TB) +AArch64 Linux uses either 3 levels or 4 levels of translation tables +with the 4KB page configuration, allowing 39-bit (512GB) or 48-bit +(256TB) virtual addresses, respectively, for both user and kernel. With +64KB pages, only 2 levels of translation tables, allowing 42-bit (4TB) virtual address, are used but the memory layout is the same. -User addresses have bits 63:39 set to 0 while the kernel addresses have +User addresses have bits 63:48 set to 0 while the kernel addresses have the same bits set to 1. TTBRx selection is given by bit 63 of the virtual address. The swapper_pg_dir contains only kernel (global) mappings while the user pgd contains only user (non-global) mappings. @@ -27,26 +26,7 @@ AArch64 Linux memory layout with 4KB pages + 3 levels: Start End Size Use ----------------------------------------------------------------------- 0000000000000000 0000007fffffffff 512GB user - -ffffff8000000000 ffffffbbfffeffff ~240GB vmalloc - -ffffffbbffff0000 ffffffbbffffffff 64KB [guard page] - -ffffffbc00000000 ffffffbdffffffff 8GB vmemmap - -ffffffbe00000000 ffffffbffbbfffff ~8GB [guard, future vmmemap] - -ffffffbffa000000 ffffffbffaffffff 16MB PCI I/O space - -ffffffbffb000000 ffffffbffbbfffff 12MB [guard] - -ffffffbffbc00000 ffffffbffbdfffff 2MB fixed mappings - -ffffffbffbe00000 ffffffbffbffffff 2MB [guard] - -ffffffbffc000000 ffffffbfffffffff 64MB modules - -ffffffc000000000 ffffffffffffffff 256GB kernel logical memory map +ffffff8000000000 ffffffffffffffff 512GB kernel AArch64 Linux memory layout with 4KB pages + 4 levels: @@ -54,26 +34,7 @@ AArch64 Linux memory layout with 4KB pages + 4 levels: Start End Size Use ----------------------------------------------------------------------- 0000000000000000 0000ffffffffffff 256TB user - -ffff000000000000 ffff7bfffffeffff ~124TB vmalloc - -ffff7bffffff0000 ffff7bffffffffff 64KB [guard page] - -ffff7c0000000000 ffff7dffffffffff 2TB vmemmap - -ffff7e0000000000 ffff7ffffbbfffff ~2TB [guard, future vmmemap] - -ffff7ffffa000000 ffff7ffffaffffff 16MB PCI I/O space - -ffff7ffffb000000 ffff7ffffbbfffff 12MB [guard] - -ffff7ffffbc00000 ffff7ffffbdfffff 2MB fixed mappings - -ffff7ffffbe00000 ffff7ffffbffffff 2MB [guard] - -ffff7ffffc000000 ffff7fffffffffff 64MB modules - -ffff800000000000 ffffffffffffffff 128TB kernel logical memory map +ffff000000000000 ffffffffffffffff 256TB kernel AArch64 Linux memory layout with 64KB pages + 2 levels: @@ -81,44 +42,14 @@ AArch64 Linux memory layout with 64KB pages + 2 levels: Start End Size Use 
-----------------------------------------------------------------------
0000000000000000	000003ffffffffff	   4TB		user
-
-fffffc0000000000	fffffdfbfffeffff	  ~2TB		vmalloc
-
-fffffdfbffff0000	fffffdfbffffffff	  64KB		[guard page]
-
-fffffdfc00000000	fffffdfdffffffff	   8GB		vmemmap
-
-fffffdfe00000000	fffffdfffbbfffff	  ~8GB		[guard, future vmmemap]
-
-fffffdfffa000000	fffffdfffaffffff	  16MB		PCI I/O space
-
-fffffdfffb000000	fffffdfffbbfffff	  12MB		[guard]
-
-fffffdfffbc00000	fffffdfffbdfffff	   2MB		fixed mappings
-
-fffffdfffbe00000	fffffdfffbffffff	   2MB		[guard]
-
-fffffdfffc000000	fffffdffffffffff	  64MB		modules
-
-fffffe0000000000	ffffffffffffffff	   2TB		kernel logical memory map
+fffffc0000000000	ffffffffffffffff	   4TB		kernel

-Translation table lookup with 4KB pages + 3 levels:
-
-+--------+--------+--------+--------+--------+--------+--------+--------+
-|63    56|55    48|47    40|39    32|31    24|23    16|15     8|7      0|
-+--------+--------+--------+--------+--------+--------+--------+--------+
- |                 |         |         |         |         |
- |                 |         |         |         |         v
- |                 |         |         |         |   [11:0]  in-page offset
- |                 |         |         |         +-> [20:12] L3 index
- |                 |         |         +-----------> [29:21] L2 index
- |                 |         +---------------------> [38:30] L1 index
- |                 +-------------------------------> [47:39] L0 index (not used)
- +-------------------------------------------------> [63] TTBR0/1
+For details of the virtual kernel memory layout please see the kernel
+booting log.

-Translation table lookup with 4KB pages + 4 levels:
+Translation table lookup with 4KB pages:

 +--------+--------+--------+--------+--------+--------+--------+--------+
 |63    56|55    48|47    40|39    32|31    24|23    16|15     8|7      0|
@@ -133,7 +64,7 @@ Translation table lookup with 4KB pages + 4 levels:
  +-------------------------------------------------> [63] TTBR0/1


-Translation table lookup with 64KB pages + 2 levels:
+Translation table lookup with 64KB pages:

 +--------+--------+--------+--------+--------+--------+--------+--------+
 |63    56|55    48|47    40|39    32|31    24|23    16|15     8|7      0|
@@ -142,10 +73,11 @@ Translation table lookup with 64KB pages + 2 levels:
  |                 |    |               |
  |                 |    |               v
  |                 |    |         [15:0]  in-page offset
  |                 |    +----------> [28:16] L3 index
- |                 +--------------------------> [41:29] L2 index (only 38:29 used)
- +-------------------------------> [47:42] L1 index (not used)
+ |                 +--------------------------> [41:29] L2 index
+ +-------------------------------> [47:42] L1 index
  +-------------------------------------------------> [63] TTBR0/1

+
 When using KVM, the hypervisor maps kernel pages in EL2, at a fixed
 offset from the kernel VA (top 24bits of the kernel VA set to zero):

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 9f862e6e9286..ec82789d03c3 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -33,13 +33,16 @@

 /*
  * VMALLOC and SPARSEMEM_VMEMMAP ranges.
+ *
+ * VMEMMAP_SIZE: allows the whole VA space to be covered by a struct page array
+ *	(rounded up to PUD_SIZE).
+ * VMALLOC_START: beginning of the kernel VA space
+ * VMALLOC_END: extends to the available space below vmemmap, PCI I/O space,
+ *	fixed mappings and modules
  */
+#define VMEMMAP_SIZE	ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * sizeof(struct page), PUD_SIZE)
 #define VMALLOC_START	(UL(0xffffffffffffffff) << VA_BITS)
-#if CONFIG_ARM64_PGTABLE_LEVELS != 4
-#define VMALLOC_END	(PAGE_OFFSET - UL(0x400000000) - SZ_64K)
-#else
-#define VMALLOC_END	(PAGE_OFFSET - UL(0x40000000000) - SZ_64K)
-#endif
+#define VMALLOC_END	(PAGE_OFFSET - PUD_SIZE - VMEMMAP_SIZE - SZ_64K)

 #define vmemmap		((struct page *)(VMALLOC_END + SZ_64K))

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index 7f68804814a1..0b32504e280f 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -33,6 +33,7 @@
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -256,26 +257,33 @@ void __init mem_init(void)
 #define MLK(b, t) b, t, ((t) - (b)) >> 10
 #define MLM(b, t) b, t, ((t) - (b)) >> 20
+#define MLG(b, t) b, t, ((t) - (b)) >> 30
 #define MLK_ROUNDUP(b, t) b, t, DIV_ROUND_UP(((t) - (b)), SZ_1K)

 	pr_notice("Virtual kernel memory layout:\n"
-		  "    vmalloc : 0x%16lx - 0x%16lx   (%6ld MB)\n"
+		  "    vmalloc : 0x%16lx - 0x%16lx   (%6ld GB)\n"
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
-		  "    vmemmap : 0x%16lx - 0x%16lx   (%6ld MB)\n"
+		  "    vmemmap : 0x%16lx - 0x%16lx   (%6ld GB maximum)\n"
+		  "              0x%16lx - 0x%16lx   (%6ld MB actual)\n"
 #endif
+		  "    PCI I/O : 0x%16lx - 0x%16lx   (%6ld MB)\n"
+		  "    fixed   : 0x%16lx - 0x%16lx   (%6ld KB)\n"
 		  "    modules : 0x%16lx - 0x%16lx   (%6ld MB)\n"
 		  "    memory  : 0x%16lx - 0x%16lx   (%6ld MB)\n"
-		  "      .init : 0x%p" " - 0x%p" "   (%6ld kB)\n"
-		  "      .text : 0x%p" " - 0x%p" "   (%6ld kB)\n"
-		  "      .data : 0x%p" " - 0x%p" "   (%6ld kB)\n",
-		  MLM(VMALLOC_START, VMALLOC_END),
+		  "      .init : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+		  "      .text : 0x%p" " - 0x%p" "   (%6ld KB)\n"
+		  "      .data : 0x%p" " - 0x%p" "   (%6ld KB)\n",
+		  MLG(VMALLOC_START, VMALLOC_END),
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
+		  MLG((unsigned long)vmemmap,
+		      (unsigned long)vmemmap + VMEMMAP_SIZE),
 		  MLM((unsigned long)virt_to_page(PAGE_OFFSET),
 		      (unsigned long)virt_to_page(high_memory)),
 #endif
+		  MLM((unsigned long)PCI_IOBASE, (unsigned long)PCI_IOBASE + SZ_16M),
+		  MLK(FIXADDR_START, FIXADDR_TOP),
 		  MLM(MODULES_VADDR, MODULES_END),
 		  MLM(PAGE_OFFSET, (unsigned long)high_memory),
-		  MLK_ROUNDUP(__init_begin, __init_end),
 		  MLK_ROUNDUP(_text, _etext),
 		  MLK_ROUNDUP(_sdata, _edata));

From 7078db46215f9137801a122f87ac31c504220a94 Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Mon, 21 Jul 2014 14:52:49 +0100
Subject: [PATCH 42/55] arm64: asm/pgtable.h pmd/pud definitions clean-up

Non-functional change to group together the pmd/pud definitions and
reduce the amount of #if CONFIG_ARM64_PGTABLE_LEVELS.
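The VMALLOC_END arithmetic introduced two patches up is easy to sanity-check
outside the kernel. The standalone sketch below recomputes the layout
constants for one illustrative configuration; VA_BITS = 48, 4KB pages and a
64-byte struct page are assumptions made for the example, not values taken
from the patches, which derive everything from Kconfig and the asm headers.

  #include <stdio.h>

  #define VA_BITS      48UL
  #define PAGE_SHIFT   12UL
  #define PUD_SIZE     (1UL << 30)  /* 1GB with 4KB pages, 4 levels */
  #define SZ_64K       (1UL << 16)
  #define STRUCT_PAGE  64UL         /* assumed sizeof(struct page) */

  #define ALIGN(x, a)  (((x) + (a) - 1) & ~((a) - 1))

  int main(void)
  {
          unsigned long page_offset   = ~0UL << (VA_BITS - 1);
          unsigned long vmalloc_start = ~0UL << VA_BITS;
          unsigned long vmemmap_size  =
                  ALIGN((1UL << (VA_BITS - PAGE_SHIFT)) * STRUCT_PAGE, PUD_SIZE);
          unsigned long vmalloc_end   =
                  page_offset - PUD_SIZE - vmemmap_size - SZ_64K;

          printf("VMEMMAP_SIZE  = 0x%016lx (%lu GB)\n",
                 vmemmap_size, vmemmap_size >> 30);
          printf("VMALLOC_START = 0x%016lx\n", vmalloc_start);
          printf("VMALLOC_END   = 0x%016lx\n", vmalloc_end);
          return 0;
  }

With these assumed values the vmemmap window comes out at 4096GB, which is
the "GB maximum" figure the new mem_init() banner prints, and VMALLOC_END
lands just below it with the 64KB guard page intact.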
Signed-off-by: Catalin Marinas
Tested-by: Jungseok Lee
---
 arch/arm64/include/asm/pgtable.h | 57 ++++++++++++++++----------------
 1 file changed, 28 insertions(+), 29 deletions(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index ec82789d03c3..73968c2180aa 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -54,15 +54,6 @@ extern void __pmd_error(const char *file, int line, unsigned long val);
 extern void __pud_error(const char *file, int line, unsigned long val);
 extern void __pgd_error(const char *file, int line, unsigned long val);

-#define pte_ERROR(pte)	__pte_error(__FILE__, __LINE__, pte_val(pte))
-#if CONFIG_ARM64_PGTABLE_LEVELS > 2
-#define pmd_ERROR(pmd)	__pmd_error(__FILE__, __LINE__, pmd_val(pmd))
-#endif
-#if CONFIG_ARM64_PGTABLE_LEVELS > 3
-#define pud_ERROR(pud)	__pud_error(__FILE__, __LINE__, pud_val(pud))
-#endif
-#define pgd_ERROR(pgd)	__pgd_error(__FILE__, __LINE__, pgd_val(pgd))
-
 #ifdef CONFIG_SMP
 #define PROT_DEFAULT	(PTE_TYPE_PAGE | PTE_AF | PTE_SHARED)
 #define PROT_SECT_DEFAULT	(PMD_TYPE_SECT | PMD_SECT_AF | PMD_SECT_S)
@@ -123,6 +114,8 @@ extern void __pgd_error(const char *file, int line, unsigned long val);
 extern struct page *empty_zero_page;
 #define ZERO_PAGE(vaddr)	(empty_zero_page)

+#define pte_ERROR(pte)	__pte_error(__FILE__, __LINE__, pte_val(pte))
+
 #define pte_pfn(pte)	((pte_val(pte) & PHYS_MASK) >> PAGE_SHIFT)

 #define pfn_pte(pfn,prot)	(__pte(((phys_addr_t)(pfn) << PAGE_SHIFT) | pgprot_val(prot)))
@@ -130,6 +123,10 @@ extern struct page *empty_zero_page;
 #define pte_none(pte)	(!pte_val(pte))
 #define pte_clear(mm,addr,ptep)	set_pte(ptep, __pte(0))
 #define pte_page(pte)	(pfn_to_page(pte_pfn(pte)))
+
+/* Find an entry in the third-level page table. */
+#define pte_index(addr)	(((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+
 #define pte_offset_kernel(dir,addr)	(pmd_page_vaddr(*(dir)) + pte_index(addr))

 #define pte_offset_map(dir,addr)	pte_offset_kernel((dir), (addr))
@@ -336,6 +333,8 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)

 #if CONFIG_ARM64_PGTABLE_LEVELS > 2

+#define pmd_ERROR(pmd)	__pmd_error(__FILE__, __LINE__, pmd_val(pmd))
+
 #define pud_none(pud)	(!pud_val(pud))
 #define pud_bad(pud)	(!(pud_val(pud) & 2))
 #define pud_present(pud)	(pud_val(pud))
@@ -356,10 +355,20 @@ static inline pmd_t *pud_page_vaddr(pud_t pud)
 	return __va(pud_val(pud) & PHYS_MASK & (s32)PAGE_MASK);
 }

+/* Find an entry in the second-level page table. */
+#define pmd_index(addr)	(((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))
+
+static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
+{
+	return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr);
+}
+
 #endif	/* CONFIG_ARM64_PGTABLE_LEVELS > 2 */

 #if CONFIG_ARM64_PGTABLE_LEVELS > 3

+#define pud_ERROR(pud)	__pud_error(__FILE__, __LINE__, pud_val(pud))
+
 #define pgd_none(pgd)	(!pgd_val(pgd))
 #define pgd_bad(pgd)	(!(pgd_val(pgd) & 2))
 #define pgd_present(pgd)	(pgd_val(pgd))
@@ -380,8 +389,18 @@ static inline pud_t *pgd_page_vaddr(pgd_t pgd)
 	return __va(pgd_val(pgd) & PHYS_MASK & (s32)PAGE_MASK);
 }

+/* Find an entry in the first-level page table.
*/ +#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) + +static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) +{ + return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr); +} + #endif /* CONFIG_ARM64_PGTABLE_LEVELS > 3 */ +#define pgd_ERROR(pgd) __pgd_error(__FILE__, __LINE__, pgd_val(pgd)) + /* to find an entry in a page-table-directory */ #define pgd_index(addr) (((addr) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) @@ -390,26 +409,6 @@ static inline pud_t *pgd_page_vaddr(pgd_t pgd) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(addr) pgd_offset(&init_mm, addr) -#if CONFIG_ARM64_PGTABLE_LEVELS > 3 -#define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) -static inline pud_t *pud_offset(pgd_t *pgd, unsigned long addr) -{ - return (pud_t *)pgd_page_vaddr(*pgd) + pud_index(addr); -} -#endif - -/* Find an entry in the second-level page table.. */ -#if CONFIG_ARM64_PGTABLE_LEVELS > 2 -#define pmd_index(addr) (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1)) -static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr) -{ - return (pmd_t *)pud_page_vaddr(*pud) + pmd_index(addr); -} -#endif - -/* Find an entry in the third-level page table.. */ -#define pte_index(addr) (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) - static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { const pteval_t mask = PTE_USER | PTE_PXN | PTE_UXN | PTE_RDONLY | From 383c2799113b00a5f12c820ff0fd3dfca9e5be89 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 21 Jul 2014 15:54:50 +0100 Subject: [PATCH 43/55] arm64: Add support for 48-bit VA space with 64KB page configuration This patch allows support for 3 levels of page tables with 64KB page configuration allowing 48-bit VA space. The pgd is no longer a full PAGE_SIZE (PTRS_PER_PGD is 64) and (swapper|idmap)_pg_dir are not fully populated (pgd_alloc falls back to kzalloc). Signed-off-by: Catalin Marinas Tested-by: Jungseok Lee --- Documentation/arm64/memory.txt | 8 ++++++++ arch/arm64/Kconfig | 1 + arch/arm64/include/asm/page.h | 16 +++++++++------- arch/arm64/kernel/head.S | 6 ++++-- 4 files changed, 22 insertions(+), 9 deletions(-) diff --git a/Documentation/arm64/memory.txt b/Documentation/arm64/memory.txt index 8845d0847a66..344e85cc7323 100644 --- a/Documentation/arm64/memory.txt +++ b/Documentation/arm64/memory.txt @@ -45,6 +45,14 @@ Start End Size Use fffffc0000000000 ffffffffffffffff 4TB kernel +AArch64 Linux memory layout with 64KB pages + 3 levels: + +Start End Size Use +----------------------------------------------------------------------- +0000000000000000 0000ffffffffffff 256TB user +ffff000000000000 ffffffffffffffff 256TB kernel + + For details of the virtual kernel memory layout please see the kernel booting log. diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 8cdb7f36bfc8..f4615ba88a77 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -210,6 +210,7 @@ config ARM64_VA_BITS config ARM64_PGTABLE_LEVELS int default 2 if ARM64_64K_PAGES && ARM64_VA_BITS_42 + default 3 if ARM64_64K_PAGES && ARM64_VA_BITS_48 default 3 if ARM64_4K_PAGES && ARM64_VA_BITS_39 default 4 if ARM64_4K_PAGES && ARM64_VA_BITS_48 diff --git a/arch/arm64/include/asm/page.h b/arch/arm64/include/asm/page.h index 2502754d1921..7a3f462133b0 100644 --- a/arch/arm64/include/asm/page.h +++ b/arch/arm64/include/asm/page.h @@ -34,17 +34,19 @@ /* * The idmap and swapper page tables need some space reserved in the kernel * image. 
Both require pgd, pud (4 levels only) and pmd tables to (section)
- * map the kernel. The swapper also maps the FDT (see __create_page_tables for
- * more information).
+ * map the kernel. With the 64K page configuration, swapper and idmap need to
+ * map to pte level. The swapper also maps the FDT (see __create_page_tables
+ * for more information).
  */
-#if CONFIG_ARM64_PGTABLE_LEVELS == 4
-#define SWAPPER_DIR_SIZE	(3 * PAGE_SIZE)
-#define IDMAP_DIR_SIZE		(3 * PAGE_SIZE)
+#ifdef CONFIG_ARM64_64K_PAGES
+#define SWAPPER_PGTABLE_LEVELS	(CONFIG_ARM64_PGTABLE_LEVELS)
 #else
-#define SWAPPER_DIR_SIZE	(2 * PAGE_SIZE)
-#define IDMAP_DIR_SIZE		(2 * PAGE_SIZE)
+#define SWAPPER_PGTABLE_LEVELS	(CONFIG_ARM64_PGTABLE_LEVELS - 1)
 #endif

+#define SWAPPER_DIR_SIZE	(SWAPPER_PGTABLE_LEVELS * PAGE_SIZE)
+#define IDMAP_DIR_SIZE		(SWAPPER_DIR_SIZE)
+
 #ifndef __ASSEMBLY__

 #include 

diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S
index a6db505411bc..0bce493495e9 100644
--- a/arch/arm64/kernel/head.S
+++ b/arch/arm64/kernel/head.S
@@ -55,9 +55,11 @@
 #ifdef CONFIG_ARM64_64K_PAGES
 #define BLOCK_SHIFT	PAGE_SHIFT
 #define BLOCK_SIZE	PAGE_SIZE
+#define TABLE_SHIFT	PMD_SHIFT
 #else
 #define BLOCK_SHIFT	SECTION_SHIFT
 #define BLOCK_SIZE	SECTION_SIZE
+#define TABLE_SHIFT	PUD_SHIFT
 #endif

 #define KERNEL_START	KERNEL_RAM_VADDR
@@ -505,8 +507,8 @@ ENDPROC(__calc_phys_offset)
  */
 	.macro	create_pgd_entry, tbl, virt, tmp1, tmp2
 	create_table_entry \tbl, \virt, PGDIR_SHIFT, PTRS_PER_PGD, \tmp1, \tmp2
-#if CONFIG_ARM64_PGTABLE_LEVELS == 4
-	create_table_entry \tbl, \virt, PUD_SHIFT, PTRS_PER_PUD, \tmp1, \tmp2
+#if SWAPPER_PGTABLE_LEVELS == 3
+	create_table_entry \tbl, \virt, TABLE_SHIFT, PTRS_PER_PTE, \tmp1, \tmp2
 #endif
 	.endm

From 7f0b1bf04511348995d6fce38c87c98a3b5cb781 Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Mon, 9 Jun 2014 11:55:03 +0100
Subject: [PATCH 44/55] arm64: Fix barriers used for page table modifications

The architecture specification states that both DSB and ISB are
required between page table modifications and subsequent memory
accesses using the corresponding virtual address. When TLB invalidation
takes place, the tlb_flush_* functions already have the necessary
barriers. However, there are other functions like create_mapping() for
which this is not the case.

The patch adds the DSB+ISB instructions in the set_pte() function for
valid kernel mappings. The invalid pte case is handled by tlb_flush_*
and the user mappings in general have a corresponding
update_mmu_cache() call containing a DSB. Even when update_mmu_cache()
isn't called, the kernel can still cope with an unlikely spurious page
fault by re-executing the instruction.

In addition, the set_pmd() and set_pud() functions gain an ISB for
architecture compliance when block mappings are created.

Signed-off-by: Catalin Marinas
Reported-by: Leif Lindholm
Acked-by: Steve Capper
Cc: Will Deacon
Cc:
---
 arch/arm64/include/asm/cacheflush.h | 11 +----------
 arch/arm64/include/asm/pgtable.h | 13 +++++++++++++
 arch/arm64/include/asm/tlbflush.h | 5 +++--
 3 files changed, 17 insertions(+), 12 deletions(-)

diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h
index a5176cf32dad..f2defe1c380c 100644
--- a/arch/arm64/include/asm/cacheflush.h
+++ b/arch/arm64/include/asm/cacheflush.h
@@ -138,19 +138,10 @@ static inline void __flush_icache_all(void)
 #define flush_icache_page(vma,page)	do { } while (0)

 /*
- * flush_cache_vmap() is used when creating mappings (eg, via vmap,
- * vmalloc, ioremap etc) in kernel space for pages. On non-VIPT
- * caches, since the direct-mappings of these pages may contain cached
- * data, we need to do a full cache flush to ensure that writebacks
- * don't corrupt data placed into these pages via the new mappings.
+ * Not required on AArch64 (PIPT or VIPT non-aliasing D-cache).
  */
 static inline void flush_cache_vmap(unsigned long start, unsigned long end)
 {
-	/*
-	 * set_pte_at() called from vmap_pte_range() does not
-	 * have a DSB after cleaning the cache line.
-	 */
-	dsb(ish);
 }

 static inline void flush_cache_vunmap(unsigned long start, unsigned long end)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 73968c2180aa..ffe1ba0506d1 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -146,6 +146,8 @@ extern struct page *empty_zero_page;

 #define pte_valid_user(pte) \
 	((pte_val(pte) & (PTE_VALID | PTE_USER)) == (PTE_VALID | PTE_USER))
+#define pte_valid_not_user(pte) \
+	((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)

 static inline pte_t pte_wrprotect(pte_t pte)
 {
@@ -192,6 +194,15 @@ static inline pte_t pte_mkspecial(pte_t pte)
 static inline void set_pte(pte_t *ptep, pte_t pte)
 {
 	*ptep = pte;
+
+	/*
+	 * Only if the new pte is valid and kernel, otherwise TLB maintenance
+	 * or update_mmu_cache() have the necessary barriers.
+	 */
+	if (pte_valid_not_user(pte)) {
+		dsb(ishst);
+		isb();
+	}
 }

 extern void __sync_icache_dcache(pte_t pteval, unsigned long addr);
@@ -311,6 +322,7 @@ static inline void set_pmd(pmd_t *pmdp, pmd_t pmd)
 {
 	*pmdp = pmd;
 	dsb(ishst);
+	isb();
 }

 static inline void pmd_clear(pmd_t *pmdp)
@@ -343,6 +355,7 @@ static inline void set_pud(pud_t *pudp, pud_t pud)
 {
 	*pudp = pud;
 	dsb(ishst);
+	isb();
 }

 static inline void pud_clear(pud_t *pudp)

diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h
index b9349c4513ea..3796ea6bb734 100644
--- a/arch/arm64/include/asm/tlbflush.h
+++ b/arch/arm64/include/asm/tlbflush.h
@@ -122,6 +122,7 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end
 	for (addr = start; addr < end; addr += 1 << (PAGE_SHIFT - 12))
 		asm("tlbi vaae1is, %0" : : "r"(addr));
 	dsb(ish);
+	isb();
 }

 /*
@@ -131,8 +132,8 @@ static inline void update_mmu_cache(struct vm_area_struct *vma,
 					unsigned long addr, pte_t *ptep)
 {
 	/*
-	 * set_pte() does not have a DSB, so make sure that the page table
-	 * write is visible.
+	 * set_pte() does not have a DSB for user mappings, so make sure that
+	 * the page table write is visible.
 	 */
 	dsb(ishst);
 }

From 2a8f45b040bcb9b2ad2845f061499d1b6f41cc7b Mon Sep 17 00:00:00 2001
From: Catalin Marinas
Date: Thu, 24 Jul 2014 14:12:11 +0100
Subject: [PATCH 45/55] arm64: Do not invoke audit_syscall_* functions if
 !CONFIG_AUDITSYSCALL

This is a temporary patch to be able to compile the kernel in
linux-next where the audit_syscall_* API has been changed. To be
reverted once the proper arm64 fix can be applied.
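Returning to the barrier fix above: the rule it adds to set_pte() can be
modelled in isolation. In the sketch below, pte_t and the PTE_* bits are
simplified stand-ins for the arm64 header definitions, and the inline asm
spells out what the kernel's dsb(ishst) and isb() macros expand to; it only
builds on AArch64 because of the barrier instructions.

  typedef struct { unsigned long pte; } pte_t;
  #define pte_val(p)   ((p).pte)

  #define PTE_VALID    (1UL << 0)
  #define PTE_USER     (1UL << 6)   /* AP[1] */

  #define pte_valid_not_user(pte) \
          ((pte_val(pte) & (PTE_VALID | PTE_USER)) == PTE_VALID)

  static inline void set_pte(pte_t *ptep, pte_t pte)
  {
          *ptep = pte;

          if (pte_valid_not_user(pte)) {
                  /* order the pte write against later walks */
                  asm volatile("dsb ishst" : : : "memory");
                  /* resync the pipeline before the next access */
                  asm volatile("isb" : : : "memory");
          }
  }

  int main(void)
  {
          pte_t slot = { 0 };
          pte_t kernel_pte = { PTE_VALID };             /* barriers taken */
          pte_t user_pte   = { PTE_VALID | PTE_USER };  /* barriers skipped */

          set_pte(&slot, kernel_pte);
          set_pte(&slot, user_pte);
          return 0;
  }

The design point is that only valid kernel mappings pay for the barriers on
every set_pte(); user mappings defer to update_mmu_cache() and invalid
entries to the TLB maintenance path.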
Signed-off-by: Catalin Marinas --- arch/arm64/kernel/ptrace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 70526cfda056..0310811bd77d 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1115,15 +1115,19 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs) if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_enter(regs, regs->syscallno); +#ifdef CONFIG_AUDITSYSCALL audit_syscall_entry(syscall_get_arch(), regs->syscallno, regs->orig_x0, regs->regs[1], regs->regs[2], regs->regs[3]); +#endif return regs->syscallno; } asmlinkage void syscall_trace_exit(struct pt_regs *regs) { +#ifdef CONFIG_AUDITSYSCALL audit_syscall_exit(regs); +#endif if (test_thread_flag(TIF_SYSCALL_TRACEPOINT)) trace_sys_exit(regs, regs_return_value(regs)); From 7c2105fbe9658d6cee18751568e29579bb58bfec Mon Sep 17 00:00:00 2001 From: Andreas Schwab Date: Thu, 24 Jul 2014 17:03:26 +0100 Subject: [PATCH 46/55] arm64/crypto: fix makefile rule for aes-glue-%.o This fixes the following build failure when building with CONFIG_MODVERSIONS enabled: CC [M] arch/arm64/crypto/aes-glue-ce.o ld: cannot find arch/arm64/crypto/aes-glue-ce.o: No such file or directory make[1]: *** [arch/arm64/crypto/aes-ce-blk.o] Error 1 make: *** [arch/arm64/crypto] Error 2 The $(obj)/aes-glue-%.o rule only creates $(obj)/.tmp_aes-glue-ce.o, it should use if_changed_rule instead of if_changed_dep. Signed-off-by: Andreas Schwab [ardb: mention CONFIG_MODVERSIONS in commit log] Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- arch/arm64/crypto/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/crypto/Makefile b/arch/arm64/crypto/Makefile index 2070a56ecc46..a3f935fde975 100644 --- a/arch/arm64/crypto/Makefile +++ b/arch/arm64/crypto/Makefile @@ -35,4 +35,4 @@ AFLAGS_aes-neon.o := -DINTERLEAVE=4 CFLAGS_aes-glue-ce.o := -DUSE_V8_CRYPTO_EXTENSIONS $(obj)/aes-glue-%.o: $(src)/aes-glue.c FORCE - $(call if_changed_dep,cc_o_c) + $(call if_changed_rule,cc_o_c) From 05ac65305437e8ef63d2d19cac704138a2a05aa5 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Thu, 24 Jul 2014 15:56:15 +0100 Subject: [PATCH 47/55] arm64: fix soft lockup due to large tlb flush range Under certain loads, this soft lockup has been observed: BUG: soft lockup - CPU#2 stuck for 22s! 
[ip6tables:1016]
Modules linked in: ip6t_rpfilter ip6t_REJECT cfg80211 rfkill xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_nat nf_conntrack_ipv6 nf_defrag_ipv6 nf_nat_ipv6 ip6table_mangle ip6table_security ip6table_raw ip6table_filter ip6_tables iptable_nat nf_conntrack_ipv4 nf_defrag_ipv4 nf_nat_ipv4 nf_nat nf_conntrack iptable_mangle iptable_security iptable_raw vfat fat efivarfs xfs libcrc32c
CPU: 2 PID: 1016 Comm: ip6tables Not tainted 3.13.0-0.rc7.30.sa2.aarch64 #1
task: fffffe03e81d1400 ti: fffffe03f01f8000 task.ti: fffffe03f01f8000
PC is at __cpu_flush_kern_tlb_range+0xc/0x40
LR is at __purge_vmap_area_lazy+0x28c/0x3ac
pc : [] lr : [] pstate: 80000145
sp : fffffe03f01fbb70
x29: fffffe03f01fbb70 x28: fffffe03f01f8000
x27: fffffe0000b19000 x26: 00000000000000d0
x25: 000000000000001c x24: fffffe03f01fbc50
x23: fffffe03f01fbc58 x22: fffffe03f01fbc10
x21: fffffe0000b2a3f8 x20: 0000000000000802
x19: fffffe0000b2a3c8 x18: 000003fffdf52710
x17: 000003ff9d8bb910 x16: fffffe000050fbfc
x15: 0000000000005735 x14: 000003ff9d7e1a5c
x13: 0000000000000000 x12: 000003ff9d7e1a5c
x11: 0000000000000007 x10: fffffe0000c09af0
x9 : fffffe0000ad1000 x8 : 000000000000005c
x7 : fffffe03e8624000 x6 : 0000000000000000
x5 : 0000000000000000 x4 : 0000000000000000
x3 : fffffe0000c09cc8 x2 : 0000000000000000
x1 : 000fffffdfffca80 x0 : 000fffffcd742150

The __cpu_flush_kern_tlb_range() function looks like:

ENTRY(__cpu_flush_kern_tlb_range)
	dsb	sy
	lsr	x0, x0, #12
	lsr	x1, x1, #12
1:	tlbi	vaae1is, x0
	add	x0, x0, #1
	cmp	x0, x1
	b.lo	1b
	dsb	sy
	isb
	ret
ENDPROC(__cpu_flush_kern_tlb_range)

The above soft lockup shows the PC at tlbi insn with:

  x0 = 0x000fffffcd742150
  x1 = 0x000fffffdfffca80

So __cpu_flush_kern_tlb_range has 0x128ba930 tlbi flushes left after it
has already been looping for 23 seconds!

Looking up one frame at __purge_vmap_area_lazy(), there is:

	...
	list_for_each_entry_rcu(va, &vmap_area_list, list) {
		if (va->flags & VM_LAZY_FREE) {
			if (va->va_start < *start)
				*start = va->va_start;
			if (va->va_end > *end)
				*end = va->va_end;
			nr += (va->va_end - va->va_start) >> PAGE_SHIFT;
			list_add_tail(&va->purge_list, &valist);
			va->flags |= VM_LAZY_FREEING;
			va->flags &= ~VM_LAZY_FREE;
		}
	}
	...

	if (nr || force_flush)
		flush_tlb_kernel_range(*start, *end);

So if two areas are being freed, the range passed to
flush_tlb_kernel_range() may be as large as the vmalloc space. For
arm64, this is ~240GB for 4k page size and ~2TB for 64k page size.

This patch works around this problem by adding a loop limit. If the
range is larger than the limit, use flush_tlb_all() rather than
flushing based on individual pages. The limit chosen is arbitrary as
the TLB size is implementation specific and not accessible in an
architected way. The aim of the arbitrary limit is to avoid soft
lockup.
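The shape of the workaround is a simple threshold test. The sketch below
reuses the patch's 1024-page MAX_TLB_RANGE; the two example ranges are
illustrative sizes chosen here, not figures from the report.

  #include <stdio.h>

  #define PAGE_SHIFT    12UL
  #define MAX_TLB_RANGE (1024UL << PAGE_SHIFT)  /* 4MB with 4KB pages */

  static const char *strategy(unsigned long start, unsigned long end)
  {
          /* small ranges: per-page invalidation; large ranges: nuke it all */
          return (end - start) <= MAX_TLB_RANGE ? "per-page TLBI loop"
                                                : "flush_tlb_all()";
  }

  int main(void)
  {
          printf("2MB range   -> %s\n", strategy(0, 2UL << 20));
          printf("240GB range -> %s\n", strategy(0, 240UL << 30));
          return 0;
  }

Past the cut-off, a full TLB invalidation is bounded work, whereas the
per-page loop grows linearly with the range, which is exactly what produced
the 22-second stall in the log above.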
Signed-off-by: Mark Salter [catalin.marinas@arm.com: commit log update] [catalin.marinas@arm.com: marginal optimisation] [catalin.marinas@arm.com: changed to MAX_TLB_RANGE and added comment] Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/tlbflush.h | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/tlbflush.h b/arch/arm64/include/asm/tlbflush.h index 3796ea6bb734..73f0ce570fb3 100644 --- a/arch/arm64/include/asm/tlbflush.h +++ b/arch/arm64/include/asm/tlbflush.h @@ -98,8 +98,8 @@ static inline void flush_tlb_page(struct vm_area_struct *vma, dsb(ish); } -static inline void flush_tlb_range(struct vm_area_struct *vma, - unsigned long start, unsigned long end) +static inline void __flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) { unsigned long asid = (unsigned long)ASID(vma->vm_mm) << 48; unsigned long addr; @@ -112,7 +112,7 @@ static inline void flush_tlb_range(struct vm_area_struct *vma, dsb(ish); } -static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) +static inline void __flush_tlb_kernel_range(unsigned long start, unsigned long end) { unsigned long addr; start >>= 12; @@ -125,6 +125,29 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end isb(); } +/* + * This is meant to avoid soft lock-ups on large TLB flushing ranges and not + * necessarily a performance improvement. + */ +#define MAX_TLB_RANGE (1024UL << PAGE_SHIFT) + +static inline void flush_tlb_range(struct vm_area_struct *vma, + unsigned long start, unsigned long end) +{ + if ((end - start) <= MAX_TLB_RANGE) + __flush_tlb_range(vma, start, end); + else + flush_tlb_mm(vma->vm_mm); +} + +static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) +{ + if ((end - start) <= MAX_TLB_RANGE) + __flush_tlb_kernel_range(start, end); + else + flush_tlb_all(); +} + /* * On AArch64, the cache coherency is handled via the set_pte_at() function. */ From 72c5839515260dce966cd24f54436e6583288e6c Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Thu, 24 Jul 2014 14:14:42 +0100 Subject: [PATCH 48/55] arm64: gicv3: Allow GICv3 compilation with older binutils GICv3 introduces new system registers accessible with the full msr/mrs syntax (e.g. mrs x0, Sop0_op1_CRm_CRn_op2). However, only recent binutils understand the new syntax. This patch introduces msr_s/mrs_s assembly macros which generate the equivalent instructions above and converts the existing GICv3 code (both drivers/irqchip/ and arch/arm64/kernel/). Signed-off-by: Catalin Marinas Reported-by: Olof Johansson Tested-by: Olof Johansson Suggested-by: Mark Rutland Acked-by: Mark Rutland Acked-by: Jason Cooper Cc: Will Deacon Cc: Marc Zyngier --- arch/arm64/include/asm/sysreg.h | 60 ++++++++++++++++++++++++++++++ arch/arm64/kernel/head.S | 6 +-- drivers/irqchip/irq-gic-v3.c | 16 ++++---- include/linux/irqchip/arm-gic-v3.h | 42 +++++++++++---------- 4 files changed, 93 insertions(+), 31 deletions(-) create mode 100644 arch/arm64/include/asm/sysreg.h diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h new file mode 100644 index 000000000000..5c89df0acbcb --- /dev/null +++ b/arch/arm64/include/asm/sysreg.h @@ -0,0 +1,60 @@ +/* + * Macros for accessing system registers with older binutils. + * + * Copyright (C) 2014 ARM Ltd. 
+ * Author: Catalin Marinas + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __ASM_SYSREG_H +#define __ASM_SYSREG_H + +#define sys_reg(op0, op1, crn, crm, op2) \ + ((((op0)-2)<<19)|((op1)<<16)|((crn)<<12)|((crm)<<8)|((op2)<<5)) + +#ifdef __ASSEMBLY__ + + .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30 + .equ __reg_num_x\num, \num + .endr + .equ __reg_num_xzr, 31 + + .macro mrs_s, rt, sreg + .inst 0xd5300000|(\sreg)|(__reg_num_\rt) + .endm + + .macro msr_s, sreg, rt + .inst 0xd5100000|(\sreg)|(__reg_num_\rt) + .endm + +#else + +asm( +" .irp num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30\n" +" .equ __reg_num_x\\num, \\num\n" +" .endr\n" +" .equ __reg_num_xzr, 31\n" +"\n" +" .macro mrs_s, rt, sreg\n" +" .inst 0xd5300000|(\\sreg)|(__reg_num_\\rt)\n" +" .endm\n" +"\n" +" .macro msr_s, sreg, rt\n" +" .inst 0xd5100000|(\\sreg)|(__reg_num_\\rt)\n" +" .endm\n" +); + +#endif + +#endif /* __ASM_SYSREG_H */ diff --git a/arch/arm64/kernel/head.S b/arch/arm64/kernel/head.S index c99e3a879ebc..144f10567f82 100644 --- a/arch/arm64/kernel/head.S +++ b/arch/arm64/kernel/head.S @@ -297,12 +297,12 @@ CPU_LE( bic x0, x0, #(3 << 24) ) // Clear the EE and E0E bits for EL1 cmp x0, #1 b.ne 3f - mrs x0, ICC_SRE_EL2 + mrs_s x0, ICC_SRE_EL2 orr x0, x0, #ICC_SRE_EL2_SRE // Set ICC_SRE_EL2.SRE==1 orr x0, x0, #ICC_SRE_EL2_ENABLE // Set ICC_SRE_EL2.Enable==1 - msr ICC_SRE_EL2, x0 + msr_s ICC_SRE_EL2, x0 isb // Make sure SRE is now set - msr ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults + msr_s ICH_HCR_EL2, xzr // Reset ICC_HCR_EL2 to defaults 3: #endif diff --git a/drivers/irqchip/irq-gic-v3.c b/drivers/irqchip/irq-gic-v3.c index 81519bae0453..57eaa5a0b1e3 100644 --- a/drivers/irqchip/irq-gic-v3.c +++ b/drivers/irqchip/irq-gic-v3.c @@ -108,39 +108,39 @@ static u64 gic_read_iar(void) { u64 irqstat; - asm volatile("mrs %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat)); + asm volatile("mrs_s %0, " __stringify(ICC_IAR1_EL1) : "=r" (irqstat)); return irqstat; } static void gic_write_pmr(u64 val) { - asm volatile("msr " __stringify(ICC_PMR_EL1) ", %0" : : "r" (val)); + asm volatile("msr_s " __stringify(ICC_PMR_EL1) ", %0" : : "r" (val)); } static void gic_write_ctlr(u64 val) { - asm volatile("msr " __stringify(ICC_CTLR_EL1) ", %0" : : "r" (val)); + asm volatile("msr_s " __stringify(ICC_CTLR_EL1) ", %0" : : "r" (val)); isb(); } static void gic_write_grpen1(u64 val) { - asm volatile("msr " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" (val)); + asm volatile("msr_s " __stringify(ICC_GRPEN1_EL1) ", %0" : : "r" (val)); isb(); } static void gic_write_sgi1r(u64 val) { - asm volatile("msr " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val)); + asm volatile("msr_s " __stringify(ICC_SGI1R_EL1) ", %0" : : "r" (val)); } static void gic_enable_sre(void) { u64 val; - asm volatile("mrs %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); val |= ICC_SRE_EL1_SRE; - asm 
volatile("msr " __stringify(ICC_SRE_EL1) ", %0" : : "r" (val)); + asm volatile("msr_s " __stringify(ICC_SRE_EL1) ", %0" : : "r" (val)); isb(); /* @@ -150,7 +150,7 @@ static void gic_enable_sre(void) * * Kindly inform the luser. */ - asm volatile("mrs %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); if (!(val & ICC_SRE_EL1_SRE)) pr_err("GIC: unable to set SRE (disabled at EL2), panic ahead\n"); } diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 30cb7556d43f..03a4ea37ba86 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -18,6 +18,8 @@ #ifndef __LINUX_IRQCHIP_ARM_GIC_V3_H #define __LINUX_IRQCHIP_ARM_GIC_V3_H +#include + /* * Distributor registers. We assume we're running non-secure, with ARE * being set. Secure-only and non-ARE registers are not described. @@ -125,17 +127,17 @@ #define ICH_VMCR_PMR_SHIFT 24 #define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) -#define ICC_EOIR1_EL1 S3_0_C12_C12_1 -#define ICC_IAR1_EL1 S3_0_C12_C12_0 -#define ICC_SGI1R_EL1 S3_0_C12_C11_5 -#define ICC_PMR_EL1 S3_0_C4_C6_0 -#define ICC_CTLR_EL1 S3_0_C12_C12_4 -#define ICC_SRE_EL1 S3_0_C12_C12_5 -#define ICC_GRPEN1_EL1 S3_0_C12_C12_7 +#define ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) +#define ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) +#define ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) +#define ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) +#define ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) +#define ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) +#define ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) #define ICC_IAR1_EL1_SPURIOUS 0x3ff -#define ICC_SRE_EL2 S3_4_C12_C9_5 +#define ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5) #define ICC_SRE_EL2_SRE (1 << 0) #define ICC_SRE_EL2_ENABLE (1 << 3) @@ -143,16 +145,16 @@ /* * System register definitions */ -#define ICH_VSEIR_EL2 S3_4_C12_C9_4 -#define ICH_HCR_EL2 S3_4_C12_C11_0 -#define ICH_VTR_EL2 S3_4_C12_C11_1 -#define ICH_MISR_EL2 S3_4_C12_C11_2 -#define ICH_EISR_EL2 S3_4_C12_C11_3 -#define ICH_ELSR_EL2 S3_4_C12_C11_5 -#define ICH_VMCR_EL2 S3_4_C12_C11_7 +#define ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4) +#define ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0) +#define ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1) +#define ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2) +#define ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) +#define ICH_ELSR_EL2 sys_reg(3, 4, 12, 11, 5) +#define ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) -#define __LR0_EL2(x) S3_4_C12_C12_ ## x -#define __LR8_EL2(x) S3_4_C12_C13_ ## x +#define __LR0_EL2(x) sys_reg(3, 4, 12, 12, x) +#define __LR8_EL2(x) sys_reg(3, 4, 12, 13, x) #define ICH_LR0_EL2 __LR0_EL2(0) #define ICH_LR1_EL2 __LR0_EL2(1) @@ -171,13 +173,13 @@ #define ICH_LR14_EL2 __LR8_EL2(6) #define ICH_LR15_EL2 __LR8_EL2(7) -#define __AP0Rx_EL2(x) S3_4_C12_C8_ ## x +#define __AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) #define ICH_AP0R0_EL2 __AP0Rx_EL2(0) #define ICH_AP0R1_EL2 __AP0Rx_EL2(1) #define ICH_AP0R2_EL2 __AP0Rx_EL2(2) #define ICH_AP0R3_EL2 __AP0Rx_EL2(3) -#define __AP1Rx_EL2(x) S3_4_C12_C9_ ## x +#define __AP1Rx_EL2(x) sys_reg(3, 4, 12, 9, x) #define ICH_AP1R0_EL2 __AP1Rx_EL2(0) #define ICH_AP1R1_EL2 __AP1Rx_EL2(1) #define ICH_AP1R2_EL2 __AP1Rx_EL2(2) @@ -189,7 +191,7 @@ static inline void gic_write_eoir(u64 irq) { - asm volatile("msr " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" (irq)); + asm volatile("msr_s " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" (irq)); isb(); } From af9b99647cd2a6a2b8a4a1c9ba6193dd375218b5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 29 Jul 2014 16:20:02 
+0100 Subject: [PATCH 49/55] arm64: defconfig: add virtio support for running as a kvm guest When running as a kvm guest on a para-virtualised platform, it is useful to have virtio implementations of console, 9pfs and network. This adds these options to the arm64 defconfig, so we can easily run a defconfig kernel build as both host and as a kvm guest. Signed-off-by: Will Deacon --- arch/arm64/configs/defconfig | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 3421f316f5dc..c1071268c912 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -52,6 +52,8 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_INET_LRO is not set # CONFIG_IPV6 is not set # CONFIG_WIRELESS is not set +CONFIG_NET_9P=y +CONFIG_NET_9P_VIRTIO=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y CONFIG_DMA_CMA=y @@ -65,6 +67,7 @@ CONFIG_PATA_PLATFORM=y CONFIG_PATA_OF_PLATFORM=y CONFIG_NETDEVICES=y CONFIG_TUN=y +CONFIG_VIRTIO_NET=y CONFIG_SMC91X=y CONFIG_SMSC911X=y # CONFIG_WLAN is not set @@ -76,6 +79,7 @@ CONFIG_SERIAL_8250_CONSOLE=y CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y CONFIG_SERIAL_OF_PLATFORM=y +CONFIG_VIRTIO_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set CONFIG_REGULATOR=y @@ -90,6 +94,7 @@ CONFIG_USB_ISP1760_HCD=y CONFIG_USB_STORAGE=y CONFIG_MMC=y CONFIG_MMC_ARMMMCI=y +CONFIG_VIRTIO_BALLOON=y CONFIG_VIRTIO_MMIO=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT2_FS=y @@ -107,6 +112,7 @@ CONFIG_HUGETLBFS=y # CONFIG_MISC_FILESYSTEMS is not set CONFIG_NFS_FS=y CONFIG_ROOT_NFS=y +CONFIG_9P_FS=y CONFIG_NLS_CODEPAGE_437=y CONFIG_NLS_ISO8859_1=y CONFIG_VIRTUALIZATION=y From 1915e2ad1cf548217c963121e4076b3d44dd0169 Mon Sep 17 00:00:00 2001 From: Arun Chandran Date: Thu, 26 Jun 2014 15:16:03 +0530 Subject: [PATCH 50/55] arm64: vdso: fix build error when switching from LE to BE Building a kernel with CPU_BIG_ENDIAN fails if there are stale objects from a !CPU_BIG_ENDIAN build. Due to a missing FORCE prerequisite on an if_changed rule in the VDSO Makefile, we attempt to link a stale LE object into the new BE kernel. According to Documentation/kbuild/makefiles.txt, FORCE is required for if_changed rules and forgetting it is a common mistake, so fix it by 'Forcing' the build of vdso. 
This patch fixes build errors like these: arch/arm64/kernel/vdso/note.o: compiled for a little endian system and target is big endian failed to merge target specific data of file arch/arm64/kernel/vdso/note.o arch/arm64/kernel/vdso/sigreturn.o: compiled for a little endian system and target is big endian failed to merge target specific data of file arch/arm64/kernel/vdso/sigreturn.o Tested-by: Mark Rutland Signed-off-by: Arun Chandran Signed-off-by: Will Deacon --- arch/arm64/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index 84b942612051..ff3bddea482d 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -43,7 +43,7 @@ $(obj)/vdso-offsets.h: $(obj)/vdso.so.dbg FORCE $(call if_changed,vdsosym) # Assembly rules for the .S files -$(obj-vdso): %.o: %.S +$(obj-vdso): %.o: %.S FORCE $(call if_changed_dep,vdsoas) # Actual build commands From 3666f88010d71e752ad677ec64edf148366afb7a Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 30 Jul 2014 16:06:34 +0100 Subject: [PATCH 51/55] arm64: defconfig: enable devtmpfs mount option Matching x86 and making it more convenient to run the arm64 default kernel as distros like Ubuntu need this option. Signed-off-by: Robert Richter Signed-off-by: Will Deacon --- arch/arm64/configs/defconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index c1071268c912..1e52b741d806 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -56,6 +56,7 @@ CONFIG_NET_9P=y CONFIG_NET_9P_VIRTIO=y CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y +CONFIG_DEVTMPFS_MOUNT=y CONFIG_DMA_CMA=y CONFIG_BLK_DEV_LOOP=y CONFIG_VIRTIO_BLK=y From c878e0cff5c5e56b216951cbe75f7a3dd500a736 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 31 Jul 2014 11:36:08 +0100 Subject: [PATCH 52/55] arm64: don't call break hooks for BRK exceptions from EL0 Our break hooks are used to handle brk exceptions from kgdb (and potentially kprobes if that code ever resurfaces), so don't bother calling them if the BRK exception comes from userspace. This prevents userspace from trapping to a kdb shell on systems where kgdb is enabled and active. 
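The new control flow can be reduced to a small model. In the sketch below,
pt_regs, user_mode() and the hook return codes are simplified stand-ins for
the arm64 definitions, and no real hook is registered, so the kernel-mode
path falls through to the error case; it is an illustration of the dispatch
order, not the kernel code.

  #include <stdio.h>

  enum { DBG_HOOK_HANDLED, DBG_HOOK_ERROR };

  struct pt_regs { int from_user; };

  static int user_mode(struct pt_regs *regs)        { return regs->from_user; }
  static int call_break_hook(struct pt_regs *regs)  { (void)regs; return DBG_HOOK_ERROR; }
  static void deliver_sigtrap(struct pt_regs *regs) { (void)regs; puts("SIGTRAP to task"); }

  static int brk_handler(struct pt_regs *regs)
  {
          if (user_mode(regs)) {
                  deliver_sigtrap(regs);  /* userspace can never reach kgdb */
          } else if (call_break_hook(regs) != DBG_HOOK_HANDLED) {
                  puts("Unexpected kernel BRK exception at EL1");
                  return -1;              /* -EFAULT in the real handler */
          }
          return 0;
  }

  int main(void)
  {
          struct pt_regs user = { 1 }, kernel = { 0 };

          brk_handler(&user);    /* SIGTRAP path, hooks never consulted */
          brk_handler(&kernel);  /* hook path; unhandled here */
          return 0;
  }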
Cc: Reported-by: Omar Sandoval Signed-off-by: Will Deacon --- arch/arm64/kernel/debug-monitors.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index a7fb874b595e..fe5b94078d82 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -315,20 +315,20 @@ static int brk_handler(unsigned long addr, unsigned int esr, { siginfo_t info; - if (call_break_hook(regs, esr) == DBG_HOOK_HANDLED) - return 0; + if (user_mode(regs)) { + info = (siginfo_t) { + .si_signo = SIGTRAP, + .si_errno = 0, + .si_code = TRAP_BRKPT, + .si_addr = (void __user *)instruction_pointer(regs), + }; - if (!user_mode(regs)) + force_sig_info(SIGTRAP, &info, current); + } else if (call_break_hook(regs, esr) != DBG_HOOK_HANDLED) { + pr_warning("Unexpected kernel BRK exception at EL1\n"); return -EFAULT; + } - info = (siginfo_t) { - .si_signo = SIGTRAP, - .si_errno = 0, - .si_code = TRAP_BRKPT, - .si_addr = (void __user *)instruction_pointer(regs), - }; - - force_sig_info(SIGTRAP, &info, current); return 0; } From e4aa297a490e16c44dc464361daab145a7c451c5 Mon Sep 17 00:00:00 2001 From: "byungchul.park" Date: Thu, 31 Jul 2014 11:05:36 +0100 Subject: [PATCH 53/55] arm64: fpsimd: fix a typo in fpsimd_save_partial_state ENDPROC Commit 190f1ca85d07 ("arm64: add support for kernel mode NEON in interrupt context") introduced a typing error in fpsimd_save_partial_state ENDPROC. This patch fixes the typing error. Acked-by: Ard Biesheuvel Signed-off-by: byungchul.park Signed-off-by: Will Deacon --- arch/arm64/kernel/entry-fpsimd.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index d358ccacfc00..c44a82f146b1 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -52,7 +52,7 @@ ENDPROC(fpsimd_load_state) ENTRY(fpsimd_save_partial_state) fpsimd_save_partial x0, 1, 8, 9 ret -ENDPROC(fpsimd_load_partial_state) +ENDPROC(fpsimd_save_partial_state) /* * Load the bottom n FP registers. From 94156675847c14a9b16e91b035da32e35e98ef79 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 31 Jul 2014 14:00:03 +0100 Subject: [PATCH 54/55] Revert "arm64: dmi: Add SMBIOS/DMI support" This reverts commit a28e3f4b90543f7c249a956e3ca518e243a04618. Ard and Yi Li report that this patch is broken by design, so revert it and let them sort it out for 3.18 instead. Reported-by: Ard Biesheuvel Signed-off-by: Will Deacon --- arch/arm64/Kconfig | 11 ---------- arch/arm64/include/asm/dmi.h | 41 ------------------------------------ arch/arm64/kernel/setup.c | 2 -- 3 files changed, 54 deletions(-) delete mode 100644 arch/arm64/include/asm/dmi.h diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index bfd60fa9d6d5..4fb4652495dd 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -360,17 +360,6 @@ config EFI allow the kernel to be booted as an EFI application. This is only useful on systems that have UEFI firmware. -config DMI - bool "Enable support for SMBIOS (DMI) tables" - depends on EFI - default y - help - This enables SMBIOS/DMI feature for systems. - - This option is only useful on systems that have UEFI firmware. - However, even with this option, the resultant kernel should - continue to boot on existing non-UEFI platforms. 
-
 endmenu

 menu "Userspace binary formats"

diff --git a/arch/arm64/include/asm/dmi.h b/arch/arm64/include/asm/dmi.h
deleted file mode 100644
index b0882a8620e1..000000000000
--- a/arch/arm64/include/asm/dmi.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * arch/arm64/include/asm/dmi.h
- *
- * Copyright (C) 2013 Linaro Limited.
- * Written by: Yi Li (yi.li@linaro.org)
- *
- * based on arch/ia64/include/asm/dmi.h
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License. See the file "COPYING" in the main directory of this archive
- * for more details.
- */
-
-#ifndef __ASM_DMI_H
-#define __ASM_DMI_H
-
-#include 
-#include 
-
-static inline void __iomem *dmi_remap(u64 phys, u64 size)
-{
-	void __iomem *p = efi_lookup_mapped_addr(phys);
-
-	/*
-	 * If the mapping spans multiple pages, do a minimal check to ensure
-	 * that the mapping returned by efi_lookup_mapped_addr() covers the
-	 * whole requested range (but ignore potential holes)
-	 */
-	if ((phys & ~PAGE_MASK) + size > PAGE_SIZE
-	    && (p + size - 1) != efi_lookup_mapped_addr(phys + size - 1))
-		return NULL;
-	return p;
-}
-
-/* Reuse existing UEFI mappings for DMI */
-#define dmi_alloc(l)		kzalloc(l, GFP_KERNEL)
-#define dmi_early_remap(x, l)	dmi_remap(x, l)
-#define dmi_early_unmap(x, l)
-#define dmi_unmap(x)
-
-#endif

diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c
index 35339a0e1592..f6f0ccf35ae6 100644
--- a/arch/arm64/kernel/setup.c
+++ b/arch/arm64/kernel/setup.c
@@ -43,7 +43,6 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
@@ -414,7 +413,6 @@ void __init setup_arch(char **cmdline_p)
 static int __init arm64_device_init(void)
 {
 	of_platform_populate(NULL, of_default_bus_match_table, NULL, NULL);
-	dmi_scan_machine();
 	return 0;
 }
 arch_initcall_sync(arm64_device_init);

From ea1719672f59eeb85829073b567495c4f472ac9f Mon Sep 17 00:00:00 2001
From: Mark Rutland
Date: Fri, 1 Aug 2014 10:23:20 +0100
Subject: [PATCH 55/55] arm64: add newline to I-cache policy string

Due to a missing newline in the I-cache policy detection log output,
it's possible to get some rather unfortunate output at boot time:

CPU1: Booted secondary processor
Detected VIPT I-cache on CPU1CPU2: Booted secondary processor
Detected VIPT I-cache on CPU2CPU3: Booted secondary processor
Detected VIPT I-cache on CPU3CPU4: Booted secondary processor
Detected PIPT I-cache on CPU4CPU5: Booted secondary processor
Detected PIPT I-cache on CPU5Brought up 6 CPUs
SMP: Total of 6 processors activated.

This patch adds the missing newline to the format string, cleaning up
the output.

Fixes: 59ccc0d41b7a ("arm64: cachetype: report weakest cache policy")
Signed-off-by: Mark Rutland
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/cpuinfo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/kernel/cpuinfo.c b/arch/arm64/kernel/cpuinfo.c
index f82f7d1c468e..f798f66634af 100644
--- a/arch/arm64/kernel/cpuinfo.c
+++ b/arch/arm64/kernel/cpuinfo.c
@@ -52,7 +52,7 @@ static void cpuinfo_detect_icache_policy(struct cpuinfo_arm64 *info)
 	if (l1ip == ICACHE_POLICY_AIVIVT)
 		set_bit(ICACHEF_AIVIVT, &__icache_flags);

-	pr_info("Detected %s I-cache on CPU%d", icache_policy_str[l1ip], cpu);
+	pr_info("Detected %s I-cache on CPU%d\n", icache_policy_str[l1ip], cpu);
 }

 static int check_reg_mask(char *name, u64 mask, u64 boot, u64 cur, int cpu)
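The run-together output is not a printk oddity; any message stream without
terminating newlines behaves the same way. A plain stdio analogy of the
before and after behaviour (the CPU numbers are invented for the demo):

  #include <stdio.h>

  int main(void)
  {
          /* before the fix: no '\n', so the next message runs straight on */
          printf("Detected VIPT I-cache on CPU%d", 1);
          printf("CPU%d: Booted secondary processor\n", 2);

          /* after the fix: each message ends its own line */
          printf("Detected VIPT I-cache on CPU%d\n", 1);
          printf("CPU%d: Booted secondary processor\n", 2);
          return 0;
  }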