1
0
Fork 0

arm64 fixes:

- Set the minimum GCC version to 5.1 for arm64 due to earlier compiler
   bugs.
 
 - Make atomic helpers __always_inline to avoid a section mismatch when
   compiling with clang.
 
 - Fix the CMA and crashkernel reservations to use ZONE_DMA (remove the
   arm64_dma32_phys_limit variable, no longer needed with a dynamic
   ZONE_DMA sizing in 5.11).
 
 - Remove redundant IRQ flag tracing that was leaving lockdep
   inconsistent with the hardware state.
 
 - Revert perf events based hard lockup detector that was causing
   smp_processor_id() to be called in preemptible context.
 
 - Some trivial cleanups - spelling fix, renaming S_FRAME_SIZE to
   PT_REGS_SIZE, function prototypes added.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEE5RElWfyWxS+3PLO2a9axLQDIXvEFAmAB2zoACgkQa9axLQDI
 XvE2vA//Vjh7bKMlNtocP7oA0/FVA7i9tKgNxDYmxjYvl6qDg26V7aDMelIi9H6l
 14k+Wbf2Eqkav3+aAGEwdXuaoYPGrZIfVkPf+BbuviluoGsjwaGak0pc29ofDjE4
 zaznZNXwO+joEqrtEZeTQO8fSagAupbgqf3ls/pRjBGVL6XEajhPqA+ccgQB71DI
 O+L0Z1tzQDurABp4mwHGFRbOTMdN59OhxfyHijO3yu+RGLQcO3C29AwuMkzJuYuA
 Fjng+VNS4mrKT8bsP0fpJa/oNekJ4jc/2OQaLxN+re8J+o6/EG6QGKdUL4VlEllk
 eK0chdC/ZD1e6R9MSV0ZL1diYedi0vn+F9mGxwNiSKtWzw0KqPEy7liP0KWQGVyF
 NALShoGkKMglYj2fmOrZhs7E4vAQGPlk7hROssDTM1RSu/7JpBwEJRb9QsOM4p4T
 HgcCoF4smnTCmbyVkcMYgZxMrJ5YOjchTu8uvUwHy6D//ZMQmDE2m3u9Svziu+y3
 Nk8VpIp0HNDZyA7ZTeOrmo2jSEOoK3tDVKiqorPSmZd5mp35BMCr1q/Rcu6uaywr
 4ym/CfmvQIqObzQOYbBze6QZs4DLqERP1p0WgEnWBE8W2rP4UcGaNdegbEsMkrfq
 CiCGcNyfpiJNZSc8VF1/1OFY/yABcWem1pVM7F254zA5G2wZX1U=
 =W9th
 -----END PGP SIGNATURE-----

Merge tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux

Pull arm64 fixes from Catalin Marinas:

 - Set the minimum GCC version to 5.1 for arm64 due to earlier compiler
   bugs.

 - Make atomic helpers __always_inline to avoid a section mismatch when
   compiling with clang.

 - Fix the CMA and crashkernel reservations to use ZONE_DMA (remove the
   arm64_dma32_phys_limit variable, no longer needed with a dynamic
   ZONE_DMA sizing in 5.11).

 - Remove redundant IRQ flag tracing that was leaving lockdep
   inconsistent with the hardware state.

 - Revert perf events based hard lockup detector that was causing
   smp_processor_id() to be called in preemptible context.

 - Some trivial cleanups - spelling fix, renaming S_FRAME_SIZE to
   PT_REGS_SIZE, function prototypes added.

* tag 'arm64-fixes' of git://git.kernel.org/pub/scm/linux/kernel/git/arm64/linux:
  arm64: selftests: Fix spelling of 'Mismatch'
  arm64: syscall: include prototype for EL0 SVC functions
  compiler.h: Raise minimum version of GCC to 5.1 for arm64
  arm64: make atomic helpers __always_inline
  arm64: rename S_FRAME_SIZE to PT_REGS_SIZE
  Revert "arm64: Enable perf events based hard lockup detector"
  arm64: entry: remove redundant IRQ flag tracing
  arm64: Remove arm64_dma32_phys_limit and its uses
master
Linus Torvalds 2021-01-15 13:11:51 -08:00
commit 82821be8a2
17 changed files with 56 additions and 107 deletions

View File

@ -174,8 +174,6 @@ config ARM64
select HAVE_NMI
select HAVE_PATA_PLATFORM
select HAVE_PERF_EVENTS
select HAVE_PERF_EVENTS_NMI if ARM64_PSEUDO_NMI && HW_PERF_EVENTS
select HAVE_HARDLOCKUP_DETECTOR_PERF if PERF_EVENTS && HAVE_PERF_EVENTS_NMI
select HAVE_PERF_REGS
select HAVE_PERF_USER_STACK_DUMP
select HAVE_REGS_AND_STACK_ACCESS_API

View File

@ -17,7 +17,7 @@
#include <asm/lse.h>
#define ATOMIC_OP(op) \
static inline void arch_##op(int i, atomic_t *v) \
static __always_inline void arch_##op(int i, atomic_t *v) \
{ \
__lse_ll_sc_body(op, i, v); \
}
@ -32,7 +32,7 @@ ATOMIC_OP(atomic_sub)
#undef ATOMIC_OP
#define ATOMIC_FETCH_OP(name, op) \
static inline int arch_##op##name(int i, atomic_t *v) \
static __always_inline int arch_##op##name(int i, atomic_t *v) \
{ \
return __lse_ll_sc_body(op##name, i, v); \
}
@ -56,7 +56,7 @@ ATOMIC_FETCH_OPS(atomic_sub_return)
#undef ATOMIC_FETCH_OPS
#define ATOMIC64_OP(op) \
static inline void arch_##op(long i, atomic64_t *v) \
static __always_inline void arch_##op(long i, atomic64_t *v) \
{ \
__lse_ll_sc_body(op, i, v); \
}
@ -71,7 +71,7 @@ ATOMIC64_OP(atomic64_sub)
#undef ATOMIC64_OP
#define ATOMIC64_FETCH_OP(name, op) \
static inline long arch_##op##name(long i, atomic64_t *v) \
static __always_inline long arch_##op##name(long i, atomic64_t *v) \
{ \
return __lse_ll_sc_body(op##name, i, v); \
}
@ -94,7 +94,7 @@ ATOMIC64_FETCH_OPS(atomic64_sub_return)
#undef ATOMIC64_FETCH_OP
#undef ATOMIC64_FETCH_OPS
static inline long arch_atomic64_dec_if_positive(atomic64_t *v)
static __always_inline long arch_atomic64_dec_if_positive(atomic64_t *v)
{
return __lse_ll_sc_body(atomic64_dec_if_positive, v);
}

View File

@ -94,8 +94,7 @@
#endif /* CONFIG_ARM64_FORCE_52BIT */
extern phys_addr_t arm64_dma_phys_limit;
extern phys_addr_t arm64_dma32_phys_limit;
#define ARCH_LOW_ADDRESS_LIMIT ((arm64_dma_phys_limit ? : arm64_dma32_phys_limit) - 1)
#define ARCH_LOW_ADDRESS_LIMIT (arm64_dma_phys_limit - 1)
struct debug_info {
#ifdef CONFIG_HAVE_HW_BREAKPOINT

View File

@ -75,7 +75,7 @@ int main(void)
DEFINE(S_SDEI_TTBR1, offsetof(struct pt_regs, sdei_ttbr1));
DEFINE(S_PMR_SAVE, offsetof(struct pt_regs, pmr_save));
DEFINE(S_STACKFRAME, offsetof(struct pt_regs, stackframe));
DEFINE(S_FRAME_SIZE, sizeof(struct pt_regs));
DEFINE(PT_REGS_SIZE, sizeof(struct pt_regs));
BLANK();
#ifdef CONFIG_COMPAT
DEFINE(COMPAT_SIGFRAME_REGS_OFFSET, offsetof(struct compat_sigframe, uc.uc_mcontext.arm_r0));

View File

@ -35,7 +35,7 @@
*/
.macro ftrace_regs_entry, allregs=0
/* Make room for pt_regs, plus a callee frame */
sub sp, sp, #(S_FRAME_SIZE + 16)
sub sp, sp, #(PT_REGS_SIZE + 16)
/* Save function arguments (and x9 for simplicity) */
stp x0, x1, [sp, #S_X0]
@ -61,15 +61,15 @@
.endif
/* Save the callsite's SP and LR */
add x10, sp, #(S_FRAME_SIZE + 16)
add x10, sp, #(PT_REGS_SIZE + 16)
stp x9, x10, [sp, #S_LR]
/* Save the PC after the ftrace callsite */
str x30, [sp, #S_PC]
/* Create a frame record for the callsite above pt_regs */
stp x29, x9, [sp, #S_FRAME_SIZE]
add x29, sp, #S_FRAME_SIZE
stp x29, x9, [sp, #PT_REGS_SIZE]
add x29, sp, #PT_REGS_SIZE
/* Create our frame record within pt_regs. */
stp x29, x30, [sp, #S_STACKFRAME]
@ -120,7 +120,7 @@ ftrace_common_return:
ldr x9, [sp, #S_PC]
/* Restore the callsite's SP */
add sp, sp, #S_FRAME_SIZE + 16
add sp, sp, #PT_REGS_SIZE + 16
ret x9
SYM_CODE_END(ftrace_common)
@ -130,7 +130,7 @@ SYM_CODE_START(ftrace_graph_caller)
ldr x0, [sp, #S_PC]
sub x0, x0, #AARCH64_INSN_SIZE // ip (callsite's BL insn)
add x1, sp, #S_LR // parent_ip (callsite's LR)
ldr x2, [sp, #S_FRAME_SIZE] // parent fp (callsite's FP)
ldr x2, [sp, #PT_REGS_SIZE] // parent fp (callsite's FP)
bl prepare_ftrace_return
b ftrace_common_return
SYM_CODE_END(ftrace_graph_caller)

View File

@ -75,7 +75,7 @@ alternative_else_nop_endif
.endif
#endif
sub sp, sp, #S_FRAME_SIZE
sub sp, sp, #PT_REGS_SIZE
#ifdef CONFIG_VMAP_STACK
/*
* Test whether the SP has overflowed, without corrupting a GPR.
@ -96,7 +96,7 @@ alternative_else_nop_endif
* userspace, and can clobber EL0 registers to free up GPRs.
*/
/* Stash the original SP (minus S_FRAME_SIZE) in tpidr_el0. */
/* Stash the original SP (minus PT_REGS_SIZE) in tpidr_el0. */
msr tpidr_el0, x0
/* Recover the original x0 value and stash it in tpidrro_el0 */
@ -253,7 +253,7 @@ alternative_else_nop_endif
scs_load tsk, x20
.else
add x21, sp, #S_FRAME_SIZE
add x21, sp, #PT_REGS_SIZE
get_current_task tsk
.endif /* \el == 0 */
mrs x22, elr_el1
@ -377,7 +377,7 @@ alternative_else_nop_endif
ldp x26, x27, [sp, #16 * 13]
ldp x28, x29, [sp, #16 * 14]
ldr lr, [sp, #S_LR]
add sp, sp, #S_FRAME_SIZE // restore sp
add sp, sp, #PT_REGS_SIZE // restore sp
.if \el == 0
alternative_insn eret, nop, ARM64_UNMAP_KERNEL_AT_EL0
@ -580,12 +580,12 @@ __bad_stack:
/*
* Store the original GPRs to the new stack. The original SP (minus
* S_FRAME_SIZE) was stashed in tpidr_el0 by kernel_ventry.
* PT_REGS_SIZE) was stashed in tpidr_el0 by kernel_ventry.
*/
sub sp, sp, #S_FRAME_SIZE
sub sp, sp, #PT_REGS_SIZE
kernel_entry 1
mrs x0, tpidr_el0
add x0, x0, #S_FRAME_SIZE
add x0, x0, #PT_REGS_SIZE
str x0, [sp, #S_SP]
/* Stash the regs for handle_bad_stack */

View File

@ -23,8 +23,6 @@
#include <linux/platform_device.h>
#include <linux/sched_clock.h>
#include <linux/smp.h>
#include <linux/nmi.h>
#include <linux/cpufreq.h>
/* ARMv8 Cortex-A53 specific event types. */
#define ARMV8_A53_PERFCTR_PREF_LINEFILL 0xC2
@ -1250,21 +1248,10 @@ static struct platform_driver armv8_pmu_driver = {
static int __init armv8_pmu_driver_init(void)
{
int ret;
if (acpi_disabled)
ret = platform_driver_register(&armv8_pmu_driver);
return platform_driver_register(&armv8_pmu_driver);
else
ret = arm_pmu_acpi_probe(armv8_pmuv3_init);
/*
* Try to re-initialize lockup detector after PMU init in
* case PMU events are triggered via NMIs.
*/
if (ret == 0 && arm_pmu_irq_is_nmi())
lockup_detector_init();
return ret;
return arm_pmu_acpi_probe(armv8_pmuv3_init);
}
device_initcall(armv8_pmu_driver_init)
@ -1322,27 +1309,3 @@ void arch_perf_update_userpage(struct perf_event *event,
userpg->cap_user_time_zero = 1;
userpg->cap_user_time_short = 1;
}
#ifdef CONFIG_HARDLOCKUP_DETECTOR_PERF
/*
* Safe maximum CPU frequency in case a particular platform doesn't implement
* a cpufreq driver. Although the architecture doesn't put any restrictions on
* the maximum frequency, 5 GHz seems to be a safe maximum given that the
* available Arm CPUs on the market are clocked much lower than 5 GHz. On the
* other hand, we can't make it much higher, as it would lead to a large
* hard-lockup detection timeout on parts which run slower (e.g. 1GHz on
* Developerbox) and don't possess a cpufreq driver.
*/
#define SAFE_MAX_CPU_FREQ 5000000000UL // 5 GHz
u64 hw_nmi_get_sample_period(int watchdog_thresh)
{
unsigned int cpu = smp_processor_id();
unsigned long max_cpu_freq;
max_cpu_freq = cpufreq_get_hw_max_freq(cpu) * 1000UL;
if (!max_cpu_freq)
max_cpu_freq = SAFE_MAX_CPU_FREQ;
return (u64)max_cpu_freq * watchdog_thresh;
}
#endif

View File

@ -25,7 +25,7 @@
stp x24, x25, [sp, #S_X24]
stp x26, x27, [sp, #S_X26]
stp x28, x29, [sp, #S_X28]
add x0, sp, #S_FRAME_SIZE
add x0, sp, #PT_REGS_SIZE
stp lr, x0, [sp, #S_LR]
/*
* Construct a useful saved PSTATE
@ -62,7 +62,7 @@
.endm
SYM_CODE_START(kretprobe_trampoline)
sub sp, sp, #S_FRAME_SIZE
sub sp, sp, #PT_REGS_SIZE
save_all_base_regs
@ -76,7 +76,7 @@ SYM_CODE_START(kretprobe_trampoline)
restore_all_base_regs
add sp, sp, #S_FRAME_SIZE
add sp, sp, #PT_REGS_SIZE
ret
SYM_CODE_END(kretprobe_trampoline)

View File

@ -914,13 +914,6 @@ static void do_signal(struct pt_regs *regs)
asmlinkage void do_notify_resume(struct pt_regs *regs,
unsigned long thread_flags)
{
/*
* The assembly code enters us with IRQs off, but it hasn't
* informed the tracing code of that for efficiency reasons.
* Update the trace code with the current status.
*/
trace_hardirqs_off();
do {
if (thread_flags & _TIF_NEED_RESCHED) {
/* Unmask Debug and SError for the next task */

View File

@ -9,6 +9,7 @@
#include <asm/daifflags.h>
#include <asm/debug-monitors.h>
#include <asm/exception.h>
#include <asm/fpsimd.h>
#include <asm/syscall.h>
#include <asm/thread_info.h>
@ -165,15 +166,8 @@ static void el0_svc_common(struct pt_regs *regs, int scno, int sc_nr,
if (!has_syscall_work(flags) && !IS_ENABLED(CONFIG_DEBUG_RSEQ)) {
local_daif_mask();
flags = current_thread_info()->flags;
if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP)) {
/*
* We're off to userspace, where interrupts are
* always enabled after we restore the flags from
* the SPSR.
*/
trace_hardirqs_on();
if (!has_syscall_work(flags) && !(flags & _TIF_SINGLESTEP))
return;
}
local_daif_restore(DAIF_PROCCTX);
}

View File

@ -53,13 +53,13 @@ s64 memstart_addr __ro_after_init = -1;
EXPORT_SYMBOL(memstart_addr);
/*
* We create both ZONE_DMA and ZONE_DMA32. ZONE_DMA covers the first 1G of
* memory as some devices, namely the Raspberry Pi 4, have peripherals with
* this limited view of the memory. ZONE_DMA32 will cover the rest of the 32
* bit addressable memory area.
* If the corresponding config options are enabled, we create both ZONE_DMA
* and ZONE_DMA32. By default ZONE_DMA covers the 32-bit addressable memory
* unless restricted on specific platforms (e.g. 30-bit on Raspberry Pi 4).
* In such case, ZONE_DMA32 covers the rest of the 32-bit addressable memory,
* otherwise it is empty.
*/
phys_addr_t arm64_dma_phys_limit __ro_after_init;
phys_addr_t arm64_dma32_phys_limit __ro_after_init;
#ifdef CONFIG_KEXEC_CORE
/*
@ -84,7 +84,7 @@ static void __init reserve_crashkernel(void)
if (crash_base == 0) {
/* Current arm64 boot protocol requires 2MB alignment */
crash_base = memblock_find_in_range(0, arm64_dma32_phys_limit,
crash_base = memblock_find_in_range(0, arm64_dma_phys_limit,
crash_size, SZ_2M);
if (crash_base == 0) {
pr_warn("cannot allocate crashkernel (size:0x%llx)\n",
@ -196,6 +196,7 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
unsigned long max_zone_pfns[MAX_NR_ZONES] = {0};
unsigned int __maybe_unused acpi_zone_dma_bits;
unsigned int __maybe_unused dt_zone_dma_bits;
phys_addr_t __maybe_unused dma32_phys_limit = max_zone_phys(32);
#ifdef CONFIG_ZONE_DMA
acpi_zone_dma_bits = fls64(acpi_iort_dma_get_max_cpu_address());
@ -205,8 +206,12 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
max_zone_pfns[ZONE_DMA] = PFN_DOWN(arm64_dma_phys_limit);
#endif
#ifdef CONFIG_ZONE_DMA32
max_zone_pfns[ZONE_DMA32] = PFN_DOWN(arm64_dma32_phys_limit);
max_zone_pfns[ZONE_DMA32] = PFN_DOWN(dma32_phys_limit);
if (!arm64_dma_phys_limit)
arm64_dma_phys_limit = dma32_phys_limit;
#endif
if (!arm64_dma_phys_limit)
arm64_dma_phys_limit = PHYS_MASK + 1;
max_zone_pfns[ZONE_NORMAL] = max;
free_area_init(max_zone_pfns);
@ -394,16 +399,9 @@ void __init arm64_memblock_init(void)
early_init_fdt_scan_reserved_mem();
if (IS_ENABLED(CONFIG_ZONE_DMA32))
arm64_dma32_phys_limit = max_zone_phys(32);
else
arm64_dma32_phys_limit = PHYS_MASK + 1;
reserve_elfcorehdr();
high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
dma_contiguous_reserve(arm64_dma32_phys_limit);
}
void __init bootmem_init(void)
@ -438,6 +436,11 @@ void __init bootmem_init(void)
sparse_init();
zone_sizes_init(min, max);
/*
* Reserve the CMA area after arm64_dma_phys_limit was initialised.
*/
dma_contiguous_reserve(arm64_dma_phys_limit);
/*
* request_standard_resources() depends on crashkernel's memory being
* reserved, so do it here.
@ -455,7 +458,7 @@ void __init bootmem_init(void)
void __init mem_init(void)
{
if (swiotlb_force == SWIOTLB_FORCE ||
max_pfn > PFN_DOWN(arm64_dma_phys_limit ? : arm64_dma32_phys_limit))
max_pfn > PFN_DOWN(arm64_dma_phys_limit))
swiotlb_init(1);
else
swiotlb_force = SWIOTLB_NO_FORCE;

View File

@ -726,11 +726,6 @@ static int armpmu_get_cpu_irq(struct arm_pmu *pmu, int cpu)
return per_cpu(hw_events->irq, cpu);
}
bool arm_pmu_irq_is_nmi(void)
{
return has_nmi;
}
/*
* PMU hardware loses all context when a CPU goes offline.
* When a CPU is hotplugged back in, since some hardware registers are

View File

@ -11,19 +11,19 @@
* See Documentation/atomic_bitops.txt for details.
*/
static inline void set_bit(unsigned int nr, volatile unsigned long *p)
static __always_inline void set_bit(unsigned int nr, volatile unsigned long *p)
{
p += BIT_WORD(nr);
atomic_long_or(BIT_MASK(nr), (atomic_long_t *)p);
}
static inline void clear_bit(unsigned int nr, volatile unsigned long *p)
static __always_inline void clear_bit(unsigned int nr, volatile unsigned long *p)
{
p += BIT_WORD(nr);
atomic_long_andnot(BIT_MASK(nr), (atomic_long_t *)p);
}
static inline void change_bit(unsigned int nr, volatile unsigned long *p)
static __always_inline void change_bit(unsigned int nr, volatile unsigned long *p)
{
p += BIT_WORD(nr);
atomic_long_xor(BIT_MASK(nr), (atomic_long_t *)p);

View File

@ -13,6 +13,12 @@
/* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 */
#if GCC_VERSION < 40900
# error Sorry, your version of GCC is too old - please use 4.9 or newer.
#elif defined(CONFIG_ARM64) && GCC_VERSION < 50100
/*
* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63293
* https://lore.kernel.org/r/20210107111841.GN1551@shell.armlinux.org.uk
*/
# error Sorry, your version of GCC is too old - please use 5.1 or newer.
#endif
/*

View File

@ -163,8 +163,6 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn);
static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; }
#endif
bool arm_pmu_irq_is_nmi(void);
/* Internal functions only for core arm_pmu code */
struct arm_pmu *armpmu_alloc(void);
struct arm_pmu *armpmu_alloc_atomic(void);

View File

@ -457,7 +457,7 @@ function barf
mov x11, x1 // actual data
mov x12, x2 // data size
puts "Mistatch: PID="
puts "Mismatch: PID="
mov x0, x20
bl putdec
puts ", iteration="

View File

@ -625,7 +625,7 @@ function barf
mov x11, x1 // actual data
mov x12, x2 // data size
puts "Mistatch: PID="
puts "Mismatch: PID="
mov x0, x20
bl putdec
puts ", iteration="