From 751c263bb74fd36b5fc2589d36abc75042336444 Mon Sep 17 00:00:00 2001
From: Ard Biesheuvel
Date: Mon, 15 Jun 2020 12:19:39 +0200
Subject: [PATCH 01/11] arm64: remove TEXT_OFFSET randomization

TEXT_OFFSET was recently changed to 0x0, in preparation for its removal
at a later stage, and a warning is emitted into the kernel log when the
bootloader appears to have failed to take the TEXT_OFFSET image header
value into account.

Ironically, this warning itself fails to take TEXT_OFFSET into account,
and compares the kernel image's alignment modulo 2M against a hardcoded
value of 0x0, and so the warning will trigger spuriously when
TEXT_OFFSET randomization is enabled.

Given the intent to get rid of TEXT_OFFSET entirely, let's fix this
oversight by just removing support for TEXT_OFFSET randomization.

Signed-off-by: Ard Biesheuvel
Acked-by: Mark Rutland
Link: https://lore.kernel.org/r/20200615101939.634391-1-ardb@kernel.org
Signed-off-by: Will Deacon
---
 arch/arm64/Kconfig.debug | 15 ---------------
 arch/arm64/Makefile      |  6 ------
 2 files changed, 21 deletions(-)

diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug
index cdf7ec0b975e..265c4461031f 100644
--- a/arch/arm64/Kconfig.debug
+++ b/arch/arm64/Kconfig.debug
@@ -8,21 +8,6 @@ config PID_IN_CONTEXTIDR
 	  instructions during context switch. Say Y here only if you are
 	  planning to use hardware trace tools with this kernel.
 
-config ARM64_RANDOMIZE_TEXT_OFFSET
-	bool "Randomize TEXT_OFFSET at build time"
-	help
-	  Say Y here if you want the image load offset (AKA TEXT_OFFSET)
-	  of the kernel to be randomized at build-time. When selected,
-	  this option will cause TEXT_OFFSET to be randomized upon any
-	  build of the kernel, and the offset will be reflected in the
-	  text_offset field of the resulting Image. This can be used to
-	  fuzz-test bootloaders which respect text_offset.
-
-	  This option is intended for bootloader and/or kernel testing
-	  only. Bootloaders must make no assumptions regarding the value
-	  of TEXT_OFFSET and platforms must not require a specific
-	  value.
-
 config DEBUG_EFI
 	depends on EFI && DEBUG_INFO
 	bool "UEFI debugging"
diff --git a/arch/arm64/Makefile b/arch/arm64/Makefile
index 76359cfb328a..a0d94d063fa8 100644
--- a/arch/arm64/Makefile
+++ b/arch/arm64/Makefile
@@ -121,13 +121,7 @@ endif
 head-y		:= arch/arm64/kernel/head.o
 
 # The byte offset of the kernel image in RAM from the start of RAM.
-ifeq ($(CONFIG_ARM64_RANDOMIZE_TEXT_OFFSET), y)
-TEXT_OFFSET := $(shell awk "BEGIN {srand(); printf \"0x%06x\n\", \
-		 int(2 * 1024 * 1024 / (2 ^ $(CONFIG_ARM64_PAGE_SHIFT)) * \
-		 rand()) * (2 ^ $(CONFIG_ARM64_PAGE_SHIFT))}")
-else
 TEXT_OFFSET := 0x0
-endif
 
 ifeq ($(CONFIG_KASAN_SW_TAGS), y)
 KASAN_SHADOW_SCALE_SHIFT := 4
From 9ba6a9efa4a4c721d0115089bf422f82b8a59e45 Mon Sep 17 00:00:00 2001
From: Dave Martin
Date: Wed, 10 Jun 2020 18:03:09 +0100
Subject: [PATCH 02/11] docs/arm64: Fix typo'd #define in sve.rst

sve.rst describes a flag PR_SVE_SET_VL_INHERIT for the PR_SVE_SET_VL
prctl, but there is no flag of this name.

The flag is shared between the _GET and _SET calls, so the _SET prefix
was dropped, giving the name PR_SVE_VL_INHERIT in the headers.

Fix it.

Signed-off-by: Dave Martin
Link: https://lore.kernel.org/r/1591808590-20210-2-git-send-email-Dave.Martin@arm.com
Signed-off-by: Will Deacon
---
 Documentation/arm64/sve.rst | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/arm64/sve.rst b/Documentation/arm64/sve.rst
index 5689c74c8082..bfd55f468258 100644
--- a/Documentation/arm64/sve.rst
+++ b/Documentation/arm64/sve.rst
@@ -186,7 +186,7 @@ prctl(PR_SVE_SET_VL, unsigned long arg)
 
     flags:
 
-	PR_SVE_SET_VL_INHERIT
+	PR_SVE_VL_INHERIT
 
 	    Inherit the current vector length across execve().  Otherwise, the
 	    vector length is reset to the system default at execve().  (See
@@ -247,7 +247,7 @@ prctl(PR_SVE_GET_VL)
 
     The following flag may be OR-ed into the result:
 
-	PR_SVE_SET_VL_INHERIT
+	PR_SVE_VL_INHERIT
 
 	    Vector length will be inherited across execve().
 
@@ -393,7 +393,7 @@ The regset data starts with struct user_sve_header, containing:
 * At every execve() call, the new vector length of the new process is set to
   the system default vector length, unless
 
-    * PR_SVE_SET_VL_INHERIT (or equivalently SVE_PT_VL_INHERIT) is set for the
+    * PR_SVE_VL_INHERIT (or equivalently SVE_PT_VL_INHERIT) is set for the
      calling thread, or
 
    * a deferred vector length change is pending, established via the
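The prctl() interface that the documentation fix above concerns can be
exercised directly from userspace. The following minimal sketch is purely
illustrative and not part of this series: it sets a vector length, asks for
it to be inherited across execve(), and reads the flag back. The fallback
#defines mirror the values in include/uapi/linux/prctl.h, in case the
installed libc headers predate them.

  #include <stdio.h>
  #include <sys/prctl.h>

  #ifndef PR_SVE_SET_VL
  #define PR_SVE_SET_VL       50
  #define PR_SVE_GET_VL       51
  #define PR_SVE_VL_LEN_MASK  0xffff
  #define PR_SVE_VL_INHERIT   (1 << 17)
  #endif

  int main(void)
  {
          /* Request a 32-byte vector length and keep it across execve(). */
          if (prctl(PR_SVE_SET_VL, 32 | PR_SVE_VL_INHERIT) < 0) {
                  perror("PR_SVE_SET_VL");    /* e.g. SVE not supported */
                  return 1;
          }

          /* The GET call reports the granted length and the flag together. */
          int ret = prctl(PR_SVE_GET_VL);
          if (ret < 0) {
                  perror("PR_SVE_GET_VL");
                  return 1;
          }

          printf("vl=%d inherit=%d\n", ret & PR_SVE_VL_LEN_MASK,
                 !!(ret & PR_SVE_VL_INHERIT));
          return 0;
  }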
From 1e570f512cbdc5e9e401ba640d9827985c1bea1e Mon Sep 17 00:00:00 2001
From: Dave Martin
Date: Wed, 10 Jun 2020 18:03:10 +0100
Subject: [PATCH 03/11] arm64/sve: Eliminate data races on sve_default_vl

sve_default_vl can be modified via the /proc/sys/abi/sve_default_vl
sysctl concurrently with use, and modified concurrently by multiple
threads.

Adding a lock for this seems overkill, and I don't want to think any
more than necessary, so just define wrappers using READ_ONCE()/
WRITE_ONCE(). This will avoid the possibility of torn accesses and
repeated loads and stores.

There's no evidence yet that this is going wrong in practice: this is
just hygiene. For generic sysctl users, it would be better to build
this kind of thing into the sysctl common code somehow.

Reported-by: Will Deacon
Signed-off-by: Dave Martin
Link: https://lore.kernel.org/r/1591808590-20210-3-git-send-email-Dave.Martin@arm.com
[will: move set_sve_default_vl() inside #ifdef to squash allnoconfig warning]
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/fpsimd.c | 25 ++++++++++++++++++-------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 35cb5e66c504..d9eee9194511 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -12,6 +12,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -119,10 +120,20 @@ struct fpsimd_last_state_struct {
 static DEFINE_PER_CPU(struct fpsimd_last_state_struct, fpsimd_last_state);
 
 /* Default VL for tasks that don't set it explicitly: */
-static int sve_default_vl = -1;
+static int __sve_default_vl = -1;
+
+static int get_sve_default_vl(void)
+{
+	return READ_ONCE(__sve_default_vl);
+}
 
 #ifdef CONFIG_ARM64_SVE
 
+static void set_sve_default_vl(int val)
+{
+	WRITE_ONCE(__sve_default_vl, val);
+}
+
 /* Maximum supported vector length across all CPUs (initially poisoned) */
 int __ro_after_init sve_max_vl = SVE_VL_MIN;
 int __ro_after_init sve_max_virtualisable_vl = SVE_VL_MIN;
@@ -344,7 +355,7 @@ static int sve_proc_do_default_vl(struct ctl_table *table, int write,
 				  void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret;
-	int vl = sve_default_vl;
+	int vl = get_sve_default_vl();
 	struct ctl_table tmp_table = {
 		.data = &vl,
 		.maxlen = sizeof(vl),
@@ -361,7 +372,7 @@ static int sve_proc_do_default_vl(struct ctl_table *table, int write,
 
 	if (!sve_vl_valid(vl))
 		return -EINVAL;
 
-	sve_default_vl = find_supported_vector_length(vl);
+	set_sve_default_vl(find_supported_vector_length(vl));
 	return 0;
 }
@@ -868,7 +879,7 @@ void __init sve_setup(void)
 	 * For the default VL, pick the maximum supported value <= 64.
 	 * VL == 64 is guaranteed not to grow the signal frame.
 	 */
-	sve_default_vl = find_supported_vector_length(64);
+	set_sve_default_vl(find_supported_vector_length(64));
 
 	bitmap_andnot(tmp_map, sve_vq_partial_map, sve_vq_map,
 		      SVE_VQ_MAX);
@@ -889,7 +900,7 @@ void __init sve_setup(void)
 	pr_info("SVE: maximum available vector length %u bytes per vector\n",
 		sve_max_vl);
 	pr_info("SVE: default vector length %u bytes per vector\n",
-		sve_default_vl);
+		get_sve_default_vl());
 
 	/* KVM decides whether to support mismatched systems. Just warn here: */
 	if (sve_max_virtualisable_vl < sve_max_vl)
@@ -1029,13 +1040,13 @@ void fpsimd_flush_thread(void)
 		 * vector length configured: no kernel task can become a user
 		 * task without an exec and hence a call to this function.
 		 * By the time the first call to this function is made, all
-		 * early hardware probing is complete, so sve_default_vl
+		 * early hardware probing is complete, so __sve_default_vl
		 * should be valid.
 		 * If a bug causes this to go wrong, we make some noise and
 		 * try to fudge thread.sve_vl to a safe value here.
 		 */
 		vl = current->thread.sve_vl_onexec ?
-			current->thread.sve_vl_onexec : sve_default_vl;
+			current->thread.sve_vl_onexec : get_sve_default_vl();
 
 		if (WARN_ON(!sve_vl_valid(vl)))
 			vl = SVE_VL_MIN;
From 413d3ea6b775d77b2057f13a9af75875eb066156 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Mon, 15 Jun 2020 12:23:16 +0100
Subject: [PATCH 04/11] arm64: traps: Dump registers prior to panic() in bad_mode()

When panicking due to an unknown/unhandled exception at EL1, dump the
registers of the faulting context so that it's easier to figure out
what went wrong. In particular, this makes it a lot easier to debug
in-kernel BTI failures since it pretty-prints PSTATE.BTYPE in the crash
log.

Cc: Mark Brown
Cc: Catalin Marinas
Acked-by: Mark Rutland
Link: https://lore.kernel.org/r/20200615113458.2884-1-will@kernel.org
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/traps.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c
index 50cc30acf106..24f2af70ac2e 100644
--- a/arch/arm64/kernel/traps.c
+++ b/arch/arm64/kernel/traps.c
@@ -813,6 +813,7 @@ asmlinkage void bad_mode(struct pt_regs *regs, int reason, unsigned int esr)
 		handler[reason], smp_processor_id(), esr,
 		esr_get_class_string(esr));
 
+	__show_regs(regs);
 	local_daif_mask();
 	panic("bad mode");
 }

From 8dd4daa04278d7437641962ed53b843c0b0ec4a9 Mon Sep 17 00:00:00 2001
From: Shyam Thombre
Date: Wed, 10 Jun 2020 16:39:44 +0530
Subject: [PATCH 05/11] arm64: mm: reset address tag set by kasan sw tagging

KASAN sw tagging sets a random tag of 8 bits in the top byte of the
pointer returned by the memory allocating functions. So for the
functions unaware of this change, the top 8 bits of the address must be
reset, which is done by the function arch_kasan_reset_tag().

Signed-off-by: Shyam Thombre
Link: https://lore.kernel.org/r/1591787384-5823-1-git-send-email-sthombre@codeaurora.org
Signed-off-by: Will Deacon
---
 arch/arm64/mm/mmu.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c
index 990929c8837e..1df25f26571d 100644
--- a/arch/arm64/mm/mmu.c
+++ b/arch/arm64/mm/mmu.c
@@ -723,6 +723,7 @@ int kern_addr_valid(unsigned long addr)
 	pmd_t *pmdp, pmd;
 	pte_t *ptep, pte;
 
+	addr = arch_kasan_reset_tag(addr);
 	if ((((long)addr) >> VA_BITS) != -1UL)
 		return 0;
 
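For readers unfamiliar with KASAN software tagging, the stand-alone sketch
below is a conceptual illustration (it is not the kernel's
arch_kasan_reset_tag() implementation): bits 63:56 of the pointer carry the
tag, and one simple way to picture resetting it is sign-extending from bit
55, which restores the 0xff top byte of a kernel address before the
"(addr >> VA_BITS) != -1UL" check seen in the hunk above.

  #include <stdint.h>
  #include <stdio.h>

  /* Conceptual only: restore the top byte by sign-extending from bit 55. */
  static uint64_t reset_tag(uint64_t addr)
  {
          return (uint64_t)(((int64_t)(addr << 8)) >> 8);
  }

  int main(void)
  {
          uint64_t tagged = 0xf4ff800010a00000ULL; /* tag 0xf4 in bits 63:56 */

          /* Prints 0xffff800010a00000: a canonical kernel address again. */
          printf("0x%llx\n", (unsigned long long)reset_tag(tagged));
          return 0;
  }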
From 034aa9cd698e315c767af1bac3fd1ff8898d2cd7 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Mon, 15 Jun 2020 16:27:43 +0100
Subject: [PATCH 06/11] arm64: pgtable: Clear the GP bit for non-executable kernel pages

Commit cca98e9f8b5e ("mm: enforce that vmap can't map pages executable")
introduced 'pgprot_nx(prot)' for arm64 but collided silently with the
BTI support during the merge window, which endeavours to clear the GP
bit for non-executable kernel mappings in set_memory_nx().

For consistency between the two APIs, clear the GP bit in pgprot_nx().

Acked-by: Mark Rutland
Reviewed-by: Mark Brown
Link: https://lore.kernel.org/r/20200615154642.3579-1-will@kernel.org
Signed-off-by: Will Deacon
---
 arch/arm64/include/asm/pgtable.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/arm64/include/asm/pgtable.h b/arch/arm64/include/asm/pgtable.h
index 6dbd267ab931..758e2d1577d0 100644
--- a/arch/arm64/include/asm/pgtable.h
+++ b/arch/arm64/include/asm/pgtable.h
@@ -416,7 +416,7 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
 	__pgprot((pgprot_val(prot) & ~(mask)) | (bits))
 
 #define pgprot_nx(prot) \
-	__pgprot_modify(prot, 0, PTE_PXN)
+	__pgprot_modify(prot, PTE_MAYBE_GP, PTE_PXN)
 
 /*
  * Mark the prot value as uncacheable and unbufferable.

From e575fb9e76c8e33440fb859572a8b7d430f053d6 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Tue, 16 Jun 2020 18:29:11 +0100
Subject: [PATCH 07/11] arm64: sve: Fix build failure when ARM64_SVE=y and SYSCTL=n

When I squashed the 'allnoconfig' compiler warning about the
set_sve_default_vl() function being defined but not used in commit
1e570f512cbd ("arm64/sve: Eliminate data races on sve_default_vl"), I
accidentally broke the build for configs where ARM64_SVE is enabled,
but SYSCTL is not.

Fix this by only compiling the SVE sysctl support if both
CONFIG_ARM64_SVE=y and CONFIG_SYSCTL=y.

Cc: Dave Martin
Reported-by: Qian Cai
Link: https://lore.kernel.org/r/20200616131808.GA1040@lca.pw
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/fpsimd.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index d9eee9194511..55c8f3ec6705 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -349,7 +349,7 @@ static unsigned int find_supported_vector_length(unsigned int vl)
 	return sve_vl_from_vq(__bit_to_vq(bit));
 }
 
-#ifdef CONFIG_SYSCTL
+#if defined(CONFIG_ARM64_SVE) && defined(CONFIG_SYSCTL)
 
 static int sve_proc_do_default_vl(struct ctl_table *table, int write,
 				  void *buffer, size_t *lenp, loff_t *ppos)
@@ -394,9 +394,9 @@ static int __init sve_sysctl_init(void)
 	return 0;
 }
 
-#else /* ! CONFIG_SYSCTL */
+#else /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */
 static int __init sve_sysctl_init(void) { return 0; }
-#endif /* ! CONFIG_SYSCTL */
+#endif /* ! (CONFIG_ARM64_SVE && CONFIG_SYSCTL) */
 
 #define ZREG(sve_state, vq, n) ((char *)(sve_state) + \
 	(SVE_SIG_ZREG_OFFSET(vq, n) - SVE_SIG_REGS_OFFSET))
From b9249cba25a5dce5de87e5404503a5e11832c2dd Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Tue, 16 Jun 2020 19:03:49 +0100
Subject: [PATCH 08/11] arm64: bti: Require clang >= 10.0.1 for in-kernel BTI support

Unfortunately, most versions of clang that support BTI are capable of
miscompiling the kernel when converting a switch statement into a jump
table. As an example, attempting to spawn a KVM guest results in a
panic:

  [   56.253312] Kernel panic - not syncing: bad mode
  [   56.253834] CPU: 0 PID: 279 Comm: lkvm Not tainted 5.8.0-rc1 #2
  [   56.254225] Hardware name: QEMU QEMU Virtual Machine, BIOS 0.0.0 02/06/2015
  [   56.254712] Call trace:
  [   56.254952]  dump_backtrace+0x0/0x1d4
  [   56.255305]  show_stack+0x1c/0x28
  [   56.255647]  dump_stack+0xc4/0x128
  [   56.255905]  panic+0x16c/0x35c
  [   56.256146]  bad_el0_sync+0x0/0x58
  [   56.256403]  el1_sync_handler+0xb4/0xe0
  [   56.256674]  el1_sync+0x7c/0x100
  [   56.256928]  kvm_vm_ioctl_check_extension_generic+0x74/0x98
  [   56.257286]  __arm64_sys_ioctl+0x94/0xcc
  [   56.257569]  el0_svc_common+0x9c/0x150
  [   56.257836]  do_el0_svc+0x84/0x90
  [   56.258083]  el0_sync_handler+0xf8/0x298
  [   56.258361]  el0_sync+0x158/0x180

This is because the switch in kvm_vm_ioctl_check_extension_generic()
is executed as an indirect branch to tail-call through a jump table:

  ffff800010032dc8:  3869694c  ldrb  w12, [x10, x9]
  ffff800010032dcc:  8b0c096b  add   x11, x11, x12, lsl #2
  ffff800010032dd0:  d61f0160  br    x11

However, where the target case uses the stack, the landing pad is
elided due to the presence of a paciasp instruction:

  ffff800010032e14:  d503233f  paciasp
  ffff800010032e18:  a9bf7bfd  stp   x29, x30, [sp, #-16]!
  ffff800010032e1c:  910003fd  mov   x29, sp
  ffff800010032e20:  aa0803e0  mov   x0, x8
  ffff800010032e24:  940017c0  bl    ffff800010038d24
  ffff800010032e28:  93407c00  sxtw  x0, w0
  ffff800010032e2c:  a8c17bfd  ldp   x29, x30, [sp], #16
  ffff800010032e30:  d50323bf  autiasp
  ffff800010032e34:  d65f03c0  ret

Unfortunately, this results in a fatal exception because paciasp is
compatible only with branch-and-link (call) instructions and not simple
indirect branches.

A fix is being merged into Clang 10.0.1 so that a 'bti j' instruction
is emitted as an explicit landing pad in this situation. Make in-kernel
BTI depend on that compiler version when building with clang.

Cc: Tom Stellard
Cc: Daniel Kiss
Reviewed-by: Mark Brown
Acked-by: Dave Martin
Reviewed-by: Nathan Chancellor
Acked-by: Nick Desaulniers
Link: https://lore.kernel.org/r/20200615105524.GA2694@willie-the-truck
Link: https://lore.kernel.org/r/20200616183630.2445-1-will@kernel.org
Signed-off-by: Will Deacon
---
 arch/arm64/Kconfig | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 31380da53689..4ae2419c14a8 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -1630,6 +1630,8 @@ config ARM64_BTI_KERNEL
 	depends on CC_HAS_BRANCH_PROT_PAC_RET_BTI
 	# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94697
 	depends on !CC_IS_GCC || GCC_VERSION >= 100100
+	# https://reviews.llvm.org/rGb8ae3fdfa579dbf366b1bb1cbfdbf8c51db7fa55
+	depends on !CC_IS_CLANG || CLANG_VERSION >= 100001
 	depends on !(CC_IS_CLANG && GCOV_KERNEL)
 	depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS)
 	help

From 618e07865b7453d02410c1f3407c2d78a670eabb Mon Sep 17 00:00:00 2001
From: Barry Song
Date: Thu, 18 Jun 2020 09:58:28 +1200
Subject: [PATCH 09/11] arm64: mm: reserve hugetlb CMA after numa_init

hugetlb_cma_reserve() is called at the wrong place: numa_init has not
been done yet, so all reserved memory will be located at node0.

Fixes: cf11e85fc08c ("mm: hugetlb: optionally allocate gigantic hugepages using cma")
Signed-off-by: Barry Song
Reviewed-by: Anshuman Khandual
Acked-by: Roman Gushchin
Cc: Matthias Brugger
Cc: Will Deacon
Link: https://lore.kernel.org/r/20200617215828.25296-1-song.bao.hua@hisilicon.com
Signed-off-by: Will Deacon
---
 arch/arm64/mm/init.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/arm64/mm/init.c b/arch/arm64/mm/init.c
index e631e6425165..1e93cfc7c47a 100644
--- a/arch/arm64/mm/init.c
+++ b/arch/arm64/mm/init.c
@@ -404,11 +404,6 @@ void __init arm64_memblock_init(void)
 	high_memory = __va(memblock_end_of_DRAM() - 1) + 1;
 
 	dma_contiguous_reserve(arm64_dma32_phys_limit);
-
-#ifdef CONFIG_ARM64_4K_PAGES
-	hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
-#endif
-
 }
 
 void __init bootmem_init(void)
@@ -424,6 +419,16 @@ void __init bootmem_init(void)
 	min_low_pfn = min;
 
 	arm64_numa_init();
+
+	/*
+	 * must be done after arm64_numa_init() which calls numa_init() to
+	 * initialize node_online_map that gets used in hugetlb_cma_reserve()
+	 * while allocating required CMA size across online nodes.
+	 */
+#ifdef CONFIG_ARM64_4K_PAGES
+	hugetlb_cma_reserve(PUD_SHIFT - PAGE_SHIFT);
+#endif
+
 	/*
 	 * Sparsemem tries to allocate bootmem in memory_present(), so must be
 	 * done after the fixed reservations.
From bf508ec95ca3b902f14bb311a7709e5cb57fbc49 Mon Sep 17 00:00:00 2001
From: Gustavo A. R. Silva
Date: Wed, 17 Jun 2020 16:34:07 -0500
Subject: [PATCH 10/11] arm64: kexec_file: Use struct_size() in kmalloc()

Make use of the struct_size() helper instead of an open-coded version
in order to avoid any potential type mistakes.

This code was detected with the help of Coccinelle and audited and
fixed manually.

Signed-off-by: Gustavo A. R. Silva
Link: https://lore.kernel.org/r/20200617213407.GA1385@embeddedor
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/machine_kexec_file.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c
index 522e6f517ec0..361a1143e09e 100644
--- a/arch/arm64/kernel/machine_kexec_file.c
+++ b/arch/arm64/kernel/machine_kexec_file.c
@@ -219,8 +219,7 @@ static int prepare_elf_headers(void **addr, unsigned long *sz)
 			       MEMBLOCK_NONE, &start, &end, NULL)
 		nr_ranges++;
 
-	cmem = kmalloc(sizeof(struct crash_mem) +
-			sizeof(struct crash_mem_range) * nr_ranges, GFP_KERNEL);
+	cmem = kmalloc(struct_size(cmem, ranges, nr_ranges), GFP_KERNEL);
 	if (!cmem)
 		return -ENOMEM;
 
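For context on the idiom the patch above switches to: struct_size() from
<linux/overflow.h> computes the size of a structure that ends in a flexible
array member, and it saturates to SIZE_MAX if the multiplication or
addition would overflow, so an overflowing size makes the allocation fail
rather than silently under-allocate. The sketch below is illustrative only;
the struct names are made up to mirror the crash_mem/crash_mem_range pair
used in the patch.

  #include <linux/overflow.h>
  #include <linux/slab.h>

  struct example_range {
          unsigned long long start;
          unsigned long long end;
  };

  struct example_mem {
          unsigned int max_nr_ranges;
          unsigned int nr_ranges;
          struct example_range ranges[];   /* flexible array member */
  };

  static struct example_mem *alloc_example_mem(unsigned int nr_ranges)
  {
          struct example_mem *m;

          /*
           * struct_size(m, ranges, nr_ranges) only inspects the type of
           * 'm', so using it before 'm' is assigned is fine. On overflow
           * it evaluates to SIZE_MAX and kmalloc() returns NULL.
           */
          m = kmalloc(struct_size(m, ranges, nr_ranges), GFP_KERNEL);
          if (m)
                  m->nr_ranges = nr_ranges;
          return m;
  }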
From 24ebec25fb270100e252b19c288e21bd7d8cc7f7 Mon Sep 17 00:00:00 2001
From: Will Deacon
Date: Fri, 29 May 2020 14:12:18 +0100
Subject: [PATCH 11/11] arm64: hw_breakpoint: Don't invoke overflow handler on uaccess watchpoints

Unprivileged memory accesses generated by the so-called "translated"
instructions (e.g. STTR) at EL1 can cause EL0 watchpoints to fire
unexpectedly if kernel debugging is enabled. In such cases, the
hw_breakpoint logic will invoke the user overflow handler which will
typically raise a SIGTRAP back to the current task. This is futile when
returning back to the kernel because (a) the signal won't have been
delivered and (b) userspace can't handle the thing anyway.

Avoid invoking the user overflow handler for watchpoints triggered by
kernel uaccess routines, and instead single-step over the faulting
instruction as we would if no overflow handler had been installed.

(Fixes tag identifies the introduction of unprivileged memory accesses,
which exposed this latent bug in the hw_breakpoint code)

Cc: Catalin Marinas
Cc: James Morse
Fixes: 57f4959bad0a ("arm64: kernel: Add support for User Access Override")
Reported-by: Luis Machado
Signed-off-by: Will Deacon
---
 arch/arm64/kernel/hw_breakpoint.c | 44 ++++++++++++++++++-------------
 1 file changed, 26 insertions(+), 18 deletions(-)

diff --git a/arch/arm64/kernel/hw_breakpoint.c b/arch/arm64/kernel/hw_breakpoint.c
index 0b727edf4104..af234a1e08b7 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -730,6 +730,27 @@ static u64 get_distance_from_watchpoint(unsigned long addr, u64 val,
 	return 0;
 }
 
+static int watchpoint_report(struct perf_event *wp, unsigned long addr,
+			     struct pt_regs *regs)
+{
+	int step = is_default_overflow_handler(wp);
+	struct arch_hw_breakpoint *info = counter_arch_bp(wp);
+
+	info->trigger = addr;
+
+	/*
+	 * If we triggered a user watchpoint from a uaccess routine, then
+	 * handle the stepping ourselves since userspace really can't help
+	 * us with this.
+	 */
+	if (!user_mode(regs) && info->ctrl.privilege == AARCH64_BREAKPOINT_EL0)
+		step = 1;
+	else
+		perf_bp_event(wp, regs);
+
+	return step;
+}
+
 static int watchpoint_handler(unsigned long addr, unsigned int esr,
 			      struct pt_regs *regs)
 {
@@ -739,7 +760,6 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
 	u64 val;
 	struct perf_event *wp, **slots;
 	struct debug_info *debug_info;
-	struct arch_hw_breakpoint *info;
 	struct arch_hw_breakpoint_ctrl ctrl;
 
 	slots = this_cpu_ptr(wp_on_reg);
@@ -777,25 +797,13 @@ static int watchpoint_handler(unsigned long addr, unsigned int esr,
 		if (dist != 0)
 			continue;
 
-		info = counter_arch_bp(wp);
-		info->trigger = addr;
-		perf_bp_event(wp, regs);
-
-		/* Do we need to handle the stepping? */
-		if (is_default_overflow_handler(wp))
-			step = 1;
+		step = watchpoint_report(wp, addr, regs);
 	}
 
-	if (min_dist > 0 && min_dist != -1) {
-		/* No exact match found. */
-		wp = slots[closest_match];
-		info = counter_arch_bp(wp);
-		info->trigger = addr;
-		perf_bp_event(wp, regs);
-		/* Do we need to handle the stepping? */
-		if (is_default_overflow_handler(wp))
-			step = 1;
-	}
+	/* No exact match found? */
+	if (min_dist > 0 && min_dist != -1)
+		step = watchpoint_report(slots[closest_match], addr, regs);
+
 	rcu_read_unlock();
 
 	if (!step)