From d93277b9839b0bde06238a7a7f644114edb2ad4a Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 11 May 2018 13:25:49 +0100 Subject: [PATCH 01/53] Revert "arm64: Increase the max granular size" This reverts commit 97303480753e48fb313dc0e15daaf11b0451cdb8. Commit 97303480753e ("arm64: Increase the max granular size") increased the cache line size to 128 to match Cavium ThunderX, apparently for some performance benefit which could not be confirmed. This change, however, has an impact on the network packet allocation in certain circumstances, requiring slightly over a 4K page with a significant performance degradation. The patch reverts L1_CACHE_SHIFT back to 6 (64-byte cache line). Cc: Will Deacon Cc: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 9bbffc7a301f..1dd2c2db0010 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -33,7 +33,7 @@ #define ICACHE_POLICY_VIPT 2 #define ICACHE_POLICY_PIPT 3 -#define L1_CACHE_SHIFT 7 +#define L1_CACHE_SHIFT (6) #define L1_CACHE_BYTES (1 << L1_CACHE_SHIFT) /* From ebc7e21e0fa28c46b938baed292c77e2d3ef8165 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Fri, 11 May 2018 13:33:12 +0100 Subject: [PATCH 02/53] arm64: Increase ARCH_DMA_MINALIGN to 128 This patch increases the ARCH_DMA_MINALIGN to 128 so that it covers the currently known Cache Writeback Granule (CTR_EL0.CWG) on arm64 and moves the fallback in cache_line_size() from L1_CACHE_BYTES to this constant. In addition, it warns (and taints) if the CWG is larger than ARCH_DMA_MINALIGN as this is not safe with non-coherent DMA. Cc: Will Deacon Cc: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cache.h | 4 ++-- arch/arm64/kernel/cpufeature.c | 9 ++------- arch/arm64/mm/dma-mapping.c | 5 +++++ 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/arm64/include/asm/cache.h b/arch/arm64/include/asm/cache.h index 1dd2c2db0010..5df5cfe1c143 100644 --- a/arch/arm64/include/asm/cache.h +++ b/arch/arm64/include/asm/cache.h @@ -43,7 +43,7 @@ * cache before the transfer is done, causing old data to be seen by * the CPU. */ -#define ARCH_DMA_MINALIGN L1_CACHE_BYTES +#define ARCH_DMA_MINALIGN (128) #ifndef __ASSEMBLY__ @@ -77,7 +77,7 @@ static inline u32 cache_type_cwg(void) static inline int cache_line_size(void) { u32 cwg = cache_type_cwg(); - return cwg ? 4 << cwg : L1_CACHE_BYTES; + return cwg ? 4 << cwg : ARCH_DMA_MINALIGN; } #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index 9d1b06d67c53..fbee8c17a4e6 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1606,7 +1606,6 @@ static void __init setup_system_capabilities(void) void __init setup_cpu_features(void) { u32 cwg; - int cls; setup_system_capabilities(); mark_const_caps_ready(); @@ -1627,13 +1626,9 @@ void __init setup_cpu_features(void) * Check for sane CTR_EL0.CWG value. 
*/ cwg = cache_type_cwg(); - cls = cache_line_size(); if (!cwg) - pr_warn("No Cache Writeback Granule information, assuming cache line size %d\n", - cls); - if (L1_CACHE_BYTES < cls) - pr_warn("L1_CACHE_BYTES smaller than the Cache Writeback Granule (%d < %d)\n", - L1_CACHE_BYTES, cls); + pr_warn("No Cache Writeback Granule information, assuming %d\n", + ARCH_DMA_MINALIGN); } static bool __maybe_unused diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index a96ec0181818..ed84432264de 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -504,6 +504,11 @@ static int __init arm64_dma_init(void) max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) swiotlb = 1; + WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), + TAINT_CPU_OUT_OF_SPEC, + "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", + ARCH_DMA_MINALIGN, cache_line_size()); + return atomic_pool_init(); } arch_initcall(arm64_dma_init); From 5c636aa015c644a3889044270b98c33a8a87734d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 9 May 2018 16:46:26 +0900 Subject: [PATCH 03/53] arm64: remove no-op macro VMLINUX_SYMBOL() VMLINUX_SYMBOL() is no-op unless CONFIG_HAVE_UNDERSCORE_SYMBOL_PREFIX is defined. It has ever been selected only by BLACKFIN and METAG. VMLINUX_SYMBOL() is unneeded for ARM64-specific code. Signed-off-by: Masahiro Yamada Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vmlinux.lds.S | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/arm64/kernel/vmlinux.lds.S b/arch/arm64/kernel/vmlinux.lds.S index 0221aca6493d..605d1b60469c 100644 --- a/arch/arm64/kernel/vmlinux.lds.S +++ b/arch/arm64/kernel/vmlinux.lds.S @@ -34,25 +34,25 @@ jiffies = jiffies_64; * 4 KB (see related ASSERT() below) \ */ \ . = ALIGN(SZ_4K); \ - VMLINUX_SYMBOL(__hyp_idmap_text_start) = .; \ + __hyp_idmap_text_start = .; \ *(.hyp.idmap.text) \ - VMLINUX_SYMBOL(__hyp_idmap_text_end) = .; \ - VMLINUX_SYMBOL(__hyp_text_start) = .; \ + __hyp_idmap_text_end = .; \ + __hyp_text_start = .; \ *(.hyp.text) \ - VMLINUX_SYMBOL(__hyp_text_end) = .; + __hyp_text_end = .; #define IDMAP_TEXT \ . = ALIGN(SZ_4K); \ - VMLINUX_SYMBOL(__idmap_text_start) = .; \ + __idmap_text_start = .; \ *(.idmap.text) \ - VMLINUX_SYMBOL(__idmap_text_end) = .; + __idmap_text_end = .; #ifdef CONFIG_HIBERNATION #define HIBERNATE_TEXT \ . = ALIGN(SZ_4K); \ - VMLINUX_SYMBOL(__hibernate_exit_text_start) = .;\ + __hibernate_exit_text_start = .; \ *(.hibernate_exit.text) \ - VMLINUX_SYMBOL(__hibernate_exit_text_end) = .; + __hibernate_exit_text_end = .; #else #define HIBERNATE_TEXT #endif @@ -60,10 +60,10 @@ jiffies = jiffies_64; #ifdef CONFIG_UNMAP_KERNEL_AT_EL0 #define TRAMP_TEXT \ . = ALIGN(PAGE_SIZE); \ - VMLINUX_SYMBOL(__entry_tramp_text_start) = .; \ + __entry_tramp_text_start = .; \ *(.entry.tramp.text) \ . = ALIGN(PAGE_SIZE); \ - VMLINUX_SYMBOL(__entry_tramp_text_end) = .; + __entry_tramp_text_end = .; #else #define TRAMP_TEXT #endif From 92faa7bea3e7592673109e32c75d50f8ce6d5ec6 Mon Sep 17 00:00:00 2001 From: Vincenzo Frascino Date: Fri, 13 Apr 2018 15:44:35 +0100 Subject: [PATCH 04/53] arm64: Remove duplicate include "make includecheck" detected few duplicated includes in arch/arm64. This patch removes the double inclusions. 
Signed-off-by: Vincenzo Frascino Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_mmu.h | 1 - arch/arm64/kernel/armv8_deprecated.c | 3 +-- arch/arm64/kernel/fpsimd.c | 1 - arch/arm64/kernel/ptrace.c | 2 -- 4 files changed, 1 insertion(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 082110993647..f74987b76d91 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -72,7 +72,6 @@ #ifdef __ASSEMBLY__ #include -#include /* * Convert a kernel VA into a HYP VA. diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 6e47fc3ab549..97d45d5151d4 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include @@ -20,8 +21,6 @@ #include #include #include -#include -#include #define CREATE_TRACE_POINTS #include "trace-events-emulation.h" diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 87a35364e750..3db8ed530e56 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -31,7 +31,6 @@ #include #include #include -#include #include #include #include diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 7ff81fed46e1..f847285d96f3 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -1046,8 +1046,6 @@ static const struct user_regset_view user_aarch64_view = { }; #ifdef CONFIG_COMPAT -#include - enum compat_regset { REGSET_COMPAT_GPR, REGSET_COMPAT_VFP, From e75bef2a4fe259b779765a85589e92657d26fdc9 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 24 Apr 2018 16:25:47 +0100 Subject: [PATCH 05/53] arm64: Select ARCH_HAS_FAST_MULTIPLIER It is probably safe to assume that all Armv8-A implementations have a multiplier whose efficiency is comparable or better than a sequence of three or so register-dependent arithmetic instructions. Select ARCH_HAS_FAST_MULTIPLIER to get ever-so-slightly nicer codegen in the few dusty old corners which care. In a contrived benchmark calling hweight64() in a loop, this does indeed turn out to be a small win overall, with no measurable impact on Cortex-A57 but about 5% performance improvement on Cortex-A53. Acked-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index eb2cf4938f6d..9c850f3b398f 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -12,6 +12,7 @@ config ARM64 select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_FAST_MULTIPLIER select ARCH_HAS_FORTIFY_SOURCE select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE if (MEMORY_ISOLATION && COMPACTION) || CMA From 1cfc63b5ae60fe7e01773f38132f98d8b13a99a0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 30 Apr 2018 13:56:32 +0100 Subject: [PATCH 06/53] arm64: cmpwait: Clear event register before arming exclusive monitor When waiting for a cacheline to change state in cmpwait, we may immediately wake-up the first time around the outer loop if the event register was already set (for example, because of the event stream). Avoid these spurious wakeups by explicitly clearing the event register before loading the cacheline and setting the exclusive monitor. 
Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cmpxchg.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 4f5fd2a36e6e..3b0938281541 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -204,7 +204,9 @@ static inline void __cmpwait_case_##name(volatile void *ptr, \ unsigned long tmp; \ \ asm volatile( \ - " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \ + " sevl\n" \ + " wfe\n" \ + " ldxr" #sz "\t%" #w "[tmp], %[v]\n" \ " eor %" #w "[tmp], %" #w "[tmp], %" #w "[val]\n" \ " cbnz %" #w "[tmp], 1f\n" \ " wfe\n" \ From d529a18a61f3f497328f096ddf757af928d6105b Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:57:56 -0500 Subject: [PATCH 07/53] drivers: base: cacheinfo: move cache_setup_of_node() In preparation for the next patch, and to aid in review of that patch, lets move cache_setup_of_node further down in the module without any changes. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Reviewed-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Greg Kroah-Hartman Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- drivers/base/cacheinfo.c | 80 ++++++++++++++++++++-------------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index edf726267282..09ccef7ddc99 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -32,46 +32,6 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu) } #ifdef CONFIG_OF -static int cache_setup_of_node(unsigned int cpu) -{ - struct device_node *np; - struct cacheinfo *this_leaf; - struct device *cpu_dev = get_cpu_device(cpu); - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - unsigned int index = 0; - - /* skip if of_node is already populated */ - if (this_cpu_ci->info_list->of_node) - return 0; - - if (!cpu_dev) { - pr_err("No cpu device for CPU %d\n", cpu); - return -ENODEV; - } - np = cpu_dev->of_node; - if (!np) { - pr_err("Failed to find cpu%d device node\n", cpu); - return -ENOENT; - } - - while (index < cache_leaves(cpu)) { - this_leaf = this_cpu_ci->info_list + index; - if (this_leaf->level != 1) - np = of_find_next_cache_node(np); - else - np = of_node_get(np);/* cpu node itself */ - if (!np) - break; - this_leaf->of_node = np; - index++; - } - - if (index != cache_leaves(cpu)) /* not all OF nodes populated */ - return -ENOENT; - - return 0; -} - static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, struct cacheinfo *sib_leaf) { @@ -202,6 +162,46 @@ static void cache_of_override_properties(unsigned int cpu) cache_associativity(this_leaf); } } + +static int cache_setup_of_node(unsigned int cpu) +{ + struct device_node *np; + struct cacheinfo *this_leaf; + struct device *cpu_dev = get_cpu_device(cpu); + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + unsigned int index = 0; + + /* skip if of_node is already populated */ + if (this_cpu_ci->info_list->of_node) + return 0; + + if (!cpu_dev) { + pr_err("No cpu device for CPU %d\n", cpu); + return -ENODEV; + } + np = cpu_dev->of_node; + if (!np) { + pr_err("Failed to find cpu%d device node\n", cpu); + return -ENOENT; + } + + while (index < cache_leaves(cpu)) { + this_leaf = this_cpu_ci->info_list + index; + if (this_leaf->level != 1) + np = of_find_next_cache_node(np); + else + np = of_node_get(np);/* cpu node itself */ + if (!np) + 
break; + this_leaf->of_node = np; + index++; + } + + if (index != cache_leaves(cpu)) /* not all OF nodes populated */ + return -ENOENT; + + return 0; +} #else static void cache_of_override_properties(unsigned int cpu) { } static inline int cache_setup_of_node(unsigned int cpu) { return 0; } From 2ff075c7dfd4705de12d687daede2dd664386b1c Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:57:57 -0500 Subject: [PATCH 08/53] drivers: base: cacheinfo: setup DT cache properties early The original intent in cacheinfo was that an architecture specific populate_cache_leaves() would probe the hardware and then cache_shared_cpu_map_setup() and cache_override_properties() would provide firmware help to extend/expand upon what was probed. Arm64 was really the only architecture that was working this way, and with the removal of most of the hardware probing logic it became clear that it was possible to simplify the logic a bit. This patch combines the walk of the DT nodes with the code updating the cache size/line_size and nr_sets. cache_override_properties() (which was DT specific) is then removed. The result is that cacheinfo.of_node is no longer used as a temporary place to hold DT references for future calls that update cache properties. That change helps to clarify its one remaining use (matching cacheinfo nodes that represent shared caches) which will be used by the ACPI/PPTT code in the following patches. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Greg Kroah-Hartman Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- arch/riscv/kernel/cacheinfo.c | 1 - drivers/base/cacheinfo.c | 65 ++++++++++++++++------------------- 2 files changed, 29 insertions(+), 37 deletions(-) diff --git a/arch/riscv/kernel/cacheinfo.c b/arch/riscv/kernel/cacheinfo.c index 10ed2749e246..0bc86e5f8f3f 100644 --- a/arch/riscv/kernel/cacheinfo.c +++ b/arch/riscv/kernel/cacheinfo.c @@ -20,7 +20,6 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, struct device_node *node, enum cache_type type, unsigned int level) { - this_leaf->of_node = node; this_leaf->level = level; this_leaf->type = type; /* not a sector cache */ diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 09ccef7ddc99..a872523e8951 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -71,7 +71,7 @@ static inline int get_cacheinfo_idx(enum cache_type type) return type; } -static void cache_size(struct cacheinfo *this_leaf) +static void cache_size(struct cacheinfo *this_leaf, struct device_node *np) { const char *propname; const __be32 *cache_size; @@ -80,13 +80,14 @@ static void cache_size(struct cacheinfo *this_leaf) ct_idx = get_cacheinfo_idx(this_leaf->type); propname = cache_type_info[ct_idx].size_prop; - cache_size = of_get_property(this_leaf->of_node, propname, NULL); + cache_size = of_get_property(np, propname, NULL); if (cache_size) this_leaf->size = of_read_number(cache_size, 1); } /* not cache_line_size() because that's a macro in include/linux/cache.h */ -static void cache_get_line_size(struct cacheinfo *this_leaf) +static void cache_get_line_size(struct cacheinfo *this_leaf, + struct device_node *np) { const __be32 *line_size; int i, lim, ct_idx; @@ -98,7 +99,7 @@ static void cache_get_line_size(struct cacheinfo *this_leaf) const char *propname; propname = cache_type_info[ct_idx].line_size_props[i]; - line_size = of_get_property(this_leaf->of_node, propname, NULL); 
+ line_size = of_get_property(np, propname, NULL); if (line_size) break; } @@ -107,7 +108,7 @@ static void cache_get_line_size(struct cacheinfo *this_leaf) this_leaf->coherency_line_size = of_read_number(line_size, 1); } -static void cache_nr_sets(struct cacheinfo *this_leaf) +static void cache_nr_sets(struct cacheinfo *this_leaf, struct device_node *np) { const char *propname; const __be32 *nr_sets; @@ -116,7 +117,7 @@ static void cache_nr_sets(struct cacheinfo *this_leaf) ct_idx = get_cacheinfo_idx(this_leaf->type); propname = cache_type_info[ct_idx].nr_sets_prop; - nr_sets = of_get_property(this_leaf->of_node, propname, NULL); + nr_sets = of_get_property(np, propname, NULL); if (nr_sets) this_leaf->number_of_sets = of_read_number(nr_sets, 1); } @@ -135,32 +136,27 @@ static void cache_associativity(struct cacheinfo *this_leaf) this_leaf->ways_of_associativity = (size / nr_sets) / line_size; } -static bool cache_node_is_unified(struct cacheinfo *this_leaf) +static bool cache_node_is_unified(struct cacheinfo *this_leaf, + struct device_node *np) { - return of_property_read_bool(this_leaf->of_node, "cache-unified"); + return of_property_read_bool(np, "cache-unified"); } -static void cache_of_override_properties(unsigned int cpu) +static void cache_of_set_props(struct cacheinfo *this_leaf, + struct device_node *np) { - int index; - struct cacheinfo *this_leaf; - struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); - - for (index = 0; index < cache_leaves(cpu); index++) { - this_leaf = this_cpu_ci->info_list + index; - /* - * init_cache_level must setup the cache level correctly - * overriding the architecturally specified levels, so - * if type is NONE at this stage, it should be unified - */ - if (this_leaf->type == CACHE_TYPE_NOCACHE && - cache_node_is_unified(this_leaf)) - this_leaf->type = CACHE_TYPE_UNIFIED; - cache_size(this_leaf); - cache_get_line_size(this_leaf); - cache_nr_sets(this_leaf); - cache_associativity(this_leaf); - } + /* + * init_cache_level must setup the cache level correctly + * overriding the architecturally specified levels, so + * if type is NONE at this stage, it should be unified + */ + if (this_leaf->type == CACHE_TYPE_NOCACHE && + cache_node_is_unified(this_leaf, np)) + this_leaf->type = CACHE_TYPE_UNIFIED; + cache_size(this_leaf, np); + cache_get_line_size(this_leaf, np); + cache_nr_sets(this_leaf, np); + cache_associativity(this_leaf); } static int cache_setup_of_node(unsigned int cpu) @@ -193,6 +189,7 @@ static int cache_setup_of_node(unsigned int cpu) np = of_node_get(np);/* cpu node itself */ if (!np) break; + cache_of_set_props(this_leaf, np); this_leaf->of_node = np; index++; } @@ -203,7 +200,6 @@ static int cache_setup_of_node(unsigned int cpu) return 0; } #else -static void cache_of_override_properties(unsigned int cpu) { } static inline int cache_setup_of_node(unsigned int cpu) { return 0; } static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, struct cacheinfo *sib_leaf) @@ -286,12 +282,6 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) } } -static void cache_override_properties(unsigned int cpu) -{ - if (of_have_populated_dt()) - return cache_of_override_properties(cpu); -} - static void free_cache_attributes(unsigned int cpu) { if (!per_cpu_cacheinfo(cpu)) @@ -325,6 +315,10 @@ static int detect_cache_attributes(unsigned int cpu) if (per_cpu_cacheinfo(cpu) == NULL) return -ENOMEM; + /* + * populate_cache_leaves() may completely setup the cache leaves and + * shared_cpu_map or it may leave it partially setup. 
+ */ ret = populate_cache_leaves(cpu); if (ret) goto free_ci; @@ -338,7 +332,6 @@ static int detect_cache_attributes(unsigned int cpu) goto free_ci; } - cache_override_properties(cpu); return 0; free_ci: From 9b97387c5c4260ffcdf3b913bdef0d98cb2d4a74 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:57:58 -0500 Subject: [PATCH 09/53] cacheinfo: rename of_node to fw_token Rename and change the type of of_node to indicate it is a generic pointer which is generally only used for comparison purposes. In a later patch we will put an ACPI/PPTT token pointer in fw_token so that the code which builds the shared cpu masks can be reused. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Greg Kroah-Hartman Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- drivers/base/cacheinfo.c | 16 +++++++++------- include/linux/cacheinfo.h | 8 +++----- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index a872523e8951..597aacb233fc 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -35,7 +35,7 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu) static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, struct cacheinfo *sib_leaf) { - return sib_leaf->of_node == this_leaf->of_node; + return sib_leaf->fw_token == this_leaf->fw_token; } /* OF properties to query for a given cache type */ @@ -167,9 +167,10 @@ static int cache_setup_of_node(unsigned int cpu) struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); unsigned int index = 0; - /* skip if of_node is already populated */ - if (this_cpu_ci->info_list->of_node) + /* skip if fw_token is already populated */ + if (this_cpu_ci->info_list->fw_token) { return 0; + } if (!cpu_dev) { pr_err("No cpu device for CPU %d\n", cpu); @@ -190,7 +191,7 @@ static int cache_setup_of_node(unsigned int cpu) if (!np) break; cache_of_set_props(this_leaf, np); - this_leaf->of_node = np; + this_leaf->fw_token = np; index++; } @@ -278,7 +279,7 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map); cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map); } - of_node_put(this_leaf->of_node); + of_node_put(this_leaf->fw_token); } } @@ -323,8 +324,9 @@ static int detect_cache_attributes(unsigned int cpu) if (ret) goto free_ci; /* - * For systems using DT for cache hierarchy, of_node and shared_cpu_map - * will be set up here only if they are not populated already + * For systems using DT for cache hierarchy, fw_token + * and shared_cpu_map will be set up here only if they are + * not populated already */ ret = cache_shared_cpu_map_setup(cpu); if (ret) { diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 3d9805297cda..0c6f658054d2 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -34,9 +34,8 @@ enum cache_type { * @shared_cpu_map: logical cpumask representing all the cpus sharing * this cache node * @attributes: bitfield representing various cache attributes - * @of_node: if devicetree is used, this represents either the cpu node in - * case there's no explicit cache node or the cache node itself in the - * device tree + * @fw_token: Unique value used to determine if different cacheinfo + * structures represent a single hardware cache instance. 
* @disable_sysfs: indicates whether this node is visible to the user via * sysfs or not * @priv: pointer to any private data structure specific to particular @@ -65,8 +64,7 @@ struct cacheinfo { #define CACHE_ALLOCATE_POLICY_MASK \ (CACHE_READ_ALLOCATE | CACHE_WRITE_ALLOCATE) #define CACHE_ID BIT(4) - - struct device_node *of_node; + void *fw_token; bool disable_sysfs; void *priv; }; From 30d87bfacbee396646975a00959764a7c49510ec Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:57:59 -0500 Subject: [PATCH 10/53] arm64/acpi: Create arch specific cpu to acpi id helper Its helpful to be able to lookup the acpi_processor_id associated with a logical cpu. Provide an arm64 helper to do this. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Ard Biesheuvel Acked-by: Sudeep Holla Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/acpi.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/arm64/include/asm/acpi.h b/arch/arm64/include/asm/acpi.h index 32f465a80e4e..0db62a4cbce2 100644 --- a/arch/arm64/include/asm/acpi.h +++ b/arch/arm64/include/asm/acpi.h @@ -86,6 +86,10 @@ static inline bool acpi_has_cpu_in_madt(void) } struct acpi_madt_generic_interrupt *acpi_cpu_get_madt_gicc(int cpu); +static inline u32 get_acpi_id_for_cpu(unsigned int cpu) +{ + return acpi_cpu_get_madt_gicc(cpu)->uid; +} static inline void arch_fix_phys_package_id(int num, u32 slot) { } void __init acpi_init_cpus(void); From 2bd00bcd73e5edd5769e2a5f24c59a517582d862 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:58:00 -0500 Subject: [PATCH 11/53] ACPI/PPTT: Add Processor Properties Topology Table parsing ACPI 6.2 adds a new table, which describes how processing units are related to each other in tree like fashion. Caches are also sprinkled throughout the tree and describe the properties of the caches in relation to other caches and processing units. Add the code to parse the cache hierarchy and report the total number of levels of cache for a given core using acpi_find_last_cache_level() as well as fill out the individual cores cache information with cache_setup_acpi() once the cpu_cacheinfo structure has been populated by the arch specific code. An additional patch later in the set adds the ability to report peers in the topology using find_acpi_cpu_topology() to report a unique ID for each processing unit at a given level in the tree. These unique id's can then be used to match related processing units which exist as threads, within a given package, etc. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Rafael J. Wysocki Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- drivers/acpi/pptt.c | 655 +++++++++++++++++++++++++++++++++++++++++++ include/linux/acpi.h | 4 + 2 files changed, 659 insertions(+) create mode 100644 drivers/acpi/pptt.c diff --git a/drivers/acpi/pptt.c b/drivers/acpi/pptt.c new file mode 100644 index 000000000000..e5ea1974d1e3 --- /dev/null +++ b/drivers/acpi/pptt.c @@ -0,0 +1,655 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * pptt.c - parsing of Processor Properties Topology Table (PPTT) + * + * Copyright (C) 2018, ARM + * + * This file implements parsing of the Processor Properties Topology Table + * which is optionally used to describe the processor and cache topology. 
+ * Due to the relative pointers used throughout the table, this doesn't + * leverage the existing subtable parsing in the kernel. + * + * The PPTT structure is an inverted tree, with each node potentially + * holding one or two inverted tree data structures describing + * the caches available at that level. Each cache structure optionally + * contains properties describing the cache at a given level which can be + * used to override hardware probed values. + */ +#define pr_fmt(fmt) "ACPI PPTT: " fmt + +#include +#include +#include + +static struct acpi_subtable_header *fetch_pptt_subtable(struct acpi_table_header *table_hdr, + u32 pptt_ref) +{ + struct acpi_subtable_header *entry; + + /* there isn't a subtable at reference 0 */ + if (pptt_ref < sizeof(struct acpi_subtable_header)) + return NULL; + + if (pptt_ref + sizeof(struct acpi_subtable_header) > table_hdr->length) + return NULL; + + entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, pptt_ref); + + if (entry->length == 0) + return NULL; + + if (pptt_ref + entry->length > table_hdr->length) + return NULL; + + return entry; +} + +static struct acpi_pptt_processor *fetch_pptt_node(struct acpi_table_header *table_hdr, + u32 pptt_ref) +{ + return (struct acpi_pptt_processor *)fetch_pptt_subtable(table_hdr, pptt_ref); +} + +static struct acpi_pptt_cache *fetch_pptt_cache(struct acpi_table_header *table_hdr, + u32 pptt_ref) +{ + return (struct acpi_pptt_cache *)fetch_pptt_subtable(table_hdr, pptt_ref); +} + +static struct acpi_subtable_header *acpi_get_pptt_resource(struct acpi_table_header *table_hdr, + struct acpi_pptt_processor *node, + int resource) +{ + u32 *ref; + + if (resource >= node->number_of_priv_resources) + return NULL; + + ref = ACPI_ADD_PTR(u32, node, sizeof(struct acpi_pptt_processor)); + ref += resource; + + return fetch_pptt_subtable(table_hdr, *ref); +} + +static inline bool acpi_pptt_match_type(int table_type, int type) +{ + return ((table_type & ACPI_PPTT_MASK_CACHE_TYPE) == type || + table_type & ACPI_PPTT_CACHE_TYPE_UNIFIED & type); +} + +/** + * acpi_pptt_walk_cache() - Attempt to find the requested acpi_pptt_cache + * @table_hdr: Pointer to the head of the PPTT table + * @local_level: passed res reflects this cache level + * @res: cache resource in the PPTT we want to walk + * @found: returns a pointer to the requested level if found + * @level: the requested cache level + * @type: the requested cache type + * + * Attempt to find a given cache level, while counting the max number + * of cache levels for the cache node. + * + * Given a pptt resource, verify that it is a cache node, then walk + * down each level of caches, counting how many levels are found + * as well as checking the cache type (icache, dcache, unified). If a + * level & type match, then we set found, and continue the search. + * Once the entire cache branch has been walked return its max + * depth. + * + * Return: The cache structure and the level we terminated with. 
+ */ +static int acpi_pptt_walk_cache(struct acpi_table_header *table_hdr, + int local_level, + struct acpi_subtable_header *res, + struct acpi_pptt_cache **found, + int level, int type) +{ + struct acpi_pptt_cache *cache; + + if (res->type != ACPI_PPTT_TYPE_CACHE) + return 0; + + cache = (struct acpi_pptt_cache *) res; + while (cache) { + local_level++; + + if (local_level == level && + cache->flags & ACPI_PPTT_CACHE_TYPE_VALID && + acpi_pptt_match_type(cache->attributes, type)) { + if (*found != NULL && cache != *found) + pr_warn("Found duplicate cache level/type unable to determine uniqueness\n"); + + pr_debug("Found cache @ level %d\n", level); + *found = cache; + /* + * continue looking at this node's resource list + * to verify that we don't find a duplicate + * cache node. + */ + } + cache = fetch_pptt_cache(table_hdr, cache->next_level_of_cache); + } + return local_level; +} + +static struct acpi_pptt_cache *acpi_find_cache_level(struct acpi_table_header *table_hdr, + struct acpi_pptt_processor *cpu_node, + int *starting_level, int level, + int type) +{ + struct acpi_subtable_header *res; + int number_of_levels = *starting_level; + int resource = 0; + struct acpi_pptt_cache *ret = NULL; + int local_level; + + /* walk down from processor node */ + while ((res = acpi_get_pptt_resource(table_hdr, cpu_node, resource))) { + resource++; + + local_level = acpi_pptt_walk_cache(table_hdr, *starting_level, + res, &ret, level, type); + /* + * we are looking for the max depth. Since its potentially + * possible for a given node to have resources with differing + * depths verify that the depth we have found is the largest. + */ + if (number_of_levels < local_level) + number_of_levels = local_level; + } + if (number_of_levels > *starting_level) + *starting_level = number_of_levels; + + return ret; +} + +/** + * acpi_count_levels() - Given a PPTT table, and a cpu node, count the caches + * @table_hdr: Pointer to the head of the PPTT table + * @cpu_node: processor node we wish to count caches for + * + * Given a processor node containing a processing unit, walk into it and count + * how many levels exist solely for it, and then walk up each level until we hit + * the root node (ignore the package level because it may be possible to have + * caches that exist across packages). Count the number of cache levels that + * exist at each level on the way up. + * + * Return: Total number of levels found. + */ +static int acpi_count_levels(struct acpi_table_header *table_hdr, + struct acpi_pptt_processor *cpu_node) +{ + int total_levels = 0; + + do { + acpi_find_cache_level(table_hdr, cpu_node, &total_levels, 0, 0); + cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent); + } while (cpu_node); + + return total_levels; +} + +/** + * acpi_pptt_leaf_node() - Given a processor node, determine if its a leaf + * @table_hdr: Pointer to the head of the PPTT table + * @node: passed node is checked to see if its a leaf + * + * Determine if the *node parameter is a leaf node by iterating the + * PPTT table, looking for nodes which reference it. + * + * Return: 0 if we find a node referencing the passed node (or table error), + * or 1 if we don't. 
+ */ +static int acpi_pptt_leaf_node(struct acpi_table_header *table_hdr, + struct acpi_pptt_processor *node) +{ + struct acpi_subtable_header *entry; + unsigned long table_end; + u32 node_entry; + struct acpi_pptt_processor *cpu_node; + u32 proc_sz; + + table_end = (unsigned long)table_hdr + table_hdr->length; + node_entry = ACPI_PTR_DIFF(node, table_hdr); + entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, + sizeof(struct acpi_table_pptt)); + proc_sz = sizeof(struct acpi_pptt_processor *); + + while ((unsigned long)entry + proc_sz < table_end) { + cpu_node = (struct acpi_pptt_processor *)entry; + if (entry->type == ACPI_PPTT_TYPE_PROCESSOR && + cpu_node->parent == node_entry) + return 0; + if (entry->length == 0) + return 0; + entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry, + entry->length); + + } + return 1; +} + +/** + * acpi_find_processor_node() - Given a PPTT table find the requested processor + * @table_hdr: Pointer to the head of the PPTT table + * @acpi_cpu_id: cpu we are searching for + * + * Find the subtable entry describing the provided processor. + * This is done by iterating the PPTT table looking for processor nodes + * which have an acpi_processor_id that matches the acpi_cpu_id parameter + * passed into the function. If we find a node that matches this criteria + * we verify that its a leaf node in the topology rather than depending + * on the valid flag, which doesn't need to be set for leaf nodes. + * + * Return: NULL, or the processors acpi_pptt_processor* + */ +static struct acpi_pptt_processor *acpi_find_processor_node(struct acpi_table_header *table_hdr, + u32 acpi_cpu_id) +{ + struct acpi_subtable_header *entry; + unsigned long table_end; + struct acpi_pptt_processor *cpu_node; + u32 proc_sz; + + table_end = (unsigned long)table_hdr + table_hdr->length; + entry = ACPI_ADD_PTR(struct acpi_subtable_header, table_hdr, + sizeof(struct acpi_table_pptt)); + proc_sz = sizeof(struct acpi_pptt_processor *); + + /* find the processor structure associated with this cpuid */ + while ((unsigned long)entry + proc_sz < table_end) { + cpu_node = (struct acpi_pptt_processor *)entry; + + if (entry->length == 0) { + pr_warn("Invalid zero length subtable\n"); + break; + } + if (entry->type == ACPI_PPTT_TYPE_PROCESSOR && + acpi_cpu_id == cpu_node->acpi_processor_id && + acpi_pptt_leaf_node(table_hdr, cpu_node)) { + return (struct acpi_pptt_processor *)entry; + } + + entry = ACPI_ADD_PTR(struct acpi_subtable_header, entry, + entry->length); + } + + return NULL; +} + +static int acpi_find_cache_levels(struct acpi_table_header *table_hdr, + u32 acpi_cpu_id) +{ + int number_of_levels = 0; + struct acpi_pptt_processor *cpu; + + cpu = acpi_find_processor_node(table_hdr, acpi_cpu_id); + if (cpu) + number_of_levels = acpi_count_levels(table_hdr, cpu); + + return number_of_levels; +} + +static u8 acpi_cache_type(enum cache_type type) +{ + switch (type) { + case CACHE_TYPE_DATA: + pr_debug("Looking for data cache\n"); + return ACPI_PPTT_CACHE_TYPE_DATA; + case CACHE_TYPE_INST: + pr_debug("Looking for instruction cache\n"); + return ACPI_PPTT_CACHE_TYPE_INSTR; + default: + case CACHE_TYPE_UNIFIED: + pr_debug("Looking for unified cache\n"); + /* + * It is important that ACPI_PPTT_CACHE_TYPE_UNIFIED + * contains the bit pattern that will match both + * ACPI unified bit patterns because we use it later + * to match both cases. 
+ */ + return ACPI_PPTT_CACHE_TYPE_UNIFIED; + } +} + +static struct acpi_pptt_cache *acpi_find_cache_node(struct acpi_table_header *table_hdr, + u32 acpi_cpu_id, + enum cache_type type, + unsigned int level, + struct acpi_pptt_processor **node) +{ + int total_levels = 0; + struct acpi_pptt_cache *found = NULL; + struct acpi_pptt_processor *cpu_node; + u8 acpi_type = acpi_cache_type(type); + + pr_debug("Looking for CPU %d's level %d cache type %d\n", + acpi_cpu_id, level, acpi_type); + + cpu_node = acpi_find_processor_node(table_hdr, acpi_cpu_id); + + while (cpu_node && !found) { + found = acpi_find_cache_level(table_hdr, cpu_node, + &total_levels, level, acpi_type); + *node = cpu_node; + cpu_node = fetch_pptt_node(table_hdr, cpu_node->parent); + } + + return found; +} + +/* total number of attributes checked by the properties code */ +#define PPTT_CHECKED_ATTRIBUTES 4 + +/** + * update_cache_properties() - Update cacheinfo for the given processor + * @this_leaf: Kernel cache info structure being updated + * @found_cache: The PPTT node describing this cache instance + * @cpu_node: A unique reference to describe this cache instance + * + * The ACPI spec implies that the fields in the cache structures are used to + * extend and correct the information probed from the hardware. Lets only + * set fields that we determine are VALID. + * + * Return: nothing. Side effect of updating the global cacheinfo + */ +static void update_cache_properties(struct cacheinfo *this_leaf, + struct acpi_pptt_cache *found_cache, + struct acpi_pptt_processor *cpu_node) +{ + int valid_flags = 0; + + this_leaf->fw_token = cpu_node; + if (found_cache->flags & ACPI_PPTT_SIZE_PROPERTY_VALID) { + this_leaf->size = found_cache->size; + valid_flags++; + } + if (found_cache->flags & ACPI_PPTT_LINE_SIZE_VALID) { + this_leaf->coherency_line_size = found_cache->line_size; + valid_flags++; + } + if (found_cache->flags & ACPI_PPTT_NUMBER_OF_SETS_VALID) { + this_leaf->number_of_sets = found_cache->number_of_sets; + valid_flags++; + } + if (found_cache->flags & ACPI_PPTT_ASSOCIATIVITY_VALID) { + this_leaf->ways_of_associativity = found_cache->associativity; + valid_flags++; + } + if (found_cache->flags & ACPI_PPTT_WRITE_POLICY_VALID) { + switch (found_cache->attributes & ACPI_PPTT_MASK_WRITE_POLICY) { + case ACPI_PPTT_CACHE_POLICY_WT: + this_leaf->attributes = CACHE_WRITE_THROUGH; + break; + case ACPI_PPTT_CACHE_POLICY_WB: + this_leaf->attributes = CACHE_WRITE_BACK; + break; + } + } + if (found_cache->flags & ACPI_PPTT_ALLOCATION_TYPE_VALID) { + switch (found_cache->attributes & ACPI_PPTT_MASK_ALLOCATION_TYPE) { + case ACPI_PPTT_CACHE_READ_ALLOCATE: + this_leaf->attributes |= CACHE_READ_ALLOCATE; + break; + case ACPI_PPTT_CACHE_WRITE_ALLOCATE: + this_leaf->attributes |= CACHE_WRITE_ALLOCATE; + break; + case ACPI_PPTT_CACHE_RW_ALLOCATE: + case ACPI_PPTT_CACHE_RW_ALLOCATE_ALT: + this_leaf->attributes |= + CACHE_READ_ALLOCATE | CACHE_WRITE_ALLOCATE; + break; + } + } + /* + * If the above flags are valid, and the cache type is NOCACHE + * update the cache type as well. 
+ */ + if (this_leaf->type == CACHE_TYPE_NOCACHE && + valid_flags == PPTT_CHECKED_ATTRIBUTES) + this_leaf->type = CACHE_TYPE_UNIFIED; +} + +static void cache_setup_acpi_cpu(struct acpi_table_header *table, + unsigned int cpu) +{ + struct acpi_pptt_cache *found_cache; + struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); + u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu); + struct cacheinfo *this_leaf; + unsigned int index = 0; + struct acpi_pptt_processor *cpu_node = NULL; + + while (index < get_cpu_cacheinfo(cpu)->num_leaves) { + this_leaf = this_cpu_ci->info_list + index; + found_cache = acpi_find_cache_node(table, acpi_cpu_id, + this_leaf->type, + this_leaf->level, + &cpu_node); + pr_debug("found = %p %p\n", found_cache, cpu_node); + if (found_cache) + update_cache_properties(this_leaf, + found_cache, + cpu_node); + + index++; + } +} + +/* Passing level values greater than this will result in search termination */ +#define PPTT_ABORT_PACKAGE 0xFF + +static struct acpi_pptt_processor *acpi_find_processor_package_id(struct acpi_table_header *table_hdr, + struct acpi_pptt_processor *cpu, + int level, int flag) +{ + struct acpi_pptt_processor *prev_node; + + while (cpu && level) { + if (cpu->flags & flag) + break; + pr_debug("level %d\n", level); + prev_node = fetch_pptt_node(table_hdr, cpu->parent); + if (prev_node == NULL) + break; + cpu = prev_node; + level--; + } + return cpu; +} + +/** + * topology_get_acpi_cpu_tag() - Find a unique topology value for a feature + * @table: Pointer to the head of the PPTT table + * @cpu: Kernel logical cpu number + * @level: A level that terminates the search + * @flag: A flag which terminates the search + * + * Get a unique value given a cpu, and a topology level, that can be + * matched to determine which cpus share common topological features + * at that level. + * + * Return: Unique value, or -ENOENT if unable to locate cpu + */ +static int topology_get_acpi_cpu_tag(struct acpi_table_header *table, + unsigned int cpu, int level, int flag) +{ + struct acpi_pptt_processor *cpu_node; + u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu); + + cpu_node = acpi_find_processor_node(table, acpi_cpu_id); + if (cpu_node) { + cpu_node = acpi_find_processor_package_id(table, cpu_node, + level, flag); + /* Only the first level has a guaranteed id */ + if (level == 0) + return cpu_node->acpi_processor_id; + return ACPI_PTR_DIFF(cpu_node, table); + } + pr_warn_once("PPTT table found, but unable to locate core %d (%d)\n", + cpu, acpi_cpu_id); + return -ENOENT; +} + +static int find_acpi_cpu_topology_tag(unsigned int cpu, int level, int flag) +{ + struct acpi_table_header *table; + acpi_status status; + int retval; + + status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); + if (ACPI_FAILURE(status)) { + pr_warn_once("No PPTT table found, cpu topology may be inaccurate\n"); + return -ENOENT; + } + retval = topology_get_acpi_cpu_tag(table, cpu, level, flag); + pr_debug("Topology Setup ACPI cpu %d, level %d ret = %d\n", + cpu, level, retval); + acpi_put_table(table); + + return retval; +} + +/** + * acpi_find_last_cache_level() - Determines the number of cache levels for a PE + * @cpu: Kernel logical cpu number + * + * Given a logical cpu number, returns the number of levels of cache represented + * in the PPTT. Errors caused by lack of a PPTT table, or otherwise, return 0 + * indicating we didn't find any cache levels. + * + * Return: Cache levels visible to this core. 
+ */ +int acpi_find_last_cache_level(unsigned int cpu) +{ + u32 acpi_cpu_id; + struct acpi_table_header *table; + int number_of_levels = 0; + acpi_status status; + + pr_debug("Cache Setup find last level cpu=%d\n", cpu); + + acpi_cpu_id = get_acpi_id_for_cpu(cpu); + status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); + if (ACPI_FAILURE(status)) { + pr_warn_once("No PPTT table found, cache topology may be inaccurate\n"); + } else { + number_of_levels = acpi_find_cache_levels(table, acpi_cpu_id); + acpi_put_table(table); + } + pr_debug("Cache Setup find last level level=%d\n", number_of_levels); + + return number_of_levels; +} + +/** + * cache_setup_acpi() - Override CPU cache topology with data from the PPTT + * @cpu: Kernel logical cpu number + * + * Updates the global cache info provided by cpu_get_cacheinfo() + * when there are valid properties in the acpi_pptt_cache nodes. A + * successful parse may not result in any updates if none of the + * cache levels have any valid flags set. Futher, a unique value is + * associated with each known CPU cache entry. This unique value + * can be used to determine whether caches are shared between cpus. + * + * Return: -ENOENT on failure to find table, or 0 on success + */ +int cache_setup_acpi(unsigned int cpu) +{ + struct acpi_table_header *table; + acpi_status status; + + pr_debug("Cache Setup ACPI cpu %d\n", cpu); + + status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); + if (ACPI_FAILURE(status)) { + pr_warn_once("No PPTT table found, cache topology may be inaccurate\n"); + return -ENOENT; + } + + cache_setup_acpi_cpu(table, cpu); + acpi_put_table(table); + + return status; +} + +/** + * find_acpi_cpu_topology() - Determine a unique topology value for a given cpu + * @cpu: Kernel logical cpu number + * @level: The topological level for which we would like a unique ID + * + * Determine a topology unique ID for each thread/core/cluster/mc_grouping + * /socket/etc. This ID can then be used to group peers, which will have + * matching ids. + * + * The search terminates when either the requested level is found or + * we reach a root node. Levels beyond the termination point will return the + * same unique ID. The unique id for level 0 is the acpi processor id. All + * other levels beyond this use a generated value to uniquely identify + * a topological feature. + * + * Return: -ENOENT if the PPTT doesn't exist, or the cpu cannot be found. + * Otherwise returns a value which represents a unique topological feature. + */ +int find_acpi_cpu_topology(unsigned int cpu, int level) +{ + return find_acpi_cpu_topology_tag(cpu, level, 0); +} + +/** + * find_acpi_cpu_cache_topology() - Determine a unique cache topology value + * @cpu: Kernel logical cpu number + * @level: The cache level for which we would like a unique ID + * + * Determine a unique ID for each unified cache in the system + * + * Return: -ENOENT if the PPTT doesn't exist, or the cpu cannot be found. + * Otherwise returns a value which represents a unique topological feature. 
+ */ +int find_acpi_cpu_cache_topology(unsigned int cpu, int level) +{ + struct acpi_table_header *table; + struct acpi_pptt_cache *found_cache; + acpi_status status; + u32 acpi_cpu_id = get_acpi_id_for_cpu(cpu); + struct acpi_pptt_processor *cpu_node = NULL; + int ret = -1; + + status = acpi_get_table(ACPI_SIG_PPTT, 0, &table); + if (ACPI_FAILURE(status)) { + pr_warn_once("No PPTT table found, topology may be inaccurate\n"); + return -ENOENT; + } + + found_cache = acpi_find_cache_node(table, acpi_cpu_id, + CACHE_TYPE_UNIFIED, + level, + &cpu_node); + if (found_cache) + ret = ACPI_PTR_DIFF(cpu_node, table); + + acpi_put_table(table); + + return ret; +} + + +/** + * find_acpi_cpu_topology_package() - Determine a unique cpu package value + * @cpu: Kernel logical cpu number + * + * Determine a topology unique package ID for the given cpu. + * This ID can then be used to group peers, which will have matching ids. + * + * The search terminates when either a level is found with the PHYSICAL_PACKAGE + * flag set or we reach a root node. + * + * Return: -ENOENT if the PPTT doesn't exist, or the cpu cannot be found. + * Otherwise returns a value which represents the package for this cpu. + */ +int find_acpi_cpu_topology_package(unsigned int cpu) +{ + return find_acpi_cpu_topology_tag(cpu, PPTT_ABORT_PACKAGE, + ACPI_PPTT_PHYSICAL_PACKAGE); +} diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 15bfb15c2fa5..032e12a2fdc2 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1297,4 +1297,8 @@ static inline int lpit_read_residency_count_address(u64 *address) } #endif +int find_acpi_cpu_topology(unsigned int cpu, int level); +int find_acpi_cpu_topology_package(unsigned int cpu); +int find_acpi_cpu_cache_topology(unsigned int cpu, int level); + #endif /*_LINUX_ACPI_H*/ From 0ce82232232a2f76128e9bfcc6e8b662e110a671 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:58:01 -0500 Subject: [PATCH 12/53] ACPI: Enable PPTT support on ARM64 Now that we have a PPTT parser, in preparation for its use on arm64, lets build it. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Reviewed-by: Sudeep Holla Acked-by: Ard Biesheuvel Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + drivers/acpi/Kconfig | 3 +++ drivers/acpi/Makefile | 1 + 3 files changed, 5 insertions(+) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 9c850f3b398f..4d98774cf3c7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -7,6 +7,7 @@ config ARM64 select ACPI_REDUCED_HARDWARE_ONLY if ACPI select ACPI_MCFG if ACPI select ACPI_SPCR_TABLE if ACPI + select ACPI_PPTT if ACPI select ARCH_CLOCKSOURCE_DATA select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED diff --git a/drivers/acpi/Kconfig b/drivers/acpi/Kconfig index 516d7b36d6fb..b533eeb6139d 100644 --- a/drivers/acpi/Kconfig +++ b/drivers/acpi/Kconfig @@ -547,6 +547,9 @@ config ACPI_CONFIGFS if ARM64 source "drivers/acpi/arm64/Kconfig" + +config ACPI_PPTT + bool endif config TPS68470_PMIC_OPREGION diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index 48e202752754..6d59aa109a91 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -88,6 +88,7 @@ obj-$(CONFIG_ACPI_BGRT) += bgrt.o obj-$(CONFIG_ACPI_CPPC_LIB) += cppc_acpi.o obj-$(CONFIG_ACPI_SPCR_TABLE) += spcr.o obj-$(CONFIG_ACPI_DEBUGGER_USER) += acpi_dbg.o +obj-$(CONFIG_ACPI_PPTT) += pptt.o # processor has its own "processor." 
module_param namespace processor-y := processor_driver.o From 582b468bdc6d9c287a432a63225cf7922e985e15 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:58:02 -0500 Subject: [PATCH 13/53] drivers: base cacheinfo: Add support for ACPI based firmware tables Call ACPI cache parsing routines from base cacheinfo code if ACPI is enabled. Also stub out cache_setup_acpi and acpi_find_last_cache_level so that individual architectures can enable ACPI topology parsing. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Greg Kroah-Hartman Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- drivers/base/cacheinfo.c | 14 ++++++++++---- include/linux/cacheinfo.h | 17 +++++++++++++++++ 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/drivers/base/cacheinfo.c b/drivers/base/cacheinfo.c index 597aacb233fc..2880e2ab01f5 100644 --- a/drivers/base/cacheinfo.c +++ b/drivers/base/cacheinfo.c @@ -206,7 +206,7 @@ static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, struct cacheinfo *sib_leaf) { /* - * For non-DT systems, assume unique level 1 cache, system-wide + * For non-DT/ACPI systems, assume unique level 1 caches, system-wide * shared caches for all other levels. This will be used only if * arch specific code has not populated shared_cpu_map */ @@ -214,6 +214,11 @@ static inline bool cache_leaves_are_shared(struct cacheinfo *this_leaf, } #endif +int __weak cache_setup_acpi(unsigned int cpu) +{ + return -ENOTSUPP; +} + static int cache_shared_cpu_map_setup(unsigned int cpu) { struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); @@ -227,8 +232,8 @@ static int cache_shared_cpu_map_setup(unsigned int cpu) if (of_have_populated_dt()) ret = cache_setup_of_node(cpu); else if (!acpi_disabled) - /* No cache property/hierarchy support yet in ACPI */ - ret = -ENOTSUPP; + ret = cache_setup_acpi(cpu); + if (ret) return ret; @@ -279,7 +284,8 @@ static void cache_shared_cpu_map_remove(unsigned int cpu) cpumask_clear_cpu(cpu, &sib_leaf->shared_cpu_map); cpumask_clear_cpu(sibling, &this_leaf->shared_cpu_map); } - of_node_put(this_leaf->fw_token); + if (of_have_populated_dt()) + of_node_put(this_leaf->fw_token); } } diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 0c6f658054d2..89397e30e269 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -97,6 +97,23 @@ int func(unsigned int cpu) \ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu); int init_cache_level(unsigned int cpu); int populate_cache_leaves(unsigned int cpu); +int cache_setup_acpi(unsigned int cpu); +#ifndef CONFIG_ACPI +/* + * acpi_find_last_cache_level is only called on ACPI enabled + * platforms using the PPTT for topology. This means that if + * the platform supports other firmware configuration methods + * we need to stub out the call when ACPI is disabled. + * ACPI enabled platforms not using PPTT won't be making calls + * to this function so we need not worry about them. 
+ */ +static inline int acpi_find_last_cache_level(unsigned int cpu) +{ + return 0; +} +#else +int acpi_find_last_cache_level(unsigned int cpu); +#endif const struct attribute_group *cache_get_priv_group(struct cacheinfo *this_leaf); From 8571890e1513bc6768495b6541fb8064e046a61c Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:58:03 -0500 Subject: [PATCH 14/53] arm64: Add support for ACPI based firmware tables The /sys cache entries should support ACPI/PPTT generated cache topology information. For arm64, if ACPI is enabled, determine the max number of cache levels and populate them using the PPTT table if one is available. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Reviewed-by: Sudeep Holla Acked-by: Ard Biesheuvel Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cacheinfo.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/arm64/kernel/cacheinfo.c b/arch/arm64/kernel/cacheinfo.c index 380f2e2fbed5..0bf0a835122f 100644 --- a/arch/arm64/kernel/cacheinfo.c +++ b/arch/arm64/kernel/cacheinfo.c @@ -17,6 +17,7 @@ * along with this program. If not, see . */ +#include #include #include @@ -46,7 +47,7 @@ static void ci_leaf_init(struct cacheinfo *this_leaf, static int __init_cache_level(unsigned int cpu) { - unsigned int ctype, level, leaves, of_level; + unsigned int ctype, level, leaves, fw_level; struct cpu_cacheinfo *this_cpu_ci = get_cpu_cacheinfo(cpu); for (level = 1, leaves = 0; level <= MAX_CACHE_LEVEL; level++) { @@ -59,15 +60,19 @@ static int __init_cache_level(unsigned int cpu) leaves += (ctype == CACHE_TYPE_SEPARATE) ? 2 : 1; } - of_level = of_find_last_cache_level(cpu); - if (level < of_level) { + if (acpi_disabled) + fw_level = of_find_last_cache_level(cpu); + else + fw_level = acpi_find_last_cache_level(cpu); + + if (level < fw_level) { /* * some external caches not specified in CLIDR_EL1 * the information may be available in the device tree * only unified external caches are considered here */ - leaves += (of_level - level); - level = of_level; + leaves += (fw_level - level); + level = fw_level; } this_cpu_ci->num_levels = level; From 868abc07680c2c8b7f85ae883f9f1b90bf4ef4bf Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:58:04 -0500 Subject: [PATCH 15/53] arm64: topology: rename cluster_id The cluster concept isn't architecturally defined for arm64. Lets match the name of the arm64 topology field to the kernel macro that uses it. 
Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Morten Rasmussen Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/topology.h | 4 ++-- arch/arm64/kernel/topology.c | 26 +++++++++++++------------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index c4f2d50491eb..6b10459e6905 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -7,14 +7,14 @@ struct cpu_topology { int thread_id; int core_id; - int cluster_id; + int package_id; cpumask_t thread_sibling; cpumask_t core_sibling; }; extern struct cpu_topology cpu_topology[NR_CPUS]; -#define topology_physical_package_id(cpu) (cpu_topology[cpu].cluster_id) +#define topology_physical_package_id(cpu) (cpu_topology[cpu].package_id) #define topology_core_id(cpu) (cpu_topology[cpu].core_id) #define topology_core_cpumask(cpu) (&cpu_topology[cpu].core_sibling) #define topology_sibling_cpumask(cpu) (&cpu_topology[cpu].thread_sibling) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 21868530018e..dc18b1e53194 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -47,7 +47,7 @@ static int __init get_cpu_for_node(struct device_node *node) return cpu; } -static int __init parse_core(struct device_node *core, int cluster_id, +static int __init parse_core(struct device_node *core, int package_id, int core_id) { char name[10]; @@ -63,7 +63,7 @@ static int __init parse_core(struct device_node *core, int cluster_id, leaf = false; cpu = get_cpu_for_node(t); if (cpu >= 0) { - cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].package_id = package_id; cpu_topology[cpu].core_id = core_id; cpu_topology[cpu].thread_id = i; } else { @@ -85,7 +85,7 @@ static int __init parse_core(struct device_node *core, int cluster_id, return -EINVAL; } - cpu_topology[cpu].cluster_id = cluster_id; + cpu_topology[cpu].package_id = package_id; cpu_topology[cpu].core_id = core_id; } else if (leaf) { pr_err("%pOF: Can't get CPU for leaf core\n", core); @@ -101,7 +101,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) bool leaf = true; bool has_cores = false; struct device_node *c; - static int cluster_id __initdata; + static int package_id __initdata; int core_id = 0; int i, ret; @@ -140,7 +140,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) } if (leaf) { - ret = parse_core(c, cluster_id, core_id++); + ret = parse_core(c, package_id, core_id++); } else { pr_err("%pOF: Non-leaf cluster with core %s\n", cluster, name); @@ -158,7 +158,7 @@ static int __init parse_cluster(struct device_node *cluster, int depth) pr_warn("%pOF: empty cluster\n", cluster); if (leaf) - cluster_id++; + package_id++; return 0; } @@ -194,7 +194,7 @@ static int __init parse_dt_topology(void) * only mark cores described in the DT as possible. 
*/ for_each_possible_cpu(cpu) - if (cpu_topology[cpu].cluster_id == -1) + if (cpu_topology[cpu].package_id == -1) ret = -EINVAL; out_map: @@ -224,7 +224,7 @@ static void update_siblings_masks(unsigned int cpuid) for_each_possible_cpu(cpu) { cpu_topo = &cpu_topology[cpu]; - if (cpuid_topo->cluster_id != cpu_topo->cluster_id) + if (cpuid_topo->package_id != cpu_topo->package_id) continue; cpumask_set_cpu(cpuid, &cpu_topo->core_sibling); @@ -245,7 +245,7 @@ void store_cpu_topology(unsigned int cpuid) struct cpu_topology *cpuid_topo = &cpu_topology[cpuid]; u64 mpidr; - if (cpuid_topo->cluster_id != -1) + if (cpuid_topo->package_id != -1) goto topology_populated; mpidr = read_cpuid_mpidr(); @@ -259,19 +259,19 @@ void store_cpu_topology(unsigned int cpuid) /* Multiprocessor system : Multi-threads per core */ cpuid_topo->thread_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 1); - cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) | + cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 2) | MPIDR_AFFINITY_LEVEL(mpidr, 3) << 8; } else { /* Multiprocessor system : Single-thread per core */ cpuid_topo->thread_id = -1; cpuid_topo->core_id = MPIDR_AFFINITY_LEVEL(mpidr, 0); - cpuid_topo->cluster_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) | + cpuid_topo->package_id = MPIDR_AFFINITY_LEVEL(mpidr, 1) | MPIDR_AFFINITY_LEVEL(mpidr, 2) << 8 | MPIDR_AFFINITY_LEVEL(mpidr, 3) << 16; } pr_debug("CPU%u: cluster %d core %d thread %d mpidr %#016llx\n", - cpuid, cpuid_topo->cluster_id, cpuid_topo->core_id, + cpuid, cpuid_topo->package_id, cpuid_topo->core_id, cpuid_topo->thread_id, mpidr); topology_populated: @@ -287,7 +287,7 @@ static void __init reset_cpu_topology(void) cpu_topo->thread_id = -1; cpu_topo->core_id = 0; - cpu_topo->cluster_id = -1; + cpu_topo->package_id = -1; cpumask_clear(&cpu_topo->core_sibling); cpumask_set_cpu(cpu, &cpu_topo->core_sibling); From 2f0a5d107e1fd6250a3a0469892a6c47a8ca933b Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:58:05 -0500 Subject: [PATCH 16/53] arm64: topology: enable ACPI/PPTT based CPU topology Propagate the topology information from the PPTT tree to the cpu_topology array. We can get the thread id and core_id by assuming certain levels of the PPTT tree correspond to those concepts. The package_id is flagged in the tree and can be found by calling find_acpi_cpu_topology_package() which terminates its search when it finds an ACPI node flagged as the physical package. If the tree doesn't contain enough levels to represent all of the requested levels then the root node will be returned for all subsequent levels. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Morten Rasmussen Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- arch/arm64/kernel/topology.c | 45 +++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index dc18b1e53194..047d98e68502 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -11,6 +11,7 @@ * for more details. */ +#include #include #include #include @@ -22,6 +23,7 @@ #include #include #include +#include #include #include @@ -296,6 +298,45 @@ static void __init reset_cpu_topology(void) } } +#ifdef CONFIG_ACPI +/* + * Propagate the topology information of the processor_topology_node tree to the + * cpu_topology array. 
+ */ +static int __init parse_acpi_topology(void) +{ + bool is_threaded; + int cpu, topology_id; + + is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK; + + for_each_possible_cpu(cpu) { + topology_id = find_acpi_cpu_topology(cpu, 0); + if (topology_id < 0) + return topology_id; + + if (is_threaded) { + cpu_topology[cpu].thread_id = topology_id; + topology_id = find_acpi_cpu_topology(cpu, 1); + cpu_topology[cpu].core_id = topology_id; + } else { + cpu_topology[cpu].thread_id = -1; + cpu_topology[cpu].core_id = topology_id; + } + topology_id = find_acpi_cpu_topology_package(cpu); + cpu_topology[cpu].package_id = topology_id; + } + + return 0; +} + +#else +static inline int __init parse_acpi_topology(void) +{ + return -EINVAL; +} +#endif + void __init init_cpu_topology(void) { reset_cpu_topology(); @@ -304,6 +345,8 @@ void __init init_cpu_topology(void) * Discard anything that was parsed if we hit an error so we * don't use partial information. */ - if (of_have_populated_dt() && parse_dt_topology()) + if (!acpi_disabled && parse_acpi_topology()) + reset_cpu_topology(); + else if (of_have_populated_dt() && parse_dt_topology()) reset_cpu_topology(); } From bce1a65172d1172a8ec26c8251b9a4a97a3cae23 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:58:06 -0500 Subject: [PATCH 17/53] ACPI: Add PPTT to injectable table list Add ACPI_SIG_PPTT to the table so initrd's can override the system topology. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Ard Biesheuvel Acked-by: Rafael J. Wysocki Signed-off-by: Geoffrey Blake Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- drivers/acpi/tables.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/acpi/tables.c b/drivers/acpi/tables.c index 849c4fb19b03..30d93bf7c6a2 100644 --- a/drivers/acpi/tables.c +++ b/drivers/acpi/tables.c @@ -457,7 +457,7 @@ static const char * const table_sigs[] = { ACPI_SIG_UEFI, ACPI_SIG_WAET, ACPI_SIG_WDAT, ACPI_SIG_WDDT, ACPI_SIG_WDRT, ACPI_SIG_DSDT, ACPI_SIG_FADT, ACPI_SIG_PSDT, ACPI_SIG_RSDT, ACPI_SIG_XSDT, ACPI_SIG_SSDT, ACPI_SIG_IORT, - ACPI_SIG_NFIT, ACPI_SIG_HMAT, NULL }; + ACPI_SIG_NFIT, ACPI_SIG_HMAT, ACPI_SIG_PPTT, NULL }; #define ACPI_HEADER_SIZE sizeof(struct acpi_table_header) From 37c3ec2d810f87eac73822f76b30391a83bded19 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Fri, 11 May 2018 18:58:07 -0500 Subject: [PATCH 18/53] arm64: topology: divorce MC scheduling domain from core_siblings Now that we have an accurate view of the physical topology we need to represent it correctly to the scheduler. Generally MC should equal the LLC in the system, but there are a number of special cases that need to be dealt with. In the case of NUMA in socket, we need to assure that the sched domain we build for the MC layer isn't larger than the DIE above it. Similarly for LLC's that might exist in cross socket interconnect or directory hardware we need to assure that MC is shrunk to the socket or NUMA node. This patch builds a sibling mask for the LLC, and then picks the smallest of LLC, socket siblings, or NUMA node siblings, which gives us the behavior described above. This is ever so slightly different than the similar alternative where we look for a cache layer less than or equal to the socket/NUMA siblings. The logic to pick the MC layer affects all arm64 machines, but only changes the behavior for DT/MPIDR systems if the NUMA domain is smaller than the core siblings (generally set to the cluster). 
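Put differently, the mask selection described above boils down to the following
(illustrative sketch, condensed from the cpu_coregroup_mask() hunk below, with
comments trimmed):

  /* start from the NUMA node siblings of this CPU */
  const cpumask_t *mask = cpumask_of_node(cpu_to_node(cpu));

  /* shrink to the package siblings when they are the smaller set */
  if (cpumask_subset(&cpu_topology[cpu].core_sibling, mask))
          mask = &cpu_topology[cpu].core_sibling;

  /* shrink further to the LLC siblings when known and smaller still */
  if (cpu_topology[cpu].llc_id != -1 &&
      cpumask_subset(&cpu_topology[cpu].llc_siblings, mask))
          mask = &cpu_topology[cpu].llc_siblings;

  return mask;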
Potentially this fixes a possible bug in DT systems, but really it only affects ACPI systems where the core siblings is correctly set to the socket siblings. Thus all currently available ACPI systems should have MC equal to LLC, including the NUMA in socket machines where the LLC is partitioned between the NUMA nodes. Tested-by: Ard Biesheuvel Tested-by: Vijaya Kumar K Tested-by: Xiongfeng Wang Tested-by: Tomasz Nowicki Acked-by: Sudeep Holla Acked-by: Ard Biesheuvel Acked-by: Morten Rasmussen Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/topology.h | 2 ++ arch/arm64/kernel/topology.c | 36 ++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/topology.h b/arch/arm64/include/asm/topology.h index 6b10459e6905..df48212f767b 100644 --- a/arch/arm64/include/asm/topology.h +++ b/arch/arm64/include/asm/topology.h @@ -8,8 +8,10 @@ struct cpu_topology { int thread_id; int core_id; int package_id; + int llc_id; cpumask_t thread_sibling; cpumask_t core_sibling; + cpumask_t llc_siblings; }; extern struct cpu_topology cpu_topology[NR_CPUS]; diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 047d98e68502..7415c166281f 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include @@ -214,7 +215,19 @@ EXPORT_SYMBOL_GPL(cpu_topology); const struct cpumask *cpu_coregroup_mask(int cpu) { - return &cpu_topology[cpu].core_sibling; + const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu)); + + /* Find the smaller of NUMA, core or LLC siblings */ + if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) { + /* not numa in package, lets use the package siblings */ + core_mask = &cpu_topology[cpu].core_sibling; + } + if (cpu_topology[cpu].llc_id != -1) { + if (cpumask_subset(&cpu_topology[cpu].llc_siblings, core_mask)) + core_mask = &cpu_topology[cpu].llc_siblings; + } + + return core_mask; } static void update_siblings_masks(unsigned int cpuid) @@ -226,6 +239,9 @@ static void update_siblings_masks(unsigned int cpuid) for_each_possible_cpu(cpu) { cpu_topo = &cpu_topology[cpu]; + if (cpuid_topo->llc_id == cpu_topo->llc_id) + cpumask_set_cpu(cpu, &cpuid_topo->llc_siblings); + if (cpuid_topo->package_id != cpu_topo->package_id) continue; @@ -291,6 +307,10 @@ static void __init reset_cpu_topology(void) cpu_topo->core_id = 0; cpu_topo->package_id = -1; + cpu_topo->llc_id = -1; + cpumask_clear(&cpu_topo->llc_siblings); + cpumask_set_cpu(cpu, &cpu_topo->llc_siblings); + cpumask_clear(&cpu_topo->core_sibling); cpumask_set_cpu(cpu, &cpu_topo->core_sibling); cpumask_clear(&cpu_topo->thread_sibling); @@ -311,6 +331,8 @@ static int __init parse_acpi_topology(void) is_threaded = read_cpuid_mpidr() & MPIDR_MT_BITMASK; for_each_possible_cpu(cpu) { + int i, cache_id; + topology_id = find_acpi_cpu_topology(cpu, 0); if (topology_id < 0) return topology_id; @@ -325,6 +347,18 @@ static int __init parse_acpi_topology(void) } topology_id = find_acpi_cpu_topology_package(cpu); cpu_topology[cpu].package_id = topology_id; + + i = acpi_find_last_cache_level(cpu); + + if (i > 0) { + /* + * this is the only part of cpu_topology that has + * a direct relationship with the cache topology + */ + cache_id = find_acpi_cpu_cache_topology(cpu, i); + if (cache_id > 0) + cpu_topology[cpu].llc_id = cache_id; + } } return 0; From 159fd7b8d3d12b27593d4fe3f6ae1d8e14ea9d0b Mon Sep 17 00:00:00 2001 From: Dave Martin Date: 
Mon, 14 May 2018 18:51:09 +0100 Subject: [PATCH 19/53] arm64/sve: Write ZCR_EL1 on context switch only if changed Writes to ZCR_EL1 are self-synchronising, and so may be expensive in typical implementations. This patch adopts the approach used for costly system register writes elsewhere in the kernel: the system register write is suppressed if it would not change the stored value. Since the common case will be that of switching between tasks that use the same vector length as one another, prediction hit rates on the conditional branch should be reasonably good, with lower expected amortised cost than the unconditional execution of a heavyweight self-synchronising instruction. Signed-off-by: Dave Martin Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/fpsimdmacros.h | 12 +++++++----- arch/arm64/kernel/entry-fpsimd.S | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/arm64/include/asm/fpsimdmacros.h b/arch/arm64/include/asm/fpsimdmacros.h index e050d765ca9e..46843515d77b 100644 --- a/arch/arm64/include/asm/fpsimdmacros.h +++ b/arch/arm64/include/asm/fpsimdmacros.h @@ -207,12 +207,14 @@ str w\nxtmp, [\xpfpsr, #4] .endm -.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp +.macro sve_load nxbase, xpfpsr, xvqminus1, nxtmp, xtmp2 mrs_s x\nxtmp, SYS_ZCR_EL1 - bic x\nxtmp, x\nxtmp, ZCR_ELx_LEN_MASK - orr x\nxtmp, x\nxtmp, \xvqminus1 - msr_s SYS_ZCR_EL1, x\nxtmp // self-synchronising - + bic \xtmp2, x\nxtmp, ZCR_ELx_LEN_MASK + orr \xtmp2, \xtmp2, \xvqminus1 + cmp \xtmp2, x\nxtmp + b.eq 921f + msr_s SYS_ZCR_EL1, \xtmp2 // self-synchronising +921: _for n, 0, 31, _sve_ldr_v \n, \nxbase, \n - 34 _sve_ldr_p 0, \nxbase _sve_wrffr 0 diff --git a/arch/arm64/kernel/entry-fpsimd.S b/arch/arm64/kernel/entry-fpsimd.S index 73f17bffcd23..12d4958e6429 100644 --- a/arch/arm64/kernel/entry-fpsimd.S +++ b/arch/arm64/kernel/entry-fpsimd.S @@ -49,7 +49,7 @@ ENTRY(sve_save_state) ENDPROC(sve_save_state) ENTRY(sve_load_state) - sve_load 0, x1, x2, 3 + sve_load 0, x1, x2, 3, x4 ret ENDPROC(sve_load_state) From d0f2e423295313a30b1d56f3b24e9e927f0b66b7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 19 Apr 2018 16:06:07 +0200 Subject: [PATCH 20/53] perf: simplify getting .drvdata We should get drvdata from struct device directly. Going via platform_device is an unneeded step back and forth. 
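Concretely, the change replaces the round trip below with a single call
(illustration only, using the SPE PMU pointer from this driver):

  /* before: detour through the platform device */
  struct platform_device *pdev = to_platform_device(dev);
  struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev);

  /* after: read the same driver data straight from struct device */
  struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev);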
Signed-off-by: Wolfram Sang Signed-off-by: Will Deacon --- drivers/perf/arm_spe_pmu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/perf/arm_spe_pmu.c b/drivers/perf/arm_spe_pmu.c index 28bb642af18b..54ec278d2fc4 100644 --- a/drivers/perf/arm_spe_pmu.c +++ b/drivers/perf/arm_spe_pmu.c @@ -131,8 +131,7 @@ static ssize_t arm_spe_pmu_cap_show(struct device *dev, struct device_attribute *attr, char *buf) { - struct platform_device *pdev = to_platform_device(dev); - struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev); + struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev); struct dev_ext_attribute *ea = container_of(attr, struct dev_ext_attribute, attr); int cap = (long)ea->var; @@ -247,8 +246,7 @@ static ssize_t arm_spe_pmu_get_attr_cpumask(struct device *dev, struct device_attribute *attr, char *buf) { - struct platform_device *pdev = to_platform_device(dev); - struct arm_spe_pmu *spe_pmu = platform_get_drvdata(pdev); + struct arm_spe_pmu *spe_pmu = dev_get_drvdata(dev); return cpumap_print_to_pagebuf(true, buf, &spe_pmu->supported_cpus); } From 5c591304e710339a75a9f0f9f3f085aa4109e55d Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 21 May 2018 12:17:09 +0100 Subject: [PATCH 21/53] perf/arm-cci: Remove unnecessary period adjustment Since sampling events are rejected up-front by cci_pmu_event_init(), it doesn't make much sense to go fiddling with the sampling period later. This would seem to be just another leftover artefact of the arm_pmu framwork, and as such can go. Acked-by: Mark Rutland Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 383b2d3dcbc6..72c464485470 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1304,15 +1304,6 @@ static int __hw_perf_event_init(struct perf_event *event) */ hwc->config_base |= (unsigned long)mapping; - /* - * Limit the sample_period to half of the counter width. That way, the - * new counter value is far less likely to overtake the previous one - * unless you have some serious IRQ latency issues. - */ - hwc->sample_period = CCI_PMU_CNTR_MASK >> 1; - hwc->last_period = hwc->sample_period; - local64_set(&hwc->period_left, hwc->sample_period); - if (event->group_leader != event) { if (validate_group(event) != 0) return -EINVAL; From 0788f1e97324d8378e860dc2560699ddc6f3aef9 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 10 May 2018 11:35:15 +0100 Subject: [PATCH 22/53] arm_pmu: simplify arm_pmu::handle_irq The arm_pmu::handle_irq() callback has the same prototype as a generic IRQ handler, taking the IRQ number and a void pointer argument which it must convert to an arm_pmu pointer. This means that all arm_pmu::handle_irq() take an IRQ number they never use, and all must explicitly cast the void pointer to an arm_pmu pointer. Instead, let's change arm_pmu::handle_irq to take an arm_pmu pointer, allowing these casts to be removed. The redundant IRQ number parameter is also removed. 
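In terms of the callback prototype, the change is simply the following (shown
for reference; the per-driver handlers in the diff take the same shape):

  /* before: generic IRQ handler signature, forcing a cast in every handler */
  irqreturn_t (*handle_irq)(int irq_num, void *dev);

  /* after: typed argument, unused IRQ number dropped */
  irqreturn_t (*handle_irq)(struct arm_pmu *pmu);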
Suggested-by: Hoeun Ryu Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Will Deacon --- arch/arm/kernel/perf_event_v6.c | 4 +--- arch/arm/kernel/perf_event_v7.c | 3 +-- arch/arm/kernel/perf_event_xscale.c | 6 ++---- arch/arm64/kernel/perf_event.c | 3 +-- drivers/perf/arm_pmu.c | 2 +- include/linux/perf/arm_pmu.h | 2 +- 6 files changed, 7 insertions(+), 13 deletions(-) diff --git a/arch/arm/kernel/perf_event_v6.c b/arch/arm/kernel/perf_event_v6.c index 1d7061a38922..be42c4f66a40 100644 --- a/arch/arm/kernel/perf_event_v6.c +++ b/arch/arm/kernel/perf_event_v6.c @@ -303,12 +303,10 @@ static void armv6pmu_enable_event(struct perf_event *event) } static irqreturn_t -armv6pmu_handle_irq(int irq_num, - void *dev) +armv6pmu_handle_irq(struct arm_pmu *cpu_pmu) { unsigned long pmcr = armv6_pmcr_read(); struct perf_sample_data data; - struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; diff --git a/arch/arm/kernel/perf_event_v7.c b/arch/arm/kernel/perf_event_v7.c index 870b66c1e4ef..57f01e059f39 100644 --- a/arch/arm/kernel/perf_event_v7.c +++ b/arch/arm/kernel/perf_event_v7.c @@ -946,11 +946,10 @@ static void armv7pmu_disable_event(struct perf_event *event) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static irqreturn_t armv7pmu_handle_irq(int irq_num, void *dev) +static irqreturn_t armv7pmu_handle_irq(struct arm_pmu *cpu_pmu) { u32 pmnc; struct perf_sample_data data; - struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; diff --git a/arch/arm/kernel/perf_event_xscale.c b/arch/arm/kernel/perf_event_xscale.c index fcf218da660e..88d1a76f5367 100644 --- a/arch/arm/kernel/perf_event_xscale.c +++ b/arch/arm/kernel/perf_event_xscale.c @@ -142,11 +142,10 @@ xscale1_pmnc_counter_has_overflowed(unsigned long pmnc, } static irqreturn_t -xscale1pmu_handle_irq(int irq_num, void *dev) +xscale1pmu_handle_irq(struct arm_pmu *cpu_pmu) { unsigned long pmnc; struct perf_sample_data data; - struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; @@ -489,11 +488,10 @@ xscale2_pmnc_counter_has_overflowed(unsigned long of_flags, } static irqreturn_t -xscale2pmu_handle_irq(int irq_num, void *dev) +xscale2pmu_handle_irq(struct arm_pmu *cpu_pmu) { unsigned long pmnc, of_flags; struct perf_sample_data data; - struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; diff --git a/arch/arm64/kernel/perf_event.c b/arch/arm64/kernel/perf_event.c index 85a251b6dfa8..33147aacdafd 100644 --- a/arch/arm64/kernel/perf_event.c +++ b/arch/arm64/kernel/perf_event.c @@ -670,11 +670,10 @@ static void armv8pmu_disable_event(struct perf_event *event) raw_spin_unlock_irqrestore(&events->pmu_lock, flags); } -static irqreturn_t armv8pmu_handle_irq(int irq_num, void *dev) +static irqreturn_t armv8pmu_handle_irq(struct arm_pmu *cpu_pmu) { u32 pmovsr; struct perf_sample_data data; - struct arm_pmu *cpu_pmu = (struct arm_pmu *)dev; struct pmu_hw_events *cpuc = this_cpu_ptr(cpu_pmu->hw_events); struct pt_regs *regs; int idx; diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 1a0d340b65cf..a6347d487635 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -339,7 +339,7 @@ static irqreturn_t armpmu_dispatch_irq(int irq, void *dev) return IRQ_NONE; 
start_clock = sched_clock(); - ret = armpmu->handle_irq(irq, armpmu); + ret = armpmu->handle_irq(armpmu); finish_clock = sched_clock(); perf_sample_event_took(finish_clock - start_clock); diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 40036a57d072..ad5444491975 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -78,7 +78,7 @@ struct arm_pmu { struct pmu pmu; cpumask_t supported_cpus; char *name; - irqreturn_t (*handle_irq)(int irq_num, void *dev); + irqreturn_t (*handle_irq)(struct arm_pmu *pmu); void (*enable)(struct perf_event *event); void (*disable)(struct perf_event *event); int (*get_event_idx)(struct pmu_hw_events *hw_events, From 75dc344145190bf53aed7a28dbc27c11180d94e5 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 14 May 2018 14:34:51 +0100 Subject: [PATCH 23/53] perf/arm-cc*: Fix MODULE_LICENSE() tags The CCI/CCN drivers are licensed under GPLv2, but the MODULE_LICENSE() tags are using the bare "GPL" string implying GPLv2 or later. Fix them to match their actual file license. Acked-by: Pawel Moll Acked-by: Suzuki K Poulose Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 2 +- drivers/perf/arm-ccn.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 72c464485470..7d916dc2214d 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1709,5 +1709,5 @@ static struct platform_driver cci_pmu_driver = { }; builtin_platform_driver(cci_pmu_driver); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("ARM CCI PMU support"); diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index 65b7e4042ece..917b47e776df 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -1594,4 +1594,4 @@ module_init(arm_ccn_init); module_exit(arm_ccn_exit); MODULE_AUTHOR("Pawel Moll "); -MODULE_LICENSE("GPL"); +MODULE_LICENSE("GPL v2"); From 28c01dc9d85e4cd744f254c45d3c894bca168ed6 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 14 May 2018 14:34:52 +0100 Subject: [PATCH 24/53] perf/arm-cci: Remove pointless PMU disabling The CCI PMU driver bears some legacy remnants of the arm_pmu framework from when it was split in c6f85cb4305b ("bus: cci: move away from arm_pmu framework"). In particular this perf_pmu_{dis,en}able() dance around pmu->add which was fixed for arm_pmu in a9e469d1c89b ("drivers/perf: arm_pmu: remove pointless PMU disabling"). For the exact same reasons (i.e. perf core already does this around the call anyway), give cci_pmu_add() the exact same change, which also prevents having to export those core functions to build it as a module. Acked-by: Mark Rutland Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 7d916dc2214d..33b47c292d79 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1184,16 +1184,11 @@ static int cci_pmu_add(struct perf_event *event, int flags) struct cci_pmu_hw_events *hw_events = &cci_pmu->hw_events; struct hw_perf_event *hwc = &event->hw; int idx; - int err = 0; - - perf_pmu_disable(event->pmu); /* If we don't have a space for the counter then finish early. 
*/ idx = pmu_get_event_idx(hw_events, event); - if (idx < 0) { - err = idx; - goto out; - } + if (idx < 0) + return idx; event->hw.idx = idx; hw_events->events[idx] = event; @@ -1205,9 +1200,7 @@ static int cci_pmu_add(struct perf_event *event, int flags) /* Propagate our changes to the userspace mapping. */ perf_event_update_userpage(event); -out: - perf_pmu_enable(event->pmu); - return err; + return 0; } static void cci_pmu_del(struct perf_event *event, int flags) From 8b0c93c20ef78f15d8b760964ff79bda7f68c610 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 14 May 2018 14:34:53 +0100 Subject: [PATCH 25/53] perf/arm-cci: Allow building as a module Fill in the few extra bits and annotations needed to make the driver work properly as a module, and jiggle the Kconfig to expose the driver-level ARM_CCI_PMU option. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 34 ++++++++++++++++++---------------- drivers/perf/arm-cci.c | 17 ++++++++++++++++- 2 files changed, 34 insertions(+), 17 deletions(-) diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 28bb5a029558..110330ecf714 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -6,30 +6,32 @@ menu "Performance monitor support" depends on PERF_EVENTS config ARM_CCI_PMU - bool + tristate "ARM CCI PMU driver" + depends on (ARM && CPU_V7) || ARM64 select ARM_CCI + help + Support for PMU events monitoring on the ARM CCI (Cache Coherent + Interconnect) family of products. + + If compiled as a module, it will be called arm-cci. config ARM_CCI400_PMU - bool "ARM CCI400 PMU support" - depends on (ARM && CPU_V7) || ARM64 + bool "support CCI-400" + default y + depends on ARM_CCI_PMU select ARM_CCI400_COMMON - select ARM_CCI_PMU help - Support for PMU events monitoring on the ARM CCI-400 (cache coherent - interconnect). CCI-400 supports counting events related to the - connected slave/master interfaces. + CCI-400 provides 4 independent event counters counting events related + to the connected slave/master interfaces, plus a cycle counter. config ARM_CCI5xx_PMU - bool "ARM CCI-500/CCI-550 PMU support" - depends on (ARM && CPU_V7) || ARM64 - select ARM_CCI_PMU + bool "support CCI-500/CCI-550" + default y + depends on ARM_CCI_PMU help - Support for PMU events monitoring on the ARM CCI-500/CCI-550 cache - coherent interconnects. Both of them provide 8 independent event counters, - which can count events pertaining to the slave/master interfaces as well - as the internal events to the CCI. - - If unsure, say Y + CCI-500/CCI-550 both provide 8 independent event counters, which can + count events pertaining to the slave/master interfaces as well as the + internal events to the CCI. 
config ARM_CCN tristate "ARM CCN driver support" diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index 33b47c292d79..e6fadc8e1178 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -1407,6 +1407,7 @@ static int cci_pmu_init(struct cci_pmu *cci_pmu, struct platform_device *pdev) pmu_format_attr_group.attrs = model->format_attrs; cci_pmu->pmu = (struct pmu) { + .module = THIS_MODULE, .name = cci_pmu->model->name, .task_ctx_nr = perf_invalid_context, .pmu_enable = cci_pmu_enable, @@ -1572,6 +1573,7 @@ static const struct of_device_id arm_cci_pmu_matches[] = { #endif {}, }; +MODULE_DEVICE_TABLE(of, arm_cci_pmu_matches); static bool is_duplicate_irq(int irq, int *irqs, int nr_irqs) { @@ -1693,14 +1695,27 @@ static int cci_pmu_probe(struct platform_device *pdev) return 0; } +static int cci_pmu_remove(struct platform_device *pdev) +{ + if (!g_cci_pmu) + return 0; + + cpuhp_remove_state(CPUHP_AP_PERF_ARM_CCI_ONLINE); + perf_pmu_unregister(&g_cci_pmu->pmu); + g_cci_pmu = NULL; + + return 0; +} + static struct platform_driver cci_pmu_driver = { .driver = { .name = DRIVER_NAME, .of_match_table = arm_cci_pmu_matches, }, .probe = cci_pmu_probe, + .remove = cci_pmu_remove, }; -builtin_platform_driver(cci_pmu_driver); +module_platform_driver(cci_pmu_driver); MODULE_LICENSE("GPL v2"); MODULE_DESCRIPTION("ARM CCI PMU support"); From 1898eb61fbc9703efee886d3abec27a388cf28c3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 21 May 2018 18:19:49 +0100 Subject: [PATCH 26/53] drivers/perf: arm-ccn: don't log to dmesg in event_init The ARM CCN PMU driver uses dev_warn() to complain about parameters in the user-provided perf_event_attr. This means that under normal operation (e.g. a single invocation of the perf tool), a number of messages warnings may be logged to dmesg. Tools may issue multiple syscalls to probe for feature support, and multiple applications (from multiple users) can attempt to open events simultaneously, so this is not very helpful, even if a user happens to have access to dmesg. Worse, this can push important information out of the dmesg ring buffer, and can significantly slow down syscall fuzzers, vastly increasing the time it takes to find critical bugs. Demote the dev_warn() instances to dev_dbg(), as is the case for all other PMU drivers under drivers/perf/. Users who wish to debug PMU event initialisation can enable dynamic debug to receive these messages. 
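(With CONFIG_DYNAMIC_DEBUG enabled, that is typically something along the lines
of echo 'file arm-ccn.c +p' > /sys/kernel/debug/dynamic_debug/control, the exact
path depending on where debugfs is mounted.)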
Signed-off-by: Mark Rutland Cc: Pawel Moll Cc: Will Deacon Signed-off-by: Will Deacon --- drivers/perf/arm-ccn.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/drivers/perf/arm-ccn.c b/drivers/perf/arm-ccn.c index 917b47e776df..b416ee18e6bb 100644 --- a/drivers/perf/arm-ccn.c +++ b/drivers/perf/arm-ccn.c @@ -736,7 +736,7 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) ccn = pmu_to_arm_ccn(event->pmu); if (hw->sample_period) { - dev_warn(ccn->dev, "Sampling not supported!\n"); + dev_dbg(ccn->dev, "Sampling not supported!\n"); return -EOPNOTSUPP; } @@ -744,12 +744,12 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) event->attr.exclude_kernel || event->attr.exclude_hv || event->attr.exclude_idle || event->attr.exclude_host || event->attr.exclude_guest) { - dev_warn(ccn->dev, "Can't exclude execution levels!\n"); + dev_dbg(ccn->dev, "Can't exclude execution levels!\n"); return -EINVAL; } if (event->cpu < 0) { - dev_warn(ccn->dev, "Can't provide per-task data!\n"); + dev_dbg(ccn->dev, "Can't provide per-task data!\n"); return -EOPNOTSUPP; } /* @@ -771,13 +771,13 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) switch (type) { case CCN_TYPE_MN: if (node_xp != ccn->mn_id) { - dev_warn(ccn->dev, "Invalid MN ID %d!\n", node_xp); + dev_dbg(ccn->dev, "Invalid MN ID %d!\n", node_xp); return -EINVAL; } break; case CCN_TYPE_XP: if (node_xp >= ccn->num_xps) { - dev_warn(ccn->dev, "Invalid XP ID %d!\n", node_xp); + dev_dbg(ccn->dev, "Invalid XP ID %d!\n", node_xp); return -EINVAL; } break; @@ -785,11 +785,11 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) break; default: if (node_xp >= ccn->num_nodes) { - dev_warn(ccn->dev, "Invalid node ID %d!\n", node_xp); + dev_dbg(ccn->dev, "Invalid node ID %d!\n", node_xp); return -EINVAL; } if (!arm_ccn_pmu_type_eq(type, ccn->node[node_xp].type)) { - dev_warn(ccn->dev, "Invalid type 0x%x for node %d!\n", + dev_dbg(ccn->dev, "Invalid type 0x%x for node %d!\n", type, node_xp); return -EINVAL; } @@ -808,19 +808,19 @@ static int arm_ccn_pmu_event_init(struct perf_event *event) if (event_id != e->event) continue; if (e->num_ports && port >= e->num_ports) { - dev_warn(ccn->dev, "Invalid port %d for node/XP %d!\n", + dev_dbg(ccn->dev, "Invalid port %d for node/XP %d!\n", port, node_xp); return -EINVAL; } if (e->num_vcs && vc >= e->num_vcs) { - dev_warn(ccn->dev, "Invalid vc %d for node/XP %d!\n", + dev_dbg(ccn->dev, "Invalid vc %d for node/XP %d!\n", vc, node_xp); return -EINVAL; } valid = 1; } if (!valid) { - dev_warn(ccn->dev, "Invalid event 0x%x for node/XP %d!\n", + dev_dbg(ccn->dev, "Invalid event 0x%x for node/XP %d!\n", event_id, node_xp); return -EINVAL; } From b89205bd508ed384253b4449c6a7a755b956a0f8 Mon Sep 17 00:00:00 2001 From: John Garry Date: Tue, 22 May 2018 23:54:04 +0800 Subject: [PATCH 27/53] drivers/perf: Remove ARM_SPE_PMU explicit PERF_EVENTS dependency Since commit bddb9b68d3fb ("drivers/perf: commonise PERF_EVENTS dependency"), all perf drivers depend on PERF_EVENTS config under a common menu. Config ARM_SPE_PMU still declares explicitly a dependency on PERF_EVENTS, which is unneeded, so remove it. 
Acked-by: Mark Rutland Signed-off-by: John Garry Signed-off-by: Will Deacon --- drivers/perf/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/perf/Kconfig b/drivers/perf/Kconfig index 110330ecf714..08ebaf7cca8b 100644 --- a/drivers/perf/Kconfig +++ b/drivers/perf/Kconfig @@ -96,7 +96,7 @@ config XGENE_PMU config ARM_SPE_PMU tristate "Enable support for the ARMv8.2 Statistical Profiling Extension" - depends on PERF_EVENTS && ARM64 + depends on ARM64 help Enable perf support for the ARMv8.2 Statistical Profiling Extension, which provides periodic sampling of operations in From 7bd99b403405d329b9b72f6bae6a07c10bfa1b95 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 21 May 2018 19:14:22 +0100 Subject: [PATCH 28/53] arm64: Kconfig: Enable LSE atomics by default Now that we're seeing CPUs shipping with LSE atomics, default them to 'on' in Kconfig. CPUs without the instructions will continue to use LDXR/STXR-based sequences, but they will be placed out-of-line by the compiler. Acked-by: Mark Rutland Acked-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 4d98774cf3c7..3aed13626fd7 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -1051,6 +1051,7 @@ config ARM64_PAN config ARM64_LSE_ATOMICS bool "Atomic instructions" + default y help As part of the Large System Extensions, ARMv8.1 introduces new atomic instructions that are designed specifically to scale in @@ -1059,7 +1060,8 @@ config ARM64_LSE_ATOMICS Say Y here to make use of these instructions for the in-kernel atomic routines. This incurs a small overhead on CPUs that do not support these instructions and requires the kernel to be - built with binutils >= 2.25. + built with binutils >= 2.25 in order for the new instructions + to be used. config ARM64_VHE bool "Enable support for Virtualization Host Extensions (VHE)" From 969e61ba87f9195c18130a5e88a966837fdb6a2c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 21 May 2018 14:14:50 +0100 Subject: [PATCH 29/53] arm64: make is_permission_fault() name clearer The naming of is_permission_fault() makes it sound like it should return true for permission faults from EL0, but by design, it only does so for faults from EL1. Let's make this clear by dropping el1 in the name, as we do for is_el1_instruction_abort(). 
Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 4165485e8b6e..59990491e72a 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -235,8 +235,9 @@ static bool is_el1_instruction_abort(unsigned int esr) return ESR_ELx_EC(esr) == ESR_ELx_EC_IABT_CUR; } -static inline bool is_permission_fault(unsigned int esr, struct pt_regs *regs, - unsigned long addr) +static inline bool is_el1_permission_fault(unsigned int esr, + struct pt_regs *regs, + unsigned long addr) { unsigned int ec = ESR_ELx_EC(esr); unsigned int fsc_type = esr & ESR_ELx_FSC_TYPE; @@ -268,7 +269,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, bust_spinlocks(1); - if (is_permission_fault(esr, regs, addr)) { + if (is_el1_permission_fault(esr, regs, addr)) { if (esr & ESR_ELx_WNR) msg = "write to read-only memory"; else @@ -395,7 +396,7 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, mm_flags |= FAULT_FLAG_WRITE; } - if (addr < TASK_SIZE && is_permission_fault(esr, regs, addr)) { + if (addr < TASK_SIZE && is_el1_permission_fault(esr, regs, addr)) { /* regs->orig_addr_limit may be 0 if we entered from EL0 */ if (regs->orig_addr_limit == KERNEL_DS) die("Accessing user space memory with fs=KERNEL_DS", regs, esr); From c870f14ea115bb1d9e57cad3ec6b876cf5542f9f Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 21 May 2018 14:14:51 +0100 Subject: [PATCH 30/53] arm64: Unify kernel fault reporting In do_page_fault(), we handle some kernel faults early, and simply die() with a message. For faults handled later, we dump the faulting address, decode the ESR, walk the page tables, and perform a number of steps to ensure that this data is reported. Let's unify the handling of fatal kernel faults with a new die_kernel_fault() helper, handling all of these details. This is largely the same as the existing logic in __do_kernel_fault(), except that addresses are consistently padded to 16 hex characters, as would be expected for a 64-bit address. The messages currently logged in do_page_fault are adjusted to fit into the die_kernel_fault() message template. 
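For example, the instruction-abort case ends up as a single call (sketch of the
caller side; the full hunks follow):

  if (is_el1_instruction_abort(esr))
          die_kernel_fault("execution of user memory",
                           addr, esr, regs);

which produces the banner in the unified format, i.e. "Unable to handle kernel
execution of user memory at virtual address %016lx", with the address padded to
16 hex digits.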
Acked-by: Will Deacon Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/mm/fault.c | 37 +++++++++++++++++++++++-------------- 1 file changed, 23 insertions(+), 14 deletions(-) diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 59990491e72a..27cbe0b38960 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -255,6 +255,22 @@ static inline bool is_el1_permission_fault(unsigned int esr, return false; } +static void die_kernel_fault(const char *msg, unsigned long addr, + unsigned int esr, struct pt_regs *regs) +{ + bust_spinlocks(1); + + pr_alert("Unable to handle kernel %s at virtual address %016lx\n", msg, + addr); + + mem_abort_decode(esr); + + show_pte(addr); + die("Oops", regs, esr); + bust_spinlocks(0); + do_exit(SIGKILL); +} + static void __do_kernel_fault(unsigned long addr, unsigned int esr, struct pt_regs *regs) { @@ -267,8 +283,6 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, if (!is_el1_instruction_abort(esr) && fixup_exception(regs)) return; - bust_spinlocks(1); - if (is_el1_permission_fault(esr, regs, addr)) { if (esr & ESR_ELx_WNR) msg = "write to read-only memory"; @@ -280,15 +294,7 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr, msg = "paging request"; } - pr_alert("Unable to handle kernel %s at virtual address %08lx\n", msg, - addr); - - mem_abort_decode(esr); - - show_pte(addr); - die("Oops", regs, esr); - bust_spinlocks(0); - do_exit(SIGKILL); + die_kernel_fault(msg, addr, esr, regs); } static void __do_user_fault(struct siginfo *info, unsigned int esr) @@ -399,13 +405,16 @@ static int __kprobes do_page_fault(unsigned long addr, unsigned int esr, if (addr < TASK_SIZE && is_el1_permission_fault(esr, regs, addr)) { /* regs->orig_addr_limit may be 0 if we entered from EL0 */ if (regs->orig_addr_limit == KERNEL_DS) - die("Accessing user space memory with fs=KERNEL_DS", regs, esr); + die_kernel_fault("access to user memory with fs=KERNEL_DS", + addr, esr, regs); if (is_el1_instruction_abort(esr)) - die("Attempting to execute userspace memory", regs, esr); + die_kernel_fault("execution of user memory", + addr, esr, regs); if (!search_exception_tables(regs->pc)) - die("Accessing user space memory outside uaccess.h routines", regs, esr); + die_kernel_fault("access to user memory outside uaccess routines", + addr, esr, regs); } perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr); From 984e9cf1b9eaab08e4f1f082ce49ed2670e99d90 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 28 May 2018 17:41:47 +0200 Subject: [PATCH 31/53] drivers/bus: arm-cci: fix build warnings When the arm-cci driver is enabled, but both CONFIG_ARM_CCI5xx_PMU and CONFIG_ARM_CCI400_PMU are not, we get a warning about how parts of the driver are never used: drivers/perf/arm-cci.c:1454:29: error: 'cci_pmu_models' defined but not used [-Werror=unused-variable] drivers/perf/arm-cci.c:693:16: error: 'cci_pmu_event_show' defined but not used [-Werror=unused-function] drivers/perf/arm-cci.c:685:16: error: 'cci_pmu_format_show' defined but not used [-Werror=unused-function] Marking all three functions as __maybe_unused avoids the warnings in randconfig builds. I'm doing this lacking any ideas for a better fix. 
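For reference, __maybe_unused only suppresses the unused-definition warning in
configurations that never reference the symbol; it does not change code
generation when the symbol is used. A generic sketch (placeholder name, not the
driver code):

  static int __maybe_unused only_referenced_in_some_configs(void)
  {
          return 0;
  }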
Fixes: 3de6be7a3dd8 ("drivers/bus: Split Arm CCI driver") Reviewed-by: Robin Murphy Signed-off-by: Arnd Bergmann Signed-off-by: Will Deacon --- drivers/perf/arm-cci.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/perf/arm-cci.c b/drivers/perf/arm-cci.c index e6fadc8e1178..0d09d8e669cd 100644 --- a/drivers/perf/arm-cci.c +++ b/drivers/perf/arm-cci.c @@ -120,9 +120,9 @@ enum cci_models { static void pmu_write_counters(struct cci_pmu *cci_pmu, unsigned long *mask); -static ssize_t cci_pmu_format_show(struct device *dev, +static ssize_t __maybe_unused cci_pmu_format_show(struct device *dev, struct device_attribute *attr, char *buf); -static ssize_t cci_pmu_event_show(struct device *dev, +static ssize_t __maybe_unused cci_pmu_event_show(struct device *dev, struct device_attribute *attr, char *buf); #define CCI_EXT_ATTR_ENTRY(_name, _func, _config) \ @@ -1451,7 +1451,7 @@ static int cci_pmu_offline_cpu(unsigned int cpu) return 0; } -static struct cci_pmu_model cci_pmu_models[] = { +static __maybe_unused struct cci_pmu_model cci_pmu_models[] = { #ifdef CONFIG_ARM_CCI400_PMU [CCI400_R0] = { .name = "CCI_400", From 73acc0315cd72174729141856c6607d1c91419cb Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 28 May 2018 17:44:36 +0200 Subject: [PATCH 32/53] ARM: mcpm, perf/arm-cci: export mcpm_is_available Now that the ARM CCI PMU driver can be built as a loadable module, we get a link failure when MCPM is enabled: ERROR: "mcpm_is_available" [drivers/perf/arm-cci.ko] undefined! The simplest fix is to export that helper function. Fixes: 8b0c93c20ef7 ("perf/arm-cci: Allow building as a module") Acked-by: Nicolas Pitre Acked-by: Russell King Signed-off-by: Arnd Bergmann Signed-off-by: Will Deacon --- arch/arm/common/mcpm_entry.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm/common/mcpm_entry.c b/arch/arm/common/mcpm_entry.c index 2b913f17d50f..ad574d20415c 100644 --- a/arch/arm/common/mcpm_entry.c +++ b/arch/arm/common/mcpm_entry.c @@ -9,6 +9,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include @@ -174,6 +175,7 @@ bool mcpm_is_available(void) { return (platform_ops) ? true : false; } +EXPORT_SYMBOL_GPL(mcpm_is_available); /* * We can't use regular spinlocks. In the switcher case, it is possible From eff0e9e1078ea7dc1d794dc50e31baef984c46d7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 May 2018 13:11:05 +0100 Subject: [PATCH 33/53] arm/arm64: smccc: Add SMCCC-specific return codes We've so far used the PSCI return codes for SMCCC because they were extremely similar. But with the new ARM DEN 0070A specification, "NOT_REQUIRED" (-2) is clashing with PSCI's "PSCI_RET_INVALID_PARAMS". Let's bite the bullet and add SMCCC specific return codes. Users can be repainted as and when required. Acked-by: Will Deacon Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- include/linux/arm-smccc.h | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index a031897fca76..c89da86de99f 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -291,5 +291,10 @@ asmlinkage void __arm_smccc_hvc(unsigned long a0, unsigned long a1, */ #define arm_smccc_1_1_hvc(...) 
__arm_smccc_1_1(SMCCC_HVC_INST, __VA_ARGS__) +/* Return codes defined in ARM DEN 0070A */ +#define SMCCC_RET_SUCCESS 0 +#define SMCCC_RET_NOT_SUPPORTED -1 +#define SMCCC_RET_NOT_REQUIRED -2 + #endif /*__ASSEMBLY__*/ #endif /*__LINUX_ARM_SMCCC_H*/ From 8e2906245f1e3b0d027169d9f2e55ce0548cb96e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 May 2018 13:11:06 +0100 Subject: [PATCH 34/53] arm64: Call ARCH_WORKAROUND_2 on transitions between EL0 and EL1 In order for the kernel to protect itself, let's call the SSBD mitigation implemented by the higher exception level (either hypervisor or firmware) on each transition between userspace and kernel. We must take the PSCI conduit into account in order to target the right exception level, hence the introduction of a runtime patching callback. Reviewed-by: Mark Rutland Reviewed-by: Julien Grall Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpu_errata.c | 24 ++++++++++++++++++++++++ arch/arm64/kernel/entry.S | 22 ++++++++++++++++++++++ include/linux/arm-smccc.h | 5 +++++ 3 files changed, 51 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index a900befadfe8..c1eda6be7758 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -232,6 +232,30 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) } #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ +#ifdef CONFIG_ARM64_SSBD +void __init arm64_update_smccc_conduit(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, + int nr_inst) +{ + u32 insn; + + BUG_ON(nr_inst != 1); + + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + insn = aarch64_insn_get_hvc_value(); + break; + case PSCI_CONDUIT_SMC: + insn = aarch64_insn_get_smc_value(); + break; + default: + return; + } + + *updptr = cpu_to_le32(insn); +} +#endif /* CONFIG_ARM64_SSBD */ + #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ .matches = is_affected_midr_range, \ .midr_range = MIDR_RANGE(model, v_min, r_min, v_max, r_max) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index ec2ee720e33e..f33e6aed3037 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -18,6 +18,7 @@ * along with this program. If not, see . */ +#include #include #include @@ -137,6 +138,18 @@ alternative_else_nop_endif add \dst, \dst, #(\sym - .entry.tramp.text) .endm + // This macro corrupts x0-x3. It is the caller's duty + // to save/restore them if required. + .macro apply_ssbd, state +#ifdef CONFIG_ARM64_SSBD + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 + mov w1, #\state +alternative_cb arm64_update_smccc_conduit + nop // Patched to SMC/HVC #0 +alternative_cb_end +#endif + .endm + .macro kernel_entry, el, regsize = 64 .if \regsize == 32 mov w0, w0 // zero upper 32 bits of x0 @@ -163,6 +176,13 @@ alternative_else_nop_endif ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug disable_step_tsk x19, x20 // exceptions when scheduling. 
+ apply_ssbd 1 + +#ifdef CONFIG_ARM64_SSBD + ldp x0, x1, [sp, #16 * 0] + ldp x2, x3, [sp, #16 * 1] +#endif + mov x29, xzr // fp pointed to user-space .else add x21, sp, #S_FRAME_SIZE @@ -303,6 +323,8 @@ alternative_if ARM64_WORKAROUND_845719 alternative_else_nop_endif #endif 3: + apply_ssbd 0 + .endif msr elr_el1, x21 // set up the return data diff --git a/include/linux/arm-smccc.h b/include/linux/arm-smccc.h index c89da86de99f..ca1d2cc2cdfa 100644 --- a/include/linux/arm-smccc.h +++ b/include/linux/arm-smccc.h @@ -80,6 +80,11 @@ ARM_SMCCC_SMC_32, \ 0, 0x8000) +#define ARM_SMCCC_ARCH_WORKAROUND_2 \ + ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, \ + ARM_SMCCC_SMC_32, \ + 0, 0x7fff) + #ifndef __ASSEMBLY__ #include From 5cf9ce6e5ea50f805c6188c04ed0daaec7b6887d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 May 2018 13:11:07 +0100 Subject: [PATCH 35/53] arm64: Add per-cpu infrastructure to call ARCH_WORKAROUND_2 In a heterogeneous system, we can end up with both affected and unaffected CPUs. Let's check their status before calling into the firmware. Reviewed-by: Julien Grall Reviewed-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpu_errata.c | 2 ++ arch/arm64/kernel/entry.S | 11 +++++++---- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index c1eda6be7758..cd91ca0250f1 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -233,6 +233,8 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ #ifdef CONFIG_ARM64_SSBD +DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); + void __init arm64_update_smccc_conduit(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index f33e6aed3037..29ad672a6abd 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -140,8 +140,10 @@ alternative_else_nop_endif // This macro corrupts x0-x3. It is the caller's duty // to save/restore them if required. - .macro apply_ssbd, state + .macro apply_ssbd, state, targ, tmp1, tmp2 #ifdef CONFIG_ARM64_SSBD + ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1 + cbz \tmp2, \targ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 mov w1, #\state alternative_cb arm64_update_smccc_conduit @@ -176,12 +178,13 @@ alternative_cb_end ldr x19, [tsk, #TSK_TI_FLAGS] // since we can unmask debug disable_step_tsk x19, x20 // exceptions when scheduling. - apply_ssbd 1 + apply_ssbd 1, 1f, x22, x23 #ifdef CONFIG_ARM64_SSBD ldp x0, x1, [sp, #16 * 0] ldp x2, x3, [sp, #16 * 1] #endif +1: mov x29, xzr // fp pointed to user-space .else @@ -323,8 +326,8 @@ alternative_if ARM64_WORKAROUND_845719 alternative_else_nop_endif #endif 3: - apply_ssbd 0 - + apply_ssbd 0, 5f, x0, x1 +5: .endif msr elr_el1, x21 // set up the return data From a725e3dda1813ed306734823ac4c65ca04e38500 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 May 2018 13:11:08 +0100 Subject: [PATCH 36/53] arm64: Add ARCH_WORKAROUND_2 probing As for Spectre variant-2, we rely on SMCCC 1.1 to provide the discovery mechanism for detecting the SSBD mitigation. A new capability is also allocated for that purpose, and a config option. 
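The probe itself is the standard SMCCC 1.1 ARCH_FEATURES query; the call shape
used in the hunk below is roughly:

  struct arm_smccc_res res;

  arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID,
                    ARM_SMCCC_ARCH_WORKAROUND_2, &res);
  /*
   * (int)res.a0: negative means unsupported or already mitigated,
   * zero means mitigation required, positive means unaffected.
   */

(with the HVC variant used when PSCI goes through the hypervisor conduit).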
Reviewed-by: Julien Grall Reviewed-by: Mark Rutland Acked-by: Will Deacon Reviewed-by: Suzuki K Poulose Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 9 +++++ arch/arm64/include/asm/cpucaps.h | 3 +- arch/arm64/kernel/cpu_errata.c | 69 ++++++++++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 1 deletion(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3aed13626fd7..0b98a6c42454 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -940,6 +940,15 @@ config HARDEN_EL2_VECTORS If unsure, say Y. +config ARM64_SSBD + bool "Speculative Store Bypass Disable" if EXPERT + default y + help + This enables mitigation of the bypassing of previous stores + by speculative loads. + + If unsure, say Y. + menuconfig ARMV8_DEPRECATED bool "Emulate deprecated/obsolete ARMv8 instructions" depends on COMPAT diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index bc51b72fafd4..8a699c708fc9 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -48,7 +48,8 @@ #define ARM64_HAS_CACHE_IDC 27 #define ARM64_HAS_CACHE_DIC 28 #define ARM64_HW_DBM 29 +#define ARM64_SSBD 30 -#define ARM64_NCAPS 30 +#define ARM64_NCAPS 31 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index cd91ca0250f1..7e8f12d85d99 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -256,6 +256,67 @@ void __init arm64_update_smccc_conduit(struct alt_instr *alt, *updptr = cpu_to_le32(insn); } + +static void arm64_set_ssbd_mitigation(bool state) +{ + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); + break; + + case PSCI_CONDUIT_SMC: + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); + break; + + default: + WARN_ON_ONCE(1); + break; + } +} + +static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, + int scope) +{ + struct arm_smccc_res res; + bool supported = true; + + WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); + + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) + return false; + + /* + * The probe function return value is either negative + * (unsupported or mitigated), positive (unaffected), or zero + * (requires mitigation). We only need to do anything in the + * last case. 
+ */ + switch (psci_ops.conduit) { + case PSCI_CONDUIT_HVC: + arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_2, &res); + if ((int)res.a0 != 0) + supported = false; + break; + + case PSCI_CONDUIT_SMC: + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, + ARM_SMCCC_ARCH_WORKAROUND_2, &res); + if ((int)res.a0 != 0) + supported = false; + break; + + default: + supported = false; + } + + if (supported) { + __this_cpu_write(arm64_ssbd_callback_required, 1); + arm64_set_ssbd_mitigation(true); + } + + return supported; +} #endif /* CONFIG_ARM64_SSBD */ #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ @@ -512,6 +573,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors), }, +#endif +#ifdef CONFIG_ARM64_SSBD + { + .desc = "Speculative Store Bypass Disable", + .capability = ARM64_SSBD, + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + .matches = has_ssbd_mitigation, + }, #endif { } From a43ae4dfe56a01f5b98ba0cb2f784b6a43bafcc6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 May 2018 13:11:09 +0100 Subject: [PATCH 37/53] arm64: Add 'ssbd' command-line option On a system where the firmware implements ARCH_WORKAROUND_2, it may be useful to either permanently enable or disable the workaround for cases where the user decides that they'd rather not get a trap overhead, and keep the mitigation permanently on or off instead of switching it on exception entry/exit. In any case, default to the mitigation being enabled. Reviewed-by: Julien Grall Reviewed-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- .../admin-guide/kernel-parameters.txt | 17 +++ arch/arm64/include/asm/cpufeature.h | 6 + arch/arm64/kernel/cpu_errata.c | 103 +++++++++++++++--- 3 files changed, 110 insertions(+), 16 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 11fc28ecdb6d..7db8868fabab 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4047,6 +4047,23 @@ expediting. Set to zero to disable automatic expediting. + ssbd= [ARM64,HW] + Speculative Store Bypass Disable control + + On CPUs that are vulnerable to the Speculative + Store Bypass vulnerability and offer a + firmware based mitigation, this parameter + indicates how the mitigation should be used: + + force-on: Unconditionally enable mitigation for + for both kernel and userspace + force-off: Unconditionally disable mitigation for + for both kernel and userspace + kernel: Always enable mitigation in the + kernel, and offer a prctl interface + to allow userspace to register its + interest in being mitigated too. + stack_guard_gap= [MM] override the default stack gap protection. 
The value is in page units and it defines how many pages prior diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index 09b0f2a80c8f..b50650f3e496 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -537,6 +537,12 @@ static inline u64 read_zcr_features(void) return zcr; } +#define ARM64_SSBD_UNKNOWN -1 +#define ARM64_SSBD_FORCE_DISABLE 0 +#define ARM64_SSBD_KERNEL 1 +#define ARM64_SSBD_FORCE_ENABLE 2 +#define ARM64_SSBD_MITIGATED 3 + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 7e8f12d85d99..1075f90fdd8c 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -235,6 +235,38 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) #ifdef CONFIG_ARM64_SSBD DEFINE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); +int ssbd_state __read_mostly = ARM64_SSBD_KERNEL; + +static const struct ssbd_options { + const char *str; + int state; +} ssbd_options[] = { + { "force-on", ARM64_SSBD_FORCE_ENABLE, }, + { "force-off", ARM64_SSBD_FORCE_DISABLE, }, + { "kernel", ARM64_SSBD_KERNEL, }, +}; + +static int __init ssbd_cfg(char *buf) +{ + int i; + + if (!buf || !buf[0]) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(ssbd_options); i++) { + int len = strlen(ssbd_options[i].str); + + if (strncmp(buf, ssbd_options[i].str, len)) + continue; + + ssbd_state = ssbd_options[i].state; + return 0; + } + + return -EINVAL; +} +early_param("ssbd", ssbd_cfg); + void __init arm64_update_smccc_conduit(struct alt_instr *alt, __le32 *origptr, __le32 *updptr, int nr_inst) @@ -278,44 +310,83 @@ static bool has_ssbd_mitigation(const struct arm64_cpu_capabilities *entry, int scope) { struct arm_smccc_res res; - bool supported = true; + bool required = true; + s32 val; WARN_ON(scope != SCOPE_LOCAL_CPU || preemptible()); - if (psci_ops.smccc_version == SMCCC_VERSION_1_0) + if (psci_ops.smccc_version == SMCCC_VERSION_1_0) { + ssbd_state = ARM64_SSBD_UNKNOWN; return false; + } - /* - * The probe function return value is either negative - * (unsupported or mitigated), positive (unaffected), or zero - * (requires mitigation). We only need to do anything in the - * last case. 
- */ switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_2, &res); - if ((int)res.a0 != 0) - supported = false; break; case PSCI_CONDUIT_SMC: arm_smccc_1_1_smc(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_ARCH_WORKAROUND_2, &res); - if ((int)res.a0 != 0) - supported = false; break; default: - supported = false; + ssbd_state = ARM64_SSBD_UNKNOWN; + return false; } - if (supported) { - __this_cpu_write(arm64_ssbd_callback_required, 1); + val = (s32)res.a0; + + switch (val) { + case SMCCC_RET_NOT_SUPPORTED: + ssbd_state = ARM64_SSBD_UNKNOWN; + return false; + + case SMCCC_RET_NOT_REQUIRED: + pr_info_once("%s mitigation not required\n", entry->desc); + ssbd_state = ARM64_SSBD_MITIGATED; + return false; + + case SMCCC_RET_SUCCESS: + required = true; + break; + + case 1: /* Mitigation not required on this CPU */ + required = false; + break; + + default: + WARN_ON(1); + return false; + } + + switch (ssbd_state) { + case ARM64_SSBD_FORCE_DISABLE: + pr_info_once("%s disabled from command-line\n", entry->desc); + arm64_set_ssbd_mitigation(false); + required = false; + break; + + case ARM64_SSBD_KERNEL: + if (required) { + __this_cpu_write(arm64_ssbd_callback_required, 1); + arm64_set_ssbd_mitigation(true); + } + break; + + case ARM64_SSBD_FORCE_ENABLE: + pr_info_once("%s forced from command-line\n", entry->desc); arm64_set_ssbd_mitigation(true); + required = true; + break; + + default: + WARN_ON(1); + break; } - return supported; + return required; } #endif /* CONFIG_ARM64_SSBD */ From c32e1736ca03904c03de0e4459a673be194f56fd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 May 2018 13:11:10 +0100 Subject: [PATCH 38/53] arm64: ssbd: Add global mitigation state accessor We're about to need the mitigation state in various parts of the kernel in order to do the right thing for userspace and guests. Let's expose an accessor that will let other subsystems know about the state. Reviewed-by: Julien Grall Reviewed-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index b50650f3e496..b0fc3224ce8a 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -543,6 +543,16 @@ static inline u64 read_zcr_features(void) #define ARM64_SSBD_FORCE_ENABLE 2 #define ARM64_SSBD_MITIGATED 3 +static inline int arm64_get_ssbd_state(void) +{ +#ifdef CONFIG_ARM64_SSBD + extern int ssbd_state; + return ssbd_state; +#else + return ARM64_SSBD_UNKNOWN; +#endif +} + #endif /* __ASSEMBLY__ */ #endif From 986372c4367f46b34a3c0f6918d7fb95cbdf39d6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 May 2018 13:11:11 +0100 Subject: [PATCH 39/53] arm64: ssbd: Skip apply_ssbd if not using dynamic mitigation In order to avoid checking arm64_ssbd_callback_required on each kernel entry/exit even if no mitigation is required, let's add yet another alternative that by default jumps over the mitigation, and that gets nop'ed out if we're doing dynamic mitigation. Think of it as a poor man's static key... 
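In C terms the new alternative callback is trivial: apply_ssbd now opens with a
branch that skips the whole sequence, and the callback nops that branch out only
when dynamic mitigation is in use (condensed from the hunk that follows):

  /* arm64_enable_wa2_handling(), sketch */
  if (arm64_get_ssbd_state() == ARM64_SSBD_KERNEL)
          *updptr = cpu_to_le32(aarch64_insn_gen_nop());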
Reviewed-by: Julien Grall Reviewed-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpu_errata.c | 14 ++++++++++++++ arch/arm64/kernel/entry.S | 3 +++ 2 files changed, 17 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 1075f90fdd8c..2797bc2c8c6a 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -289,6 +289,20 @@ void __init arm64_update_smccc_conduit(struct alt_instr *alt, *updptr = cpu_to_le32(insn); } +void __init arm64_enable_wa2_handling(struct alt_instr *alt, + __le32 *origptr, __le32 *updptr, + int nr_inst) +{ + BUG_ON(nr_inst != 1); + /* + * Only allow mitigation on EL1 entry/exit and guest + * ARCH_WORKAROUND_2 handling if the SSBD state allows it to + * be flipped. + */ + if (arm64_get_ssbd_state() == ARM64_SSBD_KERNEL) + *updptr = cpu_to_le32(aarch64_insn_gen_nop()); +} + static void arm64_set_ssbd_mitigation(bool state) { switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index 29ad672a6abd..e6f6e2339b22 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -142,6 +142,9 @@ alternative_else_nop_endif // to save/restore them if required. .macro apply_ssbd, state, targ, tmp1, tmp2 #ifdef CONFIG_ARM64_SSBD +alternative_cb arm64_enable_wa2_handling + b \targ +alternative_cb_end ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1 cbz \tmp2, \targ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 mov w1, #\state alternative_cb arm64_update_smccc_conduit From 647d0519b53f440a55df163de21c52a8205431cc Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 May 2018 13:11:12 +0100 Subject: [PATCH 40/53] arm64: ssbd: Restore mitigation status on CPU resume On a system where firmware can dynamically change the state of the mitigation, the CPU will always come up with the mitigation enabled, including when coming back from suspend. If the user has requested "no mitigation" via a command line option, let's enforce it by calling into the firmware again to disable it. Similarly, for a resume from hibernate, the mitigation could have been disabled by the boot kernel. Let's ensure that it is set back on in that case.
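The two resume paths therefore get complementary fixups; roughly (sketch only, mirroring the hunks below):

    /* __cpu_suspend_exit(): firmware has re-enabled the mitigation, so only
     * a forced "off" needs to be re-applied. */
    if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE)
            arm64_set_ssbd_mitigation(false);

    /* swsusp_arch_suspend() resume path: the boot kernel may have turned the
     * mitigation off, so switch it back on whenever this kernel wants it on. */
    switch (arm64_get_ssbd_state()) {
    case ARM64_SSBD_FORCE_ENABLE:
    case ARM64_SSBD_KERNEL:
            arm64_set_ssbd_mitigation(true);
    }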
Acked-by: Will Deacon Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cpufeature.h | 6 ++++++ arch/arm64/kernel/cpu_errata.c | 2 +- arch/arm64/kernel/hibernate.c | 11 +++++++++++ arch/arm64/kernel/suspend.c | 8 ++++++++ 4 files changed, 26 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/cpufeature.h b/arch/arm64/include/asm/cpufeature.h index b0fc3224ce8a..55bc1f073bfb 100644 --- a/arch/arm64/include/asm/cpufeature.h +++ b/arch/arm64/include/asm/cpufeature.h @@ -553,6 +553,12 @@ static inline int arm64_get_ssbd_state(void) #endif } +#ifdef CONFIG_ARM64_SSBD +void arm64_set_ssbd_mitigation(bool state); +#else +static inline void arm64_set_ssbd_mitigation(bool state) {} +#endif + #endif /* __ASSEMBLY__ */ #endif diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 2797bc2c8c6a..cf37ca6fa5f2 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -303,7 +303,7 @@ void __init arm64_enable_wa2_handling(struct alt_instr *alt, *updptr = cpu_to_le32(aarch64_insn_gen_nop()); } -static void arm64_set_ssbd_mitigation(bool state) +void arm64_set_ssbd_mitigation(bool state) { switch (psci_ops.conduit) { case PSCI_CONDUIT_HVC: diff --git a/arch/arm64/kernel/hibernate.c b/arch/arm64/kernel/hibernate.c index 1ec5f28c39fc..6b2686d54411 100644 --- a/arch/arm64/kernel/hibernate.c +++ b/arch/arm64/kernel/hibernate.c @@ -313,6 +313,17 @@ int swsusp_arch_suspend(void) sleep_cpu = -EINVAL; __cpu_suspend_exit(); + + /* + * Just in case the boot kernel did turn the SSBD + * mitigation off behind our back, let's set the state + * to what we expect it to be. + */ + switch (arm64_get_ssbd_state()) { + case ARM64_SSBD_FORCE_ENABLE: + case ARM64_SSBD_KERNEL: + arm64_set_ssbd_mitigation(true); + } } local_daif_restore(flags); diff --git a/arch/arm64/kernel/suspend.c b/arch/arm64/kernel/suspend.c index a307b9e13392..70c283368b64 100644 --- a/arch/arm64/kernel/suspend.c +++ b/arch/arm64/kernel/suspend.c @@ -62,6 +62,14 @@ void notrace __cpu_suspend_exit(void) */ if (hw_breakpoint_restore) hw_breakpoint_restore(cpu); + + /* + * On resume, firmware implementing dynamic mitigation will + * have turned the mitigation on. If the user has forcefully + * disabled it, make sure their wishes are obeyed. + */ + if (arm64_get_ssbd_state() == ARM64_SSBD_FORCE_DISABLE) + arm64_set_ssbd_mitigation(false); } /* From 9dd9614f5476687abbff8d4b12cd08ae70d7c2ad Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 May 2018 13:11:13 +0100 Subject: [PATCH 41/53] arm64: ssbd: Introduce thread flag to control userspace mitigation In order to allow userspace to be mitigated on demand, let's introduce a new thread flag that prevents the mitigation from being turned off when exiting to userspace, and doesn't turn it on on entry into the kernel (with the assumption that the mitigation is always enabled in the kernel itself). This will be used by a prctl interface introduced in a later patch. 
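In pseudo-C, the extra check added to the apply_ssbd macro amounts to the following for the dynamic-mitigation case (sketch only; firmware_workaround_2() stands in for the patched SMC/HVC sequence):

    /* state is 1 on kernel entry, 0 on exit to userspace */
    if (this_cpu_read(arm64_ssbd_callback_required) &&
        !test_thread_flag(TIF_SSBD))
            firmware_workaround_2(state);
    /* With TIF_SSBD set, neither call is made, so the mitigation simply
     * stays enabled across userspace for that thread. */

The later prctl patch is expected to set TIF_SSBD through the generic speculation control interface, e.g. prctl(PR_SET_SPECULATION_CTRL, PR_SPEC_STORE_BYPASS, PR_SPEC_DISABLE, 0, 0).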
Reviewed-by: Mark Rutland Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/thread_info.h | 1 + arch/arm64/kernel/entry.S | 2 ++ 2 files changed, 3 insertions(+) diff --git a/arch/arm64/include/asm/thread_info.h b/arch/arm64/include/asm/thread_info.h index 740aa03c5f0d..cbcf11b5e637 100644 --- a/arch/arm64/include/asm/thread_info.h +++ b/arch/arm64/include/asm/thread_info.h @@ -94,6 +94,7 @@ void arch_release_task_struct(struct task_struct *tsk); #define TIF_32BIT 22 /* 32bit process */ #define TIF_SVE 23 /* Scalable Vector Extension in use */ #define TIF_SVE_VL_INHERIT 24 /* Inherit sve_vl_onexec across exec */ +#define TIF_SSBD 25 /* Wants SSB mitigation */ #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) #define _TIF_NEED_RESCHED (1 << TIF_NEED_RESCHED) diff --git a/arch/arm64/kernel/entry.S b/arch/arm64/kernel/entry.S index e6f6e2339b22..28ad8799406f 100644 --- a/arch/arm64/kernel/entry.S +++ b/arch/arm64/kernel/entry.S @@ -147,6 +147,8 @@ alternative_cb arm64_enable_wa2_handling alternative_cb_end ldr_this_cpu \tmp2, arm64_ssbd_callback_required, \tmp1 cbz \tmp2, \targ + ldr \tmp2, [tsk, #TSK_TI_FLAGS] + tbnz \tmp2, #TIF_SSBD, \targ mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 mov w1, #\state alternative_cb arm64_update_smccc_conduit From 9cdc0108baa8ef87c76ed834619886a46bd70cbe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 May 2018 13:11:14 +0100 Subject: [PATCH 42/53] arm64: ssbd: Add prctl interface for per-thread mitigation If running on a system that performs dynamic SSBD mitigation, allow userspace to request the mitigation for itself. This is implemented as a prctl call, allowing the mitigation to be enabled or disabled at will for this particular thread. Acked-by: Will Deacon Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/kernel/Makefile | 1 + arch/arm64/kernel/ssbd.c | 110 +++++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) create mode 100644 arch/arm64/kernel/ssbd.c diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index bf825f38d206..0025f8691046 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -54,6 +54,7 @@ arm64-obj-$(CONFIG_ARM64_RELOC_TEST) += arm64-reloc-test.o arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o +arm64-obj-$(CONFIG_ARM64_SSBD) += ssbd.o obj-y += $(arm64-obj-y) vdso/ probes/ obj-m += $(arm64-obj-m) diff --git a/arch/arm64/kernel/ssbd.c b/arch/arm64/kernel/ssbd.c new file mode 100644 index 000000000000..3432e5ef9f41 --- /dev/null +++ b/arch/arm64/kernel/ssbd.c @@ -0,0 +1,110 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2018 ARM Ltd, All Rights Reserved. + */ + +#include +#include +#include + +#include + +/* + * prctl interface for SSBD + * FIXME: Drop the below ifdefery once merged in 4.18. + */ +#ifdef PR_SPEC_STORE_BYPASS +static int ssbd_prctl_set(struct task_struct *task, unsigned long ctrl) +{ + int state = arm64_get_ssbd_state(); + + /* Unsupported */ + if (state == ARM64_SSBD_UNKNOWN) + return -EINVAL; + + /* Treat the unaffected/mitigated state separately */ + if (state == ARM64_SSBD_MITIGATED) { + switch (ctrl) { + case PR_SPEC_ENABLE: + return -EPERM; + case PR_SPEC_DISABLE: + case PR_SPEC_FORCE_DISABLE: + return 0; + } + } + + /* + * Things are a bit backward here: the arm64 internal API + * *enables the mitigation* when the userspace API *disables + * speculation*. 
So much fun. + */ + switch (ctrl) { + case PR_SPEC_ENABLE: + /* If speculation is force disabled, enable is not allowed */ + if (state == ARM64_SSBD_FORCE_ENABLE || + task_spec_ssb_force_disable(task)) + return -EPERM; + task_clear_spec_ssb_disable(task); + clear_tsk_thread_flag(task, TIF_SSBD); + break; + case PR_SPEC_DISABLE: + if (state == ARM64_SSBD_FORCE_DISABLE) + return -EPERM; + task_set_spec_ssb_disable(task); + set_tsk_thread_flag(task, TIF_SSBD); + break; + case PR_SPEC_FORCE_DISABLE: + if (state == ARM64_SSBD_FORCE_DISABLE) + return -EPERM; + task_set_spec_ssb_disable(task); + task_set_spec_ssb_force_disable(task); + set_tsk_thread_flag(task, TIF_SSBD); + break; + default: + return -ERANGE; + } + + return 0; +} + +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssbd_prctl_set(task, ctrl); + default: + return -ENODEV; + } +} + +static int ssbd_prctl_get(struct task_struct *task) +{ + switch (arm64_get_ssbd_state()) { + case ARM64_SSBD_UNKNOWN: + return -EINVAL; + case ARM64_SSBD_FORCE_ENABLE: + return PR_SPEC_DISABLE; + case ARM64_SSBD_KERNEL: + if (task_spec_ssb_force_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_FORCE_DISABLE; + if (task_spec_ssb_disable(task)) + return PR_SPEC_PRCTL | PR_SPEC_DISABLE; + return PR_SPEC_PRCTL | PR_SPEC_ENABLE; + case ARM64_SSBD_FORCE_DISABLE: + return PR_SPEC_ENABLE; + default: + return PR_SPEC_NOT_AFFECTED; + } +} + +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which) +{ + switch (which) { + case PR_SPEC_STORE_BYPASS: + return ssbd_prctl_get(task); + default: + return -ENODEV; + } +} +#endif /* PR_SPEC_STORE_BYPASS */ From 85478bab409171de501b719971fd25a3d5d639f9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 May 2018 13:11:15 +0100 Subject: [PATCH 43/53] arm64: KVM: Add HYP per-cpu accessors As we're going to require to access per-cpu variables at EL2, let's craft the minimum set of accessors required to implement reading a per-cpu variable, relying on tpidr_el2 to contain the per-cpu offset. 
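Typical use, as seen later in this series (illustrative sketch; the per-cpu variable is just an example):

    /* From hyp code, where the normal this_cpu accessors cannot be used: */
    if (__hyp_this_cpu_read(arm64_ssbd_callback_required)) {
            /* this CPU needs the ARCH_WORKAROUND_2 firmware call */
    }

The assembly-side helpers (hyp_adr_this_cpu / hyp_ldr_this_cpu) follow the same pattern: take the address of the symbol and add the per-CPU offset held in tpidr_el2.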
Reviewed-by: Christoffer Dall Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_asm.h | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index f6648a3e4152..fefd8cf42c35 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -71,14 +71,37 @@ extern u32 __kvm_get_mdcr_el2(void); extern u32 __init_stage2_translation(void); +/* Home-grown __this_cpu_{ptr,read} variants that always work at HYP */ +#define __hyp_this_cpu_ptr(sym) \ + ({ \ + void *__ptr = hyp_symbol_addr(sym); \ + __ptr += read_sysreg(tpidr_el2); \ + (typeof(&sym))__ptr; \ + }) + +#define __hyp_this_cpu_read(sym) \ + ({ \ + *__hyp_this_cpu_ptr(sym); \ + }) + #else /* __ASSEMBLY__ */ -.macro get_host_ctxt reg, tmp - adr_l \reg, kvm_host_cpu_state +.macro hyp_adr_this_cpu reg, sym, tmp + adr_l \reg, \sym mrs \tmp, tpidr_el2 add \reg, \reg, \tmp .endm +.macro hyp_ldr_this_cpu reg, sym, tmp + adr_l \reg, \sym + mrs \tmp, tpidr_el2 + ldr \reg, [\reg, \tmp] +.endm + +.macro get_host_ctxt reg, tmp + hyp_adr_this_cpu \reg, kvm_host_cpu_state, \tmp +.endm + .macro get_vcpu_ptr vcpu, ctxt get_host_ctxt \ctxt, \vcpu ldr \vcpu, [\ctxt, #HOST_CONTEXT_VCPU] From 55e3748e8902ff641e334226bdcb432f9a5d78d3 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 May 2018 13:11:16 +0100 Subject: [PATCH 44/53] arm64: KVM: Add ARCH_WORKAROUND_2 support for guests In order to offer ARCH_WORKAROUND_2 support to guests, we need a bit of infrastructure. Let's add a flag indicating whether or not the guest uses SSBD mitigation. Depending on the state of this flag, allow KVM to disable ARCH_WORKAROUND_2 before entering the guest, and enable it when exiting it. 
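The world-switch logic added below boils down to the following (simplified sketch; the ARM64_SSBD capability check and the CONFIG_ARM64_SSBD guards are omitted):

    bool ssbd_off = !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG);

    if (ssbd_off && __hyp_this_cpu_read(arm64_ssbd_callback_required))
            arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); /* guest view */
    /* ... __guest_enter() ... */
    if (ssbd_off && __hyp_this_cpu_read(arm64_ssbd_callback_required))
            arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); /* back to host view */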
Reviewed-by: Christoffer Dall Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm/include/asm/kvm_mmu.h | 5 ++++ arch/arm64/include/asm/kvm_asm.h | 3 +++ arch/arm64/include/asm/kvm_host.h | 3 +++ arch/arm64/include/asm/kvm_mmu.h | 24 ++++++++++++++++++ arch/arm64/kvm/hyp/switch.c | 42 +++++++++++++++++++++++++++++++ virt/kvm/arm/arm.c | 4 +++ 6 files changed, 81 insertions(+) diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 707a1f06dc5d..b0c17d88ed40 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -319,6 +319,11 @@ static inline int kvm_map_vectors(void) return 0; } +static inline int hyp_map_aux_data(void) +{ + return 0; +} + #define kvm_phys_to_vttbr(addr) (addr) #endif /* !__ASSEMBLY__ */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index fefd8cf42c35..d4fbb1356c4c 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -33,6 +33,9 @@ #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) +#define VCPU_WORKAROUND_2_FLAG_SHIFT 0 +#define VCPU_WORKAROUND_2_FLAG (_AC(1, UL) << VCPU_WORKAROUND_2_FLAG_SHIFT) + /* Translate a kernel address of @sym into its equivalent linear mapping */ #define kvm_ksym_ref(sym) \ ({ \ diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 469de8acd06f..9bef3f69bdcd 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -216,6 +216,9 @@ struct kvm_vcpu_arch { /* Exception Information */ struct kvm_vcpu_fault_info fault; + /* State of various workarounds, see kvm_asm.h for bit assignment */ + u64 workaround_flags; + /* Guest debug state */ u64 debug_flags; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index f74987b76d91..fbe4ddd9da09 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -456,6 +456,30 @@ static inline int kvm_map_vectors(void) } #endif +#ifdef CONFIG_ARM64_SSBD +DECLARE_PER_CPU_READ_MOSTLY(u64, arm64_ssbd_callback_required); + +static inline int hyp_map_aux_data(void) +{ + int cpu, err; + + for_each_possible_cpu(cpu) { + u64 *ptr; + + ptr = per_cpu_ptr(&arm64_ssbd_callback_required, cpu); + err = create_hyp_mappings(ptr, ptr + 1, PAGE_HYP); + if (err) + return err; + } + return 0; +} +#else +static inline int hyp_map_aux_data(void) +{ + return 0; +} +#endif + #define kvm_phys_to_vttbr(addr) phys_to_ttbr(addr) #endif /* __ASSEMBLY__ */ diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index d9645236e474..c50cedc447f1 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -15,6 +15,7 @@ * along with this program. If not, see . */ +#include #include #include #include @@ -389,6 +390,39 @@ static bool __hyp_text fixup_guest_exit(struct kvm_vcpu *vcpu, u64 *exit_code) return false; } +static inline bool __hyp_text __needs_ssbd_off(struct kvm_vcpu *vcpu) +{ + if (!cpus_have_const_cap(ARM64_SSBD)) + return false; + + return !(vcpu->arch.workaround_flags & VCPU_WORKAROUND_2_FLAG); +} + +static void __hyp_text __set_guest_arch_workaround_state(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ARM64_SSBD + /* + * The host runs with the workaround always present. If the + * guest wants it disabled, so be it... 
+ */ + if (__needs_ssbd_off(vcpu) && + __hyp_this_cpu_read(arm64_ssbd_callback_required)) + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 0, NULL); +#endif +} + +static void __hyp_text __set_host_arch_workaround_state(struct kvm_vcpu *vcpu) +{ +#ifdef CONFIG_ARM64_SSBD + /* + * If the guest has disabled the workaround, bring it back on. + */ + if (__needs_ssbd_off(vcpu) && + __hyp_this_cpu_read(arm64_ssbd_callback_required)) + arm_smccc_1_1_smc(ARM_SMCCC_ARCH_WORKAROUND_2, 1, NULL); +#endif +} + /* Switch to the guest for VHE systems running in EL2 */ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) { @@ -409,6 +443,8 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) sysreg_restore_guest_state_vhe(guest_ctxt); __debug_switch_to_guest(vcpu); + __set_guest_arch_workaround_state(vcpu); + do { /* Jump in the fire! */ exit_code = __guest_enter(vcpu, host_ctxt); @@ -416,6 +452,8 @@ int kvm_vcpu_run_vhe(struct kvm_vcpu *vcpu) /* And we're baaack! */ } while (fixup_guest_exit(vcpu, &exit_code)); + __set_host_arch_workaround_state(vcpu); + fp_enabled = fpsimd_enabled_vhe(); sysreg_save_guest_state_vhe(guest_ctxt); @@ -465,6 +503,8 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) __sysreg_restore_state_nvhe(guest_ctxt); __debug_switch_to_guest(vcpu); + __set_guest_arch_workaround_state(vcpu); + do { /* Jump in the fire! */ exit_code = __guest_enter(vcpu, host_ctxt); @@ -472,6 +512,8 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) /* And we're baaack! */ } while (fixup_guest_exit(vcpu, &exit_code)); + __set_host_arch_workaround_state(vcpu); + fp_enabled = __fpsimd_enabled_nvhe(); __sysreg_save_state_nvhe(guest_ctxt); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index a4c1b76240df..2d9b4795edb2 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1490,6 +1490,10 @@ static int init_hyp_mode(void) } } + err = hyp_map_aux_data(); + if (err) + kvm_err("Cannot map host auxilary data: %d\n", err); + return 0; out_err: From b4f18c063a13dfb33e3a63fe1844823e19c2265e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 May 2018 13:11:17 +0100 Subject: [PATCH 45/53] arm64: KVM: Handle guest's ARCH_WORKAROUND_2 requests In order to forward the guest's ARCH_WORKAROUND_2 calls to EL3, add a small(-ish) sequence to handle it at EL2. Special care must be taken to track the state of the guest itself by updating the workaround flags. We also rely on patching to enable calls into the firmware. Note that since we need to execute branches, this always executes after the Spectre-v2 mitigation has been applied. 
Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/kernel/asm-offsets.c | 1 + arch/arm64/kvm/hyp/hyp-entry.S | 38 ++++++++++++++++++++++++++++++++- 2 files changed, 38 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/asm-offsets.c b/arch/arm64/kernel/asm-offsets.c index 5bdda651bd05..323aeb5f2fe6 100644 --- a/arch/arm64/kernel/asm-offsets.c +++ b/arch/arm64/kernel/asm-offsets.c @@ -136,6 +136,7 @@ int main(void) #ifdef CONFIG_KVM_ARM_HOST DEFINE(VCPU_CONTEXT, offsetof(struct kvm_vcpu, arch.ctxt)); DEFINE(VCPU_FAULT_DISR, offsetof(struct kvm_vcpu, arch.fault.disr_el1)); + DEFINE(VCPU_WORKAROUND_FLAGS, offsetof(struct kvm_vcpu, arch.workaround_flags)); DEFINE(CPU_GP_REGS, offsetof(struct kvm_cpu_context, gp_regs)); DEFINE(CPU_USER_PT_REGS, offsetof(struct kvm_regs, regs)); DEFINE(CPU_FP_REGS, offsetof(struct kvm_regs, fp_regs)); diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index bffece27b5c1..05d836979032 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -106,8 +106,44 @@ el1_hvc_guest: */ ldr x1, [sp] // Guest's x0 eor w1, w1, #ARM_SMCCC_ARCH_WORKAROUND_1 + cbz w1, wa_epilogue + + /* ARM_SMCCC_ARCH_WORKAROUND_2 handling */ + eor w1, w1, #(ARM_SMCCC_ARCH_WORKAROUND_1 ^ \ + ARM_SMCCC_ARCH_WORKAROUND_2) cbnz w1, el1_trap - mov x0, x1 + +#ifdef CONFIG_ARM64_SSBD +alternative_cb arm64_enable_wa2_handling + b wa2_end +alternative_cb_end + get_vcpu_ptr x2, x0 + ldr x0, [x2, #VCPU_WORKAROUND_FLAGS] + + // Sanitize the argument and update the guest flags + ldr x1, [sp, #8] // Guest's x1 + clz w1, w1 // Murphy's device: + lsr w1, w1, #5 // w1 = !!w1 without using + eor w1, w1, #1 // the flags... + bfi x0, x1, #VCPU_WORKAROUND_2_FLAG_SHIFT, #1 + str x0, [x2, #VCPU_WORKAROUND_FLAGS] + + /* Check that we actually need to perform the call */ + hyp_ldr_this_cpu x0, arm64_ssbd_callback_required, x2 + cbz x0, wa2_end + + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_2 + smc #0 + + /* Don't leak data from the SMC call */ + mov x3, xzr +wa2_end: + mov x2, xzr + mov x1, xzr +#endif + +wa_epilogue: + mov x0, xzr add sp, sp, #16 eret From 5d81f7dc9bca4f4963092433e27b508cbe524a32 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 29 May 2018 13:11:18 +0100 Subject: [PATCH 46/53] arm64: KVM: Add ARCH_WORKAROUND_2 discovery through ARCH_FEATURES_FUNC_ID Now that all our infrastructure is in place, let's expose the availability of ARCH_WORKAROUND_2 to guests. We take this opportunity to tidy up a couple of SMCCC constants. Acked-by: Christoffer Dall Reviewed-by: Mark Rutland Signed-off-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm/include/asm/kvm_host.h | 12 ++++++++++++ arch/arm64/include/asm/kvm_host.h | 23 +++++++++++++++++++++++ arch/arm64/kvm/reset.c | 4 ++++ virt/kvm/arm/psci.c | 18 ++++++++++++++++-- 4 files changed, 55 insertions(+), 2 deletions(-) diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index c7c28c885a19..7001fb871429 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -315,6 +315,18 @@ static inline bool kvm_arm_harden_branch_predictor(void) return false; } +#define KVM_SSBD_UNKNOWN -1 +#define KVM_SSBD_FORCE_DISABLE 0 +#define KVM_SSBD_KERNEL 1 +#define KVM_SSBD_FORCE_ENABLE 2 +#define KVM_SSBD_MITIGATED 3 + +static inline int kvm_arm_have_ssbd(void) +{ + /* No way to detect it yet, pretend it is not there. 
*/ + return KVM_SSBD_UNKNOWN; +} + static inline void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu) {} static inline void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu) {} diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 9bef3f69bdcd..95d8a0e15b5f 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -455,6 +455,29 @@ static inline bool kvm_arm_harden_branch_predictor(void) return cpus_have_const_cap(ARM64_HARDEN_BRANCH_PREDICTOR); } +#define KVM_SSBD_UNKNOWN -1 +#define KVM_SSBD_FORCE_DISABLE 0 +#define KVM_SSBD_KERNEL 1 +#define KVM_SSBD_FORCE_ENABLE 2 +#define KVM_SSBD_MITIGATED 3 + +static inline int kvm_arm_have_ssbd(void) +{ + switch (arm64_get_ssbd_state()) { + case ARM64_SSBD_FORCE_DISABLE: + return KVM_SSBD_FORCE_DISABLE; + case ARM64_SSBD_KERNEL: + return KVM_SSBD_KERNEL; + case ARM64_SSBD_FORCE_ENABLE: + return KVM_SSBD_FORCE_ENABLE; + case ARM64_SSBD_MITIGATED: + return KVM_SSBD_MITIGATED; + case ARM64_SSBD_UNKNOWN: + default: + return KVM_SSBD_UNKNOWN; + } +} + void kvm_vcpu_load_sysregs(struct kvm_vcpu *vcpu); void kvm_vcpu_put_sysregs(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 3256b9228e75..a74311beda35 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -122,6 +122,10 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) /* Reset PMU */ kvm_pmu_vcpu_reset(vcpu); + /* Default workaround setup is enabled (if supported) */ + if (kvm_arm_have_ssbd() == KVM_SSBD_KERNEL) + vcpu->arch.workaround_flags |= VCPU_WORKAROUND_2_FLAG; + /* Reset timer */ return kvm_timer_vcpu_reset(vcpu); } diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index c4762bef13c6..c95ab4c5a475 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -405,7 +405,7 @@ static int kvm_psci_call(struct kvm_vcpu *vcpu) int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) { u32 func_id = smccc_get_function(vcpu); - u32 val = PSCI_RET_NOT_SUPPORTED; + u32 val = SMCCC_RET_NOT_SUPPORTED; u32 feature; switch (func_id) { @@ -417,7 +417,21 @@ int kvm_hvc_call_handler(struct kvm_vcpu *vcpu) switch(feature) { case ARM_SMCCC_ARCH_WORKAROUND_1: if (kvm_arm_harden_branch_predictor()) - val = 0; + val = SMCCC_RET_SUCCESS; + break; + case ARM_SMCCC_ARCH_WORKAROUND_2: + switch (kvm_arm_have_ssbd()) { + case KVM_SSBD_FORCE_DISABLE: + case KVM_SSBD_UNKNOWN: + break; + case KVM_SSBD_KERNEL: + val = SMCCC_RET_SUCCESS; + break; + case KVM_SSBD_FORCE_ENABLE: + case KVM_SSBD_MITIGATED: + val = SMCCC_RET_NOT_REQUIRED; + break; + } break; } break; From 87c021a8143b6133e0085a8162f2a127462b54a3 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 1 Jun 2018 11:10:13 +0100 Subject: [PATCH 47/53] arm64/sve: Thin out initialisation sanity-checks for sve_max_vl Now that the kernel SVE support is reasonably mature, it is excessive to default sve_max_vl to the invalid value -1 and then sprinkle WARN_ON()s around the place to make sure it has been initialised before use. The cpufeatures code already runs pretty early, and will ensure sve_max_vl gets initialised. This patch initialises sve_max_vl to something sane that will be supported by every SVE implementation, and removes most of the sanity checks. The checks in find_supported_vector_length() are retained for now. If anything goes horribly wrong, we are likely to trip a check here sooner or later. 
Signed-off-by: Dave Martin Acked-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/fpsimd.c | 17 ++++------------- arch/arm64/kernel/ptrace.c | 3 --- 2 files changed, 4 insertions(+), 16 deletions(-) diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 3db8ed530e56..d15f38ad70cb 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -128,7 +128,7 @@ static int sve_default_vl = -1; #ifdef CONFIG_ARM64_SVE /* Maximum supported vector length across all CPUs (initially poisoned) */ -int __ro_after_init sve_max_vl = -1; +int __ro_after_init sve_max_vl = SVE_VL_MIN; /* Set of available vector lengths, as vq_to_bit(vq): */ static __ro_after_init DECLARE_BITMAP(sve_vq_map, SVE_VQ_MAX); static void __percpu *efi_sve_state; @@ -359,22 +359,13 @@ static int sve_proc_do_default_vl(struct ctl_table *table, int write, return ret; /* Writing -1 has the special meaning "set to max": */ - if (vl == -1) { - /* Fail safe if sve_max_vl wasn't initialised */ - if (WARN_ON(!sve_vl_valid(sve_max_vl))) - vl = SVE_VL_MIN; - else - vl = sve_max_vl; - - goto chosen; - } + if (vl == -1) + vl = sve_max_vl; if (!sve_vl_valid(vl)) return -EINVAL; - vl = find_supported_vector_length(vl); -chosen: - sve_default_vl = vl; + sve_default_vl = find_supported_vector_length(vl); return 0; } diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index f847285d96f3..bd732644c2f6 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -766,9 +766,6 @@ static void sve_init_header_from_task(struct user_sve_header *header, vq = sve_vq_from_vl(header->vl); header->max_vl = sve_max_vl; - if (WARN_ON(!sve_vl_valid(sve_max_vl))) - header->max_vl = header->vl; - header->size = SVE_PT_SIZE(vq, header->flags); header->max_size = SVE_PT_SIZE(sve_vq_from_vl(header->max_vl), SVE_PT_REGS_SVE); From 94b07c1f8c39c6d839df35fa28ffd1785d385897 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Fri, 1 Jun 2018 11:10:14 +0100 Subject: [PATCH 48/53] arm64: signal: Report signal frame size to userspace via auxv MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stateful CPU architecture extensions may require the signal frame to grow to a size that exceeds the arch's MINSIGSTKSZ #define. However, changing this #define is an ABI break. To allow userspace the option of determining the signal frame size in a more forwards-compatible way, this patch adds a new auxv entry tagged with AT_MINSIGSTKSZ, which provides the maximum signal frame size that the process can observe during its lifetime. If AT_MINSIGSTKSZ is absent from the aux vector, the caller can assume that the MINSIGSTKSZ #define is sufficient. This allows for a consistent interface with older kernels that do not provide AT_MINSIGSTKSZ. The idea is that libc could expose this via sysconf() or some similar mechanism. There is deliberately no AT_SIGSTKSZ. The kernel knows nothing about userspace's own stack overheads and should not pretend to know. For arm64: The primary motivation for this interface is the Scalable Vector Extension, which can require at least 4KB or so of extra space in the signal frame for the largest hardware implementations. To determine the correct value, a "Christmas tree" mode (via the add_all argument) is added to setup_sigframe_layout(), to simulate addition of all possible records to the signal frame at maximum possible size. 
If this procedure goes wrong somehow, resulting in a stupidly large frame layout and hence failure of sigframe_alloc() to allocate a record to the frame, then this is indicative of a kernel bug. In this case, we WARN() and no attempt is made to populate AT_MINSIGSTKSZ for userspace. For arm64 SVE: The SVE context block in the signal frame needs to be considered too when computing the maximum possible signal frame size. Because the size of this block depends on the vector length, this patch computes the size based not on the thread's current vector length but instead on the maximum possible vector length: this determines the maximum size of SVE context block that can be observed in any signal frame for the lifetime of the process. Signed-off-by: Dave Martin Acked-by: Will Deacon Cc: Ard Biesheuvel Cc: Alex Bennée Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/elf.h | 13 +++++++ arch/arm64/include/asm/processor.h | 5 +++ arch/arm64/include/uapi/asm/auxvec.h | 3 +- arch/arm64/kernel/cpufeature.c | 1 + arch/arm64/kernel/signal.c | 52 ++++++++++++++++++++++++---- 5 files changed, 66 insertions(+), 8 deletions(-) diff --git a/arch/arm64/include/asm/elf.h b/arch/arm64/include/asm/elf.h index fac1c4de7898..433b9554c6a1 100644 --- a/arch/arm64/include/asm/elf.h +++ b/arch/arm64/include/asm/elf.h @@ -121,6 +121,9 @@ #ifndef __ASSEMBLY__ +#include +#include /* for signal_minsigstksz, used by ARCH_DLINFO */ + typedef unsigned long elf_greg_t; #define ELF_NGREG (sizeof(struct user_pt_regs) / sizeof(elf_greg_t)) @@ -148,6 +151,16 @@ typedef struct user_fpsimd_state elf_fpregset_t; do { \ NEW_AUX_ENT(AT_SYSINFO_EHDR, \ (elf_addr_t)current->mm->context.vdso); \ + \ + /* \ + * Should always be nonzero unless there's a kernel bug. \ + * If we haven't determined a sensible value to give to \ + * userspace, omit the entry: \ + */ \ + if (likely(signal_minsigstksz)) \ + NEW_AUX_ENT(AT_MINSIGSTKSZ, signal_minsigstksz); \ + else \ + NEW_AUX_ENT(AT_IGNORE, 0); \ } while (0) #define ARCH_HAS_SETUP_ADDITIONAL_PAGES diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h index 767598932549..65ab83e8926e 100644 --- a/arch/arm64/include/asm/processor.h +++ b/arch/arm64/include/asm/processor.h @@ -35,6 +35,8 @@ #ifdef __KERNEL__ #include +#include +#include #include #include @@ -244,6 +246,9 @@ void cpu_enable_pan(const struct arm64_cpu_capabilities *__unused); void cpu_enable_cache_maint_trap(const struct arm64_cpu_capabilities *__unused); void cpu_clear_disr(const struct arm64_cpu_capabilities *__unused); +extern unsigned long __ro_after_init signal_minsigstksz; /* sigframe size */ +extern void __init minsigstksz_setup(void); + /* Userspace interface for PR_SVE_{SET,GET}_VL prctl()s: */ #define SVE_SET_VL(arg) sve_set_current_vl(arg) #define SVE_GET_VL() sve_get_current_vl() diff --git a/arch/arm64/include/uapi/asm/auxvec.h b/arch/arm64/include/uapi/asm/auxvec.h index ec0a86d484e1..743c0b84fd30 100644 --- a/arch/arm64/include/uapi/asm/auxvec.h +++ b/arch/arm64/include/uapi/asm/auxvec.h @@ -19,7 +19,8 @@ /* vDSO location */ #define AT_SYSINFO_EHDR 33 +#define AT_MINSIGSTKSZ 51 /* stack needed for signal delivery */ -#define AT_VECTOR_SIZE_ARCH 1 /* entries in ARCH_DLINFO */ +#define AT_VECTOR_SIZE_ARCH 2 /* entries in ARCH_DLINFO */ #endif diff --git a/arch/arm64/kernel/cpufeature.c b/arch/arm64/kernel/cpufeature.c index fbee8c17a4e6..d2856b129097 100644 --- a/arch/arm64/kernel/cpufeature.c +++ b/arch/arm64/kernel/cpufeature.c @@ -1618,6 +1618,7 @@ void __init 
setup_cpu_features(void) pr_info("emulated: Privileged Access Never (PAN) using TTBR0_EL1 switching\n"); sve_setup(); + minsigstksz_setup(); /* Advertise that we have computed the system capabilities */ set_sys_caps_initialised(); diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index 154b7d30145d..e7da5dba7ba8 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -17,6 +17,7 @@ * along with this program. If not, see . */ +#include #include #include #include @@ -570,8 +571,15 @@ badframe: return 0; } -/* Determine the layout of optional records in the signal frame */ -static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) +/* + * Determine the layout of optional records in the signal frame + * + * add_all: if true, lays out the biggest possible signal frame for + * this task; otherwise, generates a layout for the current state + * of the task. + */ +static int setup_sigframe_layout(struct rt_sigframe_user_layout *user, + bool add_all) { int err; @@ -581,7 +589,7 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) return err; /* fault information, if valid */ - if (current->thread.fault_code) { + if (add_all || current->thread.fault_code) { err = sigframe_alloc(user, &user->esr_offset, sizeof(struct esr_context)); if (err) @@ -591,8 +599,14 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) if (system_supports_sve()) { unsigned int vq = 0; - if (test_thread_flag(TIF_SVE)) - vq = sve_vq_from_vl(current->thread.sve_vl); + if (add_all || test_thread_flag(TIF_SVE)) { + int vl = sve_max_vl; + + if (!add_all) + vl = current->thread.sve_vl; + + vq = sve_vq_from_vl(vl); + } err = sigframe_alloc(user, &user->sve_offset, SVE_SIG_CONTEXT_SIZE(vq)); @@ -603,7 +617,6 @@ static int setup_sigframe_layout(struct rt_sigframe_user_layout *user) return sigframe_alloc_end(user); } - static int setup_sigframe(struct rt_sigframe_user_layout *user, struct pt_regs *regs, sigset_t *set) { @@ -701,7 +714,7 @@ static int get_sigframe(struct rt_sigframe_user_layout *user, int err; init_user_layout(user); - err = setup_sigframe_layout(user); + err = setup_sigframe_layout(user, false); if (err) return err; @@ -936,3 +949,28 @@ asmlinkage void do_notify_resume(struct pt_regs *regs, thread_flags = READ_ONCE(current_thread_info()->flags); } while (thread_flags & _TIF_WORK_MASK); } + +unsigned long __ro_after_init signal_minsigstksz; + +/* + * Determine the stack space required for guaranteed signal devliery. + * This function is used to populate AT_MINSIGSTKSZ at process startup. + * cpufeatures setup is assumed to be complete. + */ +void __init minsigstksz_setup(void) +{ + struct rt_sigframe_user_layout user; + + init_user_layout(&user); + + /* + * If this fails, SIGFRAME_MAXSZ needs to be enlarged. It won't + * be big enough, but it's our best guess: + */ + if (WARN_ON(setup_sigframe_layout(&user, true))) + return; + + signal_minsigstksz = sigframe_size(&user) + + round_up(sizeof(struct frame_record), 16) + + 16; /* max alignment padding */ +} From 38500be10e46c09e20b1a0e9c6cb94e1f0a86610 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Sat, 2 Jun 2018 10:42:54 +0100 Subject: [PATCH 49/53] arm64: KVM: Move VCPU_WORKAROUND_2_FLAG macros to the top of the file This is to avoid potential merging conflicts between commit 55e3748e8902 ("arm64: KVM: Add ARCH_WORKAROUND_2 support for guests") and the KVM tree. 
Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/kvm_asm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index d4fbb1356c4c..951b2076a5e2 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -20,6 +20,9 @@ #include +#define VCPU_WORKAROUND_2_FLAG_SHIFT 0 +#define VCPU_WORKAROUND_2_FLAG (_AC(1, UL) << VCPU_WORKAROUND_2_FLAG_SHIFT) + #define ARM_EXIT_WITH_SERROR_BIT 31 #define ARM_EXCEPTION_CODE(x) ((x) & ~(1U << ARM_EXIT_WITH_SERROR_BIT)) #define ARM_SERROR_PENDING(x) !!((x) & (1U << ARM_EXIT_WITH_SERROR_BIT)) @@ -33,9 +36,6 @@ #define KVM_ARM64_DEBUG_DIRTY_SHIFT 0 #define KVM_ARM64_DEBUG_DIRTY (1 << KVM_ARM64_DEBUG_DIRTY_SHIFT) -#define VCPU_WORKAROUND_2_FLAG_SHIFT 0 -#define VCPU_WORKAROUND_2_FLAG (_AC(1, UL) << VCPU_WORKAROUND_2_FLAG_SHIFT) - /* Translate a kernel address of @sym into its equivalent linear mapping */ #define kvm_ksym_ref(sym) \ ({ \ From 94a5d8790e79ab78f499d2d9f1ff2cab63849d9f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 5 Jun 2018 13:50:07 +0200 Subject: [PATCH 50/53] arm64: cpu_errata: include required headers Without including psci.h and arm-smccc.h, we now get a build failure in some configurations: arch/arm64/kernel/cpu_errata.c: In function 'arm64_update_smccc_conduit': arch/arm64/kernel/cpu_errata.c:278:10: error: 'psci_ops' undeclared (first use in this function); did you mean 'sysfs_ops'? arch/arm64/kernel/cpu_errata.c: In function 'arm64_set_ssbd_mitigation': arch/arm64/kernel/cpu_errata.c:311:3: error: implicit declaration of function 'arm_smccc_1_1_hvc' [-Werror=implicit-function-declaration] arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_2, state, NULL); Signed-off-by: Arnd Bergmann Signed-off-by: Catalin Marinas --- arch/arm64/kernel/cpu_errata.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index cf37ca6fa5f2..97681274da6a 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -16,6 +16,8 @@ * along with this program. If not, see . */ +#include +#include #include #include #include From 2520e627dbeecf7ccccc50c969504b59e1a3294b Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Tue, 5 Jun 2018 15:35:03 +0100 Subject: [PATCH 51/53] ACPI / PPTT: fix build when CONFIG_ACPI_PPTT is not enabled Though CONFIG_ACPI_PPTT is selected by platforms and not user visible, it may be useful to support the build with CONFIG_ACPI_PPTT disabled. This patch adds the missing dummy/boilerplate implementation to fix the build. Acked-by: "Rafael J.
Wysocki" Signed-off-by: Sudeep Holla Signed-off-by: Catalin Marinas --- include/linux/acpi.h | 15 +++++++++++++++ include/linux/cacheinfo.h | 2 +- 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 032e12a2fdc2..d2639cd2eafc 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1297,8 +1297,23 @@ static inline int lpit_read_residency_count_address(u64 *address) } #endif +#ifdef CONFIG_ACPI_PPTT int find_acpi_cpu_topology(unsigned int cpu, int level); int find_acpi_cpu_topology_package(unsigned int cpu); int find_acpi_cpu_cache_topology(unsigned int cpu, int level); +#else +static inline int find_acpi_cpu_topology(unsigned int cpu, int level) +{ + return -EINVAL; +} +static inline int find_acpi_cpu_topology_package(unsigned int cpu) +{ + return -EINVAL; +} +static inline int find_acpi_cpu_cache_topology(unsigned int cpu, int level) +{ + return -EINVAL; +} +#endif #endif /*_LINUX_ACPI_H*/ diff --git a/include/linux/cacheinfo.h b/include/linux/cacheinfo.h index 89397e30e269..70e19bc6cc9f 100644 --- a/include/linux/cacheinfo.h +++ b/include/linux/cacheinfo.h @@ -98,7 +98,7 @@ struct cpu_cacheinfo *get_cpu_cacheinfo(unsigned int cpu); int init_cache_level(unsigned int cpu); int populate_cache_leaves(unsigned int cpu); int cache_setup_acpi(unsigned int cpu); -#ifndef CONFIG_ACPI +#ifndef CONFIG_ACPI_PPTT /* * acpi_find_last_cache_level is only called on ACPI enabled * platforms using the PPTT for topology. This means that if From e156ab71a974737c279530e3b868131291fe677e Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 6 Jun 2018 11:38:46 -0500 Subject: [PATCH 52/53] arm64: topology: Avoid checking numa mask for scheduler MC selection The numa mask subset check can often lead to system hang or crash during CPU hotplug and system suspend operation if NUMA is disabled. This is mostly observed on HMP systems where the CPU compute capacities are different and ends up in different scheduler domains. Since cpumask_of_node is returned instead core_sibling, the scheduler is confused with incorrect cpumasks(e.g. one CPU in two different sched domains at the same time) on CPU hotplug. Lets disable the NUMA siblings checks for the time being, as NUMA in socket machines have LLC's that will assure that the scheduler topology isn't "borken". The NUMA check exists to assure that if a LLC within a socket crosses NUMA nodes/chiplets the scheduler domains remain consistent. This code will likely have to be re-enabled in the near future once the NUMA mask story is sorted. At the moment its not necessary because the NUMA in socket machines LLC's are contained within the NUMA domains. Further, as a defensive mechanism during hot-plug, lets assure that the LLC siblings are also masked. 
Reported-by: Geert Uytterhoeven Reviewed-by: Sudeep Holla Tested-by: Geert Uytterhoeven Signed-off-by: Jeremy Linton Signed-off-by: Catalin Marinas --- arch/arm64/kernel/topology.c | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 7415c166281f..f845a8617812 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -215,13 +215,8 @@ EXPORT_SYMBOL_GPL(cpu_topology); const struct cpumask *cpu_coregroup_mask(int cpu) { - const cpumask_t *core_mask = cpumask_of_node(cpu_to_node(cpu)); + const cpumask_t *core_mask = &cpu_topology[cpu].core_sibling; - /* Find the smaller of NUMA, core or LLC siblings */ - if (cpumask_subset(&cpu_topology[cpu].core_sibling, core_mask)) { - /* not numa in package, lets use the package siblings */ - core_mask = &cpu_topology[cpu].core_sibling; - } if (cpu_topology[cpu].llc_id != -1) { if (cpumask_subset(&cpu_topology[cpu].llc_siblings, core_mask)) core_mask = &cpu_topology[cpu].llc_siblings; @@ -239,8 +234,10 @@ static void update_siblings_masks(unsigned int cpuid) for_each_possible_cpu(cpu) { cpu_topo = &cpu_topology[cpu]; - if (cpuid_topo->llc_id == cpu_topo->llc_id) + if (cpuid_topo->llc_id == cpu_topo->llc_id) { cpumask_set_cpu(cpu, &cpuid_topo->llc_siblings); + cpumask_set_cpu(cpuid, &cpu_topo->llc_siblings); + } if (cpuid_topo->package_id != cpu_topo->package_id) continue; From 0fe42512b2f03f9e5a20b9f55ef1013a68b4cd48 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Thu, 7 Jun 2018 12:32:05 +0100 Subject: [PATCH 53/53] arm64: Fix syscall restarting around signal suppressed by tracer Commit 17c2895 ("arm64: Abstract syscallno manipulation") abstracts out the pt_regs.syscallno value for a syscall cancelled by a tracer as NO_SYSCALL, and provides helpers to set and check for this condition. However, the way this was implemented has the unintended side-effect of disabling part of the syscall restart logic. This comes about because the second in_syscall() check in do_signal() re-evaluates the "in a syscall" condition based on the updated pt_regs instead of the original pt_regs. forget_syscall() is explicitly called prior to the second check in order to prevent restart logic in the ret_to_user path being spuriously triggered, which means that the second in_syscall() check always yields false. This triggers a failure in tools/testing/selftests/seccomp/seccomp_bpf.c, when using ptrace to suppress a signal that interrupts a nanosleep() syscall. Misbehaviour of this type is only expected in the case where a tracer suppresses a signal and the target process is either being single-stepped or the interrupted syscall attempts to restart via -ERESTARTBLOCK. This patch restores the old behaviour by performing the in_syscall() check only once at the start of the function. Fixes: 17c289586009 ("arm64: Abstract syscallno manipulation") Signed-off-by: Dave Martin Reported-by: Sumit Semwal Cc: Will Deacon Cc: # 4.14.x- Signed-off-by: Catalin Marinas --- arch/arm64/kernel/signal.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/signal.c b/arch/arm64/kernel/signal.c index e7da5dba7ba8..511af13e8d8f 100644 --- a/arch/arm64/kernel/signal.c +++ b/arch/arm64/kernel/signal.c @@ -843,11 +843,12 @@ static void do_signal(struct pt_regs *regs) unsigned long continue_addr = 0, restart_addr = 0; int retval = 0; struct ksignal ksig; + bool syscall = in_syscall(regs); /* * If we were from a system call, check for system call restarting...
*/ - if (in_syscall(regs)) { + if (syscall) { continue_addr = regs->pc; restart_addr = continue_addr - (compat_thumb_mode(regs) ? 2 : 4); retval = regs->regs[0]; @@ -899,7 +900,7 @@ static void do_signal(struct pt_regs *regs) * Handle restarting a different system call. As above, if a debugger * has chosen to restart at a different PC, ignore the restart. */ - if (in_syscall(regs) && regs->pc == restart_addr) { + if (syscall && regs->pc == restart_addr) { if (retval == -ERESTART_RESTARTBLOCK) setup_restart_syscall(regs); user_rewind_single_step(current);