From 4bc352ffb39e4eec253e70f8c076f2f48a6c1926 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Tue, 10 Apr 2018 11:36:42 +0100 Subject: [PATCH 1/6] arm64: KVM: Use SMCCC_ARCH_WORKAROUND_1 for Falkor BP hardening The function SMCCC_ARCH_WORKAROUND_1 was introduced as part of SMC V1.1 Calling Convention to mitigate CVE-2017-5715. This patch uses the standard call SMCCC_ARCH_WORKAROUND_1 for Falkor chips instead of Silicon provider service ID 0xC2001700. Cc: # 4.14+ Signed-off-by: Shanker Donthineni [maz: reworked errata framework integration] Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/include/asm/cpucaps.h | 13 +++---- arch/arm64/include/asm/kvm_asm.h | 2 - arch/arm64/kernel/bpi.S | 8 ---- arch/arm64/kernel/cpu_errata.c | 66 +++++++++----------------------- arch/arm64/kvm/hyp/entry.S | 12 ------ arch/arm64/kvm/hyp/switch.c | 10 ----- 6 files changed, 25 insertions(+), 86 deletions(-) diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index a311880feb0f..bc51b72fafd4 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -43,13 +43,12 @@ #define ARM64_SVE 22 #define ARM64_UNMAP_KERNEL_AT_EL0 23 #define ARM64_HARDEN_BRANCH_PREDICTOR 24 -#define ARM64_HARDEN_BP_POST_GUEST_EXIT 25 -#define ARM64_HAS_RAS_EXTN 26 -#define ARM64_WORKAROUND_843419 27 -#define ARM64_HAS_CACHE_IDC 28 -#define ARM64_HAS_CACHE_DIC 29 -#define ARM64_HW_DBM 30 +#define ARM64_HAS_RAS_EXTN 25 +#define ARM64_WORKAROUND_843419 26 +#define ARM64_HAS_CACHE_IDC 27 +#define ARM64_HAS_CACHE_DIC 28 +#define ARM64_HW_DBM 29 -#define ARM64_NCAPS 31 +#define ARM64_NCAPS 30 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h index d53d40704416..f6648a3e4152 100644 --- a/arch/arm64/include/asm/kvm_asm.h +++ b/arch/arm64/include/asm/kvm_asm.h @@ -71,8 +71,6 @@ extern u32 __kvm_get_mdcr_el2(void); extern u32 __init_stage2_translation(void); -extern void __qcom_hyp_sanitize_btac_predictors(void); - #else /* __ASSEMBLY__ */ .macro get_host_ctxt reg, tmp diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S index bb0b67722e86..9404f6aecda7 100644 --- a/arch/arm64/kernel/bpi.S +++ b/arch/arm64/kernel/bpi.S @@ -74,14 +74,6 @@ ENTRY(__bp_harden_hyp_vecs_end) .popsection -ENTRY(__qcom_hyp_sanitize_link_stack_start) - stp x29, x30, [sp, #-16]! - .rept 16 - bl . + 4 - .endr - ldp x29, x30, [sp], #16 -ENTRY(__qcom_hyp_sanitize_link_stack_end) - .macro smccc_workaround_1 inst sub sp, sp, #(8 * 4) stp x2, x3, [sp, #(8 * 0)] diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 9262ec57f5ab..103c53fb0b4d 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -87,8 +87,6 @@ atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); #ifdef CONFIG_KVM -extern char __qcom_hyp_sanitize_link_stack_start[]; -extern char __qcom_hyp_sanitize_link_stack_end[]; extern char __smccc_workaround_1_smc_start[]; extern char __smccc_workaround_1_smc_end[]; extern char __smccc_workaround_1_hvc_start[]; @@ -132,8 +130,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, spin_unlock(&bp_lock); } #else -#define __qcom_hyp_sanitize_link_stack_start NULL -#define __qcom_hyp_sanitize_link_stack_end NULL #define __smccc_workaround_1_smc_start NULL #define __smccc_workaround_1_smc_end NULL #define __smccc_workaround_1_hvc_start NULL @@ -178,12 +174,25 @@ static void call_hvc_arch_workaround_1(void) arm_smccc_1_1_hvc(ARM_SMCCC_ARCH_WORKAROUND_1, NULL); } +static void qcom_link_stack_sanitization(void) +{ + u64 tmp; + + asm volatile("mov %0, x30 \n" + ".rept 16 \n" + "bl . + 4 \n" + ".endr \n" + "mov x30, %0 \n" + : "=&r" (tmp)); +} + static void enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) { bp_hardening_cb_t cb; void *smccc_start, *smccc_end; struct arm_smccc_res res; + u32 midr = read_cpuid_id(); if (!entry->matches(entry, SCOPE_LOCAL_CPU)) return; @@ -216,30 +225,14 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) return; } + if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) || + ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) + cb = qcom_link_stack_sanitization; + install_bp_hardening_cb(entry, cb, smccc_start, smccc_end); return; } - -static void qcom_link_stack_sanitization(void) -{ - u64 tmp; - - asm volatile("mov %0, x30 \n" - ".rept 16 \n" - "bl . + 4 \n" - ".endr \n" - "mov x30, %0 \n" - : "=&r" (tmp)); -} - -static void -qcom_enable_link_stack_sanitization(const struct arm64_cpu_capabilities *entry) -{ - install_bp_hardening_cb(entry, qcom_link_stack_sanitization, - __qcom_hyp_sanitize_link_stack_start, - __qcom_hyp_sanitize_link_stack_end); -} #endif /* CONFIG_HARDEN_BRANCH_PREDICTOR */ #define CAP_MIDR_RANGE(model, v_min, r_min, v_max, r_max) \ @@ -324,27 +317,11 @@ static const struct midr_range arm64_bp_harden_smccc_cpus[] = { MIDR_ALL_VERSIONS(MIDR_CORTEX_A75), MIDR_ALL_VERSIONS(MIDR_BRCM_VULCAN), MIDR_ALL_VERSIONS(MIDR_CAVIUM_THUNDERX2), - {}, -}; - -static const struct midr_range qcom_bp_harden_cpus[] = { MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR_V1), MIDR_ALL_VERSIONS(MIDR_QCOM_FALKOR), {}, }; -static const struct arm64_cpu_capabilities arm64_bp_harden_list[] = { - { - CAP_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus), - .cpu_enable = enable_smccc_arch_workaround_1, - }, - { - CAP_MIDR_RANGE_LIST(qcom_bp_harden_cpus), - .cpu_enable = qcom_enable_link_stack_sanitization, - }, - {}, -}; - #endif #ifndef ERRATA_MIDR_ALL_VERSIONS @@ -495,13 +472,8 @@ const struct arm64_cpu_capabilities arm64_errata[] = { { .capability = ARM64_HARDEN_BRANCH_PREDICTOR, .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, - .matches = multi_entry_cap_matches, - .cpu_enable = multi_entry_cap_cpu_enable, - .match_list = arm64_bp_harden_list, - }, - { - .capability = ARM64_HARDEN_BP_POST_GUEST_EXIT, - ERRATA_MIDR_RANGE_LIST(qcom_bp_harden_cpus), + .cpu_enable = enable_smccc_arch_workaround_1, + ERRATA_MIDR_RANGE_LIST(arm64_bp_harden_smccc_cpus), }, #endif #ifdef CONFIG_HARDEN_EL2_VECTORS diff --git a/arch/arm64/kvm/hyp/entry.S b/arch/arm64/kvm/hyp/entry.S index 1f458f7c3b44..e41a161d313a 100644 --- a/arch/arm64/kvm/hyp/entry.S +++ b/arch/arm64/kvm/hyp/entry.S @@ -209,15 +209,3 @@ alternative_endif eret ENDPROC(__fpsimd_guest_restore) - -ENTRY(__qcom_hyp_sanitize_btac_predictors) - /** - * Call SMC64 with Silicon provider serviceID 23<<8 (0xc2001700) - * 0xC2000000-0xC200FFFF: assigned to SiP Service Calls - * b15-b0: contains SiP functionID - */ - movz x0, #0x1700 - movk x0, #0xc200, lsl #16 - smc #0 - ret -ENDPROC(__qcom_hyp_sanitize_btac_predictors) diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index 07b572173265..d9645236e474 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -472,16 +472,6 @@ int __hyp_text __kvm_vcpu_run_nvhe(struct kvm_vcpu *vcpu) /* And we're baaack! */ } while (fixup_guest_exit(vcpu, &exit_code)); - if (cpus_have_const_cap(ARM64_HARDEN_BP_POST_GUEST_EXIT)) { - u32 midr = read_cpuid_id(); - - /* Apply BTAC predictors mitigation to all Falkor chips */ - if (((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR) || - ((midr & MIDR_CPU_MODEL_MASK) == MIDR_QCOM_FALKOR_V1)) { - __qcom_hyp_sanitize_btac_predictors(); - } - } - fp_enabled = __fpsimd_enabled_nvhe(); __sysreg_save_state_nvhe(guest_ctxt); From 8892b71885df7d6d7b0f491f9a8e2bb12fd4afdd Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 10 Apr 2018 11:36:43 +0100 Subject: [PATCH 2/6] arm64: capabilities: Rework EL2 vector hardening entry Since 5e7951ce19ab ("arm64: capabilities: Clean up midr range helpers"), capabilities must be represented with a single entry. If multiple CPU types can use the same capability, then they need to be enumerated in a list. The EL2 hardening stuff (which affects both A57 and A72) managed to escape the conversion in the above patch thanks to the 4.17 merge window. Let's fix it now. Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/kernel/cpu_errata.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 103c53fb0b4d..4e9f6a36d5e6 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -324,8 +324,14 @@ static const struct midr_range arm64_bp_harden_smccc_cpus[] = { #endif -#ifndef ERRATA_MIDR_ALL_VERSIONS -#define ERRATA_MIDR_ALL_VERSIONS(x) MIDR_ALL_VERSIONS(x) +#ifdef CONFIG_HARDEN_EL2_VECTORS + +static const struct midr_range arm64_harden_el2_vectors[] = { + MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), + MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + {}, +}; + #endif const struct arm64_cpu_capabilities arm64_errata[] = { @@ -478,14 +484,10 @@ const struct arm64_cpu_capabilities arm64_errata[] = { #endif #ifdef CONFIG_HARDEN_EL2_VECTORS { - .desc = "Cortex-A57 EL2 vector hardening", + .desc = "EL2 vector hardening", .capability = ARM64_HARDEN_EL2_VECTORS, - ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A57), - }, - { - .desc = "Cortex-A72 EL2 vector hardening", - .capability = ARM64_HARDEN_EL2_VECTORS, - ERRATA_MIDR_ALL_VERSIONS(MIDR_CORTEX_A72), + .type = ARM64_CPUCAP_LOCAL_CPU_ERRATUM, + ERRATA_MIDR_RANGE_LIST(arm64_harden_el2_vectors), }, #endif { From 22765f30dbaf1118c6ff0fcb8b99c9f2b4d396d5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 10 Apr 2018 11:36:44 +0100 Subject: [PATCH 3/6] arm64: Get rid of __smccc_workaround_1_hvc_* The very existence of __smccc_workaround_1_hvc_* is a thinko, as KVM will never use a HVC call to perform the branch prediction invalidation. Even as a nested hypervisor, it would use an SMC instruction. Let's get rid of it. Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/kernel/bpi.S | 12 ++---------- arch/arm64/kernel/cpu_errata.c | 9 +++------ 2 files changed, 5 insertions(+), 16 deletions(-) diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S index 9404f6aecda7..279e6ced7611 100644 --- a/arch/arm64/kernel/bpi.S +++ b/arch/arm64/kernel/bpi.S @@ -74,21 +74,13 @@ ENTRY(__bp_harden_hyp_vecs_end) .popsection -.macro smccc_workaround_1 inst +ENTRY(__smccc_workaround_1_smc_start) sub sp, sp, #(8 * 4) stp x2, x3, [sp, #(8 * 0)] stp x0, x1, [sp, #(8 * 2)] mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 - \inst #0 + smc #0 ldp x2, x3, [sp, #(8 * 0)] ldp x0, x1, [sp, #(8 * 2)] add sp, sp, #(8 * 4) -.endm - -ENTRY(__smccc_workaround_1_smc_start) - smccc_workaround_1 smc ENTRY(__smccc_workaround_1_smc_end) - -ENTRY(__smccc_workaround_1_hvc_start) - smccc_workaround_1 hvc -ENTRY(__smccc_workaround_1_hvc_end) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 4e9f6a36d5e6..fc9d017f2d33 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -89,8 +89,6 @@ DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); #ifdef CONFIG_KVM extern char __smccc_workaround_1_smc_start[]; extern char __smccc_workaround_1_smc_end[]; -extern char __smccc_workaround_1_hvc_start[]; -extern char __smccc_workaround_1_hvc_end[]; static void __copy_hyp_vect_bpi(int slot, const char *hyp_vecs_start, const char *hyp_vecs_end) @@ -132,8 +130,6 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, #else #define __smccc_workaround_1_smc_start NULL #define __smccc_workaround_1_smc_end NULL -#define __smccc_workaround_1_hvc_start NULL -#define __smccc_workaround_1_hvc_end NULL static void __install_bp_hardening_cb(bp_hardening_cb_t fn, const char *hyp_vecs_start, @@ -207,8 +203,9 @@ enable_smccc_arch_workaround_1(const struct arm64_cpu_capabilities *entry) if ((int)res.a0 < 0) return; cb = call_hvc_arch_workaround_1; - smccc_start = __smccc_workaround_1_hvc_start; - smccc_end = __smccc_workaround_1_hvc_end; + /* This is a guest, no need to patch KVM vectors */ + smccc_start = NULL; + smccc_end = NULL; break; case PSCI_CONDUIT_SMC: From e8b22d0f4500c7bb6aab879d4e32b2a00c89d5f8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 10 Apr 2018 11:36:45 +0100 Subject: [PATCH 4/6] arm64: Move the content of bpi.S to hyp-entry.S bpi.S was introduced as we were starting to build the Spectre v2 mitigation framework, and it was rather unclear that it would become strictly KVM specific. Now that the picture is a lot clearer, let's move the content of that file to hyp-entry.S, where it actually belong. Signed-off-by: Marc Zyngier Signed-off-by: Will Deacon --- arch/arm64/kernel/Makefile | 2 - arch/arm64/kernel/bpi.S | 86 ---------------------------------- arch/arm64/kernel/cpu_errata.c | 4 +- arch/arm64/kvm/hyp/hyp-entry.S | 64 ++++++++++++++++++++++++- 4 files changed, 65 insertions(+), 91 deletions(-) delete mode 100644 arch/arm64/kernel/bpi.S diff --git a/arch/arm64/kernel/Makefile b/arch/arm64/kernel/Makefile index 9b55a3f24be7..bf825f38d206 100644 --- a/arch/arm64/kernel/Makefile +++ b/arch/arm64/kernel/Makefile @@ -55,8 +55,6 @@ arm64-reloc-test-y := reloc_test_core.o reloc_test_syms.o arm64-obj-$(CONFIG_CRASH_DUMP) += crash_dump.o arm64-obj-$(CONFIG_ARM_SDE_INTERFACE) += sdei.o -arm64-obj-$(CONFIG_KVM_INDIRECT_VECTORS)+= bpi.o - obj-y += $(arm64-obj-y) vdso/ probes/ obj-m += $(arm64-obj-m) head-y := head.o diff --git a/arch/arm64/kernel/bpi.S b/arch/arm64/kernel/bpi.S deleted file mode 100644 index 279e6ced7611..000000000000 --- a/arch/arm64/kernel/bpi.S +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Contains CPU specific branch predictor invalidation sequences - * - * Copyright (C) 2018 ARM Ltd. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -#include -#include - -#include -#include - -.macro hyp_ventry - .align 7 -1: .rept 27 - nop - .endr -/* - * The default sequence is to directly branch to the KVM vectors, - * using the computed offset. This applies for VHE as well as - * !ARM64_HARDEN_EL2_VECTORS. - * - * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced - * with: - * - * stp x0, x1, [sp, #-16]! - * movz x0, #(addr & 0xffff) - * movk x0, #((addr >> 16) & 0xffff), lsl #16 - * movk x0, #((addr >> 32) & 0xffff), lsl #32 - * br x0 - * - * Where addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + 4. - * See kvm_patch_vector_branch for details. - */ -alternative_cb kvm_patch_vector_branch - b __kvm_hyp_vector + (1b - 0b) - nop - nop - nop - nop -alternative_cb_end -.endm - -.macro generate_vectors -0: - .rept 16 - hyp_ventry - .endr - .org 0b + SZ_2K // Safety measure -.endm - - - .text - .pushsection .hyp.text, "ax" - - .align 11 -ENTRY(__bp_harden_hyp_vecs_start) - .rept BP_HARDEN_EL2_SLOTS - generate_vectors - .endr -ENTRY(__bp_harden_hyp_vecs_end) - - .popsection - -ENTRY(__smccc_workaround_1_smc_start) - sub sp, sp, #(8 * 4) - stp x2, x3, [sp, #(8 * 0)] - stp x0, x1, [sp, #(8 * 2)] - mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 - smc #0 - ldp x2, x3, [sp, #(8 * 0)] - ldp x0, x1, [sp, #(8 * 2)] - add sp, sp, #(8 * 4) -ENTRY(__smccc_workaround_1_smc_end) diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index fc9d017f2d33..a900befadfe8 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -86,7 +86,7 @@ atomic_t arm64_el2_vector_last_slot = ATOMIC_INIT(-1); DEFINE_PER_CPU_READ_MOSTLY(struct bp_hardening_data, bp_hardening_data); -#ifdef CONFIG_KVM +#ifdef CONFIG_KVM_INDIRECT_VECTORS extern char __smccc_workaround_1_smc_start[]; extern char __smccc_workaround_1_smc_end[]; @@ -137,7 +137,7 @@ static void __install_bp_hardening_cb(bp_hardening_cb_t fn, { __this_cpu_write(bp_hardening_data.fn, fn); } -#endif /* CONFIG_KVM */ +#endif /* CONFIG_KVM_INDIRECT_VECTORS */ static void install_bp_hardening_cb(const struct arm64_cpu_capabilities *entry, bp_hardening_cb_t fn, diff --git a/arch/arm64/kvm/hyp/hyp-entry.S b/arch/arm64/kvm/hyp/hyp-entry.S index 87dfecce82b1..bffece27b5c1 100644 --- a/arch/arm64/kvm/hyp/hyp-entry.S +++ b/arch/arm64/kvm/hyp/hyp-entry.S @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015 - ARM Ltd + * Copyright (C) 2015-2018 - ARM Ltd * Author: Marc Zyngier * * This program is free software; you can redistribute it and/or modify @@ -24,6 +24,7 @@ #include #include #include +#include .text .pushsection .hyp.text, "ax" @@ -237,3 +238,64 @@ ENTRY(__kvm_hyp_vector) invalid_vect el1_fiq_invalid // FIQ 32-bit EL1 valid_vect el1_error // Error 32-bit EL1 ENDPROC(__kvm_hyp_vector) + +#ifdef CONFIG_KVM_INDIRECT_VECTORS +.macro hyp_ventry + .align 7 +1: .rept 27 + nop + .endr +/* + * The default sequence is to directly branch to the KVM vectors, + * using the computed offset. This applies for VHE as well as + * !ARM64_HARDEN_EL2_VECTORS. + * + * For ARM64_HARDEN_EL2_VECTORS configurations, this gets replaced + * with: + * + * stp x0, x1, [sp, #-16]! + * movz x0, #(addr & 0xffff) + * movk x0, #((addr >> 16) & 0xffff), lsl #16 + * movk x0, #((addr >> 32) & 0xffff), lsl #32 + * br x0 + * + * Where addr = kern_hyp_va(__kvm_hyp_vector) + vector-offset + 4. + * See kvm_patch_vector_branch for details. + */ +alternative_cb kvm_patch_vector_branch + b __kvm_hyp_vector + (1b - 0b) + nop + nop + nop + nop +alternative_cb_end +.endm + +.macro generate_vectors +0: + .rept 16 + hyp_ventry + .endr + .org 0b + SZ_2K // Safety measure +.endm + + .align 11 +ENTRY(__bp_harden_hyp_vecs_start) + .rept BP_HARDEN_EL2_SLOTS + generate_vectors + .endr +ENTRY(__bp_harden_hyp_vecs_end) + + .popsection + +ENTRY(__smccc_workaround_1_smc_start) + sub sp, sp, #(8 * 4) + stp x2, x3, [sp, #(8 * 0)] + stp x0, x1, [sp, #(8 * 2)] + mov w0, #ARM_SMCCC_ARCH_WORKAROUND_1 + smc #0 + ldp x2, x3, [sp, #(8 * 0)] + ldp x0, x1, [sp, #(8 * 2)] + add sp, sp, #(8 * 4) +ENTRY(__smccc_workaround_1_smc_end) +#endif From 0f468e221c3ae89d2fbe611a1a69ee554188519a Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 29 Mar 2018 15:13:22 +0200 Subject: [PATCH 5/6] arm64: assembler: add utility macros to push/pop stack frames We are going to add code to all the NEON crypto routines that will turn them into non-leaf functions, so we need to manage the stack frames. To make this less tedious and error prone, add some macros that take the number of callee saved registers to preserve and the extra size to allocate in the stack frame (for locals) and emit the ldp/stp sequences. Signed-off-by: Ard Biesheuvel Reviewed-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 63 ++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index 053d83e8db6f..fe2ff3efe1f0 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -565,4 +565,67 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU #endif .endm + /* + * frame_push - Push @regcount callee saved registers to the stack, + * starting at x19, as well as x29/x30, and set x29 to + * the new value of sp. Add @extra bytes of stack space + * for locals. + */ + .macro frame_push, regcount:req, extra + __frame st, \regcount, \extra + .endm + + /* + * frame_pop - Pop the callee saved registers from the stack that were + * pushed in the most recent call to frame_push, as well + * as x29/x30 and any extra stack space that may have been + * allocated. + */ + .macro frame_pop + __frame ld + .endm + + .macro __frame_regs, reg1, reg2, op, num + .if .Lframe_regcount == \num + \op\()r \reg1, [sp, #(\num + 1) * 8] + .elseif .Lframe_regcount > \num + \op\()p \reg1, \reg2, [sp, #(\num + 1) * 8] + .endif + .endm + + .macro __frame, op, regcount, extra=0 + .ifc \op, st + .if (\regcount) < 0 || (\regcount) > 10 + .error "regcount should be in the range [0 ... 10]" + .endif + .if ((\extra) % 16) != 0 + .error "extra should be a multiple of 16 bytes" + .endif + .ifdef .Lframe_regcount + .if .Lframe_regcount != -1 + .error "frame_push/frame_pop may not be nested" + .endif + .endif + .set .Lframe_regcount, \regcount + .set .Lframe_extra, \extra + .set .Lframe_local_offset, ((\regcount + 3) / 2) * 16 + stp x29, x30, [sp, #-.Lframe_local_offset - .Lframe_extra]! + mov x29, sp + .endif + + __frame_regs x19, x20, \op, 1 + __frame_regs x21, x22, \op, 3 + __frame_regs x23, x24, \op, 5 + __frame_regs x25, x26, \op, 7 + __frame_regs x27, x28, \op, 9 + + .ifc \op, ld + .if .Lframe_regcount == -1 + .error "frame_push/frame_pop may not be nested" + .endif + ldp x29, x30, [sp], #.Lframe_local_offset + .Lframe_extra + .set .Lframe_regcount, -1 + .endif + .endm + #endif /* __ASM_ASSEMBLER_H */ From 24534b3511828c66215fdf1533d77a7bf2e1fdb2 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Thu, 29 Mar 2018 15:13:23 +0200 Subject: [PATCH 6/6] arm64: assembler: add macros to conditionally yield the NEON under PREEMPT Add support macros to conditionally yield the NEON (and thus the CPU) that may be called from the assembler code. In some cases, yielding the NEON involves saving and restoring a non trivial amount of context (especially in the CRC folding algorithms), and so the macro is split into three, and the code in between is only executed when the yield path is taken, allowing the context to be preserved. The third macro takes an optional label argument that marks the resume path after a yield has been performed. Signed-off-by: Ard Biesheuvel Reviewed-by: Dave Martin Signed-off-by: Will Deacon --- arch/arm64/include/asm/assembler.h | 73 ++++++++++++++++++++++++++++++ arch/arm64/kernel/asm-offsets.c | 3 ++ 2 files changed, 76 insertions(+) diff --git a/arch/arm64/include/asm/assembler.h b/arch/arm64/include/asm/assembler.h index fe2ff3efe1f0..0bcc98dbba56 100644 --- a/arch/arm64/include/asm/assembler.h +++ b/arch/arm64/include/asm/assembler.h @@ -628,4 +628,77 @@ USER(\label, ic ivau, \tmp2) // invalidate I line PoU .endif .endm +/* + * Check whether to yield to another runnable task from kernel mode NEON code + * (which runs with preemption disabled). + * + * if_will_cond_yield_neon + * // pre-yield patchup code + * do_cond_yield_neon + * // post-yield patchup code + * endif_yield_neon