From 40507403485fcb56b83d6ddfc954e9b08305054c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 4 Feb 2014 14:41:26 +0000 Subject: [PATCH 01/14] arm64: vdso: prevent ld from aligning PT_LOAD segments to 64k Whilst the text segment for our VDSO is marked as PT_LOAD in the ELF headers, it is mapped by the kernel and not actually subject to demand-paging. ld doesn't realise this, and emits a p_align field of 64k (the maximum supported page size), which conflicts with the load address picked by the kernel on 4k systems, which will be 4k aligned. This causes GDB to fail with "Failed to read a valid object file image from memory" when attempting to load the VDSO. This patch passes the -n option to ld, which prevents it from aligning PT_LOAD segments to the maximum page size. Cc: Reported-by: Kyle McMartin Acked-by: Kyle McMartin Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/Makefile b/arch/arm64/kernel/vdso/Makefile index d8064af42e62..6d20b7d162d8 100644 --- a/arch/arm64/kernel/vdso/Makefile +++ b/arch/arm64/kernel/vdso/Makefile @@ -48,7 +48,7 @@ $(obj-vdso): %.o: %.S # Actual build commands quiet_cmd_vdsold = VDSOL $@ - cmd_vdsold = $(CC) $(c_flags) -Wl,-T $^ -o $@ + cmd_vdsold = $(CC) $(c_flags) -Wl,-n -Wl,-T $^ -o $@ quiet_cmd_vdsoas = VDSOA $@ cmd_vdsoas = $(CC) $(a_flags) -c -o $@ $< From 5044bad43ee573d0b6d90e3ccb7a40c2c7d25eb4 Mon Sep 17 00:00:00 2001 From: Vinayak Kale Date: Wed, 5 Feb 2014 09:34:36 +0000 Subject: [PATCH 02/14] arm64: add DSB after icache flush in __flush_icache_all() Add DSB after icache flush to complete the cache maintenance operation. The function __flush_icache_all() is used only for user space mappings and an ISB is not required because of an exception return before executing user instructions. An exception return would behave like an ISB. Signed-off-by: Vinayak Kale Acked-by: Will Deacon Cc: Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/cacheflush.h | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/include/asm/cacheflush.h b/arch/arm64/include/asm/cacheflush.h index fea9ee327206..889324981aa4 100644 --- a/arch/arm64/include/asm/cacheflush.h +++ b/arch/arm64/include/asm/cacheflush.h @@ -116,6 +116,7 @@ extern void flush_dcache_page(struct page *); static inline void __flush_icache_all(void) { asm("ic ialluis"); + dsb(); } #define flush_dcache_mmap_lock(mapping) \ From ccc9e244eb1b9654915634827322932cbafd8244 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 4 Feb 2014 23:08:57 +0000 Subject: [PATCH 03/14] arm64: Align CMA sizes to PAGE_SIZE dma_alloc_from_contiguous takes number of pages for a size. Align up the dma size passed in to page size to avoid truncation and allocation failures on sizes less than PAGE_SIZE. Cc: Will Deacon Signed-off-by: Laura Abbott Signed-off-by: Catalin Marinas --- arch/arm64/mm/dma-mapping.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 45b5ab54c9ee..fbd76785c5db 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -45,6 +45,7 @@ static void *arm64_swiotlb_alloc_coherent(struct device *dev, size_t size, if (IS_ENABLED(CONFIG_DMA_CMA)) { struct page *page; + size = PAGE_ALIGN(size); page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, get_order(size)); if (!page) From a55f9929a9b257f84b6cc7b2397379cabd744a22 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Tue, 4 Feb 2014 16:01:31 +0000 Subject: [PATCH 04/14] arm64: Invalidate the TLB when replacing pmd entries during boot With the 64K page size configuration, __create_page_tables in head.S maps enough memory to get started but using 64K pages rather than 512M sections with a single pgd/pud/pmd entry pointing to a pte table. create_mapping() may override the pgd/pud/pmd table entry with a block (section) one if the RAM size is more than 512MB and aligned correctly. For the end of this block to be accessible, the old TLB entry must be invalidated. Cc: Reported-by: Mark Salter Tested-by: Mark Salter Signed-off-by: Catalin Marinas --- arch/arm64/mm/mmu.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/arch/arm64/mm/mmu.c b/arch/arm64/mm/mmu.c index f557ebbe7013..f8dc7e8fce6f 100644 --- a/arch/arm64/mm/mmu.c +++ b/arch/arm64/mm/mmu.c @@ -203,10 +203,18 @@ static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, do { next = pmd_addr_end(addr, end); /* try section mapping first */ - if (((addr | next | phys) & ~SECTION_MASK) == 0) + if (((addr | next | phys) & ~SECTION_MASK) == 0) { + pmd_t old_pmd =*pmd; set_pmd(pmd, __pmd(phys | prot_sect_kernel)); - else + /* + * Check for previous table entries created during + * boot (__create_page_tables) and flush them. + */ + if (!pmd_none(old_pmd)) + flush_tlb_all(); + } else { alloc_init_pte(pmd, addr, next, __phys_to_pfn(phys)); + } phys += next - addr; } while (pmd++, addr = next, addr != end); } From bfb67a5606376bb32cb6f93dc05cda2e8c2038a5 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 5 Feb 2014 10:24:12 +0000 Subject: [PATCH 05/14] arm64: fix typo: s/SERRROR/SERROR/ Somehow SERROR has acquired an additional 'R' in a couple of headers. This patch removes them before they spread further. As neither instance is in use yet, no other sites need to be fixed up. Signed-off-by: Mark Rutland Acked-by: Marc Zyngier Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/esr.h | 2 +- arch/arm64/include/asm/kvm_arm.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/include/asm/esr.h b/arch/arm64/include/asm/esr.h index 78834123a32e..c4a7f940b387 100644 --- a/arch/arm64/include/asm/esr.h +++ b/arch/arm64/include/asm/esr.h @@ -42,7 +42,7 @@ #define ESR_EL1_EC_SP_ALIGN (0x26) #define ESR_EL1_EC_FP_EXC32 (0x28) #define ESR_EL1_EC_FP_EXC64 (0x2C) -#define ESR_EL1_EC_SERRROR (0x2F) +#define ESR_EL1_EC_SERROR (0x2F) #define ESR_EL1_EC_BREAKPT_EL0 (0x30) #define ESR_EL1_EC_BREAKPT_EL1 (0x31) #define ESR_EL1_EC_SOFTSTP_EL0 (0x32) diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index c98ef4771c73..0eb398655378 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -231,7 +231,7 @@ #define ESR_EL2_EC_SP_ALIGN (0x26) #define ESR_EL2_EC_FP_EXC32 (0x28) #define ESR_EL2_EC_FP_EXC64 (0x2C) -#define ESR_EL2_EC_SERRROR (0x2F) +#define ESR_EL2_EC_SERROR (0x2F) #define ESR_EL2_EC_BREAKPT (0x30) #define ESR_EL2_EC_BREAKPT_HYP (0x31) #define ESR_EL2_EC_SOFTSTP (0x32) From 883d50a0ed403446437444a495356ce31e1197a3 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Wed, 5 Feb 2014 10:24:13 +0000 Subject: [PATCH 06/14] arm64: simplify pgd_alloc Currently pgd_alloc has a redundant NULL check in its return path that can be removed with no ill effects. With that removed it's also possible to return early and eliminate the new_pgd temporary variable. This patch applies said modifications, making the logic of pgd_alloc correspond 1-1 with that of pgd_free. Signed-off-by: Mark Rutland Cc: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/mm/pgd.c | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/arch/arm64/mm/pgd.c b/arch/arm64/mm/pgd.c index 7083cdada657..62c6101df260 100644 --- a/arch/arm64/mm/pgd.c +++ b/arch/arm64/mm/pgd.c @@ -32,17 +32,10 @@ pgd_t *pgd_alloc(struct mm_struct *mm) { - pgd_t *new_pgd; - if (PGD_SIZE == PAGE_SIZE) - new_pgd = (pgd_t *)get_zeroed_page(GFP_KERNEL); + return (pgd_t *)get_zeroed_page(GFP_KERNEL); else - new_pgd = kzalloc(PGD_SIZE, GFP_KERNEL); - - if (!new_pgd) - return NULL; - - return new_pgd; + return kzalloc(PGD_SIZE, GFP_KERNEL); } void pgd_free(struct mm_struct *mm, pgd_t *pgd) From 069b918623e1510e58dacf178905a72c3baa3ae4 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Wed, 5 Feb 2014 05:53:04 +0000 Subject: [PATCH 07/14] arm64: vdso: fix coarse clock handling When __kernel_clock_gettime is called with a CLOCK_MONOTONIC_COARSE or CLOCK_REALTIME_COARSE clock id, it returns incorrectly to whatever the caller has placed in x2 ("ret x2" to return from the fast path). Fix this by saving x30/LR to x2 only in code that will call __do_get_tspec, restoring x30 afterward, and using a plain "ret" to return from the routine. Also: while the resulting tv_nsec value for CLOCK_REALTIME and CLOCK_MONOTONIC must be computed using intermediate values that are left-shifted by cs_shift (x12, set by __do_get_tspec), the results for coarse clocks should be calculated using unshifted values (xtime_coarse_nsec is in units of actual nanoseconds). The current code shifts intermediate values by x12 unconditionally, but x12 is uninitialized when servicing a coarse clock. Fix this by setting x12 to 0 once we know we are dealing with a coarse clock id. Signed-off-by: Nathan Lynch Acked-by: Will Deacon Cc: Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso/gettimeofday.S | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/kernel/vdso/gettimeofday.S b/arch/arm64/kernel/vdso/gettimeofday.S index f0a6d10b5211..fe652ffd34c2 100644 --- a/arch/arm64/kernel/vdso/gettimeofday.S +++ b/arch/arm64/kernel/vdso/gettimeofday.S @@ -103,6 +103,8 @@ ENTRY(__kernel_clock_gettime) bl __do_get_tspec seqcnt_check w9, 1b + mov x30, x2 + cmp w0, #CLOCK_MONOTONIC b.ne 6f @@ -118,6 +120,9 @@ ENTRY(__kernel_clock_gettime) ccmp w0, #CLOCK_MONOTONIC_COARSE, #0x4, ne b.ne 8f + /* xtime_coarse_nsec is already right-shifted */ + mov x12, #0 + /* Get coarse timespec. */ adr vdso_data, _vdso_data 3: seqcnt_acquire @@ -156,7 +161,7 @@ ENTRY(__kernel_clock_gettime) lsr x11, x11, x12 stp x10, x11, [x1, #TSPEC_TV_SEC] mov x0, xzr - ret x2 + ret 7: mov x30, x2 8: /* Syscall fallback. */ From d4022a335271a48cce49df35d825897914fbffe3 Mon Sep 17 00:00:00 2001 From: Nathan Lynch Date: Mon, 3 Feb 2014 19:48:52 +0000 Subject: [PATCH 08/14] arm64: vdso: update wtm fields for CLOCK_MONOTONIC_COARSE Update wall-to-monotonic fields in the VDSO data page unconditionally. These are used to service CLOCK_MONOTONIC_COARSE, which is not guarded by use_syscall. Signed-off-by: Nathan Lynch Acked-by: Will Deacon Cc: Signed-off-by: Catalin Marinas --- arch/arm64/kernel/vdso.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm64/kernel/vdso.c b/arch/arm64/kernel/vdso.c index 65d40cf6945a..a7149cae1615 100644 --- a/arch/arm64/kernel/vdso.c +++ b/arch/arm64/kernel/vdso.c @@ -238,6 +238,8 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->use_syscall = use_syscall; vdso_data->xtime_coarse_sec = xtime_coarse.tv_sec; vdso_data->xtime_coarse_nsec = xtime_coarse.tv_nsec; + vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; + vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; if (!use_syscall) { vdso_data->cs_cycle_last = tk->clock->cycle_last; @@ -245,8 +247,6 @@ void update_vsyscall(struct timekeeper *tk) vdso_data->xtime_clock_nsec = tk->xtime_nsec; vdso_data->cs_mult = tk->mult; vdso_data->cs_shift = tk->shift; - vdso_data->wtm_clock_sec = tk->wall_to_monotonic.tv_sec; - vdso_data->wtm_clock_nsec = tk->wall_to_monotonic.tv_nsec; } smp_wmb(); From 6290b53de025dd08a79257ee84173ef7b6d926f7 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Wed, 5 Feb 2014 12:03:52 +0000 Subject: [PATCH 09/14] arm64: compat: Wire up new AArch32 syscalls This patch enables sys_compat, sys_finit_module, sys_sched_setattr and sys_sched_getattr for compat (AArch32) applications. Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/unistd32.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 58125bf008d3..bb8eb8a78e67 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -399,7 +399,10 @@ __SYSCALL(374, compat_sys_sendmmsg) __SYSCALL(375, sys_setns) __SYSCALL(376, compat_sys_process_vm_readv) __SYSCALL(377, compat_sys_process_vm_writev) -__SYSCALL(378, sys_ni_syscall) /* 378 for kcmp */ +__SYSCALL(378, sys_kcmp) +__SYSCALL(379, sys_finit_module) +__SYSCALL(380, sys_sched_setattr) +__SYSCALL(381, sys_sched_getattr) #define __NR_compat_syscalls 379 From 530b099dfe8499d639e7fbcad28c4199e2a720c7 Mon Sep 17 00:00:00 2001 From: Colin Cross Date: Tue, 4 Feb 2014 02:15:32 +0000 Subject: [PATCH 10/14] security: select correct default LSM_MMAP_MIN_ADDR on arm on arm64 Binaries compiled for arm may run on arm64 if CONFIG_COMPAT is selected. Set LSM_MMAP_MIN_ADDR to 32768 if ARM64 && COMPAT to prevent selinux failures launching 32-bit static executables that are mapped at 0x8000. Signed-off-by: Colin Cross Acked-by: Will Deacon Acked-by: Eric Paris Acked-by: James Morris Signed-off-by: Catalin Marinas --- security/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/security/Kconfig b/security/Kconfig index e9c6ac724fef..beb86b500adf 100644 --- a/security/Kconfig +++ b/security/Kconfig @@ -103,7 +103,7 @@ config INTEL_TXT config LSM_MMAP_MIN_ADDR int "Low address space for LSM to protect from user allocation" depends on SECURITY && SECURITY_SELINUX - default 32768 if ARM + default 32768 if ARM || (ARM64 && COMPAT) default 65536 help This is the portion of low virtual memory which should be protected From 4a7ac12eedd190cdf071e61145defa73df1675c0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 6 Feb 2014 11:30:48 +0000 Subject: [PATCH 11/14] arm64: barriers: allow dsb macro to take option parameter The dsb instruction takes an option specifying both the target access types and shareability domain. This patch allows such an option to be passed to the dsb macro, resulting in potentially more efficient code. Currently the option is ignored until all callers are updated (unlike ARM, the option is mandated by the assembler). Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/barrier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm64/include/asm/barrier.h b/arch/arm64/include/asm/barrier.h index 78e20ba8806b..409ca370cfe2 100644 --- a/arch/arm64/include/asm/barrier.h +++ b/arch/arm64/include/asm/barrier.h @@ -25,7 +25,7 @@ #define wfi() asm volatile("wfi" : : : "memory") #define isb() asm volatile("isb" : : : "memory") -#define dsb() asm volatile("dsb sy" : : : "memory") +#define dsb(opt) asm volatile("dsb sy" : : : "memory") #define mb() dsb() #define rmb() asm volatile("dsb ld" : : : "memory") From 8e86f0b409a44193f1587e87b69c5dcf8f65be67 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 4 Feb 2014 12:29:12 +0000 Subject: [PATCH 12/14] arm64: atomics: fix use of acquire + release for full barrier semantics Linux requires a number of atomic operations to provide full barrier semantics, that is no memory accesses after the operation can be observed before any accesses up to and including the operation in program order. On arm64, these operations have been incorrectly implemented as follows: // A, B, C are independent memory locations // atomic_op (B) 1: ldaxr x0, [B] // Exclusive load with acquire stlxr w1, x0, [B] // Exclusive store with release cbnz w1, 1b The assumption here being that two half barriers are equivalent to a full barrier, so the only permitted ordering would be A -> B -> C (where B is the atomic operation involving both a load and a store). Unfortunately, this is not the case by the letter of the architecture and, in fact, the accesses to A and C are permitted to pass their nearest half barrier resulting in orderings such as Bl -> A -> C -> Bs or Bl -> C -> A -> Bs (where Bl is the load-acquire on B and Bs is the store-release on B). This is a clear violation of the full barrier requirement. The simple way to fix this is to implement the same algorithm as ARMv7 using explicit barriers: // atomic_op (B) dmb ish // Full barrier 1: ldxr x0, [B] // Exclusive load stxr w1, x0, [B] // Exclusive store cbnz w1, 1b dmb ish // Full barrier but this has the undesirable effect of introducing *two* full barrier instructions. A better approach is actually the following, non-intuitive sequence: // atomic_op (B) 1: ldxr x0, [B] // Exclusive load stlxr w1, x0, [B] // Exclusive store with release cbnz w1, 1b dmb ish // Full barrier The simple observations here are: - The dmb ensures that no subsequent accesses (e.g. the access to C) can enter or pass the atomic sequence. - The dmb also ensures that no prior accesses (e.g. the access to A) can pass the atomic sequence. - Therefore, no prior access can pass a subsequent access, or vice-versa (i.e. A is strictly ordered before C). - The stlxr ensures that no prior access can pass the store component of the atomic operation. The only tricky part remaining is the ordering between the ldxr and the access to A, since the absence of the first dmb means that we're now permitting re-ordering between the ldxr and any prior accesses. From an (arbitrary) observer's point of view, there are two scenarios: 1. We have observed the ldxr. This means that if we perform a store to [B], the ldxr will still return older data. If we can observe the ldxr, then we can potentially observe the permitted re-ordering with the access to A, which is clearly an issue when compared to the dmb variant of the code. Thankfully, the exclusive monitor will save us here since it will be cleared as a result of the store and the ldxr will retry. Notice that any use of a later memory observation to imply observation of the ldxr will also imply observation of the access to A, since the stlxr/dmb ensure strict ordering. 2. We have not observed the ldxr. This means we can perform a store and influence the later ldxr. However, that doesn't actually tell us anything about the access to [A], so we've not lost anything here either when compared to the dmb variant. This patch implements this solution for our barriered atomic operations, ensuring that we satisfy the full barrier requirements where they are needed. Cc: Cc: Peter Zijlstra Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/atomic.h | 29 ++++++++++++++++++++--------- arch/arm64/include/asm/cmpxchg.h | 9 +++++---- arch/arm64/include/asm/futex.h | 6 ++++-- arch/arm64/kernel/kuser32.S | 6 ++++-- arch/arm64/lib/bitops.S | 3 ++- 5 files changed, 35 insertions(+), 18 deletions(-) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index 01de5aaa3edc..e32893e005d4 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -64,7 +64,7 @@ static inline int atomic_add_return(int i, atomic_t *v) int result; asm volatile("// atomic_add_return\n" -"1: ldaxr %w0, %2\n" +"1: ldxr %w0, %2\n" " add %w0, %w0, %w3\n" " stlxr %w1, %w0, %2\n" " cbnz %w1, 1b" @@ -72,6 +72,7 @@ static inline int atomic_add_return(int i, atomic_t *v) : "Ir" (i) : "cc", "memory"); + smp_mb(); return result; } @@ -96,7 +97,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) int result; asm volatile("// atomic_sub_return\n" -"1: ldaxr %w0, %2\n" +"1: ldxr %w0, %2\n" " sub %w0, %w0, %w3\n" " stlxr %w1, %w0, %2\n" " cbnz %w1, 1b" @@ -104,6 +105,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) : "Ir" (i) : "cc", "memory"); + smp_mb(); return result; } @@ -112,17 +114,20 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) unsigned long tmp; int oldval; + smp_mb(); + asm volatile("// atomic_cmpxchg\n" -"1: ldaxr %w1, %2\n" +"1: ldxr %w1, %2\n" " cmp %w1, %w3\n" " b.ne 2f\n" -" stlxr %w0, %w4, %2\n" +" stxr %w0, %w4, %2\n" " cbnz %w0, 1b\n" "2:" : "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter) : "Ir" (old), "r" (new) : "cc", "memory"); + smp_mb(); return oldval; } @@ -183,7 +188,7 @@ static inline long atomic64_add_return(long i, atomic64_t *v) unsigned long tmp; asm volatile("// atomic64_add_return\n" -"1: ldaxr %0, %2\n" +"1: ldxr %0, %2\n" " add %0, %0, %3\n" " stlxr %w1, %0, %2\n" " cbnz %w1, 1b" @@ -191,6 +196,7 @@ static inline long atomic64_add_return(long i, atomic64_t *v) : "Ir" (i) : "cc", "memory"); + smp_mb(); return result; } @@ -215,7 +221,7 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) unsigned long tmp; asm volatile("// atomic64_sub_return\n" -"1: ldaxr %0, %2\n" +"1: ldxr %0, %2\n" " sub %0, %0, %3\n" " stlxr %w1, %0, %2\n" " cbnz %w1, 1b" @@ -223,6 +229,7 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) : "Ir" (i) : "cc", "memory"); + smp_mb(); return result; } @@ -231,17 +238,20 @@ static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new) long oldval; unsigned long res; + smp_mb(); + asm volatile("// atomic64_cmpxchg\n" -"1: ldaxr %1, %2\n" +"1: ldxr %1, %2\n" " cmp %1, %3\n" " b.ne 2f\n" -" stlxr %w0, %4, %2\n" +" stxr %w0, %4, %2\n" " cbnz %w0, 1b\n" "2:" : "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter) : "Ir" (old), "r" (new) : "cc", "memory"); + smp_mb(); return oldval; } @@ -253,11 +263,12 @@ static inline long atomic64_dec_if_positive(atomic64_t *v) unsigned long tmp; asm volatile("// atomic64_dec_if_positive\n" -"1: ldaxr %0, %2\n" +"1: ldxr %0, %2\n" " subs %0, %0, #1\n" " b.mi 2f\n" " stlxr %w1, %0, %2\n" " cbnz %w1, 1b\n" +" dmb ish\n" "2:" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 56166d7f4a25..189390ce8653 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -29,7 +29,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size switch (size) { case 1: asm volatile("// __xchg1\n" - "1: ldaxrb %w0, %2\n" + "1: ldxrb %w0, %2\n" " stlxrb %w1, %w3, %2\n" " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) @@ -38,7 +38,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size break; case 2: asm volatile("// __xchg2\n" - "1: ldaxrh %w0, %2\n" + "1: ldxrh %w0, %2\n" " stlxrh %w1, %w3, %2\n" " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr) @@ -47,7 +47,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size break; case 4: asm volatile("// __xchg4\n" - "1: ldaxr %w0, %2\n" + "1: ldxr %w0, %2\n" " stlxr %w1, %w3, %2\n" " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr) @@ -56,7 +56,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size break; case 8: asm volatile("// __xchg8\n" - "1: ldaxr %0, %2\n" + "1: ldxr %0, %2\n" " stlxr %w1, %3, %2\n" " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr) @@ -67,6 +67,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size BUILD_BUG(); } + smp_mb(); return ret; } diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 78cc3aba5d69..572193d0005d 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -24,10 +24,11 @@ #define __futex_atomic_op(insn, ret, oldval, uaddr, tmp, oparg) \ asm volatile( \ -"1: ldaxr %w1, %2\n" \ +"1: ldxr %w1, %2\n" \ insn "\n" \ "2: stlxr %w3, %w0, %2\n" \ " cbnz %w3, 1b\n" \ +" dmb ish\n" \ "3:\n" \ " .pushsection .fixup,\"ax\"\n" \ " .align 2\n" \ @@ -111,11 +112,12 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, return -EFAULT; asm volatile("// futex_atomic_cmpxchg_inatomic\n" -"1: ldaxr %w1, %2\n" +"1: ldxr %w1, %2\n" " sub %w3, %w1, %w4\n" " cbnz %w3, 3f\n" "2: stlxr %w3, %w5, %2\n" " cbnz %w3, 1b\n" +" dmb ish\n" "3:\n" " .pushsection .fixup,\"ax\"\n" "4: mov %w0, %w6\n" diff --git a/arch/arm64/kernel/kuser32.S b/arch/arm64/kernel/kuser32.S index 63c48ffdf230..7787208e8cc6 100644 --- a/arch/arm64/kernel/kuser32.S +++ b/arch/arm64/kernel/kuser32.S @@ -38,12 +38,13 @@ __kuser_cmpxchg64: // 0xffff0f60 .inst 0xe92d00f0 // push {r4, r5, r6, r7} .inst 0xe1c040d0 // ldrd r4, r5, [r0] .inst 0xe1c160d0 // ldrd r6, r7, [r1] - .inst 0xe1b20e9f // 1: ldaexd r0, r1, [r2] + .inst 0xe1b20f9f // 1: ldrexd r0, r1, [r2] .inst 0xe0303004 // eors r3, r0, r4 .inst 0x00313005 // eoreqs r3, r1, r5 .inst 0x01a23e96 // stlexdeq r3, r6, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffff9 // beq 1b + .inst 0xf57ff05b // dmb ish .inst 0xe2730000 // rsbs r0, r3, #0 .inst 0xe8bd00f0 // pop {r4, r5, r6, r7} .inst 0xe12fff1e // bx lr @@ -55,11 +56,12 @@ __kuser_memory_barrier: // 0xffff0fa0 .align 5 __kuser_cmpxchg: // 0xffff0fc0 - .inst 0xe1923e9f // 1: ldaex r3, [r2] + .inst 0xe1923f9f // 1: ldrex r3, [r2] .inst 0xe0533000 // subs r3, r3, r0 .inst 0x01823e91 // stlexeq r3, r1, [r2] .inst 0x03330001 // teqeq r3, #1 .inst 0x0afffffa // beq 1b + .inst 0xf57ff05b // dmb ish .inst 0xe2730000 // rsbs r0, r3, #0 .inst 0xe12fff1e // bx lr diff --git a/arch/arm64/lib/bitops.S b/arch/arm64/lib/bitops.S index e5db797790d3..7dac371cc9a2 100644 --- a/arch/arm64/lib/bitops.S +++ b/arch/arm64/lib/bitops.S @@ -46,11 +46,12 @@ ENTRY( \name ) mov x2, #1 add x1, x1, x0, lsr #3 // Get word offset lsl x4, x2, x3 // Create mask -1: ldaxr x2, [x1] +1: ldxr x2, [x1] lsr x0, x2, x3 // Save old value of bit \instr x2, x2, x4 // toggle bit stlxr w5, x2, [x1] cbnz w5, 1b + dmb ish and x0, x0, #1 3: ret ENDPROC(\name ) From 95c4189689f92fba7ecf9097173404d4928c6e9b Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 4 Feb 2014 12:29:13 +0000 Subject: [PATCH 13/14] arm64: asm: remove redundant "cc" clobbers cbnz/tbnz don't update the condition flags, so remove the "cc" clobbers from inline asm blocks that only use these instructions to implement conditional branches. Signed-off-by: Will Deacon Signed-off-by: Catalin Marinas --- arch/arm64/include/asm/atomic.h | 24 ++++++++++-------------- arch/arm64/include/asm/cmpxchg.h | 8 ++++---- arch/arm64/include/asm/futex.h | 4 ++-- arch/arm64/include/asm/spinlock.h | 10 +++++----- 4 files changed, 21 insertions(+), 25 deletions(-) diff --git a/arch/arm64/include/asm/atomic.h b/arch/arm64/include/asm/atomic.h index e32893e005d4..0237f0867e37 100644 --- a/arch/arm64/include/asm/atomic.h +++ b/arch/arm64/include/asm/atomic.h @@ -54,8 +54,7 @@ static inline void atomic_add(int i, atomic_t *v) " stxr %w1, %w0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline int atomic_add_return(int i, atomic_t *v) @@ -70,7 +69,7 @@ static inline int atomic_add_return(int i, atomic_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); smp_mb(); return result; @@ -87,8 +86,7 @@ static inline void atomic_sub(int i, atomic_t *v) " stxr %w1, %w0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline int atomic_sub_return(int i, atomic_t *v) @@ -103,7 +101,7 @@ static inline int atomic_sub_return(int i, atomic_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); smp_mb(); return result; @@ -125,7 +123,7 @@ static inline int atomic_cmpxchg(atomic_t *ptr, int old, int new) "2:" : "=&r" (tmp), "=&r" (oldval), "+Q" (ptr->counter) : "Ir" (old), "r" (new) - : "cc", "memory"); + : "cc"); smp_mb(); return oldval; @@ -178,8 +176,7 @@ static inline void atomic64_add(u64 i, atomic64_t *v) " stxr %w1, %0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline long atomic64_add_return(long i, atomic64_t *v) @@ -194,7 +191,7 @@ static inline long atomic64_add_return(long i, atomic64_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); smp_mb(); return result; @@ -211,8 +208,7 @@ static inline void atomic64_sub(u64 i, atomic64_t *v) " stxr %w1, %0, %2\n" " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) - : "Ir" (i) - : "cc"); + : "Ir" (i)); } static inline long atomic64_sub_return(long i, atomic64_t *v) @@ -227,7 +223,7 @@ static inline long atomic64_sub_return(long i, atomic64_t *v) " cbnz %w1, 1b" : "=&r" (result), "=&r" (tmp), "+Q" (v->counter) : "Ir" (i) - : "cc", "memory"); + : "memory"); smp_mb(); return result; @@ -249,7 +245,7 @@ static inline long atomic64_cmpxchg(atomic64_t *ptr, long old, long new) "2:" : "=&r" (res), "=&r" (oldval), "+Q" (ptr->counter) : "Ir" (old), "r" (new) - : "cc", "memory"); + : "cc"); smp_mb(); return oldval; diff --git a/arch/arm64/include/asm/cmpxchg.h b/arch/arm64/include/asm/cmpxchg.h index 189390ce8653..57c0fa7bf711 100644 --- a/arch/arm64/include/asm/cmpxchg.h +++ b/arch/arm64/include/asm/cmpxchg.h @@ -34,7 +34,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u8 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; case 2: asm volatile("// __xchg2\n" @@ -43,7 +43,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u16 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; case 4: asm volatile("// __xchg4\n" @@ -52,7 +52,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u32 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; case 8: asm volatile("// __xchg8\n" @@ -61,7 +61,7 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr, int size " cbnz %w1, 1b\n" : "=&r" (ret), "=&r" (tmp), "+Q" (*(u64 *)ptr) : "r" (x) - : "cc", "memory"); + : "memory"); break; default: BUILD_BUG(); diff --git a/arch/arm64/include/asm/futex.h b/arch/arm64/include/asm/futex.h index 572193d0005d..5f750dc96e0f 100644 --- a/arch/arm64/include/asm/futex.h +++ b/arch/arm64/include/asm/futex.h @@ -41,7 +41,7 @@ " .popsection\n" \ : "=&r" (ret), "=&r" (oldval), "+Q" (*uaddr), "=&r" (tmp) \ : "r" (oparg), "Ir" (-EFAULT) \ - : "cc", "memory") + : "memory") static inline int futex_atomic_op_inuser (int encoded_op, u32 __user *uaddr) @@ -129,7 +129,7 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, " .popsection\n" : "+r" (ret), "=&r" (val), "+Q" (*uaddr), "=&r" (tmp) : "r" (oldval), "r" (newval), "Ir" (-EFAULT) - : "cc", "memory"); + : "memory"); *uval = val; return ret; diff --git a/arch/arm64/include/asm/spinlock.h b/arch/arm64/include/asm/spinlock.h index 3d5cf064d7a1..c45b7b1b7197 100644 --- a/arch/arm64/include/asm/spinlock.h +++ b/arch/arm64/include/asm/spinlock.h @@ -132,7 +132,7 @@ static inline void arch_write_lock(arch_rwlock_t *rw) " cbnz %w0, 2b\n" : "=&r" (tmp), "+Q" (rw->lock) : "r" (0x80000000) - : "cc", "memory"); + : "memory"); } static inline int arch_write_trylock(arch_rwlock_t *rw) @@ -146,7 +146,7 @@ static inline int arch_write_trylock(arch_rwlock_t *rw) "1:\n" : "=&r" (tmp), "+Q" (rw->lock) : "r" (0x80000000) - : "cc", "memory"); + : "memory"); return !tmp; } @@ -187,7 +187,7 @@ static inline void arch_read_lock(arch_rwlock_t *rw) " cbnz %w1, 2b\n" : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : - : "cc", "memory"); + : "memory"); } static inline void arch_read_unlock(arch_rwlock_t *rw) @@ -201,7 +201,7 @@ static inline void arch_read_unlock(arch_rwlock_t *rw) " cbnz %w1, 1b\n" : "=&r" (tmp), "=&r" (tmp2), "+Q" (rw->lock) : - : "cc", "memory"); + : "memory"); } static inline int arch_read_trylock(arch_rwlock_t *rw) @@ -216,7 +216,7 @@ static inline int arch_read_trylock(arch_rwlock_t *rw) "1:\n" : "=&r" (tmp), "+r" (tmp2), "+Q" (rw->lock) : - : "cc", "memory"); + : "memory"); return !tmp2; } From 55834a773fe343912b705bef8114ec93fd337188 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 7 Feb 2014 17:12:45 +0000 Subject: [PATCH 14/14] arm64: defconfig: Expand default enabled features FPGA implementations of the Cortex-A57 and Cortex-A53 are now available in the form of the SMM-A57 and SMM-A53 Soft Macrocell Models (SMMs) for Versatile Express. As these attach to a Motherboard Express V2M-P1 it would be useful to have support for some V2M-P1 peripherals enabled by default. Additionally a couple of of features have been introduced since the last defconfig update (CMA, jump labels) that would be good to have enabled by default to ensure they are build and boot tested. This patch updates the arm64 defconfig to enable support for these devices and features. The arm64 Kconfig is modified to select HAVE_PATA_PLATFORM, which is required to enable support for the CompactFlash controller on the V2M-P1. A few options which don't need to appear in defconfig are trimmed: * BLK_DEV - selected by default * EXPERIMENTAL - otherwise gone from the kernel * MII - selected by drivers which require it * USB_SUPPORT - selected by default Signed-off-by: Mark Rutland Signed-off-by: Catalin Marinas --- arch/arm64/Kconfig | 1 + arch/arm64/configs/defconfig | 18 ++++++++++++++---- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index dd4327f09ba4..27bbcfc7202a 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -36,6 +36,7 @@ config ARM64 select HAVE_GENERIC_DMA_COHERENT select HAVE_HW_BREAKPOINT if PERF_EVENTS select HAVE_MEMBLOCK + select HAVE_PATA_PLATFORM select HAVE_PERF_EVENTS select IRQ_DOMAIN select MODULES_USE_ELF_RELA diff --git a/arch/arm64/configs/defconfig b/arch/arm64/configs/defconfig index 84139be62ae6..7959dd0ca5d5 100644 --- a/arch/arm64/configs/defconfig +++ b/arch/arm64/configs/defconfig @@ -1,4 +1,3 @@ -CONFIG_EXPERIMENTAL=y # CONFIG_LOCALVERSION_AUTO is not set # CONFIG_SWAP is not set CONFIG_SYSVIPC=y @@ -19,6 +18,7 @@ CONFIG_BLK_DEV_INITRD=y CONFIG_KALLSYMS_ALL=y # CONFIG_COMPAT_BRK is not set CONFIG_PROFILING=y +CONFIG_JUMP_LABEL=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y # CONFIG_BLK_DEV_BSG is not set @@ -27,6 +27,7 @@ CONFIG_ARCH_VEXPRESS=y CONFIG_ARCH_XGENE=y CONFIG_SMP=y CONFIG_PREEMPT=y +CONFIG_CMA=y CONFIG_CMDLINE="console=ttyAMA0" # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set CONFIG_COMPAT=y @@ -42,14 +43,17 @@ CONFIG_IP_PNP_BOOTP=y # CONFIG_WIRELESS is not set CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug" CONFIG_DEVTMPFS=y -CONFIG_BLK_DEV=y +CONFIG_DMA_CMA=y CONFIG_SCSI=y # CONFIG_SCSI_PROC_FS is not set CONFIG_BLK_DEV_SD=y # CONFIG_SCSI_LOWLEVEL is not set +CONFIG_ATA=y +CONFIG_PATA_PLATFORM=y +CONFIG_PATA_OF_PLATFORM=y CONFIG_NETDEVICES=y -CONFIG_MII=y CONFIG_SMC91X=y +CONFIG_SMSC911X=y # CONFIG_WLAN is not set CONFIG_INPUT_EVDEV=y # CONFIG_SERIO_I8042 is not set @@ -62,13 +66,19 @@ CONFIG_SERIAL_AMBA_PL011=y CONFIG_SERIAL_AMBA_PL011_CONSOLE=y # CONFIG_HW_RANDOM is not set # CONFIG_HWMON is not set +CONFIG_REGULATOR=y +CONFIG_REGULATOR_FIXED_VOLTAGE=y CONFIG_FB=y # CONFIG_VGA_CONSOLE is not set CONFIG_FRAMEBUFFER_CONSOLE=y CONFIG_LOGO=y # CONFIG_LOGO_LINUX_MONO is not set # CONFIG_LOGO_LINUX_VGA16 is not set -# CONFIG_USB_SUPPORT is not set +CONFIG_USB=y +CONFIG_USB_ISP1760_HCD=y +CONFIG_USB_STORAGE=y +CONFIG_MMC=y +CONFIG_MMC_ARMMMCI=y # CONFIG_IOMMU_SUPPORT is not set CONFIG_EXT2_FS=y CONFIG_EXT3_FS=y