1
0
Fork 0

Merge branch 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm

Pull ARM updates from Russell King:
 "Included in this update:

   - perf updates from Will Deacon:

     The main changes are callchain stability fixes from Jean Pihet and
     event mapping and PMU name rework from Mark Rutland

     The latter is preparatory work for enabling some code re-use with
     arm64 in the future.

   - updates for nommu from Uwe Kleine-König:

     Two different fixes for the same problem making some ARM nommu
     configurations not boot since 3.6-rc1.  The problem is that
     user_addr_max returned the biggest available RAM address which
     makes some copy_from_user variants fail to read from XIP memory.

   - deprecate legacy OMAP DMA API, in preparation for it's removal.

     The popular drivers have been converted over, leaving a very small
     number of rarely used drivers, which hopefully can be converted
     during the next cycle with a bit more visibility (and hopefully
     people popping out of the woodwork to help test)

   - more tweaks for BE systems, particularly with the kernel image
     format.  In connection with this, I've cleaned up the way we
     generate the linker script for the decompressor.

   - removal of hard-coded assumptions of the kernel stack size, making
     everywhere depend on the value of THREAD_SIZE_ORDER.

   - MCPM updates from Nicolas Pitre.

   - Make it easier for proper CPU part number checks (which should
     always include the vendor field).

   - Assembly code optimisation - use the "bx" instruction when
     returning from a function on ARMv6+ rather than "mov pc, reg".

   - Save the last kernel misaligned fault location and report it via
     the procfs alignment file.

   - Clean up the way we create the initial stack frame, which is a
     repeated pattern in several different locations.

   - Support for 8-byte get_user(), needed for some DRM implementations.

   - mcs locking from Will Deacon.

   - Save and restore a few more Cortex-A9 registers (for errata
     workarounds)

   - Fix various aspects of the SWP emulation, and the ELF hwcap for the
     SWP instruction.

   - Update LPAE logic for pte_write and pmd_write to make it more
     correct.

   - Support for Broadcom Brahma15 CPU cores.

   - ARM assembly crypto updates from Ard Biesheuvel"

* 'for-linus' of git://ftp.arm.linux.org.uk/~rmk/linux-arm: (53 commits)
  ARM: add comments to the early page table remap code
  ARM: 8122/1: smp_scu: enable SCU standby support
  ARM: 8121/1: smp_scu: use macro for SCU enable bit
  ARM: 8120/1: crypto: sha512: add ARM NEON implementation
  ARM: 8119/1: crypto: sha1: add ARM NEON implementation
  ARM: 8118/1: crypto: sha1/make use of common SHA-1 structures
  ARM: 8113/1: remove remaining definitions of PLAT_PHYS_OFFSET from <mach/memory.h>
  ARM: 8111/1: Enable erratum 798181 for Broadcom Brahma-B15
  ARM: 8110/1: do CPU-specific init for Broadcom Brahma15 cores
  ARM: 8109/1: mm: Modify pte_write and pmd_write logic for LPAE
  ARM: 8108/1: mm: Introduce {pte,pmd}_isset and {pte,pmd}_isclear
  ARM: hwcap: disable HWCAP_SWP if the CPU advertises it has exclusives
  ARM: SWP emulation: only initialise on ARMv7 CPUs
  ARM: SWP emulation: always enable when SMP is enabled
  ARM: 8103/1: save/restore Cortex-A9 CP15 registers on suspend/resume
  ARM: 8098/1: mcs lock: implement wfe-based polling for MCS locking
  ARM: 8091/2: add get_user() support for 8 byte types
  ARM: 8097/1: unistd.h: relocate comments back to place
  ARM: 8096/1: Describe required sort order for textofs-y (TEXT_OFFSET)
  ARM: 8090/1: add revision info for PL310 errata 588369 and 727915
  ...
hifive-unleashed-5.1
Linus Torvalds 2014-08-05 10:05:29 -07:00
commit c489d98c8c
177 changed files with 3213 additions and 2006 deletions

View File

@ -263,8 +263,22 @@ config NEED_MACH_MEMORY_H
config PHYS_OFFSET
hex "Physical address of main memory" if MMU
depends on !ARM_PATCH_PHYS_VIRT && !NEED_MACH_MEMORY_H
depends on !ARM_PATCH_PHYS_VIRT
default DRAM_BASE if !MMU
default 0x00000000 if ARCH_EBSA110 || \
EP93XX_SDCE3_SYNC_PHYS_OFFSET || \
ARCH_FOOTBRIDGE || \
ARCH_INTEGRATOR || \
ARCH_IOP13XX || \
ARCH_KS8695 || \
(ARCH_REALVIEW && !REALVIEW_HIGH_PHYS_OFFSET)
default 0x10000000 if ARCH_OMAP1 || ARCH_RPC
default 0x20000000 if ARCH_S5PV210
default 0x70000000 if REALVIEW_HIGH_PHYS_OFFSET
default 0xc0000000 if EP93XX_SDCE0_PHYS_OFFSET || ARCH_SA1100
default 0xd0000000 if EP93XX_SDCE1_PHYS_OFFSET
default 0xe0000000 if EP93XX_SDCE2_PHYS_OFFSET
default 0xf0000000 if EP93XX_SDCE3_ASYNC_PHYS_OFFSET
help
Please provide the physical address corresponding to the
location of main memory in your system.
@ -436,7 +450,6 @@ config ARCH_EP93XX
select ARM_VIC
select CLKDEV_LOOKUP
select CPU_ARM920T
select NEED_MACH_MEMORY_H
help
This enables support for the Cirrus EP93xx series of CPUs.

View File

@ -127,6 +127,9 @@ CHECKFLAGS += -D__arm__
#Default value
head-y := arch/arm/kernel/head$(MMUEXT).o
# Text offset. This list is sorted numerically by address in order to
# provide a means to avoid/resolve conflicts in multi-arch kernels.
textofs-y := 0x00008000
textofs-$(CONFIG_ARCH_CLPS711X) := 0x00028000
# We don't want the htc bootloader to corrupt kernel during resume

View File

@ -81,7 +81,7 @@ ZTEXTADDR := 0
ZBSSADDR := ALIGN(8)
endif
SEDFLAGS = s/TEXT_START/$(ZTEXTADDR)/;s/BSS_START/$(ZBSSADDR)/
CPPFLAGS_vmlinux.lds := -DTEXT_START="$(ZTEXTADDR)" -DBSS_START="$(ZBSSADDR)"
suffix_$(CONFIG_KERNEL_GZIP) = gzip
suffix_$(CONFIG_KERNEL_LZO) = lzo
@ -199,8 +199,5 @@ CFLAGS_font.o := -Dstatic=
$(obj)/font.c: $(FONTC)
$(call cmd,shipped)
$(obj)/vmlinux.lds: $(obj)/vmlinux.lds.in arch/arm/boot/Makefile $(KCONFIG_CONFIG)
@sed "$(SEDFLAGS)" < $< > $@
$(obj)/hyp-stub.S: $(srctree)/arch/$(SRCARCH)/kernel/hyp-stub.S
$(call cmd,shipped)

View File

@ -125,9 +125,11 @@ start:
THUMB( adr r12, BSYM(1f) )
THUMB( bx r12 )
.word 0x016f2818 @ Magic numbers to help the loader
.word start @ absolute load/run zImage address
.word _edata @ zImage end address
.word _magic_sig @ Magic numbers to help the loader
.word _magic_start @ absolute load/run zImage address
.word _magic_end @ zImage end address
.word 0x04030201 @ endianness flag
THUMB( .thumb )
1:
ARM_BE8( setend be ) @ go BE8 if compiled for BE8

View File

@ -1,12 +1,20 @@
/*
* linux/arch/arm/boot/compressed/vmlinux.lds.in
*
* Copyright (C) 2000 Russell King
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#ifdef CONFIG_CPU_ENDIAN_BE8
#define ZIMAGE_MAGIC(x) ( (((x) >> 24) & 0x000000ff) | \
(((x) >> 8) & 0x0000ff00) | \
(((x) << 8) & 0x00ff0000) | \
(((x) << 24) & 0xff000000) )
#else
#define ZIMAGE_MAGIC(x) (x)
#endif
OUTPUT_ARCH(arm)
ENTRY(_start)
SECTIONS
@ -57,6 +65,10 @@ SECTIONS
.pad : { BYTE(0); . = ALIGN(8); }
_edata = .;
_magic_sig = ZIMAGE_MAGIC(0x016f2818);
_magic_start = ZIMAGE_MAGIC(_start);
_magic_end = ZIMAGE_MAGIC(_edata);
. = BSS_START;
__bss_start = .;
.bss : { *(.bss) }
@ -73,4 +85,3 @@ SECTIONS
.stab.indexstr 0 : { *(.stab.indexstr) }
.comment 0 : { *(.comment) }
}

View File

@ -12,11 +12,13 @@
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/irqflags.h>
#include <linux/cpu_pm.h>
#include <asm/mcpm.h>
#include <asm/cacheflush.h>
#include <asm/idmap.h>
#include <asm/cputype.h>
#include <asm/suspend.h>
extern unsigned long mcpm_entry_vectors[MAX_NR_CLUSTERS][MAX_CPUS_PER_CLUSTER];
@ -146,6 +148,56 @@ int mcpm_cpu_powered_up(void)
return 0;
}
#ifdef CONFIG_ARM_CPU_SUSPEND
static int __init nocache_trampoline(unsigned long _arg)
{
void (*cache_disable)(void) = (void *)_arg;
unsigned int mpidr = read_cpuid_mpidr();
unsigned int cpu = MPIDR_AFFINITY_LEVEL(mpidr, 0);
unsigned int cluster = MPIDR_AFFINITY_LEVEL(mpidr, 1);
phys_reset_t phys_reset;
mcpm_set_entry_vector(cpu, cluster, cpu_resume);
setup_mm_for_reboot();
__mcpm_cpu_going_down(cpu, cluster);
BUG_ON(!__mcpm_outbound_enter_critical(cpu, cluster));
cache_disable();
__mcpm_outbound_leave_critical(cluster, CLUSTER_DOWN);
__mcpm_cpu_down(cpu, cluster);
phys_reset = (phys_reset_t)(unsigned long)virt_to_phys(cpu_reset);
phys_reset(virt_to_phys(mcpm_entry_point));
BUG();
}
int __init mcpm_loopback(void (*cache_disable)(void))
{
int ret;
/*
* We're going to soft-restart the current CPU through the
* low-level MCPM code by leveraging the suspend/resume
* infrastructure. Let's play it safe by using cpu_pm_enter()
* in case the CPU init code path resets the VFP or similar.
*/
local_irq_disable();
local_fiq_disable();
ret = cpu_pm_enter();
if (!ret) {
ret = cpu_suspend((unsigned long)cache_disable, nocache_trampoline);
cpu_pm_exit();
}
local_fiq_enable();
local_irq_enable();
if (ret)
pr_err("%s returned %d\n", __func__, ret);
return ret;
}
#endif
struct sync_struct mcpm_sync;
/*

View File

@ -5,10 +5,14 @@
obj-$(CONFIG_CRYPTO_AES_ARM) += aes-arm.o
obj-$(CONFIG_CRYPTO_AES_ARM_BS) += aes-arm-bs.o
obj-$(CONFIG_CRYPTO_SHA1_ARM) += sha1-arm.o
obj-$(CONFIG_CRYPTO_SHA1_ARM_NEON) += sha1-arm-neon.o
obj-$(CONFIG_CRYPTO_SHA512_ARM_NEON) += sha512-arm-neon.o
aes-arm-y := aes-armv4.o aes_glue.o
aes-arm-bs-y := aesbs-core.o aesbs-glue.o
sha1-arm-y := sha1-armv4-large.o sha1_glue.o
sha1-arm-neon-y := sha1-armv7-neon.o sha1_neon_glue.o
sha512-arm-neon-y := sha512-armv7-neon.o sha512_neon_glue.o
quiet_cmd_perl = PERL $@
cmd_perl = $(PERL) $(<) > $(@)

View File

@ -35,6 +35,7 @@
@ that is being targetted.
#include <linux/linkage.h>
#include <asm/assembler.h>
.text
@ -648,7 +649,7 @@ _armv4_AES_set_encrypt_key:
.Ldone: mov r0,#0
ldmia sp!,{r4-r12,lr}
.Labrt: mov pc,lr
.Labrt: ret lr
ENDPROC(private_AES_set_encrypt_key)
.align 5

View File

@ -0,0 +1,634 @@
/* sha1-armv7-neon.S - ARM/NEON accelerated SHA-1 transform function
*
* Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <linux/linkage.h>
.syntax unified
.code 32
.fpu neon
.text
/* Context structure */
#define state_h0 0
#define state_h1 4
#define state_h2 8
#define state_h3 12
#define state_h4 16
/* Constants */
#define K1 0x5A827999
#define K2 0x6ED9EBA1
#define K3 0x8F1BBCDC
#define K4 0xCA62C1D6
.align 4
.LK_VEC:
.LK1: .long K1, K1, K1, K1
.LK2: .long K2, K2, K2, K2
.LK3: .long K3, K3, K3, K3
.LK4: .long K4, K4, K4, K4
/* Register macros */
#define RSTATE r0
#define RDATA r1
#define RNBLKS r2
#define ROLDSTACK r3
#define RWK lr
#define _a r4
#define _b r5
#define _c r6
#define _d r7
#define _e r8
#define RT0 r9
#define RT1 r10
#define RT2 r11
#define RT3 r12
#define W0 q0
#define W1 q1
#define W2 q2
#define W3 q3
#define W4 q4
#define W5 q5
#define W6 q6
#define W7 q7
#define tmp0 q8
#define tmp1 q9
#define tmp2 q10
#define tmp3 q11
#define qK1 q12
#define qK2 q13
#define qK3 q14
#define qK4 q15
/* Round function macros. */
#define WK_offs(i) (((i) & 15) * 4)
#define _R_F1(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
ldr RT3, [sp, WK_offs(i)]; \
pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
bic RT0, d, b; \
add e, e, a, ror #(32 - 5); \
and RT1, c, b; \
pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
add RT0, RT0, RT3; \
add e, e, RT1; \
ror b, #(32 - 30); \
pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
add e, e, RT0;
#define _R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
ldr RT3, [sp, WK_offs(i)]; \
pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
eor RT0, d, b; \
add e, e, a, ror #(32 - 5); \
eor RT0, RT0, c; \
pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
add e, e, RT3; \
ror b, #(32 - 30); \
pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
add e, e, RT0; \
#define _R_F3(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
ldr RT3, [sp, WK_offs(i)]; \
pre1(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
eor RT0, b, c; \
and RT1, b, c; \
add e, e, a, ror #(32 - 5); \
pre2(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
and RT0, RT0, d; \
add RT1, RT1, RT3; \
add e, e, RT0; \
ror b, #(32 - 30); \
pre3(i16,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28); \
add e, e, RT1;
#define _R_F4(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
_R_F2(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
#define _R(a,b,c,d,e,f,i,pre1,pre2,pre3,i16,\
W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
_R_##f(a,b,c,d,e,i,pre1,pre2,pre3,i16,\
W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
#define R(a,b,c,d,e,f,i) \
_R_##f(a,b,c,d,e,i,dummy,dummy,dummy,i16,\
W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28)
#define dummy(...)
/* Input expansion macros. */
/********* Precalc macros for rounds 0-15 *************************************/
#define W_PRECALC_00_15() \
add RWK, sp, #(WK_offs(0)); \
\
vld1.32 {tmp0, tmp1}, [RDATA]!; \
vrev32.8 W0, tmp0; /* big => little */ \
vld1.32 {tmp2, tmp3}, [RDATA]!; \
vadd.u32 tmp0, W0, curK; \
vrev32.8 W7, tmp1; /* big => little */ \
vrev32.8 W6, tmp2; /* big => little */ \
vadd.u32 tmp1, W7, curK; \
vrev32.8 W5, tmp3; /* big => little */ \
vadd.u32 tmp2, W6, curK; \
vst1.32 {tmp0, tmp1}, [RWK]!; \
vadd.u32 tmp3, W5, curK; \
vst1.32 {tmp2, tmp3}, [RWK]; \
#define WPRECALC_00_15_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vld1.32 {tmp0, tmp1}, [RDATA]!; \
#define WPRECALC_00_15_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
add RWK, sp, #(WK_offs(0)); \
#define WPRECALC_00_15_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vrev32.8 W0, tmp0; /* big => little */ \
#define WPRECALC_00_15_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vld1.32 {tmp2, tmp3}, [RDATA]!; \
#define WPRECALC_00_15_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vadd.u32 tmp0, W0, curK; \
#define WPRECALC_00_15_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vrev32.8 W7, tmp1; /* big => little */ \
#define WPRECALC_00_15_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vrev32.8 W6, tmp2; /* big => little */ \
#define WPRECALC_00_15_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vadd.u32 tmp1, W7, curK; \
#define WPRECALC_00_15_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vrev32.8 W5, tmp3; /* big => little */ \
#define WPRECALC_00_15_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vadd.u32 tmp2, W6, curK; \
#define WPRECALC_00_15_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vst1.32 {tmp0, tmp1}, [RWK]!; \
#define WPRECALC_00_15_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vadd.u32 tmp3, W5, curK; \
#define WPRECALC_00_15_12(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vst1.32 {tmp2, tmp3}, [RWK]; \
/********* Precalc macros for rounds 16-31 ************************************/
#define WPRECALC_16_31_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
veor tmp0, tmp0; \
vext.8 W, W_m16, W_m12, #8; \
#define WPRECALC_16_31_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
add RWK, sp, #(WK_offs(i)); \
vext.8 tmp0, W_m04, tmp0, #4; \
#define WPRECALC_16_31_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
veor tmp0, tmp0, W_m16; \
veor.32 W, W, W_m08; \
#define WPRECALC_16_31_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
veor tmp1, tmp1; \
veor W, W, tmp0; \
#define WPRECALC_16_31_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vshl.u32 tmp0, W, #1; \
#define WPRECALC_16_31_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vext.8 tmp1, tmp1, W, #(16-12); \
vshr.u32 W, W, #31; \
#define WPRECALC_16_31_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vorr tmp0, tmp0, W; \
vshr.u32 W, tmp1, #30; \
#define WPRECALC_16_31_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vshl.u32 tmp1, tmp1, #2; \
#define WPRECALC_16_31_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
veor tmp0, tmp0, W; \
#define WPRECALC_16_31_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
veor W, tmp0, tmp1; \
#define WPRECALC_16_31_10(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vadd.u32 tmp0, W, curK; \
#define WPRECALC_16_31_11(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vst1.32 {tmp0}, [RWK];
/********* Precalc macros for rounds 32-79 ************************************/
#define WPRECALC_32_79_0(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
veor W, W_m28; \
#define WPRECALC_32_79_1(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vext.8 tmp0, W_m08, W_m04, #8; \
#define WPRECALC_32_79_2(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
veor W, W_m16; \
#define WPRECALC_32_79_3(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
veor W, tmp0; \
#define WPRECALC_32_79_4(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
add RWK, sp, #(WK_offs(i&~3)); \
#define WPRECALC_32_79_5(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vshl.u32 tmp1, W, #2; \
#define WPRECALC_32_79_6(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vshr.u32 tmp0, W, #30; \
#define WPRECALC_32_79_7(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vorr W, tmp0, tmp1; \
#define WPRECALC_32_79_8(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vadd.u32 tmp0, W, curK; \
#define WPRECALC_32_79_9(i,W,W_m04,W_m08,W_m12,W_m16,W_m20,W_m24,W_m28) \
vst1.32 {tmp0}, [RWK];
/*
* Transform nblks*64 bytes (nblks*16 32-bit words) at DATA.
*
* unsigned int
* sha1_transform_neon (void *ctx, const unsigned char *data,
* unsigned int nblks)
*/
.align 3
ENTRY(sha1_transform_neon)
/* input:
* r0: ctx, CTX
* r1: data (64*nblks bytes)
* r2: nblks
*/
cmp RNBLKS, #0;
beq .Ldo_nothing;
push {r4-r12, lr};
/*vpush {q4-q7};*/
adr RT3, .LK_VEC;
mov ROLDSTACK, sp;
/* Align stack. */
sub RT0, sp, #(16*4);
and RT0, #(~(16-1));
mov sp, RT0;
vld1.32 {qK1-qK2}, [RT3]!; /* Load K1,K2 */
/* Get the values of the chaining variables. */
ldm RSTATE, {_a-_e};
vld1.32 {qK3-qK4}, [RT3]; /* Load K3,K4 */
#undef curK
#define curK qK1
/* Precalc 0-15. */
W_PRECALC_00_15();
.Loop:
/* Transform 0-15 + Precalc 16-31. */
_R( _a, _b, _c, _d, _e, F1, 0,
WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 16,
W4, W5, W6, W7, W0, _, _, _ );
_R( _e, _a, _b, _c, _d, F1, 1,
WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 16,
W4, W5, W6, W7, W0, _, _, _ );
_R( _d, _e, _a, _b, _c, F1, 2,
WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 16,
W4, W5, W6, W7, W0, _, _, _ );
_R( _c, _d, _e, _a, _b, F1, 3,
WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,16,
W4, W5, W6, W7, W0, _, _, _ );
#undef curK
#define curK qK2
_R( _b, _c, _d, _e, _a, F1, 4,
WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 20,
W3, W4, W5, W6, W7, _, _, _ );
_R( _a, _b, _c, _d, _e, F1, 5,
WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 20,
W3, W4, W5, W6, W7, _, _, _ );
_R( _e, _a, _b, _c, _d, F1, 6,
WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 20,
W3, W4, W5, W6, W7, _, _, _ );
_R( _d, _e, _a, _b, _c, F1, 7,
WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,20,
W3, W4, W5, W6, W7, _, _, _ );
_R( _c, _d, _e, _a, _b, F1, 8,
WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 24,
W2, W3, W4, W5, W6, _, _, _ );
_R( _b, _c, _d, _e, _a, F1, 9,
WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 24,
W2, W3, W4, W5, W6, _, _, _ );
_R( _a, _b, _c, _d, _e, F1, 10,
WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 24,
W2, W3, W4, W5, W6, _, _, _ );
_R( _e, _a, _b, _c, _d, F1, 11,
WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,24,
W2, W3, W4, W5, W6, _, _, _ );
_R( _d, _e, _a, _b, _c, F1, 12,
WPRECALC_16_31_0, WPRECALC_16_31_1, WPRECALC_16_31_2, 28,
W1, W2, W3, W4, W5, _, _, _ );
_R( _c, _d, _e, _a, _b, F1, 13,
WPRECALC_16_31_3, WPRECALC_16_31_4, WPRECALC_16_31_5, 28,
W1, W2, W3, W4, W5, _, _, _ );
_R( _b, _c, _d, _e, _a, F1, 14,
WPRECALC_16_31_6, WPRECALC_16_31_7, WPRECALC_16_31_8, 28,
W1, W2, W3, W4, W5, _, _, _ );
_R( _a, _b, _c, _d, _e, F1, 15,
WPRECALC_16_31_9, WPRECALC_16_31_10,WPRECALC_16_31_11,28,
W1, W2, W3, W4, W5, _, _, _ );
/* Transform 16-63 + Precalc 32-79. */
_R( _e, _a, _b, _c, _d, F1, 16,
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 32,
W0, W1, W2, W3, W4, W5, W6, W7);
_R( _d, _e, _a, _b, _c, F1, 17,
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 32,
W0, W1, W2, W3, W4, W5, W6, W7);
_R( _c, _d, _e, _a, _b, F1, 18,
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 32,
W0, W1, W2, W3, W4, W5, W6, W7);
_R( _b, _c, _d, _e, _a, F1, 19,
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 32,
W0, W1, W2, W3, W4, W5, W6, W7);
_R( _a, _b, _c, _d, _e, F2, 20,
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 36,
W7, W0, W1, W2, W3, W4, W5, W6);
_R( _e, _a, _b, _c, _d, F2, 21,
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 36,
W7, W0, W1, W2, W3, W4, W5, W6);
_R( _d, _e, _a, _b, _c, F2, 22,
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 36,
W7, W0, W1, W2, W3, W4, W5, W6);
_R( _c, _d, _e, _a, _b, F2, 23,
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 36,
W7, W0, W1, W2, W3, W4, W5, W6);
#undef curK
#define curK qK3
_R( _b, _c, _d, _e, _a, F2, 24,
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 40,
W6, W7, W0, W1, W2, W3, W4, W5);
_R( _a, _b, _c, _d, _e, F2, 25,
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 40,
W6, W7, W0, W1, W2, W3, W4, W5);
_R( _e, _a, _b, _c, _d, F2, 26,
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 40,
W6, W7, W0, W1, W2, W3, W4, W5);
_R( _d, _e, _a, _b, _c, F2, 27,
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 40,
W6, W7, W0, W1, W2, W3, W4, W5);
_R( _c, _d, _e, _a, _b, F2, 28,
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 44,
W5, W6, W7, W0, W1, W2, W3, W4);
_R( _b, _c, _d, _e, _a, F2, 29,
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 44,
W5, W6, W7, W0, W1, W2, W3, W4);
_R( _a, _b, _c, _d, _e, F2, 30,
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 44,
W5, W6, W7, W0, W1, W2, W3, W4);
_R( _e, _a, _b, _c, _d, F2, 31,
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 44,
W5, W6, W7, W0, W1, W2, W3, W4);
_R( _d, _e, _a, _b, _c, F2, 32,
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 48,
W4, W5, W6, W7, W0, W1, W2, W3);
_R( _c, _d, _e, _a, _b, F2, 33,
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 48,
W4, W5, W6, W7, W0, W1, W2, W3);
_R( _b, _c, _d, _e, _a, F2, 34,
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 48,
W4, W5, W6, W7, W0, W1, W2, W3);
_R( _a, _b, _c, _d, _e, F2, 35,
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 48,
W4, W5, W6, W7, W0, W1, W2, W3);
_R( _e, _a, _b, _c, _d, F2, 36,
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 52,
W3, W4, W5, W6, W7, W0, W1, W2);
_R( _d, _e, _a, _b, _c, F2, 37,
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 52,
W3, W4, W5, W6, W7, W0, W1, W2);
_R( _c, _d, _e, _a, _b, F2, 38,
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 52,
W3, W4, W5, W6, W7, W0, W1, W2);
_R( _b, _c, _d, _e, _a, F2, 39,
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 52,
W3, W4, W5, W6, W7, W0, W1, W2);
_R( _a, _b, _c, _d, _e, F3, 40,
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 56,
W2, W3, W4, W5, W6, W7, W0, W1);
_R( _e, _a, _b, _c, _d, F3, 41,
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 56,
W2, W3, W4, W5, W6, W7, W0, W1);
_R( _d, _e, _a, _b, _c, F3, 42,
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 56,
W2, W3, W4, W5, W6, W7, W0, W1);
_R( _c, _d, _e, _a, _b, F3, 43,
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 56,
W2, W3, W4, W5, W6, W7, W0, W1);
#undef curK
#define curK qK4
_R( _b, _c, _d, _e, _a, F3, 44,
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 60,
W1, W2, W3, W4, W5, W6, W7, W0);
_R( _a, _b, _c, _d, _e, F3, 45,
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 60,
W1, W2, W3, W4, W5, W6, W7, W0);
_R( _e, _a, _b, _c, _d, F3, 46,
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 60,
W1, W2, W3, W4, W5, W6, W7, W0);
_R( _d, _e, _a, _b, _c, F3, 47,
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 60,
W1, W2, W3, W4, W5, W6, W7, W0);
_R( _c, _d, _e, _a, _b, F3, 48,
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 64,
W0, W1, W2, W3, W4, W5, W6, W7);
_R( _b, _c, _d, _e, _a, F3, 49,
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 64,
W0, W1, W2, W3, W4, W5, W6, W7);
_R( _a, _b, _c, _d, _e, F3, 50,
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 64,
W0, W1, W2, W3, W4, W5, W6, W7);
_R( _e, _a, _b, _c, _d, F3, 51,
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 64,
W0, W1, W2, W3, W4, W5, W6, W7);
_R( _d, _e, _a, _b, _c, F3, 52,
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 68,
W7, W0, W1, W2, W3, W4, W5, W6);
_R( _c, _d, _e, _a, _b, F3, 53,
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 68,
W7, W0, W1, W2, W3, W4, W5, W6);
_R( _b, _c, _d, _e, _a, F3, 54,
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 68,
W7, W0, W1, W2, W3, W4, W5, W6);
_R( _a, _b, _c, _d, _e, F3, 55,
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 68,
W7, W0, W1, W2, W3, W4, W5, W6);
_R( _e, _a, _b, _c, _d, F3, 56,
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 72,
W6, W7, W0, W1, W2, W3, W4, W5);
_R( _d, _e, _a, _b, _c, F3, 57,
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 72,
W6, W7, W0, W1, W2, W3, W4, W5);
_R( _c, _d, _e, _a, _b, F3, 58,
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 72,
W6, W7, W0, W1, W2, W3, W4, W5);
_R( _b, _c, _d, _e, _a, F3, 59,
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 72,
W6, W7, W0, W1, W2, W3, W4, W5);
subs RNBLKS, #1;
_R( _a, _b, _c, _d, _e, F4, 60,
WPRECALC_32_79_0, WPRECALC_32_79_1, WPRECALC_32_79_2, 76,
W5, W6, W7, W0, W1, W2, W3, W4);
_R( _e, _a, _b, _c, _d, F4, 61,
WPRECALC_32_79_3, WPRECALC_32_79_4, WPRECALC_32_79_5, 76,
W5, W6, W7, W0, W1, W2, W3, W4);
_R( _d, _e, _a, _b, _c, F4, 62,
WPRECALC_32_79_6, dummy, WPRECALC_32_79_7, 76,
W5, W6, W7, W0, W1, W2, W3, W4);
_R( _c, _d, _e, _a, _b, F4, 63,
WPRECALC_32_79_8, dummy, WPRECALC_32_79_9, 76,
W5, W6, W7, W0, W1, W2, W3, W4);
beq .Lend;
/* Transform 64-79 + Precalc 0-15 of next block. */
#undef curK
#define curK qK1
_R( _b, _c, _d, _e, _a, F4, 64,
WPRECALC_00_15_0, dummy, dummy, _, _, _, _, _, _, _, _, _ );
_R( _a, _b, _c, _d, _e, F4, 65,
WPRECALC_00_15_1, dummy, dummy, _, _, _, _, _, _, _, _, _ );
_R( _e, _a, _b, _c, _d, F4, 66,
WPRECALC_00_15_2, dummy, dummy, _, _, _, _, _, _, _, _, _ );
_R( _d, _e, _a, _b, _c, F4, 67,
WPRECALC_00_15_3, dummy, dummy, _, _, _, _, _, _, _, _, _ );
_R( _c, _d, _e, _a, _b, F4, 68,
dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
_R( _b, _c, _d, _e, _a, F4, 69,
dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
_R( _a, _b, _c, _d, _e, F4, 70,
WPRECALC_00_15_4, dummy, dummy, _, _, _, _, _, _, _, _, _ );
_R( _e, _a, _b, _c, _d, F4, 71,
WPRECALC_00_15_5, dummy, dummy, _, _, _, _, _, _, _, _, _ );
_R( _d, _e, _a, _b, _c, F4, 72,
dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
_R( _c, _d, _e, _a, _b, F4, 73,
dummy, dummy, dummy, _, _, _, _, _, _, _, _, _ );
_R( _b, _c, _d, _e, _a, F4, 74,
WPRECALC_00_15_6, dummy, dummy, _, _, _, _, _, _, _, _, _ );
_R( _a, _b, _c, _d, _e, F4, 75,
WPRECALC_00_15_7, dummy, dummy, _, _, _, _, _, _, _, _, _ );
_R( _e, _a, _b, _c, _d, F4, 76,
WPRECALC_00_15_8, dummy, dummy, _, _, _, _, _, _, _, _, _ );
_R( _d, _e, _a, _b, _c, F4, 77,
WPRECALC_00_15_9, dummy, dummy, _, _, _, _, _, _, _, _, _ );
_R( _c, _d, _e, _a, _b, F4, 78,
WPRECALC_00_15_10, dummy, dummy, _, _, _, _, _, _, _, _, _ );
_R( _b, _c, _d, _e, _a, F4, 79,
WPRECALC_00_15_11, dummy, WPRECALC_00_15_12, _, _, _, _, _, _, _, _, _ );
/* Update the chaining variables. */
ldm RSTATE, {RT0-RT3};
add _a, RT0;
ldr RT0, [RSTATE, #state_h4];
add _b, RT1;
add _c, RT2;
add _d, RT3;
add _e, RT0;
stm RSTATE, {_a-_e};
b .Loop;
.Lend:
/* Transform 64-79 */
R( _b, _c, _d, _e, _a, F4, 64 );
R( _a, _b, _c, _d, _e, F4, 65 );
R( _e, _a, _b, _c, _d, F4, 66 );
R( _d, _e, _a, _b, _c, F4, 67 );
R( _c, _d, _e, _a, _b, F4, 68 );
R( _b, _c, _d, _e, _a, F4, 69 );
R( _a, _b, _c, _d, _e, F4, 70 );
R( _e, _a, _b, _c, _d, F4, 71 );
R( _d, _e, _a, _b, _c, F4, 72 );
R( _c, _d, _e, _a, _b, F4, 73 );
R( _b, _c, _d, _e, _a, F4, 74 );
R( _a, _b, _c, _d, _e, F4, 75 );
R( _e, _a, _b, _c, _d, F4, 76 );
R( _d, _e, _a, _b, _c, F4, 77 );
R( _c, _d, _e, _a, _b, F4, 78 );
R( _b, _c, _d, _e, _a, F4, 79 );
mov sp, ROLDSTACK;
/* Update the chaining variables. */
ldm RSTATE, {RT0-RT3};
add _a, RT0;
ldr RT0, [RSTATE, #state_h4];
add _b, RT1;
add _c, RT2;
add _d, RT3;
/*vpop {q4-q7};*/
add _e, RT0;
stm RSTATE, {_a-_e};
pop {r4-r12, pc};
.Ldo_nothing:
bx lr
ENDPROC(sha1_transform_neon)

View File

@ -23,32 +23,27 @@
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/crypto/sha1.h>
struct SHA1_CTX {
uint32_t h0,h1,h2,h3,h4;
u64 count;
u8 data[SHA1_BLOCK_SIZE];
};
asmlinkage void sha1_block_data_order(struct SHA1_CTX *digest,
asmlinkage void sha1_block_data_order(u32 *digest,
const unsigned char *data, unsigned int rounds);
static int sha1_init(struct shash_desc *desc)
{
struct SHA1_CTX *sctx = shash_desc_ctx(desc);
memset(sctx, 0, sizeof(*sctx));
sctx->h0 = SHA1_H0;
sctx->h1 = SHA1_H1;
sctx->h2 = SHA1_H2;
sctx->h3 = SHA1_H3;
sctx->h4 = SHA1_H4;
struct sha1_state *sctx = shash_desc_ctx(desc);
*sctx = (struct sha1_state){
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
};
return 0;
}
static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data,
unsigned int len, unsigned int partial)
static int __sha1_update(struct sha1_state *sctx, const u8 *data,
unsigned int len, unsigned int partial)
{
unsigned int done = 0;
@ -56,43 +51,44 @@ static int __sha1_update(struct SHA1_CTX *sctx, const u8 *data,
if (partial) {
done = SHA1_BLOCK_SIZE - partial;
memcpy(sctx->data + partial, data, done);
sha1_block_data_order(sctx, sctx->data, 1);
memcpy(sctx->buffer + partial, data, done);
sha1_block_data_order(sctx->state, sctx->buffer, 1);
}
if (len - done >= SHA1_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
sha1_block_data_order(sctx, data + done, rounds);
sha1_block_data_order(sctx->state, data + done, rounds);
done += rounds * SHA1_BLOCK_SIZE;
}
memcpy(sctx->data, data + done, len - done);
memcpy(sctx->buffer, data + done, len - done);
return 0;
}
static int sha1_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
int sha1_update_arm(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct SHA1_CTX *sctx = shash_desc_ctx(desc);
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
int res;
/* Handle the fast case right here */
if (partial + len < SHA1_BLOCK_SIZE) {
sctx->count += len;
memcpy(sctx->data + partial, data, len);
memcpy(sctx->buffer + partial, data, len);
return 0;
}
res = __sha1_update(sctx, data, len, partial);
return res;
}
EXPORT_SYMBOL_GPL(sha1_update_arm);
/* Add padding and return the message digest. */
static int sha1_final(struct shash_desc *desc, u8 *out)
{
struct SHA1_CTX *sctx = shash_desc_ctx(desc);
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be32 *dst = (__be32 *)out;
__be64 bits;
@ -106,7 +102,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out)
/* We need to fill a whole block for __sha1_update() */
if (padlen <= 56) {
sctx->count += padlen;
memcpy(sctx->data + index, padding, padlen);
memcpy(sctx->buffer + index, padding, padlen);
} else {
__sha1_update(sctx, padding, padlen, index);
}
@ -114,7 +110,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out)
/* Store state in digest */
for (i = 0; i < 5; i++)
dst[i] = cpu_to_be32(((u32 *)sctx)[i]);
dst[i] = cpu_to_be32(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
@ -124,7 +120,7 @@ static int sha1_final(struct shash_desc *desc, u8 *out)
static int sha1_export(struct shash_desc *desc, void *out)
{
struct SHA1_CTX *sctx = shash_desc_ctx(desc);
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
@ -132,7 +128,7 @@ static int sha1_export(struct shash_desc *desc, void *out)
static int sha1_import(struct shash_desc *desc, const void *in)
{
struct SHA1_CTX *sctx = shash_desc_ctx(desc);
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
@ -141,12 +137,12 @@ static int sha1_import(struct shash_desc *desc, const void *in)
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_init,
.update = sha1_update,
.update = sha1_update_arm,
.final = sha1_final,
.export = sha1_export,
.import = sha1_import,
.descsize = sizeof(struct SHA1_CTX),
.statesize = sizeof(struct SHA1_CTX),
.descsize = sizeof(struct sha1_state),
.statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name= "sha1-asm",

View File

@ -0,0 +1,197 @@
/*
* Glue code for the SHA1 Secure Hash Algorithm assembler implementation using
* ARM NEON instructions.
*
* Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This file is based on sha1_generic.c and sha1_ssse3_glue.c:
* Copyright (c) Alan Smithee.
* Copyright (c) Andrew McDonald <andrew@mcdonald.org.uk>
* Copyright (c) Jean-Francois Dive <jef@linuxbe.org>
* Copyright (c) Mathias Krause <minipli@googlemail.com>
* Copyright (c) Chandramouli Narayanan <mouli@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/neon.h>
#include <asm/simd.h>
#include <asm/crypto/sha1.h>
asmlinkage void sha1_transform_neon(void *state_h, const char *data,
unsigned int rounds);
static int sha1_neon_init(struct shash_desc *desc)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
*sctx = (struct sha1_state){
.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
};
return 0;
}
static int __sha1_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len, unsigned int partial)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int done = 0;
sctx->count += len;
if (partial) {
done = SHA1_BLOCK_SIZE - partial;
memcpy(sctx->buffer + partial, data, done);
sha1_transform_neon(sctx->state, sctx->buffer, 1);
}
if (len - done >= SHA1_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA1_BLOCK_SIZE;
sha1_transform_neon(sctx->state, data + done, rounds);
done += rounds * SHA1_BLOCK_SIZE;
}
memcpy(sctx->buffer, data + done, len - done);
return 0;
}
static int sha1_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count % SHA1_BLOCK_SIZE;
int res;
/* Handle the fast case right here */
if (partial + len < SHA1_BLOCK_SIZE) {
sctx->count += len;
memcpy(sctx->buffer + partial, data, len);
return 0;
}
if (!may_use_simd()) {
res = sha1_update_arm(desc, data, len);
} else {
kernel_neon_begin();
res = __sha1_neon_update(desc, data, len, partial);
kernel_neon_end();
}
return res;
}
/* Add padding and return the message digest. */
static int sha1_neon_final(struct shash_desc *desc, u8 *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be32 *dst = (__be32 *)out;
__be64 bits;
static const u8 padding[SHA1_BLOCK_SIZE] = { 0x80, };
bits = cpu_to_be64(sctx->count << 3);
/* Pad out to 56 mod 64 and append length */
index = sctx->count % SHA1_BLOCK_SIZE;
padlen = (index < 56) ? (56 - index) : ((SHA1_BLOCK_SIZE+56) - index);
if (!may_use_simd()) {
sha1_update_arm(desc, padding, padlen);
sha1_update_arm(desc, (const u8 *)&bits, sizeof(bits));
} else {
kernel_neon_begin();
/* We need to fill a whole block for __sha1_neon_update() */
if (padlen <= 56) {
sctx->count += padlen;
memcpy(sctx->buffer + index, padding, padlen);
} else {
__sha1_neon_update(desc, padding, padlen, index);
}
__sha1_neon_update(desc, (const u8 *)&bits, sizeof(bits), 56);
kernel_neon_end();
}
/* Store state in digest */
for (i = 0; i < 5; i++)
dst[i] = cpu_to_be32(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
return 0;
}
static int sha1_neon_export(struct shash_desc *desc, void *out)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int sha1_neon_import(struct shash_desc *desc, const void *in)
{
struct sha1_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
static struct shash_alg alg = {
.digestsize = SHA1_DIGEST_SIZE,
.init = sha1_neon_init,
.update = sha1_neon_update,
.final = sha1_neon_final,
.export = sha1_neon_export,
.import = sha1_neon_import,
.descsize = sizeof(struct sha1_state),
.statesize = sizeof(struct sha1_state),
.base = {
.cra_name = "sha1",
.cra_driver_name = "sha1-neon",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA1_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
};
static int __init sha1_neon_mod_init(void)
{
if (!cpu_has_neon())
return -ENODEV;
return crypto_register_shash(&alg);
}
static void __exit sha1_neon_mod_fini(void)
{
crypto_unregister_shash(&alg);
}
module_init(sha1_neon_mod_init);
module_exit(sha1_neon_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA1 Secure Hash Algorithm, NEON accelerated");
MODULE_ALIAS("sha1");

View File

@ -0,0 +1,455 @@
/* sha512-armv7-neon.S - ARM/NEON assembly implementation of SHA-512 transform
*
* Copyright © 2013-2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*/
#include <linux/linkage.h>
.syntax unified
.code 32
.fpu neon
.text
/* structure of SHA512_CONTEXT */
#define hd_a 0
#define hd_b ((hd_a) + 8)
#define hd_c ((hd_b) + 8)
#define hd_d ((hd_c) + 8)
#define hd_e ((hd_d) + 8)
#define hd_f ((hd_e) + 8)
#define hd_g ((hd_f) + 8)
/* register macros */
#define RK %r2
#define RA d0
#define RB d1
#define RC d2
#define RD d3
#define RE d4
#define RF d5
#define RG d6
#define RH d7
#define RT0 d8
#define RT1 d9
#define RT2 d10
#define RT3 d11
#define RT4 d12
#define RT5 d13
#define RT6 d14
#define RT7 d15
#define RT01q q4
#define RT23q q5
#define RT45q q6
#define RT67q q7
#define RW0 d16
#define RW1 d17
#define RW2 d18
#define RW3 d19
#define RW4 d20
#define RW5 d21
#define RW6 d22
#define RW7 d23
#define RW8 d24
#define RW9 d25
#define RW10 d26
#define RW11 d27
#define RW12 d28
#define RW13 d29
#define RW14 d30
#define RW15 d31
#define RW01q q8
#define RW23q q9
#define RW45q q10
#define RW67q q11
#define RW89q q12
#define RW1011q q13
#define RW1213q q14
#define RW1415q q15
/***********************************************************************
* ARM assembly implementation of sha512 transform
***********************************************************************/
#define rounds2_0_63(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, rw01q, rw2, \
rw23q, rw1415q, rw9, rw10, interleave_op, arg1) \
/* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
vshr.u64 RT2, re, #14; \
vshl.u64 RT3, re, #64 - 14; \
interleave_op(arg1); \
vshr.u64 RT4, re, #18; \
vshl.u64 RT5, re, #64 - 18; \
vld1.64 {RT0}, [RK]!; \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, re, #41; \
vshl.u64 RT5, re, #64 - 41; \
vadd.u64 RT0, RT0, rw0; \
veor.64 RT23q, RT23q, RT45q; \
vmov.64 RT7, re; \
veor.64 RT1, RT2, RT3; \
vbsl.64 RT7, rf, rg; \
\
vadd.u64 RT1, RT1, rh; \
vshr.u64 RT2, ra, #28; \
vshl.u64 RT3, ra, #64 - 28; \
vadd.u64 RT1, RT1, RT0; \
vshr.u64 RT4, ra, #34; \
vshl.u64 RT5, ra, #64 - 34; \
vadd.u64 RT1, RT1, RT7; \
\
/* h = Sum0 (a) + Maj (a, b, c); */ \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, ra, #39; \
vshl.u64 RT5, ra, #64 - 39; \
veor.64 RT0, ra, rb; \
veor.64 RT23q, RT23q, RT45q; \
vbsl.64 RT0, rc, rb; \
vadd.u64 rd, rd, RT1; /* d+=t1; */ \
veor.64 rh, RT2, RT3; \
\
/* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
vshr.u64 RT2, rd, #14; \
vshl.u64 RT3, rd, #64 - 14; \
vadd.u64 rh, rh, RT0; \
vshr.u64 RT4, rd, #18; \
vshl.u64 RT5, rd, #64 - 18; \
vadd.u64 rh, rh, RT1; /* h+=t1; */ \
vld1.64 {RT0}, [RK]!; \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, rd, #41; \
vshl.u64 RT5, rd, #64 - 41; \
vadd.u64 RT0, RT0, rw1; \
veor.64 RT23q, RT23q, RT45q; \
vmov.64 RT7, rd; \
veor.64 RT1, RT2, RT3; \
vbsl.64 RT7, re, rf; \
\
vadd.u64 RT1, RT1, rg; \
vshr.u64 RT2, rh, #28; \
vshl.u64 RT3, rh, #64 - 28; \
vadd.u64 RT1, RT1, RT0; \
vshr.u64 RT4, rh, #34; \
vshl.u64 RT5, rh, #64 - 34; \
vadd.u64 RT1, RT1, RT7; \
\
/* g = Sum0 (h) + Maj (h, a, b); */ \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, rh, #39; \
vshl.u64 RT5, rh, #64 - 39; \
veor.64 RT0, rh, ra; \
veor.64 RT23q, RT23q, RT45q; \
vbsl.64 RT0, rb, ra; \
vadd.u64 rc, rc, RT1; /* c+=t1; */ \
veor.64 rg, RT2, RT3; \
\
/* w[0] += S1 (w[14]) + w[9] + S0 (w[1]); */ \
/* w[1] += S1 (w[15]) + w[10] + S0 (w[2]); */ \
\
/**** S0(w[1:2]) */ \
\
/* w[0:1] += w[9:10] */ \
/* RT23q = rw1:rw2 */ \
vext.u64 RT23q, rw01q, rw23q, #1; \
vadd.u64 rw0, rw9; \
vadd.u64 rg, rg, RT0; \
vadd.u64 rw1, rw10;\
vadd.u64 rg, rg, RT1; /* g+=t1; */ \
\
vshr.u64 RT45q, RT23q, #1; \
vshl.u64 RT67q, RT23q, #64 - 1; \
vshr.u64 RT01q, RT23q, #8; \
veor.u64 RT45q, RT45q, RT67q; \
vshl.u64 RT67q, RT23q, #64 - 8; \
veor.u64 RT45q, RT45q, RT01q; \
vshr.u64 RT01q, RT23q, #7; \
veor.u64 RT45q, RT45q, RT67q; \
\
/**** S1(w[14:15]) */ \
vshr.u64 RT23q, rw1415q, #6; \
veor.u64 RT01q, RT01q, RT45q; \
vshr.u64 RT45q, rw1415q, #19; \
vshl.u64 RT67q, rw1415q, #64 - 19; \
veor.u64 RT23q, RT23q, RT45q; \
vshr.u64 RT45q, rw1415q, #61; \
veor.u64 RT23q, RT23q, RT67q; \
vshl.u64 RT67q, rw1415q, #64 - 61; \
veor.u64 RT23q, RT23q, RT45q; \
vadd.u64 rw01q, RT01q; /* w[0:1] += S(w[1:2]) */ \
veor.u64 RT01q, RT23q, RT67q;
#define vadd_RT01q(rw01q) \
/* w[0:1] += S(w[14:15]) */ \
vadd.u64 rw01q, RT01q;
#define dummy(_) /*_*/
#define rounds2_64_79(ra, rb, rc, rd, re, rf, rg, rh, rw0, rw1, \
interleave_op1, arg1, interleave_op2, arg2) \
/* t1 = h + Sum1 (e) + Ch (e, f, g) + k[t] + w[t]; */ \
vshr.u64 RT2, re, #14; \
vshl.u64 RT3, re, #64 - 14; \
interleave_op1(arg1); \
vshr.u64 RT4, re, #18; \
vshl.u64 RT5, re, #64 - 18; \
interleave_op2(arg2); \
vld1.64 {RT0}, [RK]!; \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, re, #41; \
vshl.u64 RT5, re, #64 - 41; \
vadd.u64 RT0, RT0, rw0; \
veor.64 RT23q, RT23q, RT45q; \
vmov.64 RT7, re; \
veor.64 RT1, RT2, RT3; \
vbsl.64 RT7, rf, rg; \
\
vadd.u64 RT1, RT1, rh; \
vshr.u64 RT2, ra, #28; \
vshl.u64 RT3, ra, #64 - 28; \
vadd.u64 RT1, RT1, RT0; \
vshr.u64 RT4, ra, #34; \
vshl.u64 RT5, ra, #64 - 34; \
vadd.u64 RT1, RT1, RT7; \
\
/* h = Sum0 (a) + Maj (a, b, c); */ \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, ra, #39; \
vshl.u64 RT5, ra, #64 - 39; \
veor.64 RT0, ra, rb; \
veor.64 RT23q, RT23q, RT45q; \
vbsl.64 RT0, rc, rb; \
vadd.u64 rd, rd, RT1; /* d+=t1; */ \
veor.64 rh, RT2, RT3; \
\
/* t1 = g + Sum1 (d) + Ch (d, e, f) + k[t] + w[t]; */ \
vshr.u64 RT2, rd, #14; \
vshl.u64 RT3, rd, #64 - 14; \
vadd.u64 rh, rh, RT0; \
vshr.u64 RT4, rd, #18; \
vshl.u64 RT5, rd, #64 - 18; \
vadd.u64 rh, rh, RT1; /* h+=t1; */ \
vld1.64 {RT0}, [RK]!; \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, rd, #41; \
vshl.u64 RT5, rd, #64 - 41; \
vadd.u64 RT0, RT0, rw1; \
veor.64 RT23q, RT23q, RT45q; \
vmov.64 RT7, rd; \
veor.64 RT1, RT2, RT3; \
vbsl.64 RT7, re, rf; \
\
vadd.u64 RT1, RT1, rg; \
vshr.u64 RT2, rh, #28; \
vshl.u64 RT3, rh, #64 - 28; \
vadd.u64 RT1, RT1, RT0; \
vshr.u64 RT4, rh, #34; \
vshl.u64 RT5, rh, #64 - 34; \
vadd.u64 RT1, RT1, RT7; \
\
/* g = Sum0 (h) + Maj (h, a, b); */ \
veor.64 RT23q, RT23q, RT45q; \
vshr.u64 RT4, rh, #39; \
vshl.u64 RT5, rh, #64 - 39; \
veor.64 RT0, rh, ra; \
veor.64 RT23q, RT23q, RT45q; \
vbsl.64 RT0, rb, ra; \
vadd.u64 rc, rc, RT1; /* c+=t1; */ \
veor.64 rg, RT2, RT3;
#define vadd_rg_RT0(rg) \
vadd.u64 rg, rg, RT0;
#define vadd_rg_RT1(rg) \
vadd.u64 rg, rg, RT1; /* g+=t1; */
.align 3
ENTRY(sha512_transform_neon)
/* Input:
* %r0: SHA512_CONTEXT
* %r1: data
* %r2: u64 k[] constants
* %r3: nblks
*/
push {%lr};
mov %lr, #0;
/* Load context to d0-d7 */
vld1.64 {RA-RD}, [%r0]!;
vld1.64 {RE-RH}, [%r0];
sub %r0, #(4*8);
/* Load input to w[16], d16-d31 */
/* NOTE: Assumes that on ARMv7 unaligned accesses are always allowed. */
vld1.64 {RW0-RW3}, [%r1]!;
vld1.64 {RW4-RW7}, [%r1]!;
vld1.64 {RW8-RW11}, [%r1]!;
vld1.64 {RW12-RW15}, [%r1]!;
#ifdef __ARMEL__
/* byteswap */
vrev64.8 RW01q, RW01q;
vrev64.8 RW23q, RW23q;
vrev64.8 RW45q, RW45q;
vrev64.8 RW67q, RW67q;
vrev64.8 RW89q, RW89q;
vrev64.8 RW1011q, RW1011q;
vrev64.8 RW1213q, RW1213q;
vrev64.8 RW1415q, RW1415q;
#endif
/* EABI says that d8-d15 must be preserved by callee. */
/*vpush {RT0-RT7};*/
.Loop:
rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2,
RW23q, RW1415q, RW9, RW10, dummy, _);
b .Lenter_rounds;
.Loop_rounds:
rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1, RW01q, RW2,
RW23q, RW1415q, RW9, RW10, vadd_RT01q, RW1415q);
.Lenter_rounds:
rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3, RW23q, RW4,
RW45q, RW01q, RW11, RW12, vadd_RT01q, RW01q);
rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5, RW45q, RW6,
RW67q, RW23q, RW13, RW14, vadd_RT01q, RW23q);
rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7, RW67q, RW8,
RW89q, RW45q, RW15, RW0, vadd_RT01q, RW45q);
rounds2_0_63(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9, RW89q, RW10,
RW1011q, RW67q, RW1, RW2, vadd_RT01q, RW67q);
rounds2_0_63(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11, RW1011q, RW12,
RW1213q, RW89q, RW3, RW4, vadd_RT01q, RW89q);
add %lr, #16;
rounds2_0_63(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13, RW1213q, RW14,
RW1415q, RW1011q, RW5, RW6, vadd_RT01q, RW1011q);
cmp %lr, #64;
rounds2_0_63(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15, RW1415q, RW0,
RW01q, RW1213q, RW7, RW8, vadd_RT01q, RW1213q);
bne .Loop_rounds;
subs %r3, #1;
rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW0, RW1,
vadd_RT01q, RW1415q, dummy, _);
rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW2, RW3,
vadd_rg_RT0, RG, vadd_rg_RT1, RG);
beq .Lhandle_tail;
vld1.64 {RW0-RW3}, [%r1]!;
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5,
vadd_rg_RT0, RE, vadd_rg_RT1, RE);
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7,
vadd_rg_RT0, RC, vadd_rg_RT1, RC);
#ifdef __ARMEL__
vrev64.8 RW01q, RW01q;
vrev64.8 RW23q, RW23q;
#endif
vld1.64 {RW4-RW7}, [%r1]!;
rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9,
vadd_rg_RT0, RA, vadd_rg_RT1, RA);
rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11,
vadd_rg_RT0, RG, vadd_rg_RT1, RG);
#ifdef __ARMEL__
vrev64.8 RW45q, RW45q;
vrev64.8 RW67q, RW67q;
#endif
vld1.64 {RW8-RW11}, [%r1]!;
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13,
vadd_rg_RT0, RE, vadd_rg_RT1, RE);
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15,
vadd_rg_RT0, RC, vadd_rg_RT1, RC);
#ifdef __ARMEL__
vrev64.8 RW89q, RW89q;
vrev64.8 RW1011q, RW1011q;
#endif
vld1.64 {RW12-RW15}, [%r1]!;
vadd_rg_RT0(RA);
vadd_rg_RT1(RA);
/* Load context */
vld1.64 {RT0-RT3}, [%r0]!;
vld1.64 {RT4-RT7}, [%r0];
sub %r0, #(4*8);
#ifdef __ARMEL__
vrev64.8 RW1213q, RW1213q;
vrev64.8 RW1415q, RW1415q;
#endif
vadd.u64 RA, RT0;
vadd.u64 RB, RT1;
vadd.u64 RC, RT2;
vadd.u64 RD, RT3;
vadd.u64 RE, RT4;
vadd.u64 RF, RT5;
vadd.u64 RG, RT6;
vadd.u64 RH, RT7;
/* Store the first half of context */
vst1.64 {RA-RD}, [%r0]!;
sub RK, $(8*80);
vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
mov %lr, #0;
sub %r0, #(4*8);
b .Loop;
.Lhandle_tail:
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW4, RW5,
vadd_rg_RT0, RE, vadd_rg_RT1, RE);
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW6, RW7,
vadd_rg_RT0, RC, vadd_rg_RT1, RC);
rounds2_64_79(RA, RB, RC, RD, RE, RF, RG, RH, RW8, RW9,
vadd_rg_RT0, RA, vadd_rg_RT1, RA);
rounds2_64_79(RG, RH, RA, RB, RC, RD, RE, RF, RW10, RW11,
vadd_rg_RT0, RG, vadd_rg_RT1, RG);
rounds2_64_79(RE, RF, RG, RH, RA, RB, RC, RD, RW12, RW13,
vadd_rg_RT0, RE, vadd_rg_RT1, RE);
rounds2_64_79(RC, RD, RE, RF, RG, RH, RA, RB, RW14, RW15,
vadd_rg_RT0, RC, vadd_rg_RT1, RC);
/* Load context to d16-d23 */
vld1.64 {RW0-RW3}, [%r0]!;
vadd_rg_RT0(RA);
vld1.64 {RW4-RW7}, [%r0];
vadd_rg_RT1(RA);
sub %r0, #(4*8);
vadd.u64 RA, RW0;
vadd.u64 RB, RW1;
vadd.u64 RC, RW2;
vadd.u64 RD, RW3;
vadd.u64 RE, RW4;
vadd.u64 RF, RW5;
vadd.u64 RG, RW6;
vadd.u64 RH, RW7;
/* Store the first half of context */
vst1.64 {RA-RD}, [%r0]!;
/* Clear used registers */
/* d16-d31 */
veor.u64 RW01q, RW01q;
veor.u64 RW23q, RW23q;
veor.u64 RW45q, RW45q;
veor.u64 RW67q, RW67q;
vst1.64 {RE-RH}, [%r0]; /* Store the last half of context */
veor.u64 RW89q, RW89q;
veor.u64 RW1011q, RW1011q;
veor.u64 RW1213q, RW1213q;
veor.u64 RW1415q, RW1415q;
/* d8-d15 */
/*vpop {RT0-RT7};*/
/* d0-d7 (q0-q3) */
veor.u64 %q0, %q0;
veor.u64 %q1, %q1;
veor.u64 %q2, %q2;
veor.u64 %q3, %q3;
pop {%pc};
ENDPROC(sha512_transform_neon)

View File

@ -0,0 +1,305 @@
/*
* Glue code for the SHA512 Secure Hash Algorithm assembly implementation
* using NEON instructions.
*
* Copyright © 2014 Jussi Kivilinna <jussi.kivilinna@iki.fi>
*
* This file is based on sha512_ssse3_glue.c:
* Copyright (C) 2013 Intel Corporation
* Author: Tim Chen <tim.c.chen@linux.intel.com>
*
* This program is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 2 of the License, or (at your option)
* any later version.
*
*/
#include <crypto/internal/hash.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/mm.h>
#include <linux/cryptohash.h>
#include <linux/types.h>
#include <linux/string.h>
#include <crypto/sha.h>
#include <asm/byteorder.h>
#include <asm/simd.h>
#include <asm/neon.h>
static const u64 sha512_k[] = {
0x428a2f98d728ae22ULL, 0x7137449123ef65cdULL,
0xb5c0fbcfec4d3b2fULL, 0xe9b5dba58189dbbcULL,
0x3956c25bf348b538ULL, 0x59f111f1b605d019ULL,
0x923f82a4af194f9bULL, 0xab1c5ed5da6d8118ULL,
0xd807aa98a3030242ULL, 0x12835b0145706fbeULL,
0x243185be4ee4b28cULL, 0x550c7dc3d5ffb4e2ULL,
0x72be5d74f27b896fULL, 0x80deb1fe3b1696b1ULL,
0x9bdc06a725c71235ULL, 0xc19bf174cf692694ULL,
0xe49b69c19ef14ad2ULL, 0xefbe4786384f25e3ULL,
0x0fc19dc68b8cd5b5ULL, 0x240ca1cc77ac9c65ULL,
0x2de92c6f592b0275ULL, 0x4a7484aa6ea6e483ULL,
0x5cb0a9dcbd41fbd4ULL, 0x76f988da831153b5ULL,
0x983e5152ee66dfabULL, 0xa831c66d2db43210ULL,
0xb00327c898fb213fULL, 0xbf597fc7beef0ee4ULL,
0xc6e00bf33da88fc2ULL, 0xd5a79147930aa725ULL,
0x06ca6351e003826fULL, 0x142929670a0e6e70ULL,
0x27b70a8546d22ffcULL, 0x2e1b21385c26c926ULL,
0x4d2c6dfc5ac42aedULL, 0x53380d139d95b3dfULL,
0x650a73548baf63deULL, 0x766a0abb3c77b2a8ULL,
0x81c2c92e47edaee6ULL, 0x92722c851482353bULL,
0xa2bfe8a14cf10364ULL, 0xa81a664bbc423001ULL,
0xc24b8b70d0f89791ULL, 0xc76c51a30654be30ULL,
0xd192e819d6ef5218ULL, 0xd69906245565a910ULL,
0xf40e35855771202aULL, 0x106aa07032bbd1b8ULL,
0x19a4c116b8d2d0c8ULL, 0x1e376c085141ab53ULL,
0x2748774cdf8eeb99ULL, 0x34b0bcb5e19b48a8ULL,
0x391c0cb3c5c95a63ULL, 0x4ed8aa4ae3418acbULL,
0x5b9cca4f7763e373ULL, 0x682e6ff3d6b2b8a3ULL,
0x748f82ee5defb2fcULL, 0x78a5636f43172f60ULL,
0x84c87814a1f0ab72ULL, 0x8cc702081a6439ecULL,
0x90befffa23631e28ULL, 0xa4506cebde82bde9ULL,
0xbef9a3f7b2c67915ULL, 0xc67178f2e372532bULL,
0xca273eceea26619cULL, 0xd186b8c721c0c207ULL,
0xeada7dd6cde0eb1eULL, 0xf57d4f7fee6ed178ULL,
0x06f067aa72176fbaULL, 0x0a637dc5a2c898a6ULL,
0x113f9804bef90daeULL, 0x1b710b35131c471bULL,
0x28db77f523047d84ULL, 0x32caab7b40c72493ULL,
0x3c9ebe0a15c9bebcULL, 0x431d67c49c100d4cULL,
0x4cc5d4becb3e42b6ULL, 0x597f299cfc657e2aULL,
0x5fcb6fab3ad6faecULL, 0x6c44198c4a475817ULL
};
asmlinkage void sha512_transform_neon(u64 *digest, const void *data,
const u64 k[], unsigned int num_blks);
static int sha512_neon_init(struct shash_desc *desc)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA512_H0;
sctx->state[1] = SHA512_H1;
sctx->state[2] = SHA512_H2;
sctx->state[3] = SHA512_H3;
sctx->state[4] = SHA512_H4;
sctx->state[5] = SHA512_H5;
sctx->state[6] = SHA512_H6;
sctx->state[7] = SHA512_H7;
sctx->count[0] = sctx->count[1] = 0;
return 0;
}
static int __sha512_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len, unsigned int partial)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
unsigned int done = 0;
sctx->count[0] += len;
if (sctx->count[0] < len)
sctx->count[1]++;
if (partial) {
done = SHA512_BLOCK_SIZE - partial;
memcpy(sctx->buf + partial, data, done);
sha512_transform_neon(sctx->state, sctx->buf, sha512_k, 1);
}
if (len - done >= SHA512_BLOCK_SIZE) {
const unsigned int rounds = (len - done) / SHA512_BLOCK_SIZE;
sha512_transform_neon(sctx->state, data + done, sha512_k,
rounds);
done += rounds * SHA512_BLOCK_SIZE;
}
memcpy(sctx->buf, data + done, len - done);
return 0;
}
static int sha512_neon_update(struct shash_desc *desc, const u8 *data,
unsigned int len)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
unsigned int partial = sctx->count[0] % SHA512_BLOCK_SIZE;
int res;
/* Handle the fast case right here */
if (partial + len < SHA512_BLOCK_SIZE) {
sctx->count[0] += len;
if (sctx->count[0] < len)
sctx->count[1]++;
memcpy(sctx->buf + partial, data, len);
return 0;
}
if (!may_use_simd()) {
res = crypto_sha512_update(desc, data, len);
} else {
kernel_neon_begin();
res = __sha512_neon_update(desc, data, len, partial);
kernel_neon_end();
}
return res;
}
/* Add padding and return the message digest. */
static int sha512_neon_final(struct shash_desc *desc, u8 *out)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
unsigned int i, index, padlen;
__be64 *dst = (__be64 *)out;
__be64 bits[2];
static const u8 padding[SHA512_BLOCK_SIZE] = { 0x80, };
/* save number of bits */
bits[1] = cpu_to_be64(sctx->count[0] << 3);
bits[0] = cpu_to_be64(sctx->count[1] << 3 | sctx->count[0] >> 61);
/* Pad out to 112 mod 128 and append length */
index = sctx->count[0] & 0x7f;
padlen = (index < 112) ? (112 - index) : ((128+112) - index);
if (!may_use_simd()) {
crypto_sha512_update(desc, padding, padlen);
crypto_sha512_update(desc, (const u8 *)&bits, sizeof(bits));
} else {
kernel_neon_begin();
/* We need to fill a whole block for __sha512_neon_update() */
if (padlen <= 112) {
sctx->count[0] += padlen;
if (sctx->count[0] < padlen)
sctx->count[1]++;
memcpy(sctx->buf + index, padding, padlen);
} else {
__sha512_neon_update(desc, padding, padlen, index);
}
__sha512_neon_update(desc, (const u8 *)&bits,
sizeof(bits), 112);
kernel_neon_end();
}
/* Store state in digest */
for (i = 0; i < 8; i++)
dst[i] = cpu_to_be64(sctx->state[i]);
/* Wipe context */
memset(sctx, 0, sizeof(*sctx));
return 0;
}
static int sha512_neon_export(struct shash_desc *desc, void *out)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
memcpy(out, sctx, sizeof(*sctx));
return 0;
}
static int sha512_neon_import(struct shash_desc *desc, const void *in)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
memcpy(sctx, in, sizeof(*sctx));
return 0;
}
static int sha384_neon_init(struct shash_desc *desc)
{
struct sha512_state *sctx = shash_desc_ctx(desc);
sctx->state[0] = SHA384_H0;
sctx->state[1] = SHA384_H1;
sctx->state[2] = SHA384_H2;
sctx->state[3] = SHA384_H3;
sctx->state[4] = SHA384_H4;
sctx->state[5] = SHA384_H5;
sctx->state[6] = SHA384_H6;
sctx->state[7] = SHA384_H7;
sctx->count[0] = sctx->count[1] = 0;
return 0;
}
static int sha384_neon_final(struct shash_desc *desc, u8 *hash)
{
u8 D[SHA512_DIGEST_SIZE];
sha512_neon_final(desc, D);
memcpy(hash, D, SHA384_DIGEST_SIZE);
memset(D, 0, SHA512_DIGEST_SIZE);
return 0;
}
static struct shash_alg algs[] = { {
.digestsize = SHA512_DIGEST_SIZE,
.init = sha512_neon_init,
.update = sha512_neon_update,
.final = sha512_neon_final,
.export = sha512_neon_export,
.import = sha512_neon_import,
.descsize = sizeof(struct sha512_state),
.statesize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha512",
.cra_driver_name = "sha512-neon",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA512_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
}, {
.digestsize = SHA384_DIGEST_SIZE,
.init = sha384_neon_init,
.update = sha512_neon_update,
.final = sha384_neon_final,
.export = sha512_neon_export,
.import = sha512_neon_import,
.descsize = sizeof(struct sha512_state),
.statesize = sizeof(struct sha512_state),
.base = {
.cra_name = "sha384",
.cra_driver_name = "sha384-neon",
.cra_priority = 250,
.cra_flags = CRYPTO_ALG_TYPE_SHASH,
.cra_blocksize = SHA384_BLOCK_SIZE,
.cra_module = THIS_MODULE,
}
} };
static int __init sha512_neon_mod_init(void)
{
if (!cpu_has_neon())
return -ENODEV;
return crypto_register_shashes(algs, ARRAY_SIZE(algs));
}
static void __exit sha512_neon_mod_fini(void)
{
crypto_unregister_shashes(algs, ARRAY_SIZE(algs));
}
module_init(sha512_neon_mod_init);
module_exit(sha512_neon_mod_fini);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SHA512 Secure Hash Algorithm, NEON accelerated");
MODULE_ALIAS("sha512");
MODULE_ALIAS("sha384");

View File

@ -24,6 +24,8 @@
#include <asm/domain.h>
#include <asm/opcodes-virt.h>
#include <asm/asm-offsets.h>
#include <asm/page.h>
#include <asm/thread_info.h>
#define IOMEM(x) (x)
@ -179,10 +181,10 @@
* Get current thread_info.
*/
.macro get_thread_info, rd
ARM( mov \rd, sp, lsr #13 )
ARM( mov \rd, sp, lsr #THREAD_SIZE_ORDER + PAGE_SHIFT )
THUMB( mov \rd, sp )
THUMB( lsr \rd, \rd, #13 )
mov \rd, \rd, lsl #13
THUMB( lsr \rd, \rd, #THREAD_SIZE_ORDER + PAGE_SHIFT )
mov \rd, \rd, lsl #THREAD_SIZE_ORDER + PAGE_SHIFT
.endm
/*
@ -425,4 +427,25 @@ THUMB( orr \reg , \reg , #PSR_T_BIT )
#endif
.endm
.irp c,,eq,ne,cs,cc,mi,pl,vs,vc,hi,ls,ge,lt,gt,le,hs,lo
.macro ret\c, reg
#if __LINUX_ARM_ARCH__ < 6
mov\c pc, \reg
#else
.ifeqs "\reg", "lr"
bx\c \reg
.else
mov\c pc, \reg
.endif
#endif
.endm
.endr
.macro ret.w, reg
ret \reg
#ifdef CONFIG_THUMB2_KERNEL
nop
#endif
.endm
#endif /* __ASM_ASSEMBLER_H__ */

View File

@ -62,17 +62,18 @@
#define ARM_CPU_IMP_ARM 0x41
#define ARM_CPU_IMP_INTEL 0x69
#define ARM_CPU_PART_ARM1136 0xB360
#define ARM_CPU_PART_ARM1156 0xB560
#define ARM_CPU_PART_ARM1176 0xB760
#define ARM_CPU_PART_ARM11MPCORE 0xB020
#define ARM_CPU_PART_CORTEX_A8 0xC080
#define ARM_CPU_PART_CORTEX_A9 0xC090
#define ARM_CPU_PART_CORTEX_A5 0xC050
#define ARM_CPU_PART_CORTEX_A15 0xC0F0
#define ARM_CPU_PART_CORTEX_A7 0xC070
#define ARM_CPU_PART_CORTEX_A12 0xC0D0
#define ARM_CPU_PART_CORTEX_A17 0xC0E0
/* ARM implemented processors */
#define ARM_CPU_PART_ARM1136 0x4100b360
#define ARM_CPU_PART_ARM1156 0x4100b560
#define ARM_CPU_PART_ARM1176 0x4100b760
#define ARM_CPU_PART_ARM11MPCORE 0x4100b020
#define ARM_CPU_PART_CORTEX_A8 0x4100c080
#define ARM_CPU_PART_CORTEX_A9 0x4100c090
#define ARM_CPU_PART_CORTEX_A5 0x4100c050
#define ARM_CPU_PART_CORTEX_A7 0x4100c070
#define ARM_CPU_PART_CORTEX_A12 0x4100c0d0
#define ARM_CPU_PART_CORTEX_A17 0x4100c0e0
#define ARM_CPU_PART_CORTEX_A15 0x4100c0f0
#define ARM_CPU_XSCALE_ARCH_MASK 0xe000
#define ARM_CPU_XSCALE_ARCH_V1 0x2000
@ -171,14 +172,24 @@ static inline unsigned int __attribute_const__ read_cpuid_implementor(void)
return (read_cpuid_id() & 0xFF000000) >> 24;
}
static inline unsigned int __attribute_const__ read_cpuid_part_number(void)
/*
* The CPU part number is meaningless without referring to the CPU
* implementer: implementers are free to define their own part numbers
* which are permitted to clash with other implementer part numbers.
*/
static inline unsigned int __attribute_const__ read_cpuid_part(void)
{
return read_cpuid_id() & 0xff00fff0;
}
static inline unsigned int __attribute_const__ __deprecated read_cpuid_part_number(void)
{
return read_cpuid_id() & 0xFFF0;
}
static inline unsigned int __attribute_const__ xscale_cpu_arch_version(void)
{
return read_cpuid_part_number() & ARM_CPU_XSCALE_ARCH_MASK;
return read_cpuid_id() & ARM_CPU_XSCALE_ARCH_MASK;
}
static inline unsigned int __attribute_const__ read_cpuid_cachetype(void)

View File

@ -0,0 +1,10 @@
#ifndef ASM_ARM_CRYPTO_SHA1_H
#define ASM_ARM_CRYPTO_SHA1_H
#include <linux/crypto.h>
#include <crypto/sha.h>
extern int sha1_update_arm(struct shash_desc *desc, const u8 *data,
unsigned int len);
#endif

View File

@ -35,5 +35,5 @@
\symbol_name:
mov r8, lr
arch_irq_handler_default
mov pc, r8
ret r8
.endm

View File

@ -221,15 +221,6 @@
# endif
#endif
#ifdef CONFIG_CPU_V7
# ifdef CPU_NAME
# undef MULTI_CPU
# define MULTI_CPU
# else
# define CPU_NAME cpu_v7
# endif
#endif
#ifdef CONFIG_CPU_V7M
# ifdef CPU_NAME
# undef MULTI_CPU
@ -248,6 +239,15 @@
# endif
#endif
#ifdef CONFIG_CPU_V7
/*
* Cortex-A9 needs a different suspend/resume function, so we need
* multiple CPU support for ARMv7 anyway.
*/
# undef MULTI_CPU
# define MULTI_CPU
#endif
#ifndef MULTI_CPU
#define cpu_proc_init __glue(CPU_NAME,_proc_init)
#define cpu_proc_fin __glue(CPU_NAME,_proc_fin)

View File

@ -217,6 +217,22 @@ int __mcpm_cluster_state(unsigned int cluster);
int __init mcpm_sync_init(
void (*power_up_setup)(unsigned int affinity_level));
/**
* mcpm_loopback - make a run through the MCPM low-level code
*
* @cache_disable: pointer to function performing cache disabling
*
* This exercises the MCPM machinery by soft resetting the CPU and branching
* to the MCPM low-level entry code before returning to the caller.
* The @cache_disable function must do the necessary cache disabling to
* let the regular kernel init code turn it back on as if the CPU was
* hotplugged in. The MCPM state machine is set as if the cluster was
* initialized meaning the power_up_setup callback passed to mcpm_sync_init()
* will be invoked for all affinity levels. This may be useful to initialize
* some resources such as enabling the CCI that requires the cache to be off, or simply for testing purposes.
*/
int __init mcpm_loopback(void (*cache_disable)(void));
void __init mcpm_smp_set_ops(void);
#else

View File

@ -0,0 +1,23 @@
#ifndef __ASM_MCS_LOCK_H
#define __ASM_MCS_LOCK_H
#ifdef CONFIG_SMP
#include <asm/spinlock.h>
/* MCS spin-locking. */
#define arch_mcs_spin_lock_contended(lock) \
do { \
/* Ensure prior stores are observed before we enter wfe. */ \
smp_mb(); \
while (!(smp_load_acquire(lock))) \
wfe(); \
} while (0) \
#define arch_mcs_spin_unlock_contended(lock) \
do { \
smp_store_release(lock, 1); \
dsb_sev(); \
} while (0)
#endif /* CONFIG_SMP */
#endif /* __ASM_MCS_LOCK_H */

View File

@ -91,9 +91,7 @@
* of this define that was meant to.
* Fortunately, there is no reference for this in noMMU mode, for now.
*/
#ifndef TASK_SIZE
#define TASK_SIZE (CONFIG_DRAM_SIZE)
#endif
#define TASK_SIZE UL(0xffffffff)
#ifndef TASK_UNMAPPED_BASE
#define TASK_UNMAPPED_BASE UL(0x00000000)
@ -150,13 +148,11 @@
/*
* PLAT_PHYS_OFFSET is the offset (from zero) of the start of physical
* memory. This is used for XIP and NoMMU kernels, or by kernels which
* have their own mach/memory.h. Assembly code must always use
* memory. This is used for XIP and NoMMU kernels, and on platforms that don't
* have CONFIG_ARM_PATCH_PHYS_VIRT. Assembly code must always use
* PLAT_PHYS_OFFSET and not PHYS_OFFSET.
*/
#ifndef PLAT_PHYS_OFFSET
#define PLAT_PHYS_OFFSET UL(CONFIG_PHYS_OFFSET)
#endif
#ifndef __ASSEMBLY__

View File

@ -12,15 +12,6 @@
#ifndef __ARM_PERF_EVENT_H__
#define __ARM_PERF_EVENT_H__
/*
* The ARMv7 CPU PMU supports up to 32 event counters.
*/
#define ARMPMU_MAX_HWEVENTS 32
#define HW_OP_UNSUPPORTED 0xFFFF
#define C(_x) PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED 0xFFFF
#ifdef CONFIG_HW_PERF_EVENTS
struct pt_regs;
extern unsigned long perf_instruction_pointer(struct pt_regs *regs);

View File

@ -43,7 +43,7 @@
#define PMD_SECT_BUFFERABLE (_AT(pmdval_t, 1) << 2)
#define PMD_SECT_CACHEABLE (_AT(pmdval_t, 1) << 3)
#define PMD_SECT_USER (_AT(pmdval_t, 1) << 6) /* AP[1] */
#define PMD_SECT_RDONLY (_AT(pmdval_t, 1) << 7) /* AP[2] */
#define PMD_SECT_AP2 (_AT(pmdval_t, 1) << 7) /* read only */
#define PMD_SECT_S (_AT(pmdval_t, 3) << 8)
#define PMD_SECT_AF (_AT(pmdval_t, 1) << 10)
#define PMD_SECT_nG (_AT(pmdval_t, 1) << 11)
@ -72,6 +72,7 @@
#define PTE_TABLE_BIT (_AT(pteval_t, 1) << 1)
#define PTE_BUFFERABLE (_AT(pteval_t, 1) << 2) /* AttrIndx[0] */
#define PTE_CACHEABLE (_AT(pteval_t, 1) << 3) /* AttrIndx[1] */
#define PTE_AP2 (_AT(pteval_t, 1) << 7) /* AP[2] */
#define PTE_EXT_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
#define PTE_EXT_AF (_AT(pteval_t, 1) << 10) /* Access Flag */
#define PTE_EXT_NG (_AT(pteval_t, 1) << 11) /* nG */

View File

@ -79,18 +79,19 @@
#define L_PTE_PRESENT (_AT(pteval_t, 3) << 0) /* Present */
#define L_PTE_FILE (_AT(pteval_t, 1) << 2) /* only when !PRESENT */
#define L_PTE_USER (_AT(pteval_t, 1) << 6) /* AP[1] */
#define L_PTE_RDONLY (_AT(pteval_t, 1) << 7) /* AP[2] */
#define L_PTE_SHARED (_AT(pteval_t, 3) << 8) /* SH[1:0], inner shareable */
#define L_PTE_YOUNG (_AT(pteval_t, 1) << 10) /* AF */
#define L_PTE_XN (_AT(pteval_t, 1) << 54) /* XN */
#define L_PTE_DIRTY (_AT(pteval_t, 1) << 55) /* unused */
#define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56) /* unused */
#define L_PTE_DIRTY (_AT(pteval_t, 1) << 55)
#define L_PTE_SPECIAL (_AT(pteval_t, 1) << 56)
#define L_PTE_NONE (_AT(pteval_t, 1) << 57) /* PROT_NONE */
#define L_PTE_RDONLY (_AT(pteval_t, 1) << 58) /* READ ONLY */
#define PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
#define PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55)
#define PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 56)
#define PMD_SECT_NONE (_AT(pmdval_t, 1) << 57)
#define L_PMD_SECT_VALID (_AT(pmdval_t, 1) << 0)
#define L_PMD_SECT_DIRTY (_AT(pmdval_t, 1) << 55)
#define L_PMD_SECT_SPLITTING (_AT(pmdval_t, 1) << 56)
#define L_PMD_SECT_NONE (_AT(pmdval_t, 1) << 57)
#define L_PMD_SECT_RDONLY (_AT(pteval_t, 1) << 58)
/*
* To be used in assembly code with the upper page attributes.
@ -207,27 +208,32 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long addr)
#define pte_huge(pte) (pte_val(pte) && !(pte_val(pte) & PTE_TABLE_BIT))
#define pte_mkhuge(pte) (__pte(pte_val(pte) & ~PTE_TABLE_BIT))
#define pmd_young(pmd) (pmd_val(pmd) & PMD_SECT_AF)
#define pmd_isset(pmd, val) ((u32)(val) == (val) ? pmd_val(pmd) & (val) \
: !!(pmd_val(pmd) & (val)))
#define pmd_isclear(pmd, val) (!(pmd_val(pmd) & (val)))
#define pmd_young(pmd) (pmd_isset((pmd), PMD_SECT_AF))
#define __HAVE_ARCH_PMD_WRITE
#define pmd_write(pmd) (!(pmd_val(pmd) & PMD_SECT_RDONLY))
#define pmd_write(pmd) (pmd_isclear((pmd), L_PMD_SECT_RDONLY))
#define pmd_dirty(pmd) (pmd_isset((pmd), L_PMD_SECT_DIRTY))
#define pmd_hugewillfault(pmd) (!pmd_young(pmd) || !pmd_write(pmd))
#define pmd_thp_or_huge(pmd) (pmd_huge(pmd) || pmd_trans_huge(pmd))
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#define pmd_trans_huge(pmd) (pmd_val(pmd) && !(pmd_val(pmd) & PMD_TABLE_BIT))
#define pmd_trans_splitting(pmd) (pmd_val(pmd) & PMD_SECT_SPLITTING)
#define pmd_trans_huge(pmd) (pmd_val(pmd) && !pmd_table(pmd))
#define pmd_trans_splitting(pmd) (pmd_isset((pmd), L_PMD_SECT_SPLITTING))
#endif
#define PMD_BIT_FUNC(fn,op) \
static inline pmd_t pmd_##fn(pmd_t pmd) { pmd_val(pmd) op; return pmd; }
PMD_BIT_FUNC(wrprotect, |= PMD_SECT_RDONLY);
PMD_BIT_FUNC(wrprotect, |= L_PMD_SECT_RDONLY);
PMD_BIT_FUNC(mkold, &= ~PMD_SECT_AF);
PMD_BIT_FUNC(mksplitting, |= PMD_SECT_SPLITTING);
PMD_BIT_FUNC(mkwrite, &= ~PMD_SECT_RDONLY);
PMD_BIT_FUNC(mkdirty, |= PMD_SECT_DIRTY);
PMD_BIT_FUNC(mksplitting, |= L_PMD_SECT_SPLITTING);
PMD_BIT_FUNC(mkwrite, &= ~L_PMD_SECT_RDONLY);
PMD_BIT_FUNC(mkdirty, |= L_PMD_SECT_DIRTY);
PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF);
#define pmd_mkhuge(pmd) (__pmd(pmd_val(pmd) & ~PMD_TABLE_BIT))
@ -241,8 +247,8 @@ PMD_BIT_FUNC(mkyoung, |= PMD_SECT_AF);
static inline pmd_t pmd_modify(pmd_t pmd, pgprot_t newprot)
{
const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | PMD_SECT_RDONLY |
PMD_SECT_VALID | PMD_SECT_NONE;
const pmdval_t mask = PMD_SECT_USER | PMD_SECT_XN | L_PMD_SECT_RDONLY |
L_PMD_SECT_VALID | L_PMD_SECT_NONE;
pmd_val(pmd) = (pmd_val(pmd) & ~mask) | (pgprot_val(newprot) & mask);
return pmd;
}
@ -253,8 +259,13 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr,
BUG_ON(addr >= TASK_SIZE);
/* create a faulting entry if PROT_NONE protected */
if (pmd_val(pmd) & PMD_SECT_NONE)
pmd_val(pmd) &= ~PMD_SECT_VALID;
if (pmd_val(pmd) & L_PMD_SECT_NONE)
pmd_val(pmd) &= ~L_PMD_SECT_VALID;
if (pmd_write(pmd) && pmd_dirty(pmd))
pmd_val(pmd) &= ~PMD_SECT_AP2;
else
pmd_val(pmd) |= PMD_SECT_AP2;
*pmdp = __pmd(pmd_val(pmd) | PMD_SECT_nG);
flush_pmd_entry(pmdp);

View File

@ -214,18 +214,22 @@ static inline pte_t *pmd_page_vaddr(pmd_t pmd)
#define pte_clear(mm,addr,ptep) set_pte_ext(ptep, __pte(0), 0)
#define pte_isset(pte, val) ((u32)(val) == (val) ? pte_val(pte) & (val) \
: !!(pte_val(pte) & (val)))
#define pte_isclear(pte, val) (!(pte_val(pte) & (val)))
#define pte_none(pte) (!pte_val(pte))
#define pte_present(pte) (pte_val(pte) & L_PTE_PRESENT)
#define pte_valid(pte) (pte_val(pte) & L_PTE_VALID)
#define pte_present(pte) (pte_isset((pte), L_PTE_PRESENT))
#define pte_valid(pte) (pte_isset((pte), L_PTE_VALID))
#define pte_accessible(mm, pte) (mm_tlb_flush_pending(mm) ? pte_present(pte) : pte_valid(pte))
#define pte_write(pte) (!(pte_val(pte) & L_PTE_RDONLY))
#define pte_dirty(pte) (pte_val(pte) & L_PTE_DIRTY)
#define pte_young(pte) (pte_val(pte) & L_PTE_YOUNG)
#define pte_exec(pte) (!(pte_val(pte) & L_PTE_XN))
#define pte_write(pte) (pte_isclear((pte), L_PTE_RDONLY))
#define pte_dirty(pte) (pte_isset((pte), L_PTE_DIRTY))
#define pte_young(pte) (pte_isset((pte), L_PTE_YOUNG))
#define pte_exec(pte) (pte_isclear((pte), L_PTE_XN))
#define pte_special(pte) (0)
#define pte_valid_user(pte) \
(pte_valid(pte) && (pte_val(pte) & L_PTE_USER) && pte_young(pte))
(pte_valid(pte) && pte_isset((pte), L_PTE_USER) && pte_young(pte))
#if __LINUX_ARM_ARCH__ < 6
static inline void __sync_icache_dcache(pte_t pteval)

View File

@ -42,6 +42,25 @@ struct arm_pmu_platdata {
#ifdef CONFIG_HW_PERF_EVENTS
/*
* The ARMv7 CPU PMU supports up to 32 event counters.
*/
#define ARMPMU_MAX_HWEVENTS 32
#define HW_OP_UNSUPPORTED 0xFFFF
#define C(_x) PERF_COUNT_HW_CACHE_##_x
#define CACHE_OP_UNSUPPORTED 0xFFFF
#define PERF_MAP_ALL_UNSUPPORTED \
[0 ... PERF_COUNT_HW_MAX - 1] = HW_OP_UNSUPPORTED
#define PERF_CACHE_MAP_ALL_UNSUPPORTED \
[0 ... C(MAX) - 1] = { \
[0 ... C(OP_MAX) - 1] = { \
[0 ... C(RESULT_MAX) - 1] = CACHE_OP_UNSUPPORTED, \
}, \
}
/* The events for a given PMU register set. */
struct pmu_hw_events {
/*

View File

@ -84,6 +84,12 @@ static inline long regs_return_value(struct pt_regs *regs)
#define instruction_pointer(regs) (regs)->ARM_pc
#ifdef CONFIG_THUMB2_KERNEL
#define frame_pointer(regs) (regs)->ARM_r7
#else
#define frame_pointer(regs) (regs)->ARM_fp
#endif
static inline void instruction_pointer_set(struct pt_regs *regs,
unsigned long val)
{

View File

@ -11,7 +11,7 @@
static inline bool scu_a9_has_base(void)
{
return read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9;
return read_cpuid_part() == ARM_CPU_PART_CORTEX_A9;
}
static inline unsigned long scu_a9_get_base(void)

View File

@ -1,13 +1,28 @@
#ifndef __ASM_STACKTRACE_H
#define __ASM_STACKTRACE_H
#include <asm/ptrace.h>
struct stackframe {
/*
* FP member should hold R7 when CONFIG_THUMB2_KERNEL is enabled
* and R11 otherwise.
*/
unsigned long fp;
unsigned long sp;
unsigned long lr;
unsigned long pc;
};
static __always_inline
void arm_get_current_stackframe(struct pt_regs *regs, struct stackframe *frame)
{
frame->fp = frame_pointer(regs);
frame->sp = regs->ARM_sp;
frame->lr = regs->ARM_lr;
frame->pc = regs->ARM_pc;
}
extern int unwind_frame(struct stackframe *frame);
extern void walk_stackframe(struct stackframe *frame,
int (*fn)(struct stackframe *, void *), void *data);

View File

@ -14,9 +14,10 @@
#include <linux/compiler.h>
#include <asm/fpstate.h>
#include <asm/page.h>
#define THREAD_SIZE_ORDER 1
#define THREAD_SIZE 8192
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
#define THREAD_START_SP (THREAD_SIZE - 8)
#ifndef __ASSEMBLY__

View File

@ -107,6 +107,8 @@ static inline void set_fs(mm_segment_t fs)
extern int __get_user_1(void *);
extern int __get_user_2(void *);
extern int __get_user_4(void *);
extern int __get_user_lo8(void *);
extern int __get_user_8(void *);
#define __GUP_CLOBBER_1 "lr", "cc"
#ifdef CONFIG_CPU_USE_DOMAINS
@ -115,6 +117,8 @@ extern int __get_user_4(void *);
#define __GUP_CLOBBER_2 "lr", "cc"
#endif
#define __GUP_CLOBBER_4 "lr", "cc"
#define __GUP_CLOBBER_lo8 "lr", "cc"
#define __GUP_CLOBBER_8 "lr", "cc"
#define __get_user_x(__r2,__p,__e,__l,__s) \
__asm__ __volatile__ ( \
@ -125,11 +129,19 @@ extern int __get_user_4(void *);
: "0" (__p), "r" (__l) \
: __GUP_CLOBBER_##__s)
/* narrowing a double-word get into a single 32bit word register: */
#ifdef __ARMEB__
#define __get_user_xb(__r2, __p, __e, __l, __s) \
__get_user_x(__r2, __p, __e, __l, lo8)
#else
#define __get_user_xb __get_user_x
#endif
#define __get_user_check(x,p) \
({ \
unsigned long __limit = current_thread_info()->addr_limit - 1; \
register const typeof(*(p)) __user *__p asm("r0") = (p);\
register unsigned long __r2 asm("r2"); \
register typeof(x) __r2 asm("r2"); \
register unsigned long __l asm("r1") = __limit; \
register int __e asm("r0"); \
switch (sizeof(*(__p))) { \
@ -142,6 +154,12 @@ extern int __get_user_4(void *);
case 4: \
__get_user_x(__r2, __p, __e, __l, 4); \
break; \
case 8: \
if (sizeof((x)) < 8) \
__get_user_xb(__r2, __p, __e, __l, 4); \
else \
__get_user_x(__r2, __p, __e, __l, 8); \
break; \
default: __e = __get_user_bad(); break; \
} \
x = (typeof(*(p))) __r2; \
@ -224,7 +242,7 @@ static inline void set_fs(mm_segment_t fs)
#define access_ok(type,addr,size) (__range_ok(addr,size) == 0)
#define user_addr_max() \
(segment_eq(get_fs(), USER_DS) ? TASK_SIZE : ~0UL)
(segment_eq(get_fs(), KERNEL_DS) ? ~0UL : get_fs())
/*
* The "__xxx" versions of the user access functions do not verify the

View File

@ -15,7 +15,17 @@
#include <uapi/asm/unistd.h>
/*
* This may need to be greater than __NR_last_syscall+1 in order to
* account for the padding in the syscall table
*/
#define __NR_syscalls (384)
/*
* *NOTE*: This is a ghost syscall private to the kernel. Only the
* __kuser_cmpxchg code in entry-armv.S should be aware of its
* existence. Don't ever use this from user code.
*/
#define __ARM_NR_cmpxchg (__ARM_NR_BASE+0x00fff0)
#define __ARCH_WANT_STAT64

View File

@ -410,11 +410,6 @@
#define __NR_sched_getattr (__NR_SYSCALL_BASE+381)
#define __NR_renameat2 (__NR_SYSCALL_BASE+382)
/*
* This may need to be greater than __NR_last_syscall+1 in order to
* account for the padding in the syscall table
*/
/*
* The following SWIs are ARM private.
*/
@ -425,12 +420,6 @@
#define __ARM_NR_usr32 (__ARM_NR_BASE+4)
#define __ARM_NR_set_tls (__ARM_NR_BASE+5)
/*
* *NOTE*: This is a ghost syscall private to the kernel. Only the
* __kuser_cmpxchg code in entry-armv.S should be aware of its
* existence. Don't ever use this from user code.
*/
/*
* The following syscalls are obsolete and no longer available for EABI.
*/

View File

@ -90,7 +90,7 @@ ENTRY(printascii)
ldrneb r1, [r0], #1
teqne r1, #0
bne 1b
mov pc, lr
ret lr
ENDPROC(printascii)
ENTRY(printch)
@ -105,7 +105,7 @@ ENTRY(debug_ll_addr)
addruart r2, r3, ip
str r2, [r0]
str r3, [r1]
mov pc, lr
ret lr
ENDPROC(debug_ll_addr)
#endif
@ -116,7 +116,7 @@ ENTRY(printascii)
mov r0, #0x04 @ SYS_WRITE0
ARM( svc #0x123456 )
THUMB( svc #0xab )
mov pc, lr
ret lr
ENDPROC(printascii)
ENTRY(printch)
@ -125,14 +125,14 @@ ENTRY(printch)
mov r0, #0x03 @ SYS_WRITEC
ARM( svc #0x123456 )
THUMB( svc #0xab )
mov pc, lr
ret lr
ENDPROC(printch)
ENTRY(debug_ll_addr)
mov r2, #0
str r2, [r0]
str r2, [r1]
mov pc, lr
ret lr
ENDPROC(debug_ll_addr)
#endif

View File

@ -224,7 +224,7 @@ svc_preempt:
1: bl preempt_schedule_irq @ irq en/disable is done inside
ldr r0, [tsk, #TI_FLAGS] @ get new tasks TI_FLAGS
tst r0, #_TIF_NEED_RESCHED
moveq pc, r8 @ go again
reteq r8 @ go again
b 1b
#endif
@ -490,7 +490,7 @@ ENDPROC(__und_usr)
.pushsection .fixup, "ax"
.align 2
4: str r4, [sp, #S_PC] @ retry current instruction
mov pc, r9
ret r9
.popsection
.pushsection __ex_table,"a"
.long 1b, 4b
@ -552,7 +552,7 @@ call_fpe:
#endif
tst r0, #0x08000000 @ only CDP/CPRT/LDC/STC have bit 27
tstne r0, #0x04000000 @ bit 26 set on both ARM and Thumb-2
moveq pc, lr
reteq lr
and r8, r0, #0x00000f00 @ mask out CP number
THUMB( lsr r8, r8, #8 )
mov r7, #1
@ -571,33 +571,33 @@ call_fpe:
THUMB( add pc, r8 )
nop
movw_pc lr @ CP#0
ret.w lr @ CP#0
W(b) do_fpe @ CP#1 (FPE)
W(b) do_fpe @ CP#2 (FPE)
movw_pc lr @ CP#3
ret.w lr @ CP#3
#ifdef CONFIG_CRUNCH
b crunch_task_enable @ CP#4 (MaverickCrunch)
b crunch_task_enable @ CP#5 (MaverickCrunch)
b crunch_task_enable @ CP#6 (MaverickCrunch)
#else
movw_pc lr @ CP#4
movw_pc lr @ CP#5
movw_pc lr @ CP#6
ret.w lr @ CP#4
ret.w lr @ CP#5
ret.w lr @ CP#6
#endif
movw_pc lr @ CP#7
movw_pc lr @ CP#8
movw_pc lr @ CP#9
ret.w lr @ CP#7
ret.w lr @ CP#8
ret.w lr @ CP#9
#ifdef CONFIG_VFP
W(b) do_vfp @ CP#10 (VFP)
W(b) do_vfp @ CP#11 (VFP)
#else
movw_pc lr @ CP#10 (VFP)
movw_pc lr @ CP#11 (VFP)
ret.w lr @ CP#10 (VFP)
ret.w lr @ CP#11 (VFP)
#endif
movw_pc lr @ CP#12
movw_pc lr @ CP#13
movw_pc lr @ CP#14 (Debug)
movw_pc lr @ CP#15 (Control)
ret.w lr @ CP#12
ret.w lr @ CP#13
ret.w lr @ CP#14 (Debug)
ret.w lr @ CP#15 (Control)
#ifdef NEED_CPU_ARCHITECTURE
.align 2
@ -649,7 +649,7 @@ ENTRY(fp_enter)
.popsection
ENTRY(no_fp)
mov pc, lr
ret lr
ENDPROC(no_fp)
__und_usr_fault_32:
@ -745,7 +745,7 @@ ENDPROC(__switch_to)
#ifdef CONFIG_ARM_THUMB
bx \reg
#else
mov pc, \reg
ret \reg
#endif
.endm
@ -837,7 +837,7 @@ kuser_cmpxchg64_fixup:
#if __LINUX_ARM_ARCH__ < 6
bcc kuser_cmpxchg32_fixup
#endif
mov pc, lr
ret lr
.previous
#else
@ -905,7 +905,7 @@ kuser_cmpxchg32_fixup:
subs r8, r4, r7
rsbcss r8, r8, #(2b - 1b)
strcs r7, [sp, #S_PC]
mov pc, lr
ret lr
.previous
#else

View File

@ -8,6 +8,7 @@
* published by the Free Software Foundation.
*/
#include <asm/assembler.h>
#include <asm/unistd.h>
#include <asm/ftrace.h>
#include <asm/unwind.h>
@ -88,7 +89,7 @@ ENTRY(ret_from_fork)
cmp r5, #0
movne r0, r4
adrne lr, BSYM(1f)
movne pc, r5
retne r5
1: get_thread_info tsk
b ret_slow_syscall
ENDPROC(ret_from_fork)
@ -290,7 +291,7 @@ ENDPROC(ftrace_graph_caller_old)
.macro mcount_exit
ldmia sp!, {r0-r3, ip, lr}
mov pc, ip
ret ip
.endm
ENTRY(__gnu_mcount_nc)
@ -298,7 +299,7 @@ UNWIND(.fnstart)
#ifdef CONFIG_DYNAMIC_FTRACE
mov ip, lr
ldmia sp!, {lr}
mov pc, ip
ret ip
#else
__mcount
#endif
@ -333,12 +334,12 @@ return_to_handler:
bl ftrace_return_to_handler
mov lr, r0 @ r0 has real ret addr
ldmia sp!, {r0-r3}
mov pc, lr
ret lr
#endif
ENTRY(ftrace_stub)
.Lftrace_stub:
mov pc, lr
ret lr
ENDPROC(ftrace_stub)
#endif /* CONFIG_FUNCTION_TRACER */
@ -561,7 +562,7 @@ sys_mmap2:
streq r5, [sp, #4]
beq sys_mmap_pgoff
mov r0, #-EINVAL
mov pc, lr
ret lr
#else
str r5, [sp, #4]
b sys_mmap_pgoff

View File

@ -240,12 +240,6 @@
movs pc, lr @ return & move spsr_svc into cpsr
.endm
@
@ 32-bit wide "mov pc, reg"
@
.macro movw_pc, reg
mov pc, \reg
.endm
#else /* CONFIG_THUMB2_KERNEL */
.macro svc_exit, rpsr, irq = 0
.if \irq != 0
@ -304,14 +298,6 @@
movs pc, lr @ return & move spsr_svc into cpsr
.endm
#endif /* ifdef CONFIG_CPU_V7M / else */
@
@ 32-bit wide "mov pc, reg"
@
.macro movw_pc, reg
mov pc, \reg
nop
.endm
#endif /* !CONFIG_THUMB2_KERNEL */
/*

View File

@ -32,7 +32,7 @@ ENTRY(__set_fiq_regs)
ldr lr, [r0]
msr cpsr_c, r1 @ return to SVC mode
mov r0, r0 @ avoid hazard prior to ARMv4
mov pc, lr
ret lr
ENDPROC(__set_fiq_regs)
ENTRY(__get_fiq_regs)
@ -45,5 +45,5 @@ ENTRY(__get_fiq_regs)
str lr, [r0]
msr cpsr_c, r1 @ return to SVC mode
mov r0, r0 @ avoid hazard prior to ARMv4
mov pc, lr
ret lr
ENDPROC(__get_fiq_regs)

View File

@ -10,6 +10,7 @@
* published by the Free Software Foundation.
*
*/
#include <asm/assembler.h>
#define ATAG_CORE 0x54410001
#define ATAG_CORE_SIZE ((2*4 + 3*4) >> 2)
@ -61,10 +62,10 @@ __vet_atags:
cmp r5, r6
bne 1f
2: mov pc, lr @ atag/dtb pointer is ok
2: ret lr @ atag/dtb pointer is ok
1: mov r2, #0
mov pc, lr
ret lr
ENDPROC(__vet_atags)
/*
@ -162,7 +163,7 @@ __lookup_processor_type:
cmp r5, r6
blo 1b
mov r5, #0 @ unknown processor
2: mov pc, lr
2: ret lr
ENDPROC(__lookup_processor_type)
/*

View File

@ -82,7 +82,7 @@ ENTRY(stext)
adr lr, BSYM(1f) @ return (PIC) address
ARM( add pc, r10, #PROCINFO_INITFUNC )
THUMB( add r12, r10, #PROCINFO_INITFUNC )
THUMB( mov pc, r12 )
THUMB( ret r12 )
1: b __after_proc_init
ENDPROC(stext)
@ -119,7 +119,7 @@ ENTRY(secondary_startup)
mov r13, r12 @ __secondary_switched address
ARM( add pc, r10, #PROCINFO_INITFUNC )
THUMB( add r12, r10, #PROCINFO_INITFUNC )
THUMB( mov pc, r12 )
THUMB( ret r12 )
ENDPROC(secondary_startup)
ENTRY(__secondary_switched)
@ -164,7 +164,7 @@ __after_proc_init:
#endif
mcr p15, 0, r0, c1, c0, 0 @ write control reg
#endif /* CONFIG_CPU_CP15 */
mov pc, r13
ret r13
ENDPROC(__after_proc_init)
.ltorg
@ -254,7 +254,7 @@ ENTRY(__setup_mpu)
orr r0, r0, #CR_M @ Set SCTRL.M (MPU on)
mcr p15, 0, r0, c1, c0, 0 @ Enable MPU
isb
mov pc,lr
ret lr
ENDPROC(__setup_mpu)
#endif
#include "head-common.S"

View File

@ -140,7 +140,7 @@ ENTRY(stext)
mov r8, r4 @ set TTBR1 to swapper_pg_dir
ARM( add pc, r10, #PROCINFO_INITFUNC )
THUMB( add r12, r10, #PROCINFO_INITFUNC )
THUMB( mov pc, r12 )
THUMB( ret r12 )
1: b __enable_mmu
ENDPROC(stext)
.ltorg
@ -335,7 +335,7 @@ __create_page_tables:
sub r4, r4, #0x1000 @ point to the PGD table
mov r4, r4, lsr #ARCH_PGD_SHIFT
#endif
mov pc, lr
ret lr
ENDPROC(__create_page_tables)
.ltorg
.align
@ -383,7 +383,7 @@ ENTRY(secondary_startup)
ARM( add pc, r10, #PROCINFO_INITFUNC ) @ initialise processor
@ (return control reg)
THUMB( add r12, r10, #PROCINFO_INITFUNC )
THUMB( mov pc, r12 )
THUMB( ret r12 )
ENDPROC(secondary_startup)
/*
@ -468,7 +468,7 @@ ENTRY(__turn_mmu_on)
instr_sync
mov r3, r3
mov r3, r13
mov pc, r3
ret r3
__turn_mmu_on_end:
ENDPROC(__turn_mmu_on)
.popsection
@ -487,7 +487,7 @@ __fixup_smp:
orr r4, r4, #0x0000b000
orr r4, r4, #0x00000020 @ val 0x4100b020
teq r3, r4 @ ARM 11MPCore?
moveq pc, lr @ yes, assume SMP
reteq lr @ yes, assume SMP
mrc p15, 0, r0, c0, c0, 5 @ read MPIDR
and r0, r0, #0xc0000000 @ multiprocessing extensions and
@ -500,7 +500,7 @@ __fixup_smp:
orr r4, r4, #0x0000c000
orr r4, r4, #0x00000090
teq r3, r4 @ Check for ARM Cortex-A9
movne pc, lr @ Not ARM Cortex-A9,
retne lr @ Not ARM Cortex-A9,
@ If a future SoC *does* use 0x0 as the PERIPH_BASE, then the
@ below address check will need to be #ifdef'd or equivalent
@ -512,7 +512,7 @@ __fixup_smp:
ARM_BE8(rev r0, r0) @ byteswap if big endian
and r0, r0, #0x3 @ number of CPUs
teq r0, #0x0 @ is 1?
movne pc, lr
retne lr
__fixup_smp_on_up:
adr r0, 1f
@ -539,7 +539,7 @@ smp_on_up:
.text
__do_fixup_smp_on_up:
cmp r4, r5
movhs pc, lr
reths lr
ldmia r4!, {r0, r6}
ARM( str r6, [r0, r3] )
THUMB( add r0, r0, r3 )
@ -672,7 +672,7 @@ ARM_BE8(rev16 ip, ip)
2: cmp r4, r5
ldrcc r7, [r4], #4 @ use branch for delay slot
bcc 1b
mov pc, lr
ret lr
#endif
ENDPROC(__fixup_a_pv_table)

View File

@ -99,7 +99,7 @@ ENTRY(__hyp_stub_install_secondary)
* immediately.
*/
compare_cpu_mode_with_primary r4, r5, r6, r7
movne pc, lr
retne lr
/*
* Once we have given up on one CPU, we do not try to install the
@ -111,7 +111,7 @@ ENTRY(__hyp_stub_install_secondary)
*/
cmp r4, #HYP_MODE
movne pc, lr @ give up if the CPU is not in HYP mode
retne lr @ give up if the CPU is not in HYP mode
/*
* Configure HSCTLR to set correct exception endianness/instruction set
@ -201,7 +201,7 @@ ENDPROC(__hyp_get_vectors)
@ fall through
ENTRY(__hyp_set_vectors)
__HVC(0)
mov pc, lr
ret lr
ENDPROC(__hyp_set_vectors)
#ifndef ZIMAGE

View File

@ -100,7 +100,7 @@ ENTRY(iwmmxt_task_enable)
get_thread_info r10
#endif
4: dec_preempt_count r10, r3
mov pc, r9 @ normal exit from exception
ret r9 @ normal exit from exception
concan_save:
@ -144,7 +144,7 @@ concan_dump:
wstrd wR15, [r1, #MMX_WR15]
2: teq r0, #0 @ anything to load?
moveq pc, lr @ if not, return
reteq lr @ if not, return
concan_load:
@ -177,10 +177,10 @@ concan_load:
@ clear CUP/MUP (only if r1 != 0)
teq r1, #0
mov r2, #0
moveq pc, lr
reteq lr
tmcr wCon, r2
mov pc, lr
ret lr
/*
* Back up Concan regs to save area and disable access to them
@ -266,7 +266,7 @@ ENTRY(iwmmxt_task_copy)
mov r3, lr @ preserve return address
bl concan_dump
msr cpsr_c, ip @ restore interrupt mode
mov pc, r3
ret r3
/*
* Restore Concan state from given memory address
@ -302,7 +302,7 @@ ENTRY(iwmmxt_task_restore)
mov r3, lr @ preserve return address
bl concan_load
msr cpsr_c, ip @ restore interrupt mode
mov pc, r3
ret r3
/*
* Concan handling on task switch
@ -324,7 +324,7 @@ ENTRY(iwmmxt_task_switch)
add r3, r0, #TI_IWMMXT_STATE @ get next task Concan save area
ldr r2, [r2] @ get current Concan owner
teq r2, r3 @ next task owns it?
movne pc, lr @ no: leave Concan disabled
retne lr @ no: leave Concan disabled
1: @ flip Concan access
XSC(eor r1, r1, #0x3)
@ -351,7 +351,7 @@ ENTRY(iwmmxt_task_release)
eors r0, r0, r1 @ if equal...
streq r0, [r3] @ then clear ownership
msr cpsr_c, r2 @ restore interrupts
mov pc, lr
ret lr
.data
concan_owner:

View File

@ -560,11 +560,16 @@ user_backtrace(struct frame_tail __user *tail,
struct perf_callchain_entry *entry)
{
struct frame_tail buftail;
unsigned long err;
/* Also check accessibility of one struct frame_tail beyond */
if (!access_ok(VERIFY_READ, tail, sizeof(buftail)))
return NULL;
if (__copy_from_user_inatomic(&buftail, tail, sizeof(buftail)))
pagefault_disable();
err = __copy_from_user_inatomic(&buftail, tail, sizeof(buftail));
pagefault_enable();
if (err)
return NULL;
perf_callchain_store(entry, buftail.lr);
@ -590,6 +595,10 @@ perf_callchain_user(struct perf_callchain_entry *entry, struct pt_regs *regs)
}
perf_callchain_store(entry, regs->ARM_pc);
if (!current->mm)
return;
tail = (struct frame_tail __user *)regs->ARM_fp - 1;
while ((entry->nr < PERF_MAX_STACK_DEPTH) &&
@ -621,10 +630,7 @@ perf_callchain_kernel(struct perf_callchain_entry *entry, struct pt_regs *regs)
return;
}
fr.fp = regs->ARM_fp;
fr.sp = regs->ARM_sp;
fr.lr = regs->ARM_lr;
fr.pc = regs->ARM_pc;
arm_get_current_stackframe(regs, &fr);
walk_stackframe(&fr, callchain_trace, entry);
}

View File

@ -233,14 +233,17 @@ static struct of_device_id cpu_pmu_of_device_ids[] = {
{.compatible = "arm,cortex-a7-pmu", .data = armv7_a7_pmu_init},
{.compatible = "arm,cortex-a5-pmu", .data = armv7_a5_pmu_init},
{.compatible = "arm,arm11mpcore-pmu", .data = armv6mpcore_pmu_init},
{.compatible = "arm,arm1176-pmu", .data = armv6pmu_init},
{.compatible = "arm,arm1136-pmu", .data = armv6pmu_init},
{.compatible = "arm,arm1176-pmu", .data = armv6_1176_pmu_init},
{.compatible = "arm,arm1136-pmu", .data = armv6_1136_pmu_init},
{.compatible = "qcom,krait-pmu", .data = krait_pmu_init},
{},
};
static struct platform_device_id cpu_pmu_plat_device_ids[] = {
{.name = "arm-pmu"},
{.name = "armv6-pmu"},
{.name = "armv7-pmu"},
{.name = "xscale-pmu"},
{},
};
@ -250,40 +253,43 @@ static struct platform_device_id cpu_pmu_plat_device_ids[] = {
static int probe_current_pmu(struct arm_pmu *pmu)
{
int cpu = get_cpu();
unsigned long implementor = read_cpuid_implementor();
unsigned long part_number = read_cpuid_part_number();
int ret = -ENODEV;
pr_info("probing PMU on CPU %d\n", cpu);
switch (read_cpuid_part()) {
/* ARM Ltd CPUs. */
if (implementor == ARM_CPU_IMP_ARM) {
switch (part_number) {
case ARM_CPU_PART_ARM1136:
case ARM_CPU_PART_ARM1156:
case ARM_CPU_PART_ARM1176:
ret = armv6pmu_init(pmu);
break;
case ARM_CPU_PART_ARM11MPCORE:
ret = armv6mpcore_pmu_init(pmu);
break;
case ARM_CPU_PART_CORTEX_A8:
ret = armv7_a8_pmu_init(pmu);
break;
case ARM_CPU_PART_CORTEX_A9:
ret = armv7_a9_pmu_init(pmu);
break;
}
/* Intel CPUs [xscale]. */
} else if (implementor == ARM_CPU_IMP_INTEL) {
switch (xscale_cpu_arch_version()) {
case ARM_CPU_XSCALE_ARCH_V1:
ret = xscale1pmu_init(pmu);
break;
case ARM_CPU_XSCALE_ARCH_V2:
ret = xscale2pmu_init(pmu);
break;
case ARM_CPU_PART_ARM1136:
ret = armv6_1136_pmu_init(pmu);
break;
case ARM_CPU_PART_ARM1156:
ret = armv6_1156_pmu_init(pmu);
break;
case ARM_CPU_PART_ARM1176:
ret = armv6_1176_pmu_init(pmu);
break;
case ARM_CPU_PART_ARM11MPCORE:
ret = armv6mpcore_pmu_init(pmu);
break;
case ARM_CPU_PART_CORTEX_A8:
ret = armv7_a8_pmu_init(pmu);
break;
case ARM_CPU_PART_CORTEX_A9:
ret = armv7_a9_pmu_init(pmu);
break;
default:
if (read_cpuid_implementor() == ARM_CPU_IMP_INTEL) {
switch (xscale_cpu_arch_version()) {
case ARM_CPU_XSCALE_ARCH_V1:
ret = xscale1pmu_init(pmu);
break;
case ARM_CPU_XSCALE_ARCH_V2:
ret = xscale2pmu_init(pmu);
break;
}
}
break;
}
put_cpu();

View File

@ -65,13 +65,11 @@ enum armv6_counters {
* accesses/misses in hardware.
*/
static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV6_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6_PERFCTR_INSTR_EXEC,
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6_PERFCTR_BR_EXEC,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6_PERFCTR_BR_MISPREDICT,
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6_PERFCTR_IBUF_STALL,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6_PERFCTR_LSU_FULL_STALL,
};
@ -79,116 +77,31 @@ static const unsigned armv6_perf_map[PERF_COUNT_HW_MAX] = {
static const unsigned armv6_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
/*
* The performance counters don't differentiate between read
* and write accesses/misses so this isn't strictly correct,
* but it's the best we can do. Writes and reads get
* combined.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
/*
* The ARM performance counters can count micro DTLB misses,
* micro ITLB misses and main TLB misses. There isn't an event
* for TLB misses, so use the micro misses here and if users
* want the main TLB misses they can use a raw counter.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(NODE)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
PERF_CACHE_MAP_ALL_UNSUPPORTED,
/*
* The performance counters don't differentiate between read and write
* accesses/misses so this isn't strictly correct, but it's the best we
* can do. Writes and reads get combined.
*/
[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6_PERFCTR_DCACHE_ACCESS,
[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DCACHE_MISS,
[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ICACHE_MISS,
/*
* The ARM performance counters can count micro DTLB misses, micro ITLB
* misses and main TLB misses. There isn't an event for TLB misses, so
* use the micro misses here and if users want the main TLB misses they
* can use a raw counter.
*/
[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_DTLB_MISS,
[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6_PERFCTR_ITLB_MISS,
};
enum armv6mpcore_perf_types {
@ -220,13 +133,11 @@ enum armv6mpcore_perf_types {
* accesses/misses in hardware.
*/
static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = ARMV6MPCORE_PERFCTR_CPU_CYCLES,
[PERF_COUNT_HW_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_INSTR_EXEC,
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = ARMV6MPCORE_PERFCTR_BR_EXEC,
[PERF_COUNT_HW_BRANCH_MISSES] = ARMV6MPCORE_PERFCTR_BR_MISPREDICT,
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = ARMV6MPCORE_PERFCTR_IBUF_STALL,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = ARMV6MPCORE_PERFCTR_LSU_FULL_STALL,
};
@ -234,114 +145,26 @@ static const unsigned armv6mpcore_perf_map[PERF_COUNT_HW_MAX] = {
static const unsigned armv6mpcore_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] =
ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
[C(RESULT_MISS)] =
ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] =
ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
[C(RESULT_MISS)] =
ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
/*
* The ARM performance counters can count micro DTLB misses,
* micro ITLB misses and main TLB misses. There isn't an event
* for TLB misses, so use the micro misses here and if users
* want the main TLB misses they can use a raw counter.
*/
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(NODE)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
PERF_CACHE_MAP_ALL_UNSUPPORTED,
[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = ARMV6MPCORE_PERFCTR_DCACHE_RDACCESS,
[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DCACHE_RDMISS,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = ARMV6MPCORE_PERFCTR_DCACHE_WRACCESS,
[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DCACHE_WRMISS,
[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ICACHE_MISS,
/*
* The ARM performance counters can count micro DTLB misses, micro ITLB
* misses and main TLB misses. There isn't an event for TLB misses, so
* use the micro misses here and if users want the main TLB misses they
* can use a raw counter.
*/
[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_DTLB_MISS,
[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = ARMV6MPCORE_PERFCTR_ITLB_MISS,
};
static inline unsigned long
@ -653,9 +476,8 @@ static int armv6_map_event(struct perf_event *event)
&armv6_perf_cache_map, 0xFF);
}
static int armv6pmu_init(struct arm_pmu *cpu_pmu)
static void armv6pmu_init(struct arm_pmu *cpu_pmu)
{
cpu_pmu->name = "v6";
cpu_pmu->handle_irq = armv6pmu_handle_irq;
cpu_pmu->enable = armv6pmu_enable_event;
cpu_pmu->disable = armv6pmu_disable_event;
@ -667,7 +489,26 @@ static int armv6pmu_init(struct arm_pmu *cpu_pmu)
cpu_pmu->map_event = armv6_map_event;
cpu_pmu->num_events = 3;
cpu_pmu->max_period = (1LLU << 32) - 1;
}
static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
{
armv6pmu_init(cpu_pmu);
cpu_pmu->name = "armv6_1136";
return 0;
}
static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu)
{
armv6pmu_init(cpu_pmu);
cpu_pmu->name = "armv6_1156";
return 0;
}
static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
{
armv6pmu_init(cpu_pmu);
cpu_pmu->name = "armv6_1176";
return 0;
}
@ -687,7 +528,7 @@ static int armv6mpcore_map_event(struct perf_event *event)
static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
{
cpu_pmu->name = "v6mpcore";
cpu_pmu->name = "armv6_11mpcore";
cpu_pmu->handle_irq = armv6pmu_handle_irq;
cpu_pmu->enable = armv6pmu_enable_event;
cpu_pmu->disable = armv6mpcore_pmu_disable_event;
@ -703,7 +544,17 @@ static int armv6mpcore_pmu_init(struct arm_pmu *cpu_pmu)
return 0;
}
#else
static int armv6pmu_init(struct arm_pmu *cpu_pmu)
static int armv6_1136_pmu_init(struct arm_pmu *cpu_pmu)
{
return -ENODEV;
}
static int armv6_1156_pmu_init(struct arm_pmu *cpu_pmu)
{
return -ENODEV;
}
static int armv6_1176_pmu_init(struct arm_pmu *cpu_pmu)
{
return -ENODEV;
}

File diff suppressed because it is too large Load Diff

View File

@ -48,118 +48,31 @@ enum xscale_counters {
};
static const unsigned xscale_perf_map[PERF_COUNT_HW_MAX] = {
PERF_MAP_ALL_UNSUPPORTED,
[PERF_COUNT_HW_CPU_CYCLES] = XSCALE_PERFCTR_CCNT,
[PERF_COUNT_HW_INSTRUCTIONS] = XSCALE_PERFCTR_INSTRUCTION,
[PERF_COUNT_HW_CACHE_REFERENCES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_CACHE_MISSES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = XSCALE_PERFCTR_BRANCH,
[PERF_COUNT_HW_BRANCH_MISSES] = XSCALE_PERFCTR_BRANCH_MISS,
[PERF_COUNT_HW_BUS_CYCLES] = HW_OP_UNSUPPORTED,
[PERF_COUNT_HW_STALLED_CYCLES_FRONTEND] = XSCALE_PERFCTR_ICACHE_NO_DELIVER,
[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = HW_OP_UNSUPPORTED,
};
static const unsigned xscale_perf_cache_map[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
[PERF_COUNT_HW_CACHE_RESULT_MAX] = {
[C(L1D)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(L1I)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(LL)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(DTLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(ITLB)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(BPU)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
[C(NODE)] = {
[C(OP_READ)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_WRITE)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
[C(OP_PREFETCH)] = {
[C(RESULT_ACCESS)] = CACHE_OP_UNSUPPORTED,
[C(RESULT_MISS)] = CACHE_OP_UNSUPPORTED,
},
},
PERF_CACHE_MAP_ALL_UNSUPPORTED,
[C(L1D)][C(OP_READ)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
[C(L1D)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
[C(L1D)][C(OP_WRITE)][C(RESULT_ACCESS)] = XSCALE_PERFCTR_DCACHE_ACCESS,
[C(L1D)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DCACHE_MISS,
[C(L1I)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ICACHE_MISS,
[C(DTLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
[C(DTLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_DTLB_MISS,
[C(ITLB)][C(OP_READ)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
[C(ITLB)][C(OP_WRITE)][C(RESULT_MISS)] = XSCALE_PERFCTR_ITLB_MISS,
};
#define XSCALE_PMU_ENABLE 0x001
@ -442,7 +355,7 @@ static int xscale_map_event(struct perf_event *event)
static int xscale1pmu_init(struct arm_pmu *cpu_pmu)
{
cpu_pmu->name = "xscale1";
cpu_pmu->name = "armv5_xscale1";
cpu_pmu->handle_irq = xscale1pmu_handle_irq;
cpu_pmu->enable = xscale1pmu_enable_event;
cpu_pmu->disable = xscale1pmu_disable_event;
@ -812,7 +725,7 @@ static inline void xscale2pmu_write_counter(struct perf_event *event, u32 val)
static int xscale2pmu_init(struct arm_pmu *cpu_pmu)
{
cpu_pmu->name = "xscale2";
cpu_pmu->name = "armv5_xscale2";
cpu_pmu->handle_irq = xscale2pmu_handle_irq;
cpu_pmu->enable = xscale2pmu_enable_event;
cpu_pmu->disable = xscale2pmu_disable_event;

View File

@ -3,6 +3,7 @@
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/kexec.h>
.align 3 /* not needed for this code, but keeps fncpy() happy */
@ -59,7 +60,7 @@ ENTRY(relocate_new_kernel)
mov r0,#0
ldr r1,kexec_mach_type
ldr r2,kexec_boot_atags
ARM( mov pc, lr )
ARM( ret lr )
THUMB( bx lr )
.align

View File

@ -393,19 +393,34 @@ static void __init cpuid_init_hwcaps(void)
elf_hwcap |= HWCAP_LPAE;
}
static void __init feat_v6_fixup(void)
static void __init elf_hwcap_fixup(void)
{
int id = read_cpuid_id();
if ((id & 0xff0f0000) != 0x41070000)
return;
unsigned id = read_cpuid_id();
unsigned sync_prim;
/*
* HWCAP_TLS is available only on 1136 r1p0 and later,
* see also kuser_get_tls_init.
*/
if ((((id >> 4) & 0xfff) == 0xb36) && (((id >> 20) & 3) == 0))
if (read_cpuid_part() == ARM_CPU_PART_ARM1136 &&
((id >> 20) & 3) == 0) {
elf_hwcap &= ~HWCAP_TLS;
return;
}
/* Verify if CPUID scheme is implemented */
if ((id & 0x000f0000) != 0x000f0000)
return;
/*
* If the CPU supports LDREX/STREX and LDREXB/STREXB,
* avoid advertising SWP; it may not be atomic with
* multiprocessing cores.
*/
sync_prim = ((read_cpuid_ext(CPUID_EXT_ISAR3) >> 8) & 0xf0) |
((read_cpuid_ext(CPUID_EXT_ISAR4) >> 20) & 0x0f);
if (sync_prim >= 0x13)
elf_hwcap &= ~HWCAP_SWP;
}
/*
@ -609,7 +624,7 @@ static void __init setup_processor(void)
#endif
erratum_a15_798181_init();
feat_v6_fixup();
elf_hwcap_fixup();
cacheid_init();
cpu_init();

View File

@ -107,7 +107,7 @@ ENTRY(cpu_resume_mmu)
instr_sync
mov r0, r0
mov r0, r0
mov pc, r3 @ jump to virtual address
ret r3 @ jump to virtual address
ENDPROC(cpu_resume_mmu)
.popsection
cpu_resume_after_mmu:

View File

@ -17,6 +17,8 @@
#include <asm/cputype.h>
#define SCU_CTRL 0x00
#define SCU_ENABLE (1 << 0)
#define SCU_STANDBY_ENABLE (1 << 5)
#define SCU_CONFIG 0x04
#define SCU_CPU_STATUS 0x08
#define SCU_INVALIDATE 0x0c
@ -50,10 +52,16 @@ void scu_enable(void __iomem *scu_base)
scu_ctrl = readl_relaxed(scu_base + SCU_CTRL);
/* already enabled? */
if (scu_ctrl & 1)
if (scu_ctrl & SCU_ENABLE)
return;
scu_ctrl |= 1;
scu_ctrl |= SCU_ENABLE;
/* Cortex-A9 earlier than r2p0 has no standby bit in SCU */
if ((read_cpuid_id() & 0xff0ffff0) == 0x410fc090 &&
(read_cpuid_id() & 0x00f0000f) >= 0x00200000)
scu_ctrl |= SCU_STANDBY_ENABLE;
writel_relaxed(scu_ctrl, scu_base + SCU_CTRL);
/*

View File

@ -92,15 +92,19 @@ void erratum_a15_798181_init(void)
unsigned int midr = read_cpuid_id();
unsigned int revidr = read_cpuid(CPUID_REVIDR);
/* Cortex-A15 r0p0..r3p2 w/o ECO fix affected */
if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2 ||
(revidr & 0x210) == 0x210) {
return;
}
if (revidr & 0x10)
erratum_a15_798181_handler = erratum_a15_798181_partial;
else
/* Brahma-B15 r0p0..r0p2 affected
* Cortex-A15 r0p0..r3p2 w/o ECO fix affected */
if ((midr & 0xff0ffff0) == 0x420f00f0 && midr <= 0x420f00f2)
erratum_a15_798181_handler = erratum_a15_798181_broadcast;
else if ((midr & 0xff0ffff0) == 0x410fc0f0 && midr <= 0x413fc0f2 &&
(revidr & 0x210) != 0x210) {
if (revidr & 0x10)
erratum_a15_798181_handler =
erratum_a15_798181_partial;
else
erratum_a15_798181_handler =
erratum_a15_798181_broadcast;
}
}
#endif

View File

@ -27,6 +27,7 @@
#include <linux/perf_event.h>
#include <asm/opcodes.h>
#include <asm/system_info.h>
#include <asm/traps.h>
#include <asm/uaccess.h>
@ -266,6 +267,9 @@ static struct undef_hook swp_hook = {
*/
static int __init swp_emulation_init(void)
{
if (cpu_architecture() < CPU_ARCH_ARMv7)
return 0;
#ifdef CONFIG_PROC_FS
if (!proc_create("cpu/swp_emulation", S_IRUGO, NULL, &proc_status_fops))
return -ENOMEM;

View File

@ -50,10 +50,7 @@ unsigned long profile_pc(struct pt_regs *regs)
if (!in_lock_functions(regs->ARM_pc))
return regs->ARM_pc;
frame.fp = regs->ARM_fp;
frame.sp = regs->ARM_sp;
frame.lr = regs->ARM_lr;
frame.pc = regs->ARM_pc;
arm_get_current_stackframe(regs, &frame);
do {
int ret = unwind_frame(&frame);
if (ret < 0)

View File

@ -31,11 +31,13 @@
#include <asm/exception.h>
#include <asm/unistd.h>
#include <asm/traps.h>
#include <asm/ptrace.h>
#include <asm/unwind.h>
#include <asm/tls.h>
#include <asm/system_misc.h>
#include <asm/opcodes.h>
static const char *handler[]= {
"prefetch abort",
"data abort",
@ -184,7 +186,7 @@ static void dump_backtrace(struct pt_regs *regs, struct task_struct *tsk)
tsk = current;
if (regs) {
fp = regs->ARM_fp;
fp = frame_pointer(regs);
mode = processor_mode(regs);
} else if (tsk != current) {
fp = thread_saved_fp(tsk);
@ -719,7 +721,7 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs)
dump_instr("", regs);
if (user_mode(regs)) {
__show_regs(regs);
c_backtrace(regs->ARM_fp, processor_mode(regs));
c_backtrace(frame_pointer(regs), processor_mode(regs));
}
}
#endif

View File

@ -479,12 +479,10 @@ void unwind_backtrace(struct pt_regs *regs, struct task_struct *tsk)
tsk = current;
if (regs) {
frame.fp = regs->ARM_fp;
frame.sp = regs->ARM_sp;
frame.lr = regs->ARM_lr;
arm_get_current_stackframe(regs, &frame);
/* PC might be corrupted, use LR in that case. */
frame.pc = kernel_text_address(regs->ARM_pc)
? regs->ARM_pc : regs->ARM_lr;
if (!kernel_text_address(regs->ARM_pc))
frame.pc = regs->ARM_lr;
} else if (tsk == current) {
frame.fp = (unsigned long)__builtin_frame_address(0);
frame.sp = current_sp;

View File

@ -318,7 +318,6 @@ SECTIONS
_end = .;
STABS_DEBUG
.comment 0 : { *(.comment) }
}
/*

View File

@ -274,13 +274,7 @@ int kvm_arch_vcpu_ioctl_set_sregs(struct kvm_vcpu *vcpu,
int __attribute_const__ kvm_target_cpu(void)
{
unsigned long implementor = read_cpuid_implementor();
unsigned long part_number = read_cpuid_part_number();
if (implementor != ARM_CPU_IMP_ARM)
return -EINVAL;
switch (part_number) {
switch (read_cpuid_part()) {
case ARM_CPU_PART_CORTEX_A7:
return KVM_ARM_TARGET_CORTEX_A7;
case ARM_CPU_PART_CORTEX_A15:

View File

@ -17,6 +17,7 @@
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unified.h>
#include <asm/asm-offsets.h>
#include <asm/kvm_asm.h>
@ -134,7 +135,7 @@ phase2:
ldr r0, =TRAMPOLINE_VA
adr r1, target
bfi r0, r1, #0, #PAGE_SHIFT
mov pc, r0
ret r0
target: @ We're now in the trampoline code, switch page tables
mcrr p15, 4, r2, r3, c2

View File

@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA. */
#include <linux/linkage.h>
#include <asm/assembler.h>
#ifdef __ARMEB__
#define al r1
@ -47,7 +48,7 @@ ENTRY(__aeabi_llsl)
THUMB( lsrmi r3, al, ip )
THUMB( orrmi ah, ah, r3 )
mov al, al, lsl r2
mov pc, lr
ret lr
ENDPROC(__ashldi3)
ENDPROC(__aeabi_llsl)

View File

@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA. */
#include <linux/linkage.h>
#include <asm/assembler.h>
#ifdef __ARMEB__
#define al r1
@ -47,7 +48,7 @@ ENTRY(__aeabi_lasr)
THUMB( lslmi r3, ah, ip )
THUMB( orrmi al, al, r3 )
mov ah, ah, asr r2
mov pc, lr
ret lr
ENDPROC(__ashrdi3)
ENDPROC(__aeabi_lasr)

View File

@ -25,7 +25,7 @@
ENTRY(c_backtrace)
#if !defined(CONFIG_FRAME_POINTER) || !defined(CONFIG_PRINTK)
mov pc, lr
ret lr
ENDPROC(c_backtrace)
#else
stmfd sp!, {r4 - r8, lr} @ Save an extra register so we have a location...

View File

@ -1,3 +1,4 @@
#include <asm/assembler.h>
#include <asm/unwind.h>
#if __LINUX_ARM_ARCH__ >= 6
@ -70,7 +71,7 @@ UNWIND( .fnstart )
\instr r2, r2, r3
str r2, [r1, r0, lsl #2]
restore_irqs ip
mov pc, lr
ret lr
UNWIND( .fnend )
ENDPROC(\name )
.endm
@ -98,7 +99,7 @@ UNWIND( .fnstart )
\store r2, [r1]
moveq r0, #0
restore_irqs ip
mov pc, lr
ret lr
UNWIND( .fnend )
ENDPROC(\name )
.endm

View File

@ -1,4 +1,5 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
#if __LINUX_ARM_ARCH__ >= 6
ENTRY(__bswapsi2)
@ -18,7 +19,7 @@ ENTRY(__bswapsi2)
mov r3, r3, lsr #8
bic r3, r3, #0xff00
eor r0, r3, r0, ror #8
mov pc, lr
ret lr
ENDPROC(__bswapsi2)
ENTRY(__bswapdi2)
@ -31,6 +32,6 @@ ENTRY(__bswapdi2)
bic r1, r1, #0xff00
eor r1, r1, r0, ror #8
eor r0, r3, ip, ror #8
mov pc, lr
ret lr
ENDPROC(__bswapdi2)
#endif

View File

@ -36,9 +36,9 @@ ENTRY(call_with_stack)
mov r0, r1
adr lr, BSYM(1f)
mov pc, r2
ret r2
1: ldr lr, [sp]
ldr sp, [sp, #4]
mov pc, lr
ret lr
ENDPROC(call_with_stack)

View File

@ -97,7 +97,7 @@ td3 .req lr
#endif
#endif
adcnes sum, sum, td0 @ update checksum
mov pc, lr
ret lr
ENTRY(csum_partial)
stmfd sp!, {buf, lr}

View File

@ -7,6 +7,7 @@
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation.
*/
#include <asm/assembler.h>
/*
* unsigned int
@ -40,7 +41,7 @@ sum .req r3
adcs sum, sum, ip, put_byte_1 @ update checksum
strb ip, [dst], #1
tst dst, #2
moveq pc, lr @ dst is now 32bit aligned
reteq lr @ dst is now 32bit aligned
.Ldst_16bit: load2b r8, ip
sub len, len, #2
@ -48,7 +49,7 @@ sum .req r3
strb r8, [dst], #1
adcs sum, sum, ip, put_byte_1
strb ip, [dst], #1
mov pc, lr @ dst is now 32bit aligned
ret lr @ dst is now 32bit aligned
/*
* Handle 0 to 7 bytes, with any alignment of source and

View File

@ -35,7 +35,7 @@ ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06
mul r0, r2, r0 @ max = 2^32-1
add r0, r0, r1, lsr #32-6
movs r0, r0, lsr #6
moveq pc, lr
reteq lr
/*
* loops = r0 * HZ * loops_per_jiffy / 1000000
@ -46,23 +46,23 @@ ENTRY(__loop_const_udelay) @ 0 <= r0 <= 0x7fffff06
ENTRY(__loop_delay)
subs r0, r0, #1
#if 0
movls pc, lr
retls lr
subs r0, r0, #1
movls pc, lr
retls lr
subs r0, r0, #1
movls pc, lr
retls lr
subs r0, r0, #1
movls pc, lr
retls lr
subs r0, r0, #1
movls pc, lr
retls lr
subs r0, r0, #1
movls pc, lr
retls lr
subs r0, r0, #1
movls pc, lr
retls lr
subs r0, r0, #1
#endif
bhi __loop_delay
mov pc, lr
ret lr
ENDPROC(__loop_udelay)
ENDPROC(__loop_const_udelay)
ENDPROC(__loop_delay)

View File

@ -13,6 +13,7 @@
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/unwind.h>
#ifdef __ARMEB__
@ -97,7 +98,7 @@ UNWIND(.fnstart)
mov yl, #0
cmpeq xl, r4
movlo xh, xl
movlo pc, lr
retlo lr
@ The division loop for lower bit positions.
@ Here we shift remainer bits leftwards rather than moving the
@ -111,14 +112,14 @@ UNWIND(.fnstart)
subcs xh, xh, r4
movs ip, ip, lsr #1
bne 4b
mov pc, lr
ret lr
@ The top part of remainder became zero. If carry is set
@ (the 33th bit) this is a false positive so resume the loop.
@ Otherwise, if lower part is also null then we are done.
6: bcs 5b
cmp xl, #0
moveq pc, lr
reteq lr
@ We still have remainer bits in the low part. Bring them up.
@ -144,7 +145,7 @@ UNWIND(.fnstart)
movs ip, ip, lsr #1
mov xh, #1
bne 4b
mov pc, lr
ret lr
8: @ Division by a power of 2: determine what that divisor order is
@ then simply shift values around
@ -184,13 +185,13 @@ UNWIND(.fnstart)
THUMB( orr yl, yl, xh )
mov xh, xl, lsl ip
mov xh, xh, lsr ip
mov pc, lr
ret lr
@ eq -> division by 1: obvious enough...
9: moveq yl, xl
moveq yh, xh
moveq xh, #0
moveq pc, lr
reteq lr
UNWIND(.fnend)
UNWIND(.fnstart)

View File

@ -35,7 +35,7 @@ ENTRY(_find_first_zero_bit_le)
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
mov pc, lr
ret lr
ENDPROC(_find_first_zero_bit_le)
/*
@ -76,7 +76,7 @@ ENTRY(_find_first_bit_le)
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
mov pc, lr
ret lr
ENDPROC(_find_first_bit_le)
/*
@ -114,7 +114,7 @@ ENTRY(_find_first_zero_bit_be)
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
mov pc, lr
ret lr
ENDPROC(_find_first_zero_bit_be)
ENTRY(_find_next_zero_bit_be)
@ -148,7 +148,7 @@ ENTRY(_find_first_bit_be)
2: cmp r2, r1 @ any more?
blo 1b
3: mov r0, r1 @ no free bits
mov pc, lr
ret lr
ENDPROC(_find_first_bit_be)
ENTRY(_find_next_bit_be)
@ -192,5 +192,5 @@ ENDPROC(_find_next_bit_be)
#endif
cmp r1, r0 @ Clamp to maxbit
movlo r0, r1
mov pc, lr
ret lr

View File

@ -18,7 +18,7 @@
* Inputs: r0 contains the address
* r1 contains the address limit, which must be preserved
* Outputs: r0 is the error code
* r2 contains the zero-extended value
* r2, r3 contains the zero-extended value
* lr corrupted
*
* No other registers must be altered. (see <asm/uaccess.h>
@ -36,7 +36,7 @@ ENTRY(__get_user_1)
check_uaccess r0, 1, r1, r2, __get_user_bad
1: TUSER(ldrb) r2, [r0]
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__get_user_1)
ENTRY(__get_user_2)
@ -56,25 +56,60 @@ rb .req r0
orr r2, rb, r2, lsl #8
#endif
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__get_user_2)
ENTRY(__get_user_4)
check_uaccess r0, 4, r1, r2, __get_user_bad
4: TUSER(ldr) r2, [r0]
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__get_user_4)
ENTRY(__get_user_8)
check_uaccess r0, 8, r1, r2, __get_user_bad
#ifdef CONFIG_THUMB2_KERNEL
5: TUSER(ldr) r2, [r0]
6: TUSER(ldr) r3, [r0, #4]
#else
5: TUSER(ldr) r2, [r0], #4
6: TUSER(ldr) r3, [r0]
#endif
mov r0, #0
ret lr
ENDPROC(__get_user_8)
#ifdef __ARMEB__
ENTRY(__get_user_lo8)
check_uaccess r0, 8, r1, r2, __get_user_bad
#ifdef CONFIG_CPU_USE_DOMAINS
add r0, r0, #4
7: ldrt r2, [r0]
#else
7: ldr r2, [r0, #4]
#endif
mov r0, #0
ret lr
ENDPROC(__get_user_lo8)
#endif
__get_user_bad8:
mov r3, #0
__get_user_bad:
mov r2, #0
mov r0, #-EFAULT
mov pc, lr
ret lr
ENDPROC(__get_user_bad)
ENDPROC(__get_user_bad8)
.pushsection __ex_table, "a"
.long 1b, __get_user_bad
.long 2b, __get_user_bad
.long 3b, __get_user_bad
.long 4b, __get_user_bad
.long 5b, __get_user_bad8
.long 6b, __get_user_bad8
#ifdef __ARMEB__
.long 7b, __get_user_bad
#endif
.popsection

View File

@ -25,7 +25,7 @@
ENTRY(__raw_readsb)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
reteq lr
ands ip, r1, #3
bne .Linsb_align

View File

@ -12,7 +12,7 @@
ENTRY(__raw_readsl)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
reteq lr
ands ip, r1, #3
bne 3f
@ -33,7 +33,7 @@ ENTRY(__raw_readsl)
stmcsia r1!, {r3, ip}
ldrne r3, [r0, #0]
strne r3, [r1, #0]
mov pc, lr
ret lr
3: ldr r3, [r0]
cmp ip, #2
@ -75,5 +75,5 @@ ENTRY(__raw_readsl)
strb r3, [r1, #1]
8: mov r3, ip, get_byte_0
strb r3, [r1, #0]
mov pc, lr
ret lr
ENDPROC(__raw_readsl)

View File

@ -27,11 +27,11 @@
strb r3, [r1], #1
subs r2, r2, #1
moveq pc, lr
reteq lr
ENTRY(__raw_readsw)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
reteq lr
tst r1, #3
bne .Linsw_align

View File

@ -26,7 +26,7 @@
ENTRY(__raw_readsw)
teq r2, #0
moveq pc, lr
reteq lr
tst r1, #3
bne .Linsw_align

View File

@ -45,7 +45,7 @@
ENTRY(__raw_writesb)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
reteq lr
ands ip, r1, #3
bne .Loutsb_align

View File

@ -12,7 +12,7 @@
ENTRY(__raw_writesl)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
reteq lr
ands ip, r1, #3
bne 3f
@ -33,7 +33,7 @@ ENTRY(__raw_writesl)
ldrne r3, [r1, #0]
strcs ip, [r0, #0]
strne r3, [r0, #0]
mov pc, lr
ret lr
3: bic r1, r1, #3
ldr r3, [r1], #4
@ -47,7 +47,7 @@ ENTRY(__raw_writesl)
orr ip, ip, r3, lspush #16
str ip, [r0]
bne 4b
mov pc, lr
ret lr
5: mov ip, r3, lspull #8
ldr r3, [r1], #4
@ -55,7 +55,7 @@ ENTRY(__raw_writesl)
orr ip, ip, r3, lspush #24
str ip, [r0]
bne 5b
mov pc, lr
ret lr
6: mov ip, r3, lspull #24
ldr r3, [r1], #4
@ -63,5 +63,5 @@ ENTRY(__raw_writesl)
orr ip, ip, r3, lspush #8
str ip, [r0]
bne 6b
mov pc, lr
ret lr
ENDPROC(__raw_writesl)

View File

@ -28,11 +28,11 @@
orr r3, r3, r3, lsl #16
str r3, [r0]
subs r2, r2, #1
moveq pc, lr
reteq lr
ENTRY(__raw_writesw)
teq r2, #0 @ do we have to check for the zero len?
moveq pc, lr
reteq lr
tst r1, #3
bne .Loutsw_align

View File

@ -31,7 +31,7 @@
ENTRY(__raw_writesw)
teq r2, #0
moveq pc, lr
reteq lr
ands r3, r1, #3
bne .Loutsw_align
@ -96,5 +96,5 @@ ENTRY(__raw_writesw)
tst r2, #1
3: movne ip, r3, lsr #8
strneh ip, [r0]
mov pc, lr
ret lr
ENDPROC(__raw_writesw)

View File

@ -210,7 +210,7 @@ ENTRY(__aeabi_uidiv)
UNWIND(.fnstart)
subs r2, r1, #1
moveq pc, lr
reteq lr
bcc Ldiv0
cmp r0, r1
bls 11f
@ -220,16 +220,16 @@ UNWIND(.fnstart)
ARM_DIV_BODY r0, r1, r2, r3
mov r0, r2
mov pc, lr
ret lr
11: moveq r0, #1
movne r0, #0
mov pc, lr
ret lr
12: ARM_DIV2_ORDER r1, r2
mov r0, r0, lsr r2
mov pc, lr
ret lr
UNWIND(.fnend)
ENDPROC(__udivsi3)
@ -244,11 +244,11 @@ UNWIND(.fnstart)
moveq r0, #0
tsthi r1, r2 @ see if divisor is power of 2
andeq r0, r0, r2
movls pc, lr
retls lr
ARM_MOD_BODY r0, r1, r2, r3
mov pc, lr
ret lr
UNWIND(.fnend)
ENDPROC(__umodsi3)
@ -274,23 +274,23 @@ UNWIND(.fnstart)
cmp ip, #0
rsbmi r0, r0, #0
mov pc, lr
ret lr
10: teq ip, r0 @ same sign ?
rsbmi r0, r0, #0
mov pc, lr
ret lr
11: movlo r0, #0
moveq r0, ip, asr #31
orreq r0, r0, #1
mov pc, lr
ret lr
12: ARM_DIV2_ORDER r1, r2
cmp ip, #0
mov r0, r3, lsr r2
rsbmi r0, r0, #0
mov pc, lr
ret lr
UNWIND(.fnend)
ENDPROC(__divsi3)
@ -315,7 +315,7 @@ UNWIND(.fnstart)
10: cmp ip, #0
rsbmi r0, r0, #0
mov pc, lr
ret lr
UNWIND(.fnend)
ENDPROC(__modsi3)
@ -331,7 +331,7 @@ UNWIND(.save {r0, r1, ip, lr} )
ldmfd sp!, {r1, r2, ip, lr}
mul r3, r0, r2
sub r1, r1, r3
mov pc, lr
ret lr
UNWIND(.fnend)
ENDPROC(__aeabi_uidivmod)
@ -344,7 +344,7 @@ UNWIND(.save {r0, r1, ip, lr} )
ldmfd sp!, {r1, r2, ip, lr}
mul r3, r0, r2
sub r1, r1, r3
mov pc, lr
ret lr
UNWIND(.fnend)
ENDPROC(__aeabi_idivmod)

View File

@ -27,6 +27,7 @@ Boston, MA 02110-1301, USA. */
#include <linux/linkage.h>
#include <asm/assembler.h>
#ifdef __ARMEB__
#define al r1
@ -47,7 +48,7 @@ ENTRY(__aeabi_llsr)
THUMB( lslmi r3, ah, ip )
THUMB( orrmi al, al, r3 )
mov ah, ah, lsr r2
mov pc, lr
ret lr
ENDPROC(__lshrdi3)
ENDPROC(__aeabi_llsr)

View File

@ -22,5 +22,5 @@ ENTRY(memchr)
bne 1b
sub r0, r0, #1
2: movne r0, #0
mov pc, lr
ret lr
ENDPROC(memchr)

View File

@ -110,7 +110,7 @@ ENTRY(memset)
strneb r1, [ip], #1
tst r2, #1
strneb r1, [ip], #1
mov pc, lr
ret lr
6: subs r2, r2, #4 @ 1 do we have enough
blt 5b @ 1 bytes to align with?

View File

@ -121,5 +121,5 @@ ENTRY(__memzero)
strneb r2, [r0], #1 @ 1
tst r1, #1 @ 1 a byte left over
strneb r2, [r0], #1 @ 1
mov pc, lr @ 1
ret lr @ 1
ENDPROC(__memzero)

View File

@ -11,6 +11,7 @@
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#ifdef __ARMEB__
#define xh r0
@ -41,7 +42,7 @@ ENTRY(__aeabi_lmul)
adc xh, xh, yh, lsr #16
adds xl, xl, ip, lsl #16
adc xh, xh, ip, lsr #16
mov pc, lr
ret lr
ENDPROC(__muldi3)
ENDPROC(__aeabi_lmul)

View File

@ -36,7 +36,7 @@ ENTRY(__put_user_1)
check_uaccess r0, 1, r1, ip, __put_user_bad
1: TUSER(strb) r2, [r0]
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__put_user_1)
ENTRY(__put_user_2)
@ -60,14 +60,14 @@ ENTRY(__put_user_2)
#endif
#endif /* CONFIG_THUMB2_KERNEL */
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__put_user_2)
ENTRY(__put_user_4)
check_uaccess r0, 4, r1, ip, __put_user_bad
4: TUSER(str) r2, [r0]
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__put_user_4)
ENTRY(__put_user_8)
@ -80,12 +80,12 @@ ENTRY(__put_user_8)
6: TUSER(str) r3, [r0]
#endif
mov r0, #0
mov pc, lr
ret lr
ENDPROC(__put_user_8)
__put_user_bad:
mov r0, #-EFAULT
mov pc, lr
ret lr
ENDPROC(__put_user_bad)
.pushsection __ex_table, "a"

View File

@ -23,5 +23,5 @@ ENTRY(strchr)
teq r2, r1
movne r0, #0
subeq r0, r0, #1
mov pc, lr
ret lr
ENDPROC(strchr)

View File

@ -22,5 +22,5 @@ ENTRY(strrchr)
teq r2, #0
bne 1b
mov r0, r3
mov pc, lr
ret lr
ENDPROC(strrchr)

View File

@ -11,6 +11,7 @@
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#ifdef __ARMEB__
#define xh r0
@ -31,7 +32,7 @@ ENTRY(__ucmpdi2)
movlo r0, #0
moveq r0, #1
movhi r0, #2
mov pc, lr
ret lr
ENDPROC(__ucmpdi2)
@ -44,7 +45,7 @@ ENTRY(__aeabi_ulcmp)
movlo r0, #-1
moveq r0, #0
movhi r0, #1
mov pc, lr
ret lr
ENDPROC(__aeabi_ulcmp)

View File

@ -213,7 +213,7 @@ ddr2clk_stop_done:
cmp ip, r0
bne ddr2clk_stop_done
mov pc, lr
ret lr
ENDPROC(davinci_ddr_psc_config)
CACHE_FLUSH:

View File

@ -16,11 +16,6 @@
#ifndef __ASM_ARCH_MEMORY_H
#define __ASM_ARCH_MEMORY_H
/*
* Physical DRAM offset.
*/
#define PLAT_PHYS_OFFSET UL(0x00000000)
/*
* Cache flushing area - SRAM
*/

View File

@ -198,7 +198,7 @@ crunch_load:
get_thread_info r10
#endif
2: dec_preempt_count r10, r3
mov pc, lr
ret lr
/*
* Back up crunch regs to save area and disable access to them
@ -277,7 +277,7 @@ ENTRY(crunch_task_copy)
mov r3, lr @ preserve return address
bl crunch_save
msr cpsr_c, ip @ restore interrupt mode
mov pc, r3
ret r3
/*
* Restore crunch state from given memory address
@ -310,4 +310,4 @@ ENTRY(crunch_task_restore)
mov r3, lr @ preserve return address
bl crunch_load
msr cpsr_c, ip @ restore interrupt mode
mov pc, r3
ret r3

View File

@ -1,22 +0,0 @@
/*
* arch/arm/mach-ep93xx/include/mach/memory.h
*/
#ifndef __ASM_ARCH_MEMORY_H
#define __ASM_ARCH_MEMORY_H
#if defined(CONFIG_EP93XX_SDCE3_SYNC_PHYS_OFFSET)
#define PLAT_PHYS_OFFSET UL(0x00000000)
#elif defined(CONFIG_EP93XX_SDCE0_PHYS_OFFSET)
#define PLAT_PHYS_OFFSET UL(0xc0000000)
#elif defined(CONFIG_EP93XX_SDCE1_PHYS_OFFSET)
#define PLAT_PHYS_OFFSET UL(0xd0000000)
#elif defined(CONFIG_EP93XX_SDCE2_PHYS_OFFSET)
#define PLAT_PHYS_OFFSET UL(0xe0000000)
#elif defined(CONFIG_EP93XX_SDCE3_ASYNC_PHYS_OFFSET)
#define PLAT_PHYS_OFFSET UL(0xf0000000)
#else
#error "Kconfig bug: No EP93xx PHYS_OFFSET set"
#endif
#endif

View File

@ -119,6 +119,7 @@ config EXYNOS5420_MCPM
bool "Exynos5420 Multi-Cluster PM support"
depends on MCPM && SOC_EXYNOS5420
select ARM_CCI
select ARM_CPU_SUSPEND
help
This is needed to provide CPU and cluster power management
on Exynos5420 implementing big.LITTLE.

View File

@ -196,7 +196,7 @@ static void exynos_power_down(void)
if (last_man && __mcpm_outbound_enter_critical(cpu, cluster)) {
arch_spin_unlock(&exynos_mcpm_lock);
if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A15) {
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
/*
* On the Cortex-A15 we need to disable
* L2 prefetching before flushing the cache.
@ -289,6 +289,19 @@ static void __naked exynos_pm_power_up_setup(unsigned int affinity_level)
"b cci_enable_port_for_self");
}
static void __init exynos_cache_off(void)
{
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A15) {
/* disable L2 prefetching on the Cortex-A15 */
asm volatile(
"mcr p15, 1, %0, c15, c0, 3\n\t"
"isb\n\t"
"dsb"
: : "r" (0x400));
}
exynos_v7_exit_coherency_flush(all);
}
static const struct of_device_id exynos_dt_mcpm_match[] = {
{ .compatible = "samsung,exynos5420" },
{ .compatible = "samsung,exynos5800" },
@ -332,6 +345,8 @@ static int __init exynos_mcpm_init(void)
ret = mcpm_platform_register(&exynos_power_ops);
if (!ret)
ret = mcpm_sync_init(exynos_pm_power_up_setup);
if (!ret)
ret = mcpm_loopback(exynos_cache_off); /* turn on the CCI */
if (ret) {
iounmap(ns_sram_base_addr);
return ret;

View File

@ -190,7 +190,7 @@ static void __init exynos_smp_init_cpus(void)
void __iomem *scu_base = scu_base_addr();
unsigned int i, ncores;
if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
ncores = scu_base ? scu_get_core_count(scu_base) : 1;
else
/*
@ -216,7 +216,7 @@ static void __init exynos_smp_prepare_cpus(unsigned int max_cpus)
exynos_sysram_init();
if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
scu_enable(scu_base_addr());
/*

View File

@ -300,7 +300,7 @@ static int exynos_pm_suspend(void)
tmp = (S5P_USE_STANDBY_WFI0 | S5P_USE_STANDBY_WFE0);
__raw_writel(tmp, S5P_CENTRAL_SEQ_OPTION);
if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
exynos_cpu_save_register();
return 0;
@ -334,7 +334,7 @@ static void exynos_pm_resume(void)
if (exynos_pm_central_resume())
goto early_wakeup;
if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
exynos_cpu_restore_register();
/* For release retention */
@ -353,7 +353,7 @@ static void exynos_pm_resume(void)
s3c_pm_do_restore_core(exynos_core_save, ARRAY_SIZE(exynos_core_save));
if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
scu_enable(S5P_VA_SCU);
early_wakeup:
@ -440,15 +440,14 @@ static int exynos_cpu_pm_notifier(struct notifier_block *self,
case CPU_PM_ENTER:
if (cpu == 0) {
exynos_pm_central_suspend();
if (read_cpuid_part_number() == ARM_CPU_PART_CORTEX_A9)
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9)
exynos_cpu_save_register();
}
break;
case CPU_PM_EXIT:
if (cpu == 0) {
if (read_cpuid_part_number() ==
ARM_CPU_PART_CORTEX_A9) {
if (read_cpuid_part() == ARM_CPU_PART_CORTEX_A9) {
scu_enable(S5P_VA_SCU);
exynos_cpu_restore_register();
}

View File

@ -59,11 +59,6 @@ extern unsigned long __bus_to_pfn(unsigned long);
*/
#define FLUSH_BASE 0xf9000000
/*
* Physical DRAM offset.
*/
#define PLAT_PHYS_OFFSET UL(0x00000000)
#define FLUSH_BASE_PHYS 0x50000000
#endif

View File

@ -10,6 +10,7 @@
*/
#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/asm-offsets.h>
#include <asm/hardware/cache-l2x0.h>
#include "hardware.h"
@ -301,7 +302,7 @@ rbc_loop:
resume_mmdc
/* return to suspend finish */
mov pc, lr
ret lr
resume:
/* invalidate L1 I-cache first */
@ -325,7 +326,7 @@ resume:
mov r5, #0x1
resume_mmdc
mov pc, lr
ret lr
ENDPROC(imx6_suspend)
/*

Some files were not shown because too many files have changed in this diff Show More