From 8443b165f13d21214e5d5495eee7c3bf7f2456bf Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Fri, 29 Apr 2005 21:58:15 +0100 Subject: [PATCH 1/8] [PATCH] ARM: 2657/1: export ixp2000_pci_config_addr Patch from Lennert Buytenhek Export ixp2000_pci_config_addr, to be used by the IXDP2800 platform setup code to coordinate booting the master and slave NPU. Signed-off-by: Lennert Buytenhek Signed-off-by: Deepak Saxena Signed-off-by: Russell King --- arch/arm/mach-ixp2000/pci.c | 2 +- include/asm-arm/arch-ixp2000/platform.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/arm/mach-ixp2000/pci.c b/arch/arm/mach-ixp2000/pci.c index 831f8ffb6b61..36c9a94298cc 100644 --- a/arch/arm/mach-ixp2000/pci.c +++ b/arch/arm/mach-ixp2000/pci.c @@ -37,7 +37,7 @@ static int pci_master_aborts = 0; static int clear_master_aborts(void); -static u32 * +u32 * ixp2000_pci_config_addr(unsigned int bus_nr, unsigned int devfn, int where) { u32 *paddress; diff --git a/include/asm-arm/arch-ixp2000/platform.h b/include/asm-arm/arch-ixp2000/platform.h index 509e44d528d8..901bba6d02b4 100644 --- a/include/asm-arm/arch-ixp2000/platform.h +++ b/include/asm-arm/arch-ixp2000/platform.h @@ -121,6 +121,7 @@ unsigned long ixp2000_gettimeoffset(void); struct pci_sys_data; +u32 *ixp2000_pci_config_addr(unsigned int bus, unsigned int devfn, int where); void ixp2000_pci_preinit(void); int ixp2000_pci_setup(int, struct pci_sys_data*); struct pci_bus* ixp2000_pci_scan_bus(int, struct pci_sys_data*); From ae36bf5861e1091dd337f0b475e043ab07d4a937 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Fri, 29 Apr 2005 21:58:15 +0100 Subject: [PATCH 2/8] [PATCH] ARM: 2658/1: start ixp2000 pci memory resource at 0xe0000000 Patch from Lennert Buytenhek On the IXDP2800, the bootloader does an awful job of configuring the PCI bus, so we make linux reconfigure everything. 
Having a 1:1 pci:phys address mapping generally simplifies everything, so try to allocate PCI addresses from the [e0000000..ffffffff] range, which is the physical address range of the outbound PCI window on the IXP2000. This does not affect any of the other IXP2000 platforms since they all use their bootloader's PCI resource assignment. Signed-off-by: Lennert Buytenhek Signed-off-by: Deepak Saxena Signed-off-by: Russell King --- arch/arm/mach-ixp2000/pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/arm/mach-ixp2000/pci.c b/arch/arm/mach-ixp2000/pci.c index 36c9a94298cc..3844d5c7cf86 100644 --- a/arch/arm/mach-ixp2000/pci.c +++ b/arch/arm/mach-ixp2000/pci.c @@ -208,7 +208,7 @@ ixp2000_pci_preinit(void) * use our own resource space. */ static struct resource ixp2000_pci_mem_space = { - .start = 0x00000000, + .start = 0xe0000000, .end = 0xffffffff, .flags = IORESOURCE_MEM, .name = "PCI Mem Space" From 458a83fa43e83505f9401783ce9ed41b5a8b5591 Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Fri, 29 Apr 2005 21:58:16 +0100 Subject: [PATCH 3/8] [PATCH] ARM: 2659/1: do not assign PCI I/O address zero on IXP2000 Patch from Lennert Buytenhek Assigning the address zero to a PCI device BAR causes some part of the PCI subsystem to believe that resource allocation for that BAR failed due to resource conflicts, which will make attempts to enable the device fail. Work around this by assigning I/O addresses starting from 00010000. While we're at it, make the PCI I/O resource end at 0001ffff, since we only have 64k of outbound I/O window on the IXP2000, and we don't do bank switching. 
Signed-off-by: Lennert Buytenhek Signed-off-by: Deepak Saxena Signed-off-by: Russell King --- arch/arm/mach-ixp2000/pci.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/arm/mach-ixp2000/pci.c b/arch/arm/mach-ixp2000/pci.c index 3844d5c7cf86..5ff2f2718c58 100644 --- a/arch/arm/mach-ixp2000/pci.c +++ b/arch/arm/mach-ixp2000/pci.c @@ -215,8 +215,8 @@ static struct resource ixp2000_pci_mem_space = { }; static struct resource ixp2000_pci_io_space = { - .start = 0x00000000, - .end = 0xffffffff, + .start = 0x00010000, + .end = 0x0001ffff, .flags = IORESOURCE_IO, .name = "PCI I/O Space" }; From 3a1e501511a1e2c665c566939047794dcf86466b Mon Sep 17 00:00:00 2001 From: "George G. Davis" Date: Fri, 29 Apr 2005 22:08:33 +0100 Subject: [PATCH 4/8] [PATCH] ARM: 2655/1: ARM1136 SWP instruction abort handler fix Patch from George G. Davis As noted in http://www.arm.com/linux/patch-2.6.9-arm1.gz, the "Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR." So the v6_early_abort handler does not report the correct rd/wr direction for the SWP instruction which may result in SEGVS or hangs. In order to work around this problem, this patch merely updates the fix contained in the ARM Ltd. patch to use the macroised abort handler fixups. Signed-off-by: George G. Davis Signed-off-by: Russell King --- arch/arm/mm/abort-ev6.S | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/arch/arm/mm/abort-ev6.S b/arch/arm/mm/abort-ev6.S index 38b2cbb89beb..8f76f3df7b4c 100644 --- a/arch/arm/mm/abort-ev6.S +++ b/arch/arm/mm/abort-ev6.S @@ -1,5 +1,6 @@ #include #include +#include "abort-macro.S" /* * Function: v6_early_abort * @@ -13,11 +14,26 @@ * : sp = pointer to registers * * Purpose : obtain information about current aborted instruction. + * Note: we read user space. This means we might cause a data + * abort here if the I-TLB and D-TLB aren't seeing the same + * picture. Unfortunately, this does happen. We live with it. 
*/ .align 5 ENTRY(v6_early_abort) mrc p15, 0, r1, c5, c0, 0 @ get FSR mrc p15, 0, r0, c6, c0, 0 @ get FAR +/* + * Faulty SWP instruction on 1136 doesn't set bit 11 in DFSR. + * The test below covers all the write situations, including Java bytecodes + */ + bic r1, r1, #1 << 11 | 1 << 10 @ clear bits 11 and 10 of FSR + tst r3, #PSR_J_BIT @ Java? + movne pc, lr + do_thumb_abort + ldreq r3, [r2] @ read aborted ARM instruction + do_ldrd_abort + tst r3, #1 << 20 @ L = 0 -> write + orreq r1, r1, #1 << 11 @ yes. mov pc, lr From 2d2669b62984b8d76b05a6a045390a3250317d21 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Fri, 29 Apr 2005 22:08:33 +0100 Subject: [PATCH 5/8] [PATCH] ARM: 2651/3: kernel helpers for NPTL support Patch from Nicolas Pitre This patch entirely reworks the kernel assistance for NPTL on ARM. In particular this provides an efficient way to retrieve the TLS value and perform atomic operations without any instruction emulation nor special system call. This even allows for pre ARMv6 binaries to be forward compatible with SMP systems without any penalty. The problematic and performance critical operations are performed through segment of kernel provided user code reachable from user space at a fixed address in kernel memory. Those fixed entry points are within the vector page so we basically get it for free as no extra memory page is required and nothing else may be mapped at that location anyway. This is different from (but doesn't preclude) a full blown VDSO implementation, however a VDSO would prevent some assembly tricks with constants that allows for efficient branching to those code segments. And since those code segments only use a few cycles before returning to user code, the overhead of a VDSO far call would add a significant overhead to such minimalistic operations. The ARM_NR_set_tls syscall also changed number. 
This is done for two reasons: 1) this patch changes the way the TLS value was previously meant to be retrieved, therefore we ensure whatever library using the old way gets fixed (they only exist in private tree at the moment since the NPTL work is still progressing). 2) the previous number was allocated in a range causing an undefined instruction trap on kernels not supporting that syscall and it was determined that allocating it in a range returning -ENOSYS would be much nicer for libraries trying to determine if the feature is present or not. Signed-off-by: Nicolas Pitre Signed-off-by: Russell King --- arch/arm/kernel/entry-armv.S | 213 ++++++++++++++++++++++++++++++++++- arch/arm/kernel/traps.c | 58 ++++++++-- arch/arm/mm/Kconfig | 14 +++ include/asm-arm/unistd.h | 3 +- 4 files changed, 277 insertions(+), 11 deletions(-) diff --git a/arch/arm/kernel/entry-armv.S b/arch/arm/kernel/entry-armv.S index 2a5c3fe09a95..080df907f242 100644 --- a/arch/arm/kernel/entry-armv.S +++ b/arch/arm/kernel/entry-armv.S @@ -269,6 +269,12 @@ __pabt_svc: add r5, sp, #S_PC ldmia r7, {r2 - r4} @ Get USR pc, cpsr +#if __LINUX_ARM_ARCH__ < 6 + @ make sure our user space atomic helper is aborted + cmp r2, #VIRT_OFFSET + bichs r3, r3, #PSR_Z_BIT +#endif + @ @ We are now ready to fill in the remaining blanks on the stack: @ @@ -499,8 +505,12 @@ ENTRY(__switch_to) mra r4, r5, acc0 stmia ip, {r4, r5} #endif +#ifdef CONFIG_HAS_TLS_REG + mcr p15, 0, r3, c13, c0, 3 @ set TLS register +#else mov r4, #0xffff0fff - str r3, [r4, #-3] @ Set TLS ptr + str r3, [r4, #-15] @ TLS val at 0xffff0ff0 +#endif mcr p15, 0, r6, c3, c0, 0 @ Set domain register #ifdef CONFIG_VFP @ Always disable VFP so we can lazily save/restore the old @@ -519,6 +529,207 @@ ENTRY(__switch_to) ldmib r2, {r4 - sl, fp, sp, pc} @ Load all regs saved previously __INIT + +/* + * User helpers. + * + * These are segment of kernel provided user code reachable from user space + * at a fixed address in kernel memory. 
This is used to provide user space + * with some operations which require kernel help because of unimplemented + * native feature and/or instructions in many ARM CPUs. The idea is for + * this code to be executed directly in user mode for best efficiency but + * which is too intimate with the kernel counter part to be left to user + * libraries. In fact this code might even differ from one CPU to another + * depending on the available instruction set and restrictions like on + * SMP systems. In other words, the kernel reserves the right to change + * this code as needed without warning. Only the entry points and their + * results are guaranteed to be stable. + * + * Each segment is 32-byte aligned and will be moved to the top of the high + * vector page. New segments (if ever needed) must be added in front of + * existing ones. This mechanism should be used only for things that are + * really small and justified, and not be abused freely. + * + * User space is expected to implement those things inline when optimizing + * for a processor that has the necessary native support, but only if such + * resulting binaries are already to be incompatible with earlier ARM + * processors due to the use of unsupported instructions other than what + * is provided here. In other words don't make binaries unable to run on + * earlier processors just for the sake of not using these kernel helpers + * if your compiled code is not going to use the new instructions for other + * purpose. 
+ */ + + .align 5 + .globl __kuser_helper_start +__kuser_helper_start: + +/* + * Reference prototype: + * + * int __kernel_cmpxchg(int oldval, int newval, int *ptr) + * + * Input: + * + * r0 = oldval + * r1 = newval + * r2 = ptr + * lr = return address + * + * Output: + * + * r0 = returned value (zero or non-zero) + * C flag = set if r0 == 0, clear if r0 != 0 + * + * Clobbered: + * + * r3, ip, flags + * + * Definition and user space usage example: + * + * typedef int (__kernel_cmpxchg_t)(int oldval, int newval, int *ptr); + * #define __kernel_cmpxchg (*(__kernel_cmpxchg_t *)0xffff0fc0) + * + * Atomically store newval in *ptr if *ptr is equal to oldval for user space. + * Return zero if *ptr was changed or non-zero if no exchange happened. + * The C flag is also set if *ptr was changed to allow for assembly + * optimization in the calling code. + * + * For example, a user space atomic_add implementation could look like this: + * + * #define atomic_add(ptr, val) \ + * ({ register unsigned int *__ptr asm("r2") = (ptr); \ + * register unsigned int __result asm("r1"); \ + * asm volatile ( \ + * "1: @ atomic_add\n\t" \ + * "ldr r0, [r2]\n\t" \ + * "mov r3, #0xffff0fff\n\t" \ + * "add lr, pc, #4\n\t" \ + * "add r1, r0, %2\n\t" \ + * "add pc, r3, #(0xffff0fc0 - 0xffff0fff)\n\t" \ + * "bcc 1b" \ + * : "=&r" (__result) \ + * : "r" (__ptr), "rIL" (val) \ + * : "r0","r3","ip","lr","cc","memory" ); \ + * __result; }) + */ + +__kuser_cmpxchg: @ 0xffff0fc0 + +#if __LINUX_ARM_ARCH__ < 6 + +#ifdef CONFIG_SMP /* sanity check */ +#error "CONFIG_SMP on a machine supporting pre-ARMv6 processors?" +#endif + + /* + * Theory of operation: + * + * We set the Z flag before loading oldval. If ever an exception + * occurs we can not be sure the loaded value will still be the same + * when the exception returns, therefore the user exception handler + * will clear the Z flag whenever the interrupted user code was + * actually from the kernel address space (see the usr_entry macro). 
+ * + * The post-increment on the str is used to prevent a race with an + * exception happening just after the str instruction which would + * clear the Z flag although the exchange was done. + */ + teq ip, ip @ set Z flag + ldr ip, [r2] @ load current val + add r3, r2, #1 @ prepare store ptr + teqeq ip, r0 @ compare with oldval if still allowed + streq r1, [r3, #-1]! @ store newval if still allowed + subs r0, r2, r3 @ if r2 == r3 the str occurred + mov pc, lr + +#else + + ldrex r3, [r2] + subs r3, r3, r0 + strexeq r3, r1, [r2] + rsbs r0, r3, #0 + mov pc, lr + +#endif + + .align 5 + +/* + * Reference prototype: + * + * int __kernel_get_tls(void) + * + * Input: + * + * lr = return address + * + * Output: + * + * r0 = TLS value + * + * Clobbered: + * + * the Z flag might be lost + * + * Definition and user space usage example: + * + * typedef int (__kernel_get_tls_t)(void); + * #define __kernel_get_tls (*(__kernel_get_tls_t *)0xffff0fe0) + * + * Get the TLS value as previously set via the __ARM_NR_set_tls syscall. + * + * This could be used as follows: + * + * #define __kernel_get_tls() \ + * ({ register unsigned int __val asm("r0"); \ + * asm( "mov r0, #0xffff0fff; mov lr, pc; sub pc, r0, #31" \ + * : "=r" (__val) : : "lr","cc" ); \ + * __val; }) + */ + +__kuser_get_tls: @ 0xffff0fe0 + +#ifndef CONFIG_HAS_TLS_REG + +#ifdef CONFIG_SMP /* sanity check */ +#error "CONFIG_SMP without CONFIG_HAS_TLS_REG is wrong" +#endif + + ldr r0, [pc, #(16 - 8)] @ TLS stored at 0xffff0ff0 + mov pc, lr + +#else + + mrc p15, 0, r0, c13, c0, 3 @ read TLS register + mov pc, lr + +#endif + + .rep 5 + .word 0 @ pad up to __kuser_helper_version + .endr + +/* + * Reference declaration: + * + * extern unsigned int __kernel_helper_version; + * + * Definition and user space usage example: + * + * #define __kernel_helper_version (*(unsigned int *)0xffff0ffc) + * + * User space may read this to determine the current number of helpers + * available. 
+ */ + +__kuser_helper_version: @ 0xffff0ffc + .word ((__kuser_helper_end - __kuser_helper_start) >> 5) + + .globl __kuser_helper_end +__kuser_helper_end: + + /* * Vector stubs. * diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 0078aeb85737..3a001fe5540b 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -450,13 +450,17 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) case NR(set_tls): thread->tp_value = regs->ARM_r0; +#ifdef CONFIG_HAS_TLS_REG + asm ("mcr p15, 0, %0, c13, c0, 3" : : "r" (regs->ARM_r0) ); +#else /* - * Our user accessible TLS ptr is located at 0xffff0ffc. - * On SMP read access to this address must raise a fault - * and be emulated from the data abort handler. - * m + * User space must never try to access this directly. + * Expect your app to break eventually if you do so. + * The user helper at 0xffff0fe0 must be used instead. + * (see entry-armv.S for details) */ - *((unsigned long *)0xffff0ffc) = thread->tp_value; + *((unsigned int *)0xffff0ff0) = regs->ARM_r0; +#endif return 0; default: @@ -493,6 +497,41 @@ asmlinkage int arm_syscall(int no, struct pt_regs *regs) return 0; } +#if defined(CONFIG_CPU_32v6) && !defined(CONFIG_HAS_TLS_REG) + +/* + * We might be running on an ARMv6+ processor which should have the TLS + * register, but for some reason we can't use it and have to emulate it. 
+ */ + +static int get_tp_trap(struct pt_regs *regs, unsigned int instr) +{ + int reg = (instr >> 12) & 15; + if (reg == 15) + return 1; + regs->uregs[reg] = current_thread_info()->tp_value; + regs->ARM_pc += 4; + return 0; +} + +static struct undef_hook arm_mrc_hook = { + .instr_mask = 0x0fff0fff, + .instr_val = 0x0e1d0f70, + .cpsr_mask = PSR_T_BIT, + .cpsr_val = 0, + .fn = get_tp_trap, +}; + +static int __init arm_mrc_hook_init(void) +{ + register_undef_hook(&arm_mrc_hook); + return 0; +} + +late_initcall(arm_mrc_hook_init); + +#endif + void __bad_xchg(volatile void *ptr, int size) { printk("xchg: bad data size: pc 0x%p, ptr 0x%p, size %d\n", @@ -580,14 +619,17 @@ void __init trap_init(void) { extern char __stubs_start[], __stubs_end[]; extern char __vectors_start[], __vectors_end[]; + extern char __kuser_helper_start[], __kuser_helper_end[]; + int kuser_sz = __kuser_helper_end - __kuser_helper_start; /* - * Copy the vectors and stubs (in entry-armv.S) into the - * vector page, mapped at 0xffff0000, and ensure these are - * visible to the instruction stream. + * Copy the vectors, stubs and kuser helpers (in entry-armv.S) + * into the vector page, mapped at 0xffff0000, and ensure these + * are visible to the instruction stream. */ memcpy((void *)0xffff0000, __vectors_start, __vectors_end - __vectors_start); memcpy((void *)0xffff0200, __stubs_start, __stubs_end - __stubs_start); + memcpy((void *)0xffff1000 - kuser_sz, __kuser_helper_start, kuser_sz); flush_icache_range(0xffff0000, 0xffff0000 + PAGE_SIZE); modify_domain(DOMAIN_USER, DOMAIN_CLIENT); } diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index 5b670c9ac5ef..007766a0644c 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -409,3 +409,17 @@ config CPU_BPREDICT_DISABLE depends on CPU_ARM1020 help Say Y here to disable branch prediction. If unsure, say N. 
+ +config HAS_TLS_REG + bool + depends on CPU_32v6 && !CPU_32v5 && !CPU_32v4 && !CPU_32v3 + help + This selects support for the CP15 thread register. + It is defined to be available on ARMv6 or later. However + if the kernel is configured to support multiple CPUs including + pre-ARMv6 processors, or if a given ARMv6 processor doesn't + implement the thread register for some reason, then access to + this register from user space must be trapped and emulated. + If user space is relying on the __kuser_get_tls code then + there should not be any impact. + diff --git a/include/asm-arm/unistd.h b/include/asm-arm/unistd.h index a19ec09eaa01..ace27480886e 100644 --- a/include/asm-arm/unistd.h +++ b/include/asm-arm/unistd.h @@ -359,8 +359,7 @@ #define __ARM_NR_cacheflush (__ARM_NR_BASE+2) #define __ARM_NR_usr26 (__ARM_NR_BASE+3) #define __ARM_NR_usr32 (__ARM_NR_BASE+4) - -#define __ARM_NR_set_tls (__ARM_NR_BASE+0x800) +#define __ARM_NR_set_tls (__ARM_NR_BASE+5) #define __sys2(x) #x #define __sys1(x) __sys2(x) From 05f9869bf20e11bcb9b64b9ebd6a9cf89d6b71ba Mon Sep 17 00:00:00 2001 From: Olav Kongas Date: Fri, 29 Apr 2005 22:08:34 +0100 Subject: [PATCH 6/8] [PATCH] ARM: 2649/1: Fix 'sparse -Wbitwise' warnings from MMIO macros Patch from Olav Kongas On ARM, the outX() and writeX() families of macros take the result of cpu_to_leYY(), which is of restricted type __leYY, and feed it to __raw_writeX(), which expects an argument of unrestricted type. This results in 'sparse -Wbitwise' warnings about incorrect types in assignments. Analogous type mismatch warnings are issued for inX() and readX() counterparts. The below patch resolves these warnings by adding forced typecasts. 
Signed-off-by: Olav Kongas Signed-off-by: Russell King --- include/asm-arm/io.h | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/include/asm-arm/io.h b/include/asm-arm/io.h index 69bc7a3e8160..658ffa384fda 100644 --- a/include/asm-arm/io.h +++ b/include/asm-arm/io.h @@ -99,12 +99,16 @@ extern void __readwrite_bug(const char *fn); */ #ifdef __io #define outb(v,p) __raw_writeb(v,__io(p)) -#define outw(v,p) __raw_writew(cpu_to_le16(v),__io(p)) -#define outl(v,p) __raw_writel(cpu_to_le32(v),__io(p)) +#define outw(v,p) __raw_writew((__force __u16) \ + cpu_to_le16(v),__io(p)) +#define outl(v,p) __raw_writel((__force __u32) \ + cpu_to_le32(v),__io(p)) -#define inb(p) ({ unsigned int __v = __raw_readb(__io(p)); __v; }) -#define inw(p) ({ unsigned int __v = le16_to_cpu(__raw_readw(__io(p))); __v; }) -#define inl(p) ({ unsigned int __v = le32_to_cpu(__raw_readl(__io(p))); __v; }) +#define inb(p) ({ __u8 __v = __raw_readb(__io(p)); __v; }) +#define inw(p) ({ __u16 __v = le16_to_cpu((__force __le16) \ + __raw_readw(__io(p))); __v; }) +#define inl(p) ({ __u32 __v = le32_to_cpu((__force __le32) \ + __raw_readl(__io(p))); __v; }) #define outsb(p,d,l) __raw_writesb(__io(p),d,l) #define outsw(p,d,l) __raw_writesw(__io(p),d,l) @@ -149,9 +153,11 @@ extern void _memset_io(void __iomem *, int, size_t); * IO port primitives for more information. 
*/ #ifdef __mem_pci -#define readb(c) ({ unsigned int __v = __raw_readb(__mem_pci(c)); __v; }) -#define readw(c) ({ unsigned int __v = le16_to_cpu(__raw_readw(__mem_pci(c))); __v; }) -#define readl(c) ({ unsigned int __v = le32_to_cpu(__raw_readl(__mem_pci(c))); __v; }) +#define readb(c) ({ __u8 __v = __raw_readb(__mem_pci(c)); __v; }) +#define readw(c) ({ __u16 __v = le16_to_cpu((__force __le16) \ + __raw_readw(__mem_pci(c))); __v; }) +#define readl(c) ({ __u32 __v = le32_to_cpu((__force __le32) \ + __raw_readl(__mem_pci(c))); __v; }) #define readb_relaxed(addr) readb(addr) #define readw_relaxed(addr) readw(addr) #define readl_relaxed(addr) readl(addr) @@ -161,8 +167,10 @@ extern void _memset_io(void __iomem *, int, size_t); #define readsl(p,d,l) __raw_readsl(__mem_pci(p),d,l) #define writeb(v,c) __raw_writeb(v,__mem_pci(c)) -#define writew(v,c) __raw_writew(cpu_to_le16(v),__mem_pci(c)) -#define writel(v,c) __raw_writel(cpu_to_le32(v),__mem_pci(c)) +#define writew(v,c) __raw_writew((__force __u16) \ + cpu_to_le16(v),__mem_pci(c)) +#define writel(v,c) __raw_writel((__force __u32) \ + cpu_to_le32(v),__mem_pci(c)) #define writesb(p,d,l) __raw_writesb(__mem_pci(p),d,l) #define writesw(p,d,l) __raw_writesw(__mem_pci(p),d,l) From ca315159dfa80a2bfc7d917a717a7ee8d771bdf9 Mon Sep 17 00:00:00 2001 From: "George G. Davis" Date: Fri, 29 Apr 2005 22:08:35 +0100 Subject: [PATCH 7/8] [PATCH] ARM: 2656/1: Access permission bits are wrong for kernel XIP sections on ARMv6 Patch from George G. Davis This patch is required for kernel XIP support on ARMv6 machines. It ensures that the access permission bits for kernel XIP section descriptors are APX=1 and AP[1:0]=01, which is Kernel read-only/User no access permissions. Prior to this change, kernel XIP section descriptor access permissions were set to Kernel no access/User no access on ARMv6 machines and the kernel would therefore hang upon entry to userspace when set_fs(USER_DS) was executed. 
Signed-off-by: Steve Longerbeam Signed-off-by: George G. Davis Signed-off-by: Russell King --- arch/arm/mm/mm-armv.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/arm/mm/mm-armv.c b/arch/arm/mm/mm-armv.c index f5a87db8b498..585dfb8e20b9 100644 --- a/arch/arm/mm/mm-armv.c +++ b/arch/arm/mm/mm-armv.c @@ -411,9 +411,10 @@ static void __init build_mem_type_table(void) mem_types[MT_MEMORY].prot_sect &= ~PMD_BIT4; mem_types[MT_ROM].prot_sect &= ~PMD_BIT4; /* - * Mark cache clean areas read only from SVC mode - * and no access from userspace. + * Mark cache clean areas and XIP ROM read only + * from SVC mode and no access from userspace. */ + mem_types[MT_ROM].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_MINICLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; mem_types[MT_CACHECLEAN].prot_sect |= PMD_SECT_APX|PMD_SECT_AP_WRITE; } From 53e173f62c318e65e6ae13524b04c5cf38c1bc3c Mon Sep 17 00:00:00 2001 From: Lennert Buytenhek Date: Fri, 29 Apr 2005 22:13:57 +0100 Subject: [PATCH 8/8] [PATCH] ARM: 2660/2: fix ixdp2800 boot and pci init Patch from Lennert Buytenhek The IXDP2800 is an evaluation platform for the IXP2800 processor that has two IXP2800s connected to the same PCI bus. This is problematic as both CPUs will try to configure the PCI bus as they boot linux. Unlike on the other IXP2000 platforms, the boot loader on the IXDP2800 doesn't configure the PCI bus properly, so we do want the linux instance on one of the CPUs to do that. Making one of the CPUs ignore the PCI bus (and thus act like a pure PCI slave device) is not an option because there is a 82559 NIC on the PCI bus for each of the CPUs. The chosen solution is to have the master CPU configure the PCI bus while the slave is kept in a quiescent state, and then to have the slave CPU scan the PCI bus (without assigning resources) while the master is kept in a quiescent state. 
After this ritual, the master deletes the slave NIC from its PCI device list, the slave deletes the master NIC from its device list, and (almost) all is well. There's still one little problem: each of the CPUs has a 1G SDRAM BAR, but the IXP2000 only has 512M of outbound PCI memory window. We solve this by hand-assigning the master and slave SDRAM BARs to a location outside each of the IXP's outbound PCI windows, and by having the rest of the BARs autoconfigured in the outbound PCI windows, in the range [e0000000..ffffffff], so that there is a 1:1 pci:phys mapping between them. Even with this patch, a number of issues still remain -- just imagine what happens if one of the CPUs is rebooted, by watchdog or by hand, but the other one isn't. But those issues are not easily fixable given the strange PCI layout of this board and the behavior of the boot loader shipped with the platform. Signed-off-by: Lennert Buytenhek Signed-off-by: Russell King --- arch/arm/configs/ixdp2800_defconfig | 2 +- arch/arm/mach-ixp2000/ixdp2800.c | 147 +++++++++++++++++++++++++--- 2 files changed, 135 insertions(+), 14 deletions(-) diff --git a/arch/arm/configs/ixdp2800_defconfig b/arch/arm/configs/ixdp2800_defconfig index d36f99192962..7be3521f91fc 100644 --- a/arch/arm/configs/ixdp2800_defconfig +++ b/arch/arm/configs/ixdp2800_defconfig @@ -133,7 +133,7 @@ CONFIG_ALIGNMENT_TRAP=y # CONFIG_ZBOOT_ROM_TEXT=0x0 CONFIG_ZBOOT_ROM_BSS=0x0 -CONFIG_CMDLINE="console=ttyS0,9600 root=/dev/nfs ip=bootp mem=64M@0x0 pci=firmware" +CONFIG_CMDLINE="console=ttyS0,9600 root=/dev/nfs ip=bootp mem=64M@0x0" # CONFIG_XIP_KERNEL is not set # diff --git a/arch/arm/mach-ixp2000/ixdp2800.c b/arch/arm/mach-ixp2000/ixdp2800.c index c4683aaff84a..aec13c7108a9 100644 --- a/arch/arm/mach-ixp2000/ixdp2800.c +++ b/arch/arm/mach-ixp2000/ixdp2800.c @@ -65,19 +65,102 @@ static struct sys_timer ixdp2800_timer = { /************************************************************************* * IXDP2800 PCI 
*************************************************************************/ +static void __init ixdp2800_slave_disable_pci_master(void) +{ + *IXP2000_PCI_CMDSTAT &= ~(PCI_COMMAND_MASTER | PCI_COMMAND_MEMORY); +} + +static void __init ixdp2800_master_wait_for_slave(void) +{ + volatile u32 *addr; + + printk(KERN_INFO "IXDP2800: waiting for slave NPU to configure " + "its BAR sizes\n"); + + addr = ixp2000_pci_config_addr(0, IXDP2X00_SLAVE_NPU_DEVFN, + PCI_BASE_ADDRESS_1); + do { + *addr = 0xffffffff; + cpu_relax(); + } while (*addr != 0xfe000008); + + addr = ixp2000_pci_config_addr(0, IXDP2X00_SLAVE_NPU_DEVFN, + PCI_BASE_ADDRESS_2); + do { + *addr = 0xffffffff; + cpu_relax(); + } while (*addr != 0xc0000008); + + /* + * Configure the slave's SDRAM BAR by hand. + */ + *addr = 0x40000008; +} + +static void __init ixdp2800_slave_wait_for_master_enable(void) +{ + printk(KERN_INFO "IXDP2800: waiting for master NPU to enable us\n"); + + while ((*IXP2000_PCI_CMDSTAT & PCI_COMMAND_MASTER) == 0) + cpu_relax(); +} + void __init ixdp2800_pci_preinit(void) { printk("ixdp2x00_pci_preinit called\n"); - *IXP2000_PCI_ADDR_EXT = 0x0000e000; + *IXP2000_PCI_ADDR_EXT = 0x0001e000; + + if (!ixdp2x00_master_npu()) + ixdp2800_slave_disable_pci_master(); - *IXP2000_PCI_DRAM_BASE_ADDR_MASK = (0x40000000 - 1) & ~0xfffff; *IXP2000_PCI_SRAM_BASE_ADDR_MASK = (0x2000000 - 1) & ~0x3ffff; + *IXP2000_PCI_DRAM_BASE_ADDR_MASK = (0x40000000 - 1) & ~0xfffff; ixp2000_pci_preinit(); + + if (ixdp2x00_master_npu()) { + /* + * Wait until the slave set its SRAM/SDRAM BAR sizes + * correctly before we proceed to scan and enumerate + * the bus. + */ + ixdp2800_master_wait_for_slave(); + + /* + * We configure the SDRAM BARs by hand because they + * are 1G and fall outside of the regular allocated + * PCI address space. + */ + *IXP2000_PCI_SDRAM_BAR = 0x00000008; + } else { + /* + * Wait for the master to complete scanning the bus + * and assigning resources before we proceed to scan + * the bus ourselves. 
Set pci=firmware to honor the + * master's resource assignment. + */ + ixdp2800_slave_wait_for_master_enable(); + pcibios_setup("firmware"); + } } -int ixdp2800_pci_setup(int nr, struct pci_sys_data *sys) +/* + * We assign the SDRAM BARs for the two IXP2800 CPUs by hand, outside + * of the regular PCI window, because there's only 512M of outbound PCI + * memory window on each IXP, while we need 1G for each of the BARs. + */ +static void __devinit ixp2800_pci_fixup(struct pci_dev *dev) +{ + if (machine_is_ixdp2800()) { + dev->resource[2].start = 0; + dev->resource[2].end = 0; + dev->resource[2].flags = 0; + } +} +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IXP2800, ixp2800_pci_fixup); + +static int __init ixdp2800_pci_setup(int nr, struct pci_sys_data *sys) { sys->mem_offset = 0x00000000; @@ -129,22 +212,47 @@ static int __init ixdp2800_pci_map_irq(struct pci_dev *dev, u8 slot, u8 pin) } else return IRQ_IXP2000_PCIB; /* Slave NIC interrupt */ } -static void ixdp2800_pci_postinit(void) +static void __init ixdp2800_master_enable_slave(void) { - struct pci_dev *dev; + volatile u32 *addr; - if (ixdp2x00_master_npu()) { - dev = pci_find_slot(1, IXDP2800_SLAVE_ENET_DEVFN); - pci_remove_bus_device(dev); - } else { - dev = pci_find_slot(1, IXDP2800_MASTER_ENET_DEVFN); - pci_remove_bus_device(dev); + printk(KERN_INFO "IXDP2800: enabling slave NPU\n"); + addr = (volatile u32 *)ixp2000_pci_config_addr(0, + IXDP2X00_SLAVE_NPU_DEVFN, + PCI_COMMAND); + + *addr |= PCI_COMMAND_MASTER; +} + +static void __init ixdp2800_master_wait_for_slave_bus_scan(void) +{ + volatile u32 *addr; + + printk(KERN_INFO "IXDP2800: waiting for slave to finish bus scan\n"); + + addr = (volatile u32 *)ixp2000_pci_config_addr(0, + IXDP2X00_SLAVE_NPU_DEVFN, + PCI_COMMAND); + while ((*addr & PCI_COMMAND_MEMORY) == 0) + cpu_relax(); +} + +static void __init ixdp2800_slave_signal_bus_scan_completion(void) +{ + printk(KERN_INFO "IXDP2800: bus scan done, signaling master\n"); + 
*IXP2000_PCI_CMDSTAT |= PCI_COMMAND_MEMORY; +} + +static void __init ixdp2800_pci_postinit(void) +{ + if (!ixdp2x00_master_npu()) { ixdp2x00_slave_pci_postinit(); + ixdp2800_slave_signal_bus_scan_completion(); } } -struct hw_pci ixdp2800_pci __initdata = { +struct __initdata hw_pci ixdp2800_pci __initdata = { .nr_controllers = 1, .setup = ixdp2800_pci_setup, .preinit = ixdp2800_pci_preinit, @@ -155,8 +263,21 @@ struct hw_pci ixdp2800_pci __initdata = { int __init ixdp2800_pci_init(void) { - if (machine_is_ixdp2800()) + if (machine_is_ixdp2800()) { + struct pci_dev *dev; + pci_common_init(&ixdp2800_pci); + if (ixdp2x00_master_npu()) { + dev = pci_find_slot(1, IXDP2800_SLAVE_ENET_DEVFN); + pci_remove_bus_device(dev); + + ixdp2800_master_enable_slave(); + ixdp2800_master_wait_for_slave_bus_scan(); + } else { + dev = pci_find_slot(1, IXDP2800_MASTER_ENET_DEVFN); + pci_remove_bus_device(dev); + } + } return 0; }