From decd300b30e499fe6be1bbfc5650fc971de8c1fa Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Mon, 8 Aug 2005 13:24:38 +1000 Subject: [PATCH 01/37] [PATCH] ppc64: make arch/ppc64/boot standalone Make the bootheader for ppc64 independent from kernel and libc headers. * add -nostdinc -isystem $gccincludes to not include libc headers * declare all functions in header files, also the stuff from string.S * declare some functions static * use stddef.h to get size_t (hopefully ok) * remove ppc32-types.h, only elf.h used the __NN types With further modifications by Paul Mackerras and Stephen Rothwell. Signed-off-by: Olaf Hering Signed-off-by: Paul Mackerras --- arch/ppc64/boot/Makefile | 4 +- arch/ppc64/boot/addnote.c | 4 +- arch/ppc64/boot/crt0.S | 2 +- arch/ppc64/boot/div64.S | 2 +- arch/ppc64/boot/elf.h | 149 ++++++++++++++++++++++++++ arch/ppc64/boot/main.c | 51 ++++----- arch/ppc64/boot/page.h | 34 ++++++ arch/ppc64/boot/ppc32-types.h | 36 ------- arch/ppc64/boot/ppc_asm.h | 62 +++++++++++ arch/ppc64/boot/prom.c | 196 +++++----------------------------- arch/ppc64/boot/prom.h | 18 ++++ arch/ppc64/boot/stdio.h | 16 +++ arch/ppc64/boot/string.S | 2 +- arch/ppc64/boot/string.h | 16 +++ arch/ppc64/boot/zlib.c | 2 +- 15 files changed, 348 insertions(+), 246 deletions(-) create mode 100644 arch/ppc64/boot/elf.h create mode 100644 arch/ppc64/boot/page.h delete mode 100644 arch/ppc64/boot/ppc32-types.h create mode 100644 arch/ppc64/boot/ppc_asm.h create mode 100644 arch/ppc64/boot/prom.h create mode 100644 arch/ppc64/boot/stdio.h create mode 100644 arch/ppc64/boot/string.h diff --git a/arch/ppc64/boot/Makefile b/arch/ppc64/boot/Makefile index 683b2d43c15f..2c5f5e73d00c 100644 --- a/arch/ppc64/boot/Makefile +++ b/arch/ppc64/boot/Makefile @@ -22,8 +22,8 @@ HOSTCC := gcc -BOOTCFLAGS := $(HOSTCFLAGS) $(LINUXINCLUDE) -fno-builtin -BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional +BOOTCFLAGS := $(HOSTCFLAGS) -fno-builtin -nostdinc -isystem $(shell $(CROSS32CC) -print-file-name=include) +BOOTAFLAGS := -D__ASSEMBLY__ $(BOOTCFLAGS) -traditional -nostdinc BOOTLFLAGS := -Ttext 0x00400000 -e _start -T $(srctree)/$(src)/zImage.lds OBJCOPYFLAGS := contents,alloc,load,readonly,data diff --git a/arch/ppc64/boot/addnote.c b/arch/ppc64/boot/addnote.c index 719663a694bb..8041a9845ab7 100644 --- a/arch/ppc64/boot/addnote.c +++ b/arch/ppc64/boot/addnote.c @@ -157,7 +157,7 @@ main(int ac, char **av) PUT_32BE(ns, strlen(arch) + 1); PUT_32BE(ns + 4, N_DESCR * 4); PUT_32BE(ns + 8, 0x1275); - strcpy(&buf[ns + 12], arch); + strcpy((char *) &buf[ns + 12], arch); ns += 12 + strlen(arch) + 1; for (i = 0; i < N_DESCR; ++i, ns += 4) PUT_32BE(ns, descr[i]); @@ -172,7 +172,7 @@ main(int ac, char **av) PUT_32BE(ns, strlen(rpaname) + 1); PUT_32BE(ns + 4, sizeof(rpanote)); PUT_32BE(ns + 8, 0x12759999); - strcpy(&buf[ns + 12], rpaname); + strcpy((char *) &buf[ns + 12], rpaname); ns += 12 + ROUNDUP(strlen(rpaname) + 1); for (i = 0; i < N_RPA_DESCR; ++i, ns += 4) PUT_32BE(ns, rpanote[i]); diff --git a/arch/ppc64/boot/crt0.S b/arch/ppc64/boot/crt0.S index 04d3e74cd72f..3861e7f9cf19 100644 --- a/arch/ppc64/boot/crt0.S +++ b/arch/ppc64/boot/crt0.S @@ -9,7 +9,7 @@ * NOTE: this code runs in 32 bit mode and is packaged as ELF32. */ -#include +#include "ppc_asm.h" .text .globl _start diff --git a/arch/ppc64/boot/div64.S b/arch/ppc64/boot/div64.S index 38f7e466d7d6..722f360a32a9 100644 --- a/arch/ppc64/boot/div64.S +++ b/arch/ppc64/boot/div64.S @@ -13,7 +13,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include +#include "ppc_asm.h" .globl __div64_32 __div64_32: diff --git a/arch/ppc64/boot/elf.h b/arch/ppc64/boot/elf.h new file mode 100644 index 000000000000..d4828fcf1cb9 --- /dev/null +++ b/arch/ppc64/boot/elf.h @@ -0,0 +1,149 @@ +#ifndef _PPC_BOOT_ELF_H_ +#define _PPC_BOOT_ELF_H_ + +/* 32-bit ELF base types. */ +typedef unsigned int Elf32_Addr; +typedef unsigned short Elf32_Half; +typedef unsigned int Elf32_Off; +typedef signed int Elf32_Sword; +typedef unsigned int Elf32_Word; + +/* 64-bit ELF base types. */ +typedef unsigned long long Elf64_Addr; +typedef unsigned short Elf64_Half; +typedef signed short Elf64_SHalf; +typedef unsigned long long Elf64_Off; +typedef signed int Elf64_Sword; +typedef unsigned int Elf64_Word; +typedef unsigned long long Elf64_Xword; +typedef signed long long Elf64_Sxword; + +/* These constants are for the segment types stored in the image headers */ +#define PT_NULL 0 +#define PT_LOAD 1 +#define PT_DYNAMIC 2 +#define PT_INTERP 3 +#define PT_NOTE 4 +#define PT_SHLIB 5 +#define PT_PHDR 6 +#define PT_TLS 7 /* Thread local storage segment */ +#define PT_LOOS 0x60000000 /* OS-specific */ +#define PT_HIOS 0x6fffffff /* OS-specific */ +#define PT_LOPROC 0x70000000 +#define PT_HIPROC 0x7fffffff +#define PT_GNU_EH_FRAME 0x6474e550 + +#define PT_GNU_STACK (PT_LOOS + 0x474e551) + +/* These constants define the different elf file types */ +#define ET_NONE 0 +#define ET_REL 1 +#define ET_EXEC 2 +#define ET_DYN 3 +#define ET_CORE 4 +#define ET_LOPROC 0xff00 +#define ET_HIPROC 0xffff + +/* These constants define the various ELF target machines */ +#define EM_NONE 0 +#define EM_PPC 20 /* PowerPC */ +#define EM_PPC64 21 /* PowerPC64 */ + +#define EI_NIDENT 16 + +typedef struct elf32_hdr { + unsigned char e_ident[EI_NIDENT]; + Elf32_Half e_type; + Elf32_Half e_machine; + Elf32_Word e_version; + Elf32_Addr e_entry; /* Entry point */ + Elf32_Off e_phoff; + Elf32_Off e_shoff; + Elf32_Word e_flags; + Elf32_Half e_ehsize; + Elf32_Half e_phentsize; + Elf32_Half e_phnum; + Elf32_Half e_shentsize; + Elf32_Half e_shnum; + Elf32_Half e_shstrndx; +} Elf32_Ehdr; + +typedef struct elf64_hdr { + unsigned char e_ident[16]; /* ELF "magic number" */ + Elf64_Half e_type; + Elf64_Half e_machine; + Elf64_Word e_version; + Elf64_Addr e_entry; /* Entry point virtual address */ + Elf64_Off e_phoff; /* Program header table file offset */ + Elf64_Off e_shoff; /* Section header table file offset */ + Elf64_Word e_flags; + Elf64_Half e_ehsize; + Elf64_Half e_phentsize; + Elf64_Half e_phnum; + Elf64_Half e_shentsize; + Elf64_Half e_shnum; + Elf64_Half e_shstrndx; +} Elf64_Ehdr; + +/* These constants define the permissions on sections in the program + header, p_flags. */ +#define PF_R 0x4 +#define PF_W 0x2 +#define PF_X 0x1 + +typedef struct elf32_phdr { + Elf32_Word p_type; + Elf32_Off p_offset; + Elf32_Addr p_vaddr; + Elf32_Addr p_paddr; + Elf32_Word p_filesz; + Elf32_Word p_memsz; + Elf32_Word p_flags; + Elf32_Word p_align; +} Elf32_Phdr; + +typedef struct elf64_phdr { + Elf64_Word p_type; + Elf64_Word p_flags; + Elf64_Off p_offset; /* Segment file offset */ + Elf64_Addr p_vaddr; /* Segment virtual address */ + Elf64_Addr p_paddr; /* Segment physical address */ + Elf64_Xword p_filesz; /* Segment size in file */ + Elf64_Xword p_memsz; /* Segment size in memory */ + Elf64_Xword p_align; /* Segment alignment, file & memory */ +} Elf64_Phdr; + +#define EI_MAG0 0 /* e_ident[] indexes */ +#define EI_MAG1 1 +#define EI_MAG2 2 +#define EI_MAG3 3 +#define EI_CLASS 4 +#define EI_DATA 5 +#define EI_VERSION 6 +#define EI_OSABI 7 +#define EI_PAD 8 + +#define ELFMAG0 0x7f /* EI_MAG */ +#define ELFMAG1 'E' +#define ELFMAG2 'L' +#define ELFMAG3 'F' +#define ELFMAG "\177ELF" +#define SELFMAG 4 + +#define ELFCLASSNONE 0 /* EI_CLASS */ +#define ELFCLASS32 1 +#define ELFCLASS64 2 +#define ELFCLASSNUM 3 + +#define ELFDATANONE 0 /* e_ident[EI_DATA] */ +#define ELFDATA2LSB 1 +#define ELFDATA2MSB 2 + +#define EV_NONE 0 /* e_version, EI_VERSION */ +#define EV_CURRENT 1 +#define EV_NUM 2 + +#define ELFOSABI_NONE 0 +#define ELFOSABI_LINUX 3 + +#endif /* _PPC_BOOT_ELF_H_ */ diff --git a/arch/ppc64/boot/main.c b/arch/ppc64/boot/main.c index 199d9804f61c..99e68cfbe688 100644 --- a/arch/ppc64/boot/main.c +++ b/arch/ppc64/boot/main.c @@ -8,36 +8,28 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include "ppc32-types.h" +#include +#include +#include "elf.h" +#include "page.h" +#include "string.h" +#include "stdio.h" +#include "prom.h" #include "zlib.h" -#include -#include -#include -#include -extern void *finddevice(const char *); -extern int getprop(void *, const char *, void *, int); -extern void printf(const char *fmt, ...); -extern int sprintf(char *buf, const char *fmt, ...); -void gunzip(void *, int, unsigned char *, int *); -void *claim(unsigned int, unsigned int, unsigned int); -void flush_cache(void *, unsigned long); -void pause(void); -extern void exit(void); +static void gunzip(void *, int, unsigned char *, int *); +extern void flush_cache(void *, unsigned long); -unsigned long strlen(const char *s); -void *memmove(void *dest, const void *src, unsigned long n); -void *memcpy(void *dest, const void *src, unsigned long n); /* Value picked to match that used by yaboot */ #define PROG_START 0x01400000 #define RAM_END (256<<20) // Fixme: use OF */ -char *avail_ram; -char *begin_avail, *end_avail; -char *avail_high; -unsigned int heap_use; -unsigned int heap_max; +static char *avail_ram; +static char *begin_avail, *end_avail; +static char *avail_high; +static unsigned int heap_use; +static unsigned int heap_max; extern char _start[]; extern char _vmlinux_start[]; @@ -52,9 +44,9 @@ struct addr_range { unsigned long size; unsigned long memsize; }; -struct addr_range vmlinux = {0, 0, 0}; -struct addr_range vmlinuz = {0, 0, 0}; -struct addr_range initrd = {0, 0, 0}; +static struct addr_range vmlinux = {0, 0, 0}; +static struct addr_range vmlinuz = {0, 0, 0}; +static struct addr_range initrd = {0, 0, 0}; static char scratch[128<<10]; /* 128kB of scratch space for gunzip */ @@ -64,13 +56,6 @@ typedef void (*kernel_entry_t)( unsigned long, void *); -int (*prom)(void *); - -void *chosen_handle; -void *stdin; -void *stdout; -void *stderr; - #undef DEBUG static unsigned long claim_base = PROG_START; @@ -277,7 +262,7 @@ void zfree(void *x, void *addr, unsigned nb) #define DEFLATED 8 -void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) +static void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp) { z_stream s; int r, i, flags; diff --git a/arch/ppc64/boot/page.h b/arch/ppc64/boot/page.h new file mode 100644 index 000000000000..14eca30fef64 --- /dev/null +++ b/arch/ppc64/boot/page.h @@ -0,0 +1,34 @@ +#ifndef _PPC_BOOT_PAGE_H +#define _PPC_BOOT_PAGE_H +/* + * Copyright (C) 2001 PPC64 Team, IBM Corp + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifdef __ASSEMBLY__ +#define ASM_CONST(x) x +#else +#define __ASM_CONST(x) x##UL +#define ASM_CONST(x) __ASM_CONST(x) +#endif + +/* PAGE_SHIFT determines the page size */ +#define PAGE_SHIFT 12 +#define PAGE_SIZE (ASM_CONST(1) << PAGE_SHIFT) +#define PAGE_MASK (~(PAGE_SIZE-1)) + +/* align addr on a size boundary - adjust address up/down if needed */ +#define _ALIGN_UP(addr,size) (((addr)+((size)-1))&(~((size)-1))) +#define _ALIGN_DOWN(addr,size) ((addr)&(~((size)-1))) + +/* align addr on a size boundary - adjust address up if needed */ +#define _ALIGN(addr,size) _ALIGN_UP(addr,size) + +/* to align the pointer to the (next) page boundary */ +#define PAGE_ALIGN(addr) _ALIGN(addr, PAGE_SIZE) + +#endif /* _PPC_BOOT_PAGE_H */ diff --git a/arch/ppc64/boot/ppc32-types.h b/arch/ppc64/boot/ppc32-types.h deleted file mode 100644 index f7b8884f8f70..000000000000 --- a/arch/ppc64/boot/ppc32-types.h +++ /dev/null @@ -1,36 +0,0 @@ -#ifndef _PPC64_TYPES_H -#define _PPC64_TYPES_H - -typedef __signed__ char __s8; -typedef unsigned char __u8; - -typedef __signed__ short __s16; -typedef unsigned short __u16; - -typedef __signed__ int __s32; -typedef unsigned int __u32; - -typedef __signed__ long long __s64; -typedef unsigned long long __u64; - -typedef signed char s8; -typedef unsigned char u8; - -typedef signed short s16; -typedef unsigned short u16; - -typedef signed int s32; -typedef unsigned int u32; - -typedef signed long long s64; -typedef unsigned long long u64; - -typedef struct { - __u32 u[4]; -} __attribute((aligned(16))) __vector128; - -#define BITS_PER_LONG 32 - -typedef __vector128 vector128; - -#endif /* _PPC64_TYPES_H */ diff --git a/arch/ppc64/boot/ppc_asm.h b/arch/ppc64/boot/ppc_asm.h new file mode 100644 index 000000000000..1c2c2817f9b7 --- /dev/null +++ b/arch/ppc64/boot/ppc_asm.h @@ -0,0 +1,62 @@ +#ifndef _PPC64_PPC_ASM_H +#define _PPC64_PPC_ASM_H +/* + * + * Definitions used by various bits of low-level assembly code on PowerPC. + * + * Copyright (C) 1995-1999 Gary Thomas, Paul Mackerras, Cort Dougan. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +/* Condition Register Bit Fields */ + +#define cr0 0 +#define cr1 1 +#define cr2 2 +#define cr3 3 +#define cr4 4 +#define cr5 5 +#define cr6 6 +#define cr7 7 + + +/* General Purpose Registers (GPRs) */ + +#define r0 0 +#define r1 1 +#define r2 2 +#define r3 3 +#define r4 4 +#define r5 5 +#define r6 6 +#define r7 7 +#define r8 8 +#define r9 9 +#define r10 10 +#define r11 11 +#define r12 12 +#define r13 13 +#define r14 14 +#define r15 15 +#define r16 16 +#define r17 17 +#define r18 18 +#define r19 19 +#define r20 20 +#define r21 21 +#define r22 22 +#define r23 23 +#define r24 24 +#define r25 25 +#define r26 26 +#define r27 27 +#define r28 28 +#define r29 29 +#define r30 30 +#define r31 31 + +#endif /* _PPC64_PPC_ASM_H */ diff --git a/arch/ppc64/boot/prom.c b/arch/ppc64/boot/prom.c index 5e48b80ff5a0..4bea2f4dcb06 100644 --- a/arch/ppc64/boot/prom.c +++ b/arch/ppc64/boot/prom.c @@ -7,43 +7,19 @@ * 2 of the License, or (at your option) any later version. */ #include -#include -#include -#include - -extern __u32 __div64_32(unsigned long long *dividend, __u32 divisor); - -/* The unnecessary pointer compare is there - * to check for type safety (n must be 64bit) - */ -# define do_div(n,base) ({ \ - __u32 __base = (base); \ - __u32 __rem; \ - (void)(((typeof((n)) *)0) == ((unsigned long long *)0)); \ - if (((n) >> 32) == 0) { \ - __rem = (__u32)(n) % __base; \ - (n) = (__u32)(n) / __base; \ - } else \ - __rem = __div64_32(&(n), __base); \ - __rem; \ - }) +#include +#include "string.h" +#include "stdio.h" +#include "prom.h" int (*prom)(void *); void *chosen_handle; + void *stdin; void *stdout; void *stderr; -void exit(void); -void *finddevice(const char *name); -int getprop(void *phandle, const char *name, void *buf, int buflen); -void chrpboot(int a1, int a2, void *prom); /* in main.c */ - -int printf(char *fmt, ...); - -/* there is no convenient header to get this from... -- paulus */ -extern unsigned long strlen(const char *); int write(void *handle, void *ptr, int nb) @@ -210,107 +186,6 @@ fputs(char *str, void *f) return write(f, str, n) == n? 0: -1; } -int -readchar(void) -{ - char ch; - - for (;;) { - switch (read(stdin, &ch, 1)) { - case 1: - return ch; - case -1: - printf("read(stdin) returned -1\r\n"); - return -1; - } - } -} - -static char line[256]; -static char *lineptr; -static int lineleft; - -int -getchar(void) -{ - int c; - - if (lineleft == 0) { - lineptr = line; - for (;;) { - c = readchar(); - if (c == -1 || c == 4) - break; - if (c == '\r' || c == '\n') { - *lineptr++ = '\n'; - putchar('\n'); - break; - } - switch (c) { - case 0177: - case '\b': - if (lineptr > line) { - putchar('\b'); - putchar(' '); - putchar('\b'); - --lineptr; - } - break; - case 'U' & 0x1F: - while (lineptr > line) { - putchar('\b'); - putchar(' '); - putchar('\b'); - --lineptr; - } - break; - default: - if (lineptr >= &line[sizeof(line) - 1]) - putchar('\a'); - else { - putchar(c); - *lineptr++ = c; - } - } - } - lineleft = lineptr - line; - lineptr = line; - } - if (lineleft == 0) - return -1; - --lineleft; - return *lineptr++; -} - - - -/* String functions lifted from lib/vsprintf.c and lib/ctype.c */ -unsigned char _ctype[] = { -_C,_C,_C,_C,_C,_C,_C,_C, /* 0-7 */ -_C,_C|_S,_C|_S,_C|_S,_C|_S,_C|_S,_C,_C, /* 8-15 */ -_C,_C,_C,_C,_C,_C,_C,_C, /* 16-23 */ -_C,_C,_C,_C,_C,_C,_C,_C, /* 24-31 */ -_S|_SP,_P,_P,_P,_P,_P,_P,_P, /* 32-39 */ -_P,_P,_P,_P,_P,_P,_P,_P, /* 40-47 */ -_D,_D,_D,_D,_D,_D,_D,_D, /* 48-55 */ -_D,_D,_P,_P,_P,_P,_P,_P, /* 56-63 */ -_P,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U|_X,_U, /* 64-71 */ -_U,_U,_U,_U,_U,_U,_U,_U, /* 72-79 */ -_U,_U,_U,_U,_U,_U,_U,_U, /* 80-87 */ -_U,_U,_U,_P,_P,_P,_P,_P, /* 88-95 */ -_P,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L|_X,_L, /* 96-103 */ -_L,_L,_L,_L,_L,_L,_L,_L, /* 104-111 */ -_L,_L,_L,_L,_L,_L,_L,_L, /* 112-119 */ -_L,_L,_L,_P,_P,_P,_P,_C, /* 120-127 */ -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 128-143 */ -0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, /* 144-159 */ -_S|_SP,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 160-175 */ -_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P,_P, /* 176-191 */ -_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U,_U, /* 192-207 */ -_U,_U,_U,_U,_U,_U,_U,_P,_U,_U,_U,_U,_U,_U,_U,_L, /* 208-223 */ -_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L,_L, /* 224-239 */ -_L,_L,_L,_L,_L,_L,_L,_P,_L,_L,_L,_L,_L,_L,_L,_L}; /* 240-255 */ - size_t strnlen(const char * s, size_t count) { const char *sc; @@ -320,44 +195,30 @@ size_t strnlen(const char * s, size_t count) return sc - s; } -unsigned long simple_strtoul(const char *cp,char **endp,unsigned int base) -{ - unsigned long result = 0,value; +extern unsigned int __div64_32(unsigned long long *dividend, + unsigned int divisor); - if (!base) { - base = 10; - if (*cp == '0') { - base = 8; - cp++; - if ((*cp == 'x') && isxdigit(cp[1])) { - cp++; - base = 16; - } - } - } - while (isxdigit(*cp) && - (value = isdigit(*cp) ? *cp-'0' : toupper(*cp)-'A'+10) < base) { - result = result*base + value; - cp++; - } - if (endp) - *endp = (char *)cp; - return result; -} - -long simple_strtol(const char *cp,char **endp,unsigned int base) -{ - if(*cp=='-') - return -simple_strtoul(cp+1,endp,base); - return simple_strtoul(cp,endp,base); -} +/* The unnecessary pointer compare is there + * to check for type safety (n must be 64bit) + */ +# define do_div(n,base) ({ \ + unsigned int __base = (base); \ + unsigned int __rem; \ + (void)(((typeof((n)) *)0) == ((unsigned long long *)0)); \ + if (((n) >> 32) == 0) { \ + __rem = (unsigned int)(n) % __base; \ + (n) = (unsigned int)(n) / __base; \ + } else \ + __rem = __div64_32(&(n), __base); \ + __rem; \ + }) static int skip_atoi(const char **s) { - int i=0; + int i, c; - while (isdigit(**s)) - i = i*10 + *((*s)++) - '0'; + for (i = 0; '0' <= (c = **s) && c <= '9'; ++*s) + i = i*10 + c - '0'; return i; } @@ -436,9 +297,6 @@ static char * number(char * str, unsigned long long num, int base, int size, int return str; } -/* Forward decl. needed for IP address printing stuff... */ -int sprintf(char * buf, const char *fmt, ...); - int vsprintf(char *buf, const char *fmt, va_list args) { int len; @@ -477,7 +335,7 @@ int vsprintf(char *buf, const char *fmt, va_list args) /* get field width */ field_width = -1; - if (isdigit(*fmt)) + if ('0' <= *fmt && *fmt <= '9') field_width = skip_atoi(&fmt); else if (*fmt == '*') { ++fmt; @@ -493,7 +351,7 @@ int vsprintf(char *buf, const char *fmt, va_list args) precision = -1; if (*fmt == '.') { ++fmt; - if (isdigit(*fmt)) + if ('0' <= *fmt && *fmt <= '9') precision = skip_atoi(&fmt); else if (*fmt == '*') { ++fmt; @@ -628,7 +486,7 @@ int sprintf(char * buf, const char *fmt, ...) static char sprint_buf[1024]; int -printf(char *fmt, ...) +printf(const char *fmt, ...) { va_list args; int n; diff --git a/arch/ppc64/boot/prom.h b/arch/ppc64/boot/prom.h new file mode 100644 index 000000000000..96ab5aec740c --- /dev/null +++ b/arch/ppc64/boot/prom.h @@ -0,0 +1,18 @@ +#ifndef _PPC_BOOT_PROM_H_ +#define _PPC_BOOT_PROM_H_ + +extern int (*prom) (void *); +extern void *chosen_handle; + +extern void *stdin; +extern void *stdout; +extern void *stderr; + +extern int write(void *handle, void *ptr, int nb); +extern int read(void *handle, void *ptr, int nb); +extern void exit(void); +extern void pause(void); +extern void *finddevice(const char *); +extern void *claim(unsigned long virt, unsigned long size, unsigned long align); +extern int getprop(void *phandle, const char *name, void *buf, int buflen); +#endif /* _PPC_BOOT_PROM_H_ */ diff --git a/arch/ppc64/boot/stdio.h b/arch/ppc64/boot/stdio.h new file mode 100644 index 000000000000..24bd3a8dee94 --- /dev/null +++ b/arch/ppc64/boot/stdio.h @@ -0,0 +1,16 @@ +#ifndef _PPC_BOOT_STDIO_H_ +#define _PPC_BOOT_STDIO_H_ + +extern int printf(const char *fmt, ...); + +extern int sprintf(char *buf, const char *fmt, ...); + +extern int vsprintf(char *buf, const char *fmt, va_list args); + +extern int putc(int c, void *f); +extern int putchar(int c); +extern int getchar(void); + +extern int fputs(char *str, void *f); + +#endif /* _PPC_BOOT_STDIO_H_ */ diff --git a/arch/ppc64/boot/string.S b/arch/ppc64/boot/string.S index ba5f2d21c9ea..7ade87ae7718 100644 --- a/arch/ppc64/boot/string.S +++ b/arch/ppc64/boot/string.S @@ -9,7 +9,7 @@ * NOTE: this code runs in 32 bit mode and is packaged as ELF32. */ -#include +#include "ppc_asm.h" .text .globl strcpy diff --git a/arch/ppc64/boot/string.h b/arch/ppc64/boot/string.h new file mode 100644 index 000000000000..9289258bcbd6 --- /dev/null +++ b/arch/ppc64/boot/string.h @@ -0,0 +1,16 @@ +#ifndef _PPC_BOOT_STRING_H_ +#define _PPC_BOOT_STRING_H_ + +extern char *strcpy(char *dest, const char *src); +extern char *strncpy(char *dest, const char *src, size_t n); +extern char *strcat(char *dest, const char *src); +extern int strcmp(const char *s1, const char *s2); +extern size_t strlen(const char *s); +extern size_t strnlen(const char *s, size_t count); + +extern void *memset(void *s, int c, size_t n); +extern void *memmove(void *dest, const void *src, unsigned long n); +extern void *memcpy(void *dest, const void *src, unsigned long n); +extern int memcmp(const void *s1, const void *s2, size_t n); + +#endif /* _PPC_BOOT_STRING_H_ */ diff --git a/arch/ppc64/boot/zlib.c b/arch/ppc64/boot/zlib.c index 78837e884b8b..0d910cd2079d 100644 --- a/arch/ppc64/boot/zlib.c +++ b/arch/ppc64/boot/zlib.c @@ -107,7 +107,7 @@ extern void *memcpy(void *, const void *, unsigned long); /* Diagnostic functions */ #ifdef DEBUG_ZLIB -# include +# include "stdio.h" # ifndef verbose # define verbose 0 # endif From e28f7faf05159f1cfd564596f5e6178edba6bd49 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 5 Aug 2005 19:39:06 +1000 Subject: [PATCH 02/37] [PATCH] Four level pagetables for ppc64 Implement 4-level pagetables for ppc64 This patch implements full four-level page tables for ppc64, thereby extending the usable user address range to 44 bits (16T). The patch uses a full page for the tables at the bottom and top level, and a quarter page for the intermediate levels. It uses full 64-bit pointers at every level, thus also increasing the addressable range of physical memory. This patch also tweaks the VSID allocation to allow matching range for user addresses (this halves the number of available contexts) and adds some #if and BUILD_BUG sanity checks. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/mm/hash_utils.c | 2 +- arch/ppc64/mm/hugetlbpage.c | 207 ++++++++++++---------------------- arch/ppc64/mm/imalloc.c | 2 +- arch/ppc64/mm/init.c | 62 ++++++---- arch/ppc64/mm/slb_low.S | 2 +- arch/ppc64/mm/tlb.c | 95 +++++++++------- include/asm-ppc64/imalloc.h | 2 +- include/asm-ppc64/mmu.h | 7 +- include/asm-ppc64/page.h | 26 +++-- include/asm-ppc64/pgalloc.h | 93 +++++++++------ include/asm-ppc64/pgtable.h | 92 ++++++++------- include/asm-ppc64/processor.h | 4 +- 12 files changed, 305 insertions(+), 289 deletions(-) diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c index 623b5d130c31..65d6e8527948 100644 --- a/arch/ppc64/mm/hash_utils.c +++ b/arch/ppc64/mm/hash_utils.c @@ -302,7 +302,7 @@ int hash_page(unsigned long ea, unsigned long access, unsigned long trap) int local = 0; cpumask_t tmp; - if ((ea & ~REGION_MASK) > EADDR_MASK) + if ((ea & ~REGION_MASK) >= PGTABLE_RANGE) return 1; switch (REGION_ID(ea)) { diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c index f9524602818d..a13e44230a6f 100644 --- a/arch/ppc64/mm/hugetlbpage.c +++ b/arch/ppc64/mm/hugetlbpage.c @@ -27,124 +27,91 @@ #include -#define HUGEPGDIR_SHIFT (HPAGE_SHIFT + PAGE_SHIFT - 3) -#define HUGEPGDIR_SIZE (1UL << HUGEPGDIR_SHIFT) -#define HUGEPGDIR_MASK (~(HUGEPGDIR_SIZE-1)) - -#define HUGEPTE_INDEX_SIZE 9 -#define HUGEPGD_INDEX_SIZE 10 - -#define PTRS_PER_HUGEPTE (1 << HUGEPTE_INDEX_SIZE) -#define PTRS_PER_HUGEPGD (1 << HUGEPGD_INDEX_SIZE) - -static inline int hugepgd_index(unsigned long addr) +/* Modelled after find_linux_pte() */ +pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { - return (addr & ~REGION_MASK) >> HUGEPGDIR_SHIFT; -} + pgd_t *pg; + pud_t *pu; + pmd_t *pm; + pte_t *pt; -static pud_t *hugepgd_offset(struct mm_struct *mm, unsigned long addr) -{ - int index; - - if (! mm->context.huge_pgdir) - return NULL; - - - index = hugepgd_index(addr); - BUG_ON(index >= PTRS_PER_HUGEPGD); - return (pud_t *)(mm->context.huge_pgdir + index); -} - -static inline pte_t *hugepte_offset(pud_t *dir, unsigned long addr) -{ - int index; - - if (pud_none(*dir)) - return NULL; - - index = (addr >> HPAGE_SHIFT) % PTRS_PER_HUGEPTE; - return (pte_t *)pud_page(*dir) + index; -} - -static pud_t *hugepgd_alloc(struct mm_struct *mm, unsigned long addr) -{ BUG_ON(! in_hugepage_area(mm->context, addr)); - if (! mm->context.huge_pgdir) { - pgd_t *new; - spin_unlock(&mm->page_table_lock); - /* Don't use pgd_alloc(), because we want __GFP_REPEAT */ - new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT); - BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE)); - spin_lock(&mm->page_table_lock); + addr &= HPAGE_MASK; - /* - * Because we dropped the lock, we should re-check the - * entry, as somebody else could have populated it.. - */ - if (mm->context.huge_pgdir) - pgd_free(new); - else - mm->context.huge_pgdir = new; - } - return hugepgd_offset(mm, addr); -} - -static pte_t *hugepte_alloc(struct mm_struct *mm, pud_t *dir, unsigned long addr) -{ - if (! pud_present(*dir)) { - pte_t *new; - - spin_unlock(&mm->page_table_lock); - new = kmem_cache_alloc(zero_cache, GFP_KERNEL | __GFP_REPEAT); - BUG_ON(memcmp(new, empty_zero_page, PAGE_SIZE)); - spin_lock(&mm->page_table_lock); - /* - * Because we dropped the lock, we should re-check the - * entry, as somebody else could have populated it.. - */ - if (pud_present(*dir)) { - if (new) - kmem_cache_free(zero_cache, new); - } else { - struct page *ptepage; - - if (! new) - return NULL; - ptepage = virt_to_page(new); - ptepage->mapping = (void *) mm; - ptepage->index = addr & HUGEPGDIR_MASK; - pud_populate(mm, dir, new); + pg = pgd_offset(mm, addr); + if (!pgd_none(*pg)) { + pu = pud_offset(pg, addr); + if (!pud_none(*pu)) { + pm = pmd_offset(pu, addr); + pt = (pte_t *)pm; + BUG_ON(!pmd_none(*pm) + && !(pte_present(*pt) && pte_huge(*pt))); + return pt; } } - return hugepte_offset(dir, addr); -} - -pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) -{ - pud_t *pud; - - BUG_ON(! in_hugepage_area(mm->context, addr)); - - pud = hugepgd_offset(mm, addr); - if (! pud) - return NULL; - - return hugepte_offset(pud, addr); + return NULL; } pte_t *huge_pte_alloc(struct mm_struct *mm, unsigned long addr) { - pud_t *pud; + pgd_t *pg; + pud_t *pu; + pmd_t *pm; + pte_t *pt; BUG_ON(! in_hugepage_area(mm->context, addr)); - pud = hugepgd_alloc(mm, addr); - if (! pud) - return NULL; + addr &= HPAGE_MASK; - return hugepte_alloc(mm, pud, addr); + pg = pgd_offset(mm, addr); + pu = pud_alloc(mm, pg, addr); + + if (pu) { + pm = pmd_alloc(mm, pu, addr); + if (pm) { + pt = (pte_t *)pm; + BUG_ON(!pmd_none(*pm) + && !(pte_present(*pt) && pte_huge(*pt))); + return pt; + } + } + + return NULL; +} + +#define HUGEPTE_BATCH_SIZE (HPAGE_SIZE / PMD_SIZE) + +void set_huge_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + int i; + + if (pte_present(*ptep)) { + pte_clear(mm, addr, ptep); + flush_tlb_pending(); + } + + for (i = 0; i < HUGEPTE_BATCH_SIZE; i++) { + *ptep = __pte(pte_val(pte) & ~_PAGE_HPTEFLAGS); + ptep++; + } +} + +pte_t huge_ptep_get_and_clear(struct mm_struct *mm, unsigned long addr, + pte_t *ptep) +{ + unsigned long old = pte_update(ptep, ~0UL); + int i; + + if (old & _PAGE_HASHPTE) + hpte_update(mm, addr, old, 0); + + for (i = 1; i < HUGEPTE_BATCH_SIZE; i++) + ptep[i] = __pte(0); + + return __pte(old); } /* @@ -541,42 +508,6 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, } } -void hugetlb_mm_free_pgd(struct mm_struct *mm) -{ - int i; - pgd_t *pgdir; - - spin_lock(&mm->page_table_lock); - - pgdir = mm->context.huge_pgdir; - if (! pgdir) - goto out; - - mm->context.huge_pgdir = NULL; - - /* cleanup any hugepte pages leftover */ - for (i = 0; i < PTRS_PER_HUGEPGD; i++) { - pud_t *pud = (pud_t *)(pgdir + i); - - if (! pud_none(*pud)) { - pte_t *pte = (pte_t *)pud_page(*pud); - struct page *ptepage = virt_to_page(pte); - - ptepage->mapping = NULL; - - BUG_ON(memcmp(pte, empty_zero_page, PAGE_SIZE)); - kmem_cache_free(zero_cache, pte); - } - pud_clear(pud); - } - - BUG_ON(memcmp(pgdir, empty_zero_page, PAGE_SIZE)); - kmem_cache_free(zero_cache, pgdir); - - out: - spin_unlock(&mm->page_table_lock); -} - int hash_huge_page(struct mm_struct *mm, unsigned long access, unsigned long ea, unsigned long vsid, int local) { diff --git a/arch/ppc64/mm/imalloc.c b/arch/ppc64/mm/imalloc.c index b6e75b891ac0..c65b87b92756 100644 --- a/arch/ppc64/mm/imalloc.c +++ b/arch/ppc64/mm/imalloc.c @@ -31,7 +31,7 @@ static int get_free_im_addr(unsigned long size, unsigned long *im_addr) break; if ((unsigned long)tmp->addr >= ioremap_bot) addr = tmp->size + (unsigned long) tmp->addr; - if (addr > IMALLOC_END-size) + if (addr >= IMALLOC_END-size) return 1; } *im_addr = addr; diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index e58a24d42879..87f256df8de5 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -66,6 +66,14 @@ #include #include +#if PGTABLE_RANGE > USER_VSID_RANGE +#warning Limited user VSID range means pagetable space is wasted +#endif + +#if (TASK_SIZE_USER64 < PGTABLE_RANGE) && (TASK_SIZE_USER64 < USER_VSID_RANGE) +#warning TASK_SIZE is smaller than it needs to be. +#endif + int mem_init_done; unsigned long ioremap_bot = IMALLOC_BASE; static unsigned long phbs_io_bot = PHBS_IO_BASE; @@ -226,7 +234,7 @@ void __iomem * __ioremap(unsigned long addr, unsigned long size, * Before that, we map using addresses going * up from ioremap_bot. imalloc will use * the addresses from ioremap_bot through - * IMALLOC_END (0xE000001fffffffff) + * IMALLOC_END * */ pa = addr & PAGE_MASK; @@ -417,12 +425,6 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm) int index; int err; -#ifdef CONFIG_HUGETLB_PAGE - /* We leave htlb_segs as it was, but for a fork, we need to - * clear the huge_pgdir. */ - mm->context.huge_pgdir = NULL; -#endif - again: if (!idr_pre_get(&mmu_context_idr, GFP_KERNEL)) return -ENOMEM; @@ -453,8 +455,6 @@ void destroy_context(struct mm_struct *mm) spin_unlock(&mmu_context_lock); mm->context.id = NO_CONTEXT; - - hugetlb_mm_free_pgd(mm); } /* @@ -833,23 +833,43 @@ void __iomem * reserve_phb_iospace(unsigned long size) return virt_addr; } -kmem_cache_t *zero_cache; - -static void zero_ctor(void *pte, kmem_cache_t *cache, unsigned long flags) +static void zero_ctor(void *addr, kmem_cache_t *cache, unsigned long flags) { - memset(pte, 0, PAGE_SIZE); + memset(addr, 0, kmem_cache_size(cache)); } +static const int pgtable_cache_size[2] = { + PTE_TABLE_SIZE, PMD_TABLE_SIZE +}; +static const char *pgtable_cache_name[ARRAY_SIZE(pgtable_cache_size)] = { + "pgd_pte_cache", "pud_pmd_cache", +}; + +kmem_cache_t *pgtable_cache[ARRAY_SIZE(pgtable_cache_size)]; + void pgtable_cache_init(void) { - zero_cache = kmem_cache_create("zero", - PAGE_SIZE, - 0, - SLAB_HWCACHE_ALIGN | SLAB_MUST_HWCACHE_ALIGN, - zero_ctor, - NULL); - if (!zero_cache) - panic("pgtable_cache_init(): could not create zero_cache!\n"); + int i; + + BUILD_BUG_ON(PTE_TABLE_SIZE != pgtable_cache_size[PTE_CACHE_NUM]); + BUILD_BUG_ON(PMD_TABLE_SIZE != pgtable_cache_size[PMD_CACHE_NUM]); + BUILD_BUG_ON(PUD_TABLE_SIZE != pgtable_cache_size[PUD_CACHE_NUM]); + BUILD_BUG_ON(PGD_TABLE_SIZE != pgtable_cache_size[PGD_CACHE_NUM]); + + for (i = 0; i < ARRAY_SIZE(pgtable_cache_size); i++) { + int size = pgtable_cache_size[i]; + const char *name = pgtable_cache_name[i]; + + pgtable_cache[i] = kmem_cache_create(name, + size, size, + SLAB_HWCACHE_ALIGN + | SLAB_MUST_HWCACHE_ALIGN, + zero_ctor, + NULL); + if (! pgtable_cache[i]) + panic("pgtable_cache_init(): could not create %s!\n", + name); + } } pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, diff --git a/arch/ppc64/mm/slb_low.S b/arch/ppc64/mm/slb_low.S index 8379d678f70f..f20fc52483a7 100644 --- a/arch/ppc64/mm/slb_low.S +++ b/arch/ppc64/mm/slb_low.S @@ -91,7 +91,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) 0: /* user address: proto-VSID = context<<15 | ESID */ li r11,SLB_VSID_USER - srdi. r9,r3,13 + srdi. r9,r3,USER_ESID_BITS bne- 8f /* invalid ea bits set */ #ifdef CONFIG_HUGETLB_PAGE diff --git a/arch/ppc64/mm/tlb.c b/arch/ppc64/mm/tlb.c index 26f0172c4527..d8a6593a13f0 100644 --- a/arch/ppc64/mm/tlb.c +++ b/arch/ppc64/mm/tlb.c @@ -41,7 +41,58 @@ DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); unsigned long pte_freelist_forced_free; -void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage) +struct pte_freelist_batch +{ + struct rcu_head rcu; + unsigned int index; + pgtable_free_t tables[0]; +}; + +DEFINE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); +unsigned long pte_freelist_forced_free; + +#define PTE_FREELIST_SIZE \ + ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) \ + / sizeof(pgtable_free_t)) + +#ifdef CONFIG_SMP +static void pte_free_smp_sync(void *arg) +{ + /* Do nothing, just ensure we sync with all CPUs */ +} +#endif + +/* This is only called when we are critically out of memory + * (and fail to get a page in pte_free_tlb). + */ +static void pgtable_free_now(pgtable_free_t pgf) +{ + pte_freelist_forced_free++; + + smp_call_function(pte_free_smp_sync, NULL, 0, 1); + + pgtable_free(pgf); +} + +static void pte_free_rcu_callback(struct rcu_head *head) +{ + struct pte_freelist_batch *batch = + container_of(head, struct pte_freelist_batch, rcu); + unsigned int i; + + for (i = 0; i < batch->index; i++) + pgtable_free(batch->tables[i]); + + free_page((unsigned long)batch); +} + +static void pte_free_submit(struct pte_freelist_batch *batch) +{ + INIT_RCU_HEAD(&batch->rcu); + call_rcu(&batch->rcu, pte_free_rcu_callback); +} + +void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf) { /* This is safe as we are holding page_table_lock */ cpumask_t local_cpumask = cpumask_of_cpu(smp_processor_id()); @@ -49,19 +100,19 @@ void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage) if (atomic_read(&tlb->mm->mm_users) < 2 || cpus_equal(tlb->mm->cpu_vm_mask, local_cpumask)) { - pte_free(ptepage); + pgtable_free(pgf); return; } if (*batchp == NULL) { *batchp = (struct pte_freelist_batch *)__get_free_page(GFP_ATOMIC); if (*batchp == NULL) { - pte_free_now(ptepage); + pgtable_free_now(pgf); return; } (*batchp)->index = 0; } - (*batchp)->pages[(*batchp)->index++] = ptepage; + (*batchp)->tables[(*batchp)->index++] = pgf; if ((*batchp)->index == PTE_FREELIST_SIZE) { pte_free_submit(*batchp); *batchp = NULL; @@ -132,42 +183,6 @@ void __flush_tlb_pending(struct ppc64_tlb_batch *batch) put_cpu(); } -#ifdef CONFIG_SMP -static void pte_free_smp_sync(void *arg) -{ - /* Do nothing, just ensure we sync with all CPUs */ -} -#endif - -/* This is only called when we are critically out of memory - * (and fail to get a page in pte_free_tlb). - */ -void pte_free_now(struct page *ptepage) -{ - pte_freelist_forced_free++; - - smp_call_function(pte_free_smp_sync, NULL, 0, 1); - - pte_free(ptepage); -} - -static void pte_free_rcu_callback(struct rcu_head *head) -{ - struct pte_freelist_batch *batch = - container_of(head, struct pte_freelist_batch, rcu); - unsigned int i; - - for (i = 0; i < batch->index; i++) - pte_free(batch->pages[i]); - free_page((unsigned long)batch); -} - -void pte_free_submit(struct pte_freelist_batch *batch) -{ - INIT_RCU_HEAD(&batch->rcu); - call_rcu(&batch->rcu, pte_free_rcu_callback); -} - void pte_free_finish(void) { /* This is safe as we are holding page_table_lock */ diff --git a/include/asm-ppc64/imalloc.h b/include/asm-ppc64/imalloc.h index e46ff68a6e41..42adf7033a81 100644 --- a/include/asm-ppc64/imalloc.h +++ b/include/asm-ppc64/imalloc.h @@ -6,7 +6,7 @@ */ #define PHBS_IO_BASE VMALLOC_END #define IMALLOC_BASE (PHBS_IO_BASE + 0x80000000ul) /* Reserve 2 gigs for PHBs */ -#define IMALLOC_END (VMALLOC_START + EADDR_MASK) +#define IMALLOC_END (VMALLOC_START + PGTABLE_RANGE) /* imalloc region types */ diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h index 70348a851313..959a4bfdcd6a 100644 --- a/include/asm-ppc64/mmu.h +++ b/include/asm-ppc64/mmu.h @@ -259,8 +259,10 @@ extern void stabs_alloc(void); #define VSID_BITS 36 #define VSID_MODULUS ((1UL<context.htlb_segs) @@ -125,36 +126,42 @@ extern void copy_user_page(void *to, void *from, unsigned long vaddr, struct pag * Entries in the pte table are 64b, while entries in the pgd & pmd are 32b. */ typedef struct { unsigned long pte; } pte_t; -typedef struct { unsigned int pmd; } pmd_t; -typedef struct { unsigned int pgd; } pgd_t; +typedef struct { unsigned long pmd; } pmd_t; +typedef struct { unsigned long pud; } pud_t; +typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; #define pte_val(x) ((x).pte) #define pmd_val(x) ((x).pmd) +#define pud_val(x) ((x).pud) #define pgd_val(x) ((x).pgd) #define pgprot_val(x) ((x).pgprot) -#define __pte(x) ((pte_t) { (x) } ) -#define __pmd(x) ((pmd_t) { (x) } ) -#define __pgd(x) ((pgd_t) { (x) } ) -#define __pgprot(x) ((pgprot_t) { (x) } ) +#define __pte(x) ((pte_t) { (x) }) +#define __pmd(x) ((pmd_t) { (x) }) +#define __pud(x) ((pud_t) { (x) }) +#define __pgd(x) ((pgd_t) { (x) }) +#define __pgprot(x) ((pgprot_t) { (x) }) #else /* * .. while these make it easier on the compiler */ typedef unsigned long pte_t; -typedef unsigned int pmd_t; -typedef unsigned int pgd_t; +typedef unsigned long pmd_t; +typedef unsigned long pud_t; +typedef unsigned long pgd_t; typedef unsigned long pgprot_t; #define pte_val(x) (x) #define pmd_val(x) (x) +#define pud_val(x) (x) #define pgd_val(x) (x) #define pgprot_val(x) (x) #define __pte(x) (x) #define __pmd(x) (x) +#define __pud(x) (x) #define __pgd(x) (x) #define __pgprot(x) (x) @@ -208,9 +215,6 @@ extern u64 ppc64_pft_size; /* Log 2 of page table size */ #define USER_REGION_ID (0UL) #define REGION_ID(ea) (((unsigned long)(ea)) >> REGION_SHIFT) -#define __bpn_to_ba(x) ((((unsigned long)(x)) << PAGE_SHIFT) + KERNELBASE) -#define __ba_to_bpn(x) ((((unsigned long)(x)) & ~REGION_MASK) >> PAGE_SHIFT) - #define __va(x) ((void *)((unsigned long)(x) + KERNELBASE)) #ifdef CONFIG_DISCONTIGMEM diff --git a/include/asm-ppc64/pgalloc.h b/include/asm-ppc64/pgalloc.h index 4fc4b739b380..26bc49c1108d 100644 --- a/include/asm-ppc64/pgalloc.h +++ b/include/asm-ppc64/pgalloc.h @@ -6,7 +6,12 @@ #include #include -extern kmem_cache_t *zero_cache; +extern kmem_cache_t *pgtable_cache[]; + +#define PTE_CACHE_NUM 0 +#define PMD_CACHE_NUM 1 +#define PUD_CACHE_NUM 1 +#define PGD_CACHE_NUM 0 /* * This program is free software; you can redistribute it and/or @@ -15,30 +20,40 @@ extern kmem_cache_t *zero_cache; * 2 of the License, or (at your option) any later version. */ -static inline pgd_t * -pgd_alloc(struct mm_struct *mm) +static inline pgd_t *pgd_alloc(struct mm_struct *mm) { - return kmem_cache_alloc(zero_cache, GFP_KERNEL); + return kmem_cache_alloc(pgtable_cache[PGD_CACHE_NUM], GFP_KERNEL); } -static inline void -pgd_free(pgd_t *pgd) +static inline void pgd_free(pgd_t *pgd) { - kmem_cache_free(zero_cache, pgd); + kmem_cache_free(pgtable_cache[PGD_CACHE_NUM], pgd); +} + +#define pgd_populate(MM, PGD, PUD) pgd_set(PGD, PUD) + +static inline pud_t *pud_alloc_one(struct mm_struct *mm, unsigned long addr) +{ + return kmem_cache_alloc(pgtable_cache[PUD_CACHE_NUM], + GFP_KERNEL|__GFP_REPEAT); +} + +static inline void pud_free(pud_t *pud) +{ + kmem_cache_free(pgtable_cache[PUD_CACHE_NUM], pud); } #define pud_populate(MM, PUD, PMD) pud_set(PUD, PMD) -static inline pmd_t * -pmd_alloc_one(struct mm_struct *mm, unsigned long addr) +static inline pmd_t *pmd_alloc_one(struct mm_struct *mm, unsigned long addr) { - return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(pgtable_cache[PMD_CACHE_NUM], + GFP_KERNEL|__GFP_REPEAT); } -static inline void -pmd_free(pmd_t *pmd) +static inline void pmd_free(pmd_t *pmd) { - kmem_cache_free(zero_cache, pmd); + kmem_cache_free(pgtable_cache[PMD_CACHE_NUM], pmd); } #define pmd_populate_kernel(mm, pmd, pte) pmd_set(pmd, pte) @@ -47,44 +62,58 @@ pmd_free(pmd_t *pmd) static inline pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - return kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); + return kmem_cache_alloc(pgtable_cache[PTE_CACHE_NUM], + GFP_KERNEL|__GFP_REPEAT); } static inline struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) { - pte_t *pte = kmem_cache_alloc(zero_cache, GFP_KERNEL|__GFP_REPEAT); - if (pte) - return virt_to_page(pte); - return NULL; + return virt_to_page(pte_alloc_one_kernel(mm, address)); } static inline void pte_free_kernel(pte_t *pte) { - kmem_cache_free(zero_cache, pte); + kmem_cache_free(pgtable_cache[PTE_CACHE_NUM], pte); } static inline void pte_free(struct page *ptepage) { - kmem_cache_free(zero_cache, page_address(ptepage)); + pte_free_kernel(page_address(ptepage)); } -struct pte_freelist_batch +#define PGF_CACHENUM_MASK 0xf + +typedef struct pgtable_free { + unsigned long val; +} pgtable_free_t; + +static inline pgtable_free_t pgtable_free_cache(void *p, int cachenum, + unsigned long mask) { - struct rcu_head rcu; - unsigned int index; - struct page * pages[0]; -}; + BUG_ON(cachenum > PGF_CACHENUM_MASK); -#define PTE_FREELIST_SIZE ((PAGE_SIZE - sizeof(struct pte_freelist_batch)) / \ - sizeof(struct page *)) + return (pgtable_free_t){.val = ((unsigned long) p & ~mask) | cachenum}; +} -extern void pte_free_now(struct page *ptepage); -extern void pte_free_submit(struct pte_freelist_batch *batch); +static inline void pgtable_free(pgtable_free_t pgf) +{ + void *p = (void *)(pgf.val & ~PGF_CACHENUM_MASK); + int cachenum = pgf.val & PGF_CACHENUM_MASK; -DECLARE_PER_CPU(struct pte_freelist_batch *, pte_freelist_cur); + kmem_cache_free(pgtable_cache[cachenum], p); +} -void __pte_free_tlb(struct mmu_gather *tlb, struct page *ptepage); -#define __pmd_free_tlb(tlb, pmd) __pte_free_tlb(tlb, virt_to_page(pmd)) +void pgtable_free_tlb(struct mmu_gather *tlb, pgtable_free_t pgf); + +#define __pte_free_tlb(tlb, ptepage) \ + pgtable_free_tlb(tlb, pgtable_free_cache(page_address(ptepage), \ + PTE_CACHE_NUM, PTE_TABLE_SIZE-1)) +#define __pmd_free_tlb(tlb, pmd) \ + pgtable_free_tlb(tlb, pgtable_free_cache(pmd, \ + PMD_CACHE_NUM, PMD_TABLE_SIZE-1)) +#define __pud_free_tlb(tlb, pmd) \ + pgtable_free_tlb(tlb, pgtable_free_cache(pud, \ + PUD_CACHE_NUM, PUD_TABLE_SIZE-1)) #define check_pgt_cache() do { } while (0) diff --git a/include/asm-ppc64/pgtable.h b/include/asm-ppc64/pgtable.h index 46cf61c2ff69..5ea952ad7164 100644 --- a/include/asm-ppc64/pgtable.h +++ b/include/asm-ppc64/pgtable.h @@ -15,19 +15,24 @@ #include #endif /* __ASSEMBLY__ */ -#include - /* * Entries per page directory level. The PTE level must use a 64b record * for each page table entry. The PMD and PGD level use a 32b record for * each entry by assuming that each entry is page aligned. */ #define PTE_INDEX_SIZE 9 -#define PMD_INDEX_SIZE 10 -#define PGD_INDEX_SIZE 10 +#define PMD_INDEX_SIZE 7 +#define PUD_INDEX_SIZE 7 +#define PGD_INDEX_SIZE 9 + +#define PTE_TABLE_SIZE (sizeof(pte_t) << PTE_INDEX_SIZE) +#define PMD_TABLE_SIZE (sizeof(pmd_t) << PMD_INDEX_SIZE) +#define PUD_TABLE_SIZE (sizeof(pud_t) << PUD_INDEX_SIZE) +#define PGD_TABLE_SIZE (sizeof(pgd_t) << PGD_INDEX_SIZE) #define PTRS_PER_PTE (1 << PTE_INDEX_SIZE) #define PTRS_PER_PMD (1 << PMD_INDEX_SIZE) +#define PTRS_PER_PUD (1 << PMD_INDEX_SIZE) #define PTRS_PER_PGD (1 << PGD_INDEX_SIZE) /* PMD_SHIFT determines what a second-level page table entry can map */ @@ -35,8 +40,13 @@ #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) -/* PGDIR_SHIFT determines what a third-level page table entry can map */ -#define PGDIR_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +/* PUD_SHIFT determines what a third-level page table entry can map */ +#define PUD_SHIFT (PMD_SHIFT + PMD_INDEX_SIZE) +#define PUD_SIZE (1UL << PUD_SHIFT) +#define PUD_MASK (~(PUD_SIZE-1)) + +/* PGDIR_SHIFT determines what a fourth-level page table entry can map */ +#define PGDIR_SHIFT (PUD_SHIFT + PUD_INDEX_SIZE) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) @@ -45,15 +55,23 @@ /* * Size of EA range mapped by our pagetables. */ -#define EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ - PGD_INDEX_SIZE + PAGE_SHIFT) -#define EADDR_MASK ((1UL << EADDR_SIZE) - 1) +#define PGTABLE_EADDR_SIZE (PTE_INDEX_SIZE + PMD_INDEX_SIZE + \ + PUD_INDEX_SIZE + PGD_INDEX_SIZE + PAGE_SHIFT) +#define PGTABLE_RANGE (1UL << PGTABLE_EADDR_SIZE) + +#if TASK_SIZE_USER64 > PGTABLE_RANGE +#error TASK_SIZE_USER64 exceeds pagetable range +#endif + +#if TASK_SIZE_USER64 > (1UL << (USER_ESID_BITS + SID_SHIFT)) +#error TASK_SIZE_USER64 exceeds user VSID range +#endif /* * Define the address range of the vmalloc VM area. */ #define VMALLOC_START (0xD000000000000000ul) -#define VMALLOC_SIZE (0x10000000000UL) +#define VMALLOC_SIZE (0x80000000000UL) #define VMALLOC_END (VMALLOC_START + VMALLOC_SIZE) /* @@ -154,8 +172,6 @@ extern unsigned long empty_zero_page[PAGE_SIZE/sizeof(unsigned long)]; #ifndef __ASSEMBLY__ int hash_huge_page(struct mm_struct *mm, unsigned long access, unsigned long ea, unsigned long vsid, int local); - -void hugetlb_mm_free_pgd(struct mm_struct *mm); #endif /* __ASSEMBLY__ */ #define HAVE_ARCH_UNMAPPED_AREA @@ -163,7 +179,6 @@ void hugetlb_mm_free_pgd(struct mm_struct *mm); #else #define hash_huge_page(mm,a,ea,vsid,local) -1 -#define hugetlb_mm_free_pgd(mm) do {} while (0) #endif @@ -197,39 +212,45 @@ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t pgprot) #define pte_pfn(x) ((unsigned long)((pte_val(x) >> PTE_SHIFT))) #define pte_page(x) pfn_to_page(pte_pfn(x)) -#define pmd_set(pmdp, ptep) \ - (pmd_val(*(pmdp)) = __ba_to_bpn(ptep)) +#define pmd_set(pmdp, ptep) ({BUG_ON((u64)ptep < KERNELBASE); pmd_val(*(pmdp)) = (unsigned long)(ptep);}) #define pmd_none(pmd) (!pmd_val(pmd)) #define pmd_bad(pmd) (pmd_val(pmd) == 0) #define pmd_present(pmd) (pmd_val(pmd) != 0) #define pmd_clear(pmdp) (pmd_val(*(pmdp)) = 0) -#define pmd_page_kernel(pmd) (__bpn_to_ba(pmd_val(pmd))) +#define pmd_page_kernel(pmd) (pmd_val(pmd)) #define pmd_page(pmd) virt_to_page(pmd_page_kernel(pmd)) -#define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (__ba_to_bpn(pmdp))) +#define pud_set(pudp, pmdp) (pud_val(*(pudp)) = (unsigned long)(pmdp)) #define pud_none(pud) (!pud_val(pud)) -#define pud_bad(pud) ((pud_val(pud)) == 0UL) -#define pud_present(pud) (pud_val(pud) != 0UL) -#define pud_clear(pudp) (pud_val(*(pudp)) = 0UL) -#define pud_page(pud) (__bpn_to_ba(pud_val(pud))) +#define pud_bad(pud) ((pud_val(pud)) == 0) +#define pud_present(pud) (pud_val(pud) != 0) +#define pud_clear(pudp) (pud_val(*(pudp)) = 0) +#define pud_page(pud) (pud_val(pud)) + +#define pgd_set(pgdp, pudp) ({pgd_val(*(pgdp)) = (unsigned long)(pudp);}) +#define pgd_none(pgd) (!pgd_val(pgd)) +#define pgd_bad(pgd) (pgd_val(pgd) == 0) +#define pgd_present(pgd) (pgd_val(pgd) != 0) +#define pgd_clear(pgdp) (pgd_val(*(pgdp)) = 0) +#define pgd_page(pgd) (pgd_val(pgd)) /* * Find an entry in a page-table-directory. We combine the address region * (the high order N bits) and the pgd portion of the address. */ /* to avoid overflow in free_pgtables we don't use PTRS_PER_PGD here */ -#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x7ff) +#define pgd_index(address) (((address) >> (PGDIR_SHIFT)) & 0x1ff) #define pgd_offset(mm, address) ((mm)->pgd + pgd_index(address)) -/* Find an entry in the second-level page table.. */ -#define pmd_offset(pudp,addr) \ - ((pmd_t *) pud_page(*(pudp)) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) +#define pud_offset(pgdp, addr) \ + (((pud_t *) pgd_page(*(pgdp))) + (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))) + +#define pmd_offset(pudp,addr) \ + (((pmd_t *) pud_page(*(pudp))) + (((addr) >> PMD_SHIFT) & (PTRS_PER_PMD - 1))) -/* Find an entry in the third-level page table.. */ #define pte_offset_kernel(dir,addr) \ - ((pte_t *) pmd_page_kernel(*(dir)) \ - + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) + (((pte_t *) pmd_page_kernel(*(dir))) + (((addr) >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))) #define pte_offset_map(dir,addr) pte_offset_kernel((dir), (addr)) #define pte_offset_map_nested(dir,addr) pte_offset_kernel((dir), (addr)) @@ -458,23 +479,18 @@ extern pgprot_t phys_mem_access_prot(struct file *file, unsigned long addr, #define pte_same(A,B) (((pte_val(A) ^ pte_val(B)) & ~_PAGE_HPTEFLAGS) == 0) #define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %08x.\n", __FILE__, __LINE__, pmd_val(e)) + printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pud_ERROR(e) \ + printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pud_val(e)) #define pgd_ERROR(e) \ - printk("%s:%d: bad pgd %08x.\n", __FILE__, __LINE__, pgd_val(e)) + printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) extern pgd_t swapper_pg_dir[]; extern void paging_init(void); -/* - * Because the huge pgtables are only 2 level, they can take - * at most around 4M, much less than one hugepage which the - * process is presumably entitled to use. So we don't bother - * freeing up the pagetables on unmap, and wait until - * destroy_context() to clean up the lot. - */ #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) \ - do { } while (0) + free_pgd_range(tlb, addr, end, floor, ceiling) /* * This gets called at the end of handling a page fault, when diff --git a/include/asm-ppc64/processor.h b/include/asm-ppc64/processor.h index 352306cfb579..50b14c0ddb87 100644 --- a/include/asm-ppc64/processor.h +++ b/include/asm-ppc64/processor.h @@ -382,8 +382,8 @@ extern long kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern struct task_struct *last_task_used_math; extern struct task_struct *last_task_used_altivec; -/* 64-bit user address space is 41-bits (2TBs user VM) */ -#define TASK_SIZE_USER64 (0x0000020000000000UL) +/* 64-bit user address space is 44-bits (16TB user VM) */ +#define TASK_SIZE_USER64 (0x0000100000000000UL) /* * 32-bit user address space is 4GB - 1 page From 34153fa3af45d84f3221d9b67ba2ab7e8a220d28 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Tue, 9 Aug 2005 10:36:34 +0200 Subject: [PATCH 03/37] [PATCH] flattened device tree changes This patch updates the format of the flattened device-tree passed between the boot trampoline and the kernel to support a more compact representation, for use by embedded systems mostly. Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/prom.c | 175 ++++++++++++++++++++++++++++------ arch/ppc64/kernel/prom_init.c | 88 ++++++++++------- include/asm-ppc64/prom.h | 14 ++- 3 files changed, 209 insertions(+), 68 deletions(-) diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c index 5aca01ddd81f..255c39ae1b48 100644 --- a/arch/ppc64/kernel/prom.c +++ b/arch/ppc64/kernel/prom.c @@ -625,8 +625,8 @@ void __init finish_device_tree(void) static inline char *find_flat_dt_string(u32 offset) { - return ((char *)initial_boot_params) + initial_boot_params->off_dt_strings - + offset; + return ((char *)initial_boot_params) + + initial_boot_params->off_dt_strings + offset; } /** @@ -635,26 +635,33 @@ static inline char *find_flat_dt_string(u32 offset) * unflatten the tree */ static int __init scan_flat_dt(int (*it)(unsigned long node, - const char *full_path, void *data), + const char *uname, int depth, + void *data), void *data) { unsigned long p = ((unsigned long)initial_boot_params) + initial_boot_params->off_dt_struct; int rc = 0; + int depth = -1; do { u32 tag = *((u32 *)p); char *pathp; p += 4; - if (tag == OF_DT_END_NODE) + if (tag == OF_DT_END_NODE) { + depth --; + continue; + } + if (tag == OF_DT_NOP) continue; if (tag == OF_DT_END) break; if (tag == OF_DT_PROP) { u32 sz = *((u32 *)p); p += 8; - p = _ALIGN(p, sz >= 8 ? 8 : 4); + if (initial_boot_params->version < 0x10) + p = _ALIGN(p, sz >= 8 ? 8 : 4); p += sz; p = _ALIGN(p, 4); continue; @@ -664,9 +671,18 @@ static int __init scan_flat_dt(int (*it)(unsigned long node, " device tree !\n", tag); return -EINVAL; } + depth++; pathp = (char *)p; p = _ALIGN(p + strlen(pathp) + 1, 4); - rc = it(p, pathp, data); + if ((*pathp) == '/') { + char *lp, *np; + for (lp = NULL, np = pathp; *np; np++) + if ((*np) == '/') + lp = np+1; + if (lp != NULL) + pathp = lp; + } + rc = it(p, pathp, depth, data); if (rc != 0) break; } while(1); @@ -689,17 +705,21 @@ static void* __init get_flat_dt_prop(unsigned long node, const char *name, const char *nstr; p += 4; + if (tag == OF_DT_NOP) + continue; if (tag != OF_DT_PROP) return NULL; sz = *((u32 *)p); noff = *((u32 *)(p + 4)); p += 8; - p = _ALIGN(p, sz >= 8 ? 8 : 4); + if (initial_boot_params->version < 0x10) + p = _ALIGN(p, sz >= 8 ? 8 : 4); nstr = find_flat_dt_string(noff); if (nstr == NULL) { - printk(KERN_WARNING "Can't find property index name !\n"); + printk(KERN_WARNING "Can't find property index" + " name !\n"); return NULL; } if (strcmp(name, nstr) == 0) { @@ -713,7 +733,7 @@ static void* __init get_flat_dt_prop(unsigned long node, const char *name, } static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, - unsigned long align) + unsigned long align) { void *res; @@ -727,13 +747,16 @@ static void *__init unflatten_dt_alloc(unsigned long *mem, unsigned long size, static unsigned long __init unflatten_dt_node(unsigned long mem, unsigned long *p, struct device_node *dad, - struct device_node ***allnextpp) + struct device_node ***allnextpp, + unsigned long fpsize) { struct device_node *np; struct property *pp, **prev_pp = NULL; char *pathp; u32 tag; - unsigned int l; + unsigned int l, allocl; + int has_name = 0; + int new_format = 0; tag = *((u32 *)(*p)); if (tag != OF_DT_BEGIN_NODE) { @@ -742,21 +765,62 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, } *p += 4; pathp = (char *)*p; - l = strlen(pathp) + 1; + l = allocl = strlen(pathp) + 1; *p = _ALIGN(*p + l, 4); - np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + l, + /* version 0x10 has a more compact unit name here instead of the full + * path. we accumulate the full path size using "fpsize", we'll rebuild + * it later. We detect this because the first character of the name is + * not '/'. + */ + if ((*pathp) != '/') { + new_format = 1; + if (fpsize == 0) { + /* root node: special case. fpsize accounts for path + * plus terminating zero. root node only has '/', so + * fpsize should be 2, but we want to avoid the first + * level nodes to have two '/' so we use fpsize 1 here + */ + fpsize = 1; + allocl = 2; + } else { + /* account for '/' and path size minus terminal 0 + * already in 'l' + */ + fpsize += l; + allocl = fpsize; + } + } + + + np = unflatten_dt_alloc(&mem, sizeof(struct device_node) + allocl, __alignof__(struct device_node)); if (allnextpp) { memset(np, 0, sizeof(*np)); np->full_name = ((char*)np) + sizeof(struct device_node); - memcpy(np->full_name, pathp, l); + if (new_format) { + char *p = np->full_name; + /* rebuild full path for new format */ + if (dad && dad->parent) { + strcpy(p, dad->full_name); +#ifdef DEBUG + if ((strlen(p) + l + 1) != allocl) { + DBG("%s: p: %d, l: %d, a: %d\n", + pathp, strlen(p), l, allocl); + } +#endif + p += strlen(p); + } + *(p++) = '/'; + memcpy(p, pathp, l); + } else + memcpy(np->full_name, pathp, l); prev_pp = &np->properties; **allnextpp = np; *allnextpp = &np->allnext; if (dad != NULL) { np->parent = dad; - /* we temporarily use the `next' field as `last_child'. */ + /* we temporarily use the next field as `last_child'*/ if (dad->next == 0) dad->child = np; else @@ -770,18 +834,26 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, char *pname; tag = *((u32 *)(*p)); + if (tag == OF_DT_NOP) { + *p += 4; + continue; + } if (tag != OF_DT_PROP) break; *p += 4; sz = *((u32 *)(*p)); noff = *((u32 *)((*p) + 4)); - *p = _ALIGN((*p) + 8, sz >= 8 ? 8 : 4); + *p += 8; + if (initial_boot_params->version < 0x10) + *p = _ALIGN(*p, sz >= 8 ? 8 : 4); pname = find_flat_dt_string(noff); if (pname == NULL) { printk("Can't find property name in list !\n"); break; } + if (strcmp(pname, "name") == 0) + has_name = 1; l = strlen(pname) + 1; pp = unflatten_dt_alloc(&mem, sizeof(struct property), __alignof__(struct property)); @@ -801,6 +873,36 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, } *p = _ALIGN((*p) + sz, 4); } + /* with version 0x10 we may not have the name property, recreate + * it here from the unit name if absent + */ + if (!has_name) { + char *p = pathp, *ps = pathp, *pa = NULL; + int sz; + + while (*p) { + if ((*p) == '@') + pa = p; + if ((*p) == '/') + ps = p + 1; + p++; + } + if (pa < ps) + pa = p; + sz = (pa - ps) + 1; + pp = unflatten_dt_alloc(&mem, sizeof(struct property) + sz, + __alignof__(struct property)); + if (allnextpp) { + pp->name = "name"; + pp->length = sz; + pp->value = (unsigned char *)(pp + 1); + *prev_pp = pp; + prev_pp = &pp->next; + memcpy(pp->value, ps, sz - 1); + ((char *)pp->value)[sz - 1] = 0; + DBG("fixed up name for %s -> %s\n", pathp, pp->value); + } + } if (allnextpp) { *prev_pp = NULL; np->name = get_property(np, "name", NULL); @@ -812,7 +914,7 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, np->type = ""; } while (tag == OF_DT_BEGIN_NODE) { - mem = unflatten_dt_node(mem, p, np, allnextpp); + mem = unflatten_dt_node(mem, p, np, allnextpp, fpsize); tag = *((u32 *)(*p)); } if (tag != OF_DT_END_NODE) { @@ -842,21 +944,27 @@ void __init unflatten_device_tree(void) /* First pass, scan for size */ start = ((unsigned long)initial_boot_params) + initial_boot_params->off_dt_struct; - size = unflatten_dt_node(0, &start, NULL, NULL); + size = unflatten_dt_node(0, &start, NULL, NULL, 0); + size = (size | 3) + 1; DBG(" size is %lx, allocating...\n", size); /* Allocate memory for the expanded device tree */ - mem = (unsigned long)abs_to_virt(lmb_alloc(size, + mem = (unsigned long)abs_to_virt(lmb_alloc(size + 4, __alignof__(struct device_node))); + ((u32 *)mem)[size / 4] = 0xdeadbeef; + DBG(" unflattening...\n", mem); /* Second pass, do actual unflattening */ start = ((unsigned long)initial_boot_params) + initial_boot_params->off_dt_struct; - unflatten_dt_node(mem, &start, NULL, &allnextp); + unflatten_dt_node(mem, &start, NULL, &allnextp, 0); if (*((u32 *)start) != OF_DT_END) - printk(KERN_WARNING "Weird tag at end of tree: %x\n", *((u32 *)start)); + printk(KERN_WARNING "Weird tag at end of tree: %08x\n", *((u32 *)start)); + if (((u32 *)mem)[size / 4] != 0xdeadbeef) + printk(KERN_WARNING "End of tree marker overwritten: %08x\n", + ((u32 *)mem)[size / 4] ); *allnextp = NULL; /* Get pointer to OF "/chosen" node for use everywhere */ @@ -880,7 +988,7 @@ void __init unflatten_device_tree(void) static int __init early_init_dt_scan_cpus(unsigned long node, - const char *full_path, void *data) + const char *uname, int depth, void *data) { char *type = get_flat_dt_prop(node, "device_type", NULL); u32 *prop; @@ -947,13 +1055,15 @@ static int __init early_init_dt_scan_cpus(unsigned long node, } static int __init early_init_dt_scan_chosen(unsigned long node, - const char *full_path, void *data) + const char *uname, int depth, void *data) { u32 *prop; u64 *prop64; extern unsigned long memory_limit, tce_alloc_start, tce_alloc_end; - if (strcmp(full_path, "/chosen") != 0) + DBG("search \"chosen\", depth: %d, uname: %s\n", depth, uname); + + if (depth != 1 || strcmp(uname, "chosen") != 0) return 0; /* get platform type */ @@ -1003,18 +1113,20 @@ static int __init early_init_dt_scan_chosen(unsigned long node, } static int __init early_init_dt_scan_root(unsigned long node, - const char *full_path, void *data) + const char *uname, int depth, void *data) { u32 *prop; - if (strcmp(full_path, "/") != 0) + if (depth != 0) return 0; prop = (u32 *)get_flat_dt_prop(node, "#size-cells", NULL); dt_root_size_cells = (prop == NULL) ? 1 : *prop; - + DBG("dt_root_size_cells = %x\n", dt_root_size_cells); + prop = (u32 *)get_flat_dt_prop(node, "#address-cells", NULL); dt_root_addr_cells = (prop == NULL) ? 2 : *prop; + DBG("dt_root_addr_cells = %x\n", dt_root_addr_cells); /* break now */ return 1; @@ -1042,7 +1154,7 @@ static unsigned long __init dt_mem_next_cell(int s, cell_t **cellp) static int __init early_init_dt_scan_memory(unsigned long node, - const char *full_path, void *data) + const char *uname, int depth, void *data) { char *type = get_flat_dt_prop(node, "device_type", NULL); cell_t *reg, *endp; @@ -1058,7 +1170,9 @@ static int __init early_init_dt_scan_memory(unsigned long node, endp = reg + (l / sizeof(cell_t)); - DBG("memory scan node %s ...\n", full_path); + DBG("memory scan node %s ..., reg size %ld, data: %x %x %x %x, ...\n", + uname, l, reg[0], reg[1], reg[2], reg[3]); + while ((endp - reg) >= (dt_root_addr_cells + dt_root_size_cells)) { unsigned long base, size; @@ -1469,10 +1583,11 @@ struct device_node *of_find_node_by_path(const char *path) struct device_node *np = allnodes; read_lock(&devtree_lock); - for (; np != 0; np = np->allnext) + for (; np != 0; np = np->allnext) { if (np->full_name != 0 && strcasecmp(np->full_name, path) == 0 && of_node_get(np)) break; + } read_unlock(&devtree_lock); return np; } diff --git a/arch/ppc64/kernel/prom_init.c b/arch/ppc64/kernel/prom_init.c index dbbe6c79d8da..adcf972711fc 100644 --- a/arch/ppc64/kernel/prom_init.c +++ b/arch/ppc64/kernel/prom_init.c @@ -1534,7 +1534,8 @@ static unsigned long __init dt_find_string(char *str) */ #define MAX_PROPERTY_NAME 64 -static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start, +static void __init scan_dt_build_strings(phandle node, + unsigned long *mem_start, unsigned long *mem_end) { unsigned long offset = reloc_offset(); @@ -1547,16 +1548,21 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start, /* get and store all property names */ prev_name = RELOC(""); for (;;) { - int rc; - /* 64 is max len of name including nul. */ namep = make_room(mem_start, mem_end, MAX_PROPERTY_NAME, 1); - rc = call_prom("nextprop", 3, 1, node, prev_name, namep); - if (rc != 1) { + if (call_prom("nextprop", 3, 1, node, prev_name, namep) != 1) { /* No more nodes: unwind alloc */ *mem_start = (unsigned long)namep; break; } + + /* skip "name" */ + if (strcmp(namep, RELOC("name")) == 0) { + *mem_start = (unsigned long)namep; + prev_name = RELOC("name"); + continue; + } + /* get/create string entry */ soff = dt_find_string(namep); if (soff != 0) { *mem_start = (unsigned long)namep; @@ -1571,7 +1577,7 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start, /* do all our children */ child = call_prom("child", 1, 1, node); - while (child != (phandle)0) { + while (child != 0) { scan_dt_build_strings(child, mem_start, mem_end); child = call_prom("peer", 1, 1, child); } @@ -1580,16 +1586,13 @@ static void __init scan_dt_build_strings(phandle node, unsigned long *mem_start, static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, unsigned long *mem_end) { - int l, align; phandle child; - char *namep, *prev_name, *sstart, *p, *ep; + char *namep, *prev_name, *sstart, *p, *ep, *lp, *path; unsigned long soff; unsigned char *valp; unsigned long offset = reloc_offset(); - char pname[MAX_PROPERTY_NAME]; - char *path; - - path = RELOC(prom_scratch); + static char pname[MAX_PROPERTY_NAME]; + int l; dt_push_token(OF_DT_BEGIN_NODE, mem_start, mem_end); @@ -1599,23 +1602,33 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, namep, *mem_end - *mem_start); if (l >= 0) { /* Didn't fit? Get more room. */ - if (l+1 > *mem_end - *mem_start) { + if ((l+1) > (*mem_end - *mem_start)) { namep = make_room(mem_start, mem_end, l+1, 1); call_prom("package-to-path", 3, 1, node, namep, l); } namep[l] = '\0'; + /* Fixup an Apple bug where they have bogus \0 chars in the * middle of the path in some properties */ for (p = namep, ep = namep + l; p < ep; p++) if (*p == '\0') { memmove(p, p+1, ep - p); - ep--; l--; + ep--; l--; p--; } - *mem_start = _ALIGN(((unsigned long) namep) + strlen(namep) + 1, 4); + + /* now try to extract the unit name in that mess */ + for (p = namep, lp = NULL; *p; p++) + if (*p == '/') + lp = p + 1; + if (lp != NULL) + memmove(namep, lp, strlen(lp) + 1); + *mem_start = _ALIGN(((unsigned long) namep) + + strlen(namep) + 1, 4); } /* get it again for debugging */ + path = RELOC(prom_scratch); memset(path, 0, PROM_SCRATCH_SIZE); call_prom("package-to-path", 3, 1, node, path, PROM_SCRATCH_SIZE-1); @@ -1623,23 +1636,27 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, prev_name = RELOC(""); sstart = (char *)RELOC(dt_string_start); for (;;) { - int rc; - - rc = call_prom("nextprop", 3, 1, node, prev_name, pname); - if (rc != 1) + if (call_prom("nextprop", 3, 1, node, prev_name, + RELOC(pname)) != 1) break; + /* skip "name" */ + if (strcmp(RELOC(pname), RELOC("name")) == 0) { + prev_name = RELOC("name"); + continue; + } + /* find string offset */ - soff = dt_find_string(pname); + soff = dt_find_string(RELOC(pname)); if (soff == 0) { - prom_printf("WARNING: Can't find string index for <%s>, node %s\n", - pname, path); + prom_printf("WARNING: Can't find string index for" + " <%s>, node %s\n", RELOC(pname), path); break; } prev_name = sstart + soff; /* get length */ - l = call_prom("getproplen", 2, 1, node, pname); + l = call_prom("getproplen", 2, 1, node, RELOC(pname)); /* sanity checks */ if (l == PROM_ERROR) @@ -1648,7 +1665,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, prom_printf("WARNING: ignoring large property "); /* It seems OF doesn't null-terminate the path :-( */ prom_printf("[%s] ", path); - prom_printf("%s length 0x%x\n", pname, l); + prom_printf("%s length 0x%x\n", RELOC(pname), l); continue; } @@ -1658,17 +1675,16 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, dt_push_token(soff, mem_start, mem_end); /* push property content */ - align = (l >= 8) ? 8 : 4; - valp = make_room(mem_start, mem_end, l, align); - call_prom("getprop", 4, 1, node, pname, valp, l); + valp = make_room(mem_start, mem_end, l, 4); + call_prom("getprop", 4, 1, node, RELOC(pname), valp, l); *mem_start = _ALIGN(*mem_start, 4); } /* Add a "linux,phandle" property. */ soff = dt_find_string(RELOC("linux,phandle")); if (soff == 0) - prom_printf("WARNING: Can't find string index for " - " node %s\n", path); + prom_printf("WARNING: Can't find string index for" + " node %s\n", path); else { dt_push_token(OF_DT_PROP, mem_start, mem_end); dt_push_token(4, mem_start, mem_end); @@ -1679,7 +1695,7 @@ static void __init scan_dt_build_struct(phandle node, unsigned long *mem_start, /* do all our children */ child = call_prom("child", 1, 1, node); - while (child != (phandle)0) { + while (child != 0) { scan_dt_build_struct(child, mem_start, mem_end); child = call_prom("peer", 1, 1, child); } @@ -1718,7 +1734,8 @@ static void __init flatten_device_tree(void) /* Build header and make room for mem rsv map */ mem_start = _ALIGN(mem_start, 4); - hdr = make_room(&mem_start, &mem_end, sizeof(struct boot_param_header), 4); + hdr = make_room(&mem_start, &mem_end, + sizeof(struct boot_param_header), 4); RELOC(dt_header_start) = (unsigned long)hdr; rsvmap = make_room(&mem_start, &mem_end, sizeof(mem_reserve_map), 8); @@ -1731,11 +1748,11 @@ static void __init flatten_device_tree(void) namep = make_room(&mem_start, &mem_end, 16, 1); strcpy(namep, RELOC("linux,phandle")); mem_start = (unsigned long)namep + strlen(namep) + 1; - RELOC(dt_string_end) = mem_start; /* Build string array */ prom_printf("Building dt strings...\n"); scan_dt_build_strings(root, &mem_start, &mem_end); + RELOC(dt_string_end) = mem_start; /* Build structure */ mem_start = PAGE_ALIGN(mem_start); @@ -1750,9 +1767,11 @@ static void __init flatten_device_tree(void) hdr->totalsize = RELOC(dt_struct_end) - RELOC(dt_header_start); hdr->off_dt_struct = RELOC(dt_struct_start) - RELOC(dt_header_start); hdr->off_dt_strings = RELOC(dt_string_start) - RELOC(dt_header_start); + hdr->dt_strings_size = RELOC(dt_string_end) - RELOC(dt_string_start); hdr->off_mem_rsvmap = ((unsigned long)rsvmap) - RELOC(dt_header_start); hdr->version = OF_DT_VERSION; - hdr->last_comp_version = 1; + /* Version 16 is not backward compatible */ + hdr->last_comp_version = 0x10; /* Reserve the whole thing and copy the reserve map in, we * also bump mem_reserve_cnt to cause further reservations to @@ -1808,6 +1827,9 @@ static void __init fixup_device_tree(void) /* does it need fixup ? */ if (prom_getproplen(i2c, "interrupts") > 0) return; + + prom_printf("fixing up bogus interrupts for u3 i2c...\n"); + /* interrupt on this revision of u3 is number 0 and level */ interrupts[0] = 0; interrupts[1] = 1; diff --git a/include/asm-ppc64/prom.h b/include/asm-ppc64/prom.h index 04b1a84f7ca3..dc5330b39509 100644 --- a/include/asm-ppc64/prom.h +++ b/include/asm-ppc64/prom.h @@ -22,13 +22,15 @@ #define RELOC(x) (*PTRRELOC(&(x))) /* Definitions used by the flattened device tree */ -#define OF_DT_HEADER 0xd00dfeed /* 4: version, 4: total size */ -#define OF_DT_BEGIN_NODE 0x1 /* Start node: full name */ +#define OF_DT_HEADER 0xd00dfeed /* marker */ +#define OF_DT_BEGIN_NODE 0x1 /* Start of node, full name */ #define OF_DT_END_NODE 0x2 /* End node */ -#define OF_DT_PROP 0x3 /* Property: name off, size, content */ +#define OF_DT_PROP 0x3 /* Property: name off, size, + * content */ +#define OF_DT_NOP 0x4 /* nop */ #define OF_DT_END 0x9 -#define OF_DT_VERSION 1 +#define OF_DT_VERSION 0x10 /* * This is what gets passed to the kernel by prom_init or kexec @@ -54,7 +56,9 @@ struct boot_param_header u32 version; /* format version */ u32 last_comp_version; /* last compatible version */ /* version 2 fields below */ - u32 boot_cpuid_phys; /* Which physical CPU id we're booting on */ + u32 boot_cpuid_phys; /* Physical CPU id we're booting on */ + /* version 3 fields below */ + u32 dt_strings_size; /* size of the DT strings block */ }; From 293da76b3d4c2f362f906bce8c5d2e053bdf8d44 Mon Sep 17 00:00:00 2001 From: Jake Moilanen Date: Thu, 9 Jun 2005 09:31:12 -0500 Subject: [PATCH 04/37] [PATCH] ppc64: PCI device-node failure detection OpenFirmware marks devices as failed in the device-tree when a hardware problem is detected. The kernel needs to fail config reads/writes to prevent a kernel crash when incorrect data is read. This patch validates that the device-node is not marked "fail" when config space reads/writes are attempted. Signed-off-by: Jake Moilanen Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/rtas_pci.c | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/arch/ppc64/kernel/rtas_pci.c b/arch/ppc64/kernel/rtas_pci.c index 1048817befb8..1dccadaddd1d 100644 --- a/arch/ppc64/kernel/rtas_pci.c +++ b/arch/ppc64/kernel/rtas_pci.c @@ -58,6 +58,21 @@ static int config_access_valid(struct device_node *dn, int where) return 0; } +static int of_device_available(struct device_node * dn) +{ + char * status; + + status = get_property(dn, "status", NULL); + + if (!status) + return 1; + + if (!strcmp(status, "okay")) + return 1; + + return 0; +} + static int rtas_read_config(struct device_node *dn, int where, int size, u32 *val) { int returnval = -1; @@ -103,7 +118,7 @@ static int rtas_pci_read_config(struct pci_bus *bus, /* Search only direct children of the bus */ for (dn = busdn->child; dn; dn = dn->sibling) - if (dn->devfn == devfn) + if (dn->devfn == devfn && of_device_available(dn)) return rtas_read_config(dn, where, size, val); return PCIBIOS_DEVICE_NOT_FOUND; } @@ -146,7 +161,7 @@ static int rtas_pci_write_config(struct pci_bus *bus, /* Search only direct children of the bus */ for (dn = busdn->child; dn; dn = dn->sibling) - if (dn->devfn == devfn) + if (dn->devfn == devfn && of_device_available(dn)) return rtas_write_config(dn, where, size, val); return PCIBIOS_DEVICE_NOT_FOUND; } From 6020164499ff3a61cd8bebceb9e294a155079f71 Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Tue, 28 Jun 2005 16:48:04 -0700 Subject: [PATCH 05/37] [PATCH] ppc64: change duplicate Kconfig menu "General setup" to "Bus Options" arch/ppc64/Kconfig defines a "General setup" menu, but also sources init/Kconfig which also defines a "General setup" menu. Both of these menus appear at the top level of make menuconfig. Having two menus with the same name is confusing. This patch renames the ppc64/Kconfig menu to be "Bus Options" and moves options in this menu which are not bus related to the end of the "Platform support" menu. There are many variations among architectures on the exact naming of the "Bus Options" menu. I chose to use the simplest one, which is also used in arch/ppc/Kconfig. Signed-off-by: Frank Rowand Signed-off-by: Paul Mackerras --- arch/ppc64/Kconfig | 68 +++++++++++++++++++++++----------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index 2ce87836c671..4d4f81c65012 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig @@ -350,13 +350,46 @@ config SECCOMP If unsure, say Y. Only embedded should say N here. +source "fs/Kconfig.binfmt" + +config HOTPLUG_CPU + bool "Support for hot-pluggable CPUs" + depends on SMP && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC) + select HOTPLUG + ---help--- + Say Y here to be able to turn CPUs off and on. + + Say N if you are unsure. + +config PROC_DEVICETREE + bool "Support for Open Firmware device tree in /proc" + depends on !PPC_ISERIES + help + This option adds a device-tree directory under /proc which contains + an image of the device tree that the kernel copies from Open + Firmware. If unsure, say Y here. + +config CMDLINE_BOOL + bool "Default bootloader kernel arguments" + depends on !PPC_ISERIES + +config CMDLINE + string "Initial kernel command string" + depends on CMDLINE_BOOL + default "console=ttyS0,9600 console=tty0 root=/dev/sda2" + help + On some platforms, there is currently no way for the boot loader to + pass arguments to the kernel. For these platforms, you can supply + some command-line options at build time by entering them here. In + most cases you will need to specify the root device here. + endmenu config ISA_DMA_API bool default y -menu "General setup" +menu "Bus Options" config ISA bool @@ -389,45 +422,12 @@ config PCI_DOMAINS bool default PCI -source "fs/Kconfig.binfmt" - source "drivers/pci/Kconfig" -config HOTPLUG_CPU - bool "Support for hot-pluggable CPUs" - depends on SMP && EXPERIMENTAL && (PPC_PSERIES || PPC_PMAC) - select HOTPLUG - ---help--- - Say Y here to be able to turn CPUs off and on. - - Say N if you are unsure. - source "drivers/pcmcia/Kconfig" source "drivers/pci/hotplug/Kconfig" -config PROC_DEVICETREE - bool "Support for Open Firmware device tree in /proc" - depends on !PPC_ISERIES - help - This option adds a device-tree directory under /proc which contains - an image of the device tree that the kernel copies from Open - Firmware. If unsure, say Y here. - -config CMDLINE_BOOL - bool "Default bootloader kernel arguments" - depends on !PPC_ISERIES - -config CMDLINE - string "Initial kernel command string" - depends on CMDLINE_BOOL - default "console=ttyS0,9600 console=tty0 root=/dev/sda2" - help - On some platforms, there is currently no way for the boot loader to - pass arguments to the kernel. For these platforms, you can supply - some command-line options at build time by entering them here. In - most cases you will need to specify the root device here. - endmenu source "net/Kconfig" From 3e494c80481653bbc810b4e67651097595ea0294 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 12 Jul 2005 17:40:17 +1000 Subject: [PATCH 06/37] [PATCH] ppc64: split iSeries specific parts out of vio.c This patch splits the iSeries specific parts out of vio.c. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/Makefile | 4 +- arch/ppc64/kernel/iSeries_vio.c | 133 ++++++++++++++++++++++++++++ arch/ppc64/kernel/vio.c | 149 +++++--------------------------- include/asm-ppc64/vio.h | 7 ++ 4 files changed, 167 insertions(+), 126 deletions(-) create mode 100644 arch/ppc64/kernel/iSeries_vio.c diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile index 2ecccb6b4f8c..a22c94f6b2db 100644 --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -50,7 +50,9 @@ obj-$(CONFIG_LPARCFG) += lparcfg.o obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_HVCS) += hvcserver.o -obj-$(CONFIG_IBMVIO) += vio.o + +vio-obj-$(CONFIG_PPC_ISERIES) += iSeries_vio.o +obj-$(CONFIG_IBMVIO) += vio.o $(vio-obj-y) obj-$(CONFIG_XICS) += xics.o obj-$(CONFIG_MPIC) += mpic.o diff --git a/arch/ppc64/kernel/iSeries_vio.c b/arch/ppc64/kernel/iSeries_vio.c new file mode 100644 index 000000000000..e876b4380278 --- /dev/null +++ b/arch/ppc64/kernel/iSeries_vio.c @@ -0,0 +1,133 @@ +/* + * IBM PowerPC iSeries Virtual I/O Infrastructure Support. + * + * Copyright (c) 2005 Stephen Rothwell, IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +struct device *iSeries_vio_dev = &vio_bus_device.dev; +EXPORT_SYMBOL(iSeries_vio_dev); + +static struct iommu_table veth_iommu_table; +static struct iommu_table vio_iommu_table; + +void __init iommu_vio_init(void) +{ + struct iommu_table *t; + struct iommu_table_cb cb; + unsigned long cbp; + unsigned long itc_entries; + + cb.itc_busno = 255; /* Bus 255 is the virtual bus */ + cb.itc_virtbus = 0xff; /* Ask for virtual bus */ + + cbp = virt_to_abs(&cb); + HvCallXm_getTceTableParms(cbp); + + itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry); + veth_iommu_table.it_size = itc_entries / 2; + veth_iommu_table.it_busno = cb.itc_busno; + veth_iommu_table.it_offset = cb.itc_offset; + veth_iommu_table.it_index = cb.itc_index; + veth_iommu_table.it_type = TCE_VB; + veth_iommu_table.it_blocksize = 1; + + t = iommu_init_table(&veth_iommu_table); + + if (!t) + printk("Virtual Bus VETH TCE table failed.\n"); + + vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size; + vio_iommu_table.it_busno = cb.itc_busno; + vio_iommu_table.it_offset = cb.itc_offset + + veth_iommu_table.it_size; + vio_iommu_table.it_index = cb.itc_index; + vio_iommu_table.it_type = TCE_VB; + vio_iommu_table.it_blocksize = 1; + + t = iommu_init_table(&vio_iommu_table); + + if (!t) + printk("Virtual Bus VIO TCE table failed.\n"); +} + +/** + * vio_register_device: - Register a new vio device. + * @voidev: The device to register. + */ +static struct vio_dev *__init vio_register_device_iseries(char *type, + uint32_t unit_num) +{ + struct vio_dev *viodev; + + /* allocate a vio_dev for this node */ + viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); + if (!viodev) + return NULL; + memset(viodev, 0, sizeof(struct vio_dev)); + + snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num); + + return vio_register_device_common(viodev, viodev->dev.bus_id, type, + unit_num, &vio_iommu_table); +} + +void __init probe_bus_iseries(void) +{ + HvLpIndexMap vlan_map; + struct vio_dev *viodev; + int i; + + /* there is only one of each of these */ + vio_register_device_iseries("viocons", 0); + vio_register_device_iseries("vscsi", 0); + + vlan_map = HvLpConfig_getVirtualLanIndexMap(); + for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) { + if ((vlan_map & (0x8000 >> i)) == 0) + continue; + viodev = vio_register_device_iseries("vlan", i); + /* veth is special and has it own iommu_table */ + viodev->iommu_table = &veth_iommu_table; + } + for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++) + vio_register_device_iseries("viodasd", i); + for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++) + vio_register_device_iseries("viocd", i); + for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++) + vio_register_device_iseries("viotape", i); +} + +/** + * vio_bus_init_iseries: - Initialize the iSeries virtual IO bus + */ +static int __init vio_bus_init_iseries(void) +{ + int err; + + err = vio_bus_init(); + if (err == 0) { + vio_bus_device.iommu_table = &vio_iommu_table; + iSeries_vio_dev = &vio_bus_device.dev; + probe_bus_iseries(); + } + return err; +} + +__initcall(vio_bus_init_iseries); diff --git a/arch/ppc64/kernel/vio.c b/arch/ppc64/kernel/vio.c index 0c0ba71ac0e8..4b9e3712e384 100644 --- a/arch/ppc64/kernel/vio.c +++ b/arch/ppc64/kernel/vio.c @@ -25,10 +25,6 @@ #include #include #include -#include -#include -#include -#include #define DBGENTER() pr_debug("%s entered\n", __FUNCTION__) @@ -41,26 +37,14 @@ static const struct vio_device_id *vio_match_device( static struct iommu_table *vio_build_iommu_table(struct vio_dev *); static int vio_num_address_cells; #endif -#ifdef CONFIG_PPC_ISERIES -static struct iommu_table veth_iommu_table; -static struct iommu_table vio_iommu_table; -#endif -static struct vio_dev vio_bus_device = { /* fake "parent" device */ +struct vio_dev vio_bus_device = { /* fake "parent" device */ .name = vio_bus_device.dev.bus_id, .type = "", -#ifdef CONFIG_PPC_ISERIES - .iommu_table = &vio_iommu_table, -#endif .dev.bus_id = "vio", .dev.bus = &vio_bus_type, }; #ifdef CONFIG_PPC_ISERIES -static struct vio_dev *__init vio_register_device_iseries(char *type, - uint32_t unit_num); - -struct device *iSeries_vio_dev = &vio_bus_device.dev; -EXPORT_SYMBOL(iSeries_vio_dev); #define device_is_compatible(a, b) 1 @@ -157,48 +141,6 @@ static const struct vio_device_id * vio_match_device(const struct vio_device_id return NULL; } -#ifdef CONFIG_PPC_ISERIES -void __init iommu_vio_init(void) -{ - struct iommu_table *t; - struct iommu_table_cb cb; - unsigned long cbp; - unsigned long itc_entries; - - cb.itc_busno = 255; /* Bus 255 is the virtual bus */ - cb.itc_virtbus = 0xff; /* Ask for virtual bus */ - - cbp = virt_to_abs(&cb); - HvCallXm_getTceTableParms(cbp); - - itc_entries = cb.itc_size * PAGE_SIZE / sizeof(union tce_entry); - veth_iommu_table.it_size = itc_entries / 2; - veth_iommu_table.it_busno = cb.itc_busno; - veth_iommu_table.it_offset = cb.itc_offset; - veth_iommu_table.it_index = cb.itc_index; - veth_iommu_table.it_type = TCE_VB; - veth_iommu_table.it_blocksize = 1; - - t = iommu_init_table(&veth_iommu_table); - - if (!t) - printk("Virtual Bus VETH TCE table failed.\n"); - - vio_iommu_table.it_size = itc_entries - veth_iommu_table.it_size; - vio_iommu_table.it_busno = cb.itc_busno; - vio_iommu_table.it_offset = cb.itc_offset + - veth_iommu_table.it_size; - vio_iommu_table.it_index = cb.itc_index; - vio_iommu_table.it_type = TCE_VB; - vio_iommu_table.it_blocksize = 1; - - t = iommu_init_table(&vio_iommu_table); - - if (!t) - printk("Virtual Bus VIO TCE table failed.\n"); -} -#endif - #ifdef CONFIG_PPC_PSERIES static void probe_bus_pseries(void) { @@ -223,38 +165,10 @@ static void probe_bus_pseries(void) } #endif -#ifdef CONFIG_PPC_ISERIES -static void probe_bus_iseries(void) -{ - HvLpIndexMap vlan_map = HvLpConfig_getVirtualLanIndexMap(); - struct vio_dev *viodev; - int i; - - /* there is only one of each of these */ - vio_register_device_iseries("viocons", 0); - vio_register_device_iseries("vscsi", 0); - - vlan_map = HvLpConfig_getVirtualLanIndexMap(); - for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) { - if ((vlan_map & (0x8000 >> i)) == 0) - continue; - viodev = vio_register_device_iseries("vlan", i); - /* veth is special and has it own iommu_table */ - viodev->iommu_table = &veth_iommu_table; - } - for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++) - vio_register_device_iseries("viodasd", i); - for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++) - vio_register_device_iseries("viocd", i); - for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++) - vio_register_device_iseries("viotape", i); -} -#endif - /** * vio_bus_init: - Initialize the virtual IO bus */ -static int __init vio_bus_init(void) +int __init vio_bus_init(void) { int err; @@ -264,25 +178,35 @@ static int __init vio_bus_init(void) return err; } - /* the fake parent of all vio devices, just to give us a nice directory */ + /* the fake parent of all vio devices, just to give us + * a nice directory + */ err = device_register(&vio_bus_device.dev); if (err) { - printk(KERN_WARNING "%s: device_register returned %i\n", __FUNCTION__, - err); + printk(KERN_WARNING "%s: device_register returned %i\n", + __FUNCTION__, err); return err; } -#ifdef CONFIG_PPC_PSERIES - probe_bus_pseries(); -#endif -#ifdef CONFIG_PPC_ISERIES - probe_bus_iseries(); -#endif - return 0; } -__initcall(vio_bus_init); +#ifdef CONFIG_PPC_PSERIES +/** + * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus + */ +static int __init vio_bus_init_pseries(void) +{ + int err; + + err = vio_bus_init(); + if (err == 0) + probe_bus_pseries(); + return err; +} + +__initcall(vio_bus_init_pseries); +#endif /* vio_dev refcount hit 0 */ static void __devinit vio_dev_release(struct device *dev) @@ -312,7 +236,7 @@ static ssize_t viodev_show_name(struct device *dev, struct device_attribute *att } DEVICE_ATTR(name, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_name, NULL); -static struct vio_dev * __devinit vio_register_device_common( +struct vio_dev * __devinit vio_register_device_common( struct vio_dev *viodev, char *name, char *type, uint32_t unit_address, struct iommu_table *iommu_table) { @@ -408,31 +332,6 @@ struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) EXPORT_SYMBOL(vio_register_device_node); #endif -#ifdef CONFIG_PPC_ISERIES -/** - * vio_register_device: - Register a new vio device. - * @voidev: The device to register. - */ -static struct vio_dev *__init vio_register_device_iseries(char *type, - uint32_t unit_num) -{ - struct vio_dev *viodev; - - DBGENTER(); - - /* allocate a vio_dev for this node */ - viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); - if (!viodev) - return NULL; - memset(viodev, 0, sizeof(struct vio_dev)); - - snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%s%d", type, unit_num); - - return vio_register_device_common(viodev, viodev->dev.bus_id, type, - unit_num, &vio_iommu_table); -} -#endif - void __devinit vio_unregister_device(struct vio_dev *viodev) { DBGENTER(); diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h index 20cd98ee6337..1e6d4c4b83a0 100644 --- a/include/asm-ppc64/vio.h +++ b/include/asm-ppc64/vio.h @@ -56,6 +56,9 @@ const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length); int vio_get_irq(struct vio_dev *dev); int vio_enable_interrupts(struct vio_dev *dev); int vio_disable_interrupts(struct vio_dev *dev); +extern struct vio_dev * __devinit vio_register_device_common( + struct vio_dev *viodev, char *name, char *type, + uint32_t unit_address, struct iommu_table *iommu_table); extern struct dma_mapping_ops vio_dma_ops; @@ -95,9 +98,13 @@ struct vio_dev { struct device dev; }; +extern struct vio_dev vio_bus_device; + static inline struct vio_dev *to_vio_dev(struct device *dev) { return container_of(dev, struct vio_dev, dev); } +extern int vio_bus_init(void); + #endif /* _ASM_VIO_H */ From 8c65b5c955b8598d9c63b4e97392377269873a54 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 12 Jul 2005 17:42:49 +1000 Subject: [PATCH 07/37] [PATCH] ppc64: move iSeries vio iommu init Since the iSeries vio iommu tables cannot be used until after the vio bus has been initialised, move the initialisation of the tables to there. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/iSeries_vio.c | 3 ++- arch/ppc64/mm/init.c | 3 --- include/asm-ppc64/iommu.h | 3 --- 3 files changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/ppc64/kernel/iSeries_vio.c b/arch/ppc64/kernel/iSeries_vio.c index e876b4380278..48f0ebf4405d 100644 --- a/arch/ppc64/kernel/iSeries_vio.c +++ b/arch/ppc64/kernel/iSeries_vio.c @@ -27,7 +27,7 @@ EXPORT_SYMBOL(iSeries_vio_dev); static struct iommu_table veth_iommu_table; static struct iommu_table vio_iommu_table; -void __init iommu_vio_init(void) +static void __init iommu_vio_init(void) { struct iommu_table *t; struct iommu_table_cb cb; @@ -123,6 +123,7 @@ static int __init vio_bus_init_iseries(void) err = vio_bus_init(); if (err == 0) { + iommu_vio_init(); vio_bus_device.iommu_table = &vio_iommu_table; iSeries_vio_dev = &vio_bus_device.dev; probe_bus_iseries(); diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index 87f256df8de5..9edfe267123e 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -686,9 +686,6 @@ void __init mem_init(void) mem_init_done = 1; -#ifdef CONFIG_PPC_ISERIES - iommu_vio_init(); -#endif /* Initialize the vDSO */ vdso_init(); } diff --git a/include/asm-ppc64/iommu.h b/include/asm-ppc64/iommu.h index 729de5cc21d9..72dcf8116b04 100644 --- a/include/asm-ppc64/iommu.h +++ b/include/asm-ppc64/iommu.h @@ -104,9 +104,6 @@ extern void iommu_devnode_init_pSeries(struct device_node *dn); #ifdef CONFIG_PPC_ISERIES -/* Initializes tables for bio buses */ -extern void __init iommu_vio_init(void); - struct iSeries_Device_Node; /* Creates table for an individual device node */ extern void iommu_devnode_init_iSeries(struct iSeries_Device_Node *dn); From 6312236fe82bbd3b0e1dee60b3eb3b270a2f6aeb Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 12 Jul 2005 17:45:27 +1000 Subject: [PATCH 08/37] [PATCH] ppc64: make the bus matching function platform specific This patch allows us to have a different bus if matching function for each platform. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/iSeries_vio.c | 12 +++++++++++- arch/ppc64/kernel/vio.c | 28 +++++++++++++++++++--------- include/asm-ppc64/vio.h | 3 ++- 3 files changed, 32 insertions(+), 11 deletions(-) diff --git a/arch/ppc64/kernel/iSeries_vio.c b/arch/ppc64/kernel/iSeries_vio.c index 48f0ebf4405d..2656b1ca834d 100644 --- a/arch/ppc64/kernel/iSeries_vio.c +++ b/arch/ppc64/kernel/iSeries_vio.c @@ -114,6 +114,16 @@ void __init probe_bus_iseries(void) vio_register_device_iseries("viotape", i); } +/** + * vio_match_device_iseries: - Tell if a iSeries VIO device matches a + * vio_device_id + */ +static int vio_match_device_iseries(const struct vio_device_id *id, + const struct vio_dev *dev) +{ + return strncmp(dev->type, id->type, strlen(id->type)) == 0; +} + /** * vio_bus_init_iseries: - Initialize the iSeries virtual IO bus */ @@ -121,7 +131,7 @@ static int __init vio_bus_init_iseries(void) { int err; - err = vio_bus_init(); + err = vio_bus_init(vio_match_device_iseries); if (err == 0) { iommu_vio_init(); vio_bus_device.iommu_table = &vio_iommu_table; diff --git a/arch/ppc64/kernel/vio.c b/arch/ppc64/kernel/vio.c index 4b9e3712e384..8a243cad0f8b 100644 --- a/arch/ppc64/kernel/vio.c +++ b/arch/ppc64/kernel/vio.c @@ -44,11 +44,8 @@ struct vio_dev vio_bus_device = { /* fake "parent" device */ .dev.bus = &vio_bus_type, }; -#ifdef CONFIG_PPC_ISERIES - -#define device_is_compatible(a, b) 1 - -#endif +static int (*is_match)(const struct vio_device_id *id, + const struct vio_dev *dev); /* convert from struct device to struct vio_dev and pass to driver. * dev->driver has already been set by generic code because vio_bus_match @@ -133,8 +130,7 @@ static const struct vio_device_id * vio_match_device(const struct vio_device_id DBGENTER(); while (ids->type) { - if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) && - device_is_compatible(dev->dev.platform_data, ids->compat)) + if (is_match(ids, dev)) return ids; ids++; } @@ -168,10 +164,13 @@ static void probe_bus_pseries(void) /** * vio_bus_init: - Initialize the virtual IO bus */ -int __init vio_bus_init(void) +int __init vio_bus_init(int (*match_func)(const struct vio_device_id *id, + const struct vio_dev *dev)) { int err; + is_match = match_func; + err = bus_register(&vio_bus_type); if (err) { printk(KERN_ERR "failed to register VIO bus\n"); @@ -192,6 +191,17 @@ int __init vio_bus_init(void) } #ifdef CONFIG_PPC_PSERIES +/** + * vio_match_device_pseries: - Tell if a pSeries VIO device matches a + * vio_device_id + */ +static int vio_match_device_pseries(const struct vio_device_id *id, + const struct vio_dev *dev) +{ + return (strncmp(dev->type, id->type, strlen(id->type)) == 0) && + device_is_compatible(dev->dev.platform_data, id->compat); +} + /** * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus */ @@ -199,7 +209,7 @@ static int __init vio_bus_init_pseries(void) { int err; - err = vio_bus_init(); + err = vio_bus_init(vio_match_device_pseries); if (err == 0) probe_bus_pseries(); return err; diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h index 1e6d4c4b83a0..70644a232210 100644 --- a/include/asm-ppc64/vio.h +++ b/include/asm-ppc64/vio.h @@ -105,6 +105,7 @@ static inline struct vio_dev *to_vio_dev(struct device *dev) return container_of(dev, struct vio_dev, dev); } -extern int vio_bus_init(void); +extern int vio_bus_init(int (*is_match)(const struct vio_device_id *id, + const struct vio_dev *dev)); #endif /* _ASM_VIO_H */ From 19dbd0f6a74f7529d6d49dd50ad6b31adbe0598d Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Tue, 12 Jul 2005 17:50:26 +1000 Subject: [PATCH 09/37] [PATCH] ppc64: split pSeries specific parts out of vio.c This patch just splits out the pSeries specific parts of vio.c. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/Makefile | 1 + arch/ppc64/kernel/iSeries_vio.c | 2 +- arch/ppc64/kernel/pSeries_vio.c | 266 +++++++++++++++++++++++++++++ arch/ppc64/kernel/vio.c | 290 ++------------------------------ include/asm-ppc64/vio.h | 4 +- 5 files changed, 284 insertions(+), 279 deletions(-) create mode 100644 arch/ppc64/kernel/pSeries_vio.c diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile index a22c94f6b2db..cbf87dcac92a 100644 --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -51,6 +51,7 @@ obj-$(CONFIG_HVC_CONSOLE) += hvconsole.o obj-$(CONFIG_BOOTX_TEXT) += btext.o obj-$(CONFIG_HVCS) += hvcserver.o +vio-obj-$(CONFIG_PPC_PSERIES) += pSeries_vio.o vio-obj-$(CONFIG_PPC_ISERIES) += iSeries_vio.o obj-$(CONFIG_IBMVIO) += vio.o $(vio-obj-y) obj-$(CONFIG_XICS) += xics.o diff --git a/arch/ppc64/kernel/iSeries_vio.c b/arch/ppc64/kernel/iSeries_vio.c index 2656b1ca834d..b4268cc4ba48 100644 --- a/arch/ppc64/kernel/iSeries_vio.c +++ b/arch/ppc64/kernel/iSeries_vio.c @@ -131,7 +131,7 @@ static int __init vio_bus_init_iseries(void) { int err; - err = vio_bus_init(vio_match_device_iseries); + err = vio_bus_init(vio_match_device_iseries, NULL, NULL); if (err == 0) { iommu_vio_init(); vio_bus_device.iommu_table = &vio_iommu_table; diff --git a/arch/ppc64/kernel/pSeries_vio.c b/arch/ppc64/kernel/pSeries_vio.c new file mode 100644 index 000000000000..338f9e1bdc09 --- /dev/null +++ b/arch/ppc64/kernel/pSeries_vio.c @@ -0,0 +1,266 @@ +/* + * IBM PowerPC pSeries Virtual I/O Infrastructure Support. + * + * Copyright (c) 2003-2005 IBM Corp. + * Dave Engebretsen engebret@us.ibm.com + * Santiago Leon santil@us.ibm.com + * Hollis Blanchard + * Stephen Rothwell + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +extern struct subsystem devices_subsys; /* needed for vio_find_name() */ + +static void probe_bus_pseries(void) +{ + struct device_node *node_vroot, *of_node; + + node_vroot = find_devices("vdevice"); + if ((node_vroot == NULL) || (node_vroot->child == NULL)) + /* this machine doesn't do virtual IO, and that's ok */ + return; + + /* + * Create struct vio_devices for each virtual device in the device tree. + * Drivers will associate with them later. + */ + for (of_node = node_vroot->child; of_node != NULL; + of_node = of_node->sibling) { + printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node); + vio_register_device_node(of_node); + } +} + +/** + * vio_match_device_pseries: - Tell if a pSeries VIO device matches a + * vio_device_id + */ +static int vio_match_device_pseries(const struct vio_device_id *id, + const struct vio_dev *dev) +{ + return (strncmp(dev->type, id->type, strlen(id->type)) == 0) && + device_is_compatible(dev->dev.platform_data, id->compat); +} + +static void vio_release_device_pseries(struct device *dev) +{ + /* XXX free TCE table */ + of_node_put(dev->platform_data); +} + +static ssize_t viodev_show_devspec(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct device_node *of_node = dev->platform_data; + + return sprintf(buf, "%s\n", of_node->full_name); +} +DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL); + +static void vio_unregister_device_pseries(struct vio_dev *viodev) +{ + device_remove_file(&viodev->dev, &dev_attr_devspec); +} + +/** + * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus + */ +static int __init vio_bus_init_pseries(void) +{ + int err; + + err = vio_bus_init(vio_match_device_pseries, + vio_unregister_device_pseries, + vio_release_device_pseries); + if (err == 0) + probe_bus_pseries(); + return err; +} + +__initcall(vio_bus_init_pseries); + +/** + * vio_build_iommu_table: - gets the dma information from OF and + * builds the TCE tree. + * @dev: the virtual device. + * + * Returns a pointer to the built tce tree, or NULL if it can't + * find property. +*/ +static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev) +{ + unsigned int *dma_window; + struct iommu_table *newTceTable; + unsigned long offset; + int dma_window_property_size; + + dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size); + if(!dma_window) { + return NULL; + } + + newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL); + + /* There should be some code to extract the phys-encoded offset + using prom_n_addr_cells(). However, according to a comment + on earlier versions, it's always zero, so we don't bother */ + offset = dma_window[1] >> PAGE_SHIFT; + + /* TCE table size - measured in tce entries */ + newTceTable->it_size = dma_window[4] >> PAGE_SHIFT; + /* offset for VIO should always be 0 */ + newTceTable->it_offset = offset; + newTceTable->it_busno = 0; + newTceTable->it_index = (unsigned long)dma_window[0]; + newTceTable->it_type = TCE_VB; + + return iommu_init_table(newTceTable); +} + +/** + * vio_register_device_node: - Register a new vio device. + * @of_node: The OF node for this device. + * + * Creates and initializes a vio_dev structure from the data in + * of_node (dev.platform_data) and adds it to the list of virtual devices. + * Returns a pointer to the created vio_dev or NULL if node has + * NULL device_type or compatible fields. + */ +struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) +{ + struct vio_dev *viodev; + unsigned int *unit_address; + unsigned int *irq_p; + + /* we need the 'device_type' property, in order to match with drivers */ + if ((NULL == of_node->type)) { + printk(KERN_WARNING + "%s: node %s missing 'device_type'\n", __FUNCTION__, + of_node->name ? of_node->name : ""); + return NULL; + } + + unit_address = (unsigned int *)get_property(of_node, "reg", NULL); + if (!unit_address) { + printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__, + of_node->name ? of_node->name : ""); + return NULL; + } + + /* allocate a vio_dev for this node */ + viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); + if (!viodev) { + return NULL; + } + memset(viodev, 0, sizeof(struct vio_dev)); + + viodev->dev.platform_data = of_node_get(of_node); + + viodev->irq = NO_IRQ; + irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL); + if (irq_p) { + int virq = virt_irq_create_mapping(*irq_p); + if (virq == NO_IRQ) { + printk(KERN_ERR "Unable to allocate interrupt " + "number for %s\n", of_node->full_name); + } else + viodev->irq = irq_offset_up(virq); + } + + snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address); + + /* register with generic device framework */ + if (vio_register_device_common(viodev, of_node->name, of_node->type, + *unit_address, vio_build_iommu_table(viodev)) + == NULL) { + /* XXX free TCE table */ + kfree(viodev); + return NULL; + } + device_create_file(&viodev->dev, &dev_attr_devspec); + + return viodev; +} +EXPORT_SYMBOL(vio_register_device_node); + +/** + * vio_get_attribute: - get attribute for virtual device + * @vdev: The vio device to get property. + * @which: The property/attribute to be extracted. + * @length: Pointer to length of returned data size (unused if NULL). + * + * Calls prom.c's get_property() to return the value of the + * attribute specified by the preprocessor constant @which +*/ +const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length) +{ + return get_property(vdev->dev.platform_data, (char*)which, length); +} +EXPORT_SYMBOL(vio_get_attribute); + +/* vio_find_name() - internal because only vio.c knows how we formatted the + * kobject name + * XXX once vio_bus_type.devices is actually used as a kset in + * drivers/base/bus.c, this function should be removed in favor of + * "device_find(kobj_name, &vio_bus_type)" + */ +static struct vio_dev *vio_find_name(const char *kobj_name) +{ + struct kobject *found; + + found = kset_find_obj(&devices_subsys.kset, kobj_name); + if (!found) + return NULL; + + return to_vio_dev(container_of(found, struct device, kobj)); +} + +/** + * vio_find_node - find an already-registered vio_dev + * @vnode: device_node of the virtual device we're looking for + */ +struct vio_dev *vio_find_node(struct device_node *vnode) +{ + uint32_t *unit_address; + char kobj_name[BUS_ID_SIZE]; + + /* construct the kobject name from the device node */ + unit_address = (uint32_t *)get_property(vnode, "reg", NULL); + if (!unit_address) + return NULL; + snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address); + + return vio_find_name(kobj_name); +} +EXPORT_SYMBOL(vio_find_node); + +int vio_enable_interrupts(struct vio_dev *dev) +{ + int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE); + if (rc != H_Success) + printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc); + return rc; +} +EXPORT_SYMBOL(vio_enable_interrupts); + +int vio_disable_interrupts(struct vio_dev *dev) +{ + int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE); + if (rc != H_Success) + printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc); + return rc; +} +EXPORT_SYMBOL(vio_disable_interrupts); diff --git a/arch/ppc64/kernel/vio.c b/arch/ppc64/kernel/vio.c index 8a243cad0f8b..3b790bafcaad 100644 --- a/arch/ppc64/kernel/vio.c +++ b/arch/ppc64/kernel/vio.c @@ -1,10 +1,11 @@ /* * IBM PowerPC Virtual I/O Infrastructure Support. * - * Copyright (c) 2003 IBM Corp. + * Copyright (c) 2003-2005 IBM Corp. * Dave Engebretsen engebret@us.ibm.com * Santiago Leon santil@us.ibm.com * Hollis Blanchard + * Stephen Rothwell * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -14,29 +15,16 @@ #include #include -#include #include -#include #include #include -#include #include #include -#include #include -#include - -#define DBGENTER() pr_debug("%s entered\n", __FUNCTION__) - -extern struct subsystem devices_subsys; /* needed for vio_find_name() */ static const struct vio_device_id *vio_match_device( const struct vio_device_id *, const struct vio_dev *); -#ifdef CONFIG_PPC_PSERIES -static struct iommu_table *vio_build_iommu_table(struct vio_dev *); -static int vio_num_address_cells; -#endif struct vio_dev vio_bus_device = { /* fake "parent" device */ .name = vio_bus_device.dev.bus_id, .type = "", @@ -46,6 +34,8 @@ struct vio_dev vio_bus_device = { /* fake "parent" device */ static int (*is_match)(const struct vio_device_id *id, const struct vio_dev *dev); +static void (*unregister_device_callback)(struct vio_dev *dev); +static void (*release_device_callback)(struct device *dev); /* convert from struct device to struct vio_dev and pass to driver. * dev->driver has already been set by generic code because vio_bus_match @@ -57,8 +47,6 @@ static int vio_bus_probe(struct device *dev) const struct vio_device_id *id; int error = -ENODEV; - DBGENTER(); - if (!viodrv->probe) return error; @@ -76,8 +64,6 @@ static int vio_bus_remove(struct device *dev) struct vio_dev *viodev = to_vio_dev(dev); struct vio_driver *viodrv = to_vio_driver(dev->driver); - DBGENTER(); - if (viodrv->remove) { return viodrv->remove(viodev); } @@ -127,8 +113,6 @@ EXPORT_SYMBOL(vio_unregister_driver); static const struct vio_device_id * vio_match_device(const struct vio_device_id *ids, const struct vio_dev *dev) { - DBGENTER(); - while (ids->type) { if (is_match(ids, dev)) return ids; @@ -137,39 +121,19 @@ static const struct vio_device_id * vio_match_device(const struct vio_device_id return NULL; } -#ifdef CONFIG_PPC_PSERIES -static void probe_bus_pseries(void) -{ - struct device_node *node_vroot, *of_node; - - node_vroot = find_devices("vdevice"); - if ((node_vroot == NULL) || (node_vroot->child == NULL)) - /* this machine doesn't do virtual IO, and that's ok */ - return; - - vio_num_address_cells = prom_n_addr_cells(node_vroot->child); - - /* - * Create struct vio_devices for each virtual device in the device tree. - * Drivers will associate with them later. - */ - for (of_node = node_vroot->child; of_node != NULL; - of_node = of_node->sibling) { - printk(KERN_DEBUG "%s: processing %p\n", __FUNCTION__, of_node); - vio_register_device_node(of_node); - } -} -#endif - /** * vio_bus_init: - Initialize the virtual IO bus */ int __init vio_bus_init(int (*match_func)(const struct vio_device_id *id, - const struct vio_dev *dev)) + const struct vio_dev *dev), + void (*unregister_dev)(struct vio_dev *), + void (*release_dev)(struct device *)) { int err; is_match = match_func; + unregister_device_callback = unregister_dev; + release_device_callback = release_dev; err = bus_register(&vio_bus_type); if (err) { @@ -190,56 +154,14 @@ int __init vio_bus_init(int (*match_func)(const struct vio_device_id *id, return 0; } -#ifdef CONFIG_PPC_PSERIES -/** - * vio_match_device_pseries: - Tell if a pSeries VIO device matches a - * vio_device_id - */ -static int vio_match_device_pseries(const struct vio_device_id *id, - const struct vio_dev *dev) -{ - return (strncmp(dev->type, id->type, strlen(id->type)) == 0) && - device_is_compatible(dev->dev.platform_data, id->compat); -} - -/** - * vio_bus_init_pseries: - Initialize the pSeries virtual IO bus - */ -static int __init vio_bus_init_pseries(void) -{ - int err; - - err = vio_bus_init(vio_match_device_pseries); - if (err == 0) - probe_bus_pseries(); - return err; -} - -__initcall(vio_bus_init_pseries); -#endif - /* vio_dev refcount hit 0 */ static void __devinit vio_dev_release(struct device *dev) { - DBGENTER(); - -#ifdef CONFIG_PPC_PSERIES - /* XXX free TCE table */ - of_node_put(dev->platform_data); -#endif + if (release_device_callback) + release_device_callback(dev); kfree(to_vio_dev(dev)); } -#ifdef CONFIG_PPC_PSERIES -static ssize_t viodev_show_devspec(struct device *dev, struct device_attribute *attr, char *buf) -{ - struct device_node *of_node = dev->platform_data; - - return sprintf(buf, "%s\n", of_node->full_name); -} -DEVICE_ATTR(devspec, S_IRUSR | S_IRGRP | S_IROTH, viodev_show_devspec, NULL); -#endif - static ssize_t viodev_show_name(struct device *dev, struct device_attribute *attr, char *buf) { return sprintf(buf, "%s\n", to_vio_dev(dev)->name); @@ -250,8 +172,6 @@ struct vio_dev * __devinit vio_register_device_common( struct vio_dev *viodev, char *name, char *type, uint32_t unit_address, struct iommu_table *iommu_table) { - DBGENTER(); - viodev->name = name; viodev->type = type; viodev->unit_address = unit_address; @@ -272,197 +192,15 @@ struct vio_dev * __devinit vio_register_device_common( return viodev; } -#ifdef CONFIG_PPC_PSERIES -/** - * vio_register_device_node: - Register a new vio device. - * @of_node: The OF node for this device. - * - * Creates and initializes a vio_dev structure from the data in - * of_node (dev.platform_data) and adds it to the list of virtual devices. - * Returns a pointer to the created vio_dev or NULL if node has - * NULL device_type or compatible fields. - */ -struct vio_dev * __devinit vio_register_device_node(struct device_node *of_node) -{ - struct vio_dev *viodev; - unsigned int *unit_address; - unsigned int *irq_p; - - DBGENTER(); - - /* we need the 'device_type' property, in order to match with drivers */ - if ((NULL == of_node->type)) { - printk(KERN_WARNING - "%s: node %s missing 'device_type'\n", __FUNCTION__, - of_node->name ? of_node->name : ""); - return NULL; - } - - unit_address = (unsigned int *)get_property(of_node, "reg", NULL); - if (!unit_address) { - printk(KERN_WARNING "%s: node %s missing 'reg'\n", __FUNCTION__, - of_node->name ? of_node->name : ""); - return NULL; - } - - /* allocate a vio_dev for this node */ - viodev = kmalloc(sizeof(struct vio_dev), GFP_KERNEL); - if (!viodev) { - return NULL; - } - memset(viodev, 0, sizeof(struct vio_dev)); - - viodev->dev.platform_data = of_node_get(of_node); - - viodev->irq = NO_IRQ; - irq_p = (unsigned int *)get_property(of_node, "interrupts", NULL); - if (irq_p) { - int virq = virt_irq_create_mapping(*irq_p); - if (virq == NO_IRQ) { - printk(KERN_ERR "Unable to allocate interrupt " - "number for %s\n", of_node->full_name); - } else - viodev->irq = irq_offset_up(virq); - } - - snprintf(viodev->dev.bus_id, BUS_ID_SIZE, "%x", *unit_address); - - /* register with generic device framework */ - if (vio_register_device_common(viodev, of_node->name, of_node->type, - *unit_address, vio_build_iommu_table(viodev)) - == NULL) { - /* XXX free TCE table */ - kfree(viodev); - return NULL; - } - device_create_file(&viodev->dev, &dev_attr_devspec); - - return viodev; -} -EXPORT_SYMBOL(vio_register_device_node); -#endif - void __devinit vio_unregister_device(struct vio_dev *viodev) { - DBGENTER(); -#ifdef CONFIG_PPC_PSERIES - device_remove_file(&viodev->dev, &dev_attr_devspec); -#endif + if (unregister_device_callback) + unregister_device_callback(viodev); device_remove_file(&viodev->dev, &dev_attr_name); device_unregister(&viodev->dev); } EXPORT_SYMBOL(vio_unregister_device); -#ifdef CONFIG_PPC_PSERIES -/** - * vio_get_attribute: - get attribute for virtual device - * @vdev: The vio device to get property. - * @which: The property/attribute to be extracted. - * @length: Pointer to length of returned data size (unused if NULL). - * - * Calls prom.c's get_property() to return the value of the - * attribute specified by the preprocessor constant @which -*/ -const void * vio_get_attribute(struct vio_dev *vdev, void* which, int* length) -{ - return get_property(vdev->dev.platform_data, (char*)which, length); -} -EXPORT_SYMBOL(vio_get_attribute); - -/* vio_find_name() - internal because only vio.c knows how we formatted the - * kobject name - * XXX once vio_bus_type.devices is actually used as a kset in - * drivers/base/bus.c, this function should be removed in favor of - * "device_find(kobj_name, &vio_bus_type)" - */ -static struct vio_dev *vio_find_name(const char *kobj_name) -{ - struct kobject *found; - - found = kset_find_obj(&devices_subsys.kset, kobj_name); - if (!found) - return NULL; - - return to_vio_dev(container_of(found, struct device, kobj)); -} - -/** - * vio_find_node - find an already-registered vio_dev - * @vnode: device_node of the virtual device we're looking for - */ -struct vio_dev *vio_find_node(struct device_node *vnode) -{ - uint32_t *unit_address; - char kobj_name[BUS_ID_SIZE]; - - /* construct the kobject name from the device node */ - unit_address = (uint32_t *)get_property(vnode, "reg", NULL); - if (!unit_address) - return NULL; - snprintf(kobj_name, BUS_ID_SIZE, "%x", *unit_address); - - return vio_find_name(kobj_name); -} -EXPORT_SYMBOL(vio_find_node); - -/** - * vio_build_iommu_table: - gets the dma information from OF and builds the TCE tree. - * @dev: the virtual device. - * - * Returns a pointer to the built tce tree, or NULL if it can't - * find property. -*/ -static struct iommu_table * vio_build_iommu_table(struct vio_dev *dev) -{ - unsigned int *dma_window; - struct iommu_table *newTceTable; - unsigned long offset; - int dma_window_property_size; - - dma_window = (unsigned int *) get_property(dev->dev.platform_data, "ibm,my-dma-window", &dma_window_property_size); - if(!dma_window) { - return NULL; - } - - newTceTable = (struct iommu_table *) kmalloc(sizeof(struct iommu_table), GFP_KERNEL); - - /* There should be some code to extract the phys-encoded offset - using prom_n_addr_cells(). However, according to a comment - on earlier versions, it's always zero, so we don't bother */ - offset = dma_window[1] >> PAGE_SHIFT; - - /* TCE table size - measured in tce entries */ - newTceTable->it_size = dma_window[4] >> PAGE_SHIFT; - /* offset for VIO should always be 0 */ - newTceTable->it_offset = offset; - newTceTable->it_busno = 0; - newTceTable->it_index = (unsigned long)dma_window[0]; - newTceTable->it_type = TCE_VB; - - return iommu_init_table(newTceTable); -} - -int vio_enable_interrupts(struct vio_dev *dev) -{ - int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE); - if (rc != H_Success) { - printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc); - } - return rc; -} -EXPORT_SYMBOL(vio_enable_interrupts); - -int vio_disable_interrupts(struct vio_dev *dev) -{ - int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE); - if (rc != H_Success) { - printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc); - } - return rc; -} -EXPORT_SYMBOL(vio_disable_interrupts); -#endif - static dma_addr_t vio_map_single(struct device *dev, void *vaddr, size_t size, enum dma_data_direction direction) { @@ -526,8 +264,6 @@ static int vio_bus_match(struct device *dev, struct device_driver *drv) const struct vio_device_id *ids = vio_drv->id_table; const struct vio_device_id *found_id; - DBGENTER(); - if (!ids) return 0; diff --git a/include/asm-ppc64/vio.h b/include/asm-ppc64/vio.h index 70644a232210..a82e87c1c5fa 100644 --- a/include/asm-ppc64/vio.h +++ b/include/asm-ppc64/vio.h @@ -106,6 +106,8 @@ static inline struct vio_dev *to_vio_dev(struct device *dev) } extern int vio_bus_init(int (*is_match)(const struct vio_device_id *id, - const struct vio_dev *dev)); + const struct vio_dev *dev), + void (*)(struct vio_dev *), + void (*)(struct device *)); #endif /* _ASM_VIO_H */ From 2e2446ea0758cd57dd065962d9544e3f4d44ea2b Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 19 Aug 2005 14:52:31 +1000 Subject: [PATCH 10/37] [PATCH] Remove NACA fixed address constraint Comments in head.S suggest that the iSeries naca has a fixed address, because tools expect to find it there. The only tool which appears to access the naca is addRamDisk, but both the in-kernel version and the version used in RHEL and SuSE in fact locate the NACA the same way as the hypervisor does, by following the pointer in the hvReleaseData structure. Since the requirement for a fixed address seems to be obsolete, this patch removes the naca from head.S and replaces it with a normal C initializer. For good measure, it removes an old version of addRamDisk.c which was sitting, unused, in the ppc32 tree. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc/boot/utils/addRamDisk.c | 203 ------------------------------- arch/ppc64/kernel/LparData.c | 11 ++ arch/ppc64/kernel/head.S | 17 +-- include/asm-ppc64/naca.h | 7 -- 4 files changed, 12 insertions(+), 226 deletions(-) delete mode 100644 arch/ppc/boot/utils/addRamDisk.c diff --git a/arch/ppc/boot/utils/addRamDisk.c b/arch/ppc/boot/utils/addRamDisk.c deleted file mode 100644 index 93400dfcce7f..000000000000 --- a/arch/ppc/boot/utils/addRamDisk.c +++ /dev/null @@ -1,203 +0,0 @@ -#include -#include -#include -#include -#include -#include -#include - -#define ElfHeaderSize (64 * 1024) -#define ElfPages (ElfHeaderSize / 4096) -#define KERNELBASE (0xc0000000) - -void get4k(FILE *file, char *buf ) -{ - unsigned j; - unsigned num = fread(buf, 1, 4096, file); - for ( j=num; j<4096; ++j ) - buf[j] = 0; -} - -void put4k(FILE *file, char *buf ) -{ - fwrite(buf, 1, 4096, file); -} - -void death(const char *msg, FILE *fdesc, const char *fname) -{ - printf(msg); - fclose(fdesc); - unlink(fname); - exit(1); -} - -int main(int argc, char **argv) -{ - char inbuf[4096]; - FILE *ramDisk = NULL; - FILE *inputVmlinux = NULL; - FILE *outputVmlinux = NULL; - unsigned i = 0; - u_int32_t ramFileLen = 0; - u_int32_t ramLen = 0; - u_int32_t roundR = 0; - u_int32_t kernelLen = 0; - u_int32_t actualKernelLen = 0; - u_int32_t round = 0; - u_int32_t roundedKernelLen = 0; - u_int32_t ramStartOffs = 0; - u_int32_t ramPages = 0; - u_int32_t roundedKernelPages = 0; - u_int32_t hvReleaseData = 0; - u_int32_t eyeCatcher = 0xc8a5d9c4; - u_int32_t naca = 0; - u_int32_t xRamDisk = 0; - u_int32_t xRamDiskSize = 0; - if ( argc < 2 ) { - printf("Name of RAM disk file missing.\n"); - exit(1); - } - - if ( argc < 3 ) { - printf("Name of vmlinux file missing.\n"); - exit(1); - } - - if ( argc < 4 ) { - printf("Name of vmlinux output file missing.\n"); - exit(1); - } - - ramDisk = fopen(argv[1], "r"); - if ( ! ramDisk ) { - printf("RAM disk file \"%s\" failed to open.\n", argv[1]); - exit(1); - } - inputVmlinux = fopen(argv[2], "r"); - if ( ! inputVmlinux ) { - printf("vmlinux file \"%s\" failed to open.\n", argv[2]); - exit(1); - } - outputVmlinux = fopen(argv[3], "w+"); - if ( ! outputVmlinux ) { - printf("output vmlinux file \"%s\" failed to open.\n", argv[3]); - exit(1); - } - fseek(ramDisk, 0, SEEK_END); - ramFileLen = ftell(ramDisk); - fseek(ramDisk, 0, SEEK_SET); - printf("%s file size = %d\n", argv[1], ramFileLen); - - ramLen = ramFileLen; - - roundR = 4096 - (ramLen % 4096); - if ( roundR ) { - printf("Rounding RAM disk file up to a multiple of 4096, adding %d\n", roundR); - ramLen += roundR; - } - - printf("Rounded RAM disk size is %d\n", ramLen); - fseek(inputVmlinux, 0, SEEK_END); - kernelLen = ftell(inputVmlinux); - fseek(inputVmlinux, 0, SEEK_SET); - printf("kernel file size = %d\n", kernelLen); - if ( kernelLen == 0 ) { - printf("You must have a linux kernel specified as argv[2]\n"); - exit(1); - } - - actualKernelLen = kernelLen - ElfHeaderSize; - - printf("actual kernel length (minus ELF header) = %d\n", actualKernelLen); - - round = actualKernelLen % 4096; - roundedKernelLen = actualKernelLen; - if ( round ) - roundedKernelLen += (4096 - round); - - printf("actual kernel length rounded up to a 4k multiple = %d\n", roundedKernelLen); - - ramStartOffs = roundedKernelLen; - ramPages = ramLen / 4096; - - printf("RAM disk pages to copy = %d\n", ramPages); - - // Copy 64K ELF header - for (i=0; i<(ElfPages); ++i) { - get4k( inputVmlinux, inbuf ); - put4k( outputVmlinux, inbuf ); - } - - roundedKernelPages = roundedKernelLen / 4096; - - fseek(inputVmlinux, ElfHeaderSize, SEEK_SET); - - for ( i=0; i #include #include -#include #include #include #include @@ -511,24 +510,10 @@ _GLOBAL(do_stab_bolted_pSeries) mfspr r12,SPRG2 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) - - /* Space for the naca. Architected to be located at real address - * NACA_PHYS_ADDR. Various tools rely on this location being fixed. - * The first dword of the naca is required by iSeries LPAR to - * point to itVpdAreas. On pSeries native, this value is not used. - */ - . = NACA_PHYS_ADDR - .globl __end_interrupts -__end_interrupts: -#ifdef CONFIG_PPC_ISERIES - .globl naca -naca: - .llong itVpdAreas - .llong 0 /* xRamDisk */ - .llong 0 /* xRamDiskSize */ . = 0x6100 +#ifdef CONFIG_PPC_ISERIES /*** ISeries-LPAR interrupt handlers ***/ STD_EXCEPTION_ISERIES(0x200, machine_check, PACA_EXMC) diff --git a/include/asm-ppc64/naca.h b/include/asm-ppc64/naca.h index bfb7caa32eaf..d2afe6447597 100644 --- a/include/asm-ppc64/naca.h +++ b/include/asm-ppc64/naca.h @@ -12,8 +12,6 @@ #include -#ifndef __ASSEMBLY__ - struct naca_struct { /* Kernel only data - undefined for user space */ void *xItVpdAreas; /* VPD Data 0x00 */ @@ -23,9 +21,4 @@ struct naca_struct { extern struct naca_struct naca; -#endif /* __ASSEMBLY__ */ - -#define NACA_PAGE 0x4 -#define NACA_PHYS_ADDR (NACA_PAGE< Date: Fri, 19 Aug 2005 14:52:31 +1000 Subject: [PATCH 11/37] [PATCH] Move iSeries and common vectors into unused space in head.S In the ppc64 kernel head.S there is currently quite a lot of unused space between the naca (at fixed address 0x4000) and the fwnmi data area (at fixed address 0x7000). This patch moves various exception vectors and support code into this region to use the wasted space. The functions load_up_fpu and load_up_altivec are moved down as well, since they are essentially continuations of the fp_unavailable_common and altivec_unavailable_common vectors, respectively. Likewise, the fwnmi vectors themselves are moved down into this area, because while the location of the fwnmi data area is fixed by the RPA, the vectors themselves can be anywhere sufficiently low. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/head.S | 355 +++++++++++++++++++-------------------- 1 file changed, 175 insertions(+), 180 deletions(-) diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index 13c03648a602..eb54f0548b01 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -52,9 +52,8 @@ * We layout physical memory as follows: * 0x0000 - 0x00ff : Secondary processor spin code * 0x0100 - 0x2fff : pSeries Interrupt prologs - * 0x3000 - 0x3fff : Interrupt support - * 0x4000 - 0x4fff : NACA - * 0x6000 : iSeries and common interrupt prologs + * 0x3000 - 0x6fff : interrupt support, iSeries and common interrupt prologs + * 0x7000 - 0x7fff : FWNMI data area * 0x9000 - 0x9fff : Initial segment table */ @@ -501,17 +500,35 @@ system_call_pSeries: STD_EXCEPTION_PSERIES(0x1300, instruction_breakpoint) STD_EXCEPTION_PSERIES(0x1700, altivec_assist) - /* moved from 0xf00 */ - STD_EXCEPTION_PSERIES(0x3000, performance_monitor) + . = 0x3000 - . = 0x3100 +/*** pSeries interrupt support ***/ + + /* moved from 0xf00 */ + STD_EXCEPTION_PSERIES(., performance_monitor) + + .align 7 _GLOBAL(do_stab_bolted_pSeries) mtcrf 0x80,r12 mfspr r12,SPRG2 EXCEPTION_PROLOG_PSERIES(PACA_EXSLB, .do_stab_bolted) +/* + * Vectors for the FWNMI option. Share common code. + */ + .globl system_reset_fwnmi +system_reset_fwnmi: + HMT_MEDIUM + mtspr SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) - . = 0x6100 + .globl machine_check_fwnmi +machine_check_fwnmi: + HMT_MEDIUM + mtspr SPRG1,r13 /* save r13 */ + RUNLATCH_ON(r13) + EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) #ifdef CONFIG_PPC_ISERIES /*** ISeries-LPAR interrupt handlers ***/ @@ -656,51 +673,8 @@ hardware_interrupt_iSeries_masked: ld r13,PACA_EXGEN+EX_R13(r13) rfid b . /* prevent speculative execution */ -#endif - -/* - * Data area reserved for FWNMI option. - */ - .= 0x7000 - .globl fwnmi_data_area -fwnmi_data_area: - -#ifdef CONFIG_PPC_ISERIES - . = LPARMAP_PHYS -#include "lparmap.s" #endif /* CONFIG_PPC_ISERIES */ -/* - * Vectors for the FWNMI option. Share common code. - */ - . = 0x8000 - .globl system_reset_fwnmi -system_reset_fwnmi: - HMT_MEDIUM - mtspr SPRG1,r13 /* save r13 */ - RUNLATCH_ON(r13) - EXCEPTION_PROLOG_PSERIES(PACA_EXGEN, system_reset_common) - .globl machine_check_fwnmi -machine_check_fwnmi: - HMT_MEDIUM - mtspr SPRG1,r13 /* save r13 */ - RUNLATCH_ON(r13) - EXCEPTION_PROLOG_PSERIES(PACA_EXMC, machine_check_common) - - /* - * Space for the initial segment table - * For LPAR, the hypervisor must fill in at least one entry - * before we get control (with relocate on) - */ - . = STAB0_PHYS_ADDR - .globl __start_stab -__start_stab: - - . = (STAB0_PHYS_ADDR + PAGE_SIZE) - .globl __end_stab -__end_stab: - - /*** Common interrupt handlers ***/ STD_EXCEPTION_COMMON(0x100, system_reset, .system_reset_exception) @@ -891,6 +865,62 @@ fp_unavailable_common: bl .kernel_fp_unavailable_exception BUG_OPCODE +/* + * load_up_fpu(unused, unused, tsk) + * Disable FP for the task which had the FPU previously, + * and save its floating-point registers in its thread_struct. + * Enables the FPU for use in the kernel on return. + * On SMP we know the fpu is free, since we give it up every + * switch (ie, no lazy save of the FP registers). + * On entry: r13 == 'current' && last_task_used_math != 'current' + */ +_STATIC(load_up_fpu) + mfmsr r5 /* grab the current MSR */ + ori r5,r5,MSR_FP + mtmsrd r5 /* enable use of fpu now */ + isync +/* + * For SMP, we don't do lazy FPU switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_fpu in switch_to. + * + */ +#ifndef CONFIG_SMP + ld r3,last_task_used_math@got(r2) + ld r4,0(r3) + cmpdi 0,r4,0 + beq 1f + /* Save FP state to last_task_used_math's THREAD struct */ + addi r4,r4,THREAD + SAVE_32FPRS(0, r4) + mffs fr0 + stfd fr0,THREAD_FPSCR(r4) + /* Disable FP for last_task_used_math */ + ld r5,PT_REGS(r4) + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r6,MSR_FP|MSR_FE0|MSR_FE1 + andc r4,r4,r6 + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* enable use of FP after return */ + ld r4,PACACURRENT(r13) + addi r5,r4,THREAD /* Get THREAD */ + ld r4,THREAD_FPEXC_MODE(r5) + ori r12,r12,MSR_FP + or r12,r12,r4 + std r12,_MSR(r1) + lfd fr0,THREAD_FPSCR(r5) + mtfsf 0xff,fr0 + REST_32FPRS(0, r5) +#ifndef CONFIG_SMP + /* Update last_task_used_math to 'current' */ + subi r4,r5,THREAD /* Back to 'current' */ + std r4,0(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + b fast_exception_return + .align 7 .globl altivec_unavailable_common altivec_unavailable_common: @@ -906,6 +936,80 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) bl .altivec_unavailable_exception b .ret_from_except +#ifdef CONFIG_ALTIVEC +/* + * load_up_altivec(unused, unused, tsk) + * Disable VMX for the task which had it previously, + * and save its vector registers in its thread_struct. + * Enables the VMX for use in the kernel on return. + * On SMP we know the VMX is free, since we give it up every + * switch (ie, no lazy save of the vector registers). + * On entry: r13 == 'current' && last_task_used_altivec != 'current' + */ +_STATIC(load_up_altivec) + mfmsr r5 /* grab the current MSR */ + oris r5,r5,MSR_VEC@h + mtmsrd r5 /* enable use of VMX now */ + isync + +/* + * For SMP, we don't do lazy VMX switching because it just gets too + * horrendously complex, especially when a task switches from one CPU + * to another. Instead we call giveup_altvec in switch_to. + * VRSAVE isn't dealt with here, that is done in the normal context + * switch code. Note that we could rely on vrsave value to eventually + * avoid saving all of the VREGs here... + */ +#ifndef CONFIG_SMP + ld r3,last_task_used_altivec@got(r2) + ld r4,0(r3) + cmpdi 0,r4,0 + beq 1f + /* Save VMX state to last_task_used_altivec's THREAD struct */ + addi r4,r4,THREAD + SAVE_32VRS(0,r5,r4) + mfvscr vr0 + li r10,THREAD_VSCR + stvx vr0,r10,r4 + /* Disable VMX for last_task_used_altivec */ + ld r5,PT_REGS(r4) + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r6,MSR_VEC@h + andc r4,r4,r6 + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#endif /* CONFIG_SMP */ + /* Hack: if we get an altivec unavailable trap with VRSAVE + * set to all zeros, we assume this is a broken application + * that fails to set it properly, and thus we switch it to + * all 1's + */ + mfspr r4,SPRN_VRSAVE + cmpdi 0,r4,0 + bne+ 1f + li r4,-1 + mtspr SPRN_VRSAVE,r4 +1: + /* enable use of VMX after return */ + ld r4,PACACURRENT(r13) + addi r5,r4,THREAD /* Get THREAD */ + oris r12,r12,MSR_VEC@h + std r12,_MSR(r1) + li r4,1 + li r10,THREAD_VSCR + stw r4,THREAD_USED_VR(r5) + lvx vr0,r10,r5 + mtvscr vr0 + REST_32VRS(0,r4,r5) +#ifndef CONFIG_SMP + /* Update last_task_used_math to 'current' */ + subi r4,r5,THREAD /* Back to 'current' */ + std r4,0(r3) +#endif /* CONFIG_SMP */ + /* restore registers and return */ + b fast_exception_return +#endif /* CONFIG_ALTIVEC */ + /* * Hash table stuff */ @@ -1152,6 +1256,27 @@ unrecov_slb: bl .unrecoverable_exception b 1b +/* + * Data area reserved for FWNMI option. + * This address (0x7000) is fixed by the RPA. + */ + .= 0x7000 + .globl fwnmi_data_area +fwnmi_data_area: + .space PAGE_SIZE + + /* + * Space for the initial segment table + * For LPAR, the hypervisor must fill in at least one entry + * before we get control (with relocate on) + */ + . = STAB0_PHYS_ADDR + .globl __start_stab +__start_stab: + + . = (STAB0_PHYS_ADDR + PAGE_SIZE) + .globl __end_stab +__end_stab: /* * On pSeries, secondary processors spin in the following code. @@ -1415,62 +1540,6 @@ _GLOBAL(copy_and_flush) .align 8 copy_to_here: -/* - * load_up_fpu(unused, unused, tsk) - * Disable FP for the task which had the FPU previously, - * and save its floating-point registers in its thread_struct. - * Enables the FPU for use in the kernel on return. - * On SMP we know the fpu is free, since we give it up every - * switch (ie, no lazy save of the FP registers). - * On entry: r13 == 'current' && last_task_used_math != 'current' - */ -_STATIC(load_up_fpu) - mfmsr r5 /* grab the current MSR */ - ori r5,r5,MSR_FP - mtmsrd r5 /* enable use of fpu now */ - isync -/* - * For SMP, we don't do lazy FPU switching because it just gets too - * horrendously complex, especially when a task switches from one CPU - * to another. Instead we call giveup_fpu in switch_to. - * - */ -#ifndef CONFIG_SMP - ld r3,last_task_used_math@got(r2) - ld r4,0(r3) - cmpdi 0,r4,0 - beq 1f - /* Save FP state to last_task_used_math's THREAD struct */ - addi r4,r4,THREAD - SAVE_32FPRS(0, r4) - mffs fr0 - stfd fr0,THREAD_FPSCR(r4) - /* Disable FP for last_task_used_math */ - ld r5,PT_REGS(r4) - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - li r6,MSR_FP|MSR_FE0|MSR_FE1 - andc r4,r4,r6 - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#endif /* CONFIG_SMP */ - /* enable use of FP after return */ - ld r4,PACACURRENT(r13) - addi r5,r4,THREAD /* Get THREAD */ - ld r4,THREAD_FPEXC_MODE(r5) - ori r12,r12,MSR_FP - or r12,r12,r4 - std r12,_MSR(r1) - lfd fr0,THREAD_FPSCR(r5) - mtfsf 0xff,fr0 - REST_32FPRS(0, r5) -#ifndef CONFIG_SMP - /* Update last_task_used_math to 'current' */ - subi r4,r5,THREAD /* Back to 'current' */ - std r4,0(r3) -#endif /* CONFIG_SMP */ - /* restore registers and return */ - b fast_exception_return - /* * disable_kernel_fp() * Disable the FPU. @@ -1515,81 +1584,7 @@ _GLOBAL(giveup_fpu) #endif /* CONFIG_SMP */ blr - #ifdef CONFIG_ALTIVEC - -/* - * load_up_altivec(unused, unused, tsk) - * Disable VMX for the task which had it previously, - * and save its vector registers in its thread_struct. - * Enables the VMX for use in the kernel on return. - * On SMP we know the VMX is free, since we give it up every - * switch (ie, no lazy save of the vector registers). - * On entry: r13 == 'current' && last_task_used_altivec != 'current' - */ -_STATIC(load_up_altivec) - mfmsr r5 /* grab the current MSR */ - oris r5,r5,MSR_VEC@h - mtmsrd r5 /* enable use of VMX now */ - isync - -/* - * For SMP, we don't do lazy VMX switching because it just gets too - * horrendously complex, especially when a task switches from one CPU - * to another. Instead we call giveup_altvec in switch_to. - * VRSAVE isn't dealt with here, that is done in the normal context - * switch code. Note that we could rely on vrsave value to eventually - * avoid saving all of the VREGs here... - */ -#ifndef CONFIG_SMP - ld r3,last_task_used_altivec@got(r2) - ld r4,0(r3) - cmpdi 0,r4,0 - beq 1f - /* Save VMX state to last_task_used_altivec's THREAD struct */ - addi r4,r4,THREAD - SAVE_32VRS(0,r5,r4) - mfvscr vr0 - li r10,THREAD_VSCR - stvx vr0,r10,r4 - /* Disable VMX for last_task_used_altivec */ - ld r5,PT_REGS(r4) - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r6,MSR_VEC@h - andc r4,r4,r6 - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#endif /* CONFIG_SMP */ - /* Hack: if we get an altivec unavailable trap with VRSAVE - * set to all zeros, we assume this is a broken application - * that fails to set it properly, and thus we switch it to - * all 1's - */ - mfspr r4,SPRN_VRSAVE - cmpdi 0,r4,0 - bne+ 1f - li r4,-1 - mtspr SPRN_VRSAVE,r4 -1: - /* enable use of VMX after return */ - ld r4,PACACURRENT(r13) - addi r5,r4,THREAD /* Get THREAD */ - oris r12,r12,MSR_VEC@h - std r12,_MSR(r1) - li r4,1 - li r10,THREAD_VSCR - stw r4,THREAD_USED_VR(r5) - lvx vr0,r10,r5 - mtvscr vr0 - REST_32VRS(0,r4,r5) -#ifndef CONFIG_SMP - /* Update last_task_used_math to 'current' */ - subi r4,r5,THREAD /* Back to 'current' */ - std r4,0(r3) -#endif /* CONFIG_SMP */ - /* restore registers and return */ - b fast_exception_return - /* * disable_kernel_altivec() * Disable the VMX. From c59c464a3e29830bcfae5eea1777cad9e00087f3 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 19 Aug 2005 14:52:31 +1000 Subject: [PATCH 12/37] [PATCH] Change address of ppc64 initial segment table On ppc64 machines with segment tables, CPU0's segment table is at a fixed address, currently 0x9000. This patch moves it to the free space at 0x6000, just below the fwnmi data area. This saves 8k of space in vmlinux and the runtime kernel image. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/head.S | 32 +++++++++++++++++--------------- arch/ppc64/kernel/pacaData.c | 4 ++-- include/asm-ppc64/mmu.h | 7 +++++-- 3 files changed, 24 insertions(+), 19 deletions(-) diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index eb54f0548b01..7de38ebbe973 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -52,9 +52,10 @@ * We layout physical memory as follows: * 0x0000 - 0x00ff : Secondary processor spin code * 0x0100 - 0x2fff : pSeries Interrupt prologs - * 0x3000 - 0x6fff : interrupt support, iSeries and common interrupt prologs + * 0x3000 - 0x5fff : interrupt support, iSeries and common interrupt prologs + * 0x6000 - 0x6fff : Initial (CPU0) segment table * 0x7000 - 0x7fff : FWNMI data area - * 0x9000 - 0x9fff : Initial segment table + * 0x8000 - : Early init and support code */ /* @@ -1256,6 +1257,20 @@ unrecov_slb: bl .unrecoverable_exception b 1b +/* + * Space for CPU0's segment table. + * + * On iSeries, the hypervisor must fill in at least one entry before + * we get control (with relocate on). The address is give to the hv + * as a page number (see xLparMap in LparData.c), so this must be at a + * fixed address (the linker can't compute (u64)&initial_stab >> + * PAGE_SHIFT). + */ + . = STAB0_PHYS_ADDR /* 0x6000 */ + .globl initial_stab +initial_stab: + .space 4096 + /* * Data area reserved for FWNMI option. * This address (0x7000) is fixed by the RPA. @@ -1265,19 +1280,6 @@ unrecov_slb: fwnmi_data_area: .space PAGE_SIZE - /* - * Space for the initial segment table - * For LPAR, the hypervisor must fill in at least one entry - * before we get control (with relocate on) - */ - . = STAB0_PHYS_ADDR - .globl __start_stab -__start_stab: - - . = (STAB0_PHYS_ADDR + PAGE_SIZE) - .globl __end_stab -__end_stab: - /* * On pSeries, secondary processors spin in the following code. * At entry, r3 = this processor's number (physical cpu id) diff --git a/arch/ppc64/kernel/pacaData.c b/arch/ppc64/kernel/pacaData.c index 6316188737b6..6182a2cd90a5 100644 --- a/arch/ppc64/kernel/pacaData.c +++ b/arch/ppc64/kernel/pacaData.c @@ -78,7 +78,7 @@ extern unsigned long __toc_start; #define BOOTCPU_PACA_INIT(number) \ { \ - PACA_INIT_COMMON(number, 1, 0, STAB0_VIRT_ADDR) \ + PACA_INIT_COMMON(number, 1, 0, (u64)&initial_stab) \ PACA_INIT_ISERIES(number) \ } @@ -90,7 +90,7 @@ extern unsigned long __toc_start; #define BOOTCPU_PACA_INIT(number) \ { \ - PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, STAB0_VIRT_ADDR) \ + PACA_INIT_COMMON(number, 1, STAB0_PHYS_ADDR, (u64)&initial_stab) \ } #endif diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h index 959a4bfdcd6a..789c2693483c 100644 --- a/include/asm-ppc64/mmu.h +++ b/include/asm-ppc64/mmu.h @@ -28,9 +28,12 @@ #define STE_VSID_SHIFT 12 /* Location of cpu0's segment table */ -#define STAB0_PAGE 0x9 +#define STAB0_PAGE 0x6 #define STAB0_PHYS_ADDR (STAB0_PAGE< Date: Fri, 19 Aug 2005 14:52:31 +1000 Subject: [PATCH 13/37] [PATCH] Remove general use functions from head.S As well as the interrupt vectors and initialization code, head.S contains several asm functions which are used during runtime. This patch moves these to misc.S, a more sensible location for random asm support code. A couple The functions moved are: disable_kernel_fp giveup_fpu disable_kernel_altivec giveup_altivec __setup_cpu_power3 (empty function) Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/head.S | 95 -------------------------------------- arch/ppc64/kernel/misc.S | 98 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+), 95 deletions(-) diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index 7de38ebbe973..70e10403b697 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -1542,98 +1542,6 @@ _GLOBAL(copy_and_flush) .align 8 copy_to_here: -/* - * disable_kernel_fp() - * Disable the FPU. - */ -_GLOBAL(disable_kernel_fp) - mfmsr r3 - rldicl r0,r3,(63-MSR_FP_LG),1 - rldicl r3,r0,(MSR_FP_LG+1),0 - mtmsrd r3 /* disable use of fpu now */ - isync - blr - -/* - * giveup_fpu(tsk) - * Disable FP for the task given as the argument, - * and save the floating-point registers in its thread_struct. - * Enables the FPU for use in the kernel on return. - */ -_GLOBAL(giveup_fpu) - mfmsr r5 - ori r5,r5,MSR_FP - mtmsrd r5 /* enable use of fpu now */ - isync - cmpdi 0,r3,0 - beqlr- /* if no previous owner, done */ - addi r3,r3,THREAD /* want THREAD of task */ - ld r5,PT_REGS(r3) - cmpdi 0,r5,0 - SAVE_32FPRS(0, r3) - mffs fr0 - stfd fr0,THREAD_FPSCR(r3) - beq 1f - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - li r3,MSR_FP|MSR_FE0|MSR_FE1 - andc r4,r4,r3 /* disable FP for previous task */ - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#ifndef CONFIG_SMP - li r5,0 - ld r4,last_task_used_math@got(r2) - std r5,0(r4) -#endif /* CONFIG_SMP */ - blr - -#ifdef CONFIG_ALTIVEC -/* - * disable_kernel_altivec() - * Disable the VMX. - */ -_GLOBAL(disable_kernel_altivec) - mfmsr r3 - rldicl r0,r3,(63-MSR_VEC_LG),1 - rldicl r3,r0,(MSR_VEC_LG+1),0 - mtmsrd r3 /* disable use of VMX now */ - isync - blr - -/* - * giveup_altivec(tsk) - * Disable VMX for the task given as the argument, - * and save the vector registers in its thread_struct. - * Enables the VMX for use in the kernel on return. - */ -_GLOBAL(giveup_altivec) - mfmsr r5 - oris r5,r5,MSR_VEC@h - mtmsrd r5 /* enable use of VMX now */ - isync - cmpdi 0,r3,0 - beqlr- /* if no previous owner, done */ - addi r3,r3,THREAD /* want THREAD of task */ - ld r5,PT_REGS(r3) - cmpdi 0,r5,0 - SAVE_32VRS(0,r4,r3) - mfvscr vr0 - li r4,THREAD_VSCR - stvx vr0,r4,r3 - beq 1f - ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) - lis r3,MSR_VEC@h - andc r4,r4,r3 /* disable FP for previous task */ - std r4,_MSR-STACK_FRAME_OVERHEAD(r5) -1: -#ifndef CONFIG_SMP - li r5,0 - ld r4,last_task_used_altivec@got(r2) - std r5,0(r4) -#endif /* CONFIG_SMP */ - blr - -#endif /* CONFIG_ALTIVEC */ - #ifdef CONFIG_SMP #ifdef CONFIG_PPC_PMAC /* @@ -1984,9 +1892,6 @@ _STATIC(start_here_common) bl .start_kernel -_GLOBAL(__setup_cpu_power3) - blr - _GLOBAL(hmt_init) #ifdef CONFIG_HMT LOADADDR(r5, hmt_thread_data) diff --git a/arch/ppc64/kernel/misc.S b/arch/ppc64/kernel/misc.S index a05b50b738e9..474df0a862bf 100644 --- a/arch/ppc64/kernel/misc.S +++ b/arch/ppc64/kernel/misc.S @@ -680,6 +680,104 @@ _GLOBAL(kernel_thread) ld r30,-16(r1) blr +/* + * disable_kernel_fp() + * Disable the FPU. + */ +_GLOBAL(disable_kernel_fp) + mfmsr r3 + rldicl r0,r3,(63-MSR_FP_LG),1 + rldicl r3,r0,(MSR_FP_LG+1),0 + mtmsrd r3 /* disable use of fpu now */ + isync + blr + +/* + * giveup_fpu(tsk) + * Disable FP for the task given as the argument, + * and save the floating-point registers in its thread_struct. + * Enables the FPU for use in the kernel on return. + */ +_GLOBAL(giveup_fpu) + mfmsr r5 + ori r5,r5,MSR_FP + mtmsrd r5 /* enable use of fpu now */ + isync + cmpdi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + ld r5,PT_REGS(r3) + cmpdi 0,r5,0 + SAVE_32FPRS(0, r3) + mffs fr0 + stfd fr0,THREAD_FPSCR(r3) + beq 1f + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + li r3,MSR_FP|MSR_FE0|MSR_FE1 + andc r4,r4,r3 /* disable FP for previous task */ + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + ld r4,last_task_used_math@got(r2) + std r5,0(r4) +#endif /* CONFIG_SMP */ + blr + +#ifdef CONFIG_ALTIVEC + +#if 0 /* this has no callers for now */ +/* + * disable_kernel_altivec() + * Disable the VMX. + */ +_GLOBAL(disable_kernel_altivec) + mfmsr r3 + rldicl r0,r3,(63-MSR_VEC_LG),1 + rldicl r3,r0,(MSR_VEC_LG+1),0 + mtmsrd r3 /* disable use of VMX now */ + isync + blr +#endif /* 0 */ + +/* + * giveup_altivec(tsk) + * Disable VMX for the task given as the argument, + * and save the vector registers in its thread_struct. + * Enables the VMX for use in the kernel on return. + */ +_GLOBAL(giveup_altivec) + mfmsr r5 + oris r5,r5,MSR_VEC@h + mtmsrd r5 /* enable use of VMX now */ + isync + cmpdi 0,r3,0 + beqlr- /* if no previous owner, done */ + addi r3,r3,THREAD /* want THREAD of task */ + ld r5,PT_REGS(r3) + cmpdi 0,r5,0 + SAVE_32VRS(0,r4,r3) + mfvscr vr0 + li r4,THREAD_VSCR + stvx vr0,r4,r3 + beq 1f + ld r4,_MSR-STACK_FRAME_OVERHEAD(r5) + lis r3,MSR_VEC@h + andc r4,r4,r3 /* disable FP for previous task */ + std r4,_MSR-STACK_FRAME_OVERHEAD(r5) +1: +#ifndef CONFIG_SMP + li r5,0 + ld r4,last_task_used_altivec@got(r2) + std r5,0(r4) +#endif /* CONFIG_SMP */ + blr + +#endif /* CONFIG_ALTIVEC */ + +_GLOBAL(__setup_cpu_power3) + blr + /* kexec_wait(phys_cpu) * * wait for the flag to change, indicating this kernel is going away but From 60ba44945714d9b7dae8b85ab0926f6f13809c73 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 19 Aug 2005 14:52:32 +1000 Subject: [PATCH 14/37] [PATCH] Fix apparent code overlap in ppc64 head.S An #if/#else construct near the top of ppc64's head.S appears to create overlapping sections of code for iSeries and pSeries (i.e. one thing on iSeries and something different in the same place on pSeries). In fact, checking the various absolute offsets, it doesn't. This patch unravels the #ifdefs to make it more obvious what's going on. This accomplishes another microstep towards a single kernel image which can boot both iSeries and pSeries. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/head.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index 70e10403b697..1cc9f203c22f 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -93,6 +93,7 @@ END_FTR_SECTION(0, 1) /* Catch branch to 0 in real mode */ trap + #ifdef CONFIG_PPC_ISERIES /* * At offset 0x20, there is a pointer to iSeries LPAR data. @@ -119,7 +120,7 @@ embedded_sysmap_start: embedded_sysmap_end: .llong 0 -#else /* CONFIG_PPC_ISERIES */ +#endif /* CONFIG_PPC_ISERIES */ /* Secondary processors spin on this value until it goes to 1. */ .globl __secondary_hold_spinloop @@ -169,7 +170,6 @@ _GLOBAL(__secondary_hold) BUG_OPCODE #endif #endif -#endif /* This value is used to mark exception frames on the stack. */ .section ".toc","aw" From 1d086e6bd605ac44154e019fe96ae3568e8b2ba2 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 19 Aug 2005 14:52:32 +1000 Subject: [PATCH 15/37] [PATCH] Remove unneeded #defines in head.S arch/ppc64/kernel/head.S #defines SECONDARY_PROCESSORS then has some #ifdefs based on it. Whatever purpose this had is long lost, this patch removes it. Likewise, head.S defines H_SET_ASR, which is now defined, along with other hypervisor call numbers in hvcall.h. This patch deletes it, as well, from head.S. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/head.S | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index 1cc9f203c22f..bf2345eb133c 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -23,8 +23,6 @@ * 2 of the License, or (at your option) any later version. */ -#define SECONDARY_PROCESSORS - #include #include #include @@ -43,11 +41,6 @@ #define DO_SOFT_DISABLE #endif -/* - * hcall interface to pSeries LPAR - */ -#define H_SET_ASR 0x30 - /* * We layout physical memory as follows: * 0x0000 - 0x00ff : Secondary processor spin code @@ -629,9 +622,7 @@ system_reset_iSeries: cmpwi 0,r23,0 beq iSeries_secondary_smp_loop /* Loop until told to go */ -#ifdef SECONDARY_PROCESSORS bne .__secondary_start /* Loop until told to go */ -#endif iSeries_secondary_smp_loop: /* Let the Hypervisor know we are alive */ /* 8002 is a call to HvCallCfg::getLps, a harmless Hypervisor function */ @@ -1325,9 +1316,7 @@ _GLOBAL(pSeries_secondary_smp_init) cmpwi 0,r23,0 #ifdef CONFIG_SMP -#ifdef SECONDARY_PROCESSORS bne .__secondary_start -#endif #endif b 3b /* Loop until told to go */ From 91a57fc6723d778e12686b5106a38583072fd767 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 19 Aug 2005 14:52:32 +1000 Subject: [PATCH 16/37] [PATCH] Tweak comments in ppc64 head.S This patch adjust some comments in head.S for accuracy, clarity, and spelling. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/head.S | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index bf2345eb133c..c7462fae7662 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -148,7 +148,7 @@ _GLOBAL(__secondary_hold) std r24,__secondary_hold_acknowledge@l(0) sync - /* All secondary cpu's wait here until told to start. */ + /* All secondary cpus wait here until told to start. */ 100: ld r4,__secondary_hold_spinloop@l(0) cmpdi 0,r4,1 bne 100b @@ -703,8 +703,8 @@ machine_check_common: * R9 contains the saved CR, r13 points to the paca, * r10 contains the (bad) kernel stack pointer, * r11 and r12 contain the saved SRR0 and SRR1. - * We switch to using the paca guard page as an emergency stack, - * save the registers there, and call kernel_bad_stack(), which panics. + * We switch to using an emergency stack, save the registers there, + * and call kernel_bad_stack(), which panics. */ bad_stack: ld r1,PACAEMERGSP(r13) @@ -1303,7 +1303,7 @@ _GLOBAL(pSeries_secondary_smp_init) b .kexec_wait /* next kernel might do better */ 2: mtspr SPRG3,r13 /* Save vaddr of paca in SPRG3 */ - /* From now on, r24 is expected to be logica cpuid */ + /* From now on, r24 is expected to be logical cpuid */ mr r24,r5 3: HMT_LOW lbz r23,PACAPROCSTART(r13) /* Test if this processor should */ From 6fbb49d56d228b666cb4534bbc3c2dfe833c8053 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Fri, 19 Aug 2005 14:52:32 +1000 Subject: [PATCH 17/37] [PATCH] Move variables in ppc64 head.S from .data to .bss The ppc64 head.S defines several zero-initialized structures, such as the empty_zero_page and the kernel top-level pagetable. Currently they are defined to be in the data section. However, they're not used until after the bss is cleared, so this patch moves them to the bss, saving two and a half pages from the vmlinux. Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/head.S | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index c7462fae7662..a0ff707d6fea 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -1971,20 +1971,19 @@ _GLOBAL(smp_release_cpus) /* * We put a few things here that have to be page-aligned. - * This stuff goes at the beginning of the data segment, - * which is page-aligned. + * This stuff goes at the beginning of the bss, which is page-aligned. */ - .data + .section ".bss" + .align 12 - .globl sdata -sdata: + .globl empty_zero_page empty_zero_page: - .space 4096 + .space PAGE_SIZE .globl swapper_pg_dir swapper_pg_dir: - .space 4096 + .space PAGE_SIZE /* * This space gets a copy of optional info passed to us by the bootstrap From 7a6af5e38054d8e658a4b1b703902331a845de1a Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 3 Aug 2005 14:32:30 +1000 Subject: [PATCH 18/37] [PATCH] ppc64: remove firmware features from cpu_spec The firmware_features field of struct cpu_spec should really be a separate variable as the firmware features do not depend on the chip and the bitmask is constructed independently. By removing it, we save 112 bytes from the cpu_specs array and we access the bitmask directly instead of via the cur_cpu_spec pointer. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/cputable.c | 16 +--------------- arch/ppc64/kernel/lparcfg.c | 4 ++-- arch/ppc64/kernel/pSeries_iommu.c | 2 +- arch/ppc64/kernel/pSeries_setup.c | 12 ++++++------ arch/ppc64/kernel/pSeries_smp.c | 2 +- arch/ppc64/kernel/process.c | 2 +- arch/ppc64/kernel/sysfs.c | 2 +- arch/ppc64/kernel/time.c | 2 +- include/asm-ppc64/cputable.h | 10 +++++----- 9 files changed, 19 insertions(+), 33 deletions(-) diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c index 77cec42f9525..84fdd27498a5 100644 --- a/arch/ppc64/kernel/cputable.c +++ b/arch/ppc64/kernel/cputable.c @@ -23,6 +23,7 @@ struct cpu_spec* cur_cpu_spec = NULL; EXPORT_SYMBOL(cur_cpu_spec); +unsigned long ppc64_firmware_features; /* NOTE: * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's @@ -60,7 +61,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power3, - .firmware_features = COMMON_PPC64_FW, }, { /* Power3+ */ .pvr_mask = 0xffff0000, @@ -73,7 +73,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power3, - .firmware_features = COMMON_PPC64_FW, }, { /* Northstar */ .pvr_mask = 0xffff0000, @@ -86,7 +85,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power3, - .firmware_features = COMMON_PPC64_FW, }, { /* Pulsar */ .pvr_mask = 0xffff0000, @@ -99,7 +97,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power3, - .firmware_features = COMMON_PPC64_FW, }, { /* I-star */ .pvr_mask = 0xffff0000, @@ -112,7 +109,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power3, - .firmware_features = COMMON_PPC64_FW, }, { /* S-star */ .pvr_mask = 0xffff0000, @@ -125,7 +121,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power3, - .firmware_features = COMMON_PPC64_FW, }, { /* Power4 */ .pvr_mask = 0xffff0000, @@ -138,7 +133,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power4, - .firmware_features = COMMON_PPC64_FW, }, { /* Power4+ */ .pvr_mask = 0xffff0000, @@ -151,7 +145,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power4, - .firmware_features = COMMON_PPC64_FW, }, { /* PPC970 */ .pvr_mask = 0xffff0000, @@ -166,7 +159,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_ppc970, - .firmware_features = COMMON_PPC64_FW, }, { /* PPC970FX */ .pvr_mask = 0xffff0000, @@ -181,7 +173,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_ppc970, - .firmware_features = COMMON_PPC64_FW, }, { /* PPC970MP */ .pvr_mask = 0xffff0000, @@ -196,7 +187,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_ppc970, - .firmware_features = COMMON_PPC64_FW, }, { /* Power5 */ .pvr_mask = 0xffff0000, @@ -211,7 +201,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power4, - .firmware_features = COMMON_PPC64_FW, }, { /* Power5 */ .pvr_mask = 0xffff0000, @@ -226,7 +215,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power4, - .firmware_features = COMMON_PPC64_FW, }, { /* BE DD1.x */ .pvr_mask = 0xffff0000, @@ -241,7 +229,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_be, - .firmware_features = COMMON_PPC64_FW, }, { /* default match */ .pvr_mask = 0x00000000, @@ -254,7 +241,6 @@ struct cpu_spec cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .cpu_setup = __setup_cpu_power4, - .firmware_features = COMMON_PPC64_FW, } }; diff --git a/arch/ppc64/kernel/lparcfg.c b/arch/ppc64/kernel/lparcfg.c index 02e96627fa66..938353848cd0 100644 --- a/arch/ppc64/kernel/lparcfg.c +++ b/arch/ppc64/kernel/lparcfg.c @@ -377,7 +377,7 @@ static int lparcfg_data(struct seq_file *m, void *v) partition_active_processors = lparcfg_count_active_processors(); - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { unsigned long h_entitled, h_unallocated; unsigned long h_aggregation, h_resource; unsigned long pool_idle_time, pool_procs; @@ -571,7 +571,7 @@ int __init lparcfg_init(void) mode_t mode = S_IRUSR; /* Allow writing if we have FW_FEATURE_SPLPAR */ - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { lparcfg_fops.write = lparcfg_write; mode |= S_IWUSR; } diff --git a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c index 69130522a87e..a5786be9c654 100644 --- a/arch/ppc64/kernel/pSeries_iommu.c +++ b/arch/ppc64/kernel/pSeries_iommu.c @@ -546,7 +546,7 @@ void iommu_init_early_pSeries(void) } if (systemcfg->platform & PLATFORM_LPAR) { - if (cur_cpu_spec->firmware_features & FW_FEATURE_MULTITCE) { + if (ppc64_firmware_features & FW_FEATURE_MULTITCE) { ppc_md.tce_build = tce_buildmulti_pSeriesLP; ppc_md.tce_free = tce_freemulti_pSeriesLP; } else { diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index 5bec956e44a0..d3975ac71cfb 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -231,11 +231,11 @@ static void __init pSeries_setup_arch(void) pSeries_nvram_init(); - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) + if (ppc64_firmware_features & FW_FEATURE_SPLPAR) vpa_init(boot_cpuid); /* Choose an idle loop */ - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { if (get_paca()->lppaca.shared_proc) { printk(KERN_INFO "Using shared processor idle loop\n"); ppc_md.idle_loop = pseries_shared_idle; @@ -260,7 +260,7 @@ static int __init pSeries_init_panel(void) arch_initcall(pSeries_init_panel); -/* Build up the firmware_features bitmask field +/* Build up the ppc64_firmware_features bitmask field * using contents of device-tree/ibm,hypertas-functions. * Ultimately this functionality may be moved into prom.c prom_init(). */ @@ -272,7 +272,7 @@ void __init fw_feature_init(void) DBG(" -> fw_feature_init()\n"); - cur_cpu_spec->firmware_features = 0; + ppc64_firmware_features = 0; dn = of_find_node_by_path("/rtas"); if (dn == NULL) { printk(KERN_ERR "WARNING ! Cannot find RTAS in device-tree !\n"); @@ -288,7 +288,7 @@ void __init fw_feature_init(void) if ((firmware_features_table[i].name) && (strcmp(firmware_features_table[i].name,hypertas))==0) { /* we have a match */ - cur_cpu_spec->firmware_features |= + ppc64_firmware_features |= (firmware_features_table[i].val); break; } @@ -302,7 +302,7 @@ void __init fw_feature_init(void) of_node_put(dn); no_rtas: printk(KERN_INFO "firmware_features = 0x%lx\n", - cur_cpu_spec->firmware_features); + ppc64_firmware_features); DBG(" <- fw_feature_init()\n"); } diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c index 62c55a123560..8312d324aaae 100644 --- a/arch/ppc64/kernel/pSeries_smp.c +++ b/arch/ppc64/kernel/pSeries_smp.c @@ -326,7 +326,7 @@ static void __devinit smp_xics_setup_cpu(int cpu) if (cpu != boot_cpuid) xics_setup_cpu(); - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) + if (ppc64_firmware_features & FW_FEATURE_SPLPAR) vpa_init(cpu); cpu_clear(cpu, of_spin_map); diff --git a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c index f7cae05e40fb..390f434283af 100644 --- a/arch/ppc64/kernel/process.c +++ b/arch/ppc64/kernel/process.c @@ -206,7 +206,7 @@ struct task_struct *__switch_to(struct task_struct *prev, /* purr is nothing but processor time base */ #if defined(CONFIG_PPC_PSERIES) - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); long unsigned start_tb, current_tb; start_tb = old_thread->start_tb; diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c index 02b8ac4e0168..90b653c724fa 100644 --- a/arch/ppc64/kernel/sysfs.c +++ b/arch/ppc64/kernel/sysfs.c @@ -154,7 +154,7 @@ void ppc64_enable_pmcs(void) #ifdef CONFIG_PPC_PSERIES /* instruct hypervisor to maintain PMCs */ - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) + if (ppc64_firmware_features & FW_FEATURE_SPLPAR) get_paca()->lppaca.pmcregs_in_use = 1; #endif /* CONFIG_PPC_PSERIES */ } diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c index 909462e1adea..1c05cee05315 100644 --- a/arch/ppc64/kernel/time.c +++ b/arch/ppc64/kernel/time.c @@ -372,7 +372,7 @@ int timer_interrupt(struct pt_regs * regs) /* collect purr register values often, for accurate calculations */ #if defined(CONFIG_PPC_PSERIES) - if (cur_cpu_spec->firmware_features & FW_FEATURE_SPLPAR) { + if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); cu->current_tb = mfspr(SPRN_PURR); } diff --git a/include/asm-ppc64/cputable.h b/include/asm-ppc64/cputable.h index d67fa9e26079..d55698a60e75 100644 --- a/include/asm-ppc64/cputable.h +++ b/include/asm-ppc64/cputable.h @@ -56,11 +56,6 @@ struct cpu_spec { * BHT, SPD, etc... from head.S before branching to identify_machine */ cpu_setup_t cpu_setup; - - /* This is used to identify firmware features which are available - * to the kernel. - */ - unsigned long firmware_features; }; extern struct cpu_spec cpu_specs[]; @@ -72,6 +67,11 @@ static inline unsigned long cpu_has_feature(unsigned long feature) } +/* This is used to identify firmware features which are available + * to the kernel. + */ +extern unsigned long ppc64_firmware_features; + /* firmware feature bitmask values */ #define FIRMWARE_MAX_FEATURES 63 From 1ababe11480d59d75be806804c71fa55d203a5a6 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 3 Aug 2005 14:35:25 +1000 Subject: [PATCH 19/37] [PATCH] ppc64: create firmware_has_feature() Create the firmware_has_feature() inline and move the firmware feature stuff into its own header file. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/cputable.c | 1 + arch/ppc64/kernel/lparcfg.c | 6 +-- arch/ppc64/kernel/pSeries_iommu.c | 3 +- arch/ppc64/kernel/pSeries_setup.c | 6 +-- arch/ppc64/kernel/pSeries_smp.c | 3 +- arch/ppc64/kernel/process.c | 3 +- arch/ppc64/kernel/sysfs.c | 3 +- arch/ppc64/kernel/time.c | 3 +- include/asm-ppc64/cputable.h | 47 ++-------------------- include/asm-ppc64/firmware.h | 67 +++++++++++++++++++++++++++++++ 10 files changed, 87 insertions(+), 55 deletions(-) create mode 100644 include/asm-ppc64/firmware.h diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c index 84fdd27498a5..6294fc7bd442 100644 --- a/arch/ppc64/kernel/cputable.c +++ b/arch/ppc64/kernel/cputable.c @@ -20,6 +20,7 @@ #include #include +#include struct cpu_spec* cur_cpu_spec = NULL; EXPORT_SYMBOL(cur_cpu_spec); diff --git a/arch/ppc64/kernel/lparcfg.c b/arch/ppc64/kernel/lparcfg.c index 938353848cd0..9d034ff062b1 100644 --- a/arch/ppc64/kernel/lparcfg.c +++ b/arch/ppc64/kernel/lparcfg.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include @@ -377,7 +377,7 @@ static int lparcfg_data(struct seq_file *m, void *v) partition_active_processors = lparcfg_count_active_processors(); - if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { unsigned long h_entitled, h_unallocated; unsigned long h_aggregation, h_resource; unsigned long pool_idle_time, pool_procs; @@ -571,7 +571,7 @@ int __init lparcfg_init(void) mode_t mode = S_IRUSR; /* Allow writing if we have FW_FEATURE_SPLPAR */ - if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { lparcfg_fops.write = lparcfg_write; mode |= S_IWUSR; } diff --git a/arch/ppc64/kernel/pSeries_iommu.c b/arch/ppc64/kernel/pSeries_iommu.c index a5786be9c654..9d5e1e7fc389 100644 --- a/arch/ppc64/kernel/pSeries_iommu.c +++ b/arch/ppc64/kernel/pSeries_iommu.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "pci.h" #define DBG(fmt...) @@ -546,7 +547,7 @@ void iommu_init_early_pSeries(void) } if (systemcfg->platform & PLATFORM_LPAR) { - if (ppc64_firmware_features & FW_FEATURE_MULTITCE) { + if (firmware_has_feature(FW_FEATURE_MULTITCE)) { ppc_md.tce_build = tce_buildmulti_pSeriesLP; ppc_md.tce_free = tce_freemulti_pSeriesLP; } else { diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index d3975ac71cfb..0058f32a3d89 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -60,7 +60,7 @@ #include #include #include -#include +#include #include "i8259.h" #include "mpic.h" @@ -231,11 +231,11 @@ static void __init pSeries_setup_arch(void) pSeries_nvram_init(); - if (ppc64_firmware_features & FW_FEATURE_SPLPAR) + if (firmware_has_feature(FW_FEATURE_SPLPAR)) vpa_init(boot_cpuid); /* Choose an idle loop */ - if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { if (get_paca()->lppaca.shared_proc) { printk(KERN_INFO "Using shared processor idle loop\n"); ppc_md.idle_loop = pseries_shared_idle; diff --git a/arch/ppc64/kernel/pSeries_smp.c b/arch/ppc64/kernel/pSeries_smp.c index 8312d324aaae..79c7f3223665 100644 --- a/arch/ppc64/kernel/pSeries_smp.c +++ b/arch/ppc64/kernel/pSeries_smp.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include @@ -326,7 +327,7 @@ static void __devinit smp_xics_setup_cpu(int cpu) if (cpu != boot_cpuid) xics_setup_cpu(); - if (ppc64_firmware_features & FW_FEATURE_SPLPAR) + if (firmware_has_feature(FW_FEATURE_SPLPAR)) vpa_init(cpu); cpu_clear(cpu, of_spin_map); diff --git a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c index 390f434283af..9bad983333b1 100644 --- a/arch/ppc64/kernel/process.c +++ b/arch/ppc64/kernel/process.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -206,7 +207,7 @@ struct task_struct *__switch_to(struct task_struct *prev, /* purr is nothing but processor time base */ #if defined(CONFIG_PPC_PSERIES) - if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); long unsigned start_tb, current_tb; start_tb = old_thread->start_tb; diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c index 90b653c724fa..e399963521c0 100644 --- a/arch/ppc64/kernel/sysfs.c +++ b/arch/ppc64/kernel/sysfs.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -154,7 +155,7 @@ void ppc64_enable_pmcs(void) #ifdef CONFIG_PPC_PSERIES /* instruct hypervisor to maintain PMCs */ - if (ppc64_firmware_features & FW_FEATURE_SPLPAR) + if (firmware_has_feature(FW_FEATURE_SPLPAR)) get_paca()->lppaca.pmcregs_in_use = 1; #endif /* CONFIG_PPC_PSERIES */ } diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c index 1c05cee05315..d523056fd660 100644 --- a/arch/ppc64/kernel/time.c +++ b/arch/ppc64/kernel/time.c @@ -67,6 +67,7 @@ #include #include #include +#include u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; @@ -372,7 +373,7 @@ int timer_interrupt(struct pt_regs * regs) /* collect purr register values often, for accurate calculations */ #if defined(CONFIG_PPC_PSERIES) - if (ppc64_firmware_features & FW_FEATURE_SPLPAR) { + if (firmware_has_feature(FW_FEATURE_SPLPAR)) { struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); cu->current_tb = mfspr(SPRN_PURR); } diff --git a/include/asm-ppc64/cputable.h b/include/asm-ppc64/cputable.h index d55698a60e75..ae6cf3830108 100644 --- a/include/asm-ppc64/cputable.h +++ b/include/asm-ppc64/cputable.h @@ -66,44 +66,6 @@ static inline unsigned long cpu_has_feature(unsigned long feature) return cur_cpu_spec->cpu_features & feature; } - -/* This is used to identify firmware features which are available - * to the kernel. - */ -extern unsigned long ppc64_firmware_features; - -/* firmware feature bitmask values */ -#define FIRMWARE_MAX_FEATURES 63 - -#define FW_FEATURE_PFT (1UL<<0) -#define FW_FEATURE_TCE (1UL<<1) -#define FW_FEATURE_SPRG0 (1UL<<2) -#define FW_FEATURE_DABR (1UL<<3) -#define FW_FEATURE_COPY (1UL<<4) -#define FW_FEATURE_ASR (1UL<<5) -#define FW_FEATURE_DEBUG (1UL<<6) -#define FW_FEATURE_TERM (1UL<<7) -#define FW_FEATURE_PERF (1UL<<8) -#define FW_FEATURE_DUMP (1UL<<9) -#define FW_FEATURE_INTERRUPT (1UL<<10) -#define FW_FEATURE_MIGRATE (1UL<<11) -#define FW_FEATURE_PERFMON (1UL<<12) -#define FW_FEATURE_CRQ (1UL<<13) -#define FW_FEATURE_VIO (1UL<<14) -#define FW_FEATURE_RDMA (1UL<<15) -#define FW_FEATURE_LLAN (1UL<<16) -#define FW_FEATURE_BULK (1UL<<17) -#define FW_FEATURE_XDABR (1UL<<18) -#define FW_FEATURE_MULTITCE (1UL<<19) -#define FW_FEATURE_SPLPAR (1UL<<20) - -typedef struct { - unsigned long val; - char * name; -} firmware_feature_t; - -extern firmware_feature_t firmware_features_table[]; - #endif /* __ASSEMBLY__ */ /* CPU kernel features */ @@ -140,10 +102,8 @@ extern firmware_feature_t firmware_features_table[]; #define CPU_FTR_MMCRA_SIHV ASM_CONST(0x0000080000000000) #define CPU_FTR_CTRL ASM_CONST(0x0000100000000000) -/* Platform firmware features */ -#define FW_FTR_ ASM_CONST(0x0000000000000001) - #ifndef __ASSEMBLY__ + #define COMMON_USER_PPC64 (PPC_FEATURE_32 | PPC_FEATURE_64 | \ PPC_FEATURE_HAS_FPU | PPC_FEATURE_HAS_MMU) @@ -156,10 +116,9 @@ extern firmware_feature_t firmware_features_table[]; #define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_PPCAS_ARCH_V2_BASE) #else #define CPU_FTR_PPCAS_ARCH_V2 (CPU_FTR_PPCAS_ARCH_V2_BASE | CPU_FTR_16M_PAGE) -#endif +#endif /* CONFIG_PPC_ISERIES */ -#define COMMON_PPC64_FW (0) -#endif +#endif /* __ASSEMBLY */ #ifdef __ASSEMBLY__ diff --git a/include/asm-ppc64/firmware.h b/include/asm-ppc64/firmware.h new file mode 100644 index 000000000000..5bb5bf46bb4a --- /dev/null +++ b/include/asm-ppc64/firmware.h @@ -0,0 +1,67 @@ +/* + * include/asm-ppc64/firmware.h + * + * Extracted from include/asm-ppc64/cputable.h + * + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * Modifications for ppc64: + * Copyright (C) 2003 Dave Engebretsen + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#ifndef __ASM_PPC_FIRMWARE_H +#define __ASM_PPC_FIRMWARE_H + +#ifdef __KERNEL__ + +#ifndef __ASSEMBLY__ + +/* firmware feature bitmask values */ +#define FIRMWARE_MAX_FEATURES 63 + +#define FW_FEATURE_PFT (1UL<<0) +#define FW_FEATURE_TCE (1UL<<1) +#define FW_FEATURE_SPRG0 (1UL<<2) +#define FW_FEATURE_DABR (1UL<<3) +#define FW_FEATURE_COPY (1UL<<4) +#define FW_FEATURE_ASR (1UL<<5) +#define FW_FEATURE_DEBUG (1UL<<6) +#define FW_FEATURE_TERM (1UL<<7) +#define FW_FEATURE_PERF (1UL<<8) +#define FW_FEATURE_DUMP (1UL<<9) +#define FW_FEATURE_INTERRUPT (1UL<<10) +#define FW_FEATURE_MIGRATE (1UL<<11) +#define FW_FEATURE_PERFMON (1UL<<12) +#define FW_FEATURE_CRQ (1UL<<13) +#define FW_FEATURE_VIO (1UL<<14) +#define FW_FEATURE_RDMA (1UL<<15) +#define FW_FEATURE_LLAN (1UL<<16) +#define FW_FEATURE_BULK (1UL<<17) +#define FW_FEATURE_XDABR (1UL<<18) +#define FW_FEATURE_MULTITCE (1UL<<19) +#define FW_FEATURE_SPLPAR (1UL<<20) + +/* This is used to identify firmware features which are available + * to the kernel. + */ +extern unsigned long ppc64_firmware_features; + +static inline unsigned long firmware_has_feature(unsigned long feature) +{ + return ppc64_firmware_features & feature; +} + +typedef struct { + unsigned long val; + char * name; +} firmware_feature_t; + +extern firmware_feature_t firmware_features_table[]; + +#endif /* __ASSEMBLY__ */ +#endif /* __KERNEL__ */ +#endif /* __ASM_PPC_FIRMWARE_H */ From 8d15a3e55f49678b0900dcf5c1cddb322a129325 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 3 Aug 2005 14:40:16 +1000 Subject: [PATCH 20/37] [PATCH] ppc64: make firmware_has_feature() stronger Make firmware_has_feature() evaluate at compile time for the non pSeries case and tidy up code where possible. Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/Makefile | 2 +- arch/ppc64/kernel/cputable.c | 27 +----------------- arch/ppc64/kernel/firmware.c | 47 +++++++++++++++++++++++++++++++ arch/ppc64/kernel/pSeries_setup.c | 4 +-- arch/ppc64/kernel/process.c | 9 ++---- arch/ppc64/kernel/sysfs.c | 2 -- arch/ppc64/kernel/time.c | 4 +-- include/asm-ppc64/firmware.h | 20 ++++++++++++- 8 files changed, 73 insertions(+), 42 deletions(-) create mode 100644 arch/ppc64/kernel/firmware.c diff --git a/arch/ppc64/kernel/Makefile b/arch/ppc64/kernel/Makefile index cbf87dcac92a..f4b3bfcc109d 100644 --- a/arch/ppc64/kernel/Makefile +++ b/arch/ppc64/kernel/Makefile @@ -11,7 +11,7 @@ obj-y := setup.o entry.o traps.o irq.o idle.o dma.o \ udbg.o binfmt_elf32.o sys_ppc32.o ioctl32.o \ ptrace32.o signal32.o rtc.o init_task.o \ lmb.o cputable.o cpu_setup_power4.o idle_power4.o \ - iommu.o sysfs.o vdso.o pmc.o + iommu.o sysfs.o vdso.o pmc.o firmware.o obj-y += vdso32/ vdso64/ obj-$(CONFIG_PPC_OF) += of_device.o diff --git a/arch/ppc64/kernel/cputable.c b/arch/ppc64/kernel/cputable.c index 6294fc7bd442..4847f2ac8c9f 100644 --- a/arch/ppc64/kernel/cputable.c +++ b/arch/ppc64/kernel/cputable.c @@ -5,7 +5,7 @@ * * Modifications for ppc64: * Copyright (C) 2003 Dave Engebretsen - * + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -20,11 +20,9 @@ #include #include -#include struct cpu_spec* cur_cpu_spec = NULL; EXPORT_SYMBOL(cur_cpu_spec); -unsigned long ppc64_firmware_features; /* NOTE: * Unlike ppc32, ppc64 will only call this once for the boot CPU, it's @@ -244,26 +242,3 @@ struct cpu_spec cpu_specs[] = { .cpu_setup = __setup_cpu_power4, } }; - -firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = { - {FW_FEATURE_PFT, "hcall-pft"}, - {FW_FEATURE_TCE, "hcall-tce"}, - {FW_FEATURE_SPRG0, "hcall-sprg0"}, - {FW_FEATURE_DABR, "hcall-dabr"}, - {FW_FEATURE_COPY, "hcall-copy"}, - {FW_FEATURE_ASR, "hcall-asr"}, - {FW_FEATURE_DEBUG, "hcall-debug"}, - {FW_FEATURE_PERF, "hcall-perf"}, - {FW_FEATURE_DUMP, "hcall-dump"}, - {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, - {FW_FEATURE_MIGRATE, "hcall-migrate"}, - {FW_FEATURE_PERFMON, "hcall-perfmon"}, - {FW_FEATURE_CRQ, "hcall-crq"}, - {FW_FEATURE_VIO, "hcall-vio"}, - {FW_FEATURE_RDMA, "hcall-rdma"}, - {FW_FEATURE_LLAN, "hcall-lLAN"}, - {FW_FEATURE_BULK, "hcall-bulk"}, - {FW_FEATURE_XDABR, "hcall-xdabr"}, - {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, - {FW_FEATURE_SPLPAR, "hcall-splpar"}, -}; diff --git a/arch/ppc64/kernel/firmware.c b/arch/ppc64/kernel/firmware.c new file mode 100644 index 000000000000..d8432c0fb27d --- /dev/null +++ b/arch/ppc64/kernel/firmware.c @@ -0,0 +1,47 @@ +/* + * arch/ppc64/kernel/firmware.c + * + * Extracted from cputable.c + * + * Copyright (C) 2001 Ben. Herrenschmidt (benh@kernel.crashing.org) + * + * Modifications for ppc64: + * Copyright (C) 2003 Dave Engebretsen + * Copyright (C) 2005 Stephen Rothwell, IBM Corporation + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include + +#include + +unsigned long ppc64_firmware_features; + +#ifdef CONFIG_PPC_PSERIES +firmware_feature_t firmware_features_table[FIRMWARE_MAX_FEATURES] = { + {FW_FEATURE_PFT, "hcall-pft"}, + {FW_FEATURE_TCE, "hcall-tce"}, + {FW_FEATURE_SPRG0, "hcall-sprg0"}, + {FW_FEATURE_DABR, "hcall-dabr"}, + {FW_FEATURE_COPY, "hcall-copy"}, + {FW_FEATURE_ASR, "hcall-asr"}, + {FW_FEATURE_DEBUG, "hcall-debug"}, + {FW_FEATURE_PERF, "hcall-perf"}, + {FW_FEATURE_DUMP, "hcall-dump"}, + {FW_FEATURE_INTERRUPT, "hcall-interrupt"}, + {FW_FEATURE_MIGRATE, "hcall-migrate"}, + {FW_FEATURE_PERFMON, "hcall-perfmon"}, + {FW_FEATURE_CRQ, "hcall-crq"}, + {FW_FEATURE_VIO, "hcall-vio"}, + {FW_FEATURE_RDMA, "hcall-rdma"}, + {FW_FEATURE_LLAN, "hcall-lLAN"}, + {FW_FEATURE_BULK, "hcall-bulk"}, + {FW_FEATURE_XDABR, "hcall-xdabr"}, + {FW_FEATURE_MULTITCE, "hcall-multi-tce"}, + {FW_FEATURE_SPLPAR, "hcall-splpar"}, +}; +#endif diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index 0058f32a3d89..7ae7a2ca1085 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -231,11 +231,9 @@ static void __init pSeries_setup_arch(void) pSeries_nvram_init(); - if (firmware_has_feature(FW_FEATURE_SPLPAR)) - vpa_init(boot_cpuid); - /* Choose an idle loop */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { + vpa_init(boot_cpuid); if (get_paca()->lppaca.shared_proc) { printk(KERN_INFO "Using shared processor idle loop\n"); ppc_md.idle_loop = pseries_shared_idle; diff --git a/arch/ppc64/kernel/process.c b/arch/ppc64/kernel/process.c index 9bad983333b1..7a7e027653ad 100644 --- a/arch/ppc64/kernel/process.c +++ b/arch/ppc64/kernel/process.c @@ -203,10 +203,9 @@ struct task_struct *__switch_to(struct task_struct *prev, new_thread = &new->thread; old_thread = ¤t->thread; -/* Collect purr utilization data per process and per processor wise */ -/* purr is nothing but processor time base */ - -#if defined(CONFIG_PPC_PSERIES) + /* Collect purr utilization data per process and per processor + * wise purr is nothing but processor time base + */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); long unsigned start_tb, current_tb; @@ -215,8 +214,6 @@ struct task_struct *__switch_to(struct task_struct *prev, old_thread->accum_tb += (current_tb - start_tb); new_thread->start_tb = current_tb; } -#endif - local_irq_save(flags); last = _switch(old_thread, new_thread); diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c index e399963521c0..eca15d25e026 100644 --- a/arch/ppc64/kernel/sysfs.c +++ b/arch/ppc64/kernel/sysfs.c @@ -153,11 +153,9 @@ void ppc64_enable_pmcs(void) break; } -#ifdef CONFIG_PPC_PSERIES /* instruct hypervisor to maintain PMCs */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) get_paca()->lppaca.pmcregs_in_use = 1; -#endif /* CONFIG_PPC_PSERIES */ } #else diff --git a/arch/ppc64/kernel/time.c b/arch/ppc64/kernel/time.c index d523056fd660..1696e1b05bb9 100644 --- a/arch/ppc64/kernel/time.c +++ b/arch/ppc64/kernel/time.c @@ -371,13 +371,11 @@ int timer_interrupt(struct pt_regs * regs) process_hvlpevents(regs); #endif -/* collect purr register values often, for accurate calculations */ -#if defined(CONFIG_PPC_PSERIES) + /* collect purr register values often, for accurate calculations */ if (firmware_has_feature(FW_FEATURE_SPLPAR)) { struct cpu_usage *cu = &__get_cpu_var(cpu_usage_array); cu->current_tb = mfspr(SPRN_PURR); } -#endif irq_exit(); diff --git a/include/asm-ppc64/firmware.h b/include/asm-ppc64/firmware.h index 5bb5bf46bb4a..e3725f3c8ea6 100644 --- a/include/asm-ppc64/firmware.h +++ b/include/asm-ppc64/firmware.h @@ -45,6 +45,22 @@ #define FW_FEATURE_MULTITCE (1UL<<19) #define FW_FEATURE_SPLPAR (1UL<<20) +enum { + FW_FEATURE_PSERIES = FW_FEATURE_PFT | FW_FEATURE_TCE | + FW_FEATURE_SPRG0 | FW_FEATURE_DABR | FW_FEATURE_COPY | + FW_FEATURE_ASR | FW_FEATURE_DEBUG | FW_FEATURE_TERM | + FW_FEATURE_PERF | FW_FEATURE_DUMP | FW_FEATURE_INTERRUPT | + FW_FEATURE_MIGRATE | FW_FEATURE_PERFMON | FW_FEATURE_CRQ | + FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN | + FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE | + FW_FEATURE_SPLPAR, + FW_FEATURE_POSSIBLE = +#ifdef CONFIG_PPC_PSERIES + FW_FEATURE_PSERIES | +#endif + 0, +}; + /* This is used to identify firmware features which are available * to the kernel. */ @@ -52,15 +68,17 @@ extern unsigned long ppc64_firmware_features; static inline unsigned long firmware_has_feature(unsigned long feature) { - return ppc64_firmware_features & feature; + return ppc64_firmware_features & feature & FW_FEATURE_POSSIBLE; } +#ifdef CONFIG_PPC_PSERIES typedef struct { unsigned long val; char * name; } firmware_feature_t; extern firmware_feature_t firmware_features_table[]; +#endif #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ From aed31351941aa990fb0865c186565a589c56d3fe Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 3 Aug 2005 14:43:21 +1000 Subject: [PATCH 21/37] [PATCH] ppc64: introduce FW_FEATURE_ISERIES Signed-off-by: Stephen Rothwell Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/iSeries_setup.c | 3 +++ arch/ppc64/kernel/pSeries_lpar.c | 1 - arch/ppc64/kernel/pSeries_setup.c | 2 +- include/asm-ppc64/firmware.h | 22 +++++++++++++++++++--- 4 files changed, 23 insertions(+), 5 deletions(-) diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index a649edbb23b6..460e7df681a1 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ b/arch/ppc64/kernel/iSeries_setup.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include "iSeries_setup.h" @@ -314,6 +315,8 @@ static void __init iSeries_init_early(void) DBG(" -> iSeries_init_early()\n"); + ppc64_firmware_features = FW_FEATURE_ISERIES; + ppcdbg_initialize(); #if defined(CONFIG_BLK_DEV_INITRD) diff --git a/arch/ppc64/kernel/pSeries_lpar.c b/arch/ppc64/kernel/pSeries_lpar.c index 74dd144dcce8..56845543c891 100644 --- a/arch/ppc64/kernel/pSeries_lpar.c +++ b/arch/ppc64/kernel/pSeries_lpar.c @@ -52,7 +52,6 @@ EXPORT_SYMBOL(plpar_hcall_4out); EXPORT_SYMBOL(plpar_hcall_norets); EXPORT_SYMBOL(plpar_hcall_8arg_2ret); -extern void fw_feature_init(void); extern void pSeries_find_serial_port(void); diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index 7ae7a2ca1085..54e0651ba3fd 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -262,7 +262,7 @@ arch_initcall(pSeries_init_panel); * using contents of device-tree/ibm,hypertas-functions. * Ultimately this functionality may be moved into prom.c prom_init(). */ -void __init fw_feature_init(void) +static void __init fw_feature_init(void) { struct device_node * dn; char * hypertas; diff --git a/include/asm-ppc64/firmware.h b/include/asm-ppc64/firmware.h index e3725f3c8ea6..22bb85cf60af 100644 --- a/include/asm-ppc64/firmware.h +++ b/include/asm-ppc64/firmware.h @@ -44,9 +44,10 @@ #define FW_FEATURE_XDABR (1UL<<18) #define FW_FEATURE_MULTITCE (1UL<<19) #define FW_FEATURE_SPLPAR (1UL<<20) +#define FW_FEATURE_ISERIES (1UL<<21) enum { - FW_FEATURE_PSERIES = FW_FEATURE_PFT | FW_FEATURE_TCE | + FW_FEATURE_PSERIES_POSSIBLE = FW_FEATURE_PFT | FW_FEATURE_TCE | FW_FEATURE_SPRG0 | FW_FEATURE_DABR | FW_FEATURE_COPY | FW_FEATURE_ASR | FW_FEATURE_DEBUG | FW_FEATURE_TERM | FW_FEATURE_PERF | FW_FEATURE_DUMP | FW_FEATURE_INTERRUPT | @@ -54,11 +55,25 @@ enum { FW_FEATURE_VIO | FW_FEATURE_RDMA | FW_FEATURE_LLAN | FW_FEATURE_BULK | FW_FEATURE_XDABR | FW_FEATURE_MULTITCE | FW_FEATURE_SPLPAR, + FW_FEATURE_PSERIES_ALWAYS = 0, + FW_FEATURE_ISERIES_POSSIBLE = FW_FEATURE_ISERIES, + FW_FEATURE_ISERIES_ALWAYS = FW_FEATURE_ISERIES, FW_FEATURE_POSSIBLE = #ifdef CONFIG_PPC_PSERIES - FW_FEATURE_PSERIES | + FW_FEATURE_PSERIES_POSSIBLE | +#endif +#ifdef CONFIG_PPC_ISERIES + FW_FEATURE_ISERIES_POSSIBLE | #endif 0, + FW_FEATURE_ALWAYS = +#ifdef CONFIG_PPC_PSERIES + FW_FEATURE_PSERIES_ALWAYS & +#endif +#ifdef CONFIG_PPC_ISERIES + FW_FEATURE_ISERIES_ALWAYS & +#endif + FW_FEATURE_POSSIBLE, }; /* This is used to identify firmware features which are available @@ -68,7 +83,8 @@ extern unsigned long ppc64_firmware_features; static inline unsigned long firmware_has_feature(unsigned long feature) { - return ppc64_firmware_features & feature & FW_FEATURE_POSSIBLE; + return (FW_FEATURE_ALWAYS & feature) || + (FW_FEATURE_POSSIBLE & ppc64_firmware_features & feature); } #ifdef CONFIG_PPC_PSERIES From 38e85dc18036804ada8698951cfad4e6114fec1b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:23 +1000 Subject: [PATCH 22/37] [PATCH] ppc64: Remove PTRRELOC() from msChunks code The msChunks code was written to work on pSeries, but now it's only used on iSeries. This means there's no need to do PTRRELOC anymore, so remove it all. A few places were getting "extern reloc_offset()" from abs_addr.h, move it into system.h instead. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/LparData.c | 10 +--------- include/asm-ppc64/abs_addr.h | 36 ++++++++++-------------------------- include/asm-ppc64/system.h | 2 ++ 3 files changed, 13 insertions(+), 35 deletions(-) diff --git a/arch/ppc64/kernel/LparData.c b/arch/ppc64/kernel/LparData.c index 3b9a2600fec2..0ed77b2f7d5f 100644 --- a/arch/ppc64/kernel/LparData.c +++ b/arch/ppc64/kernel/LparData.c @@ -229,24 +229,16 @@ struct ItVpdAreas itVpdAreas = { struct msChunks msChunks; EXPORT_SYMBOL(msChunks); -/* Depending on whether this is called from iSeries or pSeries setup - * code, the location of the msChunks struct may or may not have - * to be reloc'd, so we force the caller to do that for us by passing - * in a pointer to the structure. - */ unsigned long msChunks_alloc(unsigned long mem, unsigned long num_chunks, unsigned long chunk_size) { - unsigned long offset = reloc_offset(); - struct msChunks *_msChunks = PTRRELOC(&msChunks); - _msChunks->num_chunks = num_chunks; _msChunks->chunk_size = chunk_size; _msChunks->chunk_shift = __ilog2(chunk_size); _msChunks->chunk_mask = (1UL<<_msChunks->chunk_shift)-1; mem = _ALIGN(mem, sizeof(msChunks_entry)); - _msChunks->abs = (msChunks_entry *)(mem + offset); + _msChunks->abs = (msChunks_entry *)mem; mem += num_chunks * sizeof(msChunks_entry); return mem; diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 6d4e8e787058..93dc63ed4f2f 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -29,46 +29,30 @@ struct msChunks { extern struct msChunks msChunks; extern unsigned long msChunks_alloc(unsigned long, unsigned long, unsigned long); -extern unsigned long reloc_offset(void); #ifdef CONFIG_MSCHUNKS -static inline unsigned long -chunk_to_addr(unsigned long chunk) +static inline unsigned long chunk_to_addr(unsigned long chunk) { - unsigned long offset = reloc_offset(); - struct msChunks *_msChunks = PTRRELOC(&msChunks); - - return chunk << _msChunks->chunk_shift; + return chunk << msChunks.chunk_shift; } -static inline unsigned long -addr_to_chunk(unsigned long addr) +static inline unsigned long addr_to_chunk(unsigned long addr) { - unsigned long offset = reloc_offset(); - struct msChunks *_msChunks = PTRRELOC(&msChunks); - - return addr >> _msChunks->chunk_shift; + return addr >> msChunks.chunk_shift; } -static inline unsigned long -chunk_offset(unsigned long addr) +static inline unsigned long chunk_offset(unsigned long addr) { - unsigned long offset = reloc_offset(); - struct msChunks *_msChunks = PTRRELOC(&msChunks); - - return addr & _msChunks->chunk_mask; + return addr & msChunks.chunk_mask; } -static inline unsigned long -abs_chunk(unsigned long pchunk) +static inline unsigned long abs_chunk(unsigned long pchunk) { - unsigned long offset = reloc_offset(); - struct msChunks *_msChunks = PTRRELOC(&msChunks); - if ( pchunk >= _msChunks->num_chunks ) { + if (pchunk >= msChunks.num_chunks) return pchunk; - } - return PTRRELOC(_msChunks->abs)[pchunk]; + + return msChunks.abs[pchunk]; } /* A macro so it can take pointers or unsigned long. */ diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h index 98d120ca8a91..4104a5dedbaa 100644 --- a/include/asm-ppc64/system.h +++ b/include/asm-ppc64/system.h @@ -302,5 +302,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new, int size) #define arch_align_stack(x) (x) +extern unsigned long reloc_offset(void); + #endif /* __KERNEL__ */ #endif From 34c8f6961fc601294a38c5bd5ca12131b2e52674 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:23 +1000 Subject: [PATCH 23/37] [PATCH] ppc64: msChunks cleanups Chunks are 256KB, so use constants for the size/shift/mask, rather than getting them from the msChunks struct. The iSeries debugger (??) might still need access to the values in the msChunks struct, so we keep them around for now, but set them from the constant values. Replace msChunks_entry typedef with regular u32. Simplify msChunks_alloc() to manipulate klimit directly, rather than via a parameter. Move msChunks_alloc() and msChunks into iSeries_setup.c, as that's where they're used. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/LparData.c | 18 ------------------ arch/ppc64/kernel/iSeries_setup.c | 20 ++++++++++++++++++-- include/asm-ppc64/abs_addr.h | 15 +++++++++------ 3 files changed, 27 insertions(+), 26 deletions(-) diff --git a/arch/ppc64/kernel/LparData.c b/arch/ppc64/kernel/LparData.c index 0ed77b2f7d5f..0a9c23ca2f0c 100644 --- a/arch/ppc64/kernel/LparData.c +++ b/arch/ppc64/kernel/LparData.c @@ -225,21 +225,3 @@ struct ItVpdAreas itVpdAreas = { 0,0 } }; - -struct msChunks msChunks; -EXPORT_SYMBOL(msChunks); - -unsigned long -msChunks_alloc(unsigned long mem, unsigned long num_chunks, unsigned long chunk_size) -{ - _msChunks->num_chunks = num_chunks; - _msChunks->chunk_size = chunk_size; - _msChunks->chunk_shift = __ilog2(chunk_size); - _msChunks->chunk_mask = (1UL<<_msChunks->chunk_shift)-1; - - mem = _ALIGN(mem, sizeof(msChunks_entry)); - _msChunks->abs = (msChunks_entry *)mem; - mem += num_chunks * sizeof(msChunks_entry); - - return mem; -} diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index 460e7df681a1..e47984ba7c7c 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ b/arch/ppc64/kernel/iSeries_setup.c @@ -415,6 +415,22 @@ static void __init iSeries_init_early(void) DBG(" <- iSeries_init_early()\n"); } +struct msChunks msChunks = { + /* XXX We don't use these, but Piranha might need them. */ + .chunk_size = MSCHUNKS_CHUNK_SIZE, + .chunk_shift = MSCHUNKS_CHUNK_SHIFT, + .chunk_mask = MSCHUNKS_OFFSET_MASK, +}; +EXPORT_SYMBOL(msChunks); + +void msChunks_alloc(unsigned long num_chunks) +{ + klimit = _ALIGN(klimit, sizeof(u32)); + msChunks.abs = (u32 *)klimit; + klimit += num_chunks * sizeof(u32); + msChunks.num_chunks = num_chunks; +} + /* * The iSeries may have very large memories ( > 128 GB ) and a partition * may get memory in "chunks" that may be anywhere in the 2**52 real @@ -452,7 +468,7 @@ static void __init build_iSeries_Memory_Map(void) /* Chunk size on iSeries is 256K bytes */ totalChunks = (u32)HvLpConfig_getMsChunks(); - klimit = msChunks_alloc(klimit, totalChunks, 1UL << 18); + msChunks_alloc(totalChunks); /* * Get absolute address of our load area @@ -498,7 +514,7 @@ static void __init build_iSeries_Memory_Map(void) */ hptFirstChunk = (u32)addr_to_chunk(HvCallHpt_getHptAddress()); hptSizePages = (u32)HvCallHpt_getHptPages(); - hptSizeChunks = hptSizePages >> (msChunks.chunk_shift - PAGE_SHIFT); + hptSizeChunks = hptSizePages >> (MSCHUNKS_CHUNK_SHIFT - PAGE_SHIFT); hptLastChunk = hptFirstChunk + hptSizeChunks - 1; printk("HPT absolute addr = %016lx, size = %dK\n", diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 93dc63ed4f2f..2276567f133a 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -17,34 +17,37 @@ #include #include -typedef u32 msChunks_entry; struct msChunks { unsigned long num_chunks; unsigned long chunk_size; unsigned long chunk_shift; unsigned long chunk_mask; - msChunks_entry *abs; + u32 *abs; }; extern struct msChunks msChunks; -extern unsigned long msChunks_alloc(unsigned long, unsigned long, unsigned long); #ifdef CONFIG_MSCHUNKS +/* Chunks are 256 KB */ +#define MSCHUNKS_CHUNK_SHIFT (18) +#define MSCHUNKS_CHUNK_SIZE (1UL << MSCHUNKS_CHUNK_SHIFT) +#define MSCHUNKS_OFFSET_MASK (MSCHUNKS_CHUNK_SIZE - 1) + static inline unsigned long chunk_to_addr(unsigned long chunk) { - return chunk << msChunks.chunk_shift; + return chunk << MSCHUNKS_CHUNK_SHIFT; } static inline unsigned long addr_to_chunk(unsigned long addr) { - return addr >> msChunks.chunk_shift; + return addr >> MSCHUNKS_CHUNK_SHIFT; } static inline unsigned long chunk_offset(unsigned long addr) { - return addr & msChunks.chunk_mask; + return addr & MSCHUNKS_OFFSET_MASK; } static inline unsigned long abs_chunk(unsigned long pchunk) From 56e97b71bf55edb69dc8e9715553972ce50b1564 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:23 +1000 Subject: [PATCH 24/37] [PATCH] ppc64: Rename msChunks structure Rename the msChunks struct to get rid of the StUdlY caps and make it a bit clearer what it's for. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/head.S | 4 ++-- arch/ppc64/kernel/iSeries_setup.c | 17 +++++++++-------- include/asm-ppc64/abs_addr.h | 15 +++++++-------- 3 files changed, 18 insertions(+), 18 deletions(-) diff --git a/arch/ppc64/kernel/head.S b/arch/ppc64/kernel/head.S index a0ff707d6fea..cccec4902646 100644 --- a/arch/ppc64/kernel/head.S +++ b/arch/ppc64/kernel/head.S @@ -96,12 +96,12 @@ END_FTR_SECTION(0, 1) .llong hvReleaseData-KERNELBASE /* - * At offset 0x28 and 0x30 are offsets to the msChunks + * At offset 0x28 and 0x30 are offsets to the mschunks_map * array (used by the iSeries LPAR debugger to do translation * between physical addresses and absolute addresses) and * to the pidhash table (also used by the debugger) */ - .llong msChunks-KERNELBASE + .llong mschunks_map-KERNELBASE .llong 0 /* pidhash-KERNELBASE SFRXXX */ /* Offset 0x38 - Pointer to start of embedded System.map */ diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index e47984ba7c7c..b384a6ad0a57 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ b/arch/ppc64/kernel/iSeries_setup.c @@ -415,20 +415,20 @@ static void __init iSeries_init_early(void) DBG(" <- iSeries_init_early()\n"); } -struct msChunks msChunks = { +struct mschunks_map mschunks_map = { /* XXX We don't use these, but Piranha might need them. */ .chunk_size = MSCHUNKS_CHUNK_SIZE, .chunk_shift = MSCHUNKS_CHUNK_SHIFT, .chunk_mask = MSCHUNKS_OFFSET_MASK, }; -EXPORT_SYMBOL(msChunks); +EXPORT_SYMBOL(mschunks_map); -void msChunks_alloc(unsigned long num_chunks) +void mschunks_alloc(unsigned long num_chunks) { klimit = _ALIGN(klimit, sizeof(u32)); - msChunks.abs = (u32 *)klimit; + mschunks_map.mapping = (u32 *)klimit; klimit += num_chunks * sizeof(u32); - msChunks.num_chunks = num_chunks; + mschunks_map.num_chunks = num_chunks; } /* @@ -468,7 +468,7 @@ static void __init build_iSeries_Memory_Map(void) /* Chunk size on iSeries is 256K bytes */ totalChunks = (u32)HvLpConfig_getMsChunks(); - msChunks_alloc(totalChunks); + mschunks_alloc(totalChunks); /* * Get absolute address of our load area @@ -505,7 +505,7 @@ static void __init build_iSeries_Memory_Map(void) printk("Load area size %dK\n", loadAreaSize * 256); for (nextPhysChunk = 0; nextPhysChunk < loadAreaSize; ++nextPhysChunk) - msChunks.abs[nextPhysChunk] = + mschunks_map.mapping[nextPhysChunk] = loadAreaFirstChunk + nextPhysChunk; /* @@ -571,7 +571,8 @@ static void __init build_iSeries_Memory_Map(void) (absChunk > hptLastChunk)) && ((absChunk < loadAreaFirstChunk) || (absChunk > loadAreaLastChunk))) { - msChunks.abs[nextPhysChunk] = absChunk; + mschunks_map.mapping[nextPhysChunk] = + absChunk; ++nextPhysChunk; } } diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 2276567f133a..05414a9bfdd1 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -17,18 +17,17 @@ #include #include -struct msChunks { +#ifdef CONFIG_MSCHUNKS + +struct mschunks_map { unsigned long num_chunks; unsigned long chunk_size; unsigned long chunk_shift; unsigned long chunk_mask; - u32 *abs; + u32 *mapping; }; -extern struct msChunks msChunks; - - -#ifdef CONFIG_MSCHUNKS +extern struct mschunks_map mschunks_map; /* Chunks are 256 KB */ #define MSCHUNKS_CHUNK_SHIFT (18) @@ -52,10 +51,10 @@ static inline unsigned long chunk_offset(unsigned long addr) static inline unsigned long abs_chunk(unsigned long pchunk) { - if (pchunk >= msChunks.num_chunks) + if (pchunk >= mschunks_map.num_chunks) return pchunk; - return msChunks.abs[pchunk]; + return mschunks_map.mapping[pchunk]; } /* A macro so it can take pointers or unsigned long. */ From ce21795275ab469b97384faa36462350af17eca0 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:23 +1000 Subject: [PATCH 25/37] [PATCH] ppc64: Consolidate some macros The only caller of chunk_offset() and abs_chunk() is phys_to_abs(), so fold the former two into the latter. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- include/asm-ppc64/abs_addr.h | 25 +++++++++---------------- 1 file changed, 9 insertions(+), 16 deletions(-) diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 05414a9bfdd1..025527742fef 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -44,25 +44,18 @@ static inline unsigned long addr_to_chunk(unsigned long addr) return addr >> MSCHUNKS_CHUNK_SHIFT; } -static inline unsigned long chunk_offset(unsigned long addr) +static inline unsigned long phys_to_abs(unsigned long pa) { - return addr & MSCHUNKS_OFFSET_MASK; + unsigned long chunk; + + chunk = addr_to_chunk(pa); + + if (chunk < mschunks_map.num_chunks) + chunk = mschunks_map.mapping[chunk]; + + return chunk_to_addr(chunk) + (pa & MSCHUNKS_OFFSET_MASK); } -static inline unsigned long abs_chunk(unsigned long pchunk) -{ - if (pchunk >= mschunks_map.num_chunks) - return pchunk; - - return mschunks_map.mapping[pchunk]; -} - -/* A macro so it can take pointers or unsigned long. */ -#define phys_to_abs(pa) \ - ({ unsigned long _pa = (unsigned long)(pa); \ - chunk_to_addr(abs_chunk(addr_to_chunk(_pa))) + chunk_offset(_pa); \ - }) - static inline unsigned long physRpn_to_absRpn(unsigned long rpn) { From aefd16b0c5a594b5feaba23954ad74061f45c8a5 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:24 +1000 Subject: [PATCH 26/37] [PATCH] ppc64: Remove redundant uses of physRpn_to_absRpn physRpn_to_absRpn is a no-op on non-iSeries platforms, remove the two redundant calls. There's only one caller on iSeries so fold the logic in there so we can get rid of it completely. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/iSeries_htab.c | 5 ++++- arch/ppc64/kernel/pSeries_lpar.c | 3 +-- arch/ppc64/mm/hash_native.c | 3 +-- include/asm-ppc64/abs_addr.h | 8 -------- 4 files changed, 6 insertions(+), 13 deletions(-) diff --git a/arch/ppc64/kernel/iSeries_htab.c b/arch/ppc64/kernel/iSeries_htab.c index b0250ae4a72a..2192055a90a0 100644 --- a/arch/ppc64/kernel/iSeries_htab.c +++ b/arch/ppc64/kernel/iSeries_htab.c @@ -41,6 +41,7 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, unsigned long prpn, unsigned long vflags, unsigned long rflags) { + unsigned long arpn; long slot; hpte_t lhpte; int secondary = 0; @@ -70,8 +71,10 @@ static long iSeries_hpte_insert(unsigned long hpte_group, unsigned long va, slot &= 0x7fffffffffffffff; } + arpn = phys_to_abs(prpn << PAGE_SHIFT) >> PAGE_SHIFT; + lhpte.v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; - lhpte.r = (physRpn_to_absRpn(prpn) << HPTE_R_RPN_SHIFT) | rflags; + lhpte.r = (arpn << HPTE_R_RPN_SHIFT) | rflags; /* Now fill in the actual HPTE */ HvCallHpt_addValidate(slot, secondary, &lhpte); diff --git a/arch/ppc64/kernel/pSeries_lpar.c b/arch/ppc64/kernel/pSeries_lpar.c index 56845543c891..0a3ddc9227c5 100644 --- a/arch/ppc64/kernel/pSeries_lpar.c +++ b/arch/ppc64/kernel/pSeries_lpar.c @@ -278,7 +278,6 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group, unsigned long va, unsigned long prpn, unsigned long vflags, unsigned long rflags) { - unsigned long arpn = physRpn_to_absRpn(prpn); unsigned long lpar_rc; unsigned long flags; unsigned long slot; @@ -289,7 +288,7 @@ long pSeries_lpar_hpte_insert(unsigned long hpte_group, if (vflags & HPTE_V_LARGE) hpte_v &= ~(1UL << HPTE_V_AVPN_SHIFT); - hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags; + hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; /* Now fill in the actual HPTE */ /* Set CEC cookie to 0 */ diff --git a/arch/ppc64/mm/hash_native.c b/arch/ppc64/mm/hash_native.c index a6abd3a979bf..7626bb59954d 100644 --- a/arch/ppc64/mm/hash_native.c +++ b/arch/ppc64/mm/hash_native.c @@ -51,7 +51,6 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va, unsigned long prpn, unsigned long vflags, unsigned long rflags) { - unsigned long arpn = physRpn_to_absRpn(prpn); hpte_t *hptep = htab_address + hpte_group; unsigned long hpte_v, hpte_r; int i; @@ -74,7 +73,7 @@ long native_hpte_insert(unsigned long hpte_group, unsigned long va, hpte_v = (va >> 23) << HPTE_V_AVPN_SHIFT | vflags | HPTE_V_VALID; if (vflags & HPTE_V_LARGE) va &= ~(1UL << HPTE_V_AVPN_SHIFT); - hpte_r = (arpn << HPTE_R_RPN_SHIFT) | rflags; + hpte_r = (prpn << HPTE_R_RPN_SHIFT) | rflags; hptep->r = hpte_r; /* Guarantee the second dword is visible before the valid bit */ diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 025527742fef..ab4320c1cf5b 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -56,14 +56,6 @@ static inline unsigned long phys_to_abs(unsigned long pa) return chunk_to_addr(chunk) + (pa & MSCHUNKS_OFFSET_MASK); } -static inline unsigned long -physRpn_to_absRpn(unsigned long rpn) -{ - unsigned long pa = rpn << PAGE_SHIFT; - unsigned long aa = phys_to_abs(pa); - return (aa >> PAGE_SHIFT); -} - /* A macro so it can take pointers or unsigned long. */ #define abs_to_phys(aa) lmb_abs_to_phys((unsigned long)(aa)) From a4a0f97020444f83bf22bb9c8c20d8af2b4e6317 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:24 +1000 Subject: [PATCH 27/37] [PATCH] ppc64: Remove redundant use of pointers in lmb code The lmb code is all written to use a pointer to an lmb struct. But it's always the same lmb struct, called "lmb". So we take the address of lmb, call it _lmb and then start using _lmb->foo everywhere, which is silly. This patch removes the _lmb pointers and replaces them with direct references to the one "lmb" struct. We do the same for some _mem and _rsv pointers which point to lmb.memory and lmb.reserved respectively. This patch looks quite busy, but it's basically just: s/_lmb->/lmb./g s/_mem->/lmb.memory./g s/_rsv->/lmb.reserved./g s/_rsv/&lmb.reserved/g s/mem->/lmb.memory./g Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/lmb.c | 100 +++++++++++++++++----------------------- 1 file changed, 43 insertions(+), 57 deletions(-) diff --git a/arch/ppc64/kernel/lmb.c b/arch/ppc64/kernel/lmb.c index d6c6bd03d2a4..6cb275615fc4 100644 --- a/arch/ppc64/kernel/lmb.c +++ b/arch/ppc64/kernel/lmb.c @@ -28,33 +28,32 @@ void lmb_dump_all(void) { #ifdef DEBUG unsigned long i; - struct lmb *_lmb = &lmb; udbg_printf("lmb_dump_all:\n"); udbg_printf(" memory.cnt = 0x%lx\n", - _lmb->memory.cnt); + lmb.memory.cnt); udbg_printf(" memory.size = 0x%lx\n", - _lmb->memory.size); - for (i=0; i < _lmb->memory.cnt ;i++) { + lmb.memory.size); + for (i=0; i < lmb.memory.cnt ;i++) { udbg_printf(" memory.region[0x%x].base = 0x%lx\n", - i, _lmb->memory.region[i].base); + i, lmb.memory.region[i].base); udbg_printf(" .physbase = 0x%lx\n", - _lmb->memory.region[i].physbase); + lmb.memory.region[i].physbase); udbg_printf(" .size = 0x%lx\n", - _lmb->memory.region[i].size); + lmb.memory.region[i].size); } udbg_printf("\n reserved.cnt = 0x%lx\n", - _lmb->reserved.cnt); + lmb.reserved.cnt); udbg_printf(" reserved.size = 0x%lx\n", - _lmb->reserved.size); - for (i=0; i < _lmb->reserved.cnt ;i++) { + lmb.reserved.size); + for (i=0; i < lmb.reserved.cnt ;i++) { udbg_printf(" reserved.region[0x%x].base = 0x%lx\n", - i, _lmb->reserved.region[i].base); + i, lmb.reserved.region[i].base); udbg_printf(" .physbase = 0x%lx\n", - _lmb->reserved.region[i].physbase); + lmb.reserved.region[i].physbase); udbg_printf(" .size = 0x%lx\n", - _lmb->reserved.region[i].size); + lmb.reserved.region[i].size); } #endif /* DEBUG */ } @@ -108,19 +107,17 @@ lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2) void __init lmb_init(void) { - struct lmb *_lmb = &lmb; - /* Create a dummy zero size LMB which will get coalesced away later. * This simplifies the lmb_add() code below... */ - _lmb->memory.region[0].base = 0; - _lmb->memory.region[0].size = 0; - _lmb->memory.cnt = 1; + lmb.memory.region[0].base = 0; + lmb.memory.region[0].size = 0; + lmb.memory.cnt = 1; /* Ditto. */ - _lmb->reserved.region[0].base = 0; - _lmb->reserved.region[0].size = 0; - _lmb->reserved.cnt = 1; + lmb.reserved.region[0].base = 0; + lmb.reserved.region[0].size = 0; + lmb.reserved.cnt = 1; } /* This routine called with relocation disabled. */ @@ -130,27 +127,26 @@ lmb_analyze(void) unsigned long i; unsigned long mem_size = 0; unsigned long size_mask = 0; - struct lmb *_lmb = &lmb; #ifdef CONFIG_MSCHUNKS unsigned long physbase = 0; #endif - for (i=0; i < _lmb->memory.cnt; i++) { + for (i=0; i < lmb.memory.cnt; i++) { unsigned long lmb_size; - lmb_size = _lmb->memory.region[i].size; + lmb_size = lmb.memory.region[i].size; #ifdef CONFIG_MSCHUNKS - _lmb->memory.region[i].physbase = physbase; + lmb.memory.region[i].physbase = physbase; physbase += lmb_size; #else - _lmb->memory.region[i].physbase = _lmb->memory.region[i].base; + lmb.memory.region[i].physbase = lmb.memory.region[i].base; #endif mem_size += lmb_size; size_mask |= lmb_size; } - _lmb->memory.size = mem_size; + lmb.memory.size = mem_size; } /* This routine called with relocation disabled. */ @@ -213,12 +209,11 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size) long __init lmb_add(unsigned long base, unsigned long size) { - struct lmb *_lmb = &lmb; - struct lmb_region *_rgn = &(_lmb->memory); + struct lmb_region *_rgn = &(lmb.memory); /* On pSeries LPAR systems, the first LMB is our RMO region. */ if ( base == 0 ) - _lmb->rmo_size = size; + lmb.rmo_size = size; return lmb_add_region(_rgn, base, size); @@ -227,8 +222,7 @@ lmb_add(unsigned long base, unsigned long size) long __init lmb_reserve(unsigned long base, unsigned long size) { - struct lmb *_lmb = &lmb; - struct lmb_region *_rgn = &(_lmb->reserved); + struct lmb_region *_rgn = &(lmb.reserved); return lmb_add_region(_rgn, base, size); } @@ -260,13 +254,10 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) { long i, j; unsigned long base = 0; - struct lmb *_lmb = &lmb; - struct lmb_region *_mem = &(_lmb->memory); - struct lmb_region *_rsv = &(_lmb->reserved); - for (i=_mem->cnt-1; i >= 0; i--) { - unsigned long lmbbase = _mem->region[i].base; - unsigned long lmbsize = _mem->region[i].size; + for (i=lmb.memory.cnt-1; i >= 0; i--) { + unsigned long lmbbase = lmb.memory.region[i].base; + unsigned long lmbsize = lmb.memory.region[i].size; if ( max_addr == LMB_ALLOC_ANYWHERE ) base = _ALIGN_DOWN(lmbbase+lmbsize-size, align); @@ -276,8 +267,8 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) continue; while ( (lmbbase <= base) && - ((j = lmb_overlaps_region(_rsv,base,size)) >= 0) ) { - base = _ALIGN_DOWN(_rsv->region[j].base-size, align); + ((j = lmb_overlaps_region(&lmb.reserved,base,size)) >= 0) ) { + base = _ALIGN_DOWN(lmb.reserved.region[j].base-size, align); } if ( (base != 0) && (lmbbase <= base) ) @@ -287,7 +278,7 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) if ( i < 0 ) return 0; - lmb_add_region(_rsv, base, size); + lmb_add_region(&lmb.reserved, base, size); return base; } @@ -295,17 +286,15 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) unsigned long __init lmb_phys_mem_size(void) { - struct lmb *_lmb = &lmb; #ifdef CONFIG_MSCHUNKS - return _lmb->memory.size; + return lmb.memory.size; #else - struct lmb_region *_mem = &(_lmb->memory); unsigned long total = 0; int i; /* add all physical memory to the bootmem map */ - for (i=0; i < _mem->cnt; i++) - total += _mem->region[i].size; + for (i=0; i < lmb.memory.cnt; i++) + total += lmb.memory.region[i].size; return total; #endif /* CONFIG_MSCHUNKS */ } @@ -313,14 +302,12 @@ lmb_phys_mem_size(void) unsigned long __init lmb_end_of_DRAM(void) { - struct lmb *_lmb = &lmb; - struct lmb_region *_mem = &(_lmb->memory); - int idx = _mem->cnt - 1; + int idx = lmb.memory.cnt - 1; #ifdef CONFIG_MSCHUNKS - return (_mem->region[idx].physbase + _mem->region[idx].size); + return (lmb.memory.region[idx].physbase + lmb.memory.region[idx].size); #else - return (_mem->region[idx].base + _mem->region[idx].size); + return (lmb.memory.region[idx].base + lmb.memory.region[idx].size); #endif /* CONFIG_MSCHUNKS */ return 0; @@ -353,20 +340,19 @@ void __init lmb_enforce_memory_limit(void) { extern unsigned long memory_limit; unsigned long i, limit; - struct lmb_region *mem = &(lmb.memory); if (! memory_limit) return; limit = memory_limit; - for (i = 0; i < mem->cnt; i++) { - if (limit > mem->region[i].size) { - limit -= mem->region[i].size; + for (i = 0; i < lmb.memory.cnt; i++) { + if (limit > lmb.memory.region[i].size) { + limit -= lmb.memory.region[i].size; continue; } - mem->region[i].size = limit; - mem->cnt = i + 1; + lmb.memory.region[i].size = limit; + lmb.memory.cnt = i + 1; break; } } From e88bcd1b29f63738b702e57d831758706162347e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:25 +1000 Subject: [PATCH 28/37] [PATCH] ppc64: Remove redundant abs_to_phys() macro abs_to_phys() is a macro that turns out to do nothing, and also has the unfortunate property that it's not the inverse of phys_to_abs() on iSeries. The following is for my benefit as much as everyone else. With CONFIG_MSCHUNKS enabled, the lmb code is changed such that it keeps a physbase variable for each lmb region. This is used to take the possibly discontiguous lmb regions and present them as a contiguous address space beginning from zero. In this context each lmb region's base address is its "absolute" base address, and its physbase is it's "physical" address (from Linux's point of view). The abs_to_phys() macro does the mapping from "absolute" to "physical". Note: This is not related to the iSeries mapping of physical to absolute (ie. Hypervisor) addresses which is maintained with the msChunks structure. And the msChunks structure is not controlled via CONFIG_MSCHUNKS. Once upon a time you could compile for non-iSeries with CONFIG_MSCHUNKS enabled. But these days CONFIG_MSCHUNKS depends on CONFIG_PPC_ISERIES, so for non-iSeries code abs_to_phys() is a no-op. On iSeries we always have one lmb region which spans from 0 to systemcfg->physicalMemorySize (arch/ppc64/kernel/iSeries_setup.c line 383). This region has a base (ie. absolute) address of 0, and a physbase address of 0 (as calculated in lmb_analyze() (arch/ppc64/kernel/lmb.c line 144)). On iSeries, abs_to_phys(aa) is defined as lmb_abs_to_phys(aa), which finds the lmb region containing aa (and there's only one, ie. 0), and then does: return lmb.memory.region[0].physbase + (aa - lmb.memory.region[0].base) physbase == base == 0, so you're left with "return aa". So remove abs_to_phys(), and lmb_abs_to_phys() which is the implementation of abs_to_phys() for iSeries. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/lmb.c | 19 ------------------- arch/ppc64/mm/init.c | 4 +--- include/asm-ppc64/abs_addr.h | 6 +----- 3 files changed, 2 insertions(+), 27 deletions(-) diff --git a/arch/ppc64/kernel/lmb.c b/arch/ppc64/kernel/lmb.c index 6cb275615fc4..111da736652b 100644 --- a/arch/ppc64/kernel/lmb.c +++ b/arch/ppc64/kernel/lmb.c @@ -313,25 +313,6 @@ lmb_end_of_DRAM(void) return 0; } -unsigned long __init -lmb_abs_to_phys(unsigned long aa) -{ - unsigned long i, pa = aa; - struct lmb *_lmb = &lmb; - struct lmb_region *_mem = &(_lmb->memory); - - for (i=0; i < _mem->cnt; i++) { - unsigned long lmbbase = _mem->region[i].base; - unsigned long lmbsize = _mem->region[i].size; - if ( lmb_addrs_overlap(aa,1,lmbbase,lmbsize) ) { - pa = _mem->region[i].physbase + (aa - lmbbase); - break; - } - } - - return pa; -} - /* * Truncate the lmb list to memory_limit if it's set * You must call lmb_analyze() after this. diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index 9edfe267123e..a16cf12c586b 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -42,7 +42,6 @@ #include #include -#include #include #include #include @@ -167,7 +166,6 @@ static int map_io_page(unsigned long ea, unsigned long pa, int flags) ptep = pte_alloc_kernel(&init_mm, pmdp, ea); if (!ptep) return -ENOMEM; - pa = abs_to_phys(pa); set_pte_at(&init_mm, ea, ptep, pfn_pte(pa >> PAGE_SHIFT, __pgprot(flags))); spin_unlock(&init_mm.page_table_lock); @@ -547,7 +545,7 @@ void __init do_init_bootmem(void) */ bootmap_pages = bootmem_bootmap_pages(total_pages); - start = abs_to_phys(lmb_alloc(bootmap_pages<> PAGE_SHIFT, total_pages); diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index ab4320c1cf5b..200db1c45f29 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -56,9 +56,6 @@ static inline unsigned long phys_to_abs(unsigned long pa) return chunk_to_addr(chunk) + (pa & MSCHUNKS_OFFSET_MASK); } -/* A macro so it can take pointers or unsigned long. */ -#define abs_to_phys(aa) lmb_abs_to_phys((unsigned long)(aa)) - #else /* !CONFIG_MSCHUNKS */ #define chunk_to_addr(chunk) ((unsigned long)(chunk)) @@ -68,12 +65,11 @@ static inline unsigned long phys_to_abs(unsigned long pa) #define phys_to_abs(pa) (pa) #define physRpn_to_absRpn(rpn) (rpn) -#define abs_to_phys(aa) (aa) #endif /* !CONFIG_MSCHUNKS */ /* Convenience macros */ #define virt_to_abs(va) phys_to_abs(__pa(va)) -#define abs_to_virt(aa) __va(abs_to_phys(aa)) +#define abs_to_virt(aa) __va(aa) #endif /* _ABS_ADDR_H */ From 180379dcefb39e8bd05d562b0685e9084dffcc0a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:26 +1000 Subject: [PATCH 29/37] [PATCH] ppc64: Remove physbase from the lmb_property struct We no longer need the lmb code to know about abs and phys addresses, so remove the physbase variable from the lmb_property struct. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/lmb.c | 23 ----------------------- arch/ppc64/mm/hash_utils.c | 2 +- arch/ppc64/mm/init.c | 27 ++++++++++++--------------- arch/ppc64/mm/numa.c | 2 +- include/asm-ppc64/lmb.h | 1 - 5 files changed, 14 insertions(+), 41 deletions(-) diff --git a/arch/ppc64/kernel/lmb.c b/arch/ppc64/kernel/lmb.c index 111da736652b..6ed6312d848f 100644 --- a/arch/ppc64/kernel/lmb.c +++ b/arch/ppc64/kernel/lmb.c @@ -37,8 +37,6 @@ void lmb_dump_all(void) for (i=0; i < lmb.memory.cnt ;i++) { udbg_printf(" memory.region[0x%x].base = 0x%lx\n", i, lmb.memory.region[i].base); - udbg_printf(" .physbase = 0x%lx\n", - lmb.memory.region[i].physbase); udbg_printf(" .size = 0x%lx\n", lmb.memory.region[i].size); } @@ -50,8 +48,6 @@ void lmb_dump_all(void) for (i=0; i < lmb.reserved.cnt ;i++) { udbg_printf(" reserved.region[0x%x].base = 0x%lx\n", i, lmb.reserved.region[i].base); - udbg_printf(" .physbase = 0x%lx\n", - lmb.reserved.region[i].physbase); udbg_printf(" .size = 0x%lx\n", lmb.reserved.region[i].size); } @@ -97,7 +93,6 @@ lmb_coalesce_regions(struct lmb_region *rgn, unsigned long r1, unsigned long r2) rgn->region[r1].size += rgn->region[r2].size; for (i=r2; i < rgn->cnt-1; i++) { rgn->region[i].base = rgn->region[i+1].base; - rgn->region[i].physbase = rgn->region[i+1].physbase; rgn->region[i].size = rgn->region[i+1].size; } rgn->cnt--; @@ -127,21 +122,12 @@ lmb_analyze(void) unsigned long i; unsigned long mem_size = 0; unsigned long size_mask = 0; -#ifdef CONFIG_MSCHUNKS - unsigned long physbase = 0; -#endif for (i=0; i < lmb.memory.cnt; i++) { unsigned long lmb_size; lmb_size = lmb.memory.region[i].size; -#ifdef CONFIG_MSCHUNKS - lmb.memory.region[i].physbase = physbase; - physbase += lmb_size; -#else - lmb.memory.region[i].physbase = lmb.memory.region[i].base; -#endif mem_size += lmb_size; size_mask |= lmb_size; } @@ -164,7 +150,6 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size) adjacent = lmb_addrs_adjacent(base,size,rgnbase,rgnsize); if ( adjacent > 0 ) { rgn->region[i].base -= size; - rgn->region[i].physbase -= size; rgn->region[i].size += size; coalesced++; break; @@ -191,11 +176,9 @@ lmb_add_region(struct lmb_region *rgn, unsigned long base, unsigned long size) for (i=rgn->cnt-1; i >= 0; i--) { if (base < rgn->region[i].base) { rgn->region[i+1].base = rgn->region[i].base; - rgn->region[i+1].physbase = rgn->region[i].physbase; rgn->region[i+1].size = rgn->region[i].size; } else { rgn->region[i+1].base = base; - rgn->region[i+1].physbase = lmb_abs_to_phys(base); rgn->region[i+1].size = size; break; } @@ -304,13 +287,7 @@ lmb_end_of_DRAM(void) { int idx = lmb.memory.cnt - 1; -#ifdef CONFIG_MSCHUNKS - return (lmb.memory.region[idx].physbase + lmb.memory.region[idx].size); -#else return (lmb.memory.region[idx].base + lmb.memory.region[idx].size); -#endif /* CONFIG_MSCHUNKS */ - - return 0; } /* diff --git a/arch/ppc64/mm/hash_utils.c b/arch/ppc64/mm/hash_utils.c index 65d6e8527948..09475c8edf7c 100644 --- a/arch/ppc64/mm/hash_utils.c +++ b/arch/ppc64/mm/hash_utils.c @@ -210,7 +210,7 @@ void __init htab_initialize(void) /* create bolted the linear mapping in the hash table */ for (i=0; i < lmb.memory.cnt; i++) { - base = lmb.memory.region[i].physbase + KERNELBASE; + base = lmb.memory.region[i].base + KERNELBASE; size = lmb.memory.region[i].size; DBG("creating mapping for region: %lx : %lx\n", base, size); diff --git a/arch/ppc64/mm/init.c b/arch/ppc64/mm/init.c index a16cf12c586b..c02dc9809ca5 100644 --- a/arch/ppc64/mm/init.c +++ b/arch/ppc64/mm/init.c @@ -482,9 +482,9 @@ void __init mm_init_ppc64(void) for (i = 1; i < lmb.memory.cnt; i++) { unsigned long base, prevbase, prevsize; - prevbase = lmb.memory.region[i-1].physbase; + prevbase = lmb.memory.region[i-1].base; prevsize = lmb.memory.region[i-1].size; - base = lmb.memory.region[i].physbase; + base = lmb.memory.region[i].base; if (base > (prevbase + prevsize)) { io_hole_start = prevbase + prevsize; io_hole_size = base - (prevbase + prevsize); @@ -511,11 +511,8 @@ int page_is_ram(unsigned long pfn) for (i=0; i < lmb.memory.cnt; i++) { unsigned long base; -#ifdef CONFIG_MSCHUNKS - base = lmb.memory.region[i].physbase; -#else base = lmb.memory.region[i].base; -#endif + if ((paddr >= base) && (paddr < (base + lmb.memory.region[i].size))) { return 1; @@ -556,25 +553,25 @@ void __init do_init_bootmem(void) * present. */ for (i=0; i < lmb.memory.cnt; i++) { - unsigned long physbase, size; + unsigned long base, size; unsigned long start_pfn, end_pfn; - physbase = lmb.memory.region[i].physbase; + base = lmb.memory.region[i].base; size = lmb.memory.region[i].size; - start_pfn = physbase >> PAGE_SHIFT; + start_pfn = base >> PAGE_SHIFT; end_pfn = start_pfn + (size >> PAGE_SHIFT); memory_present(0, start_pfn, end_pfn); - free_bootmem(physbase, size); + free_bootmem(base, size); } /* reserve the sections we're already using */ for (i=0; i < lmb.reserved.cnt; i++) { - unsigned long physbase = lmb.reserved.region[i].physbase; + unsigned long base = lmb.reserved.region[i].base; unsigned long size = lmb.reserved.region[i].size; - reserve_bootmem(physbase, size); + reserve_bootmem(base, size); } } @@ -613,10 +610,10 @@ static int __init setup_kcore(void) int i; for (i=0; i < lmb.memory.cnt; i++) { - unsigned long physbase, size; + unsigned long base, size; struct kcore_list *kcore_mem; - physbase = lmb.memory.region[i].physbase; + base = lmb.memory.region[i].base; size = lmb.memory.region[i].size; /* GFP_ATOMIC to avoid might_sleep warnings during boot */ @@ -624,7 +621,7 @@ static int __init setup_kcore(void) if (!kcore_mem) panic("mem_init: kmalloc failed\n"); - kclist_add(kcore_mem, __va(physbase), size); + kclist_add(kcore_mem, __va(base), size); } kclist_add(&kcore_vmem, (void *)VMALLOC_START, VMALLOC_END-VMALLOC_START); diff --git a/arch/ppc64/mm/numa.c b/arch/ppc64/mm/numa.c index 0b191f2de016..c3116f0d788c 100644 --- a/arch/ppc64/mm/numa.c +++ b/arch/ppc64/mm/numa.c @@ -671,7 +671,7 @@ new_range: * Mark reserved regions on this node */ for (i = 0; i < lmb.reserved.cnt; i++) { - unsigned long physbase = lmb.reserved.region[i].physbase; + unsigned long physbase = lmb.reserved.region[i].base; unsigned long size = lmb.reserved.region[i].size; if (pa_to_nid(physbase) != nid && diff --git a/include/asm-ppc64/lmb.h b/include/asm-ppc64/lmb.h index a6cbca21ac1d..cb368bf0f264 100644 --- a/include/asm-ppc64/lmb.h +++ b/include/asm-ppc64/lmb.h @@ -22,7 +22,6 @@ struct lmb_property { unsigned long base; - unsigned long physbase; unsigned long size; }; From 71e1f55ad4bc4c8bcfe696400a950a34263a750e Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:26 +1000 Subject: [PATCH 30/37] [PATCH] ppc64: Simplify some lmb functions lmb_phys_mem_size() can always return lmb.memory.size, as long as it's called after lmb_analyze(), which it is. There's no need to recalculate the size on every call. lmb_analyze() was calculating a few things we then threw away, so just don't calculate them to start with. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/lmb.c | 27 +++++---------------------- 1 file changed, 5 insertions(+), 22 deletions(-) diff --git a/arch/ppc64/kernel/lmb.c b/arch/ppc64/kernel/lmb.c index 6ed6312d848f..5adaca2ddc9d 100644 --- a/arch/ppc64/kernel/lmb.c +++ b/arch/ppc64/kernel/lmb.c @@ -119,20 +119,12 @@ lmb_init(void) void __init lmb_analyze(void) { - unsigned long i; - unsigned long mem_size = 0; - unsigned long size_mask = 0; + int i; - for (i=0; i < lmb.memory.cnt; i++) { - unsigned long lmb_size; + lmb.memory.size = 0; - lmb_size = lmb.memory.region[i].size; - - mem_size += lmb_size; - size_mask |= lmb_size; - } - - lmb.memory.size = mem_size; + for (i = 0; i < lmb.memory.cnt; i++) + lmb.memory.size += lmb.memory.region[i].size; } /* This routine called with relocation disabled. */ @@ -266,20 +258,11 @@ lmb_alloc_base(unsigned long size, unsigned long align, unsigned long max_addr) return base; } +/* You must call lmb_analyze() before this. */ unsigned long __init lmb_phys_mem_size(void) { -#ifdef CONFIG_MSCHUNKS return lmb.memory.size; -#else - unsigned long total = 0; - int i; - - /* add all physical memory to the bootmem map */ - for (i=0; i < lmb.memory.cnt; i++) - total += lmb.memory.region[i].size; - return total; -#endif /* CONFIG_MSCHUNKS */ } unsigned long __init From bef5686229810709091fb6e505071f4aa41e3760 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 3 Aug 2005 20:21:26 +1000 Subject: [PATCH 31/37] [PATCH] ppc64: Remove CONFIG_MSCHUNKS We can now remove CONFIG_MSCHUNKS as it doesn't do anything interesting anymore. The only macro in abs_addr.h which is called by non-iSeries code is phys_to_abs(), so remove the other dummy implementations, and we add a firmware feature check to phys_to_abs(). Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/Kconfig | 6 ------ arch/ppc64/configs/iSeries_defconfig | 1 - include/asm-ppc64/abs_addr.h | 19 +++++-------------- 3 files changed, 5 insertions(+), 21 deletions(-) diff --git a/arch/ppc64/Kconfig b/arch/ppc64/Kconfig index 4d4f81c65012..13b262f10216 100644 --- a/arch/ppc64/Kconfig +++ b/arch/ppc64/Kconfig @@ -302,12 +302,6 @@ config GENERIC_HARDIRQS bool default y -config MSCHUNKS - bool - depends on PPC_ISERIES - default y - - config PPC_RTAS bool depends on PPC_PSERIES || PPC_BPA diff --git a/arch/ppc64/configs/iSeries_defconfig b/arch/ppc64/configs/iSeries_defconfig index 394ba18b58c7..219c6677abcc 100644 --- a/arch/ppc64/configs/iSeries_defconfig +++ b/arch/ppc64/configs/iSeries_defconfig @@ -99,7 +99,6 @@ CONFIG_HZ_100=y # CONFIG_HZ_1000 is not set CONFIG_HZ=100 CONFIG_GENERIC_HARDIRQS=y -CONFIG_MSCHUNKS=y CONFIG_LPARCFG=y CONFIG_SECCOMP=y CONFIG_ISA_DMA_API=y diff --git a/include/asm-ppc64/abs_addr.h b/include/asm-ppc64/abs_addr.h index 200db1c45f29..84c24d4cdb71 100644 --- a/include/asm-ppc64/abs_addr.h +++ b/include/asm-ppc64/abs_addr.h @@ -16,8 +16,7 @@ #include #include #include - -#ifdef CONFIG_MSCHUNKS +#include struct mschunks_map { unsigned long num_chunks; @@ -48,6 +47,10 @@ static inline unsigned long phys_to_abs(unsigned long pa) { unsigned long chunk; + /* This is a no-op on non-iSeries */ + if (!firmware_has_feature(FW_FEATURE_ISERIES)) + return pa; + chunk = addr_to_chunk(pa); if (chunk < mschunks_map.num_chunks) @@ -56,18 +59,6 @@ static inline unsigned long phys_to_abs(unsigned long pa) return chunk_to_addr(chunk) + (pa & MSCHUNKS_OFFSET_MASK); } -#else /* !CONFIG_MSCHUNKS */ - -#define chunk_to_addr(chunk) ((unsigned long)(chunk)) -#define addr_to_chunk(addr) (addr) -#define chunk_offset(addr) (0) -#define abs_chunk(pchunk) (pchunk) - -#define phys_to_abs(pa) (pa) -#define physRpn_to_absRpn(rpn) (rpn) - -#endif /* !CONFIG_MSCHUNKS */ - /* Convenience macros */ #define virt_to_abs(va) phys_to_abs(__pa(va)) #define abs_to_virt(aa) __va(aa) From b13cfd173f73c3f6f9a307b7b6e64d45fbd756b2 Mon Sep 17 00:00:00 2001 From: Olaf Hering Date: Thu, 4 Aug 2005 19:26:42 +0200 Subject: [PATCH 32/37] [PATCH] ppc64: allow xmon=off If both CONFIG_XMON and CONFIG_XMON_DEFAULT is enabled in the .config, there is no way to disable xmon again. setup_system calls first xmon_init, later parse_early_param. So a new 'xmon=off' cmdline option will do the right thing. Signed-off-by: Olaf Hering Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/setup.c | 8 +++++--- arch/ppc64/xmon/start.c | 2 +- arch/ppc64/xmon/xmon.c | 26 ++++++++++++++++++-------- include/asm-ppc64/system.h | 2 +- 4 files changed, 25 insertions(+), 13 deletions(-) diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c index e9c24d2dbd91..b3ef8df12982 100644 --- a/arch/ppc64/kernel/setup.c +++ b/arch/ppc64/kernel/setup.c @@ -627,7 +627,7 @@ void __init setup_system(void) * Initialize xmon */ #ifdef CONFIG_XMON_DEFAULT - xmon_init(); + xmon_init(1); #endif /* * Register early console @@ -1343,11 +1343,13 @@ static int __init early_xmon(char *p) /* ensure xmon is enabled */ if (p) { if (strncmp(p, "on", 2) == 0) - xmon_init(); + xmon_init(1); + if (strncmp(p, "off", 3) == 0) + xmon_init(0); if (strncmp(p, "early", 5) != 0) return 0; } - xmon_init(); + xmon_init(1); debugger(NULL); return 0; diff --git a/arch/ppc64/xmon/start.c b/arch/ppc64/xmon/start.c index a9265bcc79b2..f86b584acd76 100644 --- a/arch/ppc64/xmon/start.c +++ b/arch/ppc64/xmon/start.c @@ -27,7 +27,7 @@ static void sysrq_handle_xmon(int key, struct pt_regs *pt_regs, struct tty_struct *tty) { /* ensure xmon is enabled */ - xmon_init(); + xmon_init(1); debugger(pt_regs); } diff --git a/arch/ppc64/xmon/xmon.c b/arch/ppc64/xmon/xmon.c index 05539439e6bc..45908b10acd3 100644 --- a/arch/ppc64/xmon/xmon.c +++ b/arch/ppc64/xmon/xmon.c @@ -2496,15 +2496,25 @@ static void dump_stab(void) } } -void xmon_init(void) +void xmon_init(int enable) { - __debugger = xmon; - __debugger_ipi = xmon_ipi; - __debugger_bpt = xmon_bpt; - __debugger_sstep = xmon_sstep; - __debugger_iabr_match = xmon_iabr_match; - __debugger_dabr_match = xmon_dabr_match; - __debugger_fault_handler = xmon_fault_handler; + if (enable) { + __debugger = xmon; + __debugger_ipi = xmon_ipi; + __debugger_bpt = xmon_bpt; + __debugger_sstep = xmon_sstep; + __debugger_iabr_match = xmon_iabr_match; + __debugger_dabr_match = xmon_dabr_match; + __debugger_fault_handler = xmon_fault_handler; + } else { + __debugger = NULL; + __debugger_ipi = NULL; + __debugger_bpt = NULL; + __debugger_sstep = NULL; + __debugger_iabr_match = NULL; + __debugger_dabr_match = NULL; + __debugger_fault_handler = NULL; + } } void dump_segments(void) diff --git a/include/asm-ppc64/system.h b/include/asm-ppc64/system.h index 4104a5dedbaa..b9e1835351e9 100644 --- a/include/asm-ppc64/system.h +++ b/include/asm-ppc64/system.h @@ -88,7 +88,7 @@ DEBUGGER_BOILERPLATE(debugger_dabr_match) DEBUGGER_BOILERPLATE(debugger_fault_handler) #ifdef CONFIG_XMON -extern void xmon_init(void); +extern void xmon_init(int enable); #endif #else From 180a33627d958d5d9d3602dde6ac74b315e136f0 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 9 Aug 2005 11:13:36 +1000 Subject: [PATCH 33/37] [PATCH] ppc64: Move ppc64_enable_pmcs() logic into a ppc_md function This patch moves power4_enable_pmcs() to arch/ppc64/kernel/pmc.c. I've tested it on P5 LPAR and P4. It does what it used to. Signed-off-by: Michael Ellerman Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/iSeries_setup.c | 2 ++ arch/ppc64/kernel/pSeries_setup.c | 21 ++++++++++++ arch/ppc64/kernel/pmac_setup.c | 2 ++ arch/ppc64/kernel/pmc.c | 21 ++++++++++++ arch/ppc64/kernel/sysfs.c | 54 +++---------------------------- include/asm-ppc64/machdep.h | 3 ++ include/asm-ppc64/pmc.h | 2 ++ 7 files changed, 55 insertions(+), 50 deletions(-) diff --git a/arch/ppc64/kernel/iSeries_setup.c b/arch/ppc64/kernel/iSeries_setup.c index b384a6ad0a57..3ffefbbc6623 100644 --- a/arch/ppc64/kernel/iSeries_setup.c +++ b/arch/ppc64/kernel/iSeries_setup.c @@ -964,6 +964,8 @@ void __init iSeries_early_setup(void) ppc_md.calibrate_decr = iSeries_calibrate_decr; ppc_md.progress = iSeries_progress; + /* XXX Implement enable_pmcs for iSeries */ + if (get_paca()->lppaca.shared_proc) { ppc_md.idle_loop = iseries_shared_idle; printk(KERN_INFO "Using shared processor idle loop\n"); diff --git a/arch/ppc64/kernel/pSeries_setup.c b/arch/ppc64/kernel/pSeries_setup.c index 54e0651ba3fd..f0f0630cf07c 100644 --- a/arch/ppc64/kernel/pSeries_setup.c +++ b/arch/ppc64/kernel/pSeries_setup.c @@ -61,6 +61,7 @@ #include #include #include +#include #include "i8259.h" #include "mpic.h" @@ -187,6 +188,21 @@ static void __init pSeries_setup_mpic(void) " MPIC "); } +static void pseries_lpar_enable_pmcs(void) +{ + unsigned long set, reset; + + power4_enable_pmcs(); + + set = 1UL << 63; + reset = 0; + plpar_hcall_norets(H_PERFMON, set, reset); + + /* instruct hypervisor to maintain PMCs */ + if (firmware_has_feature(FW_FEATURE_SPLPAR)) + get_paca()->lppaca.pmcregs_in_use = 1; +} + static void __init pSeries_setup_arch(void) { /* Fixup ppc_md depending on the type of interrupt controller */ @@ -245,6 +261,11 @@ static void __init pSeries_setup_arch(void) printk(KERN_INFO "Using default idle loop\n"); ppc_md.idle_loop = default_idle; } + + if (systemcfg->platform & PLATFORM_LPAR) + ppc_md.enable_pmcs = pseries_lpar_enable_pmcs; + else + ppc_md.enable_pmcs = power4_enable_pmcs; } static int __init pSeries_init_panel(void) diff --git a/arch/ppc64/kernel/pmac_setup.c b/arch/ppc64/kernel/pmac_setup.c index e40877fa67cd..8ff86a766cdf 100644 --- a/arch/ppc64/kernel/pmac_setup.c +++ b/arch/ppc64/kernel/pmac_setup.c @@ -71,6 +71,7 @@ #include #include #include +#include #include "pmac.h" #include "mpic.h" @@ -511,4 +512,5 @@ struct machdep_calls __initdata pmac_md = { .progress = pmac_progress, .check_legacy_ioport = pmac_check_legacy_ioport, .idle_loop = native_idle, + .enable_pmcs = power4_enable_pmcs, }; diff --git a/arch/ppc64/kernel/pmc.c b/arch/ppc64/kernel/pmc.c index 67be773f9c00..cdfec7438d01 100644 --- a/arch/ppc64/kernel/pmc.c +++ b/arch/ppc64/kernel/pmc.c @@ -65,3 +65,24 @@ void release_pmc_hardware(void) spin_unlock(&pmc_owner_lock); } EXPORT_SYMBOL_GPL(release_pmc_hardware); + +void power4_enable_pmcs(void) +{ + unsigned long hid0; + + hid0 = mfspr(HID0); + hid0 |= 1UL << (63 - 20); + + /* POWER4 requires the following sequence */ + asm volatile( + "sync\n" + "mtspr %1, %0\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "mfspr %0, %1\n" + "isync" : "=&r" (hid0) : "i" (HID0), "0" (hid0): + "memory"); +} diff --git a/arch/ppc64/kernel/sysfs.c b/arch/ppc64/kernel/sysfs.c index eca15d25e026..f311ee7c0070 100644 --- a/arch/ppc64/kernel/sysfs.c +++ b/arch/ppc64/kernel/sysfs.c @@ -101,6 +101,8 @@ static int __init setup_smt_snooze_delay(char *str) } __setup("smt-snooze-delay=", setup_smt_snooze_delay); +#endif /* CONFIG_PPC_MULTIPLATFORM */ + /* * Enabling PMCs will slow partition context switch times so we only do * it the first time we write to the PMCs. @@ -110,63 +112,15 @@ static DEFINE_PER_CPU(char, pmcs_enabled); void ppc64_enable_pmcs(void) { - unsigned long hid0; -#ifdef CONFIG_PPC_PSERIES - unsigned long set, reset; -#endif /* CONFIG_PPC_PSERIES */ - /* Only need to enable them once */ if (__get_cpu_var(pmcs_enabled)) return; __get_cpu_var(pmcs_enabled) = 1; - switch (systemcfg->platform) { - case PLATFORM_PSERIES: - case PLATFORM_POWERMAC: - hid0 = mfspr(HID0); - hid0 |= 1UL << (63 - 20); - - /* POWER4 requires the following sequence */ - asm volatile( - "sync\n" - "mtspr %1, %0\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "mfspr %0, %1\n" - "isync" : "=&r" (hid0) : "i" (HID0), "0" (hid0): - "memory"); - break; - -#ifdef CONFIG_PPC_PSERIES - case PLATFORM_PSERIES_LPAR: - set = 1UL << 63; - reset = 0; - plpar_hcall_norets(H_PERFMON, set, reset); - break; -#endif /* CONFIG_PPC_PSERIES */ - - default: - break; - } - - /* instruct hypervisor to maintain PMCs */ - if (firmware_has_feature(FW_FEATURE_SPLPAR)) - get_paca()->lppaca.pmcregs_in_use = 1; + if (ppc_md.enable_pmcs) + ppc_md.enable_pmcs(); } - -#else - -/* PMC stuff */ -void ppc64_enable_pmcs(void) -{ - /* XXX Implement for iseries */ -} -#endif /* CONFIG_PPC_MULTIPLATFORM */ - EXPORT_SYMBOL(ppc64_enable_pmcs); /* XXX convert to rusty's on_one_cpu */ diff --git a/include/asm-ppc64/machdep.h b/include/asm-ppc64/machdep.h index f0ef06375947..ff2c9287d3b6 100644 --- a/include/asm-ppc64/machdep.h +++ b/include/asm-ppc64/machdep.h @@ -140,6 +140,9 @@ struct machdep_calls { /* Idle loop for this platform, leave empty for default idle loop */ int (*idle_loop)(void); + + /* Function to enable pmcs for this platform, called once per cpu. */ + void (*enable_pmcs)(void); }; extern int default_idle(void); diff --git a/include/asm-ppc64/pmc.h b/include/asm-ppc64/pmc.h index c924748c0bea..d1d297dbccfe 100644 --- a/include/asm-ppc64/pmc.h +++ b/include/asm-ppc64/pmc.h @@ -26,4 +26,6 @@ typedef void (*perf_irq_t)(struct pt_regs *); int reserve_pmc_hardware(perf_irq_t new_perf_irq); void release_pmc_hardware(void); +void power4_enable_pmcs(void); + #endif /* _PPC64_PMC_H */ From 145ec7d51ae507c7cc8889ad05e24af05bbd9147 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 9 Aug 2005 15:20:18 +1000 Subject: [PATCH 34/37] [PATCH] ppc64: Fix a misleading printk in unflatten_dt_node() When unflatten_dt_node() fails to find an OF_DT_END_NODE tag it prints "Weird tag at start of node", this should be "Weird tag at end of node". Signed-off-by: Michael Ellerman arch/ppc64/kernel/prom.c | 2 +- 1 files changed, 1 insertion(+), 1 deletion(-) Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/prom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c index 255c39ae1b48..04b852d446a3 100644 --- a/arch/ppc64/kernel/prom.c +++ b/arch/ppc64/kernel/prom.c @@ -918,7 +918,7 @@ static unsigned long __init unflatten_dt_node(unsigned long mem, tag = *((u32 *)(*p)); } if (tag != OF_DT_END_NODE) { - printk("Weird tag at start of node: %x\n", tag); + printk("Weird tag at end of node: %x\n", tag); return mem; } *p += 4; From 95920324f51b3a12603cf6d9bacbd831f34c5b60 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 9 Aug 2005 15:20:19 +1000 Subject: [PATCH 35/37] [PATCH] ppc64: unflatten_device_tree() should check if lmb_alloc() fails unflatten_device_tree() doesn't check if lmb_alloc() succeeds or not, it should. All it can do is panic, but at least there's an error message (assuming you have some sort of console at that point). Signed-off-by: Michael Ellerman arch/ppc64/kernel/prom.c | 9 +++++++-- 1 files changed, 7 insertions(+), 2 deletions(-) Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/prom.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/ppc64/kernel/prom.c b/arch/ppc64/kernel/prom.c index 04b852d446a3..b21848826791 100644 --- a/arch/ppc64/kernel/prom.c +++ b/arch/ppc64/kernel/prom.c @@ -950,8 +950,13 @@ void __init unflatten_device_tree(void) DBG(" size is %lx, allocating...\n", size); /* Allocate memory for the expanded device tree */ - mem = (unsigned long)abs_to_virt(lmb_alloc(size + 4, - __alignof__(struct device_node))); + mem = lmb_alloc(size + 4, __alignof__(struct device_node)); + if (!mem) { + DBG("Couldn't allocate memory with lmb_alloc()!\n"); + panic("Couldn't allocate memory with lmb_alloc()!\n"); + } + mem = (unsigned long)abs_to_virt(mem); + ((u32 *)mem)[size / 4] = 0xdeadbeef; DBG(" unflattening...\n", mem); From 9a5573e378c5c8976c6000a7643b52e2a0481688 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 9 Aug 2005 15:20:20 +1000 Subject: [PATCH 36/37] [PATCH] ppc64: Check of_chosen in check_for_initrd() You can't call get_property() on a NULL node, so check if of_chosen is set in check_for_initrd(). Signed-off-by: Michael Ellerman arch/ppc64/kernel/setup.c | 20 ++++++++++++-------- 1 files changed, 12 insertions(+), 8 deletions(-) Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/setup.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/arch/ppc64/kernel/setup.c b/arch/ppc64/kernel/setup.c index b3ef8df12982..ee3b20de2e7a 100644 --- a/arch/ppc64/kernel/setup.c +++ b/arch/ppc64/kernel/setup.c @@ -536,15 +536,19 @@ static void __init check_for_initrd(void) DBG(" -> check_for_initrd()\n"); - prop = (u64 *)get_property(of_chosen, "linux,initrd-start", NULL); - if (prop != NULL) { - initrd_start = (unsigned long)__va(*prop); - prop = (u64 *)get_property(of_chosen, "linux,initrd-end", NULL); + if (of_chosen) { + prop = (u64 *)get_property(of_chosen, + "linux,initrd-start", NULL); if (prop != NULL) { - initrd_end = (unsigned long)__va(*prop); - initrd_below_start_ok = 1; - } else - initrd_start = 0; + initrd_start = (unsigned long)__va(*prop); + prop = (u64 *)get_property(of_chosen, + "linux,initrd-end", NULL); + if (prop != NULL) { + initrd_end = (unsigned long)__va(*prop); + initrd_below_start_ok = 1; + } else + initrd_start = 0; + } } /* If we were passed an initrd, set the ROOT_DEV properly if the values From c594adad5653491813959277fb87a2fef54c4e05 Mon Sep 17 00:00:00 2001 From: David Gibson Date: Thu, 11 Aug 2005 16:55:21 +1000 Subject: [PATCH 37/37] [PATCH] Dynamic hugepage addresses for ppc64 Paulus, I think this is now a reasonable candidate for the post-2.6.13 queue. Relax address restrictions for hugepages on ppc64 Presently, 64-bit applications on ppc64 may only use hugepages in the address region from 1-1.5T. Furthermore, if hugepages are enabled in the kernel config, they may only use hugepages and never normal pages in this area. This patch relaxes this restriction, allowing any address to be used with hugepages, but with a 1TB granularity. That is if you map a hugepage anywhere in the region 1TB-2TB, that entire area will be reserved exclusively for hugepages for the remainder of the process's lifetime. This works analagously to hugepages in 32-bit applications, where hugepages can be mapped anywhere, but with 256MB (mmu segment) granularity. This patch applies on top of the four level pagetable patch (http://patchwork.ozlabs.org/linuxppc64/patch?id=1936). Signed-off-by: David Gibson Signed-off-by: Paul Mackerras --- arch/ppc64/kernel/asm-offsets.c | 3 +- arch/ppc64/mm/hugetlbpage.c | 211 ++++++++++++++++++++++++-------- arch/ppc64/mm/slb_low.S | 25 ++-- include/asm-ppc64/mmu.h | 2 +- include/asm-ppc64/page.h | 29 +++-- 5 files changed, 191 insertions(+), 79 deletions(-) diff --git a/arch/ppc64/kernel/asm-offsets.c b/arch/ppc64/kernel/asm-offsets.c index abb9e5b5da03..17e35d0fed09 100644 --- a/arch/ppc64/kernel/asm-offsets.c +++ b/arch/ppc64/kernel/asm-offsets.c @@ -94,7 +94,8 @@ int main(void) DEFINE(PACASLBCACHEPTR, offsetof(struct paca_struct, slb_cache_ptr)); DEFINE(PACACONTEXTID, offsetof(struct paca_struct, context.id)); #ifdef CONFIG_HUGETLB_PAGE - DEFINE(PACAHTLBSEGS, offsetof(struct paca_struct, context.htlb_segs)); + DEFINE(PACALOWHTLBAREAS, offsetof(struct paca_struct, context.low_htlb_areas)); + DEFINE(PACAHIGHHTLBAREAS, offsetof(struct paca_struct, context.high_htlb_areas)); #endif /* CONFIG_HUGETLB_PAGE */ DEFINE(PACADEFAULTDECR, offsetof(struct paca_struct, default_decr)); DEFINE(PACA_EXGEN, offsetof(struct paca_struct, exgen)); diff --git a/arch/ppc64/mm/hugetlbpage.c b/arch/ppc64/mm/hugetlbpage.c index a13e44230a6f..e7833c80eb68 100644 --- a/arch/ppc64/mm/hugetlbpage.c +++ b/arch/ppc64/mm/hugetlbpage.c @@ -27,6 +27,9 @@ #include +#define NUM_LOW_AREAS (0x100000000UL >> SID_SHIFT) +#define NUM_HIGH_AREAS (PGTABLE_RANGE >> HTLB_AREA_SHIFT) + /* Modelled after find_linux_pte() */ pte_t *huge_pte_offset(struct mm_struct *mm, unsigned long addr) { @@ -129,15 +132,17 @@ int is_aligned_hugepage_range(unsigned long addr, unsigned long len) return 0; } -static void flush_segments(void *parm) +static void flush_low_segments(void *parm) { - u16 segs = (unsigned long) parm; + u16 areas = (unsigned long) parm; unsigned long i; asm volatile("isync" : : : "memory"); - for (i = 0; i < 16; i++) { - if (! (segs & (1U << i))) + BUILD_BUG_ON((sizeof(areas)*8) != NUM_LOW_AREAS); + + for (i = 0; i < NUM_LOW_AREAS; i++) { + if (! (areas & (1U << i))) continue; asm volatile("slbie %0" : : "r" (i << SID_SHIFT)); } @@ -145,13 +150,33 @@ static void flush_segments(void *parm) asm volatile("isync" : : : "memory"); } -static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg) +static void flush_high_segments(void *parm) { - unsigned long start = seg << SID_SHIFT; - unsigned long end = (seg+1) << SID_SHIFT; + u16 areas = (unsigned long) parm; + unsigned long i, j; + + asm volatile("isync" : : : "memory"); + + BUILD_BUG_ON((sizeof(areas)*8) != NUM_HIGH_AREAS); + + for (i = 0; i < NUM_HIGH_AREAS; i++) { + if (! (areas & (1U << i))) + continue; + for (j = 0; j < (1UL << (HTLB_AREA_SHIFT-SID_SHIFT)); j++) + asm volatile("slbie %0" + :: "r" ((i << HTLB_AREA_SHIFT) + (j << SID_SHIFT))); + } + + asm volatile("isync" : : : "memory"); +} + +static int prepare_low_area_for_htlb(struct mm_struct *mm, unsigned long area) +{ + unsigned long start = area << SID_SHIFT; + unsigned long end = (area+1) << SID_SHIFT; struct vm_area_struct *vma; - BUG_ON(seg >= 16); + BUG_ON(area >= NUM_LOW_AREAS); /* Check no VMAs are in the region */ vma = find_vma(mm, start); @@ -161,20 +186,39 @@ static int prepare_low_seg_for_htlb(struct mm_struct *mm, unsigned long seg) return 0; } -static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs) +static int prepare_high_area_for_htlb(struct mm_struct *mm, unsigned long area) +{ + unsigned long start = area << HTLB_AREA_SHIFT; + unsigned long end = (area+1) << HTLB_AREA_SHIFT; + struct vm_area_struct *vma; + + BUG_ON(area >= NUM_HIGH_AREAS); + + /* Check no VMAs are in the region */ + vma = find_vma(mm, start); + if (vma && (vma->vm_start < end)) + return -EBUSY; + + return 0; +} + +static int open_low_hpage_areas(struct mm_struct *mm, u16 newareas) { unsigned long i; - newsegs &= ~(mm->context.htlb_segs); - if (! newsegs) + BUILD_BUG_ON((sizeof(newareas)*8) != NUM_LOW_AREAS); + BUILD_BUG_ON((sizeof(mm->context.low_htlb_areas)*8) != NUM_LOW_AREAS); + + newareas &= ~(mm->context.low_htlb_areas); + if (! newareas) return 0; /* The segments we want are already open */ - for (i = 0; i < 16; i++) - if ((1 << i) & newsegs) - if (prepare_low_seg_for_htlb(mm, i) != 0) + for (i = 0; i < NUM_LOW_AREAS; i++) + if ((1 << i) & newareas) + if (prepare_low_area_for_htlb(mm, i) != 0) return -EBUSY; - mm->context.htlb_segs |= newsegs; + mm->context.low_htlb_areas |= newareas; /* update the paca copy of the context struct */ get_paca()->context = mm->context; @@ -182,29 +226,63 @@ static int open_low_hpage_segs(struct mm_struct *mm, u16 newsegs) /* the context change must make it to memory before the flush, * so that further SLB misses do the right thing. */ mb(); - on_each_cpu(flush_segments, (void *)(unsigned long)newsegs, 0, 1); + on_each_cpu(flush_low_segments, (void *)(unsigned long)newareas, 0, 1); + + return 0; +} + +static int open_high_hpage_areas(struct mm_struct *mm, u16 newareas) +{ + unsigned long i; + + BUILD_BUG_ON((sizeof(newareas)*8) != NUM_HIGH_AREAS); + BUILD_BUG_ON((sizeof(mm->context.high_htlb_areas)*8) + != NUM_HIGH_AREAS); + + newareas &= ~(mm->context.high_htlb_areas); + if (! newareas) + return 0; /* The areas we want are already open */ + + for (i = 0; i < NUM_HIGH_AREAS; i++) + if ((1 << i) & newareas) + if (prepare_high_area_for_htlb(mm, i) != 0) + return -EBUSY; + + mm->context.high_htlb_areas |= newareas; + + /* update the paca copy of the context struct */ + get_paca()->context = mm->context; + + /* the context change must make it to memory before the flush, + * so that further SLB misses do the right thing. */ + mb(); + on_each_cpu(flush_high_segments, (void *)(unsigned long)newareas, 0, 1); return 0; } int prepare_hugepage_range(unsigned long addr, unsigned long len) { - if (within_hugepage_high_range(addr, len)) - return 0; - else if ((addr < 0x100000000UL) && ((addr+len) < 0x100000000UL)) { - int err; - /* Yes, we need both tests, in case addr+len overflows - * 64-bit arithmetic */ - err = open_low_hpage_segs(current->mm, + int err; + + if ( (addr+len) < addr ) + return -EINVAL; + + if ((addr + len) < 0x100000000UL) + err = open_low_hpage_areas(current->mm, LOW_ESID_MASK(addr, len)); - if (err) - printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" - " failed (segs: 0x%04hx)\n", addr, len, - LOW_ESID_MASK(addr, len)); + else + err = open_high_hpage_areas(current->mm, + HTLB_AREA_MASK(addr, len)); + if (err) { + printk(KERN_DEBUG "prepare_hugepage_range(%lx, %lx)" + " failed (lowmask: 0x%04hx, highmask: 0x%04hx)\n", + addr, len, + LOW_ESID_MASK(addr, len), HTLB_AREA_MASK(addr, len)); return err; } - return -EINVAL; + return 0; } struct page * @@ -276,8 +354,8 @@ full_search: vma = find_vma(mm, addr); continue; } - if (touches_hugepage_high_range(addr, len)) { - addr = TASK_HPAGE_END; + if (touches_hugepage_high_range(mm, addr, len)) { + addr = ALIGN(addr+1, 1UL<mm, addr); - for (vma = find_vma(current->mm, addr); - addr + len <= TASK_HPAGE_END; - vma = vma->vm_next) { + while (addr + len <= TASK_SIZE_USER64) { BUG_ON(vma && (addr >= vma->vm_end)); /* invariant */ - BUG_ON(! within_hugepage_high_range(addr, len)); + + if (! __within_hugepage_high_range(addr, len, areamask)) { + addr = ALIGN(addr+1, 1UL<mm, addr); + continue; + } if (!vma || (addr + len) <= vma->vm_start) return addr; addr = ALIGN(vma->vm_end, HPAGE_SIZE); - /* Because we're in a hugepage region, this alignment - * should not skip us over any VMAs */ + /* Depending on segmask this might not be a confirmed + * hugepage region, so the ALIGN could have skipped + * some VMAs */ + vma = find_vma(current->mm, addr); } return -ENOMEM; @@ -474,6 +558,9 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags) { + int lastshift; + u16 areamask, curareas; + if (len & ~HPAGE_MASK) return -EINVAL; @@ -481,31 +568,49 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, return -EINVAL; if (test_thread_flag(TIF_32BIT)) { - int lastshift = 0; - u16 segmask, cursegs = current->mm->context.htlb_segs; + curareas = current->mm->context.low_htlb_areas; /* First see if we can do the mapping in the existing - * low hpage segments */ - addr = htlb_get_low_area(len, cursegs); + * low areas */ + addr = htlb_get_low_area(len, curareas); if (addr != -ENOMEM) return addr; - for (segmask = LOW_ESID_MASK(0x100000000UL-len, len); - ! lastshift; segmask >>=1) { - if (segmask & 1) + lastshift = 0; + for (areamask = LOW_ESID_MASK(0x100000000UL-len, len); + ! lastshift; areamask >>=1) { + if (areamask & 1) lastshift = 1; - addr = htlb_get_low_area(len, cursegs | segmask); + addr = htlb_get_low_area(len, curareas | areamask); if ((addr != -ENOMEM) - && open_low_hpage_segs(current->mm, segmask) == 0) + && open_low_hpage_areas(current->mm, areamask) == 0) return addr; } - printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" - " enough segments\n"); - return -ENOMEM; } else { - return htlb_get_high_area(len); + curareas = current->mm->context.high_htlb_areas; + + /* First see if we can do the mapping in the existing + * high areas */ + addr = htlb_get_high_area(len, curareas); + if (addr != -ENOMEM) + return addr; + + lastshift = 0; + for (areamask = HTLB_AREA_MASK(TASK_SIZE_USER64-len, len); + ! lastshift; areamask >>=1) { + if (areamask & 1) + lastshift = 1; + + addr = htlb_get_high_area(len, curareas | areamask); + if ((addr != -ENOMEM) + && open_high_hpage_areas(current->mm, areamask) == 0) + return addr; + } } + printk(KERN_DEBUG "hugetlb_get_unmapped_area() unable to open" + " enough areas\n"); + return -ENOMEM; } int hash_huge_page(struct mm_struct *mm, unsigned long access, diff --git a/arch/ppc64/mm/slb_low.S b/arch/ppc64/mm/slb_low.S index f20fc52483a7..bab255889c58 100644 --- a/arch/ppc64/mm/slb_low.S +++ b/arch/ppc64/mm/slb_low.S @@ -89,28 +89,29 @@ END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) b 9f 0: /* user address: proto-VSID = context<<15 | ESID */ - li r11,SLB_VSID_USER - srdi. r9,r3,USER_ESID_BITS bne- 8f /* invalid ea bits set */ #ifdef CONFIG_HUGETLB_PAGE BEGIN_FTR_SECTION - /* check against the hugepage ranges */ - cmpldi r3,(TASK_HPAGE_END>>SID_SHIFT) - bge 6f /* >= TASK_HPAGE_END */ - cmpldi r3,(TASK_HPAGE_BASE>>SID_SHIFT) - bge 5f /* TASK_HPAGE_BASE..TASK_HPAGE_END */ - cmpldi r3,16 - bge 6f /* 4GB..TASK_HPAGE_BASE */ + lhz r9,PACAHIGHHTLBAREAS(r13) + srdi r11,r3,(HTLB_AREA_SHIFT-SID_SHIFT) + srd r9,r9,r11 + andi. r9,r9,1 + bne 5f - lhz r9,PACAHTLBSEGS(r13) + li r11,SLB_VSID_USER + + cmpldi r3,16 + bge 6f + + lhz r9,PACALOWHTLBAREAS(r13) srd r9,r9,r3 andi. r9,r9,1 + beq 6f -5: /* this is a hugepage user address */ - li r11,(SLB_VSID_USER|SLB_VSID_L) +5: li r11,SLB_VSID_USER|SLB_VSID_L END_FTR_SECTION_IFSET(CPU_FTR_16M_PAGE) #endif /* CONFIG_HUGETLB_PAGE */ diff --git a/include/asm-ppc64/mmu.h b/include/asm-ppc64/mmu.h index 789c2693483c..ad36bb28de29 100644 --- a/include/asm-ppc64/mmu.h +++ b/include/asm-ppc64/mmu.h @@ -307,7 +307,7 @@ typedef unsigned long mm_context_id_t; typedef struct { mm_context_id_t id; #ifdef CONFIG_HUGETLB_PAGE - u16 htlb_segs; /* bitmask */ + u16 low_htlb_areas, high_htlb_areas; #endif } mm_context_t; diff --git a/include/asm-ppc64/page.h b/include/asm-ppc64/page.h index 7e7b18ea986e..a79a08df62bd 100644 --- a/include/asm-ppc64/page.h +++ b/include/asm-ppc64/page.h @@ -37,40 +37,45 @@ #define HUGETLB_PAGE_ORDER (HPAGE_SHIFT - PAGE_SHIFT) -/* For 64-bit processes the hugepage range is 1T-1.5T */ -#define TASK_HPAGE_BASE ASM_CONST(0x0000010000000000) -#define TASK_HPAGE_END ASM_CONST(0x0000018000000000) +#define HTLB_AREA_SHIFT 40 +#define HTLB_AREA_SIZE (1UL << HTLB_AREA_SHIFT) +#define GET_HTLB_AREA(x) ((x) >> HTLB_AREA_SHIFT) #define LOW_ESID_MASK(addr, len) (((1U << (GET_ESID(addr+len-1)+1)) \ - (1U << GET_ESID(addr))) & 0xffff) +#define HTLB_AREA_MASK(addr, len) (((1U << (GET_HTLB_AREA(addr+len-1)+1)) \ + - (1U << GET_HTLB_AREA(addr))) & 0xffff) #define ARCH_HAS_HUGEPAGE_ONLY_RANGE #define ARCH_HAS_PREPARE_HUGEPAGE_RANGE #define ARCH_HAS_SETCLEAR_HUGE_PTE #define touches_hugepage_low_range(mm, addr, len) \ - (LOW_ESID_MASK((addr), (len)) & mm->context.htlb_segs) -#define touches_hugepage_high_range(addr, len) \ - (((addr) > (TASK_HPAGE_BASE-(len))) && ((addr) < TASK_HPAGE_END)) + (LOW_ESID_MASK((addr), (len)) & (mm)->context.low_htlb_areas) +#define touches_hugepage_high_range(mm, addr, len) \ + (HTLB_AREA_MASK((addr), (len)) & (mm)->context.high_htlb_areas) #define __within_hugepage_low_range(addr, len, segmask) \ ((LOW_ESID_MASK((addr), (len)) | (segmask)) == (segmask)) #define within_hugepage_low_range(addr, len) \ __within_hugepage_low_range((addr), (len), \ - current->mm->context.htlb_segs) -#define within_hugepage_high_range(addr, len) (((addr) >= TASK_HPAGE_BASE) \ - && ((addr)+(len) <= TASK_HPAGE_END) && ((addr)+(len) >= (addr))) + current->mm->context.low_htlb_areas) +#define __within_hugepage_high_range(addr, len, zonemask) \ + ((HTLB_AREA_MASK((addr), (len)) | (zonemask)) == (zonemask)) +#define within_hugepage_high_range(addr, len) \ + __within_hugepage_high_range((addr), (len), \ + current->mm->context.high_htlb_areas) #define is_hugepage_only_range(mm, addr, len) \ - (touches_hugepage_high_range((addr), (len)) || \ + (touches_hugepage_high_range((mm), (addr), (len)) || \ touches_hugepage_low_range((mm), (addr), (len))) #define HAVE_ARCH_HUGETLB_UNMAPPED_AREA #define in_hugepage_area(context, addr) \ (cpu_has_feature(CPU_FTR_16M_PAGE) && \ - ( (((addr) >= TASK_HPAGE_BASE) && ((addr) < TASK_HPAGE_END)) || \ + ( ((1 << GET_HTLB_AREA(addr)) & (context).high_htlb_areas) || \ ( ((addr) < 0x100000000L) && \ - ((1 << GET_ESID(addr)) & (context).htlb_segs) ) ) ) + ((1 << GET_ESID(addr)) & (context).low_htlb_areas) ) ) ) #else /* !CONFIG_HUGETLB_PAGE */