1
0
Fork 0
alistair23-linux/arch/powerpc/kernel/vdso.c

829 lines
20 KiB
C
Raw Normal View History

/*
* Copyright (C) 2004 Benjamin Herrenschmidt, IBM Corp.
* <benh@kernel.crashing.org>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/smp.h>
#include <linux/stddef.h>
#include <linux/unistd.h>
#include <linux/slab.h>
#include <linux/user.h>
#include <linux/elf.h>
#include <linux/security.h>
#include <linux/memblock.h>
#include <asm/cpu_has_feature.h>
#include <asm/pgtable.h>
#include <asm/processor.h>
#include <asm/mmu.h>
#include <asm/mmu_context.h>
#include <asm/prom.h>
#include <asm/machdep.h>
#include <asm/cputable.h>
#include <asm/sections.h>
#include <asm/firmware.h>
#include <asm/vdso.h>
#include <asm/vdso_datapage.h>
#include <asm/setup.h>
[POWERPC] Support feature fixups in vdso's This patch reworks the feature fixup mecanism so vdso's can be fixed up. The main issue was that the construct: .long label (or .llong on 64 bits) will not work in the case of a shared library like the vdso. It will generate an empty placeholder in the fixup table along with a reloc, which is not something we can deal with in the vdso. The idea here (thanks Alan Modra !) is to instead use something like: 1: .long label - 1b That is, the feature fixup tables no longer contain addresses of bits of code to patch, but offsets of such code from the fixup table entry itself. That is properly resolved by ld when building the .so's. I've modified the fixup mecanism generically to use that method for the rest of the kernel as well. Another trick is that the 32 bits vDSO included in the 64 bits kernel need to have a table in the 64 bits format. However, gas does not support 32 bits code with a statement of the form: .llong label - 1b (Or even just .llong label) That is, it cannot emit the right fixup/relocation for the linker to use to assign a 32 bits address to an .llong field. Thus, in the specific case of the 32 bits vdso built as part of the 64 bits kernel, we are using a modified macro that generates: .long 0xffffffff .llong label - 1b Note that is assumes that the value is negative which is enforced by the .lds (those offsets are always negative as the .text is always before the fixup table and gas doesn't support emiting the reloc the other way around). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-10-19 19:47:18 -06:00
#undef DEBUG
#ifdef DEBUG
#define DBG(fmt...) printk(fmt)
#else
#define DBG(fmt...)
#endif
/* Max supported size for symbol names */
#define MAX_SYMNAME 64
/* The alignment of the vDSO */
#define VDSO_ALIGNMENT (1 << 16)
static unsigned int vdso32_pages;
static void *vdso32_kbase;
static struct page **vdso32_pagelist;
unsigned long vdso32_sigtramp;
unsigned long vdso32_rt_sigtramp;
#ifdef CONFIG_VDSO32
extern char vdso32_start, vdso32_end;
#endif
#ifdef CONFIG_PPC64
extern char vdso64_start, vdso64_end;
static void *vdso64_kbase = &vdso64_start;
static unsigned int vdso64_pages;
static struct page **vdso64_pagelist;
unsigned long vdso64_rt_sigtramp;
#endif /* CONFIG_PPC64 */
static int vdso_ready;
/*
* The vdso data page (aka. systemcfg for old ppc64 fans) is here.
* Once the early boot kernel code no longer needs to muck around
* with it, it will become dynamically allocated
*/
static union {
struct vdso_data data;
u8 page[PAGE_SIZE];
} vdso_data_store __page_aligned_data;
struct vdso_data *vdso_data = &vdso_data_store.data;
/* Format of the patch table */
struct vdso_patch_def
{
unsigned long ftr_mask, ftr_value;
const char *gen_name;
const char *fix_name;
};
/* Table of functions to patch based on the CPU type/revision
*
* Currently, we only change sync_dicache to do nothing on processors
* with a coherent icache
*/
static struct vdso_patch_def vdso_patches[] = {
{
CPU_FTR_COHERENT_ICACHE, CPU_FTR_COHERENT_ICACHE,
"__kernel_sync_dicache", "__kernel_sync_dicache_p5"
},
#ifdef CONFIG_PPC32
{
CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_gettimeofday", NULL
},
{
CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_clock_gettime", NULL
},
{
CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_clock_getres", NULL
},
{
CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_get_tbfreq", NULL
},
{
CPU_FTR_USE_RTC, CPU_FTR_USE_RTC,
"__kernel_time", NULL
},
#endif
};
/*
* Some infos carried around for each of them during parsing at
* boot time.
*/
struct lib32_elfinfo
{
Elf32_Ehdr *hdr; /* ptr to ELF */
Elf32_Sym *dynsym; /* ptr to .dynsym section */
unsigned long dynsymsize; /* size of .dynsym section */
char *dynstr; /* ptr to .dynstr section */
unsigned long text; /* offset of .text section in .so */
};
struct lib64_elfinfo
{
Elf64_Ehdr *hdr;
Elf64_Sym *dynsym;
unsigned long dynsymsize;
char *dynstr;
unsigned long text;
};
/*
* This is called from binfmt_elf, we create the special vma for the
* vDSO and insert it into the mm struct tree
*/
int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
{
struct mm_struct *mm = current->mm;
struct page **vdso_pagelist;
unsigned long vdso_pages;
unsigned long vdso_base;
int rc;
if (!vdso_ready)
return 0;
#ifdef CONFIG_PPC64
if (is_32bit_task()) {
vdso_pagelist = vdso32_pagelist;
vdso_pages = vdso32_pages;
vdso_base = VDSO32_MBASE;
} else {
vdso_pagelist = vdso64_pagelist;
vdso_pages = vdso64_pages;
powerpc: Move 64bit VDSO to improve context switch performance On 64bit applications the VDSO is the only thing in segment 0. Since the VDSO is position independent we can remove the hint and let get_unmapped_area pick an area. This will mean the vdso will be near other mmaps and will share an SLB entry: 10000000-10001000 r-xp 00000000 08:06 5778459 /root/context_switch_64 10010000-10011000 r--p 00000000 08:06 5778459 /root/context_switch_64 10011000-10012000 rw-p 00001000 08:06 5778459 /root/context_switch_64 fffa92ae000-fffa92b0000 rw-p 00000000 00:00 0 fffa92b0000-fffa9453000 r-xp 00000000 08:06 4334051 /lib64/power6/libc-2.9.so fffa9453000-fffa9462000 ---p 001a3000 08:06 4334051 /lib64/power6/libc-2.9.so fffa9462000-fffa9466000 r--p 001a2000 08:06 4334051 /lib64/power6/libc-2.9.so fffa9466000-fffa947c000 rw-p 001a6000 08:06 4334051 /lib64/power6/libc-2.9.so fffa947c000-fffa9480000 rw-p 00000000 00:00 0 fffa9480000-fffa94a8000 r-xp 00000000 08:06 4333852 /lib64/ld-2.9.so fffa94b3000-fffa94b4000 rw-p 00000000 00:00 0 fffa94b4000-fffa94b7000 r-xp 00000000 00:00 0 [vdso] <----- here I am fffa94b7000-fffa94b8000 r--p 00027000 08:06 4333852 /lib64/ld-2.9.so fffa94b8000-fffa94bb000 rw-p 00028000 08:06 4333852 /lib64/ld-2.9.so fffa94bb000-fffa94bc000 rw-p 00000000 00:00 0 fffe4c10000-fffe4c25000 rw-p 00000000 00:00 0 [stack] On a microbenchmark that bounces a token between two 64bit processes over pipes and calls gettimeofday each iteration (to access the VDSO), our context switch rate goes from 268k to 277k ctx switches/sec (tested on a 4GHz POWER6). Signed-off-by: Anton Blanchard <anton@samba.org> Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>
2009-07-13 14:53:51 -06:00
/*
* On 64bit we don't have a preferred map address. This
* allows get_unmapped_area to find an area near other mmaps
* and most likely share a SLB entry.
*/
vdso_base = 0;
}
#else
vdso_pagelist = vdso32_pagelist;
vdso_pages = vdso32_pages;
vdso_base = VDSO32_MBASE;
#endif
current->mm->context.vdso_base = 0;
/* vDSO has a problem and was disabled, just don't "enable" it for the
* process
*/
if (vdso_pages == 0)
return 0;
/* Add a page to the vdso size for the data page */
vdso_pages ++;
/*
* pick a base address for the vDSO in process space. We try to put it
* at vdso_base which is the "natural" base for it, but we might fail
* and end up putting it elsewhere.
* Add enough to the size so that the result can be aligned.
*/
if (down_write_killable(&mm->mmap_sem))
return -EINTR;
vdso_base = get_unmapped_area(NULL, vdso_base,
(vdso_pages << PAGE_SHIFT) +
((VDSO_ALIGNMENT - 1) & PAGE_MASK),
0, 0);
if (IS_ERR_VALUE(vdso_base)) {
rc = vdso_base;
goto fail_mmapsem;
}
/* Add required alignment. */
vdso_base = ALIGN(vdso_base, VDSO_ALIGNMENT);
/*
* Put vDSO base into mm struct. We need to do this before calling
* install_special_mapping or the perf counter mmap tracking code
* will fail to recognise it as a vDSO (since arch_vma_name fails).
*/
current->mm->context.vdso_base = vdso_base;
/*
* our vma flags don't have VM_WRITE so by default, the process isn't
* allowed to write those pages.
* gdb can break that with ptrace interface, and thus trigger COW on
* those pages but it's then your responsibility to never do that on
* the "data" page of the vDSO or you'll stop getting kernel updates
* and your nice userland gettimeofday will be totally dead.
* It's fine to use that for setting breakpoints in the vDSO code
coredump: remove VM_ALWAYSDUMP flag The motivation for this patchset was that I was looking at a way for a qemu-kvm process, to exclude the guest memory from its core dump, which can be quite large. There are already a number of filter flags in /proc/<pid>/coredump_filter, however, these allow one to specify 'types' of kernel memory, not specific address ranges (which is needed in this case). Since there are no more vma flags available, the first patch eliminates the need for the 'VM_ALWAYSDUMP' flag. The flag is used internally by the kernel to mark vdso and vsyscall pages. However, it is simple enough to check if a vma covers a vdso or vsyscall page without the need for this flag. The second patch then replaces the 'VM_ALWAYSDUMP' flag with a new 'VM_NODUMP' flag, which can be set by userspace using new madvise flags: 'MADV_DONTDUMP', and unset via 'MADV_DODUMP'. The core dump filters continue to work the same as before unless 'MADV_DONTDUMP' is set on the region. The qemu code which implements this features is at: http://people.redhat.com/~jbaron/qemu-dump/qemu-dump.patch In my testing the qemu core dump shrunk from 383MB -> 13MB with this patch. I also believe that the 'MADV_DONTDUMP' flag might be useful for security sensitive apps, which might want to select which areas are dumped. This patch: The VM_ALWAYSDUMP flag is currently used by the coredump code to indicate that a vma is part of a vsyscall or vdso section. However, we can determine if a vma is in one these sections by checking it against the gate_vma and checking for a non-NULL return value from arch_vma_name(). Thus, freeing a valuable vma bit. Signed-off-by: Jason Baron <jbaron@redhat.com> Acked-by: Roland McGrath <roland@hack.frob.com> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: Avi Kivity <avi@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-03-23 16:02:51 -06:00
* pages though.
*/
rc = install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
VM_READ|VM_EXEC|
coredump: remove VM_ALWAYSDUMP flag The motivation for this patchset was that I was looking at a way for a qemu-kvm process, to exclude the guest memory from its core dump, which can be quite large. There are already a number of filter flags in /proc/<pid>/coredump_filter, however, these allow one to specify 'types' of kernel memory, not specific address ranges (which is needed in this case). Since there are no more vma flags available, the first patch eliminates the need for the 'VM_ALWAYSDUMP' flag. The flag is used internally by the kernel to mark vdso and vsyscall pages. However, it is simple enough to check if a vma covers a vdso or vsyscall page without the need for this flag. The second patch then replaces the 'VM_ALWAYSDUMP' flag with a new 'VM_NODUMP' flag, which can be set by userspace using new madvise flags: 'MADV_DONTDUMP', and unset via 'MADV_DODUMP'. The core dump filters continue to work the same as before unless 'MADV_DONTDUMP' is set on the region. The qemu code which implements this features is at: http://people.redhat.com/~jbaron/qemu-dump/qemu-dump.patch In my testing the qemu core dump shrunk from 383MB -> 13MB with this patch. I also believe that the 'MADV_DONTDUMP' flag might be useful for security sensitive apps, which might want to select which areas are dumped. This patch: The VM_ALWAYSDUMP flag is currently used by the coredump code to indicate that a vma is part of a vsyscall or vdso section. However, we can determine if a vma is in one these sections by checking it against the gate_vma and checking for a non-NULL return value from arch_vma_name(). Thus, freeing a valuable vma bit. Signed-off-by: Jason Baron <jbaron@redhat.com> Acked-by: Roland McGrath <roland@hack.frob.com> Cc: Chris Metcalf <cmetcalf@tilera.com> Cc: Avi Kivity <avi@redhat.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2012-03-23 16:02:51 -06:00
VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
vdso_pagelist);
if (rc) {
current->mm->context.vdso_base = 0;
goto fail_mmapsem;
}
up_write(&mm->mmap_sem);
return 0;
fail_mmapsem:
up_write(&mm->mmap_sem);
return rc;
}
const char *arch_vma_name(struct vm_area_struct *vma)
{
if (vma->vm_mm && vma->vm_start == vma->vm_mm->context.vdso_base)
return "[vdso]";
return NULL;
}
#ifdef CONFIG_VDSO32
static void * __init find_section32(Elf32_Ehdr *ehdr, const char *secname,
unsigned long *size)
{
Elf32_Shdr *sechdrs;
unsigned int i;
char *secnames;
/* Grab section headers and strings so we can tell who is who */
sechdrs = (void *)ehdr + ehdr->e_shoff;
secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
/* Find the section they want */
for (i = 1; i < ehdr->e_shnum; i++) {
if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
if (size)
*size = sechdrs[i].sh_size;
return (void *)ehdr + sechdrs[i].sh_offset;
}
}
*size = 0;
return NULL;
}
static Elf32_Sym * __init find_symbol32(struct lib32_elfinfo *lib,
const char *symname)
{
unsigned int i;
char name[MAX_SYMNAME], *c;
for (i = 0; i < (lib->dynsymsize / sizeof(Elf32_Sym)); i++) {
if (lib->dynsym[i].st_name == 0)
continue;
strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
MAX_SYMNAME);
c = strchr(name, '@');
if (c)
*c = 0;
if (strcmp(symname, name) == 0)
return &lib->dynsym[i];
}
return NULL;
}
/* Note that we assume the section is .text and the symbol is relative to
* the library base
*/
static unsigned long __init find_function32(struct lib32_elfinfo *lib,
const char *symname)
{
Elf32_Sym *sym = find_symbol32(lib, symname);
if (sym == NULL) {
printk(KERN_WARNING "vDSO32: function %s not found !\n",
symname);
return 0;
}
return sym->st_value - VDSO32_LBASE;
}
static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32,
struct lib64_elfinfo *v64,
const char *orig, const char *fix)
{
Elf32_Sym *sym32_gen, *sym32_fix;
sym32_gen = find_symbol32(v32, orig);
if (sym32_gen == NULL) {
printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", orig);
return -1;
}
if (fix == NULL) {
sym32_gen->st_name = 0;
return 0;
}
sym32_fix = find_symbol32(v32, fix);
if (sym32_fix == NULL) {
printk(KERN_ERR "vDSO32: Can't find symbol %s !\n", fix);
return -1;
}
sym32_gen->st_value = sym32_fix->st_value;
sym32_gen->st_size = sym32_fix->st_size;
sym32_gen->st_info = sym32_fix->st_info;
sym32_gen->st_other = sym32_fix->st_other;
sym32_gen->st_shndx = sym32_fix->st_shndx;
return 0;
}
#else /* !CONFIG_VDSO32 */
static unsigned long __init find_function32(struct lib32_elfinfo *lib,
const char *symname)
{
return 0;
}
static int __init vdso_do_func_patch32(struct lib32_elfinfo *v32,
struct lib64_elfinfo *v64,
const char *orig, const char *fix)
{
return 0;
}
#endif /* CONFIG_VDSO32 */
#ifdef CONFIG_PPC64
static void * __init find_section64(Elf64_Ehdr *ehdr, const char *secname,
unsigned long *size)
{
Elf64_Shdr *sechdrs;
unsigned int i;
char *secnames;
/* Grab section headers and strings so we can tell who is who */
sechdrs = (void *)ehdr + ehdr->e_shoff;
secnames = (void *)ehdr + sechdrs[ehdr->e_shstrndx].sh_offset;
/* Find the section they want */
for (i = 1; i < ehdr->e_shnum; i++) {
if (strcmp(secnames+sechdrs[i].sh_name, secname) == 0) {
if (size)
*size = sechdrs[i].sh_size;
return (void *)ehdr + sechdrs[i].sh_offset;
}
}
if (size)
*size = 0;
return NULL;
}
static Elf64_Sym * __init find_symbol64(struct lib64_elfinfo *lib,
const char *symname)
{
unsigned int i;
char name[MAX_SYMNAME], *c;
for (i = 0; i < (lib->dynsymsize / sizeof(Elf64_Sym)); i++) {
if (lib->dynsym[i].st_name == 0)
continue;
strlcpy(name, lib->dynstr + lib->dynsym[i].st_name,
MAX_SYMNAME);
c = strchr(name, '@');
if (c)
*c = 0;
if (strcmp(symname, name) == 0)
return &lib->dynsym[i];
}
return NULL;
}
/* Note that we assume the section is .text and the symbol is relative to
* the library base
*/
static unsigned long __init find_function64(struct lib64_elfinfo *lib,
const char *symname)
{
Elf64_Sym *sym = find_symbol64(lib, symname);
if (sym == NULL) {
printk(KERN_WARNING "vDSO64: function %s not found !\n",
symname);
return 0;
}
#ifdef VDS64_HAS_DESCRIPTORS
return *((u64 *)(vdso64_kbase + sym->st_value - VDSO64_LBASE)) -
VDSO64_LBASE;
#else
return sym->st_value - VDSO64_LBASE;
#endif
}
static int __init vdso_do_func_patch64(struct lib32_elfinfo *v32,
struct lib64_elfinfo *v64,
const char *orig, const char *fix)
{
Elf64_Sym *sym64_gen, *sym64_fix;
sym64_gen = find_symbol64(v64, orig);
if (sym64_gen == NULL) {
printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", orig);
return -1;
}
if (fix == NULL) {
sym64_gen->st_name = 0;
return 0;
}
sym64_fix = find_symbol64(v64, fix);
if (sym64_fix == NULL) {
printk(KERN_ERR "vDSO64: Can't find symbol %s !\n", fix);
return -1;
}
sym64_gen->st_value = sym64_fix->st_value;
sym64_gen->st_size = sym64_fix->st_size;
sym64_gen->st_info = sym64_fix->st_info;
sym64_gen->st_other = sym64_fix->st_other;
sym64_gen->st_shndx = sym64_fix->st_shndx;
return 0;
}
#endif /* CONFIG_PPC64 */
static __init int vdso_do_find_sections(struct lib32_elfinfo *v32,
struct lib64_elfinfo *v64)
{
void *sect;
/*
* Locate symbol tables & text section
*/
#ifdef CONFIG_VDSO32
v32->dynsym = find_section32(v32->hdr, ".dynsym", &v32->dynsymsize);
v32->dynstr = find_section32(v32->hdr, ".dynstr", NULL);
if (v32->dynsym == NULL || v32->dynstr == NULL) {
printk(KERN_ERR "vDSO32: required symbol section not found\n");
return -1;
}
sect = find_section32(v32->hdr, ".text", NULL);
if (sect == NULL) {
printk(KERN_ERR "vDSO32: the .text section was not found\n");
return -1;
}
v32->text = sect - vdso32_kbase;
#endif
#ifdef CONFIG_PPC64
v64->dynsym = find_section64(v64->hdr, ".dynsym", &v64->dynsymsize);
v64->dynstr = find_section64(v64->hdr, ".dynstr", NULL);
if (v64->dynsym == NULL || v64->dynstr == NULL) {
printk(KERN_ERR "vDSO64: required symbol section not found\n");
return -1;
}
sect = find_section64(v64->hdr, ".text", NULL);
if (sect == NULL) {
printk(KERN_ERR "vDSO64: the .text section was not found\n");
return -1;
}
v64->text = sect - vdso64_kbase;
#endif /* CONFIG_PPC64 */
return 0;
}
static __init void vdso_setup_trampolines(struct lib32_elfinfo *v32,
struct lib64_elfinfo *v64)
{
/*
* Find signal trampolines
*/
#ifdef CONFIG_PPC64
vdso64_rt_sigtramp = find_function64(v64, "__kernel_sigtramp_rt64");
#endif
vdso32_sigtramp = find_function32(v32, "__kernel_sigtramp32");
vdso32_rt_sigtramp = find_function32(v32, "__kernel_sigtramp_rt32");
}
static __init int vdso_fixup_datapage(struct lib32_elfinfo *v32,
struct lib64_elfinfo *v64)
{
#ifdef CONFIG_VDSO32
Elf32_Sym *sym32;
#endif
#ifdef CONFIG_PPC64
Elf64_Sym *sym64;
sym64 = find_symbol64(v64, "__kernel_datapage_offset");
if (sym64 == NULL) {
printk(KERN_ERR "vDSO64: Can't find symbol "
"__kernel_datapage_offset !\n");
return -1;
}
*((int *)(vdso64_kbase + sym64->st_value - VDSO64_LBASE)) =
(vdso64_pages << PAGE_SHIFT) -
(sym64->st_value - VDSO64_LBASE);
#endif /* CONFIG_PPC64 */
#ifdef CONFIG_VDSO32
sym32 = find_symbol32(v32, "__kernel_datapage_offset");
if (sym32 == NULL) {
printk(KERN_ERR "vDSO32: Can't find symbol "
"__kernel_datapage_offset !\n");
return -1;
}
*((int *)(vdso32_kbase + (sym32->st_value - VDSO32_LBASE))) =
(vdso32_pages << PAGE_SHIFT) -
(sym32->st_value - VDSO32_LBASE);
#endif
return 0;
}
[POWERPC] Support feature fixups in vdso's This patch reworks the feature fixup mecanism so vdso's can be fixed up. The main issue was that the construct: .long label (or .llong on 64 bits) will not work in the case of a shared library like the vdso. It will generate an empty placeholder in the fixup table along with a reloc, which is not something we can deal with in the vdso. The idea here (thanks Alan Modra !) is to instead use something like: 1: .long label - 1b That is, the feature fixup tables no longer contain addresses of bits of code to patch, but offsets of such code from the fixup table entry itself. That is properly resolved by ld when building the .so's. I've modified the fixup mecanism generically to use that method for the rest of the kernel as well. Another trick is that the 32 bits vDSO included in the 64 bits kernel need to have a table in the 64 bits format. However, gas does not support 32 bits code with a statement of the form: .llong label - 1b (Or even just .llong label) That is, it cannot emit the right fixup/relocation for the linker to use to assign a 32 bits address to an .llong field. Thus, in the specific case of the 32 bits vdso built as part of the 64 bits kernel, we are using a modified macro that generates: .long 0xffffffff .llong label - 1b Note that is assumes that the value is negative which is enforced by the .lds (those offsets are always negative as the .text is always before the fixup table and gas doesn't support emiting the reloc the other way around). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-10-19 19:47:18 -06:00
static __init int vdso_fixup_features(struct lib32_elfinfo *v32,
struct lib64_elfinfo *v64)
{
unsigned long size;
void *start;
[POWERPC] Support feature fixups in vdso's This patch reworks the feature fixup mecanism so vdso's can be fixed up. The main issue was that the construct: .long label (or .llong on 64 bits) will not work in the case of a shared library like the vdso. It will generate an empty placeholder in the fixup table along with a reloc, which is not something we can deal with in the vdso. The idea here (thanks Alan Modra !) is to instead use something like: 1: .long label - 1b That is, the feature fixup tables no longer contain addresses of bits of code to patch, but offsets of such code from the fixup table entry itself. That is properly resolved by ld when building the .so's. I've modified the fixup mecanism generically to use that method for the rest of the kernel as well. Another trick is that the 32 bits vDSO included in the 64 bits kernel need to have a table in the 64 bits format. However, gas does not support 32 bits code with a statement of the form: .llong label - 1b (Or even just .llong label) That is, it cannot emit the right fixup/relocation for the linker to use to assign a 32 bits address to an .llong field. Thus, in the specific case of the 32 bits vdso built as part of the 64 bits kernel, we are using a modified macro that generates: .long 0xffffffff .llong label - 1b Note that is assumes that the value is negative which is enforced by the .lds (those offsets are always negative as the .text is always before the fixup table and gas doesn't support emiting the reloc the other way around). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-10-19 19:47:18 -06:00
#ifdef CONFIG_PPC64
start = find_section64(v64->hdr, "__ftr_fixup", &size);
if (start)
[POWERPC] Support feature fixups in vdso's This patch reworks the feature fixup mecanism so vdso's can be fixed up. The main issue was that the construct: .long label (or .llong on 64 bits) will not work in the case of a shared library like the vdso. It will generate an empty placeholder in the fixup table along with a reloc, which is not something we can deal with in the vdso. The idea here (thanks Alan Modra !) is to instead use something like: 1: .long label - 1b That is, the feature fixup tables no longer contain addresses of bits of code to patch, but offsets of such code from the fixup table entry itself. That is properly resolved by ld when building the .so's. I've modified the fixup mecanism generically to use that method for the rest of the kernel as well. Another trick is that the 32 bits vDSO included in the 64 bits kernel need to have a table in the 64 bits format. However, gas does not support 32 bits code with a statement of the form: .llong label - 1b (Or even just .llong label) That is, it cannot emit the right fixup/relocation for the linker to use to assign a 32 bits address to an .llong field. Thus, in the specific case of the 32 bits vdso built as part of the 64 bits kernel, we are using a modified macro that generates: .long 0xffffffff .llong label - 1b Note that is assumes that the value is negative which is enforced by the .lds (those offsets are always negative as the .text is always before the fixup table and gas doesn't support emiting the reloc the other way around). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-10-19 19:47:18 -06:00
do_feature_fixups(cur_cpu_spec->cpu_features,
start, start + size);
[POWERPC] Support feature fixups in vdso's This patch reworks the feature fixup mecanism so vdso's can be fixed up. The main issue was that the construct: .long label (or .llong on 64 bits) will not work in the case of a shared library like the vdso. It will generate an empty placeholder in the fixup table along with a reloc, which is not something we can deal with in the vdso. The idea here (thanks Alan Modra !) is to instead use something like: 1: .long label - 1b That is, the feature fixup tables no longer contain addresses of bits of code to patch, but offsets of such code from the fixup table entry itself. That is properly resolved by ld when building the .so's. I've modified the fixup mecanism generically to use that method for the rest of the kernel as well. Another trick is that the 32 bits vDSO included in the 64 bits kernel need to have a table in the 64 bits format. However, gas does not support 32 bits code with a statement of the form: .llong label - 1b (Or even just .llong label) That is, it cannot emit the right fixup/relocation for the linker to use to assign a 32 bits address to an .llong field. Thus, in the specific case of the 32 bits vdso built as part of the 64 bits kernel, we are using a modified macro that generates: .long 0xffffffff .llong label - 1b Note that is assumes that the value is negative which is enforced by the .lds (those offsets are always negative as the .text is always before the fixup table and gas doesn't support emiting the reloc the other way around). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-10-19 19:47:18 -06:00
start = find_section64(v64->hdr, "__mmu_ftr_fixup", &size);
if (start)
do_feature_fixups(cur_cpu_spec->mmu_features,
start, start + size);
start = find_section64(v64->hdr, "__fw_ftr_fixup", &size);
if (start)
[POWERPC] Support feature fixups in vdso's This patch reworks the feature fixup mecanism so vdso's can be fixed up. The main issue was that the construct: .long label (or .llong on 64 bits) will not work in the case of a shared library like the vdso. It will generate an empty placeholder in the fixup table along with a reloc, which is not something we can deal with in the vdso. The idea here (thanks Alan Modra !) is to instead use something like: 1: .long label - 1b That is, the feature fixup tables no longer contain addresses of bits of code to patch, but offsets of such code from the fixup table entry itself. That is properly resolved by ld when building the .so's. I've modified the fixup mecanism generically to use that method for the rest of the kernel as well. Another trick is that the 32 bits vDSO included in the 64 bits kernel need to have a table in the 64 bits format. However, gas does not support 32 bits code with a statement of the form: .llong label - 1b (Or even just .llong label) That is, it cannot emit the right fixup/relocation for the linker to use to assign a 32 bits address to an .llong field. Thus, in the specific case of the 32 bits vdso built as part of the 64 bits kernel, we are using a modified macro that generates: .long 0xffffffff .llong label - 1b Note that is assumes that the value is negative which is enforced by the .lds (those offsets are always negative as the .text is always before the fixup table and gas doesn't support emiting the reloc the other way around). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-10-19 19:47:18 -06:00
do_feature_fixups(powerpc_firmware_features,
start, start + size);
start = find_section64(v64->hdr, "__lwsync_fixup", &size);
if (start)
do_lwsync_fixups(cur_cpu_spec->cpu_features,
start, start + size);
[POWERPC] Support feature fixups in vdso's This patch reworks the feature fixup mecanism so vdso's can be fixed up. The main issue was that the construct: .long label (or .llong on 64 bits) will not work in the case of a shared library like the vdso. It will generate an empty placeholder in the fixup table along with a reloc, which is not something we can deal with in the vdso. The idea here (thanks Alan Modra !) is to instead use something like: 1: .long label - 1b That is, the feature fixup tables no longer contain addresses of bits of code to patch, but offsets of such code from the fixup table entry itself. That is properly resolved by ld when building the .so's. I've modified the fixup mecanism generically to use that method for the rest of the kernel as well. Another trick is that the 32 bits vDSO included in the 64 bits kernel need to have a table in the 64 bits format. However, gas does not support 32 bits code with a statement of the form: .llong label - 1b (Or even just .llong label) That is, it cannot emit the right fixup/relocation for the linker to use to assign a 32 bits address to an .llong field. Thus, in the specific case of the 32 bits vdso built as part of the 64 bits kernel, we are using a modified macro that generates: .long 0xffffffff .llong label - 1b Note that is assumes that the value is negative which is enforced by the .lds (those offsets are always negative as the .text is always before the fixup table and gas doesn't support emiting the reloc the other way around). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-10-19 19:47:18 -06:00
#endif /* CONFIG_PPC64 */
#ifdef CONFIG_VDSO32
start = find_section32(v32->hdr, "__ftr_fixup", &size);
if (start)
[POWERPC] Support feature fixups in vdso's This patch reworks the feature fixup mecanism so vdso's can be fixed up. The main issue was that the construct: .long label (or .llong on 64 bits) will not work in the case of a shared library like the vdso. It will generate an empty placeholder in the fixup table along with a reloc, which is not something we can deal with in the vdso. The idea here (thanks Alan Modra !) is to instead use something like: 1: .long label - 1b That is, the feature fixup tables no longer contain addresses of bits of code to patch, but offsets of such code from the fixup table entry itself. That is properly resolved by ld when building the .so's. I've modified the fixup mecanism generically to use that method for the rest of the kernel as well. Another trick is that the 32 bits vDSO included in the 64 bits kernel need to have a table in the 64 bits format. However, gas does not support 32 bits code with a statement of the form: .llong label - 1b (Or even just .llong label) That is, it cannot emit the right fixup/relocation for the linker to use to assign a 32 bits address to an .llong field. Thus, in the specific case of the 32 bits vdso built as part of the 64 bits kernel, we are using a modified macro that generates: .long 0xffffffff .llong label - 1b Note that is assumes that the value is negative which is enforced by the .lds (those offsets are always negative as the .text is always before the fixup table and gas doesn't support emiting the reloc the other way around). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-10-19 19:47:18 -06:00
do_feature_fixups(cur_cpu_spec->cpu_features,
start, start + size);
[POWERPC] Support feature fixups in vdso's This patch reworks the feature fixup mecanism so vdso's can be fixed up. The main issue was that the construct: .long label (or .llong on 64 bits) will not work in the case of a shared library like the vdso. It will generate an empty placeholder in the fixup table along with a reloc, which is not something we can deal with in the vdso. The idea here (thanks Alan Modra !) is to instead use something like: 1: .long label - 1b That is, the feature fixup tables no longer contain addresses of bits of code to patch, but offsets of such code from the fixup table entry itself. That is properly resolved by ld when building the .so's. I've modified the fixup mecanism generically to use that method for the rest of the kernel as well. Another trick is that the 32 bits vDSO included in the 64 bits kernel need to have a table in the 64 bits format. However, gas does not support 32 bits code with a statement of the form: .llong label - 1b (Or even just .llong label) That is, it cannot emit the right fixup/relocation for the linker to use to assign a 32 bits address to an .llong field. Thus, in the specific case of the 32 bits vdso built as part of the 64 bits kernel, we are using a modified macro that generates: .long 0xffffffff .llong label - 1b Note that is assumes that the value is negative which is enforced by the .lds (those offsets are always negative as the .text is always before the fixup table and gas doesn't support emiting the reloc the other way around). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-10-19 19:47:18 -06:00
start = find_section32(v32->hdr, "__mmu_ftr_fixup", &size);
if (start)
do_feature_fixups(cur_cpu_spec->mmu_features,
start, start + size);
[POWERPC] Support feature fixups in vdso's This patch reworks the feature fixup mecanism so vdso's can be fixed up. The main issue was that the construct: .long label (or .llong on 64 bits) will not work in the case of a shared library like the vdso. It will generate an empty placeholder in the fixup table along with a reloc, which is not something we can deal with in the vdso. The idea here (thanks Alan Modra !) is to instead use something like: 1: .long label - 1b That is, the feature fixup tables no longer contain addresses of bits of code to patch, but offsets of such code from the fixup table entry itself. That is properly resolved by ld when building the .so's. I've modified the fixup mecanism generically to use that method for the rest of the kernel as well. Another trick is that the 32 bits vDSO included in the 64 bits kernel need to have a table in the 64 bits format. However, gas does not support 32 bits code with a statement of the form: .llong label - 1b (Or even just .llong label) That is, it cannot emit the right fixup/relocation for the linker to use to assign a 32 bits address to an .llong field. Thus, in the specific case of the 32 bits vdso built as part of the 64 bits kernel, we are using a modified macro that generates: .long 0xffffffff .llong label - 1b Note that is assumes that the value is negative which is enforced by the .lds (those offsets are always negative as the .text is always before the fixup table and gas doesn't support emiting the reloc the other way around). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-10-19 19:47:18 -06:00
#ifdef CONFIG_PPC64
start = find_section32(v32->hdr, "__fw_ftr_fixup", &size);
if (start)
[POWERPC] Support feature fixups in vdso's This patch reworks the feature fixup mecanism so vdso's can be fixed up. The main issue was that the construct: .long label (or .llong on 64 bits) will not work in the case of a shared library like the vdso. It will generate an empty placeholder in the fixup table along with a reloc, which is not something we can deal with in the vdso. The idea here (thanks Alan Modra !) is to instead use something like: 1: .long label - 1b That is, the feature fixup tables no longer contain addresses of bits of code to patch, but offsets of such code from the fixup table entry itself. That is properly resolved by ld when building the .so's. I've modified the fixup mecanism generically to use that method for the rest of the kernel as well. Another trick is that the 32 bits vDSO included in the 64 bits kernel need to have a table in the 64 bits format. However, gas does not support 32 bits code with a statement of the form: .llong label - 1b (Or even just .llong label) That is, it cannot emit the right fixup/relocation for the linker to use to assign a 32 bits address to an .llong field. Thus, in the specific case of the 32 bits vdso built as part of the 64 bits kernel, we are using a modified macro that generates: .long 0xffffffff .llong label - 1b Note that is assumes that the value is negative which is enforced by the .lds (those offsets are always negative as the .text is always before the fixup table and gas doesn't support emiting the reloc the other way around). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-10-19 19:47:18 -06:00
do_feature_fixups(powerpc_firmware_features,
start, start + size);
[POWERPC] Support feature fixups in vdso's This patch reworks the feature fixup mecanism so vdso's can be fixed up. The main issue was that the construct: .long label (or .llong on 64 bits) will not work in the case of a shared library like the vdso. It will generate an empty placeholder in the fixup table along with a reloc, which is not something we can deal with in the vdso. The idea here (thanks Alan Modra !) is to instead use something like: 1: .long label - 1b That is, the feature fixup tables no longer contain addresses of bits of code to patch, but offsets of such code from the fixup table entry itself. That is properly resolved by ld when building the .so's. I've modified the fixup mecanism generically to use that method for the rest of the kernel as well. Another trick is that the 32 bits vDSO included in the 64 bits kernel need to have a table in the 64 bits format. However, gas does not support 32 bits code with a statement of the form: .llong label - 1b (Or even just .llong label) That is, it cannot emit the right fixup/relocation for the linker to use to assign a 32 bits address to an .llong field. Thus, in the specific case of the 32 bits vdso built as part of the 64 bits kernel, we are using a modified macro that generates: .long 0xffffffff .llong label - 1b Note that is assumes that the value is negative which is enforced by the .lds (those offsets are always negative as the .text is always before the fixup table and gas doesn't support emiting the reloc the other way around). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-10-19 19:47:18 -06:00
#endif /* CONFIG_PPC64 */
start = find_section32(v32->hdr, "__lwsync_fixup", &size);
if (start)
do_lwsync_fixups(cur_cpu_spec->cpu_features,
start, start + size);
#endif
[POWERPC] Support feature fixups in vdso's This patch reworks the feature fixup mecanism so vdso's can be fixed up. The main issue was that the construct: .long label (or .llong on 64 bits) will not work in the case of a shared library like the vdso. It will generate an empty placeholder in the fixup table along with a reloc, which is not something we can deal with in the vdso. The idea here (thanks Alan Modra !) is to instead use something like: 1: .long label - 1b That is, the feature fixup tables no longer contain addresses of bits of code to patch, but offsets of such code from the fixup table entry itself. That is properly resolved by ld when building the .so's. I've modified the fixup mecanism generically to use that method for the rest of the kernel as well. Another trick is that the 32 bits vDSO included in the 64 bits kernel need to have a table in the 64 bits format. However, gas does not support 32 bits code with a statement of the form: .llong label - 1b (Or even just .llong label) That is, it cannot emit the right fixup/relocation for the linker to use to assign a 32 bits address to an .llong field. Thus, in the specific case of the 32 bits vdso built as part of the 64 bits kernel, we are using a modified macro that generates: .long 0xffffffff .llong label - 1b Note that is assumes that the value is negative which is enforced by the .lds (those offsets are always negative as the .text is always before the fixup table and gas doesn't support emiting the reloc the other way around). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-10-19 19:47:18 -06:00
return 0;
}
static __init int vdso_fixup_alt_funcs(struct lib32_elfinfo *v32,
struct lib64_elfinfo *v64)
{
int i;
for (i = 0; i < ARRAY_SIZE(vdso_patches); i++) {
struct vdso_patch_def *patch = &vdso_patches[i];
int match = (cur_cpu_spec->cpu_features & patch->ftr_mask)
== patch->ftr_value;
if (!match)
continue;
DBG("replacing %s with %s...\n", patch->gen_name,
patch->fix_name ? "NONE" : patch->fix_name);
/*
* Patch the 32 bits and 64 bits symbols. Note that we do not
* patch the "." symbol on 64 bits.
* It would be easy to do, but doesn't seem to be necessary,
* patching the OPD symbol is enough.
*/
vdso_do_func_patch32(v32, v64, patch->gen_name,
patch->fix_name);
#ifdef CONFIG_PPC64
vdso_do_func_patch64(v32, v64, patch->gen_name,
patch->fix_name);
#endif /* CONFIG_PPC64 */
}
return 0;
}
static __init int vdso_setup(void)
{
struct lib32_elfinfo v32;
struct lib64_elfinfo v64;
v32.hdr = vdso32_kbase;
#ifdef CONFIG_PPC64
v64.hdr = vdso64_kbase;
#endif
if (vdso_do_find_sections(&v32, &v64))
return -1;
if (vdso_fixup_datapage(&v32, &v64))
return -1;
[POWERPC] Support feature fixups in vdso's This patch reworks the feature fixup mecanism so vdso's can be fixed up. The main issue was that the construct: .long label (or .llong on 64 bits) will not work in the case of a shared library like the vdso. It will generate an empty placeholder in the fixup table along with a reloc, which is not something we can deal with in the vdso. The idea here (thanks Alan Modra !) is to instead use something like: 1: .long label - 1b That is, the feature fixup tables no longer contain addresses of bits of code to patch, but offsets of such code from the fixup table entry itself. That is properly resolved by ld when building the .so's. I've modified the fixup mecanism generically to use that method for the rest of the kernel as well. Another trick is that the 32 bits vDSO included in the 64 bits kernel need to have a table in the 64 bits format. However, gas does not support 32 bits code with a statement of the form: .llong label - 1b (Or even just .llong label) That is, it cannot emit the right fixup/relocation for the linker to use to assign a 32 bits address to an .llong field. Thus, in the specific case of the 32 bits vdso built as part of the 64 bits kernel, we are using a modified macro that generates: .long 0xffffffff .llong label - 1b Note that is assumes that the value is negative which is enforced by the .lds (those offsets are always negative as the .text is always before the fixup table and gas doesn't support emiting the reloc the other way around). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-10-19 19:47:18 -06:00
if (vdso_fixup_features(&v32, &v64))
return -1;
if (vdso_fixup_alt_funcs(&v32, &v64))
return -1;
vdso_setup_trampolines(&v32, &v64);
return 0;
}
/*
* Called from setup_arch to initialize the bitmap of available
* syscalls in the systemcfg page
*/
static void __init vdso_setup_syscall_map(void)
{
unsigned int i;
extern unsigned long *sys_call_table;
extern unsigned long sys_ni_syscall;
for (i = 0; i < NR_syscalls; i++) {
#ifdef CONFIG_PPC64
if (sys_call_table[i*2] != sys_ni_syscall)
vdso_data->syscall_map_64[i >> 5] |=
0x80000000UL >> (i & 0x1f);
if (sys_call_table[i*2+1] != sys_ni_syscall)
vdso_data->syscall_map_32[i >> 5] |=
0x80000000UL >> (i & 0x1f);
#else /* CONFIG_PPC64 */
if (sys_call_table[i] != sys_ni_syscall)
vdso_data->syscall_map_32[i >> 5] |=
0x80000000UL >> (i & 0x1f);
#endif /* CONFIG_PPC64 */
}
}
#ifdef CONFIG_PPC64
int vdso_getcpu_init(void)
{
unsigned long cpu, node, val;
/*
powerpc/booke64: Use SPRG7 for VDSO Previously SPRG3 was marked for use by both VDSO and critical interrupts (though critical interrupts were not fully implemented). In commit 8b64a9dfb091f1eca8b7e58da82f1e7d1d5fe0ad ("powerpc/booke64: Use SPRG0/3 scratch for bolted TLB miss & crit int"), Mihai Caraman made an attempt to resolve this conflict by restoring the VDSO value early in the critical interrupt, but this has some issues: - It's incompatible with EXCEPTION_COMMON which restores r13 from the by-then-overwritten scratch (this cost me some debugging time). - It forces critical exceptions to be a special case handled differently from even machine check and debug level exceptions. - It didn't occur to me that it was possible to make this work at all (by doing a final "ld r13, PACA_EXCRIT+EX_R13(r13)") until after I made (most of) this patch. :-) It might be worth investigating using a load rather than SPRG on return from all exceptions (except TLB misses where the scratch never leaves the SPRG) -- it could save a few cycles. Until then, let's stick with SPRG for all exceptions. Since we cannot use SPRG4-7 for scratch without corrupting the state of a KVM guest, move VDSO to SPRG7 on book3e. Since neither SPRG4-7 nor critical interrupts exist on book3s, SPRG3 is still used for VDSO there. Signed-off-by: Scott Wood <scottwood@freescale.com> Cc: Mihai Caraman <mihai.caraman@freescale.com> Cc: Anton Blanchard <anton@samba.org> Cc: Paul Mackerras <paulus@samba.org> Cc: kvm-ppc@vger.kernel.org
2014-03-10 16:29:38 -06:00
* SPRG_VDSO contains the CPU in the bottom 16 bits and the NUMA node
* in the next 16 bits. The VDSO uses this to implement getcpu().
*/
cpu = get_cpu();
WARN_ON_ONCE(cpu > 0xffff);
node = cpu_to_node(cpu);
WARN_ON_ONCE(node > 0xffff);
val = (cpu & 0xfff) | ((node & 0xffff) << 16);
powerpc/booke64: Use SPRG7 for VDSO Previously SPRG3 was marked for use by both VDSO and critical interrupts (though critical interrupts were not fully implemented). In commit 8b64a9dfb091f1eca8b7e58da82f1e7d1d5fe0ad ("powerpc/booke64: Use SPRG0/3 scratch for bolted TLB miss & crit int"), Mihai Caraman made an attempt to resolve this conflict by restoring the VDSO value early in the critical interrupt, but this has some issues: - It's incompatible with EXCEPTION_COMMON which restores r13 from the by-then-overwritten scratch (this cost me some debugging time). - It forces critical exceptions to be a special case handled differently from even machine check and debug level exceptions. - It didn't occur to me that it was possible to make this work at all (by doing a final "ld r13, PACA_EXCRIT+EX_R13(r13)") until after I made (most of) this patch. :-) It might be worth investigating using a load rather than SPRG on return from all exceptions (except TLB misses where the scratch never leaves the SPRG) -- it could save a few cycles. Until then, let's stick with SPRG for all exceptions. Since we cannot use SPRG4-7 for scratch without corrupting the state of a KVM guest, move VDSO to SPRG7 on book3e. Since neither SPRG4-7 nor critical interrupts exist on book3s, SPRG3 is still used for VDSO there. Signed-off-by: Scott Wood <scottwood@freescale.com> Cc: Mihai Caraman <mihai.caraman@freescale.com> Cc: Anton Blanchard <anton@samba.org> Cc: Paul Mackerras <paulus@samba.org> Cc: kvm-ppc@vger.kernel.org
2014-03-10 16:29:38 -06:00
mtspr(SPRN_SPRG_VDSO_WRITE, val);
get_paca()->sprg_vdso = val;
put_cpu();
return 0;
}
/* We need to call this before SMP init */
early_initcall(vdso_getcpu_init);
#endif
static int __init vdso_init(void)
{
int i;
#ifdef CONFIG_PPC64
/*
* Fill up the "systemcfg" stuff for backward compatibility
*/
strcpy((char *)vdso_data->eye_catcher, "SYSTEMCFG:PPC64");
vdso_data->version.major = SYSTEMCFG_MAJOR;
vdso_data->version.minor = SYSTEMCFG_MINOR;
vdso_data->processor = mfspr(SPRN_PVR);
/*
* Fake the old platform number for pSeries and add
* in LPAR bit if necessary
*/
vdso_data->platform = 0x100;
if (firmware_has_feature(FW_FEATURE_LPAR))
vdso_data->platform |= 1;
vdso_data->physicalMemorySize = memblock_phys_mem_size();
vdso_data->dcache_size = ppc64_caches.l1d.size;
vdso_data->dcache_line_size = ppc64_caches.l1d.line_size;
vdso_data->icache_size = ppc64_caches.l1i.size;
vdso_data->icache_line_size = ppc64_caches.l1i.line_size;
vdso_data->dcache_block_size = ppc64_caches.l1d.block_size;
vdso_data->icache_block_size = ppc64_caches.l1i.block_size;
vdso_data->dcache_log_block_size = ppc64_caches.l1d.log_block_size;
vdso_data->icache_log_block_size = ppc64_caches.l1i.log_block_size;
/*
* Calculate the size of the 64 bits vDSO
*/
vdso64_pages = (&vdso64_end - &vdso64_start) >> PAGE_SHIFT;
DBG("vdso64_kbase: %p, 0x%x pages\n", vdso64_kbase, vdso64_pages);
#else
vdso_data->dcache_block_size = L1_CACHE_BYTES;
vdso_data->dcache_log_block_size = L1_CACHE_SHIFT;
vdso_data->icache_block_size = L1_CACHE_BYTES;
vdso_data->icache_log_block_size = L1_CACHE_SHIFT;
#endif /* CONFIG_PPC64 */
#ifdef CONFIG_VDSO32
vdso32_kbase = &vdso32_start;
/*
* Calculate the size of the 32 bits vDSO
*/
vdso32_pages = (&vdso32_end - &vdso32_start) >> PAGE_SHIFT;
DBG("vdso32_kbase: %p, 0x%x pages\n", vdso32_kbase, vdso32_pages);
#endif
/*
* Setup the syscall map in the vDOS
*/
vdso_setup_syscall_map();
[POWERPC] Support feature fixups in vdso's This patch reworks the feature fixup mecanism so vdso's can be fixed up. The main issue was that the construct: .long label (or .llong on 64 bits) will not work in the case of a shared library like the vdso. It will generate an empty placeholder in the fixup table along with a reloc, which is not something we can deal with in the vdso. The idea here (thanks Alan Modra !) is to instead use something like: 1: .long label - 1b That is, the feature fixup tables no longer contain addresses of bits of code to patch, but offsets of such code from the fixup table entry itself. That is properly resolved by ld when building the .so's. I've modified the fixup mecanism generically to use that method for the rest of the kernel as well. Another trick is that the 32 bits vDSO included in the 64 bits kernel need to have a table in the 64 bits format. However, gas does not support 32 bits code with a statement of the form: .llong label - 1b (Or even just .llong label) That is, it cannot emit the right fixup/relocation for the linker to use to assign a 32 bits address to an .llong field. Thus, in the specific case of the 32 bits vdso built as part of the 64 bits kernel, we are using a modified macro that generates: .long 0xffffffff .llong label - 1b Note that is assumes that the value is negative which is enforced by the .lds (those offsets are always negative as the .text is always before the fixup table and gas doesn't support emiting the reloc the other way around). Signed-off-by: Benjamin Herrenschmidt <benh@kernel.crashing.org> Signed-off-by: Paul Mackerras <paulus@samba.org>
2006-10-19 19:47:18 -06:00
/*
* Initialize the vDSO images in memory, that is do necessary
* fixups of vDSO symbols, locate trampolines, etc...
*/
if (vdso_setup()) {
printk(KERN_ERR "vDSO setup failure, not enabled !\n");
vdso32_pages = 0;
#ifdef CONFIG_PPC64
vdso64_pages = 0;
#endif
return 0;
}
#ifdef CONFIG_VDSO32
/* Make sure pages are in the correct state */
treewide: kzalloc() -> kcalloc() The kzalloc() function has a 2-factor argument form, kcalloc(). This patch replaces cases of: kzalloc(a * b, gfp) with: kcalloc(a * b, gfp) as well as handling cases of: kzalloc(a * b * c, gfp) with: kzalloc(array3_size(a, b, c), gfp) as it's slightly less ugly than: kzalloc_array(array_size(a, b), c, gfp) This does, however, attempt to ignore constant size factors like: kzalloc(4 * 1024, gfp) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ type TYPE; expression THING, E; @@ ( kzalloc( - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | kzalloc( - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression COUNT; typedef u8; typedef __u8; @@ ( kzalloc( - sizeof(u8) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(__u8) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(char) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(unsigned char) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(u8) * COUNT + COUNT , ...) | kzalloc( - sizeof(__u8) * COUNT + COUNT , ...) | kzalloc( - sizeof(char) * COUNT + COUNT , ...) | kzalloc( - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( - kzalloc + kcalloc ( - sizeof(TYPE) * (COUNT_ID) + COUNT_ID, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * COUNT_ID + COUNT_ID, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * (COUNT_CONST) + COUNT_CONST, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * COUNT_CONST + COUNT_CONST, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * (COUNT_ID) + COUNT_ID, sizeof(THING) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * COUNT_ID + COUNT_ID, sizeof(THING) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * (COUNT_CONST) + COUNT_CONST, sizeof(THING) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * COUNT_CONST + COUNT_CONST, sizeof(THING) , ...) ) // 2-factor product, only identifiers. @@ identifier SIZE, COUNT; @@ - kzalloc + kcalloc ( - SIZE * COUNT + COUNT, SIZE , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( kzalloc( - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kzalloc( - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kzalloc( - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kzalloc( - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( kzalloc( - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kzalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kzalloc( - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kzalloc( - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kzalloc( - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | kzalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ identifier STRIDE, SIZE, COUNT; @@ ( kzalloc( - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products, // when they're not all constants... @@ expression E1, E2, E3; constant C1, C2, C3; @@ ( kzalloc(C1 * C2 * C3, ...) | kzalloc( - (E1) * E2 * E3 + array3_size(E1, E2, E3) , ...) | kzalloc( - (E1) * (E2) * E3 + array3_size(E1, E2, E3) , ...) | kzalloc( - (E1) * (E2) * (E3) + array3_size(E1, E2, E3) , ...) | kzalloc( - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants, // keeping sizeof() as the second factor argument. @@ expression THING, E1, E2; type TYPE; constant C1, C2, C3; @@ ( kzalloc(sizeof(THING) * C2, ...) | kzalloc(sizeof(TYPE) * C2, ...) | kzalloc(C1 * C2 * C3, ...) | kzalloc(C1 * C2, ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * (E2) + E2, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * E2 + E2, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * (E2) + E2, sizeof(THING) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * E2 + E2, sizeof(THING) , ...) | - kzalloc + kcalloc ( - (E1) * E2 + E1, E2 , ...) | - kzalloc + kcalloc ( - (E1) * (E2) + E1, E2 , ...) | - kzalloc + kcalloc ( - E1 * E2 + E1, E2 , ...) ) Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 15:03:40 -06:00
vdso32_pagelist = kcalloc(vdso32_pages + 2, sizeof(struct page *),
GFP_KERNEL);
BUG_ON(vdso32_pagelist == NULL);
for (i = 0; i < vdso32_pages; i++) {
struct page *pg = virt_to_page(vdso32_kbase + i*PAGE_SIZE);
ClearPageReserved(pg);
get_page(pg);
vdso32_pagelist[i] = pg;
}
vdso32_pagelist[i++] = virt_to_page(vdso_data);
vdso32_pagelist[i] = NULL;
#endif
#ifdef CONFIG_PPC64
treewide: kzalloc() -> kcalloc() The kzalloc() function has a 2-factor argument form, kcalloc(). This patch replaces cases of: kzalloc(a * b, gfp) with: kcalloc(a * b, gfp) as well as handling cases of: kzalloc(a * b * c, gfp) with: kzalloc(array3_size(a, b, c), gfp) as it's slightly less ugly than: kzalloc_array(array_size(a, b), c, gfp) This does, however, attempt to ignore constant size factors like: kzalloc(4 * 1024, gfp) though any constants defined via macros get caught up in the conversion. Any factors with a sizeof() of "unsigned char", "char", and "u8" were dropped, since they're redundant. The Coccinelle script used for this was: // Fix redundant parens around sizeof(). @@ type TYPE; expression THING, E; @@ ( kzalloc( - (sizeof(TYPE)) * E + sizeof(TYPE) * E , ...) | kzalloc( - (sizeof(THING)) * E + sizeof(THING) * E , ...) ) // Drop single-byte sizes and redundant parens. @@ expression COUNT; typedef u8; typedef __u8; @@ ( kzalloc( - sizeof(u8) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(__u8) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(char) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(unsigned char) * (COUNT) + COUNT , ...) | kzalloc( - sizeof(u8) * COUNT + COUNT , ...) | kzalloc( - sizeof(__u8) * COUNT + COUNT , ...) | kzalloc( - sizeof(char) * COUNT + COUNT , ...) | kzalloc( - sizeof(unsigned char) * COUNT + COUNT , ...) ) // 2-factor product with sizeof(type/expression) and identifier or constant. @@ type TYPE; expression THING; identifier COUNT_ID; constant COUNT_CONST; @@ ( - kzalloc + kcalloc ( - sizeof(TYPE) * (COUNT_ID) + COUNT_ID, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * COUNT_ID + COUNT_ID, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * (COUNT_CONST) + COUNT_CONST, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * COUNT_CONST + COUNT_CONST, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * (COUNT_ID) + COUNT_ID, sizeof(THING) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * COUNT_ID + COUNT_ID, sizeof(THING) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * (COUNT_CONST) + COUNT_CONST, sizeof(THING) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * COUNT_CONST + COUNT_CONST, sizeof(THING) , ...) ) // 2-factor product, only identifiers. @@ identifier SIZE, COUNT; @@ - kzalloc + kcalloc ( - SIZE * COUNT + COUNT, SIZE , ...) // 3-factor product with 1 sizeof(type) or sizeof(expression), with // redundant parens removed. @@ expression THING; identifier STRIDE, COUNT; type TYPE; @@ ( kzalloc( - sizeof(TYPE) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(TYPE) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(TYPE) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(TYPE) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(TYPE)) , ...) | kzalloc( - sizeof(THING) * (COUNT) * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kzalloc( - sizeof(THING) * (COUNT) * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kzalloc( - sizeof(THING) * COUNT * (STRIDE) + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) | kzalloc( - sizeof(THING) * COUNT * STRIDE + array3_size(COUNT, STRIDE, sizeof(THING)) , ...) ) // 3-factor product with 2 sizeof(variable), with redundant parens removed. @@ expression THING1, THING2; identifier COUNT; type TYPE1, TYPE2; @@ ( kzalloc( - sizeof(TYPE1) * sizeof(TYPE2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kzalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2)) , ...) | kzalloc( - sizeof(THING1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kzalloc( - sizeof(THING1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(THING1), sizeof(THING2)) , ...) | kzalloc( - sizeof(TYPE1) * sizeof(THING2) * COUNT + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) | kzalloc( - sizeof(TYPE1) * sizeof(THING2) * (COUNT) + array3_size(COUNT, sizeof(TYPE1), sizeof(THING2)) , ...) ) // 3-factor product, only identifiers, with redundant parens removed. @@ identifier STRIDE, SIZE, COUNT; @@ ( kzalloc( - (COUNT) * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - (COUNT) * (STRIDE) * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - (COUNT) * STRIDE * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - (COUNT) * (STRIDE) * (SIZE) + array3_size(COUNT, STRIDE, SIZE) , ...) | kzalloc( - COUNT * STRIDE * SIZE + array3_size(COUNT, STRIDE, SIZE) , ...) ) // Any remaining multi-factor products, first at least 3-factor products, // when they're not all constants... @@ expression E1, E2, E3; constant C1, C2, C3; @@ ( kzalloc(C1 * C2 * C3, ...) | kzalloc( - (E1) * E2 * E3 + array3_size(E1, E2, E3) , ...) | kzalloc( - (E1) * (E2) * E3 + array3_size(E1, E2, E3) , ...) | kzalloc( - (E1) * (E2) * (E3) + array3_size(E1, E2, E3) , ...) | kzalloc( - E1 * E2 * E3 + array3_size(E1, E2, E3) , ...) ) // And then all remaining 2 factors products when they're not all constants, // keeping sizeof() as the second factor argument. @@ expression THING, E1, E2; type TYPE; constant C1, C2, C3; @@ ( kzalloc(sizeof(THING) * C2, ...) | kzalloc(sizeof(TYPE) * C2, ...) | kzalloc(C1 * C2 * C3, ...) | kzalloc(C1 * C2, ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * (E2) + E2, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(TYPE) * E2 + E2, sizeof(TYPE) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * (E2) + E2, sizeof(THING) , ...) | - kzalloc + kcalloc ( - sizeof(THING) * E2 + E2, sizeof(THING) , ...) | - kzalloc + kcalloc ( - (E1) * E2 + E1, E2 , ...) | - kzalloc + kcalloc ( - (E1) * (E2) + E1, E2 , ...) | - kzalloc + kcalloc ( - E1 * E2 + E1, E2 , ...) ) Signed-off-by: Kees Cook <keescook@chromium.org>
2018-06-12 15:03:40 -06:00
vdso64_pagelist = kcalloc(vdso64_pages + 2, sizeof(struct page *),
GFP_KERNEL);
BUG_ON(vdso64_pagelist == NULL);
for (i = 0; i < vdso64_pages; i++) {
struct page *pg = virt_to_page(vdso64_kbase + i*PAGE_SIZE);
ClearPageReserved(pg);
get_page(pg);
vdso64_pagelist[i] = pg;
}
vdso64_pagelist[i++] = virt_to_page(vdso_data);
vdso64_pagelist[i] = NULL;
#endif /* CONFIG_PPC64 */
get_page(virt_to_page(vdso_data));
smp_wmb();
vdso_ready = 1;
return 0;
}
arch_initcall(vdso_init);