Merge branch 'x86-kaslr-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 kernel address space randomization support from Peter Anvin:
 "This enables kernel address space randomization for x86"

* 'x86-kaslr-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86, kaslr: Clarify RANDOMIZE_BASE_MAX_OFFSET
  x86, kaslr: Remove unused including <linux/version.h>
  x86, kaslr: Use char array to gain sizeof sanity
  x86, kaslr: Add a circular multiply for better bit diffusion
  x86, kaslr: Mix entropy sources together as needed
  x86/relocs: Add percpu fixup for GNU ld 2.23
  x86, boot: Rename get_flags() and check_flags() to *_cpuflags()
  x86, kaslr: Raise the maximum virtual address to -1 GiB on x86_64
  x86, kaslr: Report kernel offset on panic
  x86, kaslr: Select random position from e820 maps
  x86, kaslr: Provide randomness functions
  x86, kaslr: Return location from decompress_kernel
  x86, boot: Move CPU flags out of cpucheck
  x86, relocs: Add more per-cpu gold special cases
This commit is contained in:
Linus Torvalds 2014-01-20 14:45:50 -08:00
commit f4bcd8ccdd
22 changed files with 654 additions and 158 deletions

View file

@ -2017,6 +2017,10 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
noapic [SMP,APIC] Tells the kernel to not make use of any noapic [SMP,APIC] Tells the kernel to not make use of any
IOAPICs that may be present in the system. IOAPICs that may be present in the system.
nokaslr [X86]
Disable kernel base offset ASLR (Address Space
Layout Randomization) if built into the kernel.
noautogroup Disable scheduler automatic task group creation. noautogroup Disable scheduler automatic task group creation.
nobats [PPC] Do not use BATs for mapping kernel lowmem nobats [PPC] Do not use BATs for mapping kernel lowmem

View file

@ -1693,16 +1693,67 @@ config RELOCATABLE
Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address Note: If CONFIG_RELOCATABLE=y, then the kernel runs from the address
it has been loaded at and the compile time physical address it has been loaded at and the compile time physical address
(CONFIG_PHYSICAL_START) is ignored. (CONFIG_PHYSICAL_START) is used as the minimum location.
# Relocation on x86-32 needs some additional build support config RANDOMIZE_BASE
bool "Randomize the address of the kernel image"
depends on RELOCATABLE
depends on !HIBERNATION
default n
---help---
Randomizes the physical and virtual address at which the
kernel image is decompressed, as a security feature that
deters exploit attempts relying on knowledge of the location
of kernel internals.
Entropy is generated using the RDRAND instruction if it is
supported. If RDTSC is supported, it is used as well. If
neither RDRAND nor RDTSC are supported, then randomness is
read from the i8254 timer.
The kernel will be offset by up to RANDOMIZE_BASE_MAX_OFFSET,
and aligned according to PHYSICAL_ALIGN. Since the kernel is
built using 2GiB addressing, and PHYSICAL_ALGIN must be at a
minimum of 2MiB, only 10 bits of entropy is theoretically
possible. At best, due to page table layouts, 64-bit can use
9 bits of entropy and 32-bit uses 8 bits.
If unsure, say N.
config RANDOMIZE_BASE_MAX_OFFSET
hex "Maximum kASLR offset allowed" if EXPERT
depends on RANDOMIZE_BASE
range 0x0 0x20000000 if X86_32
default "0x20000000" if X86_32
range 0x0 0x40000000 if X86_64
default "0x40000000" if X86_64
---help---
The lesser of RANDOMIZE_BASE_MAX_OFFSET and available physical
memory is used to determine the maximal offset in bytes that will
be applied to the kernel when kernel Address Space Layout
Randomization (kASLR) is active. This must be a multiple of
PHYSICAL_ALIGN.
On 32-bit this is limited to 512MiB by page table layouts. The
default is 512MiB.
On 64-bit this is limited by how the kernel fixmap page table is
positioned, so this cannot be larger than 1GiB currently. Without
RANDOMIZE_BASE, there is a 512MiB to 1.5GiB split between kernel
and modules. When RANDOMIZE_BASE_MAX_OFFSET is above 512MiB, the
modules area will shrink to compensate, up to the current maximum
1GiB to 1GiB split. The default is 1GiB.
If unsure, leave at the default value.
# Relocation on x86 needs some additional build support
config X86_NEED_RELOCS config X86_NEED_RELOCS
def_bool y def_bool y
depends on X86_32 && RELOCATABLE depends on RANDOMIZE_BASE || (X86_32 && RELOCATABLE)
config PHYSICAL_ALIGN config PHYSICAL_ALIGN
hex "Alignment value to which kernel should be aligned" hex "Alignment value to which kernel should be aligned"
default "0x1000000" default "0x200000"
range 0x2000 0x1000000 if X86_32 range 0x2000 0x1000000 if X86_32
range 0x200000 0x1000000 if X86_64 range 0x200000 0x1000000 if X86_64
---help--- ---help---

View file

@ -20,7 +20,7 @@ targets := vmlinux.bin setup.bin setup.elf bzImage
targets += fdimage fdimage144 fdimage288 image.iso mtools.conf targets += fdimage fdimage144 fdimage288 image.iso mtools.conf
subdir- := compressed subdir- := compressed
setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpucheck.o setup-y += a20.o bioscall.o cmdline.o copy.o cpu.o cpuflags.o cpucheck.o
setup-y += early_serial_console.o edd.o header.o main.o mca.o memory.o setup-y += early_serial_console.o edd.o header.o main.o mca.o memory.o
setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o setup-y += pm.o pmjump.o printf.o regs.o string.o tty.o video.o
setup-y += video-mode.o version.o setup-y += video-mode.o version.o

View file

@ -26,9 +26,8 @@
#include <asm/boot.h> #include <asm/boot.h>
#include <asm/setup.h> #include <asm/setup.h>
#include "bitops.h" #include "bitops.h"
#include <asm/cpufeature.h>
#include <asm/processor-flags.h>
#include "ctype.h" #include "ctype.h"
#include "cpuflags.h"
/* Useful macros */ /* Useful macros */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) #define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
@ -307,14 +306,7 @@ static inline int cmdline_find_option_bool(const char *option)
return __cmdline_find_option_bool(cmd_line_ptr, option); return __cmdline_find_option_bool(cmd_line_ptr, option);
} }
/* cpu.c, cpucheck.c */ /* cpu.c, cpucheck.c */
struct cpu_features {
int level; /* Family, or 64 for x86-64 */
int model;
u32 flags[NCAPINTS];
};
extern struct cpu_features cpu;
int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr); int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr);
int validate_cpu(void); int validate_cpu(void);

View file

@ -28,7 +28,7 @@ HOST_EXTRACFLAGS += -I$(srctree)/tools/include
VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \ VMLINUX_OBJS = $(obj)/vmlinux.lds $(obj)/head_$(BITS).o $(obj)/misc.o \
$(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \ $(obj)/string.o $(obj)/cmdline.o $(obj)/early_serial_console.o \
$(obj)/piggy.o $(obj)/piggy.o $(obj)/cpuflags.o $(obj)/aslr.o
$(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone $(obj)/eboot.o: KBUILD_CFLAGS += -fshort-wchar -mno-red-zone

View file

@ -0,0 +1,316 @@
#include "misc.h"
#ifdef CONFIG_RANDOMIZE_BASE
#include <asm/msr.h>
#include <asm/archrandom.h>
#include <asm/e820.h>
#include <generated/compile.h>
#include <linux/module.h>
#include <linux/uts.h>
#include <linux/utsname.h>
#include <generated/utsrelease.h>
/* Simplified build-specific string for starting entropy. */
static const char build_str[] = UTS_RELEASE " (" LINUX_COMPILE_BY "@"
LINUX_COMPILE_HOST ") (" LINUX_COMPILER ") " UTS_VERSION;
#define I8254_PORT_CONTROL 0x43
#define I8254_PORT_COUNTER0 0x40
#define I8254_CMD_READBACK 0xC0
#define I8254_SELECT_COUNTER0 0x02
#define I8254_STATUS_NOTREADY 0x40
static inline u16 i8254(void)
{
u16 status, timer;
do {
outb(I8254_PORT_CONTROL,
I8254_CMD_READBACK | I8254_SELECT_COUNTER0);
status = inb(I8254_PORT_COUNTER0);
timer = inb(I8254_PORT_COUNTER0);
timer |= inb(I8254_PORT_COUNTER0) << 8;
} while (status & I8254_STATUS_NOTREADY);
return timer;
}
static unsigned long rotate_xor(unsigned long hash, const void *area,
size_t size)
{
size_t i;
unsigned long *ptr = (unsigned long *)area;
for (i = 0; i < size / sizeof(hash); i++) {
/* Rotate by odd number of bits and XOR. */
hash = (hash << ((sizeof(hash) * 8) - 7)) | (hash >> 7);
hash ^= ptr[i];
}
return hash;
}
/* Attempt to create a simple but unpredictable starting entropy. */
static unsigned long get_random_boot(void)
{
unsigned long hash = 0;
hash = rotate_xor(hash, build_str, sizeof(build_str));
hash = rotate_xor(hash, real_mode, sizeof(*real_mode));
return hash;
}
static unsigned long get_random_long(void)
{
#ifdef CONFIG_X86_64
const unsigned long mix_const = 0x5d6008cbf3848dd3UL;
#else
const unsigned long mix_const = 0x3f39e593UL;
#endif
unsigned long raw, random = get_random_boot();
bool use_i8254 = true;
debug_putstr("KASLR using");
if (has_cpuflag(X86_FEATURE_RDRAND)) {
debug_putstr(" RDRAND");
if (rdrand_long(&raw)) {
random ^= raw;
use_i8254 = false;
}
}
if (has_cpuflag(X86_FEATURE_TSC)) {
debug_putstr(" RDTSC");
rdtscll(raw);
random ^= raw;
use_i8254 = false;
}
if (use_i8254) {
debug_putstr(" i8254");
random ^= i8254();
}
/* Circular multiply for better bit diffusion */
asm("mul %3"
: "=a" (random), "=d" (raw)
: "a" (random), "rm" (mix_const));
random += raw;
debug_putstr("...\n");
return random;
}
struct mem_vector {
unsigned long start;
unsigned long size;
};
#define MEM_AVOID_MAX 5
struct mem_vector mem_avoid[MEM_AVOID_MAX];
static bool mem_contains(struct mem_vector *region, struct mem_vector *item)
{
/* Item at least partially before region. */
if (item->start < region->start)
return false;
/* Item at least partially after region. */
if (item->start + item->size > region->start + region->size)
return false;
return true;
}
static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two)
{
/* Item one is entirely before item two. */
if (one->start + one->size <= two->start)
return false;
/* Item one is entirely after item two. */
if (one->start >= two->start + two->size)
return false;
return true;
}
static void mem_avoid_init(unsigned long input, unsigned long input_size,
unsigned long output, unsigned long output_size)
{
u64 initrd_start, initrd_size;
u64 cmd_line, cmd_line_size;
unsigned long unsafe, unsafe_len;
char *ptr;
/*
* Avoid the region that is unsafe to overlap during
* decompression (see calculations at top of misc.c).
*/
unsafe_len = (output_size >> 12) + 32768 + 18;
unsafe = (unsigned long)input + input_size - unsafe_len;
mem_avoid[0].start = unsafe;
mem_avoid[0].size = unsafe_len;
/* Avoid initrd. */
initrd_start = (u64)real_mode->ext_ramdisk_image << 32;
initrd_start |= real_mode->hdr.ramdisk_image;
initrd_size = (u64)real_mode->ext_ramdisk_size << 32;
initrd_size |= real_mode->hdr.ramdisk_size;
mem_avoid[1].start = initrd_start;
mem_avoid[1].size = initrd_size;
/* Avoid kernel command line. */
cmd_line = (u64)real_mode->ext_cmd_line_ptr << 32;
cmd_line |= real_mode->hdr.cmd_line_ptr;
/* Calculate size of cmd_line. */
ptr = (char *)(unsigned long)cmd_line;
for (cmd_line_size = 0; ptr[cmd_line_size++]; )
;
mem_avoid[2].start = cmd_line;
mem_avoid[2].size = cmd_line_size;
/* Avoid heap memory. */
mem_avoid[3].start = (unsigned long)free_mem_ptr;
mem_avoid[3].size = BOOT_HEAP_SIZE;
/* Avoid stack memory. */
mem_avoid[4].start = (unsigned long)free_mem_end_ptr;
mem_avoid[4].size = BOOT_STACK_SIZE;
}
/* Does this memory vector overlap a known avoided area? */
bool mem_avoid_overlap(struct mem_vector *img)
{
int i;
for (i = 0; i < MEM_AVOID_MAX; i++) {
if (mem_overlaps(img, &mem_avoid[i]))
return true;
}
return false;
}
unsigned long slots[CONFIG_RANDOMIZE_BASE_MAX_OFFSET / CONFIG_PHYSICAL_ALIGN];
unsigned long slot_max = 0;
static void slots_append(unsigned long addr)
{
/* Overflowing the slots list should be impossible. */
if (slot_max >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET /
CONFIG_PHYSICAL_ALIGN)
return;
slots[slot_max++] = addr;
}
static unsigned long slots_fetch_random(void)
{
/* Handle case of no slots stored. */
if (slot_max == 0)
return 0;
return slots[get_random_long() % slot_max];
}
static void process_e820_entry(struct e820entry *entry,
unsigned long minimum,
unsigned long image_size)
{
struct mem_vector region, img;
/* Skip non-RAM entries. */
if (entry->type != E820_RAM)
return;
/* Ignore entries entirely above our maximum. */
if (entry->addr >= CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
return;
/* Ignore entries entirely below our minimum. */
if (entry->addr + entry->size < minimum)
return;
region.start = entry->addr;
region.size = entry->size;
/* Potentially raise address to minimum location. */
if (region.start < minimum)
region.start = minimum;
/* Potentially raise address to meet alignment requirements. */
region.start = ALIGN(region.start, CONFIG_PHYSICAL_ALIGN);
/* Did we raise the address above the bounds of this e820 region? */
if (region.start > entry->addr + entry->size)
return;
/* Reduce size by any delta from the original address. */
region.size -= region.start - entry->addr;
/* Reduce maximum size to fit end of image within maximum limit. */
if (region.start + region.size > CONFIG_RANDOMIZE_BASE_MAX_OFFSET)
region.size = CONFIG_RANDOMIZE_BASE_MAX_OFFSET - region.start;
/* Walk each aligned slot and check for avoided areas. */
for (img.start = region.start, img.size = image_size ;
mem_contains(&region, &img) ;
img.start += CONFIG_PHYSICAL_ALIGN) {
if (mem_avoid_overlap(&img))
continue;
slots_append(img.start);
}
}
static unsigned long find_random_addr(unsigned long minimum,
unsigned long size)
{
int i;
unsigned long addr;
/* Make sure minimum is aligned. */
minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN);
/* Verify potential e820 positions, appending to slots list. */
for (i = 0; i < real_mode->e820_entries; i++) {
process_e820_entry(&real_mode->e820_map[i], minimum, size);
}
return slots_fetch_random();
}
unsigned char *choose_kernel_location(unsigned char *input,
unsigned long input_size,
unsigned char *output,
unsigned long output_size)
{
unsigned long choice = (unsigned long)output;
unsigned long random;
if (cmdline_find_option_bool("nokaslr")) {
debug_putstr("KASLR disabled...\n");
goto out;
}
/* Record the various known unsafe memory ranges. */
mem_avoid_init((unsigned long)input, input_size,
(unsigned long)output, output_size);
/* Walk e820 and find a random address. */
random = find_random_addr(choice, output_size);
if (!random) {
debug_putstr("KASLR could not find suitable E820 region...\n");
goto out;
}
/* Always enforce the minimum. */
if (random < choice)
goto out;
choice = random;
out:
return (unsigned char *)choice;
}
#endif /* CONFIG_RANDOMIZE_BASE */

View file

@ -1,6 +1,6 @@
#include "misc.h" #include "misc.h"
#ifdef CONFIG_EARLY_PRINTK #if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
static unsigned long fs; static unsigned long fs;
static inline void set_fs(unsigned long seg) static inline void set_fs(unsigned long seg)

View file

@ -0,0 +1,12 @@
#ifdef CONFIG_RANDOMIZE_BASE
#include "../cpuflags.c"
bool has_cpuflag(int flag)
{
get_cpuflags();
return test_bit(flag, cpu.flags);
}
#endif

View file

@ -117,9 +117,11 @@ preferred_addr:
addl %eax, %ebx addl %eax, %ebx
notl %eax notl %eax
andl %eax, %ebx andl %eax, %ebx
#else cmpl $LOAD_PHYSICAL_ADDR, %ebx
movl $LOAD_PHYSICAL_ADDR, %ebx jge 1f
#endif #endif
movl $LOAD_PHYSICAL_ADDR, %ebx
1:
/* Target address to relocate to for decompression */ /* Target address to relocate to for decompression */
addl $z_extract_offset, %ebx addl $z_extract_offset, %ebx
@ -191,14 +193,14 @@ relocated:
leal boot_heap(%ebx), %eax leal boot_heap(%ebx), %eax
pushl %eax /* heap area */ pushl %eax /* heap area */
pushl %esi /* real mode pointer */ pushl %esi /* real mode pointer */
call decompress_kernel call decompress_kernel /* returns kernel location in %eax */
addl $24, %esp addl $24, %esp
/* /*
* Jump to the decompressed kernel. * Jump to the decompressed kernel.
*/ */
xorl %ebx, %ebx xorl %ebx, %ebx
jmp *%ebp jmp *%eax
/* /*
* Stack and heap for uncompression * Stack and heap for uncompression

View file

@ -94,9 +94,11 @@ ENTRY(startup_32)
addl %eax, %ebx addl %eax, %ebx
notl %eax notl %eax
andl %eax, %ebx andl %eax, %ebx
#else cmpl $LOAD_PHYSICAL_ADDR, %ebx
movl $LOAD_PHYSICAL_ADDR, %ebx jge 1f
#endif #endif
movl $LOAD_PHYSICAL_ADDR, %ebx
1:
/* Target address to relocate to for decompression */ /* Target address to relocate to for decompression */
addl $z_extract_offset, %ebx addl $z_extract_offset, %ebx
@ -269,9 +271,11 @@ preferred_addr:
addq %rax, %rbp addq %rax, %rbp
notq %rax notq %rax
andq %rax, %rbp andq %rax, %rbp
#else cmpq $LOAD_PHYSICAL_ADDR, %rbp
movq $LOAD_PHYSICAL_ADDR, %rbp jge 1f
#endif #endif
movq $LOAD_PHYSICAL_ADDR, %rbp
1:
/* Target address to relocate to for decompression */ /* Target address to relocate to for decompression */
leaq z_extract_offset(%rbp), %rbx leaq z_extract_offset(%rbp), %rbx
@ -339,13 +343,13 @@ relocated:
movl $z_input_len, %ecx /* input_len */ movl $z_input_len, %ecx /* input_len */
movq %rbp, %r8 /* output target address */ movq %rbp, %r8 /* output target address */
movq $z_output_len, %r9 /* decompressed length */ movq $z_output_len, %r9 /* decompressed length */
call decompress_kernel call decompress_kernel /* returns kernel location in %rax */
popq %rsi popq %rsi
/* /*
* Jump to the decompressed kernel. * Jump to the decompressed kernel.
*/ */
jmp *%rbp jmp *%rax
.code32 .code32
no_longmode: no_longmode:

View file

@ -112,14 +112,8 @@ struct boot_params *real_mode; /* Pointer to real-mode data */
void *memset(void *s, int c, size_t n); void *memset(void *s, int c, size_t n);
void *memcpy(void *dest, const void *src, size_t n); void *memcpy(void *dest, const void *src, size_t n);
#ifdef CONFIG_X86_64 memptr free_mem_ptr;
#define memptr long memptr free_mem_end_ptr;
#else
#define memptr unsigned
#endif
static memptr free_mem_ptr;
static memptr free_mem_end_ptr;
static char *vidmem; static char *vidmem;
static int vidport; static int vidport;
@ -395,7 +389,7 @@ static void parse_elf(void *output)
free(phdrs); free(phdrs);
} }
asmlinkage void decompress_kernel(void *rmode, memptr heap, asmlinkage void *decompress_kernel(void *rmode, memptr heap,
unsigned char *input_data, unsigned char *input_data,
unsigned long input_len, unsigned long input_len,
unsigned char *output, unsigned char *output,
@ -422,6 +416,10 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
free_mem_ptr = heap; /* Heap */ free_mem_ptr = heap; /* Heap */
free_mem_end_ptr = heap + BOOT_HEAP_SIZE; free_mem_end_ptr = heap + BOOT_HEAP_SIZE;
output = choose_kernel_location(input_data, input_len,
output, output_len);
/* Validate memory location choices. */
if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1)) if ((unsigned long)output & (MIN_KERNEL_ALIGN - 1))
error("Destination address inappropriately aligned"); error("Destination address inappropriately aligned");
#ifdef CONFIG_X86_64 #ifdef CONFIG_X86_64
@ -441,5 +439,5 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
parse_elf(output); parse_elf(output);
handle_relocations(output, output_len); handle_relocations(output, output_len);
debug_putstr("done.\nBooting the kernel.\n"); debug_putstr("done.\nBooting the kernel.\n");
return; return output;
} }

View file

@ -23,7 +23,15 @@
#define BOOT_BOOT_H #define BOOT_BOOT_H
#include "../ctype.h" #include "../ctype.h"
#ifdef CONFIG_X86_64
#define memptr long
#else
#define memptr unsigned
#endif
/* misc.c */ /* misc.c */
extern memptr free_mem_ptr;
extern memptr free_mem_end_ptr;
extern struct boot_params *real_mode; /* Pointer to real-mode data */ extern struct boot_params *real_mode; /* Pointer to real-mode data */
void __putstr(const char *s); void __putstr(const char *s);
#define error_putstr(__x) __putstr(__x) #define error_putstr(__x) __putstr(__x)
@ -39,23 +47,40 @@ static inline void debug_putstr(const char *s)
#endif #endif
#ifdef CONFIG_EARLY_PRINTK #if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
/* cmdline.c */ /* cmdline.c */
int cmdline_find_option(const char *option, char *buffer, int bufsize); int cmdline_find_option(const char *option, char *buffer, int bufsize);
int cmdline_find_option_bool(const char *option); int cmdline_find_option_bool(const char *option);
#endif
#if CONFIG_RANDOMIZE_BASE
/* aslr.c */
unsigned char *choose_kernel_location(unsigned char *input,
unsigned long input_size,
unsigned char *output,
unsigned long output_size);
/* cpuflags.c */
bool has_cpuflag(int flag);
#else
static inline
unsigned char *choose_kernel_location(unsigned char *input,
unsigned long input_size,
unsigned char *output,
unsigned long output_size)
{
return output;
}
#endif
#ifdef CONFIG_EARLY_PRINTK
/* early_serial_console.c */ /* early_serial_console.c */
extern int early_serial_base; extern int early_serial_base;
void console_init(void); void console_init(void);
#else #else
/* early_serial_console.c */
static const int early_serial_base; static const int early_serial_base;
static inline void console_init(void) static inline void console_init(void)
{ } { }
#endif #endif
#endif #endif

View file

@ -28,8 +28,6 @@
#include <asm/required-features.h> #include <asm/required-features.h>
#include <asm/msr-index.h> #include <asm/msr-index.h>
struct cpu_features cpu;
static u32 cpu_vendor[3];
static u32 err_flags[NCAPINTS]; static u32 err_flags[NCAPINTS];
static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY; static const int req_level = CONFIG_X86_MINIMUM_CPU_FAMILY;
@ -69,92 +67,8 @@ static int is_transmeta(void)
cpu_vendor[2] == A32('M', 'x', '8', '6'); cpu_vendor[2] == A32('M', 'x', '8', '6');
} }
static int has_fpu(void)
{
u16 fcw = -1, fsw = -1;
u32 cr0;
asm("movl %%cr0,%0" : "=r" (cr0));
if (cr0 & (X86_CR0_EM|X86_CR0_TS)) {
cr0 &= ~(X86_CR0_EM|X86_CR0_TS);
asm volatile("movl %0,%%cr0" : : "r" (cr0));
}
asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
: "+m" (fsw), "+m" (fcw));
return fsw == 0 && (fcw & 0x103f) == 0x003f;
}
static int has_eflag(u32 mask)
{
u32 f0, f1;
asm("pushfl ; "
"pushfl ; "
"popl %0 ; "
"movl %0,%1 ; "
"xorl %2,%1 ; "
"pushl %1 ; "
"popfl ; "
"pushfl ; "
"popl %1 ; "
"popfl"
: "=&r" (f0), "=&r" (f1)
: "ri" (mask));
return !!((f0^f1) & mask);
}
static void get_flags(void)
{
u32 max_intel_level, max_amd_level;
u32 tfms;
if (has_fpu())
set_bit(X86_FEATURE_FPU, cpu.flags);
if (has_eflag(X86_EFLAGS_ID)) {
asm("cpuid"
: "=a" (max_intel_level),
"=b" (cpu_vendor[0]),
"=d" (cpu_vendor[1]),
"=c" (cpu_vendor[2])
: "a" (0));
if (max_intel_level >= 0x00000001 &&
max_intel_level <= 0x0000ffff) {
asm("cpuid"
: "=a" (tfms),
"=c" (cpu.flags[4]),
"=d" (cpu.flags[0])
: "a" (0x00000001)
: "ebx");
cpu.level = (tfms >> 8) & 15;
cpu.model = (tfms >> 4) & 15;
if (cpu.level >= 6)
cpu.model += ((tfms >> 16) & 0xf) << 4;
}
asm("cpuid"
: "=a" (max_amd_level)
: "a" (0x80000000)
: "ebx", "ecx", "edx");
if (max_amd_level >= 0x80000001 &&
max_amd_level <= 0x8000ffff) {
u32 eax = 0x80000001;
asm("cpuid"
: "+a" (eax),
"=c" (cpu.flags[6]),
"=d" (cpu.flags[1])
: : "ebx");
}
}
}
/* Returns a bitmask of which words we have error bits in */ /* Returns a bitmask of which words we have error bits in */
static int check_flags(void) static int check_cpuflags(void)
{ {
u32 err; u32 err;
int i; int i;
@ -187,8 +101,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
if (has_eflag(X86_EFLAGS_AC)) if (has_eflag(X86_EFLAGS_AC))
cpu.level = 4; cpu.level = 4;
get_flags(); get_cpuflags();
err = check_flags(); err = check_cpuflags();
if (test_bit(X86_FEATURE_LM, cpu.flags)) if (test_bit(X86_FEATURE_LM, cpu.flags))
cpu.level = 64; cpu.level = 64;
@ -207,8 +121,8 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
eax &= ~(1 << 15); eax &= ~(1 << 15);
asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
get_flags(); /* Make sure it really did something */ get_cpuflags(); /* Make sure it really did something */
err = check_flags(); err = check_cpuflags();
} else if (err == 0x01 && } else if (err == 0x01 &&
!(err_flags[0] & ~(1 << X86_FEATURE_CX8)) && !(err_flags[0] & ~(1 << X86_FEATURE_CX8)) &&
is_centaur() && cpu.model >= 6) { is_centaur() && cpu.model >= 6) {
@ -223,7 +137,7 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
set_bit(X86_FEATURE_CX8, cpu.flags); set_bit(X86_FEATURE_CX8, cpu.flags);
err = check_flags(); err = check_cpuflags();
} else if (err == 0x01 && is_transmeta()) { } else if (err == 0x01 && is_transmeta()) {
/* Transmeta might have masked feature bits in word 0 */ /* Transmeta might have masked feature bits in word 0 */
@ -238,7 +152,7 @@ int check_cpu(int *cpu_level_ptr, int *req_level_ptr, u32 **err_flags_ptr)
: : "ecx", "ebx"); : : "ecx", "ebx");
asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx)); asm("wrmsr" : : "a" (eax), "d" (edx), "c" (ecx));
err = check_flags(); err = check_cpuflags();
} }
if (err_flags_ptr) if (err_flags_ptr)

104
arch/x86/boot/cpuflags.c Normal file
View file

@ -0,0 +1,104 @@
#include <linux/types.h>
#include "bitops.h"
#include <asm/processor-flags.h>
#include <asm/required-features.h>
#include <asm/msr-index.h>
#include "cpuflags.h"
struct cpu_features cpu;
u32 cpu_vendor[3];
static bool loaded_flags;
static int has_fpu(void)
{
u16 fcw = -1, fsw = -1;
unsigned long cr0;
asm volatile("mov %%cr0,%0" : "=r" (cr0));
if (cr0 & (X86_CR0_EM|X86_CR0_TS)) {
cr0 &= ~(X86_CR0_EM|X86_CR0_TS);
asm volatile("mov %0,%%cr0" : : "r" (cr0));
}
asm volatile("fninit ; fnstsw %0 ; fnstcw %1"
: "+m" (fsw), "+m" (fcw));
return fsw == 0 && (fcw & 0x103f) == 0x003f;
}
int has_eflag(unsigned long mask)
{
unsigned long f0, f1;
asm volatile("pushf \n\t"
"pushf \n\t"
"pop %0 \n\t"
"mov %0,%1 \n\t"
"xor %2,%1 \n\t"
"push %1 \n\t"
"popf \n\t"
"pushf \n\t"
"pop %1 \n\t"
"popf"
: "=&r" (f0), "=&r" (f1)
: "ri" (mask));
return !!((f0^f1) & mask);
}
/* Handle x86_32 PIC using ebx. */
#if defined(__i386__) && defined(__PIC__)
# define EBX_REG "=r"
#else
# define EBX_REG "=b"
#endif
static inline void cpuid(u32 id, u32 *a, u32 *b, u32 *c, u32 *d)
{
asm volatile(".ifnc %%ebx,%3 ; movl %%ebx,%3 ; .endif \n\t"
"cpuid \n\t"
".ifnc %%ebx,%3 ; xchgl %%ebx,%3 ; .endif \n\t"
: "=a" (*a), "=c" (*c), "=d" (*d), EBX_REG (*b)
: "a" (id)
);
}
void get_cpuflags(void)
{
u32 max_intel_level, max_amd_level;
u32 tfms;
u32 ignored;
if (loaded_flags)
return;
loaded_flags = true;
if (has_fpu())
set_bit(X86_FEATURE_FPU, cpu.flags);
if (has_eflag(X86_EFLAGS_ID)) {
cpuid(0x0, &max_intel_level, &cpu_vendor[0], &cpu_vendor[2],
&cpu_vendor[1]);
if (max_intel_level >= 0x00000001 &&
max_intel_level <= 0x0000ffff) {
cpuid(0x1, &tfms, &ignored, &cpu.flags[4],
&cpu.flags[0]);
cpu.level = (tfms >> 8) & 15;
cpu.model = (tfms >> 4) & 15;
if (cpu.level >= 6)
cpu.model += ((tfms >> 16) & 0xf) << 4;
}
cpuid(0x80000000, &max_amd_level, &ignored, &ignored,
&ignored);
if (max_amd_level >= 0x80000001 &&
max_amd_level <= 0x8000ffff) {
cpuid(0x80000001, &ignored, &ignored, &cpu.flags[6],
&cpu.flags[1]);
}
}
}

19
arch/x86/boot/cpuflags.h Normal file
View file

@ -0,0 +1,19 @@
#ifndef BOOT_CPUFLAGS_H
#define BOOT_CPUFLAGS_H
#include <asm/cpufeature.h>
#include <asm/processor-flags.h>
struct cpu_features {
int level; /* Family, or 64 for x86-64 */
int model;
u32 flags[NCAPINTS];
};
extern struct cpu_features cpu;
extern u32 cpu_vendor[3];
int has_eflag(unsigned long mask);
void get_cpuflags(void);
#endif

View file

@ -39,6 +39,20 @@
#ifdef CONFIG_ARCH_RANDOM #ifdef CONFIG_ARCH_RANDOM
/* Instead of arch_get_random_long() when alternatives haven't run. */
static inline int rdrand_long(unsigned long *v)
{
int ok;
asm volatile("1: " RDRAND_LONG "\n\t"
"jc 2f\n\t"
"decl %0\n\t"
"jnz 1b\n\t"
"2:"
: "=r" (ok), "=a" (*v)
: "0" (RDRAND_RETRY_LOOPS));
return ok;
}
#define GET_RANDOM(name, type, rdrand, nop) \ #define GET_RANDOM(name, type, rdrand, nop) \
static inline int name(type *v) \ static inline int name(type *v) \
{ \ { \
@ -68,6 +82,13 @@ GET_RANDOM(arch_get_random_int, unsigned int, RDRAND_INT, ASM_NOP3);
#endif /* CONFIG_X86_64 */ #endif /* CONFIG_X86_64 */
#else
static inline int rdrand_long(unsigned long *v)
{
return 0;
}
#endif /* CONFIG_ARCH_RANDOM */ #endif /* CONFIG_ARCH_RANDOM */
extern void x86_init_rdrand(struct cpuinfo_x86 *c); extern void x86_init_rdrand(struct cpuinfo_x86 *c);

View file

@ -39,9 +39,18 @@
#define __VIRTUAL_MASK_SHIFT 47 #define __VIRTUAL_MASK_SHIFT 47
/* /*
* Kernel image size is limited to 512 MB (see level2_kernel_pgt in * Kernel image size is limited to 1GiB due to the fixmap living in the
* arch/x86/kernel/head_64.S), and it is mapped here: * next 1GiB (see level2_kernel_pgt in arch/x86/kernel/head_64.S). Use
* 512MiB by default, leaving 1.5GiB for modules once the page tables
* are fully set up. If kernel ASLR is configured, it can extend the
* kernel page table mapping, reducing the size of the modules area.
*/ */
#define KERNEL_IMAGE_SIZE (512 * 1024 * 1024) #define KERNEL_IMAGE_SIZE_DEFAULT (512 * 1024 * 1024)
#if defined(CONFIG_RANDOMIZE_BASE) && \
CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE_DEFAULT
#define KERNEL_IMAGE_SIZE CONFIG_RANDOMIZE_BASE_MAX_OFFSET
#else
#define KERNEL_IMAGE_SIZE KERNEL_IMAGE_SIZE_DEFAULT
#endif
#endif /* _ASM_X86_PAGE_64_DEFS_H */ #endif /* _ASM_X86_PAGE_64_DEFS_H */

View file

@ -58,7 +58,7 @@ typedef struct { pteval_t pte; } pte_t;
#define VMALLOC_START _AC(0xffffc90000000000, UL) #define VMALLOC_START _AC(0xffffc90000000000, UL)
#define VMALLOC_END _AC(0xffffe8ffffffffff, UL) #define VMALLOC_END _AC(0xffffe8ffffffffff, UL)
#define VMEMMAP_START _AC(0xffffea0000000000, UL) #define VMEMMAP_START _AC(0xffffea0000000000, UL)
#define MODULES_VADDR _AC(0xffffffffa0000000, UL) #define MODULES_VADDR (__START_KERNEL_map + KERNEL_IMAGE_SIZE)
#define MODULES_END _AC(0xffffffffff000000, UL) #define MODULES_END _AC(0xffffffffff000000, UL)
#define MODULES_LEN (MODULES_END - MODULES_VADDR) #define MODULES_LEN (MODULES_END - MODULES_VADDR)

View file

@ -31,20 +31,6 @@ static int __init x86_rdrand_setup(char *s)
} }
__setup("nordrand", x86_rdrand_setup); __setup("nordrand", x86_rdrand_setup);
/* We can't use arch_get_random_long() here since alternatives haven't run */
static inline int rdrand_long(unsigned long *v)
{
int ok;
asm volatile("1: " RDRAND_LONG "\n\t"
"jc 2f\n\t"
"decl %0\n\t"
"jnz 1b\n\t"
"2:"
: "=r" (ok), "=a" (*v)
: "0" (RDRAND_RETRY_LOOPS));
return ok;
}
/* /*
* Force a reseed cycle; we are architecturally guaranteed a reseed * Force a reseed cycle; we are architecturally guaranteed a reseed
* after no more than 512 128-bit chunks of random data. This also * after no more than 512 128-bit chunks of random data. This also

View file

@ -827,6 +827,20 @@ static void __init trim_low_memory_range(void)
memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE)); memblock_reserve(0, ALIGN(reserve_low, PAGE_SIZE));
} }
/*
* Dump out kernel offset information on panic.
*/
static int
dump_kernel_offset(struct notifier_block *self, unsigned long v, void *p)
{
pr_emerg("Kernel Offset: 0x%lx from 0x%lx "
"(relocation range: 0x%lx-0x%lx)\n",
(unsigned long)&_text - __START_KERNEL, __START_KERNEL,
__START_KERNEL_map, MODULES_VADDR-1);
return 0;
}
/* /*
* Determine if we were loaded by an EFI loader. If so, then we have also been * Determine if we were loaded by an EFI loader. If so, then we have also been
* passed the efi memmap, systab, etc., so we should use these data structures * passed the efi memmap, systab, etc., so we should use these data structures
@ -1252,3 +1266,15 @@ void __init i386_reserve_resources(void)
} }
#endif /* CONFIG_X86_32 */ #endif /* CONFIG_X86_32 */
static struct notifier_block kernel_offset_notifier = {
.notifier_call = dump_kernel_offset
};
static int __init register_kernel_offset_dumper(void)
{
atomic_notifier_chain_register(&panic_notifier_list,
&kernel_offset_notifier);
return 0;
}
__initcall(register_kernel_offset_dumper);

View file

@ -806,6 +806,9 @@ void __init mem_init(void)
BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END); BUILD_BUG_ON(VMALLOC_START >= VMALLOC_END);
#undef high_memory #undef high_memory
#undef __FIXADDR_TOP #undef __FIXADDR_TOP
#ifdef CONFIG_RANDOMIZE_BASE
BUILD_BUG_ON(CONFIG_RANDOMIZE_BASE_MAX_OFFSET > KERNEL_IMAGE_SIZE);
#endif
#ifdef CONFIG_HIGHMEM #ifdef CONFIG_HIGHMEM
BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START); BUG_ON(PKMAP_BASE + LAST_PKMAP*PAGE_SIZE > FIXADDR_START);

View file

@ -722,15 +722,25 @@ static void percpu_init(void)
/* /*
* Check to see if a symbol lies in the .data..percpu section. * Check to see if a symbol lies in the .data..percpu section.
* For some as yet not understood reason the "__init_begin" *
* symbol which immediately preceeds the .data..percpu section * The linker incorrectly associates some symbols with the
* also shows up as it it were part of it so we do an explict * .data..percpu section so we also need to check the symbol
* check for that symbol name and ignore it. * name to make sure that we classify the symbol correctly.
*
* The GNU linker incorrectly associates:
* __init_begin
* __per_cpu_load
*
* The "gold" linker incorrectly associates:
* init_per_cpu__irq_stack_union
* init_per_cpu__gdt_page
*/ */
static int is_percpu_sym(ElfW(Sym) *sym, const char *symname) static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
{ {
return (sym->st_shndx == per_cpu_shndx) && return (sym->st_shndx == per_cpu_shndx) &&
strcmp(symname, "__init_begin"); strcmp(symname, "__init_begin") &&
strcmp(symname, "__per_cpu_load") &&
strncmp(symname, "init_per_cpu_", 13);
} }