diff --git a/MAINTAINERS b/MAINTAINERS index c5325d2bb86d..0885aa2b095a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3224,6 +3224,12 @@ M: mporter@kernel.crashing.org L: linux-kernel@vger.kernel.org S: Maintained +RDC R-321X SoC +P: Florian Fainelli +M: florian.fainelli@telecomint.eu +L: linux-kernel@vger.kernel.org +S: Maintained + RDC R6040 FAST ETHERNET DRIVER P: Florian Fainelli M: florian.fainelli@telecomint.eu diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 2e1e3af28c3a..fa555148823d 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -220,9 +220,9 @@ config DEBUG_BOOT_PARAMS This option will cause struct boot_params to be exported via debugfs. config CPA_DEBUG - bool "CPA self test code" + bool "CPA self-test code" depends on DEBUG_KERNEL help - Do change_page_attr self tests at boot. + Do change_page_attr() self-tests every 30 seconds. endmenu diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index a0522735dd9d..5affe91ca1e5 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -827,7 +827,6 @@ static int fill_powernow_table_pstate(struct powernow_k8_data *data, struct cpuf for (i = 0; i < data->acpi_data.state_count; i++) { u32 index; - u32 hi = 0, lo = 0; index = data->acpi_data.states[i].control & HW_PSTATE_MASK; if (index > data->max_hw_pstate) { diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/main.c index 1e27b69a7a0e..b6e136f23d3d 100644 --- a/arch/x86/kernel/cpu/mtrr/main.c +++ b/arch/x86/kernel/cpu/mtrr/main.c @@ -659,7 +659,7 @@ static __init int amd_special_default_mtrr(void) */ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) { - unsigned long i, base, size, highest_addr = 0, def, dummy; + unsigned long i, base, size, highest_pfn = 0, def, dummy; mtrr_type type; u64 trim_start, trim_size; @@ -682,28 +682,27 @@ int __init mtrr_trim_uncached_memory(unsigned long end_pfn) mtrr_if->get(i, &base, &size, &type); if (type != MTRR_TYPE_WRBACK) continue; - base <<= PAGE_SHIFT; - size <<= PAGE_SHIFT; - if (highest_addr < base + size) - highest_addr = base + size; + if (highest_pfn < base + size) + highest_pfn = base + size; } /* kvm/qemu doesn't have mtrr set right, don't trim them all */ - if (!highest_addr) { + if (!highest_pfn) { printk(KERN_WARNING "WARNING: strange, CPU MTRRs all blank?\n"); WARN_ON(1); return 0; } - if ((highest_addr >> PAGE_SHIFT) < end_pfn) { + if (highest_pfn < end_pfn) { printk(KERN_WARNING "WARNING: BIOS bug: CPU MTRRs don't cover" - " all of memory, losing %LdMB of RAM.\n", - (((u64)end_pfn << PAGE_SHIFT) - highest_addr) >> 20); + " all of memory, losing %luMB of RAM.\n", + (end_pfn - highest_pfn) >> (20 - PAGE_SHIFT)); WARN_ON(1); printk(KERN_INFO "update e820 for mtrr\n"); - trim_start = highest_addr; + trim_start = highest_pfn; + trim_start <<= PAGE_SHIFT; trim_size = end_pfn; trim_size <<= PAGE_SHIFT; trim_size -= trim_start; diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index bea8474744ff..c7341e81941c 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -582,7 +582,6 @@ retint_restore_args: /* return to kernel space */ TRACE_IRQS_IRETQ restore_args: RESTORE_ARGS 0,8,0 -iret_label: #ifdef CONFIG_PARAVIRT INTERRUPT_RETURN #endif @@ -593,13 +592,22 @@ ENTRY(native_iret) .quad native_iret, bad_iret .previous .section .fixup,"ax" - /* force a signal here? this matches i386 behaviour */ - /* running with kernel gs */ bad_iret: - movq $11,%rdi /* SIGSEGV */ - TRACE_IRQS_ON - ENABLE_INTERRUPTS(CLBR_ANY | ~(CLBR_RDI)) - jmp do_exit + /* + * The iret traps when the %cs or %ss being restored is bogus. + * We've lost the original trap vector and error code. + * #GPF is the most likely one to get for an invalid selector. + * So pretend we completed the iret and took the #GPF in user mode. + * + * We are now running with the kernel GS after exception recovery. + * But error_entry expects us to have user GS to match the user %cs, + * so swap back. + */ + pushq $0 + + SWAPGS + jmp general_protection + .previous /* edi: workmask, edx: work */ @@ -911,7 +919,7 @@ error_kernelspace: iret run with kernel gs again, so don't set the user space flag. B stepping K8s sometimes report an truncated RIP for IRET exceptions returning to compat mode. Check for these here too. */ - leaq iret_label(%rip),%rbp + leaq native_iret(%rip),%rbp cmpq %rbp,RIP(%rsp) je error_swapgs movl %ebp,%ebp /* zero extend */ diff --git a/arch/x86/kernel/head_64.S b/arch/x86/kernel/head_64.S index 4f283ad215ec..09b38d539b09 100644 --- a/arch/x86/kernel/head_64.S +++ b/arch/x86/kernel/head_64.S @@ -250,18 +250,13 @@ ENTRY(secondary_startup_64) lretq /* SMP bootup changes these two */ -#ifndef CONFIG_HOTPLUG_CPU - .pushsection .init.data -#endif + __CPUINITDATA .align 8 - .globl initial_code -initial_code: + ENTRY(initial_code) .quad x86_64_start_kernel -#ifndef CONFIG_HOTPLUG_CPU - .popsection -#endif - .globl init_rsp -init_rsp: + __FINITDATA + + ENTRY(init_rsp) .quad init_thread_union+THREAD_SIZE-8 bad_address: diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 96286df1bb81..702c33efea84 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -103,9 +103,26 @@ static int set_segment_reg(struct task_struct *task, if (invalid_selector(value)) return -EIO; - if (offset != offsetof(struct user_regs_struct, gs)) + /* + * For %cs and %ss we cannot permit a null selector. + * We can permit a bogus selector as long as it has USER_RPL. + * Null selectors are fine for other segment registers, but + * we will never get back to user mode with invalid %cs or %ss + * and will take the trap in iret instead. Much code relies + * on user_mode() to distinguish a user trap frame (which can + * safely use invalid selectors) from a kernel trap frame. + */ + switch (offset) { + case offsetof(struct user_regs_struct, cs): + case offsetof(struct user_regs_struct, ss): + if (unlikely(value == 0)) + return -EIO; + + default: *pt_regs_access(task_pt_regs(task), offset) = value; - else { + break; + + case offsetof(struct user_regs_struct, gs): task->thread.gs = value; if (task == current) /* @@ -227,12 +244,16 @@ static int set_segment_reg(struct task_struct *task, * Can't actually change these in 64-bit mode. */ case offsetof(struct user_regs_struct,cs): + if (unlikely(value == 0)) + return -EIO; #ifdef CONFIG_IA32_EMULATION if (test_tsk_thread_flag(task, TIF_IA32)) task_pt_regs(task)->cs = value; #endif break; case offsetof(struct user_regs_struct,ss): + if (unlikely(value == 0)) + return -EIO; #ifdef CONFIG_IA32_EMULATION if (test_tsk_thread_flag(task, TIF_IA32)) task_pt_regs(task)->ss = value; diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c index 3cd7a2dcd4fe..6ba33ca8715a 100644 --- a/arch/x86/kernel/quirks.c +++ b/arch/x86/kernel/quirks.c @@ -380,19 +380,19 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_NVIDIA, 0x0367, void force_hpet_resume(void) { switch (force_hpet_resume_type) { - case ICH_FORCE_HPET_RESUME: - return ich_force_hpet_resume(); - - case OLD_ICH_FORCE_HPET_RESUME: - return old_ich_force_hpet_resume(); - - case VT8237_FORCE_HPET_RESUME: - return vt8237_force_hpet_resume(); - - case NVIDIA_FORCE_HPET_RESUME: - return nvidia_force_hpet_resume(); - - default: + case ICH_FORCE_HPET_RESUME: + ich_force_hpet_resume(); + return; + case OLD_ICH_FORCE_HPET_RESUME: + old_ich_force_hpet_resume(); + return; + case VT8237_FORCE_HPET_RESUME: + vt8237_force_hpet_resume(); + return; + case NVIDIA_FORCE_HPET_RESUME: + nvidia_force_hpet_resume(); + return; + default: break; } } diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c index 36c100c323aa..10b8a6f69f84 100644 --- a/arch/x86/kernel/test_nx.c +++ b/arch/x86/kernel/test_nx.c @@ -139,7 +139,6 @@ static int test_NX(void) * Until then, don't run them to avoid too many people getting scared * by the error message */ -#if 0 #ifdef CONFIG_DEBUG_RODATA /* Test 3: Check if the .rodata section is executable */ @@ -152,6 +151,7 @@ static int test_NX(void) } #endif +#if 0 /* Test 4: Check if the .data section of a module is executable */ if (test_address(&test_data)) { printk(KERN_ERR "test_nx: .data section is executable\n"); diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 3cf72977d012..b22c01e05a18 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -1176,17 +1176,12 @@ void __init trap_init(void) #endif set_trap_gate(19,&simd_coprocessor_error); + /* + * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. + * Generate a build-time error if the alignment is wrong. + */ + BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15); if (cpu_has_fxsr) { - /* - * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. - * Generates a compile-time "error: zero width for bit-field" if - * the alignment is wrong. - */ - struct fxsrAlignAssert { - int _:!(offsetof(struct task_struct, - thread.i387.fxsave) & 15); - }; - printk(KERN_INFO "Enabling fast FPU save and restore... "); set_in_cr4(X86_CR4_OSFXSR); printk("done.\n"); diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index ad8b9733d6b3..621afb6343dc 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -428,6 +428,16 @@ static noinline void pgtable_bad(unsigned long address, struct pt_regs *regs, } #endif +static int spurious_fault_check(unsigned long error_code, pte_t *pte) +{ + if ((error_code & PF_WRITE) && !pte_write(*pte)) + return 0; + if ((error_code & PF_INSTR) && !pte_exec(*pte)) + return 0; + + return 1; +} + /* * Handle a spurious fault caused by a stale TLB entry. This allows * us to lazily refresh the TLB when increasing the permissions of a @@ -457,20 +467,21 @@ static int spurious_fault(unsigned long address, if (!pud_present(*pud)) return 0; + if (pud_large(*pud)) + return spurious_fault_check(error_code, (pte_t *) pud); + pmd = pmd_offset(pud, address); if (!pmd_present(*pmd)) return 0; + if (pmd_large(*pmd)) + return spurious_fault_check(error_code, (pte_t *) pmd); + pte = pte_offset_kernel(pmd, address); if (!pte_present(*pte)) return 0; - if ((error_code & PF_WRITE) && !pte_write(*pte)) - return 0; - if ((error_code & PF_INSTR) && !pte_exec(*pte)) - return 0; - - return 1; + return spurious_fault_check(error_code, pte); } /* @@ -947,11 +958,12 @@ void vmalloc_sync_all(void) for (address = start; address <= VMALLOC_END; address += PGDIR_SIZE) { if (!test_bit(pgd_index(address), insync)) { const pgd_t *pgd_ref = pgd_offset_k(address); + unsigned long flags; struct page *page; if (pgd_none(*pgd_ref)) continue; - spin_lock(&pgd_lock); + spin_lock_irqsave(&pgd_lock, flags); list_for_each_entry(page, &pgd_list, lru) { pgd_t *pgd; pgd = (pgd_t *)page_address(page) + pgd_index(address); @@ -960,7 +972,7 @@ void vmalloc_sync_all(void) else BUG_ON(pgd_page_vaddr(*pgd) != pgd_page_vaddr(*pgd_ref)); } - spin_unlock(&pgd_lock); + spin_unlock_irqrestore(&pgd_lock, flags); set_bit(pgd_index(address), insync); } if (address == start) diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 3a98d6f724ab..9b61c75a2355 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -591,10 +591,17 @@ void mark_rodata_ro(void) if (end <= start) return; - set_memory_ro(start, (end - start) >> PAGE_SHIFT); printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", (end - start) >> 10); + set_memory_ro(start, (end - start) >> PAGE_SHIFT); + + /* + * The rodata section (but not the kernel text!) should also be + * not-executable. + */ + start = ((unsigned long)__start_rodata + PAGE_SIZE - 1) & PAGE_MASK; + set_memory_nx(start, (end - start) >> PAGE_SHIFT); rodata_test(); diff --git a/arch/x86/mm/pageattr-test.c b/arch/x86/mm/pageattr-test.c index 398f3a578dde..ed8201600354 100644 --- a/arch/x86/mm/pageattr-test.c +++ b/arch/x86/mm/pageattr-test.c @@ -5,6 +5,7 @@ * and compares page tables forwards and afterwards. */ #include +#include #include #include #include @@ -14,8 +15,13 @@ #include #include +/* + * Only print the results of the first pass: + */ +static __read_mostly int print = 1; + enum { - NTEST = 4000, + NTEST = 400, #ifdef CONFIG_X86_64 LPS = (1 << PMD_SHIFT), #elif defined(CONFIG_X86_PAE) @@ -31,7 +37,7 @@ struct split_state { long min_exec, max_exec; }; -static __init int print_split(struct split_state *s) +static int print_split(struct split_state *s) { long i, expected, missed = 0; int printed = 0; @@ -82,10 +88,13 @@ static __init int print_split(struct split_state *s) s->max_exec = addr; } } - printk(KERN_INFO - "CPA mapping 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n", - s->spg, s->lpg, s->gpg, s->exec, - s->min_exec != ~0UL ? s->min_exec : 0, s->max_exec, missed); + if (print) { + printk(KERN_INFO + " 4k %lu large %lu gb %lu x %lu[%lx-%lx] miss %lu\n", + s->spg, s->lpg, s->gpg, s->exec, + s->min_exec != ~0UL ? s->min_exec : 0, + s->max_exec, missed); + } expected = (s->gpg*GPS + s->lpg*LPS)/PAGE_SIZE + s->spg + missed; if (expected != i) { @@ -96,11 +105,11 @@ static __init int print_split(struct split_state *s) return err; } -static unsigned long __initdata addr[NTEST]; -static unsigned int __initdata len[NTEST]; +static unsigned long addr[NTEST]; +static unsigned int len[NTEST]; /* Change the global bit on random pages in the direct mapping */ -static __init int exercise_pageattr(void) +static int pageattr_test(void) { struct split_state sa, sb, sc; unsigned long *bm; @@ -110,7 +119,8 @@ static __init int exercise_pageattr(void) int i, k; int err; - printk(KERN_INFO "CPA exercising pageattr\n"); + if (print) + printk(KERN_INFO "CPA self-test:\n"); bm = vmalloc((max_pfn_mapped + 7) / 8); if (!bm) { @@ -186,7 +196,6 @@ static __init int exercise_pageattr(void) failed += print_split(&sb); - printk(KERN_INFO "CPA reverting everything\n"); for (i = 0; i < NTEST; i++) { if (!addr[i]) continue; @@ -214,12 +223,40 @@ static __init int exercise_pageattr(void) failed += print_split(&sc); if (failed) { - printk(KERN_ERR "CPA selftests NOT PASSED. Please report.\n"); + printk(KERN_ERR "NOT PASSED. Please report.\n"); WARN_ON(1); + return -EINVAL; } else { - printk(KERN_INFO "CPA selftests PASSED\n"); + if (print) + printk(KERN_INFO "ok.\n"); } return 0; } -module_init(exercise_pageattr); + +static int do_pageattr_test(void *__unused) +{ + while (!kthread_should_stop()) { + schedule_timeout_interruptible(HZ*30); + if (pageattr_test() < 0) + break; + if (print) + print--; + } + return 0; +} + +static int start_pageattr_test(void) +{ + struct task_struct *p; + + p = kthread_create(do_pageattr_test, NULL, "pageattr-test"); + if (!IS_ERR(p)) + wake_up_process(p); + else + WARN_ON(1); + + return 0; +} + +module_init(start_pageattr_test); diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 16ce841f08d6..8493c855582b 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -167,8 +167,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address) if (within(address, virt_to_highmap(_text), virt_to_highmap(_etext))) pgprot_val(forbidden) |= _PAGE_NX; - -#ifdef CONFIG_DEBUG_RODATA /* The .rodata section needs to be read-only */ if (within(address, (unsigned long)__start_rodata, (unsigned long)__end_rodata)) @@ -179,7 +177,6 @@ static inline pgprot_t static_protections(pgprot_t prot, unsigned long address) if (within(address, virt_to_highmap(__start_rodata), virt_to_highmap(__end_rodata))) pgprot_val(forbidden) |= _PAGE_RW; -#endif prot = __pgprot(pgprot_val(prot) & ~pgprot_val(forbidden)); @@ -260,17 +257,6 @@ try_preserve_large_page(pte_t *kpte, unsigned long address, pgprot_t old_prot, new_prot; int level, do_split = 1; - /* - * An Athlon 64 X2 showed hard hangs if we tried to preserve - * largepages and changed the PSE entry from RW to RO. - * - * As AMD CPUs have a long series of erratas in this area, - * (and none of the known ones seem to explain this hang), - * disable this code until the hang can be debugged: - */ - if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD) - return 1; - spin_lock_irqsave(&pgd_lock, flags); /* * Check for races, another CPU might have split this page diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 4628c42ca892..111771d38e6e 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1077,7 +1077,7 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) current->mm->start_stack = bprm->p; #ifdef arch_randomize_brk - if (current->flags & PF_RANDOMIZE) + if ((current->flags & PF_RANDOMIZE) && (randomize_va_space > 1)) current->mm->brk = current->mm->start_brk = arch_randomize_brk(current->mm); #endif diff --git a/include/asm-x86/pgalloc_64.h b/include/asm-x86/pgalloc_64.h index 315314ce4bfb..4f6220db22b1 100644 --- a/include/asm-x86/pgalloc_64.h +++ b/include/asm-x86/pgalloc_64.h @@ -42,19 +42,21 @@ static inline void pud_free(struct mm_struct *mm, pud_t *pud) static inline void pgd_list_add(pgd_t *pgd) { struct page *page = virt_to_page(pgd); + unsigned long flags; - spin_lock(&pgd_lock); + spin_lock_irqsave(&pgd_lock, flags); list_add(&page->lru, &pgd_list); - spin_unlock(&pgd_lock); + spin_unlock_irqrestore(&pgd_lock, flags); } static inline void pgd_list_del(pgd_t *pgd) { struct page *page = virt_to_page(pgd); + unsigned long flags; - spin_lock(&pgd_lock); + spin_lock_irqsave(&pgd_lock, flags); list_del(&page->lru); - spin_unlock(&pgd_lock); + spin_unlock_irqrestore(&pgd_lock, flags); } static inline pgd_t *pgd_alloc(struct mm_struct *mm) diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index c4e00161a247..b0fd85ab9efb 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -79,7 +79,7 @@ struct cpuidle_state_kobj { }; struct cpuidle_device { - int enabled:1; + unsigned int enabled:1; unsigned int cpu; int last_residency; diff --git a/include/linux/init.h b/include/linux/init.h index 90cdbbbbe077..a404a0055dd7 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -110,6 +110,7 @@ #define __FINIT .previous #define __INITDATA .section ".init.data","aw" +#define __FINITDATA .previous #define __DEVINIT .section ".devinit.text", "ax" #define __DEVINITDATA .section ".devinit.data", "aw" diff --git a/init/Kconfig b/init/Kconfig index 87f50df58893..92b23e256614 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -541,6 +541,18 @@ config ELF_CORE help Enable support for generating core dumps. Disabling saves about 4k. +config COMPAT_BRK + bool "Disable heap randomization" + default y + help + Randomizing heap placement makes heap exploits harder, but it + also breaks ancient binaries (including anything libc5 based). + This option changes the bootup default to heap randomization + disabled, and can be overriden runtime by setting + /proc/sys/kernel/randomize_va_space to 2. + + On non-ancient distros (post-2000 ones) Y is usually a safe choice. + config BASE_FULL default y bool "Enable full-sized data structures for core" if EMBEDDED diff --git a/mm/memory.c b/mm/memory.c index 7bb70728bb52..9d073fa0a2d0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -82,7 +82,18 @@ void * high_memory; EXPORT_SYMBOL(num_physpages); EXPORT_SYMBOL(high_memory); -int randomize_va_space __read_mostly = 1; +/* + * Randomize the address space (stacks, mmaps, brk, etc.). + * + * ( When CONFIG_COMPAT_BRK=y we exclude brk from randomization, + * as ancient (libc5 based) binaries can segfault. ) + */ +int randomize_va_space __read_mostly = +#ifdef CONFIG_COMPAT_BRK + 1; +#else + 2; +#endif static int __init disable_randmaps(char *s) { diff --git a/mm/mmap.c b/mm/mmap.c index bb4c963cc534..ad6e4eaf34f8 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -245,7 +245,7 @@ asmlinkage unsigned long sys_brk(unsigned long brk) down_write(&mm->mmap_sem); - if (brk < mm->end_code) + if (brk < mm->start_brk) goto out; /*