Merge branch 'x86-irq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull x86 irq updates from Ingo Molnar:
 "Here are the main changes in this tree:

   - Introduce x86-64 IRQ/exception/debug stack guard pages to detect
     stack overflows immediately and deterministically.

   - Clean up over a decade worth of cruft accumulated.

  The outcome of this should be more clear-cut faults/crashes when any
  of the low level x86 CPU stacks overflow, instead of silent memory
  corruption and sporadic failures much later on"

* 'x86-irq-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (33 commits)
  x86/irq: Fix outdated comments
  x86/irq/64: Remove stack overflow debug code
  x86/irq/64: Remap the IRQ stack with guard pages
  x86/irq/64: Split the IRQ stack into its own pages
  x86/irq/64: Init hardirq_stack_ptr during CPU hotplug
  x86/irq/32: Handle irq stack allocation failure proper
  x86/irq/32: Invoke irq_ctx_init() from init_IRQ()
  x86/irq/64: Rename irq_stack_ptr to hardirq_stack_ptr
  x86/irq/32: Rename hard/softirq_stack to hard/softirq_stack_ptr
  x86/irq/32: Make irq stack a character array
  x86/irq/32: Define IRQ_STACK_SIZE
  x86/dumpstack/64: Speedup in_exception_stack()
  x86/exceptions: Split debug IST stack
  x86/exceptions: Enable IST guard pages
  x86/exceptions: Disconnect IST index and stack order
  x86/cpu: Remove orig_ist array
  x86/cpu: Prepare TSS.IST setup for guard pages
  x86/dumpstack/64: Use cpu_entry_area instead of orig_ist
  x86/irq/64: Use cpu entry area instead of orig_ist
  x86/traps: Use cpu_entry_area instead of orig_ist
  ...

hifive-unleashed-5.2
commit
8f14772703
|
@ -59,7 +59,7 @@ If that assumption is ever broken then the stacks will become corrupt.
|
||||||
|
|
||||||
The currently assigned IST stacks are :-
|
The currently assigned IST stacks are :-
|
||||||
|
|
||||||
* DOUBLEFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
|
* ESTACK_DF. EXCEPTION_STKSZ (PAGE_SIZE).
|
||||||
|
|
||||||
Used for interrupt 8 - Double Fault Exception (#DF).
|
Used for interrupt 8 - Double Fault Exception (#DF).
|
||||||
|
|
||||||
|
@ -68,7 +68,7 @@ The currently assigned IST stacks are :-
|
||||||
Using a separate stack allows the kernel to recover from it well enough
|
Using a separate stack allows the kernel to recover from it well enough
|
||||||
in many cases to still output an oops.
|
in many cases to still output an oops.
|
||||||
|
|
||||||
* NMI_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
|
* ESTACK_NMI. EXCEPTION_STKSZ (PAGE_SIZE).
|
||||||
|
|
||||||
Used for non-maskable interrupts (NMI).
|
Used for non-maskable interrupts (NMI).
|
||||||
|
|
||||||
|
@ -76,7 +76,7 @@ The currently assigned IST stacks are :-
|
||||||
middle of switching stacks. Using IST for NMI events avoids making
|
middle of switching stacks. Using IST for NMI events avoids making
|
||||||
assumptions about the previous state of the kernel stack.
|
assumptions about the previous state of the kernel stack.
|
||||||
|
|
||||||
* DEBUG_STACK. DEBUG_STKSZ
|
* ESTACK_DB. EXCEPTION_STKSZ (PAGE_SIZE).
|
||||||
|
|
||||||
Used for hardware debug interrupts (interrupt 1) and for software
|
Used for hardware debug interrupts (interrupt 1) and for software
|
||||||
debug interrupts (INT3).
|
debug interrupts (INT3).
|
||||||
|
@ -86,7 +86,12 @@ The currently assigned IST stacks are :-
|
||||||
avoids making assumptions about the previous state of the kernel
|
avoids making assumptions about the previous state of the kernel
|
||||||
stack.
|
stack.
|
||||||
|
|
||||||
* MCE_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
|
To handle nested #DB correctly there exist two instances of DB stacks. On
|
||||||
|
#DB entry the IST stackpointer for #DB is switched to the second instance
|
||||||
|
so a nested #DB starts from a clean stack. The nested #DB switches
|
||||||
|
the IST stackpointer to a guard hole to catch triple nesting.
|
||||||
|
|
||||||
|
* ESTACK_MCE. EXCEPTION_STKSZ (PAGE_SIZE).
|
||||||
|
|
||||||
Used for interrupt 18 - Machine Check Exception (#MC).
|
Used for interrupt 18 - Machine Check Exception (#MC).
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,7 @@ config X86_32
|
||||||
select ARCH_WANT_IPC_PARSE_VERSION
|
select ARCH_WANT_IPC_PARSE_VERSION
|
||||||
select CLKSRC_I8253
|
select CLKSRC_I8253
|
||||||
select CLONE_BACKWARDS
|
select CLONE_BACKWARDS
|
||||||
|
select HAVE_DEBUG_STACKOVERFLOW
|
||||||
select MODULES_USE_ELF_REL
|
select MODULES_USE_ELF_REL
|
||||||
select OLD_SIGACTION
|
select OLD_SIGACTION
|
||||||
|
|
||||||
|
@ -138,7 +139,6 @@ config X86
|
||||||
select HAVE_COPY_THREAD_TLS
|
select HAVE_COPY_THREAD_TLS
|
||||||
select HAVE_C_RECORDMCOUNT
|
select HAVE_C_RECORDMCOUNT
|
||||||
select HAVE_DEBUG_KMEMLEAK
|
select HAVE_DEBUG_KMEMLEAK
|
||||||
select HAVE_DEBUG_STACKOVERFLOW
|
|
||||||
select HAVE_DMA_CONTIGUOUS
|
select HAVE_DMA_CONTIGUOUS
|
||||||
select HAVE_DYNAMIC_FTRACE
|
select HAVE_DYNAMIC_FTRACE
|
||||||
select HAVE_DYNAMIC_FTRACE_WITH_REGS
|
select HAVE_DYNAMIC_FTRACE_WITH_REGS
|
||||||
|
|
|
@ -298,7 +298,7 @@ ENTRY(__switch_to_asm)
|
||||||
|
|
||||||
#ifdef CONFIG_STACKPROTECTOR
|
#ifdef CONFIG_STACKPROTECTOR
|
||||||
movq TASK_stack_canary(%rsi), %rbx
|
movq TASK_stack_canary(%rsi), %rbx
|
||||||
movq %rbx, PER_CPU_VAR(irq_stack_union)+stack_canary_offset
|
movq %rbx, PER_CPU_VAR(fixed_percpu_data) + stack_canary_offset
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_RETPOLINE
|
#ifdef CONFIG_RETPOLINE
|
||||||
|
@ -430,8 +430,8 @@ END(irq_entries_start)
|
||||||
* it before we actually move ourselves to the IRQ stack.
|
* it before we actually move ourselves to the IRQ stack.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
movq \old_rsp, PER_CPU_VAR(irq_stack_union + IRQ_STACK_SIZE - 8)
|
movq \old_rsp, PER_CPU_VAR(irq_stack_backing_store + IRQ_STACK_SIZE - 8)
|
||||||
movq PER_CPU_VAR(irq_stack_ptr), %rsp
|
movq PER_CPU_VAR(hardirq_stack_ptr), %rsp
|
||||||
|
|
||||||
#ifdef CONFIG_DEBUG_ENTRY
|
#ifdef CONFIG_DEBUG_ENTRY
|
||||||
/*
|
/*
|
||||||
|
@ -840,7 +840,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
|
||||||
/*
|
/*
|
||||||
* Exception entry points.
|
* Exception entry points.
|
||||||
*/
|
*/
|
||||||
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + ((x) - 1) * 8)
|
#define CPU_TSS_IST(x) PER_CPU_VAR(cpu_tss_rw) + (TSS_ist + (x) * 8)
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* idtentry - Generate an IDT entry stub
|
* idtentry - Generate an IDT entry stub
|
||||||
|
@ -878,7 +878,7 @@ apicinterrupt IRQ_WORK_VECTOR irq_work_interrupt smp_irq_work_interrupt
|
||||||
* @paranoid == 2 is special: the stub will never switch stacks. This is for
|
* @paranoid == 2 is special: the stub will never switch stacks. This is for
|
||||||
* #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
|
* #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
|
||||||
*/
|
*/
|
||||||
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1
|
.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 ist_offset=0
|
||||||
ENTRY(\sym)
|
ENTRY(\sym)
|
||||||
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
|
UNWIND_HINT_IRET_REGS offset=\has_error_code*8
|
||||||
|
|
||||||
|
@ -924,13 +924,13 @@ ENTRY(\sym)
|
||||||
.endif
|
.endif
|
||||||
|
|
||||||
.if \shift_ist != -1
|
.if \shift_ist != -1
|
||||||
subq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
|
subq $\ist_offset, CPU_TSS_IST(\shift_ist)
|
||||||
.endif
|
.endif
|
||||||
|
|
||||||
call \do_sym
|
call \do_sym
|
||||||
|
|
||||||
.if \shift_ist != -1
|
.if \shift_ist != -1
|
||||||
addq $EXCEPTION_STKSZ, CPU_TSS_IST(\shift_ist)
|
addq $\ist_offset, CPU_TSS_IST(\shift_ist)
|
||||||
.endif
|
.endif
|
||||||
|
|
||||||
/* these procedures expect "no swapgs" flag in ebx */
|
/* these procedures expect "no swapgs" flag in ebx */
|
||||||
|
@ -1128,7 +1128,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \
|
||||||
hv_stimer0_callback_vector hv_stimer0_vector_handler
|
hv_stimer0_callback_vector hv_stimer0_vector_handler
|
||||||
#endif /* CONFIG_HYPERV */
|
#endif /* CONFIG_HYPERV */
|
||||||
|
|
||||||
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=DEBUG_STACK
|
idtentry debug do_debug has_error_code=0 paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET
|
||||||
idtentry int3 do_int3 has_error_code=0
|
idtentry int3 do_int3 has_error_code=0
|
||||||
idtentry stack_segment do_stack_segment has_error_code=1
|
idtentry stack_segment do_stack_segment has_error_code=1
|
||||||
|
|
||||||
|
|
|
@ -7,6 +7,64 @@
|
||||||
#include <asm/processor.h>
|
#include <asm/processor.h>
|
||||||
#include <asm/intel_ds.h>
|
#include <asm/intel_ds.h>
|
||||||
|
|
||||||
|
#ifdef CONFIG_X86_64
|
||||||
|
|
||||||
|
/* Macro to enforce the same ordering and stack sizes */
|
||||||
|
#define ESTACKS_MEMBERS(guardsize, db2_holesize)\
|
||||||
|
char DF_stack_guard[guardsize]; \
|
||||||
|
char DF_stack[EXCEPTION_STKSZ]; \
|
||||||
|
char NMI_stack_guard[guardsize]; \
|
||||||
|
char NMI_stack[EXCEPTION_STKSZ]; \
|
||||||
|
char DB2_stack_guard[guardsize]; \
|
||||||
|
char DB2_stack[db2_holesize]; \
|
||||||
|
char DB1_stack_guard[guardsize]; \
|
||||||
|
char DB1_stack[EXCEPTION_STKSZ]; \
|
||||||
|
char DB_stack_guard[guardsize]; \
|
||||||
|
char DB_stack[EXCEPTION_STKSZ]; \
|
||||||
|
char MCE_stack_guard[guardsize]; \
|
||||||
|
char MCE_stack[EXCEPTION_STKSZ]; \
|
||||||
|
char IST_top_guard[guardsize]; \
|
||||||
|
|
||||||
|
/* The exception stacks' physical storage. No guard pages required */
|
||||||
|
struct exception_stacks {
|
||||||
|
ESTACKS_MEMBERS(0, 0)
|
||||||
|
};
|
||||||
|
|
||||||
|
/* The effective cpu entry area mapping with guard pages. */
|
||||||
|
struct cea_exception_stacks {
|
||||||
|
ESTACKS_MEMBERS(PAGE_SIZE, EXCEPTION_STKSZ)
|
||||||
|
};
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The exception stack ordering in [cea_]exception_stacks
|
||||||
|
*/
|
||||||
|
enum exception_stack_ordering {
|
||||||
|
ESTACK_DF,
|
||||||
|
ESTACK_NMI,
|
||||||
|
ESTACK_DB2,
|
||||||
|
ESTACK_DB1,
|
||||||
|
ESTACK_DB,
|
||||||
|
ESTACK_MCE,
|
||||||
|
N_EXCEPTION_STACKS
|
||||||
|
};
|
||||||
|
|
||||||
|
#define CEA_ESTACK_SIZE(st) \
|
||||||
|
sizeof(((struct cea_exception_stacks *)0)->st## _stack)
|
||||||
|
|
||||||
|
#define CEA_ESTACK_BOT(ceastp, st) \
|
||||||
|
((unsigned long)&(ceastp)->st## _stack)
|
||||||
|
|
||||||
|
#define CEA_ESTACK_TOP(ceastp, st) \
|
||||||
|
(CEA_ESTACK_BOT(ceastp, st) + CEA_ESTACK_SIZE(st))
|
||||||
|
|
||||||
|
#define CEA_ESTACK_OFFS(st) \
|
||||||
|
offsetof(struct cea_exception_stacks, st## _stack)
|
||||||
|
|
||||||
|
#define CEA_ESTACK_PAGES \
|
||||||
|
(sizeof(struct cea_exception_stacks) / PAGE_SIZE)
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* cpu_entry_area is a percpu region that contains things needed by the CPU
|
* cpu_entry_area is a percpu region that contains things needed by the CPU
|
||||||
* and early entry/exit code. Real types aren't used for all fields here
|
* and early entry/exit code. Real types aren't used for all fields here
|
||||||
|
@ -32,12 +90,9 @@ struct cpu_entry_area {
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
/*
|
/*
|
||||||
* Exception stacks used for IST entries.
|
* Exception stacks used for IST entries with guard pages.
|
||||||
*
|
|
||||||
* In the future, this should have a separate slot for each stack
|
|
||||||
* with guard pages between them.
|
|
||||||
*/
|
*/
|
||||||
char exception_stacks[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ];
|
struct cea_exception_stacks estacks;
|
||||||
#endif
|
#endif
|
||||||
#ifdef CONFIG_CPU_SUP_INTEL
|
#ifdef CONFIG_CPU_SUP_INTEL
|
||||||
/*
|
/*
|
||||||
|
@ -57,6 +112,7 @@ struct cpu_entry_area {
|
||||||
#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
|
#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
|
||||||
|
|
||||||
DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
|
DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
|
||||||
|
DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
|
||||||
|
|
||||||
extern void setup_cpu_entry_areas(void);
|
extern void setup_cpu_entry_areas(void);
|
||||||
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
|
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
|
||||||
|
@ -76,4 +132,7 @@ static inline struct entry_stack *cpu_entry_stack(int cpu)
|
||||||
return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
|
return &get_cpu_entry_area(cpu)->entry_stack_page.stack;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define __this_cpu_ist_top_va(name) \
|
||||||
|
CEA_ESTACK_TOP(__this_cpu_read(cea_exception_stacks), name)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -104,11 +104,9 @@ static inline void debug_stack_usage_dec(void)
|
||||||
{
|
{
|
||||||
__this_cpu_dec(debug_stack_usage);
|
__this_cpu_dec(debug_stack_usage);
|
||||||
}
|
}
|
||||||
int is_debug_stack(unsigned long addr);
|
|
||||||
void debug_stack_set_zero(void);
|
void debug_stack_set_zero(void);
|
||||||
void debug_stack_reset(void);
|
void debug_stack_reset(void);
|
||||||
#else /* !X86_64 */
|
#else /* !X86_64 */
|
||||||
static inline int is_debug_stack(unsigned long addr) { return 0; }
|
|
||||||
static inline void debug_stack_set_zero(void) { }
|
static inline void debug_stack_set_zero(void) { }
|
||||||
static inline void debug_stack_reset(void) { }
|
static inline void debug_stack_reset(void) { }
|
||||||
static inline void debug_stack_usage_inc(void) { }
|
static inline void debug_stack_usage_inc(void) { }
|
||||||
|
|
|
@ -16,11 +16,7 @@ static inline int irq_canonicalize(int irq)
|
||||||
return ((irq == 2) ? 9 : irq);
|
return ((irq == 2) ? 9 : irq);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_X86_32
|
extern int irq_init_percpu_irqstack(unsigned int cpu);
|
||||||
extern void irq_ctx_init(int cpu);
|
|
||||||
#else
|
|
||||||
# define irq_ctx_init(cpu) do { } while (0)
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#define __ARCH_HAS_DO_SOFTIRQ
|
#define __ARCH_HAS_DO_SOFTIRQ
|
||||||
|
|
||||||
|
|
|
@ -18,8 +18,8 @@
|
||||||
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
|
* Vectors 0 ... 31 : system traps and exceptions - hardcoded events
|
||||||
* Vectors 32 ... 127 : device interrupts
|
* Vectors 32 ... 127 : device interrupts
|
||||||
* Vector 128 : legacy int80 syscall interface
|
* Vector 128 : legacy int80 syscall interface
|
||||||
* Vectors 129 ... INVALIDATE_TLB_VECTOR_START-1 except 204 : device interrupts
|
* Vectors 129 ... LOCAL_TIMER_VECTOR-1
|
||||||
* Vectors INVALIDATE_TLB_VECTOR_START ... 255 : special interrupts
|
* Vectors LOCAL_TIMER_VECTOR ... 255 : special interrupts
|
||||||
*
|
*
|
||||||
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
|
* 64-bit x86 has per CPU IDT tables, 32-bit has one shared IDT table.
|
||||||
*
|
*
|
||||||
|
|
|
@ -22,11 +22,9 @@
|
||||||
#define THREAD_SIZE_ORDER 1
|
#define THREAD_SIZE_ORDER 1
|
||||||
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
|
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
|
||||||
|
|
||||||
#define DOUBLEFAULT_STACK 1
|
#define IRQ_STACK_SIZE THREAD_SIZE
|
||||||
#define NMI_STACK 0
|
|
||||||
#define DEBUG_STACK 0
|
#define N_EXCEPTION_STACKS 1
|
||||||
#define MCE_STACK 0
|
|
||||||
#define N_EXCEPTION_STACKS 1
|
|
||||||
|
|
||||||
#ifdef CONFIG_X86_PAE
|
#ifdef CONFIG_X86_PAE
|
||||||
/*
|
/*
|
||||||
|
|
|
@ -14,22 +14,20 @@
|
||||||
|
|
||||||
#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER)
|
#define THREAD_SIZE_ORDER (2 + KASAN_STACK_ORDER)
|
||||||
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
|
#define THREAD_SIZE (PAGE_SIZE << THREAD_SIZE_ORDER)
|
||||||
#define CURRENT_MASK (~(THREAD_SIZE - 1))
|
|
||||||
|
|
||||||
#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER)
|
#define EXCEPTION_STACK_ORDER (0 + KASAN_STACK_ORDER)
|
||||||
#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
|
#define EXCEPTION_STKSZ (PAGE_SIZE << EXCEPTION_STACK_ORDER)
|
||||||
|
|
||||||
#define DEBUG_STACK_ORDER (EXCEPTION_STACK_ORDER + 1)
|
|
||||||
#define DEBUG_STKSZ (PAGE_SIZE << DEBUG_STACK_ORDER)
|
|
||||||
|
|
||||||
#define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER)
|
#define IRQ_STACK_ORDER (2 + KASAN_STACK_ORDER)
|
||||||
#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
|
#define IRQ_STACK_SIZE (PAGE_SIZE << IRQ_STACK_ORDER)
|
||||||
|
|
||||||
#define DOUBLEFAULT_STACK 1
|
/*
|
||||||
#define NMI_STACK 2
|
* The index for the tss.ist[] array. The hardware limit is 7 entries.
|
||||||
#define DEBUG_STACK 3
|
*/
|
||||||
#define MCE_STACK 4
|
#define IST_INDEX_DF 0
|
||||||
#define N_EXCEPTION_STACKS 4 /* hw limit: 7 */
|
#define IST_INDEX_NMI 1
|
||||||
|
#define IST_INDEX_DB 2
|
||||||
|
#define IST_INDEX_MCE 3
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Set __PAGE_OFFSET to the most negative possible address +
|
* Set __PAGE_OFFSET to the most negative possible address +
|
||||||
|
|
|
@ -367,6 +367,13 @@ DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
|
||||||
#define __KERNEL_TSS_LIMIT \
|
#define __KERNEL_TSS_LIMIT \
|
||||||
(IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
|
(IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
|
||||||
|
|
||||||
|
/* Per CPU interrupt stacks */
|
||||||
|
struct irq_stack {
|
||||||
|
char stack[IRQ_STACK_SIZE];
|
||||||
|
} __aligned(IRQ_STACK_SIZE);
|
||||||
|
|
||||||
|
DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
|
||||||
|
|
||||||
#ifdef CONFIG_X86_32
|
#ifdef CONFIG_X86_32
|
||||||
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
|
DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
|
||||||
#else
|
#else
|
||||||
|
@ -374,38 +381,25 @@ DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
|
||||||
#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
|
#define cpu_current_top_of_stack cpu_tss_rw.x86_tss.sp1
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
/*
|
|
||||||
* Save the original ist values for checking stack pointers during debugging
|
|
||||||
*/
|
|
||||||
struct orig_ist {
|
|
||||||
unsigned long ist[7];
|
|
||||||
};
|
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
DECLARE_PER_CPU(struct orig_ist, orig_ist);
|
struct fixed_percpu_data {
|
||||||
|
|
||||||
union irq_stack_union {
|
|
||||||
char irq_stack[IRQ_STACK_SIZE];
|
|
||||||
/*
|
/*
|
||||||
* GCC hardcodes the stack canary as %gs:40. Since the
|
* GCC hardcodes the stack canary as %gs:40. Since the
|
||||||
* irq_stack is the object at %gs:0, we reserve the bottom
|
* irq_stack is the object at %gs:0, we reserve the bottom
|
||||||
* 48 bytes of the irq stack for the canary.
|
* 48 bytes of the irq stack for the canary.
|
||||||
*/
|
*/
|
||||||
struct {
|
char gs_base[40];
|
||||||
char gs_base[40];
|
unsigned long stack_canary;
|
||||||
unsigned long stack_canary;
|
|
||||||
};
|
|
||||||
};
|
};
|
||||||
|
|
||||||
DECLARE_PER_CPU_FIRST(union irq_stack_union, irq_stack_union) __visible;
|
DECLARE_PER_CPU_FIRST(struct fixed_percpu_data, fixed_percpu_data) __visible;
|
||||||
DECLARE_INIT_PER_CPU(irq_stack_union);
|
DECLARE_INIT_PER_CPU(fixed_percpu_data);
|
||||||
|
|
||||||
static inline unsigned long cpu_kernelmode_gs_base(int cpu)
|
static inline unsigned long cpu_kernelmode_gs_base(int cpu)
|
||||||
{
|
{
|
||||||
return (unsigned long)per_cpu(irq_stack_union.gs_base, cpu);
|
return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
DECLARE_PER_CPU(char *, irq_stack_ptr);
|
|
||||||
DECLARE_PER_CPU(unsigned int, irq_count);
|
DECLARE_PER_CPU(unsigned int, irq_count);
|
||||||
extern asmlinkage void ignore_sysret(void);
|
extern asmlinkage void ignore_sysret(void);
|
||||||
|
|
||||||
|
@ -427,15 +421,8 @@ struct stack_canary {
|
||||||
};
|
};
|
||||||
DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
|
DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
|
||||||
#endif
|
#endif
|
||||||
/*
|
/* Per CPU softirq stack pointer */
|
||||||
* per-CPU IRQ handling stacks
|
DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
|
||||||
*/
|
|
||||||
struct irq_stack {
|
|
||||||
u32 stack[THREAD_SIZE/sizeof(u32)];
|
|
||||||
} __aligned(THREAD_SIZE);
|
|
||||||
|
|
||||||
DECLARE_PER_CPU(struct irq_stack *, hardirq_stack);
|
|
||||||
DECLARE_PER_CPU(struct irq_stack *, softirq_stack);
|
|
||||||
#endif /* X86_64 */
|
#endif /* X86_64 */
|
||||||
|
|
||||||
extern unsigned int fpu_kernel_xstate_size;
|
extern unsigned int fpu_kernel_xstate_size;
|
||||||
|
|
|
@ -131,7 +131,7 @@ void native_smp_prepare_boot_cpu(void);
|
||||||
void native_smp_prepare_cpus(unsigned int max_cpus);
|
void native_smp_prepare_cpus(unsigned int max_cpus);
|
||||||
void calculate_max_logical_packages(void);
|
void calculate_max_logical_packages(void);
|
||||||
void native_smp_cpus_done(unsigned int max_cpus);
|
void native_smp_cpus_done(unsigned int max_cpus);
|
||||||
void common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
|
int common_cpu_up(unsigned int cpunum, struct task_struct *tidle);
|
||||||
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
|
int native_cpu_up(unsigned int cpunum, struct task_struct *tidle);
|
||||||
int native_cpu_disable(void);
|
int native_cpu_disable(void);
|
||||||
int common_cpu_die(unsigned int cpu);
|
int common_cpu_die(unsigned int cpu);
|
||||||
|
|
|
@ -13,7 +13,7 @@
|
||||||
* On x86_64, %gs is shared by percpu area and stack canary. All
|
* On x86_64, %gs is shared by percpu area and stack canary. All
|
||||||
* percpu symbols are zero based and %gs points to the base of percpu
|
* percpu symbols are zero based and %gs points to the base of percpu
|
||||||
* area. The first occupant of the percpu area is always
|
* area. The first occupant of the percpu area is always
|
||||||
* irq_stack_union which contains stack_canary at offset 40. Userland
|
* fixed_percpu_data which contains stack_canary at offset 40. Userland
|
||||||
* %gs is always saved and restored on kernel entry and exit using
|
* %gs is always saved and restored on kernel entry and exit using
|
||||||
* swapgs, so stack protector doesn't add any complexity there.
|
* swapgs, so stack protector doesn't add any complexity there.
|
||||||
*
|
*
|
||||||
|
@ -64,7 +64,7 @@ static __always_inline void boot_init_stack_canary(void)
|
||||||
u64 tsc;
|
u64 tsc;
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
BUILD_BUG_ON(offsetof(union irq_stack_union, stack_canary) != 40);
|
BUILD_BUG_ON(offsetof(struct fixed_percpu_data, stack_canary) != 40);
|
||||||
#endif
|
#endif
|
||||||
/*
|
/*
|
||||||
* We both use the random pool and the current TSC as a source
|
* We both use the random pool and the current TSC as a source
|
||||||
|
@ -79,7 +79,7 @@ static __always_inline void boot_init_stack_canary(void)
|
||||||
|
|
||||||
current->stack_canary = canary;
|
current->stack_canary = canary;
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
this_cpu_write(irq_stack_union.stack_canary, canary);
|
this_cpu_write(fixed_percpu_data.stack_canary, canary);
|
||||||
#else
|
#else
|
||||||
this_cpu_write(stack_canary.canary, canary);
|
this_cpu_write(stack_canary.canary, canary);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -9,6 +9,8 @@
|
||||||
|
|
||||||
#include <linux/uaccess.h>
|
#include <linux/uaccess.h>
|
||||||
#include <linux/ptrace.h>
|
#include <linux/ptrace.h>
|
||||||
|
|
||||||
|
#include <asm/cpu_entry_area.h>
|
||||||
#include <asm/switch_to.h>
|
#include <asm/switch_to.h>
|
||||||
|
|
||||||
enum stack_type {
|
enum stack_type {
|
||||||
|
|
|
@ -68,10 +68,12 @@ int main(void)
|
||||||
#undef ENTRY
|
#undef ENTRY
|
||||||
|
|
||||||
OFFSET(TSS_ist, tss_struct, x86_tss.ist);
|
OFFSET(TSS_ist, tss_struct, x86_tss.ist);
|
||||||
|
DEFINE(DB_STACK_OFFSET, offsetof(struct cea_exception_stacks, DB_stack) -
|
||||||
|
offsetof(struct cea_exception_stacks, DB1_stack));
|
||||||
BLANK();
|
BLANK();
|
||||||
|
|
||||||
#ifdef CONFIG_STACKPROTECTOR
|
#ifdef CONFIG_STACKPROTECTOR
|
||||||
DEFINE(stack_canary_offset, offsetof(union irq_stack_union, stack_canary));
|
DEFINE(stack_canary_offset, offsetof(struct fixed_percpu_data, stack_canary));
|
||||||
BLANK();
|
BLANK();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -507,19 +507,6 @@ void load_percpu_segment(int cpu)
|
||||||
DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
|
DEFINE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
|
||||||
/*
|
|
||||||
* Special IST stacks which the CPU switches to when it calls
|
|
||||||
* an IST-marked descriptor entry. Up to 7 stacks (hardware
|
|
||||||
* limit), all of them are 4K, except the debug stack which
|
|
||||||
* is 8K.
|
|
||||||
*/
|
|
||||||
static const unsigned int exception_stack_sizes[N_EXCEPTION_STACKS] = {
|
|
||||||
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
|
|
||||||
[DEBUG_STACK - 1] = DEBUG_STKSZ
|
|
||||||
};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Load the original GDT from the per-cpu structure */
|
/* Load the original GDT from the per-cpu structure */
|
||||||
void load_direct_gdt(int cpu)
|
void load_direct_gdt(int cpu)
|
||||||
{
|
{
|
||||||
|
@ -1511,9 +1498,9 @@ static __init int setup_clearcpuid(char *arg)
|
||||||
__setup("clearcpuid=", setup_clearcpuid);
|
__setup("clearcpuid=", setup_clearcpuid);
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
DEFINE_PER_CPU_FIRST(union irq_stack_union,
|
DEFINE_PER_CPU_FIRST(struct fixed_percpu_data,
|
||||||
irq_stack_union) __aligned(PAGE_SIZE) __visible;
|
fixed_percpu_data) __aligned(PAGE_SIZE) __visible;
|
||||||
EXPORT_PER_CPU_SYMBOL_GPL(irq_stack_union);
|
EXPORT_PER_CPU_SYMBOL_GPL(fixed_percpu_data);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* The following percpu variables are hot. Align current_task to
|
* The following percpu variables are hot. Align current_task to
|
||||||
|
@ -1523,9 +1510,7 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
|
||||||
&init_task;
|
&init_task;
|
||||||
EXPORT_PER_CPU_SYMBOL(current_task);
|
EXPORT_PER_CPU_SYMBOL(current_task);
|
||||||
|
|
||||||
DEFINE_PER_CPU(char *, irq_stack_ptr) =
|
DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
|
||||||
init_per_cpu_var(irq_stack_union.irq_stack) + IRQ_STACK_SIZE;
|
|
||||||
|
|
||||||
DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
|
DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
|
||||||
|
|
||||||
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
|
DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
|
||||||
|
@ -1562,23 +1547,7 @@ void syscall_init(void)
|
||||||
X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT);
|
X86_EFLAGS_IOPL|X86_EFLAGS_AC|X86_EFLAGS_NT);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* Copies of the original ist values from the tss are only accessed during
|
|
||||||
* debugging, no special alignment required.
|
|
||||||
*/
|
|
||||||
DEFINE_PER_CPU(struct orig_ist, orig_ist);
|
|
||||||
|
|
||||||
static DEFINE_PER_CPU(unsigned long, debug_stack_addr);
|
|
||||||
DEFINE_PER_CPU(int, debug_stack_usage);
|
DEFINE_PER_CPU(int, debug_stack_usage);
|
||||||
|
|
||||||
int is_debug_stack(unsigned long addr)
|
|
||||||
{
|
|
||||||
return __this_cpu_read(debug_stack_usage) ||
|
|
||||||
(addr <= __this_cpu_read(debug_stack_addr) &&
|
|
||||||
addr > (__this_cpu_read(debug_stack_addr) - DEBUG_STKSZ));
|
|
||||||
}
|
|
||||||
NOKPROBE_SYMBOL(is_debug_stack);
|
|
||||||
|
|
||||||
DEFINE_PER_CPU(u32, debug_idt_ctr);
|
DEFINE_PER_CPU(u32, debug_idt_ctr);
|
||||||
|
|
||||||
void debug_stack_set_zero(void)
|
void debug_stack_set_zero(void)
|
||||||
|
@ -1690,17 +1659,14 @@ static void setup_getcpu(int cpu)
|
||||||
* initialized (naturally) in the bootstrap process, such as the GDT
|
* initialized (naturally) in the bootstrap process, such as the GDT
|
||||||
* and IDT. We reload them nevertheless, this function acts as a
|
* and IDT. We reload them nevertheless, this function acts as a
|
||||||
* 'CPU state barrier', nothing should get across.
|
* 'CPU state barrier', nothing should get across.
|
||||||
* A lot of state is already set up in PDA init for 64 bit
|
|
||||||
*/
|
*/
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
|
|
||||||
void cpu_init(void)
|
void cpu_init(void)
|
||||||
{
|
{
|
||||||
struct orig_ist *oist;
|
int cpu = raw_smp_processor_id();
|
||||||
struct task_struct *me;
|
struct task_struct *me;
|
||||||
struct tss_struct *t;
|
struct tss_struct *t;
|
||||||
unsigned long v;
|
|
||||||
int cpu = raw_smp_processor_id();
|
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
wait_for_master_cpu(cpu);
|
wait_for_master_cpu(cpu);
|
||||||
|
@ -1715,7 +1681,6 @@ void cpu_init(void)
|
||||||
load_ucode_ap();
|
load_ucode_ap();
|
||||||
|
|
||||||
t = &per_cpu(cpu_tss_rw, cpu);
|
t = &per_cpu(cpu_tss_rw, cpu);
|
||||||
oist = &per_cpu(orig_ist, cpu);
|
|
||||||
|
|
||||||
#ifdef CONFIG_NUMA
|
#ifdef CONFIG_NUMA
|
||||||
if (this_cpu_read(numa_node) == 0 &&
|
if (this_cpu_read(numa_node) == 0 &&
|
||||||
|
@ -1753,16 +1718,11 @@ void cpu_init(void)
|
||||||
/*
|
/*
|
||||||
* set up and load the per-CPU TSS
|
* set up and load the per-CPU TSS
|
||||||
*/
|
*/
|
||||||
if (!oist->ist[0]) {
|
if (!t->x86_tss.ist[0]) {
|
||||||
char *estacks = get_cpu_entry_area(cpu)->exception_stacks;
|
t->x86_tss.ist[IST_INDEX_DF] = __this_cpu_ist_top_va(DF);
|
||||||
|
t->x86_tss.ist[IST_INDEX_NMI] = __this_cpu_ist_top_va(NMI);
|
||||||
for (v = 0; v < N_EXCEPTION_STACKS; v++) {
|
t->x86_tss.ist[IST_INDEX_DB] = __this_cpu_ist_top_va(DB);
|
||||||
estacks += exception_stack_sizes[v];
|
t->x86_tss.ist[IST_INDEX_MCE] = __this_cpu_ist_top_va(MCE);
|
||||||
oist->ist[v] = t->x86_tss.ist[v] =
|
|
||||||
(unsigned long)estacks;
|
|
||||||
if (v == DEBUG_STACK-1)
|
|
||||||
per_cpu(debug_stack_addr, cpu) = (unsigned long)estacks;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
|
t->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
|
||||||
|
|
|
@ -34,14 +34,14 @@ const char *stack_type_name(enum stack_type type)
|
||||||
|
|
||||||
static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
|
static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
|
||||||
{
|
{
|
||||||
unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack);
|
unsigned long *begin = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
|
||||||
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
|
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is a software stack, so 'end' can be a valid stack pointer.
|
* This is a software stack, so 'end' can be a valid stack pointer.
|
||||||
* It just means the stack is empty.
|
* It just means the stack is empty.
|
||||||
*/
|
*/
|
||||||
if (stack <= begin || stack > end)
|
if (stack < begin || stack > end)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
info->type = STACK_TYPE_IRQ;
|
info->type = STACK_TYPE_IRQ;
|
||||||
|
@ -59,14 +59,14 @@ static bool in_hardirq_stack(unsigned long *stack, struct stack_info *info)
|
||||||
|
|
||||||
static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
|
static bool in_softirq_stack(unsigned long *stack, struct stack_info *info)
|
||||||
{
|
{
|
||||||
unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack);
|
unsigned long *begin = (unsigned long *)this_cpu_read(softirq_stack_ptr);
|
||||||
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
|
unsigned long *end = begin + (THREAD_SIZE / sizeof(long));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is a software stack, so 'end' can be a valid stack pointer.
|
* This is a software stack, so 'end' can be a valid stack pointer.
|
||||||
* It just means the stack is empty.
|
* It just means the stack is empty.
|
||||||
*/
|
*/
|
||||||
if (stack <= begin || stack > end)
|
if (stack < begin || stack > end)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
info->type = STACK_TYPE_SOFTIRQ;
|
info->type = STACK_TYPE_SOFTIRQ;
|
||||||
|
|
|
@ -16,23 +16,21 @@
|
||||||
#include <linux/bug.h>
|
#include <linux/bug.h>
|
||||||
#include <linux/nmi.h>
|
#include <linux/nmi.h>
|
||||||
|
|
||||||
|
#include <asm/cpu_entry_area.h>
|
||||||
#include <asm/stacktrace.h>
|
#include <asm/stacktrace.h>
|
||||||
|
|
||||||
static char *exception_stack_names[N_EXCEPTION_STACKS] = {
|
static const char * const exception_stack_names[] = {
|
||||||
[ DOUBLEFAULT_STACK-1 ] = "#DF",
|
[ ESTACK_DF ] = "#DF",
|
||||||
[ NMI_STACK-1 ] = "NMI",
|
[ ESTACK_NMI ] = "NMI",
|
||||||
[ DEBUG_STACK-1 ] = "#DB",
|
[ ESTACK_DB2 ] = "#DB2",
|
||||||
[ MCE_STACK-1 ] = "#MC",
|
[ ESTACK_DB1 ] = "#DB1",
|
||||||
};
|
[ ESTACK_DB ] = "#DB",
|
||||||
|
[ ESTACK_MCE ] = "#MC",
|
||||||
static unsigned long exception_stack_sizes[N_EXCEPTION_STACKS] = {
|
|
||||||
[0 ... N_EXCEPTION_STACKS - 1] = EXCEPTION_STKSZ,
|
|
||||||
[DEBUG_STACK - 1] = DEBUG_STKSZ
|
|
||||||
};
|
};
|
||||||
|
|
||||||
const char *stack_type_name(enum stack_type type)
|
const char *stack_type_name(enum stack_type type)
|
||||||
{
|
{
|
||||||
BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
|
BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
|
||||||
|
|
||||||
if (type == STACK_TYPE_IRQ)
|
if (type == STACK_TYPE_IRQ)
|
||||||
return "IRQ";
|
return "IRQ";
|
||||||
|
@ -52,43 +50,84 @@ const char *stack_type_name(enum stack_type type)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* struct estack_pages - Page descriptor for exception stacks
|
||||||
|
* @offs: Offset from the start of the exception stack area
|
||||||
|
* @size: Size of the exception stack
|
||||||
|
* @type: Type to store in the stack_info struct
|
||||||
|
*/
|
||||||
|
struct estack_pages {
|
||||||
|
u32 offs;
|
||||||
|
u16 size;
|
||||||
|
u16 type;
|
||||||
|
};
|
||||||
|
|
||||||
|
#define EPAGERANGE(st) \
|
||||||
|
[PFN_DOWN(CEA_ESTACK_OFFS(st)) ... \
|
||||||
|
PFN_DOWN(CEA_ESTACK_OFFS(st) + CEA_ESTACK_SIZE(st) - 1)] = { \
|
||||||
|
.offs = CEA_ESTACK_OFFS(st), \
|
||||||
|
.size = CEA_ESTACK_SIZE(st), \
|
||||||
|
.type = STACK_TYPE_EXCEPTION + ESTACK_ ##st, }
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Array of exception stack page descriptors. If the stack is larger than
|
||||||
|
* PAGE_SIZE, all pages covering a particular stack will have the same
|
||||||
|
* info. The guard pages including the not mapped DB2 stack are zeroed
|
||||||
|
* out.
|
||||||
|
*/
|
||||||
|
static const
|
||||||
|
struct estack_pages estack_pages[CEA_ESTACK_PAGES] ____cacheline_aligned = {
|
||||||
|
EPAGERANGE(DF),
|
||||||
|
EPAGERANGE(NMI),
|
||||||
|
EPAGERANGE(DB1),
|
||||||
|
EPAGERANGE(DB),
|
||||||
|
EPAGERANGE(MCE),
|
||||||
|
};
|
||||||
|
|
||||||
static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
|
static bool in_exception_stack(unsigned long *stack, struct stack_info *info)
|
||||||
{
|
{
|
||||||
unsigned long *begin, *end;
|
unsigned long begin, end, stk = (unsigned long)stack;
|
||||||
|
const struct estack_pages *ep;
|
||||||
struct pt_regs *regs;
|
struct pt_regs *regs;
|
||||||
unsigned k;
|
unsigned int k;
|
||||||
|
|
||||||
BUILD_BUG_ON(N_EXCEPTION_STACKS != 4);
|
BUILD_BUG_ON(N_EXCEPTION_STACKS != 6);
|
||||||
|
|
||||||
for (k = 0; k < N_EXCEPTION_STACKS; k++) {
|
begin = (unsigned long)__this_cpu_read(cea_exception_stacks);
|
||||||
end = (unsigned long *)raw_cpu_ptr(&orig_ist)->ist[k];
|
end = begin + sizeof(struct cea_exception_stacks);
|
||||||
begin = end - (exception_stack_sizes[k] / sizeof(long));
|
/* Bail if @stack is outside the exception stack area. */
|
||||||
regs = (struct pt_regs *)end - 1;
|
if (stk < begin || stk >= end)
|
||||||
|
return false;
|
||||||
|
|
||||||
if (stack <= begin || stack >= end)
|
/* Calc page offset from start of exception stacks */
|
||||||
continue;
|
k = (stk - begin) >> PAGE_SHIFT;
|
||||||
|
/* Lookup the page descriptor */
|
||||||
|
ep = &estack_pages[k];
|
||||||
|
/* Guard page? */
|
||||||
|
if (!ep->size)
|
||||||
|
return false;
|
||||||
|
|
||||||
info->type = STACK_TYPE_EXCEPTION + k;
|
begin += (unsigned long)ep->offs;
|
||||||
info->begin = begin;
|
end = begin + (unsigned long)ep->size;
|
||||||
info->end = end;
|
regs = (struct pt_regs *)end - 1;
|
||||||
info->next_sp = (unsigned long *)regs->sp;
|
|
||||||
|
|
||||||
return true;
|
info->type = ep->type;
|
||||||
}
|
info->begin = (unsigned long *)begin;
|
||||||
|
info->end = (unsigned long *)end;
|
||||||
return false;
|
info->next_sp = (unsigned long *)regs->sp;
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
|
static bool in_irq_stack(unsigned long *stack, struct stack_info *info)
|
||||||
{
|
{
|
||||||
unsigned long *end = (unsigned long *)this_cpu_read(irq_stack_ptr);
|
unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
|
||||||
unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
|
unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* This is a software stack, so 'end' can be a valid stack pointer.
|
* This is a software stack, so 'end' can be a valid stack pointer.
|
||||||
* It just means the stack is empty.
|
* It just means the stack is empty.
|
||||||
*/
|
*/
|
||||||
if (stack <= begin || stack > end)
|
if (stack < begin || stack >= end)
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
info->type = STACK_TYPE_IRQ;
|
info->type = STACK_TYPE_IRQ;
|
||||||
|
|
|
@ -265,7 +265,7 @@ ENDPROC(start_cpu0)
|
||||||
GLOBAL(initial_code)
|
GLOBAL(initial_code)
|
||||||
.quad x86_64_start_kernel
|
.quad x86_64_start_kernel
|
||||||
GLOBAL(initial_gs)
|
GLOBAL(initial_gs)
|
||||||
.quad INIT_PER_CPU_VAR(irq_stack_union)
|
.quad INIT_PER_CPU_VAR(fixed_percpu_data)
|
||||||
GLOBAL(initial_stack)
|
GLOBAL(initial_stack)
|
||||||
/*
|
/*
|
||||||
* The SIZEOF_PTREGS gap is a convention which helps the in-kernel
|
* The SIZEOF_PTREGS gap is a convention which helps the in-kernel
|
||||||
|
|
|
@ -41,13 +41,12 @@ struct idt_data {
|
||||||
#define SYSG(_vector, _addr) \
|
#define SYSG(_vector, _addr) \
|
||||||
G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL3, __KERNEL_CS)
|
G(_vector, _addr, DEFAULT_STACK, GATE_INTERRUPT, DPL3, __KERNEL_CS)
|
||||||
|
|
||||||
/* Interrupt gate with interrupt stack */
|
/*
|
||||||
|
* Interrupt gate with interrupt stack. The _ist index is the index in
|
||||||
|
* the tss.ist[] array, but for the descriptor it needs to start at 1.
|
||||||
|
*/
|
||||||
#define ISTG(_vector, _addr, _ist) \
|
#define ISTG(_vector, _addr, _ist) \
|
||||||
G(_vector, _addr, _ist, GATE_INTERRUPT, DPL0, __KERNEL_CS)
|
G(_vector, _addr, _ist + 1, GATE_INTERRUPT, DPL0, __KERNEL_CS)
|
||||||
|
|
||||||
/* System interrupt gate with interrupt stack */
|
|
||||||
#define SISTG(_vector, _addr, _ist) \
|
|
||||||
G(_vector, _addr, _ist, GATE_INTERRUPT, DPL3, __KERNEL_CS)
|
|
||||||
|
|
||||||
/* Task gate */
|
/* Task gate */
|
||||||
#define TSKG(_vector, _gdt) \
|
#define TSKG(_vector, _gdt) \
|
||||||
|
@ -184,11 +183,11 @@ gate_desc debug_idt_table[IDT_ENTRIES] __page_aligned_bss;
|
||||||
* cpu_init() when the TSS has been initialized.
|
* cpu_init() when the TSS has been initialized.
|
||||||
*/
|
*/
|
||||||
static const __initconst struct idt_data ist_idts[] = {
|
static const __initconst struct idt_data ist_idts[] = {
|
||||||
ISTG(X86_TRAP_DB, debug, DEBUG_STACK),
|
ISTG(X86_TRAP_DB, debug, IST_INDEX_DB),
|
||||||
ISTG(X86_TRAP_NMI, nmi, NMI_STACK),
|
ISTG(X86_TRAP_NMI, nmi, IST_INDEX_NMI),
|
||||||
ISTG(X86_TRAP_DF, double_fault, DOUBLEFAULT_STACK),
|
ISTG(X86_TRAP_DF, double_fault, IST_INDEX_DF),
|
||||||
#ifdef CONFIG_X86_MCE
|
#ifdef CONFIG_X86_MCE
|
||||||
ISTG(X86_TRAP_MC, &machine_check, MCE_STACK),
|
ISTG(X86_TRAP_MC, &machine_check, IST_INDEX_MCE),
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -51,8 +51,8 @@ static inline int check_stack_overflow(void) { return 0; }
|
||||||
static inline void print_stack_overflow(void) { }
|
static inline void print_stack_overflow(void) { }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
DEFINE_PER_CPU(struct irq_stack *, hardirq_stack);
|
DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
|
||||||
DEFINE_PER_CPU(struct irq_stack *, softirq_stack);
|
DEFINE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
|
||||||
|
|
||||||
static void call_on_stack(void *func, void *stack)
|
static void call_on_stack(void *func, void *stack)
|
||||||
{
|
{
|
||||||
|
@ -76,7 +76,7 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
|
||||||
u32 *isp, *prev_esp, arg1;
|
u32 *isp, *prev_esp, arg1;
|
||||||
|
|
||||||
curstk = (struct irq_stack *) current_stack();
|
curstk = (struct irq_stack *) current_stack();
|
||||||
irqstk = __this_cpu_read(hardirq_stack);
|
irqstk = __this_cpu_read(hardirq_stack_ptr);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* this is where we switch to the IRQ stack. However, if we are
|
* this is where we switch to the IRQ stack. However, if we are
|
||||||
|
@ -107,27 +107,28 @@ static inline int execute_on_irq_stack(int overflow, struct irq_desc *desc)
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* allocate per-cpu stacks for hardirq and for softirq processing
|
* Allocate per-cpu stacks for hardirq and softirq processing
|
||||||
*/
|
*/
|
||||||
void irq_ctx_init(int cpu)
|
int irq_init_percpu_irqstack(unsigned int cpu)
|
||||||
{
|
{
|
||||||
struct irq_stack *irqstk;
|
int node = cpu_to_node(cpu);
|
||||||
|
struct page *ph, *ps;
|
||||||
|
|
||||||
if (per_cpu(hardirq_stack, cpu))
|
if (per_cpu(hardirq_stack_ptr, cpu))
|
||||||
return;
|
return 0;
|
||||||
|
|
||||||
irqstk = page_address(alloc_pages_node(cpu_to_node(cpu),
|
ph = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER);
|
||||||
THREADINFO_GFP,
|
if (!ph)
|
||||||
THREAD_SIZE_ORDER));
|
return -ENOMEM;
|
||||||
per_cpu(hardirq_stack, cpu) = irqstk;
|
ps = alloc_pages_node(node, THREADINFO_GFP, THREAD_SIZE_ORDER);
|
||||||
|
if (!ps) {
|
||||||
|
__free_pages(ph, THREAD_SIZE_ORDER);
|
||||||
|
return -ENOMEM;
|
||||||
|
}
|
||||||
|
|
||||||
irqstk = page_address(alloc_pages_node(cpu_to_node(cpu),
|
per_cpu(hardirq_stack_ptr, cpu) = page_address(ph);
|
||||||
THREADINFO_GFP,
|
per_cpu(softirq_stack_ptr, cpu) = page_address(ps);
|
||||||
THREAD_SIZE_ORDER));
|
return 0;
|
||||||
per_cpu(softirq_stack, cpu) = irqstk;
|
|
||||||
|
|
||||||
printk(KERN_DEBUG "CPU %u irqstacks, hard=%p soft=%p\n",
|
|
||||||
cpu, per_cpu(hardirq_stack, cpu), per_cpu(softirq_stack, cpu));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void do_softirq_own_stack(void)
|
void do_softirq_own_stack(void)
|
||||||
|
@ -135,7 +136,7 @@ void do_softirq_own_stack(void)
|
||||||
struct irq_stack *irqstk;
|
struct irq_stack *irqstk;
|
||||||
u32 *isp, *prev_esp;
|
u32 *isp, *prev_esp;
|
||||||
|
|
||||||
irqstk = __this_cpu_read(softirq_stack);
|
irqstk = __this_cpu_read(softirq_stack_ptr);
|
||||||
|
|
||||||
/* build the stack frame on the softirq stack */
|
/* build the stack frame on the softirq stack */
|
||||||
isp = (u32 *) ((char *)irqstk + sizeof(*irqstk));
|
isp = (u32 *) ((char *)irqstk + sizeof(*irqstk));
|
||||||
|
|
|
@ -18,63 +18,64 @@
|
||||||
#include <linux/uaccess.h>
|
#include <linux/uaccess.h>
|
||||||
#include <linux/smp.h>
|
#include <linux/smp.h>
|
||||||
#include <linux/sched/task_stack.h>
|
#include <linux/sched/task_stack.h>
|
||||||
|
|
||||||
|
#include <asm/cpu_entry_area.h>
|
||||||
#include <asm/io_apic.h>
|
#include <asm/io_apic.h>
|
||||||
#include <asm/apic.h>
|
#include <asm/apic.h>
|
||||||
|
|
||||||
int sysctl_panic_on_stackoverflow;
|
DEFINE_PER_CPU_PAGE_ALIGNED(struct irq_stack, irq_stack_backing_store) __visible;
|
||||||
|
DECLARE_INIT_PER_CPU(irq_stack_backing_store);
|
||||||
/*
|
|
||||||
* Probabilistic stack overflow check:
|
|
||||||
*
|
|
||||||
* Only check the stack in process context, because everything else
|
|
||||||
* runs on the big interrupt stacks. Checking reliably is too expensive,
|
|
||||||
* so we just check from interrupts.
|
|
||||||
*/
|
|
||||||
static inline void stack_overflow_check(struct pt_regs *regs)
|
|
||||||
{
|
|
||||||
#ifdef CONFIG_DEBUG_STACKOVERFLOW
|
|
||||||
#define STACK_TOP_MARGIN 128
|
|
||||||
struct orig_ist *oist;
|
|
||||||
u64 irq_stack_top, irq_stack_bottom;
|
|
||||||
u64 estack_top, estack_bottom;
|
|
||||||
u64 curbase = (u64)task_stack_page(current);
|
|
||||||
|
|
||||||
if (user_mode(regs))
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (regs->sp >= curbase + sizeof(struct pt_regs) + STACK_TOP_MARGIN &&
|
|
||||||
regs->sp <= curbase + THREAD_SIZE)
|
|
||||||
return;
|
|
||||||
|
|
||||||
irq_stack_top = (u64)this_cpu_ptr(irq_stack_union.irq_stack) +
|
|
||||||
STACK_TOP_MARGIN;
|
|
||||||
irq_stack_bottom = (u64)__this_cpu_read(irq_stack_ptr);
|
|
||||||
if (regs->sp >= irq_stack_top && regs->sp <= irq_stack_bottom)
|
|
||||||
return;
|
|
||||||
|
|
||||||
oist = this_cpu_ptr(&orig_ist);
|
|
||||||
estack_top = (u64)oist->ist[0] - EXCEPTION_STKSZ + STACK_TOP_MARGIN;
|
|
||||||
estack_bottom = (u64)oist->ist[N_EXCEPTION_STACKS - 1];
|
|
||||||
if (regs->sp >= estack_top && regs->sp <= estack_bottom)
|
|
||||||
return;
|
|
||||||
|
|
||||||
WARN_ONCE(1, "do_IRQ(): %s has overflown the kernel stack (cur:%Lx,sp:%lx,irq stk top-bottom:%Lx-%Lx,exception stk top-bottom:%Lx-%Lx,ip:%pF)\n",
|
|
||||||
current->comm, curbase, regs->sp,
|
|
||||||
irq_stack_top, irq_stack_bottom,
|
|
||||||
estack_top, estack_bottom, (void *)regs->ip);
|
|
||||||
|
|
||||||
if (sysctl_panic_on_stackoverflow)
|
|
||||||
panic("low stack detected by irq handler - check messages\n");
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
|
bool handle_irq(struct irq_desc *desc, struct pt_regs *regs)
|
||||||
{
|
{
|
||||||
stack_overflow_check(regs);
|
|
||||||
|
|
||||||
if (IS_ERR_OR_NULL(desc))
|
if (IS_ERR_OR_NULL(desc))
|
||||||
return false;
|
return false;
|
||||||
|
|
||||||
generic_handle_irq_desc(desc);
|
generic_handle_irq_desc(desc);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef CONFIG_VMAP_STACK
|
||||||
|
/*
|
||||||
|
* VMAP the backing store with guard pages
|
||||||
|
*/
|
||||||
|
static int map_irq_stack(unsigned int cpu)
|
||||||
|
{
|
||||||
|
char *stack = (char *)per_cpu_ptr(&irq_stack_backing_store, cpu);
|
||||||
|
struct page *pages[IRQ_STACK_SIZE / PAGE_SIZE];
|
||||||
|
void *va;
|
||||||
|
int i;
|
||||||
|
|
||||||
|
for (i = 0; i < IRQ_STACK_SIZE / PAGE_SIZE; i++) {
|
||||||
|
phys_addr_t pa = per_cpu_ptr_to_phys(stack + (i << PAGE_SHIFT));
|
||||||
|
|
||||||
|
pages[i] = pfn_to_page(pa >> PAGE_SHIFT);
|
||||||
|
}
|
||||||
|
|
||||||
|
va = vmap(pages, IRQ_STACK_SIZE / PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL);
|
||||||
|
if (!va)
|
||||||
|
return -ENOMEM;
|
||||||
|
|
||||||
|
per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
/*
|
||||||
|
* If VMAP stacks are disabled due to KASAN, just use the per cpu
|
||||||
|
* backing store without guard pages.
|
||||||
|
*/
|
||||||
|
static int map_irq_stack(unsigned int cpu)
|
||||||
|
{
|
||||||
|
void *va = per_cpu_ptr(&irq_stack_backing_store, cpu);
|
||||||
|
|
||||||
|
per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
int irq_init_percpu_irqstack(unsigned int cpu)
|
||||||
|
{
|
||||||
|
if (per_cpu(hardirq_stack_ptr, cpu))
|
||||||
|
return 0;
|
||||||
|
return map_irq_stack(cpu);
|
||||||
|
}
|
||||||
|
|
|
@ -91,6 +91,8 @@ void __init init_IRQ(void)
|
||||||
for (i = 0; i < nr_legacy_irqs(); i++)
|
for (i = 0; i < nr_legacy_irqs(); i++)
|
||||||
per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = irq_to_desc(i);
|
per_cpu(vector_irq, 0)[ISA_IRQ_VECTOR(i)] = irq_to_desc(i);
|
||||||
|
|
||||||
|
BUG_ON(irq_init_percpu_irqstack(smp_processor_id()));
|
||||||
|
|
||||||
x86_init.irqs.intr_init();
|
x86_init.irqs.intr_init();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -104,6 +106,4 @@ void __init native_init_IRQ(void)
|
||||||
|
|
||||||
if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
|
if (!acpi_ioapic && !of_ioapic && nr_legacy_irqs())
|
||||||
setup_irq(2, &irq2);
|
setup_irq(2, &irq2);
|
||||||
|
|
||||||
irq_ctx_init(smp_processor_id());
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,13 +21,14 @@
|
||||||
#include <linux/ratelimit.h>
|
#include <linux/ratelimit.h>
|
||||||
#include <linux/slab.h>
|
#include <linux/slab.h>
|
||||||
#include <linux/export.h>
|
#include <linux/export.h>
|
||||||
|
#include <linux/atomic.h>
|
||||||
#include <linux/sched/clock.h>
|
#include <linux/sched/clock.h>
|
||||||
|
|
||||||
#if defined(CONFIG_EDAC)
|
#if defined(CONFIG_EDAC)
|
||||||
#include <linux/edac.h>
|
#include <linux/edac.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#include <linux/atomic.h>
|
#include <asm/cpu_entry_area.h>
|
||||||
#include <asm/traps.h>
|
#include <asm/traps.h>
|
||||||
#include <asm/mach_traps.h>
|
#include <asm/mach_traps.h>
|
||||||
#include <asm/nmi.h>
|
#include <asm/nmi.h>
|
||||||
|
@ -487,6 +488,23 @@ static DEFINE_PER_CPU(unsigned long, nmi_cr2);
|
||||||
* switch back to the original IDT.
|
* switch back to the original IDT.
|
||||||
*/
|
*/
|
||||||
static DEFINE_PER_CPU(int, update_debug_stack);
|
static DEFINE_PER_CPU(int, update_debug_stack);
|
||||||
|
|
||||||
|
static bool notrace is_debug_stack(unsigned long addr)
|
||||||
|
{
|
||||||
|
struct cea_exception_stacks *cs = __this_cpu_read(cea_exception_stacks);
|
||||||
|
unsigned long top = CEA_ESTACK_TOP(cs, DB);
|
||||||
|
unsigned long bot = CEA_ESTACK_BOT(cs, DB1);
|
||||||
|
|
||||||
|
if (__this_cpu_read(debug_stack_usage))
|
||||||
|
return true;
|
||||||
|
/*
|
||||||
|
* Note, this covers the guard page between DB and DB1 as well to
|
||||||
|
* avoid two checks. But by all means @addr can never point into
|
||||||
|
* the guard page.
|
||||||
|
*/
|
||||||
|
return addr >= bot && addr < top;
|
||||||
|
}
|
||||||
|
NOKPROBE_SYMBOL(is_debug_stack);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
dotraplinkage notrace void
|
dotraplinkage notrace void
|
||||||
|
|
|
@ -244,11 +244,6 @@ void __init setup_per_cpu_areas(void)
|
||||||
per_cpu(x86_cpu_to_logical_apicid, cpu) =
|
per_cpu(x86_cpu_to_logical_apicid, cpu) =
|
||||||
early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
|
early_per_cpu_map(x86_cpu_to_logical_apicid, cpu);
|
||||||
#endif
|
#endif
|
||||||
#ifdef CONFIG_X86_64
|
|
||||||
per_cpu(irq_stack_ptr, cpu) =
|
|
||||||
per_cpu(irq_stack_union.irq_stack, cpu) +
|
|
||||||
IRQ_STACK_SIZE;
|
|
||||||
#endif
|
|
||||||
#ifdef CONFIG_NUMA
|
#ifdef CONFIG_NUMA
|
||||||
per_cpu(x86_cpu_to_node_map, cpu) =
|
per_cpu(x86_cpu_to_node_map, cpu) =
|
||||||
early_per_cpu_map(x86_cpu_to_node_map, cpu);
|
early_per_cpu_map(x86_cpu_to_node_map, cpu);
|
||||||
|
|
|
@ -935,20 +935,27 @@ out:
|
||||||
return boot_error;
|
return boot_error;
|
||||||
}
|
}
|
||||||
|
|
||||||
void common_cpu_up(unsigned int cpu, struct task_struct *idle)
|
int common_cpu_up(unsigned int cpu, struct task_struct *idle)
|
||||||
{
|
{
|
||||||
|
int ret;
|
||||||
|
|
||||||
/* Just in case we booted with a single CPU. */
|
/* Just in case we booted with a single CPU. */
|
||||||
alternatives_enable_smp();
|
alternatives_enable_smp();
|
||||||
|
|
||||||
per_cpu(current_task, cpu) = idle;
|
per_cpu(current_task, cpu) = idle;
|
||||||
|
|
||||||
|
/* Initialize the interrupt stack(s) */
|
||||||
|
ret = irq_init_percpu_irqstack(cpu);
|
||||||
|
if (ret)
|
||||||
|
return ret;
|
||||||
|
|
||||||
#ifdef CONFIG_X86_32
|
#ifdef CONFIG_X86_32
|
||||||
/* Stack for startup_32 can be just as for start_secondary onwards */
|
/* Stack for startup_32 can be just as for start_secondary onwards */
|
||||||
irq_ctx_init(cpu);
|
|
||||||
per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
|
per_cpu(cpu_current_top_of_stack, cpu) = task_top_of_stack(idle);
|
||||||
#else
|
#else
|
||||||
initial_gs = per_cpu_offset(cpu);
|
initial_gs = per_cpu_offset(cpu);
|
||||||
#endif
|
#endif
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
|
@ -1106,7 +1113,9 @@ int native_cpu_up(unsigned int cpu, struct task_struct *tidle)
|
||||||
/* the FPU context is blank, nobody can own it */
|
/* the FPU context is blank, nobody can own it */
|
||||||
per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
|
per_cpu(fpu_fpregs_owner_ctx, cpu) = NULL;
|
||||||
|
|
||||||
common_cpu_up(cpu, tidle);
|
err = common_cpu_up(cpu, tidle);
|
||||||
|
if (err)
|
||||||
|
return err;
|
||||||
|
|
||||||
err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
|
err = do_boot_cpu(apicid, cpu, tidle, &cpu0_nmi_registered);
|
||||||
if (err) {
|
if (err) {
|
||||||
|
|
|
@ -403,7 +403,8 @@ SECTIONS
|
||||||
*/
|
*/
|
||||||
#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
|
#define INIT_PER_CPU(x) init_per_cpu__##x = ABSOLUTE(x) + __per_cpu_load
|
||||||
INIT_PER_CPU(gdt_page);
|
INIT_PER_CPU(gdt_page);
|
||||||
INIT_PER_CPU(irq_stack_union);
|
INIT_PER_CPU(fixed_percpu_data);
|
||||||
|
INIT_PER_CPU(irq_stack_backing_store);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Build-time check on the image size:
|
* Build-time check on the image size:
|
||||||
|
@ -412,8 +413,8 @@ INIT_PER_CPU(irq_stack_union);
|
||||||
"kernel image bigger than KERNEL_IMAGE_SIZE");
|
"kernel image bigger than KERNEL_IMAGE_SIZE");
|
||||||
|
|
||||||
#ifdef CONFIG_SMP
|
#ifdef CONFIG_SMP
|
||||||
. = ASSERT((irq_stack_union == 0),
|
. = ASSERT((fixed_percpu_data == 0),
|
||||||
"irq_stack_union is not at start of per-cpu area");
|
"fixed_percpu_data is not at start of per-cpu area");
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif /* CONFIG_X86_32 */
|
#endif /* CONFIG_X86_32 */
|
||||||
|
|
|
@ -13,8 +13,8 @@
|
||||||
static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
|
static DEFINE_PER_CPU_PAGE_ALIGNED(struct entry_stack_page, entry_stack_storage);
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
|
static DEFINE_PER_CPU_PAGE_ALIGNED(struct exception_stacks, exception_stacks);
|
||||||
[(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
|
DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
struct cpu_entry_area *get_cpu_entry_area(int cpu)
|
struct cpu_entry_area *get_cpu_entry_area(int cpu)
|
||||||
|
@ -52,10 +52,10 @@ cea_map_percpu_pages(void *cea_vaddr, void *ptr, int pages, pgprot_t prot)
|
||||||
cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
|
cea_set_pte(cea_vaddr, per_cpu_ptr_to_phys(ptr), prot);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void __init percpu_setup_debug_store(int cpu)
|
static void __init percpu_setup_debug_store(unsigned int cpu)
|
||||||
{
|
{
|
||||||
#ifdef CONFIG_CPU_SUP_INTEL
|
#ifdef CONFIG_CPU_SUP_INTEL
|
||||||
int npages;
|
unsigned int npages;
|
||||||
void *cea;
|
void *cea;
|
||||||
|
|
||||||
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
|
if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL)
|
||||||
|
@ -78,9 +78,43 @@ static void __init percpu_setup_debug_store(int cpu)
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Setup the fixmap mappings only once per-processor */
|
#ifdef CONFIG_X86_64
|
||||||
static void __init setup_cpu_entry_area(int cpu)
|
|
||||||
|
#define cea_map_stack(name) do { \
|
||||||
|
npages = sizeof(estacks->name## _stack) / PAGE_SIZE; \
|
||||||
|
cea_map_percpu_pages(cea->estacks.name## _stack, \
|
||||||
|
estacks->name## _stack, npages, PAGE_KERNEL); \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
static void __init percpu_setup_exception_stacks(unsigned int cpu)
|
||||||
{
|
{
|
||||||
|
struct exception_stacks *estacks = per_cpu_ptr(&exception_stacks, cpu);
|
||||||
|
struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
|
||||||
|
unsigned int npages;
|
||||||
|
|
||||||
|
BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
|
||||||
|
|
||||||
|
per_cpu(cea_exception_stacks, cpu) = &cea->estacks;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The exceptions stack mappings in the per cpu area are protected
|
||||||
|
* by guard pages so each stack must be mapped separately. DB2 is
|
||||||
|
* not mapped; it just exists to catch triple nesting of #DB.
|
||||||
|
*/
|
||||||
|
cea_map_stack(DF);
|
||||||
|
cea_map_stack(NMI);
|
||||||
|
cea_map_stack(DB1);
|
||||||
|
cea_map_stack(DB);
|
||||||
|
cea_map_stack(MCE);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
static inline void percpu_setup_exception_stacks(unsigned int cpu) {}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Setup the fixmap mappings only once per-processor */
|
||||||
|
static void __init setup_cpu_entry_area(unsigned int cpu)
|
||||||
|
{
|
||||||
|
struct cpu_entry_area *cea = get_cpu_entry_area(cpu);
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
|
/* On 64-bit systems, we use a read-only fixmap GDT and TSS. */
|
||||||
pgprot_t gdt_prot = PAGE_KERNEL_RO;
|
pgprot_t gdt_prot = PAGE_KERNEL_RO;
|
||||||
|
@ -101,10 +135,9 @@ static void __init setup_cpu_entry_area(int cpu)
|
||||||
pgprot_t tss_prot = PAGE_KERNEL;
|
pgprot_t tss_prot = PAGE_KERNEL;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
cea_set_pte(&get_cpu_entry_area(cpu)->gdt, get_cpu_gdt_paddr(cpu),
|
cea_set_pte(&cea->gdt, get_cpu_gdt_paddr(cpu), gdt_prot);
|
||||||
gdt_prot);
|
|
||||||
|
|
||||||
cea_map_percpu_pages(&get_cpu_entry_area(cpu)->entry_stack_page,
|
cea_map_percpu_pages(&cea->entry_stack_page,
|
||||||
per_cpu_ptr(&entry_stack_storage, cpu), 1,
|
per_cpu_ptr(&entry_stack_storage, cpu), 1,
|
||||||
PAGE_KERNEL);
|
PAGE_KERNEL);
|
||||||
|
|
||||||
|
@ -128,22 +161,15 @@ static void __init setup_cpu_entry_area(int cpu)
|
||||||
BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
|
BUILD_BUG_ON((offsetof(struct tss_struct, x86_tss) ^
|
||||||
offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
|
offsetofend(struct tss_struct, x86_tss)) & PAGE_MASK);
|
||||||
BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
|
BUILD_BUG_ON(sizeof(struct tss_struct) % PAGE_SIZE != 0);
|
||||||
cea_map_percpu_pages(&get_cpu_entry_area(cpu)->tss,
|
cea_map_percpu_pages(&cea->tss, &per_cpu(cpu_tss_rw, cpu),
|
||||||
&per_cpu(cpu_tss_rw, cpu),
|
|
||||||
sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
|
sizeof(struct tss_struct) / PAGE_SIZE, tss_prot);
|
||||||
|
|
||||||
#ifdef CONFIG_X86_32
|
#ifdef CONFIG_X86_32
|
||||||
per_cpu(cpu_entry_area, cpu) = get_cpu_entry_area(cpu);
|
per_cpu(cpu_entry_area, cpu) = cea;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef CONFIG_X86_64
|
percpu_setup_exception_stacks(cpu);
|
||||||
BUILD_BUG_ON(sizeof(exception_stacks) % PAGE_SIZE != 0);
|
|
||||||
BUILD_BUG_ON(sizeof(exception_stacks) !=
|
|
||||||
sizeof(((struct cpu_entry_area *)0)->exception_stacks));
|
|
||||||
cea_map_percpu_pages(&get_cpu_entry_area(cpu)->exception_stacks,
|
|
||||||
&per_cpu(exception_stacks, cpu),
|
|
||||||
sizeof(exception_stacks) / PAGE_SIZE, PAGE_KERNEL);
|
|
||||||
#endif
|
|
||||||
percpu_setup_debug_store(cpu);
|
percpu_setup_debug_store(cpu);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -28,6 +28,7 @@
|
||||||
#include <asm/mmu_context.h> /* vma_pkey() */
|
#include <asm/mmu_context.h> /* vma_pkey() */
|
||||||
#include <asm/efi.h> /* efi_recover_from_page_fault()*/
|
#include <asm/efi.h> /* efi_recover_from_page_fault()*/
|
||||||
#include <asm/desc.h> /* store_idt(), ... */
|
#include <asm/desc.h> /* store_idt(), ... */
|
||||||
|
#include <asm/cpu_entry_area.h> /* exception stack */
|
||||||
|
|
||||||
#define CREATE_TRACE_POINTS
|
#define CREATE_TRACE_POINTS
|
||||||
#include <asm/trace/exceptions.h>
|
#include <asm/trace/exceptions.h>
|
||||||
|
@ -793,7 +794,7 @@ no_context(struct pt_regs *regs, unsigned long error_code,
|
||||||
if (is_vmalloc_addr((void *)address) &&
|
if (is_vmalloc_addr((void *)address) &&
|
||||||
(((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
|
(((unsigned long)tsk->stack - 1 - address < PAGE_SIZE) ||
|
||||||
address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
|
address - ((unsigned long)tsk->stack + THREAD_SIZE) < PAGE_SIZE)) {
|
||||||
unsigned long stack = this_cpu_read(orig_ist.ist[DOUBLEFAULT_STACK]) - sizeof(void *);
|
unsigned long stack = __this_cpu_ist_top_va(DF) - sizeof(void *);
|
||||||
/*
|
/*
|
||||||
* We're likely to be running with very little stack space
|
* We're likely to be running with very little stack space
|
||||||
* left. It's plausible that we'd hit this condition but
|
* left. It's plausible that we'd hit this condition but
|
||||||
|
|
|
@ -754,7 +754,7 @@ static void percpu_init(void)
|
||||||
* __per_cpu_load
|
* __per_cpu_load
|
||||||
*
|
*
|
||||||
* The "gold" linker incorrectly associates:
|
* The "gold" linker incorrectly associates:
|
||||||
* init_per_cpu__irq_stack_union
|
* init_per_cpu__fixed_percpu_data
|
||||||
* init_per_cpu__gdt_page
|
* init_per_cpu__gdt_page
|
||||||
*/
|
*/
|
||||||
static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
|
static int is_percpu_sym(ElfW(Sym) *sym, const char *symname)
|
||||||
|
|
|
@ -361,7 +361,9 @@ static int xen_pv_cpu_up(unsigned int cpu, struct task_struct *idle)
|
||||||
{
|
{
|
||||||
int rc;
|
int rc;
|
||||||
|
|
||||||
common_cpu_up(cpu, idle);
|
rc = common_cpu_up(cpu, idle);
|
||||||
|
if (rc)
|
||||||
|
return rc;
|
||||||
|
|
||||||
xen_setup_runstate_info(cpu);
|
xen_setup_runstate_info(cpu);
|
||||||
|
|
||||||
|
|
|
@ -40,13 +40,13 @@ ENTRY(startup_xen)
|
||||||
#ifdef CONFIG_X86_64
|
#ifdef CONFIG_X86_64
|
||||||
/* Set up %gs.
|
/* Set up %gs.
|
||||||
*
|
*
|
||||||
* The base of %gs always points to the bottom of the irqstack
|
* The base of %gs always points to fixed_percpu_data. If the
|
||||||
* union. If the stack protector canary is enabled, it is
|
* stack protector canary is enabled, it is located at %gs:40.
|
||||||
* located at %gs:40. Note that, on SMP, the boot cpu uses
|
* Note that, on SMP, the boot cpu uses init data section until
|
||||||
* init data section till per cpu areas are set up.
|
* the per cpu areas are set up.
|
||||||
*/
|
*/
|
||||||
movl $MSR_GS_BASE,%ecx
|
movl $MSR_GS_BASE,%ecx
|
||||||
movq $INIT_PER_CPU_VAR(irq_stack_union),%rax
|
movq $INIT_PER_CPU_VAR(fixed_percpu_data),%rax
|
||||||
cdq
|
cdq
|
||||||
wrmsr
|
wrmsr
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -1687,7 +1687,6 @@ void __init xen_init_IRQ(void)
|
||||||
|
|
||||||
#ifdef CONFIG_X86
|
#ifdef CONFIG_X86
|
||||||
if (xen_pv_domain()) {
|
if (xen_pv_domain()) {
|
||||||
irq_ctx_init(smp_processor_id());
|
|
||||||
if (xen_initial_domain())
|
if (xen_initial_domain())
|
||||||
pci_xen_initial_domain();
|
pci_xen_initial_domain();
|
||||||
}
|
}
|
||||||
|
|
48
mm/slab.c
48
mm/slab.c
|
@ -1467,53 +1467,17 @@ static bool is_debug_pagealloc_cache(struct kmem_cache *cachep)
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef CONFIG_DEBUG_PAGEALLOC
|
#ifdef CONFIG_DEBUG_PAGEALLOC
|
||||||
static void store_stackinfo(struct kmem_cache *cachep, unsigned long *addr,
|
static void slab_kernel_map(struct kmem_cache *cachep, void *objp, int map)
|
||||||
unsigned long caller)
|
|
||||||
{
|
|
||||||
int size = cachep->object_size;
|
|
||||||
|
|
||||||
addr = (unsigned long *)&((char *)addr)[obj_offset(cachep)];
|
|
||||||
|
|
||||||
if (size < 5 * sizeof(unsigned long))
|
|
||||||
return;
|
|
||||||
|
|
||||||
*addr++ = 0x12345678;
|
|
||||||
*addr++ = caller;
|
|
||||||
*addr++ = smp_processor_id();
|
|
||||||
size -= 3 * sizeof(unsigned long);
|
|
||||||
{
|
|
||||||
unsigned long *sptr = &caller;
|
|
||||||
unsigned long svalue;
|
|
||||||
|
|
||||||
while (!kstack_end(sptr)) {
|
|
||||||
svalue = *sptr++;
|
|
||||||
if (kernel_text_address(svalue)) {
|
|
||||||
*addr++ = svalue;
|
|
||||||
size -= sizeof(unsigned long);
|
|
||||||
if (size <= sizeof(unsigned long))
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
||||||
*addr++ = 0x87654321;
|
|
||||||
}
|
|
||||||
|
|
||||||
static void slab_kernel_map(struct kmem_cache *cachep, void *objp,
|
|
||||||
int map, unsigned long caller)
|
|
||||||
{
|
{
|
||||||
if (!is_debug_pagealloc_cache(cachep))
|
if (!is_debug_pagealloc_cache(cachep))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
if (caller)
|
|
||||||
store_stackinfo(cachep, objp, caller);
|
|
||||||
|
|
||||||
kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map);
|
kernel_map_pages(virt_to_page(objp), cachep->size / PAGE_SIZE, map);
|
||||||
}
|
}
|
||||||
|
|
||||||
#else
|
#else
|
||||||
static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp,
|
static inline void slab_kernel_map(struct kmem_cache *cachep, void *objp,
|
||||||
int map, unsigned long caller) {}
|
int map) {}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -1661,7 +1625,7 @@ static void slab_destroy_debugcheck(struct kmem_cache *cachep,
|
||||||
|
|
||||||
if (cachep->flags & SLAB_POISON) {
|
if (cachep->flags & SLAB_POISON) {
|
||||||
check_poison_obj(cachep, objp);
|
check_poison_obj(cachep, objp);
|
||||||
slab_kernel_map(cachep, objp, 1, 0);
|
slab_kernel_map(cachep, objp, 1);
|
||||||
}
|
}
|
||||||
if (cachep->flags & SLAB_RED_ZONE) {
|
if (cachep->flags & SLAB_RED_ZONE) {
|
||||||
if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
|
if (*dbg_redzone1(cachep, objp) != RED_INACTIVE)
|
||||||
|
@ -2433,7 +2397,7 @@ static void cache_init_objs_debug(struct kmem_cache *cachep, struct page *page)
|
||||||
/* need to poison the objs? */
|
/* need to poison the objs? */
|
||||||
if (cachep->flags & SLAB_POISON) {
|
if (cachep->flags & SLAB_POISON) {
|
||||||
poison_obj(cachep, objp, POISON_FREE);
|
poison_obj(cachep, objp, POISON_FREE);
|
||||||
slab_kernel_map(cachep, objp, 0, 0);
|
slab_kernel_map(cachep, objp, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -2812,7 +2776,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
|
||||||
|
|
||||||
if (cachep->flags & SLAB_POISON) {
|
if (cachep->flags & SLAB_POISON) {
|
||||||
poison_obj(cachep, objp, POISON_FREE);
|
poison_obj(cachep, objp, POISON_FREE);
|
||||||
slab_kernel_map(cachep, objp, 0, caller);
|
slab_kernel_map(cachep, objp, 0);
|
||||||
}
|
}
|
||||||
return objp;
|
return objp;
|
||||||
}
|
}
|
||||||
|
@ -3076,7 +3040,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
|
||||||
return objp;
|
return objp;
|
||||||
if (cachep->flags & SLAB_POISON) {
|
if (cachep->flags & SLAB_POISON) {
|
||||||
check_poison_obj(cachep, objp);
|
check_poison_obj(cachep, objp);
|
||||||
slab_kernel_map(cachep, objp, 1, 0);
|
slab_kernel_map(cachep, objp, 1);
|
||||||
poison_obj(cachep, objp, POISON_INUSE);
|
poison_obj(cachep, objp, POISON_INUSE);
|
||||||
}
|
}
|
||||||
if (cachep->flags & SLAB_STORE_USER)
|
if (cachep->flags & SLAB_STORE_USER)
|
||||||
|
|
Loading…
Reference in New Issue