Merge branch 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 fixes from Ingo Molnar:
 "These are the fixes left over from the v5.4 cycle:

   - Various low level 32-bit entry code fixes and improvements by Andy
     Lutomirski, Peter Zijlstra and Thomas Gleixner.

   - Fix 32-bit Xen PV breakage, by Jan Beulich"

* 'x86-urgent-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/entry/32: Fix FIXUP_ESPFIX_STACK with user CR3
  x86/pti/32: Calculate the various PTI cpu_entry_area sizes correctly, make the CPU_ENTRY_AREA_PAGES assert precise
  selftests/x86/sigreturn/32: Invalidate DS and ES when abusing the kernel
  selftests/x86/mov_ss_trap: Fix the SYSENTER test
  x86/entry/32: Fix NMI vs ESPFIX
  x86/entry/32: Unwind the ESPFIX stack earlier on exception entry
  x86/entry/32: Move FIXUP_FRAME after pushing %fs in SAVE_ALL
  x86/entry/32: Use %ss segment where required
  x86/entry/32: Fix IRET exception
  x86/cpu_entry_area: Add guard page for entry stack on 32bit
  x86/pti/32: Size initial_page_table correctly
  x86/doublefault/32: Fix stack canaries in the double fault handler
  x86/xen/32: Simplify ring check in xen_iret_crit_fixup()
  x86/xen/32: Make xen_iret_crit_fixup() independent of frame layout
  x86/stackframe/32: Repair 32-bit Xen PV
This commit is contained in:
Linus Torvalds 2019-11-26 10:12:28 -08:00
commit 5c4a1c090d
10 changed files with 217 additions and 140 deletions

View file

@ -172,7 +172,7 @@
ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
.if \no_user_check == 0
/* coming from usermode? */
testl $SEGMENT_RPL_MASK, PT_CS(%esp)
testl $USER_SEGMENT_RPL_MASK, PT_CS(%esp)
jz .Lend_\@
.endif
/* On user-cr3? */
@ -205,64 +205,76 @@
#define CS_FROM_ENTRY_STACK (1 << 31)
#define CS_FROM_USER_CR3 (1 << 30)
#define CS_FROM_KERNEL (1 << 29)
#define CS_FROM_ESPFIX (1 << 28)
.macro FIXUP_FRAME
/*
* The high bits of the CS dword (__csh) are used for CS_FROM_*.
* Clear them in case hardware didn't do this for us.
*/
andl $0x0000ffff, 3*4(%esp)
andl $0x0000ffff, 4*4(%esp)
#ifdef CONFIG_VM86
testl $X86_EFLAGS_VM, 4*4(%esp)
testl $X86_EFLAGS_VM, 5*4(%esp)
jnz .Lfrom_usermode_no_fixup_\@
#endif
testl $SEGMENT_RPL_MASK, 3*4(%esp)
testl $USER_SEGMENT_RPL_MASK, 4*4(%esp)
jnz .Lfrom_usermode_no_fixup_\@
orl $CS_FROM_KERNEL, 3*4(%esp)
orl $CS_FROM_KERNEL, 4*4(%esp)
/*
* When we're here from kernel mode; the (exception) stack looks like:
*
* 5*4(%esp) - <previous context>
* 4*4(%esp) - flags
* 3*4(%esp) - cs
* 2*4(%esp) - ip
* 1*4(%esp) - orig_eax
* 0*4(%esp) - gs / function
* 6*4(%esp) - <previous context>
* 5*4(%esp) - flags
* 4*4(%esp) - cs
* 3*4(%esp) - ip
* 2*4(%esp) - orig_eax
* 1*4(%esp) - gs / function
* 0*4(%esp) - fs
*
* Let's build a 5 entry IRET frame after that, such that struct pt_regs
* is complete and in particular regs->sp is correct. This gives us
* the original 5 entries as gap:
* the original 6 entries as gap:
*
* 12*4(%esp) - <previous context>
* 11*4(%esp) - gap / flags
* 10*4(%esp) - gap / cs
* 9*4(%esp) - gap / ip
* 8*4(%esp) - gap / orig_eax
* 7*4(%esp) - gap / gs / function
* 6*4(%esp) - ss
* 5*4(%esp) - sp
* 4*4(%esp) - flags
* 3*4(%esp) - cs
* 2*4(%esp) - ip
* 1*4(%esp) - orig_eax
* 0*4(%esp) - gs / function
* 14*4(%esp) - <previous context>
* 13*4(%esp) - gap / flags
* 12*4(%esp) - gap / cs
* 11*4(%esp) - gap / ip
* 10*4(%esp) - gap / orig_eax
* 9*4(%esp) - gap / gs / function
* 8*4(%esp) - gap / fs
* 7*4(%esp) - ss
* 6*4(%esp) - sp
* 5*4(%esp) - flags
* 4*4(%esp) - cs
* 3*4(%esp) - ip
* 2*4(%esp) - orig_eax
* 1*4(%esp) - gs / function
* 0*4(%esp) - fs
*/
pushl %ss # ss
pushl %esp # sp (points at ss)
addl $6*4, (%esp) # point sp back at the previous context
pushl 6*4(%esp) # flags
pushl 6*4(%esp) # cs
pushl 6*4(%esp) # ip
pushl 6*4(%esp) # orig_eax
pushl 6*4(%esp) # gs / function
addl $7*4, (%esp) # point sp back at the previous context
pushl 7*4(%esp) # flags
pushl 7*4(%esp) # cs
pushl 7*4(%esp) # ip
pushl 7*4(%esp) # orig_eax
pushl 7*4(%esp) # gs / function
pushl 7*4(%esp) # fs
.Lfrom_usermode_no_fixup_\@:
.endm
.macro IRET_FRAME
/*
* We're called with %ds, %es, %fs, and %gs from the interrupted
* frame, so we shouldn't use them. Also, we may be in ESPFIX
* mode and therefore have a nonzero SS base and an offset ESP,
* so any attempt to access the stack needs to use SS. (except for
* accesses through %esp, which automatically use SS.)
*/
testl $CS_FROM_KERNEL, 1*4(%esp)
jz .Lfinished_frame_\@
@ -276,31 +288,40 @@
movl 5*4(%esp), %eax # (modified) regs->sp
movl 4*4(%esp), %ecx # flags
movl %ecx, -4(%eax)
movl %ecx, %ss:-1*4(%eax)
movl 3*4(%esp), %ecx # cs
andl $0x0000ffff, %ecx
movl %ecx, -8(%eax)
movl %ecx, %ss:-2*4(%eax)
movl 2*4(%esp), %ecx # ip
movl %ecx, -12(%eax)
movl %ecx, %ss:-3*4(%eax)
movl 1*4(%esp), %ecx # eax
movl %ecx, -16(%eax)
movl %ecx, %ss:-4*4(%eax)
popl %ecx
lea -16(%eax), %esp
lea -4*4(%eax), %esp
popl %eax
.Lfinished_frame_\@:
.endm
.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0
.macro SAVE_ALL pt_regs_ax=%eax switch_stacks=0 skip_gs=0 unwind_espfix=0
cld
.if \skip_gs == 0
PUSH_GS
.endif
FIXUP_FRAME
pushl %fs
pushl %eax
movl $(__KERNEL_PERCPU), %eax
movl %eax, %fs
.if \unwind_espfix > 0
UNWIND_ESPFIX_STACK
.endif
popl %eax
FIXUP_FRAME
pushl %es
pushl %ds
pushl \pt_regs_ax
@ -313,8 +334,6 @@
movl $(__USER_DS), %edx
movl %edx, %ds
movl %edx, %es
movl $(__KERNEL_PERCPU), %edx
movl %edx, %fs
.if \skip_gs == 0
SET_KERNEL_GS %edx
.endif
@ -324,8 +343,8 @@
.endif
.endm
.macro SAVE_ALL_NMI cr3_reg:req
SAVE_ALL
.macro SAVE_ALL_NMI cr3_reg:req unwind_espfix=0
SAVE_ALL unwind_espfix=\unwind_espfix
BUG_IF_WRONG_CR3
@ -357,6 +376,7 @@
2: popl %es
3: popl %fs
POP_GS \pop
IRET_FRAME
.pushsection .fixup, "ax"
4: movl $0, (%esp)
jmp 1b
@ -395,7 +415,8 @@
.macro CHECK_AND_APPLY_ESPFIX
#ifdef CONFIG_X86_ESPFIX32
#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + (GDT_ENTRY_ESPFIX_SS * 8)
#define GDT_ESPFIX_OFFSET (GDT_ENTRY_ESPFIX_SS * 8)
#define GDT_ESPFIX_SS PER_CPU_VAR(gdt_page) + GDT_ESPFIX_OFFSET
ALTERNATIVE "jmp .Lend_\@", "", X86_BUG_ESPFIX
@ -1075,7 +1096,6 @@ restore_all:
/* Restore user state */
RESTORE_REGS pop=4 # skip orig_eax/error_code
.Lirq_return:
IRET_FRAME
/*
* ARCH_HAS_MEMBARRIER_SYNC_CORE rely on IRET core serialization
* when returning from IPI handler and when returning from
@ -1128,30 +1148,43 @@ ENDPROC(entry_INT80_32)
* We can't call C functions using the ESPFIX stack. This code reads
* the high word of the segment base from the GDT and switches to the
* normal stack and adjusts ESP with the matching offset.
*
* We might be on user CR3 here, so percpu data is not mapped and we can't
* access the GDT through the percpu segment. Instead, use SGDT to find
* the cpu_entry_area alias of the GDT.
*/
#ifdef CONFIG_X86_ESPFIX32
/* fixup the stack */
mov GDT_ESPFIX_SS + 4, %al /* bits 16..23 */
mov GDT_ESPFIX_SS + 7, %ah /* bits 24..31 */
pushl %ecx
subl $2*4, %esp
sgdt (%esp)
movl 2(%esp), %ecx /* GDT address */
/*
* Careful: ECX is a linear pointer, so we need to force base
* zero. %cs is the only known-linear segment we have right now.
*/
mov %cs:GDT_ESPFIX_OFFSET + 4(%ecx), %al /* bits 16..23 */
mov %cs:GDT_ESPFIX_OFFSET + 7(%ecx), %ah /* bits 24..31 */
shl $16, %eax
addl $2*4, %esp
popl %ecx
addl %esp, %eax /* the adjusted stack pointer */
pushl $__KERNEL_DS
pushl %eax
lss (%esp), %esp /* switch to the normal stack segment */
#endif
.endm
.macro UNWIND_ESPFIX_STACK
/* It's safe to clobber %eax, all other regs need to be preserved */
#ifdef CONFIG_X86_ESPFIX32
movl %ss, %eax
/* see if on espfix stack */
cmpw $__ESPFIX_SS, %ax
jne 27f
movl $__KERNEL_DS, %eax
movl %eax, %ds
movl %eax, %es
jne .Lno_fixup_\@
/* switch to normal stack */
FIXUP_ESPFIX_STACK
27:
.Lno_fixup_\@:
#endif
.endm
@ -1341,11 +1374,6 @@ END(spurious_interrupt_bug)
#ifdef CONFIG_XEN_PV
ENTRY(xen_hypervisor_callback)
pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
ENCODE_FRAME_POINTER
TRACE_IRQS_OFF
/*
* Check to see if we got the event in the critical
* region in xen_iret_direct, after we've reenabled
@ -1353,16 +1381,17 @@ ENTRY(xen_hypervisor_callback)
* iret instruction's behaviour where it delivers a
* pending interrupt when enabling interrupts:
*/
movl PT_EIP(%esp), %eax
cmpl $xen_iret_start_crit, %eax
cmpl $xen_iret_start_crit, (%esp)
jb 1f
cmpl $xen_iret_end_crit, %eax
cmpl $xen_iret_end_crit, (%esp)
jae 1f
jmp xen_iret_crit_fixup
ENTRY(xen_do_upcall)
1: mov %esp, %eax
call xen_iret_crit_fixup
1:
pushl $-1 /* orig_ax = -1 => not a system call */
SAVE_ALL
ENCODE_FRAME_POINTER
TRACE_IRQS_OFF
mov %esp, %eax
call xen_evtchn_do_upcall
#ifndef CONFIG_PREEMPTION
call xen_maybe_preempt_hcall
@ -1449,10 +1478,9 @@ END(page_fault)
common_exception_read_cr2:
/* the function address is in %gs's slot on the stack */
SAVE_ALL switch_stacks=1 skip_gs=1
SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
ENCODE_FRAME_POINTER
UNWIND_ESPFIX_STACK
/* fixup %gs */
GS_TO_REG %ecx
@ -1474,9 +1502,8 @@ END(common_exception_read_cr2)
common_exception:
/* the function address is in %gs's slot on the stack */
SAVE_ALL switch_stacks=1 skip_gs=1
SAVE_ALL switch_stacks=1 skip_gs=1 unwind_espfix=1
ENCODE_FRAME_POINTER
UNWIND_ESPFIX_STACK
/* fixup %gs */
GS_TO_REG %ecx
@ -1515,6 +1542,10 @@ ENTRY(nmi)
ASM_CLAC
#ifdef CONFIG_X86_ESPFIX32
/*
* ESPFIX_SS is only ever set on the return to user path
* after we've switched to the entry stack.
*/
pushl %eax
movl %ss, %eax
cmpw $__ESPFIX_SS, %ax
@ -1550,6 +1581,11 @@ ENTRY(nmi)
movl %ebx, %esp
.Lnmi_return:
#ifdef CONFIG_X86_ESPFIX32
testl $CS_FROM_ESPFIX, PT_CS(%esp)
jnz .Lnmi_from_espfix
#endif
CHECK_AND_APPLY_ESPFIX
RESTORE_ALL_NMI cr3_reg=%edi pop=4
jmp .Lirq_return
@ -1557,23 +1593,42 @@ ENTRY(nmi)
#ifdef CONFIG_X86_ESPFIX32
.Lnmi_espfix_stack:
/*
* create the pointer to lss back
* Create the pointer to LSS back
*/
pushl %ss
pushl %esp
addl $4, (%esp)
/* copy the iret frame of 12 bytes */
.rept 3
pushl 16(%esp)
.endr
pushl %eax
SAVE_ALL_NMI cr3_reg=%edi
/* Copy the (short) IRET frame */
pushl 4*4(%esp) # flags
pushl 4*4(%esp) # cs
pushl 4*4(%esp) # ip
pushl %eax # orig_ax
SAVE_ALL_NMI cr3_reg=%edi unwind_espfix=1
ENCODE_FRAME_POINTER
FIXUP_ESPFIX_STACK # %eax == %esp
/* clear CS_FROM_KERNEL, set CS_FROM_ESPFIX */
xorl $(CS_FROM_ESPFIX | CS_FROM_KERNEL), PT_CS(%esp)
xorl %edx, %edx # zero error code
call do_nmi
movl %esp, %eax # pt_regs pointer
jmp .Lnmi_from_sysenter_stack
.Lnmi_from_espfix:
RESTORE_ALL_NMI cr3_reg=%edi
lss 12+4(%esp), %esp # back to espfix stack
/*
* Because we cleared CS_FROM_KERNEL, IRET_FRAME 'forgot' to
* fix up the gap and long frame:
*
* 3 - original frame (exception)
* 2 - ESPFIX block (above)
* 6 - gap (FIXUP_FRAME)
* 5 - long frame (FIXUP_FRAME)
* 1 - orig_ax
*/
lss (1+5+6)*4(%esp), %esp # back to espfix stack
jmp .Lirq_return
#endif
END(nmi)

View file

@ -78,8 +78,12 @@ struct cpu_entry_area {
/*
* The GDT is just below entry_stack and thus serves (on x86_64) as
* a a read-only guard page.
* a read-only guard page. On 32-bit the GDT must be writeable, so
* it needs an extra guard page.
*/
#ifdef CONFIG_X86_32
char guard_entry_stack[PAGE_SIZE];
#endif
struct entry_stack_page entry_stack_page;
/*
@ -94,7 +98,6 @@ struct cpu_entry_area {
*/
struct cea_exception_stacks estacks;
#endif
#ifdef CONFIG_CPU_SUP_INTEL
/*
* Per CPU debug store for Intel performance monitoring. Wastes a
* full page at the moment.
@ -105,11 +108,13 @@ struct cpu_entry_area {
* Reserve enough fixmap PTEs.
*/
struct debug_store_buffers cpu_debug_buffers;
#endif
};
#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
#define CPU_ENTRY_AREA_TOT_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
#define CPU_ENTRY_AREA_SIZE (sizeof(struct cpu_entry_area))
#define CPU_ENTRY_AREA_ARRAY_SIZE (CPU_ENTRY_AREA_SIZE * NR_CPUS)
/* Total size includes the readonly IDT mapping page as well: */
#define CPU_ENTRY_AREA_TOTAL_SIZE (CPU_ENTRY_AREA_ARRAY_SIZE + PAGE_SIZE)
DECLARE_PER_CPU(struct cpu_entry_area *, cpu_entry_area);
DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
@ -117,13 +122,14 @@ DECLARE_PER_CPU(struct cea_exception_stacks *, cea_exception_stacks);
extern void setup_cpu_entry_areas(void);
extern void cea_set_pte(void *cea_vaddr, phys_addr_t pa, pgprot_t flags);
/* Single page reserved for the readonly IDT mapping: */
#define CPU_ENTRY_AREA_RO_IDT CPU_ENTRY_AREA_BASE
#define CPU_ENTRY_AREA_PER_CPU (CPU_ENTRY_AREA_RO_IDT + PAGE_SIZE)
#define CPU_ENTRY_AREA_RO_IDT_VADDR ((void *)CPU_ENTRY_AREA_RO_IDT)
#define CPU_ENTRY_AREA_MAP_SIZE \
(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_TOT_SIZE - CPU_ENTRY_AREA_BASE)
(CPU_ENTRY_AREA_PER_CPU + CPU_ENTRY_AREA_ARRAY_SIZE - CPU_ENTRY_AREA_BASE)
extern struct cpu_entry_area *get_cpu_entry_area(int cpu);

View file

@ -44,11 +44,11 @@ extern bool __vmalloc_start_set; /* set once high_memory is set */
* Define this here and validate with BUILD_BUG_ON() in pgtable_32.c
* to avoid include recursion hell
*/
#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 40)
#define CPU_ENTRY_AREA_PAGES (NR_CPUS * 39)
#define CPU_ENTRY_AREA_BASE \
((FIXADDR_TOT_START - PAGE_SIZE * (CPU_ENTRY_AREA_PAGES + 1)) \
& PMD_MASK)
/* The +1 is for the readonly IDT page: */
#define CPU_ENTRY_AREA_BASE \
((FIXADDR_TOT_START - PAGE_SIZE*(CPU_ENTRY_AREA_PAGES+1)) & PMD_MASK)
#define LDT_BASE_ADDR \
((CPU_ENTRY_AREA_BASE - PAGE_SIZE) & PMD_MASK)

View file

@ -31,6 +31,18 @@
*/
#define SEGMENT_RPL_MASK 0x3
/*
* When running on Xen PV, the actual privilege level of the kernel is 1,
* not 0. Testing the Requested Privilege Level in a segment selector to
* determine whether the context is user mode or kernel mode with
* SEGMENT_RPL_MASK is wrong because the PV kernel's privilege level
* matches the 0x3 mask.
*
* Testing with USER_SEGMENT_RPL_MASK is valid for both native and Xen PV
* kernels because privilege level 2 is never used.
*/
#define USER_SEGMENT_RPL_MASK 0x2
/* User mode is privilege level 3: */
#define USER_RPL 0x3

View file

@ -65,6 +65,9 @@ struct x86_hw_tss doublefault_tss __cacheline_aligned = {
.ss = __KERNEL_DS,
.ds = __USER_DS,
.fs = __KERNEL_PERCPU,
#ifndef CONFIG_X86_32_LAZY_GS
.gs = __KERNEL_STACK_CANARY,
#endif
.__cr3 = __pa_nodebug(swapper_pg_dir),
};

View file

@ -571,6 +571,16 @@ ENTRY(initial_page_table)
# error "Kernel PMDs should be 1, 2 or 3"
# endif
.align PAGE_SIZE /* needs to be page-sized too */
#ifdef CONFIG_PAGE_TABLE_ISOLATION
/*
* PTI needs another page so sync_initial_pagetable() works correctly
* and does not scribble over the data which is placed behind the
* actual initial_page_table. See clone_pgd_range().
*/
.fill 1024, 4, 0
#endif
#endif
.data

View file

@ -178,7 +178,9 @@ static __init void setup_cpu_entry_area_ptes(void)
#ifdef CONFIG_X86_32
unsigned long start, end;
BUILD_BUG_ON(CPU_ENTRY_AREA_PAGES * PAGE_SIZE < CPU_ENTRY_AREA_MAP_SIZE);
/* The +1 is for the readonly IDT: */
BUILD_BUG_ON((CPU_ENTRY_AREA_PAGES+1)*PAGE_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
BUILD_BUG_ON(CPU_ENTRY_AREA_TOTAL_SIZE != CPU_ENTRY_AREA_MAP_SIZE);
BUG_ON(CPU_ENTRY_AREA_BASE & ~PMD_MASK);
start = CPU_ENTRY_AREA_BASE;

View file

@ -126,10 +126,9 @@ hyper_iret:
.globl xen_iret_start_crit, xen_iret_end_crit
/*
* This is called by xen_hypervisor_callback in entry.S when it sees
* This is called by xen_hypervisor_callback in entry_32.S when it sees
* that the EIP at the time of interrupt was between
* xen_iret_start_crit and xen_iret_end_crit. We're passed the EIP in
* %eax so we can do a more refined determination of what to do.
* xen_iret_start_crit and xen_iret_end_crit.
*
* The stack format at this point is:
* ----------------
@ -138,70 +137,46 @@ hyper_iret:
* eflags } outer exception info
* cs }
* eip }
* ---------------- <- edi (copy dest)
* ----------------
* eax : outer eax if it hasn't been restored
* ----------------
* eflags } nested exception info
* cs } (no ss/esp because we're nested
* eip } from the same ring)
* orig_eax }<- esi (copy src)
* - - - - - - - -
* fs }
* es }
* ds } SAVE_ALL state
* eax }
* : :
* ebx }<- esp
* ----------------
* eflags }
* cs } nested exception info
* eip }
* return address : (into xen_hypervisor_callback)
*
* In order to deliver the nested exception properly, we need to shift
* everything from the return addr up to the error code so it sits
* just under the outer exception info. This means that when we
* handle the exception, we do it in the context of the outer
* exception rather than starting a new one.
* In order to deliver the nested exception properly, we need to discard the
* nested exception frame such that when we handle the exception, we do it
* in the context of the outer exception rather than starting a new one.
*
* The only caveat is that if the outer eax hasn't been restored yet
* (ie, it's still on stack), we need to insert its value into the
* SAVE_ALL state before going on, since it's usermode state which we
* eventually need to restore.
* The only caveat is that if the outer eax hasn't been restored yet (i.e.
* it's still on stack), we need to restore its value here.
*/
ENTRY(xen_iret_crit_fixup)
/*
* Paranoia: Make sure we're really coming from kernel space.
* One could imagine a case where userspace jumps into the
* critical range address, but just before the CPU delivers a
* GP, it decides to deliver an interrupt instead. Unlikely?
* Definitely. Easy to avoid? Yes. The Intel documents
* explicitly say that the reported EIP for a bad jump is the
* jump instruction itself, not the destination, but some
* virtual environments get this wrong.
* PF, it decides to deliver an interrupt instead. Unlikely?
* Definitely. Easy to avoid? Yes.
*/
movl PT_CS(%esp), %ecx
andl $SEGMENT_RPL_MASK, %ecx
cmpl $USER_RPL, %ecx
je 2f
lea PT_ORIG_EAX(%esp), %esi
lea PT_EFLAGS(%esp), %edi
testb $2, 2*4(%esp) /* nested CS */
jnz 2f
/*
* If eip is before iret_restore_end then stack
* hasn't been restored yet.
*/
cmp $iret_restore_end, %eax
cmpl $iret_restore_end, 1*4(%esp)
jae 1f
movl 0+4(%edi), %eax /* copy EAX (just above top of frame) */
movl %eax, PT_EAX(%esp)
movl 4*4(%esp), %eax /* load outer EAX */
ret $4*4 /* discard nested EIP, CS, and EFLAGS as
* well as the just restored EAX */
lea ESP_OFFSET(%edi), %edi /* move dest up over saved regs */
/* set up the copy */
1: std
mov $PT_EIP / 4, %ecx /* saved regs up to orig_eax */
rep movsl
cld
lea 4(%edi), %esp /* point esp to new frame */
2: jmp xen_do_upcall
1:
ret $3*4 /* discard nested EIP, CS, and EFLAGS */
2:
ret
END(xen_iret_crit_fixup)

View file

@ -257,7 +257,8 @@ int main()
err(1, "sigaltstack");
sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
nr = SYS_getpid;
asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr)
/* Clear EBP first to make sure we segfault cleanly. */
asm volatile ("xorl %%ebp, %%ebp; mov %[ss], %%ss; SYSENTER" : "+a" (nr)
: [ss] "m" (ss) : "flags", "rcx"
#ifdef __x86_64__
, "r11"

View file

@ -451,6 +451,19 @@ static void sigusr1(int sig, siginfo_t *info, void *ctx_void)
ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL;
ctx->uc_mcontext.gregs[REG_CX] = 0;
#ifdef __i386__
/*
* Make sure the kernel doesn't inadvertently use DS or ES-relative
* accesses in a region where user DS or ES is loaded.
*
* Skip this for 64-bit builds because long mode doesn't care about
* DS and ES and skipping it increases test coverage a little bit,
* since 64-bit kernels can still run the 32-bit build.
*/
ctx->uc_mcontext.gregs[REG_DS] = 0;
ctx->uc_mcontext.gregs[REG_ES] = 0;
#endif
memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t));
requested_regs[REG_CX] = *ssptr(ctx); /* The asm code does this. */