diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index b78a17b12810..3db651fc8ec5 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -7,9 +7,10 @@ extra-y := head_$(BITS).o head$(BITS).o head.o init_task.o vmlinu CPPFLAGS_vmlinux.lds += -U$(UTS_MACHINE) ifdef CONFIG_FTRACE -# Do not profile debug utilities +# Do not profile debug and lowlevel utilities CFLAGS_REMOVE_tsc.o = -pg CFLAGS_REMOVE_rtc.o = -pg +CFLAGS_REMOVE_paravirt.o = -pg endif # diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index b4564d089b43..097d8a6797fa 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -124,6 +124,7 @@ static void *get_call_destination(u8 type) .pv_irq_ops = pv_irq_ops, .pv_apic_ops = pv_apic_ops, .pv_mmu_ops = pv_mmu_ops, + .pv_lock_ops = pv_lock_ops, }; return *((void **)&tmpl + type); } @@ -267,6 +268,17 @@ enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return __get_cpu_var(paravirt_lazy_mode); } +void __init paravirt_use_bytelocks(void) +{ +#ifdef CONFIG_SMP + pv_lock_ops.spin_is_locked = __byte_spin_is_locked; + pv_lock_ops.spin_is_contended = __byte_spin_is_contended; + pv_lock_ops.spin_lock = __byte_spin_lock; + pv_lock_ops.spin_trylock = __byte_spin_trylock; + pv_lock_ops.spin_unlock = __byte_spin_unlock; +#endif +} + struct pv_info pv_info = { .name = "bare hardware", .paravirt_enabled = 0, @@ -449,6 +461,18 @@ struct pv_mmu_ops pv_mmu_ops = { .set_fixmap = native_set_fixmap, }; +struct pv_lock_ops pv_lock_ops = { +#ifdef CONFIG_SMP + .spin_is_locked = __ticket_spin_is_locked, + .spin_is_contended = __ticket_spin_is_contended, + + .spin_lock = __ticket_spin_lock, + .spin_trylock = __ticket_spin_trylock, + .spin_unlock = __ticket_spin_unlock, +#endif +}; +EXPORT_SYMBOL_GPL(pv_lock_ops); + EXPORT_SYMBOL_GPL(pv_time_ops); EXPORT_SYMBOL (pv_cpu_ops); EXPORT_SYMBOL (pv_mmu_ops); diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index f702199312a5..e693812ac59a 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -15,6 +15,7 @@ * This does not handle HOTPLUG_CPU yet. */ #include +#include #include #include @@ -35,6 +36,8 @@ #include "xen-ops.h" #include "mmu.h" +static void __cpuinit xen_init_lock_cpu(int cpu); + cpumask_t xen_cpu_initialized_map; static DEFINE_PER_CPU(int, resched_irq); @@ -179,6 +182,8 @@ static void __init xen_smp_prepare_cpus(unsigned int max_cpus) { unsigned cpu; + xen_init_lock_cpu(0); + smp_store_cpu_info(0); cpu_data(0).x86_max_cores = 1; set_cpu_sibling_map(0); @@ -301,6 +306,7 @@ static int __cpuinit xen_cpu_up(unsigned int cpu) clear_tsk_thread_flag(idle, TIF_FORK); #endif xen_setup_timer(cpu); + xen_init_lock_cpu(cpu); per_cpu(cpu_state, cpu) = CPU_UP_PREPARE; @@ -413,6 +419,170 @@ static irqreturn_t xen_call_function_single_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } +struct xen_spinlock { + unsigned char lock; /* 0 -> free; 1 -> locked */ + unsigned short spinners; /* count of waiting cpus */ +}; + +static int xen_spin_is_locked(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + + return xl->lock != 0; +} + +static int xen_spin_is_contended(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + + /* Not strictly true; this is only the count of contended + lock-takers entering the slow path. */ + return xl->spinners != 0; +} + +static int xen_spin_trylock(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + u8 old = 1; + + asm("xchgb %b0,%1" + : "+q" (old), "+m" (xl->lock) : : "memory"); + + return old == 0; +} + +static DEFINE_PER_CPU(int, lock_kicker_irq) = -1; +static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners); + +static inline void spinning_lock(struct xen_spinlock *xl) +{ + __get_cpu_var(lock_spinners) = xl; + wmb(); /* set lock of interest before count */ + asm(LOCK_PREFIX " incw %0" + : "+m" (xl->spinners) : : "memory"); +} + +static inline void unspinning_lock(struct xen_spinlock *xl) +{ + asm(LOCK_PREFIX " decw %0" + : "+m" (xl->spinners) : : "memory"); + wmb(); /* decrement count before clearing lock */ + __get_cpu_var(lock_spinners) = NULL; +} + +static noinline int xen_spin_lock_slow(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + int irq = __get_cpu_var(lock_kicker_irq); + int ret; + + /* If kicker interrupts not initialized yet, just spin */ + if (irq == -1) + return 0; + + /* announce we're spinning */ + spinning_lock(xl); + + /* clear pending */ + xen_clear_irq_pending(irq); + + /* check again make sure it didn't become free while + we weren't looking */ + ret = xen_spin_trylock(lock); + if (ret) + goto out; + + /* block until irq becomes pending */ + xen_poll_irq(irq); + kstat_this_cpu.irqs[irq]++; + +out: + unspinning_lock(xl); + return ret; +} + +static void xen_spin_lock(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + int timeout; + u8 oldval; + + do { + timeout = 1 << 10; + + asm("1: xchgb %1,%0\n" + " testb %1,%1\n" + " jz 3f\n" + "2: rep;nop\n" + " cmpb $0,%0\n" + " je 1b\n" + " dec %2\n" + " jnz 2b\n" + "3:\n" + : "+m" (xl->lock), "=q" (oldval), "+r" (timeout) + : "1" (1) + : "memory"); + + } while (unlikely(oldval != 0 && !xen_spin_lock_slow(lock))); +} + +static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl) +{ + int cpu; + + for_each_online_cpu(cpu) { + /* XXX should mix up next cpu selection */ + if (per_cpu(lock_spinners, cpu) == xl) { + xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR); + break; + } + } +} + +static void xen_spin_unlock(struct raw_spinlock *lock) +{ + struct xen_spinlock *xl = (struct xen_spinlock *)lock; + + smp_wmb(); /* make sure no writes get moved after unlock */ + xl->lock = 0; /* release lock */ + + /* make sure unlock happens before kick */ + barrier(); + + if (unlikely(xl->spinners)) + xen_spin_unlock_slow(xl); +} + +static __cpuinit void xen_init_lock_cpu(int cpu) +{ + int irq; + const char *name; + + name = kasprintf(GFP_KERNEL, "spinlock%d", cpu); + irq = bind_ipi_to_irqhandler(XEN_SPIN_UNLOCK_VECTOR, + cpu, + xen_reschedule_interrupt, + IRQF_DISABLED|IRQF_PERCPU|IRQF_NOBALANCING, + name, + NULL); + + if (irq >= 0) { + disable_irq(irq); /* make sure it's never delivered */ + per_cpu(lock_kicker_irq, cpu) = irq; + } + + printk("cpu %d spinlock event irq %d\n", cpu, irq); +} + +static void __init xen_init_spinlocks(void) +{ + pv_lock_ops.spin_is_locked = xen_spin_is_locked; + pv_lock_ops.spin_is_contended = xen_spin_is_contended; + pv_lock_ops.spin_lock = xen_spin_lock; + pv_lock_ops.spin_trylock = xen_spin_trylock; + pv_lock_ops.spin_unlock = xen_spin_unlock; +} + static const struct smp_ops xen_smp_ops __initdata = { .smp_prepare_boot_cpu = xen_smp_prepare_boot_cpu, .smp_prepare_cpus = xen_smp_prepare_cpus, @@ -430,4 +600,5 @@ void __init xen_smp_init(void) { smp_ops = xen_smp_ops; xen_fill_possible_map(); + xen_init_spinlocks(); } diff --git a/drivers/xen/events.c b/drivers/xen/events.c index 332dd63750a0..0e0c28574af8 100644 --- a/drivers/xen/events.c +++ b/drivers/xen/events.c @@ -734,6 +734,33 @@ static void restore_cpu_ipis(unsigned int cpu) } } +/* Clear an irq's pending state, in preparation for polling on it */ +void xen_clear_irq_pending(int irq) +{ + int evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) + clear_evtchn(evtchn); +} + +/* Poll waiting for an irq to become pending. In the usual case, the + irq will be disabled so it won't deliver an interrupt. */ +void xen_poll_irq(int irq) +{ + evtchn_port_t evtchn = evtchn_from_irq(irq); + + if (VALID_EVTCHN(evtchn)) { + struct sched_poll poll; + + poll.nr_ports = 1; + poll.timeout = 0; + poll.ports = &evtchn; + + if (HYPERVISOR_sched_op(SCHEDOP_poll, &poll) != 0) + BUG(); + } +} + void xen_irq_resume(void) { unsigned int cpu, irq, evtchn; diff --git a/include/asm-x86/paravirt.h b/include/asm-x86/paravirt.h index 695ce9383f52..aec9767836b6 100644 --- a/include/asm-x86/paravirt.h +++ b/include/asm-x86/paravirt.h @@ -325,6 +325,15 @@ struct pv_mmu_ops { unsigned long phys, pgprot_t flags); }; +struct raw_spinlock; +struct pv_lock_ops { + int (*spin_is_locked)(struct raw_spinlock *lock); + int (*spin_is_contended)(struct raw_spinlock *lock); + void (*spin_lock)(struct raw_spinlock *lock); + int (*spin_trylock)(struct raw_spinlock *lock); + void (*spin_unlock)(struct raw_spinlock *lock); +}; + /* This contains all the paravirt structures: we get a convenient * number for each function using the offset which we use to indicate * what to patch. */ @@ -335,6 +344,7 @@ struct paravirt_patch_template { struct pv_irq_ops pv_irq_ops; struct pv_apic_ops pv_apic_ops; struct pv_mmu_ops pv_mmu_ops; + struct pv_lock_ops pv_lock_ops; }; extern struct pv_info pv_info; @@ -344,6 +354,7 @@ extern struct pv_cpu_ops pv_cpu_ops; extern struct pv_irq_ops pv_irq_ops; extern struct pv_apic_ops pv_apic_ops; extern struct pv_mmu_ops pv_mmu_ops; +extern struct pv_lock_ops pv_lock_ops; #define PARAVIRT_PATCH(x) \ (offsetof(struct paravirt_patch_template, x) / sizeof(void *)) @@ -1368,6 +1379,37 @@ static inline void __set_fixmap(unsigned /* enum fixed_addresses */ idx, void _paravirt_nop(void); #define paravirt_nop ((void *)_paravirt_nop) +void paravirt_use_bytelocks(void); + +#ifdef CONFIG_SMP + +static inline int __raw_spin_is_locked(struct raw_spinlock *lock) +{ + return PVOP_CALL1(int, pv_lock_ops.spin_is_locked, lock); +} + +static inline int __raw_spin_is_contended(struct raw_spinlock *lock) +{ + return PVOP_CALL1(int, pv_lock_ops.spin_is_contended, lock); +} + +static __always_inline void __raw_spin_lock(struct raw_spinlock *lock) +{ + PVOP_VCALL1(pv_lock_ops.spin_lock, lock); +} + +static __always_inline int __raw_spin_trylock(struct raw_spinlock *lock) +{ + return PVOP_CALL1(int, pv_lock_ops.spin_trylock, lock); +} + +static __always_inline void __raw_spin_unlock(struct raw_spinlock *lock) +{ + PVOP_VCALL1(pv_lock_ops.spin_unlock, lock); +} + +#endif + /* These all sit in the .parainstructions section to tell us what to patch. */ struct paravirt_patch_site { u8 *instr; /* original instructions */ @@ -1452,6 +1494,7 @@ static inline unsigned long __raw_local_irq_save(void) return f; } + /* Make sure as little as possible of this mess escapes. */ #undef PARAVIRT_CALL #undef __PVOP_CALL diff --git a/include/asm-x86/spinlock.h b/include/asm-x86/spinlock.h index 21e89bf92f1c..4f9a9861799a 100644 --- a/include/asm-x86/spinlock.h +++ b/include/asm-x86/spinlock.h @@ -6,7 +6,7 @@ #include #include #include - +#include /* * Your basic SMP spinlocks, allowing only a single CPU anywhere * @@ -54,21 +54,21 @@ * much between them in performance though, especially as locks are out of line. */ #if (NR_CPUS < 256) -static inline int __raw_spin_is_locked(raw_spinlock_t *lock) +static inline int __ticket_spin_is_locked(raw_spinlock_t *lock) { int tmp = ACCESS_ONCE(lock->slock); return (((tmp >> 8) & 0xff) != (tmp & 0xff)); } -static inline int __raw_spin_is_contended(raw_spinlock_t *lock) +static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) { int tmp = ACCESS_ONCE(lock->slock); return (((tmp >> 8) & 0xff) - (tmp & 0xff)) > 1; } -static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) +static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) { short inc = 0x0100; @@ -87,9 +87,7 @@ static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) : "memory", "cc"); } -#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) - -static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) +static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) { int tmp; short new; @@ -110,7 +108,7 @@ static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) return tmp; } -static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) +static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) { asm volatile(UNLOCK_LOCK_PREFIX "incb %0" : "+m" (lock->slock) @@ -118,21 +116,21 @@ static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) : "memory", "cc"); } #else -static inline int __raw_spin_is_locked(raw_spinlock_t *lock) +static inline int __ticket_spin_is_locked(raw_spinlock_t *lock) { int tmp = ACCESS_ONCE(lock->slock); return (((tmp >> 16) & 0xffff) != (tmp & 0xffff)); } -static inline int __raw_spin_is_contended(raw_spinlock_t *lock) +static inline int __ticket_spin_is_contended(raw_spinlock_t *lock) { int tmp = ACCESS_ONCE(lock->slock); return (((tmp >> 16) & 0xffff) - (tmp & 0xffff)) > 1; } -static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) +static __always_inline void __ticket_spin_lock(raw_spinlock_t *lock) { int inc = 0x00010000; int tmp; @@ -153,9 +151,7 @@ static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) : "memory", "cc"); } -#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) - -static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) +static __always_inline int __ticket_spin_trylock(raw_spinlock_t *lock) { int tmp; int new; @@ -177,7 +173,7 @@ static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) return tmp; } -static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) +static __always_inline void __ticket_spin_unlock(raw_spinlock_t *lock) { asm volatile(UNLOCK_LOCK_PREFIX "incw %0" : "+m" (lock->slock) @@ -186,6 +182,98 @@ static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) } #endif +#define __raw_spin_lock_flags(lock, flags) __raw_spin_lock(lock) + +#ifdef CONFIG_PARAVIRT +/* + * Define virtualization-friendly old-style lock byte lock, for use in + * pv_lock_ops if desired. + * + * This differs from the pre-2.6.24 spinlock by always using xchgb + * rather than decb to take the lock; this allows it to use a + * zero-initialized lock structure. It also maintains a 1-byte + * contention counter, so that we can implement + * __byte_spin_is_contended. + */ +struct __byte_spinlock { + s8 lock; + s8 spinners; +}; + +static inline int __byte_spin_is_locked(raw_spinlock_t *lock) +{ + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; + return bl->lock != 0; +} + +static inline int __byte_spin_is_contended(raw_spinlock_t *lock) +{ + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; + return bl->spinners != 0; +} + +static inline void __byte_spin_lock(raw_spinlock_t *lock) +{ + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; + s8 val = 1; + + asm("1: xchgb %1, %0\n" + " test %1,%1\n" + " jz 3f\n" + " " LOCK_PREFIX "incb %2\n" + "2: rep;nop\n" + " cmpb $1, %0\n" + " je 2b\n" + " " LOCK_PREFIX "decb %2\n" + " jmp 1b\n" + "3:" + : "+m" (bl->lock), "+q" (val), "+m" (bl->spinners): : "memory"); +} + +static inline int __byte_spin_trylock(raw_spinlock_t *lock) +{ + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; + u8 old = 1; + + asm("xchgb %1,%0" + : "+m" (bl->lock), "+q" (old) : : "memory"); + + return old == 0; +} + +static inline void __byte_spin_unlock(raw_spinlock_t *lock) +{ + struct __byte_spinlock *bl = (struct __byte_spinlock *)lock; + smp_wmb(); + bl->lock = 0; +} +#else /* !CONFIG_PARAVIRT */ +static inline int __raw_spin_is_locked(raw_spinlock_t *lock) +{ + return __ticket_spin_is_locked(lock); +} + +static inline int __raw_spin_is_contended(raw_spinlock_t *lock) +{ + return __ticket_spin_is_contended(lock); +} + +static __always_inline void __raw_spin_lock(raw_spinlock_t *lock) +{ + __ticket_spin_lock(lock); +} + +static __always_inline int __raw_spin_trylock(raw_spinlock_t *lock) +{ + return __ticket_spin_trylock(lock); +} + +static __always_inline void __raw_spin_unlock(raw_spinlock_t *lock) +{ + __ticket_spin_unlock(lock); +} +#endif /* CONFIG_PARAVIRT */ + static inline void __raw_spin_unlock_wait(raw_spinlock_t *lock) { while (__raw_spin_is_locked(lock)) diff --git a/include/asm-x86/spinlock_types.h b/include/asm-x86/spinlock_types.h index 9029cf78cf5d..06c071c9eee9 100644 --- a/include/asm-x86/spinlock_types.h +++ b/include/asm-x86/spinlock_types.h @@ -5,7 +5,7 @@ # error "please don't include this file directly" #endif -typedef struct { +typedef struct raw_spinlock { unsigned int slock; } raw_spinlock_t; diff --git a/include/asm-x86/xen/events.h b/include/asm-x86/xen/events.h index f8d57ea1f05f..8ded74720024 100644 --- a/include/asm-x86/xen/events.h +++ b/include/asm-x86/xen/events.h @@ -5,6 +5,7 @@ enum ipi_vector { XEN_RESCHEDULE_VECTOR, XEN_CALL_FUNCTION_VECTOR, XEN_CALL_FUNCTION_SINGLE_VECTOR, + XEN_SPIN_UNLOCK_VECTOR, XEN_NR_IPIS, }; diff --git a/include/xen/events.h b/include/xen/events.h index 67c4436554a9..4680ff3fbc91 100644 --- a/include/xen/events.h +++ b/include/xen/events.h @@ -44,4 +44,11 @@ extern void notify_remote_via_irq(int irq); extern void xen_irq_resume(void); +/* Clear an irq's pending state, in preparation for polling on it */ +void xen_clear_irq_pending(int irq); + +/* Poll waiting for an irq to become pending. In the usual case, the + irq will be disabled so it won't deliver an interrupt. */ +void xen_poll_irq(int irq); + #endif /* _XEN_EVENTS_H */