From 95c354fe9f7d6decc08a92aa26eb233ecc2155bf Mon Sep 17 00:00:00 2001 From: Nick Piggin Date: Wed, 30 Jan 2008 13:31:20 +0100 Subject: [PATCH] spinlock: lockbreak cleanup The break_lock data structure and code for spinlocks is quite nasty. Not only does it double the size of a spinlock but it changes locking to a potentially less optimal trylock. Put all of that under CONFIG_GENERIC_LOCKBREAK, and introduce a __raw_spin_is_contended that uses the lock data itself to determine whether there are waiters on the lock, to be used if CONFIG_GENERIC_LOCKBREAK is not set. Rename need_lockbreak to spin_needbreak, make it use spin_is_contended to decouple it from the spinlock implementation, and make it typesafe (rwlocks do not have any need_lockbreak sites -- why do they even get bloated up with that break_lock then?). Signed-off-by: Nick Piggin Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/arm/Kconfig | 5 +++++ arch/ia64/Kconfig | 5 +++++ arch/m32r/Kconfig | 5 +++++ arch/mips/Kconfig | 5 +++++ arch/parisc/Kconfig | 5 +++++ arch/powerpc/Kconfig | 5 +++++ arch/sparc64/Kconfig | 5 +++++ arch/x86/Kconfig | 4 ++++ fs/jbd/checkpoint.c | 3 ++- fs/jbd/commit.c | 2 +- fs/jbd2/checkpoint.c | 3 ++- fs/jbd2/commit.c | 2 +- include/linux/sched.h | 21 +++++++-------------- include/linux/spinlock.h | 6 ++++++ include/linux/spinlock_types.h | 4 ++-- include/linux/spinlock_up.h | 2 ++ kernel/sched.c | 16 ++++++---------- kernel/spinlock.c | 3 +-- mm/memory.c | 8 +++----- 19 files changed, 72 insertions(+), 37 deletions(-) diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index de211ac3853e..77201d3f7479 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -91,6 +91,11 @@ config GENERIC_IRQ_PROBE bool default y +config GENERIC_LOCKBREAK + bool + default y + depends on SMP && PREEMPT + config RWSEM_GENERIC_SPINLOCK bool default y diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig index bef47725d4ad..4a81b7fb191a 100644 --- a/arch/ia64/Kconfig +++ b/arch/ia64/Kconfig @@ -42,6 +42,11 @@ config MMU config SWIOTLB bool +config GENERIC_LOCKBREAK + bool + default y + depends on SMP && PREEMPT + config RWSEM_XCHGADD_ALGORITHM bool default y diff --git a/arch/m32r/Kconfig b/arch/m32r/Kconfig index ab9a264cb194..f7237c5f531e 100644 --- a/arch/m32r/Kconfig +++ b/arch/m32r/Kconfig @@ -235,6 +235,11 @@ config IRAM_SIZE # Define implied options from the CPU selection here # +config GENERIC_LOCKBREAK + bool + default y + depends on SMP && PREEMPT + config RWSEM_GENERIC_SPINLOCK bool depends on M32R diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig index 6b0f85f02c79..4fad0a34b997 100644 --- a/arch/mips/Kconfig +++ b/arch/mips/Kconfig @@ -694,6 +694,11 @@ source "arch/mips/vr41xx/Kconfig" endmenu +config GENERIC_LOCKBREAK + bool + default y + depends on SMP && PREEMPT + config RWSEM_GENERIC_SPINLOCK bool default y diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index b8ef1787a191..2b649c46631c 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -19,6 +19,11 @@ config MMU config STACK_GROWSUP def_bool y +config GENERIC_LOCKBREAK + bool + default y + depends on SMP && PREEMPT + config RWSEM_GENERIC_SPINLOCK def_bool y diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 232c298c933f..c17a194beb0e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -53,6 +53,11 @@ config RWSEM_XCHGADD_ALGORITHM bool default y +config GENERIC_LOCKBREAK + bool + default y + depends on SMP && PREEMPT + config ARCH_HAS_ILOG2_U32 bool default y diff --git a/arch/sparc64/Kconfig b/arch/sparc64/Kconfig index 10b212a1f9f5..1e25bce0366d 100644 --- a/arch/sparc64/Kconfig +++ b/arch/sparc64/Kconfig @@ -200,6 +200,11 @@ config US2E_FREQ If in doubt, say N. # Global things across all Sun machines. +config GENERIC_LOCKBREAK + bool + default y + depends on SMP && PREEMPT + config RWSEM_GENERIC_SPINLOCK bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 23936301db56..db434f8171d3 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -19,6 +19,10 @@ config X86_64 config X86 def_bool y +config GENERIC_LOCKBREAK + def_bool y + depends on SMP && PREEMPT + config GENERIC_TIME def_bool y diff --git a/fs/jbd/checkpoint.c b/fs/jbd/checkpoint.c index 0f69c416eebc..a5432bbbfb88 100644 --- a/fs/jbd/checkpoint.c +++ b/fs/jbd/checkpoint.c @@ -347,7 +347,8 @@ restart: break; } retry = __process_buffer(journal, jh, bhs,&batch_count); - if (!retry && lock_need_resched(&journal->j_list_lock)){ + if (!retry && (need_resched() || + spin_needbreak(&journal->j_list_lock))) { spin_unlock(&journal->j_list_lock); retry = 1; break; diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index 610264b99a8e..31853eb65b4c 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -265,7 +265,7 @@ write_out_data: put_bh(bh); } - if (lock_need_resched(&journal->j_list_lock)) { + if (need_resched() || spin_needbreak(&journal->j_list_lock)) { spin_unlock(&journal->j_list_lock); goto write_out_data; } diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index 1b7f282c1ae9..6914598022ce 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -353,7 +353,8 @@ restart: } retry = __process_buffer(journal, jh, bhs, &batch_count, transaction); - if (!retry && lock_need_resched(&journal->j_list_lock)){ + if (!retry && (need_resched() || + spin_needbreak(&journal->j_list_lock))) { spin_unlock(&journal->j_list_lock); retry = 1; break; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index da8d0eb3b7b9..4f302d279279 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -341,7 +341,7 @@ write_out_data: put_bh(bh); } - if (lock_need_resched(&journal->j_list_lock)) { + if (need_resched() || spin_needbreak(&journal->j_list_lock)) { spin_unlock(&journal->j_list_lock); goto write_out_data; } diff --git a/include/linux/sched.h b/include/linux/sched.h index 2d0546e884ea..9d4797609aa5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1922,23 +1922,16 @@ extern int cond_resched_softirq(void); /* * Does a critical section need to be broken due to another - * task waiting?: + * task waiting?: (technically does not depend on CONFIG_PREEMPT, + * but a general need for low latency) */ -#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) -# define need_lockbreak(lock) ((lock)->break_lock) -#else -# define need_lockbreak(lock) 0 -#endif - -/* - * Does a critical section need to be broken due to another - * task waiting or preemption being signalled: - */ -static inline int lock_need_resched(spinlock_t *lock) +static inline int spin_needbreak(spinlock_t *lock) { - if (need_lockbreak(lock) || need_resched()) - return 1; +#ifdef CONFIG_PREEMPT + return spin_is_contended(lock); +#else return 0; +#endif } /* diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index c376f3b36c89..124449733c55 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -120,6 +120,12 @@ do { \ #define spin_is_locked(lock) __raw_spin_is_locked(&(lock)->raw_lock) +#ifdef CONFIG_GENERIC_LOCKBREAK +#define spin_is_contended(lock) ((lock)->break_lock) +#else +#define spin_is_contended(lock) __raw_spin_is_contended(&(lock)->raw_lock) +#endif + /** * spin_unlock_wait - wait until the spinlock gets unlocked * @lock: the spinlock in question. diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index f6a3a951b79e..68d88f71f1a2 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -19,7 +19,7 @@ typedef struct { raw_spinlock_t raw_lock; -#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) +#ifdef CONFIG_GENERIC_LOCKBREAK unsigned int break_lock; #endif #ifdef CONFIG_DEBUG_SPINLOCK @@ -35,7 +35,7 @@ typedef struct { typedef struct { raw_rwlock_t raw_lock; -#if defined(CONFIG_PREEMPT) && defined(CONFIG_SMP) +#ifdef CONFIG_GENERIC_LOCKBREAK unsigned int break_lock; #endif #ifdef CONFIG_DEBUG_SPINLOCK diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index ea54c4c9a4ec..938234c4a996 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -64,6 +64,8 @@ static inline void __raw_spin_unlock(raw_spinlock_t *lock) # define __raw_spin_trylock(lock) ({ (void)(lock); 1; }) #endif /* DEBUG_SPINLOCK */ +#define __raw_spin_is_contended(lock) (((void)(lock), 0)) + #define __raw_read_can_lock(lock) (((void)(lock), 1)) #define __raw_write_can_lock(lock) (((void)(lock), 1)) diff --git a/kernel/sched.c b/kernel/sched.c index 524285e46fa7..ba4c88088f62 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4945,19 +4945,15 @@ EXPORT_SYMBOL(_cond_resched); */ int cond_resched_lock(spinlock_t *lock) { + int resched = need_resched() && system_state == SYSTEM_RUNNING; int ret = 0; - if (need_lockbreak(lock)) { + if (spin_needbreak(lock) || resched) { spin_unlock(lock); - cpu_relax(); - ret = 1; - spin_lock(lock); - } - if (need_resched() && system_state == SYSTEM_RUNNING) { - spin_release(&lock->dep_map, 1, _THIS_IP_); - _raw_spin_unlock(lock); - preempt_enable_no_resched(); - __cond_resched(); + if (resched && need_resched()) + __cond_resched(); + else + cpu_relax(); ret = 1; spin_lock(lock); } diff --git a/kernel/spinlock.c b/kernel/spinlock.c index cd72424c2662..ae28c8245123 100644 --- a/kernel/spinlock.c +++ b/kernel/spinlock.c @@ -65,8 +65,7 @@ EXPORT_SYMBOL(_write_trylock); * even on CONFIG_PREEMPT, because lockdep assumes that interrupts are * not re-enabled during lock-acquire (which the preempt-spin-ops do): */ -#if !defined(CONFIG_PREEMPT) || !defined(CONFIG_SMP) || \ - defined(CONFIG_DEBUG_LOCK_ALLOC) +#if !defined(CONFIG_GENERIC_LOCKBREAK) || defined(CONFIG_DEBUG_LOCK_ALLOC) void __lockfunc _read_lock(rwlock_t *lock) { diff --git a/mm/memory.c b/mm/memory.c index 4b0144b24c12..673ebbf499c7 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -513,8 +513,7 @@ again: if (progress >= 32) { progress = 0; if (need_resched() || - need_lockbreak(src_ptl) || - need_lockbreak(dst_ptl)) + spin_needbreak(src_ptl) || spin_needbreak(dst_ptl)) break; } if (pte_none(*src_pte)) { @@ -853,7 +852,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, tlb_finish_mmu(*tlbp, tlb_start, start); if (need_resched() || - (i_mmap_lock && need_lockbreak(i_mmap_lock))) { + (i_mmap_lock && spin_needbreak(i_mmap_lock))) { if (i_mmap_lock) { *tlbp = NULL; goto out; @@ -1768,8 +1767,7 @@ again: restart_addr = zap_page_range(vma, start_addr, end_addr - start_addr, details); - need_break = need_resched() || - need_lockbreak(details->i_mmap_lock); + need_break = need_resched() || spin_needbreak(details->i_mmap_lock); if (restart_addr >= end_addr) { /* We have now completed this vma: mark it so */