alistair23-linux/include/linux/seqlock.h
Milton Miller 5db1256a51 seqlock: Don't smp_rmb in seqlock reader spin loop
Move the smp_rmb after cpu_relax loop in read_seqlock and add
ACCESS_ONCE to make sure the test and return are consistent.

A multi-threaded core in the lab didn't like the update
from 2.6.35 to 2.6.36, to the point it would hang during
boot when multiple threads were active.  Bisection showed
af5ab277de (clockevents:
Remove the per cpu tick skew) as the culprit and it is
supported with stack traces showing xtime_lock waits including
tick_do_update_jiffies64 and/or update_vsyscall.

Experimentation showed the combination of cpu_relax and smp_rmb
was significantly slowing the progress of other threads sharing
the core, and this patch is effective in avoiding the hang.

A theory is the rmb is affecting the whole core while the
cpu_relax is causing a resource rebalance flush, together they
cause an interfernce cadance that is unbroken when the seqlock
reader has interrupts disabled.

At first I was confused why the refactor in
3c22cd5709 (kernel: optimise
seqlock) didn't affect this patch application, but after some
study that affected seqcount not seqlock. The new seqcount was
not factored back into the seqlock.  I defer that the future.

While the removal of the timer interrupt offset created
contention for the xtime lock while a cpu does the
additonal work to update the system clock, the seqlock
implementation with the tight rmb spin loop goes back much
further, and is just waiting for the right trigger.

Cc: <stable@vger.kernel.org>
Signed-off-by: Milton Miller <miltonm@bga.com>
Cc: <linuxppc-dev@lists.ozlabs.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Anton Blanchard <anton@samba.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Acked-by: Eric Dumazet <eric.dumazet@gmail.com>
Link: http://lkml.kernel.org/r/%3Cseqlock-rmb%40mdm.bga.com%3E
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
2011-05-12 12:13:43 +02:00

264 lines
6.7 KiB
C

#ifndef __LINUX_SEQLOCK_H
#define __LINUX_SEQLOCK_H
/*
* Reader/writer consistent mechanism without starving writers. This type of
* lock for data where the reader wants a consistent set of information
* and is willing to retry if the information changes. Readers never
* block but they may have to retry if a writer is in
* progress. Writers do not wait for readers.
*
* This is not as cache friendly as brlock. Also, this will not work
* for data that contains pointers, because any writer could
* invalidate a pointer that a reader was following.
*
* Expected reader usage:
* do {
* seq = read_seqbegin(&foo);
* ...
* } while (read_seqretry(&foo, seq));
*
*
* On non-SMP the spin locks disappear but the writer still needs
* to increment the sequence variables because an interrupt routine could
* change the state of the data.
*
* Based on x86_64 vsyscall gettimeofday
* by Keith Owens and Andrea Arcangeli
*/
#include <linux/spinlock.h>
#include <linux/preempt.h>
typedef struct {
unsigned sequence;
spinlock_t lock;
} seqlock_t;
/*
* These macros triggered gcc-3.x compile-time problems. We think these are
* OK now. Be cautious.
*/
#define __SEQLOCK_UNLOCKED(lockname) \
{ 0, __SPIN_LOCK_UNLOCKED(lockname) }
#define SEQLOCK_UNLOCKED \
__SEQLOCK_UNLOCKED(old_style_seqlock_init)
#define seqlock_init(x) \
do { \
(x)->sequence = 0; \
spin_lock_init(&(x)->lock); \
} while (0)
#define DEFINE_SEQLOCK(x) \
seqlock_t x = __SEQLOCK_UNLOCKED(x)
/* Lock out other writers and update the count.
* Acts like a normal spin_lock/unlock.
* Don't need preempt_disable() because that is in the spin_lock already.
*/
static inline void write_seqlock(seqlock_t *sl)
{
spin_lock(&sl->lock);
++sl->sequence;
smp_wmb();
}
static inline void write_sequnlock(seqlock_t *sl)
{
smp_wmb();
sl->sequence++;
spin_unlock(&sl->lock);
}
static inline int write_tryseqlock(seqlock_t *sl)
{
int ret = spin_trylock(&sl->lock);
if (ret) {
++sl->sequence;
smp_wmb();
}
return ret;
}
/* Start of read calculation -- fetch last complete writer token */
static __always_inline unsigned read_seqbegin(const seqlock_t *sl)
{
unsigned ret;
repeat:
ret = ACCESS_ONCE(sl->sequence);
if (unlikely(ret & 1)) {
cpu_relax();
goto repeat;
}
smp_rmb();
return ret;
}
/*
* Test if reader processed invalid data.
*
* If sequence value changed then writer changed data while in section.
*/
static __always_inline int read_seqretry(const seqlock_t *sl, unsigned start)
{
smp_rmb();
return unlikely(sl->sequence != start);
}
/*
* Version using sequence counter only.
* This can be used when code has its own mutex protecting the
* updating starting before the write_seqcountbeqin() and ending
* after the write_seqcount_end().
*/
typedef struct seqcount {
unsigned sequence;
} seqcount_t;
#define SEQCNT_ZERO { 0 }
#define seqcount_init(x) do { *(x) = (seqcount_t) SEQCNT_ZERO; } while (0)
/**
* __read_seqcount_begin - begin a seq-read critical section (without barrier)
* @s: pointer to seqcount_t
* Returns: count to be passed to read_seqcount_retry
*
* __read_seqcount_begin is like read_seqcount_begin, but has no smp_rmb()
* barrier. Callers should ensure that smp_rmb() or equivalent ordering is
* provided before actually loading any of the variables that are to be
* protected in this critical section.
*
* Use carefully, only in critical code, and comment how the barrier is
* provided.
*/
static inline unsigned __read_seqcount_begin(const seqcount_t *s)
{
unsigned ret;
repeat:
ret = s->sequence;
if (unlikely(ret & 1)) {
cpu_relax();
goto repeat;
}
return ret;
}
/**
* read_seqcount_begin - begin a seq-read critical section
* @s: pointer to seqcount_t
* Returns: count to be passed to read_seqcount_retry
*
* read_seqcount_begin opens a read critical section of the given seqcount.
* Validity of the critical section is tested by checking read_seqcount_retry
* function.
*/
static inline unsigned read_seqcount_begin(const seqcount_t *s)
{
unsigned ret = __read_seqcount_begin(s);
smp_rmb();
return ret;
}
/**
* __read_seqcount_retry - end a seq-read critical section (without barrier)
* @s: pointer to seqcount_t
* @start: count, from read_seqcount_begin
* Returns: 1 if retry is required, else 0
*
* __read_seqcount_retry is like read_seqcount_retry, but has no smp_rmb()
* barrier. Callers should ensure that smp_rmb() or equivalent ordering is
* provided before actually loading any of the variables that are to be
* protected in this critical section.
*
* Use carefully, only in critical code, and comment how the barrier is
* provided.
*/
static inline int __read_seqcount_retry(const seqcount_t *s, unsigned start)
{
return unlikely(s->sequence != start);
}
/**
* read_seqcount_retry - end a seq-read critical section
* @s: pointer to seqcount_t
* @start: count, from read_seqcount_begin
* Returns: 1 if retry is required, else 0
*
* read_seqcount_retry closes a read critical section of the given seqcount.
* If the critical section was invalid, it must be ignored (and typically
* retried).
*/
static inline int read_seqcount_retry(const seqcount_t *s, unsigned start)
{
smp_rmb();
return __read_seqcount_retry(s, start);
}
/*
* Sequence counter only version assumes that callers are using their
* own mutexing.
*/
static inline void write_seqcount_begin(seqcount_t *s)
{
s->sequence++;
smp_wmb();
}
static inline void write_seqcount_end(seqcount_t *s)
{
smp_wmb();
s->sequence++;
}
/**
* write_seqcount_barrier - invalidate in-progress read-side seq operations
* @s: pointer to seqcount_t
*
* After write_seqcount_barrier, no read-side seq operations will complete
* successfully and see data older than this.
*/
static inline void write_seqcount_barrier(seqcount_t *s)
{
smp_wmb();
s->sequence+=2;
}
/*
* Possible sw/hw IRQ protected versions of the interfaces.
*/
#define write_seqlock_irqsave(lock, flags) \
do { local_irq_save(flags); write_seqlock(lock); } while (0)
#define write_seqlock_irq(lock) \
do { local_irq_disable(); write_seqlock(lock); } while (0)
#define write_seqlock_bh(lock) \
do { local_bh_disable(); write_seqlock(lock); } while (0)
#define write_sequnlock_irqrestore(lock, flags) \
do { write_sequnlock(lock); local_irq_restore(flags); } while(0)
#define write_sequnlock_irq(lock) \
do { write_sequnlock(lock); local_irq_enable(); } while(0)
#define write_sequnlock_bh(lock) \
do { write_sequnlock(lock); local_bh_enable(); } while(0)
#define read_seqbegin_irqsave(lock, flags) \
({ local_irq_save(flags); read_seqbegin(lock); })
#define read_seqretry_irqrestore(lock, iv, flags) \
({ \
int ret = read_seqretry(lock, iv); \
local_irq_restore(flags); \
ret; \
})
#endif /* __LINUX_SEQLOCK_H */