diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 58ad7eff83ff..1caa6b04fdc5 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -164,6 +164,8 @@ static inline u64 ktime_get_raw_ns(void) return ktime_to_ns(ktime_get_raw()); } +extern u64 ktime_get_mono_fast_ns(void); + /* * Timespec interfaces utilizing the ktime based ones */ diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index dee23c9d6c21..8980fb722fc5 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -44,6 +44,22 @@ static struct { static DEFINE_RAW_SPINLOCK(timekeeper_lock); static struct timekeeper shadow_timekeeper; +/** + * struct tk_fast - NMI safe timekeeper + * @seq: Sequence counter for protecting updates. The lowest bit + * is the index for the tk_read_base array + * @base: tk_read_base array. Access is indexed by the lowest bit of + * @seq. + * + * See @update_fast_timekeeper() below. + */ +struct tk_fast { + seqcount_t seq; + struct tk_read_base base[2]; +}; + +static struct tk_fast tk_fast_mono ____cacheline_aligned; + /* flag for if timekeeping is suspended */ int __read_mostly timekeeping_suspended; @@ -210,6 +226,112 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk) return nsec + arch_gettimeoffset(); } +/** + * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. + * @tk: The timekeeper from which we take the update + * @tkf: The fast timekeeper to update + * @tbase: The time base for the fast timekeeper (mono/raw) + * + * We want to use this from any context including NMI and tracing / + * instrumenting the timekeeping code itself. + * + * So we handle this differently than the other timekeeping accessor + * functions which retry when the sequence count has changed. The + * update side does: + * + * smp_wmb(); <- Ensure that the last base[1] update is visible + * tkf->seq++; + * smp_wmb(); <- Ensure that the seqcount update is visible + * update(tkf->base[0], tk); + * smp_wmb(); <- Ensure that the base[0] update is visible + * tkf->seq++; + * smp_wmb(); <- Ensure that the seqcount update is visible + * update(tkf->base[1], tk); + * + * The reader side does: + * + * do { + * seq = tkf->seq; + * smp_rmb(); + * idx = seq & 0x01; + * now = now(tkf->base[idx]); + * smp_rmb(); + * } while (seq != tkf->seq) + * + * As long as we update base[0] readers are forced off to + * base[1]. Once base[0] is updated readers are redirected to base[0] + * and the base[1] update takes place. + * + * So if a NMI hits the update of base[0] then it will use base[1] + * which is still consistent. In the worst case this can result is a + * slightly wrong timestamp (a few nanoseconds). See + * @ktime_get_mono_fast_ns. + */ +static void update_fast_timekeeper(struct timekeeper *tk) +{ + struct tk_read_base *base = tk_fast_mono.base; + + /* Force readers off to base[1] */ + raw_write_seqcount_latch(&tk_fast_mono.seq); + + /* Update base[0] */ + memcpy(base, &tk->tkr, sizeof(*base)); + + /* Force readers back to base[0] */ + raw_write_seqcount_latch(&tk_fast_mono.seq); + + /* Update base[1] */ + memcpy(base + 1, base, sizeof(*base)); +} + +/** + * ktime_get_mono_fast_ns - Fast NMI safe access to clock monotonic + * + * This timestamp is not guaranteed to be monotonic across an update. + * The timestamp is calculated by: + * + * now = base_mono + clock_delta * slope + * + * So if the update lowers the slope, readers who are forced to the + * not yet updated second array are still using the old steeper slope. + * + * tmono + * ^ + * | o n + * | o n + * | u + * | o + * |o + * |12345678---> reader order + * + * o = old slope + * u = update + * n = new slope + * + * So reader 6 will observe time going backwards versus reader 5. + * + * While other CPUs are likely to be able observe that, the only way + * for a CPU local observation is when an NMI hits in the middle of + * the update. Timestamps taken from that NMI context might be ahead + * of the following timestamps. Callers need to be aware of that and + * deal with it. + */ +u64 notrace ktime_get_mono_fast_ns(void) +{ + struct tk_read_base *tkr; + unsigned int seq; + u64 now; + + do { + seq = raw_read_seqcount(&tk_fast_mono.seq); + tkr = tk_fast_mono.base + (seq & 0x01); + now = ktime_to_ns(tkr->base_mono) + timekeeping_get_ns(tkr); + + } while (read_seqcount_retry(&tk_fast_mono.seq, seq)); + return now; +} +EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns); + #ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD static inline void update_vsyscall(struct timekeeper *tk) @@ -325,6 +447,8 @@ static void timekeeping_update(struct timekeeper *tk, unsigned int action) if (action & TK_MIRROR) memcpy(&shadow_timekeeper, &tk_core.timekeeper, sizeof(tk_core.timekeeper)); + + update_fast_timekeeper(tk); } /**