diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index be7c0d9506b1..d8fc55aa9d44 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -549,15 +549,6 @@ loops can be debugged more effectively on production systems. - clocksource.arm_arch_timer.fsl-a008585= - [ARM64] - Format: - Enable/disable the workaround of Freescale/NXP - erratum A-008585. This can be useful for KVM - guests, if the guest device tree doesn't show the - erratum. If unspecified, the workaround is - enabled based on the device tree. - clearcpuid=BITNUM [X86] Disable CPUID feature X for the kernel. See arch/x86/include/asm/cpufeatures.h for the valid bit diff --git a/Documentation/devicetree/bindings/arm/arch_timer.txt b/Documentation/devicetree/bindings/arm/arch_timer.txt index ad440a2b8051..e926aea1147d 100644 --- a/Documentation/devicetree/bindings/arm/arch_timer.txt +++ b/Documentation/devicetree/bindings/arm/arch_timer.txt @@ -31,6 +31,12 @@ to deliver its interrupts via SPIs. This also affects writes to the tval register, due to the implicit counter read. +- hisilicon,erratum-161010101 : A boolean property. Indicates the + presence of Hisilicon erratum 161010101, which says that reading the + counters is unreliable in some cases, and reads may return a value 32 + beyond the correct value. This also affects writes to the tval + registers, due to the implicit counter read. + ** Optional properties: - arm,cpu-registers-not-fw-configured : Firmware does not initialize diff --git a/Documentation/devicetree/bindings/timer/cortina,gemini-timer.txt b/Documentation/devicetree/bindings/timer/cortina,gemini-timer.txt new file mode 100644 index 000000000000..16ea1d3b2e9e --- /dev/null +++ b/Documentation/devicetree/bindings/timer/cortina,gemini-timer.txt @@ -0,0 +1,22 @@ +Cortina Systems Gemini timer + +This timer is embedded in the Cortina Systems Gemini SoCs. + +Required properties: + +- compatible : Must be "cortina,gemini-timer" +- reg : Should contain registers location and length +- interrupts : Should contain the three timer interrupts with + flags for rising edge +- syscon : a phandle to the global Gemini system controller + +Example: + +timer@43000000 { + compatible = "cortina,gemini-timer"; + reg = <0x43000000 0x1000>; + interrupts = <14 IRQ_TYPE_EDGE_RISING>, /* Timer 1 */ + <15 IRQ_TYPE_EDGE_RISING>, /* Timer 2 */ + <16 IRQ_TYPE_EDGE_RISING>; /* Timer 3 */ + syscon = <&syscon>; +}; diff --git a/Documentation/devicetree/bindings/timer/renesas,ostm.txt b/Documentation/devicetree/bindings/timer/renesas,ostm.txt new file mode 100644 index 000000000000..be3ae0fdf775 --- /dev/null +++ b/Documentation/devicetree/bindings/timer/renesas,ostm.txt @@ -0,0 +1,30 @@ +* Renesas OS Timer (OSTM) + +The OSTM is a multi-channel 32-bit timer/counter with fixed clock +source that can operate in either interval count down timer or free-running +compare match mode. + +Channels are independent from each other. + +Required Properties: + + - compatible: must be one or more of the following: + - "renesas,r7s72100-ostm" for the r7s72100 OSTM + - "renesas,ostm" for any OSTM + This is a fallback for the above renesas,*-ostm entries + + - reg: base address and length of the register block for a timer channel. + + - interrupts: interrupt specifier for the timer channel. + + - clocks: clock specifier for the timer channel. + +Example: R7S72100 (RZ/A1H) OSTM node + + ostm0: timer@fcfec000 { + compatible = "renesas,r7s72100-ostm", "renesas,ostm"; + reg = <0xfcfec000 0x30>; + interrupts = ; + clocks = <&mstp5_clks R7S72100_CLK_OSTM0>; + power-domains = <&cpg_clocks>; + }; diff --git a/Documentation/timers/timer_stats.txt b/Documentation/timers/timer_stats.txt deleted file mode 100644 index de835ee97455..000000000000 --- a/Documentation/timers/timer_stats.txt +++ /dev/null @@ -1,73 +0,0 @@ -timer_stats - timer usage statistics ------------------------------------- - -timer_stats is a debugging facility to make the timer (ab)usage in a Linux -system visible to kernel and userspace developers. If enabled in the config -but not used it has almost zero runtime overhead, and a relatively small -data structure overhead. Even if collection is enabled runtime all the -locking is per-CPU and lookup is hashed. - -timer_stats should be used by kernel and userspace developers to verify that -their code does not make unduly use of timers. This helps to avoid unnecessary -wakeups, which should be avoided to optimize power consumption. - -It can be enabled by CONFIG_TIMER_STATS in the "Kernel hacking" configuration -section. - -timer_stats collects information about the timer events which are fired in a -Linux system over a sample period: - -- the pid of the task(process) which initialized the timer -- the name of the process which initialized the timer -- the function where the timer was initialized -- the callback function which is associated to the timer -- the number of events (callbacks) - -timer_stats adds an entry to /proc: /proc/timer_stats - -This entry is used to control the statistics functionality and to read out the -sampled information. - -The timer_stats functionality is inactive on bootup. - -To activate a sample period issue: -# echo 1 >/proc/timer_stats - -To stop a sample period issue: -# echo 0 >/proc/timer_stats - -The statistics can be retrieved by: -# cat /proc/timer_stats - -While sampling is enabled, each readout from /proc/timer_stats will see -newly updated statistics. Once sampling is disabled, the sampled information -is kept until a new sample period is started. This allows multiple readouts. - -Sample output of /proc/timer_stats: - -Timerstats sample period: 3.888770 s - 12, 0 swapper hrtimer_stop_sched_tick (hrtimer_sched_tick) - 15, 1 swapper hcd_submit_urb (rh_timer_func) - 4, 959 kedac schedule_timeout (process_timeout) - 1, 0 swapper page_writeback_init (wb_timer_fn) - 28, 0 swapper hrtimer_stop_sched_tick (hrtimer_sched_tick) - 22, 2948 IRQ 4 tty_flip_buffer_push (delayed_work_timer_fn) - 3, 3100 bash schedule_timeout (process_timeout) - 1, 1 swapper queue_delayed_work_on (delayed_work_timer_fn) - 1, 1 swapper queue_delayed_work_on (delayed_work_timer_fn) - 1, 1 swapper neigh_table_init_no_netlink (neigh_periodic_timer) - 1, 2292 ip __netdev_watchdog_up (dev_watchdog) - 1, 23 events/1 do_cache_clean (delayed_work_timer_fn) -90 total events, 30.0 events/sec - -The first column is the number of events, the second column the pid, the third -column is the name of the process. The forth column shows the function which -initialized the timer and in parenthesis the callback function which was -executed on expiry. - - Thomas, Ingo - -Added flag to indicate 'deferrable timer' in /proc/timer_stats. A deferrable -timer will appear as follows - 10D, 1 swapper queue_delayed_work_on (delayed_work_timer_fn) - diff --git a/arch/arm/mach-shmobile/Kconfig b/arch/arm/mach-shmobile/Kconfig index 2bb4b09f079e..ad7d604ff001 100644 --- a/arch/arm/mach-shmobile/Kconfig +++ b/arch/arm/mach-shmobile/Kconfig @@ -57,6 +57,7 @@ config ARCH_R7S72100 select PM select PM_GENERIC_DOMAINS select SYS_SUPPORTS_SH_MTU2 + select RENESAS_OSTM config ARCH_R8A73A4 bool "R-Mobile APE6 (R8A73A40)" diff --git a/arch/arm64/include/asm/arch_timer.h b/arch/arm64/include/asm/arch_timer.h index eaa5bbe3fa87..b4b34004a21e 100644 --- a/arch/arm64/include/asm/arch_timer.h +++ b/arch/arm64/include/asm/arch_timer.h @@ -29,41 +29,29 @@ #include -#if IS_ENABLED(CONFIG_FSL_ERRATUM_A008585) +#if IS_ENABLED(CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND) extern struct static_key_false arch_timer_read_ool_enabled; -#define needs_fsl_a008585_workaround() \ +#define needs_unstable_timer_counter_workaround() \ static_branch_unlikely(&arch_timer_read_ool_enabled) #else -#define needs_fsl_a008585_workaround() false +#define needs_unstable_timer_counter_workaround() false #endif -u32 __fsl_a008585_read_cntp_tval_el0(void); -u32 __fsl_a008585_read_cntv_tval_el0(void); -u64 __fsl_a008585_read_cntvct_el0(void); -/* - * The number of retries is an arbitrary value well beyond the highest number - * of iterations the loop has been observed to take. - */ -#define __fsl_a008585_read_reg(reg) ({ \ - u64 _old, _new; \ - int _retries = 200; \ - \ - do { \ - _old = read_sysreg(reg); \ - _new = read_sysreg(reg); \ - _retries--; \ - } while (unlikely(_old != _new) && _retries); \ - \ - WARN_ON_ONCE(!_retries); \ - _new; \ -}) +struct arch_timer_erratum_workaround { + const char *id; /* Indicate the Erratum ID */ + u32 (*read_cntp_tval_el0)(void); + u32 (*read_cntv_tval_el0)(void); + u64 (*read_cntvct_el0)(void); +}; + +extern const struct arch_timer_erratum_workaround *timer_unstable_counter_workaround; #define arch_timer_reg_read_stable(reg) \ ({ \ u64 _val; \ - if (needs_fsl_a008585_workaround()) \ - _val = __fsl_a008585_read_##reg(); \ + if (needs_unstable_timer_counter_workaround()) \ + _val = timer_unstable_counter_workaround->read_##reg();\ else \ _val = read_sysreg(reg); \ _val; \ diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 2d1f5638974c..20f2ba6d79be 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -5,7 +5,6 @@ generic-y += bug.h generic-y += bugs.h generic-y += clkdev.h generic-y += cputime.h -generic-y += div64.h generic-y += emergency-restart.h generic-y += errno.h generic-y += exec.h diff --git a/arch/tile/include/asm/div64.h b/arch/tile/include/asm/div64.h new file mode 100644 index 000000000000..9f765cdf09a5 --- /dev/null +++ b/arch/tile/include/asm/div64.h @@ -0,0 +1,16 @@ +#ifndef _ASM_TILE_DIV64_H +#define _ASM_TILE_DIV64_H + +#include + +#ifdef __tilegx__ +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + return __insn_mul_lu_lu(a, b); +} +#define mul_u32_u32 mul_u32_u32 +#endif + +#include + +#endif /* _ASM_TILE_DIV64_H */ diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h index ced283ac79df..af95c47d5c9e 100644 --- a/arch/x86/include/asm/div64.h +++ b/arch/x86/include/asm/div64.h @@ -59,6 +59,17 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) } #define div_u64_rem div_u64_rem +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + u32 high, low; + + asm ("mull %[b]" : "=a" (low), "=d" (high) + : [a] "a" (a), [b] "rm" (b) ); + + return low | ((u64)high) << 32; +} +#define mul_u32_u32 mul_u32_u32 + #else # include #endif /* CONFIG_X86_32 */ diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 073d1f1a620b..a8e91ae89fb3 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -156,13 +156,13 @@ EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) { + unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy; int d0; xloops *= 4; asm("mull %%edx" :"=d" (xloops), "=&a" (d0) - :"1" (xloops), "0" - (this_cpu_read(cpu_info.loops_per_jiffy) * (HZ/4))); + :"1" (xloops), "0" (lpj * (HZ / 4))); __delay(++xloops); } diff --git a/drivers/clocksource/Kconfig b/drivers/clocksource/Kconfig index 4866f7aa32e6..3356ab821624 100644 --- a/drivers/clocksource/Kconfig +++ b/drivers/clocksource/Kconfig @@ -5,6 +5,10 @@ config CLKSRC_OF bool select CLKSRC_PROBE +config CLKEVT_OF + bool + select CLKEVT_PROBE + config CLKSRC_ACPI bool select CLKSRC_PROBE @@ -12,6 +16,9 @@ config CLKSRC_ACPI config CLKSRC_PROBE bool +config CLKEVT_PROBE + bool + config CLKSRC_I8253 bool @@ -60,6 +67,16 @@ config DW_APB_TIMER_OF select DW_APB_TIMER select CLKSRC_OF +config GEMINI_TIMER + bool "Cortina Gemini timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + depends on HAS_IOMEM + select CLKSRC_MMIO + select CLKSRC_OF + select MFD_SYSCON + help + Enables support for the Gemini timer + config ROCKCHIP_TIMER bool "Rockchip timer driver" if COMPILE_TEST depends on ARM || ARM64 @@ -325,16 +342,30 @@ config ARM_ARCH_TIMER_EVTSTREAM This must be disabled for hardware validation purposes to detect any hardware anomalies of missing events. +config ARM_ARCH_TIMER_OOL_WORKAROUND + bool + config FSL_ERRATUM_A008585 bool "Workaround for Freescale/NXP Erratum A-008585" default y depends on ARM_ARCH_TIMER && ARM64 + select ARM_ARCH_TIMER_OOL_WORKAROUND help This option enables a workaround for Freescale/NXP Erratum A-008585 ("ARM generic timer may contain an erroneous value"). The workaround will only be active if the fsl,erratum-a008585 property is found in the timer node. +config HISILICON_ERRATUM_161010101 + bool "Workaround for Hisilicon Erratum 161010101" + default y + select ARM_ARCH_TIMER_OOL_WORKAROUND + depends on ARM_ARCH_TIMER && ARM64 + help + This option enables a workaround for Hisilicon Erratum + 161010101. The workaround will be active if the hisilicon,erratum-161010101 + property is found in the timer node. + config ARM_GLOBAL_TIMER bool "Support for the ARM global timer" if COMPILE_TEST select CLKSRC_OF if OF @@ -467,6 +498,13 @@ config SH_TIMER_MTU2 Timer Pulse Unit 2 (MTU2) hardware available on SoCs from Renesas. This hardware comes with 16 bit-timer registers. +config RENESAS_OSTM + bool "Renesas OSTM timer driver" if COMPILE_TEST + depends on GENERIC_CLOCKEVENTS + select CLKSRC_MMIO + help + Enables the support for the Renesas OSTM. + config SH_TIMER_TMU bool "Renesas TMU timer driver" if COMPILE_TEST depends on GENERIC_CLOCKEVENTS diff --git a/drivers/clocksource/Makefile b/drivers/clocksource/Makefile index a14111e1f087..d227d1314f14 100644 --- a/drivers/clocksource/Makefile +++ b/drivers/clocksource/Makefile @@ -1,4 +1,5 @@ obj-$(CONFIG_CLKSRC_PROBE) += clksrc-probe.o +obj-$(CONFIG_CLKEVT_PROBE) += clkevt-probe.o obj-$(CONFIG_ATMEL_PIT) += timer-atmel-pit.o obj-$(CONFIG_ATMEL_ST) += timer-atmel-st.o obj-$(CONFIG_ATMEL_TCB_CLKSRC) += tcb_clksrc.o @@ -8,6 +9,7 @@ obj-$(CONFIG_CS5535_CLOCK_EVENT_SRC) += cs5535-clockevt.o obj-$(CONFIG_CLKSRC_JCORE_PIT) += jcore-pit.o obj-$(CONFIG_SH_TIMER_CMT) += sh_cmt.o obj-$(CONFIG_SH_TIMER_MTU2) += sh_mtu2.o +obj-$(CONFIG_RENESAS_OSTM) += renesas-ostm.o obj-$(CONFIG_SH_TIMER_TMU) += sh_tmu.o obj-$(CONFIG_EM_TIMER_STI) += em_sti.o obj-$(CONFIG_CLKBLD_I8253) += i8253.o @@ -15,6 +17,7 @@ obj-$(CONFIG_CLKSRC_MMIO) += mmio.o obj-$(CONFIG_DIGICOLOR_TIMER) += timer-digicolor.o obj-$(CONFIG_DW_APB_TIMER) += dw_apb_timer.o obj-$(CONFIG_DW_APB_TIMER_OF) += dw_apb_timer_of.o +obj-$(CONFIG_GEMINI_TIMER) += timer-gemini.o obj-$(CONFIG_ROCKCHIP_TIMER) += rockchip_timer.o obj-$(CONFIG_CLKSRC_NOMADIK_MTU) += nomadik-mtu.o obj-$(CONFIG_CLKSRC_DBX500_PRCMU) += clksrc-dbx500-prcmu.o diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 4c8c3fb2e8b2..93aa1364376a 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -96,41 +96,107 @@ early_param("clocksource.arm_arch_timer.evtstrm", early_evtstrm_cfg); */ #ifdef CONFIG_FSL_ERRATUM_A008585 -DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled); -EXPORT_SYMBOL_GPL(arch_timer_read_ool_enabled); +/* + * The number of retries is an arbitrary value well beyond the highest number + * of iterations the loop has been observed to take. + */ +#define __fsl_a008585_read_reg(reg) ({ \ + u64 _old, _new; \ + int _retries = 200; \ + \ + do { \ + _old = read_sysreg(reg); \ + _new = read_sysreg(reg); \ + _retries--; \ + } while (unlikely(_old != _new) && _retries); \ + \ + WARN_ON_ONCE(!_retries); \ + _new; \ +}) -static int fsl_a008585_enable = -1; - -static int __init early_fsl_a008585_cfg(char *buf) -{ - int ret; - bool val; - - ret = strtobool(buf, &val); - if (ret) - return ret; - - fsl_a008585_enable = val; - return 0; -} -early_param("clocksource.arm_arch_timer.fsl-a008585", early_fsl_a008585_cfg); - -u32 __fsl_a008585_read_cntp_tval_el0(void) +static u32 notrace fsl_a008585_read_cntp_tval_el0(void) { return __fsl_a008585_read_reg(cntp_tval_el0); } -u32 __fsl_a008585_read_cntv_tval_el0(void) +static u32 notrace fsl_a008585_read_cntv_tval_el0(void) { return __fsl_a008585_read_reg(cntv_tval_el0); } -u64 __fsl_a008585_read_cntvct_el0(void) +static u64 notrace fsl_a008585_read_cntvct_el0(void) { return __fsl_a008585_read_reg(cntvct_el0); } -EXPORT_SYMBOL(__fsl_a008585_read_cntvct_el0); -#endif /* CONFIG_FSL_ERRATUM_A008585 */ +#endif + +#ifdef CONFIG_HISILICON_ERRATUM_161010101 +/* + * Verify whether the value of the second read is larger than the first by + * less than 32 is the only way to confirm the value is correct, so clear the + * lower 5 bits to check whether the difference is greater than 32 or not. + * Theoretically the erratum should not occur more than twice in succession + * when reading the system counter, but it is possible that some interrupts + * may lead to more than twice read errors, triggering the warning, so setting + * the number of retries far beyond the number of iterations the loop has been + * observed to take. + */ +#define __hisi_161010101_read_reg(reg) ({ \ + u64 _old, _new; \ + int _retries = 50; \ + \ + do { \ + _old = read_sysreg(reg); \ + _new = read_sysreg(reg); \ + _retries--; \ + } while (unlikely((_new - _old) >> 5) && _retries); \ + \ + WARN_ON_ONCE(!_retries); \ + _new; \ +}) + +static u32 notrace hisi_161010101_read_cntp_tval_el0(void) +{ + return __hisi_161010101_read_reg(cntp_tval_el0); +} + +static u32 notrace hisi_161010101_read_cntv_tval_el0(void) +{ + return __hisi_161010101_read_reg(cntv_tval_el0); +} + +static u64 notrace hisi_161010101_read_cntvct_el0(void) +{ + return __hisi_161010101_read_reg(cntvct_el0); +} +#endif + +#ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND +const struct arch_timer_erratum_workaround *timer_unstable_counter_workaround = NULL; +EXPORT_SYMBOL_GPL(timer_unstable_counter_workaround); + +DEFINE_STATIC_KEY_FALSE(arch_timer_read_ool_enabled); +EXPORT_SYMBOL_GPL(arch_timer_read_ool_enabled); + +static const struct arch_timer_erratum_workaround ool_workarounds[] = { +#ifdef CONFIG_FSL_ERRATUM_A008585 + { + .id = "fsl,erratum-a008585", + .read_cntp_tval_el0 = fsl_a008585_read_cntp_tval_el0, + .read_cntv_tval_el0 = fsl_a008585_read_cntv_tval_el0, + .read_cntvct_el0 = fsl_a008585_read_cntvct_el0, + }, +#endif +#ifdef CONFIG_HISILICON_ERRATUM_161010101 + { + .id = "hisilicon,erratum-161010101", + .read_cntp_tval_el0 = hisi_161010101_read_cntp_tval_el0, + .read_cntv_tval_el0 = hisi_161010101_read_cntv_tval_el0, + .read_cntvct_el0 = hisi_161010101_read_cntvct_el0, + }, +#endif +}; +#endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */ static __always_inline void arch_timer_reg_write(int access, enum arch_timer_reg reg, u32 val, @@ -281,8 +347,8 @@ static __always_inline void set_next_event(const int access, unsigned long evt, arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); } -#ifdef CONFIG_FSL_ERRATUM_A008585 -static __always_inline void fsl_a008585_set_next_event(const int access, +#ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND +static __always_inline void erratum_set_next_event_generic(const int access, unsigned long evt, struct clock_event_device *clk) { unsigned long ctrl; @@ -300,20 +366,20 @@ static __always_inline void fsl_a008585_set_next_event(const int access, arch_timer_reg_write(access, ARCH_TIMER_REG_CTRL, ctrl, clk); } -static int fsl_a008585_set_next_event_virt(unsigned long evt, +static int erratum_set_next_event_virt(unsigned long evt, struct clock_event_device *clk) { - fsl_a008585_set_next_event(ARCH_TIMER_VIRT_ACCESS, evt, clk); + erratum_set_next_event_generic(ARCH_TIMER_VIRT_ACCESS, evt, clk); return 0; } -static int fsl_a008585_set_next_event_phys(unsigned long evt, +static int erratum_set_next_event_phys(unsigned long evt, struct clock_event_device *clk) { - fsl_a008585_set_next_event(ARCH_TIMER_PHYS_ACCESS, evt, clk); + erratum_set_next_event_generic(ARCH_TIMER_PHYS_ACCESS, evt, clk); return 0; } -#endif /* CONFIG_FSL_ERRATUM_A008585 */ +#endif /* CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND */ static int arch_timer_set_next_event_virt(unsigned long evt, struct clock_event_device *clk) @@ -343,16 +409,16 @@ static int arch_timer_set_next_event_phys_mem(unsigned long evt, return 0; } -static void fsl_a008585_set_sne(struct clock_event_device *clk) +static void erratum_workaround_set_sne(struct clock_event_device *clk) { -#ifdef CONFIG_FSL_ERRATUM_A008585 +#ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND if (!static_branch_unlikely(&arch_timer_read_ool_enabled)) return; if (arch_timer_uses_ppi == VIRT_PPI) - clk->set_next_event = fsl_a008585_set_next_event_virt; + clk->set_next_event = erratum_set_next_event_virt; else - clk->set_next_event = fsl_a008585_set_next_event_phys; + clk->set_next_event = erratum_set_next_event_phys; #endif } @@ -385,7 +451,7 @@ static void __arch_timer_setup(unsigned type, BUG(); } - fsl_a008585_set_sne(clk); + erratum_workaround_set_sne(clk); } else { clk->features |= CLOCK_EVT_FEAT_DYNIRQ; clk->name = "arch_mem_timer"; @@ -580,7 +646,7 @@ static struct clocksource clocksource_counter = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; -static struct cyclecounter cyclecounter = { +static struct cyclecounter cyclecounter __ro_after_init = { .read = arch_counter_read_cc, .mask = CLOCKSOURCE_MASK(56), }; @@ -605,7 +671,7 @@ static void __init arch_counter_register(unsigned type) clocksource_counter.archdata.vdso_direct = true; -#ifdef CONFIG_FSL_ERRATUM_A008585 +#ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND /* * Don't use the vdso fastpath if errata require using * the out-of-line counter accessor. @@ -893,12 +959,15 @@ static int __init arch_timer_of_init(struct device_node *np) arch_timer_c3stop = !of_property_read_bool(np, "always-on"); -#ifdef CONFIG_FSL_ERRATUM_A008585 - if (fsl_a008585_enable < 0) - fsl_a008585_enable = of_property_read_bool(np, "fsl,erratum-a008585"); - if (fsl_a008585_enable) { - static_branch_enable(&arch_timer_read_ool_enabled); - pr_info("Enabling workaround for FSL erratum A-008585\n"); +#ifdef CONFIG_ARM_ARCH_TIMER_OOL_WORKAROUND + for (i = 0; i < ARRAY_SIZE(ool_workarounds); i++) { + if (of_property_read_bool(np, ool_workarounds[i].id)) { + timer_unstable_counter_workaround = &ool_workarounds[i]; + static_branch_enable(&arch_timer_read_ool_enabled); + pr_info("arch_timer: Enabling workaround for %s\n", + timer_unstable_counter_workaround->id); + break; + } } #endif diff --git a/drivers/clocksource/clkevt-probe.c b/drivers/clocksource/clkevt-probe.c new file mode 100644 index 000000000000..8c30fec86094 --- /dev/null +++ b/drivers/clocksource/clkevt-probe.c @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2016, Linaro Ltd. All rights reserved. + * Daniel Lezcano + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#include +#include +#include + +extern struct of_device_id __clkevt_of_table[]; + +static const struct of_device_id __clkevt_of_table_sentinel + __used __section(__clkevt_of_table_end); + +int __init clockevent_probe(void) +{ + struct device_node *np; + const struct of_device_id *match; + of_init_fn_1_ret init_func; + int ret, clockevents = 0; + + for_each_matching_node_and_match(np, __clkevt_of_table, &match) { + if (!of_device_is_available(np)) + continue; + + init_func = match->data; + + ret = init_func(np); + if (ret) { + pr_warn("Failed to initialize '%s' (%d)\n", + np->name, ret); + continue; + } + + clockevents++; + } + + if (!clockevents) { + pr_crit("%s: no matching clockevent found\n", __func__); + return -ENODEV; + } + + return 0; +} diff --git a/drivers/clocksource/renesas-ostm.c b/drivers/clocksource/renesas-ostm.c new file mode 100644 index 000000000000..c76f57668fb2 --- /dev/null +++ b/drivers/clocksource/renesas-ostm.c @@ -0,0 +1,265 @@ +/* + * Renesas Timer Support - OSTM + * + * Copyright (C) 2017 Renesas Electronics America, Inc. + * Copyright (C) 2017 Chris Brandt + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +/* + * The OSTM contains independent channels. + * The first OSTM channel probed will be set up as a free running + * clocksource. Additionally we will use this clocksource for the system + * schedule timer sched_clock(). + * + * The second (or more) channel probed will be set up as an interrupt + * driven clock event. + */ + +struct ostm_device { + void __iomem *base; + unsigned long ticks_per_jiffy; + struct clock_event_device ced; +}; + +static void __iomem *system_clock; /* For sched_clock() */ + +/* OSTM REGISTERS */ +#define OSTM_CMP 0x000 /* RW,32 */ +#define OSTM_CNT 0x004 /* R,32 */ +#define OSTM_TE 0x010 /* R,8 */ +#define OSTM_TS 0x014 /* W,8 */ +#define OSTM_TT 0x018 /* W,8 */ +#define OSTM_CTL 0x020 /* RW,8 */ + +#define TE 0x01 +#define TS 0x01 +#define TT 0x01 +#define CTL_PERIODIC 0x00 +#define CTL_ONESHOT 0x02 +#define CTL_FREERUN 0x02 + +static struct ostm_device *ced_to_ostm(struct clock_event_device *ced) +{ + return container_of(ced, struct ostm_device, ced); +} + +static void ostm_timer_stop(struct ostm_device *ostm) +{ + if (readb(ostm->base + OSTM_TE) & TE) { + writeb(TT, ostm->base + OSTM_TT); + + /* + * Read back the register simply to confirm the write operation + * has completed since I/O writes can sometimes get queued by + * the bus architecture. + */ + while (readb(ostm->base + OSTM_TE) & TE) + ; + } +} + +static int __init ostm_init_clksrc(struct ostm_device *ostm, unsigned long rate) +{ + /* + * irq not used (clock sources don't use interrupts) + */ + + ostm_timer_stop(ostm); + + writel(0, ostm->base + OSTM_CMP); + writeb(CTL_FREERUN, ostm->base + OSTM_CTL); + writeb(TS, ostm->base + OSTM_TS); + + return clocksource_mmio_init(ostm->base + OSTM_CNT, + "ostm", rate, + 300, 32, clocksource_mmio_readl_up); +} + +static u64 notrace ostm_read_sched_clock(void) +{ + return readl(system_clock); +} + +static void __init ostm_init_sched_clock(struct ostm_device *ostm, + unsigned long rate) +{ + system_clock = ostm->base + OSTM_CNT; + sched_clock_register(ostm_read_sched_clock, 32, rate); +} + +static int ostm_clock_event_next(unsigned long delta, + struct clock_event_device *ced) +{ + struct ostm_device *ostm = ced_to_ostm(ced); + + ostm_timer_stop(ostm); + + writel(delta, ostm->base + OSTM_CMP); + writeb(CTL_ONESHOT, ostm->base + OSTM_CTL); + writeb(TS, ostm->base + OSTM_TS); + + return 0; +} + +static int ostm_shutdown(struct clock_event_device *ced) +{ + struct ostm_device *ostm = ced_to_ostm(ced); + + ostm_timer_stop(ostm); + + return 0; +} +static int ostm_set_periodic(struct clock_event_device *ced) +{ + struct ostm_device *ostm = ced_to_ostm(ced); + + if (clockevent_state_oneshot(ced) || clockevent_state_periodic(ced)) + ostm_timer_stop(ostm); + + writel(ostm->ticks_per_jiffy - 1, ostm->base + OSTM_CMP); + writeb(CTL_PERIODIC, ostm->base + OSTM_CTL); + writeb(TS, ostm->base + OSTM_TS); + + return 0; +} + +static int ostm_set_oneshot(struct clock_event_device *ced) +{ + struct ostm_device *ostm = ced_to_ostm(ced); + + ostm_timer_stop(ostm); + + return 0; +} + +static irqreturn_t ostm_timer_interrupt(int irq, void *dev_id) +{ + struct ostm_device *ostm = dev_id; + + if (clockevent_state_oneshot(&ostm->ced)) + ostm_timer_stop(ostm); + + /* notify clockevent layer */ + if (ostm->ced.event_handler) + ostm->ced.event_handler(&ostm->ced); + + return IRQ_HANDLED; +} + +static int __init ostm_init_clkevt(struct ostm_device *ostm, int irq, + unsigned long rate) +{ + struct clock_event_device *ced = &ostm->ced; + int ret = -ENXIO; + + ret = request_irq(irq, ostm_timer_interrupt, + IRQF_TIMER | IRQF_IRQPOLL, + "ostm", ostm); + if (ret) { + pr_err("ostm: failed to request irq\n"); + return ret; + } + + ced->name = "ostm"; + ced->features = CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_PERIODIC; + ced->set_state_shutdown = ostm_shutdown; + ced->set_state_periodic = ostm_set_periodic; + ced->set_state_oneshot = ostm_set_oneshot; + ced->set_next_event = ostm_clock_event_next; + ced->shift = 32; + ced->rating = 300; + ced->cpumask = cpumask_of(0); + clockevents_config_and_register(ced, rate, 0xf, 0xffffffff); + + return 0; +} + +static int __init ostm_init(struct device_node *np) +{ + struct ostm_device *ostm; + int ret = -EFAULT; + struct clk *ostm_clk = NULL; + int irq; + unsigned long rate; + + ostm = kzalloc(sizeof(*ostm), GFP_KERNEL); + if (!ostm) + return -ENOMEM; + + ostm->base = of_iomap(np, 0); + if (!ostm->base) { + pr_err("ostm: failed to remap I/O memory\n"); + goto err; + } + + irq = irq_of_parse_and_map(np, 0); + if (irq < 0) { + pr_err("ostm: Failed to get irq\n"); + goto err; + } + + ostm_clk = of_clk_get(np, 0); + if (IS_ERR(ostm_clk)) { + pr_err("ostm: Failed to get clock\n"); + ostm_clk = NULL; + goto err; + } + + ret = clk_prepare_enable(ostm_clk); + if (ret) { + pr_err("ostm: Failed to enable clock\n"); + goto err; + } + + rate = clk_get_rate(ostm_clk); + ostm->ticks_per_jiffy = (rate + HZ / 2) / HZ; + + /* + * First probed device will be used as system clocksource. Any + * additional devices will be used as clock events. + */ + if (!system_clock) { + ret = ostm_init_clksrc(ostm, rate); + + if (!ret) { + ostm_init_sched_clock(ostm, rate); + pr_info("ostm: used for clocksource\n"); + } + + } else { + ret = ostm_init_clkevt(ostm, irq, rate); + + if (!ret) + pr_info("ostm: used for clock events\n"); + } + +err: + if (ret) { + clk_disable_unprepare(ostm_clk); + iounmap(ostm->base); + kfree(ostm); + return ret; + } + + return 0; +} + +CLOCKSOURCE_OF_DECLARE(ostm, "renesas,ostm", ostm_init); diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index d4ca9962a759..745844ee973e 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -10,6 +10,7 @@ #include #include #include +#include /* @@ -56,11 +57,16 @@ static u64 tc_get_cycles(struct clocksource *cs) return (upper << 16) | lower; } -static u64 tc_get_cycles32(struct clocksource *cs) +static u32 tc_get_cv32(void) { return __raw_readl(tcaddr + ATMEL_TC_REG(0, CV)); } +static u64 tc_get_cycles32(struct clocksource *cs) +{ + return tc_get_cv32(); +} + static struct clocksource clksrc = { .name = "tcb_clksrc", .rating = 200, @@ -69,6 +75,11 @@ static struct clocksource clksrc = { .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +static u64 notrace tc_read_sched_clock(void) +{ + return tc_get_cv32(); +} + #ifdef CONFIG_GENERIC_CLOCKEVENTS struct tc_clkevt_device { @@ -339,6 +350,9 @@ static int __init tcb_clksrc_init(void) clksrc.read = tc_get_cycles32; /* setup ony channel 0 */ tcb_setup_single_chan(tc, best_divisor_idx); + + /* register sched_clock on chips with single 32 bit counter */ + sched_clock_register(tc_read_sched_clock, 32, divided_rate); } else { /* tclib will give us three clocks no matter what the * underlying platform supports. diff --git a/drivers/clocksource/timer-gemini.c b/drivers/clocksource/timer-gemini.c new file mode 100644 index 000000000000..dda27b7bf1a1 --- /dev/null +++ b/drivers/clocksource/timer-gemini.c @@ -0,0 +1,277 @@ +/* + * Gemini timer driver + * Copyright (C) 2017 Linus Walleij + * + * Based on a rewrite of arch/arm/mach-gemini/timer.c: + * Copyright (C) 2001-2006 Storlink, Corp. + * Copyright (C) 2008-2009 Paulius Zaleckas + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Relevant registers in the global syscon + */ +#define GLOBAL_STATUS 0x04 +#define CPU_AHB_RATIO_MASK (0x3 << 18) +#define CPU_AHB_1_1 (0x0 << 18) +#define CPU_AHB_3_2 (0x1 << 18) +#define CPU_AHB_24_13 (0x2 << 18) +#define CPU_AHB_2_1 (0x3 << 18) +#define REG_TO_AHB_SPEED(reg) ((((reg) >> 15) & 0x7) * 10 + 130) + +/* + * Register definitions for the timers + */ +#define TIMER1_COUNT (0x00) +#define TIMER1_LOAD (0x04) +#define TIMER1_MATCH1 (0x08) +#define TIMER1_MATCH2 (0x0c) +#define TIMER2_COUNT (0x10) +#define TIMER2_LOAD (0x14) +#define TIMER2_MATCH1 (0x18) +#define TIMER2_MATCH2 (0x1c) +#define TIMER3_COUNT (0x20) +#define TIMER3_LOAD (0x24) +#define TIMER3_MATCH1 (0x28) +#define TIMER3_MATCH2 (0x2c) +#define TIMER_CR (0x30) +#define TIMER_INTR_STATE (0x34) +#define TIMER_INTR_MASK (0x38) + +#define TIMER_1_CR_ENABLE (1 << 0) +#define TIMER_1_CR_CLOCK (1 << 1) +#define TIMER_1_CR_INT (1 << 2) +#define TIMER_2_CR_ENABLE (1 << 3) +#define TIMER_2_CR_CLOCK (1 << 4) +#define TIMER_2_CR_INT (1 << 5) +#define TIMER_3_CR_ENABLE (1 << 6) +#define TIMER_3_CR_CLOCK (1 << 7) +#define TIMER_3_CR_INT (1 << 8) +#define TIMER_1_CR_UPDOWN (1 << 9) +#define TIMER_2_CR_UPDOWN (1 << 10) +#define TIMER_3_CR_UPDOWN (1 << 11) +#define TIMER_DEFAULT_FLAGS (TIMER_1_CR_UPDOWN | \ + TIMER_3_CR_ENABLE | \ + TIMER_3_CR_UPDOWN) + +#define TIMER_1_INT_MATCH1 (1 << 0) +#define TIMER_1_INT_MATCH2 (1 << 1) +#define TIMER_1_INT_OVERFLOW (1 << 2) +#define TIMER_2_INT_MATCH1 (1 << 3) +#define TIMER_2_INT_MATCH2 (1 << 4) +#define TIMER_2_INT_OVERFLOW (1 << 5) +#define TIMER_3_INT_MATCH1 (1 << 6) +#define TIMER_3_INT_MATCH2 (1 << 7) +#define TIMER_3_INT_OVERFLOW (1 << 8) +#define TIMER_INT_ALL_MASK 0x1ff + +static unsigned int tick_rate; +static void __iomem *base; + +static u64 notrace gemini_read_sched_clock(void) +{ + return readl(base + TIMER3_COUNT); +} + +static int gemini_timer_set_next_event(unsigned long cycles, + struct clock_event_device *evt) +{ + u32 cr; + + /* Setup the match register */ + cr = readl(base + TIMER1_COUNT); + writel(cr + cycles, base + TIMER1_MATCH1); + if (readl(base + TIMER1_COUNT) - cr > cycles) + return -ETIME; + + return 0; +} + +static int gemini_timer_shutdown(struct clock_event_device *evt) +{ + u32 cr; + + /* + * Disable also for oneshot: the set_next() call will arm the timer + * instead. + */ + /* Stop timer and interrupt. */ + cr = readl(base + TIMER_CR); + cr &= ~(TIMER_1_CR_ENABLE | TIMER_1_CR_INT); + writel(cr, base + TIMER_CR); + + /* Setup counter start from 0 */ + writel(0, base + TIMER1_COUNT); + writel(0, base + TIMER1_LOAD); + + /* enable interrupt */ + cr = readl(base + TIMER_INTR_MASK); + cr &= ~(TIMER_1_INT_OVERFLOW | TIMER_1_INT_MATCH2); + cr |= TIMER_1_INT_MATCH1; + writel(cr, base + TIMER_INTR_MASK); + + /* start the timer */ + cr = readl(base + TIMER_CR); + cr |= TIMER_1_CR_ENABLE; + writel(cr, base + TIMER_CR); + + return 0; +} + +static int gemini_timer_set_periodic(struct clock_event_device *evt) +{ + u32 period = DIV_ROUND_CLOSEST(tick_rate, HZ); + u32 cr; + + /* Stop timer and interrupt */ + cr = readl(base + TIMER_CR); + cr &= ~(TIMER_1_CR_ENABLE | TIMER_1_CR_INT); + writel(cr, base + TIMER_CR); + + /* Setup timer to fire at 1/HT intervals. */ + cr = 0xffffffff - (period - 1); + writel(cr, base + TIMER1_COUNT); + writel(cr, base + TIMER1_LOAD); + + /* enable interrupt on overflow */ + cr = readl(base + TIMER_INTR_MASK); + cr &= ~(TIMER_1_INT_MATCH1 | TIMER_1_INT_MATCH2); + cr |= TIMER_1_INT_OVERFLOW; + writel(cr, base + TIMER_INTR_MASK); + + /* Start the timer */ + cr = readl(base + TIMER_CR); + cr |= TIMER_1_CR_ENABLE; + cr |= TIMER_1_CR_INT; + writel(cr, base + TIMER_CR); + + return 0; +} + +/* Use TIMER1 as clock event */ +static struct clock_event_device gemini_clockevent = { + .name = "TIMER1", + /* Reasonably fast and accurate clock event */ + .rating = 300, + .shift = 32, + .features = CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT, + .set_next_event = gemini_timer_set_next_event, + .set_state_shutdown = gemini_timer_shutdown, + .set_state_periodic = gemini_timer_set_periodic, + .set_state_oneshot = gemini_timer_shutdown, + .tick_resume = gemini_timer_shutdown, +}; + +/* + * IRQ handler for the timer + */ +static irqreturn_t gemini_timer_interrupt(int irq, void *dev_id) +{ + struct clock_event_device *evt = &gemini_clockevent; + + evt->event_handler(evt); + return IRQ_HANDLED; +} + +static struct irqaction gemini_timer_irq = { + .name = "Gemini Timer Tick", + .flags = IRQF_TIMER, + .handler = gemini_timer_interrupt, +}; + +static int __init gemini_timer_of_init(struct device_node *np) +{ + static struct regmap *map; + int irq; + int ret; + u32 val; + + map = syscon_regmap_lookup_by_phandle(np, "syscon"); + if (IS_ERR(map)) { + pr_err("Can't get regmap for syscon handle"); + return -ENODEV; + } + ret = regmap_read(map, GLOBAL_STATUS, &val); + if (ret) { + pr_err("Can't read syscon status register"); + return -ENXIO; + } + + base = of_iomap(np, 0); + if (!base) { + pr_err("Can't remap registers"); + return -ENXIO; + } + /* IRQ for timer 1 */ + irq = irq_of_parse_and_map(np, 0); + if (irq <= 0) { + pr_err("Can't parse IRQ"); + return -EINVAL; + } + + tick_rate = REG_TO_AHB_SPEED(val) * 1000000; + printk(KERN_INFO "Bus: %dMHz", tick_rate / 1000000); + + tick_rate /= 6; /* APB bus run AHB*(1/6) */ + + switch (val & CPU_AHB_RATIO_MASK) { + case CPU_AHB_1_1: + printk(KERN_CONT "(1/1)\n"); + break; + case CPU_AHB_3_2: + printk(KERN_CONT "(3/2)\n"); + break; + case CPU_AHB_24_13: + printk(KERN_CONT "(24/13)\n"); + break; + case CPU_AHB_2_1: + printk(KERN_CONT "(2/1)\n"); + break; + } + + /* + * Reset the interrupt mask and status + */ + writel(TIMER_INT_ALL_MASK, base + TIMER_INTR_MASK); + writel(0, base + TIMER_INTR_STATE); + writel(TIMER_DEFAULT_FLAGS, base + TIMER_CR); + + /* + * Setup free-running clocksource timer (interrupts + * disabled.) + */ + writel(0, base + TIMER3_COUNT); + writel(0, base + TIMER3_LOAD); + writel(0, base + TIMER3_MATCH1); + writel(0, base + TIMER3_MATCH2); + clocksource_mmio_init(base + TIMER3_COUNT, + "gemini_clocksource", tick_rate, + 300, 32, clocksource_mmio_readl_up); + sched_clock_register(gemini_read_sched_clock, 32, tick_rate); + + /* + * Setup clockevent timer (interrupt-driven.) + */ + writel(0, base + TIMER1_COUNT); + writel(0, base + TIMER1_LOAD); + writel(0, base + TIMER1_MATCH1); + writel(0, base + TIMER1_MATCH2); + setup_irq(irq, &gemini_timer_irq); + gemini_clockevent.cpumask = cpumask_of(0); + clockevents_config_and_register(&gemini_clockevent, tick_rate, + 1, 0xffffffff); + + return 0; +} +CLOCKSOURCE_OF_DECLARE(nomadik_mtu, "cortina,gemini-timer", + gemini_timer_of_init); diff --git a/fs/proc/base.c b/fs/proc/base.c index 87c9a9aacda3..b1f7d30e96c2 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2179,7 +2179,7 @@ static const struct file_operations proc_map_files_operations = { .llseek = generic_file_llseek, }; -#ifdef CONFIG_CHECKPOINT_RESTORE +#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) struct timers_private { struct pid *pid; struct task_struct *task; @@ -2936,7 +2936,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations), REG("setgroups", S_IRUGO|S_IWUSR, proc_setgroups_operations), #endif -#ifdef CONFIG_CHECKPOINT_RESTORE +#if defined(CONFIG_CHECKPOINT_RESTORE) && defined(CONFIG_POSIX_TIMERS) REG("timers", S_IRUGO, proc_timers_operations), #endif REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations), diff --git a/fs/timerfd.c b/fs/timerfd.c index c173cc196175..384fa759a563 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -40,6 +40,7 @@ struct timerfd_ctx { short unsigned settime_flags; /* to show in fdinfo */ struct rcu_head rcu; struct list_head clist; + spinlock_t cancel_lock; bool might_cancel; }; @@ -112,7 +113,7 @@ void timerfd_clock_was_set(void) rcu_read_unlock(); } -static void timerfd_remove_cancel(struct timerfd_ctx *ctx) +static void __timerfd_remove_cancel(struct timerfd_ctx *ctx) { if (ctx->might_cancel) { ctx->might_cancel = false; @@ -122,6 +123,13 @@ static void timerfd_remove_cancel(struct timerfd_ctx *ctx) } } +static void timerfd_remove_cancel(struct timerfd_ctx *ctx) +{ + spin_lock(&ctx->cancel_lock); + __timerfd_remove_cancel(ctx); + spin_unlock(&ctx->cancel_lock); +} + static bool timerfd_canceled(struct timerfd_ctx *ctx) { if (!ctx->might_cancel || ctx->moffs != KTIME_MAX) @@ -132,6 +140,7 @@ static bool timerfd_canceled(struct timerfd_ctx *ctx) static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) { + spin_lock(&ctx->cancel_lock); if ((ctx->clockid == CLOCK_REALTIME || ctx->clockid == CLOCK_REALTIME_ALARM) && (flags & TFD_TIMER_ABSTIME) && (flags & TFD_TIMER_CANCEL_ON_SET)) { @@ -141,9 +150,10 @@ static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) list_add_rcu(&ctx->clist, &cancel_list); spin_unlock(&cancel_lock); } - } else if (ctx->might_cancel) { - timerfd_remove_cancel(ctx); + } else { + __timerfd_remove_cancel(ctx); } + spin_unlock(&ctx->cancel_lock); } static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) @@ -400,6 +410,7 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) return -ENOMEM; init_waitqueue_head(&ctx->wqh); + spin_lock_init(&ctx->cancel_lock); ctx->clockid = clockid; if (isalarm(ctx)) diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 0d442e34c349..5d3053c34fb3 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -224,4 +224,13 @@ static inline void tick_setup_hrtimer_broadcast(void) { } #endif /* !CONFIG_GENERIC_CLOCKEVENTS */ +#define CLOCKEVENT_OF_DECLARE(name, compat, fn) \ + OF_DECLARE_1_RET(clkevt, name, compat, fn) + +#ifdef CONFIG_CLKEVT_PROBE +extern int clockevent_probe(void); +#els +static inline int clockevent_probe(void) { return 0; } +#endif + #endif /* _LINUX_CLOCKCHIPS_H */ diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index b3d2c1a89ac4..96f1e88b767c 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -649,11 +649,15 @@ static inline size_t cpumask_size(void) * used. Please use this_cpu_cpumask_var_t in those cases. The direct use * of this_cpu_ptr() or this_cpu_read() will lead to failures when the * other type of cpumask_var_t implementation is configured. + * + * Please also note that __cpumask_var_read_mostly can be used to declare + * a cpumask_var_t variable itself (not its content) as read mostly. */ #ifdef CONFIG_CPUMASK_OFFSTACK typedef struct cpumask *cpumask_var_t; -#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x) +#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x) +#define __cpumask_var_read_mostly __read_mostly bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); @@ -667,6 +671,7 @@ void free_bootmem_cpumask_var(cpumask_var_t mask); typedef struct cpumask cpumask_var_t[1]; #define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) +#define __cpumask_var_read_mostly static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { diff --git a/include/linux/delay.h b/include/linux/delay.h index a6ecb34cf547..2ecb3c46b20a 100644 --- a/include/linux/delay.h +++ b/include/linux/delay.h @@ -5,6 +5,17 @@ * Copyright (C) 1993 Linus Torvalds * * Delay routines, using a pre-computed "loops_per_jiffy" value. + * + * Please note that ndelay(), udelay() and mdelay() may return early for + * several reasons: + * 1. computed loops_per_jiffy too low (due to the time taken to + * execute the timer interrupt.) + * 2. cache behaviour affecting the time it takes to execute the + * loop function. + * 3. CPU clock rate changes. + * + * Please see this thread: + * http://lists.openwall.net/linux-kernel/2011/01/09/56 */ #include diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index cdab81ba29f8..e52b427223ba 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -88,12 +88,6 @@ enum hrtimer_restart { * @base: pointer to the timer base (per cpu and per clock) * @state: state information (See bit values above) * @is_rel: Set if the timer was armed relative - * @start_pid: timer statistics field to store the pid of the task which - * started the timer - * @start_site: timer statistics field to store the site where the timer - * was started - * @start_comm: timer statistics field to store the name of the process which - * started the timer * * The hrtimer structure must be initialized by hrtimer_init() */ @@ -104,11 +98,6 @@ struct hrtimer { struct hrtimer_clock_base *base; u8 state; u8 is_rel; -#ifdef CONFIG_TIMER_STATS - int start_pid; - void *start_site; - char start_comm[16]; -#endif }; /** diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 325f649d77ff..3a85d61f7614 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -42,6 +42,27 @@ extern struct fs_struct init_fs; #define INIT_PREV_CPUTIME(x) #endif +#ifdef CONFIG_POSIX_TIMERS +#define INIT_POSIX_TIMERS(s) \ + .posix_timers = LIST_HEAD_INIT(s.posix_timers), +#define INIT_CPU_TIMERS(s) \ + .cpu_timers = { \ + LIST_HEAD_INIT(s.cpu_timers[0]), \ + LIST_HEAD_INIT(s.cpu_timers[1]), \ + LIST_HEAD_INIT(s.cpu_timers[2]), \ + }, +#define INIT_CPUTIMER(s) \ + .cputimer = { \ + .cputime_atomic = INIT_CPUTIME_ATOMIC, \ + .running = false, \ + .checking_timer = false, \ + }, +#else +#define INIT_POSIX_TIMERS(s) +#define INIT_CPU_TIMERS(s) +#define INIT_CPUTIMER(s) +#endif + #define INIT_SIGNALS(sig) { \ .nr_threads = 1, \ .thread_head = LIST_HEAD_INIT(init_task.thread_node), \ @@ -49,14 +70,10 @@ extern struct fs_struct init_fs; .shared_pending = { \ .list = LIST_HEAD_INIT(sig.shared_pending.list), \ .signal = {{0}}}, \ - .posix_timers = LIST_HEAD_INIT(sig.posix_timers), \ - .cpu_timers = INIT_CPU_TIMERS(sig.cpu_timers), \ + INIT_POSIX_TIMERS(sig) \ + INIT_CPU_TIMERS(sig) \ .rlim = INIT_RLIMITS, \ - .cputimer = { \ - .cputime_atomic = INIT_CPUTIME_ATOMIC, \ - .running = false, \ - .checking_timer = false, \ - }, \ + INIT_CPUTIMER(sig) \ INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ @@ -247,7 +264,7 @@ extern struct task_group root_task_group; .blocked = {{0}}, \ .alloc_lock = __SPIN_LOCK_UNLOCKED(tsk.alloc_lock), \ .journal_info = NULL, \ - .cpu_timers = INIT_CPU_TIMERS(tsk.cpu_timers), \ + INIT_CPU_TIMERS(tsk) \ .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(tsk.pi_lock), \ .timer_slack_ns = 50000, /* 50 usec default slack */ \ .pids = { \ @@ -274,13 +291,6 @@ extern struct task_group root_task_group; } -#define INIT_CPU_TIMERS(cpu_timers) \ -{ \ - LIST_HEAD_INIT(cpu_timers[0]), \ - LIST_HEAD_INIT(cpu_timers[1]), \ - LIST_HEAD_INIT(cpu_timers[2]), \ -} - /* Attach to the init_task data structure for proper alignment */ #define __init_task_data __attribute__((__section__(".data..init_task"))) diff --git a/include/linux/math64.h b/include/linux/math64.h index 6e8b5b270ffe..80690c96c734 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -133,6 +133,16 @@ __iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) return ret; } +#ifndef mul_u32_u32 +/* + * Many a GCC version messes this up and generates a 64x64 mult :-( + */ +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + return (u64)a * b; +} +#endif + #if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__) #ifndef mul_u64_u32_shr @@ -160,9 +170,9 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) al = a; ah = a >> 32; - ret = ((u64)al * mul) >> shift; + ret = mul_u32_u32(al, mul) >> shift; if (ah) - ret += ((u64)ah * mul) << (32 - shift); + ret += mul_u32_u32(ah, mul) << (32 - shift); return ret; } @@ -186,10 +196,10 @@ static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift) a0.ll = a; b0.ll = b; - rl.ll = (u64)a0.l.low * b0.l.low; - rm.ll = (u64)a0.l.low * b0.l.high; - rn.ll = (u64)a0.l.high * b0.l.low; - rh.ll = (u64)a0.l.high * b0.l.high; + rl.ll = mul_u32_u32(a0.l.low, b0.l.low); + rm.ll = mul_u32_u32(a0.l.low, b0.l.high); + rn.ll = mul_u32_u32(a0.l.high, b0.l.low); + rh.ll = mul_u32_u32(a0.l.high, b0.l.high); /* * Each of these lines computes a 64-bit intermediate result into "c", @@ -229,8 +239,8 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor) } u, rl, rh; u.ll = a; - rl.ll = (u64)u.l.low * mul; - rh.ll = (u64)u.l.high * mul + rl.l.high; + rl.ll = mul_u32_u32(u.l.low, mul); + rh.ll = mul_u32_u32(u.l.high, mul) + rl.l.high; /* Bits 32-63 of the result will be in rh.l.low. */ rl.l.high = do_div(rh.ll, divisor); diff --git a/include/linux/sched.h b/include/linux/sched.h index ad3ec9ec61f7..6e4782eae076 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -734,13 +734,14 @@ struct signal_struct { unsigned int is_child_subreaper:1; unsigned int has_child_subreaper:1; +#ifdef CONFIG_POSIX_TIMERS + /* POSIX.1b Interval Timers */ int posix_timer_id; struct list_head posix_timers; /* ITIMER_REAL timer for the process */ struct hrtimer real_timer; - struct pid *leader_pid; ktime_t it_real_incr; /* @@ -759,12 +760,16 @@ struct signal_struct { /* Earliest-expiration cache. */ struct task_cputime cputime_expires; + struct list_head cpu_timers[3]; + +#endif + + struct pid *leader_pid; + #ifdef CONFIG_NO_HZ_FULL atomic_t tick_dep_mask; #endif - struct list_head cpu_timers[3]; - struct pid *tty_old_pgrp; /* boolean value for session group leader */ @@ -1691,8 +1696,10 @@ struct task_struct { /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt; +#ifdef CONFIG_POSIX_TIMERS struct task_cputime cputime_expires; struct list_head cpu_timers[3]; +#endif /* process credentials */ const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */ diff --git a/include/linux/timer.h b/include/linux/timer.h index 51d601f192d4..5a209b84fd9e 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -20,11 +20,6 @@ struct timer_list { unsigned long data; u32 flags; -#ifdef CONFIG_TIMER_STATS - int start_pid; - void *start_site; - char start_comm[16]; -#endif #ifdef CONFIG_LOCKDEP struct lockdep_map lockdep_map; #endif @@ -197,46 +192,6 @@ extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); */ #define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) -/* - * Timer-statistics info: - */ -#ifdef CONFIG_TIMER_STATS - -extern int timer_stats_active; - -extern void init_timer_stats(void); - -extern void timer_stats_update_stats(void *timer, pid_t pid, void *startf, - void *timerf, char *comm, u32 flags); - -extern void __timer_stats_timer_set_start_info(struct timer_list *timer, - void *addr); - -static inline void timer_stats_timer_set_start_info(struct timer_list *timer) -{ - if (likely(!timer_stats_active)) - return; - __timer_stats_timer_set_start_info(timer, __builtin_return_address(0)); -} - -static inline void timer_stats_timer_clear_start_info(struct timer_list *timer) -{ - timer->start_site = NULL; -} -#else -static inline void init_timer_stats(void) -{ -} - -static inline void timer_stats_timer_set_start_info(struct timer_list *timer) -{ -} - -static inline void timer_stats_timer_clear_start_info(struct timer_list *timer) -{ -} -#endif - extern void add_timer(struct timer_list *timer); extern int try_to_del_timer_sync(struct timer_list *timer); diff --git a/kernel/fork.c b/kernel/fork.c index 11c5c8ab827c..105c6676d93b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1304,6 +1304,7 @@ void __cleanup_sighand(struct sighand_struct *sighand) } } +#ifdef CONFIG_POSIX_TIMERS /* * Initialize POSIX timer handling for a thread group. */ @@ -1322,6 +1323,9 @@ static void posix_cpu_timers_init_group(struct signal_struct *sig) INIT_LIST_HEAD(&sig->cpu_timers[1]); INIT_LIST_HEAD(&sig->cpu_timers[2]); } +#else +static inline void posix_cpu_timers_init_group(struct signal_struct *sig) { } +#endif static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) { @@ -1346,11 +1350,11 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) init_waitqueue_head(&sig->wait_chldexit); sig->curr_target = tsk; init_sigpending(&sig->shared_pending); - INIT_LIST_HEAD(&sig->posix_timers); seqlock_init(&sig->stats_lock); prev_cputime_init(&sig->prev_cputime); #ifdef CONFIG_POSIX_TIMERS + INIT_LIST_HEAD(&sig->posix_timers); hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); sig->real_timer.function = it_real_fn; #endif @@ -1425,6 +1429,7 @@ static void rt_mutex_init_task(struct task_struct *p) #endif } +#ifdef CONFIG_POSIX_TIMERS /* * Initialize POSIX timer handling for a single task. */ @@ -1437,6 +1442,9 @@ static void posix_cpu_timers_init(struct task_struct *tsk) INIT_LIST_HEAD(&tsk->cpu_timers[1]); INIT_LIST_HEAD(&tsk->cpu_timers[2]); } +#else +static inline void posix_cpu_timers_init(struct task_struct *tsk) { } +#endif static inline void init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid) diff --git a/kernel/kthread.c b/kernel/kthread.c index 2318fba86277..8461a4372e8a 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -850,7 +850,6 @@ void __kthread_queue_delayed_work(struct kthread_worker *worker, list_add(&work->node, &worker->delayed_work_list); work->worker = worker; - timer_stats_timer_set_start_info(&dwork->timer); timer->expires = jiffies + delay; add_timer(timer); } diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 2516b8df6dbb..a688a8206727 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2246,6 +2246,7 @@ prio_changed_rt(struct rq *rq, struct task_struct *p, int oldprio) } } +#ifdef CONFIG_POSIX_TIMERS static void watchdog(struct rq *rq, struct task_struct *p) { unsigned long soft, hard; @@ -2267,6 +2268,9 @@ static void watchdog(struct rq *rq, struct task_struct *p) p->cputime_expires.sched_exp = p->se.sum_exec_runtime; } } +#else +static inline void watchdog(struct rq *rq, struct task_struct *p) { } +#endif static void task_tick_rt(struct rq *rq, struct task_struct *p, int queued) { diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 34659a853505..c69a9870ab79 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h @@ -172,18 +172,19 @@ sched_info_switch(struct rq *rq, */ /** - * cputimer_running - return true if cputimer is running + * get_running_cputimer - return &tsk->signal->cputimer if cputimer is running * * @tsk: Pointer to target task. */ -static inline bool cputimer_running(struct task_struct *tsk) - +#ifdef CONFIG_POSIX_TIMERS +static inline +struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) { struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; /* Check if cputimer isn't running. This is accessed without locking. */ if (!READ_ONCE(cputimer->running)) - return false; + return NULL; /* * After we flush the task's sum_exec_runtime to sig->sum_sched_runtime @@ -200,10 +201,17 @@ static inline bool cputimer_running(struct task_struct *tsk) * clock delta is behind the expiring timer value. */ if (unlikely(!tsk->sighand)) - return false; + return NULL; - return true; + return cputimer; } +#else +static inline +struct thread_group_cputimer *get_running_cputimer(struct task_struct *tsk) +{ + return NULL; +} +#endif /** * account_group_user_time - Maintain utime for a thread group. @@ -218,9 +226,9 @@ static inline bool cputimer_running(struct task_struct *tsk) static inline void account_group_user_time(struct task_struct *tsk, cputime_t cputime) { - struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - if (!cputimer_running(tsk)) + if (!cputimer) return; atomic64_add(cputime, &cputimer->cputime_atomic.utime); @@ -239,9 +247,9 @@ static inline void account_group_user_time(struct task_struct *tsk, static inline void account_group_system_time(struct task_struct *tsk, cputime_t cputime) { - struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - if (!cputimer_running(tsk)) + if (!cputimer) return; atomic64_add(cputime, &cputimer->cputime_atomic.stime); @@ -260,9 +268,9 @@ static inline void account_group_system_time(struct task_struct *tsk, static inline void account_group_exec_runtime(struct task_struct *tsk, unsigned long long ns) { - struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; + struct thread_group_cputimer *cputimer = get_running_cputimer(tsk); - if (!cputimer_running(tsk)) + if (!cputimer) return; atomic64_add(ns, &cputimer->cputime_atomic.sum_exec_runtime); diff --git a/kernel/time/Makefile b/kernel/time/Makefile index 976840d29a71..938dbf33ef49 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -15,6 +15,5 @@ ifeq ($(CONFIG_GENERIC_CLOCKEVENTS_BROADCAST),y) endif obj-$(CONFIG_GENERIC_SCHED_CLOCK) += sched_clock.o obj-$(CONFIG_TICK_ONESHOT) += tick-oneshot.o tick-sched.o -obj-$(CONFIG_TIMER_STATS) += timer_stats.o obj-$(CONFIG_DEBUG_FS) += timekeeping_debug.o obj-$(CONFIG_TEST_UDELAY) += test_udelay.o diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index c6ecedd3b839..8e11d8d9f419 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -94,17 +94,15 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) = }; static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = { + /* Make sure we catch unsupported clockids */ + [0 ... MAX_CLOCKS - 1] = HRTIMER_MAX_CLOCK_BASES, + [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, [CLOCK_TAI] = HRTIMER_BASE_TAI, }; -static inline int hrtimer_clockid_to_base(clockid_t clock_id) -{ - return hrtimer_clock_to_base_table[clock_id]; -} - /* * Functions and macros which are different for UP/SMP systems are kept in a * single place @@ -766,34 +764,6 @@ void hrtimers_resume(void) clock_was_set_delayed(); } -static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) -{ -#ifdef CONFIG_TIMER_STATS - if (timer->start_site) - return; - timer->start_site = __builtin_return_address(0); - memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); - timer->start_pid = current->pid; -#endif -} - -static inline void timer_stats_hrtimer_clear_start_info(struct hrtimer *timer) -{ -#ifdef CONFIG_TIMER_STATS - timer->start_site = NULL; -#endif -} - -static inline void timer_stats_account_hrtimer(struct hrtimer *timer) -{ -#ifdef CONFIG_TIMER_STATS - if (likely(!timer_stats_active)) - return; - timer_stats_update_stats(timer, timer->start_pid, timer->start_site, - timer->function, timer->start_comm, 0); -#endif -} - /* * Counterpart to lock_hrtimer_base above: */ @@ -932,7 +902,6 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest * rare case and less expensive than a smp call. */ debug_deactivate(timer); - timer_stats_hrtimer_clear_start_info(timer); reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases); if (!restart) @@ -990,8 +959,6 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim, /* Switch the timer base, if necessary: */ new_base = switch_hrtimer_base(timer, base, mode & HRTIMER_MODE_PINNED); - timer_stats_hrtimer_set_start_info(timer); - leftmost = enqueue_hrtimer(timer, new_base); if (!leftmost) goto unlock; @@ -1112,6 +1079,18 @@ u64 hrtimer_get_next_event(void) } #endif +static inline int hrtimer_clockid_to_base(clockid_t clock_id) +{ + if (likely(clock_id < MAX_CLOCKS)) { + int base = hrtimer_clock_to_base_table[clock_id]; + + if (likely(base != HRTIMER_MAX_CLOCK_BASES)) + return base; + } + WARN(1, "Invalid clockid %d. Using MONOTONIC\n", clock_id); + return HRTIMER_BASE_MONOTONIC; +} + static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, enum hrtimer_mode mode) { @@ -1128,12 +1107,6 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id, base = hrtimer_clockid_to_base(clock_id); timer->base = &cpu_base->clock_base[base]; timerqueue_init(&timer->node); - -#ifdef CONFIG_TIMER_STATS - timer->start_site = NULL; - timer->start_pid = -1; - memset(timer->start_comm, 0, TASK_COMM_LEN); -#endif } /** @@ -1217,7 +1190,6 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base, raw_write_seqcount_barrier(&cpu_base->seq); __remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0); - timer_stats_account_hrtimer(timer); fn = timer->function; /* diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 17ac99b60ee5..987e496bb51a 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -29,12 +29,13 @@ */ static struct tick_device tick_broadcast_device; -static cpumask_var_t tick_broadcast_mask; -static cpumask_var_t tick_broadcast_on; -static cpumask_var_t tmpmask; -static DEFINE_RAW_SPINLOCK(tick_broadcast_lock); +static cpumask_var_t tick_broadcast_mask __cpumask_var_read_mostly; +static cpumask_var_t tick_broadcast_on __cpumask_var_read_mostly; +static cpumask_var_t tmpmask __cpumask_var_read_mostly; static int tick_broadcast_forced; +static __cacheline_aligned_in_smp DEFINE_RAW_SPINLOCK(tick_broadcast_lock); + #ifdef CONFIG_TICK_ONESHOT static void tick_broadcast_clear_oneshot(int cpu); static void tick_resume_broadcast_oneshot(struct clock_event_device *bc); @@ -516,9 +517,9 @@ void tick_resume_broadcast(void) #ifdef CONFIG_TICK_ONESHOT -static cpumask_var_t tick_broadcast_oneshot_mask; -static cpumask_var_t tick_broadcast_pending_mask; -static cpumask_var_t tick_broadcast_force_mask; +static cpumask_var_t tick_broadcast_oneshot_mask __cpumask_var_read_mostly; +static cpumask_var_t tick_broadcast_pending_mask __cpumask_var_read_mostly; +static cpumask_var_t tick_broadcast_force_mask __cpumask_var_read_mostly; /* * Exposed for debugging: see timer_list.c diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index db087d7e106d..95b258dd75db 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -1275,27 +1275,8 @@ error: /* even if we error out, we forwarded the time, so call update */ } EXPORT_SYMBOL(timekeeping_inject_offset); - /** - * timekeeping_get_tai_offset - Returns current TAI offset from UTC - * - */ -s32 timekeeping_get_tai_offset(void) -{ - struct timekeeper *tk = &tk_core.timekeeper; - unsigned int seq; - s32 ret; - - do { - seq = read_seqcount_begin(&tk_core.seq); - ret = tk->tai_offset; - } while (read_seqcount_retry(&tk_core.seq, seq)); - - return ret; -} - -/** - * __timekeeping_set_tai_offset - Lock free worker function + * __timekeeping_set_tai_offset - Sets the TAI offset from UTC and monotonic * */ static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset) @@ -1304,24 +1285,6 @@ static void __timekeeping_set_tai_offset(struct timekeeper *tk, s32 tai_offset) tk->offs_tai = ktime_add(tk->offs_real, ktime_set(tai_offset, 0)); } -/** - * timekeeping_set_tai_offset - Sets the current TAI offset from UTC - * - */ -void timekeeping_set_tai_offset(s32 tai_offset) -{ - struct timekeeper *tk = &tk_core.timekeeper; - unsigned long flags; - - raw_spin_lock_irqsave(&timekeeper_lock, flags); - write_seqcount_begin(&tk_core.seq); - __timekeeping_set_tai_offset(tk, tai_offset); - timekeeping_update(tk, TK_MIRROR | TK_CLOCK_WAS_SET); - write_seqcount_end(&tk_core.seq); - raw_spin_unlock_irqrestore(&timekeeper_lock, flags); - clock_was_set(); -} - /** * change_clocksource - Swaps clocksources if a new one is available * diff --git a/kernel/time/timekeeping.h b/kernel/time/timekeeping.h index 704f595ce83f..d0914676d4c5 100644 --- a/kernel/time/timekeeping.h +++ b/kernel/time/timekeeping.h @@ -11,8 +11,6 @@ extern ktime_t ktime_get_update_offsets_now(unsigned int *cwsseq, extern int timekeeping_valid_for_hres(void); extern u64 timekeeping_max_deferment(void); extern int timekeeping_inject_offset(struct timespec *ts); -extern s32 timekeeping_get_tai_offset(void); -extern void timekeeping_set_tai_offset(s32 tai_offset); extern int timekeeping_suspend(void); extern void timekeeping_resume(void); diff --git a/kernel/time/timer.c b/kernel/time/timer.c index ec33a6933eae..82a6bfa0c307 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -571,38 +571,6 @@ internal_add_timer(struct timer_base *base, struct timer_list *timer) trigger_dyntick_cpu(base, timer); } -#ifdef CONFIG_TIMER_STATS -void __timer_stats_timer_set_start_info(struct timer_list *timer, void *addr) -{ - if (timer->start_site) - return; - - timer->start_site = addr; - memcpy(timer->start_comm, current->comm, TASK_COMM_LEN); - timer->start_pid = current->pid; -} - -static void timer_stats_account_timer(struct timer_list *timer) -{ - void *site; - - /* - * start_site can be concurrently reset by - * timer_stats_timer_clear_start_info() - */ - site = READ_ONCE(timer->start_site); - if (likely(!site)) - return; - - timer_stats_update_stats(timer, timer->start_pid, site, - timer->function, timer->start_comm, - timer->flags); -} - -#else -static void timer_stats_account_timer(struct timer_list *timer) {} -#endif - #ifdef CONFIG_DEBUG_OBJECTS_TIMERS static struct debug_obj_descr timer_debug_descr; @@ -789,11 +757,6 @@ static void do_init_timer(struct timer_list *timer, unsigned int flags, { timer->entry.pprev = NULL; timer->flags = flags | raw_smp_processor_id(); -#ifdef CONFIG_TIMER_STATS - timer->start_site = NULL; - timer->start_pid = -1; - memset(timer->start_comm, 0, TASK_COMM_LEN); -#endif lockdep_init_map(&timer->lockdep_map, name, key, 0); } @@ -1001,8 +964,6 @@ __mod_timer(struct timer_list *timer, unsigned long expires, bool pending_only) base = lock_timer_base(timer, &flags); } - timer_stats_timer_set_start_info(timer); - ret = detach_if_pending(timer, base, false); if (!ret && pending_only) goto out_unlock; @@ -1130,7 +1091,6 @@ void add_timer_on(struct timer_list *timer, int cpu) struct timer_base *new_base, *base; unsigned long flags; - timer_stats_timer_set_start_info(timer); BUG_ON(timer_pending(timer) || !timer->function); new_base = get_timer_cpu_base(timer->flags, cpu); @@ -1176,7 +1136,6 @@ int del_timer(struct timer_list *timer) debug_assert_init(timer); - timer_stats_timer_clear_start_info(timer); if (timer_pending(timer)) { base = lock_timer_base(timer, &flags); ret = detach_if_pending(timer, base, true); @@ -1204,10 +1163,9 @@ int try_to_del_timer_sync(struct timer_list *timer) base = lock_timer_base(timer, &flags); - if (base->running_timer != timer) { - timer_stats_timer_clear_start_info(timer); + if (base->running_timer != timer) ret = detach_if_pending(timer, base, true); - } + spin_unlock_irqrestore(&base->lock, flags); return ret; @@ -1331,7 +1289,6 @@ static void expire_timers(struct timer_base *base, struct hlist_head *head) unsigned long data; timer = hlist_entry(head->first, struct timer_list, entry); - timer_stats_account_timer(timer); base->running_timer = timer; detach_timer(timer, true); @@ -1868,7 +1825,6 @@ static void __init init_timer_cpus(void) void __init init_timers(void) { init_timer_cpus(); - init_timer_stats(); open_softirq(TIMER_SOFTIRQ, run_timer_softirq); } diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c index afe6cd1944fc..ff8d5c13d04b 100644 --- a/kernel/time/timer_list.c +++ b/kernel/time/timer_list.c @@ -62,21 +62,11 @@ static void print_timer(struct seq_file *m, struct hrtimer *taddr, struct hrtimer *timer, int idx, u64 now) { -#ifdef CONFIG_TIMER_STATS - char tmp[TASK_COMM_LEN + 1]; -#endif SEQ_printf(m, " #%d: ", idx); print_name_offset(m, taddr); SEQ_printf(m, ", "); print_name_offset(m, timer->function); SEQ_printf(m, ", S:%02x", timer->state); -#ifdef CONFIG_TIMER_STATS - SEQ_printf(m, ", "); - print_name_offset(m, timer->start_site); - memcpy(tmp, timer->start_comm, TASK_COMM_LEN); - tmp[TASK_COMM_LEN] = 0; - SEQ_printf(m, ", %s/%d", tmp, timer->start_pid); -#endif SEQ_printf(m, "\n"); SEQ_printf(m, " # expires at %Lu-%Lu nsecs [in %Ld to %Ld nsecs]\n", (unsigned long long)ktime_to_ns(hrtimer_get_softexpires(timer)), @@ -127,7 +117,7 @@ print_base(struct seq_file *m, struct hrtimer_clock_base *base, u64 now) SEQ_printf(m, " .base: %pK\n", base); SEQ_printf(m, " .index: %d\n", base->index); - SEQ_printf(m, " .resolution: %u nsecs\n", (unsigned) hrtimer_resolution); + SEQ_printf(m, " .resolution: %u nsecs\n", hrtimer_resolution); SEQ_printf(m, " .get_time: "); print_name_offset(m, base->get_time); diff --git a/kernel/time/timer_stats.c b/kernel/time/timer_stats.c deleted file mode 100644 index afddded947df..000000000000 --- a/kernel/time/timer_stats.c +++ /dev/null @@ -1,425 +0,0 @@ -/* - * kernel/time/timer_stats.c - * - * Collect timer usage statistics. - * - * Copyright(C) 2006, Red Hat, Inc., Ingo Molnar - * Copyright(C) 2006 Timesys Corp., Thomas Gleixner - * - * timer_stats is based on timer_top, a similar functionality which was part of - * Con Kolivas dyntick patch set. It was developed by Daniel Petrini at the - * Instituto Nokia de Tecnologia - INdT - Manaus. timer_top's design was based - * on dynamic allocation of the statistics entries and linear search based - * lookup combined with a global lock, rather than the static array, hash - * and per-CPU locking which is used by timer_stats. It was written for the - * pre hrtimer kernel code and therefore did not take hrtimers into account. - * Nevertheless it provided the base for the timer_stats implementation and - * was a helpful source of inspiration. Kudos to Daniel and the Nokia folks - * for this effort. - * - * timer_top.c is - * Copyright (C) 2005 Instituto Nokia de Tecnologia - INdT - Manaus - * Written by Daniel Petrini - * timer_top.c was released under the GNU General Public License version 2 - * - * We export the addresses and counting of timer functions being called, - * the pid and cmdline from the owner process if applicable. - * - * Start/stop data collection: - * # echo [1|0] >/proc/timer_stats - * - * Display the information collected so far: - * # cat /proc/timer_stats - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include -#include -#include -#include -#include -#include - -#include - -/* - * This is our basic unit of interest: a timer expiry event identified - * by the timer, its start/expire functions and the PID of the task that - * started the timer. We count the number of times an event happens: - */ -struct entry { - /* - * Hash list: - */ - struct entry *next; - - /* - * Hash keys: - */ - void *timer; - void *start_func; - void *expire_func; - pid_t pid; - - /* - * Number of timeout events: - */ - unsigned long count; - u32 flags; - - /* - * We save the command-line string to preserve - * this information past task exit: - */ - char comm[TASK_COMM_LEN + 1]; - -} ____cacheline_aligned_in_smp; - -/* - * Spinlock protecting the tables - not taken during lookup: - */ -static DEFINE_RAW_SPINLOCK(table_lock); - -/* - * Per-CPU lookup locks for fast hash lookup: - */ -static DEFINE_PER_CPU(raw_spinlock_t, tstats_lookup_lock); - -/* - * Mutex to serialize state changes with show-stats activities: - */ -static DEFINE_MUTEX(show_mutex); - -/* - * Collection status, active/inactive: - */ -int __read_mostly timer_stats_active; - -/* - * Beginning/end timestamps of measurement: - */ -static ktime_t time_start, time_stop; - -/* - * tstat entry structs only get allocated while collection is - * active and never freed during that time - this simplifies - * things quite a bit. - * - * They get freed when a new collection period is started. - */ -#define MAX_ENTRIES_BITS 10 -#define MAX_ENTRIES (1UL << MAX_ENTRIES_BITS) - -static unsigned long nr_entries; -static struct entry entries[MAX_ENTRIES]; - -static atomic_t overflow_count; - -/* - * The entries are in a hash-table, for fast lookup: - */ -#define TSTAT_HASH_BITS (MAX_ENTRIES_BITS - 1) -#define TSTAT_HASH_SIZE (1UL << TSTAT_HASH_BITS) -#define TSTAT_HASH_MASK (TSTAT_HASH_SIZE - 1) - -#define __tstat_hashfn(entry) \ - (((unsigned long)(entry)->timer ^ \ - (unsigned long)(entry)->start_func ^ \ - (unsigned long)(entry)->expire_func ^ \ - (unsigned long)(entry)->pid ) & TSTAT_HASH_MASK) - -#define tstat_hashentry(entry) (tstat_hash_table + __tstat_hashfn(entry)) - -static struct entry *tstat_hash_table[TSTAT_HASH_SIZE] __read_mostly; - -static void reset_entries(void) -{ - nr_entries = 0; - memset(entries, 0, sizeof(entries)); - memset(tstat_hash_table, 0, sizeof(tstat_hash_table)); - atomic_set(&overflow_count, 0); -} - -static struct entry *alloc_entry(void) -{ - if (nr_entries >= MAX_ENTRIES) - return NULL; - - return entries + nr_entries++; -} - -static int match_entries(struct entry *entry1, struct entry *entry2) -{ - return entry1->timer == entry2->timer && - entry1->start_func == entry2->start_func && - entry1->expire_func == entry2->expire_func && - entry1->pid == entry2->pid; -} - -/* - * Look up whether an entry matching this item is present - * in the hash already. Must be called with irqs off and the - * lookup lock held: - */ -static struct entry *tstat_lookup(struct entry *entry, char *comm) -{ - struct entry **head, *curr, *prev; - - head = tstat_hashentry(entry); - curr = *head; - - /* - * The fastpath is when the entry is already hashed, - * we do this with the lookup lock held, but with the - * table lock not held: - */ - while (curr) { - if (match_entries(curr, entry)) - return curr; - - curr = curr->next; - } - /* - * Slowpath: allocate, set up and link a new hash entry: - */ - prev = NULL; - curr = *head; - - raw_spin_lock(&table_lock); - /* - * Make sure we have not raced with another CPU: - */ - while (curr) { - if (match_entries(curr, entry)) - goto out_unlock; - - prev = curr; - curr = curr->next; - } - - curr = alloc_entry(); - if (curr) { - *curr = *entry; - curr->count = 0; - curr->next = NULL; - memcpy(curr->comm, comm, TASK_COMM_LEN); - - smp_mb(); /* Ensure that curr is initialized before insert */ - - if (prev) - prev->next = curr; - else - *head = curr; - } - out_unlock: - raw_spin_unlock(&table_lock); - - return curr; -} - -/** - * timer_stats_update_stats - Update the statistics for a timer. - * @timer: pointer to either a timer_list or a hrtimer - * @pid: the pid of the task which set up the timer - * @startf: pointer to the function which did the timer setup - * @timerf: pointer to the timer callback function of the timer - * @comm: name of the process which set up the timer - * @tflags: The flags field of the timer - * - * When the timer is already registered, then the event counter is - * incremented. Otherwise the timer is registered in a free slot. - */ -void timer_stats_update_stats(void *timer, pid_t pid, void *startf, - void *timerf, char *comm, u32 tflags) -{ - /* - * It doesn't matter which lock we take: - */ - raw_spinlock_t *lock; - struct entry *entry, input; - unsigned long flags; - - if (likely(!timer_stats_active)) - return; - - lock = &per_cpu(tstats_lookup_lock, raw_smp_processor_id()); - - input.timer = timer; - input.start_func = startf; - input.expire_func = timerf; - input.pid = pid; - input.flags = tflags; - - raw_spin_lock_irqsave(lock, flags); - if (!timer_stats_active) - goto out_unlock; - - entry = tstat_lookup(&input, comm); - if (likely(entry)) - entry->count++; - else - atomic_inc(&overflow_count); - - out_unlock: - raw_spin_unlock_irqrestore(lock, flags); -} - -static void print_name_offset(struct seq_file *m, unsigned long addr) -{ - char symname[KSYM_NAME_LEN]; - - if (lookup_symbol_name(addr, symname) < 0) - seq_printf(m, "<%p>", (void *)addr); - else - seq_printf(m, "%s", symname); -} - -static int tstats_show(struct seq_file *m, void *v) -{ - struct timespec64 period; - struct entry *entry; - unsigned long ms; - long events = 0; - ktime_t time; - int i; - - mutex_lock(&show_mutex); - /* - * If still active then calculate up to now: - */ - if (timer_stats_active) - time_stop = ktime_get(); - - time = ktime_sub(time_stop, time_start); - - period = ktime_to_timespec64(time); - ms = period.tv_nsec / 1000000; - - seq_puts(m, "Timer Stats Version: v0.3\n"); - seq_printf(m, "Sample period: %ld.%03ld s\n", (long)period.tv_sec, ms); - if (atomic_read(&overflow_count)) - seq_printf(m, "Overflow: %d entries\n", atomic_read(&overflow_count)); - seq_printf(m, "Collection: %s\n", timer_stats_active ? "active" : "inactive"); - - for (i = 0; i < nr_entries; i++) { - entry = entries + i; - if (entry->flags & TIMER_DEFERRABLE) { - seq_printf(m, "%4luD, %5d %-16s ", - entry->count, entry->pid, entry->comm); - } else { - seq_printf(m, " %4lu, %5d %-16s ", - entry->count, entry->pid, entry->comm); - } - - print_name_offset(m, (unsigned long)entry->start_func); - seq_puts(m, " ("); - print_name_offset(m, (unsigned long)entry->expire_func); - seq_puts(m, ")\n"); - - events += entry->count; - } - - ms += period.tv_sec * 1000; - if (!ms) - ms = 1; - - if (events && period.tv_sec) - seq_printf(m, "%ld total events, %ld.%03ld events/sec\n", - events, events * 1000 / ms, - (events * 1000000 / ms) % 1000); - else - seq_printf(m, "%ld total events\n", events); - - mutex_unlock(&show_mutex); - - return 0; -} - -/* - * After a state change, make sure all concurrent lookup/update - * activities have stopped: - */ -static void sync_access(void) -{ - unsigned long flags; - int cpu; - - for_each_online_cpu(cpu) { - raw_spinlock_t *lock = &per_cpu(tstats_lookup_lock, cpu); - - raw_spin_lock_irqsave(lock, flags); - /* nothing */ - raw_spin_unlock_irqrestore(lock, flags); - } -} - -static ssize_t tstats_write(struct file *file, const char __user *buf, - size_t count, loff_t *offs) -{ - char ctl[2]; - - if (count != 2 || *offs) - return -EINVAL; - - if (copy_from_user(ctl, buf, count)) - return -EFAULT; - - mutex_lock(&show_mutex); - switch (ctl[0]) { - case '0': - if (timer_stats_active) { - timer_stats_active = 0; - time_stop = ktime_get(); - sync_access(); - } - break; - case '1': - if (!timer_stats_active) { - reset_entries(); - time_start = ktime_get(); - smp_mb(); - timer_stats_active = 1; - } - break; - default: - count = -EINVAL; - } - mutex_unlock(&show_mutex); - - return count; -} - -static int tstats_open(struct inode *inode, struct file *filp) -{ - return single_open(filp, tstats_show, NULL); -} - -static const struct file_operations tstats_fops = { - .open = tstats_open, - .read = seq_read, - .write = tstats_write, - .llseek = seq_lseek, - .release = single_release, -}; - -void __init init_timer_stats(void) -{ - int cpu; - - for_each_possible_cpu(cpu) - raw_spin_lock_init(&per_cpu(tstats_lookup_lock, cpu)); -} - -static int __init init_tstats_procfs(void) -{ - struct proc_dir_entry *pe; - - pe = proc_create("timer_stats", 0644, NULL, &tstats_fops); - if (!pe) - return -ENOMEM; - return 0; -} -__initcall(init_tstats_procfs); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1d9fb6543a66..072cbc9b175d 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1523,8 +1523,6 @@ static void __queue_delayed_work(int cpu, struct workqueue_struct *wq, return; } - timer_stats_timer_set_start_info(&dwork->timer); - dwork->wq = wq; dwork->cpu = cpu; timer->expires = jiffies + delay; diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index eb9e9a7870fa..132af338d6dd 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -980,20 +980,6 @@ config DEBUG_TIMEKEEPING If unsure, say N. -config TIMER_STATS - bool "Collect kernel timers statistics" - depends on DEBUG_KERNEL && PROC_FS - help - If you say Y here, additional code will be inserted into the - timer routines to collect statistics about kernel timers being - reprogrammed. The statistics can be read from /proc/timer_stats. - The statistics collection is started by writing 1 to /proc/timer_stats, - writing 0 stops it. This feature is useful to collect information - about timer usage patterns in kernel and userspace. This feature - is lightweight if enabled in the kernel config but not activated - (it defaults to deactivated on bootup and will only be activated - if some application like powertop activates it explicitly). - config DEBUG_PREEMPT bool "Debug preemptible kernel" depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT diff --git a/lib/timerqueue.c b/lib/timerqueue.c index adc6ee0a5126..4a720ed4fdaf 100644 --- a/lib/timerqueue.c +++ b/lib/timerqueue.c @@ -80,8 +80,7 @@ bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) if (head->next == node) { struct rb_node *rbn = rb_next(&node->node); - head->next = rbn ? - rb_entry(rbn, struct timerqueue_node, node) : NULL; + head->next = rb_entry_safe(rbn, struct timerqueue_node, node); } rb_erase(&node->node, &head->head); RB_CLEAR_NODE(&node->node);