From b32614c03413f8a6025d8677c2b7c0ee976e63d4 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Sun, 27 Nov 2016 00:13:34 +0100 Subject: [PATCH] tracing/rb: Convert to hotplug state machine Install the callbacks via the state machine. The notifier in struct ring_buffer is replaced by the multi instance interface. Upon __ring_buffer_alloc() invocation, cpuhp_state_add_instance() will invoke the trace_rb_cpu_prepare() on each CPU. This callback may now fail. This means __ring_buffer_alloc() will fail and cleanup (like previously) and during a CPU up event this failure will not allow the CPU to come up. Signed-off-by: Sebastian Andrzej Siewior Cc: Steven Rostedt Cc: rt@linutronix.de Link: http://lkml.kernel.org/r/20161126231350.10321-7-bigeasy@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/cpuhotplug.h | 1 + include/linux/ring_buffer.h | 6 ++ kernel/trace/ring_buffer.c | 133 ++++++++++++------------------------ kernel/trace/trace.c | 15 +++- 4 files changed, 65 insertions(+), 90 deletions(-) diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index e3771fb959c0..18bcfeb2463e 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -62,6 +62,7 @@ enum cpuhp_state { CPUHP_TOPOLOGY_PREPARE, CPUHP_NET_IUCV_PREPARE, CPUHP_ARM_BL_PREPARE, + CPUHP_TRACE_RB_PREPARE, CPUHP_TIMERS_DEAD, CPUHP_NOTF_ERR_INJ_PREPARE, CPUHP_MIPS_SOC_PREPARE, diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index 4acc552e9279..b6d4568795a7 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -198,4 +198,10 @@ enum ring_buffer_flags { RB_FL_OVERWRITE = 1 << 0, }; +#ifdef CONFIG_RING_BUFFER +int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node); +#else +#define trace_rb_cpu_prepare NULL +#endif + #endif /* _LINUX_RING_BUFFER_H */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 9c143739b8d7..a7a055f167c7 100644 --- a/kernel/trace/ring_buffer.c +++ 
b/kernel/trace/ring_buffer.c @@ -479,9 +479,7 @@ struct ring_buffer { struct ring_buffer_per_cpu **buffers; -#ifdef CONFIG_HOTPLUG_CPU - struct notifier_block cpu_notify; -#endif + struct hlist_node node; u64 (*clock)(void); struct rb_irq_work irq_work; @@ -1274,11 +1272,6 @@ static void rb_free_cpu_buffer(struct ring_buffer_per_cpu *cpu_buffer) kfree(cpu_buffer); } -#ifdef CONFIG_HOTPLUG_CPU -static int rb_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu); -#endif - /** * __ring_buffer_alloc - allocate a new ring_buffer * @size: the size in bytes per cpu that is needed. @@ -1296,6 +1289,7 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, long nr_pages; int bsize; int cpu; + int ret; /* keep it in its own cache line */ buffer = kzalloc(ALIGN(sizeof(*buffer), cache_line_size()), @@ -1318,17 +1312,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, if (nr_pages < 2) nr_pages = 2; - /* - * In case of non-hotplug cpu, if the ring-buffer is allocated - * in early initcall, it will not be notified of secondary cpus. - * In that off case, we need to allocate for all possible cpus. 
- */ -#ifdef CONFIG_HOTPLUG_CPU - cpu_notifier_register_begin(); - cpumask_copy(buffer->cpumask, cpu_online_mask); -#else - cpumask_copy(buffer->cpumask, cpu_possible_mask); -#endif buffer->cpus = nr_cpu_ids; bsize = sizeof(void *) * nr_cpu_ids; @@ -1337,19 +1320,15 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, if (!buffer->buffers) goto fail_free_cpumask; - for_each_buffer_cpu(buffer, cpu) { - buffer->buffers[cpu] = - rb_allocate_cpu_buffer(buffer, nr_pages, cpu); - if (!buffer->buffers[cpu]) - goto fail_free_buffers; - } + cpu = raw_smp_processor_id(); + cpumask_set_cpu(cpu, buffer->cpumask); + buffer->buffers[cpu] = rb_allocate_cpu_buffer(buffer, nr_pages, cpu); + if (!buffer->buffers[cpu]) + goto fail_free_buffers; -#ifdef CONFIG_HOTPLUG_CPU - buffer->cpu_notify.notifier_call = rb_cpu_notify; - buffer->cpu_notify.priority = 0; - __register_cpu_notifier(&buffer->cpu_notify); - cpu_notifier_register_done(); -#endif + ret = cpuhp_state_add_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node); + if (ret < 0) + goto fail_free_buffers; mutex_init(&buffer->mutex); @@ -1364,9 +1343,6 @@ struct ring_buffer *__ring_buffer_alloc(unsigned long size, unsigned flags, fail_free_cpumask: free_cpumask_var(buffer->cpumask); -#ifdef CONFIG_HOTPLUG_CPU - cpu_notifier_register_done(); -#endif fail_free_buffer: kfree(buffer); @@ -1383,18 +1359,11 @@ ring_buffer_free(struct ring_buffer *buffer) { int cpu; -#ifdef CONFIG_HOTPLUG_CPU - cpu_notifier_register_begin(); - __unregister_cpu_notifier(&buffer->cpu_notify); -#endif + cpuhp_state_remove_instance(CPUHP_TRACE_RB_PREPARE, &buffer->node); for_each_buffer_cpu(buffer, cpu) rb_free_cpu_buffer(buffer->buffers[cpu]); -#ifdef CONFIG_HOTPLUG_CPU - cpu_notifier_register_done(); -#endif - kfree(buffer->buffers); free_cpumask_var(buffer->cpumask); @@ -4633,62 +4602,48 @@ int ring_buffer_read_page(struct ring_buffer *buffer, } EXPORT_SYMBOL_GPL(ring_buffer_read_page); -#ifdef CONFIG_HOTPLUG_CPU -static int 
rb_cpu_notify(struct notifier_block *self, - unsigned long action, void *hcpu) +/* + * We only allocate new buffers, never free them if the CPU goes down. + * If we were to free the buffer, then the user would lose any trace that was in + * the buffer. + */ +int trace_rb_cpu_prepare(unsigned int cpu, struct hlist_node *node) { - struct ring_buffer *buffer = - container_of(self, struct ring_buffer, cpu_notify); - long cpu = (long)hcpu; + struct ring_buffer *buffer; long nr_pages_same; int cpu_i; unsigned long nr_pages; - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - if (cpumask_test_cpu(cpu, buffer->cpumask)) - return NOTIFY_OK; + buffer = container_of(node, struct ring_buffer, node); + if (cpumask_test_cpu(cpu, buffer->cpumask)) + return 0; - nr_pages = 0; - nr_pages_same = 1; - /* check if all cpu sizes are same */ - for_each_buffer_cpu(buffer, cpu_i) { - /* fill in the size from first enabled cpu */ - if (nr_pages == 0) - nr_pages = buffer->buffers[cpu_i]->nr_pages; - if (nr_pages != buffer->buffers[cpu_i]->nr_pages) { - nr_pages_same = 0; - break; - } + nr_pages = 0; + nr_pages_same = 1; + /* check if all cpu sizes are same */ + for_each_buffer_cpu(buffer, cpu_i) { + /* fill in the size from first enabled cpu */ + if (nr_pages == 0) + nr_pages = buffer->buffers[cpu_i]->nr_pages; + if (nr_pages != buffer->buffers[cpu_i]->nr_pages) { + nr_pages_same = 0; + break; } - /* allocate minimum pages, user can later expand it */ - if (!nr_pages_same) - nr_pages = 2; - buffer->buffers[cpu] = - rb_allocate_cpu_buffer(buffer, nr_pages, cpu); - if (!buffer->buffers[cpu]) { - WARN(1, "failed to allocate ring buffer on CPU %ld\n", - cpu); - return NOTIFY_OK; - } - smp_wmb(); - cpumask_set_cpu(cpu, buffer->cpumask); - break; - case CPU_DOWN_PREPARE: - case CPU_DOWN_PREPARE_FROZEN: - /* - * Do nothing. - * If we were to free the buffer, then the user would - * lose any trace that was in the buffer. 
- */ - break; - default: - break; } - return NOTIFY_OK; + /* allocate minimum pages, user can later expand it */ + if (!nr_pages_same) + nr_pages = 2; + buffer->buffers[cpu] = + rb_allocate_cpu_buffer(buffer, nr_pages, cpu); + if (!buffer->buffers[cpu]) { + WARN(1, "failed to allocate ring buffer on CPU %u\n", + cpu); + return -ENOMEM; + } + smp_wmb(); + cpumask_set_cpu(cpu, buffer->cpumask); + return 0; } -#endif #ifdef CONFIG_RING_BUFFER_STARTUP_TEST /* diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8696ce6bf2f6..465d56febc5b 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -7659,10 +7659,21 @@ __init static int tracer_alloc_buffers(void) raw_spin_lock_init(&global_trace.start_lock); + /* + * The prepare callback allocates some memory for the ring buffer. We + * don't free the buffer if the CPU goes down. If we were to free + * the buffer, then the user would lose any trace that was in the + * buffer. The memory will be removed once the "instance" is removed. + */ + ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE, + "trace/RB:preapre", trace_rb_cpu_prepare, + NULL); + if (ret < 0) + goto out_free_cpumask; /* Used for event triggers */ temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE); if (!temp_buffer) - goto out_free_cpumask; + goto out_rm_hp_state; if (trace_create_savedcmd() < 0) goto out_free_temp_buffer; @@ -7723,6 +7734,8 @@ out_free_savedcmd: free_saved_cmdlines_buffer(savedcmd); out_free_temp_buffer: ring_buffer_free(temp_buffer); +out_rm_hp_state: + cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE); out_free_cpumask: free_cpumask_var(global_trace.tracing_cpumask); out_free_buffer_mask: