From ddf84433f411b612e935a6719ee395bb9dd2221f Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 25 Nov 2013 12:59:03 +0530 Subject: [PATCH 1/7] ARC: [SMP] cpumask not needed in IPI send path The current IPI sending callstack needlessly involves cpumask. arch_send_call_function_single_ipi(cpu) / smp_send_reschedule(cpu) ipi_send_msg(cpumask_of(cpu)) --> [cpu to cpumask] plat_smp_ops.ipi_send(callmap) for_each_cpu(callmap) --> [cpumask to cpu] do_plat_specific_ipi_PER_CPU Given that current backends are not capable of 1:N IPIs, let's simplify the interface for now, by keeping "a" cpu all along. Signed-off-by: Vineet Gupta --- arch/arc/include/asm/smp.h | 4 ++-- arch/arc/kernel/smp.c | 23 ++++++++++++++--------- arch/arc/plat-arcfpga/smp.c | 8 ++------ 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index eefc29f08cdb..9b40e3bdc1ba 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -46,13 +46,13 @@ extern int smp_ipi_irq_setup(int cpu, int irq); * * @info: SoC SMP specific info for /proc/cpuinfo etc * @cpu_kick: For Master to kickstart a cpu (optionally at a PC) - * @ipi_send: To send IPI to a @cpumask + * @ipi_send: To send IPI to a @cpu * @ips_clear: To clear IPI received by @cpu at @irq */ struct plat_smp_ops { const char *info; void (*cpu_kick)(int cpu, unsigned long pc); - void (*ipi_send)(void *callmap); + void (*ipi_send)(int cpu); void (*ipi_clear)(int cpu, int irq); }; diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index c2f9ebbc38f6..f2fdef964045 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -220,28 +220,33 @@ struct ipi_data { static DEFINE_PER_CPU(struct ipi_data, ipi_data); -static void ipi_send_msg(const struct cpumask *callmap, enum ipi_msg_type msg) +static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg) { + struct ipi_data *ipi = &per_cpu(ipi_data, cpu); unsigned long flags; - unsigned int cpu; 
local_irq_save(flags); - for_each_cpu(cpu, callmap) { - struct ipi_data *ipi = &per_cpu(ipi_data, cpu); - set_bit(msg, &ipi->bits); - } + set_bit(msg, &ipi->bits); /* Call the platform specific cross-CPU call function */ if (plat_smp_ops.ipi_send) - plat_smp_ops.ipi_send((void *)callmap); + plat_smp_ops.ipi_send(cpu); local_irq_restore(flags); } +static void ipi_send_msg(const struct cpumask *callmap, enum ipi_msg_type msg) +{ + unsigned int cpu; + + for_each_cpu(cpu, callmap) + ipi_send_msg_one(cpu, msg); +} + void smp_send_reschedule(int cpu) { - ipi_send_msg(cpumask_of(cpu), IPI_RESCHEDULE); + ipi_send_msg_one(cpu, IPI_RESCHEDULE); } void smp_send_stop(void) @@ -254,7 +259,7 @@ void smp_send_stop(void) void arch_send_call_function_single_ipi(int cpu) { - ipi_send_msg(cpumask_of(cpu), IPI_CALL_FUNC); + ipi_send_msg_one(cpu, IPI_CALL_FUNC); } void arch_send_call_function_ipi_mask(const struct cpumask *mask) diff --git a/arch/arc/plat-arcfpga/smp.c b/arch/arc/plat-arcfpga/smp.c index 91b55349a5f8..5e4fe959665c 100644 --- a/arch/arc/plat-arcfpga/smp.c +++ b/arch/arc/plat-arcfpga/smp.c @@ -88,13 +88,9 @@ void iss_model_init_smp(unsigned int cpu) smp_ipi_irq_setup(cpu, IDU_INTERRUPT_0 + cpu); } -static void iss_model_ipi_send(void *arg) +static void iss_model_ipi_send(int cpu) { - struct cpumask *callmap = arg; - unsigned int cpu; - - for_each_cpu(cpu, callmap) - idu_irq_assert(cpu); + idu_irq_assert(cpu); } static void iss_model_ipi_clear(int cpu, int irq) From ccdaa6e0c86d8f48bf87439e4d6942b7a3e58899 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 25 Nov 2013 14:37:14 +0530 Subject: [PATCH 2/7] ARC: [SMP] IPI ACK interface doesn't need "self" cpu-id The interface is confusing, it feels like we are getting "sender" info, whereas it is the "receiver", which can very well be retrieved by smp_processor_id(), if need be. 
Signed-off-by: Vineet Gupta --- arch/arc/include/asm/smp.h | 4 ++-- arch/arc/kernel/smp.c | 2 +- arch/arc/plat-arcfpga/smp.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/arc/include/asm/smp.h b/arch/arc/include/asm/smp.h index 9b40e3bdc1ba..5d06eee43ea9 100644 --- a/arch/arc/include/asm/smp.h +++ b/arch/arc/include/asm/smp.h @@ -47,13 +47,13 @@ extern int smp_ipi_irq_setup(int cpu, int irq); * @info: SoC SMP specific info for /proc/cpuinfo etc * @cpu_kick: For Master to kickstart a cpu (optionally at a PC) * @ipi_send: To send IPI to a @cpu - * @ips_clear: To clear IPI received by @cpu at @irq + * @ipi_clear: To clear IPI received at @irq */ struct plat_smp_ops { const char *info; void (*cpu_kick)(int cpu, unsigned long pc); void (*ipi_send)(int cpu); - void (*ipi_clear)(int cpu, int irq); + void (*ipi_clear)(int irq); }; /* TBD: stop exporting it for direct population by platform */ diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index f2fdef964045..9f771b5d66c6 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -310,7 +310,7 @@ irqreturn_t do_IPI(int irq, void *dev_id) unsigned long ops; if (plat_smp_ops.ipi_clear) - plat_smp_ops.ipi_clear(cpu, irq); + plat_smp_ops.ipi_clear(irq); /* * XXX: is this loop really needed diff --git a/arch/arc/plat-arcfpga/smp.c b/arch/arc/plat-arcfpga/smp.c index 5e4fe959665c..8a12741f5f7a 100644 --- a/arch/arc/plat-arcfpga/smp.c +++ b/arch/arc/plat-arcfpga/smp.c @@ -93,9 +93,9 @@ static void iss_model_ipi_send(int cpu) idu_irq_assert(cpu); } -static void iss_model_ipi_clear(int cpu, int irq) +static void iss_model_ipi_clear(int irq) { - idu_irq_clear(IDU_INTERRUPT_0 + cpu); + idu_irq_clear(IDU_INTERRUPT_0 + smp_processor_id()); } void iss_model_init_early_smp(void) From 53dc110c97436dcf2a758091c98093657116e91c Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Mon, 25 Nov 2013 14:48:39 +0530 Subject: [PATCH 3/7] ARC: [SMP] cpu halt interface doesn't need "self" cpu-id 
Signed-off-by: Vineet Gupta --- arch/arc/kernel/smp.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 9f771b5d66c6..467dd9cca18f 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -270,12 +270,12 @@ void arch_send_call_function_ipi_mask(const struct cpumask *mask) /* * ipi_cpu_stop - handle IPI from smp_send_stop() */ -static void ipi_cpu_stop(unsigned int cpu) +static void ipi_cpu_stop(void) { machine_halt(); } -static inline void __do_IPI(unsigned long *ops, struct ipi_data *ipi, int cpu) +static inline void __do_IPI(unsigned long *ops, struct ipi_data *ipi) { unsigned long msg = 0; @@ -292,11 +292,10 @@ static inline void __do_IPI(unsigned long *ops, struct ipi_data *ipi, int cpu) break; case IPI_CPU_STOP: - ipi_cpu_stop(cpu); + ipi_cpu_stop(); break; } } while (msg < BITS_PER_LONG); - } /* @@ -317,7 +316,7 @@ irqreturn_t do_IPI(int irq, void *dev_id) * And do we need to move ipi_clean inside */ while ((ops = xchg(&ipi->bits, 0)) != 0) - __do_IPI(&ops, ipi, cpu); + __do_IPI(&ops, ipi); return IRQ_HANDLED; } From f2a4aa5646687f9a51616581221b1d348590d47e Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Tue, 26 Nov 2013 15:23:44 +0530 Subject: [PATCH 4/7] ARC: [SMP] simplify IPI code * ipi_data is just a word, no need to keep it as struct * find_next_bit() not needed to loop thru a 32bit word, ffs suffices --- arch/arc/kernel/smp.c | 56 +++++++++++++++++++++---------------------- 1 file changed, 28 insertions(+), 28 deletions(-) diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 467dd9cca18f..c00c612e8dd3 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -197,37 +197,31 @@ int __init setup_profiling_timer(unsigned int multiplier) /* Inter Processor Interrupt Handling */ /*****************************************************************************/ -/* - * structures for inter-processor calls - * A Collection of single bit ipi messages - * - 
*/ - -/* - * TODO_rajesh investigate tlb message types. - * IPI Timer not needed because each ARC has an individual Interrupting Timer - */ enum ipi_msg_type { - IPI_NOP = 0, + IPI_EMPTY = 0, IPI_RESCHEDULE = 1, IPI_CALL_FUNC, - IPI_CPU_STOP + IPI_CPU_STOP, }; -struct ipi_data { - unsigned long bits; -}; +/* + * In arches with IRQ for each msg type (above), receiver can use IRQ-id to + * figure out what msg was sent. For those which don't (ARC has dedicated IPI + * IRQ), the msg-type needs to be conveyed via per-cpu data + */ -static DEFINE_PER_CPU(struct ipi_data, ipi_data); +static DEFINE_PER_CPU(unsigned long, ipi_data); static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg) { - struct ipi_data *ipi = &per_cpu(ipi_data, cpu); + unsigned long __percpu *ipi_data_ptr = per_cpu_ptr(&ipi_data, cpu); unsigned long flags; + pr_debug("%d Sending msg [%d] to %d\n", smp_processor_id(), msg, cpu); + local_irq_save(flags); - set_bit(msg, &ipi->bits); + set_bit(msg, ipi_data_ptr); /* Call the platform specific cross-CPU call function */ if (plat_smp_ops.ipi_send) @@ -275,12 +269,11 @@ static void ipi_cpu_stop(void) machine_halt(); } -static inline void __do_IPI(unsigned long *ops, struct ipi_data *ipi) +static inline void __do_IPI(unsigned long pending) { - unsigned long msg = 0; + while (pending) { - do { - msg = find_next_bit(ops, BITS_PER_LONG, msg+1); + unsigned long msg = __ffs(pending); switch (msg) { case IPI_RESCHEDULE: @@ -294,8 +287,14 @@ static inline void __do_IPI(unsigned long *ops, struct ipi_data *ipi) case IPI_CPU_STOP: ipi_cpu_stop(); break; + + default: + pr_warn("IPI missing msg\n"); + } - } while (msg < BITS_PER_LONG); + + pending &= ~(1U << msg); + } } /* @@ -304,9 +303,10 @@ static inline void __do_IPI(unsigned long *ops, struct ipi_data *ipi) */ irqreturn_t do_IPI(int irq, void *dev_id) { - int cpu = smp_processor_id(); - struct ipi_data *ipi = &per_cpu(ipi_data, cpu); - unsigned long ops; + unsigned long pending; + + pr_debug("IPI [%ld] received 
on cpu %d\n", + *this_cpu_ptr(&ipi_data), smp_processor_id()); if (plat_smp_ops.ipi_clear) plat_smp_ops.ipi_clear(irq); @@ -315,8 +315,8 @@ irqreturn_t do_IPI(int irq, void *dev_id) * XXX: is this loop really needed * And do we need to move ipi_clean inside */ - while ((ops = xchg(&ipi->bits, 0)) != 0) - __do_IPI(&ops, ipi); + while ((pending = xchg(this_cpu_ptr(&ipi_data), 0)) != 0) + __do_IPI(pending); return IRQ_HANDLED; } From d8e8c7dda11f5d5cf90495f2e89d917a83509bc0 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 28 Nov 2013 13:57:54 +0530 Subject: [PATCH 5/7] ARC: [SMP] optimize IPI send and receive * Don't send an IPI if receiver already has a pending IPI. Atomically piggyback the new msg with pending msg. * IPI receiver looping on xchg() not required References: https://lkml.org/lkml/2013/11/25/232 Suggested-by: Peter Zijlstra Signed-off-by: Vineet Gupta --- arch/arc/kernel/smp.c | 68 +++++++++++++++++++++++++------------------ 1 file changed, 40 insertions(+), 28 deletions(-) diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index c00c612e8dd3..40859e5619f9 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -215,16 +215,31 @@ static DEFINE_PER_CPU(unsigned long, ipi_data); static void ipi_send_msg_one(int cpu, enum ipi_msg_type msg) { unsigned long __percpu *ipi_data_ptr = per_cpu_ptr(&ipi_data, cpu); + unsigned long old, new; unsigned long flags; pr_debug("%d Sending msg [%d] to %d\n", smp_processor_id(), msg, cpu); local_irq_save(flags); - set_bit(msg, ipi_data_ptr); + /* + * Atomically write new msg bit (in case others are writing too), + * and read back old value + */ + do { + new = old = *ipi_data_ptr; + new |= 1U << msg; + } while (cmpxchg(ipi_data_ptr, old, new) != old); - /* Call the platform specific cross-CPU call function */ - if (plat_smp_ops.ipi_send) + /* + * Call the platform specific IPI kick function, but avoid if possible: + * Only do so if there's no pending msg from other concurrent sender(s). 
+ * Otherwise, receiver will see this msg as well when it takes the + * IPI corresponding to that msg. This is true, even if it is already in + * IPI handler, because non-zero @old means it has not yet dequeued the msg(s) + * so @new msg can be a free-loader + */ + if (plat_smp_ops.ipi_send && !old) plat_smp_ops.ipi_send(cpu); local_irq_restore(flags); @@ -269,31 +284,23 @@ static void ipi_cpu_stop(void) machine_halt(); } -static inline void __do_IPI(unsigned long pending) +static inline void __do_IPI(unsigned long msg) { - while (pending) { + switch (msg) { + case IPI_RESCHEDULE: + scheduler_ipi(); + break; - unsigned long msg = __ffs(pending); + case IPI_CALL_FUNC: + generic_smp_call_function_interrupt(); + break; - switch (msg) { - case IPI_RESCHEDULE: - scheduler_ipi(); - break; + case IPI_CPU_STOP: + ipi_cpu_stop(); + break; - case IPI_CALL_FUNC: - generic_smp_call_function_interrupt(); - break; - - case IPI_CPU_STOP: - ipi_cpu_stop(); - break; - - default: - pr_warn("IPI missing msg\n"); - - } - - pending &= ~(1U << msg); + default: + pr_warn("IPI with unexpected msg %ld\n", msg); } } @@ -312,11 +319,16 @@ irqreturn_t do_IPI(int irq, void *dev_id) plat_smp_ops.ipi_clear(irq); /* - * XXX: is this loop really needed - * And do we need to move ipi_clean inside + * "dequeue" the msg corresponding to this IPI (and possibly other + * piggybacked msg from elided IPIs: see ipi_send_msg_one() above) */ - while ((pending = xchg(this_cpu_ptr(&ipi_data), 0)) != 0) - __do_IPI(pending); + pending = xchg(this_cpu_ptr(&ipi_data), 0); + + do { + unsigned long msg = __ffs(pending); + __do_IPI(msg); + pending &= ~(1U << msg); + } while (pending); return IRQ_HANDLED; } From 59ed9413533897823bcdb4c79fd2904718e25b0a Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 16 Jan 2014 15:01:24 +0530 Subject: [PATCH 6/7] ARC: [cmdline] uboot cmdline handling rework * Moved cmdline copy from asm to "C" - allows for more robust checking of pointer validity etc. 
* Remove the Kconfig option to do so, base it on a runtime value passed by u-boot Signed-off-by: Vineet Gupta --- arch/arc/Kconfig | 11 ----------- arch/arc/kernel/head.S | 26 +++++++------------------- arch/arc/kernel/setup.c | 37 ++++++++++++++++++++++++++----------- 3 files changed, 33 insertions(+), 41 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 9063ae6553cc..c1f76f7424ac 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -409,17 +409,6 @@ config ARC_DBG_TLB_MISS_COUNT Counts number of I and D TLB Misses and exports them via Debugfs The counters can be cleared via Debugfs as well -config CMDLINE_UBOOT - bool "Support U-boot kernel command line passing" - default n - help - If you are using U-boot (www.denx.de) and wish to pass the kernel - command line from the U-boot environment to the Linux kernel then - switch this option on. - ARC U-boot will setup the cmdline in RAM/flash and set r2 to point - to it. kernel startup code will append this to DeviceTree - /bootargs provided cmdline args. 
- config ARC_BUILTIN_DTB_NAME string "Built in DTB" help diff --git a/arch/arc/kernel/head.S b/arch/arc/kernel/head.S index 2c878e964a64..991997269d02 100644 --- a/arch/arc/kernel/head.S +++ b/arch/arc/kernel/head.S @@ -49,25 +49,13 @@ stext: st.ab 0, [r5,4] brlt r5, r6, 1b -#ifdef CONFIG_CMDLINE_UBOOT - ; support for bootloader provided cmdline - ; If cmdline passed by u-boot, then - ; r0 = 1 (because ATAGS parsing, now retired, used to use 0) - ; r1 = magic number (board identity) - ; r2 = addr of cmdline string (somewhere in memory/flash) - - brne r0, 1, .Lother_bootup_chores ; u-boot didn't pass cmdline - breq r2, 0, .Lother_bootup_chores ; or cmdline is NULL - - mov r5, @command_line -1: - ldb.ab r6, [r2, 1] - breq r6, 0, .Lother_bootup_chores - b.d 1b - stb.ab r6, [r5, 1] -#endif - -.Lother_bootup_chores: + ; Uboot - kernel ABI + ; r0 = [0] No uboot interaction, [1] cmdline in r2, [2] DTB in r2 + ; r1 = magic number (board identity, unused as of now) + ; r2 = pointer to uboot provided cmdline or external DTB in mem + ; These are handled later in setup_arch() + st r0, [@uboot_tag] + st r2, [@uboot_arg] ; Identify if running on ISS vs Silicon ; IDENTITY Reg [ 3 2 1 0 ] diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index 643eae4436e0..ffb60b4f6f86 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -29,7 +29,10 @@ int running_on_hw = 1; /* vs. 
on ISS */ -char __initdata command_line[COMMAND_LINE_SIZE]; +/* Part of U-boot ABI: see head.S */ +int __initdata uboot_tag; +char __initdata *uboot_arg; + const struct machine_desc *machine_desc; struct task_struct *_current_task[NR_CPUS]; /* For stack switching */ @@ -311,19 +314,31 @@ void setup_processor(void) arc_chk_fpu(); } +static inline int is_kernel(unsigned long addr) +{ + if (addr >= (unsigned long)_stext && addr <= (unsigned long)_end) + return 1; + return 0; +} + void __init setup_arch(char **cmdline_p) { - /* This also populates @boot_command_line from /bootargs */ - machine_desc = setup_machine_fdt(__dtb_start); - if (!machine_desc) - panic("Embedded DT invalid\n"); + machine_desc = setup_machine_fdt(__dtb_start); + if (!machine_desc) + panic("Embedded DT invalid\n"); - /* Append any u-boot provided cmdline */ -#ifdef CONFIG_CMDLINE_UBOOT - /* Add a whitespace seperator between the 2 cmdlines */ - strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); - strlcat(boot_command_line, command_line, COMMAND_LINE_SIZE); -#endif + /* + * Append uboot cmdline to embedded DT cmdline. 
+ * setup_machine_fdt() would have populated @boot_command_line + */ + if (uboot_tag == 1) { + BUG_ON(is_kernel((unsigned long)uboot_arg)); + + /* Ensure a whitespace between the 2 cmdlines */ + strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); + strlcat(boot_command_line, uboot_arg, + COMMAND_LINE_SIZE); + } /* Save unparsed command line copy for /proc/cmdline */ *cmdline_p = boot_command_line; From e57d339a6264355df8c98948f05a46ff2bc5d504 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 16 Jan 2014 15:04:24 +0530 Subject: [PATCH 7/7] ARC: [cmdline] support External Device Trees from u-boot Signed-off-by: Vineet Gupta --- arch/arc/kernel/setup.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/arch/arc/kernel/setup.c b/arch/arc/kernel/setup.c index ffb60b4f6f86..119dddb752b2 100644 --- a/arch/arc/kernel/setup.c +++ b/arch/arc/kernel/setup.c @@ -323,22 +323,31 @@ static inline int is_kernel(unsigned long addr) void __init setup_arch(char **cmdline_p) { + /* make sure that uboot passed pointer to cmdline/dtb is valid */ + if (uboot_tag && is_kernel((unsigned long)uboot_arg)) + panic("Invalid uboot arg\n"); + + /* See if u-boot passed an external Device Tree blob */ + machine_desc = setup_machine_fdt(uboot_arg); /* uboot_tag == 2 */ + if (!machine_desc) { + /* No, so try the embedded one */ machine_desc = setup_machine_fdt(__dtb_start); if (!machine_desc) panic("Embedded DT invalid\n"); /* - * Append uboot cmdline to embedded DT cmdline. + * If we are here, it is established that @uboot_arg didn't + * point to DT blob. Instead if u-boot says it is cmdline, + * Append to embedded DT cmdline. 
* setup_machine_fdt() would have populated @boot_command_line */ if (uboot_tag == 1) { - BUG_ON(is_kernel((unsigned long)uboot_arg)); - /* Ensure a whitespace between the 2 cmdlines */ strlcat(boot_command_line, " ", COMMAND_LINE_SIZE); strlcat(boot_command_line, uboot_arg, COMMAND_LINE_SIZE); } + } /* Save unparsed command line copy for /proc/cmdline */ *cmdline_p = boot_command_line;