From c59c015b6aa5bc18d01c8e482149086cafd7332a Mon Sep 17 00:00:00 2001 From: Stephen Chivers Date: Fri, 21 Feb 2014 12:29:48 +1100 Subject: [PATCH 01/26] powerpc: Correct emulated mtfsf instruction The emulated (CONFIG_MATH_EMULATION_FULL) PowerPC Floating Point instruction mtfsf does not correctly copy bits from its source register to the Floating Point Status and Control Register (FPSCR). The error is in the preparation of the mask used to select the bits to be copied from the source to the FPSCR. Execution of the mtfsf instruction does not produce the same results on a MPC8548 platform (emulated floating point) as on MPC7410 or 440EP platforms (hardware floating point). This error has been detected using a Freescale MPC8548 based platform and the patch below tested using that platform. The patch is based on the patch(es) provided by Gabriel Paubert and analysis by Gabriel, James Yang and David Laight. Signed-off-by: Stephen Chivers Signed-off-by: Gabriel Paubert Tested-by: Stephen Chivers Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/math-emu/mtfsf.c | 56 ++++++++++++++--------------------- 1 file changed, 22 insertions(+), 34 deletions(-) diff --git a/arch/powerpc/math-emu/mtfsf.c b/arch/powerpc/math-emu/mtfsf.c index dbce92e4f046..44b0fc8214f4 100644 --- a/arch/powerpc/math-emu/mtfsf.c +++ b/arch/powerpc/math-emu/mtfsf.c @@ -11,48 +11,36 @@ mtfsf(unsigned int FM, u32 *frB) u32 mask; u32 fpscr; - if (FM == 0) - return 0; - - if (FM == 0xff) - mask = 0x9fffffff; + if (likely(FM == 1)) + mask = 0x0f; + else if (likely(FM == 0xff)) + mask = ~0; else { - mask = 0; - if (FM & (1 << 0)) - mask |= 0x90000000; - if (FM & (1 << 1)) - mask |= 0x0f000000; - if (FM & (1 << 2)) - mask |= 0x00f00000; - if (FM & (1 << 3)) - mask |= 0x000f0000; - if (FM & (1 << 4)) - mask |= 0x0000f000; - if (FM & (1 << 5)) - mask |= 0x00000f00; - if (FM & (1 << 6)) - mask |= 0x000000f0; - if (FM & (1 << 7)) - mask |= 0x0000000f; + mask = ((FM & 1) | + ((FM << 3) & 0x10) | + ((FM << 6) & 
0x100) | + ((FM << 9) & 0x1000) | + ((FM << 12) & 0x10000) | + ((FM << 15) & 0x100000) | + ((FM << 18) & 0x1000000) | + ((FM << 21) & 0x10000000)) * 15; } - __FPU_FPSCR &= ~(mask); - __FPU_FPSCR |= (frB[1] & mask); + fpscr = ((__FPU_FPSCR & ~mask) | (frB[1] & mask)) & + ~(FPSCR_VX | FPSCR_FEX | 0x800); - __FPU_FPSCR &= ~(FPSCR_VX); - if (__FPU_FPSCR & (FPSCR_VXSNAN | FPSCR_VXISI | FPSCR_VXIDI | + if (fpscr & (FPSCR_VXSNAN | FPSCR_VXISI | FPSCR_VXIDI | FPSCR_VXZDZ | FPSCR_VXIMZ | FPSCR_VXVC | FPSCR_VXSOFT | FPSCR_VXSQRT | FPSCR_VXCVI)) - __FPU_FPSCR |= FPSCR_VX; + fpscr |= FPSCR_VX; - fpscr = __FPU_FPSCR; - fpscr &= ~(FPSCR_FEX); - if (((fpscr & FPSCR_VX) && (fpscr & FPSCR_VE)) || - ((fpscr & FPSCR_OX) && (fpscr & FPSCR_OE)) || - ((fpscr & FPSCR_UX) && (fpscr & FPSCR_UE)) || - ((fpscr & FPSCR_ZX) && (fpscr & FPSCR_ZE)) || - ((fpscr & FPSCR_XX) && (fpscr & FPSCR_XE))) + /* The bit order of exception enables and exception status + * is the same. Simply shift and mask to check for enabled + * exceptions. + */ + if (fpscr & (fpscr >> 22) & 0xf8) fpscr |= FPSCR_FEX; + __FPU_FPSCR = fpscr; #ifdef DEBUG From a08a53ea4c97940fe83fea3eab27618ac0fb5ed1 Mon Sep 17 00:00:00 2001 From: Greg Kurz Date: Fri, 4 Apr 2014 09:35:13 +0200 Subject: [PATCH 02/26] powerpc/le: Enable RTAS events support The current kernel code assumes big endian and parses RTAS events all wrong. The most visible effect is that we cannot honor EPOW events, meaning, for example, we cannot shut down a guest properly from the hypervisor. This new patch is largely inspired by Nathan's work: we get rid of all the bit fields in the RTAS event structures (even the unused ones, for consistency). We also introduce endian safe accessors for the fields used by the kernel (trivial rtas_error_type() accessor added for consistency). 
Cc: Nathan Fontenot Signed-off-by: Greg Kurz Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/rtas.h | 127 +++++++++++++----- arch/powerpc/kernel/rtas.c | 15 ++- arch/powerpc/kernel/rtasd.c | 24 ++-- arch/powerpc/platforms/pseries/io_event_irq.c | 6 +- arch/powerpc/platforms/pseries/ras.c | 17 ++- 5 files changed, 128 insertions(+), 61 deletions(-) diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index a0e1add01ef5..b390f55b0df1 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -150,19 +150,53 @@ struct rtas_suspend_me_data { #define RTAS_VECTOR_EXTERNAL_INTERRUPT 0x500 struct rtas_error_log { - unsigned long version:8; /* Architectural version */ - unsigned long severity:3; /* Severity level of error */ - unsigned long disposition:2; /* Degree of recovery */ - unsigned long extended:1; /* extended log present? */ - unsigned long /* reserved */ :2; /* Reserved for future use */ - unsigned long initiator:4; /* Initiator of event */ - unsigned long target:4; /* Target of failed operation */ - unsigned long type:8; /* General event or error*/ - unsigned long extended_log_length:32; /* length in bytes */ - unsigned char buffer[1]; /* Start of extended log */ + /* Byte 0 */ + uint8_t byte0; /* Architectural version */ + + /* Byte 1 */ + uint8_t byte1; + /* XXXXXXXX + * XXX 3: Severity level of error + * XX 2: Degree of recovery + * X 1: Extended log present? + * XX 2: Reserved + */ + + /* Byte 2 */ + uint8_t byte2; + /* XXXXXXXX + * XXXX 4: Initiator of event + * XXXX 4: Target of failed operation + */ + uint8_t byte3; /* General event or error*/ + __be32 extended_log_length; /* length in bytes */ + unsigned char buffer[1]; /* Start of extended log */ /* Variable length. 
*/ }; +static inline uint8_t rtas_error_severity(const struct rtas_error_log *elog) +{ + return (elog->byte1 & 0xE0) >> 5; +} + +static inline uint8_t rtas_error_disposition(const struct rtas_error_log *elog) +{ + return (elog->byte1 & 0x18) >> 3; +} + +static inline uint8_t rtas_error_extended(const struct rtas_error_log *elog) +{ + return (elog->byte1 & 0x04) >> 2; +} + +#define rtas_error_type(x) ((x)->byte3) + +static inline +uint32_t rtas_error_extended_log_length(const struct rtas_error_log *elog) +{ + return be32_to_cpu(elog->extended_log_length); +} + #define RTAS_V6EXT_LOG_FORMAT_EVENT_LOG 14 #define RTAS_V6EXT_COMPANY_ID_IBM (('I' << 24) | ('B' << 16) | ('M' << 8)) @@ -172,32 +206,35 @@ struct rtas_error_log { */ struct rtas_ext_event_log_v6 { /* Byte 0 */ - uint32_t log_valid:1; /* 1:Log valid */ - uint32_t unrecoverable_error:1; /* 1:Unrecoverable error */ - uint32_t recoverable_error:1; /* 1:recoverable (correctable */ - /* or successfully retried) */ - uint32_t degraded_operation:1; /* 1:Unrecoverable err, bypassed*/ - /* - degraded operation (e.g. */ - /* CPU or mem taken off-line) */ - uint32_t predictive_error:1; - uint32_t new_log:1; /* 1:"New" log (Always 1 for */ - /* data returned from RTAS */ - uint32_t big_endian:1; /* 1: Big endian */ - uint32_t :1; /* reserved */ + uint8_t byte0; + /* XXXXXXXX + * X 1: Log valid + * X 1: Unrecoverable error + * X 1: Recoverable (correctable or successfully retried) + * X 1: Bypassed unrecoverable error (degraded operation) + * X 1: Predictive error + * X 1: "New" log (always 1 for data returned from RTAS) + * X 1: Big Endian + * X 1: Reserved + */ + /* Byte 1 */ - uint32_t :8; /* reserved */ + uint8_t byte1; /* reserved */ + /* Byte 2 */ - uint32_t powerpc_format:1; /* Set to 1 (indicating log is */ - /* in PowerPC format */ - uint32_t :3; /* reserved */ - uint32_t log_format:4; /* Log format indicator. 
Define */ - /* format used for byte 12-2047 */ + uint8_t byte2; + /* XXXXXXXX + * X 1: Set to 1 (indicating log is in PowerPC format) + * XXX 3: Reserved + * XXXX 4: Log format used for bytes 12-2047 + */ + /* Byte 3 */ - uint32_t :8; /* reserved */ + uint8_t byte3; /* reserved */ /* Byte 4-11 */ uint8_t reserved[8]; /* reserved */ /* Byte 12-15 */ - uint32_t company_id; /* Company ID of the company */ + __be32 company_id; /* Company ID of the company */ /* that defines the format for */ /* the vendor specific log type */ /* Byte 16-end of log */ @@ -205,6 +242,18 @@ struct rtas_ext_event_log_v6 { /* Variable length. */ }; +static +inline uint8_t rtas_ext_event_log_format(struct rtas_ext_event_log_v6 *ext_log) +{ + return ext_log->byte2 & 0x0F; +} + +static +inline uint32_t rtas_ext_event_company_id(struct rtas_ext_event_log_v6 *ext_log) +{ + return be32_to_cpu(ext_log->company_id); +} + /* pSeries event log format */ /* Two bytes ASCII section IDs */ @@ -227,14 +276,26 @@ struct rtas_ext_event_log_v6 { /* Vendor specific Platform Event Log Format, Version 6, section header */ struct pseries_errorlog { - uint16_t id; /* 0x00 2-byte ASCII section ID */ - uint16_t length; /* 0x02 Section length in bytes */ + __be16 id; /* 0x00 2-byte ASCII section ID */ + __be16 length; /* 0x02 Section length in bytes */ uint8_t version; /* 0x04 Section version */ uint8_t subtype; /* 0x05 Section subtype */ - uint16_t creator_component; /* 0x06 Creator component ID */ + __be16 creator_component; /* 0x06 Creator component ID */ uint8_t data[]; /* 0x08 Start of section data */ }; +static +inline uint16_t pseries_errorlog_id(struct pseries_errorlog *sect) +{ + return be16_to_cpu(sect->id); +} + +static +inline uint16_t pseries_errorlog_length(struct pseries_errorlog *sect) +{ + return be16_to_cpu(sect->length); +} + struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, uint16_t section_id); diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c index 
f386296ff378..8cd5ed049b5d 100644 --- a/arch/powerpc/kernel/rtas.c +++ b/arch/powerpc/kernel/rtas.c @@ -993,21 +993,24 @@ struct pseries_errorlog *get_pseries_errorlog(struct rtas_error_log *log, (struct rtas_ext_event_log_v6 *)log->buffer; struct pseries_errorlog *sect; unsigned char *p, *log_end; + uint32_t ext_log_length = rtas_error_extended_log_length(log); + uint8_t log_format = rtas_ext_event_log_format(ext_log); + uint32_t company_id = rtas_ext_event_company_id(ext_log); /* Check that we understand the format */ - if (log->extended_log_length < sizeof(struct rtas_ext_event_log_v6) || - ext_log->log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG || - ext_log->company_id != RTAS_V6EXT_COMPANY_ID_IBM) + if (ext_log_length < sizeof(struct rtas_ext_event_log_v6) || + log_format != RTAS_V6EXT_LOG_FORMAT_EVENT_LOG || + company_id != RTAS_V6EXT_COMPANY_ID_IBM) return NULL; - log_end = log->buffer + log->extended_log_length; + log_end = log->buffer + ext_log_length; p = ext_log->vendor_log; while (p < log_end) { sect = (struct pseries_errorlog *)p; - if (sect->id == section_id) + if (pseries_errorlog_id(sect) == section_id) return sect; - p += sect->length; + p += pseries_errorlog_length(sect); } return NULL; diff --git a/arch/powerpc/kernel/rtasd.c b/arch/powerpc/kernel/rtasd.c index 1130c53ad652..e736387fee6a 100644 --- a/arch/powerpc/kernel/rtasd.c +++ b/arch/powerpc/kernel/rtasd.c @@ -150,8 +150,8 @@ static void printk_log_rtas(char *buf, int len) struct rtas_error_log *errlog = (struct rtas_error_log *)buf; printk(RTAS_DEBUG "event: %d, Type: %s, Severity: %d\n", - error_log_cnt, rtas_event_type(errlog->type), - errlog->severity); + error_log_cnt, rtas_event_type(rtas_error_type(errlog)), + rtas_error_severity(errlog)); } } @@ -159,14 +159,16 @@ static int log_rtas_len(char * buf) { int len; struct rtas_error_log *err; + uint32_t extended_log_length; /* rtas fixed header */ len = 8; err = (struct rtas_error_log *)buf; - if (err->extended && err->extended_log_length) 
{ + extended_log_length = rtas_error_extended_log_length(err); + if (rtas_error_extended(err) && extended_log_length) { /* extended header */ - len += err->extended_log_length; + len += extended_log_length; } if (rtas_error_log_max == 0) @@ -293,15 +295,13 @@ void prrn_schedule_update(u32 scope) static void handle_rtas_event(const struct rtas_error_log *log) { - if (log->type == RTAS_TYPE_PRRN) { - /* For PRRN Events the extended log length is used to denote - * the scope for calling rtas update-nodes. - */ - if (prrn_is_enabled()) - prrn_schedule_update(log->extended_log_length); - } + if (rtas_error_type(log) != RTAS_TYPE_PRRN || !prrn_is_enabled()) + return; - return; + /* For PRRN Events the extended log length is used to denote + * the scope for calling rtas update-nodes. + */ + prrn_schedule_update(rtas_error_extended_log_length(log)); } #else diff --git a/arch/powerpc/platforms/pseries/io_event_irq.c b/arch/powerpc/platforms/pseries/io_event_irq.c index 5ea88d1541f7..0240c4ff878a 100644 --- a/arch/powerpc/platforms/pseries/io_event_irq.c +++ b/arch/powerpc/platforms/pseries/io_event_irq.c @@ -82,9 +82,9 @@ static struct pseries_io_event * ioei_find_event(struct rtas_error_log *elog) * RTAS_TYPE_IO only exists in extended event log version 6 or later. * No need to check event log version. 
*/ - if (unlikely(elog->type != RTAS_TYPE_IO)) { - printk_once(KERN_WARNING "io_event_irq: Unexpected event type %d", - elog->type); + if (unlikely(rtas_error_type(elog) != RTAS_TYPE_IO)) { + printk_once(KERN_WARNING"io_event_irq: Unexpected event type %d", + rtas_error_type(elog)); return NULL; } diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c index 721c0586b284..9c5778e6ed4b 100644 --- a/arch/powerpc/platforms/pseries/ras.c +++ b/arch/powerpc/platforms/pseries/ras.c @@ -236,7 +236,8 @@ static irqreturn_t ras_error_interrupt(int irq, void *dev_id) rtas_elog = (struct rtas_error_log *)ras_log_buf; - if ((status == 0) && (rtas_elog->severity >= RTAS_SEVERITY_ERROR_SYNC)) + if (status == 0 && + rtas_error_severity(rtas_elog) >= RTAS_SEVERITY_ERROR_SYNC) fatal = 1; else fatal = 0; @@ -300,13 +301,14 @@ static struct rtas_error_log *fwnmi_get_errinfo(struct pt_regs *regs) /* If it isn't an extended log we can use the per cpu 64bit buffer */ h = (struct rtas_error_log *)&savep[1]; - if (!h->extended) { + if (!rtas_error_extended(h)) { memcpy(&__get_cpu_var(mce_data_buf), h, sizeof(__u64)); errhdr = (struct rtas_error_log *)&__get_cpu_var(mce_data_buf); } else { - int len; + int len, error_log_length; - len = max_t(int, 8+h->extended_log_length, RTAS_ERROR_LOG_MAX); + error_log_length = 8 + rtas_error_extended_log_length(h); + len = max_t(int, error_log_length, RTAS_ERROR_LOG_MAX); memset(global_mce_data_buf, 0, RTAS_ERROR_LOG_MAX); memcpy(global_mce_data_buf, h, len); errhdr = (struct rtas_error_log *)global_mce_data_buf; @@ -350,23 +352,24 @@ int pSeries_system_reset_exception(struct pt_regs *regs) static int recover_mce(struct pt_regs *regs, struct rtas_error_log *err) { int recovered = 0; + int disposition = rtas_error_disposition(err); if (!(regs->msr & MSR_RI)) { /* If MSR_RI isn't set, we cannot recover */ recovered = 0; - } else if (err->disposition == RTAS_DISP_FULLY_RECOVERED) { + } else if (disposition == 
RTAS_DISP_FULLY_RECOVERED) { /* Platform corrected itself */ recovered = 1; - } else if (err->disposition == RTAS_DISP_LIMITED_RECOVERY) { + } else if (disposition == RTAS_DISP_LIMITED_RECOVERY) { /* Platform corrected itself but could be degraded */ printk(KERN_ERR "MCE: limited recovery, system may " "be degraded\n"); recovered = 1; } else if (user_mode(regs) && !is_global_init(current) && - err->severity == RTAS_SEVERITY_ERROR_SYNC) { + rtas_error_severity(err) == RTAS_SEVERITY_ERROR_SYNC) { /* * If we received a synchronous error when in userspace From e6b8fd028b584ffca7a7255b8971f254932c9fce Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 4 Apr 2014 20:19:48 +1100 Subject: [PATCH 03/26] powerpc/tm: Disable IRQ in tm_recheckpoint We can't take an IRQ when we're about to do a trechkpt as our GPR state is set to user GPR values. We've hit this when running some IBM Java stress tests in the lab resulting in the following dump: cpu 0x3f: Vector: 700 (Program Check) at [c000000007eb3d40] pc: c000000000050074: restore_gprs+0xc0/0x148 lr: 00000000b52a8184 sp: ac57d360 msr: 8000000100201030 current = 0xc00000002c500000 paca = 0xc000000007dbfc00 softe: 0 irq_happened: 0x00 pid = 34535, comm = Pooled Thread # R00 = 00000000b52a8184 R16 = 00000000b3e48fda R01 = 00000000ac57d360 R17 = 00000000ade79bd8 R02 = 00000000ac586930 R18 = 000000000fac9bcc R03 = 00000000ade60000 R19 = 00000000ac57f930 R04 = 00000000f6624918 R20 = 00000000ade79be8 R05 = 00000000f663f238 R21 = 00000000ac218a54 R06 = 0000000000000002 R22 = 000000000f956280 R07 = 0000000000000008 R23 = 000000000000007e R08 = 000000000000000a R24 = 000000000000000c R09 = 00000000b6e69160 R25 = 00000000b424cf00 R10 = 0000000000000181 R26 = 00000000f66256d4 R11 = 000000000f365ec0 R27 = 00000000b6fdcdd0 R12 = 00000000f66400f0 R28 = 0000000000000001 R13 = 00000000ada71900 R29 = 00000000ade5a300 R14 = 00000000ac2185a8 R30 = 00000000f663f238 R15 = 0000000000000004 R31 = 00000000f6624918 pc = c000000000050074 
restore_gprs+0xc0/0x148 cfar= c00000000004fe28 dont_restore_vec+0x1c/0x1a4 lr = 00000000b52a8184 msr = 8000000100201030 cr = 24804888 ctr = 0000000000000000 xer = 0000000000000000 trap = 700 This moves tm_recheckpoint to a C function and moves the tm_restore_sprs into that function. It then adds IRQ disabling over the trechkpt critical section. It also sets the TEXASR FS in the signals code to ensure this is never set now that we explicitly write the TM sprs in tm_recheckpoint. Signed-off-by: Michael Neuling cc: stable@vger.kernel.org Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/process.c | 34 +++++++++++++++++++++++++++------ arch/powerpc/kernel/signal_32.c | 2 ++ arch/powerpc/kernel/signal_64.c | 2 ++ arch/powerpc/kernel/tm.S | 2 +- 4 files changed, 33 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index af064d28b365..31d021506d21 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -610,6 +610,31 @@ out_and_saveregs: tm_save_sprs(thr); } +extern void __tm_recheckpoint(struct thread_struct *thread, + unsigned long orig_msr); + +void tm_recheckpoint(struct thread_struct *thread, + unsigned long orig_msr) +{ + unsigned long flags; + + /* We really can't be interrupted here as the TEXASR registers can't + * change and later in the trecheckpoint code, we have a userspace R1. + * So let's hard disable over this region. + */ + local_irq_save(flags); + hard_irq_disable(); + + /* The TM SPRs are restored here, so that TEXASR.FS can be set + * before the trecheckpoint and no explosion occurs. 
+ */ + tm_restore_sprs(thread); + + __tm_recheckpoint(thread, orig_msr); + + local_irq_restore(flags); +} + static inline void tm_recheckpoint_new_task(struct task_struct *new) { unsigned long msr; @@ -628,13 +653,10 @@ static inline void tm_recheckpoint_new_task(struct task_struct *new) if (!new->thread.regs) return; - /* The TM SPRs are restored here, so that TEXASR.FS can be set - * before the trecheckpoint and no explosion occurs. - */ - tm_restore_sprs(&new->thread); - - if (!MSR_TM_ACTIVE(new->thread.regs->msr)) + if (!MSR_TM_ACTIVE(new->thread.regs->msr)){ + tm_restore_sprs(&new->thread); return; + } msr = new->thread.tm_orig_msr; /* Recheckpoint to restore original checkpointed register state. */ TM_DEBUG("*** tm_recheckpoint of pid %d " diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index a67e00aa3caa..4e47db686b5d 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -881,6 +881,8 @@ static long restore_tm_user_regs(struct pt_regs *regs, * transactional versions should be loaded. 
*/ tm_enable(); + /* Make sure the transaction is marked as failed */ + current->thread.tm_texasr |= TEXASR_FS; /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(&current->thread, msr); /* Get the top half of the MSR */ diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index 8d253c29649b..d501dc4dc3e6 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -527,6 +527,8 @@ static long restore_tm_sigcontexts(struct pt_regs *regs, } #endif tm_enable(); + /* Make sure the transaction is marked as failed */ + current->thread.tm_texasr |= TEXASR_FS; /* This loads the checkpointed FP/VEC state, if used */ tm_recheckpoint(&current->thread, msr); diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index ef47bcbd4352..03567c05950a 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -307,7 +307,7 @@ dont_backup_fp: * Call with IRQs off, stacks get all out of sync for * some periods in here! */ -_GLOBAL(tm_recheckpoint) +_GLOBAL(__tm_recheckpoint) mfcr r5 mflr r0 stw r5, 8(r1) From 4a85b31d369b496c316d89b144ee9626073e5ef2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 28 Mar 2014 13:36:26 +1100 Subject: [PATCH 04/26] powerpc: Adjust CPU_FTR_SMT on all platforms For historical reasons that code was under #ifdef CONFIG_PPC_PSERIES but it applies equally to all 64-bit platforms. 
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index dd72bebd708a..337646ce4f34 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -379,7 +379,7 @@ static int __init early_init_dt_scan_cpus(unsigned long node, check_cpu_pa_features(node); check_cpu_slb_size(node); -#ifdef CONFIG_PPC_PSERIES +#ifdef CONFIG_PPC64 if (nthreads > 1) cur_cpu_spec->cpu_features |= CPU_FTR_SMT; else From 36ae37e3436b0c7731ae15a03d9215ff24bef9f2 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 28 Mar 2014 13:36:27 +1100 Subject: [PATCH 05/26] powerpc: Make boot_cpuid common between 32 and 64-bit Move the definition to setup-common.c and set the init value to -1 on both 32 and 64-bit (it was 0 on 64-bit). Additionally add a check to prom.c to guarantee that the init value has been updated after the DT scan. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom.c | 4 ++++ arch/powerpc/kernel/setup-common.c | 3 +++ arch/powerpc/kernel/setup_32.c | 2 -- arch/powerpc/kernel/setup_64.c | 1 - 4 files changed, 7 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 337646ce4f34..ea50a7ecd81b 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -747,6 +747,10 @@ void __init early_init_devtree(void *params) * (altivec support, boot CPU ID, ...) 
*/ of_scan_flat_dt(early_init_dt_scan_cpus, NULL); + if (boot_cpuid < 0) { + printk("Failed to indentify boot CPU !\n"); + BUG(); + } #if defined(CONFIG_SMP) && defined(CONFIG_PPC64) /* We'll later wait for secondaries to check in; there are diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c index bc76cc6b419c..79b7612ac6fa 100644 --- a/arch/powerpc/kernel/setup-common.c +++ b/arch/powerpc/kernel/setup-common.c @@ -76,6 +76,9 @@ EXPORT_SYMBOL(ppc_md); struct machdep_calls *machine_id; EXPORT_SYMBOL(machine_id); +int boot_cpuid = -1; +EXPORT_SYMBOL_GPL(boot_cpuid); + unsigned long klimit = (unsigned long) _end; char cmd_line[COMMAND_LINE_SIZE]; diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c index 04cc4fcca78b..ea4fda60e57b 100644 --- a/arch/powerpc/kernel/setup_32.c +++ b/arch/powerpc/kernel/setup_32.c @@ -44,8 +44,6 @@ extern void bootx_init(unsigned long r4, unsigned long phys); -int boot_cpuid = -1; -EXPORT_SYMBOL_GPL(boot_cpuid); int boot_cpuid_phys; EXPORT_SYMBOL_GPL(boot_cpuid_phys); diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 4933909cc5c0..d8aabbdc6483 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -74,7 +74,6 @@ #define DBG(fmt...) #endif -int boot_cpuid = 0; int spinning_secondaries; u64 ppc64_pft_size; From 7222f779acf94a123557c7bbbfbcc19424589f52 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 28 Mar 2014 13:36:28 +1100 Subject: [PATCH 06/26] powerpc/prom: early_init_dt_scan_cpus() updates cpu features only once All our cpu feature updates were done for every CPU in the device-tree, thus overwriting the cputable bits over and over again. Instead do them only for the boot CPU. 
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/prom.c | 52 +++++++++++++++++++------------------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index ea50a7ecd81b..668aa4791fd7 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -347,33 +347,34 @@ static int __init early_init_dt_scan_cpus(unsigned long node, #endif } - if (found >= 0) { - DBG("boot cpu: logical %d physical %d\n", found, - be32_to_cpu(intserv[found_thread])); - boot_cpuid = found; - set_hard_smp_processor_id(found, - be32_to_cpu(intserv[found_thread])); + /* Not the boot CPU */ + if (found < 0) + return 0; - /* - * PAPR defines "logical" PVR values for cpus that - * meet various levels of the architecture: - * 0x0f000001 Architecture version 2.04 - * 0x0f000002 Architecture version 2.05 - * If the cpu-version property in the cpu node contains - * such a value, we call identify_cpu again with the - * logical PVR value in order to use the cpu feature - * bits appropriate for the architecture level. - * - * A POWER6 partition in "POWER6 architected" mode - * uses the 0x0f000002 PVR value; in POWER5+ mode - * it uses 0x0f000001. - */ - prop = of_get_flat_dt_prop(node, "cpu-version", NULL); - if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) - identify_cpu(0, be32_to_cpup(prop)); + DBG("boot cpu: logical %d physical %d\n", found, + be32_to_cpu(intserv[found_thread])); + boot_cpuid = found; + set_hard_smp_processor_id(found, be32_to_cpu(intserv[found_thread])); - identical_pvr_fixup(node); - } + /* + * PAPR defines "logical" PVR values for cpus that + * meet various levels of the architecture: + * 0x0f000001 Architecture version 2.04 + * 0x0f000002 Architecture version 2.05 + * If the cpu-version property in the cpu node contains + * such a value, we call identify_cpu again with the + * logical PVR value in order to use the cpu feature + * bits appropriate for the architecture level. 
+ * + * A POWER6 partition in "POWER6 architected" mode + * uses the 0x0f000002 PVR value; in POWER5+ mode + * it uses 0x0f000001. + */ + prop = of_get_flat_dt_prop(node, "cpu-version", NULL); + if (prop && (be32_to_cpup(prop) & 0xff000000) == 0x0f000000) + identify_cpu(0, be32_to_cpup(prop)); + + identical_pvr_fixup(node); check_cpu_feature_properties(node); check_cpu_pa_features(node); @@ -385,7 +386,6 @@ static int __init early_init_dt_scan_cpus(unsigned long node, else cur_cpu_spec->cpu_features &= ~CPU_FTR_SMT; #endif - return 0; } From a944a9c40b81a71609692c4909bb57e1d01f4bb7 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 28 Mar 2014 13:36:29 +1100 Subject: [PATCH 07/26] powerpc/ppc64: Gracefully handle early interrupts If we take an interrupt such as a trap caused by a BUG_ON before the MMU has been setup, the interrupt handlers try to enable virtual mode and cause a recursive crash, making the original problem very hard to debug. This fixes it by adjusting the "kernel_msr" value in the PACA so that it only has MSR_IR and MSR_DR (translation for instruction and data) set after the MMU has been initialized for the processor. We may still not have a console yet but at least we don't get into a recursive fault (and early debug console or memory dump via JTAG of the kernel buffer *will* give us the proper error). 
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/paca.c | 3 ++- arch/powerpc/kernel/setup_64.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index bf0aada02fe4..ad302f845e5d 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -152,7 +152,8 @@ void __init initialise_paca(struct paca_struct *new_paca, int cpu) new_paca->paca_index = cpu; new_paca->kernel_toc = kernel_toc; new_paca->kernelbase = (unsigned long) _stext; - new_paca->kernel_msr = MSR_KERNEL; + /* Only set MSR:IR/DR when MMU is initialized */ + new_paca->kernel_msr = MSR_KERNEL & ~(MSR_IR | MSR_DR); new_paca->hw_cpu_id = 0xffff; new_paca->kexec_state = KEXEC_STATE_NONE; new_paca->__current = &init_task; diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index d8aabbdc6483..1d33e817ab2d 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -261,6 +261,14 @@ void __init early_setup(unsigned long dt_ptr) /* Initialize the hash table or TLB handling */ early_init_mmu(); + /* + * At this point, we can let interrupts switch to virtual mode + * (the MMU has been setup), so adjust the MSR in the PACA to + * have IR and DR set. + */ + get_paca()->kernel_msr = MSR_KERNEL; + + /* Reserve large chunks of memory for use by CMA for KVM */ kvm_cma_reserve(); /* @@ -293,6 +301,13 @@ void early_setup_secondary(void) /* Initialize the hash table or TLB handling */ early_init_mmu_secondary(); + + /* + * At this point, we can let interrupts switch to virtual mode + * (the MMU has been setup), so adjust the MSR in the PACA to + * have IR and DR set. 
+ */ + get_paca()->kernel_msr = MSR_KERNEL; } #endif /* CONFIG_SMP */ From 8f619b5429d9d852df09b85d9e41459859e04951 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 28 Mar 2014 13:36:30 +1100 Subject: [PATCH 08/26] powerpc/ppc64: Do not turn AIL (reloc-on interrupts) too early Turn them on at the same time as we allow MSR_IR/DR in the paca kernel MSR, ie, after the MMU has been setup enough to be able to handle relocated access to the linear mapping. Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/cpu_setup_power.S | 2 -- arch/powerpc/kernel/setup_64.c | 18 +++++++++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 37d1bb002aa9..1557e7c2c7e1 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -56,7 +56,6 @@ _GLOBAL(__setup_cpu_power8) li r0,0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR - oris r3, r3, LPCR_AIL_3@h bl __init_LPCR bl __init_HFSCR bl __init_tlb_power8 @@ -75,7 +74,6 @@ _GLOBAL(__restore_cpu_power8) li r0,0 mtspr SPRN_LPID,r0 mfspr r3,SPRN_LPCR - oris r3, r3, LPCR_AIL_3@h bl __init_LPCR bl __init_HFSCR bl __init_tlb_power8 diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 1d33e817ab2d..3d7a50a08f5e 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -195,6 +195,18 @@ static void fixup_boot_paca(void) get_paca()->data_offset = 0; } +static void cpu_ready_for_interrupts(void) +{ + /* Set IR and DR in PACA MSR */ + get_paca()->kernel_msr = MSR_KERNEL; + + /* Enable AIL if supported */ + if (cpu_has_feature(CPU_FTR_ARCH_207S)) { + unsigned long lpcr = mfspr(SPRN_LPCR); + mtspr(SPRN_LPCR, lpcr | LPCR_AIL_3); + } +} + /* * Early initialization entry point. This is called by head.S * with MMU translation disabled. 
We rely on the "feature" of @@ -264,9 +276,9 @@ void __init early_setup(unsigned long dt_ptr) /* * At this point, we can let interrupts switch to virtual mode * (the MMU has been setup), so adjust the MSR in the PACA to - * have IR and DR set. + * have IR and DR set and enable AIL if it exists */ - get_paca()->kernel_msr = MSR_KERNEL; + cpu_ready_for_interrupts(); /* Reserve large chunks of memory for use by CMA for KVM */ kvm_cma_reserve(); @@ -307,7 +319,7 @@ void early_setup_secondary(void) * (the MMU has been setup), so adjust the MSR in the PACA to * have IR and DR set. */ - get_paca()->kernel_msr = MSR_KERNEL; + cpu_ready_for_interrupts(); } #endif /* CONFIG_SMP */ From 798af00c4d75cdbed58bfe5c31e721bc0daedd9b Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 28 Mar 2014 13:36:31 +1100 Subject: [PATCH 09/26] powerpc/powernv: Add opal_notifier_unregister() and export to modules opal_notifier_register() is missing a pending "unregister" variant and should be exposed to modules. 
Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 2 ++ arch/powerpc/platforms/powernv/opal.c | 14 ++++++++++++++ 2 files changed, 16 insertions(+) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index fe2aa0b48d2b..6fb5f90e6464 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -892,6 +892,8 @@ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); extern int opal_notifier_register(struct notifier_block *nb); +extern int opal_notifier_unregister(struct notifier_block *nb); + extern int opal_message_notifier_register(enum OpalMessageType msg_type, struct notifier_block *nb); extern void opal_notifier_enable(void); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index e92f2f67640f..7835d5bb973f 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -180,6 +180,20 @@ int opal_notifier_register(struct notifier_block *nb) atomic_notifier_chain_register(&opal_notifier_head, nb); return 0; } +EXPORT_SYMBOL_GPL(opal_notifier_register); + +int opal_notifier_unregister(struct notifier_block *nb) +{ + if (!nb) { + pr_warning("%s: Invalid argument (%p)\n", + __func__, nb); + return -EINVAL; + } + + atomic_notifier_chain_unregister(&opal_notifier_head, nb); + return 0; +} +EXPORT_SYMBOL_GPL(opal_notifier_unregister); static void opal_do_notifier(uint64_t events) { From 32b941b79394435c1b6d688abfe762c5b3c82afa Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Fri, 28 Mar 2014 13:36:32 +1100 Subject: [PATCH 10/26] tty/hvc_opal: Kick the HVC thread on OPAL console events The firmware can notify us when new input data is available, so let's make sure we wakeup the HVC thread in that case. 
Signed-off-by: Benjamin Herrenschmidt --- drivers/tty/hvc/hvc_opal.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/drivers/tty/hvc/hvc_opal.c b/drivers/tty/hvc/hvc_opal.c index b01659bd4f7c..a585079b4b38 100644 --- a/drivers/tty/hvc/hvc_opal.c +++ b/drivers/tty/hvc/hvc_opal.c @@ -61,6 +61,7 @@ static struct hvc_opal_priv *hvc_opal_privs[MAX_NR_HVC_CONSOLES]; /* For early boot console */ static struct hvc_opal_priv hvc_opal_boot_priv; static u32 hvc_opal_boot_termno; +static bool hvc_opal_event_registered; static const struct hv_ops hvc_opal_raw_ops = { .get_chars = opal_get_chars, @@ -161,6 +162,18 @@ static const struct hv_ops hvc_opal_hvsi_ops = { .tiocmset = hvc_opal_hvsi_tiocmset, }; +static int hvc_opal_console_event(struct notifier_block *nb, + unsigned long events, void *change) +{ + if (events & OPAL_EVENT_CONSOLE_INPUT) + hvc_kick(); + return 0; +} + +static struct notifier_block hvc_opal_console_nb = { + .notifier_call = hvc_opal_console_event, +}; + static int hvc_opal_probe(struct platform_device *dev) { const struct hv_ops *ops; @@ -170,6 +183,7 @@ static int hvc_opal_probe(struct platform_device *dev) unsigned int termno, boot = 0; const __be32 *reg; + if (of_device_is_compatible(dev->dev.of_node, "ibm,opal-console-raw")) { proto = HV_PROTOCOL_RAW; ops = &hvc_opal_raw_ops; @@ -213,12 +227,18 @@ static int hvc_opal_probe(struct platform_device *dev) dev->dev.of_node->full_name, boot ? " (boot console)" : ""); - /* We don't do IRQ yet */ + /* We don't do IRQ ... */ hp = hvc_alloc(termno, 0, ops, MAX_VIO_PUT_CHARS); if (IS_ERR(hp)) return PTR_ERR(hp); dev_set_drvdata(&dev->dev, hp); + /* ... 
but we use OPAL event to kick the console */ + if (!hvc_opal_event_registered) { + opal_notifier_register(&hvc_opal_console_nb); + hvc_opal_event_registered = true; + } + return 0; } From bb4398e1de739a13e06589fc04cbb2267ba59800 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 28 Mar 2014 16:33:33 +1100 Subject: [PATCH 11/26] powerpc/powernv: Fix endian issues with OPAL async code OPAL defines opal_msg as a big endian struct so we have to byte swap it on little endian builds. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 6 +++--- arch/powerpc/platforms/powernv/opal-async.c | 7 ++++--- arch/powerpc/platforms/powernv/opal-sensor.c | 2 +- arch/powerpc/platforms/powernv/opal-sysparam.c | 4 ++-- arch/powerpc/platforms/powernv/opal.c | 10 ++++++---- 5 files changed, 16 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 6fb5f90e6464..fc73661c20fa 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -422,9 +422,9 @@ enum OpalSysparamPerm { }; struct opal_msg { - uint32_t msg_type; - uint32_t reserved; - uint64_t params[8]; + __be32 msg_type; + __be32 reserved; + __be64 params[8]; }; struct opal_machine_check_event { diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c index cd0c1354d404..32e2adfa5320 100644 --- a/arch/powerpc/platforms/powernv/opal-async.c +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -125,14 +125,15 @@ static int opal_async_comp_event(struct notifier_block *nb, { struct opal_msg *comp_msg = msg; unsigned long flags; + uint64_t token; if (msg_type != OPAL_MSG_ASYNC_COMP) return 0; - memcpy(&opal_async_responses[comp_msg->params[0]], comp_msg, - sizeof(*comp_msg)); + token = be64_to_cpu(comp_msg->params[0]); + memcpy(&opal_async_responses[token], comp_msg, sizeof(*comp_msg)); spin_lock_irqsave(&opal_async_comp_lock, flags); - 
__set_bit(comp_msg->params[0], opal_async_complete_map); + __set_bit(token, opal_async_complete_map); spin_unlock_irqrestore(&opal_async_comp_lock, flags); wake_up(&opal_async_wait); diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c index 663cc9c65613..7503e298c4c3 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor.c +++ b/arch/powerpc/platforms/powernv/opal-sensor.c @@ -53,7 +53,7 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data) goto out_token; } - ret = msg.params[1]; + ret = be64_to_cpu(msg.params[1]); out_token: mutex_unlock(&opal_sensor_mutex); diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c index 0bd249a26f30..6b614726baf2 100644 --- a/arch/powerpc/platforms/powernv/opal-sysparam.c +++ b/arch/powerpc/platforms/powernv/opal-sysparam.c @@ -64,7 +64,7 @@ static int opal_get_sys_param(u32 param_id, u32 length, void *buffer) goto out_token; } - ret = msg.params[1]; + ret = be64_to_cpu(msg.params[1]); out_token: opal_async_release_token(token); @@ -98,7 +98,7 @@ static int opal_set_sys_param(u32 param_id, u32 length, void *buffer) goto out_token; } - ret = msg.params[1]; + ret = be64_to_cpu(msg.params[1]); out_token: opal_async_release_token(token); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 7835d5bb973f..778a2793e75b 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -281,6 +281,7 @@ static void opal_handle_message(void) * value in /proc/device-tree. */ static struct opal_msg msg; + u32 type; ret = opal_get_msg(__pa(&msg), sizeof(msg)); /* No opal message pending. 
*/ @@ -294,13 +295,14 @@ static void opal_handle_message(void) return; } + type = be32_to_cpu(msg.msg_type); + /* Sanity check */ - if (msg.msg_type > OPAL_MSG_TYPE_MAX) { - pr_warning("%s: Unknown message type: %u\n", - __func__, msg.msg_type); + if (type > OPAL_MSG_TYPE_MAX) { + pr_warning("%s: Unknown message type: %u\n", __func__, type); return; } - opal_message_do_notify(msg.msg_type, (void *)&msg); + opal_message_do_notify(type, (void *)&msg); } static int opal_message_notify(struct notifier_block *nb, From 9000c17dc0f9c910267d2661225c9d33a227b27e Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 28 Mar 2014 16:34:10 +1100 Subject: [PATCH 12/26] powerpc/powernv: Fix endian issues with sensor code One OPAL call and one device tree property needed byte swapping. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 3 +-- arch/powerpc/platforms/powernv/opal-sensor.c | 4 +++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index fc73661c20fa..a13ab397edda 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -874,8 +874,7 @@ int64_t opal_get_param(uint64_t token, uint32_t param_id, uint64_t buffer, size_t length); int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer, size_t length); -int64_t opal_sensor_read(uint32_t sensor_hndl, int token, - uint32_t *sensor_data); +int64_t opal_sensor_read(uint32_t sensor_hndl, int token, __be32 *sensor_data); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c index 7503e298c4c3..10271ad1fac4 100644 --- a/arch/powerpc/platforms/powernv/opal-sensor.c +++ b/arch/powerpc/platforms/powernv/opal-sensor.c @@ -33,6 +33,7 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 
*sensor_data) { int ret, token; struct opal_msg msg; + __be32 data; token = opal_async_get_token_interruptible(); if (token < 0) { @@ -42,7 +43,7 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data) } mutex_lock(&opal_sensor_mutex); - ret = opal_sensor_read(sensor_hndl, token, sensor_data); + ret = opal_sensor_read(sensor_hndl, token, &data); if (ret != OPAL_ASYNC_COMPLETION) goto out_token; @@ -53,6 +54,7 @@ int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data) goto out_token; } + *sensor_data = be32_to_cpu(data); ret = be64_to_cpu(msg.params[1]); out_token: From 1dc954bd2f85144b9a2959139d41a5f6b9b34712 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 2 Apr 2014 21:37:39 +0530 Subject: [PATCH 13/26] powerpc/mm: NUMA pte should be handled via slow path in get_user_pages_fast() We need to handle numa pte via the slow path Signed-off-by: Aneesh Kumar K.V Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/gup.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/arch/powerpc/mm/gup.c b/arch/powerpc/mm/gup.c index c5f734e20b0f..d8746684f606 100644 --- a/arch/powerpc/mm/gup.c +++ b/arch/powerpc/mm/gup.c @@ -36,6 +36,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, do { pte_t pte = ACCESS_ONCE(*ptep); struct page *page; + /* + * Similar to the PMD case, NUMA hinting must take slow path + */ + if (pte_numa(pte)) + return 0; if ((pte_val(pte) & mask) != result) return 0; @@ -75,6 +80,14 @@ static int gup_pmd_range(pud_t pud, unsigned long addr, unsigned long end, if (pmd_none(pmd) || pmd_trans_splitting(pmd)) return 0; if (pmd_huge(pmd) || pmd_large(pmd)) { + /* + * NUMA hinting faults need to be handled in the GUP + * slowpath for accounting purposes and so that they + * can be serialised against THP migration. 
+ */ + if (pmd_numa(pmd)) + return 0; + if (!gup_hugepte((pte_t *)pmdp, PMD_SIZE, addr, next, write, pages, nr)) return 0; From e47ff70a56b04dcdc81685380f2acef4776fc60b Mon Sep 17 00:00:00 2001 From: Li Zhong Date: Thu, 3 Apr 2014 14:58:20 +0800 Subject: [PATCH 14/26] powerpc: Use of_node_init() for the fakenode in msi_bitmap.c This patch uses of_node_init() to initialize the kobject in the fake node used in test_of_node(), to avoid following kobject warning. [ 0.897654] kobject: '(null)' (c0000007ca183a08): is not initialized, yet kobject_put() is being called. [ 0.897682] ------------[ cut here ]------------ [ 0.897688] WARNING: at lib/kobject.c:670 [ 0.897692] Modules linked in: [ 0.897701] CPU: 4 PID: 1 Comm: swapper/0 Not tainted 3.14.0+ #1 [ 0.897708] task: c0000007ca100000 ti: c0000007ca180000 task.ti: c0000007ca180000 [ 0.897715] NIP: c00000000046a1f0 LR: c00000000046a1ec CTR: 0000000001704660 [ 0.897721] REGS: c0000007ca1835c0 TRAP: 0700 Not tainted (3.14.0+) [ 0.897727] MSR: 8000000000029032 CR: 28000024 XER: 0000000d [ 0.897749] CFAR: c0000000008ef4ec SOFTE: 1 GPR00: c00000000046a1ec c0000007ca183840 c0000000014c59b8 000000000000005c GPR04: 0000000000000001 c000000000129770 0000000000000000 0000000000000001 GPR08: 0000000000000000 0000000000000000 0000000000000000 0000000000003fef GPR12: 0000000000000000 c00000000f221200 c00000000000c350 0000000000000000 GPR16: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20: 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR24: 0000000000000000 c00000000144e808 c000000000c56f20 00000000000000d8 GPR28: c000000000cd5058 0000000000000000 c000000001454ca8 c0000007ca183a08 [ 0.897856] NIP [c00000000046a1f0] .kobject_put+0xa0/0xb0 [ 0.897863] LR [c00000000046a1ec] .kobject_put+0x9c/0xb0 [ 0.897868] Call Trace: [ 0.897874] [c0000007ca183840] [c00000000046a1ec] .kobject_put+0x9c/0xb0 (unreliable) [ 0.897885] [c0000007ca1838c0] [c000000000743f9c] .of_node_put+0x2c/0x50 [ 0.897894] 
[c0000007ca183940] [c000000000c83954] .test_of_node+0x1dc/0x208 [ 0.897902] [c0000007ca183b80] [c000000000c839a4] .msi_bitmap_selftest+0x24/0x38 [ 0.897913] [c0000007ca183bf0] [c00000000000bb34] .do_one_initcall+0x144/0x200 [ 0.897922] [c0000007ca183ce0] [c000000000c748e4] .kernel_init_freeable+0x2b4/0x394 [ 0.897931] [c0000007ca183db0] [c00000000000c374] .kernel_init+0x24/0x130 [ 0.897940] [c0000007ca183e30] [c00000000000a2f4] .ret_from_kernel_thread+0x5c/0x68 [ 0.897947] Instruction dump: [ 0.897952] 7fe3fb78 38210080 e8010010 ebe1fff8 7c0803a6 4800014c e89f0000 3c62ff6e [ 0.897971] 7fe5fb78 3863a950 48485279 60000000 <0fe00000> 39000000 393f0038 4bffff80 [ 0.897992] ---[ end trace 1eeffdb9f825a556 ]--- Signed-off-by: Li Zhong Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/sysdev/msi_bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/msi_bitmap.c b/arch/powerpc/sysdev/msi_bitmap.c index 8ba60424be95..2ff630267e9e 100644 --- a/arch/powerpc/sysdev/msi_bitmap.c +++ b/arch/powerpc/sysdev/msi_bitmap.c @@ -202,7 +202,7 @@ void __init test_of_node(void) /* There should really be a struct device_node allocator */ memset(&of_node, 0, sizeof(of_node)); - kref_init(&of_node.kobj.kref); + of_node_init(&of_node); of_node.full_name = node_name; check(0 == msi_bitmap_alloc(&bmp, size, &of_node)); From fa5c11b790f9e18ec6f69680c4aa728157aef5f5 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 28 Mar 2014 16:40:39 +1100 Subject: [PATCH 15/26] powerpc: Remove dead code in sycall entry In: commit 742415d6b66bf09e3e73280178ef7ec85c90b7ee Author: Michael Neuling powerpc: Turn syscall handler into macros We converted the syscall entry code onto macros, but in doing this we introduced some cruft that's never run and should never have been added. This removes that code. 
Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/kernel/exceptions-64s.S | 8 -------- 1 file changed, 8 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index d9c650ec7dac..3afd3915921a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -54,14 +54,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ xori r12,r12,MSR_LE ; \ mtspr SPRN_SRR1,r12 ; \ rfid ; /* return to userspace */ \ - b . ; \ -2: mfspr r12,SPRN_SRR1 ; \ - andi. r12,r12,MSR_PR ; \ - bne 0b ; \ - mtspr SPRN_SRR0,r3 ; \ - mtspr SPRN_SRR1,r4 ; \ - mtspr SPRN_SDR1,r5 ; \ - rfid ; \ b . ; /* prevent speculative execution */ #if defined(CONFIG_RELOCATABLE) From 6e556b471036b751aaa1a1b5a189eff76b1a2d0b Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Sun, 30 Mar 2014 11:03:23 +0530 Subject: [PATCH 16/26] powerpc/book3s: Fix mc_recoverable_range buffer overrun issue. Currently we wrongly allocate mc_recoverable_range buffer (to hold recoverable ranges) based on size of the property "mcheck-recoverable-ranges". This results in allocating less memory to hold available recoverable range entries from /proc/device-tree/ibm,opal/mcheck-recoverable-ranges. This patch fixes this issue by allocating mc_recoverable_range buffer based on number of entries of recoverable ranges instead of device property size. Without this change we end up allocating less memory and run into memory corruption issue. 
Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/powernv/opal.c | 28 +++++++++++++++++++-------- 1 file changed, 20 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 778a2793e75b..3697772e3759 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -102,19 +102,36 @@ int __init early_init_dt_scan_opal(unsigned long node, int __init early_init_dt_scan_recoverable_ranges(unsigned long node, const char *uname, int depth, void *data) { - unsigned long i, size; + unsigned long i, psize, size; const __be32 *prop; if (depth != 1 || strcmp(uname, "ibm,opal") != 0) return 0; - prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &size); + prop = of_get_flat_dt_prop(node, "mcheck-recoverable-ranges", &psize); if (!prop) return 1; pr_debug("Found machine check recoverable ranges.\n"); + /* + * Calculate number of available entries. + * + * Each recoverable address range entry is (start address, len, + * recovery address), 2 cells each for start and recovery address, + * 1 cell for len, totalling 5 cells per entry. + */ + mc_recoverable_range_len = psize / (sizeof(*prop) * 5); + + /* Sanity check */ + if (!mc_recoverable_range_len) + return 1; + + /* Size required to hold all the entries. */ + size = mc_recoverable_range_len * + sizeof(struct mcheck_recoverable_range); + /* * Allocate a buffer to hold the MC recoverable ranges. We would be * accessing them in real mode, hence it needs to be within @@ -124,11 +141,7 @@ int __init early_init_dt_scan_recoverable_ranges(unsigned long node, ppc64_rma_size)); memset(mc_recoverable_range, 0, size); - /* - * Each recoverable address entry is an (start address,len, - * recover address) pair, * 2 cells each, totalling 4 cells per entry. 
- */ - for (i = 0; i < size / (sizeof(*prop) * 5); i++) { + for (i = 0; i < mc_recoverable_range_len; i++) { mc_recoverable_range[i].start_addr = of_read_number(prop + (i * 5) + 0, 2); mc_recoverable_range[i].end_addr = @@ -142,7 +155,6 @@ int __init early_init_dt_scan_recoverable_ranges(unsigned long node, mc_recoverable_range[i].end_addr, mc_recoverable_range[i].recover_addr); } - mc_recoverable_range_len = i; return 1; } From bfc36894a48b996eba7e02d8e43093a289c1fb91 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 1 Apr 2014 14:28:19 +1030 Subject: [PATCH 17/26] powerpc/powernv: Add OPAL message log interface OPAL provides an in-memory circular buffer containing a message log populated with various runtime messages produced by the firmware. Provide a sysfs interface /sys/firmware/opal/msglog for userspace to view the messages. Signed-off-by: Joel Stanley Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 4 + arch/powerpc/platforms/powernv/Makefile | 1 + arch/powerpc/platforms/powernv/opal-msglog.c | 120 +++++++++++++++++++ arch/powerpc/platforms/powernv/opal.c | 4 +- 4 files changed, 128 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/platforms/powernv/opal-msglog.c diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index a13ab397edda..05f9455615d6 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -730,6 +730,9 @@ typedef struct oppanel_line { /* /sys/firmware/opal */ extern struct kobject *opal_kobj; +/* /ibm,opal */ +extern struct device_node *opal_node; + /* API functions */ int64_t opal_console_write(int64_t term_number, __be64 *length, const uint8_t *buffer); @@ -920,6 +923,7 @@ extern void opal_flash_init(void); extern int opal_elog_init(void); extern void opal_platform_dump_init(void); extern void opal_sys_param_init(void); +extern void opal_msglog_init(void); extern int opal_machine_check(struct pt_regs *regs); extern bool 
opal_mce_check_early_recovery(struct pt_regs *regs); diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index f324ea099503..63cebb9b4d45 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,6 +1,7 @@ obj-y += setup.o opal-takeover.o opal-wrappers.o opal.o opal-async.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o +obj-y += opal-msglog.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o diff --git a/arch/powerpc/platforms/powernv/opal-msglog.c b/arch/powerpc/platforms/powernv/opal-msglog.c new file mode 100644 index 000000000000..1bb25b952504 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-msglog.c @@ -0,0 +1,120 @@ +/* + * PowerNV OPAL in-memory console interface + * + * Copyright 2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +/* OPAL in-memory console. 
Defined in OPAL source at core/console.c */ +struct memcons { + __be64 magic; +#define MEMCONS_MAGIC 0x6630696567726173L + __be64 obuf_phys; + __be64 ibuf_phys; + __be32 obuf_size; + __be32 ibuf_size; + __be32 out_pos; +#define MEMCONS_OUT_POS_WRAP 0x80000000u +#define MEMCONS_OUT_POS_MASK 0x00ffffffu + __be32 in_prod; + __be32 in_cons; +}; + +static ssize_t opal_msglog_read(struct file *file, struct kobject *kobj, + struct bin_attribute *bin_attr, char *to, + loff_t pos, size_t count) +{ + struct memcons *mc = bin_attr->private; + const char *conbuf; + size_t ret, first_read = 0; + uint32_t out_pos, avail; + + if (!mc) + return -ENODEV; + + out_pos = be32_to_cpu(ACCESS_ONCE(mc->out_pos)); + + /* Now we've read out_pos, put a barrier in before reading the new + * data it points to in conbuf. */ + smp_rmb(); + + conbuf = phys_to_virt(be64_to_cpu(mc->obuf_phys)); + + /* When the buffer has wrapped, read from the out_pos marker to the end + * of the buffer, and then read the remaining data as in the un-wrapped + * case. */ + if (out_pos & MEMCONS_OUT_POS_WRAP) { + + out_pos &= MEMCONS_OUT_POS_MASK; + avail = be32_to_cpu(mc->obuf_size) - out_pos; + + ret = memory_read_from_buffer(to, count, &pos, + conbuf + out_pos, avail); + + if (ret < 0) + goto out; + + first_read = ret; + to += first_read; + count -= first_read; + pos -= avail; + } + + /* Sanity check. The firmware should not do this to us. */ + if (out_pos > be32_to_cpu(mc->obuf_size)) { + pr_err("OPAL: memory console corruption. 
Aborting read.\n"); + return -EINVAL; + } + + ret = memory_read_from_buffer(to, count, &pos, conbuf, out_pos); + + if (ret < 0) + goto out; + + ret += first_read; +out: + return ret; +} + +static struct bin_attribute opal_msglog_attr = { + .attr = {.name = "msglog", .mode = 0444}, + .read = opal_msglog_read +}; + +void __init opal_msglog_init(void) +{ + u64 mcaddr; + struct memcons *mc; + + if (of_property_read_u64(opal_node, "ibm,opal-memcons", &mcaddr)) { + pr_warn("OPAL: Property ibm,opal-memcons not found, no message log\n"); + return; + } + + mc = phys_to_virt(mcaddr); + if (!mc) { + pr_warn("OPAL: memory console address is invalid\n"); + return; + } + + if (be64_to_cpu(mc->magic) != MEMCONS_MAGIC) { + pr_warn("OPAL: memory console version is invalid\n"); + return; + } + + opal_msglog_attr.private = mc; + + if (sysfs_create_bin_file(opal_kobj, &opal_msglog_attr) != 0) + pr_warn("OPAL: sysfs file creation failed\n"); +} diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 3697772e3759..99e9c2887e21 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -46,7 +46,7 @@ struct mcheck_recoverable_range { static struct mcheck_recoverable_range *mc_recoverable_range; static int mc_recoverable_range_len; -static struct device_node *opal_node; +struct device_node *opal_node; static DEFINE_SPINLOCK(opal_write_lock); extern u64 opal_mc_secondary_handler[]; static unsigned int *opal_irqs; @@ -602,6 +602,8 @@ static int __init opal_init(void) opal_platform_dump_init(); /* Setup system parameters interface */ opal_sys_param_init(); + /* Setup message log interface. */ + opal_msglog_init(); } return 0; From e28b05e7ae8ba09e030ffe891ba154df5791cb76 Mon Sep 17 00:00:00 2001 From: Joel Stanley Date: Tue, 1 Apr 2014 14:28:20 +1030 Subject: [PATCH 18/26] powerpc/powernv: Add invalid OPAL call This call will not be understood by OPAL, and cause it to add an error to its log.
Among other things, this is useful for testing the behaviour of the log as it fills up. Signed-off-by: Joel Stanley Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 2 ++ arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + arch/powerpc/platforms/powernv/opal.c | 3 +++ 3 files changed, 6 insertions(+) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 05f9455615d6..6bd3b183cd63 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -87,6 +87,7 @@ extern int opal_enter_rtas(struct rtas_args *args, #define OPAL_ASYNC_COMPLETION -15 /* API Tokens (in r0) */ +#define OPAL_INVALID_CALL -1 #define OPAL_CONSOLE_WRITE 1 #define OPAL_CONSOLE_READ 2 #define OPAL_RTC_READ 3 @@ -734,6 +735,7 @@ extern struct kobject *opal_kobj; extern struct device_node *opal_node; /* API functions */ +int64_t opal_invalid_call(void); int64_t opal_console_write(int64_t term_number, __be64 *length, const uint8_t *buffer); int64_t opal_console_read(int64_t term_number, __be64 *length, diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index bb90f9a4e027..f531ffe35b3e 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -61,6 +61,7 @@ _STATIC(opal_return) mtcr r4; rfid +OPAL_CALL(opal_invalid_call, OPAL_INVALID_CALL); OPAL_CALL(opal_console_write, OPAL_CONSOLE_WRITE); OPAL_CALL(opal_console_read, OPAL_CONSOLE_READ); OPAL_CALL(opal_console_write_buffer_space, OPAL_CONSOLE_WRITE_BUFFER_SPACE); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 99e9c2887e21..49d2f00019e5 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -635,3 +635,6 @@ void opal_shutdown(void) mdelay(10); } } + +/* Export this so that test modules can use it */ +EXPORT_SYMBOL_GPL(opal_invalid_call); From 
f83319d71002aec03bd87bc9aabce5f549680f0a Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Fri, 28 Mar 2014 17:01:23 +1100 Subject: [PATCH 19/26] powerpc: Add lq/stq emulation Recent CPUs support quad word load and store instructions. Add support to the alignment handler for them. Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/emulated_ops.h | 1 + arch/powerpc/kernel/align.c | 52 +++++++++++++++++++++---- arch/powerpc/kernel/traps.c | 1 + 3 files changed, 46 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/emulated_ops.h b/arch/powerpc/include/asm/emulated_ops.h index 4358e3002f35..f00e10e2a335 100644 --- a/arch/powerpc/include/asm/emulated_ops.h +++ b/arch/powerpc/include/asm/emulated_ops.h @@ -54,6 +54,7 @@ extern struct ppc_emulated { #ifdef CONFIG_PPC64 struct ppc_emulated_entry mfdscr; struct ppc_emulated_entry mtdscr; + struct ppc_emulated_entry lq_stq; #endif } ppc_emulated; diff --git a/arch/powerpc/kernel/align.c b/arch/powerpc/kernel/align.c index de91f3ae631e..94908af308d8 100644 --- a/arch/powerpc/kernel/align.c +++ b/arch/powerpc/kernel/align.c @@ -73,7 +73,7 @@ static struct aligninfo aligninfo[128] = { { 8, LD+F }, /* 00 0 1001: lfd */ { 4, ST+F+S }, /* 00 0 1010: stfs */ { 8, ST+F }, /* 00 0 1011: stfd */ - INVALID, /* 00 0 1100 */ + { 16, LD }, /* 00 0 1100: lq */ { 8, LD }, /* 00 0 1101: ld/ldu/lwa */ INVALID, /* 00 0 1110 */ { 8, ST }, /* 00 0 1111: std/stdu */ @@ -140,7 +140,7 @@ static struct aligninfo aligninfo[128] = { { 2, LD+SW }, /* 10 0 1100: lhbrx */ { 4, LD+SE }, /* 10 0 1101 lwa */ { 2, ST+SW }, /* 10 0 1110: sthbrx */ - INVALID, /* 10 0 1111 */ + { 16, ST }, /* 10 0 1111: stq */ INVALID, /* 10 1 0000 */ INVALID, /* 10 1 0001 */ INVALID, /* 10 1 0010 */ @@ -385,8 +385,6 @@ static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg, char *ptr1 = (char *) &current->thread.TS_FPR(reg+1); int i, ret, sw = 0; - if (!(flags & F)) - return 0; if (reg & 1) return
0; /* invalid form: FRS/FRT must be even */ if (flags & SW) @@ -406,6 +404,34 @@ static int emulate_fp_pair(unsigned char __user *addr, unsigned int reg, return 1; /* exception handled and fixed up */ } +#ifdef CONFIG_PPC64 +static int emulate_lq_stq(struct pt_regs *regs, unsigned char __user *addr, + unsigned int reg, unsigned int flags) +{ + char *ptr0 = (char *)&regs->gpr[reg]; + char *ptr1 = (char *)&regs->gpr[reg+1]; + int i, ret, sw = 0; + + if (reg & 1) + return 0; /* invalid form: GPR must be even */ + if (flags & SW) + sw = 7; + ret = 0; + for (i = 0; i < 8; ++i) { + if (!(flags & ST)) { + ret |= __get_user(ptr0[i^sw], addr + i); + ret |= __get_user(ptr1[i^sw], addr + i + 8); + } else { + ret |= __put_user(ptr0[i^sw], addr + i); + ret |= __put_user(ptr1[i^sw], addr + i + 8); + } + } + if (ret) + return -EFAULT; + return 1; /* exception handled and fixed up */ +} +#endif /* CONFIG_PPC64 */ + #ifdef CONFIG_SPE static struct aligninfo spe_aligninfo[32] = { @@ -914,10 +940,20 @@ int fix_alignment(struct pt_regs *regs) flush_fp_to_thread(current); } - /* Special case for 16-byte FP loads and stores */ - if (nb == 16) { - PPC_WARN_ALIGNMENT(fp_pair, regs); - return emulate_fp_pair(addr, reg, flags); + if ((nb == 16)) { + if (flags & F) { + /* Special case for 16-byte FP loads and stores */ + PPC_WARN_ALIGNMENT(fp_pair, regs); + return emulate_fp_pair(addr, reg, flags); + } else { +#ifdef CONFIG_PPC64 + /* Special case for 16-byte loads and stores */ + PPC_WARN_ALIGNMENT(lq_stq, regs); + return emulate_lq_stq(regs, addr, reg, flags); +#else + return 0; +#endif + } } PPC_WARN_ALIGNMENT(unaligned, regs); diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index df86f0ce2d36..1bd7ca298fa1 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -1868,6 +1868,7 @@ struct ppc_emulated ppc_emulated = { #ifdef CONFIG_PPC64 WARN_EMULATED_SETUP(mfdscr), WARN_EMULATED_SETUP(mtdscr), + WARN_EMULATED_SETUP(lq_stq), #endif }; From
aba6f4f2e64f9b43aaba6d46c67e9e25a55f4614 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 25 Mar 2014 12:35:03 -0700 Subject: [PATCH 20/26] powerpc: Convert last uses of __FUNCTION__ to __func__ Just about all of these have been converted to __func__, so convert the last uses. Signed-off-by: Joe Perches Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/nvram.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index d7096f2f7751..0cc240b7f694 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -298,13 +298,13 @@ int nvram_write_os_partition(struct nvram_os_partition *part, char * buff, rc = ppc_md.nvram_write((char *)&info, sizeof(struct err_log_info), &tmp_index); if (rc <= 0) { - pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc); + pr_err("%s: Failed nvram_write (%d)\n", __func__, rc); return rc; } rc = ppc_md.nvram_write(buff, length, &tmp_index); if (rc <= 0) { - pr_err("%s: Failed nvram_write (%d)\n", __FUNCTION__, rc); + pr_err("%s: Failed nvram_write (%d)\n", __func__, rc); return rc; } @@ -351,15 +351,14 @@ int nvram_read_partition(struct nvram_os_partition *part, char *buff, sizeof(struct err_log_info), &tmp_index); if (rc <= 0) { - pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__, - rc); + pr_err("%s: Failed nvram_read (%d)\n", __func__, rc); return rc; } } rc = ppc_md.nvram_read(buff, length, &tmp_index); if (rc <= 0) { - pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__, rc); + pr_err("%s: Failed nvram_read (%d)\n", __func__, rc); return rc; } @@ -869,7 +868,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, break; default: pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n", - __FUNCTION__, (int) reason); + __func__, (int) reason); return; } From bfd25d72abc62a89f9c9c41417da998adcf2578e Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Tue, 
25 Mar 2014 11:43:08 +1100 Subject: [PATCH 21/26] powerpc/opal: Add missing include next-20140324 currently fails compiling celleb_defconfig with: arch/powerpc/include/asm/opal.h:894:42: error: 'struct notifier_block' declared inside parameter list [-Werror] arch/powerpc/include/asm/opal.h:894:42: error: its scope is only this definition or declaration, which is probably not what you want [-Werror] arch/powerpc/include/asm/opal.h:896:14: error: 'struct notifier_block' declared inside parameter list [-Werror] This is due to a missing include which is added here. Signed-off-by: Michael Neuling Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 6bd3b183cd63..a2efdaa020b0 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -178,6 +178,8 @@ extern int opal_enter_rtas(struct rtas_args *args, #ifndef __ASSEMBLY__ +#include <linux/notifier.h> + /* Other enums */ enum OpalVendorApiTokens { OPAL_START_VENDOR_API_RANGE = 1000, OPAL_END_VENDOR_API_RANGE = 1999 From 282efb7023d482632f2fa567163362841423a1ac Mon Sep 17 00:00:00 2001 From: Monam Agarwal Date: Sat, 22 Mar 2014 12:20:56 +0530 Subject: [PATCH 22/26] arch/powerpc: Use RCU_INIT_POINTER(x, NULL) in platforms/cell/spu_syscalls.c Here rcu_assign_pointer() is ensuring that the initialization of a structure is carried out before storing a pointer to that structure. So, rcu_assign_pointer(p, NULL) can always safely be converted to RCU_INIT_POINTER(p, NULL).
Signed-off-by: Monam Agarwal Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/cell/spu_syscalls.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/cell/spu_syscalls.c b/arch/powerpc/platforms/cell/spu_syscalls.c index 3844f1397fc3..38e0a1a5cec3 100644 --- a/arch/powerpc/platforms/cell/spu_syscalls.c +++ b/arch/powerpc/platforms/cell/spu_syscalls.c @@ -170,7 +170,7 @@ EXPORT_SYMBOL_GPL(register_spu_syscalls); void unregister_spu_syscalls(struct spufs_calls *calls) { BUG_ON(spufs_calls->owner != calls->owner); - rcu_assign_pointer(spufs_calls, NULL); + RCU_INIT_POINTER(spufs_calls, NULL); synchronize_rcu(); } EXPORT_SYMBOL_GPL(unregister_spu_syscalls); From d3d35d957a9d0733dc51f14b5abc0bff5d3c5f3a Mon Sep 17 00:00:00 2001 From: Tony Breeds Date: Wed, 12 Mar 2014 15:12:01 +1100 Subject: [PATCH 23/26] powerpc/le: Avoid creating R_PPC64_TOCSAVE relocations for modules. When building modules with a native le toolchain the linker will generate R_PPC64_TOCSAVE relocations when it's safe to omit saving r2 on a plt call. This isn't helpful in the context of a kernel module and the kernel will fail to load those modules with an error like: nf_conntrack: Unknown ADD relocation: 109 This patch tells the linker to avoid creating R_PPC64_TOCSAVE relocations allowing modules to load.
Signed-off-by: Tony Breeds Signed-off-by: Anton Blanchard Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 0f4344e6fbca..4c0cedf4e2c7 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -74,6 +74,7 @@ override CROSS32AS += -mlittle-endian LDEMULATION := lppc GNUTARGET := powerpcle MULTIPLEWORD := -mno-multiple +KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect) else ifeq ($(call cc-option-yn,-mbig-endian),y) override CC += -mbig-endian From 9a0133613e4412b2caaaf0d9dd81f213bcededf1 Mon Sep 17 00:00:00 2001 From: Michael Wang Date: Tue, 8 Apr 2014 11:19:36 +0800 Subject: [PATCH 24/26] power, sched: stop updating inside arch_update_cpu_topology() when nothing to be update Since v1: Edited the comment according to Srivatsa's suggestion. During the testing, we encounter below WARN followed by Oops: WARNING: at kernel/sched/core.c:6218 ... NIP [c000000000101660] .build_sched_domains+0x11d0/0x1200 LR [c000000000101358] .build_sched_domains+0xec8/0x1200 PACATMSCRATCH [800000000000f032] Call Trace: [c00000001b103850] [c000000000101358] .build_sched_domains+0xec8/0x1200 [c00000001b1039a0] [c00000000010aad4] .partition_sched_domains+0x484/0x510 [c00000001b103aa0] [c00000000016d0a8] .rebuild_sched_domains+0x68/0xa0 [c00000001b103b30] [c00000000005cbf0] .topology_work_fn+0x10/0x30 ... Oops: Kernel access of bad area, sig: 11 [#1] ... 
NIP [c00000000045c000] .__bitmap_weight+0x60/0xf0 LR [c00000000010132c] .build_sched_domains+0xe9c/0x1200 PACATMSCRATCH [8000000000029032] Call Trace: [c00000001b1037a0] [c000000000288ff4] .kmem_cache_alloc_node_trace+0x184/0x3a0 [c00000001b103850] [c00000000010132c] .build_sched_domains+0xe9c/0x1200 [c00000001b1039a0] [c00000000010aad4] .partition_sched_domains+0x484/0x510 [c00000001b103aa0] [c00000000016d0a8] .rebuild_sched_domains+0x68/0xa0 [c00000001b103b30] [c00000000005cbf0] .topology_work_fn+0x10/0x30 ... This was caused by that 'sd->groups == NULL' after building groups, which was caused by the empty 'sd->span'. The cpu's domain contained nothing because the cpu was assigned to a wrong node, due to the following unfortunate sequence of events: 1. The hypervisor sent a topology update to the guest OS, to notify changes to the cpu-node mapping. However, the update was actually redundant - i.e., the "new" mapping was exactly the same as the old one. 2. Due to this, the 'updated_cpus' mask turned out to be empty after exiting the 'for-loop' in arch_update_cpu_topology(). 3. So we ended up calling stop-machine() with an empty cpumask list, which made stop-machine internally elect cpumask_first(cpu_online_mask), i.e., CPU0 as the cpu to run the payload (the update_cpu_topology() function). 4. This causes update_cpu_topology() to be run by CPU0. And since 'updates' is kzalloc()'ed inside arch_update_cpu_topology(), update_cpu_topology() finds update->cpu as well as update->new_nid to be 0. In other words, we end up assigning CPU0 (and eventually its siblings) to node 0, incorrectly. Along with the following wrong updating, it causes the sched-domain rebuild code to break and crash the system. Fix this by skipping the topology update in cases where we find that the topology has not actually changed in reality (ie., spurious updates). 
CC: Benjamin Herrenschmidt CC: Paul Mackerras CC: Nathan Fontenot CC: Stephen Rothwell CC: Andrew Morton CC: Robert Jennings CC: Jesse Larrew CC: "Srivatsa S. Bhat" CC: Alistair Popple Suggested-by: "Srivatsa S. Bhat" Signed-off-by: Michael Wang Reviewed-by: Srivatsa S. Bhat Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/mm/numa.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c index 30a42e24bf14..4ebbb9e99286 100644 --- a/arch/powerpc/mm/numa.c +++ b/arch/powerpc/mm/numa.c @@ -1591,6 +1591,20 @@ int arch_update_cpu_topology(void) cpu = cpu_last_thread_sibling(cpu); } + /* + * In cases where we have nothing to update (because the updates list + * is too short or because the new topology is same as the old one), + * skip invoking update_cpu_topology() via stop-machine(). This is + * necessary (and not just a fast-path optimization) since stop-machine + * can end up electing a random CPU to run update_cpu_topology(), and + * thus trick us into setting up incorrect cpu-node mappings (since + * 'updates' is kzalloc()'ed). + * + * And for the similar reason, we will skip all the following updating. + */ + if (!cpumask_weight(&updated_cpus)) + goto out; + stop_machine(update_cpu_topology, &updates[0], &updated_cpus); /* @@ -1612,6 +1626,7 @@ int arch_update_cpu_topology(void) changed = 1; } +out: kfree(updates); return changed; } From 4952ef9aec58e5f95df45fe36a1a6e0abf8987a8 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 9 Apr 2014 12:56:37 +1000 Subject: [PATCH 25/26] Revert "powerpc/powernv: hwmon driver for power values, fan rpm and temperature" This reverts commit 0de7f8a917b5202014430e0055c0e1db0348bd62. This driver wasn't merged via the proper maintainers (my fault ... ooops !) 
and has serious issues so let's take it out for now and have a new better one be merged the right way Signed-off-by: Benjamin Herrenschmidt --- --- drivers/hwmon/Kconfig | 8 - drivers/hwmon/Makefile | 1 - drivers/hwmon/ibmpowernv.c | 529 ------------------------------------- 3 files changed, 538 deletions(-) delete mode 100644 drivers/hwmon/ibmpowernv.c diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index b13172cfbeef..bc196f49ec53 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -554,14 +554,6 @@ config SENSORS_IBMPEX This driver can also be built as a module. If so, the module will be called ibmpex. -config SENSORS_IBMPOWERNV - tristate "IBM PowerNv Platform temperature/power/fan sensor" - depends on PPC_POWERNV - default y - help - If you say yes here you get support for the temperature/fan/power - sensors on your platform. - config SENSORS_IIO_HWMON tristate "Hwmon driver that uses channels specified via iio maps" depends on IIO diff --git a/drivers/hwmon/Makefile b/drivers/hwmon/Makefile index 199c401bf8d9..c48f9873ac73 100644 --- a/drivers/hwmon/Makefile +++ b/drivers/hwmon/Makefile @@ -71,7 +71,6 @@ obj-$(CONFIG_SENSORS_ULTRA45) += ultra45_env.o obj-$(CONFIG_SENSORS_I5K_AMB) += i5k_amb.o obj-$(CONFIG_SENSORS_IBMAEM) += ibmaem.o obj-$(CONFIG_SENSORS_IBMPEX) += ibmpex.o -obj-$(CONFIG_SENSORS_IBMPOWERNV)+= ibmpowernv.o obj-$(CONFIG_SENSORS_IIO_HWMON) += iio_hwmon.o obj-$(CONFIG_SENSORS_INA209) += ina209.o obj-$(CONFIG_SENSORS_INA2XX) += ina2xx.o diff --git a/drivers/hwmon/ibmpowernv.c b/drivers/hwmon/ibmpowernv.c deleted file mode 100644 index b7b1297a9b02..000000000000 --- a/drivers/hwmon/ibmpowernv.c +++ /dev/null @@ -1,529 +0,0 @@ -/* - * hwmon driver for temperature/power/fan on IBM PowerNV platform - * Copyright (C) 2013 IBM - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of 
the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -MODULE_DESCRIPTION("IBM PowerNV Platform power/temp/fan sensor hwmon module"); -MODULE_LICENSE("GPL"); - -#define MAX_ATTR_LENGTH 32 - -/* Device tree sensor name prefixes. The device tree has the names in the - * format "cooling-fan#2-faulted" where the "cooling-fan" is the sensor type, - * 2 is the sensor count, and "faulted" is the sensor data attribute type. - */ -#define DT_FAULT_ATTR_SUFFIX "faulted" -#define DT_DATA_ATTR_SUFFIX "data" -#define DT_THRESHOLD_ATTR_SUFFIX "thrs" - -enum sensors { - FAN, - TEMPERATURE, - POWERSUPPLY, - POWER, - MAX_SENSOR_TYPE, -}; - -enum attributes { - INPUT, - MINIMUM, - MAXIMUM, - FAULT, - MAX_ATTR_TYPES -}; - -static struct sensor_name { - char *name; - char *compaible; -} sensor_names[] = { - {"fan-sensor", "ibm,opal-sensor-cooling-fan"}, - {"amb-temp-sensor", "ibm,opal-sensor-amb-temp"}, - {"power-sensor", "ibm,opal-sensor-power-supply"}, - {"power", "ibm,opal-sensor-power"} -}; - -static const char * const attribute_type_table[] = { - "input", - "min", - "max", - "fault", - NULL -}; - -struct pdev_entry { - struct list_head list; - struct platform_device *pdev; - enum sensors type; -}; - -static LIST_HEAD(pdev_list); - -/* The sensors are categorised on type. - * - * The sensors of same type are categorised under a common platform device. 
- * So, The pdev is shared by all sensors of same type. - * Ex : temp1_input, temp1_max, temp2_input,temp2_max all share same platform - * device. - * - * "sensor_data" is the Platform device specific data. - * There is one hwmon_device instance for all the sensors of same type. - * This also holds the list of all sensors with same type but different - * attribute and index. - */ -struct sensor_specific_data { - u32 sensor_id; /* The hex value as in the device tree */ - u32 sensor_index; /* The sensor instance index */ - struct sensor_device_attribute sd_attr; - enum attributes attr_type; - char attr_name[64]; -}; - -struct sensor_data { - struct device *hwmon_dev; - struct list_head sensor_list; - struct device_attribute name_attr; -}; - -struct sensor_entry { - struct list_head list; - struct sensor_specific_data *sensor_data; -}; - -static struct platform_device *powernv_sensor_get_pdev(enum sensors type) -{ - struct pdev_entry *p; - list_for_each_entry(p, &pdev_list, list) - if (p->type == type) - return p->pdev; - - return NULL; -} - -static struct sensor_specific_data *powernv_sensor_get_sensor_data( - struct sensor_data *pdata, - int index, enum attributes attr_type) -{ - struct sensor_entry *p; - list_for_each_entry(p, &pdata->sensor_list, list) - if ((p->sensor_data->sensor_index == index) && - (attr_type == p->sensor_data->attr_type)) - return p->sensor_data; - - return NULL; -} - -static ssize_t show_name(struct device *dev, - struct device_attribute *devattr, char *buf) -{ - struct platform_device *pdev = to_platform_device(dev); - - return sprintf(buf, "%s\n", pdev->name); -} - -/* Note: Data from the sensors for each sensor type needs to be converted to - * the dimension appropriate. 
- */ -static ssize_t show_sensor(struct device *dev, - struct device_attribute *devattr, char *buf) -{ - struct sensor_device_attribute *sd_attr = to_sensor_dev_attr(devattr); - struct platform_device *pdev = to_platform_device(dev); - struct sensor_data *pdata = platform_get_drvdata(pdev); - struct sensor_specific_data *tdata = NULL; - enum sensors sensor_type = pdev->id; - u32 x = -1; - int ret; - - if (sd_attr && sd_attr->dev_attr.attr.name) { - char *pos = strchr(sd_attr->dev_attr.attr.name, '_'); - int i; - - for (i = 0; i < MAX_ATTR_TYPES; i++) { - if (strcmp(pos+1, attribute_type_table[i]) == 0) { - tdata = powernv_sensor_get_sensor_data(pdata, - sd_attr->index, i); - break; - } - } - } - - if (tdata) { - ret = opal_get_sensor_data(tdata->sensor_id, &x); - if (ret) - x = -1; - } - - if (sensor_type == TEMPERATURE && x > 0) { - /* Temperature comes in Degrees and convert it to - * milli-degrees. - */ - x = x*1000; - } else if (sensor_type == POWER && x > 0) { - /* Power value comes in watts, convert to micro-watts */ - x = x * 1000000; - } - - return sprintf(buf, "%d\n", x); -} - -static u32 get_sensor_index_from_name(const char *name) -{ - char *hash_position = strchr(name, '#'); - u32 index = 0, copy_length; - char newbuf[8]; - - if (hash_position) { - copy_length = strchr(hash_position, '-') - hash_position - 1; - if (copy_length < sizeof(newbuf)) { - strncpy(newbuf, hash_position + 1, copy_length); - sscanf(newbuf, "%d", &index); - } - } - - return index; -} - -static inline void get_sensor_suffix_from_name(const char *name, char *suffix) -{ - char *dash_position = strrchr(name, '-'); - if (dash_position) - strncpy(suffix, dash_position+1, MAX_ATTR_LENGTH); - else - strcpy(suffix,""); -} - -static int get_sensor_attr_properties(const char *sensor_name, - enum sensors sensor_type, enum attributes *attr_type, - u32 *sensor_index) -{ - char suffix[MAX_ATTR_LENGTH]; - - *attr_type = MAX_ATTR_TYPES; - *sensor_index = get_sensor_index_from_name(sensor_name); - 
if (*sensor_index == 0) - return -EINVAL; - - get_sensor_suffix_from_name(sensor_name, suffix); - if (strcmp(suffix, "") == 0) - return -EINVAL; - - if (strcmp(suffix, DT_FAULT_ATTR_SUFFIX) == 0) - *attr_type = FAULT; - else if (strcmp(suffix, DT_DATA_ATTR_SUFFIX) == 0) - *attr_type = INPUT; - else if ((sensor_type == TEMPERATURE) && - (strcmp(suffix, DT_THRESHOLD_ATTR_SUFFIX) == 0)) - *attr_type = MAXIMUM; - else if ((sensor_type == FAN) && - (strcmp(suffix, DT_THRESHOLD_ATTR_SUFFIX) == 0)) - *attr_type = MINIMUM; - else - return -ENOENT; - - if (((sensor_type == FAN) && ((*attr_type == INPUT) || - (*attr_type == MINIMUM))) - || ((sensor_type == TEMPERATURE) && ((*attr_type == INPUT) || - (*attr_type == MAXIMUM))) - || ((sensor_type == POWER) && ((*attr_type == INPUT)))) - return 0; - - return -ENOENT; -} - -static int create_sensor_attr(struct sensor_specific_data *tdata, - struct device *dev, enum sensors sensor_type, - enum attributes attr_type) -{ - int err = 0; - char temp_file_prefix[50]; - static const char *const file_name_format = "%s%d_%s"; - - tdata->attr_type = attr_type; - - if (sensor_type == FAN) - strcpy(temp_file_prefix, "fan"); - else if (sensor_type == TEMPERATURE) - strcpy(temp_file_prefix, "temp"); - else if (sensor_type == POWERSUPPLY) - strcpy(temp_file_prefix, "powersupply"); - else if (sensor_type == POWER) - strcpy(temp_file_prefix, "power"); - - snprintf(tdata->attr_name, sizeof(tdata->attr_name), file_name_format, - temp_file_prefix, tdata->sensor_index, - attribute_type_table[tdata->attr_type]); - - sysfs_attr_init(&tdata->sd_attr.dev_attr.attr); - tdata->sd_attr.dev_attr.attr.name = tdata->attr_name; - tdata->sd_attr.dev_attr.attr.mode = S_IRUGO; - tdata->sd_attr.dev_attr.show = show_sensor; - - tdata->sd_attr.index = tdata->sensor_index; - err = device_create_file(dev, &tdata->sd_attr.dev_attr); - - return err; -} - -static int create_name_attr(struct sensor_data *pdata, - struct device *dev) -{ - 
sysfs_attr_init(&pdata->name_attr.attr); - pdata->name_attr.attr.name = "name"; - pdata->name_attr.attr.mode = S_IRUGO; - pdata->name_attr.show = show_name; - return device_create_file(dev, &pdata->name_attr); -} - -static int create_platform_device(enum sensors sensor_type, - struct platform_device **pdev) -{ - struct pdev_entry *pdev_entry = NULL; - int err; - - *pdev = platform_device_alloc(sensor_names[sensor_type].name, - sensor_type); - if (!*pdev) { - pr_err("Device allocation failed\n"); - err = -ENOMEM; - goto exit; - } - - pdev_entry = kzalloc(sizeof(struct pdev_entry), GFP_KERNEL); - if (!pdev_entry) { - pr_err("Device allocation failed\n"); - err = -ENOMEM; - goto exit_device_put; - } - - err = platform_device_add(*pdev); - if (err) { - pr_err("Device addition failed (%d)\n", err); - goto exit_device_free; - } - - pdev_entry->pdev = *pdev; - pdev_entry->type = (*pdev)->id; - - list_add_tail(&pdev_entry->list, &pdev_list); - - return 0; -exit_device_free: - kfree(pdev_entry); -exit_device_put: - platform_device_put(*pdev); -exit: - return err; -} - -static int create_sensor_data(struct platform_device *pdev) -{ - struct sensor_data *pdata = NULL; - int err = 0; - - pdata = kzalloc(sizeof(struct sensor_data), GFP_KERNEL); - if (!pdata) { - err = -ENOMEM; - goto exit; - } - - err = create_name_attr(pdata, &pdev->dev); - if (err) - goto exit_free; - - pdata->hwmon_dev = hwmon_device_register(&pdev->dev); - if (IS_ERR(pdata->hwmon_dev)) { - err = PTR_ERR(pdata->hwmon_dev); - dev_err(&pdev->dev, "Class registration failed (%d)\n", - err); - goto exit_name; - } - - INIT_LIST_HEAD(&pdata->sensor_list); - platform_set_drvdata(pdev, pdata); - - return 0; - -exit_name: - device_remove_file(&pdev->dev, &pdata->name_attr); -exit_free: - kfree(pdata); -exit: - return err; -} - -static void delete_sensor_attr(struct sensor_data *pdata) -{ - struct sensor_entry *s, *l; - - list_for_each_entry_safe(s, l, &pdata->sensor_list, list) { - struct sensor_specific_data *tdata 
= s->sensor_data; - kfree(tdata); - list_del(&s->list); - kfree(s); - } -} - -static int powernv_sensor_init(u32 sensor_id, const struct device_node *np, - enum sensors sensor_type, enum attributes attr_type, - u32 sensor_index) -{ - struct platform_device *pdev = powernv_sensor_get_pdev(sensor_type); - struct sensor_specific_data *tdata; - struct sensor_entry *sensor_entry; - struct sensor_data *pdata; - int err = 0; - - if (!pdev) { - err = create_platform_device(sensor_type, &pdev); - if (err) - goto exit; - - err = create_sensor_data(pdev); - if (err) - goto exit; - } - - pdata = platform_get_drvdata(pdev); - if (!pdata) { - err = -ENOMEM; - goto exit; - } - - tdata = kzalloc(sizeof(struct sensor_specific_data), GFP_KERNEL); - if (!tdata) { - err = -ENOMEM; - goto exit; - } - - tdata->sensor_id = sensor_id; - tdata->sensor_index = sensor_index; - - err = create_sensor_attr(tdata, &pdev->dev, sensor_type, attr_type); - if (err) - goto exit_free; - - sensor_entry = kzalloc(sizeof(struct sensor_entry), GFP_KERNEL); - if (!sensor_entry) { - err = -ENOMEM; - goto exit_attr; - } - - sensor_entry->sensor_data = tdata; - - list_add_tail(&sensor_entry->list, &pdata->sensor_list); - - return 0; -exit_attr: - device_remove_file(&pdev->dev, &tdata->sd_attr.dev_attr); -exit_free: - kfree(tdata); -exit: - return err; -} - -static void delete_unregister_sensors(void) -{ - struct pdev_entry *p, *n; - - list_for_each_entry_safe(p, n, &pdev_list, list) { - struct sensor_data *pdata = platform_get_drvdata(p->pdev); - if (pdata) { - delete_sensor_attr(pdata); - - hwmon_device_unregister(pdata->hwmon_dev); - kfree(pdata); - } - platform_device_unregister(p->pdev); - list_del(&p->list); - kfree(p); - } -} - -static int __init powernv_hwmon_init(void) -{ - struct device_node *opal, *np = NULL; - enum attributes attr_type; - enum sensors type; - const u32 *sensor_id; - u32 sensor_index; - int err; - - opal = of_find_node_by_path("/ibm,opal/sensors"); - if (!opal) { - pr_err("%s: Opal 
'sensors' node not found\n", __func__); - return -ENXIO; - } - - for_each_child_of_node(opal, np) { - if (np->name == NULL) - continue; - - for (type = 0; type < MAX_SENSOR_TYPE; type++) - if (of_device_is_compatible(np, - sensor_names[type].compaible)) - break; - - if (type == MAX_SENSOR_TYPE) - continue; - - if (get_sensor_attr_properties(np->name, type, &attr_type, - &sensor_index)) - continue; - - sensor_id = of_get_property(np, "sensor-id", NULL); - if (!sensor_id) { - pr_info("%s: %s doesn't have sensor-id\n", __func__, - np->name); - continue; - } - - err = powernv_sensor_init(*sensor_id, np, type, attr_type, - sensor_index); - if (err) { - of_node_put(opal); - goto exit; - } - } - of_node_put(opal); - - return 0; -exit: - delete_unregister_sensors(); - return err; - -} - -static void powernv_hwmon_exit(void) -{ - delete_unregister_sensors(); -} - -module_init(powernv_hwmon_init); -module_exit(powernv_hwmon_exit); From cc4f265ad9a37bdb1846c45eebe454c382f31d67 Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Wed, 9 Apr 2014 13:47:37 +1000 Subject: [PATCH 26/26] powerpc/powernv Adapt opal-elog and opal-dump to new sysfs_remove_file_self We are currently using sysfs_schedule_callback() which is deprecated and about to be removed. Switch to the new interface instead. 
Signed-off-by: Stewart Smith Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/powernv/opal-dump.c | 9 ++------- arch/powerpc/platforms/powernv/opal-elog.c | 9 ++------- 2 files changed, 4 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c index 0c767c561dc9..b9827b0d87e4 100644 --- a/arch/powerpc/platforms/powernv/opal-dump.c +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -86,19 +86,14 @@ static int64_t dump_send_ack(uint32_t dump_id) return rc; } -static void delay_release_kobj(void *kobj) -{ - kobject_put((struct kobject *)kobj); -} - static ssize_t dump_ack_store(struct dump_obj *dump_obj, struct dump_attribute *attr, const char *buf, size_t count) { dump_send_ack(dump_obj->id); - sysfs_schedule_callback(&dump_obj->kobj, delay_release_kobj, - &dump_obj->kobj, THIS_MODULE); + sysfs_remove_file_self(&dump_obj->kobj, &attr->attr); + kobject_put(&dump_obj->kobj); return count; } diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c index 1d7355bc9db0..ef7bc2a97862 100644 --- a/arch/powerpc/platforms/powernv/opal-elog.c +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -70,19 +70,14 @@ static ssize_t elog_ack_show(struct elog_obj *elog_obj, return sprintf(buf, "ack - acknowledge log message\n"); } -static void delay_release_kobj(void *kobj) -{ - kobject_put((struct kobject *)kobj); -} - static ssize_t elog_ack_store(struct elog_obj *elog_obj, struct elog_attribute *attr, const char *buf, size_t count) { opal_send_ack_elog(elog_obj->id); - sysfs_schedule_callback(&elog_obj->kobj, delay_release_kobj, - &elog_obj->kobj, THIS_MODULE); + sysfs_remove_file_self(&elog_obj->kobj, &attr->attr); + kobject_put(&elog_obj->kobj); return count; }