From f436f8bb73138bc74eb1c6527723e00988ad8a8a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 1 Oct 2009 16:14:32 +0200 Subject: [PATCH 1/5] x86: EDAC: MCE: Fix MCE decoding callback logic Make decoding of MCEs happen only on AMD hardware by registering a non-default callback only on CPU families which support it. While looking at the interaction of decode_mce() with the other MCE code I also noticed a few other things and made the following cleanups/fixes: - Fixed the decode_mce() weak alias - a weak alias is really not good here, it should be a proper callback. A weak alias will be overridden if a piece of code is built into the kernel - not good, obviously. - The patch initializes the callback on AMD family 10h and 11h. - Added the more correct fallback printk of: No support for human readable MCE decoding on this CPU type. Transcribe the message and run it through 'mcelog --ascii' to decode. On CPUs that don't have a decoder. - Made the surrounding code more readable. Note that the callback allows us to have a default fallback - without having to check the CPU versions during the printout itself. When an EDAC module registers itself, it can install the decode-print function. (there's no unregister needed as this is core code.) 
version -v2 by Borislav Petkov: - add K8 to the set of supported CPUs - always build in edac_mce_amd since we use an early_initcall now - fix checkpatch warnings Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Andi Kleen LKML-Reference: <20091001141432.GA11410@aftab> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 2 ++ arch/x86/kernel/cpu/mcheck/mce.c | 62 +++++++++++++++++++------------- drivers/edac/Makefile | 2 +- drivers/edac/edac_mce_amd.c | 15 +++++++- 4 files changed, 55 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index b608a64c5814..f1363b72364f 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -133,6 +133,8 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {} static inline void enable_p5_mce(void) {} #endif +extern void (*x86_mce_decode_callback)(struct mce *m); + void mce_setup(struct mce *m); void mce_log(struct mce *m); DECLARE_PER_CPU(struct sys_device, mce_dev); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 183c3457d2f4..b1598a9436d0 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -85,6 +85,18 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait); static DEFINE_PER_CPU(struct mce, mces_seen); static int cpu_missing; +static void default_decode_mce(struct mce *m) +{ + pr_emerg("No human readable MCE decoding support on this CPU type.\n"); + pr_emerg("Run the message through 'mcelog --ascii' to decode.\n"); +} + +/* + * CPU/chipset specific EDAC code can register a callback here to print + * MCE errors in a human-readable form: + */ +void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce; +EXPORT_SYMBOL(x86_mce_decode_callback); /* MCA banks polled by the period polling timer for corrected events */ DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { @@ -165,46 +177,46 @@ void mce_log(struct mce *mce) set_bit(0, &mce_need_notify); } -void __weak 
decode_mce(struct mce *m) -{ - return; -} - static void print_mce(struct mce *m) { - printk(KERN_EMERG - "CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", + pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n", m->extcpu, m->mcgstatus, m->bank, m->status); + if (m->ip) { - printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", - !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", - m->cs, m->ip); + pr_emerg("RIP%s %02x:<%016Lx> ", + !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", + m->cs, m->ip); + if (m->cs == __KERNEL_CS) print_symbol("{%s}", m->ip); - printk(KERN_CONT "\n"); + pr_cont("\n"); } - printk(KERN_EMERG "TSC %llx ", m->tsc); - if (m->addr) - printk(KERN_CONT "ADDR %llx ", m->addr); - if (m->misc) - printk(KERN_CONT "MISC %llx ", m->misc); - printk(KERN_CONT "\n"); - printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", - m->cpuvendor, m->cpuid, m->time, m->socketid, - m->apicid); - decode_mce(m); + pr_emerg("TSC %llx ", m->tsc); + if (m->addr) + pr_cont("ADDR %llx ", m->addr); + if (m->misc) + pr_cont("MISC %llx ", m->misc); + + pr_cont("\n"); + pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n", + m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid); + + /* + * Print out human-readable details about the MCE error, + * (if the CPU has an implementation for that): + */ + x86_mce_decode_callback(m); } static void print_mce_head(void) { - printk(KERN_EMERG "\nHARDWARE ERROR\n"); + pr_emerg("\nHARDWARE ERROR\n"); } static void print_mce_tail(void) { - printk(KERN_EMERG "This is not a software problem!\n" - "Run through mcelog --ascii to decode and contact your hardware vendor\n"); + pr_emerg("This is not a software problem!\n"); } #define PANIC_TIMEOUT 5 /* 5 seconds */ @@ -218,6 +230,7 @@ static atomic_t mce_fake_paniced; static void wait_for_panic(void) { long timeout = PANIC_TIMEOUT*USEC_PER_SEC; + preempt_disable(); local_irq_enable(); while (timeout-- > 0) @@ -285,6 +298,7 @@ static void mce_panic(char 
*msg, struct mce *final, char *exp) static int msr_to_offset(u32 msr) { unsigned bank = __get_cpu_var(injectm.bank); + if (msr == rip_msr) return offsetof(struct mce, ip); if (msr == MSR_IA32_MCx_STATUS(bank)) diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 7a473bbe8abd..8701cd7ce4e3 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -18,7 +18,7 @@ edac_core-objs += edac_pci.o edac_pci_sysfs.o endif ifdef CONFIG_CPU_SUP_AMD -edac_core-objs += edac_mce_amd.o +obj-$(CONFIG_X86_MCE) += edac_mce_amd.o endif obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c index 0c21c370c9dd..83a01a1187d7 100644 --- a/drivers/edac/edac_mce_amd.c +++ b/drivers/edac/edac_mce_amd.c @@ -362,7 +362,7 @@ static inline void amd_decode_err_code(unsigned int ec) pr_warning("Huh? Unknown MCE error 0x%x\n", ec); } -void decode_mce(struct mce *m) +static void amd_decode_mce(struct mce *m) { struct err_regs regs; int node, ecc; @@ -420,3 +420,16 @@ void decode_mce(struct mce *m) amd_decode_err_code(m->status & 0xffff); } + +static int __init mce_amd_init(void) +{ + /* + * We can decode MCEs for Opteron and later CPUs: + */ + if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && + (boot_cpu_data.x86 >= 0xf)) + x86_mce_decode_callback = amd_decode_mce; + + return 0; +} +early_initcall(mce_amd_init); From 329bd4119c8a0afea95f9db6d6b402a2f2b40e84 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 2 Oct 2009 15:23:21 +0200 Subject: [PATCH 2/5] initcalls: Add early_initcall() for modules Complete the early_initcall() API by making it available in modules too. To be used by the EDAC/MCE code. 
Signed-off-by: Borislav Petkov Acked-by: Linus Torvalds Cc: Andi Kleen LKML-Reference: <20091002132321.GC28682@aftab> Signed-off-by: Ingo Molnar --- include/linux/init.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/init.h b/include/linux/init.h index 400adbb45414..ff8bde520d03 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -271,6 +271,7 @@ void __init parse_early_options(char *cmdline); #else /* MODULE */ /* Don't use these in modules, but some people do... */ +#define early_initcall(fn) module_init(fn) #define core_initcall(fn) module_init(fn) #define postcore_initcall(fn) module_init(fn) #define arch_initcall(fn) module_init(fn) From 0d18b2e34bd1ad8f5bd3f3a17b5e7df132e511a9 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 2 Oct 2009 15:31:48 +0200 Subject: [PATCH 3/5] x86: EDAC: carve out AMD MCE decoding logic This converts the MCE decoding logic into a standalone config option which can be built-in or a module, the first one being the default for MCEs happening early on in the boot process. This, beyond being separated in a cleaner way, also saves RAM by making the decoding logic modular. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Andi Kleen LKML-Reference: <20091002133148.GD28682@aftab> Signed-off-by: Ingo Molnar --- drivers/edac/Kconfig | 14 +++++++++++++- drivers/edac/Makefile | 5 +---- drivers/edac/edac_mce_amd.c | 19 ++++++++++++++++++- 3 files changed, 32 insertions(+), 6 deletions(-) diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 02127e59fe8e..55c9c59b3f71 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -47,6 +47,18 @@ config EDAC_DEBUG_VERBOSE Source file name and line number where debugging message printed will be added to debugging message. 
+ config EDAC_DECODE_MCE + tristate "Decode MCEs in human-readable form (only on AMD for now)" + depends on CPU_SUP_AMD && X86_MCE + default y + ---help--- + Enable this option if you want to decode Machine Check Exceptions + occuring on your machine in human-readable form. + + You should definitely say Y here in case you want to decode MCEs + which occur really early upon boot, before the module infrastructure + has been initialized. + config EDAC_MM_EDAC tristate "Main Memory EDAC (Error Detection And Correction) reporting" help @@ -59,7 +71,7 @@ config EDAC_MM_EDAC config EDAC_AMD64 tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h" - depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && CPU_SUP_AMD + depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && EDAC_DECODE_MCE help Support for error detection and correction on the AMD 64 Families of Memory Controllers (K8, F10h and F11h) diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 8701cd7ce4e3..bc5dc232a0fb 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -6,7 +6,6 @@ # GNU General Public License. 
# - obj-$(CONFIG_EDAC) := edac_stub.o obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o @@ -17,9 +16,7 @@ ifdef CONFIG_PCI edac_core-objs += edac_pci.o edac_pci_sysfs.o endif -ifdef CONFIG_CPU_SUP_AMD -obj-$(CONFIG_X86_MCE) += edac_mce_amd.o -endif +obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o obj-$(CONFIG_EDAC_CPC925) += cpc925_edac.o diff --git a/drivers/edac/edac_mce_amd.c b/drivers/edac/edac_mce_amd.c index 83a01a1187d7..713ed7d37247 100644 --- a/drivers/edac/edac_mce_amd.c +++ b/drivers/edac/edac_mce_amd.c @@ -3,6 +3,7 @@ static bool report_gart_errors; static void (*nb_bus_decoder)(int node_id, struct err_regs *regs); +static void (*orig_mce_callback)(struct mce *m); void amd_report_gart_errors(bool v) { @@ -427,9 +428,25 @@ static int __init mce_amd_init(void) * We can decode MCEs for Opteron and later CPUs: */ if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) && - (boot_cpu_data.x86 >= 0xf)) + (boot_cpu_data.x86 >= 0xf)) { + /* safe the default decode mce callback */ + orig_mce_callback = x86_mce_decode_callback; + x86_mce_decode_callback = amd_decode_mce; + } return 0; } early_initcall(mce_amd_init); + +#ifdef MODULE +static void __exit mce_amd_exit(void) +{ + x86_mce_decode_callback = orig_mce_callback; +} + +MODULE_DESCRIPTION("AMD MCE decoder"); +MODULE_ALIAS("edac-mce-amd"); +MODULE_LICENSE("GPL"); +module_exit(mce_amd_exit); +#endif From 11879ba5d9ab8174af9b9cefbb2396a54dfbf8c1 Mon Sep 17 00:00:00 2001 From: Arjan van de Ven Date: Sat, 26 Sep 2009 20:51:50 +0200 Subject: [PATCH 4/5] x86: Simplify bound checks in the MTRR code The current bound checks for copy_from_user in the MTRR driver are not as obvious as they could be, and gcc agrees with that. This patch simplifies the boundary checks to the point that gcc can now prove to itself that the copy_from_user() is never going past its bounds. 
Signed-off-by: Arjan van de Ven Cc: Yinghai Lu Cc: Linus Torvalds LKML-Reference: <20090926205150.30797709@infradead.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mtrr/if.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c index f04e72527604..3c1b12d461d1 100644 --- a/arch/x86/kernel/cpu/mtrr/if.c +++ b/arch/x86/kernel/cpu/mtrr/if.c @@ -96,17 +96,24 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos) unsigned long long base, size; char *ptr; char line[LINE_SIZE]; + int length; size_t linelen; if (!capable(CAP_SYS_ADMIN)) return -EPERM; - if (!len) - return -EINVAL; memset(line, 0, LINE_SIZE); - if (len > LINE_SIZE) - len = LINE_SIZE; - if (copy_from_user(line, buf, len - 1)) + + length = len; + length--; + + if (length > LINE_SIZE - 1) + length = LINE_SIZE - 1; + + if (length < 0) + return -EINVAL; + + if (copy_from_user(line, buf, length)) return -EFAULT; linelen = strlen(line); From e3be785fb59f92c0df685037062d041619653b7a Mon Sep 17 00:00:00 2001 From: Marin Mitov Date: Sat, 3 Oct 2009 20:45:02 +0300 Subject: [PATCH 5/5] x86, pci: Correct spelling in a comment Signed-off-by: Marin Mitov Cc: Joerg Roedel Cc: Jesse Brandeburg LKML-Reference: <200910032045.02523.mitov@issp.bas.bg> Signed-off-by: Ingo Molnar ====================================================== --- arch/x86/kernel/pci-dma.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 64b838eac18c..d20009b4e6ef 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -35,7 +35,7 @@ int iommu_detected __read_mostly = 0; /* * This variable becomes 1 if iommu=pt is passed on the kernel command line. 
- * If this variable is 1, IOMMU implementations do no DMA ranslation for + * If this variable is 1, IOMMU implementations do no DMA translation for * devices and allow every device to access to whole physical memory. This is * useful if a user want to use an IOMMU only for KVM device assignment to * guests and not for driver dma translation.