Merge branch 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip

* 'x86-fixes-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/linux-2.6-tip:
  x86, pci: Correct spelling in a comment
  x86: Simplify bound checks in the MTRR code
  x86: EDAC: carve out AMD MCE decoding logic
  initcalls: Add early_initcall() for modules
  x86: EDAC: MCE: Fix MCE decoding callback logic
This commit is contained in:
Linus Torvalds 2009-10-08 12:06:36 -07:00
commit 624235c5b3
8 changed files with 99 additions and 36 deletions

View file

@ -133,6 +133,8 @@ static inline void winchip_mcheck_init(struct cpuinfo_x86 *c) {}
static inline void enable_p5_mce(void) {} static inline void enable_p5_mce(void) {}
#endif #endif
extern void (*x86_mce_decode_callback)(struct mce *m);
void mce_setup(struct mce *m); void mce_setup(struct mce *m);
void mce_log(struct mce *m); void mce_log(struct mce *m);
DECLARE_PER_CPU(struct sys_device, mce_dev); DECLARE_PER_CPU(struct sys_device, mce_dev);

View file

@ -85,6 +85,18 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
static DEFINE_PER_CPU(struct mce, mces_seen); static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing; static int cpu_missing;
/*
 * Fallback MCE decoder: performs no decoding of @m (the argument is unused)
 * and only emits two emergency-level messages telling the user that no
 * human-readable decoding is available and to run the raw record through
 * 'mcelog --ascii' instead.
 */
static void default_decode_mce(struct mce *m)
{
pr_emerg("No human readable MCE decoding support on this CPU type.\n");
pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
}
/*
 * CPU/chipset specific EDAC code can register a callback here to print
 * MCE errors in a human-readable form.
 *
 * The pointer is initialized to default_decode_mce() and the symbol is
 * exported, so code outside this file can install its own decoder and
 * later put the previous value back.
 */
void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
EXPORT_SYMBOL(x86_mce_decode_callback);
/* MCA banks polled by the period polling timer for corrected events */ /* MCA banks polled by the period polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = { DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@ -165,46 +177,46 @@ void mce_log(struct mce *mce)
set_bit(0, &mce_need_notify); set_bit(0, &mce_need_notify);
} }
void __weak decode_mce(struct mce *m)
{
return;
}
static void print_mce(struct mce *m) static void print_mce(struct mce *m)
{ {
printk(KERN_EMERG pr_emerg("CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
"CPU %d: Machine Check Exception: %16Lx Bank %d: %016Lx\n",
m->extcpu, m->mcgstatus, m->bank, m->status); m->extcpu, m->mcgstatus, m->bank, m->status);
if (m->ip) { if (m->ip) {
printk(KERN_EMERG "RIP%s %02x:<%016Lx> ", pr_emerg("RIP%s %02x:<%016Lx> ",
!(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "", !(m->mcgstatus & MCG_STATUS_EIPV) ? " !INEXACT!" : "",
m->cs, m->ip); m->cs, m->ip);
if (m->cs == __KERNEL_CS) if (m->cs == __KERNEL_CS)
print_symbol("{%s}", m->ip); print_symbol("{%s}", m->ip);
printk(KERN_CONT "\n"); pr_cont("\n");
} }
printk(KERN_EMERG "TSC %llx ", m->tsc);
if (m->addr)
printk(KERN_CONT "ADDR %llx ", m->addr);
if (m->misc)
printk(KERN_CONT "MISC %llx ", m->misc);
printk(KERN_CONT "\n");
printk(KERN_EMERG "PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
m->cpuvendor, m->cpuid, m->time, m->socketid,
m->apicid);
decode_mce(m); pr_emerg("TSC %llx ", m->tsc);
if (m->addr)
pr_cont("ADDR %llx ", m->addr);
if (m->misc)
pr_cont("MISC %llx ", m->misc);
pr_cont("\n");
pr_emerg("PROCESSOR %u:%x TIME %llu SOCKET %u APIC %x\n",
m->cpuvendor, m->cpuid, m->time, m->socketid, m->apicid);
/*
* Print out human-readable details about the MCE error,
* (if the CPU has an implementation for that):
*/
x86_mce_decode_callback(m);
} }
static void print_mce_head(void) static void print_mce_head(void)
{ {
printk(KERN_EMERG "\nHARDWARE ERROR\n"); pr_emerg("\nHARDWARE ERROR\n");
} }
static void print_mce_tail(void) static void print_mce_tail(void)
{ {
printk(KERN_EMERG "This is not a software problem!\n" pr_emerg("This is not a software problem!\n");
"Run through mcelog --ascii to decode and contact your hardware vendor\n");
} }
#define PANIC_TIMEOUT 5 /* 5 seconds */ #define PANIC_TIMEOUT 5 /* 5 seconds */
@ -218,6 +230,7 @@ static atomic_t mce_fake_paniced;
static void wait_for_panic(void) static void wait_for_panic(void)
{ {
long timeout = PANIC_TIMEOUT*USEC_PER_SEC; long timeout = PANIC_TIMEOUT*USEC_PER_SEC;
preempt_disable(); preempt_disable();
local_irq_enable(); local_irq_enable();
while (timeout-- > 0) while (timeout-- > 0)
@ -285,6 +298,7 @@ static void mce_panic(char *msg, struct mce *final, char *exp)
static int msr_to_offset(u32 msr) static int msr_to_offset(u32 msr)
{ {
unsigned bank = __get_cpu_var(injectm.bank); unsigned bank = __get_cpu_var(injectm.bank);
if (msr == rip_msr) if (msr == rip_msr)
return offsetof(struct mce, ip); return offsetof(struct mce, ip);
if (msr == MSR_IA32_MCx_STATUS(bank)) if (msr == MSR_IA32_MCx_STATUS(bank))

View file

@ -96,17 +96,24 @@ mtrr_write(struct file *file, const char __user *buf, size_t len, loff_t * ppos)
unsigned long long base, size; unsigned long long base, size;
char *ptr; char *ptr;
char line[LINE_SIZE]; char line[LINE_SIZE];
int length;
size_t linelen; size_t linelen;
if (!capable(CAP_SYS_ADMIN)) if (!capable(CAP_SYS_ADMIN))
return -EPERM; return -EPERM;
if (!len)
return -EINVAL;
memset(line, 0, LINE_SIZE); memset(line, 0, LINE_SIZE);
if (len > LINE_SIZE)
len = LINE_SIZE; length = len;
if (copy_from_user(line, buf, len - 1)) length--;
if (length > LINE_SIZE - 1)
length = LINE_SIZE - 1;
if (length < 0)
return -EINVAL;
if (copy_from_user(line, buf, length))
return -EFAULT; return -EFAULT;
linelen = strlen(line); linelen = strlen(line);

View file

@ -35,7 +35,7 @@ int iommu_detected __read_mostly = 0;
/* /*
* This variable becomes 1 if iommu=pt is passed on the kernel command line. * This variable becomes 1 if iommu=pt is passed on the kernel command line.
* If this variable is 1, IOMMU implementations do no DMA ranslation for * If this variable is 1, IOMMU implementations do no DMA translation for
* devices and allow every device to access the whole physical memory. This is * devices and allow every device to access the whole physical memory. This is
* useful if a user wants to use an IOMMU only for KVM device assignment to * useful if a user wants to use an IOMMU only for KVM device assignment to
* guests and not for driver dma translation. * guests and not for driver dma translation.

View file

@ -47,6 +47,18 @@ config EDAC_DEBUG_VERBOSE
Source file name and line number where debugging message Source file name and line number where debugging message
printed will be added to debugging message. printed will be added to debugging message.
config EDAC_DECODE_MCE
tristate "Decode MCEs in human-readable form (only on AMD for now)"
depends on CPU_SUP_AMD && X86_MCE
default y
---help---
Enable this option if you want to decode Machine Check Exceptions
occurring on your machine in human-readable form.
You should definitely say Y here in case you want to decode MCEs
which occur really early upon boot, before the module infrastructure
has been initialized.
config EDAC_MM_EDAC config EDAC_MM_EDAC
tristate "Main Memory EDAC (Error Detection And Correction) reporting" tristate "Main Memory EDAC (Error Detection And Correction) reporting"
help help
@ -59,7 +71,7 @@ config EDAC_MM_EDAC
config EDAC_AMD64 config EDAC_AMD64
tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h" tristate "AMD64 (Opteron, Athlon64) K8, F10h, F11h"
depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && CPU_SUP_AMD depends on EDAC_MM_EDAC && K8_NB && X86_64 && PCI && EDAC_DECODE_MCE
help help
Support for error detection and correction on the AMD 64 Support for error detection and correction on the AMD 64
Families of Memory Controllers (K8, F10h and F11h) Families of Memory Controllers (K8, F10h and F11h)

View file

@ -6,7 +6,6 @@
# GNU General Public License. # GNU General Public License.
# #
obj-$(CONFIG_EDAC) := edac_stub.o obj-$(CONFIG_EDAC) := edac_stub.o
obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o obj-$(CONFIG_EDAC_MM_EDAC) += edac_core.o
@ -17,9 +16,7 @@ ifdef CONFIG_PCI
edac_core-objs += edac_pci.o edac_pci_sysfs.o edac_core-objs += edac_pci.o edac_pci_sysfs.o
endif endif
ifdef CONFIG_CPU_SUP_AMD obj-$(CONFIG_EDAC_DECODE_MCE) += edac_mce_amd.o
edac_core-objs += edac_mce_amd.o
endif
obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o obj-$(CONFIG_EDAC_AMD76X) += amd76x_edac.o
obj-$(CONFIG_EDAC_CPC925) += cpc925_edac.o obj-$(CONFIG_EDAC_CPC925) += cpc925_edac.o

View file

@ -3,6 +3,7 @@
static bool report_gart_errors; static bool report_gart_errors;
static void (*nb_bus_decoder)(int node_id, struct err_regs *regs); static void (*nb_bus_decoder)(int node_id, struct err_regs *regs);
static void (*orig_mce_callback)(struct mce *m);
void amd_report_gart_errors(bool v) void amd_report_gart_errors(bool v)
{ {
@ -362,7 +363,7 @@ static inline void amd_decode_err_code(unsigned int ec)
pr_warning("Huh? Unknown MCE error 0x%x\n", ec); pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
} }
void decode_mce(struct mce *m) static void amd_decode_mce(struct mce *m)
{ {
struct err_regs regs; struct err_regs regs;
int node, ecc; int node, ecc;
@ -420,3 +421,32 @@ void decode_mce(struct mce *m)
amd_decode_err_code(m->status & 0xffff); amd_decode_err_code(m->status & 0xffff);
} }
/*
 * Install the AMD MCE decoder as the active x86 MCE decode callback.
 *
 * The decoder is only installed when the boot CPU is an AMD part of
 * family 0xf (Opteron) or later; on any other CPU the callback is left
 * untouched. Always returns 0.
 */
static int __init mce_amd_init(void)
{
	/* Nothing to do unless this is an AMD CPU we know how to decode. */
	if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD ||
	    boot_cpu_data.x86 < 0xf)
		return 0;

	/* Save the current decode callback before replacing it with ours. */
	orig_mce_callback = x86_mce_decode_callback;
	x86_mce_decode_callback = amd_decode_mce;

	return 0;
}
early_initcall(mce_amd_init);
#ifdef MODULE
/*
 * Module unload: put back the MCE decode callback that was active before
 * this module installed amd_decode_mce().
 *
 * mce_amd_init() only records orig_mce_callback when it actually replaces
 * the callback (AMD family 0xf and later). On any other CPU init still
 * succeeds but orig_mce_callback stays NULL; restoring it unconditionally
 * would clear x86_mce_decode_callback, which is called without a NULL
 * check when an MCE is printed. Only restore when something was saved.
 */
static void __exit mce_amd_exit(void)
{
	if (orig_mce_callback)
		x86_mce_decode_callback = orig_mce_callback;
}
MODULE_DESCRIPTION("AMD MCE decoder");
MODULE_ALIAS("edac-mce-amd");
MODULE_LICENSE("GPL");
module_exit(mce_amd_exit);
#endif

View file

@ -271,6 +271,7 @@ void __init parse_early_options(char *cmdline);
#else /* MODULE */ #else /* MODULE */
/* Don't use these in modules, but some people do... */ /* Don't use these in modules, but some people do... */
#define early_initcall(fn) module_init(fn)
#define core_initcall(fn) module_init(fn) #define core_initcall(fn) module_init(fn)
#define postcore_initcall(fn) module_init(fn) #define postcore_initcall(fn) module_init(fn)
#define arch_initcall(fn) module_init(fn) #define arch_initcall(fn) module_init(fn)