Merge branch 'perf/mce' into perf/core

Merge reason: It's ready for v2.6.33.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
This commit is contained in:
Ingo Molnar 2009-12-03 20:10:59 +01:00
commit 26fb20d008
9 changed files with 198 additions and 63 deletions

View file

@ -20,11 +20,11 @@ typedef struct {
unsigned int irq_call_count;
unsigned int irq_tlb_count;
#endif
#ifdef CONFIG_X86_MCE
#ifdef CONFIG_X86_THERMAL_VECTOR
unsigned int irq_thermal_count;
# ifdef CONFIG_X86_MCE_THRESHOLD
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
unsigned int irq_threshold_count;
# endif
#endif
} ____cacheline_aligned irq_cpustat_t;

View file

@ -108,6 +108,8 @@ struct mce_log {
#define K8_MCE_THRESHOLD_BANK_5 (MCE_THRESHOLD_BASE + 5 * 9)
#define K8_MCE_THRESHOLD_DRAM_ECC (MCE_THRESHOLD_BANK_4 + 0)
extern struct atomic_notifier_head x86_mce_decoder_chain;
#ifdef __KERNEL__
#include <linux/percpu.h>
@ -118,9 +120,11 @@ extern int mce_disabled;
extern int mce_p5_enabled;
#ifdef CONFIG_X86_MCE
void mcheck_init(struct cpuinfo_x86 *c);
int mcheck_init(void);
void mcheck_cpu_init(struct cpuinfo_x86 *c);
#else
static inline void mcheck_init(struct cpuinfo_x86 *c) {}
static inline int mcheck_init(void) { return 0; }
static inline void mcheck_cpu_init(struct cpuinfo_x86 *c) {}
#endif
#ifdef CONFIG_X86_ANCIENT_MCE
@ -214,5 +218,11 @@ void intel_init_thermal(struct cpuinfo_x86 *c);
void mce_log_therm_throt_event(__u64 status);
#ifdef CONFIG_X86_THERMAL_VECTOR
extern void mcheck_intel_therm_init(void);
#else
static inline void mcheck_intel_therm_init(void) { }
#endif
#endif /* __KERNEL__ */
#endif /* _ASM_X86_MCE_H */

View file

@ -837,10 +837,8 @@ static void __cpuinit identify_cpu(struct cpuinfo_x86 *c)
boot_cpu_data.x86_capability[i] &= c->x86_capability[i];
}
#ifdef CONFIG_X86_MCE
/* Init Machine Check Exception if available. */
mcheck_init(c);
#endif
mcheck_cpu_init(c);
select_idle_routine(c);

View file

@ -46,6 +46,9 @@
#include "mce-internal.h"
#define CREATE_TRACE_POINTS
#include <trace/events/mce.h>
int mce_disabled __read_mostly;
#define MISC_MCELOG_MINOR 227
@ -85,18 +88,26 @@ static DECLARE_WAIT_QUEUE_HEAD(mce_wait);
static DEFINE_PER_CPU(struct mce, mces_seen);
static int cpu_missing;
static void default_decode_mce(struct mce *m)
/*
* CPU/chipset specific EDAC code can register a notifier call here to print
* MCE errors in a human-readable form.
*/
ATOMIC_NOTIFIER_HEAD(x86_mce_decoder_chain);
EXPORT_SYMBOL_GPL(x86_mce_decoder_chain);
/*
* Default notifier callback for x86_mce_decoder_chain. It does not decode
* anything: it only tells the user that no human-readable decoding is
* available and points at 'mcelog --ascii'. None of the arguments
* (nb, val, data) are used. Always returns NOTIFY_STOP.
*/
static int default_decode_mce(struct notifier_block *nb, unsigned long val,
void *data)
{
pr_emerg("No human readable MCE decoding support on this CPU type.\n");
pr_emerg("Run the message through 'mcelog --ascii' to decode.\n");
return NOTIFY_STOP;
}
/*
* CPU/chipset specific EDAC code can register a callback here to print
* MCE errors in a human-readable form:
*/
void (*x86_mce_decode_callback)(struct mce *m) = default_decode_mce;
EXPORT_SYMBOL(x86_mce_decode_callback);
/*
* Notifier block wrapping default_decode_mce(); mcheck_init() registers it
* on x86_mce_decoder_chain.
* NOTE(review): priority -1 presumably orders it after decoders registered
* with the default priority, so it only runs when no earlier decoder has
* stopped the chain - confirm against the notifier chain ordering rules.
*/
static struct notifier_block mce_dec_nb = {
.notifier_call = default_decode_mce,
.priority = -1,
};
/* MCA banks polled by the period polling timer for corrected events */
DEFINE_PER_CPU(mce_banks_t, mce_poll_banks) = {
@ -141,6 +152,9 @@ void mce_log(struct mce *mce)
{
unsigned next, entry;
/* Emit the trace record: */
trace_mce_record(mce);
mce->finished = 0;
wmb();
for (;;) {
@ -204,9 +218,9 @@ static void print_mce(struct mce *m)
/*
* Print out human-readable details about the MCE error,
* (if the CPU has an implementation for that):
* (if the CPU has an implementation for that)
*/
x86_mce_decode_callback(m);
atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, m);
}
static void print_mce_head(void)
@ -1122,7 +1136,7 @@ static int check_interval = 5 * 60; /* 5 minutes */
static DEFINE_PER_CPU(int, mce_next_interval); /* in jiffies */
static DEFINE_PER_CPU(struct timer_list, mce_timer);
static void mcheck_timer(unsigned long data)
static void mce_start_timer(unsigned long data)
{
struct timer_list *t = &per_cpu(mce_timer, data);
int *n;
@ -1187,7 +1201,7 @@ int mce_notify_irq(void)
}
EXPORT_SYMBOL_GPL(mce_notify_irq);
static int mce_banks_init(void)
static int __cpuinit __mcheck_cpu_mce_banks_init(void)
{
int i;
@ -1206,7 +1220,7 @@ static int mce_banks_init(void)
/*
* Initialize Machine Checks for a CPU.
*/
static int __cpuinit mce_cap_init(void)
static int __cpuinit __mcheck_cpu_cap_init(void)
{
unsigned b;
u64 cap;
@ -1228,7 +1242,7 @@ static int __cpuinit mce_cap_init(void)
WARN_ON(banks != 0 && b != banks);
banks = b;
if (!mce_banks) {
int err = mce_banks_init();
int err = __mcheck_cpu_mce_banks_init();
if (err)
return err;
@ -1244,7 +1258,7 @@ static int __cpuinit mce_cap_init(void)
return 0;
}
static void mce_init(void)
static void __mcheck_cpu_init_generic(void)
{
mce_banks_t all_banks;
u64 cap;
@ -1273,7 +1287,7 @@ static void mce_init(void)
}
/* Add per CPU specific workarounds here */
static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
static int __cpuinit __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
{
if (c->x86_vendor == X86_VENDOR_UNKNOWN) {
pr_info("MCE: unknown CPU type - not enabling MCE support.\n");
@ -1341,7 +1355,7 @@ static int __cpuinit mce_cpu_quirks(struct cpuinfo_x86 *c)
return 0;
}
static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
static void __cpuinit __mcheck_cpu_ancient_init(struct cpuinfo_x86 *c)
{
if (c->x86 != 5)
return;
@ -1355,7 +1369,7 @@ static void __cpuinit mce_ancient_init(struct cpuinfo_x86 *c)
}
}
static void mce_cpu_features(struct cpuinfo_x86 *c)
static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
{
switch (c->x86_vendor) {
case X86_VENDOR_INTEL:
@ -1369,7 +1383,7 @@ static void mce_cpu_features(struct cpuinfo_x86 *c)
}
}
static void mce_init_timer(void)
static void __mcheck_cpu_init_timer(void)
{
struct timer_list *t = &__get_cpu_var(mce_timer);
int *n = &__get_cpu_var(mce_next_interval);
@ -1380,7 +1394,7 @@ static void mce_init_timer(void)
*n = check_interval * HZ;
if (!*n)
return;
setup_timer(t, mcheck_timer, smp_processor_id());
setup_timer(t, mce_start_timer, smp_processor_id());
t->expires = round_jiffies(jiffies + *n);
add_timer_on(t, smp_processor_id());
}
@ -1400,27 +1414,28 @@ void (*machine_check_vector)(struct pt_regs *, long error_code) =
* Called for each booted CPU to set up machine checks.
* Must be called with preempt off:
*/
void __cpuinit mcheck_init(struct cpuinfo_x86 *c)
void __cpuinit mcheck_cpu_init(struct cpuinfo_x86 *c)
{
if (mce_disabled)
return;
mce_ancient_init(c);
__mcheck_cpu_ancient_init(c);
if (!mce_available(c))
return;
if (mce_cap_init() < 0 || mce_cpu_quirks(c) < 0) {
if (__mcheck_cpu_cap_init() < 0 || __mcheck_cpu_apply_quirks(c) < 0) {
mce_disabled = 1;
return;
}
machine_check_vector = do_machine_check;
mce_init();
mce_cpu_features(c);
mce_init_timer();
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(c);
__mcheck_cpu_init_timer();
INIT_WORK(&__get_cpu_var(mce_work), mce_process_work);
}
/*
@ -1640,6 +1655,15 @@ static int __init mcheck_enable(char *str)
}
__setup("mce", mcheck_enable);
/*
* One-time machine check setup, called from setup_arch().
*
* Registers the default decoder (mce_dec_nb) on x86_mce_decoder_chain and
* then calls mcheck_intel_therm_init(), which is an empty inline stub when
* CONFIG_X86_THERMAL_VECTOR is not set.
*
* The return value of atomic_notifier_chain_register() is not checked;
* this function always returns 0.
*/
int __init mcheck_init(void)
{
atomic_notifier_chain_register(&x86_mce_decoder_chain, &mce_dec_nb);
mcheck_intel_therm_init();
return 0;
}
/*
* Sysfs support
*/
@ -1648,7 +1672,7 @@ __setup("mce", mcheck_enable);
* Disable machine checks on suspend and shutdown. We can't really handle
* them later.
*/
static int mce_disable(void)
static int mce_disable_error_reporting(void)
{
int i;
@ -1663,12 +1687,12 @@ static int mce_disable(void)
static int mce_suspend(struct sys_device *dev, pm_message_t state)
{
return mce_disable();
return mce_disable_error_reporting();
}
static int mce_shutdown(struct sys_device *dev)
{
return mce_disable();
return mce_disable_error_reporting();
}
/*
@ -1678,8 +1702,8 @@ static int mce_shutdown(struct sys_device *dev)
*/
static int mce_resume(struct sys_device *dev)
{
mce_init();
mce_cpu_features(&current_cpu_data);
__mcheck_cpu_init_generic();
__mcheck_cpu_init_vendor(&current_cpu_data);
return 0;
}
@ -1689,8 +1713,8 @@ static void mce_cpu_restart(void *data)
del_timer_sync(&__get_cpu_var(mce_timer));
if (!mce_available(&current_cpu_data))
return;
mce_init();
mce_init_timer();
__mcheck_cpu_init_generic();
__mcheck_cpu_init_timer();
}
/* Reinit MCEs after user configuration changes */
@ -1716,7 +1740,7 @@ static void mce_enable_ce(void *all)
cmci_reenable();
cmci_recheck();
if (all)
mce_init_timer();
__mcheck_cpu_init_timer();
}
static struct sysdev_class mce_sysclass = {
@ -1929,13 +1953,14 @@ static __cpuinit void mce_remove_device(unsigned int cpu)
}
/* Make sure there are no machine checks on offlined CPUs. */
static void mce_disable_cpu(void *h)
static void __cpuinit mce_disable_cpu(void *h)
{
unsigned long action = *(unsigned long *)h;
int i;
if (!mce_available(&current_cpu_data))
return;
if (!(action & CPU_TASKS_FROZEN))
cmci_clear();
for (i = 0; i < banks; i++) {
@ -1946,7 +1971,7 @@ static void mce_disable_cpu(void *h)
}
}
static void mce_reenable_cpu(void *h)
static void __cpuinit mce_reenable_cpu(void *h)
{
unsigned long action = *(unsigned long *)h;
int i;
@ -2025,7 +2050,7 @@ static __init void mce_init_banks(void)
}
}
static __init int mce_init_device(void)
static __init int mcheck_init_device(void)
{
int err;
int i = 0;
@ -2053,7 +2078,7 @@ static __init int mce_init_device(void)
return err;
}
device_initcall(mce_init_device);
device_initcall(mcheck_init_device);
/*
* Old style boot options parsing. Only for compatibility.
@ -2101,7 +2126,7 @@ static int fake_panic_set(void *data, u64 val)
DEFINE_SIMPLE_ATTRIBUTE(fake_panic_fops, fake_panic_get,
fake_panic_set, "%llu\n");
static int __init mce_debugfs_init(void)
static int __init mcheck_debugfs_init(void)
{
struct dentry *dmce, *ffake_panic;
@ -2115,5 +2140,5 @@ static int __init mce_debugfs_init(void)
return 0;
}
late_initcall(mce_debugfs_init);
late_initcall(mcheck_debugfs_init);
#endif

View file

@ -49,6 +49,8 @@ static DEFINE_PER_CPU(struct thermal_state, thermal_state);
static atomic_t therm_throt_en = ATOMIC_INIT(0);
static u32 lvtthmr_init __read_mostly;
#ifdef CONFIG_SYSFS
#define define_therm_throt_sysdev_one_ro(_name) \
static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)
@ -254,6 +256,18 @@ asmlinkage void smp_thermal_interrupt(struct pt_regs *regs)
ack_APIC_irq();
}
/*
* Record the boot CPU's thermal LVT register value in lvtthmr_init.
*/
void __init mcheck_intel_therm_init(void)
{
/*
* This function is only called on the boot CPU (BSP). Save the BSP's
* initial thermal LVT value in lvtthmr_init so that it can be used
* later to restore the BIOS-programmed thermal LVT entry on the APs.
* The register is only read when the boot CPU reports both
* X86_FEATURE_ACPI and X86_FEATURE_ACC; otherwise lvtthmr_init is
* left untouched.
*/
if (cpu_has(&boot_cpu_data, X86_FEATURE_ACPI) &&
cpu_has(&boot_cpu_data, X86_FEATURE_ACC))
lvtthmr_init = apic_read(APIC_LVTTHMR);
}
void intel_init_thermal(struct cpuinfo_x86 *c)
{
unsigned int cpu = smp_processor_id();
@ -270,7 +284,20 @@ void intel_init_thermal(struct cpuinfo_x86 *c)
* since it might be delivered via SMI already:
*/
rdmsr(MSR_IA32_MISC_ENABLE, l, h);
h = apic_read(APIC_LVTTHMR);
/*
* The initial value of thermal LVT entries on all APs always reads
* 0x10000 because APs are woken up by BSP issuing INIT-SIPI-SIPI
* sequence to them and LVT registers are reset to 0s except for
* the mask bits which are set to 1s when APs receive INIT IPI.
* Always restore the value that BIOS has programmed on AP based on
* BSP's info we saved since BIOS is always setting the same value
* for all threads/cores
*/
apic_write(APIC_LVTTHMR, lvtthmr_init);
h = lvtthmr_init;
if ((l & MSR_IA32_MISC_ENABLE_TM1) && (h & APIC_DM_SMI)) {
printk(KERN_DEBUG
"CPU%d: Thermal monitoring handled by SMI\n", cpu);

View file

@ -92,17 +92,17 @@ static int show_other_interrupts(struct seq_file *p, int prec)
seq_printf(p, "%10u ", irq_stats(j)->irq_tlb_count);
seq_printf(p, " TLB shootdowns\n");
#endif
#ifdef CONFIG_X86_MCE
#ifdef CONFIG_X86_THERMAL_VECTOR
seq_printf(p, "%*s: ", prec, "TRM");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_thermal_count);
seq_printf(p, " Thermal event interrupts\n");
# ifdef CONFIG_X86_MCE_THRESHOLD
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
seq_printf(p, "%*s: ", prec, "THR");
for_each_online_cpu(j)
seq_printf(p, "%10u ", irq_stats(j)->irq_threshold_count);
seq_printf(p, " Threshold APIC interrupts\n");
# endif
#endif
#ifdef CONFIG_X86_MCE
seq_printf(p, "%*s: ", prec, "MCE");
@ -194,11 +194,11 @@ u64 arch_irq_stat_cpu(unsigned int cpu)
sum += irq_stats(cpu)->irq_call_count;
sum += irq_stats(cpu)->irq_tlb_count;
#endif
#ifdef CONFIG_X86_MCE
#ifdef CONFIG_X86_THERMAL_VECTOR
sum += irq_stats(cpu)->irq_thermal_count;
# ifdef CONFIG_X86_MCE_THRESHOLD
#endif
#ifdef CONFIG_X86_MCE_THRESHOLD
sum += irq_stats(cpu)->irq_threshold_count;
# endif
#endif
#ifdef CONFIG_X86_MCE
sum += per_cpu(mce_exception_count, cpu);

View file

@ -109,6 +109,7 @@
#ifdef CONFIG_X86_64
#include <asm/numa_64.h>
#endif
#include <asm/mce.h>
/*
* end_pfn only includes RAM, while max_pfn_mapped includes all e820 entries.
@ -1031,6 +1032,8 @@ void __init setup_arch(char **cmdline_p)
#endif
#endif
x86_init.oem.banner();
mcheck_init();
}
#ifdef CONFIG_X86_32

View file

@ -3,7 +3,6 @@
static bool report_gart_errors;
static void (*nb_bus_decoder)(int node_id, struct err_regs *regs);
static void (*orig_mce_callback)(struct mce *m);
void amd_report_gart_errors(bool v)
{
@ -363,8 +362,10 @@ static inline void amd_decode_err_code(unsigned int ec)
pr_warning("Huh? Unknown MCE error 0x%x\n", ec);
}
static void amd_decode_mce(struct mce *m)
static int amd_decode_mce(struct notifier_block *nb, unsigned long val,
void *data)
{
struct mce *m = (struct mce *)data;
struct err_regs regs;
int node, ecc;
@ -420,20 +421,22 @@ static void amd_decode_mce(struct mce *m)
}
amd_decode_err_code(m->status & 0xffff);
return NOTIFY_STOP;
}
/*
* Notifier block for amd_decode_mce(). It is registered on
* x86_mce_decoder_chain by mce_amd_init() and unregistered by
* mce_amd_exit(). No .priority is set here.
*/
static struct notifier_block amd_mce_dec_nb = {
.notifier_call = amd_decode_mce,
};
static int __init mce_amd_init(void)
{
/*
* We can decode MCEs for Opteron and later CPUs:
*/
if ((boot_cpu_data.x86_vendor == X86_VENDOR_AMD) &&
(boot_cpu_data.x86 >= 0xf)) {
/* safe the default decode mce callback */
orig_mce_callback = x86_mce_decode_callback;
x86_mce_decode_callback = amd_decode_mce;
}
(boot_cpu_data.x86 >= 0xf))
atomic_notifier_chain_register(&x86_mce_decoder_chain, &amd_mce_dec_nb);
return 0;
}
@ -442,7 +445,7 @@ early_initcall(mce_amd_init);
#ifdef MODULE
static void __exit mce_amd_exit(void)
{
x86_mce_decode_callback = orig_mce_callback;
atomic_notifier_chain_unregister(&x86_mce_decoder_chain, &amd_mce_dec_nb);
}
MODULE_DESCRIPTION("AMD MCE decoder");

View file

@ -0,0 +1,69 @@
#undef TRACE_SYSTEM
#define TRACE_SYSTEM mce
#if !defined(_TRACE_MCE_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_MCE_H
#include <linux/ktime.h>
#include <linux/tracepoint.h>
#include <asm/mce.h>
/*
* mce_record tracepoint: takes a struct mce pointer and copies the members
* listed below into the trace entry. Two entry fields are named differently
* from their source: "walltime" is filled from m->time and "cpu" from
* m->extcpu. Every other entry field has the same name as the struct mce
* member it is copied from.
*/
TRACE_EVENT(mce_record,
TP_PROTO(struct mce *m),
TP_ARGS(m),
TP_STRUCT__entry(
__field( u64, mcgcap )
__field( u64, mcgstatus )
__field( u8, bank )
__field( u64, status )
__field( u64, addr )
__field( u64, misc )
__field( u64, ip )
__field( u8, cs )
__field( u64, tsc )
__field( u64, walltime )
__field( u32, cpu )
__field( u32, cpuid )
__field( u32, apicid )
__field( u32, socketid )
__field( u8, cpuvendor )
),
TP_fast_assign(
__entry->mcgcap = m->mcgcap;
__entry->mcgstatus = m->mcgstatus;
__entry->bank = m->bank;
__entry->status = m->status;
__entry->addr = m->addr;
__entry->misc = m->misc;
__entry->ip = m->ip;
__entry->cs = m->cs;
__entry->tsc = m->tsc;
__entry->walltime = m->time;
__entry->cpu = m->extcpu;
__entry->cpuid = m->cpuid;
__entry->apicid = m->apicid;
__entry->socketid = m->socketid;
__entry->cpuvendor = m->cpuvendor;
),
TP_printk("CPU: %d, MCGc/s: %llx/%llx, MC%d: %016Lx, ADDR/MISC: %016Lx/%016Lx, RIP: %02x:<%016Lx>, TSC: %llx, PROCESSOR: %u:%x, TIME: %llu, SOCKET: %u, APIC: %x",
__entry->cpu,
__entry->mcgcap, __entry->mcgstatus,
__entry->bank, __entry->status,
__entry->addr, __entry->misc,
__entry->cs, __entry->ip,
__entry->tsc,
__entry->cpuvendor, __entry->cpuid,
__entry->walltime,
__entry->socketid,
__entry->apicid)
);
#endif /* _TRACE_MCE_H */
/* This part must be outside protection */
#include <trace/define_trace.h>