1
0
Fork 0

Merge branch 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RAS updates from Borislav Petkov:
 "This time around we have in store:

   - Disable MC4_MISC thresholding banks on all AMD family 0x15 models
     (Shirish S)

   - AMD MCE error descriptions update and error decode improvements
     (Yazen Ghannam)

   - The usual smaller conversions and fixes"

* 'ras-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mce: Improve error message when kernel cannot recover, p2
  EDAC/mce_amd: Decode MCA_STATUS in bit definition order
  EDAC/mce_amd: Decode MCA_STATUS[Scrub] bit
  EDAC, mce_amd: Print ExtErrorCode and description on a single line
  EDAC, mce_amd: Match error descriptions to latest documentation
  x86/MCE/AMD, EDAC/mce_amd: Add new error descriptions for some SMCA bank types
  x86/MCE/AMD, EDAC/mce_amd: Add new McaTypes for CS, PSP, and SMU units
  x86/MCE/AMD, EDAC/mce_amd: Add new MP5, NBIO, and PCIE SMCA bank types
  RAS: Add a MAINTAINERS entry
  RAS: Use consistent types for UUIDs
  x86/MCE/AMD: Carve out the MC4_MISC thresholding quirk
  x86/MCE/AMD: Turn off MC4_MISC thresholding on all family 0x15 models
  x86/MCE: Switch to use the new generic UUID API
hifive-unleashed-5.1
Linus Torvalds 2019-03-08 09:11:39 -08:00
commit e13284da94
9 changed files with 287 additions and 148 deletions

View File

@ -12960,6 +12960,16 @@ M: Alexandre Bounine <alex.bou9@gmail.com>
S: Maintained S: Maintained
F: drivers/rapidio/ F: drivers/rapidio/
RAS INFRASTRUCTURE
M: Tony Luck <tony.luck@intel.com>
M: Borislav Petkov <bp@alien8.de>
L: linux-edac@vger.kernel.org
S: Maintained
F: drivers/ras/
F: include/linux/ras.h
F: include/ras/ras_event.h
F: Documentation/admin-guide/ras.rst
RAYLINK/WEBGEAR 802.11 WIRELESS LAN DRIVER RAYLINK/WEBGEAR 802.11 WIRELESS LAN DRIVER
L: linux-wireless@vger.kernel.org L: linux-wireless@vger.kernel.org
S: Orphan S: Orphan

View File

@ -48,6 +48,7 @@
#define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */ #define MCI_STATUS_SYNDV BIT_ULL(53) /* synd reg. valid */
#define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */ #define MCI_STATUS_DEFERRED BIT_ULL(44) /* uncorrected error, deferred exception */
#define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */ #define MCI_STATUS_POISON BIT_ULL(43) /* access poisonous data */
#define MCI_STATUS_SCRUB BIT_ULL(40) /* Error detected during scrub operation */
/* /*
* McaX field if set indicates a given bank supports MCA extensions: * McaX field if set indicates a given bank supports MCA extensions:
@ -307,11 +308,17 @@ enum smca_bank_types {
SMCA_FP, /* Floating Point */ SMCA_FP, /* Floating Point */
SMCA_L3_CACHE, /* L3 Cache */ SMCA_L3_CACHE, /* L3 Cache */
SMCA_CS, /* Coherent Slave */ SMCA_CS, /* Coherent Slave */
SMCA_CS_V2, /* Coherent Slave */
SMCA_PIE, /* Power, Interrupts, etc. */ SMCA_PIE, /* Power, Interrupts, etc. */
SMCA_UMC, /* Unified Memory Controller */ SMCA_UMC, /* Unified Memory Controller */
SMCA_PB, /* Parameter Block */ SMCA_PB, /* Parameter Block */
SMCA_PSP, /* Platform Security Processor */ SMCA_PSP, /* Platform Security Processor */
SMCA_PSP_V2, /* Platform Security Processor */
SMCA_SMU, /* System Management Unit */ SMCA_SMU, /* System Management Unit */
SMCA_SMU_V2, /* System Management Unit */
SMCA_MP5, /* Microprocessor 5 Unit */
SMCA_NBIO, /* Northbridge IO Unit */
SMCA_PCIE, /* PCI Express Unit */
N_SMCA_BANK_TYPES N_SMCA_BANK_TYPES
}; };

View File

@ -88,11 +88,17 @@ static struct smca_bank_name smca_names[] = {
[SMCA_FP] = { "floating_point", "Floating Point Unit" }, [SMCA_FP] = { "floating_point", "Floating Point Unit" },
[SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" }, [SMCA_L3_CACHE] = { "l3_cache", "L3 Cache" },
[SMCA_CS] = { "coherent_slave", "Coherent Slave" }, [SMCA_CS] = { "coherent_slave", "Coherent Slave" },
[SMCA_CS_V2] = { "coherent_slave", "Coherent Slave" },
[SMCA_PIE] = { "pie", "Power, Interrupts, etc." }, [SMCA_PIE] = { "pie", "Power, Interrupts, etc." },
[SMCA_UMC] = { "umc", "Unified Memory Controller" }, [SMCA_UMC] = { "umc", "Unified Memory Controller" },
[SMCA_PB] = { "param_block", "Parameter Block" }, [SMCA_PB] = { "param_block", "Parameter Block" },
[SMCA_PSP] = { "psp", "Platform Security Processor" }, [SMCA_PSP] = { "psp", "Platform Security Processor" },
[SMCA_PSP_V2] = { "psp", "Platform Security Processor" },
[SMCA_SMU] = { "smu", "System Management Unit" }, [SMCA_SMU] = { "smu", "System Management Unit" },
[SMCA_SMU_V2] = { "smu", "System Management Unit" },
[SMCA_MP5] = { "mp5", "Microprocessor 5 Unit" },
[SMCA_NBIO] = { "nbio", "Northbridge IO Unit" },
[SMCA_PCIE] = { "pcie", "PCI Express Unit" },
}; };
static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init = static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init =
@ -138,30 +144,42 @@ static struct smca_hwid smca_hwid_mcatypes[] = {
{ SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 }, { SMCA_RESERVED, HWID_MCATYPE(0x00, 0x0), 0x0 },
/* ZN Core (HWID=0xB0) MCA types */ /* ZN Core (HWID=0xB0) MCA types */
{ SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFEF }, { SMCA_LS, HWID_MCATYPE(0xB0, 0x0), 0x1FFFFF },
{ SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF }, { SMCA_IF, HWID_MCATYPE(0xB0, 0x1), 0x3FFF },
{ SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF }, { SMCA_L2_CACHE, HWID_MCATYPE(0xB0, 0x2), 0xF },
{ SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF }, { SMCA_DE, HWID_MCATYPE(0xB0, 0x3), 0x1FF },
/* HWID 0xB0 MCATYPE 0x4 is Reserved */ /* HWID 0xB0 MCATYPE 0x4 is Reserved */
{ SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0x7FF }, { SMCA_EX, HWID_MCATYPE(0xB0, 0x5), 0xFFF },
{ SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F }, { SMCA_FP, HWID_MCATYPE(0xB0, 0x6), 0x7F },
{ SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF }, { SMCA_L3_CACHE, HWID_MCATYPE(0xB0, 0x7), 0xFF },
/* Data Fabric MCA types */ /* Data Fabric MCA types */
{ SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF }, { SMCA_CS, HWID_MCATYPE(0x2E, 0x0), 0x1FF },
{ SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0xF }, { SMCA_PIE, HWID_MCATYPE(0x2E, 0x1), 0x1F },
{ SMCA_CS_V2, HWID_MCATYPE(0x2E, 0x2), 0x3FFF },
/* Unified Memory Controller MCA type */ /* Unified Memory Controller MCA type */
{ SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0x3F }, { SMCA_UMC, HWID_MCATYPE(0x96, 0x0), 0xFF },
/* Parameter Block MCA type */ /* Parameter Block MCA type */
{ SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 }, { SMCA_PB, HWID_MCATYPE(0x05, 0x0), 0x1 },
/* Platform Security Processor MCA type */ /* Platform Security Processor MCA type */
{ SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 }, { SMCA_PSP, HWID_MCATYPE(0xFF, 0x0), 0x1 },
{ SMCA_PSP_V2, HWID_MCATYPE(0xFF, 0x1), 0x3FFFF },
/* System Management Unit MCA type */ /* System Management Unit MCA type */
{ SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 }, { SMCA_SMU, HWID_MCATYPE(0x01, 0x0), 0x1 },
{ SMCA_SMU_V2, HWID_MCATYPE(0x01, 0x1), 0x7FF },
/* Microprocessor 5 Unit MCA type */
{ SMCA_MP5, HWID_MCATYPE(0x01, 0x2), 0x3FF },
/* Northbridge IO Unit MCA type */
{ SMCA_NBIO, HWID_MCATYPE(0x18, 0x0), 0x1F },
/* PCI Express Unit MCA type */
{ SMCA_PCIE, HWID_MCATYPE(0x46, 0x0), 0x1F },
}; };
struct smca_bank smca_banks[MAX_NR_BANKS]; struct smca_bank smca_banks[MAX_NR_BANKS];
@ -545,6 +563,40 @@ out:
return offset; return offset;
} }
/*
 * MC4_MISC thresholding is not supported on any family 0x15 model, so the
 * thresholding banks are switched off there by clearing CntP (bit 62) in
 * each MC4_MISC register.
 *
 * @c: CPU being set up; every family other than 0x15 is left untouched.
 *
 * The registers can only be changed while McStatusWrEn (HWCR bit 18) is
 * set. When it is found clear it is raised just for the update and the
 * HWCR value read on entry is written back afterwards.
 */
void disable_err_thresholding(struct cpuinfo_x86 *c)
{
	u32 misc_msrs[] = {
		0x00000413, /* MC4_MISC0 */
		0xc0000408, /* MC4_MISC1 */
	};
	bool wr_en_was_clear;
	u64 saved_hwcr;
	int idx;

	/* Nothing to do outside family 0x15 */
	if (c->x86 != 0x15)
		return;

	rdmsrl(MSR_K7_HWCR, saved_hwcr);

	/* Open the write window only if it is not open already */
	wr_en_was_clear = (saved_hwcr & BIT(18)) == 0;
	if (wr_en_was_clear)
		wrmsrl(MSR_K7_HWCR, saved_hwcr | BIT(18));

	/* Drop CntP in MC4_MISC0 first, then MC4_MISC1 */
	for (idx = 0; idx < ARRAY_SIZE(misc_msrs); idx++)
		msr_clear_bit(misc_msrs[idx], 62);

	/* Put HWCR back exactly as it was found */
	if (wr_en_was_clear)
		wrmsrl(MSR_K7_HWCR, saved_hwcr);
}
/* cpu init entry point, called from mce.c with preempt off */ /* cpu init entry point, called from mce.c with preempt off */
void mce_amd_feature_init(struct cpuinfo_x86 *c) void mce_amd_feature_init(struct cpuinfo_x86 *c)
{ {
@ -552,6 +604,8 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
unsigned int bank, block, cpu = smp_processor_id(); unsigned int bank, block, cpu = smp_processor_id();
int offset = -1; int offset = -1;
disable_err_thresholding(c);
for (bank = 0; bank < mca_cfg.banks; ++bank) { for (bank = 0; bank < mca_cfg.banks; ++bank) {
if (mce_flags.smca) if (mce_flags.smca)
smca_configure(bank, cpu); smca_configure(bank, cpu);

View File

@ -64,11 +64,11 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err)
EXPORT_SYMBOL_GPL(apei_mce_report_mem_error); EXPORT_SYMBOL_GPL(apei_mce_report_mem_error);
#define CPER_CREATOR_MCE \ #define CPER_CREATOR_MCE \
UUID_LE(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \ GUID_INIT(0x75a574e3, 0x5052, 0x4b29, 0x8a, 0x8e, 0xbe, 0x2c, \
0x64, 0x90, 0xb8, 0x9d) 0x64, 0x90, 0xb8, 0x9d)
#define CPER_SECTION_TYPE_MCE \ #define CPER_SECTION_TYPE_MCE \
UUID_LE(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \ GUID_INIT(0xfe08ffbe, 0x95e4, 0x4be7, 0xbc, 0x73, 0x40, 0x96, \
0x04, 0x4a, 0x38, 0xfc) 0x04, 0x4a, 0x38, 0xfc)
/* /*
* CPER specification (in UEFI specification 2.3 appendix N) requires * CPER specification (in UEFI specification 2.3 appendix N) requires
@ -135,7 +135,7 @@ retry:
goto out; goto out;
/* try to skip other type records in storage */ /* try to skip other type records in storage */
else if (rc != sizeof(rcd) || else if (rc != sizeof(rcd) ||
uuid_le_cmp(rcd.hdr.creator_id, CPER_CREATOR_MCE)) !guid_equal(&rcd.hdr.creator_id, &CPER_CREATOR_MCE))
goto retry; goto retry;
memcpy(m, &rcd.mce, sizeof(*m)); memcpy(m, &rcd.mce, sizeof(*m));
rc = sizeof(*m); rc = sizeof(*m);

View File

@ -1612,36 +1612,6 @@ static int __mcheck_cpu_apply_quirks(struct cpuinfo_x86 *c)
if (c->x86 == 0x15 && c->x86_model <= 0xf) if (c->x86 == 0x15 && c->x86_model <= 0xf)
mce_flags.overflow_recov = 1; mce_flags.overflow_recov = 1;
/*
* Turn off MC4_MISC thresholding banks on those models since
* they're not supported there.
*/
if (c->x86 == 0x15 &&
(c->x86_model >= 0x10 && c->x86_model <= 0x1f)) {
int i;
u64 hwcr;
bool need_toggle;
u32 msrs[] = {
0x00000413, /* MC4_MISC0 */
0xc0000408, /* MC4_MISC1 */
};
rdmsrl(MSR_K7_HWCR, hwcr);
/* McStatusWrEn has to be set */
need_toggle = !(hwcr & BIT(18));
if (need_toggle)
wrmsrl(MSR_K7_HWCR, hwcr | BIT(18));
/* Clear CntP bit safely */
for (i = 0; i < ARRAY_SIZE(msrs); i++)
msr_clear_bit(msrs[i], 62);
/* restore old settings */
if (need_toggle)
wrmsrl(MSR_K7_HWCR, hwcr);
}
} }
if (c->x86_vendor == X86_VENDOR_INTEL) { if (c->x86_vendor == X86_VENDOR_INTEL) {

View File

@ -165,6 +165,11 @@ static struct severity {
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA), SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_DATA),
KERNEL KERNEL
), ),
MCESEV(
PANIC, "Instruction fetch error in kernel",
SER, MASK(MCI_STATUS_OVER|MCI_UC_SAR|MCI_ADDR|MCACOD, MCI_UC_SAR|MCI_ADDR|MCACOD_INSTR),
KERNEL
),
#endif #endif
MCESEV( MCESEV(
PANIC, "Action required: unknown MCACOD", PANIC, "Action required: unknown MCACOD",

View File

@ -151,138 +151,223 @@ static const char * const mc6_mce_desc[] = {
/* Scalable MCA error strings */ /* Scalable MCA error strings */
static const char * const smca_ls_mce_desc[] = { static const char * const smca_ls_mce_desc[] = {
"Load queue parity", "Load queue parity error",
"Store queue parity", "Store queue parity error",
"Miss address buffer payload parity", "Miss address buffer payload parity error",
"L1 TLB parity", "Level 1 TLB parity error",
"Reserved", "DC Tag error type 5",
"DC tag error type 6", "DC Tag error type 6",
"DC tag error type 1", "DC Tag error type 1",
"Internal error type 1", "Internal error type 1",
"Internal error type 2", "Internal error type 2",
"Sys Read data error thread 0", "System Read Data Error Thread 0",
"Sys read data error thread 1", "System Read Data Error Thread 1",
"DC tag error type 2", "DC Tag error type 2",
"DC data error type 1 (poison consumption)", "DC Data error type 1 and poison consumption",
"DC data error type 2", "DC Data error type 2",
"DC data error type 3", "DC Data error type 3",
"DC tag error type 4", "DC Tag error type 4",
"L2 TLB parity", "Level 2 TLB parity error",
"PDC parity error", "PDC parity error",
"DC tag error type 3", "DC Tag error type 3",
"DC tag error type 5", "DC Tag error type 5",
"L2 fill data error", "L2 Fill Data error",
}; };
static const char * const smca_if_mce_desc[] = { static const char * const smca_if_mce_desc[] = {
"microtag probe port parity error", "Op Cache Microtag Probe Port Parity Error",
"IC microtag or full tag multi-hit error", "IC Microtag or Full Tag Multi-hit Error",
"IC full tag parity", "IC Full Tag Parity Error",
"IC data array parity", "IC Data Array Parity Error",
"Decoupling queue phys addr parity error", "Decoupling Queue PhysAddr Parity Error",
"L0 ITLB parity error", "L0 ITLB Parity Error",
"L1 ITLB parity error", "L1 ITLB Parity Error",
"L2 ITLB parity error", "L2 ITLB Parity Error",
"BPQ snoop parity on Thread 0", "BPQ Thread 0 Snoop Parity Error",
"BPQ snoop parity on Thread 1", "BPQ Thread 1 Snoop Parity Error",
"L1 BTB multi-match error", "L1 BTB Multi-Match Error",
"L2 BTB multi-match error", "L2 BTB Multi-Match Error",
"L2 Cache Response Poison error", "L2 Cache Response Poison Error",
"System Read Data error", "System Read Data Error",
}; };
static const char * const smca_l2_mce_desc[] = { static const char * const smca_l2_mce_desc[] = {
"L2M tag multi-way-hit error", "L2M Tag Multiple-Way-Hit error",
"L2M tag ECC error", "L2M Tag or State Array ECC Error",
"L2M data ECC error", "L2M Data Array ECC Error",
"HW assert", "Hardware Assert Error",
}; };
static const char * const smca_de_mce_desc[] = { static const char * const smca_de_mce_desc[] = {
"uop cache tag parity error", "Micro-op cache tag parity error",
"uop cache data parity error", "Micro-op cache data parity error",
"Insn buffer parity error", "Instruction buffer parity error",
"uop queue parity error", "Micro-op queue parity error",
"Insn dispatch queue parity error", "Instruction dispatch queue parity error",
"Fetch address FIFO parity", "Fetch address FIFO parity error",
"Patch RAM data parity", "Patch RAM data parity error",
"Patch RAM sequencer parity", "Patch RAM sequencer parity error",
"uop buffer parity" "Micro-op buffer parity error"
}; };
static const char * const smca_ex_mce_desc[] = { static const char * const smca_ex_mce_desc[] = {
"Watchdog timeout error", "Watchdog Timeout error",
"Phy register file parity", "Physical register file parity error",
"Flag register file parity", "Flag register file parity error",
"Immediate displacement register file parity", "Immediate displacement register file parity error",
"Address generator payload parity", "Address generator payload parity error",
"EX payload parity", "EX payload parity error",
"Checkpoint queue parity", "Checkpoint queue parity error",
"Retire dispatch queue parity", "Retire dispatch queue parity error",
"Retire status queue parity error", "Retire status queue parity error",
"Scheduling queue parity error", "Scheduling queue parity error",
"Branch buffer queue parity error", "Branch buffer queue parity error",
"Hardware Assertion error",
}; };
static const char * const smca_fp_mce_desc[] = { static const char * const smca_fp_mce_desc[] = {
"Physical register file parity", "Physical register file (PRF) parity error",
"Freelist parity error", "Freelist (FL) parity error",
"Schedule queue parity", "Schedule queue parity error",
"NSQ parity error", "NSQ parity error",
"Retire queue parity", "Retire queue (RQ) parity error",
"Status register file parity", "Status register file (SRF) parity error",
"Hardware assertion", "Hardware assertion",
}; };
static const char * const smca_l3_mce_desc[] = { static const char * const smca_l3_mce_desc[] = {
"Shadow tag macro ECC error", "Shadow Tag Macro ECC Error",
"Shadow tag macro multi-way-hit error", "Shadow Tag Macro Multi-way-hit Error",
"L3M tag ECC error", "L3M Tag ECC Error",
"L3M tag multi-way-hit error", "L3M Tag Multi-way-hit Error",
"L3M data ECC error", "L3M Data ECC Error",
"XI parity, L3 fill done channel error", "SDP Parity Error or SystemReadDataError from XI",
"L3 victim queue parity", "L3 Victim Queue Parity Error",
"L3 HW assert", "L3 Hardware Assertion",
}; };
static const char * const smca_cs_mce_desc[] = { static const char * const smca_cs_mce_desc[] = {
"Illegal request from transport layer", "Illegal Request",
"Address violation", "Address Violation",
"Security violation", "Security Violation",
"Illegal response from transport layer", "Illegal Response",
"Unexpected response", "Unexpected Response",
"Parity error on incoming request or probe response data", "Request or Probe Parity Error",
"Parity error on incoming read response data", "Read Response Parity Error",
"Atomic request parity", "Atomic Request Parity Error",
"ECC error on probe filter access", "Probe Filter ECC Error",
};
/*
 * Error descriptions for the second Coherent Slave bank type. The table is
 * registered under SMCA_CS_V2 in smca_mce_descs[], and decode_smca_error()
 * selects an entry by extended error code, so the position of each string
 * is significant: the first string describes code 0, the last code 13.
 */
static const char * const smca_cs2_mce_desc[] = {
"Illegal Request",
"Address Violation",
"Security Violation",
"Illegal Response",
"Unexpected Response",
"Request or Probe Parity Error",
"Read Response Parity Error",
"Atomic Request Parity Error",
"SDP read response had no match in the CS queue",
"Probe Filter Protocol Error",
"Probe Filter ECC Error",
"SDP read response had an unexpected RETRY error",
"Counter overflow error",
"Counter underflow error",
}; };
static const char * const smca_pie_mce_desc[] = { static const char * const smca_pie_mce_desc[] = {
"HW assert", "Hardware Assert",
"Internal PIE register security violation", "Register security violation",
"Error on GMI link", "Link Error",
"Poison data written to internal PIE register", "Poison data consumption",
"A deferred error was detected in the DF"
}; };
static const char * const smca_umc_mce_desc[] = { static const char * const smca_umc_mce_desc[] = {
"DRAM ECC error", "DRAM ECC error",
"Data poison error on DRAM", "Data poison error",
"SDP parity error", "SDP parity error",
"Advanced peripheral bus error", "Advanced peripheral bus error",
"Command/address parity error", "Address/Command parity error",
"Write data CRC error", "Write data CRC error",
"DCQ SRAM ECC error",
"AES SRAM ECC error",
}; };
static const char * const smca_pb_mce_desc[] = { static const char * const smca_pb_mce_desc[] = {
"Parameter Block RAM ECC error", "An ECC error in the Parameter Block RAM array",
}; };
static const char * const smca_psp_mce_desc[] = { static const char * const smca_psp_mce_desc[] = {
"PSP RAM ECC or parity error", "An ECC or parity error in a PSP RAM instance",
};
/*
 * Error descriptions for the second Platform Security Processor bank type.
 * The table is registered under SMCA_PSP_V2 in smca_mce_descs[], and
 * decode_smca_error() selects an entry by extended error code, so the
 * position of each string is significant: the first string describes
 * code 0, the last code 17.
 */
static const char * const smca_psp2_mce_desc[] = {
"High SRAM ECC or parity error",
"Low SRAM ECC or parity error",
"Instruction Cache Bank 0 ECC or parity error",
"Instruction Cache Bank 1 ECC or parity error",
"Instruction Tag Ram 0 parity error",
"Instruction Tag Ram 1 parity error",
"Data Cache Bank 0 ECC or parity error",
"Data Cache Bank 1 ECC or parity error",
"Data Cache Bank 2 ECC or parity error",
"Data Cache Bank 3 ECC or parity error",
"Data Tag Bank 0 parity error",
"Data Tag Bank 1 parity error",
"Data Tag Bank 2 parity error",
"Data Tag Bank 3 parity error",
"Dirty Data Ram parity error",
"TLB Bank 0 parity error",
"TLB Bank 1 parity error",
"System Hub Read Buffer ECC or parity error",
}; };
static const char * const smca_smu_mce_desc[] = { static const char * const smca_smu_mce_desc[] = {
"SMU RAM ECC or parity error", "An ECC or parity error in an SMU RAM instance",
};
/*
 * Error descriptions for the second System Management Unit bank type.
 * smca_mce_descs[] registers this table under SMCA_SMU_V2, and
 * decode_smca_error() picks an entry by extended error code, so every
 * string is pinned to its code explicitly.
 */
static const char * const smca_smu2_mce_desc[] = {
	[0]  = "High SRAM ECC or parity error",
	[1]  = "Low SRAM ECC or parity error",
	[2]  = "Data Cache Bank A ECC or parity error",
	[3]  = "Data Cache Bank B ECC or parity error",
	[4]  = "Data Tag Cache Bank A ECC or parity error",
	[5]  = "Data Tag Cache Bank B ECC or parity error",
	[6]  = "Instruction Cache Bank A ECC or parity error",
	[7]  = "Instruction Cache Bank B ECC or parity error",
	[8]  = "Instruction Tag Cache Bank A ECC or parity error",
	[9]  = "Instruction Tag Cache Bank B ECC or parity error",
	[10] = "System Hub Read Buffer ECC or parity error",
};
/*
 * Error descriptions for the Microprocessor 5 Unit bank type.
 * smca_mce_descs[] registers this table under SMCA_MP5, and
 * decode_smca_error() picks an entry by extended error code, so every
 * string is pinned to its code explicitly.
 */
static const char * const smca_mp5_mce_desc[] = {
	[0] = "High SRAM ECC or parity error",
	[1] = "Low SRAM ECC or parity error",
	[2] = "Data Cache Bank A ECC or parity error",
	[3] = "Data Cache Bank B ECC or parity error",
	[4] = "Data Tag Cache Bank A ECC or parity error",
	[5] = "Data Tag Cache Bank B ECC or parity error",
	[6] = "Instruction Cache Bank A ECC or parity error",
	[7] = "Instruction Cache Bank B ECC or parity error",
	[8] = "Instruction Tag Cache Bank A ECC or parity error",
	[9] = "Instruction Tag Cache Bank B ECC or parity error",
};
/*
 * Error descriptions for the Northbridge IO Unit bank type.
 * smca_mce_descs[] registers this table under SMCA_NBIO, and
 * decode_smca_error() picks an entry by extended error code, so every
 * string is pinned to its code explicitly.
 */
static const char * const smca_nbio_mce_desc[] = {
	[0] = "ECC or Parity error",
	[1] = "PCIE error",
	[2] = "SDP ErrEvent error",
	[3] = "SDP Egress Poison Error",
	[4] = "IOHC Internal Poison Error",
};
/*
 * Error descriptions for the PCI Express Unit bank type. The table is
 * registered under SMCA_PCIE in smca_mce_descs[], and decode_smca_error()
 * selects an entry by extended error code, so the position of each string
 * is significant: the first string describes code 0, the last code 4.
 */
static const char * const smca_pcie_mce_desc[] = {
"CCIX PER Message logging",
"CCIX Read Response with Status: Non-Data Error",
"CCIX Write Response with Status: Non-Data Error",
"CCIX Read Response with Status: Data Error",
"CCIX Non-okay write response with data error",
}; };
struct smca_mce_desc { struct smca_mce_desc {
@ -299,11 +384,17 @@ static struct smca_mce_desc smca_mce_descs[] = {
[SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) }, [SMCA_FP] = { smca_fp_mce_desc, ARRAY_SIZE(smca_fp_mce_desc) },
[SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) }, [SMCA_L3_CACHE] = { smca_l3_mce_desc, ARRAY_SIZE(smca_l3_mce_desc) },
[SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) }, [SMCA_CS] = { smca_cs_mce_desc, ARRAY_SIZE(smca_cs_mce_desc) },
[SMCA_CS_V2] = { smca_cs2_mce_desc, ARRAY_SIZE(smca_cs2_mce_desc) },
[SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) }, [SMCA_PIE] = { smca_pie_mce_desc, ARRAY_SIZE(smca_pie_mce_desc) },
[SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) }, [SMCA_UMC] = { smca_umc_mce_desc, ARRAY_SIZE(smca_umc_mce_desc) },
[SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) }, [SMCA_PB] = { smca_pb_mce_desc, ARRAY_SIZE(smca_pb_mce_desc) },
[SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) }, [SMCA_PSP] = { smca_psp_mce_desc, ARRAY_SIZE(smca_psp_mce_desc) },
[SMCA_PSP_V2] = { smca_psp2_mce_desc, ARRAY_SIZE(smca_psp2_mce_desc) },
[SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) }, [SMCA_SMU] = { smca_smu_mce_desc, ARRAY_SIZE(smca_smu_mce_desc) },
[SMCA_SMU_V2] = { smca_smu2_mce_desc, ARRAY_SIZE(smca_smu2_mce_desc) },
[SMCA_MP5] = { smca_mp5_mce_desc, ARRAY_SIZE(smca_mp5_mce_desc) },
[SMCA_NBIO] = { smca_nbio_mce_desc, ARRAY_SIZE(smca_nbio_mce_desc) },
[SMCA_PCIE] = { smca_pcie_mce_desc, ARRAY_SIZE(smca_pcie_mce_desc) },
}; };
static bool f12h_mc0_mce(u16 ec, u8 xec) static bool f12h_mc0_mce(u16 ec, u8 xec)
@ -874,13 +965,12 @@ static void decode_smca_error(struct mce *m)
ip_name = smca_get_long_name(bank_type); ip_name = smca_get_long_name(bank_type);
pr_emerg(HW_ERR "%s Extended Error Code: %d\n", ip_name, xec); pr_emerg(HW_ERR "%s Ext. Error Code: %d", ip_name, xec);
/* Only print the decode of valid error codes */ /* Only print the decode of valid error codes */
if (xec < smca_mce_descs[bank_type].num_descs && if (xec < smca_mce_descs[bank_type].num_descs &&
(hwid->xec_bitmap & BIT_ULL(xec))) { (hwid->xec_bitmap & BIT_ULL(xec))) {
pr_emerg(HW_ERR "%s Error: ", ip_name); pr_cont(", %s.\n", smca_mce_descs[bank_type].descs[xec]);
pr_cont("%s.\n", smca_mce_descs[bank_type].descs[xec]);
} }
if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc) if (bank_type == SMCA_UMC && xec == 0 && decode_dram_ecc)
@ -961,8 +1051,24 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
((m->status & MCI_STATUS_UC) ? "UE" : ((m->status & MCI_STATUS_UC) ? "UE" :
(m->status & MCI_STATUS_DEFERRED) ? "-" : "CE"), (m->status & MCI_STATUS_DEFERRED) ? "-" : "CE"),
((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"), ((m->status & MCI_STATUS_MISCV) ? "MiscV" : "-"),
((m->status & MCI_STATUS_PCC) ? "PCC" : "-"), ((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-"),
((m->status & MCI_STATUS_ADDRV) ? "AddrV" : "-")); ((m->status & MCI_STATUS_PCC) ? "PCC" : "-"));
if (boot_cpu_has(X86_FEATURE_SMCA)) {
u32 low, high;
u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
if (!rdmsr_safe(addr, &low, &high) &&
(low & MCI_CONFIG_MCAX))
pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
}
/* do the two bits[14:13] together */
ecc = (m->status >> 45) & 0x3;
if (ecc)
pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
if (fam >= 0x15) { if (fam >= 0x15) {
pr_cont("|%s", (m->status & MCI_STATUS_DEFERRED ? "Deferred" : "-")); pr_cont("|%s", (m->status & MCI_STATUS_DEFERRED ? "Deferred" : "-"));
@ -972,21 +1078,8 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data)
pr_cont("|%s", (m->status & MCI_STATUS_POISON ? "Poison" : "-")); pr_cont("|%s", (m->status & MCI_STATUS_POISON ? "Poison" : "-"));
} }
if (boot_cpu_has(X86_FEATURE_SMCA)) { if (fam >= 0x17)
u32 low, high; pr_cont("|%s", (m->status & MCI_STATUS_SCRUB ? "Scrub" : "-"));
u32 addr = MSR_AMD64_SMCA_MCx_CONFIG(m->bank);
pr_cont("|%s", ((m->status & MCI_STATUS_SYNDV) ? "SyndV" : "-"));
if (!rdmsr_safe(addr, &low, &high) &&
(low & MCI_CONFIG_MCAX))
pr_cont("|%s", ((m->status & MCI_STATUS_TCC) ? "TCC" : "-"));
}
/* do the two bits[14:13] together */
ecc = (m->status >> 45) & 0x3;
if (ecc)
pr_cont("|%sECC", ((ecc == 2) ? "C" : "U"));
pr_cont("]: 0x%016llx\n", m->status); pr_cont("]: 0x%016llx\n", m->status);

View File

@ -14,7 +14,7 @@
#define TRACE_INCLUDE_PATH ../../include/ras #define TRACE_INCLUDE_PATH ../../include/ras
#include <ras/ras_event.h> #include <ras/ras_event.h>
void log_non_standard_event(const uuid_le *sec_type, const uuid_le *fru_id, void log_non_standard_event(const guid_t *sec_type, const guid_t *fru_id,
const char *fru_text, const u8 sev, const u8 *err, const char *fru_text, const u8 sev, const u8 *err,
const u32 len) const u32 len)
{ {

View File

@ -27,7 +27,7 @@
TRACE_EVENT(extlog_mem_event, TRACE_EVENT(extlog_mem_event,
TP_PROTO(struct cper_sec_mem_err *mem, TP_PROTO(struct cper_sec_mem_err *mem,
u32 err_seq, u32 err_seq,
const uuid_le *fru_id, const guid_t *fru_id,
const char *fru_text, const char *fru_text,
u8 sev), u8 sev),
@ -39,7 +39,7 @@ TRACE_EVENT(extlog_mem_event,
__field(u8, sev) __field(u8, sev)
__field(u64, pa) __field(u64, pa)
__field(u8, pa_mask_lsb) __field(u8, pa_mask_lsb)
__field_struct(uuid_le, fru_id) __field_struct(guid_t, fru_id)
__string(fru_text, fru_text) __string(fru_text, fru_text)
__field_struct(struct cper_mem_err_compact, data) __field_struct(struct cper_mem_err_compact, data)
), ),
@ -218,8 +218,8 @@ TRACE_EVENT(arm_event,
*/ */
TRACE_EVENT(non_standard_event, TRACE_EVENT(non_standard_event,
TP_PROTO(const uuid_le *sec_type, TP_PROTO(const guid_t *sec_type,
const uuid_le *fru_id, const guid_t *fru_id,
const char *fru_text, const char *fru_text,
const u8 sev, const u8 sev,
const u8 *err, const u8 *err,