1
0
Fork 0

* skx_edac: Address translation for NVDIMMs (Tony Luck and Qiuxu Zhuo)

* ACPI_ADXL build fix
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEzv7L6UO9uDPlPSfHEsHwGGHeVUoFAlvcKnEACgkQEsHwGGHe
 VUr45g/9E67lU84Dz41ly6zFDTmQdYBNPRayz9QgIGHfIMwIN8aAVoezC8B4NCqc
 8rQ3W48REkewLmPO2GoEVld4UnHbvVlZYZ4bGcxhYzWL5dcleoJIFVupF4Ifo6/Y
 SPbVUyihtw4FFr+Ft8x/bOJQ6QQ4CIiVX3mJBdwdqQ6Lm9yoEz6AlSbTJiyyzr8I
 gGfcKD5TcmJWpsDzRXJ/xWddfA+hfUpKxkuJqPIRZvmKnJpy79af8MlQAZwXuXVS
 361wj5SzP0LzktT5JQn73V04NzSSDTbFSycnWXUex3lxIsE6KolsEwfglccdhvIy
 Nz/Du4kJn1Ye/zbsO27qtkGCXSz0qYKsdfUg1RY+MnZPe4mFmmAm0izTKH3EltQx
 +OQWtcBz5vvNf3Odwnfw1nNvYrbnzaDNIsjHspopVrsmD2oRefmpsYsOEzGyAGDw
 PtUC3l70u7i4e2NtF7Doo23g1yIyXWQZtrEDDFmQZGUo7YKxE7AkGPzINxpNSQME
 z11ny9GyISaUc/Zf5zUHYmIFcYtiCngecl4F8hCXvfyp4MbxYgTI+5YY185NfLSU
 pQHwyMGKLifI6ndhWd3sO9KSCqFzSZKaVF/DdKScSqB+v1NqflMyjzfulR625/5U
 cSWWQP8Zu6H7os/X3+o5KCC/Pzbs5Nx/QPLwCabrOwcI3kdmQ2E=
 =JQUK
 -----END PGP SIGNATURE-----

Merge tag 'edac_for_4.20_2' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp

Pull more EDAC updates from Borislav Petkov:
 "The second part of the EDAC pile which contains the ADXL user and a
  build fix which addresses a not-so-sensical .config but fixes
  randconfig builds people do:

   - skx_edac: Address translation for NVDIMMs (Tony Luck and Qiuxu Zhuo)

   - ACPI_ADXL build fix"

[ I don't think "sensical" is a word, particularly when used in the
  context of actually meaning "nonsensical", but I like it   - Linus ]

* tag 'edac_for_4.20_2' of git://git.kernel.org/pub/scm/linux/kernel/git/bp/bp:
  EDAC, skx: Fix randconfig builds
  EDAC, skx_edac: Add address translation for non-volatile DIMMs
hifive-unleashed-5.1
Linus Torvalds 2018-11-02 11:17:22 -07:00
commit 0b21f21ae0
3 changed files with 186 additions and 13 deletions

View File

@ -234,6 +234,7 @@ config EDAC_SKX
depends on PCI && X86_64 && X86_MCE_INTEL && PCI_MMCONFIG
depends on ACPI_NFIT || !ACPI_NFIT # if ACPI_NFIT=m, EDAC_SKX can't be y
select DMI
select ACPI_ADXL if ACPI
help
Support for error detection and correction on the Intel
Skylake server Integrated Memory Controllers. If your

View File

@ -26,6 +26,7 @@
#include <linux/bitmap.h>
#include <linux/math64.h>
#include <linux/mod_devicetable.h>
#include <linux/adxl.h>
#include <acpi/nfit.h>
#include <asm/cpu_device_id.h>
#include <asm/intel-family.h>
@ -35,6 +36,7 @@
#include "edac_module.h"
#define EDAC_MOD_STR "skx_edac"
#define MSG_SIZE 1024
/*
* Debug macros
@ -54,6 +56,29 @@
static LIST_HEAD(skx_edac_list);
static u64 skx_tolm, skx_tohm;
static char *skx_msg;
/* Count of NVDIMMs seen; incremented in get_nvdimm_info(). */
static unsigned int nvdimm_count;
/*
 * Positions, within component_names[] and component_indices[], of the
 * address components the driver reads back from an ADXL decode.
 * INDEX_MAX is the number of such components.
 */
enum {
INDEX_SOCKET,
INDEX_MEMCTRL,
INDEX_CHANNEL,
INDEX_DIMM,
INDEX_MAX
};
/*
 * Component name strings that skx_adxl_get() compares (strcmp) against the
 * names returned by adxl_get_component_names().
 */
static const char * const component_names[] = {
[INDEX_SOCKET] = "ProcessorSocketId",
[INDEX_MEMCTRL] = "MemoryControllerId",
[INDEX_CHANNEL] = "ChannelId",
[INDEX_DIMM] = "DimmSlotId",
};
/*
 * For each INDEX_* entry, the position of the matching name in the firmware
 * name list; filled in by skx_adxl_get() and used by skx_adxl_decode() to
 * index adxl_values[].
 */
static int component_indices[ARRAY_SIZE(component_names)];
/*
 * Number of names in adxl_component_names. skx_adxl_get() resets it to 0 on
 * allocation failure; callers test it for non-zero before taking the ADXL
 * decode path.
 */
static int adxl_component_count;
/* Name list obtained from adxl_get_component_names(). */
static const char * const *adxl_component_names;
/* Buffer of adxl_component_count values, passed to adxl_decode(). */
static u64 *adxl_values;
/* MSG_SIZE-byte buffer holding the " name:0xvalue" text built per decode. */
static char *adxl_msg;
#define NUM_IMC 2 /* memory controllers per socket */
#define NUM_CHANNELS 3 /* channels per memory controller */
@ -393,6 +418,8 @@ static int get_nvdimm_info(struct dimm_info *dimm, struct skx_imc *imc,
u16 flags;
u64 size = 0;
nvdimm_count++;
dev_handle = ACPI_NFIT_BUILD_DEVICE_HANDLE(dimmno, chan, imc->lmc,
imc->src_id, 0);
@ -941,12 +968,46 @@ static void teardown_skx_debug(void)
}
#endif /*CONFIG_EDAC_DEBUG*/
/*
 * skx_adxl_decode: translate res->addr with adxl_decode() and record the
 * result.
 * @res: decoded-address record; ->addr is the input, and ->socket, ->imc,
 *       ->channel and ->dimm are filled in on success.
 *
 * Addresses at or above skx_tohm, or in the range [skx_tolm, BIT_ULL(32)),
 * are rejected before the firmware is consulted. On success adxl_msg holds
 * one " name:0xvalue" entry for every component whose value is not all-ones.
 *
 * Returns true if the address was decoded, false otherwise.
 */
static bool skx_adxl_decode(struct decoded_addr *res)
{
	bool above_tohm = res->addr >= skx_tohm;
	bool tolm_to_4g = res->addr >= skx_tolm && res->addr < BIT_ULL(32);
	int used = 0;
	int idx;

	if (above_tohm || tolm_to_4g) {
		edac_dbg(0, "Address 0x%llx out of range\n", res->addr);
		return false;
	}

	/* A non-zero return from adxl_decode() is treated as failure. */
	if (adxl_decode(res->addr, adxl_values) != 0) {
		edac_dbg(0, "Failed to decode 0x%llx\n", res->addr);
		return false;
	}

	/* Pick out the four components located by skx_adxl_get(). */
	res->socket  = (int)adxl_values[component_indices[INDEX_SOCKET]];
	res->imc     = (int)adxl_values[component_indices[INDEX_MEMCTRL]];
	res->channel = (int)adxl_values[component_indices[INDEX_CHANNEL]];
	res->dimm    = (int)adxl_values[component_indices[INDEX_DIMM]];

	/* Append every component that is not all-ones to the message. */
	for (idx = 0; idx < adxl_component_count; idx++) {
		if (adxl_values[idx] != ~0x0ull) {
			used += snprintf(adxl_msg + used, MSG_SIZE - used,
					 " %s:0x%llx",
					 adxl_component_names[idx],
					 adxl_values[idx]);
			/* Buffer exhausted: stop appending. */
			if (used >= MSG_SIZE)
				break;
		}
	}

	return true;
}
static void skx_mce_output_error(struct mem_ctl_info *mci,
const struct mce *m,
struct decoded_addr *res)
{
enum hw_event_mc_err_type tp_event;
char *type, *optype, msg[256];
char *type, *optype;
bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
bool overflow = GET_BITFIELD(m->status, 62, 62);
bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
@ -1007,22 +1068,47 @@ static void skx_mce_output_error(struct mem_ctl_info *mci,
break;
}
}
if (adxl_component_count) {
snprintf(skx_msg, MSG_SIZE, "%s%s err_code:%04x:%04x %s",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable) ? " recoverable" : "",
mscod, errcode, adxl_msg);
} else {
snprintf(skx_msg, MSG_SIZE,
"%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable) ? " recoverable" : "",
mscod, errcode,
res->socket, res->imc, res->rank,
res->bank_group, res->bank_address, res->row, res->column);
}
snprintf(msg, sizeof(msg),
"%s%s err_code:%04x:%04x socket:%d imc:%d rank:%d bg:%d ba:%d row:%x col:%x",
overflow ? " OVERFLOW" : "",
(uncorrected_error && recoverable) ? " recoverable" : "",
mscod, errcode,
res->socket, res->imc, res->rank,
res->bank_group, res->bank_address, res->row, res->column);
edac_dbg(0, "%s\n", msg);
edac_dbg(0, "%s\n", skx_msg);
/* Call the helper to output message */
edac_mc_handle_error(tp_event, mci, core_err_cnt,
m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
res->channel, res->dimm, -1,
optype, msg);
optype, skx_msg);
}
/*
 * get_mci: find the memory controller instance for a decoded error.
 * @src_id: id compared against imc[0].src_id of each device on
 *          skx_edac_list
 * @lmc:    index into that device's imc[] array; must be within
 *          [0, NUM_IMC - 1]
 *
 * Returns the mci pointer stored for the matching controller, or NULL
 * (after logging an error) when @lmc is out of range or no device matches
 * @src_id.
 */
static struct mem_ctl_info *get_mci(int src_id, int lmc)
{
	struct skx_dev *d;

	/*
	 * The caller derives @lmc from a u64 firmware value cast to int, so
	 * it can be negative as well as too large; reject both so that
	 * d->imc[] is never indexed out of bounds.
	 */
	if (lmc < 0 || lmc > NUM_IMC - 1) {
		skx_printk(KERN_ERR, "Bad lmc %d\n", lmc);
		return NULL;
	}

	list_for_each_entry(d, &skx_edac_list, list) {
		if (d->imc[0].src_id == src_id)
			return d->imc[lmc].mci;
	}

	skx_printk(KERN_ERR, "No mci for src_id %d lmc %d\n", src_id, lmc);

	return NULL;
}
static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
@ -1040,10 +1126,23 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val,
if ((mce->status & 0xefff) >> 7 != 1 || !(mce->status & MCI_STATUS_ADDRV))
return NOTIFY_DONE;
memset(&res, 0, sizeof(res));
res.addr = mce->addr;
if (!skx_decode(&res))
if (adxl_component_count) {
if (!skx_adxl_decode(&res))
return NOTIFY_DONE;
mci = get_mci(res.socket, res.imc);
} else {
if (!skx_decode(&res))
return NOTIFY_DONE;
mci = res.dev->imc[res.imc].mci;
}
if (!mci)
return NOTIFY_DONE;
mci = res.dev->imc[res.imc].mci;
if (mce->mcgstatus & MCG_STATUS_MCIP)
type = "Exception";
@ -1094,6 +1193,62 @@ static void skx_remove(void)
}
}
/*
 * skx_adxl_get: set up firmware (ADXL) address translation.
 *
 * Fetches the component name list from adxl_get_component_names(), records
 * in component_indices[] where each entry of component_names[] appears in
 * that list, and allocates the adxl_values and adxl_msg buffers.
 *
 * On any failure adxl_component_count is left at 0, which callers use to
 * fall back to the driver's own decoding. The function returns nothing, so
 * a failure here does not stop initialisation.
 */
static void __init skx_adxl_get(void)
{
	const char * const *names;
	int i, j;

	names = adxl_get_component_names();
	if (!names) {
		skx_printk(KERN_NOTICE, "No firmware support for address translation.");
		skx_printk(KERN_CONT, " Only decoding DDR4 address!\n");
		return;
	}

	/* Every required component must be present in the firmware list. */
	for (i = 0; i < INDEX_MAX; i++) {
		for (j = 0; names[j]; j++) {
			if (!strcmp(component_names[i], names[j])) {
				component_indices[i] = j;
				break;
			}
		}

		/* Reached the NULL terminator: component_names[i] not found. */
		if (!names[j])
			goto err;
	}

	adxl_component_names = names;
	while (*names++)
		adxl_component_count++;

	adxl_values = kcalloc(adxl_component_count, sizeof(*adxl_values),
			      GFP_KERNEL);
	if (!adxl_values) {
		adxl_component_count = 0;
		return;
	}

	adxl_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
	if (!adxl_msg) {
		adxl_component_count = 0;
		kfree(adxl_values);
		/*
		 * skx_adxl_put() frees adxl_values unconditionally; clear the
		 * pointer so that later call does not free it a second time.
		 */
		adxl_values = NULL;
	}

	return;
err:
	skx_printk(KERN_ERR, "'%s' is not matched from DSM parameters: ",
		   component_names[i]);
	for (j = 0; names[j]; j++)
		skx_printk(KERN_CONT, "%s ", names[j]);
	skx_printk(KERN_CONT, "\n");
}
/*
 * skx_adxl_put: free the buffers allocated by skx_adxl_get().
 *
 * Both pointers are passed straight to kfree(); the two frees are
 * independent of each other.
 */
static void __exit skx_adxl_put(void)
{
	/* Message text buffer. */
	kfree(adxl_msg);
	/* Decoded component value array. */
	kfree(adxl_values);
}
/*
* skx_init:
* make sure we are running on the correct cpu model
@ -1158,6 +1313,15 @@ static int __init skx_init(void)
}
}
skx_msg = kzalloc(MSG_SIZE, GFP_KERNEL);
if (!skx_msg) {
rc = -ENOMEM;
goto fail;
}
if (nvdimm_count)
skx_adxl_get();
/* Ensure that the OPSTATE is set correctly for POLL or NMI */
opstate_init();
@ -1176,6 +1340,9 @@ static void __exit skx_exit(void)
edac_dbg(2, "\n");
mce_unregister_decode_chain(&skx_mce_dec);
skx_remove();
if (nvdimm_count)
skx_adxl_put();
kfree(skx_msg);
teardown_skx_debug();
}

View File

@ -7,7 +7,12 @@
#ifndef _LINUX_ADXL_H
#define _LINUX_ADXL_H
#ifdef CONFIG_ACPI_ADXL
/*
 * Real implementations, provided elsewhere when CONFIG_ACPI_ADXL is set.
 * NOTE(review): the skx_edac caller treats the name list as NULL-terminated
 * and treats a non-zero adxl_decode() return as failure - confirm against
 * the implementation.
 */
const char * const *adxl_get_component_names(void);
int adxl_decode(u64 addr, u64 component_values[]);
#else
/*
 * Stubs for builds without CONFIG_ACPI_ADXL: no component names are
 * available (NULL) and every decode request fails with -EOPNOTSUPP.
 */
static inline const char * const *adxl_get_component_names(void) { return NULL; }
static inline int adxl_decode(u64 addr, u64 component_values[]) { return -EOPNOTSUPP; }
#endif
#endif /* _LINUX_ADXL_H */