1
0
Fork 0

EDAC, altera: Fix S10 Double Bit Error Notification

The Stratix10 Double Bit Error (DBE) address was always read from the SDRAM
address register instead of from each device's own address register.

To determine which device had the DBE, cycle through the EDAC devices
comparing the DBE value to the db_irq value. Once found, report the DBE
Address from the device registers as well as the device name.

Finally, notify the system via an SMC call and indicate the panic should
result in a system reboot. Change a run-time check to a Stratix10
compile-time check for a clean SMC notification.

Fixes: d5fc912556 ("EDAC, altera: Combine Stratix10 and Arria10 probe functions")
Signed-off-by: Thor Thayer <thor.thayer@linux.intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Cc: James Morse <james.morse@arm.com>
Cc: Mauro Carvalho Chehab <mchehab@kernel.org>
Cc: linux-edac <linux-edac@vger.kernel.org>
Link: https://lkml.kernel.org/r/1552490842-25440-1-git-send-email-thor.thayer@linux.intel.com
hifive-unleashed-5.2
Thor Thayer 2019-03-13 10:27:22 -05:00 committed by Borislav Petkov
parent fe783516e3
commit 1bd76ff448
2 changed files with 72 additions and 17 deletions

View File

@ -1930,6 +1930,15 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac,
goto err_release_group1;
}
#ifdef CONFIG_ARCH_STRATIX10
/* Use IRQ to determine SError origin instead of assigning IRQ */
rc = of_property_read_u32_index(np, "interrupts", 0, &altdev->db_irq);
if (rc) {
edac_printk(KERN_ERR, EDAC_DEVICE,
"Unable to parse DB IRQ index\n");
goto err_release_group1;
}
#else
altdev->db_irq = irq_of_parse_and_map(np, 1);
if (!altdev->db_irq) {
edac_printk(KERN_ERR, EDAC_DEVICE, "Error allocating DBIRQ\n");
@ -1943,6 +1952,7 @@ static int altr_edac_a10_device_add(struct altr_arria10_edac *edac,
edac_printk(KERN_ERR, EDAC_DEVICE, "No DBERR IRQ resource\n");
goto err_release_group1;
}
#endif
rc = edac_device_add_device(dci);
if (rc) {
@ -2005,6 +2015,10 @@ static const struct irq_domain_ops a10_eccmgr_ic_ops = {
/************** Stratix 10 EDAC Double Bit Error Handler ************/
#define to_a10edac(p, m) container_of(p, struct altr_arria10_edac, m)
#ifdef CONFIG_ARCH_STRATIX10
/* panic routine issues reboot on non-zero panic_timeout */
extern int panic_timeout;
/*
* The double bit error is handled through SError which is fatal. This is
* called as a panic notifier to printout ECC error info as part of the panic.
@ -2018,17 +2032,37 @@ static int s10_edac_dberr_handler(struct notifier_block *this,
regmap_read(edac->ecc_mgr_map, S10_SYSMGR_ECC_INTSTAT_DERR_OFST,
&dberror);
regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_VAL_OFST, dberror);
if (dberror & S10_DDR0_IRQ_MASK) {
regmap_read(edac->ecc_mgr_map, A10_DERRADDR_OFST, &err_addr);
regmap_write(edac->ecc_mgr_map, S10_SYSMGR_UE_ADDR_OFST,
err_addr);
edac_printk(KERN_ERR, EDAC_MC,
"EDAC: [Uncorrectable errors @ 0x%08X]\n\n",
err_addr);
if (dberror & S10_DBE_IRQ_MASK) {
struct list_head *position;
struct altr_edac_device_dev *ed;
struct arm_smccc_res result;
/* Find the matching DBE in the list of devices */
list_for_each(position, &edac->a10_ecc_devices) {
ed = list_entry(position, struct altr_edac_device_dev,
next);
if (!(BIT(ed->db_irq) & dberror))
continue;
writel(ALTR_A10_ECC_DERRPENA,
ed->base + ALTR_A10_ECC_INTSTAT_OFST);
err_addr = readl(ed->base + ALTR_S10_DERR_ADDRA_OFST);
regmap_write(edac->ecc_mgr_map,
S10_SYSMGR_UE_ADDR_OFST, err_addr);
edac_printk(KERN_ERR, EDAC_DEVICE,
"EDAC: [Fatal DBE on %s @ 0x%08X]\n",
ed->edac_dev_name, err_addr);
break;
}
/* Notify the System through SMC. Reboot delay = 1 second */
panic_timeout = 1;
arm_smccc_smc(INTEL_SIP_SMC_ECC_DBE, dberror, 0, 0, 0, 0,
0, 0, &result);
}
return NOTIFY_DONE;
}
#endif
/****************** Arria 10 EDAC Probe Function *********************/
static int altr_edac_a10_probe(struct platform_device *pdev)
@ -2098,16 +2132,8 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
altr_edac_a10_irq_handler,
edac);
if (socfpga_is_a10()) {
edac->db_irq = platform_get_irq(pdev, 1);
if (edac->db_irq < 0) {
dev_err(&pdev->dev, "No DBERR IRQ resource\n");
return edac->db_irq;
}
irq_set_chained_handler_and_data(edac->db_irq,
altr_edac_a10_irq_handler,
edac);
} else {
#ifdef CONFIG_ARCH_STRATIX10
{
int dberror, err_addr;
edac->panic_notifier.notifier_call = s10_edac_dberr_handler;
@ -2130,6 +2156,15 @@ static int altr_edac_a10_probe(struct platform_device *pdev)
S10_SYSMGR_UE_ADDR_OFST, 0);
}
}
#else
edac->db_irq = platform_get_irq(pdev, 1);
if (edac->db_irq < 0) {
dev_err(&pdev->dev, "No DBERR IRQ resource\n");
return edac->db_irq;
}
irq_set_chained_handler_and_data(edac->db_irq,
altr_edac_a10_irq_handler, edac);
#endif
for_each_child_of_node(pdev->dev.of_node, child) {
if (!of_device_is_available(child))

View File

@ -289,6 +289,7 @@ struct altr_sdram_mc_data {
#define ALTR_A10_ECC_INIT_WATCHDOG_10US 10000
/************* Stratix10 Defines **************/
#define ALTR_S10_DERR_ADDRA_OFST 0x2C
/* Stratix10 ECC Manager Defines */
#define S10_SYSMGR_ECC_INTMASK_CLR_OFST 0x98
@ -299,6 +300,7 @@ struct altr_sdram_mc_data {
#define S10_SYSMGR_UE_ADDR_OFST 0x224
#define S10_DDR0_IRQ_MASK BIT(16)
#define S10_DBE_IRQ_MASK 0x3FE
/* Define ECC Block Offsets for peripherals */
#define ECC_BLK_ADDRESS_OFST 0x40
@ -435,4 +437,22 @@ struct altr_arria10_edac {
#define INTEL_SIP_SMC_REG_WRITE \
INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_REG_WRITE)
/*
 * Request INTEL_SIP_SMC_ECC_DBE
 *
 * Sync call used by the service driver at EL1 to alert EL3 that a
 * Double Bit ECC error has occurred.
 *
 * Call register usage:
 * a0 INTEL_SIP_SMC_ECC_DBE
 * a1 SysManager Double Bit Error value
 * a2-7 not used
 *
 * Return status
 * a0 INTEL_SIP_SMC_STATUS_OK
 */
/* Function ID of the ECC DBE request; wrapped into the call value below. */
#define INTEL_SIP_SMC_FUNCID_ECC_DBE 13
/* Value passed in a0 when issuing the ECC DBE request. */
#define INTEL_SIP_SMC_ECC_DBE \
INTEL_SIP_SMC_FAST_CALL_VAL(INTEL_SIP_SMC_FUNCID_ECC_DBE)
#endif /* #ifndef _ALTERA_EDAC_H */