diff --git a/Documentation/PCI/pcieaer-howto.txt b/Documentation/PCI/pcieaer-howto.txt index b4987c0bcb20..ea8cafba255c 100644 --- a/Documentation/PCI/pcieaer-howto.txt +++ b/Documentation/PCI/pcieaer-howto.txt @@ -49,25 +49,17 @@ depends on CONFIG_PCIEPORTBUS, so pls. set CONFIG_PCIEPORTBUS=y and CONFIG_PCIEAER = y. 2.2 Load PCI Express AER Root Driver -There is a case where a system has AER support in BIOS. Enabling the AER -Root driver and having AER support in BIOS may result unpredictable -behavior. To avoid this conflict, a successful load of the AER Root driver -requires ACPI _OSC support in the BIOS to allow the AER Root driver to -request for native control of AER. See the PCI FW 3.0 Specification for -details regarding OSC usage. Currently, lots of firmwares don't provide -_OSC support while they use PCI Express. To support such firmwares, -forceload, a parameter of type bool, could enable AER to continue to -be initiated although firmwares have no _OSC support. To enable the -walkaround, pls. add aerdriver.forceload=y to kernel boot parameter line -when booting kernel. Note that forceload=n by default. -nosourceid, another parameter of type bool, can be used when broken -hardware (mostly chipsets) has root ports that cannot obtain the reporting -source ID. nosourceid=n by default. +Some systems have AER support in firmware. Enabling Linux AER support at +the same time the firmware handles AER may result in unpredictable +behavior. Therefore, Linux does not handle AER events unless the firmware +grants AER control to the OS via the ACPI _OSC method. See the PCI FW 3.0 +Specification for details regarding _OSC usage. 2.3 AER error output -When a PCI-E AER error is captured, an error message will be outputted to -console. If it's a correctable error, it is outputted as a warning. + +When a PCIe AER error is captured, an error message will be output to +console. If it's a correctable error, it is output as a warning. Otherwise, it is printed as an error. So users could choose different log level to filter out correctable error messages. diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 60746ef904e4..f0a029e68d3e 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -457,7 +457,7 @@ static void ghes_do_proc(struct ghes *ghes, devfn = PCI_DEVFN(pcie_err->device_id.device, pcie_err->device_id.function); - aer_severity = cper_severity_to_aer(sev); + aer_severity = cper_severity_to_aer(gdata->error_severity); /* * If firmware reset the component to contain diff --git a/drivers/pci/pcie/aer/aerdrv.c b/drivers/pci/pcie/aer/aerdrv.c index 49805a48b81c..139150b2bdfd 100644 --- a/drivers/pci/pcie/aer/aerdrv.c +++ b/drivers/pci/pcie/aer/aerdrv.c @@ -66,7 +66,7 @@ static int pcie_aer_disable; void pci_no_aer(void) { - pcie_aer_disable = 1; /* has priority over 'forceload' */ + pcie_aer_disable = 1; } bool pci_aer_available(void) @@ -130,7 +130,7 @@ static void aer_enable_rootport(struct aer_rpc *rpc) pcie_capability_clear_word(pdev, PCI_EXP_RTCTL, SYSTEM_ERROR_INTR_ON_MESG_MASK); - aer_pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); + aer_pos = pdev->aer_cap; /* Clear error status */ pci_read_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, ®32); pci_write_config_dword(pdev, aer_pos + PCI_ERR_ROOT_STATUS, reg32); @@ -169,7 +169,7 @@ static void aer_disable_rootport(struct aer_rpc *rpc) */ set_downstream_devices_error_reporting(pdev, false); - pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ERR); + pos = pdev->aer_cap; /* Disable Root's interrupt in response to error messages */ pci_read_config_dword(pdev, pos + PCI_ERR_ROOT_COMMAND, ®32); reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; @@ -196,7 +196,7 @@ irqreturn_t aer_irq(int irq, void *context) unsigned long flags; int pos; - pos = pci_find_ext_capability(pdev->port, PCI_EXT_CAP_ID_ERR); + pos = pdev->port->aer_cap; /* * Must lock access to Root Error Status Reg, Root Error ID Reg, * and Root error producer/consumer index @@ -290,7 +290,6 @@ static void aer_remove(struct pcie_device *dev) /** * aer_probe - initialize resources * @dev: pointer to the pcie_dev data structure - * @id: pointer to the service id data structure * * Invoked when PCI Express bus loads AER service driver. */ @@ -300,11 +299,6 @@ static int aer_probe(struct pcie_device *dev) struct aer_rpc *rpc; struct device *device = &dev->device; - /* Init */ - status = aer_init(dev); - if (status) - return status; - /* Alloc rpc data structure */ rpc = aer_alloc_rpc(dev); if (!rpc) { @@ -339,7 +333,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) u32 reg32; int pos; - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + pos = dev->aer_cap; /* Disable Root's interrupt in response to error messages */ pci_read_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, ®32); @@ -392,7 +386,7 @@ static void aer_error_resume(struct pci_dev *dev) pcie_capability_write_word(dev, PCI_EXP_DEVSTA, reg16); /* Clean AER Root Error Status */ - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + pos = dev->aer_cap; pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status); pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &mask); if (dev->error_state == pci_channel_io_normal) diff --git a/drivers/pci/pcie/aer/aerdrv.h b/drivers/pci/pcie/aer/aerdrv.h index 945c939a86c5..d51e4a57b190 100644 --- a/drivers/pci/pcie/aer/aerdrv.h +++ b/drivers/pci/pcie/aer/aerdrv.h @@ -60,6 +60,7 @@ struct aer_rpc { struct pcie_device *rpd; /* Root Port device */ struct work_struct dpc_handler; struct aer_err_source e_sources[AER_ERROR_SOURCES_MAX]; + struct aer_err_info e_info; unsigned short prod_idx; /* Error Producer Index */ unsigned short cons_idx; /* Error Consumer Index */ int isr; @@ -105,7 +106,6 @@ static inline pci_ers_result_t merge_result(enum pci_ers_result orig, } extern struct bus_type pcie_port_bus_type; -int aer_init(struct pcie_device *dev); void aer_isr(struct work_struct *work); void aer_print_error(struct pci_dev *dev, struct aer_err_info *info); void aer_print_port_info(struct pci_dev *dev, struct aer_err_info *info); @@ -121,11 +121,4 @@ static inline int pcie_aer_get_firmware_first(struct pci_dev *pci_dev) return 0; } #endif - -static inline void pcie_aer_force_firmware_first(struct pci_dev *pci_dev, - int enable) -{ - pci_dev->__aer_firmware_first = !!enable; - pci_dev->__aer_firmware_first_valid = 1; -} #endif /* _AERDRV_H_ */ diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 521e39c1b66d..b1303b32053f 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -27,11 +27,6 @@ #include #include "aerdrv.h" -static bool forceload; -static bool nosourceid; -module_param(forceload, bool, 0); -module_param(nosourceid, bool, 0); - #define PCI_EXP_AER_FLAGS (PCI_EXP_DEVCTL_CERE | PCI_EXP_DEVCTL_NFERE | \ PCI_EXP_DEVCTL_FERE | PCI_EXP_DEVCTL_URRE) @@ -40,7 +35,7 @@ int pci_enable_pcie_error_reporting(struct pci_dev *dev) if (pcie_aer_get_firmware_first(dev)) return -EIO; - if (!pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR)) + if (!dev->aer_cap) return -EIO; return pcie_capability_set_word(dev, PCI_EXP_DEVCTL, PCI_EXP_AER_FLAGS); @@ -62,7 +57,7 @@ int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) int pos; u32 status; - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + pos = dev->aer_cap; if (!pos) return -EIO; @@ -83,7 +78,7 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) if (!pci_is_pcie(dev)) return -ENODEV; - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + pos = dev->aer_cap; if (!pos) return -EIO; @@ -102,6 +97,12 @@ int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) return 0; } +int pci_aer_init(struct pci_dev *dev) +{ + dev->aer_cap = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + return pci_cleanup_aer_error_status_regs(dev); +} + /** * add_error_device - list device to be handled * @e_info: pointer to error info @@ -132,7 +133,8 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) * When bus id is equal to 0, it might be a bad id * reported by root port. */ - if (!nosourceid && (PCI_BUS_NUM(e_info->id) != 0)) { + if ((PCI_BUS_NUM(e_info->id) != 0) && + !(dev->bus->bus_flags & PCI_BUS_FLAGS_NO_AERSID)) { /* Device ID match? */ if (e_info->id == ((dev->bus->number << 8) | dev->devfn)) return true; @@ -144,10 +146,10 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) /* * When either - * 1) nosourceid==y; - * 2) bus id is equal to 0. Some ports might lose the bus + * 1) bus id is equal to 0. Some ports might lose the bus * id of error source id; - * 3) There are multiple errors and prior id comparing fails; + * 2) bus flag PCI_BUS_FLAGS_NO_AERSID is set + * 3) There are multiple errors and prior ID comparing fails; * We check AER status registers to find possible reporter. */ if (atomic_read(&dev->enable_cnt) == 0) @@ -158,7 +160,7 @@ static bool is_error_source(struct pci_dev *dev, struct aer_err_info *e_info) if (!(reg16 & PCI_EXP_AER_FLAGS)) return false; - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + pos = dev->aer_cap; if (!pos) return false; @@ -555,7 +557,7 @@ static void handle_error_source(struct pcie_device *aerdev, * Correctable error does not need software intervention. * No need to go through error recovery process. */ - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + pos = dev->aer_cap; if (pos) pci_write_config_dword(dev, pos + PCI_ERR_COR_STATUS, info->status); @@ -647,7 +649,7 @@ static int get_device_error_info(struct pci_dev *dev, struct aer_err_info *info) info->status = 0; info->tlp_header_valid = 0; - pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_ERR); + pos = dev->aer_cap; /* The device might not support AER */ if (!pos) @@ -715,15 +717,8 @@ static inline void aer_process_err_devices(struct pcie_device *p_device, static void aer_isr_one_error(struct pcie_device *p_device, struct aer_err_source *e_src) { - struct aer_err_info *e_info; - - /* struct aer_err_info might be big, so we allocate it with slab */ - e_info = kmalloc(sizeof(struct aer_err_info), GFP_KERNEL); - if (!e_info) { - dev_printk(KERN_DEBUG, &p_device->port->dev, - "Can't allocate mem when processing AER errors\n"); - return; - } + struct aer_rpc *rpc = get_service_data(p_device); + struct aer_err_info *e_info = &rpc->e_info; /* * There is a possibility that both correctable error and @@ -762,8 +757,6 @@ static void aer_isr_one_error(struct pcie_device *p_device, if (find_source_device(p_device->port, e_info)) aer_process_err_devices(p_device, e_info); } - - kfree(e_info); } /** @@ -812,19 +805,3 @@ void aer_isr(struct work_struct *work) aer_isr_one_error(p_device, &e_src); mutex_unlock(&rpc->rpc_mutex); } - -/** - * aer_init - provide AER initialization - * @dev: pointer to AER pcie device - * - * Invoked when AER service driver is loaded. - */ -int aer_init(struct pcie_device *dev) -{ - if (forceload) { - dev_printk(KERN_DEBUG, &dev->device, - "aerdrv forceload requested.\n"); - pcie_aer_force_firmware_first(dev->port, 0); - } - return 0; -} diff --git a/drivers/pci/pcie/aer/aerdrv_errprint.c b/drivers/pci/pcie/aer/aerdrv_errprint.c index 167fe411ce2e..54c4b691e51f 100644 --- a/drivers/pci/pcie/aer/aerdrv_errprint.c +++ b/drivers/pci/pcie/aer/aerdrv_errprint.c @@ -219,15 +219,13 @@ int cper_severity_to_aer(int cper_severity) } EXPORT_SYMBOL_GPL(cper_severity_to_aer); -void cper_print_aer(struct pci_dev *dev, int cper_severity, +void cper_print_aer(struct pci_dev *dev, int aer_severity, struct aer_capability_regs *aer) { - int aer_severity, layer, agent, status_strs_size, tlp_header_valid = 0; + int layer, agent, status_strs_size, tlp_header_valid = 0; u32 status, mask; const char **status_strs; - aer_severity = cper_severity_to_aer(cper_severity); - if (aer_severity == AER_CORRECTABLE) { status = aer->cor_status; mask = aer->cor_mask; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index e2e424472058..ab002671fa60 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1666,10 +1666,11 @@ static void pci_init_capabilities(struct pci_dev *dev) /* Enable ACS P2P upstream forwarding */ pci_enable_acs(dev); - pci_cleanup_aer_error_status_regs(dev); - /* Precision Time Measurement */ pci_ptm_init(dev); + + /* Advanced Error Reporting */ + pci_aer_init(dev); } /* diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 37ff0158e45f..6297942649bf 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -4428,3 +4428,20 @@ static void quirk_intel_qat_vf_cap(struct pci_dev *pdev) } } DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x443, quirk_intel_qat_vf_cap); + +/* + * VMD-enabled root ports will change the source ID for all messages + * to the VMD device. Rather than doing device matching with the source + * ID, the AER driver should traverse the child device tree, reading + * AER registers to find the faulting device. + */ +static void quirk_no_aersid(struct pci_dev *pdev) +{ + /* VMD Domain */ + if (pdev->bus->sysdata && pci_domain_nr(pdev->bus) >= 0x10000) + pdev->bus->bus_flags |= PCI_BUS_FLAGS_NO_AERSID; +} +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2030, quirk_no_aersid); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2031, quirk_no_aersid); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2032, quirk_no_aersid); +DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2033, quirk_no_aersid); diff --git a/include/linux/aer.h b/include/linux/aer.h index 164049357e5c..04602cbe85dc 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -63,7 +63,7 @@ static inline int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) } #endif -void cper_print_aer(struct pci_dev *dev, int cper_severity, +void cper_print_aer(struct pci_dev *dev, int aer_severity, struct aer_capability_regs *aer); int cper_severity_to_aer(int cper_severity); void aer_recover_queue(int domain, unsigned int bus, unsigned int devfn, diff --git a/include/linux/pci.h b/include/linux/pci.h index 7256f33b6a15..84d222ad3c08 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -187,8 +187,9 @@ enum pci_irq_reroute_variant { typedef unsigned short __bitwise pci_bus_flags_t; enum pci_bus_flags { - PCI_BUS_FLAGS_NO_MSI = (__force pci_bus_flags_t) 1, - PCI_BUS_FLAGS_NO_MMRBC = (__force pci_bus_flags_t) 2, + PCI_BUS_FLAGS_NO_MSI = (__force pci_bus_flags_t) 1, + PCI_BUS_FLAGS_NO_MMRBC = (__force pci_bus_flags_t) 2, + PCI_BUS_FLAGS_NO_AERSID = (__force pci_bus_flags_t) 4, }; /* These values come from the PCI Express Spec */ @@ -268,6 +269,9 @@ struct pci_dev { unsigned int class; /* 3 bytes: (base,sub,prog-if) */ u8 revision; /* PCI revision, low byte of class word */ u8 hdr_type; /* PCI header type (`multi' flag masked out) */ +#ifdef CONFIG_PCIEAER + u16 aer_cap; /* AER capability offset */ +#endif u8 pcie_cap; /* PCIe capability offset */ u8 msi_cap; /* MSI capability offset */ u8 msix_cap; /* MSI-X capability offset */ @@ -1374,9 +1378,11 @@ static inline bool pcie_aspm_support_enabled(void) { return false; } #ifdef CONFIG_PCIEAER void pci_no_aer(void); bool pci_aer_available(void); +int pci_aer_init(struct pci_dev *dev); #else static inline void pci_no_aer(void) { } static inline bool pci_aer_available(void) { return false; } +static inline int pci_aer_init(struct pci_dev *d) { return -ENODEV; } #endif #ifdef CONFIG_PCIE_ECRC