diff --git a/drivers/infiniband/hw/hfi1/pcie.c b/drivers/infiniband/hw/hfi1/pcie.c index 4d4371bf2c7c..baf7c324f7b8 100644 --- a/drivers/infiniband/hw/hfi1/pcie.c +++ b/drivers/infiniband/hw/hfi1/pcie.c @@ -905,9 +905,7 @@ static int trigger_sbr(struct hfi1_devdata *dd) * delay after a reset is required. Per spec requirements, * the link is either working or not after that point. */ - pci_reset_bridge_secondary_bus(dev->bus->self); - - return 0; + return pci_reset_bus(dev); } /* diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index 4923a2a8e14b..5b78f3b1b918 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -273,6 +273,9 @@ int pci_enable_pasid(struct pci_dev *pdev, int features) if (WARN_ON(pdev->pasid_enabled)) return -EBUSY; + if (!pdev->eetlp_prefix_path) + return -EINVAL; + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PASID); if (!pos) return -EINVAL; diff --git a/drivers/pci/hotplug/pciehp_hpc.c b/drivers/pci/hotplug/pciehp_hpc.c index 5b15e76f3564..7136e3430925 100644 --- a/drivers/pci/hotplug/pciehp_hpc.c +++ b/drivers/pci/hotplug/pciehp_hpc.c @@ -761,6 +761,7 @@ int pciehp_reset_slot(struct slot *slot, int probe) struct controller *ctrl = slot->ctrl; struct pci_dev *pdev = ctrl_dev(ctrl); u16 stat_mask = 0, ctrl_mask = 0; + int rc; if (probe) return 0; @@ -778,7 +779,7 @@ int pciehp_reset_slot(struct slot *slot, int probe) ctrl_dbg(ctrl, "%s: SLOTCTRL %x write cmd %x\n", __func__, pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, 0); - pci_reset_bridge_secondary_bus(ctrl->pcie->port); + rc = pci_bridge_secondary_bus_reset(ctrl->pcie->port); pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, stat_mask); pcie_write_cmd_nowait(ctrl, ctrl_mask, ctrl_mask); @@ -786,7 +787,7 @@ int pciehp_reset_slot(struct slot *slot, int probe) pci_pcie_cap(ctrl->pcie->port) + PCI_EXP_SLTCTL, ctrl_mask); up_write(&ctrl->reset_lock); - return 0; + return rc; } int pcie_init_notification(struct controller *ctrl) diff --git a/drivers/pci/iov.c b/drivers/pci/iov.c 
index 0f04ae648cf1..c5f3cd4ed766 100644 --- a/drivers/pci/iov.c +++ b/drivers/pci/iov.c @@ -818,15 +818,15 @@ int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs) { if (!dev->is_physfn) return -ENOSYS; + if (numvfs > dev->sriov->total_VFs) return -EINVAL; /* Shouldn't change if VFs already enabled */ if (dev->sriov->ctrl & PCI_SRIOV_CTRL_VFE) return -EBUSY; - else - dev->sriov->driver_max_VFs = numvfs; + dev->sriov->driver_max_VFs = numvfs; return 0; } EXPORT_SYMBOL_GPL(pci_sriov_set_totalvfs); diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 77960fe9d9f3..80da4841c7d2 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -4349,7 +4349,7 @@ static int pci_dev_wait(struct pci_dev *dev, char *reset_type, int timeout) * Returns true if the device advertises support for PCIe function level * resets. */ -static bool pcie_has_flr(struct pci_dev *dev) +bool pcie_has_flr(struct pci_dev *dev) { u32 cap; @@ -4359,6 +4359,7 @@ static bool pcie_has_flr(struct pci_dev *dev) pcie_capability_read_dword(dev, PCI_EXP_DEVCAP, &cap); return cap & PCI_EXP_DEVCAP_FLR; } +EXPORT_SYMBOL_GPL(pcie_has_flr); /** * pcie_flr - initiate a PCIe function level reset @@ -4534,19 +4535,18 @@ void __weak pcibios_reset_secondary_bus(struct pci_dev *dev) } /** - * pci_reset_bridge_secondary_bus - Reset the secondary bus on a PCI bridge. + * pci_bridge_secondary_bus_reset - Reset the secondary bus on a PCI bridge. * @dev: Bridge device * * Use the bridge control register to assert reset on the secondary bus. * Devices on the secondary bus are left in power-on state. 
*/ -int pci_reset_bridge_secondary_bus(struct pci_dev *dev) +int pci_bridge_secondary_bus_reset(struct pci_dev *dev) { pcibios_reset_secondary_bus(dev); return pci_dev_wait(dev, "bus reset", PCIE_RESET_READY_POLL_MS); } -EXPORT_SYMBOL_GPL(pci_reset_bridge_secondary_bus); static int pci_parent_bus_reset(struct pci_dev *dev, int probe) { @@ -4563,9 +4563,7 @@ static int pci_parent_bus_reset(struct pci_dev *dev, int probe) if (probe) return 0; - pci_reset_bridge_secondary_bus(dev->bus->self); - - return 0; + return pci_bridge_secondary_bus_reset(dev->bus->self); } static int pci_reset_hotplug_slot(struct hotplug_slot *hotplug, int probe) @@ -5097,7 +5095,7 @@ int pci_probe_reset_slot(struct pci_slot *slot) EXPORT_SYMBOL_GPL(pci_probe_reset_slot); /** - * pci_reset_slot - reset a PCI slot + * __pci_reset_slot - Try to reset a PCI slot * @slot: PCI slot to reset * * A PCI bus may host multiple slots, each slot may support a reset mechanism @@ -5109,33 +5107,9 @@ EXPORT_SYMBOL_GPL(pci_probe_reset_slot); * through this function. PCI config space of all devices in the slot and * behind the slot is saved before and restored after reset. * - * Return 0 on success, non-zero on error. 
- */ -int pci_reset_slot(struct pci_slot *slot) -{ - int rc; - - rc = pci_slot_reset(slot, 1); - if (rc) - return rc; - - pci_slot_save_and_disable(slot); - - rc = pci_slot_reset(slot, 0); - - pci_slot_restore(slot); - - return rc; -} -EXPORT_SYMBOL_GPL(pci_reset_slot); - -/** - * pci_try_reset_slot - Try to reset a PCI slot - * @slot: PCI slot to reset - * * Same as above except return -EAGAIN if the slot cannot be locked */ -int pci_try_reset_slot(struct pci_slot *slot) +static int __pci_reset_slot(struct pci_slot *slot) { int rc; @@ -5156,10 +5130,11 @@ int pci_try_reset_slot(struct pci_slot *slot) return rc; } -EXPORT_SYMBOL_GPL(pci_try_reset_slot); static int pci_bus_reset(struct pci_bus *bus, int probe) { + int ret; + if (!bus->self || !pci_bus_resetable(bus)) return -ENOTTY; @@ -5170,11 +5145,11 @@ static int pci_bus_reset(struct pci_bus *bus, int probe) might_sleep(); - pci_reset_bridge_secondary_bus(bus->self); + ret = pci_bridge_secondary_bus_reset(bus->self); pci_bus_unlock(bus); - return 0; + return ret; } /** @@ -5190,39 +5165,12 @@ int pci_probe_reset_bus(struct pci_bus *bus) EXPORT_SYMBOL_GPL(pci_probe_reset_bus); /** - * pci_reset_bus - reset a PCI bus - * @bus: top level PCI bus to reset - * - * Do a bus reset on the given bus and any subordinate buses, saving - * and restoring state of all devices. - * - * Return 0 on success, non-zero on error. 
- */ -int pci_reset_bus(struct pci_bus *bus) -{ - int rc; - - rc = pci_bus_reset(bus, 1); - if (rc) - return rc; - - pci_bus_save_and_disable(bus); - - rc = pci_bus_reset(bus, 0); - - pci_bus_restore(bus); - - return rc; -} -EXPORT_SYMBOL_GPL(pci_reset_bus); - -/** - * pci_try_reset_bus - Try to reset a PCI bus + * __pci_reset_bus - Try to reset a PCI bus * @bus: top level PCI bus to reset * * Same as above except return -EAGAIN if the bus cannot be locked */ -int pci_try_reset_bus(struct pci_bus *bus) +static int __pci_reset_bus(struct pci_bus *bus) { int rc; @@ -5234,7 +5182,7 @@ int pci_try_reset_bus(struct pci_bus *bus) if (pci_bus_trylock(bus)) { might_sleep(); - pci_reset_bridge_secondary_bus(bus->self); + rc = pci_bridge_secondary_bus_reset(bus->self); pci_bus_unlock(bus); } else rc = -EAGAIN; @@ -5243,7 +5191,19 @@ int pci_try_reset_bus(struct pci_bus *bus) return rc; } -EXPORT_SYMBOL_GPL(pci_try_reset_bus); + +/** + * pci_reset_bus - Try to reset a PCI bus + * @pdev: top level PCI device to reset via slot/bus + * + * Reset via the slot if it can be reset, else via the bus; -EAGAIN if it cannot be locked + */ +int pci_reset_bus(struct pci_dev *pdev) +{ + return !pci_probe_reset_slot(pdev->slot) ? 
+ __pci_reset_slot(pdev->slot) : __pci_reset_bus(pdev->bus); +} +EXPORT_SYMBOL_GPL(pci_reset_bus); /** * pcix_get_max_mmrbc - get PCI-X maximum designed memory read byte count diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 61960411bd3b..6e0d1528d471 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -34,6 +34,7 @@ int pci_mmap_fits(struct pci_dev *pdev, int resno, struct vm_area_struct *vmai, enum pci_mmap_api mmap_api); int pci_probe_reset_function(struct pci_dev *dev); +int pci_bridge_secondary_bus_reset(struct pci_dev *dev); /** * struct pci_platform_pm_ops - Firmware PM callbacks diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c index 4e823ae051a7..83180edd6ed4 100644 --- a/drivers/pci/pcie/aer.c +++ b/drivers/pci/pcie/aer.c @@ -1517,6 +1517,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) { u32 reg32; int pos; + int rc; pos = dev->aer_cap; @@ -1525,7 +1526,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) reg32 &= ~ROOT_PORT_INTR_ON_MESG_MASK; pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32); - pci_reset_bridge_secondary_bus(dev); + rc = pci_bridge_secondary_bus_reset(dev); pci_printk(KERN_DEBUG, dev, "Root Port link has been reset\n"); /* Clear Root Error Status */ @@ -1537,7 +1538,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev) reg32 |= ROOT_PORT_INTR_ON_MESG_MASK; pci_write_config_dword(dev, pos + PCI_ERR_ROOT_COMMAND, reg32); - return PCI_ERS_RESULT_RECOVERED; + return rc ? 
PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } /** diff --git a/drivers/pci/pcie/err.c b/drivers/pci/pcie/err.c index 674984a9277a..708fd3a0d646 100644 --- a/drivers/pci/pcie/err.c +++ b/drivers/pci/pcie/err.c @@ -175,9 +175,11 @@ out: */ static pci_ers_result_t default_reset_link(struct pci_dev *dev) { - pci_reset_bridge_secondary_bus(dev); + int rc; + + rc = pci_bridge_secondary_bus_reset(dev); pci_printk(KERN_DEBUG, dev, "downstream link has been reset\n"); - return PCI_ERS_RESULT_RECOVERED; + return rc ? PCI_ERS_RESULT_DISCONNECT : PCI_ERS_RESULT_RECOVERED; } static pci_ers_result_t reset_link(struct pci_dev *dev, u32 service) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index 8fa78f0ae4d9..ec784009a36b 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -2070,6 +2070,29 @@ static void pci_configure_ltr(struct pci_dev *dev) #endif } +static void pci_configure_eetlp_prefix(struct pci_dev *dev) +{ +#ifdef CONFIG_PCI_PASID + struct pci_dev *bridge; + u32 cap; + + if (!pci_is_pcie(dev)) + return; + + pcie_capability_read_dword(dev, PCI_EXP_DEVCAP2, &cap); + if (!(cap & PCI_EXP_DEVCAP2_EE_PREFIX)) + return; + + if (pci_pcie_type(dev) == PCI_EXP_TYPE_ROOT_PORT) + dev->eetlp_prefix_path = 1; + else { + bridge = pci_upstream_bridge(dev); + if (bridge && bridge->eetlp_prefix_path) + dev->eetlp_prefix_path = 1; + } +#endif +} + static void pci_configure_device(struct pci_dev *dev) { struct hotplug_params hpp; @@ -2079,6 +2102,7 @@ static void pci_configure_device(struct pci_dev *dev) pci_configure_extended_tags(dev, NULL); pci_configure_relaxed_ordering(dev); pci_configure_ltr(dev); + pci_configure_eetlp_prefix(dev); memset(&hpp, 0, sizeof(hpp)); ret = pci_get_hp_params(dev, &hpp); diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 6b635022f2fe..b2e6c02385e5 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -3667,6 +3668,108 @@ static 
int reset_chelsio_generic_dev(struct pci_dev *dev, int probe) #define PCI_DEVICE_ID_INTEL_IVB_M_VGA 0x0156 #define PCI_DEVICE_ID_INTEL_IVB_M2_VGA 0x0166 +/* + * The Samsung SM961/PM961 controller can sometimes enter a fatal state after + * FLR where config space reads from the device return -1. We seem to be + * able to avoid this condition if we disable the NVMe controller prior to + * FLR. This quirk is generic for any NVMe class device requiring similar + * assistance to quiesce the device prior to FLR. + * + * NVMe specification: https://nvmexpress.org/resources/specifications/ + * Revision 1.0e: + * Chapter 2: Required and optional PCI config registers + * Chapter 3: NVMe control registers + * Chapter 7.3: Reset behavior + */ +static int nvme_disable_and_flr(struct pci_dev *dev, int probe) +{ + void __iomem *bar; + u16 cmd; + u32 cfg; + + if (dev->class != PCI_CLASS_STORAGE_EXPRESS || + !pcie_has_flr(dev) || !pci_resource_start(dev, 0)) + return -ENOTTY; + + if (probe) + return 0; + + bar = pci_iomap(dev, 0, NVME_REG_CC + sizeof(cfg)); + if (!bar) + return -ENOTTY; + + pci_read_config_word(dev, PCI_COMMAND, &cmd); + pci_write_config_word(dev, PCI_COMMAND, cmd | PCI_COMMAND_MEMORY); + + cfg = readl(bar + NVME_REG_CC); + + /* Disable controller if enabled */ + if (cfg & NVME_CC_ENABLE) { + u32 cap = readl(bar + NVME_REG_CAP); + unsigned long timeout; + + /* + * Per nvme_disable_ctrl() skip shutdown notification as it + * could complete commands to the admin queue. We only intend + * to quiesce the device before reset. + */ + cfg &= ~(NVME_CC_SHN_MASK | NVME_CC_ENABLE); + + writel(cfg, bar + NVME_REG_CC); + + /* + * Some controllers require an additional delay here, see + * NVME_QUIRK_DELAY_BEFORE_CHK_RDY. None of those are yet + * supported by this quirk. 
+ */ + + /* Cap register provides max timeout in 500ms increments */ + timeout = ((NVME_CAP_TIMEOUT(cap) + 1) * HZ / 2) + jiffies; + + for (;;) { + u32 status = readl(bar + NVME_REG_CSTS); + + /* Ready status becomes zero on disable complete */ + if (!(status & NVME_CSTS_RDY)) + break; + + msleep(100); + + if (time_after(jiffies, timeout)) { + pci_warn(dev, "Timeout waiting for NVMe ready status to clear after disable\n"); + break; + } + } + } + + pci_iounmap(dev, bar); + + pcie_flr(dev); + + return 0; +} + +/* + * Intel DC P3700 NVMe controller will timeout waiting for ready status + * to change after NVMe enable if the driver starts interacting with the + * device too soon after FLR. A 250ms delay after FLR has heuristically + * proven to produce reliably working results for device assignment cases. + */ +static int delay_250ms_after_flr(struct pci_dev *dev, int probe) +{ + if (!pcie_has_flr(dev)) + return -ENOTTY; + + if (probe) + return 0; + + pcie_flr(dev); + + msleep(250); + + return 0; +} + static const struct pci_dev_reset_methods pci_dev_reset_methods[] = { { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_82599_SFP_VF, reset_intel_82599_sfp_virtfn }, @@ -3674,6 +3777,8 @@ static const struct pci_dev_reset_methods pci_dev_reset_methods[] = { reset_ivb_igd }, { PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IVB_M2_VGA, reset_ivb_igd }, + { PCI_VENDOR_ID_SAMSUNG, 0xa804, nvme_disable_and_flr }, + { PCI_VENDOR_ID_INTEL, 0x0953, delay_250ms_after_flr }, { PCI_VENDOR_ID_CHELSIO, PCI_ANY_ID, reset_chelsio_generic_dev }, { 0 } @@ -3743,6 +3848,9 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x917a, /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c78 */ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9182, quirk_dma_func1_alias); +/* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c134 */ +DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x9183, + quirk_dma_func1_alias); /* https://bugzilla.kernel.org/show_bug.cgi?id=42679#c46 */ 
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_MARVELL_EXT, 0x91a0, quirk_dma_func1_alias); diff --git a/drivers/vfio/pci/vfio_pci.c b/drivers/vfio/pci/vfio_pci.c index b423a309a6e0..345c0dc8a6dc 100644 --- a/drivers/vfio/pci/vfio_pci.c +++ b/drivers/vfio/pci/vfio_pci.c @@ -1010,8 +1010,7 @@ reset_info_exit: &info, slot); if (!ret) /* User has access, do the reset */ - ret = slot ? pci_try_reset_slot(vdev->pdev->slot) : - pci_try_reset_bus(vdev->pdev->bus); + ret = pci_reset_bus(vdev->pdev); hot_reset_release: for (i--; i >= 0; i--) @@ -1373,8 +1372,7 @@ static void vfio_pci_try_bus_reset(struct vfio_pci_device *vdev) } if (needs_reset) - ret = slot ? pci_try_reset_slot(vdev->pdev->slot) : - pci_try_reset_bus(vdev->pdev->bus); + ret = pci_reset_bus(vdev->pdev); put_devs: for (i = 0; i < devs.cur_index; i++) { diff --git a/include/linux/pci.h b/include/linux/pci.h index f80dfdacf9e6..0543800ec565 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -351,6 +351,7 @@ struct pci_dev { unsigned int ltr_path:1; /* Latency Tolerance Reporting supported from root to here */ #endif + unsigned int eetlp_prefix_path:1; /* End-to-End TLP Prefix */ pci_channel_state_t error_state; /* Current connectivity state */ struct device dev; /* Generic device interface */ @@ -1105,20 +1106,17 @@ u32 pcie_bandwidth_available(struct pci_dev *dev, struct pci_dev **limiting_dev, enum pci_bus_speed *speed, enum pcie_link_width *width); void pcie_print_link_status(struct pci_dev *dev); +bool pcie_has_flr(struct pci_dev *dev); int pcie_flr(struct pci_dev *dev); int __pci_reset_function_locked(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); int pci_reset_function_locked(struct pci_dev *dev); int pci_try_reset_function(struct pci_dev *dev); int pci_probe_reset_slot(struct pci_slot *slot); -int pci_reset_slot(struct pci_slot *slot); -int pci_try_reset_slot(struct pci_slot *slot); int pci_probe_reset_bus(struct pci_bus *bus); -int pci_reset_bus(struct pci_bus *bus); -int 
pci_try_reset_bus(struct pci_bus *bus); +int pci_reset_bus(struct pci_dev *dev); void pci_reset_secondary_bus(struct pci_dev *dev); void pcibios_reset_secondary_bus(struct pci_dev *dev); -int pci_reset_bridge_secondary_bus(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align); diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 82e6b361204e..ee556ccc93f4 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -636,6 +636,7 @@ #define PCI_EXP_DEVCAP2_OBFF_MASK 0x000c0000 /* OBFF support mechanism */ #define PCI_EXP_DEVCAP2_OBFF_MSG 0x00040000 /* New message signaling */ #define PCI_EXP_DEVCAP2_OBFF_WAKE 0x00080000 /* Re-use WAKE# for OBFF */ +#define PCI_EXP_DEVCAP2_EE_PREFIX 0x00200000 /* End-End TLP Prefix */ #define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ #define PCI_EXP_DEVCTL2_COMP_TIMEOUT 0x000f /* Completion Timeout Value */ #define PCI_EXP_DEVCTL2_COMP_TMOUT_DIS 0x0010 /* Completion Timeout Disable */