From f3ec4f87d607f40497afdb5ac03f11e2ea253d52 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Tue, 8 Jun 2010 15:23:51 -0400 Subject: [PATCH 01/30] PCI: change device runtime PM settings for probe and remove This patch (as1388) changes the way the PCI core handles runtime PM settings when probing or unbinding drivers. Now the core will make sure the device is enabled for runtime PM, with a usage count >= 1, when a driver is probed. It does the same when calling a driver's remove method. If the driver wants to use runtime PM, all it has to do is call pm_runtime_pu_noidle() near the end of its probe routine (to cancel the core's usage increment) and pm_runtime_get_noresume() near the start of its remove routine (to restore the usage count). It does not need to mess around with setting the runtime state to enabled, disabled, active, or suspended. The patch updates e1000e and r8169, the only PCI drivers that already use the existing runtime PM interface. Signed-off-by: Alan Stern Acked-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/net/e1000e/netdev.c | 16 ++++------------ drivers/net/r8169.c | 16 ++++------------ drivers/pci/pci-driver.c | 30 ++++++++++++++++++++++++++++-- 3 files changed, 36 insertions(+), 26 deletions(-) diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c index 57a7e41da69e..4afc8dd9b935 100644 --- a/drivers/net/e1000e/netdev.c +++ b/drivers/net/e1000e/netdev.c @@ -5721,11 +5721,8 @@ static int __devinit e1000_probe(struct pci_dev *pdev, e1000_print_device_info(adapter); - if (pci_dev_run_wake(pdev)) { - pm_runtime_set_active(&pdev->dev); - pm_runtime_enable(&pdev->dev); - } - pm_schedule_suspend(&pdev->dev, MSEC_PER_SEC); + if (pci_dev_run_wake(pdev)) + pm_runtime_put_noidle(&pdev->dev); return 0; @@ -5771,8 +5768,6 @@ static void __devexit e1000_remove(struct pci_dev *pdev) struct e1000_adapter *adapter = netdev_priv(netdev); bool down = test_bit(__E1000_DOWN, &adapter->state); - pm_runtime_get_sync(&pdev->dev); - /* * flush_scheduled work may reschedule our watchdog task, so * explicitly disable watchdog tasks from being rescheduled @@ -5797,11 +5792,8 @@ static void __devexit e1000_remove(struct pci_dev *pdev) clear_bit(__E1000_DOWN, &adapter->state); unregister_netdev(netdev); - if (pci_dev_run_wake(pdev)) { - pm_runtime_disable(&pdev->dev); - pm_runtime_set_suspended(&pdev->dev); - } - pm_runtime_put_noidle(&pdev->dev); + if (pci_dev_run_wake(pdev)) + pm_runtime_get_noresume(&pdev->dev); /* * Release control of h/w to f/w. If f/w is AMT enabled, this diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index cdc6a5c2e70d..c982a4763bef 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -3219,11 +3219,8 @@ rtl8169_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) device_set_wakeup_enable(&pdev->dev, tp->features & RTL_FEATURE_WOL); - if (pci_dev_run_wake(pdev)) { - pm_runtime_set_active(&pdev->dev); - pm_runtime_enable(&pdev->dev); - } - pm_runtime_idle(&pdev->dev); + if (pci_dev_run_wake(pdev)) + pm_runtime_put_noidle(&pdev->dev); out: return rc; @@ -3246,17 +3243,12 @@ static void __devexit rtl8169_remove_one(struct pci_dev *pdev) struct net_device *dev = pci_get_drvdata(pdev); struct rtl8169_private *tp = netdev_priv(dev); - pm_runtime_get_sync(&pdev->dev); - flush_scheduled_work(); unregister_netdev(dev); - if (pci_dev_run_wake(pdev)) { - pm_runtime_disable(&pdev->dev); - pm_runtime_set_suspended(&pdev->dev); - } - pm_runtime_put_noidle(&pdev->dev); + if (pci_dev_run_wake(pdev)) + pm_runtime_get_noresume(&pdev->dev); /* restore original MAC address */ rtl_rar_set(tp, dev->perm_addr); diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index f9a0aec3abcf..8a6f797de8e5 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -289,8 +289,26 @@ struct drv_dev_and_id { static long local_pci_probe(void *_ddi) { struct drv_dev_and_id *ddi = _ddi; + struct device *dev = &ddi->dev->dev; + int rc; - return ddi->drv->probe(ddi->dev, ddi->id); + /* Unbound PCI devices are always set to disabled and suspended. + * During probe, the device is set to enabled and active and the + * usage count is incremented. If the driver supports runtime PM, + * it should call pm_runtime_put_noidle() in its probe routine and + * pm_runtime_get_noresume() in its remove routine. + */ + pm_runtime_get_noresume(dev); + pm_runtime_set_active(dev); + pm_runtime_enable(dev); + + rc = ddi->drv->probe(ddi->dev, ddi->id); + if (rc) { + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + pm_runtime_put_noidle(dev); + } + return rc; } static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev, @@ -369,11 +387,19 @@ static int pci_device_remove(struct device * dev) struct pci_driver * drv = pci_dev->driver; if (drv) { - if (drv->remove) + if (drv->remove) { + pm_runtime_get_sync(dev); drv->remove(pci_dev); + pm_runtime_put_noidle(dev); + } pci_dev->driver = NULL; } + /* Undo the runtime PM settings in local_pci_probe() */ + pm_runtime_disable(dev); + pm_runtime_set_suspended(dev); + pm_runtime_put_noidle(dev); + /* * If the device is still on, set the power state as "unknown", * since it might change by the next time we load the driver. From 73cd3b43f08cc9a9bcb168994b8e9ebd983ff573 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Tue, 15 Jun 2010 15:43:19 +0200 Subject: [PATCH 02/30] x86/PCI: pci, fix section mismatch pcibios_scan_specific_bus calls pci_scan_bus_on_node which is __devinit. Mark pcibios_scan_specific_bus __devinit as well since all users are now __init or __devinit. Signed-off-by: Jiri Slaby Signed-off-by: Jesse Barnes --- arch/x86/pci/legacy.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/pci/legacy.c b/arch/x86/pci/legacy.c index 8d460eaf524f..c89266be6048 100644 --- a/arch/x86/pci/legacy.c +++ b/arch/x86/pci/legacy.c @@ -36,7 +36,7 @@ int __init pci_legacy_init(void) return 0; } -void pcibios_scan_specific_bus(int busn) +void __devinit pcibios_scan_specific_bus(int busn) { int devfn; long node; From f6735590e9f441762ab5afeff64ded99e5b19a68 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 27 May 2010 11:21:11 +0900 Subject: [PATCH 03/30] PCI aerdrv: fix annoying warnings Some compiler generates following warnings: In function 'aer_isr': warning: 'e_src.id' may be used uninitialized in this function warning: 'e_src.status' may be used uninitialized in this function Avoid status flag "int ret" and return constants instead, so that gcc sees the return value matching "it is initialized" better. Acked-by: Hidetoshi Seto Signed-off-by: Linus Torvalds Signed-off-by: Hidetoshi Seto Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aer/aerdrv_core.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/pci/pcie/aer/aerdrv_core.c b/drivers/pci/pcie/aer/aerdrv_core.c index 8af4f619bba2..fc0b5a93e1de 100644 --- a/drivers/pci/pcie/aer/aerdrv_core.c +++ b/drivers/pci/pcie/aer/aerdrv_core.c @@ -727,20 +727,21 @@ static void aer_isr_one_error(struct pcie_device *p_device, static int get_e_source(struct aer_rpc *rpc, struct aer_err_source *e_src) { unsigned long flags; - int ret = 0; /* Lock access to Root error producer/consumer index */ spin_lock_irqsave(&rpc->e_lock, flags); - if (rpc->prod_idx != rpc->cons_idx) { - *e_src = rpc->e_sources[rpc->cons_idx]; - rpc->cons_idx++; - if (rpc->cons_idx == AER_ERROR_SOURCES_MAX) - rpc->cons_idx = 0; - ret = 1; + if (rpc->prod_idx == rpc->cons_idx) { + spin_unlock_irqrestore(&rpc->e_lock, flags); + return 0; } + + *e_src = rpc->e_sources[rpc->cons_idx]; + rpc->cons_idx++; + if (rpc->cons_idx == AER_ERROR_SOURCES_MAX) + rpc->cons_idx = 0; spin_unlock_irqrestore(&rpc->e_lock, flags); - return ret; + return 1; } /** From 3b8fdb759e6ed446433c6dfd5a226d9007925596 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Wed, 26 May 2010 12:46:39 +0200 Subject: [PATCH 04/30] PCI: hotplug/shpchp_hpc: add parenthesis in SLOT_REG_RSVDZ_MASK The SLOT_REG_RSVDZ_MASK macro is normally used like this: slot_reg &= ~SLOT_REG_RSVDZ_MASK; The ~ operator has higher precedence than the | operator from inside the macro, so it needs parenthesis. Reviewed-by: Kenji Kaneshige Signed-off-by: Dan Carpenter Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/shpchp_hpc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/shpchp_hpc.c b/drivers/pci/hotplug/shpchp_hpc.c index 5f5e8d2e3552..d3985e7deab7 100644 --- a/drivers/pci/hotplug/shpchp_hpc.c +++ b/drivers/pci/hotplug/shpchp_hpc.c @@ -113,7 +113,7 @@ #define CON_PFAULT_INTR_MASK (1 << 28) #define MRL_CHANGE_SERR_MASK (1 << 29) #define CON_PFAULT_SERR_MASK (1 << 30) -#define SLOT_REG_RSVDZ_MASK (1 << 15) | (7 << 21) +#define SLOT_REG_RSVDZ_MASK ((1 << 15) | (7 << 21)) /* * SHPC Command Code definitnions From aff6136974e4ac43574973a5b4de15d97506b16a Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Wed, 26 May 2010 12:21:10 +0200 Subject: [PATCH 05/30] PCI quirk: AMD 780: work around wrong vendor ID on APC bridge In all AMD 780 family northbridges, the vendor ID of the internal graphics PCI/PCI bridge reads not as AMD but as that of the mainboard vendor, because the hardware actually returns the value of the subsystem vendor ID (erratum 18). We currently have additional quirk entries for Asus and Acer, but it is likely that we will encounter more systems with other vendor IDs. Since we do not know in advance all possible vendor IDs, a better way to find the device is to declare the quirk on the host bridge, whose ID is always correct, and use that device as a stepping stone to find the PCI/ PCI bridge, if present. Reviewed-by: Matthew Wilcox Signed-off-by: Clemens Ladisch Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 477345d41641..4334d15a8141 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2126,12 +2126,29 @@ static void __devinit quirk_disable_msi(struct pci_dev *dev) } } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_8131_BRIDGE, quirk_disable_msi); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x9602, quirk_disable_msi); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ASUSTEK, 0x9602, quirk_disable_msi); -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AI, 0x9602, quirk_disable_msi); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, 0xa238, quirk_disable_msi); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, 0x5a3f, quirk_disable_msi); +/* + * The APC bridge device in AMD 780 family northbridges has some random + * OEM subsystem ID in its vendor ID register (erratum 18), so instead + * we use the possible vendor/device IDs of the host bridge for the + * declared quirk, and search for the APC bridge by slot number. + */ +static void __devinit quirk_amd_780_apc_msi(struct pci_dev *host_bridge) +{ + struct pci_dev *apc_bridge; + + apc_bridge = pci_get_slot(host_bridge->bus, PCI_DEVFN(1, 0)); + if (apc_bridge) { + if (apc_bridge->device == 0x9602) + quirk_disable_msi(apc_bridge); + pci_dev_put(apc_bridge); + } +} +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x9600, quirk_amd_780_apc_msi); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, 0x9601, quirk_amd_780_apc_msi); + /* Go through the list of Hypertransport capabilities and * return 1 if a HT MSI capability is found and enabled */ static int __devinit msi_ht_cap_enabled(struct pci_dev *dev) From 549e15611b4ac1de51ef0e0a79c2704f50a638a2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 23 May 2010 10:22:55 +0200 Subject: [PATCH 06/30] PCI: disable MSI on VIA K8M800 MSI delivery from on-board ahci controller doesn't work on K8M800. At this point, it's unclear whether the culprit is with the ahci controller or the host bridge. Given the track record and considering the rather minimal impact of MSI, disabling it seems reasonable. Signed-off-by: Tejun Heo Reported-by: Rainer Hurtado Navarro Cc: stable@kernel.org Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 4334d15a8141..3a81d9d44019 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2115,6 +2115,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RS480, quirk_disabl DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT3336, quirk_disable_all_msi); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT3351, quirk_disable_all_msi); DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT3364, quirk_disable_all_msi); +DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8380_0, quirk_disable_all_msi); /* Disable MSI on chipsets that are known to not support it */ static void __devinit quirk_disable_msi(struct pci_dev *dev) From 7bd1c365fd124624191d49dcc1eb9759d6017ec3 Mon Sep 17 00:00:00 2001 From: Mike Habeck Date: Wed, 12 May 2010 11:14:32 -0700 Subject: [PATCH 07/30] x86/PCI: Add option to not assign BAR's if not already assigned The Linux kernel assigns BARs that a BIOS did not assign, most likely to handle broken BIOSes that didn't enumerate the devices correctly. On UV the BIOS purposely doesn't assign I/O BARs for certain devices/ drivers we know don't use them (examples, LSI SAS, Qlogic FC, ...). We purposely don't assign these I/O BARs because I/O Space is a very limited resource. There is only 64k of I/O Space, and in a PCIe topology that space gets divided up into 4k chucks (this is due to the fact that a pci-to-pci bridge's I/O decoder is aligned at 4k)... Thus a system can have at most 16 cards with I/O BARs: (64k / 4k = 16) SGI needs to scale to >16 devices with I/O BARs. So by not assigning I/O BARs on devices we know don't use them, we can do that (iff the kernel doesn't go and assign these BARs that the BIOS purposely didn't assign). This patch will not assign a resource to a device BAR if that BAR was not assigned by the BIOS, and the kernel cmdline option 'pci=nobar' was specified. This patch is closely modeled after the 'pci=norom' option that currently exists in the tree. Signed-off-by: Mike Habeck Signed-off-by: Mike Travis Signed-off-by: Jesse Barnes --- Documentation/kernel-parameters.txt | 2 ++ arch/x86/include/asm/pci_x86.h | 1 + arch/x86/pci/common.c | 20 ++++++++++++++++++++ 3 files changed, 23 insertions(+) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 2b2407d9a6d0..4fac69beeb4f 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1970,6 +1970,8 @@ and is between 256 and 4096 characters. It is defined in the file norom [X86] Do not assign address space to expansion ROMs that do not already have BIOS assigned address ranges. + nobar [X86] Do not assign address space to the + BARs that weren't assigned by the BIOS. irqmask=0xMMMM [X86] Set a bit mask of IRQs allowed to be assigned automatically to PCI devices. You can make the kernel exclude IRQs of your ISA cards diff --git a/arch/x86/include/asm/pci_x86.h b/arch/x86/include/asm/pci_x86.h index cd2a31dc5fb8..49c7219826f9 100644 --- a/arch/x86/include/asm/pci_x86.h +++ b/arch/x86/include/asm/pci_x86.h @@ -30,6 +30,7 @@ #define PCI_HAS_IO_ECS 0x40000 #define PCI_NOASSIGN_ROMS 0x80000 #define PCI_ROOT_NO_CRS 0x100000 +#define PCI_NOASSIGN_BARS 0x200000 extern unsigned int pci_probe; extern unsigned long pirq_table_addr; diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 215a27ae050d..a0772af64efb 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -125,6 +125,23 @@ void __init dmi_check_skip_isa_align(void) static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev) { struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE]; + struct resource *bar_r; + int bar; + + if (pci_probe & PCI_NOASSIGN_BARS) { + /* + * If the BIOS did not assign the BAR, zero out the + * resource so the kernel doesn't attmept to assign + * it later on in pci_assign_unassigned_resources + */ + for (bar = 0; bar <= PCI_STD_RESOURCE_END; bar++) { + bar_r = &dev->resource[bar]; + if (bar_r->start == 0 && bar_r->end != 0) { + bar_r->flags = 0; + bar_r->end = 0; + } + } + } if (pci_probe & PCI_NOASSIGN_ROMS) { if (rom_r->parent) @@ -509,6 +526,9 @@ char * __devinit pcibios_setup(char *str) } else if (!strcmp(str, "norom")) { pci_probe |= PCI_NOASSIGN_ROMS; return NULL; + } else if (!strcmp(str, "nobar")) { + pci_probe |= PCI_NOASSIGN_BARS; + return NULL; } else if (!strcmp(str, "assign-busses")) { pci_probe |= PCI_ASSIGN_ALL_BUSSES; return NULL; From 7736a05a320712c0a9b8f9e1cd0688b2c0848009 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 1 Jun 2010 09:00:16 -0700 Subject: [PATCH 08/30] PCI: sparse warning (trivial) Assigning zero where NULL should be used. Signed-off-by: Stephen Hemminger Signed-off-by: Jesse Barnes --- drivers/pci/bus.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 628ea20a8841..42eea5fbc13b 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -56,7 +56,7 @@ void pci_bus_remove_resources(struct pci_bus *bus) int i; for (i = 0; i < PCI_BRIDGE_RESOURCE_NUM; i++) - bus->resource[i] = 0; + bus->resource[i] = NULL; list_for_each_entry_safe(bus_res, tmp, &bus->resources, list) { list_del(&bus_res->list); From 2eb5ebd3665a38a4526b45cb3a31a132b2aa9927 Mon Sep 17 00:00:00 2001 From: Junchang Wang Date: Fri, 18 Jun 2010 10:02:33 +0800 Subject: [PATCH 09/30] PCI: check return value of pci_enable_device() when enabling bridges pci_enable_device can fail. In that case, a printed warning would be more appropriate. Signed-off-by: Justin P. Mattock Signed-off-by: Junchang Wang Signed-off-by: Jesse Barnes --- drivers/pci/bus.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/pci/bus.c b/drivers/pci/bus.c index 42eea5fbc13b..7f0af0e9b826 100644 --- a/drivers/pci/bus.c +++ b/drivers/pci/bus.c @@ -240,6 +240,8 @@ void pci_enable_bridges(struct pci_bus *bus) if (dev->subordinate) { if (!pci_is_enabled(dev)) { retval = pci_enable_device(dev); + if (retval) + dev_err(&dev->dev, "Error enabling bridge (%d), continuing\n", retval); pci_set_master(dev); } pci_enable_bridges(dev->subordinate); From 4302e0fb7fa5b071e30f3cfb68e85155b3d69d9b Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Thu, 17 Jun 2010 10:42:44 +0900 Subject: [PATCH 10/30] PCI: fix wrong memory address handling in MSI-X Use resource_size_t for MMIO address instead of unsigned long. Otherwise, higher 32-bits of MMIO address are cleared unexpectedly in x86-32 PAE. Acked-by: Matthew Wilcox Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 77b68eaf021e..6a0f2f07f955 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -435,7 +435,7 @@ static int msi_capability_init(struct pci_dev *dev, int nvec) static void __iomem *msix_map_region(struct pci_dev *dev, unsigned pos, unsigned nr_entries) { - unsigned long phys_addr; + resource_size_t phys_addr; u32 table_offset; u8 bir; From 41cd766b065970ff6f6c89dd1cf55fa706c84a3d Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Wed, 9 Jun 2010 16:05:07 -0400 Subject: [PATCH 11/30] PCI: Don't enable aspm before drivers have had a chance to veto it The aspm code will currently set the configured aspm policy before drivers have had an opportunity to indicate that their hardware doesn't support it. Unfortunately, putting some hardware in L0 or L1 can result in the hardware no longer responding to any requests, even after aspm is disabled. It makes more sense to leave aspm policy at the BIOS defaults at initial setup time, reconfiguring it after pci_enable_device() is called. This allows the driver to blacklist individual devices beforehand. Reviewed-by: Kenji Kaneshige Signed-off-by: Matthew Garrett Signed-off-by: Jesse Barnes --- drivers/pci/pcie/aspm.c | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pcie/aspm.c b/drivers/pci/pcie/aspm.c index be53d98fa384..71222814c1ec 100644 --- a/drivers/pci/pcie/aspm.c +++ b/drivers/pci/pcie/aspm.c @@ -588,11 +588,23 @@ void pcie_aspm_init_link_state(struct pci_dev *pdev) * update through pcie_aspm_cap_init(). */ pcie_aspm_cap_init(link, blacklist); - pcie_config_aspm_path(link); /* Setup initial Clock PM state */ pcie_clkpm_cap_init(link, blacklist); - pcie_set_clkpm(link, policy_to_clkpm_state(link)); + + /* + * At this stage drivers haven't had an opportunity to change the + * link policy setting. Enabling ASPM on broken hardware can cripple + * it even before the driver has had a chance to disable ASPM, so + * default to a safe level right now. If we're enabling ASPM beyond + * the BIOS's expectation, we'll do so once pci_enable_device() is + * called. + */ + if (aspm_policy != POLICY_POWERSAVE) { + pcie_config_aspm_path(link); + pcie_set_clkpm(link, policy_to_clkpm_state(link)); + } + unlock: mutex_unlock(&aspm_lock); out: From 01b666df487b80c956cef3ce3253776ddeebd41e Mon Sep 17 00:00:00 2001 From: Praveen Kalamegham Date: Thu, 20 May 2010 15:32:22 -0500 Subject: [PATCH 12/30] PCI hotplug: pciehp: Fixed return value sign for pciehp_unconfigure_device pciehp_unconfigure_device() should return -EINVAL, not EINVAL. Signed-off-by: Praveen Kalamegham Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/pciehp_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/pciehp_pci.c b/drivers/pci/hotplug/pciehp_pci.c index 2fce726758d2..a4031dfe938e 100644 --- a/drivers/pci/hotplug/pciehp_pci.c +++ b/drivers/pci/hotplug/pciehp_pci.c @@ -137,7 +137,7 @@ int pciehp_unconfigure_device(struct slot *p_slot) "Cannot remove display device %s\n", pci_name(temp)); pci_dev_put(temp); - rc = EINVAL; + rc = -EINVAL; break; } } From 0ba10bc75271e4139eb9ca67d107624d581e3a94 Mon Sep 17 00:00:00 2001 From: Praveen Kalamegham Date: Thu, 20 May 2010 12:15:01 -0500 Subject: [PATCH 13/30] PCI hotplug: shpchp: Removed check for hotplug of display devices Removed check to prevent hotplug of display devices within shpchp. Originally this was thought to have been required within the PCI Hotplug specification for some legacy devices. However there is no such requirement in the most recent revision. The check prevents hotplug of not only display devices but also computational GPUs which require serviceability. Signed-off-by: Praveen Kalamegham Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/shpchp_pci.c | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/drivers/pci/hotplug/shpchp_pci.c b/drivers/pci/hotplug/shpchp_pci.c index 8c3d3219f227..a2ccfcd3c298 100644 --- a/drivers/pci/hotplug/shpchp_pci.c +++ b/drivers/pci/hotplug/shpchp_pci.c @@ -60,12 +60,6 @@ int __ref shpchp_configure_device(struct slot *p_slot) dev = pci_get_slot(parent, PCI_DEVFN(p_slot->device, fn)); if (!dev) continue; - if ((dev->class >> 16) == PCI_BASE_CLASS_DISPLAY) { - ctrl_err(ctrl, "Cannot hot-add display device %s\n", - pci_name(dev)); - pci_dev_put(dev); - continue; - } if ((dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) || (dev->hdr_type == PCI_HEADER_TYPE_CARDBUS)) { /* Find an unused bus number for the new bridge */ @@ -114,17 +108,11 @@ int shpchp_unconfigure_device(struct slot *p_slot) ctrl_dbg(ctrl, "%s: domain:bus:dev = %04x:%02x:%02x\n", __func__, pci_domain_nr(parent), p_slot->bus, p_slot->device); - for (j=0; j<8 ; j++) { - struct pci_dev* temp = pci_get_slot(parent, + for (j = 0; j < 8 ; j++) { + struct pci_dev *temp = pci_get_slot(parent, (p_slot->device << 3) | j); if (!temp) continue; - if ((temp->class >> 16) == PCI_BASE_CLASS_DISPLAY) { - ctrl_err(ctrl, "Cannot remove display device %s\n", - pci_name(temp)); - pci_dev_put(temp); - continue; - } if (temp->hdr_type == PCI_HEADER_TYPE_BRIDGE) { pci_read_config_byte(temp, PCI_BRIDGE_CONTROL, &bctl); if (bctl & PCI_BRIDGE_CTL_VGA) { @@ -132,7 +120,8 @@ int shpchp_unconfigure_device(struct slot *p_slot) "Cannot remove display device %s\n", pci_name(temp)); pci_dev_put(temp); - continue; + rc = -EINVAL; + break; } } pci_remove_bus_device(temp); From 3f579c340fe6d6bdd8c6f9f144e7c3b85d4174ec Mon Sep 17 00:00:00 2001 From: Yinghai Lu Date: Fri, 21 May 2010 14:35:06 -0700 Subject: [PATCH 14/30] PCI hotplug: make sure child bridges are enabled at hotplug time Found one PCIe Module with several bridges built-in where a "cold" hotadd doesn't work. If we end up reassigning bridge windows at hotadd time, and have to loop through assigning new ranges, we won't end up enabling the child bridges because the first assignment pass already tried to enable them, which prevents __pci_bridge_assign_resource from updating the windows. So try to move enabling of child bridges to the end, and only do it once. Signed-off-by: Yinghai Lu Signed-off-by: Jesse Barnes --- drivers/pci/setup-bus.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/pci/setup-bus.c b/drivers/pci/setup-bus.c index 19b111383f62..66cb8f4cc5f4 100644 --- a/drivers/pci/setup-bus.c +++ b/drivers/pci/setup-bus.c @@ -874,19 +874,16 @@ void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge) again: pci_bus_size_bridges(parent); __pci_bridge_assign_resources(bridge, &head); - retval = pci_reenable_device(bridge); - pci_set_master(bridge); - pci_enable_bridges(parent); tried_times++; if (!head.next) - return; + goto enable_all; if (tried_times >= 2) { /* still fail, don't need to try more */ free_failed_list(&head); - return; + goto enable_all; } printk(KERN_DEBUG "PCI: No. %d try to assign unassigned res\n", @@ -919,5 +916,10 @@ again: free_failed_list(&head); goto again; + +enable_all: + retval = pci_reenable_device(bridge); + pci_set_master(bridge); + pci_enable_bridges(parent); } EXPORT_SYMBOL_GPL(pci_assign_unassigned_bridge_resources); From 852972acff8f10f3a15679be2059bb94916cba5d Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 22 Jun 2010 11:25:43 -0400 Subject: [PATCH 15/30] ACPI: Disable ASPM if the platform won't provide _OSC control for PCIe The PCI SIG documentation for the _OSC OS/firmware handshaking interface states: "If the _OSC control method is absent from the scope of a host bridge device, then the operating system must not enable or attempt to use any features defined in this section for the hierarchy originated by the host bridge." The obvious interpretation of this is that the OS should not attempt to use PCIe hotplug, PME or AER - however, the specification also notes that an _OSC method is *required* for PCIe hierarchies, and experimental validation with An Alternative OS indicates that it doesn't use any PCIe functionality if the _OSC method is missing. That arguably means we shouldn't be using MSI or extended config space, but right now our problems seem to be limited to vendors being surprised when ASPM gets enabled on machines when other OSs refuse to do so. So, for now, let's just disable ASPM if the _OSC method doesn't exist or refuses to hand over PCIe capability control. Acked-by: Rafael J. Wysocki Signed-off-by: Matthew Garrett Signed-off-by: Jesse Barnes --- drivers/acpi/pci_root.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/acpi/pci_root.c b/drivers/acpi/pci_root.c index 4eac59393edc..1f67057af2a5 100644 --- a/drivers/acpi/pci_root.c +++ b/drivers/acpi/pci_root.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -543,6 +544,14 @@ static int __devinit acpi_pci_root_add(struct acpi_device *device) if (flags != base_flags) acpi_pci_osc_support(root, flags); + status = acpi_pci_osc_control_set(root->device->handle, + OSC_PCI_EXPRESS_CAP_STRUCTURE_CONTROL); + + if (ACPI_FAILURE(status)) { + printk(KERN_INFO "Unable to assume PCIe control: Disabling ASPM\n"); + pcie_no_aspm(); + } + pci_acpi_add_bus_pm_notifier(device, root->bus); if (device->wakeup.flags.run_wake) device_set_run_wake(root->bus->bridge, true); From a3f5835a8e0bd25ed15becd65e01aa3d5ac8d771 Mon Sep 17 00:00:00 2001 From: Kulikov Vasiliy Date: Tue, 29 Jun 2010 14:15:28 +0400 Subject: [PATCH 16/30] PCI: pci-sysfs: remove casts from void* Remove unnesessary casts from void*. Signed-off-by: Kulikov Vasiliy Signed-off-by: Jesse Barnes --- drivers/pci/pci-sysfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index c9957f68ac9b..5935b854917f 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -734,7 +734,7 @@ pci_mmap_resource(struct kobject *kobj, struct bin_attribute *attr, { struct pci_dev *pdev = to_pci_dev(container_of(kobj, struct device, kobj)); - struct resource *res = (struct resource *)attr->private; + struct resource *res = attr->private; enum pci_mmap_state mmap_type; resource_size_t start, end; int i; From 8cc2bfd87fdd2f4a31f39c86f59df4b4be2c0adc Mon Sep 17 00:00:00 2001 From: Kenji Kaneshige Date: Wed, 23 Jun 2010 16:04:10 +0900 Subject: [PATCH 17/30] PCI: kernel oops on access to pci proc file while hot-removal I encountered the problem that /proc/bus/pci/XX/YY is not removed even after the corresponding device is hot-removed, if the file is still being opened. In addtion, accessing this file in this situation causes kernel panic (see below). Becasue the pci_proc_detach_device() doesn't call remove_proc_entry() if struct proc_dir_entry->count > 1, access to /proc/bus/pci/XX/YY would refer to struct pci_dev that was already freed. Though I don't know why the check for proc_dir_entry->count was added, I don't think it is needed. Removing this check fixes the problem. Steps to reproduce ------------------ # cd /sys/bus/pci/slots/2/ # PROC_BUS_PCI_FILE=/proc/bus/pci/`awk -F: '{print $2"/"$3}' < address`.0 # sleep 10000 < $PROC_BUS_PCI_FILE & # echo 0 > power # while true; do cat $PROC_BUS_PCI_FILE > /dev/null; done Oops Messages ------------- BUG: unable to handle kernel NULL pointer dereference at 00000042 IP: [] pci_user_read_config_dword+0x65/0xa0 *pdpt = 000000002185e001 *pde = 0000000476a79067 Oops: 0000 [#1] SMP last sysfs file: /sys/devices/pci0000:00/0000:00:1c.0/0000:10:00.0/local_cpus Modules linked in: autofs4 sunrpc cpufreq_ondemand acpi_cpufreq ipv6 dm_mirror dm_region_hash dm_log dm_mod e1000e i2c_i801 i2c_core iTCO_wdt igb sg pcspkr dca iTCO_vendor_support ext4 mbcache jbd2 sd_mod crc_t10dif lpfc mptsas scsi_transport_fc mptscsih mptbase scsi_tgt scsi_transport_sas [last unloaded: microcode] Pid: 2997, comm: cat Not tainted 2.6.34-kk #32 SB/PRIMEQUEST 1800E EIP: 0060:[] EFLAGS: 00010046 CPU: 19 EIP is at pci_user_read_config_dword+0x65/0xa0 EAX: 00000002 EBX: e44f1800 ECX: e144df14 EDX: 155668c7 ESI: 00000087 EDI: 00000000 EBP: e144df40 ESP: e144df0c DS: 007b ES: 007b FS: 00d8 GS: 00e0 SS: 0068 Process cat (pid: 2997, ti=e144c000 task=e26f2570 task.ti=e144c000) Stack: c09ceac0 c0570f72 ffffffff 08c57000 00000000 00001000 e44f1800 c05d2404 <0> e144df40 00001000 00000000 00001000 08c57000 3093ae50 e420cb40 e358d5c0 <0> c05d2300 fffffffb c054984f e144df9c 00008000 08c57000 e358d5c0 00008000 Call Trace: [] ? security_capable+0x22/0x30 [] ? proc_bus_pci_read+0x104/0x220 [] ? proc_bus_pci_read+0x0/0x220 [] ? proc_reg_read+0x5f/0x90 [] ? proc_reg_read+0x0/0x90 [] ? vfs_read+0x9d/0x190 [] ? audit_syscall_entry+0x204/0x230 [] ? sys_read+0x41/0x70 [] ? sysenter_do_call+0x12/0x28 Code: b4 26 00 00 00 00 b8 20 88 b1 c0 c7 44 24 08 ff ff ff ff e8 3e 52 22 00 f6 83 24 04 00 00 20 75 34 8b 43 08 8d 4c 24 08 8b 53 1c <8b> 70 40 89 4c 24 04 89 f9 c7 04 24 04 00 00 00 ff 16 89 c6 f0 EIP: [] pci_user_read_config_dword+0x65/0xa0 SS:ESP 0068:e144df0c CR2: 0000000000000042 Acked-by: Greg Kroah-Hartman Signed-off-by: Kenji Kaneshige Signed-off-by: Jesse Barnes --- drivers/pci/proc.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 449e890267a2..68316b59d7d3 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -431,8 +431,6 @@ int pci_proc_detach_device(struct pci_dev *dev) struct proc_dir_entry *e; if ((e = dev->procent)) { - if (atomic_read(&e->count) > 1) - return -EBUSY; remove_proc_entry(e->name, dev->bus->procdir); dev->procent = NULL; } From ea5f9fc5899660dd26c1ccf3fab183bd041140ee Mon Sep 17 00:00:00 2001 From: Matthew Garrett Date: Tue, 22 Jun 2010 17:03:03 -0400 Subject: [PATCH 18/30] PCI: Default PCIe ASPM control to on and require !EMBEDDED to disable The CONFIG_PCIEASPM option is confusing and potentially dangerous. ASPM is a hardware mediated feature rather than one under direct OS control, and even if the config option is disabled the system firmware may have turned on ASPM on various bits of hardware. This can cause problems later - various hardware that claims to support ASPM does a poor job of it and may hang or cause other difficulties. The kernel is able to recognise this in many cases and disable the ASPM functionality, but only if CONFIG_PCIEASPM is enabled. Given that in its default configuration this option will either leave the hardware as it was originally or disable hardware functionality that may cause problems, it should by default y. The only reason to disable it ought to be to reduce code size, so make it dependent on CONFIG_EMBEDDED. Signed-off-by: Matthew Garrett Cc: lrodriguez@atheros.com Cc: maximlevitsky@gmail.com Signed-off-by: Jesse Barnes --- drivers/pci/pcie/Kconfig | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/pci/pcie/Kconfig b/drivers/pci/pcie/Kconfig index b8b494b3e0d0..dda70981b7a6 100644 --- a/drivers/pci/pcie/Kconfig +++ b/drivers/pci/pcie/Kconfig @@ -31,14 +31,22 @@ source "drivers/pci/pcie/aer/Kconfig" # PCI Express ASPM # config PCIEASPM - bool "PCI Express ASPM support(Experimental)" - depends on PCI && EXPERIMENTAL && PCIEPORTBUS - default n + bool "PCI Express ASPM control" if EMBEDDED + depends on PCI && PCIEPORTBUS + default y help - This enables PCI Express ASPM (Active State Power Management) and - Clock Power Management. ASPM supports state L0/L0s/L1. + This enables OS control over PCI Express ASPM (Active State + Power Management) and Clock Power Management. ASPM supports + state L0/L0s/L1. - When in doubt, say N. + ASPM is initially set up the the firmware. With this option enabled, + Linux can modify this state in order to disable ASPM on known-bad + hardware or configurations and enable it when known-safe. + + ASPM can be disabled or enabled at runtime via + /sys/module/pcie_aspm/parameters/policy + + When in doubt, say Y. config PCIEASPM_DEBUG bool "Debug PCI Express ASPM" depends on PCIEASPM From fcd097f31a6ee207cc0c3da9cccd2a86d4334785 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Thu, 17 Jun 2010 20:16:36 +0100 Subject: [PATCH 19/30] PCI: MSI: Remove unsafe and unnecessary hardware access During suspend on an SMP system, {read,write}_msi_msg_desc() may be called to mask and unmask interrupts on a device that is already in a reduced power state. At this point memory-mapped registers including MSI-X tables are not accessible, and config space may not be fully functional either. While a device is in a reduced power state its interrupts are effectively masked and its MSI(-X) state will be restored when it is brought back to D0. Therefore these functions can simply read and write msi_desc::msg for devices not in D0. Further, read_msi_msg_desc() should only ever be used to update a previously written message, so it can always read msi_desc::msg and never needs to touch the hardware. Tested-by: "Michael Chan" Signed-off-by: Ben Hutchings Signed-off-by: Jesse Barnes --- drivers/pci/msi.c | 34 +++++++++++----------------------- 1 file changed, 11 insertions(+), 23 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 6a0f2f07f955..4c14f31f2b4d 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -196,30 +196,15 @@ void unmask_msi_irq(unsigned int irq) void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) { struct msi_desc *entry = get_irq_desc_msi(desc); - if (entry->msi_attrib.is_msix) { - void __iomem *base = entry->mask_base + - entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; - msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR); - msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR); - msg->data = readl(base + PCI_MSIX_ENTRY_DATA); - } else { - struct pci_dev *dev = entry->dev; - int pos = entry->msi_attrib.pos; - u16 data; + /* We do not touch the hardware (which may not even be + * accessible at the moment) but return the last message + * written. Assert that this is valid, assuming that + * valid messages are not all-zeroes. */ + BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo | + entry->msg.data)); - pci_read_config_dword(dev, msi_lower_address_reg(pos), - &msg->address_lo); - if (entry->msi_attrib.is_64) { - pci_read_config_dword(dev, msi_upper_address_reg(pos), - &msg->address_hi); - pci_read_config_word(dev, msi_data_reg(pos, 1), &data); - } else { - msg->address_hi = 0; - pci_read_config_word(dev, msi_data_reg(pos, 0), &data); - } - msg->data = data; - } + *msg = entry->msg; } void read_msi_msg(unsigned int irq, struct msi_msg *msg) @@ -232,7 +217,10 @@ void read_msi_msg(unsigned int irq, struct msi_msg *msg) void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) { struct msi_desc *entry = get_irq_desc_msi(desc); - if (entry->msi_attrib.is_msix) { + + if (entry->dev->current_state != PCI_D0) { + /* Don't touch the hardware now */ + } else if (entry->msi_attrib.is_msix) { void __iomem *base; base = entry->mask_base + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; From 253d2e549818f5a4a52e2db0aba3dacee21e5b38 Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 16 Jul 2010 10:19:22 -0700 Subject: [PATCH 20/30] PCI: disable mmio during bar sizing It is a known issue that mmio decoding shall be disabled while doing PCI bar sizing. Host bridge and other devices (PCI PIC) shall be excluded for certain platforms. This patch mainly comes from Mathew Willcox's patch in http://kerneltrap.org/mailarchive/linux-kernel/2007/9/13/258969. A new flag bit "mmio_alway_on" is added to pci_dev with the intention that devices with their mmio decoding cannot be disabled during BAR sizing shall have this bit set, preferrablly in their quirks. Without this patch, Intel Moorestown platform graphics unit will be corrupted during bar sizing activities. Signed-off-by: Jacob Pan Signed-off-by: Jesse Barnes --- drivers/pci/probe.c | 10 ++++++++++ drivers/pci/quirks.c | 13 +++++++++++++ include/linux/pci.h | 2 ++ 3 files changed, 25 insertions(+) diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index f4adba2d1dd3..12625d90f8b5 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -163,9 +163,16 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, struct resource *res, unsigned int pos) { u32 l, sz, mask; + u16 orig_cmd; mask = type ? PCI_ROM_ADDRESS_MASK : ~0; + if (!dev->mmio_always_on) { + pci_read_config_word(dev, PCI_COMMAND, &orig_cmd); + pci_write_config_word(dev, PCI_COMMAND, + orig_cmd & ~(PCI_COMMAND_MEMORY | PCI_COMMAND_IO)); + } + res->name = pci_name(dev); pci_read_config_dword(dev, pos, &l); @@ -173,6 +180,9 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type, pci_read_config_dword(dev, pos, &sz); pci_write_config_dword(dev, pos, l); + if (!dev->mmio_always_on) + pci_write_config_word(dev, PCI_COMMAND, orig_cmd); + /* * All bits set in sz means the device isn't working properly. * If the BAR isn't implemented, all bits must be 0. If it's a diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 3a81d9d44019..202efa6f57c4 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -91,6 +91,19 @@ static void __devinit quirk_resource_alignment(struct pci_dev *dev) } DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, quirk_resource_alignment); +/* + * Decoding should be disabled for a PCI device during BAR sizing to avoid + * conflict. But doing so may cause problems on host bridge and perhaps other + * key system devices. For devices that need to have mmio decoding always-on, + * we need to set the dev->mmio_always_on bit. + */ +static void __devinit quirk_mmio_always_on(struct pci_dev *dev) +{ + if ((dev->class >> 8) == PCI_CLASS_BRIDGE_HOST) + dev->mmio_always_on = 1; +} +DECLARE_PCI_FIXUP_EARLY(PCI_ANY_ID, PCI_ANY_ID, quirk_mmio_always_on); + /* The Mellanox Tavor device gives false positive parity errors * Mark this device with a broken_parity_status, to allow * PCI scanning code to "skip" this now blacklisted device. diff --git a/include/linux/pci.h b/include/linux/pci.h index f26fda76b87f..b1d17956a153 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -270,6 +270,8 @@ struct pci_dev { unsigned int d1_support:1; /* Low power state D1 is supported */ unsigned int d2_support:1; /* Low power state D2 is supported */ unsigned int no_d1d2:1; /* Only allow D0 and D3 */ + unsigned int mmio_always_on:1; /* disallow turning off io/mem + decoding during bar sizing */ unsigned int wakeup_prepared:1; unsigned int d3_delay; /* D3->D0 transition time in ms */ From bfb51cd01661136bae1dd00c32d504cff6a9f924 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 12 Jul 2010 15:59:11 +0900 Subject: [PATCH 21/30] PCI: remove unused HAVE_ARCH_PCI_SET_DMA_MAX_SEGMENT_{SIZE|BOUNDARY} In 2.6.34, we transformed the PCI DMA API into the generic device mode. The PCI DMA API is just the wrapper of the DMA API. So we don't need HAVE_ARCH_PCI_SET_DMA_MAX_SEGMENT_SIZE or HAVE_ARCH_PCI_SET_DMA_SEGMENT_BOUNDARY (which enable architectures to have the own implementations). Both haven't been used anyway. Signed-off-by: FUJITA Tomonori Signed-off-by: Jesse Barnes --- drivers/pci/pci.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 740fb4ea9669..9afad0faa9b6 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2294,21 +2294,17 @@ void pci_msi_off(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_msi_off); -#ifndef HAVE_ARCH_PCI_SET_DMA_MAX_SEGMENT_SIZE int pci_set_dma_max_seg_size(struct pci_dev *dev, unsigned int size) { return dma_set_max_seg_size(&dev->dev, size); } EXPORT_SYMBOL(pci_set_dma_max_seg_size); -#endif -#ifndef HAVE_ARCH_PCI_SET_DMA_SEGMENT_BOUNDARY int pci_set_dma_seg_boundary(struct pci_dev *dev, unsigned long mask) { return dma_set_seg_boundary(&dev->dev, mask); } EXPORT_SYMBOL(pci_set_dma_seg_boundary); -#endif static int pcie_flr(struct pci_dev *dev, int probe) { From 2491762cfb475dbdfa3db11ebea6de49f58b7fac Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 23 Jul 2010 12:53:27 -0600 Subject: [PATCH 22/30] x86/PCI: use host bridge _CRS info on ASRock ALiveSATA2-GLAN This DMI quirk turns on "pci=use_crs" for the ALiveSATA2-GLAN because amd_bus.c doesn't handle this system correctly. The system has a single HyperTransport I/O chain, but has two PCI host bridges to buses 00 and 80. amd_bus.c learns the MMIO range associated with buses 00-ff and that this range is routed to the HT chain hosted at node 0, link 0: bus: [00, ff] on node 0 link 0 bus: 00 index 1 [mem 0x80000000-0xfcffffffff] This includes the address space for both bus 00 and bus 80, and amd_bus.c assumes it's all routed to bus 00. We find device 80:01.0, which BIOS left in the middle of that space, but we don't find a bridge from bus 00 to bus 80, so we conclude that 80:01.0 is unreachable from bus 00, and we move it from the original, working, address to something outside the bus 00 aperture, which does not work: pci 0000:80:01.0: reg 10: [mem 0xfebfc000-0xfebfffff 64bit] pci 0000:80:01.0: BAR 0: assigned [mem 0xfd00000000-0xfd00003fff 64bit] The BIOS told us everything we need to know to handle this correctly, so we're better off if we just pay attention, which lets us leave the 80:01.0 device at the original, working, address: ACPI: PCI Root Bridge [PCI0] (domain 0000 [bus 00-7f]) pci_root PNP0A03:00: host bridge window [mem 0x80000000-0xff37ffff] ACPI: PCI Root Bridge [PCI1] (domain 0000 [bus 80-ff]) pci_root PNP0A08:00: host bridge window [mem 0xfebfc000-0xfebfffff] This was a regression between 2.6.33 and 2.6.34. In 2.6.33, amd_bus.c was used only when we found multiple HT chains. 3e3da00c01d050, which enabled amd_bus.c even on systems with a single HT chain, caused this failure. This quirk was written by Graham. If we ever enable "pci=use_crs" for machines from 2006 or earlir, this quirk should be removed. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=16007 Cc: stable@kernel.org Reported-by: Graham Ramsey Signed-off-by: Bjorn Helgaas Signed-off-by: Jesse Barnes --- arch/x86/pci/acpi.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c index 2ec04c424a62..15466c096ba5 100644 --- a/arch/x86/pci/acpi.c +++ b/arch/x86/pci/acpi.c @@ -34,6 +34,15 @@ static const struct dmi_system_id pci_use_crs_table[] __initconst = { DMI_MATCH(DMI_PRODUCT_NAME, "x3800"), }, }, + /* https://bugzilla.kernel.org/show_bug.cgi?id=16007 */ + /* 2006 AMD HT/VIA system with two host bridges */ + { + .callback = set_use_crs, + .ident = "ASRock ALiveSATA2-GLAN", + .matches = { + DMI_MATCH(DMI_PRODUCT_NAME, "ALiveSATA2-GLAN"), + }, + }, {} }; From 8633328be242677fdedc42052838dd0608e7f342 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Mon, 19 Jul 2010 09:45:34 -0600 Subject: [PATCH 23/30] PCI: Allow read/write access to sysfs I/O port resources PCI sysfs resource files currently only allow mmap'ing. On x86 this works fine for memory backed BARs, but doesn't work at all for I/O port backed BARs. Add read/write to I/O port PCI sysfs resource files to allow userspace access to these device regions. Acked-by: Chris Wright Signed-off-by: Alex Williamson Signed-off-by: Jesse Barnes --- Documentation/filesystems/sysfs-pci.txt | 7 ++- drivers/pci/pci-sysfs.c | 68 +++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 2 deletions(-) diff --git a/Documentation/filesystems/sysfs-pci.txt b/Documentation/filesystems/sysfs-pci.txt index 85354b32d731..74eaac26f8b8 100644 --- a/Documentation/filesystems/sysfs-pci.txt +++ b/Documentation/filesystems/sysfs-pci.txt @@ -39,7 +39,7 @@ files, each with their own function. local_cpus nearby CPU mask (cpumask, ro) remove remove device from kernel's list (ascii, wo) resource PCI resource host addresses (ascii, ro) - resource0..N PCI resource N, if present (binary, mmap) + resource0..N PCI resource N, if present (binary, mmap, rw[1]) resource0_wc..N_wc PCI WC map resource N, if prefetchable (binary, mmap) rom PCI ROM resource, if present (binary, ro) subsystem_device PCI subsystem device (ascii, ro) @@ -54,13 +54,16 @@ files, each with their own function. binary - file contains binary data cpumask - file contains a cpumask type +[1] rw for RESOURCE_IO (I/O port) regions only + The read only files are informational, writes to them will be ignored, with the exception of the 'rom' file. Writable files can be used to perform actions on the device (e.g. changing config space, detaching a device). mmapable files are available via an mmap of the file at offset 0 and can be used to do actual device programming from userspace. Note that some platforms don't support mmapping of certain resources, so be sure to check the return -value from any attempted mmap. +value from any attempted mmap. The most notable of these are I/O port +resources, which also provide read/write access. The 'enable' file provides a counter that indicates how many times the device has been enabled. If the 'enable' file currently returns '4', and a '1' is diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index 5935b854917f..f7692dc531e4 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -778,6 +778,70 @@ pci_mmap_resource_wc(struct file *filp, struct kobject *kobj, return pci_mmap_resource(kobj, attr, vma, 1); } +static ssize_t +pci_resource_io(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count, bool write) +{ + struct pci_dev *pdev = to_pci_dev(container_of(kobj, + struct device, kobj)); + struct resource *res = attr->private; + unsigned long port = off; + int i; + + for (i = 0; i < PCI_ROM_RESOURCE; i++) + if (res == &pdev->resource[i]) + break; + if (i >= PCI_ROM_RESOURCE) + return -ENODEV; + + port += pci_resource_start(pdev, i); + + if (port > pci_resource_end(pdev, i)) + return 0; + + if (port + count - 1 > pci_resource_end(pdev, i)) + return -EINVAL; + + switch (count) { + case 1: + if (write) + outb(*(u8 *)buf, port); + else + *(u8 *)buf = inb(port); + return 1; + case 2: + if (write) + outw(*(u16 *)buf, port); + else + *(u16 *)buf = inw(port); + return 2; + case 4: + if (write) + outl(*(u32 *)buf, port); + else + *(u32 *)buf = inl(port); + return 4; + } + return -EINVAL; +} + +static ssize_t +pci_read_resource_io(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + return pci_resource_io(filp, kobj, attr, buf, off, count, false); +} + +static ssize_t +pci_write_resource_io(struct file *filp, struct kobject *kobj, + struct bin_attribute *attr, char *buf, + loff_t off, size_t count) +{ + return pci_resource_io(filp, kobj, attr, buf, off, count, true); +} + /** * pci_remove_resource_files - cleanup resource files * @pdev: dev to cleanup @@ -828,6 +892,10 @@ static int pci_create_attr(struct pci_dev *pdev, int num, int write_combine) sprintf(res_attr_name, "resource%d", num); res_attr->mmap = pci_mmap_resource_uc; } + if (pci_resource_flags(pdev, num) & IORESOURCE_IO) { + res_attr->read = pci_read_resource_io; + res_attr->write = pci_write_resource_io; + } res_attr->attr.name = res_attr_name; res_attr->attr.mode = S_IRUSR | S_IWUSR; res_attr->size = pci_resource_len(pdev, num); From 911e1c9b05a8e3559a7aa89083930700a0b9e7ee Mon Sep 17 00:00:00 2001 From: Narendra K Date: Mon, 26 Jul 2010 05:56:50 -0500 Subject: [PATCH 24/30] PCI: export SMBIOS provided firmware instance and label to sysfs This patch exports SMBIOS provided firmware instance and label of onboard PCI devices to sysfs. New files are: /sys/bus/pci/devices/.../label which contains the firmware name for the device in question, and /sys/bus/pci/devices/.../index which contains the firmware device type instance for the given device. Signed-off-by: Jordan Hargrave Signed-off-by: Narendra K Signed-off-by: Jesse Barnes --- Documentation/ABI/testing/sysfs-bus-pci | 27 +++++ drivers/firmware/dmi_scan.c | 25 +++++ drivers/pci/Makefile | 3 + drivers/pci/pci-label.c | 143 ++++++++++++++++++++++++ drivers/pci/pci-sysfs.c | 5 + drivers/pci/pci.h | 9 ++ include/linux/dmi.h | 9 ++ 7 files changed, 221 insertions(+) create mode 100644 drivers/pci/pci-label.c diff --git a/Documentation/ABI/testing/sysfs-bus-pci b/Documentation/ABI/testing/sysfs-bus-pci index 25be3250f7d6..f979d825d112 100644 --- a/Documentation/ABI/testing/sysfs-bus-pci +++ b/Documentation/ABI/testing/sysfs-bus-pci @@ -139,3 +139,30 @@ Contact: linux-pci@vger.kernel.org Description: This symbolic link points to the PCI hotplug controller driver module that manages the hotplug slot. + +What: /sys/bus/pci/devices/.../label +Date: July 2010 +Contact: Narendra K , linux-bugs@dell.com +Description: + Reading this attribute will provide the firmware + given name(SMBIOS type 41 string) of the PCI device. + The attribute will be created only if the firmware + has given a name to the PCI device. +Users: + Userspace applications interested in knowing the + firmware assigned name of the PCI device. + +What: /sys/bus/pci/devices/.../index +Date: July 2010 +Contact: Narendra K , linux-bugs@dell.com +Description: + Reading this attribute will provide the firmware + given instance(SMBIOS type 41 device type instance) + of the PCI device. The attribute will be created + only if the firmware has given a device type instance + to the PCI device. +Users: + Userspace applications interested in knowing the + firmware assigned device type instance of the PCI + device that can help in understanding the firmware + intended order of the PCI device. diff --git a/drivers/firmware/dmi_scan.c b/drivers/firmware/dmi_scan.c index d46467271349..b3d22d659990 100644 --- a/drivers/firmware/dmi_scan.c +++ b/drivers/firmware/dmi_scan.c @@ -277,6 +277,29 @@ static void __init dmi_save_ipmi_device(const struct dmi_header *dm) list_add_tail(&dev->list, &dmi_devices); } +static void __init dmi_save_dev_onboard(int instance, int segment, int bus, + int devfn, const char *name) +{ + struct dmi_dev_onboard *onboard_dev; + + onboard_dev = dmi_alloc(sizeof(*onboard_dev) + strlen(name) + 1); + if (!onboard_dev) { + printk(KERN_ERR "dmi_save_dev_onboard: out of memory.\n"); + return; + } + onboard_dev->instance = instance; + onboard_dev->segment = segment; + onboard_dev->bus = bus; + onboard_dev->devfn = devfn; + + strcpy((char *)&onboard_dev[1], name); + onboard_dev->dev.type = DMI_DEV_TYPE_DEV_ONBOARD; + onboard_dev->dev.name = (char *)&onboard_dev[1]; + onboard_dev->dev.device_data = onboard_dev; + + list_add(&onboard_dev->dev.list, &dmi_devices); +} + static void __init dmi_save_extended_devices(const struct dmi_header *dm) { const u8 *d = (u8*) dm + 5; @@ -285,6 +308,8 @@ static void __init dmi_save_extended_devices(const struct dmi_header *dm) if ((*d & 0x80) == 0) return; + dmi_save_dev_onboard(*(d+1), *(u16 *)(d+2), *(d+4), *(d+5), + dmi_string_nosave(dm, *(d-1))); dmi_save_one_device(*d & 0x7f, dmi_string_nosave(dm, *(d - 1))); } diff --git a/drivers/pci/Makefile b/drivers/pci/Makefile index 0b51857fbaf7..dc1aa0922868 100644 --- a/drivers/pci/Makefile +++ b/drivers/pci/Makefile @@ -55,6 +55,9 @@ obj-$(CONFIG_MICROBLAZE) += setup-bus.o # obj-$(CONFIG_ACPI) += pci-acpi.o +# SMBIOS provided firmware instance and labels +obj-$(CONFIG_DMI) += pci-label.o + # Cardbus & CompactPCI use setup-bus obj-$(CONFIG_HOTPLUG) += setup-bus.o diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c new file mode 100644 index 000000000000..111500e86f94 --- /dev/null +++ b/drivers/pci/pci-label.c @@ -0,0 +1,143 @@ +/* + * Purpose: Export the firmware instance and label associated with + * a pci device to sysfs + * Copyright (C) 2010 Dell Inc. + * by Narendra K , + * Jordan Hargrave + * + * SMBIOS defines type 41 for onboard pci devices. This code retrieves + * the instance number and string from the type 41 record and exports + * it to sysfs. + * + * Please see http://linux.dell.com/wiki/index.php/Oss/libnetdevname for more + * information. + */ + +#include +#include +#include +#include +#include +#include +#include "pci.h" + +enum smbios_attr_enum { + SMBIOS_ATTR_NONE = 0, + SMBIOS_ATTR_LABEL_SHOW, + SMBIOS_ATTR_INSTANCE_SHOW, +}; + +static mode_t +find_smbios_instance_string(struct pci_dev *pdev, char *buf, + enum smbios_attr_enum attribute) +{ + const struct dmi_device *dmi; + struct dmi_dev_onboard *donboard; + int bus; + int devfn; + + bus = pdev->bus->number; + devfn = pdev->devfn; + + dmi = NULL; + while ((dmi = dmi_find_device(DMI_DEV_TYPE_DEV_ONBOARD, + NULL, dmi)) != NULL) { + donboard = dmi->device_data; + if (donboard && donboard->bus == bus && + donboard->devfn == devfn) { + if (buf) { + if (attribute == SMBIOS_ATTR_INSTANCE_SHOW) + return scnprintf(buf, PAGE_SIZE, + "%d\n", + donboard->instance); + else if (attribute == SMBIOS_ATTR_LABEL_SHOW) + return scnprintf(buf, PAGE_SIZE, + "%s\n", + dmi->name); + } + return strlen(dmi->name); + } + } + return 0; +} + +static mode_t +smbios_instance_string_exist(struct kobject *kobj, struct attribute *attr, + int n) +{ + struct device *dev; + struct pci_dev *pdev; + + dev = container_of(kobj, struct device, kobj); + pdev = to_pci_dev(dev); + + return find_smbios_instance_string(pdev, NULL, SMBIOS_ATTR_NONE) ? + S_IRUGO : 0; +} + +static ssize_t +smbioslabel_show(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev; + pdev = to_pci_dev(dev); + + return find_smbios_instance_string(pdev, buf, + SMBIOS_ATTR_LABEL_SHOW); +} + +static ssize_t +smbiosinstance_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct pci_dev *pdev; + pdev = to_pci_dev(dev); + + return find_smbios_instance_string(pdev, buf, + SMBIOS_ATTR_INSTANCE_SHOW); +} + +static struct device_attribute smbios_attr_label = { + .attr = {.name = "label", .mode = 0444, .owner = THIS_MODULE}, + .show = smbioslabel_show, +}; + +static struct device_attribute smbios_attr_instance = { + .attr = {.name = "index", .mode = 0444, .owner = THIS_MODULE}, + .show = smbiosinstance_show, +}; + +static struct attribute *smbios_attributes[] = { + &smbios_attr_label.attr, + &smbios_attr_instance.attr, + NULL, +}; + +static struct attribute_group smbios_attr_group = { + .attrs = smbios_attributes, + .is_visible = smbios_instance_string_exist, +}; + +static int +pci_create_smbiosname_file(struct pci_dev *pdev) +{ + if (!sysfs_create_group(&pdev->dev.kobj, &smbios_attr_group)) + return 0; + return -ENODEV; +} + +static void +pci_remove_smbiosname_file(struct pci_dev *pdev) +{ + sysfs_remove_group(&pdev->dev.kobj, &smbios_attr_group); +} + +void pci_create_firmware_label_files(struct pci_dev *pdev) +{ + if (!pci_create_smbiosname_file(pdev)) + ; +} + +void pci_remove_firmware_label_files(struct pci_dev *pdev) +{ + pci_remove_smbiosname_file(pdev); +} diff --git a/drivers/pci/pci-sysfs.c b/drivers/pci/pci-sysfs.c index f7692dc531e4..b5a7d9bfcb24 100644 --- a/drivers/pci/pci-sysfs.c +++ b/drivers/pci/pci-sysfs.c @@ -1165,6 +1165,8 @@ int __must_check pci_create_sysfs_dev_files (struct pci_dev *pdev) if (retval) goto err_vga_file; + pci_create_firmware_label_files(pdev); + return 0; err_vga_file: @@ -1232,6 +1234,9 @@ void pci_remove_sysfs_dev_files(struct pci_dev *pdev) sysfs_remove_bin_file(&pdev->dev.kobj, pdev->rom_attr); kfree(pdev->rom_attr); } + + pci_remove_firmware_label_files(pdev); + } static int __init pci_sysfs_init(void) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index f8077b3c8c8c..d930338e0922 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -11,6 +11,15 @@ extern int pci_uevent(struct device *dev, struct kobj_uevent_env *env); extern int pci_create_sysfs_dev_files(struct pci_dev *pdev); extern void pci_remove_sysfs_dev_files(struct pci_dev *pdev); +#ifndef CONFIG_DMI +static inline void pci_create_firmware_label_files(struct pci_dev *pdev) +{ return 0; } +static inline void pci_remove_firmware_label_files(struct pci_dev *pdev) +{ return 0; } +#else +extern void pci_create_firmware_label_files(struct pci_dev *pdev); +extern void pci_remove_firmware_label_files(struct pci_dev *pdev); +#endif extern void pci_cleanup_rom(struct pci_dev *dev); #ifdef HAVE_PCI_MMAP extern int pci_mmap_fits(struct pci_dev *pdev, int resno, diff --git a/include/linux/dmi.h b/include/linux/dmi.h index a8a3e1ac281d..90e087f8d951 100644 --- a/include/linux/dmi.h +++ b/include/linux/dmi.h @@ -20,6 +20,7 @@ enum dmi_device_type { DMI_DEV_TYPE_SAS, DMI_DEV_TYPE_IPMI = -1, DMI_DEV_TYPE_OEM_STRING = -2, + DMI_DEV_TYPE_DEV_ONBOARD = -3, }; struct dmi_header { @@ -37,6 +38,14 @@ struct dmi_device { #ifdef CONFIG_DMI +struct dmi_dev_onboard { + struct dmi_device dev; + int instance; + int segment; + int bus; + int devfn; +}; + extern int dmi_check_system(const struct dmi_system_id *list); const struct dmi_system_id *dmi_first_match(const struct dmi_system_id *list); extern const char * dmi_get_system_info(int field); From 30da55242818a8ca08583188ebcbaccd283ad4d9 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Fri, 23 Jul 2010 14:56:28 +0100 Subject: [PATCH 25/30] PCI: MSI: Restore read_msi_msg_desc(); add get_cached_msi_msg_desc() commit 2ca1af9aa3285c6a5f103ed31ad09f7399fc65d7 "PCI: MSI: Remove unsafe and unnecessary hardware access" changed read_msi_msg_desc() to return the last MSI message written instead of reading it from the device, since it may be called while the device is in a reduced power state. However, the pSeries platform code really does need to read messages from the device, since they are initially written by firmware. Therefore: - Restore the previous behaviour of read_msi_msg_desc() - Add new functions get_cached_msi_msg{,_desc}() which return the last MSI message written - Use the new functions where appropriate Acked-by: Michael Ellerman Signed-off-by: Ben Hutchings Signed-off-by: Jesse Barnes --- arch/ia64/kernel/msi_ia64.c | 2 +- arch/ia64/sn/kernel/msi_sn.c | 2 +- arch/x86/kernel/apic/io_apic.c | 2 +- drivers/pci/msi.c | 51 +++++++++++++++++++++++++++++----- include/linux/msi.h | 2 ++ 5 files changed, 49 insertions(+), 10 deletions(-) diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c index 6c8922856049..4a746ea838ff 100644 --- a/arch/ia64/kernel/msi_ia64.c +++ b/arch/ia64/kernel/msi_ia64.c @@ -25,7 +25,7 @@ static int ia64_set_msi_irq_affinity(unsigned int irq, if (irq_prepare_move(irq, cpu)) return -1; - read_msi_msg(irq, &msg); + get_cached_msi_msg(irq, &msg); addr = msg.address_lo; addr &= MSI_ADDR_DEST_ID_MASK; diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c index ebfdd6a9ae1a..0c72dd463831 100644 --- a/arch/ia64/sn/kernel/msi_sn.c +++ b/arch/ia64/sn/kernel/msi_sn.c @@ -175,7 +175,7 @@ static int sn_set_msi_irq_affinity(unsigned int irq, * Release XIO resources for the old MSI PCI address */ - read_msi_msg(irq, &msg); + get_cached_msi_msg(irq, &msg); sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo; pdev = sn_pdev->pdi_linux_pcidev; provider = SN_PCIDEV_BUSPROVIDER(pdev); diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index e41ed24ab26d..4dc0084ec1b1 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -3397,7 +3397,7 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask) cfg = desc->chip_data; - read_msi_msg_desc(desc, &msg); + get_cached_msi_msg_desc(desc, &msg); msg.data &= ~MSI_DATA_VECTOR_MASK; msg.data |= MSI_DATA_VECTOR(cfg->vector); diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 4c14f31f2b4d..69b7be33b3a2 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -197,14 +197,32 @@ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) { struct msi_desc *entry = get_irq_desc_msi(desc); - /* We do not touch the hardware (which may not even be - * accessible at the moment) but return the last message - * written. Assert that this is valid, assuming that - * valid messages are not all-zeroes. */ - BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo | - entry->msg.data)); + BUG_ON(entry->dev->current_state != PCI_D0); - *msg = entry->msg; + if (entry->msi_attrib.is_msix) { + void __iomem *base = entry->mask_base + + entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE; + + msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR); + msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR); + msg->data = readl(base + PCI_MSIX_ENTRY_DATA); + } else { + struct pci_dev *dev = entry->dev; + int pos = entry->msi_attrib.pos; + u16 data; + + pci_read_config_dword(dev, msi_lower_address_reg(pos), + &msg->address_lo); + if (entry->msi_attrib.is_64) { + pci_read_config_dword(dev, msi_upper_address_reg(pos), + &msg->address_hi); + pci_read_config_word(dev, msi_data_reg(pos, 1), &data); + } else { + msg->address_hi = 0; + pci_read_config_word(dev, msi_data_reg(pos, 0), &data); + } + msg->data = data; + } } void read_msi_msg(unsigned int irq, struct msi_msg *msg) @@ -214,6 +232,25 @@ void read_msi_msg(unsigned int irq, struct msi_msg *msg) read_msi_msg_desc(desc, msg); } +void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) +{ + struct msi_desc *entry = get_irq_desc_msi(desc); + + /* Assert that the cache is valid, assuming that + * valid messages are not all-zeroes. */ + BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo | + entry->msg.data)); + + *msg = entry->msg; +} + +void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg) +{ + struct irq_desc *desc = irq_to_desc(irq); + + get_cached_msi_msg_desc(desc, msg); +} + void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg) { struct msi_desc *entry = get_irq_desc_msi(desc); diff --git a/include/linux/msi.h b/include/linux/msi.h index 6991ab5b24d1..91b05c171854 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -14,8 +14,10 @@ struct irq_desc; extern void mask_msi_irq(unsigned int irq); extern void unmask_msi_irq(unsigned int irq); extern void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); +extern void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); extern void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg); extern void read_msi_msg(unsigned int irq, struct msi_msg *msg); +extern void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg); extern void write_msi_msg(unsigned int irq, struct msi_msg *msg); struct msi_desc { From 4e344b1cc53989e8ecc1140e9346f657d7c8aa9e Mon Sep 17 00:00:00 2001 From: Kulikov Vasiliy Date: Sat, 3 Jul 2010 20:04:39 +0400 Subject: [PATCH 26/30] PCI: use for_each_pci_dev() Use for_each_pci_dev() to simplify the code. Signed-off-by: Kulikov Vasiliy Signed-off-by: Jesse Barnes --- drivers/pci/hotplug/fakephp.c | 2 +- drivers/pci/proc.c | 4 ++-- drivers/pci/quirks.c | 2 +- drivers/pci/search.c | 2 +- drivers/pci/setup-irq.c | 3 +-- 5 files changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/pci/hotplug/fakephp.c b/drivers/pci/hotplug/fakephp.c index 5317e4d7d96e..17d10e2e8fb6 100644 --- a/drivers/pci/hotplug/fakephp.c +++ b/drivers/pci/hotplug/fakephp.c @@ -135,7 +135,7 @@ static int __init init_legacy(void) struct pci_dev *pdev = NULL; /* Add existing devices */ - while ((pdev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) + for_each_pci_dev(pdev) legacy_add_slot(pdev); /* Be alerted of any new ones */ diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 68316b59d7d3..01f0306525a5 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -483,9 +483,9 @@ static int __init pci_proc_init(void) proc_create("devices", 0, proc_bus_pci_dir, &proc_bus_pci_dev_operations); proc_initialized = 1; - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + for_each_pci_dev(dev) pci_proc_attach_device(dev); - } + return 0; } diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 202efa6f57c4..8141f442e3df 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2773,7 +2773,7 @@ static int __init pci_apply_final_quirks(void) printk(KERN_DEBUG "PCI: CLS %u bytes\n", pci_cache_line_size << 2); - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + for_each_pci_dev(dev) { pci_fixup_device(pci_fixup_final, dev); /* * If arch hasn't set it explicitly yet, use the CLS diff --git a/drivers/pci/search.c b/drivers/pci/search.c index 20d03f772289..9d75dc8ca602 100644 --- a/drivers/pci/search.c +++ b/drivers/pci/search.c @@ -169,7 +169,7 @@ struct pci_dev *pci_get_domain_bus_and_slot(int domain, unsigned int bus, { struct pci_dev *dev = NULL; - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + for_each_pci_dev(dev) { if (pci_domain_nr(dev->bus) == domain && (dev->bus->number == bus && dev->devfn == devfn)) return dev; diff --git a/drivers/pci/setup-irq.c b/drivers/pci/setup-irq.c index aa795fd428de..eec9738f3492 100644 --- a/drivers/pci/setup-irq.c +++ b/drivers/pci/setup-irq.c @@ -59,7 +59,6 @@ pci_fixup_irqs(u8 (*swizzle)(struct pci_dev *, u8 *), int (*map_irq)(struct pci_dev *, u8, u8)) { struct pci_dev *dev = NULL; - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + for_each_pci_dev(dev) pdev_fixup_irq(dev, swizzle, map_irq); - } } From 1f7979ac53224b0208e7d3eaeb5fd72ab9687389 Mon Sep 17 00:00:00 2001 From: Kulikov Vasiliy Date: Sat, 3 Jul 2010 20:04:03 +0400 Subject: [PATCH 27/30] x86/PCI: use for_each_pci_dev() Use for_each_pci_dev() to simplify the code. Signed-off-by: Kulikov Vasiliy Signed-off-by: Jesse Barnes --- arch/x86/pci/irq.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/x86/pci/irq.c b/arch/x86/pci/irq.c index 9810a0f76c91..f547ee05f715 100644 --- a/arch/x86/pci/irq.c +++ b/arch/x86/pci/irq.c @@ -989,7 +989,7 @@ static int pcibios_lookup_irq(struct pci_dev *dev, int assign) dev_info(&dev->dev, "%s PCI INT %c -> IRQ %d\n", msg, 'A' + pin - 1, irq); /* Update IRQ for all devices with the same pirq value */ - while ((dev2 = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev2)) != NULL) { + for_each_pci_dev(dev2) { pci_read_config_byte(dev2, PCI_INTERRUPT_PIN, &pin); if (!pin) continue; @@ -1028,7 +1028,7 @@ void __init pcibios_fixup_irqs(void) u8 pin; DBG(KERN_DEBUG "PCI: IRQ fixup\n"); - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + for_each_pci_dev(dev) { /* * If the BIOS has set an out of range IRQ number, just * ignore it. Also keep track of which IRQ's are @@ -1052,7 +1052,7 @@ void __init pcibios_fixup_irqs(void) return; dev = NULL; - while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + for_each_pci_dev(dev) { pci_read_config_byte(dev, PCI_INTERRUPT_PIN, &pin); if (!pin) continue; From 3d2a531804d16cd8df6dbbb0429c6f143e756049 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 23 Jul 2010 22:19:55 +0200 Subject: [PATCH 28/30] PCI: Do not run NVidia quirks related to MSI with MSI disabled There is no reason to run NVidia-specific quirks related to HT MSI mappings with MSI disabled via pci=nomsi, so make __nv_msi_ht_cap_quirk() return immediately in that case. This allows at least one machine to boot 100% of the time with pci=nomsi (it still doesn't boot reliably without that). Addresses https://bugzilla.kernel.org/show_bug.cgi?id=16443 . Cc: stable@kernel.org Signed-off-by: Rafael J. Wysocki Signed-off-by: Jesse Barnes --- drivers/pci/quirks.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c index 8141f442e3df..a1682f19bcb0 100644 --- a/drivers/pci/quirks.c +++ b/drivers/pci/quirks.c @@ -2421,6 +2421,9 @@ static void __devinit __nv_msi_ht_cap_quirk(struct pci_dev *dev, int all) int pos; int found; + if (!pci_msi_enabled()) + return; + /* check if there is HT MSI cap or enabled on this device */ found = ht_check_msi_mapping(dev); From b879743f26cb029e41ffe865fb939cfc6fa9be88 Mon Sep 17 00:00:00 2001 From: Narendra K Date: Mon, 2 Aug 2010 07:44:29 -0500 Subject: [PATCH 29/30] PCI: Fix warnings when CONFIG_DMI unset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch fixes the below warnings introduced by the commit 911e1c9b05a8e3559a7aa89083930700a0b9e7ee ("PCI: export SMBIOS provided firmware instance and label to sysfs"). drivers/pci/pci.h: In function ‘pci_create_firmware_label_files’: drivers/pci/pci.h:16: warning: ‘return’ with a value, in function returning void drivers/pci/pci.h: In function ‘pci_remove_firmware_label_files’: drivers/pci/pci.h:18: warning: ‘return’ with a value, in function returning void The warnings are seen because of the below code, doing a retun 0 from the functions 'pci_create_firmware_label_files' and 'pci_remove_firmware_label_files' defined as void. +#ifndef CONFIG_DMI +static inline void pci_create_firmware_label_files(struct pci_dev *pdev) +{ return 0; } +static inline void pci_remove_firmware_label_files(struct pci_dev *pdev) +{ return 0; } Signed-off-by: Narendra K Signed-off-by: Jesse Barnes --- drivers/pci/pci.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index d930338e0922..95186caa1492 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -13,9 +13,9 @@ extern int pci_create_sysfs_dev_files(struct pci_dev *pdev); extern void pci_remove_sysfs_dev_files(struct pci_dev *pdev); #ifndef CONFIG_DMI static inline void pci_create_firmware_label_files(struct pci_dev *pdev) -{ return 0; } +{ return; } static inline void pci_remove_firmware_label_files(struct pci_dev *pdev) -{ return 0; } +{ return; } #else extern void pci_create_firmware_label_files(struct pci_dev *pdev); extern void pci_remove_firmware_label_files(struct pci_dev *pdev); From 763e9db9994e27a7d2cb3701c8a097a867d0e0b4 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 4 Aug 2010 14:25:31 +1000 Subject: [PATCH 30/30] PCI: update for owner removal from struct device_attribute Fixes the build. Acked-by: Guenter Roeck Signed-off-by: Stephen Rothwell Signed-off-by: Jesse Barnes --- drivers/pci/pci-label.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci-label.c b/drivers/pci/pci-label.c index 111500e86f94..90c0a729cd3a 100644 --- a/drivers/pci/pci-label.c +++ b/drivers/pci/pci-label.c @@ -97,12 +97,12 @@ smbiosinstance_show(struct device *dev, } static struct device_attribute smbios_attr_label = { - .attr = {.name = "label", .mode = 0444, .owner = THIS_MODULE}, + .attr = {.name = "label", .mode = 0444}, .show = smbioslabel_show, }; static struct device_attribute smbios_attr_instance = { - .attr = {.name = "index", .mode = 0444, .owner = THIS_MODULE}, + .attr = {.name = "index", .mode = 0444}, .show = smbiosinstance_show, };