From 4132a577a0a7e75b938d2ae49c7a16b358f60661 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 18 Sep 2016 05:39:20 +0200 Subject: [PATCH 1/4] PCI: Afford direct-complete to devices with non-standard PM There are devices not power-manageable by the platform, but still able to runtime suspend to D3cold with a non-standard mechanism. One example is laptop hybrid graphics where the discrete GPU and its built-in HDA controller are power-managed either with a _DSM (AMD PowerXpress, Nvidia Optimus) or a separate gmux controller (MacBook Pro). Another example is Thunderbolt on Macs which is power-managed with custom ACPI methods. When putting the system to sleep, we currently handle such devices improperly by transitioning them from D3cold to D3hot (the default power state defined at the top of pci_target_state()). This wastes energy and prolongs the suspend sequence (powering up the Thunderbolt controller takes 2 seconds). Avoid that by assuming that a non-standard PM mechanism is at work if the device is not platform-power-manageable but currently in D3cold. If the device is wakeup enabled, we might still have to wake it up from D3cold if PME cannot be signaled from that power state. The check for devices without PM capability comes before the check for D3cold since such devices could in theory also be powered down by non-standard means and should then be afforded direct-complete as well. Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki --- drivers/pci/pci.c | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index aab9d5115a5f..2f818c3e6571 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -1959,9 +1959,22 @@ static pci_power_t pci_target_state(struct pci_dev *dev) default: target_state = state; } - } else if (!dev->pm_cap) { + + return target_state; + } + + if (!dev->pm_cap) target_state = PCI_D0; - } else if (device_may_wakeup(&dev->dev)) { + + /* + * If the device is in D3cold even though it's not power-manageable by + * the platform, it may have been powered down by non-standard means. + * Best to let it slumber. + */ + if (dev->current_state == PCI_D3cold) + target_state = PCI_D3cold; + + if (device_may_wakeup(&dev->dev)) { /* * Find the deepest state from which the device can generate * wake-up events, make it the target state and enable device From cc7cc02bada84f0d707aa5b6d2ef8728a2e1f911 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 18 Sep 2016 05:39:20 +0200 Subject: [PATCH 2/4] PCI: Query platform firmware for device power state Usually the most accurate way to determine a PCI device's power state is to read its PM Control & Status Register. There are two cases however when this is not an option: If the device doesn't have the PM capability at all, or if it is in D3cold (in which case its config space is inaccessible). In both cases, we can alternatively query the platform firmware for its opinion on the device's power state. To facilitate this, augment struct pci_platform_pm_ops with a ->get_power callback and implement it for acpi_pci_platform_pm (the only pci_platform_pm_ops existing so far). It is used by a forthcoming commit to let pci_update_current_state() recognize D3cold. Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki --- drivers/pci/pci-acpi.c | 22 ++++++++++++++++++++++ drivers/pci/pci.c | 10 ++++++++-- drivers/pci/pci.h | 3 +++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci-acpi.c b/drivers/pci/pci-acpi.c index 9a033e8ee9a4..d966d47c9e80 100644 --- a/drivers/pci/pci-acpi.c +++ b/drivers/pci/pci-acpi.c @@ -452,6 +452,27 @@ static int acpi_pci_set_power_state(struct pci_dev *dev, pci_power_t state) return error; } +static pci_power_t acpi_pci_get_power_state(struct pci_dev *dev) +{ + struct acpi_device *adev = ACPI_COMPANION(&dev->dev); + static const pci_power_t state_conv[] = { + [ACPI_STATE_D0] = PCI_D0, + [ACPI_STATE_D1] = PCI_D1, + [ACPI_STATE_D2] = PCI_D2, + [ACPI_STATE_D3_HOT] = PCI_D3hot, + [ACPI_STATE_D3_COLD] = PCI_D3cold, + }; + int state; + + if (!adev || !acpi_device_power_manageable(adev)) + return PCI_UNKNOWN; + + if (acpi_device_get_power(adev, &state) || state == ACPI_STATE_UNKNOWN) + return PCI_UNKNOWN; + + return state_conv[state]; +} + static bool acpi_pci_can_wakeup(struct pci_dev *dev) { struct acpi_device *adev = ACPI_COMPANION(&dev->dev); @@ -534,6 +555,7 @@ static bool acpi_pci_need_resume(struct pci_dev *dev) static const struct pci_platform_pm_ops acpi_pci_platform_pm = { .is_manageable = acpi_pci_power_manageable, .set_state = acpi_pci_set_power_state, + .get_state = acpi_pci_get_power_state, .choose_state = acpi_pci_choose_state, .sleep_wake = acpi_pci_sleep_wake, .run_wake = acpi_pci_run_wake, diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 2f818c3e6571..2e18e9adcc15 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -552,8 +552,9 @@ static const struct pci_platform_pm_ops *pci_platform_pm; int pci_set_platform_pm(const struct pci_platform_pm_ops *ops) { - if (!ops->is_manageable || !ops->set_state || !ops->choose_state || - !ops->sleep_wake || !ops->run_wake || !ops->need_resume) + if (!ops->is_manageable || !ops->set_state || !ops->get_state || + !ops->choose_state || !ops->sleep_wake || !ops->run_wake || + !ops->need_resume) return -EINVAL; pci_platform_pm = ops; return 0; @@ -570,6 +571,11 @@ static inline int platform_pci_set_power_state(struct pci_dev *dev, return pci_platform_pm ? pci_platform_pm->set_state(dev, t) : -ENOSYS; } +static inline pci_power_t platform_pci_get_power_state(struct pci_dev *dev) +{ + return pci_platform_pm ? pci_platform_pm->get_state(dev) : PCI_UNKNOWN; +} + static inline pci_power_t platform_pci_choose_state(struct pci_dev *dev) { return pci_platform_pm ? diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h index 9730c474b016..01d520648e1d 100644 --- a/drivers/pci/pci.h +++ b/drivers/pci/pci.h @@ -42,6 +42,8 @@ int pci_probe_reset_function(struct pci_dev *dev); * * @set_state: invokes the platform firmware to set the device's power state * + * @get_state: queries the platform firmware for a device's current power state + * * @choose_state: returns PCI power state of given device preferred by the * platform; to be used during system-wide transitions from a * sleeping state to the working state and vice versa @@ -62,6 +64,7 @@ int pci_probe_reset_function(struct pci_dev *dev); struct pci_platform_pm_ops { bool (*is_manageable)(struct pci_dev *dev); int (*set_state)(struct pci_dev *dev, pci_power_t state); + pci_power_t (*get_state)(struct pci_dev *dev); pci_power_t (*choose_state)(struct pci_dev *dev); int (*sleep_wake)(struct pci_dev *dev, bool enable); int (*run_wake)(struct pci_dev *dev, bool enable); From a6a64026c0cd1a76a0c8ab1c05a421aa4821887b Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 18 Sep 2016 05:39:20 +0200 Subject: [PATCH 3/4] PCI: Recognize D3cold in pci_update_current_state() Whenever a device is resumed or its power state is changed using the platform, its new power state is read from the PM Control & Status Register and cached in pci_dev->current_state by calling pci_update_current_state(). If the device is in D3cold, reading from config space typically results in a fabricated "all ones" response. But if it's in D3hot, the two bits representing the power state in the PMCSR are *also* set to 1. Thus D3hot and D3cold are not discernible by just reading the PMCSR. To account for this, pci_update_current_state() uses two workarounds: - When transitioning to D3cold using pci_platform_power_transition(), the new power state is set blindly by pci_update_current_state(), i.e. without verifying that the device actually *is* in D3cold. This is achieved by setting the "state" argument to PCI_D3cold. The "state" argument was originally intended to convey the new state in case the device doesn't have the PM capability. It is *also* used to convey the device state if the PM capability is present and the new state is D3cold, but this was never explained in the kerneldoc. - Once the current_state is set to D3cold, further invocations of pci_update_current_state() will blindly assume that the device is still in D3cold and leave the current_state unmodified. To get out of this impasse, the current_state has to be set directly, typically by calling pci_raw_set_power_state() or pci_enable_device(). It would be desirable if pci_update_current_state() could reliably detect D3cold by itself. That would allow us to do away with these workarounds, and it would allow for a smarter, more energy conserving runtime resume strategy after system sleep: Currently devices which utilize direct_complete are mandatorily runtime resumed in their ->complete stage. This can be avoided if their power state after system sleep is the same as before, but it requires a mechanism to detect the power state reliably. We've just gained the ability to query the platform firmware for its opinion on the device's power state. On platforms conforming to ACPI 4.0 or newer, this allows recognition of D3cold. Pre-4.0 platforms lack _PR3 and therefore the deepest power state that will ever be reported is D3hot, even though the device may actually be in D3cold. To detect D3cold in those cases, accessibility of the vendor ID in config space is probed using pci_device_is_present(). This also works for devices which are not platform-power-manageable at all, but can be suspended to D3cold using a nonstandard mechanism (e.g. some hybrid graphics laptops or Thunderbolt on the Mac). Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki --- drivers/pci/pci.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 2e18e9adcc15..b2be8957a290 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -707,26 +707,25 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) } /** - * pci_update_current_state - Read PCI power state of given device from its - * PCI PM registers and cache it + * pci_update_current_state - Read power state of given device and cache it * @dev: PCI device to handle. * @state: State to cache in case the device doesn't have the PM capability + * + * The power state is read from the PMCSR register, which however is + * inaccessible in D3cold. The platform firmware is therefore queried first + * to detect accessibility of the register. In case the platform firmware + * reports an incorrect state or the device isn't power manageable by the + * platform at all, we try to detect D3cold by testing accessibility of the + * vendor ID in config space. */ void pci_update_current_state(struct pci_dev *dev, pci_power_t state) { - if (dev->pm_cap) { + if (platform_pci_get_power_state(dev) == PCI_D3cold || + !pci_device_is_present(dev)) { + dev->current_state = PCI_D3cold; + } else if (dev->pm_cap) { u16 pmcsr; - /* - * Configuration space is not accessible for device in - * D3cold, so just keep or set D3cold for safety - */ - if (dev->current_state == PCI_D3cold) - return; - if (state == PCI_D3cold) { - dev->current_state = PCI_D3cold; - return; - } pci_read_config_word(dev, dev->pm_cap + PCI_PM_CTRL, &pmcsr); dev->current_state = (pmcsr & PCI_PM_CTRL_STATE_MASK); } else { From a0d2a959d3da343554523d26902de1d40a9e5c28 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 18 Sep 2016 05:39:20 +0200 Subject: [PATCH 4/4] PCI: Avoid unnecessary resume after direct-complete Commit 58a1fbbb2ee8 ("PM / PCI / ACPI: Kick devices that might have been reset by firmware") added a runtime resume for devices that were runtime suspended when the system entered sleep. The motivation was that devices might be in a reset-power-on state after waking from system sleep, so their power state as perceived by Linux (stored in pci_dev->current_state) would no longer reflect reality. By resuming such devices, we allow them to return to a low-power state via autosuspend and also bring their current_state in sync with reality. However for devices that are *not* in a reset-power-on state, doing an unconditional resume wastes energy. A more refined approach is called for which issues a runtime resume only if the power state after direct-complete is shallower than it was before. To achieve this, update the device's current_state and compare it to its pre-sleep value. Signed-off-by: Lukas Wunner Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki --- drivers/pci/pci-driver.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/pci/pci-driver.c b/drivers/pci/pci-driver.c index e39a67c8ef39..fd4b9c48fd2a 100644 --- a/drivers/pci/pci-driver.c +++ b/drivers/pci/pci-driver.c @@ -684,8 +684,19 @@ static int pci_pm_prepare(struct device *dev) static void pci_pm_complete(struct device *dev) { - pci_dev_complete_resume(to_pci_dev(dev)); - pm_complete_with_resume_check(dev); + struct pci_dev *pci_dev = to_pci_dev(dev); + + pci_dev_complete_resume(pci_dev); + pm_generic_complete(dev); + + /* Resume device if platform firmware has put it in reset-power-on */ + if (dev->power.direct_complete && pm_resume_via_firmware()) { + pci_power_t pre_sleep_state = pci_dev->current_state; + + pci_update_current_state(pci_dev, pci_dev->current_state); + if (pci_dev->current_state < pre_sleep_state) + pm_request_resume(dev); + } } #else /* !CONFIG_PM_SLEEP */