1
0
Fork 0

Linux 4.12-rc7

-----BEGIN PGP SIGNATURE-----
 
 iQEcBAABAgAGBQJZUGOmAAoJEHm+PkMAQRiGhX8H/3fIhingPD01MBf98U0xGrJo
 yIXmhu6nFs7TM0lDVDcHsKgqLQIT69ll7PrSZrMkc1RGUIPINoCuJVuJqDre0kfB
 of5TX2KegqSx8h1vOWjGBCBjdYfPGyMdf9icf6KsGc/SlIdhN6WA99kglAjJA0Ve
 qPTNagF0ntUNg1lsXffxyfcHqFpyqw/Z/C4ie/byFsn9iJ1VG9mNlTWSud09vhuM
 3tvHzTUVAIWWuRrrgrvgqQpnwL+q5BfSDsXScMjBau0EK3RGGqG8EN6Kbkfa7VQ6
 aBoeboQjUijSJnVwvySdQ11MChTIOwZdfrNPra/1HD3WJNsSu4BIRt5JcAKcOhc=
 =qmSg
 -----END PGP SIGNATURE-----

Merge tag 'v4.12-rc7' into devel

Linux 4.12-rc7
hifive-unleashed-5.1
Linus Walleij 2017-06-29 14:27:39 +02:00
commit 6183061967
1261 changed files with 14148 additions and 8185 deletions

View File

@ -59,20 +59,28 @@ button driver uses the following 3 modes in order not to trigger issues.
If the userspace hasn't been prepared to ignore the unreliable "opened"
events and the unreliable initial state notification, Linux users can use
the following kernel parameters to handle the possible issues:
A. button.lid_init_state=open:
A. button.lid_init_state=method:
When this option is specified, the ACPI button driver reports the
initial lid state using the returning value of the _LID control method
and whether the "opened"/"closed" events are paired fully relies on the
firmware implementation.
This option can be used to fix some platforms where the returning value
of the _LID control method is reliable but the initial lid state
notification is missing.
This option is the default behavior during the period the userspace
isn't ready to handle the buggy AML tables.
B. button.lid_init_state=open:
When this option is specified, the ACPI button driver always reports the
initial lid state as "opened" and whether the "opened"/"closed" events
are paired fully relies on the firmware implementation.
This may fix some platforms where the returning value of the _LID
control method is not reliable and the initial lid state notification is
missing.
This option is the default behavior during the period the userspace
isn't ready to handle the buggy AML tables.
If the userspace has been prepared to ignore the unreliable "opened" events
and the unreliable initial state notification, Linux users should always
use the following kernel parameter:
B. button.lid_init_state=ignore:
C. button.lid_init_state=ignore:
When this option is specified, the ACPI button driver never reports the
initial lid state and there is a compensation mechanism implemented to
ensure that the reliable "closed" notifications can always be delievered

View File

@ -866,6 +866,15 @@
dscc4.setup= [NET]
dt_cpu_ftrs= [PPC]
Format: {"off" | "known"}
Control how the dt_cpu_ftrs device-tree binding is
used for CPU feature discovery and setup (if it
exists).
off: Do not use it, fall back to legacy cpu table.
known: Do not pass through unknown features to guests
or userspace, only those that the kernel is aware of.
dump_apple_properties [X86]
Dump name and content of EFI device properties on
x86 Macs. Useful for driver authors to determine
@ -3802,6 +3811,13 @@
expediting. Set to zero to disable automatic
expediting.
stack_guard_gap= [MM]
override the default stack gap protection. The value
is in page units and it defines how many pages prior
to (for stacks growing down) resp. after (for stacks
growing up) the main stack are reserved for no other
mapping. Default value is 256 pages.
stacktrace [FTRACE]
Enabled the stack tracer on boot up.

View File

@ -1,4 +1,5 @@
.. |struct cpufreq_policy| replace:: :c:type:`struct cpufreq_policy <cpufreq_policy>`
.. |intel_pstate| replace:: :doc:`intel_pstate <intel_pstate>`
=======================
CPU Performance Scaling
@ -75,7 +76,7 @@ feedback registers, as that information is typically specific to the hardware
interface it comes from and may not be easily represented in an abstract,
platform-independent way. For this reason, ``CPUFreq`` allows scaling drivers
to bypass the governor layer and implement their own performance scaling
algorithms. That is done by the ``intel_pstate`` scaling driver.
algorithms. That is done by the |intel_pstate| scaling driver.
``CPUFreq`` Policy Objects
@ -174,13 +175,13 @@ necessary to restart the scaling governor so that it can take the new online CPU
into account. That is achieved by invoking the governor's ``->stop`` and
``->start()`` callbacks, in this order, for the entire policy.
As mentioned before, the ``intel_pstate`` scaling driver bypasses the scaling
As mentioned before, the |intel_pstate| scaling driver bypasses the scaling
governor layer of ``CPUFreq`` and provides its own P-state selection algorithms.
Consequently, if ``intel_pstate`` is used, scaling governors are not attached to
Consequently, if |intel_pstate| is used, scaling governors are not attached to
new policy objects. Instead, the driver's ``->setpolicy()`` callback is invoked
to register per-CPU utilization update callbacks for each policy. These
callbacks are invoked by the CPU scheduler in the same way as for scaling
governors, but in the ``intel_pstate`` case they both determine the P-state to
governors, but in the |intel_pstate| case they both determine the P-state to
use and change the hardware configuration accordingly in one go from scheduler
context.
@ -257,7 +258,7 @@ are the following:
``scaling_available_governors``
List of ``CPUFreq`` scaling governors present in the kernel that can
be attached to this policy or (if the ``intel_pstate`` scaling driver is
be attached to this policy or (if the |intel_pstate| scaling driver is
in use) list of scaling algorithms provided by the driver that can be
applied to this policy.
@ -274,7 +275,7 @@ are the following:
the CPU is actually running at (due to hardware design and other
limitations).
Some scaling drivers (e.g. ``intel_pstate``) attempt to provide
Some scaling drivers (e.g. |intel_pstate|) attempt to provide
information more precisely reflecting the current CPU frequency through
this attribute, but that still may not be the exact current CPU
frequency as seen by the hardware at the moment.
@ -284,13 +285,13 @@ are the following:
``scaling_governor``
The scaling governor currently attached to this policy or (if the
``intel_pstate`` scaling driver is in use) the scaling algorithm
|intel_pstate| scaling driver is in use) the scaling algorithm
provided by the driver that is currently applied to this policy.
This attribute is read-write and writing to it will cause a new scaling
governor to be attached to this policy or a new scaling algorithm
provided by the scaling driver to be applied to it (in the
``intel_pstate`` case), as indicated by the string written to this
|intel_pstate| case), as indicated by the string written to this
attribute (which must be one of the names listed by the
``scaling_available_governors`` attribute described above).
@ -619,7 +620,7 @@ This file is located under :file:`/sys/devices/system/cpu/cpufreq/` and controls
the "boost" setting for the whole system. It is not present if the underlying
scaling driver does not support the frequency boost mechanism (or supports it,
but provides a driver-specific interface for controlling it, like
``intel_pstate``).
|intel_pstate|).
If the value in this file is 1, the frequency boost mechanism is enabled. This
means that either the hardware can be put into states in which it is able to

View File

@ -6,6 +6,7 @@ Power Management
:maxdepth: 2
cpufreq
intel_pstate
.. only:: subproject and html

View File

@ -0,0 +1,755 @@
===============================================
``intel_pstate`` CPU Performance Scaling Driver
===============================================
::
Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
General Information
===================
``intel_pstate`` is a part of the
:doc:`CPU performance scaling subsystem <cpufreq>` in the Linux kernel
(``CPUFreq``). It is a scaling driver for the Sandy Bridge and later
generations of Intel processors. Note, however, that some of those processors
may not be supported. [To understand ``intel_pstate`` it is necessary to know
how ``CPUFreq`` works in general, so this is the time to read :doc:`cpufreq` if
you have not done that yet.]
For the processors supported by ``intel_pstate``, the P-state concept is broader
than just an operating frequency or an operating performance point (see the
`LinuxCon Europe 2015 presentation by Kristen Accardi <LCEU2015_>`_ for more
information about that). For this reason, the representation of P-states used
by ``intel_pstate`` internally follows the hardware specification (for details
refer to `Intel® 64 and IA-32 Architectures Software Developers Manual
Volume 3: System Programming Guide <SDM_>`_). However, the ``CPUFreq`` core
uses frequencies for identifying operating performance points of CPUs and
frequencies are involved in the user space interface exposed by it, so
``intel_pstate`` maps its internal representation of P-states to frequencies too
(fortunately, that mapping is unambiguous). At the same time, it would not be
practical for ``intel_pstate`` to supply the ``CPUFreq`` core with a table of
available frequencies due to the possible size of it, so the driver does not do
that. Some functionality of the core is limited by that.
Since the hardware P-state selection interface used by ``intel_pstate`` is
available at the logical CPU level, the driver always works with individual
CPUs. Consequently, if ``intel_pstate`` is in use, every ``CPUFreq`` policy
object corresponds to one logical CPU and ``CPUFreq`` policies are effectively
equivalent to CPUs. In particular, this means that they become "inactive" every
time the corresponding CPU is taken offline and need to be re-initialized when
it goes back online.
``intel_pstate`` is not modular, so it cannot be unloaded, which means that the
only way to pass early-configuration-time parameters to it is via the kernel
command line. However, its configuration can be adjusted via ``sysfs`` to a
great extent. In some configurations it even is possible to unregister it via
``sysfs`` which allows another ``CPUFreq`` scaling driver to be loaded and
registered (see `below <status_attr_>`_).
Operation Modes
===============
``intel_pstate`` can operate in three different modes: in the active mode with
or without hardware-managed P-states support and in the passive mode. Which of
them will be in effect depends on what kernel command line options are used and
on the capabilities of the processor.
Active Mode
-----------
This is the default operation mode of ``intel_pstate``. If it works in this
mode, the ``scaling_driver`` policy attribute in ``sysfs`` for all ``CPUFreq``
policies contains the string "intel_pstate".
In this mode the driver bypasses the scaling governors layer of ``CPUFreq`` and
provides its own scaling algorithms for P-state selection. Those algorithms
can be applied to ``CPUFreq`` policies in the same way as generic scaling
governors (that is, through the ``scaling_governor`` policy attribute in
``sysfs``). [Note that different P-state selection algorithms may be chosen for
different policies, but that is not recommended.]
They are not generic scaling governors, but their names are the same as the
names of some of those governors. Moreover, confusingly enough, they generally
do not work in the same way as the generic governors they share the names with.
For example, the ``powersave`` P-state selection algorithm provided by
``intel_pstate`` is not a counterpart of the generic ``powersave`` governor
(roughly, it corresponds to the ``schedutil`` and ``ondemand`` governors).
There are two P-state selection algorithms provided by ``intel_pstate`` in the
active mode: ``powersave`` and ``performance``. The way they both operate
depends on whether or not the hardware-managed P-states (HWP) feature has been
enabled in the processor and possibly on the processor model.
Which of the P-state selection algorithms is used by default depends on the
:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option.
Namely, if that option is set, the ``performance`` algorithm will be used by
default, and the other one will be used by default if it is not set.
Active Mode With HWP
~~~~~~~~~~~~~~~~~~~~
If the processor supports the HWP feature, it will be enabled during the
processor initialization and cannot be disabled after that. It is possible
to avoid enabling it by passing the ``intel_pstate=no_hwp`` argument to the
kernel in the command line.
If the HWP feature has been enabled, ``intel_pstate`` relies on the processor to
select P-states by itself, but still it can give hints to the processor's
internal P-state selection logic. What those hints are depends on which P-state
selection algorithm has been applied to the given policy (or to the CPU it
corresponds to).
Even though the P-state selection is carried out by the processor automatically,
``intel_pstate`` registers utilization update callbacks with the CPU scheduler
in this mode. However, they are not used for running a P-state selection
algorithm, but for periodic updates of the current CPU frequency information to
be made available from the ``scaling_cur_freq`` policy attribute in ``sysfs``.
HWP + ``performance``
.....................
In this configuration ``intel_pstate`` will write 0 to the processor's
Energy-Performance Preference (EPP) knob (if supported) or its
Energy-Performance Bias (EPB) knob (otherwise), which means that the processor's
internal P-state selection logic is expected to focus entirely on performance.
This will override the EPP/EPB setting coming from the ``sysfs`` interface
(see `Energy vs Performance Hints`_ below).
Also, in this configuration the range of P-states available to the processor's
internal P-state selection logic is always restricted to the upper boundary
(that is, the maximum P-state that the driver is allowed to use).
HWP + ``powersave``
...................
In this configuration ``intel_pstate`` will set the processor's
Energy-Performance Preference (EPP) knob (if supported) or its
Energy-Performance Bias (EPB) knob (otherwise) to whatever value it was
previously set to via ``sysfs`` (or whatever default value it was
set to by the platform firmware). This usually causes the processor's
internal P-state selection logic to be less performance-focused.
Active Mode Without HWP
~~~~~~~~~~~~~~~~~~~~~~~
This is the default operation mode for processors that do not support the HWP
feature. It also is used by default with the ``intel_pstate=no_hwp`` argument
in the kernel command line. However, in this mode ``intel_pstate`` may refuse
to work with the given processor if it does not recognize it. [Note that
``intel_pstate`` will never refuse to work with any processor with the HWP
feature enabled.]
In this mode ``intel_pstate`` registers utilization update callbacks with the
CPU scheduler in order to run a P-state selection algorithm, either
``powersave`` or ``performance``, depending on the ``scaling_cur_freq`` policy
setting in ``sysfs``. The current CPU frequency information to be made
available from the ``scaling_cur_freq`` policy attribute in ``sysfs`` is
periodically updated by those utilization update callbacks too.
``performance``
...............
Without HWP, this P-state selection algorithm is always the same regardless of
the processor model and platform configuration.
It selects the maximum P-state it is allowed to use, subject to limits set via
``sysfs``, every time the P-state selection computations are carried out by the
driver's utilization update callback for the given CPU (that does not happen
more often than every 10 ms), but the hardware configuration will not be changed
if the new P-state is the same as the current one.
This is the default P-state selection algorithm if the
:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option
is set.
``powersave``
.............
Without HWP, this P-state selection algorithm generally depends on the
processor model and/or the system profile setting in the ACPI tables and there
are two variants of it.
One of them is used with processors from the Atom line and (regardless of the
processor model) on platforms with the system profile in the ACPI tables set to
"mobile" (laptops mostly), "tablet", "appliance PC", "desktop", or
"workstation". It is also used with processors supporting the HWP feature if
that feature has not been enabled (that is, with the ``intel_pstate=no_hwp``
argument in the kernel command line). It is similar to the algorithm
implemented by the generic ``schedutil`` scaling governor except that the
utilization metric used by it is based on numbers coming from feedback
registers of the CPU. It generally selects P-states proportional to the
current CPU utilization, so it is referred to as the "proportional" algorithm.
The second variant of the ``powersave`` P-state selection algorithm, used in all
of the other cases (generally, on processors from the Core line, so it is
referred to as the "Core" algorithm), is based on the values read from the APERF
and MPERF feedback registers and the previously requested target P-state.
It does not really take CPU utilization into account explicitly, but as a rule
it causes the CPU P-state to ramp up very quickly in response to increased
utilization which is generally desirable in server environments.
Regardless of the variant, this algorithm is run by the driver's utilization
update callback for the given CPU when it is invoked by the CPU scheduler, but
not more often than every 10 ms (that can be tweaked via ``debugfs`` in `this
particular case <Tuning Interface in debugfs_>`_). Like in the ``performance``
case, the hardware configuration is not touched if the new P-state turns out to
be the same as the current one.
This is the default P-state selection algorithm if the
:c:macro:`CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE` kernel configuration option
is not set.
Passive Mode
------------
This mode is used if the ``intel_pstate=passive`` argument is passed to the
kernel in the command line (it implies the ``intel_pstate=no_hwp`` setting too).
Like in the active mode without HWP support, in this mode ``intel_pstate`` may
refuse to work with the given processor if it does not recognize it.
If the driver works in this mode, the ``scaling_driver`` policy attribute in
``sysfs`` for all ``CPUFreq`` policies contains the string "intel_cpufreq".
Then, the driver behaves like a regular ``CPUFreq`` scaling driver. That is,
it is invoked by generic scaling governors when necessary to talk to the
hardware in order to change the P-state of a CPU (in particular, the
``schedutil`` governor can invoke it directly from scheduler context).
While in this mode, ``intel_pstate`` can be used with all of the (generic)
scaling governors listed by the ``scaling_available_governors`` policy attribute
in ``sysfs`` (and the P-state selection algorithms described above are not
used). Then, it is responsible for the configuration of policy objects
corresponding to CPUs and provides the ``CPUFreq`` core (and the scaling
governors attached to the policy objects) with accurate information on the
maximum and minimum operating frequencies supported by the hardware (including
the so-called "turbo" frequency ranges). In other words, in the passive mode
the entire range of available P-states is exposed by ``intel_pstate`` to the
``CPUFreq`` core. However, in this mode the driver does not register
utilization update callbacks with the CPU scheduler and the ``scaling_cur_freq``
information comes from the ``CPUFreq`` core (and is the last frequency selected
by the current scaling governor for the given policy).
.. _turbo:
Turbo P-states Support
======================
In the majority of cases, the entire range of P-states available to
``intel_pstate`` can be divided into two sub-ranges that correspond to
different types of processor behavior, above and below a boundary that
will be referred to as the "turbo threshold" in what follows.
The P-states above the turbo threshold are referred to as "turbo P-states" and
the whole sub-range of P-states they belong to is referred to as the "turbo
range". These names are related to the Turbo Boost technology allowing a
multicore processor to opportunistically increase the P-state of one or more
cores if there is enough power to do that and if that is not going to cause the
thermal envelope of the processor package to be exceeded.
Specifically, if software sets the P-state of a CPU core within the turbo range
(that is, above the turbo threshold), the processor is permitted to take over
performance scaling control for that core and put it into turbo P-states of its
choice going forward. However, that permission is interpreted differently by
different processor generations. Namely, the Sandy Bridge generation of
processors will never use any P-states above the last one set by software for
the given core, even if it is within the turbo range, whereas all of the later
processor generations will take it as a license to use any P-states from the
turbo range, even above the one set by software. In other words, on those
processors setting any P-state from the turbo range will enable the processor
to put the given core into all turbo P-states up to and including the maximum
supported one as it sees fit.
One important property of turbo P-states is that they are not sustainable. More
precisely, there is no guarantee that any CPUs will be able to stay in any of
those states indefinitely, because the power distribution within the processor
package may change over time or the thermal envelope it was designed for might
be exceeded if a turbo P-state was used for too long.
In turn, the P-states below the turbo threshold generally are sustainable. In
fact, if one of them is set by software, the processor is not expected to change
it to a lower one unless in a thermal stress or a power limit violation
situation (a higher P-state may still be used if it is set for another CPU in
the same package at the same time, for example).
Some processors allow multiple cores to be in turbo P-states at the same time,
but the maximum P-state that can be set for them generally depends on the number
of cores running concurrently. The maximum turbo P-state that can be set for 3
cores at the same time usually is lower than the analogous maximum P-state for
2 cores, which in turn usually is lower than the maximum turbo P-state that can
be set for 1 core. The one-core maximum turbo P-state is thus the maximum
supported one overall.
The maximum supported turbo P-state, the turbo threshold (the maximum supported
non-turbo P-state) and the minimum supported P-state are specific to the
processor model and can be determined by reading the processor's model-specific
registers (MSRs). Moreover, some processors support the Configurable TDP
(Thermal Design Power) feature and, when that feature is enabled, the turbo
threshold effectively becomes a configurable value that can be set by the
platform firmware.
Unlike ``_PSS`` objects in the ACPI tables, ``intel_pstate`` always exposes
the entire range of available P-states, including the whole turbo range, to the
``CPUFreq`` core and (in the passive mode) to generic scaling governors. This
generally causes turbo P-states to be set more often when ``intel_pstate`` is
used relative to ACPI-based CPU performance scaling (see `below <acpi-cpufreq_>`_
for more information).
Moreover, since ``intel_pstate`` always knows what the real turbo threshold is
(even if the Configurable TDP feature is enabled in the processor), its
``no_turbo`` attribute in ``sysfs`` (described `below <no_turbo_attr_>`_) should
work as expected in all cases (that is, if set to disable turbo P-states, it
always should prevent ``intel_pstate`` from using them).
Processor Support
=================
To handle a given processor ``intel_pstate`` requires a number of different
pieces of information on it to be known, including:
* The minimum supported P-state.
* The maximum supported `non-turbo P-state <turbo_>`_.
* Whether or not turbo P-states are supported at all.
* The maximum supported `one-core turbo P-state <turbo_>`_ (if turbo P-states
are supported).
* The scaling formula to translate the driver's internal representation
of P-states into frequencies and the other way around.
Generally, ways to obtain that information are specific to the processor model
or family. Although it often is possible to obtain all of it from the processor
itself (using model-specific registers), there are cases in which hardware
manuals need to be consulted to get to it too.
For this reason, there is a list of supported processors in ``intel_pstate`` and
the driver initialization will fail if the detected processor is not in that
list, unless it supports the `HWP feature <Active Mode_>`_. [The interface to
obtain all of the information listed above is the same for all of the processors
supporting the HWP feature, which is why they all are supported by
``intel_pstate``.]
User Space Interface in ``sysfs``
=================================
Global Attributes
-----------------
``intel_pstate`` exposes several global attributes (files) in ``sysfs`` to
control its functionality at the system level. They are located in the
``/sys/devices/system/cpu/cpufreq/intel_pstate/`` directory and affect all
CPUs.
Some of them are not present if the ``intel_pstate=per_cpu_perf_limits``
argument is passed to the kernel in the command line.
``max_perf_pct``
Maximum P-state the driver is allowed to set in percent of the
maximum supported performance level (the highest supported `turbo
P-state <turbo_>`_).
This attribute will not be exposed if the
``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel
command line.
``min_perf_pct``
Minimum P-state the driver is allowed to set in percent of the
maximum supported performance level (the highest supported `turbo
P-state <turbo_>`_).
This attribute will not be exposed if the
``intel_pstate=per_cpu_perf_limits`` argument is present in the kernel
command line.
``num_pstates``
Number of P-states supported by the processor (between 0 and 255
inclusive) including both turbo and non-turbo P-states (see
`Turbo P-states Support`_).
The value of this attribute is not affected by the ``no_turbo``
setting described `below <no_turbo_attr_>`_.
This attribute is read-only.
``turbo_pct``
Ratio of the `turbo range <turbo_>`_ size to the size of the entire
range of supported P-states, in percent.
This attribute is read-only.
.. _no_turbo_attr:
``no_turbo``
If set (equal to 1), the driver is not allowed to set any turbo P-states
(see `Turbo P-states Support`_). If unset (equalt to 0, which is the
default), turbo P-states can be set by the driver.
[Note that ``intel_pstate`` does not support the general ``boost``
attribute (supported by some other scaling drivers) which is replaced
by this one.]
This attrubute does not affect the maximum supported frequency value
supplied to the ``CPUFreq`` core and exposed via the policy interface,
but it affects the maximum possible value of per-policy P-state limits
(see `Interpretation of Policy Attributes`_ below for details).
.. _status_attr:
``status``
Operation mode of the driver: "active", "passive" or "off".
"active"
The driver is functional and in the `active mode
<Active Mode_>`_.
"passive"
The driver is functional and in the `passive mode
<Passive Mode_>`_.
"off"
The driver is not functional (it is not registered as a scaling
driver with the ``CPUFreq`` core).
This attribute can be written to in order to change the driver's
operation mode or to unregister it. The string written to it must be
one of the possible values of it and, if successful, the write will
cause the driver to switch over to the operation mode represented by
that string - or to be unregistered in the "off" case. [Actually,
switching over from the active mode to the passive mode or the other
way around causes the driver to be unregistered and registered again
with a different set of callbacks, so all of its settings (the global
as well as the per-policy ones) are then reset to their default
values, possibly depending on the target operation mode.]
That only is supported in some configurations, though (for example, if
the `HWP feature is enabled in the processor <Active Mode With HWP_>`_,
the operation mode of the driver cannot be changed), and if it is not
supported in the current configuration, writes to this attribute with
fail with an appropriate error.
Interpretation of Policy Attributes
-----------------------------------
The interpretation of some ``CPUFreq`` policy attributes described in
:doc:`cpufreq` is special with ``intel_pstate`` as the current scaling driver
and it generally depends on the driver's `operation mode <Operation Modes_>`_.
First of all, the values of the ``cpuinfo_max_freq``, ``cpuinfo_min_freq`` and
``scaling_cur_freq`` attributes are produced by applying a processor-specific
multiplier to the internal P-state representation used by ``intel_pstate``.
Also, the values of the ``scaling_max_freq`` and ``scaling_min_freq``
attributes are capped by the frequency corresponding to the maximum P-state that
the driver is allowed to set.
If the ``no_turbo`` `global attribute <no_turbo_attr_>`_ is set, the driver is
not allowed to use turbo P-states, so the maximum value of ``scaling_max_freq``
and ``scaling_min_freq`` is limited to the maximum non-turbo P-state frequency.
Accordingly, setting ``no_turbo`` causes ``scaling_max_freq`` and
``scaling_min_freq`` to go down to that value if they were above it before.
However, the old values of ``scaling_max_freq`` and ``scaling_min_freq`` will be
restored after unsetting ``no_turbo``, unless these attributes have been written
to after ``no_turbo`` was set.
If ``no_turbo`` is not set, the maximum possible value of ``scaling_max_freq``
and ``scaling_min_freq`` corresponds to the maximum supported turbo P-state,
which also is the value of ``cpuinfo_max_freq`` in either case.
Next, the following policy attributes have special meaning if
``intel_pstate`` works in the `active mode <Active Mode_>`_:
``scaling_available_governors``
List of P-state selection algorithms provided by ``intel_pstate``.
``scaling_governor``
P-state selection algorithm provided by ``intel_pstate`` currently in
use with the given policy.
``scaling_cur_freq``
Frequency of the average P-state of the CPU represented by the given
policy for the time interval between the last two invocations of the
driver's utilization update callback by the CPU scheduler for that CPU.
The meaning of these attributes in the `passive mode <Passive Mode_>`_ is the
same as for other scaling drivers.
Additionally, the value of the ``scaling_driver`` attribute for ``intel_pstate``
depends on the operation mode of the driver. Namely, it is either
"intel_pstate" (in the `active mode <Active Mode_>`_) or "intel_cpufreq" (in the
`passive mode <Passive Mode_>`_).
Coordination of P-State Limits
------------------------------
``intel_pstate`` allows P-state limits to be set in two ways: with the help of
the ``max_perf_pct`` and ``min_perf_pct`` `global attributes
<Global Attributes_>`_ or via the ``scaling_max_freq`` and ``scaling_min_freq``
``CPUFreq`` policy attributes. The coordination between those limits is based
on the following rules, regardless of the current operation mode of the driver:
1. All CPUs are affected by the global limits (that is, none of them can be
requested to run faster than the global maximum and none of them can be
requested to run slower than the global minimum).
2. Each individual CPU is affected by its own per-policy limits (that is, it
cannot be requested to run faster than its own per-policy maximum and it
cannot be requested to run slower than its own per-policy minimum).
3. The global and per-policy limits can be set independently.
If the `HWP feature is enabled in the processor <Active Mode With HWP_>`_, the
resulting effective values are written into its registers whenever the limits
change in order to request its internal P-state selection logic to always set
P-states within these limits. Otherwise, the limits are taken into account by
scaling governors (in the `passive mode <Passive Mode_>`_) and by the driver
every time before setting a new P-state for a CPU.
Additionally, if the ``intel_pstate=per_cpu_perf_limits`` command line argument
is passed to the kernel, ``max_perf_pct`` and ``min_perf_pct`` are not exposed
at all and the only way to set the limits is by using the policy attributes.
Energy vs Performance Hints
---------------------------
If ``intel_pstate`` works in the `active mode with the HWP feature enabled
<Active Mode With HWP_>`_ in the processor, additional attributes are present
in every ``CPUFreq`` policy directory in ``sysfs``. They are intended to allow
user space to help ``intel_pstate`` to adjust the processor's internal P-state
selection logic by focusing it on performance or on energy-efficiency, or
somewhere between the two extremes:
``energy_performance_preference``
Current value of the energy vs performance hint for the given policy
(or the CPU represented by it).
The hint can be changed by writing to this attribute.
``energy_performance_available_preferences``
List of strings that can be written to the
``energy_performance_preference`` attribute.
They represent different energy vs performance hints and should be
self-explanatory, except that ``default`` represents whatever hint
value was set by the platform firmware.
Strings written to the ``energy_performance_preference`` attribute are
internally translated to integer values written to the processor's
Energy-Performance Preference (EPP) knob (if supported) or its
Energy-Performance Bias (EPB) knob.
[Note that tasks may by migrated from one CPU to another by the scheduler's
load-balancing algorithm and if different energy vs performance hints are
set for those CPUs, that may lead to undesirable outcomes. To avoid such
issues it is better to set the same energy vs performance hint for all CPUs
or to pin every task potentially sensitive to them to a specific CPU.]
.. _acpi-cpufreq:
``intel_pstate`` vs ``acpi-cpufreq``
====================================
On the majority of systems supported by ``intel_pstate``, the ACPI tables
provided by the platform firmware contain ``_PSS`` objects returning information
that can be used for CPU performance scaling (refer to the `ACPI specification`_
for details on the ``_PSS`` objects and the format of the information returned
by them).
The information returned by the ACPI ``_PSS`` objects is used by the
``acpi-cpufreq`` scaling driver. On systems supported by ``intel_pstate``
the ``acpi-cpufreq`` driver uses the same hardware CPU performance scaling
interface, but the set of P-states it can use is limited by the ``_PSS``
output.
On those systems each ``_PSS`` object returns a list of P-states supported by
the corresponding CPU which basically is a subset of the P-states range that can
be used by ``intel_pstate`` on the same system, with one exception: the whole
`turbo range <turbo_>`_ is represented by one item in it (the topmost one). By
convention, the frequency returned by ``_PSS`` for that item is greater by 1 MHz
than the frequency of the highest non-turbo P-state listed by it, but the
corresponding P-state representation (following the hardware specification)
returned for it matches the maximum supported turbo P-state (or is the
special value 255 meaning essentially "go as high as you can get").
The list of P-states returned by ``_PSS`` is reflected by the table of
available frequencies supplied by ``acpi-cpufreq`` to the ``CPUFreq`` core and
scaling governors and the minimum and maximum supported frequencies reported by
it come from that list as well. In particular, given the special representation
of the turbo range described above, this means that the maximum supported
frequency reported by ``acpi-cpufreq`` is higher by 1 MHz than the frequency
of the highest supported non-turbo P-state listed by ``_PSS`` which, of course,
affects decisions made by the scaling governors, except for ``powersave`` and
``performance``.
For example, if a given governor attempts to select a frequency proportional to
estimated CPU load and maps the load of 100% to the maximum supported frequency
(possibly multiplied by a constant), then it will tend to choose P-states below
the turbo threshold if ``acpi-cpufreq`` is used as the scaling driver, because
in that case the turbo range corresponds to a small fraction of the frequency
band it can use (1 MHz vs 1 GHz or more). In consequence, it will only go to
the turbo range for the highest loads and the other loads above 50% that might
benefit from running at turbo frequencies will be given non-turbo P-states
instead.
One more issue related to that may appear on systems supporting the
`Configurable TDP feature <turbo_>`_ allowing the platform firmware to set the
turbo threshold. Namely, if that is not coordinated with the lists of P-states
returned by ``_PSS`` properly, there may be more than one item corresponding to
a turbo P-state in those lists and there may be a problem with avoiding the
turbo range (if desirable or necessary). Usually, to avoid using turbo
P-states overall, ``acpi-cpufreq`` simply avoids using the topmost state listed
by ``_PSS``, but that is not sufficient when there are other turbo P-states in
the list returned by it.
Apart from the above, ``acpi-cpufreq`` works like ``intel_pstate`` in the
`passive mode <Passive Mode_>`_, except that the number of P-states it can set
is limited to the ones listed by the ACPI ``_PSS`` objects.
Kernel Command Line Options for ``intel_pstate``
================================================
Several kernel command line options can be used to pass early-configuration-time
parameters to ``intel_pstate`` in order to enforce specific behavior of it. All
of them have to be prepended with the ``intel_pstate=`` prefix.
``disable``
Do not register ``intel_pstate`` as the scaling driver even if the
processor is supported by it.
``passive``
Register ``intel_pstate`` in the `passive mode <Passive Mode_>`_ to
start with.
This option implies the ``no_hwp`` one described below.
``force``
Register ``intel_pstate`` as the scaling driver instead of
``acpi-cpufreq`` even if the latter is preferred on the given system.
This may prevent some platform features (such as thermal controls and
power capping) that rely on the availability of ACPI P-states
information from functioning as expected, so it should be used with
caution.
This option does not work with processors that are not supported by
``intel_pstate`` and on platforms where the ``pcc-cpufreq`` scaling
driver is used instead of ``acpi-cpufreq``.
``no_hwp``
Do not enable the `hardware-managed P-states (HWP) feature
<Active Mode With HWP_>`_ even if it is supported by the processor.
``hwp_only``
Register ``intel_pstate`` as the scaling driver only if the
`hardware-managed P-states (HWP) feature <Active Mode With HWP_>`_ is
supported by the processor.
``support_acpi_ppc``
Take ACPI ``_PPC`` performance limits into account.
If the preferred power management profile in the FADT (Fixed ACPI
Description Table) is set to "Enterprise Server" or "Performance
Server", the ACPI ``_PPC`` limits are taken into account by default
and this option has no effect.
``per_cpu_perf_limits``
Use per-logical-CPU P-State limits (see `Coordination of P-state
Limits`_ for details).
Diagnostics and Tuning
======================
Trace Events
------------
There are two static trace events that can be used for ``intel_pstate``
diagnostics. One of them is the ``cpu_frequency`` trace event generally used
by ``CPUFreq``, and the other one is the ``pstate_sample`` trace event specific
to ``intel_pstate``. Both of them are triggered by ``intel_pstate`` only if
it works in the `active mode <Active Mode_>`_.
The following sequence of shell commands can be used to enable them and see
their output (if the kernel is generally configured to support event tracing)::
# cd /sys/kernel/debug/tracing/
# echo 1 > events/power/pstate_sample/enable
# echo 1 > events/power/cpu_frequency/enable
# cat trace
gnome-terminal--4510 [001] ..s. 1177.680733: pstate_sample: core_busy=107 scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618 freq=2474476
cat-5235 [002] ..s. 1177.681723: cpu_frequency: state=2900000 cpu_id=2
If ``intel_pstate`` works in the `passive mode <Passive Mode_>`_, the
``cpu_frequency`` trace event will be triggered either by the ``schedutil``
scaling governor (for the policies it is attached to), or by the ``CPUFreq``
core (for the policies with other scaling governors).
``ftrace``
----------
The ``ftrace`` interface can be used for low-level diagnostics of
``intel_pstate``. For example, to check how often the function to set a
P-state is called, the ``ftrace`` filter can be set to to
:c:func:`intel_pstate_set_pstate`::
# cd /sys/kernel/debug/tracing/
# cat available_filter_functions | grep -i pstate
intel_pstate_set_pstate
intel_pstate_cpu_init
...
# echo intel_pstate_set_pstate > set_ftrace_filter
# echo function > current_tracer
# cat trace | head -15
# tracer: function
#
# entries-in-buffer/entries-written: 80/80 #P:4
#
# _-----=> irqs-off
# / _----=> need-resched
# | / _---=> hardirq/softirq
# || / _--=> preempt-depth
# ||| / delay
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
# | | | |||| | |
Xorg-3129 [000] ..s. 2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func
gnome-terminal--4510 [002] ..s. 2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func
gnome-shell-3409 [001] ..s. 2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func
<idle>-0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func
Tuning Interface in ``debugfs``
-------------------------------
The ``powersave`` algorithm provided by ``intel_pstate`` for `the Core line of
processors in the active mode <powersave_>`_ is based on a `PID controller`_
whose parameters were chosen to address a number of different use cases at the
same time. However, it still is possible to fine-tune it to a specific workload
and the ``debugfs`` interface under ``/sys/kernel/debug/pstate_snb/`` is
provided for this purpose. [Note that the ``pstate_snb`` directory will be
present only if the specific P-state selection algorithm matching the interface
in it actually is in use.]
The following files present in that directory can be used to modify the PID
controller parameters at run time:
| ``deadband``
| ``d_gain_pct``
| ``i_gain_pct``
| ``p_gain_pct``
| ``sample_rate_ms``
| ``setpoint``
Note, however, that achieving desirable results this way generally requires
expert-level understanding of the power vs performance tradeoff, so extra care
is recommended when attempting to do that.
.. _LCEU2015: http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf
.. _SDM: http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html
.. _ACPI specification: http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf
.. _PID controller: https://en.wikipedia.org/wiki/PID_controller

View File

@ -1,281 +0,0 @@
Intel P-State driver
--------------------
This driver provides an interface to control the P-State selection for the
SandyBridge+ Intel processors.
The following document explains P-States:
http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf
As stated in the document, P-State doesnt exactly mean a frequency. However, for
the sake of the relationship with cpufreq, P-State and frequency are used
interchangeably.
Understanding the cpufreq core governors and policies are important before
discussing more details about the Intel P-State driver. Based on what callbacks
a cpufreq driver provides to the cpufreq core, it can support two types of
drivers:
- with target_index() callback: In this mode, the drivers using cpufreq core
simply provide the minimum and maximum frequency limits and an additional
interface target_index() to set the current frequency. The cpufreq subsystem
has a number of scaling governors ("performance", "powersave", "ondemand",
etc.). Depending on which governor is in use, cpufreq core will call for
transitions to a specific frequency using target_index() callback.
- setpolicy() callback: In this mode, drivers do not provide target_index()
callback, so cpufreq core can't request a transition to a specific frequency.
The driver provides minimum and maximum frequency limits and callbacks to set a
policy. The policy in cpufreq sysfs is referred to as the "scaling governor".
The cpufreq core can request the driver to operate in any of the two policies:
"performance" and "powersave". The driver decides which frequency to use based
on the above policy selection considering minimum and maximum frequency limits.
The Intel P-State driver falls under the latter category, which implements the
setpolicy() callback. This driver decides what P-State to use based on the
requested policy from the cpufreq core. If the processor is capable of
selecting its next P-State internally, then the driver will offload this
responsibility to the processor (aka HWP: Hardware P-States). If not, the
driver implements algorithms to select the next P-State.
Since these policies are implemented in the driver, they are not same as the
cpufreq scaling governors implementation, even if they have the same name in
the cpufreq sysfs (scaling_governors). For example the "performance" policy is
similar to cpufreqs "performance" governor, but "powersave" is completely
different than the cpufreq "powersave" governor. The strategy here is similar
to cpufreq "ondemand", where the requested P-State is related to the system load.
Sysfs Interface
In addition to the frequency-controlling interfaces provided by the cpufreq
core, the driver provides its own sysfs files to control the P-State selection.
These files have been added to /sys/devices/system/cpu/intel_pstate/.
Any changes made to these files are applicable to all CPUs (even in a
multi-package system, Refer to later section on placing "Per-CPU limits").
max_perf_pct: Limits the maximum P-State that will be requested by
the driver. It states it as a percentage of the available performance. The
available (P-State) performance may be reduced by the no_turbo
setting described below.
min_perf_pct: Limits the minimum P-State that will be requested by
the driver. It states it as a percentage of the max (non-turbo)
performance level.
no_turbo: Limits the driver to selecting P-State below the turbo
frequency range.
turbo_pct: Displays the percentage of the total performance that
is supported by hardware that is in the turbo range. This number
is independent of whether turbo has been disabled or not.
num_pstates: Displays the number of P-States that are supported
by hardware. This number is independent of whether turbo has
been disabled or not.
For example, if a system has these parameters:
Max 1 core turbo ratio: 0x21 (Max 1 core ratio is the maximum P-State)
Max non turbo ratio: 0x17
Minimum ratio : 0x08 (Here the ratio is called max efficiency ratio)
Sysfs will show :
max_perf_pct:100, which corresponds to 1 core ratio
min_perf_pct:24, max_efficiency_ratio / max 1 Core ratio
no_turbo:0, turbo is not disabled
num_pstates:26 = (max 1 Core ratio - Max Efficiency Ratio + 1)
turbo_pct:39 = (max 1 core ratio - max non turbo ratio) / num_pstates
Refer to "Intel® 64 and IA-32 Architectures Software Developers Manual
Volume 3: System Programming Guide" to understand ratios.
There is one more sysfs attribute in /sys/devices/system/cpu/intel_pstate/
that can be used for controlling the operation mode of the driver:
status: Three settings are possible:
"off" - The driver is not in use at this time.
"active" - The driver works as a P-state governor (default).
"passive" - The driver works as a regular cpufreq one and collaborates
with the generic cpufreq governors (it sets P-states as
requested by those governors).
The current setting is returned by reads from this attribute. Writing one
of the above strings to it changes the operation mode as indicated by that
string, if possible. If HW-managed P-states (HWP) are enabled, it is not
possible to change the driver's operation mode and attempts to write to
this attribute will fail.
cpufreq sysfs for Intel P-State
Since this driver registers with cpufreq, cpufreq sysfs is also presented.
There are some important differences, which need to be considered.
scaling_cur_freq: This displays the real frequency which was used during
the last sample period instead of what is requested. Some other cpufreq driver,
like acpi-cpufreq, displays what is requested (Some changes are on the
way to fix this for acpi-cpufreq driver). The same is true for frequencies
displayed at /proc/cpuinfo.
scaling_governor: This displays current active policy. Since each CPU has a
cpufreq sysfs, it is possible to set a scaling governor to each CPU. But this
is not possible with Intel P-States, as there is one common policy for all
CPUs. Here, the last requested policy will be applicable to all CPUs. It is
suggested that one use the cpupower utility to change policy to all CPUs at the
same time.
scaling_setspeed: This attribute can never be used with Intel P-State.
scaling_max_freq/scaling_min_freq: This interface can be used similarly to
the max_perf_pct/min_perf_pct of Intel P-State sysfs. However since frequencies
are converted to nearest possible P-State, this is prone to rounding errors.
This method is not preferred to limit performance.
affected_cpus: Not used
related_cpus: Not used
For contemporary Intel processors, the frequency is controlled by the
processor itself and the P-State exposed to software is related to
performance levels. The idea that frequency can be set to a single
frequency is fictional for Intel Core processors. Even if the scaling
driver selects a single P-State, the actual frequency the processor
will run at is selected by the processor itself.
Per-CPU limits
The kernel command line option "intel_pstate=per_cpu_perf_limits" forces
the intel_pstate driver to use per-CPU performance limits. When it is set,
the sysfs control interface described above is subject to limitations.
- The following controls are not available for both read and write
/sys/devices/system/cpu/intel_pstate/max_perf_pct
/sys/devices/system/cpu/intel_pstate/min_perf_pct
- The following controls can be used to set performance limits, as far as the
architecture of the processor permits:
/sys/devices/system/cpu/cpu*/cpufreq/scaling_max_freq
/sys/devices/system/cpu/cpu*/cpufreq/scaling_min_freq
/sys/devices/system/cpu/cpu*/cpufreq/scaling_governor
- User can still observe turbo percent and number of P-States from
/sys/devices/system/cpu/intel_pstate/turbo_pct
/sys/devices/system/cpu/intel_pstate/num_pstates
- User can read write system wide turbo status
/sys/devices/system/cpu/no_turbo
Support of energy performance hints
It is possible to provide hints to the HWP algorithms in the processor
to be more performance centric to more energy centric. When the driver
is using HWP, two additional cpufreq sysfs attributes are presented for
each logical CPU.
These attributes are:
- energy_performance_available_preferences
- energy_performance_preference
To get list of supported hints:
$ cat energy_performance_available_preferences
default performance balance_performance balance_power power
The current preference can be read or changed via cpufreq sysfs
attribute "energy_performance_preference". Reading from this attribute
will display current effective setting. User can write any of the valid
preference string to this attribute. User can always restore to power-on
default by writing "default".
Since threads can migrate to different CPUs, this is possible that the
new CPU may have different energy performance preference than the previous
one. To avoid such issues, either threads can be pinned to specific CPUs
or set the same energy performance preference value to all CPUs.
Tuning Intel P-State driver
When the performance can be tuned using PID (Proportional Integral
Derivative) controller, debugfs files are provided for adjusting performance.
They are presented under:
/sys/kernel/debug/pstate_snb/
The PID tunable parameters are:
deadband
d_gain_pct
i_gain_pct
p_gain_pct
sample_rate_ms
setpoint
To adjust these parameters, some understanding of driver implementation is
necessary. There are some tweeks described here, but be very careful. Adjusting
them requires expert level understanding of power and performance relationship.
These limits are only useful when the "powersave" policy is active.
-To make the system more responsive to load changes, sample_rate_ms can
be adjusted (current default is 10ms).
-To make the system use higher performance, even if the load is lower, setpoint
can be adjusted to a lower number. This will also lead to faster ramp up time
to reach the maximum P-State.
If there are no derivative and integral coefficients, The next P-State will be
equal to:
current P-State - ((setpoint - current cpu load) * p_gain_pct)
For example, if the current PID parameters are (Which are defaults for the core
processors like SandyBridge):
deadband = 0
d_gain_pct = 0
i_gain_pct = 0
p_gain_pct = 20
sample_rate_ms = 10
setpoint = 97
If the current P-State = 0x08 and current load = 100, this will result in the
next P-State = 0x08 - ((97 - 100) * 0.2) = 8.6 (rounded to 9). Here the P-State
goes up by only 1. If during next sample interval the current load doesn't
change and still 100, then P-State goes up by one again. This process will
continue as long as the load is more than the setpoint until the maximum P-State
is reached.
For the same load at setpoint = 60, this will result in the next P-State
= 0x08 - ((60 - 100) * 0.2) = 16
So by changing the setpoint from 97 to 60, there is an increase of the
next P-State from 9 to 16. So this will make processor execute at higher
P-State for the same CPU load. If the load continues to be more than the
setpoint during next sample intervals, then P-State will go up again till the
maximum P-State is reached. But the ramp up time to reach the maximum P-State
will be much faster when the setpoint is 60 compared to 97.
Debugging Intel P-State driver
Event tracing
To debug P-State transition, the Linux event tracing interface can be used.
There are two specific events, which can be enabled (Provided the kernel
configs related to event tracing are enabled).
# cd /sys/kernel/debug/tracing/
# echo 1 > events/power/pstate_sample/enable
# echo 1 > events/power/cpu_frequency/enable
# cat trace
gnome-terminal--4510 [001] ..s. 1177.680733: pstate_sample: core_busy=107
scaled=94 from=26 to=26 mperf=1143818 aperf=1230607 tsc=29838618
freq=2474476
cat-5235 [002] ..s. 1177.681723: cpu_frequency: state=2900000 cpu_id=2
Using ftrace
If function level tracing is required, the Linux ftrace interface can be used.
For example if we want to check how often a function to set a P-State is
called, we can set ftrace filter to intel_pstate_set_pstate.
# cd /sys/kernel/debug/tracing/
# cat available_filter_functions | grep -i pstate
intel_pstate_set_pstate
intel_pstate_cpu_init
...
# echo intel_pstate_set_pstate > set_ftrace_filter
# echo function > current_tracer
# cat trace | head -15
# tracer: function
#
# entries-in-buffer/entries-written: 80/80 #P:4
#
# _-----=> irqs-off
# / _----=> need-resched
# | / _---=> hardirq/softirq
# || / _--=> preempt-depth
# ||| / delay
# TASK-PID CPU# |||| TIMESTAMP FUNCTION
# | | | |||| | |
Xorg-3129 [000] ..s. 2537.644844: intel_pstate_set_pstate <-intel_pstate_timer_func
gnome-terminal--4510 [002] ..s. 2537.649844: intel_pstate_set_pstate <-intel_pstate_timer_func
gnome-shell-3409 [001] ..s. 2537.650850: intel_pstate_set_pstate <-intel_pstate_timer_func
<idle>-0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func

View File

@ -22,7 +22,8 @@ Required properties :
- #clock-cells : must contain 1
- #reset-cells : must contain 1
For the PRCM CCUs on H3/A64, one more clock is needed:
For the PRCM CCUs on H3/A64, two more clocks are needed:
- "pll-periph": the SoC's peripheral PLL from the main CCU
- "iosc": the SoC's internal frequency oscillator
Example for generic CCU:
@ -39,8 +40,8 @@ Example for PRCM CCU:
r_ccu: clock@01f01400 {
compatible = "allwinner,sun50i-a64-r-ccu";
reg = <0x01f01400 0x100>;
clocks = <&osc24M>, <&osc32k>, <&iosc>;
clock-names = "hosc", "losc", "iosc";
clocks = <&osc24M>, <&osc32k>, <&iosc>, <&ccu CLK_PLL_PERIPH0>;
clock-names = "hosc", "losc", "iosc", "pll-periph";
#clock-cells = <1>;
#reset-cells = <1>;
};

View File

@ -41,9 +41,9 @@ Required properties:
Optional properties:
In order to use the GPIO lines in PWM mode, some additional optional
properties are required. Only Armada 370 and XP support these properties.
properties are required.
- compatible: Must contain "marvell,armada-370-xp-gpio"
- compatible: Must contain "marvell,armada-370-gpio"
- reg: an additional register set is needed, for the GPIO Blink
Counter on/off registers.
@ -71,7 +71,7 @@ Example:
};
gpio1: gpio@18140 {
compatible = "marvell,armada-370-xp-gpio";
compatible = "marvell,armada-370-gpio";
reg = <0x18140 0x40>, <0x181c8 0x08>;
reg-names = "gpio", "pwm";
ngpios = <17>;

View File

@ -36,7 +36,7 @@ Optional properties:
control gpios
- threshold: allows setting the "click"-threshold in the range
from 20 to 80.
from 0 to 80.
- gain: allows setting the sensitivity in the range from 0 to
31. Note that lower values indicate higher

View File

@ -16,6 +16,11 @@ Required properties:
- reg: Base address of PMIC on Hi6220 SoC.
- interrupt-controller: Hi655x has internal IRQs (has own IRQ domain).
- pmic-gpios: The GPIO used by PMIC IRQ.
- #clock-cells: From common clock binding; shall be set to 0
Optional properties:
- clock-output-names: From common clock binding to override the
default output clock name
Example:
pmic: pmic@f8000000 {
@ -24,4 +29,5 @@ Example:
interrupt-controller;
#interrupt-cells = <2>;
pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
#clock-cells = <0>;
}

View File

@ -31,7 +31,7 @@ Example:
compatible = "st,stm32-timers";
reg = <0x40010000 0x400>;
clocks = <&rcc 0 160>;
clock-names = "clk_int";
clock-names = "int";
pwm {
compatible = "st,stm32-pwm";

View File

@ -18,6 +18,8 @@ Optional properties:
"ext_clock" (External clock provided to the card).
- post-power-on-delay-ms : Delay in ms after powering the card and
de-asserting the reset-gpios (if any)
- power-off-delay-us : Delay in us after asserting the reset-gpios (if any)
during power off of the card.
Example:

View File

@ -34,7 +34,7 @@ Required properties:
"brcm,bcm6328-switch"
"brcm,bcm6368-switch" and the mandatory "brcm,bcm63xx-switch"
See Documentation/devicetree/bindings/dsa/dsa.txt for a list of additional
See Documentation/devicetree/bindings/net/dsa/dsa.txt for a list of additional
required and optional properties.
Examples:

View File

@ -26,6 +26,10 @@ Optional properties:
- interrupt-controller : Indicates the switch is itself an interrupt
controller. This is used for the PHY interrupts.
#interrupt-cells = <2> : Controller uses two cells, number and flag
- eeprom-length : Set to the length of an EEPROM connected to the
switch. Must be set if the switch can not detect
the presence and/or size of a connected EEPROM,
otherwise optional.
- mdio : Container of PHY and devices on the switches MDIO
bus.
- mdio? : Container of PHYs and devices on the external MDIO

View File

@ -15,6 +15,10 @@ Optional properties:
- phy-reset-active-high : If present then the reset sequence using the GPIO
specified in the "phy-reset-gpios" property is reversed (H=reset state,
L=operation state).
- phy-reset-post-delay : Post reset delay in milliseconds. If present then
a delay of phy-reset-post-delay milliseconds will be observed after the
phy-reset-gpios has been toggled. Can be omitted thus no delay is
observed. Delay is in range of 1ms to 1000ms. Other delays are invalid.
- phy-supply : regulator that powers the Ethernet PHY.
- phy-handle : phandle to the PHY device connected to this device.
- fixed-link : Assume a fixed link. See fixed-link.txt in the same directory.

View File

@ -27,6 +27,7 @@ Optional properties:
of the device. On many systems this is wired high so the device goes
out of reset at power-on, but if it is under program control, this
optional GPIO can wake up in response to it.
- vdd33a-supply, vddvario-supply : 3.3V analog and IO logic power supplies
Examples:

View File

@ -247,7 +247,6 @@ bias-bus-hold - latch weakly
bias-pull-up - pull up the pin
bias-pull-down - pull down the pin
bias-pull-pin-default - use pin-default pull state
bi-directional - pin supports simultaneous input/output operations
drive-push-pull - drive actively high and low
drive-open-drain - drive with open drain
drive-open-source - drive with open source
@ -260,7 +259,6 @@ input-debounce - debounce mode with debound time X
power-source - select between different power supplies
low-power-enable - enable low power mode
low-power-disable - disable low power mode
output-enable - enable output on pin regardless of output value
output-low - set the pin to output mode with low level
output-high - set the pin to output mode with high level
slew-rate - set the slew rate

View File

@ -10,6 +10,7 @@ Required properties:
- "rockchip,rk3288-usb", "rockchip,rk3066-usb", "snps,dwc2": for rk3288 Soc;
- "lantiq,arx100-usb": The DWC2 USB controller instance in Lantiq ARX SoCs;
- "lantiq,xrx200-usb": The DWC2 USB controller instance in Lantiq XRX SoCs;
- "amlogic,meson8-usb": The DWC2 USB controller instance in Amlogic Meson8 SoCs;
- "amlogic,meson8b-usb": The DWC2 USB controller instance in Amlogic Meson8b SoCs;
- "amlogic,meson-gxbb-usb": The DWC2 USB controller instance in Amlogic S905 SoCs;
- "amcc,dwc-otg": The DWC2 USB controller instance in AMCC Canyonlands 460EX SoCs;

View File

@ -15,7 +15,7 @@ It has been tested with the following devices:
The driver allows configuration of the touch screen via a set of sysfs files:
/sys/class/input/eventX/device/device/threshold:
allows setting the "click"-threshold in the range from 20 to 80.
allows setting the "click"-threshold in the range from 0 to 80.
/sys/class/input/eventX/device/device/gain:
allows setting the sensitivity in the range from 0 to 31. Note that

View File

@ -0,0 +1,194 @@
The QorIQ DPAA Ethernet Driver
==============================
Authors:
Madalin Bucur <madalin.bucur@nxp.com>
Camelia Groza <camelia.groza@nxp.com>
Contents
========
- DPAA Ethernet Overview
- DPAA Ethernet Supported SoCs
- Configuring DPAA Ethernet in your kernel
- DPAA Ethernet Frame Processing
- DPAA Ethernet Features
- Debugging
DPAA Ethernet Overview
======================
DPAA stands for Data Path Acceleration Architecture and it is a
set of networking acceleration IPs that are available on several
generations of SoCs, both on PowerPC and ARM64.
The Freescale DPAA architecture consists of a series of hardware blocks
that support Ethernet connectivity. The Ethernet driver depends upon the
following drivers in the Linux kernel:
- Peripheral Access Memory Unit (PAMU) (* needed only for PPC platforms)
drivers/iommu/fsl_*
- Frame Manager (FMan)
drivers/net/ethernet/freescale/fman
- Queue Manager (QMan), Buffer Manager (BMan)
drivers/soc/fsl/qbman
A simplified view of the dpaa_eth interfaces mapped to FMan MACs:
dpaa_eth /eth0\ ... /ethN\
driver | | | |
------------- ---- ----------- ---- -------------
-Ports / Tx Rx \ ... / Tx Rx \
FMan | | | |
-MACs | MAC0 | | MACN |
/ dtsec0 \ ... / dtsecN \ (or tgec)
/ \ / \(or memac)
--------- -------------- --- -------------- ---------
FMan, FMan Port, FMan SP, FMan MURAM drivers
---------------------------------------------------------
FMan HW blocks: MURAM, MACs, Ports, SP
---------------------------------------------------------
The dpaa_eth relation to the QMan, BMan and FMan:
________________________________
dpaa_eth / eth0 \
driver / \
--------- -^- -^- -^- --- ---------
QMan driver / \ / \ / \ \ / | BMan |
|Rx | |Rx | |Tx | |Tx | | driver |
--------- |Dfl| |Err| |Cnf| |FQs| | |
QMan HW |FQ | |FQ | |FQs| | | | |
/ \ / \ / \ \ / | |
--------- --- --- --- -v- ---------
| FMan QMI | |
| FMan HW FMan BMI | BMan HW |
----------------------- --------
where the acronyms used above (and in the code) are:
DPAA = Data Path Acceleration Architecture
FMan = DPAA Frame Manager
QMan = DPAA Queue Manager
BMan = DPAA Buffers Manager
QMI = QMan interface in FMan
BMI = BMan interface in FMan
FMan SP = FMan Storage Profiles
MURAM = Multi-user RAM in FMan
FQ = QMan Frame Queue
Rx Dfl FQ = default reception FQ
Rx Err FQ = Rx error frames FQ
Tx Cnf FQ = Tx confirmation FQs
Tx FQs = transmission frame queues
dtsec = datapath three speed Ethernet controller (10/100/1000 Mbps)
tgec = ten gigabit Ethernet controller (10 Gbps)
memac = multirate Ethernet MAC (10/100/1000/10000)
DPAA Ethernet Supported SoCs
============================
The DPAA drivers enable the Ethernet controllers present on the following SoCs:
# PPC
P1023
P2041
P3041
P4080
P5020
P5040
T1023
T1024
T1040
T1042
T2080
T4240
B4860
# ARM
LS1043A
LS1046A
Configuring DPAA Ethernet in your kernel
========================================
To enable the DPAA Ethernet driver, the following Kconfig options are required:
# common for arch/arm64 and arch/powerpc platforms
CONFIG_FSL_DPAA=y
CONFIG_FSL_FMAN=y
CONFIG_FSL_DPAA_ETH=y
CONFIG_FSL_XGMAC_MDIO=y
# for arch/powerpc only
CONFIG_FSL_PAMU=y
# common options needed for the PHYs used on the RDBs
CONFIG_VITESSE_PHY=y
CONFIG_REALTEK_PHY=y
CONFIG_AQUANTIA_PHY=y
DPAA Ethernet Frame Processing
==============================
On Rx, buffers for the incoming frames are retrieved from one of the three
existing buffers pools. The driver initializes and seeds these, each with
buffers of different sizes: 1KB, 2KB and 4KB.
On Tx, all transmitted frames are returned to the driver through Tx
confirmation frame queues. The driver is then responsible for freeing the
buffers. In order to do this properly, a backpointer is added to the buffer
before transmission that points to the skb. When the buffer returns to the
driver on a confirmation FQ, the skb can be correctly consumed.
DPAA Ethernet Features
======================
Currently the DPAA Ethernet driver enables the basic features required for
a Linux Ethernet driver. The support for advanced features will be added
gradually.
The driver has Rx and Tx checksum offloading for UDP and TCP. Currently the Rx
checksum offload feature is enabled by default and cannot be controlled through
ethtool.
The driver has support for multiple prioritized Tx traffic classes. Priorities
range from 0 (lowest) to 3 (highest). These are mapped to HW workqueues with
strict priority levels. Each traffic class contains NR_CPU TX queues. By
default, only one traffic class is enabled and the lowest priority Tx queues
are used. Higher priority traffic classes can be enabled with the mqprio
qdisc. For example, all four traffic classes are enabled on an interface with
the following command. Furthermore, skb priority levels are mapped to traffic
classes as follows:
* priorities 0 to 3 - traffic class 0 (low priority)
* priorities 4 to 7 - traffic class 1 (medium-low priority)
* priorities 8 to 11 - traffic class 2 (medium-high priority)
* priorities 12 to 15 - traffic class 3 (high priority)
tc qdisc add dev <int> root handle 1: \
mqprio num_tc 4 map 0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 hw 1
Debugging
=========
The following statistics are exported for each interface through ethtool:
- interrupt count per CPU
- Rx packets count per CPU
- Tx packets count per CPU
- Tx confirmed packets count per CPU
- Tx S/G frames count per CPU
- Tx error count per CPU
- Rx error count per CPU
- Rx error count per type
- congestion related statistics:
- congestion status
- time spent in congestion
- number of time the device entered congestion
- dropped packets count per cause
The driver also exports the following information in sysfs:
- the FQ IDs for each FQ type
/sys/devices/platform/dpaa-ethernet.0/net/<int>/fqids
- the IDs of the buffer pools in use
/sys/devices/platform/dpaa-ethernet.0/net/<int>/bpids

View File

@ -122,7 +122,7 @@ associated flow of the packet. The hash is either provided by hardware
or will be computed in the stack. Capable hardware can pass the hash in
the receive descriptor for the packet; this would usually be the same
hash used for RSS (e.g. computed Toeplitz hash). The hash is saved in
skb->rx_hash and can be used elsewhere in the stack as a hash of the
skb->hash and can be used elsewhere in the stack as a hash of the
packets flow.
Each receive hardware queue has an associated list of CPUs to which

View File

@ -1,7 +1,7 @@
TCP protocol
============
Last updated: 9 February 2008
Last updated: 3 June 2017
Contents
========
@ -29,18 +29,19 @@ As of 2.6.13, Linux supports pluggable congestion control algorithms.
A congestion control mechanism can be registered through functions in
tcp_cong.c. The functions used by the congestion control mechanism are
registered via passing a tcp_congestion_ops struct to
tcp_register_congestion_control. As a minimum name, ssthresh,
cong_avoid must be valid.
tcp_register_congestion_control. As a minimum, the congestion control
mechanism must provide a valid name and must implement either ssthresh,
cong_avoid and undo_cwnd hooks or the "omnipotent" cong_control hook.
Private data for a congestion control mechanism is stored in tp->ca_priv.
tcp_ca(tp) returns a pointer to this space. This is preallocated space - it
is important to check the size of your private data will fit this space, or
alternatively space could be allocated elsewhere and a pointer to it could
alternatively, space could be allocated elsewhere and a pointer to it could
be stored here.
There are three kinds of congestion control algorithms currently: The
simplest ones are derived from TCP reno (highspeed, scalable) and just
provide an alternative the congestion window calculation. More complex
provide an alternative congestion window calculation. More complex
ones like BIC try to look at other events to provide better
heuristics. There are also round trip time based algorithms like
Vegas and Westwood+.
@ -49,21 +50,15 @@ Good TCP congestion control is a complex problem because the algorithm
needs to maintain fairness and performance. Please review current
research and RFC's before developing new modules.
The method that is used to determine which congestion control mechanism is
determined by the setting of the sysctl net.ipv4.tcp_congestion_control.
The default congestion control will be the last one registered (LIFO);
so if you built everything as modules, the default will be reno. If you
build with the defaults from Kconfig, then CUBIC will be builtin (not a
module) and it will end up the default.
The default congestion control mechanism is chosen based on the
DEFAULT_TCP_CONG Kconfig parameter. If you really want a particular default
value then you can set it using sysctl net.ipv4.tcp_congestion_control. The
module will be autoloaded if needed and you will get the expected protocol. If
you ask for an unknown congestion method, then the sysctl attempt will fail.
If you really want a particular default value then you will need
to set it with the sysctl. If you use a sysctl, the module will be autoloaded
if needed and you will get the expected protocol. If you ask for an
unknown congestion method, then the sysctl attempt will fail.
If you remove a tcp congestion control module, then you will get the next
If you remove a TCP congestion control module, then you will get the next
available one. Since reno cannot be built as a module, and cannot be
deleted, it will always be available.
removed, it will always be available.
How the new TCP output machine [nyi] works.
===========================================

View File

@ -16,6 +16,8 @@ ALC880
6-jack in back, 2-jack in front
6stack-digout
6-jack with a SPDIF out
6stack-automute
6-jack with headphone jack detection
ALC260
======
@ -62,6 +64,8 @@ lenovo-dock
Enables docking station I/O for some Lenovos
hp-gpio-led
GPIO LED support on HP laptops
hp-dock-gpio-mic1-led
HP dock with mic LED support
dell-headset-multi
Headset jack, which can also be used as mic-in
dell-headset-dock
@ -72,6 +76,12 @@ alc283-sense-combo
Combo jack sensing on ALC283
tpt440-dock
Pin configs for Lenovo Thinkpad Dock support
tpt440
Lenovo Thinkpad T440s setup
tpt460
Lenovo Thinkpad T460/560 setup
dual-codecs
Lenovo laptops with dual codecs
ALC66x/67x/892
==============
@ -97,6 +107,8 @@ inv-dmic
Inverted internal mic workaround
dell-headset-multi
Headset jack, which can also be used as mic-in
dual-codecs
Lenovo laptops with dual codecs
ALC680
======
@ -114,6 +126,8 @@ inv-dmic
Inverted internal mic workaround
no-primary-hp
VAIO Z/VGC-LN51JGB workaround (for fixed speaker DAC)
dual-codecs
ALC1220 dual codecs for Gaming mobos
ALC861/660
==========
@ -206,65 +220,47 @@ auto
Conexant 5045
=============
laptop-hpsense
Laptop with HP sense (old model laptop)
laptop-micsense
Laptop with Mic sense (old model fujitsu)
laptop-hpmicsense
Laptop with HP and Mic senses
benq
Benq R55E
laptop-hp530
HP 530 laptop
test
for testing/debugging purpose, almost all controls can be
adjusted. Appearing only when compiled with $CONFIG_SND_DEBUG=y
cap-mix-amp
Fix max input level on mixer widget
toshiba-p105
Toshiba P105 quirk
hp-530
HP 530 quirk
Conexant 5047
=============
laptop
Basic Laptop config
laptop-hp
Laptop config for some HP models (subdevice 30A5)
laptop-eapd
Laptop config with EAPD support
test
for testing/debugging purpose, almost all controls can be
adjusted. Appearing only when compiled with $CONFIG_SND_DEBUG=y
cap-mix-amp
Fix max input level on mixer widget
Conexant 5051
=============
laptop
Basic Laptop config (default)
hp
HP Spartan laptop
hp-dv6736
HP dv6736
hp-f700
HP Compaq Presario F700
ideapad
Lenovo IdeaPad laptop
toshiba
Toshiba Satellite M300
lenovo-x200
Lenovo X200 quirk
Conexant 5066
=============
laptop
Basic Laptop config (default)
hp-laptop
HP laptops, e g G60
asus
Asus K52JU, Lenovo G560
dell-laptop
Dell laptops
dell-vostro
Dell Vostro
olpc-xo-1_5
OLPC XO 1.5
ideapad
Lenovo IdeaPad U150
stereo-dmic
Workaround for inverted stereo digital mic
gpio1
Enable GPIO1 pin
headphone-mic-pin
Enable headphone mic NID 0x18 without detection
tp410
Thinkpad T400 & co quirks
thinkpad
Lenovo Thinkpad
Thinkpad mute/mic LED quirk
lemote-a1004
Lemote A1004 quirk
lemote-a1205
Lemote A1205 quirk
olpc-xo
OLPC XO quirk
mute-led-eapd
Mute LED control via EAPD
hp-dock
HP dock support
mute-led-gpio
Mute LED control via GPIO
STAC9200
========
@ -444,6 +440,8 @@ dell-eq
Dell desktops/laptops
alienware
Alienware M17x
asus-mobo
Pin configs for ASUS mobo with 5.1/SPDIF out
auto
BIOS setup (default)
@ -477,6 +475,8 @@ hp-envy-ts-bass
Pin fixup for HP Envy TS bass speaker (NID 0x10)
hp-bnb13-eq
Hardware equalizer setup for HP laptops
hp-envy-ts-bass
HP Envy TS bass support
auto
BIOS setup (default)
@ -496,10 +496,22 @@ auto
Cirrus Logic CS4206/4207
========================
mbp53
MacBook Pro 5,3
mbp55
MacBook Pro 5,5
imac27
IMac 27 Inch
imac27_122
iMac 12,2
apple
Generic Apple quirk
mbp101
MacBookPro 10,1
mbp81
MacBookPro 8,1
mba42
MacBookAir 4,2
auto
BIOS setup (default)
@ -509,6 +521,10 @@ mba6
MacBook Air 6,1 and 6,2
gpio0
Enable GPIO 0 amp
mbp11
MacBookPro 11,2
macmini
MacMini 7,1
auto
BIOS setup (default)

View File

@ -1172,7 +1172,7 @@ N: clps711x
ARM/CIRRUS LOGIC EP93XX ARM ARCHITECTURE
M: Hartley Sweeten <hsweeten@visionengravers.com>
M: Ryan Mallon <rmallon@gmail.com>
M: Alexander Sverdlin <alexander.sverdlin@gmail.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: arch/arm/mach-ep93xx/
@ -1489,14 +1489,16 @@ M: Gregory Clement <gregory.clement@free-electrons.com>
M: Sebastian Hesselbarth <sebastian.hesselbarth@gmail.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
S: Maintained
F: arch/arm/mach-mvebu/
F: drivers/rtc/rtc-armada38x.c
F: arch/arm/boot/dts/armada*
F: arch/arm/boot/dts/kirkwood*
F: arch/arm/configs/mvebu_*_defconfig
F: arch/arm/mach-mvebu/
F: arch/arm64/boot/dts/marvell/armada*
F: drivers/cpufreq/mvebu-cpufreq.c
F: drivers/irqchip/irq-armada-370-xp.c
F: drivers/irqchip/irq-mvebu-*
F: drivers/pinctrl/mvebu/
F: arch/arm/configs/mvebu_*_defconfig
F: drivers/rtc/rtc-armada38x.c
ARM/Marvell Berlin SoC support
M: Jisheng Zhang <jszhang@marvell.com>
@ -1721,7 +1723,6 @@ N: rockchip
ARM/SAMSUNG EXYNOS ARM ARCHITECTURES
M: Kukjin Kim <kgene@kernel.org>
M: Krzysztof Kozlowski <krzk@kernel.org>
R: Javier Martinez Canillas <javier@osg.samsung.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: linux-samsung-soc@vger.kernel.org (moderated for non-subscribers)
Q: https://patchwork.kernel.org/project/linux-samsung-soc/list/
@ -1829,7 +1830,6 @@ F: drivers/edac/altera_edac.
ARM/STI ARCHITECTURE
M: Patrice Chotard <patrice.chotard@st.com>
L: linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
L: kernel@stlinux.com
W: http://www.stlinux.com
S: Maintained
F: arch/arm/mach-sti/
@ -5622,7 +5622,7 @@ F: scripts/get_maintainer.pl
GENWQE (IBM Generic Workqueue Card)
M: Frank Haverkamp <haver@linux.vnet.ibm.com>
M: Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
M: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
S: Supported
F: drivers/misc/genwqe/
@ -5667,7 +5667,6 @@ F: tools/testing/selftests/gpio/
GPIO SUBSYSTEM
M: Linus Walleij <linus.walleij@linaro.org>
M: Alexandre Courbot <gnurou@gmail.com>
L: linux-gpio@vger.kernel.org
T: git git://git.kernel.org/pub/scm/linux/kernel/git/linusw/linux-gpio.git
S: Maintained
@ -7143,7 +7142,7 @@ S: Maintained
F: drivers/media/platform/rcar_jpu.c
JSM Neo PCI based serial card
M: Gabriel Krisman Bertazi <krisman@linux.vnet.ibm.com>
M: Guilherme G. Piccoli <gpiccoli@linux.vnet.ibm.com>
L: linux-serial@vger.kernel.org
S: Maintained
F: drivers/tty/serial/jsm/
@ -7707,7 +7706,7 @@ F: drivers/platform/x86/hp_accel.c
LIVE PATCHING
M: Josh Poimboeuf <jpoimboe@redhat.com>
M: Jessica Yu <jeyu@redhat.com>
M: Jessica Yu <jeyu@kernel.org>
M: Jiri Kosina <jikos@kernel.org>
M: Miroslav Benes <mbenes@suse.cz>
R: Petr Mladek <pmladek@suse.com>
@ -8508,7 +8507,7 @@ S: Odd Fixes
F: drivers/media/radio/radio-miropcm20*
MELLANOX MLX4 core VPI driver
M: Yishai Hadas <yishaih@mellanox.com>
M: Tariq Toukan <tariqt@mellanox.com>
L: netdev@vger.kernel.org
L: linux-rdma@vger.kernel.org
W: http://www.mellanox.com
@ -8516,7 +8515,6 @@ Q: http://patchwork.ozlabs.org/project/netdev/list/
S: Supported
F: drivers/net/ethernet/mellanox/mlx4/
F: include/linux/mlx4/
F: include/uapi/rdma/mlx4-abi.h
MELLANOX MLX4 IB driver
M: Yishai Hadas <yishaih@mellanox.com>
@ -8526,6 +8524,7 @@ Q: http://patchwork.kernel.org/project/linux-rdma/list/
S: Supported
F: drivers/infiniband/hw/mlx4/
F: include/linux/mlx4/
F: include/uapi/rdma/mlx4-abi.h
MELLANOX MLX5 core VPI driver
M: Saeed Mahameed <saeedm@mellanox.com>
@ -8538,7 +8537,6 @@ Q: http://patchwork.ozlabs.org/project/netdev/list/
S: Supported
F: drivers/net/ethernet/mellanox/mlx5/core/
F: include/linux/mlx5/
F: include/uapi/rdma/mlx5-abi.h
MELLANOX MLX5 IB driver
M: Matan Barak <matanb@mellanox.com>
@ -8549,6 +8547,7 @@ Q: http://patchwork.kernel.org/project/linux-rdma/list/
S: Supported
F: drivers/infiniband/hw/mlx5/
F: include/linux/mlx5/
F: include/uapi/rdma/mlx5-abi.h
MELEXIS MLX90614 DRIVER
M: Crt Mori <cmo@melexis.com>
@ -8588,7 +8587,7 @@ S: Maintained
F: drivers/media/dvb-frontends/mn88473*
MODULE SUPPORT
M: Jessica Yu <jeyu@redhat.com>
M: Jessica Yu <jeyu@kernel.org>
M: Rusty Russell <rusty@rustcorp.com.au>
T: git git://git.kernel.org/pub/scm/linux/kernel/git/jeyu/linux.git modules-next
S: Maintained
@ -10457,7 +10456,7 @@ S: Orphan
PXA RTC DRIVER
M: Robert Jarzmik <robert.jarzmik@free.fr>
L: rtc-linux@googlegroups.com
L: linux-rtc@vger.kernel.org
S: Maintained
QAT DRIVER
@ -10764,7 +10763,7 @@ X: kernel/torture.c
REAL TIME CLOCK (RTC) SUBSYSTEM
M: Alessandro Zummo <a.zummo@towertech.it>
M: Alexandre Belloni <alexandre.belloni@free-electrons.com>
L: rtc-linux@googlegroups.com
L: linux-rtc@vger.kernel.org
Q: http://patchwork.ozlabs.org/project/rtc-linux/list/
T: git git://git.kernel.org/pub/scm/linux/kernel/git/abelloni/linux.git
S: Maintained
@ -11275,7 +11274,6 @@ F: drivers/media/rc/serial_ir.c
STI CEC DRIVER
M: Benjamin Gaignard <benjamin.gaignard@linaro.org>
L: kernel@stlinux.com
S: Maintained
F: drivers/staging/media/st-cec/
F: Documentation/devicetree/bindings/media/stih-cec.txt
@ -11785,6 +11783,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/nsekhar/linux-davinci.git
S: Supported
F: arch/arm/mach-davinci/
F: drivers/i2c/busses/i2c-davinci.c
F: arch/arm/boot/dts/da850*
TI DAVINCI SERIES MEDIA DRIVER
M: "Lad, Prabhakar" <prabhakar.csengg@gmail.com>
@ -13868,7 +13867,7 @@ S: Odd fixes
F: drivers/net/wireless/wl3501*
WOLFSON MICROELECTRONICS DRIVERS
L: patches@opensource.wolfsonmicro.com
L: patches@opensource.cirrus.com
T: git https://github.com/CirrusLogic/linux-drivers.git
W: https://github.com/CirrusLogic/linux-drivers/wiki
S: Supported

View File

@ -1,7 +1,7 @@
VERSION = 4
PATCHLEVEL = 12
SUBLEVEL = 0
EXTRAVERSION = -rc2
EXTRAVERSION = -rc7
NAME = Fearless Coyote
# *DOCUMENTATION*
@ -1437,7 +1437,7 @@ help:
@echo ' make V=0|1 [targets] 0 => quiet build (default), 1 => verbose build'
@echo ' make V=2 [targets] 2 => give reason for rebuild of target'
@echo ' make O=dir [targets] Locate all output files in "dir", including .config'
@echo ' make C=1 [targets] Check all c source with $$CHECK (sparse by default)'
@echo ' make C=1 [targets] Check re-compiled c source with $$CHECK (sparse by default)'
@echo ' make C=2 [targets] Force check of all c source with $$CHECK'
@echo ' make RECORDMCOUNT_WARN=1 [targets] Warn about ignored mcount sections'
@echo ' make W=n [targets] Enable extra gcc checks, n=1,2,3 where'

View File

@ -65,7 +65,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
(!vma || addr + len <= vma->vm_start))
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}

View File

@ -17,14 +17,12 @@
@ there.
.inst 'M' | ('Z' << 8) | (0x1310 << 16) @ tstne r0, #0x4d000
#else
mov r0, r0
W(mov) r0, r0
#endif
.endm
.macro __EFI_HEADER
#ifdef CONFIG_EFI_STUB
b __efi_start
.set start_offset, __efi_start - start
.org start + 0x3c
@

View File

@ -130,19 +130,22 @@ start:
.rept 7
__nop
.endr
ARM( mov r0, r0 )
ARM( b 1f )
THUMB( badr r12, 1f )
THUMB( bx r12 )
#ifndef CONFIG_THUMB2_KERNEL
mov r0, r0
#else
AR_CLASS( sub pc, pc, #3 ) @ A/R: switch to Thumb2 mode
M_CLASS( nop.w ) @ M: already in Thumb2 mode
.thumb
#endif
W(b) 1f
.word _magic_sig @ Magic numbers to help the loader
.word _magic_start @ absolute load/run zImage address
.word _magic_end @ zImage end address
.word 0x04030201 @ endianness flag
THUMB( .thumb )
1: __EFI_HEADER
__EFI_HEADER
1:
ARM_BE8( setend be ) @ go BE8 if compiled for BE8
AR_CLASS( mrs r9, cpsr )
#ifdef CONFIG_ARM_VIRT_EXT

View File

@ -220,7 +220,7 @@
mmc1_pins: pinmux_mmc1_pins {
pinctrl-single,pins = <
AM33XX_IOPAD(0x960, PIN_INPUT | MUX_MODE7) /* spi0_cs1.gpio0_6 */
AM33XX_IOPAD(0x96c, PIN_INPUT | MUX_MODE7) /* uart0_rtsn.gpio1_9 */
>;
};
@ -280,10 +280,6 @@
AM33XX_IOPAD(0x834, PIN_INPUT_PULLUP | MUX_MODE7) /* nKbdReset - gpmc_ad13.gpio1_13 */
AM33XX_IOPAD(0x838, PIN_INPUT_PULLUP | MUX_MODE7) /* nDispReset - gpmc_ad14.gpio1_14 */
AM33XX_IOPAD(0x844, PIN_INPUT_PULLUP | MUX_MODE7) /* USB1_enPower - gpmc_a1.gpio1_17 */
/* AVR Programming - SPI Bus (bit bang) - Screen and Keyboard */
AM33XX_IOPAD(0x954, PIN_INPUT_PULLUP | MUX_MODE7) /* Kbd/Disp/BattMOSI spi0_d0.gpio0_3 */
AM33XX_IOPAD(0x958, PIN_INPUT_PULLUP | MUX_MODE7) /* Kbd/Disp/BattMISO spi0_d1.gpio0_4 */
AM33XX_IOPAD(0x950, PIN_INPUT_PULLUP | MUX_MODE7) /* Kbd/Disp/BattSCLK spi0_clk.gpio0_2 */
/* PDI Bus - Battery system */
AM33XX_IOPAD(0x840, PIN_INPUT_PULLUP | MUX_MODE7) /* nBattReset gpmc_a0.gpio1_16 */
AM33XX_IOPAD(0x83c, PIN_INPUT_PULLUP | MUX_MODE7) /* BattPDIData gpmc_ad15.gpio1_15 */
@ -384,7 +380,7 @@
pinctrl-names = "default";
pinctrl-0 = <&mmc1_pins>;
bus-width = <4>;
cd-gpios = <&gpio0 6 GPIO_ACTIVE_LOW>;
cd-gpios = <&gpio1 9 GPIO_ACTIVE_LOW>;
vmmc-supply = <&vmmcsd_fixed>;
};

View File

@ -3,6 +3,11 @@
#include <dt-bindings/clock/bcm2835-aux.h>
#include <dt-bindings/gpio/gpio.h>
/* firmware-provided startup stubs live here, where the secondary CPUs are
* spinning.
*/
/memreserve/ 0x00000000 0x00001000;
/* This include file covers the common peripherals and configuration between
* bcm2835 and bcm2836 implementations, leaving the CPU configuration to
* bcm2835.dtsi and bcm2836.dtsi.

View File

@ -120,10 +120,16 @@
ethphy0: ethernet-phy@2 {
reg = <2>;
micrel,led-mode = <1>;
clocks = <&clks IMX6UL_CLK_ENET_REF>;
clock-names = "rmii-ref";
};
ethphy1: ethernet-phy@1 {
reg = <1>;
micrel,led-mode = <1>;
clocks = <&clks IMX6UL_CLK_ENET2_REF>;
clock-names = "rmii-ref";
};
};
};

View File

@ -137,8 +137,8 @@ netcp: netcp@26000000 {
/* NetCP address range */
ranges = <0 0x26000000 0x1000000>;
clocks = <&clkpa>, <&clkcpgmac>, <&chipclk12>, <&clkosr>;
clock-names = "pa_clk", "ethss_clk", "cpts", "osr_clk";
clocks = <&clkpa>, <&clkcpgmac>, <&chipclk12>;
clock-names = "pa_clk", "ethss_clk", "cpts";
dma-coherent;
ti,navigator-dmas = <&dma_gbe 0>,

View File

@ -232,6 +232,14 @@
};
};
osr: sram@70000000 {
compatible = "mmio-sram";
reg = <0x70000000 0x10000>;
#address-cells = <1>;
#size-cells = <1>;
clocks = <&clkosr>;
};
dspgpio0: keystone_dsp_gpio@02620240 {
compatible = "ti,keystone-dsp-gpio";
gpio-controller;

View File

@ -558,10 +558,11 @@
};
r_ccu: clock@1f01400 {
compatible = "allwinner,sun50i-a64-r-ccu";
compatible = "allwinner,sun8i-h3-r-ccu";
reg = <0x01f01400 0x100>;
clocks = <&osc24M>, <&osc32k>, <&iosc>;
clock-names = "hosc", "losc", "iosc";
clocks = <&osc24M>, <&osc32k>, <&iosc>,
<&ccu 9>;
clock-names = "hosc", "losc", "iosc", "pll-periph";
#clock-cells = <1>;
#reset-cells = <1>;
};

View File

@ -1,4 +1,4 @@
#include <versatile-ab.dts>
#include "versatile-ab.dts"
/ {
model = "ARM Versatile PB";

View File

@ -235,7 +235,7 @@ int mcpm_cpu_power_up(unsigned int cpu, unsigned int cluster)
return ret;
}
typedef void (*phys_reset_t)(unsigned long);
typedef typeof(cpu_reset) phys_reset_t;
void mcpm_cpu_power_down(void)
{
@ -300,7 +300,7 @@ void mcpm_cpu_power_down(void)
* on the CPU.
*/
phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset);
phys_reset(__pa_symbol(mcpm_entry_point));
phys_reset(__pa_symbol(mcpm_entry_point), false);
/* should never get here */
BUG();
@ -389,7 +389,7 @@ static int __init nocache_trampoline(unsigned long _arg)
__mcpm_cpu_down(cpu, cluster);
phys_reset = (phys_reset_t)(unsigned long)__pa_symbol(cpu_reset);
phys_reset(__pa_symbol(mcpm_entry_point));
phys_reset(__pa_symbol(mcpm_entry_point), false);
BUG();
}

View File

@ -19,7 +19,8 @@ struct dev_archdata {
#ifdef CONFIG_XEN
const struct dma_map_ops *dev_dma_ops;
#endif
bool dma_coherent;
unsigned int dma_coherent:1;
unsigned int dma_ops_setup:1;
};
struct omap_device;

View File

@ -66,6 +66,7 @@ typedef pte_t *pte_addr_t;
#define pgprot_noncached(prot) (prot)
#define pgprot_writecombine(prot) (prot)
#define pgprot_dmacoherent(prot) (prot)
#define pgprot_device(prot) (prot)
/*

View File

@ -104,7 +104,6 @@ __do_hyp_init:
@ - Write permission implies XN: disabled
@ - Instruction cache: enabled
@ - Data/Unified cache: enabled
@ - Memory alignment checks: enabled
@ - MMU: enabled (this code must be run from an identity mapping)
mrc p15, 4, r0, c1, c0, 0 @ HSCR
ldr r2, =HSCTLR_MASK
@ -112,8 +111,8 @@ __do_hyp_init:
mrc p15, 0, r1, c1, c0, 0 @ SCTLR
ldr r2, =(HSCTLR_EE | HSCTLR_FI | HSCTLR_I | HSCTLR_C)
and r1, r1, r2
ARM( ldr r2, =(HSCTLR_M | HSCTLR_A) )
THUMB( ldr r2, =(HSCTLR_M | HSCTLR_A | HSCTLR_TE) )
ARM( ldr r2, =(HSCTLR_M) )
THUMB( ldr r2, =(HSCTLR_M | HSCTLR_TE) )
orr r1, r1, r2
orr r0, r0, r1
mcr p15, 4, r0, c1, c0, 0 @ HSCR

View File

@ -1,6 +1,7 @@
menuconfig ARCH_AT91
bool "Atmel SoCs"
depends on ARCH_MULTI_V4T || ARCH_MULTI_V5 || ARCH_MULTI_V7
select ARM_CPU_SUSPEND if PM
select COMMON_CLK_AT91
select GPIOLIB
select PINCTRL

View File

@ -153,7 +153,8 @@ int __init davinci_pm_init(void)
davinci_sram_suspend = sram_alloc(davinci_cpu_suspend_sz, NULL);
if (!davinci_sram_suspend) {
pr_err("PM: cannot allocate SRAM memory\n");
return -ENOMEM;
ret = -ENOMEM;
goto no_sram_mem;
}
davinci_sram_push(davinci_sram_suspend, davinci_cpu_suspend,
@ -161,6 +162,10 @@ int __init davinci_pm_init(void)
suspend_set_ops(&davinci_pm_ops);
return 0;
no_sram_mem:
iounmap(pm_config.ddrpsc_reg_base);
no_ddrpsc_mem:
iounmap(pm_config.ddrpll_reg_base);
no_ddrpll_mem:

View File

@ -2311,7 +2311,14 @@ int arm_iommu_attach_device(struct device *dev,
}
EXPORT_SYMBOL_GPL(arm_iommu_attach_device);
static void __arm_iommu_detach_device(struct device *dev)
/**
* arm_iommu_detach_device
* @dev: valid struct device pointer
*
* Detaches the provided device from a previously attached map.
* This voids the dma operations (dma_map_ops pointer)
*/
void arm_iommu_detach_device(struct device *dev)
{
struct dma_iommu_mapping *mapping;
@ -2324,22 +2331,10 @@ static void __arm_iommu_detach_device(struct device *dev)
iommu_detach_device(mapping->domain, dev);
kref_put(&mapping->kref, release_iommu_mapping);
to_dma_iommu_mapping(dev) = NULL;
set_dma_ops(dev, NULL);
pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev));
}
/**
* arm_iommu_detach_device
* @dev: valid struct device pointer
*
* Detaches the provided device from a previously attached map.
* This voids the dma operations (dma_map_ops pointer)
*/
void arm_iommu_detach_device(struct device *dev)
{
__arm_iommu_detach_device(dev);
set_dma_ops(dev, NULL);
}
EXPORT_SYMBOL_GPL(arm_iommu_detach_device);
static const struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent)
@ -2379,7 +2374,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev)
if (!mapping)
return;
__arm_iommu_detach_device(dev);
arm_iommu_detach_device(dev);
arm_iommu_release_mapping(mapping);
}
@ -2430,9 +2425,13 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
dev->dma_ops = xen_dma_ops;
}
#endif
dev->archdata.dma_ops_setup = true;
}
void arch_teardown_dma_ops(struct device *dev)
{
if (!dev->archdata.dma_ops_setup)
return;
arm_teardown_iommu_dma_ops(dev);
}

View File

@ -90,7 +90,7 @@ arch_get_unmapped_area(struct file *filp, unsigned long addr,
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
(!vma || addr + len <= vma->vm_start))
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
@ -141,7 +141,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
(!vma || addr + len <= vma->vm_start))
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}

View File

@ -1084,10 +1084,6 @@ config SYSVIPC_COMPAT
def_bool y
depends on COMPAT && SYSVIPC
config KEYS_COMPAT
def_bool y
depends on COMPAT && KEYS
endmenu
menu "Power management options"

View File

@ -406,8 +406,9 @@
r_ccu: clock@1f01400 {
compatible = "allwinner,sun50i-a64-r-ccu";
reg = <0x01f01400 0x100>;
clocks = <&osc24M>, <&osc32k>, <&iosc>;
clock-names = "hosc", "losc", "iosc";
clocks = <&osc24M>, <&osc32k>, <&iosc>,
<&ccu 11>;
clock-names = "hosc", "losc", "iosc", "pll-periph";
#clock-cells = <1>;
#reset-cells = <1>;
};

View File

@ -40,7 +40,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "sunxi-h3-h5.dtsi"
#include <arm/sunxi-h3-h5.dtsi>
/ {
cpus {

View File

@ -1 +0,0 @@
../../../../arm/boot/dts/sunxi-h3-h5.dtsi

View File

@ -81,6 +81,45 @@
};
};
reg_sys_5v: regulator@0 {
compatible = "regulator-fixed";
regulator-name = "SYS_5V";
regulator-min-microvolt = <5000000>;
regulator-max-microvolt = <5000000>;
regulator-boot-on;
regulator-always-on;
};
reg_vdd_3v3: regulator@1 {
compatible = "regulator-fixed";
regulator-name = "VDD_3V3";
regulator-min-microvolt = <3300000>;
regulator-max-microvolt = <3300000>;
regulator-boot-on;
regulator-always-on;
vin-supply = <&reg_sys_5v>;
};
reg_5v_hub: regulator@2 {
compatible = "regulator-fixed";
regulator-name = "5V_HUB";
regulator-min-microvolt = <5000000>;
regulator-max-microvolt = <5000000>;
regulator-boot-on;
gpio = <&gpio0 7 0>;
regulator-always-on;
vin-supply = <&reg_sys_5v>;
};
wl1835_pwrseq: wl1835-pwrseq {
compatible = "mmc-pwrseq-simple";
/* WLAN_EN GPIO */
reset-gpios = <&gpio0 5 GPIO_ACTIVE_LOW>;
clocks = <&pmic>;
clock-names = "ext_clock";
power-off-delay-us = <10>;
};
soc {
spi0: spi@f7106000 {
status = "ok";
@ -256,11 +295,31 @@
/* GPIO blocks 16 thru 19 do not appear to be routed to pins */
dwmmc_2: dwmmc2@f723f000 {
ti,non-removable;
dwmmc_0: dwmmc0@f723d000 {
cap-mmc-highspeed;
non-removable;
/* WL_EN */
vmmc-supply = <&wlan_en_reg>;
bus-width = <0x8>;
vmmc-supply = <&ldo19>;
};
dwmmc_1: dwmmc1@f723e000 {
card-detect-delay = <200>;
cap-sd-highspeed;
sd-uhs-sdr12;
sd-uhs-sdr25;
sd-uhs-sdr50;
vqmmc-supply = <&ldo7>;
vmmc-supply = <&ldo10>;
bus-width = <0x4>;
disable-wp;
cd-gpios = <&gpio1 0 1>;
};
dwmmc_2: dwmmc2@f723f000 {
bus-width = <0x4>;
non-removable;
vmmc-supply = <&reg_vdd_3v3>;
mmc-pwrseq = <&wl1835_pwrseq>;
#address-cells = <0x1>;
#size-cells = <0x0>;
@ -272,18 +331,6 @@
interrupts = <3 IRQ_TYPE_EDGE_RISING>;
};
};
wlan_en_reg: regulator@1 {
compatible = "regulator-fixed";
regulator-name = "wlan-en-regulator";
regulator-min-microvolt = <1800000>;
regulator-max-microvolt = <1800000>;
/* WLAN_EN GPIO */
gpio = <&gpio0 5 0>;
/* WLAN card specific delay */
startup-delay-us = <70000>;
enable-active-high;
};
};
leds {
@ -330,6 +377,7 @@
pmic: pmic@f8000000 {
compatible = "hisilicon,hi655x-pmic";
reg = <0x0 0xf8000000 0x0 0x1000>;
#clock-cells = <0>;
interrupt-controller;
#interrupt-cells = <2>;
pmic-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;

View File

@ -725,20 +725,10 @@
status = "disabled";
};
fixed_5v_hub: regulator@0 {
compatible = "regulator-fixed";
regulator-name = "fixed_5v_hub";
regulator-min-microvolt = <5000000>;
regulator-max-microvolt = <5000000>;
regulator-boot-on;
gpio = <&gpio0 7 0>;
regulator-always-on;
};
usb_phy: usbphy {
compatible = "hisilicon,hi6220-usb-phy";
#phy-cells = <0>;
phy-supply = <&fixed_5v_hub>;
phy-supply = <&reg_5v_hub>;
hisilicon,peripheral-syscon = <&sys_ctrl>;
};
@ -766,17 +756,12 @@
dwmmc_0: dwmmc0@f723d000 {
compatible = "hisilicon,hi6220-dw-mshc";
num-slots = <0x1>;
cap-mmc-highspeed;
non-removable;
reg = <0x0 0xf723d000 0x0 0x1000>;
interrupts = <0x0 0x48 0x4>;
clocks = <&sys_ctrl 2>, <&sys_ctrl 1>;
clock-names = "ciu", "biu";
resets = <&sys_ctrl PERIPH_RSTDIS0_MMC0>;
reset-names = "reset";
bus-width = <0x8>;
vmmc-supply = <&ldo19>;
pinctrl-names = "default";
pinctrl-0 = <&emmc_pmx_func &emmc_clk_cfg_func
&emmc_cfg_func &emmc_rst_cfg_func>;
@ -784,13 +769,7 @@
dwmmc_1: dwmmc1@f723e000 {
compatible = "hisilicon,hi6220-dw-mshc";
num-slots = <0x1>;
card-detect-delay = <200>;
hisilicon,peripheral-syscon = <&ao_ctrl>;
cap-sd-highspeed;
sd-uhs-sdr12;
sd-uhs-sdr25;
sd-uhs-sdr50;
reg = <0x0 0xf723e000 0x0 0x1000>;
interrupts = <0x0 0x49 0x4>;
#address-cells = <0x1>;
@ -799,11 +778,6 @@
clock-names = "ciu", "biu";
resets = <&sys_ctrl PERIPH_RSTDIS0_MMC1>;
reset-names = "reset";
vqmmc-supply = <&ldo7>;
vmmc-supply = <&ldo10>;
bus-width = <0x4>;
disable-wp;
cd-gpios = <&gpio1 0 1>;
pinctrl-names = "default", "idle";
pinctrl-0 = <&sd_pmx_func &sd_clk_cfg_func &sd_cfg_func>;
pinctrl-1 = <&sd_pmx_idle &sd_clk_cfg_idle &sd_cfg_idle>;
@ -811,15 +785,12 @@
dwmmc_2: dwmmc2@f723f000 {
compatible = "hisilicon,hi6220-dw-mshc";
num-slots = <0x1>;
reg = <0x0 0xf723f000 0x0 0x1000>;
interrupts = <0x0 0x4a 0x4>;
clocks = <&sys_ctrl HI6220_MMC2_CIUCLK>, <&sys_ctrl HI6220_MMC2_CLK>;
clock-names = "ciu", "biu";
resets = <&sys_ctrl PERIPH_RSTDIS0_MMC2>;
reset-names = "reset";
bus-width = <0x4>;
broken-cd;
pinctrl-names = "default", "idle";
pinctrl-0 = <&sdio_pmx_func &sdio_clk_cfg_func &sdio_cfg_func>;
pinctrl-1 = <&sdio_pmx_idle &sdio_clk_cfg_idle &sdio_cfg_idle>;

View File

@ -231,8 +231,7 @@
cpm_crypto: crypto@800000 {
compatible = "inside-secure,safexcel-eip197";
reg = <0x800000 0x200000>;
interrupts = <GIC_SPI 34 (IRQ_TYPE_EDGE_RISING
| IRQ_TYPE_LEVEL_HIGH)>,
interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 54 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 55 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 56 IRQ_TYPE_LEVEL_HIGH>,

View File

@ -221,8 +221,7 @@
cps_crypto: crypto@800000 {
compatible = "inside-secure,safexcel-eip197";
reg = <0x800000 0x200000>;
interrupts = <GIC_SPI 34 (IRQ_TYPE_EDGE_RISING
| IRQ_TYPE_LEVEL_HIGH)>,
interrupts = <GIC_SPI 34 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 278 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 279 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 280 IRQ_TYPE_LEVEL_HIGH>,

View File

@ -68,6 +68,7 @@ CONFIG_PCIE_QCOM=y
CONFIG_PCIE_ARMADA_8K=y
CONFIG_PCI_AARDVARK=y
CONFIG_PCIE_RCAR=y
CONFIG_PCIE_ROCKCHIP=m
CONFIG_PCI_HOST_GENERIC=y
CONFIG_PCI_XGENE=y
CONFIG_ARM64_VA_BITS_48=y
@ -208,6 +209,8 @@ CONFIG_BRCMFMAC=m
CONFIG_WL18XX=m
CONFIG_WLCORE_SDIO=m
CONFIG_INPUT_EVDEV=y
CONFIG_KEYBOARD_ADC=m
CONFIG_KEYBOARD_CROS_EC=y
CONFIG_KEYBOARD_GPIO=y
CONFIG_INPUT_MISC=y
CONFIG_INPUT_PM8941_PWRKEY=y
@ -263,6 +266,7 @@ CONFIG_SPI_MESON_SPIFC=m
CONFIG_SPI_ORION=y
CONFIG_SPI_PL022=y
CONFIG_SPI_QUP=y
CONFIG_SPI_ROCKCHIP=y
CONFIG_SPI_S3C64XX=y
CONFIG_SPI_SPIDEV=m
CONFIG_SPMI=y
@ -292,6 +296,7 @@ CONFIG_THERMAL_GOV_POWER_ALLOCATOR=y
CONFIG_CPU_THERMAL=y
CONFIG_THERMAL_EMULATION=y
CONFIG_EXYNOS_THERMAL=y
CONFIG_ROCKCHIP_THERMAL=m
CONFIG_WATCHDOG=y
CONFIG_S3C2410_WATCHDOG=y
CONFIG_MESON_GXBB_WATCHDOG=m
@ -300,12 +305,14 @@ CONFIG_RENESAS_WDT=y
CONFIG_BCM2835_WDT=y
CONFIG_MFD_CROS_EC=y
CONFIG_MFD_CROS_EC_I2C=y
CONFIG_MFD_CROS_EC_SPI=y
CONFIG_MFD_EXYNOS_LPASS=m
CONFIG_MFD_HI655X_PMIC=y
CONFIG_MFD_MAX77620=y
CONFIG_MFD_SPMI_PMIC=y
CONFIG_MFD_RK808=y
CONFIG_MFD_SEC_CORE=y
CONFIG_REGULATOR_FAN53555=y
CONFIG_REGULATOR_FIXED_VOLTAGE=y
CONFIG_REGULATOR_GPIO=y
CONFIG_REGULATOR_HI655X=y
@ -473,8 +480,10 @@ CONFIG_ARCH_TEGRA_186_SOC=y
CONFIG_EXTCON_USB_GPIO=y
CONFIG_IIO=y
CONFIG_EXYNOS_ADC=y
CONFIG_ROCKCHIP_SARADC=m
CONFIG_PWM=y
CONFIG_PWM_BCM2835=m
CONFIG_PWM_CROS_EC=m
CONFIG_PWM_MESON=m
CONFIG_PWM_ROCKCHIP=y
CONFIG_PWM_SAMSUNG=y
@ -484,6 +493,7 @@ CONFIG_PHY_HI6220_USB=y
CONFIG_PHY_SUN4I_USB=y
CONFIG_PHY_ROCKCHIP_INNO_USB2=y
CONFIG_PHY_ROCKCHIP_EMMC=y
CONFIG_PHY_ROCKCHIP_PCIE=m
CONFIG_PHY_XGENE=y
CONFIG_PHY_TEGRA_XUSB=y
CONFIG_ARM_SCPI_PROTOCOL=y

View File

@ -23,9 +23,9 @@
#define ACPI_MADT_GICC_LENGTH \
(acpi_gbl_FADT.header.revision < 6 ? 76 : 80)
#define BAD_MADT_GICC_ENTRY(entry, end) \
(!(entry) || (unsigned long)(entry) + sizeof(*(entry)) > (end) || \
(entry)->header.length != ACPI_MADT_GICC_LENGTH)
#define BAD_MADT_GICC_ENTRY(entry, end) \
(!(entry) || (entry)->header.length != ACPI_MADT_GICC_LENGTH || \
(unsigned long)(entry) + ACPI_MADT_GICC_LENGTH > (end))
/* Basic configuration for ACPI */
#ifdef CONFIG_ACPI

View File

@ -286,6 +286,10 @@
#define SCTLR_ELx_A (1 << 1)
#define SCTLR_ELx_M 1
#define SCTLR_EL2_RES1 ((1 << 4) | (1 << 5) | (1 << 11) | (1 << 16) | \
(1 << 16) | (1 << 18) | (1 << 22) | (1 << 23) | \
(1 << 28) | (1 << 29))
#define SCTLR_ELx_FLAGS (SCTLR_ELx_M | SCTLR_ELx_A | SCTLR_ELx_C | \
SCTLR_ELx_SA | SCTLR_ELx_I)

View File

@ -191,8 +191,10 @@ struct pci_bus *pci_acpi_scan_root(struct acpi_pci_root *root)
return NULL;
root_ops = kzalloc_node(sizeof(*root_ops), GFP_KERNEL, node);
if (!root_ops)
if (!root_ops) {
kfree(ri);
return NULL;
}
ri->cfg = pci_acpi_setup_ecam_mapping(root);
if (!ri->cfg) {

View File

@ -221,10 +221,11 @@ void update_vsyscall(struct timekeeper *tk)
/* tkr_mono.cycle_last == tkr_raw.cycle_last */
vdso_data->cs_cycle_last = tk->tkr_mono.cycle_last;
vdso_data->raw_time_sec = tk->raw_time.tv_sec;
vdso_data->raw_time_nsec = tk->raw_time.tv_nsec;
vdso_data->raw_time_nsec = (tk->raw_time.tv_nsec <<
tk->tkr_raw.shift) +
tk->tkr_raw.xtime_nsec;
vdso_data->xtime_clock_sec = tk->xtime_sec;
vdso_data->xtime_clock_nsec = tk->tkr_mono.xtime_nsec;
/* tkr_raw.xtime_nsec == 0 */
vdso_data->cs_mono_mult = tk->tkr_mono.mult;
vdso_data->cs_raw_mult = tk->tkr_raw.mult;
/* tkr_mono.shift == tkr_raw.shift */

View File

@ -256,7 +256,6 @@ monotonic_raw:
seqcnt_check fail=monotonic_raw
/* All computations are done with left-shifted nsecs. */
lsl x14, x14, x12
get_nsec_per_sec res=x9
lsl x9, x9, x12

View File

@ -106,10 +106,13 @@ __do_hyp_init:
tlbi alle2
dsb sy
mrs x4, sctlr_el2
and x4, x4, #SCTLR_ELx_EE // preserve endianness of EL2
ldr x5, =SCTLR_ELx_FLAGS
orr x4, x4, x5
/*
* Preserve all the RES1 bits while setting the default flags,
* as well as the EE bit on BE. Drop the A flag since the compiler
* is allowed to generate unaligned accesses.
*/
ldr x4, =(SCTLR_EL2_RES1 | (SCTLR_ELx_FLAGS & ~SCTLR_ELx_A))
CPU_BE( orr x4, x4, #SCTLR_ELx_EE)
msr sctlr_el2, x4
isb

View File

@ -65,8 +65,8 @@ static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
* Here set VMCR.CTLR in ICC_CTLR_EL1 layout.
* The vgic_set_vmcr() will convert to ICH_VMCR layout.
*/
vmcr.ctlr = val & ICC_CTLR_EL1_CBPR_MASK;
vmcr.ctlr |= val & ICC_CTLR_EL1_EOImode_MASK;
vmcr.cbpr = (val & ICC_CTLR_EL1_CBPR_MASK) >> ICC_CTLR_EL1_CBPR_SHIFT;
vmcr.eoim = (val & ICC_CTLR_EL1_EOImode_MASK) >> ICC_CTLR_EL1_EOImode_SHIFT;
vgic_set_vmcr(vcpu, &vmcr);
} else {
val = 0;
@ -83,8 +83,8 @@ static bool access_gic_ctlr(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
* The VMCR.CTLR value is in ICC_CTLR_EL1 layout.
* Extract it directly using ICC_CTLR_EL1 reg definitions.
*/
val |= vmcr.ctlr & ICC_CTLR_EL1_CBPR_MASK;
val |= vmcr.ctlr & ICC_CTLR_EL1_EOImode_MASK;
val |= (vmcr.cbpr << ICC_CTLR_EL1_CBPR_SHIFT) & ICC_CTLR_EL1_CBPR_MASK;
val |= (vmcr.eoim << ICC_CTLR_EL1_EOImode_SHIFT) & ICC_CTLR_EL1_EOImode_MASK;
p->regval = val;
}
@ -135,7 +135,7 @@ static bool access_gic_bpr1(struct kvm_vcpu *vcpu, struct sys_reg_params *p,
p->regval = 0;
vgic_get_vmcr(vcpu, &vmcr);
if (!((vmcr.ctlr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT)) {
if (!vmcr.cbpr) {
if (p->is_write) {
vmcr.abpr = (p->regval & ICC_BPR1_EL1_MASK) >>
ICC_BPR1_EL1_SHIFT;

View File

@ -36,6 +36,7 @@ int bpf_jit_enable __read_mostly;
#define TMP_REG_1 (MAX_BPF_JIT_REG + 0)
#define TMP_REG_2 (MAX_BPF_JIT_REG + 1)
#define TCALL_CNT (MAX_BPF_JIT_REG + 2)
#define TMP_REG_3 (MAX_BPF_JIT_REG + 3)
/* Map BPF registers to A64 registers */
static const int bpf2a64[] = {
@ -57,6 +58,7 @@ static const int bpf2a64[] = {
/* temporary registers for internal BPF JIT */
[TMP_REG_1] = A64_R(10),
[TMP_REG_2] = A64_R(11),
[TMP_REG_3] = A64_R(12),
/* tail_call_cnt */
[TCALL_CNT] = A64_R(26),
/* temporary register for blinding constants */
@ -319,6 +321,7 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
const u8 src = bpf2a64[insn->src_reg];
const u8 tmp = bpf2a64[TMP_REG_1];
const u8 tmp2 = bpf2a64[TMP_REG_2];
const u8 tmp3 = bpf2a64[TMP_REG_3];
const s16 off = insn->off;
const s32 imm = insn->imm;
const int i = insn - ctx->prog->insnsi;
@ -689,10 +692,10 @@ emit_cond_jmp:
emit(A64_PRFM(tmp, PST, L1, STRM), ctx);
emit(A64_LDXR(isdw, tmp2, tmp), ctx);
emit(A64_ADD(isdw, tmp2, tmp2, src), ctx);
emit(A64_STXR(isdw, tmp2, tmp, tmp2), ctx);
emit(A64_STXR(isdw, tmp2, tmp, tmp3), ctx);
jmp_offset = -3;
check_imm19(jmp_offset);
emit(A64_CBNZ(0, tmp2, jmp_offset), ctx);
emit(A64_CBNZ(0, tmp3, jmp_offset), ctx);
break;
/* R0 = ntohx(*(size *)(((struct sk_buff *)R6)->data + imm)) */

View File

@ -16,5 +16,11 @@ static inline cycles_t get_cycles(void)
#define vxtime_lock() do {} while (0)
#define vxtime_unlock() do {} while (0)
/* This attribute is used in include/linux/jiffies.h alongside with
* __cacheline_aligned_in_smp. It is assumed that __cacheline_aligned_in_smp
* for frv does not contain another section specification.
*/
#define __jiffy_arch_data __attribute__((__section__(".data")))
#endif

View File

@ -75,7 +75,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr, unsi
addr = PAGE_ALIGN(addr);
vma = find_vma(current->mm, addr);
if (TASK_SIZE - len >= addr &&
(!vma || addr + len <= vma->vm_start))
(!vma || addr + len <= vm_start_gap(vma)))
goto success;
}

View File

@ -37,15 +37,14 @@ __kernel_size_t __clear_user_hexagon(void __user *dest, unsigned long count)
long uncleared;
while (count > PAGE_SIZE) {
uncleared = __copy_to_user_hexagon(dest, &empty_zero_page,
PAGE_SIZE);
uncleared = raw_copy_to_user(dest, &empty_zero_page, PAGE_SIZE);
if (uncleared)
return count - (PAGE_SIZE - uncleared);
count -= PAGE_SIZE;
dest += PAGE_SIZE;
}
if (count)
count = __copy_to_user_hexagon(dest, &empty_zero_page, count);
count = raw_copy_to_user(dest, &empty_zero_page, count);
return count;
}

View File

@ -128,19 +128,19 @@ quiet_cmd_cpp_its_S = ITS $@
-DADDR_BITS=$(ADDR_BITS) \
-DADDR_CELLS=$(itb_addr_cells)
$(obj)/vmlinux.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
$(obj)/vmlinux.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
$(call if_changed_dep,cpp_its_S,none,vmlinux.bin)
$(obj)/vmlinux.gz.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
$(obj)/vmlinux.gz.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
$(call if_changed_dep,cpp_its_S,gzip,vmlinux.bin.gz)
$(obj)/vmlinux.bz2.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
$(obj)/vmlinux.bz2.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
$(call if_changed_dep,cpp_its_S,bzip2,vmlinux.bin.bz2)
$(obj)/vmlinux.lzma.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
$(obj)/vmlinux.lzma.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
$(call if_changed_dep,cpp_its_S,lzma,vmlinux.bin.lzma)
$(obj)/vmlinux.lzo.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S FORCE
$(obj)/vmlinux.lzo.its: $(srctree)/arch/mips/$(PLATFORM)/vmlinux.its.S $(VMLINUX) FORCE
$(call if_changed_dep,cpp_its_S,lzo,vmlinux.bin.lzo)
quiet_cmd_itb-image = ITB $@

View File

@ -35,7 +35,12 @@ extern pte_t *pkmap_page_table;
* easily, subsequent pte tables have to be allocated in one physical
* chunk of RAM.
*/
#ifdef CONFIG_PHYS_ADDR_T_64BIT
#define LAST_PKMAP 512
#else
#define LAST_PKMAP 1024
#endif
#define LAST_PKMAP_MASK (LAST_PKMAP-1)
#define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT)
#define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT))

View File

@ -43,7 +43,8 @@ typedef union mips_instruction kprobe_opcode_t;
#define flush_insn_slot(p) \
do { \
flush_icache_range((unsigned long)p->addr, \
if (p->addr) \
flush_icache_range((unsigned long)p->addr, \
(unsigned long)p->addr + \
(MAX_INSN_SIZE * sizeof(kprobe_opcode_t))); \
} while (0)

View File

@ -19,6 +19,10 @@
#define __ARCH_USE_5LEVEL_HACK
#include <asm-generic/pgtable-nopmd.h>
#ifdef CONFIG_HIGHMEM
#include <asm/highmem.h>
#endif
extern int temp_tlb_entry;
/*
@ -62,7 +66,8 @@ extern int add_temporary_entry(unsigned long entrylo0, unsigned long entrylo1,
#define VMALLOC_START MAP_BASE
#define PKMAP_BASE (0xfe000000UL)
#define PKMAP_END ((FIXADDR_START) & ~((LAST_PKMAP << PAGE_SHIFT)-1))
#define PKMAP_BASE (PKMAP_END - PAGE_SIZE * LAST_PKMAP)
#ifdef CONFIG_HIGHMEM
# define VMALLOC_END (PKMAP_BASE-2*PAGE_SIZE)

View File

@ -804,8 +804,10 @@ int __compute_return_epc_for_insn(struct pt_regs *regs,
break;
}
/* Compact branch: BNEZC || JIALC */
if (insn.i_format.rs)
if (!insn.i_format.rs) {
/* JIALC: set $31/ra */
regs->regs[31] = epc + 4;
}
regs->cp0_epc += 8;
break;
#endif

View File

@ -38,20 +38,6 @@ void arch_ftrace_update_code(int command)
#endif
/*
* Check if the address is in kernel space
*
* Clone core_kernel_text() from kernel/extable.c, but doesn't call
* init_kernel_text() for Ftrace doesn't trace functions in init sections.
*/
static inline int in_kernel_space(unsigned long ip)
{
if (ip >= (unsigned long)_stext &&
ip <= (unsigned long)_etext)
return 1;
return 0;
}
#ifdef CONFIG_DYNAMIC_FTRACE
#define JAL 0x0c000000 /* jump & link: ip --> ra, jump to target */
@ -198,7 +184,7 @@ int ftrace_make_nop(struct module *mod,
* If ip is in kernel space, no long call, otherwise, long call is
* needed.
*/
new = in_kernel_space(ip) ? INSN_NOP : INSN_B_1F;
new = core_kernel_text(ip) ? INSN_NOP : INSN_B_1F;
#ifdef CONFIG_64BIT
return ftrace_modify_code(ip, new);
#else
@ -218,12 +204,12 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
unsigned int new;
unsigned long ip = rec->ip;
new = in_kernel_space(ip) ? insn_jal_ftrace_caller : insn_la_mcount[0];
new = core_kernel_text(ip) ? insn_jal_ftrace_caller : insn_la_mcount[0];
#ifdef CONFIG_64BIT
return ftrace_modify_code(ip, new);
#else
return ftrace_modify_code_2r(ip, new, in_kernel_space(ip) ?
return ftrace_modify_code_2r(ip, new, core_kernel_text(ip) ?
INSN_NOP : insn_la_mcount[1]);
#endif
}
@ -289,7 +275,7 @@ unsigned long ftrace_get_parent_ra_addr(unsigned long self_ra, unsigned long
* instruction "lui v1, hi_16bit_of_mcount"(offset is 24), but for
* kernel, move after the instruction "move ra, at"(offset is 16)
*/
ip = self_ra - (in_kernel_space(self_ra) ? 16 : 24);
ip = self_ra - (core_kernel_text(self_ra) ? 16 : 24);
/*
* search the text until finding the non-store instruction or "s{d,w}
@ -394,7 +380,7 @@ void prepare_ftrace_return(unsigned long *parent_ra_addr, unsigned long self_ra,
* entries configured through the tracing/set_graph_function interface.
*/
insns = in_kernel_space(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
insns = core_kernel_text(self_ra) ? 2 : MCOUNT_OFFSET_INSNS + 1;
trace.func = self_ra - (MCOUNT_INSN_SIZE * insns);
/* Only trace if the calling function expects to */

View File

@ -1597,7 +1597,6 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config)
break;
case CPU_P5600:
case CPU_P6600:
case CPU_I6400:
/* 8-bit event numbers */
raw_id = config & 0x1ff;
base_id = raw_id & 0xff;
@ -1610,6 +1609,11 @@ static const struct mips_perf_event *mipsxx_pmu_map_raw_event(u64 config)
raw_event.range = P;
#endif
break;
case CPU_I6400:
/* 8-bit event numbers */
base_id = config & 0xff;
raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD;
break;
case CPU_1004K:
if (IS_BOTH_COUNTERS_1004K_EVENT(base_id))
raw_event.cntr_mask = CNTR_EVEN | CNTR_ODD;

View File

@ -120,7 +120,6 @@ int copy_thread_tls(unsigned long clone_flags, unsigned long usp,
struct thread_info *ti = task_thread_info(p);
struct pt_regs *childregs, *regs = current_pt_regs();
unsigned long childksp;
p->set_child_tid = p->clear_child_tid = NULL;
childksp = (unsigned long)task_stack_page(p) + THREAD_SIZE - 32;

View File

@ -166,7 +166,11 @@ static int _kvm_mips_host_tlb_inv(unsigned long entryhi)
int kvm_mips_host_tlb_inv(struct kvm_vcpu *vcpu, unsigned long va,
bool user, bool kernel)
{
int idx_user, idx_kernel;
/*
* Initialize idx_user and idx_kernel to workaround bogus
* maybe-initialized warning when using GCC 6.
*/
int idx_user = 0, idx_kernel = 0;
unsigned long flags, old_entryhi;
local_irq_save(flags);

View File

@ -93,7 +93,7 @@ static unsigned long arch_get_unmapped_area_common(struct file *filp,
vma = find_vma(mm, addr);
if (TASK_SIZE - len >= addr &&
(!vma || addr + len <= vma->vm_start))
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}

View File

@ -51,15 +51,15 @@ void __init pagetable_init(void)
/*
* Fixed mappings:
*/
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
fixrange_init(vaddr, vaddr + FIXADDR_SIZE, pgd_base);
vaddr = __fix_to_virt(__end_of_fixed_addresses - 1);
fixrange_init(vaddr & PMD_MASK, vaddr + FIXADDR_SIZE, pgd_base);
#ifdef CONFIG_HIGHMEM
/*
* Permanent kmaps:
*/
vaddr = PKMAP_BASE;
fixrange_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
fixrange_init(vaddr & PMD_MASK, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base);
pgd = swapper_pg_dir + __pgd_offset(vaddr);
pud = pud_offset(pgd, vaddr);

View File

@ -167,8 +167,6 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
top_of_kernel_stack = sp;
p->set_child_tid = p->clear_child_tid = NULL;
/* Locate userspace context on stack... */
sp -= STACK_FRAME_OVERHEAD; /* redzone */
sp -= sizeof(struct pt_regs);

View File

@ -90,7 +90,7 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
{
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct vm_area_struct *vma, *prev;
unsigned long task_size = TASK_SIZE;
int do_color_align, last_mmap;
struct vm_unmapped_area_info info;
@ -117,9 +117,10 @@ unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
else
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
vma = find_vma_prev(mm, addr, &prev);
if (task_size - len >= addr &&
(!vma || addr + len <= vma->vm_start))
(!vma || addr + len <= vm_start_gap(vma)) &&
(!prev || addr >= vm_end_gap(prev)))
goto found_addr;
}
@ -143,7 +144,7 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
const unsigned long len, const unsigned long pgoff,
const unsigned long flags)
{
struct vm_area_struct *vma;
struct vm_area_struct *vma, *prev;
struct mm_struct *mm = current->mm;
unsigned long addr = addr0;
int do_color_align, last_mmap;
@ -177,9 +178,11 @@ arch_get_unmapped_area_topdown(struct file *filp, const unsigned long addr0,
addr = COLOR_ALIGN(addr, last_mmap, pgoff);
else
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
vma = find_vma_prev(mm, addr, &prev);
if (TASK_SIZE - len >= addr &&
(!vma || addr + len <= vma->vm_start))
(!vma || addr + len <= vm_start_gap(vma)) &&
(!prev || addr >= vm_end_gap(prev)))
goto found_addr;
}

View File

@ -380,22 +380,6 @@ source "arch/powerpc/platforms/Kconfig"
menu "Kernel options"
config PPC_DT_CPU_FTRS
bool "Device-tree based CPU feature discovery & setup"
depends on PPC_BOOK3S_64
default n
help
This enables code to use a new device tree binding for describing CPU
compatibility and features. Saying Y here will attempt to use the new
binding if the firmware provides it. Currently only the skiboot
firmware provides this binding.
If you're not sure say Y.
config PPC_CPUFEATURES_ENABLE_UNKNOWN
bool "cpufeatures pass through unknown features to guest/userspace"
depends on PPC_DT_CPU_FTRS
default y
config HIGHMEM
bool "High memory support"
depends on PPC32
@ -1215,11 +1199,6 @@ source "arch/powerpc/Kconfig.debug"
source "security/Kconfig"
config KEYS_COMPAT
bool
depends on COMPAT && KEYS
default y
source "crypto/Kconfig"
config PPC_LIB_RHEAP

View File

@ -8,7 +8,7 @@
#define H_PTE_INDEX_SIZE 9
#define H_PMD_INDEX_SIZE 7
#define H_PUD_INDEX_SIZE 9
#define H_PGD_INDEX_SIZE 12
#define H_PGD_INDEX_SIZE 9
#ifndef __ASSEMBLY__
#define H_PTE_TABLE_SIZE (sizeof(pte_t) << H_PTE_INDEX_SIZE)

View File

@ -104,7 +104,7 @@
"1: "PPC_TLNEI" %4,0\n" \
_EMIT_BUG_ENTRY \
: : "i" (__FILE__), "i" (__LINE__), \
"i" (BUGFLAG_TAINT(TAINT_WARN)), \
"i" (BUGFLAG_WARNING|BUGFLAG_TAINT(TAINT_WARN)),\
"i" (sizeof(struct bug_entry)), \
"r" (__ret_warn_on)); \
} \

View File

@ -214,7 +214,6 @@ enum {
#define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000)
#define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000)
#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000)
#define CPU_FTR_SUBCORE LONG_ASM_CONST(0x2000000000000000)
#define CPU_FTR_POWER9_DD1 LONG_ASM_CONST(0x4000000000000000)
#ifndef __ASSEMBLY__
@ -463,7 +462,7 @@ enum {
CPU_FTR_STCX_CHECKS_ADDRESS | CPU_FTR_POPCNTB | CPU_FTR_POPCNTD | \
CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \
CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \
CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP | CPU_FTR_SUBCORE)
CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP)
#define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG)
#define CPU_FTRS_POWER8_DD1 (CPU_FTRS_POWER8 & ~CPU_FTR_DBELL)
#define CPU_FTRS_POWER9 (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \

View File

@ -103,6 +103,7 @@ extern int kprobe_exceptions_notify(struct notifier_block *self,
extern int kprobe_fault_handler(struct pt_regs *regs, int trapnr);
extern int kprobe_handler(struct pt_regs *regs);
extern int kprobe_post_handler(struct pt_regs *regs);
extern int is_current_kprobe_addr(unsigned long addr);
#ifdef CONFIG_KPROBES_ON_FTRACE
extern int skip_singlestep(struct kprobe *p, struct pt_regs *regs,
struct kprobe_ctlblk *kcb);

View File

@ -110,13 +110,18 @@ void release_thread(struct task_struct *);
#define TASK_SIZE_128TB (0x0000800000000000UL)
#define TASK_SIZE_512TB (0x0002000000000000UL)
#ifdef CONFIG_PPC_BOOK3S_64
/*
* For now 512TB is only supported with book3s and 64K linux page size.
*/
#if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_64K_PAGES)
/*
* Max value currently used:
*/
#define TASK_SIZE_USER64 TASK_SIZE_512TB
#define TASK_SIZE_USER64 TASK_SIZE_512TB
#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_128TB
#else
#define TASK_SIZE_USER64 TASK_SIZE_64TB
#define TASK_SIZE_USER64 TASK_SIZE_64TB
#define DEFAULT_MAP_WINDOW_USER64 TASK_SIZE_64TB
#endif
/*
@ -132,7 +137,7 @@ void release_thread(struct task_struct *);
* space during mmap's.
*/
#define TASK_UNMAPPED_BASE_USER32 (PAGE_ALIGN(TASK_SIZE_USER32 / 4))
#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(TASK_SIZE_128TB / 4))
#define TASK_UNMAPPED_BASE_USER64 (PAGE_ALIGN(DEFAULT_MAP_WINDOW_USER64 / 4))
#define TASK_UNMAPPED_BASE ((is_32bit_task()) ? \
TASK_UNMAPPED_BASE_USER32 : TASK_UNMAPPED_BASE_USER64 )
@ -143,21 +148,15 @@ void release_thread(struct task_struct *);
* with 128TB and conditionally enable upto 512TB
*/
#ifdef CONFIG_PPC_BOOK3S_64
#define DEFAULT_MAP_WINDOW ((is_32bit_task()) ? \
TASK_SIZE_USER32 : TASK_SIZE_128TB)
#define DEFAULT_MAP_WINDOW ((is_32bit_task()) ? \
TASK_SIZE_USER32 : DEFAULT_MAP_WINDOW_USER64)
#else
#define DEFAULT_MAP_WINDOW TASK_SIZE
#endif
#ifdef __powerpc64__
#ifdef CONFIG_PPC_BOOK3S_64
/* Limit stack to 128TB */
#define STACK_TOP_USER64 TASK_SIZE_128TB
#else
#define STACK_TOP_USER64 TASK_SIZE_USER64
#endif
#define STACK_TOP_USER64 DEFAULT_MAP_WINDOW_USER64
#define STACK_TOP_USER32 TASK_SIZE_USER32
#define STACK_TOP (is_32bit_task() ? \

View File

@ -44,8 +44,22 @@ extern void __init dump_numa_cpu_topology(void);
extern int sysfs_add_device_to_node(struct device *dev, int nid);
extern void sysfs_remove_device_from_node(struct device *dev, int nid);
static inline int early_cpu_to_node(int cpu)
{
int nid;
nid = numa_cpu_lookup_table[cpu];
/*
* Fall back to node 0 if nid is unset (it should be, except bugs).
* This allows callers to safely do NODE_DATA(early_cpu_to_node(cpu)).
*/
return (nid < 0) ? 0 : nid;
}
#else
static inline int early_cpu_to_node(int cpu) { return 0; }
static inline void dump_numa_cpu_topology(void) {}
static inline int sysfs_add_device_to_node(struct device *dev, int nid)

View File

@ -94,11 +94,13 @@ struct xive_q {
* store at 0 and some ESBs support doing a trigger via a
* separate trigger page.
*/
#define XIVE_ESB_GET 0x800
#define XIVE_ESB_SET_PQ_00 0xc00
#define XIVE_ESB_SET_PQ_01 0xd00
#define XIVE_ESB_SET_PQ_10 0xe00
#define XIVE_ESB_SET_PQ_11 0xf00
#define XIVE_ESB_STORE_EOI 0x400 /* Store */
#define XIVE_ESB_LOAD_EOI 0x000 /* Load */
#define XIVE_ESB_GET 0x800 /* Load */
#define XIVE_ESB_SET_PQ_00 0xc00 /* Load */
#define XIVE_ESB_SET_PQ_01 0xd00 /* Load */
#define XIVE_ESB_SET_PQ_10 0xe00 /* Load */
#define XIVE_ESB_SET_PQ_11 0xf00 /* Load */
#define XIVE_ESB_VAL_P 0x2
#define XIVE_ESB_VAL_Q 0x1

View File

@ -46,6 +46,8 @@
#define PPC_FEATURE2_HTM_NOSC 0x01000000
#define PPC_FEATURE2_ARCH_3_00 0x00800000 /* ISA 3.00 */
#define PPC_FEATURE2_HAS_IEEE128 0x00400000 /* VSX IEEE Binary Float 128-bit */
#define PPC_FEATURE2_DARN 0x00200000 /* darn random number insn */
#define PPC_FEATURE2_SCV 0x00100000 /* scv syscall */
/*
* IMPORTANT!

View File

@ -124,7 +124,8 @@ extern void __restore_cpu_e6500(void);
#define COMMON_USER_POWER9 COMMON_USER_POWER8
#define COMMON_USER2_POWER9 (COMMON_USER2_POWER8 | \
PPC_FEATURE2_ARCH_3_00 | \
PPC_FEATURE2_HAS_IEEE128)
PPC_FEATURE2_HAS_IEEE128 | \
PPC_FEATURE2_DARN )
#ifdef CONFIG_PPC_BOOK3E_64
#define COMMON_USER_BOOKE (COMMON_USER_PPC64 | PPC_FEATURE_BOOKE)

View File

@ -8,6 +8,7 @@
#include <linux/export.h>
#include <linux/init.h>
#include <linux/jump_label.h>
#include <linux/libfdt.h>
#include <linux/memblock.h>
#include <linux/printk.h>
#include <linux/sched.h>
@ -642,7 +643,6 @@ static struct dt_cpu_feature_match __initdata
{"processor-control-facility", feat_enable_dbell, CPU_FTR_DBELL},
{"processor-control-facility-v3", feat_enable_dbell, CPU_FTR_DBELL},
{"processor-utilization-of-resources-register", feat_enable_purr, 0},
{"subcore", feat_enable, CPU_FTR_SUBCORE},
{"no-execute", feat_enable, 0},
{"strong-access-ordering", feat_enable, CPU_FTR_SAO},
{"cache-inhibited-large-page", feat_enable_large_ci, 0},
@ -671,12 +671,24 @@ static struct dt_cpu_feature_match __initdata
{"wait-v3", feat_enable, 0},
};
/* XXX: how to configure this? Default + boot time? */
#ifdef CONFIG_PPC_CPUFEATURES_ENABLE_UNKNOWN
#define CPU_FEATURE_ENABLE_UNKNOWN 1
#else
#define CPU_FEATURE_ENABLE_UNKNOWN 0
#endif
static bool __initdata using_dt_cpu_ftrs;
static bool __initdata enable_unknown = true;
static int __init dt_cpu_ftrs_parse(char *str)
{
if (!str)
return 0;
if (!strcmp(str, "off"))
using_dt_cpu_ftrs = false;
else if (!strcmp(str, "known"))
enable_unknown = false;
else
return 1;
return 0;
}
early_param("dt_cpu_ftrs", dt_cpu_ftrs_parse);
static void __init cpufeatures_setup_start(u32 isa)
{
@ -707,7 +719,7 @@ static bool __init cpufeatures_process_feature(struct dt_cpu_feature *f)
}
}
if (!known && CPU_FEATURE_ENABLE_UNKNOWN) {
if (!known && enable_unknown) {
if (!feat_try_enable_unknown(f)) {
pr_info("not enabling: %s (unknown and unsupported by kernel)\n",
f->name);
@ -756,6 +768,26 @@ static void __init cpufeatures_setup_finished(void)
cur_cpu_spec->cpu_features, cur_cpu_spec->mmu_features);
}
static int __init disabled_on_cmdline(void)
{
unsigned long root, chosen;
const char *p;
root = of_get_flat_dt_root();
chosen = of_get_flat_dt_subnode_by_name(root, "chosen");
if (chosen == -FDT_ERR_NOTFOUND)
return false;
p = of_get_flat_dt_prop(chosen, "bootargs", NULL);
if (!p)
return false;
if (strstr(p, "dt_cpu_ftrs=off"))
return true;
return false;
}
static int __init fdt_find_cpu_features(unsigned long node, const char *uname,
int depth, void *data)
{
@ -766,8 +798,6 @@ static int __init fdt_find_cpu_features(unsigned long node, const char *uname,
return 0;
}
static bool __initdata using_dt_cpu_ftrs = false;
bool __init dt_cpu_ftrs_in_use(void)
{
return using_dt_cpu_ftrs;
@ -775,6 +805,8 @@ bool __init dt_cpu_ftrs_in_use(void)
bool __init dt_cpu_ftrs_init(void *fdt)
{
using_dt_cpu_ftrs = false;
/* Setup and verify the FDT, if it fails we just bail */
if (!early_init_dt_verify(fdt))
return false;
@ -782,6 +814,9 @@ bool __init dt_cpu_ftrs_init(void *fdt)
if (!of_scan_flat_dt(fdt_find_cpu_features, NULL))
return false;
if (disabled_on_cmdline())
return false;
cpufeatures_setup_cpu();
using_dt_cpu_ftrs = true;
@ -1027,5 +1062,8 @@ static int __init dt_cpu_ftrs_scan_callback(unsigned long node, const char
void __init dt_cpu_ftrs_scan(void)
{
if (!using_dt_cpu_ftrs)
return;
of_scan_flat_dt(dt_cpu_ftrs_scan_callback, NULL);
}

View File

@ -1411,10 +1411,8 @@ USE_TEXT_SECTION()
.balign IFETCH_ALIGN_BYTES
do_hash_page:
#ifdef CONFIG_PPC_STD_MMU_64
andis. r0,r4,0xa410 /* weird error? */
andis. r0,r4,0xa450 /* weird error? */
bne- handle_page_fault /* if not, try to insert a HPTE */
andis. r0,r4,DSISR_DABRMATCH@h
bne- handle_dabr_fault
CURRENT_THREAD_INFO(r11, r1)
lwz r0,TI_PREEMPT(r11) /* If we're in an "NMI" */
andis. r0,r0,NMI_MASK@h /* (i.e. an irq when soft-disabled) */
@ -1438,11 +1436,16 @@ do_hash_page:
/* Error */
blt- 13f
/* Reload DSISR into r4 for the DABR check below */
ld r4,_DSISR(r1)
#endif /* CONFIG_PPC_STD_MMU_64 */
/* Here we have a page fault that hash_page can't handle. */
handle_page_fault:
11: ld r4,_DAR(r1)
11: andis. r0,r4,DSISR_DABRMATCH@h
bne- handle_dabr_fault
ld r4,_DAR(r1)
ld r5,_DSISR(r1)
addi r3,r1,STACK_FRAME_OVERHEAD
bl do_page_fault

View File

@ -43,6 +43,12 @@ DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
struct kretprobe_blackpoint kretprobe_blacklist[] = {{NULL, NULL}};
int is_current_kprobe_addr(unsigned long addr)
{
struct kprobe *p = kprobe_running();
return (p && (unsigned long)p->addr == addr) ? 1 : 0;
}
bool arch_within_kprobe_blacklist(unsigned long addr)
{
return (addr >= (unsigned long)__kprobes_text_start &&
@ -617,6 +623,15 @@ int setjmp_pre_handler(struct kprobe *p, struct pt_regs *regs)
regs->gpr[2] = (unsigned long)(((func_descr_t *)jp->entry)->toc);
#endif
/*
* jprobes use jprobe_return() which skips the normal return
* path of the function, and this messes up the accounting of the
* function graph tracer.
*
* Pause function graph tracing while performing the jprobe function.
*/
pause_graph_tracing();
return 1;
}
NOKPROBE_SYMBOL(setjmp_pre_handler);
@ -642,6 +657,8 @@ int longjmp_break_handler(struct kprobe *p, struct pt_regs *regs)
* saved regs...
*/
memcpy(regs, &kcb->jprobe_saved_regs, sizeof(struct pt_regs));
/* It's OK to start function graph tracing again */
unpause_graph_tracing();
preempt_enable_no_resched();
return 1;
}

View File

@ -1666,6 +1666,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
#ifdef CONFIG_VSX
current->thread.used_vsr = 0;
#endif
current->thread.load_fp = 0;
memset(&current->thread.fp_state, 0, sizeof(current->thread.fp_state));
current->thread.fp_save_area = NULL;
#ifdef CONFIG_ALTIVEC
@ -1674,6 +1675,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
current->thread.vr_save_area = NULL;
current->thread.vrsave = 0;
current->thread.used_vr = 0;
current->thread.load_vec = 0;
#endif /* CONFIG_ALTIVEC */
#ifdef CONFIG_SPE
memset(current->thread.evr, 0, sizeof(current->thread.evr));
@ -1685,6 +1687,7 @@ void start_thread(struct pt_regs *regs, unsigned long start, unsigned long sp)
current->thread.tm_tfhar = 0;
current->thread.tm_texasr = 0;
current->thread.tm_tfiar = 0;
current->thread.load_tm = 0;
#endif /* CONFIG_PPC_TRANSACTIONAL_MEM */
}
EXPORT_SYMBOL(start_thread);

View File

@ -161,7 +161,9 @@ static struct ibm_pa_feature {
{ .pabyte = 0, .pabit = 3, .cpu_features = CPU_FTR_CTRL },
{ .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE },
{ .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE },
#ifdef CONFIG_PPC_RADIX_MMU
{ .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX },
#endif
{ .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN },
{ .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE,
.cpu_user_ftrs = PPC_FEATURE_TRUE_LE },

View File

@ -928,7 +928,7 @@ void __init setup_arch(char **cmdline_p)
#ifdef CONFIG_PPC_MM_SLICES
#ifdef CONFIG_PPC64
init_mm.context.addr_limit = TASK_SIZE_128TB;
init_mm.context.addr_limit = DEFAULT_MAP_WINDOW_USER64;
#else
#error "context.addr_limit not initialized."
#endif

View File

@ -615,6 +615,24 @@ void __init exc_lvl_early_init(void)
}
#endif
/*
* Emergency stacks are used for a range of things, from asynchronous
* NMIs (system reset, machine check) to synchronous, process context.
* We set preempt_count to zero, even though that isn't necessarily correct. To
* get the right value we'd need to copy it from the previous thread_info, but
* doing that might fault causing more problems.
* TODO: what to do with accounting?
*/
static void emerg_stack_init_thread_info(struct thread_info *ti, int cpu)
{
ti->task = NULL;
ti->cpu = cpu;
ti->preempt_count = 0;
ti->local_flags = 0;
ti->flags = 0;
klp_init_thread_info(ti);
}
/*
* Stack space used when we detect a bad kernel stack pointer, and
* early in SMP boots before relocation is enabled. Exclusive emergency
@ -633,24 +651,31 @@ void __init emergency_stack_init(void)
* Since we use these as temporary stacks during secondary CPU
* bringup, we need to get at them in real mode. This means they
* must also be within the RMO region.
*
* The IRQ stacks allocated elsewhere in this file are zeroed and
* initialized in kernel/irq.c. These are initialized here in order
* to have emergency stacks available as early as possible.
*/
limit = min(safe_stack_limit(), ppc64_rma_size);
for_each_possible_cpu(i) {
struct thread_info *ti;
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
klp_init_thread_info(ti);
memset(ti, 0, THREAD_SIZE);
emerg_stack_init_thread_info(ti, i);
paca[i].emergency_sp = (void *)ti + THREAD_SIZE;
#ifdef CONFIG_PPC_BOOK3S_64
/* emergency stack for NMI exception handling. */
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
klp_init_thread_info(ti);
memset(ti, 0, THREAD_SIZE);
emerg_stack_init_thread_info(ti, i);
paca[i].nmi_emergency_sp = (void *)ti + THREAD_SIZE;
/* emergency stack for machine check exception handling. */
ti = __va(memblock_alloc_base(THREAD_SIZE, THREAD_SIZE, limit));
klp_init_thread_info(ti);
memset(ti, 0, THREAD_SIZE);
emerg_stack_init_thread_info(ti, i);
paca[i].mc_emergency_sp = (void *)ti + THREAD_SIZE;
#endif
}
@ -661,7 +686,7 @@ void __init emergency_stack_init(void)
static void * __init pcpu_fc_alloc(unsigned int cpu, size_t size, size_t align)
{
return __alloc_bootmem_node(NODE_DATA(cpu_to_node(cpu)), size, align,
return __alloc_bootmem_node(NODE_DATA(early_cpu_to_node(cpu)), size, align,
__pa(MAX_DMA_ADDRESS));
}
@ -672,7 +697,7 @@ static void __init pcpu_fc_free(void *ptr, size_t size)
static int pcpu_cpu_distance(unsigned int from, unsigned int to)
{
if (cpu_to_node(from) == cpu_to_node(to))
if (early_cpu_to_node(from) == early_cpu_to_node(to))
return LOCAL_DISTANCE;
else
return REMOTE_DISTANCE;

View File

@ -45,10 +45,14 @@ _GLOBAL(ftrace_caller)
stdu r1,-SWITCH_FRAME_SIZE(r1)
/* Save all gprs to pt_regs */
SAVE_8GPRS(0,r1)
SAVE_8GPRS(8,r1)
SAVE_8GPRS(16,r1)
SAVE_8GPRS(24,r1)
SAVE_GPR(0, r1)
SAVE_10GPRS(2, r1)
SAVE_10GPRS(12, r1)
SAVE_10GPRS(22, r1)
/* Save previous stack pointer (r1) */
addi r8, r1, SWITCH_FRAME_SIZE
std r8, GPR1(r1)
/* Load special regs for save below */
mfmsr r8
@ -95,18 +99,44 @@ ftrace_call:
bl ftrace_stub
nop
/* Load ctr with the possibly modified NIP */
ld r3, _NIP(r1)
mtctr r3
/* Load the possibly modified NIP */
ld r15, _NIP(r1)
#ifdef CONFIG_LIVEPATCH
cmpd r14,r3 /* has NIP been altered? */
cmpd r14, r15 /* has NIP been altered? */
#endif
#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_KPROBES_ON_FTRACE)
/* NIP has not been altered, skip over further checks */
beq 1f
/* Check if there is an active kprobe on us */
subi r3, r14, 4
bl is_current_kprobe_addr
nop
/*
* If r3 == 1, then this is a kprobe/jprobe.
* else, this is livepatched function.
*
* The conditional branch for livepatch_handler below will use the
* result of this comparison. For kprobe/jprobe, we just need to branch to
* the new NIP, not call livepatch_handler. The branch below is bne, so we
* want CR0[EQ] to be true if this is a kprobe/jprobe. Which means we want
* CR0[EQ] = (r3 == 1).
*/
cmpdi r3, 1
1:
#endif
/* Load CTR with the possibly modified NIP */
mtctr r15
/* Restore gprs */
REST_8GPRS(0,r1)
REST_8GPRS(8,r1)
REST_8GPRS(16,r1)
REST_8GPRS(24,r1)
REST_GPR(0,r1)
REST_10GPRS(2,r1)
REST_10GPRS(12,r1)
REST_10GPRS(22,r1)
/* Restore possibly modified LR */
ld r0, _LINK(r1)
@ -119,7 +149,10 @@ ftrace_call:
addi r1, r1, SWITCH_FRAME_SIZE
#ifdef CONFIG_LIVEPATCH
/* Based on the cmpd above, if the NIP was altered handle livepatch */
/*
* Based on the cmpd or cmpdi above, if the NIP was altered and we're
* not on a kprobe/jprobe, then handle livepatch.
*/
bne- livepatch_handler
#endif

View File

@ -1486,6 +1486,14 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
r = set_vpa(vcpu, &vcpu->arch.dtl, addr, len);
break;
case KVM_REG_PPC_TB_OFFSET:
/*
* POWER9 DD1 has an erratum where writing TBU40 causes
* the timebase to lose ticks. So we don't let the
* timebase offset be changed on P9 DD1. (It is
* initialized to zero.)
*/
if (cpu_has_feature(CPU_FTR_POWER9_DD1))
break;
/* round up to multiple of 2^24 */
vcpu->arch.vcore->tb_offset =
ALIGN(set_reg_val(id, *val), 1UL << 24);
@ -2907,12 +2915,36 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
{
int r;
int srcu_idx;
unsigned long ebb_regs[3] = {}; /* shut up GCC */
unsigned long user_tar = 0;
unsigned int user_vrsave;
if (!vcpu->arch.sane) {
run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
return -EINVAL;
}
/*
* Don't allow entry with a suspended transaction, because
* the guest entry/exit code will lose it.
* If the guest has TM enabled, save away their TM-related SPRs
* (they will get restored by the TM unavailable interrupt).
*/
#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
if (cpu_has_feature(CPU_FTR_TM) && current->thread.regs &&
(current->thread.regs->msr & MSR_TM)) {
if (MSR_TM_ACTIVE(current->thread.regs->msr)) {
run->exit_reason = KVM_EXIT_FAIL_ENTRY;
run->fail_entry.hardware_entry_failure_reason = 0;
return -EINVAL;
}
current->thread.tm_tfhar = mfspr(SPRN_TFHAR);
current->thread.tm_tfiar = mfspr(SPRN_TFIAR);
current->thread.tm_texasr = mfspr(SPRN_TEXASR);
current->thread.regs->msr &= ~MSR_TM;
}
#endif
kvmppc_core_prepare_to_enter(vcpu);
/* No need to go into the guest when all we'll do is come back out */
@ -2934,6 +2966,15 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
flush_all_to_thread(current);
/* Save userspace EBB and other register values */
if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
ebb_regs[0] = mfspr(SPRN_EBBHR);
ebb_regs[1] = mfspr(SPRN_EBBRR);
ebb_regs[2] = mfspr(SPRN_BESCR);
user_tar = mfspr(SPRN_TAR);
}
user_vrsave = mfspr(SPRN_VRSAVE);
vcpu->arch.wqp = &vcpu->arch.vcore->wq;
vcpu->arch.pgdir = current->mm->pgd;
vcpu->arch.state = KVMPPC_VCPU_BUSY_IN_HOST;
@ -2960,6 +3001,16 @@ static int kvmppc_vcpu_run_hv(struct kvm_run *run, struct kvm_vcpu *vcpu)
}
} while (is_kvmppc_resume_guest(r));
/* Restore userspace EBB and other register values */
if (cpu_has_feature(CPU_FTR_ARCH_207S)) {
mtspr(SPRN_EBBHR, ebb_regs[0]);
mtspr(SPRN_EBBRR, ebb_regs[1]);
mtspr(SPRN_BESCR, ebb_regs[2]);
mtspr(SPRN_TAR, user_tar);
mtspr(SPRN_FSCR, current->thread.fscr);
}
mtspr(SPRN_VRSAVE, user_vrsave);
out:
vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
atomic_dec(&vcpu->kvm->arch.vcpus_running);

View File

@ -121,10 +121,20 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
* Put whatever is in the decrementer into the
* hypervisor decrementer.
*/
BEGIN_FTR_SECTION
ld r5, HSTATE_KVM_VCORE(r13)
ld r6, VCORE_KVM(r5)
ld r9, KVM_HOST_LPCR(r6)
andis. r9, r9, LPCR_LD@h
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
mfspr r8,SPRN_DEC
mftb r7
mtspr SPRN_HDEC,r8
BEGIN_FTR_SECTION
/* On POWER9, don't sign-extend if host LPCR[LD] bit is set */
bne 32f
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
extsw r8,r8
32: mtspr SPRN_HDEC,r8
add r8,r8,r7
std r8,HSTATE_DECEXP(r13)

View File

@ -32,12 +32,29 @@
#include <asm/opal.h>
#include <asm/xive-regs.h>
/* Sign-extend HDEC if not on POWER9 */
#define EXTEND_HDEC(reg) \
BEGIN_FTR_SECTION; \
extsw reg, reg; \
END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
#define VCPU_GPRS_TM(reg) (((reg) * ULONG_SIZE) + VCPU_GPR_TM)
/* Values in HSTATE_NAPPING(r13) */
#define NAPPING_CEDE 1
#define NAPPING_NOVCPU 2
/* Stack frame offsets for kvmppc_hv_entry */
#define SFS 144
#define STACK_SLOT_TRAP (SFS-4)
#define STACK_SLOT_TID (SFS-16)
#define STACK_SLOT_PSSCR (SFS-24)
#define STACK_SLOT_PID (SFS-32)
#define STACK_SLOT_IAMR (SFS-40)
#define STACK_SLOT_CIABR (SFS-48)
#define STACK_SLOT_DAWR (SFS-56)
#define STACK_SLOT_DAWRX (SFS-64)
/*
* Call kvmppc_hv_entry in real mode.
* Must be called with interrupts hard-disabled.
@ -214,6 +231,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
kvmppc_primary_no_guest:
/* We handle this much like a ceded vcpu */
/* put the HDEC into the DEC, since HDEC interrupts don't wake us */
/* HDEC may be larger than DEC for arch >= v3.00, but since the */
/* HDEC value came from DEC in the first place, it will fit */
mfspr r3, SPRN_HDEC
mtspr SPRN_DEC, r3
/*
@ -295,8 +314,9 @@ kvm_novcpu_wakeup:
/* See if our timeslice has expired (HDEC is negative) */
mfspr r0, SPRN_HDEC
EXTEND_HDEC(r0)
li r12, BOOK3S_INTERRUPT_HV_DECREMENTER
cmpwi r0, 0
cmpdi r0, 0
blt kvm_novcpu_exit
/* Got an IPI but other vcpus aren't yet exiting, must be a latecomer */
@ -319,10 +339,10 @@ kvm_novcpu_exit:
bl kvmhv_accumulate_time
#endif
13: mr r3, r12
stw r12, 112-4(r1)
stw r12, STACK_SLOT_TRAP(r1)
bl kvmhv_commence_exit
nop
lwz r12, 112-4(r1)
lwz r12, STACK_SLOT_TRAP(r1)
b kvmhv_switch_to_host
/*
@ -390,8 +410,8 @@ kvm_secondary_got_guest:
lbz r4, HSTATE_PTID(r13)
cmpwi r4, 0
bne 63f
lis r6, 0x7fff
ori r6, r6, 0xffff
LOAD_REG_ADDR(r6, decrementer_max)
ld r6, 0(r6)
mtspr SPRN_HDEC, r6
/* and set per-LPAR registers, if doing dynamic micro-threading */
ld r6, HSTATE_SPLIT_MODE(r13)
@ -545,11 +565,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
* *
*****************************************************************************/
/* Stack frame offsets */
#define STACK_SLOT_TID (112-16)
#define STACK_SLOT_PSSCR (112-24)
#define STACK_SLOT_PID (112-32)
.global kvmppc_hv_entry
kvmppc_hv_entry:
@ -565,7 +580,7 @@ kvmppc_hv_entry:
*/
mflr r0
std r0, PPC_LR_STKOFF(r1)
stdu r1, -112(r1)
stdu r1, -SFS(r1)
/* Save R1 in the PACA */
std r1, HSTATE_HOST_R1(r13)
@ -749,10 +764,20 @@ BEGIN_FTR_SECTION
mfspr r5, SPRN_TIDR
mfspr r6, SPRN_PSSCR
mfspr r7, SPRN_PID
mfspr r8, SPRN_IAMR
std r5, STACK_SLOT_TID(r1)
std r6, STACK_SLOT_PSSCR(r1)
std r7, STACK_SLOT_PID(r1)
std r8, STACK_SLOT_IAMR(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
mfspr r5, SPRN_CIABR
mfspr r6, SPRN_DAWR
mfspr r7, SPRN_DAWRX
std r5, STACK_SLOT_CIABR(r1)
std r6, STACK_SLOT_DAWR(r1)
std r7, STACK_SLOT_DAWRX(r1)
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
/* Set partition DABR */
@ -968,7 +993,8 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
/* Check if HDEC expires soon */
mfspr r3, SPRN_HDEC
cmpwi r3, 512 /* 1 microsecond */
EXTEND_HDEC(r3)
cmpdi r3, 512 /* 1 microsecond */
blt hdec_soon
#ifdef CONFIG_KVM_XICS
@ -1505,11 +1531,10 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_300)
* set by the guest could disrupt the host.
*/
li r0, 0
mtspr SPRN_IAMR, r0
mtspr SPRN_CIABR, r0
mtspr SPRN_DAWRX, r0
mtspr SPRN_PSPB, r0
mtspr SPRN_WORT, r0
BEGIN_FTR_SECTION
mtspr SPRN_IAMR, r0
mtspr SPRN_TCSCR, r0
/* Set MMCRS to 1<<31 to freeze and disable the SPMC counters */
li r0, 1
@ -1525,6 +1550,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300)
std r6,VCPU_UAMOR(r9)
li r6,0
mtspr SPRN_AMR,r6
mtspr SPRN_UAMOR, r6
/* Switch DSCR back to host value */
mfspr r8, SPRN_DSCR
@ -1669,13 +1695,23 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
ptesync
/* Restore host values of some registers */
BEGIN_FTR_SECTION
ld r5, STACK_SLOT_CIABR(r1)
ld r6, STACK_SLOT_DAWR(r1)
ld r7, STACK_SLOT_DAWRX(r1)
mtspr SPRN_CIABR, r5
mtspr SPRN_DAWR, r6
mtspr SPRN_DAWRX, r7
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S)
BEGIN_FTR_SECTION
ld r5, STACK_SLOT_TID(r1)
ld r6, STACK_SLOT_PSSCR(r1)
ld r7, STACK_SLOT_PID(r1)
ld r8, STACK_SLOT_IAMR(r1)
mtspr SPRN_TIDR, r5
mtspr SPRN_PSSCR, r6
mtspr SPRN_PID, r7
mtspr SPRN_IAMR, r8
END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300)
BEGIN_FTR_SECTION
PPC_INVALIDATE_ERAT
@ -1819,8 +1855,8 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_TYPE_RADIX)
li r0, KVM_GUEST_MODE_NONE
stb r0, HSTATE_IN_GUEST(r13)
ld r0, 112+PPC_LR_STKOFF(r1)
addi r1, r1, 112
ld r0, SFS+PPC_LR_STKOFF(r1)
addi r1, r1, SFS
mtlr r0
blr
@ -2366,12 +2402,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_TM)
mfspr r3, SPRN_DEC
mfspr r4, SPRN_HDEC
mftb r5
cmpw r3, r4
extsw r3, r3
EXTEND_HDEC(r4)
cmpd r3, r4
ble 67f
mtspr SPRN_DEC, r4
67:
/* save expiry time of guest decrementer */
extsw r3, r3
add r3, r3, r5
ld r4, HSTATE_KVM_VCPU(r13)
ld r5, HSTATE_KVM_VCORE(r13)

View File

@ -69,7 +69,7 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
{
/* If the XIVE supports the new "store EOI facility, use it */
if (xd->flags & XIVE_IRQ_FLAG_STORE_EOI)
__x_writeq(0, __x_eoi_page(xd));
__x_writeq(0, __x_eoi_page(xd) + XIVE_ESB_STORE_EOI);
else if (hw_irq && xd->flags & XIVE_IRQ_FLAG_EOI_FW) {
opal_int_eoi(hw_irq);
} else {
@ -89,7 +89,7 @@ static void GLUE(X_PFX,source_eoi)(u32 hw_irq, struct xive_irq_data *xd)
* properly.
*/
if (xd->flags & XIVE_IRQ_FLAG_LSI)
__x_readq(__x_eoi_page(xd));
__x_readq(__x_eoi_page(xd) + XIVE_ESB_LOAD_EOI);
else {
eoi_val = GLUE(X_PFX,esb_load)(xd, XIVE_ESB_SET_PQ_00);

View File

@ -68,7 +68,7 @@ radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
if (mm->task_size - len >= addr &&
(!vma || addr + len <= vma->vm_start))
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
/*

View File

@ -112,7 +112,7 @@ radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
(!vma || addr + len <= vma->vm_start))
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
@ -157,7 +157,7 @@ radix__arch_get_unmapped_area_topdown(struct file *filp,
addr = PAGE_ALIGN(addr);
vma = find_vma(mm, addr);
if (mm->task_size - len >= addr && addr >= mmap_min_addr &&
(!vma || addr + len <= vma->vm_start))
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}

Some files were not shown because too many files have changed in this diff Show More