Merge branch 'linus' into core/urgent, to merge in dependent changes
Signed-off-by: Ingo Molnar <mingo@kernel.org>
hifive-unleashed-5.2
commit
82045dd855
|
@ -78,6 +78,8 @@ ForEachMacros:
|
|||
- 'ata_qc_for_each_with_internal'
|
||||
- 'ax25_for_each'
|
||||
- 'ax25_uid_for_each'
|
||||
- '__bio_for_each_bvec'
|
||||
- 'bio_for_each_bvec'
|
||||
- 'bio_for_each_integrity_vec'
|
||||
- '__bio_for_each_segment'
|
||||
- 'bio_for_each_segment'
|
||||
|
@ -118,10 +120,12 @@ ForEachMacros:
|
|||
- 'drm_for_each_legacy_plane'
|
||||
- 'drm_for_each_plane'
|
||||
- 'drm_for_each_plane_mask'
|
||||
- 'drm_for_each_privobj'
|
||||
- 'drm_mm_for_each_hole'
|
||||
- 'drm_mm_for_each_node'
|
||||
- 'drm_mm_for_each_node_in_range'
|
||||
- 'drm_mm_for_each_node_safe'
|
||||
- 'flow_action_for_each'
|
||||
- 'for_each_active_drhd_unit'
|
||||
- 'for_each_active_iommu'
|
||||
- 'for_each_available_child_of_node'
|
||||
|
@ -158,6 +162,9 @@ ForEachMacros:
|
|||
- 'for_each_dss_dev'
|
||||
- 'for_each_efi_memory_desc'
|
||||
- 'for_each_efi_memory_desc_in_map'
|
||||
- 'for_each_element'
|
||||
- 'for_each_element_extid'
|
||||
- 'for_each_element_id'
|
||||
- 'for_each_endpoint_of_node'
|
||||
- 'for_each_evictable_lru'
|
||||
- 'for_each_fib6_node_rt_rcu'
|
||||
|
@ -195,6 +202,7 @@ ForEachMacros:
|
|||
- 'for_each_net_rcu'
|
||||
- 'for_each_new_connector_in_state'
|
||||
- 'for_each_new_crtc_in_state'
|
||||
- 'for_each_new_mst_mgr_in_state'
|
||||
- 'for_each_new_plane_in_state'
|
||||
- 'for_each_new_private_obj_in_state'
|
||||
- 'for_each_node'
|
||||
|
@ -210,8 +218,10 @@ ForEachMacros:
|
|||
- 'for_each_of_pci_range'
|
||||
- 'for_each_old_connector_in_state'
|
||||
- 'for_each_old_crtc_in_state'
|
||||
- 'for_each_old_mst_mgr_in_state'
|
||||
- 'for_each_oldnew_connector_in_state'
|
||||
- 'for_each_oldnew_crtc_in_state'
|
||||
- 'for_each_oldnew_mst_mgr_in_state'
|
||||
- 'for_each_oldnew_plane_in_state'
|
||||
- 'for_each_oldnew_plane_in_state_reverse'
|
||||
- 'for_each_oldnew_private_obj_in_state'
|
||||
|
@ -243,6 +253,9 @@ ForEachMacros:
|
|||
- 'for_each_sg_dma_page'
|
||||
- 'for_each_sg_page'
|
||||
- 'for_each_sibling_event'
|
||||
- 'for_each_subelement'
|
||||
- 'for_each_subelement_extid'
|
||||
- 'for_each_subelement_id'
|
||||
- '__for_each_thread'
|
||||
- 'for_each_thread'
|
||||
- 'for_each_zone'
|
||||
|
@ -252,6 +265,8 @@ ForEachMacros:
|
|||
- 'fwnode_for_each_child_node'
|
||||
- 'fwnode_graph_for_each_endpoint'
|
||||
- 'gadget_for_each_ep'
|
||||
- 'genradix_for_each'
|
||||
- 'genradix_for_each_from'
|
||||
- 'hash_for_each'
|
||||
- 'hash_for_each_possible'
|
||||
- 'hash_for_each_possible_rcu'
|
||||
|
@ -293,7 +308,11 @@ ForEachMacros:
|
|||
- 'key_for_each'
|
||||
- 'key_for_each_safe'
|
||||
- 'klp_for_each_func'
|
||||
- 'klp_for_each_func_safe'
|
||||
- 'klp_for_each_func_static'
|
||||
- 'klp_for_each_object'
|
||||
- 'klp_for_each_object_safe'
|
||||
- 'klp_for_each_object_static'
|
||||
- 'kvm_for_each_memslot'
|
||||
- 'kvm_for_each_vcpu'
|
||||
- 'list_for_each'
|
||||
|
@ -324,6 +343,8 @@ ForEachMacros:
|
|||
- 'media_device_for_each_intf'
|
||||
- 'media_device_for_each_link'
|
||||
- 'media_device_for_each_pad'
|
||||
- 'mp_bvec_for_each_page'
|
||||
- 'mp_bvec_for_each_segment'
|
||||
- 'nanddev_io_for_each_page'
|
||||
- 'netdev_for_each_lower_dev'
|
||||
- 'netdev_for_each_lower_private'
|
||||
|
@ -375,6 +396,7 @@ ForEachMacros:
|
|||
- 'rht_for_each_rcu'
|
||||
- 'rht_for_each_rcu_continue'
|
||||
- '__rq_for_each_bio'
|
||||
- 'rq_for_each_bvec'
|
||||
- 'rq_for_each_segment'
|
||||
- 'scsi_for_each_prot_sg'
|
||||
- 'scsi_for_each_sg'
|
||||
|
@ -410,6 +432,8 @@ ForEachMacros:
|
|||
- 'v4l2_m2m_for_each_src_buf_safe'
|
||||
- 'virtio_device_for_each_vq'
|
||||
- 'xa_for_each'
|
||||
- 'xa_for_each_marked'
|
||||
- 'xa_for_each_start'
|
||||
- 'xas_for_each'
|
||||
- 'xas_for_each_conflict'
|
||||
- 'xas_for_each_marked'
|
||||
|
|
2
.mailmap
2
.mailmap
|
@ -156,6 +156,8 @@ Morten Welinder <welinder@darter.rentec.com>
|
|||
Morten Welinder <welinder@troll.com>
|
||||
Mythri P K <mythripk@ti.com>
|
||||
Nguyen Anh Quynh <aquynh@gmail.com>
|
||||
Nicolas Pitre <nico@fluxnic.net> <nicolas.pitre@linaro.org>
|
||||
Nicolas Pitre <nico@fluxnic.net> <nico@linaro.org>
|
||||
Paolo 'Blaisorblade' Giarrusso <blaisorblade@yahoo.it>
|
||||
Patrick Mochel <mochel@digitalimplant.org>
|
||||
Paul Burton <paul.burton@mips.com> <paul.burton@imgtec.com>
|
||||
|
|
|
@ -511,10 +511,30 @@ Description: Control Symmetric Multi Threading (SMT)
|
|||
control: Read/write interface to control SMT. Possible
|
||||
values:
|
||||
|
||||
"on" SMT is enabled
|
||||
"off" SMT is disabled
|
||||
"forceoff" SMT is force disabled. Cannot be changed.
|
||||
"notsupported" SMT is not supported by the CPU
|
||||
"on" SMT is enabled
|
||||
"off" SMT is disabled
|
||||
"forceoff" SMT is force disabled. Cannot be changed.
|
||||
"notsupported" SMT is not supported by the CPU
|
||||
"notimplemented" SMT runtime toggling is not
|
||||
implemented for the architecture
|
||||
|
||||
If control status is "forceoff" or "notsupported" writes
|
||||
are rejected.
|
||||
|
||||
What: /sys/devices/system/cpu/cpu#/power/energy_perf_bias
|
||||
Date: March 2019
|
||||
Contact: linux-pm@vger.kernel.org
|
||||
Description: Intel Energy and Performance Bias Hint (EPB)
|
||||
|
||||
EPB for the given CPU in a sliding scale 0 - 15, where a value
|
||||
of 0 corresponds to a hint preference for highest performance
|
||||
and a value of 15 corresponds to the maximum energy savings.
|
||||
|
||||
In order to change the EPB value for the CPU, write either
|
||||
a number in the 0 - 15 sliding scale above, or one of the
|
||||
strings: "performance", "balance-performance", "normal",
|
||||
"balance-power", "power" (that represent values reflected by
|
||||
their meaning), to this attribute.
|
||||
|
||||
This attribute is present for all online CPUs supporting the
|
||||
Intel EPB feature.
|
||||
|
|
|
@ -155,8 +155,7 @@ keeping lock contention under control at all tree levels regardless
|
|||
of the level of loading on the system.
|
||||
|
||||
</p><p>RCU updaters wait for normal grace periods by registering
|
||||
RCU callbacks, either directly via <tt>call_rcu()</tt> and
|
||||
friends (namely <tt>call_rcu_bh()</tt> and <tt>call_rcu_sched()</tt>),
|
||||
RCU callbacks, either directly via <tt>call_rcu()</tt>
|
||||
or indirectly via <tt>synchronize_rcu()</tt> and friends.
|
||||
RCU callbacks are represented by <tt>rcu_head</tt> structures,
|
||||
which are queued on <tt>rcu_data</tt> structures while they are
|
||||
|
|
|
@ -56,6 +56,7 @@ sections.
|
|||
RCU-preempt Expedited Grace Periods</a></h2>
|
||||
|
||||
<p>
|
||||
<tt>CONFIG_PREEMPT=y</tt> kernels implement RCU-preempt.
|
||||
The overall flow of the handling of a given CPU by an RCU-preempt
|
||||
expedited grace period is shown in the following diagram:
|
||||
|
||||
|
@ -139,6 +140,7 @@ or offline, among other things.
|
|||
RCU-sched Expedited Grace Periods</a></h2>
|
||||
|
||||
<p>
|
||||
<tt>CONFIG_PREEMPT=n</tt> kernels implement RCU-sched.
|
||||
The overall flow of the handling of a given CPU by an RCU-sched
|
||||
expedited grace period is shown in the following diagram:
|
||||
|
||||
|
@ -146,7 +148,7 @@ expedited grace period is shown in the following diagram:
|
|||
|
||||
<p>
|
||||
As with RCU-preempt, RCU-sched's
|
||||
<tt>synchronize_sched_expedited()</tt> ignores offline and
|
||||
<tt>synchronize_rcu_expedited()</tt> ignores offline and
|
||||
idle CPUs, again because they are in remotely detectable
|
||||
quiescent states.
|
||||
However, because the
|
||||
|
|
|
@ -34,12 +34,11 @@ Similarly, any code that happens before the beginning of a given RCU grace
|
|||
period is guaranteed to see the effects of all accesses following the end
|
||||
of that grace period that are within RCU read-side critical sections.
|
||||
|
||||
<p>This guarantee is particularly pervasive for <tt>synchronize_sched()</tt>,
|
||||
for which RCU-sched read-side critical sections include any region
|
||||
<p>Note well that RCU-sched read-side critical sections include any region
|
||||
of code for which preemption is disabled.
|
||||
Given that each individual machine instruction can be thought of as
|
||||
an extremely small region of preemption-disabled code, one can think of
|
||||
<tt>synchronize_sched()</tt> as <tt>smp_mb()</tt> on steroids.
|
||||
<tt>synchronize_rcu()</tt> as <tt>smp_mb()</tt> on steroids.
|
||||
|
||||
<p>RCU updaters use this guarantee by splitting their updates into
|
||||
two phases, one of which is executed before the grace period and
|
||||
|
|
|
@ -81,18 +81,19 @@ currently executing on some other CPU. We therefore cannot free
|
|||
up any data structures used by the old NMI handler until execution
|
||||
of it completes on all other CPUs.
|
||||
|
||||
One way to accomplish this is via synchronize_sched(), perhaps as
|
||||
One way to accomplish this is via synchronize_rcu(), perhaps as
|
||||
follows:
|
||||
|
||||
unset_nmi_callback();
|
||||
synchronize_sched();
|
||||
synchronize_rcu();
|
||||
kfree(my_nmi_data);
|
||||
|
||||
This works because synchronize_sched() blocks until all CPUs complete
|
||||
any preemption-disabled segments of code that they were executing.
|
||||
Since NMI handlers disable preemption, synchronize_sched() is guaranteed
|
||||
This works because (as of v4.20) synchronize_rcu() blocks until all
|
||||
CPUs complete any preemption-disabled segments of code that they were
|
||||
executing.
|
||||
Since NMI handlers disable preemption, synchronize_rcu() is guaranteed
|
||||
not to return until all ongoing NMI handlers exit. It is therefore safe
|
||||
to free up the handler's data as soon as synchronize_sched() returns.
|
||||
to free up the handler's data as soon as synchronize_rcu() returns.
|
||||
|
||||
Important note: for this to work, the architecture in question must
|
||||
invoke nmi_enter() and nmi_exit() on NMI entry and exit, respectively.
|
||||
|
|
|
@ -86,10 +86,8 @@ even on a UP system. So do not do it! Even on a UP system, the RCU
|
|||
infrastructure -must- respect grace periods, and -must- invoke callbacks
|
||||
from a known environment in which no locks are held.
|
||||
|
||||
It -is- safe for synchronize_sched() and synchronize_rcu_bh() to return
|
||||
immediately on an UP system. It is also safe for synchronize_rcu()
|
||||
to return immediately on UP systems, except when running preemptable
|
||||
RCU.
|
||||
Note that it -is- safe for synchronize_rcu() to return immediately on
|
||||
UP systems, including !PREEMPT SMP builds running on UP systems.
|
||||
|
||||
Quick Quiz #3: Why can't synchronize_rcu() return immediately on
|
||||
UP systems running preemptable RCU?
|
||||
|
|
|
@ -182,16 +182,13 @@ over a rather long period of time, but improvements are always welcome!
|
|||
when publicizing a pointer to a structure that can
|
||||
be traversed by an RCU read-side critical section.
|
||||
|
||||
5. If call_rcu(), or a related primitive such as call_rcu_bh(),
|
||||
call_rcu_sched(), or call_srcu() is used, the callback function
|
||||
will be called from softirq context. In particular, it cannot
|
||||
block.
|
||||
5. If call_rcu() or call_srcu() is used, the callback function will
|
||||
be called from softirq context. In particular, it cannot block.
|
||||
|
||||
6. Since synchronize_rcu() can block, it cannot be called from
|
||||
any sort of irq context. The same rule applies for
|
||||
synchronize_rcu_bh(), synchronize_sched(), synchronize_srcu(),
|
||||
synchronize_rcu_expedited(), synchronize_rcu_bh_expedited(),
|
||||
synchronize_sched_expedite(), and synchronize_srcu_expedited().
|
||||
6. Since synchronize_rcu() can block, it cannot be called
|
||||
from any sort of irq context. The same rule applies
|
||||
for synchronize_srcu(), synchronize_rcu_expedited(), and
|
||||
synchronize_srcu_expedited().
|
||||
|
||||
The expedited forms of these primitives have the same semantics
|
||||
as the non-expedited forms, but expediting is both expensive and
|
||||
|
@ -212,20 +209,20 @@ over a rather long period of time, but improvements are always welcome!
|
|||
of the system, especially to real-time workloads running on
|
||||
the rest of the system.
|
||||
|
||||
7. If the updater uses call_rcu() or synchronize_rcu(), then the
|
||||
corresponding readers must use rcu_read_lock() and
|
||||
rcu_read_unlock(). If the updater uses call_rcu_bh() or
|
||||
synchronize_rcu_bh(), then the corresponding readers must
|
||||
use rcu_read_lock_bh() and rcu_read_unlock_bh(). If the
|
||||
updater uses call_rcu_sched() or synchronize_sched(), then
|
||||
the corresponding readers must disable preemption, possibly
|
||||
by calling rcu_read_lock_sched() and rcu_read_unlock_sched().
|
||||
If the updater uses synchronize_srcu() or call_srcu(), then
|
||||
the corresponding readers must use srcu_read_lock() and
|
||||
7. As of v4.20, a given kernel implements only one RCU flavor,
|
||||
which is RCU-sched for PREEMPT=n and RCU-preempt for PREEMPT=y.
|
||||
If the updater uses call_rcu() or synchronize_rcu(),
|
||||
then the corresponding readers may use rcu_read_lock() and
|
||||
rcu_read_unlock(), rcu_read_lock_bh() and rcu_read_unlock_bh(),
|
||||
or any pair of primitives that disables and re-enables preemption,
|
||||
for example, rcu_read_lock_sched() and rcu_read_unlock_sched().
|
||||
If the updater uses synchronize_srcu() or call_srcu(),
|
||||
then the corresponding readers must use srcu_read_lock() and
|
||||
srcu_read_unlock(), and with the same srcu_struct. The rules for
|
||||
the expedited primitives are the same as for their non-expedited
|
||||
counterparts. Mixing things up will result in confusion and
|
||||
broken kernels.
|
||||
broken kernels, and has even resulted in an exploitable security
|
||||
issue.
|
||||
|
||||
One exception to this rule: rcu_read_lock() and rcu_read_unlock()
|
||||
may be substituted for rcu_read_lock_bh() and rcu_read_unlock_bh()
|
||||
|
@ -288,8 +285,7 @@ over a rather long period of time, but improvements are always welcome!
|
|||
d. Periodically invoke synchronize_rcu(), permitting a limited
|
||||
number of updates per grace period.
|
||||
|
||||
The same cautions apply to call_rcu_bh(), call_rcu_sched(),
|
||||
call_srcu(), and kfree_rcu().
|
||||
The same cautions apply to call_srcu() and kfree_rcu().
|
||||
|
||||
Note that although these primitives do take action to avoid memory
|
||||
exhaustion when any given CPU has too many callbacks, a determined
|
||||
|
@ -322,7 +318,7 @@ over a rather long period of time, but improvements are always welcome!
|
|||
|
||||
11. Any lock acquired by an RCU callback must be acquired elsewhere
|
||||
with softirq disabled, e.g., via spin_lock_irqsave(),
|
||||
spin_lock_bh(), etc. Failing to disable irq on a given
|
||||
spin_lock_bh(), etc. Failing to disable softirq on a given
|
||||
acquisition of that lock will result in deadlock as soon as
|
||||
the RCU softirq handler happens to run your RCU callback while
|
||||
interrupting that acquisition's critical section.
|
||||
|
@ -335,13 +331,16 @@ over a rather long period of time, but improvements are always welcome!
|
|||
must use whatever locking or other synchronization is required
|
||||
to safely access and/or modify that data structure.
|
||||
|
||||
RCU callbacks are -usually- executed on the same CPU that executed
|
||||
the corresponding call_rcu(), call_rcu_bh(), or call_rcu_sched(),
|
||||
but are by -no- means guaranteed to be. For example, if a given
|
||||
CPU goes offline while having an RCU callback pending, then that
|
||||
RCU callback will execute on some surviving CPU. (If this was
|
||||
not the case, a self-spawning RCU callback would prevent the
|
||||
victim CPU from ever going offline.)
|
||||
Do not assume that RCU callbacks will be executed on the same
|
||||
CPU that executed the corresponding call_rcu() or call_srcu().
|
||||
For example, if a given CPU goes offline while having an RCU
|
||||
callback pending, then that RCU callback will execute on some
|
||||
surviving CPU. (If this was not the case, a self-spawning RCU
|
||||
callback would prevent the victim CPU from ever going offline.)
|
||||
Furthermore, CPUs designated by rcu_nocbs= might well -always-
|
||||
have their RCU callbacks executed on some other CPUs.  In fact,
|
||||
for some real-time workloads, this is the whole point of using
|
||||
the rcu_nocbs= kernel boot parameter.
|
||||
|
||||
13. Unlike other forms of RCU, it -is- permissible to block in an
|
||||
SRCU read-side critical section (demarked by srcu_read_lock()
|
||||
|
@ -381,11 +380,11 @@ over a rather long period of time, but improvements are always welcome!
|
|||
|
||||
SRCU's expedited primitive (synchronize_srcu_expedited())
|
||||
never sends IPIs to other CPUs, so it is easier on
|
||||
real-time workloads than is synchronize_rcu_expedited(),
|
||||
synchronize_rcu_bh_expedited() or synchronize_sched_expedited().
|
||||
real-time workloads than is synchronize_rcu_expedited().
|
||||
|
||||
Note that rcu_dereference() and rcu_assign_pointer() relate to
|
||||
SRCU just as they do to other forms of RCU.
|
||||
Note that rcu_assign_pointer() relates to SRCU just as it does to
|
||||
other forms of RCU, but instead of rcu_dereference() you should
|
||||
use srcu_dereference() in order to avoid lockdep splats.
|
||||
|
||||
14. The whole point of call_rcu(), synchronize_rcu(), and friends
|
||||
is to wait until all pre-existing readers have finished before
|
||||
|
@ -405,6 +404,9 @@ over a rather long period of time, but improvements are always welcome!
|
|||
read-side critical sections. It is the responsibility of the
|
||||
RCU update-side primitives to deal with this.
|
||||
|
||||
For SRCU readers, you can use smp_mb__after_srcu_read_unlock()
|
||||
immediately after an srcu_read_unlock() to get a full barrier.
|
||||
|
||||
16. Use CONFIG_PROVE_LOCKING, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the
|
||||
__rcu sparse checks to validate your RCU code. These can help
|
||||
find problems as follows:
|
||||
|
@ -428,22 +430,19 @@ over a rather long period of time, but improvements are always welcome!
|
|||
These debugging aids can help you find problems that are
|
||||
otherwise extremely difficult to spot.
|
||||
|
||||
17. If you register a callback using call_rcu(), call_rcu_bh(),
|
||||
call_rcu_sched(), or call_srcu(), and pass in a function defined
|
||||
within a loadable module, then it in necessary to wait for
|
||||
all pending callbacks to be invoked after the last invocation
|
||||
and before unloading that module. Note that it is absolutely
|
||||
-not- sufficient to wait for a grace period! The current (say)
|
||||
synchronize_rcu() implementation waits only for all previous
|
||||
callbacks registered on the CPU that synchronize_rcu() is running
|
||||
on, but it is -not- guaranteed to wait for callbacks registered
|
||||
on other CPUs.
|
||||
17. If you register a callback using call_rcu() or call_srcu(), and
|
||||
pass in a function defined within a loadable module, then it is
|
||||
necessary to wait for all pending callbacks to be invoked after
|
||||
the last invocation and before unloading that module. Note that
|
||||
it is absolutely -not- sufficient to wait for a grace period!
|
||||
The current (say) synchronize_rcu() implementation is -not-
|
||||
guaranteed to wait for callbacks registered on other CPUs.
|
||||
Or even on the current CPU if that CPU recently went offline
|
||||
and came back online.
|
||||
|
||||
You instead need to use one of the barrier functions:
|
||||
|
||||
o call_rcu() -> rcu_barrier()
|
||||
o call_rcu_bh() -> rcu_barrier()
|
||||
o call_rcu_sched() -> rcu_barrier()
|
||||
o call_srcu() -> srcu_barrier()
|
||||
|
||||
However, these barrier functions are absolutely -not- guaranteed
|
||||
|
|
|
@ -52,10 +52,10 @@ o If I am running on a uniprocessor kernel, which can only do one
|
|||
o How can I see where RCU is currently used in the Linux kernel?
|
||||
|
||||
Search for "rcu_read_lock", "rcu_read_unlock", "call_rcu",
|
||||
"rcu_read_lock_bh", "rcu_read_unlock_bh", "call_rcu_bh",
|
||||
"srcu_read_lock", "srcu_read_unlock", "synchronize_rcu",
|
||||
"synchronize_net", "synchronize_srcu", and the other RCU
|
||||
primitives. Or grab one of the cscope databases from:
|
||||
"rcu_read_lock_bh", "rcu_read_unlock_bh", "srcu_read_lock",
|
||||
"srcu_read_unlock", "synchronize_rcu", "synchronize_net",
|
||||
"synchronize_srcu", and the other RCU primitives. Or grab one
|
||||
of the cscope databases from:
|
||||
|
||||
http://www.rdrop.com/users/paulmck/RCU/linuxusage/rculocktab.html
|
||||
|
||||
|
|
|
@ -351,3 +351,106 @@ garbage values.
|
|||
|
||||
In short, rcu_dereference() is -not- optional when you are going to
|
||||
dereference the resulting pointer.
|
||||
|
||||
|
||||
WHICH MEMBER OF THE rcu_dereference() FAMILY SHOULD YOU USE?
|
||||
|
||||
First, please avoid using rcu_dereference_raw() and also please avoid
|
||||
using rcu_dereference_check() and rcu_dereference_protected() with a
|
||||
second argument with a constant value of 1 (or true, for that matter).
|
||||
With that caution out of the way, here is some guidance for which
|
||||
member of the rcu_dereference() family to use in various situations:
|
||||
|
||||
1. If the access needs to be within an RCU read-side critical
|
||||
section, use rcu_dereference(). With the new consolidated
|
||||
RCU flavors, an RCU read-side critical section is entered
|
||||
using rcu_read_lock(), anything that disables bottom halves,
|
||||
anything that disables interrupts, or anything that disables
|
||||
preemption.
|
||||
|
||||
2. If the access might be within an RCU read-side critical section
|
||||
on the one hand, or protected by (say) my_lock on the other,
|
||||
use rcu_dereference_check(), for example:
|
||||
|
||||
p1 = rcu_dereference_check(p->rcu_protected_pointer,
|
||||
lockdep_is_held(&my_lock));
|
||||
|
||||
|
||||
3. If the access might be within an RCU read-side critical section
|
||||
on the one hand, or protected by either my_lock or your_lock on
|
||||
the other, again use rcu_dereference_check(), for example:
|
||||
|
||||
p1 = rcu_dereference_check(p->rcu_protected_pointer,
|
||||
lockdep_is_held(&my_lock) ||
|
||||
lockdep_is_held(&your_lock));
|
||||
|
||||
4. If the access is on the update side, so that it is always protected
|
||||
by my_lock, use rcu_dereference_protected():
|
||||
|
||||
p1 = rcu_dereference_protected(p->rcu_protected_pointer,
|
||||
lockdep_is_held(&my_lock));
|
||||
|
||||
This can be extended to handle multiple locks as in #3 above,
|
||||
and both can be extended to check other conditions as well.
|
||||
|
||||
5. If the protection is supplied by the caller, and is thus unknown
|
||||
to this code, that is the rare case when rcu_dereference_raw()
|
||||
is appropriate. In addition, rcu_dereference_raw() might be
|
||||
appropriate when the lockdep expression would be excessively
|
||||
complex, except that a better approach in that case might be to
|
||||
take a long hard look at your synchronization design. Still,
|
||||
there are data-locking cases where any one of a very large number
|
||||
of locks or reference counters suffices to protect the pointer,
|
||||
so rcu_dereference_raw() does have its place.
|
||||
|
||||
However, its place is probably quite a bit smaller than one
|
||||
might expect given the number of uses in the current kernel.
|
||||
Ditto for its synonym, rcu_dereference_check( ... , 1), and
|
||||
its close relative, rcu_dereference_protected(... , 1).
|
||||
|
||||
|
||||
SPARSE CHECKING OF RCU-PROTECTED POINTERS
|
||||
|
||||
The sparse static-analysis tool checks for direct access to RCU-protected
|
||||
pointers, which can result in "interesting" bugs due to compiler
|
||||
optimizations involving invented loads and perhaps also load tearing.
|
||||
For example, suppose someone mistakenly does something like this:
|
||||
|
||||
p = q->rcu_protected_pointer;
|
||||
do_something_with(p->a);
|
||||
do_something_else_with(p->b);
|
||||
|
||||
If register pressure is high, the compiler might optimize "p" out
|
||||
of existence, transforming the code to something like this:
|
||||
|
||||
do_something_with(q->rcu_protected_pointer->a);
|
||||
do_something_else_with(q->rcu_protected_pointer->b);
|
||||
|
||||
This could fatally disappoint your code if q->rcu_protected_pointer
|
||||
changed in the meantime. Nor is this a theoretical problem: Exactly
|
||||
this sort of bug cost Paul E. McKenney (and several of his innocent
|
||||
colleagues) a three-day weekend back in the early 1990s.
|
||||
|
||||
Load tearing could of course result in dereferencing a mashup of a pair
|
||||
of pointers, which also might fatally disappoint your code.
|
||||
|
||||
These problems could have been avoided simply by making the code instead
|
||||
read as follows:
|
||||
|
||||
p = rcu_dereference(q->rcu_protected_pointer);
|
||||
do_something_with(p->a);
|
||||
do_something_else_with(p->b);
|
||||
|
||||
Unfortunately, these sorts of bugs can be extremely hard to spot during
|
||||
review. This is where the sparse tool comes into play, along with the
|
||||
"__rcu" marker. If you mark a pointer declaration, whether in a structure
|
||||
or as a formal parameter, with "__rcu", you tell sparse to complain if
|
||||
this pointer is accessed directly. It will also cause sparse to complain
|
||||
if a pointer not marked with "__rcu" is accessed using rcu_dereference()
|
||||
and friends. For example, ->rcu_protected_pointer might be declared as
|
||||
follows:
|
||||
|
||||
struct foo __rcu *rcu_protected_pointer;
|
||||
|
||||
Use of "__rcu" is opt-in. If you choose not to use it, then you should
|
||||
ignore the sparse warnings.
|
||||
|
|
|
@ -83,16 +83,15 @@ Pseudo-code using rcu_barrier() is as follows:
|
|||
2. Execute rcu_barrier().
|
||||
3. Allow the module to be unloaded.
|
||||
|
||||
There are also rcu_barrier_bh(), rcu_barrier_sched(), and srcu_barrier()
|
||||
functions for the other flavors of RCU, and you of course must match
|
||||
the flavor of rcu_barrier() with that of call_rcu(). If your module
|
||||
uses multiple flavors of call_rcu(), then it must also use multiple
|
||||
There is also an srcu_barrier() function for SRCU, and you of course
|
||||
must match the flavor of rcu_barrier() with that of call_rcu(). If your
|
||||
module uses multiple flavors of call_rcu(), then it must also use multiple
|
||||
flavors of rcu_barrier() when unloading that module. For example, if
|
||||
it uses call_rcu_bh(), call_srcu() on srcu_struct_1, and call_srcu() on
|
||||
it uses call_rcu(), call_srcu() on srcu_struct_1, and call_srcu() on
|
||||
srcu_struct_2, then the following three lines of code will be required
|
||||
when unloading:
|
||||
|
||||
1 rcu_barrier_bh();
|
||||
1 rcu_barrier();
|
||||
2 srcu_barrier(&srcu_struct_1);
|
||||
3 srcu_barrier(&srcu_struct_2);
|
||||
|
||||
|
@ -185,12 +184,12 @@ module invokes call_rcu() from timers, you will need to first cancel all
|
|||
the timers, and only then invoke rcu_barrier() to wait for any remaining
|
||||
RCU callbacks to complete.
|
||||
|
||||
Of course, if you module uses call_rcu_bh(), you will need to invoke
|
||||
rcu_barrier_bh() before unloading. Similarly, if your module uses
|
||||
call_rcu_sched(), you will need to invoke rcu_barrier_sched() before
|
||||
unloading. If your module uses call_rcu(), call_rcu_bh(), -and-
|
||||
call_rcu_sched(), then you will need to invoke each of rcu_barrier(),
|
||||
rcu_barrier_bh(), and rcu_barrier_sched().
|
||||
Of course, if your module uses call_rcu(), you will need to invoke
|
||||
rcu_barrier() before unloading. Similarly, if your module uses
|
||||
call_srcu(), you will need to invoke srcu_barrier() before unloading,
|
||||
and on the same srcu_struct structure. If your module uses call_rcu()
|
||||
-and- call_srcu(), then you will need to invoke rcu_barrier() -and-
|
||||
srcu_barrier().
|
||||
|
||||
|
||||
Implementing rcu_barrier()
|
||||
|
@ -223,8 +222,8 @@ shown below. Note that the final "1" in on_each_cpu()'s argument list
|
|||
ensures that all the calls to rcu_barrier_func() will have completed
|
||||
before on_each_cpu() returns. Line 9 then waits for the completion.
|
||||
|
||||
This code was rewritten in 2008 to support rcu_barrier_bh() and
|
||||
rcu_barrier_sched() in addition to the original rcu_barrier().
|
||||
This code was rewritten in 2008 and several times thereafter, but this
|
||||
still gives the general idea.
|
||||
|
||||
The rcu_barrier_func() runs on each CPU, where it invokes call_rcu()
|
||||
to post an RCU callback, as follows:
|
||||
|
|
|
@ -310,7 +310,7 @@ reader, updater, and reclaimer.
|
|||
|
||||
|
||||
rcu_assign_pointer()
|
||||
+--------+
|
||||
+--------+
|
||||
+---------------------->| reader |---------+
|
||||
| +--------+ |
|
||||
| | |
|
||||
|
@ -318,12 +318,12 @@ reader, updater, and reclaimer.
|
|||
| | | rcu_read_lock()
|
||||
| | | rcu_read_unlock()
|
||||
| rcu_dereference() | |
|
||||
+---------+ | |
|
||||
| updater |<---------------------+ |
|
||||
+---------+ V
|
||||
+---------+ | |
|
||||
| updater |<----------------+ |
|
||||
+---------+ V
|
||||
| +-----------+
|
||||
+----------------------------------->| reclaimer |
|
||||
+-----------+
|
||||
+-----------+
|
||||
Defer:
|
||||
synchronize_rcu() & call_rcu()
|
||||
|
||||
|
|
|
@ -56,12 +56,12 @@ situation from a state where some tasks are stalled but the CPU is
|
|||
still doing productive work. As such, time spent in this subset of the
|
||||
stall state is tracked separately and exported in the "full" averages.
|
||||
|
||||
The ratios are tracked as recent trends over ten, sixty, and three
|
||||
hundred second windows, which gives insight into short term events as
|
||||
well as medium and long term trends. The total absolute stall time is
|
||||
tracked and exported as well, to allow detection of latency spikes
|
||||
which wouldn't necessarily make a dent in the time averages, or to
|
||||
average trends over custom time frames.
|
||||
The ratios (in %) are tracked as recent trends over ten, sixty, and
|
||||
three hundred second windows, which gives insight into short term events
|
||||
as well as medium and long term trends. The total absolute stall time
|
||||
(in us) is tracked and exported as well, to allow detection of latency
|
||||
spikes which wouldn't necessarily make a dent in the time averages,
|
||||
or to average trends over custom time frames.
|
||||
|
||||
Cgroup2 interface
|
||||
=================
|
||||
|
|
|
@ -1,66 +0,0 @@
|
|||
The AML Debugger
|
||||
|
||||
Copyright (C) 2016, Intel Corporation
|
||||
Author: Lv Zheng <lv.zheng@intel.com>
|
||||
|
||||
|
||||
This document describes the usage of the AML debugger embedded in the Linux
|
||||
kernel.
|
||||
|
||||
1. Build the debugger
|
||||
|
||||
The following kernel configuration items are required to enable the AML
|
||||
debugger interface from the Linux kernel:
|
||||
|
||||
CONFIG_ACPI_DEBUGGER=y
|
||||
CONFIG_ACPI_DEBUGGER_USER=m
|
||||
|
||||
The userspace utilities can be built from the kernel source tree using
|
||||
the following commands:
|
||||
|
||||
$ cd tools
|
||||
$ make acpi
|
||||
|
||||
The resultant userspace tool binary is then located at:
|
||||
|
||||
tools/power/acpi/acpidbg
|
||||
|
||||
It can be installed to system directories by running "make install" (as a
|
||||
sufficiently privileged user).
|
||||
|
||||
2. Start the userspace debugger interface
|
||||
|
||||
After booting the kernel with the debugger built-in, the debugger can be
|
||||
started by using the following commands:
|
||||
|
||||
# mount -t debugfs none /sys/kernel/debug
|
||||
# modprobe acpi_dbg
|
||||
# tools/power/acpi/acpidbg
|
||||
|
||||
That spawns the interactive AML debugger environment where you can execute
|
||||
debugger commands.
|
||||
|
||||
The commands are documented in the "ACPICA Overview and Programmer Reference"
|
||||
that can be downloaded from
|
||||
|
||||
https://acpica.org/documentation
|
||||
|
||||
The detailed debugger commands reference is located in Chapter 12 "ACPICA
|
||||
Debugger Reference". The "help" command can be used for a quick reference.
|
||||
|
||||
3. Stop the userspace debugger interface
|
||||
|
||||
The interactive debugger interface can be closed by pressing Ctrl+C or using
|
||||
the "quit" or "exit" commands. When finished, unload the module with:
|
||||
|
||||
# rmmod acpi_dbg
|
||||
|
||||
The module unloading may fail if there is an acpidbg instance running.
|
||||
|
||||
4. Run the debugger in a script
|
||||
|
||||
It may be useful to run the AML debugger in a test script. "acpidbg" supports
|
||||
this in a special "batch" mode. For example, the following command outputs
|
||||
the entire ACPI namespace:
|
||||
|
||||
# acpidbg -b "namespace"
|
|
@ -1,147 +0,0 @@
|
|||
APEI output format
|
||||
~~~~~~~~~~~~~~~~~~
|
||||
|
||||
APEI uses printk as hardware error reporting interface, the output
|
||||
format is as follows.
|
||||
|
||||
<error record> :=
|
||||
APEI generic hardware error status
|
||||
severity: <integer>, <severity string>
|
||||
section: <integer>, severity: <integer>, <severity string>
|
||||
flags: <integer>
|
||||
<section flags strings>
|
||||
fru_id: <uuid string>
|
||||
fru_text: <string>
|
||||
section_type: <section type string>
|
||||
<section data>
|
||||
|
||||
<severity string>* := recoverable | fatal | corrected | info
|
||||
|
||||
<section flags strings># :=
|
||||
[primary][, containment warning][, reset][, threshold exceeded]\
|
||||
[, resource not accessible][, latent error]
|
||||
|
||||
<section type string> := generic processor error | memory error | \
|
||||
PCIe error | unknown, <uuid string>
|
||||
|
||||
<section data> :=
|
||||
<generic processor section data> | <memory section data> | \
|
||||
<pcie section data> | <null>
|
||||
|
||||
<generic processor section data> :=
|
||||
[processor_type: <integer>, <proc type string>]
|
||||
[processor_isa: <integer>, <proc isa string>]
|
||||
[error_type: <integer>
|
||||
<proc error type strings>]
|
||||
[operation: <integer>, <proc operation string>]
|
||||
[flags: <integer>
|
||||
<proc flags strings>]
|
||||
[level: <integer>]
|
||||
[version_info: <integer>]
|
||||
[processor_id: <integer>]
|
||||
[target_address: <integer>]
|
||||
[requestor_id: <integer>]
|
||||
[responder_id: <integer>]
|
||||
[IP: <integer>]
|
||||
|
||||
<proc type string>* := IA32/X64 | IA64
|
||||
|
||||
<proc isa string>* := IA32 | IA64 | X64
|
||||
|
||||
<proc error type strings># :=
|
||||
[cache error][, TLB error][, bus error][, micro-architectural error]
|
||||
|
||||
<proc operation string>* := unknown or generic | data read | data write | \
|
||||
instruction execution
|
||||
|
||||
<proc flags strings># :=
|
||||
[restartable][, precise IP][, overflow][, corrected]
|
||||
|
||||
<memory section data> :=
|
||||
[error_status: <integer>]
|
||||
[physical_address: <integer>]
|
||||
[physical_address_mask: <integer>]
|
||||
[node: <integer>]
|
||||
[card: <integer>]
|
||||
[module: <integer>]
|
||||
[bank: <integer>]
|
||||
[device: <integer>]
|
||||
[row: <integer>]
|
||||
[column: <integer>]
|
||||
[bit_position: <integer>]
|
||||
[requestor_id: <integer>]
|
||||
[responder_id: <integer>]
|
||||
[target_id: <integer>]
|
||||
[error_type: <integer>, <mem error type string>]
|
||||
|
||||
<mem error type string>* :=
|
||||
unknown | no error | single-bit ECC | multi-bit ECC | \
|
||||
single-symbol chipkill ECC | multi-symbol chipkill ECC | master abort | \
|
||||
target abort | parity error | watchdog timeout | invalid address | \
|
||||
mirror Broken | memory sparing | scrub corrected error | \
|
||||
scrub uncorrected error
|
||||
|
||||
<pcie section data> :=
|
||||
[port_type: <integer>, <pcie port type string>]
|
||||
[version: <integer>.<integer>]
|
||||
[command: <integer>, status: <integer>]
|
||||
[device_id: <integer>:<integer>:<integer>.<integer>
|
||||
slot: <integer>
|
||||
secondary_bus: <integer>
|
||||
vendor_id: <integer>, device_id: <integer>
|
||||
class_code: <integer>]
|
||||
[serial number: <integer>, <integer>]
|
||||
[bridge: secondary_status: <integer>, control: <integer>]
|
||||
[aer_status: <integer>, aer_mask: <integer>
|
||||
<aer status string>
|
||||
[aer_uncor_severity: <integer>]
|
||||
aer_layer=<aer layer string>, aer_agent=<aer agent string>
|
||||
aer_tlp_header: <integer> <integer> <integer> <integer>]
|
||||
|
||||
<pcie port type string>* := PCIe end point | legacy PCI end point | \
|
||||
unknown | unknown | root port | upstream switch port | \
|
||||
downstream switch port | PCIe to PCI/PCI-X bridge | \
|
||||
PCI/PCI-X to PCIe bridge | root complex integrated endpoint device | \
|
||||
root complex event collector
|
||||
|
||||
if section severity is fatal or recoverable
|
||||
<aer status string># :=
|
||||
unknown | unknown | unknown | unknown | Data Link Protocol | \
|
||||
unknown | unknown | unknown | unknown | unknown | unknown | unknown | \
|
||||
Poisoned TLP | Flow Control Protocol | Completion Timeout | \
|
||||
Completer Abort | Unexpected Completion | Receiver Overflow | \
|
||||
Malformed TLP | ECRC | Unsupported Request
|
||||
else
|
||||
<aer status string># :=
|
||||
Receiver Error | unknown | unknown | unknown | unknown | unknown | \
|
||||
Bad TLP | Bad DLLP | RELAY_NUM Rollover | unknown | unknown | unknown | \
|
||||
Replay Timer Timeout | Advisory Non-Fatal
|
||||
fi
|
||||
|
||||
<aer layer string> :=
|
||||
Physical Layer | Data Link Layer | Transaction Layer
|
||||
|
||||
<aer agent string> :=
|
||||
Receiver ID | Requester ID | Completer ID | Transmitter ID
|
||||
|
||||
Where [] designates that the corresponding content is optional
|
||||
|
||||
All <field string> description with * has the following format:
|
||||
|
||||
field: <integer>, <field string>
|
||||
|
||||
Where value of <integer> should be the position of "string" in <field
|
||||
string> description. Otherwise, <field string> will be "unknown".
|
||||
|
||||
All <field strings> description with # has the following format:
|
||||
|
||||
field: <integer>
|
||||
<field strings>
|
||||
|
||||
Where each string in <field strings> corresponds to one set bit of
|
||||
<integer>. The bit position is the position of "string" in <field
|
||||
strings> description.
|
||||
|
||||
For more detailed explanation of every field, please refer to UEFI
|
||||
specification version 2.3 or later, section Appendix N: Common
|
||||
Platform Error Record.
|
|
@ -1,58 +0,0 @@
|
|||
ACPI I2C Muxes
|
||||
--------------
|
||||
|
||||
Describing an I2C device hierarchy that includes I2C muxes requires an ACPI
|
||||
Device () scope per mux channel.
|
||||
|
||||
Consider this topology:
|
||||
|
||||
+------+ +------+
|
||||
| SMB1 |-->| MUX0 |--CH00--> i2c client A (0x50)
|
||||
| | | 0x70 |--CH01--> i2c client B (0x50)
|
||||
+------+ +------+
|
||||
|
||||
which corresponds to the following ASL:
|
||||
|
||||
Device (SMB1)
|
||||
{
|
||||
Name (_HID, ...)
|
||||
Device (MUX0)
|
||||
{
|
||||
Name (_HID, ...)
|
||||
Name (_CRS, ResourceTemplate () {
|
||||
I2cSerialBus (0x70, ControllerInitiated, I2C_SPEED,
|
||||
AddressingMode7Bit, "^SMB1", 0x00,
|
||||
ResourceConsumer,,)
|
||||
}
|
||||
|
||||
Device (CH00)
|
||||
{
|
||||
Name (_ADR, 0)
|
||||
|
||||
Device (CLIA)
|
||||
{
|
||||
Name (_HID, ...)
|
||||
Name (_CRS, ResourceTemplate () {
|
||||
I2cSerialBus (0x50, ControllerInitiated, I2C_SPEED,
|
||||
AddressingMode7Bit, "^CH00", 0x00,
|
||||
ResourceConsumer,,)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Device (CH01)
|
||||
{
|
||||
Name (_ADR, 1)
|
||||
|
||||
Device (CLIB)
|
||||
{
|
||||
Name (_HID, ...)
|
||||
Name (_CRS, ResourceTemplate () {
|
||||
I2cSerialBus (0x50, ControllerInitiated, I2C_SPEED,
|
||||
AddressingMode7Bit, "^CH01", 0x00,
|
||||
ResourceConsumer,,)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -1,111 +0,0 @@
|
|||
Upgrading ACPI tables via initrd
|
||||
================================
|
||||
|
||||
1) Introduction (What is this about)
|
||||
2) What is this for
|
||||
3) How does it work
|
||||
4) References (Where to retrieve userspace tools)
|
||||
|
||||
1) What is this about
|
||||
---------------------
|
||||
|
||||
If the ACPI_TABLE_UPGRADE compile option is true, it is possible to
|
||||
upgrade the ACPI execution environment that is defined by the ACPI tables
|
||||
via upgrading the ACPI tables provided by the BIOS with an instrumented,
|
||||
modified, more recent version, or installing brand new ACPI tables.
|
||||
|
||||
When building initrd with kernel in a single image, option
|
||||
ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD should also be true for this
|
||||
feature to work.
|
||||
|
||||
For a full list of ACPI tables that can be upgraded/installed, take a look
|
||||
at the char *table_sigs[MAX_ACPI_SIGNATURE]; definition in
|
||||
drivers/acpi/tables.c.
|
||||
All ACPI tables iasl (Intel's ACPI compiler and disassembler) knows should
|
||||
be overridable, except:
|
||||
- ACPI_SIG_RSDP (has a signature of 6 bytes)
|
||||
- ACPI_SIG_FACS (does not have an ordinary ACPI table header)
|
||||
Both could get implemented as well.
|
||||
|
||||
|
||||
2) What is this for
|
||||
-------------------
|
||||
|
||||
Complain to your platform/BIOS vendor if you find a bug which is so severe
|
||||
that a workaround is not accepted in the Linux kernel. And this facility
|
||||
allows you to upgrade the buggy tables before your platform/BIOS vendor
|
||||
releases an upgraded BIOS binary.
|
||||
|
||||
This facility can be used by platform/BIOS vendors to provide a Linux
|
||||
compatible environment without modifying the underlying platform firmware.
|
||||
|
||||
This facility also provides a powerful feature to easily debug and test
|
||||
ACPI BIOS table compatibility with the Linux kernel by modifying old
|
||||
platform provided ACPI tables or inserting new ACPI tables.
|
||||
|
||||
It can and should be enabled in any kernel because there is no functional
|
||||
change with not instrumented initrds.
|
||||
|
||||
|
||||
3) How does it work
|
||||
-------------------
|
||||
|
||||
# Extract the machine's ACPI tables:
|
||||
cd /tmp
|
||||
acpidump >acpidump
|
||||
acpixtract -a acpidump
|
||||
# Disassemble, modify and recompile them:
|
||||
iasl -d *.dat
|
||||
# For example add this statement into a _PRT (PCI Routing Table) function
|
||||
# of the DSDT:
|
||||
Store("HELLO WORLD", debug)
|
||||
# And increase the OEM Revision. For example, before modification:
|
||||
DefinitionBlock ("DSDT.aml", "DSDT", 2, "INTEL ", "TEMPLATE", 0x00000000)
|
||||
# After modification:
|
||||
DefinitionBlock ("DSDT.aml", "DSDT", 2, "INTEL ", "TEMPLATE", 0x00000001)
|
||||
iasl -sa dsdt.dsl
|
||||
# Add the raw ACPI tables to an uncompressed cpio archive.
|
||||
# They must be put into a /kernel/firmware/acpi directory inside the cpio
|
||||
# archive. Note that if the table put here matches a platform table
|
||||
# (similar Table Signature, and similar OEMID, and similar OEM Table ID)
|
||||
# with a more recent OEM Revision, the platform table will be upgraded by
|
||||
# this table. If the table put here doesn't match a platform table
|
||||
# (dissimilar Table Signature, or dissimilar OEMID, or dissimilar OEM Table
|
||||
# ID), this table will be appended.
|
||||
mkdir -p kernel/firmware/acpi
|
||||
cp dsdt.aml kernel/firmware/acpi
|
||||
# A maximum of "NR_ACPI_INITRD_TABLES (64)" tables are currently allowed
|
||||
# (see osl.c):
|
||||
iasl -sa facp.dsl
|
||||
iasl -sa ssdt1.dsl
|
||||
cp facp.aml kernel/firmware/acpi
|
||||
cp ssdt1.aml kernel/firmware/acpi
|
||||
# The uncompressed cpio archive must be the first. Other, typically
|
||||
# compressed cpio archives, must be concatenated on top of the uncompressed
|
||||
# one. Following command creates the uncompressed cpio archive and
|
||||
# concatenates the original initrd on top:
|
||||
find kernel | cpio -H newc --create > /boot/instrumented_initrd
|
||||
cat /boot/initrd >>/boot/instrumented_initrd
|
||||
# reboot with increased acpi debug level, e.g. boot params:
|
||||
acpi.debug_level=0x2 acpi.debug_layer=0xFFFFFFFF
|
||||
# and check your syslog:
|
||||
[ 1.268089] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0._PRT]
|
||||
[ 1.272091] [ACPI Debug] String [0x0B] "HELLO WORLD"
|
||||
|
||||
iasl is able to disassemble and recompile quite a lot of different,
|
||||
also static ACPI tables.
|
||||
|
||||
|
||||
4) Where to retrieve userspace tools
|
||||
------------------------------------
|
||||
|
||||
iasl and acpixtract are part of Intel's ACPICA project:
|
||||
http://acpica.org/
|
||||
and should be packaged by distributions (for example in the acpica package
|
||||
on SUSE).
|
||||
|
||||
acpidump can be found in Len Brown's pmtools:
|
||||
ftp://kernel.org/pub/linux/kernel/people/lenb/acpi/utils/pmtools/acpidump
|
||||
This tool is also part of the acpica package on SUSE.
|
||||
Alternatively, used ACPI tables can be retrieved via sysfs in latest kernels:
|
||||
/sys/firmware/acpi/tables
|
|
@ -1,73 +0,0 @@
|
|||
Linux ACPI Custom Control Method How To
|
||||
=======================================
|
||||
|
||||
Written by Zhang Rui <rui.zhang@intel.com>
|
||||
|
||||
|
||||
Linux supports customizing ACPI control methods at runtime.
|
||||
|
||||
Users can use this to
|
||||
1. override an existing method which may not work correctly,
|
||||
or just for debugging purposes.
|
||||
2. insert a completely new method in order to create a missing
|
||||
method such as _OFF, _ON, _STA, _INI, etc.
|
||||
For these cases, it is far simpler to dynamically install a single
|
||||
control method rather than override the entire DSDT, because kernel
|
||||
rebuild/reboot is not needed and test result can be got in minutes.
|
||||
|
||||
Note: Only ACPI METHOD can be overridden, any other object types like
|
||||
"Device", "OperationRegion", are not recognized. Methods
|
||||
declared inside scope operators are also not supported.
|
||||
Note: The same ACPI control method can be overridden many times,
|
||||
and it's always the latest one that is used by Linux/kernel.
|
||||
Note: To get the ACPI debug object output (Store (AAAA, Debug)),
|
||||
please run "echo 1 > /sys/module/acpi/parameters/aml_debug_output".
|
||||
|
||||
1. override an existing method
|
||||
a) get the ACPI table via ACPI sysfs I/F. e.g. to get the DSDT,
|
||||
just run "cat /sys/firmware/acpi/tables/DSDT > /tmp/dsdt.dat"
|
||||
b) disassemble the table by running "iasl -d dsdt.dat".
|
||||
c) rewrite the ASL code of the method and save it in a new file,
|
||||
d) package the new file (psr.asl) to an ACPI table format.
|
||||
Here is an example of a customized \_SB_.AC._PSR method,
|
||||
|
||||
DefinitionBlock ("", "SSDT", 1, "", "", 0x20080715)
|
||||
{
|
||||
Method (\_SB_.AC._PSR, 0, NotSerialized)
|
||||
{
|
||||
Store ("In AC _PSR", Debug)
|
||||
Return (ACON)
|
||||
}
|
||||
}
|
||||
Note that the full pathname of the method in ACPI namespace
|
||||
should be used.
|
||||
e) assemble the file to generate the AML code of the method.
|
||||
e.g. "iasl -vw 6084 psr.asl" (psr.aml is generated as a result)
|
||||
If parameter "-vw 6084" is not supported by your iASL compiler,
|
||||
please try a newer version.
|
||||
f) mount debugfs by "mount -t debugfs none /sys/kernel/debug"
|
||||
g) override the old method via the debugfs by running
|
||||
"cat /tmp/psr.aml > /sys/kernel/debug/acpi/custom_method"
|
||||
|
||||
2. insert a new method
|
||||
This is easier than overriding an existing method.
|
||||
We just need to create the ASL code of the method we want to
|
||||
insert and then follow the step c) ~ g) in section 1.
|
||||
|
||||
3. undo your changes
|
||||
The "undo" operation is not supported for a new inserted method
|
||||
right now, i.e. we can not remove a method currently.
|
||||
For an overridden method, in order to undo your changes, please
|
||||
save a copy of the method original ASL code in step c) section 1,
|
||||
and redo step c) ~ g) to override the method with the original one.
|
||||
|
||||
|
||||
Note: We can use a kernel with multiple custom ACPI method running,
|
||||
But each individual write to debugfs can implement a SINGLE
|
||||
method override. i.e. if we want to insert/override multiple
|
||||
ACPI methods, we need to redo step c) ~ g) multiple times.
|
||||
|
||||
Note: Be aware that root can mis-use this driver to modify arbitrary
|
||||
memory and gain additional rights, if root's privileges got
|
||||
restricted (for example if root is not allowed to load additional
|
||||
modules after boot).
|
|
@ -1,192 +0,0 @@
|
|||
ACPICA Trace Facility
|
||||
|
||||
Copyright (C) 2015, Intel Corporation
|
||||
Author: Lv Zheng <lv.zheng@intel.com>
|
||||
|
||||
|
||||
Abstract:
|
||||
|
||||
This document describes the functions and the interfaces of the method
|
||||
tracing facility.
|
||||
|
||||
1. Functionalities and usage examples:
|
||||
|
||||
ACPICA provides method tracing capability. And two functions are
|
||||
currently implemented using this capability.
|
||||
|
||||
A. Log reducer
|
||||
ACPICA subsystem provides debugging outputs when CONFIG_ACPI_DEBUG is
|
||||
enabled. The debugging messages which are deployed via
|
||||
ACPI_DEBUG_PRINT() macro can be reduced at 2 levels - per-component
|
||||
level (known as debug layer, configured via
|
||||
/sys/module/acpi/parameters/debug_layer) and per-type level (known as
|
||||
debug level, configured via /sys/module/acpi/parameters/debug_level).
|
||||
|
||||
But when the particular layer/level is applied to the control method
|
||||
evaluations, the quantity of the debugging outputs may still be too
|
||||
large to be put into the kernel log buffer. The idea thus is worked out
|
||||
to only enable the particular debug layer/level (normally more detailed)
|
||||
logs when the control method evaluation is started, and disable the
|
||||
detailed logging when the control method evaluation is stopped.
|
||||
|
||||
The following command examples illustrate the usage of the "log reducer"
|
||||
functionality:
|
||||
a. Filter out the debug layer/level matched logs when control methods
|
||||
are being evaluated:
|
||||
# cd /sys/module/acpi/parameters
|
||||
# echo "0xXXXXXXXX" > trace_debug_layer
|
||||
# echo "0xYYYYYYYY" > trace_debug_level
|
||||
# echo "enable" > trace_state
|
||||
b. Filter out the debug layer/level matched logs when the specified
|
||||
control method is being evaluated:
|
||||
# cd /sys/module/acpi/parameters
|
||||
# echo "0xXXXXXXXX" > trace_debug_layer
|
||||
# echo "0xYYYYYYYY" > trace_debug_level
|
||||
# echo "\PPPP.AAAA.TTTT.HHHH" > trace_method_name
|
||||
# echo "method" > /sys/module/acpi/parameters/trace_state
|
||||
c. Filter out the debug layer/level matched logs when the specified
|
||||
control method is being evaluated for the first time:
|
||||
# cd /sys/module/acpi/parameters
|
||||
# echo "0xXXXXXXXX" > trace_debug_layer
|
||||
# echo "0xYYYYYYYY" > trace_debug_level
|
||||
# echo "\PPPP.AAAA.TTTT.HHHH" > trace_method_name
|
||||
# echo "method-once" > /sys/module/acpi/parameters/trace_state
|
||||
Where:
|
||||
0xXXXXXXXX/0xYYYYYYYY: Refer to Documentation/acpi/debug.txt for
|
||||
possible debug layer/level masking values.
|
||||
\PPPP.AAAA.TTTT.HHHH: Full path of a control method that can be found
|
||||
in the ACPI namespace. It needn't be an entry
|
||||
of a control method evaluation.
|
||||
|
||||
B. AML tracer
|
||||
|
||||
There are special log entries added by the method tracing facility at
|
||||
the "trace points" the AML interpreter starts/stops to execute a control
|
||||
method, or an AML opcode. Note that the format of the log entries is
|
||||
subject to change:
|
||||
[ 0.186427] exdebug-0398 ex_trace_point : Method Begin [0xf58394d8:\_SB.PCI0.LPCB.ECOK] execution.
|
||||
[ 0.186630] exdebug-0398 ex_trace_point : Opcode Begin [0xf5905c88:If] execution.
|
||||
[ 0.186820] exdebug-0398 ex_trace_point : Opcode Begin [0xf5905cc0:LEqual] execution.
|
||||
[ 0.187010] exdebug-0398 ex_trace_point : Opcode Begin [0xf5905a20:-NamePath-] execution.
|
||||
[ 0.187214] exdebug-0398 ex_trace_point : Opcode End [0xf5905a20:-NamePath-] execution.
|
||||
[ 0.187407] exdebug-0398 ex_trace_point : Opcode Begin [0xf5905f60:One] execution.
|
||||
[ 0.187594] exdebug-0398 ex_trace_point : Opcode End [0xf5905f60:One] execution.
|
||||
[ 0.187789] exdebug-0398 ex_trace_point : Opcode End [0xf5905cc0:LEqual] execution.
|
||||
[ 0.187980] exdebug-0398 ex_trace_point : Opcode Begin [0xf5905cc0:Return] execution.
|
||||
[ 0.188146] exdebug-0398 ex_trace_point : Opcode Begin [0xf5905f60:One] execution.
|
||||
[ 0.188334] exdebug-0398 ex_trace_point : Opcode End [0xf5905f60:One] execution.
|
||||
[ 0.188524] exdebug-0398 ex_trace_point : Opcode End [0xf5905cc0:Return] execution.
|
||||
[ 0.188712] exdebug-0398 ex_trace_point : Opcode End [0xf5905c88:If] execution.
|
||||
[ 0.188903] exdebug-0398 ex_trace_point : Method End [0xf58394d8:\_SB.PCI0.LPCB.ECOK] execution.
|
||||
|
||||
Developers can utilize these special log entries to track the AML
|
||||
interpretation, thus can aid issue debugging and performance tuning. Note
|
||||
that, as the "AML tracer" logs are implemented via ACPI_DEBUG_PRINT()
|
||||
macro, CONFIG_ACPI_DEBUG is also required to be enabled for enabling
|
||||
"AML tracer" logs.
|
||||
|
||||
The following command examples illustrate the usage of the "AML tracer"
|
||||
functionality:
|
||||
a. Filter out the method start/stop "AML tracer" logs when control
|
||||
methods are being evaluated:
|
||||
# cd /sys/module/acpi/parameters
|
||||
# echo "0x80" > trace_debug_layer
|
||||
# echo "0x10" > trace_debug_level
|
||||
# echo "enable" > trace_state
|
||||
b. Filter out the method start/stop "AML tracer" when the specified
|
||||
control method is being evaluated:
|
||||
# cd /sys/module/acpi/parameters
|
||||
# echo "0x80" > trace_debug_layer
|
||||
# echo "0x10" > trace_debug_level
|
||||
# echo "\PPPP.AAAA.TTTT.HHHH" > trace_method_name
|
||||
# echo "method" > trace_state
|
||||
c. Filter out the method start/stop "AML tracer" logs when the specified
|
||||
control method is being evaluated for the first time:
|
||||
# cd /sys/module/acpi/parameters
|
||||
# echo "0x80" > trace_debug_layer
|
||||
# echo "0x10" > trace_debug_level
|
||||
# echo "\PPPP.AAAA.TTTT.HHHH" > trace_method_name
|
||||
# echo "method-once" > trace_state
|
||||
d. Filter out the method/opcode start/stop "AML tracer" when the
|
||||
specified control method is being evaluated:
|
||||
# cd /sys/module/acpi/parameters
|
||||
# echo "0x80" > trace_debug_layer
|
||||
# echo "0x10" > trace_debug_level
|
||||
# echo "\PPPP.AAAA.TTTT.HHHH" > trace_method_name
|
||||
# echo "opcode" > trace_state
|
||||
e. Filter out the method/opcode start/stop "AML tracer" when the
|
||||
specified control method is being evaluated for the first time:
|
||||
# cd /sys/module/acpi/parameters
|
||||
# echo "0x80" > trace_debug_layer
|
||||
# echo "0x10" > trace_debug_level
|
||||
# echo "\PPPP.AAAA.TTTT.HHHH" > trace_method_name
|
||||
# echo "opcode-once" > trace_state
|
||||
|
||||
Note that all above method tracing facility related module parameters can
|
||||
be used as the boot parameters, for example:
|
||||
acpi.trace_debug_layer=0x80 acpi.trace_debug_level=0x10 \
|
||||
acpi.trace_method_name=\_SB.LID0._LID acpi.trace_state=opcode-once
|
||||
|
||||
2. Interface descriptions:
|
||||
|
||||
All method tracing functions can be configured via ACPI module
|
||||
parameters that are accessible at /sys/module/acpi/parameters/:
|
||||
|
||||
trace_method_name
|
||||
The full path of the AML method that the user wants to trace.
|
||||
Note that the full path shouldn't contain the trailing "_"s in its
|
||||
name segments but may contain "\" to form an absolute path.
|
||||
|
||||
trace_debug_layer
|
||||
The temporary debug_layer used when the tracing feature is enabled.
|
||||
Using ACPI_EXECUTER (0x80) by default, which is the debug_layer
|
||||
used to match all "AML tracer" logs.
|
||||
|
||||
trace_debug_level
|
||||
The temporary debug_level used when the tracing feature is enabled.
|
||||
Using ACPI_LV_TRACE_POINT (0x10) by default, which is the
|
||||
debug_level used to match all "AML tracer" logs.
|
||||
|
||||
trace_state
|
||||
The status of the tracing feature.
|
||||
Users can enable/disable this debug tracing feature by executing
|
||||
the following command:
|
||||
# echo string > /sys/module/acpi/parameters/trace_state
|
||||
Where "string" should be one of the following:
|
||||
"disable"
|
||||
Disable the method tracing feature.
|
||||
"enable"
|
||||
Enable the method tracing feature.
|
||||
ACPICA debugging messages matching
|
||||
"trace_debug_layer/trace_debug_level" during any method
|
||||
execution will be logged.
|
||||
"method"
|
||||
Enable the method tracing feature.
|
||||
ACPICA debugging messages matching
|
||||
"trace_debug_layer/trace_debug_level" during method execution
|
||||
of "trace_method_name" will be logged.
|
||||
"method-once"
|
||||
Enable the method tracing feature.
|
||||
ACPICA debugging messages matching
|
||||
"trace_debug_layer/trace_debug_level" during method execution
|
||||
of "trace_method_name" will be logged only once.
|
||||
"opcode"
|
||||
Enable the method tracing feature.
|
||||
ACPICA debugging messages matching
|
||||
"trace_debug_layer/trace_debug_level" during method/opcode
|
||||
execution of "trace_method_name" will be logged.
|
||||
"opcode-once"
|
||||
Enable the method tracing feature.
|
||||
ACPICA debugging messages matching
|
||||
"trace_debug_layer/trace_debug_level" during method/opcode
|
||||
execution of "trace_method_name" will be logged only once.
|
||||
Note that the differences between "enable" and the other feature
|
||||
enabling options are:
|
||||
1. When "enable" is specified, since
|
||||
"trace_debug_layer/trace_debug_level" shall apply to all control
|
||||
method evaluations, after configuring "trace_state" to "enable",
|
||||
"trace_method_name" will be reset to NULL.
|
||||
2. When "method/opcode" is specified, if
|
||||
"trace_method_name" is NULL when "trace_state" is configured to
|
||||
these options, the "trace_debug_layer/trace_debug_level" will
|
||||
apply to all control method evaluations.
|
|
@ -1,172 +0,0 @@
|
|||
|
||||
In order to support ACPI open-ended hardware configurations (e.g. development
|
||||
boards) we need a way to augment the ACPI configuration provided by the firmware
|
||||
image. A common example is connecting sensors on I2C / SPI buses on development
|
||||
boards.
|
||||
|
||||
Although this can be accomplished by creating a kernel platform driver or
|
||||
recompiling the firmware image with updated ACPI tables, neither is practical:
|
||||
the former proliferates board specific kernel code while the latter requires
|
||||
access to firmware tools which are often not publicly available.
|
||||
|
||||
Because ACPI supports external references in AML code a more practical
|
||||
way to augment firmware ACPI configuration is by dynamically loading
|
||||
user defined SSDT tables that contain the board specific information.
|
||||
|
||||
For example, to enumerate a Bosch BMA222E accelerometer on the I2C bus of the
|
||||
Minnowboard MAX development board exposed via the LSE connector [1], the
|
||||
following ASL code can be used:
|
||||
|
||||
DefinitionBlock ("minnowmax.aml", "SSDT", 1, "Vendor", "Accel", 0x00000003)
|
||||
{
|
||||
External (\_SB.I2C6, DeviceObj)
|
||||
|
||||
Scope (\_SB.I2C6)
|
||||
{
|
||||
Device (STAC)
|
||||
{
|
||||
Name (_ADR, Zero)
|
||||
Name (_HID, "BMA222E")
|
||||
|
||||
Method (_CRS, 0, Serialized)
|
||||
{
|
||||
Name (RBUF, ResourceTemplate ()
|
||||
{
|
||||
I2cSerialBus (0x0018, ControllerInitiated, 0x00061A80,
|
||||
AddressingMode7Bit, "\\_SB.I2C6", 0x00,
|
||||
ResourceConsumer, ,)
|
||||
GpioInt (Edge, ActiveHigh, Exclusive, PullDown, 0x0000,
|
||||
"\\_SB.GPO2", 0x00, ResourceConsumer, , )
|
||||
{ // Pin list
|
||||
0
|
||||
}
|
||||
})
|
||||
Return (RBUF)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
which can then be compiled to AML binary format:
|
||||
|
||||
$ iasl minnowmax.asl
|
||||
|
||||
Intel ACPI Component Architecture
|
||||
ASL Optimizing Compiler version 20140214-64 [Mar 29 2014]
|
||||
Copyright (c) 2000 - 2014 Intel Corporation
|
||||
|
||||
ASL Input: minnowmax.asl - 30 lines, 614 bytes, 7 keywords
|
||||
AML Output: minnowmax.aml - 165 bytes, 6 named objects, 1 executable opcodes
|
||||
|
||||
[1] http://wiki.minnowboard.org/MinnowBoard_MAX#Low_Speed_Expansion_Connector_.28Top.29
|
||||
|
||||
The resulting AML code can then be loaded by the kernel using one of the methods
|
||||
below.
|
||||
|
||||
== Loading ACPI SSDTs from initrd ==
|
||||
|
||||
This option allows loading of user defined SSDTs from initrd and it is useful
|
||||
when the system does not support EFI or when there is not enough EFI storage.
|
||||
|
||||
It works in a similar way with initrd based ACPI tables override/upgrade: SSDT
|
||||
aml code must be placed in the first, uncompressed, initrd under the
|
||||
"kernel/firmware/acpi" path. Multiple files can be used and this will translate
|
||||
in loading multiple tables. Only SSDT and OEM tables are allowed. See
|
||||
initrd_table_override.txt for more details.
|
||||
|
||||
Here is an example:
|
||||
|
||||
# Add the raw ACPI tables to an uncompressed cpio archive.
|
||||
# They must be put into a /kernel/firmware/acpi directory inside the
|
||||
# cpio archive.
|
||||
# The uncompressed cpio archive must be the first.
|
||||
# Other, typically compressed cpio archives, must be
|
||||
# concatenated on top of the uncompressed one.
|
||||
mkdir -p kernel/firmware/acpi
|
||||
cp ssdt.aml kernel/firmware/acpi
|
||||
|
||||
# Create the uncompressed cpio archive and concatenate the original initrd
|
||||
# on top:
|
||||
find kernel | cpio -H newc --create > /boot/instrumented_initrd
|
||||
cat /boot/initrd >>/boot/instrumented_initrd
|
||||
|
||||
== Loading ACPI SSDTs from EFI variables ==
|
||||
|
||||
This is the preferred method, when EFI is supported on the platform, because it
|
||||
allows a persistent, OS independent way of storing the user defined SSDTs. There
|
||||
is also work underway to implement EFI support for loading user defined SSDTs
|
||||
and using this method will make it easier to convert to the EFI loading
|
||||
mechanism when that will arrive.
|
||||
|
||||
In order to load SSDTs from an EFI variable the efivar_ssdt kernel command line
|
||||
parameter can be used. The argument for the option is the variable name to
|
||||
use. If there are multiple variables with the same name but with different
|
||||
vendor GUIDs, all of them will be loaded.
|
||||
|
||||
In order to store the AML code in an EFI variable the efivarfs filesystem can be
|
||||
used. It is enabled and mounted by default in /sys/firmware/efi/efivars in all
|
||||
recent distributions.
|
||||
|
||||
Creating a new file in /sys/firmware/efi/efivars will automatically create a new
|
||||
EFI variable. Updating a file in /sys/firmware/efi/efivars will update the EFI
|
||||
variable. Please note that the file name needs to be specially formatted as
|
||||
"Name-GUID" and that the first 4 bytes in the file (little-endian format)
|
||||
represent the attributes of the EFI variable (see EFI_VARIABLE_MASK in
|
||||
include/linux/efi.h). Writing to the file must also be done with one write
|
||||
operation.
|
||||
|
||||
For example, you can use the following bash script to create/update an EFI
|
||||
variable with the content from a given file:
|
||||
|
||||
#!/bin/sh -e
|
||||
|
||||
while ! [ -z "$1" ]; do
|
||||
case "$1" in
|
||||
"-f") filename="$2"; shift;;
|
||||
"-g") guid="$2"; shift;;
|
||||
*) name="$1";;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
usage()
|
||||
{
|
||||
echo "Syntax: ${0##*/} -f filename [ -g guid ] name"
|
||||
exit 1
|
||||
}
|
||||
|
||||
[ -n "$name" -a -f "$filename" ] || usage
|
||||
|
||||
EFIVARFS="/sys/firmware/efi/efivars"
|
||||
|
||||
[ -d "$EFIVARFS" ] || exit 2
|
||||
|
||||
if stat -tf $EFIVARFS | grep -q -v de5e81e4; then
|
||||
mount -t efivarfs none $EFIVARFS
|
||||
fi
|
||||
|
||||
# try to pick up an existing GUID
|
||||
[ -n "$guid" ] || guid=$(find "$EFIVARFS" -name "$name-*" | head -n1 | cut -f2- -d-)
|
||||
|
||||
# use a randomly generated GUID
|
||||
[ -n "$guid" ] || guid="$(cat /proc/sys/kernel/random/uuid)"
|
||||
|
||||
# efivarfs expects all of the data in one write
|
||||
tmp=$(mktemp)
|
||||
/bin/echo -ne "\007\000\000\000" | cat - $filename > $tmp
|
||||
dd if=$tmp of="$EFIVARFS/$name-$guid" bs=$(stat -c %s $tmp)
|
||||
rm $tmp
|
||||
|
||||
== Loading ACPI SSDTs from configfs ==
|
||||
|
||||
This option allows loading of user defined SSDTs from userspace via the configfs
|
||||
interface. The CONFIG_ACPI_CONFIGFS option must be selected and configfs must be
|
||||
mounted. In the following examples, we assume that configfs has been mounted in
|
||||
/config.
|
||||
|
||||
New tables can be loaded by creating new directories in /config/acpi/table/ and
|
||||
writing the SSDT aml code in the aml attribute:
|
||||
|
||||
cd /config/acpi/table
|
||||
mkdir my_ssdt
|
||||
cat ~/ssdt.aml > my_ssdt/aml
|
|
@ -1,5 +1,11 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
Collaborative Processor Performance Control (CPPC)
|
||||
==================================================
|
||||
Collaborative Processor Performance Control (CPPC)
|
||||
==================================================
|
||||
|
||||
CPPC
|
||||
====
|
||||
|
||||
CPPC defined in the ACPI spec describes a mechanism for the OS to manage the
|
||||
performance of a logical processor on a contiguous and abstract performance
|
||||
|
@ -10,31 +16,28 @@ For more details on CPPC please refer to the ACPI specification at:
|
|||
|
||||
http://uefi.org/specifications
|
||||
|
||||
Some of the CPPC registers are exposed via sysfs under:
|
||||
Some of the CPPC registers are exposed via sysfs under::
|
||||
|
||||
/sys/devices/system/cpu/cpuX/acpi_cppc/
|
||||
/sys/devices/system/cpu/cpuX/acpi_cppc/
|
||||
|
||||
for each cpu X
|
||||
for each cpu X::
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
$ ls -lR /sys/devices/system/cpu/cpu0/acpi_cppc/
|
||||
/sys/devices/system/cpu/cpu0/acpi_cppc/:
|
||||
total 0
|
||||
-r--r--r-- 1 root root 65536 Mar 5 19:38 feedback_ctrs
|
||||
-r--r--r-- 1 root root 65536 Mar 5 19:38 highest_perf
|
||||
-r--r--r-- 1 root root 65536 Mar 5 19:38 lowest_freq
|
||||
-r--r--r-- 1 root root 65536 Mar 5 19:38 lowest_nonlinear_perf
|
||||
-r--r--r-- 1 root root 65536 Mar 5 19:38 lowest_perf
|
||||
-r--r--r-- 1 root root 65536 Mar 5 19:38 nominal_freq
|
||||
-r--r--r-- 1 root root 65536 Mar 5 19:38 nominal_perf
|
||||
-r--r--r-- 1 root root 65536 Mar 5 19:38 reference_perf
|
||||
-r--r--r-- 1 root root 65536 Mar 5 19:38 wraparound_time
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
$ ls -lR /sys/devices/system/cpu/cpu0/acpi_cppc/
|
||||
/sys/devices/system/cpu/cpu0/acpi_cppc/:
|
||||
total 0
|
||||
-r--r--r-- 1 root root 65536 Mar 5 19:38 feedback_ctrs
|
||||
-r--r--r-- 1 root root 65536 Mar 5 19:38 highest_perf
|
||||
-r--r--r-- 1 root root 65536 Mar 5 19:38 lowest_freq
|
||||
-r--r--r-- 1 root root 65536 Mar 5 19:38 lowest_nonlinear_perf
|
||||
-r--r--r-- 1 root root 65536 Mar 5 19:38 lowest_perf
|
||||
-r--r--r-- 1 root root 65536 Mar 5 19:38 nominal_freq
|
||||
-r--r--r-- 1 root root 65536 Mar 5 19:38 nominal_perf
|
||||
-r--r--r-- 1 root root 65536 Mar 5 19:38 reference_perf
|
||||
-r--r--r-- 1 root root 65536 Mar 5 19:38 wraparound_time
|
||||
|
||||
* highest_perf : Highest performance of this processor (abstract scale).
|
||||
* nominal_perf : Highest sustained performance of this processor (abstract scale).
|
||||
* nominal_perf : Highest sustained performance of this processor
|
||||
(abstract scale).
|
||||
* lowest_nonlinear_perf : Lowest performance of this processor with nonlinear
|
||||
power savings (abstract scale).
|
||||
* lowest_perf : Lowest performance of this processor (abstract scale).
|
||||
|
@ -48,22 +51,26 @@ total 0
|
|||
* feedback_ctrs : Includes both Reference and delivered performance counter.
|
||||
Reference counter ticks up proportional to processor's reference performance.
|
||||
Delivered counter ticks up proportional to processor's delivered performance.
|
||||
* wraparound_time: Minimum time for the feedback counters to wraparound (seconds).
|
||||
* wraparound_time: Minimum time for the feedback counters to wraparound
|
||||
(seconds).
|
||||
* reference_perf : Performance level at which reference performance counter
|
||||
accumulates (abstract scale).
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Computing Average Delivered Performance
|
||||
Computing Average Delivered Performance
|
||||
=======================================
|
||||
|
||||
Below describes the steps to compute the average performance delivered by taking
|
||||
two different snapshots of feedback counters at time T1 and T2.
|
||||
Below describes the steps to compute the average performance delivered by
|
||||
taking two different snapshots of feedback counters at time T1 and T2.
|
||||
|
||||
T1: Read feedback_ctrs as fbc_t1
|
||||
Wait or run some workload
|
||||
T2: Read feedback_ctrs as fbc_t2
|
||||
T1: Read feedback_ctrs as fbc_t1
|
||||
Wait or run some workload
|
||||
|
||||
delivered_counter_delta = fbc_t2[del] - fbc_t1[del]
|
||||
reference_counter_delta = fbc_t2[ref] - fbc_t1[ref]
|
||||
T2: Read feedback_ctrs as fbc_t2
|
||||
|
||||
delivered_perf = (reference_perf x delivered_counter_delta) / reference_counter_delta
|
||||
::
|
||||
|
||||
delivered_counter_delta = fbc_t2[del] - fbc_t1[del]
|
||||
reference_counter_delta = fbc_t2[ref] - fbc_t1[ref]
|
||||
|
||||
delivered_perf = (reference_perf x delivered_counter_delta) / reference_counter_delta
|
|
@ -1,6 +1,12 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===============
|
||||
Overriding DSDT
|
||||
===============
|
||||
|
||||
Linux supports a method of overriding the BIOS DSDT:
|
||||
|
||||
CONFIG_ACPI_CUSTOM_DSDT builds the image into the kernel.
|
||||
CONFIG_ACPI_CUSTOM_DSDT - builds the image into the kernel.
|
||||
|
||||
When to use this method is described in detail on the
|
||||
Linux/ACPI home page:
|
|
@ -0,0 +1,14 @@
|
|||
============
|
||||
ACPI Support
|
||||
============
|
||||
|
||||
Here we document in detail how to interact with various mechanisms in
|
||||
the Linux ACPI support.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
initrd_table_override
|
||||
dsdt-override
|
||||
ssdt-overlays
|
||||
cppc_sysfs
|
|
@ -0,0 +1,115 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
================================
|
||||
Upgrading ACPI tables via initrd
|
||||
================================
|
||||
|
||||
What is this about
|
||||
==================
|
||||
|
||||
If the ACPI_TABLE_UPGRADE compile option is true, it is possible to
|
||||
upgrade the ACPI execution environment that is defined by the ACPI tables
|
||||
via upgrading the ACPI tables provided by the BIOS with an instrumented,
|
||||
modified, more recent version one, or installing brand new ACPI tables.
|
||||
|
||||
When building initrd with kernel in a single image, option
|
||||
ACPI_TABLE_OVERRIDE_VIA_BUILTIN_INITRD should also be true for this
|
||||
feature to work.
|
||||
|
||||
For a full list of ACPI tables that can be upgraded/installed, take a look
|
||||
at the char `*table_sigs[MAX_ACPI_SIGNATURE];` definition in
|
||||
drivers/acpi/tables.c.
|
||||
|
||||
All ACPI tables iasl (Intel's ACPI compiler and disassembler) knows should
|
||||
be overridable, except:
|
||||
|
||||
- ACPI_SIG_RSDP (has a signature of 6 bytes)
|
||||
- ACPI_SIG_FACS (does not have an ordinary ACPI table header)
|
||||
|
||||
Both could get implemented as well.
|
||||
|
||||
|
||||
What is this for
|
||||
================
|
||||
|
||||
Complain to your platform/BIOS vendor if you find a bug which is so severe
|
||||
that a workaround is not accepted in the Linux kernel. And this facility
|
||||
allows you to upgrade the buggy tables before your platform/BIOS vendor
|
||||
releases an upgraded BIOS binary.
|
||||
|
||||
This facility can be used by platform/BIOS vendors to provide a Linux
|
||||
compatible environment without modifying the underlying platform firmware.
|
||||
|
||||
This facility also provides a powerful feature to easily debug and test
|
||||
ACPI BIOS table compatibility with the Linux kernel by modifying old
|
||||
platform provided ACPI tables or inserting new ACPI tables.
|
||||
|
||||
It can and should be enabled in any kernel because there is no functional
|
||||
change with not instrumented initrds.
|
||||
|
||||
|
||||
How does it work
|
||||
================
|
||||
::
|
||||
|
||||
# Extract the machine's ACPI tables:
|
||||
cd /tmp
|
||||
acpidump >acpidump
|
||||
acpixtract -a acpidump
|
||||
# Disassemble, modify and recompile them:
|
||||
iasl -d *.dat
|
||||
# For example add this statement into a _PRT (PCI Routing Table) function
|
||||
# of the DSDT:
|
||||
Store("HELLO WORLD", debug)
|
||||
# And increase the OEM Revision. For example, before modification:
|
||||
DefinitionBlock ("DSDT.aml", "DSDT", 2, "INTEL ", "TEMPLATE", 0x00000000)
|
||||
# After modification:
|
||||
DefinitionBlock ("DSDT.aml", "DSDT", 2, "INTEL ", "TEMPLATE", 0x00000001)
|
||||
iasl -sa dsdt.dsl
|
||||
# Add the raw ACPI tables to an uncompressed cpio archive.
|
||||
# They must be put into a /kernel/firmware/acpi directory inside the cpio
|
||||
# archive. Note that if the table put here matches a platform table
|
||||
# (similar Table Signature, and similar OEMID, and similar OEM Table ID)
|
||||
# with a more recent OEM Revision, the platform table will be upgraded by
|
||||
# this table. If the table put here doesn't match a platform table
|
||||
# (dissimilar Table Signature, or dissimilar OEMID, or dissimilar OEM Table
|
||||
# ID), this table will be appended.
|
||||
mkdir -p kernel/firmware/acpi
|
||||
cp dsdt.aml kernel/firmware/acpi
|
||||
# A maximum of "NR_ACPI_INITRD_TABLES (64)" tables are currently allowed
|
||||
# (see osl.c):
|
||||
iasl -sa facp.dsl
|
||||
iasl -sa ssdt1.dsl
|
||||
cp facp.aml kernel/firmware/acpi
|
||||
cp ssdt1.aml kernel/firmware/acpi
|
||||
# The uncompressed cpio archive must be the first. Other, typically
|
||||
# compressed cpio archives, must be concatenated on top of the uncompressed
|
||||
# one. Following command creates the uncompressed cpio archive and
|
||||
# concatenates the original initrd on top:
|
||||
find kernel | cpio -H newc --create > /boot/instrumented_initrd
|
||||
cat /boot/initrd >>/boot/instrumented_initrd
|
||||
# reboot with increased acpi debug level, e.g. boot params:
|
||||
acpi.debug_level=0x2 acpi.debug_layer=0xFFFFFFFF
|
||||
# and check your syslog:
|
||||
[ 1.268089] ACPI: PCI Interrupt Routing Table [\_SB_.PCI0._PRT]
|
||||
[ 1.272091] [ACPI Debug] String [0x0B] "HELLO WORLD"
|
||||
|
||||
iasl is able to disassemble and recompile quite a lot of different,
|
||||
also static ACPI tables.
|
||||
|
||||
|
||||
Where to retrieve userspace tools
|
||||
=================================
|
||||
|
||||
iasl and acpixtract are part of Intel's ACPICA project:
|
||||
http://acpica.org/
|
||||
|
||||
and should be packaged by distributions (for example in the acpica package
|
||||
on SUSE).
|
||||
|
||||
acpidump can be found in Len Brown's pmtools:
|
||||
ftp://kernel.org/pub/linux/kernel/people/lenb/acpi/utils/pmtools/acpidump
|
||||
|
||||
This tool is also part of the acpica package on SUSE.
|
||||
Alternatively, used ACPI tables can be retrieved via sysfs in latest kernels:
|
||||
/sys/firmware/acpi/tables
|
|
@ -0,0 +1,180 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=============
|
||||
SSDT Overlays
|
||||
=============
|
||||
|
||||
In order to support ACPI open-ended hardware configurations (e.g. development
|
||||
boards) we need a way to augment the ACPI configuration provided by the firmware
|
||||
image. A common example is connecting sensors on I2C / SPI buses on development
|
||||
boards.
|
||||
|
||||
Although this can be accomplished by creating a kernel platform driver or
|
||||
recompiling the firmware image with updated ACPI tables, neither is practical:
|
||||
the former proliferates board specific kernel code while the latter requires
|
||||
access to firmware tools which are often not publicly available.
|
||||
|
||||
Because ACPI supports external references in AML code a more practical
|
||||
way to augment firmware ACPI configuration is by dynamically loading
|
||||
user defined SSDT tables that contain the board specific information.
|
||||
|
||||
For example, to enumerate a Bosch BMA222E accelerometer on the I2C bus of the
|
||||
Minnowboard MAX development board exposed via the LSE connector [1], the
|
||||
following ASL code can be used::
|
||||
|
||||
DefinitionBlock ("minnowmax.aml", "SSDT", 1, "Vendor", "Accel", 0x00000003)
|
||||
{
|
||||
External (\_SB.I2C6, DeviceObj)
|
||||
|
||||
Scope (\_SB.I2C6)
|
||||
{
|
||||
Device (STAC)
|
||||
{
|
||||
Name (_ADR, Zero)
|
||||
Name (_HID, "BMA222E")
|
||||
|
||||
Method (_CRS, 0, Serialized)
|
||||
{
|
||||
Name (RBUF, ResourceTemplate ()
|
||||
{
|
||||
I2cSerialBus (0x0018, ControllerInitiated, 0x00061A80,
|
||||
AddressingMode7Bit, "\\_SB.I2C6", 0x00,
|
||||
ResourceConsumer, ,)
|
||||
GpioInt (Edge, ActiveHigh, Exclusive, PullDown, 0x0000,
|
||||
"\\_SB.GPO2", 0x00, ResourceConsumer, , )
|
||||
{ // Pin list
|
||||
0
|
||||
}
|
||||
})
|
||||
Return (RBUF)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
which can then be compiled to AML binary format::
|
||||
|
||||
$ iasl minnowmax.asl
|
||||
|
||||
Intel ACPI Component Architecture
|
||||
ASL Optimizing Compiler version 20140214-64 [Mar 29 2014]
|
||||
Copyright (c) 2000 - 2014 Intel Corporation
|
||||
|
||||
ASL Input: minnowmax.asl - 30 lines, 614 bytes, 7 keywords
|
||||
AML Output: minnowmax.aml - 165 bytes, 6 named objects, 1 executable opcodes
|
||||
|
||||
[1] http://wiki.minnowboard.org/MinnowBoard_MAX#Low_Speed_Expansion_Connector_.28Top.29
|
||||
|
||||
The resulting AML code can then be loaded by the kernel using one of the methods
|
||||
below.
|
||||
|
||||
Loading ACPI SSDTs from initrd
|
||||
==============================
|
||||
|
||||
This option allows loading of user defined SSDTs from initrd and it is useful
|
||||
when the system does not support EFI or when there is not enough EFI storage.
|
||||
|
||||
It works in a similar way with initrd based ACPI tables override/upgrade: SSDT
|
||||
aml code must be placed in the first, uncompressed, initrd under the
|
||||
"kernel/firmware/acpi" path. Multiple files can be used and this will translate
|
||||
in loading multiple tables. Only SSDT and OEM tables are allowed. See
|
||||
initrd_table_override.rst for more details.
|
||||
|
||||
Here is an example::
|
||||
|
||||
# Add the raw ACPI tables to an uncompressed cpio archive.
|
||||
# They must be put into a /kernel/firmware/acpi directory inside the
|
||||
# cpio archive.
|
||||
# The uncompressed cpio archive must be the first.
|
||||
# Other, typically compressed cpio archives, must be
|
||||
# concatenated on top of the uncompressed one.
|
||||
mkdir -p kernel/firmware/acpi
|
||||
cp ssdt.aml kernel/firmware/acpi
|
||||
|
||||
# Create the uncompressed cpio archive and concatenate the original initrd
|
||||
# on top:
|
||||
find kernel | cpio -H newc --create > /boot/instrumented_initrd
|
||||
cat /boot/initrd >>/boot/instrumented_initrd
|
||||
|
||||
Loading ACPI SSDTs from EFI variables
|
||||
=====================================
|
||||
|
||||
This is the preferred method, when EFI is supported on the platform, because it
|
||||
allows a persistent, OS independent way of storing the user defined SSDTs. There
|
||||
is also work underway to implement EFI support for loading user defined SSDTs
|
||||
and using this method will make it easier to convert to the EFI loading
|
||||
mechanism when that will arrive.
|
||||
|
||||
In order to load SSDTs from an EFI variable the efivar_ssdt kernel command line
|
||||
parameter can be used. The argument for the option is the variable name to
|
||||
use. If there are multiple variables with the same name but with different
|
||||
vendor GUIDs, all of them will be loaded.
|
||||
|
||||
In order to store the AML code in an EFI variable the efivarfs filesystem can be
|
||||
used. It is enabled and mounted by default in /sys/firmware/efi/efivars in all
|
||||
recent distributions.
|
||||
|
||||
Creating a new file in /sys/firmware/efi/efivars will automatically create a new
|
||||
EFI variable. Updating a file in /sys/firmware/efi/efivars will update the EFI
|
||||
variable. Please note that the file name needs to be specially formatted as
|
||||
"Name-GUID" and that the first 4 bytes in the file (little-endian format)
|
||||
represent the attributes of the EFI variable (see EFI_VARIABLE_MASK in
|
||||
include/linux/efi.h). Writing to the file must also be done with one write
|
||||
operation.
|
||||
|
||||
For example, you can use the following bash script to create/update an EFI
|
||||
variable with the content from a given file::
|
||||
|
||||
#!/bin/sh -e
|
||||
|
||||
while ! [ -z "$1" ]; do
|
||||
case "$1" in
|
||||
"-f") filename="$2"; shift;;
|
||||
"-g") guid="$2"; shift;;
|
||||
*) name="$1";;
|
||||
esac
|
||||
shift
|
||||
done
|
||||
|
||||
usage()
|
||||
{
|
||||
echo "Syntax: ${0##*/} -f filename [ -g guid ] name"
|
||||
exit 1
|
||||
}
|
||||
|
||||
[ -n "$name" -a -f "$filename" ] || usage
|
||||
|
||||
EFIVARFS="/sys/firmware/efi/efivars"
|
||||
|
||||
[ -d "$EFIVARFS" ] || exit 2
|
||||
|
||||
if stat -tf $EFIVARFS | grep -q -v de5e81e4; then
|
||||
mount -t efivarfs none $EFIVARFS
|
||||
fi
|
||||
|
||||
# try to pick up an existing GUID
|
||||
[ -n "$guid" ] || guid=$(find "$EFIVARFS" -name "$name-*" | head -n1 | cut -f2- -d-)
|
||||
|
||||
# use a randomly generated GUID
|
||||
[ -n "$guid" ] || guid="$(cat /proc/sys/kernel/random/uuid)"
|
||||
|
||||
# efivarfs expects all of the data in one write
|
||||
tmp=$(mktemp)
|
||||
/bin/echo -ne "\007\000\000\000" | cat - $filename > $tmp
|
||||
dd if=$tmp of="$EFIVARFS/$name-$guid" bs=$(stat -c %s $tmp)
|
||||
rm $tmp
|
||||
|
||||
Loading ACPI SSDTs from configfs
|
||||
================================
|
||||
|
||||
This option allows loading of user defined SSDTs from userspace via the configfs
|
||||
interface. The CONFIG_ACPI_CONFIGFS option must be selected and configfs must be
|
||||
mounted. In the following examples, we assume that configfs has been mounted in
|
||||
/config.
|
||||
|
||||
New tables can be loaded by creating new directories in /config/acpi/table/ and
|
||||
writing the SSDT aml code in the aml attribute::
|
||||
|
||||
cd /config/acpi/table
|
||||
mkdir my_ssdt
|
||||
cat ~/ssdt.aml > my_ssdt/aml
|
|
@ -77,6 +77,7 @@ configure specific aspects of kernel behavior to your liking.
|
|||
LSM/index
|
||||
mm/index
|
||||
perf-security
|
||||
acpi/index
|
||||
|
||||
.. only:: subproject and html
|
||||
|
||||
|
|
|
@ -88,6 +88,7 @@ parameter is applicable::
|
|||
APIC APIC support is enabled.
|
||||
APM Advanced Power Management support is enabled.
|
||||
ARM ARM architecture is enabled.
|
||||
ARM64 ARM64 architecture is enabled.
|
||||
AX25 Appropriate AX.25 support is enabled.
|
||||
CLK Common clock infrastructure is enabled.
|
||||
CMA Contiguous Memory Area support is enabled.
|
||||
|
|
|
@ -704,8 +704,11 @@
|
|||
upon panic. This parameter reserves the physical
|
||||
memory region [offset, offset + size] for that kernel
|
||||
image. If '@offset' is omitted, then a suitable offset
|
||||
is selected automatically. Check
|
||||
Documentation/kdump/kdump.txt for further details.
|
||||
is selected automatically.
|
||||
[KNL, x86_64] select a region under 4G first, and
|
||||
fall back to reserve region above 4G when '@offset'
|
||||
hasn't been specified.
|
||||
See Documentation/kdump/kdump.txt for further details.
|
||||
|
||||
crashkernel=range1:size1[,range2:size2,...][@offset]
|
||||
[KNL] Same as above, but depends on the memory
|
||||
|
@ -2544,6 +2547,40 @@
|
|||
in the "bleeding edge" mini2440 support kernel at
|
||||
http://repo.or.cz/w/linux-2.6/mini2440.git
|
||||
|
||||
mitigations=
|
||||
[X86,PPC,S390,ARM64] Control optional mitigations for
|
||||
CPU vulnerabilities. This is a set of curated,
|
||||
arch-independent options, each of which is an
|
||||
aggregation of existing arch-specific options.
|
||||
|
||||
off
|
||||
Disable all optional CPU mitigations. This
|
||||
improves system performance, but it may also
|
||||
expose users to several CPU vulnerabilities.
|
||||
Equivalent to: nopti [X86,PPC]
|
||||
kpti=0 [ARM64]
|
||||
nospectre_v1 [PPC]
|
||||
nobp=0 [S390]
|
||||
nospectre_v2 [X86,PPC,S390,ARM64]
|
||||
spectre_v2_user=off [X86]
|
||||
spec_store_bypass_disable=off [X86,PPC]
|
||||
ssbd=force-off [ARM64]
|
||||
l1tf=off [X86]
|
||||
|
||||
auto (default)
|
||||
Mitigate all CPU vulnerabilities, but leave SMT
|
||||
enabled, even if it's vulnerable. This is for
|
||||
users who don't want to be surprised by SMT
|
||||
getting disabled across kernel upgrades, or who
|
||||
have other ways of avoiding SMT-based attacks.
|
||||
Equivalent to: (default behavior)
|
||||
|
||||
auto,nosmt
|
||||
Mitigate all CPU vulnerabilities, disabling SMT
|
||||
if needed. This is for users who always want to
|
||||
be fully mitigated, even if it means losing SMT.
|
||||
Equivalent to: l1tf=flush,nosmt [X86]
|
||||
|
||||
mminit_loglevel=
|
||||
[KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this
|
||||
parameter allows control of the logging verbosity for
|
||||
|
@ -2873,10 +2910,10 @@
|
|||
check bypass). With this option data leaks are possible
|
||||
in the system.
|
||||
|
||||
nospectre_v2 [X86,PPC_FSL_BOOK3E] Disable all mitigations for the Spectre variant 2
|
||||
(indirect branch prediction) vulnerability. System may
|
||||
allow data leaks with this option, which is equivalent
|
||||
to spectre_v2=off.
|
||||
nospectre_v2 [X86,PPC_FSL_BOOK3E,ARM64] Disable all mitigations for
|
||||
the Spectre variant 2 (indirect branch prediction)
|
||||
vulnerability. System may allow data leaks with this
|
||||
option.
|
||||
|
||||
nospec_store_bypass_disable
|
||||
[HW] Disable all mitigations for the Speculative Store Bypass vulnerability
|
||||
|
@ -3394,6 +3431,8 @@
|
|||
bridges without forcing it upstream. Note:
|
||||
this removes isolation between devices and
|
||||
may put more devices in an IOMMU group.
|
||||
force_floating [S390] Force usage of floating interrupts.
|
||||
nomio [S390] Do not use MIO instructions.
|
||||
|
||||
pcie_aspm= [PCIE] Forcibly enable or disable PCIe Active State Power
|
||||
Management.
|
||||
|
@ -3623,7 +3662,9 @@
|
|||
see CONFIG_RAS_CEC help text.
|
||||
|
||||
rcu_nocbs= [KNL]
|
||||
The argument is a cpu list, as described above.
|
||||
The argument is a cpu list, as described above,
|
||||
except that the string "all" can be used to
|
||||
specify every CPU on the system.
|
||||
|
||||
In kernels built with CONFIG_RCU_NOCB_CPU=y, set
|
||||
the specified list of CPUs to be no-callback CPUs.
|
||||
|
@ -4703,6 +4744,10 @@
|
|||
[x86] unstable: mark the TSC clocksource as unstable, this
|
||||
marks the TSC unconditionally unstable at bootup and
|
||||
avoids any further wobbles once the TSC watchdog notices.
|
||||
[x86] nowatchdog: disable clocksource watchdog. Used
|
||||
in situations with strict latency requirements (where
|
||||
interruptions from clocksource watchdog are not
|
||||
acceptable).
|
||||
|
||||
turbografx.map[2|3]= [HW,JOY]
|
||||
TurboGraFX parallel port interface
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
.. |struct cpufreq_policy| replace:: :c:type:`struct cpufreq_policy <cpufreq_policy>`
|
||||
.. |intel_pstate| replace:: :doc:`intel_pstate <intel_pstate>`
|
||||
|
||||
|
@ -5,9 +8,10 @@
|
|||
CPU Performance Scaling
|
||||
=======================
|
||||
|
||||
::
|
||||
:Copyright: |copy| 2017 Intel Corporation
|
||||
|
||||
:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
The Concept of CPU Performance Scaling
|
||||
======================================
|
||||
|
@ -396,8 +400,8 @@ RT or deadline scheduling classes, the governor will increase the frequency to
|
|||
the allowed maximum (that is, the ``scaling_max_freq`` policy limit). In turn,
|
||||
if it is invoked by the CFS scheduling class, the governor will use the
|
||||
Per-Entity Load Tracking (PELT) metric for the root control group of the
|
||||
given CPU as the CPU utilization estimate (see the `Per-entity load tracking`_
|
||||
LWN.net article for a description of the PELT mechanism). Then, the new
|
||||
given CPU as the CPU utilization estimate (see the *Per-entity load tracking*
|
||||
LWN.net article [1]_ for a description of the PELT mechanism). Then, the new
|
||||
CPU frequency to apply is computed in accordance with the formula
|
||||
|
||||
f = 1.25 * ``f_0`` * ``util`` / ``max``
|
||||
|
@ -698,4 +702,8 @@ hardware feature (e.g. all Intel ones), even if the
|
|||
:c:macro:`CONFIG_X86_ACPI_CPUFREQ_CPB` configuration option is set.
|
||||
|
||||
|
||||
.. _Per-entity load tracking: https://lwn.net/Articles/531853/
|
||||
References
|
||||
==========
|
||||
|
||||
.. [1] Jonathan Corbet, *Per-entity load tracking*,
|
||||
https://lwn.net/Articles/531853/
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
.. |struct cpuidle_state| replace:: :c:type:`struct cpuidle_state <cpuidle_state>`
|
||||
.. |cpufreq| replace:: :doc:`CPU Performance Scaling <cpufreq>`
|
||||
|
||||
|
@ -5,9 +8,10 @@
|
|||
CPU Idle Time Management
|
||||
========================
|
||||
|
||||
::
|
||||
:Copyright: |copy| 2018 Intel Corporation
|
||||
|
||||
:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
Copyright (c) 2018 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
Concepts
|
||||
========
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
================
|
||||
Power Management
|
||||
================
|
||||
|
|
|
@ -0,0 +1,41 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
======================================
|
||||
Intel Performance and Energy Bias Hint
|
||||
======================================
|
||||
|
||||
:Copyright: |copy| 2019 Intel Corporation
|
||||
|
||||
:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
|
||||
.. kernel-doc:: arch/x86/kernel/cpu/intel_epb.c
|
||||
:doc: overview
|
||||
|
||||
Intel Performance and Energy Bias Attribute in ``sysfs``
|
||||
========================================================
|
||||
|
||||
The Intel Performance and Energy Bias Hint (EPB) value for a given (logical) CPU
|
||||
can be checked or updated through a ``sysfs`` attribute (file) under
|
||||
:file:`/sys/devices/system/cpu/cpu<N>/power/`, where the CPU number ``<N>``
|
||||
is allocated at the system initialization time:
|
||||
|
||||
``energy_perf_bias``
|
||||
Shows the current EPB value for the CPU in a sliding scale 0 - 15, where
|
||||
a value of 0 corresponds to a hint preference for highest performance
|
||||
and a value of 15 corresponds to the maximum energy savings.
|
||||
|
||||
In order to update the EPB value for the CPU, this attribute can be
|
||||
written to, either with a number in the 0 - 15 sliding scale above, or
|
||||
with one of the strings: "performance", "balance-performance", "normal",
|
||||
"balance-power", "power" that represent values reflected by their
|
||||
meaning.
|
||||
|
||||
This attribute is present for all online CPUs supporting the EPB
|
||||
feature.
|
||||
|
||||
Note that while the EPB interface to the processor is defined at the logical CPU
|
||||
level, the physical register backing it may be shared by multiple CPUs (for
|
||||
example, SMT siblings or cores in one package). For this reason, updating the
|
||||
EPB value for one CPU may cause the EPB values for other CPUs to change.
|
|
@ -1,10 +1,13 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
===============================================
|
||||
``intel_pstate`` CPU Performance Scaling Driver
|
||||
===============================================
|
||||
|
||||
::
|
||||
:Copyright: |copy| 2017 Intel Corporation
|
||||
|
||||
Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
|
||||
General Information
|
||||
|
@ -20,11 +23,10 @@ you have not done that yet.]
|
|||
|
||||
For the processors supported by ``intel_pstate``, the P-state concept is broader
|
||||
than just an operating frequency or an operating performance point (see the
|
||||
`LinuxCon Europe 2015 presentation by Kristen Accardi <LCEU2015_>`_ for more
|
||||
LinuxCon Europe 2015 presentation by Kristen Accardi [1]_ for more
|
||||
information about that). For this reason, the representation of P-states used
|
||||
by ``intel_pstate`` internally follows the hardware specification (for details
|
||||
refer to `Intel® 64 and IA-32 Architectures Software Developer’s Manual
|
||||
Volume 3: System Programming Guide <SDM_>`_). However, the ``CPUFreq`` core
|
||||
refer to Intel Software Developer’s Manual [2]_). However, the ``CPUFreq`` core
|
||||
uses frequencies for identifying operating performance points of CPUs and
|
||||
frequencies are involved in the user space interface exposed by it, so
|
||||
``intel_pstate`` maps its internal representation of P-states to frequencies too
|
||||
|
@ -561,9 +563,9 @@ or to pin every task potentially sensitive to them to a specific CPU.]
|
|||
|
||||
On the majority of systems supported by ``intel_pstate``, the ACPI tables
|
||||
provided by the platform firmware contain ``_PSS`` objects returning information
|
||||
that can be used for CPU performance scaling (refer to the `ACPI specification`_
|
||||
for details on the ``_PSS`` objects and the format of the information returned
|
||||
by them).
|
||||
that can be used for CPU performance scaling (refer to the ACPI specification
|
||||
[3]_ for details on the ``_PSS`` objects and the format of the information
|
||||
returned by them).
|
||||
|
||||
The information returned by the ACPI ``_PSS`` objects is used by the
|
||||
``acpi-cpufreq`` scaling driver. On systems supported by ``intel_pstate``
|
||||
|
@ -728,6 +730,14 @@ P-state is called, the ``ftrace`` filter can be set to to
|
|||
<idle>-0 [000] ..s. 2537.654843: intel_pstate_set_pstate <-intel_pstate_timer_func
|
||||
|
||||
|
||||
.. _LCEU2015: http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf
|
||||
.. _SDM: http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html
|
||||
.. _ACPI specification: http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf
|
||||
References
|
||||
==========
|
||||
|
||||
.. [1] Kristen Accardi, *Balancing Power and Performance in the Linux Kernel*,
|
||||
http://events.linuxfoundation.org/sites/events/files/slides/LinuxConEurope_2015.pdf
|
||||
|
||||
.. [2] *Intel® 64 and IA-32 Architectures Software Developer’s Manual Volume 3: System Programming Guide*,
|
||||
http://www.intel.com/content/www/us/en/architecture-and-technology/64-ia-32-architectures-software-developer-system-programming-manual-325384.html
|
||||
|
||||
.. [3] *Advanced Configuration and Power Interface Specification*,
|
||||
https://uefi.org/sites/default/files/resources/ACPI_6_3_final_Jan30.pdf
|
||||
|
|
|
@ -1,10 +1,14 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
===================
|
||||
System Sleep States
|
||||
===================
|
||||
|
||||
::
|
||||
:Copyright: |copy| 2017 Intel Corporation
|
||||
|
||||
:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
Sleep states are global low-power states of the entire system in which user
|
||||
space code cannot be executed and the overall system activity is significantly
|
||||
|
|
|
@ -1,10 +1,14 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
===========================
|
||||
Power Management Strategies
|
||||
===========================
|
||||
|
||||
::
|
||||
:Copyright: |copy| 2017 Intel Corporation
|
||||
|
||||
:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
Copyright (c) 2017 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
The Linux kernel supports two major high-level power management strategies.
|
||||
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
============================
|
||||
System-Wide Power Management
|
||||
============================
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==============================
|
||||
Working-State Power Management
|
||||
==============================
|
||||
|
@ -8,3 +10,4 @@ Working-State Power Management
|
|||
cpuidle
|
||||
cpufreq
|
||||
intel_pstate
|
||||
intel_epb
|
||||
|
|
|
@ -209,6 +209,22 @@ infrastructure:
|
|||
| AT | [35-32] | y |
|
||||
x--------------------------------------------------x
|
||||
|
||||
6) ID_AA64ZFR0_EL1 - SVE feature ID register 0
|
||||
|
||||
x--------------------------------------------------x
|
||||
| Name | bits | visible |
|
||||
|--------------------------------------------------|
|
||||
| SM4 | [43-40] | y |
|
||||
|--------------------------------------------------|
|
||||
| SHA3 | [35-32] | y |
|
||||
|--------------------------------------------------|
|
||||
| BitPerm | [19-16] | y |
|
||||
|--------------------------------------------------|
|
||||
| AES | [7-4] | y |
|
||||
|--------------------------------------------------|
|
||||
| SVEVer | [3-0] | y |
|
||||
x--------------------------------------------------x
|
||||
|
||||
Appendix I: Example
|
||||
---------------------------
|
||||
|
||||
|
|
|
@ -13,9 +13,9 @@ architected discovery mechanism available to userspace code at EL0. The
|
|||
kernel exposes the presence of these features to userspace through a set
|
||||
of flags called hwcaps, exposed in the auxiliary vector.
|
||||
|
||||
Userspace software can test for features by acquiring the AT_HWCAP entry
|
||||
of the auxilliary vector, and testing whether the relevant flags are
|
||||
set, e.g.
|
||||
Userspace software can test for features by acquiring the AT_HWCAP or
|
||||
AT_HWCAP2 entry of the auxiliary vector, and testing whether the relevant
|
||||
flags are set, e.g.
|
||||
|
||||
bool floating_point_is_present(void)
|
||||
{
|
||||
|
@ -135,6 +135,10 @@ HWCAP_DCPOP
|
|||
|
||||
Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0001.
|
||||
|
||||
HWCAP2_DCPODP
|
||||
|
||||
Functionality implied by ID_AA64ISAR1_EL1.DPB == 0b0010.
|
||||
|
||||
HWCAP_SHA3
|
||||
|
||||
Functionality implied by ID_AA64ISAR0_EL1.SHA3 == 0b0001.
|
||||
|
@ -159,6 +163,30 @@ HWCAP_SVE
|
|||
|
||||
Functionality implied by ID_AA64PFR0_EL1.SVE == 0b0001.
|
||||
|
||||
HWCAP2_SVE2
|
||||
|
||||
Functionality implied by ID_AA64ZFR0_EL1.SVEVer == 0b0001.
|
||||
|
||||
HWCAP2_SVEAES
|
||||
|
||||
Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0001.
|
||||
|
||||
HWCAP2_SVEPMULL
|
||||
|
||||
Functionality implied by ID_AA64ZFR0_EL1.AES == 0b0010.
|
||||
|
||||
HWCAP2_SVEBITPERM
|
||||
|
||||
Functionality implied by ID_AA64ZFR0_EL1.BitPerm == 0b0001.
|
||||
|
||||
HWCAP2_SVESHA3
|
||||
|
||||
Functionality implied by ID_AA64ZFR0_EL1.SHA3 == 0b0001.
|
||||
|
||||
HWCAP2_SVESM4
|
||||
|
||||
Functionality implied by ID_AA64ZFR0_EL1.SM4 == 0b0001.
|
||||
|
||||
HWCAP_ASIMDFHM
|
||||
|
||||
Functionality implied by ID_AA64ISAR0_EL1.FHM == 0b0001.
|
||||
|
@ -194,3 +222,10 @@ HWCAP_PACG
|
|||
Functionality implied by ID_AA64ISAR1_EL1.GPA == 0b0001 or
|
||||
ID_AA64ISAR1_EL1.GPI == 0b0001, as described by
|
||||
Documentation/arm64/pointer-authentication.txt.
|
||||
|
||||
|
||||
4. Unused AT_HWCAP bits
|
||||
-----------------------
|
||||
|
||||
For interoperation with userspace, the kernel guarantees that bits 62
|
||||
and 63 of AT_HWCAP will always be returned as 0.
|
||||
|
|
|
@ -61,6 +61,7 @@ stable kernels.
|
|||
| ARM | Cortex-A76 | #1188873 | ARM64_ERRATUM_1188873 |
|
||||
| ARM | Cortex-A76 | #1165522 | ARM64_ERRATUM_1165522 |
|
||||
| ARM | Cortex-A76 | #1286807 | ARM64_ERRATUM_1286807 |
|
||||
| ARM | Neoverse-N1 | #1188873 | ARM64_ERRATUM_1188873 |
|
||||
| ARM | MMU-500 | #841119,#826419 | N/A |
|
||||
| | | | |
|
||||
| Cavium | ThunderX ITS | #22375, #24313 | CAVIUM_ERRATUM_22375 |
|
||||
|
@ -77,6 +78,7 @@ stable kernels.
|
|||
| Hisilicon | Hip0{5,6,7} | #161010101 | HISILICON_ERRATUM_161010101 |
|
||||
| Hisilicon | Hip0{6,7} | #161010701 | N/A |
|
||||
| Hisilicon | Hip07 | #161600802 | HISILICON_ERRATUM_161600802 |
|
||||
| Hisilicon | Hip08 SMMU PMCG | #162001800 | N/A |
|
||||
| | | | |
|
||||
| Qualcomm Tech. | Kryo/Falkor v1 | E1003 | QCOM_FALKOR_ERRATUM_1003 |
|
||||
| Qualcomm Tech. | Falkor v1 | E1009 | QCOM_FALKOR_ERRATUM_1009 |
|
||||
|
|
|
@ -34,6 +34,23 @@ model features for SVE is included in Appendix A.
|
|||
following sections: software that needs to verify that those interfaces are
|
||||
present must check for HWCAP_SVE instead.
|
||||
|
||||
* On hardware that supports the SVE2 extensions, HWCAP2_SVE2 will also
|
||||
be reported in the AT_HWCAP2 aux vector entry. In addition to this,
|
||||
optional extensions to SVE2 may be reported by the presence of:
|
||||
|
||||
HWCAP2_SVE2
|
||||
HWCAP2_SVEAES
|
||||
HWCAP2_SVEPMULL
|
||||
HWCAP2_SVEBITPERM
|
||||
HWCAP2_SVESHA3
|
||||
HWCAP2_SVESM4
|
||||
|
||||
This list may be extended over time as the SVE architecture evolves.
|
||||
|
||||
These extensions are also reported via the CPU ID register ID_AA64ZFR0_EL1,
|
||||
which userspace can read using an MRS instruction. See elf_hwcaps.txt and
|
||||
cpu-feature-registers.txt for details.
|
||||
|
||||
* Debuggers should restrict themselves to interacting with the target via the
|
||||
NT_ARM_SVE regset. The recommended way of detecting support for this regset
|
||||
is to connect to a target process first and then attempt a
|
||||
|
|
|
@ -56,6 +56,23 @@ Barriers:
|
|||
smp_mb__{before,after}_atomic()
|
||||
|
||||
|
||||
TYPES (signed vs unsigned)
|
||||
-----
|
||||
|
||||
While atomic_t, atomic_long_t and atomic64_t use int, long and s64
|
||||
respectively (for hysterical raisins), the kernel uses -fno-strict-overflow
|
||||
(which implies -fwrapv) and defines signed overflow to behave like
|
||||
2s-complement.
|
||||
|
||||
Therefore, an explicitly unsigned variant of the atomic ops is strictly
|
||||
unnecessary and we can simply cast, there is no UB.
|
||||
|
||||
There was a bug in UBSAN prior to GCC-8 that would generate UB warnings for
|
||||
signed types.
|
||||
|
||||
With this we also conform to the C/C++ _Atomic behaviour and things like
|
||||
P1236R1.
|
||||
|
||||
|
||||
SEMANTICS
|
||||
---------
|
||||
|
|
|
@ -148,16 +148,16 @@ The ``btf_type.size * 8`` must be equal to or greater than ``BTF_INT_BITS()``
|
|||
for the type. The maximum value of ``BTF_INT_BITS()`` is 128.
|
||||
|
||||
The ``BTF_INT_OFFSET()`` specifies the starting bit offset to calculate values
|
||||
for this int. For example, a bitfield struct member has: * btf member bit
|
||||
offset 100 from the start of the structure, * btf member pointing to an int
|
||||
type, * the int type has ``BTF_INT_OFFSET() = 2`` and ``BTF_INT_BITS() = 4``
|
||||
for this int. For example, a bitfield struct member has:
|
||||
* btf member bit offset 100 from the start of the structure,
|
||||
* btf member pointing to an int type,
|
||||
* the int type has ``BTF_INT_OFFSET() = 2`` and ``BTF_INT_BITS() = 4``
|
||||
|
||||
Then in the struct memory layout, this member will occupy ``4`` bits starting
|
||||
from bits ``100 + 2 = 102``.
|
||||
|
||||
Alternatively, the bitfield struct member can be the following to access the
|
||||
same bits as the above:
|
||||
|
||||
* btf member bit offset 102,
|
||||
* btf member pointing to an int type,
|
||||
* the int type has ``BTF_INT_OFFSET() = 0`` and ``BTF_INT_BITS() = 4``
|
||||
|
|
|
@ -101,16 +101,6 @@ changes occur:
|
|||
translations for software managed TLB configurations.
|
||||
The sparc64 port currently does this.
|
||||
|
||||
6) ``void tlb_migrate_finish(struct mm_struct *mm)``
|
||||
|
||||
This interface is called at the end of an explicit
|
||||
process migration. This interface provides a hook
|
||||
to allow a platform to update TLB or context-specific
|
||||
information for the address space.
|
||||
|
||||
The ia64 sn2 platform is one example of a platform
|
||||
that uses this interface.
|
||||
|
||||
Next, we have the cache flushing interfaces. In general, when Linux
|
||||
is changing an existing virtual-->physical mapping to a new value,
|
||||
the sequence will be in one of the following forms::
|
||||
|
|
|
@ -3,79 +3,79 @@ How CPU topology info is exported via sysfs
|
|||
===========================================
|
||||
|
||||
Export CPU topology info via sysfs. Items (attributes) are similar
|
||||
to /proc/cpuinfo output of some architectures:
|
||||
to /proc/cpuinfo output of some architectures. They reside in
|
||||
/sys/devices/system/cpu/cpuX/topology/:
|
||||
|
||||
1) /sys/devices/system/cpu/cpuX/topology/physical_package_id:
|
||||
physical_package_id:
|
||||
|
||||
physical package id of cpuX. Typically corresponds to a physical
|
||||
socket number, but the actual value is architecture and platform
|
||||
dependent.
|
||||
|
||||
2) /sys/devices/system/cpu/cpuX/topology/core_id:
|
||||
core_id:
|
||||
|
||||
the CPU core ID of cpuX. Typically it is the hardware platform's
|
||||
identifier (rather than the kernel's). The actual value is
|
||||
architecture and platform dependent.
|
||||
|
||||
3) /sys/devices/system/cpu/cpuX/topology/book_id:
|
||||
book_id:
|
||||
|
||||
the book ID of cpuX. Typically it is the hardware platform's
|
||||
identifier (rather than the kernel's). The actual value is
|
||||
architecture and platform dependent.
|
||||
|
||||
4) /sys/devices/system/cpu/cpuX/topology/drawer_id:
|
||||
drawer_id:
|
||||
|
||||
the drawer ID of cpuX. Typically it is the hardware platform's
|
||||
identifier (rather than the kernel's). The actual value is
|
||||
architecture and platform dependent.
|
||||
|
||||
5) /sys/devices/system/cpu/cpuX/topology/thread_siblings:
|
||||
thread_siblings:
|
||||
|
||||
internal kernel map of cpuX's hardware threads within the same
|
||||
core as cpuX.
|
||||
|
||||
6) /sys/devices/system/cpu/cpuX/topology/thread_siblings_list:
|
||||
thread_siblings_list:
|
||||
|
||||
human-readable list of cpuX's hardware threads within the same
|
||||
core as cpuX.
|
||||
|
||||
7) /sys/devices/system/cpu/cpuX/topology/core_siblings:
|
||||
core_siblings:
|
||||
|
||||
internal kernel map of cpuX's hardware threads within the same
|
||||
physical_package_id.
|
||||
|
||||
8) /sys/devices/system/cpu/cpuX/topology/core_siblings_list:
|
||||
core_siblings_list:
|
||||
|
||||
human-readable list of cpuX's hardware threads within the same
|
||||
physical_package_id.
|
||||
|
||||
9) /sys/devices/system/cpu/cpuX/topology/book_siblings:
|
||||
book_siblings:
|
||||
|
||||
internal kernel map of cpuX's hardware threads within the same
|
||||
book_id.
|
||||
|
||||
10) /sys/devices/system/cpu/cpuX/topology/book_siblings_list:
|
||||
book_siblings_list:
|
||||
|
||||
human-readable list of cpuX's hardware threads within the same
|
||||
book_id.
|
||||
|
||||
11) /sys/devices/system/cpu/cpuX/topology/drawer_siblings:
|
||||
drawer_siblings:
|
||||
|
||||
internal kernel map of cpuX's hardware threads within the same
|
||||
drawer_id.
|
||||
|
||||
12) /sys/devices/system/cpu/cpuX/topology/drawer_siblings_list:
|
||||
drawer_siblings_list:
|
||||
|
||||
human-readable list of cpuX's hardware threads within the same
|
||||
drawer_id.
|
||||
|
||||
To implement it in an architecture-neutral way, a new source file,
|
||||
drivers/base/topology.c, is to export the 6 to 12 attributes. The book
|
||||
and drawer related sysfs files will only be created if CONFIG_SCHED_BOOK
|
||||
and CONFIG_SCHED_DRAWER are selected.
|
||||
The architecture-neutral source file drivers/base/topology.c exports these attributes.
|
||||
However, the book and drawer related sysfs files will only be created if
|
||||
CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are selected, respectively.
|
||||
|
||||
CONFIG_SCHED_BOOK and CONFIG_DRAWER are currently only used on s390, where
|
||||
they reflect the cpu and cache hierarchy.
|
||||
CONFIG_SCHED_BOOK and CONFIG_SCHED_DRAWER are currently only used on s390,
|
||||
where they reflect the cpu and cache hierarchy.
|
||||
|
||||
For an architecture to support this feature, it must define some of
|
||||
these macros in include/asm-XXX/topology.h::
|
||||
|
@ -98,10 +98,10 @@ To be consistent on all architectures, include/linux/topology.h
|
|||
provides default definitions for any of the above macros that are
|
||||
not defined by include/asm-XXX/topology.h:
|
||||
|
||||
1) physical_package_id: -1
|
||||
2) core_id: 0
|
||||
3) sibling_cpumask: just the given CPU
|
||||
4) core_cpumask: just the given CPU
|
||||
1) topology_physical_package_id: -1
|
||||
2) topology_core_id: 0
|
||||
3) topology_sibling_cpumask: just the given CPU
|
||||
4) topology_core_cpumask: just the given CPU
|
||||
|
||||
For architectures that don't support books (CONFIG_SCHED_BOOK) there are no
|
||||
default definitions for topology_book_id() and topology_book_cpumask().
|
||||
|
|
|
@ -228,7 +228,7 @@ patternProperties:
|
|||
- renesas,r9a06g032-smp
|
||||
- rockchip,rk3036-smp
|
||||
- rockchip,rk3066-smp
|
||||
- socionext,milbeaut-m10v-smp
|
||||
- socionext,milbeaut-m10v-smp
|
||||
- ste,dbx500-smp
|
||||
|
||||
cpu-release-addr:
|
||||
|
|
|
@ -232,37 +232,152 @@ Example:
|
|||
};
|
||||
};
|
||||
|
||||
Stratix10 SoCFPGA ECC Manager
|
||||
Stratix10 SoCFPGA ECC Manager (ARM64)
|
||||
The Stratix10 SoC ECC Manager handles the IRQs for each peripheral
|
||||
in a shared register similar to the Arria10. However, ECC requires
|
||||
access to registers that can only be read from Secure Monitor with
|
||||
SMC calls. Therefore the device tree is slightly different.
|
||||
in a shared register similar to the Arria10. However, Stratix10 ECC
|
||||
requires access to registers that can only be read from Secure Monitor
|
||||
with SMC calls. Therefore the device tree is slightly different. Note
|
||||
that only 1 interrupt is sent in Stratix10 because the double bit errors
|
||||
are treated as SErrors in ARM64 instead of IRQs in ARM32.
|
||||
|
||||
Required Properties:
|
||||
- compatible : Should be "altr,socfpga-s10-ecc-manager"
|
||||
- interrupts : Should be single bit error interrupt, then double bit error
|
||||
interrupt.
|
||||
- altr,sysmgr-syscon : phandle to Stratix10 System Manager Block
|
||||
containing the ECC manager registers.
|
||||
- interrupts : Should be single bit error interrupt.
|
||||
- interrupt-controller : boolean indicator that ECC Manager is an interrupt controller
|
||||
- #interrupt-cells : must be set to 2.
|
||||
- #address-cells: must be 1
|
||||
- #size-cells: must be 1
|
||||
- ranges : standard definition, should translate from local addresses
|
||||
|
||||
Subcomponents:
|
||||
|
||||
SDRAM ECC
|
||||
Required Properties:
|
||||
- compatible : Should be "altr,sdram-edac-s10"
|
||||
- interrupts : Should be single bit error interrupt, then double bit error
|
||||
interrupt, in this order.
|
||||
- interrupts : Should be single bit error interrupt.
|
||||
|
||||
On-Chip RAM ECC
|
||||
Required Properties:
|
||||
- compatible : Should be "altr,socfpga-s10-ocram-ecc"
|
||||
- reg : Address and size for ECC block registers.
|
||||
- altr,ecc-parent : phandle to parent OCRAM node.
|
||||
- interrupts : Should be single bit error interrupt.
|
||||
|
||||
Ethernet FIFO ECC
|
||||
Required Properties:
|
||||
- compatible : Should be "altr,socfpga-s10-eth-mac-ecc"
|
||||
- reg : Address and size for ECC block registers.
|
||||
- altr,ecc-parent : phandle to parent Ethernet node.
|
||||
- interrupts : Should be single bit error interrupt.
|
||||
|
||||
NAND FIFO ECC
|
||||
Required Properties:
|
||||
- compatible : Should be "altr,socfpga-s10-nand-ecc"
|
||||
- reg : Address and size for ECC block registers.
|
||||
- altr,ecc-parent : phandle to parent NAND node.
|
||||
- interrupts : Should be single bit error interrupt.
|
||||
|
||||
DMA FIFO ECC
|
||||
Required Properties:
|
||||
- compatible : Should be "altr,socfpga-s10-dma-ecc"
|
||||
- reg : Address and size for ECC block registers.
|
||||
- altr,ecc-parent : phandle to parent DMA node.
|
||||
- interrupts : Should be single bit error interrupt.
|
||||
|
||||
USB FIFO ECC
|
||||
Required Properties:
|
||||
- compatible : Should be "altr,socfpga-s10-usb-ecc"
|
||||
- reg : Address and size for ECC block registers.
|
||||
- altr,ecc-parent : phandle to parent USB node.
|
||||
- interrupts : Should be single bit error interrupt.
|
||||
|
||||
SDMMC FIFO ECC
|
||||
Required Properties:
|
||||
- compatible : Should be "altr,socfpga-s10-sdmmc-ecc"
|
||||
- reg : Address and size for ECC block registers.
|
||||
- altr,ecc-parent : phandle to parent SD/MMC node.
|
||||
- interrupts : Should be single bit error interrupt for port A
|
||||
and then single bit error interrupt for port B.
|
||||
|
||||
Example:
|
||||
|
||||
eccmgr {
|
||||
compatible = "altr,socfpga-s10-ecc-manager";
|
||||
interrupts = <0 15 4>, <0 95 4>;
|
||||
altr,sysmgr-syscon = <&sysmgr>;
|
||||
#address-cells = <1>;
|
||||
#size-cells = <1>;
|
||||
interrupts = <0 15 4>;
|
||||
interrupt-controller;
|
||||
#interrupt-cells = <2>;
|
||||
ranges;
|
||||
|
||||
sdramedac {
|
||||
compatible = "altr,sdram-edac-s10";
|
||||
interrupts = <16 4>, <48 4>;
|
||||
interrupts = <16 IRQ_TYPE_LEVEL_HIGH>;
|
||||
};
|
||||
|
||||
ocram-ecc@ff8cc000 {
|
||||
compatible = "altr,socfpga-s10-ocram-ecc";
|
||||
reg = <0xff8cc000 0x100>;
|
||||
altr,ecc-parent = <&ocram>;
|
||||
interrupts = <1 IRQ_TYPE_LEVEL_HIGH>;
|
||||
};
|
||||
|
||||
emac0-rx-ecc@ff8c0000 {
|
||||
compatible = "altr,socfpga-s10-eth-mac-ecc";
|
||||
reg = <0xff8c0000 0x100>;
|
||||
altr,ecc-parent = <&gmac0>;
|
||||
interrupts = <4 IRQ_TYPE_LEVEL_HIGH>;
|
||||
};
|
||||
|
||||
emac0-tx-ecc@ff8c0400 {
|
||||
compatible = "altr,socfpga-s10-eth-mac-ecc";
|
||||
reg = <0xff8c0400 0x100>;
|
||||
altr,ecc-parent = <&gmac0>;
|
||||
interrupts = <5 IRQ_TYPE_LEVEL_HIGH>;
|
||||
};
|
||||
|
||||
nand-buf-ecc@ff8c8000 {
|
||||
compatible = "altr,socfpga-s10-nand-ecc";
|
||||
reg = <0xff8c8000 0x100>;
|
||||
altr,ecc-parent = <&nand>;
|
||||
interrupts = <11 IRQ_TYPE_LEVEL_HIGH>;
|
||||
};
|
||||
|
||||
nand-rd-ecc@ff8c8400 {
|
||||
compatible = "altr,socfpga-s10-nand-ecc";
|
||||
reg = <0xff8c8400 0x100>;
|
||||
altr,ecc-parent = <&nand>;
|
||||
interrupts = <13 IRQ_TYPE_LEVEL_HIGH>;
|
||||
};
|
||||
|
||||
nand-wr-ecc@ff8c8800 {
|
||||
compatible = "altr,socfpga-s10-nand-ecc";
|
||||
reg = <0xff8c8800 0x100>;
|
||||
altr,ecc-parent = <&nand>;
|
||||
interrupts = <12 IRQ_TYPE_LEVEL_HIGH>;
|
||||
};
|
||||
|
||||
dma-ecc@ff8c9000 {
|
||||
compatible = "altr,socfpga-s10-dma-ecc";
|
||||
reg = <0xff8c9000 0x100>;
|
||||
altr,ecc-parent = <&pdma>;
|
||||
interrupts = <10 IRQ_TYPE_LEVEL_HIGH>;
};
|
||||
|
||||
usb0-ecc@ff8c4000 {
|
||||
compatible = "altr,socfpga-s10-usb-ecc";
|
||||
reg = <0xff8c4000 0x100>;
|
||||
altr,ecc-parent = <&usb0>;
|
||||
interrupts = <2 IRQ_TYPE_LEVEL_HIGH>;
|
||||
};
|
||||
|
||||
sdmmc-ecc@ff8c8c00 {
|
||||
compatible = "altr,socfpga-s10-sdmmc-ecc";
|
||||
reg = <0xff8c8c00 0x100>;
|
||||
altr,ecc-parent = <&mmc>;
|
||||
interrupts = <14 IRQ_TYPE_LEVEL_HIGH>,
|
||||
<15 IRQ_TYPE_LEVEL_HIGH>;
|
||||
};
|
||||
};
|
||||
|
|
|
@ -26,7 +26,7 @@ Required node properties:
|
|||
|
||||
Optional node properties:
|
||||
|
||||
- ti,mode: Operation mode (see above).
|
||||
- ti,mode: Operation mode (u8) (see above).
|
||||
|
||||
|
||||
Example (operation mode 2):
|
||||
|
@ -34,5 +34,5 @@ Example (operation mode 2):
|
|||
adc128d818@1d {
|
||||
compatible = "ti,adc128d818";
|
||||
reg = <0x1d>;
|
||||
ti,mode = <2>;
|
||||
ti,mode = /bits/ 8 <2>;
|
||||
};
|
||||
|
|
|
@ -20,6 +20,8 @@ Required properties:
|
|||
Optional properties:
|
||||
- phy-handle: See ethernet.txt file in the same directory.
|
||||
If absent, davinci_emac driver defaults to 100/FULL.
|
||||
- nvmem-cells: phandle, reference to an nvmem node for the MAC address
|
||||
- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used
|
||||
- ti,davinci-rmii-en: 1 byte, 1 means use RMII
|
||||
- ti,davinci-no-bd-ram: boolean, does EMAC have BD RAM?
|
||||
|
||||
|
|
|
@ -10,15 +10,14 @@ Documentation/devicetree/bindings/phy/phy-bindings.txt.
|
|||
the boot program; should be used in cases where the MAC address assigned to
|
||||
the device by the boot program is different from the "local-mac-address"
|
||||
property;
|
||||
- nvmem-cells: phandle, reference to an nvmem node for the MAC address;
|
||||
- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used;
|
||||
- max-speed: number, specifies maximum speed in Mbit/s supported by the device;
|
||||
- max-frame-size: number, maximum transfer unit (IEEE defined MTU), rather than
|
||||
the maximum frame size (there's a contradiction in the Devicetree
|
||||
Specification).
|
||||
- phy-mode: string, operation mode of the PHY interface. This is now a de-facto
|
||||
standard property; supported values are:
|
||||
* "internal"
|
||||
* "internal" (Internal means there is not a standard bus between the MAC and
|
||||
the PHY, something proprietary is being used to embed the PHY in the MAC.)
|
||||
* "mii"
|
||||
* "gmii"
|
||||
* "sgmii"
|
||||
|
|
|
@ -26,6 +26,10 @@ Required properties:
|
|||
Optional elements: 'tsu_clk'
|
||||
- clocks: Phandles to input clocks.
|
||||
|
||||
Optional properties:
|
||||
- nvmem-cells: phandle, reference to an nvmem node for the MAC address
|
||||
- nvmem-cell-names: string, should be "mac-address" if nvmem is to be used
|
||||
|
||||
Optional properties for PHY child node:
|
||||
- reset-gpios : Should specify the gpio for phy reset
|
||||
- magic-packet : If present, indicates that the hardware supports waking
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
============
|
||||
ACPI Support
|
||||
============
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
linuxized-acpica
|
||||
scan_handlers
|
|
@ -1,31 +1,37 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
============================================================
|
||||
Linuxized ACPICA - Introduction to ACPICA Release Automation
|
||||
============================================================
|
||||
|
||||
Copyright (C) 2013-2016, Intel Corporation
|
||||
Author: Lv Zheng <lv.zheng@intel.com>
|
||||
:Copyright: |copy| 2013-2016, Intel Corporation
|
||||
|
||||
:Author: Lv Zheng <lv.zheng@intel.com>
|
||||
|
||||
|
||||
Abstract:
|
||||
|
||||
Abstract
|
||||
========
|
||||
This document describes the ACPICA project and the relationship between
|
||||
ACPICA and Linux. It also describes how ACPICA code in drivers/acpi/acpica,
|
||||
include/acpi and tools/power/acpi is automatically updated to follow the
|
||||
upstream.
|
||||
|
||||
ACPICA Project
|
||||
==============
|
||||
|
||||
1. ACPICA Project
|
||||
The ACPI Component Architecture (ACPICA) project provides an operating
|
||||
system (OS)-independent reference implementation of the Advanced
|
||||
Configuration and Power Interface Specification (ACPI). It has been
|
||||
adapted by various host OSes. By directly integrating ACPICA, Linux can
|
||||
also benefit from the application experiences of ACPICA from other host
|
||||
OSes.
|
||||
|
||||
The ACPI Component Architecture (ACPICA) project provides an operating
|
||||
system (OS)-independent reference implementation of the Advanced
|
||||
Configuration and Power Interface Specification (ACPI). It has been
|
||||
adapted by various host OSes. By directly integrating ACPICA, Linux can
|
||||
also benefit from the application experiences of ACPICA from other host
|
||||
OSes.
|
||||
The homepage of ACPICA project is: www.acpica.org, it is maintained and
|
||||
supported by Intel Corporation.
|
||||
|
||||
The homepage of the ACPICA project is www.acpica.org; it is maintained and
|
||||
supported by Intel Corporation.
|
||||
|
||||
The following figure depicts the Linux ACPI subsystem where the ACPICA
|
||||
adaptation is included:
|
||||
The following figure depicts the Linux ACPI subsystem where the ACPICA
|
||||
adaptation is included::
|
||||
|
||||
+---------------------------------------------------------+
|
||||
| |
|
||||
|
@ -71,21 +77,27 @@ upstream.
|
|||
|
||||
Figure 1. Linux ACPI Software Components
|
||||
|
||||
NOTE:
|
||||
.. note::
|
||||
A. OS Service Layer - Provided by Linux to offer OS dependent
|
||||
implementation of the predefined ACPICA interfaces (acpi_os_*).
|
||||
::
|
||||
|
||||
include/acpi/acpiosxf.h
|
||||
drivers/acpi/osl.c
|
||||
include/acpi/platform
|
||||
include/asm/acenv.h
|
||||
B. ACPICA Functionality - Released from ACPICA code base to offer
|
||||
OS independent implementation of the ACPICA interfaces (acpi_*).
|
||||
::
|
||||
|
||||
drivers/acpi/acpica
|
||||
include/acpi/ac*.h
|
||||
tools/power/acpi
|
||||
C. Linux/ACPI Functionality - Providing Linux specific ACPI
|
||||
functionality to the other Linux kernel subsystems and user space
|
||||
programs.
|
||||
::
|
||||
|
||||
drivers/acpi
|
||||
include/linux/acpi.h
|
||||
include/linux/acpi*.h
|
||||
|
@ -95,24 +107,27 @@ upstream.
|
|||
ACPI subsystem to offer architecture specific implementation of the
|
||||
ACPI interfaces. They are Linux specific components and are out of
|
||||
the scope of this document.
|
||||
::
|
||||
|
||||
include/asm/acpi.h
|
||||
include/asm/acpi*.h
|
||||
arch/*/acpi
|
||||
|
||||
2. ACPICA Release
|
||||
ACPICA Release
|
||||
==============
|
||||
|
||||
The ACPICA project maintains its code base at the following repository URL:
|
||||
https://github.com/acpica/acpica.git. As a rule, a release is made every
|
||||
month.
|
||||
The ACPICA project maintains its code base at the following repository URL:
|
||||
https://github.com/acpica/acpica.git. As a rule, a release is made every
|
||||
month.
|
||||
|
||||
As the coding style adopted by the ACPICA project is not acceptable by
|
||||
Linux, there is a release process to convert the ACPICA git commits into
|
||||
Linux patches. The patches generated by this process are referred to as
|
||||
"linuxized ACPICA patches". The release process is carried out on a local
|
||||
copy the ACPICA git repository. Each commit in the monthly release is
|
||||
converted into a linuxized ACPICA patch. Together, they form the monthly
|
||||
ACPICA release patchset for the Linux ACPI community. This process is
|
||||
illustrated in the following figure:
|
||||
As the coding style adopted by the ACPICA project is not acceptable to
|
||||
Linux, there is a release process to convert the ACPICA git commits into
|
||||
Linux patches. The patches generated by this process are referred to as
|
||||
"linuxized ACPICA patches". The release process is carried out on a local
|
||||
copy of the ACPICA git repository. Each commit in the monthly release is
|
||||
converted into a linuxized ACPICA patch. Together, they form the monthly
|
||||
ACPICA release patchset for the Linux ACPI community. This process is
|
||||
illustrated in the following figure::
|
||||
|
||||
+-----------------------------+
|
||||
| acpica / master (-) commits |
|
||||
|
@ -153,7 +168,7 @@ upstream.
|
|||
|
||||
Figure 2. ACPICA -> Linux Upstream Process
|
||||
|
||||
NOTE:
|
||||
.. note::
|
||||
A. Linuxize Utilities - Provided by the ACPICA repository, including a
|
||||
utility located in source/tools/acpisrc folder and a number of
|
||||
scripts located in generate/linux folder.
|
||||
|
@ -170,19 +185,20 @@ upstream.
|
|||
following kernel configuration options:
|
||||
CONFIG_ACPI/CONFIG_ACPI_DEBUG/CONFIG_ACPI_DEBUGGER
|
||||
|
||||
3. ACPICA Divergences
|
||||
ACPICA Divergences
|
||||
==================
|
||||
|
||||
Ideally, all of the ACPICA commits should be converted into Linux patches
|
||||
automatically without manual modifications, the "linux / master" tree should
|
||||
contain the ACPICA code that exactly corresponds to the ACPICA code
|
||||
contained in "new linuxized acpica" tree and it should be possible to run
|
||||
the release process fully automatically.
|
||||
Ideally, all of the ACPICA commits should be converted into Linux patches
|
||||
automatically without manual modifications, the "linux / master" tree should
|
||||
contain the ACPICA code that exactly corresponds to the ACPICA code
|
||||
contained in "new linuxized acpica" tree and it should be possible to run
|
||||
the release process fully automatically.
|
||||
|
||||
As a matter of fact, however, there are source code differences between
|
||||
the ACPICA code in Linux and the upstream ACPICA code, referred to as
|
||||
"ACPICA Divergences".
|
||||
As a matter of fact, however, there are source code differences between
|
||||
the ACPICA code in Linux and the upstream ACPICA code, referred to as
|
||||
"ACPICA Divergences".
|
||||
|
||||
The various sources of ACPICA divergences include:
|
||||
The various sources of ACPICA divergences include:
|
||||
1. Legacy divergences - Before the current ACPICA release process was
|
||||
established, there already had been divergences between Linux and
|
||||
ACPICA. Over the past several years those divergences have been greatly
|
||||
|
@ -213,11 +229,12 @@ upstream.
|
|||
rebased on the ACPICA side in order to offer better solutions, new ACPICA
|
||||
divergences are generated.
|
||||
|
||||
4. ACPICA Development
|
||||
ACPICA Development
|
||||
==================
|
||||
|
||||
This paragraph guides Linux developers to use the ACPICA upstream release
|
||||
utilities to obtain Linux patches corresponding to upstream ACPICA commits
|
||||
before they become available from the ACPICA release process.
|
||||
This paragraph guides Linux developers to use the ACPICA upstream release
|
||||
utilities to obtain Linux patches corresponding to upstream ACPICA commits
|
||||
before they become available from the ACPICA release process.
|
||||
|
||||
1. Cherry-pick an ACPICA commit
|
||||
|
||||
|
@ -225,7 +242,7 @@ upstream.
|
|||
you want to cherry pick must be committed into the local repository.
|
||||
|
||||
Then the gen-patch.sh command can help to cherry-pick an ACPICA commit
|
||||
from the ACPICA local repository:
|
||||
from the ACPICA local repository::
|
||||
|
||||
$ git clone https://github.com/acpica/acpica
|
||||
$ cd acpica
|
||||
|
@ -240,7 +257,7 @@ upstream.
|
|||
changes that haven't been applied to Linux yet.
|
||||
|
||||
You can generate the ACPICA release series yourself and rebase your code on
|
||||
top of the generated ACPICA release patches:
|
||||
top of the generated ACPICA release patches::
|
||||
|
||||
$ git clone https://github.com/acpica/acpica
|
||||
$ cd acpica
|
||||
|
@ -254,7 +271,7 @@ upstream.
|
|||
3. Inspect the current divergences
|
||||
|
||||
If you have local copies of both Linux and upstream ACPICA, you can generate
|
||||
a diff file indicating the state of the current divergences:
|
||||
a diff file indicating the state of the current divergences::
|
||||
|
||||
# git clone https://github.com/acpica/acpica
|
||||
# git clone http://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
|
|
@ -1,7 +1,13 @@
|
|||
ACPI Scan Handlers
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
Copyright (C) 2012, Intel Corporation
|
||||
Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
==================
|
||||
ACPI Scan Handlers
|
||||
==================
|
||||
|
||||
:Copyright: |copy| 2012, Intel Corporation
|
||||
|
||||
:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
During system initialization and ACPI-based device hot-add, the ACPI namespace
|
||||
is scanned in search of device objects that generally represent various pieces
|
||||
|
@ -30,14 +36,14 @@ to configure that link so that the kernel can use it.
|
|||
Those additional configuration tasks usually depend on the type of the hardware
|
||||
component represented by the given device node which can be determined on the
|
||||
basis of the device node's hardware ID (HID). They are performed by objects
|
||||
called ACPI scan handlers represented by the following structure:
|
||||
called ACPI scan handlers represented by the following structure::
|
||||
|
||||
struct acpi_scan_handler {
|
||||
const struct acpi_device_id *ids;
|
||||
struct list_head list_node;
|
||||
int (*attach)(struct acpi_device *dev, const struct acpi_device_id *id);
|
||||
void (*detach)(struct acpi_device *dev);
|
||||
};
|
||||
struct acpi_scan_handler {
|
||||
const struct acpi_device_id *ids;
|
||||
struct list_head list_node;
|
||||
int (*attach)(struct acpi_device *dev, const struct acpi_device_id *id);
|
||||
void (*detach)(struct acpi_device *dev);
|
||||
};
|
||||
|
||||
where ids is the list of IDs of device nodes the given handler is supposed to
|
||||
take care of, list_node is the hook to the global list of ACPI scan handlers
|
|
@ -103,51 +103,6 @@ continuing execution::
|
|||
ha->flags.ints_enabled = 0;
|
||||
}
|
||||
|
||||
In addition to write posting, on some large multiprocessing systems
|
||||
(e.g. SGI Challenge, Origin and Altix machines) posted writes won't be
|
||||
strongly ordered coming from different CPUs. Thus it's important to
|
||||
properly protect parts of your driver that do memory-mapped writes with
|
||||
locks and use the :c:func:`mmiowb()` to make sure they arrive in the
|
||||
order intended. Issuing a regular readX() will also ensure write ordering,
|
||||
but should only be used when the
|
||||
driver has to be sure that the write has actually arrived at the device
|
||||
(not that it's simply ordered with respect to other writes), since a
|
||||
full readX() is a relatively expensive operation.
|
||||
|
||||
Generally, one should use :c:func:`mmiowb()` prior to releasing a spinlock
|
||||
that protects regions using :c:func:`writeb()` or similar functions that
|
||||
aren't surrounded by readb() calls, which will ensure ordering
|
||||
and flushing. The following pseudocode illustrates what might occur if
|
||||
write ordering isn't guaranteed via :c:func:`mmiowb()` or one of the
|
||||
readX() functions::
|
||||
|
||||
CPU A: spin_lock_irqsave(&dev_lock, flags)
|
||||
CPU A: ...
|
||||
CPU A: writel(newval, ring_ptr);
|
||||
CPU A: spin_unlock_irqrestore(&dev_lock, flags)
|
||||
...
|
||||
CPU B: spin_lock_irqsave(&dev_lock, flags)
|
||||
CPU B: writel(newval2, ring_ptr);
|
||||
CPU B: ...
|
||||
CPU B: spin_unlock_irqrestore(&dev_lock, flags)
|
||||
|
||||
In the case above, newval2 could be written to ring_ptr before newval.
|
||||
Fixing it is easy though::
|
||||
|
||||
CPU A: spin_lock_irqsave(&dev_lock, flags)
|
||||
CPU A: ...
|
||||
CPU A: writel(newval, ring_ptr);
|
||||
CPU A: mmiowb(); /* ensure no other writes beat us to the device */
|
||||
CPU A: spin_unlock_irqrestore(&dev_lock, flags)
|
||||
...
|
||||
CPU B: spin_lock_irqsave(&dev_lock, flags)
|
||||
CPU B: writel(newval2, ring_ptr);
|
||||
CPU B: ...
|
||||
CPU B: mmiowb();
|
||||
CPU B: spin_unlock_irqrestore(&dev_lock, flags)
|
||||
|
||||
See tg3.c for a real world example of how to use :c:func:`mmiowb()`
|
||||
|
||||
PCI ordering rules also guarantee that PIO read responses arrive after any
|
||||
outstanding DMA writes from that bus, since for some devices the result of
|
||||
a readb() call may signal to the driver that a DMA transaction is
|
||||
|
|
|
@ -56,6 +56,7 @@ available subsections can be seen below.
|
|||
slimbus
|
||||
soundwire/index
|
||||
fpga/index
|
||||
acpi/index
|
||||
|
||||
.. only:: subproject and html
|
||||
|
||||
|
|
|
@ -132,10 +132,6 @@ precludes passing these pages to userspace.
|
|||
P2P memory is also technically IO memory but should never have any side
|
||||
effects behind it. Thus, the order of loads and stores should not be important
|
||||
and ioreadX(), iowriteX() and friends should not be necessary.
|
||||
However, as the memory is not cache coherent, if access ever needs to
|
||||
be protected by a spinlock then :c:func:`mmiowb()` must be used before
|
||||
unlocking the lock. (See ACQUIRES VS I/O ACCESSES in
|
||||
Documentation/memory-barriers.txt)
|
||||
|
||||
|
||||
P2P DMA Support Library
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
.. |struct cpuidle_governor| replace:: :c:type:`struct cpuidle_governor <cpuidle_governor>`
|
||||
.. |struct cpuidle_device| replace:: :c:type:`struct cpuidle_device <cpuidle_device>`
|
||||
.. |struct cpuidle_driver| replace:: :c:type:`struct cpuidle_driver <cpuidle_driver>`
|
||||
|
@ -7,9 +10,9 @@
|
|||
CPU Idle Time Management
|
||||
========================
|
||||
|
||||
::
|
||||
:Copyright: |copy| 2019 Intel Corporation
|
||||
|
||||
Copyright (c) 2019 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
|
||||
CPU Idle Time Management Subsystem
|
||||
|
|
|
@ -1,3 +1,6 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
.. |struct dev_pm_ops| replace:: :c:type:`struct dev_pm_ops <dev_pm_ops>`
|
||||
.. |struct dev_pm_domain| replace:: :c:type:`struct dev_pm_domain <dev_pm_domain>`
|
||||
.. |struct bus_type| replace:: :c:type:`struct bus_type <bus_type>`
|
||||
|
@ -12,11 +15,12 @@
|
|||
Device Power Management Basics
|
||||
==============================
|
||||
|
||||
::
|
||||
:Copyright: |copy| 2010-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
|
||||
:Copyright: |copy| 2010 Alan Stern <stern@rowland.harvard.edu>
|
||||
:Copyright: |copy| 2016 Intel Corporation
|
||||
|
||||
:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
Copyright (c) 2010-2011 Rafael J. Wysocki <rjw@sisk.pl>, Novell Inc.
|
||||
Copyright (c) 2010 Alan Stern <stern@rowland.harvard.edu>
|
||||
Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
Most of the code in Linux is device drivers, so most of the Linux power
|
||||
management (PM) code is also driver-specific. Most drivers will do very
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===============================
|
||||
CPU and Device Power Management
|
||||
===============================
|
||||
|
|
|
@ -1,10 +1,14 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
=============================
|
||||
Suspend/Hibernation Notifiers
|
||||
=============================
|
||||
|
||||
::
|
||||
:Copyright: |copy| 2016 Intel Corporation
|
||||
|
||||
:Author: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
Copyright (c) 2016 Intel Corp., Rafael J. Wysocki <rafael.j.wysocki@intel.com>
|
||||
|
||||
There are some operations that subsystems or drivers may want to carry out
|
||||
before hibernation/suspend or after restore/resume, but they require the system
|
||||
|
|
|
@ -1,3 +1,5 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==================================
|
||||
Device Power Management Data Types
|
||||
==================================
|
||||
|
|
|
@ -370,11 +370,15 @@ autosuspend the interface's device. When the usage counter is = 0
|
|||
then the interface is considered to be idle, and the kernel may
|
||||
autosuspend the device.
|
||||
|
||||
Drivers need not be concerned about balancing changes to the usage
|
||||
counter; the USB core will undo any remaining "get"s when a driver
|
||||
is unbound from its interface. As a corollary, drivers must not call
|
||||
any of the ``usb_autopm_*`` functions after their ``disconnect``
|
||||
routine has returned.
|
||||
Drivers must be careful to balance their overall changes to the usage
|
||||
counter. Unbalanced "get"s will remain in effect when a driver is
|
||||
unbound from its interface, preventing the device from going into
|
||||
runtime suspend should the interface be bound to a driver again. On
|
||||
the other hand, drivers are allowed to achieve this balance by calling
|
||||
the ``usb_autopm_*`` functions even after their ``disconnect`` routine
|
||||
has returned -- say from within a work-queue routine -- provided they
|
||||
retain an active reference to the interface (via ``usb_get_intf`` and
|
||||
``usb_put_intf``).
|
||||
|
||||
Drivers using the async routines are responsible for their own
|
||||
synchronization and mutual exclusion.
|
||||
|
|
|
@ -15,7 +15,7 @@
|
|||
| h8300: | ok |
|
||||
| hexagon: | ok |
|
||||
| ia64: | ok |
|
||||
| m68k: | TODO |
|
||||
| m68k: | ok |
|
||||
| microblaze: | ok |
|
||||
| mips: | ok |
|
||||
| nds32: | ok |
|
||||
|
|
|
@ -1,8 +1,11 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==================================
|
||||
_DSD Device Properties Usage Rules
|
||||
----------------------------------
|
||||
==================================
|
||||
|
||||
Properties, Property Sets and Property Subsets
|
||||
----------------------------------------------
|
||||
==============================================
|
||||
|
||||
The _DSD (Device Specific Data) configuration object, introduced in ACPI 5.1,
|
||||
allows any type of device configuration data to be provided via the ACPI
|
||||
|
@ -18,7 +21,7 @@ specific type) associated with it.
|
|||
|
||||
In the ACPI _DSD context it is an element of the sub-package following the
|
||||
generic Device Properties UUID in the _DSD return package as specified in the
|
||||
Device Properties UUID definition document [1].
|
||||
Device Properties UUID definition document [1]_.
|
||||
|
||||
It also may be regarded as the definition of a key and the associated data type
|
||||
that can be returned by _DSD in the Device Properties UUID sub-package for a
|
||||
|
@ -33,14 +36,14 @@ Property subsets are nested collections of properties. Each of them is
|
|||
associated with an additional key (name) allowing the subset to be referred
|
||||
to as a whole (and to be treated as a separate entity). The canonical
|
||||
representation of property subsets is via the mechanism specified in the
|
||||
Hierarchical Properties Extension UUID definition document [2].
|
||||
Hierarchical Properties Extension UUID definition document [2]_.
|
||||
|
||||
Property sets may be hierarchical. That is, a property set may contain
|
||||
multiple property subsets that each may contain property subsets of its
|
||||
own and so on.
|
||||
|
||||
General Validity Rule for Property Sets
|
||||
---------------------------------------
|
||||
=======================================
|
||||
|
||||
Valid property sets must follow the guidance given by the Device Properties UUID
|
||||
definition document [1].
|
||||
|
@ -73,7 +76,7 @@ suitable for the ACPI environment and consequently they cannot belong to a valid
|
|||
property set.
|
||||
|
||||
Property Sets and Device Tree Bindings
|
||||
--------------------------------------
|
||||
======================================
|
||||
|
||||
It often is useful to make _DSD return property sets that follow Device Tree
|
||||
bindings.
|
||||
|
@ -91,7 +94,7 @@ expected to automatically work in the ACPI environment regardless of their
|
|||
contents.
|
||||
|
||||
References
|
||||
----------
|
||||
==========
|
||||
|
||||
[1] http://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf
|
||||
[2] http://www.uefi.org/sites/default/files/resources/_DSD-hierarchical-data-extension-UUID-v1.1.pdf
|
||||
.. [1] http://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf
|
||||
.. [2] http://www.uefi.org/sites/default/files/resources/_DSD-hierarchical-data-extension-UUID-v1.1.pdf
|
|
@ -1,13 +1,18 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
=========================================================
|
||||
Special Usage Model of the ACPI Control Method Lid Device
|
||||
=========================================================
|
||||
|
||||
Copyright (C) 2016, Intel Corporation
|
||||
Author: Lv Zheng <lv.zheng@intel.com>
|
||||
:Copyright: |copy| 2016, Intel Corporation
|
||||
|
||||
:Author: Lv Zheng <lv.zheng@intel.com>
|
||||
|
||||
Abstract:
|
||||
|
||||
Platforms containing lids convey lid state (open/close) to OSPMs using a
|
||||
control method lid device. To implement this, the AML tables issue
|
||||
Abstract
|
||||
========
|
||||
Platforms containing lids convey lid state (open/close) to OSPMs
|
||||
using a control method lid device. To implement this, the AML tables issue
|
||||
Notify(lid_device, 0x80) to notify the OSPMs whenever the lid state has
|
||||
changed. The _LID control method for the lid device must be implemented to
|
||||
report the "current" state of the lid as either "opened" or "closed".
|
||||
|
@ -19,7 +24,8 @@ taken into account. This document describes the restrictions and the
|
|||
expectations of the Linux ACPI lid device driver.
|
||||
|
||||
|
||||
1. Restrictions of the returning value of the _LID control method
|
||||
Restrictions of the returning value of the _LID control method
|
||||
==============================================================
|
||||
|
||||
The _LID control method is described to return the "current" lid state.
|
||||
However, the word "current" is ambiguous: some buggy AML tables return
|
||||
|
@ -30,7 +36,8 @@ initial returning value. When the AML tables implement this control method
|
|||
with cached value, the initial returning value is likely not reliable.
|
||||
There are platforms that always return "closed" as initial lid state.
|
||||
|
||||
2. Restrictions of the lid state change notifications
|
||||
Restrictions of the lid state change notifications
|
||||
==================================================
|
||||
|
||||
There are buggy AML tables never notifying when the lid device state is
|
||||
changed to "opened". Thus the "opened" notification is not guaranteed. But
|
||||
|
@ -39,18 +46,22 @@ state is changed to "closed". The "closed" notification is normally used to
|
|||
trigger some system power saving operations on Windows. Since it is fully
|
||||
tested, it is reliable from all AML tables.
|
||||
|
||||
3. Expectations for the userspace users of the ACPI lid device driver
|
||||
Expectations for the userspace users of the ACPI lid device driver
|
||||
==================================================================
|
||||
|
||||
The ACPI button driver exports the lid state to the userspace via the
|
||||
following file:
|
||||
following file::
|
||||
|
||||
/proc/acpi/button/lid/LID0/state
|
||||
|
||||
This file actually calls the _LID control method described above. And given
|
||||
the previous explanation, it is not reliable enough on some platforms. So
|
||||
it is advised for the userspace program not to rely solely on this file
|
||||
to determine the actual lid state.
|
||||
|
||||
The ACPI button driver emits the following input event to the userspace:
|
||||
SW_LID
|
||||
* SW_LID
|
||||
|
||||
The ACPI lid device driver is implemented to try to deliver the platform
|
||||
triggered events to the userspace. However, given the fact that the buggy
|
||||
firmware cannot make sure "opened"/"closed" events are paired, the ACPI
|
||||
|
@ -59,20 +70,25 @@ button driver uses the following 3 modes in order not to trigger issues.
|
|||
If the userspace hasn't been prepared to ignore the unreliable "opened"
|
||||
events and the unreliable initial state notification, Linux users can use
|
||||
the following kernel parameters to handle the possible issues:
|
||||
|
||||
A. button.lid_init_state=method:
|
||||
When this option is specified, the ACPI button driver reports the
|
||||
initial lid state using the returning value of the _LID control method
|
||||
and whether the "opened"/"closed" events are paired fully relies on the
|
||||
firmware implementation.
|
||||
|
||||
This option can be used to fix some platforms where the returning value
|
||||
of the _LID control method is reliable but the initial lid state
|
||||
notification is missing.
|
||||
|
||||
This option is the default behavior during the period the userspace
|
||||
isn't ready to handle the buggy AML tables.
|
||||
|
||||
B. button.lid_init_state=open:
|
||||
When this option is specified, the ACPI button driver always reports the
|
||||
initial lid state as "opened" and whether the "opened"/"closed" events
|
||||
are paired fully relies on the firmware implementation.
|
||||
|
||||
This may fix some platforms where the returning value of the _LID
|
||||
control method is not reliable and the initial lid state notification is
|
||||
missing.
|
||||
|
@ -80,6 +96,7 @@ B. button.lid_init_state=open:
|
|||
If the userspace has been prepared to ignore the unreliable "opened" events
|
||||
and the unreliable initial state notification, Linux users should always
|
||||
use the following kernel parameter:
|
||||
|
||||
C. button.lid_init_state=ignore:
|
||||
When this option is specified, the ACPI button driver never reports the
|
||||
initial lid state and there is a compensation mechanism implemented to
|
||||
|
@ -89,6 +106,7 @@ C. button.lid_init_state=ignore:
|
|||
notifications can be delivered to the userspace when the lid is actually
|
||||
opened given that some AML tables do not send "opened" notifications
|
||||
reliably.
|
||||
|
||||
In this mode, if everything is correctly implemented by the platform
|
||||
firmware, the old userspace programs should still work. Otherwise, the
|
||||
new userspace programs are required to work with the ACPI button driver.
|
|
@ -0,0 +1,75 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
================
|
||||
The AML Debugger
|
||||
================
|
||||
|
||||
:Copyright: |copy| 2016, Intel Corporation
|
||||
:Author: Lv Zheng <lv.zheng@intel.com>
|
||||
|
||||
|
||||
This document describes the usage of the AML debugger embedded in the Linux
|
||||
kernel.
|
||||
|
||||
1. Build the debugger
|
||||
=====================
|
||||
|
||||
The following kernel configuration items are required to enable the AML
|
||||
debugger interface from the Linux kernel::
|
||||
|
||||
CONFIG_ACPI_DEBUGGER=y
|
||||
CONFIG_ACPI_DEBUGGER_USER=m
|
||||
|
||||
The userspace utilities can be built from the kernel source tree using
|
||||
the following commands::
|
||||
|
||||
$ cd tools
|
||||
$ make acpi
|
||||
|
||||
The resultant userspace tool binary is then located at::
|
||||
|
||||
tools/power/acpi/acpidbg
|
||||
|
||||
It can be installed to system directories by running "make install" (as a
|
||||
sufficiently privileged user).
|
||||
|
||||
2. Start the userspace debugger interface
|
||||
=========================================
|
||||
|
||||
After booting the kernel with the debugger built-in, the debugger can be
|
||||
started by using the following commands::
|
||||
|
||||
# mount -t debugfs none /sys/kernel/debug
|
||||
# modprobe acpi_dbg
|
||||
# tools/power/acpi/acpidbg
|
||||
|
||||
That spawns the interactive AML debugger environment where you can execute
|
||||
debugger commands.
|
||||
|
||||
The commands are documented in the "ACPICA Overview and Programmer Reference"
|
||||
that can be downloaded from
|
||||
|
||||
https://acpica.org/documentation
|
||||
|
||||
The detailed debugger commands reference is located in Chapter 12 "ACPICA
|
||||
Debugger Reference". The "help" command can be used for a quick reference.
|
||||
|
||||
3. Stop the userspace debugger interface
|
||||
========================================
|
||||
|
||||
The interactive debugger interface can be closed by pressing Ctrl+C or using
|
||||
the "quit" or "exit" commands. When finished, unload the module with::
|
||||
|
||||
# rmmod acpi_dbg
|
||||
|
||||
The module unloading may fail if there is an acpidbg instance running.
|
||||
|
||||
4. Run the debugger in a script
|
||||
===============================
|
||||
|
||||
It may be useful to run the AML debugger in a test script. "acpidbg" supports
|
||||
this in a special "batch" mode. For example, the following command outputs
|
||||
the entire ACPI namespace::
|
||||
|
||||
# acpidbg -b "namespace"
|
|
@ -1,13 +1,16 @@
|
|||
APEI Error INJection
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
====================
|
||||
APEI Error INJection
|
||||
====================
|
||||
|
||||
EINJ provides a hardware error injection mechanism. It is very useful
|
||||
for debugging and testing APEI and RAS features in general.
|
||||
|
||||
You need to check whether your BIOS supports EINJ first. For that, look
|
||||
for early boot messages similar to this one:
|
||||
for early boot messages similar to this one::
|
||||
|
||||
ACPI: EINJ 0x000000007370A000 000150 (v01 INTEL 00000001 INTL 00000001)
|
||||
ACPI: EINJ 0x000000007370A000 000150 (v01 INTEL 00000001 INTL 00000001)
|
||||
|
||||
which shows that the BIOS is exposing an EINJ table - it is the
|
||||
mechanism through which the injection is done.
|
||||
|
@ -23,11 +26,11 @@ order to see the APEI,EINJ,... functionality supported and exposed by
|
|||
the BIOS menu.
|
||||
|
||||
To use EINJ, make sure the following options are enabled in your kernel
|
||||
configuration:
|
||||
configuration::
|
||||
|
||||
CONFIG_DEBUG_FS
|
||||
CONFIG_ACPI_APEI
|
||||
CONFIG_ACPI_APEI_EINJ
|
||||
CONFIG_DEBUG_FS
|
||||
CONFIG_ACPI_APEI
|
||||
CONFIG_ACPI_APEI_EINJ
|
||||
|
||||
The EINJ user interface is in <debugfs mount point>/apei/einj.
|
||||
|
||||
|
@ -37,20 +40,22 @@ The following files belong to it:
|
|||
|
||||
This file shows which error types are supported:
|
||||
|
||||
================ ===================================
|
||||
Error Type Value Error Description
|
||||
================ =================
|
||||
0x00000001 Processor Correctable
|
||||
0x00000002 Processor Uncorrectable non-fatal
|
||||
0x00000004 Processor Uncorrectable fatal
|
||||
0x00000008 Memory Correctable
|
||||
0x00000010 Memory Uncorrectable non-fatal
|
||||
0x00000020 Memory Uncorrectable fatal
|
||||
0x00000040 PCI Express Correctable
|
||||
0x00000080 PCI Express Uncorrectable fatal
|
||||
0x00000100 PCI Express Uncorrectable non-fatal
|
||||
0x00000200 Platform Correctable
|
||||
0x00000400 Platform Uncorrectable non-fatal
|
||||
0x00000800 Platform Uncorrectable fatal
|
||||
================ ===================================
|
||||
0x00000001 Processor Correctable
|
||||
0x00000002 Processor Uncorrectable non-fatal
|
||||
0x00000004 Processor Uncorrectable fatal
|
||||
0x00000008 Memory Correctable
|
||||
0x00000010 Memory Uncorrectable non-fatal
|
||||
0x00000020 Memory Uncorrectable fatal
|
||||
0x00000040 PCI Express Correctable
|
||||
0x00000080 PCI Express Uncorrectable fatal
|
||||
0x00000100 PCI Express Uncorrectable non-fatal
|
||||
0x00000200 Platform Correctable
|
||||
0x00000400 Platform Uncorrectable non-fatal
|
||||
0x00000800 Platform Uncorrectable fatal
|
||||
================ ===================================
|
||||
|
||||
The format of the file contents is as above, except present are only
|
||||
the available error types.
|
||||
|
@ -73,9 +78,12 @@ The following files belong to it:
|
|||
injection. Value is a bitmask as specified in ACPI5.0 spec for the
|
||||
SET_ERROR_TYPE_WITH_ADDRESS data structure:
|
||||
|
||||
Bit 0 - Processor APIC field valid (see param3 below).
|
||||
Bit 1 - Memory address and mask valid (param1 and param2).
|
||||
Bit 2 - PCIe (seg,bus,dev,fn) valid (see param4 below).
|
||||
Bit 0
|
||||
Processor APIC field valid (see param3 below).
|
||||
Bit 1
|
||||
Memory address and mask valid (param1 and param2).
|
||||
Bit 2
|
||||
PCIe (seg,bus,dev,fn) valid (see param4 below).
|
||||
|
||||
If set to zero, legacy behavior is mimicked where the type of
|
||||
injection specifies just one bit set, and param1 is multiplexed.
|
||||
|
@ -121,7 +129,7 @@ BIOS versions based on the ACPI 5.0 specification have more control over
|
|||
the target of the injection. For processor-related errors (type 0x1, 0x2
|
||||
and 0x4), you can set flags to 0x3 (param3 for bit 0, and param1 and
|
||||
param2 for bit 1) so that you have more information added to the error
|
||||
signature being injected. The actual data passed is this:
|
||||
signature being injected. The actual data passed is this::
|
||||
|
||||
memory_address = param1;
|
||||
memory_address_range = param2;
|
||||
|
@ -131,7 +139,7 @@ signature being injected. The actual data passed is this:
|
|||
For memory errors (type 0x8, 0x10 and 0x20) the address is set using
|
||||
param1 with a mask in param2 (0x0 is equivalent to all ones). For PCI
|
||||
express errors (type 0x40, 0x80 and 0x100) the segment, bus, device and
|
||||
function are specified using param1:
|
||||
function are specified using param1::
|
||||
|
||||
31 24 23 16 15 11 10 8 7 0
|
||||
+-------------------------------------------------+
|
||||
|
@ -152,26 +160,26 @@ documentation for details (and expect changes to this API if vendors
|
|||
creativity in using this feature expands beyond our expectations).
|
||||
|
||||
|
||||
An error injection example:
|
||||
An error injection example::
|
||||
|
||||
# cd /sys/kernel/debug/apei/einj
|
||||
# cat available_error_type # See which errors can be injected
|
||||
0x00000002 Processor Uncorrectable non-fatal
|
||||
0x00000008 Memory Correctable
|
||||
0x00000010 Memory Uncorrectable non-fatal
|
||||
# echo 0x12345000 > param1 # Set memory address for injection
|
||||
# echo $((-1 << 12)) > param2 # Mask 0xfffffffffffff000 - anywhere in this page
|
||||
# echo 0x8 > error_type # Choose correctable memory error
|
||||
# echo 1 > error_inject # Inject now
|
||||
# cd /sys/kernel/debug/apei/einj
|
||||
# cat available_error_type # See which errors can be injected
|
||||
0x00000002 Processor Uncorrectable non-fatal
|
||||
0x00000008 Memory Correctable
|
||||
0x00000010 Memory Uncorrectable non-fatal
|
||||
# echo 0x12345000 > param1 # Set memory address for injection
|
||||
# echo $((-1 << 12)) > param2 # Mask 0xfffffffffffff000 - anywhere in this page
|
||||
# echo 0x8 > error_type # Choose correctable memory error
|
||||
# echo 1 > error_inject # Inject now
|
||||
|
||||
You should see something like this in dmesg:
|
||||
You should see something like this in dmesg::
|
||||
|
||||
[22715.830801] EDAC sbridge MC3: HANDLING MCE MEMORY ERROR
|
||||
[22715.834759] EDAC sbridge MC3: CPU 0: Machine Check Event: 0 Bank 7: 8c00004000010090
|
||||
[22715.834759] EDAC sbridge MC3: TSC 0
|
||||
[22715.834759] EDAC sbridge MC3: ADDR 12345000 EDAC sbridge MC3: MISC 144780c86
|
||||
[22715.834759] EDAC sbridge MC3: PROCESSOR 0:306e7 TIME 1422553404 SOCKET 0 APIC 0
|
||||
[22716.616173] EDAC MC3: 1 CE memory read error on CPU_SrcID#0_Channel#0_DIMM#0 (channel:0 slot:0 page:0x12345 offset:0x0 grain:32 syndrome:0x0 - area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:0)
|
||||
[22715.830801] EDAC sbridge MC3: HANDLING MCE MEMORY ERROR
|
||||
[22715.834759] EDAC sbridge MC3: CPU 0: Machine Check Event: 0 Bank 7: 8c00004000010090
|
||||
[22715.834759] EDAC sbridge MC3: TSC 0
|
||||
[22715.834759] EDAC sbridge MC3: ADDR 12345000 EDAC sbridge MC3: MISC 144780c86
|
||||
[22715.834759] EDAC sbridge MC3: PROCESSOR 0:306e7 TIME 1422553404 SOCKET 0 APIC 0
|
||||
[22716.616173] EDAC MC3: 1 CE memory read error on CPU_SrcID#0_Channel#0_DIMM#0 (channel:0 slot:0 page:0x12345 offset:0x0 grain:32 syndrome:0x0 - area:DRAM err_code:0001:0090 socket:0 channel_mask:1 rank:0)
|
||||
|
||||
For more information about EINJ, please refer to ACPI specification
|
||||
version 4.0, section 17.5 and ACPI 5.0, section 18.6.
|
|
@ -0,0 +1,150 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==================
|
||||
APEI output format
|
||||
==================
|
||||
|
||||
APEI uses printk as hardware error reporting interface, the output
|
||||
format is as follows::
|
||||
|
||||
<error record> :=
|
||||
APEI generic hardware error status
|
||||
severity: <integer>, <severity string>
|
||||
section: <integer>, severity: <integer>, <severity string>
|
||||
flags: <integer>
|
||||
<section flags strings>
|
||||
fru_id: <uuid string>
|
||||
fru_text: <string>
|
||||
section_type: <section type string>
|
||||
<section data>
|
||||
|
||||
<severity string>* := recoverable | fatal | corrected | info
|
||||
|
||||
<section flags strings># :=
|
||||
[primary][, containment warning][, reset][, threshold exceeded]\
|
||||
[, resource not accessible][, latent error]
|
||||
|
||||
<section type string> := generic processor error | memory error | \
|
||||
PCIe error | unknown, <uuid string>
|
||||
|
||||
<section data> :=
|
||||
<generic processor section data> | <memory section data> | \
|
||||
<pcie section data> | <null>
|
||||
|
||||
<generic processor section data> :=
|
||||
[processor_type: <integer>, <proc type string>]
|
||||
[processor_isa: <integer>, <proc isa string>]
|
||||
[error_type: <integer>
|
||||
<proc error type strings>]
|
||||
[operation: <integer>, <proc operation string>]
|
||||
[flags: <integer>
|
||||
<proc flags strings>]
|
||||
[level: <integer>]
|
||||
[version_info: <integer>]
|
||||
[processor_id: <integer>]
|
||||
[target_address: <integer>]
|
||||
[requestor_id: <integer>]
|
||||
[responder_id: <integer>]
|
||||
[IP: <integer>]
|
||||
|
||||
<proc type string>* := IA32/X64 | IA64
|
||||
|
||||
<proc isa string>* := IA32 | IA64 | X64
|
||||
|
||||
<proc error type strings># :=
|
||||
[cache error][, TLB error][, bus error][, micro-architectural error]
|
||||
|
||||
<proc operation string>* := unknown or generic | data read | data write | \
|
||||
instruction execution
|
||||
|
||||
<proc flags strings># :=
|
||||
[restartable][, precise IP][, overflow][, corrected]
|
||||
|
||||
<memory section data> :=
|
||||
[error_status: <integer>]
|
||||
[physical_address: <integer>]
|
||||
[physical_address_mask: <integer>]
|
||||
[node: <integer>]
|
||||
[card: <integer>]
|
||||
[module: <integer>]
|
||||
[bank: <integer>]
|
||||
[device: <integer>]
|
||||
[row: <integer>]
|
||||
[column: <integer>]
|
||||
[bit_position: <integer>]
|
||||
[requestor_id: <integer>]
|
||||
[responder_id: <integer>]
|
||||
[target_id: <integer>]
|
||||
[error_type: <integer>, <mem error type string>]
|
||||
|
||||
<mem error type string>* :=
|
||||
unknown | no error | single-bit ECC | multi-bit ECC | \
|
||||
single-symbol chipkill ECC | multi-symbol chipkill ECC | master abort | \
|
||||
target abort | parity error | watchdog timeout | invalid address | \
|
||||
mirror Broken | memory sparing | scrub corrected error | \
|
||||
scrub uncorrected error
|
||||
|
||||
<pcie section data> :=
|
||||
[port_type: <integer>, <pcie port type string>]
|
||||
[version: <integer>.<integer>]
|
||||
[command: <integer>, status: <integer>]
|
||||
[device_id: <integer>:<integer>:<integer>.<integer>
|
||||
slot: <integer>
|
||||
secondary_bus: <integer>
|
||||
vendor_id: <integer>, device_id: <integer>
|
||||
class_code: <integer>]
|
||||
[serial number: <integer>, <integer>]
|
||||
[bridge: secondary_status: <integer>, control: <integer>]
|
||||
[aer_status: <integer>, aer_mask: <integer>
|
||||
<aer status string>
|
||||
[aer_uncor_severity: <integer>]
|
||||
aer_layer=<aer layer string>, aer_agent=<aer agent string>
|
||||
aer_tlp_header: <integer> <integer> <integer> <integer>]
|
||||
|
||||
<pcie port type string>* := PCIe end point | legacy PCI end point | \
|
||||
unknown | unknown | root port | upstream switch port | \
|
||||
downstream switch port | PCIe to PCI/PCI-X bridge | \
|
||||
PCI/PCI-X to PCIe bridge | root complex integrated endpoint device | \
|
||||
root complex event collector
|
||||
|
||||
if section severity is fatal or recoverable
|
||||
<aer status string># :=
|
||||
unknown | unknown | unknown | unknown | Data Link Protocol | \
|
||||
unknown | unknown | unknown | unknown | unknown | unknown | unknown | \
|
||||
Poisoned TLP | Flow Control Protocol | Completion Timeout | \
|
||||
Completer Abort | Unexpected Completion | Receiver Overflow | \
|
||||
Malformed TLP | ECRC | Unsupported Request
|
||||
else
|
||||
<aer status string># :=
|
||||
Receiver Error | unknown | unknown | unknown | unknown | unknown | \
|
||||
Bad TLP | Bad DLLP | RELAY_NUM Rollover | unknown | unknown | unknown | \
|
||||
Replay Timer Timeout | Advisory Non-Fatal
|
||||
fi
|
||||
|
||||
<aer layer string> :=
|
||||
Physical Layer | Data Link Layer | Transaction Layer
|
||||
|
||||
<aer agent string> :=
|
||||
Receiver ID | Requester ID | Completer ID | Transmitter ID
|
||||
|
||||
Where [] designates that the corresponding content is optional
|
||||
|
||||
All <field string> description with * has the following format::
|
||||
|
||||
field: <integer>, <field string>
|
||||
|
||||
Where value of <integer> should be the position of "string" in <field
|
||||
string> description. Otherwise, <field string> will be "unknown".
|
||||
|
||||
All <field strings> description with # has the following format::
|
||||
|
||||
field: <integer>
|
||||
<field strings>
|
||||
|
||||
Where each string in <field strings> corresponds to one set bit of
|
||||
<integer>. The bit position is the position of "string" in <field
|
||||
strings> description.
|
||||
|
||||
For more detailed explanation of every field, please refer to UEFI
|
||||
specification version 2.3 or later, section Appendix N: Common
|
||||
Platform Error Record.
|
|
@ -1,18 +1,21 @@
|
|||
ACPI Debug Output
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=================
|
||||
ACPI Debug Output
|
||||
=================
|
||||
|
||||
The ACPI CA, the Linux ACPI core, and some ACPI drivers can generate debug
|
||||
output. This document describes how to use this facility.
|
||||
|
||||
Compile-time configuration
|
||||
--------------------------
|
||||
==========================
|
||||
|
||||
ACPI debug output is globally enabled by CONFIG_ACPI_DEBUG. If this config
|
||||
option is turned off, the debug messages are not even built into the
|
||||
kernel.
|
||||
|
||||
Boot- and run-time configuration
|
||||
--------------------------------
|
||||
================================
|
||||
|
||||
When CONFIG_ACPI_DEBUG=y, you can select the component and level of messages
|
||||
you're interested in. At boot-time, use the acpi.debug_layer and
|
||||
|
@ -21,7 +24,7 @@ debug_layer and debug_level files in /sys/module/acpi/parameters/ to control
|
|||
the debug messages.
|
||||
|
||||
debug_layer (component)
|
||||
-----------------------
|
||||
=======================
|
||||
|
||||
The "debug_layer" is a mask that selects components of interest, e.g., a
|
||||
specific driver or part of the ACPI interpreter. To build the debug_layer
|
||||
|
@ -33,7 +36,7 @@ to /sys/module/acpi/parameters/debug_layer.
|
|||
|
||||
The possible components are defined in include/acpi/acoutput.h and
|
||||
include/acpi/acpi_drivers.h. Reading /sys/module/acpi/parameters/debug_layer
|
||||
shows the supported mask values, currently these:
|
||||
shows the supported mask values, currently these::
|
||||
|
||||
ACPI_UTILITIES 0x00000001
|
||||
ACPI_HARDWARE 0x00000002
|
||||
|
@ -65,7 +68,7 @@ shows the supported mask values, currently these:
|
|||
ACPI_PROCESSOR_COMPONENT 0x20000000
|
||||
|
||||
debug_level
|
||||
-----------
|
||||
===========
|
||||
|
||||
The "debug_level" is a mask that selects different types of messages, e.g.,
|
||||
those related to initialization, method execution, informational messages, etc.
|
||||
|
@ -81,7 +84,7 @@ to /sys/module/acpi/parameters/debug_level.
|
|||
|
||||
The possible levels are defined in include/acpi/acoutput.h. Reading
|
||||
/sys/module/acpi/parameters/debug_level shows the supported mask values,
|
||||
currently these:
|
||||
currently these::
|
||||
|
||||
ACPI_LV_INIT 0x00000001
|
||||
ACPI_LV_DEBUG_OBJECT 0x00000002
|
||||
|
@ -113,9 +116,9 @@ currently these:
|
|||
ACPI_LV_EVENTS 0x80000000
|
||||
|
||||
Examples
|
||||
--------
|
||||
========
|
||||
|
||||
For example, drivers/acpi/bus.c contains this:
|
||||
For example, drivers/acpi/bus.c contains this::
|
||||
|
||||
#define _COMPONENT ACPI_BUS_COMPONENT
|
||||
...
|
||||
|
@ -127,22 +130,22 @@ statement uses ACPI_DB_INFO, which is macro based on the ACPI_LV_INFO
|
|||
definition.)
|
||||
|
||||
Enable all AML "Debug" output (stores to the Debug object while interpreting
|
||||
AML) during boot:
|
||||
AML) during boot::
|
||||
|
||||
acpi.debug_layer=0xffffffff acpi.debug_level=0x2
|
||||
|
||||
Enable PCI and PCI interrupt routing debug messages:
|
||||
Enable PCI and PCI interrupt routing debug messages::
|
||||
|
||||
acpi.debug_layer=0x400000 acpi.debug_level=0x4
|
||||
|
||||
Enable all ACPI hardware-related messages:
|
||||
Enable all ACPI hardware-related messages::
|
||||
|
||||
acpi.debug_layer=0x2 acpi.debug_level=0xffffffff
|
||||
|
||||
Enable all ACPI_DB_INFO messages after boot:
|
||||
Enable all ACPI_DB_INFO messages after boot::
|
||||
|
||||
# echo 0x4 > /sys/module/acpi/parameters/debug_level
|
||||
|
||||
Show all valid component values:
|
||||
Show all valid component values::
|
||||
|
||||
# cat /sys/module/acpi/parameters/debug_layer
|
|
@ -1,9 +1,12 @@
|
|||
Copyright (C) 2018 Intel Corporation
|
||||
Author: Sakari Ailus <sakari.ailus@linux.intel.com>
|
||||
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
===================================
|
||||
Referencing hierarchical data nodes
|
||||
-----------------------------------
|
||||
===================================
|
||||
|
||||
:Copyright: |copy| 2018 Intel Corporation
|
||||
:Author: Sakari Ailus <sakari.ailus@linux.intel.com>
|
||||
|
||||
ACPI in general allows referring to device objects in the tree only.
|
||||
Hierarchical data extension nodes may not be referred to directly, hence this
|
||||
|
@ -28,13 +31,14 @@ extension key.
|
|||
|
||||
|
||||
Example
|
||||
-------
|
||||
=======
|
||||
|
||||
In the ASL snippet below, the "reference" _DSD property [2] contains a
|
||||
device object reference to DEV0 and under that device object, a
|
||||
hierarchical data extension key "node@1" referring to the NOD1 object
|
||||
and lastly, a hierarchical data extension key "anothernode" referring to
|
||||
the ANOD object which is also the final target node of the reference.
|
||||
In the ASL snippet below, the "reference" _DSD property [2] contains a
|
||||
device object reference to DEV0 and under that device object, a
|
||||
hierarchical data extension key "node@1" referring to the NOD1 object
|
||||
and lastly, a hierarchical data extension key "anothernode" referring to
|
||||
the ANOD object which is also the final target node of the reference.
|
||||
::
|
||||
|
||||
Device (DEV0)
|
||||
{
|
||||
|
@ -75,15 +79,15 @@ Example
|
|||
})
|
||||
}
|
||||
|
||||
Please also see a graph example in graph.txt .
|
||||
Please also see a graph example in :doc:`graph`.
|
||||
|
||||
References
|
||||
----------
|
||||
==========
|
||||
|
||||
[1] Hierarchical Data Extension UUID For _DSD.
|
||||
<URL:http://www.uefi.org/sites/default/files/resources/_DSD-hierarchical-data-extension-UUID-v1.1.pdf>,
|
||||
referenced 2018-07-17.
|
||||
<http://www.uefi.org/sites/default/files/resources/_DSD-hierarchical-data-extension-UUID-v1.1.pdf>,
|
||||
referenced 2018-07-17.
|
||||
|
||||
[2] Device Properties UUID For _DSD.
|
||||
<URL:http://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf>,
|
||||
referenced 2016-10-04.
|
||||
<http://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf>,
|
||||
referenced 2016-10-04.
|
|
@ -1,8 +1,11 @@
|
|||
Graphs
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
======
|
||||
Graphs
|
||||
======
|
||||
|
||||
_DSD
|
||||
----
|
||||
====
|
||||
|
||||
_DSD (Device Specific Data) [7] is a predefined ACPI device
|
||||
configuration object that can be used to convey information on
|
||||
|
@ -30,7 +33,7 @@ hierarchical data extension array on each depth.
|
|||
|
||||
|
||||
Ports and endpoints
|
||||
-------------------
|
||||
===================
|
||||
|
||||
The port and endpoint concepts are very similar to those in Devicetree
|
||||
[3]. A port represents an interface in a device, and an endpoint
|
||||
|
@ -38,9 +41,9 @@ represents a connection to that interface.
|
|||
|
||||
All port nodes are located under the device's "_DSD" node in the hierarchical
|
||||
data extension tree. The data extension related to each port node must begin
|
||||
with "port" and must be followed by the "@" character and the number of the port
|
||||
as its key. The target object it refers to should be called "PRTX", where "X" is
|
||||
the number of the port. An example of such a package would be:
|
||||
with "port" and must be followed by the "@" character and the number of the
|
||||
port as its key. The target object it refers to should be called "PRTX", where
|
||||
"X" is the number of the port. An example of such a package would be::
|
||||
|
||||
Package() { "port@4", PRT4 }
|
||||
|
||||
|
@ -49,7 +52,7 @@ data extension key of the endpoint nodes must begin with
|
|||
"endpoint" and must be followed by the "@" character and the number of the
|
||||
endpoint. The object it refers to should be called "EPXY", where "X" is the
|
||||
number of the port and "Y" is the number of the endpoint. An example of such a
|
||||
package would be:
|
||||
package would be::
|
||||
|
||||
Package() { "endpoint@0", EP40 }
|
||||
|
||||
|
@ -62,85 +65,85 @@ of that port shall be zero. Similarly, if a port may only have a single
|
|||
endpoint, the number of that endpoint shall be zero.
|
||||
|
||||
The endpoint reference uses property extension with "remote-endpoint" property
|
||||
name followed by a reference in the same package. Such references consist of the
|
||||
name followed by a reference in the same package. Such references consist of
|
||||
the remote device reference, the first package entry of the port data extension
|
||||
reference under the device and finally the first package entry of the endpoint
|
||||
data extension reference under the port. Individual references thus appear as:
|
||||
data extension reference under the port. Individual references thus appear as::
|
||||
|
||||
Package() { device, "port@X", "endpoint@Y" }
|
||||
|
||||
In the above example, "X" is the number of the port and "Y" is the number of the
|
||||
endpoint.
|
||||
In the above example, "X" is the number of the port and "Y" is the number of
|
||||
the endpoint.
|
||||
|
||||
The references to endpoints must always be done both ways, to the
|
||||
remote endpoint and back from the referred remote endpoint node.
|
||||
|
||||
A simple example of this is show below:
|
||||
A simple example of this is shown below::
|
||||
|
||||
Scope (\_SB.PCI0.I2C2)
|
||||
{
|
||||
Device (CAM0)
|
||||
{
|
||||
Name (_DSD, Package () {
|
||||
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
|
||||
Package () {
|
||||
Package () { "compatible", Package () { "nokia,smia" } },
|
||||
},
|
||||
ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
|
||||
Package () {
|
||||
Package () { "port@0", PRT0 },
|
||||
}
|
||||
})
|
||||
Name (PRT0, Package() {
|
||||
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
|
||||
Package () {
|
||||
Package () { "reg", 0 },
|
||||
},
|
||||
ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
|
||||
Package () {
|
||||
Package () { "endpoint@0", EP00 },
|
||||
}
|
||||
})
|
||||
Name (EP00, Package() {
|
||||
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
|
||||
Package () {
|
||||
Package () { "reg", 0 },
|
||||
Package () { "remote-endpoint", Package() { \_SB.PCI0.ISP, "port@4", "endpoint@0" } },
|
||||
}
|
||||
})
|
||||
}
|
||||
Device (CAM0)
|
||||
{
|
||||
Name (_DSD, Package () {
|
||||
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
|
||||
Package () {
|
||||
Package () { "compatible", Package () { "nokia,smia" } },
|
||||
},
|
||||
ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
|
||||
Package () {
|
||||
Package () { "port@0", PRT0 },
|
||||
}
|
||||
})
|
||||
Name (PRT0, Package() {
|
||||
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
|
||||
Package () {
|
||||
Package () { "reg", 0 },
|
||||
},
|
||||
ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
|
||||
Package () {
|
||||
Package () { "endpoint@0", EP00 },
|
||||
}
|
||||
})
|
||||
Name (EP00, Package() {
|
||||
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
|
||||
Package () {
|
||||
Package () { "reg", 0 },
|
||||
Package () { "remote-endpoint", Package() { \_SB.PCI0.ISP, "port@4", "endpoint@0" } },
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Scope (\_SB.PCI0)
|
||||
{
|
||||
Device (ISP)
|
||||
{
|
||||
Name (_DSD, Package () {
|
||||
ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
|
||||
Package () {
|
||||
Package () { "port@4", PRT4 },
|
||||
}
|
||||
})
|
||||
Device (ISP)
|
||||
{
|
||||
Name (_DSD, Package () {
|
||||
ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
|
||||
Package () {
|
||||
Package () { "port@4", PRT4 },
|
||||
}
|
||||
})
|
||||
|
||||
Name (PRT4, Package() {
|
||||
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
|
||||
Package () {
|
||||
Package () { "reg", 4 }, /* CSI-2 port number */
|
||||
},
|
||||
ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
|
||||
Package () {
|
||||
Package () { "endpoint@0", EP40 },
|
||||
}
|
||||
})
|
||||
Name (PRT4, Package() {
|
||||
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
|
||||
Package () {
|
||||
Package () { "reg", 4 }, /* CSI-2 port number */
|
||||
},
|
||||
ToUUID("dbb8e3e6-5886-4ba6-8795-1319f52a966b"),
|
||||
Package () {
|
||||
Package () { "endpoint@0", EP40 },
|
||||
}
|
||||
})
|
||||
|
||||
Name (EP40, Package() {
|
||||
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
|
||||
Package () {
|
||||
Package () { "reg", 0 },
|
||||
Package () { "remote-endpoint", Package () { \_SB.PCI0.I2C2.CAM0, "port@0", "endpoint@0" } },
|
||||
}
|
||||
})
|
||||
}
|
||||
Name (EP40, Package() {
|
||||
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
|
||||
Package () {
|
||||
Package () { "reg", 0 },
|
||||
Package () { "remote-endpoint", Package () { \_SB.PCI0.I2C2.CAM0, "port@0", "endpoint@0" } },
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Here, the port 0 of the "CAM0" device is connected to the port 4 of
|
||||
|
@ -148,27 +151,27 @@ the "ISP" device and vice versa.
|
|||
|
||||
|
||||
References
|
||||
----------
|
||||
==========
|
||||
|
||||
[1] _DSD (Device Specific Data) Implementation Guide.
|
||||
<URL:http://www.uefi.org/sites/default/files/resources/_DSD-implementation-guide-toplevel-1_1.htm>,
|
||||
http://www.uefi.org/sites/default/files/resources/_DSD-implementation-guide-toplevel-1_1.htm,
|
||||
referenced 2016-10-03.
|
||||
|
||||
[2] Devicetree. <URL:http://www.devicetree.org>, referenced 2016-10-03.
|
||||
[2] Devicetree. http://www.devicetree.org, referenced 2016-10-03.
|
||||
|
||||
[3] Documentation/devicetree/bindings/graph.txt
|
||||
|
||||
[4] Device Properties UUID For _DSD.
|
||||
<URL:http://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf>,
|
||||
http://www.uefi.org/sites/default/files/resources/_DSD-device-properties-UUID.pdf,
|
||||
referenced 2016-10-04.
|
||||
|
||||
[5] Hierarchical Data Extension UUID For _DSD.
|
||||
<URL:http://www.uefi.org/sites/default/files/resources/_DSD-hierarchical-data-extension-UUID-v1.1.pdf>,
|
||||
http://www.uefi.org/sites/default/files/resources/_DSD-hierarchical-data-extension-UUID-v1.1.pdf,
|
||||
referenced 2016-10-04.
|
||||
|
||||
[6] Advanced Configuration and Power Interface Specification.
|
||||
<URL:http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf>,
|
||||
http://www.uefi.org/sites/default/files/resources/ACPI_6_1.pdf,
|
||||
referenced 2016-10-04.
|
||||
|
||||
[7] _DSD Device Properties Usage Rules.
|
||||
Documentation/acpi/DSD-properties-rules.txt
|
||||
:doc:`../DSD-properties-rules`
|
|
@ -1,5 +1,9 @@
|
|||
ACPI based device enumeration
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=============================
|
||||
ACPI Based Device Enumeration
|
||||
=============================
|
||||
|
||||
ACPI 5 introduced a set of new resources (UartSerialBus, I2cSerialBus,
|
||||
SpiSerialBus, GpioIo and GpioInt) which can be used in enumerating slave
|
||||
devices behind serial bus controllers.
|
||||
|
@ -11,12 +15,12 @@ that are accessed through memory-mapped registers.
|
|||
In order to support this and re-use the existing drivers as much as
|
||||
possible we decided to do the following:
|
||||
|
||||
o Devices that have no bus connector resource are represented as
|
||||
platform devices.
|
||||
- Devices that have no bus connector resource are represented as
|
||||
platform devices.
|
||||
|
||||
o Devices behind real busses where there is a connector resource
|
||||
are represented as struct spi_device or struct i2c_device
|
||||
(standard UARTs are not busses so there is no struct uart_device).
|
||||
- Devices behind real busses where there is a connector resource
|
||||
are represented as struct spi_device or struct i2c_device
|
||||
(standard UARTs are not busses so there is no struct uart_device).
|
||||
|
||||
As both ACPI and Device Tree represent a tree of devices (and their
|
||||
resources) this implementation follows the Device Tree way as much as
|
||||
|
@ -31,7 +35,8 @@ enumerated from ACPI namespace. This handle can be used to extract other
|
|||
device-specific configuration. There is an example of this below.
|
||||
|
||||
Platform bus support
|
||||
~~~~~~~~~~~~~~~~~~~~
|
||||
====================
|
||||
|
||||
Since we are using platform devices to represent devices that are not
|
||||
connected to any physical bus we only need to implement a platform driver
|
||||
for the device and add supported ACPI IDs. If this same IP-block is used on
|
||||
|
@ -39,7 +44,7 @@ some other non-ACPI platform, the driver might work out of the box or needs
|
|||
some minor changes.
|
||||
|
||||
Adding ACPI support for an existing driver should be pretty
|
||||
straightforward. Here is the simplest example:
|
||||
straightforward. Here is the simplest example::
|
||||
|
||||
#ifdef CONFIG_ACPI
|
||||
static const struct acpi_device_id mydrv_acpi_match[] = {
|
||||
|
@ -61,12 +66,13 @@ configuring GPIOs it can get its ACPI handle and extract this information
|
|||
from ACPI tables.
|
||||
|
||||
DMA support
|
||||
~~~~~~~~~~~
|
||||
===========
|
||||
|
||||
DMA controllers enumerated via ACPI should be registered in the system to
|
||||
provide generic access to their resources. For example, a driver that would
|
||||
like to be accessible to slave devices via generic API call
|
||||
dma_request_slave_channel() must register itself at the end of the probe
|
||||
function like this:
|
||||
function like this::
|
||||
|
||||
err = devm_acpi_dma_controller_register(dev, xlate_func, dw);
|
||||
/* Handle the error if it's not a case of !CONFIG_ACPI */
|
||||
|
@ -74,7 +80,7 @@ function like this:
|
|||
and implement custom xlate function if needed (usually acpi_dma_simple_xlate()
|
||||
is enough) which converts the FixedDMA resource provided by struct
|
||||
acpi_dma_spec into the corresponding DMA channel. A piece of code for that case
|
||||
could look like:
|
||||
could look like::
|
||||
|
||||
#ifdef CONFIG_ACPI
|
||||
struct filter_args {
|
||||
|
@ -114,7 +120,7 @@ provided by struct acpi_dma.
|
|||
Clients must call dma_request_slave_channel() with the string parameter that
|
||||
corresponds to a specific FixedDMA resource. By default "tx" means the first
|
||||
entry of the FixedDMA resource array, "rx" means the second entry. The table
|
||||
below shows a layout:
|
||||
below shows a layout::
|
||||
|
||||
Device (I2C0)
|
||||
{
|
||||
|
@ -138,12 +144,13 @@ acpi_dma_request_slave_chan_by_index() directly and therefore choose the
|
|||
specific FixedDMA resource by its index.
|
||||
|
||||
SPI serial bus support
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
======================
|
||||
|
||||
Slave devices behind SPI bus have SpiSerialBus resource attached to them.
|
||||
This is extracted automatically by the SPI core and the slave devices are
|
||||
enumerated once spi_register_master() is called by the bus driver.
|
||||
|
||||
Here is what the ACPI namespace for a SPI slave might look like:
|
||||
Here is what the ACPI namespace for a SPI slave might look like::
|
||||
|
||||
Device (EEP0)
|
||||
{
|
||||
|
@ -163,7 +170,7 @@ Here is what the ACPI namespace for a SPI slave might look like:
|
|||
|
||||
The SPI device drivers only need to add ACPI IDs in a similar way as with
|
||||
the platform device drivers. Below is an example where we add ACPI support
|
||||
to at25 SPI eeprom driver (this is meant for the above ACPI snippet):
|
||||
to at25 SPI eeprom driver (this is meant for the above ACPI snippet)::
|
||||
|
||||
#ifdef CONFIG_ACPI
|
||||
static const struct acpi_device_id at25_acpi_match[] = {
|
||||
|
@ -182,7 +189,7 @@ to at25 SPI eeprom driver (this is meant for the above ACPI snippet):
|
|||
|
||||
Note that this driver actually needs more information like page size of the
|
||||
eeprom etc. but at the time of writing this there is no standard way of
|
||||
passing those. One idea is to return this in _DSM method like:
|
||||
passing those. One idea is to return this in _DSM method like::
|
||||
|
||||
Device (EEP0)
|
||||
{
|
||||
|
@ -202,7 +209,7 @@ passing those. One idea is to return this in _DSM method like:
|
|||
}
|
||||
|
||||
Then the at25 SPI driver can get this configuration by calling _DSM on its
|
||||
ACPI handle like:
|
||||
ACPI handle like::
|
||||
|
||||
struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
|
||||
struct acpi_object_list input;
|
||||
|
@ -220,14 +227,15 @@ ACPI handle like:
|
|||
kfree(output.pointer);
|
||||
|
||||
I2C serial bus support
|
||||
~~~~~~~~~~~~~~~~~~~~~~
|
||||
======================
|
||||
|
||||
The slaves behind I2C bus controller only need to add the ACPI IDs like
|
||||
with the platform and SPI drivers. The I2C core automatically enumerates
|
||||
any slave devices behind the controller device once the adapter is
|
||||
registered.
|
||||
|
||||
Below is an example of how to add ACPI support to the existing mpu3050
|
||||
input driver:
|
||||
input driver::
|
||||
|
||||
#ifdef CONFIG_ACPI
|
||||
static const struct acpi_device_id mpu3050_acpi_match[] = {
|
||||
|
@ -251,56 +259,57 @@ input driver:
|
|||
};
|
||||
|
||||
GPIO support
|
||||
~~~~~~~~~~~~
|
||||
============
|
||||
|
||||
ACPI 5 introduced two new resources to describe GPIO connections: GpioIo
|
||||
and GpioInt. These resources can be used to pass GPIO numbers used by
|
||||
the device to the driver. ACPI 5.1 extended this with _DSD (Device
|
||||
Specific Data) which made it possible to name the GPIOs among other things.
|
||||
|
||||
For example:
|
||||
For example::
|
||||
|
||||
Device (DEV)
|
||||
{
|
||||
Method (_CRS, 0, NotSerialized)
|
||||
Device (DEV)
|
||||
{
|
||||
Name (SBUF, ResourceTemplate()
|
||||
Method (_CRS, 0, NotSerialized)
|
||||
{
|
||||
...
|
||||
// Used to power on/off the device
|
||||
GpioIo (Exclusive, PullDefault, 0x0000, 0x0000,
|
||||
IoRestrictionOutputOnly, "\\_SB.PCI0.GPI0",
|
||||
0x00, ResourceConsumer,,)
|
||||
Name (SBUF, ResourceTemplate()
|
||||
{
|
||||
// Pin List
|
||||
0x0055
|
||||
...
|
||||
// Used to power on/off the device
|
||||
GpioIo (Exclusive, PullDefault, 0x0000, 0x0000,
|
||||
IoRestrictionOutputOnly, "\\_SB.PCI0.GPI0",
|
||||
0x00, ResourceConsumer,,)
|
||||
{
|
||||
// Pin List
|
||||
0x0055
|
||||
}
|
||||
|
||||
// Interrupt for the device
|
||||
GpioInt (Edge, ActiveHigh, ExclusiveAndWake, PullNone,
|
||||
0x0000, "\\_SB.PCI0.GPI0", 0x00, ResourceConsumer,,)
|
||||
{
|
||||
// Pin list
|
||||
0x0058
|
||||
}
|
||||
|
||||
...
|
||||
|
||||
}
|
||||
|
||||
// Interrupt for the device
|
||||
GpioInt (Edge, ActiveHigh, ExclusiveAndWake, PullNone,
|
||||
0x0000, "\\_SB.PCI0.GPI0", 0x00, ResourceConsumer,,)
|
||||
{
|
||||
// Pin list
|
||||
0x0058
|
||||
}
|
||||
|
||||
...
|
||||
|
||||
Return (SBUF)
|
||||
}
|
||||
|
||||
Return (SBUF)
|
||||
}
|
||||
|
||||
// ACPI 5.1 _DSD used for naming the GPIOs
|
||||
Name (_DSD, Package ()
|
||||
{
|
||||
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
|
||||
Package ()
|
||||
// ACPI 5.1 _DSD used for naming the GPIOs
|
||||
Name (_DSD, Package ()
|
||||
{
|
||||
Package () {"power-gpios", Package() {^DEV, 0, 0, 0 }},
|
||||
Package () {"irq-gpios", Package() {^DEV, 1, 0, 0 }},
|
||||
}
|
||||
})
|
||||
...
|
||||
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
|
||||
Package ()
|
||||
{
|
||||
Package () {"power-gpios", Package() {^DEV, 0, 0, 0 }},
|
||||
Package () {"irq-gpios", Package() {^DEV, 1, 0, 0 }},
|
||||
}
|
||||
})
|
||||
...
|
||||
|
||||
These GPIO numbers are controller relative and path "\\_SB.PCI0.GPI0"
|
||||
specifies the path to the controller. In order to use these GPIOs in Linux
|
||||
|
@ -310,7 +319,7 @@ There is a standard GPIO API for that and is documented in
|
|||
Documentation/gpio/.
|
||||
|
||||
In the above example we can get the corresponding two GPIO descriptors with
|
||||
a code like this:
|
||||
code like this::
|
||||
|
||||
#include <linux/gpio/consumer.h>
|
||||
...
|
||||
|
@ -334,21 +343,22 @@ See Documentation/acpi/gpio-properties.txt for more information about the
|
|||
_DSD binding related to GPIOs.
|
||||
|
||||
MFD devices
|
||||
~~~~~~~~~~~
|
||||
===========
|
||||
|
||||
The MFD devices register their children as platform devices. For the child
|
||||
devices there needs to be an ACPI handle that they can use to reference
|
||||
parts of the ACPI namespace that relate to them. In the Linux MFD subsystem
|
||||
we provide two ways:
|
||||
|
||||
o The children share the parent ACPI handle.
|
||||
o The MFD cell can specify the ACPI id of the device.
|
||||
- The children share the parent ACPI handle.
|
||||
- The MFD cell can specify the ACPI id of the device.
|
||||
|
||||
For the first case, the MFD drivers do not need to do anything. The
|
||||
resulting child platform device will have its ACPI_COMPANION() set to point
|
||||
to the parent device.
|
||||
|
||||
If the ACPI namespace has a device that we can match using an ACPI id or ACPI
|
||||
adr, the cell should be set like:
|
||||
adr, the cell should be set like::
|
||||
|
||||
static struct mfd_cell_acpi_match my_subdevice_cell_acpi_match = {
|
||||
.pnpid = "XYZ0001",
|
||||
|
@ -366,7 +376,8 @@ the MFD device and if found, that ACPI companion device is bound to the
|
|||
resulting child platform device.
|
||||
|
||||
Device Tree namespace link device ID
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
====================================
|
||||
|
||||
The Device Tree protocol uses device identification based on the "compatible"
|
||||
property whose value is a string or an array of strings recognized as device
|
||||
identifiers by drivers and the driver core. The set of all those strings may be
|
||||
|
@ -410,6 +421,32 @@ Specifically, the device IDs returned by _HID and preceding PRP0001 in the _CID
|
|||
return package will be checked first. Also in that case the bus type the device
|
||||
will be enumerated to depends on the device ID returned by _HID.
|
||||
|
||||
For example, the following ACPI sample might be used to enumerate an lm75-type
|
||||
I2C temperature sensor and match it to the driver using the Device Tree
|
||||
namespace link::
|
||||
|
||||
Device (TMP0)
|
||||
{
|
||||
Name (_HID, "PRP0001")
|
||||
Name (_DSD, Package() {
|
||||
ToUUID("daffd814-6eba-4d8c-8a91-bc9bbf4aa301"),
|
||||
Package () {
|
||||
Package (2) { "compatible", "ti,tmp75" },
|
||||
}
|
||||
})
|
||||
Method (_CRS, 0, Serialized)
|
||||
{
|
||||
Name (SBUF, ResourceTemplate ()
|
||||
{
|
||||
I2cSerialBusV2 (0x48, ControllerInitiated,
|
||||
400000, AddressingMode7Bit,
|
||||
"\\_SB.PCI0.I2C1", 0x00,
|
||||
ResourceConsumer, , Exclusive,)
|
||||
})
|
||||
Return (SBUF)
|
||||
}
|
||||
}
|
||||
|
||||
It is valid to define device objects with a _HID returning PRP0001 and without
|
||||
the "compatible" property in the _DSD or a _CID as long as one of their
|
||||
ancestors provides a _DSD with a valid "compatible" property. Such device
|
||||
|
@ -423,4 +460,4 @@ the _DSD of the device object itself or the _DSD of its ancestor in the
|
|||
Otherwise, the _DSD itself is regarded as invalid and therefore the "compatible"
|
||||
property returned by it is meaningless.
|
||||
|
||||
Refer to DSD-properties-rules.txt for more information.
|
||||
Refer to :doc:`DSD-properties-rules` for more information.
|
|
@ -1,5 +1,8 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
======================================
|
||||
_DSD Device Properties Related to GPIO
|
||||
--------------------------------------
|
||||
======================================
|
||||
|
||||
With the release of ACPI 5.1, the _DSD configuration object finally
|
||||
allows names to be given to GPIOs (and other things as well) returned
|
||||
|
@ -8,7 +11,7 @@ the corresponding GPIO, which is pretty error prone (it depends on
|
|||
the _CRS output ordering, for example).
|
||||
|
||||
With _DSD we can now query GPIOs using a name instead of an integer
|
||||
index, like the ASL example below shows:
|
||||
index, like the ASL example below shows::
|
||||
|
||||
// Bluetooth device with reset and shutdown GPIOs
|
||||
Device (BTH)
|
||||
|
@ -34,15 +37,19 @@ index, like the ASL example below shows:
|
|||
})
|
||||
}
|
||||
|
||||
The format of the supported GPIO property is:
|
||||
The format of the supported GPIO property is::
|
||||
|
||||
Package () { "name", Package () { ref, index, pin, active_low }}
|
||||
|
||||
ref - The device that has _CRS containing GpioIo()/GpioInt() resources,
|
||||
typically this is the device itself (BTH in our case).
|
||||
index - Index of the GpioIo()/GpioInt() resource in _CRS starting from zero.
|
||||
pin - Pin in the GpioIo()/GpioInt() resource. Typically this is zero.
|
||||
active_low - If 1 the GPIO is marked as active_low.
|
||||
ref
|
||||
The device that has _CRS containing GpioIo()/GpioInt() resources,
|
||||
typically this is the device itself (BTH in our case).
|
||||
index
|
||||
Index of the GpioIo()/GpioInt() resource in _CRS starting from zero.
|
||||
pin
|
||||
Pin in the GpioIo()/GpioInt() resource. Typically this is zero.
|
||||
active_low
|
||||
If 1 the GPIO is marked as active_low.
|
||||
|
||||
Since ACPI GpioIo() resource does not have a field saying whether it is
|
||||
active low or high, the "active_low" argument can be used here. Setting
|
||||
|
@ -55,7 +62,7 @@ It is possible to leave holes in the array of GPIOs. This is useful in
|
|||
cases like with SPI host controllers where some chip selects may be
|
||||
implemented as GPIOs and some as native signals. For example a SPI host
|
||||
controller can have chip selects 0 and 2 implemented as GPIOs and 1 as
|
||||
native:
|
||||
native::
|
||||
|
||||
Package () {
|
||||
"cs-gpios",
|
||||
|
@ -67,7 +74,7 @@ native:
|
|||
}
|
||||
|
||||
Other supported properties
|
||||
--------------------------
|
||||
==========================
|
||||
|
||||
The following Device Tree compatible device properties are also supported by
|
||||
_DSD device properties for GPIO controllers:
|
||||
|
@ -78,7 +85,7 @@ _DSD device properties for GPIO controllers:
|
|||
- input
|
||||
- line-name
|
||||
|
||||
Example:
|
||||
Example::
|
||||
|
||||
Name (_DSD, Package () {
|
||||
// _DSD Hierarchical Properties Extension UUID
|
||||
|
@ -100,7 +107,7 @@ Example:
|
|||
|
||||
- gpio-line-names
|
||||
|
||||
Example:
|
||||
Example::
|
||||
|
||||
Package () {
|
||||
"gpio-line-names",
|
||||
|
@ -114,7 +121,7 @@ See Documentation/devicetree/bindings/gpio/gpio.txt for more information
|
|||
about these properties.
|
||||
|
||||
ACPI GPIO Mappings Provided by Drivers
|
||||
--------------------------------------
|
||||
======================================
|
||||
|
||||
There are systems in which the ACPI tables do not contain _DSD but provide _CRS
|
||||
with GpioIo()/GpioInt() resources and device drivers still need to work with
|
||||
|
@ -139,16 +146,16 @@ line in that resource starting from zero, and the active-low flag for that line,
|
|||
respectively, in analogy with the _DSD GPIO property format specified above.
|
||||
|
||||
For the example Bluetooth device discussed previously the data structures in
|
||||
question would look like this:
|
||||
question would look like this::
|
||||
|
||||
static const struct acpi_gpio_params reset_gpio = { 1, 1, false };
|
||||
static const struct acpi_gpio_params shutdown_gpio = { 0, 0, false };
|
||||
static const struct acpi_gpio_params reset_gpio = { 1, 1, false };
|
||||
static const struct acpi_gpio_params shutdown_gpio = { 0, 0, false };
|
||||
|
||||
static const struct acpi_gpio_mapping bluetooth_acpi_gpios[] = {
|
||||
{ "reset-gpios", &reset_gpio, 1 },
|
||||
{ "shutdown-gpios", &shutdown_gpio, 1 },
|
||||
{ },
|
||||
};
|
||||
static const struct acpi_gpio_mapping bluetooth_acpi_gpios[] = {
|
||||
{ "reset-gpios", &reset_gpio, 1 },
|
||||
{ "shutdown-gpios", &shutdown_gpio, 1 },
|
||||
{ },
|
||||
};
|
||||
|
||||
Next, the mapping table needs to be passed as the second argument to
|
||||
acpi_dev_add_driver_gpios() that will register it with the ACPI device object
|
||||
|
@ -158,12 +165,12 @@ calling acpi_dev_remove_driver_gpios() on the ACPI device object where that
|
|||
table was previously registered.
|
||||
|
||||
Using the _CRS fallback
|
||||
-----------------------
|
||||
=======================
|
||||
|
||||
If a device does not have _DSD or the driver does not create ACPI GPIO
|
||||
mapping, the Linux GPIO framework refuses to return any GPIOs. This is
|
||||
because the driver does not know what it actually gets. For example if we
|
||||
have a device like below:
|
||||
have a device like below::
|
||||
|
||||
Device (BTH)
|
||||
{
|
||||
|
@ -177,7 +184,7 @@ have a device like below:
|
|||
})
|
||||
}
|
||||
|
||||
The driver might expect to get the right GPIO when it does:
|
||||
The driver might expect to get the right GPIO when it does::
|
||||
|
||||
desc = gpiod_get(dev, "reset", GPIOD_OUT_LOW);
|
||||
|
||||
|
@ -193,22 +200,25 @@ the ACPI GPIO mapping tables are hardly linked to ACPI ID and certain
|
|||
objects, as listed in the above chapter, of the device in question.
|
||||
|
||||
Getting GPIO descriptor
|
||||
-----------------------
|
||||
=======================
|
||||
|
||||
There are two main approaches to get GPIO resource from ACPI:
|
||||
desc = gpiod_get(dev, connection_id, flags);
|
||||
desc = gpiod_get_index(dev, connection_id, index, flags);
|
||||
There are two main approaches to get GPIO resource from ACPI::
|
||||
|
||||
desc = gpiod_get(dev, connection_id, flags);
|
||||
desc = gpiod_get_index(dev, connection_id, index, flags);
|
||||
|
||||
We may consider two different cases here, i.e. when connection ID is
|
||||
provided and otherwise.
|
||||
|
||||
Case 1:
|
||||
desc = gpiod_get(dev, "non-null-connection-id", flags);
|
||||
desc = gpiod_get_index(dev, "non-null-connection-id", index, flags);
|
||||
Case 1::
|
||||
|
||||
Case 2:
|
||||
desc = gpiod_get(dev, NULL, flags);
|
||||
desc = gpiod_get_index(dev, NULL, index, flags);
|
||||
desc = gpiod_get(dev, "non-null-connection-id", flags);
|
||||
desc = gpiod_get_index(dev, "non-null-connection-id", index, flags);
|
||||
|
||||
Case 2::
|
||||
|
||||
desc = gpiod_get(dev, NULL, flags);
|
||||
desc = gpiod_get_index(dev, NULL, index, flags);
|
||||
|
||||
Case 1 assumes that corresponding ACPI device description must have
|
||||
defined device properties and will prevent getting any GPIO resources
|
|
@ -0,0 +1,61 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==============
|
||||
ACPI I2C Muxes
|
||||
==============
|
||||
|
||||
Describing an I2C device hierarchy that includes I2C muxes requires an ACPI
|
||||
Device () scope per mux channel.
|
||||
|
||||
Consider this topology::
|
||||
|
||||
+------+ +------+
|
||||
| SMB1 |-->| MUX0 |--CH00--> i2c client A (0x50)
|
||||
| | | 0x70 |--CH01--> i2c client B (0x50)
|
||||
+------+ +------+
|
||||
|
||||
which corresponds to the following ASL::
|
||||
|
||||
Device (SMB1)
|
||||
{
|
||||
Name (_HID, ...)
|
||||
Device (MUX0)
|
||||
{
|
||||
Name (_HID, ...)
|
||||
Name (_CRS, ResourceTemplate () {
|
||||
I2cSerialBus (0x70, ControllerInitiated, I2C_SPEED,
|
||||
AddressingMode7Bit, "^SMB1", 0x00,
|
||||
ResourceConsumer,,)
|
||||
}
|
||||
|
||||
Device (CH00)
|
||||
{
|
||||
Name (_ADR, 0)
|
||||
|
||||
Device (CLIA)
|
||||
{
|
||||
Name (_HID, ...)
|
||||
Name (_CRS, ResourceTemplate () {
|
||||
I2cSerialBus (0x50, ControllerInitiated, I2C_SPEED,
|
||||
AddressingMode7Bit, "^CH00", 0x00,
|
||||
ResourceConsumer,,)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Device (CH01)
|
||||
{
|
||||
Name (_ADR, 1)
|
||||
|
||||
Device (CLIB)
|
||||
{
|
||||
Name (_HID, ...)
|
||||
Name (_CRS, ResourceTemplate () {
|
||||
I2cSerialBus (0x50, ControllerInitiated, I2C_SPEED,
|
||||
AddressingMode7Bit, "^CH01", 0x00,
|
||||
ResourceConsumer,,)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -0,0 +1,26 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
============
|
||||
ACPI Support
|
||||
============
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
namespace
|
||||
dsd/graph
|
||||
dsd/data-node-references
|
||||
enumeration
|
||||
osi
|
||||
method-customizing
|
||||
method-tracing
|
||||
DSD-properties-rules
|
||||
debug
|
||||
aml-debugger
|
||||
apei/output_format
|
||||
apei/einj
|
||||
gpio-properties
|
||||
i2c-muxes
|
||||
acpi-lid
|
||||
lpit
|
||||
video_extension
|
|
@ -1,3 +1,9 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===========================
|
||||
Low Power Idle Table (LPIT)
|
||||
===========================
|
||||
|
||||
To enumerate platform Low Power Idle states, Intel platforms are using
|
||||
“Low Power Idle Table” (LPIT). More details about this table can be
|
||||
downloaded from:
|
||||
|
@ -8,13 +14,15 @@ Residencies for each low power state can be read via FFH
|
|||
|
||||
On platforms supporting S0ix sleep states, there can be two types of
|
||||
residencies:
|
||||
- CPU PKG C10 (Read via FFH interface)
|
||||
- Platform Controller Hub (PCH) SLP_S0 (Read via memory mapped interface)
|
||||
|
||||
- CPU PKG C10 (Read via FFH interface)
|
||||
- Platform Controller Hub (PCH) SLP_S0 (Read via memory mapped interface)
|
||||
|
||||
The following attributes are added dynamically to the cpuidle
|
||||
sysfs attribute group:
|
||||
/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
|
||||
/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us
|
||||
sysfs attribute group::
|
||||
|
||||
/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
|
||||
/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us
|
||||
|
||||
The "low_power_idle_cpu_residency_us" attribute shows time spent
|
||||
by the CPU package in PKG C10
|
|
@ -0,0 +1,89 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=======================================
|
||||
Linux ACPI Custom Control Method How To
|
||||
=======================================
|
||||
|
||||
:Author: Zhang Rui <rui.zhang@intel.com>
|
||||
|
||||
|
||||
Linux supports customizing ACPI control methods at runtime.
|
||||
|
||||
Users can use this to:
|
||||
|
||||
1. override an existing method which may not work correctly,
|
||||
or just for debugging purposes.
|
||||
2. insert a completely new method in order to create a missing
|
||||
method such as _OFF, _ON, _STA, _INI, etc.
|
||||
|
||||
For these cases, it is far simpler to dynamically install a single
|
||||
control method rather than override the entire DSDT, because kernel
|
||||
rebuild/reboot is not needed and test results can be obtained in minutes.
|
||||
|
||||
.. note::
|
||||
|
||||
- Only ACPI METHOD can be overridden, any other object types like
|
||||
"Device", "OperationRegion", are not recognized. Methods
|
||||
declared inside scope operators are also not supported.
|
||||
|
||||
- The same ACPI control method can be overridden many times,
|
||||
and it's always the latest one that is used by Linux/kernel.
|
||||
|
||||
- To get the ACPI debug object output (Store (AAAA, Debug)),
|
||||
please run::
|
||||
|
||||
echo 1 > /sys/module/acpi/parameters/aml_debug_output
|
||||
|
||||
|
||||
1. override an existing method
|
||||
==============================
|
||||
a) get the ACPI table via ACPI sysfs I/F. e.g. to get the DSDT,
|
||||
just run "cat /sys/firmware/acpi/tables/DSDT > /tmp/dsdt.dat"
|
||||
b) disassemble the table by running "iasl -d dsdt.dat".
|
||||
c) rewrite the ASL code of the method and save it in a new file,
|
||||
d) package the new file (psr.asl) to an ACPI table format.
|
||||
Here is an example of a customized \_SB._AC._PSR method::
|
||||
|
||||
DefinitionBlock ("", "SSDT", 1, "", "", 0x20080715)
|
||||
{
|
||||
Method (\_SB_.AC._PSR, 0, NotSerialized)
|
||||
{
|
||||
Store ("In AC _PSR", Debug)
|
||||
Return (ACON)
|
||||
}
|
||||
}
|
||||
|
||||
Note that the full pathname of the method in ACPI namespace
|
||||
should be used.
|
||||
e) assemble the file to generate the AML code of the method.
|
||||
e.g. "iasl -vw 6084 psr.asl" (psr.aml is generated as a result)
|
||||
If parameter "-vw 6084" is not supported by your iASL compiler,
|
||||
please try a newer version.
|
||||
f) mount debugfs by "mount -t debugfs none /sys/kernel/debug"
|
||||
g) override the old method via the debugfs by running
|
||||
"cat /tmp/psr.aml > /sys/kernel/debug/acpi/custom_method"
|
||||
|
||||
2. insert a new method
|
||||
======================
|
||||
This is easier than overriding an existing method.
|
||||
We just need to create the ASL code of the method we want to
|
||||
insert and then follow the step c) ~ g) in section 1.
|
||||
|
||||
3. undo your changes
|
||||
====================
|
||||
The "undo" operation is not supported for a newly inserted method
|
||||
right now, i.e. we can not remove a method currently.
|
||||
For an overridden method, in order to undo your changes, please
|
||||
save a copy of the method original ASL code in step c) section 1,
|
||||
and redo step c) ~ g) to override the method with the original one.
|
||||
|
||||
|
||||
.. note:: We can use a kernel with multiple custom ACPI methods running,
|
||||
but each individual write to debugfs can implement a SINGLE
|
||||
method override. i.e. if we want to insert/override multiple
|
||||
ACPI methods, we need to redo step c) ~ g) multiple times.
|
||||
|
||||
.. note:: Be aware that root can mis-use this driver to modify arbitrary
|
||||
memory and gain additional rights, if root's privileges got
|
||||
restricted (for example if root is not allowed to load additional
|
||||
modules after boot).
|
|
@ -0,0 +1,238 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
=====================
|
||||
ACPICA Trace Facility
|
||||
=====================
|
||||
|
||||
:Copyright: |copy| 2015, Intel Corporation
|
||||
:Author: Lv Zheng <lv.zheng@intel.com>
|
||||
|
||||
|
||||
Abstract
|
||||
========
|
||||
This document describes the functions and the interfaces of the
|
||||
method tracing facility.
|
||||
|
||||
Functionalities and usage examples
|
||||
==================================
|
||||
|
||||
ACPICA provides method tracing capability. And two functions are
|
||||
currently implemented using this capability.
|
||||
|
||||
Log reducer
|
||||
-----------
|
||||
|
||||
ACPICA subsystem provides debugging outputs when CONFIG_ACPI_DEBUG is
|
||||
enabled. The debugging messages which are deployed via
|
||||
ACPI_DEBUG_PRINT() macro can be reduced at 2 levels - per-component
|
||||
level (known as debug layer, configured via
|
||||
/sys/module/acpi/parameters/debug_layer) and per-type level (known as
|
||||
debug level, configured via /sys/module/acpi/parameters/debug_level).
|
||||
|
||||
But when the particular layer/level is applied to the control method
|
||||
evaluations, the quantity of the debugging outputs may still be too
|
||||
large to be put into the kernel log buffer. The idea thus is worked out
|
||||
to only enable the particular debug layer/level (normally more detailed)
|
||||
logs when the control method evaluation is started, and disable the
|
||||
detailed logging when the control method evaluation is stopped.
|
||||
|
||||
The following command examples illustrate the usage of the "log reducer"
|
||||
functionality:
|
||||
|
||||
a. Filter out the debug layer/level matched logs when control methods
|
||||
are being evaluated::
|
||||
|
||||
# cd /sys/module/acpi/parameters
|
||||
# echo "0xXXXXXXXX" > trace_debug_layer
|
||||
# echo "0xYYYYYYYY" > trace_debug_level
|
||||
# echo "enable" > trace_state
|
||||
|
||||
b. Filter out the debug layer/level matched logs when the specified
|
||||
control method is being evaluated::
|
||||
|
||||
# cd /sys/module/acpi/parameters
|
||||
# echo "0xXXXXXXXX" > trace_debug_layer
|
||||
# echo "0xYYYYYYYY" > trace_debug_level
|
||||
# echo "\PPPP.AAAA.TTTT.HHHH" > trace_method_name
|
||||
# echo "method" > /sys/module/acpi/parameters/trace_state
|
||||
|
||||
c. Filter out the debug layer/level matched logs when the specified
|
||||
control method is being evaluated for the first time::
|
||||
|
||||
# cd /sys/module/acpi/parameters
|
||||
# echo "0xXXXXXXXX" > trace_debug_layer
|
||||
# echo "0xYYYYYYYY" > trace_debug_level
|
||||
# echo "\PPPP.AAAA.TTTT.HHHH" > trace_method_name
|
||||
# echo "method-once" > /sys/module/acpi/parameters/trace_state
|
||||
|
||||
Where:
|
||||
0xXXXXXXXX/0xYYYYYYYY
|
||||
Refer to Documentation/acpi/debug.txt for possible debug layer/level
|
||||
masking values.
|
||||
\PPPP.AAAA.TTTT.HHHH
|
||||
Full path of a control method that can be found in the ACPI namespace.
|
||||
It needn't be an entry of a control method evaluation.
|
||||
|
||||
AML tracer
|
||||
----------
|
||||
|
||||
There are special log entries added by the method tracing facility at
|
||||
the "trace points" the AML interpreter starts/stops to execute a control
|
||||
method, or an AML opcode. Note that the log entry format and contents are
|
||||
subject to change::
|
||||
|
||||
[ 0.186427] exdebug-0398 ex_trace_point : Method Begin [0xf58394d8:\_SB.PCI0.LPCB.ECOK] execution.
|
||||
[ 0.186630] exdebug-0398 ex_trace_point : Opcode Begin [0xf5905c88:If] execution.
|
||||
[ 0.186820] exdebug-0398 ex_trace_point : Opcode Begin [0xf5905cc0:LEqual] execution.
|
||||
[ 0.187010] exdebug-0398 ex_trace_point : Opcode Begin [0xf5905a20:-NamePath-] execution.
|
||||
[ 0.187214] exdebug-0398 ex_trace_point : Opcode End [0xf5905a20:-NamePath-] execution.
|
||||
[ 0.187407] exdebug-0398 ex_trace_point : Opcode Begin [0xf5905f60:One] execution.
|
||||
[ 0.187594] exdebug-0398 ex_trace_point : Opcode End [0xf5905f60:One] execution.
|
||||
[ 0.187789] exdebug-0398 ex_trace_point : Opcode End [0xf5905cc0:LEqual] execution.
|
||||
[ 0.187980] exdebug-0398 ex_trace_point : Opcode Begin [0xf5905cc0:Return] execution.
|
||||
[ 0.188146] exdebug-0398 ex_trace_point : Opcode Begin [0xf5905f60:One] execution.
|
||||
[ 0.188334] exdebug-0398 ex_trace_point : Opcode End [0xf5905f60:One] execution.
|
||||
[ 0.188524] exdebug-0398 ex_trace_point : Opcode End [0xf5905cc0:Return] execution.
|
||||
[ 0.188712] exdebug-0398 ex_trace_point : Opcode End [0xf5905c88:If] execution.
|
||||
[ 0.188903] exdebug-0398 ex_trace_point : Method End [0xf58394d8:\_SB.PCI0.LPCB.ECOK] execution.
|
||||
|
||||
Developers can utilize these special log entries to track the AML
|
||||
interpretation, thus can aid issue debugging and performance tuning. Note
|
||||
that, as the "AML tracer" logs are implemented via ACPI_DEBUG_PRINT()
|
||||
macro, CONFIG_ACPI_DEBUG is also required to be enabled for enabling
|
||||
"AML tracer" logs.
|
||||
|
||||
The following command examples illustrate the usage of the "AML tracer"
|
||||
functionality:
|
||||
|
||||
a. Filter out the method start/stop "AML tracer" logs when control
|
||||
methods are being evaluated::
|
||||
|
||||
# cd /sys/module/acpi/parameters
|
||||
# echo "0x80" > trace_debug_layer
|
||||
# echo "0x10" > trace_debug_level
|
||||
# echo "enable" > trace_state
|
||||
|
||||
b. Filter out the method start/stop "AML tracer" when the specified
|
||||
control method is being evaluated::
|
||||
|
||||
# cd /sys/module/acpi/parameters
|
||||
# echo "0x80" > trace_debug_layer
|
||||
# echo "0x10" > trace_debug_level
|
||||
# echo "\PPPP.AAAA.TTTT.HHHH" > trace_method_name
|
||||
# echo "method" > trace_state
|
||||
|
||||
c. Filter out the method start/stop "AML tracer" logs when the specified
|
||||
control method is being evaluated for the first time::
|
||||
|
||||
# cd /sys/module/acpi/parameters
|
||||
# echo "0x80" > trace_debug_layer
|
||||
# echo "0x10" > trace_debug_level
|
||||
# echo "\PPPP.AAAA.TTTT.HHHH" > trace_method_name
|
||||
# echo "method-once" > trace_state
|
||||
|
||||
d. Filter out the method/opcode start/stop "AML tracer" when the
|
||||
specified control method is being evaluated::
|
||||
|
||||
# cd /sys/module/acpi/parameters
|
||||
# echo "0x80" > trace_debug_layer
|
||||
# echo "0x10" > trace_debug_level
|
||||
# echo "\PPPP.AAAA.TTTT.HHHH" > trace_method_name
|
||||
# echo "opcode" > trace_state
|
||||
|
||||
e. Filter out the method/opcode start/stop "AML tracer" when the
|
||||
specified control method is being evaluated for the first time::
|
||||
|
||||
# cd /sys/module/acpi/parameters
|
||||
# echo "0x80" > trace_debug_layer
|
||||
# echo "0x10" > trace_debug_level
|
||||
# echo "\PPPP.AAAA.TTTT.HHHH" > trace_method_name
|
||||
# echo "opcode-once" > trace_state
|
||||
|
||||
Note that all above method tracing facility related module parameters can
|
||||
be used as the boot parameters, for example::
|
||||
|
||||
acpi.trace_debug_layer=0x80 acpi.trace_debug_level=0x10 \
|
||||
acpi.trace_method_name=\_SB.LID0._LID acpi.trace_state=opcode-once
|
||||
|
||||
|
||||
Interface descriptions
|
||||
======================
|
||||
|
||||
All method tracing functions can be configured via ACPI module
|
||||
parameters that are accessible at /sys/module/acpi/parameters/:
|
||||
|
||||
trace_method_name
|
||||
The full path of the AML method that the user wants to trace.
|
||||
|
||||
Note that the full path shouldn't contain the trailing "_"s in its
|
||||
name segments but may contain "\" to form an absolute path.
|
||||
|
||||
trace_debug_layer
|
||||
The temporary debug_layer used when the tracing feature is enabled.
|
||||
|
||||
Using ACPI_EXECUTER (0x80) by default, which is the debug_layer
|
||||
used to match all "AML tracer" logs.
|
||||
|
||||
trace_debug_level
|
||||
The temporary debug_level used when the tracing feature is enabled.
|
||||
|
||||
Using ACPI_LV_TRACE_POINT (0x10) by default, which is the
|
||||
debug_level used to match all "AML tracer" logs.
|
||||
|
||||
trace_state
|
||||
The status of the tracing feature.
|
||||
|
||||
Users can enable/disable this debug tracing feature by executing
|
||||
the following command::
|
||||
|
||||
# echo string > /sys/module/acpi/parameters/trace_state
|
||||
|
||||
Where "string" should be one of the following:
|
||||
|
||||
"disable"
|
||||
Disable the method tracing feature.
|
||||
|
||||
"enable"
|
||||
Enable the method tracing feature.
|
||||
|
||||
ACPICA debugging messages matching "trace_debug_layer/trace_debug_level"
|
||||
during any method execution will be logged.
|
||||
|
||||
"method"
|
||||
Enable the method tracing feature.
|
||||
|
||||
ACPICA debugging messages matching "trace_debug_layer/trace_debug_level"
|
||||
during method execution of "trace_method_name" will be logged.
|
||||
|
||||
"method-once"
|
||||
Enable the method tracing feature.
|
||||
|
||||
ACPICA debugging messages matching "trace_debug_layer/trace_debug_level"
|
||||
during method execution of "trace_method_name" will be logged only once.
|
||||
|
||||
"opcode"
|
||||
Enable the method tracing feature.
|
||||
|
||||
ACPICA debugging messages matching "trace_debug_layer/trace_debug_level"
|
||||
during method/opcode execution of "trace_method_name" will be logged.
|
||||
|
||||
"opcode-once"
|
||||
Enable the method tracing feature.
|
||||
|
||||
ACPICA debugging messages matching "trace_debug_layer/trace_debug_level"
|
||||
during method/opcode execution of "trace_method_name" will be logged only
|
||||
once.
|
||||
|
||||
Note that the differences between the "enable" and other feature
|
||||
enabling options are:
|
||||
|
||||
1. When "enable" is specified, since
|
||||
"trace_debug_layer/trace_debug_level" shall apply to all control
|
||||
method evaluations, after configuring "trace_state" to "enable",
|
||||
"trace_method_name" will be reset to NULL.
|
||||
2. When "method/opcode" is specified, if
|
||||
"trace_method_name" is NULL when "trace_state" is configured to
|
||||
these options, the "trace_debug_layer/trace_debug_level" will
|
||||
apply to all control method evaluations.
|
|
@ -1,85 +1,90 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
.. include:: <isonum.txt>
|
||||
|
||||
===================================================
|
||||
ACPI Device Tree - Representation of ACPI Namespace
|
||||
===================================================
|
||||
|
||||
Copyright (C) 2013, Intel Corporation
|
||||
Author: Lv Zheng <lv.zheng@intel.com>
|
||||
:Copyright: |copy| 2013, Intel Corporation
|
||||
|
||||
:Author: Lv Zheng <lv.zheng@intel.com>
|
||||
|
||||
Abstract:
|
||||
:Credit: Thanks for the help from Zhang Rui <rui.zhang@intel.com> and
|
||||
Rafael J. Wysocki <rafael.j.wysocki@intel.com>.
|
||||
|
||||
Abstract
|
||||
========
|
||||
The Linux ACPI subsystem converts ACPI namespace objects into a Linux
|
||||
device tree under the /sys/devices/LNXSYSTM:00 and updates it upon
|
||||
receiving ACPI hotplug notification events. For each device object in this
|
||||
hierarchy there is a corresponding symbolic link in the
|
||||
receiving ACPI hotplug notification events. For each device object
|
||||
in this hierarchy there is a corresponding symbolic link in the
|
||||
/sys/bus/acpi/devices.
|
||||
|
||||
This document illustrates the structure of the ACPI device tree.
|
||||
|
||||
ACPI Definition Blocks
|
||||
======================
|
||||
|
||||
Credit:
|
||||
The ACPI firmware sets up RSDP (Root System Description Pointer) in the
|
||||
system memory address space pointing to the XSDT (Extended System
|
||||
Description Table). The XSDT always points to the FADT (Fixed ACPI
|
||||
Description Table) using its first entry, the data within the FADT
|
||||
includes various fixed-length entries that describe fixed ACPI features
|
||||
of the hardware. The FADT contains a pointer to the DSDT
|
||||
(Differentiated System Description Table). The XSDT also contains
|
||||
entries pointing to possibly multiple SSDTs (Secondary System
|
||||
Description Table).
|
||||
|
||||
Thanks for the help from Zhang Rui <rui.zhang@intel.com> and Rafael J.
|
||||
Wysocki <rafael.j.wysocki@intel.com>.
|
||||
The DSDT and SSDT data is organized in data structures called definition
|
||||
blocks that contain definitions of various objects, including ACPI
|
||||
control methods, encoded in AML (ACPI Machine Language). The data block
|
||||
of the DSDT along with the contents of SSDTs represents a hierarchical
|
||||
data structure called the ACPI namespace whose topology reflects the
|
||||
structure of the underlying hardware platform.
|
||||
|
||||
The relationships between ACPI System Definition Tables described above
|
||||
are illustrated in the following diagram::
|
||||
|
||||
+---------+ +-------+ +--------+ +------------------------+
|
||||
| RSDP | +->| XSDT | +->| FADT | | +-------------------+ |
|
||||
+---------+ | +-------+ | +--------+ +-|->| DSDT | |
|
||||
| Pointer | | | Entry |-+ | ...... | | | +-------------------+ |
|
||||
+---------+ | +-------+ | X_DSDT |--+ | | Definition Blocks | |
|
||||
| Pointer |-+ | ..... | | ...... | | +-------------------+ |
|
||||
+---------+ +-------+ +--------+ | +-------------------+ |
|
||||
| Entry |------------------|->| SSDT | |
|
||||
+- - - -+ | +-------------------| |
|
||||
| Entry | - - - - - - - -+ | | Definition Blocks | |
|
||||
+- - - -+ | | +-------------------+ |
|
||||
| | +- - - - - - - - - -+ |
|
||||
+-|->| SSDT | |
|
||||
| +-------------------+ |
|
||||
| | Definition Blocks | |
|
||||
| +- - - - - - - - - -+ |
|
||||
+------------------------+
|
||||
|
|
||||
OSPM Loading |
|
||||
\|/
|
||||
+----------------+
|
||||
| ACPI Namespace |
|
||||
+----------------+
|
||||
|
||||
Figure 1. ACPI Definition Blocks
|
||||
|
||||
.. note:: RSDP can also contain a pointer to the RSDT (Root System
|
||||
Description Table). Platforms provide RSDT to enable
|
||||
compatibility with ACPI 1.0 operating systems. The OS is expected
|
||||
to use XSDT, if present.
|
||||
|
||||
|
||||
1. ACPI Definition Blocks
|
||||
Example ACPI Namespace
|
||||
======================
|
||||
|
||||
The ACPI firmware sets up RSDP (Root System Description Pointer) in the
|
||||
system memory address space pointing to the XSDT (Extended System
|
||||
Description Table). The XSDT always points to the FADT (Fixed ACPI
|
||||
Description Table) using its first entry, the data within the FADT
|
||||
includes various fixed-length entries that describe fixed ACPI features
|
||||
of the hardware. The FADT contains a pointer to the DSDT
|
||||
(Differentiated System Descripition Table). The XSDT also contains
|
||||
entries pointing to possibly multiple SSDTs (Secondary System
|
||||
Description Table).
|
||||
All definition blocks are loaded into a single namespace. The namespace
|
||||
is a hierarchy of objects identified by names and paths.
|
||||
The following naming conventions apply to object names in the ACPI
|
||||
namespace:
|
||||
|
||||
The DSDT and SSDT data is organized in data structures called definition
|
||||
blocks that contain definitions of various objects, including ACPI
|
||||
control methods, encoded in AML (ACPI Machine Language). The data block
|
||||
of the DSDT along with the contents of SSDTs represents a hierarchical
|
||||
data structure called the ACPI namespace whose topology reflects the
|
||||
structure of the underlying hardware platform.
|
||||
|
||||
The relationships between ACPI System Definition Tables described above
|
||||
are illustrated in the following diagram.
|
||||
|
||||
+---------+ +-------+ +--------+ +------------------------+
|
||||
| RSDP | +->| XSDT | +->| FADT | | +-------------------+ |
|
||||
+---------+ | +-------+ | +--------+ +-|->| DSDT | |
|
||||
| Pointer | | | Entry |-+ | ...... | | | +-------------------+ |
|
||||
+---------+ | +-------+ | X_DSDT |--+ | | Definition Blocks | |
|
||||
| Pointer |-+ | ..... | | ...... | | +-------------------+ |
|
||||
+---------+ +-------+ +--------+ | +-------------------+ |
|
||||
| Entry |------------------|->| SSDT | |
|
||||
+- - - -+ | +-------------------| |
|
||||
| Entry | - - - - - - - -+ | | Definition Blocks | |
|
||||
+- - - -+ | | +-------------------+ |
|
||||
| | +- - - - - - - - - -+ |
|
||||
+-|->| SSDT | |
|
||||
| +-------------------+ |
|
||||
| | Definition Blocks | |
|
||||
| +- - - - - - - - - -+ |
|
||||
+------------------------+
|
||||
|
|
||||
OSPM Loading |
|
||||
\|/
|
||||
+----------------+
|
||||
| ACPI Namespace |
|
||||
+----------------+
|
||||
|
||||
Figure 1. ACPI Definition Blocks
|
||||
|
||||
NOTE: RSDP can also contain a pointer to the RSDT (Root System
|
||||
Description Table). Platforms provide RSDT to enable
|
||||
compatibility with ACPI 1.0 operating systems. The OS is expected
|
||||
to use XSDT, if present.
|
||||
|
||||
|
||||
2. Example ACPI Namespace
|
||||
|
||||
All definition blocks are loaded into a single namespace. The namespace
|
||||
is a hierarchy of objects identified by names and paths.
|
||||
The following naming conventions apply to object names in the ACPI
|
||||
namespace:
|
||||
1. All names are 32 bits long.
|
||||
2. The first byte of a name must be one of 'A' - 'Z', '_'.
|
||||
3. Each of the remaining bytes of a name must be one of 'A' - 'Z', '0'
|
||||
|
@ -91,7 +96,7 @@ Wysocki <rafael.j.wysocki@intel.com>.
|
|||
(i.e. names prepended with '^' are relative to the parent of the
|
||||
current namespace node).
|
||||
|
||||
The figure below shows an example ACPI namespace.
|
||||
The figure below shows an example ACPI namespace::
|
||||
|
||||
+------+
|
||||
| \ | Root
|
||||
|
@ -184,19 +189,20 @@ Wysocki <rafael.j.wysocki@intel.com>.
|
|||
Figure 2. Example ACPI Namespace
|
||||
|
||||
|
||||
3. Linux ACPI Device Objects
|
||||
Linux ACPI Device Objects
|
||||
=========================
|
||||
|
||||
The Linux kernel's core ACPI subsystem creates struct acpi_device
|
||||
objects for ACPI namespace objects representing devices, power resources
|
||||
processors, thermal zones. Those objects are exported to user space via
|
||||
sysfs as directories in the subtree under /sys/devices/LNXSYSTM:00. The
|
||||
format of their names is <bus_id:instance>, where 'bus_id' refers to the
|
||||
ACPI namespace representation of the given object and 'instance' is used
|
||||
for distinguishing different object of the same 'bus_id' (it is
|
||||
two-digit decimal representation of an unsigned integer).
|
||||
The Linux kernel's core ACPI subsystem creates struct acpi_device
|
||||
objects for ACPI namespace objects representing devices, power resources,
|
||||
processors, thermal zones. Those objects are exported to user space via
|
||||
sysfs as directories in the subtree under /sys/devices/LNXSYSTM:00. The
|
||||
format of their names is <bus_id:instance>, where 'bus_id' refers to the
|
||||
ACPI namespace representation of the given object and 'instance' is used
|
||||
for distinguishing different objects of the same 'bus_id' (it is
|
||||
two-digit decimal representation of an unsigned integer).
|
||||
|
||||
The value of 'bus_id' depends on the type of the object whose name it is
|
||||
part of as listed in the table below.
|
||||
The value of 'bus_id' depends on the type of the object whose name it is
|
||||
part of as listed in the table below::
|
||||
|
||||
+---+-----------------+-------+----------+
|
||||
| | Object/Feature | Table | bus_id |
|
||||
|
@ -226,10 +232,11 @@ Wysocki <rafael.j.wysocki@intel.com>.
|
|||
|
||||
Table 1. ACPI Namespace Objects Mapping
|
||||
|
||||
The following rules apply when creating struct acpi_device objects on
|
||||
the basis of the contents of ACPI System Description Tables (as
|
||||
indicated by the letter in the first column and the notation in the
|
||||
second column of the table above):
|
||||
The following rules apply when creating struct acpi_device objects on
|
||||
the basis of the contents of ACPI System Description Tables (as
|
||||
indicated by the letter in the first column and the notation in the
|
||||
second column of the table above):
|
||||
|
||||
N:
|
||||
The object's source is an ACPI namespace node (as indicated by the
|
||||
named object's type in the second column). In that case the object's
|
||||
|
@ -249,13 +256,14 @@ Wysocki <rafael.j.wysocki@intel.com>.
|
|||
struct acpi_device object with LNXVIDEO 'bus_id' will be created for
|
||||
it.
|
||||
|
||||
The third column of the above table indicates which ACPI System
|
||||
Description Tables contain information used for the creation of the
|
||||
struct acpi_device objects represented by the given row (xSDT means DSDT
|
||||
or SSDT).
|
||||
The third column of the above table indicates which ACPI System
|
||||
Description Tables contain information used for the creation of the
|
||||
struct acpi_device objects represented by the given row (xSDT means DSDT
|
||||
or SSDT).
|
||||
|
||||
The forth column of the above table indicates the 'bus_id' generation
|
||||
rule of the struct acpi_device object:
|
||||
|
||||
The fourth column of the above table indicates the 'bus_id' generation
|
||||
rule of the struct acpi_device object:
|
||||
_HID:
|
||||
_HID in the last column of the table means that the object's bus_id
|
||||
is derived from the _HID/_CID identification objects present under
|
||||
|
@ -275,45 +283,47 @@ Wysocki <rafael.j.wysocki@intel.com>.
|
|||
object's bus_id.
|
||||
|
||||
|
||||
4. Linux ACPI Physical Device Glue
|
||||
Linux ACPI Physical Device Glue
|
||||
===============================
|
||||
|
||||
ACPI device (i.e. struct acpi_device) objects may be linked to other
|
||||
objects in the Linux' device hierarchy that represent "physical" devices
|
||||
(for example, devices on the PCI bus). If that happens, it means that
|
||||
the ACPI device object is a "companion" of a device otherwise
|
||||
represented in a different way and is used (1) to provide configuration
|
||||
information on that device which cannot be obtained by other means and
|
||||
(2) to do specific things to the device with the help of its ACPI
|
||||
control methods. One ACPI device object may be linked this way to
|
||||
multiple "physical" devices.
|
||||
ACPI device (i.e. struct acpi_device) objects may be linked to other
|
||||
objects in the Linux' device hierarchy that represent "physical" devices
|
||||
(for example, devices on the PCI bus). If that happens, it means that
|
||||
the ACPI device object is a "companion" of a device otherwise
|
||||
represented in a different way and is used (1) to provide configuration
|
||||
information on that device which cannot be obtained by other means and
|
||||
(2) to do specific things to the device with the help of its ACPI
|
||||
control methods. One ACPI device object may be linked this way to
|
||||
multiple "physical" devices.
|
||||
|
||||
If an ACPI device object is linked to a "physical" device, its sysfs
|
||||
directory contains the "physical_node" symbolic link to the sysfs
|
||||
directory of the target device object. In turn, the target device's
|
||||
sysfs directory will then contain the "firmware_node" symbolic link to
|
||||
the sysfs directory of the companion ACPI device object.
|
||||
The linking mechanism relies on device identification provided by the
|
||||
ACPI namespace. For example, if there's an ACPI namespace object
|
||||
representing a PCI device (i.e. a device object under an ACPI namespace
|
||||
object representing a PCI bridge) whose _ADR returns 0x00020000 and the
|
||||
bus number of the parent PCI bridge is 0, the sysfs directory
|
||||
representing the struct acpi_device object created for that ACPI
|
||||
namespace object will contain the 'physical_node' symbolic link to the
|
||||
/sys/devices/pci0000:00/0000:00:02:0/ sysfs directory of the
|
||||
corresponding PCI device.
|
||||
If an ACPI device object is linked to a "physical" device, its sysfs
|
||||
directory contains the "physical_node" symbolic link to the sysfs
|
||||
directory of the target device object. In turn, the target device's
|
||||
sysfs directory will then contain the "firmware_node" symbolic link to
|
||||
the sysfs directory of the companion ACPI device object.
|
||||
The linking mechanism relies on device identification provided by the
|
||||
ACPI namespace. For example, if there's an ACPI namespace object
|
||||
representing a PCI device (i.e. a device object under an ACPI namespace
|
||||
object representing a PCI bridge) whose _ADR returns 0x00020000 and the
|
||||
bus number of the parent PCI bridge is 0, the sysfs directory
|
||||
representing the struct acpi_device object created for that ACPI
|
||||
namespace object will contain the 'physical_node' symbolic link to the
|
||||
/sys/devices/pci0000:00/0000:00:02:0/ sysfs directory of the
|
||||
corresponding PCI device.
|
||||
|
||||
The linking mechanism is generally bus-specific. The core of its
|
||||
implementation is located in the drivers/acpi/glue.c file, but there are
|
||||
complementary parts depending on the bus types in question located
|
||||
elsewhere. For example, the PCI-specific part of it is located in
|
||||
drivers/pci/pci-acpi.c.
|
||||
The linking mechanism is generally bus-specific. The core of its
|
||||
implementation is located in the drivers/acpi/glue.c file, but there are
|
||||
complementary parts depending on the bus types in question located
|
||||
elsewhere. For example, the PCI-specific part of it is located in
|
||||
drivers/pci/pci-acpi.c.
|
||||
|
||||
|
||||
5. Example Linux ACPI Device Tree
|
||||
Example Linux ACPI Device Tree
|
||||
=================================
|
||||
|
||||
The sysfs hierarchy of struct acpi_device objects corresponding to the
|
||||
example ACPI namespace illustrated in Figure 2 with the addition of
|
||||
fixed PWR_BUTTON/SLP_BUTTON devices is shown below.
|
||||
The sysfs hierarchy of struct acpi_device objects corresponding to the
|
||||
example ACPI namespace illustrated in Figure 2 with the addition of
|
||||
fixed PWR_BUTTON/SLP_BUTTON devices is shown below::
|
||||
|
||||
+--------------+---+-----------------+
|
||||
| LNXSYSTEM:00 | \ | acpi:LNXSYSTEM: |
|
||||
|
@ -377,12 +387,14 @@ Wysocki <rafael.j.wysocki@intel.com>.
|
|||
|
||||
Figure 3. Example Linux ACPI Device Tree
|
||||
|
||||
NOTE: Each node is represented as "object/path/modalias", where:
|
||||
1. 'object' is the name of the object's directory in sysfs.
|
||||
2. 'path' is the ACPI namespace path of the corresponding
|
||||
ACPI namespace object, as returned by the object's 'path'
|
||||
sysfs attribute.
|
||||
3. 'modalias' is the value of the object's 'modalias' sysfs
|
||||
attribute (as described earlier in this document).
|
||||
NOTE: N/A indicates the device object does not have the 'path' or the
|
||||
'modalias' attribute.
|
||||
.. note:: Each node is represented as "object/path/modalias", where:
|
||||
|
||||
1. 'object' is the name of the object's directory in sysfs.
|
||||
2. 'path' is the ACPI namespace path of the corresponding
|
||||
ACPI namespace object, as returned by the object's 'path'
|
||||
sysfs attribute.
|
||||
3. 'modalias' is the value of the object's 'modalias' sysfs
|
||||
attribute (as described earlier in this document).
|
||||
|
||||
.. note:: N/A indicates the device object does not have the 'path' or the
|
||||
'modalias' attribute.
|
|
@ -1,5 +1,8 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==========================
|
||||
ACPI _OSI and _REV methods
|
||||
--------------------------
|
||||
==========================
|
||||
|
||||
An ACPI BIOS can use the "Operating System Interfaces" method (_OSI)
|
||||
to find out what the operating system supports. Eg. If BIOS
|
||||
|
@ -14,7 +17,7 @@ This document explains how and why the BIOS and Linux should use these methods.
|
|||
It also explains how and why they are widely misused.
|
||||
|
||||
How to use _OSI
|
||||
---------------
|
||||
===============
|
||||
|
||||
Linux runs on two groups of machines -- those that are tested by the OEM
|
||||
to be compatible with Linux, and those that were never tested with Linux,
|
||||
|
@ -62,7 +65,7 @@ the string when that support is added to the kernel.
|
|||
That was easy. Read on, to find out how to do it wrong.
|
||||
|
||||
Before _OSI, there was _OS
|
||||
--------------------------
|
||||
==========================
|
||||
|
||||
ACPI 1.0 specified "_OS" as an
|
||||
"object that evaluates to a string that identifies the operating system."
|
||||
|
@ -96,7 +99,7 @@ That is the *only* viable strategy, as that is what modern Windows does,
|
|||
and so doing otherwise could steer the BIOS down an untested path.
|
||||
|
||||
_OSI is born, and immediately misused
|
||||
--------------------------------------
|
||||
=====================================
|
||||
|
||||
With _OSI, the *BIOS* provides the string describing an interface,
|
||||
and asks the OS: "YES/NO, are you compatible with this interface?"
|
||||
|
@ -144,7 +147,7 @@ catastrophic failure resulting from the BIOS taking paths that
|
|||
were never validated under *any* OS.
|
||||
|
||||
Do not use _REV
|
||||
---------------
|
||||
===============
|
||||
|
||||
Since _OSI("Linux") went away, some BIOS writers used _REV
|
||||
to support Linux and Windows differences in the same BIOS.
|
||||
|
@ -164,7 +167,7 @@ from mid-2015 onward. The ACPI specification will also be updated
|
|||
to reflect that _REV is deprecated, and always returns 2.
|
||||
|
||||
Apple Mac and _OSI("Darwin")
|
||||
----------------------------
|
||||
============================
|
||||
|
||||
On Apple's Mac platforms, the ACPI BIOS invokes _OSI("Darwin")
|
||||
to determine if the machine is running Apple OSX.
|
|
@ -1,5 +1,8 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
=====================
|
||||
ACPI video extensions
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
=====================
|
||||
|
||||
This driver implements the ACPI Extensions For Display Adapters for
|
||||
integrated graphics devices on motherboard, as specified in ACPI 2.0
|
||||
|
@ -8,9 +11,10 @@ defining the video POST device, retrieving EDID information or to
|
|||
set up a video output, etc. Note that this is a reference implementation
|
||||
only. It may or may not work for your integrated video device.
|
||||
|
||||
The ACPI video driver does 3 things regarding backlight control:
|
||||
The ACPI video driver does 3 things regarding backlight control.
|
||||
|
||||
1 Export a sysfs interface for user space to control backlight level
|
||||
Export a sysfs interface for user space to control backlight level
|
||||
==================================================================
|
||||
|
||||
If the ACPI table has a video device, and acpi_backlight=vendor kernel
|
||||
command line is not present, the driver will register a backlight device
|
||||
|
@ -22,36 +26,41 @@ The backlight sysfs interface has a standard definition here:
|
|||
Documentation/ABI/stable/sysfs-class-backlight.
|
||||
|
||||
And what ACPI video driver does is:
|
||||
actual_brightness: on read, control method _BQC will be evaluated to
|
||||
get the brightness level the firmware thinks it is at;
|
||||
bl_power: not implemented, will set the current brightness instead;
|
||||
brightness: on write, control method _BCM will run to set the requested
|
||||
brightness level;
|
||||
max_brightness: Derived from the _BCL package(see below);
|
||||
type: firmware
|
||||
|
||||
actual_brightness:
|
||||
on read, control method _BQC will be evaluated to
|
||||
get the brightness level the firmware thinks it is at;
|
||||
bl_power:
|
||||
not implemented, will set the current brightness instead;
|
||||
brightness:
|
||||
on write, control method _BCM will run to set the requested brightness level;
|
||||
max_brightness:
|
||||
Derived from the _BCL package(see below);
|
||||
type:
|
||||
firmware
|
||||
|
||||
Note that ACPI video backlight driver will always use index for
|
||||
brightness, actual_brightness and max_brightness. So if we have
|
||||
the following _BCL package:
|
||||
the following _BCL package::
|
||||
|
||||
Method (_BCL, 0, NotSerialized)
|
||||
{
|
||||
Return (Package (0x0C)
|
||||
Method (_BCL, 0, NotSerialized)
|
||||
{
|
||||
0x64,
|
||||
0x32,
|
||||
0x0A,
|
||||
0x14,
|
||||
0x1E,
|
||||
0x28,
|
||||
0x32,
|
||||
0x3C,
|
||||
0x46,
|
||||
0x50,
|
||||
0x5A,
|
||||
0x64
|
||||
})
|
||||
}
|
||||
Return (Package (0x0C)
|
||||
{
|
||||
0x64,
|
||||
0x32,
|
||||
0x0A,
|
||||
0x14,
|
||||
0x1E,
|
||||
0x28,
|
||||
0x32,
|
||||
0x3C,
|
||||
0x46,
|
||||
0x50,
|
||||
0x5A,
|
||||
0x64
|
||||
})
|
||||
}
|
||||
|
||||
The first two levels are for when the laptop is on AC or on battery and are
|
||||
not used by Linux currently. The remaining 10 levels are supported levels
|
||||
|
@ -62,13 +71,15 @@ as a "brightness level" indicator. Thus from the user space perspective
|
|||
the range of available brightness levels is from 0 to 9 (max_brightness)
|
||||
inclusive.
|
||||
|
||||
2 Notify user space about hotkey event
|
||||
Notify user space about hotkey event
|
||||
====================================
|
||||
|
||||
There are generally two cases for hotkey event reporting:
|
||||
|
||||
i) For some laptops, when user presses the hotkey, a scancode will be
|
||||
generated and sent to user space through the input device created by
|
||||
the keyboard driver as a key type input event, with proper remap, the
|
||||
following key code will appear to user space:
|
||||
following key code will appear to user space::
|
||||
|
||||
EV_KEY, KEY_BRIGHTNESSUP
|
||||
EV_KEY, KEY_BRIGHTNESSDOWN
|
||||
|
@ -84,23 +95,27 @@ ii) For some laptops, the press of the hotkey will not generate the
|
|||
notify value it received and send the event to user space through the
|
||||
input device it created:
|
||||
|
||||
===== ==================
|
||||
event keycode
|
||||
===== ==================
|
||||
0x86 KEY_BRIGHTNESSUP
|
||||
0x87 KEY_BRIGHTNESSDOWN
|
||||
etc.
|
||||
===== ==================
|
||||
|
||||
so this would lead to the same effect as case i) now.
|
||||
|
||||
Once user space tool receives this event, it can modify the backlight
|
||||
level through the sysfs interface.
|
||||
|
||||
3 Change backlight level in the kernel
|
||||
Change backlight level in the kernel
|
||||
====================================
|
||||
|
||||
This works for machines covered by case ii) in Section 2. Once the driver
|
||||
receives a notification, it will set the backlight level accordingly. This does
|
||||
not affect the sending of events to user space; they are always sent to user
|
||||
space regardless of whether or not the video module controls the backlight level
|
||||
directly. This behaviour can be controlled through the brightness_switch_enabled
|
||||
module parameter as documented in admin-guide/kernel-parameters.rst. It is recommended to
|
||||
disable this behaviour once a GUI environment starts up and wants to have full
|
||||
control of the backlight level.
|
||||
module parameter as documented in admin-guide/kernel-parameters.rst. It is
|
||||
recommended to disable this behaviour once a GUI environment starts up and
|
||||
wants to have full control of the backlight level.
|
|
@ -0,0 +1,13 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
===============================
|
||||
The Linux kernel firmware guide
|
||||
===============================
|
||||
|
||||
This section describes the ACPI subsystem in Linux from firmware perspective.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 1
|
||||
|
||||
acpi/index
|
||||
|
|
@ -35,6 +35,16 @@ trying to get it to work optimally on a given system.
|
|||
|
||||
admin-guide/index
|
||||
|
||||
Firmware-related documentation
|
||||
------------------------------
|
||||
The following holds information on the kernel's expectations regarding the
|
||||
platform firmware.
|
||||
|
||||
.. toctree::
|
||||
:maxdepth: 2
|
||||
|
||||
firmware-guide/index
|
||||
|
||||
Application-developer documentation
|
||||
-----------------------------------
|
||||
|
||||
|
|
|
@ -243,10 +243,10 @@ Optimization
|
|||
^^^^^^^^^^^^
|
||||
|
||||
The Kprobe-optimizer doesn't insert the jump instruction immediately;
|
||||
rather, it calls synchronize_sched() for safety first, because it's
|
||||
rather, it calls synchronize_rcu() for safety first, because it's
|
||||
possible for a CPU to be interrupted in the middle of executing the
|
||||
optimized region [3]_. As you know, synchronize_sched() can ensure
|
||||
that all interruptions that were active when synchronize_sched()
|
||||
optimized region [3]_. As you know, synchronize_rcu() can ensure
|
||||
that all interruptions that were active when synchronize_rcu()
|
||||
was called are done, but only if CONFIG_PREEMPT=n. So, this version
|
||||
of kprobe optimization supports only kernels with CONFIG_PREEMPT=n [4]_.
|
||||
|
||||
|
|
|
@ -102,9 +102,11 @@ Byte sequences
|
|||
dictionary which is empty, and that it will always be
|
||||
invalid at this place.
|
||||
|
||||
17 : bitstream version. If the first byte is 17, the next byte
|
||||
gives the bitstream version (version 1 only). If the first byte
|
||||
is not 17, the bitstream version is 0.
|
||||
17 : bitstream version. If the first byte is 17, and compressed
|
||||
stream length is at least 5 bytes (length of shortest possible
|
||||
versioned bitstream), the next byte gives the bitstream version
|
||||
(version 1 only).
|
||||
Otherwise, the bitstream version is 0.
|
||||
|
||||
18..21 : copy 0..3 literals
|
||||
state = (byte - 17) = 0..3 [ copy <state> literals ]
|
||||
|
|
|
@ -623,7 +623,7 @@ the remote via /dev/input/event devices.
|
|||
|
||||
- .. row 78
|
||||
|
||||
- ``KEY_SCREEN``
|
||||
- ``KEY_ASPECT_RATIO``
|
||||
|
||||
- Select screen aspect ratio
|
||||
|
||||
|
@ -631,7 +631,7 @@ the remote via /dev/input/event devices.
|
|||
|
||||
- .. row 79
|
||||
|
||||
- ``KEY_ZOOM``
|
||||
- ``KEY_FULL_SCREEN``
|
||||
|
||||
- Put device into zoom/full screen mode
|
||||
|
||||
|
|
|
@ -1937,21 +1937,6 @@ There are some more advanced barrier functions:
|
|||
information on consistent memory.
|
||||
|
||||
|
||||
MMIO WRITE BARRIER
|
||||
------------------
|
||||
|
||||
The Linux kernel also has a special barrier for use with memory-mapped I/O
|
||||
writes:
|
||||
|
||||
mmiowb();
|
||||
|
||||
This is a variation on the mandatory write barrier that causes writes to weakly
|
||||
ordered I/O regions to be partially ordered. Its effects may go beyond the
|
||||
CPU->Hardware interface and actually affect the hardware at some level.
|
||||
|
||||
See the subsection "Acquires vs I/O accesses" for more information.
|
||||
|
||||
|
||||
===============================
|
||||
IMPLICIT KERNEL MEMORY BARRIERS
|
||||
===============================
|
||||
|
@ -2317,75 +2302,6 @@ But it won't see any of:
|
|||
*E, *F or *G following RELEASE Q
|
||||
|
||||
|
||||
|
||||
ACQUIRES VS I/O ACCESSES
|
||||
------------------------
|
||||
|
||||
Under certain circumstances (especially involving NUMA), I/O accesses within
|
||||
two spinlocked sections on two different CPUs may be seen as interleaved by the
|
||||
PCI bridge, because the PCI bridge does not necessarily participate in the
|
||||
cache-coherence protocol, and is therefore incapable of issuing the required
|
||||
read memory barriers.
|
||||
|
||||
For example:
|
||||
|
||||
CPU 1 CPU 2
|
||||
=============================== ===============================
|
||||
spin_lock(Q)
|
||||
writel(0, ADDR)
|
||||
writel(1, DATA);
|
||||
spin_unlock(Q);
|
||||
spin_lock(Q);
|
||||
writel(4, ADDR);
|
||||
writel(5, DATA);
|
||||
spin_unlock(Q);
|
||||
|
||||
may be seen by the PCI bridge as follows:
|
||||
|
||||
STORE *ADDR = 0, STORE *ADDR = 4, STORE *DATA = 1, STORE *DATA = 5
|
||||
|
||||
which would probably cause the hardware to malfunction.
|
||||
|
||||
|
||||
What is necessary here is to intervene with an mmiowb() before dropping the
|
||||
spinlock, for example:
|
||||
|
||||
CPU 1 CPU 2
|
||||
=============================== ===============================
|
||||
spin_lock(Q)
|
||||
writel(0, ADDR)
|
||||
writel(1, DATA);
|
||||
mmiowb();
|
||||
spin_unlock(Q);
|
||||
spin_lock(Q);
|
||||
writel(4, ADDR);
|
||||
writel(5, DATA);
|
||||
mmiowb();
|
||||
spin_unlock(Q);
|
||||
|
||||
this will ensure that the two stores issued on CPU 1 appear at the PCI bridge
|
||||
before either of the stores issued on CPU 2.
|
||||
|
||||
|
||||
Furthermore, following a store by a load from the same device obviates the need
|
||||
for the mmiowb(), because the load forces the store to complete before the load
|
||||
is performed:
|
||||
|
||||
CPU 1 CPU 2
|
||||
=============================== ===============================
|
||||
spin_lock(Q)
|
||||
writel(0, ADDR)
|
||||
a = readl(DATA);
|
||||
spin_unlock(Q);
|
||||
spin_lock(Q);
|
||||
writel(4, ADDR);
|
||||
b = readl(DATA);
|
||||
spin_unlock(Q);
|
||||
|
||||
|
||||
See Documentation/driver-api/device-io.rst for more information.
|
||||
|
||||
|
||||
=================================
|
||||
WHERE ARE MEMORY BARRIERS NEEDED?
|
||||
=================================
|
||||
|
@ -2532,16 +2448,9 @@ the device to malfunction.
|
|||
Inside of the Linux kernel, I/O should be done through the appropriate accessor
|
||||
routines - such as inb() or writel() - which know how to make such accesses
|
||||
appropriately sequential. While this, for the most part, renders the explicit
|
||||
use of memory barriers unnecessary, there are a couple of situations where they
|
||||
might be needed:
|
||||
|
||||
(1) On some systems, I/O stores are not strongly ordered across all CPUs, and
|
||||
so for _all_ general drivers locks should be used and mmiowb() must be
|
||||
issued prior to unlocking the critical section.
|
||||
|
||||
(2) If the accessor functions are used to refer to an I/O memory window with
|
||||
relaxed memory access properties, then _mandatory_ memory barriers are
|
||||
required to enforce ordering.
|
||||
use of memory barriers unnecessary, if the accessor functions are used to refer
|
||||
to an I/O memory window with relaxed memory access properties, then _mandatory_
|
||||
memory barriers are required to enforce ordering.
|
||||
|
||||
See Documentation/driver-api/device-io.rst for more information.
|
||||
|
||||
|
@ -2586,8 +2495,7 @@ explicit barriers are used.
|
|||
|
||||
Normally this won't be a problem because the I/O accesses done inside such
|
||||
sections will include synchronous load operations on strictly ordered I/O
|
||||
registers that form implicit I/O barriers. If this isn't sufficient then an
|
||||
mmiowb() may need to be used explicitly.
|
||||
registers that form implicit I/O barriers.
|
||||
|
||||
|
||||
A similar situation may occur between an interrupt routine and two routines
|
||||
|
@ -2599,71 +2507,114 @@ likely, then interrupt-disabling locks should be used to guarantee ordering.
|
|||
KERNEL I/O BARRIER EFFECTS
|
||||
==========================
|
||||
|
||||
When accessing I/O memory, drivers should use the appropriate accessor
|
||||
functions:
|
||||
|
||||
(*) inX(), outX():
|
||||
|
||||
These are intended to talk to I/O space rather than memory space, but
|
||||
that's primarily a CPU-specific concept. The i386 and x86_64 processors
|
||||
do indeed have special I/O space access cycles and instructions, but many
|
||||
CPUs don't have such a concept.
|
||||
|
||||
The PCI bus, amongst others, defines an I/O space concept which - on such
|
||||
CPUs as i386 and x86_64 - readily maps to the CPU's concept of I/O
|
||||
space. However, it may also be mapped as a virtual I/O space in the CPU's
|
||||
memory map, particularly on those CPUs that don't support alternate I/O
|
||||
spaces.
|
||||
|
||||
Accesses to this space may be fully synchronous (as on i386), but
|
||||
intermediary bridges (such as the PCI host bridge) may not fully honour
|
||||
that.
|
||||
|
||||
They are guaranteed to be fully ordered with respect to each other.
|
||||
|
||||
They are not guaranteed to be fully ordered with respect to other types of
|
||||
memory and I/O operation.
|
||||
Interfacing with peripherals via I/O accesses is deeply architecture and device
|
||||
specific. Therefore, drivers which are inherently non-portable may rely on
|
||||
specific behaviours of their target systems in order to achieve synchronization
|
||||
in the most lightweight manner possible. For drivers intending to be portable
|
||||
between multiple architectures and bus implementations, the kernel offers a
|
||||
series of accessor functions that provide various degrees of ordering
|
||||
guarantees:
|
||||
|
||||
(*) readX(), writeX():
|
||||
|
||||
Whether these are guaranteed to be fully ordered and uncombined with
|
||||
respect to each other on the issuing CPU depends on the characteristics
|
||||
defined for the memory window through which they're accessing. On later
|
||||
i386 architecture machines, for example, this is controlled by way of the
|
||||
MTRR registers.
|
||||
The readX() and writeX() MMIO accessors take a pointer to the
|
||||
peripheral being accessed as an __iomem * parameter. For pointers
|
||||
mapped with the default I/O attributes (e.g. those returned by
|
||||
ioremap()), the ordering guarantees are as follows:
|
||||
|
||||
Ordinarily, these will be guaranteed to be fully ordered and uncombined,
|
||||
provided they're not accessing a prefetchable device.
|
||||
1. All readX() and writeX() accesses to the same peripheral are ordered
|
||||
with respect to each other. This ensures that MMIO register accesses
|
||||
by the same CPU thread to a particular device will arrive in program
|
||||
order.
|
||||
|
||||
However, intermediary hardware (such as a PCI bridge) may indulge in
|
||||
deferral if it so wishes; to flush a store, a load from the same location
|
||||
is preferred[*], but a load from the same device or from configuration
|
||||
space should suffice for PCI.
|
||||
2. A writeX() issued by a CPU thread holding a spinlock is ordered
|
||||
before a writeX() to the same peripheral from another CPU thread
|
||||
issued after a later acquisition of the same spinlock. This ensures
|
||||
that MMIO register writes to a particular device issued while holding
|
||||
a spinlock will arrive in an order consistent with acquisitions of
|
||||
the lock.
|
||||
|
||||
[*] NOTE! attempting to load from the same location as was written to may
|
||||
cause a malfunction - consider the 16550 Rx/Tx serial registers for
|
||||
example.
|
||||
3. A writeX() by a CPU thread to the peripheral will first wait for the
|
||||
completion of all prior writes to memory either issued by, or
|
||||
propagated to, the same thread. This ensures that writes by the CPU
|
||||
to an outbound DMA buffer allocated by dma_alloc_coherent() will be
|
||||
visible to a DMA engine when the CPU writes to its MMIO control
|
||||
register to trigger the transfer.
|
||||
|
||||
Used with prefetchable I/O memory, an mmiowb() barrier may be required to
|
||||
force stores to be ordered.
|
||||
4. A readX() by a CPU thread from the peripheral will complete before
|
||||
any subsequent reads from memory by the same thread can begin. This
|
||||
ensures that reads by the CPU from an incoming DMA buffer allocated
|
||||
by dma_alloc_coherent() will not see stale data after reading from
|
||||
the DMA engine's MMIO status register to establish that the DMA
|
||||
transfer has completed.
|
||||
|
||||
Please refer to the PCI specification for more information on interactions
|
||||
between PCI transactions.
|
||||
5. A readX() by a CPU thread from the peripheral will complete before
|
||||
any subsequent delay() loop can begin execution on the same thread.
|
||||
This ensures that two MMIO register writes by the CPU to a peripheral
|
||||
will arrive at least 1us apart if the first write is immediately read
|
||||
back with readX() and udelay(1) is called prior to the second
|
||||
writeX():
|
||||
|
||||
(*) readX_relaxed(), writeX_relaxed()
|
||||
writel(42, DEVICE_REGISTER_0); // Arrives at the device...
|
||||
readl(DEVICE_REGISTER_0);
|
||||
udelay(1);
|
||||
writel(42, DEVICE_REGISTER_1); // ...at least 1us before this.
|
||||
|
||||
These are similar to readX() and writeX(), but provide weaker memory
|
||||
ordering guarantees. Specifically, they do not guarantee ordering with
|
||||
respect to normal memory accesses (e.g. DMA buffers) nor do they guarantee
|
||||
ordering with respect to LOCK or UNLOCK operations. If the latter is
|
||||
required, an mmiowb() barrier can be used. Note that relaxed accesses to
|
||||
the same peripheral are guaranteed to be ordered with respect to each
|
||||
other.
|
||||
The ordering properties of __iomem pointers obtained with non-default
|
||||
attributes (e.g. those returned by ioremap_wc()) are specific to the
|
||||
underlying architecture and therefore the guarantees listed above cannot
|
||||
generally be relied upon for accesses to these types of mappings.
|
||||
|
||||
(*) ioreadX(), iowriteX()
|
||||
(*) readX_relaxed(), writeX_relaxed():
|
||||
|
||||
These will perform appropriately for the type of access they're actually
|
||||
doing, be it inX()/outX() or readX()/writeX().
|
||||
These are similar to readX() and writeX(), but provide weaker memory
|
||||
ordering guarantees. Specifically, they do not guarantee ordering with
|
||||
respect to locking, normal memory accesses or delay() loops (i.e.
|
||||
bullets 2-5 above) but they are still guaranteed to be ordered with
|
||||
respect to other accesses from the same CPU thread to the same
|
||||
peripheral when operating on __iomem pointers mapped with the default
|
||||
I/O attributes.
|
||||
|
||||
(*) readsX(), writesX():
|
||||
|
||||
The readsX() and writesX() MMIO accessors are designed for accessing
|
||||
register-based, memory-mapped FIFOs residing on peripherals that are not
|
||||
capable of performing DMA. Consequently, they provide only the ordering
|
||||
guarantees of readX_relaxed() and writeX_relaxed(), as documented above.
|
||||
|
||||
(*) inX(), outX():
|
||||
|
||||
The inX() and outX() accessors are intended to access legacy port-mapped
|
||||
I/O peripherals, which may require special instructions on some
|
||||
architectures (notably x86). The port number of the peripheral being
|
||||
accessed is passed as an argument.
|
||||
|
||||
Since many CPU architectures ultimately access these peripherals via an
|
||||
internal virtual memory mapping, the portable ordering guarantees
|
||||
provided by inX() and outX() are the same as those provided by readX()
|
||||
and writeX() respectively when accessing a mapping with the default I/O
|
||||
attributes.
|
||||
|
||||
Device drivers may expect outX() to emit a non-posted write transaction
|
||||
that waits for a completion response from the I/O peripheral before
|
||||
returning. This is not guaranteed by all architectures and is therefore
|
||||
not part of the portable ordering semantics.
|
||||
|
||||
(*) insX(), outsX():
|
||||
|
||||
As above, the insX() and outsX() accessors provide the same ordering
|
||||
guarantees as readsX() and writesX() respectively when accessing a
|
||||
mapping with the default I/O attributes.
|
||||
|
||||
(*) ioreadX(), iowriteX():
|
||||
|
||||
These will perform appropriately for the type of access they're actually
|
||||
doing, be it inX()/outX() or readX()/writeX().
|
||||
|
||||
With the exception of the string accessors (insX(), outsX(), readsX() and
|
||||
writesX()), all of the above assume that the underlying peripheral is
|
||||
little-endian and will therefore perform byte-swapping operations on big-endian
|
||||
architectures.
|
||||
|
||||
|
||||
========================================
|
||||
|
|
|
@ -0,0 +1,126 @@
|
|||
.. SPDX-License-Identifier: GPL-2.0
|
||||
|
||||
==================
|
||||
BPF Flow Dissector
|
||||
==================
|
||||
|
||||
Overview
|
||||
========
|
||||
|
||||
Flow dissector is a routine that parses metadata out of the packets. It's
|
||||
used in various places in the networking subsystem (RFS, flow hash, etc).
|
||||
|
||||
BPF flow dissector is an attempt to reimplement C-based flow dissector logic
|
||||
in BPF to gain all the benefits of the BPF verifier (namely, limits on the
|
||||
number of instructions and tail calls).
|
||||
|
||||
API
|
||||
===
|
||||
|
||||
BPF flow dissector programs operate on an ``__sk_buff``. However, only a
|
||||
limited set of fields is allowed: ``data``, ``data_end`` and ``flow_keys``.
|
||||
``flow_keys`` is ``struct bpf_flow_keys`` and contains flow dissector input
|
||||
and output arguments.
|
||||
|
||||
The inputs are:
|
||||
* ``nhoff`` - initial offset of the networking header
|
||||
* ``thoff`` - initial offset of the transport header, initialized to nhoff
|
||||
* ``n_proto`` - L3 protocol type, parsed out of L2 header
|
||||
|
||||
Flow dissector BPF program should fill out the rest of the ``struct
|
||||
bpf_flow_keys`` fields. Input arguments ``nhoff/thoff/n_proto`` should also
|
||||
be adjusted accordingly.
|
||||
|
||||
The return code of the BPF program is either BPF_OK to indicate successful
|
||||
dissection, or BPF_DROP to indicate parsing error.
|
||||
|
||||
__sk_buff->data
|
||||
===============
|
||||
|
||||
In the VLAN-less case, this is what the initial state of the BPF flow
|
||||
dissector looks like::
|
||||
|
||||
+------+------+------------+-----------+
|
||||
| DMAC | SMAC | ETHER_TYPE | L3_HEADER |
|
||||
+------+------+------------+-----------+
|
||||
^
|
||||
|
|
||||
+-- flow dissector starts here
|
||||
|
||||
|
||||
.. code:: c
|
||||
|
||||
skb->data + flow_keys->nhoff point to the first byte of L3_HEADER
|
||||
flow_keys->thoff = nhoff
|
||||
flow_keys->n_proto = ETHER_TYPE
|
||||
|
||||
In case of VLAN, flow dissector can be called with two different states.
|
||||
|
||||
Pre-VLAN parsing::
|
||||
|
||||
+------+------+------+-----+-----------+-----------+
|
||||
| DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER |
|
||||
+------+------+------+-----+-----------+-----------+
|
||||
^
|
||||
|
|
||||
+-- flow dissector starts here
|
||||
|
||||
.. code:: c
|
||||
|
||||
skb->data + flow_keys->nhoff point to the first byte of TCI
|
||||
flow_keys->thoff = nhoff
|
||||
flow_keys->n_proto = TPID
|
||||
|
||||
Please note that TPID can be 802.1AD and, hence, BPF program would
|
||||
have to parse VLAN information twice for double tagged packets.
|
||||
|
||||
|
||||
Post-VLAN parsing::
|
||||
|
||||
+------+------+------+-----+-----------+-----------+
|
||||
| DMAC | SMAC | TPID | TCI |ETHER_TYPE | L3_HEADER |
|
||||
+------+------+------+-----+-----------+-----------+
|
||||
^
|
||||
|
|
||||
+-- flow dissector starts here
|
||||
|
||||
.. code:: c
|
||||
|
||||
skb->data + flow_keys->nhoff point to the first byte of L3_HEADER
|
||||
flow_keys->thoff = nhoff
|
||||
flow_keys->n_proto = ETHER_TYPE
|
||||
|
||||
In this case VLAN information has been processed before the flow dissector
|
||||
and BPF flow dissector is not required to handle it.
|
||||
|
||||
|
||||
The takeaway here is as follows: BPF flow dissector program can be called with
|
||||
the optional VLAN header and should gracefully handle both cases: when single
|
||||
or double VLAN is present and when it is not present. The same program
|
||||
can be called for both cases and would have to be written carefully to
|
||||
handle both cases.
|
||||
|
||||
|
||||
Reference Implementation
|
||||
========================
|
||||
|
||||
See ``tools/testing/selftests/bpf/progs/bpf_flow.c`` for the reference
|
||||
implementation and ``tools/testing/selftests/bpf/flow_dissector_load.[hc]``
|
||||
for the loader. bpftool can be used to load BPF flow dissector program as well.
|
||||
|
||||
The reference implementation is organized as follows:
|
||||
* ``jmp_table`` map that contains sub-programs for each supported L3 protocol
|
||||
* ``_dissect`` routine - entry point; it does input ``n_proto`` parsing and
|
||||
does ``bpf_tail_call`` to the appropriate L3 handler
|
||||
|
||||
Since BPF at this point doesn't support looping (or any jumping back),
|
||||
jmp_table is used instead to handle multiple levels of encapsulation (and
|
||||
IPv6 options).
|
||||
|
||||
|
||||
Current Limitations
|
||||
===================
|
||||
BPF flow dissector doesn't support exporting all the metadata that in-kernel
|
||||
C-based implementation can export. A notable example is single VLAN (802.1Q)
|
||||
and double VLAN (802.1AD) tags. Please refer to the ``struct bpf_flow_keys``
|
||||
for the set of information that can currently be exported from the BPF context.
|
|
@ -22,8 +22,6 @@ you'll need the following options as well...
|
|||
CONFIG_DECNET_ROUTER (to be able to add/delete routes)
|
||||
CONFIG_NETFILTER (will be required for the DECnet routing daemon)
|
||||
|
||||
CONFIG_DECNET_ROUTE_FWMARK is optional
|
||||
|
||||
Don't turn on SIOCGIFCONF support for DECnet unless you are really sure
|
||||
that you need it, in general you won't and it can cause ifconfig to
|
||||
malfunction.
|
||||
|
|
|
@ -9,6 +9,7 @@ Contents:
|
|||
netdev-FAQ
|
||||
af_xdp
|
||||
batman-adv
|
||||
bpf_flow_dissector
|
||||
can
|
||||
can_ucan_protocol
|
||||
device_drivers/freescale/dpaa2/index
|
||||
|
|
|
@ -422,6 +422,7 @@ tcp_min_rtt_wlen - INTEGER
|
|||
minimum RTT when it is moved to a longer path (e.g., due to traffic
|
||||
engineering). A longer window makes the filter more resistant to RTT
|
||||
inflations such as transient congestion. The unit is seconds.
|
||||
Possible values: 0 - 86400 (1 day)
|
||||
Default: 300
|
||||
|
||||
tcp_moderate_rcvbuf - BOOLEAN
|
||||
|
@ -1336,6 +1337,7 @@ tag - INTEGER
|
|||
Default value is 0.
|
||||
|
||||
xfrm4_gc_thresh - INTEGER
|
||||
(Obsolete since linux-4.14)
|
||||
The threshold at which we will start garbage collecting for IPv4
|
||||
destination cache entries. At twice this value the system will
|
||||
refuse new allocations.
|
||||
|
@ -1919,6 +1921,7 @@ echo_ignore_all - BOOLEAN
|
|||
Default: 0
|
||||
|
||||
xfrm6_gc_thresh - INTEGER
|
||||
(Obsolete since linux-4.14)
|
||||
The threshold at which we will start garbage collecting for IPv6
|
||||
destination cache entries. At twice this value the system will
|
||||
refuse new allocations.
|
||||
|
|
|
@ -132,7 +132,7 @@ version that should be applied. If there is any doubt, the maintainer
|
|||
will reply and ask what should be done.
|
||||
|
||||
Q: I made changes to only a few patches in a patch series should I resend only those changed?
|
||||
--------------------------------------------------------------------------------------------
|
||||
---------------------------------------------------------------------------------------------
|
||||
A: No, please resend the entire patch series and make sure you do number your
|
||||
patches such that it is clear this is the latest and greatest set of patches
|
||||
that can be applied.
|
||||
|
|
|
@ -1009,16 +1009,18 @@ The kernel interface functions are as follows:
|
|||
|
||||
(*) Check call still alive.
|
||||
|
||||
u32 rxrpc_kernel_check_life(struct socket *sock,
|
||||
struct rxrpc_call *call);
|
||||
bool rxrpc_kernel_check_life(struct socket *sock,
|
||||
struct rxrpc_call *call,
|
||||
u32 *_life);
|
||||
void rxrpc_kernel_probe_life(struct socket *sock,
|
||||
struct rxrpc_call *call);
|
||||
|
||||
The first function returns a number that is updated when ACKs are received
|
||||
from the peer (notably including PING RESPONSE ACKs which we can elicit by
|
||||
sending PING ACKs to see if the call still exists on the server). The
|
||||
caller should compare the numbers of two calls to see if the call is still
|
||||
alive after waiting for a suitable interval.
|
||||
The first function passes back in *_life a number that is updated when
|
||||
ACKs are received from the peer (notably including PING RESPONSE ACKs
|
||||
which we can elicit by sending PING ACKs to see if the call still exists
|
||||
on the server). The caller should compare the numbers of two calls to see
|
||||
if the call is still alive after waiting for a suitable interval. It also
|
||||
returns true as long as the call hasn't yet reached the completed state.
|
||||
|
||||
This allows the caller to work out if the server is still contactable and
|
||||
if the call is still alive on the server while waiting for the server to
|
||||
|
|
|
@ -218,5 +218,4 @@ All other architectures should build just fine too - but they won't have
|
|||
the new syscalls yet.
|
||||
|
||||
Architectures need to implement the new futex_atomic_cmpxchg_inatomic()
|
||||
inline function before writing up the syscalls (that function returns
|
||||
-ENOSYS right now).
|
||||
inline function before wiring up the syscalls.
|
||||
|
|
|
@ -866,14 +866,14 @@ The intent is that compaction has less work to do in the future and to
|
|||
increase the success rate of future high-order allocations such as SLUB
|
||||
allocations, THP and hugetlbfs pages.
|
||||
|
||||
To make it sensible with respect to the watermark_scale_factor parameter,
|
||||
the unit is in fractions of 10,000. The default value of 15,000 means
|
||||
that up to 150% of the high watermark will be reclaimed in the event of
|
||||
a pageblock being mixed due to fragmentation. The level of reclaim is
|
||||
determined by the number of fragmentation events that occurred in the
|
||||
recent past. If this value is smaller than a pageblock then a pageblocks
|
||||
worth of pages will be reclaimed (e.g. 2MB on 64-bit x86). A boost factor
|
||||
of 0 will disable the feature.
|
||||
To make it sensible with respect to the watermark_scale_factor
|
||||
parameter, the unit is in fractions of 10,000. The default value of
|
||||
15,000 on !DISCONTIGMEM configurations means that up to 150% of the high
|
||||
watermark will be reclaimed in the event of a pageblock being mixed due
|
||||
to fragmentation. The level of reclaim is determined by the number of
|
||||
fragmentation events that occurred in the recent past. If this value is
|
||||
smaller than a pageblock then a pageblock's worth of pages will be reclaimed
|
||||
(e.g. 2MB on 64-bit x86). A boost factor of 0 will disable the feature.
|
||||
|
||||
=============================================================
|
||||
|
||||
|
|
|
@ -493,10 +493,8 @@ CPU 에게 기대할 수 있는 최소한의 보장사항 몇가지가 있습니
|
|||
이 타입의 오퍼레이션은 단방향의 투과성 배리어처럼 동작합니다. ACQUIRE
|
||||
오퍼레이션 뒤의 모든 메모리 오퍼레이션들이 ACQUIRE 오퍼레이션 후에
|
||||
일어난 것으로 시스템의 나머지 컴포넌트들에 보이게 될 것이 보장됩니다.
|
||||
LOCK 오퍼레이션과 smp_load_acquire(), smp_cond_acquire() 오퍼레이션도
|
||||
ACQUIRE 오퍼레이션에 포함됩니다. smp_cond_acquire() 오퍼레이션은 컨트롤
|
||||
의존성과 smp_rmb() 를 사용해서 ACQUIRE 의 의미적 요구사항(semantic)을
|
||||
충족시킵니다.
|
||||
LOCK 오퍼레이션과 smp_load_acquire(), smp_cond_load_acquire() 오퍼레이션도
|
||||
ACQUIRE 오퍼레이션에 포함됩니다.
|
||||
|
||||
ACQUIRE 오퍼레이션 앞의 메모리 오퍼레이션들은 ACQUIRE 오퍼레이션 완료 후에
|
||||
수행된 것처럼 보일 수 있습니다.
|
||||
|
@ -2146,33 +2144,40 @@ set_current_state() 는 다음의 것들로 감싸질 수도 있습니다:
|
|||
event_indicated = 1;
|
||||
wake_up_process(event_daemon);
|
||||
|
||||
wake_up() 류에 의해 쓰기 메모리 배리어가 내포됩니다. 만약 그것들이 뭔가를
|
||||
깨운다면요. 이 배리어는 태스크 상태가 지워지기 전에 수행되므로, 이벤트를
|
||||
알리기 위한 STORE 와 태스크 상태를 TASK_RUNNING 으로 설정하는 STORE 사이에
|
||||
위치하게 됩니다.
|
||||
wake_up() 이 무언가를 깨우게 되면, 이 함수는 범용 메모리 배리어를 수행합니다.
|
||||
이 함수가 아무것도 깨우지 않는다면 메모리 배리어는 수행될 수도, 수행되지 않을
|
||||
수도 있습니다; 이 경우에 메모리 배리어를 수행할 거라 오해해선 안됩니다. 이
|
||||
배리어는 태스크 상태가 접근되기 전에 수행되는데, 자세히 말하면 이 이벤트를
|
||||
알리기 위한 STORE 와 TASK_RUNNING 으로 상태를 쓰는 STORE 사이에 수행됩니다:
|
||||
|
||||
CPU 1 CPU 2
|
||||
CPU 1 (Sleeper) CPU 2 (Waker)
|
||||
=============================== ===============================
|
||||
set_current_state(); STORE event_indicated
|
||||
smp_store_mb(); wake_up();
|
||||
STORE current->state <쓰기 배리어>
|
||||
<범용 배리어> STORE current->state
|
||||
LOAD event_indicated
|
||||
STORE current->state ...
|
||||
<범용 배리어> <범용 배리어>
|
||||
LOAD event_indicated if ((LOAD task->state) & TASK_NORMAL)
|
||||
STORE task->state
|
||||
|
||||
한번더 말합니다만, 이 쓰기 메모리 배리어는 이 코드가 정말로 뭔가를 깨울 때에만
|
||||
실행됩니다. 이걸 설명하기 위해, X 와 Y 는 모두 0 으로 초기화 되어 있다는 가정
|
||||
하에 아래의 이벤트 시퀀스를 생각해 봅시다:
|
||||
여기서 "task" 는 깨어나지는 쓰레드이고 CPU 1 의 "current" 와 같습니다.
|
||||
|
||||
반복하지만, wake_up() 이 무언가를 정말 깨운다면 범용 메모리 배리어가 수행될
|
||||
것이 보장되지만, 그렇지 않다면 그런 보장이 없습니다. 이걸 이해하기 위해, X 와
|
||||
Y 는 모두 0 으로 초기화 되어 있다는 가정 하에 아래의 이벤트 시퀀스를 생각해
|
||||
봅시다:
|
||||
|
||||
CPU 1 CPU 2
|
||||
=============================== ===============================
|
||||
X = 1; STORE event_indicated
|
||||
X = 1; Y = 1;
|
||||
smp_mb(); wake_up();
|
||||
Y = 1; wait_event(wq, Y == 1);
|
||||
wake_up(); load from Y sees 1, no memory barrier
|
||||
load from X might see 0
|
||||
LOAD Y LOAD X
|
||||
|
||||
위 예제에서의 경우와 달리 깨우기가 정말로 행해졌다면, CPU 2 의 X 로드는 1 을
|
||||
본다고 보장될 수 있을 겁니다.
|
||||
정말로 깨우기가 행해졌다면, 두 로드 중 (최소한) 하나는 1 을 보게 됩니다.
|
||||
반면에, 실제 깨우기가 행해지지 않았다면, 두 로드 모두 0을 볼 수도 있습니다.
|
||||
|
||||
wake_up_process() 는 항상 범용 메모리 배리어를 수행합니다. 이 배리어 역시
|
||||
태스크 상태가 접근되기 전에 수행됩니다. 특히, 앞의 예제 코드에서 wake_up() 이
|
||||
wake_up_process() 로 대체된다면 두 로드 중 하나는 1을 볼 것이 보장됩니다.
|
||||
|
||||
사용 가능한 깨우기류 함수들로 다음과 같은 것들이 있습니다:
|
||||
|
||||
|
@ -2192,6 +2197,8 @@ wake_up() 류에 의해 쓰기 메모리 배리어가 내포됩니다. 만약
|
|||
wake_up_poll();
|
||||
wake_up_process();
|
||||
|
||||
메모리 순서규칙 관점에서, 이 함수들은 모두 wake_up() 과 같거나 보다 강한 순서
|
||||
보장을 제공합니다.
|
||||
|
||||
[!] 잠재우는 코드와 깨우는 코드에 내포되는 메모리 배리어들은 깨우기 전에
|
||||
이루어진 스토어를 잠재우는 코드가 set_current_state() 를 호출한 후에 행하는
|
||||
|
|
|
@ -321,7 +321,7 @@ cpu's hardware control block.
|
|||
4.8 KVM_GET_DIRTY_LOG (vm ioctl)
|
||||
|
||||
Capability: basic
|
||||
Architectures: x86
|
||||
Architectures: all
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_dirty_log (in/out)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
@ -3810,7 +3810,7 @@ to I/O ports.
|
|||
4.117 KVM_CLEAR_DIRTY_LOG (vm ioctl)
|
||||
|
||||
Capability: KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
|
||||
Architectures: x86
|
||||
Architectures: x86, arm, arm64, mips
|
||||
Type: vm ioctl
|
||||
Parameters: struct kvm_dirty_log (in)
|
||||
Returns: 0 on success, -1 on error
|
||||
|
@ -3830,8 +3830,9 @@ The ioctl clears the dirty status of pages in a memory slot, according to
|
|||
the bitmap that is passed in struct kvm_clear_dirty_log's dirty_bitmap
|
||||
field. Bit 0 of the bitmap corresponds to page "first_page" in the
|
||||
memory slot, and num_pages is the size in bits of the input bitmap.
|
||||
Both first_page and num_pages must be a multiple of 64. For each bit
|
||||
that is set in the input bitmap, the corresponding page is marked "clean"
|
||||
first_page must be a multiple of 64; num_pages must also be a multiple of
|
||||
64 unless first_page + num_pages is the size of the memory slot. For each
|
||||
bit that is set in the input bitmap, the corresponding page is marked "clean"
|
||||
in KVM's dirty bitmap, and dirty tracking is re-enabled for that page
|
||||
(for example via write-protection, or by clearing the dirty bit in
|
||||
a page table entry).
|
||||
|
@ -4799,7 +4800,7 @@ and injected exceptions.
|
|||
|
||||
7.18 KVM_CAP_MANUAL_DIRTY_LOG_PROTECT
|
||||
|
||||
Architectures: all
|
||||
Architectures: x86, arm, arm64, mips
|
||||
Parameters: args[0] whether feature should be enabled or not
|
||||
|
||||
With this capability enabled, KVM_GET_DIRTY_LOG will not automatically
|
||||
|
|
|
@ -59,7 +59,7 @@ If that assumption is ever broken then the stacks will become corrupt.
|
|||
|
||||
The currently assigned IST stacks are :-
|
||||
|
||||
* DOUBLEFAULT_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
|
||||
* ESTACK_DF. EXCEPTION_STKSZ (PAGE_SIZE).
|
||||
|
||||
Used for interrupt 8 - Double Fault Exception (#DF).
|
||||
|
||||
|
@ -68,7 +68,7 @@ The currently assigned IST stacks are :-
|
|||
Using a separate stack allows the kernel to recover from it well enough
|
||||
in many cases to still output an oops.
|
||||
|
||||
* NMI_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
|
||||
* ESTACK_NMI. EXCEPTION_STKSZ (PAGE_SIZE).
|
||||
|
||||
Used for non-maskable interrupts (NMI).
|
||||
|
||||
|
@ -76,7 +76,7 @@ The currently assigned IST stacks are :-
|
|||
middle of switching stacks. Using IST for NMI events avoids making
|
||||
assumptions about the previous state of the kernel stack.
|
||||
|
||||
* DEBUG_STACK. DEBUG_STKSZ
|
||||
* ESTACK_DB. EXCEPTION_STKSZ (PAGE_SIZE).
|
||||
|
||||
Used for hardware debug interrupts (interrupt 1) and for software
|
||||
debug interrupts (INT3).
|
||||
|
@ -86,7 +86,12 @@ The currently assigned IST stacks are :-
|
|||
avoids making assumptions about the previous state of the kernel
|
||||
stack.
|
||||
|
||||
* MCE_STACK. EXCEPTION_STKSZ (PAGE_SIZE).
|
||||
To handle nested #DB correctly there exist two instances of DB stacks. On
|
||||
#DB entry the IST stackpointer for #DB is switched to the second instance
|
||||
so a nested #DB starts from a clean stack. The nested #DB switches
|
||||
the IST stackpointer to a guard hole to catch triple nesting.
|
||||
|
||||
* ESTACK_MCE. EXCEPTION_STKSZ (PAGE_SIZE).
|
||||
|
||||
Used for interrupt 18 - Machine Check Exception (#MC).
|
||||
|
||||
|
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue