diff --git a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg deleted file mode 100644 index 9bbb1944f962..000000000000 --- a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBH.svg +++ /dev/null @@ -1,499 +0,0 @@ - - - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - rcu_bh - - struct - - rcu_node - - struct - - rcu_node - - rcu_node - - struct - - struct - - rcu_data - - struct - - rcu_data - - struct - - rcu_data - - struct - - rcu_data - - struct rcu_state - - rcu_sched - - - - - - - - - - - diff --git a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg b/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg deleted file mode 100644 index 21ba7823479d..000000000000 --- a/Documentation/RCU/Design/Data-Structures/BigTreeClassicRCUBHdyntick.svg +++ /dev/null @@ -1,695 +0,0 @@ - - - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - rcu_bh - - struct - - rcu_node - - struct - - rcu_node - - rcu_node - - struct - - struct - - rcu_data - - struct - - rcu_data - - struct - - rcu_data - - struct - - rcu_data - - struct rcu_state - - struct - - rcu_dynticks - - struct - - rcu_dynticks - - struct - - rcu_dynticks - - struct - - rcu_dynticks - - rcu_sched - - - - - diff --git a/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg deleted file mode 100644 index 15adcac036c7..000000000000 --- a/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntick.svg +++ /dev/null @@ -1,741 +0,0 @@ - - - - - - - - - - - - image/svg+xml - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - rcu_bh - - struct - - rcu_node - - struct - - rcu_node - - rcu_node - - struct - - struct - - rcu_data - - struct - - rcu_data - - struct - - rcu_data - - struct - - rcu_data - - struct rcu_state - - struct - - rcu_dynticks - - struct - - rcu_dynticks - - struct - - rcu_dynticks - - struct - - rcu_dynticks - - rcu_preempt - - rcu_sched - - - - - diff --git a/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg index bbc3801470d0..3a1a4f85dc3a 100644 --- a/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg +++ b/Documentation/RCU/Design/Data-Structures/BigTreePreemptRCUBHdyntickCB.svg @@ -13,12 +13,12 @@ xmlns="http://www.w3.org/2000/svg" xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" - width="7.4in" - height="9.9in" - viewBox="-44 -44 8938 11938" + width="7.4000001in" + height="7.9000001in" + viewBox="-44 -44 8938 9526.283" id="svg2" version="1.1" - inkscape:version="0.48.4 r9939" + inkscape:version="0.92.2pre0 (973e216, 2017-07-25)" sodipodi:docname="BigTreePreemptRCUBHdyntickCB.svg"> @@ -37,15 +37,46 @@ + + + + + + + style="overflow:visible"> + d="M 0,0 5,-5 -12.5,0 5,5 Z" + style="fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt" + transform="matrix(-0.4,0,0,-0.4,-4,0)" + inkscape:connector-curvature="0" /> + style="fill:none;stroke-width:0.025in" + id="g4" + transform="translate(0,-2415.6743)"> - - - - - - - + style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8" + id="polyline20" + transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" /> - + style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8" + id="polyline24" + transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" /> - + style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8" + id="polyline28" + transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" /> - + style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8" + id="polyline32" + transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" /> - + style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8" + id="polyline36" + transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" /> - + style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8" + id="polyline40" + transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" /> + style="stroke:#00d1d1;stroke-width:29.99464035;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" + id="polyline46" + transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" /> + style="stroke:#00d1d1;stroke-width:29.99464035;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" + id="polyline50" + transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" /> + style="stroke:#00d1d1;stroke-width:29.99464035;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" + id="polyline54" + transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" /> + style="stroke:#00d1d1;stroke-width:29.99464035;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" + id="polyline58" + transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" /> + style="stroke:#00d1d1;stroke-width:29.99464035;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" + id="polyline62" + transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" /> - - - - - - + - + - + - + - + - + - + - + - + + style="stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter;marker-end:url(#Arrow1Mend)" + id="polyline108" + transform="matrix(1,0,0,0.95854605,-604.69715,525.62477)" /> + style="stroke:#000000;stroke-width:30;stroke-linecap:butt;stroke-linejoin:miter;marker-end:url(#Arrow1Mend)" + id="polyline114" + transform="matrix(1,0,0,0.95854605,-604.69715,525.62477)" /> - + + + - - - - - - struct + id="text140" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">struct rcu_head + id="text142" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">rcu_head struct + id="text144" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">struct rcu_head + id="text146" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">rcu_head struct + id="text148" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">struct rcu_head + id="text150" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">rcu_head rcu_sched + id="text152" + style="font-style:normal;font-weight:normal;font-size:187.978302px;font-family:Helvetica;text-anchor:end;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">rcu_state + rcu_bh - - struct + id="text156" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">struct rcu_node + id="text158" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">rcu_node struct + id="text160" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">struct rcu_node + id="text162" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">rcu_node rcu_node + id="text164" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">rcu_node struct + id="text166" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">struct struct + id="text168" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">struct rcu_data + id="text170" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">rcu_data struct + id="text172" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">struct rcu_data + id="text174" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">rcu_data struct + id="text176" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">struct rcu_data + id="text178" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">rcu_data struct + id="text180" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">struct rcu_data + id="text182" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:middle;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">rcu_data struct rcu_state + id="text184" + style="font-style:normal;font-weight:bold;font-size:187.978302px;font-family:Courier;text-anchor:start;fill:#000000;stroke-width:0.02447634in" + transform="scale(1.0213945,0.97905363)">struct rcu_state - struct - rcu_dynticks - struct - rcu_dynticks - struct - rcu_dynticks - struct - rcu_dynticks - rcu_preempt + style="stroke:#00d1d1;stroke-width:29.99464035;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" + id="polyline204" + transform="matrix(1,0,0,0.95854605,12.340758,1579.9033)" /> + diff --git a/Documentation/RCU/Design/Data-Structures/Data-Structures.html b/Documentation/RCU/Design/Data-Structures/Data-Structures.html index 1d2051c0c3fc..18f179807563 100644 --- a/Documentation/RCU/Design/Data-Structures/Data-Structures.html +++ b/Documentation/RCU/Design/Data-Structures/Data-Structures.html @@ -23,8 +23,6 @@ to each other. The rcu_segcblist Structure
  • The rcu_data Structure -
  • - The rcu_dynticks Structure
  • The rcu_head Structure
  • @@ -127,9 +125,11 @@ CPUs, RCU would configure the rcu_node tree as follows:

    RCU currently permits up to a four-level tree, which on a 64-bit system accommodates up to 4,194,304 CPUs, though only a mere 524,288 CPUs for 32-bit systems. -On the other hand, you can set CONFIG_RCU_FANOUT to be -as small as 2 if you wish, which would permit only 16 CPUs, which -is useful for testing. +On the other hand, you can set both CONFIG_RCU_FANOUT and +CONFIG_RCU_FANOUT_LEAF to be as small as 2, which would result +in a 16-CPU test using a 4-level tree. +This can be useful for testing large-system capabilities on small test +machines.

    This multi-level combining tree allows us to get most of the performance and scalability @@ -154,44 +154,9 @@ on that root rcu_node structure remains acceptably low. keeping lock contention under control at all tree levels regardless of the level of loading on the system. -

    The Linux kernel actually supports multiple flavors of RCU -running concurrently, so RCU builds separate data structures for each -flavor. -For example, for CONFIG_TREE_RCU=y kernels, RCU provides -rcu_sched and rcu_bh, as shown below: - -

    BigTreeClassicRCUBH.svg - -

    Energy efficiency is increasingly important, and for that -reason the Linux kernel provides CONFIG_NO_HZ_IDLE, which -turns off the scheduling-clock interrupts on idle CPUs, which in -turn allows those CPUs to attain deeper sleep states and to consume -less energy. -CPUs whose scheduling-clock interrupts have been turned off are -said to be in dyntick-idle mode. -RCU must handle dyntick-idle CPUs specially -because RCU would otherwise wake up each CPU on every grace period, -which would defeat the whole purpose of CONFIG_NO_HZ_IDLE. -RCU uses the rcu_dynticks structure to track -which CPUs are in dyntick idle mode, as shown below: - -

    BigTreeClassicRCUBHdyntick.svg - -

    However, if a CPU is in dyntick-idle mode, it is in that mode -for all flavors of RCU. -Therefore, a single rcu_dynticks structure is allocated per -CPU, and all of a given CPU's rcu_data structures share -that rcu_dynticks, as shown in the figure. - -

    Kernels built with CONFIG_PREEMPT_RCU support -rcu_preempt in addition to rcu_sched and rcu_bh, as shown below: - -

    BigTreePreemptRCUBHdyntick.svg -

    RCU updaters wait for normal grace periods by registering RCU callbacks, either directly via call_rcu() and friends (namely call_rcu_bh() and call_rcu_sched()), -there being a separate interface per flavor of RCU) or indirectly via synchronize_rcu() and friends. RCU callbacks are represented by rcu_head structures, which are queued on rcu_data structures while they are @@ -214,9 +179,6 @@ its own synchronization:

  • Each rcu_node structure has a spinlock.
  • The fields in rcu_data are private to the corresponding CPU, although a few can be read and written by other CPUs. -
  • Similarly, the fields in rcu_dynticks are private - to the corresponding CPU, although a few can be read by - other CPUs.

    It is important to note that different data structures can have @@ -272,11 +234,6 @@ follows: access to this information from the corresponding CPU. Finally, this structure records past dyntick-idle state for the corresponding CPU and also tracks statistics. -

  • rcu_dynticks: - This per-CPU structure tracks the current dyntick-idle - state for the corresponding CPU. - Unlike the other three structures, the rcu_dynticks - structure is not replicated per RCU flavor.
  • rcu_head: This structure represents RCU callbacks, and is the only structure allocated and managed by RCU users. @@ -287,14 +244,14 @@ follows:

    If all you wanted from this article was a general notion of how RCU's data structures are related, you are done. Otherwise, each of the following sections give more details on -the rcu_state, rcu_node, rcu_data, -and rcu_dynticks data structures. +the rcu_state, rcu_node and rcu_data data +structures.

    The rcu_state Structure

    The rcu_state structure is the base structure that -represents a flavor of RCU. +represents the state of RCU in the system. This structure forms the interconnection between the rcu_node and rcu_data structures, tracks grace periods, contains the lock used to @@ -389,7 +346,7 @@ sequence number. The bottom two bits are the state of the current grace period, which can be zero for not yet started or one for in progress. In other words, if the bottom two bits of ->gp_seq are -zero, the corresponding flavor of RCU is idle. +zero, then RCU is idle. Any other value in the bottom two bits indicates that something is broken. This field is protected by the root rcu_node structure's ->lock field. @@ -419,10 +376,10 @@ as follows: grace period in jiffies. It is protected by the root rcu_node's ->lock. -

    The ->name field points to the name of the RCU flavor -(for example, “rcu_sched”), and is constant. -The ->abbr field contains a one-character abbreviation, -for example, “s” for RCU-sched. +

    The ->name and ->abbr fields distinguish +between preemptible RCU (“rcu_preempt” and “p”) +and non-preemptible RCU (“rcu_sched” and “s”). +These fields are used for diagnostic and tracing purposes.

    The rcu_node Structure

    @@ -971,25 +928,31 @@ this rcu_segcblist structure, not the ->head pointer. The reason for this is that all the ready-to-invoke callbacks (that is, those in the RCU_DONE_TAIL segment) are extracted -all at once at callback-invocation time. +all at once at callback-invocation time (rcu_do_batch), due +to which ->head may be set to NULL if there are no not-done +callbacks remaining in the rcu_segcblist. If callback invocation must be postponed, for example, because a high-priority process just woke up on this CPU, then the remaining -callbacks are placed back on the RCU_DONE_TAIL segment. -Either way, the ->len and ->len_lazy counts -are adjusted after the corresponding callbacks have been invoked, and so -again it is the ->len count that accurately reflects whether -or not there are callbacks associated with this rcu_segcblist -structure. +callbacks are placed back on the RCU_DONE_TAIL segment and +->head once again points to the start of the segment. +In short, the head field can briefly be NULL even though the +CPU has callbacks present the entire time. +Therefore, it is not appropriate to test the ->head pointer +for NULL. + +

    In contrast, the ->len and ->len_lazy counts +are adjusted only after the corresponding callbacks have been invoked. +This means that the ->len count is zero only if +the rcu_segcblist structure really is devoid of callbacks. Of course, off-CPU sampling of the ->len count requires -the use of appropriate synchronization, for example, memory barriers. +careful use of appropriate synchronization, for example, memory barriers. This synchronization can be a bit subtle, particularly in the case of rcu_barrier().

    The rcu_data Structure

    -

    The rcu_data maintains the per-CPU state for the -corresponding flavor of RCU. +

    The rcu_data maintains the per-CPU state for the RCU subsystem. The fields in this structure may be accessed only from the corresponding CPU (and from tracing) unless otherwise stated. This structure is the @@ -1015,30 +978,19 @@ as follows:

       1   int cpu;
    -  2   struct rcu_state *rsp;
    -  3   struct rcu_node *mynode;
    -  4   struct rcu_dynticks *dynticks;
    -  5   unsigned long grpmask;
    -  6   bool beenonline;
    +  2   struct rcu_node *mynode;
    +  3   unsigned long grpmask;
    +  4   bool beenonline;
     

    The ->cpu field contains the number of the -corresponding CPU, the ->rsp pointer references -the corresponding rcu_state structure (and is most frequently -used to locate the name of the corresponding flavor of RCU for tracing), -and the ->mynode field references the corresponding -rcu_node structure. +corresponding CPU and the ->mynode field references the +corresponding rcu_node structure. The ->mynode is used to propagate quiescent states up the combining tree. -

    The ->dynticks pointer references the -rcu_dynticks structure corresponding to this -CPU. -Recall that a single per-CPU instance of the rcu_dynticks -structure is shared among all flavors of RCU. -These first four fields are constant and therefore require not -synchronization. +These two fields are constant and therefore do not require synchronization. -

    The ->grpmask field indicates the bit in +

    The ->grpmask field indicates the bit in the ->mynode->qsmask corresponding to this rcu_data structure, and is also used when propagating quiescent states. @@ -1057,12 +1009,12 @@ as follows: 3 bool cpu_no_qs; 4 bool core_needs_qs; 5 bool gpwrap; - 6 unsigned long rcu_qs_ctr_snap; -

    The ->gp_seq and ->gp_seq_needed -fields are the counterparts of the fields of the same name -in the rcu_state and rcu_node structures. +

    The ->gp_seq field is the counterpart of the field of the same +name in the rcu_state and rcu_node structures. The +->gp_seq_needed field is the counterpart of the field of the same +name in the rcu_node structure. They may each lag up to one behind their rcu_node counterparts, but in CONFIG_NO_HZ_IDLE and CONFIG_NO_HZ_FULL kernels can lag @@ -1103,10 +1055,6 @@ CPU has remained idle for so long that the gp_seq counter is in danger of overflow, which will cause the CPU to disregard the values of its counters on its next exit from idle. -Finally, the rcu_qs_ctr_snap field is used to detect -cases where a given operation has resulted in a quiescent state -for all flavors of RCU, for example, cond_resched() -when RCU has indicated a need for quiescent states.

    RCU Callback Handling
    @@ -1179,26 +1127,22 @@ Finally, the ->dynticks_fqs field is used to count the number of times this CPU is determined to be in dyntick-idle state, and is used for tracing and debugging purposes. -

    -The rcu_dynticks Structure

    - -

    The rcu_dynticks maintains the per-CPU dyntick-idle state -for the corresponding CPU. -Unlike the other structures, rcu_dynticks is not -replicated over the different flavors of RCU. -The fields in this structure may be accessed only from the corresponding -CPU (and from tracing) unless otherwise stated. -Its fields are as follows: +

    +This portion of the rcu_data structure is declared as follows:

       1   long dynticks_nesting;
       2   long dynticks_nmi_nesting;
       3   atomic_t dynticks;
       4   bool rcu_need_heavy_qs;
    -  5   unsigned long rcu_qs_ctr;
    -  6   bool rcu_urgent_qs;
    +  5   bool rcu_urgent_qs;
     
    +

    These fields in the rcu_data structure maintain the per-CPU dyntick-idle +state for the corresponding CPU. +The fields may be accessed only from the corresponding CPU (and from tracing) +unless otherwise stated. +

    The ->dynticks_nesting field counts the nesting depth of process execution, so that in normal circumstances this counter has value zero or one. @@ -1240,19 +1184,12 @@ it is willing to call for heavy-weight dyntick-counter operations. This flag is checked by RCU's context-switch and cond_resched() code, which provide a momentary idle sojourn in response. -

    The ->rcu_qs_ctr field is used to record -quiescent states from cond_resched(). -Because cond_resched() can execute quite frequently, this -must be quite lightweight, as in a non-atomic increment of this -per-CPU field. -

    Finally, the ->rcu_urgent_qs field is used to record -the fact that the RCU core code would really like to see a quiescent -state from the corresponding CPU, with the various other fields indicating -just how badly RCU wants this quiescent state. -This flag is checked by RCU's context-switch and cond_resched() -code, which, if nothing else, non-atomically increment ->rcu_qs_ctr -in response. +the fact that the RCU core code would really like to see a quiescent state from +the corresponding CPU, with the various other fields indicating just how badly +RCU wants this quiescent state. +This flag is checked by RCU's context-switch path +(rcu_note_context_switch) and the cond_resched code. @@ -1425,11 +1362,11 @@ the last part of the array, thus traversing only the leaf

    Summary

    -So each flavor of RCU is represented by an rcu_state structure, +So the state of RCU is represented by an rcu_state structure, which contains a combining tree of rcu_node and rcu_data structures. Finally, in CONFIG_NO_HZ_IDLE kernels, each CPU's dyntick-idle -state is tracked by an rcu_dynticks structure. +state is tracked by dynticks-related fields in the rcu_data structure. If you made it this far, you are well prepared to read the code walkthroughs in the other articles in this series. diff --git a/Documentation/RCU/Design/Data-Structures/blkd_task.svg b/Documentation/RCU/Design/Data-Structures/blkd_task.svg index 00e810bb8419..bed13e9ecab8 100644 --- a/Documentation/RCU/Design/Data-Structures/blkd_task.svg +++ b/Documentation/RCU/Design/Data-Structures/blkd_task.svg @@ -14,12 +14,12 @@ xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" width="10.1in" - height="8.6in" - viewBox="-44 -44 12088 10288" + height="6.5999999in" + viewBox="-44 -44 12088 7895.4414" id="svg2" version="1.1" - inkscape:version="0.48.4 r9939" - sodipodi:docname="blkd_task.fig"> + inkscape:version="0.92.2pre0 (973e216, 2017-07-25)" + sodipodi:docname="blkd_task.svg"> @@ -37,15 +37,16 @@ + style="overflow:visible"> + d="M 0,0 5,-5 -12.5,0 5,5 Z" + style="fill-rule:evenodd;stroke:#000000;stroke-width:1.00000003pt" + transform="matrix(-0.4,0,0,-0.4,-4,0)" + inkscape:connector-curvature="0" /> + inkscape:cx="456.40569" + inkscape:cy="348.88682" + inkscape:window-x="0" + inkscape:window-y="0" + inkscape:window-maximized="1" + inkscape:current-layer="g4" + showguides="false" /> + style="fill:none;stroke-width:0.025in" + id="g4" + transform="translate(0,-2393.6637)"> - - - - + style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8" + id="polyline14" + transform="translate(23.757862,2185.7233)" /> - + style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8" + id="polyline18" + transform="translate(23.757862,2185.7233)" /> - + style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8" + id="polyline22" + transform="translate(23.757862,2185.7233)" /> - + style="stroke:#00ff00;stroke-width:14;stroke-miterlimit:8" + id="polyline26" + transform="translate(23.757862,2185.7233)" /> + style="stroke:#00d1d1;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" + id="polyline32" + transform="translate(23.757862,2185.7233)" /> + style="stroke:#00d1d1;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" + id="polyline36" + transform="translate(23.757862,2185.7233)" /> + style="stroke:#00d1d1;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" + id="polyline40" + transform="translate(23.757862,2185.7233)" /> + style="stroke:#00d1d1;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" + id="polyline44" + transform="translate(23.757862,2185.7233)" /> + style="stroke:#00d1d1;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" + id="polyline48" + transform="translate(23.757862,2185.7233)" /> - - - - - + + + - - - - - - + points="7350,2850 7350,5100 5550,4350 5550,3450 " + style="fill:#ffbfbf;stroke:#000000;stroke-width:14;stroke-linecap:butt;stroke-linejoin:miter;stroke-dasharray:120, 120" + id="polygon106" + transform="translate(23.757862,2185.7233)" /> + style="stroke:#000000;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" + id="polyline108" + transform="translate(23.757862,2185.7233)" /> + style="stroke:#000000;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" + id="polyline114" + transform="translate(23.757862,2185.7233)" /> + style="stroke:#000000;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" + id="polyline120" + transform="translate(23.757862,2185.7233)" /> + style="stroke:#000000;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" + id="polyline126" + transform="translate(23.757862,2185.7233)" /> + style="stroke:#000000;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" + id="polyline130" + transform="translate(23.757862,2185.7233)" /> + struct + + rcu_node + + struct + + rcu_node + + struct + + rcu_data + + struct + + rcu_data + + struct + + rcu_data + + struct + + rcu_data + + struct rcu_state + + + + + + + + + + rcu_bh + id="text178" + style="font-style:normal;font-weight:normal;font-size:192px;font-family:Helvetica;text-anchor:end;fill:#000000">rcu_state struct - - rcu_node - - struct - - rcu_node - - struct - - rcu_data - - struct - - rcu_data - - struct - - rcu_data - - struct - - rcu_data - - struct rcu_state - - struct - - rcu_dynticks - - struct - - rcu_dynticks - - struct - - rcu_dynticks - - struct - - rcu_dynticks - - rcu_sched - - T3 + id="text180" + style="font-style:normal;font-weight:normal;font-size:216px;font-family:Helvetica;text-anchor:middle;fill:#000000">T3 T2 + id="text182" + style="font-style:normal;font-weight:normal;font-size:216px;font-family:Helvetica;text-anchor:middle;fill:#000000">T2 T1 + id="text184" + style="font-style:normal;font-weight:normal;font-size:216px;font-family:Helvetica;text-anchor:middle;fill:#000000">T1 + style="stroke:#00d1d1;stroke-width:30.00057793;stroke-linecap:butt;stroke-linejoin:miter;stroke-miterlimit:4;stroke-dasharray:none;marker-end:url(#Arrow1Mend)" + id="polyline186" + transform="translate(23.757862,2185.7233)" /> rcu_node + id="text198" + style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:middle;fill:#000000">rcu_node struct + id="text200" + style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:middle;fill:#000000">struct blkd_tasks + id="text202" + style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:start;fill:#000000">blkd_tasks gp_tasks + id="text204" + style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:start;fill:#000000">gp_tasks exp_tasks + id="text206" + style="font-style:normal;font-weight:bold;font-size:192px;font-family:Courier;text-anchor:start;fill:#000000">exp_tasks diff --git a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html index e62c7c34a369..8e4f873b979f 100644 --- a/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html +++ b/Documentation/RCU/Design/Expedited-Grace-Periods/Expedited-Grace-Periods.html @@ -160,9 +160,9 @@ was in flight. If the CPU is idle, then sync_sched_exp_handler() reports the quiescent state. -

    -Otherwise, the handler invokes resched_cpu(), which forces -a future context switch. +

    Otherwise, the handler forces a future context switch by setting the +NEED_RESCHED flag of the current task's thread flag and the CPU preempt +counter. At the time of the context switch, the CPU reports the quiescent state. Should the CPU go offline first, it will report the quiescent state at that time. diff --git a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html index a346ce0116eb..e4d94fba6c89 100644 --- a/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html +++ b/Documentation/RCU/Design/Memory-Ordering/Tree-RCU-Memory-Ordering.html @@ -77,7 +77,7 @@ The key point is that the lock-acquisition functions, including smp_mb__after_unlock_lock() immediately after successful acquisition of the lock. -

    Therefore, for any given rcu_node struction, any access +

    Therefore, for any given rcu_node structure, any access happening before one of the above lock-release functions will be seen by all CPUs as happening before any access happening after a later one of the above lock-acquisition functions. diff --git a/Documentation/RCU/Design/Requirements/Requirements.html b/Documentation/RCU/Design/Requirements/Requirements.html index 43c4e2f05f40..9fca73e03a98 100644 --- a/Documentation/RCU/Design/Requirements/Requirements.html +++ b/Documentation/RCU/Design/Requirements/Requirements.html @@ -900,8 +900,6 @@ Except where otherwise noted, these non-guarantees were premeditated. Grace Periods Don't Partition Read-Side Critical Sections

  • Read-Side Critical Sections Don't Partition Grace Periods -
  • - Disabling Preemption Does Not Block Grace Periods

    Readers Impose Minimal Ordering

    @@ -1259,54 +1257,6 @@ of RCU grace periods.
  •  
     
    -

    -Disabling Preemption Does Not Block Grace Periods

    - -

    -There was a time when disabling preemption on any given CPU would block -subsequent grace periods. -However, this was an accident of implementation and is not a requirement. -And in the current Linux-kernel implementation, disabling preemption -on a given CPU in fact does not block grace periods, as Oleg Nesterov -demonstrated. - -

    -If you need a preempt-disable region to block grace periods, you need to add -rcu_read_lock() and rcu_read_unlock(), for example -as follows: - -

    -
    - 1 preempt_disable();
    - 2 rcu_read_lock();
    - 3 do_something();
    - 4 rcu_read_unlock();
    - 5 preempt_enable();
    - 6
    - 7 /* Spinlocks implicitly disable preemption. */
    - 8 spin_lock(&mylock);
    - 9 rcu_read_lock();
    -10 do_something();
    -11 rcu_read_unlock();
    -12 spin_unlock(&mylock);
    -
    -
    - -

    -In theory, you could enter the RCU read-side critical section first, -but it is more efficient to keep the entire RCU read-side critical -section contained in the preempt-disable region as shown above. -Of course, RCU read-side critical sections that extend outside of -preempt-disable regions will work correctly, but such critical sections -can be preempted, which forces rcu_read_unlock() to do -more work. -And no, this is not an invitation to enclose all of your RCU -read-side critical sections within preempt-disable regions, because -doing so would degrade real-time response. - -

    -This non-requirement appeared with preemptible RCU. -

    Parallelism Facts of Life

    @@ -1381,6 +1331,7 @@ Classes of quality-of-implementation requirements are as follows:

    1. Specialization
    2. Performance and Scalability +
    3. Forward Progress
    4. Composability
    5. Corner Cases
    @@ -1645,7 +1596,7 @@ used in place of synchronize_rcu() as follows: 16 struct foo *p; 17 18 spin_lock(&gp_lock); -19 p = rcu_dereference(gp); +19 p = rcu_access_pointer(gp); 20 if (!p) { 21 spin_unlock(&gp_lock); 22 return false; @@ -1822,6 +1773,106 @@ so it is too early to tell whether they will stand the test of time. RCU thus provides a range of tools to allow updaters to strike the required tradeoff between latency, flexibility and CPU overhead. +

    Forward Progress

    + +

    +In theory, delaying grace-period completion and callback invocation +is harmless. +In practice, not only are memory sizes finite but also callbacks sometimes +do wakeups, and sufficiently deferred wakeups can be difficult +to distinguish from system hangs. +Therefore, RCU must provide a number of mechanisms to promote forward +progress. + +

    +These mechanisms are not foolproof, nor can they be. +For one simple example, an infinite loop in an RCU read-side critical +section must by definition prevent later grace periods from ever completing. +For a more involved example, consider a 64-CPU system built with +CONFIG_RCU_NOCB_CPU=y and booted with rcu_nocbs=1-63, +where CPUs 1 through 63 spin in tight loops that invoke +call_rcu(). +Even if these tight loops also contain calls to cond_resched() +(thus allowing grace periods to complete), CPU 0 simply will +not be able to invoke callbacks as fast as the other 63 CPUs can +register them, at least not until the system runs out of memory. +In both of these examples, the Spiderman principle applies: With great +power comes great responsibility. +However, short of this level of abuse, RCU is required to +ensure timely completion of grace periods and timely invocation of +callbacks. + +

    +RCU takes the following steps to encourage timely completion of +grace periods: + +

      +
    1. If a grace period fails to complete within 100 milliseconds, + RCU causes future invocations of cond_resched() on + the holdout CPUs to provide an RCU quiescent state. + RCU also causes those CPUs' need_resched() invocations + to return true, but only after the corresponding CPU's + next scheduling-clock. +
    2. CPUs mentioned in the nohz_full kernel boot parameter + can run indefinitely in the kernel without scheduling-clock + interrupts, which defeats the above need_resched() + strategem. + RCU will therefore invoke resched_cpu() on any + nohz_full CPUs still holding out after + 109 milliseconds. +
    3. In kernels built with CONFIG_RCU_BOOST=y, if a given + task that has been preempted within an RCU read-side critical + section is holding out for more than 500 milliseconds, + RCU will resort to priority boosting. +
    4. If a CPU is still holding out 10 seconds into the grace + period, RCU will invoke resched_cpu() on it regardless + of its nohz_full state. +
    + +

    +The above values are defaults for systems running with HZ=1000. +They will vary as the value of HZ varies, and can also be +changed using the relevant Kconfig options and kernel boot parameters. +RCU currently does not do much sanity checking of these +parameters, so please use caution when changing them. +Note that these forward-progress measures are provided only for RCU, +not for +SRCU or +Tasks RCU. + +

    +RCU takes the following steps in call_rcu() to encourage timely +invocation of callbacks when any given non-rcu_nocbs CPU has +10,000 callbacks, or has 10,000 more callbacks than it had the last time +encouragement was provided: + +

      +
    1. Starts a grace period, if one is not already in progress. +
    2. Forces immediate checking for quiescent states, rather than + waiting for three milliseconds to have elapsed since the + beginning of the grace period. +
    3. Immediately tags the CPU's callbacks with their grace period + completion numbers, rather than waiting for the RCU_SOFTIRQ + handler to get around to it. +
    4. Lifts callback-execution batch limits, which speeds up callback + invocation at the expense of degrading realtime response. +
    + +

    +Again, these are default values when running at HZ=1000, +and can be overridden. +Again, these forward-progress measures are provided only for RCU, +not for +SRCU or +Tasks RCU. +Even for RCU, callback-invocation forward progress for rcu_nocbs +CPUs is much less well-developed, in part because workloads benefiting +from rcu_nocbs CPUs tend to invoke call_rcu() +relatively infrequently. +If workloads emerge that need both rcu_nocbs CPUs and high +call_rcu() invocation rates, then additional forward-progress +work will be required. +

    Composability

    @@ -2272,7 +2323,7 @@ that meets this requirement. Furthermore, NMI handlers can be interrupted by what appear to RCU to be normal interrupts. One way that this can happen is for code that directly invokes -rcu_irq_enter() and rcu_irq_exit() to be called +rcu_irq_enter() and rcu_irq_exit() to be called from an NMI handler. This astonishing fact of life prompted the current code structure, which has rcu_irq_enter() invoking rcu_nmi_enter() @@ -2294,7 +2345,7 @@ via del_timer_sync() or similar.

    Unfortunately, there is no way to cancel an RCU callback; once you invoke call_rcu(), the callback function is -going to eventually be invoked, unless the system goes down first. +eventually going to be invoked, unless the system goes down first. Because it is normally considered socially irresponsible to crash the system in response to a module unload request, we need some other way to deal with in-flight RCU callbacks. @@ -2424,23 +2475,37 @@ for context-switch-heavy CONFIG_NO_HZ_FULL=y workloads, but there is room for further improvement.

    -In the past, it was forbidden to disable interrupts across an -rcu_read_unlock() unless that interrupt-disabled region -of code also included the matching rcu_read_lock(). -Violating this restriction could result in deadlocks involving the -scheduler's runqueue and priority-inheritance spinlocks. -This restriction was lifted when interrupt-disabled calls to -rcu_read_unlock() started deferring the reporting of -the resulting RCU-preempt quiescent state until the end of that +It is forbidden to hold any of scheduler's runqueue or priority-inheritance +spinlocks across an rcu_read_unlock() unless interrupts have been +disabled across the entire RCU read-side critical section, that is, +up to and including the matching rcu_read_lock(). +Violating this restriction can result in deadlocks involving these +scheduler spinlocks. +There was hope that this restriction might be lifted when interrupt-disabled +calls to rcu_read_unlock() started deferring the reporting of +the resulting RCU-preempt quiescent state until the end of the corresponding interrupts-disabled region. -This deferred reporting means that the scheduler's runqueue and -priority-inheritance locks cannot be held while reporting an RCU-preempt -quiescent state, which lifts the earlier restriction, at least from -a deadlock perspective. -Unfortunately, real-time systems using RCU priority boosting may +Unfortunately, timely reporting of the corresponding quiescent state +to expedited grace periods requires a call to raise_softirq(), +which can acquire these scheduler spinlocks. +In addition, real-time systems using RCU priority boosting need this restriction to remain in effect because deferred -quiescent-state reporting also defers deboosting, which in turn -degrades real-time latencies. +quiescent-state reporting would also defer deboosting, which in turn +would degrade real-time latencies. + +

    +In theory, if a given RCU read-side critical section could be +guaranteed to be less than one second in duration, holding a scheduler +spinlock across that critical section's rcu_read_unlock() +would require only that preemption be disabled across the entire +RCU read-side critical section, not interrupts. +Unfortunately, given the possibility of vCPU preemption, long-running +interrupts, and so on, it is not possible in practice to guarantee +that a given RCU read-side critical section will complete in less than +one second. +Therefore, as noted above, if scheduler spinlocks are held across +a given call to rcu_read_unlock(), interrupts must be +disabled across the entire RCU read-side critical section.

    Tracing and RCU

    @@ -3233,6 +3298,11 @@ For example, RCU callback overhead might be charged back to the originating call_rcu() instance, though probably not in production kernels. +

    +Additional work may be required to provide reasonable forward-progress +guarantees under heavy load for grace periods and for callback +invocation. +

    Summary

    diff --git a/Documentation/RCU/checklist.txt b/Documentation/RCU/checklist.txt index 49747717d905..6f469864d9f5 100644 --- a/Documentation/RCU/checklist.txt +++ b/Documentation/RCU/checklist.txt @@ -63,7 +63,7 @@ over a rather long period of time, but improvements are always welcome! pointer must be covered by rcu_read_lock(), rcu_read_lock_bh(), rcu_read_lock_sched(), or by the appropriate update-side lock. Disabling of preemption can serve as rcu_read_lock_sched(), but - is less readable. + is less readable and prevents lockdep from detecting locking issues. Letting RCU-protected pointers "leak" out of an RCU read-side critical section is every bid as bad as letting them leak out @@ -285,11 +285,7 @@ over a rather long period of time, but improvements are always welcome! here is that superuser already has lots of ways to crash the machine. - d. Use call_rcu_bh() rather than call_rcu(), in order to take - advantage of call_rcu_bh()'s faster grace periods. (This - is only a partial solution, though.) - - e. Periodically invoke synchronize_rcu(), permitting a limited + d. Periodically invoke synchronize_rcu(), permitting a limited number of updates per grace period. The same cautions apply to call_rcu_bh(), call_rcu_sched(), @@ -324,37 +320,14 @@ over a rather long period of time, but improvements are always welcome! will break Alpha, cause aggressive compilers to generate bad code, and confuse people trying to read your code. -11. Note that synchronize_rcu() -only- guarantees to wait until - all currently executing rcu_read_lock()-protected RCU read-side - critical sections complete. It does -not- necessarily guarantee - that all currently running interrupts, NMIs, preempt_disable() - code, or idle loops will complete. Therefore, if your - read-side critical sections are protected by something other - than rcu_read_lock(), do -not- use synchronize_rcu(). - - Similarly, disabling preemption is not an acceptable substitute - for rcu_read_lock(). Code that attempts to use preemption - disabling where it should be using rcu_read_lock() will break - in CONFIG_PREEMPT=y kernel builds. - - If you want to wait for interrupt handlers, NMI handlers, and - code under the influence of preempt_disable(), you instead - need to use synchronize_irq() or synchronize_sched(). - - This same limitation also applies to synchronize_rcu_bh() - and synchronize_srcu(), as well as to the asynchronous and - expedited forms of the three primitives, namely call_rcu(), - call_rcu_bh(), call_srcu(), synchronize_rcu_expedited(), - synchronize_rcu_bh_expedited(), and synchronize_srcu_expedited(). - -12. Any lock acquired by an RCU callback must be acquired elsewhere +11. Any lock acquired by an RCU callback must be acquired elsewhere with softirq disabled, e.g., via spin_lock_irqsave(), spin_lock_bh(), etc. Failing to disable irq on a given acquisition of that lock will result in deadlock as soon as the RCU softirq handler happens to run your RCU callback while interrupting that acquisition's critical section. -13. RCU callbacks can be and are executed in parallel. In many cases, +12. RCU callbacks can be and are executed in parallel. In many cases, the callback code simply wrappers around kfree(), so that this is not an issue (or, more accurately, to the extent that it is an issue, the memory-allocator locking handles it). However, @@ -370,7 +343,7 @@ over a rather long period of time, but improvements are always welcome! not the case, a self-spawning RCU callback would prevent the victim CPU from ever going offline.) -14. Unlike other forms of RCU, it -is- permissible to block in an +13. Unlike other forms of RCU, it -is- permissible to block in an SRCU read-side critical section (demarked by srcu_read_lock() and srcu_read_unlock()), hence the "SRCU": "sleepable RCU". Please note that if you don't need to sleep in read-side critical @@ -414,7 +387,7 @@ over a rather long period of time, but improvements are always welcome! Note that rcu_dereference() and rcu_assign_pointer() relate to SRCU just as they do to other forms of RCU. -15. The whole point of call_rcu(), synchronize_rcu(), and friends +14. The whole point of call_rcu(), synchronize_rcu(), and friends is to wait until all pre-existing readers have finished before carrying out some otherwise-destructive operation. It is therefore critically important to -first- remove any path @@ -426,13 +399,13 @@ over a rather long period of time, but improvements are always welcome! is the caller's responsibility to guarantee that any subsequent readers will execute safely. -16. The various RCU read-side primitives do -not- necessarily contain +15. The various RCU read-side primitives do -not- necessarily contain memory barriers. You should therefore plan for the CPU and the compiler to freely reorder code into and out of RCU read-side critical sections. It is the responsibility of the RCU update-side primitives to deal with this. -17. Use CONFIG_PROVE_LOCKING, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the +16. Use CONFIG_PROVE_LOCKING, CONFIG_DEBUG_OBJECTS_RCU_HEAD, and the __rcu sparse checks to validate your RCU code. These can help find problems as follows: @@ -455,7 +428,7 @@ over a rather long period of time, but improvements are always welcome! These debugging aids can help you find problems that are otherwise extremely difficult to spot. -18. If you register a callback using call_rcu(), call_rcu_bh(), +17. If you register a callback using call_rcu(), call_rcu_bh(), call_rcu_sched(), or call_srcu(), and pass in a function defined within a loadable module, then it in necessary to wait for all pending callbacks to be invoked after the last invocation @@ -469,8 +442,8 @@ over a rather long period of time, but improvements are always welcome! You instead need to use one of the barrier functions: o call_rcu() -> rcu_barrier() - o call_rcu_bh() -> rcu_barrier_bh() - o call_rcu_sched() -> rcu_barrier_sched() + o call_rcu_bh() -> rcu_barrier() + o call_rcu_sched() -> rcu_barrier() o call_srcu() -> srcu_barrier() However, these barrier functions are absolutely -not- guaranteed diff --git a/Documentation/RCU/stallwarn.txt b/Documentation/RCU/stallwarn.txt index 491043fd976f..073dbc12d1ea 100644 --- a/Documentation/RCU/stallwarn.txt +++ b/Documentation/RCU/stallwarn.txt @@ -176,9 +176,8 @@ causing stalls, and that the stall was affecting RCU-sched. This message will normally be followed by stack dumps for each CPU. Please note that PREEMPT_RCU builds can be stalled by tasks as well as by CPUs, and that the tasks will be indicated by PID, for example, "P3421". It is even -possible for a rcu_preempt_state stall to be caused by both CPUs -and- -tasks, in which case the offending CPUs and tasks will all be called -out in the list. +possible for an rcu_state stall to be caused by both CPUs -and- tasks, +in which case the offending CPUs and tasks will all be called out in the list. CPU 2's "(3 GPs behind)" indicates that this CPU has not interacted with the RCU core for the past three grace periods. In contrast, CPU 16's "(0 @@ -206,7 +205,7 @@ handlers are no longer able to execute on this CPU. This can happen if the stalled CPU is spinning with interrupts are disabled, or, in -rt kernels, if a high-priority process is starving RCU's softirq handler. -The "fps=" shows the number of force-quiescent-state idle/offline +The "fqs=" shows the number of force-quiescent-state idle/offline detection passes that the grace-period kthread has made across this CPU since the last time that this CPU noted the beginning of a grace period. diff --git a/Documentation/RCU/whatisRCU.txt b/Documentation/RCU/whatisRCU.txt index 86d82f7f3500..4a6854318b17 100644 --- a/Documentation/RCU/whatisRCU.txt +++ b/Documentation/RCU/whatisRCU.txt @@ -266,7 +266,7 @@ rcu_dereference() unnecessary overhead on Alpha CPUs. Note that the value returned by rcu_dereference() is valid - only within the enclosing RCU read-side critical section. + only within the enclosing RCU read-side critical section [1]. For example, the following is -not- legal: rcu_read_lock(); @@ -292,6 +292,19 @@ rcu_dereference() typically used indirectly, via the _rcu list-manipulation primitives, such as list_for_each_entry_rcu(). + [1] The variant rcu_dereference_protected() can be used outside + of an RCU read-side critical section as long as the usage is + protected by locks acquired by the update-side code. This variant + avoids the lockdep warning that would happen when using (for + example) rcu_dereference() without rcu_read_lock() protection. + Using rcu_dereference_protected() also has the advantage + of permitting compiler optimizations that rcu_dereference() + must prohibit. The rcu_dereference_protected() variant takes + a lockdep expression to indicate which locks must be acquired + by the caller. If the indicated protection is not provided, + a lockdep splat is emitted. See RCU/Design/Requirements.html + and the API's code comments for more details and example usage. + The following diagram shows how each API communicates among the reader, updater, and reclaimer. @@ -322,28 +335,27 @@ to their callers and (2) call_rcu() callbacks may be invoked. Efficient implementations of the RCU infrastructure make heavy use of batching in order to amortize their overhead over many uses of the corresponding APIs. -There are no fewer than three RCU mechanisms in the Linux kernel; the -diagram above shows the first one, which is by far the most commonly used. -The rcu_dereference() and rcu_assign_pointer() primitives are used for -all three mechanisms, but different defer and protect primitives are -used as follows: +There are at least three flavors of RCU usage in the Linux kernel. The diagram +above shows the most common one. On the updater side, the rcu_assign_pointer(), +sychronize_rcu() and call_rcu() primitives used are the same for all three +flavors. However for protection (on the reader side), the primitives used vary +depending on the flavor: - Defer Protect +a. rcu_read_lock() / rcu_read_unlock() + rcu_dereference() -a. synchronize_rcu() rcu_read_lock() / rcu_read_unlock() - call_rcu() rcu_dereference() +b. rcu_read_lock_bh() / rcu_read_unlock_bh() + local_bh_disable() / local_bh_enable() + rcu_dereference_bh() -b. synchronize_rcu_bh() rcu_read_lock_bh() / rcu_read_unlock_bh() - call_rcu_bh() rcu_dereference_bh() +c. rcu_read_lock_sched() / rcu_read_unlock_sched() + preempt_disable() / preempt_enable() + local_irq_save() / local_irq_restore() + hardirq enter / hardirq exit + NMI enter / NMI exit + rcu_dereference_sched() -c. synchronize_sched() rcu_read_lock_sched() / rcu_read_unlock_sched() - call_rcu_sched() preempt_disable() / preempt_enable() - local_irq_save() / local_irq_restore() - hardirq enter / hardirq exit - NMI enter / NMI exit - rcu_dereference_sched() - -These three mechanisms are used as follows: +These three flavors are used as follows: a. RCU applied to normal data structures. @@ -867,18 +879,20 @@ RCU: Critical sections Grace period Barrier bh: Critical sections Grace period Barrier - rcu_read_lock_bh call_rcu_bh rcu_barrier_bh - rcu_read_unlock_bh synchronize_rcu_bh - rcu_dereference_bh synchronize_rcu_bh_expedited + rcu_read_lock_bh call_rcu rcu_barrier + rcu_read_unlock_bh synchronize_rcu + [local_bh_disable] synchronize_rcu_expedited + [and friends] + rcu_dereference_bh rcu_dereference_bh_check rcu_dereference_bh_protected rcu_read_lock_bh_held sched: Critical sections Grace period Barrier - rcu_read_lock_sched synchronize_sched rcu_barrier_sched - rcu_read_unlock_sched call_rcu_sched - [preempt_disable] synchronize_sched_expedited + rcu_read_lock_sched call_rcu rcu_barrier + rcu_read_unlock_sched synchronize_rcu + [preempt_disable] synchronize_rcu_expedited [and friends] rcu_read_lock_sched_notrace rcu_read_unlock_sched_notrace @@ -890,8 +904,8 @@ sched: Critical sections Grace period Barrier SRCU: Critical sections Grace period Barrier - srcu_read_lock synchronize_srcu srcu_barrier - srcu_read_unlock call_srcu + srcu_read_lock call_srcu srcu_barrier + srcu_read_unlock synchronize_srcu srcu_dereference synchronize_srcu_expedited srcu_dereference_check srcu_read_lock_held @@ -1034,7 +1048,7 @@ Answer: Just as PREEMPT_RT permits preemption of spinlock spinlocks blocking while in RCU read-side critical sections. - Why the apparent inconsistency? Because it is it + Why the apparent inconsistency? Because it is possible to use priority boosting to keep the RCU grace periods short if need be (for example, if running short of memory). In contrast, if blocking waiting diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 7fd61bf339b4..60536779ed24 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3754,24 +3754,6 @@ in microseconds. The default of zero says no holdoff. - rcutorture.cbflood_inter_holdoff= [KNL] - Set holdoff time (jiffies) between successive - callback-flood tests. - - rcutorture.cbflood_intra_holdoff= [KNL] - Set holdoff time (jiffies) between successive - bursts of callbacks within a given callback-flood - test. - - rcutorture.cbflood_n_burst= [KNL] - Set the number of bursts making up a given - callback-flood test. Set this to zero to - disable callback-flood testing. - - rcutorture.cbflood_n_per_burst= [KNL] - Set the number of callbacks to be registered - in a given burst of a callback-flood test. - rcutorture.fqs_duration= [KNL] Set duration of force_quiescent_state bursts in microseconds. @@ -3784,6 +3766,23 @@ Set wait time between force_quiescent_state bursts in seconds. + rcutorture.fwd_progress= [KNL] + Enable RCU grace-period forward-progress testing + for the types of RCU supporting this notion. + + rcutorture.fwd_progress_div= [KNL] + Specify the fraction of a CPU-stall-warning + period to do tight-loop forward-progress testing. + + rcutorture.fwd_progress_holdoff= [KNL] + Number of seconds to wait between successive + forward-progress tests. + + rcutorture.fwd_progress_need_resched= [KNL] + Enclose cond_resched() calls within checks for + need_resched() during tight-loop forward-progress + testing. + rcutorture.gp_cond= [KNL] Use conditional/asynchronous update-side primitives, if available. diff --git a/MAINTAINERS b/MAINTAINERS index dd08e7018b17..705555d9b3af 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -4138,7 +4138,7 @@ S: Supported F: drivers/net/ethernet/chelsio/cxgb4vf/ CXL (IBM Coherent Accelerator Processor Interface CAPI) DRIVER -M: Frederic Barrat +M: Frederic Barrat M: Andrew Donnellan L: linuxppc-dev@lists.ozlabs.org S: Supported @@ -4150,9 +4150,9 @@ F: Documentation/powerpc/cxl.txt F: Documentation/ABI/testing/sysfs-class-cxl CXLFLASH (IBM Coherent Accelerator Processor Interface CAPI Flash) SCSI DRIVER -M: Manoj N. Kumar -M: Matthew R. Ochs -M: Uma Krishnan +M: Manoj N. Kumar +M: Matthew R. Ochs +M: Uma Krishnan L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/cxlflash/ @@ -5544,7 +5544,7 @@ S: Orphan F: fs/efs/ EHEA (IBM pSeries eHEA 10Gb ethernet adapter) DRIVER -M: Douglas Miller +M: Douglas Miller L: netdev@vger.kernel.org S: Maintained F: drivers/net/ethernet/ibm/ehea/ @@ -5682,7 +5682,7 @@ F: Documentation/filesystems/ext4/ext4.rst F: fs/ext4/ Extended Verification Module (EVM) -M: Mimi Zohar +M: Mimi Zohar L: linux-integrity@vger.kernel.org S: Supported F: security/integrity/evm/ @@ -5892,7 +5892,7 @@ F: include/linux/firmware.h FLASH ADAPTER DRIVER (IBM Flash Adapter 900GB Full Height PCI Flash Card) M: Joshua Morris -M: Philip Kelleher +M: Philip Kelleher S: Maintained F: drivers/block/rsxx/ @@ -6159,7 +6159,7 @@ F: include/linux/fscrypt*.h F: Documentation/filesystems/fscrypt.rst FSI-ATTACHED I2C DRIVER -M: Eddie James +M: Eddie James L: linux-i2c@vger.kernel.org L: openbmc@lists.ozlabs.org (moderated for non-subscribers) S: Maintained @@ -6335,8 +6335,7 @@ S: Supported F: drivers/uio/uio_pci_generic.c GENWQE (IBM Generic Workqueue Card) -M: Frank Haverkamp -M: Guilherme G. Piccoli +M: Frank Haverkamp S: Supported F: drivers/misc/genwqe/ @@ -7147,8 +7146,7 @@ F: crypto/842.c F: lib/842/ IBM Power in-Nest Crypto Acceleration -M: Leonidas S. Barbosa -M: Paulo Flabiano Smorigo +M: Paulo Flabiano Smorigo L: linux-crypto@vger.kernel.org S: Supported F: drivers/crypto/nx/Makefile @@ -7165,8 +7163,8 @@ S: Supported F: drivers/scsi/ipr.* IBM Power SRIOV Virtual NIC Device Driver -M: Thomas Falcon -M: John Allen +M: Thomas Falcon +M: John Allen L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/ibm/ibmvnic.* @@ -7181,41 +7179,38 @@ F: arch/powerpc/include/asm/vas.h F: arch/powerpc/include/uapi/asm/vas.h IBM Power Virtual Ethernet Device Driver -M: Thomas Falcon +M: Thomas Falcon L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/ibm/ibmveth.* IBM Power Virtual FC Device Drivers -M: Tyrel Datwyler +M: Tyrel Datwyler L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/ibmvscsi/ibmvfc* IBM Power Virtual Management Channel Driver -M: Bryant G. Ly -M: Steven Royer +M: Steven Royer S: Supported F: drivers/misc/ibmvmc.* IBM Power Virtual SCSI Device Drivers -M: Tyrel Datwyler +M: Tyrel Datwyler L: linux-scsi@vger.kernel.org S: Supported F: drivers/scsi/ibmvscsi/ibmvscsi* F: include/scsi/viosrp.h IBM Power Virtual SCSI Device Target Driver -M: Bryant G. Ly -M: Michael Cyr +M: Michael Cyr L: linux-scsi@vger.kernel.org L: target-devel@vger.kernel.org S: Supported F: drivers/scsi/ibmvscsi_tgt/ IBM Power VMX Cryptographic instructions -M: Leonidas S. Barbosa -M: Paulo Flabiano Smorigo +M: Paulo Flabiano Smorigo L: linux-crypto@vger.kernel.org S: Supported F: drivers/crypto/vmx/Makefile @@ -7492,7 +7487,7 @@ S: Maintained L: linux-crypto@vger.kernel.org INTEGRITY MEASUREMENT ARCHITECTURE (IMA) -M: Mimi Zohar +M: Mimi Zohar M: Dmitry Kasatkin L: linux-integrity@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/zohar/linux-integrity.git @@ -8101,9 +8096,8 @@ S: Maintained F: drivers/media/platform/rcar_jpu.c JSM Neo PCI based serial card -M: Guilherme G. Piccoli L: linux-serial@vger.kernel.org -S: Maintained +S: Orphan F: drivers/tty/serial/jsm/ K10TEMP HARDWARE MONITORING DRIVER @@ -8334,7 +8328,7 @@ F: include/uapi/linux/kexec.h F: kernel/kexec* KEYS-ENCRYPTED -M: Mimi Zohar +M: Mimi Zohar L: linux-integrity@vger.kernel.org L: keyrings@vger.kernel.org S: Supported @@ -8343,9 +8337,9 @@ F: include/keys/encrypted-type.h F: security/keys/encrypted-keys/ KEYS-TRUSTED -M: James Bottomley +M: James Bottomley M: Jarkko Sakkinen -M: Mimi Zohar +M: Mimi Zohar L: linux-integrity@vger.kernel.org L: keyrings@vger.kernel.org S: Supported @@ -8398,7 +8392,7 @@ F: lib/test_kmod.c F: tools/testing/selftests/kmod/ KPROBES -M: Naveen N. Rao +M: Naveen N. Rao M: Anil S Keshavamurthy M: "David S. Miller" M: Masami Hiramatsu @@ -8754,7 +8748,7 @@ M: Nicholas Piggin M: David Howells M: Jade Alglave M: Luc Maranget -M: "Paul E. McKenney" +M: "Paul E. McKenney" R: Akira Yokosawa R: Daniel Lustig L: linux-kernel@vger.kernel.org @@ -9719,7 +9713,7 @@ F: drivers/platform/x86/mlx-platform.c MEMBARRIER SUPPORT M: Mathieu Desnoyers -M: "Paul E. McKenney" +M: "Paul E. McKenney" L: linux-kernel@vger.kernel.org S: Supported F: kernel/sched/membarrier.c @@ -10861,7 +10855,7 @@ S: Supported F: tools/objtool/ OCXL (Open Coherent Accelerator Processor Interface OpenCAPI) DRIVER -M: Frederic Barrat +M: Frederic Barrat M: Andrew Donnellan L: linuxppc-dev@lists.ozlabs.org S: Supported @@ -12675,7 +12669,7 @@ S: Orphan F: drivers/net/wireless/ray* RCUTORTURE TEST FRAMEWORK -M: "Paul E. McKenney" +M: "Paul E. McKenney" M: Josh Triplett R: Steven Rostedt R: Mathieu Desnoyers @@ -12722,11 +12716,12 @@ F: arch/x86/include/asm/resctrl_sched.h F: Documentation/x86/resctrl* READ-COPY UPDATE (RCU) -M: "Paul E. McKenney" +M: "Paul E. McKenney" M: Josh Triplett R: Steven Rostedt R: Mathieu Desnoyers R: Lai Jiangshan +R: Joel Fernandes L: linux-kernel@vger.kernel.org W: http://www.rdrop.com/users/paulmck/RCU/ S: Supported @@ -12862,7 +12857,7 @@ F: include/linux/reset-controller.h RESTARTABLE SEQUENCES SUPPORT M: Mathieu Desnoyers M: Peter Zijlstra -M: "Paul E. McKenney" +M: "Paul E. McKenney" M: Boqun Feng L: linux-kernel@vger.kernel.org S: Supported @@ -13394,7 +13389,7 @@ F: drivers/scsi/sg.c F: include/scsi/sg.h SCSI SUBSYSTEM -M: "James E.J. Bottomley" +M: "James E.J. Bottomley" T: git git://git.kernel.org/pub/scm/linux/kernel/git/jejb/scsi.git M: "Martin K. Petersen" T: git git://git.kernel.org/pub/scm/linux/kernel/git/mkp/scsi.git @@ -13835,7 +13830,7 @@ F: mm/sl?b* SLEEPABLE READ-COPY UPDATE (SRCU) M: Lai Jiangshan -M: "Paul E. McKenney" +M: "Paul E. McKenney" M: Josh Triplett R: Steven Rostedt R: Mathieu Desnoyers @@ -15280,7 +15275,7 @@ F: drivers/platform/x86/topstar-laptop.c TORTURE-TEST MODULES M: Davidlohr Bueso -M: "Paul E. McKenney" +M: "Paul E. McKenney" M: Josh Triplett L: linux-kernel@vger.kernel.org S: Supported diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c index 8cf035e68378..4c01e9a01a74 100644 --- a/arch/powerpc/mm/hugetlbpage.c +++ b/arch/powerpc/mm/hugetlbpage.c @@ -289,7 +289,7 @@ static void hugepd_free(struct mmu_gather *tlb, void *hugepte) (*batchp)->ptes[(*batchp)->index++] = hugepte; if ((*batchp)->index == HUGEPD_FREELIST_SIZE) { - call_rcu_sched(&(*batchp)->rcu, hugepd_free_rcu_callback); + call_rcu(&(*batchp)->rcu, hugepd_free_rcu_callback); *batchp = NULL; } put_cpu_var(hugepd_freelist_cur); diff --git a/arch/s390/mm/pgalloc.c b/arch/s390/mm/pgalloc.c index 6791562779ee..db6bb2f97a2c 100644 --- a/arch/s390/mm/pgalloc.c +++ b/arch/s390/mm/pgalloc.c @@ -352,7 +352,7 @@ void tlb_table_flush(struct mmu_gather *tlb) struct mmu_table_batch **batch = &tlb->batch; if (*batch) { - call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + call_rcu(&(*batch)->rcu, tlb_remove_table_rcu); *batch = NULL; } } diff --git a/arch/sparc/oprofile/init.c b/arch/sparc/oprofile/init.c index f9024bccff16..43730c9b1c86 100644 --- a/arch/sparc/oprofile/init.c +++ b/arch/sparc/oprofile/init.c @@ -53,7 +53,7 @@ static void timer_stop(void) { nmi_adjust_hz(1); unregister_die_notifier(&profile_timer_exceptions_nb); - synchronize_sched(); /* Allow already-started NMIs to complete. */ + synchronize_rcu(); /* Allow already-started NMIs to complete. */ } static int op_nmi_timer_init(struct oprofile_operations *ops) diff --git a/arch/x86/pci/i386.c b/arch/x86/pci/i386.c index 8cd66152cdb0..9df652d3d927 100644 --- a/arch/x86/pci/i386.c +++ b/arch/x86/pci/i386.c @@ -59,7 +59,7 @@ static struct pcibios_fwaddrmap *pcibios_fwaddrmap_lookup(struct pci_dev *dev) { struct pcibios_fwaddrmap *map; - WARN_ON_SMP(!spin_is_locked(&pcibios_fwaddrmap_lock)); + lockdep_assert_held(&pcibios_fwaddrmap_lock); list_for_each_entry(map, &pcibios_fwaddrmappings, list) if (map->dev == dev) diff --git a/crypto/pcrypt.c b/crypto/pcrypt.c index f8ec3d4ba4a8..8eb3c4c9ff67 100644 --- a/crypto/pcrypt.c +++ b/crypto/pcrypt.c @@ -382,7 +382,7 @@ static int pcrypt_cpumask_change_notify(struct notifier_block *self, cpumask_copy(new_mask->mask, cpumask->cbcpu); rcu_assign_pointer(pcrypt->cb_cpumask, new_mask); - synchronize_rcu_bh(); + synchronize_rcu(); free_cpumask_var(old_mask->mask); kfree(old_mask); diff --git a/drivers/char/ipmi/ipmi_si_intf.c b/drivers/char/ipmi/ipmi_si_intf.c index 677618e6f1f7..dc8603d34320 100644 --- a/drivers/char/ipmi/ipmi_si_intf.c +++ b/drivers/char/ipmi/ipmi_si_intf.c @@ -2187,7 +2187,7 @@ static void shutdown_smi(void *send_info) * handlers might have been running before we freed the * interrupt. */ - synchronize_sched(); + synchronize_rcu(); /* * Timeouts are stopped, now make sure the interrupts are off diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 6d53f7d9fc7a..ffa9adeaba31 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -346,7 +346,7 @@ static inline void gov_clear_update_util(struct cpufreq_policy *policy) for_each_cpu(i, policy->cpus) cpufreq_remove_update_util_hook(i); - synchronize_sched(); + synchronize_rcu(); } static struct policy_dbs_info *alloc_policy_dbs_info(struct cpufreq_policy *policy, diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index 106402b89961..dd66decf2087 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -1952,7 +1952,7 @@ static void intel_pstate_clear_update_util_hook(unsigned int cpu) cpufreq_remove_update_util_hook(cpu); cpu_data->update_util_set = false; - synchronize_sched(); + synchronize_rcu(); } static int intel_pstate_get_max_freq(struct cpudata *cpu) diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c index ffd68a7bc9e1..69d752f0b621 100644 --- a/drivers/net/ethernet/realtek/8139too.c +++ b/drivers/net/ethernet/realtek/8139too.c @@ -1661,7 +1661,7 @@ static void rtl8139_tx_timeout_task (struct work_struct *work) napi_disable(&tp->napi); netif_stop_queue(dev); - synchronize_sched(); + synchronize_rcu(); netdev_dbg(dev, "Transmit timeout, status %02x %04x %04x media %02x\n", RTL_R8(ChipCmd), RTL_R16(IntrStatus), diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 209566f8097b..fe2d754c6c8e 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -5866,7 +5866,7 @@ static void rtl_reset_work(struct rtl8169_private *tp) napi_disable(&tp->napi); netif_stop_queue(dev); - synchronize_sched(); + synchronize_rcu(); rtl8169_hw_reset(tp); @@ -6609,7 +6609,7 @@ static void rtl8169_down(struct net_device *dev) rtl8169_rx_missed(dev); /* Give a racing hard_start_xmit a few cycles to complete. */ - synchronize_sched(); + synchronize_rcu(); rtl8169_tx_clear(tp); diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index 98fe7e762e17..3643015a55cf 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -3167,7 +3167,7 @@ struct hlist_head *efx_rps_hash_bucket(struct efx_nic *efx, { u32 hash = efx_filter_spec_hash(spec); - WARN_ON(!spin_is_locked(&efx->rps_hash_lock)); + lockdep_assert_held(&efx->rps_hash_lock); if (!efx->rps_hash_table) return NULL; return &efx->rps_hash_table[hash % EFX_ARFS_HASH_TABLE_SIZE]; diff --git a/drivers/net/ethernet/sis/sis190.c b/drivers/net/ethernet/sis/sis190.c index c2c50522b96d..808cf9816673 100644 --- a/drivers/net/ethernet/sis/sis190.c +++ b/drivers/net/ethernet/sis/sis190.c @@ -1142,7 +1142,7 @@ static void sis190_down(struct net_device *dev) if (!poll_locked) poll_locked++; - synchronize_sched(); + synchronize_rcu(); } while (SIS_R32(IntrMask)); diff --git a/drivers/net/ethernet/smsc/smsc911x.h b/drivers/net/ethernet/smsc/smsc911x.h index 8d75508acd2b..51b2fc1a395f 100644 --- a/drivers/net/ethernet/smsc/smsc911x.h +++ b/drivers/net/ethernet/smsc/smsc911x.h @@ -67,7 +67,7 @@ #ifdef CONFIG_DEBUG_SPINLOCK #define SMSC_ASSERT_MAC_LOCK(pdata) \ - WARN_ON_SMP(!spin_is_locked(&pdata->mac_lock)) + lockdep_assert_held(&pdata->mac_lock) #else #define SMSC_ASSERT_MAC_LOCK(pdata) do {} while (0) #endif /* CONFIG_DEBUG_SPINLOCK */ diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index ad7a6f475a44..e87ef969a0fd 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -1365,7 +1365,7 @@ static int vhost_net_release(struct inode *inode, struct file *f) if (rx_sock) sockfd_put(rx_sock); /* Make sure no callbacks are outstanding */ - synchronize_rcu_bh(); + synchronize_rcu(); /* We do an extra flush before freeing memory, * since jobs can re-queue themselves. */ vhost_net_flush(n); diff --git a/fs/file.c b/fs/file.c index 7ffd6e9d103d..50304c7525ea 100644 --- a/fs/file.c +++ b/fs/file.c @@ -158,7 +158,7 @@ static int expand_fdtable(struct files_struct *files, unsigned int nr) * or have finished their rcu_read_lock_sched() section. */ if (atomic_read(&files->count) > 1) - synchronize_sched(); + synchronize_rcu(); spin_lock(&files->file_lock); if (!new_fdt) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 7a85e609fc27..59dc28047030 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -926,7 +926,7 @@ static inline struct userfaultfd_wait_queue *find_userfault_in( wait_queue_entry_t *wq; struct userfaultfd_wait_queue *uwq; - VM_BUG_ON(!spin_is_locked(&wqh->lock)); + lockdep_assert_held(&wqh->lock); uwq = NULL; if (!waitqueue_active(wqh)) diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 79b99d653e03..71b75643c432 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -41,7 +41,7 @@ static inline void percpu_down_read_preempt_disable(struct percpu_rw_semaphore * * cannot both change sem->state from readers_fast and start checking * counters while we are here. So if we see !sem->state, we know that * the writer won't be checking until we're past the preempt_enable() - * and that one the synchronize_sched() is done, the writer will see + * and that once the synchronize_rcu() is done, the writer will see * anything we did within this RCU-sched read-size critical section. */ __this_cpu_inc(*sem->read_count); diff --git a/include/linux/rcupdate_wait.h b/include/linux/rcupdate_wait.h index 8a16c3eb3dd0..c0578ba23c1a 100644 --- a/include/linux/rcupdate_wait.h +++ b/include/linux/rcupdate_wait.h @@ -31,21 +31,4 @@ do { \ #define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) -/** - * synchronize_rcu_mult - Wait concurrently for multiple grace periods - * @...: List of call_rcu() functions for different grace periods to wait on - * - * This macro waits concurrently for multiple types of RCU grace periods. - * For example, synchronize_rcu_mult(call_rcu, call_rcu_tasks) would wait - * on concurrent RCU and RCU-tasks grace periods. Waiting on a give SRCU - * domain requires you to write a wrapper function for that SRCU domain's - * call_srcu() function, supplying the corresponding srcu_struct. - * - * If Tiny RCU, tell _wait_rcu_gp() does not bother waiting for RCU, - * given that anywhere synchronize_rcu_mult() can be called is automatically - * a grace period. - */ -#define synchronize_rcu_mult(...) \ - _wait_rcu_gp(IS_ENABLED(CONFIG_TINY_RCU), __VA_ARGS__) - #endif /* _LINUX_SCHED_RCUPDATE_WAIT_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index b3c51e869388..4f1db3ef62a9 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -572,8 +572,10 @@ union rcu_special { struct { u8 blocked; u8 need_qs; + u8 exp_hint; /* Hint for performance. */ + u8 pad; /* No garbage from compiler! */ } b; /* Bits. */ - u16 s; /* Set of bits. */ + u32 s; /* Set of bits. */ }; enum perf_event_task_context { diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 67135d4a8a30..c614375cd264 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -38,20 +38,20 @@ struct srcu_struct; #ifdef CONFIG_DEBUG_LOCK_ALLOC -int __init_srcu_struct(struct srcu_struct *sp, const char *name, +int __init_srcu_struct(struct srcu_struct *ssp, const char *name, struct lock_class_key *key); -#define init_srcu_struct(sp) \ +#define init_srcu_struct(ssp) \ ({ \ static struct lock_class_key __srcu_key; \ \ - __init_srcu_struct((sp), #sp, &__srcu_key); \ + __init_srcu_struct((ssp), #ssp, &__srcu_key); \ }) #define __SRCU_DEP_MAP_INIT(srcu_name) .dep_map = { .name = #srcu_name }, #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -int init_srcu_struct(struct srcu_struct *sp); +int init_srcu_struct(struct srcu_struct *ssp); #define __SRCU_DEP_MAP_INIT(srcu_name) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ @@ -67,28 +67,28 @@ int init_srcu_struct(struct srcu_struct *sp); struct srcu_struct { }; #endif -void call_srcu(struct srcu_struct *sp, struct rcu_head *head, +void call_srcu(struct srcu_struct *ssp, struct rcu_head *head, void (*func)(struct rcu_head *head)); -void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced); -int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); -void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); -void synchronize_srcu(struct srcu_struct *sp); +void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced); +int __srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp); +void __srcu_read_unlock(struct srcu_struct *ssp, int idx) __releases(ssp); +void synchronize_srcu(struct srcu_struct *ssp); /** * cleanup_srcu_struct - deconstruct a sleep-RCU structure - * @sp: structure to clean up. + * @ssp: structure to clean up. * * Must invoke this after you are finished using a given srcu_struct that * was initialized via init_srcu_struct(), else you leak memory. */ -static inline void cleanup_srcu_struct(struct srcu_struct *sp) +static inline void cleanup_srcu_struct(struct srcu_struct *ssp) { - _cleanup_srcu_struct(sp, false); + _cleanup_srcu_struct(ssp, false); } /** * cleanup_srcu_struct_quiesced - deconstruct a quiesced sleep-RCU structure - * @sp: structure to clean up. + * @ssp: structure to clean up. * * Must invoke this after you are finished using a given srcu_struct that * was initialized via init_srcu_struct(), else you leak memory. Also, @@ -103,16 +103,16 @@ static inline void cleanup_srcu_struct(struct srcu_struct *sp) * (with high probability, anyway), and will also cause the srcu_struct * to be leaked. */ -static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *sp) +static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *ssp) { - _cleanup_srcu_struct(sp, true); + _cleanup_srcu_struct(ssp, true); } #ifdef CONFIG_DEBUG_LOCK_ALLOC /** * srcu_read_lock_held - might we be in SRCU read-side critical section? - * @sp: The srcu_struct structure to check + * @ssp: The srcu_struct structure to check * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an SRCU * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, @@ -126,16 +126,16 @@ static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *sp) * relies on normal RCU, it can be called from the CPU which * is in the idle loop from an RCU point of view or offline. */ -static inline int srcu_read_lock_held(const struct srcu_struct *sp) +static inline int srcu_read_lock_held(const struct srcu_struct *ssp) { if (!debug_lockdep_rcu_enabled()) return 1; - return lock_is_held(&sp->dep_map); + return lock_is_held(&ssp->dep_map); } #else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -static inline int srcu_read_lock_held(const struct srcu_struct *sp) +static inline int srcu_read_lock_held(const struct srcu_struct *ssp) { return 1; } @@ -145,7 +145,7 @@ static inline int srcu_read_lock_held(const struct srcu_struct *sp) /** * srcu_dereference_check - fetch SRCU-protected pointer for later dereferencing * @p: the pointer to fetch and protect for later dereferencing - * @sp: pointer to the srcu_struct, which is used to check that we + * @ssp: pointer to the srcu_struct, which is used to check that we * really are in an SRCU read-side critical section. * @c: condition to check for update-side use * @@ -154,29 +154,32 @@ static inline int srcu_read_lock_held(const struct srcu_struct *sp) * to 1. The @c argument will normally be a logical expression containing * lockdep_is_held() calls. */ -#define srcu_dereference_check(p, sp, c) \ - __rcu_dereference_check((p), (c) || srcu_read_lock_held(sp), __rcu) +#define srcu_dereference_check(p, ssp, c) \ + __rcu_dereference_check((p), (c) || srcu_read_lock_held(ssp), __rcu) /** * srcu_dereference - fetch SRCU-protected pointer for later dereferencing * @p: the pointer to fetch and protect for later dereferencing - * @sp: pointer to the srcu_struct, which is used to check that we + * @ssp: pointer to the srcu_struct, which is used to check that we * really are in an SRCU read-side critical section. * * Makes rcu_dereference_check() do the dirty work. If PROVE_RCU * is enabled, invoking this outside of an RCU read-side critical * section will result in an RCU-lockdep splat. */ -#define srcu_dereference(p, sp) srcu_dereference_check((p), (sp), 0) +#define srcu_dereference(p, ssp) srcu_dereference_check((p), (ssp), 0) /** * srcu_dereference_notrace - no tracing and no lockdep calls from here + * @p: the pointer to fetch and protect for later dereferencing + * @ssp: pointer to the srcu_struct, which is used to check that we + * really are in an SRCU read-side critical section. */ -#define srcu_dereference_notrace(p, sp) srcu_dereference_check((p), (sp), 1) +#define srcu_dereference_notrace(p, ssp) srcu_dereference_check((p), (ssp), 1) /** * srcu_read_lock - register a new reader for an SRCU-protected structure. - * @sp: srcu_struct in which to register the new reader. + * @ssp: srcu_struct in which to register the new reader. * * Enter an SRCU read-side critical section. Note that SRCU read-side * critical sections may be nested. However, it is illegal to @@ -191,44 +194,44 @@ static inline int srcu_read_lock_held(const struct srcu_struct *sp) * srcu_read_unlock() in an irq handler if the matching srcu_read_lock() * was invoked in process context. */ -static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) +static inline int srcu_read_lock(struct srcu_struct *ssp) __acquires(ssp) { int retval; - retval = __srcu_read_lock(sp); - rcu_lock_acquire(&(sp)->dep_map); + retval = __srcu_read_lock(ssp); + rcu_lock_acquire(&(ssp)->dep_map); return retval; } /* Used by tracing, cannot be traced and cannot invoke lockdep. */ static inline notrace int -srcu_read_lock_notrace(struct srcu_struct *sp) __acquires(sp) +srcu_read_lock_notrace(struct srcu_struct *ssp) __acquires(ssp) { int retval; - retval = __srcu_read_lock(sp); + retval = __srcu_read_lock(ssp); return retval; } /** * srcu_read_unlock - unregister a old reader from an SRCU-protected structure. - * @sp: srcu_struct in which to unregister the old reader. + * @ssp: srcu_struct in which to unregister the old reader. * @idx: return value from corresponding srcu_read_lock(). * * Exit an SRCU read-side critical section. */ -static inline void srcu_read_unlock(struct srcu_struct *sp, int idx) - __releases(sp) +static inline void srcu_read_unlock(struct srcu_struct *ssp, int idx) + __releases(ssp) { - rcu_lock_release(&(sp)->dep_map); - __srcu_read_unlock(sp, idx); + rcu_lock_release(&(ssp)->dep_map); + __srcu_read_unlock(ssp, idx); } /* Used by tracing, cannot be traced and cannot call lockdep. */ static inline notrace void -srcu_read_unlock_notrace(struct srcu_struct *sp, int idx) __releases(sp) +srcu_read_unlock_notrace(struct srcu_struct *ssp, int idx) __releases(ssp) { - __srcu_read_unlock(sp, idx); + __srcu_read_unlock(ssp, idx); } /** diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index f41d2fb09f87..b19216aaaef2 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -60,7 +60,7 @@ void srcu_drive_gp(struct work_struct *wp); #define DEFINE_STATIC_SRCU(name) \ static struct srcu_struct name = __SRCU_STRUCT_INIT(name, name) -void synchronize_srcu(struct srcu_struct *sp); +void synchronize_srcu(struct srcu_struct *ssp); /* * Counts the new reader in the appropriate per-CPU element of the @@ -68,36 +68,36 @@ void synchronize_srcu(struct srcu_struct *sp); * __srcu_read_unlock() must be in the same handler instance. Returns an * index that must be passed to the matching srcu_read_unlock(). */ -static inline int __srcu_read_lock(struct srcu_struct *sp) +static inline int __srcu_read_lock(struct srcu_struct *ssp) { int idx; - idx = READ_ONCE(sp->srcu_idx); - WRITE_ONCE(sp->srcu_lock_nesting[idx], sp->srcu_lock_nesting[idx] + 1); + idx = READ_ONCE(ssp->srcu_idx); + WRITE_ONCE(ssp->srcu_lock_nesting[idx], ssp->srcu_lock_nesting[idx] + 1); return idx; } -static inline void synchronize_srcu_expedited(struct srcu_struct *sp) +static inline void synchronize_srcu_expedited(struct srcu_struct *ssp) { - synchronize_srcu(sp); + synchronize_srcu(ssp); } -static inline void srcu_barrier(struct srcu_struct *sp) +static inline void srcu_barrier(struct srcu_struct *ssp) { - synchronize_srcu(sp); + synchronize_srcu(ssp); } /* Defined here to avoid size increase for non-torture kernels. */ -static inline void srcu_torture_stats_print(struct srcu_struct *sp, +static inline void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) { int idx; - idx = READ_ONCE(sp->srcu_idx) & 0x1; + idx = READ_ONCE(ssp->srcu_idx) & 0x1; pr_alert("%s%s Tiny SRCU per-CPU(idx=%d): (%hd,%hd)\n", tt, tf, idx, - READ_ONCE(sp->srcu_lock_nesting[!idx]), - READ_ONCE(sp->srcu_lock_nesting[idx])); + READ_ONCE(ssp->srcu_lock_nesting[!idx]), + READ_ONCE(ssp->srcu_lock_nesting[idx])); } #endif diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 0ae91b3a7406..6f292bd3e7db 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -51,7 +51,7 @@ struct srcu_data { unsigned long grpmask; /* Mask for leaf srcu_node */ /* ->srcu_data_have_cbs[]. */ int cpu; - struct srcu_struct *sp; + struct srcu_struct *ssp; }; /* @@ -138,8 +138,8 @@ struct srcu_struct { #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) -void synchronize_srcu_expedited(struct srcu_struct *sp); -void srcu_barrier(struct srcu_struct *sp); -void srcu_torture_stats_print(struct srcu_struct *sp, char *tt, char *tf); +void synchronize_srcu_expedited(struct srcu_struct *ssp); +void srcu_barrier(struct srcu_struct *ssp); +void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf); #endif diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index e9de8ad0bad7..9c3186578ce0 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -82,7 +82,7 @@ int unregister_tracepoint_module_notifier(struct notifier_block *nb) static inline void tracepoint_synchronize_unregister(void) { synchronize_srcu(&tracepoint_srcu); - synchronize_sched(); + synchronize_rcu(); } #else static inline void tracepoint_synchronize_unregister(void) diff --git a/include/linux/types.h b/include/linux/types.h index 9834e90aa010..c2615d6a019e 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -212,8 +212,8 @@ struct ustat { * weird ABI and we need to ask it explicitly. * * The alignment is required to guarantee that bit 0 of @next will be - * clear under normal conditions -- as long as we use call_rcu(), - * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback. + * clear under normal conditions -- as long as we use call_rcu() or + * call_srcu() to queue the callback. * * This guarantee is important for few reasons: * - future call_rcu_lazy() will make use of lower bits in the pointer; diff --git a/init/main.c b/init/main.c index ee147103ba1b..a45486330243 100644 --- a/init/main.c +++ b/init/main.c @@ -1046,12 +1046,12 @@ static void mark_readonly(void) { if (rodata_enabled) { /* - * load_module() results in W+X mappings, which are cleaned up - * with call_rcu_sched(). Let's make sure that queued work is + * load_module() results in W+X mappings, which are cleaned + * up with call_rcu(). Let's make sure that queued work is * flushed so that we don't hit false positives looking for * insecure pages which are W+X. */ - rcu_barrier_sched(); + rcu_barrier(); mark_rodata_ro(); rodata_test(); } else diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 6aaf5dd5383b..7a8429f8e280 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -5343,7 +5343,7 @@ int __init cgroup_init(void) cgroup_rstat_boot(); /* - * The latency of the synchronize_sched() is too high for cgroups, + * The latency of the synchronize_rcu() is too high for cgroups, * avoid it at the cost of forcing all readers into the slow path. */ rcu_sync_enter_start(&cgroup_threadgroup_rwsem.rss); diff --git a/kernel/events/core.c b/kernel/events/core.c index 84530ab358c3..c4b90cf7734a 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -9918,7 +9918,7 @@ static void account_event(struct perf_event *event) * call the perf scheduling hooks before proceeding to * install events that need them. */ - synchronize_sched(); + synchronize_rcu(); } /* * Now that we have waited for the sync_sched(), allow further diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 90e98e233647..08e31d863191 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -229,7 +229,7 @@ static int collect_garbage_slots(struct kprobe_insn_cache *c) struct kprobe_insn_page *kip, *next; /* Ensure no-one is interrupted on the garbages */ - synchronize_sched(); + synchronize_rcu(); list_for_each_entry_safe(kip, next, &c->pages, list) { int i; @@ -1382,7 +1382,7 @@ out: if (ret) { ap->flags |= KPROBE_FLAG_DISABLED; list_del_rcu(&p->list); - synchronize_sched(); + synchronize_rcu(); } } } @@ -1597,7 +1597,7 @@ int register_kprobe(struct kprobe *p) ret = arm_kprobe(p); if (ret) { hlist_del_rcu(&p->hlist); - synchronize_sched(); + synchronize_rcu(); goto out; } } @@ -1776,7 +1776,7 @@ void unregister_kprobes(struct kprobe **kps, int num) kps[i]->addr = NULL; mutex_unlock(&kprobe_mutex); - synchronize_sched(); + synchronize_rcu(); for (i = 0; i < num; i++) if (kps[i]->addr) __unregister_kprobe_bottom(kps[i]); @@ -1966,7 +1966,7 @@ void unregister_kretprobes(struct kretprobe **rps, int num) rps[i]->kp.addr = NULL; mutex_unlock(&kprobe_mutex); - synchronize_sched(); + synchronize_rcu(); for (i = 0; i < num; i++) { if (rps[i]->kp.addr) { __unregister_kprobe_bottom(&rps[i]->kp); diff --git a/kernel/livepatch/patch.c b/kernel/livepatch/patch.c index 82d584225dc6..7702cb4064fc 100644 --- a/kernel/livepatch/patch.c +++ b/kernel/livepatch/patch.c @@ -61,7 +61,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, ops = container_of(fops, struct klp_ops, fops); /* - * A variant of synchronize_sched() is used to allow patching functions + * A variant of synchronize_rcu() is used to allow patching functions * where RCU is not watching, see klp_synchronize_transition(). */ preempt_disable_notrace(); @@ -72,7 +72,7 @@ static void notrace klp_ftrace_handler(unsigned long ip, /* * func should never be NULL because preemption should be disabled here * and unregister_ftrace_function() does the equivalent of a - * synchronize_sched() before the func_stack removal. + * synchronize_rcu() before the func_stack removal. */ if (WARN_ON_ONCE(!func)) goto unlock; diff --git a/kernel/livepatch/transition.c b/kernel/livepatch/transition.c index 5bc349805e03..304d5eb8a98c 100644 --- a/kernel/livepatch/transition.c +++ b/kernel/livepatch/transition.c @@ -52,7 +52,7 @@ static DECLARE_DELAYED_WORK(klp_transition_work, klp_transition_work_fn); /* * This function is just a stub to implement a hard force - * of synchronize_sched(). This requires synchronizing + * of synchronize_rcu(). This requires synchronizing * tasks even in userspace and idle. */ static void klp_sync(struct work_struct *work) @@ -175,7 +175,7 @@ void klp_cancel_transition(void) void klp_update_patch_state(struct task_struct *task) { /* - * A variant of synchronize_sched() is used to allow patching functions + * A variant of synchronize_rcu() is used to allow patching functions * where RCU is not watching, see klp_synchronize_transition(). */ preempt_disable_notrace(); diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 1efada2dd9dd..ef27f98714c0 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -4195,7 +4195,7 @@ void lockdep_free_key_range(void *start, unsigned long size) * * sync_sched() is sufficient because the read-side is IRQ disable. */ - synchronize_sched(); + synchronize_rcu(); /* * XXX at this point we could return the resources to the pool; diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index 9aa713629387..771d4ca96dda 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -36,7 +36,7 @@ void debug_mutex_lock_common(struct mutex *lock, struct mutex_waiter *waiter) void debug_mutex_wake_waiter(struct mutex *lock, struct mutex_waiter *waiter) { - SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); + lockdep_assert_held(&lock->wait_lock); DEBUG_LOCKS_WARN_ON(list_empty(&lock->wait_list)); DEBUG_LOCKS_WARN_ON(waiter->magic != waiter); DEBUG_LOCKS_WARN_ON(list_empty(&waiter->list)); @@ -51,7 +51,7 @@ void debug_mutex_free_waiter(struct mutex_waiter *waiter) void debug_mutex_add_waiter(struct mutex *lock, struct mutex_waiter *waiter, struct task_struct *task) { - SMP_DEBUG_LOCKS_WARN_ON(!spin_is_locked(&lock->wait_lock)); + lockdep_assert_held(&lock->wait_lock); /* Mark the current thread as blocked on the lock: */ task->blocked_on = waiter; diff --git a/kernel/module.c b/kernel/module.c index 49a405891587..99b46c32d579 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -2159,7 +2159,7 @@ static void free_module(struct module *mod) /* Remove this module from bug list, this uses list_del_rcu */ module_bug_cleanup(mod); /* Wait for RCU-sched synchronizing before releasing mod->list and buglist. */ - synchronize_sched(); + synchronize_rcu(); mutex_unlock(&module_mutex); /* This may be empty, but that's OK */ @@ -3507,15 +3507,15 @@ static noinline int do_init_module(struct module *mod) /* * We want to free module_init, but be aware that kallsyms may be * walking this with preempt disabled. In all the failure paths, we - * call synchronize_sched(), but we don't want to slow down the success + * call synchronize_rcu(), but we don't want to slow down the success * path, so use actual RCU here. * Note that module_alloc() on most architectures creates W+X page * mappings which won't be cleaned up until do_free_init() runs. Any * code such as mark_rodata_ro() which depends on those mappings to * be cleaned up needs to sync with the queued work - ie - * rcu_barrier_sched() + * rcu_barrier() */ - call_rcu_sched(&freeinit->rcu, do_free_init); + call_rcu(&freeinit->rcu, do_free_init); mutex_unlock(&module_mutex); wake_up_all(&module_wq); @@ -3526,7 +3526,7 @@ fail_free_freeinit: fail: /* Try to protect us from buggy refcounters. */ mod->state = MODULE_STATE_GOING; - synchronize_sched(); + synchronize_rcu(); module_put(mod); blocking_notifier_call_chain(&module_notify_list, MODULE_STATE_GOING, mod); @@ -3819,7 +3819,7 @@ static int load_module(struct load_info *info, const char __user *uargs, ddebug_cleanup: ftrace_release_mod(mod); dynamic_debug_remove(mod, info->debug); - synchronize_sched(); + synchronize_rcu(); kfree(mod->args); free_arch_cleanup: module_arch_cleanup(mod); @@ -3834,7 +3834,7 @@ static int load_module(struct load_info *info, const char __user *uargs, mod_tree_remove(mod); wake_up_all(&module_wq); /* Wait for RCU-sched synchronizing before releasing mod->list. */ - synchronize_sched(); + synchronize_rcu(); mutex_unlock(&module_mutex); free_module: /* Free lock-classes; relies on the preceding sync_rcu() */ diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h index 2866166863f0..a393e24a9195 100644 --- a/kernel/rcu/rcu.h +++ b/kernel/rcu/rcu.h @@ -526,12 +526,14 @@ srcu_batches_completed(struct srcu_struct *sp) { return 0; } static inline void rcu_force_quiescent_state(void) { } static inline void show_rcu_gp_kthreads(void) { } static inline int rcu_get_gp_kthreads_prio(void) { return 0; } +static inline void rcu_fwd_progress_check(unsigned long j) { } #else /* #ifdef CONFIG_TINY_RCU */ unsigned long rcu_get_gp_seq(void); unsigned long rcu_exp_batches_completed(void); unsigned long srcu_batches_completed(struct srcu_struct *sp); void show_rcu_gp_kthreads(void); int rcu_get_gp_kthreads_prio(void); +void rcu_fwd_progress_check(unsigned long j); void rcu_force_quiescent_state(void); extern struct workqueue_struct *rcu_gp_wq; extern struct workqueue_struct *rcu_par_gp_wq; @@ -539,8 +541,10 @@ extern struct workqueue_struct *rcu_par_gp_wq; #ifdef CONFIG_RCU_NOCB_CPU bool rcu_is_nocb_cpu(int cpu); +void rcu_bind_current_to_nocb(void); #else static inline bool rcu_is_nocb_cpu(int cpu) { return false; } +static inline void rcu_bind_current_to_nocb(void) { } #endif #endif /* __LINUX_RCU_H */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 210c77460365..f6e85faa4ff4 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -56,6 +56,7 @@ #include #include #include +#include #include "rcu.h" @@ -80,13 +81,6 @@ MODULE_AUTHOR("Paul E. McKenney and Josh Triplett get_gp_seq(); ts = rcu_trace_clock_local(); if (preempt_count() & (SOFTIRQ_MASK | HARDIRQ_MASK)) @@ -870,59 +864,6 @@ checkwait: stutter_wait("rcu_torture_boost"); return 0; } -static void rcu_torture_cbflood_cb(struct rcu_head *rhp) -{ -} - -/* - * RCU torture callback-flood kthread. Repeatedly induces bursts of calls - * to call_rcu() or analogous, increasing the probability of occurrence - * of callback-overflow corner cases. - */ -static int -rcu_torture_cbflood(void *arg) -{ - int err = 1; - int i; - int j; - struct rcu_head *rhp; - - if (cbflood_n_per_burst > 0 && - cbflood_inter_holdoff > 0 && - cbflood_intra_holdoff > 0 && - cur_ops->call && - cur_ops->cb_barrier) { - rhp = vmalloc(array3_size(cbflood_n_burst, - cbflood_n_per_burst, - sizeof(*rhp))); - err = !rhp; - } - if (err) { - VERBOSE_TOROUT_STRING("rcu_torture_cbflood disabled: Bad args or OOM"); - goto wait_for_stop; - } - VERBOSE_TOROUT_STRING("rcu_torture_cbflood task started"); - do { - schedule_timeout_interruptible(cbflood_inter_holdoff); - atomic_long_inc(&n_cbfloods); - WARN_ON(signal_pending(current)); - for (i = 0; i < cbflood_n_burst; i++) { - for (j = 0; j < cbflood_n_per_burst; j++) { - cur_ops->call(&rhp[i * cbflood_n_per_burst + j], - rcu_torture_cbflood_cb); - } - schedule_timeout_interruptible(cbflood_intra_holdoff); - WARN_ON(signal_pending(current)); - } - cur_ops->cb_barrier(); - stutter_wait("rcu_torture_cbflood"); - } while (!torture_must_stop()); - vfree(rhp); -wait_for_stop: - torture_kthread_stopping("rcu_torture_cbflood"); - return 0; -} - /* * RCU torture force-quiescent-state kthread. Repeatedly induces * bursts of calls to force_quiescent_state(), increasing the probability @@ -1457,11 +1398,10 @@ rcu_torture_stats_print(void) n_rcu_torture_boosts, atomic_long_read(&n_rcu_torture_timers)); torture_onoff_stats(); - pr_cont("barrier: %ld/%ld:%ld ", + pr_cont("barrier: %ld/%ld:%ld\n", n_barrier_successes, n_barrier_attempts, n_rcu_torture_barrier_error); - pr_cont("cbflood: %ld\n", atomic_long_read(&n_cbfloods)); pr_alert("%s%s ", torture_type, TORTURE_FLAG); if (atomic_read(&n_rcu_torture_mberror) != 0 || @@ -1674,8 +1614,90 @@ static void rcu_torture_fwd_prog_cb(struct rcu_head *rhp) cur_ops->call(&fcsp->rh, rcu_torture_fwd_prog_cb); } -/* Carry out grace-period forward-progress testing. */ -static int rcu_torture_fwd_prog(void *args) +/* State for continuous-flood RCU callbacks. */ +struct rcu_fwd_cb { + struct rcu_head rh; + struct rcu_fwd_cb *rfc_next; + int rfc_gps; +}; +static DEFINE_SPINLOCK(rcu_fwd_lock); +static struct rcu_fwd_cb *rcu_fwd_cb_head; +static struct rcu_fwd_cb **rcu_fwd_cb_tail = &rcu_fwd_cb_head; +static long n_launders_cb; +static unsigned long rcu_fwd_startat; +static bool rcu_fwd_emergency_stop; +#define MAX_FWD_CB_JIFFIES (8 * HZ) /* Maximum CB test duration. */ +#define MIN_FWD_CB_LAUNDERS 3 /* This many CB invocations to count. */ +#define MIN_FWD_CBS_LAUNDERED 100 /* Number of counted CBs. */ +#define FWD_CBS_HIST_DIV 10 /* Histogram buckets/second. */ +static long n_launders_hist[2 * MAX_FWD_CB_JIFFIES / (HZ / FWD_CBS_HIST_DIV)]; + +static void rcu_torture_fwd_cb_hist(void) +{ + int i; + int j; + + for (i = ARRAY_SIZE(n_launders_hist) - 1; i > 0; i--) + if (n_launders_hist[i] > 0) + break; + pr_alert("%s: Callback-invocation histogram (duration %lu jiffies):", + __func__, jiffies - rcu_fwd_startat); + for (j = 0; j <= i; j++) + pr_cont(" %ds/%d: %ld", + j + 1, FWD_CBS_HIST_DIV, n_launders_hist[j]); + pr_cont("\n"); +} + +/* Callback function for continuous-flood RCU callbacks. */ +static void rcu_torture_fwd_cb_cr(struct rcu_head *rhp) +{ + unsigned long flags; + int i; + struct rcu_fwd_cb *rfcp = container_of(rhp, struct rcu_fwd_cb, rh); + struct rcu_fwd_cb **rfcpp; + + rfcp->rfc_next = NULL; + rfcp->rfc_gps++; + spin_lock_irqsave(&rcu_fwd_lock, flags); + rfcpp = rcu_fwd_cb_tail; + rcu_fwd_cb_tail = &rfcp->rfc_next; + WRITE_ONCE(*rfcpp, rfcp); + WRITE_ONCE(n_launders_cb, n_launders_cb + 1); + i = ((jiffies - rcu_fwd_startat) / (HZ / FWD_CBS_HIST_DIV)); + if (i >= ARRAY_SIZE(n_launders_hist)) + i = ARRAY_SIZE(n_launders_hist) - 1; + n_launders_hist[i]++; + spin_unlock_irqrestore(&rcu_fwd_lock, flags); +} + +/* + * Free all callbacks on the rcu_fwd_cb_head list, either because the + * test is over or because we hit an OOM event. + */ +static unsigned long rcu_torture_fwd_prog_cbfree(void) +{ + unsigned long flags; + unsigned long freed = 0; + struct rcu_fwd_cb *rfcp; + + for (;;) { + spin_lock_irqsave(&rcu_fwd_lock, flags); + rfcp = rcu_fwd_cb_head; + if (!rfcp) + break; + rcu_fwd_cb_head = rfcp->rfc_next; + if (!rcu_fwd_cb_head) + rcu_fwd_cb_tail = &rcu_fwd_cb_head; + spin_unlock_irqrestore(&rcu_fwd_lock, flags); + kfree(rfcp); + freed++; + } + spin_unlock_irqrestore(&rcu_fwd_lock, flags); + return freed; +} + +/* Carry out need_resched()/cond_resched() forward-progress testing. */ +static void rcu_torture_fwd_prog_nr(int *tested, int *tested_tries) { unsigned long cver; unsigned long dur; @@ -1686,56 +1708,186 @@ static int rcu_torture_fwd_prog(void *args) int sd4; bool selfpropcb = false; unsigned long stopat; - int tested = 0; - int tested_tries = 0; static DEFINE_TORTURE_RANDOM(trs); - VERBOSE_TOROUT_STRING("rcu_torture_fwd_progress task started"); - if (!IS_ENABLED(CONFIG_SMP) || !IS_ENABLED(CONFIG_RCU_BOOST)) - set_user_nice(current, MAX_NICE); if (cur_ops->call && cur_ops->sync && cur_ops->cb_barrier) { init_rcu_head_on_stack(&fcs.rh); selfpropcb = true; } - do { - schedule_timeout_interruptible(fwd_progress_holdoff * HZ); - if (selfpropcb) { - WRITE_ONCE(fcs.stop, 0); - cur_ops->call(&fcs.rh, rcu_torture_fwd_prog_cb); - } - cver = READ_ONCE(rcu_torture_current_version); - gps = cur_ops->get_gp_seq(); - sd = cur_ops->stall_dur() + 1; - sd4 = (sd + fwd_progress_div - 1) / fwd_progress_div; - dur = sd4 + torture_random(&trs) % (sd - sd4); - stopat = jiffies + dur; - while (time_before(jiffies, stopat) && !torture_must_stop()) { - idx = cur_ops->readlock(); - udelay(10); - cur_ops->readunlock(idx); - if (!fwd_progress_need_resched || need_resched()) - cond_resched(); - } - tested_tries++; - if (!time_before(jiffies, stopat) && !torture_must_stop()) { - tested++; - cver = READ_ONCE(rcu_torture_current_version) - cver; - gps = rcutorture_seq_diff(cur_ops->get_gp_seq(), gps); - WARN_ON(!cver && gps < 2); - pr_alert("%s: Duration %ld cver %ld gps %ld\n", __func__, dur, cver, gps); - } - if (selfpropcb) { - WRITE_ONCE(fcs.stop, 1); - cur_ops->sync(); /* Wait for running CB to complete. */ - cur_ops->cb_barrier(); /* Wait for queued callbacks. */ - } - /* Avoid slow periods, better to test when busy. */ - stutter_wait("rcu_torture_fwd_prog"); - } while (!torture_must_stop()); + + /* Tight loop containing cond_resched(). */ + if (selfpropcb) { + WRITE_ONCE(fcs.stop, 0); + cur_ops->call(&fcs.rh, rcu_torture_fwd_prog_cb); + } + cver = READ_ONCE(rcu_torture_current_version); + gps = cur_ops->get_gp_seq(); + sd = cur_ops->stall_dur() + 1; + sd4 = (sd + fwd_progress_div - 1) / fwd_progress_div; + dur = sd4 + torture_random(&trs) % (sd - sd4); + WRITE_ONCE(rcu_fwd_startat, jiffies); + stopat = rcu_fwd_startat + dur; + while (time_before(jiffies, stopat) && + !READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) { + idx = cur_ops->readlock(); + udelay(10); + cur_ops->readunlock(idx); + if (!fwd_progress_need_resched || need_resched()) + cond_resched(); + } + (*tested_tries)++; + if (!time_before(jiffies, stopat) && + !READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) { + (*tested)++; + cver = READ_ONCE(rcu_torture_current_version) - cver; + gps = rcutorture_seq_diff(cur_ops->get_gp_seq(), gps); + WARN_ON(!cver && gps < 2); + pr_alert("%s: Duration %ld cver %ld gps %ld\n", __func__, dur, cver, gps); + } + if (selfpropcb) { + WRITE_ONCE(fcs.stop, 1); + cur_ops->sync(); /* Wait for running CB to complete. */ + cur_ops->cb_barrier(); /* Wait for queued callbacks. */ + } + if (selfpropcb) { WARN_ON(READ_ONCE(fcs.stop) != 2); destroy_rcu_head_on_stack(&fcs.rh); } +} + +/* Carry out call_rcu() forward-progress testing. */ +static void rcu_torture_fwd_prog_cr(void) +{ + unsigned long cver; + unsigned long gps; + int i; + long n_launders; + long n_launders_cb_snap; + long n_launders_sa; + long n_max_cbs; + long n_max_gps; + struct rcu_fwd_cb *rfcp; + struct rcu_fwd_cb *rfcpn; + unsigned long stopat; + unsigned long stoppedat; + + if (READ_ONCE(rcu_fwd_emergency_stop)) + return; /* Get out of the way quickly, no GP wait! */ + + /* Loop continuously posting RCU callbacks. */ + WRITE_ONCE(rcu_fwd_cb_nodelay, true); + cur_ops->sync(); /* Later readers see above write. */ + WRITE_ONCE(rcu_fwd_startat, jiffies); + stopat = rcu_fwd_startat + MAX_FWD_CB_JIFFIES; + n_launders = 0; + n_launders_cb = 0; + n_launders_sa = 0; + n_max_cbs = 0; + n_max_gps = 0; + for (i = 0; i < ARRAY_SIZE(n_launders_hist); i++) + n_launders_hist[i] = 0; + cver = READ_ONCE(rcu_torture_current_version); + gps = cur_ops->get_gp_seq(); + while (time_before(jiffies, stopat) && + !READ_ONCE(rcu_fwd_emergency_stop) && !torture_must_stop()) { + rfcp = READ_ONCE(rcu_fwd_cb_head); + rfcpn = NULL; + if (rfcp) + rfcpn = READ_ONCE(rfcp->rfc_next); + if (rfcpn) { + if (rfcp->rfc_gps >= MIN_FWD_CB_LAUNDERS && + ++n_max_gps >= MIN_FWD_CBS_LAUNDERED) + break; + rcu_fwd_cb_head = rfcpn; + n_launders++; + n_launders_sa++; + } else { + rfcp = kmalloc(sizeof(*rfcp), GFP_KERNEL); + if (WARN_ON_ONCE(!rfcp)) { + schedule_timeout_interruptible(1); + continue; + } + n_max_cbs++; + n_launders_sa = 0; + rfcp->rfc_gps = 0; + } + cur_ops->call(&rfcp->rh, rcu_torture_fwd_cb_cr); + cond_resched(); + } + stoppedat = jiffies; + n_launders_cb_snap = READ_ONCE(n_launders_cb); + cver = READ_ONCE(rcu_torture_current_version) - cver; + gps = rcutorture_seq_diff(cur_ops->get_gp_seq(), gps); + cur_ops->cb_barrier(); /* Wait for callbacks to be invoked. */ + (void)rcu_torture_fwd_prog_cbfree(); + + WRITE_ONCE(rcu_fwd_cb_nodelay, false); + if (!torture_must_stop() && !READ_ONCE(rcu_fwd_emergency_stop)) { + WARN_ON(n_max_gps < MIN_FWD_CBS_LAUNDERED); + pr_alert("%s Duration %lu barrier: %lu pending %ld n_launders: %ld n_launders_sa: %ld n_max_gps: %ld n_max_cbs: %ld cver %ld gps %ld\n", + __func__, + stoppedat - rcu_fwd_startat, jiffies - stoppedat, + n_launders + n_max_cbs - n_launders_cb_snap, + n_launders, n_launders_sa, + n_max_gps, n_max_cbs, cver, gps); + rcu_torture_fwd_cb_hist(); + } +} + + +/* + * OOM notifier, but this only prints diagnostic information for the + * current forward-progress test. + */ +static int rcutorture_oom_notify(struct notifier_block *self, + unsigned long notused, void *nfreed) +{ + WARN(1, "%s invoked upon OOM during forward-progress testing.\n", + __func__); + rcu_torture_fwd_cb_hist(); + rcu_fwd_progress_check(1 + (jiffies - READ_ONCE(rcu_fwd_startat) / 2)); + WRITE_ONCE(rcu_fwd_emergency_stop, true); + smp_mb(); /* Emergency stop before free and wait to avoid hangs. */ + pr_info("%s: Freed %lu RCU callbacks.\n", + __func__, rcu_torture_fwd_prog_cbfree()); + rcu_barrier(); + pr_info("%s: Freed %lu RCU callbacks.\n", + __func__, rcu_torture_fwd_prog_cbfree()); + rcu_barrier(); + pr_info("%s: Freed %lu RCU callbacks.\n", + __func__, rcu_torture_fwd_prog_cbfree()); + smp_mb(); /* Frees before return to avoid redoing OOM. */ + (*(unsigned long *)nfreed)++; /* Forward progress CBs freed! */ + pr_info("%s returning after OOM processing.\n", __func__); + return NOTIFY_OK; +} + +static struct notifier_block rcutorture_oom_nb = { + .notifier_call = rcutorture_oom_notify +}; + +/* Carry out grace-period forward-progress testing. */ +static int rcu_torture_fwd_prog(void *args) +{ + int tested = 0; + int tested_tries = 0; + + VERBOSE_TOROUT_STRING("rcu_torture_fwd_progress task started"); + rcu_bind_current_to_nocb(); + if (!IS_ENABLED(CONFIG_SMP) || !IS_ENABLED(CONFIG_RCU_BOOST)) + set_user_nice(current, MAX_NICE); + do { + schedule_timeout_interruptible(fwd_progress_holdoff * HZ); + WRITE_ONCE(rcu_fwd_emergency_stop, false); + register_oom_notifier(&rcutorture_oom_nb); + rcu_torture_fwd_prog_nr(&tested, &tested_tries); + rcu_torture_fwd_prog_cr(); + unregister_oom_notifier(&rcutorture_oom_nb); + + /* Avoid slow periods, better to test when busy. */ + stutter_wait("rcu_torture_fwd_prog"); + } while (!torture_must_stop()); /* Short runs might not contain a valid forward-progress attempt. */ WARN_ON(!tested && tested_tries >= 5); pr_alert("%s: tested %d tested_tries %d\n", __func__, tested, tested_tries); @@ -1748,7 +1900,8 @@ static int __init rcu_torture_fwd_prog_init(void) { if (!fwd_progress) return 0; /* Not requested, so don't do it. */ - if (!cur_ops->stall_dur || cur_ops->stall_dur() <= 0) { + if (!cur_ops->stall_dur || cur_ops->stall_dur() <= 0 || + cur_ops == &rcu_busted_ops) { VERBOSE_TOROUT_STRING("rcu_torture_fwd_prog_init: Disabled, unsupported by RCU flavor under test"); return 0; } @@ -1968,8 +2121,6 @@ rcu_torture_cleanup(void) cur_ops->name, gp_seq, flags); torture_stop_kthread(rcu_torture_stats, stats_task); torture_stop_kthread(rcu_torture_fqs, fqs_task); - for (i = 0; i < ncbflooders; i++) - torture_stop_kthread(rcu_torture_cbflood, cbflood_task[i]); if (rcu_torture_can_boost()) cpuhp_remove_state(rcutor_hp); @@ -2252,24 +2403,6 @@ rcu_torture_init(void) goto unwind; if (object_debug) rcu_test_debug_objects(); - if (cbflood_n_burst > 0) { - /* Create the cbflood threads */ - ncbflooders = (num_online_cpus() + 3) / 4; - cbflood_task = kcalloc(ncbflooders, sizeof(*cbflood_task), - GFP_KERNEL); - if (!cbflood_task) { - VERBOSE_TOROUT_ERRSTRING("out of memory"); - firsterr = -ENOMEM; - goto unwind; - } - for (i = 0; i < ncbflooders; i++) { - firsterr = torture_create_kthread(rcu_torture_cbflood, - NULL, - cbflood_task[i]); - if (firsterr) - goto unwind; - } - } torture_init_end(); return 0; diff --git a/kernel/rcu/srcutiny.c b/kernel/rcu/srcutiny.c index b46e6683f8c9..32dfd6522548 100644 --- a/kernel/rcu/srcutiny.c +++ b/kernel/rcu/srcutiny.c @@ -37,30 +37,30 @@ int rcu_scheduler_active __read_mostly; static LIST_HEAD(srcu_boot_list); static bool srcu_init_done; -static int init_srcu_struct_fields(struct srcu_struct *sp) +static int init_srcu_struct_fields(struct srcu_struct *ssp) { - sp->srcu_lock_nesting[0] = 0; - sp->srcu_lock_nesting[1] = 0; - init_swait_queue_head(&sp->srcu_wq); - sp->srcu_cb_head = NULL; - sp->srcu_cb_tail = &sp->srcu_cb_head; - sp->srcu_gp_running = false; - sp->srcu_gp_waiting = false; - sp->srcu_idx = 0; - INIT_WORK(&sp->srcu_work, srcu_drive_gp); - INIT_LIST_HEAD(&sp->srcu_work.entry); + ssp->srcu_lock_nesting[0] = 0; + ssp->srcu_lock_nesting[1] = 0; + init_swait_queue_head(&ssp->srcu_wq); + ssp->srcu_cb_head = NULL; + ssp->srcu_cb_tail = &ssp->srcu_cb_head; + ssp->srcu_gp_running = false; + ssp->srcu_gp_waiting = false; + ssp->srcu_idx = 0; + INIT_WORK(&ssp->srcu_work, srcu_drive_gp); + INIT_LIST_HEAD(&ssp->srcu_work.entry); return 0; } #ifdef CONFIG_DEBUG_LOCK_ALLOC -int __init_srcu_struct(struct srcu_struct *sp, const char *name, +int __init_srcu_struct(struct srcu_struct *ssp, const char *name, struct lock_class_key *key) { /* Don't re-initialize a lock while it is held. */ - debug_check_no_locks_freed((void *)sp, sizeof(*sp)); - lockdep_init_map(&sp->dep_map, name, key, 0); - return init_srcu_struct_fields(sp); + debug_check_no_locks_freed((void *)ssp, sizeof(*ssp)); + lockdep_init_map(&ssp->dep_map, name, key, 0); + return init_srcu_struct_fields(ssp); } EXPORT_SYMBOL_GPL(__init_srcu_struct); @@ -68,15 +68,15 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct); /* * init_srcu_struct - initialize a sleep-RCU structure - * @sp: structure to initialize. + * @ssp: structure to initialize. * * Must invoke this on a given srcu_struct before passing that srcu_struct * to any other function. Each srcu_struct represents a separate domain * of SRCU protection. */ -int init_srcu_struct(struct srcu_struct *sp) +int init_srcu_struct(struct srcu_struct *ssp) { - return init_srcu_struct_fields(sp); + return init_srcu_struct_fields(ssp); } EXPORT_SYMBOL_GPL(init_srcu_struct); @@ -84,22 +84,22 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); /* * cleanup_srcu_struct - deconstruct a sleep-RCU structure - * @sp: structure to clean up. + * @ssp: structure to clean up. * * Must invoke this after you are finished using a given srcu_struct that * was initialized via init_srcu_struct(), else you leak memory. */ -void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced) +void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) { - WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]); + WARN_ON(ssp->srcu_lock_nesting[0] || ssp->srcu_lock_nesting[1]); if (quiesced) - WARN_ON(work_pending(&sp->srcu_work)); + WARN_ON(work_pending(&ssp->srcu_work)); else - flush_work(&sp->srcu_work); - WARN_ON(sp->srcu_gp_running); - WARN_ON(sp->srcu_gp_waiting); - WARN_ON(sp->srcu_cb_head); - WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail); + flush_work(&ssp->srcu_work); + WARN_ON(ssp->srcu_gp_running); + WARN_ON(ssp->srcu_gp_waiting); + WARN_ON(ssp->srcu_cb_head); + WARN_ON(&ssp->srcu_cb_head != ssp->srcu_cb_tail); } EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); @@ -107,13 +107,13 @@ EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); * Removes the count for the old reader from the appropriate element of * the srcu_struct. */ -void __srcu_read_unlock(struct srcu_struct *sp, int idx) +void __srcu_read_unlock(struct srcu_struct *ssp, int idx) { - int newval = sp->srcu_lock_nesting[idx] - 1; + int newval = ssp->srcu_lock_nesting[idx] - 1; - WRITE_ONCE(sp->srcu_lock_nesting[idx], newval); - if (!newval && READ_ONCE(sp->srcu_gp_waiting)) - swake_up_one(&sp->srcu_wq); + WRITE_ONCE(ssp->srcu_lock_nesting[idx], newval); + if (!newval && READ_ONCE(ssp->srcu_gp_waiting)) + swake_up_one(&ssp->srcu_wq); } EXPORT_SYMBOL_GPL(__srcu_read_unlock); @@ -127,24 +127,24 @@ void srcu_drive_gp(struct work_struct *wp) int idx; struct rcu_head *lh; struct rcu_head *rhp; - struct srcu_struct *sp; + struct srcu_struct *ssp; - sp = container_of(wp, struct srcu_struct, srcu_work); - if (sp->srcu_gp_running || !READ_ONCE(sp->srcu_cb_head)) + ssp = container_of(wp, struct srcu_struct, srcu_work); + if (ssp->srcu_gp_running || !READ_ONCE(ssp->srcu_cb_head)) return; /* Already running or nothing to do. */ /* Remove recently arrived callbacks and wait for readers. */ - WRITE_ONCE(sp->srcu_gp_running, true); + WRITE_ONCE(ssp->srcu_gp_running, true); local_irq_disable(); - lh = sp->srcu_cb_head; - sp->srcu_cb_head = NULL; - sp->srcu_cb_tail = &sp->srcu_cb_head; + lh = ssp->srcu_cb_head; + ssp->srcu_cb_head = NULL; + ssp->srcu_cb_tail = &ssp->srcu_cb_head; local_irq_enable(); - idx = sp->srcu_idx; - WRITE_ONCE(sp->srcu_idx, !sp->srcu_idx); - WRITE_ONCE(sp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */ - swait_event_exclusive(sp->srcu_wq, !READ_ONCE(sp->srcu_lock_nesting[idx])); - WRITE_ONCE(sp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */ + idx = ssp->srcu_idx; + WRITE_ONCE(ssp->srcu_idx, !ssp->srcu_idx); + WRITE_ONCE(ssp->srcu_gp_waiting, true); /* srcu_read_unlock() wakes! */ + swait_event_exclusive(ssp->srcu_wq, !READ_ONCE(ssp->srcu_lock_nesting[idx])); + WRITE_ONCE(ssp->srcu_gp_waiting, false); /* srcu_read_unlock() cheap. */ /* Invoke the callbacks we removed above. */ while (lh) { @@ -161,9 +161,9 @@ void srcu_drive_gp(struct work_struct *wp) * at interrupt level, but the ->srcu_gp_running checks will * straighten that out. */ - WRITE_ONCE(sp->srcu_gp_running, false); - if (READ_ONCE(sp->srcu_cb_head)) - schedule_work(&sp->srcu_work); + WRITE_ONCE(ssp->srcu_gp_running, false); + if (READ_ONCE(ssp->srcu_cb_head)) + schedule_work(&ssp->srcu_work); } EXPORT_SYMBOL_GPL(srcu_drive_gp); @@ -171,7 +171,7 @@ EXPORT_SYMBOL_GPL(srcu_drive_gp); * Enqueue an SRCU callback on the specified srcu_struct structure, * initiating grace-period processing if it is not already running. */ -void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, +void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, rcu_callback_t func) { unsigned long flags; @@ -179,14 +179,14 @@ void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, rhp->func = func; rhp->next = NULL; local_irq_save(flags); - *sp->srcu_cb_tail = rhp; - sp->srcu_cb_tail = &rhp->next; + *ssp->srcu_cb_tail = rhp; + ssp->srcu_cb_tail = &rhp->next; local_irq_restore(flags); - if (!READ_ONCE(sp->srcu_gp_running)) { + if (!READ_ONCE(ssp->srcu_gp_running)) { if (likely(srcu_init_done)) - schedule_work(&sp->srcu_work); - else if (list_empty(&sp->srcu_work.entry)) - list_add(&sp->srcu_work.entry, &srcu_boot_list); + schedule_work(&ssp->srcu_work); + else if (list_empty(&ssp->srcu_work.entry)) + list_add(&ssp->srcu_work.entry, &srcu_boot_list); } } EXPORT_SYMBOL_GPL(call_srcu); @@ -194,13 +194,13 @@ EXPORT_SYMBOL_GPL(call_srcu); /* * synchronize_srcu - wait for prior SRCU read-side critical-section completion */ -void synchronize_srcu(struct srcu_struct *sp) +void synchronize_srcu(struct srcu_struct *ssp) { struct rcu_synchronize rs; init_rcu_head_on_stack(&rs.head); init_completion(&rs.completion); - call_srcu(sp, &rs.head, wakeme_after_rcu); + call_srcu(ssp, &rs.head, wakeme_after_rcu); wait_for_completion(&rs.completion); destroy_rcu_head_on_stack(&rs.head); } @@ -219,13 +219,13 @@ void __init rcu_scheduler_starting(void) */ void __init srcu_init(void) { - struct srcu_struct *sp; + struct srcu_struct *ssp; srcu_init_done = true; while (!list_empty(&srcu_boot_list)) { - sp = list_first_entry(&srcu_boot_list, + ssp = list_first_entry(&srcu_boot_list, struct srcu_struct, srcu_work.entry); - list_del_init(&sp->srcu_work.entry); - schedule_work(&sp->srcu_work); + list_del_init(&ssp->srcu_work.entry); + schedule_work(&ssp->srcu_work); } } diff --git a/kernel/rcu/srcutree.c b/kernel/rcu/srcutree.c index a8846ed7f352..3600d88d8956 100644 --- a/kernel/rcu/srcutree.c +++ b/kernel/rcu/srcutree.c @@ -56,7 +56,7 @@ static LIST_HEAD(srcu_boot_list); static bool __read_mostly srcu_init_done; static void srcu_invoke_callbacks(struct work_struct *work); -static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay); +static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay); static void process_srcu(struct work_struct *work); /* Wrappers for lock acquisition and release, see raw_spin_lock_rcu_node(). */ @@ -92,7 +92,7 @@ do { \ * srcu_read_unlock() running against them. So if the is_static parameter * is set, don't initialize ->srcu_lock_count[] and ->srcu_unlock_count[]. */ -static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) +static void init_srcu_struct_nodes(struct srcu_struct *ssp, bool is_static) { int cpu; int i; @@ -103,13 +103,13 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) struct srcu_node *snp_first; /* Work out the overall tree geometry. */ - sp->level[0] = &sp->node[0]; + ssp->level[0] = &ssp->node[0]; for (i = 1; i < rcu_num_lvls; i++) - sp->level[i] = sp->level[i - 1] + num_rcu_lvl[i - 1]; + ssp->level[i] = ssp->level[i - 1] + num_rcu_lvl[i - 1]; rcu_init_levelspread(levelspread, num_rcu_lvl); /* Each pass through this loop initializes one srcu_node structure. */ - srcu_for_each_node_breadth_first(sp, snp) { + srcu_for_each_node_breadth_first(ssp, snp) { spin_lock_init(&ACCESS_PRIVATE(snp, lock)); WARN_ON_ONCE(ARRAY_SIZE(snp->srcu_have_cbs) != ARRAY_SIZE(snp->srcu_data_have_cbs)); @@ -120,17 +120,17 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) snp->srcu_gp_seq_needed_exp = 0; snp->grplo = -1; snp->grphi = -1; - if (snp == &sp->node[0]) { + if (snp == &ssp->node[0]) { /* Root node, special case. */ snp->srcu_parent = NULL; continue; } /* Non-root node. */ - if (snp == sp->level[level + 1]) + if (snp == ssp->level[level + 1]) level++; - snp->srcu_parent = sp->level[level - 1] + - (snp - sp->level[level]) / + snp->srcu_parent = ssp->level[level - 1] + + (snp - ssp->level[level]) / levelspread[level - 1]; } @@ -141,14 +141,14 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) WARN_ON_ONCE(ARRAY_SIZE(sdp->srcu_lock_count) != ARRAY_SIZE(sdp->srcu_unlock_count)); level = rcu_num_lvls - 1; - snp_first = sp->level[level]; + snp_first = ssp->level[level]; for_each_possible_cpu(cpu) { - sdp = per_cpu_ptr(sp->sda, cpu); + sdp = per_cpu_ptr(ssp->sda, cpu); spin_lock_init(&ACCESS_PRIVATE(sdp, lock)); rcu_segcblist_init(&sdp->srcu_cblist); sdp->srcu_cblist_invoking = false; - sdp->srcu_gp_seq_needed = sp->srcu_gp_seq; - sdp->srcu_gp_seq_needed_exp = sp->srcu_gp_seq; + sdp->srcu_gp_seq_needed = ssp->srcu_gp_seq; + sdp->srcu_gp_seq_needed_exp = ssp->srcu_gp_seq; sdp->mynode = &snp_first[cpu / levelspread[level]]; for (snp = sdp->mynode; snp != NULL; snp = snp->srcu_parent) { if (snp->grplo < 0) @@ -157,7 +157,7 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) } sdp->cpu = cpu; INIT_DELAYED_WORK(&sdp->work, srcu_invoke_callbacks); - sdp->sp = sp; + sdp->ssp = ssp; sdp->grpmask = 1 << (cpu - sdp->mynode->grplo); if (is_static) continue; @@ -176,35 +176,35 @@ static void init_srcu_struct_nodes(struct srcu_struct *sp, bool is_static) * parameter is passed through to init_srcu_struct_nodes(), and * also tells us that ->sda has already been wired up to srcu_data. */ -static int init_srcu_struct_fields(struct srcu_struct *sp, bool is_static) +static int init_srcu_struct_fields(struct srcu_struct *ssp, bool is_static) { - mutex_init(&sp->srcu_cb_mutex); - mutex_init(&sp->srcu_gp_mutex); - sp->srcu_idx = 0; - sp->srcu_gp_seq = 0; - sp->srcu_barrier_seq = 0; - mutex_init(&sp->srcu_barrier_mutex); - atomic_set(&sp->srcu_barrier_cpu_cnt, 0); - INIT_DELAYED_WORK(&sp->work, process_srcu); + mutex_init(&ssp->srcu_cb_mutex); + mutex_init(&ssp->srcu_gp_mutex); + ssp->srcu_idx = 0; + ssp->srcu_gp_seq = 0; + ssp->srcu_barrier_seq = 0; + mutex_init(&ssp->srcu_barrier_mutex); + atomic_set(&ssp->srcu_barrier_cpu_cnt, 0); + INIT_DELAYED_WORK(&ssp->work, process_srcu); if (!is_static) - sp->sda = alloc_percpu(struct srcu_data); - init_srcu_struct_nodes(sp, is_static); - sp->srcu_gp_seq_needed_exp = 0; - sp->srcu_last_gp_end = ktime_get_mono_fast_ns(); - smp_store_release(&sp->srcu_gp_seq_needed, 0); /* Init done. */ - return sp->sda ? 0 : -ENOMEM; + ssp->sda = alloc_percpu(struct srcu_data); + init_srcu_struct_nodes(ssp, is_static); + ssp->srcu_gp_seq_needed_exp = 0; + ssp->srcu_last_gp_end = ktime_get_mono_fast_ns(); + smp_store_release(&ssp->srcu_gp_seq_needed, 0); /* Init done. */ + return ssp->sda ? 0 : -ENOMEM; } #ifdef CONFIG_DEBUG_LOCK_ALLOC -int __init_srcu_struct(struct srcu_struct *sp, const char *name, +int __init_srcu_struct(struct srcu_struct *ssp, const char *name, struct lock_class_key *key) { /* Don't re-initialize a lock while it is held. */ - debug_check_no_locks_freed((void *)sp, sizeof(*sp)); - lockdep_init_map(&sp->dep_map, name, key, 0); - spin_lock_init(&ACCESS_PRIVATE(sp, lock)); - return init_srcu_struct_fields(sp, false); + debug_check_no_locks_freed((void *)ssp, sizeof(*ssp)); + lockdep_init_map(&ssp->dep_map, name, key, 0); + spin_lock_init(&ACCESS_PRIVATE(ssp, lock)); + return init_srcu_struct_fields(ssp, false); } EXPORT_SYMBOL_GPL(__init_srcu_struct); @@ -212,16 +212,16 @@ EXPORT_SYMBOL_GPL(__init_srcu_struct); /** * init_srcu_struct - initialize a sleep-RCU structure - * @sp: structure to initialize. + * @ssp: structure to initialize. * * Must invoke this on a given srcu_struct before passing that srcu_struct * to any other function. Each srcu_struct represents a separate domain * of SRCU protection. */ -int init_srcu_struct(struct srcu_struct *sp) +int init_srcu_struct(struct srcu_struct *ssp) { - spin_lock_init(&ACCESS_PRIVATE(sp, lock)); - return init_srcu_struct_fields(sp, false); + spin_lock_init(&ACCESS_PRIVATE(ssp, lock)); + return init_srcu_struct_fields(ssp, false); } EXPORT_SYMBOL_GPL(init_srcu_struct); @@ -231,37 +231,37 @@ EXPORT_SYMBOL_GPL(init_srcu_struct); * First-use initialization of statically allocated srcu_struct * structure. Wiring up the combining tree is more than can be * done with compile-time initialization, so this check is added - * to each update-side SRCU primitive. Use sp->lock, which -is- + * to each update-side SRCU primitive. Use ssp->lock, which -is- * compile-time initialized, to resolve races involving multiple * CPUs trying to garner first-use privileges. */ -static void check_init_srcu_struct(struct srcu_struct *sp) +static void check_init_srcu_struct(struct srcu_struct *ssp) { unsigned long flags; /* The smp_load_acquire() pairs with the smp_store_release(). */ - if (!rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq_needed))) /*^^^*/ + if (!rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq_needed))) /*^^^*/ return; /* Already initialized. */ - spin_lock_irqsave_rcu_node(sp, flags); - if (!rcu_seq_state(sp->srcu_gp_seq_needed)) { - spin_unlock_irqrestore_rcu_node(sp, flags); + spin_lock_irqsave_rcu_node(ssp, flags); + if (!rcu_seq_state(ssp->srcu_gp_seq_needed)) { + spin_unlock_irqrestore_rcu_node(ssp, flags); return; } - init_srcu_struct_fields(sp, true); - spin_unlock_irqrestore_rcu_node(sp, flags); + init_srcu_struct_fields(ssp, true); + spin_unlock_irqrestore_rcu_node(ssp, flags); } /* * Returns approximate total of the readers' ->srcu_lock_count[] values * for the rank of per-CPU counters specified by idx. */ -static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx) +static unsigned long srcu_readers_lock_idx(struct srcu_struct *ssp, int idx) { int cpu; unsigned long sum = 0; for_each_possible_cpu(cpu) { - struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); + struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu); sum += READ_ONCE(cpuc->srcu_lock_count[idx]); } @@ -272,13 +272,13 @@ static unsigned long srcu_readers_lock_idx(struct srcu_struct *sp, int idx) * Returns approximate total of the readers' ->srcu_unlock_count[] values * for the rank of per-CPU counters specified by idx. */ -static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx) +static unsigned long srcu_readers_unlock_idx(struct srcu_struct *ssp, int idx) { int cpu; unsigned long sum = 0; for_each_possible_cpu(cpu) { - struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); + struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu); sum += READ_ONCE(cpuc->srcu_unlock_count[idx]); } @@ -289,11 +289,11 @@ static unsigned long srcu_readers_unlock_idx(struct srcu_struct *sp, int idx) * Return true if the number of pre-existing readers is determined to * be zero. */ -static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx) +static bool srcu_readers_active_idx_check(struct srcu_struct *ssp, int idx) { unsigned long unlocks; - unlocks = srcu_readers_unlock_idx(sp, idx); + unlocks = srcu_readers_unlock_idx(ssp, idx); /* * Make sure that a lock is always counted if the corresponding @@ -329,25 +329,25 @@ static bool srcu_readers_active_idx_check(struct srcu_struct *sp, int idx) * of floor(ULONG_MAX/NR_CPUS/2), which should be sufficient, * especially on 64-bit systems. */ - return srcu_readers_lock_idx(sp, idx) == unlocks; + return srcu_readers_lock_idx(ssp, idx) == unlocks; } /** * srcu_readers_active - returns true if there are readers. and false * otherwise - * @sp: which srcu_struct to count active readers (holding srcu_read_lock). + * @ssp: which srcu_struct to count active readers (holding srcu_read_lock). * * Note that this is not an atomic primitive, and can therefore suffer * severe errors when invoked on an active srcu_struct. That said, it * can be useful as an error check at cleanup time. */ -static bool srcu_readers_active(struct srcu_struct *sp) +static bool srcu_readers_active(struct srcu_struct *ssp) { int cpu; unsigned long sum = 0; for_each_possible_cpu(cpu) { - struct srcu_data *cpuc = per_cpu_ptr(sp->sda, cpu); + struct srcu_data *cpuc = per_cpu_ptr(ssp->sda, cpu); sum += READ_ONCE(cpuc->srcu_lock_count[0]); sum += READ_ONCE(cpuc->srcu_lock_count[1]); @@ -363,44 +363,44 @@ static bool srcu_readers_active(struct srcu_struct *sp) * Return grace-period delay, zero if there are expedited grace * periods pending, SRCU_INTERVAL otherwise. */ -static unsigned long srcu_get_delay(struct srcu_struct *sp) +static unsigned long srcu_get_delay(struct srcu_struct *ssp) { - if (ULONG_CMP_LT(READ_ONCE(sp->srcu_gp_seq), - READ_ONCE(sp->srcu_gp_seq_needed_exp))) + if (ULONG_CMP_LT(READ_ONCE(ssp->srcu_gp_seq), + READ_ONCE(ssp->srcu_gp_seq_needed_exp))) return 0; return SRCU_INTERVAL; } /* Helper for cleanup_srcu_struct() and cleanup_srcu_struct_quiesced(). */ -void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced) +void _cleanup_srcu_struct(struct srcu_struct *ssp, bool quiesced) { int cpu; - if (WARN_ON(!srcu_get_delay(sp))) + if (WARN_ON(!srcu_get_delay(ssp))) return; /* Just leak it! */ - if (WARN_ON(srcu_readers_active(sp))) + if (WARN_ON(srcu_readers_active(ssp))) return; /* Just leak it! */ if (quiesced) { - if (WARN_ON(delayed_work_pending(&sp->work))) + if (WARN_ON(delayed_work_pending(&ssp->work))) return; /* Just leak it! */ } else { - flush_delayed_work(&sp->work); + flush_delayed_work(&ssp->work); } for_each_possible_cpu(cpu) if (quiesced) { - if (WARN_ON(delayed_work_pending(&per_cpu_ptr(sp->sda, cpu)->work))) + if (WARN_ON(delayed_work_pending(&per_cpu_ptr(ssp->sda, cpu)->work))) return; /* Just leak it! */ } else { - flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work); + flush_delayed_work(&per_cpu_ptr(ssp->sda, cpu)->work); } - if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) || - WARN_ON(srcu_readers_active(sp))) { + if (WARN_ON(rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) != SRCU_STATE_IDLE) || + WARN_ON(srcu_readers_active(ssp))) { pr_info("%s: Active srcu_struct %p state: %d\n", - __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq))); + __func__, ssp, rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq))); return; /* Caller forgot to stop doing call_srcu()? */ } - free_percpu(sp->sda); - sp->sda = NULL; + free_percpu(ssp->sda); + ssp->sda = NULL; } EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); @@ -409,12 +409,12 @@ EXPORT_SYMBOL_GPL(_cleanup_srcu_struct); * srcu_struct. * Returns an index that must be passed to the matching srcu_read_unlock(). */ -int __srcu_read_lock(struct srcu_struct *sp) +int __srcu_read_lock(struct srcu_struct *ssp) { int idx; - idx = READ_ONCE(sp->srcu_idx) & 0x1; - this_cpu_inc(sp->sda->srcu_lock_count[idx]); + idx = READ_ONCE(ssp->srcu_idx) & 0x1; + this_cpu_inc(ssp->sda->srcu_lock_count[idx]); smp_mb(); /* B */ /* Avoid leaking the critical section. */ return idx; } @@ -425,10 +425,10 @@ EXPORT_SYMBOL_GPL(__srcu_read_lock); * element of the srcu_struct. Note that this may well be a different * CPU than that which was incremented by the corresponding srcu_read_lock(). */ -void __srcu_read_unlock(struct srcu_struct *sp, int idx) +void __srcu_read_unlock(struct srcu_struct *ssp, int idx) { smp_mb(); /* C */ /* Avoid leaking the critical section. */ - this_cpu_inc(sp->sda->srcu_unlock_count[idx]); + this_cpu_inc(ssp->sda->srcu_unlock_count[idx]); } EXPORT_SYMBOL_GPL(__srcu_read_unlock); @@ -444,20 +444,22 @@ EXPORT_SYMBOL_GPL(__srcu_read_unlock); /* * Start an SRCU grace period. */ -static void srcu_gp_start(struct srcu_struct *sp) +static void srcu_gp_start(struct srcu_struct *ssp) { - struct srcu_data *sdp = this_cpu_ptr(sp->sda); + struct srcu_data *sdp = this_cpu_ptr(ssp->sda); int state; - lockdep_assert_held(&ACCESS_PRIVATE(sp, lock)); - WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); + lockdep_assert_held(&ACCESS_PRIVATE(ssp, lock)); + WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)); + spin_lock_rcu_node(sdp); /* Interrupts already disabled. */ rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&sp->srcu_gp_seq)); + rcu_seq_current(&ssp->srcu_gp_seq)); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, - rcu_seq_snap(&sp->srcu_gp_seq)); + rcu_seq_snap(&ssp->srcu_gp_seq)); + spin_unlock_rcu_node(sdp); /* Interrupts remain disabled. */ smp_mb(); /* Order prior store to ->srcu_gp_seq_needed vs. GP start. */ - rcu_seq_start(&sp->srcu_gp_seq); - state = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); + rcu_seq_start(&ssp->srcu_gp_seq); + state = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)); WARN_ON_ONCE(state != SRCU_STATE_SCAN1); } @@ -511,7 +513,7 @@ static void srcu_schedule_cbs_sdp(struct srcu_data *sdp, unsigned long delay) * just-completed grace period, the one corresponding to idx. If possible, * schedule this invocation on the corresponding CPUs. */ -static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp, +static void srcu_schedule_cbs_snp(struct srcu_struct *ssp, struct srcu_node *snp, unsigned long mask, unsigned long delay) { int cpu; @@ -519,7 +521,7 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp, for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { if (!(mask & (1 << (cpu - snp->grplo)))) continue; - srcu_schedule_cbs_sdp(per_cpu_ptr(sp->sda, cpu), delay); + srcu_schedule_cbs_sdp(per_cpu_ptr(ssp->sda, cpu), delay); } } @@ -532,7 +534,7 @@ static void srcu_schedule_cbs_snp(struct srcu_struct *sp, struct srcu_node *snp, * are initiating callback invocation. This allows the ->srcu_have_cbs[] * array to have a finite number of elements. */ -static void srcu_gp_end(struct srcu_struct *sp) +static void srcu_gp_end(struct srcu_struct *ssp) { unsigned long cbdelay; bool cbs; @@ -546,28 +548,28 @@ static void srcu_gp_end(struct srcu_struct *sp) struct srcu_node *snp; /* Prevent more than one additional grace period. */ - mutex_lock(&sp->srcu_cb_mutex); + mutex_lock(&ssp->srcu_cb_mutex); /* End the current grace period. */ - spin_lock_irq_rcu_node(sp); - idx = rcu_seq_state(sp->srcu_gp_seq); + spin_lock_irq_rcu_node(ssp); + idx = rcu_seq_state(ssp->srcu_gp_seq); WARN_ON_ONCE(idx != SRCU_STATE_SCAN2); - cbdelay = srcu_get_delay(sp); - sp->srcu_last_gp_end = ktime_get_mono_fast_ns(); - rcu_seq_end(&sp->srcu_gp_seq); - gpseq = rcu_seq_current(&sp->srcu_gp_seq); - if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, gpseq)) - sp->srcu_gp_seq_needed_exp = gpseq; - spin_unlock_irq_rcu_node(sp); - mutex_unlock(&sp->srcu_gp_mutex); + cbdelay = srcu_get_delay(ssp); + ssp->srcu_last_gp_end = ktime_get_mono_fast_ns(); + rcu_seq_end(&ssp->srcu_gp_seq); + gpseq = rcu_seq_current(&ssp->srcu_gp_seq); + if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, gpseq)) + ssp->srcu_gp_seq_needed_exp = gpseq; + spin_unlock_irq_rcu_node(ssp); + mutex_unlock(&ssp->srcu_gp_mutex); /* A new grace period can start at this point. But only one. */ /* Initiate callback invocation as needed. */ idx = rcu_seq_ctr(gpseq) % ARRAY_SIZE(snp->srcu_have_cbs); - srcu_for_each_node_breadth_first(sp, snp) { + srcu_for_each_node_breadth_first(ssp, snp) { spin_lock_irq_rcu_node(snp); cbs = false; - last_lvl = snp >= sp->level[rcu_num_lvls - 1]; + last_lvl = snp >= ssp->level[rcu_num_lvls - 1]; if (last_lvl) cbs = snp->srcu_have_cbs[idx] == gpseq; snp->srcu_have_cbs[idx] = gpseq; @@ -578,12 +580,12 @@ static void srcu_gp_end(struct srcu_struct *sp) snp->srcu_data_have_cbs[idx] = 0; spin_unlock_irq_rcu_node(snp); if (cbs) - srcu_schedule_cbs_snp(sp, snp, mask, cbdelay); + srcu_schedule_cbs_snp(ssp, snp, mask, cbdelay); /* Occasionally prevent srcu_data counter wrap. */ if (!(gpseq & counter_wrap_check) && last_lvl) for (cpu = snp->grplo; cpu <= snp->grphi; cpu++) { - sdp = per_cpu_ptr(sp->sda, cpu); + sdp = per_cpu_ptr(ssp->sda, cpu); spin_lock_irqsave_rcu_node(sdp, flags); if (ULONG_CMP_GE(gpseq, sdp->srcu_gp_seq_needed + 100)) @@ -596,18 +598,18 @@ static void srcu_gp_end(struct srcu_struct *sp) } /* Callback initiation done, allow grace periods after next. */ - mutex_unlock(&sp->srcu_cb_mutex); + mutex_unlock(&ssp->srcu_cb_mutex); /* Start a new grace period if needed. */ - spin_lock_irq_rcu_node(sp); - gpseq = rcu_seq_current(&sp->srcu_gp_seq); + spin_lock_irq_rcu_node(ssp); + gpseq = rcu_seq_current(&ssp->srcu_gp_seq); if (!rcu_seq_state(gpseq) && - ULONG_CMP_LT(gpseq, sp->srcu_gp_seq_needed)) { - srcu_gp_start(sp); - spin_unlock_irq_rcu_node(sp); - srcu_reschedule(sp, 0); + ULONG_CMP_LT(gpseq, ssp->srcu_gp_seq_needed)) { + srcu_gp_start(ssp); + spin_unlock_irq_rcu_node(ssp); + srcu_reschedule(ssp, 0); } else { - spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(ssp); } } @@ -618,13 +620,13 @@ static void srcu_gp_end(struct srcu_struct *sp) * but without expediting. To start a completely new grace period, * whether expedited or not, use srcu_funnel_gp_start() instead. */ -static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp, +static void srcu_funnel_exp_start(struct srcu_struct *ssp, struct srcu_node *snp, unsigned long s) { unsigned long flags; for (; snp != NULL; snp = snp->srcu_parent) { - if (rcu_seq_done(&sp->srcu_gp_seq, s) || + if (rcu_seq_done(&ssp->srcu_gp_seq, s) || ULONG_CMP_GE(READ_ONCE(snp->srcu_gp_seq_needed_exp), s)) return; spin_lock_irqsave_rcu_node(snp, flags); @@ -635,10 +637,10 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp, WRITE_ONCE(snp->srcu_gp_seq_needed_exp, s); spin_unlock_irqrestore_rcu_node(snp, flags); } - spin_lock_irqsave_rcu_node(sp, flags); - if (ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) - sp->srcu_gp_seq_needed_exp = s; - spin_unlock_irqrestore_rcu_node(sp, flags); + spin_lock_irqsave_rcu_node(ssp, flags); + if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s)) + ssp->srcu_gp_seq_needed_exp = s; + spin_unlock_irqrestore_rcu_node(ssp, flags); } /* @@ -651,7 +653,7 @@ static void srcu_funnel_exp_start(struct srcu_struct *sp, struct srcu_node *snp, * Note that this function also does the work of srcu_funnel_exp_start(), * in some cases by directly invoking it. */ -static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, +static void srcu_funnel_gp_start(struct srcu_struct *ssp, struct srcu_data *sdp, unsigned long s, bool do_norm) { unsigned long flags; @@ -661,7 +663,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, /* Each pass through the loop does one level of the srcu_node tree. */ for (; snp != NULL; snp = snp->srcu_parent) { - if (rcu_seq_done(&sp->srcu_gp_seq, s) && snp != sdp->mynode) + if (rcu_seq_done(&ssp->srcu_gp_seq, s) && snp != sdp->mynode) return; /* GP already done and CBs recorded. */ spin_lock_irqsave_rcu_node(snp, flags); if (ULONG_CMP_GE(snp->srcu_have_cbs[idx], s)) { @@ -676,7 +678,7 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, return; } if (!do_norm) - srcu_funnel_exp_start(sp, snp, s); + srcu_funnel_exp_start(ssp, snp, s); return; } snp->srcu_have_cbs[idx] = s; @@ -688,29 +690,29 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, } /* Top of tree, must ensure the grace period will be started. */ - spin_lock_irqsave_rcu_node(sp, flags); - if (ULONG_CMP_LT(sp->srcu_gp_seq_needed, s)) { + spin_lock_irqsave_rcu_node(ssp, flags); + if (ULONG_CMP_LT(ssp->srcu_gp_seq_needed, s)) { /* * Record need for grace period s. Pair with load * acquire setting up for initialization. */ - smp_store_release(&sp->srcu_gp_seq_needed, s); /*^^^*/ + smp_store_release(&ssp->srcu_gp_seq_needed, s); /*^^^*/ } - if (!do_norm && ULONG_CMP_LT(sp->srcu_gp_seq_needed_exp, s)) - sp->srcu_gp_seq_needed_exp = s; + if (!do_norm && ULONG_CMP_LT(ssp->srcu_gp_seq_needed_exp, s)) + ssp->srcu_gp_seq_needed_exp = s; /* If grace period not already done and none in progress, start it. */ - if (!rcu_seq_done(&sp->srcu_gp_seq, s) && - rcu_seq_state(sp->srcu_gp_seq) == SRCU_STATE_IDLE) { - WARN_ON_ONCE(ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)); - srcu_gp_start(sp); + if (!rcu_seq_done(&ssp->srcu_gp_seq, s) && + rcu_seq_state(ssp->srcu_gp_seq) == SRCU_STATE_IDLE) { + WARN_ON_ONCE(ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)); + srcu_gp_start(ssp); if (likely(srcu_init_done)) - queue_delayed_work(rcu_gp_wq, &sp->work, - srcu_get_delay(sp)); - else if (list_empty(&sp->work.work.entry)) - list_add(&sp->work.work.entry, &srcu_boot_list); + queue_delayed_work(rcu_gp_wq, &ssp->work, + srcu_get_delay(ssp)); + else if (list_empty(&ssp->work.work.entry)) + list_add(&ssp->work.work.entry, &srcu_boot_list); } - spin_unlock_irqrestore_rcu_node(sp, flags); + spin_unlock_irqrestore_rcu_node(ssp, flags); } /* @@ -718,12 +720,12 @@ static void srcu_funnel_gp_start(struct srcu_struct *sp, struct srcu_data *sdp, * loop an additional time if there is an expedited grace period pending. * The caller must ensure that ->srcu_idx is not changed while checking. */ -static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount) +static bool try_check_zero(struct srcu_struct *ssp, int idx, int trycount) { for (;;) { - if (srcu_readers_active_idx_check(sp, idx)) + if (srcu_readers_active_idx_check(ssp, idx)) return true; - if (--trycount + !srcu_get_delay(sp) <= 0) + if (--trycount + !srcu_get_delay(ssp) <= 0) return false; udelay(SRCU_RETRY_CHECK_DELAY); } @@ -734,7 +736,7 @@ static bool try_check_zero(struct srcu_struct *sp, int idx, int trycount) * use the other rank of the ->srcu_(un)lock_count[] arrays. This allows * us to wait for pre-existing readers in a starvation-free manner. */ -static void srcu_flip(struct srcu_struct *sp) +static void srcu_flip(struct srcu_struct *ssp) { /* * Ensure that if this updater saw a given reader's increment @@ -746,7 +748,7 @@ static void srcu_flip(struct srcu_struct *sp) */ smp_mb(); /* E */ /* Pairs with B and C. */ - WRITE_ONCE(sp->srcu_idx, sp->srcu_idx + 1); + WRITE_ONCE(ssp->srcu_idx, ssp->srcu_idx + 1); /* * Ensure that if the updater misses an __srcu_read_unlock() @@ -779,7 +781,7 @@ static void srcu_flip(struct srcu_struct *sp) * negligible when amoritized over that time period, and the extra latency * of a needlessly non-expedited grace period is similarly negligible. */ -static bool srcu_might_be_idle(struct srcu_struct *sp) +static bool srcu_might_be_idle(struct srcu_struct *ssp) { unsigned long curseq; unsigned long flags; @@ -788,7 +790,7 @@ static bool srcu_might_be_idle(struct srcu_struct *sp) /* If the local srcu_data structure has callbacks, not idle. */ local_irq_save(flags); - sdp = this_cpu_ptr(sp->sda); + sdp = this_cpu_ptr(ssp->sda); if (rcu_segcblist_pend_cbs(&sdp->srcu_cblist)) { local_irq_restore(flags); return false; /* Callbacks already present, so not idle. */ @@ -804,17 +806,17 @@ static bool srcu_might_be_idle(struct srcu_struct *sp) /* First, see if enough time has passed since the last GP. */ t = ktime_get_mono_fast_ns(); if (exp_holdoff == 0 || - time_in_range_open(t, sp->srcu_last_gp_end, - sp->srcu_last_gp_end + exp_holdoff)) + time_in_range_open(t, ssp->srcu_last_gp_end, + ssp->srcu_last_gp_end + exp_holdoff)) return false; /* Too soon after last GP. */ /* Next, check for probable idleness. */ - curseq = rcu_seq_current(&sp->srcu_gp_seq); + curseq = rcu_seq_current(&ssp->srcu_gp_seq); smp_mb(); /* Order ->srcu_gp_seq with ->srcu_gp_seq_needed. */ - if (ULONG_CMP_LT(curseq, READ_ONCE(sp->srcu_gp_seq_needed))) + if (ULONG_CMP_LT(curseq, READ_ONCE(ssp->srcu_gp_seq_needed))) return false; /* Grace period in progress, so not idle. */ smp_mb(); /* Order ->srcu_gp_seq with prior access. */ - if (curseq != rcu_seq_current(&sp->srcu_gp_seq)) + if (curseq != rcu_seq_current(&ssp->srcu_gp_seq)) return false; /* GP # changed, so not idle. */ return true; /* With reasonable probability, idle! */ } @@ -854,16 +856,17 @@ static void srcu_leak_callback(struct rcu_head *rhp) * srcu_read_lock(), and srcu_read_unlock() that are all passed the same * srcu_struct structure. */ -void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, +void __call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, rcu_callback_t func, bool do_norm) { unsigned long flags; + int idx; bool needexp = false; bool needgp = false; unsigned long s; struct srcu_data *sdp; - check_init_srcu_struct(sp); + check_init_srcu_struct(ssp); if (debug_rcu_head_queue(rhp)) { /* Probable double call_srcu(), so leak the callback. */ WRITE_ONCE(rhp->func, srcu_leak_callback); @@ -871,13 +874,14 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, return; } rhp->func = func; + idx = srcu_read_lock(ssp); local_irq_save(flags); - sdp = this_cpu_ptr(sp->sda); + sdp = this_cpu_ptr(ssp->sda); spin_lock_rcu_node(sdp); rcu_segcblist_enqueue(&sdp->srcu_cblist, rhp, false); rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&sp->srcu_gp_seq)); - s = rcu_seq_snap(&sp->srcu_gp_seq); + rcu_seq_current(&ssp->srcu_gp_seq)); + s = rcu_seq_snap(&ssp->srcu_gp_seq); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, s); if (ULONG_CMP_LT(sdp->srcu_gp_seq_needed, s)) { sdp->srcu_gp_seq_needed = s; @@ -889,14 +893,15 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, } spin_unlock_irqrestore_rcu_node(sdp, flags); if (needgp) - srcu_funnel_gp_start(sp, sdp, s, do_norm); + srcu_funnel_gp_start(ssp, sdp, s, do_norm); else if (needexp) - srcu_funnel_exp_start(sp, sdp->mynode, s); + srcu_funnel_exp_start(ssp, sdp->mynode, s); + srcu_read_unlock(ssp, idx); } /** * call_srcu() - Queue a callback for invocation after an SRCU grace period - * @sp: srcu_struct in queue the callback + * @ssp: srcu_struct in queue the callback * @rhp: structure to be used for queueing the SRCU callback. * @func: function to be invoked after the SRCU grace period * @@ -911,21 +916,21 @@ void __call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, * The callback will be invoked from process context, but must nevertheless * be fast and must not block. */ -void call_srcu(struct srcu_struct *sp, struct rcu_head *rhp, +void call_srcu(struct srcu_struct *ssp, struct rcu_head *rhp, rcu_callback_t func) { - __call_srcu(sp, rhp, func, true); + __call_srcu(ssp, rhp, func, true); } EXPORT_SYMBOL_GPL(call_srcu); /* * Helper function for synchronize_srcu() and synchronize_srcu_expedited(). */ -static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) +static void __synchronize_srcu(struct srcu_struct *ssp, bool do_norm) { struct rcu_synchronize rcu; - RCU_LOCKDEP_WARN(lock_is_held(&sp->dep_map) || + RCU_LOCKDEP_WARN(lock_is_held(&ssp->dep_map) || lock_is_held(&rcu_bh_lock_map) || lock_is_held(&rcu_lock_map) || lock_is_held(&rcu_sched_lock_map), @@ -934,10 +939,10 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) if (rcu_scheduler_active == RCU_SCHEDULER_INACTIVE) return; might_sleep(); - check_init_srcu_struct(sp); + check_init_srcu_struct(ssp); init_completion(&rcu.completion); init_rcu_head_on_stack(&rcu.head); - __call_srcu(sp, &rcu.head, wakeme_after_rcu, do_norm); + __call_srcu(ssp, &rcu.head, wakeme_after_rcu, do_norm); wait_for_completion(&rcu.completion); destroy_rcu_head_on_stack(&rcu.head); @@ -953,7 +958,7 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) /** * synchronize_srcu_expedited - Brute-force SRCU grace period - * @sp: srcu_struct with which to synchronize. + * @ssp: srcu_struct with which to synchronize. * * Wait for an SRCU grace period to elapse, but be more aggressive about * spinning rather than blocking when waiting. @@ -961,15 +966,15 @@ static void __synchronize_srcu(struct srcu_struct *sp, bool do_norm) * Note that synchronize_srcu_expedited() has the same deadlock and * memory-ordering properties as does synchronize_srcu(). */ -void synchronize_srcu_expedited(struct srcu_struct *sp) +void synchronize_srcu_expedited(struct srcu_struct *ssp) { - __synchronize_srcu(sp, rcu_gp_is_normal()); + __synchronize_srcu(ssp, rcu_gp_is_normal()); } EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); /** * synchronize_srcu - wait for prior SRCU read-side critical-section completion - * @sp: srcu_struct with which to synchronize. + * @ssp: srcu_struct with which to synchronize. * * Wait for the count to drain to zero of both indexes. To avoid the * possible starvation of synchronize_srcu(), it waits for the count of @@ -1011,12 +1016,12 @@ EXPORT_SYMBOL_GPL(synchronize_srcu_expedited); * SRCU must also provide it. Note that detecting idleness is heuristic * and subject to both false positives and negatives. */ -void synchronize_srcu(struct srcu_struct *sp) +void synchronize_srcu(struct srcu_struct *ssp) { - if (srcu_might_be_idle(sp) || rcu_gp_is_expedited()) - synchronize_srcu_expedited(sp); + if (srcu_might_be_idle(ssp) || rcu_gp_is_expedited()) + synchronize_srcu_expedited(ssp); else - __synchronize_srcu(sp, true); + __synchronize_srcu(ssp, true); } EXPORT_SYMBOL_GPL(synchronize_srcu); @@ -1026,36 +1031,36 @@ EXPORT_SYMBOL_GPL(synchronize_srcu); static void srcu_barrier_cb(struct rcu_head *rhp) { struct srcu_data *sdp; - struct srcu_struct *sp; + struct srcu_struct *ssp; sdp = container_of(rhp, struct srcu_data, srcu_barrier_head); - sp = sdp->sp; - if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt)) - complete(&sp->srcu_barrier_completion); + ssp = sdp->ssp; + if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt)) + complete(&ssp->srcu_barrier_completion); } /** * srcu_barrier - Wait until all in-flight call_srcu() callbacks complete. - * @sp: srcu_struct on which to wait for in-flight callbacks. + * @ssp: srcu_struct on which to wait for in-flight callbacks. */ -void srcu_barrier(struct srcu_struct *sp) +void srcu_barrier(struct srcu_struct *ssp) { int cpu; struct srcu_data *sdp; - unsigned long s = rcu_seq_snap(&sp->srcu_barrier_seq); + unsigned long s = rcu_seq_snap(&ssp->srcu_barrier_seq); - check_init_srcu_struct(sp); - mutex_lock(&sp->srcu_barrier_mutex); - if (rcu_seq_done(&sp->srcu_barrier_seq, s)) { + check_init_srcu_struct(ssp); + mutex_lock(&ssp->srcu_barrier_mutex); + if (rcu_seq_done(&ssp->srcu_barrier_seq, s)) { smp_mb(); /* Force ordering following return. */ - mutex_unlock(&sp->srcu_barrier_mutex); + mutex_unlock(&ssp->srcu_barrier_mutex); return; /* Someone else did our work for us. */ } - rcu_seq_start(&sp->srcu_barrier_seq); - init_completion(&sp->srcu_barrier_completion); + rcu_seq_start(&ssp->srcu_barrier_seq); + init_completion(&ssp->srcu_barrier_completion); /* Initial count prevents reaching zero until all CBs are posted. */ - atomic_set(&sp->srcu_barrier_cpu_cnt, 1); + atomic_set(&ssp->srcu_barrier_cpu_cnt, 1); /* * Each pass through this loop enqueues a callback, but only @@ -1066,39 +1071,39 @@ void srcu_barrier(struct srcu_struct *sp) * grace period as the last callback already in the queue. */ for_each_possible_cpu(cpu) { - sdp = per_cpu_ptr(sp->sda, cpu); + sdp = per_cpu_ptr(ssp->sda, cpu); spin_lock_irq_rcu_node(sdp); - atomic_inc(&sp->srcu_barrier_cpu_cnt); + atomic_inc(&ssp->srcu_barrier_cpu_cnt); sdp->srcu_barrier_head.func = srcu_barrier_cb; debug_rcu_head_queue(&sdp->srcu_barrier_head); if (!rcu_segcblist_entrain(&sdp->srcu_cblist, &sdp->srcu_barrier_head, 0)) { debug_rcu_head_unqueue(&sdp->srcu_barrier_head); - atomic_dec(&sp->srcu_barrier_cpu_cnt); + atomic_dec(&ssp->srcu_barrier_cpu_cnt); } spin_unlock_irq_rcu_node(sdp); } /* Remove the initial count, at which point reaching zero can happen. */ - if (atomic_dec_and_test(&sp->srcu_barrier_cpu_cnt)) - complete(&sp->srcu_barrier_completion); - wait_for_completion(&sp->srcu_barrier_completion); + if (atomic_dec_and_test(&ssp->srcu_barrier_cpu_cnt)) + complete(&ssp->srcu_barrier_completion); + wait_for_completion(&ssp->srcu_barrier_completion); - rcu_seq_end(&sp->srcu_barrier_seq); - mutex_unlock(&sp->srcu_barrier_mutex); + rcu_seq_end(&ssp->srcu_barrier_seq); + mutex_unlock(&ssp->srcu_barrier_mutex); } EXPORT_SYMBOL_GPL(srcu_barrier); /** * srcu_batches_completed - return batches completed. - * @sp: srcu_struct on which to report batch completion. + * @ssp: srcu_struct on which to report batch completion. * * Report the number of batches, correlated with, but not necessarily * precisely the same as, the number of grace periods that have elapsed. */ -unsigned long srcu_batches_completed(struct srcu_struct *sp) +unsigned long srcu_batches_completed(struct srcu_struct *ssp) { - return sp->srcu_idx; + return ssp->srcu_idx; } EXPORT_SYMBOL_GPL(srcu_batches_completed); @@ -1107,11 +1112,11 @@ EXPORT_SYMBOL_GPL(srcu_batches_completed); * to SRCU_STATE_SCAN2, and invoke srcu_gp_end() when scan has * completed in that state. */ -static void srcu_advance_state(struct srcu_struct *sp) +static void srcu_advance_state(struct srcu_struct *ssp) { int idx; - mutex_lock(&sp->srcu_gp_mutex); + mutex_lock(&ssp->srcu_gp_mutex); /* * Because readers might be delayed for an extended period after @@ -1123,47 +1128,47 @@ static void srcu_advance_state(struct srcu_struct *sp) * The load-acquire ensures that we see the accesses performed * by the prior grace period. */ - idx = rcu_seq_state(smp_load_acquire(&sp->srcu_gp_seq)); /* ^^^ */ + idx = rcu_seq_state(smp_load_acquire(&ssp->srcu_gp_seq)); /* ^^^ */ if (idx == SRCU_STATE_IDLE) { - spin_lock_irq_rcu_node(sp); - if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { - WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq)); - spin_unlock_irq_rcu_node(sp); - mutex_unlock(&sp->srcu_gp_mutex); + spin_lock_irq_rcu_node(ssp); + if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) { + WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq)); + spin_unlock_irq_rcu_node(ssp); + mutex_unlock(&ssp->srcu_gp_mutex); return; } - idx = rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)); + idx = rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)); if (idx == SRCU_STATE_IDLE) - srcu_gp_start(sp); - spin_unlock_irq_rcu_node(sp); + srcu_gp_start(ssp); + spin_unlock_irq_rcu_node(ssp); if (idx != SRCU_STATE_IDLE) { - mutex_unlock(&sp->srcu_gp_mutex); + mutex_unlock(&ssp->srcu_gp_mutex); return; /* Someone else started the grace period. */ } } - if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN1) { - idx = 1 ^ (sp->srcu_idx & 1); - if (!try_check_zero(sp, idx, 1)) { - mutex_unlock(&sp->srcu_gp_mutex); + if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN1) { + idx = 1 ^ (ssp->srcu_idx & 1); + if (!try_check_zero(ssp, idx, 1)) { + mutex_unlock(&ssp->srcu_gp_mutex); return; /* readers present, retry later. */ } - srcu_flip(sp); - rcu_seq_set_state(&sp->srcu_gp_seq, SRCU_STATE_SCAN2); + srcu_flip(ssp); + rcu_seq_set_state(&ssp->srcu_gp_seq, SRCU_STATE_SCAN2); } - if (rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) == SRCU_STATE_SCAN2) { + if (rcu_seq_state(READ_ONCE(ssp->srcu_gp_seq)) == SRCU_STATE_SCAN2) { /* * SRCU read-side critical sections are normally short, * so check at least twice in quick succession after a flip. */ - idx = 1 ^ (sp->srcu_idx & 1); - if (!try_check_zero(sp, idx, 2)) { - mutex_unlock(&sp->srcu_gp_mutex); + idx = 1 ^ (ssp->srcu_idx & 1); + if (!try_check_zero(ssp, idx, 2)) { + mutex_unlock(&ssp->srcu_gp_mutex); return; /* readers present, retry later. */ } - srcu_gp_end(sp); /* Releases ->srcu_gp_mutex. */ + srcu_gp_end(ssp); /* Releases ->srcu_gp_mutex. */ } } @@ -1179,14 +1184,14 @@ static void srcu_invoke_callbacks(struct work_struct *work) struct rcu_cblist ready_cbs; struct rcu_head *rhp; struct srcu_data *sdp; - struct srcu_struct *sp; + struct srcu_struct *ssp; sdp = container_of(work, struct srcu_data, work.work); - sp = sdp->sp; + ssp = sdp->ssp; rcu_cblist_init(&ready_cbs); spin_lock_irq_rcu_node(sdp); rcu_segcblist_advance(&sdp->srcu_cblist, - rcu_seq_current(&sp->srcu_gp_seq)); + rcu_seq_current(&ssp->srcu_gp_seq)); if (sdp->srcu_cblist_invoking || !rcu_segcblist_ready_cbs(&sdp->srcu_cblist)) { spin_unlock_irq_rcu_node(sdp); @@ -1212,7 +1217,7 @@ static void srcu_invoke_callbacks(struct work_struct *work) spin_lock_irq_rcu_node(sdp); rcu_segcblist_insert_count(&sdp->srcu_cblist, &ready_cbs); (void)rcu_segcblist_accelerate(&sdp->srcu_cblist, - rcu_seq_snap(&sp->srcu_gp_seq)); + rcu_seq_snap(&ssp->srcu_gp_seq)); sdp->srcu_cblist_invoking = false; more = rcu_segcblist_ready_cbs(&sdp->srcu_cblist); spin_unlock_irq_rcu_node(sdp); @@ -1224,24 +1229,24 @@ static void srcu_invoke_callbacks(struct work_struct *work) * Finished one round of SRCU grace period. Start another if there are * more SRCU callbacks queued, otherwise put SRCU into not-running state. */ -static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) +static void srcu_reschedule(struct srcu_struct *ssp, unsigned long delay) { bool pushgp = true; - spin_lock_irq_rcu_node(sp); - if (ULONG_CMP_GE(sp->srcu_gp_seq, sp->srcu_gp_seq_needed)) { - if (!WARN_ON_ONCE(rcu_seq_state(sp->srcu_gp_seq))) { + spin_lock_irq_rcu_node(ssp); + if (ULONG_CMP_GE(ssp->srcu_gp_seq, ssp->srcu_gp_seq_needed)) { + if (!WARN_ON_ONCE(rcu_seq_state(ssp->srcu_gp_seq))) { /* All requests fulfilled, time to go idle. */ pushgp = false; } - } else if (!rcu_seq_state(sp->srcu_gp_seq)) { + } else if (!rcu_seq_state(ssp->srcu_gp_seq)) { /* Outstanding request and no GP. Start one. */ - srcu_gp_start(sp); + srcu_gp_start(ssp); } - spin_unlock_irq_rcu_node(sp); + spin_unlock_irq_rcu_node(ssp); if (pushgp) - queue_delayed_work(rcu_gp_wq, &sp->work, delay); + queue_delayed_work(rcu_gp_wq, &ssp->work, delay); } /* @@ -1249,41 +1254,41 @@ static void srcu_reschedule(struct srcu_struct *sp, unsigned long delay) */ static void process_srcu(struct work_struct *work) { - struct srcu_struct *sp; + struct srcu_struct *ssp; - sp = container_of(work, struct srcu_struct, work.work); + ssp = container_of(work, struct srcu_struct, work.work); - srcu_advance_state(sp); - srcu_reschedule(sp, srcu_get_delay(sp)); + srcu_advance_state(ssp); + srcu_reschedule(ssp, srcu_get_delay(ssp)); } void srcutorture_get_gp_data(enum rcutorture_type test_type, - struct srcu_struct *sp, int *flags, + struct srcu_struct *ssp, int *flags, unsigned long *gp_seq) { if (test_type != SRCU_FLAVOR) return; *flags = 0; - *gp_seq = rcu_seq_current(&sp->srcu_gp_seq); + *gp_seq = rcu_seq_current(&ssp->srcu_gp_seq); } EXPORT_SYMBOL_GPL(srcutorture_get_gp_data); -void srcu_torture_stats_print(struct srcu_struct *sp, char *tt, char *tf) +void srcu_torture_stats_print(struct srcu_struct *ssp, char *tt, char *tf) { int cpu; int idx; unsigned long s0 = 0, s1 = 0; - idx = sp->srcu_idx & 0x1; + idx = ssp->srcu_idx & 0x1; pr_alert("%s%s Tree SRCU g%ld per-CPU(idx=%d):", - tt, tf, rcu_seq_current(&sp->srcu_gp_seq), idx); + tt, tf, rcu_seq_current(&ssp->srcu_gp_seq), idx); for_each_possible_cpu(cpu) { unsigned long l0, l1; unsigned long u0, u1; long c0, c1; struct srcu_data *sdp; - sdp = per_cpu_ptr(sp->sda, cpu); + sdp = per_cpu_ptr(ssp->sda, cpu); u0 = sdp->srcu_unlock_count[!idx]; u1 = sdp->srcu_unlock_count[idx]; @@ -1318,14 +1323,14 @@ early_initcall(srcu_bootup_announce); void __init srcu_init(void) { - struct srcu_struct *sp; + struct srcu_struct *ssp; srcu_init_done = true; while (!list_empty(&srcu_boot_list)) { - sp = list_first_entry(&srcu_boot_list, struct srcu_struct, + ssp = list_first_entry(&srcu_boot_list, struct srcu_struct, work.work.entry); - check_init_srcu_struct(sp); - list_del_init(&sp->work.work.entry); - queue_work(rcu_gp_wq, &sp->work.work); + check_init_srcu_struct(ssp); + list_del_init(&ssp->work.work.entry); + queue_work(rcu_gp_wq, &ssp->work.work); } } diff --git a/kernel/rcu/sync.c b/kernel/rcu/sync.c index 3f943efcf61c..be10036fa621 100644 --- a/kernel/rcu/sync.c +++ b/kernel/rcu/sync.c @@ -44,15 +44,15 @@ static const struct { __INIT_HELD(rcu_read_lock_held) }, [RCU_SCHED_SYNC] = { - .sync = synchronize_sched, - .call = call_rcu_sched, - .wait = rcu_barrier_sched, + .sync = synchronize_rcu, + .call = call_rcu, + .wait = rcu_barrier, __INIT_HELD(rcu_read_lock_sched_held) }, [RCU_BH_SYNC] = { - .sync = synchronize_rcu_bh, - .call = call_rcu_bh, - .wait = rcu_barrier_bh, + .sync = synchronize_rcu, + .call = call_rcu, + .wait = rcu_barrier, __INIT_HELD(rcu_read_lock_bh_held) }, }; @@ -125,8 +125,7 @@ void rcu_sync_enter(struct rcu_sync *rsp) rsp->gp_state = GP_PENDING; spin_unlock_irq(&rsp->rss_lock); - BUG_ON(need_wait && need_sync); - + WARN_ON_ONCE(need_wait && need_sync); if (need_sync) { gp_ops[rsp->gp_type].sync(); rsp->gp_state = GP_PASSED; @@ -139,7 +138,7 @@ void rcu_sync_enter(struct rcu_sync *rsp) * Nobody has yet been allowed the 'fast' path and thus we can * avoid doing any sync(). The callback will get 'dropped'. */ - BUG_ON(rsp->gp_state != GP_PASSED); + WARN_ON_ONCE(rsp->gp_state != GP_PASSED); } } @@ -166,8 +165,8 @@ static void rcu_sync_func(struct rcu_head *rhp) struct rcu_sync *rsp = container_of(rhp, struct rcu_sync, cb_head); unsigned long flags; - BUG_ON(rsp->gp_state != GP_PASSED); - BUG_ON(rsp->cb_state == CB_IDLE); + WARN_ON_ONCE(rsp->gp_state != GP_PASSED); + WARN_ON_ONCE(rsp->cb_state == CB_IDLE); spin_lock_irqsave(&rsp->rss_lock, flags); if (rsp->gp_count) { @@ -225,7 +224,7 @@ void rcu_sync_dtor(struct rcu_sync *rsp) { int cb_state; - BUG_ON(rsp->gp_count); + WARN_ON_ONCE(rsp->gp_count); spin_lock_irq(&rsp->rss_lock); if (rsp->cb_state == CB_REPLAY) @@ -235,6 +234,6 @@ void rcu_sync_dtor(struct rcu_sync *rsp) if (cb_state != CB_IDLE) { gp_ops[rsp->gp_type].wait(); - BUG_ON(rsp->cb_state != CB_IDLE); + WARN_ON_ONCE(rsp->cb_state != CB_IDLE); } } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 121f833acd04..9180158756d2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -207,6 +207,19 @@ static int rcu_gp_in_progress(void) return rcu_seq_state(rcu_seq_current(&rcu_state.gp_seq)); } +/* + * Return the number of callbacks queued on the specified CPU. + * Handles both the nocbs and normal cases. + */ +static long rcu_get_n_cbs_cpu(int cpu) +{ + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + + if (rcu_segcblist_is_enabled(&rdp->cblist)) /* Online normal CPU? */ + return rcu_segcblist_n_cbs(&rdp->cblist); + return rcu_get_n_cbs_nocb_cpu(rdp); /* Works for offline, too. */ +} + void rcu_softirq_qs(void) { rcu_qs(); @@ -499,17 +512,30 @@ void rcu_force_quiescent_state(void) } EXPORT_SYMBOL_GPL(rcu_force_quiescent_state); +/* + * Convert a ->gp_state value to a character string. + */ +static const char *gp_state_getname(short gs) +{ + if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names)) + return "???"; + return gp_state_names[gs]; +} + /* * Show the state of the grace-period kthreads. */ void show_rcu_gp_kthreads(void) { int cpu; + unsigned long j; struct rcu_data *rdp; struct rcu_node *rnp; - pr_info("%s: wait state: %d ->state: %#lx\n", rcu_state.name, - rcu_state.gp_state, rcu_state.gp_kthread->state); + j = jiffies - READ_ONCE(rcu_state.gp_activity); + pr_info("%s: wait state: %s(%d) ->state: %#lx delta ->gp_activity %ld\n", + rcu_state.name, gp_state_getname(rcu_state.gp_state), + rcu_state.gp_state, rcu_state.gp_kthread->state, j); rcu_for_each_node_breadth_first(rnp) { if (ULONG_CMP_GE(rcu_state.gp_seq, rnp->gp_seq_needed)) continue; @@ -891,12 +917,12 @@ void rcu_irq_enter_irqson(void) } /** - * rcu_is_watching - see if RCU thinks that the current CPU is idle + * rcu_is_watching - see if RCU thinks that the current CPU is not idle * * Return true if RCU is watching the running CPU, which means that this * CPU can safely enter RCU read-side critical sections. In other words, - * if the current CPU is in its idle loop and is neither in an interrupt - * or NMI handler, return true. + * if the current CPU is not in its idle loop or is in an interrupt or + * NMI handler, return true. */ bool notrace rcu_is_watching(void) { @@ -1142,16 +1168,6 @@ static void record_gp_stall_check_time(void) rcu_state.n_force_qs_gpstart = READ_ONCE(rcu_state.n_force_qs); } -/* - * Convert a ->gp_state value to a character string. - */ -static const char *gp_state_getname(short gs) -{ - if (gs < 0 || gs >= ARRAY_SIZE(gp_state_names)) - return "???"; - return gp_state_names[gs]; -} - /* * Complain about starvation of grace-period kthread. */ @@ -1262,8 +1278,7 @@ static void print_other_cpu_stall(unsigned long gp_seq) print_cpu_stall_info_end(); for_each_possible_cpu(cpu) - totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(&rcu_data, - cpu)->cblist); + totqlen += rcu_get_n_cbs_cpu(cpu); pr_cont("(detected by %d, t=%ld jiffies, g=%ld, q=%lu)\n", smp_processor_id(), (long)(jiffies - rcu_state.gp_start), (long)rcu_seq_current(&rcu_state.gp_seq), totqlen); @@ -1323,8 +1338,7 @@ static void print_cpu_stall(void) raw_spin_unlock_irqrestore_rcu_node(rdp->mynode, flags); print_cpu_stall_info_end(); for_each_possible_cpu(cpu) - totqlen += rcu_segcblist_n_cbs(&per_cpu_ptr(&rcu_data, - cpu)->cblist); + totqlen += rcu_get_n_cbs_cpu(cpu); pr_cont(" (t=%lu jiffies g=%ld q=%lu)\n", jiffies - rcu_state.gp_start, (long)rcu_seq_current(&rcu_state.gp_seq), totqlen); @@ -1986,7 +2000,8 @@ static void rcu_gp_cleanup(void) WRITE_ONCE(rcu_state.gp_activity, jiffies); raw_spin_lock_irq_rcu_node(rnp); - gp_duration = jiffies - rcu_state.gp_start; + rcu_state.gp_end = jiffies; + gp_duration = rcu_state.gp_end - rcu_state.gp_start; if (gp_duration > rcu_state.gp_max) rcu_state.gp_max = gp_duration; @@ -2032,9 +2047,9 @@ static void rcu_gp_cleanup(void) rnp = rcu_get_root(); raw_spin_lock_irq_rcu_node(rnp); /* GP before ->gp_seq update. */ - /* Declare grace period done. */ - rcu_seq_end(&rcu_state.gp_seq); + /* Declare grace period done, trace first to use old GP number. */ trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end")); + rcu_seq_end(&rcu_state.gp_seq); rcu_state.gp_state = RCU_GP_IDLE; /* Check for GP requests since above loop. */ rdp = this_cpu_ptr(&rcu_data); @@ -2600,10 +2615,10 @@ static void force_quiescent_state(void) * This function checks for grace-period requests that fail to motivate * RCU to come out of its idle mode. */ -static void -rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp) +void +rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp, + const unsigned long gpssdelay) { - const unsigned long gpssdelay = rcu_jiffies_till_stall_check() * HZ; unsigned long flags; unsigned long j; struct rcu_node *rnp_root = rcu_get_root(); @@ -2654,6 +2669,48 @@ rcu_check_gp_start_stall(struct rcu_node *rnp, struct rcu_data *rdp) raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } +/* + * Do a forward-progress check for rcutorture. This is normally invoked + * due to an OOM event. The argument "j" gives the time period during + * which rcutorture would like progress to have been made. + */ +void rcu_fwd_progress_check(unsigned long j) +{ + unsigned long cbs; + int cpu; + unsigned long max_cbs = 0; + int max_cpu = -1; + struct rcu_data *rdp; + + if (rcu_gp_in_progress()) { + pr_info("%s: GP age %lu jiffies\n", + __func__, jiffies - rcu_state.gp_start); + show_rcu_gp_kthreads(); + } else { + pr_info("%s: Last GP end %lu jiffies ago\n", + __func__, jiffies - rcu_state.gp_end); + preempt_disable(); + rdp = this_cpu_ptr(&rcu_data); + rcu_check_gp_start_stall(rdp->mynode, rdp, j); + preempt_enable(); + } + for_each_possible_cpu(cpu) { + cbs = rcu_get_n_cbs_cpu(cpu); + if (!cbs) + continue; + if (max_cpu < 0) + pr_info("%s: callbacks", __func__); + pr_cont(" %d: %lu", cpu, cbs); + if (cbs <= max_cbs) + continue; + max_cbs = cbs; + max_cpu = cpu; + } + if (max_cpu >= 0) + pr_cont("\n"); +} +EXPORT_SYMBOL_GPL(rcu_fwd_progress_check); + /* * This does the RCU core processing work for the specified rcu_data * structures. This may be called only from the CPU to whom the rdp @@ -2690,7 +2747,7 @@ static __latent_entropy void rcu_process_callbacks(struct softirq_action *unused local_irq_restore(flags); } - rcu_check_gp_start_stall(rnp, rdp); + rcu_check_gp_start_stall(rnp, rdp, rcu_jiffies_till_stall_check()); /* If there are callbacks ready, invoke them. */ if (rcu_segcblist_ready_cbs(&rdp->cblist)) @@ -2826,7 +2883,7 @@ __call_rcu(struct rcu_head *head, rcu_callback_t func, int cpu, bool lazy) * Very early boot, before rcu_init(). Initialize if needed * and then drop through to queue the callback. */ - BUG_ON(cpu != -1); + WARN_ON_ONCE(cpu != -1); WARN_ON_ONCE(!rcu_is_watching()); if (rcu_segcblist_empty(&rdp->cblist)) rcu_segcblist_init(&rdp->cblist); @@ -3485,7 +3542,8 @@ static int __init rcu_spawn_gp_kthread(void) rcu_scheduler_fully_active = 1; t = kthread_create(rcu_gp_kthread, NULL, "%s", rcu_state.name); - BUG_ON(IS_ERR(t)); + if (WARN_ONCE(IS_ERR(t), "%s: Could not start grace-period kthread, OOM is now expected behavior\n", __func__)) + return 0; rnp = rcu_get_root(); raw_spin_lock_irqsave_rcu_node(rnp, flags); rcu_state.gp_kthread = t; diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 703e19ff532d..d90b02b53c0e 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -57,7 +57,7 @@ struct rcu_node { /* some rcu_state fields as well as */ /* following. */ unsigned long gp_seq; /* Track rsp->rcu_gp_seq. */ - unsigned long gp_seq_needed; /* Track rsp->rcu_gp_seq_needed. */ + unsigned long gp_seq_needed; /* Track furthest future GP request. */ unsigned long completedqs; /* All QSes done for this node. */ unsigned long qsmask; /* CPUs or groups that need to switch in */ /* order for current grace period to proceed.*/ @@ -163,7 +163,7 @@ union rcu_noqs { struct rcu_data { /* 1) quiescent-state and grace-period handling : */ unsigned long gp_seq; /* Track rsp->rcu_gp_seq counter. */ - unsigned long gp_seq_needed; /* Track rsp->rcu_gp_seq_needed ctr. */ + unsigned long gp_seq_needed; /* Track furthest future GP request. */ union rcu_noqs cpu_no_qs; /* No QSes yet for this CPU. */ bool core_needs_qs; /* Core waits for quiesc state. */ bool beenonline; /* CPU online at least once. */ @@ -328,6 +328,8 @@ struct rcu_state { /* force_quiescent_state(). */ unsigned long gp_start; /* Time at which GP started, */ /* but in jiffies. */ + unsigned long gp_end; /* Time last GP ended, again */ + /* in jiffies. */ unsigned long gp_activity; /* Time of last GP kthread */ /* activity in jiffies. */ unsigned long gp_req_activity; /* Time of last GP request */ @@ -398,17 +400,6 @@ static const char *tp_rcu_varname __used __tracepoint_string = rcu_name; #define RCU_NAME rcu_name #endif /* #else #ifdef CONFIG_TRACING */ -/* - * RCU implementation internal declarations: - */ -extern struct rcu_state rcu_sched_state; - -extern struct rcu_state rcu_bh_state; - -#ifdef CONFIG_PREEMPT_RCU -extern struct rcu_state rcu_preempt_state; -#endif /* #ifdef CONFIG_PREEMPT_RCU */ - int rcu_dynticks_snap(struct rcu_data *rdp); #ifdef CONFIG_RCU_BOOST @@ -466,6 +457,7 @@ static void __init rcu_spawn_nocb_kthreads(void); static void __init rcu_organize_nocb_kthreads(void); #endif /* #ifdef CONFIG_RCU_NOCB_CPU */ static bool init_nocb_callback_list(struct rcu_data *rdp); +static unsigned long rcu_get_n_cbs_nocb_cpu(struct rcu_data *rdp); static void rcu_bind_gp_kthread(void); static bool rcu_nohz_full_cpu(void); static void rcu_dynticks_task_enter(void); diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h index 8d18c1014e2b..928fe5893a57 100644 --- a/kernel/rcu/tree_exp.h +++ b/kernel/rcu/tree_exp.h @@ -450,10 +450,12 @@ static void sync_rcu_exp_select_cpus(smp_call_func_t func) } INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus); preempt_disable(); - cpu = cpumask_next(rnp->grplo - 1, cpu_online_mask); + cpu = find_next_bit(&rnp->ffmask, BITS_PER_LONG, -1); /* If all offline, queue the work on an unbound CPU. */ - if (unlikely(cpu > rnp->grphi)) + if (unlikely(cpu > rnp->grphi - rnp->grplo)) cpu = WORK_CPU_UNBOUND; + else + cpu += rnp->grplo; queue_work_on(cpu, rcu_par_gp_wq, &rnp->rew.rew_work); preempt_enable(); rnp->exp_need_flush = true; @@ -690,8 +692,10 @@ static void sync_rcu_exp_handler(void *unused) */ if (t->rcu_read_lock_nesting > 0) { raw_spin_lock_irqsave_rcu_node(rnp, flags); - if (rnp->expmask & rdp->grpmask) + if (rnp->expmask & rdp->grpmask) { rdp->deferred_qs = true; + WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, true); + } raw_spin_unlock_irqrestore_rcu_node(rnp, flags); } diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 05915e536336..1b3dd2fc0cd6 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -397,6 +397,11 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) return rnp->gp_tasks != NULL; } +/* Bias and limit values for ->rcu_read_lock_nesting. */ +#define RCU_NEST_BIAS INT_MAX +#define RCU_NEST_NMAX (-INT_MAX / 2) +#define RCU_NEST_PMAX (INT_MAX / 2) + /* * Preemptible RCU implementation for rcu_read_lock(). * Just increment ->rcu_read_lock_nesting, shared state will be updated @@ -405,6 +410,8 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp) void __rcu_read_lock(void) { current->rcu_read_lock_nesting++; + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) + WARN_ON_ONCE(current->rcu_read_lock_nesting > RCU_NEST_PMAX); barrier(); /* critical section after entry code. */ } EXPORT_SYMBOL_GPL(__rcu_read_lock); @@ -424,20 +431,18 @@ void __rcu_read_unlock(void) --t->rcu_read_lock_nesting; } else { barrier(); /* critical section before exit code. */ - t->rcu_read_lock_nesting = INT_MIN; + t->rcu_read_lock_nesting = -RCU_NEST_BIAS; barrier(); /* assign before ->rcu_read_unlock_special load */ if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s))) rcu_read_unlock_special(t); barrier(); /* ->rcu_read_unlock_special load before assign */ t->rcu_read_lock_nesting = 0; } -#ifdef CONFIG_PROVE_LOCKING - { - int rrln = READ_ONCE(t->rcu_read_lock_nesting); + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + int rrln = t->rcu_read_lock_nesting; - WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2); + WARN_ON_ONCE(rrln < 0 && rrln > RCU_NEST_NMAX); } -#endif /* #ifdef CONFIG_PROVE_LOCKING */ } EXPORT_SYMBOL_GPL(__rcu_read_unlock); @@ -597,7 +602,7 @@ rcu_preempt_deferred_qs_irqrestore(struct task_struct *t, unsigned long flags) */ static bool rcu_preempt_need_deferred_qs(struct task_struct *t) { - return (this_cpu_ptr(&rcu_data)->deferred_qs || + return (__this_cpu_read(rcu_data.deferred_qs) || READ_ONCE(t->rcu_read_unlock_special.s)) && t->rcu_read_lock_nesting <= 0; } @@ -617,11 +622,11 @@ static void rcu_preempt_deferred_qs(struct task_struct *t) if (!rcu_preempt_need_deferred_qs(t)) return; if (couldrecurse) - t->rcu_read_lock_nesting -= INT_MIN; + t->rcu_read_lock_nesting -= RCU_NEST_BIAS; local_irq_save(flags); rcu_preempt_deferred_qs_irqrestore(t, flags); if (couldrecurse) - t->rcu_read_lock_nesting += INT_MIN; + t->rcu_read_lock_nesting += RCU_NEST_BIAS; } /* @@ -642,13 +647,21 @@ static void rcu_read_unlock_special(struct task_struct *t) local_irq_save(flags); irqs_were_disabled = irqs_disabled_flags(flags); - if ((preempt_bh_were_disabled || irqs_were_disabled) && - t->rcu_read_unlock_special.b.blocked) { + if (preempt_bh_were_disabled || irqs_were_disabled) { + WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false); /* Need to defer quiescent state until everything is enabled. */ - raise_softirq_irqoff(RCU_SOFTIRQ); + if (irqs_were_disabled) { + /* Enabling irqs does not reschedule, so... */ + raise_softirq_irqoff(RCU_SOFTIRQ); + } else { + /* Enabling BH or preempt does reschedule, so... */ + set_tsk_need_resched(current); + set_preempt_need_resched(); + } local_irq_restore(flags); return; } + WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, false); rcu_preempt_deferred_qs_irqrestore(t, flags); } @@ -1464,7 +1477,8 @@ static void __init rcu_spawn_boost_kthreads(void) for_each_possible_cpu(cpu) per_cpu(rcu_cpu_has_work, cpu) = 0; - BUG_ON(smpboot_register_percpu_thread(&rcu_cpu_thread_spec)); + if (WARN_ONCE(smpboot_register_percpu_thread(&rcu_cpu_thread_spec), "%s: Could not start rcub kthread, OOM is now expected behavior\n", __func__)) + return; rcu_for_each_leaf_node(rnp) (void)rcu_spawn_one_boost_kthread(rnp); } @@ -1997,7 +2011,7 @@ static bool rcu_nocb_cpu_needs_barrier(int cpu) * (if a callback is in fact needed). This is associated with an * atomic_inc() in the caller. */ - ret = atomic_long_read(&rdp->nocb_q_count); + ret = rcu_get_n_cbs_nocb_cpu(rdp); #ifdef CONFIG_PROVE_RCU rhp = READ_ONCE(rdp->nocb_head); @@ -2052,7 +2066,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp, TPS("WakeNotPoll")); return; } - len = atomic_long_read(&rdp->nocb_q_count); + len = rcu_get_n_cbs_nocb_cpu(rdp); if (old_rhpp == &rdp->nocb_head) { if (!irqs_disabled_flags(flags)) { /* ... if queue was empty ... */ @@ -2101,11 +2115,11 @@ static bool __call_rcu_nocb(struct rcu_data *rdp, struct rcu_head *rhp, trace_rcu_kfree_callback(rcu_state.name, rhp, (unsigned long)rhp->func, -atomic_long_read(&rdp->nocb_q_count_lazy), - -atomic_long_read(&rdp->nocb_q_count)); + -rcu_get_n_cbs_nocb_cpu(rdp)); else trace_rcu_callback(rcu_state.name, rhp, -atomic_long_read(&rdp->nocb_q_count_lazy), - -atomic_long_read(&rdp->nocb_q_count)); + -rcu_get_n_cbs_nocb_cpu(rdp)); /* * If called from an extended quiescent state with interrupts @@ -2322,13 +2336,14 @@ static int rcu_nocb_kthread(void *arg) tail = rdp->nocb_follower_tail; rdp->nocb_follower_tail = &rdp->nocb_follower_head; raw_spin_unlock_irqrestore(&rdp->nocb_lock, flags); - BUG_ON(!list); + if (WARN_ON_ONCE(!list)) + continue; trace_rcu_nocb_wake(rcu_state.name, rdp->cpu, TPS("WokeNonEmpty")); /* Each pass through the following loop invokes a callback. */ trace_rcu_batch_start(rcu_state.name, atomic_long_read(&rdp->nocb_q_count_lazy), - atomic_long_read(&rdp->nocb_q_count), -1); + rcu_get_n_cbs_nocb_cpu(rdp), -1); c = cl = 0; while (list) { next = list->next; @@ -2495,7 +2510,8 @@ static void rcu_spawn_one_nocb_kthread(int cpu) /* Spawn the kthread for this CPU. */ t = kthread_run(rcu_nocb_kthread, rdp_spawn, "rcuo%c/%d", rcu_state.abbr, cpu); - BUG_ON(IS_ERR(t)); + if (WARN_ONCE(IS_ERR(t), "%s: Could not start rcuo kthread, OOM is now expected behavior\n", __func__)) + return; WRITE_ONCE(rdp_spawn->nocb_kthread, t); } @@ -2587,6 +2603,26 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) return true; } +/* + * Bind the current task to the offloaded CPUs. If there are no offloaded + * CPUs, leave the task unbound. Splat if the bind attempt fails. + */ +void rcu_bind_current_to_nocb(void) +{ + if (cpumask_available(rcu_nocb_mask) && cpumask_weight(rcu_nocb_mask)) + WARN_ON(sched_setaffinity(current->pid, rcu_nocb_mask)); +} +EXPORT_SYMBOL_GPL(rcu_bind_current_to_nocb); + +/* + * Return the number of RCU callbacks still queued from the specified + * CPU, which must be a nocbs CPU. + */ +static unsigned long rcu_get_n_cbs_nocb_cpu(struct rcu_data *rdp) +{ + return atomic_long_read(&rdp->nocb_q_count); +} + #else /* #ifdef CONFIG_RCU_NOCB_CPU */ static bool rcu_nocb_cpu_needs_barrier(int cpu) @@ -2647,6 +2683,11 @@ static bool init_nocb_callback_list(struct rcu_data *rdp) return false; } +static unsigned long rcu_get_n_cbs_nocb_cpu(struct rcu_data *rdp) +{ + return 0; +} + #endif /* #else #ifdef CONFIG_RCU_NOCB_CPU */ /* diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index f203b94f6b5b..1971869c4072 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -335,8 +335,7 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, /* Initialize and register callbacks for each crcu_array element. */ for (i = 0; i < n; i++) { if (checktiny && - (crcu_array[i] == call_rcu || - crcu_array[i] == call_rcu_bh)) { + (crcu_array[i] == call_rcu)) { might_sleep(); continue; } @@ -352,8 +351,7 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, /* Wait for all callbacks to be invoked. */ for (i = 0; i < n; i++) { if (checktiny && - (crcu_array[i] == call_rcu || - crcu_array[i] == call_rcu_bh)) + (crcu_array[i] == call_rcu)) continue; for (j = 0; j < i; j++) if (crcu_array[j] == crcu_array[i]) @@ -822,7 +820,8 @@ static int __init rcu_spawn_tasks_kthread(void) struct task_struct *t; t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread"); - BUG_ON(IS_ERR(t)); + if (WARN_ONCE(IS_ERR(t), "%s: Could not start Tasks-RCU grace-period kthread, OOM is now expected behavior\n", __func__)) + return 0; smp_mb(); /* Ensure others see full kthread. */ WRITE_ONCE(rcu_tasks_kthread_ptr, t); return 0; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 6fedf3a98581..a5b7f1c9f24f 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -5783,7 +5783,7 @@ int sched_cpu_deactivate(unsigned int cpu) * * Do sync before park smpboot threads to take care the rcu boost case. */ - synchronize_rcu_mult(call_rcu, call_rcu_sched); + synchronize_rcu(); #ifdef CONFIG_SCHED_SMT /* diff --git a/kernel/sched/membarrier.c b/kernel/sched/membarrier.c index 76e0eaf4654e..3cd8a3a795d2 100644 --- a/kernel/sched/membarrier.c +++ b/kernel/sched/membarrier.c @@ -210,7 +210,7 @@ static int membarrier_register_global_expedited(void) * future scheduler executions will observe the new * thread flag state for this mm. */ - synchronize_sched(); + synchronize_rcu(); } atomic_or(MEMBARRIER_STATE_GLOBAL_EXPEDITED_READY, &mm->membarrier_state); @@ -246,7 +246,7 @@ static int membarrier_register_private_expedited(int flags) * Ensure all future scheduler executions will observe the * new thread flag state for this process. */ - synchronize_sched(); + synchronize_rcu(); } atomic_or(state, &mm->membarrier_state); @@ -298,7 +298,7 @@ SYSCALL_DEFINE2(membarrier, int, cmd, int, flags) if (tick_nohz_full_enabled()) return -EINVAL; if (num_online_cpus() > 1) - synchronize_sched(); + synchronize_rcu(); return 0; case MEMBARRIER_CMD_GLOBAL_EXPEDITED: return membarrier_global_expedited(); diff --git a/kernel/torture.c b/kernel/torture.c index 17d91f5fba2a..bbf6d473e50c 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -194,11 +194,23 @@ torture_onoff(void *arg) int cpu; int maxcpu = -1; DEFINE_TORTURE_RANDOM(rand); + int ret; VERBOSE_TOROUT_STRING("torture_onoff task started"); for_each_online_cpu(cpu) maxcpu = cpu; WARN_ON(maxcpu < 0); + if (!IS_MODULE(CONFIG_TORTURE_TEST)) + for_each_possible_cpu(cpu) { + if (cpu_online(cpu)) + continue; + ret = cpu_up(cpu); + if (ret && verbose) { + pr_alert("%s" TORTURE_FLAG + "%s: Initial online %d: errno %d\n", + __func__, torture_type, cpu, ret); + } + } if (maxcpu == 0) { VERBOSE_TOROUT_STRING("Only one CPU, so CPU-hotplug testing is disabled"); @@ -233,16 +245,15 @@ stop: */ int torture_onoff_init(long ooholdoff, long oointerval) { - int ret = 0; - #ifdef CONFIG_HOTPLUG_CPU onoff_holdoff = ooholdoff; onoff_interval = oointerval; if (onoff_interval <= 0) return 0; - ret = torture_create_kthread(torture_onoff, NULL, onoff_task); -#endif /* #ifdef CONFIG_HOTPLUG_CPU */ - return ret; + return torture_create_kthread(torture_onoff, NULL, onoff_task); +#else /* #ifdef CONFIG_HOTPLUG_CPU */ + return 0; +#endif /* #else #ifdef CONFIG_HOTPLUG_CPU */ } EXPORT_SYMBOL_GPL(torture_onoff_init); @@ -513,15 +524,13 @@ static int torture_shutdown(void *arg) */ int torture_shutdown_init(int ssecs, void (*cleanup)(void)) { - int ret = 0; - torture_shutdown_hook = cleanup; if (ssecs > 0) { shutdown_time = ktime_add(ktime_get(), ktime_set(ssecs, 0)); - ret = torture_create_kthread(torture_shutdown, NULL, + return torture_create_kthread(torture_shutdown, NULL, shutdown_task); } - return ret; + return 0; } EXPORT_SYMBOL_GPL(torture_shutdown_init); @@ -620,13 +629,10 @@ static int torture_stutter(void *arg) /* * Initialize and kick off the torture_stutter kthread. */ -int torture_stutter_init(int s) +int torture_stutter_init(const int s) { - int ret; - stutter = s; - ret = torture_create_kthread(torture_stutter, NULL, stutter_task); - return ret; + return torture_create_kthread(torture_stutter, NULL, stutter_task); } EXPORT_SYMBOL_GPL(torture_stutter_init); diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e23eb9fc77aa..f0ff24173a0b 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -173,7 +173,7 @@ static void ftrace_sync(struct work_struct *work) { /* * This function is just a stub to implement a hard force - * of synchronize_sched(). This requires synchronizing + * of synchronize_rcu(). This requires synchronizing * tasks even in userspace and idle. * * Yes, function tracing is rude. @@ -934,7 +934,7 @@ ftrace_profile_write(struct file *filp, const char __user *ubuf, ftrace_profile_enabled = 0; /* * unregister_ftrace_profiler calls stop_machine - * so this acts like an synchronize_sched. + * so this acts like an synchronize_rcu. */ unregister_ftrace_profiler(); } @@ -1086,7 +1086,7 @@ struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr) /* * Some of the ops may be dynamically allocated, - * they are freed after a synchronize_sched(). + * they are freed after a synchronize_rcu(). */ preempt_disable_notrace(); @@ -1286,7 +1286,7 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash) { if (!hash || hash == EMPTY_HASH) return; - call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu); + call_rcu(&hash->rcu, __free_ftrace_hash_rcu); } void ftrace_free_filter(struct ftrace_ops *ops) @@ -1501,7 +1501,7 @@ static bool hash_contains_ip(unsigned long ip, * the ip is not in the ops->notrace_hash. * * This needs to be called with preemption disabled as - * the hashes are freed with call_rcu_sched(). + * the hashes are freed with call_rcu(). */ static int ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) @@ -4496,7 +4496,7 @@ unregister_ftrace_function_probe_func(char *glob, struct trace_array *tr, if (ftrace_enabled && !ftrace_hash_empty(hash)) ftrace_run_modify_code(&probe->ops, FTRACE_UPDATE_CALLS, &old_hash_ops); - synchronize_sched(); + synchronize_rcu(); hlist_for_each_entry_safe(entry, tmp, &hhd, hlist) { hlist_del(&entry->hlist); @@ -5314,7 +5314,7 @@ ftrace_graph_release(struct inode *inode, struct file *file) mutex_unlock(&graph_lock); /* Wait till all users are no longer using the old hash */ - synchronize_sched(); + synchronize_rcu(); free_ftrace_hash(old_hash); } @@ -5708,7 +5708,7 @@ void ftrace_release_mod(struct module *mod) list_for_each_entry_safe(mod_map, n, &ftrace_mod_maps, list) { if (mod_map->mod == mod) { list_del_rcu(&mod_map->list); - call_rcu_sched(&mod_map->rcu, ftrace_free_mod_map); + call_rcu(&mod_map->rcu, ftrace_free_mod_map); break; } } @@ -5928,7 +5928,7 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, struct ftrace_mod_map *mod_map; const char *ret = NULL; - /* mod_map is freed via call_rcu_sched() */ + /* mod_map is freed via call_rcu() */ preempt_disable(); list_for_each_entry_rcu(mod_map, &ftrace_mod_maps, list) { ret = ftrace_func_address_lookup(mod_map, addr, size, off, sym); @@ -6263,7 +6263,7 @@ __ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip, /* * Some of the ops may be dynamically allocated, - * they must be freed after a synchronize_sched(). + * they must be freed after a synchronize_rcu(). */ preempt_disable_notrace(); @@ -6434,7 +6434,7 @@ static void clear_ftrace_pids(struct trace_array *tr) rcu_assign_pointer(tr->function_pids, NULL); /* Wait till all users are no longer using pid filtering */ - synchronize_sched(); + synchronize_rcu(); trace_free_pid_list(pid_list); } @@ -6581,7 +6581,7 @@ ftrace_pid_write(struct file *filp, const char __user *ubuf, rcu_assign_pointer(tr->function_pids, pid_list); if (filtered_pids) { - synchronize_sched(); + synchronize_rcu(); trace_free_pid_list(filtered_pids); } else if (pid_list) { /* Register a probe to set whether to ignore the tracing of a task */ diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 65bd4616220d..4f3247a53259 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -1834,7 +1834,7 @@ int ring_buffer_resize(struct ring_buffer *buffer, unsigned long size, * There could have been a race between checking * record_disable and incrementing it. */ - synchronize_sched(); + synchronize_rcu(); for_each_buffer_cpu(buffer, cpu) { cpu_buffer = buffer->buffers[cpu]; rb_check_pages(cpu_buffer); @@ -3151,7 +3151,7 @@ static bool rb_per_cpu_empty(struct ring_buffer_per_cpu *cpu_buffer) * This prevents all writes to the buffer. Any attempt to write * to the buffer after this will fail and return NULL. * - * The caller should call synchronize_sched() after this. + * The caller should call synchronize_rcu() after this. */ void ring_buffer_record_disable(struct ring_buffer *buffer) { @@ -3253,7 +3253,7 @@ bool ring_buffer_record_is_set_on(struct ring_buffer *buffer) * This prevents all writes to the buffer. Any attempt to write * to the buffer after this will fail and return NULL. * - * The caller should call synchronize_sched() after this. + * The caller should call synchronize_rcu() after this. */ void ring_buffer_record_disable_cpu(struct ring_buffer *buffer, int cpu) { @@ -4191,7 +4191,7 @@ EXPORT_SYMBOL_GPL(ring_buffer_read_prepare); void ring_buffer_read_prepare_sync(void) { - synchronize_sched(); + synchronize_rcu(); } EXPORT_SYMBOL_GPL(ring_buffer_read_prepare_sync); @@ -4363,7 +4363,7 @@ void ring_buffer_reset_cpu(struct ring_buffer *buffer, int cpu) atomic_inc(&cpu_buffer->record_disabled); /* Make sure all commits have finished */ - synchronize_sched(); + synchronize_rcu(); raw_spin_lock_irqsave(&cpu_buffer->reader_lock, flags); @@ -4496,7 +4496,7 @@ int ring_buffer_swap_cpu(struct ring_buffer *buffer_a, goto out; /* - * We can't do a synchronize_sched here because this + * We can't do a synchronize_rcu here because this * function can be called in atomic context. * Normally this will be called from the same CPU as cpu. * If not it's up to the caller to protect this. diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ff1c4b20cd0a..51612b4a603f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -1681,7 +1681,7 @@ void tracing_reset(struct trace_buffer *buf, int cpu) ring_buffer_record_disable(buffer); /* Make sure all commits have finished */ - synchronize_sched(); + synchronize_rcu(); ring_buffer_reset_cpu(buffer, cpu); ring_buffer_record_enable(buffer); @@ -1698,7 +1698,7 @@ void tracing_reset_online_cpus(struct trace_buffer *buf) ring_buffer_record_disable(buffer); /* Make sure all commits have finished */ - synchronize_sched(); + synchronize_rcu(); buf->time_start = buffer_ftrace_now(buf, buf->cpu); @@ -2250,7 +2250,7 @@ void trace_buffered_event_disable(void) preempt_enable(); /* Wait for all current users to finish */ - synchronize_sched(); + synchronize_rcu(); for_each_tracing_cpu(cpu) { free_page((unsigned long)per_cpu(trace_buffered_event, cpu)); @@ -5398,7 +5398,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) if (tr->current_trace->reset) tr->current_trace->reset(tr); - /* Current trace needs to be nop_trace before synchronize_sched */ + /* Current trace needs to be nop_trace before synchronize_rcu */ tr->current_trace = &nop_trace; #ifdef CONFIG_TRACER_MAX_TRACE @@ -5412,7 +5412,7 @@ static int tracing_set_tracer(struct trace_array *tr, const char *buf) * The update_max_tr is called from interrupts disabled * so a synchronized_sched() is sufficient. */ - synchronize_sched(); + synchronize_rcu(); free_snapshot(tr); } #endif diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c index 5574e862de8d..27821480105e 100644 --- a/kernel/trace/trace_events_filter.c +++ b/kernel/trace/trace_events_filter.c @@ -1616,7 +1616,7 @@ static int process_system_preds(struct trace_subsystem_dir *dir, /* * The calls can still be using the old filters. - * Do a synchronize_sched() and to ensure all calls are + * Do a synchronize_rcu() and to ensure all calls are * done with them before we free them. */ tracepoint_synchronize_unregister(); @@ -1848,7 +1848,7 @@ int apply_subsystem_event_filter(struct trace_subsystem_dir *dir, if (filter) { /* * No event actually uses the system filter - * we can free it without synchronize_sched(). + * we can free it without synchronize_rcu(). */ __free_filter(system->filter); system->filter = filter; diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index fec67188c4d2..adc153ab51c0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -333,7 +333,7 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file) * event_call related objects, which will be accessed in * the kprobe_trace_func/kretprobe_trace_func. */ - synchronize_sched(); + synchronize_rcu(); kfree(link); /* Ignored if link == NULL */ } diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index a3be42304485..46f2ab1e08a9 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -92,7 +92,7 @@ static __init int release_early_probes(void) while (early_probes) { tmp = early_probes; early_probes = tmp->next; - call_rcu_sched(tmp, rcu_free_old_probes); + call_rcu(tmp, rcu_free_old_probes); } return 0; @@ -123,7 +123,7 @@ static inline void release_probes(struct tracepoint_func *old) * cover both cases. So let us chain the SRCU and sched RCU * callbacks to wait for both grace periods. */ - call_rcu_sched(&tp_probes->rcu, rcu_free_old_probes); + call_rcu(&tp_probes->rcu, rcu_free_old_probes); } } diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 0280deac392e..392be4b252f6 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -3396,7 +3396,7 @@ static void put_unbound_pool(struct worker_pool *pool) del_timer_sync(&pool->mayday_timer); /* sched-RCU protected to allow dereferences from get_work_pool() */ - call_rcu_sched(&pool->rcu, rcu_free_pool); + call_rcu(&pool->rcu, rcu_free_pool); } /** @@ -3503,14 +3503,14 @@ static void pwq_unbound_release_workfn(struct work_struct *work) put_unbound_pool(pool); mutex_unlock(&wq_pool_mutex); - call_rcu_sched(&pwq->rcu, rcu_free_pwq); + call_rcu(&pwq->rcu, rcu_free_pwq); /* * If we're the last pwq going away, @wq is already dead and no one * is gonna access it anymore. Schedule RCU free. */ if (is_last) - call_rcu_sched(&wq->rcu, rcu_free_wq); + call_rcu(&wq->rcu, rcu_free_wq); } /** @@ -4195,7 +4195,7 @@ void destroy_workqueue(struct workqueue_struct *wq) * The base ref is never dropped on per-cpu pwqs. Directly * schedule RCU free. */ - call_rcu_sched(&wq->rcu, rcu_free_wq); + call_rcu(&wq->rcu, rcu_free_wq); } else { /* * We're the sole accessor of @wq at this point. Directly diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index de10b8c0bff6..9877682e49c7 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -181,7 +181,7 @@ static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref, ref->confirm_switch = confirm_switch ?: percpu_ref_noop_confirm_switch; percpu_ref_get(ref); /* put after confirmation */ - call_rcu_sched(&ref->rcu, percpu_ref_switch_to_atomic_rcu); + call_rcu(&ref->rcu, percpu_ref_switch_to_atomic_rcu); } static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref) diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 8e2ff195ecb3..43ce2f4d2551 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1225,7 +1225,7 @@ static void collect_mm_slot(struct mm_slot *mm_slot) { struct mm_struct *mm = mm_slot->mm; - VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&khugepaged_mm_lock)); + lockdep_assert_held(&khugepaged_mm_lock); if (khugepaged_test_exit(mm)) { /* free mm_slot */ @@ -1653,7 +1653,7 @@ static unsigned int khugepaged_scan_mm_slot(unsigned int pages, int progress = 0; VM_BUG_ON(!pages); - VM_BUG_ON(NR_CPUS != 1 && !spin_is_locked(&khugepaged_mm_lock)); + lockdep_assert_held(&khugepaged_mm_lock); if (khugepaged_scan.mm_slot) mm_slot = khugepaged_scan.mm_slot; diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 2a9fbc4a37d5..f2f03c655807 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -199,7 +199,7 @@ void tlb_table_flush(struct mmu_gather *tlb) if (*batch) { tlb_table_invalidate(tlb); - call_rcu_sched(&(*batch)->rcu, tlb_remove_table_rcu); + call_rcu(&(*batch)->rcu, tlb_remove_table_rcu); *batch = NULL; } } diff --git a/mm/slab.c b/mm/slab.c index 2a5654bb3b3f..3abb9feb3818 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -962,10 +962,10 @@ static int setup_kmem_cache_node(struct kmem_cache *cachep, * To protect lockless access to n->shared during irq disabled context. * If n->shared isn't NULL in irq disabled context, accessing to it is * guaranteed to be valid until irq is re-enabled, because it will be - * freed after synchronize_sched(). + * freed after synchronize_rcu(). */ if (old_shared && force_change) - synchronize_sched(); + synchronize_rcu(); fail: kfree(old_shared); diff --git a/mm/slab_common.c b/mm/slab_common.c index 7eb8dc136c1c..9c11e8a937d2 100644 --- a/mm/slab_common.c +++ b/mm/slab_common.c @@ -724,7 +724,7 @@ void slab_deactivate_memcg_cache_rcu_sched(struct kmem_cache *s, css_get(&s->memcg_params.memcg->css); s->memcg_params.deact_fn = deact_fn; - call_rcu_sched(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn); + call_rcu(&s->memcg_params.deact_rcu_head, kmemcg_deactivate_rcufn); } void memcg_deactivate_kmem_caches(struct mem_cgroup *memcg) @@ -839,11 +839,11 @@ static void flush_memcg_workqueue(struct kmem_cache *s) mutex_unlock(&slab_mutex); /* - * SLUB deactivates the kmem_caches through call_rcu_sched. Make + * SLUB deactivates the kmem_caches through call_rcu. Make * sure all registered rcu callbacks have been invoked. */ if (IS_ENABLED(CONFIG_SLUB)) - rcu_barrier_sched(); + rcu_barrier(); /* * SLAB and SLUB create memcg kmem_caches through workqueue and SLUB diff --git a/mm/swap.c b/mm/swap.c index aa483719922e..5d786019eab9 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -823,8 +823,7 @@ void lru_add_page_tail(struct page *page, struct page *page_tail, VM_BUG_ON_PAGE(!PageHead(page), page); VM_BUG_ON_PAGE(PageCompound(page_tail), page); VM_BUG_ON_PAGE(PageLRU(page_tail), page); - VM_BUG_ON(NR_CPUS != 1 && - !spin_is_locked(&lruvec_pgdat(lruvec)->lru_lock)); + lockdep_assert_held(&lruvec_pgdat(lruvec)->lru_lock); if (!list) SetPageLRU(page_tail); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index a7ea2d431714..596ec6e7df11 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -728,7 +728,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); - call_rcu_bh(&p->rcu, br_multicast_free_pg); + call_rcu(&p->rcu, br_multicast_free_pg); err = 0; if (!mp->ports && !mp->host_joined && diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 6bac0d6b7b94..0255223f2001 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -260,7 +260,7 @@ static void br_multicast_group_expired(struct timer_list *t) hlist_del_rcu(&mp->hlist[mdb->ver]); mdb->size--; - call_rcu_bh(&mp->rcu, br_multicast_free_group); + call_rcu(&mp->rcu, br_multicast_free_group); out: spin_unlock(&br->multicast_lock); @@ -291,7 +291,7 @@ static void br_multicast_del_pg(struct net_bridge *br, del_timer(&p->timer); br_mdb_notify(br->dev, p->port, &pg->addr, RTM_DELMDB, p->flags); - call_rcu_bh(&p->rcu, br_multicast_free_pg); + call_rcu(&p->rcu, br_multicast_free_pg); if (!mp->ports && !mp->host_joined && netif_running(br->dev)) @@ -358,7 +358,7 @@ static int br_mdb_rehash(struct net_bridge_mdb_htable __rcu **mdbp, int max, } br_mdb_rehash_seq++; - call_rcu_bh(&mdb->rcu, br_mdb_free); + call_rcu(&mdb->rcu, br_mdb_free); out: rcu_assign_pointer(*mdbp, mdb); @@ -1629,7 +1629,7 @@ br_multicast_leave_group(struct net_bridge *br, rcu_assign_pointer(*pp, p->next); hlist_del_init(&p->mglist); del_timer(&p->timer); - call_rcu_bh(&p->rcu, br_multicast_free_pg); + call_rcu(&p->rcu, br_multicast_free_pg); br_mdb_notify(br->dev, port, group, RTM_DELMDB, p->flags); @@ -2051,19 +2051,19 @@ void br_multicast_dev_del(struct net_bridge *br) hlist_for_each_entry_safe(mp, n, &mdb->mhash[i], hlist[ver]) { del_timer(&mp->timer); - call_rcu_bh(&mp->rcu, br_multicast_free_group); + call_rcu(&mp->rcu, br_multicast_free_group); } } if (mdb->old) { spin_unlock_bh(&br->multicast_lock); - rcu_barrier_bh(); + rcu_barrier(); spin_lock_bh(&br->multicast_lock); WARN_ON(mdb->old); } mdb->old = mdb; - call_rcu_bh(&mdb->rcu, br_mdb_free); + call_rcu(&mdb->rcu, br_mdb_free); out: spin_unlock_bh(&br->multicast_lock); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 2b9fdbc43205..464f0ff46c22 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -801,7 +801,7 @@ void __netpoll_cleanup(struct netpoll *np) ops->ndo_netpoll_cleanup(np->dev); RCU_INIT_POINTER(np->dev->npinfo, NULL); - call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info); + call_rcu(&npinfo->rcu, rcu_cleanup_netpoll_info); } else RCU_INIT_POINTER(np->dev->npinfo, NULL); } @@ -812,7 +812,7 @@ void __netpoll_free(struct netpoll *np) ASSERT_RTNL(); /* Wait for transmitting packets to finish before freeing. */ - synchronize_rcu_bh(); + synchronize_rcu(); __netpoll_cleanup(np); kfree(np); } diff --git a/net/core/skmsg.c b/net/core/skmsg.c index b7dbb3c976cd..abaa5f48d3ec 100644 --- a/net/core/skmsg.c +++ b/net/core/skmsg.c @@ -583,7 +583,7 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock) write_unlock_bh(&sk->sk_callback_lock); sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED); - call_rcu_sched(&psock->rcu, sk_psock_destroy); + call_rcu(&psock->rcu, sk_psock_destroy); } EXPORT_SYMBOL_GPL(sk_psock_drop); diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 7d6ff983ba2c..dbd0f7bae00a 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -2405,7 +2405,7 @@ static void __exit decnet_exit(void) proto_unregister(&dn_proto); - rcu_barrier_bh(); /* Wait for completion of call_rcu_bh()'s */ + rcu_barrier(); /* Wait for completion of call_rcu()'s */ } module_exit(decnet_exit); #endif diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index ca3b0f46de53..016e628c6ac9 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -540,7 +540,7 @@ void qdisc_put_stab(struct qdisc_size_table *tab) if (--tab->refcnt == 0) { list_del(&tab->list); - call_rcu_bh(&tab->rcu, stab_kfree_rcu); + call_rcu(&tab->rcu, stab_kfree_rcu); } } EXPORT_SYMBOL(qdisc_put_stab); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index de1663f7d3ad..66ba2ce2320f 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1372,7 +1372,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, if (!tp_head) { RCU_INIT_POINTER(*miniqp->p_miniq, NULL); /* Wait for flying RCU callback before it is freed. */ - rcu_barrier_bh(); + rcu_barrier(); return; } @@ -1380,10 +1380,10 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, &miniqp->miniq1 : &miniqp->miniq2; /* We need to make sure that readers won't see the miniq - * we are about to modify. So wait until previous call_rcu_bh callback + * we are about to modify. So wait until previous call_rcu callback * is done. */ - rcu_barrier_bh(); + rcu_barrier(); miniq->filter_list = tp_head; rcu_assign_pointer(*miniqp->p_miniq, miniq); @@ -1392,7 +1392,7 @@ void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, * block potential new user of miniq_old until all readers * are not seeing it. */ - call_rcu_bh(&miniq_old->rcu, mini_qdisc_rcu_func); + call_rcu(&miniq_old->rcu, mini_qdisc_rcu_func); } EXPORT_SYMBOL(mini_qdisc_pair_swap); diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index c883ec55654f..377f373db6c0 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -573,6 +573,27 @@ foreach my $entry (@mode_permission_funcs) { } $mode_perms_search = "(?:${mode_perms_search})"; +our %deprecated_apis = ( + "synchronize_rcu_bh" => "synchronize_rcu", + "synchronize_rcu_bh_expedited" => "synchronize_rcu_expedited", + "call_rcu_bh" => "call_rcu", + "rcu_barrier_bh" => "rcu_barrier", + "synchronize_sched" => "synchronize_rcu", + "synchronize_sched_expedited" => "synchronize_rcu_expedited", + "call_rcu_sched" => "call_rcu", + "rcu_barrier_sched" => "rcu_barrier", + "get_state_synchronize_sched" => "get_state_synchronize_rcu", + "cond_synchronize_sched" => "cond_synchronize_rcu", +); + +#Create a search pattern for all these strings to speed up a loop below +our $deprecated_apis_search = ""; +foreach my $entry (keys %deprecated_apis) { + $deprecated_apis_search .= '|' if ($deprecated_apis_search ne ""); + $deprecated_apis_search .= $entry; +} +$deprecated_apis_search = "(?:${deprecated_apis_search})"; + our $mode_perms_world_writable = qr{ S_IWUGO | S_IWOTH | @@ -6368,6 +6389,20 @@ sub process { "please use device_initcall() or more appropriate function instead of __initcall() (see include/linux/init.h)\n" . $herecurr); } +# check for spin_is_locked(), suggest lockdep instead + if ($line =~ /\bspin_is_locked\(/) { + WARN("USE_LOCKDEP", + "Where possible, use lockdep_assert_held instead of assertions based on spin_is_locked\n" . $herecurr); + } + +# check for deprecated apis + if ($line =~ /\b($deprecated_apis_search)\b\s*\(/) { + my $deprecated_api = $1; + my $new_api = $deprecated_apis{$deprecated_api}; + WARN("DEPRECATED_API", + "Deprecated use of '$deprecated_api', prefer '$new_api' instead\n" . $herecurr); + } + # check for various structs that are normally const (ops, kgdb, device_tree) # and avoid what seem like struct definitions 'struct foo {' if ($line !~ /\bconst\b/ && diff --git a/tools/include/linux/kernel.h b/tools/include/linux/kernel.h index 6935ef94e77a..857d9e22826e 100644 --- a/tools/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -116,6 +116,6 @@ int scnprintf(char * buf, size_t size, const char * fmt, ...); #define round_down(x, y) ((x) & ~__round_mask(x, y)) #define current_gfp_context(k) 0 -#define synchronize_sched() +#define synchronize_rcu() #endif diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 5a7a62d76a50..19864f1cb27a 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -194,6 +194,14 @@ do shift done +if test -z "$TORTURE_INITRD" || tools/testing/selftests/rcutorture/bin/mkinitrd.sh +then + : +else + echo No initrd and unable to create one, aborting test >&2 + exit 1 +fi + CONFIGFRAG=${KVM}/configs/${TORTURE_SUITE}; export CONFIGFRAG if test -z "$configs" diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh new file mode 100755 index 000000000000..da298394daa2 --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh @@ -0,0 +1,136 @@ +#!/bin/bash +# +# Create an initrd directory if one does not already exist. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, you can access it online at +# http://www.gnu.org/licenses/gpl-2.0.html. +# +# Copyright (C) IBM Corporation, 2013 +# +# Author: Connor Shu + +D=tools/testing/selftests/rcutorture + +# Prerequisite checks +[ -z "$D" ] && echo >&2 "No argument supplied" && exit 1 +if [ ! -d "$D" ]; then + echo >&2 "$D does not exist: Malformed kernel source tree?" + exit 1 +fi +if [ -s "$D/initrd/init" ]; then + echo "$D/initrd/init already exists, no need to create it" + exit 0 +fi + +T=${TMPDIR-/tmp}/mkinitrd.sh.$$ +trap 'rm -rf $T' 0 2 +mkdir $T + +cat > $T/init << '__EOF___' +#!/bin/sh +# Run in userspace a few milliseconds every second. This helps to +# exercise the NO_HZ_FULL portions of RCU. +while : +do + q= + for i in \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a \ + a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a a + do + q="$q $i" + done + sleep 1 +done +__EOF___ + +# Try using dracut to create initrd +if command -v dracut >/dev/null 2>&1 +then + echo Creating $D/initrd using dracut. + # Filesystem creation + dracut --force --no-hostonly --no-hostonly-cmdline --module "base" $T/initramfs.img + cd $D + mkdir -p initrd + cd initrd + zcat $T/initramfs.img | cpio -id + cp $T/init init + chmod +x init + echo Done creating $D/initrd using dracut + exit 0 +fi + +# No dracut, so create a C-language initrd/init program and statically +# link it. This results in a very small initrd, but might be a bit less +# future-proof than dracut. +echo "Could not find dracut, attempting C initrd" +cd $D +mkdir -p initrd +cd initrd +cat > init.c << '___EOF___' +#ifndef NOLIBC +#include +#include +#endif + +volatile unsigned long delaycount; + +int main(int argc, int argv[]) +{ + int i; + struct timeval tv; + struct timeval tvb; + + for (;;) { + sleep(1); + /* Need some userspace time. */ + if (gettimeofday(&tvb, NULL)) + continue; + do { + for (i = 0; i < 1000 * 100; i++) + delaycount = i * i; + if (gettimeofday(&tv, NULL)) + break; + tv.tv_sec -= tvb.tv_sec; + if (tv.tv_sec > 1) + break; + tv.tv_usec += tv.tv_sec * 1000 * 1000; + tv.tv_usec -= tvb.tv_usec; + } while (tv.tv_usec < 1000); + } + return 0; +} +___EOF___ + +# build using nolibc on supported archs (smaller executable) and fall +# back to regular glibc on other ones. +if echo -e "#if __x86_64__||__i386__||__i486__||__i586__||__i686__" \ + "||__ARM_EABI__||__aarch64__\nyes\n#endif" \ + | ${CROSS_COMPILE}gcc -E -nostdlib -xc - \ + | grep -q '^yes'; then + # architecture supported by nolibc + ${CROSS_COMPILE}gcc -fno-asynchronous-unwind-tables -fno-ident \ + -nostdlib -include ../bin/nolibc.h -lgcc -s -static -Os \ + -o init init.c +else + ${CROSS_COMPILE}gcc -s -static -Os -o init init.c +fi + +rm init.c +echo "Done creating a statically linked C-language initrd" + +exit 0 diff --git a/tools/testing/selftests/rcutorture/bin/nolibc.h b/tools/testing/selftests/rcutorture/bin/nolibc.h new file mode 100644 index 000000000000..f98f5b92d3eb --- /dev/null +++ b/tools/testing/selftests/rcutorture/bin/nolibc.h @@ -0,0 +1,2197 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* nolibc.h + * Copyright (C) 2017-2018 Willy Tarreau + */ + +/* some archs (at least aarch64) don't expose the regular syscalls anymore by + * default, either because they have an "_at" replacement, or because there are + * more modern alternatives. For now we'd rather still use them. + */ +#define __ARCH_WANT_SYSCALL_NO_AT +#define __ARCH_WANT_SYSCALL_NO_FLAGS +#define __ARCH_WANT_SYSCALL_DEPRECATED + +#include +#include +#include +#include +#include + +#define NOLIBC + +/* Build a static executable this way : + * $ gcc -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \ + * -static -include nolibc.h -lgcc -o hello hello.c + * + * Useful calling convention table found here : + * http://man7.org/linux/man-pages/man2/syscall.2.html + * + * This doc is even better : + * https://w3challs.com/syscalls/ + */ + + +/* this way it will be removed if unused */ +static int errno; + +#ifndef NOLIBC_IGNORE_ERRNO +#define SET_ERRNO(v) do { errno = (v); } while (0) +#else +#define SET_ERRNO(v) do { } while (0) +#endif + +/* errno codes all ensure that they will not conflict with a valid pointer + * because they all correspond to the highest addressable memry page. + */ +#define MAX_ERRNO 4095 + +/* Declare a few quite common macros and types that usually are in stdlib.h, + * stdint.h, ctype.h, unistd.h and a few other common locations. + */ + +#define NULL ((void *)0) + +/* stdint types */ +typedef unsigned char uint8_t; +typedef signed char int8_t; +typedef unsigned short uint16_t; +typedef signed short int16_t; +typedef unsigned int uint32_t; +typedef signed int int32_t; +typedef unsigned long long uint64_t; +typedef signed long long int64_t; +typedef unsigned long size_t; +typedef signed long ssize_t; +typedef unsigned long uintptr_t; +typedef signed long intptr_t; +typedef signed long ptrdiff_t; + +/* for stat() */ +typedef unsigned int dev_t; +typedef unsigned long ino_t; +typedef unsigned int mode_t; +typedef signed int pid_t; +typedef unsigned int uid_t; +typedef unsigned int gid_t; +typedef unsigned long nlink_t; +typedef signed long off_t; +typedef signed long blksize_t; +typedef signed long blkcnt_t; +typedef signed long time_t; + +/* for poll() */ +struct pollfd { + int fd; + short int events; + short int revents; +}; + +/* for select() */ +struct timeval { + long tv_sec; + long tv_usec; +}; + +/* for pselect() */ +struct timespec { + long tv_sec; + long tv_nsec; +}; + +/* for gettimeofday() */ +struct timezone { + int tz_minuteswest; + int tz_dsttime; +}; + +/* for getdents64() */ +struct linux_dirent64 { + uint64_t d_ino; + int64_t d_off; + unsigned short d_reclen; + unsigned char d_type; + char d_name[]; +}; + +/* commonly an fd_set represents 256 FDs */ +#define FD_SETSIZE 256 +typedef struct { uint32_t fd32[FD_SETSIZE/32]; } fd_set; + +/* needed by wait4() */ +struct rusage { + struct timeval ru_utime; + struct timeval ru_stime; + long ru_maxrss; + long ru_ixrss; + long ru_idrss; + long ru_isrss; + long ru_minflt; + long ru_majflt; + long ru_nswap; + long ru_inblock; + long ru_oublock; + long ru_msgsnd; + long ru_msgrcv; + long ru_nsignals; + long ru_nvcsw; + long ru_nivcsw; +}; + +/* stat flags (WARNING, octal here) */ +#define S_IFDIR 0040000 +#define S_IFCHR 0020000 +#define S_IFBLK 0060000 +#define S_IFREG 0100000 +#define S_IFIFO 0010000 +#define S_IFLNK 0120000 +#define S_IFSOCK 0140000 +#define S_IFMT 0170000 + +#define S_ISDIR(mode) (((mode) & S_IFDIR) == S_IFDIR) +#define S_ISCHR(mode) (((mode) & S_IFCHR) == S_IFCHR) +#define S_ISBLK(mode) (((mode) & S_IFBLK) == S_IFBLK) +#define S_ISREG(mode) (((mode) & S_IFREG) == S_IFREG) +#define S_ISFIFO(mode) (((mode) & S_IFIFO) == S_IFIFO) +#define S_ISLNK(mode) (((mode) & S_IFLNK) == S_IFLNK) +#define S_ISSOCK(mode) (((mode) & S_IFSOCK) == S_IFSOCK) + +#define DT_UNKNOWN 0 +#define DT_FIFO 1 +#define DT_CHR 2 +#define DT_DIR 4 +#define DT_BLK 6 +#define DT_REG 8 +#define DT_LNK 10 +#define DT_SOCK 12 + +/* all the *at functions */ +#ifndef AT_FDWCD +#define AT_FDCWD -100 +#endif + +/* lseek */ +#define SEEK_SET 0 +#define SEEK_CUR 1 +#define SEEK_END 2 + +/* reboot */ +#define LINUX_REBOOT_MAGIC1 0xfee1dead +#define LINUX_REBOOT_MAGIC2 0x28121969 +#define LINUX_REBOOT_CMD_HALT 0xcdef0123 +#define LINUX_REBOOT_CMD_POWER_OFF 0x4321fedc +#define LINUX_REBOOT_CMD_RESTART 0x01234567 +#define LINUX_REBOOT_CMD_SW_SUSPEND 0xd000fce2 + + +/* The format of the struct as returned by the libc to the application, which + * significantly differs from the format returned by the stat() syscall flavours. + */ +struct stat { + dev_t st_dev; /* ID of device containing file */ + ino_t st_ino; /* inode number */ + mode_t st_mode; /* protection */ + nlink_t st_nlink; /* number of hard links */ + uid_t st_uid; /* user ID of owner */ + gid_t st_gid; /* group ID of owner */ + dev_t st_rdev; /* device ID (if special file) */ + off_t st_size; /* total size, in bytes */ + blksize_t st_blksize; /* blocksize for file system I/O */ + blkcnt_t st_blocks; /* number of 512B blocks allocated */ + time_t st_atime; /* time of last access */ + time_t st_mtime; /* time of last modification */ + time_t st_ctime; /* time of last status change */ +}; + +#define WEXITSTATUS(status) (((status) & 0xff00) >> 8) +#define WIFEXITED(status) (((status) & 0x7f) == 0) + + +/* Below comes the architecture-specific code. For each architecture, we have + * the syscall declarations and the _start code definition. This is the only + * global part. On all architectures the kernel puts everything in the stack + * before jumping to _start just above us, without any return address (_start + * is not a function but an entry pint). So at the stack pointer we find argc. + * Then argv[] begins, and ends at the first NULL. Then we have envp which + * starts and ends with a NULL as well. So envp=argv+argc+1. + */ + +#if defined(__x86_64__) +/* Syscalls for x86_64 : + * - registers are 64-bit + * - syscall number is passed in rax + * - arguments are in rdi, rsi, rdx, r10, r8, r9 respectively + * - the system call is performed by calling the syscall instruction + * - syscall return comes in rax + * - rcx and r8..r11 may be clobbered, others are preserved. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + */ + +#define my_syscall0(num) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret) \ + : "0"(_num) \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret) \ + : "r"(_arg1), \ + "0"(_num) \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + register long _arg3 asm("rdx") = (long)(arg3); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "0"(_num) \ + : "rcx", "r8", "r9", "r10", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + register long _arg3 asm("rdx") = (long)(arg3); \ + register long _arg4 asm("r10") = (long)(arg4); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret), "=r"(_arg4) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "0"(_num) \ + : "rcx", "r8", "r9", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + register long _arg3 asm("rdx") = (long)(arg3); \ + register long _arg4 asm("r10") = (long)(arg4); \ + register long _arg5 asm("r8") = (long)(arg5); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "0"(_num) \ + : "rcx", "r9", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + long _ret; \ + register long _num asm("rax") = (num); \ + register long _arg1 asm("rdi") = (long)(arg1); \ + register long _arg2 asm("rsi") = (long)(arg2); \ + register long _arg3 asm("rdx") = (long)(arg3); \ + register long _arg4 asm("r10") = (long)(arg4); \ + register long _arg5 asm("r8") = (long)(arg5); \ + register long _arg6 asm("r9") = (long)(arg6); \ + \ + asm volatile ( \ + "syscall\n" \ + : "=a" (_ret), "=r"(_arg4), "=r"(_arg5) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6), "0"(_num) \ + : "rcx", "r11", "memory", "cc" \ + ); \ + _ret; \ +}) + +/* startup code */ +asm(".section .text\n" + ".global _start\n" + "_start:\n" + "pop %rdi\n" // argc (first arg, %rdi) + "mov %rsp, %rsi\n" // argv[] (second arg, %rsi) + "lea 8(%rsi,%rdi,8),%rdx\n" // then a NULL then envp (third arg, %rdx) + "and $-16, %rsp\n" // x86 ABI : esp must be 16-byte aligned when + "sub $8, %rsp\n" // entering the callee + "call main\n" // main() returns the status code, we'll exit with it. + "movzb %al, %rdi\n" // retrieve exit code from 8 lower bits + "mov $60, %rax\n" // NR_exit == 60 + "syscall\n" // really exit + "hlt\n" // ensure it does not return + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x10000 + +/* The struct returned by the stat() syscall, equivalent to stat64(). The + * syscall returns 116 bytes and stops in the middle of __unused. + */ +struct sys_stat_struct { + unsigned long st_dev; + unsigned long st_ino; + unsigned long st_nlink; + unsigned int st_mode; + unsigned int st_uid; + + unsigned int st_gid; + unsigned int __pad0; + unsigned long st_rdev; + long st_size; + long st_blksize; + + long st_blocks; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + long __unused[3]; +}; + +#elif defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__) +/* Syscalls for i386 : + * - mostly similar to x86_64 + * - registers are 32-bit + * - syscall number is passed in eax + * - arguments are in ebx, ecx, edx, esi, edi, ebp respectively + * - all registers are preserved (except eax of course) + * - the system call is performed by calling int $0x80 + * - syscall return comes in eax + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + * + * Also, i386 supports the old_select syscall if newselect is not available + */ +#define __ARCH_WANT_SYS_OLD_SELECT + +#define my_syscall0(num) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + register long _arg2 asm("ecx") = (long)(arg2); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + register long _arg2 asm("ecx") = (long)(arg2); \ + register long _arg3 asm("edx") = (long)(arg3); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + register long _arg2 asm("ecx") = (long)(arg2); \ + register long _arg3 asm("edx") = (long)(arg3); \ + register long _arg4 asm("esi") = (long)(arg4); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + long _ret; \ + register long _num asm("eax") = (num); \ + register long _arg1 asm("ebx") = (long)(arg1); \ + register long _arg2 asm("ecx") = (long)(arg2); \ + register long _arg3 asm("edx") = (long)(arg3); \ + register long _arg4 asm("esi") = (long)(arg4); \ + register long _arg5 asm("edi") = (long)(arg5); \ + \ + asm volatile ( \ + "int $0x80\n" \ + : "=a" (_ret) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "0"(_num) \ + : "memory", "cc" \ + ); \ + _ret; \ +}) + +/* startup code */ +asm(".section .text\n" + ".global _start\n" + "_start:\n" + "pop %eax\n" // argc (first arg, %eax) + "mov %esp, %ebx\n" // argv[] (second arg, %ebx) + "lea 4(%ebx,%eax,4),%ecx\n" // then a NULL then envp (third arg, %ecx) + "and $-16, %esp\n" // x86 ABI : esp must be 16-byte aligned when + "push %ecx\n" // push all registers on the stack so that we + "push %ebx\n" // support both regparm and plain stack modes + "push %eax\n" + "call main\n" // main() returns the status code in %eax + "movzbl %al, %ebx\n" // retrieve exit code from lower 8 bits + "movl $1, %eax\n" // NR_exit == 1 + "int $0x80\n" // exit now + "hlt\n" // ensure it does not + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x10000 + +/* The struct returned by the stat() syscall, 32-bit only, the syscall returns + * exactly 56 bytes (stops before the unused array). + */ +struct sys_stat_struct { + unsigned long st_dev; + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; + + unsigned long st_rdev; + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long __unused[2]; +}; + +#elif defined(__ARM_EABI__) +/* Syscalls for ARM in ARM or Thumb modes : + * - registers are 32-bit + * - stack is 8-byte aligned + * ( http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.faqs/ka4127.html) + * - syscall number is passed in r7 + * - arguments are in r0, r1, r2, r3, r4, r5 + * - the system call is performed by calling svc #0 + * - syscall return comes in r0. + * - only lr is clobbered. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * - the syscall number is always specified last in order to allow to force + * some registers before (gcc refuses a %-register at the last position). + * + * Also, ARM supports the old_select syscall if newselect is not available + */ +#define __ARCH_WANT_SYS_OLD_SELECT + +#define my_syscall0(num) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0"); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + register long _arg2 asm("r1") = (long)(arg2); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + register long _arg2 asm("r1") = (long)(arg2); \ + register long _arg3 asm("r2") = (long)(arg3); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + register long _arg2 asm("r1") = (long)(arg2); \ + register long _arg3 asm("r2") = (long)(arg3); \ + register long _arg4 asm("r3") = (long)(arg4); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num asm("r7") = (num); \ + register long _arg1 asm("r0") = (long)(arg1); \ + register long _arg2 asm("r1") = (long)(arg2); \ + register long _arg3 asm("r2") = (long)(arg3); \ + register long _arg4 asm("r3") = (long)(arg4); \ + register long _arg5 asm("r4") = (long)(arg5); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r" (_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_num) \ + : "memory", "cc", "lr" \ + ); \ + _arg1; \ +}) + +/* startup code */ +asm(".section .text\n" + ".global _start\n" + "_start:\n" +#if defined(__THUMBEB__) || defined(__THUMBEL__) + /* We enter here in 32-bit mode but if some previous functions were in + * 16-bit mode, the assembler cannot know, so we need to tell it we're in + * 32-bit now, then switch to 16-bit (is there a better way to do it than + * adding 1 by hand ?) and tell the asm we're now in 16-bit mode so that + * it generates correct instructions. Note that we do not support thumb1. + */ + ".code 32\n" + "add r0, pc, #1\n" + "bx r0\n" + ".code 16\n" +#endif + "pop {%r0}\n" // argc was in the stack + "mov %r1, %sp\n" // argv = sp + "add %r2, %r1, %r0, lsl #2\n" // envp = argv + 4*argc ... + "add %r2, %r2, $4\n" // ... + 4 + "and %r3, %r1, $-8\n" // AAPCS : sp must be 8-byte aligned in the + "mov %sp, %r3\n" // callee, an bl doesn't push (lr=pc) + "bl main\n" // main() returns the status code, we'll exit with it. + "and %r0, %r0, $0xff\n" // limit exit code to 8 bits + "movs r7, $1\n" // NR_exit == 1 + "svc $0x00\n" + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x4000 + +/* The struct returned by the stat() syscall, 32-bit only, the syscall returns + * exactly 56 bytes (stops before the unused array). In big endian, the format + * differs as devices are returned as short only. + */ +struct sys_stat_struct { +#if defined(__ARMEB__) + unsigned short st_dev; + unsigned short __pad1; +#else + unsigned long st_dev; +#endif + unsigned long st_ino; + unsigned short st_mode; + unsigned short st_nlink; + unsigned short st_uid; + unsigned short st_gid; +#if defined(__ARMEB__) + unsigned short st_rdev; + unsigned short __pad2; +#else + unsigned long st_rdev; +#endif + unsigned long st_size; + unsigned long st_blksize; + unsigned long st_blocks; + unsigned long st_atime; + unsigned long st_atime_nsec; + unsigned long st_mtime; + unsigned long st_mtime_nsec; + unsigned long st_ctime; + unsigned long st_ctime_nsec; + unsigned long __unused[2]; +}; + +#elif defined(__aarch64__) +/* Syscalls for AARCH64 : + * - registers are 64-bit + * - stack is 16-byte aligned + * - syscall number is passed in x8 + * - arguments are in x0, x1, x2, x3, x4, x5 + * - the system call is performed by calling svc 0 + * - syscall return comes in x0. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + * + * On aarch64, select() is not implemented so we have to use pselect6(). + */ +#define __ARCH_WANT_SYS_PSELECT6 + +#define my_syscall0(num) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0"); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + register long _arg3 asm("x2") = (long)(arg3); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + register long _arg3 asm("x2") = (long)(arg3); \ + register long _arg4 asm("x3") = (long)(arg4); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r"(_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + register long _arg3 asm("x2") = (long)(arg3); \ + register long _arg4 asm("x3") = (long)(arg4); \ + register long _arg5 asm("x4") = (long)(arg5); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r" (_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +#define my_syscall6(num, arg1, arg2, arg3, arg4, arg5, arg6) \ +({ \ + register long _num asm("x8") = (num); \ + register long _arg1 asm("x0") = (long)(arg1); \ + register long _arg2 asm("x1") = (long)(arg2); \ + register long _arg3 asm("x2") = (long)(arg3); \ + register long _arg4 asm("x3") = (long)(arg4); \ + register long _arg5 asm("x4") = (long)(arg5); \ + register long _arg6 asm("x5") = (long)(arg6); \ + \ + asm volatile ( \ + "svc #0\n" \ + : "=r" (_arg1) \ + : "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5), \ + "r"(_arg6), "r"(_num) \ + : "memory", "cc" \ + ); \ + _arg1; \ +}) + +/* startup code */ +asm(".section .text\n" + ".global _start\n" + "_start:\n" + "ldr x0, [sp]\n" // argc (x0) was in the stack + "add x1, sp, 8\n" // argv (x1) = sp + "lsl x2, x0, 3\n" // envp (x2) = 8*argc ... + "add x2, x2, 8\n" // + 8 (skip null) + "add x2, x2, x1\n" // + argv + "and sp, x1, -16\n" // sp must be 16-byte aligned in the callee + "bl main\n" // main() returns the status code, we'll exit with it. + "and x0, x0, 0xff\n" // limit exit code to 8 bits + "mov x8, 93\n" // NR_exit == 93 + "svc #0\n" + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_CREAT 0x40 +#define O_EXCL 0x80 +#define O_NOCTTY 0x100 +#define O_TRUNC 0x200 +#define O_APPEND 0x400 +#define O_NONBLOCK 0x800 +#define O_DIRECTORY 0x4000 + +/* The struct returned by the newfstatat() syscall. Differs slightly from the + * x86_64's stat one by field ordering, so be careful. + */ +struct sys_stat_struct { + unsigned long st_dev; + unsigned long st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + + unsigned long st_rdev; + unsigned long __pad1; + long st_size; + int st_blksize; + int __pad2; + + long st_blocks; + long st_atime; + unsigned long st_atime_nsec; + long st_mtime; + + unsigned long st_mtime_nsec; + long st_ctime; + unsigned long st_ctime_nsec; + unsigned int __unused[2]; +}; + +#elif defined(__mips__) && defined(_ABIO32) +/* Syscalls for MIPS ABI O32 : + * - WARNING! there's always a delayed slot! + * - WARNING again, the syntax is different, registers take a '$' and numbers + * do not. + * - registers are 32-bit + * - stack is 8-byte aligned + * - syscall number is passed in v0 (starts at 0xfa0). + * - arguments are in a0, a1, a2, a3, then the stack. The caller needs to + * leave some room in the stack for the callee to save a0..a3 if needed. + * - Many registers are clobbered, in fact only a0..a2 and s0..s8 are + * preserved. See: https://www.linux-mips.org/wiki/Syscall as well as + * scall32-o32.S in the kernel sources. + * - the system call is performed by calling "syscall" + * - syscall return comes in v0, and register a3 needs to be checked to know + * if an error occured, in which case errno is in v0. + * - the arguments are cast to long and assigned into the target registers + * which are then simply passed as registers to the asm code, so that we + * don't have to experience issues with register constraints. + */ + +#define my_syscall0(num) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg4 asm("a3"); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "r"(_num) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall1(num, arg1) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg4 asm("a3"); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall2(num, arg1, arg2) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg2 asm("a1") = (long)(arg2); \ + register long _arg4 asm("a3"); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall3(num, arg1, arg2, arg3) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg2 asm("a1") = (long)(arg2); \ + register long _arg3 asm("a2") = (long)(arg3); \ + register long _arg4 asm("a3"); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r"(_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall4(num, arg1, arg2, arg3, arg4) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg2 asm("a1") = (long)(arg2); \ + register long _arg3 asm("a2") = (long)(arg3); \ + register long _arg4 asm("a3") = (long)(arg4); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "syscall\n" \ + "addiu $sp, $sp, 32\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +#define my_syscall5(num, arg1, arg2, arg3, arg4, arg5) \ +({ \ + register long _num asm("v0") = (num); \ + register long _arg1 asm("a0") = (long)(arg1); \ + register long _arg2 asm("a1") = (long)(arg2); \ + register long _arg3 asm("a2") = (long)(arg3); \ + register long _arg4 asm("a3") = (long)(arg4); \ + register long _arg5 = (long)(arg5); \ + \ + asm volatile ( \ + "addiu $sp, $sp, -32\n" \ + "sw %7, 16($sp)\n" \ + "syscall\n " \ + "addiu $sp, $sp, 32\n" \ + : "=r" (_num), "=r"(_arg4) \ + : "0"(_num), \ + "r"(_arg1), "r"(_arg2), "r"(_arg3), "r"(_arg4), "r"(_arg5) \ + : "memory", "cc", "at", "v1", "hi", "lo", \ + \ + ); \ + _arg4 ? -_num : _num; \ +}) + +/* startup code, note that it's called __start on MIPS */ +asm(".section .text\n" + ".set nomips16\n" + ".global __start\n" + ".set noreorder\n" + ".option pic0\n" + ".ent __start\n" + "__start:\n" + "lw $a0,($sp)\n" // argc was in the stack + "addiu $a1, $sp, 4\n" // argv = sp + 4 + "sll $a2, $a0, 2\n" // a2 = argc * 4 + "add $a2, $a2, $a1\n" // envp = argv + 4*argc ... + "addiu $a2, $a2, 4\n" // ... + 4 + "li $t0, -8\n" + "and $sp, $sp, $t0\n" // sp must be 8-byte aligned + "addiu $sp,$sp,-16\n" // the callee expects to save a0..a3 there! + "jal main\n" // main() returns the status code, we'll exit with it. + "nop\n" // delayed slot + "and $a0, $v0, 0xff\n" // limit exit code to 8 bits + "li $v0, 4001\n" // NR_exit == 4001 + "syscall\n" + ".end __start\n" + ""); + +/* fcntl / open */ +#define O_RDONLY 0 +#define O_WRONLY 1 +#define O_RDWR 2 +#define O_APPEND 0x0008 +#define O_NONBLOCK 0x0080 +#define O_CREAT 0x0100 +#define O_TRUNC 0x0200 +#define O_EXCL 0x0400 +#define O_NOCTTY 0x0800 +#define O_DIRECTORY 0x10000 + +/* The struct returned by the stat() syscall. 88 bytes are returned by the + * syscall. + */ +struct sys_stat_struct { + unsigned int st_dev; + long st_pad1[3]; + unsigned long st_ino; + unsigned int st_mode; + unsigned int st_nlink; + unsigned int st_uid; + unsigned int st_gid; + unsigned int st_rdev; + long st_pad2[2]; + long st_size; + long st_pad3; + long st_atime; + long st_atime_nsec; + long st_mtime; + long st_mtime_nsec; + long st_ctime; + long st_ctime_nsec; + long st_blksize; + long st_blocks; + long st_pad4[14]; +}; + +#endif + + +/* Below are the C functions used to declare the raw syscalls. They try to be + * architecture-agnostic, and return either a success or -errno. Declaring them + * static will lead to them being inlined in most cases, but it's still possible + * to reference them by a pointer if needed. + */ +static __attribute__((unused)) +void *sys_brk(void *addr) +{ + return (void *)my_syscall1(__NR_brk, addr); +} + +static __attribute__((noreturn,unused)) +void sys_exit(int status) +{ + my_syscall1(__NR_exit, status & 255); + while(1); // shut the "noreturn" warnings. +} + +static __attribute__((unused)) +int sys_chdir(const char *path) +{ + return my_syscall1(__NR_chdir, path); +} + +static __attribute__((unused)) +int sys_chmod(const char *path, mode_t mode) +{ +#ifdef __NR_fchmodat + return my_syscall4(__NR_fchmodat, AT_FDCWD, path, mode, 0); +#else + return my_syscall2(__NR_chmod, path, mode); +#endif +} + +static __attribute__((unused)) +int sys_chown(const char *path, uid_t owner, gid_t group) +{ +#ifdef __NR_fchownat + return my_syscall5(__NR_fchownat, AT_FDCWD, path, owner, group, 0); +#else + return my_syscall3(__NR_chown, path, owner, group); +#endif +} + +static __attribute__((unused)) +int sys_chroot(const char *path) +{ + return my_syscall1(__NR_chroot, path); +} + +static __attribute__((unused)) +int sys_close(int fd) +{ + return my_syscall1(__NR_close, fd); +} + +static __attribute__((unused)) +int sys_dup(int fd) +{ + return my_syscall1(__NR_dup, fd); +} + +static __attribute__((unused)) +int sys_dup2(int old, int new) +{ + return my_syscall2(__NR_dup2, old, new); +} + +static __attribute__((unused)) +int sys_execve(const char *filename, char *const argv[], char *const envp[]) +{ + return my_syscall3(__NR_execve, filename, argv, envp); +} + +static __attribute__((unused)) +pid_t sys_fork(void) +{ + return my_syscall0(__NR_fork); +} + +static __attribute__((unused)) +int sys_fsync(int fd) +{ + return my_syscall1(__NR_fsync, fd); +} + +static __attribute__((unused)) +int sys_getdents64(int fd, struct linux_dirent64 *dirp, int count) +{ + return my_syscall3(__NR_getdents64, fd, dirp, count); +} + +static __attribute__((unused)) +pid_t sys_getpgrp(void) +{ + return my_syscall0(__NR_getpgrp); +} + +static __attribute__((unused)) +pid_t sys_getpid(void) +{ + return my_syscall0(__NR_getpid); +} + +static __attribute__((unused)) +int sys_gettimeofday(struct timeval *tv, struct timezone *tz) +{ + return my_syscall2(__NR_gettimeofday, tv, tz); +} + +static __attribute__((unused)) +int sys_ioctl(int fd, unsigned long req, void *value) +{ + return my_syscall3(__NR_ioctl, fd, req, value); +} + +static __attribute__((unused)) +int sys_kill(pid_t pid, int signal) +{ + return my_syscall2(__NR_kill, pid, signal); +} + +static __attribute__((unused)) +int sys_link(const char *old, const char *new) +{ +#ifdef __NR_linkat + return my_syscall5(__NR_linkat, AT_FDCWD, old, AT_FDCWD, new, 0); +#else + return my_syscall2(__NR_link, old, new); +#endif +} + +static __attribute__((unused)) +off_t sys_lseek(int fd, off_t offset, int whence) +{ + return my_syscall3(__NR_lseek, fd, offset, whence); +} + +static __attribute__((unused)) +int sys_mkdir(const char *path, mode_t mode) +{ +#ifdef __NR_mkdirat + return my_syscall3(__NR_mkdirat, AT_FDCWD, path, mode); +#else + return my_syscall2(__NR_mkdir, path, mode); +#endif +} + +static __attribute__((unused)) +long sys_mknod(const char *path, mode_t mode, dev_t dev) +{ +#ifdef __NR_mknodat + return my_syscall4(__NR_mknodat, AT_FDCWD, path, mode, dev); +#else + return my_syscall3(__NR_mknod, path, mode, dev); +#endif +} + +static __attribute__((unused)) +int sys_mount(const char *src, const char *tgt, const char *fst, + unsigned long flags, const void *data) +{ + return my_syscall5(__NR_mount, src, tgt, fst, flags, data); +} + +static __attribute__((unused)) +int sys_open(const char *path, int flags, mode_t mode) +{ +#ifdef __NR_openat + return my_syscall4(__NR_openat, AT_FDCWD, path, flags, mode); +#else + return my_syscall3(__NR_open, path, flags, mode); +#endif +} + +static __attribute__((unused)) +int sys_pivot_root(const char *new, const char *old) +{ + return my_syscall2(__NR_pivot_root, new, old); +} + +static __attribute__((unused)) +int sys_poll(struct pollfd *fds, int nfds, int timeout) +{ + return my_syscall3(__NR_poll, fds, nfds, timeout); +} + +static __attribute__((unused)) +ssize_t sys_read(int fd, void *buf, size_t count) +{ + return my_syscall3(__NR_read, fd, buf, count); +} + +static __attribute__((unused)) +ssize_t sys_reboot(int magic1, int magic2, int cmd, void *arg) +{ + return my_syscall4(__NR_reboot, magic1, magic2, cmd, arg); +} + +static __attribute__((unused)) +int sys_sched_yield(void) +{ + return my_syscall0(__NR_sched_yield); +} + +static __attribute__((unused)) +int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) +{ +#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect) + struct sel_arg_struct { + unsigned long n; + fd_set *r, *w, *e; + struct timeval *t; + } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout }; + return my_syscall1(__NR_select, &arg); +#elif defined(__ARCH_WANT_SYS_PSELECT6) && defined(__NR_pselect6) + struct timespec t; + + if (timeout) { + t.tv_sec = timeout->tv_sec; + t.tv_nsec = timeout->tv_usec * 1000; + } + return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); +#else +#ifndef __NR__newselect +#define __NR__newselect __NR_select +#endif + return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout); +#endif +} + +static __attribute__((unused)) +int sys_setpgid(pid_t pid, pid_t pgid) +{ + return my_syscall2(__NR_setpgid, pid, pgid); +} + +static __attribute__((unused)) +pid_t sys_setsid(void) +{ + return my_syscall0(__NR_setsid); +} + +static __attribute__((unused)) +int sys_stat(const char *path, struct stat *buf) +{ + struct sys_stat_struct stat; + long ret; + +#ifdef __NR_newfstatat + /* only solution for arm64 */ + ret = my_syscall4(__NR_newfstatat, AT_FDCWD, path, &stat, 0); +#else + ret = my_syscall2(__NR_stat, path, &stat); +#endif + buf->st_dev = stat.st_dev; + buf->st_ino = stat.st_ino; + buf->st_mode = stat.st_mode; + buf->st_nlink = stat.st_nlink; + buf->st_uid = stat.st_uid; + buf->st_gid = stat.st_gid; + buf->st_rdev = stat.st_rdev; + buf->st_size = stat.st_size; + buf->st_blksize = stat.st_blksize; + buf->st_blocks = stat.st_blocks; + buf->st_atime = stat.st_atime; + buf->st_mtime = stat.st_mtime; + buf->st_ctime = stat.st_ctime; + return ret; +} + + +static __attribute__((unused)) +int sys_symlink(const char *old, const char *new) +{ +#ifdef __NR_symlinkat + return my_syscall3(__NR_symlinkat, old, AT_FDCWD, new); +#else + return my_syscall2(__NR_symlink, old, new); +#endif +} + +static __attribute__((unused)) +mode_t sys_umask(mode_t mode) +{ + return my_syscall1(__NR_umask, mode); +} + +static __attribute__((unused)) +int sys_umount2(const char *path, int flags) +{ + return my_syscall2(__NR_umount2, path, flags); +} + +static __attribute__((unused)) +int sys_unlink(const char *path) +{ +#ifdef __NR_unlinkat + return my_syscall3(__NR_unlinkat, AT_FDCWD, path, 0); +#else + return my_syscall1(__NR_unlink, path); +#endif +} + +static __attribute__((unused)) +pid_t sys_wait4(pid_t pid, int *status, int options, struct rusage *rusage) +{ + return my_syscall4(__NR_wait4, pid, status, options, rusage); +} + +static __attribute__((unused)) +pid_t sys_waitpid(pid_t pid, int *status, int options) +{ + return sys_wait4(pid, status, options, 0); +} + +static __attribute__((unused)) +pid_t sys_wait(int *status) +{ + return sys_waitpid(-1, status, 0); +} + +static __attribute__((unused)) +ssize_t sys_write(int fd, const void *buf, size_t count) +{ + return my_syscall3(__NR_write, fd, buf, count); +} + + +/* Below are the libc-compatible syscalls which return x or -1 and set errno. + * They rely on the functions above. Similarly they're marked static so that it + * is possible to assign pointers to them if needed. + */ + +static __attribute__((unused)) +int brk(void *addr) +{ + void *ret = sys_brk(addr); + + if (!ret) { + SET_ERRNO(ENOMEM); + return -1; + } + return 0; +} + +static __attribute__((noreturn,unused)) +void exit(int status) +{ + sys_exit(status); +} + +static __attribute__((unused)) +int chdir(const char *path) +{ + int ret = sys_chdir(path); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int chmod(const char *path, mode_t mode) +{ + int ret = sys_chmod(path, mode); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int chown(const char *path, uid_t owner, gid_t group) +{ + int ret = sys_chown(path, owner, group); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int chroot(const char *path) +{ + int ret = sys_chroot(path); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int close(int fd) +{ + int ret = sys_close(fd); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int dup2(int old, int new) +{ + int ret = sys_dup2(old, new); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int execve(const char *filename, char *const argv[], char *const envp[]) +{ + int ret = sys_execve(filename, argv, envp); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t fork(void) +{ + pid_t ret = sys_fork(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int fsync(int fd) +{ + int ret = sys_fsync(fd); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int getdents64(int fd, struct linux_dirent64 *dirp, int count) +{ + int ret = sys_getdents64(fd, dirp, count); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t getpgrp(void) +{ + pid_t ret = sys_getpgrp(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t getpid(void) +{ + pid_t ret = sys_getpid(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int gettimeofday(struct timeval *tv, struct timezone *tz) +{ + int ret = sys_gettimeofday(tv, tz); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int ioctl(int fd, unsigned long req, void *value) +{ + int ret = sys_ioctl(fd, req, value); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int kill(pid_t pid, int signal) +{ + int ret = sys_kill(pid, signal); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int link(const char *old, const char *new) +{ + int ret = sys_link(old, new); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +off_t lseek(int fd, off_t offset, int whence) +{ + off_t ret = sys_lseek(fd, offset, whence); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int mkdir(const char *path, mode_t mode) +{ + int ret = sys_mkdir(path, mode); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int mknod(const char *path, mode_t mode, dev_t dev) +{ + int ret = sys_mknod(path, mode, dev); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int mount(const char *src, const char *tgt, + const char *fst, unsigned long flags, + const void *data) +{ + int ret = sys_mount(src, tgt, fst, flags, data); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int open(const char *path, int flags, mode_t mode) +{ + int ret = sys_open(path, flags, mode); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int pivot_root(const char *new, const char *old) +{ + int ret = sys_pivot_root(new, old); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int poll(struct pollfd *fds, int nfds, int timeout) +{ + int ret = sys_poll(fds, nfds, timeout); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +ssize_t read(int fd, void *buf, size_t count) +{ + ssize_t ret = sys_read(fd, buf, count); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int reboot(int cmd) +{ + int ret = sys_reboot(LINUX_REBOOT_MAGIC1, LINUX_REBOOT_MAGIC2, cmd, 0); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +void *sbrk(intptr_t inc) +{ + void *ret; + + /* first call to find current end */ + if ((ret = sys_brk(0)) && (sys_brk(ret + inc) == ret + inc)) + return ret + inc; + + SET_ERRNO(ENOMEM); + return (void *)-1; +} + +static __attribute__((unused)) +int sched_yield(void) +{ + int ret = sys_sched_yield(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) +{ + int ret = sys_select(nfds, rfds, wfds, efds, timeout); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int setpgid(pid_t pid, pid_t pgid) +{ + int ret = sys_setpgid(pid, pgid); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t setsid(void) +{ + pid_t ret = sys_setsid(); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +unsigned int sleep(unsigned int seconds) +{ + struct timeval my_timeval = { seconds, 0 }; + + if (sys_select(0, 0, 0, 0, &my_timeval) < 0) + return my_timeval.tv_sec + !!my_timeval.tv_usec; + else + return 0; +} + +static __attribute__((unused)) +int stat(const char *path, struct stat *buf) +{ + int ret = sys_stat(path, buf); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int symlink(const char *old, const char *new) +{ + int ret = sys_symlink(old, new); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int tcsetpgrp(int fd, pid_t pid) +{ + return ioctl(fd, TIOCSPGRP, &pid); +} + +static __attribute__((unused)) +mode_t umask(mode_t mode) +{ + return sys_umask(mode); +} + +static __attribute__((unused)) +int umount2(const char *path, int flags) +{ + int ret = sys_umount2(path, flags); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +int unlink(const char *path) +{ + int ret = sys_unlink(path); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t wait4(pid_t pid, int *status, int options, struct rusage *rusage) +{ + pid_t ret = sys_wait4(pid, status, options, rusage); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t waitpid(pid_t pid, int *status, int options) +{ + pid_t ret = sys_waitpid(pid, status, options); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +pid_t wait(int *status) +{ + pid_t ret = sys_wait(status); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +static __attribute__((unused)) +ssize_t write(int fd, const void *buf, size_t count) +{ + ssize_t ret = sys_write(fd, buf, count); + + if (ret < 0) { + SET_ERRNO(-ret); + ret = -1; + } + return ret; +} + +/* some size-optimized reimplementations of a few common str* and mem* + * functions. They're marked static, except memcpy() and raise() which are used + * by libgcc on ARM, so they are marked weak instead in order not to cause an + * error when building a program made of multiple files (not recommended). + */ + +static __attribute__((unused)) +void *memmove(void *dst, const void *src, size_t len) +{ + ssize_t pos = (dst <= src) ? -1 : (long)len; + void *ret = dst; + + while (len--) { + pos += (dst <= src) ? 1 : -1; + ((char *)dst)[pos] = ((char *)src)[pos]; + } + return ret; +} + +static __attribute__((unused)) +void *memset(void *dst, int b, size_t len) +{ + char *p = dst; + + while (len--) + *(p++) = b; + return dst; +} + +static __attribute__((unused)) +int memcmp(const void *s1, const void *s2, size_t n) +{ + size_t ofs = 0; + char c1 = 0; + + while (ofs < n && !(c1 = ((char *)s1)[ofs] - ((char *)s2)[ofs])) { + ofs++; + } + return c1; +} + +static __attribute__((unused)) +char *strcpy(char *dst, const char *src) +{ + char *ret = dst; + + while ((*dst++ = *src++)); + return ret; +} + +static __attribute__((unused)) +char *strchr(const char *s, int c) +{ + while (*s) { + if (*s == (char)c) + return (char *)s; + s++; + } + return NULL; +} + +static __attribute__((unused)) +char *strrchr(const char *s, int c) +{ + const char *ret = NULL; + + while (*s) { + if (*s == (char)c) + ret = s; + s++; + } + return (char *)ret; +} + +static __attribute__((unused)) +size_t nolibc_strlen(const char *str) +{ + size_t len; + + for (len = 0; str[len]; len++); + return len; +} + +#define strlen(str) ({ \ + __builtin_constant_p((str)) ? \ + __builtin_strlen((str)) : \ + nolibc_strlen((str)); \ +}) + +static __attribute__((unused)) +int isdigit(int c) +{ + return (unsigned int)(c - '0') <= 9; +} + +static __attribute__((unused)) +long atol(const char *s) +{ + unsigned long ret = 0; + unsigned long d; + int neg = 0; + + if (*s == '-') { + neg = 1; + s++; + } + + while (1) { + d = (*s++) - '0'; + if (d > 9) + break; + ret *= 10; + ret += d; + } + + return neg ? -ret : ret; +} + +static __attribute__((unused)) +int atoi(const char *s) +{ + return atol(s); +} + +static __attribute__((unused)) +const char *ltoa(long in) +{ + /* large enough for -9223372036854775808 */ + static char buffer[21]; + char *pos = buffer + sizeof(buffer) - 1; + int neg = in < 0; + unsigned long n = neg ? -in : in; + + *pos-- = '\0'; + do { + *pos-- = '0' + n % 10; + n /= 10; + if (pos < buffer) + return pos + 1; + } while (n); + + if (neg) + *pos-- = '-'; + return pos + 1; +} + +__attribute__((weak,unused)) +void *memcpy(void *dst, const void *src, size_t len) +{ + return memmove(dst, src, len); +} + +/* needed by libgcc for divide by zero */ +__attribute__((weak,unused)) +int raise(int signal) +{ + return kill(getpid(), signal); +} + +/* Here come a few helper functions */ + +static __attribute__((unused)) +void FD_ZERO(fd_set *set) +{ + memset(set, 0, sizeof(*set)); +} + +static __attribute__((unused)) +void FD_SET(int fd, fd_set *set) +{ + if (fd < 0 || fd >= FD_SETSIZE) + return; + set->fd32[fd / 32] |= 1 << (fd & 31); +} + +/* WARNING, it only deals with the 4096 first majors and 256 first minors */ +static __attribute__((unused)) +dev_t makedev(unsigned int major, unsigned int minor) +{ + return ((major & 0xfff) << 8) | (minor & 0xff); +} diff --git a/tools/testing/selftests/rcutorture/doc/initrd.txt b/tools/testing/selftests/rcutorture/doc/initrd.txt index 833f826d6ec2..933b4fd12327 100644 --- a/tools/testing/selftests/rcutorture/doc/initrd.txt +++ b/tools/testing/selftests/rcutorture/doc/initrd.txt @@ -1,9 +1,12 @@ -This document describes one way to create the initrd directory hierarchy -in order to allow an initrd to be built into your kernel. The trick -here is to steal the initrd file used on your Linux laptop, Ubuntu in -this case. There are probably much better ways of doing this. +The rcutorture scripting tools automatically create the needed initrd +directory using dracut. Failing that, this tool will create an initrd +containing a single statically linked binary named "init" that loops +over a very long sleep() call. In both cases, this creation is done +by tools/testing/selftests/rcutorture/bin/mkinitrd.sh. -That said, here are the commands: +However, if you are attempting to run rcutorture on a system that does +not have dracut installed, and if you don't like the notion of static +linking, you might wish to press an existing initrd into service: ------------------------------------------------------------------------ cd tools/testing/selftests/rcutorture @@ -11,22 +14,7 @@ zcat /initrd.img > /tmp/initrd.img.zcat mkdir initrd cd initrd cpio -id < /tmp/initrd.img.zcat ------------------------------------------------------------------------- - -Another way to create an initramfs image is using "dracut"[1], which is -available on many distros, however the initramfs dracut generates is a cpio -archive with another cpio archive in it, so an extra step is needed to create -the initrd directory hierarchy. - -Here are the commands to create a initrd directory for rcutorture using -dracut: - ------------------------------------------------------------------------- -dracut --no-hostonly --no-hostonly-cmdline --module "base bash shutdown" /tmp/initramfs.img -cd tools/testing/selftests/rcutorture -mkdir initrd -cd initrd -/usr/lib/dracut/skipcpio /tmp/initramfs.img | zcat | cpio -id < /tmp/initramfs.img +# Manually verify that initrd contains needed binaries and libraries. ------------------------------------------------------------------------ Interestingly enough, if you are running rcutorture, you don't really @@ -39,75 +27,12 @@ with 0755 mode. ------------------------------------------------------------------------ #!/bin/sh -[ -d /dev ] || mkdir -m 0755 /dev -[ -d /root ] || mkdir -m 0700 /root -[ -d /sys ] || mkdir /sys -[ -d /proc ] || mkdir /proc -[ -d /tmp ] || mkdir /tmp -mkdir -p /var/lock -mount -t sysfs -o nodev,noexec,nosuid sysfs /sys -mount -t proc -o nodev,noexec,nosuid proc /proc -# Some things don't work properly without /etc/mtab. -ln -sf /proc/mounts /etc/mtab - -# Note that this only becomes /dev on the real filesystem if udev's scripts -# are used; which they will be, but it's worth pointing out -if ! mount -t devtmpfs -o mode=0755 udev /dev; then - echo "W: devtmpfs not available, falling back to tmpfs for /dev" - mount -t tmpfs -o mode=0755 udev /dev - [ -e /dev/console ] || mknod --mode=600 /dev/console c 5 1 - [ -e /dev/kmsg ] || mknod --mode=644 /dev/kmsg c 1 11 - [ -e /dev/null ] || mknod --mode=666 /dev/null c 1 3 -fi - -mkdir /dev/pts -mount -t devpts -o noexec,nosuid,gid=5,mode=0620 devpts /dev/pts || true -mount -t tmpfs -o "nosuid,size=20%,mode=0755" tmpfs /run -mkdir /run/initramfs -# compatibility symlink for the pre-oneiric locations -ln -s /run/initramfs /dev/.initramfs - -# Export relevant variables -export ROOT= -export ROOTDELAY= -export ROOTFLAGS= -export ROOTFSTYPE= -export IP= -export BOOT= -export BOOTIF= -export UBIMTD= -export break= -export init=/sbin/init -export quiet=n -export readonly=y -export rootmnt=/root -export debug= -export panic= -export blacklist= -export resume= -export resume_offset= -export recovery= - -for i in /sys/devices/system/cpu/cpu*/online -do - case $i in - '/sys/devices/system/cpu/cpu0/online') - ;; - '/sys/devices/system/cpu/cpu*/online') - ;; - *) - echo 1 > $i - ;; - esac -done - while : do sleep 10 done ------------------------------------------------------------------------ -References: -[1]: https://dracut.wiki.kernel.org/index.php/Main_Page -[2]: http://blog.elastocloud.org/2015/06/rapid-linux-kernel-devtest-with-qemu.html -[3]: https://www.centos.org/forums/viewtopic.php?t=51621 +This approach also allows most of the binaries and libraries in the +initrd filesystem to be dispensed with, which can save significant +space in rcutorture's "res" directory. diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h index 891ad13e95b2..d27285f8ee82 100644 --- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h +++ b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h @@ -131,8 +131,8 @@ struct hlist_node { * weird ABI and we need to ask it explicitly. * * The alignment is required to guarantee that bits 0 and 1 of @next will be - * clear under normal conditions -- as long as we use call_rcu(), - * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback. + * clear under normal conditions -- as long as we use call_rcu() or + * call_srcu() to queue callback. * * This guarantee is important for few reasons: * - future call_rcu_lazy() will make use of lower bits in the pointer; diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index a6b135491b6c..870b1185173b 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -196,7 +196,7 @@ void vgic_irq_set_phys_active(struct vgic_irq *irq, bool active) */ static struct kvm_vcpu *vgic_target_oracle(struct vgic_irq *irq) { - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock)); + lockdep_assert_held(&irq->irq_lock); /* If the interrupt is active, it must stay on the current vcpu */ if (irq->active) @@ -273,7 +273,7 @@ static void vgic_sort_ap_list(struct kvm_vcpu *vcpu) { struct vgic_cpu *vgic_cpu = &vcpu->arch.vgic_cpu; - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); + lockdep_assert_held(&vgic_cpu->ap_list_lock); list_sort(NULL, &vgic_cpu->ap_list_head, vgic_irq_cmp); } @@ -311,7 +311,7 @@ bool vgic_queue_irq_unlock(struct kvm *kvm, struct vgic_irq *irq, { struct kvm_vcpu *vcpu; - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock)); + lockdep_assert_held(&irq->irq_lock); retry: vcpu = vgic_target_oracle(irq); @@ -702,7 +702,7 @@ static inline void vgic_fold_lr_state(struct kvm_vcpu *vcpu) static inline void vgic_populate_lr(struct kvm_vcpu *vcpu, struct vgic_irq *irq, int lr) { - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&irq->irq_lock)); + lockdep_assert_held(&irq->irq_lock); if (kvm_vgic_global_state.type == VGIC_V2) vgic_v2_populate_lr(vcpu, irq, lr); @@ -736,7 +736,7 @@ static int compute_ap_list_depth(struct kvm_vcpu *vcpu, *multi_sgi = false; - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); + lockdep_assert_held(&vgic_cpu->ap_list_lock); list_for_each_entry(irq, &vgic_cpu->ap_list_head, ap_list) { int w; @@ -761,7 +761,7 @@ static void vgic_flush_lr_state(struct kvm_vcpu *vcpu) bool multi_sgi; u8 prio = 0xff; - DEBUG_SPINLOCK_BUG_ON(!spin_is_locked(&vgic_cpu->ap_list_lock)); + lockdep_assert_held(&vgic_cpu->ap_list_lock); count = compute_ap_list_depth(vcpu, &multi_sgi); if (count > kvm_vgic_global_state.nr_lr || multi_sgi)