Merge tag 's390-5.3-1' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux

Pull s390 updates from Vasily Gorbik:

 - Improve stop_machine wait logic: replace cpu_relax_yield call in
   generic stop_machine function with a weak stop_machine_yield
   function. This is overridden on s390, which yields the current cpu to
   the neighbouring cpu after a couple of retries, instead of blindly
   giving up the cpu to the hypervisor. This significantly improves
   stop_machine performance on s390 in overcommitted scenarios. (A rough
   sketch of this mechanism follows the list below.)

   This includes common code changes which have been Acked by Peter
   Zijlstra and Thomas Gleixner.

 - Improve jump label transformation speed: transform jump labels
   without using stop_machine.

 - Refactoring of the vfio-ccw cp handling, simplifying the code and
   avoiding unneeded allocation/copying.

 - Various vfio-ccw fixes (ccw translation, state machine).

 - Add support for vfio-ap queue interrupt control in the guest. This
   includes s390 kvm changes which have been Acked by Christian
   Borntraeger.

 - Add protected virtualization support for virtio-ccw.

 - Enforce both CONFIG_SMP and CONFIG_HOTPLUG_CPU, which allows removing
   some code which most likely isn't working at all; besides, s390 didn't
   even compile for !CONFIG_SMP.

 - Support for special flagged EP11 CPRBs for zcrypt.

 - Handle PCI devices with no support for new MIO instructions.

 - Avoid KASAN false positives in reworked stack unwinder.

 - Couple of fixes for the QDIO layer.

 - Convert s390 specific documentation to ReST format.

 - Let s390 crypto modules return -ENODEV instead of -EOPNOTSUPP if
   hardware is missing. This way our modules behave like most other
   modules, which is also what systemd's systemd-modules-load.service
   expects.

 - Replace defconfig with performance_defconfig, so there is one config
   file less to maintain.

 - Remove the SCLP call home device driver, which was never useful.

 - Cleanups all over the place.
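
As a rough, non-authoritative illustration (not part of this merge), the
weak-function mechanism from the first bullet above could look like the
sketch below. The retry threshold is arbitrary, the retry counter is kept
per cpu in the real code, and the two functions live in different files
(generic kernel code vs. the s390 architecture code).

/* kernel/stop_machine.c style (sketch): generic weak default. */
#include <linux/cpumask.h>
#include <linux/smp.h>
#include <linux/stop_machine.h>
#include <asm/processor.h>

void __weak stop_machine_yield(const struct cpumask *cpumask)
{
        cpu_relax();
}

/*
 * arch/s390 style override (sketch): after a couple of busy retries,
 * yield the cpu directly to the next cpu still listed in the
 * stop_machine cpumask instead of handing it back to the hypervisor.
 */
void stop_machine_yield(const struct cpumask *cpumask)
{
        static int retries;     /* per-cpu counter in the real code */
        int this_cpu = smp_processor_id();
        int cpu;

        if (++retries < 64) {   /* arbitrary retry threshold */
                cpu_relax();
                return;
        }
        retries = 0;
        cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false);
        if (cpu < nr_cpu_ids)
                smp_yield_cpu(cpu);     /* s390-specific directed yield */
}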

* tag 's390-5.3-1' of git://git.kernel.org/pub/scm/linux/kernel/git/s390/linux: (83 commits)
  docs: s390: s390dbf: typos and formatting, update crash command
  docs: s390: unify and update s390dbf kdocs at debug.c
  docs: s390: restore important non-kdoc parts of s390dbf.rst
  vfio-ccw: Fix the conversion of Format-0 CCWs to Format-1
  s390/pci: correctly handle MIO opt-out
  s390/pci: deal with devices that have no support for MIO instructions
  s390: ap: kvm: Enable PQAP/AQIC facility for the guest
  s390: ap: implement PAPQ AQIC interception in kernel
  vfio: ap: register IOMMU VFIO notifier
  s390: ap: kvm: add PQAP interception for AQIC
  s390/unwind: cleanup unused READ_ONCE_TASK_STACK
  s390/kasan: avoid false positives during stack unwind
  s390/qdio: don't touch the dsci in tiqdio_add_input_queues()
  s390/qdio: (re-)initialize tiqdio list entries
  s390/dasd: Fix a precision vs width bug in dasd_feature_list()
  s390/cio: introduce driver_override on the css bus
  vfio-ccw: make convert_ccw0_to_ccw1 static
  vfio-ccw: Remove copy_ccw_from_iova()
  vfio-ccw: Factor out the ccw0-to-ccw1 transition
  vfio-ccw: Copy CCW data outside length calculation
  ...
Linus Torvalds 2019-07-08 10:06:12 -07:00
commit 1758feddb0
109 changed files with 6300 additions and 5232 deletions


@ -33,3 +33,26 @@ Description: Contains the PIM/PAM/POM values, as reported by the
in sync with the values current in the channel subsystem).
Note: This is an I/O-subchannel specific attribute.
Users: s390-tools, HAL
What: /sys/bus/css/devices/.../driver_override
Date: June 2019
Contact: Cornelia Huck <cohuck@redhat.com>
linux-s390@vger.kernel.org
Description: This file allows the driver for a device to be specified. When
specified, only a driver with a name matching the value written
to driver_override will have an opportunity to bind to the
device. The override is specified by writing a string to the
driver_override file (echo vfio-ccw > driver_override) and
may be cleared with an empty string (echo > driver_override).
This returns the device to standard matching rules binding.
Writing to driver_override does not automatically unbind the
device from its current driver or make any attempt to
automatically load the specified driver. If no driver with a
matching name is currently loaded in the kernel, the device
will not bind to any driver. This also allows devices to
opt-out of driver binding using a driver_override name such as
"none". Only a single driver may be specified in the override,
there is no support for parsing delimiters.
Note that unlike the mechanism of the same name for pci, this
file does not allow to override basic matching rules. I.e.,
the driver must still match the subchannel type of the device.
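
As an aside that is not part of the diff above: the same override can be
set from a program instead of a shell. The sketch below assumes a
hypothetical subchannel bus ID (0.0.0313); substitute a device that
actually exists on your system. Writing an empty string (a lone newline)
clears the override, as described above.

#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Placeholder bus ID; pick a real subchannel on your system. */
#define OVERRIDE_PATH "/sys/bus/css/devices/0.0.0313/driver_override"

int main(void)
{
        const char *drv = "vfio-ccw";
        int fd = open(OVERRIDE_PATH, O_WRONLY);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        /* Only a driver named "vfio-ccw" may now bind to this subchannel. */
        if (write(fd, drv, strlen(drv)) < 0) {
                perror("write");
                close(fd);
                return 1;
        }
        close(fd);
        return 0;
}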


@ -478,7 +478,7 @@
others).
ccw_timeout_log [S390]
See Documentation/s390/CommonIO for details.
See Documentation/s390/common_io.rst for details.
cgroup_disable= [KNL] Disable a particular controller
Format: {name of the controller(s) to disable}
@ -516,7 +516,7 @@
/selinux/checkreqprot.
cio_ignore= [S390]
See Documentation/s390/CommonIO for details.
See Documentation/s390/common_io.rst for details.
clk_ignore_unused
[CLK]
Prevents the clock framework from automatically gating


@ -27,7 +27,7 @@ not strictly considered I/O devices. They are considered here as well,
although they are not the focus of this document.
Some additional information can also be found in the kernel source under
Documentation/s390/driver-model.txt.
Documentation/s390/driver-model.rst.
The css bus
===========
@ -38,7 +38,7 @@ into several categories:
* Standard I/O subchannels, for use by the system. They have a child
device on the ccw bus and are described below.
* I/O subchannels bound to the vfio-ccw driver. See
Documentation/s390/vfio-ccw.txt.
Documentation/s390/vfio-ccw.rst.
* Message subchannels. No Linux driver currently exists.
* CHSC subchannels (at most one). The chsc subchannel driver can be used
to send asynchronous chsc commands.


@ -1,13 +1,17 @@
===============================
IBM 3270 Display System support
===============================
This file describes the driver that supports local channel attachment
of IBM 3270 devices. It consists of three sections:
* Introduction
* Installation
* Operation
INTRODUCTION.
Introduction
============
This paper describes installing and operating 3270 devices under
Linux/390. A 3270 device is a block-mode rows-and-columns terminal of
@ -17,12 +21,12 @@ twenty and thirty years ago.
You may have 3270s in-house and not know it. If you're using the
VM-ESA operating system, define a 3270 to your virtual machine by using
the command "DEF GRAF <hex-address>" This paper presumes you will be
defining four 3270s with the CP/CMS commands
defining four 3270s with the CP/CMS commands:
DEF GRAF 620
DEF GRAF 621
DEF GRAF 622
DEF GRAF 623
- DEF GRAF 620
- DEF GRAF 621
- DEF GRAF 622
- DEF GRAF 623
Your network connection from VM-ESA allows you to use x3270, tn3270, or
another 3270 emulator, started from an xterm window on your PC or
@ -34,7 +38,8 @@ This paper covers installation of the driver and operation of a
dialed-in x3270.
INSTALLATION.
Installation
============
You install the driver by installing a patch, doing a kernel build, and
running the configuration script (config3270.sh, in this directory).
@ -59,13 +64,15 @@ Use #CP TERM CONMODE 3270 to change it to 3270. If you generate only
at boot time to a 3270 if it is a 3215.
In brief, these are the steps:
1. Install the tub3270 patch
2. (If a module) add a line to a file in /etc/modprobe.d/*.conf
2. (If a module) add a line to a file in `/etc/modprobe.d/*.conf`
3. (If VM) define devices with DEF GRAF
4. Reboot
5. Configure
To test that everything works, assuming VM and x3270,
1. Bring up an x3270 window.
2. Use the DIAL command in that window.
3. You should immediately see a Linux login screen.
@ -74,7 +81,8 @@ Here are the installation steps in detail:
1. The 3270 driver is a part of the official Linux kernel
source. Build a tree with the kernel source and any necessary
patches. Then do
patches. Then do::
make oldconfig
(If you wish to disable 3215 console support, edit
.config; change CONFIG_TN3215's value to "n";
@ -84,20 +92,22 @@ Here are the installation steps in detail:
make modules_install
2. (Perform this step only if you have configured tub3270 as a
module.) Add a line to a file /etc/modprobe.d/*.conf to automatically
module.) Add a line to a file `/etc/modprobe.d/*.conf` to automatically
load the driver when it's needed. With this line added, you will see
login prompts appear on your 3270s as soon as boot is complete (or
with emulated 3270s, as soon as you dial into your vm guest using the
command "DIAL <vmguestname>"). Since the line-mode major number is
227, the line to add should be:
227, the line to add should be::
alias char-major-227 tub3270
3. Define graphic devices to your vm guest machine, if you
haven't already. Define them before you reboot (reipl):
DEFINE GRAF 620
DEFINE GRAF 621
DEFINE GRAF 622
DEFINE GRAF 623
- DEFINE GRAF 620
- DEFINE GRAF 621
- DEFINE GRAF 622
- DEFINE GRAF 623
4. Reboot. The reboot process scans hardware devices, including
3270s, and this enables the tub3270 driver once loaded to respond
@ -107,21 +117,23 @@ Here are the installation steps in detail:
5. Run the 3270 configuration script config3270. It is
distributed in this same directory, Documentation/s390, as
config3270.sh. Inspect the output script it produces,
config3270.sh. Inspect the output script it produces,
/tmp/mkdev3270, and then run that script. This will create the
necessary character special device files and make the necessary
changes to /etc/inittab.
Then notify /sbin/init that /etc/inittab has changed, by issuing
the telinit command with the q operand:
the telinit command with the q operand::
cd Documentation/s390
sh config3270.sh
sh /tmp/mkdev3270
telinit q
This should be sufficient for your first time. If your 3270
This should be sufficient for your first time. If your 3270
configuration has changed and you're reusing config3270, you
should follow these steps:
should follow these steps::
Change 3270 configuration
Reboot
Run config3270 and /tmp/mkdev3270
@ -132,8 +144,10 @@ Here are the testing steps in detail:
1. Bring up an x3270 window, or use an actual hardware 3278 or
3279, or use the 3270 emulator of your choice. You would be
running the emulator on your PC or workstation. You would use
the command, for example,
the command, for example::
x3270 vm-esa-domain-name &
if you wanted a 3278 Model 4 with 43 rows of 80 columns, the
default model number. The driver does not take advantage of
extended attributes.
@ -144,7 +158,8 @@ Here are the testing steps in detail:
2. Use the DIAL command instead of the LOGIN command to connect
to one of the virtual 3270s you defined with the DEF GRAF
commands:
commands::
dial my-vm-guest-name
3. You should immediately see a login prompt from your
@ -171,14 +186,17 @@ Here are the testing steps in detail:
Wrong major number? Wrong minor number? There's your
problem!
D. Do you get the message
D. Do you get the message::
"HCPDIA047E my-vm-guest-name 0620 does not exist"?
If so, you must issue the command "DEF GRAF 620" from your VM
3215 console and then reboot the system.
OPERATION.
==========
The driver defines three areas on the 3270 screen: the log area, the
input area, and the status area.
@ -203,8 +221,10 @@ which indicates no scrolling will occur. (If you hit ENTER with "Linux
Running" and nothing typed, the application receives a newline.)
You may change the scrolling timeout value. For example, the following
command line:
command line::
echo scrolltime=60 > /proc/tty/driver/tty3270
changes the scrolling timeout value to 60 sec. Set scrolltime to 0 if
you wish to prevent scrolling entirely.
@ -228,7 +248,8 @@ cause an EOF also by typing "^D" and hitting ENTER.
No PF key is preassigned to cause a job suspension, but you may cause a
job suspension by typing "^Z" and hitting ENTER. You may wish to
assign this function to a PF key. To make PF7 cause job suspension,
execute the command:
execute the command::
echo pf7=^z > /proc/tty/driver/tty3270
If the input you type does not end with the two characters "^n", the
@ -243,8 +264,10 @@ command is entered into the stack only when the input area is not made
invisible (such as for password entry) and it is not identical to the
current top entry. PF10 rotates backward through the command stack;
PF11 rotates forward. You may assign the backward function to any PF
key (or PA key, for that matter), say, PA3, with the command:
key (or PA key, for that matter), say, PA3, with the command::
echo -e pa3=\\033k > /proc/tty/driver/tty3270
This assigns the string ESC-k to PA3. Similarly, the string ESC-j
performs the forward function. (Rationale: In bash with vi-mode line
editing, ESC-k and ESC-j retrieve backward and forward history.
@ -252,15 +275,19 @@ Suggestions welcome.)
Is a stack size of twenty commands not to your liking? Change it on
the fly. To change to saving the last 100 commands, execute the
command:
command::
echo recallsize=100 > /proc/tty/driver/tty3270
Have a command you issue frequently? Assign it to a PF or PA key! Use
the command
echo pf24="mkdir foobar; cd foobar" > /proc/tty/driver/tty3270
the command::
echo pf24="mkdir foobar; cd foobar" > /proc/tty/driver/tty3270
to execute the commands mkdir foobar and cd foobar immediately when you
hit PF24. Want to see the command line first, before you execute it?
Use the -n option of the echo command:
Use the -n option of the echo command::
echo -n pf24="mkdir foo; cd foo" > /proc/tty/driver/tty3270



@ -1,14 +1,18 @@
===========================
Linux for S/390 and zSeries
===========================
Common Device Support (CDS)
Device Driver I/O Support Routines
Authors : Ingo Adlung
Cornelia Huck
Authors:
- Ingo Adlung
- Cornelia Huck
Copyright, IBM Corp. 1999-2002
Introduction
============
This document describes the common device support routines for Linux/390.
Different than other hardware architectures, ESA/390 has defined a unified
@ -27,18 +31,20 @@ Operation manual (IBM Form. No. SA22-7201).
In order to build common device support for ESA/390 I/O interfaces, a
functional layer was introduced that provides generic I/O access methods to
the hardware.
the hardware.
The common device support layer comprises the I/O support routines defined
below. Some of them implement common Linux device driver interfaces, while
The common device support layer comprises the I/O support routines defined
below. Some of them implement common Linux device driver interfaces, while
some of them are ESA/390 platform specific.
Note:
In order to write a driver for S/390, you also need to look into the interface
described in Documentation/s390/driver-model.txt.
In order to write a driver for S/390, you also need to look into the interface
described in Documentation/s390/driver-model.rst.
Note for porting drivers from 2.4:
The major changes are:
* The functions use a ccw_device instead of an irq (subchannel).
* All drivers must define a ccw_driver (see driver-model.txt) and the associated
functions.
@ -57,19 +63,16 @@ The major changes are:
ccw_device_get_ciw()
get commands from extended sense data.
ccw_device_start()
ccw_device_start_timeout()
ccw_device_start_key()
ccw_device_start_key_timeout()
ccw_device_start(), ccw_device_start_timeout(), ccw_device_start_key(), ccw_device_start_key_timeout()
initiate an I/O request.
ccw_device_resume()
resume channel program execution.
ccw_device_halt()
ccw_device_halt()
terminate the current I/O request processed on the device.
do_IRQ()
do_IRQ()
generic interrupt routine. This function is called by the interrupt entry
routine whenever an I/O interrupt is presented to the system. The do_IRQ()
routine determines the interrupt status and calls the device specific
@ -82,12 +85,15 @@ first level interrupt handler only and does not comprise a device driver
callable interface. Instead, the functional description of do_IO() also
describes the input to the device specific interrupt handler.
Note: All explanations apply also to the 64 bit architecture s390x.
Note:
All explanations apply also to the 64 bit architecture s390x.
Common Device Support (CDS) for Linux/390 Device Drivers
========================================================
General Information
-------------------
The following chapters describe the I/O related interface routines the
Linux/390 common device support (CDS) provides to allow for device specific
@ -101,6 +107,7 @@ can be found in the architecture specific C header file
linux/arch/s390/include/asm/irq.h.
Overview of CDS interface concepts
----------------------------------
Different to other hardware platforms, the ESA/390 architecture doesn't define
interrupt lines managed by a specific interrupt controller and bus systems
@ -126,7 +133,7 @@ has to call every single device driver registered on this IRQ in order to
determine the device driver owning the device that raised the interrupt.
Up to kernel 2.4, Linux/390 used to provide interfaces via the IRQ (subchannel).
For internal use of the common I/O layer, these are still there. However,
For internal use of the common I/O layer, these are still there. However,
device drivers should use the new calling interface via the ccw_device only.
During its startup the Linux/390 system checks for peripheral devices. Each
@ -134,7 +141,7 @@ of those devices is uniquely defined by a so called subchannel by the ESA/390
channel subsystem. While the subchannel numbers are system generated, each
subchannel also takes a user defined attribute, the so called device number.
Both subchannel number and device number cannot exceed 65535. During sysfs
initialisation, the information about control unit type and device types that
initialisation, the information about control unit type and device types that
imply specific I/O commands (channel command words - CCWs) in order to operate
the device are gathered. Device drivers can retrieve this set of hardware
information during their initialization step to recognize the devices they
@ -164,18 +171,26 @@ get_ciw() - get command information word
This call enables a device driver to get information about supported commands
from the extended SenseID data.
struct ciw *
ccw_device_get_ciw(struct ccw_device *cdev, __u32 cmd);
::
cdev - The ccw_device for which the command is to be retrieved.
cmd - The command type to be retrieved.
struct ciw *
ccw_device_get_ciw(struct ccw_device *cdev, __u32 cmd);
==== ========================================================
cdev The ccw_device for which the command is to be retrieved.
cmd The command type to be retrieved.
==== ========================================================
ccw_device_get_ciw() returns:
NULL - No extended data available, invalid device or command not found.
!NULL - The command requested.
===== ================================================================
NULL No extended data available, invalid device or command not found.
!NULL The command requested.
===== ================================================================
ccw_device_start() - Initiate I/O Request
::
ccw_device_start() - Initiate I/O Request
The ccw_device_start() routines is the I/O request front-end processor. All
device driver I/O requests must be issued using this routine. A device driver
@ -186,93 +201,105 @@ This description also covers the status information passed to the device
driver's interrupt handler as this is related to the rules (flags) defined
with the associated I/O request when calling ccw_device_start().
int ccw_device_start(struct ccw_device *cdev,
struct ccw1 *cpa,
unsigned long intparm,
__u8 lpm,
unsigned long flags);
int ccw_device_start_timeout(struct ccw_device *cdev,
struct ccw1 *cpa,
unsigned long intparm,
__u8 lpm,
unsigned long flags,
int expires);
int ccw_device_start_key(struct ccw_device *cdev,
struct ccw1 *cpa,
unsigned long intparm,
__u8 lpm,
__u8 key,
unsigned long flags);
int ccw_device_start_key_timeout(struct ccw_device *cdev,
struct ccw1 *cpa,
unsigned long intparm,
__u8 lpm,
__u8 key,
unsigned long flags,
int expires);
::
cdev : ccw_device the I/O is destined for
cpa : logical start address of channel program
user_intparm : user specific interrupt information; will be presented
back to the device driver's interrupt handler. Allows a
device driver to associate the interrupt with a
particular I/O request.
lpm : defines the channel path to be used for a specific I/O
request. A value of 0 will make cio use the opm.
key : the storage key to use for the I/O (useful for operating on a
storage with a storage key != default key)
flag : defines the action to be performed for I/O processing
expires : timeout value in jiffies. The common I/O layer will terminate
the running program after this and call the interrupt handler
with ERR_PTR(-ETIMEDOUT) as irb.
int ccw_device_start(struct ccw_device *cdev,
struct ccw1 *cpa,
unsigned long intparm,
__u8 lpm,
unsigned long flags);
int ccw_device_start_timeout(struct ccw_device *cdev,
struct ccw1 *cpa,
unsigned long intparm,
__u8 lpm,
unsigned long flags,
int expires);
int ccw_device_start_key(struct ccw_device *cdev,
struct ccw1 *cpa,
unsigned long intparm,
__u8 lpm,
__u8 key,
unsigned long flags);
int ccw_device_start_key_timeout(struct ccw_device *cdev,
struct ccw1 *cpa,
unsigned long intparm,
__u8 lpm,
__u8 key,
unsigned long flags,
int expires);
Possible flag values are :
============= =============================================================
cdev ccw_device the I/O is destined for
cpa logical start address of channel program
user_intparm user specific interrupt information; will be presented
back to the device driver's interrupt handler. Allows a
device driver to associate the interrupt with a
particular I/O request.
lpm defines the channel path to be used for a specific I/O
request. A value of 0 will make cio use the opm.
key the storage key to use for the I/O (useful for operating on a
storage with a storage key != default key)
flag defines the action to be performed for I/O processing
expires timeout value in jiffies. The common I/O layer will terminate
the running program after this and call the interrupt handler
with ERR_PTR(-ETIMEDOUT) as irb.
============= =============================================================
DOIO_ALLOW_SUSPEND - channel program may become suspended
DOIO_DENY_PREFETCH - don't allow for CCW prefetch; usually
this implies the channel program might
become modified
DOIO_SUPPRESS_INTER - don't call the handler on intermediate status
Possible flag values are:
The cpa parameter points to the first format 1 CCW of a channel program :
========================= =============================================
DOIO_ALLOW_SUSPEND channel program may become suspended
DOIO_DENY_PREFETCH don't allow for CCW prefetch; usually
this implies the channel program might
become modified
DOIO_SUPPRESS_INTER don't call the handler on intermediate status
========================= =============================================
struct ccw1 {
__u8 cmd_code;/* command code */
__u8 flags; /* flags, like IDA addressing, etc. */
__u16 count; /* byte count */
__u32 cda; /* data address */
} __attribute__ ((packed,aligned(8)));
The cpa parameter points to the first format 1 CCW of a channel program::
with the following CCW flags values defined :
struct ccw1 {
__u8 cmd_code;/* command code */
__u8 flags; /* flags, like IDA addressing, etc. */
__u16 count; /* byte count */
__u32 cda; /* data address */
} __attribute__ ((packed,aligned(8)));
CCW_FLAG_DC - data chaining
CCW_FLAG_CC - command chaining
CCW_FLAG_SLI - suppress incorrect length
CCW_FLAG_SKIP - skip
CCW_FLAG_PCI - PCI
CCW_FLAG_IDA - indirect addressing
CCW_FLAG_SUSPEND - suspend
with the following CCW flags values defined:
=================== =========================
CCW_FLAG_DC data chaining
CCW_FLAG_CC command chaining
CCW_FLAG_SLI suppress incorrect length
CCW_FLAG_SKIP skip
CCW_FLAG_PCI PCI
CCW_FLAG_IDA indirect addressing
CCW_FLAG_SUSPEND suspend
=================== =========================
Via ccw_device_set_options(), the device driver may specify the following
options for the device:
DOIO_EARLY_NOTIFICATION - allow for early interrupt notification
DOIO_REPORT_ALL - report all interrupt conditions
========================= ======================================
DOIO_EARLY_NOTIFICATION allow for early interrupt notification
DOIO_REPORT_ALL report all interrupt conditions
========================= ======================================
The ccw_device_start() function returns :
The ccw_device_start() function returns:
0 - successful completion or request successfully initiated
-EBUSY - The device is currently processing a previous I/O request, or there is
a status pending at the device.
-ENODEV - cdev is invalid, the device is not operational or the ccw_device is
not online.
======== ======================================================================
0 successful completion or request successfully initiated
-EBUSY The device is currently processing a previous I/O request, or there is
a status pending at the device.
-ENODEV cdev is invalid, the device is not operational or the ccw_device is
not online.
======== ======================================================================
When the I/O request completes, the CDS first level interrupt handler will
accumulate the status in a struct irb and then call the device interrupt handler.
The intparm field will contain the value the device driver has associated with a
particular I/O request. If a pending device status was recognized,
The intparm field will contain the value the device driver has associated with a
particular I/O request. If a pending device status was recognized,
intparm will be set to 0 (zero). This may happen during I/O initiation or delayed
by an alert status notification. In any case this status is not related to the
current (last) I/O request. In case of a delayed status notification no special
@ -282,9 +309,11 @@ never started, even though ccw_device_start() returned with successful completio
The irb may contain an error value, and the device driver should check for this
first:
-ETIMEDOUT: the common I/O layer terminated the request after the specified
timeout value
-EIO: the common I/O layer terminated the request due to an error state
========== =================================================================
-ETIMEDOUT the common I/O layer terminated the request after the specified
timeout value
-EIO the common I/O layer terminated the request due to an error state
========== =================================================================
If the concurrent sense flag in the extended status word (esw) in the irb is
set, the field erw.scnt in the esw describes the number of device specific
@ -294,6 +323,7 @@ sensing by the device driver itself is required.
The device interrupt handler can use the following definitions to investigate
the primary unit check source coded in sense byte 0 :
======================= ====
SNS0_CMD_REJECT 0x80
SNS0_INTERVENTION_REQ 0x40
SNS0_BUS_OUT_CHECK 0x20
@ -301,36 +331,41 @@ SNS0_EQUIPMENT_CHECK 0x10
SNS0_DATA_CHECK 0x08
SNS0_OVERRUN 0x04
SNS0_INCOMPL_DOMAIN 0x01
======================= ====
Depending on the device status, multiple of those values may be set together.
Please refer to the device specific documentation for details.
The irb->scsw.cstat field provides the (accumulated) subchannel status :
SCHN_STAT_PCI - program controlled interrupt
SCHN_STAT_INCORR_LEN - incorrect length
SCHN_STAT_PROG_CHECK - program check
SCHN_STAT_PROT_CHECK - protection check
SCHN_STAT_CHN_DATA_CHK - channel data check
SCHN_STAT_CHN_CTRL_CHK - channel control check
SCHN_STAT_INTF_CTRL_CHK - interface control check
SCHN_STAT_CHAIN_CHECK - chaining check
========================= ============================
SCHN_STAT_PCI program controlled interrupt
SCHN_STAT_INCORR_LEN incorrect length
SCHN_STAT_PROG_CHECK program check
SCHN_STAT_PROT_CHECK protection check
SCHN_STAT_CHN_DATA_CHK channel data check
SCHN_STAT_CHN_CTRL_CHK channel control check
SCHN_STAT_INTF_CTRL_CHK interface control check
SCHN_STAT_CHAIN_CHECK chaining check
========================= ============================
The irb->scsw.dstat field provides the (accumulated) device status :
DEV_STAT_ATTENTION - attention
DEV_STAT_STAT_MOD - status modifier
DEV_STAT_CU_END - control unit end
DEV_STAT_BUSY - busy
DEV_STAT_CHN_END - channel end
DEV_STAT_DEV_END - device end
DEV_STAT_UNIT_CHECK - unit check
DEV_STAT_UNIT_EXCEP - unit exception
===================== =================
DEV_STAT_ATTENTION attention
DEV_STAT_STAT_MOD status modifier
DEV_STAT_CU_END control unit end
DEV_STAT_BUSY busy
DEV_STAT_CHN_END channel end
DEV_STAT_DEV_END device end
DEV_STAT_UNIT_CHECK unit check
DEV_STAT_UNIT_EXCEP unit exception
===================== =================
Please see the ESA/390 Principles of Operation manual for details on the
individual flag meanings.
Usage Notes :
Usage Notes:
ccw_device_start() must be called disabled and with the ccw device lock held.
@ -374,32 +409,39 @@ secondary status without error (alert status) is presented, this indicates
successful completion for all overlapping ccw_device_start() requests that have
been issued since the last secondary (final) status.
Channel programs that intend to set the suspend flag on a channel command word
(CCW) must start the I/O operation with the DOIO_ALLOW_SUSPEND option or the
suspend flag will cause a channel program check. At the time the channel program
becomes suspended an intermediate interrupt will be generated by the channel
Channel programs that intend to set the suspend flag on a channel command word
(CCW) must start the I/O operation with the DOIO_ALLOW_SUSPEND option or the
suspend flag will cause a channel program check. At the time the channel program
becomes suspended an intermediate interrupt will be generated by the channel
subsystem.
ccw_device_resume() - Resume Channel Program Execution
ccw_device_resume() - Resume Channel Program Execution
If a device driver chooses to suspend the current channel program execution by
setting the CCW suspend flag on a particular CCW, the channel program execution
is suspended. In order to resume channel program execution the CIO layer
provides the ccw_device_resume() routine.
If a device driver chooses to suspend the current channel program execution by
setting the CCW suspend flag on a particular CCW, the channel program execution
is suspended. In order to resume channel program execution the CIO layer
provides the ccw_device_resume() routine.
int ccw_device_resume(struct ccw_device *cdev);
::
cdev - ccw_device the resume operation is requested for
int ccw_device_resume(struct ccw_device *cdev);
==== ================================================
cdev ccw_device the resume operation is requested for
==== ================================================
The ccw_device_resume() function returns:
0 - suspended channel program is resumed
-EBUSY - status pending
-ENODEV - cdev invalid or not-operational subchannel
-EINVAL - resume function not applicable
-ENOTCONN - there is no I/O request pending for completion
========= ==============================================
0 suspended channel program is resumed
-EBUSY status pending
-ENODEV cdev invalid or not-operational subchannel
-EINVAL resume function not applicable
-ENOTCONN there is no I/O request pending for completion
========= ==============================================
Usage Notes:
Please have a look at the ccw_device_start() usage notes for more details on
suspended channel programs.
@ -412,22 +454,28 @@ command is provided.
ccw_device_halt() must be called disabled and with the ccw device lock held.
int ccw_device_halt(struct ccw_device *cdev,
unsigned long intparm);
::
cdev : ccw_device the halt operation is requested for
intparm : interruption parameter; value is only used if no I/O
is outstanding, otherwise the intparm associated with
the I/O request is returned
int ccw_device_halt(struct ccw_device *cdev,
unsigned long intparm);
The ccw_device_halt() function returns :
======= =====================================================
cdev ccw_device the halt operation is requested for
intparm interruption parameter; value is only used if no I/O
is outstanding, otherwise the intparm associated with
the I/O request is returned
======= =====================================================
0 - request successfully initiated
-EBUSY - the device is currently busy, or status pending.
-ENODEV - cdev invalid.
-EINVAL - The device is not operational or the ccw device is not online.
The ccw_device_halt() function returns:
Usage Notes :
======= ==============================================================
0 request successfully initiated
-EBUSY the device is currently busy, or status pending.
-ENODEV cdev invalid.
-EINVAL The device is not operational or the ccw device is not online.
======= ==============================================================
Usage Notes:
A device driver may write a never-ending channel program by writing a channel
program that at its end loops back to its beginning by means of a transfer in
@ -438,25 +486,34 @@ can then perform an appropriate action. Prior to interrupt of an outstanding
read to a network device (with or without PCI flag) a ccw_device_halt()
is required to end the pending operation.
ccw_device_clear() - Terminage I/O Request Processing
::
ccw_device_clear() - Terminage I/O Request Processing
In order to terminate all I/O processing at the subchannel, the clear subchannel
(CSCH) command is used. It can be issued via ccw_device_clear().
ccw_device_clear() must be called disabled and with the ccw device lock held.
int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm);
::
cdev: ccw_device the clear operation is requested for
intparm: interruption parameter (see ccw_device_halt())
int ccw_device_clear(struct ccw_device *cdev, unsigned long intparm);
======= ===============================================
cdev ccw_device the clear operation is requested for
intparm interruption parameter (see ccw_device_halt())
======= ===============================================
The ccw_device_clear() function returns:
0 - request successfully initiated
-ENODEV - cdev invalid
-EINVAL - The device is not operational or the ccw device is not online.
======= ==============================================================
0 request successfully initiated
-ENODEV cdev invalid
-EINVAL The device is not operational or the ccw device is not online.
======= ==============================================================
Miscellaneous Support Routines
------------------------------
This chapter describes various routines to be used in a Linux/390 device
driver programming environment.
@ -466,7 +523,8 @@ get_ccwdev_lock()
Get the address of the device specific lock. This is then used in
spin_lock() / spin_unlock() calls.
::
__u8 ccw_device_get_path_mask(struct ccw_device *cdev);
__u8 ccw_device_get_path_mask(struct ccw_device *cdev);
Get the mask of the path currently available for cdev.
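
To make the ccw_device_start() description above more concrete, here is a
minimal, illustrative sketch (not from the kernel tree) of a driver issuing
a single-CCW channel program. The helper name and the no-op command code
are assumptions; real drivers allocate their CCWs in 31-bit addressable
(GFP_DMA) memory rather than using a static variable.

#include <linux/spinlock.h>
#include <asm/ccwdev.h>
#include <asm/cio.h>

static struct ccw1 sample_ccw;  /* illustration only; see note above */

static int sample_issue_nop(struct ccw_device *cdev)
{
        unsigned long flags;
        int rc;

        sample_ccw.cmd_code = 0x03;             /* no-operation (illustrative) */
        sample_ccw.flags    = CCW_FLAG_SLI;     /* suppress incorrect length */
        sample_ccw.count    = 0;
        sample_ccw.cda      = 0;

        /* ccw_device_start() must be called with the ccw device lock held. */
        spin_lock_irqsave(get_ccwdev_lock(cdev), flags);
        rc = ccw_device_start(cdev, &sample_ccw,
                              (unsigned long)&sample_ccw,  /* intparm */
                              0,        /* lpm: 0 means use the opm */
                              0);       /* flags */
        spin_unlock_irqrestore(get_ccwdev_lock(cdev), flags);

        return rc;      /* 0, -EBUSY or -ENODEV as documented above */
}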


@ -1,5 +1,9 @@
S/390 common I/O-Layer - command line parameters, procfs and debugfs entries
============================================================================
======================
S/390 common I/O-Layer
======================
command line parameters, procfs and debugfs entries
===================================================
Command line parameters
-----------------------
@ -13,7 +17,7 @@ Command line parameters
device := {all | [!]ipldev | [!]condev | [!]<devno> | [!]<devno>-<devno>}
The given devices will be ignored by the common I/O-layer; no detection
and device sensing will be done on any of those devices. The subchannel to
and device sensing will be done on any of those devices. The subchannel to
which the device in question is attached will be treated as if no device was
attached.
@ -28,14 +32,20 @@ Command line parameters
keywords can be used to refer to the CCW based boot device and CCW console
device respectively (these are probably useful only when combined with the '!'
operator). The '!' operator will cause the I/O-layer to _not_ ignore a device.
The command line is parsed from left to right.
The command line
is parsed from left to right.
For example::
For example,
cio_ignore=0.0.0023-0.0.0042,0.0.4711
will ignore all devices ranging from 0.0.0023 to 0.0.0042 and the device
0.0.4711, if detected.
As another example,
As another example::
cio_ignore=all,!0.0.4711,!0.0.fd00-0.0.fd02
will ignore all devices but 0.0.4711, 0.0.fd00, 0.0.fd01, 0.0.fd02.
By default, no devices are ignored.
@ -48,40 +58,45 @@ Command line parameters
Lists the ranges of devices (by bus id) which are ignored by common I/O.
You can un-ignore certain or all devices by piping to /proc/cio_ignore.
"free all" will un-ignore all ignored devices,
You can un-ignore certain or all devices by piping to /proc/cio_ignore.
"free all" will un-ignore all ignored devices,
"free <device range>, <device range>, ..." will un-ignore the specified
devices.
For example, if devices 0.0.0023 to 0.0.0042 and 0.0.4711 are ignored,
- echo free 0.0.0030-0.0.0032 > /proc/cio_ignore
will un-ignore devices 0.0.0030 to 0.0.0032 and will leave devices 0.0.0023
to 0.0.002f, 0.0.0033 to 0.0.0042 and 0.0.4711 ignored;
- echo free 0.0.0041 > /proc/cio_ignore will furthermore un-ignore device
0.0.0041;
- echo free all > /proc/cio_ignore will un-ignore all remaining ignored
- echo free all > /proc/cio_ignore will un-ignore all remaining ignored
devices.
When a device is un-ignored, device recognition and sensing is performed and
When a device is un-ignored, device recognition and sensing is performed and
the device driver will be notified if possible, so the device will become
available to the system. Note that un-ignoring is performed asynchronously.
You can also add ranges of devices to be ignored by piping to
You can also add ranges of devices to be ignored by piping to
/proc/cio_ignore; "add <device range>, <device range>, ..." will ignore the
specified devices.
Note: While already known devices can be added to the list of devices to be
ignored, there will be no effect on then. However, if such a device
ignored, there will be no effect on then. However, if such a device
disappears and then reappears, it will then be ignored. To make
known devices go away, you need the "purge" command (see below).
For example,
For example::
"echo add 0.0.a000-0.0.accc, 0.0.af00-0.0.afff > /proc/cio_ignore"
will add 0.0.a000-0.0.accc and 0.0.af00-0.0.afff to the list of ignored
devices.
You can remove already known but now ignored devices via
You can remove already known but now ignored devices via::
"echo purge > /proc/cio_ignore"
All devices ignored but still registered and not online (= not in use)
will be deregistered and thus removed from the system.
@ -115,11 +130,11 @@ debugfs entries
Various debug messages from the common I/O-layer.
- /sys/kernel/debug/s390dbf/cio_trace/hex_ascii
Logs the calling of functions in the common I/O-layer and, if applicable,
Logs the calling of functions in the common I/O-layer and, if applicable,
which subchannel they were called for, as well as dumps of some data
structures (like irb in an error case).
The level of logging can be changed to be more or less verbose by piping to
The level of logging can be changed to be more or less verbose by piping to
/sys/kernel/debug/s390dbf/cio_*/level a number between 0 and 6; see the
documentation on the S/390 debug feature (Documentation/s390/s390dbf.txt)
documentation on the S/390 debug feature (Documentation/s390/s390dbf.rst)
for details.


@ -1,4 +1,6 @@
==================
DASD device driver
==================
S/390's disk devices (DASDs) are managed by Linux via the DASD device
driver. It is valid for all types of DASDs and represents them to
@ -14,14 +16,14 @@ parameters are to be given in hexadecimal notation without a leading
If you supply kernel parameters the different instances are processed
in order of appearance and a minor number is reserved for any device
covered by the supplied range up to 64 volumes. Additional DASDs are
ignored. If you do not supply the 'dasd=' kernel parameter at all, the
ignored. If you do not supply the 'dasd=' kernel parameter at all, the
DASD driver registers all supported DASDs of your system to a minor
number in ascending order of the subchannel number.
The driver currently supports ECKD-devices and there are stubs for
support of the FBA and CKD architectures. For the FBA architecture
only some smart data structures are missing to make the support
complete.
complete.
We performed our testing on 3380 and 3390 type disks of different
sizes, under VM and on the bare hardware (LPAR), using internal disks
of the multiprise as well as a RAMAC virtual array. Disks exported by
@ -34,19 +36,22 @@ accessibility of the DASD from other OSs. In a later stage we will
provide support of partitions, maybe VTOC oriented or using a kind of
partition table in the label record.
USAGE
Usage
=====
-Low-level format (?CKD only)
For using an ECKD-DASD as a Linux harddisk you have to low-level
format the tracks by issuing the BLKDASDFORMAT-ioctl on that
device. This will erase any data on that volume including IBM volume
labels, VTOCs etc. The ioctl may take a 'struct format_data *' or
'NULL' as an argument.
typedef struct {
labels, VTOCs etc. The ioctl may take a `struct format_data *` or
'NULL' as an argument::
typedef struct {
int start_unit;
int stop_unit;
int blksize;
} format_data_t;
} format_data_t;
When a NULL argument is passed to the BLKDASDFORMAT ioctl the whole
disk is formatted to a blocksize of 1024 bytes. Otherwise start_unit
and stop_unit are the first and last track to be formatted. If
@ -56,17 +61,23 @@ up to the last track. blksize can be any power of two between 512 and
1kB blocks anyway and you gain approx. 50% of capacity increasing your
blksize from 512 byte to 1kB.
-Make a filesystem
Make a filesystem
=================
Then you can mk??fs the filesystem of your choice on that volume or
partition. For reasons of sanity you should build your filesystem on
the partition /dev/dd?1 instead of the whole volume. You only lose 3kB
the partition /dev/dd?1 instead of the whole volume. You only lose 3kB
but may be sure that you can reuse your data after introduction of a
real partition table.
BUGS:
Bugs
====
- Performance sometimes is rather low because we don't fully exploit clustering
TODO-List:
TODO-List
=========
- Add IBM'S Disk layout to genhd
- Enhance driver to use more than one major number
- Enable usage as a module
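
The low-level format ioctl described above can be exercised from userspace;
a hedged sketch follows. It assumes an s390 system with <asm/dasd.h>
installed, uses /dev/dasdb purely as a placeholder, and uses BIODASDFMT as
the request name exported by current kernels (the older text above calls it
BLKDASDFORMAT), so check your headers first. Running it destroys all data
on the volume.

#include <fcntl.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <asm/dasd.h>   /* format_data_t and the DASD format ioctl (s390) */

int main(void)
{
        /* Format the first ten tracks to 4K blocks. */
        format_data_t fmt = {
                .start_unit = 0,        /* first track */
                .stop_unit  = 9,        /* last track to format */
                .blksize    = 4096,     /* power of two between 512 and 4096 */
        };
        int fd = open("/dev/dasdb", O_RDWR);    /* placeholder device node */

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (ioctl(fd, BIODASDFMT, &fmt) < 0) {
                perror("BIODASDFMT");
                close(fd);
                return 1;
        }
        close(fd);
        return 0;
}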



@ -1,5 +1,6 @@
=============================
S/390 driver model interfaces
-----------------------------
=============================
1. CCW devices
--------------
@ -7,13 +8,13 @@ S/390 driver model interfaces
All devices which can be addressed by means of ccws are called 'CCW devices' -
even if they aren't actually driven by ccws.
All ccw devices are accessed via a subchannel, this is reflected in the
structures under devices/:
All ccw devices are accessed via a subchannel, this is reflected in the
structures under devices/::
devices/
devices/
- system/
- css0/
- 0.0.0000/0.0.0815/
- 0.0.0000/0.0.0815/
- 0.0.0001/0.0.4711/
- 0.0.0002/
- 0.1.0000/0.1.1234/
@ -35,14 +36,18 @@ be found under bus/ccw/devices/.
All ccw devices export some data via sysfs.
cutype: The control unit type / model.
cutype:
The control unit type / model.
devtype: The device type / model, if applicable.
devtype:
The device type / model, if applicable.
availability: Can be 'good' or 'boxed'; 'no path' or 'no device' for
availability:
Can be 'good' or 'boxed'; 'no path' or 'no device' for
disconnected devices.
online: An interface to set the device online and offline.
online:
An interface to set the device online and offline.
In the special case of the device being disconnected (see the
notify function under 1.2), piping 0 to online will forcibly delete
the device.
@ -52,9 +57,11 @@ The device drivers can add entries to export per-device data and interfaces.
There is also some data exported on a per-subchannel basis (see under
bus/css/devices/):
chpids: Via which chpids the device is connected.
chpids:
Via which chpids the device is connected.
pimpampom: The path installed, path available and path operational masks.
pimpampom:
The path installed, path available and path operational masks.
There also might be additional data, for example for block devices.
@ -74,77 +81,93 @@ b. After a. has been performed, if necessary, the device is finally brought up
------------------------------------
The basic struct ccw_device and struct ccw_driver data structures can be found
under include/asm/ccwdev.h.
under include/asm/ccwdev.h::
struct ccw_device {
spinlock_t *ccwlock;
struct ccw_device_private *private;
struct ccw_device_id id;
struct ccw_device {
spinlock_t *ccwlock;
struct ccw_device_private *private;
struct ccw_device_id id;
struct ccw_driver *drv;
struct device dev;
struct ccw_driver *drv;
struct device dev;
int online;
void (*handler) (struct ccw_device *dev, unsigned long intparm,
struct irb *irb);
};
struct irb *irb);
};
struct ccw_driver {
struct module *owner;
struct ccw_device_id *ids;
int (*probe) (struct ccw_device *);
struct ccw_driver {
struct module *owner;
struct ccw_device_id *ids;
int (*probe) (struct ccw_device *);
int (*remove) (struct ccw_device *);
int (*set_online) (struct ccw_device *);
int (*set_offline) (struct ccw_device *);
int (*notify) (struct ccw_device *, int);
struct device_driver driver;
char *name;
};
};
The 'private' field contains data needed for internal i/o operation only, and
is not available to the device driver.
Each driver should declare in a MODULE_DEVICE_TABLE into which CU types/models
and/or device types/models it is interested. This information can later be found
in the struct ccw_device_id fields:
in the struct ccw_device_id fields::
struct ccw_device_id {
__u16 match_flags;
struct ccw_device_id {
__u16 match_flags;
__u16 cu_type;
__u16 dev_type;
__u8 cu_model;
__u8 dev_model;
__u16 cu_type;
__u16 dev_type;
__u8 cu_model;
__u8 dev_model;
unsigned long driver_info;
};
};
The functions in ccw_driver should be used in the following way:
probe: This function is called by the device layer for each device the driver
probe:
This function is called by the device layer for each device the driver
is interested in. The driver should only allocate private structures
to put in dev->driver_data and create attributes (if needed). Also,
the interrupt handler (see below) should be set here.
int (*probe) (struct ccw_device *cdev);
::
Parameters: cdev - the device to be probed.
int (*probe) (struct ccw_device *cdev);
Parameters:
cdev
- the device to be probed.
remove: This function is called by the device layer upon removal of the driver,
remove:
This function is called by the device layer upon removal of the driver,
the device or the module. The driver should perform cleanups here.
int (*remove) (struct ccw_device *cdev);
::
Parameters: cdev - the device to be removed.
int (*remove) (struct ccw_device *cdev);
Parameters:
cdev
- the device to be removed.
set_online: This function is called by the common I/O layer when the device is
set_online:
This function is called by the common I/O layer when the device is
activated via the 'online' attribute. The driver should finally
setup and activate the device here.
int (*set_online) (struct ccw_device *);
::
Parameters: cdev - the device to be activated. The common layer has
int (*set_online) (struct ccw_device *);
Parameters:
cdev
- the device to be activated. The common layer has
verified that the device is not already online.
@ -152,15 +175,22 @@ set_offline: This function is called by the common I/O layer when the device is
de-activated via the 'online' attribute. The driver should shut
down the device, but not de-allocate its private data.
int (*set_offline) (struct ccw_device *);
::
Parameters: cdev - the device to be deactivated. The common layer has
int (*set_offline) (struct ccw_device *);
Parameters:
cdev
- the device to be deactivated. The common layer has
verified that the device is online.
notify: This function is called by the common I/O layer for some state changes
notify:
This function is called by the common I/O layer for some state changes
of the device.
Signalled to the driver are:
* In online state, device detached (CIO_GONE) or last path gone
(CIO_NO_PATH). The driver must return !0 to keep the device; for
return code 0, the device will be deleted as usual (also when no
@ -173,32 +203,40 @@ notify: This function is called by the common I/O layer for some state changes
return code of the notify function the device driver signals if it
wants the device back: !0 for keeping, 0 to make the device being
removed and re-registered.
int (*notify) (struct ccw_device *, int);
Parameters: cdev - the device whose state changed.
event - the event that happened. This can be one of CIO_GONE,
CIO_NO_PATH or CIO_OPER.
::
int (*notify) (struct ccw_device *, int);
Parameters:
cdev
- the device whose state changed.
event
- the event that happened. This can be one of CIO_GONE,
CIO_NO_PATH or CIO_OPER.
The handler field of the struct ccw_device is meant to be set to the interrupt
handler for the device. In order to accommodate drivers which use several
handler for the device. In order to accommodate drivers which use several
distinct handlers (e.g. multi subchannel devices), this is a member of ccw_device
instead of ccw_driver.
The handler is registered with the common layer during set_online() processing
before the driver is called, and is deregistered during set_offline() after the
driver has been called. Also, after registering / before deregistering, path
driver has been called. Also, after registering / before deregistering, path
grouping resp. disbanding of the path group (if applicable) are performed.
void (*handler) (struct ccw_device *dev, unsigned long intparm, struct irb *irb);
::
Parameters: dev - the device the handler is called for
void (*handler) (struct ccw_device *dev, unsigned long intparm, struct irb *irb);
Parameters: dev - the device the handler is called for
intparm - the intparm which allows the device driver to identify
the i/o the interrupt is associated with, or to recognize
the interrupt as unsolicited.
irb - interruption response block which contains the accumulated
status.
the i/o the interrupt is associated with, or to recognize
the interrupt as unsolicited.
irb - interruption response block which contains the accumulated
status.
The device driver is called from the common ccw_device layer and can retrieve
The device driver is called from the common ccw_device layer and can retrieve
information about the interrupt from the irb parameter.
@ -237,23 +275,27 @@ only the logical state and not the physical state, since we cannot track the
latter consistently due to lacking machine support (we don't need to be aware
of it anyway).
status - Can be 'online' or 'offline'.
status
- Can be 'online' or 'offline'.
Piping 'on' or 'off' sets the chpid logically online/offline.
Piping 'on' to an online chpid triggers path reprobing for all devices
the chpid connects to. This can be used to force the kernel to re-use
a channel path the user knows to be online, but the machine hasn't
created a machine check for.
type - The physical type of the channel path.
type
- The physical type of the channel path.
shared - Whether the channel path is shared.
shared
- Whether the channel path is shared.
cmg - The channel measurement group.
cmg
- The channel measurement group.
3. System devices
-----------------
3.1 xpram
3.1 xpram
---------
xpram shows up under devices/system/ as 'xpram'.
@ -279,9 +321,8 @@ Netiucv connections show up under devices/iucv/ as "netiucv<ifnum>". The interfa
number is assigned sequentially to the connections defined via the 'connection'
attribute.
user - shows the connection partner.
buffer - maximum buffer size.
Pipe to it to change buffer size.
user
- shows the connection partner.
buffer
- maximum buffer size. Pipe to it to change buffer size.
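
To tie the driver-model description above together, here is a minimal,
illustrative ccw driver skeleton. All names and the control-unit/device
type numbers are made up, and the field layout follows the struct
definitions shown in this (pre-conversion) text; newer kernels keep the
driver name and owner in the embedded .driver member instead.

#include <linux/module.h>
#include <asm/ccwdev.h>
#include <asm/cio.h>

/* Example CU/device type match; the numbers are illustrative only. */
static struct ccw_device_id sample_ids[] = {
        { CCW_DEVICE_DEVTYPE(0x3990, 0, 0x3390, 0) },
        { /* end of list */ },
};
MODULE_DEVICE_TABLE(ccw, sample_ids);

/* Interrupt handler; registered in probe() as described above. */
static void sample_irq(struct ccw_device *cdev, unsigned long intparm,
                       struct irb *irb)
{
        /* inspect irb->scsw and sense data, complete the request, ... */
}

static int sample_probe(struct ccw_device *cdev)
{
        cdev->handler = sample_irq;     /* only private data/attributes here */
        return 0;
}

static int sample_set_online(struct ccw_device *cdev)
{
        /* final setup and activation of the device */
        return 0;
}

static struct ccw_driver sample_driver = {
        .owner      = THIS_MODULE,      /* newer kernels: .driver.owner */
        .name       = "sample_ccw",     /* newer kernels: .driver.name */
        .ids        = sample_ids,
        .probe      = sample_probe,
        .set_online = sample_set_online,
};

static int __init sample_init(void)
{
        return ccw_driver_register(&sample_driver);
}

static void __exit sample_exit(void)
{
        ccw_driver_unregister(&sample_driver);
}

module_init(sample_init);
module_exit(sample_exit);
MODULE_LICENSE("GPL");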


@ -0,0 +1,30 @@
:orphan:
=================
s390 Architecture
=================
.. toctree::
:maxdepth: 1
cds
3270
debugging390
driver-model
monreader
qeth
s390dbf
vfio-ap
vfio-ccw
zfcpdump
dasd
common_io
text_files
.. only:: subproject and html
Indices
=======
* :ref:`genindex`


@ -1,24 +1,26 @@
=================================================
Linux API for read access to z/VM Monitor Records
=================================================
Date : 2004-Nov-26
Author: Gerald Schaefer (geraldsc@de.ibm.com)
Linux API for read access to z/VM Monitor Records
=================================================
Description
===========
This item delivers a new Linux API in the form of a misc char device that is
usable from user space and allows read access to the z/VM Monitor Records
collected by the *MONITOR System Service of z/VM.
collected by the `*MONITOR` System Service of z/VM.
User Requirements
=================
The z/VM guest on which you want to access this API needs to be configured in
order to allow IUCV connections to the *MONITOR service, i.e. it needs the
IUCV *MONITOR statement in its user entry. If the monitor DCSS to be used is
order to allow IUCV connections to the `*MONITOR` service, i.e. it needs the
IUCV `*MONITOR` statement in its user entry. If the monitor DCSS to be used is
restricted (likely), you also need the NAMESAVE <DCSS NAME> statement.
This item will use the IUCV device driver to access the z/VM services, so you
need a kernel with IUCV support. You also need z/VM version 4.4 or 5.1.
@ -50,7 +52,9 @@ Your guest virtual storage has to end below the starting address of the DCSS
and you have to specify the "mem=" kernel parameter in your parmfile with a
value greater than the ending address of the DCSS.
Example: DEF STOR 140M
Example::
DEF STOR 140M
This defines 140MB storage size for your guest, the parameter "mem=160M" is
added to the parmfile.
@ -66,24 +70,27 @@ kernel, the kernel parameter "monreader.mondcss=<DCSS NAME>" can be specified
in the parmfile.
The default name for the DCSS is "MONDCSS" if none is specified. In case that
there are other users already connected to the *MONITOR service (e.g.
there are other users already connected to the `*MONITOR` service (e.g.
Performance Toolkit), the monitor DCSS is already defined and you have to use
the same DCSS. The CP command Q MONITOR (Class E privileged) shows the name
of the monitor DCSS, if already defined, and the users connected to the
*MONITOR service.
`*MONITOR` service.
Refer to the "z/VM Performance" book (SC24-6109-00) on how to create a monitor
DCSS if your z/VM doesn't have one already; you need Class E privileges to
define and save a DCSS.
Example:
--------
modprobe monreader mondcss=MYDCSS
::
modprobe monreader mondcss=MYDCSS
This loads the module and sets the DCSS name to "MYDCSS".
NOTE:
-----
This API provides no interface to control the *MONITOR service, e.g. specify
This API provides no interface to control the `*MONITOR` service, e.g. specify
which data should be collected. This can be done by the CP command MONITOR
(Class E privileged), see "CP Command and Utility Reference".
@ -98,6 +105,7 @@ If your distribution does not support udev, a device node will not be created
automatically and you have to create it manually after loading the module.
Therefore you need to know the major and minor numbers of the device. These
numbers can be found in /sys/class/misc/monreader/dev.
Typing cat /sys/class/misc/monreader/dev will give an output of the form
<major>:<minor>. The device node can be created via the mknod command: enter
mknod <name> c <major> <minor>, where <name> is the name of the device node
@ -105,10 +113,13 @@ to be created.
Example:
--------
# modprobe monreader
# cat /sys/class/misc/monreader/dev
10:63
# mknod /dev/monreader c 10 63
::
# modprobe monreader
# cat /sys/class/misc/monreader/dev
10:63
# mknod /dev/monreader c 10 63
This loads the module with the default monitor DCSS (MONDCSS) and creates a
device node.
@ -133,20 +144,21 @@ last byte of data. The start address is needed to handle "end-of-frame" records
correctly (domain 1, record 13), i.e. it can be used to determine the record
start offset relative to a 4K page (frame) boundary.
See "Appendix A: *MONITOR" in the "z/VM Performance" document for a description
See "Appendix A: `*MONITOR`" in the "z/VM Performance" document for a description
of the monitor control element layout. The layout of the monitor records can
be found here (z/VM 5.1): http://www.vm.ibm.com/pubs/mon510/index.html
The layout of the data stream provided by the monreader device is as follows:
...
<0 byte read>
<first MCE> \
<first set of records> |
... |- data set
<last MCE> |
<last set of records> /
<0 byte read>
...
The layout of the data stream provided by the monreader device is as follows::
...
<0 byte read>
<first MCE> \
<first set of records> |
... |- data set
<last MCE> |
<last set of records> /
<0 byte read>
...
There may be more than one combination of MCE and corresponding record set
within one data set and the end of each data set is indicated by a successful
@ -165,15 +177,19 @@ As with most char devices, error conditions are indicated by returning a
negative value for the number of bytes read. In this case, the errno variable
indicates the error condition:
EIO: reply failed, read data is invalid and the application
EIO:
reply failed, read data is invalid and the application
should discard the data read since the last successful read with 0 size.
EFAULT: copy_to_user failed, read data is invalid and the application should
discard the data read since the last successful read with 0 size.
EAGAIN: occurs on a non-blocking read if there is no data available at the
moment. There is no data missing or corrupted, just try again or rather
use polling for non-blocking reads.
EOVERFLOW: message limit reached, the data read since the last successful
read with 0 size is valid but subsequent records may be missing.
EFAULT:
copy_to_user failed, read data is invalid and the application should
discard the data read since the last successful read with 0 size.
EAGAIN:
occurs on a non-blocking read if there is no data available at the
moment. There is no data missing or corrupted, just try again or rather
use polling for non-blocking reads.
EOVERFLOW:
message limit reached, the data read since the last successful
read with 0 size is valid but subsequent records may be missing.
In the last case (EOVERFLOW) there may be missing data, in the first two cases
(EIO, EFAULT) there will be missing data. It's up to the application if it will
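A minimal user-space sketch (not part of the original text) of a read loop
that follows the errno semantics described above; the buffer size and the
error handling policy are illustrative::
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	int fd = open("/dev/monreader", O_RDONLY);

	if (fd < 0) {
		perror("open /dev/monreader");
		return 1;
	}
	for (;;) {
		ssize_t n = read(fd, buf, sizeof(buf));

		if (n > 0) {
			/* process the MCE/record data read into buf */
			continue;
		}
		if (n == 0)
			continue;	/* end of data set, next read starts a new one */
		if (errno == EAGAIN)
			continue;	/* non-blocking read only: no data yet */
		if (errno == EOVERFLOW) {
			/* data since the last 0-byte read is valid, records may be missing */
			continue;
		}
		/* EIO or EFAULT: discard data read since the last 0-byte read */
		fprintf(stderr, "read: %s\n", strerror(errno));
		break;
	}
	close(fd);
	return 0;
}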
@ -183,7 +199,7 @@ Open:
-----
Only one user is allowed to open the char device. If it is already in use, the
open function will fail (return a negative value) and set errno to EBUSY.
The open function may also fail if an IUCV connection to the *MONITOR service
The open function may also fail if an IUCV connection to the `*MONITOR` service
cannot be established. In this case errno will be set to EIO and an error
message with an IPUSER SEVER code will be printed into syslog. The IPUSER SEVER
codes are described in the "z/VM Performance" book, Appendix A.
@ -194,4 +210,3 @@ As soon as the device is opened, incoming messages will be accepted and they
will account for the message limit, i.e. opening the device without reading
from it will provoke the "message limit reached" error (EOVERFLOW error code)
eventually.

View File

@ -1,8 +1,12 @@
=============================
IBM s390 QDIO Ethernet Driver
=============================
OSA and HiperSockets Bridge Port Support
========================================
Uevents
-------
To generate the events the device must be assigned a role of either
a primary or a secondary Bridge Port. For more information, see
@ -13,12 +17,15 @@ of some configured Bridge Port device on the channel changes, a udev
event with ACTION=CHANGE is emitted on behalf of the corresponding
ccwgroup device. The event has the following attributes:
BRIDGEPORT=statechange - indicates that the Bridge Port device changed
BRIDGEPORT=statechange
indicates that the Bridge Port device changed
its state.
ROLE={primary|secondary|none} - the role assigned to the port.
ROLE={primary|secondary|none}
the role assigned to the port.
STATE={active|standby|inactive} - the newly assumed state of the port.
STATE={active|standby|inactive}
the newly assumed state of the port.
When run on HiperSockets Bridge Capable Port hardware with host address
notifications enabled, a udev event with ACTION=CHANGE is emitted.
@ -26,25 +33,32 @@ It is emitted on behalf of the corresponding ccwgroup device when a host
or a VLAN is registered or unregistered on the network served by the device.
The event has the following attributes:
BRIDGEDHOST={reset|register|deregister|abort} - host address
BRIDGEDHOST={reset|register|deregister|abort}
host address
notifications are started afresh, a new host or VLAN is registered or
deregistered on the Bridge Port HiperSockets channel, or address
notifications are aborted.
VLAN=numeric-vlan-id - VLAN ID on which the event occurred. Not included
VLAN=numeric-vlan-id
VLAN ID on which the event occurred. Not included
if no VLAN is involved in the event.
MAC=xx:xx:xx:xx:xx:xx - MAC address of the host that is being registered
MAC=xx:xx:xx:xx:xx:xx
MAC address of the host that is being registered
or deregistered from the HiperSockets channel. Not reported if the
event reports the creation or destruction of a VLAN.
NTOK_BUSID=x.y.zzzz - device bus ID (CSSID, SSID and device number).
NTOK_BUSID=x.y.zzzz
device bus ID (CSSID, SSID and device number).
NTOK_IID=xx - device IID.
NTOK_IID=xx
device IID.
NTOK_CHPID=xx - device CHPID.
NTOK_CHPID=xx
device CHPID.
NTOK_CHID=xxxx - device channel ID.
NTOK_CHID=xxxx
device channel ID.
Note that the NTOK_* attributes refer to devices other than the one
Note that the `NTOK_*` attributes refer to devices other than the one
connected to the system on which the OS is running.
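A hedged sketch of how these uevents might be consumed; the rules file name
and the helper script are assumptions, not part of the driver::
# e.g. /etc/udev/rules.d/99-qeth-bridgeport.rules
ACTION=="change", SUBSYSTEM=="ccwgroup", ENV{BRIDGEPORT}=="statechange", \
    RUN+="/usr/local/sbin/bridgeport-state.sh $env{ROLE} $env{STATE}"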

View File

@ -0,0 +1,487 @@
==================
S390 Debug Feature
==================
files:
- arch/s390/kernel/debug.c
- arch/s390/include/asm/debug.h
Description:
------------
The goal of this feature is to provide a kernel debug logging API
where log records can be stored efficiently in memory, where each component
(e.g. device drivers) can have one separate debug log.
One purpose of this is to inspect the debug logs after a production system crash
in order to analyze the reason for the crash.
If the system still runs but only a subcomponent which uses dbf fails,
it is possible to look at the debug logs on a live system via the Linux
debugfs filesystem.
The debug feature may also be very useful for kernel and driver development.
Design:
-------
Kernel components (e.g. device drivers) can register themselves with the debug
feature with the function call :c:func:`debug_register()`.
This function initializes a
debug log for the caller. Each debug log has a number of debug areas, of which
exactly one is active at a time. Each debug area consists of contiguous
pages in memory. The debug areas hold the debug entries (log records)
which are written by event- and exception-calls.
An event-call writes the specified debug entry to the active debug
area and updates the log pointer for the active area. If the end
of the active debug area is reached, a wrap around is done (ring buffer)
and the next debug entry will be written at the beginning of the active
debug area.
An exception-call writes the specified debug entry to the log and
switches to the next debug area. This is done in order to be sure
that the records which describe the origin of the exception are not
overwritten when a wrap around for the current area occurs.
The debug areas themselves are also ordered in the form of a ring buffer.
When an exception is thrown in the last debug area, the following debug
entries are then written again in the very first area.
There are four versions for the event- and exception-calls: One for
logging raw data, one for text, one for numbers (unsigned int and long),
and one for sprintf-like formatted strings.
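A brief sketch of these call flavors (the handle ``dbf`` is assumed to have
been obtained via :c:func:`debug_register()` as in the Examples section below;
the levels and values are illustrative):
.. code-block:: c
struct my_data { int a; int b; } d = { 1, 2 };

debug_event(dbf, 3, &d, sizeof(d));           /* raw data */
debug_text_event(dbf, 3, "init");             /* text */
debug_int_event(dbf, 3, 4711);                /* unsigned int */
debug_long_event(dbf, 3, 0x12345678UL);       /* unsigned long */
debug_sprintf_event(dbf, 3, "rc=%d\n", -5);   /* sprintf-like formatted */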
Each debug entry contains the following data:
- Timestamp
- Cpu-Number of calling task
- Level of debug entry (0...6)
- Return Address to caller
- Flag, if entry is an exception or not
The debug logs can be inspected in a live system through entries in
the debugfs-filesystem. Under the toplevel directory "``s390dbf``" there is
a directory for each registered component, named after the
corresponding component. debugfs should normally be mounted at
``/sys/kernel/debug``, so the debug feature can be accessed under
``/sys/kernel/debug/s390dbf``.
Each directory contains files which represent different views
of the debug log. Each component can decide which views should be
used by registering them with the function :c:func:`debug_register_view()`.
Predefined views for hex/ascii, sprintf and raw binary data are provided.
It is also possible to define other views. The content of
a view can be inspected simply by reading the corresponding debugfs file.
All debug logs have an actual debug level (range from 0 to 6).
The default level is 3. Event and Exception functions have a :c:data:`level`
parameter. Only debug entries with a level that is less than or equal
to the actual level are written to the log. This means, when
writing events, high priority log entries should have a low level
value whereas low priority entries should have a high one.
The actual debug level can be changed via the debugfs filesystem
by writing a number string "x" to the ``level`` debugfs file which is
provided for every debug log. Debugging can be switched off completely
by writing "-" to the ``level`` debugfs file.
Example::
> echo "-" > /sys/kernel/debug/s390dbf/dasd/level
It is also possible to deactivate the debug feature globally for every
debug log. You can change this behavior using two sysctl parameters in
``/proc/sys/s390dbf``:
There are currently two possible triggers which stop the debug feature
globally. The first possibility is to use the ``debug_active`` sysctl. If
set to 1 the debug feature is running. If ``debug_active`` is set to 0 the
debug feature is turned off.
The second trigger which stops the debug feature is a kernel oops.
That prevents the debug feature from overwriting debug information that
happened before the oops. After an oops you can reactivate the debug feature
by piping 1 to ``/proc/sys/s390dbf/debug_active``. Nevertheless, it is not
recommended to keep using an oopsed kernel in a production environment.
If you want to disallow the deactivation of the debug feature, you can use
the ``debug_stoppable`` sysctl. If you set ``debug_stoppable`` to 0 the debug
feature cannot be stopped. If the debug feature is already stopped, it
will stay deactivated.
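For example, to prevent the debug feature from being stopped::
> echo 0 > /proc/sys/s390dbf/debug_stoppable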
Kernel Interfaces:
------------------
.. kernel-doc:: arch/s390/kernel/debug.c
.. kernel-doc:: arch/s390/include/asm/debug.h
Predefined views:
-----------------
.. code-block:: c
extern struct debug_view debug_hex_ascii_view;
extern struct debug_view debug_raw_view;
extern struct debug_view debug_sprintf_view;
Examples
--------
.. code-block:: c
/*
* hex_ascii- + raw-view Example
*/
#include <linux/init.h>
#include <asm/debug.h>
static debug_info_t *debug_info;
static int init(void)
{
/* register 4 debug areas with one page each and 4 byte data field */
debug_info = debug_register("test", 1, 4, 4);
debug_register_view(debug_info, &debug_hex_ascii_view);
debug_register_view(debug_info, &debug_raw_view);
debug_text_event(debug_info, 4, "one ");
debug_int_exception(debug_info, 4, 4711);
debug_event(debug_info, 3, &debug_info, 4);
return 0;
}
static void cleanup(void)
{
debug_unregister(debug_info);
}
module_init(init);
module_exit(cleanup);
.. code-block:: c
/*
* sprintf-view Example
*/
#include <linux/init.h>
#include <asm/debug.h>
static debug_info_t *debug_info;
static int init(void)
{
/* register 4 debug areas with one page each and data field for */
/* format string pointer + 2 varargs (= 3 * sizeof(long)) */
debug_info = debug_register("test", 1, 4, sizeof(long) * 3);
debug_register_view(debug_info, &debug_sprintf_view);
debug_sprintf_event(debug_info, 2, "first event in %s:%i\n", __FILE__, __LINE__);
debug_sprintf_exception(debug_info, 1, "pointer to debug info: %p\n", &debug_info);
return 0;
}
static void cleanup(void)
{
debug_unregister(debug_info);
}
module_init(init);
module_exit(cleanup);
Debugfs Interface
-----------------
Views of the debug logs can be inspected by reading the corresponding
debugfs files:
Example::
> ls /sys/kernel/debug/s390dbf/dasd
flush hex_ascii level pages raw
> cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s
00 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | ....
00 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE
00 00974733272:682213 2 - 02 0006adf6 07 ea 4a 90 | ....
00 00974733272:682281 1 * 02 0006ab08 41 4c 4c 43 | EXCP
01 00974733272:682284 2 - 02 0006ab16 45 43 4b 44 | ECKD
01 00974733272:682287 2 - 02 0006ab28 00 00 00 04 | ....
01 00974733272:682289 2 - 02 0006ab3e 00 00 00 20 | ...
01 00974733272:682297 2 - 02 0006ad7e 07 ea 4a 90 | ....
01 00974733272:684384 2 - 00 0006ade6 46 52 45 45 | FREE
01 00974733272:684388 2 - 00 0006adf6 07 ea 4a 90 | ....
See the section about predefined views for an explanation of the above output.
Changing the debug level
------------------------
Example::
> cat /sys/kernel/debug/s390dbf/dasd/level
3
> echo "5" > /sys/kernel/debug/s390dbf/dasd/level
> cat /sys/kernel/debug/s390dbf/dasd/level
5
Flushing debug areas
--------------------
Debug areas can be flushed by piping the number of the desired
area (0...n) to the debugfs file "flush". When using "-" all debug areas
are flushed.
Examples:
1. Flush debug area 0::
> echo "0" > /sys/kernel/debug/s390dbf/dasd/flush
2. Flush all debug areas::
> echo "-" > /sys/kernel/debug/s390dbf/dasd/flush
Changing the size of debug areas
--------------------------------
It is possible to change the size of debug areas by piping
the number of pages to the debugfs file "pages". The resize request will
also flush the debug areas.
Example:
Define 4 pages for the debug areas of debug feature "dasd"::
> echo "4" > /sys/kernel/debug/s390dbf/dasd/pages
Stopping the debug feature
--------------------------
Example:
1. Check if stopping is allowed::
> cat /proc/sys/s390dbf/debug_stoppable
2. Stop debug feature::
> echo 0 > /proc/sys/s390dbf/debug_active
crash Interface
----------------
Since version 5.1.0 the ``crash`` tool has a built-in command
``s390dbf`` to display all the debug logs or export them to the file system.
With this tool it is possible
to investigate the debug logs on a live system as well as in a memory dump
after a system crash.
Investigating raw memory
------------------------
One last possibility to investigate the debug logs at a live
system and after a system crash is to look at the raw memory
under VM or at the Service Element.
It is possible to find the anchor of the debug-logs through
the ``debug_area_first`` symbol in the System map. Then one has
to follow the correct pointers of the data-structures defined
in debug.h and find the debug-areas in memory.
Normally modules which use the debug feature will also have
a global variable with the pointer to the debug-logs. Following
this pointer it will also be possible to find the debug logs in
memory.
For this method it is recommended to use a data field length of
'16 * x + 4' bytes (x = 0..n) in :c:func:`debug_register()` so that
the debug entries are shown well formatted.
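For example (a sketch; the log name and the number of pages and areas are
arbitrary):
.. code-block:: c
/* x = 1: data field of 16 * 1 + 4 = 20 bytes per debug entry */
debug_info = debug_register("mydriver", 2, 4, 16 * 1 + 4);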
Predefined Views
----------------
There are three predefined views: hex_ascii, raw and sprintf.
The hex_ascii view shows the data field in hex and ascii representation
(e.g. ``45 43 4b 44 | ECKD``).
The raw view returns a bytestream as the debug areas are stored in memory.
The sprintf view formats the debug entries in the same way as the sprintf
function would do. The sprintf event/exception functions write to the
debug entry a pointer to the format string (size = sizeof(long))
and for each vararg a long value. So e.g. for a debug entry with a format
string plus two varargs one would need to allocate a (3 * sizeof(long))
byte data area in the debug_register() function.
IMPORTANT:
Using "%s" in sprintf event functions is dangerous. You can only
use "%s" in the sprintf event functions, if the memory for the passed string
is available as long as the debug feature exists. The reason behind this is
that due to performance considerations only a pointer to the string is stored
in the debug feature. If you log a string that is freed afterwards, you will
get an OOPS when inspecting the debug feature, because then the debug feature
will access the already freed memory.
NOTE:
If using the sprintf view do NOT use other event/exception functions
than the sprintf-event and -exception functions.
The format of the hex_ascii and sprintf view is as follows:
- Number of area
- Timestamp (formatted as seconds and microseconds since 00:00:00 Coordinated
Universal Time (UTC), January 1, 1970)
- level of debug entry
- Exception flag (* = Exception)
- Cpu-Number of calling task
- Return Address to caller
- data field
The format of the raw view is:
- Header as described in debug.h
- datafield
A typical line of the hex_ascii view will look like the following (first line
is only for explanation and will not be displayed when 'cating' the view)::
area time level exception cpu caller data (hex + ascii)
--------------------------------------------------------------------------
00 00964419409:440690 1 - 00 88023fe
Defining views
--------------
Views are specified with the 'debug_view' structure, which defines
callback functions used for reading and writing the debugfs files:
.. code-block:: c
struct debug_view {
char name[DEBUG_MAX_PROCF_LEN];
debug_prolog_proc_t* prolog_proc;
debug_header_proc_t* header_proc;
debug_format_proc_t* format_proc;
debug_input_proc_t* input_proc;
void* private_data;
};
where:
.. code-block:: c
typedef int (debug_header_proc_t) (debug_info_t* id,
struct debug_view* view,
int area,
debug_entry_t* entry,
char* out_buf);
typedef int (debug_format_proc_t) (debug_info_t* id,
struct debug_view* view, char* out_buf,
const char* in_buf);
typedef int (debug_prolog_proc_t) (debug_info_t* id,
struct debug_view* view,
char* out_buf);
typedef int (debug_input_proc_t) (debug_info_t* id,
struct debug_view* view,
struct file* file, const char* user_buf,
size_t in_buf_size, loff_t* offset);
The "private_data" member can be used as pointer to view specific data.
It is not used by the debug feature itself.
The output when reading a debugfs file is structured like this::
"prolog_proc output"
"header_proc output 1" "format_proc output 1"
"header_proc output 2" "format_proc output 2"
"header_proc output 3" "format_proc output 3"
...
When a view is read from the debugfs, the Debug Feature calls the
'prolog_proc' once for writing the prolog.
Then 'header_proc' and 'format_proc' are called for each
existing debug entry.
The input_proc can be used to implement functionality that is executed when
something is written to the view (e.g. ``echo "0" > /sys/kernel/debug/s390dbf/dasd/level``).
For header_proc the default function
:c:func:`debug_dflt_header_fn()`, which is defined in debug.h,
can be used; it produces the same header output as the predefined views.
E.g::
00 00964419409:440761 2 - 00 88023ec
To see how the callback functions are used, check the implementation
of the default views.
Example:
.. code-block:: c
#include <asm/debug.h>
#define UNKNOWNSTR "data: %08x"
const char* messages[] =
{"This error...........\n",
"That error...........\n",
"Problem..............\n",
"Something went wrong.\n",
"Everything ok........\n",
NULL
};
static int debug_test_format_fn(
debug_info_t *id, struct debug_view *view,
char *out_buf, const char *in_buf
)
{
int rc = 0;
if (id->buf_size >= 4) {
int msg_nr = *((int*)in_buf);
if (msg_nr < sizeof(messages) / sizeof(char*) - 1)
rc += sprintf(out_buf, "%s", messages[msg_nr]);
else
rc += sprintf(out_buf, UNKNOWNSTR, msg_nr);
}
return rc;
}
struct debug_view debug_test_view = {
"myview", /* name of view */
NULL, /* no prolog */
&debug_dflt_header_fn, /* default header for each entry */
&debug_test_format_fn, /* our own format function */
NULL, /* no input function */
NULL /* no private data */
};
test:
=====
.. code-block:: c
debug_info_t *debug_info;
int i;
...
debug_info = debug_register("test", 0, 4, 4);
debug_register_view(debug_info, &debug_test_view);
for (i = 0; i < 10; i ++)
debug_int_event(debug_info, 1, i);
::
> cat /sys/kernel/debug/s390dbf/test/myview
00 00964419734:611402 1 - 00 88042ca This error...........
00 00964419734:611405 1 - 00 88042ca That error...........
00 00964419734:611408 1 - 00 88042ca Problem..............
00 00964419734:611411 1 - 00 88042ca Something went wrong.
00 00964419734:611414 1 - 00 88042ca Everything ok........
00 00964419734:611417 1 - 00 88042ca data: 00000005
00 00964419734:611419 1 - 00 88042ca data: 00000006
00 00964419734:611422 1 - 00 88042ca data: 00000007
00 00964419734:611425 1 - 00 88042ca data: 00000008
00 00964419734:611428 1 - 00 88042ca data: 00000009

View File

@ -1,667 +0,0 @@
S390 Debug Feature
==================
files: arch/s390/kernel/debug.c
arch/s390/include/asm/debug.h
Description:
------------
The goal of this feature is to provide a kernel debug logging API
where log records can be stored efficiently in memory, where each component
(e.g. device drivers) can have one separate debug log.
One purpose of this is to inspect the debug logs after a production system crash
in order to analyze the reason for the crash.
If the system still runs but only a subcomponent which uses dbf fails,
it is possible to look at the debug logs on a live system via the Linux
debugfs filesystem.
The debug feature may also very useful for kernel and driver development.
Design:
-------
Kernel components (e.g. device drivers) can register themselves at the debug
feature with the function call debug_register(). This function initializes a
debug log for the caller. For each debug log exists a number of debug areas
where exactly one is active at one time. Each debug area consists of contiguous
pages in memory. In the debug areas there are stored debug entries (log records)
which are written by event- and exception-calls.
An event-call writes the specified debug entry to the active debug
area and updates the log pointer for the active area. If the end
of the active debug area is reached, a wrap around is done (ring buffer)
and the next debug entry will be written at the beginning of the active
debug area.
An exception-call writes the specified debug entry to the log and
switches to the next debug area. This is done in order to be sure
that the records which describe the origin of the exception are not
overwritten when a wrap around for the current area occurs.
The debug areas themselves are also ordered in form of a ring buffer.
When an exception is thrown in the last debug area, the following debug
entries are then written again in the very first area.
There are three versions for the event- and exception-calls: One for
logging raw data, one for text and one for numbers.
Each debug entry contains the following data:
- Timestamp
- Cpu-Number of calling task
- Level of debug entry (0...6)
- Return Address to caller
- Flag, if entry is an exception or not
The debug logs can be inspected in a live system through entries in
the debugfs-filesystem. Under the toplevel directory "s390dbf" there is
a directory for each registered component, which is named like the
corresponding component. The debugfs normally should be mounted to
/sys/kernel/debug therefore the debug feature can be accessed under
/sys/kernel/debug/s390dbf.
The content of the directories are files which represent different views
to the debug log. Each component can decide which views should be
used through registering them with the function debug_register_view().
Predefined views for hex/ascii, sprintf and raw binary data are provided.
It is also possible to define other views. The content of
a view can be inspected simply by reading the corresponding debugfs file.
All debug logs have an actual debug level (range from 0 to 6).
The default level is 3. Event and Exception functions have a 'level'
parameter. Only debug entries with a level that is lower or equal
than the actual level are written to the log. This means, when
writing events, high priority log entries should have a low level
value whereas low priority entries should have a high one.
The actual debug level can be changed with the help of the debugfs-filesystem
through writing a number string "x" to the 'level' debugfs file which is
provided for every debug log. Debugging can be switched off completely
by using "-" on the 'level' debugfs file.
Example:
> echo "-" > /sys/kernel/debug/s390dbf/dasd/level
It is also possible to deactivate the debug feature globally for every
debug log. You can change the behavior using 2 sysctl parameters in
/proc/sys/s390dbf:
There are currently 2 possible triggers, which stop the debug feature
globally. The first possibility is to use the "debug_active" sysctl. If
set to 1 the debug feature is running. If "debug_active" is set to 0 the
debug feature is turned off.
The second trigger which stops the debug feature is a kernel oops.
That prevents the debug feature from overwriting debug information that
happened before the oops. After an oops you can reactivate the debug feature
by piping 1 to /proc/sys/s390dbf/debug_active. Nevertheless, its not
suggested to use an oopsed kernel in a production environment.
If you want to disallow the deactivation of the debug feature, you can use
the "debug_stoppable" sysctl. If you set "debug_stoppable" to 0 the debug
feature cannot be stopped. If the debug feature is already stopped, it
will stay deactivated.
Kernel Interfaces:
------------------
----------------------------------------------------------------------------
debug_info_t *debug_register(char *name, int pages, int nr_areas,
int buf_size);
Parameter: name: Name of debug log (e.g. used for debugfs entry)
pages: number of pages, which will be allocated per area
nr_areas: number of debug areas
buf_size: size of data area in each debug entry
Return Value: Handle for generated debug area
NULL if register failed
Description: Allocates memory for a debug log
Must not be called within an interrupt handler
----------------------------------------------------------------------------
debug_info_t *debug_register_mode(char *name, int pages, int nr_areas,
int buf_size, mode_t mode, uid_t uid,
gid_t gid);
Parameter: name: Name of debug log (e.g. used for debugfs entry)
pages: Number of pages, which will be allocated per area
nr_areas: Number of debug areas
buf_size: Size of data area in each debug entry
mode: File mode for debugfs files. E.g. S_IRWXUGO
uid: User ID for debugfs files. Currently only 0 is
supported.
gid: Group ID for debugfs files. Currently only 0 is
supported.
Return Value: Handle for generated debug area
NULL if register failed
Description: Allocates memory for a debug log
Must not be called within an interrupt handler
---------------------------------------------------------------------------
void debug_unregister (debug_info_t * id);
Parameter: id: handle for debug log
Return Value: none
Description: frees memory for a debug log and removes all registered debug
views.
Must not be called within an interrupt handler
---------------------------------------------------------------------------
void debug_set_level (debug_info_t * id, int new_level);
Parameter: id: handle for debug log
new_level: new debug level
Return Value: none
Description: Sets new actual debug level if new_level is valid.
---------------------------------------------------------------------------
bool debug_level_enabled (debug_info_t * id, int level);
Parameter: id: handle for debug log
level: debug level
Return Value: True if level is less or equal to the current debug level.
Description: Returns true if debug events for the specified level would be
logged. Otherwise returns false.
---------------------------------------------------------------------------
void debug_stop_all(void);
Parameter: none
Return Value: none
Description: stops the debug feature if stopping is allowed. Currently
used in case of a kernel oops.
---------------------------------------------------------------------------
debug_entry_t* debug_event (debug_info_t* id, int level, void* data,
int length);
Parameter: id: handle for debug log
level: debug level
data: pointer to data for debug entry
length: length of data in bytes
Return Value: Address of written debug entry
Description: writes debug entry to active debug area (if level <= actual
debug level)
---------------------------------------------------------------------------
debug_entry_t* debug_int_event (debug_info_t * id, int level,
unsigned int data);
debug_entry_t* debug_long_event(debug_info_t * id, int level,
unsigned long data);
Parameter: id: handle for debug log
level: debug level
data: integer value for debug entry
Return Value: Address of written debug entry
Description: writes debug entry to active debug area (if level <= actual
debug level)
---------------------------------------------------------------------------
debug_entry_t* debug_text_event (debug_info_t * id, int level,
const char* data);
Parameter: id: handle for debug log
level: debug level
data: string for debug entry
Return Value: Address of written debug entry
Description: writes debug entry in ascii format to active debug area
(if level <= actual debug level)
---------------------------------------------------------------------------
debug_entry_t* debug_sprintf_event (debug_info_t * id, int level,
char* string,...);
Parameter: id: handle for debug log
level: debug level
string: format string for debug entry
...: varargs used as in sprintf()
Return Value: Address of written debug entry
Description: writes debug entry with format string and varargs (longs) to
active debug area (if level $<=$ actual debug level).
floats and long long datatypes cannot be used as varargs.
---------------------------------------------------------------------------
debug_entry_t* debug_exception (debug_info_t* id, int level, void* data,
int length);
Parameter: id: handle for debug log
level: debug level
data: pointer to data for debug entry
length: length of data in bytes
Return Value: Address of written debug entry
Description: writes debug entry to active debug area (if level <= actual
debug level) and switches to next debug area
---------------------------------------------------------------------------
debug_entry_t* debug_int_exception (debug_info_t * id, int level,
unsigned int data);
debug_entry_t* debug_long_exception(debug_info_t * id, int level,
unsigned long data);
Parameter: id: handle for debug log
level: debug level
data: integer value for debug entry
Return Value: Address of written debug entry
Description: writes debug entry to active debug area (if level <= actual
debug level) and switches to next debug area
---------------------------------------------------------------------------
debug_entry_t* debug_text_exception (debug_info_t * id, int level,
const char* data);
Parameter: id: handle for debug log
level: debug level
data: string for debug entry
Return Value: Address of written debug entry
Description: writes debug entry in ascii format to active debug area
(if level <= actual debug level) and switches to next debug
area
---------------------------------------------------------------------------
debug_entry_t* debug_sprintf_exception (debug_info_t * id, int level,
char* string,...);
Parameter: id: handle for debug log
level: debug level
string: format string for debug entry
...: varargs used as in sprintf()
Return Value: Address of written debug entry
Description: writes debug entry with format string and varargs (longs) to
active debug area (if level $<=$ actual debug level) and
switches to next debug area.
floats and long long datatypes cannot be used as varargs.
---------------------------------------------------------------------------
int debug_register_view (debug_info_t * id, struct debug_view *view);
Parameter: id: handle for debug log
view: pointer to debug view struct
Return Value: 0 : ok
< 0: Error
Description: registers new debug view and creates debugfs dir entry
---------------------------------------------------------------------------
int debug_unregister_view (debug_info_t * id, struct debug_view *view);
Parameter: id: handle for debug log
view: pointer to debug view struct
Return Value: 0 : ok
< 0: Error
Description: unregisters debug view and removes debugfs dir entry
Predefined views:
-----------------
extern struct debug_view debug_hex_ascii_view;
extern struct debug_view debug_raw_view;
extern struct debug_view debug_sprintf_view;
Examples
--------
/*
* hex_ascii- + raw-view Example
*/
#include <linux/init.h>
#include <asm/debug.h>
static debug_info_t* debug_info;
static int init(void)
{
/* register 4 debug areas with one page each and 4 byte data field */
debug_info = debug_register ("test", 1, 4, 4 );
debug_register_view(debug_info,&debug_hex_ascii_view);
debug_register_view(debug_info,&debug_raw_view);
debug_text_event(debug_info, 4 , "one ");
debug_int_exception(debug_info, 4, 4711);
debug_event(debug_info, 3, &debug_info, 4);
return 0;
}
static void cleanup(void)
{
debug_unregister (debug_info);
}
module_init(init);
module_exit(cleanup);
---------------------------------------------------------------------------
/*
* sprintf-view Example
*/
#include <linux/init.h>
#include <asm/debug.h>
static debug_info_t* debug_info;
static int init(void)
{
/* register 4 debug areas with one page each and data field for */
/* format string pointer + 2 varargs (= 3 * sizeof(long)) */
debug_info = debug_register ("test", 1, 4, sizeof(long) * 3);
debug_register_view(debug_info,&debug_sprintf_view);
debug_sprintf_event(debug_info, 2 , "first event in %s:%i\n",__FILE__,__LINE__);
debug_sprintf_exception(debug_info, 1, "pointer to debug info: %p\n",&debug_info);
return 0;
}
static void cleanup(void)
{
debug_unregister (debug_info);
}
module_init(init);
module_exit(cleanup);
Debugfs Interface
----------------
Views to the debug logs can be investigated through reading the corresponding
debugfs-files:
Example:
> ls /sys/kernel/debug/s390dbf/dasd
flush hex_ascii level pages raw
> cat /sys/kernel/debug/s390dbf/dasd/hex_ascii | sort -k2,2 -s
00 00974733272:680099 2 - 02 0006ad7e 07 ea 4a 90 | ....
00 00974733272:682210 2 - 02 0006ade6 46 52 45 45 | FREE
00 00974733272:682213 2 - 02 0006adf6 07 ea 4a 90 | ....
00 00974733272:682281 1 * 02 0006ab08 41 4c 4c 43 | EXCP
01 00974733272:682284 2 - 02 0006ab16 45 43 4b 44 | ECKD
01 00974733272:682287 2 - 02 0006ab28 00 00 00 04 | ....
01 00974733272:682289 2 - 02 0006ab3e 00 00 00 20 | ...
01 00974733272:682297 2 - 02 0006ad7e 07 ea 4a 90 | ....
01 00974733272:684384 2 - 00 0006ade6 46 52 45 45 | FREE
01 00974733272:684388 2 - 00 0006adf6 07 ea 4a 90 | ....
See section about predefined views for explanation of the above output!
Changing the debug level
------------------------
Example:
> cat /sys/kernel/debug/s390dbf/dasd/level
3
> echo "5" > /sys/kernel/debug/s390dbf/dasd/level
> cat /sys/kernel/debug/s390dbf/dasd/level
5
Flushing debug areas
--------------------
Debug areas can be flushed with piping the number of the desired
area (0...n) to the debugfs file "flush". When using "-" all debug areas
are flushed.
Examples:
1. Flush debug area 0:
> echo "0" > /sys/kernel/debug/s390dbf/dasd/flush
2. Flush all debug areas:
> echo "-" > /sys/kernel/debug/s390dbf/dasd/flush
Changing the size of debug areas
------------------------------------
It is possible the change the size of debug areas through piping
the number of pages to the debugfs file "pages". The resize request will
also flush the debug areas.
Example:
Define 4 pages for the debug areas of debug feature "dasd":
> echo "4" > /sys/kernel/debug/s390dbf/dasd/pages
Stooping the debug feature
--------------------------
Example:
1. Check if stopping is allowed
> cat /proc/sys/s390dbf/debug_stoppable
2. Stop debug feature
> echo 0 > /proc/sys/s390dbf/debug_active
lcrash Interface
----------------
It is planned that the dump analysis tool lcrash gets an additional command
's390dbf' to display all the debug logs. With this tool it will be possible
to investigate the debug logs on a live system and with a memory dump after
a system crash.
Investigating raw memory
------------------------
One last possibility to investigate the debug logs at a live
system and after a system crash is to look at the raw memory
under VM or at the Service Element.
It is possible to find the anker of the debug-logs through
the 'debug_area_first' symbol in the System map. Then one has
to follow the correct pointers of the data-structures defined
in debug.h and find the debug-areas in memory.
Normally modules which use the debug feature will also have
a global variable with the pointer to the debug-logs. Following
this pointer it will also be possible to find the debug logs in
memory.
For this method it is recommended to use '16 * x + 4' byte (x = 0..n)
for the length of the data field in debug_register() in
order to see the debug entries well formatted.
Predefined Views
----------------
There are three predefined views: hex_ascii, raw and sprintf.
The hex_ascii view shows the data field in hex and ascii representation
(e.g. '45 43 4b 44 | ECKD').
The raw view returns a bytestream as the debug areas are stored in memory.
The sprintf view formats the debug entries in the same way as the sprintf
function would do. The sprintf event/exception functions write to the
debug entry a pointer to the format string (size = sizeof(long))
and for each vararg a long value. So e.g. for a debug entry with a format
string plus two varargs one would need to allocate a (3 * sizeof(long))
byte data area in the debug_register() function.
IMPORTANT: Using "%s" in sprintf event functions is dangerous. You can only
use "%s" in the sprintf event functions, if the memory for the passed string is
available as long as the debug feature exists. The reason behind this is that
due to performance considerations only a pointer to the string is stored in
the debug feature. If you log a string that is freed afterwards, you will get
an OOPS when inspecting the debug feature, because then the debug feature will
access the already freed memory.
NOTE: If using the sprintf view do NOT use other event/exception functions
than the sprintf-event and -exception functions.
The format of the hex_ascii and sprintf view is as follows:
- Number of area
- Timestamp (formatted as seconds and microseconds since 00:00:00 Coordinated
Universal Time (UTC), January 1, 1970)
- level of debug entry
- Exception flag (* = Exception)
- Cpu-Number of calling task
- Return Address to caller
- data field
The format of the raw view is:
- Header as described in debug.h
- datafield
A typical line of the hex_ascii view will look like the following (first line
is only for explanation and will not be displayed when 'cating' the view):
area time level exception cpu caller data (hex + ascii)
--------------------------------------------------------------------------
00 00964419409:440690 1 - 00 88023fe
Defining views
--------------
Views are specified with the 'debug_view' structure. There are defined
callback functions which are used for reading and writing the debugfs files:
struct debug_view {
char name[DEBUG_MAX_PROCF_LEN];
debug_prolog_proc_t* prolog_proc;
debug_header_proc_t* header_proc;
debug_format_proc_t* format_proc;
debug_input_proc_t* input_proc;
void* private_data;
};
where
typedef int (debug_header_proc_t) (debug_info_t* id,
struct debug_view* view,
int area,
debug_entry_t* entry,
char* out_buf);
typedef int (debug_format_proc_t) (debug_info_t* id,
struct debug_view* view, char* out_buf,
const char* in_buf);
typedef int (debug_prolog_proc_t) (debug_info_t* id,
struct debug_view* view,
char* out_buf);
typedef int (debug_input_proc_t) (debug_info_t* id,
struct debug_view* view,
struct file* file, const char* user_buf,
size_t in_buf_size, loff_t* offset);
The "private_data" member can be used as pointer to view specific data.
It is not used by the debug feature itself.
The output when reading a debugfs file is structured like this:
"prolog_proc output"
"header_proc output 1" "format_proc output 1"
"header_proc output 2" "format_proc output 2"
"header_proc output 3" "format_proc output 3"
...
When a view is read from the debugfs, the Debug Feature calls the
'prolog_proc' once for writing the prolog.
Then 'header_proc' and 'format_proc' are called for each
existing debug entry.
The input_proc can be used to implement functionality when it is written to
the view (e.g. like with 'echo "0" > /sys/kernel/debug/s390dbf/dasd/level).
For header_proc there can be used the default function
debug_dflt_header_fn() which is defined in debug.h.
and which produces the same header output as the predefined views.
E.g:
00 00964419409:440761 2 - 00 88023ec
In order to see how to use the callback functions check the implementation
of the default views!
Example
#include <asm/debug.h>
#define UNKNOWNSTR "data: %08x"
const char* messages[] =
{"This error...........\n",
"That error...........\n",
"Problem..............\n",
"Something went wrong.\n",
"Everything ok........\n",
NULL
};
static int debug_test_format_fn(
debug_info_t * id, struct debug_view *view,
char *out_buf, const char *in_buf
)
{
int i, rc = 0;
if(id->buf_size >= 4) {
int msg_nr = *((int*)in_buf);
if(msg_nr < sizeof(messages)/sizeof(char*) - 1)
rc += sprintf(out_buf, "%s", messages[msg_nr]);
else
rc += sprintf(out_buf, UNKNOWNSTR, msg_nr);
}
out:
return rc;
}
struct debug_view debug_test_view = {
"myview", /* name of view */
NULL, /* no prolog */
&debug_dflt_header_fn, /* default header for each entry */
&debug_test_format_fn, /* our own format function */
NULL, /* no input function */
NULL /* no private data */
};
=====
test:
=====
debug_info_t *debug_info;
...
debug_info = debug_register ("test", 0, 4, 4 ));
debug_register_view(debug_info, &debug_test_view);
for(i = 0; i < 10; i ++) debug_int_event(debug_info, 1, i);
> cat /sys/kernel/debug/s390dbf/test/myview
00 00964419734:611402 1 - 00 88042ca This error...........
00 00964419734:611405 1 - 00 88042ca That error...........
00 00964419734:611408 1 - 00 88042ca Problem..............
00 00964419734:611411 1 - 00 88042ca Something went wrong.
00 00964419734:611414 1 - 00 88042ca Everything ok........
00 00964419734:611417 1 - 00 88042ca data: 00000005
00 00964419734:611419 1 - 00 88042ca data: 00000006
00 00964419734:611422 1 - 00 88042ca data: 00000007
00 00964419734:611425 1 - 00 88042ca data: 00000008
00 00964419734:611428 1 - 00 88042ca data: 00000009

View File

@ -0,0 +1,11 @@
ibm 3270 changelog
------------------
.. include:: 3270.ChangeLog
:literal:
ibm 3270 config3270.sh
----------------------
.. literalinclude:: config3270.sh
:language: shell

View File

@ -1,4 +1,9 @@
Introduction:
===============================
Adjunct Processor (AP) facility
===============================
Introduction
============
The Adjunct Processor (AP) facility is an IBM Z cryptographic facility comprised
of three AP instructions and from 1 up to 256 PCIe cryptographic adapter cards.
@ -11,7 +16,7 @@ framework. This implementation relies considerably on the s390 virtualization
facilities which do most of the hard work of providing direct access to AP
devices.
AP Architectural Overview:
AP Architectural Overview
=========================
To facilitate the comprehension of the design, let's start with some
definitions:
@ -31,13 +36,13 @@ definitions:
in the LPAR, the AP bus detects the AP adapter cards assigned to the LPAR and
creates a sysfs device for each assigned adapter. For example, if AP adapters
4 and 10 (0x0a) are assigned to the LPAR, the AP bus will create the following
sysfs device entries:
sysfs device entries::
/sys/devices/ap/card04
/sys/devices/ap/card0a
Symbolic links to these devices will also be created in the AP bus devices
sub-directory:
sub-directory::
/sys/bus/ap/devices/[card04]
/sys/bus/ap/devices/[card0a]
@ -84,7 +89,7 @@ definitions:
the cross product of the AP adapter and usage domain numbers detected when the
AP bus module is loaded. For example, if adapters 4 and 10 (0x0a) and usage
domains 6 and 71 (0x47) are assigned to the LPAR, the AP bus will create the
following sysfs entries:
following sysfs entries::
/sys/devices/ap/card04/04.0006
/sys/devices/ap/card04/04.0047
@ -92,7 +97,7 @@ definitions:
/sys/devices/ap/card0a/0a.0047
The following symbolic links to these devices will be created in the AP bus
devices subdirectory:
devices subdirectory::
/sys/bus/ap/devices/[04.0006]
/sys/bus/ap/devices/[04.0047]
@ -112,7 +117,7 @@ definitions:
domain that is not one of the usage domains, but the modified domain
must be one of the control domains.
AP and SIE:
AP and SIE
==========
Let's now take a look at how AP instructions executed on a guest are interpreted
by the hardware.
@ -153,7 +158,7 @@ and 2 and usage domains 5 and 6 are assigned to a guest, the APQNs (1,5), (1,6),
The APQNs can provide secure key functionality - i.e., a private key is stored
on the adapter card for each of its domains - so each APQN must be assigned to
at most one guest or to the linux host.
at most one guest or to the linux host::
Example 1: Valid configuration:
------------------------------
@ -181,8 +186,8 @@ at most one guest or to the linux host.
This is an invalid configuration because both guests have access to
APQN (1,6).
The Design:
===========
The Design
==========
The design introduces three new objects:
1. AP matrix device
@ -205,43 +210,43 @@ The VFIO AP (vfio_ap) device driver serves the following purposes:
Reserve APQNs for exclusive use of KVM guests
---------------------------------------------
The following block diagram illustrates the mechanism by which APQNs are
reserved:
reserved::
+------------------+
7 remove | |
+--------------------> cex4queue driver |
| | |
| +------------------+
|
|
| +------------------+ +-----------------+
| 5 register driver | | 3 create | |
| +----------------> Device core +----------> matrix device |
| | | | | |
| | +--------^---------+ +-----------------+
| | |
| | +-------------------+
| | +-----------------------------------+ |
| | | 4 register AP driver | | 2 register device
| | | | |
+--------+---+-v---+ +--------+-------+-+
| | | |
| ap_bus +--------------------- > vfio_ap driver |
| | 8 probe | |
+--------^---------+ +--^--^------------+
6 edit | | |
apmask | +-----------------------------+ | 9 mdev create
aqmask | | 1 modprobe |
+--------+-----+---+ +----------------+-+ +------------------+
| | | |8 create | mediated |
| admin | | VFIO device core |---------> matrix |
| + | | | device |
+------+-+---------+ +--------^---------+ +--------^---------+
| | | |
| | 9 create vfio_ap-passthrough | |
| +------------------------------+ |
+-------------------------------------------------------------+
10 assign adapter/domain/control domain
+------------------+
7 remove | |
+--------------------> cex4queue driver |
| | |
| +------------------+
|
|
| +------------------+ +----------------+
| 5 register driver | | 3 create | |
| +----------------> Device core +----------> matrix device |
| | | | | |
| | +--------^---------+ +----------------+
| | |
| | +-------------------+
| | +-----------------------------------+ |
| | | 4 register AP driver | | 2 register device
| | | | |
+--------+---+-v---+ +--------+-------+-+
| | | |
| ap_bus +--------------------- > vfio_ap driver |
| | 8 probe | |
+--------^---------+ +--^--^------------+
6 edit | | |
apmask | +-----------------------------+ | 9 mdev create
aqmask | | 1 modprobe |
+--------+-----+---+ +----------------+-+ +----------------+
| | | |8 create | mediated |
| admin | | VFIO device core |---------> matrix |
| + | | | device |
+------+-+---------+ +--------^---------+ +--------^-------+
| | | |
| | 9 create vfio_ap-passthrough | |
| +------------------------------+ |
+-------------------------------------------------------------+
10 assign adapter/domain/control domain
The process for reserving an AP queue for use by a KVM guest is:
@ -250,7 +255,7 @@ The process for reserving an AP queue for use by a KVM guest is:
device with the device core. This will serve as the parent device for
all mediated matrix devices used to configure an AP matrix for a guest.
3. The /sys/devices/vfio_ap/matrix device is created by the device core
4 The vfio_ap device driver will register with the AP bus for AP queue devices
4. The vfio_ap device driver will register with the AP bus for AP queue devices
of type 10 and higher (CEX4 and newer). The driver will provide the vfio_ap
driver's probe and remove callback interfaces. Devices older than CEX4 queues
are not supported to simplify the implementation by not needlessly
@ -266,13 +271,14 @@ The process for reserving an AP queue for use by a KVM guest is:
it.
9. The administrator creates a passthrough type mediated matrix device to be
used by a guest
10 The administrator assigns the adapters, usage domains and control domains
to be exclusively used by a guest.
10. The administrator assigns the adapters, usage domains and control domains
to be exclusively used by a guest.
Set up the VFIO mediated device interfaces
------------------------------------------
The VFIO AP device driver utilizes the common interface of the VFIO mediated
device core driver to:
* Register an AP mediated bus driver to add a mediated matrix device to and
remove it from a VFIO group.
* Create and destroy a mediated matrix device
@ -280,25 +286,25 @@ device core driver to:
* Add a mediated matrix device to and remove it from an IOMMU group
The following high-level block diagram shows the main components and interfaces
of the VFIO AP mediated matrix device driver:
of the VFIO AP mediated matrix device driver::
+-------------+
| |
| +---------+ | mdev_register_driver() +--------------+
| | Mdev | +<-----------------------+ |
| | bus | | | vfio_mdev.ko |
| | driver | +----------------------->+ |<-> VFIO user
| +---------+ | probe()/remove() +--------------+ APIs
| |
| MDEV CORE |
| MODULE |
| mdev.ko |
| +---------+ | mdev_register_device() +--------------+
| |Physical | +<-----------------------+ |
| | device | | | vfio_ap.ko |<-> matrix
| |interface| +----------------------->+ | device
| +---------+ | callback +--------------+
+-------------+
+-------------+
| |
| +---------+ | mdev_register_driver() +--------------+
| | Mdev | +<-----------------------+ |
| | bus | | | vfio_mdev.ko |
| | driver | +----------------------->+ |<-> VFIO user
| +---------+ | probe()/remove() +--------------+ APIs
| |
| MDEV CORE |
| MODULE |
| mdev.ko |
| +---------+ | mdev_register_device() +--------------+
| |Physical | +<-----------------------+ |
| | device | | | vfio_ap.ko |<-> matrix
| |interface| +----------------------->+ | device
| +---------+ | callback +--------------+
+-------------+
During initialization of the vfio_ap module, the matrix device is registered
with an 'mdev_parent_ops' structure that provides the sysfs attribute
@ -306,7 +312,8 @@ structures, mdev functions and callback interfaces for managing the mediated
matrix device.
* sysfs attribute structures:
* supported_type_groups
supported_type_groups
The VFIO mediated device framework supports creation of user-defined
mediated device types. These mediated device types are specified
via the 'supported_type_groups' structure when a device is registered
@ -318,61 +325,72 @@ matrix device.
The VFIO AP device driver will register one mediated device type for
passthrough devices:
/sys/devices/vfio_ap/matrix/mdev_supported_types/vfio_ap-passthrough
Only the read-only attributes required by the VFIO mdev framework will
be provided:
... name
... device_api
... available_instances
... device_api
Where:
* name: specifies the name of the mediated device type
* device_api: the mediated device type's API
* available_instances: the number of mediated matrix passthrough devices
that can be created
* device_api: specifies the VFIO API
* mdev_attr_groups
be provided::
... name
... device_api
... available_instances
... device_api
Where:
* name:
specifies the name of the mediated device type
* device_api:
the mediated device type's API
* available_instances:
the number of mediated matrix passthrough devices
that can be created
* device_api:
specifies the VFIO API
mdev_attr_groups
This attribute group identifies the user-defined sysfs attributes of the
mediated device. When a device is registered with the VFIO mediated device
framework, the sysfs attribute files identified in the 'mdev_attr_groups'
structure will be created in the mediated matrix device's directory. The
sysfs attributes for a mediated matrix device are:
* assign_adapter:
* unassign_adapter:
assign_adapter / unassign_adapter:
Write-only attributes for assigning/unassigning an AP adapter to/from the
mediated matrix device. To assign/unassign an adapter, the APID of the
adapter is echoed to the respective attribute file.
* assign_domain:
* unassign_domain:
assign_domain / unassign_domain:
Write-only attributes for assigning/unassigning an AP usage domain to/from
the mediated matrix device. To assign/unassign a domain, the domain
number of the usage domain is echoed to the respective attribute
file.
* matrix:
matrix:
A read-only file for displaying the APQNs derived from the cross product
of the adapter and domain numbers assigned to the mediated matrix device.
* assign_control_domain:
* unassign_control_domain:
assign_control_domain / unassign_control_domain:
Write-only attributes for assigning/unassigning an AP control domain
to/from the mediated matrix device. To assign/unassign a control domain,
the ID of the domain to be assigned/unassigned is echoed to the respective
attribute file.
* control_domains:
control_domains:
A read-only file for displaying the control domain numbers assigned to the
mediated matrix device.
* functions:
* create:
create:
allocates the ap_matrix_mdev structure used by the vfio_ap driver to:
* Store the reference to the KVM structure for the guest using the mdev
* Store the AP matrix configuration for the adapters, domains, and control
domains assigned via the corresponding sysfs attributes files
* remove:
remove:
deallocates the mediated matrix device's ap_matrix_mdev structure. This will
be allowed only if a running guest is not using the mdev.
* callback interfaces
* open:
open:
The vfio_ap driver uses this callback to register a
VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the mdev matrix
device. The open is invoked when QEMU connects the VFIO iommu group
@ -380,16 +398,17 @@ matrix device.
to configure the KVM guest is provided via this callback. The KVM structure,
is used to configure the guest's access to the AP matrix defined via the
mediated matrix device's sysfs attribute files.
release:
unregisters the VFIO_GROUP_NOTIFY_SET_KVM notifier callback function for the
mdev matrix device and deconfigures the guest's AP matrix.
Configure the APM, AQM and ADM in the CRYCB
-------------------------------------------
Configuring the AP matrix for a KVM guest will be performed when the
VFIO_GROUP_NOTIFY_SET_KVM notifier callback is invoked. The notifier
function is called when QEMU connects to KVM. The guest's AP matrix is
configured via its CRYCB by:
* Setting the bits in the APM corresponding to the APIDs assigned to the
mediated matrix device via its 'assign_adapter' interface.
* Setting the bits in the AQM corresponding to the domains assigned to the
@ -418,12 +437,12 @@ available to a KVM guest via the following CPU model features:
Note: If the user chooses to specify a CPU model different than the 'host'
model to QEMU, the CPU model features and facilities need to be turned on
explicitly; for example::
/usr/bin/qemu-system-s390x ... -cpu z13,ap=on,apqci=on,apft=on
A guest can be precluded from using AP features/facilities by turning them off
explicitly; for example::
/usr/bin/qemu-system-s390x ... -cpu host,ap=off,apqci=off,apft=off
@ -435,7 +454,7 @@ the APFT facility is not installed on the guest, then the probe of device
drivers will fail since only type 10 and newer devices can be configured for
guest use.
Example
=======
Let's now provide an example to illustrate how KVM guests may be given
access to AP facilities. For this example, we will show how to configure
@ -444,30 +463,36 @@ look like this:
Guest1
------
=========== ===== ============
CARD.DOMAIN TYPE  MODE
=========== ===== ============
05          CEX5C CCA-Coproc
05.0004     CEX5C CCA-Coproc
05.00ab     CEX5C CCA-Coproc
06          CEX5A Accelerator
06.0004     CEX5A Accelerator
06.00ab     CEX5C CCA-Coproc
=========== ===== ============
Guest2
------
=========== ===== ============
CARD.DOMAIN TYPE  MODE
=========== ===== ============
05          CEX5A Accelerator
05.0047     CEX5A Accelerator
05.00ff     CEX5A Accelerator
=========== ===== ============
Guest3
------
=========== ===== ============
CARD.DOMAIN TYPE  MODE
=========== ===== ============
06          CEX5A Accelerator
06.0047     CEX5A Accelerator
06.00ff     CEX5A Accelerator
=========== ===== ============
These are the steps:
@ -492,25 +517,26 @@ These are the steps:
* VFIO_MDEV_DEVICE
* KVM
If using make menuconfig select the following to build the vfio_ap module::

  -> Device Drivers
     -> IOMMU Hardware Support
        select S390 AP IOMMU Support
     -> VFIO Non-Privileged userspace driver framework
        -> Mediated device driver framework
           -> VFIO driver for Mediated devices
  -> I/O subsystem
     -> VFIO support for AP devices
2. Secure the AP queues to be used by the three guests so that the host can not
access them. To secure them, there are two sysfs files that specify
bitmasks marking a subset of the APQN range as 'usable by the default AP
queue device drivers' or 'not usable by the default device drivers and thus
available for use by the vfio_ap device driver'. The sysfs files containing
the masks are located at::

  /sys/bus/ap/apmask
  /sys/bus/ap/aqmask
The 'apmask' is a 256-bit mask that identifies a set of AP adapter IDs
(APID). Each bit in the mask, from left to right (i.e., from most significant
@ -526,7 +552,7 @@ These are the steps:
queue device drivers; otherwise, the APQI is usable by the vfio_ap device
driver.
Take, for example, the following mask::
0x7dffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
@ -548,68 +574,70 @@ These are the steps:
respective sysfs mask file in one of two formats:
* An absolute hex string starting with 0x - like "0x12345678" - sets
  the mask. If the given string is shorter than the mask, it is padded
  with 0s on the right; for example, specifying a mask value of 0x41 is
  the same as specifying::

    0x4100000000000000000000000000000000000000000000000000000000000000

  Keep in mind that the mask reads from left to right (i.e., most
  significant to least significant bit in big endian order), so the mask
  above identifies device numbers 1 and 7 (01000001).

  If the string is longer than the mask, the operation is terminated with
  an error (EINVAL).

* Individual bits in the mask can be switched on and off by specifying
  each bit number to be switched in a comma separated list. Each bit
  number string must be prepended with a plus ('+') or minus ('-') to
  indicate whether the corresponding bit is to be switched on ('+') or
  off ('-'). Some valid values are:

    - "+0"    switches bit 0 on
    - "-13"   switches bit 13 off
    - "+0x41" switches bit 65 on
    - "-0xff" switches bit 255 off

  The following example:

    +0,-6,+0x47,-0xf0

  switches bits 0 and 71 (0x47) on and switches bits 6 and 240 (0xf0) off.
  Note that the bits not specified in the list remain as they were before
  the operation (see the sketch below).
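For illustration only, here is a minimal user-space sketch of this bit-list
format. It is not the kernel's actual sysfs parser, just a model of the
256-bit, most-significant-bit-first mask described above::

  /* Apply a "+bit"/"-bit" list to a 256-bit mask and print the result. */
  #include <stdio.h>
  #include <stdlib.h>
  #include <string.h>

  static unsigned char mask[32];  /* 256 bits; bit 0 is the MSB of mask[0] */

  static void set_mask_bit(unsigned int nr, int on)
  {
          unsigned char bit = 0x80 >> (nr % 8);

          if (on)
                  mask[nr / 8] |= bit;
          else
                  mask[nr / 8] &= ~bit;
  }

  int main(void)
  {
          char list[] = "+0,-6,+0x47,-0xf0";  /* the example list above */
          char *tok = strtok(list, ",");
          int i;

          while (tok) {
                  unsigned long nr = strtoul(tok + 1, NULL, 0);

                  if (nr < 256)
                          set_mask_bit(nr, tok[0] == '+');
                  tok = strtok(NULL, ",");
          }

          printf("0x");
          for (i = 0; i < 32; i++)
                  printf("%02x", mask[i]);
          /* Starting from an all-zero mask, only bits 0 and 71 end up on. */
          printf("\n");
          return 0;
  }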
2. The masks can also be changed at boot time via parameters on the kernel
command line like this:
  ap.apmask=0xffff ap.aqmask=0x40
This would create the following masks::

  apmask:
  0xffff000000000000000000000000000000000000000000000000000000000000

  aqmask:
  0x4000000000000000000000000000000000000000000000000000000000000000
Resulting in these two pools::

  default drivers pool:    adapter 0-15, domain 1
  alternate drivers pool:  adapter 16-255, domains 0, 2-255
Securing the APQNs for our example
----------------------------------
To secure the AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004, 06.0047,
06.00ab, and 06.00ff for use by the vfio_ap device driver, the corresponding
APQNs can either be removed from the default masks::
echo -5,-6 > /sys/bus/ap/apmask
echo -4,-0x47,-0xab,-0xff > /sys/bus/ap/aqmask
Or the masks can be set as follows::
echo 0xf9ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff \
> apmask
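The leading 0xf9 byte is simply an all-ones byte with bits 5 and 6 (the two
assigned APIDs) cleared. A tiny stand-alone check of that arithmetic, for
illustration only::

  #include <assert.h>

  int main(void)
  {
          unsigned char byte = 0xff;

          byte &= ~(0x80 >> 5);   /* clear bit 5, i.e. APID 0x05 */
          byte &= ~(0x80 >> 6);   /* clear bit 6, i.e. APID 0x06 */
          assert(byte == 0xf9);   /* 1111 1001 */
          return 0;
  }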
@ -620,19 +648,19 @@ These are the steps:
This will result in AP queues 05.0004, 05.0047, 05.00ab, 05.00ff, 06.0004,
06.0047, 06.00ab, and 06.00ff getting bound to the vfio_ap device driver. The
sysfs directory for the vfio_ap device driver will now contain symbolic links
to the AP queue devices bound to it::
  /sys/bus/ap
  ... [drivers]
  ...... [vfio_ap]
  ......... [05.0004]
  ......... [05.0047]
  ......... [05.00ab]
  ......... [05.00ff]
  ......... [06.0004]
  ......... [06.0047]
  ......... [06.00ab]
  ......... [06.00ff]
Keep in mind that only type 10 and newer adapters (i.e., CEX4 and later)
can be bound to the vfio_ap device driver. The reason for this is to
@ -645,96 +673,96 @@ These are the steps:
queue device can be read from the parent card's sysfs directory. For example,
to see the hardware type of the queue 05.0004:
  cat /sys/bus/ap/devices/card05/hwtype
The hwtype must be 10 or higher (CEX4 or newer) in order to be bound to the
vfio_ap device driver.
3. Create the mediated devices needed to configure the AP matrixes for the
three guests and to provide an interface to the vfio_ap driver for
use by the guests::
  /sys/devices/vfio_ap/matrix/
  --- [mdev_supported_types]
  ------ [vfio_ap-passthrough] (passthrough mediated matrix device type)
  --------- create
  --------- [devices]
To create the mediated devices for the three guests::
uuidgen > create
uuidgen > create
uuidgen > create
or

  echo $uuid1 > create
  echo $uuid2 > create
  echo $uuid3 > create
This will create three mediated devices in the [devices] subdirectory named
after the UUID written to the create attribute file. We call them $uuid1,
$uuid2 and $uuid3 and this is the sysfs directory structure after creation::
  /sys/devices/vfio_ap/matrix/
  --- [mdev_supported_types]
  ------ [vfio_ap-passthrough]
  --------- [devices]
  ------------ [$uuid1]
  --------------- assign_adapter
  --------------- assign_control_domain
  --------------- assign_domain
  --------------- matrix
  --------------- unassign_adapter
  --------------- unassign_control_domain
  --------------- unassign_domain

  ------------ [$uuid2]
  --------------- assign_adapter
  --------------- assign_control_domain
  --------------- assign_domain
  --------------- matrix
  --------------- unassign_adapter
  --------------- unassign_control_domain
  --------------- unassign_domain

  ------------ [$uuid3]
  --------------- assign_adapter
  --------------- assign_control_domain
  --------------- assign_domain
  --------------- matrix
  --------------- unassign_adapter
  --------------- unassign_control_domain
  --------------- unassign_domain
4. The administrator now needs to configure the matrixes for the mediated
devices $uuid1 (for Guest1), $uuid2 (for Guest2) and $uuid3 (for Guest3).
This is how the matrix is configured for Guest1::
echo 5 > assign_adapter
echo 6 > assign_adapter
echo 4 > assign_domain
echo 0xab > assign_domain
Control domains can similarly be assigned using the assign_control_domain
sysfs file.

If a mistake is made configuring an adapter, domain or control domain,
you can use the unassign_xxx files to unassign the adapter, domain or
control domain.
To display the matrix configuration for Guest1::

  cat matrix
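The APQNs listed are just the cross product of the assigned adapter and
domain numbers. A stand-alone sketch, for illustration only, using the
Guest1 assignments above::

  #include <stdio.h>

  int main(void)
  {
          int adapters[] = { 0x05, 0x06 };     /* assign_adapter: 5, 6    */
          int domains[]  = { 0x0004, 0x00ab }; /* assign_domain:  4, 0xab */
          int a, d;

          for (a = 0; a < 2; a++)
                  for (d = 0; d < 2; d++)
                          printf("%02x.%04x\n", adapters[a], domains[d]);
          return 0;
  }

This prints 05.0004, 05.00ab, 06.0004 and 06.00ab, which is what the matrix
attribute should show for the Guest1 configuration above.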
This is how the matrix is configured for Guest2::
echo 5 > assign_adapter
echo 0x47 > assign_domain
echo 0xff > assign_domain
This is how the matrix is configured for Guest3::
echo 6 > assign_adapter
echo 0x47 > assign_domain
@ -783,24 +811,24 @@ These are the steps:
configured for the system. If a control domain number higher than the maximum
is specified, the operation will terminate with an error (ENODEV).
5. Start Guest1::

   /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
     -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid1 ...
6. Start Guest2::

   /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
     -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid2 ...
7. Start Guest3::

   /usr/bin/qemu-system-s390x ... -cpu host,ap=on,apqci=on,apft=on \
     -device vfio-ap,sysfsdev=/sys/devices/vfio_ap/matrix/$uuid3 ...
When the guest is shut down, the mediated matrix devices may be removed.
Using our example again, to remove the mediated matrix device $uuid1::
/sys/devices/vfio_ap/matrix/
--- [mdev_supported_types]
@ -809,18 +837,19 @@ Using our example again, to remove the mediated matrix device $uuid1:
------------ [$uuid1]
--------------- remove
::
echo 1 > remove
This will remove all of the mdev matrix device's sysfs structures including
the mdev device itself. To recreate and reconfigure the mdev matrix device,
all of the steps starting with step 3 will have to be performed again. Note
that the remove will fail if a guest using the mdev is still running.

It is not necessary to remove an mdev matrix device, but one may want to
remove it if no guest will use it during the remaining lifetime of the linux
host. If the mdev matrix device is removed, one may want to also reconfigure
the pool of adapters and queues reserved for use by the default drivers.
Limitations
===========

View File

@ -1,3 +1,4 @@
==================================
vfio-ccw: the basic infrastructure
==================================
@ -11,9 +12,11 @@ virtual machine, while vfio is the means.
Unlike other hardware architectures, s390 has defined a unified
I/O access method, which is so called Channel I/O. It has its own access
patterns:
- Channel programs run asynchronously on a separate (co)processor.
- The channel subsystem will access any memory designated by the caller
in the channel program directly, i.e. there is no iommu involved.
Thus when we introduce vfio support for these devices, we realize it
with a mediated device (mdev) implementation. The vfio mdev will be
added to an iommu group, so as to make itself able to be managed by the
@ -24,6 +27,7 @@ to perform I/O instructions.
This document does not intend to explain the s390 I/O architecture in
every detail. More information/reference could be found here:
- A good start to know Channel I/O in general:
https://en.wikipedia.org/wiki/Channel_I/O
- s390 architecture:
@ -80,6 +84,7 @@ until interrupted. The I/O completion result is received by the
interrupt handler in the form of interrupt response block (IRB).
Back to vfio-ccw, in short:
- ORBs and channel programs are built in guest kernel (with guest
physical addresses).
- ORBs and channel programs are passed to the host kernel.
@ -106,6 +111,7 @@ it gets sent to hardware.
Within this implementation, we have two drivers for two types of
devices:
- The vfio_ccw driver for the physical subchannel device.
This is an I/O subchannel driver for the real subchannel device. It
realizes a group of callbacks and registers to the mdev framework as a
@ -137,7 +143,7 @@ devices:
vfio_pin_pages and a vfio_unpin_pages interfaces from the vfio iommu
backend for the physical devices to pin and unpin pages by demand.
Below is a high-level block diagram::
+-------------+
| |
@ -158,6 +164,7 @@ Below is a high Level block diagram.
+-------------+
The process of how these work together.
1. vfio_ccw.ko drives the physical I/O subchannel, and registers the
physical device (with callbacks) to mdev framework.
When vfio_ccw probing the subchannel device, it registers device
@ -178,17 +185,17 @@ vfio-ccw I/O region
An I/O region is used to accept channel program request from user
space and store I/O interrupt result for user space to retrieve. The
definition of the region is::
  struct ccw_io_region {
  #define ORB_AREA_SIZE 12
          __u8 orb_area[ORB_AREA_SIZE];
  #define SCSW_AREA_SIZE 12
          __u8 scsw_area[SCSW_AREA_SIZE];
  #define IRB_AREA_SIZE 96
          __u8 irb_area[IRB_AREA_SIZE];
          __u32 ret_code;
  } __packed;
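For orientation, here is a rough user-space sketch of driving this region.
It is an illustration only: it assumes the region offset has already been
obtained via VFIO_DEVICE_GET_REGION_INFO on the vfio device file descriptor,
and it omits all error handling and the interrupt/eventfd setup::

  #include <stdint.h>
  #include <string.h>
  #include <unistd.h>

  /* Mirrors the ccw_io_region layout shown above (the uapi header
   * <linux/vfio_ccw.h> provides the canonical definition). */
  struct ccw_io_region {
          uint8_t  orb_area[12];
          uint8_t  scsw_area[12];
          uint8_t  irb_area[96];
          uint32_t ret_code;
  } __attribute__((packed));

  /* Submit a channel program through the vfio-ccw I/O region. */
  int submit_channel_program(int device_fd, unsigned long region_offset,
                             const void *orb, const void *scsw)
  {
          struct ccw_io_region io_region;

          memset(&io_region, 0, sizeof(io_region));
          memcpy(io_region.orb_area, orb, sizeof(io_region.orb_area));
          memcpy(io_region.scsw_area, scsw, sizeof(io_region.scsw_area));

          /* Writing the region hands the ORB/SCSW to the host kernel,
           * which translates the channel program and issues it. */
          if (pwrite(device_fd, &io_region, sizeof(io_region),
                     region_offset) != sizeof(io_region))
                  return -1;

          /* Reading the region back retrieves ret_code; once the I/O
           * interrupt has been delivered, irb_area holds the IRB too. */
          if (pread(device_fd, &io_region, sizeof(io_region),
                    region_offset) != sizeof(io_region))
                  return -1;

          return io_region.ret_code;
  }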
While starting an I/O request, orb_area should be filled with the
guest ORB, and scsw_area should be filled with the SCSW of the Virtual
@ -205,7 +212,7 @@ vfio-ccw follows what vfio-pci did on the s390 platform and uses
vfio-iommu-type1 as the vfio iommu backend.
* CCW translation APIs
A group of APIs (start with `cp_`) to do CCW translation. The CCWs
passed in by a user space program are organized with their guest
physical memory addresses. These APIs will copy the CCWs into kernel
space, and assemble a runnable kernel channel program by updating the
@ -217,12 +224,14 @@ vfio-iommu-type1 as the vfio iommu backend.
This driver utilizes the CCW translation APIs and introduces
vfio_ccw, which is the driver for the I/O subchannel devices you want
to pass through.
vfio_ccw implements the following vfio ioctls::
VFIO_DEVICE_GET_INFO
VFIO_DEVICE_GET_IRQ_INFO
VFIO_DEVICE_GET_REGION_INFO
VFIO_DEVICE_RESET
VFIO_DEVICE_SET_IRQS
This provides an I/O region, so that the user space program can pass a
channel program to the kernel, to do further CCW translation before
issuing them to a real device.
@ -236,32 +245,49 @@ bit more detail how an I/O request triggered by the QEMU guest will be
handled (without error handling).
Explanation:

- Q1-Q7: QEMU side process.
- K1-K6: Kernel side process.

Q1.
   Get I/O region info during initialization.

Q2.
   Setup event notifier and handler to handle I/O completion.

... ...

Q3.
   Intercept a ssch instruction.

Q4.
   Write the guest channel program and ORB to the I/O region.

K1.
   Copy from guest to kernel.

K2.
   Translate the guest channel program to a host kernel space
   channel program, which becomes runnable for a real device.

K3.
   With the necessary information contained in the orb passed in
   by QEMU, issue the ccwchain to the device.

K4.
   Return the ssch CC code.

Q5.
   Return the CC code to the guest.

... ...

K5.
   Interrupt handler gets the I/O result and writes the result to
   the I/O region.

K6.
   Signal QEMU to retrieve the result.

Q6.
   Get the signal and event handler reads out the result from the I/O
   region.

Q7.
   Update the irb for the guest.
Limitations
-----------
@ -295,6 +321,6 @@ Reference
1. ESA/s390 Principles of Operation manual (IBM Form. No. SA22-7832)
2. ESA/390 Common I/O Device Commands manual (IBM Form. No. SA22-7204)
3. https://en.wikipedia.org/wiki/Channel_I/O
4. Documentation/s390/cds.rst
5. Documentation/vfio.txt
6. Documentation/vfio-mediated-device.txt

View File

@ -1,4 +1,6 @@
==================================
The s390 SCSI dump tool (zfcpdump)
==================================
System z machines (z900 or higher) provide hardware support for creating system
dumps on SCSI disks. The dump process is initiated by booting a dump tool, which

View File

@ -23,7 +23,6 @@ show up in /proc/sys/kernel:
- auto_msgmni
- bootloader_type [ X86 only ]
- bootloader_version [ X86 only ]
- callhome [ S390 only ]
- cap_last_cap
- core_pattern
- core_pipe_limit
@ -171,21 +170,6 @@ Documentation/x86/boot.txt for additional information.
==============================================================
callhome:
Controls the kernel's callhome behavior in case of a kernel panic.
The s390 hardware allows an operating system to send a notification
to a service organization (callhome) in case of an operating system panic.
When the value in this file is 0 (which is the default behavior)
nothing happens in case of a kernel panic. If this value is set to "1"
the complete kernel oops message is send to the IBM customer service
organization in case the mainframe the Linux operating system is running
on has a service contract with IBM.
==============================================================
cap_last_cap
Highest valid capability of the running kernel. Exports

View File

@ -13718,7 +13718,7 @@ L: linux-s390@vger.kernel.org
L: kvm@vger.kernel.org
S: Supported
F: drivers/s390/cio/vfio_ccw*
F: Documentation/s390/vfio-ccw.txt
F: Documentation/s390/vfio-ccw.rst
F: include/uapi/linux/vfio_ccw.h
S390 ZCRYPT DRIVER
@ -13738,7 +13738,7 @@ S: Supported
F: drivers/s390/crypto/vfio_ap_drv.c
F: drivers/s390/crypto/vfio_ap_private.h
F: drivers/s390/crypto/vfio_ap_ops.c
F: Documentation/s390/vfio-ap.txt
F: Documentation/s390/vfio-ap.rst
S390 ZFCP DRIVER
M: Steffen Maier <maier@linux.ibm.com>

View File

@ -346,8 +346,6 @@ static inline unsigned long __pack_fe01(unsigned int fpmode)
#define spin_cpu_relax() barrier()
#define spin_cpu_yield() spin_cpu_relax()
#define spin_end() HMT_medium()
#define spin_until_cond(cond) \

View File

@ -1,4 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
config ARCH_HAS_MEM_ENCRYPT
def_bool y
config MMU
def_bool y
@ -30,7 +33,7 @@ config GENERIC_BUG_RELATIVE_POINTERS
def_bool y
config GENERIC_LOCKBREAK
def_bool y if SMP && PREEMPT
def_bool y if PREEMPT
config PGSTE
def_bool y if KVM
@ -113,7 +116,6 @@ config S390
select DYNAMIC_FTRACE if FUNCTION_TRACER
select GENERIC_CLOCKEVENTS
select GENERIC_CPU_AUTOPROBE
select GENERIC_CPU_DEVICES if !SMP
select GENERIC_CPU_VULNERABILITIES
select GENERIC_FIND_FIRST_BIT
select GENERIC_SMP_IDLE_THREAD
@ -187,6 +189,8 @@ config S390
select VIRT_CPU_ACCOUNTING
select ARCH_HAS_SCALED_CPUTIME
select HAVE_NMI
select SWIOTLB
select GENERIC_ALLOCATOR
config SCHED_OMIT_FRAME_POINTER
@ -399,27 +403,10 @@ config SYSVIPC_COMPAT
config SMP
def_bool y
prompt "Symmetric multi-processing support"
---help---
This enables support for systems with more than one CPU. If you have
a system with only one CPU, like most personal computers, say N. If
you have a system with more than one CPU, say Y.
If you say N here, the kernel will run on uni- and multiprocessor
machines, but will use only one CPU of a multiprocessor machine. If
you say Y here, the kernel will run on many, but not all,
uniprocessor machines. On a uniprocessor machine, the kernel
will run faster if you say N here.
See also the SMP-HOWTO available at
<http://www.tldp.org/docs.html#howto>.
Even if you don't know what to do here, say Y.
config NR_CPUS
int "Maximum number of CPUs (2-512)"
range 2 512
depends on SMP
default "64"
help
This allows you to specify the maximum number of CPUs which this
@ -431,12 +418,6 @@ config NR_CPUS
config HOTPLUG_CPU
def_bool y
prompt "Support for hot-pluggable CPUs"
depends on SMP
help
Say Y here to be able to turn CPUs off and on. CPUs
can be controlled through /sys/devices/system/cpu/cpu#.
Say N if you want to disable CPU hotplug.
# Some NUMA nodes have memory ranges that span
# other nodes. Even though a pfn is valid and
@ -448,7 +429,7 @@ config NODES_SPAN_OTHER_NODES
config NUMA
bool "NUMA support"
depends on SMP && SCHED_TOPOLOGY
depends on SCHED_TOPOLOGY
default n
help
Enable NUMA support
@ -523,7 +504,6 @@ config SCHED_DRAWER
config SCHED_TOPOLOGY
def_bool y
prompt "Topology scheduler support"
depends on SMP
select SCHED_SMT
select SCHED_MC
select SCHED_BOOK
@ -763,7 +743,7 @@ config PCI_NR_FUNCTIONS
This allows you to specify the maximum number of PCI functions which
this kernel will support.
endif # PCI
endif # PCI
config HAS_IOMEM
def_bool PCI
@ -829,16 +809,15 @@ menu "Dump support"
config CRASH_DUMP
bool "kernel crash dumps"
depends on SMP
select KEXEC
help
Generate crash dump after being started by kexec.
Crash dump kernels are loaded in the main kernel with kexec-tools
into a specially reserved region and then later executed after
a crash by kdump/kexec.
Refer to <file:Documentation/s390/zfcpdump.txt> for more details on this.
Refer to <file:Documentation/s390/zfcpdump.rst> for more details on this.
This option also enables s390 zfcpdump.
See also <file:Documentation/s390/zfcpdump.txt>
See also <file:Documentation/s390/zfcpdump.rst>
endmenu

View File

@ -88,6 +88,7 @@ CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_AP=m
CONFIG_VFIO_CCW=m
CONFIG_CRASH_DUMP=y
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
@ -498,6 +499,7 @@ CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
CONFIG_S390_AP_IOMMU=y
CONFIG_S390_CCW_IOMMU=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y

View File

@ -1,21 +1,22 @@
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_USELIB=y
CONFIG_AUDIT=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
# CONFIG_CPU_ISOLATION is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_CGROUPS=y
CONFIG_NUMA_BALANCING=y
# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
CONFIG_BLK_CGROUP=y
CONFIG_CGROUP_SCHED=y
CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_CGROUP_PIDS=y
CONFIG_CGROUP_FREEZER=y
@ -26,98 +27,402 @@ CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_PERF=y
CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_BPF_SYSCALL=y
CONFIG_USERFAULTFD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_OPROFILE=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_MODULE_SIG=y
CONFIG_MODULE_SIG_SHA256=y
CONFIG_BLK_DEV_INTEGRITY=y
CONFIG_BLK_DEV_THROTTLING=y
CONFIG_BLK_WBT=y
CONFIG_BLK_WBT_SQ=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
CONFIG_BSD_DISKLABEL=y
CONFIG_MINIX_SUBPARTITION=y
CONFIG_SOLARIS_X86_PARTITION=y
CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_CFQ_GROUP_IOSCHED=y
CONFIG_DEFAULT_DEADLINE=y
CONFIG_LIVEPATCH=y
CONFIG_NR_CPUS=256
CONFIG_TUNE_ZEC12=y
CONFIG_NR_CPUS=512
CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_VERIFY_SIG=y
CONFIG_CRASH_DUMP=y
CONFIG_HIBERNATION=y
CONFIG_PM_DEBUG=y
CONFIG_CMM=m
CONFIG_OPROFILE=y
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
CONFIG_STATIC_KEYS_SELFTEST=y
CONFIG_MODULES=y
CONFIG_MODULE_UNLOAD=y
CONFIG_BLK_DEV_INTEGRITY=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
CONFIG_DEFAULT_DEADLINE=y
CONFIG_BINFMT_MISC=m
CONFIG_EXPOLINE=y
CONFIG_EXPOLINE_AUTO=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CLEANCACHE=y
CONFIG_FRONTSWAP=y
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_ZSWAP=y
CONFIG_ZBUD=m
CONFIG_ZSMALLOC=m
CONFIG_ZSMALLOC_STAT=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_PCI=y
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_AP=m
CONFIG_VFIO_CCW=m
CONFIG_CRASH_DUMP=y
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
CONFIG_PM_DEBUG=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
CONFIG_NET_KEY=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_ROUTE_MULTIPATH=y
CONFIG_IP_ROUTE_VERBOSE=y
CONFIG_NET_IPIP=m
CONFIG_NET_IPGRE_DEMUX=m
CONFIG_NET_IPGRE=m
CONFIG_NET_IPGRE_BROADCAST=y
CONFIG_IP_MROUTE=y
CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
CONFIG_SYN_COOKIES=y
CONFIG_NET_IPVTI=m
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_IPCOMP=m
CONFIG_INET_XFRM_MODE_TRANSPORT=m
CONFIG_INET_XFRM_MODE_TUNNEL=m
CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_HSTCP=m
CONFIG_TCP_CONG_HYBLA=m
CONFIG_TCP_CONG_SCALABLE=m
CONFIG_TCP_CONG_LP=m
CONFIG_TCP_CONG_VENO=m
CONFIG_TCP_CONG_YEAH=m
CONFIG_TCP_CONG_ILLINOIS=m
CONFIG_IPV6_ROUTER_PREF=y
CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
CONFIG_INET6_IPCOMP=m
CONFIG_IPV6_MIP6=m
CONFIG_INET6_XFRM_MODE_TRANSPORT=m
CONFIG_INET6_XFRM_MODE_TUNNEL=m
CONFIG_INET6_XFRM_MODE_BEET=m
CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
CONFIG_IPV6_VTI=m
CONFIG_IPV6_SIT=m
CONFIG_IPV6_GRE=m
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6_SUBTREES=y
CONFIG_NETFILTER=y
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_SECMARK=y
CONFIG_NF_CONNTRACK_EVENTS=y
CONFIG_NF_CONNTRACK_TIMEOUT=y
CONFIG_NF_CONNTRACK_TIMESTAMP=y
CONFIG_NF_CONNTRACK_AMANDA=m
CONFIG_NF_CONNTRACK_FTP=m
CONFIG_NF_CONNTRACK_H323=m
CONFIG_NF_CONNTRACK_IRC=m
CONFIG_NF_CONNTRACK_NETBIOS_NS=m
CONFIG_NF_CONNTRACK_SNMP=m
CONFIG_NF_CONNTRACK_PPTP=m
CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
CONFIG_NF_TABLES=m
CONFIG_NFT_CT=m
CONFIG_NFT_COUNTER=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
CONFIG_NFT_NAT=m
CONFIG_NFT_COMPAT=m
CONFIG_NFT_HASH=m
CONFIG_NETFILTER_XT_SET=m
CONFIG_NETFILTER_XT_TARGET_AUDIT=m
CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
CONFIG_NETFILTER_XT_TARGET_CT=m
CONFIG_NETFILTER_XT_TARGET_DSCP=m
CONFIG_NETFILTER_XT_TARGET_HMARK=m
CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
CONFIG_NETFILTER_XT_TARGET_LOG=m
CONFIG_NETFILTER_XT_TARGET_MARK=m
CONFIG_NETFILTER_XT_TARGET_NFLOG=m
CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
CONFIG_NETFILTER_XT_TARGET_TEE=m
CONFIG_NETFILTER_XT_TARGET_TPROXY=m
CONFIG_NETFILTER_XT_TARGET_TRACE=m
CONFIG_NETFILTER_XT_TARGET_SECMARK=m
CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
CONFIG_NETFILTER_XT_MATCH_BPF=m
CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
CONFIG_NETFILTER_XT_MATCH_COMMENT=m
CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
CONFIG_NETFILTER_XT_MATCH_CPU=m
CONFIG_NETFILTER_XT_MATCH_DCCP=m
CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
CONFIG_NETFILTER_XT_MATCH_DSCP=m
CONFIG_NETFILTER_XT_MATCH_ESP=m
CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
CONFIG_NETFILTER_XT_MATCH_HELPER=m
CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
CONFIG_NETFILTER_XT_MATCH_IPVS=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
CONFIG_NETFILTER_XT_MATCH_LIMIT=m
CONFIG_NETFILTER_XT_MATCH_MAC=m
CONFIG_NETFILTER_XT_MATCH_MARK=m
CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
CONFIG_NETFILTER_XT_MATCH_NFACCT=m
CONFIG_NETFILTER_XT_MATCH_OSF=m
CONFIG_NETFILTER_XT_MATCH_OWNER=m
CONFIG_NETFILTER_XT_MATCH_POLICY=m
CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
CONFIG_NETFILTER_XT_MATCH_QUOTA=m
CONFIG_NETFILTER_XT_MATCH_RATEEST=m
CONFIG_NETFILTER_XT_MATCH_REALM=m
CONFIG_NETFILTER_XT_MATCH_RECENT=m
CONFIG_NETFILTER_XT_MATCH_STATE=m
CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
CONFIG_NETFILTER_XT_MATCH_STRING=m
CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
CONFIG_NETFILTER_XT_MATCH_TIME=m
CONFIG_NETFILTER_XT_MATCH_U32=m
CONFIG_IP_SET=m
CONFIG_IP_SET_BITMAP_IP=m
CONFIG_IP_SET_BITMAP_IPMAC=m
CONFIG_IP_SET_BITMAP_PORT=m
CONFIG_IP_SET_HASH_IP=m
CONFIG_IP_SET_HASH_IPPORT=m
CONFIG_IP_SET_HASH_IPPORTIP=m
CONFIG_IP_SET_HASH_IPPORTNET=m
CONFIG_IP_SET_HASH_NETPORTNET=m
CONFIG_IP_SET_HASH_NET=m
CONFIG_IP_SET_HASH_NETNET=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
CONFIG_IP_VS=m
CONFIG_IP_VS_PROTO_TCP=y
CONFIG_IP_VS_PROTO_UDP=y
CONFIG_IP_VS_PROTO_ESP=y
CONFIG_IP_VS_PROTO_AH=y
CONFIG_IP_VS_RR=m
CONFIG_IP_VS_WRR=m
CONFIG_IP_VS_LC=m
CONFIG_IP_VS_WLC=m
CONFIG_IP_VS_LBLC=m
CONFIG_IP_VS_LBLCR=m
CONFIG_IP_VS_DH=m
CONFIG_IP_VS_SH=m
CONFIG_IP_VS_SED=m
CONFIG_IP_VS_NQ=m
CONFIG_IP_VS_FTP=m
CONFIG_IP_VS_PE_SIP=m
CONFIG_NF_CONNTRACK_IPV4=m
CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP_NF_MATCH_TTL=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_TARGET_MASQUERADE=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_CLUSTERIP=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_SECURITY=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_TABLES_IPV6=y
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
CONFIG_IP6_NF_MATCH_FRAG=m
CONFIG_IP6_NF_MATCH_OPTS=m
CONFIG_IP6_NF_MATCH_HL=m
CONFIG_IP6_NF_MATCH_IPV6HEADER=m
CONFIG_IP6_NF_MATCH_MH=m
CONFIG_IP6_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_MATCH_RT=m
CONFIG_IP6_NF_TARGET_HL=m
CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP6_NF_MANGLE=m
CONFIG_IP6_NF_RAW=m
CONFIG_IP6_NF_SECURITY=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_NF_TABLES_BRIDGE=y
CONFIG_RDS=m
CONFIG_RDS_RDMA=m
CONFIG_RDS_TCP=m
CONFIG_L2TP=m
CONFIG_L2TP_DEBUGFS=m
CONFIG_VLAN_8021Q=y
CONFIG_L2TP_V3=y
CONFIG_L2TP_IP=m
CONFIG_L2TP_ETH=m
CONFIG_BRIDGE=m
CONFIG_VLAN_8021Q=m
CONFIG_VLAN_8021Q_GVRP=y
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_HTB=m
CONFIG_NET_SCH_HFSC=m
CONFIG_NET_SCH_PRIO=m
CONFIG_NET_SCH_MULTIQ=m
CONFIG_NET_SCH_RED=m
CONFIG_NET_SCH_SFB=m
CONFIG_NET_SCH_SFQ=m
CONFIG_NET_SCH_TEQL=m
CONFIG_NET_SCH_TBF=m
CONFIG_NET_SCH_GRED=m
CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCH_NETEM=m
CONFIG_NET_SCH_DRR=m
CONFIG_NET_SCH_MQPRIO=m
CONFIG_NET_SCH_CHOKE=m
CONFIG_NET_SCH_QFQ=m
CONFIG_NET_SCH_CODEL=m
CONFIG_NET_SCH_FQ_CODEL=m
CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_SCH_PLUG=m
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_ROUTE4=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_U32=m
CONFIG_CLS_U32_PERF=y
CONFIG_CLS_U32_MARK=y
CONFIG_NET_CLS_RSVP=m
CONFIG_NET_CLS_RSVP6=m
CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_CGROUP=y
CONFIG_NET_CLS_BPF=m
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=y
CONFIG_NET_ACT_POLICE=m
CONFIG_NET_ACT_GACT=m
CONFIG_GACT_PROB=y
CONFIG_NET_ACT_MIRRED=m
CONFIG_NET_ACT_IPT=m
CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_CSUM=m
CONFIG_DNS_RESOLVER=y
CONFIG_OPENVSWITCH=m
CONFIG_VSOCKETS=m
CONFIG_VIRTIO_VSOCKETS=m
CONFIG_NETLINK_DIAG=m
CONFIG_CGROUP_NET_PRIO=y
CONFIG_BPF_JIT=y
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_NET_PKTGEN=m
CONFIG_DEVTMPFS=y
CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=0
CONFIG_CONNECTOR=y
CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_CRYPTOLOOP=m
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
CONFIG_VIRTIO_BLK=y
CONFIG_BLK_DEV_RBD=m
CONFIG_BLK_DEV_NVME=m
CONFIG_ENCLOSURE_SERVICES=m
CONFIG_GENWQE=m
CONFIG_RAID_ATTRS=m
CONFIG_SCSI=y
# CONFIG_SCSI_MQ_DEFAULT is not set
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=y
CONFIG_BLK_DEV_SR=y
CONFIG_BLK_DEV_SR_VENDOR=y
CONFIG_CHR_DEV_ST=m
CONFIG_CHR_DEV_OSST=m
CONFIG_BLK_DEV_SR=m
CONFIG_CHR_DEV_SG=y
CONFIG_CHR_DEV_SCH=m
CONFIG_SCSI_ENCLOSURE=m
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_SPI_ATTRS=m
CONFIG_SCSI_FC_ATTRS=y
CONFIG_SCSI_SAS_LIBSAS=m
CONFIG_SCSI_SRP_ATTRS=m
CONFIG_ISCSI_TCP=m
CONFIG_SCSI_DEBUG=m
CONFIG_ZFCP=y
CONFIG_SCSI_VIRTIO=y
CONFIG_SCSI_VIRTIO=m
CONFIG_SCSI_DH=y
CONFIG_SCSI_DH_RDAC=m
CONFIG_SCSI_DH_HP_SW=m
CONFIG_SCSI_DH_EMC=m
CONFIG_SCSI_DH_ALUA=m
CONFIG_SCSI_OSD_INITIATOR=m
CONFIG_SCSI_OSD_ULD=m
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
CONFIG_MD_LINEAR=m
CONFIG_MD_MULTIPATH=m
CONFIG_BLK_DEV_DM=y
CONFIG_MD_FAULTY=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
CONFIG_DM_THIN_PROVISIONING=m
CONFIG_DM_MIRROR=m
CONFIG_DM_LOG_USERSPACE=m
CONFIG_DM_RAID=m
@ -125,71 +430,216 @@ CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_MULTIPATH_QL=m
CONFIG_DM_MULTIPATH_ST=m
CONFIG_DM_DELAY=m
CONFIG_DM_UEVENT=y
CONFIG_DM_FLAKEY=m
CONFIG_DM_VERITY=m
CONFIG_DM_SWITCH=m
CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
CONFIG_EQUALIZER=m
CONFIG_IFB=m
CONFIG_MACVLAN=m
CONFIG_MACVTAP=m
CONFIG_VXLAN=m
CONFIG_TUN=m
CONFIG_VIRTIO_NET=y
# CONFIG_NET_VENDOR_ALACRITECH is not set
# CONFIG_NET_VENDOR_AURORA is not set
# CONFIG_NET_VENDOR_CORTINA is not set
# CONFIG_NET_VENDOR_SOLARFLARE is not set
# CONFIG_NET_VENDOR_SOCIONEXT is not set
# CONFIG_NET_VENDOR_SYNOPSYS is not set
# CONFIG_INPUT is not set
CONFIG_VETH=m
CONFIG_VIRTIO_NET=m
CONFIG_NLMON=m
# CONFIG_NET_VENDOR_ARC is not set
# CONFIG_NET_VENDOR_CHELSIO is not set
# CONFIG_NET_VENDOR_INTEL is not set
# CONFIG_NET_VENDOR_MARVELL is not set
CONFIG_MLX4_EN=m
CONFIG_MLX5_CORE=m
CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_NATSEMI is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
CONFIG_PPP_MPPE=m
CONFIG_PPPOE=m
CONFIG_PPTP=m
CONFIG_PPPOL2TP=m
CONFIG_PPP_ASYNC=m
CONFIG_PPP_SYNC_TTY=m
CONFIG_ISM=m
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
# CONFIG_VT is not set
CONFIG_DEVKMEM=y
CONFIG_LEGACY_PTY_COUNT=0
CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_RAW_DRIVER=m
CONFIG_VIRTIO_BALLOON=y
CONFIG_HANGCHECK_TIMER=m
CONFIG_TN3270_FS=y
# CONFIG_HWMON is not set
CONFIG_WATCHDOG=y
CONFIG_WATCHDOG_NOWAYOUT=y
CONFIG_SOFT_WATCHDOG=m
CONFIG_DIAG288_WATCHDOG=m
CONFIG_DRM=y
CONFIG_DRM_VIRTIO_GPU=y
CONFIG_FRAMEBUFFER_CONSOLE=y
# CONFIG_HID is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_MLX4_INFINIBAND=m
CONFIG_MLX5_INFINIBAND=m
CONFIG_VFIO=m
CONFIG_VFIO_PCI=m
CONFIG_VFIO_MDEV=m
CONFIG_VFIO_MDEV_DEVICE=m
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
CONFIG_S390_AP_IOMMU=y
CONFIG_S390_CCW_IOMMU=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
CONFIG_JBD2_DEBUG=y
CONFIG_JFS_FS=m
CONFIG_JFS_POSIX_ACL=y
CONFIG_JFS_SECURITY=y
CONFIG_JFS_STATISTICS=y
CONFIG_XFS_FS=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_XFS_RT=y
CONFIG_GFS2_FS=m
CONFIG_GFS2_FS_LOCKING_DLM=y
CONFIG_OCFS2_FS=m
CONFIG_BTRFS_FS=y
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_NILFS2_FS=m
CONFIG_FS_DAX=y
CONFIG_EXPORTFS_BLOCK_OPS=y
CONFIG_FS_ENCRYPTION=y
CONFIG_FANOTIFY=y
CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_QFMT_V1=m
CONFIG_QFMT_V2=m
CONFIG_AUTOFS4_FS=m
CONFIG_FUSE_FS=y
CONFIG_CUSE=m
CONFIG_OVERLAY_FS=m
CONFIG_FSCACHE=m
CONFIG_CACHEFILES=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
CONFIG_NTFS_FS=m
CONFIG_NTFS_RW=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
# CONFIG_NETWORK_FILESYSTEMS is not set
CONFIG_CONFIGFS_FS=m
CONFIG_ECRYPT_FS=m
CONFIG_CRAMFS=m
CONFIG_SQUASHFS=m
CONFIG_SQUASHFS_XATTR=y
CONFIG_SQUASHFS_LZO=y
CONFIG_SQUASHFS_XZ=y
CONFIG_ROMFS_FS=m
CONFIG_NFS_FS=m
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
CONFIG_NFSD=m
CONFIG_NFSD_V3_ACL=y
CONFIG_NFSD_V4=y
CONFIG_NFSD_V4_SECURITY_LABEL=y
CONFIG_CIFS=m
CONFIG_CIFS_STATS=y
CONFIG_CIFS_STATS2=y
CONFIG_CIFS_WEAK_PW_HASH=y
CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
CONFIG_CIFS_POSIX=y
# CONFIG_CIFS_DEBUG is not set
CONFIG_CIFS_DFS_UPCALL=y
CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_CODEPAGE_850=m
CONFIG_NLS_ASCII=m
CONFIG_NLS_ISO8859_1=m
CONFIG_NLS_ISO8859_15=m
CONFIG_NLS_UTF8=m
CONFIG_DLM=m
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_GDB_SCRIPTS=y
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_FRAME_WARN=1024
CONFIG_UNUSED_SYMBOLS=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_RCU_TORTURE_TEST=m
CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_LATENCYTOP=y
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_FUNCTION_PROFILER=y
CONFIG_HIST_TRIGGERS=y
CONFIG_LKDTM=m
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
CONFIG_TEST_BPF=m
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_S390_PTDUMP=y
CONFIG_PERSISTENT_KEYRINGS=y
CONFIG_BIG_KEYS=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
CONFIG_SECURITY_SELINUX_DISABLE=y
CONFIG_INTEGRITY_SIGNATURE=y
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
CONFIG_IMA=y
CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_APPRAISE=y
CONFIG_CRYPTO_FIPS=y
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_USER=m
# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_AUTHENC=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CCM=m
CONFIG_CRYPTO_GCM=m
CONFIG_CRYPTO_CBC=y
CONFIG_CRYPTO_CFB=m
CONFIG_CRYPTO_CTS=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_OFB=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_XTS=m
CONFIG_CRYPTO_CMAC=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_CRC32=m
CONFIG_CRYPTO_MD4=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA256=y
CONFIG_CRYPTO_SHA512=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
CONFIG_CRYPTO_ARC4=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
@ -199,16 +649,16 @@ CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_SM4=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_DEFLATE=m
CONFIG_CRYPTO_842=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
CONFIG_CRYPTO_ANSI_CPRNG=m
CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
@ -217,38 +667,14 @@ CONFIG_CRYPTO_SHA256_S390=m
CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRC7=m
# CONFIG_XZ_DEC_X86 is not set
# CONFIG_XZ_DEC_POWERPC is not set
# CONFIG_XZ_DEC_IA64 is not set
# CONFIG_XZ_DEC_ARM is not set
# CONFIG_XZ_DEC_ARMTHUMB is not set
# CONFIG_XZ_DEC_SPARC is not set
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_GDB_SCRIPTS=y
CONFIG_UNUSED_SYMBOLS=y
CONFIG_DEBUG_SECTION_MISMATCH=y
CONFIG_DEBUG_FORCE_WEAK_PER_CPU=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_PAGEALLOC=y
CONFIG_DETECT_HUNG_TASK=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_PROVE_LOCKING=y
CONFIG_LOCK_STAT=y
CONFIG_DEBUG_LOCKDEP=y
CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_DEBUG_LIST=y
CONFIG_DEBUG_SG=y
CONFIG_DEBUG_NOTIFIERS=y
CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_LATENCYTOP=y
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_FUNCTION_PROFILER=y
# CONFIG_RUNTIME_TESTING_MENU is not set
CONFIG_S390_PTDUMP=y
CONFIG_CRC8=m
CONFIG_CORDIC=m
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
CONFIG_KVM_S390_UCONTROL=y
CONFIG_VHOST_NET=m
CONFIG_VHOST_VSOCK=m

View File

@ -1,678 +0,0 @@
CONFIG_SYSVIPC=y
CONFIG_POSIX_MQUEUE=y
CONFIG_AUDIT=y
CONFIG_NO_HZ_IDLE=y
CONFIG_HIGH_RES_TIMERS=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
CONFIG_TASKSTATS=y
CONFIG_TASK_DELAY_ACCT=y
CONFIG_TASK_XACCT=y
CONFIG_TASK_IO_ACCOUNTING=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_NUMA_BALANCING=y
# CONFIG_NUMA_BALANCING_DEFAULT_ENABLED is not set
CONFIG_MEMCG=y
CONFIG_MEMCG_SWAP=y
CONFIG_BLK_CGROUP=y
CONFIG_CFS_BANDWIDTH=y
CONFIG_RT_GROUP_SCHED=y
CONFIG_CGROUP_PIDS=y
CONFIG_CGROUP_FREEZER=y
CONFIG_CGROUP_HUGETLB=y
CONFIG_CPUSETS=y
CONFIG_CGROUP_DEVICE=y
CONFIG_CGROUP_CPUACCT=y
CONFIG_CGROUP_PERF=y
CONFIG_NAMESPACES=y
CONFIG_USER_NS=y
CONFIG_SCHED_AUTOGROUP=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_EXPERT=y
# CONFIG_SYSFS_SYSCALL is not set
CONFIG_CHECKPOINT_RESTORE=y
CONFIG_BPF_SYSCALL=y
CONFIG_USERFAULTFD=y
# CONFIG_COMPAT_BRK is not set
CONFIG_PROFILING=y
CONFIG_OPROFILE=m
CONFIG_KPROBES=y
CONFIG_JUMP_LABEL=y
CONFIG_MODULES=y
CONFIG_MODULE_FORCE_LOAD=y
CONFIG_MODULE_UNLOAD=y
CONFIG_MODULE_FORCE_UNLOAD=y
CONFIG_MODVERSIONS=y
CONFIG_MODULE_SRCVERSION_ALL=y
CONFIG_MODULE_SIG=y
CONFIG_MODULE_SIG_SHA256=y
CONFIG_BLK_DEV_INTEGRITY=y
CONFIG_BLK_DEV_THROTTLING=y
CONFIG_BLK_WBT=y
CONFIG_BLK_WBT_SQ=y
CONFIG_PARTITION_ADVANCED=y
CONFIG_IBM_PARTITION=y
CONFIG_BSD_DISKLABEL=y
CONFIG_MINIX_SUBPARTITION=y
CONFIG_SOLARIS_X86_PARTITION=y
CONFIG_UNIXWARE_DISKLABEL=y
CONFIG_CFQ_GROUP_IOSCHED=y
CONFIG_DEFAULT_DEADLINE=y
CONFIG_LIVEPATCH=y
CONFIG_TUNE_ZEC12=y
CONFIG_NR_CPUS=512
CONFIG_NUMA=y
CONFIG_HZ_100=y
CONFIG_KEXEC_FILE=y
CONFIG_KEXEC_VERIFY_SIG=y
CONFIG_EXPOLINE=y
CONFIG_EXPOLINE_AUTO=y
CONFIG_MEMORY_HOTPLUG=y
CONFIG_MEMORY_HOTREMOVE=y
CONFIG_KSM=y
CONFIG_TRANSPARENT_HUGEPAGE=y
CONFIG_CLEANCACHE=y
CONFIG_FRONTSWAP=y
CONFIG_MEM_SOFT_DIRTY=y
CONFIG_ZSWAP=y
CONFIG_ZBUD=m
CONFIG_ZSMALLOC=m
CONFIG_ZSMALLOC_STAT=y
CONFIG_DEFERRED_STRUCT_PAGE_INIT=y
CONFIG_IDLE_PAGE_TRACKING=y
CONFIG_PCI=y
CONFIG_HOTPLUG_PCI=y
CONFIG_HOTPLUG_PCI_S390=y
CONFIG_CHSC_SCH=y
CONFIG_VFIO_AP=m
CONFIG_CRASH_DUMP=y
CONFIG_BINFMT_MISC=m
CONFIG_HIBERNATION=y
CONFIG_PM_DEBUG=y
CONFIG_NET=y
CONFIG_PACKET=y
CONFIG_PACKET_DIAG=m
CONFIG_UNIX=y
CONFIG_UNIX_DIAG=m
CONFIG_XFRM_USER=m
CONFIG_NET_KEY=m
CONFIG_SMC=m
CONFIG_SMC_DIAG=m
CONFIG_INET=y
CONFIG_IP_MULTICAST=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
CONFIG_IP_ROUTE_MULTIPATH=y
CONFIG_IP_ROUTE_VERBOSE=y
CONFIG_NET_IPIP=m
CONFIG_NET_IPGRE_DEMUX=m
CONFIG_NET_IPGRE=m
CONFIG_NET_IPGRE_BROADCAST=y
CONFIG_IP_MROUTE=y
CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
CONFIG_IP_PIMSM_V1=y
CONFIG_IP_PIMSM_V2=y
CONFIG_SYN_COOKIES=y
CONFIG_NET_IPVTI=m
CONFIG_INET_AH=m
CONFIG_INET_ESP=m
CONFIG_INET_IPCOMP=m
CONFIG_INET_XFRM_MODE_TRANSPORT=m
CONFIG_INET_XFRM_MODE_TUNNEL=m
CONFIG_INET_XFRM_MODE_BEET=m
CONFIG_INET_DIAG=m
CONFIG_INET_UDP_DIAG=m
CONFIG_TCP_CONG_ADVANCED=y
CONFIG_TCP_CONG_HSTCP=m
CONFIG_TCP_CONG_HYBLA=m
CONFIG_TCP_CONG_SCALABLE=m
CONFIG_TCP_CONG_LP=m
CONFIG_TCP_CONG_VENO=m
CONFIG_TCP_CONG_YEAH=m
CONFIG_TCP_CONG_ILLINOIS=m
CONFIG_IPV6_ROUTER_PREF=y
CONFIG_INET6_AH=m
CONFIG_INET6_ESP=m
CONFIG_INET6_IPCOMP=m
CONFIG_IPV6_MIP6=m
CONFIG_INET6_XFRM_MODE_TRANSPORT=m
CONFIG_INET6_XFRM_MODE_TUNNEL=m
CONFIG_INET6_XFRM_MODE_BEET=m
CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION=m
CONFIG_IPV6_VTI=m
CONFIG_IPV6_SIT=m
CONFIG_IPV6_GRE=m
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_IPV6_SUBTREES=y
CONFIG_NETFILTER=y
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_SECMARK=y
CONFIG_NF_CONNTRACK_EVENTS=y
CONFIG_NF_CONNTRACK_TIMEOUT=y
CONFIG_NF_CONNTRACK_TIMESTAMP=y
CONFIG_NF_CONNTRACK_AMANDA=m
CONFIG_NF_CONNTRACK_FTP=m
CONFIG_NF_CONNTRACK_H323=m
CONFIG_NF_CONNTRACK_IRC=m
CONFIG_NF_CONNTRACK_NETBIOS_NS=m
CONFIG_NF_CONNTRACK_SNMP=m
CONFIG_NF_CONNTRACK_PPTP=m
CONFIG_NF_CONNTRACK_SANE=m
CONFIG_NF_CONNTRACK_SIP=m
CONFIG_NF_CONNTRACK_TFTP=m
CONFIG_NF_CT_NETLINK=m
CONFIG_NF_CT_NETLINK_TIMEOUT=m
CONFIG_NF_TABLES=m
CONFIG_NFT_CT=m
CONFIG_NFT_COUNTER=m
CONFIG_NFT_LOG=m
CONFIG_NFT_LIMIT=m
CONFIG_NFT_NAT=m
CONFIG_NFT_COMPAT=m
CONFIG_NFT_HASH=m
CONFIG_NETFILTER_XT_SET=m
CONFIG_NETFILTER_XT_TARGET_AUDIT=m
CONFIG_NETFILTER_XT_TARGET_CHECKSUM=m
CONFIG_NETFILTER_XT_TARGET_CLASSIFY=m
CONFIG_NETFILTER_XT_TARGET_CONNMARK=m
CONFIG_NETFILTER_XT_TARGET_CONNSECMARK=m
CONFIG_NETFILTER_XT_TARGET_CT=m
CONFIG_NETFILTER_XT_TARGET_DSCP=m
CONFIG_NETFILTER_XT_TARGET_HMARK=m
CONFIG_NETFILTER_XT_TARGET_IDLETIMER=m
CONFIG_NETFILTER_XT_TARGET_LOG=m
CONFIG_NETFILTER_XT_TARGET_MARK=m
CONFIG_NETFILTER_XT_TARGET_NFLOG=m
CONFIG_NETFILTER_XT_TARGET_NFQUEUE=m
CONFIG_NETFILTER_XT_TARGET_TEE=m
CONFIG_NETFILTER_XT_TARGET_TPROXY=m
CONFIG_NETFILTER_XT_TARGET_TRACE=m
CONFIG_NETFILTER_XT_TARGET_SECMARK=m
CONFIG_NETFILTER_XT_TARGET_TCPMSS=m
CONFIG_NETFILTER_XT_TARGET_TCPOPTSTRIP=m
CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=m
CONFIG_NETFILTER_XT_MATCH_BPF=m
CONFIG_NETFILTER_XT_MATCH_CLUSTER=m
CONFIG_NETFILTER_XT_MATCH_COMMENT=m
CONFIG_NETFILTER_XT_MATCH_CONNBYTES=m
CONFIG_NETFILTER_XT_MATCH_CONNLABEL=m
CONFIG_NETFILTER_XT_MATCH_CONNLIMIT=m
CONFIG_NETFILTER_XT_MATCH_CONNMARK=m
CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
CONFIG_NETFILTER_XT_MATCH_CPU=m
CONFIG_NETFILTER_XT_MATCH_DCCP=m
CONFIG_NETFILTER_XT_MATCH_DEVGROUP=m
CONFIG_NETFILTER_XT_MATCH_DSCP=m
CONFIG_NETFILTER_XT_MATCH_ESP=m
CONFIG_NETFILTER_XT_MATCH_HASHLIMIT=m
CONFIG_NETFILTER_XT_MATCH_HELPER=m
CONFIG_NETFILTER_XT_MATCH_IPRANGE=m
CONFIG_NETFILTER_XT_MATCH_IPVS=m
CONFIG_NETFILTER_XT_MATCH_LENGTH=m
CONFIG_NETFILTER_XT_MATCH_LIMIT=m
CONFIG_NETFILTER_XT_MATCH_MAC=m
CONFIG_NETFILTER_XT_MATCH_MARK=m
CONFIG_NETFILTER_XT_MATCH_MULTIPORT=m
CONFIG_NETFILTER_XT_MATCH_NFACCT=m
CONFIG_NETFILTER_XT_MATCH_OSF=m
CONFIG_NETFILTER_XT_MATCH_OWNER=m
CONFIG_NETFILTER_XT_MATCH_POLICY=m
CONFIG_NETFILTER_XT_MATCH_PHYSDEV=m
CONFIG_NETFILTER_XT_MATCH_PKTTYPE=m
CONFIG_NETFILTER_XT_MATCH_QUOTA=m
CONFIG_NETFILTER_XT_MATCH_RATEEST=m
CONFIG_NETFILTER_XT_MATCH_REALM=m
CONFIG_NETFILTER_XT_MATCH_RECENT=m
CONFIG_NETFILTER_XT_MATCH_STATE=m
CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
CONFIG_NETFILTER_XT_MATCH_STRING=m
CONFIG_NETFILTER_XT_MATCH_TCPMSS=m
CONFIG_NETFILTER_XT_MATCH_TIME=m
CONFIG_NETFILTER_XT_MATCH_U32=m
CONFIG_IP_SET=m
CONFIG_IP_SET_BITMAP_IP=m
CONFIG_IP_SET_BITMAP_IPMAC=m
CONFIG_IP_SET_BITMAP_PORT=m
CONFIG_IP_SET_HASH_IP=m
CONFIG_IP_SET_HASH_IPPORT=m
CONFIG_IP_SET_HASH_IPPORTIP=m
CONFIG_IP_SET_HASH_IPPORTNET=m
CONFIG_IP_SET_HASH_NETPORTNET=m
CONFIG_IP_SET_HASH_NET=m
CONFIG_IP_SET_HASH_NETNET=m
CONFIG_IP_SET_HASH_NETPORT=m
CONFIG_IP_SET_HASH_NETIFACE=m
CONFIG_IP_SET_LIST_SET=m
CONFIG_IP_VS=m
CONFIG_IP_VS_PROTO_TCP=y
CONFIG_IP_VS_PROTO_UDP=y
CONFIG_IP_VS_PROTO_ESP=y
CONFIG_IP_VS_PROTO_AH=y
CONFIG_IP_VS_RR=m
CONFIG_IP_VS_WRR=m
CONFIG_IP_VS_LC=m
CONFIG_IP_VS_WLC=m
CONFIG_IP_VS_LBLC=m
CONFIG_IP_VS_LBLCR=m
CONFIG_IP_VS_DH=m
CONFIG_IP_VS_SH=m
CONFIG_IP_VS_SED=m
CONFIG_IP_VS_NQ=m
CONFIG_IP_VS_FTP=m
CONFIG_IP_VS_PE_SIP=m
CONFIG_NF_CONNTRACK_IPV4=m
CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_CHAIN_ROUTE_IPV4=m
CONFIG_NF_TABLES_ARP=y
CONFIG_NFT_CHAIN_NAT_IPV4=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP_NF_MATCH_AH=m
CONFIG_IP_NF_MATCH_ECN=m
CONFIG_IP_NF_MATCH_RPFILTER=m
CONFIG_IP_NF_MATCH_TTL=m
CONFIG_IP_NF_FILTER=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IP_NF_NAT=m
CONFIG_IP_NF_TARGET_MASQUERADE=m
CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_CLUSTERIP=m
CONFIG_IP_NF_TARGET_ECN=m
CONFIG_IP_NF_TARGET_TTL=m
CONFIG_IP_NF_RAW=m
CONFIG_IP_NF_SECURITY=m
CONFIG_IP_NF_ARPTABLES=m
CONFIG_IP_NF_ARPFILTER=m
CONFIG_IP_NF_ARP_MANGLE=m
CONFIG_NF_CONNTRACK_IPV6=m
CONFIG_NF_TABLES_IPV6=y
CONFIG_NFT_CHAIN_ROUTE_IPV6=m
CONFIG_NFT_CHAIN_NAT_IPV6=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP6_NF_MATCH_AH=m
CONFIG_IP6_NF_MATCH_EUI64=m
CONFIG_IP6_NF_MATCH_FRAG=m
CONFIG_IP6_NF_MATCH_OPTS=m
CONFIG_IP6_NF_MATCH_HL=m
CONFIG_IP6_NF_MATCH_IPV6HEADER=m
CONFIG_IP6_NF_MATCH_MH=m
CONFIG_IP6_NF_MATCH_RPFILTER=m
CONFIG_IP6_NF_MATCH_RT=m
CONFIG_IP6_NF_TARGET_HL=m
CONFIG_IP6_NF_FILTER=m
CONFIG_IP6_NF_TARGET_REJECT=m
CONFIG_IP6_NF_MANGLE=m
CONFIG_IP6_NF_RAW=m
CONFIG_IP6_NF_SECURITY=m
CONFIG_IP6_NF_NAT=m
CONFIG_IP6_NF_TARGET_MASQUERADE=m
CONFIG_NF_TABLES_BRIDGE=y
CONFIG_RDS=m
CONFIG_RDS_RDMA=m
CONFIG_RDS_TCP=m
CONFIG_L2TP=m
CONFIG_L2TP_DEBUGFS=m
CONFIG_L2TP_V3=y
CONFIG_L2TP_IP=m
CONFIG_L2TP_ETH=m
CONFIG_BRIDGE=m
CONFIG_VLAN_8021Q=m
CONFIG_VLAN_8021Q_GVRP=y
CONFIG_NET_SCHED=y
CONFIG_NET_SCH_CBQ=m
CONFIG_NET_SCH_HTB=m
CONFIG_NET_SCH_HFSC=m
CONFIG_NET_SCH_PRIO=m
CONFIG_NET_SCH_MULTIQ=m
CONFIG_NET_SCH_RED=m
CONFIG_NET_SCH_SFB=m
CONFIG_NET_SCH_SFQ=m
CONFIG_NET_SCH_TEQL=m
CONFIG_NET_SCH_TBF=m
CONFIG_NET_SCH_GRED=m
CONFIG_NET_SCH_DSMARK=m
CONFIG_NET_SCH_NETEM=m
CONFIG_NET_SCH_DRR=m
CONFIG_NET_SCH_MQPRIO=m
CONFIG_NET_SCH_CHOKE=m
CONFIG_NET_SCH_QFQ=m
CONFIG_NET_SCH_CODEL=m
CONFIG_NET_SCH_FQ_CODEL=m
CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_SCH_PLUG=m
CONFIG_NET_CLS_BASIC=m
CONFIG_NET_CLS_TCINDEX=m
CONFIG_NET_CLS_ROUTE4=m
CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_U32=m
CONFIG_CLS_U32_PERF=y
CONFIG_CLS_U32_MARK=y
CONFIG_NET_CLS_RSVP=m
CONFIG_NET_CLS_RSVP6=m
CONFIG_NET_CLS_FLOW=m
CONFIG_NET_CLS_CGROUP=y
CONFIG_NET_CLS_BPF=m
CONFIG_NET_CLS_ACT=y
CONFIG_NET_ACT_POLICE=m
CONFIG_NET_ACT_GACT=m
CONFIG_GACT_PROB=y
CONFIG_NET_ACT_MIRRED=m
CONFIG_NET_ACT_IPT=m
CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
CONFIG_NET_ACT_SKBEDIT=m
CONFIG_NET_ACT_CSUM=m
CONFIG_DNS_RESOLVER=y
CONFIG_OPENVSWITCH=m
CONFIG_VSOCKETS=m
CONFIG_VIRTIO_VSOCKETS=m
CONFIG_NETLINK_DIAG=m
CONFIG_CGROUP_NET_PRIO=y
CONFIG_BPF_JIT=y
CONFIG_NET_PKTGEN=m
CONFIG_DEVTMPFS=y
CONFIG_DMA_CMA=y
CONFIG_CMA_SIZE_MBYTES=0
CONFIG_CONNECTOR=y
CONFIG_ZRAM=m
CONFIG_BLK_DEV_LOOP=m
CONFIG_BLK_DEV_CRYPTOLOOP=m
CONFIG_BLK_DEV_DRBD=m
CONFIG_BLK_DEV_NBD=m
CONFIG_BLK_DEV_RAM=y
CONFIG_BLK_DEV_RAM_SIZE=32768
CONFIG_VIRTIO_BLK=y
CONFIG_BLK_DEV_RBD=m
CONFIG_BLK_DEV_NVME=m
CONFIG_ENCLOSURE_SERVICES=m
CONFIG_GENWQE=m
CONFIG_RAID_ATTRS=m
CONFIG_SCSI=y
CONFIG_BLK_DEV_SD=y
CONFIG_CHR_DEV_ST=m
CONFIG_CHR_DEV_OSST=m
CONFIG_BLK_DEV_SR=m
CONFIG_CHR_DEV_SG=y
CONFIG_CHR_DEV_SCH=m
CONFIG_SCSI_ENCLOSURE=m
CONFIG_SCSI_CONSTANTS=y
CONFIG_SCSI_LOGGING=y
CONFIG_SCSI_SPI_ATTRS=m
CONFIG_SCSI_FC_ATTRS=y
CONFIG_SCSI_SAS_LIBSAS=m
CONFIG_SCSI_SRP_ATTRS=m
CONFIG_ISCSI_TCP=m
CONFIG_SCSI_DEBUG=m
CONFIG_ZFCP=y
CONFIG_SCSI_VIRTIO=m
CONFIG_SCSI_DH=y
CONFIG_SCSI_DH_RDAC=m
CONFIG_SCSI_DH_HP_SW=m
CONFIG_SCSI_DH_EMC=m
CONFIG_SCSI_DH_ALUA=m
CONFIG_SCSI_OSD_INITIATOR=m
CONFIG_SCSI_OSD_ULD=m
CONFIG_MD=y
CONFIG_BLK_DEV_MD=y
CONFIG_MD_LINEAR=m
CONFIG_MD_MULTIPATH=m
CONFIG_MD_FAULTY=m
CONFIG_BLK_DEV_DM=m
CONFIG_DM_CRYPT=m
CONFIG_DM_SNAPSHOT=m
CONFIG_DM_THIN_PROVISIONING=m
CONFIG_DM_MIRROR=m
CONFIG_DM_LOG_USERSPACE=m
CONFIG_DM_RAID=m
CONFIG_DM_ZERO=m
CONFIG_DM_MULTIPATH=m
CONFIG_DM_MULTIPATH_QL=m
CONFIG_DM_MULTIPATH_ST=m
CONFIG_DM_DELAY=m
CONFIG_DM_UEVENT=y
CONFIG_DM_FLAKEY=m
CONFIG_DM_VERITY=m
CONFIG_DM_SWITCH=m
CONFIG_NETDEVICES=y
CONFIG_BONDING=m
CONFIG_DUMMY=m
CONFIG_EQUALIZER=m
CONFIG_IFB=m
CONFIG_MACVLAN=m
CONFIG_MACVTAP=m
CONFIG_VXLAN=m
CONFIG_TUN=m
CONFIG_VETH=m
CONFIG_VIRTIO_NET=m
CONFIG_NLMON=m
# CONFIG_NET_VENDOR_ARC is not set
# CONFIG_NET_VENDOR_CHELSIO is not set
# CONFIG_NET_VENDOR_INTEL is not set
# CONFIG_NET_VENDOR_MARVELL is not set
CONFIG_MLX4_EN=m
CONFIG_MLX5_CORE=m
CONFIG_MLX5_CORE_EN=y
# CONFIG_NET_VENDOR_NATSEMI is not set
CONFIG_PPP=m
CONFIG_PPP_BSDCOMP=m
CONFIG_PPP_DEFLATE=m
CONFIG_PPP_MPPE=m
CONFIG_PPPOE=m
CONFIG_PPTP=m
CONFIG_PPPOL2TP=m
CONFIG_PPP_ASYNC=m
CONFIG_PPP_SYNC_TTY=m
CONFIG_ISM=m
CONFIG_INPUT_EVDEV=y
# CONFIG_INPUT_KEYBOARD is not set
# CONFIG_INPUT_MOUSE is not set
# CONFIG_SERIO is not set
CONFIG_LEGACY_PTY_COUNT=0
CONFIG_HW_RANDOM_VIRTIO=m
CONFIG_RAW_DRIVER=m
CONFIG_HANGCHECK_TIMER=m
CONFIG_TN3270_FS=y
# CONFIG_HWMON is not set
CONFIG_WATCHDOG=y
CONFIG_WATCHDOG_NOWAYOUT=y
CONFIG_SOFT_WATCHDOG=m
CONFIG_DIAG288_WATCHDOG=m
CONFIG_DRM=y
CONFIG_DRM_VIRTIO_GPU=y
CONFIG_FRAMEBUFFER_CONSOLE=y
# CONFIG_HID is not set
# CONFIG_USB_SUPPORT is not set
CONFIG_INFINIBAND=m
CONFIG_INFINIBAND_USER_ACCESS=m
CONFIG_MLX4_INFINIBAND=m
CONFIG_MLX5_INFINIBAND=m
CONFIG_VFIO=m
CONFIG_VFIO_PCI=m
CONFIG_VFIO_MDEV=m
CONFIG_VFIO_MDEV_DEVICE=m
CONFIG_VIRTIO_PCI=m
CONFIG_VIRTIO_BALLOON=m
CONFIG_VIRTIO_INPUT=y
CONFIG_S390_AP_IOMMU=y
CONFIG_EXT4_FS=y
CONFIG_EXT4_FS_POSIX_ACL=y
CONFIG_EXT4_FS_SECURITY=y
CONFIG_JBD2_DEBUG=y
CONFIG_JFS_FS=m
CONFIG_JFS_POSIX_ACL=y
CONFIG_JFS_SECURITY=y
CONFIG_JFS_STATISTICS=y
CONFIG_XFS_FS=y
CONFIG_XFS_QUOTA=y
CONFIG_XFS_POSIX_ACL=y
CONFIG_XFS_RT=y
CONFIG_GFS2_FS=m
CONFIG_GFS2_FS_LOCKING_DLM=y
CONFIG_OCFS2_FS=m
CONFIG_BTRFS_FS=y
CONFIG_BTRFS_FS_POSIX_ACL=y
CONFIG_NILFS2_FS=m
CONFIG_FS_DAX=y
CONFIG_EXPORTFS_BLOCK_OPS=y
CONFIG_FS_ENCRYPTION=y
CONFIG_FANOTIFY=y
CONFIG_FANOTIFY_ACCESS_PERMISSIONS=y
CONFIG_QUOTA_NETLINK_INTERFACE=y
CONFIG_QFMT_V1=m
CONFIG_QFMT_V2=m
CONFIG_AUTOFS4_FS=m
CONFIG_FUSE_FS=y
CONFIG_CUSE=m
CONFIG_OVERLAY_FS=m
CONFIG_FSCACHE=m
CONFIG_CACHEFILES=m
CONFIG_ISO9660_FS=y
CONFIG_JOLIET=y
CONFIG_ZISOFS=y
CONFIG_UDF_FS=m
CONFIG_MSDOS_FS=m
CONFIG_VFAT_FS=m
CONFIG_NTFS_FS=m
CONFIG_NTFS_RW=y
CONFIG_PROC_KCORE=y
CONFIG_TMPFS=y
CONFIG_TMPFS_POSIX_ACL=y
CONFIG_HUGETLBFS=y
CONFIG_CONFIGFS_FS=m
CONFIG_ECRYPT_FS=m
CONFIG_CRAMFS=m
CONFIG_SQUASHFS=m
CONFIG_SQUASHFS_XATTR=y
CONFIG_SQUASHFS_LZO=y
CONFIG_SQUASHFS_XZ=y
CONFIG_ROMFS_FS=m
CONFIG_NFS_FS=m
CONFIG_NFS_V3_ACL=y
CONFIG_NFS_V4=m
CONFIG_NFS_SWAP=y
CONFIG_NFSD=m
CONFIG_NFSD_V3_ACL=y
CONFIG_NFSD_V4=y
CONFIG_NFSD_V4_SECURITY_LABEL=y
CONFIG_CIFS=m
CONFIG_CIFS_STATS=y
CONFIG_CIFS_STATS2=y
CONFIG_CIFS_WEAK_PW_HASH=y
CONFIG_CIFS_UPCALL=y
CONFIG_CIFS_XATTR=y
CONFIG_CIFS_POSIX=y
# CONFIG_CIFS_DEBUG is not set
CONFIG_CIFS_DFS_UPCALL=y
CONFIG_NLS_DEFAULT="utf8"
CONFIG_NLS_CODEPAGE_437=m
CONFIG_NLS_CODEPAGE_850=m
CONFIG_NLS_ASCII=m
CONFIG_NLS_ISO8859_1=m
CONFIG_NLS_ISO8859_15=m
CONFIG_NLS_UTF8=m
CONFIG_DLM=m
CONFIG_PRINTK_TIME=y
CONFIG_DEBUG_INFO=y
CONFIG_DEBUG_INFO_DWARF4=y
CONFIG_GDB_SCRIPTS=y
# CONFIG_ENABLE_MUST_CHECK is not set
CONFIG_FRAME_WARN=1024
CONFIG_UNUSED_SYMBOLS=y
CONFIG_MAGIC_SYSRQ=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_PANIC_ON_OOPS=y
CONFIG_RCU_TORTURE_TEST=m
CONFIG_RCU_CPU_STALL_TIMEOUT=60
CONFIG_LATENCYTOP=y
CONFIG_SCHED_TRACER=y
CONFIG_FTRACE_SYSCALLS=y
CONFIG_STACK_TRACER=y
CONFIG_BLK_DEV_IO_TRACE=y
CONFIG_FUNCTION_PROFILER=y
CONFIG_HIST_TRIGGERS=y
CONFIG_LKDTM=m
CONFIG_PERCPU_TEST=m
CONFIG_ATOMIC64_SELFTEST=y
CONFIG_TEST_BPF=m
CONFIG_BUG_ON_DATA_CORRUPTION=y
CONFIG_S390_PTDUMP=y
CONFIG_PERSISTENT_KEYRINGS=y
CONFIG_BIG_KEYS=y
CONFIG_ENCRYPTED_KEYS=m
CONFIG_SECURITY=y
CONFIG_SECURITY_NETWORK=y
CONFIG_SECURITY_SELINUX=y
CONFIG_SECURITY_SELINUX_BOOTPARAM=y
CONFIG_SECURITY_SELINUX_BOOTPARAM_VALUE=0
CONFIG_SECURITY_SELINUX_DISABLE=y
CONFIG_INTEGRITY_SIGNATURE=y
CONFIG_INTEGRITY_ASYMMETRIC_KEYS=y
CONFIG_IMA=y
CONFIG_IMA_DEFAULT_HASH_SHA256=y
CONFIG_IMA_WRITE_POLICY=y
CONFIG_IMA_APPRAISE=y
CONFIG_CRYPTO_FIPS=y
CONFIG_CRYPTO_DH=m
CONFIG_CRYPTO_ECDH=m
CONFIG_CRYPTO_USER=m
# CONFIG_CRYPTO_MANAGER_DISABLE_TESTS is not set
CONFIG_CRYPTO_PCRYPT=m
CONFIG_CRYPTO_CRYPTD=m
CONFIG_CRYPTO_TEST=m
CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_CRYPTO_LRW=m
CONFIG_CRYPTO_PCBC=m
CONFIG_CRYPTO_KEYWRAP=m
CONFIG_CRYPTO_XCBC=m
CONFIG_CRYPTO_VMAC=m
CONFIG_CRYPTO_CRC32=m
CONFIG_CRYPTO_MICHAEL_MIC=m
CONFIG_CRYPTO_RMD128=m
CONFIG_CRYPTO_RMD160=m
CONFIG_CRYPTO_RMD256=m
CONFIG_CRYPTO_RMD320=m
CONFIG_CRYPTO_SHA512=m
CONFIG_CRYPTO_SHA3=m
CONFIG_CRYPTO_TGR192=m
CONFIG_CRYPTO_WP512=m
CONFIG_CRYPTO_AES_TI=m
CONFIG_CRYPTO_ANUBIS=m
CONFIG_CRYPTO_BLOWFISH=m
CONFIG_CRYPTO_CAMELLIA=m
CONFIG_CRYPTO_CAST5=m
CONFIG_CRYPTO_CAST6=m
CONFIG_CRYPTO_FCRYPT=m
CONFIG_CRYPTO_KHAZAD=m
CONFIG_CRYPTO_SALSA20=m
CONFIG_CRYPTO_SEED=m
CONFIG_CRYPTO_SERPENT=m
CONFIG_CRYPTO_TEA=m
CONFIG_CRYPTO_TWOFISH=m
CONFIG_CRYPTO_842=m
CONFIG_CRYPTO_LZ4=m
CONFIG_CRYPTO_LZ4HC=m
CONFIG_CRYPTO_ANSI_CPRNG=m
CONFIG_CRYPTO_USER_API_HASH=m
CONFIG_CRYPTO_USER_API_SKCIPHER=m
CONFIG_CRYPTO_USER_API_RNG=m
CONFIG_CRYPTO_USER_API_AEAD=m
CONFIG_ZCRYPT=m
CONFIG_PKEY=m
CONFIG_CRYPTO_PAES_S390=m
CONFIG_CRYPTO_SHA1_S390=m
CONFIG_CRYPTO_SHA256_S390=m
CONFIG_CRYPTO_SHA512_S390=m
CONFIG_CRYPTO_DES_S390=m
CONFIG_CRYPTO_AES_S390=m
CONFIG_CRYPTO_GHASH_S390=m
CONFIG_CRYPTO_CRC32_S390=y
CONFIG_CRC7=m
CONFIG_CRC8=m
CONFIG_CORDIC=m
CONFIG_CMM=m
CONFIG_APPLDATA_BASE=y
CONFIG_KVM=m
CONFIG_KVM_S390_UCONTROL=y
CONFIG_VHOST_NET=m
CONFIG_VHOST_VSOCK=m


@ -24,7 +24,6 @@ CONFIG_CRASH_DUMP=y
# CONFIG_SECCOMP is not set
CONFIG_NET=y
# CONFIG_IUCV is not set
CONFIG_UEVENT_HELPER_PATH="/sbin/hotplug"
CONFIG_DEVTMPFS=y
CONFIG_BLK_DEV_RAM=y
# CONFIG_BLK_DEV_XPRAM is not set


@ -137,7 +137,7 @@ static struct shash_alg ghash_alg = {
static int __init ghash_mod_init(void)
{
if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_GHASH))
return -EOPNOTSUPP;
return -ENODEV;
return crypto_register_shash(&ghash_alg);
}


@ -824,7 +824,7 @@ static int __init prng_init(void)
/* check if the CPU has a PRNG */
if (!cpacf_query_func(CPACF_KMC, CPACF_KMC_PRNG))
return -EOPNOTSUPP;
return -ENODEV;
/* check if TRNG subfunction is available */
if (cpacf_query_func(CPACF_PRNO, CPACF_PRNO_TRNG))
@ -837,7 +837,7 @@ static int __init prng_init(void)
if (prng_mode == PRNG_MODE_SHA512) {
pr_err("The prng module cannot "
"start in SHA-512 mode\n");
return -EOPNOTSUPP;
return -ENODEV;
}
prng_mode = PRNG_MODE_TDES;
} else


@ -86,7 +86,7 @@ static struct shash_alg alg = {
static int __init sha1_s390_init(void)
{
if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_1))
return -EOPNOTSUPP;
return -ENODEV;
return crypto_register_shash(&alg);
}


@ -117,7 +117,7 @@ static int __init sha256_s390_init(void)
int ret;
if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_256))
return -EOPNOTSUPP;
return -ENODEV;
ret = crypto_register_shash(&sha256_alg);
if (ret < 0)
goto out;


@ -127,7 +127,7 @@ static int __init init(void)
int ret;
if (!cpacf_query_func(CPACF_KIMD, CPACF_KIMD_SHA_512))
return -EOPNOTSUPP;
return -ENODEV;
if ((ret = crypto_register_shash(&sha512_alg)) < 0)
goto out;
if ((ret = crypto_register_shash(&sha384_alg)) < 0)


@ -11,6 +11,7 @@
#define _ASM_S390_AIRQ_H
#include <linux/bit_spinlock.h>
#include <linux/dma-mapping.h>
struct airq_struct {
struct hlist_node list; /* Handler queueing. */
@ -29,6 +30,7 @@ void unregister_adapter_interrupt(struct airq_struct *airq);
/* Adapter interrupt bit vector */
struct airq_iv {
unsigned long *vector; /* Adapter interrupt bit vector */
dma_addr_t vector_dma; /* Adapter interrupt bit vector dma */
unsigned long *avail; /* Allocation bit mask for the bit vector */
unsigned long *bitlock; /* Lock bit mask for the bit vector */
unsigned long *ptr; /* Pointer associated with each bit */


@ -226,6 +226,10 @@ extern int ccw_device_enable_console(struct ccw_device *);
extern void ccw_device_wait_idle(struct ccw_device *);
extern int ccw_device_force_console(struct ccw_device *);
extern void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size);
extern void ccw_device_dma_free(struct ccw_device *cdev,
void *cpu_addr, size_t size);
int ccw_device_siosl(struct ccw_device *);
extern void ccw_device_get_schid(struct ccw_device *, struct subchannel_id *);


@ -7,6 +7,7 @@
#include <linux/spinlock.h>
#include <linux/bitops.h>
#include <linux/genalloc.h>
#include <asm/types.h>
#define LPM_ANYPATH 0xff
@ -263,6 +264,36 @@ struct ciw {
#define CIW_TYPE_SII 0x1 /* set interface identifier */
#define CIW_TYPE_RNI 0x2 /* read node identifier */
/*
* Node Descriptor as defined in SA22-7204, "Common I/O-Device Commands"
*/
#define ND_VALIDITY_VALID 0
#define ND_VALIDITY_OUTDATED 1
#define ND_VALIDITY_INVALID 2
struct node_descriptor {
/* Flags. */
union {
struct {
u32 validity:3;
u32 reserved:5;
} __packed;
u8 byte0;
} __packed;
/* Node parameters. */
u32 params:24;
/* Node ID. */
char type[6];
char model[3];
char manufacturer[3];
char plant[2];
char seq[12];
u16 tag;
} __packed;
/*
* Flags used as input parameters for do_IO()
*/
@ -328,6 +359,16 @@ static inline u8 pathmask_to_pos(u8 mask)
void channel_subsystem_reinit(void);
extern void css_schedule_reprobe(void);
extern void *cio_dma_zalloc(size_t size);
extern void cio_dma_free(void *cpu_addr, size_t size);
extern struct device *cio_get_dma_css_dev(void);
void *cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev,
size_t size);
void cio_gp_dma_free(struct gen_pool *gp_dma, void *cpu_addr, size_t size);
void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev);
struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages);
/* Function from drivers/s390/cio/chsc.c */
int chsc_sstpc(void *page, unsigned int op, u16 ctrl, u64 *clock_delta);
int chsc_sstpi(void *page, void *result, size_t size);


@ -112,13 +112,8 @@ union ctlreg2 {
};
};
#ifdef CONFIG_SMP
# define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
# define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
#else
# define ctl_set_bit(cr, bit) __ctl_set_bit(cr, bit)
# define ctl_clear_bit(cr, bit) __ctl_clear_bit(cr, bit)
#endif
#define ctl_set_bit(cr, bit) smp_ctl_set_bit(cr, bit)
#define ctl_clear_bit(cr, bit) smp_ctl_clear_bit(cr, bit)
#endif /* __ASSEMBLY__ */
#endif /* __ASM_CTL_REG_H */


@ -107,13 +107,37 @@ void debug_unregister(debug_info_t *id);
void debug_set_level(debug_info_t *id, int new_level);
void debug_set_critical(void);
void debug_stop_all(void);
/**
* debug_level_enabled() - Returns true if debug events for the specified
* level would be logged. Otherwise returns false.
*
* @id: handle for debug log
* @level: debug level
*
* Return:
* - %true if level is less or equal to the current debug level.
*/
static inline bool debug_level_enabled(debug_info_t *id, int level)
{
return level <= id->level;
}
/**
* debug_event() - writes binary debug entry to active debug area
* (if level <= actual debug level)
*
* @id: handle for debug log
* @level: debug level
* @data: pointer to data for debug entry
* @length: length of data in bytes
*
* Return:
* - Address of written debug entry
* - %NULL if error
*/
static inline debug_entry_t *debug_event(debug_info_t *id, int level,
void *data, int length)
{
@ -122,6 +146,18 @@ static inline debug_entry_t *debug_event(debug_info_t *id, int level,
return debug_event_common(id, level, data, length);
}
/**
* debug_int_event() - writes unsigned integer debug entry to active debug area
* (if level <= actual debug level)
*
* @id: handle for debug log
* @level: debug level
* @tag: integer value for debug entry
*
* Return:
* - Address of written debug entry
* - %NULL if error
*/
static inline debug_entry_t *debug_int_event(debug_info_t *id, int level,
unsigned int tag)
{
@ -132,6 +168,18 @@ static inline debug_entry_t *debug_int_event(debug_info_t *id, int level,
return debug_event_common(id, level, &t, sizeof(unsigned int));
}
/**
* debug_long_event() - writes unsigned long debug entry to active debug area
* (if level <= actual debug level)
*
* @id: handle for debug log
* @level: debug level
* @tag: long integer value for debug entry
*
* Return:
* - Address of written debug entry
* - %NULL if error
*/
static inline debug_entry_t *debug_long_event(debug_info_t *id, int level,
unsigned long tag)
{
@ -142,6 +190,18 @@ static inline debug_entry_t *debug_long_event(debug_info_t *id, int level,
return debug_event_common(id, level, &t, sizeof(unsigned long));
}
/**
* debug_text_event() - writes string debug entry in ascii format to active
* debug area (if level <= actual debug level)
*
* @id: handle for debug log
* @level: debug level
* @txt: string for debug entry
*
* Return:
* - Address of written debug entry
* - %NULL if error
*/
static inline debug_entry_t *debug_text_event(debug_info_t *id, int level,
const char *txt)
{
@ -152,12 +212,28 @@ static inline debug_entry_t *debug_text_event(debug_info_t *id, int level,
/*
* IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
* stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
* stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details!
*/
extern debug_entry_t *
__debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
__attribute__ ((format(printf, 3, 4)));
/**
* debug_sprintf_event() - writes debug entry with format string
* and varargs (longs) to active debug area
* (if level <= actual debug level).
*
* @_id: handle for debug log
* @_level: debug level
* @_fmt: format string for debug entry
* @...: varargs used as in sprintf()
*
* Return:
* - Address of written debug entry
* - %NULL if error
*
* floats and long long datatypes cannot be used as varargs.
*/
#define debug_sprintf_event(_id, _level, _fmt, ...) \
({ \
debug_entry_t *__ret; \
@ -172,6 +248,20 @@ __debug_sprintf_event(debug_info_t *id, int level, char *string, ...)
__ret; \
})
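
The kerneldoc added above, together with the note that "%s" arguments only store a pointer, effectively documents the public s390dbf API. As a quick orientation, a minimal usage sketch in the style of a loadable module follows; the log name, sizes and debug levels are illustrative and not taken from the patch:

#include <linux/module.h>
#include <asm/debug.h>

static debug_info_t *my_dbf;

static int __init my_dbf_init(void)
{
	/* 4 areas of 1 page each, room for up to 200 bytes of data per entry */
	my_dbf = debug_register("mydriver", 1, 4, 200);
	if (!my_dbf)
		return -ENOMEM;
	debug_register_view(my_dbf, &debug_sprintf_view);
	debug_set_level(my_dbf, 3);
	/* only the pointer of a "%s" argument is stored, see the note above */
	debug_sprintf_event(my_dbf, 2, "init done, rc=%d", 0);
	return 0;
}

static void __exit my_dbf_exit(void)
{
	debug_unregister(my_dbf);
}

module_init(my_dbf_init);
module_exit(my_dbf_exit);
MODULE_LICENSE("GPL");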
/**
* debug_exception() - writes binary debug entry to active debug area
* (if level <= actual debug level)
* and switches to next debug area
*
* @id: handle for debug log
* @level: debug level
* @data: pointer to data for debug entry
* @length: length of data in bytes
*
* Return:
* - Address of written debug entry
* - %NULL if error
*/
static inline debug_entry_t *debug_exception(debug_info_t *id, int level,
void *data, int length)
{
@ -180,6 +270,19 @@ static inline debug_entry_t *debug_exception(debug_info_t *id, int level,
return debug_exception_common(id, level, data, length);
}
/**
* debug_int_exception() - writes unsigned int debug entry to active debug area
* (if level <= actual debug level)
* and switches to next debug area
*
* @id: handle for debug log
* @level: debug level
* @tag: integer value for debug entry
*
* Return:
* - Address of written debug entry
* - %NULL if error
*/
static inline debug_entry_t *debug_int_exception(debug_info_t *id, int level,
unsigned int tag)
{
@ -190,6 +293,19 @@ static inline debug_entry_t *debug_int_exception(debug_info_t *id, int level,
return debug_exception_common(id, level, &t, sizeof(unsigned int));
}
/**
* debug_long_exception() - writes long debug entry to active debug area
* (if level <= actual debug level)
* and switches to next debug area
*
* @id: handle for debug log
* @level: debug level
* @tag: long integer value for debug entry
*
* Return:
* - Address of written debug entry
* - %NULL if error
*/
static inline debug_entry_t *debug_long_exception (debug_info_t *id, int level,
unsigned long tag)
{
@ -200,6 +316,20 @@ static inline debug_entry_t *debug_long_exception (debug_info_t *id, int level,
return debug_exception_common(id, level, &t, sizeof(unsigned long));
}
/**
* debug_text_exception() - writes string debug entry in ascii format to active
* debug area (if level <= actual debug level)
* and switches to next debug area
*
* @id: handle for debug log
* @level: debug level
* @txt: string for debug entry
*
* Return:
* - Address of written debug entry
* - %NULL if error
*/
static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level,
const char *txt)
{
@ -210,12 +340,30 @@ static inline debug_entry_t *debug_text_exception(debug_info_t *id, int level,
/*
* IMPORTANT: Use "%s" in sprintf format strings with care! Only pointers are
* stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details!
* stored in the s390dbf. See Documentation/s390/s390dbf.rst for more details!
*/
extern debug_entry_t *
__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
__attribute__ ((format(printf, 3, 4)));
/**
* debug_sprintf_exception() - writes debug entry with format string and
* varargs (longs) to active debug area
* (if level <= actual debug level)
* and switches to next debug area.
*
* @_id: handle for debug log
* @_level: debug level
* @_fmt: format string for debug entry
* @...: varargs used as in sprintf()
*
* Return:
* - Address of written debug entry
* - %NULL if error
*
* floats and long long datatypes cannot be used as varargs.
*/
#define debug_sprintf_exception(_id, _level, _fmt, ...) \
({ \
debug_entry_t *__ret; \
@ -231,6 +379,7 @@ __debug_sprintf_exception(debug_info_t *id, int level, char *string, ...)
})
int debug_register_view(debug_info_t *id, struct debug_view *view);
int debug_unregister_view(debug_info_t *id, struct debug_view *view);
/*


@ -59,6 +59,18 @@ static inline int test_facility(unsigned long nr)
return __test_facility(nr, &S390_lowcore.stfle_fac_list);
}
static inline unsigned long __stfle_asm(u64 *stfle_fac_list, int size)
{
register unsigned long reg0 asm("0") = size - 1;
asm volatile(
".insn s,0xb2b00000,0(%1)" /* stfle */
: "+d" (reg0)
: "a" (stfle_fac_list)
: "memory", "cc");
return reg0;
}
/**
* stfle - Store facility list extended
* @stfle_fac_list: array where facility list can be stored
@ -75,13 +87,8 @@ static inline void __stfle(u64 *stfle_fac_list, int size)
memcpy(stfle_fac_list, &S390_lowcore.stfl_fac_list, 4);
if (S390_lowcore.stfl_fac_list & 0x01000000) {
/* More facility bits available with stfle */
register unsigned long reg0 asm("0") = size - 1;
asm volatile(".insn s,0xb2b00000,0(%1)" /* stfle */
: "+d" (reg0)
: "a" (stfle_fac_list)
: "memory", "cc");
nr = (reg0 + 1) * 8; /* # bytes stored by stfle */
nr = __stfle_asm(stfle_fac_list, size);
nr = min_t(unsigned long, (nr + 1) * 8, size * 8);
}
memset((char *) stfle_fac_list + nr, 0, size * 8 - nr);
}


@ -122,8 +122,7 @@ idal_buffer_alloc(size_t size, int page_order)
nr_ptrs = (size + IDA_BLOCK_SIZE - 1) >> IDA_SIZE_LOG;
nr_chunks = (4096 << page_order) >> IDA_SIZE_LOG;
ib = kmalloc(sizeof(struct idal_buffer) + nr_ptrs*sizeof(void *),
GFP_DMA | GFP_KERNEL);
ib = kmalloc(struct_size(ib, data, nr_ptrs), GFP_DMA | GFP_KERNEL);
if (ib == NULL)
return ERR_PTR(-ENOMEM);
ib->size = size;
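
The struct_size() helper used above (from <linux/overflow.h>) computes the allocation size of a structure with a trailing flexible array and saturates to SIZE_MAX if the arithmetic would overflow, so kmalloc() fails instead of returning an undersized buffer. A small illustration; the example struct and function are hypothetical, only the helper itself comes from the kernel:

#include <linux/overflow.h>
#include <linux/slab.h>

struct example {
	size_t nr;
	void *data[];			/* flexible array member */
};

static struct example *example_alloc(size_t n)
{
	/* equivalent to sizeof(*e) + n * sizeof(e->data[0]), but overflow-safe */
	struct example *e = kmalloc(struct_size(e, data, n), GFP_KERNEL);

	if (!e)
		return NULL;
	e->nr = n;
	return e;
}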


@ -18,6 +18,7 @@
#include <linux/kvm_host.h>
#include <linux/kvm.h>
#include <linux/seqlock.h>
#include <linux/module.h>
#include <asm/debug.h>
#include <asm/cpu.h>
#include <asm/fpu/api.h>
@ -720,8 +721,14 @@ struct kvm_s390_cpu_model {
unsigned short ibc;
};
struct kvm_s390_module_hook {
int (*hook)(struct kvm_vcpu *vcpu);
struct module *owner;
};
struct kvm_s390_crypto {
struct kvm_s390_crypto_cb *crycb;
struct kvm_s390_module_hook *pqap_hook;
__u32 crycbd;
__u8 aes_kw;
__u8 dea_kw;


@ -0,0 +1,17 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef S390_MEM_ENCRYPT_H__
#define S390_MEM_ENCRYPT_H__
#ifndef __ASSEMBLY__
#define sme_me_mask 0ULL
static inline bool sme_active(void) { return false; }
extern bool sev_active(void);
int set_memory_encrypted(unsigned long addr, int numpages);
int set_memory_decrypted(unsigned long addr, int numpages);
#endif /* __ASSEMBLY__ */
#endif /* S390_MEM_ENCRYPT_H__ */


@ -194,6 +194,11 @@ int zpci_init_iommu(struct zpci_dev *zdev);
void zpci_destroy_iommu(struct zpci_dev *zdev);
#ifdef CONFIG_PCI
static inline bool zpci_use_mio(struct zpci_dev *zdev)
{
return static_branch_likely(&have_mio) && zdev->mio_capable;
}
/* Error handling and recovery */
void zpci_event_error(void *);
void zpci_event_availability(void *);


@ -16,7 +16,7 @@
* per cpu area, use weak definitions to force the compiler to
* generate external references.
*/
#if defined(CONFIG_SMP) && defined(MODULE)
#if defined(MODULE)
#define ARCH_NEEDS_WEAK_PER_CPU
#endif


@ -36,6 +36,7 @@
#ifndef __ASSEMBLY__
#include <linux/cpumask.h>
#include <linux/linkage.h>
#include <linux/irqflags.h>
#include <asm/cpu.h>
@ -221,12 +222,6 @@ static __no_kasan_or_inline unsigned short stap(void)
return cpu_address;
}
/*
* Give up the time slice of the virtual PU.
*/
#define cpu_relax_yield cpu_relax_yield
void cpu_relax_yield(void);
#define cpu_relax() barrier()
#define ECAG_CACHE_ATTRIBUTE 0


@ -9,9 +9,6 @@
#define __ASM_SMP_H
#include <asm/sigp.h>
#ifdef CONFIG_SMP
#include <asm/lowcore.h>
#define raw_smp_processor_id() (S390_lowcore.cpu_nr)
@ -40,33 +37,6 @@ extern int smp_cpu_get_polarization(int cpu);
extern void smp_fill_possible_mask(void);
extern void smp_detect_cpus(void);
#else /* CONFIG_SMP */
#define smp_cpu_mtid 0
static inline void smp_call_ipl_cpu(void (*func)(void *), void *data)
{
func(data);
}
static inline void smp_call_online_cpu(void (*func)(void *), void *data)
{
func(data);
}
static inline void smp_emergency_stop(void)
{
}
static inline int smp_find_processor_id(u16 address) { return 0; }
static inline int smp_store_status(int cpu) { return 0; }
static inline int smp_vcpu_scheduled(int cpu) { return 1; }
static inline void smp_yield_cpu(int cpu) { }
static inline void smp_fill_possible_mask(void) { }
static inline void smp_detect_cpus(void) { }
#endif /* CONFIG_SMP */
static inline void smp_stop_cpu(void)
{
u16 pcpu = stap();
@ -83,14 +53,9 @@ static inline int smp_get_base_cpu(int cpu)
return cpu - (cpu % (smp_cpu_mtid + 1));
}
#ifdef CONFIG_HOTPLUG_CPU
extern int smp_rescan_cpus(void);
extern void __noreturn cpu_die(void);
extern void __cpu_die(unsigned int cpu);
extern int __cpu_disable(void);
#else
static inline int smp_rescan_cpus(void) { return 0; }
static inline void cpu_die(void) { }
#endif
#endif /* __ASM_SMP_H */


@ -20,11 +20,7 @@
extern int spin_retry;
#ifndef CONFIG_SMP
static inline bool arch_vcpu_is_preempted(int cpu) { return false; }
#else
bool arch_vcpu_is_preempted(int cpu);
#endif
#define vcpu_is_preempted arch_vcpu_is_preempted


@ -32,7 +32,6 @@ static inline void __tlb_flush_idte(unsigned long asce)
: : "a" (opt), "a" (asce) : "cc");
}
#ifdef CONFIG_SMP
void smp_ptlb_all(void);
/*
@ -83,22 +82,6 @@ static inline void __tlb_flush_kernel(void)
else
__tlb_flush_global();
}
#else
#define __tlb_flush_global() __tlb_flush_local()
/*
* Flush TLB entries for a specific ASCE on all CPUs.
*/
static inline void __tlb_flush_mm(struct mm_struct *mm)
{
__tlb_flush_local();
}
static inline void __tlb_flush_kernel(void)
{
__tlb_flush_local();
}
#endif
static inline void __tlb_flush_mm_lazy(struct mm_struct * mm)
{


@ -79,23 +79,4 @@ static inline void unwind_module_init(struct module *mod, void *orc_ip,
size_t orc_ip_size, void *orc,
size_t orc_size) {}
#ifdef CONFIG_KASAN
/*
* This disables KASAN checking when reading a value from another task's stack,
* since the other task could be running on another CPU and could have poisoned
* the stack in the meantime.
*/
#define READ_ONCE_TASK_STACK(task, x) \
({ \
unsigned long val; \
if (task == current) \
val = READ_ONCE(x); \
else \
val = READ_ONCE_NOCHECK(x); \
val; \
})
#else
#define READ_ONCE_TASK_STACK(task, x) READ_ONCE(x)
#endif
#endif /* _ASM_S390_UNWIND_H */


@ -57,7 +57,7 @@ struct runtime_instr_cb {
__u64 sf;
__u64 rsic;
__u64 reserved8;
} __packed __aligned(8);
} __attribute__((__packed__, __aligned__(8)));
static inline void load_runtime_instr_cb(struct runtime_instr_cb *cb)
{


@ -53,6 +53,7 @@ obj-y += sysinfo.o lgr.o os_info.o machine_kexec.o pgm_check.o
obj-y += runtime_instr.o cache.o fpu.o dumpstack.o guarded_storage.o sthyi.o
obj-y += entry.o reipl.o relocate_kernel.o kdebugfs.o alternative.o
obj-y += nospec-branch.o ipl_vmparm.o machine_kexec_reloc.o unwind_bc.o
obj-y += smp.o
extra-y += head64.o vmlinux.lds
@ -60,7 +61,6 @@ obj-$(CONFIG_SYSFS) += nospec-sysfs.o
CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE)
obj-$(CONFIG_MODULES) += module.o
obj-$(CONFIG_SMP) += smp.o
obj-$(CONFIG_SCHED_TOPOLOGY) += topology.o
obj-$(CONFIG_HIBERNATION) += suspend.o swsusp.o
obj-$(CONFIG_AUDIT) += audit.o


@ -647,11 +647,23 @@ static int debug_close(struct inode *inode, struct file *file)
return 0; /* success */
}
/*
* debug_register_mode:
* - Creates and initializes debug area for the caller
* The mode parameter allows to specify access rights for the s390dbf files
* - Returns handle for debug area
/**
* debug_register_mode() - creates and initializes debug area.
*
* @name: Name of debug log (e.g. used for debugfs entry)
* @pages_per_area: Number of pages, which will be allocated per area
* @nr_areas: Number of debug areas
* @buf_size: Size of data area in each debug entry
* @mode: File mode for debugfs files. E.g. S_IRWXUGO
* @uid: User ID for debugfs files. Currently only 0 is supported.
* @gid: Group ID for debugfs files. Currently only 0 is supported.
*
* Return:
* - Handle for generated debug area
* - %NULL if register failed
*
* Allocates memory for a debug log.
* Must not be called within an interrupt handler.
*/
debug_info_t *debug_register_mode(const char *name, int pages_per_area,
int nr_areas, int buf_size, umode_t mode,
@ -681,10 +693,21 @@ out:
}
EXPORT_SYMBOL(debug_register_mode);
/*
* debug_register:
* - creates and initializes debug area for the caller
* - returns handle for debug area
/**
* debug_register() - creates and initializes debug area with default file mode.
*
* @name: Name of debug log (e.g. used for debugfs entry)
* @pages_per_area: Number of pages, which will be allocated per area
* @nr_areas: Number of debug areas
* @buf_size: Size of data area in each debug entry
*
* Return:
* - Handle for generated debug area
* - %NULL if register failed
*
* Allocates memory for a debug log.
* The debugfs file mode access permissions are read and write for user.
* Must not be called within an interrupt handler.
*/
debug_info_t *debug_register(const char *name, int pages_per_area,
int nr_areas, int buf_size)
@ -694,9 +717,13 @@ debug_info_t *debug_register(const char *name, int pages_per_area,
}
EXPORT_SYMBOL(debug_register);
/*
* debug_unregister:
* - give back debug area
/**
* debug_unregister() - give back debug area.
*
* @id: handle for debug log
*
* Return:
* none
*/
void debug_unregister(debug_info_t *id)
{
@ -745,9 +772,14 @@ out:
return rc;
}
/*
* debug_set_level:
* - set actual debug level
/**
* debug_set_level() - Sets new actual debug level if new_level is valid.
*
* @id: handle for debug log
* @new_level: new debug level
*
* Return:
* none
*/
void debug_set_level(debug_info_t *id, int new_level)
{
@ -873,6 +905,14 @@ static struct ctl_table s390dbf_dir_table[] = {
static struct ctl_table_header *s390dbf_sysctl_header;
/**
* debug_stop_all() - stops the debug feature if stopping is allowed.
*
* Return:
* - none
*
* Currently used in case of a kernel oops.
*/
void debug_stop_all(void)
{
if (debug_stoppable)
@ -880,6 +920,17 @@ void debug_stop_all(void)
}
EXPORT_SYMBOL(debug_stop_all);
/**
* debug_set_critical() - event/exception functions try lock instead of spin.
*
* Return:
* - none
*
* Currently used in case of stopping all CPUs but the current one.
* Once in this state, functions to write a debug entry for an
* event or exception no longer spin on the debug area lock,
* but only try to get it and fail if they do not get the lock.
*/
void debug_set_critical(void)
{
debug_critical = 1;
@ -1036,8 +1087,16 @@ debug_entry_t *__debug_sprintf_exception(debug_info_t *id, int level, char *stri
}
EXPORT_SYMBOL(__debug_sprintf_exception);
/*
* debug_register_view:
/**
* debug_register_view() - registers new debug view and creates debugfs
* dir entry
*
* @id: handle for debug log
* @view: pointer to debug view struct
*
* Return:
* - 0 : ok
* - < 0: Error
*/
int debug_register_view(debug_info_t *id, struct debug_view *view)
{
@ -1077,8 +1136,16 @@ out:
}
EXPORT_SYMBOL(debug_register_view);
/*
* debug_unregister_view:
/**
* debug_unregister_view() - unregisters debug view and removes debugfs
* dir entry
*
* @id: handle for debug log
* @view: pointer to debug view struct
*
* Return:
* - 0 : ok
* - < 0: Error
*/
int debug_unregister_view(debug_info_t *id, struct debug_view *view)
{


@ -242,6 +242,7 @@ static const unsigned char formats[][6] = {
[INSTR_RRF_U0FF] = { F_24, U4_16, F_28, 0, 0, 0 },
[INSTR_RRF_U0RF] = { R_24, U4_16, F_28, 0, 0, 0 },
[INSTR_RRF_U0RR] = { R_24, R_28, U4_16, 0, 0, 0 },
[INSTR_RRF_URR] = { R_24, R_28, U8_16, 0, 0, 0 },
[INSTR_RRF_UUFF] = { F_24, U4_16, F_28, U4_20, 0, 0 },
[INSTR_RRF_UUFR] = { F_24, U4_16, R_28, U4_20, 0, 0 },
[INSTR_RRF_UURF] = { R_24, U4_16, F_28, U4_20, 0, 0 },
@ -306,7 +307,7 @@ static const unsigned char formats[][6] = {
[INSTR_VRI_VVV0UU2] = { V_8, V_12, V_16, U8_28, U4_24, 0 },
[INSTR_VRR_0V] = { V_12, 0, 0, 0, 0, 0 },
[INSTR_VRR_0VV0U] = { V_12, V_16, U4_24, 0, 0, 0 },
[INSTR_VRR_RV0U] = { R_8, V_12, U4_24, 0, 0, 0 },
[INSTR_VRR_RV0UU] = { R_8, V_12, U4_24, U4_28, 0, 0 },
[INSTR_VRR_VRR] = { V_8, R_12, R_16, 0, 0, 0 },
[INSTR_VRR_VV] = { V_8, V_12, 0, 0, 0, 0 },
[INSTR_VRR_VV0U] = { V_8, V_12, U4_32, 0, 0, 0 },
@ -326,10 +327,8 @@ static const unsigned char formats[][6] = {
[INSTR_VRS_RVRDU] = { R_8, V_12, D_20, B_16, U4_32, 0 },
[INSTR_VRS_VRRD] = { V_8, R_12, D_20, B_16, 0, 0 },
[INSTR_VRS_VRRDU] = { V_8, R_12, D_20, B_16, U4_32, 0 },
[INSTR_VRS_VVRD] = { V_8, V_12, D_20, B_16, 0, 0 },
[INSTR_VRS_VVRDU] = { V_8, V_12, D_20, B_16, U4_32, 0 },
[INSTR_VRV_VVXRDU] = { V_8, D_20, VX_12, B_16, U4_32, 0 },
[INSTR_VRX_VRRD] = { V_8, D_20, X_12, B_16, 0, 0 },
[INSTR_VRX_VRRDU] = { V_8, D_20, X_12, B_16, U4_32, 0 },
[INSTR_VRX_VV] = { V_8, V_12, 0, 0, 0, 0 },
[INSTR_VSI_URDV] = { V_32, D_20, B_16, U8_8, 0, 0 },


@ -199,9 +199,7 @@ void die(struct pt_regs *regs, const char *str)
#ifdef CONFIG_PREEMPT
pr_cont("PREEMPT ");
#endif
#ifdef CONFIG_SMP
pr_cont("SMP ");
#endif
if (debug_pagealloc_enabled())
pr_cont("DEBUG_PAGEALLOC");
pr_cont("\n");


@ -986,14 +986,12 @@ ENTRY(psw_idle)
stg %r3,__SF_EMPTY(%r15)
larl %r1,.Lpsw_idle_lpsw+4
stg %r1,__SF_EMPTY+8(%r15)
#ifdef CONFIG_SMP
larl %r1,smp_cpu_mtid
llgf %r1,0(%r1)
ltgr %r1,%r1
jz .Lpsw_idle_stcctm
.insn rsy,0xeb0000000017,%r1,5,__SF_EMPTY+16(%r15)
.Lpsw_idle_stcctm:
#endif
oi __LC_CPU_FLAGS+7,_CIF_ENABLED_WAIT
BPON
STCK __CLOCK_IDLE_ENTER(%r2)
@ -1468,7 +1466,6 @@ ENDPROC(cleanup_critical)
mvc __CLOCK_IDLE_ENTER(8,%r2),__CLOCK_IDLE_EXIT(%r2)
mvc __TIMER_IDLE_ENTER(8,%r2),__TIMER_IDLE_EXIT(%r2)
1: # calculate idle cycles
#ifdef CONFIG_SMP
clg %r9,BASED(.Lcleanup_idle_insn)
jl 3f
larl %r1,smp_cpu_mtid
@ -1486,7 +1483,6 @@ ENDPROC(cleanup_critical)
la %r3,8(%r3)
la %r4,8(%r4)
brct %r1,2b
#endif
3: # account system time going idle
lg %r9,__LC_STEAL_TIMER
alg %r9,__CLOCK_IDLE_ENTER(%r2)


@ -15,16 +15,11 @@ struct insn {
s32 offset;
} __packed;
struct insn_args {
struct jump_entry *entry;
enum jump_label_type type;
};
static void jump_label_make_nop(struct jump_entry *entry, struct insn *insn)
{
/* brcl 0,0 */
/* brcl 0,offset */
insn->opcode = 0xc004;
insn->offset = 0;
insn->offset = (jump_entry_target(entry) - jump_entry_code(entry)) >> 1;
}
static void jump_label_make_branch(struct jump_entry *entry, struct insn *insn)
@ -77,23 +72,15 @@ static void __jump_label_transform(struct jump_entry *entry,
s390_kernel_write(code, &new, sizeof(new));
}
static int __sm_arch_jump_label_transform(void *data)
static void __jump_label_sync(void *dummy)
{
struct insn_args *args = data;
__jump_label_transform(args->entry, args->type, 0);
return 0;
}
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
struct insn_args args;
args.entry = entry;
args.type = type;
stop_machine_cpuslocked(__sm_arch_jump_label_transform, &args, NULL);
__jump_label_transform(entry, type, 0);
smp_call_function(__jump_label_sync, NULL, 1);
}
void arch_jump_label_transform_static(struct jump_entry *entry,
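
Instead of funnelling every transformation through stop_machine, the branch site is now patched directly with s390_kernel_write() and an empty IPI (__jump_label_sync) is broadcast so that all CPUs serialize their instruction fetch, which is what makes jump label updates faster. For context, a hedged sketch of the kind of static-key user whose branch these functions patch; the key and function names are illustrative:

#include <linux/jump_label.h>

static DEFINE_STATIC_KEY_FALSE(my_feature);

static int do_work(int x)
{
	/* compiled as a nop/branch that arch_jump_label_transform() rewrites */
	if (static_branch_unlikely(&my_feature))
		return x * 2;		/* rarely enabled feature path */
	return x;
}

static void enable_feature(void)
{
	/* flips every branch site registered for this key */
	static_branch_enable(&my_feature);
}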


@ -141,7 +141,6 @@ static noinline void __machine_kdump(void *image)
*/
store_status(__do_machine_kdump, image);
}
#endif
static unsigned long do_start_kdump(unsigned long addr)
{
@ -155,6 +154,8 @@ static unsigned long do_start_kdump(unsigned long addr)
return rc;
}
#endif /* CONFIG_CRASH_DUMP */
/*
* Check if kdump checksums are valid: We call purgatory with parameter "0"
*/


@ -7,6 +7,7 @@
#define KMSG_COMPONENT "cpu"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
#include <linux/stop_machine.h>
#include <linux/cpufeature.h>
#include <linux/bitops.h>
#include <linux/kernel.h>
@ -31,6 +32,7 @@ struct cpu_info {
};
static DEFINE_PER_CPU(struct cpu_info, cpu_info);
static DEFINE_PER_CPU(int, cpu_relax_retry);
static bool machine_has_cpu_mhz;
@ -58,15 +60,20 @@ void s390_update_cpu_mhz(void)
on_each_cpu(update_cpu_mhz, NULL, 0);
}
void notrace cpu_relax_yield(void)
void notrace stop_machine_yield(const struct cpumask *cpumask)
{
if (!smp_cpu_mtid && MACHINE_HAS_DIAG44) {
diag_stat_inc(DIAG_STAT_X044);
asm volatile("diag 0,0,0x44");
int cpu, this_cpu;
this_cpu = smp_processor_id();
if (__this_cpu_inc_return(cpu_relax_retry) >= spin_retry) {
__this_cpu_write(cpu_relax_retry, 0);
cpu = cpumask_next_wrap(this_cpu, cpumask, this_cpu, false);
if (cpu >= nr_cpu_ids)
return;
if (arch_vcpu_is_preempted(cpu))
smp_yield_cpu(cpu);
}
barrier();
}
EXPORT_SYMBOL(cpu_relax_yield);
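
stop_machine_yield() above is the s390 override of the new weak hook that the generic stop_machine wait loop calls while CPUs spin for state changes: only after spin_retry unsuccessful iterations does it pick the next CPU in the mask and issue a directed yield, and only if that virtual CPU is actually preempted by the hypervisor. Per the description in the merge message, the common-code side looks roughly like this simplified sketch (not the literal patch text):

/* kernel/stop_machine.c, simplified: weak default for architectures
 * without a directed-yield hint; s390 overrides it as shown above. */
void __weak stop_machine_yield(const struct cpumask *cpumask)
{
	cpu_relax();
}

/* multi_cpu_stop() now calls stop_machine_yield(cpumask) in its
 * busy-wait loop where it previously called cpu_relax_yield(). */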
/*
* cpu_init - initializes state that is per-CPU.


@ -461,11 +461,9 @@ static void __init setup_lowcore_dat_off(void)
mem_assign_absolute(S390_lowcore.restart_source, lc->restart_source);
mem_assign_absolute(S390_lowcore.restart_psw, lc->restart_psw);
#ifdef CONFIG_SMP
lc->spinlock_lockval = arch_spin_lockval(0);
lc->spinlock_index = 0;
arch_spin_lock_setup(0);
#endif
lc->br_r1_trampoline = 0x07f1; /* br %r1 */
set_prefix((u32)(unsigned long) lc);


@ -232,8 +232,6 @@ out:
return -ENOMEM;
}
#ifdef CONFIG_HOTPLUG_CPU
static void pcpu_free_lowcore(struct pcpu *pcpu)
{
unsigned long async_stack, nodat_stack, lowcore;
@ -253,8 +251,6 @@ static void pcpu_free_lowcore(struct pcpu *pcpu)
free_pages(lowcore, LC_ORDER);
}
#endif /* CONFIG_HOTPLUG_CPU */
static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu)
{
struct lowcore *lc = pcpu->lowcore;
@ -418,7 +414,7 @@ void smp_yield_cpu(int cpu)
diag_stat_inc_norecursion(DIAG_STAT_X09C);
asm volatile("diag %0,0,0x9c"
: : "d" (pcpu_devices[cpu].address));
} else if (MACHINE_HAS_DIAG44) {
} else if (MACHINE_HAS_DIAG44 && !smp_cpu_mtid) {
diag_stat_inc_norecursion(DIAG_STAT_X044);
asm volatile("diag 0,0,0x44");
}
@ -895,8 +891,6 @@ static int __init _setup_possible_cpus(char *s)
}
early_param("possible_cpus", _setup_possible_cpus);
#ifdef CONFIG_HOTPLUG_CPU
int __cpu_disable(void)
{
unsigned long cregs[16];
@ -937,8 +931,6 @@ void __noreturn cpu_die(void)
for (;;) ;
}
#endif /* CONFIG_HOTPLUG_CPU */
void __init smp_fill_possible_mask(void)
{
unsigned int possible, sclp_max, cpu;
@ -996,7 +988,6 @@ int setup_profiling_timer(unsigned int multiplier)
return 0;
}
#ifdef CONFIG_HOTPLUG_CPU
static ssize_t cpu_configure_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
@ -1073,7 +1064,6 @@ out:
return rc ? rc : count;
}
static DEVICE_ATTR(configure, 0644, cpu_configure_show, cpu_configure_store);
#endif /* CONFIG_HOTPLUG_CPU */
static ssize_t show_cpu_address(struct device *dev,
struct device_attribute *attr, char *buf)
@ -1083,9 +1073,7 @@ static ssize_t show_cpu_address(struct device *dev,
static DEVICE_ATTR(address, 0444, show_cpu_address, NULL);
static struct attribute *cpu_common_attrs[] = {
#ifdef CONFIG_HOTPLUG_CPU
&dev_attr_configure.attr,
#endif
&dev_attr_address.attr,
NULL,
};
@ -1144,15 +1132,11 @@ static int smp_add_present_cpu(int cpu)
out_topology:
sysfs_remove_group(&s->kobj, &cpu_common_attr_group);
out_cpu:
#ifdef CONFIG_HOTPLUG_CPU
unregister_cpu(c);
#endif
out:
return rc;
}
#ifdef CONFIG_HOTPLUG_CPU
int __ref smp_rescan_cpus(void)
{
struct sclp_core_info *info;
@ -1188,17 +1172,14 @@ static ssize_t __ref rescan_store(struct device *dev,
return rc ? rc : count;
}
static DEVICE_ATTR_WO(rescan);
#endif /* CONFIG_HOTPLUG_CPU */
static int __init s390_smp_init(void)
{
int cpu, rc = 0;
#ifdef CONFIG_HOTPLUG_CPU
rc = device_create_file(cpu_subsys.dev_root, &dev_attr_rescan);
if (rc)
return rc;
#endif
for_each_present_cpu(cpu) {
rc = smp_add_present_cpu(cpu);
if (rc)


@ -162,7 +162,6 @@ ENTRY(swsusp_arch_resume)
larl %r1,__swsusp_reset_dma
lg %r1,0(%r1)
BASR_EX %r14,%r1
#ifdef CONFIG_SMP
larl %r1,smp_cpu_mt_shift
icm %r1,15,0(%r1)
jz smt_done
@ -172,7 +171,6 @@ smt_loop:
brc 8,smt_done /* accepted */
brc 2,smt_loop /* busy, try again */
smt_done:
#endif
larl %r1,.Lnew_pgm_check_psw
lpswe 0(%r1)
pgm_check_entry:

View File

@ -229,17 +229,11 @@ void vector_exception(struct pt_regs *regs)
void data_exception(struct pt_regs *regs)
{
int signal = 0;
save_fpu_regs();
if (current->thread.fpu.fpc & FPC_DXC_MASK)
signal = SIGFPE;
else
signal = SIGILL;
if (signal == SIGFPE)
do_fp_trap(regs, current->thread.fpu.fpc);
else if (signal)
do_trap(regs, signal, ILL_ILLOPN, "data exception");
else
do_trap(regs, SIGILL, ILL_ILLOPN, "data exception");
}
void space_switch_exception(struct pt_regs *regs)


@ -46,18 +46,18 @@ bool unwind_next_frame(struct unwind_state *state)
regs = state->regs;
if (unlikely(regs)) {
sp = READ_ONCE_TASK_STACK(state->task, regs->gprs[15]);
sp = READ_ONCE_NOCHECK(regs->gprs[15]);
if (unlikely(outside_of_stack(state, sp))) {
if (!update_stack_info(state, sp))
goto out_err;
}
sf = (struct stack_frame *) sp;
ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]);
ip = READ_ONCE_NOCHECK(sf->gprs[8]);
reliable = false;
regs = NULL;
} else {
sf = (struct stack_frame *) state->sp;
sp = READ_ONCE_TASK_STACK(state->task, sf->back_chain);
sp = READ_ONCE_NOCHECK(sf->back_chain);
if (likely(sp)) {
/* Non-zero back-chain points to the previous frame */
if (unlikely(outside_of_stack(state, sp))) {
@ -65,7 +65,7 @@ bool unwind_next_frame(struct unwind_state *state)
goto out_err;
}
sf = (struct stack_frame *) sp;
ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]);
ip = READ_ONCE_NOCHECK(sf->gprs[8]);
reliable = true;
} else {
/* No back-chain, look for a pt_regs structure */
@ -73,9 +73,9 @@ bool unwind_next_frame(struct unwind_state *state)
if (!on_stack(info, sp, sizeof(struct pt_regs)))
goto out_stop;
regs = (struct pt_regs *) sp;
if (user_mode(regs))
if (READ_ONCE_NOCHECK(regs->psw.mask) & PSW_MASK_PSTATE)
goto out_stop;
ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr);
ip = READ_ONCE_NOCHECK(regs->psw.addr);
reliable = true;
}
}
@ -132,11 +132,11 @@ void __unwind_start(struct unwind_state *state, struct task_struct *task,
/* Get the instruction pointer from pt_regs or the stack frame */
if (regs) {
ip = READ_ONCE_TASK_STACK(state->task, regs->psw.addr);
ip = READ_ONCE_NOCHECK(regs->psw.addr);
reliable = true;
} else {
sf = (struct stack_frame *) sp;
ip = READ_ONCE_TASK_STACK(state->task, sf->gprs[8]);
ip = READ_ONCE_NOCHECK(sf->gprs[8]);
reliable = false;
}


@ -2461,6 +2461,9 @@ int kvm_arch_init_vm(struct kvm *kvm, unsigned long type)
set_kvm_facility(kvm->arch.model.fac_list, 147);
}
if (css_general_characteristics.aiv && test_facility(65))
set_kvm_facility(kvm->arch.model.fac_mask, 65);
kvm->arch.model.cpuid = kvm_s390_get_initial_cpuid();
kvm->arch.model.ibc = sclp.ibc & 0x0fff;


@ -27,6 +27,7 @@
#include <asm/io.h>
#include <asm/ptrace.h>
#include <asm/sclp.h>
#include <asm/ap.h>
#include "gaccess.h"
#include "kvm-s390.h"
#include "trace.h"
@ -592,6 +593,89 @@ static int handle_io_inst(struct kvm_vcpu *vcpu)
}
}
/*
* handle_pqap: Handling pqap interception
* @vcpu: the vcpu having issue the pqap instruction
*
* We now support PQAP/AQIC instructions and we need to correctly
* answer the guest even if no dedicated driver's hook is available.
*
* The intercepting code calls a dedicated callback for this instruction
* if a driver did register one in the CRYPTO satellite of the
* SIE block.
*
* If no callback is available, the queues are not available, return this
* response code to the caller and set CC to 3.
* Else return the response code returned by the callback.
*/
static int handle_pqap(struct kvm_vcpu *vcpu)
{
struct ap_queue_status status = {};
unsigned long reg0;
int ret;
uint8_t fc;
/* Verify that the AP instruction are available */
if (!ap_instructions_available())
return -EOPNOTSUPP;
/* Verify that the guest is allowed to use AP instructions */
if (!(vcpu->arch.sie_block->eca & ECA_APIE))
return -EOPNOTSUPP;
/*
* The only possibly intercepted functions when AP instructions are
* available for the guest are AQIC and TAPQ with the t bit set
* since we do not set IC.3 (FIII) we currently will only intercept
* the AQIC function code.
*/
reg0 = vcpu->run->s.regs.gprs[0];
fc = (reg0 >> 24) & 0xff;
if (WARN_ON_ONCE(fc != 0x03))
return -EOPNOTSUPP;
/* PQAP instruction is allowed for guest kernel only */
if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE)
return kvm_s390_inject_program_int(vcpu, PGM_PRIVILEGED_OP);
/* Common PQAP instruction specification exceptions */
/* bits 41-47 must all be zeros */
if (reg0 & 0x007f0000UL)
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
/* APFT not install and T bit set */
if (!test_kvm_facility(vcpu->kvm, 15) && (reg0 & 0x00800000UL))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
/* APXA not installed and APID greater 64 or APQI greater 16 */
if (!(vcpu->kvm->arch.crypto.crycbd & 0x02) && (reg0 & 0x0000c0f0UL))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
/* AQIC function code specific exception */
/* facility 65 not present for AQIC function code */
if (!test_kvm_facility(vcpu->kvm, 65))
return kvm_s390_inject_program_int(vcpu, PGM_SPECIFICATION);
/*
* Verify that the hook callback is registered, lock the owner
* and call the hook.
*/
if (vcpu->kvm->arch.crypto.pqap_hook) {
if (!try_module_get(vcpu->kvm->arch.crypto.pqap_hook->owner))
return -EOPNOTSUPP;
ret = vcpu->kvm->arch.crypto.pqap_hook->hook(vcpu);
module_put(vcpu->kvm->arch.crypto.pqap_hook->owner);
if (!ret && vcpu->run->s.regs.gprs[1] & 0x00ff0000)
kvm_s390_set_psw_cc(vcpu, 3);
return ret;
}
/*
* A vfio_driver must register a hook.
* No hook means no driver to enable the SIE CRYCB and no queues.
* We send this response to the guest.
*/
status.response_code = 0x01;
memcpy(&vcpu->run->s.regs.gprs[1], &status, sizeof(status));
kvm_s390_set_psw_cc(vcpu, 3);
return 0;
}
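
The hook that handle_pqap() consults is the kvm_s390_module_hook pointer added to struct kvm_s390_crypto earlier in this series. A driver that wants to virtualize AQIC (in mainline this is the vfio-ap module) would register roughly as in the sketch below; the handler name and its body are illustrative, only the hook fields and the kvm->arch.crypto.pqap_hook location come from the patches above:

static int my_pqap_handler(struct kvm_vcpu *vcpu)
{
	/* decode GPR0/GPR1, set up or tear down interrupt forwarding for
	 * the addressed AP queue, then report the AP status word to the guest */
	return 0;
}

static struct kvm_s390_module_hook my_pqap_hook = {
	.hook	= my_pqap_handler,
	.owner	= THIS_MODULE,
};

static void my_register_pqap_hook(struct kvm *kvm)
{
	kvm->arch.crypto.pqap_hook = &my_pqap_hook;
}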
static int handle_stfl(struct kvm_vcpu *vcpu)
{
int rc;
@ -878,6 +962,8 @@ int kvm_s390_handle_b2(struct kvm_vcpu *vcpu)
return handle_sthyi(vcpu);
case 0x7d:
return handle_stsi(vcpu);
case 0xaf:
return handle_pqap(vcpu);
case 0xb1:
return handle_stfl(vcpu);
case 0xb2:


@ -3,9 +3,8 @@
# Makefile for s390-specific library files..
#
lib-y += delay.o string.o uaccess.o find.o
lib-y += delay.o string.o uaccess.o find.o spinlock.o
obj-y += mem.o xor.o
lib-$(CONFIG_SMP) += spinlock.o
lib-$(CONFIG_KPROBES) += probes.o
lib-$(CONFIG_UPROBES) += probes.o


@ -18,6 +18,7 @@
#include <linux/mman.h>
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/swiotlb.h>
#include <linux/smp.h>
#include <linux/init.h>
#include <linux/pagemap.h>
@ -29,6 +30,7 @@
#include <linux/export.h>
#include <linux/cma.h>
#include <linux/gfp.h>
#include <linux/dma-mapping.h>
#include <asm/processor.h>
#include <linux/uaccess.h>
#include <asm/pgtable.h>
@ -42,6 +44,8 @@
#include <asm/sclp.h>
#include <asm/set_memory.h>
#include <asm/kasan.h>
#include <asm/dma-mapping.h>
#include <asm/uv.h>
pgd_t swapper_pg_dir[PTRS_PER_PGD] __section(.bss..swapper_pg_dir);
@ -128,6 +132,47 @@ void mark_rodata_ro(void)
pr_info("Write protected read-only-after-init data: %luk\n", size >> 10);
}
int set_memory_encrypted(unsigned long addr, int numpages)
{
int i;
/* make specified pages unshared, (swiotlb, dma_free) */
for (i = 0; i < numpages; ++i) {
uv_remove_shared(addr);
addr += PAGE_SIZE;
}
return 0;
}
int set_memory_decrypted(unsigned long addr, int numpages)
{
int i;
/* make specified pages shared (swiotlb, dma_alloca) */
for (i = 0; i < numpages; ++i) {
uv_set_shared(addr);
addr += PAGE_SIZE;
}
return 0;
}
/* are we a protected virtualization guest? */
bool sev_active(void)
{
return is_prot_virt_guest();
}
/* protected virtualization */
static void pv_init(void)
{
if (!is_prot_virt_guest())
return;
/* make sure bounce buffers are shared */
swiotlb_init(1);
swiotlb_update_mem_attributes();
swiotlb_force = SWIOTLB_FORCE;
}
void __init mem_init(void)
{
cpumask_set_cpu(0, &init_mm.context.cpu_attach_mask);
@ -136,6 +181,8 @@ void __init mem_init(void)
set_max_mapnr(max_low_pfn);
high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
pv_init();
/* Setup guest page hinting */
cmma_init();


@ -52,21 +52,22 @@ static notrace long s390_kernel_write_odd(void *dst, const void *src, size_t siz
* Therefore we have a read-modify-write sequence: the function reads eight
* bytes from destination at an eight byte boundary, modifies the bytes
* requested and writes the result back in a loop.
*
* Note: this means that this function may not be called concurrently on
* several cpus with overlapping words, since this may potentially
* cause data corruption.
*/
static DEFINE_SPINLOCK(s390_kernel_write_lock);
void notrace s390_kernel_write(void *dst, const void *src, size_t size)
{
unsigned long flags;
long copied;
spin_lock_irqsave(&s390_kernel_write_lock, flags);
while (size) {
copied = s390_kernel_write_odd(dst, src, size);
dst += copied;
src += copied;
size -= copied;
}
spin_unlock_irqrestore(&s390_kernel_write_lock, flags);
}
static int __memcpy_real(void *dest, void *src, size_t count)


@ -24,8 +24,6 @@ static unsigned long stack_maxrandom_size(void)
{
if (!(current->flags & PF_RANDOMIZE))
return 0;
if (current->personality & ADDR_NO_RANDOMIZE)
return 0;
return STACK_RND_MASK << PAGE_SHIFT;
}


@ -421,12 +421,12 @@ static void zpci_map_resources(struct pci_dev *pdev)
if (!len)
continue;
if (static_branch_likely(&have_mio))
if (zpci_use_mio(zdev))
pdev->resource[i].start =
(resource_size_t __force) zdev->bars[i].mio_wb;
else
pdev->resource[i].start =
(resource_size_t __force) pci_iomap(pdev, i, 0);
pdev->resource[i].start = (resource_size_t __force)
pci_iomap_range_fh(pdev, i, 0, 0);
pdev->resource[i].end = pdev->resource[i].start + len - 1;
}
@ -444,18 +444,19 @@ static void zpci_map_resources(struct pci_dev *pdev)
static void zpci_unmap_resources(struct pci_dev *pdev)
{
struct zpci_dev *zdev = to_zpci(pdev);
resource_size_t len;
int i;
if (static_branch_likely(&have_mio))
if (zpci_use_mio(zdev))
return;
for (i = 0; i < PCI_BAR_COUNT; i++) {
len = pci_resource_len(pdev, i);
if (!len)
continue;
pci_iounmap(pdev, (void __iomem __force *)
pdev->resource[i].start);
pci_iounmap_fh(pdev, (void __iomem __force *)
pdev->resource[i].start);
}
}
@ -528,7 +529,7 @@ static int zpci_setup_bus_resources(struct zpci_dev *zdev,
if (zdev->bars[i].val & 4)
flags |= IORESOURCE_MEM_64;
if (static_branch_likely(&have_mio))
if (zpci_use_mio(zdev))
addr = (unsigned long) zdev->bars[i].mio_wb;
else
addr = ZPCI_ADDR(entry);


@ -291,7 +291,7 @@ int clp_enable_fh(struct zpci_dev *zdev, u8 nr_dma_as)
goto out;
zdev->fh = fh;
if (zdev->mio_capable) {
if (zpci_use_mio(zdev)) {
rc = clp_set_pci_fn(&fh, nr_dma_as, CLP_SET_ENABLE_MIO);
zpci_dbg(3, "ena mio fid:%x, fh:%x, rc:%d\n", zdev->fid, fh, rc);
if (rc)


@ -1,2 +1,3 @@
kexec-purgatory.c
purgatory
purgatory.lds
purgatory.ro


@ -6,7 +6,6 @@
kapi := arch/$(ARCH)/include/generated/asm
kapi-hdrs-y := $(kapi)/facility-defs.h $(kapi)/dis-defs.h
targets += $(addprefix ../../../,$(kapi-hdrs-y))
PHONY += kapi
kapi: $(kapi-hdrs-y)
@ -14,11 +13,7 @@ kapi: $(kapi-hdrs-y)
hostprogs-y += gen_facilities
hostprogs-y += gen_opcode_table
HOSTCFLAGS_gen_facilities.o += -Wall $(LINUXINCLUDE)
HOSTCFLAGS_gen_opcode_table.o += -Wall $(LINUXINCLUDE)
# Ensure output directory exists
_dummy := $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
HOSTCFLAGS_gen_facilities.o += $(LINUXINCLUDE)
filechk_facility-defs.h = $(obj)/gen_facilities


@ -520,6 +520,9 @@ b92e km RRE_RR
b92f kmc RRE_RR
b930 cgfr RRE_RR
b931 clgfr RRE_RR
b938 sortl RRE_RR
b939 dfltcc RRF_R0RR2
b93a kdsa RRE_RR
b93c ppno RRE_RR
b93e kimd RRE_RR
b93f klmd RRE_RR
@ -538,8 +541,16 @@ b95a cxlgtr RRF_UUFR
b95b cxlftr RRF_UUFR
b960 cgrt RRF_U0RR
b961 clgrt RRF_U0RR
b964 nngrk RRF_R0RR2
b965 ocgrk RRF_R0RR2
b966 nogrk RRF_R0RR2
b967 nxgrk RRF_R0RR2
b972 crt RRF_U0RR
b973 clrt RRF_U0RR
b974 nnrk RRF_R0RR2
b975 ocrk RRF_R0RR2
b976 nork RRF_R0RR2
b977 nxrk RRF_R0RR2
b980 ngr RRE_RR
b981 ogr RRE_RR
b982 xgr RRE_RR
@ -573,6 +584,7 @@ b99f ssair RRE_R0
b9a0 clp RRF_U0RR
b9a1 tpei RRE_RR
b9a2 ptf RRE_R0
b9a4 uvc RRF_URR
b9aa lptea RRF_RURR2
b9ab essa RRF_U0RR
b9ac irbm RRE_RR
@ -585,6 +597,7 @@ b9b3 cu42 RRE_RR
b9bd trtre RRF_U0RR
b9be srstu RRE_RR
b9bf trte RRF_U0RR
b9c0 selhhhr RRF_RURR
b9c8 ahhhr RRF_R0RR2
b9c9 shhhr RRF_R0RR2
b9ca alhhhr RRF_R0RR2
@ -594,6 +607,9 @@ b9cf clhhr RRE_RR
b9d0 pcistg RRE_RR
b9d2 pcilg RRE_RR
b9d3 rpcit RRE_RR
b9d4 pcistgi RRE_RR
b9d5 pciwb RRE_00
b9d6 pcilgi RRE_RR
b9d8 ahhlr RRF_R0RR2
b9d9 shhlr RRF_R0RR2
b9da alhhlr RRF_R0RR2
@ -601,9 +617,11 @@ b9db slhhlr RRF_R0RR2
b9dd chlr RRE_RR
b9df clhlr RRE_RR
b9e0 locfhr RRF_U0RR
b9e1 popcnt RRE_RR
b9e1 popcnt RRF_U0RR
b9e2 locgr RRF_U0RR
b9e3 selgr RRF_RURR
b9e4 ngrk RRF_R0RR2
b9e5 ncgrk RRF_R0RR2
b9e6 ogrk RRF_R0RR2
b9e7 xgrk RRF_R0RR2
b9e8 agrk RRF_R0RR2
@ -612,8 +630,10 @@ b9ea algrk RRF_R0RR2
b9eb slgrk RRF_R0RR2
b9ec mgrk RRF_R0RR2
b9ed msgrkc RRF_R0RR2
b9f0 selr RRF_RURR
b9f2 locr RRF_U0RR
b9f4 nrk RRF_R0RR2
b9f5 ncrk RRF_R0RR2
b9f6 ork RRF_R0RR2
b9f7 xrk RRF_R0RR2
b9f8 ark RRF_R0RR2
@ -822,6 +842,7 @@ e3d4 stpcifc RXY_RRRD
e500 lasp SSE_RDRD
e501 tprot SSE_RDRD
e502 strag SSE_RDRD
e50a mvcrl SSE_RDRD
e50e mvcsk SSE_RDRD
e50f mvcdk SSE_RDRD
e544 mvhhi SIL_RDI
@ -835,6 +856,18 @@ e55c chsi SIL_RDI
e55d clfhsi SIL_RDU
e560 tbegin SIL_RDU
e561 tbeginc SIL_RDU
e601 vlebrh VRX_VRRDU
e602 vlebrg VRX_VRRDU
e603 vlebrf VRX_VRRDU
e604 vllebrz VRX_VRRDU
e605 vlbrrep VRX_VRRDU
e606 vlbr VRX_VRRDU
e607 vler VRX_VRRDU
e609 vstebrh VRX_VRRDU
e60a vstebrg VRX_VRRDU
e60b vstebrf VRX_VRRDU
e60e vstbr VRX_VRRDU
e60f vster VRX_VRRDU
e634 vpkz VSI_URDV
e635 vlrl VSI_URDV
e637 vlrlr VRS_RRDV
@ -842,8 +875,8 @@ e63c vupkz VSI_URDV
e63d vstrl VSI_URDV
e63f vstrlr VRS_RRDV
e649 vlip VRI_V0UU2
e650 vcvb VRR_RV0U
e652 vcvbg VRR_RV0U
e650 vcvb VRR_RV0UU
e652 vcvbg VRR_RV0UU
e658 vcvd VRI_VR0UU
e659 vsrp VRI_VVUUU2
e65a vcvdg VRI_VR0UU
@ -863,13 +896,13 @@ e702 vleg VRX_VRRDU
e703 vlef VRX_VRRDU
e704 vllez VRX_VRRDU
e705 vlrep VRX_VRRDU
e706 vl VRX_VRRD
e706 vl VRX_VRRDU
e707 vlbb VRX_VRRDU
e708 vsteb VRX_VRRDU
e709 vsteh VRX_VRRDU
e70a vsteg VRX_VRRDU
e70b vstef VRX_VRRDU
e70e vst VRX_VRRD
e70e vst VRX_VRRDU
e712 vgeg VRV_VVXRDU
e713 vgef VRV_VVXRDU
e71a vsceg VRV_VVXRDU
@ -879,11 +912,11 @@ e722 vlvg VRS_VRRDU
e727 lcbb RXE_RRRDU
e730 vesl VRS_VVRDU
e733 verll VRS_VVRDU
e736 vlm VRS_VVRD
e736 vlm VRS_VVRDU
e737 vll VRS_VRRD
e738 vesrl VRS_VVRDU
e73a vesra VRS_VVRDU
e73e vstm VRS_VVRD
e73e vstm VRS_VVRDU
e73f vstl VRS_VRRD
e740 vleib VRI_V0IU
e741 vleih VRI_V0IU
@ -932,7 +965,10 @@ e781 vfene VRR_VVV0U0U
e782 vfae VRR_VVV0U0U
e784 vpdi VRR_VVV0U
e785 vbperm VRR_VVV
e786 vsld VRI_VVV0U
e787 vsrd VRI_VVV0U
e78a vstrc VRR_VVVUU0V
e78b vstrs VRR_VVVUU0V
e78c vperm VRR_VVV0V
e78d vsel VRR_VVV0V
e78e vfms VRR_VVVU0UV
@ -1060,6 +1096,7 @@ eb9b stamy RSY_AARD
ebc0 tp RSL_R0RD
ebd0 pcistb RSY_RRRD
ebd1 sic RSY_RRRD
ebd4 pcistbi RSY_RRRD
ebdc srak RSY_RRRD
ebdd slak RSY_RRRD
ebde srlk RSY_RRRD


@ -38,7 +38,7 @@ config DASD_PROFILE
depends on DASD
help
Enable this option if you want to see profiling information
in /proc/dasd/statistics.
in /proc/dasd/statistics.
config DASD_ECKD
def_tristate y


@ -203,7 +203,7 @@ static int __init dasd_feature_list(char *str)
else if (len == 8 && !strncmp(str, "failfast", 8))
features |= DASD_FEATURE_FAILFAST;
else {
pr_warn("%*s is not a supported device option\n",
pr_warn("%.*s is not a supported device option\n",
len, str);
rc = -EINVAL;
}
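
The one-character change above swaps a printf field width for a precision: "%*s" treats len only as a minimum width and still prints the whole (presumed NUL-terminated) string, while "%.*s" prints at most len bytes, which is what is needed here because str points into a comma-separated option list and is not NUL-terminated after the offending token. A tiny standalone illustration with made-up values:

#include <stdio.h>

int main(void)
{
	const char *str = "failfast,readonly";
	int len = 8;			/* length of the first token */

	printf("[%*s]\n", len, str);	/* width only: prints the whole string */
	printf("[%.*s]\n", len, str);	/* precision: prints at most 8 bytes, "failfast" */
	return 0;
}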


@ -79,27 +79,6 @@ config SCLP_VT220_CONSOLE
Include support for using an IBM SCLP VT220-compatible terminal as a
Linux system console.
config SCLP_ASYNC
def_tristate m
prompt "Support for Call Home via Asynchronous SCLP Records"
depends on S390
help
This option enables the call home function, which is able to inform
the service element and connected organisations about a kernel panic.
You should only select this option if you know what you are doing,
want for inform other people about your kernel panics,
need this feature and intend to run your kernel in LPAR.
config SCLP_ASYNC_ID
string "Component ID for Call Home"
depends on SCLP_ASYNC
default "000000000"
help
The Component ID for Call Home is used to identify the correct
problem reporting queue the call home records should be sent to.
If your are unsure, please use the default value "000000000".
config HMC_DRV
def_tristate m
prompt "Support for file transfers from HMC drive CD/DVD-ROM"
@ -205,4 +184,3 @@ config S390_VMUR
depends on S390
help
Character device driver for z/VM reader, puncher and printer.


@ -31,7 +31,6 @@ obj-$(CONFIG_TN3215) += con3215.o
obj-$(CONFIG_SCLP_TTY) += sclp_tty.o
obj-$(CONFIG_SCLP_CONSOLE) += sclp_con.o
obj-$(CONFIG_SCLP_VT220_TTY) += sclp_vt220.o
obj-$(CONFIG_SCLP_ASYNC) += sclp_async.o
obj-$(CONFIG_PCI) += sclp_pci.o


@ -1,189 +0,0 @@
// SPDX-License-Identifier: GPL-2.0
/*
* Enable Asynchronous Notification via SCLP.
*
* Copyright IBM Corp. 2009
* Author(s): Hans-Joachim Picht <hans@linux.vnet.ibm.com>
*
*/
#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/slab.h>
#include <linux/ctype.h>
#include <linux/kmod.h>
#include <linux/err.h>
#include <linux/errno.h>
#include <linux/proc_fs.h>
#include <linux/sysctl.h>
#include <linux/utsname.h>
#include "sclp.h"
static int callhome_enabled;
static struct sclp_req *request;
static struct sclp_async_sccb *sccb;
static int sclp_async_send_wait(char *message);
static struct ctl_table_header *callhome_sysctl_header;
static DEFINE_SPINLOCK(sclp_async_lock);
#define SCLP_NORMAL_WRITE 0x00
struct async_evbuf {
struct evbuf_header header;
u64 reserved;
u8 rflags;
u8 empty;
u8 rtype;
u8 otype;
char comp_id[12];
char data[3000]; /* there is still some space left */
} __attribute__((packed));
struct sclp_async_sccb {
struct sccb_header header;
struct async_evbuf evbuf;
} __attribute__((packed));
static struct sclp_register sclp_async_register = {
.send_mask = EVTYP_ASYNC_MASK,
};
static int call_home_on_panic(struct notifier_block *self,
unsigned long event, void *data)
{
strncat(data, init_utsname()->nodename,
sizeof(init_utsname()->nodename));
sclp_async_send_wait(data);
return NOTIFY_DONE;
}
static struct notifier_block call_home_panic_nb = {
.notifier_call = call_home_on_panic,
.priority = INT_MAX,
};
static int zero;
static int one = 1;
static struct ctl_table callhome_table[] = {
{
.procname = "callhome",
.data = &callhome_enabled,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
.extra1 = &zero,
.extra2 = &one,
},
{}
};
static struct ctl_table kern_dir_table[] = {
{
.procname = "kernel",
.maxlen = 0,
.mode = 0555,
.child = callhome_table,
},
{}
};
/*
* Function used to transfer asynchronous notification
* records which waits for send completion
*/
static int sclp_async_send_wait(char *message)
{
struct async_evbuf *evb;
int rc;
unsigned long flags;
if (!callhome_enabled)
return 0;
sccb->evbuf.header.type = EVTYP_ASYNC;
sccb->evbuf.rtype = 0xA5;
sccb->evbuf.otype = 0x00;
evb = &sccb->evbuf;
request->command = SCLP_CMDW_WRITE_EVENT_DATA;
request->sccb = sccb;
request->status = SCLP_REQ_FILLED;
strncpy(sccb->evbuf.data, message, sizeof(sccb->evbuf.data));
/*
* Retain Queue
* e.g. 5639CC140 500 Red Hat RHEL5 Linux for zSeries (RHEL AS)
*/
strncpy(sccb->evbuf.comp_id, CONFIG_SCLP_ASYNC_ID,
sizeof(sccb->evbuf.comp_id));
sccb->evbuf.header.length = sizeof(sccb->evbuf);
sccb->header.length = sizeof(sccb->evbuf) + sizeof(sccb->header);
sccb->header.function_code = SCLP_NORMAL_WRITE;
rc = sclp_add_request(request);
if (rc)
return rc;
spin_lock_irqsave(&sclp_async_lock, flags);
while (request->status != SCLP_REQ_DONE &&
request->status != SCLP_REQ_FAILED) {
sclp_sync_wait();
}
spin_unlock_irqrestore(&sclp_async_lock, flags);
if (request->status != SCLP_REQ_DONE)
return -EIO;
rc = ((struct sclp_async_sccb *)
request->sccb)->header.response_code;
if (rc != 0x0020)
return -EIO;
if (evb->header.flags != 0x80)
return -EIO;
return rc;
}
static int __init sclp_async_init(void)
{
int rc;
rc = sclp_register(&sclp_async_register);
if (rc)
return rc;
rc = -EOPNOTSUPP;
if (!(sclp_async_register.sclp_receive_mask & EVTYP_ASYNC_MASK))
goto out_sclp;
rc = -ENOMEM;
callhome_sysctl_header = register_sysctl_table(kern_dir_table);
if (!callhome_sysctl_header)
goto out_sclp;
request = kzalloc(sizeof(struct sclp_req), GFP_KERNEL);
sccb = (struct sclp_async_sccb *) get_zeroed_page(GFP_KERNEL | GFP_DMA);
if (!request || !sccb)
goto out_mem;
rc = atomic_notifier_chain_register(&panic_notifier_list,
&call_home_panic_nb);
if (!rc)
goto out;
out_mem:
kfree(request);
free_page((unsigned long) sccb);
unregister_sysctl_table(callhome_sysctl_header);
out_sclp:
sclp_unregister(&sclp_async_register);
out:
return rc;
}
module_init(sclp_async_init);
static void __exit sclp_async_exit(void)
{
atomic_notifier_chain_unregister(&panic_notifier_list,
&call_home_panic_nb);
unregister_sysctl_table(callhome_sysctl_header);
sclp_unregister(&sclp_async_register);
free_page((unsigned long) sccb);
kfree(request);
}
module_exit(sclp_async_exit);
MODULE_AUTHOR("Copyright IBM Corp. 2009");
MODULE_AUTHOR("Hans-Joachim Picht <hans@linux.vnet.ibm.com>");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("SCLP Asynchronous Notification Records");

View File

@ -4,7 +4,7 @@
* dumps on SCSI disks (zfcpdump). The "zcore/mem" debugfs file shows the same
* dump format as s390 standalone dumps.
*
* For more information please refer to Documentation/s390/zfcpdump.txt
* For more information please refer to Documentation/s390/zfcpdump.rst
*
* Copyright IBM Corp. 2003, 2008
* Author(s): Michael Holzheu

View File

@ -16,9 +16,11 @@
#include <linux/mutex.h>
#include <linux/rculist.h>
#include <linux/slab.h>
#include <linux/dmapool.h>
#include <asm/airq.h>
#include <asm/isc.h>
#include <asm/cio.h>
#include "cio.h"
#include "cio_debug.h"
@ -27,7 +29,7 @@
static DEFINE_SPINLOCK(airq_lists_lock);
static struct hlist_head airq_lists[MAX_ISC+1];
static struct kmem_cache *airq_iv_cache;
static struct dma_pool *airq_iv_cache;
/**
* register_adapter_interrupt() - register adapter interrupt handler
@ -115,6 +117,11 @@ void __init init_airq_interrupts(void)
setup_irq(THIN_INTERRUPT, &airq_interrupt);
}
static inline unsigned long iv_size(unsigned long bits)
{
return BITS_TO_LONGS(bits) * sizeof(unsigned long);
}
/**
* airq_iv_create - create an interrupt vector
* @bits: number of bits in the interrupt vector
@ -132,17 +139,19 @@ struct airq_iv *airq_iv_create(unsigned long bits, unsigned long flags)
goto out;
iv->bits = bits;
iv->flags = flags;
size = BITS_TO_LONGS(bits) * sizeof(unsigned long);
size = iv_size(bits);
if (flags & AIRQ_IV_CACHELINE) {
if ((cache_line_size() * BITS_PER_BYTE) < bits)
if ((cache_line_size() * BITS_PER_BYTE) < bits
|| !airq_iv_cache)
goto out_free;
iv->vector = kmem_cache_zalloc(airq_iv_cache, GFP_KERNEL);
iv->vector = dma_pool_zalloc(airq_iv_cache, GFP_KERNEL,
&iv->vector_dma);
if (!iv->vector)
goto out_free;
} else {
iv->vector = kzalloc(size, GFP_KERNEL);
iv->vector = cio_dma_zalloc(size);
if (!iv->vector)
goto out_free;
}
@ -178,10 +187,10 @@ out_free:
kfree(iv->ptr);
kfree(iv->bitlock);
kfree(iv->avail);
if (iv->flags & AIRQ_IV_CACHELINE)
kmem_cache_free(airq_iv_cache, iv->vector);
if (iv->flags & AIRQ_IV_CACHELINE && iv->vector)
dma_pool_free(airq_iv_cache, iv->vector, iv->vector_dma);
else
kfree(iv->vector);
cio_dma_free(iv->vector, size);
kfree(iv);
out:
return NULL;
@ -198,9 +207,9 @@ void airq_iv_release(struct airq_iv *iv)
kfree(iv->ptr);
kfree(iv->bitlock);
if (iv->flags & AIRQ_IV_CACHELINE)
kmem_cache_free(airq_iv_cache, iv->vector);
dma_pool_free(airq_iv_cache, iv->vector, iv->vector_dma);
else
kfree(iv->vector);
cio_dma_free(iv->vector, iv_size(iv->bits));
kfree(iv->avail);
kfree(iv);
}
@ -295,12 +304,12 @@ unsigned long airq_iv_scan(struct airq_iv *iv, unsigned long start,
}
EXPORT_SYMBOL(airq_iv_scan);
static int __init airq_init(void)
int __init airq_init(void)
{
airq_iv_cache = kmem_cache_create("airq_iv_cache", cache_line_size(),
cache_line_size(), 0, NULL);
airq_iv_cache = dma_pool_create("airq_iv_cache", cio_get_dma_css_dev(),
cache_line_size(),
cache_line_size(), PAGE_SIZE);
if (!airq_iv_cache)
return -ENOMEM;
return 0;
}
subsys_initcall(airq_init);
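As a rough illustration of how a driver is expected to consume the reworked airq_iv interface after this change (a sketch only, not part of the patch; the airq_iv_create()/airq_iv_release() calls and the AIRQ_IV_CACHELINE flag come from the hunk above, everything else is invented for the example):

/* Sketch: allocate a small adapter-interrupt vector whose bit vector is
 * carved out of the new airq_iv_cache dma_pool because it fits into a
 * single cache line. Sizes and the surrounding driver are hypothetical. */
static struct airq_iv *example_iv;

static int example_setup_vector(void)
{
        example_iv = airq_iv_create(64, AIRQ_IV_CACHELINE);
        if (!example_iv)
                return -ENOMEM;
        return 0;
}

static void example_teardown_vector(void)
{
        airq_iv_release(example_iv);
}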

View File

@ -63,7 +63,7 @@ static void ccwreq_stop(struct ccw_device *cdev, int rc)
return;
req->done = 1;
ccw_device_set_timeout(cdev, 0);
memset(&cdev->private->irb, 0, sizeof(struct irb));
memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
if (rc && rc != -ENODEV && req->drc)
rc = req->drc;
req->callback(cdev, req->data, rc);
@ -86,7 +86,7 @@ static void ccwreq_do(struct ccw_device *cdev)
continue;
}
/* Perform start function. */
memset(&cdev->private->irb, 0, sizeof(struct irb));
memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
rc = cio_start(sch, cp, (u8) req->mask);
if (rc == 0) {
/* I/O started successfully. */
@ -169,7 +169,7 @@ int ccw_request_cancel(struct ccw_device *cdev)
*/
static enum io_status ccwreq_status(struct ccw_device *cdev, struct irb *lcirb)
{
struct irb *irb = &cdev->private->irb;
struct irb *irb = &cdev->private->dma_area->irb;
struct cmd_scsw *scsw = &irb->scsw.cmd;
enum uc_todo todo;
@ -187,7 +187,8 @@ static enum io_status ccwreq_status(struct ccw_device *cdev, struct irb *lcirb)
CIO_TRACE_EVENT(2, "sensedata");
CIO_HEX_EVENT(2, &cdev->private->dev_id,
sizeof(struct ccw_dev_id));
CIO_HEX_EVENT(2, &cdev->private->irb.ecw, SENSE_MAX_COUNT);
CIO_HEX_EVENT(2, &cdev->private->dma_area->irb.ecw,
SENSE_MAX_COUNT);
/* Check for command reject. */
if (irb->ecw[0] & SNS0_CMD_REJECT)
return IO_REJECTED;

View File

@ -322,36 +322,6 @@ struct chsc_sei {
} u;
} __packed __aligned(PAGE_SIZE);
/*
* Node Descriptor as defined in SA22-7204, "Common I/O-Device Commands"
*/
#define ND_VALIDITY_VALID 0
#define ND_VALIDITY_OUTDATED 1
#define ND_VALIDITY_INVALID 2
struct node_descriptor {
/* Flags. */
union {
struct {
u32 validity:3;
u32 reserved:5;
} __packed;
u8 byte0;
} __packed;
/* Node parameters. */
u32 params:24;
/* Node ID. */
char type[6];
char model[3];
char manufacturer[3];
char plant[2];
char seq[12];
u16 tag;
} __packed;
/*
* Link Incident Record as defined in SA22-7202, "ESCON I/O Interface"
*/

View File

@ -113,6 +113,7 @@ struct subchannel {
enum sch_todo todo;
struct work_struct todo_work;
struct schib_config config;
char *driver_override; /* Driver name to force a match */
} __attribute__ ((aligned(8)));
DECLARE_PER_CPU_ALIGNED(struct irb, cio_irb);
@ -135,6 +136,8 @@ extern int cio_commit_config(struct subchannel *sch);
int cio_tm_start_key(struct subchannel *sch, struct tcw *tcw, u8 lpm, u8 key);
int cio_tm_intrg(struct subchannel *sch);
extern int __init airq_init(void);
/* Use with care. */
#ifdef CONFIG_CCW_CONSOLE
extern struct subchannel *cio_probe_console(void);

View File

@ -20,6 +20,8 @@
#include <linux/reboot.h>
#include <linux/suspend.h>
#include <linux/proc_fs.h>
#include <linux/genalloc.h>
#include <linux/dma-mapping.h>
#include <asm/isc.h>
#include <asm/crw.h>
@ -165,6 +167,7 @@ static void css_subchannel_release(struct device *dev)
sch->config.intparm = 0;
cio_commit_config(sch);
kfree(sch->driver_override);
kfree(sch->lock);
kfree(sch);
}
@ -224,6 +227,12 @@ struct subchannel *css_alloc_subchannel(struct subchannel_id schid,
INIT_WORK(&sch->todo_work, css_sch_todo);
sch->dev.release = &css_subchannel_release;
device_initialize(&sch->dev);
/*
* The physical addresses of some of the dma structures that can
* belong to a subchannel need to fit 31 bit width (e.g. ccw).
*/
sch->dev.coherent_dma_mask = DMA_BIT_MASK(31);
sch->dev.dma_mask = &sch->dev.coherent_dma_mask;
return sch;
err:
@ -315,9 +324,57 @@ static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
static DEVICE_ATTR_RO(modalias);
static ssize_t driver_override_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
struct subchannel *sch = to_subchannel(dev);
char *driver_override, *old, *cp;
/* We need to keep extra room for a newline */
if (count >= (PAGE_SIZE - 1))
return -EINVAL;
driver_override = kstrndup(buf, count, GFP_KERNEL);
if (!driver_override)
return -ENOMEM;
cp = strchr(driver_override, '\n');
if (cp)
*cp = '\0';
device_lock(dev);
old = sch->driver_override;
if (strlen(driver_override)) {
sch->driver_override = driver_override;
} else {
kfree(driver_override);
sch->driver_override = NULL;
}
device_unlock(dev);
kfree(old);
return count;
}
static ssize_t driver_override_show(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct subchannel *sch = to_subchannel(dev);
ssize_t len;
device_lock(dev);
len = snprintf(buf, PAGE_SIZE, "%s\n", sch->driver_override);
device_unlock(dev);
return len;
}
static DEVICE_ATTR_RW(driver_override);
static struct attribute *subch_attrs[] = {
&dev_attr_type.attr,
&dev_attr_modalias.attr,
&dev_attr_driver_override.attr,
NULL,
};
@ -899,6 +956,13 @@ static int __init setup_css(int nr)
dev_set_name(&css->device, "css%x", nr);
css->device.groups = cssdev_attr_groups;
css->device.release = channel_subsystem_release;
/*
* We currently allocate notifier bits with this (using
* css->device as the device argument with the DMA API)
* and are fine with 64 bit addresses.
*/
css->device.coherent_dma_mask = DMA_BIT_MASK(64);
css->device.dma_mask = &css->device.coherent_dma_mask;
mutex_init(&css->mutex);
css->cssid = chsc_get_cssid(nr);
@ -1018,6 +1082,111 @@ static struct notifier_block css_power_notifier = {
.notifier_call = css_power_event,
};
#define CIO_DMA_GFP (GFP_KERNEL | __GFP_ZERO)
static struct gen_pool *cio_dma_pool;
/* Currently cio supports only a single css */
struct device *cio_get_dma_css_dev(void)
{
return &channel_subsystems[0]->device;
}
struct gen_pool *cio_gp_dma_create(struct device *dma_dev, int nr_pages)
{
struct gen_pool *gp_dma;
void *cpu_addr;
dma_addr_t dma_addr;
int i;
gp_dma = gen_pool_create(3, -1);
if (!gp_dma)
return NULL;
for (i = 0; i < nr_pages; ++i) {
cpu_addr = dma_alloc_coherent(dma_dev, PAGE_SIZE, &dma_addr,
CIO_DMA_GFP);
if (!cpu_addr)
return gp_dma;
gen_pool_add_virt(gp_dma, (unsigned long) cpu_addr,
dma_addr, PAGE_SIZE, -1);
}
return gp_dma;
}
static void __gp_dma_free_dma(struct gen_pool *pool,
struct gen_pool_chunk *chunk, void *data)
{
size_t chunk_size = chunk->end_addr - chunk->start_addr + 1;
dma_free_coherent((struct device *) data, chunk_size,
(void *) chunk->start_addr,
(dma_addr_t) chunk->phys_addr);
}
void cio_gp_dma_destroy(struct gen_pool *gp_dma, struct device *dma_dev)
{
if (!gp_dma)
return;
/* this is quite ugly but no better idea */
gen_pool_for_each_chunk(gp_dma, __gp_dma_free_dma, dma_dev);
gen_pool_destroy(gp_dma);
}
static int cio_dma_pool_init(void)
{
/* No need to free up the resources: compiled in */
cio_dma_pool = cio_gp_dma_create(cio_get_dma_css_dev(), 1);
if (!cio_dma_pool)
return -ENOMEM;
return 0;
}
void *cio_gp_dma_zalloc(struct gen_pool *gp_dma, struct device *dma_dev,
size_t size)
{
dma_addr_t dma_addr;
unsigned long addr;
size_t chunk_size;
if (!gp_dma)
return NULL;
addr = gen_pool_alloc(gp_dma, size);
while (!addr) {
chunk_size = round_up(size, PAGE_SIZE);
addr = (unsigned long) dma_alloc_coherent(dma_dev,
chunk_size, &dma_addr, CIO_DMA_GFP);
if (!addr)
return NULL;
gen_pool_add_virt(gp_dma, addr, dma_addr, chunk_size, -1);
addr = gen_pool_alloc(gp_dma, size);
}
return (void *) addr;
}
void cio_gp_dma_free(struct gen_pool *gp_dma, void *cpu_addr, size_t size)
{
if (!cpu_addr)
return;
memset(cpu_addr, 0, size);
gen_pool_free(gp_dma, (unsigned long) cpu_addr, size);
}
/*
* Allocate dma memory from the css global pool. Intended for memory not
* specific to any single device within the css. The allocated memory
* is not guaranteed to be 31-bit addressable.
*
* Caution: Not suitable for early stuff like console.
*/
void *cio_dma_zalloc(size_t size)
{
return cio_gp_dma_zalloc(cio_dma_pool, cio_get_dma_css_dev(), size);
}
void cio_dma_free(void *cpu_addr, size_t size)
{
cio_gp_dma_free(cio_dma_pool, cpu_addr, size);
}
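As a hedged sketch of how cio-internal code is meant to use the new css-global DMA pool helpers above (only cio_dma_zalloc()/cio_dma_free() come from this hunk; the calling function and the size are made up for illustration):

/* Illustration only: grab a small 64-bit addressable buffer from the css
 * global DMA pool and hand it back. Not suitable for early/console code,
 * as the comment above cio_dma_zalloc() points out. */
static int example_use_cio_dma(void)
{
        void *buf;

        buf = cio_dma_zalloc(256);      /* arbitrary size for the example */
        if (!buf)
                return -ENOMEM;

        /* ... use buf for channel-subsystem related data ... */

        cio_dma_free(buf, 256);
        return 0;
}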
/*
* Now that the driver core is running, we can setup our channel subsystem.
* The struct subchannel's are created during probing.
@ -1059,16 +1228,22 @@ static int __init css_bus_init(void)
if (ret)
goto out_unregister;
ret = register_pm_notifier(&css_power_notifier);
if (ret) {
unregister_reboot_notifier(&css_reboot_notifier);
goto out_unregister;
}
if (ret)
goto out_unregister_rn;
ret = cio_dma_pool_init();
if (ret)
goto out_unregister_pmn;
airq_init();
css_init_done = 1;
/* Enable default isc for I/O subchannels. */
isc_register(IO_SCH_ISC);
return 0;
out_unregister_pmn:
unregister_pm_notifier(&css_power_notifier);
out_unregister_rn:
unregister_reboot_notifier(&css_reboot_notifier);
out_unregister:
while (i-- > 0) {
struct channel_subsystem *css = channel_subsystems[i];
@ -1222,6 +1397,10 @@ static int css_bus_match(struct device *dev, struct device_driver *drv)
struct css_driver *driver = to_cssdriver(drv);
struct css_device_id *id;
/* When driver_override is set, only bind to the matching driver */
if (sch->driver_override && strcmp(sch->driver_override, drv->name))
return 0;
for (id = driver->subchannel_type; id->match_flags; id++) {
if (sch->st == id->type)
return 1;

View File

@ -24,6 +24,7 @@
#include <linux/timer.h>
#include <linux/kernel_stat.h>
#include <linux/sched/signal.h>
#include <linux/dma-mapping.h>
#include <asm/ccwdev.h>
#include <asm/cio.h>
@ -687,6 +688,9 @@ ccw_device_release(struct device *dev)
struct ccw_device *cdev;
cdev = to_ccwdev(dev);
cio_gp_dma_free(cdev->private->dma_pool, cdev->private->dma_area,
sizeof(*cdev->private->dma_area));
cio_gp_dma_destroy(cdev->private->dma_pool, &cdev->dev);
/* Release reference of parent subchannel. */
put_device(cdev->dev.parent);
kfree(cdev->private);
@ -696,15 +700,33 @@ ccw_device_release(struct device *dev)
static struct ccw_device * io_subchannel_allocate_dev(struct subchannel *sch)
{
struct ccw_device *cdev;
struct gen_pool *dma_pool;
cdev = kzalloc(sizeof(*cdev), GFP_KERNEL);
if (cdev) {
cdev->private = kzalloc(sizeof(struct ccw_device_private),
GFP_KERNEL | GFP_DMA);
if (cdev->private)
return cdev;
}
if (!cdev)
goto err_cdev;
cdev->private = kzalloc(sizeof(struct ccw_device_private),
GFP_KERNEL | GFP_DMA);
if (!cdev->private)
goto err_priv;
cdev->dev.coherent_dma_mask = sch->dev.coherent_dma_mask;
cdev->dev.dma_mask = &cdev->dev.coherent_dma_mask;
dma_pool = cio_gp_dma_create(&cdev->dev, 1);
if (!dma_pool)
goto err_dma_pool;
cdev->private->dma_pool = dma_pool;
cdev->private->dma_area = cio_gp_dma_zalloc(dma_pool, &cdev->dev,
sizeof(*cdev->private->dma_area));
if (!cdev->private->dma_area)
goto err_dma_area;
return cdev;
err_dma_area:
cio_gp_dma_destroy(dma_pool, &cdev->dev);
err_dma_pool:
kfree(cdev->private);
err_priv:
kfree(cdev);
err_cdev:
return ERR_PTR(-ENOMEM);
}
@ -884,7 +906,7 @@ io_subchannel_recog_done(struct ccw_device *cdev)
wake_up(&ccw_device_init_wq);
break;
case DEV_STATE_OFFLINE:
/*
/*
* We can't register the device in interrupt context so
* we schedule a work item.
*/
@ -1062,6 +1084,14 @@ static int io_subchannel_probe(struct subchannel *sch)
if (!io_priv)
goto out_schedule;
io_priv->dma_area = dma_alloc_coherent(&sch->dev,
sizeof(*io_priv->dma_area),
&io_priv->dma_area_dma, GFP_KERNEL);
if (!io_priv->dma_area) {
kfree(io_priv);
goto out_schedule;
}
set_io_private(sch, io_priv);
css_schedule_eval(sch->schid);
return 0;
@ -1088,6 +1118,8 @@ static int io_subchannel_remove(struct subchannel *sch)
set_io_private(sch, NULL);
spin_unlock_irq(sch->lock);
out_free:
dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area),
io_priv->dma_area, io_priv->dma_area_dma);
kfree(io_priv);
sysfs_remove_group(&sch->dev.kobj, &io_subchannel_attr_group);
return 0;
@ -1593,13 +1625,19 @@ struct ccw_device * __init ccw_device_create_console(struct ccw_driver *drv)
return ERR_CAST(sch);
io_priv = kzalloc(sizeof(*io_priv), GFP_KERNEL | GFP_DMA);
if (!io_priv) {
put_device(&sch->dev);
return ERR_PTR(-ENOMEM);
}
if (!io_priv)
goto err_priv;
io_priv->dma_area = dma_alloc_coherent(&sch->dev,
sizeof(*io_priv->dma_area),
&io_priv->dma_area_dma, GFP_KERNEL);
if (!io_priv->dma_area)
goto err_dma_area;
set_io_private(sch, io_priv);
cdev = io_subchannel_create_ccwdev(sch);
if (IS_ERR(cdev)) {
dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area),
io_priv->dma_area, io_priv->dma_area_dma);
set_io_private(sch, NULL);
put_device(&sch->dev);
kfree(io_priv);
return cdev;
@ -1607,6 +1645,12 @@ struct ccw_device * __init ccw_device_create_console(struct ccw_driver *drv)
cdev->drv = drv;
ccw_device_set_int_class(cdev);
return cdev;
err_dma_area:
kfree(io_priv);
err_priv:
put_device(&sch->dev);
return ERR_PTR(-ENOMEM);
}
void __init ccw_device_destroy_console(struct ccw_device *cdev)
@ -1617,6 +1661,8 @@ void __init ccw_device_destroy_console(struct ccw_device *cdev)
set_io_private(sch, NULL);
put_device(&sch->dev);
put_device(&cdev->dev);
dma_free_coherent(&sch->dev, sizeof(*io_priv->dma_area),
io_priv->dma_area, io_priv->dma_area_dma);
kfree(io_priv);
}

View File

@ -67,8 +67,10 @@ static void ccw_timeout_log(struct ccw_device *cdev)
sizeof(struct tcw), 0);
} else {
printk(KERN_WARNING "cio: orb indicates command mode\n");
if ((void *)(addr_t)orb->cmd.cpa == &private->sense_ccw ||
(void *)(addr_t)orb->cmd.cpa == cdev->private->iccws)
if ((void *)(addr_t)orb->cmd.cpa ==
&private->dma_area->sense_ccw ||
(void *)(addr_t)orb->cmd.cpa ==
cdev->private->dma_area->iccws)
printk(KERN_WARNING "cio: last channel program "
"(intern):\n");
else
@ -143,18 +145,22 @@ ccw_device_cancel_halt_clear(struct ccw_device *cdev)
void ccw_device_update_sense_data(struct ccw_device *cdev)
{
memset(&cdev->id, 0, sizeof(cdev->id));
cdev->id.cu_type = cdev->private->senseid.cu_type;
cdev->id.cu_model = cdev->private->senseid.cu_model;
cdev->id.dev_type = cdev->private->senseid.dev_type;
cdev->id.dev_model = cdev->private->senseid.dev_model;
cdev->id.cu_type = cdev->private->dma_area->senseid.cu_type;
cdev->id.cu_model = cdev->private->dma_area->senseid.cu_model;
cdev->id.dev_type = cdev->private->dma_area->senseid.dev_type;
cdev->id.dev_model = cdev->private->dma_area->senseid.dev_model;
}
int ccw_device_test_sense_data(struct ccw_device *cdev)
{
return cdev->id.cu_type == cdev->private->senseid.cu_type &&
cdev->id.cu_model == cdev->private->senseid.cu_model &&
cdev->id.dev_type == cdev->private->senseid.dev_type &&
cdev->id.dev_model == cdev->private->senseid.dev_model;
return cdev->id.cu_type ==
cdev->private->dma_area->senseid.cu_type &&
cdev->id.cu_model ==
cdev->private->dma_area->senseid.cu_model &&
cdev->id.dev_type ==
cdev->private->dma_area->senseid.dev_type &&
cdev->id.dev_model ==
cdev->private->dma_area->senseid.dev_model;
}
/*
@ -342,7 +348,7 @@ ccw_device_done(struct ccw_device *cdev, int state)
cio_disable_subchannel(sch);
/* Reset device status. */
memset(&cdev->private->irb, 0, sizeof(struct irb));
memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
cdev->private->state = state;
@ -509,13 +515,14 @@ callback:
ccw_device_done(cdev, DEV_STATE_ONLINE);
/* Deliver fake irb to device driver, if needed. */
if (cdev->private->flags.fake_irb) {
create_fake_irb(&cdev->private->irb,
create_fake_irb(&cdev->private->dma_area->irb,
cdev->private->flags.fake_irb);
cdev->private->flags.fake_irb = 0;
if (cdev->handler)
cdev->handler(cdev, cdev->private->intparm,
&cdev->private->irb);
memset(&cdev->private->irb, 0, sizeof(struct irb));
&cdev->private->dma_area->irb);
memset(&cdev->private->dma_area->irb, 0,
sizeof(struct irb));
}
ccw_device_report_path_events(cdev);
ccw_device_handle_broken_paths(cdev);
@ -672,7 +679,8 @@ ccw_device_online_verify(struct ccw_device *cdev, enum dev_event dev_event)
if (scsw_actl(&sch->schib.scsw) != 0 ||
(scsw_stctl(&sch->schib.scsw) & SCSW_STCTL_STATUS_PEND) ||
(scsw_stctl(&cdev->private->irb.scsw) & SCSW_STCTL_STATUS_PEND)) {
(scsw_stctl(&cdev->private->dma_area->irb.scsw) &
SCSW_STCTL_STATUS_PEND)) {
/*
* No final status yet or final status not yet delivered
* to the device driver. Can't do path verification now,
@ -719,7 +727,7 @@ static int ccw_device_call_handler(struct ccw_device *cdev)
* - fast notification was requested (primary status)
* - unsolicited interrupts
*/
stctl = scsw_stctl(&cdev->private->irb.scsw);
stctl = scsw_stctl(&cdev->private->dma_area->irb.scsw);
ending_status = (stctl & SCSW_STCTL_SEC_STATUS) ||
(stctl == (SCSW_STCTL_ALERT_STATUS | SCSW_STCTL_STATUS_PEND)) ||
(stctl == SCSW_STCTL_STATUS_PEND);
@ -735,9 +743,9 @@ static int ccw_device_call_handler(struct ccw_device *cdev)
if (cdev->handler)
cdev->handler(cdev, cdev->private->intparm,
&cdev->private->irb);
&cdev->private->dma_area->irb);
memset(&cdev->private->irb, 0, sizeof(struct irb));
memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
return 1;
}
@ -759,7 +767,8 @@ ccw_device_irq(struct ccw_device *cdev, enum dev_event dev_event)
/* Unit check but no sense data. Need basic sense. */
if (ccw_device_do_sense(cdev, irb) != 0)
goto call_handler_unsol;
memcpy(&cdev->private->irb, irb, sizeof(struct irb));
memcpy(&cdev->private->dma_area->irb, irb,
sizeof(struct irb));
cdev->private->state = DEV_STATE_W4SENSE;
cdev->private->intparm = 0;
return;
@ -842,7 +851,7 @@ ccw_device_w4sense(struct ccw_device *cdev, enum dev_event dev_event)
if (scsw_fctl(&irb->scsw) &
(SCSW_FCTL_CLEAR_FUNC | SCSW_FCTL_HALT_FUNC)) {
cdev->private->flags.dosense = 0;
memset(&cdev->private->irb, 0, sizeof(struct irb));
memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
ccw_device_accumulate_irb(cdev, irb);
goto call_handler;
}

View File

@ -99,7 +99,7 @@ static int diag210_to_senseid(struct senseid *senseid, struct diag210 *diag)
static int diag210_get_dev_info(struct ccw_device *cdev)
{
struct ccw_dev_id *dev_id = &cdev->private->dev_id;
struct senseid *senseid = &cdev->private->senseid;
struct senseid *senseid = &cdev->private->dma_area->senseid;
struct diag210 diag_data;
int rc;
@ -134,8 +134,10 @@ err_failed:
static void snsid_init(struct ccw_device *cdev)
{
cdev->private->flags.esid = 0;
memset(&cdev->private->senseid, 0, sizeof(cdev->private->senseid));
cdev->private->senseid.cu_type = 0xffff;
memset(&cdev->private->dma_area->senseid, 0,
sizeof(cdev->private->dma_area->senseid));
cdev->private->dma_area->senseid.cu_type = 0xffff;
}
/*
@ -143,16 +145,16 @@ static void snsid_init(struct ccw_device *cdev)
*/
static int snsid_check(struct ccw_device *cdev, void *data)
{
struct cmd_scsw *scsw = &cdev->private->irb.scsw.cmd;
struct cmd_scsw *scsw = &cdev->private->dma_area->irb.scsw.cmd;
int len = sizeof(struct senseid) - scsw->count;
/* Check for incomplete SENSE ID data. */
if (len < SENSE_ID_MIN_LEN)
goto out_restart;
if (cdev->private->senseid.cu_type == 0xffff)
if (cdev->private->dma_area->senseid.cu_type == 0xffff)
goto out_restart;
/* Check for incompatible SENSE ID data. */
if (cdev->private->senseid.reserved != 0xff)
if (cdev->private->dma_area->senseid.reserved != 0xff)
return -EOPNOTSUPP;
/* Check for extended-identification information. */
if (len > SENSE_ID_BASIC_LEN)
@ -170,7 +172,7 @@ out_restart:
static void snsid_callback(struct ccw_device *cdev, void *data, int rc)
{
struct ccw_dev_id *id = &cdev->private->dev_id;
struct senseid *senseid = &cdev->private->senseid;
struct senseid *senseid = &cdev->private->dma_area->senseid;
int vm = 0;
if (rc && MACHINE_IS_VM) {
@ -200,7 +202,7 @@ void ccw_device_sense_id_start(struct ccw_device *cdev)
{
struct subchannel *sch = to_subchannel(cdev->dev.parent);
struct ccw_request *req = &cdev->private->req;
struct ccw1 *cp = cdev->private->iccws;
struct ccw1 *cp = cdev->private->dma_area->iccws;
CIO_TRACE_EVENT(4, "snsid");
CIO_HEX_EVENT(4, &cdev->private->dev_id, sizeof(cdev->private->dev_id));
@ -208,7 +210,7 @@ void ccw_device_sense_id_start(struct ccw_device *cdev)
snsid_init(cdev);
/* Channel program setup. */
cp->cmd_code = CCW_CMD_SENSE_ID;
cp->cda = (u32) (addr_t) &cdev->private->senseid;
cp->cda = (u32) (addr_t) &cdev->private->dma_area->senseid;
cp->count = sizeof(struct senseid);
cp->flags = CCW_FLAG_SLI;
/* Request setup. */

View File

@ -429,8 +429,8 @@ struct ciw *ccw_device_get_ciw(struct ccw_device *cdev, __u32 ct)
if (cdev->private->flags.esid == 0)
return NULL;
for (ciw_cnt = 0; ciw_cnt < MAX_CIWS; ciw_cnt++)
if (cdev->private->senseid.ciw[ciw_cnt].ct == ct)
return cdev->private->senseid.ciw + ciw_cnt;
if (cdev->private->dma_area->senseid.ciw[ciw_cnt].ct == ct)
return cdev->private->dma_area->senseid.ciw + ciw_cnt;
return NULL;
}
@ -699,6 +699,23 @@ void ccw_device_get_schid(struct ccw_device *cdev, struct subchannel_id *schid)
}
EXPORT_SYMBOL_GPL(ccw_device_get_schid);
/*
* Allocate zeroed dma coherent 31 bit addressable memory using
* the subchannel's dma pool. The maximum supported allocation size
* is PAGE_SIZE.
*/
void *ccw_device_dma_zalloc(struct ccw_device *cdev, size_t size)
{
return cio_gp_dma_zalloc(cdev->private->dma_pool, &cdev->dev, size);
}
EXPORT_SYMBOL(ccw_device_dma_zalloc);
void ccw_device_dma_free(struct ccw_device *cdev, void *cpu_addr, size_t size)
{
cio_gp_dma_free(cdev->private->dma_pool, cpu_addr, size);
}
EXPORT_SYMBOL(ccw_device_dma_free);
EXPORT_SYMBOL(ccw_device_set_options_mask);
EXPORT_SYMBOL(ccw_device_set_options);
EXPORT_SYMBOL(ccw_device_clear_options);
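A minimal sketch of how a CCW device driver would use the two helpers exported above (struct example_cb and the wrapper functions are invented; only ccw_device_dma_zalloc()/ccw_device_dma_free() come from the patch):

/* Sketch: a per-device, 31-bit addressable control block, e.g. for channel
 * program data. Allocations come from the gen_pool created per ccw_device
 * and are limited to PAGE_SIZE. */
struct example_cb {
        struct ccw1 ccw;
        u8 data[64];
};

static struct example_cb *example_alloc_cb(struct ccw_device *cdev)
{
        return ccw_device_dma_zalloc(cdev, sizeof(struct example_cb));
}

static void example_free_cb(struct ccw_device *cdev, struct example_cb *cb)
{
        ccw_device_dma_free(cdev, cb, sizeof(*cb));
}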

View File

@ -57,7 +57,7 @@ out:
static void nop_build_cp(struct ccw_device *cdev)
{
struct ccw_request *req = &cdev->private->req;
struct ccw1 *cp = cdev->private->iccws;
struct ccw1 *cp = cdev->private->dma_area->iccws;
cp->cmd_code = CCW_CMD_NOOP;
cp->cda = 0;
@ -134,9 +134,9 @@ err:
static void spid_build_cp(struct ccw_device *cdev, u8 fn)
{
struct ccw_request *req = &cdev->private->req;
struct ccw1 *cp = cdev->private->iccws;
struct ccw1 *cp = cdev->private->dma_area->iccws;
int i = pathmask_to_pos(req->lpm);
struct pgid *pgid = &cdev->private->pgid[i];
struct pgid *pgid = &cdev->private->dma_area->pgid[i];
pgid->inf.fc = fn;
cp->cmd_code = CCW_CMD_SET_PGID;
@ -300,7 +300,7 @@ static int pgid_cmp(struct pgid *p1, struct pgid *p2)
static void pgid_analyze(struct ccw_device *cdev, struct pgid **p,
int *mismatch, u8 *reserved, u8 *reset)
{
struct pgid *pgid = &cdev->private->pgid[0];
struct pgid *pgid = &cdev->private->dma_area->pgid[0];
struct pgid *first = NULL;
int lpm;
int i;
@ -342,7 +342,7 @@ static u8 pgid_to_donepm(struct ccw_device *cdev)
lpm = 0x80 >> i;
if ((cdev->private->pgid_valid_mask & lpm) == 0)
continue;
pgid = &cdev->private->pgid[i];
pgid = &cdev->private->dma_area->pgid[i];
if (sch->opm & lpm) {
if (pgid->inf.ps.state1 != SNID_STATE1_GROUPED)
continue;
@ -368,7 +368,8 @@ static void pgid_fill(struct ccw_device *cdev, struct pgid *pgid)
int i;
for (i = 0; i < 8; i++)
memcpy(&cdev->private->pgid[i], pgid, sizeof(struct pgid));
memcpy(&cdev->private->dma_area->pgid[i], pgid,
sizeof(struct pgid));
}
/*
@ -435,12 +436,12 @@ out:
static void snid_build_cp(struct ccw_device *cdev)
{
struct ccw_request *req = &cdev->private->req;
struct ccw1 *cp = cdev->private->iccws;
struct ccw1 *cp = cdev->private->dma_area->iccws;
int i = pathmask_to_pos(req->lpm);
/* Channel program setup. */
cp->cmd_code = CCW_CMD_SENSE_PGID;
cp->cda = (u32) (addr_t) &cdev->private->pgid[i];
cp->cda = (u32) (addr_t) &cdev->private->dma_area->pgid[i];
cp->count = sizeof(struct pgid);
cp->flags = CCW_FLAG_SLI;
req->cp = cp;
@ -516,7 +517,8 @@ static void verify_start(struct ccw_device *cdev)
sch->lpm = sch->schib.pmcw.pam;
/* Initialize PGID data. */
memset(cdev->private->pgid, 0, sizeof(cdev->private->pgid));
memset(cdev->private->dma_area->pgid, 0,
sizeof(cdev->private->dma_area->pgid));
cdev->private->pgid_valid_mask = 0;
cdev->private->pgid_todo_mask = sch->schib.pmcw.pam;
cdev->private->path_notoper_mask = 0;
@ -626,7 +628,7 @@ struct stlck_data {
static void stlck_build_cp(struct ccw_device *cdev, void *buf1, void *buf2)
{
struct ccw_request *req = &cdev->private->req;
struct ccw1 *cp = cdev->private->iccws;
struct ccw1 *cp = cdev->private->dma_area->iccws;
cp[0].cmd_code = CCW_CMD_STLCK;
cp[0].cda = (u32) (addr_t) buf1;

View File

@ -79,15 +79,15 @@ ccw_device_accumulate_ecw(struct ccw_device *cdev, struct irb *irb)
* are conditions that have to be met for the extended control
* bit to have meaning. Sick.
*/
cdev->private->irb.scsw.cmd.ectl = 0;
cdev->private->dma_area->irb.scsw.cmd.ectl = 0;
if ((irb->scsw.cmd.stctl & SCSW_STCTL_ALERT_STATUS) &&
!(irb->scsw.cmd.stctl & SCSW_STCTL_INTER_STATUS))
cdev->private->irb.scsw.cmd.ectl = irb->scsw.cmd.ectl;
cdev->private->dma_area->irb.scsw.cmd.ectl = irb->scsw.cmd.ectl;
/* Check if extended control word is valid. */
if (!cdev->private->irb.scsw.cmd.ectl)
if (!cdev->private->dma_area->irb.scsw.cmd.ectl)
return;
/* Copy concurrent sense / model dependent information. */
memcpy (&cdev->private->irb.ecw, irb->ecw, sizeof (irb->ecw));
memcpy(&cdev->private->dma_area->irb.ecw, irb->ecw, sizeof(irb->ecw));
}
/*
@ -118,7 +118,7 @@ ccw_device_accumulate_esw(struct ccw_device *cdev, struct irb *irb)
if (!ccw_device_accumulate_esw_valid(irb))
return;
cdev_irb = &cdev->private->irb;
cdev_irb = &cdev->private->dma_area->irb;
/* Copy last path used mask. */
cdev_irb->esw.esw1.lpum = irb->esw.esw1.lpum;
@ -210,7 +210,7 @@ ccw_device_accumulate_irb(struct ccw_device *cdev, struct irb *irb)
ccw_device_path_notoper(cdev);
/* No irb accumulation for transport mode irbs. */
if (scsw_is_tm(&irb->scsw)) {
memcpy(&cdev->private->irb, irb, sizeof(struct irb));
memcpy(&cdev->private->dma_area->irb, irb, sizeof(struct irb));
return;
}
/*
@ -219,7 +219,7 @@ ccw_device_accumulate_irb(struct ccw_device *cdev, struct irb *irb)
if (!scsw_is_solicited(&irb->scsw))
return;
cdev_irb = &cdev->private->irb;
cdev_irb = &cdev->private->dma_area->irb;
/*
* If the clear function had been performed, all formerly pending
@ -227,7 +227,7 @@ ccw_device_accumulate_irb(struct ccw_device *cdev, struct irb *irb)
* intermediate accumulated status to the device driver.
*/
if (irb->scsw.cmd.fctl & SCSW_FCTL_CLEAR_FUNC)
memset(&cdev->private->irb, 0, sizeof(struct irb));
memset(&cdev->private->dma_area->irb, 0, sizeof(struct irb));
/* Copy bits which are valid only for the start function. */
if (irb->scsw.cmd.fctl & SCSW_FCTL_START_FUNC) {
@ -329,9 +329,9 @@ ccw_device_do_sense(struct ccw_device *cdev, struct irb *irb)
/*
* We have ending status but no sense information. Do a basic sense.
*/
sense_ccw = &to_io_private(sch)->sense_ccw;
sense_ccw = &to_io_private(sch)->dma_area->sense_ccw;
sense_ccw->cmd_code = CCW_CMD_BASIC_SENSE;
sense_ccw->cda = (__u32) __pa(cdev->private->irb.ecw);
sense_ccw->cda = (__u32) __pa(cdev->private->dma_area->irb.ecw);
sense_ccw->count = SENSE_MAX_COUNT;
sense_ccw->flags = CCW_FLAG_SLI;
@ -364,7 +364,7 @@ ccw_device_accumulate_basic_sense(struct ccw_device *cdev, struct irb *irb)
if (!(irb->scsw.cmd.dstat & DEV_STAT_UNIT_CHECK) &&
(irb->scsw.cmd.dstat & DEV_STAT_CHN_END)) {
cdev->private->irb.esw.esw0.erw.cons = 1;
cdev->private->dma_area->irb.esw.esw0.erw.cons = 1;
cdev->private->flags.dosense = 0;
}
/* Check if path verification is required. */
@ -386,7 +386,7 @@ ccw_device_accumulate_and_sense(struct ccw_device *cdev, struct irb *irb)
/* Check for basic sense. */
if (cdev->private->flags.dosense &&
!(irb->scsw.cmd.dstat & DEV_STAT_UNIT_CHECK)) {
cdev->private->irb.esw.esw0.erw.cons = 1;
cdev->private->dma_area->irb.esw.esw0.erw.cons = 1;
cdev->private->flags.dosense = 0;
return 0;
}

View File

@ -9,15 +9,20 @@
#include "css.h"
#include "orb.h"
struct io_subchannel_dma_area {
struct ccw1 sense_ccw; /* static ccw for sense command */
};
struct io_subchannel_private {
union orb orb; /* operation request block */
struct ccw1 sense_ccw; /* static ccw for sense command */
struct ccw_device *cdev;/* pointer to the child ccw device */
struct {
unsigned int suspend:1; /* allow suspend */
unsigned int prefetch:1;/* deny prefetch */
unsigned int inter:1; /* suppress intermediate interrupts */
} __packed options;
struct io_subchannel_dma_area *dma_area;
dma_addr_t dma_area_dma;
} __aligned(8);
#define to_io_private(n) ((struct io_subchannel_private *) \
@ -115,6 +120,13 @@ enum cdev_todo {
#define FAKE_CMD_IRB 1
#define FAKE_TM_IRB 2
struct ccw_device_dma_area {
struct senseid senseid; /* SenseID info */
struct ccw1 iccws[2]; /* ccws for SNID/SID/SPGID commands */
struct irb irb; /* device status */
struct pgid pgid[8]; /* path group IDs per chpid*/
};
struct ccw_device_private {
struct ccw_device *cdev;
struct subchannel *sch;
@ -156,11 +168,7 @@ struct ccw_device_private {
} __attribute__((packed)) flags;
unsigned long intparm; /* user interruption parameter */
struct qdio_irq *qdio_data;
struct irb irb; /* device status */
int async_kill_io_rc;
struct senseid senseid; /* SenseID info */
struct pgid pgid[8]; /* path group IDs per chpid*/
struct ccw1 iccws[2]; /* ccws for SNID/SID/SPGID commands */
struct work_struct todo_work;
enum cdev_todo todo;
wait_queue_head_t wait_q;
@ -169,6 +177,8 @@ struct ccw_device_private {
struct list_head cmb_list; /* list of measured devices */
u64 cmb_start_time; /* clock value of cmb reset */
void *cmb_wait; /* deferred cmb enable/disable */
struct gen_pool *dma_pool;
struct ccw_device_dma_area *dma_area;
enum interruption_class int_class;
};

View File

@ -736,6 +736,7 @@ static int get_outbound_buffer_frontier(struct qdio_q *q, unsigned int start)
switch (state) {
case SLSB_P_OUTPUT_EMPTY:
case SLSB_P_OUTPUT_PENDING:
/* the adapter got it */
DBF_DEV_EVENT(DBF_INFO, q->irq_ptr,
"out empty:%1d %02x", q->nr, count);

View File

@ -150,6 +150,7 @@ static int __qdio_allocate_qs(struct qdio_q **irq_ptr_qs, int nr_queues)
return -ENOMEM;
}
irq_ptr_qs[i] = q;
INIT_LIST_HEAD(&q->entry);
}
return 0;
}
@ -178,6 +179,7 @@ static void setup_queues_misc(struct qdio_q *q, struct qdio_irq *irq_ptr,
q->mask = 1 << (31 - i);
q->nr = i;
q->handler = handler;
INIT_LIST_HEAD(&q->entry);
}
static void setup_storage_lists(struct qdio_q *q, struct qdio_irq *irq_ptr,

View File

@ -79,7 +79,6 @@ void tiqdio_add_input_queues(struct qdio_irq *irq_ptr)
mutex_lock(&tiq_list_lock);
list_add_rcu(&irq_ptr->input_qs[0]->entry, &tiq_list);
mutex_unlock(&tiq_list_lock);
xchg(irq_ptr->dsci, 1 << 7);
}
void tiqdio_remove_input_queues(struct qdio_irq *irq_ptr)
@ -87,14 +86,14 @@ void tiqdio_remove_input_queues(struct qdio_irq *irq_ptr)
struct qdio_q *q;
q = irq_ptr->input_qs[0];
/* if establish triggered an error */
if (!q || !q->entry.prev || !q->entry.next)
if (!q)
return;
mutex_lock(&tiq_list_lock);
list_del_rcu(&q->entry);
mutex_unlock(&tiq_list_lock);
synchronize_rcu();
INIT_LIST_HEAD(&q->entry);
}
static inline int has_multiple_inq_on_dsci(struct qdio_irq *irq_ptr)
@ -178,6 +177,7 @@ static inline void tiqdio_call_inq_handlers(struct qdio_irq *irq)
/**
* tiqdio_thinint_handler - thin interrupt handler for qdio
* @airq: pointer to adapter interrupt descriptor
* @floating: flag to recognize floating vs. directed interrupts (unused)
*/
static void tiqdio_thinint_handler(struct airq_struct *airq, bool floating)
{

View File

@ -16,12 +16,6 @@
#include "vfio_ccw_cp.h"
/*
* Max length for ccw chain.
* XXX: Limit to 256, need to check more?
*/
#define CCWCHAIN_LEN_MAX 256
struct pfn_array {
/* Starting guest physical I/O address. */
unsigned long pa_iova;
@ -33,11 +27,6 @@ struct pfn_array {
int pa_nr;
};
struct pfn_array_table {
struct pfn_array *pat_pa;
int pat_nr;
};
struct ccwchain {
struct list_head next;
struct ccw1 *ch_ccw;
@ -46,35 +35,29 @@ struct ccwchain {
/* Count of the valid ccws in chain. */
int ch_len;
/* Pinned PAGEs for the original data. */
struct pfn_array_table *ch_pat;
struct pfn_array *ch_pa;
};
/*
* pfn_array_alloc_pin() - alloc memory for PFNs, then pin user pages in memory
* pfn_array_alloc() - alloc memory for PFNs
* @pa: pfn_array on which to perform the operation
* @mdev: the mediated device to perform pin/unpin operations
* @iova: target guest physical address
* @len: number of bytes that should be pinned from @iova
*
* Attempt to allocate memory for PFNs, and pin user pages in memory.
* Attempt to allocate memory for PFNs.
*
* Usage of pfn_array:
* We expect (pa_nr == 0) and (pa_iova_pfn == NULL), any field in
* this structure will be filled in by this function.
*
* Returns:
* Number of pages pinned on success.
* If @pa->pa_nr is not 0, or @pa->pa_iova_pfn is not NULL initially,
* returns -EINVAL.
* If no pages were pinned, returns -errno.
* 0 if PFNs are allocated
* -EINVAL if pa->pa_nr is not initially zero, or pa->pa_iova_pfn is not NULL
* -ENOMEM if alloc failed
*/
static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev,
u64 iova, unsigned int len)
static int pfn_array_alloc(struct pfn_array *pa, u64 iova, unsigned int len)
{
int i, ret = 0;
if (!len)
return 0;
int i;
if (pa->pa_nr || pa->pa_iova_pfn)
return -EINVAL;
@ -94,8 +77,27 @@ static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev,
pa->pa_pfn = pa->pa_iova_pfn + pa->pa_nr;
pa->pa_iova_pfn[0] = pa->pa_iova >> PAGE_SHIFT;
for (i = 1; i < pa->pa_nr; i++)
pa->pa_pfn[0] = -1ULL;
for (i = 1; i < pa->pa_nr; i++) {
pa->pa_iova_pfn[i] = pa->pa_iova_pfn[i - 1] + 1;
pa->pa_pfn[i] = -1ULL;
}
return 0;
}
/*
* pfn_array_pin() - Pin user pages in memory
* @pa: pfn_array on which to perform the operation
* @mdev: the mediated device to perform pin operations
*
* Returns number of pages pinned upon success.
* If the pin request partially succeeds, or fails completely,
* all pages are left unpinned and a negative error value is returned.
*/
static int pfn_array_pin(struct pfn_array *pa, struct device *mdev)
{
int ret = 0;
ret = vfio_pin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr,
IOMMU_READ | IOMMU_WRITE, pa->pa_pfn);
@ -112,8 +114,6 @@ static int pfn_array_alloc_pin(struct pfn_array *pa, struct device *mdev,
err_out:
pa->pa_nr = 0;
kfree(pa->pa_iova_pfn);
pa->pa_iova_pfn = NULL;
return ret;
}
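For orientation, a hedged sketch of how the split alloc/pin helpers combine after this refactoring; the flow mirrors what copy_from_iova() and ccwchain_fetch_direct() do below, but the calling function itself is invented:

/* Sketch only: allocate the pfn bookkeeping first, pin the guest pages
 * second, and always clean up through pfn_array_unpin_free(). */
static int example_map_guest_range(struct device *mdev, u64 iova,
                                   unsigned int len)
{
        struct pfn_array pa = {0};
        int ret;

        ret = pfn_array_alloc(&pa, iova, len);
        if (ret < 0)
                return ret;

        ret = pfn_array_pin(&pa, mdev);         /* vfio_pin_pages() inside */
        if (ret < 0) {
                /* pin failure left pa_nr == 0, so this only frees memory */
                pfn_array_unpin_free(&pa, mdev);
                return ret;
        }

        /* ... translate pa.pa_pfn[] into IDAWs and perform the I/O ... */

        pfn_array_unpin_free(&pa, mdev);
        return 0;
}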
@ -121,60 +121,30 @@ err_out:
/* Unpin the pages before releasing the memory. */
static void pfn_array_unpin_free(struct pfn_array *pa, struct device *mdev)
{
vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr);
/* Only unpin if any pages were pinned to begin with */
if (pa->pa_nr)
vfio_unpin_pages(mdev, pa->pa_iova_pfn, pa->pa_nr);
pa->pa_nr = 0;
kfree(pa->pa_iova_pfn);
}
static int pfn_array_table_init(struct pfn_array_table *pat, int nr)
{
pat->pat_pa = kcalloc(nr, sizeof(*pat->pat_pa), GFP_KERNEL);
if (unlikely(ZERO_OR_NULL_PTR(pat->pat_pa))) {
pat->pat_nr = 0;
return -ENOMEM;
}
pat->pat_nr = nr;
return 0;
}
static void pfn_array_table_unpin_free(struct pfn_array_table *pat,
struct device *mdev)
static bool pfn_array_iova_pinned(struct pfn_array *pa, unsigned long iova)
{
unsigned long iova_pfn = iova >> PAGE_SHIFT;
int i;
for (i = 0; i < pat->pat_nr; i++)
pfn_array_unpin_free(pat->pat_pa + i, mdev);
if (pat->pat_nr) {
kfree(pat->pat_pa);
pat->pat_pa = NULL;
pat->pat_nr = 0;
}
}
static bool pfn_array_table_iova_pinned(struct pfn_array_table *pat,
unsigned long iova)
{
struct pfn_array *pa = pat->pat_pa;
unsigned long iova_pfn = iova >> PAGE_SHIFT;
int i, j;
for (i = 0; i < pat->pat_nr; i++, pa++)
for (j = 0; j < pa->pa_nr; j++)
if (pa->pa_iova_pfn[j] == iova_pfn)
return true;
for (i = 0; i < pa->pa_nr; i++)
if (pa->pa_iova_pfn[i] == iova_pfn)
return true;
return false;
}
/* Create the list idal words for a pfn_array_table. */
static inline void pfn_array_table_idal_create_words(
struct pfn_array_table *pat,
/* Create the list of IDAL words for a pfn_array. */
static inline void pfn_array_idal_create_words(
struct pfn_array *pa,
unsigned long *idaws)
{
struct pfn_array *pa;
int i, j, k;
int i;
/*
* Idal words (except the first one) rely on the memory being 4k
@ -183,18 +153,35 @@ static inline void pfn_array_table_idal_create_words(
* there will be no problem here to simply use the phys to create an
* idaw.
*/
k = 0;
for (i = 0; i < pat->pat_nr; i++) {
pa = pat->pat_pa + i;
for (j = 0; j < pa->pa_nr; j++) {
idaws[k] = pa->pa_pfn[j] << PAGE_SHIFT;
if (k == 0)
idaws[k] += pa->pa_iova & (PAGE_SIZE - 1);
k++;
}
}
for (i = 0; i < pa->pa_nr; i++)
idaws[i] = pa->pa_pfn[i] << PAGE_SHIFT;
/* Adjust the first IDAW, since it may not start on a page boundary */
idaws[0] += pa->pa_iova & (PAGE_SIZE - 1);
}
static void convert_ccw0_to_ccw1(struct ccw1 *source, unsigned long len)
{
struct ccw0 ccw0;
struct ccw1 *pccw1 = source;
int i;
for (i = 0; i < len; i++) {
ccw0 = *(struct ccw0 *)pccw1;
if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
pccw1->cmd_code = CCW_CMD_TIC;
pccw1->flags = 0;
pccw1->count = 0;
} else {
pccw1->cmd_code = ccw0.cmd_code;
pccw1->flags = ccw0.flags;
pccw1->count = ccw0.count;
}
pccw1->cda = ccw0.cda;
pccw1++;
}
}
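For readers less familiar with channel programs, the two CCW layouts that the helper above converts between look roughly as follows (paraphrased from the definitions in asm/cio.h, shown here only for illustration):

/* Illustration, paraphrased from arch/s390/include/asm/cio.h. */
struct ccw0 {                   /* format-0 CCW: 24-bit data address */
        __u8  cmd_code;
        __u32 cda : 24;
        __u8  flags;
        __u8  reserved;
        __u16 count;
} __packed __aligned(8);

struct ccw1 {                   /* format-1 CCW: 31-bit data address */
        __u8  cmd_code;
        __u8  flags;
        __u16 count;
        __u32 cda;
} __packed __aligned(8);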
/*
* Within the domain (@mdev), copy @n bytes from a guest physical
@ -209,10 +196,16 @@ static long copy_from_iova(struct device *mdev,
int i, ret;
unsigned long l, m;
ret = pfn_array_alloc_pin(&pa, mdev, iova, n);
if (ret <= 0)
ret = pfn_array_alloc(&pa, iova, n);
if (ret < 0)
return ret;
ret = pfn_array_pin(&pa, mdev);
if (ret < 0) {
pfn_array_unpin_free(&pa, mdev);
return ret;
}
l = n;
for (i = 0; i < pa.pa_nr; i++) {
from = pa.pa_pfn[i] << PAGE_SHIFT;
@ -235,54 +228,59 @@ static long copy_from_iova(struct device *mdev,
return l;
}
static long copy_ccw_from_iova(struct channel_program *cp,
struct ccw1 *to, u64 iova,
unsigned long len)
{
struct ccw0 ccw0;
struct ccw1 *pccw1;
int ret;
int i;
ret = copy_from_iova(cp->mdev, to, iova, len * sizeof(struct ccw1));
if (ret)
return ret;
if (!cp->orb.cmd.fmt) {
pccw1 = to;
for (i = 0; i < len; i++) {
ccw0 = *(struct ccw0 *)pccw1;
if ((pccw1->cmd_code & 0x0f) == CCW_CMD_TIC) {
pccw1->cmd_code = CCW_CMD_TIC;
pccw1->flags = 0;
pccw1->count = 0;
} else {
pccw1->cmd_code = ccw0.cmd_code;
pccw1->flags = ccw0.flags;
pccw1->count = ccw0.count;
}
pccw1->cda = ccw0.cda;
pccw1++;
}
}
return ret;
}
/*
* Helpers to operate ccwchain.
*/
#define ccw_is_test(_ccw) (((_ccw)->cmd_code & 0x0F) == 0)
#define ccw_is_read(_ccw) (((_ccw)->cmd_code & 0x03) == 0x02)
#define ccw_is_read_backward(_ccw) (((_ccw)->cmd_code & 0x0F) == 0x0C)
#define ccw_is_sense(_ccw) (((_ccw)->cmd_code & 0x0F) == CCW_CMD_BASIC_SENSE)
#define ccw_is_noop(_ccw) ((_ccw)->cmd_code == CCW_CMD_NOOP)
#define ccw_is_tic(_ccw) ((_ccw)->cmd_code == CCW_CMD_TIC)
#define ccw_is_idal(_ccw) ((_ccw)->flags & CCW_FLAG_IDA)
#define ccw_is_skip(_ccw) ((_ccw)->flags & CCW_FLAG_SKIP)
#define ccw_is_chain(_ccw) ((_ccw)->flags & (CCW_FLAG_CC | CCW_FLAG_DC))
/*
* ccw_does_data_transfer()
*
* Determine whether a CCW will move any data, such that the guest pages
* would need to be pinned before performing the I/O.
*
* Returns 1 if yes, 0 if no.
*/
static inline int ccw_does_data_transfer(struct ccw1 *ccw)
{
/* If the count field is zero, then no data will be transferred */
if (ccw->count == 0)
return 0;
/* If the command is a NOP, then no data will be transferred */
if (ccw_is_noop(ccw))
return 0;
/* If the skip flag is off, then data will be transferred */
if (!ccw_is_skip(ccw))
return 1;
/*
* If the skip flag is on, it is only meaningful if the command
* code is a read, read backward, sense, or sense ID. In those
* cases, no data will be transferred.
*/
if (ccw_is_read(ccw) || ccw_is_read_backward(ccw))
return 0;
if (ccw_is_sense(ccw))
return 0;
/* The skip flag is on, but it is ignored for this command code. */
return 1;
}
/*
* is_cpa_within_range()
*
@ -319,7 +317,7 @@ static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
/* Make ccw address aligned to 8. */
size = ((sizeof(*chain) + 7L) & -8L) +
sizeof(*chain->ch_ccw) * len +
sizeof(*chain->ch_pat) * len;
sizeof(*chain->ch_pa) * len;
chain = kzalloc(size, GFP_DMA | GFP_KERNEL);
if (!chain)
return NULL;
@ -328,7 +326,7 @@ static struct ccwchain *ccwchain_alloc(struct channel_program *cp, int len)
chain->ch_ccw = (struct ccw1 *)data;
data = (u8 *)(chain->ch_ccw) + sizeof(*chain->ch_ccw) * len;
chain->ch_pat = (struct pfn_array_table *)data;
chain->ch_pa = (struct pfn_array *)data;
chain->ch_len = len;
@ -348,31 +346,12 @@ static void ccwchain_cda_free(struct ccwchain *chain, int idx)
{
struct ccw1 *ccw = chain->ch_ccw + idx;
if (ccw_is_test(ccw) || ccw_is_noop(ccw) || ccw_is_tic(ccw))
return;
if (!ccw->count)
if (ccw_is_tic(ccw))
return;
kfree((void *)(u64)ccw->cda);
}
/* Unpin the pages then free the memory resources. */
static void cp_unpin_free(struct channel_program *cp)
{
struct ccwchain *chain, *temp;
int i;
cp->initialized = false;
list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
for (i = 0; i < chain->ch_len; i++) {
pfn_array_table_unpin_free(chain->ch_pat + i,
cp->mdev);
ccwchain_cda_free(chain, i);
}
ccwchain_free(chain);
}
}
/**
* ccwchain_calc_length - calculate the length of the ccw chain.
* @iova: guest physical address of the target ccw chain
@ -388,25 +367,9 @@ static void cp_unpin_free(struct channel_program *cp)
*/
static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
{
struct ccw1 *ccw, *p;
int cnt;
struct ccw1 *ccw = cp->guest_cp;
int cnt = 0;
/*
* Copy current chain from guest to host kernel.
* Currently the chain length is limited to CCWCHAIN_LEN_MAX (256).
* So copying 2K is enough (safe).
*/
p = ccw = kcalloc(CCWCHAIN_LEN_MAX, sizeof(*ccw), GFP_KERNEL);
if (!ccw)
return -ENOMEM;
cnt = copy_ccw_from_iova(cp, ccw, iova, CCWCHAIN_LEN_MAX);
if (cnt) {
kfree(ccw);
return cnt;
}
cnt = 0;
do {
cnt++;
@ -415,10 +378,8 @@ static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
* orb specified one of the unsupported formats, we defer
* checking for IDAWs in unsupported formats to here.
*/
if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw)) {
kfree(p);
if ((!cp->orb.cmd.c64 || cp->orb.cmd.i2k) && ccw_is_idal(ccw))
return -EOPNOTSUPP;
}
/*
* We want to keep counting if the current CCW has the
@ -437,7 +398,6 @@ static int ccwchain_calc_length(u64 iova, struct channel_program *cp)
if (cnt == CCWCHAIN_LEN_MAX + 1)
cnt = -EINVAL;
kfree(p);
return cnt;
}
@ -458,17 +418,23 @@ static int tic_target_chain_exists(struct ccw1 *tic, struct channel_program *cp)
static int ccwchain_loop_tic(struct ccwchain *chain,
struct channel_program *cp);
static int ccwchain_handle_tic(struct ccw1 *tic, struct channel_program *cp)
static int ccwchain_handle_ccw(u32 cda, struct channel_program *cp)
{
struct ccwchain *chain;
int len, ret;
int len;
/* May transfer to an existing chain. */
if (tic_target_chain_exists(tic, cp))
return 0;
/* Copy 2K (the most we support today) of possible CCWs */
len = copy_from_iova(cp->mdev, cp->guest_cp, cda,
CCWCHAIN_LEN_MAX * sizeof(struct ccw1));
if (len)
return len;
/* Get chain length. */
len = ccwchain_calc_length(tic->cda, cp);
/* Convert any Format-0 CCWs to Format-1 */
if (!cp->orb.cmd.fmt)
convert_ccw0_to_ccw1(cp->guest_cp, CCWCHAIN_LEN_MAX);
/* Count the CCWs in the current chain */
len = ccwchain_calc_length(cda, cp);
if (len < 0)
return len;
@ -476,14 +442,10 @@ static int ccwchain_handle_tic(struct ccw1 *tic, struct channel_program *cp)
chain = ccwchain_alloc(cp, len);
if (!chain)
return -ENOMEM;
chain->ch_iova = tic->cda;
chain->ch_iova = cda;
/* Copy the new chain from user. */
ret = copy_ccw_from_iova(cp, chain->ch_ccw, tic->cda, len);
if (ret) {
ccwchain_free(chain);
return ret;
}
/* Copy the actual CCWs into the new chain */
memcpy(chain->ch_ccw, cp->guest_cp, len * sizeof(struct ccw1));
/* Loop for tics on this new chain. */
return ccwchain_loop_tic(chain, cp);
@ -501,7 +463,12 @@ static int ccwchain_loop_tic(struct ccwchain *chain, struct channel_program *cp)
if (!ccw_is_tic(tic))
continue;
ret = ccwchain_handle_tic(tic, cp);
/* May transfer to an existing chain. */
if (tic_target_chain_exists(tic, cp))
continue;
/* Build a ccwchain for the next segment */
ret = ccwchain_handle_ccw(tic->cda, cp);
if (ret)
return ret;
}
@ -534,115 +501,90 @@ static int ccwchain_fetch_direct(struct ccwchain *chain,
struct channel_program *cp)
{
struct ccw1 *ccw;
struct pfn_array_table *pat;
struct pfn_array *pa;
u64 iova;
unsigned long *idaws;
int ret;
int bytes = 1;
int idaw_nr, idal_len;
int i;
ccw = chain->ch_ccw + idx;
if (!ccw->count) {
/*
* We just want the translation result of any direct ccw
* to be an IDA ccw, so let's add the IDA flag for it.
* Although the flag will be ignored by firmware.
*/
ccw->flags |= CCW_FLAG_IDA;
return 0;
if (ccw->count)
bytes = ccw->count;
/* Calculate size of IDAL */
if (ccw_is_idal(ccw)) {
/* Read first IDAW to see if it's 4K-aligned or not. */
/* All subsequent IDAws will be 4K-aligned. */
ret = copy_from_iova(cp->mdev, &iova, ccw->cda, sizeof(iova));
if (ret)
return ret;
} else {
iova = ccw->cda;
}
idaw_nr = idal_nr_words((void *)iova, bytes);
idal_len = idaw_nr * sizeof(*idaws);
/* Allocate an IDAL from host storage */
idaws = kcalloc(idaw_nr, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
if (!idaws) {
ret = -ENOMEM;
goto out_init;
}
/*
* Pin data page(s) in memory.
* The number of pages actually is the count of the idaws which will be
* needed when translating a direct ccw to a idal ccw.
* Allocate an array of pfn's for pages to pin/translate.
* The number of pages is actually the count of the idaws
* required for the data transfer, since we only support
* 4K IDAWs today.
*/
pat = chain->ch_pat + idx;
ret = pfn_array_table_init(pat, 1);
if (ret)
goto out_init;
ret = pfn_array_alloc_pin(pat->pat_pa, cp->mdev, ccw->cda, ccw->count);
pa = chain->ch_pa + idx;
ret = pfn_array_alloc(pa, iova, bytes);
if (ret < 0)
goto out_unpin;
goto out_free_idaws;
/* Translate this direct ccw to a idal ccw. */
idaws = kcalloc(ret, sizeof(*idaws), GFP_DMA | GFP_KERNEL);
if (!idaws) {
ret = -ENOMEM;
goto out_unpin;
if (ccw_is_idal(ccw)) {
/* Copy guest IDAL into host IDAL */
ret = copy_from_iova(cp->mdev, idaws, ccw->cda, idal_len);
if (ret)
goto out_unpin;
/*
* Copy guest IDAWs into pfn_array, in case the memory they
* occupy is not contiguous.
*/
for (i = 0; i < idaw_nr; i++)
pa->pa_iova_pfn[i] = idaws[i] >> PAGE_SHIFT;
} else {
/*
* No action is required here; the iova addresses in pfn_array
* were initialized sequentially in pfn_array_alloc() beginning
* with the contents of ccw->cda.
*/
}
if (ccw_does_data_transfer(ccw)) {
ret = pfn_array_pin(pa, cp->mdev);
if (ret < 0)
goto out_unpin;
} else {
pa->pa_nr = 0;
}
ccw->cda = (__u32) virt_to_phys(idaws);
ccw->flags |= CCW_FLAG_IDA;
pfn_array_table_idal_create_words(pat, idaws);
/* Populate the IDAL with pinned/translated addresses from pfn */
pfn_array_idal_create_words(pa, idaws);
return 0;
out_unpin:
pfn_array_table_unpin_free(pat, cp->mdev);
out_init:
ccw->cda = 0;
return ret;
}
static int ccwchain_fetch_idal(struct ccwchain *chain,
int idx,
struct channel_program *cp)
{
struct ccw1 *ccw;
struct pfn_array_table *pat;
unsigned long *idaws;
u64 idaw_iova;
unsigned int idaw_nr, idaw_len;
int i, ret;
ccw = chain->ch_ccw + idx;
if (!ccw->count)
return 0;
/* Calculate size of idaws. */
ret = copy_from_iova(cp->mdev, &idaw_iova, ccw->cda, sizeof(idaw_iova));
if (ret)
return ret;
idaw_nr = idal_nr_words((void *)(idaw_iova), ccw->count);
idaw_len = idaw_nr * sizeof(*idaws);
/* Pin data page(s) in memory. */
pat = chain->ch_pat + idx;
ret = pfn_array_table_init(pat, idaw_nr);
if (ret)
goto out_init;
/* Translate idal ccw to use new allocated idaws. */
idaws = kzalloc(idaw_len, GFP_DMA | GFP_KERNEL);
if (!idaws) {
ret = -ENOMEM;
goto out_unpin;
}
ret = copy_from_iova(cp->mdev, idaws, ccw->cda, idaw_len);
if (ret)
goto out_free_idaws;
ccw->cda = virt_to_phys(idaws);
for (i = 0; i < idaw_nr; i++) {
idaw_iova = *(idaws + i);
ret = pfn_array_alloc_pin(pat->pat_pa + i, cp->mdev,
idaw_iova, 1);
if (ret < 0)
goto out_free_idaws;
}
pfn_array_table_idal_create_words(pat, idaws);
return 0;
pfn_array_unpin_free(pa, cp->mdev);
out_free_idaws:
kfree(idaws);
out_unpin:
pfn_array_table_unpin_free(pat, cp->mdev);
out_init:
ccw->cda = 0;
return ret;
@ -660,15 +602,9 @@ static int ccwchain_fetch_one(struct ccwchain *chain,
{
struct ccw1 *ccw = chain->ch_ccw + idx;
if (ccw_is_test(ccw) || ccw_is_noop(ccw))
return 0;
if (ccw_is_tic(ccw))
return ccwchain_fetch_tic(chain, idx, cp);
if (ccw_is_idal(ccw))
return ccwchain_fetch_idal(chain, idx, cp);
return ccwchain_fetch_direct(chain, idx, cp);
}
@ -691,9 +627,7 @@ static int ccwchain_fetch_one(struct ccwchain *chain,
*/
int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
{
u64 iova = orb->cmd.cpa;
struct ccwchain *chain;
int len, ret;
int ret;
/*
* XXX:
@ -706,28 +640,11 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
memcpy(&cp->orb, orb, sizeof(*orb));
cp->mdev = mdev;
/* Get chain length. */
len = ccwchain_calc_length(iova, cp);
if (len < 0)
return len;
/* Alloc mem for the head chain. */
chain = ccwchain_alloc(cp, len);
if (!chain)
return -ENOMEM;
chain->ch_iova = iova;
/* Copy the head chain from guest. */
ret = copy_ccw_from_iova(cp, chain->ch_ccw, iova, len);
if (ret) {
ccwchain_free(chain);
return ret;
}
/* Now loop for its TICs. */
ret = ccwchain_loop_tic(chain, cp);
/* Build a ccwchain for the first CCW segment */
ret = ccwchain_handle_ccw(orb->cmd.cpa, cp);
if (ret)
cp_unpin_free(cp);
cp_free(cp);
/* It is safe to force: if not set but idals used
* ccwchain_calc_length returns an error.
*/
@ -750,8 +667,20 @@ int cp_init(struct channel_program *cp, struct device *mdev, union orb *orb)
*/
void cp_free(struct channel_program *cp)
{
if (cp->initialized)
cp_unpin_free(cp);
struct ccwchain *chain, *temp;
int i;
if (!cp->initialized)
return;
cp->initialized = false;
list_for_each_entry_safe(chain, temp, &cp->ccwchain_list, next) {
for (i = 0; i < chain->ch_len; i++) {
pfn_array_unpin_free(chain->ch_pa + i, cp->mdev);
ccwchain_cda_free(chain, i);
}
ccwchain_free(chain);
}
}
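To tie the reworked pieces together, here is a hedged sketch of how the vfio-ccw FSM is expected to drive the channel-program API after this series (heavily simplified; only cp_init(), cp_free() and the preallocated guest_cp buffer come from the patch, the rest is illustrative):

/* Sketch only: guest_cp is allocated once per device in
 * vfio_ccw_sch_probe() below, so cp_init() no longer allocates a
 * temporary CCW buffer for every SSCH request. */
static int example_handle_ssch(struct vfio_ccw_private *private,
                               union orb *orb)
{
        struct channel_program *cp = &private->cp;
        int ret;

        ret = cp_init(cp, mdev_dev(private->mdev), orb);
        if (ret)
                return ret;     /* cp_init() already ran cp_free() on error */

        /* ... translation and ssch() would happen here ... */

        cp_free(cp);            /* unpins guest pages, frees the ccwchains */
        return 0;
}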
/**
@ -886,7 +815,11 @@ void cp_update_scsw(struct channel_program *cp, union scsw *scsw)
*/
list_for_each_entry(chain, &cp->ccwchain_list, next) {
ccw_head = (u32)(u64)chain->ch_ccw;
if (is_cpa_within_range(cpa, ccw_head, chain->ch_len)) {
/*
* On successful execution, cpa points just beyond the end
* of the chain.
*/
if (is_cpa_within_range(cpa, ccw_head, chain->ch_len + 1)) {
/*
* (cpa - ccw_head) is the offset value of the host
* physical ccw to its chain head.
@ -919,8 +852,7 @@ bool cp_iova_pinned(struct channel_program *cp, u64 iova)
list_for_each_entry(chain, &cp->ccwchain_list, next) {
for (i = 0; i < chain->ch_len; i++)
if (pfn_array_table_iova_pinned(chain->ch_pat + i,
iova))
if (pfn_array_iova_pinned(chain->ch_pa + i, iova))
return true;
}

View File

@ -16,6 +16,12 @@
#include "orb.h"
/*
* Max length for ccw chain.
* XXX: Limit to 256, need to check more?
*/
#define CCWCHAIN_LEN_MAX 256
/**
* struct channel_program - manage information for channel program
* @ccwchain_list: list head of ccwchains
@ -32,6 +38,7 @@ struct channel_program {
union orb orb;
struct device *mdev;
bool initialized;
struct ccw1 *guest_cp;
};
extern int cp_init(struct channel_program *cp, struct device *mdev,

View File

@ -95,11 +95,11 @@ static void vfio_ccw_sch_io_todo(struct work_struct *work)
memcpy(private->io_region->irb_area, irb, sizeof(*irb));
mutex_unlock(&private->io_mutex);
if (private->io_trigger)
eventfd_signal(private->io_trigger, 1);
if (private->mdev && is_final)
private->state = VFIO_CCW_STATE_IDLE;
if (private->io_trigger)
eventfd_signal(private->io_trigger, 1);
}
/*
@ -129,6 +129,11 @@ static int vfio_ccw_sch_probe(struct subchannel *sch)
if (!private)
return -ENOMEM;
private->cp.guest_cp = kcalloc(CCWCHAIN_LEN_MAX, sizeof(struct ccw1),
GFP_KERNEL);
if (!private->cp.guest_cp)
goto out_free;
private->io_region = kmem_cache_zalloc(vfio_ccw_io_region,
GFP_KERNEL | GFP_DMA);
if (!private->io_region)
@ -169,6 +174,7 @@ out_free:
kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
if (private->io_region)
kmem_cache_free(vfio_ccw_io_region, private->io_region);
kfree(private->cp.guest_cp);
kfree(private);
return ret;
}
@ -185,6 +191,7 @@ static int vfio_ccw_sch_remove(struct subchannel *sch)
kmem_cache_free(vfio_ccw_cmd_region, private->cmd_region);
kmem_cache_free(vfio_ccw_io_region, private->io_region);
kfree(private->cp.guest_cp);
kfree(private);
return 0;

View File

@ -690,7 +690,7 @@ int pkey_clr2protkey(u32 keytype,
*/
if (!cpacf_test_func(&pckmo_functions, fc)) {
DEBUG_ERR("%s pckmo functions not available\n", __func__);
return -EOPNOTSUPP;
return -ENODEV;
}
/* prepare param block */
@ -1695,15 +1695,15 @@ static int __init pkey_init(void)
* are able to work with protected keys.
*/
if (!cpacf_query(CPACF_PCKMO, &pckmo_functions))
return -EOPNOTSUPP;
return -ENODEV;
/* check for kmc instructions available */
if (!cpacf_query(CPACF_KMC, &kmc_functions))
return -EOPNOTSUPP;
return -ENODEV;
if (!cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_128) ||
!cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_192) ||
!cpacf_test_func(&kmc_functions, CPACF_KMC_PAES_256))
return -EOPNOTSUPP;
return -ENODEV;
pkey_debug_init();

@@ -5,6 +5,7 @@
* Copyright IBM Corp. 2018
*
* Author(s): Tony Krowiak <akrowiak@linux.ibm.com>
* Pierre Morel <pmorel@linux.ibm.com>
*/
#include <linux/module.h>
@@ -40,14 +41,45 @@ static struct ap_device_id ap_queue_ids[] = {
MODULE_DEVICE_TABLE(vfio_ap, ap_queue_ids);
/**
* vfio_ap_queue_dev_probe:
*
* Allocate a vfio_ap_queue structure and associate it
* with the device as driver_data.
*/
static int vfio_ap_queue_dev_probe(struct ap_device *apdev)
{
struct vfio_ap_queue *q;
q = kzalloc(sizeof(*q), GFP_KERNEL);
if (!q)
return -ENOMEM;
dev_set_drvdata(&apdev->device, q);
q->apqn = to_ap_queue(&apdev->device)->qid;
q->saved_isc = VFIO_AP_ISC_INVALID;
return 0;
}
/**
* vfio_ap_queue_dev_remove:
*
* Takes the matrix lock to avoid actions on this device while it is being
* removed, then frees the associated vfio_ap_queue structure.
*/
static void vfio_ap_queue_dev_remove(struct ap_device *apdev)
{
/* Nothing to do yet */
struct vfio_ap_queue *q;
int apid, apqi;
mutex_lock(&matrix_dev->lock);
q = dev_get_drvdata(&apdev->device);
dev_set_drvdata(&apdev->device, NULL);
apid = AP_QID_CARD(q->apqn);
apqi = AP_QID_QUEUE(q->apqn);
vfio_ap_mdev_reset_queue(apid, apqi, 1);
vfio_ap_irq_disable(q);
kfree(q);
mutex_unlock(&matrix_dev->lock);
}
static void vfio_ap_matrix_dev_release(struct device *dev)

@@ -24,6 +24,296 @@
#define VFIO_AP_MDEV_TYPE_HWVIRT "passthrough"
#define VFIO_AP_MDEV_NAME_HWVIRT "VFIO AP Passthrough Device"
static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev);
static int match_apqn(struct device *dev, void *data)
{
struct vfio_ap_queue *q = dev_get_drvdata(dev);
return (q->apqn == *(int *)(data)) ? 1 : 0;
}
/**
* vfio_ap_get_queue: Retrieve a queue with a specific APQN from a list
* @matrix_mdev: the associated mediated matrix
* @apqn: The queue APQN
*
* Retrieve a queue with a specific APQN from the list of the
* devices of the vfio_ap_drv.
* Verify that the APID and the APQI are set in the matrix.
*
* Returns the pointer to the associated vfio_ap_queue
*/
static struct vfio_ap_queue *vfio_ap_get_queue(
struct ap_matrix_mdev *matrix_mdev,
int apqn)
{
struct vfio_ap_queue *q;
struct device *dev;
if (!test_bit_inv(AP_QID_CARD(apqn), matrix_mdev->matrix.apm))
return NULL;
if (!test_bit_inv(AP_QID_QUEUE(apqn), matrix_mdev->matrix.aqm))
return NULL;
dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
&apqn, match_apqn);
if (!dev)
return NULL;
q = dev_get_drvdata(dev);
q->matrix_mdev = matrix_mdev;
put_device(dev);
return q;
}
/**
* vfio_ap_wait_for_irqclear
* @apqn: The AP Queue number
*
* Checks the IRQ bit for the status of this APQN using ap_tapq.
* Returns when ap_tapq succeeds and the IRQ bit is clear.
* Returns when ap_tapq fails because the AP is invalid, deconfigured or
* checkstopped.
* Otherwise retries up to 5 times, waiting 20ms between attempts.
*
*/
static void vfio_ap_wait_for_irqclear(int apqn)
{
struct ap_queue_status status;
int retry = 5;
do {
status = ap_tapq(apqn, NULL);
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
case AP_RESPONSE_RESET_IN_PROGRESS:
if (!status.irq_enabled)
return;
/* Fall through */
case AP_RESPONSE_BUSY:
msleep(20);
break;
case AP_RESPONSE_Q_NOT_AVAIL:
case AP_RESPONSE_DECONFIGURED:
case AP_RESPONSE_CHECKSTOPPED:
default:
WARN_ONCE(1, "%s: tapq rc %02x: %04x\n", __func__,
status.response_code, apqn);
return;
}
} while (--retry);
WARN_ONCE(1, "%s: tapq rc %02x: %04x could not clear IR bit\n",
__func__, status.response_code, apqn);
}
/**
* vfio_ap_free_aqic_resources
* @q: The vfio_ap_queue
*
* Unregisters the ISC from the GIB when the saved ISC is valid.
* Unpins the guest's page holding the NIB when one exists.
* Resets saved_pfn and saved_isc to invalid values.
* Clears the pointer to the matrix mediated device.
*
*/
static void vfio_ap_free_aqic_resources(struct vfio_ap_queue *q)
{
if (q->saved_isc != VFIO_AP_ISC_INVALID && q->matrix_mdev)
kvm_s390_gisc_unregister(q->matrix_mdev->kvm, q->saved_isc);
if (q->saved_pfn && q->matrix_mdev)
vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev),
&q->saved_pfn, 1);
q->saved_pfn = 0;
q->saved_isc = VFIO_AP_ISC_INVALID;
q->matrix_mdev = NULL;
}
/**
* vfio_ap_irq_disable
* @q: The vfio_ap_queue
*
* Uses ap_aqic to disable the interrupt. On success, or when the IRQ
* configuration had already been changed, calls vfio_ap_wait_for_irqclear()
* to wait for the IRQ bit to clear, and vfio_ap_free_aqic_resources() to
* free the resources associated with AP interrupt handling.
*
* If the AP is busy or a reset is in progress, retries after 20ms, up to
* 5 times.
*
* Gives up (the resources are still freed) if ap_aqic fails because the
* AP is invalid, deconfigured or checkstopped.
*/
struct ap_queue_status vfio_ap_irq_disable(struct vfio_ap_queue *q)
{
struct ap_qirq_ctrl aqic_gisa = {};
struct ap_queue_status status;
int retries = 5;
do {
status = ap_aqic(q->apqn, aqic_gisa, NULL);
switch (status.response_code) {
case AP_RESPONSE_OTHERWISE_CHANGED:
case AP_RESPONSE_NORMAL:
vfio_ap_wait_for_irqclear(q->apqn);
goto end_free;
case AP_RESPONSE_RESET_IN_PROGRESS:
case AP_RESPONSE_BUSY:
msleep(20);
break;
case AP_RESPONSE_Q_NOT_AVAIL:
case AP_RESPONSE_DECONFIGURED:
case AP_RESPONSE_CHECKSTOPPED:
case AP_RESPONSE_INVALID_ADDRESS:
default:
/* All cases in the default group mean the AP is not operational */
WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
status.response_code);
goto end_free;
}
} while (retries--);
WARN_ONCE(1, "%s: ap_aqic status %d\n", __func__,
status.response_code);
end_free:
vfio_ap_free_aqic_resources(q);
return status;
}
/**
* vfio_ap_irq_enable: Enable interrupt for an APQN
*
* @q: the vfio_ap_queue holding AQIC parameters
* @isc: the guest interruption subclass
* @nib: the guest address of the notification indicator byte (NIB)
*
* Pin the guest page holding the NIB.
* Register the guest ISC with the GIB interface and retrieve the
* host ISC to issue the host side PQAP/AQIC.
*
* status.response_code is set to AP_RESPONSE_INVALID_ADDRESS in case
* vfio_pin_pages() fails.
*
* Otherwise returns the ap_queue_status returned by ap_aqic();
* all retry handling will be done by the guest.
*/
static struct ap_queue_status vfio_ap_irq_enable(struct vfio_ap_queue *q,
int isc,
unsigned long nib)
{
struct ap_qirq_ctrl aqic_gisa = {};
struct ap_queue_status status = {};
struct kvm_s390_gisa *gisa;
struct kvm *kvm;
unsigned long h_nib, g_pfn, h_pfn;
int ret;
g_pfn = nib >> PAGE_SHIFT;
ret = vfio_pin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1,
IOMMU_READ | IOMMU_WRITE, &h_pfn);
switch (ret) {
case 1:
break;
default:
status.response_code = AP_RESPONSE_INVALID_ADDRESS;
return status;
}
kvm = q->matrix_mdev->kvm;
gisa = kvm->arch.gisa_int.origin;
h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK);
aqic_gisa.gisc = isc;
aqic_gisa.isc = kvm_s390_gisc_register(kvm, isc);
aqic_gisa.ir = 1;
aqic_gisa.gisa = (uint64_t)gisa >> 4;
status = ap_aqic(q->apqn, aqic_gisa, (void *)h_nib);
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
/* See if we did clear older IRQ configuration */
vfio_ap_free_aqic_resources(q);
q->saved_pfn = g_pfn;
q->saved_isc = isc;
break;
case AP_RESPONSE_OTHERWISE_CHANGED:
/* We could not modify IRQ settings: clear new configuration */
vfio_unpin_pages(mdev_dev(q->matrix_mdev->mdev), &g_pfn, 1);
kvm_s390_gisc_unregister(kvm, isc);
break;
default:
pr_warn("%s: apqn %04x: response: %02x\n", __func__, q->apqn,
status.response_code);
vfio_ap_irq_disable(q);
break;
}
return status;
}
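A quick illustration of the NIB translation done in vfio_ap_irq_enable() above: the pinned host page frame replaces the guest's, while the offset within the page is preserved. The values in this stand-alone sketch are made up; only the arithmetic mirrors the code:

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)
#define PAGE_MASK  (~(PAGE_SIZE - 1))

int main(void)
{
    uint64_t nib   = 0x0000000123456789UL; /* hypothetical guest NIB address */
    uint64_t g_pfn = nib >> PAGE_SHIFT;    /* guest frame handed to vfio_pin_pages() */
    uint64_t h_pfn = 0xabcdeUL;            /* host frame it would hand back */

    /* host NIB: pinned host frame plus the original in-page offset */
    uint64_t h_nib = (h_pfn << PAGE_SHIFT) | (nib & ~PAGE_MASK);

    assert(g_pfn == 0x123456);
    assert(h_nib == 0xabcde789);
    return 0;
}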
/**
* handle_pqap: PQAP instruction callback
*
* @vcpu: The vcpu on which we received the PQAP instruction
*
* Get the general register contents to initialize internal variables.
* REG[0]: APQN
* REG[1]: IR and ISC
* REG[2]: NIB
*
* status.response_code may be set to one of the following response codes:
* - AP_RESPONSE_Q_NOT_AVAIL: if the queue is not available
* - AP_RESPONSE_DECONFIGURED: if the queue is not configured
* - AP_RESPONSE_NORMAL (0): in case of success
* Check vfio_ap_irq_enable() and vfio_ap_irq_disable() for other possible RC.
* We take the matrix_dev lock to ensure serialization on queues and
* mediated device access.
*
* Return 0 if we could handle the request inside KVM;
* otherwise, returns -EOPNOTSUPP to let QEMU handle the fault.
*/
static int handle_pqap(struct kvm_vcpu *vcpu)
{
uint64_t status;
uint16_t apqn;
struct vfio_ap_queue *q;
struct ap_queue_status qstatus = {
.response_code = AP_RESPONSE_Q_NOT_AVAIL, };
struct ap_matrix_mdev *matrix_mdev;
/* If we do not use the AIV facility just go to userland */
if (!(vcpu->arch.sie_block->eca & ECA_AIV))
return -EOPNOTSUPP;
apqn = vcpu->run->s.regs.gprs[0] & 0xffff;
mutex_lock(&matrix_dev->lock);
if (!vcpu->kvm->arch.crypto.pqap_hook)
goto out_unlock;
matrix_mdev = container_of(vcpu->kvm->arch.crypto.pqap_hook,
struct ap_matrix_mdev, pqap_hook);
q = vfio_ap_get_queue(matrix_mdev, apqn);
if (!q)
goto out_unlock;
status = vcpu->run->s.regs.gprs[1];
/* If IR bit(16) is set we enable the interrupt */
if ((status >> (63 - 16)) & 0x01)
qstatus = vfio_ap_irq_enable(q, status & 0x07,
vcpu->run->s.regs.gprs[2]);
else
qstatus = vfio_ap_irq_disable(q);
out_unlock:
memcpy(&vcpu->run->s.regs.gprs[1], &qstatus, sizeof(qstatus));
vcpu->run->s.regs.gprs[1] >>= 32;
mutex_unlock(&matrix_dev->lock);
return 0;
}
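To make the register convention above concrete, here is a stand-alone sketch of the same field extraction handle_pqap() performs on the guest's GPRs (the register values are invented for illustration):

#include <assert.h>
#include <stdint.h>

int main(void)
{
    /* invented guest register contents at a PQAP/AQIC interception */
    uint64_t gpr0 = 0x0000000000000103UL; /* APQN in bits 48-63 */
    uint64_t gpr1 = 0x0000800000000005UL; /* bit 16 = IR, bits 61-63 = ISC */
    uint64_t gpr2 = 0x0000000123456789UL; /* guest NIB address */

    uint16_t apqn = gpr0 & 0xffff;              /* -> card 0x01, queue 0x03 */
    int      ir   = (gpr1 >> (63 - 16)) & 0x01; /* -> 1: enable the interrupt */
    int      isc  = gpr1 & 0x07;                /* -> 5 */

    assert(apqn == 0x0103 && ir == 1 && isc == 5);
    (void)gpr2; /* would be passed to vfio_ap_irq_enable() as the NIB */
    return 0;
}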
static void vfio_ap_matrix_init(struct ap_config_info *info,
struct ap_matrix *matrix)
{
@@ -45,8 +335,11 @@ static int vfio_ap_mdev_create(struct kobject *kobj, struct mdev_device *mdev)
return -ENOMEM;
}
matrix_mdev->mdev = mdev;
vfio_ap_matrix_init(&matrix_dev->info, &matrix_mdev->matrix);
mdev_set_drvdata(mdev, matrix_mdev);
matrix_mdev->pqap_hook.hook = handle_pqap;
matrix_mdev->pqap_hook.owner = THIS_MODULE;
mutex_lock(&matrix_dev->lock);
list_add(&matrix_mdev->node, &matrix_dev->mdev_list);
mutex_unlock(&matrix_dev->lock);
@@ -62,6 +355,7 @@ static int vfio_ap_mdev_remove(struct mdev_device *mdev)
return -EBUSY;
mutex_lock(&matrix_dev->lock);
vfio_ap_mdev_reset_queues(mdev);
list_del(&matrix_mdev->node);
mutex_unlock(&matrix_dev->lock);
@@ -754,11 +1048,42 @@ static int vfio_ap_mdev_set_kvm(struct ap_matrix_mdev *matrix_mdev,
}
matrix_mdev->kvm = kvm;
kvm_get_kvm(kvm);
kvm->arch.crypto.pqap_hook = &matrix_mdev->pqap_hook;
mutex_unlock(&matrix_dev->lock);
return 0;
}
/*
* vfio_ap_mdev_iommu_notifier: IOMMU notifier callback
*
* @nb: The notifier block
* @action: Action to be taken
* @data: data associated with the request
*
* For an UNMAP request, unpin the guest IOVA (the NIB guest address we
* pinned before). Other requests are ignored.
*
*/
static int vfio_ap_mdev_iommu_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct ap_matrix_mdev *matrix_mdev;
matrix_mdev = container_of(nb, struct ap_matrix_mdev, iommu_notifier);
if (action == VFIO_IOMMU_NOTIFY_DMA_UNMAP) {
struct vfio_iommu_type1_dma_unmap *unmap = data;
unsigned long g_pfn = unmap->iova >> PAGE_SHIFT;
vfio_unpin_pages(mdev_dev(matrix_mdev->mdev), &g_pfn, 1);
return NOTIFY_OK;
}
return NOTIFY_DONE;
}
static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
@@ -790,15 +1115,36 @@ static int vfio_ap_mdev_group_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}
static int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi,
unsigned int retry)
static void vfio_ap_irq_disable_apqn(int apqn)
{
struct device *dev;
struct vfio_ap_queue *q;
dev = driver_find_device(&matrix_dev->vfio_ap_drv->driver, NULL,
&apqn, match_apqn);
if (dev) {
q = dev_get_drvdata(dev);
vfio_ap_irq_disable(q);
put_device(dev);
}
}
int vfio_ap_mdev_reset_queue(unsigned int apid, unsigned int apqi,
unsigned int retry)
{
struct ap_queue_status status;
int retry2 = 2;
int apqn = AP_MKQID(apid, apqi);
do {
status = ap_zapq(AP_MKQID(apid, apqi));
status = ap_zapq(apqn);
switch (status.response_code) {
case AP_RESPONSE_NORMAL:
while (!status.queue_empty && retry2--) {
msleep(20);
status = ap_tapq(apqn, NULL);
}
WARN_ON_ONCE(retry <= 0);
return 0;
case AP_RESPONSE_RESET_IN_PROGRESS:
case AP_RESPONSE_BUSY:
@@ -832,6 +1178,7 @@ static int vfio_ap_mdev_reset_queues(struct mdev_device *mdev)
*/
if (ret)
rc = ret;
vfio_ap_irq_disable_apqn(AP_MKQID(apid, apqi));
}
}
@@ -858,20 +1205,37 @@ static int vfio_ap_mdev_open(struct mdev_device *mdev)
return ret;
}
return 0;
matrix_mdev->iommu_notifier.notifier_call = vfio_ap_mdev_iommu_notifier;
events = VFIO_IOMMU_NOTIFY_DMA_UNMAP;
ret = vfio_register_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
&events, &matrix_mdev->iommu_notifier);
if (!ret)
return ret;
vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
&matrix_mdev->group_notifier);
module_put(THIS_MODULE);
return ret;
}
static void vfio_ap_mdev_release(struct mdev_device *mdev)
{
struct ap_matrix_mdev *matrix_mdev = mdev_get_drvdata(mdev);
if (matrix_mdev->kvm)
mutex_lock(&matrix_dev->lock);
if (matrix_mdev->kvm) {
kvm_arch_crypto_clear_masks(matrix_mdev->kvm);
matrix_mdev->kvm->arch.crypto.pqap_hook = NULL;
vfio_ap_mdev_reset_queues(mdev);
kvm_put_kvm(matrix_mdev->kvm);
matrix_mdev->kvm = NULL;
}
mutex_unlock(&matrix_dev->lock);
vfio_ap_mdev_reset_queues(mdev);
vfio_unregister_notifier(mdev_dev(mdev), VFIO_IOMMU_NOTIFY,
&matrix_mdev->iommu_notifier);
vfio_unregister_notifier(mdev_dev(mdev), VFIO_GROUP_NOTIFY,
&matrix_mdev->group_notifier);
matrix_mdev->kvm = NULL;
module_put(THIS_MODULE);
}
@@ -900,6 +1264,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev,
{
int ret;
mutex_lock(&matrix_dev->lock);
switch (cmd) {
case VFIO_DEVICE_GET_INFO:
ret = vfio_ap_mdev_get_device_info(arg);
@@ -911,6 +1276,7 @@ static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev,
ret = -EOPNOTSUPP;
break;
}
mutex_unlock(&matrix_dev->lock);
return ret;
}

Some files were not shown because too many files have changed in this diff.