diff --git a/Documentation/ABI/testing/sysfs-class-cxl b/Documentation/ABI/testing/sysfs-class-cxl index 640f65e79ef1..8e69345c37cc 100644 --- a/Documentation/ABI/testing/sysfs-class-cxl +++ b/Documentation/ABI/testing/sysfs-class-cxl @@ -244,3 +244,11 @@ Description: read only Returns 1 if the psl timebase register is synchronized with the core timebase register, 0 otherwise. Users: https://github.com/ibm-capi/libcxl + +What: /sys/class/cxl//tunneled_ops_supported +Date: May 2018 +Contact: linuxppc-dev@lists.ozlabs.org +Description: read only + Returns 1 if tunneled operations are supported in capi mode, + 0 otherwise. +Users: https://github.com/ibm-capi/libcxl diff --git a/Documentation/devicetree/bindings/net/marvell-pp2.txt b/Documentation/devicetree/bindings/net/marvell-pp2.txt index 1814fa13f6ab..fc019df0d863 100644 --- a/Documentation/devicetree/bindings/net/marvell-pp2.txt +++ b/Documentation/devicetree/bindings/net/marvell-pp2.txt @@ -21,9 +21,10 @@ Required properties: - main controller clock (for both armada-375-pp2 and armada-7k-pp2) - GOP clock (for both armada-375-pp2 and armada-7k-pp2) - MG clock (only for armada-7k-pp2) + - MG Core clock (only for armada-7k-pp2) - AXI clock (only for armada-7k-pp2) -- clock-names: names of used clocks, must be "pp_clk", "gop_clk", "mg_clk" - and "axi_clk" (the 2 latter only for armada-7k-pp2). +- clock-names: names of used clocks, must be "pp_clk", "gop_clk", "mg_clk", + "mg_core_clk" and "axi_clk" (the 3 latter only for armada-7k-pp2). The ethernet ports are represented by subnodes. At least one port is required. @@ -80,8 +81,8 @@ cpm_ethernet: ethernet@0 { compatible = "marvell,armada-7k-pp22"; reg = <0x0 0x100000>, <0x129000 0xb000>; clocks = <&cpm_syscon0 1 3>, <&cpm_syscon0 1 9>, - <&cpm_syscon0 1 5>, <&cpm_syscon0 1 18>; - clock-names = "pp_clk", "gop_clk", "gp_clk", "axi_clk"; + <&cpm_syscon0 1 5>, <&cpm_syscon0 1 6>, <&cpm_syscon0 1 18>; + clock-names = "pp_clk", "gop_clk", "mg_clk", "mg_core_clk", "axi_clk"; eth0: eth0 { interrupts = , diff --git a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt index 42a248301615..e22d8cfea687 100644 --- a/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt +++ b/Documentation/devicetree/bindings/net/micrel-ksz90x1.txt @@ -57,6 +57,13 @@ KSZ9031: - txd2-skew-ps : Skew control of TX data 2 pad - txd3-skew-ps : Skew control of TX data 3 pad + - micrel,force-master: + Boolean, force phy to master mode. Only set this option if the phy + reference clock provided at CLK125_NDO pin is used as MAC reference + clock because the clock jitter in slave mode is to high (errata#2). + Attention: The link partner must be configurable as slave otherwise + no link will be established. + Examples: mdio { diff --git a/Documentation/virtual/kvm/cpuid.txt b/Documentation/virtual/kvm/cpuid.txt index d4f33eb805dd..ab022dcd0911 100644 --- a/Documentation/virtual/kvm/cpuid.txt +++ b/Documentation/virtual/kvm/cpuid.txt @@ -72,8 +72,8 @@ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side flag || value || meaning ================================================================================== -KVM_HINTS_DEDICATED || 0 || guest checks this feature bit to - || || determine if there is vCPU pinning - || || and there is no vCPU over-commitment, +KVM_HINTS_REALTIME || 0 || guest checks this feature bit to + || || determine that vCPUs are never + || || preempted for an unlimited time, || || allowing optimizations ---------------------------------------------------------------------------------- diff --git a/MAINTAINERS b/MAINTAINERS index 9f2045a5adac..032807a95558 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3690,7 +3690,6 @@ F: drivers/cpufreq/arm_big_little_dt.c CPU POWER MONITORING SUBSYSTEM M: Thomas Renninger -M: Shuah Khan M: Shuah Khan L: linux-pm@vger.kernel.org S: Maintained @@ -4309,7 +4308,7 @@ F: Documentation/driver-api/dma-buf.rst T: git git://anongit.freedesktop.org/drm/drm-misc DMA GENERIC OFFLOAD ENGINE SUBSYSTEM -M: Vinod Koul +M: Vinod Koul L: dmaengine@vger.kernel.org Q: https://patchwork.kernel.org/project/linux-dmaengine/list/ S: Maintained @@ -7694,10 +7693,10 @@ F: include/linux/sunrpc/ F: include/uapi/linux/sunrpc/ KERNEL SELFTEST FRAMEWORK -M: Shuah Khan M: Shuah Khan L: linux-kselftest@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/shuah/linux-kselftest.git +Q: https://patchwork.kernel.org/project/linux-kselftest/list/ S: Maintained F: tools/testing/selftests/ F: Documentation/dev-tools/kselftest* @@ -12218,7 +12217,7 @@ F: Documentation/s390/vfio-ccw.txt F: include/uapi/linux/vfio_ccw.h S390 ZCRYPT DRIVER -M: Harald Freudenberger +M: Harald Freudenberger L: linux-s390@vger.kernel.org W: http://www.ibm.com/developerworks/linux/linux390/ S: Supported @@ -13262,6 +13261,12 @@ M: Jan-Benedict Glaw S: Maintained F: arch/alpha/kernel/srm_env.c +ST STM32 I2C/SMBUS DRIVER +M: Pierre-Yves MORDRET +L: linux-i2c@vger.kernel.org +S: Maintained +F: drivers/i2c/busses/i2c-stm32* + STABLE BRANCH M: Greg Kroah-Hartman L: stable@vger.kernel.org @@ -14649,7 +14654,6 @@ F: drivers/usb/common/usb-otg-fsm.c USB OVER IP DRIVER M: Valentina Manea -M: Shuah Khan M: Shuah Khan L: linux-usb@vger.kernel.org S: Maintained diff --git a/Makefile b/Makefile index d0d2652db174..ec6f45928fd4 100644 --- a/Makefile +++ b/Makefile @@ -2,7 +2,7 @@ VERSION = 4 PATCHLEVEL = 17 SUBLEVEL = 0 -EXTRAVERSION = -rc4 +EXTRAVERSION = -rc6 NAME = Merciless Moray # *DOCUMENTATION* diff --git a/arch/Kconfig b/arch/Kconfig index 8e0d665c8d53..75dd23acf133 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -464,6 +464,10 @@ config GCC_PLUGIN_LATENT_ENTROPY config GCC_PLUGIN_STRUCTLEAK bool "Force initialization of variables containing userspace addresses" depends on GCC_PLUGINS + # Currently STRUCTLEAK inserts initialization out of live scope of + # variables from KASAN point of view. This leads to KASAN false + # positive reports. Prohibit this combination for now. + depends on !KASAN_EXTRA help This plugin zero-initializes any structures containing a __user attribute. This can prevent some classes of information diff --git a/arch/arm/boot/compressed/Makefile b/arch/arm/boot/compressed/Makefile index 45a6b9b7af2a..6a4e7341ecd3 100644 --- a/arch/arm/boot/compressed/Makefile +++ b/arch/arm/boot/compressed/Makefile @@ -117,11 +117,9 @@ ccflags-y := -fpic -mno-single-pic-base -fno-builtin -I$(obj) asflags-y := -DZIMAGE # Supply kernel BSS size to the decompressor via a linker symbol. -KBSS_SZ = $(shell $(CROSS_COMPILE)nm $(obj)/../../../../vmlinux | \ - perl -e 'while (<>) { \ - $$bss_start=hex($$1) if /^([[:xdigit:]]+) B __bss_start$$/; \ - $$bss_end=hex($$1) if /^([[:xdigit:]]+) B __bss_stop$$/; \ - }; printf "%d\n", $$bss_end - $$bss_start;') +KBSS_SZ = $(shell echo $$(($$($(CROSS_COMPILE)nm $(obj)/../../../../vmlinux | \ + sed -n -e 's/^\([^ ]*\) [AB] __bss_start$$/-0x\1/p' \ + -e 's/^\([^ ]*\) [AB] __bss_stop$$/+0x\1/p') )) ) LDFLAGS_vmlinux = --defsym _kernel_bss_size=$(KBSS_SZ) # Supply ZRELADDR to the decompressor via a linker symbol. ifneq ($(CONFIG_AUTO_ZRELADDR),y) diff --git a/arch/arm/boot/compressed/head.S b/arch/arm/boot/compressed/head.S index 45c8823c3750..517e0e18f0b8 100644 --- a/arch/arm/boot/compressed/head.S +++ b/arch/arm/boot/compressed/head.S @@ -29,19 +29,19 @@ #if defined(CONFIG_DEBUG_ICEDCC) #if defined(CONFIG_CPU_V6) || defined(CONFIG_CPU_V6K) || defined(CONFIG_CPU_V7) - .macro loadsp, rb, tmp + .macro loadsp, rb, tmp1, tmp2 .endm .macro writeb, ch, rb mcr p14, 0, \ch, c0, c5, 0 .endm #elif defined(CONFIG_CPU_XSCALE) - .macro loadsp, rb, tmp + .macro loadsp, rb, tmp1, tmp2 .endm .macro writeb, ch, rb mcr p14, 0, \ch, c8, c0, 0 .endm #else - .macro loadsp, rb, tmp + .macro loadsp, rb, tmp1, tmp2 .endm .macro writeb, ch, rb mcr p14, 0, \ch, c1, c0, 0 @@ -57,7 +57,7 @@ .endm #if defined(CONFIG_ARCH_SA1100) - .macro loadsp, rb, tmp + .macro loadsp, rb, tmp1, tmp2 mov \rb, #0x80000000 @ physical base address #ifdef CONFIG_DEBUG_LL_SER3 add \rb, \rb, #0x00050000 @ Ser3 @@ -66,8 +66,8 @@ #endif .endm #else - .macro loadsp, rb, tmp - addruart \rb, \tmp + .macro loadsp, rb, tmp1, tmp2 + addruart \rb, \tmp1, \tmp2 .endm #endif #endif @@ -561,8 +561,6 @@ not_relocated: mov r0, #0 bl decompress_kernel bl cache_clean_flush bl cache_off - mov r1, r7 @ restore architecture number - mov r2, r8 @ restore atags pointer #ifdef CONFIG_ARM_VIRT_EXT mrs r0, spsr @ Get saved CPU boot mode @@ -1297,7 +1295,7 @@ phex: adr r3, phexbuf b 1b @ puts corrupts {r0, r1, r2, r3} -puts: loadsp r3, r1 +puts: loadsp r3, r2, r1 1: ldrb r2, [r0], #1 teq r2, #0 moveq pc, lr @@ -1314,8 +1312,8 @@ puts: loadsp r3, r1 @ putc corrupts {r0, r1, r2, r3} putc: mov r2, r0 + loadsp r3, r1, r0 mov r0, #0 - loadsp r3, r1 b 2b @ memdump corrupts {r0, r1, r2, r3, r10, r11, r12, lr} @@ -1365,6 +1363,8 @@ __hyp_reentry_vectors: __enter_kernel: mov r0, #0 @ must be 0 + mov r1, r7 @ restore architecture number + mov r2, r8 @ restore atags pointer ARM( mov pc, r4 ) @ call kernel M_CLASS( add r4, r4, #1 ) @ enter in Thumb mode for M class THUMB( bx r4 ) @ entry point is always ARM for A/R classes diff --git a/arch/arm/boot/dts/bcm-cygnus.dtsi b/arch/arm/boot/dts/bcm-cygnus.dtsi index 699fdf94d139..9fe4f5a6379e 100644 --- a/arch/arm/boot/dts/bcm-cygnus.dtsi +++ b/arch/arm/boot/dts/bcm-cygnus.dtsi @@ -69,7 +69,7 @@ timer@20200 { compatible = "arm,cortex-a9-global-timer"; reg = <0x20200 0x100>; - interrupts = ; + interrupts = ; clocks = <&periph_clk>; }; diff --git a/arch/arm/boot/dts/da850-lcdk.dts b/arch/arm/boot/dts/da850-lcdk.dts index a1f4d6d5a569..0edf769ea95c 100644 --- a/arch/arm/boot/dts/da850-lcdk.dts +++ b/arch/arm/boot/dts/da850-lcdk.dts @@ -21,8 +21,8 @@ stdout-path = "serial2:115200n8"; }; - memory { - device_type = "memory"; + memory@c0000000 { + /* 128 MB DDR2 SDRAM @ 0xc0000000 */ reg = <0xc0000000 0x08000000>; }; diff --git a/arch/arm/boot/dts/da850.dtsi b/arch/arm/boot/dts/da850.dtsi index c66cf7895363..12010002dbdb 100644 --- a/arch/arm/boot/dts/da850.dtsi +++ b/arch/arm/boot/dts/da850.dtsi @@ -7,10 +7,19 @@ * Free Software Foundation; either version 2 of the License, or (at your * option) any later version. */ -#include "skeleton.dtsi" #include / { + #address-cells = <1>; + #size-cells = <1>; + chosen { }; + aliases { }; + + memory@c0000000 { + device_type = "memory"; + reg = <0xc0000000 0x0>; + }; + arm { #address-cells = <1>; #size-cells = <1>; @@ -46,8 +55,6 @@ pmx_core: pinmux@14120 { compatible = "pinctrl-single"; reg = <0x14120 0x50>; - #address-cells = <1>; - #size-cells = <0>; #pinctrl-cells = <2>; pinctrl-single,bit-per-mux; pinctrl-single,register-width = <32>; diff --git a/arch/arm/boot/dts/dm8148-evm.dts b/arch/arm/boot/dts/dm8148-evm.dts index d6657b3bae84..85d7b5148b0a 100644 --- a/arch/arm/boot/dts/dm8148-evm.dts +++ b/arch/arm/boot/dts/dm8148-evm.dts @@ -10,7 +10,7 @@ / { model = "DM8148 EVM"; - compatible = "ti,dm8148-evm", "ti,dm8148"; + compatible = "ti,dm8148-evm", "ti,dm8148", "ti,dm814"; memory@80000000 { device_type = "memory"; diff --git a/arch/arm/boot/dts/dm8148-t410.dts b/arch/arm/boot/dts/dm8148-t410.dts index 63883b3479f9..6418f9cdbe83 100644 --- a/arch/arm/boot/dts/dm8148-t410.dts +++ b/arch/arm/boot/dts/dm8148-t410.dts @@ -9,7 +9,7 @@ / { model = "HP t410 Smart Zero Client"; - compatible = "hp,t410", "ti,dm8148"; + compatible = "hp,t410", "ti,dm8148", "ti,dm814"; memory@80000000 { device_type = "memory"; diff --git a/arch/arm/boot/dts/dm8168-evm.dts b/arch/arm/boot/dts/dm8168-evm.dts index c72a2132aa82..1d030d567307 100644 --- a/arch/arm/boot/dts/dm8168-evm.dts +++ b/arch/arm/boot/dts/dm8168-evm.dts @@ -10,7 +10,7 @@ / { model = "DM8168 EVM"; - compatible = "ti,dm8168-evm", "ti,dm8168"; + compatible = "ti,dm8168-evm", "ti,dm8168", "ti,dm816"; memory@80000000 { device_type = "memory"; diff --git a/arch/arm/boot/dts/dra62x-j5eco-evm.dts b/arch/arm/boot/dts/dra62x-j5eco-evm.dts index fee0547f7302..31b824ad5d29 100644 --- a/arch/arm/boot/dts/dra62x-j5eco-evm.dts +++ b/arch/arm/boot/dts/dra62x-j5eco-evm.dts @@ -10,7 +10,7 @@ / { model = "DRA62x J5 Eco EVM"; - compatible = "ti,dra62x-j5eco-evm", "ti,dra62x", "ti,dm8148"; + compatible = "ti,dra62x-j5eco-evm", "ti,dra62x", "ti,dm8148", "ti,dm814"; memory@80000000 { device_type = "memory"; diff --git a/arch/arm/boot/dts/imx51-zii-rdu1.dts b/arch/arm/boot/dts/imx51-zii-rdu1.dts index 0c99ac04ad08..6464f2560e06 100644 --- a/arch/arm/boot/dts/imx51-zii-rdu1.dts +++ b/arch/arm/boot/dts/imx51-zii-rdu1.dts @@ -523,7 +523,7 @@ }; touchscreen@20 { - compatible = "syna,rmi4_i2c"; + compatible = "syna,rmi4-i2c"; reg = <0x20>; pinctrl-names = "default"; pinctrl-0 = <&pinctrl_ts>; @@ -541,8 +541,8 @@ rmi4-f11@11 { reg = <0x11>; - touch-inverted-y; - touch-swapped-x-y; + touchscreen-inverted-y; + touchscreen-swapped-x-y; syna,sensor-type = <1>; }; }; diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi index 4d42335c0dee..ce85b3ca1a55 100644 --- a/arch/arm/boot/dts/imx7s.dtsi +++ b/arch/arm/boot/dts/imx7s.dtsi @@ -868,6 +868,7 @@ crypto: caam@30900000 { compatible = "fsl,sec-v4.0"; + fsl,sec-era = <8>; #address-cells = <1>; #size-cells = <1>; reg = <0x30900000 0x40000>; diff --git a/arch/arm/boot/dts/logicpd-som-lv.dtsi b/arch/arm/boot/dts/logicpd-som-lv.dtsi index b47cac23a04b..6fa7bba3e801 100644 --- a/arch/arm/boot/dts/logicpd-som-lv.dtsi +++ b/arch/arm/boot/dts/logicpd-som-lv.dtsi @@ -26,7 +26,7 @@ gpio = <&gpio1 3 0>; /* gpio_3 */ startup-delay-us = <70000>; enable-active-high; - vin-supply = <&vmmc2>; + vin-supply = <&vaux3>; }; /* HS USB Host PHY on PORT 1 */ @@ -82,6 +82,7 @@ twl_audio: audio { compatible = "ti,twl4030-audio"; codec { + ti,hs_extmute_gpio = <&gpio2 25 GPIO_ACTIVE_HIGH>; }; }; }; @@ -199,6 +200,7 @@ pinctrl-single,pins = < OMAP3_CORE1_IOPAD(0x21ba, PIN_INPUT | MUX_MODE0) /* i2c1_scl.i2c1_scl */ OMAP3_CORE1_IOPAD(0x21bc, PIN_INPUT | MUX_MODE0) /* i2c1_sda.i2c1_sda */ + OMAP3_CORE1_IOPAD(0x20ba, PIN_OUTPUT | MUX_MODE4) /* gpmc_ncs6.gpio_57 */ >; }; }; @@ -213,7 +215,7 @@ }; wl127x_gpio: pinmux_wl127x_gpio_pin { pinctrl-single,pins = < - OMAP3_WKUP_IOPAD(0x2a0c, PIN_INPUT | MUX_MODE4) /* sys_boot0.gpio_2 */ + OMAP3_WKUP_IOPAD(0x2a0a, PIN_INPUT | MUX_MODE4) /* sys_boot0.gpio_2 */ OMAP3_WKUP_IOPAD(0x2a0c, PIN_OUTPUT | MUX_MODE4) /* sys_boot1.gpio_3 */ >; }; @@ -260,6 +262,11 @@ #include "twl4030.dtsi" #include "twl4030_omap3.dtsi" +&vaux3 { + regulator-min-microvolt = <2800000>; + regulator-max-microvolt = <2800000>; +}; + &twl { twl_power: power { compatible = "ti,twl4030-power-idle-osc-off", "ti,twl4030-power-idle"; diff --git a/arch/arm/boot/dts/r8a7790-lager.dts b/arch/arm/boot/dts/r8a7790-lager.dts index 063fdb65dc60..f07f9018c3e7 100644 --- a/arch/arm/boot/dts/r8a7790-lager.dts +++ b/arch/arm/boot/dts/r8a7790-lager.dts @@ -379,7 +379,7 @@ port@0 { reg = <0>; adv7511_in: endpoint { - remote-endpoint = <&du_out_lvds0>; + remote-endpoint = <&lvds0_out>; }; }; @@ -467,10 +467,8 @@ status = "okay"; clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 722>, - <&cpg CPG_MOD 726>, <&cpg CPG_MOD 725>, <&x13_clk>, <&x2_clk>; - clock-names = "du.0", "du.1", "du.2", "lvds.0", "lvds.1", - "dclkin.0", "dclkin.1"; + clock-names = "du.0", "du.1", "du.2", "dclkin.0", "dclkin.1"; ports { port@0 { @@ -478,12 +476,26 @@ remote-endpoint = <&adv7123_in>; }; }; + }; +}; + +&lvds0 { + status = "okay"; + + ports { port@1 { endpoint { remote-endpoint = <&adv7511_in>; }; }; - port@2 { + }; +}; + +&lvds1 { + status = "okay"; + + ports { + port@1 { lvds_connector: endpoint { }; }; diff --git a/arch/arm/boot/dts/r8a7790.dtsi b/arch/arm/boot/dts/r8a7790.dtsi index e4367cecad18..05a0fc23ac88 100644 --- a/arch/arm/boot/dts/r8a7790.dtsi +++ b/arch/arm/boot/dts/r8a7790.dtsi @@ -1627,18 +1627,13 @@ du: display@feb00000 { compatible = "renesas,du-r8a7790"; - reg = <0 0xfeb00000 0 0x70000>, - <0 0xfeb90000 0 0x1c>, - <0 0xfeb94000 0 0x1c>; - reg-names = "du", "lvds.0", "lvds.1"; + reg = <0 0xfeb00000 0 0x70000>; interrupts = , , ; clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, - <&cpg CPG_MOD 722>, <&cpg CPG_MOD 726>, - <&cpg CPG_MOD 725>; - clock-names = "du.0", "du.1", "du.2", "lvds.0", - "lvds.1"; + <&cpg CPG_MOD 722>; + clock-names = "du.0", "du.1", "du.2"; status = "disabled"; ports { @@ -1653,11 +1648,65 @@ port@1 { reg = <1>; du_out_lvds0: endpoint { + remote-endpoint = <&lvds0_in>; }; }; port@2 { reg = <2>; du_out_lvds1: endpoint { + remote-endpoint = <&lvds1_in>; + }; + }; + }; + }; + + lvds0: lvds@feb90000 { + compatible = "renesas,r8a7790-lvds"; + reg = <0 0xfeb90000 0 0x1c>; + clocks = <&cpg CPG_MOD 726>; + power-domains = <&sysc R8A7790_PD_ALWAYS_ON>; + resets = <&cpg 726>; + status = "disabled"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + lvds0_in: endpoint { + remote-endpoint = <&du_out_lvds0>; + }; + }; + port@1 { + reg = <1>; + lvds0_out: endpoint { + }; + }; + }; + }; + + lvds1: lvds@feb94000 { + compatible = "renesas,r8a7790-lvds"; + reg = <0 0xfeb94000 0 0x1c>; + clocks = <&cpg CPG_MOD 725>; + power-domains = <&sysc R8A7790_PD_ALWAYS_ON>; + resets = <&cpg 725>; + status = "disabled"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + lvds1_in: endpoint { + remote-endpoint = <&du_out_lvds1>; + }; + }; + port@1 { + reg = <1>; + lvds1_out: endpoint { }; }; }; diff --git a/arch/arm/boot/dts/r8a7791-koelsch.dts b/arch/arm/boot/dts/r8a7791-koelsch.dts index f40321a1c917..9d7213a0b8b8 100644 --- a/arch/arm/boot/dts/r8a7791-koelsch.dts +++ b/arch/arm/boot/dts/r8a7791-koelsch.dts @@ -468,10 +468,9 @@ pinctrl-names = "default"; status = "okay"; - clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 726>, + clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&x13_clk>, <&x2_clk>; - clock-names = "du.0", "du.1", "lvds.0", - "dclkin.0", "dclkin.1"; + clock-names = "du.0", "du.1", "dclkin.0", "dclkin.1"; ports { port@0 { @@ -479,6 +478,13 @@ remote-endpoint = <&adv7511_in>; }; }; + }; +}; + +&lvds0 { + status = "okay"; + + ports { port@1 { lvds_connector: endpoint { }; diff --git a/arch/arm/boot/dts/r8a7791-porter.dts b/arch/arm/boot/dts/r8a7791-porter.dts index c14e6fe9e4f6..ae9ed9ff53ef 100644 --- a/arch/arm/boot/dts/r8a7791-porter.dts +++ b/arch/arm/boot/dts/r8a7791-porter.dts @@ -441,10 +441,9 @@ pinctrl-names = "default"; status = "okay"; - clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 726>, + clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&x3_clk>, <&x16_clk>; - clock-names = "du.0", "du.1", "lvds.0", - "dclkin.0", "dclkin.1"; + clock-names = "du.0", "du.1", "dclkin.0", "dclkin.1"; ports { port@0 { @@ -455,6 +454,17 @@ }; }; +&lvds0 { + status = "okay"; + + ports { + port@1 { + lvds_connector: endpoint { + }; + }; + }; +}; + &rcar_sound { pinctrl-0 = <&ssi_pins &audio_clk_pins>; pinctrl-names = "default"; diff --git a/arch/arm/boot/dts/r8a7791.dtsi b/arch/arm/boot/dts/r8a7791.dtsi index f11dab71b03a..506b20885413 100644 --- a/arch/arm/boot/dts/r8a7791.dtsi +++ b/arch/arm/boot/dts/r8a7791.dtsi @@ -1633,15 +1633,12 @@ du: display@feb00000 { compatible = "renesas,du-r8a7791"; - reg = <0 0xfeb00000 0 0x40000>, - <0 0xfeb90000 0 0x1c>; - reg-names = "du", "lvds.0"; + reg = <0 0xfeb00000 0 0x40000>; interrupts = , ; clocks = <&cpg CPG_MOD 724>, - <&cpg CPG_MOD 723>, - <&cpg CPG_MOD 726>; - clock-names = "du.0", "du.1", "lvds.0"; + <&cpg CPG_MOD 723>; + clock-names = "du.0", "du.1"; status = "disabled"; ports { @@ -1656,6 +1653,33 @@ port@1 { reg = <1>; du_out_lvds0: endpoint { + remote-endpoint = <&lvds0_in>; + }; + }; + }; + }; + + lvds0: lvds@feb90000 { + compatible = "renesas,r8a7791-lvds"; + reg = <0 0xfeb90000 0 0x1c>; + clocks = <&cpg CPG_MOD 726>; + power-domains = <&sysc R8A7791_PD_ALWAYS_ON>; + resets = <&cpg 726>; + status = "disabled"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + lvds0_in: endpoint { + remote-endpoint = <&du_out_lvds0>; + }; + }; + port@1 { + reg = <1>; + lvds0_out: endpoint { }; }; }; diff --git a/arch/arm/boot/dts/r8a7793-gose.dts b/arch/arm/boot/dts/r8a7793-gose.dts index 9ed6961f2d9a..96e117d8b2cc 100644 --- a/arch/arm/boot/dts/r8a7793-gose.dts +++ b/arch/arm/boot/dts/r8a7793-gose.dts @@ -447,10 +447,9 @@ pinctrl-names = "default"; status = "okay"; - clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&cpg CPG_MOD 726>, + clocks = <&cpg CPG_MOD 724>, <&cpg CPG_MOD 723>, <&x13_clk>, <&x2_clk>; - clock-names = "du.0", "du.1", "lvds.0", - "dclkin.0", "dclkin.1"; + clock-names = "du.0", "du.1", "dclkin.0", "dclkin.1"; ports { port@0 { @@ -458,6 +457,11 @@ remote-endpoint = <&adv7511_in>; }; }; + }; +}; + +&lvds0 { + ports { port@1 { lvds_connector: endpoint { }; diff --git a/arch/arm/boot/dts/r8a7793.dtsi b/arch/arm/boot/dts/r8a7793.dtsi index f9c5a557107d..4f526030dc7c 100644 --- a/arch/arm/boot/dts/r8a7793.dtsi +++ b/arch/arm/boot/dts/r8a7793.dtsi @@ -1292,15 +1292,12 @@ du: display@feb00000 { compatible = "renesas,du-r8a7793"; - reg = <0 0xfeb00000 0 0x40000>, - <0 0xfeb90000 0 0x1c>; - reg-names = "du", "lvds.0"; + reg = <0 0xfeb00000 0 0x40000>; interrupts = , ; clocks = <&cpg CPG_MOD 724>, - <&cpg CPG_MOD 723>, - <&cpg CPG_MOD 726>; - clock-names = "du.0", "du.1", "lvds.0"; + <&cpg CPG_MOD 723>; + clock-names = "du.0", "du.1"; status = "disabled"; ports { @@ -1315,6 +1312,34 @@ port@1 { reg = <1>; du_out_lvds0: endpoint { + remote-endpoint = <&lvds0_in>; + }; + }; + }; + }; + + lvds0: lvds@feb90000 { + compatible = "renesas,r8a7793-lvds"; + reg = <0 0xfeb90000 0 0x1c>; + clocks = <&cpg CPG_MOD 726>; + power-domains = <&sysc R8A7793_PD_ALWAYS_ON>; + resets = <&cpg 726>; + + status = "disabled"; + + ports { + #address-cells = <1>; + #size-cells = <0>; + + port@0 { + reg = <0>; + lvds0_in: endpoint { + remote-endpoint = <&du_out_lvds0>; + }; + }; + port@1 { + reg = <1>; + lvds0_out: endpoint { }; }; }; diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index 0a7136462a1a..983dd5c14794 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -741,7 +741,7 @@ phy_type = "ulpi"; clocks = <&tegra_car TEGRA20_CLK_USB2>, <&tegra_car TEGRA20_CLK_PLL_U>, - <&tegra_car TEGRA20_CLK_PLL_P_OUT4>; + <&tegra_car TEGRA20_CLK_CDEV2>; clock-names = "reg", "pll_u", "ulpi-link"; resets = <&tegra_car 58>, <&tegra_car 22>; reset-names = "usb", "utmi-pads"; diff --git a/arch/arm/include/asm/assembler.h b/arch/arm/include/asm/assembler.h index bc8d4bbd82e2..9342904cccca 100644 --- a/arch/arm/include/asm/assembler.h +++ b/arch/arm/include/asm/assembler.h @@ -536,4 +536,14 @@ THUMB( orr \reg , \reg , #PSR_T_BIT ) #endif .endm +#ifdef CONFIG_KPROBES +#define _ASM_NOKPROBE(entry) \ + .pushsection "_kprobe_blacklist", "aw" ; \ + .balign 4 ; \ + .long entry; \ + .popsection +#else +#define _ASM_NOKPROBE(entry) +#endif + #endif /* __ASM_ASSEMBLER_H__ */ diff --git a/arch/arm/include/asm/kvm_mmu.h b/arch/arm/include/asm/kvm_mmu.h index 707a1f06dc5d..f675162663f0 100644 --- a/arch/arm/include/asm/kvm_mmu.h +++ b/arch/arm/include/asm/kvm_mmu.h @@ -309,6 +309,22 @@ static inline unsigned int kvm_get_vmid_bits(void) return 8; } +/* + * We are not in the kvm->srcu critical section most of the time, so we take + * the SRCU read lock here. Since we copy the data from the user page, we + * can immediately drop the lock again. + */ +static inline int kvm_read_guest_lock(struct kvm *kvm, + gpa_t gpa, void *data, unsigned long len) +{ + int srcu_idx = srcu_read_lock(&kvm->srcu); + int ret = kvm_read_guest(kvm, gpa, data, len); + + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return ret; +} + static inline void *kvm_get_hyp_vector(void) { return kvm_ksym_ref(__kvm_hyp_vector); diff --git a/arch/arm/include/uapi/asm/siginfo.h b/arch/arm/include/uapi/asm/siginfo.h deleted file mode 100644 index d0513880be21..000000000000 --- a/arch/arm/include/uapi/asm/siginfo.h +++ /dev/null @@ -1,13 +0,0 @@ -#ifndef __ASM_SIGINFO_H -#define __ASM_SIGINFO_H - -#include - -/* - * SIGFPE si_codes - */ -#ifdef __KERNEL__ -#define FPE_FIXME 0 /* Broken dup of SI_USER */ -#endif /* __KERNEL__ */ - -#endif diff --git a/arch/arm/kernel/machine_kexec.c b/arch/arm/kernel/machine_kexec.c index 6b38d7a634c1..dd2eb5f76b9f 100644 --- a/arch/arm/kernel/machine_kexec.c +++ b/arch/arm/kernel/machine_kexec.c @@ -83,7 +83,7 @@ void machine_crash_nonpanic_core(void *unused) { struct pt_regs regs; - crash_setup_regs(®s, NULL); + crash_setup_regs(®s, get_irq_regs()); printk(KERN_DEBUG "CPU %u will stop doing anything useful since another CPU has crashed\n", smp_processor_id()); crash_save_cpu(®s, smp_processor_id()); @@ -95,6 +95,27 @@ void machine_crash_nonpanic_core(void *unused) cpu_relax(); } +void crash_smp_send_stop(void) +{ + static int cpus_stopped; + unsigned long msecs; + + if (cpus_stopped) + return; + + atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); + smp_call_function(machine_crash_nonpanic_core, NULL, false); + msecs = 1000; /* Wait at most a second for the other cpus to stop */ + while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { + mdelay(1); + msecs--; + } + if (atomic_read(&waiting_for_crash_ipi) > 0) + pr_warn("Non-crashing CPUs did not react to IPI\n"); + + cpus_stopped = 1; +} + static void machine_kexec_mask_interrupts(void) { unsigned int i; @@ -120,19 +141,8 @@ static void machine_kexec_mask_interrupts(void) void machine_crash_shutdown(struct pt_regs *regs) { - unsigned long msecs; - local_irq_disable(); - - atomic_set(&waiting_for_crash_ipi, num_online_cpus() - 1); - smp_call_function(machine_crash_nonpanic_core, NULL, false); - msecs = 1000; /* Wait at most a second for the other cpus to stop */ - while ((atomic_read(&waiting_for_crash_ipi) > 0) && msecs) { - mdelay(1); - msecs--; - } - if (atomic_read(&waiting_for_crash_ipi) > 0) - pr_warn("Non-crashing CPUs did not react to IPI\n"); + crash_smp_send_stop(); crash_save_cpu(regs, smp_processor_id()); machine_kexec_mask_interrupts(); diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 5e3633c24e63..2fe87109ae46 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -417,7 +418,8 @@ void unregister_undef_hook(struct undef_hook *hook) raw_spin_unlock_irqrestore(&undef_lock, flags); } -static int call_undef_hook(struct pt_regs *regs, unsigned int instr) +static nokprobe_inline +int call_undef_hook(struct pt_regs *regs, unsigned int instr) { struct undef_hook *hook; unsigned long flags; @@ -490,6 +492,7 @@ die_sig: arm_notify_die("Oops - undefined instruction", regs, &info, 0, 6); } +NOKPROBE_SYMBOL(do_undefinstr) /* * Handle FIQ similarly to NMI on x86 systems. diff --git a/arch/arm/lib/getuser.S b/arch/arm/lib/getuser.S index df73914e81c8..746e7801dcdf 100644 --- a/arch/arm/lib/getuser.S +++ b/arch/arm/lib/getuser.S @@ -38,6 +38,7 @@ ENTRY(__get_user_1) mov r0, #0 ret lr ENDPROC(__get_user_1) +_ASM_NOKPROBE(__get_user_1) ENTRY(__get_user_2) check_uaccess r0, 2, r1, r2, __get_user_bad @@ -58,6 +59,7 @@ rb .req r0 mov r0, #0 ret lr ENDPROC(__get_user_2) +_ASM_NOKPROBE(__get_user_2) ENTRY(__get_user_4) check_uaccess r0, 4, r1, r2, __get_user_bad @@ -65,6 +67,7 @@ ENTRY(__get_user_4) mov r0, #0 ret lr ENDPROC(__get_user_4) +_ASM_NOKPROBE(__get_user_4) ENTRY(__get_user_8) check_uaccess r0, 8, r1, r2, __get_user_bad8 @@ -78,6 +81,7 @@ ENTRY(__get_user_8) mov r0, #0 ret lr ENDPROC(__get_user_8) +_ASM_NOKPROBE(__get_user_8) #ifdef __ARMEB__ ENTRY(__get_user_32t_8) @@ -91,6 +95,7 @@ ENTRY(__get_user_32t_8) mov r0, #0 ret lr ENDPROC(__get_user_32t_8) +_ASM_NOKPROBE(__get_user_32t_8) ENTRY(__get_user_64t_1) check_uaccess r0, 1, r1, r2, __get_user_bad8 @@ -98,6 +103,7 @@ ENTRY(__get_user_64t_1) mov r0, #0 ret lr ENDPROC(__get_user_64t_1) +_ASM_NOKPROBE(__get_user_64t_1) ENTRY(__get_user_64t_2) check_uaccess r0, 2, r1, r2, __get_user_bad8 @@ -114,6 +120,7 @@ rb .req r0 mov r0, #0 ret lr ENDPROC(__get_user_64t_2) +_ASM_NOKPROBE(__get_user_64t_2) ENTRY(__get_user_64t_4) check_uaccess r0, 4, r1, r2, __get_user_bad8 @@ -121,6 +128,7 @@ ENTRY(__get_user_64t_4) mov r0, #0 ret lr ENDPROC(__get_user_64t_4) +_ASM_NOKPROBE(__get_user_64t_4) #endif __get_user_bad8: @@ -131,6 +139,8 @@ __get_user_bad: ret lr ENDPROC(__get_user_bad) ENDPROC(__get_user_bad8) +_ASM_NOKPROBE(__get_user_bad) +_ASM_NOKPROBE(__get_user_bad8) .pushsection __ex_table, "a" .long 1b, __get_user_bad diff --git a/arch/arm/mach-davinci/board-da830-evm.c b/arch/arm/mach-davinci/board-da830-evm.c index 004f9c8de032..d1e8ce7b4bd2 100644 --- a/arch/arm/mach-davinci/board-da830-evm.c +++ b/arch/arm/mach-davinci/board-da830-evm.c @@ -205,12 +205,17 @@ static const short da830_evm_mmc_sd_pins[] = { -1 }; +#define DA830_MMCSD_WP_PIN GPIO_TO_PIN(2, 1) +#define DA830_MMCSD_CD_PIN GPIO_TO_PIN(2, 2) + static struct gpiod_lookup_table mmc_gpios_table = { .dev_id = "da830-mmc.0", .table = { /* gpio chip 1 contains gpio range 32-63 */ - GPIO_LOOKUP("davinci_gpio.1", 2, "cd", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("davinci_gpio.1", 1, "wp", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("davinci_gpio.0", DA830_MMCSD_CD_PIN, "cd", + GPIO_ACTIVE_LOW), + GPIO_LOOKUP("davinci_gpio.0", DA830_MMCSD_WP_PIN, "wp", + GPIO_ACTIVE_LOW), }, }; diff --git a/arch/arm/mach-davinci/board-da850-evm.c b/arch/arm/mach-davinci/board-da850-evm.c index 3063478bcc36..158ed9a1483f 100644 --- a/arch/arm/mach-davinci/board-da850-evm.c +++ b/arch/arm/mach-davinci/board-da850-evm.c @@ -763,12 +763,17 @@ static const short da850_evm_mcasp_pins[] __initconst = { -1 }; +#define DA850_MMCSD_CD_PIN GPIO_TO_PIN(4, 0) +#define DA850_MMCSD_WP_PIN GPIO_TO_PIN(4, 1) + static struct gpiod_lookup_table mmc_gpios_table = { .dev_id = "da830-mmc.0", .table = { /* gpio chip 2 contains gpio range 64-95 */ - GPIO_LOOKUP("davinci_gpio.2", 0, "cd", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("davinci_gpio.2", 1, "wp", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("davinci_gpio.0", DA850_MMCSD_CD_PIN, "cd", + GPIO_ACTIVE_LOW), + GPIO_LOOKUP("davinci_gpio.0", DA850_MMCSD_WP_PIN, "wp", + GPIO_ACTIVE_LOW), }, }; diff --git a/arch/arm/mach-davinci/board-dm355-evm.c b/arch/arm/mach-davinci/board-dm355-evm.c index cb30637d9eaf..23ab9e8bc04c 100644 --- a/arch/arm/mach-davinci/board-dm355-evm.c +++ b/arch/arm/mach-davinci/board-dm355-evm.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -109,12 +110,15 @@ static struct platform_device davinci_nand_device = { }, }; +#define DM355_I2C_SDA_PIN GPIO_TO_PIN(0, 15) +#define DM355_I2C_SCL_PIN GPIO_TO_PIN(0, 14) + static struct gpiod_lookup_table i2c_recovery_gpiod_table = { - .dev_id = "i2c_davinci", + .dev_id = "i2c_davinci.1", .table = { - GPIO_LOOKUP("davinci_gpio", 15, "sda", + GPIO_LOOKUP("davinci_gpio.0", DM355_I2C_SDA_PIN, "sda", GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), - GPIO_LOOKUP("davinci_gpio", 14, "scl", + GPIO_LOOKUP("davinci_gpio.0", DM355_I2C_SCL_PIN, "scl", GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), }, }; @@ -179,11 +183,16 @@ static struct resource dm355evm_dm9000_rsrc[] = { }, }; +static struct dm9000_plat_data dm335evm_dm9000_platdata; + static struct platform_device dm355evm_dm9000 = { .name = "dm9000", .id = -1, .resource = dm355evm_dm9000_rsrc, .num_resources = ARRAY_SIZE(dm355evm_dm9000_rsrc), + .dev = { + .platform_data = &dm335evm_dm9000_platdata, + }, }; static struct tvp514x_platform_data tvp5146_pdata = { diff --git a/arch/arm/mach-davinci/board-dm644x-evm.c b/arch/arm/mach-davinci/board-dm644x-evm.c index 95b55aae1366..509e64ab1994 100644 --- a/arch/arm/mach-davinci/board-dm644x-evm.c +++ b/arch/arm/mach-davinci/board-dm644x-evm.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -596,12 +597,15 @@ static struct i2c_board_info __initdata i2c_info[] = { }, }; +#define DM644X_I2C_SDA_PIN GPIO_TO_PIN(2, 12) +#define DM644X_I2C_SCL_PIN GPIO_TO_PIN(2, 11) + static struct gpiod_lookup_table i2c_recovery_gpiod_table = { - .dev_id = "i2c_davinci", + .dev_id = "i2c_davinci.1", .table = { - GPIO_LOOKUP("davinci_gpio", 44, "sda", + GPIO_LOOKUP("davinci_gpio.0", DM644X_I2C_SDA_PIN, "sda", GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), - GPIO_LOOKUP("davinci_gpio", 43, "scl", + GPIO_LOOKUP("davinci_gpio.0", DM644X_I2C_SCL_PIN, "scl", GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN), }, }; diff --git a/arch/arm/mach-davinci/board-dm646x-evm.c b/arch/arm/mach-davinci/board-dm646x-evm.c index 2d37f5b0e1f5..a3c0d1e87647 100644 --- a/arch/arm/mach-davinci/board-dm646x-evm.c +++ b/arch/arm/mach-davinci/board-dm646x-evm.c @@ -532,11 +532,12 @@ static struct vpif_display_config dm646x_vpif_display_config = { .set_clock = set_vpif_clock, .subdevinfo = dm646x_vpif_subdev, .subdev_count = ARRAY_SIZE(dm646x_vpif_subdev), + .i2c_adapter_id = 1, .chan_config[0] = { .outputs = dm6467_ch0_outputs, .output_count = ARRAY_SIZE(dm6467_ch0_outputs), }, - .card_name = "DM646x EVM", + .card_name = "DM646x EVM Video Display", }; /** @@ -674,6 +675,7 @@ static struct vpif_capture_config dm646x_vpif_capture_cfg = { .setup_input_channel_mode = setup_vpif_input_channel_mode, .subdev_info = vpif_capture_sdev_info, .subdev_count = ARRAY_SIZE(vpif_capture_sdev_info), + .i2c_adapter_id = 1, .chan_config[0] = { .inputs = dm6467_ch0_inputs, .input_count = ARRAY_SIZE(dm6467_ch0_inputs), @@ -694,6 +696,7 @@ static struct vpif_capture_config dm646x_vpif_capture_cfg = { .fid_pol = 0, }, }, + .card_name = "DM646x EVM Video Capture", }; static void __init evm_init_video(void) diff --git a/arch/arm/mach-davinci/board-omapl138-hawk.c b/arch/arm/mach-davinci/board-omapl138-hawk.c index 0d32042b728f..be8b892a6ea7 100644 --- a/arch/arm/mach-davinci/board-omapl138-hawk.c +++ b/arch/arm/mach-davinci/board-omapl138-hawk.c @@ -123,12 +123,16 @@ static const short hawk_mmcsd0_pins[] = { -1 }; +#define DA850_HAWK_MMCSD_CD_PIN GPIO_TO_PIN(3, 12) +#define DA850_HAWK_MMCSD_WP_PIN GPIO_TO_PIN(3, 13) + static struct gpiod_lookup_table mmc_gpios_table = { .dev_id = "da830-mmc.0", .table = { - /* CD: gpio3_12: gpio60: chip 1 contains gpio range 32-63*/ - GPIO_LOOKUP("davinci_gpio.0", 28, "cd", GPIO_ACTIVE_LOW), - GPIO_LOOKUP("davinci_gpio.0", 29, "wp", GPIO_ACTIVE_LOW), + GPIO_LOOKUP("davinci_gpio.0", DA850_HAWK_MMCSD_CD_PIN, "cd", + GPIO_ACTIVE_LOW), + GPIO_LOOKUP("davinci_gpio.0", DA850_HAWK_MMCSD_WP_PIN, "wp", + GPIO_ACTIVE_LOW), }, }; diff --git a/arch/arm/mach-davinci/dm646x.c b/arch/arm/mach-davinci/dm646x.c index 109ab1fa0d2c..c32ca27ab343 100644 --- a/arch/arm/mach-davinci/dm646x.c +++ b/arch/arm/mach-davinci/dm646x.c @@ -488,7 +488,8 @@ static u8 dm646x_default_priorities[DAVINCI_N_AINTC_IRQ] = { [IRQ_DM646X_MCASP0TXINT] = 7, [IRQ_DM646X_MCASP0RXINT] = 7, [IRQ_DM646X_RESERVED_3] = 7, - [IRQ_DM646X_MCASP1TXINT] = 7, /* clockevent */ + [IRQ_DM646X_MCASP1TXINT] = 7, + [IRQ_TINT0_TINT12] = 7, /* clockevent */ [IRQ_TINT0_TINT34] = 7, /* clocksource */ [IRQ_TINT1_TINT12] = 7, /* DSP timer */ [IRQ_TINT1_TINT34] = 7, /* system tick */ diff --git a/arch/arm/mach-keystone/pm_domain.c b/arch/arm/mach-keystone/pm_domain.c index fe57e2692629..abca83d22ff3 100644 --- a/arch/arm/mach-keystone/pm_domain.c +++ b/arch/arm/mach-keystone/pm_domain.c @@ -29,6 +29,7 @@ static struct dev_pm_domain keystone_pm_domain = { static struct pm_clk_notifier_block platform_domain_notifier = { .pm_domain = &keystone_pm_domain, + .con_ids = { NULL }, }; static const struct of_device_id of_keystone_table[] = { diff --git a/arch/arm/mach-omap1/ams-delta-fiq.c b/arch/arm/mach-omap1/ams-delta-fiq.c index 793a24a53c52..d7ca9e2b40d2 100644 --- a/arch/arm/mach-omap1/ams-delta-fiq.c +++ b/arch/arm/mach-omap1/ams-delta-fiq.c @@ -58,22 +58,24 @@ static irqreturn_t deferred_fiq(int irq, void *dev_id) irq_num = gpio_to_irq(gpio); fiq_count = fiq_buffer[FIQ_CNT_INT_00 + gpio]; - while (irq_counter[gpio] < fiq_count) { - if (gpio != AMS_DELTA_GPIO_PIN_KEYBRD_CLK) { - struct irq_data *d = irq_get_irq_data(irq_num); + if (irq_counter[gpio] < fiq_count && + gpio != AMS_DELTA_GPIO_PIN_KEYBRD_CLK) { + struct irq_data *d = irq_get_irq_data(irq_num); - /* - * It looks like handle_edge_irq() that - * OMAP GPIO edge interrupts default to, - * expects interrupt already unmasked. - */ - if (irq_chip && irq_chip->irq_unmask) + /* + * handle_simple_irq() that OMAP GPIO edge + * interrupts default to since commit 80ac93c27441 + * requires interrupt already acked and unmasked. + */ + if (irq_chip) { + if (irq_chip->irq_ack) + irq_chip->irq_ack(d); + if (irq_chip->irq_unmask) irq_chip->irq_unmask(d); } - generic_handle_irq(irq_num); - - irq_counter[gpio]++; } + for (; irq_counter[gpio] < fiq_count; irq_counter[gpio]++) + generic_handle_irq(irq_num); } return IRQ_HANDLED; } diff --git a/arch/arm/mach-omap2/powerdomain.c b/arch/arm/mach-omap2/powerdomain.c index 76eb6ec5f157..1e6a967cd2d5 100644 --- a/arch/arm/mach-omap2/powerdomain.c +++ b/arch/arm/mach-omap2/powerdomain.c @@ -188,7 +188,7 @@ static int _pwrdm_state_switch(struct powerdomain *pwrdm, int flag) ((prev & OMAP_POWERSTATE_MASK) << 0)); trace_power_domain_target_rcuidle(pwrdm->name, trace_state, - smp_processor_id()); + raw_smp_processor_id()); } break; default: @@ -518,7 +518,7 @@ int pwrdm_set_next_pwrst(struct powerdomain *pwrdm, u8 pwrst) if (arch_pwrdm && arch_pwrdm->pwrdm_set_next_pwrst) { /* Trace the pwrdm desired target state */ trace_power_domain_target_rcuidle(pwrdm->name, pwrst, - smp_processor_id()); + raw_smp_processor_id()); /* Program the pwrdm desired target state */ ret = arch_pwrdm->pwrdm_set_next_pwrst(pwrdm, pwrst); } diff --git a/arch/arm/probes/kprobes/opt-arm.c b/arch/arm/probes/kprobes/opt-arm.c index bcdecc25461b..b2aa9b32bff2 100644 --- a/arch/arm/probes/kprobes/opt-arm.c +++ b/arch/arm/probes/kprobes/opt-arm.c @@ -165,13 +165,14 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) { unsigned long flags; struct kprobe *p = &op->kp; - struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); + struct kprobe_ctlblk *kcb; /* Save skipped registers */ regs->ARM_pc = (unsigned long)op->kp.addr; regs->ARM_ORIG_r0 = ~0UL; local_irq_save(flags); + kcb = get_kprobe_ctlblk(); if (kprobe_running()) { kprobes_inc_nmissed_count(&op->kp); @@ -191,6 +192,7 @@ optimized_callback(struct optimized_kprobe *op, struct pt_regs *regs) local_irq_restore(flags); } +NOKPROBE_SYMBOL(optimized_callback) int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *orig) { diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 4c375e11ae95..af4ee2cef2f9 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -257,7 +257,7 @@ static void vfp_raise_exceptions(u32 exceptions, u32 inst, u32 fpscr, struct pt_ if (exceptions == VFP_EXCEPTION_ERROR) { vfp_panic("unhandled bounce", inst); - vfp_raise_sigfpe(FPE_FIXME, regs); + vfp_raise_sigfpe(FPE_FLTINV, regs); return; } diff --git a/arch/arm64/boot/dts/exynos/exynos5433.dtsi b/arch/arm64/boot/dts/exynos/exynos5433.dtsi index c0231d077fa6..1ad8677f6a0a 100644 --- a/arch/arm64/boot/dts/exynos/exynos5433.dtsi +++ b/arch/arm64/boot/dts/exynos/exynos5433.dtsi @@ -1317,7 +1317,7 @@ reg = <0x14d60000 0x100>; dmas = <&pdma0 31 &pdma0 30>; dma-names = "tx", "rx"; - interrupts = ; + interrupts = ; clocks = <&cmu_peric CLK_PCLK_I2S1>, <&cmu_peric CLK_PCLK_I2S1>, <&cmu_peric CLK_SCLK_I2S1>; diff --git a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi index 48cad7919efa..ed2f1237ea1e 100644 --- a/arch/arm64/boot/dts/marvell/armada-cp110.dtsi +++ b/arch/arm64/boot/dts/marvell/armada-cp110.dtsi @@ -38,9 +38,10 @@ compatible = "marvell,armada-7k-pp22"; reg = <0x0 0x100000>, <0x129000 0xb000>; clocks = <&CP110_LABEL(clk) 1 3>, <&CP110_LABEL(clk) 1 9>, - <&CP110_LABEL(clk) 1 5>, <&CP110_LABEL(clk) 1 18>; + <&CP110_LABEL(clk) 1 5>, <&CP110_LABEL(clk) 1 6>, + <&CP110_LABEL(clk) 1 18>; clock-names = "pp_clk", "gop_clk", - "mg_clk", "axi_clk"; + "mg_clk", "mg_core_clk", "axi_clk"; marvell,system-controller = <&CP110_LABEL(syscon0)>; status = "disabled"; dma-coherent; @@ -141,6 +142,8 @@ #size-cells = <0>; compatible = "marvell,xmdio"; reg = <0x12a600 0x10>; + clocks = <&CP110_LABEL(clk) 1 5>, + <&CP110_LABEL(clk) 1 6>, <&CP110_LABEL(clk) 1 18>; status = "disabled"; }; diff --git a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi index a8baad7b80df..13f57fff1477 100644 --- a/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi +++ b/arch/arm64/boot/dts/nvidia/tegra186-p3310.dtsi @@ -46,7 +46,7 @@ compatible = "ethernet-phy-ieee802.3-c22"; reg = <0x0>; interrupt-parent = <&gpio>; - interrupts = ; + interrupts = ; }; }; }; diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi index e62bda1cf2d9..c32dd3419c87 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-ld11.dtsi @@ -414,7 +414,7 @@ mmc-ddr-1_8v; mmc-hs200-1_8v; mmc-pwrseq = <&emmc_pwrseq>; - cdns,phy-input-delay-legacy = <4>; + cdns,phy-input-delay-legacy = <9>; cdns,phy-input-delay-mmc-highspeed = <2>; cdns,phy-input-delay-mmc-ddr = <3>; cdns,phy-dll-delay-sdclk = <21>; diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts index 2c1a92fafbfb..440c2e6a638b 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts +++ b/arch/arm64/boot/dts/socionext/uniphier-ld20-ref.dts @@ -67,3 +67,11 @@ reg = <0>; }; }; + +&pinctrl_ether_rgmii { + tx { + pins = "RGMII_TXCLK", "RGMII_TXD0", "RGMII_TXD1", + "RGMII_TXD2", "RGMII_TXD3", "RGMII_TXCTL"; + drive-strength = <9>; + }; +}; diff --git a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi index 9efe20d07589..3a5ed789c056 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-ld20.dtsi @@ -519,7 +519,7 @@ mmc-ddr-1_8v; mmc-hs200-1_8v; mmc-pwrseq = <&emmc_pwrseq>; - cdns,phy-input-delay-legacy = <4>; + cdns,phy-input-delay-legacy = <9>; cdns,phy-input-delay-mmc-highspeed = <2>; cdns,phy-input-delay-mmc-ddr = <3>; cdns,phy-dll-delay-sdclk = <21>; diff --git a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi index 7c8f710d9bfa..e85d6ddea3c2 100644 --- a/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi +++ b/arch/arm64/boot/dts/socionext/uniphier-pxs3.dtsi @@ -334,7 +334,7 @@ mmc-ddr-1_8v; mmc-hs200-1_8v; mmc-pwrseq = <&emmc_pwrseq>; - cdns,phy-input-delay-legacy = <4>; + cdns,phy-input-delay-legacy = <9>; cdns,phy-input-delay-mmc-highspeed = <2>; cdns,phy-input-delay-mmc-ddr = <3>; cdns,phy-dll-delay-sdclk = <21>; diff --git a/arch/arm64/include/asm/kvm_mmu.h b/arch/arm64/include/asm/kvm_mmu.h index 082110993647..6128992c2ded 100644 --- a/arch/arm64/include/asm/kvm_mmu.h +++ b/arch/arm64/include/asm/kvm_mmu.h @@ -360,6 +360,22 @@ static inline unsigned int kvm_get_vmid_bits(void) return (cpuid_feature_extract_unsigned_field(reg, ID_AA64MMFR1_VMIDBITS_SHIFT) == 2) ? 16 : 8; } +/* + * We are not in the kvm->srcu critical section most of the time, so we take + * the SRCU read lock here. Since we copy the data from the user page, we + * can immediately drop the lock again. + */ +static inline int kvm_read_guest_lock(struct kvm *kvm, + gpa_t gpa, void *data, unsigned long len) +{ + int srcu_idx = srcu_read_lock(&kvm->srcu); + int ret = kvm_read_guest(kvm, gpa, data, len); + + srcu_read_unlock(&kvm->srcu, srcu_idx); + + return ret; +} + #ifdef CONFIG_KVM_INDIRECT_VECTORS /* * EL2 vectors can be mapped and rerouted in a number of ways, diff --git a/arch/mips/boot/compressed/uart-16550.c b/arch/mips/boot/compressed/uart-16550.c index b3043c08f769..aee8d7b8f091 100644 --- a/arch/mips/boot/compressed/uart-16550.c +++ b/arch/mips/boot/compressed/uart-16550.c @@ -18,9 +18,9 @@ #define PORT(offset) (CKSEG1ADDR(AR7_REGS_UART0) + (4 * offset)) #endif -#if defined(CONFIG_MACH_JZ4740) || defined(CONFIG_MACH_JZ4780) -#include -#define PORT(offset) (CKSEG1ADDR(JZ4740_UART0_BASE_ADDR) + (4 * offset)) +#ifdef CONFIG_MACH_INGENIC +#define INGENIC_UART0_BASE_ADDR 0x10030000 +#define PORT(offset) (CKSEG1ADDR(INGENIC_UART0_BASE_ADDR) + (4 * offset)) #endif #ifdef CONFIG_CPU_XLR diff --git a/arch/mips/boot/dts/xilfpga/Makefile b/arch/mips/boot/dts/xilfpga/Makefile index 9987e0e378c5..69ca00590b8d 100644 --- a/arch/mips/boot/dts/xilfpga/Makefile +++ b/arch/mips/boot/dts/xilfpga/Makefile @@ -1,4 +1,2 @@ # SPDX-License-Identifier: GPL-2.0 dtb-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += nexys4ddr.dtb - -obj-y += $(patsubst %.dtb, %.dtb.o, $(dtb-y)) diff --git a/arch/mips/generic/Platform b/arch/mips/generic/Platform index b51432dd10b6..0dd0d5d460a5 100644 --- a/arch/mips/generic/Platform +++ b/arch/mips/generic/Platform @@ -16,3 +16,4 @@ all-$(CONFIG_MIPS_GENERIC) := vmlinux.gz.itb its-y := vmlinux.its.S its-$(CONFIG_FIT_IMAGE_FDT_BOSTON) += board-boston.its.S its-$(CONFIG_FIT_IMAGE_FDT_NI169445) += board-ni169445.its.S +its-$(CONFIG_FIT_IMAGE_FDT_XILFPGA) += board-xilfpga.its.S diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 0b23b1ad99e6..8d098b9f395c 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -463,7 +463,7 @@ static int fpr_get_msa(struct task_struct *target, /* * Copy the floating-point context to the supplied NT_PRFPREG buffer. * Choose the appropriate helper for general registers, and then copy - * the FCSR register separately. + * the FCSR and FIR registers separately. */ static int fpr_get(struct task_struct *target, const struct user_regset *regset, @@ -471,6 +471,7 @@ static int fpr_get(struct task_struct *target, void *kbuf, void __user *ubuf) { const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); + const int fir_pos = fcr31_pos + sizeof(u32); int err; if (sizeof(target->thread.fpu.fpr[0]) == sizeof(elf_fpreg_t)) @@ -483,6 +484,12 @@ static int fpr_get(struct task_struct *target, err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, &target->thread.fpu.fcr31, fcr31_pos, fcr31_pos + sizeof(u32)); + if (err) + return err; + + err = user_regset_copyout(&pos, &count, &kbuf, &ubuf, + &boot_cpu_data.fpu_id, + fir_pos, fir_pos + sizeof(u32)); return err; } @@ -531,7 +538,8 @@ static int fpr_set_msa(struct task_struct *target, /* * Copy the supplied NT_PRFPREG buffer to the floating-point context. * Choose the appropriate helper for general registers, and then copy - * the FCSR register separately. + * the FCSR register separately. Ignore the incoming FIR register + * contents though, as the register is read-only. * * We optimize for the case where `count % sizeof(elf_fpreg_t) == 0', * which is supposed to have been guaranteed by the kernel before @@ -545,6 +553,7 @@ static int fpr_set(struct task_struct *target, const void *kbuf, const void __user *ubuf) { const int fcr31_pos = NUM_FPU_REGS * sizeof(elf_fpreg_t); + const int fir_pos = fcr31_pos + sizeof(u32); u32 fcr31; int err; @@ -572,6 +581,11 @@ static int fpr_set(struct task_struct *target, ptrace_setfcr31(target, fcr31); } + if (count > 0) + err = user_regset_copyin_ignore(&pos, &count, &kbuf, &ubuf, + fir_pos, + fir_pos + sizeof(u32)); + return err; } @@ -793,7 +807,7 @@ long arch_ptrace(struct task_struct *child, long request, fregs = get_fpu_regs(child); #ifdef CONFIG_32BIT - if (test_thread_flag(TIF_32BIT_FPREGS)) { + if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) { /* * The odd registers are actually the high * order bits of the values stored in the even @@ -888,7 +902,7 @@ long arch_ptrace(struct task_struct *child, long request, init_fp_ctx(child); #ifdef CONFIG_32BIT - if (test_thread_flag(TIF_32BIT_FPREGS)) { + if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) { /* * The odd registers are actually the high * order bits of the values stored in the even diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 2b9260f92ccd..656a137c1fe2 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -99,7 +99,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, break; } fregs = get_fpu_regs(child); - if (test_thread_flag(TIF_32BIT_FPREGS)) { + if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) { /* * The odd registers are actually the high * order bits of the values stored in the even @@ -212,7 +212,7 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, sizeof(child->thread.fpu)); child->thread.fpu.fcr31 = 0; } - if (test_thread_flag(TIF_32BIT_FPREGS)) { + if (test_tsk_thread_flag(child, TIF_32BIT_FPREGS)) { /* * The odd registers are actually the high * order bits of the values stored in the even diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 2549fdd27ee1..0f725e9cee8f 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -45,7 +45,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "cache", VCPU_STAT(cache_exits), KVM_STAT_VCPU }, { "signal", VCPU_STAT(signal_exits), KVM_STAT_VCPU }, { "interrupt", VCPU_STAT(int_exits), KVM_STAT_VCPU }, - { "cop_unsuable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU }, + { "cop_unusable", VCPU_STAT(cop_unusable_exits), KVM_STAT_VCPU }, { "tlbmod", VCPU_STAT(tlbmod_exits), KVM_STAT_VCPU }, { "tlbmiss_ld", VCPU_STAT(tlbmiss_ld_exits), KVM_STAT_VCPU }, { "tlbmiss_st", VCPU_STAT(tlbmiss_st_exits), KVM_STAT_VCPU }, diff --git a/arch/mips/mm/c-r4k.c b/arch/mips/mm/c-r4k.c index 6f534b209971..e12dfa48b478 100644 --- a/arch/mips/mm/c-r4k.c +++ b/arch/mips/mm/c-r4k.c @@ -851,9 +851,12 @@ static void r4k_dma_cache_wback_inv(unsigned long addr, unsigned long size) /* * Either no secondary cache or the available caches don't have the * subset property so we have to flush the primary caches - * explicitly + * explicitly. + * If we would need IPI to perform an INDEX-type operation, then + * we have to use the HIT-type alternative as IPI cannot be used + * here due to interrupts possibly being disabled. */ - if (size >= dcache_size) { + if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; @@ -890,7 +893,7 @@ static void r4k_dma_cache_inv(unsigned long addr, unsigned long size) return; } - if (size >= dcache_size) { + if (!r4k_op_needs_ipi(R4K_INDEX) && size >= dcache_size) { r4k_blast_dcache(); } else { R4600_HIT_CACHEOP_WAR_IMPL; diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index ee5a78a151a6..e0e1c9775c32 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -268,7 +268,7 @@ static struct parisc_device *find_device_by_addr(unsigned long hpa) * Walks up the device tree looking for a device of the specified type. * If it finds it, it returns it. If not, it returns NULL. */ -const struct parisc_device * __init +const struct parisc_device * find_pa_parent_type(const struct parisc_device *padev, int type) { const struct device *dev = &padev->dev; diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 4065b5e48c9d..5e26dbede5fc 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -423,8 +423,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle) } #ifdef CONFIG_PROC_FS -int __init -setup_profiling_timer(unsigned int multiplier) +int setup_profiling_timer(unsigned int multiplier) { return -EINVAL; } diff --git a/arch/powerpc/platforms/powernv/opal-nvram.c b/arch/powerpc/platforms/powernv/opal-nvram.c index 1bceb95f422d..5584247f5029 100644 --- a/arch/powerpc/platforms/powernv/opal-nvram.c +++ b/arch/powerpc/platforms/powernv/opal-nvram.c @@ -44,6 +44,10 @@ static ssize_t opal_nvram_read(char *buf, size_t count, loff_t *index) return count; } +/* + * This can be called in the panic path with interrupts off, so use + * mdelay in that case. + */ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index) { s64 rc = OPAL_BUSY; @@ -58,10 +62,16 @@ static ssize_t opal_nvram_write(char *buf, size_t count, loff_t *index) while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { rc = opal_write_nvram(__pa(buf), count, off); if (rc == OPAL_BUSY_EVENT) { - msleep(OPAL_BUSY_DELAY_MS); + if (in_interrupt() || irqs_disabled()) + mdelay(OPAL_BUSY_DELAY_MS); + else + msleep(OPAL_BUSY_DELAY_MS); opal_poll_events(NULL); } else if (rc == OPAL_BUSY) { - msleep(OPAL_BUSY_DELAY_MS); + if (in_interrupt() || irqs_disabled()) + mdelay(OPAL_BUSY_DELAY_MS); + else + msleep(OPAL_BUSY_DELAY_MS); } } diff --git a/arch/s390/configs/debug_defconfig b/arch/s390/configs/debug_defconfig index 6176fe9795ca..941d8cc6c9f5 100644 --- a/arch/s390/configs/debug_defconfig +++ b/arch/s390/configs/debug_defconfig @@ -261,9 +261,9 @@ CONFIG_IP_VS_NQ=m CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m +CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_CHAIN_ROUTE_IPV4=m -CONFIG_NF_TABLES_ARP=m +CONFIG_NF_TABLES_ARP=y CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m @@ -284,7 +284,7 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m +CONFIG_NF_TABLES_IPV6=y CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -305,7 +305,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_SECURITY=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_BRIDGE=y CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m @@ -604,7 +604,6 @@ CONFIG_DETECT_HUNG_TASK=y CONFIG_WQ_WATCHDOG=y CONFIG_PANIC_ON_OOPS=y CONFIG_DEBUG_TIMEKEEPING=y -CONFIG_DEBUG_WW_MUTEX_SLOWPATH=y CONFIG_PROVE_LOCKING=y CONFIG_LOCK_STAT=y CONFIG_DEBUG_LOCKDEP=y diff --git a/arch/s390/configs/performance_defconfig b/arch/s390/configs/performance_defconfig index c105bcc6d7a6..eb6f75f24208 100644 --- a/arch/s390/configs/performance_defconfig +++ b/arch/s390/configs/performance_defconfig @@ -259,9 +259,9 @@ CONFIG_IP_VS_NQ=m CONFIG_IP_VS_FTP=m CONFIG_IP_VS_PE_SIP=m CONFIG_NF_CONNTRACK_IPV4=m -CONFIG_NF_TABLES_IPV4=m +CONFIG_NF_TABLES_IPV4=y CONFIG_NFT_CHAIN_ROUTE_IPV4=m -CONFIG_NF_TABLES_ARP=m +CONFIG_NF_TABLES_ARP=y CONFIG_NFT_CHAIN_NAT_IPV4=m CONFIG_IP_NF_IPTABLES=m CONFIG_IP_NF_MATCH_AH=m @@ -282,7 +282,7 @@ CONFIG_IP_NF_ARPTABLES=m CONFIG_IP_NF_ARPFILTER=m CONFIG_IP_NF_ARP_MANGLE=m CONFIG_NF_CONNTRACK_IPV6=m -CONFIG_NF_TABLES_IPV6=m +CONFIG_NF_TABLES_IPV6=y CONFIG_NFT_CHAIN_ROUTE_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_IP6_NF_IPTABLES=m @@ -303,7 +303,7 @@ CONFIG_IP6_NF_RAW=m CONFIG_IP6_NF_SECURITY=m CONFIG_IP6_NF_NAT=m CONFIG_IP6_NF_TARGET_MASQUERADE=m -CONFIG_NF_TABLES_BRIDGE=m +CONFIG_NF_TABLES_BRIDGE=y CONFIG_RDS=m CONFIG_RDS_RDMA=m CONFIG_RDS_TCP=m diff --git a/arch/s390/crypto/crc32be-vx.S b/arch/s390/crypto/crc32be-vx.S index e8077f0971f8..2bf01ba44107 100644 --- a/arch/s390/crypto/crc32be-vx.S +++ b/arch/s390/crypto/crc32be-vx.S @@ -13,6 +13,7 @@ */ #include +#include #include /* Vector register range containing CRC-32 constants */ @@ -67,6 +68,8 @@ .previous + GEN_BR_THUNK %r14 + .text /* * The CRC-32 function(s) use these calling conventions: @@ -203,6 +206,6 @@ ENTRY(crc32_be_vgfm_16) .Ldone: VLGVF %r2,%v2,3 - br %r14 + BR_EX %r14 .previous diff --git a/arch/s390/crypto/crc32le-vx.S b/arch/s390/crypto/crc32le-vx.S index d8c67a58c0c5..7d6f568bd3ad 100644 --- a/arch/s390/crypto/crc32le-vx.S +++ b/arch/s390/crypto/crc32le-vx.S @@ -14,6 +14,7 @@ */ #include +#include #include /* Vector register range containing CRC-32 constants */ @@ -76,6 +77,7 @@ .previous + GEN_BR_THUNK %r14 .text @@ -264,6 +266,6 @@ crc32_le_vgfm_generic: .Ldone: VLGVF %r2,%v2,2 - br %r14 + BR_EX %r14 .previous diff --git a/arch/s390/include/asm/nospec-insn.h b/arch/s390/include/asm/nospec-insn.h new file mode 100644 index 000000000000..a01f81186e86 --- /dev/null +++ b/arch/s390/include/asm/nospec-insn.h @@ -0,0 +1,196 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_S390_NOSPEC_ASM_H +#define _ASM_S390_NOSPEC_ASM_H + +#include +#include +#include + +#ifdef __ASSEMBLY__ + +#ifdef CONFIG_EXPOLINE + +_LC_BR_R1 = __LC_BR_R1 + +/* + * The expoline macros are used to create thunks in the same format + * as gcc generates them. The 'comdat' section flag makes sure that + * the various thunks are merged into a single copy. + */ + .macro __THUNK_PROLOG_NAME name + .pushsection .text.\name,"axG",@progbits,\name,comdat + .globl \name + .hidden \name + .type \name,@function +\name: + CFI_STARTPROC + .endm + + .macro __THUNK_EPILOG + CFI_ENDPROC + .popsection + .endm + + .macro __THUNK_PROLOG_BR r1,r2 + __THUNK_PROLOG_NAME __s390x_indirect_jump_r\r2\()use_r\r1 + .endm + + .macro __THUNK_PROLOG_BC d0,r1,r2 + __THUNK_PROLOG_NAME __s390x_indirect_branch_\d0\()_\r2\()use_\r1 + .endm + + .macro __THUNK_BR r1,r2 + jg __s390x_indirect_jump_r\r2\()use_r\r1 + .endm + + .macro __THUNK_BC d0,r1,r2 + jg __s390x_indirect_branch_\d0\()_\r2\()use_\r1 + .endm + + .macro __THUNK_BRASL r1,r2,r3 + brasl \r1,__s390x_indirect_jump_r\r3\()use_r\r2 + .endm + + .macro __DECODE_RR expand,reg,ruse + .set __decode_fail,1 + .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \reg,%r\r1 + .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \ruse,%r\r2 + \expand \r1,\r2 + .set __decode_fail,0 + .endif + .endr + .endif + .endr + .if __decode_fail == 1 + .error "__DECODE_RR failed" + .endif + .endm + + .macro __DECODE_RRR expand,rsave,rtarget,ruse + .set __decode_fail,1 + .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \rsave,%r\r1 + .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \rtarget,%r\r2 + .irp r3,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \ruse,%r\r3 + \expand \r1,\r2,\r3 + .set __decode_fail,0 + .endif + .endr + .endif + .endr + .endif + .endr + .if __decode_fail == 1 + .error "__DECODE_RRR failed" + .endif + .endm + + .macro __DECODE_DRR expand,disp,reg,ruse + .set __decode_fail,1 + .irp r1,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \reg,%r\r1 + .irp r2,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15 + .ifc \ruse,%r\r2 + \expand \disp,\r1,\r2 + .set __decode_fail,0 + .endif + .endr + .endif + .endr + .if __decode_fail == 1 + .error "__DECODE_DRR failed" + .endif + .endm + + .macro __THUNK_EX_BR reg,ruse + # Be very careful when adding instructions to this macro! + # The ALTERNATIVE replacement code has a .+10 which targets + # the "br \reg" after the code has been patched. +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + exrl 0,555f + j . +#else + .ifc \reg,%r1 + ALTERNATIVE "ex %r0,_LC_BR_R1", ".insn ril,0xc60000000000,0,.+10", 35 + j . + .else + larl \ruse,555f + ex 0,0(\ruse) + j . + .endif +#endif +555: br \reg + .endm + + .macro __THUNK_EX_BC disp,reg,ruse +#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES + exrl 0,556f + j . +#else + larl \ruse,556f + ex 0,0(\ruse) + j . +#endif +556: b \disp(\reg) + .endm + + .macro GEN_BR_THUNK reg,ruse=%r1 + __DECODE_RR __THUNK_PROLOG_BR,\reg,\ruse + __THUNK_EX_BR \reg,\ruse + __THUNK_EPILOG + .endm + + .macro GEN_B_THUNK disp,reg,ruse=%r1 + __DECODE_DRR __THUNK_PROLOG_BC,\disp,\reg,\ruse + __THUNK_EX_BC \disp,\reg,\ruse + __THUNK_EPILOG + .endm + + .macro BR_EX reg,ruse=%r1 +557: __DECODE_RR __THUNK_BR,\reg,\ruse + .pushsection .s390_indirect_branches,"a",@progbits + .long 557b-. + .popsection + .endm + + .macro B_EX disp,reg,ruse=%r1 +558: __DECODE_DRR __THUNK_BC,\disp,\reg,\ruse + .pushsection .s390_indirect_branches,"a",@progbits + .long 558b-. + .popsection + .endm + + .macro BASR_EX rsave,rtarget,ruse=%r1 +559: __DECODE_RRR __THUNK_BRASL,\rsave,\rtarget,\ruse + .pushsection .s390_indirect_branches,"a",@progbits + .long 559b-. + .popsection + .endm + +#else + .macro GEN_BR_THUNK reg,ruse=%r1 + .endm + + .macro GEN_B_THUNK disp,reg,ruse=%r1 + .endm + + .macro BR_EX reg,ruse=%r1 + br \reg + .endm + + .macro B_EX disp,reg,ruse=%r1 + b \disp(\reg) + .endm + + .macro BASR_EX rsave,rtarget,ruse=%r1 + basr \rsave,\rtarget + .endm +#endif + +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_S390_NOSPEC_ASM_H */ diff --git a/arch/s390/include/asm/purgatory.h b/arch/s390/include/asm/purgatory.h index e297bcfc476f..6090670df51f 100644 --- a/arch/s390/include/asm/purgatory.h +++ b/arch/s390/include/asm/purgatory.h @@ -13,5 +13,11 @@ int verify_sha256_digest(void); +extern u64 kernel_entry; +extern u64 kernel_type; + +extern u64 crash_start; +extern u64 crash_size; + #endif /* __ASSEMBLY__ */ #endif /* _S390_PURGATORY_H_ */ diff --git a/arch/s390/kernel/Makefile b/arch/s390/kernel/Makefile index 84ea6225efb4..f92dd8ed3884 100644 --- a/arch/s390/kernel/Makefile +++ b/arch/s390/kernel/Makefile @@ -65,6 +65,7 @@ obj-y += nospec-branch.o extra-y += head.o head64.o vmlinux.lds +obj-$(CONFIG_SYSFS) += nospec-sysfs.o CFLAGS_REMOVE_nospec-branch.o += $(CC_FLAGS_EXPOLINE) obj-$(CONFIG_MODULES) += module.o diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index eb2a5c0443cd..11aea745a2a6 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -181,6 +181,7 @@ int main(void) OFFSET(__LC_MACHINE_FLAGS, lowcore, machine_flags); OFFSET(__LC_PREEMPT_COUNT, lowcore, preempt_count); OFFSET(__LC_GMAP, lowcore, gmap); + OFFSET(__LC_BR_R1, lowcore, br_r1_trampoline); /* software defined ABI-relevant lowcore locations 0xe00 - 0xe20 */ OFFSET(__LC_DUMP_REIPL, lowcore, ipib); /* hardware defined lowcore locations 0x1000 - 0x18ff */ diff --git a/arch/s390/kernel/base.S b/arch/s390/kernel/base.S index f6c56009e822..b65874b0b412 100644 --- a/arch/s390/kernel/base.S +++ b/arch/s390/kernel/base.S @@ -9,18 +9,22 @@ #include #include +#include #include #include + GEN_BR_THUNK %r9 + GEN_BR_THUNK %r14 + ENTRY(s390_base_mcck_handler) basr %r13,0 0: lg %r15,__LC_PANIC_STACK # load panic stack aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_mcck_handler_fn - lg %r1,0(%r1) - ltgr %r1,%r1 + lg %r9,0(%r1) + ltgr %r9,%r9 jz 1f - basr %r14,%r1 + BASR_EX %r14,%r9 1: la %r1,4095 lmg %r0,%r15,__LC_GPREGS_SAVE_AREA-4095(%r1) lpswe __LC_MCK_OLD_PSW @@ -37,10 +41,10 @@ ENTRY(s390_base_ext_handler) basr %r13,0 0: aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_ext_handler_fn - lg %r1,0(%r1) - ltgr %r1,%r1 + lg %r9,0(%r1) + ltgr %r9,%r9 jz 1f - basr %r14,%r1 + BASR_EX %r14,%r9 1: lmg %r0,%r15,__LC_SAVE_AREA_ASYNC ni __LC_EXT_OLD_PSW+1,0xfd # clear wait state bit lpswe __LC_EXT_OLD_PSW @@ -57,10 +61,10 @@ ENTRY(s390_base_pgm_handler) basr %r13,0 0: aghi %r15,-STACK_FRAME_OVERHEAD larl %r1,s390_base_pgm_handler_fn - lg %r1,0(%r1) - ltgr %r1,%r1 + lg %r9,0(%r1) + ltgr %r9,%r9 jz 1f - basr %r14,%r1 + BASR_EX %r14,%r9 lmg %r0,%r15,__LC_SAVE_AREA_SYNC lpswe __LC_PGM_OLD_PSW 1: lpswe disabled_wait_psw-0b(%r13) @@ -117,7 +121,7 @@ ENTRY(diag308_reset) larl %r4,.Lcontinue_psw # Restore PSW flags lpswe 0(%r4) .Lcontinue: - br %r14 + BR_EX %r14 .align 16 .Lrestart_psw: .long 0x00080000,0x80000000 + .Lrestart_part2 diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S index 3f22f139a041..f03402efab4b 100644 --- a/arch/s390/kernel/entry.S +++ b/arch/s390/kernel/entry.S @@ -28,6 +28,7 @@ #include #include #include +#include __PT_R0 = __PT_GPRS __PT_R1 = __PT_GPRS + 8 @@ -183,67 +184,9 @@ _LPP_OFFSET = __LC_LPP "jnz .+8; .long 0xb2e8d000", 82 .endm -#ifdef CONFIG_EXPOLINE - - .macro GEN_BR_THUNK name,reg,tmp - .section .text.\name,"axG",@progbits,\name,comdat - .globl \name - .hidden \name - .type \name,@function -\name: - CFI_STARTPROC -#ifdef CONFIG_HAVE_MARCH_Z10_FEATURES - exrl 0,0f -#else - larl \tmp,0f - ex 0,0(\tmp) -#endif - j . -0: br \reg - CFI_ENDPROC - .endm - - GEN_BR_THUNK __s390x_indirect_jump_r1use_r9,%r9,%r1 - GEN_BR_THUNK __s390x_indirect_jump_r1use_r14,%r14,%r1 - GEN_BR_THUNK __s390x_indirect_jump_r11use_r14,%r14,%r11 - - .macro BASR_R14_R9 -0: brasl %r14,__s390x_indirect_jump_r1use_r9 - .pushsection .s390_indirect_branches,"a",@progbits - .long 0b-. - .popsection - .endm - - .macro BR_R1USE_R14 -0: jg __s390x_indirect_jump_r1use_r14 - .pushsection .s390_indirect_branches,"a",@progbits - .long 0b-. - .popsection - .endm - - .macro BR_R11USE_R14 -0: jg __s390x_indirect_jump_r11use_r14 - .pushsection .s390_indirect_branches,"a",@progbits - .long 0b-. - .popsection - .endm - -#else /* CONFIG_EXPOLINE */ - - .macro BASR_R14_R9 - basr %r14,%r9 - .endm - - .macro BR_R1USE_R14 - br %r14 - .endm - - .macro BR_R11USE_R14 - br %r14 - .endm - -#endif /* CONFIG_EXPOLINE */ - + GEN_BR_THUNK %r9 + GEN_BR_THUNK %r14 + GEN_BR_THUNK %r14,%r11 .section .kprobes.text, "ax" .Ldummy: @@ -260,7 +203,7 @@ _LPP_OFFSET = __LC_LPP ENTRY(__bpon) .globl __bpon BPON - BR_R1USE_R14 + BR_EX %r14 /* * Scheduler resume function, called by switch_to @@ -284,7 +227,7 @@ ENTRY(__switch_to) mvc __LC_CURRENT_PID(4,%r0),0(%r3) # store pid of next lmg %r6,%r15,__SF_GPRS(%r15) # load gprs of next task ALTERNATIVE "", ".insn s,0xb2800000,_LPP_OFFSET", 40 - BR_R1USE_R14 + BR_EX %r14 .L__critical_start: @@ -351,7 +294,7 @@ sie_exit: xgr %r5,%r5 lmg %r6,%r14,__SF_GPRS(%r15) # restore kernel registers lg %r2,__SF_SIE_REASON(%r15) # return exit reason code - BR_R1USE_R14 + BR_EX %r14 .Lsie_fault: lghi %r14,-EFAULT stg %r14,__SF_SIE_REASON(%r15) # set exit reason code @@ -410,7 +353,7 @@ ENTRY(system_call) lgf %r9,0(%r8,%r10) # get system call add. TSTMSK __TI_flags(%r12),_TIF_TRACE jnz .Lsysc_tracesys - BASR_R14_R9 # call sys_xxxx + BASR_EX %r14,%r9 # call sys_xxxx stg %r2,__PT_R2(%r11) # store return value .Lsysc_return: @@ -595,7 +538,7 @@ ENTRY(system_call) lmg %r3,%r7,__PT_R3(%r11) stg %r7,STACK_FRAME_OVERHEAD(%r15) lg %r2,__PT_ORIG_GPR2(%r11) - BASR_R14_R9 # call sys_xxx + BASR_EX %r14,%r9 # call sys_xxx stg %r2,__PT_R2(%r11) # store return value .Lsysc_tracenogo: TSTMSK __TI_flags(%r12),_TIF_TRACE @@ -619,7 +562,7 @@ ENTRY(ret_from_fork) lmg %r9,%r10,__PT_R9(%r11) # load gprs ENTRY(kernel_thread_starter) la %r2,0(%r10) - BASR_R14_R9 + BASR_EX %r14,%r9 j .Lsysc_tracenogo /* @@ -701,7 +644,7 @@ ENTRY(pgm_check_handler) je .Lpgm_return lgf %r9,0(%r10,%r1) # load address of handler routine lgr %r2,%r11 # pass pointer to pt_regs - BASR_R14_R9 # branch to interrupt-handler + BASR_EX %r14,%r9 # branch to interrupt-handler .Lpgm_return: LOCKDEP_SYS_EXIT tm __PT_PSW+1(%r11),0x01 # returning to user ? @@ -1019,7 +962,7 @@ ENTRY(psw_idle) stpt __TIMER_IDLE_ENTER(%r2) .Lpsw_idle_lpsw: lpswe __SF_EMPTY(%r15) - BR_R1USE_R14 + BR_EX %r14 .Lpsw_idle_end: /* @@ -1061,7 +1004,7 @@ ENTRY(save_fpu_regs) .Lsave_fpu_regs_done: oi __LC_CPU_FLAGS+7,_CIF_FPU .Lsave_fpu_regs_exit: - BR_R1USE_R14 + BR_EX %r14 .Lsave_fpu_regs_end: EXPORT_SYMBOL(save_fpu_regs) @@ -1107,7 +1050,7 @@ load_fpu_regs: .Lload_fpu_regs_done: ni __LC_CPU_FLAGS+7,255-_CIF_FPU .Lload_fpu_regs_exit: - BR_R1USE_R14 + BR_EX %r14 .Lload_fpu_regs_end: .L__critical_end: @@ -1322,7 +1265,7 @@ cleanup_critical: jl 0f clg %r9,BASED(.Lcleanup_table+104) # .Lload_fpu_regs_end jl .Lcleanup_load_fpu_regs -0: BR_R11USE_R14 +0: BR_EX %r14 .align 8 .Lcleanup_table: @@ -1358,7 +1301,7 @@ cleanup_critical: ni __SIE_PROG0C+3(%r9),0xfe # no longer in SIE lctlg %c1,%c1,__LC_USER_ASCE # load primary asce larl %r9,sie_exit # skip forward to sie_exit - BR_R11USE_R14 + BR_EX %r14 #endif .Lcleanup_system_call: @@ -1412,7 +1355,7 @@ cleanup_critical: stg %r15,56(%r11) # r15 stack pointer # set new psw address and exit larl %r9,.Lsysc_do_svc - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_system_call_insn: .quad system_call .quad .Lsysc_stmg @@ -1424,7 +1367,7 @@ cleanup_critical: .Lcleanup_sysc_tif: larl %r9,.Lsysc_tif - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_sysc_restore: # check if stpt has been executed @@ -1441,14 +1384,14 @@ cleanup_critical: mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) 1: lmg %r8,%r9,__LC_RETURN_PSW - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_sysc_restore_insn: .quad .Lsysc_exit_timer .quad .Lsysc_done - 4 .Lcleanup_io_tif: larl %r9,.Lio_tif - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_io_restore: # check if stpt has been executed @@ -1462,7 +1405,7 @@ cleanup_critical: mvc 0(64,%r11),__PT_R8(%r9) lmg %r0,%r7,__PT_R0(%r9) 1: lmg %r8,%r9,__LC_RETURN_PSW - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_io_restore_insn: .quad .Lio_exit_timer .quad .Lio_done - 4 @@ -1515,17 +1458,17 @@ cleanup_critical: # prepare return psw nihh %r8,0xfcfd # clear irq & wait state bits lg %r9,48(%r11) # return from psw_idle - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_idle_insn: .quad .Lpsw_idle_lpsw .Lcleanup_save_fpu_regs: larl %r9,save_fpu_regs - BR_R11USE_R14 + BR_EX %r14,%r11 .Lcleanup_load_fpu_regs: larl %r9,load_fpu_regs - BR_R11USE_R14 + BR_EX %r14,%r11 /* * Integer constants diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 94f2099bceb0..3d17c41074ca 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -176,10 +176,9 @@ void do_softirq_own_stack(void) new -= STACK_FRAME_OVERHEAD; ((struct stack_frame *) new)->back_chain = old; asm volatile(" la 15,0(%0)\n" - " basr 14,%2\n" + " brasl 14,__do_softirq\n" " la 15,0(%1)\n" - : : "a" (new), "a" (old), - "a" (__do_softirq) + : : "a" (new), "a" (old) : "0", "1", "2", "3", "4", "5", "14", "cc", "memory" ); } else { diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 82df7d80fab2..27110f3294ed 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -9,13 +9,17 @@ #include #include #include +#include #include #include + GEN_BR_THUNK %r1 + GEN_BR_THUNK %r14 + .section .kprobes.text, "ax" ENTRY(ftrace_stub) - br %r14 + BR_EX %r14 #define STACK_FRAME_SIZE (STACK_FRAME_OVERHEAD + __PT_SIZE) #define STACK_PTREGS (STACK_FRAME_OVERHEAD) @@ -23,7 +27,7 @@ ENTRY(ftrace_stub) #define STACK_PTREGS_PSW (STACK_PTREGS + __PT_PSW) ENTRY(_mcount) - br %r14 + BR_EX %r14 EXPORT_SYMBOL(_mcount) @@ -53,7 +57,7 @@ ENTRY(ftrace_caller) #endif lgr %r3,%r14 la %r5,STACK_PTREGS(%r15) - basr %r14,%r1 + BASR_EX %r14,%r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER # The j instruction gets runtime patched to a nop instruction. # See ftrace_enable_ftrace_graph_caller. @@ -68,7 +72,7 @@ ftrace_graph_caller_end: #endif lg %r1,(STACK_PTREGS_PSW+8)(%r15) lmg %r2,%r15,(STACK_PTREGS_GPRS+2*8)(%r15) - br %r1 + BR_EX %r1 #ifdef CONFIG_FUNCTION_GRAPH_TRACER @@ -81,6 +85,6 @@ ENTRY(return_to_handler) aghi %r15,STACK_FRAME_OVERHEAD lgr %r14,%r2 lmg %r2,%r5,32(%r15) - br %r14 + BR_EX %r14 #endif diff --git a/arch/s390/kernel/nospec-branch.c b/arch/s390/kernel/nospec-branch.c index 46d49a11663f..8ad6a7128b3a 100644 --- a/arch/s390/kernel/nospec-branch.c +++ b/arch/s390/kernel/nospec-branch.c @@ -1,7 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include #include -#include #include static int __init nobp_setup_early(char *str) @@ -44,24 +43,6 @@ static int __init nospec_report(void) } arch_initcall(nospec_report); -#ifdef CONFIG_SYSFS -ssize_t cpu_show_spectre_v1(struct device *dev, - struct device_attribute *attr, char *buf) -{ - return sprintf(buf, "Mitigation: __user pointer sanitization\n"); -} - -ssize_t cpu_show_spectre_v2(struct device *dev, - struct device_attribute *attr, char *buf) -{ - if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) - return sprintf(buf, "Mitigation: execute trampolines\n"); - if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) - return sprintf(buf, "Mitigation: limited branch prediction.\n"); - return sprintf(buf, "Vulnerable\n"); -} -#endif - #ifdef CONFIG_EXPOLINE int nospec_disable = IS_ENABLED(CONFIG_EXPOLINE_OFF); @@ -112,7 +93,6 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) s32 *epo; /* Second part of the instruction replace is always a nop */ - memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x00, 0x00 }, 4); for (epo = start; epo < end; epo++) { instr = (u8 *) epo + *epo; if (instr[0] == 0xc0 && (instr[1] & 0x0f) == 0x04) @@ -133,18 +113,34 @@ static void __init_or_module __nospec_revert(s32 *start, s32 *end) br = thunk + (*(int *)(thunk + 2)) * 2; else continue; - if (br[0] != 0x07 || (br[1] & 0xf0) != 0xf0) + /* Check for unconditional branch 0x07f? or 0x47f???? */ + if ((br[0] & 0xbf) != 0x07 || (br[1] & 0xf0) != 0xf0) continue; + + memcpy(insnbuf + 2, (char[]) { 0x47, 0x00, 0x07, 0x00 }, 4); switch (type) { case BRCL_EXPOLINE: - /* brcl to thunk, replace with br + nop */ insnbuf[0] = br[0]; insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); + if (br[0] == 0x47) { + /* brcl to b, replace with bc + nopr */ + insnbuf[2] = br[2]; + insnbuf[3] = br[3]; + } else { + /* brcl to br, replace with bcr + nop */ + } break; case BRASL_EXPOLINE: - /* brasl to thunk, replace with basr + nop */ - insnbuf[0] = 0x0d; insnbuf[1] = (instr[1] & 0xf0) | (br[1] & 0x0f); + if (br[0] == 0x47) { + /* brasl to b, replace with bas + nopr */ + insnbuf[0] = 0x4d; + insnbuf[2] = br[2]; + insnbuf[3] = br[3]; + } else { + /* brasl to br, replace with basr + nop */ + insnbuf[0] = 0x0d; + } break; } diff --git a/arch/s390/kernel/nospec-sysfs.c b/arch/s390/kernel/nospec-sysfs.c new file mode 100644 index 000000000000..8affad5f18cb --- /dev/null +++ b/arch/s390/kernel/nospec-sysfs.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include + +ssize_t cpu_show_spectre_v1(struct device *dev, + struct device_attribute *attr, char *buf) +{ + return sprintf(buf, "Mitigation: __user pointer sanitization\n"); +} + +ssize_t cpu_show_spectre_v2(struct device *dev, + struct device_attribute *attr, char *buf) +{ + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) + return sprintf(buf, "Mitigation: execute trampolines\n"); + if (__test_facility(82, S390_lowcore.alt_stfle_fac_list)) + return sprintf(buf, "Mitigation: limited branch prediction\n"); + return sprintf(buf, "Vulnerable\n"); +} diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 1c9ddd7aa5ec..0292d68e7dde 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -753,6 +753,10 @@ static int __hw_perf_event_init(struct perf_event *event) */ rate = 0; if (attr->freq) { + if (!attr->sample_freq) { + err = -EINVAL; + goto out; + } rate = freq_to_sample_rate(&si, attr->sample_freq); rate = hw_limit_rate(&si, rate); attr->freq = 0; diff --git a/arch/s390/kernel/reipl.S b/arch/s390/kernel/reipl.S index 73cc3750f0d3..7f14adf512c6 100644 --- a/arch/s390/kernel/reipl.S +++ b/arch/s390/kernel/reipl.S @@ -7,8 +7,11 @@ #include #include +#include #include + GEN_BR_THUNK %r9 + # # Issue "store status" for the current CPU to its prefix page # and call passed function afterwards @@ -67,9 +70,9 @@ ENTRY(store_status) st %r4,0(%r1) st %r5,4(%r1) stg %r2,8(%r1) - lgr %r1,%r2 + lgr %r9,%r2 lgr %r2,%r3 - br %r1 + BR_EX %r9 .section .bss .align 8 diff --git a/arch/s390/kernel/swsusp.S b/arch/s390/kernel/swsusp.S index e99187149f17..a049a7b9d6e8 100644 --- a/arch/s390/kernel/swsusp.S +++ b/arch/s390/kernel/swsusp.S @@ -13,6 +13,7 @@ #include #include #include +#include #include /* @@ -24,6 +25,8 @@ * (see below) in the resume process. * This function runs with disabled interrupts. */ + GEN_BR_THUNK %r14 + .section .text ENTRY(swsusp_arch_suspend) stmg %r6,%r15,__SF_GPRS(%r15) @@ -103,7 +106,7 @@ ENTRY(swsusp_arch_suspend) spx 0x318(%r1) lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) lghi %r2,0 - br %r14 + BR_EX %r14 /* * Restore saved memory image to correct place and restore register context. @@ -197,11 +200,10 @@ pgm_check_entry: larl %r15,init_thread_union ahi %r15,1<<(PAGE_SHIFT+THREAD_SIZE_ORDER) larl %r2,.Lpanic_string - larl %r3,sclp_early_printk lghi %r1,0 sam31 sigp %r1,%r0,SIGP_SET_ARCHITECTURE - basr %r14,%r3 + brasl %r14,sclp_early_printk larl %r3,.Ldisabled_wait_31 lpsw 0(%r3) 4: @@ -267,7 +269,7 @@ restore_registers: /* Return 0 */ lmg %r6,%r15,STACK_FRAME_OVERHEAD + __SF_GPRS(%r15) lghi %r2,0 - br %r14 + BR_EX %r14 .section .data..nosave,"aw",@progbits .align 8 diff --git a/arch/s390/lib/mem.S b/arch/s390/lib/mem.S index 495c9c4bacc7..2311f15be9cf 100644 --- a/arch/s390/lib/mem.S +++ b/arch/s390/lib/mem.S @@ -7,6 +7,9 @@ #include #include +#include + + GEN_BR_THUNK %r14 /* * void *memmove(void *dest, const void *src, size_t n) @@ -33,14 +36,14 @@ ENTRY(memmove) .Lmemmove_forward_remainder: larl %r5,.Lmemmove_mvc ex %r4,0(%r5) - br %r14 + BR_EX %r14 .Lmemmove_reverse: ic %r0,0(%r4,%r3) stc %r0,0(%r4,%r1) brctg %r4,.Lmemmove_reverse ic %r0,0(%r4,%r3) stc %r0,0(%r4,%r1) - br %r14 + BR_EX %r14 .Lmemmove_mvc: mvc 0(1,%r1),0(%r3) EXPORT_SYMBOL(memmove) @@ -77,7 +80,7 @@ ENTRY(memset) .Lmemset_clear_remainder: larl %r3,.Lmemset_xc ex %r4,0(%r3) - br %r14 + BR_EX %r14 .Lmemset_fill: cghi %r4,1 lgr %r1,%r2 @@ -95,10 +98,10 @@ ENTRY(memset) stc %r3,0(%r1) larl %r5,.Lmemset_mvc ex %r4,0(%r5) - br %r14 + BR_EX %r14 .Lmemset_fill_exit: stc %r3,0(%r1) - br %r14 + BR_EX %r14 .Lmemset_xc: xc 0(1,%r1),0(%r1) .Lmemset_mvc: @@ -121,7 +124,7 @@ ENTRY(memcpy) .Lmemcpy_remainder: larl %r5,.Lmemcpy_mvc ex %r4,0(%r5) - br %r14 + BR_EX %r14 .Lmemcpy_loop: mvc 0(256,%r1),0(%r3) la %r1,256(%r1) @@ -159,10 +162,10 @@ ENTRY(__memset\bits) \insn %r3,0(%r1) larl %r5,.L__memset_mvc\bits ex %r4,0(%r5) - br %r14 + BR_EX %r14 .L__memset_exit\bits: \insn %r3,0(%r2) - br %r14 + BR_EX %r14 .L__memset_mvc\bits: mvc \bytes(1,%r1),0(%r1) .endm diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index b020bea040b7..d2db8acb1a55 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -25,6 +25,8 @@ #include #include #include +#include +#include #include #include "bpf_jit.h" @@ -41,6 +43,8 @@ struct bpf_jit { int base_ip; /* Base address for literal pool */ int ret0_ip; /* Address of return 0 */ int exit_ip; /* Address of exit */ + int r1_thunk_ip; /* Address of expoline thunk for 'br %r1' */ + int r14_thunk_ip; /* Address of expoline thunk for 'br %r14' */ int tail_call_start; /* Tail call start offset */ int labels[1]; /* Labels for local jumps */ }; @@ -246,6 +250,19 @@ static inline void reg_set_seen(struct bpf_jit *jit, u32 b1) REG_SET_SEEN(b2); \ }) +#define EMIT6_PCREL_RILB(op, b, target) \ +({ \ + int rel = (target - jit->prg) / 2; \ + _EMIT6(op | reg_high(b) << 16 | rel >> 16, rel & 0xffff); \ + REG_SET_SEEN(b); \ +}) + +#define EMIT6_PCREL_RIL(op, target) \ +({ \ + int rel = (target - jit->prg) / 2; \ + _EMIT6(op | rel >> 16, rel & 0xffff); \ +}) + #define _EMIT6_IMM(op, imm) \ ({ \ unsigned int __imm = (imm); \ @@ -438,8 +455,45 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth) EMIT4(0xb9040000, REG_2, BPF_REG_0); /* Restore registers */ save_restore_regs(jit, REGS_RESTORE, stack_depth); + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) { + jit->r14_thunk_ip = jit->prg; + /* Generate __s390_indirect_jump_r14 thunk */ + if (test_facility(35)) { + /* exrl %r0,.+10 */ + EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); + } else { + /* larl %r1,.+14 */ + EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14); + /* ex 0,0(%r1) */ + EMIT4_DISP(0x44000000, REG_0, REG_1, 0); + } + /* j . */ + EMIT4_PCREL(0xa7f40000, 0); + } /* br %r14 */ _EMIT2(0x07fe); + + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable && + (jit->seen & SEEN_FUNC)) { + jit->r1_thunk_ip = jit->prg; + /* Generate __s390_indirect_jump_r1 thunk */ + if (test_facility(35)) { + /* exrl %r0,.+10 */ + EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10); + /* j . */ + EMIT4_PCREL(0xa7f40000, 0); + /* br %r1 */ + _EMIT2(0x07f1); + } else { + /* larl %r1,.+14 */ + EMIT6_PCREL_RILB(0xc0000000, REG_1, jit->prg + 14); + /* ex 0,S390_lowcore.br_r1_tampoline */ + EMIT4_DISP(0x44000000, REG_0, REG_0, + offsetof(struct lowcore, br_r1_trampoline)); + /* j . */ + EMIT4_PCREL(0xa7f40000, 0); + } + } } /* @@ -935,8 +989,13 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp, int i /* lg %w1,(%l) */ EMIT6_DISP_LH(0xe3000000, 0x0004, REG_W1, REG_0, REG_L, EMIT_CONST_U64(func)); - /* basr %r14,%w1 */ - EMIT2(0x0d00, REG_14, REG_W1); + if (IS_ENABLED(CC_USING_EXPOLINE) && !nospec_disable) { + /* brasl %r14,__s390_indirect_jump_r1 */ + EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip); + } else { + /* basr %r14,%w1 */ + EMIT2(0x0d00, REG_14, REG_W1); + } /* lgr %b0,%r2: load return value into %b0 */ EMIT4(0xb9040000, BPF_REG_0, REG_2); break; diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 47d3efff6805..09f36c0d9d4f 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -163,7 +163,8 @@ __setup_efi_pci32(efi_pci_io_protocol_32 *pci, struct pci_setup_rom **__rom) if (status != EFI_SUCCESS) goto free_struct; - memcpy(rom->romdata, pci->romimage, pci->romsize); + memcpy(rom->romdata, (void *)(unsigned long)pci->romimage, + pci->romsize); return status; free_struct: @@ -269,7 +270,8 @@ __setup_efi_pci64(efi_pci_io_protocol_64 *pci, struct pci_setup_rom **__rom) if (status != EFI_SUCCESS) goto free_struct; - memcpy(rom->romdata, pci->romimage, pci->romsize); + memcpy(rom->romdata, (void *)(unsigned long)pci->romimage, + pci->romsize); return status; free_struct: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index fca012baba19..8169e8b7a4dc 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -305,6 +305,25 @@ ENTRY(startup_64) /* Set up the stack */ leaq boot_stack_end(%rbx), %rsp + /* + * paging_prepare() and cleanup_trampoline() below can have GOT + * references. Adjust the table with address we are running at. + * + * Zero RAX for adjust_got: the GOT was not adjusted before; + * there's no adjustment to undo. + */ + xorq %rax, %rax + + /* + * Calculate the address the binary is loaded at and use it as + * a GOT adjustment. + */ + call 1f +1: popq %rdi + subq $1b, %rdi + + call adjust_got + /* * At this point we are in long mode with 4-level paging enabled, * but we might want to enable 5-level paging or vice versa. @@ -370,10 +389,14 @@ trampoline_return: /* * cleanup_trampoline() would restore trampoline memory. * + * RDI is address of the page table to use instead of page table + * in trampoline memory (if required). + * * RSI holds real mode data and needs to be preserved across * this function call. */ pushq %rsi + leaq top_pgtable(%rbx), %rdi call cleanup_trampoline popq %rsi @@ -381,6 +404,21 @@ trampoline_return: pushq $0 popfq + /* + * Previously we've adjusted the GOT with address the binary was + * loaded at. Now we need to re-adjust for relocation address. + * + * Calculate the address the binary is loaded at, so that we can + * undo the previous GOT adjustment. + */ + call 1f +1: popq %rax + subq $1b, %rax + + /* The new adjustment is the relocation address */ + movq %rbx, %rdi + call adjust_got + /* * Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. @@ -481,19 +519,6 @@ relocated: shrq $3, %rcx rep stosq -/* - * Adjust our own GOT - */ - leaq _got(%rip), %rdx - leaq _egot(%rip), %rcx -1: - cmpq %rcx, %rdx - jae 2f - addq %rbx, (%rdx) - addq $8, %rdx - jmp 1b -2: - /* * Do the extraction, and jump to the new kernel.. */ @@ -512,6 +537,27 @@ relocated: */ jmp *%rax +/* + * Adjust the global offset table + * + * RAX is the previous adjustment of the table to undo (use 0 if it's the + * first time we touch GOT). + * RDI is the new adjustment to apply. + */ +adjust_got: + /* Walk through the GOT adding the address to the entries */ + leaq _got(%rip), %rdx + leaq _egot(%rip), %rcx +1: + cmpq %rcx, %rdx + jae 2f + subq %rax, (%rdx) /* Undo previous adjustment */ + addq %rdi, (%rdx) /* Apply the new adjustment */ + addq $8, %rdx + jmp 1b +2: + ret + .code32 /* * This is the 32-bit trampoline that will be copied over to low memory. @@ -649,3 +695,10 @@ boot_stack_end: .balign 4096 pgtable: .fill BOOT_PGT_SIZE, 1, 0 + +/* + * The page table is going to be used instead of page table in the trampoline + * memory. + */ +top_pgtable: + .fill PAGE_SIZE, 1, 0 diff --git a/arch/x86/boot/compressed/pgtable_64.c b/arch/x86/boot/compressed/pgtable_64.c index 32af1cbcd903..a362fa0b849c 100644 --- a/arch/x86/boot/compressed/pgtable_64.c +++ b/arch/x86/boot/compressed/pgtable_64.c @@ -22,14 +22,6 @@ struct paging_config { /* Buffer to preserve trampoline memory */ static char trampoline_save[TRAMPOLINE_32BIT_SIZE]; -/* - * The page table is going to be used instead of page table in the trampoline - * memory. - * - * It must not be in BSS as BSS is cleared after cleanup_trampoline(). - */ -static char top_pgtable[PAGE_SIZE] __aligned(PAGE_SIZE) __section(.data); - /* * Trampoline address will be printed by extract_kernel() for debugging * purposes. @@ -134,7 +126,7 @@ out: return paging_config; } -void cleanup_trampoline(void) +void cleanup_trampoline(void *pgtable) { void *trampoline_pgtable; @@ -145,8 +137,8 @@ void cleanup_trampoline(void) * if it's there. */ if ((void *)__native_read_cr3() == trampoline_pgtable) { - memcpy(top_pgtable, trampoline_pgtable, PAGE_SIZE); - native_write_cr3((unsigned long)top_pgtable); + memcpy(pgtable, trampoline_pgtable, PAGE_SIZE); + native_write_cr3((unsigned long)pgtable); } /* Restore trampoline memory */ diff --git a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c b/arch/x86/entry/vdso/vdso32/vdso-fakesections.c deleted file mode 100644 index 541468e25265..000000000000 --- a/arch/x86/entry/vdso/vdso32/vdso-fakesections.c +++ /dev/null @@ -1 +0,0 @@ -#include "../vdso-fakesections.c" diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index a6006e7bb729..45b2b1c93d04 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include @@ -304,17 +305,20 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event) config = attr->config; - cache_type = (config >> 0) & 0xff; + cache_type = (config >> 0) & 0xff; if (cache_type >= PERF_COUNT_HW_CACHE_MAX) return -EINVAL; + cache_type = array_index_nospec(cache_type, PERF_COUNT_HW_CACHE_MAX); cache_op = (config >> 8) & 0xff; if (cache_op >= PERF_COUNT_HW_CACHE_OP_MAX) return -EINVAL; + cache_op = array_index_nospec(cache_op, PERF_COUNT_HW_CACHE_OP_MAX); cache_result = (config >> 16) & 0xff; if (cache_result >= PERF_COUNT_HW_CACHE_RESULT_MAX) return -EINVAL; + cache_result = array_index_nospec(cache_result, PERF_COUNT_HW_CACHE_RESULT_MAX); val = hw_cache_event_ids[cache_type][cache_op][cache_result]; @@ -421,6 +425,8 @@ int x86_setup_perfctr(struct perf_event *event) if (attr->config >= x86_pmu.max_events) return -EINVAL; + attr->config = array_index_nospec((unsigned long)attr->config, x86_pmu.max_events); + /* * The generic map: */ diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 9aca448bb8e6..9f8084f18d58 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -92,6 +92,7 @@ #include #include #include +#include #include #include #include "../perf_event.h" @@ -302,6 +303,7 @@ static int cstate_pmu_event_init(struct perf_event *event) } else if (event->pmu == &cstate_pkg_pmu) { if (cfg >= PERF_CSTATE_PKG_EVENT_MAX) return -EINVAL; + cfg = array_index_nospec((unsigned long)cfg, PERF_CSTATE_PKG_EVENT_MAX); if (!pkg_msr[cfg].attr) return -EINVAL; event->hw.event_base = pkg_msr[cfg].msr; diff --git a/arch/x86/events/msr.c b/arch/x86/events/msr.c index e7edf19e64c2..b4771a6ddbc1 100644 --- a/arch/x86/events/msr.c +++ b/arch/x86/events/msr.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include enum perf_msr_id { @@ -158,9 +159,6 @@ static int msr_event_init(struct perf_event *event) if (event->attr.type != event->pmu->type) return -ENOENT; - if (cfg >= PERF_MSR_EVENT_MAX) - return -EINVAL; - /* unsupported modes and filters */ if (event->attr.exclude_user || event->attr.exclude_kernel || @@ -171,6 +169,11 @@ static int msr_event_init(struct perf_event *event) event->attr.sample_period) /* no sampling */ return -EINVAL; + if (cfg >= PERF_MSR_EVENT_MAX) + return -EINVAL; + + cfg = array_index_nospec((unsigned long)cfg, PERF_MSR_EVENT_MAX); + if (!msr[cfg].attr) return -EINVAL; diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h index b27da9602a6d..aced6c9290d6 100644 --- a/arch/x86/include/asm/cpufeature.h +++ b/arch/x86/include/asm/cpufeature.h @@ -140,6 +140,20 @@ extern void clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit); #define setup_force_cpu_bug(bit) setup_force_cpu_cap(bit) +#if defined(__clang__) && !defined(CC_HAVE_ASM_GOTO) + +/* + * Workaround for the sake of BPF compilation which utilizes kernel + * headers, but clang does not support ASM GOTO and fails the build. + */ +#ifndef __BPF_TRACING__ +#warning "Compiler lacks ASM_GOTO support. Add -D __BPF_TRACING__ to your compiler arguments" +#endif + +#define static_cpu_has(bit) boot_cpu_has(bit) + +#else + /* * Static testing of CPU features. Used the same as boot_cpu_has(). * These will statically patch the target code for additional @@ -195,6 +209,7 @@ t_no: boot_cpu_has(bit) : \ _static_cpu_has(bit) \ ) +#endif #define cpu_has_bug(c, bit) cpu_has(c, (bit)) #define set_cpu_bug(c, bit) set_cpu_cap(c, (bit)) diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index b3e32b010ab1..c2c01f84df75 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +#define POP_SS_OPCODE 0x1f +#define MOV_SREG_OPCODE 0x8e + +/* + * Intel SDM Vol.3A 6.8.3 states; + * "Any single-step trap that would be delivered following the MOV to SS + * instruction or POP to SS instruction (because EFLAGS.TF is 1) is + * suppressed." + * This function returns true if @insn is MOV SS or POP SS. On these + * instructions, single stepping is suppressed. + */ +static inline int insn_masking_exception(struct insn *insn) +{ + return insn->opcode.bytes[0] == POP_SS_OPCODE || + (insn->opcode.bytes[0] == MOV_SREG_OPCODE && + X86_MODRM_REG(insn->modrm.bytes[0]) == 2); +} + #endif /* _ASM_X86_INSN_H */ diff --git a/arch/x86/include/asm/mmu_context.h b/arch/x86/include/asm/mmu_context.h index 57e3785d0d26..cf9911b5a53c 100644 --- a/arch/x86/include/asm/mmu_context.h +++ b/arch/x86/include/asm/mmu_context.h @@ -193,7 +193,7 @@ static inline int init_new_context(struct task_struct *tsk, #ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS if (cpu_feature_enabled(X86_FEATURE_OSPKE)) { - /* pkey 0 is the default and always allocated */ + /* pkey 0 is the default and allocated implicitly */ mm->context.pkey_allocation_map = 0x1; /* -1 means unallocated or invalid */ mm->context.execute_only_pkey = -1; diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index a0ba1ffda0df..851c04b7a092 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -2,6 +2,8 @@ #ifndef _ASM_X86_PKEYS_H #define _ASM_X86_PKEYS_H +#define ARCH_DEFAULT_PKEY 0 + #define arch_max_pkey() (boot_cpu_has(X86_FEATURE_OSPKE) ? 16 : 1) extern int arch_set_user_pkey_access(struct task_struct *tsk, int pkey, @@ -15,7 +17,7 @@ extern int __execute_only_pkey(struct mm_struct *mm); static inline int execute_only_pkey(struct mm_struct *mm) { if (!boot_cpu_has(X86_FEATURE_OSPKE)) - return 0; + return ARCH_DEFAULT_PKEY; return __execute_only_pkey(mm); } @@ -49,13 +51,21 @@ bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) { /* * "Allocated" pkeys are those that have been returned - * from pkey_alloc(). pkey 0 is special, and never - * returned from pkey_alloc(). + * from pkey_alloc() or pkey 0 which is allocated + * implicitly when the mm is created. */ - if (pkey <= 0) + if (pkey < 0) return false; if (pkey >= arch_max_pkey()) return false; + /* + * The exec-only pkey is set in the allocation map, but + * is not available to any of the user interfaces like + * mprotect_pkey(). + */ + if (pkey == mm->context.execute_only_pkey) + return false; + return mm_pkey_allocation_map(mm) & (1U << pkey); } diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 4c851ebb3ceb..0ede697c3961 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -29,7 +29,7 @@ #define KVM_FEATURE_PV_TLB_FLUSH 9 #define KVM_FEATURE_ASYNC_PF_VMEXIT 10 -#define KVM_HINTS_DEDICATED 0 +#define KVM_HINTS_REALTIME 0 /* The last 8 bits are used to indicate how to interpret the flags field * in pvclock structure. If no bits are set, all flags are ignored. diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index c88e0b127810..b481b95bd8f6 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -14,8 +14,11 @@ #include #define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450 +#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0 #define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 #define PCI_DEVICE_ID_AMD_17H_DF_F4 0x1464 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F4 0x15ec /* Protect the PCI config register pairs used for SMN and DF indirect access. */ static DEFINE_MUTEX(smn_mutex); @@ -24,6 +27,7 @@ static u32 *flush_words; static const struct pci_device_id amd_root_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_ROOT) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT) }, {} }; @@ -39,6 +43,7 @@ const struct pci_device_id amd_nb_misc_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F3) }, {} }; @@ -51,6 +56,7 @@ static const struct pci_device_id amd_nb_link_ids[] = { { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_DF_F4) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F4) }, { PCI_DEVICE(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_CNB17H_F4) }, {} }; diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 8b04234e010b..7685444a106b 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -116,6 +116,7 @@ static void init_x2apic_ldr(void) goto update; } cmsk = cluster_hotplug_mask; + cmsk->clusterid = cluster; cluster_hotplug_mask = NULL; update: this_cpu_write(cluster_masks, cmsk); diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index f7666eef4a87..c8e038800591 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -94,6 +94,11 @@ static struct smca_bank_name smca_names[] = { [SMCA_SMU] = { "smu", "System Management Unit" }, }; +static u32 smca_bank_addrs[MAX_NR_BANKS][NR_BLOCKS] __ro_after_init = +{ + [0 ... MAX_NR_BANKS - 1] = { [0 ... NR_BLOCKS - 1] = -1 } +}; + const char *smca_get_name(enum smca_bank_types t) { if (t >= N_SMCA_BANK_TYPES) @@ -443,20 +448,26 @@ static u32 smca_get_block_address(unsigned int cpu, unsigned int bank, if (!block) return MSR_AMD64_SMCA_MCx_MISC(bank); + /* Check our cache first: */ + if (smca_bank_addrs[bank][block] != -1) + return smca_bank_addrs[bank][block]; + /* * For SMCA enabled processors, BLKPTR field of the first MISC register * (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4). */ if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high)) - return addr; + goto out; if (!(low & MCI_CONFIG_MCAX)) - return addr; + goto out; if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) && (low & MASK_BLKPTR_LO)) - return MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); + addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1); +out: + smca_bank_addrs[bank][block] = addr; return addr; } @@ -468,18 +479,6 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi if ((bank >= mca_cfg.banks) || (block >= NR_BLOCKS)) return addr; - /* Get address from already initialized block. */ - if (per_cpu(threshold_banks, cpu)) { - struct threshold_bank *bankp = per_cpu(threshold_banks, cpu)[bank]; - - if (bankp && bankp->blocks) { - struct threshold_block *blockp = &bankp->blocks[block]; - - if (blockp) - return blockp->address; - } - } - if (mce_flags.smca) return smca_get_block_address(cpu, bank, block); diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 0c408f8c4ed4..2d29e47c056e 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -104,6 +104,12 @@ static bool __head check_la57_support(unsigned long physaddr) } #endif +/* Code in __startup_64() can be relocated during execution, but the compiler + * doesn't have to generate PC-relative relocations when accessing globals from + * that function. Clang actually does not generate them, which leads to + * boot-time crashes. To work around this problem, every global pointer must + * be adjusted using fixup_pointer(). + */ unsigned long __head __startup_64(unsigned long physaddr, struct boot_params *bp) { @@ -113,6 +119,7 @@ unsigned long __head __startup_64(unsigned long physaddr, p4dval_t *p4d; pudval_t *pud; pmdval_t *pmd, pmd_entry; + pteval_t *mask_ptr; bool la57; int i; unsigned int *next_pgt_ptr; @@ -196,7 +203,8 @@ unsigned long __head __startup_64(unsigned long physaddr, pmd_entry = __PAGE_KERNEL_LARGE_EXEC & ~_PAGE_GLOBAL; /* Filter out unsupported __PAGE_KERNEL_* bits: */ - pmd_entry &= __supported_pte_mask; + mask_ptr = fixup_pointer(&__supported_pte_mask, physaddr); + pmd_entry &= *mask_ptr; pmd_entry += sme_get_me_mask(); pmd_entry += physaddr; diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 0715f827607c..6f4d42377fe5 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -370,6 +370,10 @@ int __copy_instruction(u8 *dest, u8 *src, u8 *real, struct insn *insn) if (insn->opcode.bytes[0] == BREAKPOINT_INSTRUCTION) return 0; + /* We should not singlestep on the exception masking instructions */ + if (insn_masking_exception(insn)) + return 0; + #ifdef CONFIG_X86_64 /* Only x86_64 has RIP relative instructions */ if (insn_rip_relative(insn)) { diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 7867417cfaff..5b2300b818af 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -457,7 +457,7 @@ static void __init sev_map_percpu_data(void) static void __init kvm_smp_prepare_cpus(unsigned int max_cpus) { native_smp_prepare_cpus(max_cpus); - if (kvm_para_has_hint(KVM_HINTS_DEDICATED)) + if (kvm_para_has_hint(KVM_HINTS_REALTIME)) static_branch_disable(&virt_spin_lock_key); } @@ -553,7 +553,7 @@ static void __init kvm_guest_init(void) } if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && - !kvm_para_has_hint(KVM_HINTS_DEDICATED) && + !kvm_para_has_hint(KVM_HINTS_REALTIME) && kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) pv_mmu_ops.flush_tlb_others = kvm_flush_tlb_others; @@ -649,7 +649,7 @@ static __init int kvm_setup_pv_tlb_flush(void) int cpu; if (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) && - !kvm_para_has_hint(KVM_HINTS_DEDICATED) && + !kvm_para_has_hint(KVM_HINTS_REALTIME) && kvm_para_has_feature(KVM_FEATURE_STEAL_TIME)) { for_each_possible_cpu(cpu) { zalloc_cpumask_var_node(per_cpu_ptr(&__pv_tlb_mask, cpu), @@ -745,7 +745,7 @@ void __init kvm_spinlock_init(void) if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) return; - if (kvm_para_has_hint(KVM_HINTS_DEDICATED)) + if (kvm_para_has_hint(KVM_HINTS_REALTIME)) return; __pv_init_lock_hash(); diff --git a/arch/x86/kernel/machine_kexec_32.c b/arch/x86/kernel/machine_kexec_32.c index 60cdec6628b0..d1ab07ec8c9a 100644 --- a/arch/x86/kernel/machine_kexec_32.c +++ b/arch/x86/kernel/machine_kexec_32.c @@ -57,12 +57,17 @@ static void load_segments(void) static void machine_kexec_free_page_tables(struct kimage *image) { free_page((unsigned long)image->arch.pgd); + image->arch.pgd = NULL; #ifdef CONFIG_X86_PAE free_page((unsigned long)image->arch.pmd0); + image->arch.pmd0 = NULL; free_page((unsigned long)image->arch.pmd1); + image->arch.pmd1 = NULL; #endif free_page((unsigned long)image->arch.pte0); + image->arch.pte0 = NULL; free_page((unsigned long)image->arch.pte1); + image->arch.pte1 = NULL; } static int machine_kexec_alloc_page_tables(struct kimage *image) @@ -79,7 +84,6 @@ static int machine_kexec_alloc_page_tables(struct kimage *image) !image->arch.pmd0 || !image->arch.pmd1 || #endif !image->arch.pte0 || !image->arch.pte1) { - machine_kexec_free_page_tables(image); return -ENOMEM; } return 0; diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index a5e55d832d0a..6010449ca6d2 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -39,9 +39,13 @@ const struct kexec_file_ops * const kexec_file_loaders[] = { static void free_transition_pgtable(struct kimage *image) { free_page((unsigned long)image->arch.p4d); + image->arch.p4d = NULL; free_page((unsigned long)image->arch.pud); + image->arch.pud = NULL; free_page((unsigned long)image->arch.pmd); + image->arch.pmd = NULL; free_page((unsigned long)image->arch.pte); + image->arch.pte = NULL; } static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) @@ -91,7 +95,6 @@ static int init_transition_pgtable(struct kimage *image, pgd_t *pgd) set_pte(pte, pfn_pte(paddr >> PAGE_SHIFT, PAGE_KERNEL_EXEC_NOENC)); return 0; err: - free_transition_pgtable(image); return result; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 4b100fe0f508..12bb445fb98d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -542,6 +542,7 @@ void set_personality_64bit(void) clear_thread_flag(TIF_X32); /* Pretend that this comes from a 64bit execve */ task_pt_regs(current)->orig_ax = __NR_execve; + current_thread_info()->status &= ~TS_COMPAT; /* Ensure the corresponding mm is not marked. */ if (current->mm) diff --git a/arch/x86/kernel/uprobes.c b/arch/x86/kernel/uprobes.c index 85c7ef23d99f..c84bb5396958 100644 --- a/arch/x86/kernel/uprobes.c +++ b/arch/x86/kernel/uprobes.c @@ -299,6 +299,10 @@ static int uprobe_init_insn(struct arch_uprobe *auprobe, struct insn *insn, bool if (is_prefix_bad(insn)) return -ENOTSUPP; + /* We should not singlestep on the exception masking instructions */ + if (insn_masking_exception(insn)) + return -ENOTSUPP; + if (x86_64) good_insns = good_insns_64; else diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 98618e397342..5708e951a5c6 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -1265,7 +1265,7 @@ static int kvm_hv_hypercall_complete_userspace(struct kvm_vcpu *vcpu) struct kvm_run *run = vcpu->run; kvm_hv_hypercall_set_result(vcpu, run->hyperv.u.hcall.result); - return 1; + return kvm_skip_emulated_instruction(vcpu); } static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param) @@ -1296,8 +1296,10 @@ static u16 kvm_hvcall_signal_event(struct kvm_vcpu *vcpu, bool fast, u64 param) if (param & ~KVM_HYPERV_CONN_ID_MASK) return HV_STATUS_INVALID_HYPERCALL_INPUT; - /* conn_to_evt is protected by vcpu->kvm->srcu */ + /* the eventfd is protected by vcpu->kvm->srcu, but conn_to_evt isn't */ + rcu_read_lock(); eventfd = idr_find(&vcpu->kvm->arch.hyperv.conn_to_evt, param); + rcu_read_unlock(); if (!eventfd) return HV_STATUS_INVALID_PORT_ID; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c7668806163f..3f1696570b41 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1494,6 +1494,12 @@ static inline bool cpu_has_vmx_vmfunc(void) SECONDARY_EXEC_ENABLE_VMFUNC; } +static bool vmx_umip_emulated(void) +{ + return vmcs_config.cpu_based_2nd_exec_ctrl & + SECONDARY_EXEC_DESC; +} + static inline bool report_flexpriority(void) { return flexpriority_enabled; @@ -4761,14 +4767,16 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) else hw_cr4 |= KVM_PMODE_VM_CR4_ALWAYS_ON; - if ((cr4 & X86_CR4_UMIP) && !boot_cpu_has(X86_FEATURE_UMIP)) { - vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, - SECONDARY_EXEC_DESC); - hw_cr4 &= ~X86_CR4_UMIP; - } else if (!is_guest_mode(vcpu) || - !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) - vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, + if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated()) { + if (cr4 & X86_CR4_UMIP) { + vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, SECONDARY_EXEC_DESC); + hw_cr4 &= ~X86_CR4_UMIP; + } else if (!is_guest_mode(vcpu) || + !nested_cpu_has2(get_vmcs12(vcpu), SECONDARY_EXEC_DESC)) + vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, + SECONDARY_EXEC_DESC); + } if (cr4 & X86_CR4_VMXE) { /* @@ -9497,12 +9505,6 @@ static bool vmx_xsaves_supported(void) SECONDARY_EXEC_XSAVES; } -static bool vmx_umip_emulated(void) -{ - return vmcs_config.cpu_based_2nd_exec_ctrl & - SECONDARY_EXEC_DESC; -} - static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) { u32 exit_intr_info; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 51ecd381793b..59371de5d722 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -114,7 +114,7 @@ module_param(ignore_msrs, bool, S_IRUGO | S_IWUSR); static bool __read_mostly report_ignored_msrs = true; module_param(report_ignored_msrs, bool, S_IRUGO | S_IWUSR); -unsigned int min_timer_period_us = 500; +unsigned int min_timer_period_us = 200; module_param(min_timer_period_us, uint, S_IRUGO | S_IWUSR); static bool __read_mostly kvmclock_periodic_sync = true; @@ -843,7 +843,10 @@ EXPORT_SYMBOL_GPL(kvm_set_cr4); int kvm_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3) { #ifdef CONFIG_X86_64 - cr3 &= ~CR3_PCID_INVD; + bool pcid_enabled = kvm_read_cr4_bits(vcpu, X86_CR4_PCIDE); + + if (pcid_enabled) + cr3 &= ~CR3_PCID_INVD; #endif if (cr3 == kvm_read_cr3(vcpu) && !pdptrs_changed(vcpu)) { @@ -6671,12 +6674,13 @@ void kvm_vcpu_deactivate_apicv(struct kvm_vcpu *vcpu) int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) { unsigned long nr, a0, a1, a2, a3, ret; - int op_64_bit, r; + int op_64_bit; - r = kvm_skip_emulated_instruction(vcpu); - - if (kvm_hv_hypercall_enabled(vcpu->kvm)) - return kvm_hv_hypercall(vcpu); + if (kvm_hv_hypercall_enabled(vcpu->kvm)) { + if (!kvm_hv_hypercall(vcpu)) + return 0; + goto out; + } nr = kvm_register_read(vcpu, VCPU_REGS_RAX); a0 = kvm_register_read(vcpu, VCPU_REGS_RBX); @@ -6697,7 +6701,7 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) if (kvm_x86_ops->get_cpl(vcpu) != 0) { ret = -KVM_EPERM; - goto out; + goto out_error; } switch (nr) { @@ -6717,12 +6721,14 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) ret = -KVM_ENOSYS; break; } -out: +out_error: if (!op_64_bit) ret = (u32)ret; kvm_register_write(vcpu, VCPU_REGS_RAX, ret); + +out: ++vcpu->stat.hypercalls; - return r; + return kvm_skip_emulated_instruction(vcpu); } EXPORT_SYMBOL_GPL(kvm_emulate_hypercall); diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c index d7bc0eea20a5..6e98e0a7c923 100644 --- a/arch/x86/mm/pkeys.c +++ b/arch/x86/mm/pkeys.c @@ -94,26 +94,27 @@ int __arch_override_mprotect_pkey(struct vm_area_struct *vma, int prot, int pkey */ if (pkey != -1) return pkey; - /* - * Look for a protection-key-drive execute-only mapping - * which is now being given permissions that are not - * execute-only. Move it back to the default pkey. - */ - if (vma_is_pkey_exec_only(vma) && - (prot & (PROT_READ|PROT_WRITE))) { - return 0; - } + /* * The mapping is execute-only. Go try to get the * execute-only protection key. If we fail to do that, * fall through as if we do not have execute-only - * support. + * support in this mm. */ if (prot == PROT_EXEC) { pkey = execute_only_pkey(vma->vm_mm); if (pkey > 0) return pkey; + } else if (vma_is_pkey_exec_only(vma)) { + /* + * Protections are *not* PROT_EXEC, but the mapping + * is using the exec-only pkey. This mapping was + * PROT_EXEC and will no longer be. Move back to + * the default pkey. + */ + return ARCH_DEFAULT_PKEY; } + /* * This is a vanilla, non-pkey mprotect (or we failed to * setup execute-only), inherit the pkey from the VMA we diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index d33e7dbe3129..2d76106788a3 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -42,13 +42,11 @@ xmaddr_t arbitrary_virt_to_machine(void *vaddr) } EXPORT_SYMBOL_GPL(arbitrary_virt_to_machine); -static void xen_flush_tlb_all(void) +static noinline void xen_flush_tlb_all(void) { struct mmuext_op *op; struct multicall_space mcs; - trace_xen_mmu_flush_tlb_all(0); - preempt_disable(); mcs = xen_mc_entry(sizeof(*op)); diff --git a/arch/x86/xen/mmu_pv.c b/arch/x86/xen/mmu_pv.c index 486c0a34d00b..2c30cabfda90 100644 --- a/arch/x86/xen/mmu_pv.c +++ b/arch/x86/xen/mmu_pv.c @@ -1310,13 +1310,11 @@ unsigned long xen_read_cr2_direct(void) return this_cpu_read(xen_vcpu_info.arch.cr2); } -static void xen_flush_tlb(void) +static noinline void xen_flush_tlb(void) { struct mmuext_op *op; struct multicall_space mcs; - trace_xen_mmu_flush_tlb(0); - preempt_disable(); mcs = xen_mc_entry(sizeof(*op)); diff --git a/drivers/acpi/acpica/acnamesp.h b/drivers/acpi/acpica/acnamesp.h index 514aaf948ea9..3825df923480 100644 --- a/drivers/acpi/acpica/acnamesp.h +++ b/drivers/acpi/acpica/acnamesp.h @@ -56,6 +56,10 @@ acpi_status acpi_ns_initialize_objects(void); acpi_status acpi_ns_initialize_devices(u32 flags); +acpi_status +acpi_ns_init_one_package(acpi_handle obj_handle, + u32 level, void *context, void **return_value); + /* * nsload - Namespace loading */ diff --git a/drivers/acpi/acpica/exconfig.c b/drivers/acpi/acpica/exconfig.c index 99d92cb32803..f85c6f3271f6 100644 --- a/drivers/acpi/acpica/exconfig.c +++ b/drivers/acpi/acpica/exconfig.c @@ -174,6 +174,13 @@ acpi_ex_load_table_op(struct acpi_walk_state *walk_state, return_ACPI_STATUS(status); } + /* Complete the initialization/resolution of package objects */ + + status = acpi_ns_walk_namespace(ACPI_TYPE_PACKAGE, ACPI_ROOT_OBJECT, + ACPI_UINT32_MAX, 0, + acpi_ns_init_one_package, NULL, NULL, + NULL); + /* Parameter Data (optional) */ if (parameter_node) { @@ -430,6 +437,13 @@ acpi_ex_load_op(union acpi_operand_object *obj_desc, return_ACPI_STATUS(status); } + /* Complete the initialization/resolution of package objects */ + + status = acpi_ns_walk_namespace(ACPI_TYPE_PACKAGE, ACPI_ROOT_OBJECT, + ACPI_UINT32_MAX, 0, + acpi_ns_init_one_package, NULL, NULL, + NULL); + /* Store the ddb_handle into the Target operand */ status = acpi_ex_store(ddb_handle, target, walk_state); diff --git a/drivers/acpi/acpica/nsinit.c b/drivers/acpi/acpica/nsinit.c index 77f2b5f4948a..d77257d1c827 100644 --- a/drivers/acpi/acpica/nsinit.c +++ b/drivers/acpi/acpica/nsinit.c @@ -240,6 +240,58 @@ error_exit: return_ACPI_STATUS(status); } +/******************************************************************************* + * + * FUNCTION: acpi_ns_init_one_package + * + * PARAMETERS: obj_handle - Node + * level - Current nesting level + * context - Not used + * return_value - Not used + * + * RETURN: Status + * + * DESCRIPTION: Callback from acpi_walk_namespace. Invoked for every package + * within the namespace. Used during dynamic load of an SSDT. + * + ******************************************************************************/ + +acpi_status +acpi_ns_init_one_package(acpi_handle obj_handle, + u32 level, void *context, void **return_value) +{ + acpi_status status; + union acpi_operand_object *obj_desc; + struct acpi_namespace_node *node = + (struct acpi_namespace_node *)obj_handle; + + obj_desc = acpi_ns_get_attached_object(node); + if (!obj_desc) { + return (AE_OK); + } + + /* Exit if package is already initialized */ + + if (obj_desc->package.flags & AOPOBJ_DATA_VALID) { + return (AE_OK); + } + + status = acpi_ds_get_package_arguments(obj_desc); + if (ACPI_FAILURE(status)) { + return (AE_OK); + } + + status = + acpi_ut_walk_package_tree(obj_desc, NULL, + acpi_ds_init_package_element, NULL); + if (ACPI_FAILURE(status)) { + return (AE_OK); + } + + obj_desc->package.flags |= AOPOBJ_DATA_VALID; + return (AE_OK); +} + /******************************************************************************* * * FUNCTION: acpi_ns_init_one_object @@ -360,27 +412,11 @@ acpi_ns_init_one_object(acpi_handle obj_handle, case ACPI_TYPE_PACKAGE: + /* Complete the initialization/resolution of the package object */ + info->package_init++; - status = acpi_ds_get_package_arguments(obj_desc); - if (ACPI_FAILURE(status)) { - break; - } - - ACPI_DEBUG_PRINT_RAW((ACPI_DB_PARSE, - "%s: Completing resolution of Package elements\n", - ACPI_GET_FUNCTION_NAME)); - - /* - * Resolve all named references in package objects (and all - * sub-packages). This action has been deferred until the entire - * namespace has been loaded, in order to support external and - * forward references from individual package elements (05/2017). - */ - status = acpi_ut_walk_package_tree(obj_desc, NULL, - acpi_ds_init_package_element, - NULL); - - obj_desc->package.flags |= AOPOBJ_DATA_VALID; + status = + acpi_ns_init_one_package(obj_handle, level, NULL, NULL); break; default: diff --git a/drivers/clk/Kconfig b/drivers/clk/Kconfig index 41492e980ef4..34968a381d0f 100644 --- a/drivers/clk/Kconfig +++ b/drivers/clk/Kconfig @@ -266,15 +266,13 @@ config COMMON_CLK_STM32MP157 Support for stm32mp157 SoC family clocks config COMMON_CLK_STM32F - bool "Clock driver for stm32f4 and stm32f7 SoC families" - depends on MACH_STM32F429 || MACH_STM32F469 || MACH_STM32F746 + def_bool COMMON_CLK && (MACH_STM32F429 || MACH_STM32F469 || MACH_STM32F746) help ---help--- Support for stm32f4 and stm32f7 SoC families clocks config COMMON_CLK_STM32H7 - bool "Clock driver for stm32h7 SoC family" - depends on MACH_STM32H743 + def_bool COMMON_CLK && MACH_STM32H743 help ---help--- Support for stm32h7 SoC family clocks diff --git a/drivers/clk/imx/clk-imx6ul.c b/drivers/clk/imx/clk-imx6ul.c index 114ecbb94ec5..12320118f8de 100644 --- a/drivers/clk/imx/clk-imx6ul.c +++ b/drivers/clk/imx/clk-imx6ul.c @@ -464,7 +464,7 @@ static void __init imx6ul_clocks_init(struct device_node *ccm_node) clk_set_rate(clks[IMX6UL_CLK_AHB], 99000000); /* Change periph_pre clock to pll2_bus to adjust AXI rate to 264MHz */ - clk_set_parent(clks[IMX6UL_CLK_PERIPH_CLK2_SEL], clks[IMX6UL_CLK_PLL3_USB_OTG]); + clk_set_parent(clks[IMX6UL_CLK_PERIPH_CLK2_SEL], clks[IMX6UL_CLK_OSC]); clk_set_parent(clks[IMX6UL_CLK_PERIPH], clks[IMX6UL_CLK_PERIPH_CLK2]); clk_set_parent(clks[IMX6UL_CLK_PERIPH_PRE], clks[IMX6UL_CLK_PLL2_BUS]); clk_set_parent(clks[IMX6UL_CLK_PERIPH], clks[IMX6UL_CLK_PERIPH_PRE]); diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm index de55c7d57438..96b35b8b3606 100644 --- a/drivers/cpufreq/Kconfig.arm +++ b/drivers/cpufreq/Kconfig.arm @@ -20,7 +20,7 @@ config ACPI_CPPC_CPUFREQ config ARM_ARMADA_37XX_CPUFREQ tristate "Armada 37xx CPUFreq support" - depends on ARCH_MVEBU + depends on ARCH_MVEBU && CPUFREQ_DT help This adds the CPUFreq driver support for Marvell Armada 37xx SoCs. The Armada 37xx PMU supports 4 frequency and VDD levels. diff --git a/drivers/dma/qcom/bam_dma.c b/drivers/dma/qcom/bam_dma.c index d29275b97e84..4a828c18099a 100644 --- a/drivers/dma/qcom/bam_dma.c +++ b/drivers/dma/qcom/bam_dma.c @@ -524,6 +524,14 @@ static int bam_alloc_chan(struct dma_chan *chan) return 0; } +static int bam_pm_runtime_get_sync(struct device *dev) +{ + if (pm_runtime_enabled(dev)) + return pm_runtime_get_sync(dev); + + return 0; +} + /** * bam_free_chan - Frees dma resources associated with specific channel * @chan: specified channel @@ -539,7 +547,7 @@ static void bam_free_chan(struct dma_chan *chan) unsigned long flags; int ret; - ret = pm_runtime_get_sync(bdev->dev); + ret = bam_pm_runtime_get_sync(bdev->dev); if (ret < 0) return; @@ -720,7 +728,7 @@ static int bam_pause(struct dma_chan *chan) unsigned long flag; int ret; - ret = pm_runtime_get_sync(bdev->dev); + ret = bam_pm_runtime_get_sync(bdev->dev); if (ret < 0) return ret; @@ -746,7 +754,7 @@ static int bam_resume(struct dma_chan *chan) unsigned long flag; int ret; - ret = pm_runtime_get_sync(bdev->dev); + ret = bam_pm_runtime_get_sync(bdev->dev); if (ret < 0) return ret; @@ -852,7 +860,7 @@ static irqreturn_t bam_dma_irq(int irq, void *data) if (srcs & P_IRQ) tasklet_schedule(&bdev->task); - ret = pm_runtime_get_sync(bdev->dev); + ret = bam_pm_runtime_get_sync(bdev->dev); if (ret < 0) return ret; @@ -969,7 +977,7 @@ static void bam_start_dma(struct bam_chan *bchan) if (!vd) return; - ret = pm_runtime_get_sync(bdev->dev); + ret = bam_pm_runtime_get_sync(bdev->dev); if (ret < 0) return; diff --git a/drivers/firmware/arm_scmi/driver.c b/drivers/firmware/arm_scmi/driver.c index 14b147135a0c..2455be8cbc4f 100644 --- a/drivers/firmware/arm_scmi/driver.c +++ b/drivers/firmware/arm_scmi/driver.c @@ -778,6 +778,7 @@ scmi_create_protocol_device(struct device_node *np, struct scmi_info *info, if (scmi_mbox_chan_setup(info, &sdev->dev, prot_id)) { dev_err(&sdev->dev, "failed to setup transport\n"); scmi_device_destroy(sdev); + return; } /* setup handle now as the transport is ready */ diff --git a/drivers/firmware/efi/libstub/arm64-stub.c b/drivers/firmware/efi/libstub/arm64-stub.c index b9bd827caa22..1b4d465cc5d9 100644 --- a/drivers/firmware/efi/libstub/arm64-stub.c +++ b/drivers/firmware/efi/libstub/arm64-stub.c @@ -97,6 +97,16 @@ efi_status_t handle_kernel_image(efi_system_table_t *sys_table_arg, u32 offset = !IS_ENABLED(CONFIG_DEBUG_ALIGN_RODATA) ? (phys_seed >> 32) & mask : TEXT_OFFSET; + /* + * With CONFIG_RANDOMIZE_TEXT_OFFSET=y, TEXT_OFFSET may not + * be a multiple of EFI_KIMG_ALIGN, and we must ensure that + * we preserve the misalignment of 'offset' relative to + * EFI_KIMG_ALIGN so that statically allocated objects whose + * alignment exceeds PAGE_SIZE appear correctly aligned in + * memory. + */ + offset |= TEXT_OFFSET % EFI_KIMG_ALIGN; + /* * If KASLR is enabled, and we have some randomness available, * locate the kernel at a randomized offset in physical memory. diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index a1b9338736e3..c2c21d839727 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -716,7 +716,7 @@ static void remove_compat_control_link(struct drm_device *dev) if (!minor) return; - name = kasprintf(GFP_KERNEL, "controlD%d", minor->index); + name = kasprintf(GFP_KERNEL, "controlD%d", minor->index + 64); if (!name) return; diff --git a/drivers/gpu/drm/drm_dumb_buffers.c b/drivers/gpu/drm/drm_dumb_buffers.c index 39ac15ce4702..9e2ae02f31e0 100644 --- a/drivers/gpu/drm/drm_dumb_buffers.c +++ b/drivers/gpu/drm/drm_dumb_buffers.c @@ -65,12 +65,13 @@ int drm_mode_create_dumb_ioctl(struct drm_device *dev, return -EINVAL; /* overflow checks for 32bit size calculations */ - /* NOTE: DIV_ROUND_UP() can overflow */ + if (args->bpp > U32_MAX - 8) + return -EINVAL; cpp = DIV_ROUND_UP(args->bpp, 8); - if (!cpp || cpp > 0xffffffffU / args->width) + if (cpp > U32_MAX / args->width) return -EINVAL; stride = cpp * args->width; - if (args->height > 0xffffffffU / stride) + if (args->height > U32_MAX / stride) return -EINVAL; /* test for wrap-around */ diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index e394799979a6..6d9b9453707c 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -212,6 +212,7 @@ static int drm_open_helper(struct file *filp, struct drm_minor *minor) return -ENOMEM; filp->private_data = priv; + filp->f_mode |= FMODE_UNSIGNED_OFFSET; priv->filp = filp; priv->pid = get_pid(task_pid(current)); priv->minor = minor; diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index d596a8302ca3..854bd51b9478 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -778,6 +778,9 @@ i915_gem_userptr_ioctl(struct drm_device *dev, I915_USERPTR_UNSYNCHRONIZED)) return -EINVAL; + if (!args->user_size) + return -EINVAL; + if (offset_in_page(args->user_ptr | args->user_size)) return -EINVAL; diff --git a/drivers/gpu/drm/i915/i915_reg.h b/drivers/gpu/drm/i915/i915_reg.h index e6a8c0ee7df1..8a69a9275e28 100644 --- a/drivers/gpu/drm/i915/i915_reg.h +++ b/drivers/gpu/drm/i915/i915_reg.h @@ -7326,6 +7326,9 @@ enum { #define SLICE_ECO_CHICKEN0 _MMIO(0x7308) #define PIXEL_MASK_CAMMING_DISABLE (1 << 14) +#define GEN9_WM_CHICKEN3 _MMIO(0x5588) +#define GEN9_FACTOR_IN_CLR_VAL_HIZ (1 << 9) + /* WaCatErrorRejectionIssue */ #define GEN7_SQ_CHICKEN_MBCUNIT_CONFIG _MMIO(0x9030) #define GEN7_SQ_CHICKEN_MBCUNIT_SQINTMOB (1<<11) diff --git a/drivers/gpu/drm/i915/intel_engine_cs.c b/drivers/gpu/drm/i915/intel_engine_cs.c index 4ba139c27fba..f7c25828d3bb 100644 --- a/drivers/gpu/drm/i915/intel_engine_cs.c +++ b/drivers/gpu/drm/i915/intel_engine_cs.c @@ -1149,6 +1149,10 @@ static int gen9_init_workarounds(struct intel_engine_cs *engine) WA_SET_FIELD_MASKED(GEN8_CS_CHICKEN1, GEN9_PREEMPT_GPGPU_LEVEL_MASK, GEN9_PREEMPT_GPGPU_COMMAND_LEVEL); + /* WaClearHIZ_WM_CHICKEN3:bxt,glk */ + if (IS_GEN9_LP(dev_priv)) + WA_SET_BIT_MASKED(GEN9_WM_CHICKEN3, GEN9_FACTOR_IN_CLR_VAL_HIZ); + /* WaVFEStateAfterPipeControlwithMediaStateClear:skl,bxt,glk,cfl */ ret = wa_ring_whitelist_reg(engine, GEN9_CTX_PREEMPT_REG); if (ret) diff --git a/drivers/gpu/drm/i915/intel_lrc.c b/drivers/gpu/drm/i915/intel_lrc.c index e3a5f673ff67..8704f7f8d072 100644 --- a/drivers/gpu/drm/i915/intel_lrc.c +++ b/drivers/gpu/drm/i915/intel_lrc.c @@ -884,6 +884,7 @@ static void execlists_submission_tasklet(unsigned long data) head = execlists->csb_head; tail = READ_ONCE(buf[write_idx]); + rmb(); /* Hopefully paired with a wmb() in HW */ } GEM_TRACE("%s cs-irq head=%d [%d%s], tail=%d [%d%s]\n", engine->name, diff --git a/drivers/gpu/drm/vc4/vc4_drv.c b/drivers/gpu/drm/vc4/vc4_drv.c index 94b99c90425a..7c95ed5c5cac 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.c +++ b/drivers/gpu/drm/vc4/vc4_drv.c @@ -130,6 +130,7 @@ static void vc4_close(struct drm_device *dev, struct drm_file *file) struct vc4_file *vc4file = file->driver_priv; vc4_perfmon_close_file(vc4file); + kfree(vc4file); } static const struct vm_operations_struct vc4_vm_ops = { diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c index 648f8127f65a..3d667e903beb 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_scrn.c @@ -482,6 +482,8 @@ vmw_sou_primary_plane_prepare_fb(struct drm_plane *plane, return ret; } + vps->dmabuf_size = size; + /* * TTM already thinks the buffer is pinned, but make sure the * pin_count is upped. diff --git a/drivers/hwmon/Kconfig b/drivers/hwmon/Kconfig index f249a4428458..6ec307c93ece 100644 --- a/drivers/hwmon/Kconfig +++ b/drivers/hwmon/Kconfig @@ -272,7 +272,7 @@ config SENSORS_K8TEMP config SENSORS_K10TEMP tristate "AMD Family 10h+ temperature sensor" - depends on X86 && PCI + depends on X86 && PCI && AMD_NB help If you say yes here you get support for the temperature sensor(s) inside your CPU. Supported are later revisions of diff --git a/drivers/hwmon/k10temp.c b/drivers/hwmon/k10temp.c index d2cc55e21374..3b73dee6fdc6 100644 --- a/drivers/hwmon/k10temp.c +++ b/drivers/hwmon/k10temp.c @@ -23,6 +23,7 @@ #include #include #include +#include #include MODULE_DESCRIPTION("AMD Family 10h+ CPU core temperature monitor"); @@ -40,8 +41,8 @@ static DEFINE_MUTEX(nb_smu_ind_mutex); #define PCI_DEVICE_ID_AMD_17H_DF_F3 0x1463 #endif -#ifndef PCI_DEVICE_ID_AMD_17H_RR_NB -#define PCI_DEVICE_ID_AMD_17H_RR_NB 0x15d0 +#ifndef PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 +#define PCI_DEVICE_ID_AMD_17H_M10H_DF_F3 0x15eb #endif /* CPUID function 0x80000001, ebx */ @@ -63,10 +64,12 @@ static DEFINE_MUTEX(nb_smu_ind_mutex); #define NB_CAP_HTC 0x00000400 /* - * For F15h M60h, functionality of REG_REPORTED_TEMPERATURE - * has been moved to D0F0xBC_xD820_0CA4 [Reported Temperature - * Control] + * For F15h M60h and M70h, REG_HARDWARE_THERMAL_CONTROL + * and REG_REPORTED_TEMPERATURE have been moved to + * D0F0xBC_xD820_0C64 [Hardware Temperature Control] + * D0F0xBC_xD820_0CA4 [Reported Temperature Control] */ +#define F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET 0xd8200c64 #define F15H_M60H_REPORTED_TEMP_CTRL_OFFSET 0xd8200ca4 /* F17h M01h Access througn SMN */ @@ -74,6 +77,7 @@ static DEFINE_MUTEX(nb_smu_ind_mutex); struct k10temp_data { struct pci_dev *pdev; + void (*read_htcreg)(struct pci_dev *pdev, u32 *regval); void (*read_tempreg)(struct pci_dev *pdev, u32 *regval); int temp_offset; u32 temp_adjust_mask; @@ -98,6 +102,11 @@ static const struct tctl_offset tctl_offset_table[] = { { 0x17, "AMD Ryzen Threadripper 1910", 10000 }, }; +static void read_htcreg_pci(struct pci_dev *pdev, u32 *regval) +{ + pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL, regval); +} + static void read_tempreg_pci(struct pci_dev *pdev, u32 *regval) { pci_read_config_dword(pdev, REG_REPORTED_TEMPERATURE, regval); @@ -114,6 +123,12 @@ static void amd_nb_index_read(struct pci_dev *pdev, unsigned int devfn, mutex_unlock(&nb_smu_ind_mutex); } +static void read_htcreg_nb_f15(struct pci_dev *pdev, u32 *regval) +{ + amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8, + F15H_M60H_HARDWARE_TEMP_CTRL_OFFSET, regval); +} + static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval) { amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0xb8, @@ -122,8 +137,8 @@ static void read_tempreg_nb_f15(struct pci_dev *pdev, u32 *regval) static void read_tempreg_nb_f17(struct pci_dev *pdev, u32 *regval) { - amd_nb_index_read(pdev, PCI_DEVFN(0, 0), 0x60, - F17H_M01H_REPORTED_TEMP_CTRL_OFFSET, regval); + amd_smn_read(amd_pci_dev_to_node_id(pdev), + F17H_M01H_REPORTED_TEMP_CTRL_OFFSET, regval); } static ssize_t temp1_input_show(struct device *dev, @@ -160,8 +175,7 @@ static ssize_t show_temp_crit(struct device *dev, u32 regval; int value; - pci_read_config_dword(data->pdev, - REG_HARDWARE_THERMAL_CONTROL, ®val); + data->read_htcreg(data->pdev, ®val); value = ((regval >> 16) & 0x7f) * 500 + 52000; if (show_hyst) value -= ((regval >> 24) & 0xf) * 500; @@ -181,13 +195,18 @@ static umode_t k10temp_is_visible(struct kobject *kobj, struct pci_dev *pdev = data->pdev; if (index >= 2) { - u32 reg_caps, reg_htc; + u32 reg; + + if (!data->read_htcreg) + return 0; pci_read_config_dword(pdev, REG_NORTHBRIDGE_CAPABILITIES, - ®_caps); - pci_read_config_dword(pdev, REG_HARDWARE_THERMAL_CONTROL, - ®_htc); - if (!(reg_caps & NB_CAP_HTC) || !(reg_htc & HTC_ENABLE)) + ®); + if (!(reg & NB_CAP_HTC)) + return 0; + + data->read_htcreg(data->pdev, ®); + if (!(reg & HTC_ENABLE)) return 0; } return attr->mode; @@ -268,11 +287,13 @@ static int k10temp_probe(struct pci_dev *pdev, if (boot_cpu_data.x86 == 0x15 && (boot_cpu_data.x86_model == 0x60 || boot_cpu_data.x86_model == 0x70)) { + data->read_htcreg = read_htcreg_nb_f15; data->read_tempreg = read_tempreg_nb_f15; } else if (boot_cpu_data.x86 == 0x17) { data->temp_adjust_mask = 0x80000; data->read_tempreg = read_tempreg_nb_f17; } else { + data->read_htcreg = read_htcreg_pci; data->read_tempreg = read_tempreg_pci; } @@ -302,7 +323,7 @@ static const struct pci_device_id k10temp_id_table[] = { { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_NB_F3) }, { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_16H_M30H_NB_F3) }, { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_DF_F3) }, - { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_RR_NB) }, + { PCI_VDEVICE(AMD, PCI_DEVICE_ID_AMD_17H_M10H_DF_F3) }, {} }; MODULE_DEVICE_TABLE(pci, k10temp_id_table); diff --git a/drivers/i2c/busses/i2c-designware-master.c b/drivers/i2c/busses/i2c-designware-master.c index fd36c39ddf4e..0cdba29ae0a9 100644 --- a/drivers/i2c/busses/i2c-designware-master.c +++ b/drivers/i2c/busses/i2c-designware-master.c @@ -209,7 +209,10 @@ static void i2c_dw_xfer_init(struct dw_i2c_dev *dev) i2c_dw_disable_int(dev); /* Enable the adapter */ - __i2c_dw_enable_and_wait(dev, true); + __i2c_dw_enable(dev, true); + + /* Dummy read to avoid the register getting stuck on Bay Trail */ + dw_readl(dev, DW_IC_ENABLE_STATUS); /* Clear and enable interrupts */ dw_readl(dev, DW_IC_CLR_INTR); diff --git a/drivers/i2c/busses/i2c-pmcmsp.c b/drivers/i2c/busses/i2c-pmcmsp.c index 2aa0e83174c5..dae8ac618a52 100644 --- a/drivers/i2c/busses/i2c-pmcmsp.c +++ b/drivers/i2c/busses/i2c-pmcmsp.c @@ -564,10 +564,10 @@ static int pmcmsptwi_master_xfer(struct i2c_adapter *adap, * TODO: We could potentially loop and retry in the case * of MSP_TWI_XFER_TIMEOUT. */ - return -1; + return -EIO; } - return 0; + return num; } static u32 pmcmsptwi_i2c_func(struct i2c_adapter *adapter) diff --git a/drivers/i2c/busses/i2c-viperboard.c b/drivers/i2c/busses/i2c-viperboard.c index e4be86b3de9a..7235c7302bb7 100644 --- a/drivers/i2c/busses/i2c-viperboard.c +++ b/drivers/i2c/busses/i2c-viperboard.c @@ -337,7 +337,7 @@ static int vprbrd_i2c_xfer(struct i2c_adapter *i2c, struct i2c_msg *msgs, } mutex_unlock(&vb->lock); } - return 0; + return num; error: mutex_unlock(&vb->lock); return error; diff --git a/drivers/i2c/i2c-core-acpi.c b/drivers/i2c/i2c-core-acpi.c index a9126b3cda61..7c3b4740b94b 100644 --- a/drivers/i2c/i2c-core-acpi.c +++ b/drivers/i2c/i2c-core-acpi.c @@ -445,10 +445,17 @@ static int acpi_gsb_i2c_read_bytes(struct i2c_client *client, msgs[1].buf = buffer; ret = i2c_transfer(client->adapter, msgs, ARRAY_SIZE(msgs)); - if (ret < 0) - dev_err(&client->adapter->dev, "i2c read failed\n"); - else + if (ret < 0) { + /* Getting a NACK is unfortunately normal with some DSTDs */ + if (ret == -EREMOTEIO) + dev_dbg(&client->adapter->dev, "i2c read %d bytes from client@%#x starting at reg %#x failed, error: %d\n", + data_len, client->addr, cmd, ret); + else + dev_err(&client->adapter->dev, "i2c read %d bytes from client@%#x starting at reg %#x failed, error: %d\n", + data_len, client->addr, cmd, ret); + } else { memcpy(data, buffer, data_len); + } kfree(buffer); return ret; diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index 4e63c6f6c04d..d030ce3025a6 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -250,7 +250,9 @@ void bch_debug_exit(void) int __init bch_debug_init(struct kobject *kobj) { - bcache_debug = debugfs_create_dir("bcache", NULL); + if (!IS_ENABLED(CONFIG_DEBUG_FS)) + return 0; + bcache_debug = debugfs_create_dir("bcache", NULL); return IS_ERR_OR_NULL(bcache_debug); } diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index a4c9c8297a6d..918d4fb742d1 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -717,6 +717,7 @@ struct cxl { bool perst_select_user; bool perst_same_image; bool psl_timebase_synced; + bool tunneled_ops_supported; /* * number of contexts mapped on to this card. Possible values are: diff --git a/drivers/misc/cxl/pci.c b/drivers/misc/cxl/pci.c index 83f1d08058fc..4d6736f9d463 100644 --- a/drivers/misc/cxl/pci.c +++ b/drivers/misc/cxl/pci.c @@ -1742,6 +1742,15 @@ static int cxl_configure_adapter(struct cxl *adapter, struct pci_dev *dev) /* Required for devices using CAPP DMA mode, harmless for others */ pci_set_master(dev); + adapter->tunneled_ops_supported = false; + + if (cxl_is_power9()) { + if (pnv_pci_set_tunnel_bar(dev, 0x00020000E0000000ull, 1)) + dev_info(&dev->dev, "Tunneled operations unsupported\n"); + else + adapter->tunneled_ops_supported = true; + } + if ((rc = pnv_phb_to_cxl_mode(dev, adapter->native->sl_ops->capi_mode))) goto err; @@ -1768,6 +1777,9 @@ static void cxl_deconfigure_adapter(struct cxl *adapter) { struct pci_dev *pdev = to_pci_dev(adapter->dev.parent); + if (cxl_is_power9()) + pnv_pci_set_tunnel_bar(pdev, 0x00020000E0000000ull, 0); + cxl_native_release_psl_err_irq(adapter); cxl_unmap_adapter_regs(adapter); diff --git a/drivers/misc/cxl/sysfs.c b/drivers/misc/cxl/sysfs.c index 95285b7f636f..4b5a4c5d3c01 100644 --- a/drivers/misc/cxl/sysfs.c +++ b/drivers/misc/cxl/sysfs.c @@ -78,6 +78,15 @@ static ssize_t psl_timebase_synced_show(struct device *device, return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->psl_timebase_synced); } +static ssize_t tunneled_ops_supported_show(struct device *device, + struct device_attribute *attr, + char *buf) +{ + struct cxl *adapter = to_cxl_adapter(device); + + return scnprintf(buf, PAGE_SIZE, "%i\n", adapter->tunneled_ops_supported); +} + static ssize_t reset_adapter_store(struct device *device, struct device_attribute *attr, const char *buf, size_t count) @@ -183,6 +192,7 @@ static struct device_attribute adapter_attrs[] = { __ATTR_RO(base_image), __ATTR_RO(image_loaded), __ATTR_RO(psl_timebase_synced), + __ATTR_RO(tunneled_ops_supported), __ATTR_RW(load_image_on_perst), __ATTR_RW(perst_reloads_same_image), __ATTR(reset, S_IWUSR, NULL, reset_adapter_store), diff --git a/drivers/misc/eeprom/at24.c b/drivers/misc/eeprom/at24.c index 0c125f207aea..33053b0d1fdf 100644 --- a/drivers/misc/eeprom/at24.c +++ b/drivers/misc/eeprom/at24.c @@ -518,7 +518,7 @@ static int at24_get_pdata(struct device *dev, struct at24_platform_data *pdata) if (of_node && of_match_device(at24_of_match, dev)) cdata = of_device_get_match_data(dev); else if (id) - cdata = (void *)&id->driver_data; + cdata = (void *)id->driver_data; else cdata = acpi_device_get_match_data(dev); diff --git a/drivers/mtd/nand/raw/marvell_nand.c b/drivers/mtd/nand/raw/marvell_nand.c index db5ec4e8bde9..ebb1d141b900 100644 --- a/drivers/mtd/nand/raw/marvell_nand.c +++ b/drivers/mtd/nand/raw/marvell_nand.c @@ -1194,11 +1194,13 @@ static void marvell_nfc_hw_ecc_bch_read_chunk(struct nand_chip *chip, int chunk, NDCB0_CMD2(NAND_CMD_READSTART); /* - * Trigger the naked read operation only on the last chunk. - * Otherwise, use monolithic read. + * Trigger the monolithic read on the first chunk, then naked read on + * intermediate chunks and finally a last naked read on the last chunk. */ - if (lt->nchunks == 1 || (chunk < lt->nchunks - 1)) + if (chunk == 0) nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_MONOLITHIC_RW); + else if (chunk < lt->nchunks - 1) + nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_NAKED_RW); else nfc_op.ndcb[0] |= NDCB0_CMD_XTYPE(XTYPE_LAST_NAKED_RW); diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index 23b45da784cb..b89acaee12d4 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -354,10 +354,13 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, /* Locate the first rule available */ if (fs->location == RX_CLS_LOC_ANY) rule_index = find_first_zero_bit(priv->cfp.used, - bcm_sf2_cfp_rule_size(priv)); + priv->num_cfp_rules); else rule_index = fs->location; + if (rule_index > bcm_sf2_cfp_rule_size(priv)) + return -ENOSPC; + layout = &udf_tcpip4_layout; /* We only use one UDF slice for now */ slice_num = bcm_sf2_get_slice_number(layout, 0); @@ -562,19 +565,21 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, * first half because the HW search is by incrementing addresses. */ if (fs->location == RX_CLS_LOC_ANY) - rule_index[0] = find_first_zero_bit(priv->cfp.used, - bcm_sf2_cfp_rule_size(priv)); + rule_index[1] = find_first_zero_bit(priv->cfp.used, + priv->num_cfp_rules); else - rule_index[0] = fs->location; + rule_index[1] = fs->location; + if (rule_index[1] > bcm_sf2_cfp_rule_size(priv)) + return -ENOSPC; /* Flag it as used (cleared on error path) such that we can immediately * obtain a second one to chain from. */ - set_bit(rule_index[0], priv->cfp.used); + set_bit(rule_index[1], priv->cfp.used); - rule_index[1] = find_first_zero_bit(priv->cfp.used, - bcm_sf2_cfp_rule_size(priv)); - if (rule_index[1] > bcm_sf2_cfp_rule_size(priv)) { + rule_index[0] = find_first_zero_bit(priv->cfp.used, + priv->num_cfp_rules); + if (rule_index[0] > bcm_sf2_cfp_rule_size(priv)) { ret = -ENOSPC; goto out_err; } @@ -712,14 +717,14 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, /* Flag the second half rule as being used now, return it as the * location, and flag it as unique while dumping rules */ - set_bit(rule_index[1], priv->cfp.used); + set_bit(rule_index[0], priv->cfp.used); set_bit(rule_index[1], priv->cfp.unique); fs->location = rule_index[1]; return ret; out_err: - clear_bit(rule_index[0], priv->cfp.used); + clear_bit(rule_index[1], priv->cfp.used); return ret; } @@ -785,10 +790,6 @@ static int bcm_sf2_cfp_rule_del_one(struct bcm_sf2_priv *priv, int port, int ret; u32 reg; - /* Refuse deletion of unused rules, and the default reserved rule */ - if (!test_bit(loc, priv->cfp.used) || loc == 0) - return -EINVAL; - /* Indicate which rule we want to read */ bcm_sf2_cfp_rule_addr_set(priv, loc); @@ -826,6 +827,13 @@ static int bcm_sf2_cfp_rule_del(struct bcm_sf2_priv *priv, int port, u32 next_loc = 0; int ret; + /* Refuse deleting unused rules, and those that are not unique since + * that could leave IPv6 rules with one of the chained rule in the + * table. + */ + if (!test_bit(loc, priv->cfp.unique) || loc == 0) + return -EINVAL; + ret = bcm_sf2_cfp_rule_del_one(priv, port, loc, &next_loc); if (ret) return ret; diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c index cabbe227bb98..5bc168314ea2 100644 --- a/drivers/net/ethernet/3com/3c59x.c +++ b/drivers/net/ethernet/3com/3c59x.c @@ -1209,9 +1209,9 @@ static int vortex_probe1(struct device *gendev, void __iomem *ioaddr, int irq, vp->mii.reg_num_mask = 0x1f; /* Makes sure rings are at least 16 byte aligned. */ - vp->rx_ring = pci_alloc_consistent(pdev, sizeof(struct boom_rx_desc) * RX_RING_SIZE + vp->rx_ring = dma_alloc_coherent(gendev, sizeof(struct boom_rx_desc) * RX_RING_SIZE + sizeof(struct boom_tx_desc) * TX_RING_SIZE, - &vp->rx_ring_dma); + &vp->rx_ring_dma, GFP_KERNEL); retval = -ENOMEM; if (!vp->rx_ring) goto free_device; @@ -1473,11 +1473,10 @@ static int vortex_probe1(struct device *gendev, void __iomem *ioaddr, int irq, return 0; free_ring: - pci_free_consistent(pdev, - sizeof(struct boom_rx_desc) * RX_RING_SIZE - + sizeof(struct boom_tx_desc) * TX_RING_SIZE, - vp->rx_ring, - vp->rx_ring_dma); + dma_free_coherent(&pdev->dev, + sizeof(struct boom_rx_desc) * RX_RING_SIZE + + sizeof(struct boom_tx_desc) * TX_RING_SIZE, + vp->rx_ring, vp->rx_ring_dma); free_device: free_netdev(dev); pr_err(PFX "vortex_probe1 fails. Returns %d\n", retval); @@ -1747,9 +1746,9 @@ vortex_open(struct net_device *dev) break; /* Bad news! */ skb_reserve(skb, NET_IP_ALIGN); /* Align IP on 16 byte boundaries */ - dma = pci_map_single(VORTEX_PCI(vp), skb->data, - PKT_BUF_SZ, PCI_DMA_FROMDEVICE); - if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma)) + dma = dma_map_single(vp->gendev, skb->data, + PKT_BUF_SZ, DMA_FROM_DEVICE); + if (dma_mapping_error(vp->gendev, dma)) break; vp->rx_ring[i].addr = cpu_to_le32(dma); } @@ -2052,9 +2051,9 @@ vortex_start_xmit(struct sk_buff *skb, struct net_device *dev) if (vp->bus_master) { /* Set the bus-master controller to transfer the packet. */ int len = (skb->len + 3) & ~3; - vp->tx_skb_dma = pci_map_single(VORTEX_PCI(vp), skb->data, len, - PCI_DMA_TODEVICE); - if (dma_mapping_error(&VORTEX_PCI(vp)->dev, vp->tx_skb_dma)) { + vp->tx_skb_dma = dma_map_single(vp->gendev, skb->data, len, + DMA_TO_DEVICE); + if (dma_mapping_error(vp->gendev, vp->tx_skb_dma)) { dev_kfree_skb_any(skb); dev->stats.tx_dropped++; return NETDEV_TX_OK; @@ -2153,9 +2152,9 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev) vp->tx_ring[entry].status = cpu_to_le32(skb->len | TxIntrUploaded | AddTCPChksum | AddUDPChksum); if (!skb_shinfo(skb)->nr_frags) { - dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data, skb->len, - PCI_DMA_TODEVICE); - if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr)) + dma_addr = dma_map_single(vp->gendev, skb->data, skb->len, + DMA_TO_DEVICE); + if (dma_mapping_error(vp->gendev, dma_addr)) goto out_dma_err; vp->tx_ring[entry].frag[0].addr = cpu_to_le32(dma_addr); @@ -2163,9 +2162,9 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev) } else { int i; - dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data, - skb_headlen(skb), PCI_DMA_TODEVICE); - if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr)) + dma_addr = dma_map_single(vp->gendev, skb->data, + skb_headlen(skb), DMA_TO_DEVICE); + if (dma_mapping_error(vp->gendev, dma_addr)) goto out_dma_err; vp->tx_ring[entry].frag[0].addr = cpu_to_le32(dma_addr); @@ -2174,21 +2173,21 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev) for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - dma_addr = skb_frag_dma_map(&VORTEX_PCI(vp)->dev, frag, + dma_addr = skb_frag_dma_map(vp->gendev, frag, 0, frag->size, DMA_TO_DEVICE); - if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr)) { + if (dma_mapping_error(vp->gendev, dma_addr)) { for(i = i-1; i >= 0; i--) - dma_unmap_page(&VORTEX_PCI(vp)->dev, + dma_unmap_page(vp->gendev, le32_to_cpu(vp->tx_ring[entry].frag[i+1].addr), le32_to_cpu(vp->tx_ring[entry].frag[i+1].length), DMA_TO_DEVICE); - pci_unmap_single(VORTEX_PCI(vp), + dma_unmap_single(vp->gendev, le32_to_cpu(vp->tx_ring[entry].frag[0].addr), le32_to_cpu(vp->tx_ring[entry].frag[0].length), - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); goto out_dma_err; } @@ -2203,8 +2202,8 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev) } } #else - dma_addr = pci_map_single(VORTEX_PCI(vp), skb->data, skb->len, PCI_DMA_TODEVICE); - if (dma_mapping_error(&VORTEX_PCI(vp)->dev, dma_addr)) + dma_addr = dma_map_single(vp->gendev, skb->data, skb->len, DMA_TO_DEVICE); + if (dma_mapping_error(vp->gendev, dma_addr)) goto out_dma_err; vp->tx_ring[entry].addr = cpu_to_le32(dma_addr); vp->tx_ring[entry].length = cpu_to_le32(skb->len | LAST_FRAG); @@ -2239,7 +2238,7 @@ boomerang_start_xmit(struct sk_buff *skb, struct net_device *dev) out: return NETDEV_TX_OK; out_dma_err: - dev_err(&VORTEX_PCI(vp)->dev, "Error mapping dma buffer\n"); + dev_err(vp->gendev, "Error mapping dma buffer\n"); goto out; } @@ -2305,7 +2304,7 @@ _vortex_interrupt(int irq, struct net_device *dev) if (status & DMADone) { if (ioread16(ioaddr + Wn7_MasterStatus) & 0x1000) { iowrite16(0x1000, ioaddr + Wn7_MasterStatus); /* Ack the event. */ - pci_unmap_single(VORTEX_PCI(vp), vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, PCI_DMA_TODEVICE); + dma_unmap_single(vp->gendev, vp->tx_skb_dma, (vp->tx_skb->len + 3) & ~3, DMA_TO_DEVICE); pkts_compl++; bytes_compl += vp->tx_skb->len; dev_kfree_skb_irq(vp->tx_skb); /* Release the transferred buffer */ @@ -2434,19 +2433,19 @@ _boomerang_interrupt(int irq, struct net_device *dev) struct sk_buff *skb = vp->tx_skbuff[entry]; #if DO_ZEROCOPY int i; - pci_unmap_single(VORTEX_PCI(vp), + dma_unmap_single(vp->gendev, le32_to_cpu(vp->tx_ring[entry].frag[0].addr), le32_to_cpu(vp->tx_ring[entry].frag[0].length)&0xFFF, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); for (i=1; i<=skb_shinfo(skb)->nr_frags; i++) - pci_unmap_page(VORTEX_PCI(vp), + dma_unmap_page(vp->gendev, le32_to_cpu(vp->tx_ring[entry].frag[i].addr), le32_to_cpu(vp->tx_ring[entry].frag[i].length)&0xFFF, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); #else - pci_unmap_single(VORTEX_PCI(vp), - le32_to_cpu(vp->tx_ring[entry].addr), skb->len, PCI_DMA_TODEVICE); + dma_unmap_single(vp->gendev, + le32_to_cpu(vp->tx_ring[entry].addr), skb->len, DMA_TO_DEVICE); #endif pkts_compl++; bytes_compl += skb->len; @@ -2555,14 +2554,14 @@ static int vortex_rx(struct net_device *dev) /* 'skb_put()' points to the start of sk_buff data area. */ if (vp->bus_master && ! (ioread16(ioaddr + Wn7_MasterStatus) & 0x8000)) { - dma_addr_t dma = pci_map_single(VORTEX_PCI(vp), skb_put(skb, pkt_len), - pkt_len, PCI_DMA_FROMDEVICE); + dma_addr_t dma = dma_map_single(vp->gendev, skb_put(skb, pkt_len), + pkt_len, DMA_FROM_DEVICE); iowrite32(dma, ioaddr + Wn7_MasterAddr); iowrite16((skb->len + 3) & ~3, ioaddr + Wn7_MasterLen); iowrite16(StartDMAUp, ioaddr + EL3_CMD); while (ioread16(ioaddr + Wn7_MasterStatus) & 0x8000) ; - pci_unmap_single(VORTEX_PCI(vp), dma, pkt_len, PCI_DMA_FROMDEVICE); + dma_unmap_single(vp->gendev, dma, pkt_len, DMA_FROM_DEVICE); } else { ioread32_rep(ioaddr + RX_FIFO, skb_put(skb, pkt_len), @@ -2629,11 +2628,11 @@ boomerang_rx(struct net_device *dev) if (pkt_len < rx_copybreak && (skb = netdev_alloc_skb(dev, pkt_len + 2)) != NULL) { skb_reserve(skb, 2); /* Align IP on 16 byte boundaries */ - pci_dma_sync_single_for_cpu(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + dma_sync_single_for_cpu(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE); /* 'skb_put()' points to the start of sk_buff data area. */ skb_put_data(skb, vp->rx_skbuff[entry]->data, pkt_len); - pci_dma_sync_single_for_device(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + dma_sync_single_for_device(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE); vp->rx_copy++; } else { /* Pre-allocate the replacement skb. If it or its @@ -2645,9 +2644,9 @@ boomerang_rx(struct net_device *dev) dev->stats.rx_dropped++; goto clear_complete; } - newdma = pci_map_single(VORTEX_PCI(vp), newskb->data, - PKT_BUF_SZ, PCI_DMA_FROMDEVICE); - if (dma_mapping_error(&VORTEX_PCI(vp)->dev, newdma)) { + newdma = dma_map_single(vp->gendev, newskb->data, + PKT_BUF_SZ, DMA_FROM_DEVICE); + if (dma_mapping_error(vp->gendev, newdma)) { dev->stats.rx_dropped++; consume_skb(newskb); goto clear_complete; @@ -2658,7 +2657,7 @@ boomerang_rx(struct net_device *dev) vp->rx_skbuff[entry] = newskb; vp->rx_ring[entry].addr = cpu_to_le32(newdma); skb_put(skb, pkt_len); - pci_unmap_single(VORTEX_PCI(vp), dma, PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + dma_unmap_single(vp->gendev, dma, PKT_BUF_SZ, DMA_FROM_DEVICE); vp->rx_nocopy++; } skb->protocol = eth_type_trans(skb, dev); @@ -2755,8 +2754,8 @@ vortex_close(struct net_device *dev) if (vp->full_bus_master_rx) { /* Free Boomerang bus master Rx buffers. */ for (i = 0; i < RX_RING_SIZE; i++) if (vp->rx_skbuff[i]) { - pci_unmap_single( VORTEX_PCI(vp), le32_to_cpu(vp->rx_ring[i].addr), - PKT_BUF_SZ, PCI_DMA_FROMDEVICE); + dma_unmap_single(vp->gendev, le32_to_cpu(vp->rx_ring[i].addr), + PKT_BUF_SZ, DMA_FROM_DEVICE); dev_kfree_skb(vp->rx_skbuff[i]); vp->rx_skbuff[i] = NULL; } @@ -2769,12 +2768,12 @@ vortex_close(struct net_device *dev) int k; for (k=0; k<=skb_shinfo(skb)->nr_frags; k++) - pci_unmap_single(VORTEX_PCI(vp), + dma_unmap_single(vp->gendev, le32_to_cpu(vp->tx_ring[i].frag[k].addr), le32_to_cpu(vp->tx_ring[i].frag[k].length)&0xFFF, - PCI_DMA_TODEVICE); + DMA_TO_DEVICE); #else - pci_unmap_single(VORTEX_PCI(vp), le32_to_cpu(vp->tx_ring[i].addr), skb->len, PCI_DMA_TODEVICE); + dma_unmap_single(vp->gendev, le32_to_cpu(vp->tx_ring[i].addr), skb->len, DMA_TO_DEVICE); #endif dev_kfree_skb(skb); vp->tx_skbuff[i] = NULL; @@ -3282,11 +3281,10 @@ static void vortex_remove_one(struct pci_dev *pdev) pci_iounmap(pdev, vp->ioaddr); - pci_free_consistent(pdev, - sizeof(struct boom_rx_desc) * RX_RING_SIZE - + sizeof(struct boom_tx_desc) * TX_RING_SIZE, - vp->rx_ring, - vp->rx_ring_dma); + dma_free_coherent(&pdev->dev, + sizeof(struct boom_rx_desc) * RX_RING_SIZE + + sizeof(struct boom_tx_desc) * TX_RING_SIZE, + vp->rx_ring, vp->rx_ring_dma); pci_release_regions(pdev); diff --git a/drivers/net/ethernet/8390/ne.c b/drivers/net/ethernet/8390/ne.c index ac99d089ac72..1c97e39b478e 100644 --- a/drivers/net/ethernet/8390/ne.c +++ b/drivers/net/ethernet/8390/ne.c @@ -164,7 +164,9 @@ bad_clone_list[] __initdata = { #define NESM_START_PG 0x40 /* First page of TX buffer */ #define NESM_STOP_PG 0x80 /* Last page +1 of RX ring */ -#if defined(CONFIG_ATARI) /* 8-bit mode on Atari, normal on Q40 */ +#if defined(CONFIG_MACH_TX49XX) +# define DCR_VAL 0x48 /* 8-bit mode */ +#elif defined(CONFIG_ATARI) /* 8-bit mode on Atari, normal on Q40 */ # define DCR_VAL (MACH_IS_ATARI ? 0x48 : 0x49) #else # define DCR_VAL 0x49 diff --git a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h index c333e25620a7..3c5057868ab3 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cudbg_entity.h @@ -439,15 +439,15 @@ static const u32 t6_up_cim_reg_array[][IREG_NUM_ELEM + 1] = { {0x7b50, 0x7b54, 0x280, 0x20, 0}, /* up_cim_280_to_2fc */ {0x7b50, 0x7b54, 0x300, 0x20, 0}, /* up_cim_300_to_37c */ {0x7b50, 0x7b54, 0x380, 0x14, 0}, /* up_cim_380_to_3cc */ - {0x7b50, 0x7b54, 0x2900, 0x4, 0x4}, /* up_cim_2900_to_3d40 */ - {0x7b50, 0x7b54, 0x2904, 0x4, 0x4}, /* up_cim_2904_to_3d44 */ - {0x7b50, 0x7b54, 0x2908, 0x4, 0x4}, /* up_cim_2908_to_3d48 */ - {0x7b50, 0x7b54, 0x2910, 0x4, 0x4}, /* up_cim_2910_to_3d4c */ - {0x7b50, 0x7b54, 0x2914, 0x4, 0x4}, /* up_cim_2914_to_3d50 */ - {0x7b50, 0x7b54, 0x2920, 0x10, 0x10}, /* up_cim_2920_to_2a10 */ - {0x7b50, 0x7b54, 0x2924, 0x10, 0x10}, /* up_cim_2924_to_2a14 */ - {0x7b50, 0x7b54, 0x2928, 0x10, 0x10}, /* up_cim_2928_to_2a18 */ - {0x7b50, 0x7b54, 0x292c, 0x10, 0x10}, /* up_cim_292c_to_2a1c */ + {0x7b50, 0x7b54, 0x4900, 0x4, 0x4}, /* up_cim_4900_to_4c60 */ + {0x7b50, 0x7b54, 0x4904, 0x4, 0x4}, /* up_cim_4904_to_4c64 */ + {0x7b50, 0x7b54, 0x4908, 0x4, 0x4}, /* up_cim_4908_to_4c68 */ + {0x7b50, 0x7b54, 0x4910, 0x4, 0x4}, /* up_cim_4910_to_4c70 */ + {0x7b50, 0x7b54, 0x4914, 0x4, 0x4}, /* up_cim_4914_to_4c74 */ + {0x7b50, 0x7b54, 0x4920, 0x10, 0x10}, /* up_cim_4920_to_4a10 */ + {0x7b50, 0x7b54, 0x4924, 0x10, 0x10}, /* up_cim_4924_to_4a14 */ + {0x7b50, 0x7b54, 0x4928, 0x10, 0x10}, /* up_cim_4928_to_4a18 */ + {0x7b50, 0x7b54, 0x492c, 0x10, 0x10}, /* up_cim_492c_to_4a1c */ }; static const u32 t5_up_cim_reg_array[][IREG_NUM_ELEM + 1] = { @@ -464,16 +464,6 @@ static const u32 t5_up_cim_reg_array[][IREG_NUM_ELEM + 1] = { {0x7b50, 0x7b54, 0x280, 0x20, 0}, /* up_cim_280_to_2fc */ {0x7b50, 0x7b54, 0x300, 0x20, 0}, /* up_cim_300_to_37c */ {0x7b50, 0x7b54, 0x380, 0x14, 0}, /* up_cim_380_to_3cc */ - {0x7b50, 0x7b54, 0x2900, 0x4, 0x4}, /* up_cim_2900_to_3d40 */ - {0x7b50, 0x7b54, 0x2904, 0x4, 0x4}, /* up_cim_2904_to_3d44 */ - {0x7b50, 0x7b54, 0x2908, 0x4, 0x4}, /* up_cim_2908_to_3d48 */ - {0x7b50, 0x7b54, 0x2910, 0x4, 0x4}, /* up_cim_2910_to_3d4c */ - {0x7b50, 0x7b54, 0x2914, 0x4, 0x4}, /* up_cim_2914_to_3d50 */ - {0x7b50, 0x7b54, 0x2918, 0x4, 0x4}, /* up_cim_2918_to_3d54 */ - {0x7b50, 0x7b54, 0x291c, 0x4, 0x4}, /* up_cim_291c_to_3d58 */ - {0x7b50, 0x7b54, 0x2924, 0x10, 0x10}, /* up_cim_2924_to_2914 */ - {0x7b50, 0x7b54, 0x2928, 0x10, 0x10}, /* up_cim_2928_to_2a18 */ - {0x7b50, 0x7b54, 0x292c, 0x10, 0x10}, /* up_cim_292c_to_2a1c */ }; static const u32 t6_hma_ireg_array[][IREG_NUM_ELEM] = { diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c index ac4d7f75fdc2..00fc5f1afb1d 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_filter.c @@ -852,7 +852,7 @@ bool is_filter_exact_match(struct adapter *adap, { struct tp_params *tp = &adap->params.tp; u64 hash_filter_mask = tp->hash_filter_mask; - u32 mask; + u64 ntuple_mask = 0; if (!is_hashfilter(adap)) return false; @@ -885,73 +885,45 @@ bool is_filter_exact_match(struct adapter *adap, if (!fs->val.fport || fs->mask.fport != 0xffff) return false; - if (tp->fcoe_shift >= 0) { - mask = (hash_filter_mask >> tp->fcoe_shift) & FT_FCOE_W; - if (mask && !fs->mask.fcoe) - return false; - } + /* calculate tuple mask and compare with mask configured in hw */ + if (tp->fcoe_shift >= 0) + ntuple_mask |= (u64)fs->mask.fcoe << tp->fcoe_shift; - if (tp->port_shift >= 0) { - mask = (hash_filter_mask >> tp->port_shift) & FT_PORT_W; - if (mask && !fs->mask.iport) - return false; - } + if (tp->port_shift >= 0) + ntuple_mask |= (u64)fs->mask.iport << tp->port_shift; if (tp->vnic_shift >= 0) { - mask = (hash_filter_mask >> tp->vnic_shift) & FT_VNIC_ID_W; - - if ((adap->params.tp.ingress_config & VNIC_F)) { - if (mask && !fs->mask.pfvf_vld) - return false; - } else { - if (mask && !fs->mask.ovlan_vld) - return false; - } + if ((adap->params.tp.ingress_config & VNIC_F)) + ntuple_mask |= (u64)fs->mask.pfvf_vld << tp->vnic_shift; + else + ntuple_mask |= (u64)fs->mask.ovlan_vld << + tp->vnic_shift; } - if (tp->vlan_shift >= 0) { - mask = (hash_filter_mask >> tp->vlan_shift) & FT_VLAN_W; - if (mask && !fs->mask.ivlan) - return false; - } + if (tp->vlan_shift >= 0) + ntuple_mask |= (u64)fs->mask.ivlan << tp->vlan_shift; - if (tp->tos_shift >= 0) { - mask = (hash_filter_mask >> tp->tos_shift) & FT_TOS_W; - if (mask && !fs->mask.tos) - return false; - } + if (tp->tos_shift >= 0) + ntuple_mask |= (u64)fs->mask.tos << tp->tos_shift; - if (tp->protocol_shift >= 0) { - mask = (hash_filter_mask >> tp->protocol_shift) & FT_PROTOCOL_W; - if (mask && !fs->mask.proto) - return false; - } + if (tp->protocol_shift >= 0) + ntuple_mask |= (u64)fs->mask.proto << tp->protocol_shift; - if (tp->ethertype_shift >= 0) { - mask = (hash_filter_mask >> tp->ethertype_shift) & - FT_ETHERTYPE_W; - if (mask && !fs->mask.ethtype) - return false; - } + if (tp->ethertype_shift >= 0) + ntuple_mask |= (u64)fs->mask.ethtype << tp->ethertype_shift; - if (tp->macmatch_shift >= 0) { - mask = (hash_filter_mask >> tp->macmatch_shift) & FT_MACMATCH_W; - if (mask && !fs->mask.macidx) - return false; - } + if (tp->macmatch_shift >= 0) + ntuple_mask |= (u64)fs->mask.macidx << tp->macmatch_shift; + + if (tp->matchtype_shift >= 0) + ntuple_mask |= (u64)fs->mask.matchtype << tp->matchtype_shift; + + if (tp->frag_shift >= 0) + ntuple_mask |= (u64)fs->mask.frag << tp->frag_shift; + + if (ntuple_mask != hash_filter_mask) + return false; - if (tp->matchtype_shift >= 0) { - mask = (hash_filter_mask >> tp->matchtype_shift) & - FT_MPSHITTYPE_W; - if (mask && !fs->mask.matchtype) - return false; - } - if (tp->frag_shift >= 0) { - mask = (hash_filter_mask >> tp->frag_shift) & - FT_FRAGMENTATION_W; - if (mask && !fs->mask.frag) - return false; - } return true; } diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 6e8d6a6f6aaf..4bb4646a5f92 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -192,6 +192,7 @@ static int alloc_long_term_buff(struct ibmvnic_adapter *adapter, if (adapter->fw_done_rc) { dev_err(dev, "Couldn't map long term buffer,rc = %d\n", adapter->fw_done_rc); + dma_free_coherent(dev, ltb->size, ltb->buff, ltb->addr); return -1; } return 0; @@ -1821,9 +1822,8 @@ static int do_reset(struct ibmvnic_adapter *adapter, if (rc) return rc; } + ibmvnic_disable_irqs(adapter); } - - ibmvnic_disable_irqs(adapter); adapter->state = VNIC_CLOSED; if (reset_state == VNIC_CLOSED) @@ -4586,14 +4586,6 @@ static int ibmvnic_init(struct ibmvnic_adapter *adapter) release_crq_queue(adapter); } - rc = init_stats_buffers(adapter); - if (rc) - return rc; - - rc = init_stats_token(adapter); - if (rc) - return rc; - return rc; } @@ -4662,13 +4654,21 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) goto ibmvnic_init_fail; } while (rc == EAGAIN); + rc = init_stats_buffers(adapter); + if (rc) + goto ibmvnic_init_fail; + + rc = init_stats_token(adapter); + if (rc) + goto ibmvnic_stats_fail; + netdev->mtu = adapter->req_mtu - ETH_HLEN; netdev->min_mtu = adapter->min_mtu - ETH_HLEN; netdev->max_mtu = adapter->max_mtu - ETH_HLEN; rc = device_create_file(&dev->dev, &dev_attr_failover); if (rc) - goto ibmvnic_init_fail; + goto ibmvnic_dev_file_err; netif_carrier_off(netdev); rc = register_netdev(netdev); @@ -4687,6 +4687,12 @@ static int ibmvnic_probe(struct vio_dev *dev, const struct vio_device_id *id) ibmvnic_register_fail: device_remove_file(&dev->dev, &dev_attr_failover); +ibmvnic_dev_file_err: + release_stats_token(adapter); + +ibmvnic_stats_fail: + release_stats_buffers(adapter); + ibmvnic_init_fail: release_sub_crqs(adapter, 1); release_crq_queue(adapter); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 80a75c80a463..0a30d81aab3b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2932,6 +2932,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) mlx4_err(dev, "Failed to create file for port %d\n", port); devlink_port_unregister(&info->devlink_port); info->port = -1; + return err; } sprintf(info->dev_mtu_name, "mlx4_port%d_mtu", port); @@ -2953,9 +2954,10 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) &info->port_attr); devlink_port_unregister(&info->devlink_port); info->port = -1; + return err; } - return err; + return 0; } static void mlx4_cleanup_port_info(struct mlx4_port_info *info) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index f1846d8f59cc..fcdfb8e7fdea 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -353,7 +353,7 @@ static int nfp_bpf_parse_capabilities(struct nfp_app *app) return PTR_ERR(mem) == -ENOENT ? 0 : PTR_ERR(mem); start = mem; - while (mem - start + 8 < nfp_cpp_area_size(area)) { + while (mem - start + 8 <= nfp_cpp_area_size(area)) { u8 __iomem *value; u32 type, length; diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 40e2b923af39..7abaf2740530 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1066,13 +1066,12 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) DP_INFO(edev, "Starting qede_remove\n"); + qede_rdma_dev_remove(edev); unregister_netdev(ndev); cancel_delayed_work_sync(&edev->sp_task); qede_ptp_disable(edev); - qede_rdma_dev_remove(edev); - edev->ops->common->set_power_state(cdev, PCI_D0); pci_set_drvdata(pdev, NULL); diff --git a/drivers/net/ethernet/renesas/sh_eth.h b/drivers/net/ethernet/renesas/sh_eth.h index ceb420e556a6..5dee19b61aee 100644 --- a/drivers/net/ethernet/renesas/sh_eth.h +++ b/drivers/net/ethernet/renesas/sh_eth.h @@ -163,7 +163,7 @@ enum { }; /* Driver's parameters */ -#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_SHMOBILE) +#if defined(CONFIG_CPU_SH4) || defined(CONFIG_ARCH_RENESAS) #define SH_ETH_RX_ALIGN 32 #else #define SH_ETH_RX_ALIGN 2 diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c index 450eec264a5e..4377c26f714d 100644 --- a/drivers/net/ipvlan/ipvlan_main.c +++ b/drivers/net/ipvlan/ipvlan_main.c @@ -792,8 +792,10 @@ static int ipvlan_device_event(struct notifier_block *unused, break; case NETDEV_CHANGEADDR: - list_for_each_entry(ipvlan, &port->ipvlans, pnode) + list_for_each_entry(ipvlan, &port->ipvlans, pnode) { ether_addr_copy(ipvlan->dev->dev_addr, dev->dev_addr); + call_netdevice_notifiers(NETDEV_CHANGEADDR, ipvlan->dev); + } break; case NETDEV_PRE_TYPE_CHANGE: diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index de31c5170a5b..3db06b40580d 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -573,9 +573,40 @@ static int ksz9031_config_init(struct phy_device *phydev) ksz9031_of_load_skew_values(phydev, of_node, MII_KSZ9031RN_TX_DATA_PAD_SKEW, 4, tx_data_skews, 4); + + /* Silicon Errata Sheet (DS80000691D or DS80000692D): + * When the device links in the 1000BASE-T slave mode only, + * the optional 125MHz reference output clock (CLK125_NDO) + * has wide duty cycle variation. + * + * The optional CLK125_NDO clock does not meet the RGMII + * 45/55 percent (min/max) duty cycle requirement and therefore + * cannot be used directly by the MAC side for clocking + * applications that have setup/hold time requirements on + * rising and falling clock edges. + * + * Workaround: + * Force the phy to be the master to receive a stable clock + * which meets the duty cycle requirement. + */ + if (of_property_read_bool(of_node, "micrel,force-master")) { + result = phy_read(phydev, MII_CTRL1000); + if (result < 0) + goto err_force_master; + + /* enable master mode, config & prefer master */ + result |= CTL1000_ENABLE_MASTER | CTL1000_AS_MASTER; + result = phy_write(phydev, MII_CTRL1000, result); + if (result < 0) + goto err_force_master; + } } return ksz9031_center_flp_timing(phydev); + +err_force_master: + phydev_err(phydev, "failed to force the phy to master mode\n"); + return result; } #define KSZ8873MLL_GLOBAL_CONTROL_4 0x06 diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 44d4f3d25350..99bf3cee1345 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -675,15 +675,6 @@ static void tun_queue_purge(struct tun_file *tfile) skb_queue_purge(&tfile->sk.sk_error_queue); } -static void tun_cleanup_tx_ring(struct tun_file *tfile) -{ - if (tfile->tx_ring.queue) { - ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); - xdp_rxq_info_unreg(&tfile->xdp_rxq); - memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring)); - } -} - static void __tun_detach(struct tun_file *tfile, bool clean) { struct tun_file *ntfile; @@ -730,7 +721,9 @@ static void __tun_detach(struct tun_file *tfile, bool clean) tun->dev->reg_state == NETREG_REGISTERED) unregister_netdevice(tun->dev); } - tun_cleanup_tx_ring(tfile); + if (tun) + xdp_rxq_info_unreg(&tfile->xdp_rxq); + ptr_ring_cleanup(&tfile->tx_ring, tun_ptr_free); sock_put(&tfile->sk); } } @@ -777,14 +770,14 @@ static void tun_detach_all(struct net_device *dev) tun_napi_del(tun, tfile); /* Drop read queue */ tun_queue_purge(tfile); + xdp_rxq_info_unreg(&tfile->xdp_rxq); sock_put(&tfile->sk); - tun_cleanup_tx_ring(tfile); } list_for_each_entry_safe(tfile, tmp, &tun->disabled, next) { tun_enable_queue(tfile); tun_queue_purge(tfile); + xdp_rxq_info_unreg(&tfile->xdp_rxq); sock_put(&tfile->sk); - tun_cleanup_tx_ring(tfile); } BUG_ON(tun->numdisabled != 0); @@ -828,7 +821,8 @@ static int tun_attach(struct tun_struct *tun, struct file *file, } if (!tfile->detached && - ptr_ring_init(&tfile->tx_ring, dev->tx_queue_len, GFP_KERNEL)) { + ptr_ring_resize(&tfile->tx_ring, dev->tx_queue_len, + GFP_KERNEL, tun_ptr_free)) { err = -ENOMEM; goto out; } @@ -3221,6 +3215,11 @@ static int tun_chr_open(struct inode *inode, struct file * file) &tun_proto, 0); if (!tfile) return -ENOMEM; + if (ptr_ring_init(&tfile->tx_ring, 0, GFP_KERNEL)) { + sk_free(&tfile->sk); + return -ENOMEM; + } + RCU_INIT_POINTER(tfile->tun, NULL); tfile->flags = 0; tfile->ifindex = 0; @@ -3241,8 +3240,6 @@ static int tun_chr_open(struct inode *inode, struct file * file) sock_set_flag(&tfile->sk, SOCK_ZEROCOPY); - memset(&tfile->tx_ring, 0, sizeof(tfile->tx_ring)); - return 0; } diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 2234a334e96e..e454dfc9ad8f 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -369,6 +369,11 @@ vmxnet3_tq_tx_complete(struct vmxnet3_tx_queue *tq, gdesc = tq->comp_ring.base + tq->comp_ring.next2proc; while (VMXNET3_TCD_GET_GEN(&gdesc->tcd) == tq->comp_ring.gen) { + /* Prevent any &gdesc->tcd field from being (speculatively) + * read before (&gdesc->tcd)->gen is read. + */ + dma_rmb(); + completed += vmxnet3_unmap_pkt(VMXNET3_TCD_GET_TXIDX( &gdesc->tcd), tq, adapter->pdev, adapter); @@ -1103,6 +1108,11 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, gdesc->txd.tci = skb_vlan_tag_get(skb); } + /* Ensure that the write to (&gdesc->txd)->gen will be observed after + * all other writes to &gdesc->txd. + */ + dma_wmb(); + /* finally flips the GEN bit of the SOP desc. */ gdesc->dword[2] = cpu_to_le32(le32_to_cpu(gdesc->dword[2]) ^ VMXNET3_TXD_GEN); @@ -1298,6 +1308,12 @@ vmxnet3_rq_rx_complete(struct vmxnet3_rx_queue *rq, */ break; } + + /* Prevent any rcd field from being (speculatively) read before + * rcd->gen is read. + */ + dma_rmb(); + BUG_ON(rcd->rqID != rq->qid && rcd->rqID != rq->qid2 && rcd->rqID != rq->dataRingQid); idx = rcd->rxdIdx; @@ -1528,6 +1544,12 @@ rcd_done: ring->next2comp = idx; num_to_alloc = vmxnet3_cmd_ring_desc_avail(ring); ring = rq->rx_ring + ring_idx; + + /* Ensure that the writes to rxd->gen bits will be observed + * after all other writes to rxd objects. + */ + dma_wmb(); + while (num_to_alloc) { vmxnet3_getRxDesc(rxd, &ring->base[ring->next2fill].rxd, &rxCmdDesc); @@ -2688,7 +2710,7 @@ vmxnet3_set_mac_addr(struct net_device *netdev, void *p) /* ==================== initialization and cleanup routines ============ */ static int -vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64) +vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter) { int err; unsigned long mmio_start, mmio_len; @@ -2700,30 +2722,12 @@ vmxnet3_alloc_pci_resources(struct vmxnet3_adapter *adapter, bool *dma64) return err; } - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) { - if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) { - dev_err(&pdev->dev, - "pci_set_consistent_dma_mask failed\n"); - err = -EIO; - goto err_set_mask; - } - *dma64 = true; - } else { - if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) { - dev_err(&pdev->dev, - "pci_set_dma_mask failed\n"); - err = -EIO; - goto err_set_mask; - } - *dma64 = false; - } - err = pci_request_selected_regions(pdev, (1 << 2) - 1, vmxnet3_driver_name); if (err) { dev_err(&pdev->dev, "Failed to request region for adapter: error %d\n", err); - goto err_set_mask; + goto err_enable_device; } pci_set_master(pdev); @@ -2751,7 +2755,7 @@ err_bar1: iounmap(adapter->hw_addr0); err_ioremap: pci_release_selected_regions(pdev, (1 << 2) - 1); -err_set_mask: +err_enable_device: pci_disable_device(pdev); return err; } @@ -3254,7 +3258,7 @@ vmxnet3_probe_device(struct pci_dev *pdev, #endif }; int err; - bool dma64 = false; /* stupid gcc */ + bool dma64; u32 ver; struct net_device *netdev; struct vmxnet3_adapter *adapter; @@ -3300,6 +3304,24 @@ vmxnet3_probe_device(struct pci_dev *pdev, adapter->rx_ring_size = VMXNET3_DEF_RX_RING_SIZE; adapter->rx_ring2_size = VMXNET3_DEF_RX_RING2_SIZE; + if (pci_set_dma_mask(pdev, DMA_BIT_MASK(64)) == 0) { + if (pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64)) != 0) { + dev_err(&pdev->dev, + "pci_set_consistent_dma_mask failed\n"); + err = -EIO; + goto err_set_mask; + } + dma64 = true; + } else { + if (pci_set_dma_mask(pdev, DMA_BIT_MASK(32)) != 0) { + dev_err(&pdev->dev, + "pci_set_dma_mask failed\n"); + err = -EIO; + goto err_set_mask; + } + dma64 = false; + } + spin_lock_init(&adapter->cmd_lock); adapter->adapter_pa = dma_map_single(&adapter->pdev->dev, adapter, sizeof(struct vmxnet3_adapter), @@ -3307,7 +3329,7 @@ vmxnet3_probe_device(struct pci_dev *pdev, if (dma_mapping_error(&adapter->pdev->dev, adapter->adapter_pa)) { dev_err(&pdev->dev, "Failed to map dma\n"); err = -EFAULT; - goto err_dma_map; + goto err_set_mask; } adapter->shared = dma_alloc_coherent( &adapter->pdev->dev, @@ -3358,7 +3380,7 @@ vmxnet3_probe_device(struct pci_dev *pdev, } #endif /* VMXNET3_RSS */ - err = vmxnet3_alloc_pci_resources(adapter, &dma64); + err = vmxnet3_alloc_pci_resources(adapter); if (err < 0) goto err_alloc_pci; @@ -3504,7 +3526,7 @@ err_alloc_queue_desc: err_alloc_shared: dma_unmap_single(&adapter->pdev->dev, adapter->adapter_pa, sizeof(struct vmxnet3_adapter), PCI_DMA_TODEVICE); -err_dma_map: +err_set_mask: free_netdev(netdev); return err; } diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h index a3326463b71f..a2c554f8a61b 100644 --- a/drivers/net/vmxnet3/vmxnet3_int.h +++ b/drivers/net/vmxnet3/vmxnet3_int.h @@ -69,10 +69,12 @@ /* * Version numbers */ -#define VMXNET3_DRIVER_VERSION_STRING "1.4.14.0-k" +#define VMXNET3_DRIVER_VERSION_STRING "1.4.16.0-k" -/* a 32-bit int, each byte encode a verion number in VMXNET3_DRIVER_VERSION */ -#define VMXNET3_DRIVER_VERSION_NUM 0x01040e00 +/* Each byte of this 32-bit integer encodes a version number in + * VMXNET3_DRIVER_VERSION_STRING. + */ +#define VMXNET3_DRIVER_VERSION_NUM 0x01041000 #if defined(CONFIG_PCI_MSI) /* RSS only makes sense if MSI-X is supported. */ diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index a3771c5729f5..99b857e5a7a9 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -99,6 +99,7 @@ static struct class *nvme_subsys_class; static void nvme_ns_remove(struct nvme_ns *ns); static int nvme_revalidate_disk(struct gendisk *disk); +static void nvme_put_subsystem(struct nvme_subsystem *subsys); int nvme_reset_ctrl(struct nvme_ctrl *ctrl) { @@ -117,7 +118,8 @@ int nvme_reset_ctrl_sync(struct nvme_ctrl *ctrl) ret = nvme_reset_ctrl(ctrl); if (!ret) { flush_work(&ctrl->reset_work); - if (ctrl->state != NVME_CTRL_LIVE) + if (ctrl->state != NVME_CTRL_LIVE && + ctrl->state != NVME_CTRL_ADMIN_ONLY) ret = -ENETRESET; } @@ -350,6 +352,7 @@ static void nvme_free_ns_head(struct kref *ref) ida_simple_remove(&head->subsys->ns_ida, head->instance); list_del_init(&head->entry); cleanup_srcu_struct(&head->srcu); + nvme_put_subsystem(head->subsys); kfree(head); } @@ -2861,6 +2864,9 @@ static struct nvme_ns_head *nvme_alloc_ns_head(struct nvme_ctrl *ctrl, goto out_cleanup_srcu; list_add_tail(&head->entry, &ctrl->subsys->nsheads); + + kref_get(&ctrl->subsys->ref); + return head; out_cleanup_srcu: cleanup_srcu_struct(&head->srcu); diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index 7ded7a51c430..17d2f7cf3fed 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -84,6 +84,11 @@ enum nvme_quirks { * Supports the LighNVM command set if indicated in vs[1]. */ NVME_QUIRK_LIGHTNVM = (1 << 6), + + /* + * Set MEDIUM priority on SQ creation + */ + NVME_QUIRK_MEDIUM_PRIO_SQ = (1 << 7), }; /* diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index fbc71fac6f1e..17a0190bd88f 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -1093,9 +1093,18 @@ static int adapter_alloc_cq(struct nvme_dev *dev, u16 qid, static int adapter_alloc_sq(struct nvme_dev *dev, u16 qid, struct nvme_queue *nvmeq) { + struct nvme_ctrl *ctrl = &dev->ctrl; struct nvme_command c; int flags = NVME_QUEUE_PHYS_CONTIG; + /* + * Some drives have a bug that auto-enables WRRU if MEDIUM isn't + * set. Since URGENT priority is zeroes, it makes all queues + * URGENT. + */ + if (ctrl->quirks & NVME_QUIRK_MEDIUM_PRIO_SQ) + flags |= NVME_SQ_PRIO_MEDIUM; + /* * Note: we (ab)use the fact that the prp fields survive if no data * is attached to the request. @@ -2701,7 +2710,8 @@ static const struct pci_device_id nvme_id_table[] = { .driver_data = NVME_QUIRK_STRIPE_SIZE | NVME_QUIRK_DEALLOCATE_ZEROES, }, { PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */ - .driver_data = NVME_QUIRK_NO_DEEPEST_PS }, + .driver_data = NVME_QUIRK_NO_DEEPEST_PS | + NVME_QUIRK_MEDIUM_PRIO_SQ }, { PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */ .driver_data = NVME_QUIRK_IDENTIFY_CNS, }, { PCI_DEVICE(0x1c58, 0x0003), /* HGST adapter */ diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 126cf19e869b..297599fcbc32 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -1195,7 +1195,7 @@ void * ccio_get_iommu(const struct parisc_device *dev) * to/from certain pages. To avoid this happening, we mark these pages * as `used', and ensure that nothing will try to allocate from them. */ -void ccio_cujo20_fixup(struct parisc_device *cujo, u32 iovp) +void __init ccio_cujo20_fixup(struct parisc_device *cujo, u32 iovp) { unsigned int idx; struct parisc_device *dev = parisc_parent(cujo); diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index bc309c5327ff..566644bb496a 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -168,8 +168,8 @@ config DELL_WMI depends on DMI depends on INPUT depends on ACPI_VIDEO || ACPI_VIDEO = n + depends on DELL_SMBIOS select DELL_WMI_DESCRIPTOR - select DELL_SMBIOS select INPUT_SPARSEKMAP ---help--- Say Y here if you want to support WMI-based hotkeys on Dell laptops. diff --git a/drivers/reset/reset-uniphier.c b/drivers/reset/reset-uniphier.c index 360e06b20c53..ac18f2f27881 100644 --- a/drivers/reset/reset-uniphier.c +++ b/drivers/reset/reset-uniphier.c @@ -110,7 +110,7 @@ static const struct uniphier_reset_data uniphier_ld20_sys_reset_data[] = { UNIPHIER_RESETX(4, 0x200c, 2), /* eMMC */ UNIPHIER_RESETX(6, 0x200c, 6), /* Ether */ UNIPHIER_RESETX(8, 0x200c, 8), /* STDMAC (HSC) */ - UNIPHIER_RESETX(12, 0x200c, 5), /* GIO (PCIe, USB3) */ + UNIPHIER_RESETX(14, 0x200c, 5), /* USB30 */ UNIPHIER_RESETX(16, 0x200c, 12), /* USB30-PHY0 */ UNIPHIER_RESETX(17, 0x200c, 13), /* USB30-PHY1 */ UNIPHIER_RESETX(18, 0x200c, 14), /* USB30-PHY2 */ @@ -127,8 +127,8 @@ static const struct uniphier_reset_data uniphier_pxs3_sys_reset_data[] = { UNIPHIER_RESETX(6, 0x200c, 9), /* Ether0 */ UNIPHIER_RESETX(7, 0x200c, 10), /* Ether1 */ UNIPHIER_RESETX(8, 0x200c, 12), /* STDMAC */ - UNIPHIER_RESETX(12, 0x200c, 4), /* USB30 link (GIO0) */ - UNIPHIER_RESETX(13, 0x200c, 5), /* USB31 link (GIO1) */ + UNIPHIER_RESETX(12, 0x200c, 4), /* USB30 link */ + UNIPHIER_RESETX(13, 0x200c, 5), /* USB31 link */ UNIPHIER_RESETX(16, 0x200c, 16), /* USB30-PHY0 */ UNIPHIER_RESETX(17, 0x200c, 18), /* USB30-PHY1 */ UNIPHIER_RESETX(18, 0x200c, 20), /* USB30-PHY2 */ diff --git a/drivers/s390/cio/qdio_setup.c b/drivers/s390/cio/qdio_setup.c index 439991d71b14..4c14ce428e92 100644 --- a/drivers/s390/cio/qdio_setup.c +++ b/drivers/s390/cio/qdio_setup.c @@ -141,7 +141,7 @@ static int __qdio_allocate_qs(struct qdio_q **irq_ptr_qs, int nr_queues) int i; for (i = 0; i < nr_queues; i++) { - q = kmem_cache_alloc(qdio_q_cache, GFP_KERNEL); + q = kmem_cache_zalloc(qdio_q_cache, GFP_KERNEL); if (!q) return -ENOMEM; @@ -456,7 +456,6 @@ int qdio_setup_irq(struct qdio_initialize *init_data) { struct ciw *ciw; struct qdio_irq *irq_ptr = init_data->cdev->private->qdio_data; - int rc; memset(&irq_ptr->qib, 0, sizeof(irq_ptr->qib)); memset(&irq_ptr->siga_flag, 0, sizeof(irq_ptr->siga_flag)); @@ -493,16 +492,14 @@ int qdio_setup_irq(struct qdio_initialize *init_data) ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_EQUEUE); if (!ciw) { DBF_ERROR("%4x NO EQ", irq_ptr->schid.sch_no); - rc = -EINVAL; - goto out_err; + return -EINVAL; } irq_ptr->equeue = *ciw; ciw = ccw_device_get_ciw(init_data->cdev, CIW_TYPE_AQUEUE); if (!ciw) { DBF_ERROR("%4x NO AQ", irq_ptr->schid.sch_no); - rc = -EINVAL; - goto out_err; + return -EINVAL; } irq_ptr->aqueue = *ciw; @@ -512,9 +509,6 @@ int qdio_setup_irq(struct qdio_initialize *init_data) init_data->cdev->handler = qdio_int_handler; spin_unlock_irq(get_ccwdev_lock(irq_ptr->cdev)); return 0; -out_err: - qdio_release_memory(irq_ptr); - return rc; } void qdio_print_subchannel_info(struct qdio_irq *irq_ptr, diff --git a/drivers/s390/cio/vfio_ccw_cp.c b/drivers/s390/cio/vfio_ccw_cp.c index 2c7550797ec2..dce92b2a895d 100644 --- a/drivers/s390/cio/vfio_ccw_cp.c +++ b/drivers/s390/cio/vfio_ccw_cp.c @@ -715,6 +715,10 @@ void cp_free(struct channel_program *cp) * and stores the result to ccwchain list. @cp must have been * initialized by a previous call with cp_init(). Otherwise, undefined * behavior occurs. + * For each chain composing the channel program: + * - On entry ch_len holds the count of CCWs to be translated. + * - On exit ch_len is adjusted to the count of successfully translated CCWs. + * This allows cp_free to find in ch_len the count of CCWs to free in a chain. * * The S/390 CCW Translation APIS (prefixed by 'cp_') are introduced * as helpers to do ccw chain translation inside the kernel. Basically @@ -749,11 +753,18 @@ int cp_prefetch(struct channel_program *cp) for (idx = 0; idx < len; idx++) { ret = ccwchain_fetch_one(chain, idx, cp); if (ret) - return ret; + goto out_err; } } return 0; +out_err: + /* Only cleanup the chain elements that were actually translated. */ + chain->ch_len = idx; + list_for_each_entry_continue(chain, &cp->ccwchain_list, next) { + chain->ch_len = 0; + } + return ret; } /** diff --git a/drivers/scsi/aacraid/commsup.c b/drivers/scsi/aacraid/commsup.c index 0156c9623c35..d62ddd63f4fe 100644 --- a/drivers/scsi/aacraid/commsup.c +++ b/drivers/scsi/aacraid/commsup.c @@ -724,6 +724,8 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback, int wait; unsigned long flags = 0; unsigned long mflags = 0; + struct aac_hba_cmd_req *hbacmd = (struct aac_hba_cmd_req *) + fibptr->hw_fib_va; fibptr->flags = (FIB_CONTEXT_FLAG | FIB_CONTEXT_FLAG_NATIVE_HBA); if (callback) { @@ -734,11 +736,9 @@ int aac_hba_send(u8 command, struct fib *fibptr, fib_callback callback, wait = 1; - if (command == HBA_IU_TYPE_SCSI_CMD_REQ) { - struct aac_hba_cmd_req *hbacmd = - (struct aac_hba_cmd_req *)fibptr->hw_fib_va; + hbacmd->iu_type = command; - hbacmd->iu_type = command; + if (command == HBA_IU_TYPE_SCSI_CMD_REQ) { /* bit1 of request_id must be 0 */ hbacmd->request_id = cpu_to_le32((((u32)(fibptr - dev->fibs)) << 2) + 1); diff --git a/drivers/scsi/vmw_pvscsi.c b/drivers/scsi/vmw_pvscsi.c index c374e3b5c678..777e5f1e52d1 100644 --- a/drivers/scsi/vmw_pvscsi.c +++ b/drivers/scsi/vmw_pvscsi.c @@ -609,7 +609,7 @@ static void pvscsi_complete_request(struct pvscsi_adapter *adapter, break; case BTSTAT_ABORTQUEUE: - cmd->result = (DID_ABORT << 16); + cmd->result = (DID_BUS_BUSY << 16); break; case BTSTAT_SCSIPARITY: diff --git a/drivers/spi/spi-bcm-qspi.c b/drivers/spi/spi-bcm-qspi.c index 1596d35498c5..6573152ce893 100644 --- a/drivers/spi/spi-bcm-qspi.c +++ b/drivers/spi/spi-bcm-qspi.c @@ -490,7 +490,7 @@ static int bcm_qspi_bspi_set_mode(struct bcm_qspi *qspi, static void bcm_qspi_enable_bspi(struct bcm_qspi *qspi) { - if (!has_bspi(qspi) || (qspi->bspi_enabled)) + if (!has_bspi(qspi)) return; qspi->bspi_enabled = 1; @@ -505,7 +505,7 @@ static void bcm_qspi_enable_bspi(struct bcm_qspi *qspi) static void bcm_qspi_disable_bspi(struct bcm_qspi *qspi) { - if (!has_bspi(qspi) || (!qspi->bspi_enabled)) + if (!has_bspi(qspi)) return; qspi->bspi_enabled = 0; @@ -519,16 +519,19 @@ static void bcm_qspi_disable_bspi(struct bcm_qspi *qspi) static void bcm_qspi_chip_select(struct bcm_qspi *qspi, int cs) { - u32 data = 0; + u32 rd = 0; + u32 wr = 0; - if (qspi->curr_cs == cs) - return; if (qspi->base[CHIP_SELECT]) { - data = bcm_qspi_read(qspi, CHIP_SELECT, 0); - data = (data & ~0xff) | (1 << cs); - bcm_qspi_write(qspi, CHIP_SELECT, 0, data); + rd = bcm_qspi_read(qspi, CHIP_SELECT, 0); + wr = (rd & ~0xff) | (1 << cs); + if (rd == wr) + return; + bcm_qspi_write(qspi, CHIP_SELECT, 0, wr); usleep_range(10, 20); } + + dev_dbg(&qspi->pdev->dev, "using cs:%d\n", cs); qspi->curr_cs = cs; } @@ -755,8 +758,13 @@ static int write_to_hw(struct bcm_qspi *qspi, struct spi_device *spi) dev_dbg(&qspi->pdev->dev, "WR %04x\n", val); } mspi_cdram = MSPI_CDRAM_CONT_BIT; - mspi_cdram |= (~(1 << spi->chip_select) & - MSPI_CDRAM_PCS); + + if (has_bspi(qspi)) + mspi_cdram &= ~1; + else + mspi_cdram |= (~(1 << spi->chip_select) & + MSPI_CDRAM_PCS); + mspi_cdram |= ((tp.trans->bits_per_word <= 8) ? 0 : MSPI_CDRAM_BITSE_BIT); diff --git a/drivers/spi/spi-bcm2835aux.c b/drivers/spi/spi-bcm2835aux.c index 1431cb98fe40..3094d818cf06 100644 --- a/drivers/spi/spi-bcm2835aux.c +++ b/drivers/spi/spi-bcm2835aux.c @@ -184,6 +184,11 @@ static irqreturn_t bcm2835aux_spi_interrupt(int irq, void *dev_id) struct bcm2835aux_spi *bs = spi_master_get_devdata(master); irqreturn_t ret = IRQ_NONE; + /* IRQ may be shared, so return if our interrupts are disabled */ + if (!(bcm2835aux_rd(bs, BCM2835_AUX_SPI_CNTL1) & + (BCM2835_AUX_SPI_CNTL1_TXEMPTY | BCM2835_AUX_SPI_CNTL1_IDLE))) + return ret; + /* check if we have data to read */ while (bs->rx_len && (!(bcm2835aux_rd(bs, BCM2835_AUX_SPI_STAT) & diff --git a/drivers/spi/spi-cadence.c b/drivers/spi/spi-cadence.c index 5c9516ae4942..4a001634023e 100644 --- a/drivers/spi/spi-cadence.c +++ b/drivers/spi/spi-cadence.c @@ -313,6 +313,14 @@ static void cdns_spi_fill_tx_fifo(struct cdns_spi *xspi) while ((trans_cnt < CDNS_SPI_FIFO_DEPTH) && (xspi->tx_bytes > 0)) { + + /* When xspi in busy condition, bytes may send failed, + * then spi control did't work thoroughly, add one byte delay + */ + if (cdns_spi_read(xspi, CDNS_SPI_ISR) & + CDNS_SPI_IXR_TXFULL) + usleep_range(10, 20); + if (xspi->txbuf) cdns_spi_write(xspi, CDNS_SPI_TXD, *xspi->txbuf++); else diff --git a/drivers/spi/spi-imx.c b/drivers/spi/spi-imx.c index 6f57592a7f95..a056ee88a960 100644 --- a/drivers/spi/spi-imx.c +++ b/drivers/spi/spi-imx.c @@ -1701,7 +1701,7 @@ static struct platform_driver spi_imx_driver = { }; module_platform_driver(spi_imx_driver); -MODULE_DESCRIPTION("SPI Master Controller driver"); +MODULE_DESCRIPTION("SPI Controller driver"); MODULE_AUTHOR("Sascha Hauer, Pengutronix"); MODULE_LICENSE("GPL"); MODULE_ALIAS("platform:" DRIVER_NAME); diff --git a/drivers/spi/spi-pxa2xx.h b/drivers/spi/spi-pxa2xx.h index 513ec6c6e25b..0ae7defd3492 100644 --- a/drivers/spi/spi-pxa2xx.h +++ b/drivers/spi/spi-pxa2xx.h @@ -38,7 +38,7 @@ struct driver_data { /* SSP register addresses */ void __iomem *ioaddr; - u32 ssdr_physical; + phys_addr_t ssdr_physical; /* SSP masks*/ u32 dma_cr1; diff --git a/drivers/spi/spi-sh-msiof.c b/drivers/spi/spi-sh-msiof.c index ae086aab57d5..8171eedbfc90 100644 --- a/drivers/spi/spi-sh-msiof.c +++ b/drivers/spi/spi-sh-msiof.c @@ -283,6 +283,7 @@ static void sh_msiof_spi_set_clk_regs(struct sh_msiof_spi_priv *p, } k = min_t(int, k, ARRAY_SIZE(sh_msiof_spi_div_table) - 1); + brps = min_t(int, brps, 32); scr = sh_msiof_spi_div_table[k].brdv | SCR_BRPS(brps); sh_msiof_write(p, TSCR, scr); diff --git a/drivers/tee/tee_core.c b/drivers/tee/tee_core.c index 0124a91c8d71..dd46b758852a 100644 --- a/drivers/tee/tee_core.c +++ b/drivers/tee/tee_core.c @@ -238,6 +238,17 @@ static int params_from_user(struct tee_context *ctx, struct tee_param *params, if (IS_ERR(shm)) return PTR_ERR(shm); + /* + * Ensure offset + size does not overflow offset + * and does not overflow the size of the referred + * shared memory object. + */ + if ((ip.a + ip.b) < ip.a || + (ip.a + ip.b) > shm->size) { + tee_shm_put(shm); + return -EINVAL; + } + params[n].u.memref.shm_offs = ip.a; params[n].u.memref.size = ip.b; params[n].u.memref.shm = shm; diff --git a/drivers/tee/tee_shm.c b/drivers/tee/tee_shm.c index 556960a1bab3..07d3be6f0780 100644 --- a/drivers/tee/tee_shm.c +++ b/drivers/tee/tee_shm.c @@ -360,9 +360,10 @@ int tee_shm_get_fd(struct tee_shm *shm) if (!(shm->flags & TEE_SHM_DMA_BUF)) return -EINVAL; + get_dma_buf(shm->dmabuf); fd = dma_buf_fd(shm->dmabuf, O_CLOEXEC); - if (fd >= 0) - get_dma_buf(shm->dmabuf); + if (fd < 0) + dma_buf_put(shm->dmabuf); return fd; } diff --git a/drivers/thermal/int340x_thermal/int3403_thermal.c b/drivers/thermal/int340x_thermal/int3403_thermal.c index 8a7f24dd9315..0c19fcd56a0d 100644 --- a/drivers/thermal/int340x_thermal/int3403_thermal.c +++ b/drivers/thermal/int340x_thermal/int3403_thermal.c @@ -194,6 +194,7 @@ static int int3403_cdev_add(struct int3403_priv *priv) return -EFAULT; } + priv->priv = obj; obj->max_state = p->package.count - 1; obj->cdev = thermal_cooling_device_register(acpi_device_bid(priv->adev), @@ -201,8 +202,6 @@ static int int3403_cdev_add(struct int3403_priv *priv) if (IS_ERR(obj->cdev)) result = PTR_ERR(obj->cdev); - priv->priv = obj; - kfree(buf.pointer); /* TODO: add ACPI notification support */ diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index ed805c7c5ace..ac83f721db24 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -185,6 +185,7 @@ * @regulator: pointer to the TMU regulator structure. * @reg_conf: pointer to structure to register with core thermal. * @ntrip: number of supported trip points. + * @enabled: current status of TMU device * @tmu_initialize: SoC specific TMU initialization method * @tmu_control: SoC specific TMU control method * @tmu_read: SoC specific TMU temperature read method @@ -205,6 +206,7 @@ struct exynos_tmu_data { struct regulator *regulator; struct thermal_zone_device *tzd; unsigned int ntrip; + bool enabled; int (*tmu_initialize)(struct platform_device *pdev); void (*tmu_control)(struct platform_device *pdev, bool on); @@ -398,6 +400,7 @@ static void exynos_tmu_control(struct platform_device *pdev, bool on) mutex_lock(&data->lock); clk_enable(data->clk); data->tmu_control(pdev, on); + data->enabled = on; clk_disable(data->clk); mutex_unlock(&data->lock); } @@ -889,19 +892,24 @@ static void exynos7_tmu_control(struct platform_device *pdev, bool on) static int exynos_get_temp(void *p, int *temp) { struct exynos_tmu_data *data = p; + int value, ret = 0; - if (!data || !data->tmu_read) + if (!data || !data->tmu_read || !data->enabled) return -EINVAL; mutex_lock(&data->lock); clk_enable(data->clk); - *temp = code_to_temp(data, data->tmu_read(data)) * MCELSIUS; + value = data->tmu_read(data); + if (value < 0) + ret = value; + else + *temp = code_to_temp(data, value) * MCELSIUS; clk_disable(data->clk); mutex_unlock(&data->lock); - return 0; + return ret; } #ifdef CONFIG_THERMAL_EMULATION diff --git a/drivers/usb/host/xhci-hub.c b/drivers/usb/host/xhci-hub.c index 72ebbc908e19..32cd52ca8318 100644 --- a/drivers/usb/host/xhci-hub.c +++ b/drivers/usb/host/xhci-hub.c @@ -354,7 +354,7 @@ int xhci_find_slot_id_by_port(struct usb_hcd *hcd, struct xhci_hcd *xhci, slot_id = 0; for (i = 0; i < MAX_HC_SLOTS; i++) { - if (!xhci->devs[i]) + if (!xhci->devs[i] || !xhci->devs[i]->udev) continue; speed = xhci->devs[i]->udev->speed; if (((speed >= USB_SPEED_SUPER) == (hcd->speed >= HCD_USB3)) diff --git a/drivers/usb/musb/musb_host.c b/drivers/usb/musb/musb_host.c index e7f99d55922a..15a42cee0a9c 100644 --- a/drivers/usb/musb/musb_host.c +++ b/drivers/usb/musb/musb_host.c @@ -2524,8 +2524,11 @@ static int musb_bus_suspend(struct usb_hcd *hcd) { struct musb *musb = hcd_to_musb(hcd); u8 devctl; + int ret; - musb_port_suspend(musb, true); + ret = musb_port_suspend(musb, true); + if (ret) + return ret; if (!is_host_active(musb)) return 0; diff --git a/drivers/usb/musb/musb_host.h b/drivers/usb/musb/musb_host.h index 72392bbcd0a4..2999845632ce 100644 --- a/drivers/usb/musb/musb_host.h +++ b/drivers/usb/musb/musb_host.h @@ -67,7 +67,7 @@ extern void musb_host_rx(struct musb *, u8); extern void musb_root_disconnect(struct musb *musb); extern void musb_host_resume_root_hub(struct musb *musb); extern void musb_host_poke_root_hub(struct musb *musb); -extern void musb_port_suspend(struct musb *musb, bool do_suspend); +extern int musb_port_suspend(struct musb *musb, bool do_suspend); extern void musb_port_reset(struct musb *musb, bool do_reset); extern void musb_host_finish_resume(struct work_struct *work); #else @@ -99,7 +99,10 @@ static inline void musb_root_disconnect(struct musb *musb) {} static inline void musb_host_resume_root_hub(struct musb *musb) {} static inline void musb_host_poll_rh_status(struct musb *musb) {} static inline void musb_host_poke_root_hub(struct musb *musb) {} -static inline void musb_port_suspend(struct musb *musb, bool do_suspend) {} +static inline int musb_port_suspend(struct musb *musb, bool do_suspend) +{ + return 0; +} static inline void musb_port_reset(struct musb *musb, bool do_reset) {} static inline void musb_host_finish_resume(struct work_struct *work) {} #endif diff --git a/drivers/usb/musb/musb_virthub.c b/drivers/usb/musb/musb_virthub.c index 5165d2b07ade..2f8dd9826e94 100644 --- a/drivers/usb/musb/musb_virthub.c +++ b/drivers/usb/musb/musb_virthub.c @@ -48,14 +48,14 @@ void musb_host_finish_resume(struct work_struct *work) spin_unlock_irqrestore(&musb->lock, flags); } -void musb_port_suspend(struct musb *musb, bool do_suspend) +int musb_port_suspend(struct musb *musb, bool do_suspend) { struct usb_otg *otg = musb->xceiv->otg; u8 power; void __iomem *mbase = musb->mregs; if (!is_host_active(musb)) - return; + return 0; /* NOTE: this doesn't necessarily put PHY into low power mode, * turning off its clock; that's a function of PHY integration and @@ -66,16 +66,20 @@ void musb_port_suspend(struct musb *musb, bool do_suspend) if (do_suspend) { int retries = 10000; - power &= ~MUSB_POWER_RESUME; - power |= MUSB_POWER_SUSPENDM; - musb_writeb(mbase, MUSB_POWER, power); + if (power & MUSB_POWER_RESUME) + return -EBUSY; - /* Needed for OPT A tests */ - power = musb_readb(mbase, MUSB_POWER); - while (power & MUSB_POWER_SUSPENDM) { + if (!(power & MUSB_POWER_SUSPENDM)) { + power |= MUSB_POWER_SUSPENDM; + musb_writeb(mbase, MUSB_POWER, power); + + /* Needed for OPT A tests */ power = musb_readb(mbase, MUSB_POWER); - if (retries-- < 1) - break; + while (power & MUSB_POWER_SUSPENDM) { + power = musb_readb(mbase, MUSB_POWER); + if (retries-- < 1) + break; + } } musb_dbg(musb, "Root port suspended, power %02x", power); @@ -111,6 +115,7 @@ void musb_port_suspend(struct musb *musb, bool do_suspend) schedule_delayed_work(&musb->finish_resume_work, msecs_to_jiffies(USB_RESUME_TIMEOUT)); } + return 0; } void musb_port_reset(struct musb *musb, bool do_reset) diff --git a/drivers/usb/usbip/stub.h b/drivers/usb/usbip/stub.h index 14a72357800a..35618ceb2791 100644 --- a/drivers/usb/usbip/stub.h +++ b/drivers/usb/usbip/stub.h @@ -73,6 +73,7 @@ struct bus_id_priv { struct stub_device *sdev; struct usb_device *udev; char shutdown_busid; + spinlock_t busid_lock; }; /* stub_priv is allocated from stub_priv_cache */ @@ -83,6 +84,7 @@ extern struct usb_device_driver stub_driver; /* stub_main.c */ struct bus_id_priv *get_busid_priv(const char *busid); +void put_busid_priv(struct bus_id_priv *bid); int del_match_busid(char *busid); void stub_device_cleanup_urbs(struct stub_device *sdev); diff --git a/drivers/usb/usbip/stub_dev.c b/drivers/usb/usbip/stub_dev.c index dd8ef36ab10e..c0d6ff1baa72 100644 --- a/drivers/usb/usbip/stub_dev.c +++ b/drivers/usb/usbip/stub_dev.c @@ -300,9 +300,9 @@ static int stub_probe(struct usb_device *udev) struct stub_device *sdev = NULL; const char *udev_busid = dev_name(&udev->dev); struct bus_id_priv *busid_priv; - int rc; + int rc = 0; - dev_dbg(&udev->dev, "Enter\n"); + dev_dbg(&udev->dev, "Enter probe\n"); /* check we should claim or not by busid_table */ busid_priv = get_busid_priv(udev_busid); @@ -317,13 +317,15 @@ static int stub_probe(struct usb_device *udev) * other matched drivers by the driver core. * See driver_probe_device() in driver/base/dd.c */ - return -ENODEV; + rc = -ENODEV; + goto call_put_busid_priv; } if (udev->descriptor.bDeviceClass == USB_CLASS_HUB) { dev_dbg(&udev->dev, "%s is a usb hub device... skip!\n", udev_busid); - return -ENODEV; + rc = -ENODEV; + goto call_put_busid_priv; } if (!strcmp(udev->bus->bus_name, "vhci_hcd")) { @@ -331,13 +333,16 @@ static int stub_probe(struct usb_device *udev) "%s is attached on vhci_hcd... skip!\n", udev_busid); - return -ENODEV; + rc = -ENODEV; + goto call_put_busid_priv; } /* ok, this is my device */ sdev = stub_device_alloc(udev); - if (!sdev) - return -ENOMEM; + if (!sdev) { + rc = -ENOMEM; + goto call_put_busid_priv; + } dev_info(&udev->dev, "usbip-host: register new device (bus %u dev %u)\n", @@ -369,7 +374,9 @@ static int stub_probe(struct usb_device *udev) } busid_priv->status = STUB_BUSID_ALLOC; - return 0; + rc = 0; + goto call_put_busid_priv; + err_files: usb_hub_release_port(udev->parent, udev->portnum, (struct usb_dev_state *) udev); @@ -379,6 +386,9 @@ err_port: busid_priv->sdev = NULL; stub_device_free(sdev); + +call_put_busid_priv: + put_busid_priv(busid_priv); return rc; } @@ -404,7 +414,7 @@ static void stub_disconnect(struct usb_device *udev) struct bus_id_priv *busid_priv; int rc; - dev_dbg(&udev->dev, "Enter\n"); + dev_dbg(&udev->dev, "Enter disconnect\n"); busid_priv = get_busid_priv(udev_busid); if (!busid_priv) { @@ -417,7 +427,7 @@ static void stub_disconnect(struct usb_device *udev) /* get stub_device */ if (!sdev) { dev_err(&udev->dev, "could not get device"); - return; + goto call_put_busid_priv; } dev_set_drvdata(&udev->dev, NULL); @@ -432,12 +442,12 @@ static void stub_disconnect(struct usb_device *udev) (struct usb_dev_state *) udev); if (rc) { dev_dbg(&udev->dev, "unable to release port\n"); - return; + goto call_put_busid_priv; } /* If usb reset is called from event handler */ if (usbip_in_eh(current)) - return; + goto call_put_busid_priv; /* shutdown the current connection */ shutdown_busid(busid_priv); @@ -448,12 +458,11 @@ static void stub_disconnect(struct usb_device *udev) busid_priv->sdev = NULL; stub_device_free(sdev); - if (busid_priv->status == STUB_BUSID_ALLOC) { + if (busid_priv->status == STUB_BUSID_ALLOC) busid_priv->status = STUB_BUSID_ADDED; - } else { - busid_priv->status = STUB_BUSID_OTHER; - del_match_busid((char *)udev_busid); - } + +call_put_busid_priv: + put_busid_priv(busid_priv); } #ifdef CONFIG_PM diff --git a/drivers/usb/usbip/stub_main.c b/drivers/usb/usbip/stub_main.c index d41d0cdeec0f..bf8a5feb0ee9 100644 --- a/drivers/usb/usbip/stub_main.c +++ b/drivers/usb/usbip/stub_main.c @@ -14,6 +14,7 @@ #define DRIVER_DESC "USB/IP Host Driver" struct kmem_cache *stub_priv_cache; + /* * busid_tables defines matching busids that usbip can grab. A user can change * dynamically what device is locally used and what device is exported to a @@ -25,6 +26,8 @@ static spinlock_t busid_table_lock; static void init_busid_table(void) { + int i; + /* * This also sets the bus_table[i].status to * STUB_BUSID_OTHER, which is 0. @@ -32,6 +35,9 @@ static void init_busid_table(void) memset(busid_table, 0, sizeof(busid_table)); spin_lock_init(&busid_table_lock); + + for (i = 0; i < MAX_BUSID; i++) + spin_lock_init(&busid_table[i].busid_lock); } /* @@ -43,15 +49,20 @@ static int get_busid_idx(const char *busid) int i; int idx = -1; - for (i = 0; i < MAX_BUSID; i++) + for (i = 0; i < MAX_BUSID; i++) { + spin_lock(&busid_table[i].busid_lock); if (busid_table[i].name[0]) if (!strncmp(busid_table[i].name, busid, BUSID_SIZE)) { idx = i; + spin_unlock(&busid_table[i].busid_lock); break; } + spin_unlock(&busid_table[i].busid_lock); + } return idx; } +/* Returns holding busid_lock. Should call put_busid_priv() to unlock */ struct bus_id_priv *get_busid_priv(const char *busid) { int idx; @@ -59,13 +70,22 @@ struct bus_id_priv *get_busid_priv(const char *busid) spin_lock(&busid_table_lock); idx = get_busid_idx(busid); - if (idx >= 0) + if (idx >= 0) { bid = &(busid_table[idx]); + /* get busid_lock before returning */ + spin_lock(&bid->busid_lock); + } spin_unlock(&busid_table_lock); return bid; } +void put_busid_priv(struct bus_id_priv *bid) +{ + if (bid) + spin_unlock(&bid->busid_lock); +} + static int add_match_busid(char *busid) { int i; @@ -78,15 +98,19 @@ static int add_match_busid(char *busid) goto out; } - for (i = 0; i < MAX_BUSID; i++) + for (i = 0; i < MAX_BUSID; i++) { + spin_lock(&busid_table[i].busid_lock); if (!busid_table[i].name[0]) { strlcpy(busid_table[i].name, busid, BUSID_SIZE); if ((busid_table[i].status != STUB_BUSID_ALLOC) && (busid_table[i].status != STUB_BUSID_REMOV)) busid_table[i].status = STUB_BUSID_ADDED; ret = 0; + spin_unlock(&busid_table[i].busid_lock); break; } + spin_unlock(&busid_table[i].busid_lock); + } out: spin_unlock(&busid_table_lock); @@ -107,6 +131,8 @@ int del_match_busid(char *busid) /* found */ ret = 0; + spin_lock(&busid_table[idx].busid_lock); + if (busid_table[idx].status == STUB_BUSID_OTHER) memset(busid_table[idx].name, 0, BUSID_SIZE); @@ -114,6 +140,7 @@ int del_match_busid(char *busid) (busid_table[idx].status != STUB_BUSID_ADDED)) busid_table[idx].status = STUB_BUSID_REMOV; + spin_unlock(&busid_table[idx].busid_lock); out: spin_unlock(&busid_table_lock); @@ -126,9 +153,12 @@ static ssize_t match_busid_show(struct device_driver *drv, char *buf) char *out = buf; spin_lock(&busid_table_lock); - for (i = 0; i < MAX_BUSID; i++) + for (i = 0; i < MAX_BUSID; i++) { + spin_lock(&busid_table[i].busid_lock); if (busid_table[i].name[0]) out += sprintf(out, "%s ", busid_table[i].name); + spin_unlock(&busid_table[i].busid_lock); + } spin_unlock(&busid_table_lock); out += sprintf(out, "\n"); @@ -169,6 +199,51 @@ static ssize_t match_busid_store(struct device_driver *dev, const char *buf, } static DRIVER_ATTR_RW(match_busid); +static int do_rebind(char *busid, struct bus_id_priv *busid_priv) +{ + int ret; + + /* device_attach() callers should hold parent lock for USB */ + if (busid_priv->udev->dev.parent) + device_lock(busid_priv->udev->dev.parent); + ret = device_attach(&busid_priv->udev->dev); + if (busid_priv->udev->dev.parent) + device_unlock(busid_priv->udev->dev.parent); + if (ret < 0) { + dev_err(&busid_priv->udev->dev, "rebind failed\n"); + return ret; + } + return 0; +} + +static void stub_device_rebind(void) +{ +#if IS_MODULE(CONFIG_USBIP_HOST) + struct bus_id_priv *busid_priv; + int i; + + /* update status to STUB_BUSID_OTHER so probe ignores the device */ + spin_lock(&busid_table_lock); + for (i = 0; i < MAX_BUSID; i++) { + if (busid_table[i].name[0] && + busid_table[i].shutdown_busid) { + busid_priv = &(busid_table[i]); + busid_priv->status = STUB_BUSID_OTHER; + } + } + spin_unlock(&busid_table_lock); + + /* now run rebind - no need to hold locks. driver files are removed */ + for (i = 0; i < MAX_BUSID; i++) { + if (busid_table[i].name[0] && + busid_table[i].shutdown_busid) { + busid_priv = &(busid_table[i]); + do_rebind(busid_table[i].name, busid_priv); + } + } +#endif +} + static ssize_t rebind_store(struct device_driver *dev, const char *buf, size_t count) { @@ -186,16 +261,17 @@ static ssize_t rebind_store(struct device_driver *dev, const char *buf, if (!bid) return -ENODEV; - /* device_attach() callers should hold parent lock for USB */ - if (bid->udev->dev.parent) - device_lock(bid->udev->dev.parent); - ret = device_attach(&bid->udev->dev); - if (bid->udev->dev.parent) - device_unlock(bid->udev->dev.parent); - if (ret < 0) { - dev_err(&bid->udev->dev, "rebind failed\n"); + /* mark the device for deletion so probe ignores it during rescan */ + bid->status = STUB_BUSID_OTHER; + /* release the busid lock */ + put_busid_priv(bid); + + ret = do_rebind((char *) buf, bid); + if (ret < 0) return ret; - } + + /* delete device from busid_table */ + del_match_busid((char *) buf); return count; } @@ -317,6 +393,9 @@ static void __exit usbip_host_exit(void) */ usb_deregister_device_driver(&stub_driver); + /* initiate scan to attach devices */ + stub_device_rebind(); + kmem_cache_destroy(stub_priv_cache); } diff --git a/fs/afs/addr_list.c b/fs/afs/addr_list.c index 3bedfed608a2..7587fb665ff1 100644 --- a/fs/afs/addr_list.c +++ b/fs/afs/addr_list.c @@ -121,7 +121,7 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, p = text; do { struct sockaddr_rxrpc *srx = &alist->addrs[alist->nr_addrs]; - char tdelim = delim; + const char *q, *stop; if (*p == delim) { p++; @@ -130,28 +130,33 @@ struct afs_addr_list *afs_parse_text_addrs(const char *text, size_t len, if (*p == '[') { p++; - tdelim = ']'; + q = memchr(p, ']', end - p); + } else { + for (q = p; q < end; q++) + if (*q == '+' || *q == delim) + break; } - if (in4_pton(p, end - p, + if (in4_pton(p, q - p, (u8 *)&srx->transport.sin6.sin6_addr.s6_addr32[3], - tdelim, &p)) { + -1, &stop)) { srx->transport.sin6.sin6_addr.s6_addr32[0] = 0; srx->transport.sin6.sin6_addr.s6_addr32[1] = 0; srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); - } else if (in6_pton(p, end - p, + } else if (in6_pton(p, q - p, srx->transport.sin6.sin6_addr.s6_addr, - tdelim, &p)) { + -1, &stop)) { /* Nothing to do */ } else { goto bad_address; } - if (tdelim == ']') { - if (p == end || *p != ']') - goto bad_address; + if (stop != q) + goto bad_address; + + p = q; + if (q < end && *q == ']') p++; - } if (p < end) { if (*p == '+') { diff --git a/fs/afs/callback.c b/fs/afs/callback.c index abd9a84f4e88..571437dcb252 100644 --- a/fs/afs/callback.c +++ b/fs/afs/callback.c @@ -23,36 +23,55 @@ /* * Set up an interest-in-callbacks record for a volume on a server and * register it with the server. - * - Called with volume->server_sem held. + * - Called with vnode->io_lock held. */ int afs_register_server_cb_interest(struct afs_vnode *vnode, - struct afs_server_entry *entry) + struct afs_server_list *slist, + unsigned int index) { - struct afs_cb_interest *cbi = entry->cb_interest, *vcbi, *new, *x; + struct afs_server_entry *entry = &slist->servers[index]; + struct afs_cb_interest *cbi, *vcbi, *new, *old; struct afs_server *server = entry->server; again: + if (vnode->cb_interest && + likely(vnode->cb_interest == entry->cb_interest)) + return 0; + + read_lock(&slist->lock); + cbi = afs_get_cb_interest(entry->cb_interest); + read_unlock(&slist->lock); + vcbi = vnode->cb_interest; if (vcbi) { - if (vcbi == cbi) - return 0; - - if (cbi && vcbi->server == cbi->server) { - write_seqlock(&vnode->cb_lock); - vnode->cb_interest = afs_get_cb_interest(cbi); - write_sequnlock(&vnode->cb_lock); + if (vcbi == cbi) { afs_put_cb_interest(afs_v2net(vnode), cbi); return 0; } + /* Use a new interest in the server list for the same server + * rather than an old one that's still attached to a vnode. + */ + if (cbi && vcbi->server == cbi->server) { + write_seqlock(&vnode->cb_lock); + old = vnode->cb_interest; + vnode->cb_interest = cbi; + write_sequnlock(&vnode->cb_lock); + afs_put_cb_interest(afs_v2net(vnode), old); + return 0; + } + + /* Re-use the one attached to the vnode. */ if (!cbi && vcbi->server == server) { - afs_get_cb_interest(vcbi); - x = cmpxchg(&entry->cb_interest, cbi, vcbi); - if (x != cbi) { - cbi = x; - afs_put_cb_interest(afs_v2net(vnode), vcbi); + write_lock(&slist->lock); + if (entry->cb_interest) { + write_unlock(&slist->lock); + afs_put_cb_interest(afs_v2net(vnode), cbi); goto again; } + + entry->cb_interest = cbi; + write_unlock(&slist->lock); return 0; } } @@ -72,13 +91,16 @@ again: list_add_tail(&new->cb_link, &server->cb_interests); write_unlock(&server->cb_break_lock); - x = cmpxchg(&entry->cb_interest, cbi, new); - if (x == cbi) { + write_lock(&slist->lock); + if (!entry->cb_interest) { + entry->cb_interest = afs_get_cb_interest(new); cbi = new; + new = NULL; } else { - cbi = x; - afs_put_cb_interest(afs_v2net(vnode), new); + cbi = afs_get_cb_interest(entry->cb_interest); } + write_unlock(&slist->lock); + afs_put_cb_interest(afs_v2net(vnode), new); } ASSERT(cbi); @@ -88,11 +110,14 @@ again: */ write_seqlock(&vnode->cb_lock); - vnode->cb_interest = afs_get_cb_interest(cbi); + old = vnode->cb_interest; + vnode->cb_interest = cbi; vnode->cb_s_break = cbi->server->cb_s_break; + vnode->cb_v_break = vnode->volume->cb_v_break; clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); write_sequnlock(&vnode->cb_lock); + afs_put_cb_interest(afs_v2net(vnode), old); return 0; } @@ -171,13 +196,24 @@ static void afs_break_one_callback(struct afs_server *server, if (cbi->vid != fid->vid) continue; - data.volume = NULL; - data.fid = *fid; - inode = ilookup5_nowait(cbi->sb, fid->vnode, afs_iget5_test, &data); - if (inode) { - vnode = AFS_FS_I(inode); - afs_break_callback(vnode); - iput(inode); + if (fid->vnode == 0 && fid->unique == 0) { + /* The callback break applies to an entire volume. */ + struct afs_super_info *as = AFS_FS_S(cbi->sb); + struct afs_volume *volume = as->volume; + + write_lock(&volume->cb_break_lock); + volume->cb_v_break++; + write_unlock(&volume->cb_break_lock); + } else { + data.volume = NULL; + data.fid = *fid; + inode = ilookup5_nowait(cbi->sb, fid->vnode, + afs_iget5_test, &data); + if (inode) { + vnode = AFS_FS_I(inode); + afs_break_callback(vnode); + iput(inode); + } } } @@ -195,6 +231,8 @@ void afs_break_callbacks(struct afs_server *server, size_t count, ASSERT(server != NULL); ASSERTCMP(count, <=, AFSCBMAX); + /* TODO: Sort the callback break list by volume ID */ + for (; count > 0; callbacks++, count--) { _debug("- Fid { vl=%08x n=%u u=%u } CB { v=%u x=%u t=%u }", callbacks->fid.vid, diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c index 357de908df3a..c332c95a6940 100644 --- a/fs/afs/cmservice.c +++ b/fs/afs/cmservice.c @@ -133,21 +133,10 @@ bool afs_cm_incoming_call(struct afs_call *call) } /* - * clean up a cache manager call + * Clean up a cache manager call. */ static void afs_cm_destructor(struct afs_call *call) { - _enter(""); - - /* Break the callbacks here so that we do it after the final ACK is - * received. The step number here must match the final number in - * afs_deliver_cb_callback(). - */ - if (call->unmarshall == 5) { - ASSERT(call->cm_server && call->count && call->request); - afs_break_callbacks(call->cm_server, call->count, call->request); - } - kfree(call->buffer); call->buffer = NULL; } @@ -161,14 +150,14 @@ static void SRXAFSCB_CallBack(struct work_struct *work) _enter(""); - /* be sure to send the reply *before* attempting to spam the AFS server - * with FSFetchStatus requests on the vnodes with broken callbacks lest - * the AFS server get into a vicious cycle of trying to break further - * callbacks because it hadn't received completion of the CBCallBack op - * yet */ - afs_send_empty_reply(call); + /* We need to break the callbacks before sending the reply as the + * server holds up change visibility till it receives our reply so as + * to maintain cache coherency. + */ + if (call->cm_server) + afs_break_callbacks(call->cm_server, call->count, call->request); - afs_break_callbacks(call->cm_server, call->count, call->request); + afs_send_empty_reply(call); afs_put_call(call); _leave(""); } @@ -180,7 +169,6 @@ static int afs_deliver_cb_callback(struct afs_call *call) { struct afs_callback_break *cb; struct sockaddr_rxrpc srx; - struct afs_server *server; __be32 *bp; int ret, loop; @@ -267,15 +255,6 @@ static int afs_deliver_cb_callback(struct afs_call *call) call->offset = 0; call->unmarshall++; - - /* Record that the message was unmarshalled successfully so - * that the call destructor can know do the callback breaking - * work, even if the final ACK isn't received. - * - * If the step number changes, then afs_cm_destructor() must be - * updated also. - */ - call->unmarshall++; case 5: break; } @@ -286,10 +265,9 @@ static int afs_deliver_cb_callback(struct afs_call *call) /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); - server = afs_find_server(call->net, &srx); - if (!server) - return -ENOTCONN; - call->cm_server = server; + call->cm_server = afs_find_server(call->net, &srx); + if (!call->cm_server) + trace_afs_cm_no_server(call, &srx); return afs_queue_call_work(call); } @@ -303,7 +281,8 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work) _enter("{%p}", call->cm_server); - afs_init_callback_state(call->cm_server); + if (call->cm_server) + afs_init_callback_state(call->cm_server); afs_send_empty_reply(call); afs_put_call(call); _leave(""); @@ -315,7 +294,6 @@ static void SRXAFSCB_InitCallBackState(struct work_struct *work) static int afs_deliver_cb_init_call_back_state(struct afs_call *call) { struct sockaddr_rxrpc srx; - struct afs_server *server; int ret; _enter(""); @@ -328,10 +306,9 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call) /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - server = afs_find_server(call->net, &srx); - if (!server) - return -ENOTCONN; - call->cm_server = server; + call->cm_server = afs_find_server(call->net, &srx); + if (!call->cm_server) + trace_afs_cm_no_server(call, &srx); return afs_queue_call_work(call); } @@ -341,8 +318,6 @@ static int afs_deliver_cb_init_call_back_state(struct afs_call *call) */ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) { - struct sockaddr_rxrpc srx; - struct afs_server *server; struct afs_uuid *r; unsigned loop; __be32 *b; @@ -398,11 +373,11 @@ static int afs_deliver_cb_init_call_back_state3(struct afs_call *call) /* we'll need the file server record as that tells us which set of * vnodes to operate upon */ - rxrpc_kernel_get_peer(call->net->socket, call->rxcall, &srx); - server = afs_find_server(call->net, &srx); - if (!server) - return -ENOTCONN; - call->cm_server = server; + rcu_read_lock(); + call->cm_server = afs_find_server_by_uuid(call->net, call->request); + rcu_read_unlock(); + if (!call->cm_server) + trace_afs_cm_no_server_u(call, call->request); return afs_queue_call_work(call); } diff --git a/fs/afs/dir.c b/fs/afs/dir.c index 5889f70d4d27..7d623008157f 100644 --- a/fs/afs/dir.c +++ b/fs/afs/dir.c @@ -180,6 +180,7 @@ static int afs_dir_open(struct inode *inode, struct file *file) * get reclaimed during the iteration. */ static struct afs_read *afs_read_dir(struct afs_vnode *dvnode, struct key *key) + __acquires(&dvnode->validate_lock) { struct afs_read *req; loff_t i_size; @@ -261,18 +262,21 @@ retry: /* If we're going to reload, we need to lock all the pages to prevent * races. */ + ret = -ERESTARTSYS; + if (down_read_killable(&dvnode->validate_lock) < 0) + goto error; + + if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) + goto success; + + up_read(&dvnode->validate_lock); + if (down_write_killable(&dvnode->validate_lock) < 0) + goto error; + if (!test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) { - ret = -ERESTARTSYS; - for (i = 0; i < req->nr_pages; i++) - if (lock_page_killable(req->pages[i]) < 0) - goto error_unlock; - - if (test_bit(AFS_VNODE_DIR_VALID, &dvnode->flags)) - goto success; - ret = afs_fetch_data(dvnode, key, req); if (ret < 0) - goto error_unlock_all; + goto error_unlock; task_io_account_read(PAGE_SIZE * req->nr_pages); @@ -284,33 +288,26 @@ retry: for (i = 0; i < req->nr_pages; i++) if (!afs_dir_check_page(dvnode, req->pages[i], req->actual_len)) - goto error_unlock_all; + goto error_unlock; // TODO: Trim excess pages set_bit(AFS_VNODE_DIR_VALID, &dvnode->flags); } + downgrade_write(&dvnode->validate_lock); success: - i = req->nr_pages; - while (i > 0) - unlock_page(req->pages[--i]); return req; -error_unlock_all: - i = req->nr_pages; error_unlock: - while (i > 0) - unlock_page(req->pages[--i]); + up_write(&dvnode->validate_lock); error: afs_put_read(req); _leave(" = %d", ret); return ERR_PTR(ret); content_has_grown: - i = req->nr_pages; - while (i > 0) - unlock_page(req->pages[--i]); + up_write(&dvnode->validate_lock); afs_put_read(req); goto retry; } @@ -473,6 +470,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, } out: + up_read(&dvnode->validate_lock); afs_put_read(req); _leave(" = %d", ret); return ret; @@ -1143,7 +1141,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_create(&fc, dentry->d_name.name, mode, data_version, &newfid, &newstatus, &newcb); } @@ -1213,7 +1211,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_remove(&fc, dentry->d_name.name, true, data_version); } @@ -1316,7 +1314,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_remove(&fc, dentry->d_name.name, false, data_version); } @@ -1373,7 +1371,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_create(&fc, dentry->d_name.name, mode, data_version, &newfid, &newstatus, &newcb); } @@ -1443,8 +1441,8 @@ static int afs_link(struct dentry *from, struct inode *dir, } while (afs_select_fileserver(&fc)) { - fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; - fc.cb_break_2 = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(dvnode); + fc.cb_break_2 = afs_calc_vnode_cb_break(vnode); afs_fs_link(&fc, vnode, dentry->d_name.name, data_version); } @@ -1512,7 +1510,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, dvnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = dvnode->cb_break + dvnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(dvnode); afs_fs_symlink(&fc, dentry->d_name.name, content, data_version, &newfid, &newstatus); @@ -1588,8 +1586,8 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry, } } while (afs_select_fileserver(&fc)) { - fc.cb_break = orig_dvnode->cb_break + orig_dvnode->cb_s_break; - fc.cb_break_2 = new_dvnode->cb_break + new_dvnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(orig_dvnode); + fc.cb_break_2 = afs_calc_vnode_cb_break(new_dvnode); afs_fs_rename(&fc, old_dentry->d_name.name, new_dvnode, new_dentry->d_name.name, orig_data_version, new_data_version); diff --git a/fs/afs/file.c b/fs/afs/file.c index c24c08016dd9..7d4f26198573 100644 --- a/fs/afs/file.c +++ b/fs/afs/file.c @@ -238,7 +238,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct key *key, struct afs_read *de ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_fetch_data(&fc, desc); } diff --git a/fs/afs/flock.c b/fs/afs/flock.c index 7a0e017070ec..dc62d15a964b 100644 --- a/fs/afs/flock.c +++ b/fs/afs/flock.c @@ -86,7 +86,7 @@ static int afs_set_lock(struct afs_vnode *vnode, struct key *key, ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_set_lock(&fc, type); } @@ -117,7 +117,7 @@ static int afs_extend_lock(struct afs_vnode *vnode, struct key *key) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_current_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_extend_lock(&fc); } @@ -148,7 +148,7 @@ static int afs_release_lock(struct afs_vnode *vnode, struct key *key) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_current_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_release_lock(&fc); } diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c index efacdb7c1dee..b273e1d60478 100644 --- a/fs/afs/fsclient.c +++ b/fs/afs/fsclient.c @@ -134,6 +134,7 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, struct afs_read *read_req) { const struct afs_xdr_AFSFetchStatus *xdr = (const void *)*_bp; + bool inline_error = (call->operation_ID == afs_FS_InlineBulkStatus); u64 data_version, size; u32 type, abort_code; u8 flags = 0; @@ -142,13 +143,32 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, if (vnode) write_seqlock(&vnode->cb_lock); + abort_code = ntohl(xdr->abort_code); + if (xdr->if_version != htonl(AFS_FSTATUS_VERSION)) { + if (xdr->if_version == htonl(0) && + abort_code != 0 && + inline_error) { + /* The OpenAFS fileserver has a bug in FS.InlineBulkStatus + * whereby it doesn't set the interface version in the error + * case. + */ + status->abort_code = abort_code; + ret = 0; + goto out; + } + pr_warn("Unknown AFSFetchStatus version %u\n", ntohl(xdr->if_version)); goto bad; } + if (abort_code != 0 && inline_error) { + status->abort_code = abort_code; + ret = 0; + goto out; + } + type = ntohl(xdr->type); - abort_code = ntohl(xdr->abort_code); switch (type) { case AFS_FTYPE_FILE: case AFS_FTYPE_DIR: @@ -165,13 +185,6 @@ static int xdr_decode_AFSFetchStatus(struct afs_call *call, } status->type = type; break; - case AFS_FTYPE_INVALID: - if (abort_code != 0) { - status->abort_code = abort_code; - ret = 0; - goto out; - } - /* Fall through */ default: goto bad; } @@ -248,7 +261,7 @@ static void xdr_decode_AFSCallBack(struct afs_call *call, write_seqlock(&vnode->cb_lock); - if (call->cb_break == (vnode->cb_break + cbi->server->cb_s_break)) { + if (call->cb_break == afs_cb_break_sum(vnode, cbi)) { vnode->cb_version = ntohl(*bp++); cb_expiry = ntohl(*bp++); vnode->cb_type = ntohl(*bp++); diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 06194cfe9724..479b7fdda124 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -108,7 +108,7 @@ int afs_fetch_status(struct afs_vnode *vnode, struct key *key, bool new_inode) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_fetch_file_status(&fc, NULL, new_inode); } @@ -393,15 +393,18 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) read_seqlock_excl(&vnode->cb_lock); if (test_bit(AFS_VNODE_CB_PROMISED, &vnode->flags)) { - if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break) { + if (vnode->cb_s_break != vnode->cb_interest->server->cb_s_break || + vnode->cb_v_break != vnode->volume->cb_v_break) { vnode->cb_s_break = vnode->cb_interest->server->cb_s_break; + vnode->cb_v_break = vnode->volume->cb_v_break; + valid = false; } else if (vnode->status.type == AFS_FTYPE_DIR && test_bit(AFS_VNODE_DIR_VALID, &vnode->flags) && vnode->cb_expires_at - 10 > now) { - valid = true; + valid = true; } else if (!test_bit(AFS_VNODE_ZAP_DATA, &vnode->flags) && vnode->cb_expires_at - 10 > now) { - valid = true; + valid = true; } } else if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) { valid = true; @@ -415,7 +418,7 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) if (valid) goto valid; - mutex_lock(&vnode->validate_lock); + down_write(&vnode->validate_lock); /* if the promise has expired, we need to check the server again to get * a new promise - note that if the (parent) directory's metadata was @@ -444,13 +447,13 @@ int afs_validate(struct afs_vnode *vnode, struct key *key) * different */ if (test_and_clear_bit(AFS_VNODE_ZAP_DATA, &vnode->flags)) afs_zap_data(vnode); - mutex_unlock(&vnode->validate_lock); + up_write(&vnode->validate_lock); valid: _leave(" = 0"); return 0; error_unlock: - mutex_unlock(&vnode->validate_lock); + up_write(&vnode->validate_lock); _leave(" = %d", ret); return ret; } @@ -574,7 +577,7 @@ int afs_setattr(struct dentry *dentry, struct iattr *attr) ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_setattr(&fc, attr); } diff --git a/fs/afs/internal.h b/fs/afs/internal.h index f8086ec95e24..e3f8a46663db 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -396,6 +396,7 @@ struct afs_server { #define AFS_SERVER_FL_PROBED 5 /* The fileserver has been probed */ #define AFS_SERVER_FL_PROBING 6 /* Fileserver is being probed */ #define AFS_SERVER_FL_NO_IBULK 7 /* Fileserver doesn't support FS.InlineBulkStatus */ +#define AFS_SERVER_FL_MAY_HAVE_CB 8 /* May have callbacks on this fileserver */ atomic_t usage; u32 addr_version; /* Address list version */ @@ -433,6 +434,7 @@ struct afs_server_list { unsigned short index; /* Server currently in use */ unsigned short vnovol_mask; /* Servers to be skipped due to VNOVOL */ unsigned int seq; /* Set to ->servers_seq when installed */ + rwlock_t lock; struct afs_server_entry servers[]; }; @@ -459,6 +461,9 @@ struct afs_volume { rwlock_t servers_lock; /* Lock for ->servers */ unsigned int servers_seq; /* Incremented each time ->servers changes */ + unsigned cb_v_break; /* Break-everything counter. */ + rwlock_t cb_break_lock; + afs_voltype_t type; /* type of volume */ short error; char type_force; /* force volume type (suppress R/O -> R/W) */ @@ -494,7 +499,7 @@ struct afs_vnode { #endif struct afs_permits __rcu *permit_cache; /* cache of permits so far obtained */ struct mutex io_lock; /* Lock for serialising I/O on this mutex */ - struct mutex validate_lock; /* lock for validating this vnode */ + struct rw_semaphore validate_lock; /* lock for validating this vnode */ spinlock_t wb_lock; /* lock for wb_keys */ spinlock_t lock; /* waitqueue/flags lock */ unsigned long flags; @@ -519,6 +524,7 @@ struct afs_vnode { /* outstanding callback notification on this file */ struct afs_cb_interest *cb_interest; /* Server on which this resides */ unsigned int cb_s_break; /* Mass break counter on ->server */ + unsigned int cb_v_break; /* Mass break counter on ->volume */ unsigned int cb_break; /* Break counter on vnode */ seqlock_t cb_lock; /* Lock for ->cb_interest, ->status, ->cb_*break */ @@ -648,16 +654,29 @@ extern void afs_init_callback_state(struct afs_server *); extern void afs_break_callback(struct afs_vnode *); extern void afs_break_callbacks(struct afs_server *, size_t, struct afs_callback_break*); -extern int afs_register_server_cb_interest(struct afs_vnode *, struct afs_server_entry *); +extern int afs_register_server_cb_interest(struct afs_vnode *, + struct afs_server_list *, unsigned int); extern void afs_put_cb_interest(struct afs_net *, struct afs_cb_interest *); extern void afs_clear_callback_interests(struct afs_net *, struct afs_server_list *); static inline struct afs_cb_interest *afs_get_cb_interest(struct afs_cb_interest *cbi) { - refcount_inc(&cbi->usage); + if (cbi) + refcount_inc(&cbi->usage); return cbi; } +static inline unsigned int afs_calc_vnode_cb_break(struct afs_vnode *vnode) +{ + return vnode->cb_break + vnode->cb_s_break + vnode->cb_v_break; +} + +static inline unsigned int afs_cb_break_sum(struct afs_vnode *vnode, + struct afs_cb_interest *cbi) +{ + return vnode->cb_break + cbi->server->cb_s_break + vnode->volume->cb_v_break; +} + /* * cell.c */ diff --git a/fs/afs/rotate.c b/fs/afs/rotate.c index ac0feac9d746..e065bc0768e6 100644 --- a/fs/afs/rotate.c +++ b/fs/afs/rotate.c @@ -179,7 +179,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) */ if (fc->flags & AFS_FS_CURSOR_VNOVOL) { fc->ac.error = -EREMOTEIO; - goto failed; + goto next_server; } write_lock(&vnode->volume->servers_lock); @@ -201,7 +201,7 @@ bool afs_select_fileserver(struct afs_fs_cursor *fc) */ if (vnode->volume->servers == fc->server_list) { fc->ac.error = -EREMOTEIO; - goto failed; + goto next_server; } /* Try again */ @@ -350,8 +350,8 @@ use_server: * break request before we've finished decoding the reply and * installing the vnode. */ - fc->ac.error = afs_register_server_cb_interest( - vnode, &fc->server_list->servers[fc->index]); + fc->ac.error = afs_register_server_cb_interest(vnode, fc->server_list, + fc->index); if (fc->ac.error < 0) goto failed; @@ -369,8 +369,16 @@ use_server: if (!test_bit(AFS_SERVER_FL_PROBED, &server->flags)) { fc->ac.alist = afs_get_addrlist(alist); - if (!afs_probe_fileserver(fc)) - goto failed; + if (!afs_probe_fileserver(fc)) { + switch (fc->ac.error) { + case -ENOMEM: + case -ERESTARTSYS: + case -EINTR: + goto failed; + default: + goto next_server; + } + } } if (!fc->ac.alist) diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 5c6263972ec9..08735948f15d 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -41,6 +41,7 @@ int afs_open_socket(struct afs_net *net) { struct sockaddr_rxrpc srx; struct socket *socket; + unsigned int min_level; int ret; _enter(""); @@ -60,6 +61,12 @@ int afs_open_socket(struct afs_net *net) srx.transport.sin6.sin6_family = AF_INET6; srx.transport.sin6.sin6_port = htons(AFS_CM_PORT); + min_level = RXRPC_SECURITY_ENCRYPT; + ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL, + (void *)&min_level, sizeof(min_level)); + if (ret < 0) + goto error_2; + ret = kernel_bind(socket, (struct sockaddr *) &srx, sizeof(srx)); if (ret == -EADDRINUSE) { srx.transport.sin6.sin6_port = 0; @@ -482,8 +489,12 @@ static void afs_deliver_to_call(struct afs_call *call) state = READ_ONCE(call->state); switch (ret) { case 0: - if (state == AFS_CALL_CL_PROC_REPLY) + if (state == AFS_CALL_CL_PROC_REPLY) { + if (call->cbi) + set_bit(AFS_SERVER_FL_MAY_HAVE_CB, + &call->cbi->server->flags); goto call_complete; + } ASSERTCMP(state, >, AFS_CALL_CL_PROC_REPLY); goto done; case -EINPROGRESS: @@ -493,11 +504,6 @@ static void afs_deliver_to_call(struct afs_call *call) case -ECONNABORTED: ASSERTCMP(state, ==, AFS_CALL_COMPLETE); goto done; - case -ENOTCONN: - abort_code = RX_CALL_DEAD; - rxrpc_kernel_abort_call(call->net->socket, call->rxcall, - abort_code, ret, "KNC"); - goto local_abort; case -ENOTSUPP: abort_code = RXGEN_OPCODE; rxrpc_kernel_abort_call(call->net->socket, call->rxcall, diff --git a/fs/afs/security.c b/fs/afs/security.c index cea2fff313dc..1992b0ffa543 100644 --- a/fs/afs/security.c +++ b/fs/afs/security.c @@ -147,8 +147,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, break; } - if (cb_break != (vnode->cb_break + - vnode->cb_interest->server->cb_s_break)) { + if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) { changed = true; break; } @@ -178,7 +177,7 @@ void afs_cache_permit(struct afs_vnode *vnode, struct key *key, } } - if (cb_break != (vnode->cb_break + vnode->cb_interest->server->cb_s_break)) + if (cb_break != afs_cb_break_sum(vnode, vnode->cb_interest)) goto someone_else_changed_it; /* We need a ref on any permits list we want to copy as we'll have to @@ -257,7 +256,7 @@ found: spin_lock(&vnode->lock); zap = rcu_access_pointer(vnode->permit_cache); - if (cb_break == (vnode->cb_break + vnode->cb_interest->server->cb_s_break) && + if (cb_break == afs_cb_break_sum(vnode, vnode->cb_interest) && zap == permits) rcu_assign_pointer(vnode->permit_cache, replacement); else diff --git a/fs/afs/server.c b/fs/afs/server.c index 629c74986cff..3af4625e2f8c 100644 --- a/fs/afs/server.c +++ b/fs/afs/server.c @@ -67,12 +67,6 @@ struct afs_server *afs_find_server(struct afs_net *net, sizeof(struct in6_addr)); if (diff == 0) goto found; - if (diff < 0) { - // TODO: Sort the list - //if (i == alist->nr_ipv4) - // goto not_found; - break; - } } } } else { @@ -87,17 +81,10 @@ struct afs_server *afs_find_server(struct afs_net *net, (u32 __force)b->sin6_addr.s6_addr32[3]); if (diff == 0) goto found; - if (diff < 0) { - // TODO: Sort the list - //if (i == 0) - // goto not_found; - break; - } } } } - //not_found: server = NULL; found: if (server && !atomic_inc_not_zero(&server->usage)) @@ -395,14 +382,16 @@ static void afs_destroy_server(struct afs_net *net, struct afs_server *server) struct afs_addr_list *alist = rcu_access_pointer(server->addresses); struct afs_addr_cursor ac = { .alist = alist, - .addr = &alist->addrs[0], .start = alist->index, - .index = alist->index, + .index = 0, + .addr = &alist->addrs[alist->index], .error = 0, }; _enter("%p", server); - afs_fs_give_up_all_callbacks(net, server, &ac, NULL); + if (test_bit(AFS_SERVER_FL_MAY_HAVE_CB, &server->flags)) + afs_fs_give_up_all_callbacks(net, server, &ac, NULL); + call_rcu(&server->rcu, afs_server_rcu); afs_dec_servers_outstanding(net); } diff --git a/fs/afs/server_list.c b/fs/afs/server_list.c index 0f8dc4c8f07c..8a5760aa5832 100644 --- a/fs/afs/server_list.c +++ b/fs/afs/server_list.c @@ -49,6 +49,7 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, goto error; refcount_set(&slist->usage, 1); + rwlock_init(&slist->lock); /* Make sure a records exists for each server in the list. */ for (i = 0; i < vldb->nr_servers; i++) { @@ -64,9 +65,11 @@ struct afs_server_list *afs_alloc_server_list(struct afs_cell *cell, goto error_2; } - /* Insertion-sort by server pointer */ + /* Insertion-sort by UUID */ for (j = 0; j < slist->nr_servers; j++) - if (slist->servers[j].server >= server) + if (memcmp(&slist->servers[j].server->uuid, + &server->uuid, + sizeof(server->uuid)) >= 0) break; if (j < slist->nr_servers) { if (slist->servers[j].server == server) { diff --git a/fs/afs/super.c b/fs/afs/super.c index 65081ec3c36e..9e5d7966621c 100644 --- a/fs/afs/super.c +++ b/fs/afs/super.c @@ -590,7 +590,7 @@ static void afs_i_init_once(void *_vnode) memset(vnode, 0, sizeof(*vnode)); inode_init_once(&vnode->vfs_inode); mutex_init(&vnode->io_lock); - mutex_init(&vnode->validate_lock); + init_rwsem(&vnode->validate_lock); spin_lock_init(&vnode->wb_lock); spin_lock_init(&vnode->lock); INIT_LIST_HEAD(&vnode->wb_keys); @@ -688,7 +688,7 @@ static int afs_statfs(struct dentry *dentry, struct kstatfs *buf) if (afs_begin_vnode_operation(&fc, vnode, key)) { fc.flags |= AFS_FS_CURSOR_NO_VSLEEP; while (afs_select_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_get_volume_status(&fc, &vs); } diff --git a/fs/afs/write.c b/fs/afs/write.c index c164698dc304..8b39e6ebb40b 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -351,7 +351,7 @@ found_key: ret = -ERESTARTSYS; if (afs_begin_vnode_operation(&fc, vnode, wbk->key)) { while (afs_select_fileserver(&fc)) { - fc.cb_break = vnode->cb_break + vnode->cb_s_break; + fc.cb_break = afs_calc_vnode_cb_break(vnode); afs_fs_store_data(&fc, mapping, first, last, offset, to); } diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 3fd44835b386..8c68961925b1 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -2436,10 +2436,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, if (p->reada != READA_NONE) reada_for_search(fs_info, p, level, slot, key->objectid); - btrfs_release_path(p); - ret = -EAGAIN; - tmp = read_tree_block(fs_info, blocknr, 0, parent_level - 1, + tmp = read_tree_block(fs_info, blocknr, gen, parent_level - 1, &first_key); if (!IS_ERR(tmp)) { /* @@ -2454,6 +2452,8 @@ read_block_for_search(struct btrfs_root *root, struct btrfs_path *p, } else { ret = PTR_ERR(tmp); } + + btrfs_release_path(p); return ret; } @@ -5414,12 +5414,24 @@ int btrfs_compare_trees(struct btrfs_root *left_root, down_read(&fs_info->commit_root_sem); left_level = btrfs_header_level(left_root->commit_root); left_root_level = left_level; - left_path->nodes[left_level] = left_root->commit_root; + left_path->nodes[left_level] = + btrfs_clone_extent_buffer(left_root->commit_root); + if (!left_path->nodes[left_level]) { + up_read(&fs_info->commit_root_sem); + ret = -ENOMEM; + goto out; + } extent_buffer_get(left_path->nodes[left_level]); right_level = btrfs_header_level(right_root->commit_root); right_root_level = right_level; - right_path->nodes[right_level] = right_root->commit_root; + right_path->nodes[right_level] = + btrfs_clone_extent_buffer(right_root->commit_root); + if (!right_path->nodes[right_level]) { + up_read(&fs_info->commit_root_sem); + ret = -ENOMEM; + goto out; + } extent_buffer_get(right_path->nodes[right_level]); up_read(&fs_info->commit_root_sem); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 2771cc56a622..0d422c9908b8 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3182,6 +3182,8 @@ noinline int can_nocow_extent(struct inode *inode, u64 offset, u64 *len, u64 *orig_start, u64 *orig_block_len, u64 *ram_bytes); +void __btrfs_del_delalloc_inode(struct btrfs_root *root, + struct btrfs_inode *inode); struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry); int btrfs_set_inode_index(struct btrfs_inode *dir, u64 *index); int btrfs_unlink_inode(struct btrfs_trans_handle *trans, diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 60caa68c3618..c3504b4d281b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3818,6 +3818,7 @@ void close_ctree(struct btrfs_fs_info *fs_info) set_bit(BTRFS_FS_CLOSING_DONE, &fs_info->flags); btrfs_free_qgroup_config(fs_info); + ASSERT(list_empty(&fs_info->delalloc_roots)); if (percpu_counter_sum(&fs_info->delalloc_bytes)) { btrfs_info(fs_info, "at unmount delalloc count %lld", @@ -4125,15 +4126,15 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info) static void btrfs_error_commit_super(struct btrfs_fs_info *fs_info) { + /* cleanup FS via transaction */ + btrfs_cleanup_transaction(fs_info); + mutex_lock(&fs_info->cleaner_mutex); btrfs_run_delayed_iputs(fs_info); mutex_unlock(&fs_info->cleaner_mutex); down_write(&fs_info->cleanup_work_sem); up_write(&fs_info->cleanup_work_sem); - - /* cleanup FS via transaction */ - btrfs_cleanup_transaction(fs_info); } static void btrfs_destroy_ordered_extents(struct btrfs_root *root) @@ -4258,19 +4259,23 @@ static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) list_splice_init(&root->delalloc_inodes, &splice); while (!list_empty(&splice)) { + struct inode *inode = NULL; btrfs_inode = list_first_entry(&splice, struct btrfs_inode, delalloc_inodes); - - list_del_init(&btrfs_inode->delalloc_inodes); - clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, - &btrfs_inode->runtime_flags); + __btrfs_del_delalloc_inode(root, btrfs_inode); spin_unlock(&root->delalloc_lock); - btrfs_invalidate_inodes(btrfs_inode->root); - + /* + * Make sure we get a live inode and that it'll not disappear + * meanwhile. + */ + inode = igrab(&btrfs_inode->vfs_inode); + if (inode) { + invalidate_inode_pages2(inode->i_mapping); + iput(inode); + } spin_lock(&root->delalloc_lock); } - spin_unlock(&root->delalloc_lock); } @@ -4286,7 +4291,6 @@ static void btrfs_destroy_all_delalloc_inodes(struct btrfs_fs_info *fs_info) while (!list_empty(&splice)) { root = list_first_entry(&splice, struct btrfs_root, delalloc_root); - list_del_init(&root->delalloc_root); root = btrfs_grab_fs_root(root); BUG_ON(!root); spin_unlock(&fs_info->delalloc_root_lock); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index d241285a0d2a..8e604e7071f1 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1742,12 +1742,12 @@ static void btrfs_add_delalloc_inodes(struct btrfs_root *root, spin_unlock(&root->delalloc_lock); } -static void btrfs_del_delalloc_inode(struct btrfs_root *root, - struct btrfs_inode *inode) + +void __btrfs_del_delalloc_inode(struct btrfs_root *root, + struct btrfs_inode *inode) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->vfs_inode.i_sb); - spin_lock(&root->delalloc_lock); if (!list_empty(&inode->delalloc_inodes)) { list_del_init(&inode->delalloc_inodes); clear_bit(BTRFS_INODE_IN_DELALLOC_LIST, @@ -1760,6 +1760,13 @@ static void btrfs_del_delalloc_inode(struct btrfs_root *root, spin_unlock(&fs_info->delalloc_root_lock); } } +} + +static void btrfs_del_delalloc_inode(struct btrfs_root *root, + struct btrfs_inode *inode) +{ + spin_lock(&root->delalloc_lock); + __btrfs_del_delalloc_inode(root, inode); spin_unlock(&root->delalloc_lock); } diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c index 53a8c95828e3..dc6140013ae8 100644 --- a/fs/btrfs/props.c +++ b/fs/btrfs/props.c @@ -380,6 +380,7 @@ static int prop_compression_apply(struct inode *inode, const char *value, size_t len) { + struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); int type; if (len == 0) { @@ -390,14 +391,17 @@ static int prop_compression_apply(struct inode *inode, return 0; } - if (!strncmp("lzo", value, 3)) + if (!strncmp("lzo", value, 3)) { type = BTRFS_COMPRESS_LZO; - else if (!strncmp("zlib", value, 4)) + btrfs_set_fs_incompat(fs_info, COMPRESS_LZO); + } else if (!strncmp("zlib", value, 4)) { type = BTRFS_COMPRESS_ZLIB; - else if (!strncmp("zstd", value, len)) + } else if (!strncmp("zstd", value, len)) { type = BTRFS_COMPRESS_ZSTD; - else + btrfs_set_fs_incompat(fs_info, COMPRESS_ZSTD); + } else { return -EINVAL; + } BTRFS_I(inode)->flags &= ~BTRFS_INODE_NOCOMPRESS; BTRFS_I(inode)->flags |= BTRFS_INODE_COMPRESS; diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 43758e30aa7a..8f23a94dab77 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4320,6 +4320,110 @@ static int log_one_extent(struct btrfs_trans_handle *trans, return ret; } +/* + * Log all prealloc extents beyond the inode's i_size to make sure we do not + * lose them after doing a fast fsync and replaying the log. We scan the + * subvolume's root instead of iterating the inode's extent map tree because + * otherwise we can log incorrect extent items based on extent map conversion. + * That can happen due to the fact that extent maps are merged when they + * are not in the extent map tree's list of modified extents. + */ +static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, + struct btrfs_inode *inode, + struct btrfs_path *path) +{ + struct btrfs_root *root = inode->root; + struct btrfs_key key; + const u64 i_size = i_size_read(&inode->vfs_inode); + const u64 ino = btrfs_ino(inode); + struct btrfs_path *dst_path = NULL; + u64 last_extent = (u64)-1; + int ins_nr = 0; + int start_slot; + int ret; + + if (!(inode->flags & BTRFS_INODE_PREALLOC)) + return 0; + + key.objectid = ino; + key.type = BTRFS_EXTENT_DATA_KEY; + key.offset = i_size; + ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); + if (ret < 0) + goto out; + + while (true) { + struct extent_buffer *leaf = path->nodes[0]; + int slot = path->slots[0]; + + if (slot >= btrfs_header_nritems(leaf)) { + if (ins_nr > 0) { + ret = copy_items(trans, inode, dst_path, path, + &last_extent, start_slot, + ins_nr, 1, 0); + if (ret < 0) + goto out; + ins_nr = 0; + } + ret = btrfs_next_leaf(root, path); + if (ret < 0) + goto out; + if (ret > 0) { + ret = 0; + break; + } + continue; + } + + btrfs_item_key_to_cpu(leaf, &key, slot); + if (key.objectid > ino) + break; + if (WARN_ON_ONCE(key.objectid < ino) || + key.type < BTRFS_EXTENT_DATA_KEY || + key.offset < i_size) { + path->slots[0]++; + continue; + } + if (last_extent == (u64)-1) { + last_extent = key.offset; + /* + * Avoid logging extent items logged in past fsync calls + * and leading to duplicate keys in the log tree. + */ + do { + ret = btrfs_truncate_inode_items(trans, + root->log_root, + &inode->vfs_inode, + i_size, + BTRFS_EXTENT_DATA_KEY); + } while (ret == -EAGAIN); + if (ret) + goto out; + } + if (ins_nr == 0) + start_slot = slot; + ins_nr++; + path->slots[0]++; + if (!dst_path) { + dst_path = btrfs_alloc_path(); + if (!dst_path) { + ret = -ENOMEM; + goto out; + } + } + } + if (ins_nr > 0) { + ret = copy_items(trans, inode, dst_path, path, &last_extent, + start_slot, ins_nr, 1, 0); + if (ret > 0) + ret = 0; + } +out: + btrfs_release_path(path); + btrfs_free_path(dst_path); + return ret; +} + static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct btrfs_inode *inode, @@ -4362,6 +4466,11 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, if (em->generation <= test_gen) continue; + /* We log prealloc extents beyond eof later. */ + if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) && + em->start >= i_size_read(&inode->vfs_inode)) + continue; + if (em->start < logged_start) logged_start = em->start; if ((em->start + em->len - 1) > logged_end) @@ -4374,31 +4483,6 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans, num++; } - /* - * Add all prealloc extents beyond the inode's i_size to make sure we - * don't lose them after doing a fast fsync and replaying the log. - */ - if (inode->flags & BTRFS_INODE_PREALLOC) { - struct rb_node *node; - - for (node = rb_last(&tree->map); node; node = rb_prev(node)) { - em = rb_entry(node, struct extent_map, rb_node); - if (em->start < i_size_read(&inode->vfs_inode)) - break; - if (!list_empty(&em->list)) - continue; - /* Same as above loop. */ - if (++num > 32768) { - list_del_init(&tree->modified_extents); - ret = -EFBIG; - goto process; - } - refcount_inc(&em->refs); - set_bit(EXTENT_FLAG_LOGGING, &em->flags); - list_add_tail(&em->list, &extents); - } - } - list_sort(NULL, &extents, extent_cmp); btrfs_get_logged_extents(inode, logged_list, logged_start, logged_end); /* @@ -4443,6 +4527,9 @@ process: up_write(&inode->dio_sem); btrfs_release_path(path); + if (!ret) + ret = btrfs_log_prealloc_extents(trans, inode, path); + return ret; } @@ -4827,6 +4914,7 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans, struct extent_map_tree *em_tree = &inode->extent_tree; u64 logged_isize = 0; bool need_log_inode_item = true; + bool xattrs_logged = false; path = btrfs_alloc_path(); if (!path) @@ -5128,6 +5216,7 @@ next_key: err = btrfs_log_all_xattrs(trans, root, inode, path, dst_path); if (err) goto out_unlock; + xattrs_logged = true; if (max_key.type >= BTRFS_EXTENT_DATA_KEY && !fast_search) { btrfs_release_path(path); btrfs_release_path(dst_path); @@ -5140,6 +5229,11 @@ log_extents: btrfs_release_path(dst_path); if (need_log_inode_item) { err = log_inode_item(trans, log, dst_path, inode); + if (!err && !xattrs_logged) { + err = btrfs_log_all_xattrs(trans, root, inode, path, + dst_path); + btrfs_release_path(path); + } if (err) goto out_unlock; } diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 292266f6ab9c..be3fc701f389 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -4052,6 +4052,15 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info) return 0; } + /* + * A ro->rw remount sequence should continue with the paused balance + * regardless of who pauses it, system or the user as of now, so set + * the resume flag. + */ + spin_lock(&fs_info->balance_lock); + fs_info->balance_ctl->flags |= BTRFS_BALANCE_RESUME; + spin_unlock(&fs_info->balance_lock); + tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); return PTR_ERR_OR_ZERO(tsk); } diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index f715609b13f3..5a5a0158cc8f 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -1047,6 +1047,18 @@ out: return rc; } +/* + * Directory operations under CIFS/SMB2/SMB3 are synchronous, so fsync() + * is a dummy operation. + */ +static int cifs_dir_fsync(struct file *file, loff_t start, loff_t end, int datasync) +{ + cifs_dbg(FYI, "Sync directory - name: %pD datasync: 0x%x\n", + file, datasync); + + return 0; +} + static ssize_t cifs_copy_file_range(struct file *src_file, loff_t off, struct file *dst_file, loff_t destoff, size_t len, unsigned int flags) @@ -1181,6 +1193,7 @@ const struct file_operations cifs_dir_ops = { .copy_file_range = cifs_copy_file_range, .clone_file_range = cifs_clone_file_range, .llseek = generic_file_llseek, + .fsync = cifs_dir_fsync, }; static void diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index a5aa158d535a..7a10a5d0731f 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -1977,14 +1977,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, goto cifs_parse_mount_err; } -#ifdef CONFIG_CIFS_SMB_DIRECT - if (vol->rdma && vol->sign) { - cifs_dbg(VFS, "Currently SMB direct doesn't support signing." - " This is being fixed\n"); - goto cifs_parse_mount_err; - } -#endif - #ifndef CONFIG_KEYS /* Muliuser mounts require CONFIG_KEYS support */ if (vol->multiuser) { diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index b76b85881dcc..9c6d95ffca97 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -589,9 +589,15 @@ smb2_query_eas(const unsigned int xid, struct cifs_tcon *tcon, SMB2_close(xid, tcon, fid.persistent_fid, fid.volatile_fid); + /* + * If ea_name is NULL (listxattr) and there are no EAs, return 0 as it's + * not an error. Otherwise, the specified ea_name was not found. + */ if (!rc) rc = move_smb2_ea_to_cifs(ea_data, buf_size, smb2_data, SMB2_MAX_EA_BUF, ea_name); + else if (!ea_name && rc == -ENODATA) + rc = 0; kfree(smb2_data); return rc; diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c index 60db51bae0e3..0f48741a0130 100644 --- a/fs/cifs/smb2pdu.c +++ b/fs/cifs/smb2pdu.c @@ -730,19 +730,14 @@ neg_exit: int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) { - int rc = 0; - struct validate_negotiate_info_req vneg_inbuf; + int rc; + struct validate_negotiate_info_req *pneg_inbuf; struct validate_negotiate_info_rsp *pneg_rsp = NULL; u32 rsplen; u32 inbuflen; /* max of 4 dialects */ cifs_dbg(FYI, "validate negotiate\n"); -#ifdef CONFIG_CIFS_SMB_DIRECT - if (tcon->ses->server->rdma) - return 0; -#endif - /* In SMB3.11 preauth integrity supersedes validate negotiate */ if (tcon->ses->server->dialect == SMB311_PROT_ID) return 0; @@ -765,63 +760,69 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) if (tcon->ses->session_flags & SMB2_SESSION_FLAG_IS_NULL) cifs_dbg(VFS, "Unexpected null user (anonymous) auth flag sent by server\n"); - vneg_inbuf.Capabilities = + pneg_inbuf = kmalloc(sizeof(*pneg_inbuf), GFP_NOFS); + if (!pneg_inbuf) + return -ENOMEM; + + pneg_inbuf->Capabilities = cpu_to_le32(tcon->ses->server->vals->req_capabilities); - memcpy(vneg_inbuf.Guid, tcon->ses->server->client_guid, + memcpy(pneg_inbuf->Guid, tcon->ses->server->client_guid, SMB2_CLIENT_GUID_SIZE); if (tcon->ses->sign) - vneg_inbuf.SecurityMode = + pneg_inbuf->SecurityMode = cpu_to_le16(SMB2_NEGOTIATE_SIGNING_REQUIRED); else if (global_secflags & CIFSSEC_MAY_SIGN) - vneg_inbuf.SecurityMode = + pneg_inbuf->SecurityMode = cpu_to_le16(SMB2_NEGOTIATE_SIGNING_ENABLED); else - vneg_inbuf.SecurityMode = 0; + pneg_inbuf->SecurityMode = 0; if (strcmp(tcon->ses->server->vals->version_string, SMB3ANY_VERSION_STRING) == 0) { - vneg_inbuf.Dialects[0] = cpu_to_le16(SMB30_PROT_ID); - vneg_inbuf.Dialects[1] = cpu_to_le16(SMB302_PROT_ID); - vneg_inbuf.DialectCount = cpu_to_le16(2); + pneg_inbuf->Dialects[0] = cpu_to_le16(SMB30_PROT_ID); + pneg_inbuf->Dialects[1] = cpu_to_le16(SMB302_PROT_ID); + pneg_inbuf->DialectCount = cpu_to_le16(2); /* structure is big enough for 3 dialects, sending only 2 */ - inbuflen = sizeof(struct validate_negotiate_info_req) - 2; + inbuflen = sizeof(*pneg_inbuf) - + sizeof(pneg_inbuf->Dialects[0]); } else if (strcmp(tcon->ses->server->vals->version_string, SMBDEFAULT_VERSION_STRING) == 0) { - vneg_inbuf.Dialects[0] = cpu_to_le16(SMB21_PROT_ID); - vneg_inbuf.Dialects[1] = cpu_to_le16(SMB30_PROT_ID); - vneg_inbuf.Dialects[2] = cpu_to_le16(SMB302_PROT_ID); - vneg_inbuf.DialectCount = cpu_to_le16(3); + pneg_inbuf->Dialects[0] = cpu_to_le16(SMB21_PROT_ID); + pneg_inbuf->Dialects[1] = cpu_to_le16(SMB30_PROT_ID); + pneg_inbuf->Dialects[2] = cpu_to_le16(SMB302_PROT_ID); + pneg_inbuf->DialectCount = cpu_to_le16(3); /* structure is big enough for 3 dialects */ - inbuflen = sizeof(struct validate_negotiate_info_req); + inbuflen = sizeof(*pneg_inbuf); } else { /* otherwise specific dialect was requested */ - vneg_inbuf.Dialects[0] = + pneg_inbuf->Dialects[0] = cpu_to_le16(tcon->ses->server->vals->protocol_id); - vneg_inbuf.DialectCount = cpu_to_le16(1); + pneg_inbuf->DialectCount = cpu_to_le16(1); /* structure is big enough for 3 dialects, sending only 1 */ - inbuflen = sizeof(struct validate_negotiate_info_req) - 4; + inbuflen = sizeof(*pneg_inbuf) - + sizeof(pneg_inbuf->Dialects[0]) * 2; } rc = SMB2_ioctl(xid, tcon, NO_FILE_ID, NO_FILE_ID, FSCTL_VALIDATE_NEGOTIATE_INFO, true /* is_fsctl */, - (char *)&vneg_inbuf, sizeof(struct validate_negotiate_info_req), - (char **)&pneg_rsp, &rsplen); + (char *)pneg_inbuf, inbuflen, (char **)&pneg_rsp, &rsplen); if (rc != 0) { cifs_dbg(VFS, "validate protocol negotiate failed: %d\n", rc); - return -EIO; + rc = -EIO; + goto out_free_inbuf; } - if (rsplen != sizeof(struct validate_negotiate_info_rsp)) { + rc = -EIO; + if (rsplen != sizeof(*pneg_rsp)) { cifs_dbg(VFS, "invalid protocol negotiate response size: %d\n", rsplen); /* relax check since Mac returns max bufsize allowed on ioctl */ - if ((rsplen > CIFSMaxBufSize) - || (rsplen < sizeof(struct validate_negotiate_info_rsp))) - goto err_rsp_free; + if (rsplen > CIFSMaxBufSize || rsplen < sizeof(*pneg_rsp)) + goto out_free_rsp; } /* check validate negotiate info response matches what we got earlier */ @@ -838,15 +839,17 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon) goto vneg_out; /* validate negotiate successful */ + rc = 0; cifs_dbg(FYI, "validate negotiate info successful\n"); - kfree(pneg_rsp); - return 0; + goto out_free_rsp; vneg_out: cifs_dbg(VFS, "protocol revalidation - security settings mismatch\n"); -err_rsp_free: +out_free_rsp: kfree(pneg_rsp); - return -EIO; +out_free_inbuf: + kfree(pneg_inbuf); + return rc; } enum securityEnum diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c index 513c357c734b..a6c0f54c48c3 100644 --- a/fs/hfsplus/super.c +++ b/fs/hfsplus/super.c @@ -588,6 +588,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent) return 0; out_put_hidden_dir: + cancel_delayed_work_sync(&sbi->sync_work); iput(sbi->hidden_dir); out_put_root: dput(sb->s_root); diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index 01c6b3894406..7869622af22a 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -4250,10 +4250,11 @@ out: static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, bool preserve) { - int error; + int error, had_lock; struct inode *inode = d_inode(old_dentry); struct buffer_head *old_bh = NULL; struct inode *new_orphan_inode = NULL; + struct ocfs2_lock_holder oh; if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb))) return -EOPNOTSUPP; @@ -4295,6 +4296,14 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, goto out; } + had_lock = ocfs2_inode_lock_tracker(new_orphan_inode, NULL, 1, + &oh); + if (had_lock < 0) { + error = had_lock; + mlog_errno(error); + goto out; + } + /* If the security isn't preserved, we need to re-initialize them. */ if (!preserve) { error = ocfs2_init_security_and_acl(dir, new_orphan_inode, @@ -4302,14 +4311,15 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir, if (error) mlog_errno(error); } -out: if (!error) { error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode, new_dentry); if (error) mlog_errno(error); } + ocfs2_inode_unlock_tracker(new_orphan_inode, 1, &oh, had_lock); +out: if (new_orphan_inode) { /* * We need to open_unlock the inode no matter whether we diff --git a/fs/proc/base.c b/fs/proc/base.c index 1b2ede6abcdf..1a76d751cf3c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -261,7 +261,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, * Inherently racy -- command line shares address space * with code and data. */ - rv = access_remote_vm(mm, arg_end - 1, &c, 1, 0); + rv = access_remote_vm(mm, arg_end - 1, &c, 1, FOLL_ANON); if (rv <= 0) goto out_free_page; @@ -279,7 +279,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, int nr_read; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -325,7 +325,7 @@ static ssize_t proc_pid_cmdline_read(struct file *file, char __user *buf, bool final; _count = min3(count, len, PAGE_SIZE); - nr_read = access_remote_vm(mm, p, page, _count, 0); + nr_read = access_remote_vm(mm, p, page, _count, FOLL_ANON); if (nr_read < 0) rv = nr_read; if (nr_read <= 0) @@ -946,7 +946,7 @@ static ssize_t environ_read(struct file *file, char __user *buf, max_len = min_t(size_t, PAGE_SIZE, count); this_len = min(max_len, this_len); - retval = access_remote_vm(mm, (env_start + src), page, this_len, 0); + retval = access_remote_vm(mm, (env_start + src), page, this_len, FOLL_ANON); if (retval <= 0) { ret = retval; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index d1e82761de81..e64ecb9f2720 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -209,25 +209,34 @@ kclist_add_private(unsigned long pfn, unsigned long nr_pages, void *arg) { struct list_head *head = (struct list_head *)arg; struct kcore_list *ent; + struct page *p; + + if (!pfn_valid(pfn)) + return 1; + + p = pfn_to_page(pfn); + if (!memmap_valid_within(pfn, p, page_zone(p))) + return 1; ent = kmalloc(sizeof(*ent), GFP_KERNEL); if (!ent) return -ENOMEM; - ent->addr = (unsigned long)__va((pfn << PAGE_SHIFT)); + ent->addr = (unsigned long)page_to_virt(p); ent->size = nr_pages << PAGE_SHIFT; - /* Sanity check: Can happen in 32bit arch...maybe */ - if (ent->addr < (unsigned long) __va(0)) + if (!virt_addr_valid(ent->addr)) goto free_out; /* cut not-mapped area. ....from ppc-32 code. */ if (ULONG_MAX - ent->addr < ent->size) ent->size = ULONG_MAX - ent->addr; - /* cut when vmalloc() area is higher than direct-map area */ - if (VMALLOC_START > (unsigned long)__va(0)) { - if (ent->addr > VMALLOC_START) - goto free_out; + /* + * We've already checked virt_addr_valid so we know this address + * is a valid pointer, therefore we can check against it to determine + * if we need to trim + */ + if (VMALLOC_START > ent->addr) { if (VMALLOC_START - ent->addr < ent->size) ent->size = VMALLOC_START - ent->addr; } diff --git a/include/linux/efi.h b/include/linux/efi.h index f1b7d68ac460..3016d8c456bc 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -395,8 +395,8 @@ typedef struct { u32 attributes; u32 get_bar_attributes; u32 set_bar_attributes; - uint64_t romsize; - void *romimage; + u64 romsize; + u32 romimage; } efi_pci_io_protocol_32; typedef struct { @@ -415,8 +415,8 @@ typedef struct { u64 attributes; u64 get_bar_attributes; u64 set_bar_attributes; - uint64_t romsize; - void *romimage; + u64 romsize; + u64 romimage; } efi_pci_io_protocol_64; typedef struct { diff --git a/include/linux/kthread.h b/include/linux/kthread.h index c1961761311d..2803264c512f 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -62,6 +62,7 @@ void *kthread_probe_data(struct task_struct *k); int kthread_park(struct task_struct *k); void kthread_unpark(struct task_struct *k); void kthread_parkme(void); +void kthread_park_complete(struct task_struct *k); int kthreadd(void *unused); extern struct task_struct *kthreadd_task; diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6930c63126c7..6d6e79c59e68 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1045,13 +1045,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING -#ifdef CONFIG_S390 -#define KVM_MAX_IRQ_ROUTES 4096 //FIXME: we can have more than that... -#elif defined(CONFIG_ARM64) -#define KVM_MAX_IRQ_ROUTES 4096 -#else -#define KVM_MAX_IRQ_ROUTES 1024 -#endif +#define KVM_MAX_IRQ_ROUTES 4096 /* might need extension/rework in the future */ bool kvm_arch_can_set_irq_routing(struct kvm *kvm); int kvm_set_irq_routing(struct kvm *kvm, diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2a156c5dfadd..d703774982ca 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1286,17 +1286,7 @@ enum { static inline const struct cpumask * mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector) { - struct irq_desc *desc; - unsigned int irq; - int eqn; - int err; - - err = mlx5_vector2eqn(dev, vector, &eqn, &irq); - if (err) - return NULL; - - desc = irq_to_desc(irq); - return desc->affinity_hint; + return dev->priv.irq_info[vector].mask; } #endif /* MLX5_DRIVER_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 1ac1f06a4be6..c6fa9a255dbf 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2466,6 +2466,13 @@ static inline vm_fault_t vmf_insert_pfn(struct vm_area_struct *vma, return VM_FAULT_NOPAGE; } +static inline vm_fault_t vmf_error(int err) +{ + if (err == -ENOMEM) + return VM_FAULT_OOM; + return VM_FAULT_SIGBUS; +} + struct page *follow_page_mask(struct vm_area_struct *vma, unsigned long address, unsigned int foll_flags, unsigned int *page_mask); @@ -2493,6 +2500,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_MLOCK 0x1000 /* lock present pages */ #define FOLL_REMOTE 0x2000 /* we are working on non-current tsk/mm */ #define FOLL_COW 0x4000 /* internal GUP flag */ +#define FOLL_ANON 0x8000 /* don't do file mappings */ static inline int vm_fault_to_errno(int vm_fault, int foll_flags) { diff --git a/include/linux/mtd/map.h b/include/linux/mtd/map.h index b5b43f94f311..01b990e4b228 100644 --- a/include/linux/mtd/map.h +++ b/include/linux/mtd/map.h @@ -312,7 +312,7 @@ void map_destroy(struct mtd_info *mtd); ({ \ int i, ret = 1; \ for (i = 0; i < map_words(map); i++) { \ - if (((val1).x[i] & (val2).x[i]) != (val2).x[i]) { \ + if (((val1).x[i] & (val2).x[i]) != (val3).x[i]) { \ ret = 0; \ break; \ } \ diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 5dad59b31244..17c919436f48 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -867,12 +867,18 @@ struct nand_op_instr { * tBERS (during an erase) which all of them are u64 values that cannot be * divided by usual kernel macros and must be handled with the special * DIV_ROUND_UP_ULL() macro. + * + * Cast to type of dividend is needed here to guarantee that the result won't + * be an unsigned long long when the dividend is an unsigned long (or smaller), + * which is what the compiler does when it sees ternary operator with 2 + * different return types (picks the largest type to make sure there's no + * loss). */ -#define __DIVIDE(dividend, divisor) ({ \ - sizeof(dividend) == sizeof(u32) ? \ - DIV_ROUND_UP(dividend, divisor) : \ - DIV_ROUND_UP_ULL(dividend, divisor); \ - }) +#define __DIVIDE(dividend, divisor) ({ \ + (__typeof__(dividend))(sizeof(dividend) <= sizeof(unsigned long) ? \ + DIV_ROUND_UP(dividend, divisor) : \ + DIV_ROUND_UP_ULL(dividend, divisor)); \ + }) #define PSEC_TO_NSEC(x) __DIVIDE(x, 1000) #define PSEC_TO_MSEC(x) __DIVIDE(x, 1000000000) diff --git a/include/linux/oom.h b/include/linux/oom.h index 5bad038ac012..6adac113e96d 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -95,6 +95,8 @@ static inline int check_stable_address_space(struct mm_struct *mm) return 0; } +void __oom_reap_task_mm(struct mm_struct *mm); + extern unsigned long oom_badness(struct task_struct *p, struct mem_cgroup *memcg, const nodemask_t *nodemask, unsigned long totalpages); diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index b1f37a89e368..79b99d653e03 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -133,7 +133,7 @@ static inline void percpu_rwsem_release(struct percpu_rw_semaphore *sem, lock_release(&sem->rw_sem.dep_map, 1, ip); #ifdef CONFIG_RWSEM_SPIN_ON_OWNER if (!read) - sem->rw_sem.owner = NULL; + sem->rw_sem.owner = RWSEM_OWNER_UNKNOWN; #endif } @@ -141,6 +141,10 @@ static inline void percpu_rwsem_acquire(struct percpu_rw_semaphore *sem, bool read, unsigned long ip) { lock_acquire(&sem->rw_sem.dep_map, 0, 1, read, 1, NULL, ip); +#ifdef CONFIG_RWSEM_SPIN_ON_OWNER + if (!read) + sem->rw_sem.owner = current; +#endif } #endif diff --git a/include/linux/rbtree_augmented.h b/include/linux/rbtree_augmented.h index 6bfd2b581f75..af8a61be2d8d 100644 --- a/include/linux/rbtree_augmented.h +++ b/include/linux/rbtree_augmented.h @@ -26,6 +26,7 @@ #include #include +#include /* * Please note - only struct rb_augment_callbacks and the prototypes for diff --git a/include/linux/rbtree_latch.h b/include/linux/rbtree_latch.h index ece43e882b56..7d012faa509a 100644 --- a/include/linux/rbtree_latch.h +++ b/include/linux/rbtree_latch.h @@ -35,6 +35,7 @@ #include #include +#include struct latch_tree_node { struct rb_node node[2]; diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 56707d5ff6ad..ab93b6eae696 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -44,6 +44,12 @@ struct rw_semaphore { #endif }; +/* + * Setting bit 0 of the owner field with other non-zero bits will indicate + * that the rwsem is writer-owned with an unknown owner. + */ +#define RWSEM_OWNER_UNKNOWN ((struct task_struct *)-1L) + extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem); extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); diff --git a/include/linux/sched.h b/include/linux/sched.h index b3d697f3b573..c2413703f45d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -112,17 +112,36 @@ struct task_group; #ifdef CONFIG_DEBUG_ATOMIC_SLEEP +/* + * Special states are those that do not use the normal wait-loop pattern. See + * the comment with set_special_state(). + */ +#define is_special_task_state(state) \ + ((state) & (__TASK_STOPPED | __TASK_TRACED | TASK_DEAD)) + #define __set_current_state(state_value) \ do { \ + WARN_ON_ONCE(is_special_task_state(state_value));\ current->task_state_change = _THIS_IP_; \ current->state = (state_value); \ } while (0) + #define set_current_state(state_value) \ do { \ + WARN_ON_ONCE(is_special_task_state(state_value));\ current->task_state_change = _THIS_IP_; \ smp_store_mb(current->state, (state_value)); \ } while (0) +#define set_special_state(state_value) \ + do { \ + unsigned long flags; /* may shadow */ \ + WARN_ON_ONCE(!is_special_task_state(state_value)); \ + raw_spin_lock_irqsave(¤t->pi_lock, flags); \ + current->task_state_change = _THIS_IP_; \ + current->state = (state_value); \ + raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ + } while (0) #else /* * set_current_state() includes a barrier so that the write of current->state @@ -144,8 +163,8 @@ struct task_group; * * The above is typically ordered against the wakeup, which does: * - * need_sleep = false; - * wake_up_state(p, TASK_UNINTERRUPTIBLE); + * need_sleep = false; + * wake_up_state(p, TASK_UNINTERRUPTIBLE); * * Where wake_up_state() (and all other wakeup primitives) imply enough * barriers to order the store of the variable against wakeup. @@ -154,12 +173,33 @@ struct task_group; * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING). * - * This is obviously fine, since they both store the exact same value. + * However, with slightly different timing the wakeup TASK_RUNNING store can + * also collide with the TASK_UNINTERRUPTIBLE store. Loosing that store is not + * a problem either because that will result in one extra go around the loop + * and our @cond test will save the day. * * Also see the comments of try_to_wake_up(). */ -#define __set_current_state(state_value) do { current->state = (state_value); } while (0) -#define set_current_state(state_value) smp_store_mb(current->state, (state_value)) +#define __set_current_state(state_value) \ + current->state = (state_value) + +#define set_current_state(state_value) \ + smp_store_mb(current->state, (state_value)) + +/* + * set_special_state() should be used for those states when the blocking task + * can not use the regular condition based wait-loop. In that case we must + * serialize against wakeups such that any possible in-flight TASK_RUNNING stores + * will not collide with our state change. + */ +#define set_special_state(state_value) \ + do { \ + unsigned long flags; /* may shadow */ \ + raw_spin_lock_irqsave(¤t->pi_lock, flags); \ + current->state = (state_value); \ + raw_spin_unlock_irqrestore(¤t->pi_lock, flags); \ + } while (0) + #endif /* Task command name length: */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index a7ce74c74e49..113d1ad1ced7 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -280,7 +280,7 @@ static inline void kernel_signal_stop(void) { spin_lock_irq(¤t->sighand->siglock); if (current->jobctl & JOBCTL_STOP_DEQUEUED) - __set_current_state(TASK_STOPPED); + set_special_state(TASK_STOPPED); spin_unlock_irq(¤t->sighand->siglock); schedule(); diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 435c9e3b9181..fe23dc584be6 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -170,6 +170,7 @@ struct nft_data_desc { int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, unsigned int size, struct nft_data_desc *desc, const struct nlattr *nla); +void nft_data_hold(const struct nft_data *data, enum nft_data_types type); void nft_data_release(const struct nft_data *data, enum nft_data_types type); int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, enum nft_data_types type, unsigned int len); @@ -732,6 +733,10 @@ struct nft_expr_ops { int (*init)(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); + void (*activate)(const struct nft_ctx *ctx, + const struct nft_expr *expr); + void (*deactivate)(const struct nft_ctx *ctx, + const struct nft_expr *expr); void (*destroy)(const struct nft_ctx *ctx, const struct nft_expr *expr); int (*dump)(struct sk_buff *skb, diff --git a/include/net/tls.h b/include/net/tls.h index ee78f339b4b3..70c273777fe9 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -114,6 +114,10 @@ struct tls_sw_context_rx { struct sk_buff *recv_pkt; u8 control; bool decrypted; + + char rx_aad_ciphertext[TLS_AAD_SPACE_SIZE]; + char rx_aad_plaintext[TLS_AAD_SPACE_SIZE]; + }; struct tls_record_info { diff --git a/include/trace/events/afs.h b/include/trace/events/afs.h index f0820554caa9..d0a341bc4540 100644 --- a/include/trace/events/afs.h +++ b/include/trace/events/afs.h @@ -575,6 +575,48 @@ TRACE_EVENT(afs_protocol_error, __entry->call, __entry->error, __entry->where) ); +TRACE_EVENT(afs_cm_no_server, + TP_PROTO(struct afs_call *call, struct sockaddr_rxrpc *srx), + + TP_ARGS(call, srx), + + TP_STRUCT__entry( + __field(unsigned int, call ) + __field(unsigned int, op_id ) + __field_struct(struct sockaddr_rxrpc, srx ) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->op_id = call->operation_ID; + memcpy(&__entry->srx, srx, sizeof(__entry->srx)); + ), + + TP_printk("c=%08x op=%u %pISpc", + __entry->call, __entry->op_id, &__entry->srx.transport) + ); + +TRACE_EVENT(afs_cm_no_server_u, + TP_PROTO(struct afs_call *call, const uuid_t *uuid), + + TP_ARGS(call, uuid), + + TP_STRUCT__entry( + __field(unsigned int, call ) + __field(unsigned int, op_id ) + __field_struct(uuid_t, uuid ) + ), + + TP_fast_assign( + __entry->call = call->debug_id; + __entry->op_id = call->operation_ID; + memcpy(&__entry->uuid, uuid, sizeof(__entry->uuid)); + ), + + TP_printk("c=%08x op=%u %pU", + __entry->call, __entry->op_id, &__entry->uuid) + ); + #endif /* _TRACE_AFS_H */ /* This part must be outside protection */ diff --git a/include/trace/events/xen.h b/include/trace/events/xen.h index 7dd8f34c37df..fdcf88bcf0ea 100644 --- a/include/trace/events/xen.h +++ b/include/trace/events/xen.h @@ -352,22 +352,6 @@ DECLARE_EVENT_CLASS(xen_mmu_pgd, DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_pin); DEFINE_XEN_MMU_PGD_EVENT(xen_mmu_pgd_unpin); -TRACE_EVENT(xen_mmu_flush_tlb_all, - TP_PROTO(int x), - TP_ARGS(x), - TP_STRUCT__entry(__array(char, x, 0)), - TP_fast_assign((void)x), - TP_printk("%s", "") - ); - -TRACE_EVENT(xen_mmu_flush_tlb, - TP_PROTO(int x), - TP_ARGS(x), - TP_STRUCT__entry(__array(char, x, 0)), - TP_fast_assign((void)x), - TP_printk("%s", "") - ); - TRACE_EVENT(xen_mmu_flush_tlb_one_user, TP_PROTO(unsigned long addr), TP_ARGS(addr), diff --git a/include/uapi/linux/netfilter/nf_conntrack_tcp.h b/include/uapi/linux/netfilter/nf_conntrack_tcp.h index 74b91151d494..bcba72def817 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_tcp.h +++ b/include/uapi/linux/netfilter/nf_conntrack_tcp.h @@ -46,6 +46,9 @@ enum tcp_conntrack { /* Marks possibility for expected RFC5961 challenge ACK */ #define IP_CT_EXP_CHALLENGE_ACK 0x40 +/* Simultaneous open initialized */ +#define IP_CT_TCP_SIMULTANEOUS_OPEN 0x80 + struct nf_ct_tcp_flags { __u8 flags; __u8 mask; diff --git a/init/Kconfig b/init/Kconfig index 480a4f2713d9..1fecd5b7bad8 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -738,7 +738,7 @@ config CFS_BANDWIDTH tasks running within the fair group scheduler. Groups with no limit set are considered to be unconstrained and will run with no restriction. - See tip/Documentation/scheduler/sched-bwc.txt for more information. + See Documentation/scheduler/sched-bwc.txt for more information. config RT_GROUP_SCHED bool "Group scheduling for SCHED_RR/FIFO" diff --git a/init/main.c b/init/main.c index a404936d85d8..fd37315835b4 100644 --- a/init/main.c +++ b/init/main.c @@ -1034,6 +1034,13 @@ __setup("rodata=", set_debug_rodata); static void mark_readonly(void) { if (rodata_enabled) { + /* + * load_module() results in W+X mappings, which are cleaned up + * with call_rcu_sched(). Let's make sure that queued work is + * flushed so that we don't hit false positives looking for + * insecure pages which are W+X. + */ + rcu_barrier_sched(); mark_rodata_ro(); rodata_test(); } else diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 2194c6a9df42..b574dddc05b8 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -219,47 +219,84 @@ int bpf_prog_calc_tag(struct bpf_prog *fp) return 0; } -static void bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta) +static int bpf_adj_delta_to_imm(struct bpf_insn *insn, u32 pos, u32 delta, + u32 curr, const bool probe_pass) { + const s64 imm_min = S32_MIN, imm_max = S32_MAX; + s64 imm = insn->imm; + + if (curr < pos && curr + imm + 1 > pos) + imm += delta; + else if (curr > pos + delta && curr + imm + 1 <= pos + delta) + imm -= delta; + if (imm < imm_min || imm > imm_max) + return -ERANGE; + if (!probe_pass) + insn->imm = imm; + return 0; +} + +static int bpf_adj_delta_to_off(struct bpf_insn *insn, u32 pos, u32 delta, + u32 curr, const bool probe_pass) +{ + const s32 off_min = S16_MIN, off_max = S16_MAX; + s32 off = insn->off; + + if (curr < pos && curr + off + 1 > pos) + off += delta; + else if (curr > pos + delta && curr + off + 1 <= pos + delta) + off -= delta; + if (off < off_min || off > off_max) + return -ERANGE; + if (!probe_pass) + insn->off = off; + return 0; +} + +static int bpf_adj_branches(struct bpf_prog *prog, u32 pos, u32 delta, + const bool probe_pass) +{ + u32 i, insn_cnt = prog->len + (probe_pass ? delta : 0); struct bpf_insn *insn = prog->insnsi; - u32 i, insn_cnt = prog->len; - bool pseudo_call; - u8 code; - int off; + int ret = 0; for (i = 0; i < insn_cnt; i++, insn++) { - code = insn->code; - if (BPF_CLASS(code) != BPF_JMP) - continue; - if (BPF_OP(code) == BPF_EXIT) - continue; - if (BPF_OP(code) == BPF_CALL) { - if (insn->src_reg == BPF_PSEUDO_CALL) - pseudo_call = true; - else - continue; - } else { - pseudo_call = false; + u8 code; + + /* In the probing pass we still operate on the original, + * unpatched image in order to check overflows before we + * do any other adjustments. Therefore skip the patchlet. + */ + if (probe_pass && i == pos) { + i += delta + 1; + insn++; } - off = pseudo_call ? insn->imm : insn->off; - - /* Adjust offset of jmps if we cross boundaries. */ - if (i < pos && i + off + 1 > pos) - off += delta; - else if (i > pos + delta && i + off + 1 <= pos + delta) - off -= delta; - - if (pseudo_call) - insn->imm = off; - else - insn->off = off; + code = insn->code; + if (BPF_CLASS(code) != BPF_JMP || + BPF_OP(code) == BPF_EXIT) + continue; + /* Adjust offset of jmps if we cross patch boundaries. */ + if (BPF_OP(code) == BPF_CALL) { + if (insn->src_reg != BPF_PSEUDO_CALL) + continue; + ret = bpf_adj_delta_to_imm(insn, pos, delta, i, + probe_pass); + } else { + ret = bpf_adj_delta_to_off(insn, pos, delta, i, + probe_pass); + } + if (ret) + break; } + + return ret; } struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len) { u32 insn_adj_cnt, insn_rest, insn_delta = len - 1; + const u32 cnt_max = S16_MAX; struct bpf_prog *prog_adj; /* Since our patchlet doesn't expand the image, we're done. */ @@ -270,6 +307,15 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, insn_adj_cnt = prog->len + insn_delta; + /* Reject anything that would potentially let the insn->off + * target overflow when we have excessive program expansions. + * We need to probe here before we do any reallocation where + * we afterwards may not fail anymore. + */ + if (insn_adj_cnt > cnt_max && + bpf_adj_branches(prog, off, insn_delta, true)) + return NULL; + /* Several new instructions need to be inserted. Make room * for them. Likely, there's no need for a new allocation as * last page could have large enough tailroom. @@ -295,7 +341,11 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, sizeof(*patch) * insn_rest); memcpy(prog_adj->insnsi + off, patch, sizeof(*patch) * len); - bpf_adj_branches(prog_adj, off, insn_delta); + /* We are guaranteed to not fail at this point, otherwise + * the ship has sailed to reverse to the original state. An + * overflow cannot happen at this point. + */ + BUG_ON(bpf_adj_branches(prog_adj, off, insn_delta, false)); return prog_adj; } diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index c6de1393df63..cd832250a478 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -1729,11 +1729,11 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map, * we increment the refcnt. If this is the case abort with an * error. */ - verdict = bpf_prog_inc_not_zero(progs->bpf_verdict); + verdict = bpf_prog_inc_not_zero(verdict); if (IS_ERR(verdict)) return PTR_ERR(verdict); - parse = bpf_prog_inc_not_zero(progs->bpf_parse); + parse = bpf_prog_inc_not_zero(parse); if (IS_ERR(parse)) { bpf_prog_put(verdict); return PTR_ERR(parse); @@ -1741,12 +1741,12 @@ static int __sock_map_ctx_update_elem(struct bpf_map *map, } if (tx_msg) { - tx_msg = bpf_prog_inc_not_zero(progs->bpf_tx_msg); + tx_msg = bpf_prog_inc_not_zero(tx_msg); if (IS_ERR(tx_msg)) { - if (verdict) - bpf_prog_put(verdict); - if (parse) + if (parse && verdict) { bpf_prog_put(parse); + bpf_prog_put(verdict); + } return PTR_ERR(tx_msg); } } @@ -1826,10 +1826,10 @@ out_free: kfree(e); smap_release_sock(psock, sock); out_progs: - if (verdict) - bpf_prog_put(verdict); - if (parse) + if (parse && verdict) { bpf_prog_put(parse); + bpf_prog_put(verdict); + } if (tx_msg) bpf_prog_put(tx_msg); write_unlock_bh(&sock->sk_callback_lock); diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c index 6c6b3c48db71..1d8ca9ea9979 100644 --- a/kernel/events/ring_buffer.c +++ b/kernel/events/ring_buffer.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "internal.h" @@ -867,8 +868,10 @@ perf_mmap_to_page(struct ring_buffer *rb, unsigned long pgoff) return NULL; /* AUX space */ - if (pgoff >= rb->aux_pgoff) - return virt_to_page(rb->aux_pages[pgoff - rb->aux_pgoff]); + if (pgoff >= rb->aux_pgoff) { + int aux_pgoff = array_index_nospec(pgoff - rb->aux_pgoff, rb->aux_nr_pages); + return virt_to_page(rb->aux_pages[aux_pgoff]); + } } return __perf_mmap_to_page(rb, pgoff); diff --git a/kernel/kthread.c b/kernel/kthread.c index cd50e99202b0..2017a39ab490 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -55,7 +55,6 @@ enum KTHREAD_BITS { KTHREAD_IS_PER_CPU = 0, KTHREAD_SHOULD_STOP, KTHREAD_SHOULD_PARK, - KTHREAD_IS_PARKED, }; static inline void set_kthread_struct(void *kthread) @@ -177,14 +176,12 @@ void *kthread_probe_data(struct task_struct *task) static void __kthread_parkme(struct kthread *self) { - __set_current_state(TASK_PARKED); - while (test_bit(KTHREAD_SHOULD_PARK, &self->flags)) { - if (!test_and_set_bit(KTHREAD_IS_PARKED, &self->flags)) - complete(&self->parked); + for (;;) { + set_current_state(TASK_PARKED); + if (!test_bit(KTHREAD_SHOULD_PARK, &self->flags)) + break; schedule(); - __set_current_state(TASK_PARKED); } - clear_bit(KTHREAD_IS_PARKED, &self->flags); __set_current_state(TASK_RUNNING); } @@ -194,6 +191,11 @@ void kthread_parkme(void) } EXPORT_SYMBOL_GPL(kthread_parkme); +void kthread_park_complete(struct task_struct *k) +{ + complete(&to_kthread(k)->parked); +} + static int kthread(void *_create) { /* Copy data: it's on kthread's stack */ @@ -450,22 +452,15 @@ void kthread_unpark(struct task_struct *k) { struct kthread *kthread = to_kthread(k); - clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); /* - * We clear the IS_PARKED bit here as we don't wait - * until the task has left the park code. So if we'd - * park before that happens we'd see the IS_PARKED bit - * which might be about to be cleared. + * Newly created kthread was parked when the CPU was offline. + * The binding was lost and we need to set it again. */ - if (test_and_clear_bit(KTHREAD_IS_PARKED, &kthread->flags)) { - /* - * Newly created kthread was parked when the CPU was offline. - * The binding was lost and we need to set it again. - */ - if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) - __kthread_bind(k, kthread->cpu, TASK_PARKED); - wake_up_state(k, TASK_PARKED); - } + if (test_bit(KTHREAD_IS_PER_CPU, &kthread->flags)) + __kthread_bind(k, kthread->cpu, TASK_PARKED); + + clear_bit(KTHREAD_SHOULD_PARK, &kthread->flags); + wake_up_state(k, TASK_PARKED); } EXPORT_SYMBOL_GPL(kthread_unpark); @@ -488,12 +483,13 @@ int kthread_park(struct task_struct *k) if (WARN_ON(k->flags & PF_EXITING)) return -ENOSYS; - if (!test_bit(KTHREAD_IS_PARKED, &kthread->flags)) { - set_bit(KTHREAD_SHOULD_PARK, &kthread->flags); - if (k != current) { - wake_up_process(k); - wait_for_completion(&kthread->parked); - } + if (WARN_ON_ONCE(test_bit(KTHREAD_SHOULD_PARK, &kthread->flags))) + return -EBUSY; + + set_bit(KTHREAD_SHOULD_PARK, &kthread->flags); + if (k != current) { + wake_up_process(k); + wait_for_completion(&kthread->parked); } return 0; diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index e795908f3607..a90336779375 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -352,16 +352,15 @@ static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem) struct task_struct *owner; bool ret = true; + BUILD_BUG_ON(!rwsem_has_anonymous_owner(RWSEM_OWNER_UNKNOWN)); + if (need_resched()) return false; rcu_read_lock(); owner = READ_ONCE(sem->owner); - if (!rwsem_owner_is_writer(owner)) { - /* - * Don't spin if the rwsem is readers owned. - */ - ret = !rwsem_owner_is_reader(owner); + if (!owner || !is_rwsem_owner_spinnable(owner)) { + ret = !owner; /* !owner is spinnable */ goto done; } @@ -382,11 +381,11 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem) { struct task_struct *owner = READ_ONCE(sem->owner); - if (!rwsem_owner_is_writer(owner)) - goto out; + if (!is_rwsem_owner_spinnable(owner)) + return false; rcu_read_lock(); - while (sem->owner == owner) { + while (owner && (READ_ONCE(sem->owner) == owner)) { /* * Ensure we emit the owner->on_cpu, dereference _after_ * checking sem->owner still matches owner, if that fails, @@ -408,12 +407,12 @@ static noinline bool rwsem_spin_on_owner(struct rw_semaphore *sem) cpu_relax(); } rcu_read_unlock(); -out: + /* * If there is a new owner or the owner is not set, we continue * spinning. */ - return !rwsem_owner_is_reader(READ_ONCE(sem->owner)); + return is_rwsem_owner_spinnable(READ_ONCE(sem->owner)); } static bool rwsem_optimistic_spin(struct rw_semaphore *sem) diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 30465a2f2b6c..bc1e507be9ff 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -221,5 +221,3 @@ void up_read_non_owner(struct rw_semaphore *sem) EXPORT_SYMBOL(up_read_non_owner); #endif - - diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index a17cba8d94bb..b9d0e72aa80f 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h @@ -1,20 +1,24 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* * The owner field of the rw_semaphore structure will be set to - * RWSEM_READ_OWNED when a reader grabs the lock. A writer will clear + * RWSEM_READER_OWNED when a reader grabs the lock. A writer will clear * the owner field when it unlocks. A reader, on the other hand, will * not touch the owner field when it unlocks. * - * In essence, the owner field now has the following 3 states: + * In essence, the owner field now has the following 4 states: * 1) 0 * - lock is free or the owner hasn't set the field yet * 2) RWSEM_READER_OWNED * - lock is currently or previously owned by readers (lock is free * or not set by owner yet) - * 3) Other non-zero value - * - a writer owns the lock + * 3) RWSEM_ANONYMOUSLY_OWNED bit set with some other bits set as well + * - lock is owned by an anonymous writer, so spinning on the lock + * owner should be disabled. + * 4) Other non-zero value + * - a writer owns the lock and other writers can spin on the lock owner. */ -#define RWSEM_READER_OWNED ((struct task_struct *)1UL) +#define RWSEM_ANONYMOUSLY_OWNED (1UL << 0) +#define RWSEM_READER_OWNED ((struct task_struct *)RWSEM_ANONYMOUSLY_OWNED) #ifdef CONFIG_DEBUG_RWSEMS # define DEBUG_RWSEMS_WARN_ON(c) DEBUG_LOCKS_WARN_ON(c) @@ -51,14 +55,22 @@ static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) WRITE_ONCE(sem->owner, RWSEM_READER_OWNED); } -static inline bool rwsem_owner_is_writer(struct task_struct *owner) +/* + * Return true if the a rwsem waiter can spin on the rwsem's owner + * and steal the lock, i.e. the lock is not anonymously owned. + * N.B. !owner is considered spinnable. + */ +static inline bool is_rwsem_owner_spinnable(struct task_struct *owner) { - return owner && owner != RWSEM_READER_OWNED; + return !((unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED); } -static inline bool rwsem_owner_is_reader(struct task_struct *owner) +/* + * Return true if rwsem is owned by an anonymous writer or readers. + */ +static inline bool rwsem_has_anonymous_owner(struct task_struct *owner) { - return owner == RWSEM_READER_OWNED; + return (unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED; } #else static inline void rwsem_set_owner(struct rw_semaphore *sem) diff --git a/kernel/module.c b/kernel/module.c index ce8066b88178..c9bea7f2b43e 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -3517,6 +3517,11 @@ static noinline int do_init_module(struct module *mod) * walking this with preempt disabled. In all the failure paths, we * call synchronize_sched(), but we don't want to slow down the success * path, so use actual RCU here. + * Note that module_alloc() on most architectures creates W+X page + * mappings which won't be cleaned up until do_free_init() runs. Any + * code such as mark_rodata_ro() which depends on those mappings to + * be cleaned up needs to sync with the queued work - ie + * rcu_barrier_sched() */ call_rcu_sched(&freeinit->rcu, do_free_init); mutex_unlock(&module_mutex); diff --git a/kernel/sched/autogroup.c b/kernel/sched/autogroup.c index 6be6c575b6cd..2d4ff5353ded 100644 --- a/kernel/sched/autogroup.c +++ b/kernel/sched/autogroup.c @@ -2,6 +2,7 @@ /* * Auto-group scheduling implementation: */ +#include #include "sched.h" unsigned int __read_mostly sysctl_sched_autogroup_enabled = 1; @@ -209,7 +210,7 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice) static unsigned long next = INITIAL_JIFFIES; struct autogroup *ag; unsigned long shares; - int err; + int err, idx; if (nice < MIN_NICE || nice > MAX_NICE) return -EINVAL; @@ -227,7 +228,9 @@ int proc_sched_autogroup_set_nice(struct task_struct *p, int nice) next = HZ / 10 + jiffies; ag = autogroup_task_get(p); - shares = scale_load(sched_prio_to_weight[nice + 20]); + + idx = array_index_nospec(nice + 20, 40); + shares = scale_load(sched_prio_to_weight[idx]); down_write(&ag->lock); err = sched_group_set_shares(ag->tg, shares); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 5e10aaeebfcc..092f7c4de903 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -7,6 +7,9 @@ */ #include "sched.h" +#include +#include + #include #include @@ -2718,20 +2721,28 @@ static struct rq *finish_task_switch(struct task_struct *prev) membarrier_mm_sync_core_before_usermode(mm); mmdrop(mm); } - if (unlikely(prev_state == TASK_DEAD)) { - if (prev->sched_class->task_dead) - prev->sched_class->task_dead(prev); + if (unlikely(prev_state & (TASK_DEAD|TASK_PARKED))) { + switch (prev_state) { + case TASK_DEAD: + if (prev->sched_class->task_dead) + prev->sched_class->task_dead(prev); - /* - * Remove function-return probe instances associated with this - * task and put them back on the free list. - */ - kprobe_flush_task(prev); + /* + * Remove function-return probe instances associated with this + * task and put them back on the free list. + */ + kprobe_flush_task(prev); - /* Task is done with its stack. */ - put_task_stack(prev); + /* Task is done with its stack. */ + put_task_stack(prev); - put_task_struct(prev); + put_task_struct(prev); + break; + + case TASK_PARKED: + kthread_park_complete(prev); + break; + } } tick_nohz_task_switch(); @@ -3498,23 +3509,8 @@ static void __sched notrace __schedule(bool preempt) void __noreturn do_task_dead(void) { - /* - * The setting of TASK_RUNNING by try_to_wake_up() may be delayed - * when the following two conditions become true. - * - There is race condition of mmap_sem (It is acquired by - * exit_mm()), and - * - SMI occurs before setting TASK_RUNINNG. - * (or hypervisor of virtual machine switches to other guest) - * As a result, we may become TASK_RUNNING after becoming TASK_DEAD - * - * To avoid it, we have to wait for releasing tsk->pi_lock which - * is held by try_to_wake_up() - */ - raw_spin_lock_irq(¤t->pi_lock); - raw_spin_unlock_irq(¤t->pi_lock); - /* Causes final put_task_struct in finish_task_switch(): */ - __set_current_state(TASK_DEAD); + set_special_state(TASK_DEAD); /* Tell freezer to ignore us: */ current->flags |= PF_NOFREEZE; @@ -6928,11 +6924,15 @@ static int cpu_weight_nice_write_s64(struct cgroup_subsys_state *css, struct cftype *cft, s64 nice) { unsigned long weight; + int idx; if (nice < MIN_NICE || nice > MAX_NICE) return -ERANGE; - weight = sched_prio_to_weight[NICE_TO_PRIO(nice) - MAX_RT_PRIO]; + idx = NICE_TO_PRIO(nice) - MAX_RT_PRIO; + idx = array_index_nospec(idx, 40); + weight = sched_prio_to_weight[idx]; + return sched_group_set_shares(css_tg(css), scale_load(weight)); } #endif diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index e7b3008b85bb..1356afd1eeb6 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1117,7 +1117,7 @@ extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); * should be larger than 2^(64 - 20 - 8), which is more than 64 seconds. * So, overflow is not an issue here. */ -u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se) +static u64 grub_reclaim(u64 delta, struct rq *rq, struct sched_dl_entity *dl_se) { u64 u_inact = rq->dl.this_bw - rq->dl.running_bw; /* Utot - Uact */ u64 u_act; @@ -2731,8 +2731,6 @@ bool dl_cpu_busy(unsigned int cpu) #endif #ifdef CONFIG_SCHED_DEBUG -extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq); - void print_dl_stats(struct seq_file *m, int cpu) { print_dl_rq(m, cpu, &cpu_rq(cpu)->dl); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 54dc31e7ab9b..79f574dba096 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1854,7 +1854,6 @@ static int task_numa_migrate(struct task_struct *p) static void numa_migrate_preferred(struct task_struct *p) { unsigned long interval = HZ; - unsigned long numa_migrate_retry; /* This task has no NUMA fault statistics yet */ if (unlikely(p->numa_preferred_nid == -1 || !p->numa_faults)) @@ -1862,18 +1861,7 @@ static void numa_migrate_preferred(struct task_struct *p) /* Periodically retry migrating the task to the preferred node */ interval = min(interval, msecs_to_jiffies(p->numa_scan_period) / 16); - numa_migrate_retry = jiffies + interval; - - /* - * Check that the new retry threshold is after the current one. If - * the retry is in the future, it implies that wake_affine has - * temporarily asked NUMA balancing to backoff from placement. - */ - if (numa_migrate_retry > p->numa_migrate_retry) - return; - - /* Safe to try placing the task on the preferred node */ - p->numa_migrate_retry = numa_migrate_retry; + p->numa_migrate_retry = jiffies + interval; /* Success if task is already running on preferred CPU */ if (task_node(p) == p->numa_preferred_nid) @@ -5922,48 +5910,6 @@ wake_affine_weight(struct sched_domain *sd, struct task_struct *p, return this_eff_load < prev_eff_load ? this_cpu : nr_cpumask_bits; } -#ifdef CONFIG_NUMA_BALANCING -static void -update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target) -{ - unsigned long interval; - - if (!static_branch_likely(&sched_numa_balancing)) - return; - - /* If balancing has no preference then continue gathering data */ - if (p->numa_preferred_nid == -1) - return; - - /* - * If the wakeup is not affecting locality then it is neutral from - * the perspective of NUMA balacing so continue gathering data. - */ - if (cpu_to_node(prev_cpu) == cpu_to_node(target)) - return; - - /* - * Temporarily prevent NUMA balancing trying to place waker/wakee after - * wakee has been moved by wake_affine. This will potentially allow - * related tasks to converge and update their data placement. The - * 4 * numa_scan_period is to allow the two-pass filter to migrate - * hot data to the wakers node. - */ - interval = max(sysctl_numa_balancing_scan_delay, - p->numa_scan_period << 2); - p->numa_migrate_retry = jiffies + msecs_to_jiffies(interval); - - interval = max(sysctl_numa_balancing_scan_delay, - current->numa_scan_period << 2); - current->numa_migrate_retry = jiffies + msecs_to_jiffies(interval); -} -#else -static void -update_wa_numa_placement(struct task_struct *p, int prev_cpu, int target) -{ -} -#endif - static int wake_affine(struct sched_domain *sd, struct task_struct *p, int this_cpu, int prev_cpu, int sync) { @@ -5979,7 +5925,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, if (target == nr_cpumask_bits) return prev_cpu; - update_wa_numa_placement(p, prev_cpu, target); schedstat_inc(sd->ttwu_move_affine); schedstat_inc(p->se.statistics.nr_wakeups_affine); return target; @@ -9847,6 +9792,7 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf) if (curr_cost > this_rq->max_idle_balance_cost) this_rq->max_idle_balance_cost = curr_cost; +out: /* * While browsing the domains, we released the rq lock, a task could * have been enqueued in the meantime. Since we're not going idle, @@ -9855,7 +9801,6 @@ static int idle_balance(struct rq *this_rq, struct rq_flags *rf) if (this_rq->cfs.h_nr_running && !pulled_task) pulled_task = 1; -out: /* Move the next balance forward */ if (time_after(this_rq->next_balance, next_balance)) this_rq->next_balance = next_balance; diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 7aef6b4e885a..ef3c4e6f5345 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2701,8 +2701,6 @@ int sched_rr_handler(struct ctl_table *table, int write, } #ifdef CONFIG_SCHED_DEBUG -extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); - void print_rt_stats(struct seq_file *m, int cpu) { rt_rq_iter_t iter; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 15750c222ca2..1f0a4bc6a39d 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -2025,8 +2025,9 @@ extern bool sched_debug_enabled; extern void print_cfs_stats(struct seq_file *m, int cpu); extern void print_rt_stats(struct seq_file *m, int cpu); extern void print_dl_stats(struct seq_file *m, int cpu); -extern void -print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); +extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); +extern void print_rt_rq(struct seq_file *m, int cpu, struct rt_rq *rt_rq); +extern void print_dl_rq(struct seq_file *m, int cpu, struct dl_rq *dl_rq); #ifdef CONFIG_NUMA_BALANCING extern void show_numa_stats(struct task_struct *p, struct seq_file *m); diff --git a/kernel/signal.c b/kernel/signal.c index d4ccea599692..9c33163a6165 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1961,14 +1961,27 @@ static void ptrace_stop(int exit_code, int why, int clear_code, siginfo_t *info) return; } + set_special_state(TASK_TRACED); + /* * We're committing to trapping. TRACED should be visible before * TRAPPING is cleared; otherwise, the tracer might fail do_wait(). * Also, transition to TRACED and updates to ->jobctl should be * atomic with respect to siglock and should be done after the arch * hook as siglock is released and regrabbed across it. + * + * TRACER TRACEE + * + * ptrace_attach() + * [L] wait_on_bit(JOBCTL_TRAPPING) [S] set_special_state(TRACED) + * do_wait() + * set_current_state() smp_wmb(); + * ptrace_do_wait() + * wait_task_stopped() + * task_stopped_code() + * [L] task_is_traced() [S] task_clear_jobctl_trapping(); */ - set_current_state(TASK_TRACED); + smp_wmb(); current->last_siginfo = info; current->exit_code = exit_code; @@ -2176,7 +2189,7 @@ static bool do_signal_stop(int signr) if (task_participate_group_stop(current)) notify = CLD_STOPPED; - __set_current_state(TASK_STOPPED); + set_special_state(TASK_STOPPED); spin_unlock_irq(¤t->sighand->siglock); /* diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index b7591261652d..64c0291b579c 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -21,6 +21,7 @@ #include #include #include +#include /* * Structure to determine completion condition and record errors. May @@ -65,27 +66,31 @@ static void cpu_stop_signal_done(struct cpu_stop_done *done) } static void __cpu_stop_queue_work(struct cpu_stopper *stopper, - struct cpu_stop_work *work) + struct cpu_stop_work *work, + struct wake_q_head *wakeq) { list_add_tail(&work->list, &stopper->works); - wake_up_process(stopper->thread); + wake_q_add(wakeq, stopper->thread); } /* queue @work to @stopper. if offline, @work is completed immediately */ static bool cpu_stop_queue_work(unsigned int cpu, struct cpu_stop_work *work) { struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu); + DEFINE_WAKE_Q(wakeq); unsigned long flags; bool enabled; spin_lock_irqsave(&stopper->lock, flags); enabled = stopper->enabled; if (enabled) - __cpu_stop_queue_work(stopper, work); + __cpu_stop_queue_work(stopper, work, &wakeq); else if (work->done) cpu_stop_signal_done(work->done); spin_unlock_irqrestore(&stopper->lock, flags); + wake_up_q(&wakeq); + return enabled; } @@ -229,6 +234,7 @@ static int cpu_stop_queue_two_works(int cpu1, struct cpu_stop_work *work1, { struct cpu_stopper *stopper1 = per_cpu_ptr(&cpu_stopper, cpu1); struct cpu_stopper *stopper2 = per_cpu_ptr(&cpu_stopper, cpu2); + DEFINE_WAKE_Q(wakeq); int err; retry: spin_lock_irq(&stopper1->lock); @@ -252,8 +258,8 @@ retry: goto unlock; err = 0; - __cpu_stop_queue_work(stopper1, work1); - __cpu_stop_queue_work(stopper2, work2); + __cpu_stop_queue_work(stopper1, work1, &wakeq); + __cpu_stop_queue_work(stopper2, work2, &wakeq); unlock: spin_unlock(&stopper2->lock); spin_unlock_irq(&stopper1->lock); @@ -263,6 +269,9 @@ unlock: cpu_relax(); goto retry; } + + wake_up_q(&wakeq); + return err; } /** diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index b398c2ea69b2..aa2094d5dd27 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -612,6 +612,14 @@ static void tick_handle_oneshot_broadcast(struct clock_event_device *dev) now = ktime_get(); /* Find all expired events */ for_each_cpu(cpu, tick_broadcast_oneshot_mask) { + /* + * Required for !SMP because for_each_cpu() reports + * unconditionally CPU0 as set on UP kernels. + */ + if (!IS_ENABLED(CONFIG_SMP) && + cpumask_empty(tick_broadcast_oneshot_mask)) + break; + td = &per_cpu(tick_cpu_device, cpu); if (td->evtdev->next_event <= now) { cpumask_set_cpu(cpu, tmpmask); diff --git a/lib/find_bit_benchmark.c b/lib/find_bit_benchmark.c index 5985a25e6cbc..5367ffa5c18f 100644 --- a/lib/find_bit_benchmark.c +++ b/lib/find_bit_benchmark.c @@ -132,7 +132,12 @@ static int __init find_bit_test(void) test_find_next_bit(bitmap, BITMAP_LEN); test_find_next_zero_bit(bitmap, BITMAP_LEN); test_find_last_bit(bitmap, BITMAP_LEN); - test_find_first_bit(bitmap, BITMAP_LEN); + + /* + * test_find_first_bit() may take some time, so + * traverse only part of bitmap to avoid soft lockup. + */ + test_find_first_bit(bitmap, BITMAP_LEN / 10); test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN); pr_err("\nStart testing find_bit() with sparse bitmap\n"); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index da9e10c827df..43e0cbedc3a0 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -1612,11 +1612,9 @@ static void set_iter_tags(struct radix_tree_iter *iter, static void __rcu **skip_siblings(struct radix_tree_node **nodep, void __rcu **slot, struct radix_tree_iter *iter) { - void *sib = node_to_entry(slot - 1); - while (iter->index < iter->next_index) { *nodep = rcu_dereference_raw(*slot); - if (*nodep && *nodep != sib) + if (*nodep && !is_sibling_entry(iter->node, *nodep)) return slot; slot++; iter->index = __radix_tree_iter_add(iter, 1); @@ -1631,7 +1629,7 @@ void __rcu **__radix_tree_next_slot(void __rcu **slot, struct radix_tree_iter *iter, unsigned flags) { unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK; - struct radix_tree_node *node = rcu_dereference_raw(*slot); + struct radix_tree_node *node; slot = skip_siblings(&node, slot, iter); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index 12fbaa445637..cc640588f145 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -714,7 +714,7 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, phys_addr = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start), - 0, size, DMA_FROM_DEVICE, 0); + 0, size, DMA_FROM_DEVICE, attrs); if (phys_addr == SWIOTLB_MAP_ERROR) goto out_warn; diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index de16f7869fb1..6cd7d0740005 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -331,23 +331,32 @@ static void noinline __init test_mem_optimisations(void) unsigned int start, nbits; for (start = 0; start < 1024; start += 8) { - memset(bmap1, 0x5a, sizeof(bmap1)); - memset(bmap2, 0x5a, sizeof(bmap2)); for (nbits = 0; nbits < 1024 - start; nbits += 8) { + memset(bmap1, 0x5a, sizeof(bmap1)); + memset(bmap2, 0x5a, sizeof(bmap2)); + bitmap_set(bmap1, start, nbits); __bitmap_set(bmap2, start, nbits); - if (!bitmap_equal(bmap1, bmap2, 1024)) + if (!bitmap_equal(bmap1, bmap2, 1024)) { printk("set not equal %d %d\n", start, nbits); - if (!__bitmap_equal(bmap1, bmap2, 1024)) + failed_tests++; + } + if (!__bitmap_equal(bmap1, bmap2, 1024)) { printk("set not __equal %d %d\n", start, nbits); + failed_tests++; + } bitmap_clear(bmap1, start, nbits); __bitmap_clear(bmap2, start, nbits); - if (!bitmap_equal(bmap1, bmap2, 1024)) + if (!bitmap_equal(bmap1, bmap2, 1024)) { printk("clear not equal %d %d\n", start, nbits); - if (!__bitmap_equal(bmap1, bmap2, 1024)) + failed_tests++; + } + if (!__bitmap_equal(bmap1, bmap2, 1024)) { printk("clear not __equal %d %d\n", start, nbits); + failed_tests++; + } } } } diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 30c0cb8cc9bc..23920c5ff728 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1669,19 +1669,22 @@ char *pointer_string(char *buf, char *end, const void *ptr, return number(buf, end, (unsigned long int)ptr, spec); } -static bool have_filled_random_ptr_key __read_mostly; +static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key); static siphash_key_t ptr_key __read_mostly; +static void enable_ptr_key_workfn(struct work_struct *work) +{ + get_random_bytes(&ptr_key, sizeof(ptr_key)); + /* Needs to run from preemptible context */ + static_branch_disable(¬_filled_random_ptr_key); +} + +static DECLARE_WORK(enable_ptr_key_work, enable_ptr_key_workfn); + static void fill_random_ptr_key(struct random_ready_callback *unused) { - get_random_bytes(&ptr_key, sizeof(ptr_key)); - /* - * have_filled_random_ptr_key==true is dependent on get_random_bytes(). - * ptr_to_id() needs to see have_filled_random_ptr_key==true - * after get_random_bytes() returns. - */ - smp_mb(); - WRITE_ONCE(have_filled_random_ptr_key, true); + /* This may be in an interrupt handler. */ + queue_work(system_unbound_wq, &enable_ptr_key_work); } static struct random_ready_callback random_ready = { @@ -1695,7 +1698,8 @@ static int __init initialize_ptr_random(void) if (!ret) { return 0; } else if (ret == -EALREADY) { - fill_random_ptr_key(&random_ready); + /* This is in preemptible context */ + enable_ptr_key_workfn(&enable_ptr_key_work); return 0; } @@ -1709,7 +1713,7 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec) unsigned long hashval; const int default_width = 2 * sizeof(ptr); - if (unlikely(!have_filled_random_ptr_key)) { + if (static_branch_unlikely(¬_filled_random_ptr_key)) { spec.field_width = default_width; /* string length must be less than default_width */ return string(buf, end, "(ptrval)", spec); diff --git a/mm/Kconfig b/mm/Kconfig index d5004d82a1d6..e14c01513bfd 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -636,6 +636,7 @@ config DEFERRED_STRUCT_PAGE_INIT default n depends on NO_BOOTMEM depends on !FLATMEM + depends on !NEED_PER_CPU_KM help Ordinarily all struct pages are initialised during early boot in a single thread. On very large machines this can take a considerable diff --git a/mm/gup.c b/mm/gup.c index 76af4cfeaf68..541904a7c60f 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -544,6 +544,9 @@ static int check_vma_flags(struct vm_area_struct *vma, unsigned long gup_flags) if (vm_flags & (VM_IO | VM_PFNMAP)) return -EFAULT; + if (gup_flags & FOLL_ANON && !vma_is_anonymous(vma)) + return -EFAULT; + if (write) { if (!(vm_flags & VM_WRITE)) { if (!(gup_flags & FOLL_FORCE)) diff --git a/mm/migrate.c b/mm/migrate.c index 568433023831..8c0af0f7cab1 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -528,14 +528,12 @@ int migrate_page_move_mapping(struct address_space *mapping, int i; int index = page_index(page); - for (i = 0; i < HPAGE_PMD_NR; i++) { + for (i = 1; i < HPAGE_PMD_NR; i++) { pslot = radix_tree_lookup_slot(&mapping->i_pages, index + i); radix_tree_replace_slot(&mapping->i_pages, pslot, newpage + i); } - } else { - radix_tree_replace_slot(&mapping->i_pages, pslot, newpage); } /* diff --git a/mm/mmap.c b/mm/mmap.c index 6fc435760086..fc41c0543d7f 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1327,7 +1327,7 @@ static inline int mlock_future_check(struct mm_struct *mm, static inline u64 file_mmap_size_max(struct file *file, struct inode *inode) { if (S_ISREG(inode->i_mode)) - return inode->i_sb->s_maxbytes; + return MAX_LFS_FILESIZE; if (S_ISBLK(inode->i_mode)) return MAX_LFS_FILESIZE; @@ -3056,6 +3056,32 @@ void exit_mmap(struct mm_struct *mm) /* mm's last user has gone, and its about to be pulled down */ mmu_notifier_release(mm); + if (unlikely(mm_is_oom_victim(mm))) { + /* + * Manually reap the mm to free as much memory as possible. + * Then, as the oom reaper does, set MMF_OOM_SKIP to disregard + * this mm from further consideration. Taking mm->mmap_sem for + * write after setting MMF_OOM_SKIP will guarantee that the oom + * reaper will not run on this mm again after mmap_sem is + * dropped. + * + * Nothing can be holding mm->mmap_sem here and the above call + * to mmu_notifier_release(mm) ensures mmu notifier callbacks in + * __oom_reap_task_mm() will not block. + * + * This needs to be done before calling munlock_vma_pages_all(), + * which clears VM_LOCKED, otherwise the oom reaper cannot + * reliably test it. + */ + mutex_lock(&oom_lock); + __oom_reap_task_mm(mm); + mutex_unlock(&oom_lock); + + set_bit(MMF_OOM_SKIP, &mm->flags); + down_write(&mm->mmap_sem); + up_write(&mm->mmap_sem); + } + if (mm->locked_vm) { vma = mm->mmap; while (vma) { @@ -3077,24 +3103,6 @@ void exit_mmap(struct mm_struct *mm) /* update_hiwater_rss(mm) here? but nobody should be looking */ /* Use -1 here to ensure all VMAs in the mm are unmapped */ unmap_vmas(&tlb, vma, 0, -1); - - if (unlikely(mm_is_oom_victim(mm))) { - /* - * Wait for oom_reap_task() to stop working on this - * mm. Because MMF_OOM_SKIP is already set before - * calling down_read(), oom_reap_task() will not run - * on this "mm" post up_write(). - * - * mm_is_oom_victim() cannot be set from under us - * either because victim->mm is already set to NULL - * under task_lock before calling mmput and oom_mm is - * set not NULL by the OOM killer only if victim->mm - * is found not NULL while holding the task_lock. - */ - set_bit(MMF_OOM_SKIP, &mm->flags); - down_write(&mm->mmap_sem); - up_write(&mm->mmap_sem); - } free_pgtables(&tlb, vma, FIRST_USER_ADDRESS, USER_PGTABLES_CEILING); tlb_finish_mmu(&tlb, 0, -1); diff --git a/mm/oom_kill.c b/mm/oom_kill.c index ff992fa8760a..8ba6cb88cf58 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -469,7 +469,6 @@ bool process_shares_mm(struct task_struct *p, struct mm_struct *mm) return false; } - #ifdef CONFIG_MMU /* * OOM Reaper kernel thread which tries to reap the memory used by the OOM @@ -480,16 +479,54 @@ static DECLARE_WAIT_QUEUE_HEAD(oom_reaper_wait); static struct task_struct *oom_reaper_list; static DEFINE_SPINLOCK(oom_reaper_lock); -static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) +void __oom_reap_task_mm(struct mm_struct *mm) { - struct mmu_gather tlb; struct vm_area_struct *vma; + + /* + * Tell all users of get_user/copy_from_user etc... that the content + * is no longer stable. No barriers really needed because unmapping + * should imply barriers already and the reader would hit a page fault + * if it stumbled over a reaped memory. + */ + set_bit(MMF_UNSTABLE, &mm->flags); + + for (vma = mm->mmap ; vma; vma = vma->vm_next) { + if (!can_madv_dontneed_vma(vma)) + continue; + + /* + * Only anonymous pages have a good chance to be dropped + * without additional steps which we cannot afford as we + * are OOM already. + * + * We do not even care about fs backed pages because all + * which are reclaimable have already been reclaimed and + * we do not want to block exit_mmap by keeping mm ref + * count elevated without a good reason. + */ + if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { + const unsigned long start = vma->vm_start; + const unsigned long end = vma->vm_end; + struct mmu_gather tlb; + + tlb_gather_mmu(&tlb, mm, start, end); + mmu_notifier_invalidate_range_start(mm, start, end); + unmap_page_range(&tlb, vma, start, end, NULL); + mmu_notifier_invalidate_range_end(mm, start, end); + tlb_finish_mmu(&tlb, start, end); + } + } +} + +static bool oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) +{ bool ret = true; /* * We have to make sure to not race with the victim exit path * and cause premature new oom victim selection: - * __oom_reap_task_mm exit_mm + * oom_reap_task_mm exit_mm * mmget_not_zero * mmput * atomic_dec_and_test @@ -534,39 +571,8 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm) trace_start_task_reaping(tsk->pid); - /* - * Tell all users of get_user/copy_from_user etc... that the content - * is no longer stable. No barriers really needed because unmapping - * should imply barriers already and the reader would hit a page fault - * if it stumbled over a reaped memory. - */ - set_bit(MMF_UNSTABLE, &mm->flags); + __oom_reap_task_mm(mm); - for (vma = mm->mmap ; vma; vma = vma->vm_next) { - if (!can_madv_dontneed_vma(vma)) - continue; - - /* - * Only anonymous pages have a good chance to be dropped - * without additional steps which we cannot afford as we - * are OOM already. - * - * We do not even care about fs backed pages because all - * which are reclaimable have already been reclaimed and - * we do not want to block exit_mmap by keeping mm ref - * count elevated without a good reason. - */ - if (vma_is_anonymous(vma) || !(vma->vm_flags & VM_SHARED)) { - const unsigned long start = vma->vm_start; - const unsigned long end = vma->vm_end; - - tlb_gather_mmu(&tlb, mm, start, end); - mmu_notifier_invalidate_range_start(mm, start, end); - unmap_page_range(&tlb, vma, start, end, NULL); - mmu_notifier_invalidate_range_end(mm, start, end); - tlb_finish_mmu(&tlb, start, end); - } - } pr_info("oom_reaper: reaped process %d (%s), now anon-rss:%lukB, file-rss:%lukB, shmem-rss:%lukB\n", task_pid_nr(tsk), tsk->comm, K(get_mm_counter(mm, MM_ANONPAGES)), @@ -587,14 +593,13 @@ static void oom_reap_task(struct task_struct *tsk) struct mm_struct *mm = tsk->signal->oom_mm; /* Retry the down_read_trylock(mmap_sem) a few times */ - while (attempts++ < MAX_OOM_REAP_RETRIES && !__oom_reap_task_mm(tsk, mm)) + while (attempts++ < MAX_OOM_REAP_RETRIES && !oom_reap_task_mm(tsk, mm)) schedule_timeout_idle(HZ/10); if (attempts <= MAX_OOM_REAP_RETRIES || test_bit(MMF_OOM_SKIP, &mm->flags)) goto done; - pr_info("oom_reaper: unable to reap pid:%d (%s)\n", task_pid_nr(tsk), tsk->comm); debug_show_all_locks(); diff --git a/mm/sparse.c b/mm/sparse.c index 62eef264a7bd..73dc2fcc0eab 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -629,7 +629,7 @@ void offline_mem_sections(unsigned long start_pfn, unsigned long end_pfn) unsigned long pfn; for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { - unsigned long section_nr = pfn_to_section_nr(start_pfn); + unsigned long section_nr = pfn_to_section_nr(pfn); struct mem_section *ms; /* diff --git a/mm/vmstat.c b/mm/vmstat.c index 536332e988b8..a2b9518980ce 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1161,7 +1161,7 @@ const char * const vmstat_text[] = { "nr_vmscan_immediate_reclaim", "nr_dirtied", "nr_written", - "nr_indirectly_reclaimable", + "", /* nr_indirectly_reclaimable */ /* enum writeback_stat_item counters */ "nr_dirty_threshold", @@ -1740,6 +1740,10 @@ static int vmstat_show(struct seq_file *m, void *arg) unsigned long *l = arg; unsigned long off = l - (unsigned long *)m->private; + /* Skip hidden vmstat items. */ + if (*vmstat_text[off] == '\0') + return 0; + seq_puts(m, vmstat_text[off]); seq_put_decimal_ull(m, " ", *l); seq_putc(m, '\n'); diff --git a/mm/z3fold.c b/mm/z3fold.c index c0bca6153b95..4b366d181f35 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -144,7 +144,8 @@ enum z3fold_page_flags { PAGE_HEADLESS = 0, MIDDLE_CHUNK_MAPPED, NEEDS_COMPACTING, - PAGE_STALE + PAGE_STALE, + UNDER_RECLAIM }; /***************** @@ -173,6 +174,7 @@ static struct z3fold_header *init_z3fold_page(struct page *page, clear_bit(MIDDLE_CHUNK_MAPPED, &page->private); clear_bit(NEEDS_COMPACTING, &page->private); clear_bit(PAGE_STALE, &page->private); + clear_bit(UNDER_RECLAIM, &page->private); spin_lock_init(&zhdr->page_lock); kref_init(&zhdr->refcount); @@ -756,6 +758,10 @@ static void z3fold_free(struct z3fold_pool *pool, unsigned long handle) atomic64_dec(&pool->pages_nr); return; } + if (test_bit(UNDER_RECLAIM, &page->private)) { + z3fold_page_unlock(zhdr); + return; + } if (test_and_set_bit(NEEDS_COMPACTING, &page->private)) { z3fold_page_unlock(zhdr); return; @@ -840,6 +846,8 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) kref_get(&zhdr->refcount); list_del_init(&zhdr->buddy); zhdr->cpu = -1; + set_bit(UNDER_RECLAIM, &page->private); + break; } list_del_init(&page->lru); @@ -887,25 +895,35 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) goto next; } next: - spin_lock(&pool->lock); if (test_bit(PAGE_HEADLESS, &page->private)) { if (ret == 0) { - spin_unlock(&pool->lock); free_z3fold_page(page); return 0; } - } else if (kref_put(&zhdr->refcount, release_z3fold_page)) { - atomic64_dec(&pool->pages_nr); + spin_lock(&pool->lock); + list_add(&page->lru, &pool->lru); spin_unlock(&pool->lock); - return 0; + } else { + z3fold_page_lock(zhdr); + clear_bit(UNDER_RECLAIM, &page->private); + if (kref_put(&zhdr->refcount, + release_z3fold_page_locked)) { + atomic64_dec(&pool->pages_nr); + return 0; + } + /* + * if we are here, the page is still not completely + * free. Take the global pool lock then to be able + * to add it back to the lru list + */ + spin_lock(&pool->lock); + list_add(&page->lru, &pool->lru); + spin_unlock(&pool->lock); + z3fold_page_unlock(zhdr); } - /* - * Add to the beginning of LRU. - * Pool lock has to be kept here to ensure the page has - * not already been released - */ - list_add(&page->lru, &pool->lru); + /* We started off locked to we need to lock the pool back */ + spin_lock(&pool->lock); } spin_unlock(&pool->lock); return -EAGAIN; diff --git a/net/bridge/netfilter/ebt_stp.c b/net/bridge/netfilter/ebt_stp.c index 47ba98db145d..46c1fe7637ea 100644 --- a/net/bridge/netfilter/ebt_stp.c +++ b/net/bridge/netfilter/ebt_stp.c @@ -161,8 +161,8 @@ static int ebt_stp_mt_check(const struct xt_mtchk_param *par) /* Make sure the match only receives stp frames */ if (!par->nft_compat && (!ether_addr_equal(e->destmac, eth_stp_addr) || - !is_broadcast_ether_addr(e->destmsk) || - !(e->bitmask & EBT_DESTMAC))) + !(e->bitmask & EBT_DESTMAC) || + !is_broadcast_ether_addr(e->destmsk))) return -EINVAL; return 0; diff --git a/net/core/dev.c b/net/core/dev.c index 7ca19f47a92a..1844d9bc5714 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2125,7 +2125,7 @@ static bool remove_xps_queue_cpu(struct net_device *dev, int i, j; for (i = count, j = offset; i--; j++) { - if (!remove_xps_queue(dev_maps, cpu, j)) + if (!remove_xps_queue(dev_maps, tci, j)) break; } diff --git a/net/core/filter.c b/net/core/filter.c index 6d0d1560bd70..51ea7ddb2d8d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -649,11 +649,18 @@ do_pass: #define BPF_EMIT_JMP \ do { \ + const s32 off_min = S16_MIN, off_max = S16_MAX; \ + s32 off; \ + \ if (target >= len || target < 0) \ goto err; \ - insn->off = addrs ? addrs[target] - addrs[i] - 1 : 0; \ + off = addrs ? addrs[target] - addrs[i] - 1 : 0; \ /* Adjust pc relative offset for 2nd or 3rd insn. */ \ - insn->off -= insn - tmp_insns; \ + off -= insn - tmp_insns; \ + /* Reject anything not fitting into insn->off. */ \ + if (off < off_min || off > off_max) \ + goto err; \ + insn->off = off; \ } while (0) case BPF_JMP | BPF_JA: diff --git a/net/core/sock.c b/net/core/sock.c index 042cfc612660..435a0ba85e52 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1613,7 +1613,7 @@ static void __sk_free(struct sock *sk) if (likely(sk->sk_net_refcnt)) sock_inuse_add(sock_net(sk), -1); - if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) + if (unlikely(sk->sk_net_refcnt && sock_diag_has_destroy_listeners(sk))) sock_diag_broadcast_destroy(sk); else sk_destruct(sk); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 00126cda4319..dc5d9af3dc80 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -258,11 +258,13 @@ static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst) static int dsa_port_setup(struct dsa_port *dp) { struct dsa_switch *ds = dp->ds; - int err; + int err = 0; memset(&dp->devlink_port, 0, sizeof(dp->devlink_port)); - err = devlink_port_register(ds->devlink, &dp->devlink_port, dp->index); + if (dp->type != DSA_PORT_TYPE_UNUSED) + err = devlink_port_register(ds->devlink, &dp->devlink_port, + dp->index); if (err) return err; @@ -317,7 +319,8 @@ static int dsa_port_setup(struct dsa_port *dp) static void dsa_port_teardown(struct dsa_port *dp) { - devlink_port_unregister(&dp->devlink_port); + if (dp->type != DSA_PORT_TYPE_UNUSED) + devlink_port_unregister(&dp->devlink_port); switch (dp->type) { case DSA_PORT_TYPE_UNUSED: diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index f05afaf3235c..4d622112bf95 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -326,10 +326,11 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, int rpf, struct in_device *idev, u32 *itag) { + struct net *net = dev_net(dev); + struct flow_keys flkeys; int ret, no_addr; struct fib_result res; struct flowi4 fl4; - struct net *net = dev_net(dev); bool dev_match; fl4.flowi4_oif = 0; @@ -347,6 +348,11 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, no_addr = idev->ifa_list == NULL; fl4.flowi4_mark = IN_DEV_SRC_VMARK(idev) ? skb->mark : 0; + if (!fib4_rules_early_flow_dissect(net, skb, &fl4, &flkeys)) { + fl4.flowi4_proto = 0; + fl4.fl4_sport = 0; + fl4.fl4_dport = 0; + } trace_fib_validate_source(dev, &fl4); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 2409e648454d..2d8efeecf619 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -734,10 +734,12 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb, erspan_build_header(skb, ntohl(tunnel->parms.o_key), tunnel->index, truncate, true); - else + else if (tunnel->erspan_ver == 2) erspan_build_header_v2(skb, ntohl(tunnel->parms.o_key), tunnel->dir, tunnel->hwid, truncate, true); + else + goto free_skb; tunnel->parms.o_flags &= ~TUNNEL_KEY; __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN)); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b5e21eb198d8..af5a830ff6ad 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1053,7 +1053,8 @@ alloc_new_skb: if (copy > length) copy = length; - if (!(rt->dst.dev->features&NETIF_F_SG)) { + if (!(rt->dst.dev->features&NETIF_F_SG) && + skb_tailroom(skb) >= copy) { unsigned int off; off = skb->len; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 444f125f3974..1ef37e2e2679 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -34,6 +34,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("IPv4 packet filter"); +MODULE_ALIAS("ipt_icmp"); void *ipt_alloc_initial_table(const struct xt_table *info) { diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index fd01f13c896a..12843c9ef142 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -89,10 +89,10 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) return true ^ invert; } + memset(&flow, 0, sizeof(flow)); flow.flowi4_iif = LOOPBACK_IFINDEX; flow.daddr = iph->saddr; flow.saddr = rpfilter_get_saddr(iph->daddr); - flow.flowi4_oif = 0; flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; flow.flowi4_tos = RT_TOS(iph->tos); flow.flowi4_scope = RT_SCOPE_UNIVERSE; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 29268efad247..2cfa1b518f8d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1961,8 +1961,13 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, fl4.saddr = saddr; fl4.flowi4_uid = sock_net_uid(net, NULL); - if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) + if (fib4_rules_early_flow_dissect(net, skb, &fl4, &_flkeys)) { flkeys = &_flkeys; + } else { + fl4.flowi4_proto = 0; + fl4.fl4_sport = 0; + fl4.fl4_dport = 0; + } err = fib_lookup(net, &fl4, res, 0); if (err != 0) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 437bb7ceba7f..8e08b409c71e 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2823,8 +2823,10 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) return -EBUSY; if (before(TCP_SKB_CB(skb)->seq, tp->snd_una)) { - if (before(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) - BUG(); + if (unlikely(before(TCP_SKB_CB(skb)->end_seq, tp->snd_una))) { + WARN_ON_ONCE(1); + return -EINVAL; + } if (tcp_trim_head(sk, skb, tp->snd_una - TCP_SKB_CB(skb)->seq)) return -ENOMEM; } @@ -3332,6 +3334,7 @@ static void tcp_connect_init(struct sock *sk) sock_reset_flag(sk, SOCK_DONE); tp->snd_wnd = 0; tcp_init_wl(tp, 0); + tcp_write_queue_purge(sk); tp->snd_una = tp->write_seq; tp->snd_sml = tp->write_seq; tp->snd_up = tp->write_seq; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index bede77f24784..c8cf2fdbb13b 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -71,6 +71,7 @@ struct ip6gre_net { struct ip6_tnl __rcu *tunnels[4][IP6_GRE_HASH_SIZE]; struct ip6_tnl __rcu *collect_md_tun; + struct ip6_tnl __rcu *collect_md_tun_erspan; struct net_device *fb_tunnel_dev; }; @@ -81,6 +82,7 @@ static int ip6gre_tunnel_init(struct net_device *dev); static void ip6gre_tunnel_setup(struct net_device *dev); static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t); static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu); +static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu); /* Tunnel hash table */ @@ -232,7 +234,12 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, if (cand) return cand; - t = rcu_dereference(ign->collect_md_tun); + if (gre_proto == htons(ETH_P_ERSPAN) || + gre_proto == htons(ETH_P_ERSPAN2)) + t = rcu_dereference(ign->collect_md_tun_erspan); + else + t = rcu_dereference(ign->collect_md_tun); + if (t && t->dev->flags & IFF_UP) return t; @@ -261,6 +268,31 @@ static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign, return &ign->tunnels[prio][h]; } +static void ip6gre_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t) +{ + if (t->parms.collect_md) + rcu_assign_pointer(ign->collect_md_tun, t); +} + +static void ip6erspan_tunnel_link_md(struct ip6gre_net *ign, struct ip6_tnl *t) +{ + if (t->parms.collect_md) + rcu_assign_pointer(ign->collect_md_tun_erspan, t); +} + +static void ip6gre_tunnel_unlink_md(struct ip6gre_net *ign, struct ip6_tnl *t) +{ + if (t->parms.collect_md) + rcu_assign_pointer(ign->collect_md_tun, NULL); +} + +static void ip6erspan_tunnel_unlink_md(struct ip6gre_net *ign, + struct ip6_tnl *t) +{ + if (t->parms.collect_md) + rcu_assign_pointer(ign->collect_md_tun_erspan, NULL); +} + static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign, const struct ip6_tnl *t) { @@ -271,9 +303,6 @@ static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t); - if (t->parms.collect_md) - rcu_assign_pointer(ign->collect_md_tun, t); - rcu_assign_pointer(t->next, rtnl_dereference(*tp)); rcu_assign_pointer(*tp, t); } @@ -283,9 +312,6 @@ static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t) struct ip6_tnl __rcu **tp; struct ip6_tnl *iter; - if (t->parms.collect_md) - rcu_assign_pointer(ign->collect_md_tun, NULL); - for (tp = ip6gre_bucket(ign, t); (iter = rtnl_dereference(*tp)) != NULL; tp = &iter->next) { @@ -374,11 +400,23 @@ failed_free: return NULL; } +static void ip6erspan_tunnel_uninit(struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); + + ip6erspan_tunnel_unlink_md(ign, t); + ip6gre_tunnel_unlink(ign, t); + dst_cache_reset(&t->dst_cache); + dev_put(dev); +} + static void ip6gre_tunnel_uninit(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); struct ip6gre_net *ign = net_generic(t->net, ip6gre_net_id); + ip6gre_tunnel_unlink_md(ign, t); ip6gre_tunnel_unlink(ign, t); dst_cache_reset(&t->dst_cache); dev_put(dev); @@ -698,6 +736,9 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, else fl6->daddr = tunnel->parms.raddr; + if (skb_cow_head(skb, dev->needed_headroom ?: tunnel->hlen)) + return -ENOMEM; + /* Push GRE header. */ protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto; @@ -920,7 +961,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, (ntohs(ipv6_hdr(skb)->payload_len) > skb->len - thoff)) truncate = true; - if (skb_cow_head(skb, dev->needed_headroom)) + if (skb_cow_head(skb, dev->needed_headroom ?: t->hlen)) goto tx_err; t->parms.o_flags &= ~TUNNEL_KEY; @@ -991,11 +1032,14 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, erspan_build_header(skb, ntohl(t->parms.o_key), t->parms.index, truncate, false); - else + else if (t->parms.erspan_ver == 2) erspan_build_header_v2(skb, ntohl(t->parms.o_key), t->parms.dir, t->parms.hwid, truncate, false); + else + goto tx_err; + fl6.daddr = t->parms.raddr; } @@ -1031,12 +1075,11 @@ tx_err: return NETDEV_TX_OK; } -static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) +static void ip6gre_tnl_link_config_common(struct ip6_tnl *t) { struct net_device *dev = t->dev; struct __ip6_tnl_parm *p = &t->parms; struct flowi6 *fl6 = &t->fl.u.ip6; - int t_hlen; if (dev->type != ARPHRD_ETHER) { memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); @@ -1063,12 +1106,13 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) dev->flags |= IFF_POINTOPOINT; else dev->flags &= ~IFF_POINTOPOINT; +} - t->tun_hlen = gre_calc_hlen(t->parms.o_flags); - - t->hlen = t->encap_hlen + t->tun_hlen; - - t_hlen = t->hlen + sizeof(struct ipv6hdr); +static void ip6gre_tnl_link_config_route(struct ip6_tnl *t, int set_mtu, + int t_hlen) +{ + const struct __ip6_tnl_parm *p = &t->parms; + struct net_device *dev = t->dev; if (p->flags & IP6_TNL_F_CAP_XMIT) { int strict = (ipv6_addr_type(&p->raddr) & @@ -1100,8 +1144,26 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) } } -static int ip6gre_tnl_change(struct ip6_tnl *t, - const struct __ip6_tnl_parm *p, int set_mtu) +static int ip6gre_calc_hlen(struct ip6_tnl *tunnel) +{ + int t_hlen; + + tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); + tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; + + t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); + tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen; + return t_hlen; +} + +static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) +{ + ip6gre_tnl_link_config_common(t); + ip6gre_tnl_link_config_route(t, set_mtu, ip6gre_calc_hlen(t)); +} + +static void ip6gre_tnl_copy_tnl_parm(struct ip6_tnl *t, + const struct __ip6_tnl_parm *p) { t->parms.laddr = p->laddr; t->parms.raddr = p->raddr; @@ -1117,6 +1179,12 @@ static int ip6gre_tnl_change(struct ip6_tnl *t, t->parms.o_flags = p->o_flags; t->parms.fwmark = p->fwmark; dst_cache_reset(&t->dst_cache); +} + +static int ip6gre_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p, + int set_mtu) +{ + ip6gre_tnl_copy_tnl_parm(t, p); ip6gre_tnl_link_config(t, set_mtu); return 0; } @@ -1395,11 +1463,7 @@ static int ip6gre_tunnel_init_common(struct net_device *dev) if (ret) goto cleanup_dst_cache_init; - tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); - tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen; - t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); - - dev->hard_header_len = LL_MAX_HEADER + t_hlen; + t_hlen = ip6gre_calc_hlen(tunnel); dev->mtu = ETH_DATA_LEN - t_hlen; if (dev->type == ARPHRD_ETHER) dev->mtu -= ETH_HLEN; @@ -1749,6 +1813,19 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = { .ndo_get_iflink = ip6_tnl_get_iflink, }; +static int ip6erspan_calc_hlen(struct ip6_tnl *tunnel) +{ + int t_hlen; + + tunnel->tun_hlen = 8; + tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + + erspan_hdr_len(tunnel->parms.erspan_ver); + + t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); + tunnel->dev->hard_header_len = LL_MAX_HEADER + t_hlen; + return t_hlen; +} + static int ip6erspan_tap_init(struct net_device *dev) { struct ip6_tnl *tunnel; @@ -1773,12 +1850,7 @@ static int ip6erspan_tap_init(struct net_device *dev) if (ret) goto cleanup_dst_cache_init; - tunnel->tun_hlen = 8; - tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + - erspan_hdr_len(tunnel->parms.erspan_ver); - t_hlen = tunnel->hlen + sizeof(struct ipv6hdr); - - dev->hard_header_len = LL_MAX_HEADER + t_hlen; + t_hlen = ip6erspan_calc_hlen(tunnel); dev->mtu = ETH_DATA_LEN - t_hlen; if (dev->type == ARPHRD_ETHER) dev->mtu -= ETH_HLEN; @@ -1786,7 +1858,7 @@ static int ip6erspan_tap_init(struct net_device *dev) dev->mtu -= 8; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; - ip6gre_tnl_link_config(tunnel, 1); + ip6erspan_tnl_link_config(tunnel, 1); return 0; @@ -1800,7 +1872,7 @@ cleanup_alloc_pcpu_stats: static const struct net_device_ops ip6erspan_netdev_ops = { .ndo_init = ip6erspan_tap_init, - .ndo_uninit = ip6gre_tunnel_uninit, + .ndo_uninit = ip6erspan_tunnel_uninit, .ndo_start_xmit = ip6erspan_tunnel_xmit, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, @@ -1864,13 +1936,11 @@ static bool ip6gre_netlink_encap_parms(struct nlattr *data[], return ret; } -static int ip6gre_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[], - struct netlink_ext_ack *extack) +static int ip6gre_newlink_common(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) { struct ip6_tnl *nt; - struct net *net = dev_net(dev); - struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); struct ip_tunnel_encap ipencap; int err; @@ -1883,16 +1953,6 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, return err; } - ip6gre_netlink_parms(data, &nt->parms); - - if (nt->parms.collect_md) { - if (rtnl_dereference(ign->collect_md_tun)) - return -EEXIST; - } else { - if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) - return -EEXIST; - } - if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) eth_hw_addr_random(dev); @@ -1903,51 +1963,94 @@ static int ip6gre_newlink(struct net *src_net, struct net_device *dev, if (err) goto out; - ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); - if (tb[IFLA_MTU]) ip6_tnl_change_mtu(dev, nla_get_u32(tb[IFLA_MTU])); dev_hold(dev); - ip6gre_tunnel_link(ign, nt); out: return err; } +static int ip6gre_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct ip6_tnl *nt = netdev_priv(dev); + struct net *net = dev_net(dev); + struct ip6gre_net *ign; + int err; + + ip6gre_netlink_parms(data, &nt->parms); + ign = net_generic(net, ip6gre_net_id); + + if (nt->parms.collect_md) { + if (rtnl_dereference(ign->collect_md_tun)) + return -EEXIST; + } else { + if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) + return -EEXIST; + } + + err = ip6gre_newlink_common(src_net, dev, tb, data, extack); + if (!err) { + ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); + ip6gre_tunnel_link_md(ign, nt); + ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt); + } + return err; +} + +static struct ip6_tnl * +ip6gre_changelink_common(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[], struct __ip6_tnl_parm *p_p, + struct netlink_ext_ack *extack) +{ + struct ip6_tnl *t, *nt = netdev_priv(dev); + struct net *net = nt->net; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct ip_tunnel_encap ipencap; + + if (dev == ign->fb_tunnel_dev) + return ERR_PTR(-EINVAL); + + if (ip6gre_netlink_encap_parms(data, &ipencap)) { + int err = ip6_tnl_encap_setup(nt, &ipencap); + + if (err < 0) + return ERR_PTR(err); + } + + ip6gre_netlink_parms(data, p_p); + + t = ip6gre_tunnel_locate(net, p_p, 0); + + if (t) { + if (t->dev != dev) + return ERR_PTR(-EEXIST); + } else { + t = nt; + } + + return t; +} + static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) { - struct ip6_tnl *t, *nt = netdev_priv(dev); - struct net *net = nt->net; - struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id); struct __ip6_tnl_parm p; - struct ip_tunnel_encap ipencap; + struct ip6_tnl *t; - if (dev == ign->fb_tunnel_dev) - return -EINVAL; - - if (ip6gre_netlink_encap_parms(data, &ipencap)) { - int err = ip6_tnl_encap_setup(nt, &ipencap); - - if (err < 0) - return err; - } - - ip6gre_netlink_parms(data, &p); - - t = ip6gre_tunnel_locate(net, &p, 0); - - if (t) { - if (t->dev != dev) - return -EEXIST; - } else { - t = nt; - } + t = ip6gre_changelink_common(dev, tb, data, &p, extack); + if (IS_ERR(t)) + return PTR_ERR(t); + ip6gre_tunnel_unlink_md(ign, t); ip6gre_tunnel_unlink(ign, t); ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]); + ip6gre_tunnel_link_md(ign, t); ip6gre_tunnel_link(ign, t); return 0; } @@ -2097,6 +2200,69 @@ static void ip6erspan_tap_setup(struct net_device *dev) netif_keep_dst(dev); } +static int ip6erspan_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct ip6_tnl *nt = netdev_priv(dev); + struct net *net = dev_net(dev); + struct ip6gre_net *ign; + int err; + + ip6gre_netlink_parms(data, &nt->parms); + ign = net_generic(net, ip6gre_net_id); + + if (nt->parms.collect_md) { + if (rtnl_dereference(ign->collect_md_tun_erspan)) + return -EEXIST; + } else { + if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) + return -EEXIST; + } + + err = ip6gre_newlink_common(src_net, dev, tb, data, extack); + if (!err) { + ip6erspan_tnl_link_config(nt, !tb[IFLA_MTU]); + ip6erspan_tunnel_link_md(ign, nt); + ip6gre_tunnel_link(net_generic(net, ip6gre_net_id), nt); + } + return err; +} + +static void ip6erspan_tnl_link_config(struct ip6_tnl *t, int set_mtu) +{ + ip6gre_tnl_link_config_common(t); + ip6gre_tnl_link_config_route(t, set_mtu, ip6erspan_calc_hlen(t)); +} + +static int ip6erspan_tnl_change(struct ip6_tnl *t, + const struct __ip6_tnl_parm *p, int set_mtu) +{ + ip6gre_tnl_copy_tnl_parm(t, p); + ip6erspan_tnl_link_config(t, set_mtu); + return 0; +} + +static int ip6erspan_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + struct ip6gre_net *ign = net_generic(dev_net(dev), ip6gre_net_id); + struct __ip6_tnl_parm p; + struct ip6_tnl *t; + + t = ip6gre_changelink_common(dev, tb, data, &p, extack); + if (IS_ERR(t)) + return PTR_ERR(t); + + ip6gre_tunnel_unlink_md(ign, t); + ip6gre_tunnel_unlink(ign, t); + ip6erspan_tnl_change(t, &p, !tb[IFLA_MTU]); + ip6erspan_tunnel_link_md(ign, t); + ip6gre_tunnel_link(ign, t); + return 0; +} + static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { .kind = "ip6gre", .maxtype = IFLA_GRE_MAX, @@ -2133,8 +2299,8 @@ static struct rtnl_link_ops ip6erspan_tap_ops __read_mostly = { .priv_size = sizeof(struct ip6_tnl), .setup = ip6erspan_tap_setup, .validate = ip6erspan_tap_validate, - .newlink = ip6gre_newlink, - .changelink = ip6gre_changelink, + .newlink = ip6erspan_newlink, + .changelink = ip6erspan_changelink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, .get_link_net = ip6_tnl_get_link_net, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 7f4493080df6..60b0d1652448 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1494,7 +1494,8 @@ alloc_new_skb: if (copy > length) copy = length; - if (!(rt->dst.dev->features&NETIF_F_SG)) { + if (!(rt->dst.dev->features&NETIF_F_SG) && + skb_tailroom(skb) >= copy) { unsigned int off; off = skb->len; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 7097bbf95843..3dc9af775ce2 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -38,6 +38,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team "); MODULE_DESCRIPTION("IPv6 packet filter"); +MODULE_ALIAS("ip6t_icmp6"); void *ip6t_alloc_initial_table(const struct xt_table *info) { diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 0f6b8172fb9a..206fb2c4c319 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -585,7 +585,8 @@ void (*nf_nat_decode_session_hook)(struct sk_buff *, struct flowi *); EXPORT_SYMBOL(nf_nat_decode_session_hook); #endif -static void __net_init __netfilter_net_init(struct nf_hook_entries **e, int max) +static void __net_init +__netfilter_net_init(struct nf_hook_entries __rcu **e, int max) { int h; diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 370abbf6f421..75de46576f51 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -232,7 +232,10 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) { unsigned int hash; - bool ret; + bool ret = false; + + if (cp->flags & IP_VS_CONN_F_ONE_PACKET) + return refcount_dec_if_one(&cp->refcnt); hash = ip_vs_conn_hashkey_conn(cp); @@ -240,15 +243,13 @@ static inline bool ip_vs_conn_unlink(struct ip_vs_conn *cp) spin_lock(&cp->lock); if (cp->flags & IP_VS_CONN_F_HASHED) { - ret = false; /* Decrease refcnt and unlink conn only if we are last user */ if (refcount_dec_if_one(&cp->refcnt)) { hlist_del_rcu(&cp->c_list); cp->flags &= ~IP_VS_CONN_F_HASHED; ret = true; } - } else - ret = refcount_read(&cp->refcnt) ? false : true; + } spin_unlock(&cp->lock); ct_write_unlock_bh(hash); @@ -454,12 +455,6 @@ ip_vs_conn_out_get_proto(struct netns_ipvs *ipvs, int af, } EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); -static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp) -{ - __ip_vs_conn_put(cp); - ip_vs_conn_expire(&cp->timer); -} - /* * Put back the conn and restart its timer with its timeout */ @@ -478,7 +473,7 @@ void ip_vs_conn_put(struct ip_vs_conn *cp) (refcount_read(&cp->refcnt) == 1) && !timer_pending(&cp->timer)) /* expire connection immediately */ - __ip_vs_conn_put_notimer(cp); + ip_vs_conn_expire(&cp->timer); else __ip_vs_conn_put_timer(cp); } diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5f6f73cf2174..0679dd101e72 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -119,6 +119,8 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) struct ip_vs_cpu_stats *s; struct ip_vs_service *svc; + local_bh_disable(); + s = this_cpu_ptr(dest->stats.cpustats); u64_stats_update_begin(&s->syncp); s->cnt.inpkts++; @@ -137,6 +139,8 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->cnt.inpkts++; s->cnt.inbytes += skb->len; u64_stats_update_end(&s->syncp); + + local_bh_enable(); } } @@ -151,6 +155,8 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) struct ip_vs_cpu_stats *s; struct ip_vs_service *svc; + local_bh_disable(); + s = this_cpu_ptr(dest->stats.cpustats); u64_stats_update_begin(&s->syncp); s->cnt.outpkts++; @@ -169,6 +175,8 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->cnt.outpkts++; s->cnt.outbytes += skb->len; u64_stats_update_end(&s->syncp); + + local_bh_enable(); } } @@ -179,6 +187,8 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) struct netns_ipvs *ipvs = svc->ipvs; struct ip_vs_cpu_stats *s; + local_bh_disable(); + s = this_cpu_ptr(cp->dest->stats.cpustats); u64_stats_update_begin(&s->syncp); s->cnt.conns++; @@ -193,6 +203,8 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc) u64_stats_update_begin(&s->syncp); s->cnt.conns++; u64_stats_update_end(&s->syncp); + + local_bh_enable(); } diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index e97cdc1cf98c..8e67910185a0 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -981,6 +981,17 @@ static int tcp_packet(struct nf_conn *ct, return NF_ACCEPT; /* Don't change state */ } break; + case TCP_CONNTRACK_SYN_SENT2: + /* tcp_conntracks table is not smart enough to handle + * simultaneous open. + */ + ct->proto.tcp.last_flags |= IP_CT_TCP_SIMULTANEOUS_OPEN; + break; + case TCP_CONNTRACK_SYN_RECV: + if (dir == IP_CT_DIR_REPLY && index == TCP_ACK_SET && + ct->proto.tcp.last_flags & IP_CT_TCP_SIMULTANEOUS_OPEN) + new_state = TCP_CONNTRACK_ESTABLISHED; + break; case TCP_CONNTRACK_CLOSE: if (index == TCP_RST_SET && (ct->proto.tcp.seen[!dir].flags & IP_CT_TCP_FLAG_MAXACK_SET) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 18bd584fadda..a5f3743fda65 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -214,6 +214,34 @@ static int nft_delchain(struct nft_ctx *ctx) return err; } +static void nft_rule_expr_activate(const struct nft_ctx *ctx, + struct nft_rule *rule) +{ + struct nft_expr *expr; + + expr = nft_expr_first(rule); + while (expr != nft_expr_last(rule) && expr->ops) { + if (expr->ops->activate) + expr->ops->activate(ctx, expr); + + expr = nft_expr_next(expr); + } +} + +static void nft_rule_expr_deactivate(const struct nft_ctx *ctx, + struct nft_rule *rule) +{ + struct nft_expr *expr; + + expr = nft_expr_first(rule); + while (expr != nft_expr_last(rule) && expr->ops) { + if (expr->ops->deactivate) + expr->ops->deactivate(ctx, expr); + + expr = nft_expr_next(expr); + } +} + static int nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule) { @@ -259,6 +287,7 @@ static int nft_delrule(struct nft_ctx *ctx, struct nft_rule *rule) nft_trans_destroy(trans); return err; } + nft_rule_expr_deactivate(ctx, rule); return 0; } @@ -2242,6 +2271,13 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx, kfree(rule); } +static void nf_tables_rule_release(const struct nft_ctx *ctx, + struct nft_rule *rule) +{ + nft_rule_expr_deactivate(ctx, rule); + nf_tables_rule_destroy(ctx, rule); +} + #define NFT_RULE_MAXEXPRS 128 static struct nft_expr_info *info; @@ -2415,7 +2451,7 @@ static int nf_tables_newrule(struct net *net, struct sock *nlsk, return 0; err2: - nf_tables_rule_destroy(&ctx, rule); + nf_tables_rule_release(&ctx, rule); err1: for (i = 0; i < n; i++) { if (info[i].ops != NULL) @@ -4098,8 +4134,10 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA) ^ nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) || nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) ^ - nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF)) - return -EBUSY; + nft_set_ext_exists(ext2, NFT_SET_EXT_OBJREF)) { + err = -EBUSY; + goto err5; + } if ((nft_set_ext_exists(ext, NFT_SET_EXT_DATA) && nft_set_ext_exists(ext2, NFT_SET_EXT_DATA) && memcmp(nft_set_ext_data(ext), @@ -4185,7 +4223,7 @@ static int nf_tables_newsetelem(struct net *net, struct sock *nlsk, * NFT_GOTO verdicts. This function must be called on active data objects * from the second phase of the commit protocol. */ -static void nft_data_hold(const struct nft_data *data, enum nft_data_types type) +void nft_data_hold(const struct nft_data *data, enum nft_data_types type) { if (type == NFT_DATA_VERDICT) { switch (data->verdict.code) { @@ -5821,7 +5859,7 @@ static void nft_chain_commit_update(struct nft_trans *trans) } } -static void nf_tables_commit_release(struct nft_trans *trans) +static void nft_commit_release(struct nft_trans *trans) { switch (trans->msg_type) { case NFT_MSG_DELTABLE: @@ -5850,6 +5888,21 @@ static void nf_tables_commit_release(struct nft_trans *trans) kfree(trans); } +static void nf_tables_commit_release(struct net *net) +{ + struct nft_trans *trans, *next; + + if (list_empty(&net->nft.commit_list)) + return; + + synchronize_rcu(); + + list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { + list_del(&trans->list); + nft_commit_release(trans); + } +} + static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nft_trans *trans, *next; @@ -5980,13 +6033,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } } - synchronize_rcu(); - - list_for_each_entry_safe(trans, next, &net->nft.commit_list, list) { - list_del(&trans->list); - nf_tables_commit_release(trans); - } - + nf_tables_commit_release(net); nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN); return 0; @@ -6066,10 +6113,12 @@ static int nf_tables_abort(struct net *net, struct sk_buff *skb) case NFT_MSG_NEWRULE: trans->ctx.chain->use--; list_del_rcu(&nft_trans_rule(trans)->list); + nft_rule_expr_deactivate(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_DELRULE: trans->ctx.chain->use++; nft_clear(trans->ctx.net, nft_trans_rule(trans)); + nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWSET: @@ -6645,7 +6694,7 @@ int __nft_release_basechain(struct nft_ctx *ctx) list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) { list_del(&rule->list); ctx->chain->use--; - nf_tables_rule_destroy(ctx, rule); + nf_tables_rule_release(ctx, rule); } list_del(&ctx->chain->list); ctx->table->use--; @@ -6683,7 +6732,7 @@ static void __nft_release_tables(struct net *net) list_for_each_entry_safe(rule, nr, &chain->rules, list) { list_del(&rule->list); chain->use--; - nf_tables_rule_destroy(&ctx, rule); + nf_tables_rule_release(&ctx, rule); } } list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) { diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 9cf47c4cb9d5..ebb9799350ed 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -119,15 +119,22 @@ DEFINE_STATIC_KEY_FALSE(nft_counters_enabled); static noinline void nft_update_chain_stats(const struct nft_chain *chain, const struct nft_pktinfo *pkt) { + struct nft_base_chain *base_chain; struct nft_stats *stats; - local_bh_disable(); - stats = this_cpu_ptr(rcu_dereference(nft_base_chain(chain)->stats)); - u64_stats_update_begin(&stats->syncp); - stats->pkts++; - stats->bytes += pkt->skb->len; - u64_stats_update_end(&stats->syncp); - local_bh_enable(); + base_chain = nft_base_chain(chain); + if (!base_chain->stats) + return; + + stats = this_cpu_ptr(rcu_dereference(base_chain->stats)); + if (stats) { + local_bh_disable(); + u64_stats_update_begin(&stats->syncp); + stats->pkts++; + stats->bytes += pkt->skb->len; + u64_stats_update_end(&stats->syncp); + local_bh_enable(); + } } struct nft_jumpstack { diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index b9505bcd3827..6ddf89183e7b 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -115,7 +115,7 @@ static int nfnl_acct_new(struct net *net, struct sock *nfnl, nfacct->flags = flags; } - strncpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX); + nla_strlcpy(nfacct->name, nla_data(tb[NFACCT_NAME]), NFACCT_NAME_MAX); if (tb[NFACCT_BYTES]) { atomic64_set(&nfacct->bytes, diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 4a4b293fb2e5..fa026b269b36 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -149,8 +149,8 @@ nfnl_cthelper_expect_policy(struct nf_conntrack_expect_policy *expect_policy, !tb[NFCTH_POLICY_EXPECT_TIMEOUT]) return -EINVAL; - strncpy(expect_policy->name, - nla_data(tb[NFCTH_POLICY_NAME]), NF_CT_HELPER_NAME_LEN); + nla_strlcpy(expect_policy->name, + nla_data(tb[NFCTH_POLICY_NAME]), NF_CT_HELPER_NAME_LEN); expect_policy->max_expected = ntohl(nla_get_be32(tb[NFCTH_POLICY_EXPECT_MAX])); if (expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT) @@ -234,7 +234,8 @@ nfnl_cthelper_create(const struct nlattr * const tb[], if (ret < 0) goto err1; - strncpy(helper->name, nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN); + nla_strlcpy(helper->name, + nla_data(tb[NFCTH_NAME]), NF_CT_HELPER_NAME_LEN); size = ntohl(nla_get_be32(tb[NFCTH_PRIV_DATA_LEN])); if (size > FIELD_SIZEOF(struct nf_conn_help, data)) { ret = -ENOMEM; diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index 8e23726b9081..1d99a1efdafc 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -27,14 +27,31 @@ struct nft_xt { struct list_head head; struct nft_expr_ops ops; unsigned int refcnt; + + /* Unlike other expressions, ops doesn't have static storage duration. + * nft core assumes they do. We use kfree_rcu so that nft core can + * can check expr->ops->size even after nft_compat->destroy() frees + * the nft_xt struct that holds the ops structure. + */ + struct rcu_head rcu_head; }; -static void nft_xt_put(struct nft_xt *xt) +/* Used for matches where *info is larger than X byte */ +#define NFT_MATCH_LARGE_THRESH 192 + +struct nft_xt_match_priv { + void *info; +}; + +static bool nft_xt_put(struct nft_xt *xt) { if (--xt->refcnt == 0) { list_del(&xt->head); - kfree(xt); + kfree_rcu(xt, rcu_head); + return true; } + + return false; } static int nft_compat_chain_validate_dependency(const char *tablename, @@ -226,6 +243,7 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, struct xt_target *target = expr->ops->data; struct xt_tgchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_TARGET_INFO])); + struct nft_xt *nft_xt; u16 proto = 0; bool inv = false; union nft_entry e = {}; @@ -236,25 +254,22 @@ nft_target_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (ctx->nla[NFTA_RULE_COMPAT]) { ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv); if (ret < 0) - goto err; + return ret; } nft_target_set_tgchk_param(&par, ctx, target, info, &e, proto, inv); ret = xt_check_target(&par, size, proto, inv); if (ret < 0) - goto err; + return ret; /* The standard target cannot be used */ - if (target->target == NULL) { - ret = -EINVAL; - goto err; - } + if (!target->target) + return -EINVAL; + nft_xt = container_of(expr->ops, struct nft_xt, ops); + nft_xt->refcnt++; return 0; -err: - module_put(target->me); - return ret; } static void @@ -271,8 +286,8 @@ nft_target_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) if (par.target->destroy != NULL) par.target->destroy(&par); - nft_xt_put(container_of(expr->ops, struct nft_xt, ops)); - module_put(target->me); + if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) + module_put(target->me); } static int nft_target_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -316,11 +331,11 @@ static int nft_target_validate(const struct nft_ctx *ctx, return 0; } -static void nft_match_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void __nft_match_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt, + void *info) { - void *info = nft_expr_priv(expr); struct xt_match *match = expr->ops->data; struct sk_buff *skb = pkt->skb; bool ret; @@ -344,6 +359,22 @@ static void nft_match_eval(const struct nft_expr *expr, } } +static void nft_match_large_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_xt_match_priv *priv = nft_expr_priv(expr); + + __nft_match_eval(expr, regs, pkt, priv->info); +} + +static void nft_match_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + __nft_match_eval(expr, regs, pkt, nft_expr_priv(expr)); +} + static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = { [NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING }, [NFTA_MATCH_REV] = { .type = NLA_U32 }, @@ -404,13 +435,14 @@ static void match_compat_from_user(struct xt_match *m, void *in, void *out) } static int -nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, - const struct nlattr * const tb[]) +__nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[], + void *info) { - void *info = nft_expr_priv(expr); struct xt_match *match = expr->ops->data; struct xt_mtchk_param par; size_t size = XT_ALIGN(nla_len(tb[NFTA_MATCH_INFO])); + struct nft_xt *nft_xt; u16 proto = 0; bool inv = false; union nft_entry e = {}; @@ -421,26 +453,50 @@ nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, if (ctx->nla[NFTA_RULE_COMPAT]) { ret = nft_parse_compat(ctx->nla[NFTA_RULE_COMPAT], &proto, &inv); if (ret < 0) - goto err; + return ret; } nft_match_set_mtchk_param(&par, ctx, match, info, &e, proto, inv); ret = xt_check_match(&par, size, proto, inv); if (ret < 0) - goto err; + return ret; + nft_xt = container_of(expr->ops, struct nft_xt, ops); + nft_xt->refcnt++; return 0; -err: - module_put(match->me); +} + +static int +nft_match_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + return __nft_match_init(ctx, expr, tb, nft_expr_priv(expr)); +} + +static int +nft_match_large_init(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nlattr * const tb[]) +{ + struct nft_xt_match_priv *priv = nft_expr_priv(expr); + struct xt_match *m = expr->ops->data; + int ret; + + priv->info = kmalloc(XT_ALIGN(m->matchsize), GFP_KERNEL); + if (!priv->info) + return -ENOMEM; + + ret = __nft_match_init(ctx, expr, tb, priv->info); + if (ret) + kfree(priv->info); return ret; } static void -nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +__nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr, + void *info) { struct xt_match *match = expr->ops->data; - void *info = nft_expr_priv(expr); struct xt_mtdtor_param par; par.net = ctx->net; @@ -450,13 +506,28 @@ nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) if (par.match->destroy != NULL) par.match->destroy(&par); - nft_xt_put(container_of(expr->ops, struct nft_xt, ops)); - module_put(match->me); + if (nft_xt_put(container_of(expr->ops, struct nft_xt, ops))) + module_put(match->me); } -static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) +static void +nft_match_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + __nft_match_destroy(ctx, expr, nft_expr_priv(expr)); +} + +static void +nft_match_large_destroy(const struct nft_ctx *ctx, const struct nft_expr *expr) +{ + struct nft_xt_match_priv *priv = nft_expr_priv(expr); + + __nft_match_destroy(ctx, expr, priv->info); + kfree(priv->info); +} + +static int __nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr, + void *info) { - void *info = nft_expr_priv(expr); struct xt_match *match = expr->ops->data; if (nla_put_string(skb, NFTA_MATCH_NAME, match->name) || @@ -470,6 +541,18 @@ nla_put_failure: return -1; } +static int nft_match_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + return __nft_match_dump(skb, expr, nft_expr_priv(expr)); +} + +static int nft_match_large_dump(struct sk_buff *skb, const struct nft_expr *e) +{ + struct nft_xt_match_priv *priv = nft_expr_priv(e); + + return __nft_match_dump(skb, e, priv->info); +} + static int nft_match_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nft_data **data) @@ -637,6 +720,7 @@ nft_match_select_ops(const struct nft_ctx *ctx, { struct nft_xt *nft_match; struct xt_match *match; + unsigned int matchsize; char *mt_name; u32 rev, family; int err; @@ -654,13 +738,8 @@ nft_match_select_ops(const struct nft_ctx *ctx, list_for_each_entry(nft_match, &nft_match_list, head) { struct xt_match *match = nft_match->ops.data; - if (nft_match_cmp(match, mt_name, rev, family)) { - if (!try_module_get(match->me)) - return ERR_PTR(-ENOENT); - - nft_match->refcnt++; + if (nft_match_cmp(match, mt_name, rev, family)) return &nft_match->ops; - } } match = xt_request_find_match(family, mt_name, rev); @@ -679,9 +758,8 @@ nft_match_select_ops(const struct nft_ctx *ctx, goto err; } - nft_match->refcnt = 1; + nft_match->refcnt = 0; nft_match->ops.type = &nft_match_type; - nft_match->ops.size = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); nft_match->ops.eval = nft_match_eval; nft_match->ops.init = nft_match_init; nft_match->ops.destroy = nft_match_destroy; @@ -689,6 +767,18 @@ nft_match_select_ops(const struct nft_ctx *ctx, nft_match->ops.validate = nft_match_validate; nft_match->ops.data = match; + matchsize = NFT_EXPR_SIZE(XT_ALIGN(match->matchsize)); + if (matchsize > NFT_MATCH_LARGE_THRESH) { + matchsize = NFT_EXPR_SIZE(sizeof(struct nft_xt_match_priv)); + + nft_match->ops.eval = nft_match_large_eval; + nft_match->ops.init = nft_match_large_init; + nft_match->ops.destroy = nft_match_large_destroy; + nft_match->ops.dump = nft_match_large_dump; + } + + nft_match->ops.size = matchsize; + list_add(&nft_match->head, &nft_match_list); return &nft_match->ops; @@ -739,13 +829,8 @@ nft_target_select_ops(const struct nft_ctx *ctx, list_for_each_entry(nft_target, &nft_target_list, head) { struct xt_target *target = nft_target->ops.data; - if (nft_target_cmp(target, tg_name, rev, family)) { - if (!try_module_get(target->me)) - return ERR_PTR(-ENOENT); - - nft_target->refcnt++; + if (nft_target_cmp(target, tg_name, rev, family)) return &nft_target->ops; - } } target = xt_request_find_target(family, tg_name, rev); @@ -764,7 +849,7 @@ nft_target_select_ops(const struct nft_ctx *ctx, goto err; } - nft_target->refcnt = 1; + nft_target->refcnt = 0; nft_target->ops.type = &nft_target_type; nft_target->ops.size = NFT_EXPR_SIZE(XT_ALIGN(target->targetsize)); nft_target->ops.init = nft_target_init; @@ -823,6 +908,32 @@ err_match: static void __exit nft_compat_module_exit(void) { + struct nft_xt *xt, *next; + + /* list should be empty here, it can be non-empty only in case there + * was an error that caused nft_xt expr to not be initialized fully + * and noone else requested the same expression later. + * + * In this case, the lists contain 0-refcount entries that still + * hold module reference. + */ + list_for_each_entry_safe(xt, next, &nft_target_list, head) { + struct xt_target *target = xt->ops.data; + + if (WARN_ON_ONCE(xt->refcnt)) + continue; + module_put(target->me); + kfree(xt); + } + + list_for_each_entry_safe(xt, next, &nft_match_list, head) { + struct xt_match *match = xt->ops.data; + + if (WARN_ON_ONCE(xt->refcnt)) + continue; + module_put(match->me); + kfree(xt); + } nfnetlink_subsys_unregister(&nfnl_compat_subsys); nft_unregister_expr(&nft_target_type); nft_unregister_expr(&nft_match_type); diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 4717d7796927..aa87ff8beae8 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -69,8 +69,16 @@ err1: return err; } -static void nft_immediate_destroy(const struct nft_ctx *ctx, - const struct nft_expr *expr) +static void nft_immediate_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + const struct nft_immediate_expr *priv = nft_expr_priv(expr); + + return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg)); +} + +static void nft_immediate_deactivate(const struct nft_ctx *ctx, + const struct nft_expr *expr) { const struct nft_immediate_expr *priv = nft_expr_priv(expr); @@ -108,7 +116,8 @@ static const struct nft_expr_ops nft_imm_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_immediate_expr)), .eval = nft_immediate_eval, .init = nft_immediate_init, - .destroy = nft_immediate_destroy, + .activate = nft_immediate_activate, + .deactivate = nft_immediate_deactivate, .dump = nft_immediate_dump, .validate = nft_immediate_validate, }; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 71325fef647d..cb7cb300c3bc 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -183,6 +183,9 @@ struct xt_match *xt_find_match(u8 af, const char *name, u8 revision) struct xt_match *m; int err = -ENOENT; + if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN) + return ERR_PTR(-EINVAL); + mutex_lock(&xt[af].mutex); list_for_each_entry(m, &xt[af].match, list) { if (strcmp(m->name, name) == 0) { @@ -229,6 +232,9 @@ struct xt_target *xt_find_target(u8 af, const char *name, u8 revision) struct xt_target *t; int err = -ENOENT; + if (strnlen(name, XT_EXTENSION_MAXNAMELEN) == XT_EXTENSION_MAXNAMELEN) + return ERR_PTR(-EINVAL); + mutex_lock(&xt[af].mutex); list_for_each_entry(t, &xt[af].target, list) { if (strcmp(t->name, name) == 0) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 611a26d5235c..2cc98c763003 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2871,13 +2871,15 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) if (skb == NULL) goto out_unlock; - skb_set_network_header(skb, reserve); + skb_reset_network_header(skb); err = -EINVAL; if (sock->type == SOCK_DGRAM) { offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len); if (unlikely(offset < 0)) goto out_free; + } else if (reserve) { + skb_push(skb, reserve); } /* Returns -EFAULT on error */ diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 853604685965..1fb39e1f9d07 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -161,6 +161,8 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, case htons(ETH_P_8021AD): break; default: + if (exists) + tcf_idr_release(*a, bind); return -EPROTONOSUPPORT; } } else { diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 16644b3d2362..56c181c3feeb 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -222,10 +222,11 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt, extack); if (IS_ERR(child)) return PTR_ERR(child); + + /* child is fifo, no need to check for noop_qdisc */ + qdisc_hash_add(child, true); } - if (child != &noop_qdisc) - qdisc_hash_add(child, true); sch_tree_lock(sch); q->flags = ctl->flags; q->limit = ctl->limit; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 03225a8df973..6f74a426f159 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -383,6 +383,9 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, err = PTR_ERR(child); goto done; } + + /* child is fifo, no need to check for noop_qdisc */ + qdisc_hash_add(child, true); } sch_tree_lock(sch); @@ -391,8 +394,6 @@ static int tbf_change(struct Qdisc *sch, struct nlattr *opt, q->qdisc->qstats.backlog); qdisc_destroy(q->qdisc); q->qdisc = child; - if (child != &noop_qdisc) - qdisc_hash_add(child, true); } q->limit = qopt->limit; if (tb[TCA_TBF_PBURST]) diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 74568cdbca70..d7b88b2d1b22 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -245,40 +245,45 @@ out: static int smc_pnet_fill_entry(struct net *net, struct smc_pnetentry *pnetelem, struct nlattr *tb[]) { - char *string, *ibname = NULL; - int rc = 0; + char *string, *ibname; + int rc; memset(pnetelem, 0, sizeof(*pnetelem)); INIT_LIST_HEAD(&pnetelem->list); - if (tb[SMC_PNETID_NAME]) { - string = (char *)nla_data(tb[SMC_PNETID_NAME]); - if (!smc_pnetid_valid(string, pnetelem->pnet_name)) { - rc = -EINVAL; - goto error; - } - } - if (tb[SMC_PNETID_ETHNAME]) { - string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); - pnetelem->ndev = dev_get_by_name(net, string); - if (!pnetelem->ndev) - return -ENOENT; - } - if (tb[SMC_PNETID_IBNAME]) { - ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); - ibname = strim(ibname); - pnetelem->smcibdev = smc_pnet_find_ib(ibname); - if (!pnetelem->smcibdev) { - rc = -ENOENT; - goto error; - } - } - if (tb[SMC_PNETID_IBPORT]) { - pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); - if (pnetelem->ib_port > SMC_MAX_PORTS) { - rc = -EINVAL; - goto error; - } - } + + rc = -EINVAL; + if (!tb[SMC_PNETID_NAME]) + goto error; + string = (char *)nla_data(tb[SMC_PNETID_NAME]); + if (!smc_pnetid_valid(string, pnetelem->pnet_name)) + goto error; + + rc = -EINVAL; + if (!tb[SMC_PNETID_ETHNAME]) + goto error; + rc = -ENOENT; + string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]); + pnetelem->ndev = dev_get_by_name(net, string); + if (!pnetelem->ndev) + goto error; + + rc = -EINVAL; + if (!tb[SMC_PNETID_IBNAME]) + goto error; + rc = -ENOENT; + ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]); + ibname = strim(ibname); + pnetelem->smcibdev = smc_pnet_find_ib(ibname); + if (!pnetelem->smcibdev) + goto error; + + rc = -EINVAL; + if (!tb[SMC_PNETID_IBPORT]) + goto error; + pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]); + if (pnetelem->ib_port < 1 || pnetelem->ib_port > SMC_MAX_PORTS) + goto error; + return 0; error: @@ -307,6 +312,8 @@ static int smc_pnet_get(struct sk_buff *skb, struct genl_info *info) void *hdr; int rc; + if (!info->attrs[SMC_PNETID_NAME]) + return -EINVAL; pnetelem = smc_pnet_find_pnetid( (char *)nla_data(info->attrs[SMC_PNETID_NAME])); if (!pnetelem) @@ -359,6 +366,8 @@ static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info) static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info) { + if (!info->attrs[SMC_PNETID_NAME]) + return -EINVAL; return smc_pnet_remove_by_pnetid( (char *)nla_data(info->attrs[SMC_PNETID_NAME])); } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index 5c3909c311f1..839e1e165a0c 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -680,7 +680,6 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb, struct scatterlist *sgin = &sgin_arr[0]; struct strp_msg *rxm = strp_msg(skb); int ret, nsg = ARRAY_SIZE(sgin_arr); - char aad_recv[TLS_AAD_SPACE_SIZE]; struct sk_buff *unused; ret = skb_copy_bits(skb, rxm->offset + TLS_HEADER_SIZE, @@ -697,13 +696,13 @@ static int decrypt_skb(struct sock *sk, struct sk_buff *skb, } sg_init_table(sgin, nsg); - sg_set_buf(&sgin[0], aad_recv, sizeof(aad_recv)); + sg_set_buf(&sgin[0], ctx->rx_aad_ciphertext, TLS_AAD_SPACE_SIZE); nsg = skb_to_sgvec(skb, &sgin[1], rxm->offset + tls_ctx->rx.prepend_size, rxm->full_len - tls_ctx->rx.prepend_size); - tls_make_aad(aad_recv, + tls_make_aad(ctx->rx_aad_ciphertext, rxm->full_len - tls_ctx->rx.overhead_size, tls_ctx->rx.rec_seq, tls_ctx->rx.rec_seq_size, @@ -802,12 +801,12 @@ int tls_sw_recvmsg(struct sock *sk, if (to_copy <= len && page_count < MAX_SKB_FRAGS && likely(!(flags & MSG_PEEK))) { struct scatterlist sgin[MAX_SKB_FRAGS + 1]; - char unused[21]; int pages = 0; zc = true; sg_init_table(sgin, MAX_SKB_FRAGS + 1); - sg_set_buf(&sgin[0], unused, 13); + sg_set_buf(&sgin[0], ctx->rx_aad_plaintext, + TLS_AAD_SPACE_SIZE); err = zerocopy_from_iter(sk, &msg->msg_iter, to_copy, &pages, diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 62d1aa1a4cf3..62a99ab680e3 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -247,7 +247,7 @@ $(obj)/%.o: $(src)/%.c @echo " CLANG-bpf " $@ $(Q)$(CLANG) $(NOSTDINC_FLAGS) $(LINUXINCLUDE) $(EXTRA_CFLAGS) -I$(obj) \ -I$(srctree)/tools/testing/selftests/bpf/ \ - -D__KERNEL__ -Wno-unused-value -Wno-pointer-sign \ + -D__KERNEL__ -D__BPF_TRACING__ -Wno-unused-value -Wno-pointer-sign \ -D__TARGET_ARCH_$(ARCH) -Wno-compare-distinct-pointer-types \ -Wno-gnu-variable-sized-type-not-at-end \ -Wno-address-of-packed-member -Wno-tautological-compare \ diff --git a/scripts/faddr2line b/scripts/faddr2line index 9e5735a4d3a5..1876a741087c 100755 --- a/scripts/faddr2line +++ b/scripts/faddr2line @@ -170,7 +170,10 @@ __faddr2line() { echo "$file_lines" | while read -r line do echo $line - eval $(echo $line | awk -F "[ :]" '{printf("n1=%d;n2=%d;f=%s",$NF-5, $NF+5, $(NF-1))}') + n=$(echo $line | sed 's/.*:\([0-9]\+\).*/\1/g') + n1=$[$n-5] + n2=$[$n+5] + f=$(echo $line | sed 's/.*at \(.\+\):.*/\1/g') awk 'NR>=strtonum("'$n1'") && NR<=strtonum("'$n2'") {printf("%d\t%s\n", NR, $0)}' $f done diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5c508d26b367..6bd9358e5e62 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -4578,6 +4578,7 @@ static int selinux_socket_post_create(struct socket *sock, int family, static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, int addrlen) { struct sock *sk = sock->sk; + struct sk_security_struct *sksec = sk->sk_security; u16 family; int err; @@ -4589,11 +4590,11 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in family = sk->sk_family; if (family == PF_INET || family == PF_INET6) { char *addrp; - struct sk_security_struct *sksec = sk->sk_security; struct common_audit_data ad; struct lsm_network_audit net = {0,}; struct sockaddr_in *addr4 = NULL; struct sockaddr_in6 *addr6 = NULL; + u16 family_sa = address->sa_family; unsigned short snum; u32 sid, node_perm; @@ -4603,11 +4604,20 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in * need to check address->sa_family as it is possible to have * sk->sk_family = PF_INET6 with addr->sa_family = AF_INET. */ - switch (address->sa_family) { + switch (family_sa) { + case AF_UNSPEC: case AF_INET: if (addrlen < sizeof(struct sockaddr_in)) return -EINVAL; addr4 = (struct sockaddr_in *)address; + if (family_sa == AF_UNSPEC) { + /* see __inet_bind(), we only want to allow + * AF_UNSPEC if the address is INADDR_ANY + */ + if (addr4->sin_addr.s_addr != htonl(INADDR_ANY)) + goto err_af; + family_sa = AF_INET; + } snum = ntohs(addr4->sin_port); addrp = (char *)&addr4->sin_addr.s_addr; break; @@ -4619,15 +4629,14 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in addrp = (char *)&addr6->sin6_addr.s6_addr; break; default: - /* Note that SCTP services expect -EINVAL, whereas - * others expect -EAFNOSUPPORT. - */ - if (sksec->sclass == SECCLASS_SCTP_SOCKET) - return -EINVAL; - else - return -EAFNOSUPPORT; + goto err_af; } + ad.type = LSM_AUDIT_DATA_NET; + ad.u.net = &net; + ad.u.net->sport = htons(snum); + ad.u.net->family = family_sa; + if (snum) { int low, high; @@ -4639,10 +4648,6 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in snum, &sid); if (err) goto out; - ad.type = LSM_AUDIT_DATA_NET; - ad.u.net = &net; - ad.u.net->sport = htons(snum); - ad.u.net->family = family; err = avc_has_perm(&selinux_state, sksec->sid, sid, sksec->sclass, @@ -4674,16 +4679,11 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in break; } - err = sel_netnode_sid(addrp, family, &sid); + err = sel_netnode_sid(addrp, family_sa, &sid); if (err) goto out; - ad.type = LSM_AUDIT_DATA_NET; - ad.u.net = &net; - ad.u.net->sport = htons(snum); - ad.u.net->family = family; - - if (address->sa_family == AF_INET) + if (family_sa == AF_INET) ad.u.net->v4info.saddr = addr4->sin_addr.s_addr; else ad.u.net->v6info.saddr = addr6->sin6_addr; @@ -4696,6 +4696,11 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in } out: return err; +err_af: + /* Note that SCTP services expect -EINVAL, others -EAFNOSUPPORT. */ + if (sksec->sclass == SECCLASS_SCTP_SOCKET) + return -EINVAL; + return -EAFNOSUPPORT; } /* This supports connect(2) and SCTP connect services such as sctp_connectx(3) @@ -4773,7 +4778,7 @@ static int selinux_socket_connect_helper(struct socket *sock, ad.type = LSM_AUDIT_DATA_NET; ad.u.net = &net; ad.u.net->dport = htons(snum); - ad.u.net->family = sk->sk_family; + ad.u.net->family = address->sa_family; err = avc_has_perm(&selinux_state, sksec->sid, sid, sksec->sclass, perm, &ad); if (err) @@ -5274,6 +5279,7 @@ static int selinux_sctp_bind_connect(struct sock *sk, int optname, while (walk_size < addrlen) { addr = addr_buf; switch (addr->sa_family) { + case AF_UNSPEC: case AF_INET: len = sizeof(struct sockaddr_in); break; @@ -5281,7 +5287,7 @@ static int selinux_sctp_bind_connect(struct sock *sk, int optname, len = sizeof(struct sockaddr_in6); break; default: - return -EAFNOSUPPORT; + return -EINVAL; } err = -EINVAL; diff --git a/sound/core/control_compat.c b/sound/core/control_compat.c index a848836a5de0..507fd5210c1c 100644 --- a/sound/core/control_compat.c +++ b/sound/core/control_compat.c @@ -396,8 +396,7 @@ static int snd_ctl_elem_add_compat(struct snd_ctl_file *file, if (copy_from_user(&data->id, &data32->id, sizeof(data->id)) || copy_from_user(&data->type, &data32->type, 3 * sizeof(u32))) goto error; - if (get_user(data->owner, &data32->owner) || - get_user(data->type, &data32->type)) + if (get_user(data->owner, &data32->owner)) goto error; switch (data->type) { case SNDRV_CTL_ELEM_TYPE_BOOLEAN: diff --git a/sound/pci/hda/hda_intel.c b/sound/pci/hda/hda_intel.c index b0c8c79848a9..a0c93b9c9a28 100644 --- a/sound/pci/hda/hda_intel.c +++ b/sound/pci/hda/hda_intel.c @@ -2210,6 +2210,8 @@ static struct snd_pci_quirk power_save_blacklist[] = { SND_PCI_QUIRK(0x1849, 0x0c0c, "Asrock B85M-ITX", 0), /* https://bugzilla.redhat.com/show_bug.cgi?id=1525104 */ SND_PCI_QUIRK(0x1043, 0x8733, "Asus Prime X370-Pro", 0), + /* https://bugzilla.redhat.com/show_bug.cgi?id=1572975 */ + SND_PCI_QUIRK(0x17aa, 0x36a7, "Lenovo C50 All in one", 0), /* https://bugzilla.kernel.org/show_bug.cgi?id=198611 */ SND_PCI_QUIRK(0x17aa, 0x2227, "Lenovo X1 Carbon 3rd Gen", 0), {} diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c index 2dd34dd77447..01a6643fc7d4 100644 --- a/sound/pci/hda/patch_realtek.c +++ b/sound/pci/hda/patch_realtek.c @@ -2363,6 +2363,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = { SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3), SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX), SND_PCI_QUIRK(0x1558, 0x9501, "Clevo P950HR", ALC1220_FIXUP_CLEVO_P950), + SND_PCI_QUIRK(0x1558, 0x95e2, "Clevo P950ER", ALC1220_FIXUP_CLEVO_P950), SND_PCI_QUIRK_VENDOR(0x1558, "Clevo laptop", ALC882_FIXUP_EAPD), SND_PCI_QUIRK(0x161f, 0x2054, "Medion laptop", ALC883_FIXUP_EAPD), SND_PCI_QUIRK(0x17aa, 0x3a0d, "Lenovo Y530", ALC882_FIXUP_LENOVO_Y530), diff --git a/sound/usb/mixer.c b/sound/usb/mixer.c index 344d7b069d59..bb5ab7a7dfa5 100644 --- a/sound/usb/mixer.c +++ b/sound/usb/mixer.c @@ -967,6 +967,14 @@ static void volume_control_quirks(struct usb_mixer_elem_info *cval, } break; + case USB_ID(0x0d8c, 0x0103): + if (!strcmp(kctl->id.name, "PCM Playback Volume")) { + usb_audio_info(chip, + "set volume quirk for CM102-A+/102S+\n"); + cval->min = -256; + } + break; + case USB_ID(0x0471, 0x0101): case USB_ID(0x0471, 0x0104): case USB_ID(0x0471, 0x0105): diff --git a/sound/usb/stream.c b/sound/usb/stream.c index 956be9f7c72a..5ed334575fc7 100644 --- a/sound/usb/stream.c +++ b/sound/usb/stream.c @@ -576,7 +576,7 @@ static int parse_uac_endpoint_attributes(struct snd_usb_audio *chip, if (protocol == UAC_VERSION_1) { attributes = csep->bmAttributes; - } else { + } else if (protocol == UAC_VERSION_2) { struct uac2_iso_endpoint_descriptor *csep2 = (struct uac2_iso_endpoint_descriptor *) csep; @@ -585,6 +585,13 @@ static int parse_uac_endpoint_attributes(struct snd_usb_audio *chip, /* emulate the endpoint attributes of a v1 device */ if (csep2->bmControls & UAC2_CONTROL_PITCH) attributes |= UAC_EP_CS_ATTR_PITCH_CONTROL; + } else { /* UAC_VERSION_3 */ + struct uac3_iso_endpoint_descriptor *csep3 = + (struct uac3_iso_endpoint_descriptor *) csep; + + /* emulate the endpoint attributes of a v1 device */ + if (le32_to_cpu(csep3->bmControls) & UAC2_CONTROL_PITCH) + attributes |= UAC_EP_CS_ATTR_PITCH_CONTROL; } return attributes; diff --git a/tools/arch/arm/include/uapi/asm/kvm.h b/tools/arch/arm/include/uapi/asm/kvm.h index 2ba95d6fe852..caae4843cb70 100644 --- a/tools/arch/arm/include/uapi/asm/kvm.h +++ b/tools/arch/arm/include/uapi/asm/kvm.h @@ -195,6 +195,12 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_VFP_FPINST 0x1009 #define KVM_REG_ARM_VFP_FPINST2 0x100A +/* KVM-as-firmware specific pseudo-registers */ +#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM | KVM_REG_SIZE_U64 | \ + KVM_REG_ARM_FW | ((r) & 0xffff)) +#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0) + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 diff --git a/tools/arch/arm64/include/uapi/asm/kvm.h b/tools/arch/arm64/include/uapi/asm/kvm.h index 9abbf3044654..04b3256f8e6d 100644 --- a/tools/arch/arm64/include/uapi/asm/kvm.h +++ b/tools/arch/arm64/include/uapi/asm/kvm.h @@ -206,6 +206,12 @@ struct kvm_arch_memory_slot { #define KVM_REG_ARM_TIMER_CNT ARM64_SYS_REG(3, 3, 14, 3, 2) #define KVM_REG_ARM_TIMER_CVAL ARM64_SYS_REG(3, 3, 14, 0, 2) +/* KVM-as-firmware specific pseudo-registers */ +#define KVM_REG_ARM_FW (0x0014 << KVM_REG_ARM_COPROC_SHIFT) +#define KVM_REG_ARM_FW_REG(r) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \ + KVM_REG_ARM_FW | ((r) & 0xffff)) +#define KVM_REG_ARM_PSCI_VERSION KVM_REG_ARM_FW_REG(0) + /* Device Control API: ARM VGIC */ #define KVM_DEV_ARM_VGIC_GRP_ADDR 0 #define KVM_DEV_ARM_VGIC_GRP_DIST_REGS 1 diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index d554c11e01ff..578793e97431 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -320,6 +320,7 @@ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ #define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ +#define X86_FEATURE_CLDEMOTE (16*32+25) /* CLDEMOTE instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+ 0) /* MCA overflow recovery support */ diff --git a/tools/include/linux/spinlock.h b/tools/include/linux/spinlock.h index b21b586b9854..1738c0391da4 100644 --- a/tools/include/linux/spinlock.h +++ b/tools/include/linux/spinlock.h @@ -6,8 +6,9 @@ #include #define spinlock_t pthread_mutex_t -#define DEFINE_SPINLOCK(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER; +#define DEFINE_SPINLOCK(x) pthread_mutex_t x = PTHREAD_MUTEX_INITIALIZER #define __SPIN_LOCK_UNLOCKED(x) (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER +#define spin_lock_init(x) pthread_mutex_init(x, NULL) #define spin_lock_irqsave(x, f) (void)f, pthread_mutex_lock(x) #define spin_unlock_irqrestore(x, f) (void)f, pthread_mutex_unlock(x) diff --git a/tools/include/uapi/linux/kvm.h b/tools/include/uapi/linux/kvm.h index 1065006c9bf5..b02c41e53d56 100644 --- a/tools/include/uapi/linux/kvm.h +++ b/tools/include/uapi/linux/kvm.h @@ -676,6 +676,13 @@ struct kvm_ioeventfd { __u8 pad[36]; }; +#define KVM_X86_DISABLE_EXITS_MWAIT (1 << 0) +#define KVM_X86_DISABLE_EXITS_HTL (1 << 1) +#define KVM_X86_DISABLE_EXITS_PAUSE (1 << 2) +#define KVM_X86_DISABLE_VALID_EXITS (KVM_X86_DISABLE_EXITS_MWAIT | \ + KVM_X86_DISABLE_EXITS_HTL | \ + KVM_X86_DISABLE_EXITS_PAUSE) + /* for KVM_ENABLE_CAP */ struct kvm_enable_cap { /* in */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 3dbe217bf23e..cbdf34a6fb93 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -2205,7 +2205,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, obj = __bpf_object__open(attr->file, NULL, 0, bpf_prog_type__needs_kver(attr->prog_type)); - if (IS_ERR(obj)) + if (IS_ERR_OR_NULL(obj)) return -ENOENT; bpf_object__for_each_program(prog, obj) { diff --git a/tools/objtool/arch/x86/include/asm/insn.h b/tools/objtool/arch/x86/include/asm/insn.h index b3e32b010ab1..c2c01f84df75 100644 --- a/tools/objtool/arch/x86/include/asm/insn.h +++ b/tools/objtool/arch/x86/include/asm/insn.h @@ -208,4 +208,22 @@ static inline int insn_offset_immediate(struct insn *insn) return insn_offset_displacement(insn) + insn->displacement.nbytes; } +#define POP_SS_OPCODE 0x1f +#define MOV_SREG_OPCODE 0x8e + +/* + * Intel SDM Vol.3A 6.8.3 states; + * "Any single-step trap that would be delivered following the MOV to SS + * instruction or POP to SS instruction (because EFLAGS.TF is 1) is + * suppressed." + * This function returns true if @insn is MOV SS or POP SS. On these + * instructions, single stepping is suppressed. + */ +static inline int insn_masking_exception(struct insn *insn) +{ + return insn->opcode.bytes[0] == POP_SS_OPCODE || + (insn->opcode.bytes[0] == MOV_SREG_OPCODE && + X86_MODRM_REG(insn->modrm.bytes[0]) == 2); +} + #endif /* _ASM_X86_INSN_H */ diff --git a/tools/objtool/check.c b/tools/objtool/check.c index 5409f6f6c48d..3a31b238f885 100644 --- a/tools/objtool/check.c +++ b/tools/objtool/check.c @@ -59,6 +59,31 @@ static struct instruction *next_insn_same_sec(struct objtool_file *file, return next; } +static struct instruction *next_insn_same_func(struct objtool_file *file, + struct instruction *insn) +{ + struct instruction *next = list_next_entry(insn, list); + struct symbol *func = insn->func; + + if (!func) + return NULL; + + if (&next->list != &file->insn_list && next->func == func) + return next; + + /* Check if we're already in the subfunction: */ + if (func == func->cfunc) + return NULL; + + /* Move to the subfunction: */ + return find_insn(file, func->cfunc->sec, func->cfunc->offset); +} + +#define func_for_each_insn_all(file, func, insn) \ + for (insn = find_insn(file, func->sec, func->offset); \ + insn; \ + insn = next_insn_same_func(file, insn)) + #define func_for_each_insn(file, func, insn) \ for (insn = find_insn(file, func->sec, func->offset); \ insn && &insn->list != &file->insn_list && \ @@ -149,10 +174,14 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, if (!strcmp(func->name, global_noreturns[i])) return 1; - if (!func->sec) + if (!func->len) return 0; - func_for_each_insn(file, func, insn) { + insn = find_insn(file, func->sec, func->offset); + if (!insn->func) + return 0; + + func_for_each_insn_all(file, func, insn) { empty = false; if (insn->type == INSN_RETURN) @@ -167,35 +196,28 @@ static int __dead_end_function(struct objtool_file *file, struct symbol *func, * case, the function's dead-end status depends on whether the target * of the sibling call returns. */ - func_for_each_insn(file, func, insn) { - if (insn->sec != func->sec || - insn->offset >= func->offset + func->len) - break; - + func_for_each_insn_all(file, func, insn) { if (insn->type == INSN_JUMP_UNCONDITIONAL) { struct instruction *dest = insn->jump_dest; - struct symbol *dest_func; if (!dest) /* sibling call to another file */ return 0; - if (dest->sec != func->sec || - dest->offset < func->offset || - dest->offset >= func->offset + func->len) { - /* local sibling call */ - dest_func = find_symbol_by_offset(dest->sec, - dest->offset); - if (!dest_func) - continue; + if (dest->func && dest->func->pfunc != insn->func->pfunc) { + /* local sibling call */ if (recursion == 5) { - WARN_FUNC("infinite recursion (objtool bug!)", - dest->sec, dest->offset); - return -1; + /* + * Infinite recursion: two functions + * have sibling calls to each other. + * This is a very rare case. It means + * they aren't dead ends. + */ + return 0; } - return __dead_end_function(file, dest_func, + return __dead_end_function(file, dest->func, recursion + 1); } } @@ -422,7 +444,7 @@ static void add_ignores(struct objtool_file *file) if (!ignore_func(file, func)) continue; - func_for_each_insn(file, func, insn) + func_for_each_insn_all(file, func, insn) insn->ignore = true; } } @@ -782,30 +804,35 @@ out: return ret; } -static int add_switch_table(struct objtool_file *file, struct symbol *func, - struct instruction *insn, struct rela *table, - struct rela *next_table) +static int add_switch_table(struct objtool_file *file, struct instruction *insn, + struct rela *table, struct rela *next_table) { struct rela *rela = table; struct instruction *alt_insn; struct alternative *alt; + struct symbol *pfunc = insn->func->pfunc; + unsigned int prev_offset = 0; list_for_each_entry_from(rela, &file->rodata->rela->rela_list, list) { if (rela == next_table) break; - if (rela->sym->sec != insn->sec || - rela->addend <= func->offset || - rela->addend >= func->offset + func->len) + /* Make sure the switch table entries are consecutive: */ + if (prev_offset && rela->offset != prev_offset + 8) break; - alt_insn = find_insn(file, insn->sec, rela->addend); - if (!alt_insn) { - WARN("%s: can't find instruction at %s+0x%x", - file->rodata->rela->name, insn->sec->name, - rela->addend); - return -1; - } + /* Detect function pointers from contiguous objects: */ + if (rela->sym->sec == pfunc->sec && + rela->addend == pfunc->offset) + break; + + alt_insn = find_insn(file, rela->sym->sec, rela->addend); + if (!alt_insn) + break; + + /* Make sure the jmp dest is in the function or subfunction: */ + if (alt_insn->func->pfunc != pfunc) + break; alt = malloc(sizeof(*alt)); if (!alt) { @@ -815,6 +842,13 @@ static int add_switch_table(struct objtool_file *file, struct symbol *func, alt->insn = alt_insn; list_add_tail(&alt->list, &insn->alts); + prev_offset = rela->offset; + } + + if (!prev_offset) { + WARN_FUNC("can't find switch jump table", + insn->sec, insn->offset); + return -1; } return 0; @@ -869,40 +903,21 @@ static struct rela *find_switch_table(struct objtool_file *file, { struct rela *text_rela, *rodata_rela; struct instruction *orig_insn = insn; + unsigned long table_offset; - text_rela = find_rela_by_dest_range(insn->sec, insn->offset, insn->len); - if (text_rela && text_rela->sym == file->rodata->sym) { - /* case 1 */ - rodata_rela = find_rela_by_dest(file->rodata, - text_rela->addend); - if (rodata_rela) - return rodata_rela; - - /* case 2 */ - rodata_rela = find_rela_by_dest(file->rodata, - text_rela->addend + 4); - if (!rodata_rela) - return NULL; - - file->ignore_unreachables = true; - return rodata_rela; - } - - /* case 3 */ /* * Backward search using the @first_jump_src links, these help avoid * much of the 'in between' code. Which avoids us getting confused by * it. */ - for (insn = list_prev_entry(insn, list); - + for (; &insn->list != &file->insn_list && insn->sec == func->sec && insn->offset >= func->offset; insn = insn->first_jump_src ?: list_prev_entry(insn, list)) { - if (insn->type == INSN_JUMP_DYNAMIC) + if (insn != orig_insn && insn->type == INSN_JUMP_DYNAMIC) break; /* allow small jumps within the range */ @@ -918,18 +933,29 @@ static struct rela *find_switch_table(struct objtool_file *file, if (!text_rela || text_rela->sym != file->rodata->sym) continue; + table_offset = text_rela->addend; + if (text_rela->type == R_X86_64_PC32) + table_offset += 4; + /* * Make sure the .rodata address isn't associated with a * symbol. gcc jump tables are anonymous data. */ - if (find_symbol_containing(file->rodata, text_rela->addend)) + if (find_symbol_containing(file->rodata, table_offset)) continue; - rodata_rela = find_rela_by_dest(file->rodata, text_rela->addend); - if (!rodata_rela) - continue; + rodata_rela = find_rela_by_dest(file->rodata, table_offset); + if (rodata_rela) { + /* + * Use of RIP-relative switch jumps is quite rare, and + * indicates a rare GCC quirk/bug which can leave dead + * code behind. + */ + if (text_rela->type == R_X86_64_PC32) + file->ignore_unreachables = true; - return rodata_rela; + return rodata_rela; + } } return NULL; @@ -943,7 +969,7 @@ static int add_func_switch_tables(struct objtool_file *file, struct rela *rela, *prev_rela = NULL; int ret; - func_for_each_insn(file, func, insn) { + func_for_each_insn_all(file, func, insn) { if (!last) last = insn; @@ -974,8 +1000,7 @@ static int add_func_switch_tables(struct objtool_file *file, * the beginning of another switch table in the same function. */ if (prev_jump) { - ret = add_switch_table(file, func, prev_jump, prev_rela, - rela); + ret = add_switch_table(file, prev_jump, prev_rela, rela); if (ret) return ret; } @@ -985,7 +1010,7 @@ static int add_func_switch_tables(struct objtool_file *file, } if (prev_jump) { - ret = add_switch_table(file, func, prev_jump, prev_rela, NULL); + ret = add_switch_table(file, prev_jump, prev_rela, NULL); if (ret) return ret; } @@ -1749,15 +1774,13 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, while (1) { next_insn = next_insn_same_sec(file, insn); - - if (file->c_file && func && insn->func && func != insn->func) { + if (file->c_file && func && insn->func && func != insn->func->pfunc) { WARN("%s() falls through to next function %s()", func->name, insn->func->name); return 1; } - if (insn->func) - func = insn->func; + func = insn->func ? insn->func->pfunc : NULL; if (func && insn->ignore) { WARN_FUNC("BUG: why am I validating an ignored function?", @@ -1778,7 +1801,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, i = insn; save_insn = NULL; - func_for_each_insn_continue_reverse(file, func, i) { + func_for_each_insn_continue_reverse(file, insn->func, i) { if (i->save) { save_insn = i; break; @@ -1865,7 +1888,7 @@ static int validate_branch(struct objtool_file *file, struct instruction *first, case INSN_JUMP_UNCONDITIONAL: if (insn->jump_dest && (!func || !insn->jump_dest->func || - func == insn->jump_dest->func)) { + insn->jump_dest->func->pfunc == func)) { ret = validate_branch(file, insn->jump_dest, state); if (ret) @@ -2060,7 +2083,7 @@ static int validate_functions(struct objtool_file *file) for_each_sec(file, sec) { list_for_each_entry(func, &sec->symbol_list, list) { - if (func->type != STT_FUNC) + if (func->type != STT_FUNC || func->pfunc != func) continue; insn = find_insn(file, sec, func->offset); diff --git a/tools/objtool/elf.c b/tools/objtool/elf.c index c1c338661699..4e60e105583e 100644 --- a/tools/objtool/elf.c +++ b/tools/objtool/elf.c @@ -79,6 +79,19 @@ struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset) return NULL; } +struct symbol *find_symbol_by_name(struct elf *elf, const char *name) +{ + struct section *sec; + struct symbol *sym; + + list_for_each_entry(sec, &elf->sections, list) + list_for_each_entry(sym, &sec->symbol_list, list) + if (!strcmp(sym->name, name)) + return sym; + + return NULL; +} + struct symbol *find_symbol_containing(struct section *sec, unsigned long offset) { struct symbol *sym; @@ -203,10 +216,11 @@ static int read_sections(struct elf *elf) static int read_symbols(struct elf *elf) { - struct section *symtab; - struct symbol *sym; + struct section *symtab, *sec; + struct symbol *sym, *pfunc; struct list_head *entry, *tmp; int symbols_nr, i; + char *coldstr; symtab = find_section_by_name(elf, ".symtab"); if (!symtab) { @@ -281,6 +295,30 @@ static int read_symbols(struct elf *elf) hash_add(sym->sec->symbol_hash, &sym->hash, sym->idx); } + /* Create parent/child links for any cold subfunctions */ + list_for_each_entry(sec, &elf->sections, list) { + list_for_each_entry(sym, &sec->symbol_list, list) { + if (sym->type != STT_FUNC) + continue; + sym->pfunc = sym->cfunc = sym; + coldstr = strstr(sym->name, ".cold."); + if (coldstr) { + coldstr[0] = '\0'; + pfunc = find_symbol_by_name(elf, sym->name); + coldstr[0] = '.'; + + if (!pfunc) { + WARN("%s(): can't find parent function", + sym->name); + goto err; + } + + sym->pfunc = pfunc; + pfunc->cfunc = sym; + } + } + } + return 0; err: diff --git a/tools/objtool/elf.h b/tools/objtool/elf.h index d86e2ff14466..de5cd2ddded9 100644 --- a/tools/objtool/elf.h +++ b/tools/objtool/elf.h @@ -61,6 +61,7 @@ struct symbol { unsigned char bind, type; unsigned long offset; unsigned int len; + struct symbol *pfunc, *cfunc; }; struct rela { @@ -86,6 +87,7 @@ struct elf { struct elf *elf_open(const char *name, int flags); struct section *find_section_by_name(struct elf *elf, const char *name); struct symbol *find_symbol_by_offset(struct section *sec, unsigned long offset); +struct symbol *find_symbol_by_name(struct elf *elf, const char *name); struct symbol *find_symbol_containing(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest(struct section *sec, unsigned long offset); struct rela *find_rela_by_dest_range(struct section *sec, unsigned long offset, diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index 944070e98a2c..63eb49082774 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -175,7 +175,7 @@ static const struct option options[] = { OPT_UINTEGER('s', "nr_secs" , &p0.nr_secs, "max number of seconds to run (default: 5 secs)"), OPT_UINTEGER('u', "usleep" , &p0.sleep_usecs, "usecs to sleep per loop iteration"), - OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via writes (can be mixed with -W)"), + OPT_BOOLEAN('R', "data_reads" , &p0.data_reads, "access the data via reads (can be mixed with -W)"), OPT_BOOLEAN('W', "data_writes" , &p0.data_writes, "access the data via writes (can be mixed with -R)"), OPT_BOOLEAN('B', "data_backwards", &p0.data_backwards, "access the data backwards as well"), OPT_BOOLEAN('Z', "data_zero_memset", &p0.data_zero_memset,"access the data via glibc bzero only"), diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index 93656f2fd53a..7e3cce3bcf3b 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -29,7 +29,6 @@ GenuineIntel-6-4D,v13,silvermont,core GenuineIntel-6-4C,v13,silvermont,core GenuineIntel-6-2A,v15,sandybridge,core GenuineIntel-6-2C,v2,westmereep-dp,core -GenuineIntel-6-2C,v2,westmereep-dp,core GenuineIntel-6-25,v2,westmereep-sp,core GenuineIntel-6-2F,v2,westmereex,core GenuineIntel-6-55,v1,skylakex,core diff --git a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh index 016882dbbc16..ee86473643be 100755 --- a/tools/perf/tests/shell/record+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/record+probe_libc_inet_pton.sh @@ -16,7 +16,7 @@ nm -g $libc 2>/dev/null | fgrep -q inet_pton || exit 254 trace_libc_inet_pton_backtrace() { idx=0 expected[0]="ping[][0-9 \.:]+probe_libc:inet_pton: \([[:xdigit:]]+\)" - expected[1]=".*inet_pton[[:space:]]\($libc\)$" + expected[1]=".*inet_pton[[:space:]]\($libc|inlined\)$" case "$(uname -m)" in s390x) eventattr='call-graph=dwarf,max-stack=4' diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 536ee148bff8..5d74a30fe00f 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1263,6 +1263,9 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start max_percent = sample->percent; } + if (al->samples_nr > nr_percent) + nr_percent = al->samples_nr; + if (max_percent < min_pcnt) return -1; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 40020b1ca54f..bf16dc9ee507 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -239,6 +239,7 @@ static void cs_etm__free(struct perf_session *session) for (i = 0; i < aux->num_cpu; i++) zfree(&aux->metadata[i]); + thread__zput(aux->unknown_thread); zfree(&aux->metadata); zfree(&aux); } @@ -612,8 +613,8 @@ cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) return buff->len; } -static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, - struct auxtrace_queue *queue) +static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, + struct auxtrace_queue *queue) { struct cs_etm_queue *etmq = queue->priv; @@ -1357,6 +1358,23 @@ int cs_etm__process_auxtrace_info(union perf_event *event, etm->auxtrace.free = cs_etm__free; session->auxtrace = &etm->auxtrace; + etm->unknown_thread = thread__new(999999999, 999999999); + if (!etm->unknown_thread) + goto err_free_queues; + + /* + * Initialize list node so that at thread__zput() we can avoid + * segmentation fault at list_del_init(). + */ + INIT_LIST_HEAD(&etm->unknown_thread->node); + + err = thread__set_comm(etm->unknown_thread, "unknown", 0); + if (err) + goto err_delete_thread; + + if (thread__init_map_groups(etm->unknown_thread, etm->machine)) + goto err_delete_thread; + if (dump_trace) { cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); return 0; @@ -1371,16 +1389,18 @@ int cs_etm__process_auxtrace_info(union perf_event *event, err = cs_etm__synth_events(etm, session); if (err) - goto err_free_queues; + goto err_delete_thread; err = auxtrace_queues__process_index(&etm->queues, session); if (err) - goto err_free_queues; + goto err_delete_thread; etm->data_queued = etm->queues.populated; return 0; +err_delete_thread: + thread__zput(etm->unknown_thread); err_free_queues: auxtrace_queues__free(&etm->queues); session->auxtrace = NULL; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 2fb0272146d8..b8b8a9558d32 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1715,7 +1715,7 @@ int parse_events(struct perf_evlist *evlist, const char *str, struct perf_evsel *last; if (list_empty(&parse_state.list)) { - WARN_ONCE(true, "WARNING: event parser found nothing"); + WARN_ONCE(true, "WARNING: event parser found nothing\n"); return -1; } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index d14464c42714..7afeb80cc39e 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -224,15 +224,15 @@ event_def: event_pmu | event_bpf_file event_pmu: -PE_NAME '/' event_config '/' +PE_NAME opt_event_config { struct list_head *list, *orig_terms, *terms; - if (parse_events_copy_term_list($3, &orig_terms)) + if (parse_events_copy_term_list($2, &orig_terms)) YYABORT; ALLOC_LIST(list); - if (parse_events_add_pmu(_parse_state, list, $1, $3, false)) { + if (parse_events_add_pmu(_parse_state, list, $1, $2, false)) { struct perf_pmu *pmu = NULL; int ok = 0; char *pattern; @@ -262,7 +262,7 @@ PE_NAME '/' event_config '/' if (!ok) YYABORT; } - parse_events_terms__delete($3); + parse_events_terms__delete($2); parse_events_terms__delete(orig_terms); $$ = list; } diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile index fa7ee369b3c9..db66f8a0d4be 100644 --- a/tools/testing/radix-tree/Makefile +++ b/tools/testing/radix-tree/Makefile @@ -17,7 +17,7 @@ ifeq ($(BUILD), 32) LDFLAGS += -m32 endif -targets: mapshift $(TARGETS) +targets: generated/map-shift.h $(TARGETS) main: $(OFILES) @@ -42,9 +42,7 @@ radix-tree.c: ../../../lib/radix-tree.c idr.c: ../../../lib/idr.c sed -e 's/^static //' -e 's/__always_inline //' -e 's/inline //' < $< > $@ -.PHONY: mapshift - -mapshift: +generated/map-shift.h: @if ! grep -qws $(SHIFT) generated/map-shift.h; then \ echo "#define RADIX_TREE_MAP_SHIFT $(SHIFT)" > \ generated/map-shift.h; \ diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c index 59245b3d587c..7bf405638b0b 100644 --- a/tools/testing/radix-tree/multiorder.c +++ b/tools/testing/radix-tree/multiorder.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "test.h" @@ -624,6 +625,67 @@ static void multiorder_account(void) item_kill_tree(&tree); } +bool stop_iteration = false; + +static void *creator_func(void *ptr) +{ + /* 'order' is set up to ensure we have sibling entries */ + unsigned int order = RADIX_TREE_MAP_SHIFT - 1; + struct radix_tree_root *tree = ptr; + int i; + + for (i = 0; i < 10000; i++) { + item_insert_order(tree, 0, order); + item_delete_rcu(tree, 0); + } + + stop_iteration = true; + return NULL; +} + +static void *iterator_func(void *ptr) +{ + struct radix_tree_root *tree = ptr; + struct radix_tree_iter iter; + struct item *item; + void **slot; + + while (!stop_iteration) { + rcu_read_lock(); + radix_tree_for_each_slot(slot, tree, &iter, 0) { + item = radix_tree_deref_slot(slot); + + if (!item) + continue; + if (radix_tree_deref_retry(item)) { + slot = radix_tree_iter_retry(&iter); + continue; + } + + item_sanity(item, iter.index); + } + rcu_read_unlock(); + } + return NULL; +} + +static void multiorder_iteration_race(void) +{ + const int num_threads = sysconf(_SC_NPROCESSORS_ONLN); + pthread_t worker_thread[num_threads]; + RADIX_TREE(tree, GFP_KERNEL); + int i; + + pthread_create(&worker_thread[0], NULL, &creator_func, &tree); + for (i = 1; i < num_threads; i++) + pthread_create(&worker_thread[i], NULL, &iterator_func, &tree); + + for (i = 0; i < num_threads; i++) + pthread_join(worker_thread[i], NULL); + + item_kill_tree(&tree); +} + void multiorder_checks(void) { int i; @@ -644,6 +706,7 @@ void multiorder_checks(void) multiorder_join(); multiorder_split(); multiorder_account(); + multiorder_iteration_race(); radix_tree_cpu_dead(0); } diff --git a/tools/testing/radix-tree/test.c b/tools/testing/radix-tree/test.c index 5978ab1f403d..def6015570b2 100644 --- a/tools/testing/radix-tree/test.c +++ b/tools/testing/radix-tree/test.c @@ -75,6 +75,25 @@ int item_delete(struct radix_tree_root *root, unsigned long index) return 0; } +static void item_free_rcu(struct rcu_head *head) +{ + struct item *item = container_of(head, struct item, rcu_head); + + free(item); +} + +int item_delete_rcu(struct radix_tree_root *root, unsigned long index) +{ + struct item *item = radix_tree_delete(root, index); + + if (item) { + item_sanity(item, index); + call_rcu(&item->rcu_head, item_free_rcu); + return 1; + } + return 0; +} + void item_check_present(struct radix_tree_root *root, unsigned long index) { struct item *item; diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index d9c031dbeb1a..31f1d9b6f506 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -5,6 +5,7 @@ #include struct item { + struct rcu_head rcu_head; unsigned long index; unsigned int order; }; @@ -12,9 +13,11 @@ struct item { struct item *item_create(unsigned long index, unsigned int order); int __item_insert(struct radix_tree_root *root, struct item *item); int item_insert(struct radix_tree_root *root, unsigned long index); +void item_sanity(struct item *item, unsigned long index); int item_insert_order(struct radix_tree_root *root, unsigned long index, unsigned order); int item_delete(struct radix_tree_root *root, unsigned long index); +int item_delete_rcu(struct radix_tree_root *root, unsigned long index); struct item *item_lookup(struct radix_tree_root *root, unsigned long index); void item_check_present(struct radix_tree_root *root, unsigned long index); diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index a877af00605d..94498eaf872e 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -12337,6 +12337,11 @@ static void get_unpriv_disabled() FILE *fd; fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r"); + if (!fd) { + perror("fopen /proc/sys/"UNPRIV_SYSCTL); + unpriv_disabled = true; + return; + } if (fgets(buf, 2, fd) == buf && atoi(buf)) unpriv_disabled = true; fclose(fd); diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile index 2ddcc96ae456..d9d00319b07c 100644 --- a/tools/testing/selftests/kvm/Makefile +++ b/tools/testing/selftests/kvm/Makefile @@ -15,7 +15,7 @@ LIBKVM += $(LIBKVM_$(UNAME_M)) INSTALL_HDR_PATH = $(top_srcdir)/usr LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/ -CFLAGS += -O2 -g -std=gnu99 -I$(LINUX_HDR_PATH) -Iinclude -I$( #include #include +#include "kselftest.h" ssize_t test_write(int fd, const void *buf, size_t count); ssize_t test_read(int fd, void *buf, size_t count); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 2cedfda181d4..37e2a787d2fc 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -50,8 +50,8 @@ int kvm_check_cap(long cap) int kvm_fd; kvm_fd = open(KVM_DEV_PATH, O_RDONLY); - TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", - KVM_DEV_PATH, kvm_fd, errno); + if (kvm_fd < 0) + exit(KSFT_SKIP); ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap); TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n" @@ -91,8 +91,8 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm) vm->mode = mode; kvm_fd = open(KVM_DEV_PATH, perm); - TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", - KVM_DEV_PATH, kvm_fd, errno); + if (kvm_fd < 0) + exit(KSFT_SKIP); /* Create VM. */ vm->fd = ioctl(kvm_fd, KVM_CREATE_VM, NULL); @@ -418,8 +418,8 @@ struct kvm_cpuid2 *kvm_get_supported_cpuid(void) cpuid = allocate_kvm_cpuid2(); kvm_fd = open(KVM_DEV_PATH, O_RDONLY); - TEST_ASSERT(kvm_fd >= 0, "open %s failed, rc: %i errno: %i", - KVM_DEV_PATH, kvm_fd, errno); + if (kvm_fd < 0) + exit(KSFT_SKIP); ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid); TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n", @@ -675,8 +675,8 @@ static int vcpu_mmap_sz(void) int dev_fd, ret; dev_fd = open(KVM_DEV_PATH, O_RDONLY); - TEST_ASSERT(dev_fd >= 0, "%s open %s failed, rc: %i errno: %i", - __func__, KVM_DEV_PATH, dev_fd, errno); + if (dev_fd < 0) + exit(KSFT_SKIP); ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); TEST_ASSERT(ret >= sizeof(struct kvm_run), diff --git a/tools/testing/selftests/kvm/sync_regs_test.c b/tools/testing/selftests/kvm/sync_regs_test.c index 428e9473f5e2..eae1ece3c31b 100644 --- a/tools/testing/selftests/kvm/sync_regs_test.c +++ b/tools/testing/selftests/kvm/sync_regs_test.c @@ -85,6 +85,9 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left, { } +#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS) +#define INVALID_SYNC_FIELD 0x80000000 + int main(int argc, char *argv[]) { struct kvm_vm *vm; @@ -98,9 +101,14 @@ int main(int argc, char *argv[]) setbuf(stdout, NULL); cap = kvm_check_cap(KVM_CAP_SYNC_REGS); - TEST_ASSERT((unsigned long)cap == KVM_SYNC_X86_VALID_FIELDS, - "KVM_CAP_SYNC_REGS (0x%x) != KVM_SYNC_X86_VALID_FIELDS (0x%lx)\n", - cap, KVM_SYNC_X86_VALID_FIELDS); + if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) { + fprintf(stderr, "KVM_CAP_SYNC_REGS not supported, skipping test\n"); + exit(KSFT_SKIP); + } + if ((cap & INVALID_SYNC_FIELD) != 0) { + fprintf(stderr, "The \"invalid\" field is not invalid, skipping test\n"); + exit(KSFT_SKIP); + } /* Create VM */ vm = vm_create_default(VCPU_ID, guest_code); @@ -108,7 +116,14 @@ int main(int argc, char *argv[]) run = vcpu_state(vm, VCPU_ID); /* Request reading invalid register set from VCPU. */ - run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS << 1; + run->kvm_valid_regs = INVALID_SYNC_FIELD; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; + + run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(rv < 0 && errno == EINVAL, "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n", @@ -116,7 +131,14 @@ int main(int argc, char *argv[]) vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0; /* Request setting invalid register set into VCPU. */ - run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS << 1; + run->kvm_dirty_regs = INVALID_SYNC_FIELD; + rv = _vcpu_run(vm, VCPU_ID); + TEST_ASSERT(rv < 0 && errno == EINVAL, + "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", + rv); + vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0; + + run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(rv < 0 && errno == EINVAL, "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n", @@ -125,7 +147,7 @@ int main(int argc, char *argv[]) /* Request and verify all valid register sets. */ /* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */ - run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; + run->kvm_valid_regs = TEST_SYNC_FIELDS; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "Unexpected exit reason: %u (%s),\n", @@ -146,7 +168,7 @@ int main(int argc, char *argv[]) run->s.regs.sregs.apic_base = 1 << 11; /* TODO run->s.regs.events.XYZ = ABC; */ - run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; + run->kvm_valid_regs = TEST_SYNC_FIELDS; run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, @@ -172,7 +194,7 @@ int main(int argc, char *argv[]) /* Clear kvm_dirty_regs bits, verify new s.regs values are * overwritten with existing guest values. */ - run->kvm_valid_regs = KVM_SYNC_X86_VALID_FIELDS; + run->kvm_valid_regs = TEST_SYNC_FIELDS; run->kvm_dirty_regs = 0; run->s.regs.regs.r11 = 0xDEADBEEF; rv = _vcpu_run(vm, VCPU_ID); @@ -211,7 +233,7 @@ int main(int argc, char *argv[]) * with kvm_sync_regs values. */ run->kvm_valid_regs = 0; - run->kvm_dirty_regs = KVM_SYNC_X86_VALID_FIELDS; + run->kvm_dirty_regs = TEST_SYNC_FIELDS; run->s.regs.regs.r11 = 0xBBBB; rv = _vcpu_run(vm, VCPU_ID); TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, diff --git a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c index 8f7f62093add..aaa633263b2c 100644 --- a/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c +++ b/tools/testing/selftests/kvm/vmx_tsc_adjust_test.c @@ -189,8 +189,8 @@ int main(int argc, char *argv[]) struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1); if (!(entry->ecx & CPUID_VMX)) { - printf("nested VMX not enabled, skipping test"); - return 0; + fprintf(stderr, "nested VMX not enabled, skipping test\n"); + exit(KSFT_SKIP); } vm = vm_create_default_vmx(VCPU_ID, (void *) l1_guest_code); diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index d744991c0f4f..39f66bc29b82 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -11,7 +11,7 @@ CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \ check_initial_reg_state sigreturn iopl mpx-mini-test ioperm \ - protection_keys test_vdso test_vsyscall + protection_keys test_vdso test_vsyscall mov_ss_trap TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ vdso_restorer diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c new file mode 100644 index 000000000000..3c3a022654f3 --- /dev/null +++ b/tools/testing/selftests/x86/mov_ss_trap.c @@ -0,0 +1,285 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * mov_ss_trap.c: Exercise the bizarre side effects of a watchpoint on MOV SS + * + * This does MOV SS from a watchpointed address followed by various + * types of kernel entries. A MOV SS that hits a watchpoint will queue + * up a #DB trap but will not actually deliver that trap. The trap + * will be delivered after the next instruction instead. The CPU's logic + * seems to be: + * + * - Any fault: drop the pending #DB trap. + * - INT $N, INT3, INTO, SYSCALL, SYSENTER: enter the kernel and then + * deliver #DB. + * - ICEBP: enter the kernel but do not deliver the watchpoint trap + * - breakpoint: only one #DB is delivered (phew!) + * + * There are plenty of ways for a kernel to handle this incorrectly. This + * test tries to exercise all the cases. + * + * This should mostly cover CVE-2018-1087 and CVE-2018-8897. + */ +#define _GNU_SOURCE + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define X86_EFLAGS_RF (1UL << 16) + +#if __x86_64__ +# define REG_IP REG_RIP +#else +# define REG_IP REG_EIP +#endif + +unsigned short ss; +extern unsigned char breakpoint_insn[]; +sigjmp_buf jmpbuf; +static unsigned char altstack_data[SIGSTKSZ]; + +static void enable_watchpoint(void) +{ + pid_t parent = getpid(); + int status; + + pid_t child = fork(); + if (child < 0) + err(1, "fork"); + + if (child) { + if (waitpid(child, &status, 0) != child) + err(1, "waitpid for child"); + } else { + unsigned long dr0, dr1, dr7; + + dr0 = (unsigned long)&ss; + dr1 = (unsigned long)breakpoint_insn; + dr7 = ((1UL << 1) | /* G0 */ + (3UL << 16) | /* RW0 = read or write */ + (1UL << 18) | /* LEN0 = 2 bytes */ + (1UL << 3)); /* G1, RW1 = insn */ + + if (ptrace(PTRACE_ATTACH, parent, NULL, NULL) != 0) + err(1, "PTRACE_ATTACH"); + + if (waitpid(parent, &status, 0) != parent) + err(1, "waitpid for child"); + + if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[0]), dr0) != 0) + err(1, "PTRACE_POKEUSER DR0"); + + if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[1]), dr1) != 0) + err(1, "PTRACE_POKEUSER DR1"); + + if (ptrace(PTRACE_POKEUSER, parent, (void *)offsetof(struct user, u_debugreg[7]), dr7) != 0) + err(1, "PTRACE_POKEUSER DR7"); + + printf("\tDR0 = %lx, DR1 = %lx, DR7 = %lx\n", dr0, dr1, dr7); + + if (ptrace(PTRACE_DETACH, parent, NULL, NULL) != 0) + err(1, "PTRACE_DETACH"); + + exit(0); + } +} + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static char const * const signames[] = { + [SIGSEGV] = "SIGSEGV", + [SIGBUS] = "SIBGUS", + [SIGTRAP] = "SIGTRAP", + [SIGILL] = "SIGILL", +}; + +static void sigtrap(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = ctx_void; + + printf("\tGot SIGTRAP with RIP=%lx, EFLAGS.RF=%d\n", + (unsigned long)ctx->uc_mcontext.gregs[REG_IP], + !!(ctx->uc_mcontext.gregs[REG_EFL] & X86_EFLAGS_RF)); +} + +static void handle_and_return(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = ctx_void; + + printf("\tGot %s with RIP=%lx\n", signames[sig], + (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); +} + +static void handle_and_longjmp(int sig, siginfo_t *si, void *ctx_void) +{ + ucontext_t *ctx = ctx_void; + + printf("\tGot %s with RIP=%lx\n", signames[sig], + (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); + + siglongjmp(jmpbuf, 1); +} + +int main() +{ + unsigned long nr; + + asm volatile ("mov %%ss, %[ss]" : [ss] "=m" (ss)); + printf("\tSS = 0x%hx, &SS = 0x%p\n", ss, &ss); + + if (prctl(PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0) == 0) + printf("\tPR_SET_PTRACER_ANY succeeded\n"); + + printf("\tSet up a watchpoint\n"); + sethandler(SIGTRAP, sigtrap, 0); + enable_watchpoint(); + + printf("[RUN]\tRead from watched memory (should get SIGTRAP)\n"); + asm volatile ("mov %[ss], %[tmp]" : [tmp] "=r" (nr) : [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; INT3\n"); + asm volatile ("mov %[ss], %%ss; int3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; INT 3\n"); + asm volatile ("mov %[ss], %%ss; .byte 0xcd, 0x3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; CS CS INT3\n"); + asm volatile ("mov %[ss], %%ss; .byte 0x2e, 0x2e; int3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; CSx14 INT3\n"); + asm volatile ("mov %[ss], %%ss; .fill 14,1,0x2e; int3" :: [ss] "m" (ss)); + + printf("[RUN]\tMOV SS; INT 4\n"); + sethandler(SIGSEGV, handle_and_return, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; int $4" :: [ss] "m" (ss)); + +#ifdef __i386__ + printf("[RUN]\tMOV SS; INTO\n"); + sethandler(SIGSEGV, handle_and_return, SA_RESETHAND); + nr = -1; + asm volatile ("add $1, %[tmp]; mov %[ss], %%ss; into" + : [tmp] "+r" (nr) : [ss] "m" (ss)); +#endif + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; ICEBP\n"); + + /* Some emulators (e.g. QEMU TCG) don't emulate ICEBP. */ + sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND); + + asm volatile ("mov %[ss], %%ss; .byte 0xf1" :: [ss] "m" (ss)); + } + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; CLI\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; cli" :: [ss] "m" (ss)); + } + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; #PF\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; mov (-1), %[tmp]" + : [tmp] "=r" (nr) : [ss] "m" (ss)); + } + + /* + * INT $1: if #DB has DPL=3 and there isn't special handling, + * then the kernel will die. + */ + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; INT 1\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + asm volatile ("mov %[ss], %%ss; int $1" :: [ss] "m" (ss)); + } + +#ifdef __x86_64__ + /* + * In principle, we should test 32-bit SYSCALL as well, but + * the calling convention is so unpredictable that it's + * not obviously worth the effort. + */ + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; SYSCALL\n"); + sethandler(SIGILL, handle_and_longjmp, SA_RESETHAND); + nr = SYS_getpid; + /* + * Toggle the high bit of RSP to make it noncanonical to + * strengthen this test on non-SMAP systems. + */ + asm volatile ("btc $63, %%rsp\n\t" + "mov %[ss], %%ss; syscall\n\t" + "btc $63, %%rsp" + : "+a" (nr) : [ss] "m" (ss) + : "rcx" +#ifdef __x86_64__ + , "r11" +#endif + ); + } +#endif + + printf("[RUN]\tMOV SS; breakpointed NOP\n"); + asm volatile ("mov %[ss], %%ss; breakpoint_insn: nop" :: [ss] "m" (ss)); + + /* + * Invoking SYSENTER directly breaks all the rules. Just handle + * the SIGSEGV. + */ + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; SYSENTER\n"); + stack_t stack = { + .ss_sp = altstack_data, + .ss_size = SIGSTKSZ, + }; + if (sigaltstack(&stack, NULL) != 0) + err(1, "sigaltstack"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK); + nr = SYS_getpid; + asm volatile ("mov %[ss], %%ss; SYSENTER" : "+a" (nr) + : [ss] "m" (ss) : "flags", "rcx" +#ifdef __x86_64__ + , "r11" +#endif + ); + + /* We're unreachable here. SYSENTER forgets RIP. */ + } + + if (sigsetjmp(jmpbuf, 1) == 0) { + printf("[RUN]\tMOV SS; INT $0x80\n"); + sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND); + nr = 20; /* compat getpid */ + asm volatile ("mov %[ss], %%ss; int $0x80" + : "+a" (nr) : [ss] "m" (ss) + : "flags" +#ifdef __x86_64__ + , "r8", "r9", "r10", "r11" +#endif + ); + } + + printf("[OK]\tI aten't dead\n"); + return 0; +} diff --git a/tools/testing/selftests/x86/mpx-mini-test.c b/tools/testing/selftests/x86/mpx-mini-test.c index 9c0325e1ea68..50f7e9272481 100644 --- a/tools/testing/selftests/x86/mpx-mini-test.c +++ b/tools/testing/selftests/x86/mpx-mini-test.c @@ -368,6 +368,11 @@ static int expected_bnd_index = -1; uint64_t shadow_plb[NR_MPX_BOUNDS_REGISTERS][2]; /* shadow MPX bound registers */ unsigned long shadow_map[NR_MPX_BOUNDS_REGISTERS]; +/* Failed address bound checks: */ +#ifndef SEGV_BNDERR +# define SEGV_BNDERR 3 +#endif + /* * The kernel is supposed to provide some information about the bounds * exception in the siginfo. It should match what we have in the bounds @@ -419,8 +424,6 @@ void handler(int signum, siginfo_t *si, void *vucontext) br_count++; dprintf1("#BR 0x%jx (total seen: %d)\n", status, br_count); -#define SEGV_BNDERR 3 /* failed address bound checks */ - dprintf2("Saw a #BR! status 0x%jx at %016lx br_reason: %jx\n", status, ip, br_reason); dprintf2("si_signo: %d\n", si->si_signo); diff --git a/tools/testing/selftests/x86/pkey-helpers.h b/tools/testing/selftests/x86/pkey-helpers.h index b3cb7670e026..254e5436bdd9 100644 --- a/tools/testing/selftests/x86/pkey-helpers.h +++ b/tools/testing/selftests/x86/pkey-helpers.h @@ -26,30 +26,26 @@ static inline void sigsafe_printf(const char *format, ...) { va_list ap; - va_start(ap, format); if (!dprint_in_signal) { + va_start(ap, format); vprintf(format, ap); + va_end(ap); } else { int ret; - int len = vsnprintf(dprint_in_signal_buffer, - DPRINT_IN_SIGNAL_BUF_SIZE, - format, ap); /* - * len is amount that would have been printed, - * but actual write is truncated at BUF_SIZE. + * No printf() functions are signal-safe. + * They deadlock easily. Write the format + * string to get some output, even if + * incomplete. */ - if (len > DPRINT_IN_SIGNAL_BUF_SIZE) - len = DPRINT_IN_SIGNAL_BUF_SIZE; - ret = write(1, dprint_in_signal_buffer, len); + ret = write(1, format, strlen(format)); if (ret < 0) - abort(); + exit(1); } - va_end(ap); } #define dprintf_level(level, args...) do { \ if (level <= DEBUG_LEVEL) \ sigsafe_printf(args); \ - fflush(NULL); \ } while (0) #define dprintf0(args...) dprintf_level(0, args) #define dprintf1(args...) dprintf_level(1, args) diff --git a/tools/testing/selftests/x86/protection_keys.c b/tools/testing/selftests/x86/protection_keys.c index f15aa5a76fe3..460b4bdf4c1e 100644 --- a/tools/testing/selftests/x86/protection_keys.c +++ b/tools/testing/selftests/x86/protection_keys.c @@ -72,10 +72,9 @@ extern void abort_hooks(void); test_nr, iteration_nr); \ dprintf0("errno at assert: %d", errno); \ abort_hooks(); \ - assert(condition); \ + exit(__LINE__); \ } \ } while (0) -#define raw_assert(cond) assert(cond) void cat_into_file(char *str, char *file) { @@ -87,12 +86,17 @@ void cat_into_file(char *str, char *file) * these need to be raw because they are called under * pkey_assert() */ - raw_assert(fd >= 0); + if (fd < 0) { + fprintf(stderr, "error opening '%s'\n", str); + perror("error: "); + exit(__LINE__); + } + ret = write(fd, str, strlen(str)); if (ret != strlen(str)) { perror("write to file failed"); fprintf(stderr, "filename: '%s' str: '%s'\n", file, str); - raw_assert(0); + exit(__LINE__); } close(fd); } @@ -191,26 +195,30 @@ void lots_o_noops_around_write(int *write_to_me) #ifdef __i386__ #ifndef SYS_mprotect_key -# define SYS_mprotect_key 380 +# define SYS_mprotect_key 380 #endif + #ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 381 -# define SYS_pkey_free 382 +# define SYS_pkey_alloc 381 +# define SYS_pkey_free 382 #endif -#define REG_IP_IDX REG_EIP -#define si_pkey_offset 0x14 + +#define REG_IP_IDX REG_EIP +#define si_pkey_offset 0x14 #else #ifndef SYS_mprotect_key -# define SYS_mprotect_key 329 +# define SYS_mprotect_key 329 #endif + #ifndef SYS_pkey_alloc -# define SYS_pkey_alloc 330 -# define SYS_pkey_free 331 +# define SYS_pkey_alloc 330 +# define SYS_pkey_free 331 #endif -#define REG_IP_IDX REG_RIP -#define si_pkey_offset 0x20 + +#define REG_IP_IDX REG_RIP +#define si_pkey_offset 0x20 #endif @@ -225,8 +233,14 @@ void dump_mem(void *dumpme, int len_bytes) } } -#define SEGV_BNDERR 3 /* failed address bound checks */ -#define SEGV_PKUERR 4 +/* Failed address bound checks: */ +#ifndef SEGV_BNDERR +# define SEGV_BNDERR 3 +#endif + +#ifndef SEGV_PKUERR +# define SEGV_PKUERR 4 +#endif static char *si_code_str(int si_code) { @@ -289,13 +303,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) dump_mem(pkru_ptr - 128, 256); pkey_assert(*pkru_ptr); - si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); - dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); - dump_mem(si_pkey_ptr - 8, 24); - siginfo_pkey = *si_pkey_ptr; - pkey_assert(siginfo_pkey < NR_PKEYS); - last_si_pkey = siginfo_pkey; - if ((si->si_code == SEGV_MAPERR) || (si->si_code == SEGV_ACCERR) || (si->si_code == SEGV_BNDERR)) { @@ -303,6 +310,13 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) exit(4); } + si_pkey_ptr = (u32 *)(((u8 *)si) + si_pkey_offset); + dprintf1("si_pkey_ptr: %p\n", si_pkey_ptr); + dump_mem((u8 *)si_pkey_ptr - 8, 24); + siginfo_pkey = *si_pkey_ptr; + pkey_assert(siginfo_pkey < NR_PKEYS); + last_si_pkey = siginfo_pkey; + dprintf1("signal pkru from xsave: %08x\n", *pkru_ptr); /* need __rdpkru() version so we do not do shadow_pkru checking */ dprintf1("signal pkru from pkru: %08x\n", __rdpkru()); @@ -311,22 +325,6 @@ void signal_handler(int signum, siginfo_t *si, void *vucontext) dprintf1("WARNING: set PRKU=0 to allow faulting instruction to continue\n"); pkru_faults++; dprintf1("<<<<==================================================\n"); - return; - if (trapno == 14) { - fprintf(stderr, - "ERROR: In signal handler, page fault, trapno = %d, ip = %016lx\n", - trapno, ip); - fprintf(stderr, "si_addr %p\n", si->si_addr); - fprintf(stderr, "REG_ERR: %lx\n", - (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - exit(1); - } else { - fprintf(stderr, "unexpected trap %d! at 0x%lx\n", trapno, ip); - fprintf(stderr, "si_addr %p\n", si->si_addr); - fprintf(stderr, "REG_ERR: %lx\n", - (unsigned long)uctxt->uc_mcontext.gregs[REG_ERR]); - exit(2); - } dprint_in_signal = 0; } @@ -393,10 +391,15 @@ pid_t fork_lazy_child(void) return forkret; } -#define PKEY_DISABLE_ACCESS 0x1 -#define PKEY_DISABLE_WRITE 0x2 +#ifndef PKEY_DISABLE_ACCESS +# define PKEY_DISABLE_ACCESS 0x1 +#endif -u32 pkey_get(int pkey, unsigned long flags) +#ifndef PKEY_DISABLE_WRITE +# define PKEY_DISABLE_WRITE 0x2 +#endif + +static u32 hw_pkey_get(int pkey, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); u32 pkru = __rdpkru(); @@ -418,7 +421,7 @@ u32 pkey_get(int pkey, unsigned long flags) return masked_pkru; } -int pkey_set(int pkey, unsigned long rights, unsigned long flags) +static int hw_pkey_set(int pkey, unsigned long rights, unsigned long flags) { u32 mask = (PKEY_DISABLE_ACCESS|PKEY_DISABLE_WRITE); u32 old_pkru = __rdpkru(); @@ -452,15 +455,15 @@ void pkey_disable_set(int pkey, int flags) pkey, flags); pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - pkey_rights = pkey_get(pkey, syscall_flags); + pkey_rights = hw_pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); pkey_assert(pkey_rights >= 0); pkey_rights |= flags; - ret = pkey_set(pkey, pkey_rights, syscall_flags); + ret = hw_pkey_set(pkey, pkey_rights, syscall_flags); assert(!ret); /*pkru and flags have the same format */ shadow_pkru |= flags << (pkey * 2); @@ -468,8 +471,8 @@ void pkey_disable_set(int pkey, int flags) pkey_assert(ret >= 0); - pkey_rights = pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey_rights = hw_pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); @@ -483,24 +486,24 @@ void pkey_disable_clear(int pkey, int flags) { unsigned long syscall_flags = 0; int ret; - int pkey_rights = pkey_get(pkey, syscall_flags); + int pkey_rights = hw_pkey_get(pkey, syscall_flags); u32 orig_pkru = rdpkru(); pkey_assert(flags & (PKEY_DISABLE_ACCESS | PKEY_DISABLE_WRITE)); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); pkey_assert(pkey_rights >= 0); pkey_rights |= flags; - ret = pkey_set(pkey, pkey_rights, 0); + ret = hw_pkey_set(pkey, pkey_rights, 0); /* pkru and flags have the same format */ shadow_pkru &= ~(flags << (pkey * 2)); pkey_assert(ret >= 0); - pkey_rights = pkey_get(pkey, syscall_flags); - dprintf1("%s(%d) pkey_get(%d): %x\n", __func__, + pkey_rights = hw_pkey_get(pkey, syscall_flags); + dprintf1("%s(%d) hw_pkey_get(%d): %x\n", __func__, pkey, pkey, pkey_rights); dprintf1("%s(%d) pkru: 0x%x\n", __func__, pkey, rdpkru()); @@ -674,10 +677,12 @@ int mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot, struct pkey_malloc_record { void *ptr; long size; + int prot; }; struct pkey_malloc_record *pkey_malloc_records; +struct pkey_malloc_record *pkey_last_malloc_record; long nr_pkey_malloc_records; -void record_pkey_malloc(void *ptr, long size) +void record_pkey_malloc(void *ptr, long size, int prot) { long i; struct pkey_malloc_record *rec = NULL; @@ -709,6 +714,8 @@ void record_pkey_malloc(void *ptr, long size) (int)(rec - pkey_malloc_records), rec, ptr, size); rec->ptr = ptr; rec->size = size; + rec->prot = prot; + pkey_last_malloc_record = rec; nr_pkey_malloc_records++; } @@ -753,7 +760,7 @@ void *malloc_pkey_with_mprotect(long size, int prot, u16 pkey) pkey_assert(ptr != (void *)-1); ret = mprotect_pkey((void *)ptr, PAGE_SIZE, prot, pkey); pkey_assert(!ret); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); rdpkru(); dprintf1("%s() for pkey %d @ %p\n", __func__, pkey, ptr); @@ -774,7 +781,7 @@ void *malloc_pkey_anon_huge(long size, int prot, u16 pkey) size = ALIGN_UP(size, HPAGE_SIZE * 2); ptr = mmap(NULL, size, PROT_NONE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); pkey_assert(ptr != (void *)-1); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); mprotect_pkey(ptr, size, prot, pkey); dprintf1("unaligned ptr: %p\n", ptr); @@ -847,7 +854,7 @@ void *malloc_pkey_hugetlb(long size, int prot, u16 pkey) pkey_assert(ptr != (void *)-1); mprotect_pkey(ptr, size, prot, pkey); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); dprintf1("mmap()'d hugetlbfs for pkey %d @ %p\n", pkey, ptr); return ptr; @@ -869,7 +876,7 @@ void *malloc_pkey_mmap_dax(long size, int prot, u16 pkey) mprotect_pkey(ptr, size, prot, pkey); - record_pkey_malloc(ptr, size); + record_pkey_malloc(ptr, size, prot); dprintf1("mmap()'d for pkey %d @ %p\n", pkey, ptr); close(fd); @@ -918,13 +925,21 @@ void *malloc_pkey(long size, int prot, u16 pkey) } int last_pkru_faults; +#define UNKNOWN_PKEY -2 void expected_pk_fault(int pkey) { dprintf2("%s(): last_pkru_faults: %d pkru_faults: %d\n", __func__, last_pkru_faults, pkru_faults); dprintf2("%s(%d): last_si_pkey: %d\n", __func__, pkey, last_si_pkey); pkey_assert(last_pkru_faults + 1 == pkru_faults); - pkey_assert(last_si_pkey == pkey); + + /* + * For exec-only memory, we do not know the pkey in + * advance, so skip this check. + */ + if (pkey != UNKNOWN_PKEY) + pkey_assert(last_si_pkey == pkey); + /* * The signal handler shold have cleared out PKRU to let the * test program continue. We now have to restore it. @@ -939,10 +954,11 @@ void expected_pk_fault(int pkey) last_si_pkey = -1; } -void do_not_expect_pk_fault(void) -{ - pkey_assert(last_pkru_faults == pkru_faults); -} +#define do_not_expect_pk_fault(msg) do { \ + if (last_pkru_faults != pkru_faults) \ + dprintf0("unexpected PK fault: %s\n", msg); \ + pkey_assert(last_pkru_faults == pkru_faults); \ +} while (0) int test_fds[10] = { -1 }; int nr_test_fds; @@ -1151,12 +1167,15 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) pkey_assert(i < NR_PKEYS*2); /* - * There are 16 pkeys supported in hardware. One is taken - * up for the default (0) and another can be taken up by - * an execute-only mapping. Ensure that we can allocate - * at least 14 (16-2). + * There are 16 pkeys supported in hardware. Three are + * allocated by the time we get here: + * 1. The default key (0) + * 2. One possibly consumed by an execute-only mapping. + * 3. One allocated by the test code and passed in via + * 'pkey' to this function. + * Ensure that we can allocate at least another 13 (16-3). */ - pkey_assert(i >= NR_PKEYS-2); + pkey_assert(i >= NR_PKEYS-3); for (i = 0; i < nr_allocated_pkeys; i++) { err = sys_pkey_free(allocated_pkeys[i]); @@ -1165,6 +1184,35 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey) } } +/* + * pkey 0 is special. It is allocated by default, so you do not + * have to call pkey_alloc() to use it first. Make sure that it + * is usable. + */ +void test_mprotect_with_pkey_0(int *ptr, u16 pkey) +{ + long size; + int prot; + + assert(pkey_last_malloc_record); + size = pkey_last_malloc_record->size; + /* + * This is a bit of a hack. But mprotect() requires + * huge-page-aligned sizes when operating on hugetlbfs. + * So, make sure that we use something that's a multiple + * of a huge page when we can. + */ + if (size >= HPAGE_SIZE) + size = HPAGE_SIZE; + prot = pkey_last_malloc_record->prot; + + /* Use pkey 0 */ + mprotect_pkey(ptr, size, prot, 0); + + /* Make sure that we can set it back to the original pkey. */ + mprotect_pkey(ptr, size, prot, pkey); +} + void test_ptrace_of_child(int *ptr, u16 pkey) { __attribute__((__unused__)) int peek_result; @@ -1228,7 +1276,7 @@ void test_ptrace_of_child(int *ptr, u16 pkey) pkey_assert(ret != -1); /* Now access from the current task, and expect NO exception: */ peek_result = read_ptr(plain_ptr); - do_not_expect_pk_fault(); + do_not_expect_pk_fault("read plain pointer after ptrace"); ret = ptrace(PTRACE_DETACH, child_pid, ignored, 0); pkey_assert(ret != -1); @@ -1241,12 +1289,9 @@ void test_ptrace_of_child(int *ptr, u16 pkey) free(plain_ptr_unaligned); } -void test_executing_on_unreadable_memory(int *ptr, u16 pkey) +void *get_pointer_to_instructions(void) { void *p1; - int scratch; - int ptr_contents; - int ret; p1 = ALIGN_PTR_UP(&lots_o_noops_around_write, PAGE_SIZE); dprintf3("&lots_o_noops: %p\n", &lots_o_noops_around_write); @@ -1256,7 +1301,23 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) /* Point 'p1' at the *second* page of the function: */ p1 += PAGE_SIZE; + /* + * Try to ensure we fault this in on next touch to ensure + * we get an instruction fault as opposed to a data one + */ madvise(p1, PAGE_SIZE, MADV_DONTNEED); + + return p1; +} + +void test_executing_on_unreadable_memory(int *ptr, u16 pkey) +{ + void *p1; + int scratch; + int ptr_contents; + int ret; + + p1 = get_pointer_to_instructions(); lots_o_noops_around_write(&scratch); ptr_contents = read_ptr(p1); dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); @@ -1272,12 +1333,55 @@ void test_executing_on_unreadable_memory(int *ptr, u16 pkey) */ madvise(p1, PAGE_SIZE, MADV_DONTNEED); lots_o_noops_around_write(&scratch); - do_not_expect_pk_fault(); + do_not_expect_pk_fault("executing on PROT_EXEC memory"); ptr_contents = read_ptr(p1); dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); expected_pk_fault(pkey); } +void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey) +{ + void *p1; + int scratch; + int ptr_contents; + int ret; + + dprintf1("%s() start\n", __func__); + + p1 = get_pointer_to_instructions(); + lots_o_noops_around_write(&scratch); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + + /* Use a *normal* mprotect(), not mprotect_pkey(): */ + ret = mprotect(p1, PAGE_SIZE, PROT_EXEC); + pkey_assert(!ret); + + dprintf2("pkru: %x\n", rdpkru()); + + /* Make sure this is an *instruction* fault */ + madvise(p1, PAGE_SIZE, MADV_DONTNEED); + lots_o_noops_around_write(&scratch); + do_not_expect_pk_fault("executing on PROT_EXEC memory"); + ptr_contents = read_ptr(p1); + dprintf2("ptr (%p) contents@%d: %x\n", p1, __LINE__, ptr_contents); + expected_pk_fault(UNKNOWN_PKEY); + + /* + * Put the memory back to non-PROT_EXEC. Should clear the + * exec-only pkey off the VMA and allow it to be readable + * again. Go to PROT_NONE first to check for a kernel bug + * that did not clear the pkey when doing PROT_NONE. + */ + ret = mprotect(p1, PAGE_SIZE, PROT_NONE); + pkey_assert(!ret); + + ret = mprotect(p1, PAGE_SIZE, PROT_READ|PROT_EXEC); + pkey_assert(!ret); + ptr_contents = read_ptr(p1); + do_not_expect_pk_fault("plain read on recently PROT_EXEC area"); +} + void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey) { int size = PAGE_SIZE; @@ -1302,6 +1406,8 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = { test_kernel_gup_of_access_disabled_region, test_kernel_gup_write_to_write_disabled_region, test_executing_on_unreadable_memory, + test_implicit_mprotect_exec_only_memory, + test_mprotect_with_pkey_0, test_ptrace_of_child, test_pkey_syscalls_on_non_allocated_pkey, test_pkey_syscalls_bad_args, diff --git a/virt/kvm/arm/vgic/vgic-debug.c b/virt/kvm/arm/vgic/vgic-debug.c index 10b38178cff2..4ffc0b5e6105 100644 --- a/virt/kvm/arm/vgic/vgic-debug.c +++ b/virt/kvm/arm/vgic/vgic-debug.c @@ -211,6 +211,7 @@ static int vgic_debug_show(struct seq_file *s, void *v) struct vgic_state_iter *iter = (struct vgic_state_iter *)v; struct vgic_irq *irq; struct kvm_vcpu *vcpu = NULL; + unsigned long flags; if (iter->dist_id == 0) { print_dist_state(s, &kvm->arch.vgic); @@ -227,9 +228,9 @@ static int vgic_debug_show(struct seq_file *s, void *v) irq = &kvm->arch.vgic.spis[iter->intid - VGIC_NR_PRIVATE_IRQS]; } - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); print_irq_state(s, irq, vcpu); - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); return 0; } diff --git a/virt/kvm/arm/vgic/vgic-its.c b/virt/kvm/arm/vgic/vgic-its.c index a8f07243aa9f..4ed79c939fb4 100644 --- a/virt/kvm/arm/vgic/vgic-its.c +++ b/virt/kvm/arm/vgic/vgic-its.c @@ -52,6 +52,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, { struct vgic_dist *dist = &kvm->arch.vgic; struct vgic_irq *irq = vgic_get_irq(kvm, NULL, intid), *oldirq; + unsigned long flags; int ret; /* In this case there is no put, since we keep the reference. */ @@ -71,7 +72,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, irq->intid = intid; irq->target_vcpu = vcpu; - spin_lock(&dist->lpi_list_lock); + spin_lock_irqsave(&dist->lpi_list_lock, flags); /* * There could be a race with another vgic_add_lpi(), so we need to @@ -99,7 +100,7 @@ static struct vgic_irq *vgic_add_lpi(struct kvm *kvm, u32 intid, dist->lpi_list_count++; out_unlock: - spin_unlock(&dist->lpi_list_lock); + spin_unlock_irqrestore(&dist->lpi_list_lock, flags); /* * We "cache" the configuration table entries in our struct vgic_irq's. @@ -280,8 +281,8 @@ static int update_lpi_config(struct kvm *kvm, struct vgic_irq *irq, int ret; unsigned long flags; - ret = kvm_read_guest(kvm, propbase + irq->intid - GIC_LPI_OFFSET, - &prop, 1); + ret = kvm_read_guest_lock(kvm, propbase + irq->intid - GIC_LPI_OFFSET, + &prop, 1); if (ret) return ret; @@ -315,6 +316,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr) { struct vgic_dist *dist = &vcpu->kvm->arch.vgic; struct vgic_irq *irq; + unsigned long flags; u32 *intids; int irq_count, i = 0; @@ -330,7 +332,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr) if (!intids) return -ENOMEM; - spin_lock(&dist->lpi_list_lock); + spin_lock_irqsave(&dist->lpi_list_lock, flags); list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { if (i == irq_count) break; @@ -339,7 +341,7 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr) continue; intids[i++] = irq->intid; } - spin_unlock(&dist->lpi_list_lock); + spin_unlock_irqrestore(&dist->lpi_list_lock, flags); *intid_ptr = intids; return i; @@ -348,10 +350,11 @@ static int vgic_copy_lpi_list(struct kvm_vcpu *vcpu, u32 **intid_ptr) static int update_affinity(struct vgic_irq *irq, struct kvm_vcpu *vcpu) { int ret = 0; + unsigned long flags; - spin_lock(&irq->irq_lock); + spin_lock_irqsave(&irq->irq_lock, flags); irq->target_vcpu = vcpu; - spin_unlock(&irq->irq_lock); + spin_unlock_irqrestore(&irq->irq_lock, flags); if (irq->hw) { struct its_vlpi_map map; @@ -441,8 +444,9 @@ static int its_sync_lpi_pending_table(struct kvm_vcpu *vcpu) * this very same byte in the last iteration. Reuse that. */ if (byte_offset != last_byte_offset) { - ret = kvm_read_guest(vcpu->kvm, pendbase + byte_offset, - &pendmask, 1); + ret = kvm_read_guest_lock(vcpu->kvm, + pendbase + byte_offset, + &pendmask, 1); if (ret) { kfree(intids); return ret; @@ -786,7 +790,7 @@ static bool vgic_its_check_id(struct vgic_its *its, u64 baser, u32 id, return false; /* Each 1st level entry is represented by a 64-bit value. */ - if (kvm_read_guest(its->dev->kvm, + if (kvm_read_guest_lock(its->dev->kvm, BASER_ADDRESS(baser) + index * sizeof(indirect_ptr), &indirect_ptr, sizeof(indirect_ptr))) return false; @@ -1367,8 +1371,8 @@ static void vgic_its_process_commands(struct kvm *kvm, struct vgic_its *its) cbaser = CBASER_ADDRESS(its->cbaser); while (its->cwriter != its->creadr) { - int ret = kvm_read_guest(kvm, cbaser + its->creadr, - cmd_buf, ITS_CMD_SIZE); + int ret = kvm_read_guest_lock(kvm, cbaser + its->creadr, + cmd_buf, ITS_CMD_SIZE); /* * If kvm_read_guest() fails, this could be due to the guest * programming a bogus value in CBASER or something else going @@ -1893,7 +1897,7 @@ static int scan_its_table(struct vgic_its *its, gpa_t base, int size, int esz, int next_offset; size_t byte_offset; - ret = kvm_read_guest(kvm, gpa, entry, esz); + ret = kvm_read_guest_lock(kvm, gpa, entry, esz); if (ret) return ret; @@ -2263,7 +2267,7 @@ static int vgic_its_restore_cte(struct vgic_its *its, gpa_t gpa, int esz) int ret; BUG_ON(esz > sizeof(val)); - ret = kvm_read_guest(kvm, gpa, &val, esz); + ret = kvm_read_guest_lock(kvm, gpa, &val, esz); if (ret) return ret; val = le64_to_cpu(val); diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index c7423f3768e5..bdcf8e7a6161 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -344,7 +344,7 @@ retry: bit_nr = irq->intid % BITS_PER_BYTE; ptr = pendbase + byte_offset; - ret = kvm_read_guest(kvm, ptr, &val, 1); + ret = kvm_read_guest_lock(kvm, ptr, &val, 1); if (ret) return ret; @@ -397,7 +397,7 @@ int vgic_v3_save_pending_tables(struct kvm *kvm) ptr = pendbase + byte_offset; if (byte_offset != last_byte_offset) { - ret = kvm_read_guest(kvm, ptr, &val, 1); + ret = kvm_read_guest_lock(kvm, ptr, &val, 1); if (ret) return ret; last_byte_offset = byte_offset; diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 97bfba8d9a59..33c8325c8f35 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -43,9 +43,13 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { * kvm->lock (mutex) * its->cmd_lock (mutex) * its->its_lock (mutex) - * vgic_cpu->ap_list_lock - * kvm->lpi_list_lock - * vgic_irq->irq_lock + * vgic_cpu->ap_list_lock must be taken with IRQs disabled + * kvm->lpi_list_lock must be taken with IRQs disabled + * vgic_irq->irq_lock must be taken with IRQs disabled + * + * As the ap_list_lock might be taken from the timer interrupt handler, + * we have to disable IRQs before taking this lock and everything lower + * than it. * * If you need to take multiple locks, always take the upper lock first, * then the lower ones, e.g. first take the its_lock, then the irq_lock. @@ -72,8 +76,9 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) { struct vgic_dist *dist = &kvm->arch.vgic; struct vgic_irq *irq = NULL; + unsigned long flags; - spin_lock(&dist->lpi_list_lock); + spin_lock_irqsave(&dist->lpi_list_lock, flags); list_for_each_entry(irq, &dist->lpi_list_head, lpi_list) { if (irq->intid != intid) @@ -89,7 +94,7 @@ static struct vgic_irq *vgic_get_lpi(struct kvm *kvm, u32 intid) irq = NULL; out_unlock: - spin_unlock(&dist->lpi_list_lock); + spin_unlock_irqrestore(&dist->lpi_list_lock, flags); return irq; } @@ -134,19 +139,20 @@ static void vgic_irq_release(struct kref *ref) void vgic_put_irq(struct kvm *kvm, struct vgic_irq *irq) { struct vgic_dist *dist = &kvm->arch.vgic; + unsigned long flags; if (irq->intid < VGIC_MIN_LPI) return; - spin_lock(&dist->lpi_list_lock); + spin_lock_irqsave(&dist->lpi_list_lock, flags); if (!kref_put(&irq->refcount, vgic_irq_release)) { - spin_unlock(&dist->lpi_list_lock); + spin_unlock_irqrestore(&dist->lpi_list_lock, flags); return; }; list_del(&irq->lpi_list); dist->lpi_list_count--; - spin_unlock(&dist->lpi_list_lock); + spin_unlock_irqrestore(&dist->lpi_list_lock, flags); kfree(irq); }