
drm next pull request for 5.1

-----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJcfzdFAAoJEAx081l5xIa+QxsP/A6QP+gx4vQ8XXikaJMNz89e
 59TAbXHW/2qFMHRtUesuB2bc1a2cw2ppFsrryG7c4HqjKDDHna7Yx2JzZYL0MmNh
 SpJYL4yMuu/2TmyCouaAYzzP+5Supdosfif4LRn3269DH0i5MWXL+NVrbeB47blG
 XwjQTu46yfn06IFAo5bI2jMqSuPCDd4Hzpyixpvmjt+r16XwoH5nGUrDCHG8t/jV
 +PUZCYAjn71in6Z66MKZv/EVCVFfTnaVJ2KEgw7e+vWxnERkRh/xnRO6KIXMD5O1
 vo2qc2vbxkGpjaE6pDzC/2e5pRJT8Ks0t50jYjbVF+6nHpP5XIPvAXH4R2QdTA7B
 Jiu8N0oz6wj0H3AJ/V38rEHWW8zgOfXkhbRBfmfQ9NfgiEfwxqCVgspIOwei4oVw
 hvMXYUBM1CU+JIfW6w7ZT4oHALUlnCpnr5DQRdCNRm8zjClyNfIAoJIJrOtqmX44
 qjEzSgxb89ZtS7c0yislSBaovgAmcM3I+aq5I4xokdY0hFEZ6QomuKunyuQ8pBYa
 3gsvMEReLxETffhhYpjBt5+b5IgB49nf3Y38CKFurv32Sp0p0YgK0qVo8qRQHclj
 QIJ+3+zQMCX20swYpCWXhOPUIwtQppdKhWzg12my8rL2VgTlYhjlEbL4EL+Wk+hv
 6Ipulthzn0RyrSK9Dojh
 =GlRQ
 -----END PGP SIGNATURE-----

Merge tag 'drm-next-2019-03-06' of git://anongit.freedesktop.org/drm/drm

Pull drm updates from Dave Airlie:
 "This is the main drm pull request for the 5.1 merge window.

  The big changes I'd highlight are:
   - nouveau has HMM support now; there is finally an in-tree user, so we
     can quieten down the people who want to rip it out.
   - i915 now enables fastboot by default on Skylake+
   - Displayport Multistream support has been refactored and should
     hopefully be more reliable.

  Core:
   - header cleanups aiming towards removing drmP.h
   - dma-buf fence seqnos to 64-bits
   - common helper for DP mst hotplug for radeon,i915,amdgpu + new
     refcounting scheme
   - MST i2c improvements
   - drm_syncobj_cb removal
   - ARM FB compression fourcc
   - P010 + P016 fourcc
   - allwinner tiled format modifier
   - i2c over aux I2C_M_STOP support
   - DRM_AUTH handling fixes

  TTM:
   - ref/unref renaming

  New driver:
   - ARM komeda display driver

  scheduler:
   - refactor mirror list handling
   - rework hw fence processing
   - 0 run queue entity fix

  bridge:
   - TI DS90C185 LVDS bridge
   - thc63lvdm83d bridge improvements
   - cadence + allwinner DSI ported to generic phy

  panels:
   - Sitronix ST7701 panel
   - Kingdisplay KD097D04
   - LeMaker BL035-RGB-002
   - PDA 91-00156-A0
   - Innolux EE101IA-01D

  i915:
   - Enable fastboot by default on SKL+/VLV/CHV
   - Export RPCS configuration for ICL media driver
   - Coffeelake PCI ID
   - CNL clocks setup fixes
   - ACPI/PMIC support for MIPI/DSI
   - Per-engine WA init for all engines
   - Shrinker locking fixes
   - Kerneldoc updates
   - Lots of ring improvements and reset fixes
   - Coffeelake GVT Support
   - VFIO GVT EDID Region support
   - runtime PM wakeref tracking
   - ILK->IVB primary plane enable delays
   - userptr mutex locking fixes
   - DSI fixes
   - LVDS/TV cleanups
   - HW readout fixes
   - LUT robustness fixes
   - ICL display and watermark fixes
   - gem mmap race fix

  amdgpu:
   - add scheduled dependencies interface
   - DCC on scanout surfaces
   - vega10/20 BACO support
   - Multiple IH rings on soc15
   - XGMI locking fixes
   - DC i2c/aux cleanups
   - runtime SMU debug interface
   - Kexec improvements
   - SR-IOV fixes
   - DC freesync + ABM fixes
   - GDS fixes
   - GPUVM fixes
   - vega20 PCIE DPM switching fixes
   - Context priority handling fixes

  radeon:
   - fix missing break in evergreen parser

  nouveau:
   - SVM support via HMM

  msm:
   - QCOM Compressed modifier support

  exynos:
   - s5pv210 rotator support

  imx:
   - zpos property support
   - pending update fixes

  v3d:
   - cache flush improvements

  vc4:
   - reflection support
   - HDMI overscan support

  tegra:
   - CEC refactoring
   - HDMI audio fixes
   - Tegra186 prep work
   - SOR crossbar device tree fixes

  sun4i:
   - implicit fencing support
   - YUV and scaler support improvements
   - A23 support
   - tiling fixes

  atmel-hlcdc:
   - clipping and rotation property fixes

  qxl:
   - BO and PRIME improvements
   - generic fbdev emulation

  dw-hdmi:
   - HDMI 2.0 2160p
   - YUV420 output

  rockchip:
   - implicit fencing support
   - reflection properties

  virtio-gpu:
   - use generic fbdev emulation

  tilcdc:
   - cpufreq vs crtc init fix

  rcar-du:
   - R8A774C0 support
   - D3/E3 RGB output routing fixes and DPAD0 support
   - R8A7744 LVDS support

  bochs:
   - atomic and generic fbdev emulation
   - ID mismatch error on bochs load

  meson:
   - remove firmware fbs"

* tag 'drm-next-2019-03-06' of git://anongit.freedesktop.org/drm/drm: (1130 commits)
  drm/amd/display: Use vrr friendly pageflip throttling in DC.
  drm/imx: only send commit done event when all state has been applied
  drm/imx: allow building under COMPILE_TEST
  drm/imx: imx-tve: depend on COMMON_CLK
  drm/imx: ipuv3-plane: add zpos property
  drm/imx: ipuv3-plane: add function to query atomic update status
  gpu: ipu-v3: prg: add function to get channel configure status
  gpu: ipu-v3: pre: add double buffer status readback
  drm/amdgpu: Bump amdgpu version for context priority override.
  drm/amdgpu/powerplay: fix typo in BACO header guards
  drm/amdgpu/powerplay: fix return codes in BACO code
  drm/amdgpu: add missing license on baco files
  drm/bochs: Fix the ID mismatch error
  drm/nouveau/dmem: use dma addresses during migration copies
  drm/nouveau/dmem: use physical vram addresses during migration copies
  drm/nouveau/dmem: extend copy function to allow direct use of physical addresses
  drm/nouveau/svm: new ioctl to migrate process memory to GPU memory
  drm/nouveau/dmem: device memory helpers for SVM
  drm/nouveau/svm: initial support for shared virtual memory
  drm/nouveau: prepare for enabling svm with existing userspace interfaces
  ...
Linus Torvalds 2019-03-08 08:23:15 -08:00
commit 851ca779d1
1141 changed files with 39790 additions and 38139 deletions


@ -0,0 +1,73 @@
Device Tree bindings for Arm Komeda display driver
Required properties:
- compatible: Should be "arm,mali-d71"
- reg: Physical base address and length of the registers in the system
- interrupts: the interrupt line number of the device in the system
- clocks: A list of phandle + clock-specifier pairs, one for each entry
in 'clock-names'
- clock-names: A list of clock names. It should contain:
- "mclk": for the main processor clock
- "pclk": for the APB interface clock
- #address-cells: Must be 1
- #size-cells: Must be 0
Required properties for sub-node: pipeline@N
Each device contains one or two pipeline sub-nodes (at least one), each
pipeline node should provide properties:
- reg: Zero-indexed identifier for the pipeline
- clocks: A list of phandle + clock-specifier pairs, one for each entry
in 'clock-names'
- clock-names: should contain:
- "pxclk": pixel clock
- "aclk": AXI interface clock
- port: each pipeline connects to an encoder input port. The connection is
modeled using the OF graph bindings specified in
Documentation/devicetree/bindings/graph.txt
Optional properties:
- memory-region: phandle to a node describing memory (see
Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt)
to be used for the framebuffer; if not present, the framebuffer may
be located anywhere in memory.
Example:
/ {
...
dp0: display@c00000 {
#address-cells = <1>;
#size-cells = <0>;
compatible = "arm,mali-d71";
reg = <0xc00000 0x20000>;
interrupts = <0 168 4>;
clocks = <&dpu_mclk>, <&dpu_aclk>;
clock-names = "mclk", "pclk";
dp0_pipe0: pipeline@0 {
clocks = <&fpgaosc2>, <&dpu_aclk>;
clock-names = "pxclk", "aclk";
reg = <0>;
port {
dp0_pipe0_out: endpoint {
remote-endpoint = <&db_dvi0_in>;
};
};
};
dp0_pipe1: pipeline@1 {
clocks = <&fpgaosc2>, <&dpu_aclk>;
clock-names = "pxclk", "aclk";
reg = <1>;
port {
dp0_pipe1_out: endpoint {
remote-endpoint = <&db_dvi1_in>;
};
};
};
};
...
};


@ -22,13 +22,11 @@ among others.
Required properties:
- compatible: Must be one or more of the following
- "ti,ds90c185" for the TI DS90C185 FPD-Link Serializer
- "lvds-encoder" for a generic LVDS encoder device
- compatible: Must be "lvds-encoder"
When compatible with the generic version, nodes must list the
device-specific version corresponding to the device first
followed by the generic version.
Any encoder compatible with this generic binding, but with additional
properties not listed here, must list a device specific compatible first
followed by this generic compatible.
Required nodes:
@ -44,8 +42,6 @@ Example
lvds-encoder {
compatible = "lvds-encoder";
#address-cells = <1>;
#size-cells = <0>;
ports {
#address-cells = <1>;


@ -8,6 +8,8 @@ Required properties:
- compatible : Shall contain one of
- "renesas,r8a7743-lvds" for R8A7743 (RZ/G1M) compatible LVDS encoders
- "renesas,r8a7744-lvds" for R8A7744 (RZ/G1N) compatible LVDS encoders
- "renesas,r8a774c0-lvds" for R8A774C0 (RZ/G2E) compatible LVDS encoders
- "renesas,r8a7790-lvds" for R8A7790 (R-Car H2) compatible LVDS encoders
- "renesas,r8a7791-lvds" for R8A7791 (R-Car M2-W) compatible LVDS encoders
- "renesas,r8a7793-lvds" for R8A7793 (R-Car M2-N) compatible LVDS encoders
@ -25,7 +27,7 @@ Required properties:
- clock-names: Name of the clocks. This property is model-dependent.
- The functional clock, which is mandatory for all models, shall be listed
first, and shall be named "fck".
- On R8A77990 and R8A77995, the LVDS encoder can use the EXTAL or
- On R8A77990, R8A77995 and R8A774C0, the LVDS encoder can use the EXTAL or
DU_DOTCLKINx clocks. Those clocks are optional. When supplied they must be
named "extal" and "dclkin.x" respectively, with "x" being the DU_DOTCLKIN
numerical index.


@ -10,7 +10,7 @@ Required properties:
Optional properties:
- pwdn-gpios: Power down control GPIO
- powerdown-gpios: Power down control GPIO (the /PWDN pin, active low).
Required nodes:


@ -0,0 +1,55 @@
Texas Instruments FPD-Link (LVDS) Serializer
--------------------------------------------
The DS90C185 and DS90C187 are low-power serializers for portable
battery-powered applications that reduce the size of the RGB
interface between the host GPU and the display.
Required properties:
- compatible: Should be
"ti,ds90c185", "lvds-encoder" for the TI DS90C185 FPD-Link Serializer
"ti,ds90c187", "lvds-encoder" for the TI DS90C187 FPD-Link Serializer
Optional properties:
- powerdown-gpios: Power down control GPIO (the PDB pin, active-low)
Required nodes:
The devices have two video ports. Their connections are modeled using the OF
graph bindings specified in Documentation/devicetree/bindings/graph.txt.
- Video port 0 for parallel input
- Video port 1 for LVDS output
Example
-------
lvds-encoder {
compatible = "ti,ds90c185", "lvds-encoder";
powerdown-gpios = <&gpio 17 GPIO_ACTIVE_LOW>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
lvds_enc_in: endpoint {
remote-endpoint = <&lcdc_out_rgb>;
};
};
port@1 {
reg = <1>;
lvds_enc_out: endpoint {
remote-endpoint = <&lvds_panel_in>;
};
};
};
};


@ -0,0 +1,59 @@
Qualcomm adreno/snapdragon GMU (Graphics management unit)
The GMU is a programmable power controller for the GPU. The CPU controls the
GMU, which in turn handles power control for the GPU.
Required properties:
- compatible: "qcom,adreno-gmu-XYZ.W", "qcom,adreno-gmu"
for example: "qcom,adreno-gmu-630.2", "qcom,adreno-gmu"
Note that you need to list the less specific "qcom,adreno-gmu"
for generic matches and the more specific identifier to identify
the specific device.
- reg: Physical base address and length of the GMU registers.
- reg-names: Matching names for the register regions
* "gmu"
* "gmu_pdc"
* "gmu_pdc_seg"
- interrupts: The interrupt signals from the GMU.
- interrupt-names: Matching names for the interrupts
* "hfi"
* "gmu"
- clocks: phandles to the device clocks
- clock-names: Matching names for the clocks
* "gmu"
* "cxo"
* "axi"
* "mnoc"
- power-domains: should be <&clock_gpucc GPU_CX_GDSC>
- iommus: phandle to the adreno iommu
- operating-points-v2: phandle to the OPP operating points
Example:
/ {
...
gmu: gmu@506a000 {
compatible="qcom,adreno-gmu-630.2", "qcom,adreno-gmu";
reg = <0x506a000 0x30000>,
<0xb280000 0x10000>,
<0xb480000 0x10000>;
reg-names = "gmu", "gmu_pdc", "gmu_pdc_seq";
interrupts = <GIC_SPI 304 IRQ_TYPE_LEVEL_HIGH>,
<GIC_SPI 305 IRQ_TYPE_LEVEL_HIGH>;
interrupt-names = "hfi", "gmu";
clocks = <&gpucc GPU_CC_CX_GMU_CLK>,
<&gpucc GPU_CC_CXO_CLK>,
<&gcc GCC_DDRSS_GPU_AXI_CLK>,
<&gcc GCC_GPU_MEMNOC_GFX_CLK>;
clock-names = "gmu", "cxo", "axi", "memnoc";
power-domains = <&gpucc GPU_CX_GDSC>;
iommus = <&adreno_smmu 5>;
operating-points-v2 = <&gmu_opp_table>;
};
};


@ -10,14 +10,23 @@ Required properties:
If "amd,imageon" is used, there should be no top level msm device.
- reg: Physical base address and length of the controller's registers.
- interrupts: The interrupt signal from the gpu.
- clocks: device clocks
- clocks: device clocks (if applicable)
See ../clocks/clock-bindings.txt for details.
- clock-names: the following clocks are required:
- clock-names: the following clocks are required by a3xx, a4xx and a5xx
cores:
* "core"
* "iface"
* "mem_iface"
For GMU attached devices the GPU clocks are not used and are not required. The
following devices should not list clocks:
- qcom,adreno-630.2
- iommus: optional phandle to an adreno iommu instance
- operating-points-v2: optional phandle to the OPP operating points
- qcom,gmu: For GMU attached devices a phandle to the GMU device that will
control the power for the GPU. Applicable targets:
- qcom,adreno-630.2
Example:
Example 3xx/4xx/a5xx:
/ {
...
@ -37,3 +46,30 @@ Example:
<&mmcc MMSS_IMEM_AHB_CLK>;
};
};
Example a6xx (with GMU):
/ {
...
gpu@5000000 {
compatible = "qcom,adreno-630.2", "qcom,adreno";
#stream-id-cells = <16>;
reg = <0x5000000 0x40000>, <0x509e000 0x10>;
reg-names = "kgsl_3d0_reg_memory", "cx_mem";
/*
* Look ma, no clocks! The GPU clocks and power are
* controlled entirely by the GMU
*/
interrupts = <GIC_SPI 300 IRQ_TYPE_LEVEL_HIGH>;
iommus = <&adreno_smmu 0>;
operating-points-v2 = <&gpu_opp_table>;
qcom,gmu = <&gmu>;
};
};


@ -0,0 +1,7 @@
Innolux Corporation 10.1" EE101IA-01D WXGA (1280x800) LVDS panel
Required properties:
- compatible: should be "innolux,ee101ia-01d"
This binding is compatible with the lvds-panel binding, which is specified
in panel-lvds.txt in this directory.


@ -0,0 +1,12 @@
LeMaker BL035-RGB-002 3.5" QVGA TFT LCD panel
Required properties:
- compatible: should be "lemaker,bl035-rgb-002"
- power-supply: as specified in the base binding
Optional properties:
- backlight: as specified in the base binding
- enable-gpios: as specified in the base binding
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.


@ -0,0 +1,14 @@
PDA 91-00156-A0 5.0" WVGA TFT LCD panel
Required properties:
- compatible: should be "pda,91-00156-a0"
- power-supply: this panel requires a single power supply. A phandle to a
regulator needs to be specified here. Compatible with panel-common binding which
is specified in the panel-common.txt in this directory.
- backlight: this panel's backlight is controlled by an external backlight
controller. A phandle to this controller needs to be specified here.
Compatible with panel-common binding which is specified in the panel-common.txt
in this directory.
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.


@ -0,0 +1,30 @@
Sitronix ST7701 based LCD panels
The ST7701 is designed for small and medium sized TFT LCD displays and is
capable of supporting resolutions up to 480RGBX864. It provides several
system interfaces such as MIPI, RGB and SPI.
The Techstar TS8550B is a 480x854, 2-lane MIPI DSI LCD panel with a
built-in ST7701 chip.
Required properties:
- compatible: must be "sitronix,st7701" and one of
* "techstar,ts8550b"
- reset-gpios: a GPIO phandle for the reset pin
Required properties for techstar,ts8550b:
- reg: DSI virtual channel used by that screen
- VCC-supply: analog regulator for MIPI circuit
- IOVCC-supply: I/O system regulator
Optional properties:
- backlight: phandle for the backlight control.
Example:
panel@0 {
compatible = "techstar,ts8550b", "sitronix,st7701";
reg = <0>;
VCC-supply = <&reg_dldo2>;
IOVCC-supply = <&reg_dldo2>;
reset-gpios = <&pio 3 24 GPIO_ACTIVE_HIGH>; /* LCD-RST: PD24 */
backlight = <&backlight>;
};


@ -7,6 +7,7 @@ Required Properties:
- "renesas,du-r8a7744" for R8A7744 (RZ/G1N) compatible DU
- "renesas,du-r8a7745" for R8A7745 (RZ/G1E) compatible DU
- "renesas,du-r8a77470" for R8A77470 (RZ/G1C) compatible DU
- "renesas,du-r8a774c0" for R8A774C0 (RZ/G2E) compatible DU
- "renesas,du-r8a7779" for R8A7779 (R-Car H1) compatible DU
- "renesas,du-r8a7790" for R8A7790 (R-Car H2) compatible DU
- "renesas,du-r8a7791" for R8A7791 (R-Car M2-W) compatible DU
@ -57,6 +58,7 @@ corresponding to each DU output.
R8A7744 (RZ/G1N) DPAD 0 LVDS 0 - -
R8A7745 (RZ/G1E) DPAD 0 DPAD 1 - -
R8A77470 (RZ/G1C) DPAD 0 DPAD 1 LVDS 0 -
R8A774C0 (RZ/G2E) DPAD 0 LVDS 0 LVDS 1 -
R8A7779 (R-Car H1) DPAD 0 DPAD 1 - -
R8A7790 (R-Car H2) DPAD 0 LVDS 0 LVDS 1 -
R8A7791 (R-Car M2-W) DPAD 0 LVDS 0 - -


@ -10,6 +10,7 @@ Required properties:
"rockchip,rk3126-vop";
"rockchip,px30-vop-lit";
"rockchip,px30-vop-big";
"rockchip,rk3066-vop";
"rockchip,rk3188-vop";
"rockchip,rk3288-vop";
"rockchip,rk3368-vop";


@ -156,6 +156,7 @@ Required properties:
* allwinner,sun6i-a31-tcon
* allwinner,sun6i-a31s-tcon
* allwinner,sun7i-a20-tcon
* allwinner,sun8i-a23-tcon
* allwinner,sun8i-a33-tcon
* allwinner,sun8i-a83t-tcon-lcd
* allwinner,sun8i-a83t-tcon-tv
@ -276,6 +277,7 @@ Required properties:
- compatible: value must be one of:
* allwinner,sun6i-a31-drc
* allwinner,sun6i-a31s-drc
* allwinner,sun8i-a23-drc
* allwinner,sun8i-a33-drc
* allwinner,sun9i-a80-drc
- reg: base address and size of the memory-mapped region.
@ -303,6 +305,7 @@ Required properties:
* allwinner,sun5i-a13-display-backend
* allwinner,sun6i-a31-display-backend
* allwinner,sun7i-a20-display-backend
* allwinner,sun8i-a23-display-backend
* allwinner,sun8i-a33-display-backend
* allwinner,sun9i-a80-display-backend
- reg: base address and size of the memory-mapped region.
@ -360,6 +363,7 @@ Required properties:
* allwinner,sun5i-a13-display-frontend
* allwinner,sun6i-a31-display-frontend
* allwinner,sun7i-a20-display-frontend
* allwinner,sun8i-a23-display-frontend
* allwinner,sun8i-a33-display-frontend
* allwinner,sun9i-a80-display-frontend
- reg: base address and size of the memory-mapped region.
@ -419,6 +423,7 @@ Required properties:
* allwinner,sun6i-a31-display-engine
* allwinner,sun6i-a31s-display-engine
* allwinner,sun7i-a20-display-engine
* allwinner,sun8i-a23-display-engine
* allwinner,sun8i-a33-display-engine
* allwinner,sun8i-a83t-display-engine
* allwinner,sun8i-h3-display-engine


@ -238,6 +238,9 @@ of the following host1x client modules:
- nvidia,hpd-gpio: specifies a GPIO used for hotplug detection
- nvidia,edid: supplies a binary EDID blob
- nvidia,panel: phandle of a display panel
- nvidia,xbar-cfg: 5 cells containing the crossbar configuration. Each lane
of the SOR, identified by the cell's index, is mapped via the crossbar to
the pad specified by the cell's value.
Optional properties when driving an eDP output:
- nvidia,dpaux: phandle to a DisplayPort AUX interface


@ -2,9 +2,10 @@
Required properties:
- compatible : value should be one of the following:
(a) "samsung,exynos4210-rotator" for Rotator IP in Exynos4210
(b) "samsung,exynos4212-rotator" for Rotator IP in Exynos4212/4412
(c) "samsung,exynos5250-rotator" for Rotator IP in Exynos5250
* "samsung,s5pv210-rotator" for Rotator IP in S5PV210
* "samsung,exynos4210-rotator" for Rotator IP in Exynos4210
* "samsung,exynos4212-rotator" for Rotator IP in Exynos4212/4412
* "samsung,exynos5250-rotator" for Rotator IP in Exynos5250
- reg : Physical base address of the IP registers and length of memory
mapped region.


@ -216,6 +216,7 @@ laird Laird PLC
lantiq Lantiq Semiconductor
lattice Lattice Semiconductor
lego LEGO Systems A/S
lemaker Shenzhen LeMaker Technology Co., Ltd.
lenovo Lenovo Group Ltd.
lg LG Corporation
libretech Shenzhen Libre Technology Co., Ltd
@ -303,6 +304,7 @@ ovti OmniVision Technologies
oxsemi Oxford Semiconductor, Ltd.
panasonic Panasonic Corporation
parade Parade Technologies Inc.
pda Precision Design Associates, Inc.
pericom Pericom Technology Inc.
pervasive Pervasive Displays, Inc.
phicomm PHICOMM Co., Ltd.


@ -0,0 +1,235 @@
.. SPDX-License-Identifier: GPL-2.0+
===================================
Arm Framebuffer Compression (AFBC)
===================================
AFBC is a proprietary lossless image compression protocol and format.
It provides fine-grained random access and minimizes the amount of
data transferred between IP blocks.
AFBC can be enabled on drivers which support it via use of the AFBC
format modifiers defined in drm_fourcc.h. See DRM_FORMAT_MOD_ARM_AFBC(*).
All users of the AFBC modifiers must follow the usage guidelines laid
out in this document, to ensure compatibility across different AFBC
producers and consumers.
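As a brief illustration (a sketch only, not part of the canonical format list
below), a buffer allocator could build such a modifier roughly as follows. The
macro and flag names are the ones defined in include/uapi/drm/drm_fourcc.h;
the surrounding variable is hypothetical.

.. code-block:: c

   #include <drm/drm_fourcc.h>

   /*
    * An AFBC modifier for 16x16 superblocks with the lossless colorspace
    * transform (YTR) enabled, as this document recommends for RGB buffers.
    */
   static const u64 afbc_rgb_modifier =
           DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
                                   AFBC_FORMAT_MOD_YTR);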
Components and Ordering
=======================
AFBC streams can contain several components - where a component
corresponds to a color channel (i.e. R, G, B, X, A, Y, Cb, Cr).
The assignment of input/output color channels must be consistent
between the encoder and the decoder for correct operation, otherwise
the consumer will interpret the decoded data incorrectly.
Furthermore, when the lossless colorspace transform is used
(AFBC_FORMAT_MOD_YTR, which should be enabled for RGB buffers for
maximum compression efficiency), the component order must be:
* Component 0: R
* Component 1: G
* Component 2: B
The component ordering is communicated via the fourcc code in the
fourcc:modifier pair. In general, component '0' is considered to
reside in the least-significant bits of the corresponding linear
format. For example, COMP(bits):
* DRM_FORMAT_ABGR8888
* Component 0: R(8)
* Component 1: G(8)
* Component 2: B(8)
* Component 3: A(8)
* DRM_FORMAT_BGR888
* Component 0: R(8)
* Component 1: G(8)
* Component 2: B(8)
* DRM_FORMAT_YUYV
* Component 0: Y(8)
* Component 1: Cb(8, 2x1 subsampled)
* Component 2: Cr(8, 2x1 subsampled)
In AFBC, 'X' components are not treated any differently from any other
component. Therefore, an AFBC buffer with fourcc DRM_FORMAT_XBGR8888
encodes with 4 components, like so:
* DRM_FORMAT_XBGR8888
* Component 0: R(8)
* Component 1: G(8)
* Component 2: B(8)
* Component 3: X(8)
Please note, however, that the inclusion of a "wasted" 'X' channel is
bad for compression efficiency, and so it's recommended to avoid
formats containing 'X' bits. If a fourth component is
required/expected by the encoder/decoder, then it is recommended to
instead use an equivalent format with alpha, setting all alpha bits to
'1'. If there is no requirement for a fourth component, then a format
which doesn't include alpha can be used, e.g. DRM_FORMAT_BGR888.
Number of Planes
================
Formats which are typically multi-planar in linear layouts (e.g. YUV
420), can be encoded into one, or multiple, AFBC planes. As with
component order, the encoder and decoder must agree about the number
of planes in order to correctly decode the buffer. The fourcc code is
used to determine the number of encoded planes in an AFBC buffer,
matching the number of planes for the linear (unmodified) format.
Within each plane, the component ordering also follows the fourcc
code:
For example:
* DRM_FORMAT_YUYV: nplanes = 1
* Plane 0:
* Component 0: Y(8)
* Component 1: Cb(8, 2x1 subsampled)
* Component 2: Cr(8, 2x1 subsampled)
* DRM_FORMAT_NV12: nplanes = 2
* Plane 0:
* Component 0: Y(8)
* Plane 1:
* Component 0: Cb(8, 2x1 subsampled)
* Component 1: Cr(8, 2x1 subsampled)
Cross-device interoperability
=============================
For maximum compatibility across devices, the table below defines
canonical formats for use between AFBC-enabled devices. Formats which
are listed here must be used exactly as specified when using the AFBC
modifiers. Formats which are not listed should be avoided.
.. flat-table:: AFBC formats
* - Fourcc code
- Description
- Planes/Components
* - DRM_FORMAT_ABGR2101010
- 10-bit per component RGB, with 2-bit alpha
- Plane 0: 4 components
* Component 0: R(10)
* Component 1: G(10)
* Component 2: B(10)
* Component 3: A(2)
* - DRM_FORMAT_ABGR8888
- 8-bit per component RGB, with 8-bit alpha
- Plane 0: 4 components
* Component 0: R(8)
* Component 1: G(8)
* Component 2: B(8)
* Component 3: A(8)
* - DRM_FORMAT_BGR888
- 8-bit per component RGB
- Plane 0: 3 components
* Component 0: R(8)
* Component 1: G(8)
* Component 2: B(8)
* - DRM_FORMAT_BGR565
- 5/6-bit per component RGB
- Plane 0: 3 components
* Component 0: R(5)
* Component 1: G(6)
* Component 2: B(5)
* - DRM_FORMAT_ABGR1555
- 5-bit per component RGB, with 1-bit alpha
- Plane 0: 4 components
* Component 0: R(5)
* Component 1: G(5)
* Component 2: B(5)
* Component 3: A(1)
* - DRM_FORMAT_VUY888
- 8-bit per component YCbCr 444, single plane
- Plane 0: 3 components
* Component 0: Y(8)
* Component 1: Cb(8)
* Component 2: Cr(8)
* - DRM_FORMAT_VUY101010
- 10-bit per component YCbCr 444, single plane
- Plane 0: 3 components
* Component 0: Y(10)
* Component 1: Cb(10)
* Component 2: Cr(10)
* - DRM_FORMAT_YUYV
- 8-bit per component YCbCr 422, single plane
- Plane 0: 3 components
* Component 0: Y(8)
* Component 1: Cb(8, 2x1 subsampled)
* Component 2: Cr(8, 2x1 subsampled)
* - DRM_FORMAT_NV16
- 8-bit per component YCbCr 422, two plane
- Plane 0: 1 component
* Component 0: Y(8)
Plane 1: 2 components
* Component 0: Cb(8, 2x1 subsampled)
* Component 1: Cr(8, 2x1 subsampled)
* - DRM_FORMAT_Y210
- 10-bit per component YCbCr 422, single plane
- Plane 0: 3 components
* Component 0: Y(10)
* Component 1: Cb(10, 2x1 subsampled)
* Component 2: Cr(10, 2x1 subsampled)
* - DRM_FORMAT_P210
- 10-bit per component YCbCr 422, two plane
- Plane 0: 1 component
* Component 0: Y(10)
Plane 1: 2 components
* Component 0: Cb(10, 2x1 subsampled)
* Component 1: Cr(10, 2x1 subsampled)
* - DRM_FORMAT_YUV420_8BIT
- 8-bit per component YCbCr 420, single plane
- Plane 0: 3 components
* Component 0: Y(8)
* Component 1: Cb(8, 2x2 subsampled)
* Component 2: Cr(8, 2x2 subsampled)
* - DRM_FORMAT_YUV420_10BIT
- 10-bit per component YCbCr 420, single plane
- Plane 0: 3 components
* Component 0: Y(10)
* Component 1: Cb(10, 2x2 subsampled)
* Component 2: Cr(10, 2x2 subsampled)
* - DRM_FORMAT_NV12
- 8-bit per component YCbCr 420, two plane
- Plane 0: 1 component
* Component 0: Y(8)
Plane 1: 2 components
* Component 0: Cb(8, 2x2 subsampled)
* Component 1: Cr(8, 2x2 subsampled)
* - DRM_FORMAT_P010
- 10-bit per component YCbCr 420, two plane
- Plane 0: 1 component
* Component 0: Y(10)
Plane 1: 2 components
* Component 0: Cb(10, 2x2 subsampled)
* Component 1: Cr(10, 2x2 subsampled)


@ -0,0 +1,52 @@
digraph T {
/* Make sure our payloads are always drawn below the driver node */
subgraph cluster_driver {
fillcolor = grey;
style = filled;
driver -> {payload1, payload2} [dir=none];
}
/* Driver malloc references */
edge [style=dashed];
driver -> port1;
driver -> port2;
driver -> port3:e;
driver -> port4;
payload1:s -> port1:e;
payload2:s -> port3:e;
edge [style=""];
subgraph cluster_topology {
label="Topology Manager";
labelloc=bottom;
/* Topology references */
mstb1 -> {port1, port2};
port1 -> mstb2;
port2 -> mstb3 -> {port3, port4};
port3 -> mstb4;
/* Malloc references */
edge [style=dashed;dir=back];
mstb1 -> {port1, port2};
port1 -> mstb2;
port2 -> mstb3 -> {port3, port4};
port3 -> mstb4;
}
driver [label="DRM driver";style=filled;shape=box;fillcolor=lightblue];
payload1 [label="Payload #1";style=filled;shape=box;fillcolor=lightblue];
payload2 [label="Payload #2";style=filled;shape=box;fillcolor=lightblue];
mstb1 [label="MSTB #1";style=filled;fillcolor=palegreen;shape=oval];
mstb2 [label="MSTB #2";style=filled;fillcolor=palegreen;shape=oval];
mstb3 [label="MSTB #3";style=filled;fillcolor=palegreen;shape=oval];
mstb4 [label="MSTB #4";style=filled;fillcolor=palegreen;shape=oval];
port1 [label="Port #1";shape=oval];
port2 [label="Port #2";shape=oval];
port3 [label="Port #3";shape=oval];
port4 [label="Port #4";shape=oval];
}


@ -0,0 +1,56 @@
digraph T {
/* Make sure our payloads are always drawn below the driver node */
subgraph cluster_driver {
fillcolor = grey;
style = filled;
driver -> {payload1, payload2} [dir=none];
}
/* Driver malloc references */
edge [style=dashed];
driver -> port1;
driver -> port2;
driver -> port3:e;
driver -> port4 [color=red];
payload1:s -> port1:e;
payload2:s -> port3:e;
edge [style=""];
subgraph cluster_topology {
label="Topology Manager";
labelloc=bottom;
/* Topology references */
mstb1 -> {port1, port2};
port1 -> mstb2;
edge [color=red];
port2 -> mstb3 -> {port3, port4};
port3 -> mstb4;
edge [color=""];
/* Malloc references */
edge [style=dashed;dir=back];
mstb1 -> {port1, port2};
port1 -> mstb2;
port2 -> mstb3 -> port3;
edge [color=red];
mstb3 -> port4;
port3 -> mstb4;
}
mstb1 [label="MSTB #1";style=filled;fillcolor=palegreen];
mstb2 [label="MSTB #2";style=filled;fillcolor=palegreen];
mstb3 [label="MSTB #3";style=filled;fillcolor=palegreen];
mstb4 [label="MSTB #4";style=filled;fillcolor=grey];
port1 [label="Port #1"];
port2 [label="Port #2"];
port3 [label="Port #3"];
port4 [label="Port #4";style=filled;fillcolor=grey];
driver [label="DRM driver";style=filled;shape=box;fillcolor=lightblue];
payload1 [label="Payload #1";style=filled;shape=box;fillcolor=lightblue];
payload2 [label="Payload #2";style=filled;shape=box;fillcolor=lightblue];
}


@ -0,0 +1,59 @@
digraph T {
/* Make sure our payloads are always drawn below the driver node */
subgraph cluster_driver {
fillcolor = grey;
style = filled;
edge [dir=none];
driver -> payload1;
driver -> payload2 [penwidth=3];
edge [dir=""];
}
/* Driver malloc references */
edge [style=dashed];
driver -> port1;
driver -> port2;
driver -> port3:e;
driver -> port4 [color=grey];
payload1:s -> port1:e;
payload2:s -> port3:e [penwidth=3];
edge [style=""];
subgraph cluster_topology {
label="Topology Manager";
labelloc=bottom;
/* Topology references */
mstb1 -> {port1, port2};
port1 -> mstb2;
edge [color=grey];
port2 -> mstb3 -> {port3, port4};
port3 -> mstb4;
edge [color=""];
/* Malloc references */
edge [style=dashed;dir=back];
mstb1 -> {port1, port2};
port1 -> mstb2;
port2 -> mstb3 [penwidth=3];
mstb3 -> port3 [penwidth=3];
edge [color=grey];
mstb3 -> port4;
port3 -> mstb4;
}
mstb1 [label="MSTB #1";style=filled;fillcolor=palegreen];
mstb2 [label="MSTB #2";style=filled;fillcolor=palegreen];
mstb3 [label="MSTB #3";style=filled;fillcolor=palegreen;penwidth=3];
mstb4 [label="MSTB #4";style=filled;fillcolor=grey];
port1 [label="Port #1"];
port2 [label="Port #2";penwidth=5];
port3 [label="Port #3";penwidth=3];
port4 [label="Port #4";style=filled;fillcolor=grey];
driver [label="DRM driver";style=filled;shape=box;fillcolor=lightblue];
payload1 [label="Payload #1";style=filled;shape=box;fillcolor=lightblue];
payload2 [label="Payload #2";style=filled;shape=box;fillcolor=lightblue;penwidth=3];
}


@ -17,6 +17,8 @@ GPU Driver Documentation
vkms
bridge/dw-hdmi
xen-front
afbc
komeda-kms
.. only:: subproject and html


@ -39,68 +39,6 @@ sections.
Driver Information
------------------
Driver Features
~~~~~~~~~~~~~~~
Drivers inform the DRM core about their requirements and supported
features by setting appropriate flags in the driver_features field.
Since those flags influence the DRM core behaviour since registration
time, most of them must be set to registering the :c:type:`struct
drm_driver <drm_driver>` instance.
u32 driver_features;
DRIVER_USE_AGP
Driver uses AGP interface, the DRM core will manage AGP resources.
DRIVER_LEGACY
Denote a legacy driver using shadow attach. Don't use.
DRIVER_KMS_LEGACY_CONTEXT
Used only by nouveau for backwards compatibility with existing userspace.
Don't use.
DRIVER_PCI_DMA
Driver is capable of PCI DMA, mapping of PCI DMA buffers to
userspace will be enabled. Deprecated.
DRIVER_SG
Driver can perform scatter/gather DMA, allocation and mapping of
scatter/gather buffers will be enabled. Deprecated.
DRIVER_HAVE_DMA
Driver supports DMA, the userspace DMA API will be supported.
Deprecated.
DRIVER_HAVE_IRQ; DRIVER_IRQ_SHARED
DRIVER_HAVE_IRQ indicates whether the driver has an IRQ handler
managed by the DRM Core. The core will support simple IRQ handler
installation when the flag is set. The installation process is
described in ?.
DRIVER_IRQ_SHARED indicates whether the device & handler support
shared IRQs (note that this is required of PCI drivers).
DRIVER_GEM
Driver use the GEM memory manager.
DRIVER_MODESET
Driver supports mode setting interfaces (KMS).
DRIVER_PRIME
Driver implements DRM PRIME buffer sharing.
DRIVER_RENDER
Driver supports dedicated render nodes.
DRIVER_ATOMIC
Driver supports atomic properties. In this case the driver must
implement appropriate obj->atomic_get_property() vfuncs for any
modeset objects with driver specific properties.
DRIVER_SYNCOBJ
Driver support drm sync objects.
Major, Minor and Patchlevel
~~~~~~~~~~~~~~~~~~~~~~~~~~~
@ -143,6 +81,9 @@ Device Instance and Driver Handling
.. kernel-doc:: drivers/gpu/drm/drm_drv.c
:doc: driver instance overview
.. kernel-doc:: include/drm/drm_device.h
:internal:
.. kernel-doc:: include/drm/drm_drv.h
:internal:
@ -230,6 +171,15 @@ Printer
.. kernel-doc:: drivers/gpu/drm/drm_print.c
:export:
Utilities
---------
.. kernel-doc:: include/drm/drm_util.h
:doc: drm utils
.. kernel-doc:: include/drm/drm_util.h
:internal:
Legacy Support Code
===================


@ -116,8 +116,6 @@ Framebuffer CMA Helper Functions Reference
.. kernel-doc:: drivers/gpu/drm/drm_fb_cma_helper.c
:export:
.. _drm_bridges:
Framebuffer GEM Helper Reference
================================
@ -127,6 +125,8 @@ Framebuffer GEM Helper Reference
.. kernel-doc:: drivers/gpu/drm/drm_gem_framebuffer_helper.c
:export:
.. _drm_bridges:
Bridges
=======
@ -208,18 +208,40 @@ Display Port Dual Mode Adaptor Helper Functions Reference
.. kernel-doc:: drivers/gpu/drm/drm_dp_dual_mode_helper.c
:export:
Display Port MST Helper Functions Reference
===========================================
Display Port MST Helpers
========================
Overview
--------
.. kernel-doc:: drivers/gpu/drm/drm_dp_mst_topology.c
:doc: dp mst helper
.. kernel-doc:: drivers/gpu/drm/drm_dp_mst_topology.c
:doc: Branch device and port refcounting
Functions Reference
-------------------
.. kernel-doc:: include/drm/drm_dp_mst_helper.h
:internal:
.. kernel-doc:: drivers/gpu/drm/drm_dp_mst_topology.c
:export:
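As a brief illustration of the new refcounting scheme from the driver side (a
hedged sketch only: ``my_mst_connector`` and its destroy hook are hypothetical,
while drm_dp_mst_put_port_malloc() is one of the exported helpers covered
above), a driver object that outlives topology changes keeps its port alive
with a malloc reference and drops it on destruction:

.. code-block:: c

   #include <drm/drm_connector.h>
   #include <drm/drm_dp_mst_helper.h>
   #include <linux/slab.h>

   /* Hypothetical driver connector pinning its MST port with a malloc
    * reference (taken earlier with drm_dp_mst_get_port_malloc()). */
   struct my_mst_connector {
           struct drm_connector base;
           struct drm_dp_mst_port *port;
   };

   static void my_mst_connector_destroy(struct drm_connector *connector)
   {
           struct my_mst_connector *conn =
                   container_of(connector, struct my_mst_connector, base);

           drm_connector_cleanup(connector);
           /* drop the malloc reference; the port may now be freed */
           drm_dp_mst_put_port_malloc(conn->port);
           kfree(conn);
   }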
Topology Lifetime Internals
---------------------------
These functions aren't exported to drivers, but are documented here to help make
the MST topology helpers easier to understand
.. kernel-doc:: drivers/gpu/drm/drm_dp_mst_topology.c
:functions: drm_dp_mst_topology_try_get_mstb drm_dp_mst_topology_get_mstb
drm_dp_mst_topology_put_mstb
drm_dp_mst_topology_try_get_port drm_dp_mst_topology_get_port
drm_dp_mst_topology_put_port
drm_dp_mst_get_mstb_malloc drm_dp_mst_put_mstb_malloc
MIPI DSI Helper Functions Reference
===================================
@ -274,18 +296,6 @@ SCDC Helper Functions Reference
.. kernel-doc:: drivers/gpu/drm/drm_scdc_helper.c
:export:
Rectangle Utilities Reference
=============================
.. kernel-doc:: include/drm/drm_rect.h
:doc: rect utils
.. kernel-doc:: include/drm/drm_rect.h
:internal:
.. kernel-doc:: drivers/gpu/drm/drm_rect.c
:export:
HDMI Infoframes Helper Reference
================================
@ -300,6 +310,18 @@ libraries and hence is also included here.
.. kernel-doc:: drivers/video/hdmi.c
:export:
Rectangle Utilities Reference
=============================
.. kernel-doc:: include/drm/drm_rect.h
:doc: rect utils
.. kernel-doc:: include/drm/drm_rect.h
:internal:
.. kernel-doc:: drivers/gpu/drm/drm_rect.c
:export:
Flip-work Helper Reference
==========================


@ -410,102 +410,6 @@ Encoder Functions Reference
.. kernel-doc:: drivers/gpu/drm/drm_encoder.c
:export:
KMS Initialization and Cleanup
==============================
A KMS device is abstracted and exposed as a set of planes, CRTCs,
encoders and connectors. KMS drivers must thus create and initialize all
those objects at load time after initializing mode setting.
CRTCs (:c:type:`struct drm_crtc <drm_crtc>`)
--------------------------------------------
A CRTC is an abstraction representing a part of the chip that contains a
pointer to a scanout buffer. Therefore, the number of CRTCs available
determines how many independent scanout buffers can be active at any
given time. The CRTC structure contains several fields to support this:
a pointer to some video memory (abstracted as a frame buffer object), a
display mode, and an (x, y) offset into the video memory to support
panning or configurations where one piece of video memory spans multiple
CRTCs.
CRTC Initialization
~~~~~~~~~~~~~~~~~~~
A KMS device must create and register at least one struct
:c:type:`struct drm_crtc <drm_crtc>` instance. The instance is
allocated and zeroed by the driver, possibly as part of a larger
structure, and registered with a call to :c:func:`drm_crtc_init()`
with a pointer to CRTC functions.
Cleanup
-------
The DRM core manages its objects' lifetime. When an object is not needed
anymore the core calls its destroy function, which must clean up and
free every resource allocated for the object. Every
:c:func:`drm_\*_init()` call must be matched with a corresponding
:c:func:`drm_\*_cleanup()` call to cleanup CRTCs
(:c:func:`drm_crtc_cleanup()`), planes
(:c:func:`drm_plane_cleanup()`), encoders
(:c:func:`drm_encoder_cleanup()`) and connectors
(:c:func:`drm_connector_cleanup()`). Furthermore, connectors that
have been added to sysfs must be removed by a call to
:c:func:`drm_connector_unregister()` before calling
:c:func:`drm_connector_cleanup()`.
Connectors state change detection must be cleanup up with a call to
:c:func:`drm_kms_helper_poll_fini()`.
Output discovery and initialization example
-------------------------------------------
.. code-block:: c
void intel_crt_init(struct drm_device *dev)
{
struct drm_connector *connector;
struct intel_output *intel_output;
intel_output = kzalloc(sizeof(struct intel_output), GFP_KERNEL);
if (!intel_output)
return;
connector = &intel_output->base;
drm_connector_init(dev, &intel_output->base,
&intel_crt_connector_funcs, DRM_MODE_CONNECTOR_VGA);
drm_encoder_init(dev, &intel_output->enc, &intel_crt_enc_funcs,
DRM_MODE_ENCODER_DAC);
drm_connector_attach_encoder(&intel_output->base,
&intel_output->enc);
/* Set up the DDC bus. */
intel_output->ddc_bus = intel_i2c_create(dev, GPIOA, "CRTDDC_A");
if (!intel_output->ddc_bus) {
dev_printk(KERN_ERR, &dev->pdev->dev, "DDC bus registration "
"failed.\n");
return;
}
intel_output->type = INTEL_OUTPUT_ANALOG;
connector->interlace_allowed = 0;
connector->doublescan_allowed = 0;
drm_encoder_helper_add(&intel_output->enc, &intel_crt_helper_funcs);
drm_connector_helper_add(connector, &intel_crt_connector_helper_funcs);
drm_connector_register(connector);
}
In the example above (taken from the i915 driver), a CRTC, connector and
encoder combination is created. A device-specific i2c bus is also
created for fetching EDID data and performing monitor detection. Once
the process is complete, the new connector is registered with sysfs to
make its properties available to applications.
KMS Locking
===========


@ -238,6 +238,14 @@ DRM specific patterns. Note that ENOTTY has the slightly unintuitive meaning of
Testing and validation
======================
Testing Requirements for userspace API
--------------------------------------
New cross-driver userspace interface extensions, like new IOCTL, new KMS
properties, new files in sysfs or anything else that constitutes an API change
should have driver-agnostic testcases in IGT for that feature, if such a test
can be reasonably made using IGT for the target hardware.
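A minimal IGT skeleton for such a testcase could look like the following
(illustrative only: the subtest name and body are hypothetical, while igt_main,
igt_fixture, igt_subtest and drm_open_driver() are existing IGT facilities)::

  #include <unistd.h>
  #include "igt.h"

  igt_main
  {
          int fd = -1;

          igt_fixture
                  fd = drm_open_driver(DRIVER_ANY);

          igt_subtest("basic")
                  igt_assert(fd >= 0); /* exercise the new interface here */

          igt_fixture
                  close(fd);
  }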
Validating changes with IGT
---------------------------


@ -0,0 +1,488 @@
.. SPDX-License-Identifier: GPL-2.0
==============================
drm/komeda Arm display driver
==============================
The drm/komeda driver supports the Arm display processor D71 and later products.
This document gives a brief overview of the driver design: how it works and why
it is designed this way.
Overview of D71 like display IPs
================================
From D71 on, Arm display IP adopts a flexible and modularized architecture. A
display pipeline is made up of multiple individual and functional pipeline
stages called components, and every component has some specific capabilities
that apply a particular processing step to the pixel data flowing through the
pipeline.
Typical D71 components:
Layer
-----
Layer is the first pipeline stage, which prepares the pixel data for the next
stage. It fetches pixels from memory, decodes them if they are AFBC, rotates the
source image, unpacks or converts YUV pixels to the device internal RGB pixels,
then adjusts the color_space of the pixels if needed.
Scaler
------
As its name suggests, the scaler is responsible for scaling, and D71 also
supports image enhancement via the scaler.
The usage of the scaler is very flexible: it can be connected to a layer output
for layer scaling, or connected to the compositor to scale the whole display
frame, with the output then fed into wb_layer which writes it into memory.
Compositor (compiz)
-------------------
Compositor blends multiple layers or pixel data flows into one single display
frame. Its output frame can be fed into the post image processor for display on
the monitor, or fed into wb_layer and written to memory at the same time.
The user can also insert a scaler between the compositor and wb_layer to
downscale the display frame first and then write it to memory.
Writeback Layer (wb_layer)
--------------------------
The writeback layer does the opposite of Layer: it connects to compiz and
writes the composition result to memory.
Post image processor (improc)
-----------------------------
Post image processor adjusts frame data like gamma and color space to fit the
requirements of the monitor.
Timing controller (timing_ctrlr)
--------------------------------
The final stage of the display pipeline, the timing controller does not handle
pixel data, but only controls the display timing.
Merger
------
The D71 scaler mostly has only half the horizontal input/output capability of a
Layer; for example, if a Layer supports a 4K input size, the scaler can only
support 2K input/output at the same time. To achieve full frame scaling, D71
introduces Layer Split, which splits the whole image into two halves, feeds
them to two Layers (A and B), and scales them independently. After scaling, the
results are fed to the merger, which merges the two halves back into one image
and outputs the merged result to compiz.
Splitter
--------
Similar to Layer Split, but the Splitter is used for writeback: it splits the
compiz result into two parts and then feeds them to two scalers.
Possible D71 Pipeline usage
===========================
Benefiting from the modularized architecture, D71 pipelines can be easily
adjusted to fit different usages. D71 has two pipelines, which support two
types of working mode:
- Dual display mode
Two pipelines work independently and separately to drive two display outputs.
- Single display mode
Two pipelines work together to drive only one display output.
In this mode, pipeline_B doesn't work independently, but outputs its
composition result into pipeline_A, and its pixel timing is also derived from
pipeline_A.timing_ctrlr. Pipeline_B works just like a "slave" of
pipeline_A (the master).
Single pipeline data flow
-------------------------
.. kernel-render:: DOT
:alt: Single pipeline digraph
:caption: Single pipeline data flow
digraph single_ppl {
rankdir=LR;
subgraph {
"Memory";
"Monitor";
}
subgraph cluster_pipeline {
style=dashed
node [shape=box]
{
node [bgcolor=grey style=dashed]
"Scaler-0";
"Scaler-1";
"Scaler-0/1"
}
node [bgcolor=grey style=filled]
"Layer-0" -> "Scaler-0"
"Layer-1" -> "Scaler-0"
"Layer-2" -> "Scaler-1"
"Layer-3" -> "Scaler-1"
"Layer-0" -> "Compiz"
"Layer-1" -> "Compiz"
"Layer-2" -> "Compiz"
"Layer-3" -> "Compiz"
"Scaler-0" -> "Compiz"
"Scaler-1" -> "Compiz"
"Compiz" -> "Scaler-0/1" -> "Wb_layer"
"Compiz" -> "Improc" -> "Timing Controller"
}
"Wb_layer" -> "Memory"
"Timing Controller" -> "Monitor"
}
Dual pipeline with Slave enabled
--------------------------------
.. kernel-render:: DOT
:alt: Slave pipeline digraph
:caption: Slave pipeline enabled data flow
digraph slave_ppl {
rankdir=LR;
subgraph {
"Memory";
"Monitor";
}
node [shape=box]
subgraph cluster_pipeline_slave {
style=dashed
label="Slave Pipeline_B"
node [shape=box]
{
node [bgcolor=grey style=dashed]
"Slave.Scaler-0";
"Slave.Scaler-1";
}
node [bgcolor=grey style=filled]
"Slave.Layer-0" -> "Slave.Scaler-0"
"Slave.Layer-1" -> "Slave.Scaler-0"
"Slave.Layer-2" -> "Slave.Scaler-1"
"Slave.Layer-3" -> "Slave.Scaler-1"
"Slave.Layer-0" -> "Slave.Compiz"
"Slave.Layer-1" -> "Slave.Compiz"
"Slave.Layer-2" -> "Slave.Compiz"
"Slave.Layer-3" -> "Slave.Compiz"
"Slave.Scaler-0" -> "Slave.Compiz"
"Slave.Scaler-1" -> "Slave.Compiz"
}
subgraph cluster_pipeline_master {
style=dashed
label="Master Pipeline_A"
node [shape=box]
{
node [bgcolor=grey style=dashed]
"Scaler-0";
"Scaler-1";
"Scaler-0/1"
}
node [bgcolor=grey style=filled]
"Layer-0" -> "Scaler-0"
"Layer-1" -> "Scaler-0"
"Layer-2" -> "Scaler-1"
"Layer-3" -> "Scaler-1"
"Slave.Compiz" -> "Compiz"
"Layer-0" -> "Compiz"
"Layer-1" -> "Compiz"
"Layer-2" -> "Compiz"
"Layer-3" -> "Compiz"
"Scaler-0" -> "Compiz"
"Scaler-1" -> "Compiz"
"Compiz" -> "Scaler-0/1" -> "Wb_layer"
"Compiz" -> "Improc" -> "Timing Controller"
}
"Wb_layer" -> "Memory"
"Timing Controller" -> "Monitor"
}
Sub-pipelines for input and output
----------------------------------
A complete display pipeline can be easily divided into three sub-pipelines
according to the in/out usage.
Layer(input) pipeline
~~~~~~~~~~~~~~~~~~~~~
.. kernel-render:: DOT
:alt: Layer data digraph
:caption: Layer (input) data flow
digraph layer_data_flow {
rankdir=LR;
node [shape=box]
{
node [bgcolor=grey style=dashed]
"Scaler-n";
}
"Layer-n" -> "Scaler-n" -> "Compiz"
}
.. kernel-render:: DOT
:alt: Layer Split digraph
:caption: Layer Split pipeline
digraph layer_data_flow {
rankdir=LR;
node [shape=box]
"Layer-0/1" -> "Scaler-0" -> "Merger"
"Layer-2/3" -> "Scaler-1" -> "Merger"
"Merger" -> "Compiz"
}
Writeback(output) pipeline
~~~~~~~~~~~~~~~~~~~~~~~~~~
.. kernel-render:: DOT
:alt: writeback digraph
:caption: Writeback(output) data flow
digraph writeback_data_flow {
rankdir=LR;
node [shape=box]
{
node [bgcolor=grey style=dashed]
"Scaler-n";
}
"Compiz" -> "Scaler-n" -> "Wb_layer"
}
.. kernel-render:: DOT
:alt: split writeback digraph
:caption: Writeback(output) Split data flow
digraph writeback_data_flow {
rankdir=LR;
node [shape=box]
"Compiz" -> "Splitter"
"Splitter" -> "Scaler-0" -> "Merger"
"Splitter" -> "Scaler-1" -> "Merger"
"Merger" -> "Wb_layer"
}
Display output pipeline
~~~~~~~~~~~~~~~~~~~~~~~
.. kernel-render:: DOT
:alt: display digraph
:caption: display output data flow
digraph single_ppl {
rankdir=LR;
node [shape=box]
"Compiz" -> "Improc" -> "Timing Controller"
}
In the following sections we'll see that these three sub-pipelines are handled
by KMS plane/wb_conn/crtc respectively.
Komeda Resource abstraction
===========================
struct komeda_pipeline/component
--------------------------------
To fully utilize and easily access/configure the HW, the driver side also uses
a similar architecture: Pipeline/Component to describe the HW features and
capabilities, and a specific component includes two parts:
- Data flow controlling.
- Specific component capabilities and features.
So the driver defines a common header struct, komeda_component, to describe the
data flow control, and all specific components are subclasses of this base
structure.
.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_pipeline.h
:internal:
Resource discovery and initialization
=====================================
Pipeline and component are used to describe how to handle the pixel data. We
still need a @struct komeda_dev to describe the whole view of the device and
the control abilities of the device.
We have &komeda_dev, &komeda_pipeline and &komeda_component; now fill the device
with pipelines. Since komeda is not only for D71 but also intended for later
products, we'd better share as much as possible between different products. To
achieve this, the komeda device is split into two layers: CORE and CHIP.
- CORE: for common features and capabilities handling.
- CHIP: for register programming and HW specific feature (limitation) handling.
CORE can access CHIP by three chip function structures:
- struct komeda_dev_funcs
- struct komeda_pipeline_funcs
- struct komeda_component_funcs
.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_dev.h
:internal:
Format handling
===============
.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_format_caps.h
:internal:
.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_framebuffer.h
:internal:
Attach komeda_dev to DRM-KMS
============================
Komeda abstracts resources by pipeline/component, but DRM-KMS uses
crtc/plane/connector. One KMS object cannot be represented by one single
component, since the requirements of a single KMS object usually cannot be met
by a single component but need multiple components working together.
For example, setting the mode, gamma and CTM in KMS all target the CRTC object,
but komeda needs compiz, improc and timing_ctrlr to work together to meet these
requirements, and a KMS plane may require multiple komeda resources:
layer/scaler/compiz.
So, one KMS-Obj represents a sub-pipeline of komeda resources.
- Plane: `Layer(input) pipeline`_
- Wb_connector: `Writeback(output) pipeline`_
- Crtc: `Display output pipeline`_
So, for komeda, we treat the KMS crtc/plane/connector as users of pipeline and
component, and at any one time a pipeline/component can only be used by one
user. Pipeline/component are treated as private objects of DRM-KMS, and their
state is managed by drm_atomic_state as well.
How to map plane to Layer(input) pipeline
-----------------------------------------
Komeda has multiple Layer input pipelines, see:
- `Single pipeline data flow`_
- `Dual pipeline with Slave enabled`_
The easiest way is binding a plane to a fixed Layer pipeline, but consider the
komeda capabilities:
- Layer Split, See `Layer(input) pipeline`_
Layer Split is a quite complicated feature, which splits a big image into two
parts and handles them with two layers and two scalers individually. But it
introduces an edge problem or artifact in the middle of the image after the
split. To avoid such a problem, it needs a complicated split calculation and
some special configuration of the layer and scaler. We'd better hide such
HW-related complexity from user mode.
- Slave pipeline, See `Dual pipeline with Slave enabled`_
Since the compiz component doesn't output an alpha value, the slave pipeline
can only be used to compose the bottom layers. The komeda driver wants to
hide this limitation from the user. The way to do this is to pick a suitable
Layer according to plane_state->zpos.
So for komeda, a KMS plane doesn't represent a fixed komeda layer pipeline,
but multiple Layers with the same capabilities. Komeda will select one or more
Layers to fit the requirements of one KMS plane.
Make component/pipeline to be drm_private_obj
---------------------------------------------
Add :c:type:`drm_private_obj` to :c:type:`komeda_component`, :c:type:`komeda_pipeline`
.. code-block:: c
struct komeda_component {
struct drm_private_obj obj;
...
}
struct komeda_pipeline {
struct drm_private_obj obj;
...
}
Tracking component_state/pipeline_state by drm_atomic_state
-----------------------------------------------------------
Add :c:type:`drm_private_state` and user to :c:type:`komeda_component_state`,
:c:type:`komeda_pipeline_state`
.. code-block:: c
struct komeda_component_state {
struct drm_private_state obj;
void *binding_user;
...
}
struct komeda_pipeline_state {
struct drm_private_state obj;
struct drm_crtc *crtc;
...
}
komeda component validation
---------------------------
Komeda has multiple types of components, but the process of validation is
similar, usually including the following steps:
.. code-block:: c
int komeda_xxxx_validate(struct komeda_component_xxx xxx_comp,
struct komeda_component_output *input_dflow,
struct drm_plane/crtc/connector *user,
struct drm_plane/crtc/connector_state, *user_state)
{
Step 1: check if the component is needed; for example, the scaler is optional
depending on the user_state. If it is unneeded, just return, and the caller
will put the data flow into the next stage.
Step 2: check the user_state against the component features and capabilities to
see if the requirements can be met; if not, return failure.
Step 3: get the component_state from drm_atomic_state, and try to set the user
on the component; fail if the component has already been assigned to
another user.
Step 4: configure the component_state, like setting its input component and
converting the user_state to component specific state.
Step 5: adjust the input_dflow and prepare it for the next stage.
}
komeda_kms Abstraction
----------------------
.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_kms.h
:internal:
komeda_kms Functions
--------------------
.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_crtc.c
:internal:
.. kernel-doc:: drivers/gpu/drm/arm/display/komeda/komeda_plane.c
:internal:
Build komeda to be a Linux module driver
========================================
Now we have two levels of devices:
- komeda_dev: describes the real display hardware.
- komeda_kms_dev: attaches or connects komeda_dev to DRM-KMS.
All komeda operations are supplied or handled by komeda_dev or komeda_kms_dev;
the module driver is only a simple wrapper that passes the Linux driver
commands (probe/remove/pm) on to komeda_dev or komeda_kms_dev.
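For instance (a hedged sketch only: komeda_bind()/komeda_unbind() are
hypothetical helpers standing in for whatever komeda_dev/komeda_kms_dev
creation and teardown the real driver performs), the wrapper is little more
than a standard platform driver:

.. code-block:: c

   #include <linux/module.h>
   #include <linux/of.h>
   #include <linux/platform_device.h>

   /* hypothetical helpers wrapping komeda_dev/komeda_kms_dev setup/teardown */
   int komeda_bind(struct device *dev);
   void komeda_unbind(struct device *dev);

   static int komeda_platform_probe(struct platform_device *pdev)
   {
           /* discover pipelines/components from DT, then attach to DRM-KMS */
           return komeda_bind(&pdev->dev);
   }

   static int komeda_platform_remove(struct platform_device *pdev)
   {
           komeda_unbind(&pdev->dev);
           return 0;
   }

   static const struct of_device_id komeda_of_match[] = {
           { .compatible = "arm,mali-d71" },
           {},
   };

   static struct platform_driver komeda_platform_driver = {
           .probe  = komeda_platform_probe,
           .remove = komeda_platform_remove,
           .driver = {
                   .name           = "komeda",
                   .of_match_table = komeda_of_match,
           },
   };
   module_platform_driver(komeda_platform_driver);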


@ -82,30 +82,6 @@ events for atomic commits correctly. But fixing these bugs is good anyway.
Contact: Daniel Vetter, respective driver maintainers
Better manual-upload support for atomic
---------------------------------------
This would be especially useful for tinydrm:
- Add a struct drm_rect dirty_clip to drm_crtc_state. When duplicating the
crtc state, clear that to the max values, x/y = 0 and w/h = MAX_INT, in
__drm_atomic_helper_crtc_duplicate_state().
- Move tinydrm_merge_clips into drm_framebuffer.c, dropping the tinydrm\_
prefix ofc and using drm_fb\_. drm_framebuffer.c makes sense since this
is a function useful to implement the fb->dirty function.
- Create a new drm_fb_dirty function which does essentially what e.g.
mipi_dbi_fb_dirty does. You can use e.g. drm_atomic_helper_update_plane as the
template. But instead of doing a simple full-screen plane update, this new
helper also sets crtc_state->dirty_clip to the right coordinates. And of
course it needs to check whether the fb is actually active (and maybe where),
so there's some book-keeping involved. There's also some good fun involved in
scaling things appropriately. For that case we might simply give up and
declare the entire area covered by the plane as dirty.
Contact: Noralf Trønnes, Daniel Vetter
Fallout from atomic KMS
-----------------------
@ -209,6 +185,36 @@ Would be great to refactor this all into a set of small common helpers.
Contact: Daniel Vetter
Generic fbdev defio support
---------------------------
The defio support code in the fbdev core has some very specific requirements,
which means drivers need to have a special framebuffer for fbdev. This prevents
us from using the generic fbdev emulation code everywhere. The main issue is
that it uses some fields in struct page itself, which breaks shmem gem objects
(and other things).
A possible solution would be to write our own defio mmap code in the drm fbdev
emulation. It would need to fully wrap the existing mmap ops, forwarding
everything after it has done the write-protect/mkwrite trickery:
- In the drm_fbdev_fb_mmap helper, if we need defio, change the
default page prots to write-protected with something like this::
vma->vm_page_prot = pgprot_wrprotect(vma->vm_page_prot);
- Set the mkwrite and fsync callbacks with implementations similar to the core
fbdev defio code. These should all work on plain ptes; they don't actually
require a struct page.
- Track the dirty pages in a separate structure (bitfield with one bit per page
should work) to avoid clobbering struct page.
Might be good to also have some igt testcases for this.
Contact: Daniel Vetter, Noralf Tronnes
Put a reservation_object into drm_gem_object
--------------------------------------------
@ -256,6 +262,44 @@ As a reference, take a look at the conversions already completed in drm core.
Contact: Sean Paul, respective driver maintainers
Rename CMA helpers to DMA helpers
---------------------------------
CMA (standing for contiguous memory allocator) is really a bit of an accident
of what these were used for first; a much better name would be DMA helpers. In
the text these should even be called coherent DMA memory helpers (so maybe CDM,
but no one knows what that means) since underneath they just use
dma_alloc_coherent().
Contact: Laurent Pinchart, Daniel Vetter
Convert direct mode.vrefresh accesses to use drm_mode_vrefresh()
----------------------------------------------------------------
drm_display_mode.vrefresh isn't guaranteed to be populated. As such, using it
is risky and has been known to cause div-by-zero bugs. Fortunately, drm core
has a helper which will use mode.vrefresh if it's !0 and will calculate it from
the timings when it's 0.
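A typical conversion then looks like this (hypothetical driver snippet)::

  /* before: div-by-zero prone when vrefresh was never filled in */
  frame_us = DIV_ROUND_UP(1000000, mode->vrefresh);

  /* after: drm_mode_vrefresh() falls back to calculating from the timings */
  frame_us = DIV_ROUND_UP(1000000, drm_mode_vrefresh(mode));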
Use simple search/replace, or (more fun) cocci to replace instances of direct
vrefresh access with a call to the helper. Check out
https://lists.freedesktop.org/archives/dri-devel/2019-January/205186.html for
inspiration.
Once all instances of vrefresh have been converted, remove vrefresh from
drm_display_mode to avoid future use.
Contact: Sean Paul
Remove drm_display_mode.hsync
-----------------------------
We have drm_mode_hsync() to calculate this from hsync_start/end. Since drivers
shouldn't/don't use this, remove this member to avoid any temptation to use it
in the future. If there is any debug code using drm_display_mode.hsync, convert
it to use drm_mode_hsync() instead.
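For example, a hypothetical debug print would be converted like this::

  /* before */
  DRM_DEBUG_KMS("hsync: %d kHz\n", mode->hsync);

  /* after: derive it from the timings via the helper instead */
  DRM_DEBUG_KMS("hsync: %d kHz\n", drm_mode_hsync(mode));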
Contact: Sean Paul
Core refactorings
=================
@ -354,13 +398,6 @@ KMS cleanups
Some of these date from the very introduction of KMS in 2008 ...
- drm_mode_config.crtc_idr is misnamed, since it contains all KMS objects. Should
be renamed to drm_mode_config.object_idr.
- drm_display_mode doesn't need to be derived from drm_mode_object. That's a
leftover from older (never merged into upstream) KMS designs where modes were
set using their ID, including support to add/remove modes.
- Make ->funcs and ->helper_private vtables optional. There's a bunch of empty
function tables in drivers, but before we can remove them we need to make sure
that all the users in helpers and drivers do correctly check for a NULL
@ -432,21 +469,10 @@ those drivers as simple as possible, so lots of room for refactoring:
one of the ideas for having a shared dsi/dbi helper, abstracting away the
transport details more.
- tinydrm_gem_cma_prime_import_sg_table should probably go into the cma
helpers, as a _vmapped variant (since not every driver needs the vmap).
And tinydrm_gem_cma_free_object could then be merged into
drm_gem_cma_free_object().
- tinydrm_fb_create we could move into drm_simple_pipe, only need to add
the fb_create hook to drm_simple_pipe_funcs, which would again simplify a
bunch of things (since it gives you a one-stop vfunc for simple drivers).
- Quick aside: The unregister devm stuff is kinda getting the lifetimes of
a drm_device wrong. Doesn't matter, since everyone else gets it wrong
too :-)
- also rework the drm_framebuffer_funcs->dirty hook wire-up, see above.
Contact: Noralf Trønnes, Daniel Vetter
AMD DC Display Driver

View File

@ -23,17 +23,6 @@ CRC API Improvements
- Add igt test to check extreme alpha values i.e. fully opaque and fully
transparent (intermediate values are affected by hw-specific rounding modes).
Vblank issues
-------------
Some IGT test cases are failing. Need to analyze why and fix the issues:
- plain-flip-fb-recreate
- plain-flip-ts-check
- flip-vs-blocking-wf-vblank
- plain-flip-fb-recreate-interruptible
- flip-vs-wf_vblank-interruptible
Runtime Configuration
---------------------

View File

@ -1149,13 +1149,26 @@ S: Supported
F: drivers/gpu/drm/arm/hdlcd_*
F: Documentation/devicetree/bindings/display/arm,hdlcd.txt
ARM KOMEDA DRM-KMS DRIVER
M: James (Qian) Wang <james.qian.wang@arm.com>
M: Liviu Dudau <liviu.dudau@arm.com>
L: Mali DP Maintainers <malidp@foss.arm.com>
S: Supported
T: git git://linux-arm.org/linux-ld.git for-upstream/mali-dp
F: drivers/gpu/drm/arm/display/include/
F: drivers/gpu/drm/arm/display/komeda/
F: Documentation/devicetree/bindings/display/arm/arm,komeda.txt
F: Documentation/gpu/komeda-kms.rst
ARM MALI-DP DRM DRIVER
M: Liviu Dudau <liviu.dudau@arm.com>
M: Brian Starkey <brian.starkey@arm.com>
M: Mali DP Maintainers <malidp@foss.arm.com>
L: Mali DP Maintainers <malidp@foss.arm.com>
S: Supported
T: git git://linux-arm.org/linux-ld.git for-upstream/mali-dp
F: drivers/gpu/drm/arm/
F: Documentation/devicetree/bindings/display/arm,malidp.txt
F: Documentation/gpu/afbc.rst
ARM MFM AND FLOPPY DRIVERS
M: Ian Molton <spyro@f2s.com>
@ -4900,10 +4913,11 @@ F: Documentation/devicetree/bindings/display/multi-inno,mi0283qt.txt
DRM DRIVER FOR MSM ADRENO GPU
M: Rob Clark <robdclark@gmail.com>
M: Sean Paul <sean@poorly.run>
L: linux-arm-msm@vger.kernel.org
L: dri-devel@lists.freedesktop.org
L: freedreno@lists.freedesktop.org
T: git git://people.freedesktop.org/~robclark/linux
T: git https://gitlab.freedesktop.org/drm/msm.git
S: Maintained
F: drivers/gpu/drm/msm/
F: include/uapi/drm/msm_drm.h
@ -4943,6 +4957,7 @@ DRM DRIVER FOR QXL VIRTUAL GPU
M: Dave Airlie <airlied@redhat.com>
M: Gerd Hoffmann <kraxel@redhat.com>
L: virtualization@lists.linux-foundation.org
L: spice-devel@lists.freedesktop.org
T: git git://anongit.freedesktop.org/drm/drm-misc
S: Maintained
F: drivers/gpu/drm/qxl/
@ -4963,6 +4978,12 @@ S: Orphan / Obsolete
F: drivers/gpu/drm/sis/
F: include/uapi/drm/sis_drm.h
DRM DRIVER FOR SITRONIX ST7701 PANELS
M: Jagan Teki <jagan@amarulasolutions.com>
S: Maintained
F: drivers/gpu/drm/panel/panel-sitronix-st7701.c
F: Documentation/devicetree/bindings/display/panel/sitronix,st7701.txt
DRM DRIVER FOR SITRONIX ST7586 PANELS
M: David Lechner <david@lechnology.com>
S: Maintained
@ -4979,6 +5000,13 @@ DRM DRIVER FOR TDFX VIDEO CARDS
S: Orphan / Obsolete
F: drivers/gpu/drm/tdfx/
DRM DRIVER FOR TPO TPG110 PANELS
M: Linus Walleij <linus.walleij@linaro.org>
T: git git://anongit.freedesktop.org/drm/drm-misc
S: Maintained
F: drivers/gpu/drm/panel/panel-tpo-tpg110.c
F: Documentation/devicetree/bindings/display/panel/tpo,tpg110.txt
DRM DRIVER FOR USB DISPLAYLINK VIDEO ADAPTERS
M: Dave Airlie <airlied@redhat.com>
R: Sean Paul <sean@poorly.run>
@ -4987,6 +5015,16 @@ S: Odd Fixes
F: drivers/gpu/drm/udl/
T: git git://anongit.freedesktop.org/drm/drm-misc
DRM DRIVER FOR VIRTUAL KERNEL MODESETTING (VKMS)
M: Rodrigo Siqueira <rodrigosiqueiramelo@gmail.com>
R: Haneen Mohammed <hamohammed.sa@gmail.com>
R: Daniel Vetter <daniel@ffwll.ch>
T: git git://anongit.freedesktop.org/drm/drm-misc
S: Maintained
L: dri-devel@lists.freedesktop.org
F: drivers/gpu/drm/vkms/
F: Documentation/gpu/vkms.rst
DRM DRIVER FOR VMWARE VIRTUAL GPU
M: "VMware Graphics" <linux-graphics-maintainer@vmware.com>
M: Thomas Hellstrom <thellstrom@vmware.com>
@ -5056,7 +5094,6 @@ F: Documentation/devicetree/bindings/display/atmel/
T: git git://anongit.freedesktop.org/drm/drm-misc
DRM DRIVERS FOR BRIDGE CHIPS
M: Archit Taneja <architt@codeaurora.org>
M: Andrzej Hajda <a.hajda@samsung.com>
R: Laurent Pinchart <Laurent.pinchart@ideasonboard.com>
S: Maintained

View File

@ -15,6 +15,7 @@
#include <linux/export.h>
#include <linux/acpi.h>
#include <linux/mfd/intel_soc_pmic.h>
#include <linux/regmap.h>
#include <acpi/acpi_lpat.h>
#include "intel_pmic.h"
@ -36,6 +37,8 @@ struct intel_pmic_opregion {
struct intel_pmic_regs_handler_ctx ctx;
};
static struct intel_pmic_opregion *intel_pmic_opregion;
static int pmic_get_reg_bit(int address, struct pmic_table *table,
int count, int *reg, int *bit)
{
@ -304,6 +307,7 @@ int intel_pmic_install_opregion_handler(struct device *dev, acpi_handle handle,
}
opregion->data = d;
intel_pmic_opregion = opregion;
return 0;
out_remove_thermal_handler:
@ -319,3 +323,60 @@ out_error:
return ret;
}
EXPORT_SYMBOL_GPL(intel_pmic_install_opregion_handler);
/**
* intel_soc_pmic_exec_mipi_pmic_seq_element - Execute PMIC MIPI sequence
* @i2c_address: I2C client address for the PMIC
* @reg_address: PMIC register address
* @value: New value for the register bits to change
* @mask: Mask indicating which register bits to change
*
* DSI LCD panels describe an initialization sequence in the i915 VBT (Video
* BIOS Tables) using so called MIPI sequences. One possible element in these
* sequences is a PMIC specific element of 15 bytes.
*
* This function executes these PMIC specific elements sending the embedded
* commands to the PMIC.
*
* Return 0 on success, < 0 on failure.
*/
int intel_soc_pmic_exec_mipi_pmic_seq_element(u16 i2c_address, u32 reg_address,
u32 value, u32 mask)
{
struct intel_pmic_opregion_data *d;
int ret;
if (!intel_pmic_opregion) {
pr_warn("%s: No PMIC registered\n", __func__);
return -ENXIO;
}
d = intel_pmic_opregion->data;
mutex_lock(&intel_pmic_opregion->lock);
if (d->exec_mipi_pmic_seq_element) {
ret = d->exec_mipi_pmic_seq_element(intel_pmic_opregion->regmap,
i2c_address, reg_address,
value, mask);
} else if (d->pmic_i2c_address) {
if (i2c_address == d->pmic_i2c_address) {
ret = regmap_update_bits(intel_pmic_opregion->regmap,
reg_address, mask, value);
} else {
pr_err("%s: Unexpected i2c-addr: 0x%02x (reg-addr 0x%x value 0x%x mask 0x%x)\n",
__func__, i2c_address, reg_address, value, mask);
ret = -ENXIO;
}
} else {
pr_warn("%s: Not implemented\n", __func__);
pr_warn("%s: i2c-addr: 0x%x reg-addr 0x%x value 0x%x mask 0x%x\n",
__func__, i2c_address, reg_address, value, mask);
ret = -EOPNOTSUPP;
}
mutex_unlock(&intel_pmic_opregion->lock);
return ret;
}
EXPORT_SYMBOL_GPL(intel_soc_pmic_exec_mipi_pmic_seq_element);
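/*
 * Not part of the patch above: a sketch of how a caller might unpack the
 * 15-byte PMIC sequence element described in the kerneldoc and hand it to
 * intel_soc_pmic_exec_mipi_pmic_seq_element(). The byte layout assumed here
 * (1 flag byte, 2-byte i2c address, then 4-byte register/value/mask, all
 * little-endian) is for illustration only; needs <asm/unaligned.h>.
 */
static const u8 *exec_pmic_element_example(const u8 *data)
{
	u16 i2c_address = get_unaligned_le16(data + 1);
	u32 reg_address = get_unaligned_le32(data + 3);
	u32 value = get_unaligned_le32(data + 7);
	u32 mask = get_unaligned_le32(data + 11);

	if (intel_soc_pmic_exec_mipi_pmic_seq_element(i2c_address, reg_address,
						      value, mask))
		pr_debug("PMIC MIPI sequence element failed\n");

	return data + 15;	/* a PMIC element is 15 bytes long */
}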

View File

@ -15,10 +15,14 @@ struct intel_pmic_opregion_data {
int (*update_aux)(struct regmap *r, int reg, int raw_temp);
int (*get_policy)(struct regmap *r, int reg, int bit, u64 *value);
int (*update_policy)(struct regmap *r, int reg, int bit, int enable);
int (*exec_mipi_pmic_seq_element)(struct regmap *r, u16 i2c_address,
u32 reg_address, u32 value, u32 mask);
struct pmic_table *power_table;
int power_table_count;
struct pmic_table *thermal_table;
int thermal_table_count;
/* For generic exec_mipi_pmic_seq_element handling */
int pmic_i2c_address;
};
int intel_pmic_install_opregion_handler(struct device *dev, acpi_handle handle, struct regmap *regmap, struct intel_pmic_opregion_data *d);

View File

@ -231,6 +231,24 @@ static int intel_cht_wc_pmic_update_power(struct regmap *regmap, int reg,
return regmap_update_bits(regmap, reg, bitmask, on ? 1 : 0);
}
static int intel_cht_wc_exec_mipi_pmic_seq_element(struct regmap *regmap,
u16 i2c_client_address,
u32 reg_address,
u32 value, u32 mask)
{
u32 address;
if (i2c_client_address > 0xff || reg_address > 0xff) {
pr_warn("%s warning addresses too big client 0x%x reg 0x%x\n",
__func__, i2c_client_address, reg_address);
return -ERANGE;
}
address = (i2c_client_address << 8) | reg_address;
return regmap_update_bits(regmap, address, mask, value);
}
/*
* The thermal table and ops are empty, we do not support the Thermal opregion
* (DPTF) due to lacking documentation.
@ -238,6 +256,7 @@ static int intel_cht_wc_pmic_update_power(struct regmap *regmap, int reg,
static struct intel_pmic_opregion_data intel_cht_wc_pmic_opregion_data = {
.get_power = intel_cht_wc_pmic_get_power,
.update_power = intel_cht_wc_pmic_update_power,
.exec_mipi_pmic_seq_element = intel_cht_wc_exec_mipi_pmic_seq_element,
.power_table = power_table,
.power_table_count = ARRAY_SIZE(power_table),
};

View File

@ -265,6 +265,7 @@ static struct intel_pmic_opregion_data intel_xpower_pmic_opregion_data = {
.power_table_count = ARRAY_SIZE(power_table),
.thermal_table = thermal_table,
.thermal_table_count = ARRAY_SIZE(thermal_table),
.pmic_i2c_address = 0x34,
};
static acpi_status intel_xpower_pmic_gpio_handler(u32 function,

View File

@ -1093,17 +1093,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
return 0;
}
static int dma_buf_debug_open(struct inode *inode, struct file *file)
{
return single_open(file, dma_buf_debug_show, NULL);
}
static const struct file_operations dma_buf_debug_fops = {
.open = dma_buf_debug_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
DEFINE_SHOW_ATTRIBUTE(dma_buf_debug);
static struct dentry *dma_buf_debugfs_dir;

View File

@ -649,7 +649,7 @@ EXPORT_SYMBOL(dma_fence_wait_any_timeout);
*/
void
dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
spinlock_t *lock, u64 context, unsigned seqno)
spinlock_t *lock, u64 context, u64 seqno)
{
BUG_ON(!lock);
BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name);

View File

@ -172,7 +172,7 @@ static bool timeline_fence_enable_signaling(struct dma_fence *fence)
static void timeline_fence_value_str(struct dma_fence *fence,
char *str, int size)
{
snprintf(str, size, "%d", fence->seqno);
snprintf(str, size, "%lld", fence->seqno);
}
static void timeline_fence_timeline_value_str(struct dma_fence *fence,

View File

@ -147,7 +147,7 @@ static void sync_print_sync_file(struct seq_file *s,
}
}
static int sync_debugfs_show(struct seq_file *s, void *unused)
static int sync_info_debugfs_show(struct seq_file *s, void *unused)
{
struct list_head *pos;
@ -178,17 +178,7 @@ static int sync_debugfs_show(struct seq_file *s, void *unused)
return 0;
}
static int sync_info_debugfs_open(struct inode *inode, struct file *file)
{
return single_open(file, sync_debugfs_show, inode->i_private);
}
static const struct file_operations sync_info_debugfs_fops = {
.open = sync_info_debugfs_open,
.read = seq_read,
.llseek = seq_lseek,
.release = single_release,
};
DEFINE_SHOW_ATTRIBUTE(sync_info_debugfs);
static __init int sync_debugfs_init(void)
{
@ -218,7 +208,7 @@ void sync_dump(void)
};
int i;
sync_debugfs_show(&s, NULL);
sync_info_debugfs_show(&s, NULL);
for (i = 0; i < s.count; i += DUMP_CHUNK) {
if ((s.count - i) > DUMP_CHUNK) {

View File

@ -144,7 +144,7 @@ char *sync_file_get_name(struct sync_file *sync_file, char *buf, int len)
} else {
struct dma_fence *fence = sync_file->fence;
snprintf(buf, len, "%s-%s%llu-%d",
snprintf(buf, len, "%s-%s%llu-%lld",
fence->ops->get_driver_name(fence),
fence->ops->get_timeline_name(fence),
fence->context,
@ -258,7 +258,7 @@ static struct sync_file *sync_file_merge(const char *name, struct sync_file *a,
i_b++;
} else {
if (pt_a->seqno - pt_b->seqno <= INT_MAX)
if (__dma_fence_is_later(pt_a->seqno, pt_b->seqno))
add_fence(fences, &i, pt_a);
else
add_fence(fences, &i, pt_b);

View File

@ -170,10 +170,6 @@ config DRM_KMS_CMA_HELPER
bool
depends on DRM
select DRM_GEM_CMA_HELPER
select DRM_KMS_FB_HELPER
select FB_SYS_FILLRECT
select FB_SYS_COPYAREA
select FB_SYS_IMAGEBLIT
help
Choose this if you need the KMS CMA helper functions

View File

@ -51,7 +51,7 @@ obj-$(CONFIG_DRM_DEBUG_SELFTEST) += selftests/
obj-$(CONFIG_DRM) += drm.o
obj-$(CONFIG_DRM_MIPI_DSI) += drm_mipi_dsi.o
obj-$(CONFIG_DRM_PANEL_ORIENTATION_QUIRKS) += drm_panel_orientation_quirks.o
obj-$(CONFIG_DRM_ARM) += arm/
obj-y += arm/
obj-$(CONFIG_DRM_TTM) += ttm/
obj-$(CONFIG_DRM_SCHED) += scheduler/
obj-$(CONFIG_DRM_TDFX) += tdfx/
@ -81,7 +81,7 @@ obj-$(CONFIG_DRM_UDL) += udl/
obj-$(CONFIG_DRM_AST) += ast/
obj-$(CONFIG_DRM_ARMADA) += armada/
obj-$(CONFIG_DRM_ATMEL_HLCDC) += atmel-hlcdc/
obj-$(CONFIG_DRM_RCAR_DU) += rcar-du/
obj-y += rcar-du/
obj-$(CONFIG_DRM_SHMOBILE) +=shmobile/
obj-y += omapdrm/
obj-$(CONFIG_DRM_SUN4I) += sun4i/

View File

@ -57,7 +57,7 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
# add asic specific block
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
ci_smc.o ci_dpm.o dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o
dce_v8_0.o gfx_v7_0.o cik_sdma.o uvd_v4_2.o vce_v2_0.o
amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce_v6_0.o si_dpm.o si_smc.o

View File

@ -411,6 +411,8 @@ struct amdgpu_fpriv {
struct amdgpu_ctx_mgr ctx_mgr;
};
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size, struct amdgpu_ib *ib);
void amdgpu_ib_free(struct amdgpu_device *adev, struct amdgpu_ib *ib,
@ -542,6 +544,11 @@ struct amdgpu_asic_funcs {
bool (*need_full_reset)(struct amdgpu_device *adev);
/* initialize doorbell layout for specific asic*/
void (*init_doorbell_index)(struct amdgpu_device *adev);
/* PCIe bandwidth usage */
void (*get_pcie_usage)(struct amdgpu_device *adev, uint64_t *count0,
uint64_t *count1);
/* do we need to reset the asic at init time (e.g., kexec) */
bool (*need_reset_on_init)(struct amdgpu_device *adev);
};
/*
@ -634,7 +641,7 @@ struct amdgpu_nbio_funcs {
void (*hdp_flush)(struct amdgpu_device *adev, struct amdgpu_ring *ring);
u32 (*get_memsize)(struct amdgpu_device *adev);
void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
bool use_doorbell, int doorbell_index);
bool use_doorbell, int doorbell_index, int doorbell_size);
void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
bool enable);
void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
@ -1042,6 +1049,8 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_invalidate_hdp(adev, r) (adev)->asic_funcs->invalidate_hdp((adev), (r))
#define amdgpu_asic_need_full_reset(adev) (adev)->asic_funcs->need_full_reset((adev))
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
/* Common functions */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);

View File

@ -28,8 +28,6 @@
#include <linux/module.h>
#include <linux/dma-buf.h>
const struct kgd2kfd_calls *kgd2kfd;
static const unsigned int compute_vmid_bitmap = 0xFF00;
/* Total memory size in system memory and all GPU VRAM. Used to
@ -47,12 +45,9 @@ int amdgpu_amdkfd_init(void)
amdgpu_amdkfd_total_mem_size *= si.mem_unit;
#ifdef CONFIG_HSA_AMD
ret = kgd2kfd_init(KFD_INTERFACE_VERSION, &kgd2kfd);
if (ret)
kgd2kfd = NULL;
ret = kgd2kfd_init();
amdgpu_amdkfd_gpuvm_init_mem_limits();
#else
kgd2kfd = NULL;
ret = -ENOENT;
#endif
@ -61,17 +56,13 @@ int amdgpu_amdkfd_init(void)
void amdgpu_amdkfd_fini(void)
{
if (kgd2kfd)
kgd2kfd->exit();
kgd2kfd_exit();
}
void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
{
const struct kfd2kgd_calls *kfd2kgd;
if (!kgd2kfd)
return;
switch (adev->asic_type) {
#ifdef CONFIG_DRM_AMDGPU_CIK
case CHIP_KAVERI:
@ -98,8 +89,8 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
return;
}
adev->kfd.dev = kgd2kfd->probe((struct kgd_dev *)adev,
adev->pdev, kfd2kgd);
adev->kfd.dev = kgd2kfd_probe((struct kgd_dev *)adev,
adev->pdev, kfd2kgd);
if (adev->kfd.dev)
amdgpu_amdkfd_total_mem_size += adev->gmc.real_vram_size;
@ -140,7 +131,7 @@ static void amdgpu_doorbell_get_kfd_info(struct amdgpu_device *adev,
void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
{
int i, n;
int i;
int last_valid_bit;
if (adev->kfd.dev) {
@ -151,7 +142,9 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
.gpuvm_size = min(adev->vm_manager.max_pfn
<< AMDGPU_GPU_PAGE_SHIFT,
AMDGPU_GMC_HOLE_START),
.drm_render_minor = adev->ddev->render->index
.drm_render_minor = adev->ddev->render->index,
.sdma_doorbell_idx = adev->doorbell_index.sdma_engine,
};
/* this is going to have a few of the MSBs set that we need to
@ -181,44 +174,29 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
&gpu_resources.doorbell_aperture_size,
&gpu_resources.doorbell_start_offset);
if (adev->asic_type < CHIP_VEGA10) {
kgd2kfd->device_init(adev->kfd.dev, &gpu_resources);
return;
}
n = (adev->asic_type < CHIP_VEGA20) ? 2 : 8;
for (i = 0; i < n; i += 2) {
/* On SOC15 the BIF is involved in routing
* doorbells using the low 12 bits of the
* address. Communicate the assignments to
* KFD. KFD uses two doorbell pages per
* process in case of 64-bit doorbells so we
* can use each doorbell assignment twice.
*/
gpu_resources.sdma_doorbell[0][i] =
adev->doorbell_index.sdma_engine0 + (i >> 1);
gpu_resources.sdma_doorbell[0][i+1] =
adev->doorbell_index.sdma_engine0 + 0x200 + (i >> 1);
gpu_resources.sdma_doorbell[1][i] =
adev->doorbell_index.sdma_engine1 + (i >> 1);
gpu_resources.sdma_doorbell[1][i+1] =
adev->doorbell_index.sdma_engine1 + 0x200 + (i >> 1);
}
/* Doorbells 0x0e0-0ff and 0x2e0-2ff are reserved for
* SDMA, IH and VCN. So don't use them for the CP.
/* Since SOC15, BIF starts to statically use the
* lower 12 bits of doorbell addresses for routing
* based on settings in registers like
* SDMA0_DOORBELL_RANGE etc..
* In order to route a doorbell to CP engine, the lower
* 12 bits of its address has to be outside the range
* set for SDMA, VCN, and IH blocks.
*/
gpu_resources.reserved_doorbell_mask = 0x1e0;
gpu_resources.reserved_doorbell_val = 0x0e0;
if (adev->asic_type >= CHIP_VEGA10) {
gpu_resources.non_cp_doorbells_start =
adev->doorbell_index.first_non_cp;
gpu_resources.non_cp_doorbells_end =
adev->doorbell_index.last_non_cp;
}
kgd2kfd->device_init(adev->kfd.dev, &gpu_resources);
kgd2kfd_device_init(adev->kfd.dev, &gpu_resources);
}
}
void amdgpu_amdkfd_device_fini(struct amdgpu_device *adev)
{
if (adev->kfd.dev) {
kgd2kfd->device_exit(adev->kfd.dev);
kgd2kfd_device_exit(adev->kfd.dev);
adev->kfd.dev = NULL;
}
}
@ -227,13 +205,13 @@ void amdgpu_amdkfd_interrupt(struct amdgpu_device *adev,
const void *ih_ring_entry)
{
if (adev->kfd.dev)
kgd2kfd->interrupt(adev->kfd.dev, ih_ring_entry);
kgd2kfd_interrupt(adev->kfd.dev, ih_ring_entry);
}
void amdgpu_amdkfd_suspend(struct amdgpu_device *adev)
{
if (adev->kfd.dev)
kgd2kfd->suspend(adev->kfd.dev);
kgd2kfd_suspend(adev->kfd.dev);
}
int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
@ -241,7 +219,7 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
int r = 0;
if (adev->kfd.dev)
r = kgd2kfd->resume(adev->kfd.dev);
r = kgd2kfd_resume(adev->kfd.dev);
return r;
}
@ -251,7 +229,7 @@ int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
int r = 0;
if (adev->kfd.dev)
r = kgd2kfd->pre_reset(adev->kfd.dev);
r = kgd2kfd_pre_reset(adev->kfd.dev);
return r;
}
@ -261,7 +239,7 @@ int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
int r = 0;
if (adev->kfd.dev)
r = kgd2kfd->post_reset(adev->kfd.dev);
r = kgd2kfd_post_reset(adev->kfd.dev);
return r;
}
@ -619,4 +597,47 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
{
return NULL;
}
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
const struct kfd2kgd_calls *f2g)
{
return NULL;
}
bool kgd2kfd_device_init(struct kfd_dev *kfd,
const struct kgd2kfd_shared_resources *gpu_resources)
{
return false;
}
void kgd2kfd_device_exit(struct kfd_dev *kfd)
{
}
void kgd2kfd_exit(void)
{
}
void kgd2kfd_suspend(struct kfd_dev *kfd)
{
}
int kgd2kfd_resume(struct kfd_dev *kfd)
{
return 0;
}
int kgd2kfd_pre_reset(struct kfd_dev *kfd)
{
return 0;
}
int kgd2kfd_post_reset(struct kfd_dev *kfd)
{
return 0;
}
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry)
{
}
#endif

View File

@ -33,7 +33,6 @@
#include "amdgpu_sync.h"
#include "amdgpu_vm.h"
extern const struct kgd2kfd_calls *kgd2kfd;
extern uint64_t amdgpu_amdkfd_total_mem_size;
struct amdgpu_device;
@ -214,4 +213,22 @@ int amdgpu_amdkfd_gpuvm_import_dmabuf(struct kgd_dev *kgd,
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo);
/* KGD2KFD callbacks */
int kgd2kfd_init(void);
void kgd2kfd_exit(void);
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
const struct kfd2kgd_calls *f2g);
bool kgd2kfd_device_init(struct kfd_dev *kfd,
const struct kgd2kfd_shared_resources *gpu_resources);
void kgd2kfd_device_exit(struct kfd_dev *kfd);
void kgd2kfd_suspend(struct kfd_dev *kfd);
int kgd2kfd_resume(struct kfd_dev *kfd);
int kgd2kfd_pre_reset(struct kfd_dev *kfd);
int kgd2kfd_post_reset(struct kfd_dev *kfd);
void kgd2kfd_interrupt(struct kfd_dev *kfd, const void *ih_ring_entry);
int kgd2kfd_quiesce_mm(struct mm_struct *mm);
int kgd2kfd_resume_mm(struct mm_struct *mm);
int kgd2kfd_schedule_evict_and_restore_process(struct mm_struct *mm,
struct dma_fence *fence);
#endif /* AMDGPU_AMDKFD_H_INCLUDED */

View File

@ -122,7 +122,7 @@ static bool amdkfd_fence_enable_signaling(struct dma_fence *f)
if (dma_fence_is_signaled(f))
return true;
if (!kgd2kfd->schedule_evict_and_restore_process(fence->mm, f))
if (!kgd2kfd_schedule_evict_and_restore_process(fence->mm, f))
return true;
return false;

View File

@ -204,38 +204,25 @@ void amdgpu_amdkfd_unreserve_memory_limit(struct amdgpu_bo *bo)
}
/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence(s) from BO's
/* amdgpu_amdkfd_remove_eviction_fence - Removes eviction fence from BO's
* reservation object.
*
* @bo: [IN] Remove eviction fence(s) from this BO
* @ef: [IN] If ef is specified, then this eviction fence is removed if it
* @ef: [IN] This eviction fence is removed if it
* is present in the shared list.
* @ef_list: [OUT] Returns list of eviction fences. These fences are removed
* from BO's reservation object shared list.
* @ef_count: [OUT] Number of fences in ef_list.
*
* NOTE: If called with ef_list, then amdgpu_amdkfd_add_eviction_fence must be
* called to restore the eviction fences and to avoid memory leak. This is
* useful for shared BOs.
* NOTE: Must be called with BO reserved i.e. bo->tbo.resv->lock held.
*/
static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
struct amdgpu_amdkfd_fence *ef,
struct amdgpu_amdkfd_fence ***ef_list,
unsigned int *ef_count)
struct amdgpu_amdkfd_fence *ef)
{
struct reservation_object *resv = bo->tbo.resv;
struct reservation_object_list *old, *new;
unsigned int i, j, k;
if (!ef && !ef_list)
if (!ef)
return -EINVAL;
if (ef_list) {
*ef_list = NULL;
*ef_count = 0;
}
old = reservation_object_get_list(resv);
if (!old)
return 0;
@ -254,8 +241,7 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
f = rcu_dereference_protected(old->shared[i],
reservation_object_held(resv));
if ((ef && f->context == ef->base.context) ||
(!ef && to_amdgpu_amdkfd_fence(f)))
if (f->context == ef->base.context)
RCU_INIT_POINTER(new->shared[--j], f);
else
RCU_INIT_POINTER(new->shared[k++], f);
@ -263,21 +249,6 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
new->shared_max = old->shared_max;
new->shared_count = k;
if (!ef) {
unsigned int count = old->shared_count - j;
/* Alloc memory for count number of eviction fence pointers.
* Fill the ef_list array and ef_count
*/
*ef_list = kcalloc(count, sizeof(**ef_list), GFP_KERNEL);
*ef_count = count;
if (!*ef_list) {
kfree(new);
return -ENOMEM;
}
}
/* Install the new fence list, seqcount provides the barriers */
preempt_disable();
write_seqcount_begin(&resv->seq);
@ -291,46 +262,13 @@ static int amdgpu_amdkfd_remove_eviction_fence(struct amdgpu_bo *bo,
f = rcu_dereference_protected(new->shared[i],
reservation_object_held(resv));
if (!ef)
(*ef_list)[k++] = to_amdgpu_amdkfd_fence(f);
else
dma_fence_put(f);
dma_fence_put(f);
}
kfree_rcu(old, rcu);
return 0;
}
/* amdgpu_amdkfd_add_eviction_fence - Adds eviction fence(s) back into BO's
* reservation object.
*
* @bo: [IN] Add eviction fences to this BO
* @ef_list: [IN] List of eviction fences to be added
* @ef_count: [IN] Number of fences in ef_list.
*
* NOTE: Must call amdgpu_amdkfd_remove_eviction_fence before calling this
* function.
*/
static void amdgpu_amdkfd_add_eviction_fence(struct amdgpu_bo *bo,
struct amdgpu_amdkfd_fence **ef_list,
unsigned int ef_count)
{
int i;
if (!ef_list || !ef_count)
return;
for (i = 0; i < ef_count; i++) {
amdgpu_bo_fence(bo, &ef_list[i]->base, true);
/* Re-adding the fence takes an additional reference. Drop that
* reference.
*/
dma_fence_put(&ef_list[i]->base);
}
kfree(ef_list);
}
static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
bool wait)
{
@ -346,18 +284,8 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret)
goto validate_fail;
if (wait) {
struct amdgpu_amdkfd_fence **ef_list;
unsigned int ef_count;
ret = amdgpu_amdkfd_remove_eviction_fence(bo, NULL, &ef_list,
&ef_count);
if (ret)
goto validate_fail;
ttm_bo_wait(&bo->tbo, false, false);
amdgpu_amdkfd_add_eviction_fence(bo, ef_list, ef_count);
}
if (wait)
amdgpu_bo_sync_wait(bo, AMDGPU_FENCE_OWNER_KFD, false);
validate_fail:
return ret;
@ -444,7 +372,6 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
{
int ret;
struct kfd_bo_va_list *bo_va_entry;
struct amdgpu_bo *pd = vm->root.base.bo;
struct amdgpu_bo *bo = mem->bo;
uint64_t va = mem->va;
struct list_head *list_bo_va = &mem->bo_va_list;
@ -484,14 +411,8 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
*p_bo_va_entry = bo_va_entry;
/* Allocate new page tables if needed and validate
* them. Clearing of new page tables and validate need to wait
* on move fences. We don't want that to trigger the eviction
* fence, so remove it temporarily.
* them.
*/
amdgpu_amdkfd_remove_eviction_fence(pd,
vm->process_info->eviction_fence,
NULL, NULL);
ret = amdgpu_vm_alloc_pts(adev, vm, va, amdgpu_bo_size(bo));
if (ret) {
pr_err("Failed to allocate pts, err=%d\n", ret);
@ -504,13 +425,9 @@ static int add_bo_to_vm(struct amdgpu_device *adev, struct kgd_mem *mem,
goto err_alloc_pts;
}
/* Add the eviction fence back */
amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
return 0;
err_alloc_pts:
amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
amdgpu_vm_bo_rmv(adev, bo_va_entry->bo_va);
list_del(&bo_va_entry->bo_list);
err_vmadd:
@ -809,24 +726,11 @@ static int unmap_bo_from_gpuvm(struct amdgpu_device *adev,
{
struct amdgpu_bo_va *bo_va = entry->bo_va;
struct amdgpu_vm *vm = bo_va->base.vm;
struct amdgpu_bo *pd = vm->root.base.bo;
/* Remove eviction fence from PD (and thereby from PTs too as
* they share the resv. object). Otherwise during PT update
* job (see amdgpu_vm_bo_update_mapping), eviction fence would
* get added to job->sync object and job execution would
* trigger the eviction fence.
*/
amdgpu_amdkfd_remove_eviction_fence(pd,
vm->process_info->eviction_fence,
NULL, NULL);
amdgpu_vm_bo_unmap(adev, bo_va, entry->va);
amdgpu_vm_clear_freed(adev, vm, &bo_va->last_pt_update);
/* Add the eviction fence back */
amdgpu_bo_fence(pd, &vm->process_info->eviction_fence->base, true);
amdgpu_sync_fence(NULL, sync, bo_va->last_pt_update, false);
return 0;
@ -1002,7 +906,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
pr_err("validate_pt_pd_bos() failed\n");
goto validate_pd_fail;
}
ret = ttm_bo_wait(&vm->root.base.bo->tbo, false, false);
amdgpu_bo_sync_wait(vm->root.base.bo, AMDGPU_FENCE_OWNER_KFD, false);
if (ret)
goto wait_pd_fail;
amdgpu_bo_fence(vm->root.base.bo,
@ -1389,8 +1293,7 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
* attached
*/
amdgpu_amdkfd_remove_eviction_fence(mem->bo,
process_info->eviction_fence,
NULL, NULL);
process_info->eviction_fence);
pr_debug("Release VA 0x%llx - 0x%llx\n", mem->va,
mem->va + bo_size * (1 + mem->aql_queue));
@ -1617,8 +1520,7 @@ int amdgpu_amdkfd_gpuvm_unmap_memory_from_gpu(
if (mem->mapped_to_gpu_memory == 0 &&
!amdgpu_ttm_tt_get_usermm(mem->bo->tbo.ttm) && !mem->bo->pin_count)
amdgpu_amdkfd_remove_eviction_fence(mem->bo,
process_info->eviction_fence,
NULL, NULL);
process_info->eviction_fence);
unreserve_out:
unreserve_bo_and_vms(&ctx, false, false);
@ -1679,7 +1581,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
}
amdgpu_amdkfd_remove_eviction_fence(
bo, mem->process_info->eviction_fence, NULL, NULL);
bo, mem->process_info->eviction_fence);
list_del_init(&mem->validate_list.head);
if (size)
@ -1790,7 +1692,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem,
evicted_bos = atomic_inc_return(&process_info->evicted_bos);
if (evicted_bos == 1) {
/* First eviction, stop the queues */
r = kgd2kfd->quiesce_mm(mm);
r = kgd2kfd_quiesce_mm(mm);
if (r)
pr_err("Failed to quiesce KFD\n");
schedule_delayed_work(&process_info->restore_userptr_work,
@ -1945,16 +1847,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
amdgpu_sync_create(&sync);
/* Avoid triggering eviction fences when unmapping invalid
* userptr BOs (waits for all fences, doesn't use
* FENCE_OWNER_VM)
*/
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node)
amdgpu_amdkfd_remove_eviction_fence(peer_vm->root.base.bo,
process_info->eviction_fence,
NULL, NULL);
ret = process_validate_vms(process_info);
if (ret)
goto unreserve_out;
@ -2015,10 +1907,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
ret = process_update_pds(process_info, &sync);
unreserve_out:
list_for_each_entry(peer_vm, &process_info->vm_list_head,
vm_list_node)
amdgpu_bo_fence(peer_vm->root.base.bo,
&process_info->eviction_fence->base, true);
ttm_eu_backoff_reservation(&ticket, &resv_list);
amdgpu_sync_wait(&sync, false);
amdgpu_sync_free(&sync);
@ -2082,7 +1970,7 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
evicted_bos)
goto unlock_out;
evicted_bos = 0;
if (kgd2kfd->resume_mm(mm)) {
if (kgd2kfd_resume_mm(mm)) {
pr_err("%s: Failed to resume KFD\n", __func__);
/* No recovery from this failure. Probably the CP is
* hanging. No point trying again.

View File

@ -25,8 +25,8 @@
*/
#include <drm/drmP.h>
#include <drm/drm_edid.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "atom.h"

View File

@ -214,6 +214,7 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs
case AMDGPU_CHUNK_ID_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
case AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES:
break;
default:
@ -1090,6 +1091,15 @@ static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
fence = amdgpu_ctx_get_fence(ctx, entity,
deps[i].handle);
if (chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
struct drm_sched_fence *s_fence = to_drm_sched_fence(fence);
struct dma_fence *old = fence;
fence = dma_fence_get(&s_fence->scheduled);
dma_fence_put(old);
}
if (IS_ERR(fence)) {
r = PTR_ERR(fence);
amdgpu_ctx_put(ctx);
@ -1177,7 +1187,8 @@ static int amdgpu_cs_dependencies(struct amdgpu_device *adev,
chunk = &p->chunks[i];
if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES) {
if (chunk->chunk_id == AMDGPU_CHUNK_ID_DEPENDENCIES ||
chunk->chunk_id == AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES) {
r = amdgpu_cs_process_fence_dep(p, chunk);
if (r)
return r;

View File

@ -124,6 +124,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
struct amdgpu_ring *rings[AMDGPU_MAX_RINGS];
struct drm_sched_rq *rqs[AMDGPU_MAX_RINGS];
unsigned num_rings;
unsigned num_rqs = 0;
switch (i) {
case AMDGPU_HW_IP_GFX:
@ -166,12 +167,16 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
break;
}
for (j = 0; j < num_rings; ++j)
rqs[j] = &rings[j]->sched.sched_rq[priority];
for (j = 0; j < num_rings; ++j) {
if (!rings[j]->adev)
continue;
rqs[num_rqs++] = &rings[j]->sched.sched_rq[priority];
}
for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j)
r = drm_sched_entity_init(&ctx->entities[i][j].entity,
rqs, num_rings, &ctx->guilty);
rqs, num_rqs, &ctx->guilty);
if (r)
goto error_cleanup_entities;
}

View File

@ -158,9 +158,6 @@ static int amdgpu_debugfs_process_reg_op(bool read, struct file *f,
while (size) {
uint32_t value;
if (*pos > adev->rmmio_size)
goto end;
if (read) {
value = RREG32(*pos >> 2);
r = put_user(value, (uint32_t *)buf);

View File

@ -30,8 +30,8 @@
#include <linux/console.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
#include <linux/vgaarb.h>
#include <linux/vga_switcheroo.h>
@ -1645,7 +1645,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
if (r) {
DRM_ERROR("sw_init of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
goto init_failed;
}
adev->ip_blocks[i].status.sw = true;
@ -1654,17 +1654,17 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
r = amdgpu_device_vram_scratch_init(adev);
if (r) {
DRM_ERROR("amdgpu_vram_scratch_init failed %d\n", r);
return r;
goto init_failed;
}
r = adev->ip_blocks[i].version->funcs->hw_init((void *)adev);
if (r) {
DRM_ERROR("hw_init %d failed %d\n", i, r);
return r;
goto init_failed;
}
r = amdgpu_device_wb_init(adev);
if (r) {
DRM_ERROR("amdgpu_device_wb_init failed %d\n", r);
return r;
goto init_failed;
}
adev->ip_blocks[i].status.hw = true;
@ -1675,7 +1675,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
AMDGPU_CSA_SIZE);
if (r) {
DRM_ERROR("allocate CSA failed %d\n", r);
return r;
goto init_failed;
}
}
}
@ -1683,30 +1683,32 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
r = amdgpu_ucode_create_bo(adev); /* create ucode bo when sw_init complete*/
if (r)
return r;
goto init_failed;
r = amdgpu_device_ip_hw_init_phase1(adev);
if (r)
return r;
goto init_failed;
r = amdgpu_device_fw_loading(adev);
if (r)
return r;
goto init_failed;
r = amdgpu_device_ip_hw_init_phase2(adev);
if (r)
return r;
goto init_failed;
if (adev->gmc.xgmi.num_physical_nodes > 1)
amdgpu_xgmi_add_device(adev);
amdgpu_amdkfd_device_init(adev);
init_failed:
if (amdgpu_sriov_vf(adev)) {
amdgpu_virt_init_data_exchange(adev);
if (!r)
amdgpu_virt_init_data_exchange(adev);
amdgpu_virt_release_full_gpu(adev, true);
}
return 0;
return r;
}
/**
@ -2133,7 +2135,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
continue;
r = block->version->funcs->hw_init(adev);
DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
DRM_INFO("RE-INIT-early: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
if (r)
return r;
}
@ -2167,7 +2169,7 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
continue;
r = block->version->funcs->hw_init(adev);
DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
DRM_INFO("RE-INIT-late: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
if (r)
return r;
}
@ -2548,6 +2550,17 @@ int amdgpu_device_init(struct amdgpu_device *adev,
/* detect if we are with an SRIOV vbios */
amdgpu_device_detect_sriov_bios(adev);
/* check if we need to reset the asic
* E.g., driver was not cleanly unloaded previously, etc.
*/
if (!amdgpu_sriov_vf(adev) && amdgpu_asic_need_reset_on_init(adev)) {
r = amdgpu_asic_reset(adev);
if (r) {
dev_err(adev->dev, "asic reset on init failed\n");
goto failed;
}
}
/* Post card if necessary */
if (amdgpu_device_need_post(adev)) {
if (!adev->bios) {
@ -2612,6 +2625,8 @@ fence_driver_init:
}
dev_err(adev->dev, "amdgpu_device_ip_init failed\n");
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_AMDGPU_INIT_FAIL, 0, 0);
if (amdgpu_virt_request_full_gpu(adev, false))
amdgpu_virt_release_full_gpu(adev, false);
goto failed;
}
@ -2707,7 +2722,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
amdgpu_irq_disable_all(adev);
if (adev->mode_info.mode_config_initialized){
if (!amdgpu_device_has_dc_support(adev))
drm_crtc_force_disable_all(adev->ddev);
drm_helper_force_disable_all(adev->ddev);
else
drm_atomic_helper_shutdown(adev->ddev);
}
@ -3298,17 +3313,15 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
if (!ring || !ring->sched.thread)
continue;
kthread_park(ring->sched.thread);
if (job && job->base.sched != &ring->sched)
continue;
drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
drm_sched_stop(&ring->sched);
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
amdgpu_fence_driver_force_completion(ring);
}
if(job)
drm_sched_increase_karma(&job->base);
if (!amdgpu_sriov_vf(adev)) {
@ -3454,14 +3467,10 @@ static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
if (!ring || !ring->sched.thread)
continue;
/* only need recovery sched of the given job's ring
* or all rings (in the case @job is NULL)
* after above amdgpu_reset accomplished
*/
if ((!job || job->base.sched == &ring->sched) && !adev->asic_reset_res)
drm_sched_job_recovery(&ring->sched);
if (!adev->asic_reset_res)
drm_sched_resubmit_jobs(&ring->sched);
kthread_unpark(ring->sched.thread);
drm_sched_start(&ring->sched, !adev->asic_reset_res);
}
if (!amdgpu_device_has_dc_support(adev)) {
@ -3521,9 +3530,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
* by different nodes. No point also since the one node already executing
* reset will also reset all the other nodes in the hive.
*/
hive = amdgpu_get_xgmi_hive(adev);
hive = amdgpu_get_xgmi_hive(adev, 0);
if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
!mutex_trylock(&hive->hive_lock))
!mutex_trylock(&hive->reset_lock))
return 0;
/* Start with adev pre asic reset first for soft reset check.*/
@ -3602,13 +3611,45 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
}
if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
mutex_unlock(&hive->hive_lock);
mutex_unlock(&hive->reset_lock);
if (r)
dev_info(adev->dev, "GPU reset end with ret = %d\n", r);
return r;
}
static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev,
enum pci_bus_speed *speed,
enum pcie_link_width *width)
{
struct pci_dev *pdev = adev->pdev;
enum pci_bus_speed cur_speed;
enum pcie_link_width cur_width;
*speed = PCI_SPEED_UNKNOWN;
*width = PCIE_LNK_WIDTH_UNKNOWN;
while (pdev) {
cur_speed = pcie_get_speed_cap(pdev);
cur_width = pcie_get_width_cap(pdev);
if (cur_speed != PCI_SPEED_UNKNOWN) {
if (*speed == PCI_SPEED_UNKNOWN)
*speed = cur_speed;
else if (cur_speed < *speed)
*speed = cur_speed;
}
if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) {
if (*width == PCIE_LNK_WIDTH_UNKNOWN)
*width = cur_width;
else if (cur_width < *width)
*width = cur_width;
}
pdev = pci_upstream_bridge(pdev);
}
}
/**
* amdgpu_device_get_pcie_info - fetch pcie info about the PCIE slot
*
@ -3621,8 +3662,8 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
struct pci_dev *pdev;
enum pci_bus_speed speed_cap;
enum pcie_link_width link_width;
enum pci_bus_speed speed_cap, platform_speed_cap;
enum pcie_link_width platform_link_width;
if (amdgpu_pcie_gen_cap)
adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
@ -3639,6 +3680,12 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
return;
}
if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
return;
amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap,
&platform_link_width);
if (adev->pm.pcie_gen_mask == 0) {
/* asic caps */
pdev = adev->pdev;
@ -3664,22 +3711,20 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
}
/* platform caps */
pdev = adev->ddev->pdev->bus->self;
speed_cap = pcie_get_speed_cap(pdev);
if (speed_cap == PCI_SPEED_UNKNOWN) {
if (platform_speed_cap == PCI_SPEED_UNKNOWN) {
adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
} else {
if (speed_cap == PCIE_SPEED_16_0GT)
if (platform_speed_cap == PCIE_SPEED_16_0GT)
adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
else if (speed_cap == PCIE_SPEED_8_0GT)
else if (platform_speed_cap == PCIE_SPEED_8_0GT)
adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
else if (speed_cap == PCIE_SPEED_5_0GT)
else if (platform_speed_cap == PCIE_SPEED_5_0GT)
adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
else
@ -3688,12 +3733,10 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
}
}
if (adev->pm.pcie_mlw_mask == 0) {
pdev = adev->ddev->pdev->bus->self;
link_width = pcie_get_width_cap(pdev);
if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
if (platform_link_width == PCIE_LNK_WIDTH_UNKNOWN) {
adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
} else {
switch (link_width) {
switch (platform_link_width) {
case PCIE_LNK_X32:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |

View File

@ -51,14 +51,7 @@ struct amdgpu_doorbell_index {
uint32_t userqueue_start;
uint32_t userqueue_end;
uint32_t gfx_ring0;
uint32_t sdma_engine0;
uint32_t sdma_engine1;
uint32_t sdma_engine2;
uint32_t sdma_engine3;
uint32_t sdma_engine4;
uint32_t sdma_engine5;
uint32_t sdma_engine6;
uint32_t sdma_engine7;
uint32_t sdma_engine[8];
uint32_t ih;
union {
struct {
@ -78,7 +71,11 @@ struct amdgpu_doorbell_index {
uint32_t vce_ring6_7;
} uvd_vce;
};
uint32_t first_non_cp;
uint32_t last_non_cp;
uint32_t max_assignment;
/* Per engine SDMA doorbell size in dword */
uint32_t sdma_doorbell_range;
};
typedef enum _AMDGPU_DOORBELL_ASSIGNMENT
@ -148,6 +145,10 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
AMDGPU_VEGA20_DOORBELL64_VCE_RING2_3 = 0x18D,
AMDGPU_VEGA20_DOORBELL64_VCE_RING4_5 = 0x18E,
AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7 = 0x18F,
AMDGPU_VEGA20_DOORBELL64_FIRST_NON_CP = AMDGPU_VEGA20_DOORBELL_sDMA_ENGINE0,
AMDGPU_VEGA20_DOORBELL64_LAST_NON_CP = AMDGPU_VEGA20_DOORBELL64_VCE_RING6_7,
AMDGPU_VEGA20_DOORBELL_MAX_ASSIGNMENT = 0x18F,
AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF
} AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;
@ -227,6 +228,9 @@ typedef enum _AMDGPU_DOORBELL64_ASSIGNMENT
AMDGPU_DOORBELL64_VCE_RING4_5 = 0xFE,
AMDGPU_DOORBELL64_VCE_RING6_7 = 0xFF,
AMDGPU_DOORBELL64_FIRST_NON_CP = AMDGPU_DOORBELL64_sDMA_ENGINE0,
AMDGPU_DOORBELL64_LAST_NON_CP = AMDGPU_DOORBELL64_VCE_RING6_7,
AMDGPU_DOORBELL64_MAX_ASSIGNMENT = 0xFF,
AMDGPU_DOORBELL64_INVALID = 0xFFFF
} AMDGPU_DOORBELL64_ASSIGNMENT;

View File

@ -184,61 +184,6 @@ u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev)
return vrefresh;
}
void amdgpu_calculate_u_and_p(u32 i, u32 r_c, u32 p_b,
u32 *p, u32 *u)
{
u32 b_c = 0;
u32 i_c;
u32 tmp;
i_c = (i * r_c) / 100;
tmp = i_c >> p_b;
while (tmp) {
b_c++;
tmp >>= 1;
}
*u = (b_c + 1) / 2;
*p = i_c / (1 << (2 * (*u)));
}
int amdgpu_calculate_at(u32 t, u32 h, u32 fh, u32 fl, u32 *tl, u32 *th)
{
u32 k, a, ah, al;
u32 t1;
if ((fl == 0) || (fh == 0) || (fl > fh))
return -EINVAL;
k = (100 * fh) / fl;
t1 = (t * (k - 100));
a = (1000 * (100 * h + t1)) / (10000 + (t1 / 100));
a = (a + 5) / 10;
ah = ((a * t) + 5000) / 10000;
al = a - ah;
*th = t - ah;
*tl = t + al;
return 0;
}
bool amdgpu_is_uvd_state(u32 class, u32 class2)
{
if (class & ATOM_PPLIB_CLASSIFICATION_UVDSTATE)
return true;
if (class & ATOM_PPLIB_CLASSIFICATION_HD2STATE)
return true;
if (class & ATOM_PPLIB_CLASSIFICATION_HDSTATE)
return true;
if (class & ATOM_PPLIB_CLASSIFICATION_SDSTATE)
return true;
if (class2 & ATOM_PPLIB_CLASSIFICATION2_MVC)
return true;
return false;
}
bool amdgpu_is_internal_thermal_sensor(enum amdgpu_int_thermal_type sensor)
{
switch (sensor) {
@ -949,39 +894,6 @@ enum amdgpu_pcie_gen amdgpu_get_pcie_gen_support(struct amdgpu_device *adev,
return AMDGPU_PCIE_GEN1;
}
u16 amdgpu_get_pcie_lane_support(struct amdgpu_device *adev,
u16 asic_lanes,
u16 default_lanes)
{
switch (asic_lanes) {
case 0:
default:
return default_lanes;
case 1:
return 1;
case 2:
return 2;
case 4:
return 4;
case 8:
return 8;
case 12:
return 12;
case 16:
return 16;
}
}
u8 amdgpu_encode_pci_lane_width(u32 lanes)
{
u8 encoded_lanes[] = { 0, 1, 2, 0, 3, 0, 0, 0, 4, 0, 0, 0, 5, 0, 0, 0, 6 };
if (lanes > 16)
return 0;
return encoded_lanes[lanes];
}
struct amd_vce_state*
amdgpu_get_vce_clock_state(void *handle, u32 idx)
{

View File

@ -364,6 +364,14 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->enable_mgpu_fan_boost(\
(adev)->powerplay.pp_handle))
#define amdgpu_dpm_get_ppfeature_status(adev, buf) \
((adev)->powerplay.pp_funcs->get_ppfeature_status(\
(adev)->powerplay.pp_handle, (buf)))
#define amdgpu_dpm_set_ppfeature_status(adev, ppfeatures) \
((adev)->powerplay.pp_funcs->set_ppfeature_status(\
(adev)->powerplay.pp_handle, (ppfeatures)))
struct amdgpu_dpm {
struct amdgpu_ps *ps;
/* number of valid power states */
@ -478,10 +486,6 @@ void amdgpu_dpm_print_ps_status(struct amdgpu_device *adev,
u32 amdgpu_dpm_get_vblank_time(struct amdgpu_device *adev);
u32 amdgpu_dpm_get_vrefresh(struct amdgpu_device *adev);
void amdgpu_dpm_get_active_displays(struct amdgpu_device *adev);
bool amdgpu_is_uvd_state(u32 class, u32 class2);
void amdgpu_calculate_u_and_p(u32 i, u32 r_c, u32 p_b,
u32 *p, u32 *u);
int amdgpu_calculate_at(u32 t, u32 h, u32 fh, u32 fl, u32 *tl, u32 *th);
bool amdgpu_is_internal_thermal_sensor(enum amdgpu_int_thermal_type sensor);
@ -497,11 +501,6 @@ enum amdgpu_pcie_gen amdgpu_get_pcie_gen_support(struct amdgpu_device *adev,
enum amdgpu_pcie_gen asic_gen,
enum amdgpu_pcie_gen default_gen);
u16 amdgpu_get_pcie_lane_support(struct amdgpu_device *adev,
u16 asic_lanes,
u16 default_lanes);
u8 amdgpu_encode_pci_lane_width(u32 lanes);
struct amd_vce_state*
amdgpu_get_vce_clock_state(void *handle, u32 idx);

View File

@ -32,7 +32,7 @@
#include <linux/module.h>
#include <linux/pm_runtime.h>
#include <linux/vga_switcheroo.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_probe_helper.h>
#include "amdgpu.h"
#include "amdgpu_irq.h"
@ -71,9 +71,12 @@
* - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
* - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
* - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation.
* - 3.28.0 - Add AMDGPU_CHUNK_ID_SCHEDULED_DEPENDENCIES
* - 3.29.0 - Add AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID
* - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE.
*/
#define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 27
#define KMS_DRIVER_MINOR 30
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
@ -1176,6 +1179,22 @@ static const struct file_operations amdgpu_driver_kms_fops = {
#endif
};
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
{
struct drm_file *file;
if (!filp)
return -EINVAL;
if (filp->f_op != &amdgpu_driver_kms_fops) {
return -EINVAL;
}
file = filp->private_data;
*fpriv = file->driver_priv;
return 0;
}
static bool
amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
bool in_vblank_irq, int *vpos, int *hpos,
@ -1189,7 +1208,7 @@ amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
static struct drm_driver kms_driver = {
.driver_features =
DRIVER_USE_AGP | DRIVER_ATOMIC |
DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
DRIVER_GEM |
DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET | DRIVER_SYNCOBJ,
.load = amdgpu_driver_load_kms,
.open = amdgpu_driver_open_kms,

View File

@ -37,6 +37,8 @@ struct amdgpu_gds {
struct amdgpu_gds_asic_info mem;
struct amdgpu_gds_asic_info gws;
struct amdgpu_gds_asic_info oa;
uint32_t gds_compute_max_wave_id;
/* At present, GDS, GWS and OA resources for gfx (graphics)
* is always pre-allocated and available for graphics operation.
* Such resource is shared between all gfx clients.

View File

@ -54,10 +54,6 @@ int amdgpu_gem_object_create(struct amdgpu_device *adev, unsigned long size,
memset(&bp, 0, sizeof(bp));
*obj = NULL;
/* At least align on page size */
if (alignment < PAGE_SIZE) {
alignment = PAGE_SIZE;
}
bp.size = size;
bp.byte_align = alignment;
@ -244,9 +240,6 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
return -EINVAL;
}
flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
/* GDS allocations must be DW aligned */
if (args->in.domains & AMDGPU_GEM_DOMAIN_GDS)
size = ALIGN(size, 4);
}
if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {

View File

@ -202,12 +202,12 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
amdgpu_asic_flush_hdp(adev, ring);
}
if (need_ctx_switch)
status |= AMDGPU_HAVE_CTX_SWITCH;
skip_preamble = ring->current_ctx == fence_ctx;
if (job && ring->funcs->emit_cntxcntl) {
if (need_ctx_switch)
status |= AMDGPU_HAVE_CTX_SWITCH;
status |= job->preamble_status;
amdgpu_ring_emit_cntxcntl(ring, status);
}
@ -221,8 +221,8 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
continue;
amdgpu_ring_emit_ib(ring, job, ib, need_ctx_switch);
need_ctx_switch = false;
amdgpu_ring_emit_ib(ring, job, ib, status);
status &= ~AMDGPU_HAVE_CTX_SWITCH;
}
if (ring->funcs->emit_tmz)

View File

@ -52,6 +52,8 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
ih->use_bus_addr = use_bus_addr;
if (use_bus_addr) {
dma_addr_t dma_addr;
if (ih->ring)
return 0;
@ -59,21 +61,26 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
* add them to the end of the ring allocation.
*/
ih->ring = dma_alloc_coherent(adev->dev, ih->ring_size + 8,
&ih->rb_dma_addr, GFP_KERNEL);
&dma_addr, GFP_KERNEL);
if (ih->ring == NULL)
return -ENOMEM;
memset((void *)ih->ring, 0, ih->ring_size + 8);
ih->wptr_offs = (ih->ring_size / 4) + 0;
ih->rptr_offs = (ih->ring_size / 4) + 1;
ih->gpu_addr = dma_addr;
ih->wptr_addr = dma_addr + ih->ring_size;
ih->wptr_cpu = &ih->ring[ih->ring_size / 4];
ih->rptr_addr = dma_addr + ih->ring_size + 4;
ih->rptr_cpu = &ih->ring[(ih->ring_size / 4) + 1];
} else {
r = amdgpu_device_wb_get(adev, &ih->wptr_offs);
unsigned wptr_offs, rptr_offs;
r = amdgpu_device_wb_get(adev, &wptr_offs);
if (r)
return r;
r = amdgpu_device_wb_get(adev, &ih->rptr_offs);
r = amdgpu_device_wb_get(adev, &rptr_offs);
if (r) {
amdgpu_device_wb_free(adev, ih->wptr_offs);
amdgpu_device_wb_free(adev, wptr_offs);
return r;
}
@ -82,10 +89,15 @@ int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
&ih->ring_obj, &ih->gpu_addr,
(void **)&ih->ring);
if (r) {
amdgpu_device_wb_free(adev, ih->rptr_offs);
amdgpu_device_wb_free(adev, ih->wptr_offs);
amdgpu_device_wb_free(adev, rptr_offs);
amdgpu_device_wb_free(adev, wptr_offs);
return r;
}
ih->wptr_addr = adev->wb.gpu_addr + wptr_offs * 4;
ih->wptr_cpu = &adev->wb.wb[wptr_offs];
ih->rptr_addr = adev->wb.gpu_addr + rptr_offs * 4;
ih->rptr_cpu = &adev->wb.wb[rptr_offs];
}
return 0;
}
@ -109,13 +121,13 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
* add them to the end of the ring allocation.
*/
dma_free_coherent(adev->dev, ih->ring_size + 8,
(void *)ih->ring, ih->rb_dma_addr);
(void *)ih->ring, ih->gpu_addr);
ih->ring = NULL;
} else {
amdgpu_bo_free_kernel(&ih->ring_obj, &ih->gpu_addr,
(void **)&ih->ring);
amdgpu_device_wb_free(adev, ih->wptr_offs);
amdgpu_device_wb_free(adev, ih->rptr_offs);
amdgpu_device_wb_free(adev, (ih->wptr_addr - ih->gpu_addr) / 4);
amdgpu_device_wb_free(adev, (ih->rptr_addr - ih->gpu_addr) / 4);
}
}
@ -128,16 +140,14 @@ void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
* Interrupt handler (VI), walk the IH ring.
* Returns irq process return code.
*/
int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
void (*callback)(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih))
int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih)
{
u32 wptr;
if (!ih->enabled || adev->shutdown)
return IRQ_NONE;
wptr = amdgpu_ih_get_wptr(adev);
wptr = amdgpu_ih_get_wptr(adev, ih);
restart_ih:
/* is somebody else already processing irqs? */
@ -150,15 +160,15 @@ restart_ih:
rmb();
while (ih->rptr != wptr) {
callback(adev, ih);
amdgpu_irq_dispatch(adev, ih);
ih->rptr &= ih->ptr_mask;
}
amdgpu_ih_set_rptr(adev);
amdgpu_ih_set_rptr(adev, ih);
atomic_set(&ih->lock, 0);
/* make sure wptr hasn't changed while processing */
wptr = amdgpu_ih_get_wptr(adev);
wptr = amdgpu_ih_get_wptr(adev, ih);
if (wptr != ih->rptr)
goto restart_ih;

View File

@ -31,40 +31,44 @@ struct amdgpu_iv_entry;
* R6xx+ IH ring
*/
struct amdgpu_ih_ring {
struct amdgpu_bo *ring_obj;
volatile uint32_t *ring;
unsigned rptr;
unsigned ring_size;
uint64_t gpu_addr;
uint32_t ptr_mask;
atomic_t lock;
bool enabled;
unsigned wptr_offs;
unsigned rptr_offs;
u32 doorbell_index;
bool use_doorbell;
bool use_bus_addr;
dma_addr_t rb_dma_addr; /* only used when use_bus_addr = true */
struct amdgpu_bo *ring_obj;
volatile uint32_t *ring;
uint64_t gpu_addr;
uint64_t wptr_addr;
volatile uint32_t *wptr_cpu;
uint64_t rptr_addr;
volatile uint32_t *rptr_cpu;
bool enabled;
unsigned rptr;
atomic_t lock;
};
/* provided by the ih block */
struct amdgpu_ih_funcs {
/* ring read/write ptr handling, called from interrupt context */
u32 (*get_wptr)(struct amdgpu_device *adev);
void (*decode_iv)(struct amdgpu_device *adev,
u32 (*get_wptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
void (*decode_iv)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
struct amdgpu_iv_entry *entry);
void (*set_rptr)(struct amdgpu_device *adev);
void (*set_rptr)(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
};
#define amdgpu_ih_get_wptr(adev) (adev)->irq.ih_funcs->get_wptr((adev))
#define amdgpu_ih_decode_iv(adev, iv) (adev)->irq.ih_funcs->decode_iv((adev), (iv))
#define amdgpu_ih_set_rptr(adev) (adev)->irq.ih_funcs->set_rptr((adev))
#define amdgpu_ih_get_wptr(adev, ih) (adev)->irq.ih_funcs->get_wptr((adev), (ih))
#define amdgpu_ih_decode_iv(adev, iv) \
(adev)->irq.ih_funcs->decode_iv((adev), (ih), (iv))
#define amdgpu_ih_set_rptr(adev, ih) (adev)->irq.ih_funcs->set_rptr((adev), (ih))
int amdgpu_ih_ring_init(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
unsigned ring_size, bool use_bus_addr);
void amdgpu_ih_ring_fini(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih,
void (*callback)(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih));
int amdgpu_ih_process(struct amdgpu_device *adev, struct amdgpu_ih_ring *ih);
#endif

View File

@ -130,27 +130,6 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev)
spin_unlock_irqrestore(&adev->irq.lock, irqflags);
}
/**
* amdgpu_irq_callback - callback from the IH ring
*
* @adev: amdgpu device pointer
* @ih: amdgpu ih ring
*
* Callback from IH ring processing to handle the entry at the current position
* and advance the read pointer.
*/
static void amdgpu_irq_callback(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih)
{
u32 ring_index = ih->rptr >> 2;
struct amdgpu_iv_entry entry;
entry.iv_entry = (const uint32_t *)&ih->ring[ring_index];
amdgpu_ih_decode_iv(adev, &entry);
amdgpu_irq_dispatch(adev, &entry);
}
/**
* amdgpu_irq_handler - IRQ handler
*
@ -168,12 +147,42 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg)
struct amdgpu_device *adev = dev->dev_private;
irqreturn_t ret;
ret = amdgpu_ih_process(adev, &adev->irq.ih, amdgpu_irq_callback);
ret = amdgpu_ih_process(adev, &adev->irq.ih);
if (ret == IRQ_HANDLED)
pm_runtime_mark_last_busy(dev->dev);
return ret;
}
/**
* amdgpu_irq_handle_ih1 - kick off processing for IH1
*
* @work: work structure in struct amdgpu_irq
*
* Kick off processing of IH ring 1.
*/
static void amdgpu_irq_handle_ih1(struct work_struct *work)
{
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
irq.ih1_work);
amdgpu_ih_process(adev, &adev->irq.ih1);
}
/**
* amdgpu_irq_handle_ih2 - kick off processing for IH2
*
* @work: work structure in struct amdgpu_irq
*
* Kick off processing of IH ring 2.
*/
static void amdgpu_irq_handle_ih2(struct work_struct *work)
{
struct amdgpu_device *adev = container_of(work, struct amdgpu_device,
irq.ih2_work);
amdgpu_ih_process(adev, &adev->irq.ih2);
}
/**
* amdgpu_msi_ok - check whether MSI functionality is enabled
*
@ -238,6 +247,9 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
amdgpu_hotplug_work_func);
}
INIT_WORK(&adev->irq.ih1_work, amdgpu_irq_handle_ih1);
INIT_WORK(&adev->irq.ih2_work, amdgpu_irq_handle_ih2);
adev->irq.installed = true;
r = drm_irq_install(adev->ddev, adev->ddev->pdev->irq);
if (r) {
@ -359,15 +371,22 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev,
* Dispatches IRQ to IP blocks.
*/
void amdgpu_irq_dispatch(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
struct amdgpu_ih_ring *ih)
{
unsigned client_id = entry->client_id;
unsigned src_id = entry->src_id;
u32 ring_index = ih->rptr >> 2;
struct amdgpu_iv_entry entry;
unsigned client_id, src_id;
struct amdgpu_irq_src *src;
bool handled = false;
int r;
trace_amdgpu_iv(entry);
entry.iv_entry = (const uint32_t *)&ih->ring[ring_index];
amdgpu_ih_decode_iv(adev, &entry);
trace_amdgpu_iv(ih - &adev->irq.ih, &entry);
client_id = entry.client_id;
src_id = entry.src_id;
if (client_id >= AMDGPU_IRQ_CLIENTID_MAX) {
DRM_DEBUG("Invalid client_id in IV: %d\n", client_id);
@ -383,7 +402,7 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
client_id, src_id);
} else if ((src = adev->irq.client[client_id].sources[src_id])) {
r = src->funcs->process(adev, src, entry);
r = src->funcs->process(adev, src, &entry);
if (r < 0)
DRM_ERROR("error processing interrupt (%d)\n", r);
else if (r)
@ -395,7 +414,7 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
/* Send it to amdkfd as well if it isn't already handled */
if (!handled)
amdgpu_amdkfd_interrupt(adev, entry->iv_entry);
amdgpu_amdkfd_interrupt(adev, entry.iv_entry);
}
/**

View File

@ -87,9 +87,11 @@ struct amdgpu_irq {
/* status, etc. */
bool msi_enabled; /* msi enabled */
/* interrupt ring */
struct amdgpu_ih_ring ih;
const struct amdgpu_ih_funcs *ih_funcs;
/* interrupt rings */
struct amdgpu_ih_ring ih, ih1, ih2;
const struct amdgpu_ih_funcs *ih_funcs;
struct work_struct ih1_work, ih2_work;
struct amdgpu_irq_src self_irq;
/* gen irq stuff */
struct irq_domain *domain; /* GPU irq controller domain */
@ -106,7 +108,7 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev,
unsigned client_id, unsigned src_id,
struct amdgpu_irq_src *source);
void amdgpu_irq_dispatch(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry);
struct amdgpu_ih_ring *ih);
int amdgpu_irq_update(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
unsigned type);
int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,

View File

@ -207,7 +207,7 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
if (!r) {
acpi_status = amdgpu_acpi_init(adev);
if (acpi_status)
dev_dbg(&dev->pdev->dev,
dev_dbg(&dev->pdev->dev,
"Error during ACPI methods call\n");
}

View File

@ -38,6 +38,7 @@
#include <drm/drm_crtc_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_plane_helper.h>
#include <drm/drm_probe_helper.h>
#include <linux/i2c.h>
#include <linux/i2c-algo-bit.h>
#include <linux/hrtimer.h>

View File

@ -426,12 +426,20 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
size_t acc_size;
int r;
page_align = roundup(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
if (bp->domain & (AMDGPU_GEM_DOMAIN_GDS | AMDGPU_GEM_DOMAIN_GWS |
AMDGPU_GEM_DOMAIN_OA))
/* Note that GDS/GWS/OA allocates 1 page per byte/resource. */
if (bp->domain & (AMDGPU_GEM_DOMAIN_GWS | AMDGPU_GEM_DOMAIN_OA)) {
/* GWS and OA don't need any alignment. */
page_align = bp->byte_align;
size <<= PAGE_SHIFT;
else
} else if (bp->domain & AMDGPU_GEM_DOMAIN_GDS) {
/* Both size and alignment must be a multiple of 4. */
page_align = ALIGN(bp->byte_align, 4);
size = ALIGN(size, 4) << PAGE_SHIFT;
} else {
/* Memory should be aligned at least to a page size. */
page_align = ALIGN(bp->byte_align, PAGE_SIZE) >> PAGE_SHIFT;
size = ALIGN(size, PAGE_SIZE);
}
if (!amdgpu_bo_validate_size(adev, size, bp->domain))
return -ENOMEM;
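
The comments above encode three different rounding rules: GWS/OA allocations need no alignment, GDS sizes and alignments are rounded to multiples of 4 bytes, and everything else is rounded to whole pages; the GDS/GWS/OA sizes are additionally shifted by PAGE_SHIFT so TTM accounts one page per byte/resource. A small stand-alone sketch with illustrative numbers (the SK_* names are invented for the example and assume 4 KiB pages):

#include <stdio.h>

#define SK_PAGE_SHIFT	12			/* assumed 4 KiB pages */
#define SK_PAGE_SIZE	(1UL << SK_PAGE_SHIFT)
#define SK_ALIGN(x, a)	(((x) + (a) - 1) & ~((unsigned long)(a) - 1))

int main(void)
{
	/* GWS/OA: no alignment, one TTM page per resource. */
	unsigned long gws_size = 16UL << SK_PAGE_SHIFT;			/* 16 resources */

	/* GDS: size and alignment rounded up to multiples of 4 bytes,
	 * then one TTM page per byte. */
	unsigned long gds_size  = SK_ALIGN(17UL, 4) << SK_PAGE_SHIFT;	/* 20 "pages" */
	unsigned long gds_align = SK_ALIGN(2UL, 4);			/* 4 */

	/* Ordinary VRAM/GTT: whole pages. */
	unsigned long bo_size  = SK_ALIGN(5000UL, SK_PAGE_SIZE);	/* 8192 bytes */
	unsigned long bo_align = SK_ALIGN(1UL, SK_PAGE_SIZE) >> SK_PAGE_SHIFT;	/* 1 page */

	printf("%lu %lu %lu %lu %lu\n",
	       gws_size, gds_size, gds_align, bo_size, bo_align);
	return 0;
}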
@ -1276,6 +1284,30 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
reservation_object_add_excl_fence(resv, fence);
}
/**
* amdgpu_bo_sync_wait - Wait for BO reservation fences
*
* @bo: buffer object
* @owner: fence owner
* @intr: Whether the wait is interruptible
*
* Returns:
* 0 on success, errno otherwise.
*/
int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_sync sync;
int r;
amdgpu_sync_create(&sync);
amdgpu_sync_resv(adev, &sync, bo->tbo.resv, owner, false);
r = amdgpu_sync_wait(&sync, intr);
amdgpu_sync_free(&sync);
return r;
}
/**
* amdgpu_bo_gpu_offset - return GPU offset of bo
* @bo: amdgpu object for which we query the offset

View File

@ -266,6 +266,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
bool shared);
int amdgpu_bo_sync_wait(struct amdgpu_bo *bo, void *owner, bool intr);
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo);
int amdgpu_bo_validate(struct amdgpu_bo *bo);
int amdgpu_bo_restore_shadow(struct amdgpu_bo *shadow,

View File

@ -626,11 +626,71 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
}
/**
* DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie
* DOC: ppfeatures
*
* The amdgpu driver provides a sysfs API for adjusting which powerplay
* features are enabled. The file ppfeatures is used for this, and it is
* only available on Vega10 and later dGPUs.
*
* Reading back the file will show you the following:
* - Current ppfeature masks
* - List of all supported powerplay features with their names,
* bitmasks and enablement status ('Y'/'N' means enabled/disabled).
*
* To manually enable or disable a specific feature, set or clear the
* corresponding bit in the current ppfeature mask and write the new
* mask back to this file.
*/
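
A minimal userspace sketch of the read-modify-write cycle described above; the sysfs path is an assumption for a typical single-GPU system (card0), and the optional command-line argument is the new mask to write back:

#include <stdio.h>

/* Hypothetical sysfs path for the first GPU; adjust cardN for your setup. */
#define PPFEATURES "/sys/class/drm/card0/device/ppfeatures"

int main(int argc, char **argv)
{
	char line[256];
	FILE *f = fopen(PPFEATURES, "r");

	if (!f) {
		perror("open " PPFEATURES);
		return 1;
	}
	/* Dump the current mask and the per-feature table. */
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);
	fclose(f);

	/* Optionally write a new mask back.  The store handler parses it with
	 * kstrtou64(buf, 0, ...), so a 0x-prefixed hex string is accepted. */
	if (argc > 1) {
		f = fopen(PPFEATURES, "w");
		if (!f) {
			perror("open " PPFEATURES " for writing");
			return 1;
		}
		fprintf(f, "%s\n", argv[1]);
		fclose(f);
	}
	return 0;
}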
static ssize_t amdgpu_set_ppfeature_status(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
uint64_t featuremask;
int ret;
ret = kstrtou64(buf, 0, &featuremask);
if (ret)
return -EINVAL;
pr_debug("featuremask = 0x%llx\n", featuremask);
if (adev->powerplay.pp_funcs->set_ppfeature_status) {
ret = amdgpu_dpm_set_ppfeature_status(adev, featuremask);
if (ret)
return -EINVAL;
}
return count;
}
static ssize_t amdgpu_get_ppfeature_status(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
if (adev->powerplay.pp_funcs->get_ppfeature_status)
return amdgpu_dpm_get_ppfeature_status(adev, buf);
return snprintf(buf, PAGE_SIZE, "\n");
}
/**
* DOC: pp_dpm_sclk pp_dpm_mclk pp_dpm_socclk pp_dpm_fclk pp_dpm_dcefclk
* pp_dpm_pcie
*
* The amdgpu driver provides a sysfs API for adjusting what power levels
* are enabled for a given power state. The files pp_dpm_sclk, pp_dpm_mclk,
* and pp_dpm_pcie are used for this.
* pp_dpm_socclk, pp_dpm_fclk, pp_dpm_dcefclk and pp_dpm_pcie are used for
* this.
*
* pp_dpm_socclk and pp_dpm_dcefclk interfaces are only available for
* Vega10 and later ASICs.
* pp_dpm_fclk interface is only available for Vega20 and later ASICs.
*
* Reading back the files will show you the available power levels within
* the power state and the clock information for those levels.
@ -640,6 +700,8 @@ static ssize_t amdgpu_get_pp_od_clk_voltage(struct device *dev,
* Secondly, enter a new value for each level by writing a string that
* contains "echo xx xx xx > pp_dpm_sclk/mclk/pcie".
* E.g., echo 4 5 6 > pp_dpm_sclk will enable sclk levels 4, 5, and 6.
*
* NOTE: changing the dcefclk max dpm level is not supported at the moment.
*/
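
On the driver side, each of the set handlers below converts that space-separated level list into a bitmask (via amdgpu_read_mask()) before calling force_clock_level(). A stand-alone sketch of that conversion, assuming the helper simply sets bit N for each listed level N:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* "4 5 6" -> 0x70: one bit per DPM level.  This mirrors what the sysfs
 * handlers are assumed to do with amdgpu_read_mask(); it is a sketch,
 * not the kernel helper itself. */
static unsigned int levels_to_mask(const char *buf)
{
	unsigned int mask = 0;
	char tmp[64], *tok, *save;

	strncpy(tmp, buf, sizeof(tmp) - 1);
	tmp[sizeof(tmp) - 1] = '\0';

	for (tok = strtok_r(tmp, " \n", &save); tok;
	     tok = strtok_r(NULL, " \n", &save))
		mask |= 1u << strtoul(tok, NULL, 10);

	return mask;
}

int main(void)
{
	printf("0x%x\n", levels_to_mask("4 5 6"));	/* prints 0x70 */
	return 0;
}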
static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
@ -750,6 +812,114 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
return count;
}
static ssize_t amdgpu_get_pp_dpm_socclk(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
if (adev->powerplay.pp_funcs->print_clock_levels)
return amdgpu_dpm_print_clock_levels(adev, PP_SOCCLK, buf);
else
return snprintf(buf, PAGE_SIZE, "\n");
}
static ssize_t amdgpu_set_pp_dpm_socclk(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
int ret;
uint32_t mask = 0;
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_SOCCLK, mask);
if (ret)
return -EINVAL;
return count;
}
static ssize_t amdgpu_get_pp_dpm_fclk(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
if (adev->powerplay.pp_funcs->print_clock_levels)
return amdgpu_dpm_print_clock_levels(adev, PP_FCLK, buf);
else
return snprintf(buf, PAGE_SIZE, "\n");
}
static ssize_t amdgpu_set_pp_dpm_fclk(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
int ret;
uint32_t mask = 0;
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_FCLK, mask);
if (ret)
return -EINVAL;
return count;
}
static ssize_t amdgpu_get_pp_dpm_dcefclk(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
if (adev->powerplay.pp_funcs->print_clock_levels)
return amdgpu_dpm_print_clock_levels(adev, PP_DCEFCLK, buf);
else
return snprintf(buf, PAGE_SIZE, "\n");
}
static ssize_t amdgpu_set_pp_dpm_dcefclk(struct device *dev,
struct device_attribute *attr,
const char *buf,
size_t count)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
int ret;
uint32_t mask = 0;
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
if (adev->powerplay.pp_funcs->force_clock_level)
ret = amdgpu_dpm_force_clock_level(adev, PP_DCEFCLK, mask);
if (ret)
return -EINVAL;
return count;
}
static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,
struct device_attribute *attr,
char *buf)
@ -990,6 +1160,31 @@ static ssize_t amdgpu_get_busy_percent(struct device *dev,
return snprintf(buf, PAGE_SIZE, "%d\n", value);
}
/**
* DOC: pcie_bw
*
* The amdgpu driver provides a sysfs API for estimating how much data
* has been received and sent by the GPU in the last second through PCIe.
* The file pcie_bw is used for this.
* The Perf counters count the number of received and sent messages and return
* those values, as well as the maximum payload size of a PCIe packet (mps).
* Note that it is not possible to easily and quickly obtain the size of each
* packet transmitted, so we output the max payload size (mps) to allow for
* quick estimation of the PCIe bandwidth usage.
*/
static ssize_t amdgpu_get_pcie_bw(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
uint64_t count0, count1;
amdgpu_asic_get_pcie_usage(adev, &count0, &count1);
return snprintf(buf, PAGE_SIZE, "%llu %llu %i\n",
count0, count1, pcie_get_mps(adev->pdev));
}
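
Since only the message counts and the maximum payload size are exposed, userspace can at best form an upper-bound estimate of the last second's throughput. A small sketch of that arithmetic (the sysfs path is assumed for card0, and treating every counted message as a full mps-sized payload is an assumption, not a guarantee about actual bus usage; the counter order follows the documentation above: received, then sent):

#include <stdio.h>

/* Hypothetical sysfs path for the first GPU. */
#define PCIE_BW "/sys/class/drm/card0/device/pcie_bw"

int main(void)
{
	unsigned long long rx = 0, tx = 0;
	int mps = 0;
	FILE *f = fopen(PCIE_BW, "r");

	if (!f || fscanf(f, "%llu %llu %d", &rx, &tx, &mps) != 3) {
		perror(PCIE_BW);
		return 1;
	}
	fclose(f);

	/* Upper bound: assume every counted message carries a full mps payload. */
	printf("<= %.2f MB/s received, %.2f MB/s sent\n",
	       rx * (double)mps / 1e6, tx * (double)mps / 1e6);
	return 0;
}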
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
amdgpu_get_dpm_forced_performance_level,
@ -1008,6 +1203,15 @@ static DEVICE_ATTR(pp_dpm_sclk, S_IRUGO | S_IWUSR,
static DEVICE_ATTR(pp_dpm_mclk, S_IRUGO | S_IWUSR,
amdgpu_get_pp_dpm_mclk,
amdgpu_set_pp_dpm_mclk);
static DEVICE_ATTR(pp_dpm_socclk, S_IRUGO | S_IWUSR,
amdgpu_get_pp_dpm_socclk,
amdgpu_set_pp_dpm_socclk);
static DEVICE_ATTR(pp_dpm_fclk, S_IRUGO | S_IWUSR,
amdgpu_get_pp_dpm_fclk,
amdgpu_set_pp_dpm_fclk);
static DEVICE_ATTR(pp_dpm_dcefclk, S_IRUGO | S_IWUSR,
amdgpu_get_pp_dpm_dcefclk,
amdgpu_set_pp_dpm_dcefclk);
static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR,
amdgpu_get_pp_dpm_pcie,
amdgpu_set_pp_dpm_pcie);
@ -1025,6 +1229,10 @@ static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
amdgpu_set_pp_od_clk_voltage);
static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
amdgpu_get_busy_percent, NULL);
static DEVICE_ATTR(pcie_bw, S_IRUGO, amdgpu_get_pcie_bw, NULL);
static DEVICE_ATTR(ppfeatures, S_IRUGO | S_IWUSR,
amdgpu_get_ppfeature_status,
amdgpu_set_ppfeature_status);
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
struct device_attribute *attr,
@ -1516,6 +1724,75 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
return count;
}
static ssize_t amdgpu_hwmon_show_sclk(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
struct drm_device *ddev = adev->ddev;
uint32_t sclk;
int r, size = sizeof(sclk);
/* Can't get the clock frequency when the card is off */
if ((adev->flags & AMD_IS_PX) &&
(ddev->switch_power_state != DRM_SWITCH_POWER_ON))
return -EINVAL;
/* sanity check PP is enabled */
if (!(adev->powerplay.pp_funcs &&
adev->powerplay.pp_funcs->read_sensor))
return -EINVAL;
/* get the sclk */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_SCLK,
(void *)&sclk, &size);
if (r)
return r;
return snprintf(buf, PAGE_SIZE, "%d\n", sclk * 10 * 1000);
}
static ssize_t amdgpu_hwmon_show_sclk_label(struct device *dev,
struct device_attribute *attr,
char *buf)
{
return snprintf(buf, PAGE_SIZE, "sclk\n");
}
static ssize_t amdgpu_hwmon_show_mclk(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct amdgpu_device *adev = dev_get_drvdata(dev);
struct drm_device *ddev = adev->ddev;
uint32_t mclk;
int r, size = sizeof(mclk);
/* Can't get the clock frequency when the card is off */
if ((adev->flags & AMD_IS_PX) &&
(ddev->switch_power_state != DRM_SWITCH_POWER_ON))
return -EINVAL;
/* sanity check PP is enabled */
if (!(adev->powerplay.pp_funcs &&
adev->powerplay.pp_funcs->read_sensor))
return -EINVAL;
/* get the mclk */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GFX_MCLK,
(void *)&mclk, &size);
if (r)
return r;
return snprintf(buf, PAGE_SIZE, "%d\n", mclk * 10 * 1000);
}
static ssize_t amdgpu_hwmon_show_mclk_label(struct device *dev,
struct device_attribute *attr,
char *buf)
{
return snprintf(buf, PAGE_SIZE, "mclk\n");
}
/**
* DOC: hwmon
@ -1532,6 +1809,10 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
*
* - GPU fan
*
* - GPU gfx/compute engine clock
*
* - GPU memory clock (dGPU only)
*
* hwmon interfaces for GPU temperature:
*
* - temp1_input: the on die GPU temperature in millidegrees Celsius
@ -1576,6 +1857,12 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
*
* - fan[1-*]_enable: Enable or disable the sensors. 1: Enable, 0: Disable
*
* hwmon interfaces for GPU clocks:
*
* - freq1_input: the gfx/compute clock in hertz
*
* - freq2_input: the memory clock in hertz
*
* You can use hwmon tools like sensors to view this information on your system.
*
*/
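
The show handlers above multiply the sensor value by 10 * 1000, i.e. the powerplay sensor presumably reports the clock in 10 kHz units while hwmon expects hertz. Reading the result back from userspace is then straightforward (the hwmon path below is an assumption; the hwmonN index varies between systems):

#include <stdio.h>

/* Hypothetical path; the hwmonN index differs from system to system. */
#define FREQ1 "/sys/class/drm/card0/device/hwmon/hwmon0/freq1_input"

int main(void)
{
	unsigned long long hz;
	FILE *f = fopen(FREQ1, "r");

	if (!f || fscanf(f, "%llu", &hz) != 1) {
		perror(FREQ1);
		return 1;
	}
	fclose(f);

	printf("gfx clock: %.0f MHz\n", hz / 1e6);
	return 0;
}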
@ -1600,6 +1887,10 @@ static SENSOR_DEVICE_ATTR(power1_average, S_IRUGO, amdgpu_hwmon_show_power_avg,
static SENSOR_DEVICE_ATTR(power1_cap_max, S_IRUGO, amdgpu_hwmon_show_power_cap_max, NULL, 0);
static SENSOR_DEVICE_ATTR(power1_cap_min, S_IRUGO, amdgpu_hwmon_show_power_cap_min, NULL, 0);
static SENSOR_DEVICE_ATTR(power1_cap, S_IRUGO | S_IWUSR, amdgpu_hwmon_show_power_cap, amdgpu_hwmon_set_power_cap, 0);
static SENSOR_DEVICE_ATTR(freq1_input, S_IRUGO, amdgpu_hwmon_show_sclk, NULL, 0);
static SENSOR_DEVICE_ATTR(freq1_label, S_IRUGO, amdgpu_hwmon_show_sclk_label, NULL, 0);
static SENSOR_DEVICE_ATTR(freq2_input, S_IRUGO, amdgpu_hwmon_show_mclk, NULL, 0);
static SENSOR_DEVICE_ATTR(freq2_label, S_IRUGO, amdgpu_hwmon_show_mclk_label, NULL, 0);
static struct attribute *hwmon_attributes[] = {
&sensor_dev_attr_temp1_input.dev_attr.attr,
@ -1622,6 +1913,10 @@ static struct attribute *hwmon_attributes[] = {
&sensor_dev_attr_power1_cap_max.dev_attr.attr,
&sensor_dev_attr_power1_cap_min.dev_attr.attr,
&sensor_dev_attr_power1_cap.dev_attr.attr,
&sensor_dev_attr_freq1_input.dev_attr.attr,
&sensor_dev_attr_freq1_label.dev_attr.attr,
&sensor_dev_attr_freq2_input.dev_attr.attr,
&sensor_dev_attr_freq2_label.dev_attr.attr,
NULL
};
@ -1713,6 +2008,12 @@ static umode_t hwmon_attributes_visible(struct kobject *kobj,
attr == &sensor_dev_attr_in1_label.dev_attr.attr))
return 0;
/* no mclk on APUs */
if ((adev->flags & AMD_IS_APU) &&
(attr == &sensor_dev_attr_freq2_input.dev_attr.attr ||
attr == &sensor_dev_attr_freq2_label.dev_attr.attr))
return 0;
return effective_mode;
}
@ -2071,6 +2372,25 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
DRM_ERROR("failed to create device file pp_dpm_mclk\n");
return ret;
}
if (adev->asic_type >= CHIP_VEGA10) {
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_socclk);
if (ret) {
DRM_ERROR("failed to create device file pp_dpm_socclk\n");
return ret;
}
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_dcefclk);
if (ret) {
DRM_ERROR("failed to create device file pp_dpm_dcefclk\n");
return ret;
}
}
if (adev->asic_type >= CHIP_VEGA20) {
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_fclk);
if (ret) {
DRM_ERROR("failed to create device file pp_dpm_fclk\n");
return ret;
}
}
ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie);
if (ret) {
DRM_ERROR("failed to create device file pp_dpm_pcie\n");
@ -2109,12 +2429,31 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
"gpu_busy_level\n");
return ret;
}
/* PCIe Perf counters won't work on APU nodes */
if (!(adev->flags & AMD_IS_APU)) {
ret = device_create_file(adev->dev, &dev_attr_pcie_bw);
if (ret) {
DRM_ERROR("failed to create device file pcie_bw\n");
return ret;
}
}
ret = amdgpu_debugfs_pm_init(adev);
if (ret) {
DRM_ERROR("Failed to register debugfs file for dpm!\n");
return ret;
}
if ((adev->asic_type >= CHIP_VEGA10) &&
!(adev->flags & AMD_IS_APU)) {
ret = device_create_file(adev->dev,
&dev_attr_ppfeatures);
if (ret) {
DRM_ERROR("failed to create device file "
"ppfeatures\n");
return ret;
}
}
adev->pm.sysfs_initialized = true;
return 0;
@ -2139,7 +2478,13 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
if (adev->asic_type >= CHIP_VEGA10) {
device_remove_file(adev->dev, &dev_attr_pp_dpm_socclk);
device_remove_file(adev->dev, &dev_attr_pp_dpm_dcefclk);
}
device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
if (adev->asic_type >= CHIP_VEGA20)
device_remove_file(adev->dev, &dev_attr_pp_dpm_fclk);
device_remove_file(adev->dev, &dev_attr_pp_sclk_od);
device_remove_file(adev->dev, &dev_attr_pp_mclk_od);
device_remove_file(adev->dev,
@ -2148,6 +2493,11 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
device_remove_file(adev->dev,
&dev_attr_pp_od_clk_voltage);
device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
if (!(adev->flags & AMD_IS_APU))
device_remove_file(adev->dev, &dev_attr_pcie_bw);
if ((adev->asic_type >= CHIP_VEGA10) &&
!(adev->flags & AMD_IS_APU))
device_remove_file(adev->dev, &dev_attr_ppfeatures);
}
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)

View File

@ -67,9 +67,6 @@ static int psp_sw_init(void *handle)
psp->adev = adev;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
return 0;
ret = psp_init_microcode(psp);
if (ret) {
DRM_ERROR("Failed to load psp firmware!\n");
@ -83,9 +80,6 @@ static int psp_sw_fini(void *handle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
return 0;
release_firmware(adev->psp.sos_fw);
adev->psp.sos_fw = NULL;
release_firmware(adev->psp.asd_fw);
@ -142,13 +136,24 @@ psp_cmd_submit_buf(struct psp_context *psp,
while (*((unsigned int *)psp->fence_buf) != index)
msleep(1);
/* the status field must be 0 after FW is loaded */
if (ucode && psp->cmd_buf_mem->resp.status) {
DRM_ERROR("failed loading with status (%d) and ucode id (%d)\n",
psp->cmd_buf_mem->resp.status, ucode->ucode_id);
return -EINVAL;
/* In some cases the PSP response status is not 0 even though there is
* no problem while the command is submitted. Some versions of the PSP FW
* don't write 0 to that field.
* So here we only print a warning instead of an error during PSP
* initialization, to avoid breaking hw_init, and we don't return
* -EINVAL.
*/
if (psp->cmd_buf_mem->resp.status) {
if (ucode)
DRM_WARN("failed to load ucode id (%d) ",
ucode->ucode_id);
DRM_WARN("psp command failed and response status is (%d)\n",
psp->cmd_buf_mem->resp.status);
}
/* get xGMI session id from response buffer */
cmd->resp.session_id = psp->cmd_buf_mem->resp.session_id;
if (ucode) {
ucode->tmr_mc_addr_lo = psp->cmd_buf_mem->resp.fw_addr_lo;
ucode->tmr_mc_addr_hi = psp->cmd_buf_mem->resp.fw_addr_hi;
@ -500,6 +505,98 @@ static int psp_hw_start(struct psp_context *psp)
return 0;
}
static int psp_get_fw_type(struct amdgpu_firmware_info *ucode,
enum psp_gfx_fw_type *type)
{
switch (ucode->ucode_id) {
case AMDGPU_UCODE_ID_SDMA0:
*type = GFX_FW_TYPE_SDMA0;
break;
case AMDGPU_UCODE_ID_SDMA1:
*type = GFX_FW_TYPE_SDMA1;
break;
case AMDGPU_UCODE_ID_CP_CE:
*type = GFX_FW_TYPE_CP_CE;
break;
case AMDGPU_UCODE_ID_CP_PFP:
*type = GFX_FW_TYPE_CP_PFP;
break;
case AMDGPU_UCODE_ID_CP_ME:
*type = GFX_FW_TYPE_CP_ME;
break;
case AMDGPU_UCODE_ID_CP_MEC1:
*type = GFX_FW_TYPE_CP_MEC;
break;
case AMDGPU_UCODE_ID_CP_MEC1_JT:
*type = GFX_FW_TYPE_CP_MEC_ME1;
break;
case AMDGPU_UCODE_ID_CP_MEC2:
*type = GFX_FW_TYPE_CP_MEC;
break;
case AMDGPU_UCODE_ID_CP_MEC2_JT:
*type = GFX_FW_TYPE_CP_MEC_ME2;
break;
case AMDGPU_UCODE_ID_RLC_G:
*type = GFX_FW_TYPE_RLC_G;
break;
case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_CNTL:
*type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_CNTL;
break;
case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_GPM_MEM:
*type = GFX_FW_TYPE_RLC_RESTORE_LIST_GPM_MEM;
break;
case AMDGPU_UCODE_ID_RLC_RESTORE_LIST_SRM_MEM:
*type = GFX_FW_TYPE_RLC_RESTORE_LIST_SRM_MEM;
break;
case AMDGPU_UCODE_ID_SMC:
*type = GFX_FW_TYPE_SMU;
break;
case AMDGPU_UCODE_ID_UVD:
*type = GFX_FW_TYPE_UVD;
break;
case AMDGPU_UCODE_ID_UVD1:
*type = GFX_FW_TYPE_UVD1;
break;
case AMDGPU_UCODE_ID_VCE:
*type = GFX_FW_TYPE_VCE;
break;
case AMDGPU_UCODE_ID_VCN:
*type = GFX_FW_TYPE_VCN;
break;
case AMDGPU_UCODE_ID_DMCU_ERAM:
*type = GFX_FW_TYPE_DMCU_ERAM;
break;
case AMDGPU_UCODE_ID_DMCU_INTV:
*type = GFX_FW_TYPE_DMCU_ISR;
break;
case AMDGPU_UCODE_ID_MAXIMUM:
default:
return -EINVAL;
}
return 0;
}
static int psp_prep_load_ip_fw_cmd_buf(struct amdgpu_firmware_info *ucode,
struct psp_gfx_cmd_resp *cmd)
{
int ret;
uint64_t fw_mem_mc_addr = ucode->mc_addr;
memset(cmd, 0, sizeof(struct psp_gfx_cmd_resp));
cmd->cmd_id = GFX_CMD_ID_LOAD_IP_FW;
cmd->cmd.cmd_load_ip_fw.fw_phy_addr_lo = lower_32_bits(fw_mem_mc_addr);
cmd->cmd.cmd_load_ip_fw.fw_phy_addr_hi = upper_32_bits(fw_mem_mc_addr);
cmd->cmd.cmd_load_ip_fw.fw_size = ucode->ucode_size;
ret = psp_get_fw_type(ucode, &cmd->cmd.cmd_load_ip_fw.fw_type);
if (ret)
DRM_ERROR("Unknown firmware type\n");
return ret;
}
static int psp_np_fw_load(struct psp_context *psp)
{
int i, ret;
@ -521,7 +618,7 @@ static int psp_np_fw_load(struct psp_context *psp)
/*skip ucode loading in SRIOV VF */
continue;
ret = psp_prep_cmd_buf(ucode, psp->cmd);
ret = psp_prep_load_ip_fw_cmd_buf(ucode, psp->cmd);
if (ret)
return ret;
@ -546,7 +643,7 @@ static int psp_load_fw(struct amdgpu_device *adev)
struct psp_context *psp = &adev->psp;
if (amdgpu_sriov_vf(adev) && adev->in_gpu_reset) {
psp_ring_destroy(psp, PSP_RING_TYPE__KM);
psp_ring_stop(psp, PSP_RING_TYPE__KM); /* should not destroy ring, only stop */
goto skip_memalloc;
}
@ -623,10 +720,6 @@ static int psp_hw_init(void *handle)
int ret;
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
return 0;
mutex_lock(&adev->firmware.mutex);
/*
* This sequence is just used on hw_init only once, no need on
@ -656,9 +749,6 @@ static int psp_hw_fini(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct psp_context *psp = &adev->psp;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
return 0;
if (adev->gmc.xgmi.num_physical_nodes > 1 &&
psp->xgmi_context.initialized == 1)
psp_xgmi_terminate(psp);
@ -687,9 +777,6 @@ static int psp_suspend(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct psp_context *psp = &adev->psp;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
return 0;
if (adev->gmc.xgmi.num_physical_nodes > 1 &&
psp->xgmi_context.initialized == 1) {
ret = psp_xgmi_terminate(psp);
@ -714,9 +801,6 @@ static int psp_resume(void *handle)
struct amdgpu_device *adev = (struct amdgpu_device *)handle;
struct psp_context *psp = &adev->psp;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
return 0;
DRM_INFO("PSP is resuming...\n");
mutex_lock(&adev->firmware.mutex);
@ -752,11 +836,6 @@ static bool psp_check_fw_loading_status(struct amdgpu_device *adev,
{
struct amdgpu_firmware_info *ucode = NULL;
if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
DRM_INFO("firmware is not loaded by PSP\n");
return true;
}
if (!adev->firmware.fw_size)
return false;

View File

@ -65,8 +65,6 @@ struct psp_funcs
int (*init_microcode)(struct psp_context *psp);
int (*bootloader_load_sysdrv)(struct psp_context *psp);
int (*bootloader_load_sos)(struct psp_context *psp);
int (*prep_cmd_buf)(struct amdgpu_firmware_info *ucode,
struct psp_gfx_cmd_resp *cmd);
int (*ring_init)(struct psp_context *psp, enum psp_ring_type ring_type);
int (*ring_create)(struct psp_context *psp,
enum psp_ring_type ring_type);
@ -176,7 +174,6 @@ struct psp_xgmi_topology_info {
struct psp_xgmi_node_info nodes[AMDGPU_XGMI_MAX_CONNECTED_NODES];
};
#define psp_prep_cmd_buf(ucode, type) (psp)->funcs->prep_cmd_buf((ucode), (type))
#define psp_ring_init(psp, type) (psp)->funcs->ring_init((psp), (type))
#define psp_ring_create(psp, type) (psp)->funcs->ring_create((psp), (type))
#define psp_ring_stop(psp, type) (psp)->funcs->ring_stop((psp), (type))

View File

@ -131,7 +131,7 @@ struct amdgpu_ring_funcs {
void (*emit_ib)(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch);
uint32_t flags);
void (*emit_fence)(struct amdgpu_ring *ring, uint64_t addr,
uint64_t seq, unsigned flags);
void (*emit_pipeline_sync)(struct amdgpu_ring *ring);
@ -229,7 +229,7 @@ struct amdgpu_ring {
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
#define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
#define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
#define amdgpu_ring_emit_ib(r, job, ib, c) ((r)->funcs->emit_ib((r), (job), (ib), (c)))
#define amdgpu_ring_emit_ib(r, job, ib, flags) ((r)->funcs->emit_ib((r), (job), (ib), (flags)))
#define amdgpu_ring_emit_pipeline_sync(r) (r)->funcs->emit_pipeline_sync((r))
#define amdgpu_ring_emit_vm_flush(r, vmid, addr) (r)->funcs->emit_vm_flush((r), (vmid), (addr))
#define amdgpu_ring_emit_fence(r, addr, seq, flags) (r)->funcs->emit_fence((r), (addr), (seq), (flags))

View File

@ -388,7 +388,7 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
soffset, eoffset, eoffset - soffset);
if (i->fence)
seq_printf(m, " protected by 0x%08x on context %llu",
seq_printf(m, " protected by 0x%016llx on context %llu",
i->fence->seqno, i->fence->context);
seq_printf(m, "\n");

View File

@ -54,16 +54,20 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
enum drm_sched_priority priority)
{
struct file *filp = fget(fd);
struct drm_file *file;
struct amdgpu_fpriv *fpriv;
struct amdgpu_ctx *ctx;
uint32_t id;
int r;
if (!filp)
return -EINVAL;
file = filp->private_data;
fpriv = file->driver_priv;
r = amdgpu_file_to_fpriv(filp, &fpriv);
if (r) {
fput(filp);
return r;
}
idr_for_each_entry(&fpriv->ctx_mgr.ctx_handles, ctx, id)
amdgpu_ctx_priority_override(ctx, priority);
@ -72,6 +76,39 @@ static int amdgpu_sched_process_priority_override(struct amdgpu_device *adev,
return 0;
}
static int amdgpu_sched_context_priority_override(struct amdgpu_device *adev,
int fd,
unsigned ctx_id,
enum drm_sched_priority priority)
{
struct file *filp = fget(fd);
struct amdgpu_fpriv *fpriv;
struct amdgpu_ctx *ctx;
int r;
if (!filp)
return -EINVAL;
r = amdgpu_file_to_fpriv(filp, &fpriv);
if (r) {
fput(filp);
return r;
}
ctx = amdgpu_ctx_get(fpriv, ctx_id);
if (!ctx) {
fput(filp);
return -EINVAL;
}
amdgpu_ctx_priority_override(ctx, priority);
amdgpu_ctx_put(ctx);
fput(filp);
return 0;
}
int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
@ -81,7 +118,7 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
int r;
priority = amdgpu_to_sched_priority(args->in.priority);
if (args->in.flags || priority == DRM_SCHED_PRIORITY_INVALID)
if (priority == DRM_SCHED_PRIORITY_INVALID)
return -EINVAL;
switch (args->in.op) {
@ -90,6 +127,12 @@ int amdgpu_sched_ioctl(struct drm_device *dev, void *data,
args->in.fd,
priority);
break;
case AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE:
r = amdgpu_sched_context_priority_override(adev,
args->in.fd,
args->in.ctx_id,
priority);
break;
default:
DRM_ERROR("Invalid sched op specified: %d\n", args->in.op);
r = -EINVAL;

View File

@ -76,9 +76,10 @@ TRACE_EVENT(amdgpu_mm_wreg,
);
TRACE_EVENT(amdgpu_iv,
TP_PROTO(struct amdgpu_iv_entry *iv),
TP_ARGS(iv),
TP_PROTO(unsigned ih, struct amdgpu_iv_entry *iv),
TP_ARGS(ih, iv),
TP_STRUCT__entry(
__field(unsigned, ih)
__field(unsigned, client_id)
__field(unsigned, src_id)
__field(unsigned, ring_id)
@ -90,6 +91,7 @@ TRACE_EVENT(amdgpu_iv,
__array(unsigned, src_data, 4)
),
TP_fast_assign(
__entry->ih = ih;
__entry->client_id = iv->client_id;
__entry->src_id = iv->src_id;
__entry->ring_id = iv->ring_id;
@ -103,8 +105,9 @@ TRACE_EVENT(amdgpu_iv,
__entry->src_data[2] = iv->src_data[2];
__entry->src_data[3] = iv->src_data[3];
),
TP_printk("client_id:%u src_id:%u ring:%u vmid:%u timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x",
__entry->client_id, __entry->src_id,
TP_printk("ih:%u client_id:%u src_id:%u ring:%u vmid:%u "
"timestamp: %llu pasid:%u src_data: %08x %08x %08x %08x",
__entry->ih, __entry->client_id, __entry->src_id,
__entry->ring_id, __entry->vmid,
__entry->timestamp, __entry->pasid,
__entry->src_data[0], __entry->src_data[1],

View File

@ -1546,7 +1546,8 @@ static struct ttm_bo_driver amdgpu_bo_driver = {
.io_mem_reserve = &amdgpu_ttm_io_mem_reserve,
.io_mem_free = &amdgpu_ttm_io_mem_free,
.io_mem_pfn = amdgpu_ttm_io_mem_pfn,
.access_memory = &amdgpu_ttm_access_memory
.access_memory = &amdgpu_ttm_access_memory,
.del_from_lru_notify = &amdgpu_vm_del_from_lru_notify
};
/*
@ -1755,7 +1756,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
}
r = amdgpu_bo_create_kernel(adev, adev->gds.mem.gfx_partition_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GDS,
4, AMDGPU_GEM_DOMAIN_GDS,
&adev->gds.gds_gfx_bo, NULL, NULL);
if (r)
return r;
@ -1768,7 +1769,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
}
r = amdgpu_bo_create_kernel(adev, adev->gds.gws.gfx_partition_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_GWS,
1, AMDGPU_GEM_DOMAIN_GWS,
&adev->gds.gws_gfx_bo, NULL, NULL);
if (r)
return r;
@ -1781,7 +1782,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
}
r = amdgpu_bo_create_kernel(adev, adev->gds.oa.gfx_partition_size,
PAGE_SIZE, AMDGPU_GEM_DOMAIN_OA,
1, AMDGPU_GEM_DOMAIN_OA,
&adev->gds.oa_gfx_bo, NULL, NULL);
if (r)
return r;

View File

@ -1035,7 +1035,7 @@ out:
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
uint32_t flags)
{
amdgpu_ring_write(ring, VCE_CMD_IB);
amdgpu_ring_write(ring, lower_32_bits(ib->gpu_addr));

View File

@ -66,7 +66,7 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp);
int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx);
int amdgpu_vce_ring_parse_cs_vm(struct amdgpu_cs_parser *p, uint32_t ib_idx);
void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_job *job,
struct amdgpu_ib *ib, bool ctx_switch);
struct amdgpu_ib *ib, uint32_t flags);
void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
unsigned flags);
int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);

View File

@ -107,14 +107,6 @@ struct amdgpu_pte_update_params {
* DMA addresses to use for mapping, used during VM update by CPU
*/
dma_addr_t *pages_addr;
/**
* @kptr:
*
* Kernel pointer of PD/PT BO that needs to be updated,
* used during VM update by CPU
*/
void *kptr;
};
/**
@ -623,6 +615,28 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
list_add(&entry->tv.head, validated);
}
void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo)
{
struct amdgpu_bo *abo;
struct amdgpu_vm_bo_base *bo_base;
if (!amdgpu_bo_is_amdgpu_bo(bo))
return;
if (bo->mem.placement & TTM_PL_FLAG_NO_EVICT)
return;
abo = ttm_to_amdgpu_bo(bo);
if (!abo->parent)
return;
for (bo_base = abo->vm_bo; bo_base; bo_base = bo_base->next) {
struct amdgpu_vm *vm = bo_base->vm;
if (abo->tbo.resv == vm->root.base.bo->tbo.resv)
vm->bulk_moveable = false;
}
}
/**
* amdgpu_vm_move_to_lru_tail - move all BOs to the end of LRU
*
@ -686,8 +700,6 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
struct amdgpu_vm_bo_base *bo_base, *tmp;
int r = 0;
vm->bulk_moveable &= list_empty(&vm->evicted);
list_for_each_entry_safe(bo_base, tmp, &vm->evicted, vm_status) {
struct amdgpu_bo *bo = bo_base->bo;
@ -801,15 +813,22 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
addr += ats_entries * 8;
}
if (entries)
if (entries) {
uint64_t value = 0;
/* Workaround for fault priority problem on GMC9 */
if (level == AMDGPU_VM_PTB && adev->asic_type >= CHIP_VEGA10)
value = AMDGPU_PTE_EXECUTABLE;
amdgpu_vm_set_pte_pde(adev, &job->ibs[0], addr, 0,
entries, 0, 0);
entries, 0, value);
}
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > 64);
r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
AMDGPU_FENCE_OWNER_UNDEFINED, false);
AMDGPU_FENCE_OWNER_KFD, false);
if (r)
goto error_free;
@ -1313,31 +1332,6 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
}
}
/**
* amdgpu_vm_wait_pd - Wait for PT BOs to be free.
*
* @adev: amdgpu_device pointer
* @vm: related vm
* @owner: fence owner
*
* Returns:
* 0 on success, errno otherwise.
*/
static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
void *owner)
{
struct amdgpu_sync sync;
int r;
amdgpu_sync_create(&sync);
amdgpu_sync_resv(adev, &sync, vm->root.base.bo->tbo.resv, owner, false);
r = amdgpu_sync_wait(&sync, true);
amdgpu_sync_free(&sync);
return r;
}
/**
* amdgpu_vm_update_func - helper to call update function
*
@ -1432,7 +1426,8 @@ restart:
params.adev = adev;
if (vm->use_cpu_for_update) {
r = amdgpu_vm_wait_pd(adev, vm, AMDGPU_FENCE_OWNER_VM);
r = amdgpu_bo_sync_wait(vm->root.base.bo,
AMDGPU_FENCE_OWNER_VM, true);
if (unlikely(r))
return r;
@ -1505,20 +1500,27 @@ error:
}
/**
* amdgpu_vm_update_huge - figure out parameters for PTE updates
* amdgpu_vm_update_flags - figure out flags for PTE updates
*
* Make sure to set the right flags for the PTEs at the desired level.
*/
static void amdgpu_vm_update_huge(struct amdgpu_pte_update_params *params,
struct amdgpu_bo *bo, unsigned level,
uint64_t pe, uint64_t addr,
unsigned count, uint32_t incr,
uint64_t flags)
static void amdgpu_vm_update_flags(struct amdgpu_pte_update_params *params,
struct amdgpu_bo *bo, unsigned level,
uint64_t pe, uint64_t addr,
unsigned count, uint32_t incr,
uint64_t flags)
{
if (level != AMDGPU_VM_PTB) {
flags |= AMDGPU_PDE_PTE;
amdgpu_gmc_get_vm_pde(params->adev, level, &addr, &flags);
} else if (params->adev->asic_type >= CHIP_VEGA10 &&
!(flags & AMDGPU_PTE_VALID) &&
!(flags & AMDGPU_PTE_PRT)) {
/* Workaround for fault priority problem on GMC9 */
flags |= AMDGPU_PTE_EXECUTABLE;
}
amdgpu_vm_update_func(params, bo, pe, addr, count, incr, flags);
@ -1675,9 +1677,9 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
uint64_t upd_end = min(entry_end, frag_end);
unsigned nptes = (upd_end - frag_start) >> shift;
amdgpu_vm_update_huge(params, pt, cursor.level,
pe_start, dst, nptes, incr,
flags | AMDGPU_PTE_FRAG(frag));
amdgpu_vm_update_flags(params, pt, cursor.level,
pe_start, dst, nptes, incr,
flags | AMDGPU_PTE_FRAG(frag));
pe_start += nptes * 8;
dst += (uint64_t)nptes * AMDGPU_GPU_PAGE_SIZE << shift;
@ -1746,22 +1748,29 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
params.adev = adev;
params.vm = vm;
/* sync to everything on unmapping */
/* sync to everything except eviction fences on unmapping */
if (!(flags & AMDGPU_PTE_VALID))
owner = AMDGPU_FENCE_OWNER_UNDEFINED;
owner = AMDGPU_FENCE_OWNER_KFD;
if (vm->use_cpu_for_update) {
/* params.src is used as flag to indicate system Memory */
if (pages_addr)
params.src = ~0;
/* Wait for PT BOs to be free. PTs share the same resv. object
/* Wait for PT BOs to be idle. PTs share the same resv. object
* as the root PD BO
*/
r = amdgpu_vm_wait_pd(adev, vm, owner);
r = amdgpu_bo_sync_wait(vm->root.base.bo, owner, true);
if (unlikely(r))
return r;
/* Wait for any BO move to be completed */
if (exclusive) {
r = dma_fence_wait(exclusive, true);
if (unlikely(r))
return r;
}
params.func = amdgpu_vm_cpu_set_ptes;
params.pages_addr = pages_addr;
return amdgpu_vm_update_ptes(&params, start, last + 1,
@ -1775,13 +1784,12 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
/*
* reserve space for two commands every (1 << BLOCK_SIZE)
* entries or 2k dwords (whatever is smaller)
*
* The second command is for the shadow pagetables.
*/
ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
/* The second command is for the shadow pagetables. */
if (vm->root.base.bo->shadow)
ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1) * 2;
else
ncmds = ((nptes >> min(adev->vm_manager.block_size, 11u)) + 1);
ncmds *= 2;
/* padding, etc. */
ndw = 64;
@ -1800,10 +1808,11 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
ndw += ncmds * 10;
/* extra commands for begin/end fragments */
ncmds = 2 * adev->vm_manager.fragment_size;
if (vm->root.base.bo->shadow)
ndw += 2 * 10 * adev->vm_manager.fragment_size * 2;
else
ndw += 2 * 10 * adev->vm_manager.fragment_size;
ncmds *= 2;
ndw += 10 * ncmds;
params.func = amdgpu_vm_do_set_ptes;
}
@ -3005,7 +3014,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),
WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
vm->last_update = NULL;
@ -3135,7 +3144,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm, uns
vm->pte_support_ats = pte_support_ats;
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),
WARN_ONCE((vm->use_cpu_for_update && !amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
if (vm->pasid) {

View File

@ -363,4 +363,6 @@ int amdgpu_vm_add_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key)
void amdgpu_vm_clear_fault(struct amdgpu_retryfault_hashtable *fault_hash, u64 key);
void amdgpu_vm_del_from_lru_notify(struct ttm_buffer_object *bo);
#endif

View File

@ -40,26 +40,40 @@ void *amdgpu_xgmi_hive_try_lock(struct amdgpu_hive_info *hive)
return &hive->device_list;
}
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev)
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock)
{
int i;
struct amdgpu_hive_info *tmp;
if (!adev->gmc.xgmi.hive_id)
return NULL;
mutex_lock(&xgmi_mutex);
for (i = 0 ; i < hive_count; ++i) {
tmp = &xgmi_hives[i];
if (tmp->hive_id == adev->gmc.xgmi.hive_id)
if (tmp->hive_id == adev->gmc.xgmi.hive_id) {
if (lock)
mutex_lock(&tmp->hive_lock);
mutex_unlock(&xgmi_mutex);
return tmp;
}
}
if (i >= AMDGPU_MAX_XGMI_HIVE)
if (i >= AMDGPU_MAX_XGMI_HIVE) {
mutex_unlock(&xgmi_mutex);
return NULL;
}
/* initialize new hive if not exist */
tmp = &xgmi_hives[hive_count++];
tmp->hive_id = adev->gmc.xgmi.hive_id;
INIT_LIST_HEAD(&tmp->device_list);
mutex_init(&tmp->hive_lock);
mutex_init(&tmp->reset_lock);
if (lock)
mutex_lock(&tmp->hive_lock);
mutex_unlock(&xgmi_mutex);
return tmp;
}
@ -77,10 +91,6 @@ int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_dev
"XGMI: Set topology failure on device %llx, hive %llx, ret %d",
adev->gmc.xgmi.node_id,
adev->gmc.xgmi.hive_id, ret);
else
dev_info(adev->dev, "XGMI: Set topology for node %d, hive 0x%llx.\n",
adev->gmc.xgmi.physical_node_id,
adev->gmc.xgmi.hive_id);
return ret;
}
@ -111,10 +121,14 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
return ret;
}
mutex_lock(&xgmi_mutex);
hive = amdgpu_get_xgmi_hive(adev);
if (!hive)
hive = amdgpu_get_xgmi_hive(adev, 1);
if (!hive) {
ret = -EINVAL;
dev_err(adev->dev,
"XGMI: node 0x%llx, can not match hive 0x%llx in the hive list.\n",
adev->gmc.xgmi.node_id, adev->gmc.xgmi.hive_id);
goto exit;
}
hive_topology = &hive->topology_info;
@ -142,8 +156,11 @@ int amdgpu_xgmi_add_device(struct amdgpu_device *adev)
break;
}
dev_info(adev->dev, "XGMI: Add node %d, hive 0x%llx.\n",
adev->gmc.xgmi.physical_node_id, adev->gmc.xgmi.hive_id);
mutex_unlock(&hive->hive_lock);
exit:
mutex_unlock(&xgmi_mutex);
return ret;
}
@ -154,15 +171,14 @@ void amdgpu_xgmi_remove_device(struct amdgpu_device *adev)
if (!adev->gmc.xgmi.supported)
return;
mutex_lock(&xgmi_mutex);
hive = amdgpu_get_xgmi_hive(adev);
hive = amdgpu_get_xgmi_hive(adev, 1);
if (!hive)
goto exit;
return;
if (!(hive->number_devices--))
if (!(hive->number_devices--)) {
mutex_destroy(&hive->hive_lock);
exit:
mutex_unlock(&xgmi_mutex);
mutex_destroy(&hive->reset_lock);
} else {
mutex_unlock(&hive->hive_lock);
}
}

View File

@ -29,10 +29,11 @@ struct amdgpu_hive_info {
struct list_head device_list;
struct psp_xgmi_topology_info topology_info;
int number_devices;
struct mutex hive_lock;
struct mutex hive_lock,
reset_lock;
};
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev);
struct amdgpu_hive_info *amdgpu_get_xgmi_hive(struct amdgpu_device *adev, int lock);
int amdgpu_xgmi_update_topology(struct amdgpu_hive_info *hive, struct amdgpu_device *adev);
int amdgpu_xgmi_add_device(struct amdgpu_device *adev);
void amdgpu_xgmi_remove_device(struct amdgpu_device *adev);

View File

@ -27,6 +27,8 @@
#include <linux/slab.h>
#include <asm/unaligned.h>
#include <drm/drm_util.h>
#define ATOM_DEBUG
#include "atom.h"

File diff suppressed because it is too large

View File

@ -1,349 +0,0 @@
/*
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __CI_DPM_H__
#define __CI_DPM_H__
#include "amdgpu_atombios.h"
#include "ppsmc.h"
#define SMU__NUM_SCLK_DPM_STATE 8
#define SMU__NUM_MCLK_DPM_LEVELS 6
#define SMU__NUM_LCLK_DPM_LEVELS 8
#define SMU__NUM_PCIE_DPM_LEVELS 8
#include "smu7_discrete.h"
#define CISLANDS_MAX_HARDWARE_POWERLEVELS 2
#define CISLANDS_UNUSED_GPIO_PIN 0x7F
struct ci_pl {
u32 mclk;
u32 sclk;
enum amdgpu_pcie_gen pcie_gen;
u16 pcie_lane;
};
struct ci_ps {
u16 performance_level_count;
bool dc_compatible;
u32 sclk_t;
struct ci_pl performance_levels[CISLANDS_MAX_HARDWARE_POWERLEVELS];
};
struct ci_dpm_level {
bool enabled;
u32 value;
u32 param1;
};
#define CISLAND_MAX_DEEPSLEEP_DIVIDER_ID 5
#define MAX_REGULAR_DPM_NUMBER 8
#define CISLAND_MINIMUM_ENGINE_CLOCK 800
struct ci_single_dpm_table {
u32 count;
struct ci_dpm_level dpm_levels[MAX_REGULAR_DPM_NUMBER];
};
struct ci_dpm_table {
struct ci_single_dpm_table sclk_table;
struct ci_single_dpm_table mclk_table;
struct ci_single_dpm_table pcie_speed_table;
struct ci_single_dpm_table vddc_table;
struct ci_single_dpm_table vddci_table;
struct ci_single_dpm_table mvdd_table;
};
struct ci_mc_reg_entry {
u32 mclk_max;
u32 mc_data[SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE];
};
struct ci_mc_reg_table {
u8 last;
u8 num_entries;
u16 valid_flag;
struct ci_mc_reg_entry mc_reg_table_entry[MAX_AC_TIMING_ENTRIES];
SMU7_Discrete_MCRegisterAddress mc_reg_address[SMU7_DISCRETE_MC_REGISTER_ARRAY_SIZE];
};
struct ci_ulv_parm
{
bool supported;
u32 cg_ulv_parameter;
u32 volt_change_delay;
struct ci_pl pl;
};
#define CISLANDS_MAX_LEAKAGE_COUNT 8
struct ci_leakage_voltage {
u16 count;
u16 leakage_id[CISLANDS_MAX_LEAKAGE_COUNT];
u16 actual_voltage[CISLANDS_MAX_LEAKAGE_COUNT];
};
struct ci_dpm_level_enable_mask {
u32 uvd_dpm_enable_mask;
u32 vce_dpm_enable_mask;
u32 acp_dpm_enable_mask;
u32 samu_dpm_enable_mask;
u32 sclk_dpm_enable_mask;
u32 mclk_dpm_enable_mask;
u32 pcie_dpm_enable_mask;
};
struct ci_vbios_boot_state
{
u16 mvdd_bootup_value;
u16 vddc_bootup_value;
u16 vddci_bootup_value;
u32 sclk_bootup_value;
u32 mclk_bootup_value;
u16 pcie_gen_bootup_value;
u16 pcie_lane_bootup_value;
};
struct ci_clock_registers {
u32 cg_spll_func_cntl;
u32 cg_spll_func_cntl_2;
u32 cg_spll_func_cntl_3;
u32 cg_spll_func_cntl_4;
u32 cg_spll_spread_spectrum;
u32 cg_spll_spread_spectrum_2;
u32 dll_cntl;
u32 mclk_pwrmgt_cntl;
u32 mpll_ad_func_cntl;
u32 mpll_dq_func_cntl;
u32 mpll_func_cntl;
u32 mpll_func_cntl_1;
u32 mpll_func_cntl_2;
u32 mpll_ss1;
u32 mpll_ss2;
};
struct ci_thermal_temperature_setting {
s32 temperature_low;
s32 temperature_high;
s32 temperature_shutdown;
};
struct ci_pcie_perf_range {
u16 max;
u16 min;
};
enum ci_pt_config_reg_type {
CISLANDS_CONFIGREG_MMR = 0,
CISLANDS_CONFIGREG_SMC_IND,
CISLANDS_CONFIGREG_DIDT_IND,
CISLANDS_CONFIGREG_CACHE,
CISLANDS_CONFIGREG_MAX
};
#define POWERCONTAINMENT_FEATURE_BAPM 0x00000001
#define POWERCONTAINMENT_FEATURE_TDCLimit 0x00000002
#define POWERCONTAINMENT_FEATURE_PkgPwrLimit 0x00000004
struct ci_pt_config_reg {
u32 offset;
u32 mask;
u32 shift;
u32 value;
enum ci_pt_config_reg_type type;
};
struct ci_pt_defaults {
u8 svi_load_line_en;
u8 svi_load_line_vddc;
u8 tdc_vddc_throttle_release_limit_perc;
u8 tdc_mawt;
u8 tdc_waterfall_ctl;
u8 dte_ambient_temp_base;
u32 display_cac;
u32 bapm_temp_gradient;
u16 bapmti_r[SMU7_DTE_ITERATIONS * SMU7_DTE_SOURCES * SMU7_DTE_SINKS];
u16 bapmti_rc[SMU7_DTE_ITERATIONS * SMU7_DTE_SOURCES * SMU7_DTE_SINKS];
};
#define DPMTABLE_OD_UPDATE_SCLK 0x00000001
#define DPMTABLE_OD_UPDATE_MCLK 0x00000002
#define DPMTABLE_UPDATE_SCLK 0x00000004
#define DPMTABLE_UPDATE_MCLK 0x00000008
struct ci_power_info {
struct ci_dpm_table dpm_table;
struct ci_dpm_table golden_dpm_table;
u32 voltage_control;
u32 mvdd_control;
u32 vddci_control;
u32 active_auto_throttle_sources;
struct ci_clock_registers clock_registers;
u16 acpi_vddc;
u16 acpi_vddci;
enum amdgpu_pcie_gen force_pcie_gen;
enum amdgpu_pcie_gen acpi_pcie_gen;
struct ci_leakage_voltage vddc_leakage;
struct ci_leakage_voltage vddci_leakage;
u16 max_vddc_in_pp_table;
u16 min_vddc_in_pp_table;
u16 max_vddci_in_pp_table;
u16 min_vddci_in_pp_table;
u32 mclk_strobe_mode_threshold;
u32 mclk_stutter_mode_threshold;
u32 mclk_edc_enable_threshold;
u32 mclk_edc_wr_enable_threshold;
struct ci_vbios_boot_state vbios_boot_state;
/* smc offsets */
u32 sram_end;
u32 dpm_table_start;
u32 soft_regs_start;
u32 mc_reg_table_start;
u32 fan_table_start;
u32 arb_table_start;
/* smc tables */
SMU7_Discrete_DpmTable smc_state_table;
SMU7_Discrete_MCRegisters smc_mc_reg_table;
SMU7_Discrete_PmFuses smc_powertune_table;
/* other stuff */
struct ci_mc_reg_table mc_reg_table;
struct atom_voltage_table vddc_voltage_table;
struct atom_voltage_table vddci_voltage_table;
struct atom_voltage_table mvdd_voltage_table;
struct ci_ulv_parm ulv;
u32 power_containment_features;
const struct ci_pt_defaults *powertune_defaults;
u32 dte_tj_offset;
bool vddc_phase_shed_control;
struct ci_thermal_temperature_setting thermal_temp_setting;
struct ci_dpm_level_enable_mask dpm_level_enable_mask;
u32 need_update_smu7_dpm_table;
u32 sclk_dpm_key_disabled;
u32 mclk_dpm_key_disabled;
u32 pcie_dpm_key_disabled;
u32 thermal_sclk_dpm_enabled;
struct ci_pcie_perf_range pcie_gen_performance;
struct ci_pcie_perf_range pcie_lane_performance;
struct ci_pcie_perf_range pcie_gen_powersaving;
struct ci_pcie_perf_range pcie_lane_powersaving;
u32 activity_target[SMU7_MAX_LEVELS_GRAPHICS];
u32 mclk_activity_target;
u32 low_sclk_interrupt_t;
u32 last_mclk_dpm_enable_mask;
u32 sys_pcie_mask;
/* caps */
bool caps_power_containment;
bool caps_cac;
bool caps_sq_ramping;
bool caps_db_ramping;
bool caps_td_ramping;
bool caps_tcp_ramping;
bool caps_fps;
bool caps_sclk_ds;
bool caps_sclk_ss_support;
bool caps_mclk_ss_support;
bool caps_uvd_dpm;
bool caps_vce_dpm;
bool caps_samu_dpm;
bool caps_acp_dpm;
bool caps_automatic_dc_transition;
bool caps_sclk_throttle_low_notification;
bool caps_dynamic_ac_timing;
bool caps_od_fuzzy_fan_control_support;
/* flags */
bool thermal_protection;
bool pcie_performance_request;
bool dynamic_ss;
bool dll_default_on;
bool cac_enabled;
bool uvd_enabled;
bool battery_state;
bool pspp_notify_required;
bool enable_bapm_feature;
bool enable_tdc_limit_feature;
bool enable_pkg_pwr_tracking_feature;
bool use_pcie_performance_levels;
bool use_pcie_powersaving_levels;
bool uvd_power_gated;
/* driver states */
struct amdgpu_ps current_rps;
struct ci_ps current_ps;
struct amdgpu_ps requested_rps;
struct ci_ps requested_ps;
/* fan control */
bool fan_ctrl_is_in_default_mode;
bool fan_is_controlled_by_smc;
u32 t_min;
u32 fan_ctrl_default_mode;
};
#define CISLANDS_VOLTAGE_CONTROL_NONE 0x0
#define CISLANDS_VOLTAGE_CONTROL_BY_GPIO 0x1
#define CISLANDS_VOLTAGE_CONTROL_BY_SVID2 0x2
#define CISLANDS_Q88_FORMAT_CONVERSION_UNIT 256
#define CISLANDS_VRC_DFLT0 0x3FFFC000
#define CISLANDS_VRC_DFLT1 0x000400
#define CISLANDS_VRC_DFLT2 0xC00080
#define CISLANDS_VRC_DFLT3 0xC00200
#define CISLANDS_VRC_DFLT4 0xC01680
#define CISLANDS_VRC_DFLT5 0xC00033
#define CISLANDS_VRC_DFLT6 0xC00033
#define CISLANDS_VRC_DFLT7 0x3FFFC000
#define CISLANDS_CGULVPARAMETER_DFLT 0x00040035
#define CISLAND_TARGETACTIVITY_DFLT 30
#define CISLAND_MCLK_TARGETACTIVITY_DFLT 10
#define PCIE_PERF_REQ_REMOVE_REGISTRY 0
#define PCIE_PERF_REQ_FORCE_LOWPOWER 1
#define PCIE_PERF_REQ_PECI_GEN1 2
#define PCIE_PERF_REQ_PECI_GEN2 3
#define PCIE_PERF_REQ_PECI_GEN3 4
#define CISLANDS_SSTU_DFLT 0
#define CISLANDS_SST_DFLT 0x00C8
/* XXX are these ok? */
#define CISLANDS_TEMP_RANGE_MIN (90 * 1000)
#define CISLANDS_TEMP_RANGE_MAX (120 * 1000)
int amdgpu_ci_copy_bytes_to_smc(struct amdgpu_device *adev,
u32 smc_start_address,
const u8 *src, u32 byte_count, u32 limit);
void amdgpu_ci_start_smc(struct amdgpu_device *adev);
void amdgpu_ci_reset_smc(struct amdgpu_device *adev);
int amdgpu_ci_program_jump_on_start(struct amdgpu_device *adev);
void amdgpu_ci_stop_smc_clock(struct amdgpu_device *adev);
void amdgpu_ci_start_smc_clock(struct amdgpu_device *adev);
bool amdgpu_ci_is_smc_running(struct amdgpu_device *adev);
PPSMC_Result amdgpu_ci_send_msg_to_smc(struct amdgpu_device *adev, PPSMC_Msg msg);
PPSMC_Result amdgpu_ci_wait_for_smc_inactive(struct amdgpu_device *adev);
int amdgpu_ci_load_smc_ucode(struct amdgpu_device *adev, u32 limit);
int amdgpu_ci_read_smc_sram_dword(struct amdgpu_device *adev,
u32 smc_address, u32 *value, u32 limit);
int amdgpu_ci_write_smc_sram_dword(struct amdgpu_device *adev,
u32 smc_address, u32 value, u32 limit);
#endif

View File

@ -1,279 +0,0 @@
/*
* Copyright 2011 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
* Authors: Alex Deucher
*/
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "cikd.h"
#include "ppsmc.h"
#include "amdgpu_ucode.h"
#include "ci_dpm.h"
#include "smu/smu_7_0_1_d.h"
#include "smu/smu_7_0_1_sh_mask.h"
static int ci_set_smc_sram_address(struct amdgpu_device *adev,
u32 smc_address, u32 limit)
{
if (smc_address & 3)
return -EINVAL;
if ((smc_address + 3) > limit)
return -EINVAL;
WREG32(mmSMC_IND_INDEX_0, smc_address);
WREG32_P(mmSMC_IND_ACCESS_CNTL, 0, ~SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK);
return 0;
}
int amdgpu_ci_copy_bytes_to_smc(struct amdgpu_device *adev,
u32 smc_start_address,
const u8 *src, u32 byte_count, u32 limit)
{
unsigned long flags;
u32 data, original_data;
u32 addr;
u32 extra_shift;
int ret = 0;
if (smc_start_address & 3)
return -EINVAL;
if ((smc_start_address + byte_count) > limit)
return -EINVAL;
addr = smc_start_address;
spin_lock_irqsave(&adev->smc_idx_lock, flags);
while (byte_count >= 4) {
/* SMC address space is BE */
data = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | src[3];
ret = ci_set_smc_sram_address(adev, addr, limit);
if (ret)
goto done;
WREG32(mmSMC_IND_DATA_0, data);
src += 4;
byte_count -= 4;
addr += 4;
}
/* RMW for the final bytes */
if (byte_count > 0) {
data = 0;
ret = ci_set_smc_sram_address(adev, addr, limit);
if (ret)
goto done;
original_data = RREG32(mmSMC_IND_DATA_0);
extra_shift = 8 * (4 - byte_count);
while (byte_count > 0) {
data = (data << 8) + *src++;
byte_count--;
}
data <<= extra_shift;
data |= (original_data & ~((~0UL) << extra_shift));
ret = ci_set_smc_sram_address(adev, addr, limit);
if (ret)
goto done;
WREG32(mmSMC_IND_DATA_0, data);
}
done:
spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
return ret;
}
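
For reference, a minimal userspace sketch of the big-endian packing and read-modify-write tail merge performed above (illustrative only, not driver code; the helper name is made up):

#include <stdint.h>
#include <stdio.h>

/* Pack the remaining 1-3 bytes big-endian into the top of a 32-bit word
 * and preserve the untouched low bytes of the word already in SMC SRAM,
 * mirroring the tail handling in amdgpu_ci_copy_bytes_to_smc(). */
static uint32_t merge_tail_be(uint32_t original, const uint8_t *src, unsigned int count)
{
	uint32_t data = 0;
	unsigned int shift = 8 * (4 - count);	/* count is 1..3 here */

	while (count--)
		data = (data << 8) | *src++;
	data <<= shift;

	return data | (original & ~(~0u << shift));
}

int main(void)
{
	const uint8_t tail[2] = { 0xAA, 0xBB };

	/* existing word 0x11223344, two new bytes -> 0xAABB3344 */
	printf("0x%08X\n", merge_tail_be(0x11223344, tail, 2));
	return 0;
}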
void amdgpu_ci_start_smc(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SMC(ixSMC_SYSCON_RESET_CNTL);
tmp &= ~SMC_SYSCON_RESET_CNTL__rst_reg_MASK;
WREG32_SMC(ixSMC_SYSCON_RESET_CNTL, tmp);
}
void amdgpu_ci_reset_smc(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SMC(ixSMC_SYSCON_RESET_CNTL);
tmp |= SMC_SYSCON_RESET_CNTL__rst_reg_MASK;
WREG32_SMC(ixSMC_SYSCON_RESET_CNTL, tmp);
}
int amdgpu_ci_program_jump_on_start(struct amdgpu_device *adev)
{
static u8 data[] = { 0xE0, 0x00, 0x80, 0x40 };
return amdgpu_ci_copy_bytes_to_smc(adev, 0x0, data, 4, sizeof(data)+1);
}
void amdgpu_ci_stop_smc_clock(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0);
tmp |= SMC_SYSCON_CLOCK_CNTL_0__ck_disable_MASK;
WREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0, tmp);
}
void amdgpu_ci_start_smc_clock(struct amdgpu_device *adev)
{
u32 tmp = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0);
tmp &= ~SMC_SYSCON_CLOCK_CNTL_0__ck_disable_MASK;
WREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0, tmp);
}
bool amdgpu_ci_is_smc_running(struct amdgpu_device *adev)
{
u32 clk = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0);
u32 pc_c = RREG32_SMC(ixSMC_PC_C);
if (!(clk & SMC_SYSCON_CLOCK_CNTL_0__ck_disable_MASK) && (0x20100 <= pc_c))
return true;
return false;
}
PPSMC_Result amdgpu_ci_send_msg_to_smc(struct amdgpu_device *adev, PPSMC_Msg msg)
{
u32 tmp;
int i;
if (!amdgpu_ci_is_smc_running(adev))
return PPSMC_Result_Failed;
WREG32(mmSMC_MESSAGE_0, msg);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(mmSMC_RESP_0);
if (tmp != 0)
break;
udelay(1);
}
tmp = RREG32(mmSMC_RESP_0);
return (PPSMC_Result)tmp;
}
PPSMC_Result amdgpu_ci_wait_for_smc_inactive(struct amdgpu_device *adev)
{
u32 tmp;
int i;
if (!amdgpu_ci_is_smc_running(adev))
return PPSMC_Result_OK;
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0);
if ((tmp & SMC_SYSCON_CLOCK_CNTL_0__cken_MASK) == 0)
break;
udelay(1);
}
return PPSMC_Result_OK;
}
int amdgpu_ci_load_smc_ucode(struct amdgpu_device *adev, u32 limit)
{
const struct smc_firmware_header_v1_0 *hdr;
unsigned long flags;
u32 ucode_start_address;
u32 ucode_size;
const u8 *src;
u32 data;
if (!adev->pm.fw)
return -EINVAL;
hdr = (const struct smc_firmware_header_v1_0 *)adev->pm.fw->data;
amdgpu_ucode_print_smc_hdr(&hdr->header);
adev->pm.fw_version = le32_to_cpu(hdr->header.ucode_version);
ucode_start_address = le32_to_cpu(hdr->ucode_start_addr);
ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes);
src = (const u8 *)
(adev->pm.fw->data + le32_to_cpu(hdr->header.ucode_array_offset_bytes));
if (ucode_size & 3)
return -EINVAL;
spin_lock_irqsave(&adev->smc_idx_lock, flags);
WREG32(mmSMC_IND_INDEX_0, ucode_start_address);
WREG32_P(mmSMC_IND_ACCESS_CNTL, SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK,
~SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK);
while (ucode_size >= 4) {
/* SMC address space is BE */
data = (src[0] << 24) | (src[1] << 16) | (src[2] << 8) | src[3];
WREG32(mmSMC_IND_DATA_0, data);
src += 4;
ucode_size -= 4;
}
WREG32_P(mmSMC_IND_ACCESS_CNTL, 0, ~SMC_IND_ACCESS_CNTL__AUTO_INCREMENT_IND_0_MASK);
spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
return 0;
}
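
The firmware load relies on the SMC indirect-register auto-increment mode: program the index register once, stream big-endian dwords into the data register, then clear the auto-increment bit. A rough sketch of that pattern with the register accessors stubbed out (accessor names hypothetical):

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

/* stand-ins for the WREG32/WREG32_P accesses on the SMC index/data pair */
static void write_index(uint32_t addr) { printf("INDEX  <- 0x%08X\n", addr); }
static void write_data(uint32_t val)   { printf("DATA   <- 0x%08X\n", val); }
static void set_auto_increment(int on) { printf("AUTOINC = %d\n", on); }

static void stream_ucode(uint32_t start, const uint8_t *src, size_t size)
{
	write_index(start);
	set_auto_increment(1);		/* hardware bumps the address per data write */

	while (size >= 4) {
		/* SMC address space is big-endian */
		write_data((uint32_t)src[0] << 24 | src[1] << 16 | src[2] << 8 | src[3]);
		src += 4;
		size -= 4;
	}
	set_auto_increment(0);
}

int main(void)
{
	const uint8_t ucode[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

	stream_ucode(0x20000, ucode, sizeof(ucode));
	return 0;
}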
int amdgpu_ci_read_smc_sram_dword(struct amdgpu_device *adev,
u32 smc_address, u32 *value, u32 limit)
{
unsigned long flags;
int ret;
spin_lock_irqsave(&adev->smc_idx_lock, flags);
ret = ci_set_smc_sram_address(adev, smc_address, limit);
if (ret == 0)
*value = RREG32(mmSMC_IND_DATA_0);
spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
return ret;
}
int amdgpu_ci_write_smc_sram_dword(struct amdgpu_device *adev,
u32 smc_address, u32 value, u32 limit)
{
unsigned long flags;
int ret;
spin_lock_irqsave(&adev->smc_idx_lock, flags);
ret = ci_set_smc_sram_address(adev, smc_address, limit);
if (ret == 0)
WREG32(mmSMC_IND_DATA_0, value);
spin_unlock_irqrestore(&adev->smc_idx_lock, flags);
return ret;
}

View File

@ -1741,6 +1741,69 @@ static bool cik_need_full_reset(struct amdgpu_device *adev)
return true;
}
static void cik_get_pcie_usage(struct amdgpu_device *adev, uint64_t *count0,
uint64_t *count1)
{
uint32_t perfctr = 0;
uint64_t cnt0_of, cnt1_of;
int tmp;
/* This reports 0 on APUs, so return to avoid writing/reading registers
* that may or may not be different from their GPU counterparts
*/
if (adev->flags & AMD_IS_APU)
return;
/* Set the 2 events that we wish to watch, defined above */
/* Reg 40 is # received msgs, Reg 104 is # of posted requests sent */
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT0_SEL, 40);
perfctr = REG_SET_FIELD(perfctr, PCIE_PERF_CNTL_TXCLK, EVENT1_SEL, 104);
/* Write to enable desired perf counters */
WREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK, perfctr);
/* Zero out and enable the perf counters
* Write 0x5:
* Bit 0 = Start all counters(1)
* Bit 2 = Global counter reset enable(1)
*/
WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000005);
msleep(1000);
/* Load the shadow and disable the perf counters
* Write 0x2:
* Bit 0 = Stop counters(0)
* Bit 1 = Load the shadow counters(1)
*/
WREG32_PCIE(ixPCIE_PERF_COUNT_CNTL, 0x00000002);
/* Read register values to get any >32bit overflow */
tmp = RREG32_PCIE(ixPCIE_PERF_CNTL_TXCLK);
cnt0_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER0_UPPER);
cnt1_of = REG_GET_FIELD(tmp, PCIE_PERF_CNTL_TXCLK, COUNTER1_UPPER);
/* Get the values and add the overflow */
*count0 = RREG32_PCIE(ixPCIE_PERF_COUNT0_TXCLK) | (cnt0_of << 32);
*count1 = RREG32_PCIE(ixPCIE_PERF_COUNT1_TXCLK) | (cnt1_of << 32);
}
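
The counters themselves are 32-bit; the upper bits live in the COUNTERn_UPPER overflow fields, so the final value is assembled by shifting the overflow word up by 32. A tiny standalone illustration (values invented):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t count_lo = 0x89ABCDEF;	/* e.g. PCIE_PERF_COUNT0_TXCLK */
	uint64_t count_hi = 0x3;	/* e.g. COUNTER0_UPPER field */

	/* same assembly as *count0 above: low word OR'd with the overflow << 32 */
	uint64_t count = count_lo | (count_hi << 32);

	printf("0x%016llX\n", (unsigned long long)count);
	return 0;
}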
static bool cik_need_reset_on_init(struct amdgpu_device *adev)
{
u32 clock_cntl, pc;
if (adev->flags & AMD_IS_APU)
return false;
/* check if the SMC is already running */
clock_cntl = RREG32_SMC(ixSMC_SYSCON_CLOCK_CNTL_0);
pc = RREG32_SMC(ixSMC_PC_C);
if ((0 == REG_GET_FIELD(clock_cntl, SMC_SYSCON_CLOCK_CNTL_0, ck_disable)) &&
(0x20100 <= pc))
return true;
return false;
}
static const struct amdgpu_asic_funcs cik_asic_funcs =
{
.read_disabled_bios = &cik_read_disabled_bios,
@ -1756,6 +1819,8 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
.invalidate_hdp = &cik_invalidate_hdp,
.need_full_reset = &cik_need_full_reset,
.init_doorbell_index = &legacy_doorbell_index_init,
.get_pcie_usage = &cik_get_pcie_usage,
.need_reset_on_init = &cik_need_reset_on_init,
};
static int cik_common_early_init(void *handle)
@ -2005,10 +2070,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
amdgpu_device_ip_block_add(adev, &gfx_v7_2_ip_block);
amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
if (amdgpu_dpm == -1)
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
else
amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)
@ -2026,10 +2088,7 @@ int cik_set_ip_blocks(struct amdgpu_device *adev)
amdgpu_device_ip_block_add(adev, &cik_ih_ip_block);
amdgpu_device_ip_block_add(adev, &gfx_v7_3_ip_block);
amdgpu_device_ip_block_add(adev, &cik_sdma_ip_block);
if (amdgpu_dpm == -1)
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
else
amdgpu_device_ip_block_add(adev, &ci_smu_ip_block);
amdgpu_device_ip_block_add(adev, &pp_smu_ip_block);
if (adev->enable_virtual_display)
amdgpu_device_ip_block_add(adev, &dce_virtual_ip_block);
#if defined(CONFIG_DRM_AMD_DC)

View File

@ -24,7 +24,6 @@
#ifndef __CIK_DPM_H__
#define __CIK_DPM_H__
extern const struct amdgpu_ip_block_version ci_smu_ip_block;
extern const struct amdgpu_ip_block_version kv_smu_ip_block;
#endif

View File

@ -103,9 +103,9 @@ static void cik_ih_disable_interrupts(struct amdgpu_device *adev)
*/
static int cik_ih_irq_init(struct amdgpu_device *adev)
{
struct amdgpu_ih_ring *ih = &adev->irq.ih;
int rb_bufsz;
u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
u64 wptr_off;
/* disable irqs */
cik_ih_disable_interrupts(adev);
@ -131,9 +131,8 @@ static int cik_ih_irq_init(struct amdgpu_device *adev)
ih_rb_cntl |= IH_RB_CNTL__WPTR_WRITEBACK_ENABLE_MASK;
/* set the writeback address whether it's enabled or not */
wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4);
WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off));
WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF);
WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr));
WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF);
WREG32(mmIH_RB_CNTL, ih_rb_cntl);
@ -183,11 +182,12 @@ static void cik_ih_irq_disable(struct amdgpu_device *adev)
* Used by cik_irq_process().
* Returns the value of the wptr.
*/
static u32 cik_ih_get_wptr(struct amdgpu_device *adev)
static u32 cik_ih_get_wptr(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih)
{
u32 wptr, tmp;
wptr = le32_to_cpu(adev->wb.wb[adev->irq.ih.wptr_offs]);
wptr = le32_to_cpu(*ih->wptr_cpu);
if (wptr & IH_RB_WPTR__RB_OVERFLOW_MASK) {
wptr &= ~IH_RB_WPTR__RB_OVERFLOW_MASK;
@ -196,13 +196,13 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev)
* this should allow us to catchup.
*/
dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
wptr, adev->irq.ih.rptr, (wptr + 16) & adev->irq.ih.ptr_mask);
adev->irq.ih.rptr = (wptr + 16) & adev->irq.ih.ptr_mask;
wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
ih->rptr = (wptr + 16) & ih->ptr_mask;
tmp = RREG32(mmIH_RB_CNTL);
tmp |= IH_RB_CNTL__WPTR_OVERFLOW_CLEAR_MASK;
WREG32(mmIH_RB_CNTL, tmp);
}
return (wptr & adev->irq.ih.ptr_mask);
return (wptr & ih->ptr_mask);
}
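
On overflow the handler simply skips ahead: the read pointer is bumped past the entry being overwritten, and both pointers stay masked to the power-of-two ring size. A small sketch of that wrap arithmetic, assuming a 64 KB ring for illustration:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	const uint32_t ring_size = 64 * 1024;	/* bytes, power of two (assumed) */
	const uint32_t ptr_mask = ring_size - 1;
	uint32_t wptr = 16;			/* hardware has wrapped past the reader */
	uint32_t rptr;

	/* drop the oldest 16-byte entry so processing can catch up */
	rptr = (wptr + 16) & ptr_mask;

	printf("rptr=%u wptr=%u\n", rptr, wptr & ptr_mask);
	return 0;
}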
/* CIK IV Ring
@ -237,16 +237,17 @@ static u32 cik_ih_get_wptr(struct amdgpu_device *adev)
* position and also advance the position.
*/
static void cik_ih_decode_iv(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih,
struct amdgpu_iv_entry *entry)
{
/* wptr/rptr are in bytes! */
u32 ring_index = adev->irq.ih.rptr >> 2;
u32 ring_index = ih->rptr >> 2;
uint32_t dw[4];
dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]);
dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]);
dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
entry->src_id = dw[0] & 0xff;
@ -256,7 +257,7 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev,
entry->pasid = (dw[2] >> 16) & 0xffff;
/* wptr/rptr are in bytes! */
adev->irq.ih.rptr += 16;
ih->rptr += 16;
}
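
Each IV ring entry is 16 bytes, i.e. four little-endian dwords, which is why the dword index is rptr >> 2 and rptr advances by 16. A minimal decode of the two fields visible in this hunk (the remaining field extractions are elided by the diff context):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* one 16-byte IV entry, already converted to CPU byte order */
	uint32_t dw[4] = { 0x000000E0, 0x00000000, 0x12340000, 0x00000000 };

	uint32_t src_id = dw[0] & 0xff;			/* interrupt source id */
	uint32_t pasid = (dw[2] >> 16) & 0xffff;	/* process address space id */

	printf("src_id=%u pasid=0x%x\n", src_id, pasid);
	return 0;
}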
/**
@ -266,9 +267,10 @@ static void cik_ih_decode_iv(struct amdgpu_device *adev,
*
* Set the IH ring buffer rptr.
*/
static void cik_ih_set_rptr(struct amdgpu_device *adev)
static void cik_ih_set_rptr(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih)
{
WREG32(mmIH_RB_RPTR, adev->irq.ih.rptr);
WREG32(mmIH_RB_RPTR, ih->rptr);
}
static int cik_ih_early_init(void *handle)

View File

@ -220,7 +220,7 @@ static void cik_sdma_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count)
static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
uint32_t flags)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 extra_bits = vmid & 0xf;

View File

@ -103,9 +103,9 @@ static void cz_ih_disable_interrupts(struct amdgpu_device *adev)
*/
static int cz_ih_irq_init(struct amdgpu_device *adev)
{
int rb_bufsz;
struct amdgpu_ih_ring *ih = &adev->irq.ih;
u32 interrupt_cntl, ih_cntl, ih_rb_cntl;
u64 wptr_off;
int rb_bufsz;
/* disable irqs */
cz_ih_disable_interrupts(adev);
@ -133,9 +133,8 @@ static int cz_ih_irq_init(struct amdgpu_device *adev)
ih_rb_cntl = REG_SET_FIELD(ih_rb_cntl, IH_RB_CNTL, WPTR_WRITEBACK_ENABLE, 1);
/* set the writeback address whether it's enabled or not */
wptr_off = adev->wb.gpu_addr + (adev->irq.ih.wptr_offs * 4);
WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(wptr_off));
WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(wptr_off) & 0xFF);
WREG32(mmIH_RB_WPTR_ADDR_LO, lower_32_bits(ih->wptr_addr));
WREG32(mmIH_RB_WPTR_ADDR_HI, upper_32_bits(ih->wptr_addr) & 0xFF);
WREG32(mmIH_RB_CNTL, ih_rb_cntl);
@ -185,11 +184,12 @@ static void cz_ih_irq_disable(struct amdgpu_device *adev)
* Used by cz_irq_process(VI).
* Returns the value of the wptr.
*/
static u32 cz_ih_get_wptr(struct amdgpu_device *adev)
static u32 cz_ih_get_wptr(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih)
{
u32 wptr, tmp;
wptr = le32_to_cpu(adev->wb.wb[adev->irq.ih.wptr_offs]);
wptr = le32_to_cpu(*ih->wptr_cpu);
if (REG_GET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW)) {
wptr = REG_SET_FIELD(wptr, IH_RB_WPTR, RB_OVERFLOW, 0);
@ -198,13 +198,13 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev)
* this should allow us to catchup.
*/
dev_warn(adev->dev, "IH ring buffer overflow (0x%08X, 0x%08X, 0x%08X)\n",
wptr, adev->irq.ih.rptr, (wptr + 16) & adev->irq.ih.ptr_mask);
adev->irq.ih.rptr = (wptr + 16) & adev->irq.ih.ptr_mask;
wptr, ih->rptr, (wptr + 16) & ih->ptr_mask);
ih->rptr = (wptr + 16) & ih->ptr_mask;
tmp = RREG32(mmIH_RB_CNTL);
tmp = REG_SET_FIELD(tmp, IH_RB_CNTL, WPTR_OVERFLOW_CLEAR, 1);
WREG32(mmIH_RB_CNTL, tmp);
}
return (wptr & adev->irq.ih.ptr_mask);
return (wptr & ih->ptr_mask);
}
/**
@ -216,16 +216,17 @@ static u32 cz_ih_get_wptr(struct amdgpu_device *adev)
* position and also advance the position.
*/
static void cz_ih_decode_iv(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
struct amdgpu_ih_ring *ih,
struct amdgpu_iv_entry *entry)
{
/* wptr/rptr are in bytes! */
u32 ring_index = adev->irq.ih.rptr >> 2;
u32 ring_index = ih->rptr >> 2;
uint32_t dw[4];
dw[0] = le32_to_cpu(adev->irq.ih.ring[ring_index + 0]);
dw[1] = le32_to_cpu(adev->irq.ih.ring[ring_index + 1]);
dw[2] = le32_to_cpu(adev->irq.ih.ring[ring_index + 2]);
dw[3] = le32_to_cpu(adev->irq.ih.ring[ring_index + 3]);
dw[0] = le32_to_cpu(ih->ring[ring_index + 0]);
dw[1] = le32_to_cpu(ih->ring[ring_index + 1]);
dw[2] = le32_to_cpu(ih->ring[ring_index + 2]);
dw[3] = le32_to_cpu(ih->ring[ring_index + 3]);
entry->client_id = AMDGPU_IRQ_CLIENTID_LEGACY;
entry->src_id = dw[0] & 0xff;
@ -235,7 +236,7 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev,
entry->pasid = (dw[2] >> 16) & 0xffff;
/* wptr/rptr are in bytes! */
adev->irq.ih.rptr += 16;
ih->rptr += 16;
}
/**
@ -245,9 +246,10 @@ static void cz_ih_decode_iv(struct amdgpu_device *adev,
*
* Set the IH ring buffer rptr.
*/
static void cz_ih_set_rptr(struct amdgpu_device *adev)
static void cz_ih_set_rptr(struct amdgpu_device *adev,
struct amdgpu_ih_ring *ih)
{
WREG32(mmIH_RB_RPTR, adev->irq.ih.rptr);
WREG32(mmIH_RB_RPTR, ih->rptr);
}
static int cz_ih_early_init(void *handle)

View File

@ -1682,7 +1682,7 @@ static void dce_v10_0_afmt_setmode(struct drm_encoder *encoder,
dce_v10_0_audio_write_sad_regs(encoder);
dce_v10_0_audio_write_latency_fields(encoder, mode);
err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false);
err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode);
if (err < 0) {
DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
return;

View File

@ -1724,7 +1724,7 @@ static void dce_v11_0_afmt_setmode(struct drm_encoder *encoder,
dce_v11_0_audio_write_sad_regs(encoder);
dce_v11_0_audio_write_latency_fields(encoder, mode);
err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false);
err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode);
if (err < 0) {
DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
return;

View File

@ -1423,6 +1423,7 @@ static void dce_v6_0_audio_set_avi_infoframe(struct drm_encoder *encoder,
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_encoder *amdgpu_encoder = to_amdgpu_encoder(encoder);
struct amdgpu_encoder_atom_dig *dig = amdgpu_encoder->enc_priv;
struct drm_connector *connector = amdgpu_get_connector_for_encoder(encoder);
struct hdmi_avi_infoframe frame;
u8 buffer[HDMI_INFOFRAME_HEADER_SIZE + HDMI_AVI_INFOFRAME_SIZE];
uint8_t *payload = buffer + 3;
@ -1430,7 +1431,7 @@ static void dce_v6_0_audio_set_avi_infoframe(struct drm_encoder *encoder,
ssize_t err;
u32 tmp;
err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false);
err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode);
if (err < 0) {
DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
return;
@ -2979,7 +2980,7 @@ static int dce_v6_0_pageflip_irq(struct amdgpu_device *adev,
struct amdgpu_irq_src *source,
struct amdgpu_iv_entry *entry)
{
unsigned long flags;
unsigned long flags;
unsigned crtc_id;
struct amdgpu_crtc *amdgpu_crtc;
struct amdgpu_flip_work *works;

View File

@ -1616,7 +1616,7 @@ static void dce_v8_0_afmt_setmode(struct drm_encoder *encoder,
dce_v8_0_audio_write_sad_regs(encoder);
dce_v8_0_audio_write_latency_fields(encoder, mode);
err = drm_hdmi_avi_infoframe_from_display_mode(&frame, mode, false);
err = drm_hdmi_avi_infoframe_from_display_mode(&frame, connector, mode);
if (err < 0) {
DRM_ERROR("failed to setup AVI infoframe: %zd\n", err);
return;

View File

@ -1842,13 +1842,13 @@ static void gfx_v6_0_ring_emit_fence(struct amdgpu_ring *ring, u64 addr,
static void gfx_v6_0_ring_emit_ib(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
uint32_t flags)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
/* insert SWITCH_BUFFER packet before first IB in the ring frame */
if (ctx_switch) {
if (flags & AMDGPU_HAVE_CTX_SWITCH) {
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
amdgpu_ring_write(ring, 0);
}
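
The emit_ib callbacks across all rings trade the old bool ctx_switch argument for a uint32_t flags word; the boolean becomes the AMDGPU_HAVE_CTX_SWITCH bit, leaving room for further per-IB flags. A self-contained toy showing the shape of the change (the flag's numeric value is assumed for illustration only):

#include <stdint.h>
#include <stdio.h>

#define AMDGPU_HAVE_CTX_SWITCH (1u << 0)	/* value assumed for illustration */

/* stand-in for the new emit_ib signature: a flags word instead of a bool */
static void emit_ib(uint32_t flags)
{
	if (flags & AMDGPU_HAVE_CTX_SWITCH)
		printf("emit SWITCH_BUFFER before the IB\n");
	printf("emit INDIRECT_BUFFER\n");
}

int main(void)
{
	emit_ib(AMDGPU_HAVE_CTX_SWITCH);	/* first IB of a new ring frame */
	emit_ib(0);				/* later IBs in the same frame */
	return 0;
}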

View File

@ -2228,13 +2228,13 @@ static void gfx_v7_0_ring_emit_fence_compute(struct amdgpu_ring *ring,
static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
uint32_t flags)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
/* insert SWITCH_BUFFER packet before first IB in the ring frame */
if (ctx_switch) {
if (flags & AMDGPU_HAVE_CTX_SWITCH) {
amdgpu_ring_write(ring, PACKET3(PACKET3_SWITCH_BUFFER, 0));
amdgpu_ring_write(ring, 0);
}
@ -2259,11 +2259,27 @@ static void gfx_v7_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
static void gfx_v7_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
uint32_t flags)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
/* Currently, there is a high possibility to get wave ID mismatch
* between ME and GDS, leading to a hw deadlock, because ME generates
* different wave IDs than the GDS expects. This situation happens
* randomly when at least 5 compute pipes use GDS ordered append.
* The wave IDs generated by ME are also wrong after suspend/resume.
* Those are probably bugs somewhere else in the kernel driver.
*
* Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
* GDS to 0 for this ring (me/pipe).
*/
if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
}
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
@ -5000,7 +5016,7 @@ static const struct amdgpu_ring_funcs gfx_v7_0_ring_funcs_compute = {
7 + /* gfx_v7_0_ring_emit_pipeline_sync */
CIK_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v7_0_ring_emit_vm_flush */
7 + 7 + 7, /* gfx_v7_0_ring_emit_fence_compute x3 for user fence, vm fence */
.emit_ib_size = 4, /* gfx_v7_0_ring_emit_ib_compute */
.emit_ib_size = 7, /* gfx_v7_0_ring_emit_ib_compute */
.emit_ib = gfx_v7_0_ring_emit_ib_compute,
.emit_fence = gfx_v7_0_ring_emit_fence_compute,
.emit_pipeline_sync = gfx_v7_0_ring_emit_pipeline_sync,
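
The .emit_ib_size bump from 4 to 7 dwords covers the worst case with the optional wave-ID reset emitted; a rough accounting based on the packets written above:

    PACKET3_SET_CONFIG_REG, count 1  : 3 dwords (header, register offset, value)
    PACKET3_INDIRECT_BUFFER, count 2 : 4 dwords (header, IB address lo/hi, control)
    ---------------------------------------------------------------------------
    worst-case emit_ib_compute       : 7 dwords

The same accounting applies to the matching gfx_v8_0 and gfx_v9_0 changes below.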
@ -5057,6 +5073,7 @@ static void gfx_v7_0_set_gds_init(struct amdgpu_device *adev)
adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
adev->gds.gws.total_size = 64;
adev->gds.oa.total_size = 16;
adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
if (adev->gds.mem.total_size == 64 * 1024) {
adev->gds.mem.gfx_partition_size = 4096;

View File

@ -6047,7 +6047,7 @@ static void gfx_v8_0_ring_emit_vgt_flush(struct amdgpu_ring *ring)
static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
uint32_t flags)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
@ -6079,11 +6079,27 @@ static void gfx_v8_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
static void gfx_v8_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
uint32_t flags)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
/* Currently, there is a high possibility to get wave ID mismatch
* between ME and GDS, leading to a hw deadlock, because ME generates
* different wave IDs than the GDS expects. This situation happens
* randomly when at least 5 compute pipes use GDS ordered append.
* The wave IDs generated by ME are also wrong after suspend/resume.
* Those are probably bugs somewhere else in the kernel driver.
*
* Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
* GDS to 0 for this ring (me/pipe).
*/
if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID - PACKET3_SET_CONFIG_REG_START);
amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
}
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
amdgpu_ring_write(ring,
#ifdef __BIG_ENDIAN
@ -6890,7 +6906,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_compute = {
7 + /* gfx_v8_0_ring_emit_pipeline_sync */
VI_FLUSH_GPU_TLB_NUM_WREG * 5 + 7 + /* gfx_v8_0_ring_emit_vm_flush */
7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_compute x3 for user fence, vm fence */
.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
.emit_ib = gfx_v8_0_ring_emit_ib_compute,
.emit_fence = gfx_v8_0_ring_emit_fence_compute,
.emit_pipeline_sync = gfx_v8_0_ring_emit_pipeline_sync,
@ -6920,7 +6936,7 @@ static const struct amdgpu_ring_funcs gfx_v8_0_ring_funcs_kiq = {
7 + /* gfx_v8_0_ring_emit_pipeline_sync */
17 + /* gfx_v8_0_ring_emit_vm_flush */
7 + 7 + 7, /* gfx_v8_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 4, /* gfx_v8_0_ring_emit_ib_compute */
.emit_ib_size = 7, /* gfx_v8_0_ring_emit_ib_compute */
.emit_fence = gfx_v8_0_ring_emit_fence_kiq,
.test_ring = gfx_v8_0_ring_test_ring,
.insert_nop = amdgpu_ring_insert_nop,
@ -6996,6 +7012,7 @@ static void gfx_v8_0_set_gds_init(struct amdgpu_device *adev)
adev->gds.mem.total_size = RREG32(mmGDS_VMID0_SIZE);
adev->gds.gws.total_size = 64;
adev->gds.oa.total_size = 16;
adev->gds.gds_compute_max_wave_id = RREG32(mmGDS_COMPUTE_MAX_WAVE_ID);
if (adev->gds.mem.total_size == 64 * 1024) {
adev->gds.mem.gfx_partition_size = 4096;

View File

@ -3972,7 +3972,7 @@ static void gfx_v9_0_ring_emit_hdp_flush(struct amdgpu_ring *ring)
static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
uint32_t flags)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 header, control = 0;
@ -4005,11 +4005,27 @@ static void gfx_v9_0_ring_emit_ib_gfx(struct amdgpu_ring *ring,
static void gfx_v9_0_ring_emit_ib_compute(struct amdgpu_ring *ring,
struct amdgpu_job *job,
struct amdgpu_ib *ib,
bool ctx_switch)
uint32_t flags)
{
unsigned vmid = AMDGPU_JOB_GET_VMID(job);
u32 control = INDIRECT_BUFFER_VALID | ib->length_dw | (vmid << 24);
/* Currently, there is a high possibility to get wave ID mismatch
* between ME and GDS, leading to a hw deadlock, because ME generates
* different wave IDs than the GDS expects. This situation happens
* randomly when at least 5 compute pipes use GDS ordered append.
* The wave IDs generated by ME are also wrong after suspend/resume.
* Those are probably bugs somewhere else in the kernel driver.
*
* Writing GDS_COMPUTE_MAX_WAVE_ID resets wave ID counters in ME and
* GDS to 0 for this ring (me/pipe).
*/
if (ib->flags & AMDGPU_IB_FLAG_RESET_GDS_MAX_WAVE_ID) {
amdgpu_ring_write(ring, PACKET3(PACKET3_SET_CONFIG_REG, 1));
amdgpu_ring_write(ring, mmGDS_COMPUTE_MAX_WAVE_ID);
amdgpu_ring_write(ring, ring->adev->gds.gds_compute_max_wave_id);
}
amdgpu_ring_write(ring, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
BUG_ON(ib->gpu_addr & 0x3); /* Dword align */
amdgpu_ring_write(ring,
@ -4729,7 +4745,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_compute = {
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence x3 for user fence, vm fence */
.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
.emit_ib = gfx_v9_0_ring_emit_ib_compute,
.emit_fence = gfx_v9_0_ring_emit_fence,
.emit_pipeline_sync = gfx_v9_0_ring_emit_pipeline_sync,
@ -4764,7 +4780,7 @@ static const struct amdgpu_ring_funcs gfx_v9_0_ring_funcs_kiq = {
SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 7 +
2 + /* gfx_v9_0_ring_emit_vm_flush */
8 + 8 + 8, /* gfx_v9_0_ring_emit_fence_kiq x3 for user fence, vm fence */
.emit_ib_size = 4, /* gfx_v9_0_ring_emit_ib_compute */
.emit_ib_size = 7, /* gfx_v9_0_ring_emit_ib_compute */
.emit_fence = gfx_v9_0_ring_emit_fence_kiq,
.test_ring = gfx_v9_0_ring_test_ring,
.insert_nop = amdgpu_ring_insert_nop,
@ -4846,6 +4862,26 @@ static void gfx_v9_0_set_gds_init(struct amdgpu_device *adev)
break;
}
switch (adev->asic_type) {
case CHIP_VEGA10:
case CHIP_VEGA20:
adev->gds.gds_compute_max_wave_id = 0x7ff;
break;
case CHIP_VEGA12:
adev->gds.gds_compute_max_wave_id = 0x27f;
break;
case CHIP_RAVEN:
if (adev->rev_id >= 0x8)
adev->gds.gds_compute_max_wave_id = 0x77; /* raven2 */
else
adev->gds.gds_compute_max_wave_id = 0x15f; /* raven1 */
break;
default:
/* this really depends on the chip */
adev->gds.gds_compute_max_wave_id = 0x7ff;
break;
}
adev->gds.gws.total_size = 64;
adev->gds.oa.total_size = 16;

View File

@ -1471,8 +1471,9 @@ static int gmc_v8_0_process_interrupt(struct amdgpu_device *adev,
gmc_v8_0_set_fault_enable_default(adev, false);
if (printk_ratelimit()) {
struct amdgpu_task_info task_info = { 0 };
struct amdgpu_task_info task_info;
memset(&task_info, 0, sizeof(struct amdgpu_task_info));
amdgpu_vm_get_task_info(adev, entry->pasid, &task_info);
dev_err(adev->dev, "GPU fault detected: %d 0x%08x for process %s pid %d thread %s pid %d\n",

Some files were not shown because too many files have changed in this diff.