
drm main pull request for v5.3-rc1 (sans mm changes)

-----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJdLMSbAAoJEAx081l5xIa+udkP/iWr8mw44tWYb8Wuzc/aR91v
 02X/J4S9XTQttNn/1Gpq9ItTLMf0Gc08tk1wEBBHAWi/qGaGZS2al+rv0afeuuQa
 aFhQzioDi7K/YZt92iEJhdx7wVMyydICTg3INmYlSP7/FyzLp6gBQRGSJ1kX5mHZ
 qWsFZgUOH9V5evyB6fDMleDaqFOKfcwrD7XYwbOheL/HeYQSv5AYn3VBupBFQ76L
 0hclI5VzZQ5V0nnqRTNDQVA9Yl6NTl+2eXTn5vuBtwKXEI6JJw8eihZp2oZDXqfS
 L441w7wGbkRPzN5kjMZjs1ToPMTlMveR5kL6Sc+o3DT/HmIr1odeaSDXR/93UOLd
 z0CRJ6xMC8h1ThLNHp8UgbxCKqIwYPsY2wVqjsJt7lDY5jma7Yv2YJ9ocYGHN/sO
 DVHcU6ugbwvuC5wZZtVZl5J4hjnBZwNRGSVK+iM0tkjalgdEuSFehXT7eQ8SphF/
 yI5gD1xNEwGfZ4bvZ3u/QrDCcpUAgPIUYmxEa2tPJILQWOJ9O87yc0y9Z21k9Ef1
 9yDqrFV3sPqC2xj/0ufZG/18+Yt99Ykg1jQE3RGDwD/59KAeqPbOvqTKyVODV9jE
 qje6ScSIc2G0713uss2bcaD3k+rCB5YL2JkKrk5OWW/T2+n9T+JFaiNh7dnSFFcU
 gBKyeY24OyCDMwXrby0K
 =SI+Y
 -----END PGP SIGNATURE-----

Merge tag 'drm-next-2019-07-16' of git://anongit.freedesktop.org/drm/drm

Pull drm updates from Dave Airlie:
 "The biggest thing in this is the AMD Navi GPU support, this again
  contains a bunch of header files that are large. These are the new AMD
  RX5700 GPUs that just recently became available.

  New drivers:
   - ST-Ericsson MCDE driver
   - Ingenic JZ47xx SoC

  UAPI change:
   - HDR source metadata property

  Core:
   - HDR infoframes and EDID parsing
   - drm hdmi infoframe unpacking
   - remove prime sg_table caching into dma-buf
   - New gem vram helpers to reduce driver code
   - Lots of drmP.h removal
   - reservation fencing fix
   - documentation updates
   - drm_fb_helper_connector removed
   - mode name command handler rewrite

  fbcon:
   - Remove the fbcon notifiers

  ttm:
   - forward progress fixes

  dma-buf:
   - make mmap call optional
   - debugfs refcount fixes
   - dma-fence free with pending signals fix
   - each dma-buf gets an inode

  Panels:
   - Lots of additional panel bindings

  amdgpu:
   - initial navi10 support
   - avoid hw reset
   - HDR metadata support
   - new thermal sensors for vega asics
   - RAS fixes
   - use HMM rather than MMU notifier
   - xgmi topology via kfd
   - SR-IOV fixes
   - driver reload fixes
   - DC use a core bpc attribute
   - Aux fixes for DC
   - Bandwidth calc updates for DC
   - Clock handling refactor
   - kfd VEGAM support

  vmwgfx:
   - Coherent memory support changes

  i915:
   - HDR Support
   - HDMI i2c link
   - Icelake multi-segmented gamma support
   - GuC firmware update
   - Mule Creek Canyon PCH support for EHL
   - EHL platform updates
   - move i915.alpha_support to i915.force_probe
   - runtime PM refactoring
   - VBT parsing refactoring
   - DSI fixes
   - struct mutex dependency reduction
   - GEM code reorg

  mali-dp:
   - Komeda driver features

  msm:
   - dsi vs EPROBE_DEFER fixes
   - msm8998 snapdragon 835 support
   - a540 gpu support
   - mdp5 and dpu interconnect support

  exynos:
   - drmP.h removal

  tegra:
   - misc fixes

  tda998x:
   - audio support improvements
   - pixel repeated mode support
   - quantisation range handling corrections
   - HDMI vendor info fix

  armada:
   - interlace support fix
   - overlay/video plane register handling refactor
   - add gamma support

  rockchip:
   - RK3328 support

  panfrost:
   - expose perf counters via hidden ioctls

  vkms:
   - enumerate CRC sources list

  ast:
   - rework BO handling

  mgag200:
   - rework BO handling

  dw-hdmi:
   - suspend/resume support

  rcar-du:
   - R8A774A1 SoC support
   - LVDS dual-link mode support
   - Additional formats
   - Misc fixes

  omapdrm:
   - DSI command mode display support

  stm:
   - fb modifier support
   - runtime PM support

  sun4i:
   - use vmap ops

  vc4:
   - binner bo binding rework

  v3d:
   - compute shader support
   - resync/sync fixes
   - job management refactoring

  lima:
   - NULL pointer in irq handler fix
   - scheduler default timeout

  virtio:
   - fence seqno support
   - trace events

  bochs:
   - misc fixes

  tc358767:
   - IRQ/HPD handling

  sii902x:
   - HDMI audio support

  atmel-hlcdc:
   - misc fixes

  meson:
   - zpos support"

* tag 'drm-next-2019-07-16' of git://anongit.freedesktop.org/drm/drm: (1815 commits)
  Revert "Merge branch 'vmwgfx-next' of git://people.freedesktop.org/~thomash/linux into drm-next"
  Revert "mm: adjust apply_to_pfn_range interface for dropped token."
  mm: adjust apply_to_pfn_range interface for dropped token.
  drm/amdgpu/navi10: add uclk activity sensor
  drm/amdgpu: properly guard the generic discovery code
  drm/amdgpu: add missing documentation on new module parameters
  drm/amdgpu: don't invalidate caches in RELEASE_MEM, only do the writeback
  drm/amd/display: avoid 64-bit division
  drm/amdgpu/psp11: simplify the ucode register logic
  drm/amdgpu: properly guard DC support in navi code
  drm/amd/powerplay: vega20: fix uninitialized variable use
  drm/amd/display: dcn20: include linux/delay.h
  amdgpu: make pmu support optional
  drm/amd/powerplay: Zero initialize current_rpm in vega20_get_fan_speed_percent
  drm/amd/powerplay: Zero initialize freq in smu_v11_0_get_current_clk_freq
  drm/amd/powerplay: Use memset to initialize metrics structs
  drm/amdgpu/mes10.1: Fix header guard
  drm/amd/powerplay: add temperature sensor support for navi10
  drm/amdgpu: fix scheduler timeout calc
  drm/amdgpu: Prepare for hmm_range_register API change (v2)
  ...
Linus Torvalds 2019-07-15 19:04:27 -07:00
commit be8454afc5
1567 changed files with 475768 additions and 34599 deletions


@ -0,0 +1,100 @@
# SPDX-License-Identifier: GPL-2.0
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/allwinner,sun6i-a31-mipi-dsi.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Allwinner A31 MIPI-DSI Controller Device Tree Bindings
maintainers:
- Chen-Yu Tsai <wens@csie.org>
- Maxime Ripard <maxime.ripard@bootlin.com>
properties:
"#address-cells": true
"#size-cells": true
compatible:
const: allwinner,sun6i-a31-mipi-dsi
reg:
maxItems: 1
interrupts:
maxItems: 1
clocks:
items:
- description: Bus Clock
- description: Module Clock
clock-names:
items:
- const: bus
- const: mod
resets:
maxItems: 1
phys:
maxItems: 1
phy-names:
const: dphy
port:
type: object
description:
A port node with endpoint definitions as defined in
Documentation/devicetree/bindings/media/video-interfaces.txt. That
port should be the input endpoint, usually coming from the
associated TCON.
patternProperties:
"^panel@[0-9]+$": true
required:
- "#address-cells"
- "#size-cells"
- compatible
- reg
- interrupts
- clocks
- clock-names
- phys
- phy-names
- resets
- port
additionalProperties: false
examples:
- |
dsi0: dsi@1ca0000 {
compatible = "allwinner,sun6i-a31-mipi-dsi";
reg = <0x01ca0000 0x1000>;
interrupts = <0 89 4>;
clocks = <&ccu 23>, <&ccu 96>;
clock-names = "bus", "mod";
resets = <&ccu 4>;
phys = <&dphy0>;
phy-names = "dphy";
#address-cells = <1>;
#size-cells = <0>;
panel@0 {
compatible = "bananapi,lhr050h41", "ilitek,ili9881c";
reg = <0>;
power-gpios = <&pio 1 7 0>; /* PB07 */
reset-gpios = <&r_pio 0 5 1>; /* PL05 */
backlight = <&pwm_bl>;
};
port {
dsi0_in_tcon0: endpoint {
remote-endpoint = <&tcon0_out_dsi0>;
};
};
};
...


@ -7,10 +7,13 @@ Required properties:
- clocks: A list of phandle + clock-specifier pairs, one for each entry
in 'clock-names'
- clock-names: A list of clock names. It should contain:
- "mclk": for the main processor clock
- "pclk": for the APB interface clock
- "aclk": for the main processor clock
- #address-cells: Must be 1
- #size-cells: Must be 0
- iommus: configures the stream IDs to the IOMMU. Must be configured if you want
to enable the IOMMU for the display. For how to configure this property, please
refer to devicetree/bindings/iommu/arm,smmu-v3.txt and
devicetree/bindings/iommu/iommu.txt
Required properties for sub-node: pipeline@nq
Each device contains one or two pipeline sub-nodes (at least one), each
@ -20,7 +23,6 @@ pipeline node should provide properties:
in 'clock-names'
- clock-names: should contain:
- "pxclk": pixel clock
- "aclk": AXI interface clock
- port: each pipeline connect to an encoder input port. The connection is
modeled using the OF graph bindings specified in
@ -42,12 +44,15 @@ Example:
compatible = "arm,mali-d71";
reg = <0xc00000 0x20000>;
interrupts = <0 168 4>;
clocks = <&dpu_mclk>, <&dpu_aclk>;
clock-names = "mclk", "pclk";
clocks = <&dpu_aclk>;
clock-names = "aclk";
iommus = <&smmu 0>, <&smmu 1>, <&smmu 2>, <&smmu 3>,
<&smmu 4>, <&smmu 5>, <&smmu 6>, <&smmu 7>,
<&smmu 8>, <&smmu 9>;
dp0_pipe0: pipeline@0 {
clocks = <&fpgaosc2>, <&dpu_aclk>;
clock-names = "pxclk", "aclk";
clocks = <&fpgaosc2>;
clock-names = "pxclk";
reg = <0>;
port {
@ -58,8 +63,8 @@ Example:
};
dp0_pipe1: pipeline@1 {
clocks = <&fpgaosc2>, <&dpu_aclk>;
clock-names = "pxclk", "aclk";
clocks = <&fpgaosc2>;
clock-names = "pxclk";
reg = <1>;
port {


@ -9,6 +9,7 @@ Required properties:
- compatible : Shall contain one of
- "renesas,r8a7743-lvds" for R8A7743 (RZ/G1M) compatible LVDS encoders
- "renesas,r8a7744-lvds" for R8A7744 (RZ/G1N) compatible LVDS encoders
- "renesas,r8a774a1-lvds" for R8A774A1 (RZ/G2M) compatible LVDS encoders
- "renesas,r8a774c0-lvds" for R8A774C0 (RZ/G2E) compatible LVDS encoders
- "renesas,r8a7790-lvds" for R8A7790 (R-Car H2) compatible LVDS encoders
- "renesas,r8a7791-lvds" for R8A7791 (R-Car M2-W) compatible LVDS encoders
@ -45,14 +46,24 @@ OF graph bindings specified in Documentation/devicetree/bindings/graph.txt.
Each port shall have a single endpoint.
Optional properties:
- renesas,companion : phandle to the companion LVDS encoder. This property is
mandatory for the first LVDS encoder on D3 and E3 SoCs, and shall point to
the second encoder to be used as a companion in dual-link mode. It shall not
be set for any other LVDS encoder.
Example:
lvds0: lvds@feb90000 {
compatible = "renesas,r8a7790-lvds";
reg = <0 0xfeb90000 0 0x1c>;
clocks = <&cpg CPG_MOD 726>;
resets = <&cpg 726>;
compatible = "renesas,r8a77990-lvds";
reg = <0 0xfeb90000 0 0x20>;
clocks = <&cpg CPG_MOD 727>;
power-domains = <&sysc R8A77990_PD_ALWAYS_ON>;
resets = <&cpg 727>;
renesas,companion = <&lvds1>;
ports {
#address-cells = <1>;


@ -5,10 +5,44 @@ Required properties:
- reg: i2c address of the bridge
Optional properties:
- interrupts: describe the interrupt line used to inform the host
- interrupts: describe the interrupt line used to inform the host
about hotplug events.
- reset-gpios: OF device-tree gpio specification for RST_N pin.
HDMI audio properties:
- #sound-dai-cells: <0> or <1>. <0> if only i2s or spdif pin
is wired, <1> if both are wired. HDMI audio is
configured only if this property is found.
- sil,i2s-data-lanes: Array of up to 4 integers with values of 0-3
Each integer indicates which i2s pin is connected to which
audio fifo. The first integer selects i2s audio pin for the
first audio fifo#0 (HDMI channels 1&2), second for fifo#1
(HDMI channels 3&4), and so on. There are 4 fifos and 4 i2s
pins (SD0 - SD3). Any i2s pin can be connected to any fifo,
but there can be no gaps. E.g. an i2s pin must be mapped to
fifo#0 and fifo#1 before mapping a channel to fifo#2. Default
value is <0>, describing the SD0 pin being routed to hdmi audio
fifo #0.
- clocks: phandle and clock specifier for each clock listed in
the clock-names property
- clock-names: "mclk"
Describes SII902x MCLK input. MCLK is used to produce
HDMI audio CTS values. This property is required if
"#sound-dai-cells"-property is present. This property follows
Documentation/devicetree/bindings/clock/clock-bindings.txt
consumer binding.
If HDMI audio is configured, the sii902x device becomes an I2S
and/or spdif audio codec component (e.g. a digital audio sink)
that can be used to configure a full audio device with the
simple-card or audio-graph-card binding. See their binding
documents on how to describe the way the sii902x device is
connected to the rest of the audio system:
Documentation/devicetree/bindings/sound/simple-card.txt
Documentation/devicetree/bindings/sound/audio-graph-card.txt
Note: In case of the audio-graph-card binding the used port
index should be 3.
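When the audio-graph-card binding is used, the audio link is described with an
endpoint on port 3. A minimal sketch (the CPU-side endpoint label is an
assumption, not part of this binding):

	port@3 {
		reg = <3>;
		sii902x_i2s_in: endpoint {
			remote-endpoint = <&i2s0_cpu_endpoint>;
		};
	};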
Optional subnodes:
- video input: this subnode can contain a video input port node
to connect the bridge to a display controller output (See this
@ -21,6 +55,12 @@ Example:
compatible = "sil,sii9022";
reg = <0x39>;
reset-gpios = <&pioA 1 0>;
#sound-dai-cells = <0>;
sil,i2s-data-lanes = < 0 1 2 >;
clocks = <&mclk>;
clock-names = "mclk";
ports {
#address-cells = <1>;
#size-cells = <0>;


@ -28,6 +28,12 @@ Optional video port nodes:
- port@1: Second LVDS input port
- port@3: Second digital CMOS/TTL parallel output
The device can operate in single-link mode or dual-link mode. In single-link
mode, all pixels are received on port@0, and port@1 shall not contain any
endpoint. In dual-link mode, even-numbered pixels are received on port@0 and
odd-numbered pixels on port@1, and both port@0 and port@1 shall contain
endpoints.
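As a sketch of the dual-link wiring described above, assuming the THC63LVD1024
decoder this file documents (labels, the supply and the remote endpoints are
examples only):

	lvds-decoder {
		compatible = "thine,thc63lvd1024";
		vcc-supply = <&reg_lvds_vcc>;

		ports {
			#address-cells = <1>;
			#size-cells = <0>;

			port@0 {
				reg = <0>;
				/* even-numbered pixels, first LVDS link */
				lvds_dec_in0: endpoint {
					remote-endpoint = <&lvds0_out>;
				};
			};

			port@1 {
				reg = <1>;
				/* odd-numbered pixels, second LVDS link */
				lvds_dec_in1: endpoint {
					remote-endpoint = <&lvds1_out>;
				};
			};

			port@2 {
				reg = <2>;
				lvds_dec_out: endpoint {
					remote-endpoint = <&panel_in>;
				};
			};
		};
	};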
Example:
--------


@ -12,6 +12,7 @@ Optional properties:
(active high shutdown input)
- reset-gpios: OF device-tree gpio specification for RSTX pin
(active low system reset)
- toshiba,hpd-pin: TC358767 GPIO pin number to which HPD is connected to (0 or 1)
- ports: the ports node can contain video interface port nodes to connect
to a DPI/DSI source and to an eDP/DP sink according to [1][2]:
- port@0: DSI input port


@ -0,0 +1,44 @@
Ingenic JZ47xx LCD driver
Required properties:
- compatible: one of:
* ingenic,jz4740-lcd
* ingenic,jz4725b-lcd
- reg: LCD registers location and length
- clocks: LCD pixclock and device clock specifiers.
The device clock is only required on the JZ4740.
- clock-names: "lcd_pclk" and "lcd"
- interrupts: Specifies the interrupt line the LCD controller is connected to.
Example:
panel {
compatible = "sharp,ls020b1dd01d";
backlight = <&backlight>;
power-supply = <&vcc>;
port {
panel_input: endpoint {
remote-endpoint = <&panel_output>;
};
};
};
lcd: lcd-controller@13050000 {
compatible = "ingenic,jz4725b-lcd";
reg = <0x13050000 0x1000>;
interrupt-parent = <&intc>;
interrupts = <31>;
clocks = <&cgu JZ4725B_CLK_LCD>;
clock-names = "lcd";
port {
panel_output: endpoint {
remote-endpoint = <&panel_input>;
};
};
};
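On the JZ4740, which also needs the device clock, the clock properties of such
a node would look roughly like this instead (the CGU clock indices are
assumptions):

	clocks = <&cgu JZ4740_CLK_LCD_PCLK>, <&cgu JZ4740_CLK_LCD>;
	clock-names = "lcd_pclk", "lcd";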


@ -28,6 +28,11 @@ Required properties:
- #address-cells: number of address cells for the MDSS children. Should be 1.
- #size-cells: Should be 1.
- ranges: parent bus address space is the same as the child bus address space.
- interconnects : interconnect path specifier for MDSS according to
Documentation/devicetree/bindings/interconnect/interconnect.txt. Should be
2 paths corresponding to 2 AXI ports.
- interconnect-names : MDSS will have 2 port names to differentiate between the
2 interconnect paths defined with interconnect specifier.
Optional properties:
- assigned-clocks: list of clock specifiers for clocks needing rate assignment
@ -86,6 +91,11 @@ Example:
interrupt-controller;
#interrupt-cells = <1>;
interconnects = <&rsc_hlos MASTER_MDP0 &rsc_hlos SLAVE_EBI1>,
<&rsc_hlos MASTER_MDP1 &rsc_hlos SLAVE_EBI1>;
interconnect-names = "mdp0-mem", "mdp1-mem";
iommus = <&apps_iommu 0>;
#address-cells = <2>;


@ -88,6 +88,7 @@ Required properties:
* "qcom,dsi-phy-28nm-8960"
* "qcom,dsi-phy-14nm"
* "qcom,dsi-phy-10nm"
* "qcom,dsi-phy-10nm-8998"
- reg: Physical base address and length of the registers of PLL, PHY. Some
revisions require the PHY regulator base address, whereas others require the
PHY lane base address. See below for each PHY revision.


@ -0,0 +1,9 @@
Armadeus ST0700 Adapt. A Santek ST0700I5Y-RBSLW 7.0" WVGA (800x480) TFT with
an adapter board.
Required properties:
- compatible: "armadeus,st0700-adapt"
- power-supply: see panel-common.txt
Optional properties:
- backlight: see panel-common.txt


@ -6,6 +6,22 @@ Display bindings for EDT Display Technology Corp. Displays which are
compatible with the simple-panel binding, which is specified in
simple-panel.txt
3,5" QVGA TFT Panels
--------------------
+-----------------+---------------------+-------------------------------------+
| Identifier      | compatible          | description                         |
+=================+=====================+=====================================+
| ET035012DM6 | edt,et035012dm6 | 3.5" QVGA TFT LCD panel |
+-----------------+---------------------+-------------------------------------+
4,3" WVGA TFT Panels
--------------------
+-----------------+---------------------+-------------------------------------+
| Identifier      | compatible          | description                         |
+=================+=====================+=====================================+
| ETM0430G0DH6 | edt,etm0430g0dh6 | 480x272 TFT Display |
+-----------------+---------------------+-------------------------------------+
5,7" WVGA TFT Panels
--------------------


@ -0,0 +1,12 @@
Evervision Electronics Co. Ltd. VGG804821 5.0" WVGA TFT LCD Panel
Required properties:
- compatible: should be "evervision,vgg804821"
- power-supply: See simple-panel.txt
Optional properties:
- backlight: See simple-panel.txt
- enable-gpios: See simple-panel.txt
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.


@ -0,0 +1,32 @@
FriendlyELEC HD702E 800x1280 LCD panel
The HD702E is a FriendlyELEC-developed eDP LCD panel with 800x1280
resolution. It has a built-in Goodix GT9271 capacitive touchscreen,
with backlight adjustable via PWM.
Required properties:
- compatible: should be "friendlyarm,hd702e"
- power-supply: regulator to provide the supply voltage
Optional properties:
- backlight: phandle of the backlight device attached to the panel
Optional nodes:
- Video port for LCD panel input.
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.
Example:
panel {
compatible ="friendlyarm,hd702e", "simple-panel";
backlight = <&backlight>;
power-supply = <&vcc3v3_sys>;
port {
panel_in_edp: endpoint {
remote-endpoint = <&edp_out_panel>;
};
};
};


@ -0,0 +1,42 @@
Kaohsiung Opto-Electronics Inc. 5.7" QVGA (320 x 240) TFT LCD panel
Required properties:
- compatible: should be "koe,tx14d24vm1bpa"
- backlight: phandle of the backlight device attached to the panel
- power-supply: single regulator to provide the supply voltage
Required nodes:
- port: Parallel port mapping to connect this display
This panel needs a single power-supply voltage. Its backlight is controlled
via a PWM signal.
Example:
--------
Example device-tree definition when connected to iMX53 based board
lcd_panel: lcd-panel {
compatible = "koe,tx14d24vm1bpa";
backlight = <&backlight_lcd>;
power-supply = <&reg_3v3>;
port {
lcd_panel_in: endpoint {
remote-endpoint = <&lcd_display_out>;
};
};
};
Then one needs to extend the dispX node:
lcd_display: disp1 {
port@1 {
reg = <1>;
lcd_display_out: endpoint {
remote-endpoint = <&lcd_panel_in>;
};
};
};


@ -0,0 +1,11 @@
One Stop Displays OSD101T2045-53TS 10.1" 1920x1200 panel
Required properties:
- compatible: should be "osddisplays,osd101t2045-53ts"
- power-supply: as specified in the base binding
Optional properties:
- backlight: as specified in the base binding
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.


@ -0,0 +1,14 @@
One Stop Displays OSD101T2587-53TS 10.1" 1920x1200 panel
The panel is similar to the OSD101T2045-53TS, but it needs an additional
MIPI_DSI_TURN_ON_PERIPHERAL message from the host.
Required properties:
- compatible: should be "osddisplays,osd101t2587-53ts"
- power-supply: as specified in the base binding
Optional properties:
- backlight: as specified in the base binding
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.


@ -0,0 +1,33 @@
Samsung s6e63m0 AMOLED LCD panel
Required properties:
- compatible: "samsung,s6e63m0"
- reset-gpios: GPIO spec for reset pin
- vdd3-supply: VDD regulator
- vci-supply: VCI regulator
The panel must obey the rules for SPI slave devices specified in document [1].
The device node can contain one 'port' child node with one child
'endpoint' node, according to the bindings defined in [2]. This
node should describe the panel's video bus.
[1]: Documentation/devicetree/bindings/spi/spi-bus.txt
[2]: Documentation/devicetree/bindings/media/video-interfaces.txt
Example:
s6e63m0: display@0 {
compatible = "samsung,s6e63m0";
reg = <0>;
reset-gpio = <&mp05 5 1>;
vdd3-supply = <&ldo12_reg>;
vci-supply = <&ldo11_reg>;
spi-max-frequency = <1200000>;
port {
lcd_ep: endpoint {
remote-endpoint = <&fimd_ep>;
};
};
};


@ -0,0 +1,15 @@
TFC S9700RTWV43TR-01B 7" Three Five Corp 800x480 LCD panel with
resistive touch
The panel is found on TI AM335x-evm.
Required properties:
- compatible: should be "tfc,s9700rtwv43tr-01b"
- power-supply: See panel-common.txt
Optional properties:
- enable-gpios: GPIO pin to enable or disable the panel, if there is one
- backlight: phandle of the backlight device attached to the panel
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.
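A minimal panel node using these properties could look like the following
sketch (the regulator, GPIO and backlight phandles are placeholders):

	panel {
		compatible = "tfc,s9700rtwv43tr-01b";
		power-supply = <&vdd_lcd>;
		enable-gpios = <&gpio1 28 GPIO_ACTIVE_HIGH>;
		backlight = <&lcd_bl>;
	};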


@ -0,0 +1,12 @@
VXT 800x480 color TFT LCD panel
Required properties:
- compatible: should be "vxt,vl050-8048nt-c01"
- power-supply: as specified in the base binding
Optional properties:
- backlight: as specified in the base binding
- enable-gpios: as specified in the base binding
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.


@ -7,6 +7,7 @@ Required Properties:
- "renesas,du-r8a7744" for R8A7744 (RZ/G1N) compatible DU
- "renesas,du-r8a7745" for R8A7745 (RZ/G1E) compatible DU
- "renesas,du-r8a77470" for R8A77470 (RZ/G1C) compatible DU
- "renesas,du-r8a774a1" for R8A774A1 (RZ/G2M) compatible DU
- "renesas,du-r8a774c0" for R8A774C0 (RZ/G2E) compatible DU
- "renesas,du-r8a7779" for R8A7779 (R-Car H1) compatible DU
- "renesas,du-r8a7790" for R8A7790 (R-Car H2) compatible DU
@ -58,6 +59,7 @@ corresponding to each DU output.
R8A7744 (RZ/G1N) DPAD 0 LVDS 0 - -
R8A7745 (RZ/G1E) DPAD 0 DPAD 1 - -
R8A77470 (RZ/G1C) DPAD 0 DPAD 1 LVDS 0 -
R8A774A1 (RZ/G2M) DPAD 0 HDMI 0 LVDS 0 -
R8A774C0 (RZ/G2E) DPAD 0 LVDS 0 LVDS 1 -
R8A7779 (R-Car H1) DPAD 0 DPAD 1 - -
R8A7790 (R-Car H2) DPAD 0 LVDS 0 LVDS 1 -


@ -12,6 +12,7 @@ following device-specific properties.
Required properties:
- compatible: should be one of the following:
"rockchip,rk3228-dw-hdmi"
"rockchip,rk3288-dw-hdmi"
"rockchip,rk3328-dw-hdmi"
"rockchip,rk3399-dw-hdmi"
@ -38,6 +39,13 @@ Optional properties
- phys: from general PHY binding: the phandle for the PHY device.
- phy-names: Should be "hdmi" if phys references an external phy.
Optional pinctrl entry:
- If you have both a "unwedge" and "default" pinctrl entry, dw_hdmi
will switch to the unwedge pinctrl state for 10ms if it ever gets an
i2c timeout. It's intended that this unwedge pinctrl entry will
cause the SDA line to be driven low to work around a hardware
erratum.
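A rough sketch of such a pinctrl arrangement (the pin group labels are
assumptions, not defined by this binding):

	&hdmi {
		pinctrl-names = "default", "unwedge";
		/* normal i2c function on the DDC SDA/SCL pins */
		pinctrl-0 = <&hdmi_i2c_xfer>;
		/* GPIO state that drives SDA low to unwedge a stuck device */
		pinctrl-1 = <&hdmi_i2c_unwedge>;
	};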
Example:
hdmi: hdmi@ff980000 {


@ -40,6 +40,8 @@ Mandatory nodes specific to STM32 DSI:
- panel or bridge node: A node containing the panel or bridge description as
documented in [6].
- port: panel or bridge port node, connected to the DSI output port (port@1).
Optional properties:
- phy-dsi-supply: phandle of the regulator that provides the supply voltage.
Note: You can find more documentation in the following references
[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
@ -101,6 +103,7 @@ Example 2: DSI panel
clock-names = "pclk", "ref";
resets = <&rcc STM32F4_APB2_RESET(DSI)>;
reset-names = "apb";
phy-dsi-supply = <&reg18>;
ports {
#address-cells = <1>;


@ -1,93 +0,0 @@
Allwinner A31 DSI Encoder
=========================
The DSI pipeline consists of two separate blocks: the DSI controller
itself, and its associated D-PHY.
DSI Encoder
-----------
The DSI Encoder generates the DSI signal from the TCON's.
Required properties:
- compatible: value must be one of:
* allwinner,sun6i-a31-mipi-dsi
- reg: base address and size of memory-mapped region
- interrupts: interrupt associated to this IP
- clocks: phandles to the clocks feeding the DSI encoder
* bus: the DSI interface clock
* mod: the DSI module clock
- clock-names: the clock names mentioned above
- phys: phandle to the D-PHY
- phy-names: must be "dphy"
- resets: phandle to the reset controller driving the encoder
- ports: A ports node with endpoint definitions as defined in
Documentation/devicetree/bindings/media/video-interfaces.txt. The
first port should be the input endpoint, usually coming from the
associated TCON.
Any MIPI-DSI device attached to this should be described according to
the bindings defined in ../mipi-dsi-bus.txt
D-PHY
-----
Required properties:
- compatible: value must be one of:
* allwinner,sun6i-a31-mipi-dphy
- reg: base address and size of memory-mapped region
- clocks: phandles to the clocks feeding the DSI encoder
* bus: the DSI interface clock
* mod: the DSI module clock
- clock-names: the clock names mentioned above
- resets: phandle to the reset controller driving the encoder
Example:
dsi0: dsi@1ca0000 {
compatible = "allwinner,sun6i-a31-mipi-dsi";
reg = <0x01ca0000 0x1000>;
interrupts = <GIC_SPI 89 IRQ_TYPE_LEVEL_HIGH>;
clocks = <&ccu CLK_BUS_MIPI_DSI>,
<&ccu CLK_DSI_SCLK>;
clock-names = "bus", "mod";
resets = <&ccu RST_BUS_MIPI_DSI>;
phys = <&dphy0>;
phy-names = "dphy";
#address-cells = <1>;
#size-cells = <0>;
panel@0 {
compatible = "bananapi,lhr050h41", "ilitek,ili9881c";
reg = <0>;
power-gpios = <&pio 1 7 GPIO_ACTIVE_HIGH>; /* PB07 */
reset-gpios = <&r_pio 0 5 GPIO_ACTIVE_LOW>; /* PL05 */
backlight = <&pwm_bl>;
};
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
#address-cells = <1>;
#size-cells = <0>;
reg = <0>;
dsi0_in_tcon0: endpoint {
remote-endpoint = <&tcon0_out_dsi0>;
};
};
};
};
dphy0: d-phy@1ca1000 {
compatible = "allwinner,sun6i-a31-mipi-dphy";
reg = <0x01ca1000 0x1000>;
clocks = <&ccu CLK_BUS_MIPI_DSI>,
<&ccu CLK_DSI_DPHY>;
clock-names = "bus", "mod";
resets = <&ccu RST_BUS_MIPI_DSI>;
#phy-cells = <0>;
};


@ -15,6 +15,7 @@ Required properties:
+ "arm,mali-t860"
+ "arm,mali-t880"
* which must be preceded by one of the following vendor specifics:
+ "allwinner,sun50i-h6-mali"
+ "amlogic,meson-gxm-mali"
+ "rockchip,rk3288-mali"
+ "rockchip,rk3399-mali"
@ -31,21 +32,36 @@ Optional properties:
- clocks : Phandle to clock for the Mali Midgard device.
- clock-names : Specify the names of the clocks specified in clocks
when multiple clocks are present.
* core: clock driving the GPU itself (When only one clock is present,
assume it's this clock.)
* bus: bus clock for the GPU
- mali-supply : Phandle to regulator for the Mali device. Refer to
Documentation/devicetree/bindings/regulator/regulator.txt for details.
- operating-points-v2 : Refer to Documentation/devicetree/bindings/opp/opp.txt
for details.
- #cooling-cells: Refer to Documentation/devicetree/bindings/thermal/thermal.txt
for details.
- resets : Phandle of the GPU reset line.
Vendor-specific bindings
------------------------
The Mali GPU is integrated very differently from one SoC to
another. In order to accomodate those differences, you have the option
another. In order to accommodate those differences, you have the option
to specify one more vendor-specific compatible, among:
- "allwinner,sun50i-h6-mali"
Required properties:
- clocks : phandles to core and bus clocks
- clock-names : must contain "core" and "bus"
- resets: phandle to GPU reset line
- "amlogic,meson-gxm-mali"
Required properties:
- resets : Should contain phandles of :
@ -65,6 +81,7 @@ gpu@ffa30000 {
mali-supply = <&vdd_gpu>;
operating-points-v2 = <&gpu_opp_table>;
power-domains = <&power RK3288_PD_GPU>;
#cooling-cells = <2>;
};
gpu_opp_table: opp_table0 {


@ -0,0 +1,57 @@
# SPDX-License-Identifier: GPL-2.0
%YAML 1.2
---
$id: http://devicetree.org/schemas/display/allwinner,sun6i-a31-mipi-dphy.yaml#
$schema: http://devicetree.org/meta-schemas/core.yaml#
title: Allwinner A31 MIPI D-PHY Controller Device Tree Bindings
maintainers:
- Chen-Yu Tsai <wens@csie.org>
- Maxime Ripard <maxime.ripard@bootlin.com>
properties:
"#phy-cells":
const: 0
compatible:
const: allwinner,sun6i-a31-mipi-dphy
reg:
maxItems: 1
clocks:
items:
- description: Bus Clock
- description: Module Clock
clock-names:
items:
- const: bus
- const: mod
resets:
maxItems: 1
required:
- "#phy-cells"
- compatible
- reg
- clocks
- clock-names
- resets
additionalProperties: false
examples:
- |
dphy0: d-phy@1ca1000 {
compatible = "allwinner,sun6i-a31-mipi-dphy";
reg = <0x01ca1000 0x1000>;
clocks = <&ccu 23>, <&ccu 97>;
clock-names = "bus", "mod";
resets = <&ccu 4>;
#phy-cells = <0>;
};
...


@ -307,6 +307,8 @@ patternProperties:
description: Everest Semiconductor Co. Ltd.
"^everspin,.*":
description: Everspin Technologies, Inc.
"^evervision,.*":
description: Evervision Electronics Co. Ltd.
"^exar,.*":
description: Exar Corporation
"^excito,.*":
@ -911,6 +913,8 @@ patternProperties:
description: Shenzhen Techstar Electronics Co., Ltd.
"^terasic,.*":
description: Terasic Inc.
"^tfc,.*":
description: Three Five Corp
"^thine,.*":
description: THine Electronics, Inc.
"^ti,.*":
@ -987,6 +991,8 @@ patternProperties:
description: Voipac Technologies s.r.o.
"^vot,.*":
description: Vision Optical Technology Co., Ltd.
"^vxt,.*":
description: VXT Ltd
"^wd,.*":
description: Western Digital Corp.
"^wetek,.*":


@ -53,6 +53,20 @@ Specifying the option multiple times for different ports is possible, e.g.::
video=LVDS-1:d video=HDMI-1:D
Options can also be passed after the mode, using commas as separator.
Sample usage: 720x480,rotate=180 - 720x480 mode, rotated by 180 degrees
Valid options are::
- margin_top, margin_bottom, margin_left, margin_right (integer):
Number of pixels in the margins, typically to deal with overscan on TVs
- reflect_x (boolean): Perform an axial symmetry on the X axis
- reflect_y (boolean): Perform an axial symmetry on the Y axis
- rotate (integer): Rotate the initial framebuffer by x
degrees. Valid values are 0, 90, 180 and 270.
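Sample usage: 720x480,reflect_x,margin_left=8,margin_right=8 - 720x480 mode,
mirrored on the X axis, with 8 pixel margins on the left and right (an
illustrative combination of the options above).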
-----------------------------------------------------------------------------
What is the VESA(TM) Coordinated Video Timings (CVT)?


@ -37,10 +37,10 @@ Buffer Objects
PRIME Buffer Sharing
--------------------
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
:doc: PRIME Buffer Sharing
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_dma_buf.c
:internal:
MMU Notifier
@ -70,6 +70,26 @@ Interrupt Handling
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
:internal:
AMDGPU XGMI Support
===================
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
:doc: AMDGPU XGMI Support
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_xgmi.c
:internal:
AMDGPU RAS debugfs control interface
====================================
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
:doc: AMDGPU RAS debugfs control interface
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
:internal:
GPU Power/Thermal Controls and Monitoring
=========================================


@ -7,6 +7,7 @@ GPU Driver Documentation
amdgpu
amdgpu-dc
i915
mcde
meson
pl111
tegra


@ -10,3 +10,6 @@ Kernel clients
.. kernel-doc:: drivers/gpu/drm/drm_client.c
:export:
.. kernel-doc:: drivers/gpu/drm/drm_client_modeset.c
:export:


@ -181,6 +181,21 @@ Panel Helper Reference
.. kernel-doc:: drivers/gpu/drm/drm_panel_orientation_quirks.c
:export:
Panel Self Refresh Helper Reference
===================================
.. kernel-doc:: drivers/gpu/drm/drm_self_refresh_helper.c
:doc: overview
.. kernel-doc:: drivers/gpu/drm/drm_self_refresh_helper.c
:export:
HDCP Helper Functions Reference
===============================
.. kernel-doc:: drivers/gpu/drm/drm_hdcp.c
:export:
Display Port Helper Functions Reference
=======================================


@ -79,7 +79,6 @@ count for the TTM, which will call your initialization function.
See the radeon_ttm.c file for an example of usage.
The Graphics Execution Manager (GEM)
====================================
@ -380,6 +379,39 @@ GEM CMA Helper Functions Reference
.. kernel-doc:: drivers/gpu/drm/drm_gem_cma_helper.c
:export:
VRAM Helper Function Reference
==============================
.. kernel-doc:: drivers/gpu/drm/drm_vram_helper_common.c
:doc: overview
.. kernel-doc:: include/drm/drm_gem_vram_helper.h
:internal:
GEM VRAM Helper Functions Reference
-----------------------------------
.. kernel-doc:: drivers/gpu/drm/drm_gem_vram_helper.c
:doc: overview
.. kernel-doc:: include/drm/drm_gem_vram_helper.h
:internal:
.. kernel-doc:: drivers/gpu/drm/drm_gem_vram_helper.c
:export:
VRAM MM Helper Functions Reference
----------------------------------
.. kernel-doc:: drivers/gpu/drm/drm_vram_mm_helper.c
:doc: overview
.. kernel-doc:: include/drm/drm_vram_mm_helper.h
:internal:
.. kernel-doc:: drivers/gpu/drm/drm_vram_mm_helper.c
:export:
VMA Offset Manager
==================


@ -85,16 +85,18 @@ leads to a few additional requirements:
- The userspace side must be fully reviewed and tested to the standards of that
userspace project. For e.g. mesa this means piglit testcases and review on the
mailing list. This is again to ensure that the new interface actually gets the
job done.
job done. The userspace-side reviewer should also provide an Acked-by on the
kernel uAPI patch indicating that they believe the proposed uAPI is sound and
sufficiently documented and validated for userspace's consumption.
- The userspace patches must be against the canonical upstream, not some vendor
fork. This is to make sure that no one cheats on the review and testing
requirements by doing a quick fork.
- The kernel patch can only be merged after all the above requirements are met,
but it **must** be merged **before** the userspace patches land. uAPI always flows
from the kernel, doing things the other way round risks divergence of the uAPI
definitions and header files.
but it **must** be merged to either drm-next or drm-misc-next **before** the
userspace patches land. uAPI always flows from the kernel, doing things the
other way round risks divergence of the uAPI definitions and header files.
These are fairly steep requirements, but have grown out from years of shared
pain and experience with uAPI added hastily, and almost always regretted about
@ -327,3 +329,12 @@ DRM_IOCTL_MODESET_CTL
mode setting, since on many devices the vertical blank counter is
reset to 0 at some point during modeset. Modern drivers should not
call this any more since with kernel mode setting it is a no-op.
Userspace API Structures
========================
.. kernel-doc:: include/uapi/drm/drm_mode.h
:doc: overview
.. kernel-doc:: include/uapi/drm/drm_mode.h
:internal:


@ -61,7 +61,7 @@ Intel GVT-g Host Support(vGPU device model)
Workarounds
-----------
.. kernel-doc:: drivers/gpu/drm/i915/intel_workarounds.c
.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_workarounds.c
:doc: Hardware workarounds
Display Hardware Handling
@ -82,13 +82,13 @@ change.
Frontbuffer Tracking
--------------------
.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_frontbuffer.c
:doc: frontbuffer tracking
.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.h
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_frontbuffer.h
:internal:
.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_frontbuffer.c
:internal:
.. kernel-doc:: drivers/gpu/drm/i915/i915_gem.c
@ -97,10 +97,10 @@ Frontbuffer Tracking
Display FIFO Underrun Reporting
-------------------------------
.. kernel-doc:: drivers/gpu/drm/i915/intel_fifo_underrun.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_fifo_underrun.c
:doc: fifo underrun handling
.. kernel-doc:: drivers/gpu/drm/i915/intel_fifo_underrun.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_fifo_underrun.c
:internal:
Plane Configuration
@ -115,10 +115,10 @@ panel self refresh.
Atomic Plane Helpers
--------------------
.. kernel-doc:: drivers/gpu/drm/i915/intel_atomic_plane.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_atomic_plane.c
:doc: atomic plane helpers
.. kernel-doc:: drivers/gpu/drm/i915/intel_atomic_plane.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_atomic_plane.c
:internal:
Output Probing
@ -132,19 +132,19 @@ probing, so those sections fully apply.
Hotplug
-------
.. kernel-doc:: drivers/gpu/drm/i915/intel_hotplug.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_hotplug.c
:doc: Hotplug
.. kernel-doc:: drivers/gpu/drm/i915/intel_hotplug.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_hotplug.c
:internal:
High Definition Audio
---------------------
.. kernel-doc:: drivers/gpu/drm/i915/intel_audio.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_audio.c
:doc: High Definition Audio over HDMI and Display Port
.. kernel-doc:: drivers/gpu/drm/i915/intel_audio.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_audio.c
:internal:
.. kernel-doc:: include/drm/i915_component.h
@ -153,58 +153,58 @@ High Definition Audio
Intel HDMI LPE Audio Support
----------------------------
.. kernel-doc:: drivers/gpu/drm/i915/intel_lpe_audio.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_lpe_audio.c
:doc: LPE Audio integration for HDMI or DP playback
.. kernel-doc:: drivers/gpu/drm/i915/intel_lpe_audio.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_lpe_audio.c
:internal:
Panel Self Refresh PSR (PSR/SRD)
--------------------------------
.. kernel-doc:: drivers/gpu/drm/i915/intel_psr.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_psr.c
:doc: Panel Self Refresh (PSR/SRD)
.. kernel-doc:: drivers/gpu/drm/i915/intel_psr.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_psr.c
:internal:
Frame Buffer Compression (FBC)
------------------------------
.. kernel-doc:: drivers/gpu/drm/i915/intel_fbc.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_fbc.c
:doc: Frame Buffer Compression (FBC)
.. kernel-doc:: drivers/gpu/drm/i915/intel_fbc.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_fbc.c
:internal:
Display Refresh Rate Switching (DRRS)
-------------------------------------
.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
:doc: Display Refresh Rate Switching (DRRS)
.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
:functions: intel_dp_set_drrs_state
.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
:functions: intel_edp_drrs_enable
.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
:functions: intel_edp_drrs_disable
.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
:functions: intel_edp_drrs_invalidate
.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
:functions: intel_edp_drrs_flush
.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dp.c
:functions: intel_dp_drrs_init
DPIO
----
.. kernel-doc:: drivers/gpu/drm/i915/intel_dpio_phy.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dpio_phy.c
:doc: DPIO
CSR firmware support for DMC
@ -219,34 +219,34 @@ CSR firmware support for DMC
Video BIOS Table (VBT)
----------------------
.. kernel-doc:: drivers/gpu/drm/i915/intel_bios.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_bios.c
:doc: Video BIOS Table (VBT)
.. kernel-doc:: drivers/gpu/drm/i915/intel_bios.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_bios.c
:internal:
.. kernel-doc:: drivers/gpu/drm/i915/intel_vbt_defs.h
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_vbt_defs.h
:internal:
Display clocks
--------------
.. kernel-doc:: drivers/gpu/drm/i915/intel_cdclk.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_cdclk.c
:doc: CDCLK / RAWCLK
.. kernel-doc:: drivers/gpu/drm/i915/intel_cdclk.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_cdclk.c
:internal:
Display PLLs
------------
.. kernel-doc:: drivers/gpu/drm/i915/intel_dpll_mgr.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dpll_mgr.c
:doc: Display PLLs
.. kernel-doc:: drivers/gpu/drm/i915/intel_dpll_mgr.c
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dpll_mgr.c
:internal:
.. kernel-doc:: drivers/gpu/drm/i915/intel_dpll_mgr.h
.. kernel-doc:: drivers/gpu/drm/i915/display/intel_dpll_mgr.h
:internal:
Memory Management and Command Submission
@ -349,7 +349,7 @@ of buffer object caches. Shrinking is used to make main memory
available. Note that this is mostly orthogonal to evicting buffer
objects, which has the goal to make space in gpu virtual address spaces.
.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_shrinker.c
.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_shrinker.c
:internal:
Batchbuffer Parsing
@ -373,18 +373,15 @@ Batchbuffer Pools
User Batchbuffer Execution
--------------------------
.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_execbuffer.c
.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
:doc: User command execution
Logical Rings, Logical Ring Contexts and Execlists
--------------------------------------------------
.. kernel-doc:: drivers/gpu/drm/i915/intel_lrc.c
.. kernel-doc:: drivers/gpu/drm/i915/gt/intel_lrc.c
:doc: Logical Rings, Logical Ring Contexts and Execlists
.. kernel-doc:: drivers/gpu/drm/i915/intel_lrc.c
:internal:
Global GTT views
----------------
@ -415,10 +412,10 @@ Hardware Tiling and Swizzling Details
Object Tiling IOCTLs
--------------------
.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_tiling.c
.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_tiling.c
:internal:
.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_tiling.c
.. kernel-doc:: drivers/gpu/drm/i915/gem/i915_gem_tiling.c
:doc: buffer object tiling
WOPCM
@ -478,12 +475,6 @@ i915_context_create and i915_context_free
.. kernel-doc:: drivers/gpu/drm/i915/i915_trace.h
:doc: i915_context_create and i915_context_free tracepoints
switch_mm
---------
.. kernel-doc:: drivers/gpu/drm/i915/i915_trace.h
:doc: switch_mm tracepoint
Perf
====


@ -0,0 +1,8 @@
.. SPDX-License-Identifier: GPL-2.0
=======================================================
drm/mcde ST-Ericsson MCDE Multi-channel display engine
=======================================================
.. kernel-doc:: drivers/gpu/drm/mcde/mcde_drv.c
:doc: ST-Ericsson MCDE DRM Driver


@ -10,25 +10,6 @@ graphics subsystem useful as newbie projects. Or for slow rainy days.
Subsystem-wide refactorings
===========================
De-midlayer drivers
-------------------
With the recent ``drm_bus`` cleanup patches for 3.17 it is no longer required
to have a ``drm_bus`` structure set up. Drivers can directly set up the
``drm_device`` structure instead of relying on bus methods in ``drm_usb.c``
and ``drm_pci.c``. The goal is to get rid of the driver's ``->load`` /
``->unload`` callbacks and open-code the load/unload sequence properly, using
the new two-stage ``drm_device`` setup/teardown.
Once all existing drivers are converted we can also remove those bus support
files for USB and platform devices.
All you need is a GPU for a non-converted driver (currently almost all of
them, but also all the virtual ones used by KVM, so everyone qualifies).
Contact: Daniel Vetter, Thierry Reding, respective driver maintainers
Remove custom dumb_map_offset implementations
---------------------------------------------
@ -247,6 +228,12 @@ struct drm_gem_object_funcs
GEM objects can now have a function table instead of having the callbacks on the
DRM driver struct. This is now the preferred way and drivers can be moved over.
DRM_GEM_CMA_VMAP_DRIVER_OPS, DRM_GEM_SHMEM_DRIVER_OPS already support this, but
DRM_GEM_VRAM_DRIVER_PRIME does not yet and needs to be aligned with the previous
two. We also need a 2nd version of the CMA define that doesn't require the
vmapping to be present (different hook for prime importing). Plus this needs to
be rolled out to all drivers using their own implementations, too.
Use DRM_MODESET_LOCK_ALL_* helpers instead of boilerplate
---------------------------------------------------------
@ -300,6 +287,21 @@ it to use drm_mode_hsync() instead.
Contact: Sean Paul
drm_fb_helper tasks
-------------------
- drm_fb_helper_restore_fbdev_mode_unlocked() should call restore_fbdev_mode()
not the _force variant so it can bail out if there is a master. But first
these igt tests need to be fixed: kms_fbcon_fbt@psr and
kms_fbcon_fbt@psr-suspend.
- The max connector argument for drm_fb_helper_init() and
drm_fb_helper_fbdev_setup() isn't used anymore and can be removed.
- The helper doesn't keep an array of connectors anymore so these can be
removed: drm_fb_helper_single_add_all_connectors(),
drm_fb_helper_add_one_connector() and drm_fb_helper_remove_one_connector().
Core refactorings
=================
@ -488,5 +490,20 @@ i915
device_link_add to model the dependency between i915 and snd_had. See
https://dri.freedesktop.org/docs/drm/driver-api/device_link.html
Bootsplash
==========
There is support in place now for writing internal DRM clients making it
possible to pick up the bootsplash work that was rejected because it was written
for fbdev.
- [v6,8/8] drm/client: Hack: Add bootsplash example
https://patchwork.freedesktop.org/patch/306579/
- [RFC PATCH v2 00/13] Kernel based bootsplash
https://lkml.org/lkml/2017/12/13/764
Contact: Sam Ravnborg
Outside DRM
===========


@ -5199,6 +5199,13 @@ S: Maintained
F: drivers/gpu/drm/tinydrm/st7735r.c
F: Documentation/devicetree/bindings/display/sitronix,st7735r.txt
DRM DRIVER FOR ST-ERICSSON MCDE
M: Linus Walleij <linus.walleij@linaro.org>
T: git git://anongit.freedesktop.org/drm/drm-misc
S: Maintained
F: drivers/gpu/drm/mcde/
F: Documentation/devicetree/bindings/display/ste,mcde.txt
DRM DRIVER FOR TDFX VIDEO CARDS
S: Orphan / Obsolete
F: drivers/gpu/drm/tdfx/
@ -5484,6 +5491,7 @@ T: git git://anongit.freedesktop.org/drm/drm-misc
DRM PANEL DRIVERS
M: Thierry Reding <thierry.reding@gmail.com>
R: Sam Ravnborg <sam@ravnborg.org>
L: dri-devel@lists.freedesktop.org
T: git git://anongit.freedesktop.org/drm/drm-misc
S: Maintained
@ -5512,7 +5520,6 @@ F: Documentation/gpu/xen-front.rst
DRM TTM SUBSYSTEM
M: Christian Koenig <christian.koenig@amd.com>
M: Huang Rui <ray.huang@amd.com>
M: Junwei Zhang <Jerry.Zhang@amd.com>
T: git git://people.freedesktop.org/~agd5f/linux
S: Maintained
L: dri-devel@lists.freedesktop.org


@ -23,8 +23,10 @@
#include <linux/poll.h>
#include <linux/reservation.h>
#include <linux/mm.h>
#include <linux/mount.h>
#include <uapi/linux/dma-buf.h>
#include <uapi/linux/magic.h>
static inline int is_dma_buf_file(struct file *);
@ -35,6 +37,41 @@ struct dma_buf_list {
static struct dma_buf_list db_list;
static char *dmabuffs_dname(struct dentry *dentry, char *buffer, int buflen)
{
struct dma_buf *dmabuf;
char name[DMA_BUF_NAME_LEN];
size_t ret = 0;
dmabuf = dentry->d_fsdata;
mutex_lock(&dmabuf->lock);
if (dmabuf->name)
ret = strlcpy(name, dmabuf->name, DMA_BUF_NAME_LEN);
mutex_unlock(&dmabuf->lock);
return dynamic_dname(dentry, buffer, buflen, "/%s:%s",
dentry->d_name.name, ret > 0 ? name : "");
}
static const struct dentry_operations dma_buf_dentry_ops = {
.d_dname = dmabuffs_dname,
};
static struct vfsmount *dma_buf_mnt;
static struct dentry *dma_buf_fs_mount(struct file_system_type *fs_type,
int flags, const char *name, void *data)
{
return mount_pseudo(fs_type, "dmabuf:", NULL, &dma_buf_dentry_ops,
DMA_BUF_MAGIC);
}
static struct file_system_type dma_buf_fs_type = {
.name = "dmabuf",
.mount = dma_buf_fs_mount,
.kill_sb = kill_anon_super,
};
static int dma_buf_release(struct inode *inode, struct file *file)
{
struct dma_buf *dmabuf;
@ -79,6 +116,10 @@ static int dma_buf_mmap_internal(struct file *file, struct vm_area_struct *vma)
dmabuf = file->private_data;
/* check if buffer supports mmap */
if (!dmabuf->ops->mmap)
return -EINVAL;
/* check for overflowing the buffer's size */
if (vma->vm_pgoff + vma_pages(vma) >
dmabuf->size >> PAGE_SHIFT)
@ -265,6 +306,43 @@ out:
return events;
}
/**
* dma_buf_set_name - Set a name to a specific dma_buf to track the usage.
* The name of the dma-buf buffer can only be set when the dma-buf is not
 * attached to any devices. It could theoretically support changing the
 * name of the dma-buf if the same piece of memory is used for multiple
 * purposes between different devices.
*
* @dmabuf [in] dmabuf buffer that will be renamed.
* @buf: [in] A piece of userspace memory that contains the name of
* the dma-buf.
*
* Returns 0 on success. If the dma-buf buffer is already attached to
* devices, return -EBUSY.
*
*/
static long dma_buf_set_name(struct dma_buf *dmabuf, const char __user *buf)
{
char *name = strndup_user(buf, DMA_BUF_NAME_LEN);
long ret = 0;
if (IS_ERR(name))
return PTR_ERR(name);
mutex_lock(&dmabuf->lock);
if (!list_empty(&dmabuf->attachments)) {
ret = -EBUSY;
kfree(name);
goto out_unlock;
}
kfree(dmabuf->name);
dmabuf->name = name;
out_unlock:
mutex_unlock(&dmabuf->lock);
return ret;
}
static long dma_buf_ioctl(struct file *file,
unsigned int cmd, unsigned long arg)
{
@ -303,11 +381,29 @@ static long dma_buf_ioctl(struct file *file,
ret = dma_buf_begin_cpu_access(dmabuf, direction);
return ret;
case DMA_BUF_SET_NAME:
return dma_buf_set_name(dmabuf, (const char __user *)arg);
default:
return -ENOTTY;
}
}
static void dma_buf_show_fdinfo(struct seq_file *m, struct file *file)
{
struct dma_buf *dmabuf = file->private_data;
seq_printf(m, "size:\t%zu\n", dmabuf->size);
/* Don't count the temporary reference taken inside procfs seq_show */
seq_printf(m, "count:\t%ld\n", file_count(dmabuf->file) - 1);
seq_printf(m, "exp_name:\t%s\n", dmabuf->exp_name);
mutex_lock(&dmabuf->lock);
if (dmabuf->name)
seq_printf(m, "name:\t%s\n", dmabuf->name);
mutex_unlock(&dmabuf->lock);
}
static const struct file_operations dma_buf_fops = {
.release = dma_buf_release,
.mmap = dma_buf_mmap_internal,
@ -317,6 +413,7 @@ static const struct file_operations dma_buf_fops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = dma_buf_ioctl,
#endif
.show_fdinfo = dma_buf_show_fdinfo,
};
/*
@ -327,6 +424,32 @@ static inline int is_dma_buf_file(struct file *file)
return file->f_op == &dma_buf_fops;
}
static struct file *dma_buf_getfile(struct dma_buf *dmabuf, int flags)
{
struct file *file;
struct inode *inode = alloc_anon_inode(dma_buf_mnt->mnt_sb);
if (IS_ERR(inode))
return ERR_CAST(inode);
inode->i_size = dmabuf->size;
inode_set_bytes(inode, dmabuf->size);
file = alloc_file_pseudo(inode, dma_buf_mnt, "dmabuf",
flags, &dma_buf_fops);
if (IS_ERR(file))
goto err_alloc_file;
file->f_flags = flags & (O_ACCMODE | O_NONBLOCK);
file->private_data = dmabuf;
file->f_path.dentry->d_fsdata = dmabuf;
return file;
err_alloc_file:
iput(inode);
return file;
}
/**
* DOC: dma buf device access
*
@ -393,8 +516,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
|| !exp_info->ops
|| !exp_info->ops->map_dma_buf
|| !exp_info->ops->unmap_dma_buf
|| !exp_info->ops->release
|| !exp_info->ops->mmap)) {
|| !exp_info->ops->release)) {
return ERR_PTR(-EINVAL);
}
@ -422,8 +544,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
}
dmabuf->resv = resv;
file = anon_inode_getfile("dmabuf", &dma_buf_fops, dmabuf,
exp_info->flags);
file = dma_buf_getfile(dmabuf, exp_info->flags);
if (IS_ERR(file)) {
ret = PTR_ERR(file);
goto err_dmabuf;
@ -562,6 +683,7 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
list_add(&attach->node, &dmabuf->attachments);
mutex_unlock(&dmabuf->lock);
return attach;
err_attach:
@ -584,6 +706,9 @@ void dma_buf_detach(struct dma_buf *dmabuf, struct dma_buf_attachment *attach)
if (WARN_ON(!dmabuf || !attach))
return;
if (attach->sgt)
dmabuf->ops->unmap_dma_buf(attach, attach->sgt, attach->dir);
mutex_lock(&dmabuf->lock);
list_del(&attach->node);
if (dmabuf->ops->detach)
@ -619,10 +744,27 @@ struct sg_table *dma_buf_map_attachment(struct dma_buf_attachment *attach,
if (WARN_ON(!attach || !attach->dmabuf))
return ERR_PTR(-EINVAL);
if (attach->sgt) {
/*
* Two mappings with different directions for the same
* attachment are not allowed.
*/
if (attach->dir != direction &&
attach->dir != DMA_BIDIRECTIONAL)
return ERR_PTR(-EBUSY);
return attach->sgt;
}
sg_table = attach->dmabuf->ops->map_dma_buf(attach, direction);
if (!sg_table)
sg_table = ERR_PTR(-ENOMEM);
if (!IS_ERR(sg_table) && attach->dmabuf->ops->cache_sgt_mapping) {
attach->sgt = sg_table;
attach->dir = direction;
}
return sg_table;
}
EXPORT_SYMBOL_GPL(dma_buf_map_attachment);
@ -646,8 +788,10 @@ void dma_buf_unmap_attachment(struct dma_buf_attachment *attach,
if (WARN_ON(!attach || !attach->dmabuf || !sg_table))
return;
attach->dmabuf->ops->unmap_dma_buf(attach, sg_table,
direction);
if (attach->sgt == sg_table)
return;
attach->dmabuf->ops->unmap_dma_buf(attach, sg_table, direction);
}
EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
@ -895,6 +1039,10 @@ int dma_buf_mmap(struct dma_buf *dmabuf, struct vm_area_struct *vma,
if (WARN_ON(!dmabuf || !vma))
return -EINVAL;
/* check if buffer supports mmap */
if (!dmabuf->ops->mmap)
return -EINVAL;
/* check for offset overflow */
if (pgoff + vma_pages(vma) < pgoff)
return -EOVERFLOW;
@ -1014,8 +1162,8 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
return ret;
seq_puts(s, "\nDma-buf Objects:\n");
seq_printf(s, "%-8s\t%-8s\t%-8s\t%-8s\texp_name\n",
"size", "flags", "mode", "count");
seq_printf(s, "%-8s\t%-8s\t%-8s\t%-8s\texp_name\t%-8s\n",
"size", "flags", "mode", "count", "ino");
list_for_each_entry(buf_obj, &db_list.head, list_node) {
ret = mutex_lock_interruptible(&buf_obj->lock);
@ -1026,11 +1174,13 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
continue;
}
seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\n",
seq_printf(s, "%08zu\t%08x\t%08x\t%08ld\t%s\t%08lu\t%s\n",
buf_obj->size,
buf_obj->file->f_flags, buf_obj->file->f_mode,
file_count(buf_obj->file),
buf_obj->exp_name);
buf_obj->exp_name,
file_inode(buf_obj->file)->i_ino,
buf_obj->name ?: "");
robj = buf_obj->resv;
while (true) {
@ -1057,6 +1207,7 @@ static int dma_buf_debug_show(struct seq_file *s, void *unused)
fence->ops->get_driver_name(fence),
fence->ops->get_timeline_name(fence),
dma_fence_is_signaled(fence) ? "" : "un");
dma_fence_put(fence);
}
rcu_read_unlock();
@ -1125,6 +1276,10 @@ static inline void dma_buf_uninit_debugfs(void)
static int __init dma_buf_init(void)
{
dma_buf_mnt = kern_mount(&dma_buf_fs_type);
if (IS_ERR(dma_buf_mnt))
return PTR_ERR(dma_buf_mnt);
mutex_init(&db_list.lock);
INIT_LIST_HEAD(&db_list.head);
dma_buf_init_debugfs();
@ -1135,5 +1290,6 @@ subsys_initcall(dma_buf_init);
static void __exit dma_buf_deinit(void)
{
dma_buf_uninit_debugfs();
kern_unmount(dma_buf_mnt);
}
__exitcall(dma_buf_deinit);


@ -248,8 +248,25 @@ void dma_fence_release(struct kref *kref)
trace_dma_fence_destroy(fence);
/* Failed to signal before release, could be a refcounting issue */
WARN_ON(!list_empty(&fence->cb_list));
if (WARN(!list_empty(&fence->cb_list),
"Fence %s:%s:%llx:%llx released with pending signals!\n",
fence->ops->get_driver_name(fence),
fence->ops->get_timeline_name(fence),
fence->context, fence->seqno)) {
unsigned long flags;
/*
* Failed to signal before release, likely a refcounting issue.
*
* This should never happen, but if it does make sure that we
* don't leave chains dangling. We set the error flag first
* so that the callbacks know this signal is due to an error.
*/
spin_lock_irqsave(fence->lock, flags);
fence->error = -EDEADLK;
dma_fence_signal_locked(fence);
spin_unlock_irqrestore(fence->lock, flags);
}
if (fence->ops->release)
fence->ops->release(fence);


@ -365,6 +365,10 @@ int reservation_object_get_fences_rcu(struct reservation_object *obj,
GFP_NOWAIT | __GFP_NOWARN);
if (!nshared) {
rcu_read_unlock();
dma_fence_put(fence_excl);
fence_excl = NULL;
nshared = krealloc(shared, sz, GFP_KERNEL);
if (nshared) {
shared = nshared;


@ -188,29 +188,3 @@ static __init int sync_debugfs_init(void)
return 0;
}
late_initcall(sync_debugfs_init);
#define DUMP_CHUNK 256
static char sync_dump_buf[64 * 1024];
void sync_dump(void)
{
struct seq_file s = {
.buf = sync_dump_buf,
.size = sizeof(sync_dump_buf) - 1,
};
int i;
sync_info_debugfs_show(&s, NULL);
for (i = 0; i < s.count; i += DUMP_CHUNK) {
if ((s.count - i) > DUMP_CHUNK) {
char c = s.buf[i + DUMP_CHUNK];
s.buf[i + DUMP_CHUNK] = 0;
pr_cont("%s", s.buf + i);
s.buf[i + DUMP_CHUNK] = c;
} else {
s.buf[s.count] = 0;
pr_cont("%s", s.buf + i);
}
}
}


@ -68,6 +68,5 @@ void sync_timeline_debug_add(struct sync_timeline *obj);
void sync_timeline_debug_remove(struct sync_timeline *obj);
void sync_file_debug_add(struct sync_file *fence);
void sync_file_debug_remove(struct sync_file *fence);
void sync_dump(void);
#endif /* _LINUX_SYNC_H */

View File

@ -161,6 +161,13 @@ config DRM_TTM
GPU memory types. Will be enabled automatically if a device driver
uses it.
config DRM_VRAM_HELPER
tristate
depends on DRM
select DRM_TTM
help
Helpers for VRAM memory management
config DRM_GEM_CMA_HELPER
bool
depends on DRM
@ -309,6 +316,8 @@ source "drivers/gpu/drm/sti/Kconfig"
source "drivers/gpu/drm/imx/Kconfig"
source "drivers/gpu/drm/ingenic/Kconfig"
source "drivers/gpu/drm/v3d/Kconfig"
source "drivers/gpu/drm/vc4/Kconfig"
@ -343,6 +352,8 @@ source "drivers/gpu/drm/panfrost/Kconfig"
source "drivers/gpu/drm/aspeed/Kconfig"
source "drivers/gpu/drm/mcde/Kconfig"
# Keep legacy drivers last
menuconfig DRM_LEGACY

View File

@ -17,7 +17,7 @@ drm-y := drm_auth.o drm_cache.o \
drm_plane.o drm_color_mgmt.o drm_print.o \
drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o \
drm_atomic_uapi.o
drm_client_modeset.o drm_atomic_uapi.o drm_hdcp.o
drm-$(CONFIG_DRM_LEGACY) += drm_legacy_misc.o drm_bufs.o drm_context.o drm_dma.o drm_scatter.o drm_lock.o
drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
@ -32,13 +32,18 @@ drm-$(CONFIG_AGP) += drm_agpsupport.o
drm-$(CONFIG_DEBUG_FS) += drm_debugfs.o drm_debugfs_crc.o
drm-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
drm_vram_helper-y := drm_gem_vram_helper.o \
drm_vram_helper_common.o \
drm_vram_mm_helper.o
obj-$(CONFIG_DRM_VRAM_HELPER) += drm_vram_helper.o
drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_dsc.o drm_probe_helper.o \
drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o \
drm_kms_helper_common.o drm_dp_dual_mode_helper.o \
drm_simple_kms_helper.o drm_modeset_helper.o \
drm_scdc_helper.o drm_gem_framebuffer_helper.o \
drm_atomic_state_helper.o drm_damage_helper.o \
drm_format_helper.o
drm_format_helper.o drm_self_refresh_helper.o
drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o
drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o
@ -94,6 +99,7 @@ obj-$(CONFIG_DRM_TEGRA) += tegra/
obj-$(CONFIG_DRM_STM) += stm/
obj-$(CONFIG_DRM_STI) += sti/
obj-$(CONFIG_DRM_IMX) += imx/
obj-$(CONFIG_DRM_INGENIC) += ingenic/
obj-$(CONFIG_DRM_MEDIATEK) += mediatek/
obj-$(CONFIG_DRM_MESON) += meson/
obj-y += i2c/
@ -113,3 +119,4 @@ obj-$(CONFIG_DRM_VBOXVIDEO) += vboxvideo/
obj-$(CONFIG_DRM_LIMA) += lima/
obj-$(CONFIG_DRM_PANFROST) += panfrost/
obj-$(CONFIG_DRM_ASPEED_GFX) += aspeed/
obj-$(CONFIG_DRM_MCDE) += mcde/

View File

@ -27,10 +27,10 @@ config DRM_AMDGPU_CIK
config DRM_AMDGPU_USERPTR
bool "Always enable userptr write support"
depends on DRM_AMDGPU
select MMU_NOTIFIER
depends on HMM_MIRROR
help
This option selects CONFIG_MMU_NOTIFIER if it isn't already
selected to enable full userptr support.
This option selects CONFIG_HMM and CONFIG_HMM_MIRROR if they
aren't already selected, to enable full userptr support.
config DRM_AMDGPU_GART_DEBUGFS
bool "Allow GART access through debugfs"

View File

@ -49,12 +49,14 @@ amdgpu-y += amdgpu_device.o amdgpu_kms.o \
amdgpu_cs.o amdgpu_bios.o amdgpu_benchmark.o amdgpu_test.o \
amdgpu_pm.o atombios_dp.o amdgpu_afmt.o amdgpu_trace_points.o \
atombios_encoders.o amdgpu_sa.o atombios_i2c.o \
amdgpu_prime.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_dma_buf.o amdgpu_vm.o amdgpu_ib.o amdgpu_pll.o \
amdgpu_ucode.o amdgpu_bo_list.o amdgpu_ctx.o amdgpu_sync.o \
amdgpu_gtt_mgr.o amdgpu_vram_mgr.o amdgpu_virt.o amdgpu_atomfirmware.o \
amdgpu_vf_error.o amdgpu_sched.o amdgpu_debugfs.o amdgpu_ids.o \
amdgpu_gmc.o amdgpu_xgmi.o amdgpu_csa.o amdgpu_ras.o amdgpu_vm_cpu.o \
amdgpu_vm_sdma.o
amdgpu_vm_sdma.o amdgpu_discovery.o
amdgpu-$(CONFIG_PERF_EVENTS) += amdgpu_pmu.o
# add asic specific block
amdgpu-$(CONFIG_DRM_AMDGPU_CIK)+= cik.o cik_ih.o kv_smc.o kv_dpm.o \
@ -64,7 +66,7 @@ amdgpu-$(CONFIG_DRM_AMDGPU_SI)+= si.o gmc_v6_0.o gfx_v6_0.o si_ih.o si_dma.o dce
amdgpu-y += \
vi.o mxgpu_vi.o nbio_v6_1.o soc15.o emu_soc.o mxgpu_ai.o nbio_v7_0.o vega10_reg_init.o \
vega20_reg_init.o nbio_v7_4.o
vega20_reg_init.o nbio_v7_4.o nbio_v2_3.o nv.o navi10_reg_init.o
# add DF block
amdgpu-y += \
@ -75,7 +77,8 @@ amdgpu-y += \
amdgpu-y += \
gmc_v7_0.o \
gmc_v8_0.o \
gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o
gfxhub_v1_0.o mmhub_v1_0.o gmc_v9_0.o gfxhub_v1_1.o \
gfxhub_v2_0.o mmhub_v2_0.o gmc_v10_0.o
# add IH block
amdgpu-y += \
@ -84,7 +87,8 @@ amdgpu-y += \
iceland_ih.o \
tonga_ih.o \
cz_ih.o \
vega10_ih.o
vega10_ih.o \
navi10_ih.o
# add PSP block
amdgpu-y += \
@ -108,14 +112,20 @@ amdgpu-y += \
amdgpu_gfx.o \
amdgpu_rlc.o \
gfx_v8_0.o \
gfx_v9_0.o
gfx_v9_0.o \
gfx_v10_0.o
# add async DMA block
amdgpu-y += \
amdgpu_sdma.o \
sdma_v2_4.o \
sdma_v3_0.o \
sdma_v4_0.o
sdma_v4_0.o \
sdma_v5_0.o
# add MES block
amdgpu-y += \
mes_v10_1.o
# add UVD block
amdgpu-y += \
@ -133,7 +143,12 @@ amdgpu-y += \
# add VCN block
amdgpu-y += \
amdgpu_vcn.o \
vcn_v1_0.o
vcn_v1_0.o \
vcn_v2_0.o
# add ATHUB block
amdgpu-y += \
athub_v2_0.o
# add amdkfd interfaces
amdgpu-y += amdgpu_amdkfd.o
@ -146,7 +161,8 @@ amdgpu-y += \
amdgpu_amdkfd_fence.o \
amdgpu_amdkfd_gpuvm.o \
amdgpu_amdkfd_gfx_v8.o \
amdgpu_amdkfd_gfx_v9.o
amdgpu_amdkfd_gfx_v9.o \
amdgpu_amdkfd_gfx_v10.o
ifneq ($(CONFIG_DRM_AMDGPU_CIK),)
amdgpu-y += amdgpu_amdkfd_gfx_v7.o
@ -173,7 +189,7 @@ endif
amdgpu-$(CONFIG_COMPAT) += amdgpu_ioc32.o
amdgpu-$(CONFIG_VGA_SWITCHEROO) += amdgpu_atpx_handler.o
amdgpu-$(CONFIG_ACPI) += amdgpu_acpi.o
amdgpu-$(CONFIG_MMU_NOTIFIER) += amdgpu_mn.o
amdgpu-$(CONFIG_HMM_MIRROR) += amdgpu_mn.o
include $(FULL_AMD_PATH)/powerplay/Makefile

View File

@ -44,9 +44,9 @@
#include <drm/ttm/ttm_module.h>
#include <drm/ttm/ttm_execbuf_util.h>
#include <drm/drmP.h>
#include <drm/drm_gem.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_gem.h>
#include <drm/drm_ioctl.h>
#include <drm/gpu_scheduler.h>
#include <kgd_kfd_interface.h>
@ -84,6 +84,8 @@
#include "amdgpu_doorbell.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_smu.h"
#include "amdgpu_discovery.h"
#include "amdgpu_mes.h"
#define MAX_GPU_INSTANCE 16
@ -118,7 +120,6 @@ extern int amdgpu_disp_priority;
extern int amdgpu_hw_i2c;
extern int amdgpu_pcie_gen2;
extern int amdgpu_msi;
extern int amdgpu_lockup_timeout;
extern int amdgpu_dpm;
extern int amdgpu_fw_load_type;
extern int amdgpu_aspm;
@ -143,7 +144,6 @@ extern uint amdgpu_sdma_phase_quantum;
extern char *amdgpu_disable_cu;
extern char *amdgpu_virtual_display;
extern uint amdgpu_pp_feature_mask;
extern int amdgpu_vram_page_split;
extern int amdgpu_ngg;
extern int amdgpu_prim_buf_per_se;
extern int amdgpu_pos_buf_per_se;
@ -156,9 +156,14 @@ extern int amdgpu_gpu_recovery;
extern int amdgpu_emu_mode;
extern uint amdgpu_smu_memory_pool_size;
extern uint amdgpu_dc_feature_mask;
extern uint amdgpu_dm_abm_level;
extern struct amdgpu_mgpu_info mgpu_info;
extern int amdgpu_ras_enable;
extern uint amdgpu_ras_mask;
extern int amdgpu_async_gfx_ring;
extern int amdgpu_mcbp;
extern int amdgpu_discovery;
extern int amdgpu_mes;
#ifdef CONFIG_DRM_AMDGPU_SI
extern int amdgpu_si_support;
@ -211,9 +216,11 @@ struct amdgpu_irq_src;
struct amdgpu_fpriv;
struct amdgpu_bo_va_mapping;
struct amdgpu_atif;
struct kfd_vm_fault_info;
enum amdgpu_cp_irq {
AMDGPU_CP_IRQ_GFX_EOP = 0,
AMDGPU_CP_IRQ_GFX_ME0_PIPE0_EOP = 0,
AMDGPU_CP_IRQ_GFX_ME0_PIPE1_EOP,
AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE0_EOP,
AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE1_EOP,
AMDGPU_CP_IRQ_COMPUTE_MEC1_PIPE2_EOP,
@ -415,6 +422,7 @@ struct amdgpu_fpriv {
};
int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv);
int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev);
int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
unsigned size, struct amdgpu_ib *ib);
@ -558,6 +566,8 @@ struct amdgpu_asic_funcs {
uint64_t *count1);
/* do we need to reset the asic at init time (e.g., kexec) */
bool (*need_reset_on_init)(struct amdgpu_device *adev);
/* PCIe replay counter */
uint64_t (*get_pcie_replay_count)(struct amdgpu_device *adev);
};
/*
@ -639,6 +649,11 @@ struct nbio_hdp_flush_reg {
u32 ref_and_mask_sdma1;
};
struct amdgpu_mmio_remap {
u32 reg_offset;
resource_size_t bus_addr;
};
struct amdgpu_nbio_funcs {
const struct nbio_hdp_flush_reg *hdp_flush_reg;
u32 (*get_hdp_flush_req_offset)(struct amdgpu_device *adev);
@ -651,6 +666,8 @@ struct amdgpu_nbio_funcs {
u32 (*get_memsize)(struct amdgpu_device *adev);
void (*sdma_doorbell_range)(struct amdgpu_device *adev, int instance,
bool use_doorbell, int doorbell_index, int doorbell_size);
void (*vcn_doorbell_range)(struct amdgpu_device *adev, bool use_doorbell,
int doorbell_index);
void (*enable_doorbell_aperture)(struct amdgpu_device *adev,
bool enable);
void (*enable_doorbell_selfring_aperture)(struct amdgpu_device *adev,
@ -666,10 +683,11 @@ struct amdgpu_nbio_funcs {
void (*ih_control)(struct amdgpu_device *adev);
void (*init_registers)(struct amdgpu_device *adev);
void (*detect_hw_virt)(struct amdgpu_device *adev);
void (*remap_hdp_registers)(struct amdgpu_device *adev);
};
struct amdgpu_df_funcs {
void (*init)(struct amdgpu_device *adev);
void (*sw_init)(struct amdgpu_device *adev);
void (*enable_broadcast_mode)(struct amdgpu_device *adev,
bool enable);
u32 (*get_fb_channel_number)(struct amdgpu_device *adev);
@ -680,6 +698,12 @@ struct amdgpu_df_funcs {
u32 *flags);
void (*enable_ecc_force_par_wr_rmw)(struct amdgpu_device *adev,
bool enable);
int (*pmc_start)(struct amdgpu_device *adev, uint64_t config,
int is_enable);
int (*pmc_stop)(struct amdgpu_device *adev, uint64_t config,
int is_disable);
void (*pmc_get_count)(struct amdgpu_device *adev, uint64_t config,
uint64_t *count);
};
/* Define the HW IP blocks that will be used in the driver; add more if necessary */
enum amd_hw_ip_block_type {
@ -714,6 +738,7 @@ struct amd_powerplay {
};
#define AMDGPU_RESET_MAGIC_NUM 64
#define AMDGPU_MAX_DF_PERFMONS 4
struct amdgpu_device {
struct device *dev;
struct drm_device *ddev;
@ -740,6 +765,7 @@ struct amdgpu_device {
struct amdgpu_debugfs debugfs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
unsigned debugfs_count;
#if defined(CONFIG_DEBUG_FS)
struct dentry *debugfs_preempt;
struct dentry *debugfs_regs[AMDGPU_DEBUGFS_MAX_COMPONENTS];
#endif
struct amdgpu_atif *atif;
@ -749,6 +775,7 @@ struct amdgpu_device {
struct mutex grbm_idx_mutex;
struct dev_pm_domain vga_pm_domain;
bool have_disp_power_ref;
bool have_atomics_support;
/* BIOS */
bool is_atom_fw;
@ -764,6 +791,7 @@ struct amdgpu_device {
void __iomem *rmmio;
/* protects concurrent MM_INDEX/DATA based register access */
spinlock_t mmio_idx_lock;
struct amdgpu_mmio_remap rmmio_remap;
/* protects concurrent SMC based register access */
spinlock_t smc_idx_lock;
amdgpu_rreg_t smc_rreg;
@ -889,6 +917,13 @@ struct amdgpu_device {
/* display related functionality */
struct amdgpu_display_manager dm;
/* discovery */
uint8_t *discovery;
/* mes */
bool enable_mes;
struct amdgpu_mes mes;
struct amdgpu_ip_block ip_blocks[AMDGPU_MAX_IP_NUM];
int num_ip_blocks;
struct mutex mn_lock;
@ -906,7 +941,7 @@ struct amdgpu_device {
const struct amdgpu_df_funcs *df_funcs;
/* delayed work_func for deferring clockgating during resume */
struct delayed_work late_init_work;
struct delayed_work delayed_init_work;
struct amdgpu_virt virt;
/* firmware VRAM reservation */
@ -936,6 +971,14 @@ struct amdgpu_device {
struct work_struct xgmi_reset_work;
bool in_baco_reset;
long gfx_timeout;
long sdma_timeout;
long video_timeout;
long compute_timeout;
uint64_t unique_id;
uint64_t df_perfmon_config_assign_mask[AMDGPU_MAX_DF_PERFMONS];
};
static inline struct amdgpu_device *amdgpu_ttm_adev(struct ttm_bo_device *bdev)
@ -1065,6 +1108,7 @@ int emu_soc_asic_init(struct amdgpu_device *adev);
#define amdgpu_asic_init_doorbell_index(adev) (adev)->asic_funcs->init_doorbell_index((adev))
#define amdgpu_asic_get_pcie_usage(adev, cnt0, cnt1) ((adev)->asic_funcs->get_pcie_usage((adev), (cnt0), (cnt1)))
#define amdgpu_asic_need_reset_on_init(adev) (adev)->asic_funcs->need_reset_on_init((adev))
#define amdgpu_asic_get_pcie_replay_count(adev) ((adev)->asic_funcs->get_pcie_replay_count((adev)))
/* Common functions */
bool amdgpu_device_should_recover_gpu(struct amdgpu_device *adev);
@ -1081,6 +1125,9 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
const u32 array_size);
bool amdgpu_device_is_px(struct drm_device *dev);
bool amdgpu_device_is_peer_accessible(struct amdgpu_device *adev,
struct amdgpu_device *peer_adev);
/* atpx handler */
#if defined(CONFIG_VGA_SWITCHEROO)
void amdgpu_register_atpx_handler(void);
@ -1170,5 +1217,24 @@ int amdgpu_dm_display_resume(struct amdgpu_device *adev );
static inline int amdgpu_dm_display_resume(struct amdgpu_device *adev) { return 0; }
#endif
void amdgpu_register_gpu_instance(struct amdgpu_device *adev);
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev);
#include "amdgpu_object.h"
/* used by df_v3_6.c and amdgpu_pmu.c */
#define AMDGPU_PMU_ATTR(_name, _object) \
static ssize_t \
_name##_show(struct device *dev, \
struct device_attribute *attr, \
char *page) \
{ \
BUILD_BUG_ON(sizeof(_object) >= PAGE_SIZE - 1); \
return sprintf(page, _object "\n"); \
} \
\
static struct device_attribute pmu_attr_##_name = __ATTR_RO(_name)
#endif
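
Editor's note: as an illustration of the AMDGPU_PMU_ATTR() helper added above, a single invocation expands to a read-only sysfs attribute. The attribute name and format string below are hypothetical, not taken from df_v3_6.c:

AMDGPU_PMU_ATTR(example_event, "event=0x00,umask=0x00");

/* ...which is roughly equivalent to writing out:
 *
 * static ssize_t example_event_show(struct device *dev,
 *                                   struct device_attribute *attr,
 *                                   char *page)
 * {
 *         return sprintf(page, "event=0x00,umask=0x00\n");
 * }
 *
 * static struct device_attribute pmu_attr_example_event =
 *         __ATTR_RO(example_event);
 */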

View File

@ -24,6 +24,7 @@
*/
#include <linux/irqdomain.h>
#include <linux/pci.h>
#include <linux/pm_domain.h>
#include <linux/platform_device.h>
#include <sound/designware_i2s.h>

View File

@ -27,7 +27,7 @@
#include <linux/power_supply.h>
#include <linux/pm_runtime.h>
#include <acpi/video.h>
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include "amdgpu.h"
#include "amdgpu_pm.h"

View File

@ -25,7 +25,7 @@
*/
#include <linux/hdmi.h>
#include <linux/gcd.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"

View File

@ -22,11 +22,13 @@
#include "amdgpu_amdkfd.h"
#include "amd_shared.h"
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_dma_buf.h"
#include <linux/module.h>
#include <linux/dma-buf.h>
#include "amdgpu_xgmi.h"
static const unsigned int compute_vmid_bitmap = 0xFF00;
@ -76,6 +78,7 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
case CHIP_POLARIS10:
case CHIP_POLARIS11:
case CHIP_POLARIS12:
case CHIP_VEGAM:
kfd2kgd = amdgpu_amdkfd_gfx_8_0_get_functions();
break;
case CHIP_VEGA10:
@ -84,6 +87,9 @@ void amdgpu_amdkfd_device_probe(struct amdgpu_device *adev)
case CHIP_RAVEN:
kfd2kgd = amdgpu_amdkfd_gfx_9_0_get_functions();
break;
case CHIP_NAVI10:
kfd2kgd = amdgpu_amdkfd_gfx_10_0_get_functions();
break;
default:
dev_info(adev->dev, "kfd not supported on this ASIC\n");
return;
@ -148,21 +154,23 @@ void amdgpu_amdkfd_device_init(struct amdgpu_device *adev)
};
/* this is going to have a few of the MSBs set that we need to
* clear */
* clear
*/
bitmap_complement(gpu_resources.queue_bitmap,
adev->gfx.mec.queue_bitmap,
KGD_MAX_QUEUES);
/* remove the KIQ bit as well */
if (adev->gfx.kiq.ring.sched.ready)
clear_bit(amdgpu_gfx_queue_to_bit(adev,
clear_bit(amdgpu_gfx_mec_queue_to_bit(adev,
adev->gfx.kiq.ring.me - 1,
adev->gfx.kiq.ring.pipe,
adev->gfx.kiq.ring.queue),
gpu_resources.queue_bitmap);
/* According to linux/bitmap.h we shouldn't use bitmap_clear if
* nbits is not compile time constant */
* nbits is not compile time constant
*/
last_valid_bit = 1 /* only first MEC can have compute queues */
* adev->gfx.mec.num_pipe_per_mec
* adev->gfx.mec.num_queue_per_pipe;
@ -335,6 +343,40 @@ void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj)
amdgpu_bo_unref(&(bo));
}
int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size,
void **mem_obj)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
struct amdgpu_bo *bo = NULL;
struct amdgpu_bo_param bp;
int r;
memset(&bp, 0, sizeof(bp));
bp.size = size;
bp.byte_align = 1;
bp.domain = AMDGPU_GEM_DOMAIN_GWS;
bp.flags = AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
bp.type = ttm_bo_type_device;
bp.resv = NULL;
r = amdgpu_bo_create(adev, &bp, &bo);
if (r) {
dev_err(adev->dev,
"failed to allocate gws BO for amdkfd (%d)\n", r);
return r;
}
*mem_obj = bo;
return 0;
}
void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj)
{
struct amdgpu_bo *bo = (struct amdgpu_bo *)mem_obj;
amdgpu_bo_unref(&bo);
}
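
Editor's note: a hedged sketch of how the new GWS helpers pair up. The wrapper function is hypothetical; the helper signatures and adev->gds.gws_size come straight from this patch, and error handling is reduced to the minimum:

static int example_gws_roundtrip(struct kgd_dev *kgd,
                                 struct amdgpu_device *adev)
{
        void *gws_obj = NULL;
        int r;

        r = amdgpu_amdkfd_alloc_gws(kgd, adev->gds.gws_size, &gws_obj);
        if (r)
                return r;

        /* ...hand the GWS BO over to a KFD process here... */

        amdgpu_amdkfd_free_gws(kgd, gws_obj);
        return 0;
}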
uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
enum kgd_engine_type type)
{
@ -398,9 +440,12 @@ void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
if (amdgpu_sriov_vf(adev))
mem_info->mem_clk_max = adev->clock.default_mclk / 100;
else if (adev->powerplay.pp_funcs)
mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
else
else if (adev->powerplay.pp_funcs) {
if (amdgpu_emu_mode == 1)
mem_info->mem_clk_max = 0;
else
mem_info->mem_clk_max = amdgpu_dpm_get_mclk(adev, false) / 100;
} else
mem_info->mem_clk_max = 100;
}
@ -518,6 +563,34 @@ uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd)
return adev->gmc.xgmi.hive_id;
}
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src)
{
struct amdgpu_device *peer_adev = (struct amdgpu_device *)src;
struct amdgpu_device *adev = (struct amdgpu_device *)dst;
int ret = amdgpu_xgmi_get_hops_count(adev, peer_adev);
if (ret < 0) {
DRM_ERROR("amdgpu: failed to get xgmi hops count between node %d and %d. ret = %d\n",
adev->gmc.xgmi.physical_node_id,
peer_adev->gmc.xgmi.physical_node_id, ret);
ret = 0;
}
return (uint8_t)ret;
}
uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
return adev->rmmio_remap.bus_addr;
}
uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
return adev->gds.gws_size;
}
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
@ -595,6 +668,13 @@ bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
return false;
}
bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
return adev->have_atomics_support;
}
#ifndef CONFIG_HSA_AMD
bool amdkfd_fence_check_mm(struct dma_fence *f, struct mm_struct *mm)
{
@ -635,6 +715,11 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
return NULL;
}
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void)
{
return NULL;
}
struct kfd_dev *kgd2kfd_probe(struct kgd_dev *kgd, struct pci_dev *pdev,
const struct kfd2kgd_calls *f2g)
{

View File

@ -61,7 +61,6 @@ struct kgd_mem {
atomic_t invalid;
struct amdkfd_process_info *process_info;
struct page **user_pages;
struct amdgpu_sync sync;
@ -136,10 +135,12 @@ int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
bool amdgpu_amdkfd_have_atomics_support(struct kgd_dev *kgd);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions(void);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
@ -154,6 +155,10 @@ int amdgpu_amdkfd_alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr, bool mqd_gfx9);
void amdgpu_amdkfd_free_gtt_mem(struct kgd_dev *kgd, void *mem_obj);
int amdgpu_amdkfd_alloc_gws(struct kgd_dev *kgd, size_t size, void **mem_obj);
void amdgpu_amdkfd_free_gws(struct kgd_dev *kgd, void *mem_obj);
int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem);
int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem);
uint32_t amdgpu_amdkfd_get_fw_version(struct kgd_dev *kgd,
enum kgd_engine_type type);
void amdgpu_amdkfd_get_local_mem_info(struct kgd_dev *kgd,
@ -169,6 +174,9 @@ int amdgpu_amdkfd_get_dmabuf_info(struct kgd_dev *kgd, int dma_buf_fd,
uint32_t *flags);
uint64_t amdgpu_amdkfd_get_vram_usage(struct kgd_dev *kgd);
uint64_t amdgpu_amdkfd_get_hive_id(struct kgd_dev *kgd);
uint64_t amdgpu_amdkfd_get_mmio_remap_phys_addr(struct kgd_dev *kgd);
uint32_t amdgpu_amdkfd_get_num_gws(struct kgd_dev *kgd);
uint8_t amdgpu_amdkfd_get_xgmi_hops_count(struct kgd_dev *dst, struct kgd_dev *src);
#define read_user_wptr(mmptr, wptr, dst) \
({ \

View File

@ -0,0 +1,975 @@
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#undef pr_fmt
#define pr_fmt(fmt) "kfd2kgd: " fmt
#include <linux/module.h>
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/firmware.h>
#include <linux/mmu_context.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_ucode.h"
#include "soc15_hw_ip.h"
#include "gc/gc_10_1_0_offset.h"
#include "gc/gc_10_1_0_sh_mask.h"
#include "navi10_enum.h"
#include "athub/athub_2_0_0_offset.h"
#include "athub/athub_2_0_0_sh_mask.h"
#include "oss/osssys_5_0_0_offset.h"
#include "oss/osssys_5_0_0_sh_mask.h"
#include "soc15_common.h"
#include "v10_structs.h"
#include "nv.h"
#include "nvd.h"
enum hqd_dequeue_request_type {
NO_ACTION = 0,
DRAIN_PIPE,
RESET_WAVES,
SAVE_WAVES
};
/*
* Register access functions
*/
static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base, uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases);
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid);
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id);
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
struct mm_struct *mm);
static int kgd_hqd_dump(struct kgd_dev *kgd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs);
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm);
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs);
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id);
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd);
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id);
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
unsigned int utimeout);
#if 0
static uint32_t get_watch_base_addr(struct amdgpu_device *adev);
#endif
static int kgd_address_watch_disable(struct kgd_dev *kgd);
static int kgd_address_watch_execute(struct kgd_dev *kgd,
unsigned int watch_point_id,
uint32_t cntl_val,
uint32_t addr_hi,
uint32_t addr_lo);
static int kgd_wave_control_execute(struct kgd_dev *kgd,
uint32_t gfx_index_val,
uint32_t sq_cmd);
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
unsigned int watch_point_id,
unsigned int reg_offset);
static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
uint8_t vmid);
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
uint8_t vmid);
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint64_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
/* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file.
*/
static int amdgpu_amdkfd_get_tile_config(struct kgd_dev *kgd,
struct tile_config *config)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
config->gb_addr_config = adev->gfx.config.gb_addr_config;
#if 0
/* TODO - confirm REG_GET_FIELD x2, should be OK as is... but
* MC_ARB_RAMCFG register doesn't exist on Vega10 - initial amdgpu
* changes commented out related code, doing the same here for now but
* need to sync with Ken et al
*/
config->num_banks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
MC_ARB_RAMCFG, NOOFBANK);
config->num_ranks = REG_GET_FIELD(adev->gfx.config.mc_arb_ramcfg,
MC_ARB_RAMCFG, NOOFRANKS);
#endif
config->tile_config_ptr = adev->gfx.config.tile_mode_array;
config->num_tile_configs =
ARRAY_SIZE(adev->gfx.config.tile_mode_array);
config->macro_tile_config_ptr =
adev->gfx.config.macrotile_mode_array;
config->num_macro_tile_configs =
ARRAY_SIZE(adev->gfx.config.macrotile_mode_array);
return 0;
}
static const struct kfd2kgd_calls kfd2kgd = {
.program_sh_mem_settings = kgd_program_sh_mem_settings,
.set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping,
.init_interrupts = kgd_init_interrupts,
.hqd_load = kgd_hqd_load,
.hqd_sdma_load = kgd_hqd_sdma_load,
.hqd_dump = kgd_hqd_dump,
.hqd_sdma_dump = kgd_hqd_sdma_dump,
.hqd_is_occupied = kgd_hqd_is_occupied,
.hqd_sdma_is_occupied = kgd_hqd_sdma_is_occupied,
.hqd_destroy = kgd_hqd_destroy,
.hqd_sdma_destroy = kgd_hqd_sdma_destroy,
.address_watch_disable = kgd_address_watch_disable,
.address_watch_execute = kgd_address_watch_execute,
.wave_control_execute = kgd_wave_control_execute,
.address_watch_get_offset = kgd_address_watch_get_offset,
.get_atc_vmid_pasid_mapping_pasid =
get_atc_vmid_pasid_mapping_pasid,
.get_atc_vmid_pasid_mapping_valid =
get_atc_vmid_pasid_mapping_valid,
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.set_vm_context_page_table_base = set_vm_context_page_table_base,
.get_tile_config = amdgpu_amdkfd_get_tile_config,
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_10_0_get_functions()
{
return (struct kfd2kgd_calls *)&kfd2kgd;
}
static inline struct amdgpu_device *get_amdgpu_device(struct kgd_dev *kgd)
{
return (struct amdgpu_device *)kgd;
}
static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe,
uint32_t queue, uint32_t vmid)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
mutex_lock(&adev->srbm_mutex);
nv_grbm_select(adev, mec, pipe, queue, vmid);
}
static void unlock_srbm(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
nv_grbm_select(adev, 0, 0, 0, 0);
mutex_unlock(&adev->srbm_mutex);
}
static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id,
uint32_t queue_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
uint32_t pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
lock_srbm(kgd, mec, pipe, queue_id, 0);
}
static uint32_t get_queue_mask(struct amdgpu_device *adev,
uint32_t pipe_id, uint32_t queue_id)
{
unsigned int bit = (pipe_id * adev->gfx.mec.num_queue_per_pipe +
queue_id) & 31;
return ((uint32_t)1) << bit;
}
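
/* Editor's note, worked example (queue count is hypothetical): on a part
 * with 8 queues per pipe, pipe 1 / queue 2 lands on bit
 * (1 * 8 + 2) & 31 = 10, i.e. mask 0x00000400, which kgd_hqd_load()
 * below writes to CP_PQ_WPTR_POLL_CNTL1. */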
static void release_queue(struct kgd_dev *kgd)
{
unlock_srbm(kgd);
}
static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
uint32_t sh_mem_config,
uint32_t sh_mem_ape1_base,
uint32_t sh_mem_ape1_limit,
uint32_t sh_mem_bases)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
lock_srbm(kgd, 0, 0, 0, vmid);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
/* APE1 no longer exists on GFX9 */
unlock_srbm(kgd);
}
static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid,
unsigned int vmid)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
/*
* We have to assume that there is no outstanding mapping.
* The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 because
* a mapping is in progress or because a mapping finished
* and the SW cleared it.
* So the protocol is to always wait & clear.
*/
uint32_t pasid_mapping = (pasid == 0) ? 0 : (uint32_t)pasid |
ATC_VMID0_PASID_MAPPING__VALID_MASK;
pr_debug("pasid 0x%x vmid %d, reg value %x\n", pasid, vmid, pasid_mapping);
/*
* need to do this twice, once for gfx and once for mmhub
* for ATC add 16 to VMID for mmhub, for IH different registers.
* ATC_VMID0..15 registers are separate from ATC_VMID16..31.
*/
pr_debug("ATHUB, reg %x\n", SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid);
WREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING) + vmid,
pasid_mapping);
#if 0
/* TODO: uncomment this code when the hardware support is ready. */
while (!(RREG32(SOC15_REG_OFFSET(
ATHUB, 0,
mmATC_VMID_PASID_MAPPING_UPDATE_STATUS)) &
(1U << vmid)))
cpu_relax();
pr_debug("ATHUB mapping update finished\n");
WREG32(SOC15_REG_OFFSET(ATHUB, 0,
mmATC_VMID_PASID_MAPPING_UPDATE_STATUS),
1U << vmid);
#endif
/* Mapping vmid to pasid also for IH block */
pr_debug("update mapping for IH block and mmhub");
WREG32(SOC15_REG_OFFSET(OSSSYS, 0, mmIH_VMID_0_LUT) + vmid,
pasid_mapping);
return 0;
}
/* TODO - RING0 form of field is obsolete, seems to date back to SI
* but still works
*/
static int kgd_init_interrupts(struct kgd_dev *kgd, uint32_t pipe_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t mec;
uint32_t pipe;
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
lock_srbm(kgd, mec, pipe, 0, 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmCPC_INT_CNTL),
CP_INT_CNTL_RING0__TIME_STAMP_INT_ENABLE_MASK |
CP_INT_CNTL_RING0__OPCODE_ERROR_INT_ENABLE_MASK);
unlock_srbm(kgd);
return 0;
}
static uint32_t get_sdma_base_addr(struct amdgpu_device *adev,
unsigned int engine_id,
unsigned int queue_id)
{
uint32_t base[2] = {
SOC15_REG_OFFSET(SDMA0, 0,
mmSDMA0_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL,
/* On gfx10, mmSDMA1_xxx registers are defined NOT based
* on SDMA1 base address (dw 0x1860) but based on SDMA0
* base address (dw 0x1260). Therefore use mmSDMA0_RLC0_RB_CNTL
* instead of mmSDMA1_RLC0_RB_CNTL for the base address calc
* below
*/
SOC15_REG_OFFSET(SDMA1, 0,
mmSDMA1_RLC0_RB_CNTL) - mmSDMA0_RLC0_RB_CNTL
};
uint32_t retval;
retval = base[engine_id] + queue_id * (mmSDMA0_RLC1_RB_CNTL -
mmSDMA0_RLC0_RB_CNTL);
pr_debug("sdma base address: 0x%x\n", retval);
return retval;
}
#if 0
static uint32_t get_watch_base_addr(struct amdgpu_device *adev)
{
uint32_t retval = SOC15_REG_OFFSET(GC, 0, mmTCP_WATCH0_ADDR_H) -
mmTCP_WATCH0_ADDR_H;
pr_debug("kfd: reg watch base address: 0x%x\n", retval);
return retval;
}
#endif
static inline struct v10_compute_mqd *get_mqd(void *mqd)
{
return (struct v10_compute_mqd *)mqd;
}
static inline struct v10_sdma_mqd *get_sdma_mqd(void *mqd)
{
return (struct v10_sdma_mqd *)mqd;
}
static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
uint32_t queue_id, uint32_t __user *wptr,
uint32_t wptr_shift, uint32_t wptr_mask,
struct mm_struct *mm)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_compute_mqd *m;
uint32_t *mqd_hqd;
uint32_t reg, hqd_base, data;
m = get_mqd(mqd);
pr_debug("Load hqd of pipe %d queue %d\n", pipe_id, queue_id);
acquire_queue(kgd, pipe_id, queue_id);
/* HIQ is set during driver init period with vmid set to 0 */
if (m->cp_hqd_vmid == 0) {
uint32_t value, mec, pipe;
mec = (pipe_id / adev->gfx.mec.num_pipe_per_mec) + 1;
pipe = (pipe_id % adev->gfx.mec.num_pipe_per_mec);
pr_debug("kfd: set HIQ, mec:%d, pipe:%d, queue:%d.\n",
mec, pipe, queue_id);
value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
((mec << 5) | (pipe << 3) | queue_id | 0x80));
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
}
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
mqd_hqd = &m->cp_mqd_base_addr_lo;
hqd_base = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
for (reg = hqd_base;
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
WREG32(reg, mqd_hqd[reg - hqd_base]);
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
if (wptr) {
/* Don't read wptr with get_user because the user
* context may not be accessible (if this function
* runs in a work queue). Instead trigger a one-shot
* polling read from memory in the CP. This assumes
* that wptr is GPU-accessible in the queue's VMID via
* ATC or SVM. WPTR==RPTR before starting the poll so
* the CP starts fetching new commands from the right
* place.
*
* Guessing a 64-bit WPTR from a 32-bit RPTR is a bit
* tricky. Assume that the queue didn't overflow. The
* number of valid bits in the 32-bit RPTR depends on
* the queue size. The remaining bits are taken from
* the saved 64-bit WPTR. If the WPTR wrapped, add the
* queue size.
*/
uint32_t queue_size =
2 << REG_GET_FIELD(m->cp_hqd_pq_control,
CP_HQD_PQ_CONTROL, QUEUE_SIZE);
uint64_t guessed_wptr = m->cp_hqd_pq_rptr & (queue_size - 1);
if ((m->cp_hqd_pq_wptr_lo & (queue_size - 1)) < guessed_wptr)
guessed_wptr += queue_size;
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
lower_32_bits(guessed_wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
upper_32_bits(guessed_wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
lower_32_bits((uint64_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uint64_t)wptr));
pr_debug("%s setting CP_PQ_WPTR_POLL_CNTL1 to %x\n", __func__, get_queue_mask(adev, pipe_id, queue_id));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
REG_SET_FIELD(m->cp_hqd_eop_rptr,
CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
release_queue(kgd);
return 0;
}
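
Editor's note: the WPTR-guessing comment in kgd_hqd_load() above packs a lot in, so here is the same arithmetic pulled out into a standalone sketch (the function name is illustrative): the low bits come from the 32-bit RPTR, the high bits from the saved 64-bit WPTR, and one extra queue_size is added when the write pointer has wrapped past the read pointer.

/* Mirrors the guessed_wptr computation in kgd_hqd_load(). */
static uint64_t example_guess_wptr(uint32_t rptr, uint32_t wptr_lo,
                                   uint32_t wptr_hi, uint32_t queue_size)
{
        /* queue_size is a power of two, so (queue_size - 1) masks the
         * offset within the ring. */
        uint64_t guessed = rptr & (queue_size - 1);

        if ((wptr_lo & (queue_size - 1)) < guessed)
                guessed += queue_size;  /* WPTR wrapped relative to RPTR */

        guessed += wptr_lo & ~(queue_size - 1);
        guessed += (uint64_t)wptr_hi << 32;

        return guessed;
}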
static int kgd_hqd_dump(struct kgd_dev *kgd,
uint32_t pipe_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t i = 0, reg;
#define HQD_N_REGS 56
#define DUMP_REG(addr) do { \
if (WARN_ON_ONCE(i >= HQD_N_REGS)) \
break; \
(*dump)[i][0] = (addr) << 2; \
(*dump)[i++][1] = RREG32(addr); \
} while (0)
*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
acquire_queue(kgd, pipe_id, queue_id);
for (reg = SOC15_REG_OFFSET(GC, 0, mmCP_MQD_BASE_ADDR);
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
DUMP_REG(reg);
release_queue(kgd);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
return 0;
}
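
Editor's note: for context, a hypothetical consumer of the dump produced above (real callers reach it through the kfd2kgd_calls.hqd_dump hook rather than the static function directly). Each entry pairs a byte offset with the register value, and the kmalloc'd array is the caller's to free:

static void example_print_hqd_dump(struct kgd_dev *kgd,
                                   uint32_t pipe_id, uint32_t queue_id)
{
        uint32_t (*dump)[2];
        uint32_t n_regs, i;

        if (kgd_hqd_dump(kgd, pipe_id, queue_id, &dump, &n_regs))
                return;

        for (i = 0; i < n_regs; i++)
                pr_info("reg 0x%08x = 0x%08x\n", dump[i][0], dump[i][1]);

        kfree(dump);
}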
static int kgd_hqd_sdma_load(struct kgd_dev *kgd, void *mqd,
uint32_t __user *wptr, struct mm_struct *mm)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_base_addr, sdmax_gfx_context_cntl;
unsigned long end_jiffies;
uint32_t data;
uint64_t data64;
uint64_t __user *wptr64 = (uint64_t __user *)wptr;
m = get_sdma_mqd(mqd);
sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
m->sdma_queue_id);
pr_debug("sdma load base addr %x for engine %d, queue %d\n", sdma_base_addr, m->sdma_engine_id, m->sdma_queue_id);
sdmax_gfx_context_cntl = m->sdma_engine_id ?
SOC15_REG_OFFSET(SDMA1, 0, mmSDMA1_GFX_CONTEXT_CNTL) :
SOC15_REG_OFFSET(SDMA0, 0, mmSDMA0_GFX_CONTEXT_CNTL);
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
m->sdmax_rlcx_rb_cntl & (~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK));
end_jiffies = msecs_to_jiffies(2000) + jiffies;
while (true) {
data = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
if (data & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
if (time_after(jiffies, end_jiffies))
return -ETIME;
usleep_range(500, 1000);
}
data = RREG32(sdmax_gfx_context_cntl);
data = REG_SET_FIELD(data, SDMA0_GFX_CONTEXT_CNTL,
RESUME_CTX, 0);
WREG32(sdmax_gfx_context_cntl, data);
WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL_OFFSET,
m->sdmax_rlcx_doorbell_offset);
data = REG_SET_FIELD(m->sdmax_rlcx_doorbell, SDMA0_RLC0_DOORBELL,
ENABLE, 1);
WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, data);
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR, m->sdmax_rlcx_rb_rptr);
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 1);
if (read_user_wptr(mm, wptr64, data64)) {
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
lower_32_bits(data64));
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
upper_32_bits(data64));
} else {
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR,
m->sdmax_rlcx_rb_rptr);
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_WPTR_HI,
m->sdmax_rlcx_rb_rptr_hi);
}
WREG32(sdma_base_addr + mmSDMA0_RLC0_MINOR_PTR_UPDATE, 0);
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE, m->sdmax_rlcx_rb_base);
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_BASE_HI,
m->sdmax_rlcx_rb_base_hi);
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_LO,
m->sdmax_rlcx_rb_rptr_addr_lo);
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_ADDR_HI,
m->sdmax_rlcx_rb_rptr_addr_hi);
data = REG_SET_FIELD(m->sdmax_rlcx_rb_cntl, SDMA0_RLC0_RB_CNTL,
RB_ENABLE, 1);
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, data);
return 0;
}
static int kgd_hqd_sdma_dump(struct kgd_dev *kgd,
uint32_t engine_id, uint32_t queue_id,
uint32_t (**dump)[2], uint32_t *n_regs)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t sdma_base_addr = get_sdma_base_addr(adev, engine_id, queue_id);
uint32_t i = 0, reg;
#undef HQD_N_REGS
#define HQD_N_REGS (19+6+7+10)
pr_debug("sdma dump engine id %d queue_id %d\n", engine_id, queue_id);
pr_debug("sdma base addr %x\n", sdma_base_addr);
*dump = kmalloc(HQD_N_REGS*2*sizeof(uint32_t), GFP_KERNEL);
if (*dump == NULL)
return -ENOMEM;
for (reg = mmSDMA0_RLC0_RB_CNTL; reg <= mmSDMA0_RLC0_DOORBELL; reg++)
DUMP_REG(sdma_base_addr + reg);
for (reg = mmSDMA0_RLC0_STATUS; reg <= mmSDMA0_RLC0_CSA_ADDR_HI; reg++)
DUMP_REG(sdma_base_addr + reg);
for (reg = mmSDMA0_RLC0_IB_SUB_REMAIN;
reg <= mmSDMA0_RLC0_MINOR_PTR_UPDATE; reg++)
DUMP_REG(sdma_base_addr + reg);
for (reg = mmSDMA0_RLC0_MIDCMD_DATA0;
reg <= mmSDMA0_RLC0_MIDCMD_CNTL; reg++)
DUMP_REG(sdma_base_addr + reg);
WARN_ON_ONCE(i != HQD_N_REGS);
*n_regs = i;
return 0;
}
static bool kgd_hqd_is_occupied(struct kgd_dev *kgd, uint64_t queue_address,
uint32_t pipe_id, uint32_t queue_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t act;
bool retval = false;
uint32_t low, high;
acquire_queue(kgd, pipe_id, queue_id);
act = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
if (act) {
low = lower_32_bits(queue_address >> 8);
high = upper_32_bits(queue_address >> 8);
if (low == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE)) &&
high == RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_BASE_HI)))
retval = true;
}
release_queue(kgd);
return retval;
}
static bool kgd_hqd_sdma_is_occupied(struct kgd_dev *kgd, void *mqd)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_base_addr;
uint32_t sdma_rlc_rb_cntl;
m = get_sdma_mqd(mqd);
sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
m->sdma_queue_id);
sdma_rlc_rb_cntl = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
if (sdma_rlc_rb_cntl & SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK)
return true;
return false;
}
static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
enum kfd_preempt_type reset_type,
unsigned int utimeout, uint32_t pipe_id,
uint32_t queue_id)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
enum hqd_dequeue_request_type type;
unsigned long end_jiffies;
uint32_t temp;
struct v10_compute_mqd *m = get_mqd(mqd);
#if 0
unsigned long flags;
int retry;
#endif
acquire_queue(kgd, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
switch (reset_type) {
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
type = DRAIN_PIPE;
break;
case KFD_PREEMPT_TYPE_WAVEFRONT_RESET:
type = RESET_WAVES;
break;
default:
type = DRAIN_PIPE;
break;
}
#if 0 /* Is this still needed? */
/* Workaround: If IQ timer is active and the wait time is close to or
* equal to 0, dequeueing is not safe. Wait until either the wait time
* is larger or timer is cleared. Also, ensure that IQ_REQ_PEND is
* cleared before continuing. Also, ensure wait times are set to at
* least 0x3.
*/
local_irq_save(flags);
preempt_disable();
retry = 5000; /* wait for 500 usecs at maximum */
while (true) {
temp = RREG32(mmCP_HQD_IQ_TIMER);
if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, PROCESSING_IQ)) {
pr_debug("HW is processing IQ\n");
goto loop;
}
if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, ACTIVE)) {
if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, RETRY_TYPE)
== 3) /* SEM-rearm is safe */
break;
/* Wait time 3 is safe for CP, but our MMIO read/write
* time is close to 1 microsecond, so check for 10 to
* leave more buffer room
*/
if (REG_GET_FIELD(temp, CP_HQD_IQ_TIMER, WAIT_TIME)
>= 10)
break;
pr_debug("IQ timer is active\n");
} else
break;
loop:
if (!retry) {
pr_err("CP HQD IQ timer status time out\n");
break;
}
ndelay(100);
--retry;
}
retry = 1000;
while (true) {
temp = RREG32(mmCP_HQD_DEQUEUE_REQUEST);
if (!(temp & CP_HQD_DEQUEUE_REQUEST__IQ_REQ_PEND_MASK))
break;
pr_debug("Dequeue request is pending\n");
if (!retry) {
pr_err("CP HQD dequeue request time out\n");
break;
}
ndelay(100);
--retry;
}
local_irq_restore(flags);
preempt_enable();
#endif
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
temp = RREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE));
if (!(temp & CP_HQD_ACTIVE__ACTIVE_MASK))
break;
if (time_after(jiffies, end_jiffies)) {
pr_err("cp queue preemption time out.\n");
release_queue(kgd);
return -ETIME;
}
usleep_range(500, 1000);
}
release_queue(kgd);
return 0;
}
static int kgd_hqd_sdma_destroy(struct kgd_dev *kgd, void *mqd,
unsigned int utimeout)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
struct v10_sdma_mqd *m;
uint32_t sdma_base_addr;
uint32_t temp;
unsigned long end_jiffies = (utimeout * HZ / 1000) + jiffies;
m = get_sdma_mqd(mqd);
sdma_base_addr = get_sdma_base_addr(adev, m->sdma_engine_id,
m->sdma_queue_id);
temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL);
temp = temp & ~SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK;
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL, temp);
while (true) {
temp = RREG32(sdma_base_addr + mmSDMA0_RLC0_CONTEXT_STATUS);
if (temp & SDMA0_RLC0_CONTEXT_STATUS__IDLE_MASK)
break;
if (time_after(jiffies, end_jiffies))
return -ETIME;
usleep_range(500, 1000);
}
WREG32(sdma_base_addr + mmSDMA0_RLC0_DOORBELL, 0);
WREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL,
RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_CNTL) |
SDMA0_RLC0_RB_CNTL__RB_ENABLE_MASK);
m->sdmax_rlcx_rb_rptr = RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR);
m->sdmax_rlcx_rb_rptr_hi =
RREG32(sdma_base_addr + mmSDMA0_RLC0_RB_RPTR_HI);
return 0;
}
static bool get_atc_vmid_pasid_mapping_valid(struct kgd_dev *kgd,
uint8_t vmid)
{
uint32_t reg;
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ vmid);
return reg & ATC_VMID0_PASID_MAPPING__VALID_MASK;
}
static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
uint8_t vmid)
{
uint32_t reg;
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
reg = RREG32(SOC15_REG_OFFSET(ATHUB, 0, mmATC_VMID0_PASID_MAPPING)
+ vmid);
return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
uint32_t req = (1 << vmid) |
(0 << GCVM_INVALIDATE_ENG0_REQ__FLUSH_TYPE__SHIFT) |/* legacy */
GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PTES_MASK |
GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE0_MASK |
GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE1_MASK |
GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L2_PDE2_MASK |
GCVM_INVALIDATE_ENG0_REQ__INVALIDATE_L1_PTES_MASK;
mutex_lock(&adev->srbm_mutex);
/* Use light weight invalidation.
*
* TODO 1: agree on the right set of invalidation registers for
* KFD use. Use the last one for now. Invalidate only GCHUB as
* SDMA is now moved to GCHUB
*
* TODO 2: support range-based invalidation, requires kfd2kgd
* interface change
*/
WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_LO32),
0xffffffff);
WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ADDR_RANGE_HI32),
0x0000001f);
WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_REQ), req);
while (!(RREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_INVALIDATE_ENG0_ACK)) &
(1 << vmid)))
cpu_relax();
mutex_unlock(&adev->srbm_mutex);
}
static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
{
signed long r;
uint32_t seq;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
spin_lock(&adev->gfx.kiq.ring_lock);
amdgpu_ring_alloc(ring, 12); /* fence + invalidate_tlbs package*/
amdgpu_ring_write(ring, PACKET3(PACKET3_INVALIDATE_TLBS, 0));
amdgpu_ring_write(ring,
PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
PACKET3_INVALIDATE_TLBS_PASID(pasid));
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
r = amdgpu_fence_wait_polling(ring, seq, adev->usec_timeout);
if (r < 1) {
DRM_ERROR("wait for kiq fence error: %ld.\n", r);
return -ETIME;
}
return 0;
}
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
int vmid;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
if (amdgpu_emu_mode == 0 && ring->sched.ready)
return invalidate_tlbs_with_kiq(adev, pasid);
for (vmid = 0; vmid < 16; vmid++) {
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
continue;
if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
== pasid) {
write_vmid_invalidate_request(kgd, vmid);
break;
}
}
}
return 0;
}
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("non kfd vmid %d\n", vmid);
return 0;
}
write_vmid_invalidate_request(kgd, vmid);
return 0;
}
static int kgd_address_watch_disable(struct kgd_dev *kgd)
{
return 0;
}
static int kgd_address_watch_execute(struct kgd_dev *kgd,
unsigned int watch_point_id,
uint32_t cntl_val,
uint32_t addr_hi,
uint32_t addr_lo)
{
return 0;
}
static int kgd_wave_control_execute(struct kgd_dev *kgd,
uint32_t gfx_index_val,
uint32_t sq_cmd)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t data = 0;
mutex_lock(&adev->grbm_idx_mutex);
WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
INSTANCE_BROADCAST_WRITES, 1);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
SA_BROADCAST_WRITES, 1);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
SE_BROADCAST_WRITES, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;
}
static uint32_t kgd_address_watch_get_offset(struct kgd_dev *kgd,
unsigned int watch_point_id,
unsigned int reg_offset)
{
return 0;
}
static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint64_t page_table_base)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint64_t base = page_table_base | AMDGPU_PTE_VALID;
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid)) {
pr_err("trying to set page table base for wrong VMID %u\n",
vmid);
return;
}
/* TODO: take advantage of per-process address space size. For
* now, all processes share the same address space size, like
* on GFX8 and older.
*/
WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_LO32) + (vmid*2), 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_START_ADDR_HI32) + (vmid*2), 0);
WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_LO32) + (vmid*2),
lower_32_bits(adev->vm_manager.max_pfn - 1));
WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_END_ADDR_HI32) + (vmid*2),
upper_32_bits(adev->vm_manager.max_pfn - 1));
WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_LO32) + (vmid*2), lower_32_bits(base));
WREG32(SOC15_REG_OFFSET(GC, 0, mmGCVM_CONTEXT0_PAGE_TABLE_BASE_ADDR_HI32) + (vmid*2), upper_32_bits(base));
}

View File

@ -23,7 +23,7 @@
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/mmu_context.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "cikd.h"
@ -310,7 +310,7 @@ static inline uint32_t get_sdma_base_addr(struct cik_sdma_rlc_registers *m)
retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
m->sdma_queue_id * KFD_CIK_SDMA_QUEUE_OFFSET;
pr_debug("kfd: sdma base address: 0x%x\n", retval);
pr_debug("sdma base address: 0x%x\n", retval);
return retval;
}

View File

@ -24,7 +24,7 @@
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/mmu_context.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "gfx_v8_0.h"
@ -266,7 +266,7 @@ static inline uint32_t get_sdma_base_addr(struct vi_sdma_mqd *m)
retval = m->sdma_engine_id * SDMA1_REGISTER_OFFSET +
m->sdma_queue_id * KFD_VI_SDMA_QUEUE_OFFSET;
pr_debug("kfd: sdma base address: 0x%x\n", retval);
pr_debug("sdma base address: 0x%x\n", retval);
return retval;
}

View File

@ -26,7 +26,7 @@
#include <linux/fdtable.h>
#include <linux/uaccess.h>
#include <linux/mmu_context.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
#include "soc15_hw_ip.h"
@ -225,8 +225,8 @@ static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid,
lock_srbm(kgd, 0, 0, 0, vmid);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_CONFIG), sh_mem_config);
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmSH_MEM_BASES), sh_mem_bases);
/* APE1 no longer exists on GFX9 */
unlock_srbm(kgd);
@ -369,7 +369,7 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
value = RREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS));
value = REG_SET_FIELD(value, RLC_CP_SCHEDULERS, scheduler1,
((mec << 5) | (pipe << 3) | queue_id | 0x80));
WREG32(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmRLC_CP_SCHEDULERS), value);
}
/* HQD registers extend from CP_MQD_BASE_ADDR to CP_HQD_EOP_WPTR_MEM. */
@ -378,13 +378,13 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
for (reg = hqd_base;
reg <= SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI); reg++)
WREG32(reg, mqd_hqd[reg - hqd_base]);
WREG32_RLC(reg, mqd_hqd[reg - hqd_base]);
/* Activate doorbell logic before triggering WPTR poll. */
data = REG_SET_FIELD(m->cp_hqd_pq_doorbell_control,
CP_HQD_PQ_DOORBELL_CONTROL, DOORBELL_EN, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_DOORBELL_CONTROL), data);
if (wptr) {
/* Don't read wptr with get_user because the user
@ -413,25 +413,25 @@ static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id,
guessed_wptr += m->cp_hqd_pq_wptr_lo & ~(queue_size - 1);
guessed_wptr += (uint64_t)m->cp_hqd_pq_wptr_hi << 32;
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_LO),
lower_32_bits(guessed_wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_HI),
upper_32_bits(guessed_wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR),
lower_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_PQ_WPTR_POLL_ADDR_HI),
upper_32_bits((uintptr_t)wptr));
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_PQ_WPTR_POLL_CNTL1),
get_queue_mask(adev, pipe_id, queue_id));
}
/* Start the EOP fetcher */
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_EOP_RPTR),
REG_SET_FIELD(m->cp_hqd_eop_rptr,
CP_HQD_EOP_RPTR, INIT_FETCHER, 1));
data = REG_SET_FIELD(m->cp_hqd_active, CP_HQD_ACTIVE, ACTIVE, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_ACTIVE), data);
release_queue(kgd);
@ -633,7 +633,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
acquire_queue(kgd, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
WREG32_FIELD15(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
WREG32_FIELD15_RLC(GC, 0, RLC_CP_SCHEDULERS, scheduler1, 0);
switch (reset_type) {
case KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN:
@ -647,7 +647,7 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
break;
}
WREG32(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
WREG32_RLC(SOC15_REG_OFFSET(GC, 0, mmCP_HQD_DEQUEUE_REQUEST), type);
end_jiffies = (utimeout * HZ / 1000) + jiffies;
while (true) {
@ -726,29 +726,8 @@ static uint16_t get_atc_vmid_pasid_mapping_pasid(struct kgd_dev *kgd,
return reg & ATC_VMID0_PASID_MAPPING__PASID_MASK;
}
static void write_vmid_invalidate_request(struct kgd_dev *kgd, uint8_t vmid)
{
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
/* Use legacy mode tlb invalidation.
*
* Currently on Raven the code below is broken for anything but
* legacy mode due to a MMHUB power gating problem. A workaround
* is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
* == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
* bit.
*
* TODO 1: agree on the right set of invalidation registers for
* KFD use. Use the last one for now. Invalidate both GC and
* MMHUB.
*
* TODO 2: support range-based invalidation, requires kfd2kgd
* interface change
*/
amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
}
static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid,
uint32_t flush_type)
{
signed long r;
uint32_t seq;
@ -761,7 +740,7 @@ static int invalidate_tlbs_with_kiq(struct amdgpu_device *adev, uint16_t pasid)
PACKET3_INVALIDATE_TLBS_DST_SEL(1) |
PACKET3_INVALIDATE_TLBS_ALL_HUB(1) |
PACKET3_INVALIDATE_TLBS_PASID(pasid) |
PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(0)); /* legacy */
PACKET3_INVALIDATE_TLBS_FLUSH_TYPE(flush_type));
amdgpu_fence_emit_polling(ring, &seq);
amdgpu_ring_commit(ring);
spin_unlock(&adev->gfx.kiq.ring_lock);
@ -780,12 +759,16 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
struct amdgpu_device *adev = (struct amdgpu_device *) kgd;
int vmid;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
uint32_t flush_type = 0;
if (adev->in_gpu_reset)
return -EIO;
if (adev->gmc.xgmi.num_physical_nodes &&
adev->asic_type == CHIP_VEGA20)
flush_type = 2;
if (ring->sched.ready)
return invalidate_tlbs_with_kiq(adev, pasid);
return invalidate_tlbs_with_kiq(adev, pasid, flush_type);
for (vmid = 0; vmid < 16; vmid++) {
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
@ -793,7 +776,8 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
if (get_atc_vmid_pasid_mapping_valid(kgd, vmid)) {
if (get_atc_vmid_pasid_mapping_pasid(kgd, vmid)
== pasid) {
write_vmid_invalidate_request(kgd, vmid);
amdgpu_gmc_flush_gpu_tlb(adev, vmid,
flush_type);
break;
}
}
@ -811,7 +795,22 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
return 0;
}
write_vmid_invalidate_request(kgd, vmid);
/* Use legacy mode tlb invalidation.
*
* Currently on Raven the code below is broken for anything but
* legacy mode due to a MMHUB power gating problem. A workaround
* is for MMHUB to wait until the condition PER_VMID_INVALIDATE_REQ
* == PER_VMID_INVALIDATE_ACK instead of simply waiting for the ack
* bit.
*
* TODO 1: agree on the right set of invalidation registers for
* KFD use. Use the last one for now. Invalidate both GC and
* MMHUB.
*
* TODO 2: support range-based invalidation, requires kfd2kgd
* interface change
*/
amdgpu_gmc_flush_gpu_tlb(adev, vmid, 0);
return 0;
}
@ -838,7 +837,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
mutex_lock(&adev->grbm_idx_mutex);
WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), gfx_index_val);
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, gfx_index_val);
WREG32(SOC15_REG_OFFSET(GC, 0, mmSQ_CMD), sq_cmd);
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
@ -848,7 +847,7 @@ static int kgd_wave_control_execute(struct kgd_dev *kgd,
data = REG_SET_FIELD(data, GRBM_GFX_INDEX,
SE_BROADCAST_WRITES, 1);
WREG32(SOC15_REG_OFFSET(GC, 0, mmGRBM_GFX_INDEX), data);
WREG32_SOC15_RLC_SHADOW(GC, 0, mmGRBM_GFX_INDEX, data);
mutex_unlock(&adev->grbm_idx_mutex);
return 0;

View File

@ -22,14 +22,16 @@
#define pr_fmt(fmt) "kfd2kgd: " fmt
#include <linux/dma-buf.h>
#include <linux/list.h>
#include <linux/pagemap.h>
#include <linux/sched/mm.h>
#include <linux/dma-buf.h>
#include <drm/drmP.h>
#include <linux/sched/task.h>
#include "amdgpu_object.h"
#include "amdgpu_vm.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_dma_buf.h"
/* Special VM and GART address alignment needed for VI pre-Fiji due to
* a HW bug.
@ -456,6 +458,17 @@ static void add_kgd_mem_to_kfd_bo_list(struct kgd_mem *mem,
mutex_unlock(&process_info->lock);
}
static void remove_kgd_mem_from_kfd_bo_list(struct kgd_mem *mem,
struct amdkfd_process_info *process_info)
{
struct ttm_validate_buffer *bo_list_entry;
bo_list_entry = &mem->validate_list;
mutex_lock(&process_info->lock);
list_del(&bo_list_entry->head);
mutex_unlock(&process_info->lock);
}
/* Initializes user pages. It registers the MMU notifier and validates
* the userptr BO in the GTT domain.
*
@ -491,28 +504,12 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
goto out;
}
/* If no restore worker is running concurrently, user_pages
* should not be allocated
*/
WARN(mem->user_pages, "Leaking user_pages array");
mem->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
sizeof(struct page *),
GFP_KERNEL | __GFP_ZERO);
if (!mem->user_pages) {
pr_err("%s: Failed to allocate pages array\n", __func__);
ret = -ENOMEM;
goto unregister_out;
}
ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm, mem->user_pages);
ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
if (ret) {
pr_err("%s: Failed to get user pages: %d\n", __func__, ret);
goto free_out;
goto unregister_out;
}
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm, mem->user_pages);
ret = amdgpu_bo_reserve(bo, true);
if (ret) {
pr_err("%s: Failed to reserve BO\n", __func__);
@ -525,11 +522,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
amdgpu_bo_unreserve(bo);
release_out:
if (ret)
release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
free_out:
kvfree(mem->user_pages);
mem->user_pages = NULL;
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
unregister_out:
if (ret)
amdgpu_mn_unregister(bo);
@ -588,13 +581,12 @@ static int reserve_bo_and_vm(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.num_shared = 1;
ctx->kfd_bo.user_pages = NULL;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
amdgpu_vm_get_pd_bo(vm, &ctx->list, &ctx->vm_pd[0]);
ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
false, &ctx->duplicates);
false, &ctx->duplicates, true);
if (!ret)
ctx->reserved = true;
else {
@ -652,7 +644,6 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
ctx->kfd_bo.priority = 0;
ctx->kfd_bo.tv.bo = &bo->tbo;
ctx->kfd_bo.tv.num_shared = 1;
ctx->kfd_bo.user_pages = NULL;
list_add(&ctx->kfd_bo.tv.head, &ctx->list);
i = 0;
@ -668,7 +659,7 @@ static int reserve_bo_and_cond_vms(struct kgd_mem *mem,
}
ret = ttm_eu_reserve_buffers(&ctx->ticket, &ctx->list,
false, &ctx->duplicates);
false, &ctx->duplicates, true);
if (!ret)
ctx->reserved = true;
else
@ -822,7 +813,7 @@ static int process_sync_pds_resv(struct amdkfd_process_info *process_info,
ret = amdgpu_sync_resv(NULL,
sync, pd->tbo.resv,
AMDGPU_FENCE_OWNER_UNDEFINED, false);
AMDGPU_FENCE_OWNER_KFD, false);
if (ret)
return ret;
}
@ -896,6 +887,9 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
AMDGPU_FENCE_OWNER_KFD, false);
if (ret)
goto wait_pd_fail;
ret = reservation_object_reserve_shared(vm->root.base.bo->tbo.resv, 1);
if (ret)
goto reserve_shared_fail;
amdgpu_bo_fence(vm->root.base.bo,
&vm->process_info->eviction_fence->base, true);
amdgpu_bo_unreserve(vm->root.base.bo);
@ -909,6 +903,7 @@ static int init_kfd_vm(struct amdgpu_vm *vm, void **process_info,
return 0;
reserve_shared_fail:
wait_pd_fail:
validate_pd_fail:
amdgpu_bo_unreserve(vm->root.base.bo);
@ -1109,7 +1104,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (!offset || !*offset)
return -EINVAL;
user_addr = *offset;
} else if (flags & ALLOC_MEM_FLAGS_DOORBELL) {
} else if (flags & (ALLOC_MEM_FLAGS_DOORBELL |
ALLOC_MEM_FLAGS_MMIO_REMAP)) {
domain = AMDGPU_GEM_DOMAIN_GTT;
alloc_domain = AMDGPU_GEM_DOMAIN_CPU;
bo_type = ttm_bo_type_sg;
@ -1199,12 +1195,8 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
if (user_addr) {
ret = init_user_pages(*mem, current->mm, user_addr);
if (ret) {
mutex_lock(&avm->process_info->lock);
list_del(&(*mem)->validate_list.head);
mutex_unlock(&avm->process_info->lock);
if (ret)
goto allocate_init_user_pages_failed;
}
}
if (offset)
@ -1213,6 +1205,7 @@ int amdgpu_amdkfd_gpuvm_alloc_memory_of_gpu(
return 0;
allocate_init_user_pages_failed:
remove_kgd_mem_from_kfd_bo_list(*mem, avm->process_info);
amdgpu_bo_unref(&bo);
/* Don't unreserve system mem limit twice */
goto err_reserve_limit;
@ -1262,15 +1255,6 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
list_del(&bo_list_entry->head);
mutex_unlock(&process_info->lock);
/* Free user pages if necessary */
if (mem->user_pages) {
pr_debug("%s: Freeing user_pages array\n", __func__);
if (mem->user_pages[0])
release_pages(mem->user_pages,
mem->bo->tbo.ttm->num_pages);
kvfree(mem->user_pages);
}
ret = reserve_bo_and_cond_vms(mem, NULL, BO_VM_ALL, &ctx);
if (unlikely(ret))
return ret;
@ -1294,8 +1278,8 @@ int amdgpu_amdkfd_gpuvm_free_memory_of_gpu(
/* Free the sync object */
amdgpu_sync_free(&mem->sync);
/* If the SG is not NULL, it's one we created for a doorbell
* BO. We need to free it.
/* If the SG is not NULL, it's one we created for a doorbell or mmio
* remap BO. We need to free it.
*/
if (mem->bo->tbo.sg) {
sg_free_table(mem->bo->tbo.sg);
@ -1409,7 +1393,7 @@ int amdgpu_amdkfd_gpuvm_map_memory_to_gpu(
ret = map_bo_to_gpuvm(adev, entry, ctx.sync,
is_invalid_userptr);
if (ret) {
pr_err("Failed to map radeon bo to gpuvm\n");
pr_err("Failed to map bo to gpuvm\n");
goto map_bo_to_gpuvm_failed;
}
@ -1744,36 +1728,20 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
bo = mem->bo;
if (!mem->user_pages) {
mem->user_pages =
kvmalloc_array(bo->tbo.ttm->num_pages,
sizeof(struct page *),
GFP_KERNEL | __GFP_ZERO);
if (!mem->user_pages) {
pr_err("%s: Failed to allocate pages array\n",
__func__);
return -ENOMEM;
}
} else if (mem->user_pages[0]) {
release_pages(mem->user_pages, bo->tbo.ttm->num_pages);
/* Get updated user pages */
ret = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
if (ret) {
pr_debug("%s: Failed to get user pages: %d\n",
__func__, ret);
/* Return error -EBUSY or -ENOMEM, retry restore */
return ret;
}
/* Get updated user pages */
ret = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
mem->user_pages);
if (ret) {
mem->user_pages[0] = NULL;
pr_info("%s: Failed to get user pages: %d\n",
__func__, ret);
/* Pretend it succeeded. It will fail later
* with a VM fault if the GPU tries to access
* it. Better than hanging indefinitely with
* stalled user mode queues.
*/
}
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
/* Mark the BO as valid unless it was invalidated
* again concurrently
* again concurrently.
*/
if (atomic_cmpxchg(&mem->invalid, invalid, 0) != invalid)
return -EAGAIN;
@ -1806,7 +1774,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
GFP_KERNEL);
if (!pd_bo_list_entries) {
pr_err("%s: Failed to allocate PD BO list entries\n", __func__);
return -ENOMEM;
ret = -ENOMEM;
goto out_no_mem;
}
INIT_LIST_HEAD(&resv_list);
@ -1827,10 +1796,11 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
}
/* Reserve all BOs and page tables for validation */
ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates);
ret = ttm_eu_reserve_buffers(&ticket, &resv_list, false, &duplicates,
true);
WARN(!list_empty(&duplicates), "Duplicates should be empty");
if (ret)
goto out;
goto out_free;
amdgpu_sync_create(&sync);
@ -1846,10 +1816,8 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
bo = mem->bo;
/* Copy pages array and validate the BO if we got user pages */
if (mem->user_pages[0]) {
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
mem->user_pages);
/* Validate the BO if we got user pages */
if (bo->tbo.ttm->pages[0]) {
amdgpu_bo_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret) {
@ -1858,13 +1826,6 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
}
}
/* Validate succeeded, now the BO owns the pages, free
* our copy of the pointer array. Put this BO back on
* the userptr_valid_list. If we need to revalidate
* it, we need to start from scratch.
*/
kvfree(mem->user_pages);
mem->user_pages = NULL;
list_move_tail(&mem->validate_list.head,
&process_info->userptr_valid_list);
@ -1897,8 +1858,9 @@ unreserve_out:
ttm_eu_backoff_reservation(&ticket, &resv_list);
amdgpu_sync_wait(&sync, false);
amdgpu_sync_free(&sync);
out:
out_free:
kfree(pd_bo_list_entries);
out_no_mem:
return ret;
}
@ -1963,6 +1925,7 @@ static void amdgpu_amdkfd_restore_userptr_worker(struct work_struct *work)
* hanging. No point trying again.
*/
}
unlock_out:
mutex_unlock(&process_info->lock);
mmput(mm);
@ -2032,7 +1995,7 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
}
ret = ttm_eu_reserve_buffers(&ctx.ticket, &ctx.list,
false, &duplicate_save);
false, &duplicate_save, true);
if (ret) {
pr_debug("Memory eviction: TTM Reserve Failed. Try again\n");
goto ttm_reserve_fail;
@ -2130,3 +2093,92 @@ ttm_reserve_fail:
kfree(pd_bo_list);
return ret;
}
int amdgpu_amdkfd_add_gws_to_process(void *info, void *gws, struct kgd_mem **mem)
{
struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
struct amdgpu_bo *gws_bo = (struct amdgpu_bo *)gws;
int ret;
if (!info || !gws)
return -EINVAL;
*mem = kzalloc(sizeof(struct kgd_mem), GFP_KERNEL);
if (!*mem)
return -ENOMEM;
mutex_init(&(*mem)->lock);
(*mem)->bo = amdgpu_bo_ref(gws_bo);
(*mem)->domain = AMDGPU_GEM_DOMAIN_GWS;
(*mem)->process_info = process_info;
add_kgd_mem_to_kfd_bo_list(*mem, process_info, false);
amdgpu_sync_create(&(*mem)->sync);
/* Validate gws bo the first time it is added to process */
mutex_lock(&(*mem)->process_info->lock);
ret = amdgpu_bo_reserve(gws_bo, false);
if (unlikely(ret)) {
pr_err("Reserve gws bo failed %d\n", ret);
goto bo_reservation_failure;
}
ret = amdgpu_amdkfd_bo_validate(gws_bo, AMDGPU_GEM_DOMAIN_GWS, true);
if (ret) {
pr_err("GWS BO validate failed %d\n", ret);
goto bo_validation_failure;
}
/* GWS resource is shared b/t amdgpu and amdkfd
* Add process eviction fence to bo so they can
* evict each other.
*/
ret = reservation_object_reserve_shared(gws_bo->tbo.resv, 1);
if (ret)
goto reserve_shared_fail;
amdgpu_bo_fence(gws_bo, &process_info->eviction_fence->base, true);
amdgpu_bo_unreserve(gws_bo);
mutex_unlock(&(*mem)->process_info->lock);
return ret;
reserve_shared_fail:
bo_validation_failure:
amdgpu_bo_unreserve(gws_bo);
bo_reservation_failure:
mutex_unlock(&(*mem)->process_info->lock);
amdgpu_sync_free(&(*mem)->sync);
remove_kgd_mem_from_kfd_bo_list(*mem, process_info);
amdgpu_bo_unref(&gws_bo);
mutex_destroy(&(*mem)->lock);
kfree(*mem);
*mem = NULL;
return ret;
}
int amdgpu_amdkfd_remove_gws_from_process(void *info, void *mem)
{
int ret;
struct amdkfd_process_info *process_info = (struct amdkfd_process_info *)info;
struct kgd_mem *kgd_mem = (struct kgd_mem *)mem;
struct amdgpu_bo *gws_bo = kgd_mem->bo;
/* Remove BO from process's validate list so restore worker won't touch
* it anymore
*/
remove_kgd_mem_from_kfd_bo_list(kgd_mem, process_info);
ret = amdgpu_bo_reserve(gws_bo, false);
if (unlikely(ret)) {
pr_err("Reserve gws bo failed %d\n", ret);
//TODO add BO back to validate_list?
return ret;
}
amdgpu_amdkfd_remove_eviction_fence(gws_bo,
process_info->eviction_fence);
amdgpu_bo_unreserve(gws_bo);
amdgpu_sync_free(&kgd_mem->sync);
amdgpu_bo_unref(&gws_bo);
mutex_destroy(&kgd_mem->lock);
kfree(mem);
return 0;
}

View File

@ -23,7 +23,7 @@
* Authors: Dave Airlie
* Alex Deucher
*/
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_atombios.h"

View File

@ -20,7 +20,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "atomfirmware.h"
@ -118,6 +118,7 @@ union umc_info {
union vram_info {
struct atom_vram_info_header_v2_3 v23;
struct atom_vram_info_header_v2_4 v24;
};
/*
* Return vram width from integrated system info table, if available,
@ -126,22 +127,50 @@ union vram_info {
int amdgpu_atomfirmware_get_vram_width(struct amdgpu_device *adev)
{
struct amdgpu_mode_info *mode_info = &adev->mode_info;
int index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
integratedsysteminfo);
int index;
u16 data_offset, size;
union igp_info *igp_info;
union vram_info *vram_info;
u32 mem_channel_number;
u32 mem_channel_width;
u8 frev, crev;
if (adev->flags & AMD_IS_APU)
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
integratedsysteminfo);
else
index = get_index_into_master_table(atom_master_list_of_data_tables_v2_1,
vram_info);
/* get any igp specific overrides */
if (amdgpu_atom_parse_data_header(mode_info->atom_context, index, &size,
&frev, &crev, &data_offset)) {
igp_info = (union igp_info *)
(mode_info->atom_context->bios + data_offset);
switch (crev) {
case 11:
return igp_info->v11.umachannelnumber * 64;
default:
return 0;
if (adev->flags & AMD_IS_APU) {
igp_info = (union igp_info *)
(mode_info->atom_context->bios + data_offset);
switch (crev) {
case 11:
mem_channel_number = igp_info->v11.umachannelnumber;
/* channel width is 64 */
return mem_channel_number * 64;
default:
return 0;
}
} else {
vram_info = (union vram_info *)
(mode_info->atom_context->bios + data_offset);
switch (crev) {
case 3:
mem_channel_number = vram_info->v23.vram_module[0].channel_num;
mem_channel_width = vram_info->v23.vram_module[0].channel_width;
return mem_channel_number * (1 << mem_channel_width);
case 4:
mem_channel_number = vram_info->v24.vram_module[0].channel_num;
mem_channel_width = vram_info->v24.vram_module[0].channel_width;
return mem_channel_number * (1 << mem_channel_width);
default:
return 0;
}
}
}
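For the non-APU path added above, the effective memory bus width is derived from the first vram_module entry as channel_num * 2^channel_width (the code shifts 1 by channel_width), while the APU path keeps the fixed 64-bit-per-channel assumption. A small standalone sketch of that arithmetic; the numbers used below are illustrative only, not taken from a real VBIOS table.

#include <stdio.h>
#include <stdint.h>

/* Width in bits as the diff computes it for vram_info v2.3/v2.4:
 * number of channels times 2^channel_width. */
static uint32_t dgpu_vram_width(uint32_t channel_num, uint32_t channel_width)
{
    return channel_num * (1u << channel_width);
}

/* APU path: umachannelnumber * 64, i.e. a fixed 64 bits per channel. */
static uint32_t apu_vram_width(uint32_t uma_channel_number)
{
    return uma_channel_number * 64u;
}

int main(void)
{
    /* Illustrative values only. */
    printf("dGPU: 8 channels, width exponent 5 -> %u bits\n",
           (unsigned)dgpu_vram_width(8, 5));  /* 8 * 32 = 256 */
    printf("APU : 2 UMA channels              -> %u bits\n",
           (unsigned)apu_vram_width(2));      /* 2 * 64 = 128 */
    return 0;
}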
@ -179,6 +208,9 @@ static int convert_atom_mem_type_to_vram_type (struct amdgpu_device *adev,
case ATOM_DGPU_VRAM_TYPE_HBM2:
vram_type = AMDGPU_VRAM_TYPE_HBM;
break;
case ATOM_DGPU_VRAM_TYPE_GDDR6:
vram_type = AMDGPU_VRAM_TYPE_GDDR6;
break;
default:
vram_type = AMDGPU_VRAM_TYPE_UNKNOWN;
break;
@ -227,6 +259,9 @@ int amdgpu_atomfirmware_get_vram_type(struct amdgpu_device *adev)
case 3:
mem_type = vram_info->v23.vram_module[0].memory_type;
return convert_atom_mem_type_to_vram_type(adev, mem_type);
case 4:
mem_type = vram_info->v24.vram_module[0].memory_type;
return convert_atom_mem_type_to_vram_type(adev, mem_type);
default:
return 0;
}

View File

@ -21,7 +21,7 @@
*
* Authors: Jerome Glisse
*/
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"

View File

@ -25,10 +25,11 @@
* Alex Deucher
* Jerome Glisse
*/
#include <drm/drmP.h>
#include "amdgpu.h"
#include "atom.h"
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/acpi.h>
/*

View File

@ -28,7 +28,8 @@
* Christian König <deathsimple@vodafone.de>
*/
#include <drm/drmP.h>
#include <linux/uaccess.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
@ -81,9 +82,9 @@ int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
return -ENOMEM;
kref_init(&list->refcount);
list->gds_obj = adev->gds.gds_gfx_bo;
list->gws_obj = adev->gds.gws_gfx_bo;
list->oa_obj = adev->gds.oa_gfx_bo;
list->gds_obj = NULL;
list->gws_obj = NULL;
list->oa_obj = NULL;
array = amdgpu_bo_list_array_entry(list, 0);
memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));

View File

@ -36,7 +36,7 @@ struct amdgpu_bo_list_entry {
struct amdgpu_bo_va *bo_va;
uint32_t priority;
struct page **user_pages;
int user_invalidated;
bool user_invalidated;
};
struct amdgpu_bo_list {

View File

@ -22,8 +22,9 @@
*
*/
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <linux/firmware.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"

View File

@ -23,7 +23,7 @@
* Authors: Dave Airlie
* Alex Deucher
*/
#include <drm/drmP.h>
#include <drm/drm_edid.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_probe_helper.h>

View File

@ -24,9 +24,11 @@
* Authors:
* Jerome Glisse <glisse@freedesktop.org>
*/
#include <linux/file.h>
#include <linux/pagemap.h>
#include <linux/sync_file.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_syncobj.h>
#include "amdgpu.h"
@ -52,7 +54,6 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
p->uf_entry.tv.bo = &bo->tbo;
/* One for TTM and one for the CS job */
p->uf_entry.tv.num_shared = 2;
p->uf_entry.user_pages = NULL;
drm_gem_object_put_unlocked(gobj);
@ -542,14 +543,14 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
if (usermm && usermm != current->mm)
return -EPERM;
/* Check if we have user pages and nobody bound the BO already */
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
lobj->user_pages) {
if (amdgpu_ttm_tt_is_userptr(bo->tbo.ttm) &&
lobj->user_invalidated && lobj->user_pages) {
amdgpu_bo_placement_from_domain(bo,
AMDGPU_GEM_DOMAIN_CPU);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return r;
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
lobj->user_pages);
binding_userptr = true;
@ -580,7 +581,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
struct amdgpu_bo *gds;
struct amdgpu_bo *gws;
struct amdgpu_bo *oa;
unsigned tries = 10;
int r;
INIT_LIST_HEAD(&p->validated);
@ -616,79 +616,45 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
if (p->uf_entry.tv.bo && !ttm_to_amdgpu_bo(p->uf_entry.tv.bo)->parent)
list_add(&p->uf_entry.tv.head, &p->validated);
while (1) {
struct list_head need_pages;
/* Get userptr backing pages. If pages are updated after registered
* in amdgpu_gem_userptr_ioctl(), amdgpu_cs_list_validate() will do
* amdgpu_ttm_backend_bind() to flush and invalidate new pages
*/
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
bool userpage_invalidated = false;
int i;
r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
&duplicates);
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
goto error_free_pages;
e->user_pages = kvmalloc_array(bo->tbo.ttm->num_pages,
sizeof(struct page *),
GFP_KERNEL | __GFP_ZERO);
if (!e->user_pages) {
DRM_ERROR("calloc failure\n");
return -ENOMEM;
}
INIT_LIST_HEAD(&need_pages);
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
&e->user_invalidated) && e->user_pages) {
/* We acquired a page array, but somebody
* invalidated it. Free it and try again
*/
release_pages(e->user_pages,
bo->tbo.ttm->num_pages);
kvfree(e->user_pages);
e->user_pages = NULL;
}
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
!e->user_pages) {
list_del(&e->tv.head);
list_add(&e->tv.head, &need_pages);
amdgpu_bo_unreserve(bo);
}
r = amdgpu_ttm_tt_get_user_pages(bo, e->user_pages);
if (r) {
kvfree(e->user_pages);
e->user_pages = NULL;
return r;
}
if (list_empty(&need_pages))
break;
/* Unreserve everything again. */
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
/* We tried too many times, just abort */
if (!--tries) {
r = -EDEADLK;
DRM_ERROR("deadlock in %s\n", __func__);
goto error_free_pages;
}
/* Fill the page arrays for all userptrs. */
list_for_each_entry(e, &need_pages, tv.head) {
struct ttm_tt *ttm = e->tv.bo->ttm;
e->user_pages = kvmalloc_array(ttm->num_pages,
sizeof(struct page*),
GFP_KERNEL | __GFP_ZERO);
if (!e->user_pages) {
r = -ENOMEM;
DRM_ERROR("calloc failure in %s\n", __func__);
goto error_free_pages;
}
r = amdgpu_ttm_tt_get_user_pages(ttm, e->user_pages);
if (r) {
DRM_ERROR("amdgpu_ttm_tt_get_user_pages failed.\n");
kvfree(e->user_pages);
e->user_pages = NULL;
goto error_free_pages;
for (i = 0; i < bo->tbo.ttm->num_pages; i++) {
if (bo->tbo.ttm->pages[i] != e->user_pages[i]) {
userpage_invalidated = true;
break;
}
}
e->user_invalidated = userpage_invalidated;
}
/* And try again. */
list_splice(&need_pages, &p->validated);
r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
&duplicates, false);
if (unlikely(r != 0)) {
if (r != -ERESTARTSYS)
DRM_ERROR("ttm_eu_reserve_buffers failed.\n");
goto out;
}
amdgpu_cs_get_threshold_for_moves(p->adev, &p->bytes_moved_threshold,
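The rewritten amdgpu_cs_parser_bos() above no longer retries page acquisition in a loop; instead it caches a page array per userptr entry and, after reserving the buffers, compares the pages currently backing the TTM object against that cached array, setting user_invalidated on any mismatch. A self-contained sketch of that comparison; page_ptr and userptr_invalidated() are invented illustration names, not driver symbols.

#include <stdio.h>
#include <stdbool.h>
#include <stddef.h>

/* Toy stand-in for "struct page *": any distinct pointer value will do. */
typedef const void *page_ptr;

/* Mirrors the check added to amdgpu_cs_parser_bos(): compare the pages
 * currently bound to the ttm object with the cached per-entry array; any
 * difference means the userptr mapping was invalidated and the BO must be
 * revalidated before submission. */
static bool userptr_invalidated(const page_ptr *ttm_pages,
                                const page_ptr *cached_pages,
                                size_t num_pages)
{
    for (size_t i = 0; i < num_pages; i++)
        if (ttm_pages[i] != cached_pages[i])
            return true;
    return false;
}

int main(void)
{
    int a, b, c, d;
    page_ptr ttm[]   = { &a, &b, &c };
    page_ptr same[]  = { &a, &b, &c };
    page_ptr moved[] = { &a, &d, &c };   /* one page was replaced */

    printf("unchanged mapping invalidated? %s\n",
           userptr_invalidated(ttm, same, 3) ? "yes" : "no");
    printf("remapped  mapping invalidated? %s\n",
           userptr_invalidated(ttm, moved, 3) ? "yes" : "no");
    return 0;
}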
@ -707,16 +673,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
}
r = amdgpu_cs_list_validate(p, &duplicates);
if (r) {
DRM_ERROR("amdgpu_cs_list_validate(duplicates) failed.\n");
if (r)
goto error_validate;
}
r = amdgpu_cs_list_validate(p, &p->validated);
if (r) {
DRM_ERROR("amdgpu_cs_list_validate(validated) failed.\n");
if (r)
goto error_validate;
}
amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
p->bytes_moved_vis);
@ -757,17 +719,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
error_validate:
if (r)
ttm_eu_backoff_reservation(&p->ticket, &p->validated);
error_free_pages:
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
if (!e->user_pages)
continue;
release_pages(e->user_pages, e->tv.bo->ttm->num_pages);
kvfree(e->user_pages);
}
out:
return r;
}
@ -922,7 +874,7 @@ static int amdgpu_cs_vm_handling(struct amdgpu_cs_parser *p)
if (r)
return r;
if (amdgpu_sriov_vf(adev)) {
if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
struct dma_fence *f;
bo_va = fpriv->csa_va;
@ -1011,7 +963,8 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
if (chunk->chunk_id != AMDGPU_CHUNK_ID_IB)
continue;
if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX && amdgpu_sriov_vf(adev)) {
if (chunk_ib->ip_type == AMDGPU_HW_IP_GFX &&
(amdgpu_mcbp || amdgpu_sriov_vf(adev))) {
if (chunk_ib->flags & AMDGPU_IB_FLAG_PREEMPT) {
if (chunk_ib->flags & AMDGPU_IB_FLAG_CE)
ce_preempt++;
@ -1054,11 +1007,9 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
j++;
}
/* UVD & VCE fw doesn't support user fences */
/* MM engine doesn't support user fences */
ring = to_amdgpu_ring(parser->entity->rq->sched);
if (parser->job->uf_addr && (
ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
ring->funcs->type == AMDGPU_RING_TYPE_VCE))
if (parser->job->uf_addr && ring->funcs->no_user_fence)
return -EINVAL;
return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->entity);
@ -1328,7 +1279,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
struct amdgpu_bo_list_entry *e;
struct amdgpu_job *job;
uint64_t seq;
int r;
job = p->job;
@ -1338,15 +1288,23 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
if (r)
goto error_unlock;
/* No memory allocation is allowed while holding the mn lock */
/* No memory allocation is allowed while holding the mn lock.
* p->mn is held until amdgpu_cs_submit is finished and the fence is added
* to BOs.
*/
amdgpu_mn_lock(p->mn);
/* If userptr are invalidated after amdgpu_cs_parser_bos(), return
* -EAGAIN, drmIoctl in libdrm will restart the amdgpu_cs_ioctl.
*/
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(e->tv.bo);
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
r = -ERESTARTSYS;
goto error_abort;
}
r |= !amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
}
if (r) {
r = -EAGAIN;
goto error_abort;
}
job->owner = p->filp;
@ -1424,7 +1382,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
if (r) {
if (r == -ENOMEM)
DRM_ERROR("Not enough memory for command submission!\n");
else if (r != -ERESTARTSYS)
else if (r != -ERESTARTSYS && r != -EAGAIN)
DRM_ERROR("Failed to process the buffer list %d!\n", r);
goto out;
}
@ -1442,6 +1400,7 @@ int amdgpu_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
out:
amdgpu_cs_parser_fini(&parser, r, reserved_buffers);
return r;
}

View File

@ -47,6 +47,7 @@ int amdgpu_allocate_static_csa(struct amdgpu_device *adev, struct amdgpu_bo **bo
return -ENOMEM;
memset(ptr, 0, size);
adev->virt.csa_cpu_addr = ptr;
return 0;
}
@ -79,7 +80,7 @@ int amdgpu_map_static_csa(struct amdgpu_device *adev, struct amdgpu_vm *vm,
list_add(&csa_tv.head, &list);
amdgpu_vm_get_pd_bo(vm, &list, &pd);
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL);
r = ttm_eu_reserve_buffers(&ticket, &list, true, NULL, false);
if (r) {
DRM_ERROR("failed to reserve CSA,PD BOs: err=%d\n", r);
return r;

View File

@ -22,7 +22,6 @@
* Authors: monk liu <monk.liu@amd.com>
*/
#include <drm/drmP.h>
#include <drm/drm_auth.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"

View File

@ -24,8 +24,11 @@
*/
#include <linux/kthread.h>
#include <drm/drmP.h>
#include <linux/debugfs.h>
#include <linux/pci.h>
#include <linux/uaccess.h>
#include <drm/drm_debugfs.h>
#include "amdgpu.h"
/**
@ -920,17 +923,195 @@ static const struct drm_info_list amdgpu_debugfs_list[] = {
{"amdgpu_evict_gtt", &amdgpu_debugfs_evict_gtt},
};
static void amdgpu_ib_preempt_fences_swap(struct amdgpu_ring *ring,
struct dma_fence **fences)
{
struct amdgpu_fence_driver *drv = &ring->fence_drv;
uint32_t sync_seq, last_seq;
last_seq = atomic_read(&ring->fence_drv.last_seq);
sync_seq = ring->fence_drv.sync_seq;
last_seq &= drv->num_fences_mask;
sync_seq &= drv->num_fences_mask;
do {
struct dma_fence *fence, **ptr;
++last_seq;
last_seq &= drv->num_fences_mask;
ptr = &drv->fences[last_seq];
fence = rcu_dereference_protected(*ptr, 1);
RCU_INIT_POINTER(*ptr, NULL);
if (!fence)
continue;
fences[last_seq] = fence;
} while (last_seq != sync_seq);
}
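amdgpu_ib_preempt_fences_swap() above walks the fence ring from the last signalled sequence up to sync_seq, masking every index with num_fences_mask because the per-ring fence array is a power-of-two ring buffer. A standalone sketch of that wrap-around walk over a plain array; the ring size and sequence numbers below are invented for illustration.

#include <stdio.h>
#include <stdint.h>

#define RING_SIZE 8u                 /* must be a power of two */
#define RING_MASK (RING_SIZE - 1u)   /* plays the role of num_fences_mask */

int main(void)
{
    /* Pretend sequences 5..9 still have outstanding fences; the walk visits
     * their slots in order and wraps past the end of the 8-entry ring
     * (slots 5, 6, 7, 0, 1 after masking). */
    uint32_t last_seq = 4;   /* last sequence already signalled */
    uint32_t sync_seq = 9;   /* newest emitted sequence */

    uint32_t last = last_seq & RING_MASK;
    uint32_t sync = sync_seq & RING_MASK;

    do {
        ++last;
        last &= RING_MASK;
        printf("swap out fence slot %u\n", (unsigned)last);
    } while (last != sync);

    return 0;
}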
static void amdgpu_ib_preempt_signal_fences(struct dma_fence **fences,
int length)
{
int i;
struct dma_fence *fence;
for (i = 0; i < length; i++) {
fence = fences[i];
if (!fence)
continue;
dma_fence_signal(fence);
dma_fence_put(fence);
}
}
static void amdgpu_ib_preempt_job_recovery(struct drm_gpu_scheduler *sched)
{
struct drm_sched_job *s_job;
struct dma_fence *fence;
spin_lock(&sched->job_list_lock);
list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
fence = sched->ops->run_job(s_job);
dma_fence_put(fence);
}
spin_unlock(&sched->job_list_lock);
}
static void amdgpu_ib_preempt_mark_partial_job(struct amdgpu_ring *ring)
{
struct amdgpu_job *job;
struct drm_sched_job *s_job;
uint32_t preempt_seq;
struct dma_fence *fence, **ptr;
struct amdgpu_fence_driver *drv = &ring->fence_drv;
struct drm_gpu_scheduler *sched = &ring->sched;
if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
return;
preempt_seq = le32_to_cpu(*(drv->cpu_addr + 2));
if (preempt_seq <= atomic_read(&drv->last_seq))
return;
preempt_seq &= drv->num_fences_mask;
ptr = &drv->fences[preempt_seq];
fence = rcu_dereference_protected(*ptr, 1);
spin_lock(&sched->job_list_lock);
list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
job = to_amdgpu_job(s_job);
if (job->fence == fence)
/* mark the job as preempted */
job->preemption_status |= AMDGPU_IB_PREEMPTED;
}
spin_unlock(&sched->job_list_lock);
}
static int amdgpu_debugfs_ib_preempt(void *data, u64 val)
{
int r, resched, length;
struct amdgpu_ring *ring;
struct dma_fence **fences = NULL;
struct amdgpu_device *adev = (struct amdgpu_device *)data;
if (val >= AMDGPU_MAX_RINGS)
return -EINVAL;
ring = adev->rings[val];
if (!ring || !ring->funcs->preempt_ib || !ring->sched.thread)
return -EINVAL;
/* the last preemption failed */
if (ring->trail_seq != le32_to_cpu(*ring->trail_fence_cpu_addr))
return -EBUSY;
length = ring->fence_drv.num_fences_mask + 1;
fences = kcalloc(length, sizeof(void *), GFP_KERNEL);
if (!fences)
return -ENOMEM;
/* stop the scheduler */
kthread_park(ring->sched.thread);
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
/* preempt the IB */
r = amdgpu_ring_preempt_ib(ring);
if (r) {
DRM_WARN("failed to preempt ring %d\n", ring->idx);
goto failure;
}
amdgpu_fence_process(ring);
if (atomic_read(&ring->fence_drv.last_seq) !=
ring->fence_drv.sync_seq) {
DRM_INFO("ring %d was preempted\n", ring->idx);
amdgpu_ib_preempt_mark_partial_job(ring);
/* swap out the old fences */
amdgpu_ib_preempt_fences_swap(ring, fences);
amdgpu_fence_driver_force_completion(ring);
/* resubmit unfinished jobs */
amdgpu_ib_preempt_job_recovery(&ring->sched);
/* wait for jobs finished */
amdgpu_fence_wait_empty(ring);
/* signal the old fences */
amdgpu_ib_preempt_signal_fences(fences, length);
}
failure:
/* restart the scheduler */
kthread_unpark(ring->sched.thread);
ttm_bo_unlock_delayed_workqueue(&adev->mman.bdev, resched);
if (fences)
kfree(fences);
return 0;
}
DEFINE_SIMPLE_ATTRIBUTE(fops_ib_preempt, NULL,
amdgpu_debugfs_ib_preempt, "%llu\n");
int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
adev->debugfs_preempt =
debugfs_create_file("amdgpu_preempt_ib", 0600,
adev->ddev->primary->debugfs_root,
(void *)adev, &fops_ib_preempt);
if (!(adev->debugfs_preempt)) {
DRM_ERROR("unable to create amdgpu_preempt_ib debugsfs file\n");
return -EIO;
}
return amdgpu_debugfs_add_files(adev, amdgpu_debugfs_list,
ARRAY_SIZE(amdgpu_debugfs_list));
}
void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev)
{
if (adev->debugfs_preempt)
debugfs_remove(adev->debugfs_preempt);
}
#else
int amdgpu_debugfs_init(struct amdgpu_device *adev)
{
return 0;
}
void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev) { }
int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
{
return 0;

View File

@ -34,6 +34,7 @@ struct amdgpu_debugfs {
int amdgpu_debugfs_regs_init(struct amdgpu_device *adev);
void amdgpu_debugfs_regs_cleanup(struct amdgpu_device *adev);
int amdgpu_debugfs_init(struct amdgpu_device *adev);
void amdgpu_debugfs_preempt_cleanup(struct amdgpu_device *adev);
int amdgpu_debugfs_add_files(struct amdgpu_device *adev,
const struct drm_info_list *files,
unsigned nfiles);

View File

@ -27,9 +27,10 @@
*/
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/module.h>
#include <linux/console.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/drm_atomic_helper.h>
#include <drm/drm_probe_helper.h>
#include <drm/amdgpu_drm.h>
@ -51,6 +52,7 @@
#endif
#include "vi.h"
#include "soc15.h"
#include "nv.h"
#include "bif/bif_4_1_d.h"
#include <linux/pci.h>
#include <linux/firmware.h>
@ -61,12 +63,14 @@
#include "amdgpu_xgmi.h"
#include "amdgpu_ras.h"
#include "amdgpu_pmu.h"
MODULE_FIRMWARE("amdgpu/vega10_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/vega12_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/picasso_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/raven2_gpu_info.bin");
MODULE_FIRMWARE("amdgpu/navi10_gpu_info.bin");
#define AMDGPU_RESUME_MS 2000
@ -94,9 +98,32 @@ static const char *amdgpu_asic_name[] = {
"VEGA12",
"VEGA20",
"RAVEN",
"NAVI10",
"LAST",
};
/**
* DOC: pcie_replay_count
*
* The amdgpu driver provides a sysfs API for reporting the total number
* of PCIe replays (NAKs)
* The file pcie_replay_count is used for this and returns the total
* number of replays as a sum of the NAKs generated and NAKs received
*/
static ssize_t amdgpu_device_get_pcie_replay_count(struct device *dev,
struct device_attribute *attr, char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
uint64_t cnt = amdgpu_asic_get_pcie_replay_count(adev);
return snprintf(buf, PAGE_SIZE, "%llu\n", cnt);
}
static DEVICE_ATTR(pcie_replay_count, S_IRUGO,
amdgpu_device_get_pcie_replay_count, NULL);
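The attribute above is registered against the GPU's struct device later in this diff via device_create_file(), so it shows up as a read-only pcie_replay_count file in the device's sysfs directory. A minimal userspace sketch that reads it; the /sys/class/drm/card0/device path is an assumption about where the PCI device's sysfs directory is linked, used here for illustration only.

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
    /* Assumed location: the sysfs directory of the PCI device backing card0. */
    const char *path = "/sys/class/drm/card0/device/pcie_replay_count";
    char buf[64];

    FILE *f = fopen(path, "r");
    if (!f) {
        perror("open pcie_replay_count");
        return EXIT_FAILURE;
    }
    if (fgets(buf, sizeof(buf), f))
        printf("total PCIe replays (NAKs generated + received): %s", buf);
    fclose(f);
    return EXIT_SUCCESS;
}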
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev);
/**
@ -484,7 +511,10 @@ void amdgpu_device_program_register_sequence(struct amdgpu_device *adev,
} else {
tmp = RREG32(reg);
tmp &= ~and_mask;
tmp |= or_mask;
if (adev->family >= AMDGPU_FAMILY_AI)
tmp |= (or_mask & and_mask);
else
tmp |= or_mask;
}
WREG32(reg, tmp);
}
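The change above tightens the read-modify-write in amdgpu_device_program_register_sequence(): on AMDGPU_FAMILY_AI and newer, only the or_mask bits that fall inside and_mask are applied, so stray bits in or_mask can no longer leak into the register. A standalone sketch contrasting the two behaviours on made-up mask values.

#include <stdio.h>
#include <stdint.h>

/* Old behaviour: clear the and_mask bits, then OR in the whole or_mask. */
static uint32_t rmw_old(uint32_t reg, uint32_t and_mask, uint32_t or_mask)
{
    return (reg & ~and_mask) | or_mask;
}

/* New (FAMILY_AI and newer) behaviour: only or_mask bits covered by
 * and_mask are written back. */
static uint32_t rmw_new(uint32_t reg, uint32_t and_mask, uint32_t or_mask)
{
    return (reg & ~and_mask) | (or_mask & and_mask);
}

int main(void)
{
    /* Illustrative values: or_mask carries a stray bit (0x100) outside and_mask. */
    uint32_t reg = 0x00f0, and_mask = 0x00ff, or_mask = 0x010a;

    printf("old: 0x%04x\n", (unsigned)rmw_old(reg, and_mask, or_mask)); /* 0x010a */
    printf("new: 0x%04x\n", (unsigned)rmw_new(reg, and_mask, or_mask)); /* 0x000a */
    return 0;
}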
@ -910,8 +940,10 @@ def_value:
* Validates certain module parameters and updates
* the associated values used by the driver (all asics).
*/
static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
static int amdgpu_device_check_arguments(struct amdgpu_device *adev)
{
int ret = 0;
if (amdgpu_sched_jobs < 4) {
dev_warn(adev->dev, "sched jobs (%d) must be at least 4\n",
amdgpu_sched_jobs);
@ -949,19 +981,15 @@ static void amdgpu_device_check_arguments(struct amdgpu_device *adev)
amdgpu_device_check_block_size(adev);
if (amdgpu_vram_page_split != -1 && (amdgpu_vram_page_split < 16 ||
!is_power_of_2(amdgpu_vram_page_split))) {
dev_warn(adev->dev, "invalid VRAM page split (%d)\n",
amdgpu_vram_page_split);
amdgpu_vram_page_split = 1024;
}
if (amdgpu_lockup_timeout == 0) {
dev_warn(adev->dev, "lockup_timeout msut be > 0, adjusting to 10000\n");
amdgpu_lockup_timeout = 10000;
ret = amdgpu_device_get_job_timeout_settings(adev);
if (ret) {
dev_err(adev->dev, "invalid lockup_timeout parameter syntax\n");
return ret;
}
adev->firmware.load_type = amdgpu_ucode_get_load_type(adev, amdgpu_fw_load_type);
return ret;
}
/**
@ -1356,6 +1384,9 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
else
chip_name = "raven";
break;
case CHIP_NAVI10:
chip_name = "navi10";
break;
}
snprintf(fw_name, sizeof(fw_name), "amdgpu/%s_gpu_info.bin", chip_name);
@ -1402,6 +1433,23 @@ static int amdgpu_device_parse_gpu_info_fw(struct amdgpu_device *adev)
adev->gfx.cu_info.max_scratch_slots_per_cu =
le32_to_cpu(gpu_info_fw->gc_max_scratch_slots_per_cu);
adev->gfx.cu_info.lds_size = le32_to_cpu(gpu_info_fw->gc_lds_size);
if (hdr->version_minor >= 1) {
const struct gpu_info_firmware_v1_1 *gpu_info_fw =
(const struct gpu_info_firmware_v1_1 *)(adev->firmware.gpu_info_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
adev->gfx.config.num_sc_per_sh =
le32_to_cpu(gpu_info_fw->num_sc_per_sh);
adev->gfx.config.num_packer_per_sc =
le32_to_cpu(gpu_info_fw->num_packer_per_sc);
}
#ifdef CONFIG_DRM_AMD_DC_DCN2_0
if (hdr->version_minor == 2) {
const struct gpu_info_firmware_v1_2 *gpu_info_fw =
(const struct gpu_info_firmware_v1_2 *)(adev->firmware.gpu_info_fw->data +
le32_to_cpu(hdr->header.ucode_array_offset_bytes));
adev->dm.soc_bounding_box = &gpu_info_fw->soc_bounding_box;
}
#endif
break;
}
default:
@ -1490,6 +1538,13 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
if (r)
return r;
break;
case CHIP_NAVI10:
adev->family = AMDGPU_FAMILY_NV;
r = nv_set_ip_blocks(adev);
if (r)
return r;
break;
default:
/* FIXME: not supported yet */
return -EINVAL;
@ -1505,6 +1560,9 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
r = amdgpu_virt_request_full_gpu(adev, true);
if (r)
return -EAGAIN;
/* query the reg access mode at the very beginning */
amdgpu_virt_init_reg_access_mode(adev);
}
adev->pm.pp_feature = amdgpu_pp_feature_mask;
@ -1532,6 +1590,19 @@ static int amdgpu_device_ip_early_init(struct amdgpu_device *adev)
adev->ip_blocks[i].status.valid = true;
}
}
/* get the vbios after the asic_funcs are set up */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON) {
/* Read BIOS */
if (!amdgpu_get_bios(adev))
return -EINVAL;
r = amdgpu_atombios_init(adev);
if (r) {
dev_err(adev->dev, "amdgpu_atombios_init failed\n");
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
return r;
}
}
}
adev->cg_flags &= amdgpu_cg_mask;
@ -1550,6 +1621,7 @@ static int amdgpu_device_ip_hw_init_phase1(struct amdgpu_device *adev)
if (adev->ip_blocks[i].status.hw)
continue;
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_COMMON ||
(amdgpu_sriov_vf(adev) && (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP)) ||
adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_IH) {
r = adev->ip_blocks[i].version->funcs->hw_init(adev);
if (r) {
@ -1670,7 +1742,7 @@ static int amdgpu_device_ip_init(struct amdgpu_device *adev)
adev->ip_blocks[i].status.hw = true;
/* right after GMC hw init, we create CSA */
if (amdgpu_sriov_vf(adev)) {
if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
r = amdgpu_allocate_static_csa(adev, &adev->virt.csa_obj,
AMDGPU_GEM_DOMAIN_VRAM,
AMDGPU_CSA_SIZE);
@ -1821,6 +1893,43 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power
return 0;
}
static int amdgpu_device_enable_mgpu_fan_boost(void)
{
struct amdgpu_gpu_instance *gpu_ins;
struct amdgpu_device *adev;
int i, ret = 0;
mutex_lock(&mgpu_info.mutex);
/*
* MGPU fan boost feature should be enabled
* only when there are two or more dGPUs in
* the system
*/
if (mgpu_info.num_dgpu < 2)
goto out;
for (i = 0; i < mgpu_info.num_dgpu; i++) {
gpu_ins = &(mgpu_info.gpu_ins[i]);
adev = gpu_ins->adev;
if (!(adev->flags & AMD_IS_APU) &&
!gpu_ins->mgpu_fan_enabled &&
adev->powerplay.pp_funcs &&
adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
if (ret)
break;
gpu_ins->mgpu_fan_enabled = 1;
}
}
out:
mutex_unlock(&mgpu_info.mutex);
return ret;
}
/**
* amdgpu_device_ip_late_init - run late init for hardware IPs
*
@ -1854,11 +1963,15 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
amdgpu_device_set_cg_state(adev, AMD_CG_STATE_GATE);
amdgpu_device_set_pg_state(adev, AMD_PG_STATE_GATE);
queue_delayed_work(system_wq, &adev->late_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
amdgpu_device_fill_reset_magic(adev);
r = amdgpu_device_enable_mgpu_fan_boost();
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
/* set to low pstate by default */
amdgpu_xgmi_set_pstate(adev, 0);
return 0;
}
@ -1957,65 +2070,20 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
return 0;
}
static int amdgpu_device_enable_mgpu_fan_boost(void)
{
struct amdgpu_gpu_instance *gpu_ins;
struct amdgpu_device *adev;
int i, ret = 0;
mutex_lock(&mgpu_info.mutex);
/*
* MGPU fan boost feature should be enabled
* only when there are two or more dGPUs in
* the system
*/
if (mgpu_info.num_dgpu < 2)
goto out;
for (i = 0; i < mgpu_info.num_dgpu; i++) {
gpu_ins = &(mgpu_info.gpu_ins[i]);
adev = gpu_ins->adev;
if (!(adev->flags & AMD_IS_APU) &&
!gpu_ins->mgpu_fan_enabled &&
adev->powerplay.pp_funcs &&
adev->powerplay.pp_funcs->enable_mgpu_fan_boost) {
ret = amdgpu_dpm_enable_mgpu_fan_boost(adev);
if (ret)
break;
gpu_ins->mgpu_fan_enabled = 1;
}
}
out:
mutex_unlock(&mgpu_info.mutex);
return ret;
}
/**
* amdgpu_device_ip_late_init_func_handler - work handler for ib test
* amdgpu_device_delayed_init_work_handler - work handler for IB tests
*
* @work: work_struct.
*/
static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
static void amdgpu_device_delayed_init_work_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, late_init_work.work);
container_of(work, struct amdgpu_device, delayed_init_work.work);
int r;
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
r = amdgpu_device_enable_mgpu_fan_boost();
if (r)
DRM_ERROR("enable mgpu fan boost failed (%d).\n", r);
/*set to low pstate by default */
amdgpu_xgmi_set_pstate(adev, 0);
}
static void amdgpu_device_delay_enable_gfx_off(struct work_struct *work)
@ -2355,6 +2423,9 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
case CHIP_VEGA20:
#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
case CHIP_RAVEN:
#endif
#if defined(CONFIG_DRM_AMD_DC_DCN2_0)
case CHIP_NAVI10:
#endif
return amdgpu_dc != 0;
#endif
@ -2467,7 +2538,9 @@ int amdgpu_device_init(struct amdgpu_device *adev,
mutex_init(&adev->lock_reset);
mutex_init(&adev->virt.dpm_mutex);
amdgpu_device_check_arguments(adev);
r = amdgpu_device_check_arguments(adev);
if (r)
return r;
spin_lock_init(&adev->mmio_idx_lock);
spin_lock_init(&adev->smc_idx_lock);
@ -2485,8 +2558,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_LIST_HEAD(&adev->ring_lru_list);
spin_lock_init(&adev->ring_lru_list_lock);
INIT_DELAYED_WORK(&adev->late_init_work,
amdgpu_device_ip_late_init_func_handler);
INIT_DELAYED_WORK(&adev->delayed_init_work,
amdgpu_device_delayed_init_work_handler);
INIT_DELAYED_WORK(&adev->gfx.gfx_off_delay_work,
amdgpu_device_delay_enable_gfx_off);
@ -2523,8 +2596,33 @@ int amdgpu_device_init(struct amdgpu_device *adev,
if (adev->rio_mem == NULL)
DRM_INFO("PCI I/O BAR is not found.\n");
/* enable PCIE atomic ops */
r = pci_enable_atomic_ops_to_root(adev->pdev,
PCI_EXP_DEVCAP2_ATOMIC_COMP32 |
PCI_EXP_DEVCAP2_ATOMIC_COMP64);
if (r) {
adev->have_atomics_support = false;
DRM_INFO("PCIE atomic ops is not supported\n");
} else {
adev->have_atomics_support = true;
}
amdgpu_device_get_pcie_info(adev);
if (amdgpu_mcbp)
DRM_INFO("MCBP is enabled\n");
if (amdgpu_mes && adev->asic_type >= CHIP_NAVI10)
adev->enable_mes = true;
if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10) {
r = amdgpu_discovery_init(adev);
if (r) {
dev_err(adev->dev, "amdgpu_discovery_init failed\n");
return r;
}
}
/* early init functions */
r = amdgpu_device_ip_early_init(adev);
if (r)
@ -2552,19 +2650,6 @@ int amdgpu_device_init(struct amdgpu_device *adev,
goto fence_driver_init;
}
/* Read BIOS */
if (!amdgpu_get_bios(adev)) {
r = -EINVAL;
goto failed;
}
r = amdgpu_atombios_init(adev);
if (r) {
dev_err(adev->dev, "amdgpu_atombios_init failed\n");
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_ATOMBIOS_INIT_FAIL, 0, 0);
goto failed;
}
/* detect if we are with an SRIOV vbios */
amdgpu_device_detect_sriov_bios(adev);
@ -2662,10 +2747,17 @@ fence_driver_init:
amdgpu_fbdev_init(adev);
if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
amdgpu_pm_virt_sysfs_init(adev);
r = amdgpu_pm_sysfs_init(adev);
if (r)
DRM_ERROR("registering pm debugfs failed (%d).\n", r);
r = amdgpu_ucode_sysfs_init(adev);
if (r)
DRM_ERROR("Creating firmware sysfs failed (%d).\n", r);
r = amdgpu_debugfs_gem_init(adev);
if (r)
DRM_ERROR("registering gem debugfs failed (%d).\n", r);
@ -2706,7 +2798,21 @@ fence_driver_init:
}
/* must succeed. */
amdgpu_ras_post_init(adev);
amdgpu_ras_resume(adev);
queue_delayed_work(system_wq, &adev->delayed_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
r = device_create_file(adev->dev, &dev_attr_pcie_replay_count);
if (r) {
dev_err(adev->dev, "Could not create pcie_replay_count");
return r;
}
if (IS_ENABLED(CONFIG_PERF_EVENTS))
r = amdgpu_pmu_init(adev);
if (r)
dev_err(adev->dev, "amdgpu_pmu_init failed\n");
return 0;
@ -2749,7 +2855,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
adev->firmware.gpu_info_fw = NULL;
}
adev->accel_working = false;
cancel_delayed_work_sync(&adev->late_init_work);
cancel_delayed_work_sync(&adev->delayed_init_work);
/* free i2c buses */
if (!amdgpu_device_has_dc_support(adev))
amdgpu_i2c_fini(adev);
@ -2770,7 +2876,17 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
iounmap(adev->rmmio);
adev->rmmio = NULL;
amdgpu_device_doorbell_fini(adev);
if (amdgpu_sriov_vf(adev) && amdgim_is_hwperf(adev))
amdgpu_pm_virt_sysfs_fini(adev);
amdgpu_debugfs_regs_cleanup(adev);
device_remove_file(adev->dev, &dev_attr_pcie_replay_count);
amdgpu_ucode_sysfs_fini(adev);
if (IS_ENABLED(CONFIG_PERF_EVENTS))
amdgpu_pmu_fini(adev);
amdgpu_debugfs_preempt_cleanup(adev);
if (amdgpu_discovery && adev->asic_type >= CHIP_NAVI10)
amdgpu_discovery_fini(adev);
}
@ -2810,7 +2926,7 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
if (fbcon)
amdgpu_fbdev_set_suspend(adev, 1);
cancel_delayed_work_sync(&adev->late_init_work);
cancel_delayed_work_sync(&adev->delayed_init_work);
if (!amdgpu_device_has_dc_support(adev)) {
/* turn off display hw */
@ -2851,6 +2967,8 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
amdgpu_amdkfd_suspend(adev);
amdgpu_ras_suspend(adev);
r = amdgpu_device_ip_suspend_phase1(adev);
/* evict vram memory */
@ -2928,6 +3046,9 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
if (r)
return r;
queue_delayed_work(system_wq, &adev->delayed_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
if (!amdgpu_device_has_dc_support(adev)) {
/* pin cursors */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
@ -2951,7 +3072,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
return r;
/* Make sure IB tests flushed */
flush_delayed_work(&adev->late_init_work);
flush_delayed_work(&adev->delayed_init_work);
/* blat the mode back in */
if (fbcon) {
@ -2971,6 +3092,8 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
drm_kms_helper_poll_enable(dev);
amdgpu_ras_resume(adev);
/*
* Most of the connector probing functions try to acquire runtime pm
* refs to ensure that the GPU is powered on when connector polling is
@ -3335,8 +3458,6 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
if (!ring || !ring->sched.thread)
continue;
drm_sched_stop(&ring->sched);
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
amdgpu_fence_driver_force_completion(ring);
}
@ -3344,8 +3465,7 @@ static int amdgpu_device_pre_asic_reset(struct amdgpu_device *adev,
if(job)
drm_sched_increase_karma(&job->base);
/* Don't suspend on bare metal if we are not going to HW reset the ASIC */
if (!amdgpu_sriov_vf(adev)) {
if (!need_full_reset)
@ -3452,6 +3572,19 @@ static int amdgpu_do_asic_reset(struct amdgpu_hive_info *hive,
if (vram_lost)
amdgpu_device_fill_reset_magic(tmp_adev);
/*
* Add this ASIC as tracked as reset was already
* complete successfully.
*/
amdgpu_register_gpu_instance(tmp_adev);
r = amdgpu_device_ip_late_init(tmp_adev);
if (r)
goto out;
/* must succeed. */
amdgpu_ras_resume(tmp_adev);
/* Update PSP FW topology after reset */
if (hive && tmp_adev->gmc.xgmi.num_physical_nodes > 1)
r = amdgpu_xgmi_update_topology(hive, tmp_adev);
@ -3483,38 +3616,21 @@ end:
return r;
}
static void amdgpu_device_post_asic_reset(struct amdgpu_device *adev,
struct amdgpu_job *job)
static bool amdgpu_device_lock_adev(struct amdgpu_device *adev, bool trylock)
{
int i;
if (trylock) {
if (!mutex_trylock(&adev->lock_reset))
return false;
} else
mutex_lock(&adev->lock_reset);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = adev->rings[i];
if (!ring || !ring->sched.thread)
continue;
if (!adev->asic_reset_res)
drm_sched_resubmit_jobs(&ring->sched);
drm_sched_start(&ring->sched, !adev->asic_reset_res);
}
if (!amdgpu_device_has_dc_support(adev)) {
drm_helper_resume_force_mode(adev->ddev);
}
adev->asic_reset_res = 0;
}
static void amdgpu_device_lock_adev(struct amdgpu_device *adev)
{
mutex_lock(&adev->lock_reset);
atomic_inc(&adev->gpu_reset_counter);
adev->in_gpu_reset = 1;
/* Block kfd: SRIOV would do it separately */
if (!amdgpu_sriov_vf(adev))
amdgpu_amdkfd_pre_reset(adev);
return true;
}
static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
@ -3542,40 +3658,44 @@ static void amdgpu_device_unlock_adev(struct amdgpu_device *adev)
int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
struct amdgpu_job *job)
{
int r;
struct amdgpu_hive_info *hive = NULL;
bool need_full_reset = false;
struct amdgpu_device *tmp_adev = NULL;
struct list_head device_list, *device_list_handle = NULL;
bool need_full_reset, job_signaled;
struct amdgpu_hive_info *hive = NULL;
struct amdgpu_device *tmp_adev = NULL;
int i, r = 0;
need_full_reset = job_signaled = false;
INIT_LIST_HEAD(&device_list);
dev_info(adev->dev, "GPU reset begin!\n");
cancel_delayed_work_sync(&adev->delayed_init_work);
hive = amdgpu_get_xgmi_hive(adev, false);
/*
* In case of XGMI hive disallow concurrent resets to be triggered
* by different nodes. No point also since the one node already executing
* reset will also reset all the other nodes in the hive.
* Here we trylock to avoid chain of resets executing from
* either trigger by jobs on different adevs in XGMI hive or jobs on
* different schedulers for same device while this TO handler is running.
* We always reset all schedulers for device and all devices for XGMI
* hive so that should take care of them too.
*/
hive = amdgpu_get_xgmi_hive(adev, 0);
if (hive && adev->gmc.xgmi.num_physical_nodes > 1 &&
!mutex_trylock(&hive->reset_lock))
if (hive && !mutex_trylock(&hive->reset_lock)) {
DRM_INFO("Bailing on TDR for s_job:%llx, hive: %llx as another already in progress",
job->base.id, hive->hive_id);
return 0;
}
/* Start with adev pre asic reset first for soft reset check.*/
amdgpu_device_lock_adev(adev);
r = amdgpu_device_pre_asic_reset(adev,
job,
&need_full_reset);
if (r) {
/*TODO Should we stop ?*/
DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
r, adev->ddev->unique);
adev->asic_reset_res = r;
if (!amdgpu_device_lock_adev(adev, !hive)) {
DRM_INFO("Bailing on TDR for s_job:%llx, as another already in progress",
job->base.id);
return 0;
}
/* Build list of devices to reset */
if (need_full_reset && adev->gmc.xgmi.num_physical_nodes > 1) {
if (adev->gmc.xgmi.num_physical_nodes > 1) {
if (!hive) {
amdgpu_device_unlock_adev(adev);
return -ENODEV;
@ -3592,13 +3712,67 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
device_list_handle = &device_list;
}
/*
* Mark these ASICs to be reseted as untracked first
* And add them back after reset completed
*/
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head)
amdgpu_unregister_gpu_instance(tmp_adev);
/* block all schedulers and reset given job's ring */
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
/* disable ras on ALL IPs */
if (amdgpu_device_ip_need_full_reset(tmp_adev))
amdgpu_ras_suspend(tmp_adev);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
if (!ring || !ring->sched.thread)
continue;
drm_sched_stop(&ring->sched, &job->base);
}
}
/*
* Must check guilty signal here since after this point all old
* HW fences are force signaled.
*
* job->base holds a reference to parent fence
*/
if (job && job->base.s_fence->parent &&
dma_fence_is_signaled(job->base.s_fence->parent))
job_signaled = true;
if (!amdgpu_device_ip_need_full_reset(adev))
device_list_handle = &device_list;
if (job_signaled) {
dev_info(adev->dev, "Guilty job already signaled, skipping HW reset");
goto skip_hw_reset;
}
/* Guilty job will be freed after this*/
r = amdgpu_device_pre_asic_reset(adev,
job,
&need_full_reset);
if (r) {
/*TODO Should we stop ?*/
DRM_ERROR("GPU pre asic reset failed with err, %d for drm dev, %s ",
r, adev->ddev->unique);
adev->asic_reset_res = r;
}
retry: /* Rest of adevs pre asic reset from XGMI hive. */
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
if (tmp_adev == adev)
continue;
amdgpu_device_lock_adev(tmp_adev);
amdgpu_device_lock_adev(tmp_adev, false);
r = amdgpu_device_pre_asic_reset(tmp_adev,
NULL,
&need_full_reset);
@ -3622,9 +3796,28 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
goto retry;
}
skip_hw_reset:
/* Post ASIC reset for all devs .*/
list_for_each_entry(tmp_adev, device_list_handle, gmc.xgmi.head) {
amdgpu_device_post_asic_reset(tmp_adev, tmp_adev == adev ? job : NULL);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
struct amdgpu_ring *ring = tmp_adev->rings[i];
if (!ring || !ring->sched.thread)
continue;
/* No point to resubmit jobs if we didn't HW reset*/
if (!tmp_adev->asic_reset_res && !job_signaled)
drm_sched_resubmit_jobs(&ring->sched);
drm_sched_start(&ring->sched, !tmp_adev->asic_reset_res);
}
if (!amdgpu_device_has_dc_support(tmp_adev) && !job_signaled) {
drm_helper_resume_force_mode(tmp_adev->ddev);
}
tmp_adev->asic_reset_res = 0;
if (r) {
/* bad news, how to tell it to userspace ? */
@ -3637,7 +3830,7 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
amdgpu_device_unlock_adev(tmp_adev);
}
if (hive && adev->gmc.xgmi.num_physical_nodes > 1)
if (hive)
mutex_unlock(&hive->reset_lock);
if (r)
@ -3645,43 +3838,6 @@ retry: /* Rest of adevs pre asic reset from XGMI hive. */
return r;
}
static void amdgpu_device_get_min_pci_speed_width(struct amdgpu_device *adev,
enum pci_bus_speed *speed,
enum pcie_link_width *width)
{
struct pci_dev *pdev = adev->pdev;
enum pci_bus_speed cur_speed;
enum pcie_link_width cur_width;
u32 ret = 1;
*speed = PCI_SPEED_UNKNOWN;
*width = PCIE_LNK_WIDTH_UNKNOWN;
while (pdev) {
cur_speed = pcie_get_speed_cap(pdev);
cur_width = pcie_get_width_cap(pdev);
ret = pcie_bandwidth_available(adev->pdev, NULL,
NULL, &cur_width);
if (!ret)
cur_width = PCIE_LNK_WIDTH_RESRV;
if (cur_speed != PCI_SPEED_UNKNOWN) {
if (*speed == PCI_SPEED_UNKNOWN)
*speed = cur_speed;
else if (cur_speed < *speed)
*speed = cur_speed;
}
if (cur_width != PCIE_LNK_WIDTH_UNKNOWN) {
if (*width == PCIE_LNK_WIDTH_UNKNOWN)
*width = cur_width;
else if (cur_width < *width)
*width = cur_width;
}
pdev = pci_upstream_bridge(pdev);
}
}
/**
* amdgpu_device_get_pcie_info - fence pcie info about the PCIE slot
*
@ -3715,8 +3871,8 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
if (adev->pm.pcie_gen_mask && adev->pm.pcie_mlw_mask)
return;
amdgpu_device_get_min_pci_speed_width(adev, &platform_speed_cap,
&platform_link_width);
pcie_bandwidth_available(adev->pdev, NULL,
&platform_speed_cap, &platform_link_width);
if (adev->pm.pcie_gen_mask == 0) {
/* asic caps */

View File

@ -0,0 +1,415 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include "amdgpu.h"
#include "amdgpu_discovery.h"
#include "soc15_common.h"
#include "soc15_hw_ip.h"
#include "nbio/nbio_2_3_offset.h"
#include "discovery.h"
#define mmRCC_CONFIG_MEMSIZE 0xde3
#define mmMM_INDEX 0x0
#define mmMM_INDEX_HI 0x6
#define mmMM_DATA 0x1
#define HW_ID_MAX 300
const char *hw_id_names[HW_ID_MAX] = {
[MP1_HWID] = "MP1",
[MP2_HWID] = "MP2",
[THM_HWID] = "THM",
[SMUIO_HWID] = "SMUIO",
[FUSE_HWID] = "FUSE",
[CLKA_HWID] = "CLKA",
[PWR_HWID] = "PWR",
[GC_HWID] = "GC",
[UVD_HWID] = "UVD",
[AUDIO_AZ_HWID] = "AUDIO_AZ",
[ACP_HWID] = "ACP",
[DCI_HWID] = "DCI",
[DMU_HWID] = "DMU",
[DCO_HWID] = "DCO",
[DIO_HWID] = "DIO",
[XDMA_HWID] = "XDMA",
[DCEAZ_HWID] = "DCEAZ",
[DAZ_HWID] = "DAZ",
[SDPMUX_HWID] = "SDPMUX",
[NTB_HWID] = "NTB",
[IOHC_HWID] = "IOHC",
[L2IMU_HWID] = "L2IMU",
[VCE_HWID] = "VCE",
[MMHUB_HWID] = "MMHUB",
[ATHUB_HWID] = "ATHUB",
[DBGU_NBIO_HWID] = "DBGU_NBIO",
[DFX_HWID] = "DFX",
[DBGU0_HWID] = "DBGU0",
[DBGU1_HWID] = "DBGU1",
[OSSSYS_HWID] = "OSSSYS",
[HDP_HWID] = "HDP",
[SDMA0_HWID] = "SDMA0",
[SDMA1_HWID] = "SDMA1",
[ISP_HWID] = "ISP",
[DBGU_IO_HWID] = "DBGU_IO",
[DF_HWID] = "DF",
[CLKB_HWID] = "CLKB",
[FCH_HWID] = "FCH",
[DFX_DAP_HWID] = "DFX_DAP",
[L1IMU_PCIE_HWID] = "L1IMU_PCIE",
[L1IMU_NBIF_HWID] = "L1IMU_NBIF",
[L1IMU_IOAGR_HWID] = "L1IMU_IOAGR",
[L1IMU3_HWID] = "L1IMU3",
[L1IMU4_HWID] = "L1IMU4",
[L1IMU5_HWID] = "L1IMU5",
[L1IMU6_HWID] = "L1IMU6",
[L1IMU7_HWID] = "L1IMU7",
[L1IMU8_HWID] = "L1IMU8",
[L1IMU9_HWID] = "L1IMU9",
[L1IMU10_HWID] = "L1IMU10",
[L1IMU11_HWID] = "L1IMU11",
[L1IMU12_HWID] = "L1IMU12",
[L1IMU13_HWID] = "L1IMU13",
[L1IMU14_HWID] = "L1IMU14",
[L1IMU15_HWID] = "L1IMU15",
[WAFLC_HWID] = "WAFLC",
[FCH_USB_PD_HWID] = "FCH_USB_PD",
[PCIE_HWID] = "PCIE",
[PCS_HWID] = "PCS",
[DDCL_HWID] = "DDCL",
[SST_HWID] = "SST",
[IOAGR_HWID] = "IOAGR",
[NBIF_HWID] = "NBIF",
[IOAPIC_HWID] = "IOAPIC",
[SYSTEMHUB_HWID] = "SYSTEMHUB",
[NTBCCP_HWID] = "NTBCCP",
[UMC_HWID] = "UMC",
[SATA_HWID] = "SATA",
[USB_HWID] = "USB",
[CCXSEC_HWID] = "CCXSEC",
[XGMI_HWID] = "XGMI",
[XGBE_HWID] = "XGBE",
[MP0_HWID] = "MP0",
};
static int hw_id_map[MAX_HWIP] = {
[GC_HWIP] = GC_HWID,
[HDP_HWIP] = HDP_HWID,
[SDMA0_HWIP] = SDMA0_HWID,
[SDMA1_HWIP] = SDMA1_HWID,
[MMHUB_HWIP] = MMHUB_HWID,
[ATHUB_HWIP] = ATHUB_HWID,
[NBIO_HWIP] = NBIF_HWID,
[MP0_HWIP] = MP0_HWID,
[MP1_HWIP] = MP1_HWID,
[UVD_HWIP] = UVD_HWID,
[VCE_HWIP] = VCE_HWID,
[DF_HWIP] = DF_HWID,
[DCE_HWIP] = DCEAZ_HWID,
[OSSSYS_HWIP] = OSSSYS_HWID,
[SMUIO_HWIP] = SMUIO_HWID,
[PWR_HWIP] = PWR_HWID,
[NBIF_HWIP] = NBIF_HWID,
[THM_HWIP] = THM_HWID,
[CLK_HWIP] = CLKA_HWID,
};
static int amdgpu_discovery_read_binary(struct amdgpu_device *adev, uint8_t *binary)
{
uint32_t *p = (uint32_t *)binary;
uint64_t vram_size = (uint64_t)RREG32(mmRCC_CONFIG_MEMSIZE) << 20;
uint64_t pos = vram_size - BINARY_MAX_SIZE;
unsigned long flags;
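/*
* The discovery binary occupies the last BINARY_MAX_SIZE bytes of VRAM and is
* read one dword at a time through the MM_INDEX/MM_DATA indirect aperture:
* the low bits of the byte offset (OR'ed with 0x80000000) are written to
* MM_INDEX, the upper bits (pos >> 31) to MM_INDEX_HI, and the dword is then
* read back from MM_DATA.
*/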
while (pos < vram_size) {
spin_lock_irqsave(&adev->mmio_idx_lock, flags);
WREG32_NO_KIQ(mmMM_INDEX, ((uint32_t)pos) | 0x80000000);
WREG32_NO_KIQ(mmMM_INDEX_HI, pos >> 31);
*p++ = RREG32_NO_KIQ(mmMM_DATA);
spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
pos += 4;
}
return 0;
}
static uint16_t amdgpu_discovery_calculate_checksum(uint8_t *data, uint32_t size)
{
uint16_t checksum = 0;
int i;
for (i = 0; i < size; i++)
checksum += data[i];
return checksum;
}
static inline bool amdgpu_discovery_verify_checksum(uint8_t *data, uint32_t size,
uint16_t expected)
{
return !!(amdgpu_discovery_calculate_checksum(data, size) == expected);
}
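/*
* Illustrative sketch (not part of the original change): the checksum is a
* plain byte sum truncated to 16 bits, so { 0x01, 0x02, 0x03, 0xff } should
* verify against 0x0105. The self-test name below is hypothetical.
*/
static void __maybe_unused amdgpu_discovery_checksum_example(void)
{
uint8_t buf[] = { 0x01, 0x02, 0x03, 0xff };
WARN_ON(!amdgpu_discovery_verify_checksum(buf, ARRAY_SIZE(buf), 0x0105));
}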
int amdgpu_discovery_init(struct amdgpu_device *adev)
{
struct table_info *info;
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
struct gpu_info_header *ghdr;
uint16_t offset;
uint16_t size;
uint16_t checksum;
int r;
adev->discovery = kzalloc(BINARY_MAX_SIZE, GFP_KERNEL);
if (!adev->discovery)
return -ENOMEM;
r = amdgpu_discovery_read_binary(adev, adev->discovery);
if (r) {
DRM_ERROR("failed to read ip discovery binary\n");
goto out;
}
bhdr = (struct binary_header *)adev->discovery;
if (le32_to_cpu(bhdr->binary_signature) != BINARY_SIGNATURE) {
DRM_ERROR("invalid ip discovery binary signature\n");
r = -EINVAL;
goto out;
}
offset = offsetof(struct binary_header, binary_checksum) +
sizeof(bhdr->binary_checksum);
size = bhdr->binary_size - offset;
checksum = bhdr->binary_checksum;
if (!amdgpu_discovery_verify_checksum(adev->discovery + offset,
size, checksum)) {
DRM_ERROR("invalid ip discovery binary checksum\n");
r = -EINVAL;
goto out;
}
info = &bhdr->table_list[IP_DISCOVERY];
offset = le16_to_cpu(info->offset);
checksum = le16_to_cpu(info->checksum);
ihdr = (struct ip_discovery_header *)(adev->discovery + offset);
if (le32_to_cpu(ihdr->signature) != DISCOVERY_TABLE_SIGNATURE) {
DRM_ERROR("invalid ip discovery data table signature\n");
r = -EINVAL;
goto out;
}
if (!amdgpu_discovery_verify_checksum(adev->discovery + offset,
ihdr->size, checksum)) {
DRM_ERROR("invalid ip discovery data table checksum\n");
r = -EINVAL;
goto out;
}
info = &bhdr->table_list[GC];
offset = le16_to_cpu(info->offset);
checksum = le16_to_cpu(info->checksum);
ghdr = (struct gpu_info_header *)(adev->discovery + offset);
if (!amdgpu_discovery_verify_checksum(adev->discovery + offset,
ghdr->size, checksum)) {
DRM_ERROR("invalid gc data table checksum\n");
r = -EINVAL;
goto out;
}
return 0;
out:
kfree(adev->discovery);
adev->discovery = NULL;
return r;
}
void amdgpu_discovery_fini(struct amdgpu_device *adev)
{
kfree(adev->discovery);
adev->discovery = NULL;
}
int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev)
{
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
struct die_header *dhdr;
struct ip *ip;
uint16_t die_offset;
uint16_t ip_offset;
uint16_t num_dies;
uint16_t num_ips;
uint8_t num_base_address;
int hw_ip;
int i, j, k;
if (!adev->discovery) {
DRM_ERROR("ip discovery uninitialized\n");
return -EINVAL;
}
bhdr = (struct binary_header *)adev->discovery;
ihdr = (struct ip_discovery_header *)(adev->discovery +
le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
num_dies = le16_to_cpu(ihdr->num_dies);
DRM_DEBUG("number of dies: %d\n", num_dies);
for (i = 0; i < num_dies; i++) {
die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
dhdr = (struct die_header *)(adev->discovery + die_offset);
num_ips = le16_to_cpu(dhdr->num_ips);
ip_offset = die_offset + sizeof(*dhdr);
if (le16_to_cpu(dhdr->die_id) != i) {
DRM_ERROR("invalid die id %d, expected %d\n",
le16_to_cpu(dhdr->die_id), i);
return -EINVAL;
}
DRM_DEBUG("number of hardware IPs on die%d: %d\n",
le16_to_cpu(dhdr->die_id), num_ips);
for (j = 0; j < num_ips; j++) {
ip = (struct ip *)(adev->discovery + ip_offset);
num_base_address = ip->num_base_address;
DRM_DEBUG("%s(%d) #%d v%d.%d.%d:\n",
hw_id_names[le16_to_cpu(ip->hw_id)],
le16_to_cpu(ip->hw_id),
ip->number_instance,
ip->major, ip->minor,
ip->revision);
for (k = 0; k < num_base_address; k++) {
/*
* convert the endianness of base addresses in place,
* so that we don't need to convert them when accessing adev->reg_offset.
*/
ip->base_address[k] = le32_to_cpu(ip->base_address[k]);
DRM_DEBUG("\t0x%08x\n", ip->base_address[k]);
}
for (hw_ip = 0; hw_ip < MAX_HWIP; hw_ip++) {
if (hw_id_map[hw_ip] == le16_to_cpu(ip->hw_id)) {
DRM_INFO("set register base offset for %s\n",
hw_id_names[le16_to_cpu(ip->hw_id)]);
adev->reg_offset[hw_ip][ip->number_instance] =
ip->base_address;
}
}
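/* struct ip ends in a one-dword base_address[] array, so each additional base address grows the record by another dword */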
ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
}
}
return 0;
}
int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
int *major, int *minor)
{
struct binary_header *bhdr;
struct ip_discovery_header *ihdr;
struct die_header *dhdr;
struct ip *ip;
uint16_t die_offset;
uint16_t ip_offset;
uint16_t num_dies;
uint16_t num_ips;
int i, j;
if (!adev->discovery) {
DRM_ERROR("ip discovery uninitialized\n");
return -EINVAL;
}
bhdr = (struct binary_header *)adev->discovery;
ihdr = (struct ip_discovery_header *)(adev->discovery +
le16_to_cpu(bhdr->table_list[IP_DISCOVERY].offset));
num_dies = le16_to_cpu(ihdr->num_dies);
for (i = 0; i < num_dies; i++) {
die_offset = le16_to_cpu(ihdr->die_info[i].die_offset);
dhdr = (struct die_header *)(adev->discovery + die_offset);
num_ips = le16_to_cpu(dhdr->num_ips);
ip_offset = die_offset + sizeof(*dhdr);
for (j = 0; j < num_ips; j++) {
ip = (struct ip *)(adev->discovery + ip_offset);
if (le16_to_cpu(ip->hw_id) == hw_id) {
if (major)
*major = ip->major;
if (minor)
*minor = ip->minor;
return 0;
}
ip_offset += sizeof(*ip) + 4 * (ip->num_base_address - 1);
}
}
return -EINVAL;
}
int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev)
{
struct binary_header *bhdr;
struct gc_info_v1_0 *gc_info;
if (!adev->discovery) {
DRM_ERROR("ip discovery uninitialized\n");
return -EINVAL;
}
bhdr = (struct binary_header *)adev->discovery;
gc_info = (struct gc_info_v1_0 *)(adev->discovery +
le16_to_cpu(bhdr->table_list[GC].offset));
adev->gfx.config.max_shader_engines = le32_to_cpu(gc_info->gc_num_se);
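/* each gfx10 WGP packs two CUs, hence the factor of two below */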
adev->gfx.config.max_cu_per_sh = 2 * (le32_to_cpu(gc_info->gc_num_wgp0_per_sa) +
le32_to_cpu(gc_info->gc_num_wgp1_per_sa));
adev->gfx.config.max_sh_per_se = le32_to_cpu(gc_info->gc_num_sa_per_se);
adev->gfx.config.max_backends_per_se = le32_to_cpu(gc_info->gc_num_rb_per_se);
adev->gfx.config.max_texture_channel_caches = le32_to_cpu(gc_info->gc_num_gl2c);
adev->gfx.config.max_gprs = le32_to_cpu(gc_info->gc_num_gprs);
adev->gfx.config.max_gs_threads = le32_to_cpu(gc_info->gc_num_max_gs_thds);
adev->gfx.config.gs_vgt_table_depth = le32_to_cpu(gc_info->gc_gs_table_depth);
adev->gfx.config.gs_prim_buffer_depth = le32_to_cpu(gc_info->gc_gsprim_buff_depth);
adev->gfx.config.double_offchip_lds_buf = le32_to_cpu(gc_info->gc_double_offchip_lds_buffer);
adev->gfx.cu_info.wave_front_size = le32_to_cpu(gc_info->gc_wave_size);
adev->gfx.cu_info.max_waves_per_simd = le32_to_cpu(gc_info->gc_max_waves_per_simd);
adev->gfx.cu_info.max_scratch_slots_per_cu = le32_to_cpu(gc_info->gc_max_scratch_slots_per_cu);
adev->gfx.cu_info.lds_size = le32_to_cpu(gc_info->gc_lds_size);
adev->gfx.config.num_sc_per_sh = le32_to_cpu(gc_info->gc_num_sc_per_se) /
le32_to_cpu(gc_info->gc_num_sa_per_se);
adev->gfx.config.num_packer_per_sc = le32_to_cpu(gc_info->gc_num_packer_per_sc);
return 0;
}

View File

@ -0,0 +1,34 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_DISCOVERY__
#define __AMDGPU_DISCOVERY__
int amdgpu_discovery_init(struct amdgpu_device *adev);
void amdgpu_discovery_fini(struct amdgpu_device *adev);
int amdgpu_discovery_reg_base_init(struct amdgpu_device *adev);
int amdgpu_discovery_get_ip_version(struct amdgpu_device *adev, int hw_id,
int *major, int *minor);
int amdgpu_discovery_get_gfx_info(struct amdgpu_device *adev);
#endif /* __AMDGPU_DISCOVERY__ */

View File

@ -23,7 +23,7 @@
* Authors: Dave Airlie
* Alex Deucher
*/
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_i2c.h"
@ -32,11 +32,13 @@
#include "amdgpu_display.h"
#include <asm/div64.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_edid.h>
#include <drm/drm_gem_framebuffer_helper.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_vblank.h>
static void amdgpu_display_flip_callback(struct dma_fence *f,
struct dma_fence_cb *cb)
@ -631,10 +633,6 @@ int amdgpu_display_modeset_create_props(struct amdgpu_device *adev)
amdgpu_dither_enum_list, sz);
if (amdgpu_device_has_dc_support(adev)) {
adev->mode_info.max_bpc_property =
drm_property_create_range(adev->ddev, 0, "max bpc", 8, 16);
if (!adev->mode_info.max_bpc_property)
return -ENOMEM;
adev->mode_info.abm_level_property =
drm_property_create_range(adev->ddev, 0,
"abm level", 0, 4);

View File

@ -1,5 +1,5 @@
/*
* Copyright 2012 Advanced Micro Devices, Inc.
* Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
@ -31,8 +31,6 @@
* objects between different devices via PRIME <prime_buffer_sharing>`.
*/
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_display.h"
#include "amdgpu_gem.h"
@ -103,7 +101,8 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
* Returns:
* 0 on success or a negative error code on failure.
*/
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
struct vm_area_struct *vma)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@ -137,6 +136,235 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma
return ret;
}
static int
__reservation_object_make_exclusive(struct reservation_object *obj)
{
struct dma_fence **fences;
unsigned int count;
int r;
if (!reservation_object_get_list(obj)) /* no shared fences to convert */
return 0;
r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences);
if (r)
return r;
if (count == 0) {
/* Now that was unexpected. */
} else if (count == 1) {
reservation_object_add_excl_fence(obj, fences[0]);
dma_fence_put(fences[0]);
kfree(fences);
} else {
struct dma_fence_array *array;
array = dma_fence_array_create(count, fences,
dma_fence_context_alloc(1), 0,
false);
if (!array)
goto err_fences_put;
reservation_object_add_excl_fence(obj, &array->base);
dma_fence_put(&array->base);
}
return 0;
err_fences_put:
while (count--)
dma_fence_put(fences[count]);
kfree(fences);
return -ENOMEM;
}
/**
* amdgpu_dma_buf_map_attach - &dma_buf_ops.attach implementation
* @dma_buf: Shared DMA buffer
* @attach: DMA-buf attachment
*
* Makes sure that the shared DMA buffer can be accessed by the target device.
* For now, simply pins it to the GTT domain, where it should be accessible by
* all DMA devices.
*
* Returns:
* 0 on success or a negative error code on failure.
*/
static int amdgpu_dma_buf_map_attach(struct dma_buf *dma_buf,
struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
long r;
r = drm_gem_map_attach(dma_buf, attach);
if (r)
return r;
r = amdgpu_bo_reserve(bo, false);
if (unlikely(r != 0))
goto error_detach;
if (attach->dev->driver != adev->dev->driver) {
/*
* We only create shared fences for internal use, but importers
* of the dmabuf rely on exclusive fences for implicitly
* tracking write hazards. As any of the current fences may
* correspond to a write, we need to convert all existing
* fences on the reservation object into a single exclusive
* fence.
*/
r = __reservation_object_make_exclusive(bo->tbo.resv);
if (r)
goto error_unreserve;
}
/* pin buffer into GTT */
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
if (r)
goto error_unreserve;
if (attach->dev->driver != adev->dev->driver)
bo->prime_shared_count++;
error_unreserve:
amdgpu_bo_unreserve(bo);
error_detach:
if (r)
drm_gem_map_detach(dma_buf, attach);
return r;
}
/**
* amdgpu_dma_buf_map_detach - &dma_buf_ops.detach implementation
* @dma_buf: Shared DMA buffer
* @attach: DMA-buf attachment
*
* This is called when a shared DMA buffer no longer needs to be accessible by
* another device. For now, simply unpins the buffer from GTT.
*/
static void amdgpu_dma_buf_map_detach(struct dma_buf *dma_buf,
struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
int ret = 0;
ret = amdgpu_bo_reserve(bo, true);
if (unlikely(ret != 0))
goto error;
amdgpu_bo_unpin(bo);
if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
bo->prime_shared_count--;
amdgpu_bo_unreserve(bo);
error:
drm_gem_map_detach(dma_buf, attach);
}
/**
* amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation
* @obj: GEM BO
*
* Returns:
* The BO's reservation object.
*/
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
return bo->tbo.resv;
}
/**
* amdgpu_dma_buf_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
* @dma_buf: Shared DMA buffer
* @direction: Direction of DMA transfer
*
* This is called before CPU access to the shared DMA buffer's memory. If it's
* a read access, the buffer is moved to the GTT domain if possible, for optimal
* CPU read performance.
*
* Returns:
* 0 on success or a negative error code on failure.
*/
static int amdgpu_dma_buf_begin_cpu_access(struct dma_buf *dma_buf,
enum dma_data_direction direction)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { true, false };
u32 domain = amdgpu_display_supported_domains(adev);
int ret;
bool reads = (direction == DMA_BIDIRECTIONAL ||
direction == DMA_FROM_DEVICE);
if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT))
return 0;
/* move to gtt */
ret = amdgpu_bo_reserve(bo, false);
if (unlikely(ret != 0))
return ret;
if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}
amdgpu_bo_unreserve(bo);
return ret;
}
const struct dma_buf_ops amdgpu_dmabuf_ops = {
.attach = amdgpu_dma_buf_map_attach,
.detach = amdgpu_dma_buf_map_detach,
.map_dma_buf = drm_gem_map_dma_buf,
.unmap_dma_buf = drm_gem_unmap_dma_buf,
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_dma_buf_begin_cpu_access,
.mmap = drm_gem_dmabuf_mmap,
.vmap = drm_gem_dmabuf_vmap,
.vunmap = drm_gem_dmabuf_vunmap,
};
/**
* amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
* @dev: DRM device
* @gobj: GEM BO
* @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
*
* The main work is done by the &drm_gem_prime_export helper, which in turn
* uses &amdgpu_gem_prime_res_obj.
*
* Returns:
* Shared DMA buffer representing the GEM BO from the given device.
*/
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
struct dma_buf *buf;
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
return ERR_PTR(-EPERM);
buf = drm_gem_prime_export(dev, gobj, flags);
if (!IS_ERR(buf)) {
buf->file->f_mapping = dev->anon_inode->i_mapping;
buf->ops = &amdgpu_dmabuf_ops;
}
return buf;
}
/**
* amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
* implementation
@ -188,235 +416,6 @@ error:
return ERR_PTR(ret);
}
static int
__reservation_object_make_exclusive(struct reservation_object *obj)
{
struct dma_fence **fences;
unsigned int count;
int r;
if (!reservation_object_get_list(obj)) /* no shared fences to convert */
return 0;
r = reservation_object_get_fences_rcu(obj, NULL, &count, &fences);
if (r)
return r;
if (count == 0) {
/* Now that was unexpected. */
} else if (count == 1) {
reservation_object_add_excl_fence(obj, fences[0]);
dma_fence_put(fences[0]);
kfree(fences);
} else {
struct dma_fence_array *array;
array = dma_fence_array_create(count, fences,
dma_fence_context_alloc(1), 0,
false);
if (!array)
goto err_fences_put;
reservation_object_add_excl_fence(obj, &array->base);
dma_fence_put(&array->base);
}
return 0;
err_fences_put:
while (count--)
dma_fence_put(fences[count]);
kfree(fences);
return -ENOMEM;
}
/**
* amdgpu_gem_map_attach - &dma_buf_ops.attach implementation
* @dma_buf: Shared DMA buffer
* @attach: DMA-buf attachment
*
* Makes sure that the shared DMA buffer can be accessed by the target device.
* For now, simply pins it to the GTT domain, where it should be accessible by
* all DMA devices.
*
* Returns:
* 0 on success or a negative error code on failure.
*/
static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
long r;
r = drm_gem_map_attach(dma_buf, attach);
if (r)
return r;
r = amdgpu_bo_reserve(bo, false);
if (unlikely(r != 0))
goto error_detach;
if (attach->dev->driver != adev->dev->driver) {
/*
* We only create shared fences for internal use, but importers
* of the dmabuf rely on exclusive fences for implicitly
* tracking write hazards. As any of the current fences may
* correspond to a write, we need to convert all existing
* fences on the reservation object into a single exclusive
* fence.
*/
r = __reservation_object_make_exclusive(bo->tbo.resv);
if (r)
goto error_unreserve;
}
/* pin buffer into GTT */
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
if (r)
goto error_unreserve;
if (attach->dev->driver != adev->dev->driver)
bo->prime_shared_count++;
error_unreserve:
amdgpu_bo_unreserve(bo);
error_detach:
if (r)
drm_gem_map_detach(dma_buf, attach);
return r;
}
/**
* amdgpu_gem_map_detach - &dma_buf_ops.detach implementation
* @dma_buf: Shared DMA buffer
* @attach: DMA-buf attachment
*
* This is called when a shared DMA buffer no longer needs to be accessible by
* another device. For now, simply unpins the buffer from GTT.
*/
static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = dma_buf->priv;
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
int ret = 0;
ret = amdgpu_bo_reserve(bo, true);
if (unlikely(ret != 0))
goto error;
amdgpu_bo_unpin(bo);
if (attach->dev->driver != adev->dev->driver && bo->prime_shared_count)
bo->prime_shared_count--;
amdgpu_bo_unreserve(bo);
error:
drm_gem_map_detach(dma_buf, attach);
}
/**
* amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation
* @obj: GEM BO
*
* Returns:
* The BO's reservation object.
*/
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
return bo->tbo.resv;
}
/**
* amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
* @dma_buf: Shared DMA buffer
* @direction: Direction of DMA transfer
*
* This is called before CPU access to the shared DMA buffer's memory. If it's
* a read access, the buffer is moved to the GTT domain if possible, for optimal
* CPU read performance.
*
* Returns:
* 0 on success or a negative error code on failure.
*/
static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
enum dma_data_direction direction)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(dma_buf->priv);
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { true, false };
u32 domain = amdgpu_display_supported_domains(adev);
int ret;
bool reads = (direction == DMA_BIDIRECTIONAL ||
direction == DMA_FROM_DEVICE);
if (!reads || !(domain & AMDGPU_GEM_DOMAIN_GTT))
return 0;
/* move to gtt */
ret = amdgpu_bo_reserve(bo, false);
if (unlikely(ret != 0))
return ret;
if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}
amdgpu_bo_unreserve(bo);
return ret;
}
const struct dma_buf_ops amdgpu_dmabuf_ops = {
.attach = amdgpu_gem_map_attach,
.detach = amdgpu_gem_map_detach,
.map_dma_buf = drm_gem_map_dma_buf,
.unmap_dma_buf = drm_gem_unmap_dma_buf,
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_gem_begin_cpu_access,
.mmap = drm_gem_dmabuf_mmap,
.vmap = drm_gem_dmabuf_vmap,
.vunmap = drm_gem_dmabuf_vunmap,
};
/**
* amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
* @dev: DRM device
* @gobj: GEM BO
* @flags: Flags such as DRM_CLOEXEC and DRM_RDWR.
*
* The main work is done by the &drm_gem_prime_export helper, which in turn
* uses &amdgpu_gem_prime_res_obj.
*
* Returns:
* Shared DMA buffer representing the GEM BO from the given device.
*/
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(gobj);
struct dma_buf *buf;
if (amdgpu_ttm_tt_get_usermm(bo->tbo.ttm) ||
bo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID)
return ERR_PTR(-EPERM);
buf = drm_gem_prime_export(dev, gobj, flags);
if (!IS_ERR(buf)) {
buf->file->f_mapping = dev->anon_inode->i_mapping;
buf->ops = &amdgpu_dmabuf_ops;
}
return buf;
}
/**
* amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
* @dev: DRM device

View File

@ -0,0 +1,46 @@
/*
* Copyright 2019 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_DMA_BUF_H__
#define __AMDGPU_DMA_BUF_H__
#include <drm/drm_gem.h>
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
struct drm_gem_object *
amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
struct sg_table *sg);
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags);
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf);
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj,
struct vm_area_struct *vma);
extern const struct dma_buf_ops amdgpu_dmabuf_ops;
#endif

View File

@ -51,6 +51,7 @@ struct amdgpu_doorbell_index {
uint32_t userqueue_start;
uint32_t userqueue_end;
uint32_t gfx_ring0;
uint32_t gfx_ring1;
uint32_t sdma_engine[8];
uint32_t ih;
union {
@ -153,6 +154,45 @@ typedef enum _AMDGPU_VEGA20_DOORBELL_ASSIGNMENT
AMDGPU_VEGA20_DOORBELL_INVALID = 0xFFFF
} AMDGPU_VEGA20_DOORBELL_ASSIGNMENT;
typedef enum _AMDGPU_NAVI10_DOORBELL_ASSIGNMENT
{
/* Compute + GFX: 0~255 */
AMDGPU_NAVI10_DOORBELL_KIQ = 0x000,
AMDGPU_NAVI10_DOORBELL_HIQ = 0x001,
AMDGPU_NAVI10_DOORBELL_DIQ = 0x002,
AMDGPU_NAVI10_DOORBELL_MEC_RING0 = 0x003,
AMDGPU_NAVI10_DOORBELL_MEC_RING1 = 0x004,
AMDGPU_NAVI10_DOORBELL_MEC_RING2 = 0x005,
AMDGPU_NAVI10_DOORBELL_MEC_RING3 = 0x006,
AMDGPU_NAVI10_DOORBELL_MEC_RING4 = 0x007,
AMDGPU_NAVI10_DOORBELL_MEC_RING5 = 0x008,
AMDGPU_NAVI10_DOORBELL_MEC_RING6 = 0x009,
AMDGPU_NAVI10_DOORBELL_MEC_RING7 = 0x00A,
AMDGPU_NAVI10_DOORBELL_USERQUEUE_START = 0x00B,
AMDGPU_NAVI10_DOORBELL_USERQUEUE_END = 0x08A,
AMDGPU_NAVI10_DOORBELL_GFX_RING0 = 0x08B,
AMDGPU_NAVI10_DOORBELL_GFX_RING1 = 0x08C,
/* SDMA:256~335*/
AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0 = 0x100,
AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE1 = 0x10A,
/* IH: 376~391 */
AMDGPU_NAVI10_DOORBELL_IH = 0x178,
/* MMSCH: 392~407
* overlap the doorbell assignment with VCN as they are mutually exclusive.
* The VCE engine's doorbell is 32 bit and two VCE rings share one QWORD.
*/
AMDGPU_NAVI10_DOORBELL64_VCN0_1 = 0x188, /* lower 32 bits for VCN0 and upper 32 bits for VCN1 */
AMDGPU_NAVI10_DOORBELL64_VCN2_3 = 0x189,
AMDGPU_NAVI10_DOORBELL64_VCN4_5 = 0x18A,
AMDGPU_NAVI10_DOORBELL64_VCN6_7 = 0x18B,
AMDGPU_NAVI10_DOORBELL64_FIRST_NON_CP = AMDGPU_NAVI10_DOORBELL_sDMA_ENGINE0,
AMDGPU_NAVI10_DOORBELL64_LAST_NON_CP = AMDGPU_NAVI10_DOORBELL64_VCN6_7,
AMDGPU_NAVI10_DOORBELL_MAX_ASSIGNMENT = 0x18F,
AMDGPU_NAVI10_DOORBELL_INVALID = 0xFFFF
} AMDGPU_NAVI10_DOORBELL_ASSIGNMENT;
/*
* 64bit doorbell, offset are in QWORD, occupy 2KB doorbell space
*/

View File

@ -22,7 +22,6 @@
* Authors: Alex Deucher
*/
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_atombios.h"
#include "amdgpu_i2c.h"
@ -907,16 +906,63 @@ amdgpu_get_vce_clock_state(void *handle, u32 idx)
int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low)
{
if (is_support_sw_smu(adev))
return smu_get_sclk(&adev->smu, low);
else
uint32_t clk_freq;
int ret = 0;
if (is_support_sw_smu(adev)) {
ret = smu_get_dpm_freq_range(&adev->smu, SMU_GFXCLK,
low ? &clk_freq : NULL,
!low ? &clk_freq : NULL);
if (ret)
return 0;
return clk_freq * 100;
} else {
return (adev)->powerplay.pp_funcs->get_sclk((adev)->powerplay.pp_handle, (low));
}
}
int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low)
{
if (is_support_sw_smu(adev))
return smu_get_mclk(&adev->smu, low);
else
uint32_t clk_freq;
int ret = 0;
if (is_support_sw_smu(adev)) {
ret = smu_get_dpm_freq_range(&adev->smu, SMU_UCLK,
low ? &clk_freq : NULL,
!low ? &clk_freq : NULL);
if (ret)
return 0;
return clk_freq * 100;
} else {
return (adev)->powerplay.pp_funcs->get_mclk((adev)->powerplay.pp_handle, (low));
}
}
int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev, uint32_t block_type, bool gate)
{
int ret = 0;
bool swsmu = is_support_sw_smu(adev);
switch (block_type) {
case AMD_IP_BLOCK_TYPE_GFX:
case AMD_IP_BLOCK_TYPE_UVD:
case AMD_IP_BLOCK_TYPE_VCN:
case AMD_IP_BLOCK_TYPE_VCE:
if (swsmu)
ret = smu_dpm_set_power_gate(&adev->smu, block_type, gate);
else
ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
(adev)->powerplay.pp_handle, block_type, gate));
break;
case AMD_IP_BLOCK_TYPE_GMC:
case AMD_IP_BLOCK_TYPE_ACP:
case AMD_IP_BLOCK_TYPE_SDMA:
ret = ((adev)->powerplay.pp_funcs->set_powergating_by_smu(
(adev)->powerplay.pp_handle, block_type, gate));
break;
default:
break;
}
return ret;
}

View File

@ -75,6 +75,20 @@ struct amdgpu_dpm_thermal {
int min_temp;
/* high temperature threshold */
int max_temp;
/* edge max emergency(shutdown) temp */
int max_edge_emergency_temp;
/* hotspot low temperature threshold */
int min_hotspot_temp;
/* hotspot high temperature critical threshold */
int max_hotspot_crit_temp;
/* hotspot max emergency(shutdown) temp */
int max_hotspot_emergency_temp;
/* memory low temperature threshold */
int min_mem_temp;
/* memory high temperature critical threshold */
int max_mem_crit_temp;
/* memory max emergency(shutdown) temp */
int max_mem_emergency_temp;
/* was last interrupt low to high or high to low */
bool high_to_low;
/* interrupt source */
@ -341,10 +355,6 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
(adev)->powerplay.pp_handle, msg_id))
#define amdgpu_dpm_set_powergating_by_smu(adev, block_type, gate) \
((adev)->powerplay.pp_funcs->set_powergating_by_smu(\
(adev)->powerplay.pp_handle, block_type, gate))
#define amdgpu_dpm_get_power_profile_mode(adev, buf) \
((adev)->powerplay.pp_funcs->get_power_profile_mode(\
(adev)->powerplay.pp_handle, buf))
@ -506,6 +516,9 @@ enum amdgpu_pcie_gen amdgpu_get_pcie_gen_support(struct amdgpu_device *adev,
struct amd_vce_state*
amdgpu_get_vce_clock_state(void *handle, u32 idx);
int amdgpu_dpm_set_powergating_by_smu(struct amdgpu_device *adev,
uint32_t block_type, bool gate);
extern int amdgpu_dpm_get_sclk(struct amdgpu_device *adev, bool low);
extern int amdgpu_dpm_get_mclk(struct amdgpu_device *adev, bool low);

View File

@ -22,21 +22,23 @@
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_drv.h>
#include <drm/drm_gem.h>
#include <drm/drm_vblank.h>
#include "amdgpu_drv.h"
#include <drm/drm_pciids.h>
#include <linux/console.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include <linux/vga_switcheroo.h>
#include <drm/drm_probe_helper.h>
#include "amdgpu.h"
#include "amdgpu_irq.h"
#include "amdgpu_gem.h"
#include "amdgpu_dma_buf.h"
#include "amdgpu_amdkfd.h"
@ -76,11 +78,14 @@
* - 3.30.0 - Add AMDGPU_SCHED_OP_CONTEXT_PRIORITY_OVERRIDE.
* - 3.31.0 - Add support for per-flip tiling attribute changes with DC
* - 3.32.0 - Add syncobj timeline support to AMDGPU_CS.
* - 3.33.0 - Fixes for GDS ENOMEM failures in AMDGPU_CS.
*/
#define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 32
#define KMS_DRIVER_MINOR 33
#define KMS_DRIVER_PATCHLEVEL 0
#define AMDGPU_MAX_TIMEOUT_PARAM_LENTH 256
int amdgpu_vram_limit = 0;
int amdgpu_vis_vram_limit = 0;
int amdgpu_gart_size = -1; /* auto */
@ -93,7 +98,7 @@ int amdgpu_disp_priority = 0;
int amdgpu_hw_i2c = 0;
int amdgpu_pcie_gen2 = -1;
int amdgpu_msi = -1;
int amdgpu_lockup_timeout = 10000;
char amdgpu_lockup_timeout[AMDGPU_MAX_TIMEOUT_PARAM_LENTH];
int amdgpu_dpm = -1;
int amdgpu_fw_load_type = -1;
int amdgpu_aspm = -1;
@ -106,7 +111,6 @@ int amdgpu_vm_fragment_size = -1;
int amdgpu_vm_block_size = -1;
int amdgpu_vm_fault_stop = 0;
int amdgpu_vm_debug = 0;
int amdgpu_vram_page_split = 512;
int amdgpu_vm_update_mode = -1;
int amdgpu_exp_hw_support = 0;
int amdgpu_dc = -1;
@ -134,6 +138,10 @@ int amdgpu_emu_mode = 0;
uint amdgpu_smu_memory_pool_size = 0;
/* FBC (bit 0) disabled by default*/
uint amdgpu_dc_feature_mask = 0;
int amdgpu_async_gfx_ring = 1;
int amdgpu_mcbp = 0;
int amdgpu_discovery = 0;
int amdgpu_mes = 0;
struct amdgpu_mgpu_info mgpu_info = {
.mutex = __MUTEX_INITIALIZER(mgpu_info.mutex),
@ -227,16 +235,28 @@ MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(msi, amdgpu_msi, int, 0444);
/**
* DOC: lockup_timeout (int)
* Set GPU scheduler timeout value in ms. Value 0 is invalidated, will be adjusted to 10000.
* Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET). The default is 10000.
* DOC: lockup_timeout (string)
* Set GPU scheduler timeout value in ms.
*
* The format can be [Non-Compute] or [GFX,Compute,SDMA,Video]. That is, either a
* single value or multiple values can be specified. Zero and negative values are
* invalid and will be adjusted to the default timeout.
* - With one value specified, the setting applies to all non-compute jobs.
* - With multiple values specified, the first one is for GFX, the second for
* Compute, and the third and fourth for SDMA and Video.
* By default (with no lockup_timeout setting), the timeout for all non-compute
* (GFX, SDMA and Video) jobs is 10000 ms, and no timeout is enforced on compute
* jobs.
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)");
module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444);
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms (default: 10000 for non-compute jobs and infinity timeout for compute jobs."
" 0: keep default value. negative: infinity timeout), "
"format is [Non-Compute] or [GFX,Compute,SDMA,Video]");
module_param_string(lockup_timeout, amdgpu_lockup_timeout, sizeof(amdgpu_lockup_timeout), 0444);
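/*
* Example (illustrative only): "lockup_timeout=10000,20000,15000,10000" sets a
* 10s GFX, 20s compute, 15s SDMA and 10s video timeout, while a single value
* such as "lockup_timeout=5000" applies 5s to all non-compute jobs.
*/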
/**
* DOC: dpm (int)
* Override for dynamic power management setting (1 = enable, 0 = disable). The default is -1 (auto).
* Override for dynamic power management setting
* (0 = disable, 1 = enable, 2 = enable sw smu driver for vega20)
* The default is -1 (auto).
*/
MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(dpm, amdgpu_dpm, int, 0444);
@ -331,13 +351,6 @@ module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both");
module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
/**
* DOC: vram_page_split (int)
* Override the number of pages after we split VRAM allocations (default 512, -1 = disable). The default is 512.
*/
MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 512, -1 = disable)");
module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444);
/**
* DOC: exp_hw_support (int)
* Enable experimental hw support (1 = enable). The default is 0 (disabled).
@ -561,6 +574,39 @@ MODULE_PARM_DESC(smu_memory_pool_size,
"0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
module_param_named(smu_memory_pool_size, amdgpu_smu_memory_pool_size, uint, 0444);
/**
* DOC: async_gfx_ring (int)
* It is used to enable gfx rings that could be configured with different priorities or equal priorities
*/
MODULE_PARM_DESC(async_gfx_ring,
"Asynchronous GFX rings that could be configured with either different priorities (HP3D ring and LP3D ring), or equal priorities (0 = disabled, 1 = enabled (default))");
module_param_named(async_gfx_ring, amdgpu_async_gfx_ring, int, 0444);
/**
* DOC: mcbp (int)
* It is used to enable mid command buffer preemption. (0 = disabled (default), 1 = enabled)
*/
MODULE_PARM_DESC(mcbp,
"Enable Mid-command buffer preemption (0 = disabled (default), 1 = enabled)");
module_param_named(mcbp, amdgpu_mcbp, int, 0444);
/**
* DOC: discovery (int)
* Allow driver to discover hardware IP information from IP Discovery table at the top of VRAM.
*/
MODULE_PARM_DESC(discovery,
"Allow driver to discover hardware IPs from IP Discovery table at the top of VRAM");
module_param_named(discovery, amdgpu_discovery, int, 0444);
/**
* DOC: mes (int)
* Enable Micro Engine Scheduler. This is a new hw scheduling engine for gfx, sdma, and compute.
* (0 = disabled (default), 1 = enabled)
*/
MODULE_PARM_DESC(mes,
"Enable Micro Engine Scheduler (0 = disabled (default), 1 = enabled)");
module_param_named(mes, amdgpu_mes, int, 0444);
#ifdef CONFIG_HSA_AMD
/**
* DOC: sched_policy (int)
@ -655,6 +701,24 @@ MODULE_PARM_DESC(noretry,
int halt_if_hws_hang;
module_param(halt_if_hws_hang, int, 0644);
MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (default), 1 = on)");
/**
* DOC: hws_gws_support(bool)
* Whether HWS supports GWS barriers. Default value: false (not supported)
* This will be replaced with a MEC firmware version check once firmware
* is ready
*/
bool hws_gws_support;
module_param(hws_gws_support, bool, 0444);
MODULE_PARM_DESC(hws_gws_support, "MEC FW support gws barriers (false = not supported (Default), true = supported)");
/**
* DOC: queue_preemption_timeout_ms (int)
* queue preemption timeout in ms (1 = Minimum, 9000 = default)
*/
int queue_preemption_timeout_ms = 9000;
module_param(queue_preemption_timeout_ms, int, 0644);
MODULE_PARM_DESC(queue_preemption_timeout_ms, "queue preemption timeout in ms (1 = Minimum, 9000 = default)");
#endif
/**
@ -665,6 +729,22 @@ MODULE_PARM_DESC(halt_if_hws_hang, "Halt if HWS hang is detected (0 = off (defau
MODULE_PARM_DESC(dcfeaturemask, "all stable DC features enabled (default))");
module_param_named(dcfeaturemask, amdgpu_dc_feature_mask, uint, 0444);
/**
* DOC: abmlevel (uint)
* Override the default ABM (Adaptive Backlight Management) level used for DC
* enabled hardware. Requires DMCU to be supported and loaded.
* Valid levels are 0-4. A value of 0 indicates that ABM should be disabled by
* default. Values 1-4 control the maximum allowable brightness reduction via
* the ABM algorithm, with 1 being the least reduction and 4 being the most
* reduction.
*
* Defaults to 0, or disabled. Userspace can still override this level later
* after boot.
*/
uint amdgpu_dm_abm_level = 0;
MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) ");
module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);
static const struct pci_device_id pciidlist[] = {
#ifdef CONFIG_DRM_AMDGPU_SI
{0x1002, 0x6780, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_TAHITI},
@ -921,6 +1001,14 @@ static const struct pci_device_id pciidlist[] = {
/* Raven */
{0x1002, 0x15dd, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
{0x1002, 0x15d8, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_RAVEN|AMD_IS_APU},
/* Navi10 */
{0x1002, 0x7310, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
{0x1002, 0x7312, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
{0x1002, 0x7318, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
{0x1002, 0x7319, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
{0x1002, 0x731A, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
{0x1002, 0x731B, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
{0x1002, 0x731F, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_NAVI10},
{0, 0, 0}
};
@ -1216,6 +1304,66 @@ int amdgpu_file_to_fpriv(struct file *filp, struct amdgpu_fpriv **fpriv)
return 0;
}
int amdgpu_device_get_job_timeout_settings(struct amdgpu_device *adev)
{
char *input = amdgpu_lockup_timeout;
char *timeout_setting = NULL;
int index = 0;
long timeout;
int ret = 0;
/*
* By default, the timeout for non-compute jobs is 10000 ms,
* and there is no timeout enforced on compute jobs.
*/
adev->gfx_timeout = msecs_to_jiffies(10000);
adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
adev->compute_timeout = MAX_SCHEDULE_TIMEOUT;
if (strnlen(input, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
while ((timeout_setting = strsep(&input, ",")) &&
strnlen(timeout_setting, AMDGPU_MAX_TIMEOUT_PARAM_LENTH)) {
ret = kstrtol(timeout_setting, 0, &timeout);
if (ret)
return ret;
if (timeout == 0) {
index++;
continue;
} else if (timeout < 0) {
timeout = MAX_SCHEDULE_TIMEOUT;
} else {
timeout = msecs_to_jiffies(timeout);
}
switch (index++) {
case 0:
adev->gfx_timeout = timeout;
break;
case 1:
adev->compute_timeout = timeout;
break;
case 2:
adev->sdma_timeout = timeout;
break;
case 3:
adev->video_timeout = timeout;
break;
default:
break;
}
}
/*
* There is only one value specified and
* it should apply to all non-compute jobs.
*/
if (index == 1)
adev->sdma_timeout = adev->video_timeout = adev->gfx_timeout;
}
return ret;
}
static bool
amdgpu_get_crtc_scanout_position(struct drm_device *dev, unsigned int pipe,
bool in_vblank_irq, int *vpos, int *hpos,

View File

@ -23,7 +23,7 @@
* Authors: Dave Airlie
* Alex Deucher
*/
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"

View File

@ -23,22 +23,22 @@
* Authors:
* David Airlie
*/
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/pm_runtime.h>
#include <drm/drmP.h>
#include <linux/module.h>
#include <linux/pm_runtime.h>
#include <linux/slab.h>
#include <linux/vga_switcheroo.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_crtc.h>
#include <drm/drm_crtc_helper.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_fb_helper.h>
#include <drm/drm_fourcc.h>
#include "amdgpu.h"
#include "cikd.h"
#include "amdgpu_gem.h"
#include <drm/drm_fb_helper.h>
#include <linux/vga_switcheroo.h>
#include "amdgpu_display.h"
/* object hierarchy -
@ -121,6 +121,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
struct drm_mode_fb_cmd2 *mode_cmd,
struct drm_gem_object **gobj_p)
{
const struct drm_format_info *info;
struct amdgpu_device *adev = rfbdev->adev;
struct drm_gem_object *gobj = NULL;
struct amdgpu_bo *abo = NULL;
@ -131,7 +132,8 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
int height = mode_cmd->height;
u32 cpp;
cpp = drm_format_plane_cpp(mode_cmd->pixel_format, 0);
info = drm_get_format_info(adev->ddev, mode_cmd);
cpp = info->cpp[0];
/* need to align pitch with crtc limits */
mode_cmd->pitches[0] = amdgpu_align_pitch(adev, mode_cmd->width, cpp,

View File

@ -34,7 +34,9 @@
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include <drm/drm_debugfs.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
@ -427,9 +429,13 @@ int amdgpu_fence_driver_start_ring(struct amdgpu_ring *ring,
int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
unsigned num_hw_submission)
{
struct amdgpu_device *adev = ring->adev;
long timeout;
int r;
if (!adev)
return -EINVAL;
/* Check that num_hw_submission is a power of two */
if ((num_hw_submission & (num_hw_submission - 1)) != 0)
return -EINVAL;
@ -451,12 +457,31 @@ int amdgpu_fence_driver_init_ring(struct amdgpu_ring *ring,
/* No need to setup the GPU scheduler for KIQ ring */
if (ring->funcs->type != AMDGPU_RING_TYPE_KIQ) {
/* for non-sriov case, no timeout enforce on compute ring */
if ((ring->funcs->type == AMDGPU_RING_TYPE_COMPUTE)
&& !amdgpu_sriov_vf(ring->adev))
timeout = MAX_SCHEDULE_TIMEOUT;
else
timeout = msecs_to_jiffies(amdgpu_lockup_timeout);
switch (ring->funcs->type) {
case AMDGPU_RING_TYPE_GFX:
timeout = adev->gfx_timeout;
break;
case AMDGPU_RING_TYPE_COMPUTE:
/*
* For the non-SR-IOV case, no timeout is
* enforced on the compute ring by default,
* unless the user specifies one.
*
* For the SR-IOV case, always use the same
* timeout as the GFX ring.
*/
if (!amdgpu_sriov_vf(ring->adev))
timeout = adev->compute_timeout;
else
timeout = adev->gfx_timeout;
break;
case AMDGPU_RING_TYPE_SDMA:
timeout = adev->sdma_timeout;
break;
default:
timeout = adev->video_timeout;
break;
}
r = drm_sched_init(&ring->sched, &amdgpu_sched_ops,
num_hw_submission, amdgpu_job_hang_limit,
@ -684,22 +709,30 @@ static int amdgpu_debugfs_fence_info(struct seq_file *m, void *data)
amdgpu_fence_process(ring);
seq_printf(m, "--- ring %d (%s) ---\n", i, ring->name);
seq_printf(m, "Last signaled fence 0x%08x\n",
seq_printf(m, "Last signaled fence 0x%08x\n",
atomic_read(&ring->fence_drv.last_seq));
seq_printf(m, "Last emitted 0x%08x\n",
seq_printf(m, "Last emitted 0x%08x\n",
ring->fence_drv.sync_seq);
if (ring->funcs->type == AMDGPU_RING_TYPE_GFX ||
ring->funcs->type == AMDGPU_RING_TYPE_SDMA) {
seq_printf(m, "Last signaled trailing fence 0x%08x\n",
le32_to_cpu(*ring->trail_fence_cpu_addr));
seq_printf(m, "Last emitted 0x%08x\n",
ring->trail_seq);
}
if (ring->funcs->type != AMDGPU_RING_TYPE_GFX)
continue;
/* set in CP_VMID_PREEMPT and preemption occurred */
seq_printf(m, "Last preempted 0x%08x\n",
seq_printf(m, "Last preempted 0x%08x\n",
le32_to_cpu(*(ring->fence_drv.cpu_addr + 2)));
/* set in CP_VMID_RESET and reset occurred */
seq_printf(m, "Last reset 0x%08x\n",
seq_printf(m, "Last reset 0x%08x\n",
le32_to_cpu(*(ring->fence_drv.cpu_addr + 4)));
/* Both preemption and reset occurred */
seq_printf(m, "Last both 0x%08x\n",
seq_printf(m, "Last both 0x%08x\n",
le32_to_cpu(*(ring->fence_drv.cpu_addr + 6)));
}
return 0;

View File

@ -25,7 +25,10 @@
* Alex Deucher
* Jerome Glisse
*/
#include <drm/drmP.h>
#include <linux/pci.h>
#include <linux/vmalloc.h>
#include <drm/amdgpu_drm.h>
#ifdef CONFIG_X86
#include <asm/set_memory.h>

View File

@ -27,26 +27,12 @@
struct amdgpu_ring;
struct amdgpu_bo;
struct amdgpu_gds_asic_info {
uint32_t total_size;
uint32_t gfx_partition_size;
uint32_t cs_partition_size;
};
struct amdgpu_gds {
struct amdgpu_gds_asic_info mem;
struct amdgpu_gds_asic_info gws;
struct amdgpu_gds_asic_info oa;
uint32_t gds_compute_max_wave_id;
/* At present, GDS, GWS and OA resources for gfx (graphics)
* is always pre-allocated and available for graphics operation.
* Such resource is shared between all gfx clients.
* TODO: move this operation to user space
* */
struct amdgpu_bo* gds_gfx_bo;
struct amdgpu_bo* gws_gfx_bo;
struct amdgpu_bo* oa_gfx_bo;
uint32_t gds_size;
uint32_t gws_size;
uint32_t oa_size;
uint32_t gds_compute_max_wave_id;
uint32_t vgt_gs_max_wave_id;
};
struct amdgpu_gds_reg_offset {

View File

@ -26,9 +26,13 @@
* Jerome Glisse
*/
#include <linux/ktime.h>
#include <linux/module.h>
#include <linux/pagemap.h>
#include <drm/drmP.h>
#include <linux/pci.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_debugfs.h>
#include "amdgpu.h"
#include "amdgpu_display.h"
#include "amdgpu_xgmi.h"
@ -171,7 +175,7 @@ void amdgpu_gem_object_close(struct drm_gem_object *obj,
amdgpu_vm_get_pd_bo(vm, &list, &vm_pd);
r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates);
r = ttm_eu_reserve_buffers(&ticket, &list, false, &duplicates, false);
if (r) {
dev_err(adev->dev, "leaking bo va because "
"we fail to reserve bo (%d)\n", r);
@ -323,33 +327,30 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
}
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE) {
r = amdgpu_ttm_tt_get_user_pages(bo->tbo.ttm,
bo->tbo.ttm->pages);
r = amdgpu_ttm_tt_get_user_pages(bo, bo->tbo.ttm->pages);
if (r)
goto release_object;
r = amdgpu_bo_reserve(bo, true);
if (r)
goto free_pages;
goto user_pages_done;
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
amdgpu_bo_unreserve(bo);
if (r)
goto free_pages;
goto user_pages_done;
}
r = drm_gem_handle_create(filp, gobj, &handle);
/* drop reference from allocate - handle holds it now */
drm_gem_object_put_unlocked(gobj);
if (r)
return r;
goto user_pages_done;
args->handle = handle;
return 0;
free_pages:
release_pages(bo->tbo.ttm->pages, bo->tbo.ttm->num_pages);
user_pages_done:
if (args->flags & AMDGPU_GEM_USERPTR_VALIDATE)
amdgpu_ttm_tt_get_user_pages_done(bo->tbo.ttm);
release_object:
drm_gem_object_put_unlocked(gobj);
@ -610,7 +611,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
amdgpu_vm_get_pd_bo(&fpriv->vm, &list, &vm_pd);
r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates);
r = ttm_eu_reserve_buffers(&ticket, &list, true, &duplicates, false);
if (r)
goto error_unref;

View File

@ -39,22 +39,6 @@ int amdgpu_gem_object_open(struct drm_gem_object *obj,
void amdgpu_gem_object_close(struct drm_gem_object *obj,
struct drm_file *file_priv);
unsigned long amdgpu_gem_timeout(uint64_t timeout_ns);
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj);
struct drm_gem_object *
amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
struct sg_table *sg);
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags);
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf);
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *);
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj);
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr);
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma);
extern const struct dma_buf_ops amdgpu_dmabuf_ops;
/*
* GEM objects.

View File

@ -22,7 +22,7 @@
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_gfx.h"
#include "amdgpu_rlc.h"
@ -34,8 +34,8 @@
* GPU GFX IP block helpers function.
*/
int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec,
int pipe, int queue)
int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
int pipe, int queue)
{
int bit = 0;
@ -47,8 +47,8 @@ int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec,
return bit;
}
void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
int *mec, int *pipe, int *queue)
void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
int *mec, int *pipe, int *queue)
{
*queue = bit % adev->gfx.mec.num_queue_per_pipe;
*pipe = (bit / adev->gfx.mec.num_queue_per_pipe)
@ -61,10 +61,40 @@ void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev,
int mec, int pipe, int queue)
{
return test_bit(amdgpu_gfx_queue_to_bit(adev, mec, pipe, queue),
return test_bit(amdgpu_gfx_mec_queue_to_bit(adev, mec, pipe, queue),
adev->gfx.mec.queue_bitmap);
}
int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev,
int me, int pipe, int queue)
{
int bit = 0;
bit += me * adev->gfx.me.num_pipe_per_me
* adev->gfx.me.num_queue_per_pipe;
bit += pipe * adev->gfx.me.num_queue_per_pipe;
bit += queue;
return bit;
}
void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
int *me, int *pipe, int *queue)
{
*queue = bit % adev->gfx.me.num_queue_per_pipe;
*pipe = (bit / adev->gfx.me.num_queue_per_pipe)
% adev->gfx.me.num_pipe_per_me;
*me = (bit / adev->gfx.me.num_queue_per_pipe)
/ adev->gfx.me.num_pipe_per_me;
}
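/*
* Worked example (illustrative): with num_pipe_per_me = 2 and
* num_queue_per_pipe = 8, (me 0, pipe 1, queue 3) encodes to bit
* 0 * 16 + 1 * 8 + 3 = 11, and amdgpu_gfx_bit_to_me_queue() decodes
* bit 11 back to the same triple.
*/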
bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev,
int me, int pipe, int queue)
{
return test_bit(amdgpu_gfx_me_queue_to_bit(adev, me, pipe, queue),
adev->gfx.me.queue_bitmap);
}
/**
* amdgpu_gfx_scratch_get - Allocate a scratch register
*
@ -199,6 +229,30 @@ void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev)
adev->gfx.num_compute_rings = AMDGPU_MAX_COMPUTE_RINGS;
}
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev)
{
int i, queue, pipe, me;
for (i = 0; i < AMDGPU_MAX_GFX_QUEUES; ++i) {
queue = i % adev->gfx.me.num_queue_per_pipe;
pipe = (i / adev->gfx.me.num_queue_per_pipe)
% adev->gfx.me.num_pipe_per_me;
me = (i / adev->gfx.me.num_queue_per_pipe)
/ adev->gfx.me.num_pipe_per_me;
if (me >= adev->gfx.me.num_me)
break;
/* policy: amdgpu owns the first queue per pipe at this stage
* will extend to multiple queues per pipe later */
if (me == 0 && queue < 1)
set_bit(i, adev->gfx.me.queue_bitmap);
}
/* update the number of active graphics rings */
adev->gfx.num_gfx_rings =
bitmap_weight(adev->gfx.me.queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
}
static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
struct amdgpu_ring *ring)
{
@ -213,7 +267,7 @@ static int amdgpu_gfx_kiq_acquire(struct amdgpu_device *adev,
if (test_bit(queue_bit, adev->gfx.mec.queue_bitmap))
continue;
amdgpu_gfx_bit_to_queue(adev, queue_bit, &mec, &pipe, &queue);
amdgpu_gfx_bit_to_mec_queue(adev, queue_bit, &mec, &pipe, &queue);
/*
* 1. Using pipes 2/3 from MEC 2 seems cause problems.
@ -306,9 +360,9 @@ int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
return 0;
}
/* create MQD for each compute queue */
int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
unsigned mqd_size)
/* create MQD for each compute/gfx queue */
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
unsigned mqd_size)
{
struct amdgpu_ring *ring = NULL;
int r, i;
@ -335,6 +389,27 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
}
if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) {
/* create MQD for each KGQ */
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
if (!ring->mqd_obj) {
r = amdgpu_bo_create_kernel(adev, mqd_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
&ring->mqd_gpu_addr, &ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
return r;
}
/* prepare MQD backup */
adev->gfx.me.mqd_backup[i] = kmalloc(mqd_size, GFP_KERNEL);
if (!adev->gfx.me.mqd_backup[i])
dev_warn(adev->dev, "no memory to create MQD backup for ring %s\n", ring->name);
}
}
}
/* create MQD for each KCQ */
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
@ -343,7 +418,7 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
AMDGPU_GEM_DOMAIN_GTT, &ring->mqd_obj,
&ring->mqd_gpu_addr, &ring->mqd_ptr);
if (r) {
dev_warn(adev->dev, "failed to create ring mqd ob (%d)", r);
dev_warn(adev->dev, "failed to create ring mqd bo (%d)", r);
return r;
}
@ -357,11 +432,21 @@ int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
return 0;
}
void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev)
void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev)
{
struct amdgpu_ring *ring = NULL;
int i;
if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring) {
for (i = 0; i < adev->gfx.num_gfx_rings; i++) {
ring = &adev->gfx.gfx_ring[i];
kfree(adev->gfx.me.mqd_backup[i]);
amdgpu_bo_free_kernel(&ring->mqd_obj,
&ring->mqd_gpu_addr,
&ring->mqd_ptr);
}
}
for (i = 0; i < adev->gfx.num_compute_rings; i++) {
ring = &adev->gfx.compute_ring[i];
kfree(adev->gfx.mec.mqd_backup[i]);
@ -371,12 +456,81 @@ void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev)
}
ring = &adev->gfx.kiq.ring;
if (adev->asic_type == CHIP_NAVI10 && amdgpu_async_gfx_ring)
kfree(adev->gfx.me.mqd_backup[AMDGPU_MAX_GFX_RINGS]);
kfree(adev->gfx.mec.mqd_backup[AMDGPU_MAX_COMPUTE_RINGS]);
amdgpu_bo_free_kernel(&ring->mqd_obj,
&ring->mqd_gpu_addr,
&ring->mqd_ptr);
}
int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *kiq_ring = &kiq->ring;
int i;
if (!kiq->pmf || !kiq->pmf->kiq_unmap_queues)
return -EINVAL;
if (amdgpu_ring_alloc(kiq_ring, kiq->pmf->unmap_queues_size *
adev->gfx.num_compute_rings))
return -ENOMEM;
for (i = 0; i < adev->gfx.num_compute_rings; i++)
kiq->pmf->kiq_unmap_queues(kiq_ring, &adev->gfx.compute_ring[i],
RESET_QUEUES, 0, 0);
return amdgpu_ring_test_ring(kiq_ring);
}
int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev)
{
struct amdgpu_kiq *kiq = &adev->gfx.kiq;
struct amdgpu_ring *kiq_ring = &adev->gfx.kiq.ring;
uint64_t queue_mask = 0;
int r, i;
if (!kiq->pmf || !kiq->pmf->kiq_map_queues || !kiq->pmf->kiq_set_resources)
return -EINVAL;
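/* build a bitmask with one bit per enabled compute queue; e.g. queues 0-7 enabled yields queue_mask = 0xff */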
for (i = 0; i < AMDGPU_MAX_COMPUTE_QUEUES; ++i) {
if (!test_bit(i, adev->gfx.mec.queue_bitmap))
continue;
/* This situation may be hit in the future if a new HW
* generation exposes more than 64 queues. If so, the
* definition of queue_mask needs updating */
if (WARN_ON(i > (sizeof(queue_mask)*8))) {
DRM_ERROR("Invalid KCQ enabled: %d\n", i);
break;
}
queue_mask |= (1ull << i);
}
DRM_INFO("kiq ring mec %d pipe %d q %d\n", kiq_ring->me, kiq_ring->pipe,
kiq_ring->queue);
r = amdgpu_ring_alloc(kiq_ring, kiq->pmf->map_queues_size *
adev->gfx.num_compute_rings +
kiq->pmf->set_resources_size);
if (r) {
DRM_ERROR("Failed to lock KIQ (%d).\n", r);
return r;
}
kiq->pmf->kiq_set_resources(kiq_ring, queue_mask);
for (i = 0; i < adev->gfx.num_compute_rings; i++)
kiq->pmf->kiq_map_queues(kiq_ring, &adev->gfx.compute_ring[i]);
r = amdgpu_ring_test_helper(kiq_ring);
if (r)
DRM_ERROR("KCQ enable failed\n");
return r;
}
/* amdgpu_gfx_off_ctrl - Handle gfx off feature enable/disable
*
* @adev: amdgpu_device pointer
@ -393,7 +547,9 @@ void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable)
if (!(adev->pm.pp_feature & PP_GFXOFF_MASK))
return;
if (!adev->powerplay.pp_funcs || !adev->powerplay.pp_funcs->set_powergating_by_smu)
if (!is_support_sw_smu(adev) &&
(!adev->powerplay.pp_funcs ||
!adev->powerplay.pp_funcs->set_powergating_by_smu))
return;

View File

@ -38,6 +38,7 @@
#define AMDGPU_GFX_CG_DISABLED_MODE 0x00000004L
#define AMDGPU_GFX_LBPW_DISABLED_MODE 0x00000008L
#define AMDGPU_MAX_GFX_QUEUES KGD_MAX_QUEUES
#define AMDGPU_MAX_COMPUTE_QUEUES KGD_MAX_QUEUES
struct amdgpu_mec {
@ -54,12 +55,41 @@ struct amdgpu_mec {
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_COMPUTE_QUEUES);
};
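/* Actions understood by the KIQ unmap_queues packet */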
enum amdgpu_unmap_queues_action {
PREEMPT_QUEUES = 0,
RESET_QUEUES,
DISABLE_PROCESS_QUEUES,
PREEMPT_QUEUES_NO_UNMAP,
};
struct kiq_pm4_funcs {
/* Support ASIC-specific kiq pm4 packets */
void (*kiq_set_resources)(struct amdgpu_ring *kiq_ring,
uint64_t queue_mask);
void (*kiq_map_queues)(struct amdgpu_ring *kiq_ring,
struct amdgpu_ring *ring);
void (*kiq_unmap_queues)(struct amdgpu_ring *kiq_ring,
struct amdgpu_ring *ring,
enum amdgpu_unmap_queues_action action,
u64 gpu_addr, u64 seq);
void (*kiq_query_status)(struct amdgpu_ring *kiq_ring,
struct amdgpu_ring *ring,
u64 addr,
u64 seq);
/* Packet sizes */
int set_resources_size;
int map_queues_size;
int unmap_queues_size;
int query_status_size;
};
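/*
 * Each GFX IP backend is expected to fill in its own kiq_pm4_funcs table
 * and point adev->gfx.kiq.pmf at it. An illustrative sketch (helper names
 * and packet sizes are backend specific, shown here only as an example):
 *
 *   static const struct kiq_pm4_funcs gfx_v10_0_kiq_pm4_funcs = {
 *           .kiq_set_resources = gfx_v10_0_kiq_set_resources,
 *           .kiq_map_queues = gfx_v10_0_kiq_map_queues,
 *           .kiq_unmap_queues = gfx_v10_0_kiq_unmap_queues,
 *           .kiq_query_status = gfx_v10_0_kiq_query_status,
 *           .set_resources_size = 8,
 *           .map_queues_size = 7,
 *           .unmap_queues_size = 6,
 *           .query_status_size = 7,
 *   };
 *
 *   adev->gfx.kiq.pmf = &gfx_v10_0_kiq_pm4_funcs;
 */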
struct amdgpu_kiq {
u64 eop_gpu_addr;
struct amdgpu_bo *eop_obj;
spinlock_t ring_lock;
struct amdgpu_ring ring;
struct amdgpu_irq_src irq;
const struct kiq_pm4_funcs *pmf;
};
/*
@@ -131,6 +161,10 @@ struct amdgpu_gfx_config {
uint32_t double_offchip_lds_buf;
/* cached value of DB_DEBUG2 */
uint32_t db_debug2;
/* gfx10 specific config */
uint32_t num_sc_per_sh;
uint32_t num_packer_per_sc;
uint32_t pa_sc_tile_steering_override;
};
struct amdgpu_cu_info {
@@ -191,10 +225,38 @@ struct sq_work {
unsigned ih_data;
};
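/* Firmware buffer objects for the PFP, CE and ME micro engines */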
struct amdgpu_pfp {
struct amdgpu_bo *pfp_fw_obj;
uint64_t pfp_fw_gpu_addr;
uint32_t *pfp_fw_ptr;
};
struct amdgpu_ce {
struct amdgpu_bo *ce_fw_obj;
uint64_t ce_fw_gpu_addr;
uint32_t *ce_fw_ptr;
};
struct amdgpu_me {
struct amdgpu_bo *me_fw_obj;
uint64_t me_fw_gpu_addr;
uint32_t *me_fw_ptr;
uint32_t num_me;
uint32_t num_pipe_per_me;
uint32_t num_queue_per_pipe;
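/* CPU-side MQD backups for the gfx rings; the extra slot is used for the KIQ */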
void *mqd_backup[AMDGPU_MAX_GFX_RINGS + 1];
/* These are the resources for which amdgpu takes ownership */
DECLARE_BITMAP(queue_bitmap, AMDGPU_MAX_GFX_QUEUES);
};
struct amdgpu_gfx {
struct mutex gpu_clock_mutex;
struct amdgpu_gfx_config config;
struct amdgpu_rlc rlc;
struct amdgpu_pfp pfp;
struct amdgpu_ce ce;
struct amdgpu_me me;
struct amdgpu_mec mec;
struct amdgpu_kiq kiq;
struct amdgpu_scratch scratch;
@@ -297,17 +359,27 @@ void amdgpu_gfx_kiq_fini(struct amdgpu_device *adev);
int amdgpu_gfx_kiq_init(struct amdgpu_device *adev,
unsigned hpd_size);
int amdgpu_gfx_compute_mqd_sw_init(struct amdgpu_device *adev,
unsigned mqd_size);
void amdgpu_gfx_compute_mqd_sw_fini(struct amdgpu_device *adev);
int amdgpu_gfx_mqd_sw_init(struct amdgpu_device *adev,
unsigned mqd_size);
void amdgpu_gfx_mqd_sw_fini(struct amdgpu_device *adev);
int amdgpu_gfx_disable_kcq(struct amdgpu_device *adev);
int amdgpu_gfx_enable_kcq(struct amdgpu_device *adev);
void amdgpu_gfx_compute_queue_acquire(struct amdgpu_device *adev);
int amdgpu_gfx_queue_to_bit(struct amdgpu_device *adev, int mec,
int pipe, int queue);
void amdgpu_gfx_bit_to_queue(struct amdgpu_device *adev, int bit,
int *mec, int *pipe, int *queue);
void amdgpu_gfx_graphics_queue_acquire(struct amdgpu_device *adev);
int amdgpu_gfx_mec_queue_to_bit(struct amdgpu_device *adev, int mec,
int pipe, int queue);
void amdgpu_gfx_bit_to_mec_queue(struct amdgpu_device *adev, int bit,
int *mec, int *pipe, int *queue);
bool amdgpu_gfx_is_mec_queue_enabled(struct amdgpu_device *adev, int mec,
int pipe, int queue);
int amdgpu_gfx_me_queue_to_bit(struct amdgpu_device *adev, int me,
int pipe, int queue);
void amdgpu_gfx_bit_to_me_queue(struct amdgpu_device *adev, int bit,
int *me, int *pipe, int *queue);
bool amdgpu_gfx_is_me_queue_enabled(struct amdgpu_device *adev, int me,
int pipe, int queue);
void amdgpu_gfx_off_ctrl(struct amdgpu_device *adev, bool enable);
#endif


@@ -24,6 +24,8 @@
*
*/
#include <linux/io-64-nonatomic-lo-hi.h>
#include "amdgpu.h"
/**


@@ -22,7 +22,6 @@
* Authors: Christian König
*/
#include <drm/drmP.h>
#include "amdgpu.h"
struct amdgpu_gtt_mgr {


@@ -23,9 +23,10 @@
* Authors: Dave Airlie
* Alex Deucher
*/
#include <linux/export.h>
#include <drm/drmP.h>
#include <linux/export.h>
#include <linux/pci.h>
#include <drm/drm_edid.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"


@@ -28,8 +28,10 @@
*/
#include <linux/seq_file.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_debugfs.h>
#include "amdgpu.h"
#include "atom.h"
#include "amdgpu_trace.h"
@@ -209,6 +211,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
skip_preamble = ring->current_ctx == fence_ctx;
if (job && ring->funcs->emit_cntxcntl) {
status |= job->preamble_status;
status |= job->preemption_status;
amdgpu_ring_emit_cntxcntl(ring, status);
}
@@ -217,9 +220,10 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
/* drop preamble IBs if we don't have a context switch */
if ((ib->flags & AMDGPU_IB_FLAG_PREAMBLE) &&
skip_preamble &&
!(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
skip_preamble &&
!(status & AMDGPU_PREAMBLE_IB_PRESENT_FIRST) &&
!amdgpu_mcbp &&
!amdgpu_sriov_vf(adev)) /* for SRIOV preemption, Preamble CE ib must be inserted anyway */
continue;
amdgpu_ring_emit_ib(ring, job, ib, status);


@@ -24,7 +24,7 @@
#include <linux/idr.h>
#include <linux/dma-fence-array.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
@@ -364,8 +364,11 @@ static int amdgpu_vmid_grab_used(struct amdgpu_vm *vm,
if (updates && (!flushed || dma_fence_is_later(updates, flushed)))
needs_flush = true;
/* Concurrent flushes are only possible starting with Vega10 */
if (adev->asic_type < CHIP_VEGA10 && needs_flush)
/* Concurrent flushes are only possible starting with Vega10 and
* are broken on Navi10 and Navi14.
*/
if (needs_flush && (adev->asic_type < CHIP_VEGA10 ||
adev->asic_type == CHIP_NAVI10))
continue;
/* Good, we can use this VMID. Remember this submission as


@@ -21,7 +21,8 @@
*
*/
#include <drm/drmP.h>
#include <linux/dma-mapping.h>
#include "amdgpu.h"
#include "amdgpu_ih.h"


@@ -29,8 +29,9 @@
*/
#include <linux/compat.h>
#include <drm/drmP.h>
#include <drm/amdgpu_drm.h>
#include <drm/drm_ioctl.h>
#include "amdgpu_drv.h"
long amdgpu_kms_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)


@@ -43,8 +43,11 @@
*/
#include <linux/irq.h>
#include <drm/drmP.h>
#include <linux/pci.h>
#include <drm/drm_crtc_helper.h>
#include <drm/drm_irq.h>
#include <drm/drm_vblank.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu.h"
#include "amdgpu_ih.h"


@@ -24,7 +24,7 @@
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <drm/drmP.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
@@ -51,6 +51,8 @@ static void amdgpu_job_timedout(struct drm_sched_job *s_job)
if (amdgpu_device_should_recover_gpu(ring->adev))
amdgpu_device_gpu_recover(ring->adev, job);
else
drm_sched_suspend_timeout(&ring->sched);
}
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,


@@ -29,6 +29,8 @@
#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1)
/* bit set means context switch occurred */
#define AMDGPU_HAVE_CTX_SWITCH (1 << 2)
/* bit set means IB is preempted */
#define AMDGPU_IB_PREEMPTED (1 << 3)
#define to_amdgpu_job(sched_job) \
container_of((sched_job), struct amdgpu_job, base)
@@ -45,6 +47,7 @@ struct amdgpu_job {
struct amdgpu_ib *ibs;
struct dma_fence *fence; /* the hw fence */
uint32_t preamble_status;
uint32_t preemption_status;
uint32_t num_ibs;
void *owner;
bool vm_needs_flush;


@@ -25,8 +25,9 @@
* Alex Deucher
* Jerome Glisse
*/
#include <drm/drmP.h>
#include "amdgpu.h"
#include <drm/drm_debugfs.h>
#include <drm/amdgpu_drm.h>
#include "amdgpu_sched.h"
#include "amdgpu_uvd.h"
@@ -35,13 +36,15 @@
#include <linux/vga_switcheroo.h>
#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/pci.h>
#include <linux/pm_runtime.h>
#include "amdgpu_amdkfd.h"
#include "amdgpu_gem.h"
#include "amdgpu_display.h"
#include "amdgpu_ras.h"
static void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
void amdgpu_unregister_gpu_instance(struct amdgpu_device *adev)
{
struct amdgpu_gpu_instance *gpu_instance;
int i;
@@ -102,7 +105,7 @@ done_free:
dev->dev_private = NULL;
}
static void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
void amdgpu_register_gpu_instance(struct amdgpu_device *adev)
{
struct amdgpu_gpu_instance *gpu_instance;
@@ -590,13 +593,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
struct drm_amdgpu_info_gds gds_info;
memset(&gds_info, 0, sizeof(gds_info));
gds_info.gds_gfx_partition_size = adev->gds.mem.gfx_partition_size;
gds_info.compute_partition_size = adev->gds.mem.cs_partition_size;
gds_info.gds_total_size = adev->gds.mem.total_size;
gds_info.gws_per_gfx_partition = adev->gds.gws.gfx_partition_size;
gds_info.gws_per_compute_partition = adev->gds.gws.cs_partition_size;
gds_info.oa_per_gfx_partition = adev->gds.oa.gfx_partition_size;
gds_info.oa_per_compute_partition = adev->gds.oa.cs_partition_size;
gds_info.compute_partition_size = adev->gds.gds_size;
gds_info.gds_total_size = adev->gds.gds_size;
gds_info.gws_per_compute_partition = adev->gds.gws_size;
gds_info.oa_per_compute_partition = adev->gds.oa_size;
return copy_to_user(out, &gds_info,
min((size_t)size, sizeof(gds_info))) ? -EFAULT : 0;
}
@@ -712,7 +712,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
dev_info.ids_flags = 0;
if (adev->flags & AMD_IS_APU)
dev_info.ids_flags |= AMDGPU_IDS_FLAGS_FUSION;
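/* With mid-command-buffer preemption (amdgpu_mcbp) enabled, report preemption support just as in the SR-IOV case. */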
if (amdgpu_sriov_vf(adev))
if (amdgpu_mcbp || amdgpu_sriov_vf(adev))
dev_info.ids_flags |= AMDGPU_IDS_FLAGS_PREEMPTION;
vm_size = adev->vm_manager.max_pfn * AMDGPU_GPU_PAGE_SIZE;
@@ -765,6 +765,10 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
dev_info.gs_prim_buffer_depth = adev->gfx.config.gs_prim_buffer_depth;
dev_info.max_gs_waves_per_vgt = adev->gfx.config.max_gs_threads;
if (adev->family >= AMDGPU_FAMILY_NV)
dev_info.pa_sc_tile_steering_override =
adev->gfx.config.pa_sc_tile_steering_override;
return copy_to_user(out, &dev_info,
min((size_t)size, sizeof(dev_info))) ? -EFAULT : 0;
}
@@ -977,7 +981,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
int r, pasid;
/* Ensure IB tests are run on ring */
flush_delayed_work(&adev->late_init_work);
flush_delayed_work(&adev->delayed_init_work);
file_priv->driver_priv = NULL;
@@ -1006,7 +1010,7 @@ int amdgpu_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
goto error_vm;
}
if (amdgpu_sriov_vf(adev)) {
if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
uint64_t csa_addr = amdgpu_csa_vaddr(adev) & AMDGPU_GMC_HOLE_MASK;
r = amdgpu_map_static_csa(adev, &fpriv->vm, adev->virt.csa_obj,
@@ -1069,7 +1073,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
amdgpu_vm_bo_rmv(adev, fpriv->prt_va);
if (amdgpu_sriov_vf(adev)) {
if (amdgpu_mcbp || amdgpu_sriov_vf(adev)) {
/* TODO: how to handle reserve failure */
BUG_ON(amdgpu_bo_reserve(adev->virt.csa_obj, true));
amdgpu_vm_bo_rmv(adev, fpriv->csa_va);

Some files were not shown because too many files have changed in this diff.