
drm pull for 4.19-rc1

-----BEGIN PGP SIGNATURE-----
 
 iQIcBAABAgAGBQJbc41pAAoJEAx081l5xIa+ZrAP/AzKj4i4pBLVJcvNZ2BwD+UD
 ZNSNj2iqCJ5+Jo/WtIwQ8tLct9UqfVssUwBke6tZksiLdTigGPTUyVIAdK+9kyWD
 D00m3x/pToJrSF2D0FwxQlPUtPkohp9N+E6+TU7gd1oCasZfBzmcEpoVAmZf+NWE
 kN1xXpmGxZWpu0wc7JA2lv9MuUTijCwIqJqa5E0bB3z06G5mw+PJ89kYzMx19OyA
 ZYQK8y3A40ZGl8UbajZ4xg9pqFCRYFFHGqfYlpUWWTh0XMAXu8+Yqzh3dJxmak7r
 4u2pdQBsxPMZO8qKBHpVvI7Zhoe0Ntnolc0XVD+2IbqqnTprVbQs0bWf3YyfUlQi
 1/9bWFK67W0LEuzac6M7a7EQqFNiHF13Btao7aqENTIe/GaCZJoopaiRMAmh6EHD
 4PezeYqrW8cSaPj6OKouL1BhW9Bjixsg0bvjS/uB6m4KekFCt1++BDFGzkqvm6Mo
 SVW7nkJoCFpCASaR7DhUEOPexaHeJ65HCDDUvYdqz9jd2w1TgvvanEZWual1NwEm
 ImA8A4wGZ/3KijpyyKm0gE96RX7+zMMZ3brW6p1vhUUKVYJCrvSr5jrXH5+2k6Aw
 Y455doGL87IRkwyje/YbQF0I8pbUZD9QS5wII13tLGwOH9/uC/Xl6dHNM40gtqyh
 W4gEdY+NAMJmYLvRNawa
 =g9rD
 -----END PGP SIGNATURE-----

Merge tag 'drm-next-2018-08-15' of git://anongit.freedesktop.org/drm/drm

Pull drm updates from Dave Airlie:
 "This is the main drm pull request for 4.19.

  Rob has support for new Qualcomm hardware that I'll send along
  separately. This has the display part of it; the remaining pull is for
  the acceleration engine.

  This also contains a wound-wait/wait-die mutex rework; Peter has acked
  it for merging via my tree.

  Otherwise mostly the usual level of activity. Summary:

  core:
   - Wound-wait/wait-die mutex rework
   - Add writeback connector type
   - Add "content type" property for HDMI
   - Move GEM bo to drm_framebuffer
   - Initial gpu scheduler documentation
   - GPU scheduler fixes for dying processes
   - Console deferred fbcon takeover support
   - Displayport support for CEC tunneling over AUX

  panel:
   - otm8009a panel driver fixes
   - Innolux TV123WAM and G070Y2-L01 panel driver
   - Ilitek ILI9881c panel driver
   - Rocktech RK070ER9427 LCD
   - EDT ETM0700G0EDH6 and EDT ETM0700G0BDH6
   - DLC DLC0700YZG-1
   - BOE HV070WSA-100
   - newhaven, nhd-4.3-480272ef-atxl LCD
   - DataImage SCF0700C48GGU18
   - Sharp LQ035Q7DB03
   - p079zca: Refactor to support multiple panels

  tinydrm:
   - ILI9341 display panel

  New driver:
   - vkms - virtual kms driver for testing.

  i915:
   - Icelake:
        Display enablement
        DSI support
        IRQ support
        Powerwell support
   - GPU reset fixes and improvements
   - Full ppgtt support refactoring
   - PSR fixes and improvements
   - Execlist improvements
   - GuC related fixes

  amdgpu:
   - Initial amdgpu documentation
   - JPEG engine support on VCN
   - CIK uses powerplay by default
   - Move to using core PCIE functionality for gens/lanes
   - DC/Powerplay interface rework
   - Stutter mode support for RV
   - Vega12 Powerplay updates
   - GFXOFF fixes
   - GPUVM fault debugging
   - Vega12 GFXOFF
   - DC improvements
   - DC i2c/aux changes
   - UVD 7.2 fixes
   - Powerplay fixes for Polaris12, CZ/ST
   - command submission bo_list fixes

  amdkfd:
   - Raven support
   - Power management fixes

  udl:
   - Cleanups and fixes

  nouveau:
   - misc fixes and cleanups.

  msm:
   - DPU1 display controller support in sdm845
   - GPU coredump support.

  vmwgfx:
   - Atomic modesetting validation fixes
   - Support for multisample surfaces

  armada:
   - Atomic modesetting support completed.

  exynos:
   - IPPv2 fixes
   - Move g2d to component framework
   - Suspend/resume support cleanups
   - Driver cleanups

  imx:
   - CSI configuration improvements
   - Driver cleanups
   - Use atomic suspend/resume helpers
   - ipu-v3 V4L2 XRGB32/XBGR32 support

  pl111:
   - Add Nomadik LCDC variant

  v3d:
   - GPU scheduler jobs management

  sun4i:
   - R40 display engine support
   - TCON TOP driver

  mediatek:
   - MT2712 SoC support

  rockchip:
   - vop fixes

  omapdrm:
   - Workaround for DRA7 errata i932
   - Fix mm_list locking

  mali-dp:
   - Writeback implementation
   - PM improvements
   - Internal error reporting debugfs

  tilcdc:
   - Single fix for deferred probing

  hdlcd:
   - Teardown fixes

  tda998x:
   - Converted to a bridge driver.

  etnaviv:
   - Misc fixes"

* tag 'drm-next-2018-08-15' of git://anongit.freedesktop.org/drm/drm: (1506 commits)
  drm/amdgpu/sriov: give 8s for recover vram under RUNTIME
  drm/scheduler: fix param documentation
  drm/i2c: tda998x: correct PLL divider calculation
  drm/i2c: tda998x: get rid of private fill_modes function
  drm/i2c: tda998x: move mode_valid() to bridge
  drm/i2c: tda998x: register bridge outside of component helper
  drm/i2c: tda998x: cleanup from previous changes
  drm/i2c: tda998x: allocate tda998x_priv inside tda998x_create()
  drm/i2c: tda998x: convert to bridge driver
  drm/scheduler: fix timeout worker setup for out of order job completions
  drm/amd/display: display connected to dp-1 does not light up
  drm/amd/display: update clk for various HDMI color depths
  drm/amd/display: program display clock on cache match
  drm/amd/display: Add NULL check for enabling dp ss
  drm/amd/display: add vbios table check for enabling dp ss
  drm/amd/display: Don't share clk source between DP and HDMI
  drm/amd/display: Fix DP HBR2 Eye Diagram Pattern on Carrizo
  drm/amd/display: Use calculated disp_clk_khz value for dce110
  drm/amd/display: Implement custom degamma lut on dcn
  drm/amd/display: Destroy aux_engines only once
  ...
Linus Torvalds 2018-08-15 17:39:07 -07:00
commit 54dbe75bbf
1126 changed files with 77861 additions and 25406 deletions


@ -74,6 +74,12 @@ Required properties for DSI:
The 3 clocks output from the DSI analog PHY: dsi[01]_byte,
dsi[01]_ddr2, and dsi[01]_ddr
Required properties for the TXP (writeback) block:
- compatible: Should be "brcm,bcm2835-txp"
- reg: Physical base address and length of the TXP block's registers
- interrupts: The interrupt number
See bindings/interrupt-controller/brcm,bcm2835-armctrl-ic.txt
[1] Documentation/devicetree/bindings/media/video-interfaces.txt
Example:


@ -0,0 +1,27 @@
Ilitek ILI9341 display panels
This binding is for display panels using an Ilitek ILI9341 controller in SPI
mode.
Required properties:
- compatible: "adafruit,yx240qv29", "ilitek,ili9341"
- dc-gpios: D/C pin
- reset-gpios: Reset pin
The node for this driver must be a child node of a SPI controller, hence
all mandatory properties described in ../spi/spi-bus.txt must be specified.
Optional properties:
- rotation: panel rotation in degrees counter clockwise (0,90,180,270)
- backlight: phandle of the backlight device attached to the panel
Example:
display@0{
compatible = "adafruit,yx240qv29", "ilitek,ili9341";
reg = <0>;
spi-max-frequency = <32000000>;
dc-gpios = <&gpio0 9 GPIO_ACTIVE_HIGH>;
reset-gpios = <&gpio0 8 GPIO_ACTIVE_HIGH>;
rotation = <270>;
backlight = <&backlight>;
};


@ -40,7 +40,7 @@ Required properties (all function blocks):
"mediatek,<chip>-dpi" - DPI controller, see mediatek,dpi.txt
"mediatek,<chip>-disp-mutex" - display mutex
"mediatek,<chip>-disp-od" - overdrive
the supported chips are mt2701 and mt8173.
the supported chips are mt2701, mt2712 and mt8173.
- reg: Physical base address and length of the function block register space
- interrupts: The interrupt signal from the function block (required, except for
merge and split function blocks).


@ -0,0 +1,131 @@
Qualcomm Technologies, Inc. DPU KMS
Description:
Device tree bindings for MSM Mobile Display Subsystem (MDSS) that encapsulates
sub-blocks like DPU display controller, DSI and DP interfaces etc.
The DPU display controller is found in SDM845 SoC.
MDSS:
Required properties:
- compatible: "qcom,sdm845-mdss"
- reg: physical base address and length of controller's registers.
- reg-names: register region names. The following region is required:
* "mdss"
- power-domains: a power domain consumer specifier according to
Documentation/devicetree/bindings/power/power_domain.txt
- clocks: list of clock specifiers for clocks needed by the device.
- clock-names: device clock names, must be in same order as clocks property.
The following clocks are required:
* "iface"
* "bus"
* "core"
- interrupts: interrupt signal from MDSS.
- interrupt-controller: identifies the node as an interrupt controller.
- #interrupt-cells: specifies the number of cells needed to encode an interrupt
source, should be 1.
- iommus: phandle of iommu device node.
- #address-cells: number of address cells for the MDSS children. Should be 1.
- #size-cells: Should be 1.
- ranges: parent bus address space is the same as the child bus address space.
Optional properties:
- assigned-clocks: list of clock specifiers for clocks needing rate assignment
- assigned-clock-rates: list of clock frequencies sorted in the same order as
the assigned-clocks property.
MDP:
Required properties:
- compatible: "qcom,sdm845-dpu"
- reg: physical base address and length of controller's registers.
- reg-names : register region names. The following region is required:
* "mdp"
* "vbif"
- clocks: list of clock specifiers for clocks needed by the device.
- clock-names: device clock names, must be in same order as clocks property.
The following clocks are required.
* "bus"
* "iface"
* "core"
* "vsync"
- interrupts: interrupt line from DPU to MDSS.
- ports: contains the list of output ports from DPU device. These ports connect
to interfaces that are external to the DPU hardware, such as DSI, DP etc.
Each output port contains an endpoint that describes how it is connected to an
external interface. These are described by the standard properties documented
here:
Documentation/devicetree/bindings/graph.txt
Documentation/devicetree/bindings/media/video-interfaces.txt
Port 0 -> DPU_INTF1 (DSI1)
Port 1 -> DPU_INTF2 (DSI2)
Optional properties:
- assigned-clocks: list of clock specifiers for clocks needing rate assignment
- assigned-clock-rates: list of clock frequencies sorted in the same order as
the assigned-clocks property.
Example:
mdss: mdss@ae00000 {
compatible = "qcom,sdm845-mdss";
reg = <0xae00000 0x1000>;
reg-names = "mdss";
power-domains = <&clock_dispcc 0>;
clocks = <&gcc GCC_DISP_AHB_CLK>, <&gcc GCC_DISP_AXI_CLK>,
<&clock_dispcc DISP_CC_MDSS_MDP_CLK>;
clock-names = "iface", "bus", "core";
assigned-clocks = <&clock_dispcc DISP_CC_MDSS_MDP_CLK>;
assigned-clock-rates = <300000000>;
interrupts = <GIC_SPI 83 IRQ_TYPE_LEVEL_HIGH>;
interrupt-controller;
#interrupt-cells = <1>;
iommus = <&apps_iommu 0>;
#address-cells = <2>;
#size-cells = <1>;
ranges = <0 0 0xae00000 0xb2008>;
mdss_mdp: mdp@ae01000 {
compatible = "qcom,sdm845-dpu";
reg = <0 0x1000 0x8f000>, <0 0xb0000 0x2008>;
reg-names = "mdp", "vbif";
clocks = <&clock_dispcc DISP_CC_MDSS_AHB_CLK>,
<&clock_dispcc DISP_CC_MDSS_AXI_CLK>,
<&clock_dispcc DISP_CC_MDSS_MDP_CLK>,
<&clock_dispcc DISP_CC_MDSS_VSYNC_CLK>;
clock-names = "iface", "bus", "core", "vsync";
assigned-clocks = <&clock_dispcc DISP_CC_MDSS_MDP_CLK>,
<&clock_dispcc DISP_CC_MDSS_VSYNC_CLK>;
assigned-clock-rates = <0 0 300000000 19200000>;
interrupts = <0 IRQ_TYPE_LEVEL_HIGH>;
ports {
#address-cells = <1>;
#size-cells = <0>;
port@0 {
reg = <0>;
dpu_intf1_out: endpoint {
remote-endpoint = <&dsi0_in>;
};
};
port@1 {
reg = <1>;
dpu_intf2_out: endpoint {
remote-endpoint = <&dsi1_in>;
};
};
};
};
};


@ -119,6 +119,20 @@ Required properties:
Optional properties:
- qcom,dsi-phy-regulator-ldo-mode: Boolean value indicating if the LDO mode PHY
regulator is wanted.
- qcom,mdss-mdp-transfer-time-us: Specifies the dsi transfer time for command mode
panels in microseconds. Driver uses this number to adjust
the clock rate according to the expected transfer time.
Increasing this value would slow down the mdp processing
and can result in slower performance.
Decreasing this value can speed up the mdp processing,
but this can also impact power consumption.
As a rule this time should not be higher than the time
that would be expected with the processing at the
dsi link rate since anyways this would be the maximum
transfer time that could be achieved.
If ping pong split is enabled, this time should not be higher
than two times the dsi link rate time.
If the property is not specified, then the default value is 14000 us.
[1] Documentation/devicetree/bindings/clock/clock-bindings.txt
[2] Documentation/devicetree/bindings/graph.txt
@ -169,6 +183,8 @@ Example:
qcom,master-dsi;
qcom,sync-dual-dsi;
qcom,mdss-mdp-transfer-time-us = <12000>;
pinctrl-names = "default", "sleep";
pinctrl-0 = <&dsi_active>;
pinctrl-1 = <&dsi_suspend>;


@ -0,0 +1,29 @@
AU Optronics Corporation 7.0" FHD (800 x 480) TFT LCD panel
Required properties:
- compatible: should be "auo,g070vvn01"
- backlight: phandle of the backlight device attached to the panel
- power-supply: single regulator to provide the supply voltage
Required nodes:
- port: Parallel port mapping to connect this display
This panel needs a single power supply voltage. Its backlight is controlled
via a PWM signal.
Example:
--------
Example device-tree definition when connected to iMX6Q based board
lcd_panel: lcd-panel {
compatible = "auo,g070vvn01";
backlight = <&backlight_lcd>;
power-supply = <&reg_display>;
port {
lcd_panel_in: endpoint {
remote-endpoint = <&lcd_display_out>;
};
};
};


@ -0,0 +1,28 @@
BOE HV070WSA-100 7.01" WSVGA TFT LCD panel
Required properties:
- compatible: should be "boe,hv070wsa-100"
- power-supply: regulator to provide the VCC supply voltage (3.3 volts)
- enable-gpios: GPIO pin to enable and disable panel (active high)
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.
The device node can contain one 'port' child node with one child
'endpoint' node, according to the bindings defined in [1]. This
node should describe panel's video bus.
[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
Example:
panel: panel {
compatible = "boe,hv070wsa-100";
power-supply = <&vcc_3v3_reg>;
enable-gpios = <&gpd1 3 GPIO_ACTIVE_HIGH>;
port {
panel_ep: endpoint {
remote-endpoint = <&bridge_out_ep>;
};
};
};


@ -0,0 +1,8 @@
DataImage, Inc. 7" WVGA (800x480) TFT LCD panel with 24-bit parallel interface.
Required properties:
- compatible: should be "dataimage,scf0700c48ggu18"
- power-supply: as specified in the base binding
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.


@ -0,0 +1,13 @@
DLC Display Co. DLC0700YZG-1 7.0" WSVGA TFT LCD panel
Required properties:
- compatible: should be "dlc,dlc0700yzg-1"
- power-supply: See simple-panel.txt
Optional properties:
- reset-gpios: See panel-common.txt
- enable-gpios: See simple-panel.txt
- backlight: See simple-panel.txt
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.


@ -0,0 +1,39 @@
Emerging Display Technology Corp. Displays
==========================================
Display bindings for EDT Display Technology Corp. Displays which are
compatible with the simple-panel binding, which is specified in
simple-panel.txt
5,7" WVGA TFT Panels
--------------------
+-----------------+---------------------+-------------------------------------+
| Identifier      | compatible          | description                         |
+=================+=====================+=====================================+
| ET057090DHU     | edt,et057090dhu     | 5.7" VGA TFT LCD panel              |
+-----------------+---------------------+-------------------------------------+
7,0" WVGA TFT Panels
--------------------
+-----------------+---------------------+-------------------------------------+
| Identifier      | compatible          | description                         |
+=================+=====================+=====================================+
| ETM0700G0DH6    | edt,etm070080dh6    | WVGA TFT Display with capacitive    |
|                 |                     | Touchscreen                         |
+-----------------+---------------------+-------------------------------------+
| ETM0700G0BDH6   | edt,etm070080bdh6   | Same as ETM0700G0DH6 but with       |
|                 |                     | inverted pixel clock.               |
+-----------------+---------------------+-------------------------------------+
| ETM0700G0EDH6   | edt,etm070080edh6   | Same display as the ETM0700G0BDH6,  |
|                 |                     | but with changed Hardware for the   |
|                 |                     | backlight and the touch interface   |
+-----------------+---------------------+-------------------------------------+
| ET070080DH6     | edt,etm070080dh6    | Same timings as the ETM0700G0DH6,   |
|                 |                     | but with resistive touch.           |
+-----------------+---------------------+-------------------------------------+


@ -1,10 +0,0 @@
Emerging Display Technology Corp. ET070080DH6 7.0" WVGA TFT LCD panel
Required properties:
- compatible: should be "edt,et070080dh6"
This panel is the same as ETM0700G0DH6 except for the touchscreen.
ET070080DH6 is the model with resistive touch.
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.


@ -1,10 +0,0 @@
Emerging Display Technology Corp. ETM0700G0DH6 7.0" WVGA TFT LCD panel
Required properties:
- compatible: should be "edt,etm0700g0dh6"
This panel is the same as ET070080DH6 except for the touchscreen.
ETM0700G0DH6 is the model with capacitive multitouch.
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.


@ -0,0 +1,20 @@
Ilitek ILI9881c based MIPI-DSI panels
Required properties:
- compatible: must be "ilitek,ili9881c" and one of:
* "bananapi,lhr050h41"
- reg: DSI virtual channel used by that screen
- power-supply: phandle to the power regulator
- reset-gpios: a GPIO phandle for the reset pin
Optional properties:
- backlight: phandle to the backlight used
Example:
panel@0 {
compatible = "bananapi,lhr050h41", "ilitek,ili9881c";
reg = <0>;
power-supply = <&reg_display>;
reset-gpios = <&r_pio 0 5 GPIO_ACTIVE_LOW>; /* PL05 */
backlight = <&pwm_bl>;
};


@ -0,0 +1,12 @@
Innolux G070Y2-L01 7" WVGA (800x480) TFT LCD panel
Required properties:
- compatible: should be "innolux,g070y2-l01"
- power-supply: as specified in the base binding
Optional properties:
- backlight: as specified in the base binding
- enable-gpios: as specified in the base binding
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.


@ -0,0 +1,24 @@
Innolux P097PFG 9.7" 1536x2048 TFT LCD panel
Required properties:
- compatible: should be "innolux,p097pfg"
- reg: DSI virtual channel of the peripheral
- avdd-supply: phandle of the regulator that provides positive voltage
- avee-supply: phandle of the regulator that provides negative voltage
- enable-gpios: panel enable gpio
Optional properties:
- backlight: phandle of the backlight device attached to the panel
Example:
&mipi_dsi {
panel {
compatible = "innolux,p079zca";
reg = <0>;
avdd-supply = <...>;
avee-supply = <...>;
backlight = <&backlight>;
enable-gpios = <&gpio1 13 GPIO_ACTIVE_HIGH>;
};
};


@ -0,0 +1,20 @@
Innolux TV123WAM 12.3 inch eDP 2K display panel
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.
Required properties:
- compatible: should be "innolux,tv123wam"
- power-supply: regulator to provide the supply voltage
Optional properties:
- enable-gpios: GPIO pin to enable or disable the panel
- backlight: phandle of the backlight device attached to the panel
Example:
panel_edp: panel-edp {
compatible = "innolux,tv123wam";
enable-gpios = <&msmgpio 31 GPIO_ACTIVE_LOW>;
power-supply = <&pm8916_l2>;
backlight = <&backlight>;
};


@ -0,0 +1,22 @@
Kingdisplay KD097D04 9.7" 1536x2048 TFT LCD panel
Required properties:
- compatible: should be "kingdisplay,kd097d04"
- reg: DSI virtual channel of the peripheral
- power-supply: phandle of the regulator that provides the supply voltage
- enable-gpios: panel enable gpio
Optional properties:
- backlight: phandle of the backlight device attached to the panel
Example:
&mipi_dsi {
panel {
compatible = "kingdisplay,kd097d04";
reg = <0>;
power-supply = <...>;
backlight = <&backlight>;
enable-gpios = <&gpio1 13 GPIO_ACTIVE_HIGH>;
};
};


@ -1,7 +1,7 @@
Emerging Display Technology Corp. 5.7" VGA TFT LCD panel
Newhaven Display International 480 x 272 TFT LCD panel
Required properties:
- compatible: should be "edt,et057090dhu"
- compatible: should be "newhaven,nhd-4.3-480272ef-atxl"
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.


@ -0,0 +1,25 @@
Rocktech Display Ltd. RK070ER9427 800(RGB)x480 TFT LCD panel
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.
Required properties:
- compatible: should be "rocktech,rk070er9427"
Optional properties:
- backlight: phandle of the backlight device attached to the panel
Optional nodes:
- Video port for LCD panel input.
Example:
panel {
compatible = "rocktech,rk070er9427";
backlight = <&backlight_lcd>;
port {
lcd_panel_in: endpoint {
remote-endpoint = <&lcd_display_out>;
};
};
};


@ -0,0 +1,12 @@
Sharp LQ035Q7DB03 3.5" QVGA TFT LCD panel
Required properties:
- compatible: should be "sharp,lq035q7db03"
- power-supply: phandle of the regulator that provides the supply voltage
Optional properties:
- enable-gpios: GPIO pin to enable or disable the panel
- backlight: phandle of the backlight device attached to the panel
This binding is compatible with the simple-panel binding, which is specified
in simple-panel.txt in this directory.


@ -103,6 +103,7 @@ Required properties:
- compatible: value must be one of:
* allwinner,sun8i-a83t-hdmi-phy
* allwinner,sun8i-h3-hdmi-phy
* allwinner,sun50i-a64-hdmi-phy
- reg: base address and size of memory-mapped region
- clocks: phandles to the clocks feeding the HDMI PHY
* bus: the HDMI PHY interface clock
@ -111,8 +112,9 @@ Required properties:
- resets: phandle to the reset controller driving the PHY
- reset-names: must be "phy"
H3 HDMI PHY requires additional clock:
H3 and A64 HDMI PHY require additional clocks:
- pll-0: parent of phy clock
- pll-1: second possible phy clock parent (A64 only)
TV Encoder
----------
@ -145,6 +147,7 @@ Required properties:
* allwinner,sun8i-a33-tcon
* allwinner,sun8i-a83t-tcon-lcd
* allwinner,sun8i-a83t-tcon-tv
* allwinner,sun8i-r40-tcon-tv
* allwinner,sun8i-v3s-tcon
* allwinner,sun9i-a80-tcon-lcd
* allwinner,sun9i-a80-tcon-tv
@ -179,7 +182,7 @@ For TCONs with channel 0, there is one more clock required:
For TCONs with channel 1, there is one more clock required:
- 'tcon-ch1': The clock driving the TCON channel 1
When TCON support LVDS (all TCONs except TV TCON on A83T and those found
When TCON support LVDS (all TCONs except TV TCONs on A83T, R40 and those found
in A13, H3, H5 and V3s SoCs), you need one more reset line:
- 'lvds': The reset line driving the LVDS logic
@ -187,6 +190,62 @@ And on the A23, A31, A31s and A33, you need one more clock line:
- 'lvds-alt': An alternative clock source, separate from the TCON channel 0
clock, that can be used to drive the LVDS clock
TCON TOP
--------
TCON TOP's main purpose is to configure the whole display pipeline. It
determines relationships between mixers and TCONs, selects the source TCON for
HDMI, muxes LCD and TV encoder GPIO output, selects the TV encoder clock source
and contains additional TV TCON and DSI gates.
It allows the display pipeline to be configured in very different ways:

                                  / LCD0/LVDS0
                 / [0] TCON-LCD0
                 |                \ MIPI DSI
 mixer0          |
        \        / [1] TCON-LCD1 - LCD1/LVDS1
         TCON-TOP
        /        \ [2] TCON-TV0 [0] - TVE0/RGB
 mixer1          |                  \
                 |                   TCON-TOP - HDMI
                 |                  /
                 \ [3] TCON-TV1 [1] - TVE1/RGB

Note that both TCON TOP references refer to the same physical unit. Both
mixers can be connected to any TCON.
Required properties:
- compatible: value must be one of:
* allwinner,sun8i-r40-tcon-top
- reg: base address and size of the memory-mapped region.
- clocks: phandle to the clocks feeding the TCON TOP
* bus: TCON TOP interface clock
* tcon-tv0: TCON TV0 clock
* tve0: TVE0 clock
* tcon-tv1: TCON TV1 clock
* tve1: TVE1 clock
* dsi: MIPI DSI clock
- clock-names: clock name mentioned above
- resets: phandle to the reset line driving the TCON TOP
- #clock-cells : must contain 1
- clock-output-names: Names of clocks created for TCON TV0 channel clock,
TCON TV1 channel clock and DSI channel clock, in that order.
- ports: A ports node with endpoint definitions as defined in
Documentation/devicetree/bindings/media/video-interfaces.txt. 6 ports should
be defined:
* port 0 is input for mixer0 mux
* port 1 is output for mixer0 mux
* port 2 is input for mixer1 mux
* port 3 is output for mixer1 mux
* port 4 is input for HDMI mux
* port 5 is output for HDMI mux
All output endpoints for mixer muxes and input endpoints for HDMI mux should
have reg property with the id of the target TCON, as shown in above graph
(0-3 for mixer muxes and 0-1 for HDMI mux). All ports should have only one
endpoint connected to remote endpoint.
DRC
---
@ -341,6 +400,7 @@ Required properties:
* allwinner,sun8i-a33-display-engine
* allwinner,sun8i-a83t-display-engine
* allwinner,sun8i-h3-display-engine
* allwinner,sun8i-r40-display-engine
* allwinner,sun8i-v3s-display-engine
* allwinner,sun9i-a80-display-engine


@ -8,6 +8,7 @@ abracon Abracon Corporation
actions Actions Semiconductor Co., Ltd.
active-semi Active-Semi International Inc
ad Avionic Design GmbH
adafruit Adafruit Industries, LLC
adapteva Adapteva, Inc.
adaptrum Adaptrum, Inc.
adh AD Holdings Plc.
@ -87,6 +88,7 @@ cubietech Cubietech, Ltd.
cypress Cypress Semiconductor Corporation
cznic CZ.NIC, z.s.p.o.
dallas Maxim Integrated Products (formerly Dallas Semiconductor)
dataimage DataImage, Inc.
davicom DAVICOM Semiconductor, Inc.
delta Delta Electronics, Inc.
denx Denx Software Engineering
@ -95,6 +97,7 @@ dh DH electronics GmbH
digi Digi International Inc.
digilent Diglent, Inc.
dioo Dioo Microcircuit Co., Ltd
dlc DLC Display Co., Ltd.
dlg Dialog Semiconductor
dlink D-Link Corporation
dmo Data Modul AG
@ -190,6 +193,7 @@ keymile Keymile GmbH
khadas Khadas
kiebackpeter Kieback & Peter GmbH
kinetic Kinetic Technologies
kingdisplay King & Display Technology Co., Ltd.
kingnovel Kingnovel Technology Co., Ltd.
koe Kaohsiung Opto-Electronics Inc.
kosagi Sutajio Ko-Usagi PTE Ltd.


@ -130,6 +130,12 @@ Reservation Objects
DMA Fences
----------
.. kernel-doc:: drivers/dma-buf/dma-fence.c
:doc: DMA fences overview
DMA Fences Functions Reference
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. kernel-doc:: drivers/dma-buf/dma-fence.c
:export:


@ -155,6 +155,13 @@ C. Boot options
used by text. By default, this area will be black. The 'color' value
is an integer number that depends on the framebuffer driver being used.
6. fbcon=nodefer
If the kernel is compiled with deferred fbcon takeover support, normally
the framebuffer contents, left in place by the firmware/bootloader, will
be preserved until some text is actually output to the console.
This option causes fbcon to bind immediately to the fbdev device.
C. Attaching, Detaching and Unloading
Before going on how to attach, detach and unload the framebuffer console, an


@ -0,0 +1,129 @@
=========================
drm/amdgpu AMDgpu driver
=========================
The drm/amdgpu driver supports all AMD Radeon GPUs based on the Graphics Core
Next (GCN) architecture.
Module Parameters
=================
The amdgpu driver supports the following module parameters:
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
Core Driver Infrastructure
==========================
This section covers core driver infrastructure.
.. _amdgpu_memory_domains:
Memory Domains
--------------
.. kernel-doc:: include/uapi/drm/amdgpu_drm.h
:doc: memory domains
Buffer Objects
--------------
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
:doc: amdgpu_object
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
:internal:
PRIME Buffer Sharing
--------------------
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
:doc: PRIME Buffer Sharing
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_prime.c
:internal:
MMU Notifier
------------
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
:doc: MMU Notifier
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_mn.c
:internal:
AMDGPU Virtual Memory
---------------------
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
:doc: GPUVM
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
:internal:
Interrupt Handling
------------------
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
:doc: Interrupt Handling
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
:internal:
GPU Power/Thermal Controls and Monitoring
=========================================
This section covers hwmon and power/thermal controls.
HWMON Interfaces
----------------
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
:doc: hwmon
GPU sysfs Power State Interfaces
--------------------------------
GPU power controls are exposed via sysfs files.
power_dpm_state
~~~~~~~~~~~~~~~
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
:doc: power_dpm_state
power_dpm_force_performance_level
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
:doc: power_dpm_force_performance_level
pp_table
~~~~~~~~
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
:doc: pp_table
pp_od_clk_voltage
~~~~~~~~~~~~~~~~~
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
:doc: pp_od_clk_voltage
pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
:doc: pp_dpm_sclk pp_dpm_mclk pp_dpm_pcie
pp_power_profile_mode
~~~~~~~~~~~~~~~~~~~~~
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
:doc: pp_power_profile_mode
busy_percent
~~~~~~~~~~~~
.. kernel-doc:: drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
:doc: busy_percent


@ -4,6 +4,7 @@ GPU Driver Documentation
.. toctree::
amdgpu
i915
meson
pl111


@ -0,0 +1,12 @@
=================
Kernel clients
=================
.. kernel-doc:: drivers/gpu/drm/drm_client.c
:doc: overview
.. kernel-doc:: include/drm/drm_client.h
:internal:
.. kernel-doc:: drivers/gpu/drm/drm_client.c
:export:


@ -109,6 +109,15 @@ Framebuffer CMA Helper Functions Reference
.. _drm_bridges:
Framebuffer GEM Helper Reference
================================
.. kernel-doc:: drivers/gpu/drm/drm_gem_framebuffer_helper.c
:doc: overview
.. kernel-doc:: drivers/gpu/drm/drm_gem_framebuffer_helper.c
:export:
Bridges
=======
@ -169,6 +178,15 @@ Display Port Helper Functions Reference
.. kernel-doc:: drivers/gpu/drm/drm_dp_helper.c
:export:
Display Port CEC Helper Functions Reference
===========================================
.. kernel-doc:: drivers/gpu/drm/drm_dp_cec.c
:doc: dp cec helpers
.. kernel-doc:: drivers/gpu/drm/drm_dp_cec.c
:export:
Display Port Dual Mode Adaptor Helper Functions Reference
=========================================================
@ -282,13 +300,13 @@ Auxiliary Modeset Helpers
.. kernel-doc:: drivers/gpu/drm/drm_modeset_helper.c
:export:
Framebuffer GEM Helper Reference
================================
OF/DT Helpers
=============
.. kernel-doc:: drivers/gpu/drm/drm_gem_framebuffer_helper.c
.. kernel-doc:: drivers/gpu/drm/drm_of.c
:doc: overview
.. kernel-doc:: drivers/gpu/drm/drm_gem_framebuffer_helper.c
.. kernel-doc:: drivers/gpu/drm/drm_of.c
:export:
Legacy Plane Helper Reference


@ -56,11 +56,12 @@ Overview
The basic object structure KMS presents to userspace is fairly simple.
Framebuffers (represented by :c:type:`struct drm_framebuffer <drm_framebuffer>`,
see `Frame Buffer Abstraction`_) feed into planes. One or more (or even no)
planes feed their pixel data into a CRTC (represented by :c:type:`struct
drm_crtc <drm_crtc>`, see `CRTC Abstraction`_) for blending. The precise
blending step is explained in more detail in `Plane Composition Properties`_ and
related chapters.
see `Frame Buffer Abstraction`_) feed into planes. Planes are represented by
:c:type:`struct drm_plane <drm_plane>`, see `Plane Abstraction`_ for more
details. One or more (or even no) planes feed their pixel data into a CRTC
(represented by :c:type:`struct drm_crtc <drm_crtc>`, see `CRTC Abstraction`_)
for blending. The precise blending step is explained in more detail in `Plane
Composition Properties`_ and related chapters.
For the output routing the first step is encoders (represented by
:c:type:`struct drm_encoder <drm_encoder>`, see `Encoder Abstraction`_). Those
@ -373,6 +374,15 @@ Connector Functions Reference
.. kernel-doc:: drivers/gpu/drm/drm_connector.c
:export:
Writeback Connectors
--------------------
.. kernel-doc:: drivers/gpu/drm/drm_writeback.c
:doc: overview
.. kernel-doc:: drivers/gpu/drm/drm_writeback.c
:export:
Encoder Abstraction
===================
@ -457,7 +467,7 @@ Output discovery and initialization example
drm_encoder_init(dev, &intel_output->enc, &intel_crt_enc_funcs,
DRM_MODE_ENCODER_DAC);
drm_mode_connector_attach_encoder(&intel_output->base,
drm_connector_attach_encoder(&intel_output->base,
&intel_output->enc);
/* Set up the DDC bus. */
@ -517,6 +527,12 @@ Standard Connector Properties
.. kernel-doc:: drivers/gpu/drm/drm_connector.c
:doc: standard connector properties
HDMI Specific Connector Properties
----------------------------------
.. kernel-doc:: drivers/gpu/drm/drm_connector.c
:doc: HDMI connector properties
Plane Composition Properties
----------------------------


@ -395,6 +395,8 @@ VMA Offset Manager
.. kernel-doc:: drivers/gpu/drm/drm_vma_manager.c
:export:
.. _prime_buffer_sharing:
PRIME Buffer Sharing
====================
@ -496,3 +498,21 @@ DRM Sync Objects
.. kernel-doc:: drivers/gpu/drm/drm_syncobj.c
:export:
GPU Scheduler
=============
Overview
--------
.. kernel-doc:: drivers/gpu/drm/scheduler/gpu_scheduler.c
:doc: Overview
Scheduler Function References
-----------------------------
.. kernel-doc:: include/drm/gpu_scheduler.h
:internal:
.. kernel-doc:: drivers/gpu/drm/scheduler/gpu_scheduler.c
:export:


@ -10,6 +10,7 @@ Linux GPU Driver Developer's Guide
drm-kms
drm-kms-helpers
drm-uapi
drm-client
drivers
vga-switcheroo
vgaarbiter


@ -17,6 +17,7 @@ Owner Module/Drivers,Group,Property Name,Type,Property Values,Object attached,De
,Virtual GPU,“suggested X”,RANGE,"Min=0, Max=0xffffffff",Connector,property to suggest an X offset for a connector
,,“suggested Y”,RANGE,"Min=0, Max=0xffffffff",Connector,property to suggest an Y offset for a connector
,Optional,"""aspect ratio""",ENUM,"{ ""None"", ""4:3"", ""16:9"" }",Connector,TDB
,Optional,"""content type""",ENUM,"{ ""No Data"", ""Graphics"", ""Photo"", ""Cinema"", ""Game"" }",Connector,TBD
i915,Generic,"""Broadcast RGB""",ENUM,"{ ""Automatic"", ""Full"", ""Limited 16:235"" }",Connector,"When this property is set to Limited 16:235 and CTM is set, the hardware will be programmed with the result of the multiplication of CTM by the limited range matrix to ensure the pixels normaly in the range 0..1.0 are remapped to the range 16/255..235/255."
,,“audio”,ENUM,"{ ""force-dvi"", ""off"", ""auto"", ""on"" }",Connector,TBD
,SDVO-TV,“mode”,ENUM,"{ ""NTSC_M"", ""NTSC_J"", ""NTSC_443"", ""PAL_B"" } etc.",Connector,TBD



@ -0,0 +1,96 @@
=====================
MSM Crash Dump Format
=====================
Following a GPU hang the MSM driver outputs debugging information via
/sys/kernel/dri/X/show or via devcoredump (/sys/class/devcoredump/dcdX/data).
This document describes how the output is formatted.
Each entry is in the form key: value. Section headers will not have a value
and all the contents of a section will be indented two spaces from the header.
Each section might have multiple array entries, the start of which is
designated by a (-). A short, purely illustrative sketch of this layout follows
the mappings list below.
Mappings
--------
kernel
The kernel version that generated the dump (UTS_RELEASE).
module
The module that generated the crashdump.
time
The kernel time at crash formatted as seconds.microseconds.
comm
Comm string for the binary that generated the fault.
cmdline
Command line for the binary that generated the fault.
revision
ID of the GPU that generated the crash formatted as
core.major.minor.patchlevel separated by dots.
rbbm-status
The current value of RBBM_STATUS which shows what top level GPU
components are in use at the time of crash.
ringbuffer
Section containing the contents of each ringbuffer. Each ringbuffer is
identified with an id number.
id
Ringbuffer ID (0 based index). Each ringbuffer in the section
will have its own unique id.
iova
GPU address of the ringbuffer.
last-fence
The last fence that was issued on the ringbuffer
retired-fence
The last fence retired on the ringbuffer.
rptr
The current read pointer (rptr) for the ringbuffer.
wptr
The current write pointer (wptr) for the ringbuffer.
size
Maximum size of the ringbuffer programmed in the hardware.
data
The contents of the ring encoded as ascii85. Only the used
portions of the ring will be printed.
bo
List of buffers from the hanging submission if available.
Each buffer object will have a unique iova.
iova
GPU address of the buffer object.
size
Allocated size of the buffer object.
data
The contents of the buffer object encoded with ascii85. Trailing
zeros at the end of the buffer will be skipped.
registers
Set of register values. Each entry is on its own line, enclosed
by brackets { }.
offset
Byte offset of the register from the start of the
GPU memory region.
value
Hexadecimal value of the register.
registers-hlsq
(5xx only) Register values from the HLSQ aperture.
Same format as the register section.
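
A purely structural sketch of this layout, using only the keys documented
above with placeholder values (no real dump output is reproduced; everything
in angle brackets is illustrative):

kernel: <UTS_RELEASE>
module: <module name>
time: <seconds.microseconds>
comm: <comm string>
cmdline: <command line>
revision: <core.major.minor.patchlevel>
rbbm-status: <value>
ringbuffer:
  - id: 0
    iova: <GPU address>
    last-fence: <fence>
    retired-fence: <fence>
    rptr: <value>
    wptr: <value>
    size: <size>
    data: <ascii85 encoded contents>
  - id: 1
    ...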


@ -0,0 +1,28 @@
=====================================
drm/v3d Broadcom V3D Graphics Driver
=====================================
.. kernel-doc:: drivers/gpu/drm/v3d/v3d_drv.c
:doc: Broadcom V3D Graphics Driver
GPU buffer object (BO) management
---------------------------------
.. kernel-doc:: drivers/gpu/drm/v3d/v3d_bo.c
:doc: V3D GEM BO management support
Address space management
===========================================
.. kernel-doc:: drivers/gpu/drm/v3d/v3d_mmu.c
:doc: Broadcom V3D MMU
GPU Scheduling
===========================================
.. kernel-doc:: drivers/gpu/drm/v3d/v3d_sched.c
:doc: Broadcom V3D scheduling
Interrupts
--------------
.. kernel-doc:: drivers/gpu/drm/v3d/v3d_irq.c
:doc: Interrupt management for the V3D engine


@ -1,4 +1,4 @@
Wait/Wound Deadlock-Proof Mutex Design
Wound/Wait Deadlock-Proof Mutex Design
======================================
Please read mutex-design.txt first, as it applies to wait/wound mutexes too.
@ -32,10 +32,26 @@ the oldest task) wins, and the one with the higher reservation id (i.e. the
younger task) unlocks all of the buffers that it has already locked, and then
tries again.
In the RDBMS literature this deadlock handling approach is called wait/wound:
The older tasks waits until it can acquire the contended lock. The younger tasks
needs to back off and drop all the locks it is currently holding, i.e. the
younger task is wounded.
In the RDBMS literature, a reservation ticket is associated with a transaction,
and the deadlock handling approach is called Wait-Die. The name is based on
the actions of a locking thread when it encounters an already locked mutex.
If the transaction holding the lock is younger, the locking transaction waits.
If the transaction holding the lock is older, the locking transaction backs off
and dies. Hence Wait-Die.
There is also another algorithm called Wound-Wait:
If the transaction holding the lock is younger, the locking transaction
wounds the transaction holding the lock, requesting it to die.
If the transaction holding the lock is older, it waits for the other
transaction. Hence Wound-Wait.
The two algorithms are both fair in that a transaction will eventually succeed.
However, the Wound-Wait algorithm is typically stated to generate fewer backoffs
compared to Wait-Die, but is, on the other hand, associated with more work than
Wait-Die when recovering from a backoff. Wound-Wait is also a preemptive
algorithm in that transactions are wounded by other transactions, and that
requires a reliable way to pick up the wounded condition and preempt the
running transaction. Note that this is not the same as process preemption. A
Wound-Wait transaction is considered preempted when it dies (returning
-EDEADLK) following a wound.
Concepts
--------
@ -47,18 +63,20 @@ Acquire context: To ensure eventual forward progress it is important that a task
trying to acquire locks doesn't grab a new reservation id, but keeps the one it
acquired when starting the lock acquisition. This ticket is stored in the
acquire context. Furthermore the acquire context keeps track of debugging state
to catch w/w mutex interface abuse.
to catch w/w mutex interface abuse. An acquire context represents a
transaction.
W/w class: In contrast to normal mutexes the lock class needs to be explicit for
w/w mutexes, since it is required to initialize the acquire context.
w/w mutexes, since it is required to initialize the acquire context. The lock
class also specifies what algorithm to use, Wound-Wait or Wait-Die.
Furthermore there are three different class of w/w lock acquire functions:
* Normal lock acquisition with a context, using ww_mutex_lock.
* Slowpath lock acquisition on the contending lock, used by the wounded task
after having dropped all already acquired locks. These functions have the
_slow postfix.
* Slowpath lock acquisition on the contending lock, used by the task that just
killed its transaction after having dropped all already acquired locks.
These functions have the _slow postfix.
From a simple semantics point-of-view the _slow functions are not strictly
required, since simply calling the normal ww_mutex_lock functions on the
@ -90,6 +108,12 @@ provided.
Usage
-----
The algorithm (Wait-Die vs Wound-Wait) is chosen by using either
DEFINE_WW_CLASS() (Wound-Wait) or DEFINE_WD_CLASS() (Wait-Die)
As a rough rule of thumb, use Wound-Wait iff you
expect the number of simultaneous competing transactions to be typically small,
and you want to reduce the number of rollbacks.
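
For orientation, below is a minimal sketch of the acquire/backoff pattern that
both classes serve. The names my_ww_class, struct my_obj and lock_pair() are
illustrative and not taken from this document; the same code works for either
algorithm, since only the DEFINE_WW_CLASS()/DEFINE_WD_CLASS() choice differs.

#include <linux/ww_mutex.h>

static DEFINE_WW_CLASS(my_ww_class);	/* or DEFINE_WD_CLASS() for Wait-Die */

struct my_obj {
	struct ww_mutex lock;
	/* payload */
};

/* Lock two objects, backing off and retrying on -EDEADLK. */
static int lock_pair(struct my_obj *a, struct my_obj *b,
		     struct ww_acquire_ctx *ctx)
{
	struct my_obj *objs[2] = { a, b };
	struct my_obj *contended = NULL;
	int i, ret;

	ww_acquire_init(ctx, &my_ww_class);
retry:
	for (i = 0; i < 2; i++) {
		if (objs[i] == contended) {
			/* Already taken in the slowpath below. */
			contended = NULL;
			continue;
		}
		ret = ww_mutex_lock(&objs[i]->lock, ctx);
		if (ret) {
			/* Drop everything taken so far in this pass. */
			while (i--)
				ww_mutex_unlock(&objs[i]->lock);
			if (contended)
				ww_mutex_unlock(&contended->lock);
			if (ret == -EDEADLK) {
				/* Back off: wait for the contended lock in
				 * the slowpath, then start over. */
				contended = objs[i];
				ww_mutex_lock_slow(&contended->lock, ctx);
				goto retry;
			}
			ww_acquire_fini(ctx);
			return ret;
		}
	}
	ww_acquire_done(ctx);
	return 0;	/* caller unlocks both, then calls ww_acquire_fini(ctx) */
}

The acquire context keeps its stamp across the backoff, so the retry continues
the same transaction, which matches the behaviour described above for both
Wait-Die and Wound-Wait.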
Three different ways to acquire locks within the same w/w class. Common
definitions for methods #1 and #2:
@ -220,7 +244,7 @@ mutexes are a natural fit for such a case for two reasons:
Note that this approach differs in two important ways from the above methods:
- Since the list of objects is dynamically constructed (and might very well be
different when retrying due to hitting the -EDEADLK wound condition) there's
different when retrying due to hitting the -EDEADLK die condition) there's
no need to keep any object on a persistent list when it's not locked. We can
therefore move the list_head into the object itself.
- On the other hand the dynamic object list construction also means that the -EALREADY return
@ -312,12 +336,23 @@ Design:
We maintain the following invariants for the wait list:
(1) Waiters with an acquire context are sorted by stamp order; waiters
without an acquire context are interspersed in FIFO order.
(2) Among waiters with contexts, only the first one can have other locks
acquired already (ctx->acquired > 0). Note that this waiter may come
after other waiters without contexts in the list.
(2) For Wait-Die, among waiters with contexts, only the first one can have
other locks acquired already (ctx->acquired > 0). Note that this waiter
may come after other waiters without contexts in the list.
The Wound-Wait preemption is implemented with a lazy-preemption scheme:
The wounded status of the transaction is checked only when there is
contention for a new lock and hence a true chance of deadlock. In that
situation, if the transaction is wounded, it backs off, clears the
wounded status and retries. A great benefit of implementing preemption in
this way is that the wounded transaction can identify a contending lock to
wait for before restarting the transaction. Just blindly restarting the
transaction would likely make the transaction end up in a situation where
it would have to back off again.
In general, not much contention is expected. The locks are typically used to
serialize access to resources for devices.
serialize access to resources for devices, and optimization focus should
therefore be directed towards the uncontended cases.
Lockdep:
Special care has been taken to warn for as many cases of api abuse


@ -734,6 +734,14 @@ S: Supported
F: drivers/crypto/ccp/
F: include/linux/ccp.h
AMD DISPLAY CORE
M: Harry Wentland <harry.wentland@amd.com>
M: Leo Li <sunpeng.li@amd.com>
L: amd-gfx@lists.freedesktop.org
T: git git://people.freedesktop.org/~agd5f/linux
S: Supported
F: drivers/gpu/drm/amd/display/
AMD FAM15H PROCESSOR POWER MONITORING DRIVER
M: Huang Rui <ray.huang@amd.com>
L: linux-hwmon@vger.kernel.org
@ -783,6 +791,14 @@ F: drivers/gpu/drm/amd/include/vi_structs.h
F: drivers/gpu/drm/amd/include/v9_structs.h
F: include/uapi/linux/kfd_ioctl.h
AMD POWERPLAY
M: Rex Zhu <rex.zhu@amd.com>
M: Evan Quan <evan.quan@amd.com>
L: amd-gfx@lists.freedesktop.org
S: Supported
F: drivers/gpu/drm/amd/powerplay/
T: git git://people.freedesktop.org/~agd5f/linux
AMD SEATTLE DEVICE TREE SUPPORT
M: Brijesh Singh <brijeshkumar.singh@amd.com>
M: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com>
@ -4897,7 +4913,8 @@ F: Documentation/gpu/xen-front.rst
DRM TTM SUBSYSTEM
M: Christian Koenig <christian.koenig@amd.com>
M: Roger He <Hongbo.He@amd.com>
M: Huang Rui <ray.huang@amd.com>
M: Junwei Zhang <Jerry.Zhang@amd.com>
T: git git://people.freedesktop.org/~agd5f/linux
S: Maintained
L: dri-devel@lists.freedesktop.org


@ -1151,6 +1151,11 @@ int arm_dma_supported(struct device *dev, u64 mask)
return __dma_supported(dev, mask, false);
}
static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
{
return coherent ? &arm_coherent_dma_ops : &arm_dma_ops;
}
#ifdef CONFIG_ARM_DMA_USE_IOMMU
static int __dma_info_to_prot(enum dma_data_direction dir, unsigned long attrs)
@ -2296,7 +2301,7 @@ void arm_iommu_detach_device(struct device *dev)
iommu_detach_device(mapping->domain, dev);
kref_put(&mapping->kref, release_iommu_mapping);
to_dma_iommu_mapping(dev) = NULL;
set_dma_ops(dev, NULL);
set_dma_ops(dev, arm_get_dma_map_ops(dev->archdata.dma_coherent));
pr_debug("Detached IOMMU controller from %s device.\n", dev_name(dev));
}
@ -2357,11 +2362,6 @@ static void arm_teardown_iommu_dma_ops(struct device *dev) { }
#endif /* CONFIG_ARM_DMA_USE_IOMMU */
static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent)
{
return coherent ? &arm_coherent_dma_ops : &arm_dma_ops;
}
void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size,
const struct iommu_ops *iommu, bool coherent)
{


@ -338,6 +338,18 @@ static resource_size_t __init gen3_stolen_base(int num, int slot, int func,
return bsm & INTEL_BSM_MASK;
}
static resource_size_t __init gen11_stolen_base(int num, int slot, int func,
resource_size_t stolen_size)
{
u64 bsm;
bsm = read_pci_config(num, slot, func, INTEL_GEN11_BSM_DW0);
bsm &= INTEL_BSM_MASK;
bsm |= (u64)read_pci_config(num, slot, func, INTEL_GEN11_BSM_DW1) << 32;
return bsm;
}
static resource_size_t __init i830_stolen_size(int num, int slot, int func)
{
u16 gmch_ctrl;
@ -498,6 +510,11 @@ static const struct intel_early_ops chv_early_ops __initconst = {
.stolen_size = chv_stolen_size,
};
static const struct intel_early_ops gen11_early_ops __initconst = {
.stolen_base = gen11_stolen_base,
.stolen_size = gen9_stolen_size,
};
static const struct pci_device_id intel_early_ids[] __initconst = {
INTEL_I830_IDS(&i830_early_ops),
INTEL_I845G_IDS(&i845_early_ops),
@ -529,6 +546,7 @@ static const struct pci_device_id intel_early_ids[] __initconst = {
INTEL_CFL_IDS(&gen9_early_ops),
INTEL_GLK_IDS(&gen9_early_ops),
INTEL_CNL_IDS(&gen9_early_ops),
INTEL_ICL_11_IDS(&gen11_early_ops),
};
struct resource intel_graphics_stolen_res __ro_after_init = DEFINE_RES_MEM(0, 0);


@ -405,7 +405,6 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
|| !exp_info->ops->map_dma_buf
|| !exp_info->ops->unmap_dma_buf
|| !exp_info->ops->release
|| !exp_info->ops->map_atomic
|| !exp_info->ops->map
|| !exp_info->ops->mmap)) {
return ERR_PTR(-EINVAL);
@ -568,7 +567,7 @@ struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf,
mutex_lock(&dmabuf->lock);
if (dmabuf->ops->attach) {
ret = dmabuf->ops->attach(dmabuf, dev, attach);
ret = dmabuf->ops->attach(dmabuf, attach);
if (ret)
goto err_attach;
}
@ -687,26 +686,14 @@ EXPORT_SYMBOL_GPL(dma_buf_unmap_attachment);
* void \*dma_buf_kmap(struct dma_buf \*, unsigned long);
* void dma_buf_kunmap(struct dma_buf \*, unsigned long, void \*);
*
* There are also atomic variants of these interfaces. Like for kmap they
* facilitate non-blocking fast-paths. Neither the importer nor the exporter
* (in the callback) is allowed to block when using these.
*
* Interfaces::
* void \*dma_buf_kmap_atomic(struct dma_buf \*, unsigned long);
* void dma_buf_kunmap_atomic(struct dma_buf \*, unsigned long, void \*);
*
* For importers all the restrictions of using kmap apply, like the limited
* supply of kmap_atomic slots. Hence an importer shall only hold onto at
* max 2 atomic dma_buf kmaps at the same time (in any given process context).
* Implementing the functions is optional for exporters and for importers all
* the restrictions of using kmap apply.
*
* dma_buf kmap calls outside of the range specified in begin_cpu_access are
* undefined. If the range is not PAGE_SIZE aligned, kmap needs to succeed on
* the partial chunks at the beginning and end but may return stale or bogus
* data outside of the range (in these partial chunks).
*
* Note that these calls need to always succeed. The exporter needs to
* complete any preparations that might fail in begin_cpu_access.
*
* For some cases the overhead of kmap can be too high, a vmap interface
* is introduced. This interface should be used very carefully, as vmalloc
* space is a limited resource on many architectures.
@ -859,41 +846,6 @@ int dma_buf_end_cpu_access(struct dma_buf *dmabuf,
}
EXPORT_SYMBOL_GPL(dma_buf_end_cpu_access);
/**
* dma_buf_kmap_atomic - Map a page of the buffer object into kernel address
* space. The same restrictions as for kmap_atomic and friends apply.
* @dmabuf: [in] buffer to map page from.
* @page_num: [in] page in PAGE_SIZE units to map.
*
* This call must always succeed, any necessary preparations that might fail
* need to be done in begin_cpu_access.
*/
void *dma_buf_kmap_atomic(struct dma_buf *dmabuf, unsigned long page_num)
{
WARN_ON(!dmabuf);
return dmabuf->ops->map_atomic(dmabuf, page_num);
}
EXPORT_SYMBOL_GPL(dma_buf_kmap_atomic);
/**
* dma_buf_kunmap_atomic - Unmap a page obtained by dma_buf_kmap_atomic.
* @dmabuf: [in] buffer to unmap page from.
* @page_num: [in] page in PAGE_SIZE units to unmap.
* @vaddr: [in] kernel space pointer obtained from dma_buf_kmap_atomic.
*
* This call must always succeed.
*/
void dma_buf_kunmap_atomic(struct dma_buf *dmabuf, unsigned long page_num,
void *vaddr)
{
WARN_ON(!dmabuf);
if (dmabuf->ops->unmap_atomic)
dmabuf->ops->unmap_atomic(dmabuf, page_num, vaddr);
}
EXPORT_SYMBOL_GPL(dma_buf_kunmap_atomic);
/**
* dma_buf_kmap - Map a page of the buffer object into kernel address space. The
* same restrictions as for kmap and friends apply.
@ -907,6 +859,8 @@ void *dma_buf_kmap(struct dma_buf *dmabuf, unsigned long page_num)
{
WARN_ON(!dmabuf);
if (!dmabuf->ops->map)
return NULL;
return dmabuf->ops->map(dmabuf, page_num);
}
EXPORT_SYMBOL_GPL(dma_buf_kmap);
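
With the atomic kmap variants removed, importer-side CPU access goes through
the remaining non-atomic interface, bracketed by begin/end_cpu_access. A
minimal sketch of that pattern (the helper name and page index are
illustrative, not from this series; note that dma_buf_kmap() can now return
NULL when the exporter provides no .map callback):

#include <linux/dma-buf.h>
#include <linux/dma-direction.h>

static int importer_peek_page(struct dma_buf *dmabuf, unsigned long page_num)
{
	void *vaddr;
	int ret;

	ret = dma_buf_begin_cpu_access(dmabuf, DMA_FROM_DEVICE);
	if (ret)
		return ret;

	vaddr = dma_buf_kmap(dmabuf, page_num);
	if (vaddr) {
		/* read the page contents here */
		dma_buf_kunmap(dmabuf, page_num, vaddr);
	}

	return dma_buf_end_cpu_access(dmabuf, DMA_FROM_DEVICE);
}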


@ -104,7 +104,6 @@ const struct dma_fence_ops dma_fence_array_ops = {
.get_timeline_name = dma_fence_array_get_timeline_name,
.enable_signaling = dma_fence_array_enable_signaling,
.signaled = dma_fence_array_signaled,
.wait = dma_fence_default_wait,
.release = dma_fence_array_release,
};
EXPORT_SYMBOL(dma_fence_array_ops);


@ -39,11 +39,42 @@ EXPORT_TRACEPOINT_SYMBOL(dma_fence_enable_signal);
static atomic64_t dma_fence_context_counter = ATOMIC64_INIT(0);
/**
* dma_fence_context_alloc - allocate an array of fence contexts
* @num: [in] amount of contexts to allocate
* DOC: DMA fences overview
*
* This function will return the first index of the number of fences allocated.
* The fence context is used for setting fence->context to a unique number.
* DMA fences, represented by &struct dma_fence, are the kernel internal
* synchronization primitive for DMA operations like GPU rendering, video
* encoding/decoding, or displaying buffers on a screen.
*
* A fence is initialized using dma_fence_init() and completed using
* dma_fence_signal(). Fences are associated with a context, allocated through
* dma_fence_context_alloc(), and all fences on the same context are
* fully ordered.
*
* Since the purposes of fences is to facilitate cross-device and
* cross-application synchronization, there's multiple ways to use one:
*
* - Individual fences can be exposed as a &sync_file, accessed as a file
* descriptor from userspace, created by calling sync_file_create(). This is
* called explicit fencing, since userspace passes around explicit
* synchronization points.
*
* - Some subsystems also have their own explicit fencing primitives, like
* &drm_syncobj. Compared to &sync_file, a &drm_syncobj allows the underlying
* fence to be updated.
*
* - Then there's also implicit fencing, where the synchronization points are
* implicitly passed around as part of shared &dma_buf instances. Such
* implicit fences are stored in &struct reservation_object through the
* &dma_buf.resv pointer.
*/
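
As a concrete illustration of the lifecycle described in this overview, here
is a minimal driver-side sketch. All my_* names are assumptions made for the
example; per the changes in this series, .enable_signaling and .wait can be
left unset and the defaults are used.

#include <linux/dma-fence.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

static const char *my_fence_get_driver_name(struct dma_fence *fence)
{
	return "my-driver";
}

static const char *my_fence_get_timeline_name(struct dma_fence *fence)
{
	return "my-timeline";
}

static const struct dma_fence_ops my_fence_ops = {
	.get_driver_name	= my_fence_get_driver_name,
	.get_timeline_name	= my_fence_get_timeline_name,
	/* .enable_signaling and .wait are optional after this series. */
};

static DEFINE_SPINLOCK(my_fence_lock);

/* Create a fence on a context obtained from dma_fence_context_alloc(). */
static struct dma_fence *my_fence_create(u64 context, unsigned int seqno)
{
	struct dma_fence *fence = kzalloc(sizeof(*fence), GFP_KERNEL);

	if (!fence)
		return NULL;
	dma_fence_init(fence, &my_fence_ops, &my_fence_lock, context, seqno);
	return fence;
}

/* When the DMA operation completes: dma_fence_signal(fence); consumers drop
 * their references with dma_fence_put(). */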
/**
* dma_fence_context_alloc - allocate an array of fence contexts
* @num: amount of contexts to allocate
*
* This function will return the first index of the number of fence contexts
* allocated. The fence context is used for setting &dma_fence.context to a
* unique number by passing the context to dma_fence_init().
*/
u64 dma_fence_context_alloc(unsigned num)
{
@ -59,10 +90,14 @@ EXPORT_SYMBOL(dma_fence_context_alloc);
* Signal completion for software callbacks on a fence, this will unblock
* dma_fence_wait() calls and run all the callbacks added with
* dma_fence_add_callback(). Can be called multiple times, but since a fence
* can only go from unsignaled to signaled state, it will only be effective
* the first time.
* can only go from the unsignaled to the signaled state and not back, it will
* only be effective the first time.
*
* Unlike dma_fence_signal, this function must be called with fence->lock held.
* Unlike dma_fence_signal(), this function must be called with &dma_fence.lock
* held.
*
* Returns 0 on success and a negative error value when @fence has been
* signalled already.
*/
int dma_fence_signal_locked(struct dma_fence *fence)
{
@ -102,8 +137,11 @@ EXPORT_SYMBOL(dma_fence_signal_locked);
* Signal completion for software callbacks on a fence, this will unblock
* dma_fence_wait() calls and run all the callbacks added with
* dma_fence_add_callback(). Can be called multiple times, but since a fence
* can only go from unsignaled to signaled state, it will only be effective
* the first time.
* can only go from the unsignaled to the signaled state and not back, it will
* only be effective the first time.
*
* Returns 0 on success and a negative error value when @fence has been
* signalled already.
*/
int dma_fence_signal(struct dma_fence *fence)
{
@ -136,9 +174,9 @@ EXPORT_SYMBOL(dma_fence_signal);
/**
* dma_fence_wait_timeout - sleep until the fence gets signaled
* or until timeout elapses
* @fence: [in] the fence to wait on
* @intr: [in] if true, do an interruptible wait
* @timeout: [in] timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
* @fence: the fence to wait on
* @intr: if true, do an interruptible wait
* @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
*
* Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the
* remaining timeout in jiffies on success. Other error values may be
@ -148,6 +186,8 @@ EXPORT_SYMBOL(dma_fence_signal);
* directly or indirectly (buf-mgr between reservation and committing)
* holds a reference to the fence, otherwise the fence might be
* freed before return, resulting in undefined behavior.
*
* See also dma_fence_wait() and dma_fence_wait_any_timeout().
*/
signed long
dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout)
@ -158,12 +198,22 @@ dma_fence_wait_timeout(struct dma_fence *fence, bool intr, signed long timeout)
return -EINVAL;
trace_dma_fence_wait_start(fence);
ret = fence->ops->wait(fence, intr, timeout);
if (fence->ops->wait)
ret = fence->ops->wait(fence, intr, timeout);
else
ret = dma_fence_default_wait(fence, intr, timeout);
trace_dma_fence_wait_end(fence);
return ret;
}
EXPORT_SYMBOL(dma_fence_wait_timeout);
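Illustrative sketch (not part of the patch above) of how a caller might consume dma_fence_wait_timeout() and the dma_fence_wait_any_timeout() variant referenced in the "See also" note. The my_wait_*() helpers are hypothetical; the caller is assumed to hold references to every fence for the duration of the wait, as required above.
#include <linux/dma-fence.h>
#include <linux/errno.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
#include <linux/sched.h>

/* Wait up to 100 ms on a single fence and map the return convention. */
static int my_wait_one(struct dma_fence *fence)
{
        signed long r;

        r = dma_fence_wait_timeout(fence, true, msecs_to_jiffies(100));
        if (r == 0)
                return -ETIMEDOUT;      /* timed out, fence still unsignaled */
        if (r < 0)
                return r;               /* -ERESTARTSYS or a fence error */
        return 0;                       /* signaled, r jiffies were left over */
}

/* Wait until whichever of two fences signals first. */
static int my_wait_either(struct dma_fence *a, struct dma_fence *b)
{
        struct dma_fence *fences[] = { a, b };
        uint32_t first;
        signed long r;

        r = dma_fence_wait_any_timeout(fences, ARRAY_SIZE(fences), true,
                                       MAX_SCHEDULE_TIMEOUT, &first);
        if (r < 0)
                return r;
        if (r == 0)
                return -ETIMEDOUT;      /* cannot happen with MAX_SCHEDULE_TIMEOUT */
        return first;                   /* index of the fence that signaled */
}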
/**
* dma_fence_release - default release function for fences
* @kref: &dma_fence.refcount
*
* This is the default release function for &dma_fence. Drivers shouldn't call
* this directly, but instead call dma_fence_put().
*/
void dma_fence_release(struct kref *kref)
{
struct dma_fence *fence =
@ -181,6 +231,13 @@ void dma_fence_release(struct kref *kref)
}
EXPORT_SYMBOL(dma_fence_release);
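Illustrative sketch (not part of the patch above): drivers that embed a &dma_fence in a larger object usually supply their own &dma_fence_ops.release and defer the free past an RCU grace period, mirroring what the default dma_fence_free() below does. struct my_job and its callbacks are hypothetical.
#include <linux/dma-fence.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct my_job {
        struct dma_fence fence;
        /* ... driver-private state ... */
};

static void my_job_free_rcu(struct rcu_head *rcu)
{
        kfree(container_of(rcu, struct my_job, fence.rcu));
}

static void my_job_fence_release(struct dma_fence *f)
{
        struct my_job *job = container_of(f, struct my_job, fence);

        /* RCU readers may still be walking the fence, so don't kfree() directly. */
        call_rcu(&job->fence.rcu, my_job_free_rcu);
}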
/**
* dma_fence_free - default release function for &dma_fence.
* @fence: fence to release
*
* This is the default implementation for &dma_fence_ops.release. It calls
* kfree_rcu() on @fence.
*/
void dma_fence_free(struct dma_fence *fence)
{
kfree_rcu(fence, rcu);
@ -189,10 +246,11 @@ EXPORT_SYMBOL(dma_fence_free);
/**
* dma_fence_enable_sw_signaling - enable signaling on fence
* @fence: [in] the fence to enable
* @fence: the fence to enable
*
* this will request for sw signaling to be enabled, to make the fence
* complete as soon as possible
* This will request that software signaling be enabled, to make the fence
* complete as soon as possible. This calls &dma_fence_ops.enable_signaling
* internally.
*/
void dma_fence_enable_sw_signaling(struct dma_fence *fence)
{
@ -200,7 +258,8 @@ void dma_fence_enable_sw_signaling(struct dma_fence *fence)
if (!test_and_set_bit(DMA_FENCE_FLAG_ENABLE_SIGNAL_BIT,
&fence->flags) &&
!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags)) {
!test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags) &&
fence->ops->enable_signaling) {
trace_dma_fence_enable_signal(fence);
spin_lock_irqsave(fence->lock, flags);
@ -216,24 +275,24 @@ EXPORT_SYMBOL(dma_fence_enable_sw_signaling);
/**
* dma_fence_add_callback - add a callback to be called when the fence
* is signaled
* @fence: [in] the fence to wait on
* @cb: [in] the callback to register
* @func: [in] the function to call
* @fence: the fence to wait on
* @cb: the callback to register
* @func: the function to call
*
* cb will be initialized by dma_fence_add_callback, no initialization
* @cb will be initialized by dma_fence_add_callback(); no initialization
* by the caller is required. Any number of callbacks can be registered
* to a fence, but a callback can only be registered to one fence at a time.
*
* Note that the callback can be called from an atomic context. If
* fence is already signaled, this function will return -ENOENT (and
* *not* call the callback)
* *not* call the callback).
*
* Add a software callback to the fence. Same restrictions apply to
* refcount as it does to dma_fence_wait, however the caller doesn't need to
* keep a refcount to fence afterwards: when software access is enabled,
* the creator of the fence is required to keep the fence alive until
* after it signals with dma_fence_signal. The callback itself can be called
* from irq context.
* refcount as it does to dma_fence_wait(); however, the caller doesn't need to
* keep a refcount to the fence after dma_fence_add_callback() has returned:
* when software access is enabled, the creator of the fence is required to keep
* the fence alive until after it signals with dma_fence_signal(). The callback
* itself can be called from irq context.
*
* Returns 0 in case of success, -ENOENT if the fence is already signaled
* and -EINVAL in case of error.
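Illustrative sketch (not part of the patch above) of the usual callback pattern: embed the &dma_fence_cb in a driver structure and recover it with container_of() in the callback, which may run in irq context as noted above. struct my_waiter and its helpers are hypothetical.
#include <linux/completion.h>
#include <linux/dma-fence.h>

struct my_waiter {
        struct dma_fence_cb cb;
        struct completion done;
};

static void my_fence_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
{
        struct my_waiter *w = container_of(cb, struct my_waiter, cb);

        complete(&w->done);     /* runs in whatever context signals the fence */
}

static int my_arm_waiter(struct dma_fence *fence, struct my_waiter *w)
{
        int r;

        init_completion(&w->done);
        r = dma_fence_add_callback(fence, &w->cb, my_fence_cb);
        if (r == -ENOENT)       /* already signaled: callback was not installed */
                complete(&w->done);
        return r;
}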
@ -260,7 +319,7 @@ int dma_fence_add_callback(struct dma_fence *fence, struct dma_fence_cb *cb,
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
ret = -ENOENT;
else if (!was_set) {
else if (!was_set && fence->ops->enable_signaling) {
trace_dma_fence_enable_signal(fence);
if (!fence->ops->enable_signaling(fence)) {
@ -282,7 +341,7 @@ EXPORT_SYMBOL(dma_fence_add_callback);
/**
* dma_fence_get_status - returns the status upon completion
* @fence: [in] the dma_fence to query
* @fence: the dma_fence to query
*
* This wraps dma_fence_get_status_locked() to return the error status
* condition on a signaled fence. See dma_fence_get_status_locked() for more
@ -307,8 +366,8 @@ EXPORT_SYMBOL(dma_fence_get_status);
/**
* dma_fence_remove_callback - remove a callback from the signaling list
* @fence: [in] the fence to wait on
* @cb: [in] the callback to remove
* @fence: the fence to wait on
* @cb: the callback to remove
*
* Remove a previously queued callback from the fence. This function returns
* true if the callback is successfully removed, or false if the fence has
@ -319,6 +378,9 @@ EXPORT_SYMBOL(dma_fence_get_status);
* doing, since deadlocks and race conditions could occur all too easily. For
* this reason, it should only ever be done on hardware lockup recovery,
* with a reference held to the fence.
*
* Behaviour is undefined if @cb has not been added to @fence using
* dma_fence_add_callback() beforehand.
*/
bool
dma_fence_remove_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
@ -355,9 +417,9 @@ dma_fence_default_wait_cb(struct dma_fence *fence, struct dma_fence_cb *cb)
/**
* dma_fence_default_wait - default sleep until the fence gets signaled
* or until timeout elapses
* @fence: [in] the fence to wait on
* @intr: [in] if true, do an interruptible wait
* @timeout: [in] timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
* @fence: the fence to wait on
* @intr: if true, do an interruptible wait
* @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
*
* Returns -ERESTARTSYS if interrupted, 0 if the wait timed out, or the
* remaining timeout in jiffies on success. If timeout is zero the value one is
@ -388,7 +450,7 @@ dma_fence_default_wait(struct dma_fence *fence, bool intr, signed long timeout)
if (test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags))
goto out;
if (!was_set) {
if (!was_set && fence->ops->enable_signaling) {
trace_dma_fence_enable_signal(fence);
if (!fence->ops->enable_signaling(fence)) {
@ -450,12 +512,12 @@ dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count,
/**
* dma_fence_wait_any_timeout - sleep until any fence gets signaled
* or until timeout elapses
* @fences: [in] array of fences to wait on
* @count: [in] number of fences to wait on
* @intr: [in] if true, do an interruptible wait
* @timeout: [in] timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
* @idx: [out] the first signaled fence index, meaningful only on
* positive return
* @fences: array of fences to wait on
* @count: number of fences to wait on
* @intr: if true, do an interruptible wait
* @timeout: timeout value in jiffies, or MAX_SCHEDULE_TIMEOUT
* @idx: used to store the first signaled fence index, meaningful only on
* positive return
*
* Returns -EINVAL on custom fence wait implementation, -ERESTARTSYS if
* interrupted, 0 if the wait timed out, or the remaining timeout in jiffies
@ -464,6 +526,8 @@ dma_fence_test_signaled_any(struct dma_fence **fences, uint32_t count,
* Synchronously waits for the first fence in the array to be signaled. The
* caller needs to hold a reference to all fences in the array, otherwise a
* fence might be freed before return, resulting in undefined behavior.
*
* See also dma_fence_wait() and dma_fence_wait_timeout().
*/
signed long
dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
@ -496,11 +560,6 @@ dma_fence_wait_any_timeout(struct dma_fence **fences, uint32_t count,
for (i = 0; i < count; ++i) {
struct dma_fence *fence = fences[i];
if (fence->ops->wait != dma_fence_default_wait) {
ret = -EINVAL;
goto fence_rm_cb;
}
cb[i].task = current;
if (dma_fence_add_callback(fence, &cb[i].base,
dma_fence_default_wait_cb)) {
@ -541,27 +600,25 @@ EXPORT_SYMBOL(dma_fence_wait_any_timeout);
/**
* dma_fence_init - Initialize a custom fence.
* @fence: [in] the fence to initialize
* @ops: [in] the dma_fence_ops for operations on this fence
* @lock: [in] the irqsafe spinlock to use for locking this fence
* @context: [in] the execution context this fence is run on
* @seqno: [in] a linear increasing sequence number for this context
* @fence: the fence to initialize
* @ops: the dma_fence_ops for operations on this fence
* @lock: the irqsafe spinlock to use for locking this fence
* @context: the execution context this fence is run on
* @seqno: a linearly increasing sequence number for this context
*
* Initializes an allocated fence. The caller doesn't have to keep its
* refcount after committing with this fence, but it will need to hold a
* refcount again if dma_fence_ops.enable_signaling gets called. This can
* be used for other implementing other types of fence.
* refcount again if &dma_fence_ops.enable_signaling gets called.
*
* context and seqno are used for easy comparison between fences, allowing one
* to check which fence is later by simply using dma_fence_later.
* to check which fence is later by simply using dma_fence_later().
*/
void
dma_fence_init(struct dma_fence *fence, const struct dma_fence_ops *ops,
spinlock_t *lock, u64 context, unsigned seqno)
{
BUG_ON(!lock);
BUG_ON(!ops || !ops->wait || !ops->enable_signaling ||
!ops->get_driver_name || !ops->get_timeline_name);
BUG_ON(!ops || !ops->get_driver_name || !ops->get_timeline_name);
kref_init(&fence->refcount);
fence->ops = ops;
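Illustrative sketch (not part of the patch above): with the relaxed BUG_ON() above, a minimal fence implementation only needs the two name callbacks; .enable_signaling, .wait and .release fall back to the defaults when left NULL. All my_fence_* names are hypothetical.
#include <linux/dma-fence.h>
#include <linux/slab.h>
#include <linux/spinlock.h>

static const char *my_fence_get_driver_name(struct dma_fence *f)
{
        return "my_driver";
}

static const char *my_fence_get_timeline_name(struct dma_fence *f)
{
        return "my_timeline";
}

static const struct dma_fence_ops my_fence_ops = {
        .get_driver_name = my_fence_get_driver_name,
        .get_timeline_name = my_fence_get_timeline_name,
        /* .enable_signaling, .wait and .release are now optional */
};

static DEFINE_SPINLOCK(my_fence_lock);

static struct dma_fence *my_fence_create(u64 context, unsigned seqno)
{
        struct dma_fence *f = kzalloc(sizeof(*f), GFP_KERNEL);

        if (f)
                dma_fence_init(f, &my_fence_ops, &my_fence_lock, context, seqno);
        return f;
}

/* Later, when the underlying work completes:
 *      dma_fence_signal(f);
 *      dma_fence_put(f);
 */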

View File

@ -46,7 +46,7 @@
* write-side updates.
*/
DEFINE_WW_CLASS(reservation_ww_class);
DEFINE_WD_CLASS(reservation_ww_class);
EXPORT_SYMBOL(reservation_ww_class);
struct lock_class_key reservation_seqcount_class;
@ -141,6 +141,7 @@ reservation_object_add_shared_inplace(struct reservation_object *obj,
if (signaled) {
RCU_INIT_POINTER(fobj->shared[signaled_idx], fence);
} else {
BUG_ON(fobj->shared_count >= fobj->shared_max);
RCU_INIT_POINTER(fobj->shared[fobj->shared_count], fence);
fobj->shared_count++;
}
@ -230,10 +231,9 @@ void reservation_object_add_shared_fence(struct reservation_object *obj,
old = reservation_object_get_list(obj);
obj->staged = NULL;
if (!fobj) {
BUG_ON(old->shared_count >= old->shared_max);
if (!fobj)
reservation_object_add_shared_inplace(obj, old, fence);
} else
else
reservation_object_add_shared_replace(obj, old, fobj, fence);
}
EXPORT_SYMBOL(reservation_object_add_shared_fence);

View File

@ -188,7 +188,6 @@ static const struct dma_fence_ops timeline_fence_ops = {
.get_timeline_name = timeline_fence_get_timeline_name,
.enable_signaling = timeline_fence_enable_signaling,
.signaled = timeline_fence_signaled,
.wait = dma_fence_default_wait,
.release = timeline_fence_release,
.fence_value_str = timeline_fence_value_str,
.timeline_value_str = timeline_fence_timeline_value_str,

View File

@ -122,6 +122,16 @@ config DRM_LOAD_EDID_FIRMWARE
default case is N. Details and instructions on how to build your own
EDID data are given in Documentation/EDID/HOWTO.txt.
config DRM_DP_CEC
bool "Enable DisplayPort CEC-Tunneling-over-AUX HDMI support"
select CEC_CORE
help
Choose this option if you want to enable HDMI CEC support for
DisplayPort/USB-C to HDMI adapters.
Note: not all adapters support this feature, and even those that do
often do not hook up the CEC pin.
config DRM_TTM
tristate
depends on DRM && MMU
@ -213,6 +223,17 @@ config DRM_VGEM
as used by Mesa's software renderer for enhanced performance.
If M is selected the module will be called vgem.
config DRM_VKMS
tristate "Virtual KMS (EXPERIMENTAL)"
depends on DRM
select DRM_KMS_HELPER
default n
help
Virtual Kernel Mode-Setting (VKMS) is used for testing or for
running GPUs in headless machines. Choose this option to get
a VKMS.
If M is selected the module will be called vkms.
source "drivers/gpu/drm/exynos/Kconfig"

View File

@ -18,7 +18,7 @@ drm-y := drm_auth.o drm_bufs.o drm_cache.o \
drm_encoder.o drm_mode_object.o drm_property.o \
drm_plane.o drm_color_mgmt.o drm_print.o \
drm_dumb_buffers.o drm_mode_config.o drm_vblank.o \
drm_syncobj.o drm_lease.o
drm_syncobj.o drm_lease.o drm_writeback.o drm_client.o
drm-$(CONFIG_DRM_LIB_RANDOM) += lib/drm_random.o
drm-$(CONFIG_DRM_VM) += drm_vm.o
@ -41,6 +41,7 @@ drm_kms_helper-$(CONFIG_DRM_PANEL_BRIDGE) += bridge/panel.o
drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o
drm_kms_helper-$(CONFIG_DRM_KMS_CMA_HELPER) += drm_fb_cma_helper.o
drm_kms_helper-$(CONFIG_DRM_DP_AUX_CHARDEV) += drm_dp_aux_dev.o
drm_kms_helper-$(CONFIG_DRM_DP_CEC) += drm_dp_cec.o
obj-$(CONFIG_DRM_KMS_HELPER) += drm_kms_helper.o
obj-$(CONFIG_DRM_DEBUG_SELFTEST) += selftests/
@ -69,6 +70,7 @@ obj-$(CONFIG_DRM_SAVAGE)+= savage/
obj-$(CONFIG_DRM_VMWGFX)+= vmwgfx/
obj-$(CONFIG_DRM_VIA) +=via/
obj-$(CONFIG_DRM_VGEM) += vgem/
obj-$(CONFIG_DRM_VKMS) += vkms/
obj-$(CONFIG_DRM_NOUVEAU) +=nouveau/
obj-$(CONFIG_DRM_EXYNOS) +=exynos/
obj-$(CONFIG_DRM_ROCKCHIP) +=rockchip/

View File

@ -136,6 +136,7 @@
#define GENERIC_OBJECT_ID_PX2_NON_DRIVABLE 0x02
#define GENERIC_OBJECT_ID_MXM_OPM 0x03
#define GENERIC_OBJECT_ID_STEREO_PIN 0x04 //This object could show up from Misc Object table, it follows ATOM_OBJECT format, and contains one ATOM_OBJECT_GPIO_CNTL_RECORD for the stereo pin
#define GENERIC_OBJECT_ID_BRACKET_LAYOUT 0x05
/****************************************************/
/* Graphics Object ENUM ID Definition */
@ -714,6 +715,13 @@
GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
GENERIC_OBJECT_ID_STEREO_PIN << OBJECT_ID_SHIFT)
#define GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID1 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
GRAPH_OBJECT_ENUM_ID1 << ENUM_ID_SHIFT |\
GENERIC_OBJECT_ID_BRACKET_LAYOUT << OBJECT_ID_SHIFT)
#define GENERICOBJECT_BRACKET_LAYOUT_ENUM_ID2 (GRAPH_OBJECT_TYPE_GENERIC << OBJECT_TYPE_SHIFT |\
GRAPH_OBJECT_ENUM_ID2 << ENUM_ID_SHIFT |\
GENERIC_OBJECT_ID_BRACKET_LAYOUT << OBJECT_ID_SHIFT)
/****************************************************/
/* Object Cap definition - Shared with BIOS */
/****************************************************/

View File

@ -73,6 +73,8 @@
#include "amdgpu_virt.h"
#include "amdgpu_gart.h"
#include "amdgpu_debugfs.h"
#include "amdgpu_job.h"
#include "amdgpu_bo_list.h"
/*
* Modules parameters.
@ -105,11 +107,8 @@ extern int amdgpu_vm_fault_stop;
extern int amdgpu_vm_debug;
extern int amdgpu_vm_update_mode;
extern int amdgpu_dc;
extern int amdgpu_dc_log;
extern int amdgpu_sched_jobs;
extern int amdgpu_sched_hw_submission;
extern int amdgpu_no_evict;
extern int amdgpu_direct_gma_size;
extern uint amdgpu_pcie_gen_cap;
extern uint amdgpu_pcie_lane_cap;
extern uint amdgpu_cg_mask;
@ -600,17 +599,6 @@ struct amdgpu_ib {
extern const struct drm_sched_backend_ops amdgpu_sched_ops;
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
struct amdgpu_job **job, struct amdgpu_vm *vm);
int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
struct amdgpu_job **job);
void amdgpu_job_free_resources(struct amdgpu_job *job);
void amdgpu_job_free(struct amdgpu_job *job);
int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
struct drm_sched_entity *entity, void *owner,
struct dma_fence **f);
/*
* Queue manager
*/
@ -684,8 +672,8 @@ int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id);
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr);
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr);
@ -702,37 +690,6 @@ struct amdgpu_fpriv {
struct amdgpu_ctx_mgr ctx_mgr;
};
/*
* residency list
*/
struct amdgpu_bo_list_entry {
struct amdgpu_bo *robj;
struct ttm_validate_buffer tv;
struct amdgpu_bo_va *bo_va;
uint32_t priority;
struct page **user_pages;
int user_invalidated;
};
struct amdgpu_bo_list {
struct mutex lock;
struct rcu_head rhead;
struct kref refcount;
struct amdgpu_bo *gds_obj;
struct amdgpu_bo *gws_obj;
struct amdgpu_bo *oa_obj;
unsigned first_userptr;
unsigned num_entries;
struct amdgpu_bo_list_entry *array;
};
struct amdgpu_bo_list *
amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id);
void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
struct list_head *validated);
void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
void amdgpu_bo_list_free(struct amdgpu_bo_list *list);
/*
* GFX stuff
*/
@ -931,6 +888,11 @@ struct amdgpu_ngg {
bool init;
};
struct sq_work {
struct work_struct work;
unsigned ih_data;
};
struct amdgpu_gfx {
struct mutex gpu_clock_mutex;
struct amdgpu_gfx_config config;
@ -969,6 +931,10 @@ struct amdgpu_gfx {
struct amdgpu_irq_src eop_irq;
struct amdgpu_irq_src priv_reg_irq;
struct amdgpu_irq_src priv_inst_irq;
struct amdgpu_irq_src cp_ecc_error_irq;
struct amdgpu_irq_src sq_irq;
struct sq_work sq_work;
/* gfx status */
uint32_t gfx_current_status;
/* ce ram size*/
@ -1020,6 +986,7 @@ struct amdgpu_cs_parser {
/* scheduler job object */
struct amdgpu_job *job;
struct amdgpu_ring *ring;
/* buffer objects */
struct ww_acquire_ctx ticket;
@ -1041,40 +1008,6 @@ struct amdgpu_cs_parser {
struct drm_syncobj **post_dep_syncobjs;
};
#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0) /* bit set means command submit involves a preamble IB */
#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1) /* bit set means preamble IB is first presented in belonging context */
#define AMDGPU_HAVE_CTX_SWITCH (1 << 2) /* bit set means context switch occured */
struct amdgpu_job {
struct drm_sched_job base;
struct amdgpu_device *adev;
struct amdgpu_vm *vm;
struct amdgpu_ring *ring;
struct amdgpu_sync sync;
struct amdgpu_sync sched_sync;
struct amdgpu_ib *ibs;
struct dma_fence *fence; /* the hw fence */
uint32_t preamble_status;
uint32_t num_ibs;
void *owner;
uint64_t fence_ctx; /* the fence_context this job uses */
bool vm_needs_flush;
uint64_t vm_pd_addr;
unsigned vmid;
unsigned pasid;
uint32_t gds_base, gds_size;
uint32_t gws_base, gws_size;
uint32_t oa_base, oa_size;
uint32_t vram_lost_counter;
/* user fence handling */
uint64_t uf_addr;
uint64_t uf_sequence;
};
#define to_amdgpu_job(sched_job) \
container_of((sched_job), struct amdgpu_job, base)
static inline u32 amdgpu_get_ib_value(struct amdgpu_cs_parser *p,
uint32_t ib_idx, int idx)
{
@ -1389,6 +1322,7 @@ enum amd_hw_ip_block_type {
PWR_HWIP,
NBIF_HWIP,
THM_HWIP,
CLK_HWIP,
MAX_HWIP
};
@ -1579,9 +1513,9 @@ struct amdgpu_device {
DECLARE_HASHTABLE(mn_hash, 7);
/* tracking pinned memory */
u64 vram_pin_size;
u64 invisible_pin_size;
u64 gart_pin_size;
atomic64_t vram_pin_size;
atomic64_t visible_pin_size;
atomic64_t gart_pin_size;
/* amdkfd interface */
struct kfd_dev *kfd;
@ -1776,6 +1710,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
#define amdgpu_vm_write_pte(adev, ib, pe, value, count, incr) ((adev)->vm_manager.vm_pte_funcs->write_pte((ib), (pe), (value), (count), (incr)))
#define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
#define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
#define amdgpu_ring_patch_cs_in_place(r, p, ib) ((r)->funcs->patch_cs_in_place((p), (ib)))
#define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
#define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
@ -1829,8 +1764,6 @@ void amdgpu_display_update_priority(struct amdgpu_device *adev);
void amdgpu_cs_report_moved_bytes(struct amdgpu_device *adev, u64 num_bytes,
u64 num_vis_bytes);
void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
void amdgpu_device_vram_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc, u64 base);
void amdgpu_device_gart_location(struct amdgpu_device *adev,

View File

@ -364,7 +364,6 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
struct acpi_bus_event *event)
{
struct amdgpu_atif *atif = adev->atif;
struct atif_sbios_requests req;
int count;
DRM_DEBUG_DRIVER("event, device_class = %s, type = %#x\n",
@ -379,42 +378,48 @@ static int amdgpu_atif_handler(struct amdgpu_device *adev,
/* Not our event */
return NOTIFY_DONE;
/* Check pending SBIOS requests */
count = amdgpu_atif_get_sbios_requests(atif, &req);
if (atif->functions.sbios_requests) {
struct atif_sbios_requests req;
if (count <= 0)
return NOTIFY_DONE;
/* Check pending SBIOS requests */
count = amdgpu_atif_get_sbios_requests(atif, &req);
DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count);
if (count <= 0)
return NOTIFY_DONE;
if (req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) {
struct amdgpu_encoder *enc = atif->encoder_for_bl;
DRM_DEBUG_DRIVER("ATIF: %d pending SBIOS requests\n", count);
if (enc) {
struct amdgpu_encoder_atom_dig *dig = enc->enc_priv;
/* todo: add DC handling */
if ((req.pending & ATIF_PANEL_BRIGHTNESS_CHANGE_REQUEST) &&
!amdgpu_device_has_dc_support(adev)) {
struct amdgpu_encoder *enc = atif->encoder_for_bl;
DRM_DEBUG_DRIVER("Changing brightness to %d\n",
req.backlight_level);
if (enc) {
struct amdgpu_encoder_atom_dig *dig = enc->enc_priv;
amdgpu_display_backlight_set_level(adev, enc, req.backlight_level);
DRM_DEBUG_DRIVER("Changing brightness to %d\n",
req.backlight_level);
amdgpu_display_backlight_set_level(adev, enc, req.backlight_level);
#if defined(CONFIG_BACKLIGHT_CLASS_DEVICE) || defined(CONFIG_BACKLIGHT_CLASS_DEVICE_MODULE)
backlight_force_update(dig->bl_dev,
BACKLIGHT_UPDATE_HOTKEY);
backlight_force_update(dig->bl_dev,
BACKLIGHT_UPDATE_HOTKEY);
#endif
}
}
}
if (req.pending & ATIF_DGPU_DISPLAY_EVENT) {
if ((adev->flags & AMD_IS_PX) &&
amdgpu_atpx_dgpu_req_power_for_displays()) {
pm_runtime_get_sync(adev->ddev->dev);
/* Just fire off a uevent and let userspace tell us what to do */
drm_helper_hpd_irq_event(adev->ddev);
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
if (req.pending & ATIF_DGPU_DISPLAY_EVENT) {
if ((adev->flags & AMD_IS_PX) &&
amdgpu_atpx_dgpu_req_power_for_displays()) {
pm_runtime_get_sync(adev->ddev->dev);
/* Just fire off a uevent and let userspace tell us what to do */
drm_helper_hpd_irq_event(adev->ddev);
pm_runtime_mark_last_busy(adev->ddev->dev);
pm_runtime_put_autosuspend(adev->ddev->dev);
}
}
/* TODO: check other events */
}
/* TODO: check other events */
/* We've handled the event, stop the notifier chain. The ACPI interface
* overloads ACPI_VIDEO_NOTIFY_PROBE, we don't want to send that to

View File

@ -243,6 +243,33 @@ int amdgpu_amdkfd_resume(struct amdgpu_device *adev)
return r;
}
int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev)
{
int r = 0;
if (adev->kfd)
r = kgd2kfd->pre_reset(adev->kfd);
return r;
}
int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev)
{
int r = 0;
if (adev->kfd)
r = kgd2kfd->post_reset(adev->kfd);
return r;
}
void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
amdgpu_device_gpu_recover(adev, NULL, false);
}
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
void **cpu_ptr)
@ -251,7 +278,6 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
struct amdgpu_bo *bo = NULL;
struct amdgpu_bo_param bp;
int r;
uint64_t gpu_addr_tmp = 0;
void *cpu_ptr_tmp = NULL;
memset(&bp, 0, sizeof(bp));
@ -275,13 +301,18 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
goto allocate_mem_reserve_bo_failed;
}
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT,
&gpu_addr_tmp);
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
if (r) {
dev_err(adev->dev, "(%d) failed to pin bo for amdkfd\n", r);
goto allocate_mem_pin_bo_failed;
}
r = amdgpu_ttm_alloc_gart(&bo->tbo);
if (r) {
dev_err(adev->dev, "%p bind failed\n", bo);
goto allocate_mem_kmap_bo_failed;
}
r = amdgpu_bo_kmap(bo, &cpu_ptr_tmp);
if (r) {
dev_err(adev->dev,
@ -290,7 +321,7 @@ int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
}
*mem_obj = bo;
*gpu_addr = gpu_addr_tmp;
*gpu_addr = amdgpu_bo_gpu_offset(bo);
*cpu_ptr = cpu_ptr_tmp;
amdgpu_bo_unreserve(bo);
@ -457,6 +488,14 @@ err:
return ret;
}
void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle)
{
struct amdgpu_device *adev = (struct amdgpu_device *)kgd;
amdgpu_dpm_switch_power_profile(adev,
PP_SMC_POWER_PROFILE_COMPUTE, !idle);
}
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid)
{
if (adev->kfd) {

View File

@ -119,6 +119,7 @@ int amdgpu_amdkfd_evict_userptr(struct kgd_mem *mem, struct mm_struct *mm);
int amdgpu_amdkfd_submit_ib(struct kgd_dev *kgd, enum kgd_engine_type engine,
uint32_t vmid, uint64_t gpu_addr,
uint32_t *ib_cmd, uint32_t ib_len);
void amdgpu_amdkfd_set_compute_idle(struct kgd_dev *kgd, bool idle);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void);
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void);
@ -126,6 +127,12 @@ struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void);
bool amdgpu_amdkfd_is_kfd_vmid(struct amdgpu_device *adev, u32 vmid);
int amdgpu_amdkfd_pre_reset(struct amdgpu_device *adev);
int amdgpu_amdkfd_post_reset(struct amdgpu_device *adev);
void amdgpu_amdkfd_gpu_reset(struct kgd_dev *kgd);
/* Shared API */
int alloc_gtt_mem(struct kgd_dev *kgd, size_t size,
void **mem_obj, uint64_t *gpu_addr,
@ -183,6 +190,9 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
int amdgpu_amdkfd_gpuvm_restore_process_bos(void *process_info,
struct dma_fence **ef);
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
struct kfd_vm_fault_info *info);
void amdgpu_amdkfd_gpuvm_init_mem_limits(void);
void amdgpu_amdkfd_unreserve_system_memory_limit(struct amdgpu_bo *bo);

View File

@ -173,7 +173,5 @@ static const struct dma_fence_ops amdkfd_fence_ops = {
.get_driver_name = amdkfd_fence_get_driver_name,
.get_timeline_name = amdkfd_fence_get_timeline_name,
.enable_signaling = amdkfd_fence_enable_signaling,
.signaled = NULL,
.wait = dma_fence_default_wait,
.release = amdkfd_fence_release,
};

View File

@ -145,6 +145,7 @@ static void set_vm_context_page_table_base(struct kgd_dev *kgd, uint32_t vmid,
uint32_t page_table_base);
static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid);
static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid);
static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd);
/* Because of REG_GET_FIELD() being used, we put this function in the
* asic specific file.
@ -216,6 +217,10 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
.read_vmid_from_vmfault_reg = read_vmid_from_vmfault_reg,
.gpu_recover = amdgpu_amdkfd_gpu_reset,
.set_compute_idle = amdgpu_amdkfd_set_compute_idle
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_7_get_functions(void)
@ -571,6 +576,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
unsigned long flags, end_jiffies;
int retry;
if (adev->in_gpu_reset)
return -EIO;
acquire_queue(kgd, pipe_id, queue_id);
WREG32(mmCP_HQD_PQ_DOORBELL_CONTROL, 0);
@ -882,6 +890,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
int vmid;
unsigned int tmp;
if (adev->in_gpu_reset)
return -EIO;
for (vmid = 0; vmid < 16; vmid++) {
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
continue;
@ -911,3 +922,19 @@ static int invalidate_tlbs_vmid(struct kgd_dev *kgd, uint16_t vmid)
RREG32(mmVM_INVALIDATE_RESPONSE);
return 0;
}
/**
* read_vmid_from_vmfault_reg - read vmid from register
*
* @kgd: kgd device pointer
*
* Read the VMID from the VM_CONTEXT1_PROTECTION_FAULT_STATUS register (CIK).
*/
static uint32_t read_vmid_from_vmfault_reg(struct kgd_dev *kgd)
{
struct amdgpu_device *adev = get_amdgpu_device(kgd);
uint32_t status = RREG32(mmVM_CONTEXT1_PROTECTION_FAULT_STATUS);
return REG_GET_FIELD(status, VM_CONTEXT1_PROTECTION_FAULT_STATUS, VMID);
}

View File

@ -176,6 +176,9 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
.get_vm_fault_info = amdgpu_amdkfd_gpuvm_get_vm_fault_info,
.gpu_recover = amdgpu_amdkfd_gpu_reset,
.set_compute_idle = amdgpu_amdkfd_set_compute_idle
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_8_0_get_functions(void)
@ -568,6 +571,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
int retry;
struct vi_mqd *m = get_mqd(mqd);
if (adev->in_gpu_reset)
return -EIO;
acquire_queue(kgd, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
@ -844,6 +850,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
int vmid;
unsigned int tmp;
if (adev->in_gpu_reset)
return -EIO;
for (vmid = 0; vmid < 16; vmid++) {
if (!amdgpu_amdkfd_is_kfd_vmid(adev, vmid))
continue;

View File

@ -213,6 +213,8 @@ static const struct kfd2kgd_calls kfd2kgd = {
.invalidate_tlbs = invalidate_tlbs,
.invalidate_tlbs_vmid = invalidate_tlbs_vmid,
.submit_ib = amdgpu_amdkfd_submit_ib,
.gpu_recover = amdgpu_amdkfd_gpu_reset,
.set_compute_idle = amdgpu_amdkfd_set_compute_idle
};
struct kfd2kgd_calls *amdgpu_amdkfd_gfx_9_0_get_functions(void)
@ -679,6 +681,9 @@ static int kgd_hqd_destroy(struct kgd_dev *kgd, void *mqd,
uint32_t temp;
struct v9_mqd *m = get_mqd(mqd);
if (adev->in_gpu_reset)
return -EIO;
acquire_queue(kgd, pipe_id, queue_id);
if (m->cp_hqd_vmid == 0)
@ -866,6 +871,9 @@ static int invalidate_tlbs(struct kgd_dev *kgd, uint16_t pasid)
int vmid;
struct amdgpu_ring *ring = &adev->gfx.kiq.ring;
if (adev->in_gpu_reset)
return -EIO;
if (ring->ready)
return invalidate_tlbs_with_kiq(adev, pasid);

View File

@ -334,7 +334,7 @@ static int amdgpu_amdkfd_bo_validate(struct amdgpu_bo *bo, uint32_t domain,
"Called with userptr BO"))
return -EINVAL;
amdgpu_ttm_placement_from_domain(bo, domain);
amdgpu_bo_placement_from_domain(bo, domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret)
@ -622,7 +622,7 @@ static int init_user_pages(struct kgd_mem *mem, struct mm_struct *mm,
pr_err("%s: Failed to reserve BO\n", __func__);
goto release_out;
}
amdgpu_ttm_placement_from_domain(bo, mem->domain);
amdgpu_bo_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret)
pr_err("%s: failed to validate BO\n", __func__);
@ -1587,7 +1587,7 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct kgd_dev *kgd,
goto bo_reserve_failed;
}
ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
ret = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
if (ret) {
pr_err("Failed to pin bo. ret %d\n", ret);
goto pin_failed;
@ -1621,6 +1621,20 @@ bo_reserve_failed:
return ret;
}
int amdgpu_amdkfd_gpuvm_get_vm_fault_info(struct kgd_dev *kgd,
struct kfd_vm_fault_info *mem)
{
struct amdgpu_device *adev;
adev = (struct amdgpu_device *)kgd;
if (atomic_read(&adev->gmc.vm_fault_info_updated) == 1) {
*mem = *adev->gmc.vm_fault_info;
mb();
atomic_set(&adev->gmc.vm_fault_info_updated, 0);
}
return 0;
}
/* Evict a userptr BO by stopping the queues if necessary
*
* Runs in MMU notifier, may be in RECLAIM_FS context. This means it
@ -1680,7 +1694,7 @@ static int update_invalid_user_pages(struct amdkfd_process_info *process_info,
if (amdgpu_bo_reserve(bo, true))
return -EAGAIN;
amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_CPU);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
amdgpu_bo_unreserve(bo);
if (ret) {
@ -1824,7 +1838,7 @@ static int validate_invalid_user_pages(struct amdkfd_process_info *process_info)
if (mem->user_pages[0]) {
amdgpu_ttm_tt_set_user_pages(bo->tbo.ttm,
mem->user_pages);
amdgpu_ttm_placement_from_domain(bo, mem->domain);
amdgpu_bo_placement_from_domain(bo, mem->domain);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (ret) {
pr_err("%s: failed to validate BO\n", __func__);

View File

@ -32,7 +32,7 @@ struct amdgpu_atpx_functions {
bool switch_start;
bool switch_end;
bool disp_connectors_mapping;
bool disp_detetion_ports;
bool disp_detection_ports;
};
struct amdgpu_atpx {
@ -162,7 +162,7 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas
f->switch_start = mask & ATPX_GRAPHICS_DEVICE_SWITCH_START_NOTIFICATION_SUPPORTED;
f->switch_end = mask & ATPX_GRAPHICS_DEVICE_SWITCH_END_NOTIFICATION_SUPPORTED;
f->disp_connectors_mapping = mask & ATPX_GET_DISPLAY_CONNECTORS_MAPPING_SUPPORTED;
f->disp_detetion_ports = mask & ATPX_GET_DISPLAY_DETECTION_PORTS_SUPPORTED;
f->disp_detection_ports = mask & ATPX_GET_DISPLAY_DETECTION_PORTS_SUPPORTED;
}
/**

View File

@ -95,11 +95,17 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
r = amdgpu_bo_reserve(sobj, false);
if (unlikely(r != 0))
goto out_cleanup;
r = amdgpu_bo_pin(sobj, sdomain, &saddr);
r = amdgpu_bo_pin(sobj, sdomain);
if (r) {
amdgpu_bo_unreserve(sobj);
goto out_cleanup;
}
r = amdgpu_ttm_alloc_gart(&sobj->tbo);
amdgpu_bo_unreserve(sobj);
if (r) {
goto out_cleanup;
}
saddr = amdgpu_bo_gpu_offset(sobj);
bp.domain = ddomain;
r = amdgpu_bo_create(adev, &bp, &dobj);
if (r) {
@ -108,11 +114,17 @@ static void amdgpu_benchmark_move(struct amdgpu_device *adev, unsigned size,
r = amdgpu_bo_reserve(dobj, false);
if (unlikely(r != 0))
goto out_cleanup;
r = amdgpu_bo_pin(dobj, ddomain, &daddr);
r = amdgpu_bo_pin(dobj, ddomain);
if (r) {
amdgpu_bo_unreserve(sobj);
goto out_cleanup;
}
r = amdgpu_ttm_alloc_gart(&dobj->tbo);
amdgpu_bo_unreserve(dobj);
if (r) {
goto out_cleanup;
}
daddr = amdgpu_bo_gpu_offset(dobj);
if (adev->mman.buffer_funcs) {
time = amdgpu_benchmark_do_move(adev, size, saddr, daddr, n);

View File

@ -35,92 +35,53 @@
#define AMDGPU_BO_LIST_MAX_PRIORITY 32u
#define AMDGPU_BO_LIST_NUM_BUCKETS (AMDGPU_BO_LIST_MAX_PRIORITY + 1)
static int amdgpu_bo_list_set(struct amdgpu_device *adev,
struct drm_file *filp,
struct amdgpu_bo_list *list,
struct drm_amdgpu_bo_list_entry *info,
unsigned num_entries);
static void amdgpu_bo_list_release_rcu(struct kref *ref)
static void amdgpu_bo_list_free_rcu(struct rcu_head *rcu)
{
unsigned i;
struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
refcount);
struct amdgpu_bo_list *list = container_of(rcu, struct amdgpu_bo_list,
rhead);
for (i = 0; i < list->num_entries; ++i)
amdgpu_bo_unref(&list->array[i].robj);
mutex_destroy(&list->lock);
kvfree(list->array);
kfree_rcu(list, rhead);
kvfree(list);
}
static int amdgpu_bo_list_create(struct amdgpu_device *adev,
struct drm_file *filp,
struct drm_amdgpu_bo_list_entry *info,
unsigned num_entries,
int *id)
static void amdgpu_bo_list_free(struct kref *ref)
{
int r;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
struct amdgpu_bo_list *list;
struct amdgpu_bo_list *list = container_of(ref, struct amdgpu_bo_list,
refcount);
struct amdgpu_bo_list_entry *e;
list = kzalloc(sizeof(struct amdgpu_bo_list), GFP_KERNEL);
amdgpu_bo_list_for_each_entry(e, list)
amdgpu_bo_unref(&e->robj);
call_rcu(&list->rhead, amdgpu_bo_list_free_rcu);
}
int amdgpu_bo_list_create(struct amdgpu_device *adev, struct drm_file *filp,
struct drm_amdgpu_bo_list_entry *info,
unsigned num_entries, struct amdgpu_bo_list **result)
{
unsigned last_entry = 0, first_userptr = num_entries;
struct amdgpu_bo_list_entry *array;
struct amdgpu_bo_list *list;
uint64_t total_size = 0;
size_t size;
unsigned i;
int r;
if (num_entries > SIZE_MAX / sizeof(struct amdgpu_bo_list_entry))
return -EINVAL;
size = sizeof(struct amdgpu_bo_list);
size += num_entries * sizeof(struct amdgpu_bo_list_entry);
list = kvmalloc(size, GFP_KERNEL);
if (!list)
return -ENOMEM;
/* initialize bo list*/
mutex_init(&list->lock);
kref_init(&list->refcount);
r = amdgpu_bo_list_set(adev, filp, list, info, num_entries);
if (r) {
kfree(list);
return r;
}
list->gds_obj = adev->gds.gds_gfx_bo;
list->gws_obj = adev->gds.gws_gfx_bo;
list->oa_obj = adev->gds.oa_gfx_bo;
/* idr alloc should be called only after initialization of bo list. */
mutex_lock(&fpriv->bo_list_lock);
r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
mutex_unlock(&fpriv->bo_list_lock);
if (r < 0) {
amdgpu_bo_list_free(list);
return r;
}
*id = r;
return 0;
}
static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
{
struct amdgpu_bo_list *list;
mutex_lock(&fpriv->bo_list_lock);
list = idr_remove(&fpriv->bo_list_handles, id);
mutex_unlock(&fpriv->bo_list_lock);
if (list)
kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
}
static int amdgpu_bo_list_set(struct amdgpu_device *adev,
struct drm_file *filp,
struct amdgpu_bo_list *list,
struct drm_amdgpu_bo_list_entry *info,
unsigned num_entries)
{
struct amdgpu_bo_list_entry *array;
struct amdgpu_bo *gds_obj = adev->gds.gds_gfx_bo;
struct amdgpu_bo *gws_obj = adev->gds.gws_gfx_bo;
struct amdgpu_bo *oa_obj = adev->gds.oa_gfx_bo;
unsigned last_entry = 0, first_userptr = num_entries;
unsigned i;
int r;
unsigned long total_size = 0;
array = kvmalloc_array(num_entries, sizeof(struct amdgpu_bo_list_entry), GFP_KERNEL);
if (!array)
return -ENOMEM;
array = amdgpu_bo_list_array_entry(list, 0);
memset(array, 0, num_entries * sizeof(struct amdgpu_bo_list_entry));
for (i = 0; i < num_entries; ++i) {
@ -157,59 +118,56 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
entry->tv.shared = !entry->robj->prime_shared_count;
if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GDS)
gds_obj = entry->robj;
list->gds_obj = entry->robj;
if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_GWS)
gws_obj = entry->robj;
list->gws_obj = entry->robj;
if (entry->robj->preferred_domains == AMDGPU_GEM_DOMAIN_OA)
oa_obj = entry->robj;
list->oa_obj = entry->robj;
total_size += amdgpu_bo_size(entry->robj);
trace_amdgpu_bo_list_set(list, entry->robj);
}
for (i = 0; i < list->num_entries; ++i)
amdgpu_bo_unref(&list->array[i].robj);
kvfree(list->array);
list->gds_obj = gds_obj;
list->gws_obj = gws_obj;
list->oa_obj = oa_obj;
list->first_userptr = first_userptr;
list->array = array;
list->num_entries = num_entries;
trace_amdgpu_cs_bo_status(list->num_entries, total_size);
*result = list;
return 0;
error_free:
while (i--)
amdgpu_bo_unref(&array[i].robj);
kvfree(array);
kvfree(list);
return r;
}
struct amdgpu_bo_list *
amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id)
static void amdgpu_bo_list_destroy(struct amdgpu_fpriv *fpriv, int id)
{
struct amdgpu_bo_list *result;
struct amdgpu_bo_list *list;
mutex_lock(&fpriv->bo_list_lock);
list = idr_remove(&fpriv->bo_list_handles, id);
mutex_unlock(&fpriv->bo_list_lock);
if (list)
kref_put(&list->refcount, amdgpu_bo_list_free);
}
int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
struct amdgpu_bo_list **result)
{
rcu_read_lock();
result = idr_find(&fpriv->bo_list_handles, id);
*result = idr_find(&fpriv->bo_list_handles, id);
if (result) {
if (kref_get_unless_zero(&result->refcount)) {
rcu_read_unlock();
mutex_lock(&result->lock);
} else {
rcu_read_unlock();
result = NULL;
}
} else {
if (*result && kref_get_unless_zero(&(*result)->refcount)) {
rcu_read_unlock();
return 0;
}
return result;
rcu_read_unlock();
return -ENOENT;
}
void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
@ -220,6 +178,7 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
* concatenated in descending order.
*/
struct list_head bucket[AMDGPU_BO_LIST_NUM_BUCKETS];
struct amdgpu_bo_list_entry *e;
unsigned i;
for (i = 0; i < AMDGPU_BO_LIST_NUM_BUCKETS; i++)
@ -230,14 +189,13 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
* in the list, the sort mustn't change the ordering of buffers
* with the same priority, i.e. it must be stable.
*/
for (i = 0; i < list->num_entries; i++) {
unsigned priority = list->array[i].priority;
amdgpu_bo_list_for_each_entry(e, list) {
unsigned priority = e->priority;
if (!list->array[i].robj->parent)
list_add_tail(&list->array[i].tv.head,
&bucket[priority]);
if (!e->robj->parent)
list_add_tail(&e->tv.head, &bucket[priority]);
list->array[i].user_pages = NULL;
e->user_pages = NULL;
}
/* Connect the sorted buckets in the output list. */
@ -247,71 +205,82 @@ void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
void amdgpu_bo_list_put(struct amdgpu_bo_list *list)
{
mutex_unlock(&list->lock);
kref_put(&list->refcount, amdgpu_bo_list_release_rcu);
kref_put(&list->refcount, amdgpu_bo_list_free);
}
void amdgpu_bo_list_free(struct amdgpu_bo_list *list)
{
unsigned i;
for (i = 0; i < list->num_entries; ++i)
amdgpu_bo_unref(&list->array[i].robj);
mutex_destroy(&list->lock);
kvfree(list->array);
kfree(list);
}
int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
struct drm_amdgpu_bo_list_entry **info_param)
{
const void __user *uptr = u64_to_user_ptr(in->bo_info_ptr);
const uint32_t info_size = sizeof(struct drm_amdgpu_bo_list_entry);
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
union drm_amdgpu_bo_list *args = data;
uint32_t handle = args->in.list_handle;
const void __user *uptr = u64_to_user_ptr(args->in.bo_info_ptr);
struct drm_amdgpu_bo_list_entry *info;
struct amdgpu_bo_list *list;
int r;
info = kvmalloc_array(args->in.bo_number,
sizeof(struct drm_amdgpu_bo_list_entry), GFP_KERNEL);
info = kvmalloc_array(in->bo_number, info_size, GFP_KERNEL);
if (!info)
return -ENOMEM;
/* copy the handle array from userspace to a kernel buffer */
r = -EFAULT;
if (likely(info_size == args->in.bo_info_size)) {
unsigned long bytes = args->in.bo_number *
args->in.bo_info_size;
if (likely(info_size == in->bo_info_size)) {
unsigned long bytes = in->bo_number *
in->bo_info_size;
if (copy_from_user(info, uptr, bytes))
goto error_free;
} else {
unsigned long bytes = min(args->in.bo_info_size, info_size);
unsigned long bytes = min(in->bo_info_size, info_size);
unsigned i;
memset(info, 0, args->in.bo_number * info_size);
for (i = 0; i < args->in.bo_number; ++i) {
memset(info, 0, in->bo_number * info_size);
for (i = 0; i < in->bo_number; ++i) {
if (copy_from_user(&info[i], uptr, bytes))
goto error_free;
uptr += args->in.bo_info_size;
uptr += in->bo_info_size;
}
}
*info_param = info;
return 0;
error_free:
kvfree(info);
return r;
}
int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
struct drm_file *filp)
{
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_fpriv *fpriv = filp->driver_priv;
union drm_amdgpu_bo_list *args = data;
uint32_t handle = args->in.list_handle;
struct drm_amdgpu_bo_list_entry *info = NULL;
struct amdgpu_bo_list *list, *old;
int r;
r = amdgpu_bo_create_list_entry_array(&args->in, &info);
if (r)
goto error_free;
switch (args->in.operation) {
case AMDGPU_BO_LIST_OP_CREATE:
r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
&handle);
&list);
if (r)
goto error_free;
mutex_lock(&fpriv->bo_list_lock);
r = idr_alloc(&fpriv->bo_list_handles, list, 1, 0, GFP_KERNEL);
mutex_unlock(&fpriv->bo_list_lock);
if (r < 0) {
amdgpu_bo_list_put(list);
return r;
}
handle = r;
break;
case AMDGPU_BO_LIST_OP_DESTROY:
@ -320,17 +289,22 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
break;
case AMDGPU_BO_LIST_OP_UPDATE:
r = -ENOENT;
list = amdgpu_bo_list_get(fpriv, handle);
if (!list)
goto error_free;
r = amdgpu_bo_list_set(adev, filp, list, info,
args->in.bo_number);
amdgpu_bo_list_put(list);
r = amdgpu_bo_list_create(adev, filp, info, args->in.bo_number,
&list);
if (r)
goto error_free;
mutex_lock(&fpriv->bo_list_lock);
old = idr_replace(&fpriv->bo_list_handles, list, handle);
mutex_unlock(&fpriv->bo_list_lock);
if (IS_ERR(old)) {
amdgpu_bo_list_put(list);
r = PTR_ERR(old);
goto error_free;
}
amdgpu_bo_list_put(old);
break;
default:
@ -345,6 +319,7 @@ int amdgpu_bo_list_ioctl(struct drm_device *dev, void *data,
return 0;
error_free:
kvfree(info);
if (info)
kvfree(info);
return r;
}

View File

@ -0,0 +1,85 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_BO_LIST_H__
#define __AMDGPU_BO_LIST_H__
#include <drm/ttm/ttm_execbuf_util.h>
#include <drm/amdgpu_drm.h>
struct amdgpu_device;
struct amdgpu_bo;
struct amdgpu_bo_va;
struct amdgpu_fpriv;
struct amdgpu_bo_list_entry {
struct amdgpu_bo *robj;
struct ttm_validate_buffer tv;
struct amdgpu_bo_va *bo_va;
uint32_t priority;
struct page **user_pages;
int user_invalidated;
};
struct amdgpu_bo_list {
struct rcu_head rhead;
struct kref refcount;
struct amdgpu_bo *gds_obj;
struct amdgpu_bo *gws_obj;
struct amdgpu_bo *oa_obj;
unsigned first_userptr;
unsigned num_entries;
};
int amdgpu_bo_list_get(struct amdgpu_fpriv *fpriv, int id,
struct amdgpu_bo_list **result);
void amdgpu_bo_list_get_list(struct amdgpu_bo_list *list,
struct list_head *validated);
void amdgpu_bo_list_put(struct amdgpu_bo_list *list);
int amdgpu_bo_create_list_entry_array(struct drm_amdgpu_bo_list_in *in,
struct drm_amdgpu_bo_list_entry **info_param);
int amdgpu_bo_list_create(struct amdgpu_device *adev,
struct drm_file *filp,
struct drm_amdgpu_bo_list_entry *info,
unsigned num_entries,
struct amdgpu_bo_list **list);
static inline struct amdgpu_bo_list_entry *
amdgpu_bo_list_array_entry(struct amdgpu_bo_list *list, unsigned index)
{
struct amdgpu_bo_list_entry *array = (void *)&list[1];
return &array[index];
}
#define amdgpu_bo_list_for_each_entry(e, list) \
for (e = amdgpu_bo_list_array_entry(list, 0); \
e != amdgpu_bo_list_array_entry(list, (list)->num_entries); \
++e)
#define amdgpu_bo_list_for_each_userptr_entry(e, list) \
for (e = amdgpu_bo_list_array_entry(list, (list)->first_userptr); \
e != amdgpu_bo_list_array_entry(list, (list)->num_entries); \
++e)
#endif
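Illustrative sketch (not part of the patch above): since the entries now live inline right behind struct amdgpu_bo_list (see amdgpu_bo_list_array_entry()), consumers iterate with the helper macros instead of a separate array pointer. The my_count_userptr_bos() helper is hypothetical.
/* Count the userptr BOs in a list (illustrative only). */
static unsigned my_count_userptr_bos(struct amdgpu_bo_list *list)
{
        struct amdgpu_bo_list_entry *e;
        unsigned count = 0;

        amdgpu_bo_list_for_each_userptr_entry(e, list)
                count++;

        return count;
}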

View File

@ -314,17 +314,17 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
(adev->pdev->revision == 0x81) ||
(adev->pdev->device == 0x665f)) {
info->is_kicker = true;
strcpy(fw_name, "radeon/bonaire_k_smc.bin");
strcpy(fw_name, "amdgpu/bonaire_k_smc.bin");
} else {
strcpy(fw_name, "radeon/bonaire_smc.bin");
strcpy(fw_name, "amdgpu/bonaire_smc.bin");
}
break;
case CHIP_HAWAII:
if (adev->pdev->revision == 0x80) {
info->is_kicker = true;
strcpy(fw_name, "radeon/hawaii_k_smc.bin");
strcpy(fw_name, "amdgpu/hawaii_k_smc.bin");
} else {
strcpy(fw_name, "radeon/hawaii_smc.bin");
strcpy(fw_name, "amdgpu/hawaii_smc.bin");
}
break;
case CHIP_TOPAZ:

View File

@ -212,30 +212,21 @@ static void
amdgpu_connector_update_scratch_regs(struct drm_connector *connector,
enum drm_connector_status status)
{
struct drm_encoder *best_encoder = NULL;
struct drm_encoder *encoder = NULL;
struct drm_encoder *best_encoder;
struct drm_encoder *encoder;
const struct drm_connector_helper_funcs *connector_funcs = connector->helper_private;
bool connected;
int i;
best_encoder = connector_funcs->best_encoder(connector);
for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
if (connector->encoder_ids[i] == 0)
break;
encoder = drm_encoder_find(connector->dev, NULL,
connector->encoder_ids[i]);
if (!encoder)
continue;
drm_connector_for_each_possible_encoder(connector, encoder, i) {
if ((encoder == best_encoder) && (status == connector_status_connected))
connected = true;
else
connected = false;
amdgpu_atombios_encoder_set_bios_scratch_regs(connector, encoder, connected);
}
}
@ -246,17 +237,11 @@ amdgpu_connector_find_encoder(struct drm_connector *connector,
struct drm_encoder *encoder;
int i;
for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
if (connector->encoder_ids[i] == 0)
break;
encoder = drm_encoder_find(connector->dev, NULL,
connector->encoder_ids[i]);
if (!encoder)
continue;
drm_connector_for_each_possible_encoder(connector, encoder, i) {
if (encoder->encoder_type == encoder_type)
return encoder;
}
return NULL;
}
@ -349,22 +334,24 @@ static int amdgpu_connector_ddc_get_modes(struct drm_connector *connector)
int ret;
if (amdgpu_connector->edid) {
drm_mode_connector_update_edid_property(connector, amdgpu_connector->edid);
drm_connector_update_edid_property(connector, amdgpu_connector->edid);
ret = drm_add_edid_modes(connector, amdgpu_connector->edid);
return ret;
}
drm_mode_connector_update_edid_property(connector, NULL);
drm_connector_update_edid_property(connector, NULL);
return 0;
}
static struct drm_encoder *
amdgpu_connector_best_single_encoder(struct drm_connector *connector)
{
int enc_id = connector->encoder_ids[0];
struct drm_encoder *encoder;
int i;
/* pick the first one */
drm_connector_for_each_possible_encoder(connector, encoder, i)
return encoder;
/* pick the encoder ids */
if (enc_id)
return drm_encoder_find(connector->dev, NULL, enc_id);
return NULL;
}
@ -985,9 +972,8 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
struct drm_device *dev = connector->dev;
struct amdgpu_device *adev = dev->dev_private;
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
struct drm_encoder *encoder = NULL;
const struct drm_encoder_helper_funcs *encoder_funcs;
int i, r;
int r;
enum drm_connector_status ret = connector_status_disconnected;
bool dret = false, broken_edid = false;
@ -1077,14 +1063,10 @@ amdgpu_connector_dvi_detect(struct drm_connector *connector, bool force)
/* find analog encoder */
if (amdgpu_connector->dac_load_detect) {
for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
if (connector->encoder_ids[i] == 0)
break;
encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]);
if (!encoder)
continue;
struct drm_encoder *encoder;
int i;
drm_connector_for_each_possible_encoder(connector, encoder, i) {
if (encoder->encoder_type != DRM_MODE_ENCODER_DAC &&
encoder->encoder_type != DRM_MODE_ENCODER_TVDAC)
continue;
@ -1132,18 +1114,11 @@ exit:
static struct drm_encoder *
amdgpu_connector_dvi_encoder(struct drm_connector *connector)
{
int enc_id = connector->encoder_ids[0];
struct amdgpu_connector *amdgpu_connector = to_amdgpu_connector(connector);
struct drm_encoder *encoder;
int i;
for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
if (connector->encoder_ids[i] == 0)
break;
encoder = drm_encoder_find(connector->dev, NULL, connector->encoder_ids[i]);
if (!encoder)
continue;
drm_connector_for_each_possible_encoder(connector, encoder, i) {
if (amdgpu_connector->use_digital == true) {
if (encoder->encoder_type == DRM_MODE_ENCODER_TMDS)
return encoder;
@ -1158,8 +1133,9 @@ amdgpu_connector_dvi_encoder(struct drm_connector *connector)
/* then check use digital */
/* pick the first one */
if (enc_id)
return drm_encoder_find(connector->dev, NULL, enc_id);
drm_connector_for_each_possible_encoder(connector, encoder, i)
return encoder;
return NULL;
}
@ -1296,15 +1272,7 @@ u16 amdgpu_connector_encoder_get_dp_bridge_encoder_id(struct drm_connector *conn
struct amdgpu_encoder *amdgpu_encoder;
int i;
for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
if (connector->encoder_ids[i] == 0)
break;
encoder = drm_encoder_find(connector->dev, NULL,
connector->encoder_ids[i]);
if (!encoder)
continue;
drm_connector_for_each_possible_encoder(connector, encoder, i) {
amdgpu_encoder = to_amdgpu_encoder(encoder);
switch (amdgpu_encoder->encoder_id) {
@ -1326,14 +1294,7 @@ static bool amdgpu_connector_encoder_is_hbr2(struct drm_connector *connector)
int i;
bool found = false;
for (i = 0; i < DRM_CONNECTOR_MAX_ENCODER; i++) {
if (connector->encoder_ids[i] == 0)
break;
encoder = drm_encoder_find(connector->dev, NULL,
connector->encoder_ids[i]);
if (!encoder)
continue;
drm_connector_for_each_possible_encoder(connector, encoder, i) {
amdgpu_encoder = to_amdgpu_encoder(encoder);
if (amdgpu_encoder->caps & ATOM_ENCODER_CAP_RECORD_HBR2)
found = true;

View File

@ -31,6 +31,7 @@
#include <drm/drm_syncobj.h>
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_gmc.h"
static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
struct drm_amdgpu_cs_chunk_fence *data,
@ -65,11 +66,35 @@ static int amdgpu_cs_user_fence_chunk(struct amdgpu_cs_parser *p,
return 0;
}
static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
static int amdgpu_cs_bo_handles_chunk(struct amdgpu_cs_parser *p,
struct drm_amdgpu_bo_list_in *data)
{
int r;
struct drm_amdgpu_bo_list_entry *info = NULL;
r = amdgpu_bo_create_list_entry_array(data, &info);
if (r)
return r;
r = amdgpu_bo_list_create(p->adev, p->filp, info, data->bo_number,
&p->bo_list);
if (r)
goto error_free;
kvfree(info);
return 0;
error_free:
if (info)
kvfree(info);
return r;
}
static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
union drm_amdgpu_cs *cs = data;
uint64_t *chunk_array_user;
uint64_t *chunk_array;
unsigned size, num_ibs = 0;
@ -163,6 +188,19 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
break;
case AMDGPU_CHUNK_ID_BO_HANDLES:
size = sizeof(struct drm_amdgpu_bo_list_in);
if (p->chunks[i].length_dw * sizeof(uint32_t) < size) {
ret = -EINVAL;
goto free_partial_kdata;
}
ret = amdgpu_cs_bo_handles_chunk(p, p->chunks[i].kdata);
if (ret)
goto free_partial_kdata;
break;
case AMDGPU_CHUNK_ID_DEPENDENCIES:
case AMDGPU_CHUNK_ID_SYNCOBJ_IN:
case AMDGPU_CHUNK_ID_SYNCOBJ_OUT:
@ -186,6 +224,10 @@ static int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
if (p->uf_entry.robj)
p->job->uf_addr = uf_offset;
kfree(chunk_array);
/* Use this opportunity to fill in task info for the vm */
amdgpu_vm_set_task_info(vm);
return 0;
free_all_kdata:
@ -257,7 +299,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
return;
}
total_vram = adev->gmc.real_vram_size - adev->vram_pin_size;
total_vram = adev->gmc.real_vram_size - atomic64_read(&adev->vram_pin_size);
used_vram = amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
free_vram = used_vram >= total_vram ? 0 : total_vram - used_vram;
@ -302,7 +344,7 @@ static void amdgpu_cs_get_threshold_for_moves(struct amdgpu_device *adev,
*max_bytes = us_to_bytes(adev, adev->mm_stats.accum_us);
/* Do the same for visible VRAM if half of it is free */
if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size) {
if (!amdgpu_gmc_vram_full_visible(&adev->gmc)) {
u64 total_vis_vram = adev->gmc.visible_vram_size;
u64 used_vis_vram =
amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
@ -359,7 +401,7 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
* to move it. Don't move anything if the threshold is zero.
*/
if (p->bytes_moved < p->bytes_moved_threshold) {
if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
(bo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED)) {
/* And don't move a CPU_ACCESS_REQUIRED BO to limited
* visible VRAM if we've depleted our allowance to do
@ -377,11 +419,11 @@ static int amdgpu_cs_bo_validate(struct amdgpu_cs_parser *p,
}
retry:
amdgpu_ttm_placement_from_domain(bo, domain);
amdgpu_bo_placement_from_domain(bo, domain);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
p->bytes_moved += ctx.bytes_moved;
if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
amdgpu_bo_in_cpu_visible_vram(bo))
p->bytes_moved_vis += ctx.bytes_moved;
@ -434,9 +476,9 @@ static bool amdgpu_cs_try_evict(struct amdgpu_cs_parser *p,
/* Good we can try to move this BO somewhere else */
update_bytes_moved_vis =
adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
amdgpu_bo_in_cpu_visible_vram(bo);
amdgpu_ttm_placement_from_domain(bo, other);
!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
amdgpu_bo_in_cpu_visible_vram(bo);
amdgpu_bo_placement_from_domain(bo, other);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
p->bytes_moved += ctx.bytes_moved;
if (update_bytes_moved_vis)
@ -490,8 +532,8 @@ static int amdgpu_cs_list_validate(struct amdgpu_cs_parser *p,
/* Check if we have user pages and nobody bound the BO already */
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm) &&
lobj->user_pages) {
amdgpu_ttm_placement_from_domain(bo,
AMDGPU_GEM_DOMAIN_CPU);
amdgpu_bo_placement_from_domain(bo,
AMDGPU_GEM_DOMAIN_CPU);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
return r;
@ -519,23 +561,38 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_list_entry *e;
struct list_head duplicates;
unsigned i, tries = 10;
struct amdgpu_bo *gds;
struct amdgpu_bo *gws;
struct amdgpu_bo *oa;
unsigned tries = 10;
int r;
INIT_LIST_HEAD(&p->validated);
p->bo_list = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle);
if (p->bo_list) {
amdgpu_bo_list_get_list(p->bo_list, &p->validated);
if (p->bo_list->first_userptr != p->bo_list->num_entries)
p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
/* p->bo_list could already be assigned if AMDGPU_CHUNK_ID_BO_HANDLES is present */
if (cs->in.bo_list_handle) {
if (p->bo_list)
return -EINVAL;
r = amdgpu_bo_list_get(fpriv, cs->in.bo_list_handle,
&p->bo_list);
if (r)
return r;
} else if (!p->bo_list) {
/* Create an empty bo_list when no handle is provided */
r = amdgpu_bo_list_create(p->adev, p->filp, NULL, 0,
&p->bo_list);
if (r)
return r;
}
amdgpu_bo_list_get_list(p->bo_list, &p->validated);
if (p->bo_list->first_userptr != p->bo_list->num_entries)
p->mn = amdgpu_mn_get(p->adev, AMDGPU_MN_TYPE_GFX);
INIT_LIST_HEAD(&duplicates);
amdgpu_vm_get_pd_bo(&fpriv->vm, &p->validated, &p->vm_pd);
@ -544,7 +601,6 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
while (1) {
struct list_head need_pages;
unsigned i;
r = ttm_eu_reserve_buffers(&p->ticket, &p->validated, true,
&duplicates);
@ -554,17 +610,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
goto error_free_pages;
}
/* Without a BO list we don't have userptr BOs */
if (!p->bo_list)
break;
INIT_LIST_HEAD(&need_pages);
for (i = p->bo_list->first_userptr;
i < p->bo_list->num_entries; ++i) {
struct amdgpu_bo *bo;
e = &p->bo_list->array[i];
bo = e->robj;
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = e->robj;
if (amdgpu_ttm_tt_userptr_invalidated(bo->tbo.ttm,
&e->user_invalidated) && e->user_pages) {
@ -656,23 +704,12 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
amdgpu_cs_report_moved_bytes(p->adev, p->bytes_moved,
p->bytes_moved_vis);
if (p->bo_list) {
struct amdgpu_vm *vm = &fpriv->vm;
unsigned i;
gds = p->bo_list->gds_obj;
gws = p->bo_list->gws_obj;
oa = p->bo_list->oa_obj;
gds = p->bo_list->gds_obj;
gws = p->bo_list->gws_obj;
oa = p->bo_list->oa_obj;
for (i = 0; i < p->bo_list->num_entries; i++) {
struct amdgpu_bo *bo = p->bo_list->array[i].robj;
p->bo_list->array[i].bo_va = amdgpu_vm_bo_find(vm, bo);
}
} else {
gds = p->adev->gds.gds_gfx_bo;
gws = p->adev->gds.gws_gfx_bo;
oa = p->adev->gds.oa_gfx_bo;
}
amdgpu_bo_list_for_each_entry(e, p->bo_list)
e->bo_va = amdgpu_vm_bo_find(vm, e->robj);
if (gds) {
p->job->gds_base = amdgpu_bo_gpu_offset(gds);
@ -700,18 +737,13 @@ error_validate:
error_free_pages:
if (p->bo_list) {
for (i = p->bo_list->first_userptr;
i < p->bo_list->num_entries; ++i) {
e = &p->bo_list->array[i];
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
if (!e->user_pages)
continue;
if (!e->user_pages)
continue;
release_pages(e->user_pages,
e->robj->tbo.ttm->num_pages);
kvfree(e->user_pages);
}
release_pages(e->user_pages,
e->robj->tbo.ttm->num_pages);
kvfree(e->user_pages);
}
return r;
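The open-coded loops over p->bo_list->array in this file are replaced by iterator helpers. As a rough sketch of their shape (hypothetical; the real macros live in amdgpu_bo_list.h and may reach the entry array differently):
/* Hypothetical sketch of the bo_list iterators used in this patch; the
 * userptr variant simply starts at first_userptr instead of 0. */
#define amdgpu_bo_list_for_each_entry(e, list) \
	for ((e) = &(list)->array[0]; \
	     (e) != &(list)->array[(list)->num_entries]; ++(e))

#define amdgpu_bo_list_for_each_userptr_entry(e, list) \
	for ((e) = &(list)->array[(list)->first_userptr]; \
	     (e) != &(list)->array[(list)->num_entries]; ++(e))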
@ -773,12 +805,13 @@ static void amdgpu_cs_parser_fini(struct amdgpu_cs_parser *parser, int error,
static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
{
struct amdgpu_device *adev = p->adev;
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_device *adev = p->adev;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_bo_list_entry *e;
struct amdgpu_bo_va *bo_va;
struct amdgpu_bo *bo;
int i, r;
int r;
r = amdgpu_vm_clear_freed(adev, vm, NULL);
if (r)
@ -808,29 +841,26 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
return r;
}
if (p->bo_list) {
for (i = 0; i < p->bo_list->num_entries; i++) {
struct dma_fence *f;
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
struct dma_fence *f;
/* ignore duplicates */
bo = p->bo_list->array[i].robj;
if (!bo)
continue;
/* ignore duplicates */
bo = e->robj;
if (!bo)
continue;
bo_va = p->bo_list->array[i].bo_va;
if (bo_va == NULL)
continue;
bo_va = e->bo_va;
if (bo_va == NULL)
continue;
r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
f = bo_va->last_pt_update;
r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
if (r)
return r;
}
r = amdgpu_vm_bo_update(adev, bo_va, false);
if (r)
return r;
f = bo_va->last_pt_update;
r = amdgpu_sync_fence(adev, &p->job->sync, f, false);
if (r)
return r;
}
r = amdgpu_vm_handle_moved(adev, vm);
@ -845,15 +875,14 @@ static int amdgpu_bo_vm_update_pte(struct amdgpu_cs_parser *p)
if (r)
return r;
if (amdgpu_vm_debug && p->bo_list) {
if (amdgpu_vm_debug) {
/* Invalidate all BOs to test for userspace bugs */
for (i = 0; i < p->bo_list->num_entries; i++) {
amdgpu_bo_list_for_each_entry(e, p->bo_list) {
/* ignore duplicates */
bo = p->bo_list->array[i].robj;
if (!bo)
if (!e->robj)
continue;
amdgpu_vm_bo_invalidate(adev, bo, false);
amdgpu_vm_bo_invalidate(adev, e->robj, false);
}
}
@ -865,11 +894,11 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
{
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_vm *vm = &fpriv->vm;
struct amdgpu_ring *ring = p->job->ring;
struct amdgpu_ring *ring = p->ring;
int r;
/* Only for UVD/VCE VM emulation */
if (p->job->ring->funcs->parse_cs) {
if (p->ring->funcs->parse_cs || p->ring->funcs->patch_cs_in_place) {
unsigned i, j;
for (i = 0, j = 0; i < p->nchunks && j < p->job->num_ibs; i++) {
@ -910,12 +939,20 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,
offset = m->start * AMDGPU_GPU_PAGE_SIZE;
kptr += va_start - offset;
memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
amdgpu_bo_kunmap(aobj);
if (p->ring->funcs->parse_cs) {
memcpy(ib->ptr, kptr, chunk_ib->ib_bytes);
amdgpu_bo_kunmap(aobj);
r = amdgpu_ring_parse_cs(ring, p, j);
if (r)
return r;
r = amdgpu_ring_parse_cs(ring, p, j);
if (r)
return r;
} else {
ib->ptr = (uint32_t *)kptr;
r = amdgpu_ring_patch_cs_in_place(ring, p, j);
amdgpu_bo_kunmap(aobj);
if (r)
return r;
}
j++;
}
@ -983,10 +1020,10 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
}
}
if (parser->job->ring && parser->job->ring != ring)
if (parser->ring && parser->ring != ring)
return -EINVAL;
parser->job->ring = ring;
parser->ring = ring;
r = amdgpu_ib_get(adev, vm,
ring->funcs->parse_cs ? chunk_ib->ib_bytes : 0,
@ -1005,11 +1042,11 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
/* UVD & VCE fw doesn't support user fences */
if (parser->job->uf_addr && (
parser->job->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
parser->job->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
parser->ring->funcs->type == AMDGPU_RING_TYPE_UVD ||
parser->ring->funcs->type == AMDGPU_RING_TYPE_VCE))
return -EINVAL;
return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->job->ring->idx);
return amdgpu_ctx_wait_prev_fence(parser->ctx, parser->ring->idx);
}
static int amdgpu_cs_process_fence_dep(struct amdgpu_cs_parser *p,
@ -1160,31 +1197,30 @@ static void amdgpu_cs_post_dependencies(struct amdgpu_cs_parser *p)
static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
union drm_amdgpu_cs *cs)
{
struct amdgpu_ring *ring = p->job->ring;
struct amdgpu_fpriv *fpriv = p->filp->driver_priv;
struct amdgpu_ring *ring = p->ring;
struct drm_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
enum drm_sched_priority priority;
struct amdgpu_bo_list_entry *e;
struct amdgpu_job *job;
unsigned i;
uint64_t seq;
int r;
amdgpu_mn_lock(p->mn);
if (p->bo_list) {
for (i = p->bo_list->first_userptr;
i < p->bo_list->num_entries; ++i) {
struct amdgpu_bo *bo = p->bo_list->array[i].robj;
amdgpu_bo_list_for_each_userptr_entry(e, p->bo_list) {
struct amdgpu_bo *bo = e->robj;
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
amdgpu_mn_unlock(p->mn);
return -ERESTARTSYS;
}
if (amdgpu_ttm_tt_userptr_needs_pages(bo->tbo.ttm)) {
amdgpu_mn_unlock(p->mn);
return -ERESTARTSYS;
}
}
job = p->job;
p->job = NULL;
r = drm_sched_job_init(&job->base, &ring->sched, entity, p->filp);
r = drm_sched_job_init(&job->base, entity, p->filp);
if (r) {
amdgpu_job_free(job);
amdgpu_mn_unlock(p->mn);
@ -1192,7 +1228,6 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
}
job->owner = p->filp;
job->fence_ctx = entity->fence_context;
p->fence = dma_fence_get(&job->base.s_fence->finished);
r = amdgpu_ctx_add_fence(p->ctx, ring, p->fence, &seq);
@ -1210,11 +1245,15 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
job->uf_sequence = seq;
amdgpu_job_free_resources(job);
amdgpu_ring_priority_get(job->ring, job->base.s_priority);
trace_amdgpu_cs_ioctl(job);
amdgpu_vm_bo_trace_cs(&fpriv->vm, &p->ticket);
priority = job->base.s_priority;
drm_sched_entity_push_job(&job->base, entity);
ring = to_amdgpu_ring(entity->rq->sched);
amdgpu_ring_priority_get(ring, priority);
ttm_eu_fence_buffer_objects(&p->ticket, &p->validated, p->fence);
amdgpu_mn_unlock(p->mn);
@ -1605,7 +1644,7 @@ int amdgpu_cs_find_mapping(struct amdgpu_cs_parser *parser,
if (!((*bo)->flags & AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS)) {
(*bo)->flags |= AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
amdgpu_ttm_placement_from_domain(*bo, (*bo)->allowed_domains);
amdgpu_bo_placement_from_domain(*bo, (*bo)->allowed_domains);
r = ttm_bo_validate(&(*bo)->tbo, &(*bo)->placement, &ctx);
if (r)
return r;

View File

@ -90,8 +90,8 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
if (ring == &adev->gfx.kiq.ring)
continue;
r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
rq, &ctx->guilty);
r = drm_sched_entity_init(&ctx->rings[i].entity,
&rq, 1, &ctx->guilty);
if (r)
goto failed;
}
@ -104,8 +104,7 @@ static int amdgpu_ctx_init(struct amdgpu_device *adev,
failed:
for (j = 0; j < i; j++)
drm_sched_entity_fini(&adev->rings[j]->sched,
&ctx->rings[j].entity);
drm_sched_entity_destroy(&ctx->rings[j].entity);
kfree(ctx->fences);
ctx->fences = NULL;
return r;
@ -178,8 +177,7 @@ static void amdgpu_ctx_do_release(struct kref *ref)
if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
continue;
drm_sched_entity_fini(&ctx->adev->rings[i]->sched,
&ctx->rings[i].entity);
drm_sched_entity_destroy(&ctx->rings[i].entity);
}
amdgpu_ctx_fini(ref);
@ -444,6 +442,35 @@ void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
idr_init(&mgr->ctx_handles);
}
void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
{
struct amdgpu_ctx *ctx;
struct idr *idp;
uint32_t id, i;
long max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY;
idp = &mgr->ctx_handles;
mutex_lock(&mgr->lock);
idr_for_each_entry(idp, ctx, id) {
if (!ctx->adev) {
mutex_unlock(&mgr->lock);
return;
}
for (i = 0; i < ctx->adev->num_rings; i++) {
if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
continue;
max_wait = drm_sched_entity_flush(&ctx->rings[i].entity,
max_wait);
}
}
mutex_unlock(&mgr->lock);
}
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
struct amdgpu_ctx *ctx;
@ -463,35 +490,7 @@ void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
continue;
if (kref_read(&ctx->refcount) == 1)
drm_sched_entity_do_release(&ctx->adev->rings[i]->sched,
&ctx->rings[i].entity);
else
DRM_ERROR("ctx %p is still alive\n", ctx);
}
}
}
void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr)
{
struct amdgpu_ctx *ctx;
struct idr *idp;
uint32_t id, i;
idp = &mgr->ctx_handles;
idr_for_each_entry(idp, ctx, id) {
if (!ctx->adev)
return;
for (i = 0; i < ctx->adev->num_rings; i++) {
if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
continue;
if (kref_read(&ctx->refcount) == 1)
drm_sched_entity_cleanup(&ctx->adev->rings[i]->sched,
&ctx->rings[i].entity);
drm_sched_entity_fini(&ctx->rings[i].entity);
else
DRM_ERROR("ctx %p is still alive\n", ctx);
}
@ -504,7 +503,7 @@ void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
struct idr *idp;
uint32_t id;
amdgpu_ctx_mgr_entity_cleanup(mgr);
amdgpu_ctx_mgr_entity_fini(mgr);
idp = &mgr->ctx_handles;

View File

@ -25,6 +25,7 @@
* Alex Deucher
* Jerome Glisse
*/
#include <linux/power_supply.h>
#include <linux/kthread.h>
#include <linux/console.h>
#include <linux/slab.h>
@ -675,17 +676,15 @@ void amdgpu_device_vram_location(struct amdgpu_device *adev,
}
/**
* amdgpu_device_gart_location - try to find GTT location
* amdgpu_device_gart_location - try to find GART location
*
* @adev: amdgpu device structure holding all necessary information
* @mc: memory controller structure holding memory information
*
* Function will try to place GTT before or after VRAM.
* Function will try to place GART before or after VRAM.
*
* If GTT size is bigger than space left then we adjust GTT size.
* If GART size is bigger than space left then we adjust GART size.
* Thus this function will never fail.
*
* FIXME: when reducing GTT size align new size on power of 2.
*/
void amdgpu_device_gart_location(struct amdgpu_device *adev,
struct amdgpu_gmc *mc)
@ -698,13 +697,13 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev,
size_bf = mc->vram_start;
if (size_bf > size_af) {
if (mc->gart_size > size_bf) {
dev_warn(adev->dev, "limiting GTT\n");
dev_warn(adev->dev, "limiting GART\n");
mc->gart_size = size_bf;
}
mc->gart_start = 0;
} else {
if (mc->gart_size > size_af) {
dev_warn(adev->dev, "limiting GTT\n");
dev_warn(adev->dev, "limiting GART\n");
mc->gart_size = size_af;
}
/* VCE doesn't like it when BOs cross a 4GB segment, so align
@ -713,7 +712,7 @@ void amdgpu_device_gart_location(struct amdgpu_device *adev,
mc->gart_start = ALIGN(mc->vram_end + 1, 0x100000000ULL);
}
mc->gart_end = mc->gart_start + mc->gart_size - 1;
dev_info(adev->dev, "GTT: %lluM 0x%016llX - 0x%016llX\n",
dev_info(adev->dev, "GART: %lluM 0x%016llX - 0x%016llX\n",
mc->gart_size >> 20, mc->gart_start, mc->gart_end);
}
@ -1077,7 +1076,7 @@ static const struct vga_switcheroo_client_ops amdgpu_switcheroo_ops = {
/**
* amdgpu_device_ip_set_clockgating_state - set the CG state
*
* @adev: amdgpu_device pointer
* @dev: amdgpu_device pointer
* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
* @state: clockgating state (gate or ungate)
*
@ -1111,7 +1110,7 @@ int amdgpu_device_ip_set_clockgating_state(void *dev,
/**
* amdgpu_device_ip_set_powergating_state - set the PG state
*
* @adev: amdgpu_device pointer
* @dev: amdgpu_device pointer
* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
* @state: powergating state (gate or ungate)
*
@ -1222,7 +1221,7 @@ bool amdgpu_device_ip_is_idle(struct amdgpu_device *adev,
* amdgpu_device_ip_get_ip_block - get a hw IP pointer
*
* @adev: amdgpu_device pointer
* @block_type: Type of hardware IP (SMU, GFX, UVD, etc.)
* @type: Type of hardware IP (SMU, GFX, UVD, etc.)
*
* Returns a pointer to the hardware IP block structure
* if it exists for the asic, otherwise NULL.
@ -1708,10 +1707,6 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
if (amdgpu_emu_mode == 1)
return 0;
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
@ -1731,17 +1726,34 @@ static int amdgpu_device_ip_late_set_cg_state(struct amdgpu_device *adev)
}
}
if (adev->powerplay.pp_feature & PP_GFXOFF_MASK) {
/* enable gfx powergating */
amdgpu_device_ip_set_powergating_state(adev,
AMD_IP_BLOCK_TYPE_GFX,
AMD_PG_STATE_GATE);
/* enable gfxoff */
amdgpu_device_ip_set_powergating_state(adev,
AMD_IP_BLOCK_TYPE_SMC,
AMD_PG_STATE_GATE);
}
return 0;
}
static int amdgpu_device_ip_late_set_pg_state(struct amdgpu_device *adev)
{
int i = 0, r;
if (amdgpu_emu_mode == 1)
return 0;
for (i = 0; i < adev->num_ip_blocks; i++) {
if (!adev->ip_blocks[i].status.valid)
continue;
/* skip PG for VCE/UVD/VCN, it's handled specially */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCN &&
adev->ip_blocks[i].version->funcs->set_powergating_state) {
/* enable powergating to save power */
r = adev->ip_blocks[i].version->funcs->set_powergating_state((void *)adev,
AMD_PG_STATE_GATE);
if (r) {
DRM_ERROR("set_powergating_state(gate) of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
}
}
return 0;
}
@ -1775,6 +1787,9 @@ static int amdgpu_device_ip_late_init(struct amdgpu_device *adev)
}
}
amdgpu_device_ip_late_set_cg_state(adev);
amdgpu_device_ip_late_set_pg_state(adev);
queue_delayed_work(system_wq, &adev->late_init_work,
msecs_to_jiffies(AMDGPU_RESUME_MS));
@ -1813,6 +1828,8 @@ static int amdgpu_device_ip_fini(struct amdgpu_device *adev)
adev->ip_blocks[i].version->funcs->name, r);
return r;
}
if (adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false);
r = adev->ip_blocks[i].version->funcs->hw_fini((void *)adev);
/* XXX handle errors */
if (r) {
@ -1901,11 +1918,15 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
{
struct amdgpu_device *adev =
container_of(work, struct amdgpu_device, late_init_work.work);
amdgpu_device_ip_late_set_cg_state(adev);
int r;
r = amdgpu_ib_ring_tests(adev);
if (r)
DRM_ERROR("ib ring test failed (%d).\n", r);
}
/**
* amdgpu_device_ip_suspend - run suspend for hardware IPs
* amdgpu_device_ip_suspend_phase1 - run suspend for hardware IPs (phase 1)
*
* @adev: amdgpu_device pointer
*
@ -1915,18 +1936,60 @@ static void amdgpu_device_ip_late_init_func_handler(struct work_struct *work)
* in each IP into a state suitable for suspend.
* Returns 0 on success, negative error code on failure.
*/
int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
{
int i, r;
if (amdgpu_sriov_vf(adev))
amdgpu_virt_request_full_gpu(adev, false);
/* ungate SMC block powergating */
if (adev->powerplay.pp_feature & PP_GFXOFF_MASK)
amdgpu_device_ip_set_powergating_state(adev,
AMD_IP_BLOCK_TYPE_SMC,
AMD_CG_STATE_UNGATE);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
continue;
/* displays are handled separately */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE) {
/* ungate blocks so that suspend can properly shut them down */
if (adev->ip_blocks[i].version->funcs->set_clockgating_state) {
r = adev->ip_blocks[i].version->funcs->set_clockgating_state((void *)adev,
AMD_CG_STATE_UNGATE);
if (r) {
DRM_ERROR("set_clockgating_state(ungate) of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
}
}
/* XXX handle errors */
r = adev->ip_blocks[i].version->funcs->suspend(adev);
/* XXX handle errors */
if (r) {
DRM_ERROR("suspend of IP block <%s> failed %d\n",
adev->ip_blocks[i].version->funcs->name, r);
}
}
}
if (amdgpu_sriov_vf(adev))
amdgpu_virt_release_full_gpu(adev, false);
return 0;
}
/**
* amdgpu_device_ip_suspend_phase2 - run suspend for hardware IPs (phase 2)
*
* @adev: amdgpu_device pointer
*
* Main suspend function for hardware IPs. The list of all the hardware
* IPs that make up the asic is walked, clockgating is disabled and the
* suspend callbacks are run. suspend puts the hardware and software state
* in each IP into a state suitable for suspend.
* Returns 0 on success, negative error code on failure.
*/
static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
{
int i, r;
if (amdgpu_sriov_vf(adev))
amdgpu_virt_request_full_gpu(adev, false);
/* ungate SMC block first */
r = amdgpu_device_ip_set_clockgating_state(adev, AMD_IP_BLOCK_TYPE_SMC,
@ -1935,9 +1998,16 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
DRM_ERROR("set_clockgating_state(ungate) SMC failed %d\n", r);
}
/* call the SMU to disable the gfxoff feature before suspending */
if (adev->powerplay.pp_funcs->set_powergating_by_smu)
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_GFX, false);
for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
if (!adev->ip_blocks[i].status.valid)
continue;
/* displays are handled in phase1 */
if (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_DCE)
continue;
/* ungate blocks so that suspend can properly shut them down */
if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_SMC &&
adev->ip_blocks[i].version->funcs->set_clockgating_state) {
@ -1963,6 +2033,29 @@ int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
return 0;
}
/**
* amdgpu_device_ip_suspend - run suspend for hardware IPs
*
* @adev: amdgpu_device pointer
*
* Main suspend function for hardware IPs. The list of all the hardware
* IPs that make up the asic is walked, clockgating is disabled and the
* suspend callbacks are run. suspend puts the hardware and software state
* in each IP into a state suitable for suspend.
* Returns 0 on success, negative error code on failure.
*/
int amdgpu_device_ip_suspend(struct amdgpu_device *adev)
{
int r;
r = amdgpu_device_ip_suspend_phase1(adev);
if (r)
return r;
r = amdgpu_device_ip_suspend_phase2(adev);
return r;
}
static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
{
int i, r;
@ -1985,7 +2078,7 @@ static int amdgpu_device_ip_reinit_early_sriov(struct amdgpu_device *adev)
continue;
r = block->version->funcs->hw_init(adev);
DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
if (r)
return r;
}
@ -2020,7 +2113,7 @@ static int amdgpu_device_ip_reinit_late_sriov(struct amdgpu_device *adev)
continue;
r = block->version->funcs->hw_init(adev);
DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"successed");
DRM_INFO("RE-INIT: %s %s\n", block->version->funcs->name, r?"failed":"succeeded");
if (r)
return r;
}
@ -2181,7 +2274,7 @@ bool amdgpu_device_asic_has_dc_support(enum amd_asic_type asic_type)
case CHIP_VEGA10:
case CHIP_VEGA12:
case CHIP_VEGA20:
#if defined(CONFIG_DRM_AMD_DC_DCN1_0)
#ifdef CONFIG_X86
case CHIP_RAVEN:
#endif
return amdgpu_dc != 0;
@ -2210,7 +2303,7 @@ bool amdgpu_device_has_dc_support(struct amdgpu_device *adev)
* amdgpu_device_init - initialize the driver
*
* @adev: amdgpu_device pointer
* @pdev: drm dev pointer
* @ddev: drm dev pointer
* @pdev: pci dev pointer
* @flags: driver flags
*
@ -2301,6 +2394,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
INIT_DELAYED_WORK(&adev->late_init_work,
amdgpu_device_ip_late_init_func_handler);
adev->pm.ac_power = power_supply_is_system_supplied() > 0 ? true : false;
/* Registers mapping */
/* TODO: block userspace mapping of io register */
if (adev->asic_type >= CHIP_BONAIRE) {
@ -2581,8 +2676,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
/**
* amdgpu_device_suspend - initiate device suspend
*
* @pdev: drm dev pointer
* @state: suspend state
* @dev: drm dev pointer
* @suspend: suspend state
* @fbcon : notify the fbdev of suspend
*
* Puts the hw in the suspend state (all asics).
* Returns 0 for success or an error on failure.
@ -2606,6 +2702,9 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
drm_kms_helper_poll_disable(dev);
if (fbcon)
amdgpu_fbdev_set_suspend(adev, 1);
if (!amdgpu_device_has_dc_support(adev)) {
/* turn off display hw */
drm_modeset_lock_all(dev);
@ -2613,44 +2712,46 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
drm_helper_connector_dpms(connector, DRM_MODE_DPMS_OFF);
}
drm_modeset_unlock_all(dev);
/* unpin the front buffers and cursors */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_framebuffer *fb = crtc->primary->fb;
struct amdgpu_bo *robj;
if (amdgpu_crtc->cursor_bo) {
struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
r = amdgpu_bo_reserve(aobj, true);
if (r == 0) {
amdgpu_bo_unpin(aobj);
amdgpu_bo_unreserve(aobj);
}
}
if (fb == NULL || fb->obj[0] == NULL) {
continue;
}
robj = gem_to_amdgpu_bo(fb->obj[0]);
/* don't unpin kernel fb objects */
if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
r = amdgpu_bo_reserve(robj, true);
if (r == 0) {
amdgpu_bo_unpin(robj);
amdgpu_bo_unreserve(robj);
}
}
}
}
amdgpu_amdkfd_suspend(adev);
/* unpin the front buffers and cursors */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
struct drm_framebuffer *fb = crtc->primary->fb;
struct amdgpu_bo *robj;
r = amdgpu_device_ip_suspend_phase1(adev);
if (amdgpu_crtc->cursor_bo) {
struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
r = amdgpu_bo_reserve(aobj, true);
if (r == 0) {
amdgpu_bo_unpin(aobj);
amdgpu_bo_unreserve(aobj);
}
}
if (fb == NULL || fb->obj[0] == NULL) {
continue;
}
robj = gem_to_amdgpu_bo(fb->obj[0]);
/* don't unpin kernel fb objects */
if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
r = amdgpu_bo_reserve(robj, true);
if (r == 0) {
amdgpu_bo_unpin(robj);
amdgpu_bo_unreserve(robj);
}
}
}
/* evict vram memory */
amdgpu_bo_evict_vram(adev);
amdgpu_fence_driver_suspend(adev);
r = amdgpu_device_ip_suspend(adev);
r = amdgpu_device_ip_suspend_phase2(adev);
/* evict remaining vram memory
* This second call to evict vram is to evict the gart page table
@ -2669,18 +2770,15 @@ int amdgpu_device_suspend(struct drm_device *dev, bool suspend, bool fbcon)
DRM_ERROR("amdgpu asic reset failed\n");
}
if (fbcon) {
console_lock();
amdgpu_fbdev_set_suspend(adev, 1);
console_unlock();
}
return 0;
}
/**
* amdgpu_device_resume - initiate device resume
*
* @pdev: drm dev pointer
* @dev: drm dev pointer
* @resume: resume state
* @fbcon : notify the fbdev of resume
*
* Bring the hw back to operating state (all asics).
* Returns 0 for success or an error on failure.
@ -2696,15 +2794,12 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
return 0;
if (fbcon)
console_lock();
if (resume) {
pci_set_power_state(dev->pdev, PCI_D0);
pci_restore_state(dev->pdev);
r = pci_enable_device(dev->pdev);
if (r)
goto unlock;
return r;
}
/* post card */
@ -2717,29 +2812,30 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
r = amdgpu_device_ip_resume(adev);
if (r) {
DRM_ERROR("amdgpu_device_ip_resume failed (%d).\n", r);
goto unlock;
return r;
}
amdgpu_fence_driver_resume(adev);
r = amdgpu_device_ip_late_init(adev);
if (r)
goto unlock;
return r;
/* pin cursors */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
if (!amdgpu_device_has_dc_support(adev)) {
/* pin cursors */
list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
if (amdgpu_crtc->cursor_bo) {
struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
r = amdgpu_bo_reserve(aobj, true);
if (r == 0) {
r = amdgpu_bo_pin(aobj,
AMDGPU_GEM_DOMAIN_VRAM,
&amdgpu_crtc->cursor_addr);
if (r != 0)
DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
amdgpu_bo_unreserve(aobj);
if (amdgpu_crtc->cursor_bo) {
struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
r = amdgpu_bo_reserve(aobj, true);
if (r == 0) {
r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
if (r != 0)
DRM_ERROR("Failed to pin cursor BO (%d)\n", r);
amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
amdgpu_bo_unreserve(aobj);
}
}
}
}
@ -2763,6 +2859,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
}
drm_modeset_unlock_all(dev);
}
amdgpu_fbdev_set_suspend(adev, 0);
}
drm_kms_helper_poll_enable(dev);
@ -2786,15 +2883,7 @@ int amdgpu_device_resume(struct drm_device *dev, bool resume, bool fbcon)
#ifdef CONFIG_PM
dev->dev->power.disable_depth--;
#endif
if (fbcon)
amdgpu_fbdev_set_suspend(adev, 0);
unlock:
if (fbcon)
console_unlock();
return r;
return 0;
}
/**
@ -3019,7 +3108,7 @@ static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
long tmo;
if (amdgpu_sriov_runtime(adev))
tmo = msecs_to_jiffies(amdgpu_lockup_timeout);
tmo = msecs_to_jiffies(8000);
else
tmo = msecs_to_jiffies(100);
@ -3071,7 +3160,7 @@ static int amdgpu_device_handle_vram_lost(struct amdgpu_device *adev)
* @adev: amdgpu device pointer
*
* attempt to do soft-reset or full-reset and reinitialize Asic
* return 0 means successed otherwise failed
* return 0 means succeeded otherwise failed
*/
static int amdgpu_device_reset(struct amdgpu_device *adev)
{
@ -3146,9 +3235,10 @@ out:
* amdgpu_device_reset_sriov - reset ASIC for SR-IOV vf
*
* @adev: amdgpu device pointer
* @from_hypervisor: request from hypervisor
*
* do VF FLR and reinitialize Asic
* return 0 means successed otherwise failed
* return 0 means succeeded otherwise failed
*/
static int amdgpu_device_reset_sriov(struct amdgpu_device *adev,
bool from_hypervisor)
@ -3193,7 +3283,7 @@ error:
*
* @adev: amdgpu device pointer
* @job: which job trigger hang
* @force forces reset regardless of amdgpu_gpu_recovery
* @force: forces reset regardless of amdgpu_gpu_recovery
*
* Attempt to reset the GPU if it has hung (all asics).
* Returns 0 for success or an error on failure.
@ -3220,6 +3310,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
atomic_inc(&adev->gpu_reset_counter);
adev->in_gpu_reset = 1;
/* Block kfd */
amdgpu_amdkfd_pre_reset(adev);
/* block TTM */
resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);
@ -3232,10 +3325,10 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
kthread_park(ring->sched.thread);
if (job && job->ring->idx != i)
if (job && job->base.sched == &ring->sched)
continue;
drm_sched_hw_job_reset(&ring->sched, &job->base);
drm_sched_hw_job_reset(&ring->sched, job ? &job->base : NULL);
/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
amdgpu_fence_driver_force_completion(ring);
@ -3256,7 +3349,7 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
* or all rings (in the case @job is NULL)
* after above amdgpu_reset accomplished
*/
if ((!job || job->ring->idx == i) && !r)
if ((!job || job->base.sched == &ring->sched) && !r)
drm_sched_job_recovery(&ring->sched);
kthread_unpark(ring->sched.thread);
@ -3273,9 +3366,11 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
dev_info(adev->dev, "GPU reset(%d) failed\n", atomic_read(&adev->gpu_reset_counter));
amdgpu_vf_error_put(adev, AMDGIM_ERROR_VF_GPU_RESET_FAIL, 0, r);
} else {
dev_info(adev->dev, "GPU reset(%d) successed!\n",atomic_read(&adev->gpu_reset_counter));
dev_info(adev->dev, "GPU reset(%d) succeeded!\n",atomic_read(&adev->gpu_reset_counter));
}
/* unlock kfd */
amdgpu_amdkfd_post_reset(adev);
amdgpu_vf_error_trans_all(adev);
adev->in_gpu_reset = 0;
mutex_unlock(&adev->lock_reset);
@ -3293,8 +3388,9 @@ int amdgpu_device_gpu_recover(struct amdgpu_device *adev,
*/
static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
{
u32 mask;
int ret;
struct pci_dev *pdev;
enum pci_bus_speed speed_cap;
enum pcie_link_width link_width;
if (amdgpu_pcie_gen_cap)
adev->pm.pcie_gen_mask = amdgpu_pcie_gen_cap;
@ -3312,27 +3408,61 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
}
if (adev->pm.pcie_gen_mask == 0) {
ret = drm_pcie_get_speed_cap_mask(adev->ddev, &mask);
if (!ret) {
adev->pm.pcie_gen_mask = (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
/* asic caps */
pdev = adev->pdev;
speed_cap = pcie_get_speed_cap(pdev);
if (speed_cap == PCI_SPEED_UNKNOWN) {
adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
if (mask & DRM_PCIE_SPEED_25)
adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
if (mask & DRM_PCIE_SPEED_50)
adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2;
if (mask & DRM_PCIE_SPEED_80)
adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3;
} else {
adev->pm.pcie_gen_mask = AMDGPU_DEFAULT_PCIE_GEN_MASK;
if (speed_cap == PCIE_SPEED_16_0GT)
adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3 |
CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4);
else if (speed_cap == PCIE_SPEED_8_0GT)
adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2 |
CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3);
else if (speed_cap == PCIE_SPEED_5_0GT)
adev->pm.pcie_gen_mask |= (CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1 |
CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2);
else
adev->pm.pcie_gen_mask |= CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1;
}
/* platform caps */
pdev = adev->ddev->pdev->bus->self;
speed_cap = pcie_get_speed_cap(pdev);
if (speed_cap == PCI_SPEED_UNKNOWN) {
adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
} else {
if (speed_cap == PCIE_SPEED_16_0GT)
adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4);
else if (speed_cap == PCIE_SPEED_8_0GT)
adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3);
else if (speed_cap == PCIE_SPEED_5_0GT)
adev->pm.pcie_gen_mask |= (CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2);
else
adev->pm.pcie_gen_mask |= CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
}
}
if (adev->pm.pcie_mlw_mask == 0) {
ret = drm_pcie_get_max_link_width(adev->ddev, &mask);
if (!ret) {
switch (mask) {
case 32:
pdev = adev->ddev->pdev->bus->self;
link_width = pcie_get_width_cap(pdev);
if (link_width == PCIE_LNK_WIDTH_UNKNOWN) {
adev->pm.pcie_mlw_mask |= AMDGPU_DEFAULT_PCIE_MLW_MASK;
} else {
switch (link_width) {
case PCIE_LNK_X32:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X32 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
@ -3341,7 +3471,7 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case 16:
case PCIE_LNK_X16:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X16 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
@ -3349,36 +3479,34 @@ static void amdgpu_device_get_pcie_info(struct amdgpu_device *adev)
CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case 12:
case PCIE_LNK_X12:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X12 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case 8:
case PCIE_LNK_X8:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X8 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case 4:
case PCIE_LNK_X4:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X4 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case 2:
case PCIE_LNK_X2:
adev->pm.pcie_mlw_mask = (CAIL_PCIE_LINK_WIDTH_SUPPORT_X2 |
CAIL_PCIE_LINK_WIDTH_SUPPORT_X1);
break;
case 1:
case PCIE_LNK_X1:
adev->pm.pcie_mlw_mask = CAIL_PCIE_LINK_WIDTH_SUPPORT_X1;
break;
default:
break;
}
} else {
adev->pm.pcie_mlw_mask = AMDGPU_DEFAULT_PCIE_MLW_MASK;
}
}
}
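The asic and platform branches above repeat the same translation from a queried speed capability to a cumulative CAIL mask. A condensed illustration of that pattern around the core PCI helper this series switches to (the helper function below is illustrative only, not part of the patch):
/* Illustration: map a known pci_bus_speed value from pcie_get_speed_cap()
 * onto the cumulative CAIL link-speed mask used above; the driver handles
 * PCI_SPEED_UNKNOWN separately with a wider default mask. */
static u32 amdgpu_speed_cap_to_mask(enum pci_bus_speed cap)
{
	switch (cap) {
	case PCIE_SPEED_16_0GT:
		return CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
		       CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
		       CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 |
		       CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4;
	case PCIE_SPEED_8_0GT:
		return CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
		       CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 |
		       CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3;
	case PCIE_SPEED_5_0GT:
		return CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 |
		       CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2;
	default:
		/* Gen1 (2.5 GT/s) and anything older */
		return CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1;
	}
}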

View File

@ -157,7 +157,6 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
struct amdgpu_bo *new_abo;
unsigned long flags;
u64 tiling_flags;
u64 base;
int i, r;
work = kzalloc(sizeof *work, GFP_KERNEL);
@ -189,12 +188,18 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
goto cleanup;
}
r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev), &base);
r = amdgpu_bo_pin(new_abo, amdgpu_display_supported_domains(adev));
if (unlikely(r != 0)) {
DRM_ERROR("failed to pin new abo buffer before flip\n");
goto unreserve;
}
r = amdgpu_ttm_alloc_gart(&new_abo->tbo);
if (unlikely(r != 0)) {
DRM_ERROR("%p bind failed\n", new_abo);
goto unpin;
}
r = reservation_object_get_fences_rcu(new_abo->tbo.resv, &work->excl,
&work->shared_count,
&work->shared);
@ -206,7 +211,7 @@ int amdgpu_display_crtc_page_flip_target(struct drm_crtc *crtc,
amdgpu_bo_get_tiling_flags(new_abo, &tiling_flags);
amdgpu_bo_unreserve(new_abo);
work->base = base;
work->base = amdgpu_bo_gpu_offset(new_abo);
work->target_vblank = target - (uint32_t)drm_crtc_vblank_count(crtc) +
amdgpu_get_vblank_counter_kms(dev, work->crtc_id);

View File

@ -28,6 +28,7 @@
#include "amdgpu_i2c.h"
#include "amdgpu_dpm.h"
#include "atom.h"
#include "amd_pcie.h"
void amdgpu_dpm_print_class_info(u32 class, u32 class2)
{
@ -936,9 +937,11 @@ enum amdgpu_pcie_gen amdgpu_get_pcie_gen_support(struct amdgpu_device *adev,
case AMDGPU_PCIE_GEN3:
return AMDGPU_PCIE_GEN3;
default:
if ((sys_mask & DRM_PCIE_SPEED_80) && (default_gen == AMDGPU_PCIE_GEN3))
if ((sys_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3) &&
(default_gen == AMDGPU_PCIE_GEN3))
return AMDGPU_PCIE_GEN3;
else if ((sys_mask & DRM_PCIE_SPEED_50) && (default_gen == AMDGPU_PCIE_GEN2))
else if ((sys_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2) &&
(default_gen == AMDGPU_PCIE_GEN2))
return AMDGPU_PCIE_GEN2;
else
return AMDGPU_PCIE_GEN1;

View File

@ -287,12 +287,6 @@ enum amdgpu_pcie_gen {
#define amdgpu_dpm_force_performance_level(adev, l) \
((adev)->powerplay.pp_funcs->force_performance_level((adev)->powerplay.pp_handle, (l)))
#define amdgpu_dpm_powergate_uvd(adev, g) \
((adev)->powerplay.pp_funcs->powergate_uvd((adev)->powerplay.pp_handle, (g)))
#define amdgpu_dpm_powergate_vce(adev, g) \
((adev)->powerplay.pp_funcs->powergate_vce((adev)->powerplay.pp_handle, (g)))
#define amdgpu_dpm_get_current_power_state(adev) \
((adev)->powerplay.pp_funcs->get_current_power_state((adev)->powerplay.pp_handle))
@ -347,6 +341,10 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->set_clockgating_by_smu(\
(adev)->powerplay.pp_handle, msg_id))
#define amdgpu_dpm_set_powergating_by_smu(adev, block_type, gate) \
((adev)->powerplay.pp_funcs->set_powergating_by_smu(\
(adev)->powerplay.pp_handle, block_type, gate))
#define amdgpu_dpm_get_power_profile_mode(adev, buf) \
((adev)->powerplay.pp_funcs->get_power_profile_mode(\
(adev)->powerplay.pp_handle, buf))
@ -359,10 +357,6 @@ enum amdgpu_pcie_gen {
((adev)->powerplay.pp_funcs->odn_edit_dpm_table(\
(adev)->powerplay.pp_handle, type, parameter, size))
#define amdgpu_dpm_set_mmhub_powergating_by_smu(adev) \
((adev)->powerplay.pp_funcs->set_mmhub_powergating_by_smu( \
(adev)->powerplay.pp_handle))
struct amdgpu_dpm {
struct amdgpu_ps *ps;
/* number of valid power states */
@ -402,7 +396,6 @@ struct amdgpu_dpm {
u32 tdp_adjustment;
u16 load_line_slope;
bool power_control;
bool ac_power;
/* special states active */
bool thermal_active;
bool uvd_active;
@ -439,6 +432,7 @@ struct amdgpu_pm {
struct amd_pp_display_configuration pm_display_cfg;/* set by dc */
uint32_t smu_prv_buffer_size;
struct amdgpu_bo *smu_prv_buffer;
bool ac_power;
};
#define R600_SSTU_DFLT 0

View File

@ -1,10 +1,3 @@
/**
* \file amdgpu_drv.c
* AMD Amdgpu driver
*
* \author Gareth Hughes <gareth@valinux.com>
*/
/*
* Copyright 2000 VA Linux Systems, Inc., Sunnyvale, California.
* All Rights Reserved.
@ -76,9 +69,10 @@
* - 3.24.0 - Add high priority compute support for gfx9
* - 3.25.0 - Add support for sensor query info (stable pstate sclk/mclk).
* - 3.26.0 - GFX9: Process AMDGPU_IB_FLAG_TC_WB_NOT_INVALIDATE.
* - 3.27.0 - Add new chunk to AMDGPU_CS to enable BO_LIST creation.
*/
#define KMS_DRIVER_MAJOR 3
#define KMS_DRIVER_MINOR 26
#define KMS_DRIVER_MINOR 27
#define KMS_DRIVER_PATCHLEVEL 0
int amdgpu_vram_limit = 0;
@ -110,11 +104,8 @@ int amdgpu_vram_page_split = 512;
int amdgpu_vm_update_mode = -1;
int amdgpu_exp_hw_support = 0;
int amdgpu_dc = -1;
int amdgpu_dc_log = 0;
int amdgpu_sched_jobs = 32;
int amdgpu_sched_hw_submission = 2;
int amdgpu_no_evict = 0;
int amdgpu_direct_gma_size = 0;
uint amdgpu_pcie_gen_cap = 0;
uint amdgpu_pcie_lane_cap = 0;
uint amdgpu_cg_mask = 0xffffffff;
@ -122,7 +113,8 @@ uint amdgpu_pg_mask = 0xffffffff;
uint amdgpu_sdma_phase_quantum = 32;
char *amdgpu_disable_cu = NULL;
char *amdgpu_virtual_display = NULL;
uint amdgpu_pp_feature_mask = 0xffff3fff; /* gfxoff (bit 15) disabled by default */
/* OverDrive (bit 14), gfxoff (bit 15), stutter mode (bit 17) disabled by default */
uint amdgpu_pp_feature_mask = 0xfffd3fff;
int amdgpu_ngg = 0;
int amdgpu_prim_buf_per_se = 0;
int amdgpu_pos_buf_per_se = 0;
@ -135,163 +127,368 @@ int amdgpu_gpu_recovery = -1; /* auto */
int amdgpu_emu_mode = 0;
uint amdgpu_smu_memory_pool_size = 0;
/**
* DOC: vramlimit (int)
* Restrict the total amount of VRAM in MiB for testing. The default is 0 (Use full VRAM).
*/
MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
/**
* DOC: vis_vramlimit (int)
* Restrict the amount of CPU visible VRAM in MiB for testing. The default is 0 (Use full CPU visible VRAM).
*/
MODULE_PARM_DESC(vis_vramlimit, "Restrict visible VRAM for testing, in megabytes");
module_param_named(vis_vramlimit, amdgpu_vis_vram_limit, int, 0444);
/**
* DOC: gartsize (uint)
* Restrict the size of GART in MiB (32, 64, etc.) for testing. The default is -1 (the size depends on the asic).
*/
MODULE_PARM_DESC(gartsize, "Size of GART to setup in megabytes (32, 64, etc., -1=auto)");
module_param_named(gartsize, amdgpu_gart_size, uint, 0600);
/**
* DOC: gttsize (int)
* Restrict the size of GTT domain in MiB for testing. The default is -1 (It's VRAM size if 3GB < VRAM < 3/4 RAM,
* otherwise 3/4 RAM size).
*/
MODULE_PARM_DESC(gttsize, "Size of the GTT domain in megabytes (-1 = auto)");
module_param_named(gttsize, amdgpu_gtt_size, int, 0600);
/**
* DOC: moverate (int)
* Set maximum buffer migration rate in MB/s. The default is -1 (8 MB/s).
*/
MODULE_PARM_DESC(moverate, "Maximum buffer migration rate in MB/s. (32, 64, etc., -1=auto, 0=1=disabled)");
module_param_named(moverate, amdgpu_moverate, int, 0600);
/**
* DOC: benchmark (int)
* Run benchmarks. The default is 0 (Skip benchmarks).
*/
MODULE_PARM_DESC(benchmark, "Run benchmark");
module_param_named(benchmark, amdgpu_benchmarking, int, 0444);
/**
* DOC: test (int)
* Test BO GTT->VRAM and VRAM->GTT GPU copies. The default is 0 (skip the tests); set 1 to run them.
*/
MODULE_PARM_DESC(test, "Run tests");
module_param_named(test, amdgpu_testing, int, 0444);
/**
* DOC: audio (int)
* Set HDMI/DP audio. Only affects non-DC display handling. The default is -1 (enabled); set 0 to disable it.
*/
MODULE_PARM_DESC(audio, "Audio enable (-1 = auto, 0 = disable, 1 = enable)");
module_param_named(audio, amdgpu_audio, int, 0444);
/**
* DOC: disp_priority (int)
* Set display Priority (1 = normal, 2 = high). Only affects non-DC display handling. The default is 0 (auto).
*/
MODULE_PARM_DESC(disp_priority, "Display Priority (0 = auto, 1 = normal, 2 = high)");
module_param_named(disp_priority, amdgpu_disp_priority, int, 0444);
/**
* DOC: hw_i2c (int)
* Enable the hw i2c engine. Only affects non-DC display handling. The default is 0 (disabled).
*/
MODULE_PARM_DESC(hw_i2c, "hw i2c engine enable (0 = disable)");
module_param_named(hw_i2c, amdgpu_hw_i2c, int, 0444);
/**
* DOC: pcie_gen2 (int)
* Disable or enable PCIE Gen2/3 mode (0 = disable, 1 = enable). The default is -1 (auto, enabled).
*/
MODULE_PARM_DESC(pcie_gen2, "PCIE Gen2 mode (-1 = auto, 0 = disable, 1 = enable)");
module_param_named(pcie_gen2, amdgpu_pcie_gen2, int, 0444);
/**
* DOC: msi (int)
* Disable or enable Message Signaled Interrupts (MSI) (1 = enable, 0 = disable). The default is -1 (auto, enabled).
*/
MODULE_PARM_DESC(msi, "MSI support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(msi, amdgpu_msi, int, 0444);
/**
* DOC: lockup_timeout (int)
* Set the GPU scheduler timeout value in ms. A value of 0 is invalid and will be adjusted to 10000.
* Negative values mean 'infinite timeout' (MAX_JIFFY_OFFSET). The default is 10000.
*/
MODULE_PARM_DESC(lockup_timeout, "GPU lockup timeout in ms > 0 (default 10000)");
module_param_named(lockup_timeout, amdgpu_lockup_timeout, int, 0444);
/**
* DOC: dpm (int)
* Override for dynamic power management setting (1 = enable, 0 = disable). The default is -1 (auto).
*/
MODULE_PARM_DESC(dpm, "DPM support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(dpm, amdgpu_dpm, int, 0444);
/**
* DOC: fw_load_type (int)
* Set different firmware loading type for debugging (0 = direct, 1 = SMU, 2 = PSP). The default is -1 (auto).
*/
MODULE_PARM_DESC(fw_load_type, "firmware loading type (0 = direct, 1 = SMU, 2 = PSP, -1 = auto)");
module_param_named(fw_load_type, amdgpu_fw_load_type, int, 0444);
/**
* DOC: aspm (int)
* Disable or enable ASPM (1 = enable, 0 = disable). The default is -1 (auto, enabled).
*/
MODULE_PARM_DESC(aspm, "ASPM support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(aspm, amdgpu_aspm, int, 0444);
/**
* DOC: runpm (int)
* Override for runtime power management control for dGPUs in PX/HG laptops. The amdgpu driver can dynamically power down
* the dGPU on PX/HG laptops when it is idle. The default is -1 (auto enable). Setting the value to 0 disables this functionality.
*/
MODULE_PARM_DESC(runpm, "PX runtime pm (1 = force enable, 0 = disable, -1 = PX only default)");
module_param_named(runpm, amdgpu_runtime_pm, int, 0444);
/**
* DOC: ip_block_mask (uint)
* Override what IP blocks are enabled on the GPU. Each GPU is a collection of IP blocks (gfx, display, video, etc.).
* Use this parameter to disable specific blocks. Note that the IP blocks do not have a fixed index. Some asics may not have
* some IPs or may include multiple instances of an IP so the ordering varies from asic to asic. See the driver output in
* the kernel log for the list of IPs on the asic. The default is 0xffffffff (enable all blocks on a device).
*/
MODULE_PARM_DESC(ip_block_mask, "IP Block Mask (all blocks enabled (default))");
module_param_named(ip_block_mask, amdgpu_ip_block_mask, uint, 0444);
/**
* DOC: bapm (int)
* Bidirectional Application Power Management (BAPM) is used to dynamically share the TDP between CPU and GPU. Set the value to 0 to disable it.
* The default is -1 (auto, enabled).
*/
MODULE_PARM_DESC(bapm, "BAPM support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(bapm, amdgpu_bapm, int, 0444);
/**
* DOC: deep_color (int)
* Set 1 to enable Deep Color support. Only affects non-DC display handling. The default is 0 (disabled).
*/
MODULE_PARM_DESC(deep_color, "Deep Color support (1 = enable, 0 = disable (default))");
module_param_named(deep_color, amdgpu_deep_color, int, 0444);
/**
* DOC: vm_size (int)
* Override the size of the GPU's per client virtual address space in GiB. The default is -1 (automatic for each asic).
*/
MODULE_PARM_DESC(vm_size, "VM address space size in gigabytes (default 64GB)");
module_param_named(vm_size, amdgpu_vm_size, int, 0444);
/**
* DOC: vm_fragment_size (int)
* Override VM fragment size in bits (4, 5, etc. 4 = 64K, 9 = 2M). The default is -1 (automatic for each asic).
*/
MODULE_PARM_DESC(vm_fragment_size, "VM fragment size in bits (4, 5, etc. 4 = 64K (default), Max 9 = 2M)");
module_param_named(vm_fragment_size, amdgpu_vm_fragment_size, int, 0444);
/**
* DOC: vm_block_size (int)
* Override VM page table size in bits (default depending on vm_size and hw setup). The default is -1 (automatic for each asic).
*/
MODULE_PARM_DESC(vm_block_size, "VM page table size in bits (default depending on vm_size)");
module_param_named(vm_block_size, amdgpu_vm_block_size, int, 0444);
/**
* DOC: vm_fault_stop (int)
* Stop on VM fault for debugging (0 = never, 1 = print first, 2 = always). The default is 0 (No stop).
*/
MODULE_PARM_DESC(vm_fault_stop, "Stop on VM fault (0 = never (default), 1 = print first, 2 = always)");
module_param_named(vm_fault_stop, amdgpu_vm_fault_stop, int, 0444);
/**
* DOC: vm_debug (int)
* Debug VM handling (0 = disabled, 1 = enabled). The default is 0 (Disabled).
*/
MODULE_PARM_DESC(vm_debug, "Debug VM handling (0 = disabled (default), 1 = enabled)");
module_param_named(vm_debug, amdgpu_vm_debug, int, 0644);
/**
* DOC: vm_update_mode (int)
* Override the VM update mode: whether page tables are updated using the CPU (0 = never, 1 = graphics only, 2 = compute only, 3 = both). The default
* is -1 (compute VM tables are updated by the CPU only on large BAR (LB) systems, otherwise never).
*/
MODULE_PARM_DESC(vm_update_mode, "VM update using CPU (0 = never (default except for large BAR(LB)), 1 = Graphics only, 2 = Compute only (default for LB), 3 = Both");
module_param_named(vm_update_mode, amdgpu_vm_update_mode, int, 0444);
/**
* DOC: vram_page_split (int)
* Override the number of pages after which VRAM allocations are split (-1 = disable). The default is 512.
*/
MODULE_PARM_DESC(vram_page_split, "Number of pages after we split VRAM allocations (default 512, -1 = disable)");
module_param_named(vram_page_split, amdgpu_vram_page_split, int, 0444);
/**
* DOC: exp_hw_support (int)
* Enable experimental hw support (1 = enable). The default is 0 (disabled).
*/
MODULE_PARM_DESC(exp_hw_support, "experimental hw support (1 = enable, 0 = disable (default))");
module_param_named(exp_hw_support, amdgpu_exp_hw_support, int, 0444);
/**
* DOC: dc (int)
* Disable/Enable Display Core driver for debugging (1 = enable, 0 = disable). The default is -1 (automatic for each asic).
*/
MODULE_PARM_DESC(dc, "Display Core driver (1 = enable, 0 = disable, -1 = auto (default))");
module_param_named(dc, amdgpu_dc, int, 0444);
MODULE_PARM_DESC(dc_log, "Display Core Log Level (0 = minimal (default), 1 = chatty");
module_param_named(dc_log, amdgpu_dc_log, int, 0444);
/**
* DOC: sched_jobs (int)
* Override the max number of jobs supported in the sw queue. The default is 32.
*/
MODULE_PARM_DESC(sched_jobs, "the max number of jobs supported in the sw queue (default 32)");
module_param_named(sched_jobs, amdgpu_sched_jobs, int, 0444);
/**
* DOC: sched_hw_submission (int)
* Override the max number of HW submissions. The default is 2.
*/
MODULE_PARM_DESC(sched_hw_submission, "the max number of HW submissions (default 2)");
module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
/**
* DOC: ppfeaturemask (uint)
* Override power features enabled. See enum PP_FEATURE_MASK in drivers/gpu/drm/amd/include/amd_shared.h.
* The default is the current set of stable power features.
*/
MODULE_PARM_DESC(ppfeaturemask, "all power features enabled (default))");
module_param_named(ppfeaturemask, amdgpu_pp_feature_mask, uint, 0444);
MODULE_PARM_DESC(no_evict, "Support pinning request from user space (1 = enable, 0 = disable (default))");
module_param_named(no_evict, amdgpu_no_evict, int, 0444);
MODULE_PARM_DESC(direct_gma_size, "Direct GMA size in megabytes (max 96MB)");
module_param_named(direct_gma_size, amdgpu_direct_gma_size, int, 0444);
/**
* DOC: pcie_gen_cap (uint)
* Override PCIE gen speed capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h.
* The default is 0 (automatic for each asic).
*/
MODULE_PARM_DESC(pcie_gen_cap, "PCIE Gen Caps (0: autodetect (default))");
module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444);
/**
* DOC: pcie_lane_cap (uint)
* Override PCIE lane capabilities. See the CAIL flags in drivers/gpu/drm/amd/include/amd_pcie.h.
* The default is 0 (automatic for each asic).
*/
MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);
/**
* DOC: cg_mask (uint)
* Override Clockgating features enabled on GPU (0 = disable clock gating). See the AMD_CG_SUPPORT flags in
* drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffff (all enabled).
*/
MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)");
module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444);
/**
* DOC: pg_mask (uint)
* Override Powergating features enabled on GPU (0 = disable power gating). See the AMD_PG_SUPPORT flags in
* drivers/gpu/drm/amd/include/amd_shared.h. The default is 0xffffffff (all enabled).
*/
MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)");
module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444);
/**
* DOC: sdma_phase_quantum (uint)
* Override SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change). The default is 32.
*/
MODULE_PARM_DESC(sdma_phase_quantum, "SDMA context switch phase quantum (x 1K GPU clock cycles, 0 = no change (default 32))");
module_param_named(sdma_phase_quantum, amdgpu_sdma_phase_quantum, uint, 0444);
/**
* DOC: disable_cu (charp)
* Set to disable CUs (specified as se.sh.cu,...). The default is NULL.
*/
MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)");
module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444);
/**
* DOC: virtual_display (charp)
* Set to enable the virtual display feature. This feature provides virtual display hardware on headless boards
* or in virtualized environments. The format is xxxx:xx:xx.x,x;xxxx:xx:xx.x,x, i.e. the PCI address of
* the device plus the number of crtcs to expose. E.g., 0000:26:00.0,4 would enable 4 virtual crtcs on the PCI
* device at 26:00.0. The default is NULL.
*/
MODULE_PARM_DESC(virtual_display,
"Enable virtual display feature (the virtual_display will be set like xxxx:xx:xx.x,x;xxxx:xx:xx.x,x)");
module_param_named(virtual_display, amdgpu_virtual_display, charp, 0444);
/**
* DOC: ngg (int)
* Set to enable Next Generation Graphics (1 = enable). The default is 0 (disabled).
*/
MODULE_PARM_DESC(ngg, "Next Generation Graphics (1 = enable, 0 = disable(default depending on gfx))");
module_param_named(ngg, amdgpu_ngg, int, 0444);
/**
* DOC: prim_buf_per_se (int)
* Override the size of the Primitive Buffer per Shader Engine in bytes. The default is 0 (depending on gfx).
*/
MODULE_PARM_DESC(prim_buf_per_se, "the size of Primitive Buffer per Shader Engine (default depending on gfx)");
module_param_named(prim_buf_per_se, amdgpu_prim_buf_per_se, int, 0444);
/**
* DOC: pos_buf_per_se (int)
* Override the size of the Position Buffer per Shader Engine in bytes. The default is 0 (depending on gfx).
*/
MODULE_PARM_DESC(pos_buf_per_se, "the size of Position Buffer per Shader Engine (default depending on gfx)");
module_param_named(pos_buf_per_se, amdgpu_pos_buf_per_se, int, 0444);
/**
* DOC: cntl_sb_buf_per_se (int)
* Override the size of the Control Sideband per Shader Engine in bytes. The default is 0 (depending on gfx).
*/
MODULE_PARM_DESC(cntl_sb_buf_per_se, "the size of Control Sideband per Shader Engine (default depending on gfx)");
module_param_named(cntl_sb_buf_per_se, amdgpu_cntl_sb_buf_per_se, int, 0444);
/**
* DOC: param_buf_per_se (int)
* Override the size of the Off-Chip Parameter Cache per Shader Engine in bytes. The default is 0 (depending on gfx).
*/
MODULE_PARM_DESC(param_buf_per_se, "the size of Off-Chip Parameter Cache per Shader Engine (default depending on gfx)");
module_param_named(param_buf_per_se, amdgpu_param_buf_per_se, int, 0444);
/**
* DOC: job_hang_limit (int)
* Set how long a job is allowed to hang before it is dropped. The default is 0.
*/
MODULE_PARM_DESC(job_hang_limit, "how long to allow a job to hang before dropping it (default 0)");
module_param_named(job_hang_limit, amdgpu_job_hang_limit, int ,0444);
/**
* DOC: lbpw (int)
* Override Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable). The default is -1 (auto, enabled).
*/
MODULE_PARM_DESC(lbpw, "Load Balancing Per Watt (LBPW) support (1 = enable, 0 = disable, -1 = auto)");
module_param_named(lbpw, amdgpu_lbpw, int, 0444);
MODULE_PARM_DESC(compute_multipipe, "Force compute queues to be spread across pipes (1 = enable, 0 = disable, -1 = auto)");
module_param_named(compute_multipipe, amdgpu_compute_multipipe, int, 0444);
/**
* DOC: gpu_recovery (int)
* Set to enable GPU recovery mechanism (1 = enable, 0 = disable). The default is -1 (auto, disabled except SRIOV).
*/
MODULE_PARM_DESC(gpu_recovery, "Enable GPU recovery mechanism, (1 = enable, 0 = disable, -1 = auto)");
module_param_named(gpu_recovery, amdgpu_gpu_recovery, int, 0444);
/**
* DOC: emu_mode (int)
* Set value 1 to enable emulation mode. This is only needed when running on an emulator. The default is 0 (disabled).
*/
MODULE_PARM_DESC(emu_mode, "Emulation mode, (1 = enable, 0 = disable)");
module_param_named(emu_mode, amdgpu_emu_mode, int, 0444);
/**
* DOC: si_support (int)
* Select the driver for SI asics. This parameter only takes effect when CONFIG_DRM_AMDGPU_SI is set. When the radeon driver is enabled,
* set 0 to use the radeon driver and 1 to use the amdgpu driver. The default is to use the radeon driver when it is available,
* otherwise the amdgpu driver.
*/
#ifdef CONFIG_DRM_AMDGPU_SI
#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
@ -305,6 +502,12 @@ MODULE_PARM_DESC(si_support, "SI support (1 = enabled (default), 0 = disabled)")
module_param_named(si_support, amdgpu_si_support, int, 0444);
#endif
/**
* DOC: cik_support (int)
* Select the driver for CIK ASICs. This parameter only takes effect when CONFIG_DRM_AMDGPU_CIK is set. When the radeon driver is also
* enabled, set the value to 0 to use the radeon driver or to 1 to use the amdgpu driver. The default is to use the radeon driver
* when it is available, and the amdgpu driver otherwise.
*/
#ifdef CONFIG_DRM_AMDGPU_CIK
#if defined(CONFIG_DRM_RADEON) || defined(CONFIG_DRM_RADEON_MODULE)
@ -318,6 +521,11 @@ MODULE_PARM_DESC(cik_support, "CIK support (1 = enabled (default), 0 = disabled)
module_param_named(cik_support, amdgpu_cik_support, int, 0444);
#endif
/**
* DOC: smu_memory_pool_size (uint)
* Reserve GTT memory for SMU debug usage; set the value to 0 to disable it. The reserved size is value * 256 MiB,
* e.g. 0x1 = 256 MiB, 0x2 = 512 MiB, 0x4 = 1 GiB, 0x8 = 2 GiB. The default is 0 (disabled).
*/
MODULE_PARM_DESC(smu_memory_pool_size,
"reserve gtt for smu debug usage, 0 = disable,"
"0x1 = 256Mbyte, 0x2 = 512Mbyte, 0x4 = 1 Gbyte, 0x8 = 2GByte");
@ -664,7 +872,7 @@ retry_init:
err_pci:
pci_disable_device(pdev);
err_free:
drm_dev_unref(dev);
drm_dev_put(dev);
return ret;
}
@ -674,7 +882,7 @@ amdgpu_pci_remove(struct pci_dev *pdev)
struct drm_device *dev = pci_get_drvdata(pdev);
drm_dev_unregister(dev);
drm_dev_unref(dev);
drm_dev_put(dev);
pci_disable_device(pdev);
pci_set_drvdata(pdev, NULL);
}
@ -855,9 +1063,21 @@ static const struct dev_pm_ops amdgpu_pm_ops = {
.runtime_idle = amdgpu_pmops_runtime_idle,
};
static int amdgpu_flush(struct file *f, fl_owner_t id)
{
struct drm_file *file_priv = f->private_data;
struct amdgpu_fpriv *fpriv = file_priv->driver_priv;
amdgpu_ctx_mgr_entity_flush(&fpriv->ctx_mgr);
return 0;
}
static const struct file_operations amdgpu_driver_kms_fops = {
.owner = THIS_MODULE,
.open = drm_open,
.flush = amdgpu_flush,
.release = drm_release,
.unlocked_ioctl = amdgpu_drm_ioctl,
.mmap = amdgpu_mmap,

View File

@ -46,7 +46,7 @@ amdgpu_link_encoder_connector(struct drm_device *dev)
list_for_each_entry(encoder, &dev->mode_config.encoder_list, head) {
amdgpu_encoder = to_amdgpu_encoder(encoder);
if (amdgpu_encoder->devices & amdgpu_connector->devices) {
drm_mode_connector_attach_encoder(connector, encoder);
drm_connector_attach_encoder(connector, encoder);
if (amdgpu_encoder->devices & (ATOM_DEVICE_LCD_SUPPORT)) {
amdgpu_atombios_encoder_init_backlight(amdgpu_encoder, connector);
adev->mode_info.bl_encoder = amdgpu_encoder;

View File

@ -146,7 +146,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED |
AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS |
AMDGPU_GEM_CREATE_VRAM_CLEARED,
true, NULL, &gobj);
ttm_bo_type_kernel, NULL, &gobj);
if (ret) {
pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
return -ENOMEM;
@ -168,11 +168,19 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
}
ret = amdgpu_bo_pin(abo, domain, NULL);
ret = amdgpu_bo_pin(abo, domain);
if (ret) {
amdgpu_bo_unreserve(abo);
goto out_unref;
}
ret = amdgpu_ttm_alloc_gart(&abo->tbo);
if (ret) {
amdgpu_bo_unreserve(abo);
dev_err(adev->dev, "%p bind failed\n", abo);
goto out_unref;
}
ret = amdgpu_bo_kmap(abo, NULL);
amdgpu_bo_unreserve(abo);
if (ret) {
@ -365,8 +373,8 @@ void amdgpu_fbdev_fini(struct amdgpu_device *adev)
void amdgpu_fbdev_set_suspend(struct amdgpu_device *adev, int state)
{
if (adev->mode_info.rfbdev)
drm_fb_helper_set_suspend(&adev->mode_info.rfbdev->helper,
state);
drm_fb_helper_set_suspend_unlocked(&adev->mode_info.rfbdev->helper,
state);
}
int amdgpu_fbdev_total_size(struct amdgpu_device *adev)

View File

@ -646,7 +646,6 @@ static const struct dma_fence_ops amdgpu_fence_ops = {
.get_driver_name = amdgpu_fence_get_driver_name,
.get_timeline_name = amdgpu_fence_get_timeline_name,
.enable_signaling = amdgpu_fence_enable_signaling,
.wait = dma_fence_default_wait,
.release = amdgpu_fence_release,
};

View File

@ -143,14 +143,12 @@ int amdgpu_gart_table_vram_alloc(struct amdgpu_device *adev)
*/
int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev)
{
uint64_t gpu_addr;
int r;
r = amdgpu_bo_reserve(adev->gart.robj, false);
if (unlikely(r != 0))
return r;
r = amdgpu_bo_pin(adev->gart.robj,
AMDGPU_GEM_DOMAIN_VRAM, &gpu_addr);
r = amdgpu_bo_pin(adev->gart.robj, AMDGPU_GEM_DOMAIN_VRAM);
if (r) {
amdgpu_bo_unreserve(adev->gart.robj);
return r;
@ -159,7 +157,7 @@ int amdgpu_gart_table_vram_pin(struct amdgpu_device *adev)
if (r)
amdgpu_bo_unpin(adev->gart.robj);
amdgpu_bo_unreserve(adev->gart.robj);
adev->gart.table_addr = gpu_addr;
adev->gart.table_addr = amdgpu_bo_gpu_offset(adev->gart.robj);
return r;
}
@ -234,7 +232,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
}
t = offset / AMDGPU_GPU_PAGE_SIZE;
p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
for (i = 0; i < pages; i++, p++) {
#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
adev->gart.pages[p] = NULL;
@ -243,7 +241,7 @@ int amdgpu_gart_unbind(struct amdgpu_device *adev, uint64_t offset,
if (!adev->gart.ptr)
continue;
for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {
amdgpu_gmc_set_pte_pde(adev, adev->gart.ptr,
t, page_base, flags);
page_base += AMDGPU_GPU_PAGE_SIZE;
@ -282,7 +280,7 @@ int amdgpu_gart_map(struct amdgpu_device *adev, uint64_t offset,
for (i = 0; i < pages; i++) {
page_base = dma_addr[i];
for (j = 0; j < (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE); j++, t++) {
for (j = 0; j < AMDGPU_GPU_PAGES_IN_CPU_PAGE; j++, t++) {
amdgpu_gmc_set_pte_pde(adev, dst, t, page_base, flags);
page_base += AMDGPU_GPU_PAGE_SIZE;
}
@ -319,7 +317,7 @@ int amdgpu_gart_bind(struct amdgpu_device *adev, uint64_t offset,
#ifdef CONFIG_DRM_AMDGPU_GART_DEBUGFS
t = offset / AMDGPU_GPU_PAGE_SIZE;
p = t / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
p = t / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
for (i = 0; i < pages; i++, p++)
adev->gart.pages[p] = pagelist ? pagelist[i] : NULL;
#endif

View File

@ -37,6 +37,8 @@ struct amdgpu_bo;
#define AMDGPU_GPU_PAGE_SHIFT 12
#define AMDGPU_GPU_PAGE_ALIGN(a) (((a) + AMDGPU_GPU_PAGE_MASK) & ~AMDGPU_GPU_PAGE_MASK)
#define AMDGPU_GPU_PAGES_IN_CPU_PAGE (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE)
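/* e.g. 1 when CPU and GPU pages are both 4 KiB, 16 with 64 KiB CPU pages */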
struct amdgpu_gart {
u64 table_addr;
struct amdgpu_bo *robj;

View File

@ -265,7 +265,7 @@ int amdgpu_gem_create_ioctl(struct drm_device *dev, void *data,
r = amdgpu_gem_object_create(adev, size, args->in.alignment,
(u32)(0xffffffff & args->in.domains),
flags, false, resv, &gobj);
flags, ttm_bo_type_device, resv, &gobj);
if (flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID) {
if (!r) {
struct amdgpu_bo *abo = gem_to_amdgpu_bo(gobj);
@ -317,7 +317,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
/* create a gem object to contain this object in */
r = amdgpu_gem_object_create(adev, args->size, 0, AMDGPU_GEM_DOMAIN_CPU,
0, 0, NULL, &gobj);
0, ttm_bo_type_device, NULL, &gobj);
if (r)
return r;
@ -344,7 +344,7 @@ int amdgpu_gem_userptr_ioctl(struct drm_device *dev, void *data,
if (r)
goto free_pages;
amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
amdgpu_bo_unreserve(bo);
if (r)
@ -510,7 +510,6 @@ out:
* @adev: amdgpu_device pointer
* @vm: vm to update
* @bo_va: bo_va to update
* @list: validation list
* @operation: map, unmap or clear
*
* Update the bo_va directly after setting its address. Errors are not
@ -519,7 +518,6 @@ out:
static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
struct amdgpu_bo_va *bo_va,
struct list_head *list,
uint32_t operation)
{
int r;
@ -612,7 +610,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
return -ENOENT;
abo = gem_to_amdgpu_bo(gobj);
tv.bo = &abo->tbo;
tv.shared = false;
tv.shared = !!(abo->flags & AMDGPU_GEM_CREATE_VM_ALWAYS_VALID);
list_add(&tv.head, &list);
} else {
gobj = NULL;
@ -673,7 +671,7 @@ int amdgpu_gem_va_ioctl(struct drm_device *dev, void *data,
break;
}
if (!r && !(args->flags & AMDGPU_VM_DELAY_UPDATE) && !amdgpu_vm_debug)
amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va, &list,
amdgpu_gem_va_update_vm(adev, &fpriv->vm, bo_va,
args->operation);
error_backoff:
@ -768,7 +766,7 @@ int amdgpu_mode_dumb_create(struct drm_file *file_priv,
amdgpu_display_supported_domains(adev));
r = amdgpu_gem_object_create(adev, args->size, 0, domain,
AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED,
false, NULL, &gobj);
ttm_bo_type_device, NULL, &gobj);
if (r)
return -ENOMEM;

View File

@ -105,8 +105,25 @@ struct amdgpu_gmc {
/* protects concurrent invalidation */
spinlock_t invalidate_lock;
bool translate_further;
struct kfd_vm_fault_info *vm_fault_info;
atomic_t vm_fault_info_updated;
const struct amdgpu_gmc_funcs *gmc_funcs;
};
/**
* amdgpu_gmc_vram_full_visible - Check if full VRAM is visible through the BAR
*
* @gmc: pointer to the amdgpu_gmc structure
*
* Returns:
* True if full VRAM is visible through the BAR
*/
static inline bool amdgpu_gmc_vram_full_visible(struct amdgpu_gmc *gmc)
{
WARN_ON(gmc->real_vram_size < gmc->visible_vram_size);
return (gmc->real_vram_size == gmc->visible_vram_size);
}
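A minimal, hypothetical caller of this helper (invented for illustration, not part of this patch) shows the intended use: when the BAR already covers all of VRAM, a buffer's full size can be treated as CPU visible.

/* Illustrative sketch: how much of a buffer of size 'bo_size' to count as
 * CPU-visible VRAM; 'visible_part' is assumed to be computed elsewhere. */
static inline u64 example_cpu_visible_bytes(struct amdgpu_gmc *gmc,
                                            u64 bo_size, u64 visible_part)
{
        if (amdgpu_gmc_vram_full_visible(gmc))
                return bo_size;         /* BAR covers all of VRAM */

        return visible_part;            /* only what lies below visible_vram_size */
}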
#endif

View File

@ -139,7 +139,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
/* ring tests don't use a job */
if (job) {
vm = job->vm;
fence_ctx = job->fence_ctx;
fence_ctx = job->base.s_fence->scheduled.context;
} else {
vm = NULL;
fence_ctx = 0;
@ -353,7 +353,8 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
ring->funcs->type == AMDGPU_RING_TYPE_VCE ||
ring->funcs->type == AMDGPU_RING_TYPE_UVD_ENC ||
ring->funcs->type == AMDGPU_RING_TYPE_VCN_DEC ||
ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC ||
ring->funcs->type == AMDGPU_RING_TYPE_VCN_JPEG)
tmo = tmo_mm;
else
tmo = tmo_gfx;

View File

@ -578,11 +578,6 @@ void amdgpu_vmid_mgr_init(struct amdgpu_device *adev)
list_add_tail(&id_mgr->ids[j].list, &id_mgr->ids_lru);
}
}
adev->vm_manager.fence_context =
dma_fence_context_alloc(AMDGPU_MAX_RINGS);
for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
adev->vm_manager.seqno[i] = 0;
}
/**

View File

@ -25,6 +25,23 @@
* Alex Deucher
* Jerome Glisse
*/
/**
* DOC: Interrupt Handling
*
* Interrupts generated within GPU hardware raise interrupt requests that are
* passed to amdgpu IRQ handler which is responsible for detecting source and
* type of the interrupt and dispatching matching handlers. If handling an
* interrupt requires calling kernel functions that may sleep processing is
* dispatched to work handlers.
*
* If MSI functionality is not disabled by module parameter then MSI
* support will be enabled.
*
* For GPU interrupt sources that may be driven by another driver, IRQ domain
* support is used (with mapping between virtual and hardware IRQs).
*/
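The deferral described above is the usual top-half/bottom-half split. The sketch below shows that pattern with made-up names; only INIT_WORK()/schedule_work() and the irqreturn_t convention are real kernel interfaces, and nothing here is part of the patch itself.

/* Sketch of the IRQ -> work handler deferral described above. */
struct example_irq_state {
        struct work_struct hotplug_work;        /* INIT_WORK()ed at init time */
};

static void example_hotplug_work_func(struct work_struct *work)
{
        /* Process context: taking mutexes and sleeping is allowed here. */
}

static irqreturn_t example_irq_handler(int irq, void *arg)
{
        struct example_irq_state *state = arg;

        /* Hard IRQ context: must not sleep, so defer the real work. */
        schedule_work(&state->hotplug_work);

        return IRQ_HANDLED;
}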
#include <linux/irq.h>
#include <drm/drmP.h>
#include <drm/drm_crtc_helper.h>
@ -43,19 +60,21 @@
#define AMDGPU_WAIT_IDLE_TIMEOUT 200
/*
* Handle hotplug events outside the interrupt handler proper.
*/
/**
* amdgpu_hotplug_work_func - display hotplug work handler
* amdgpu_hotplug_work_func - work handler for display hotplug event
*
* @work: work struct
* @work: work struct pointer
*
* This is the hot plug event work handler (all asics).
* The work gets scheduled from the irq handler if there
* was a hot plug interrupt. It walks the connector table
* and calls the hotplug handler for each one, then sends
* a drm hotplug event to alert userspace.
* This is the hotplug event work handler (all ASICs).
* The work gets scheduled from the IRQ handler if there
* was a hotplug interrupt. It walks through the connector table
* and calls hotplug handler for each connector. After this, it sends
* a DRM hotplug event to alert userspace.
*
* This design approach is required in order to defer hotplug event handling
* from the IRQ handler to a work handler because the hotplug handler has to use
* mutexes which cannot be locked in an IRQ handler (since &mutex_lock may
* sleep).
*/
static void amdgpu_hotplug_work_func(struct work_struct *work)
{
@ -74,13 +93,12 @@ static void amdgpu_hotplug_work_func(struct work_struct *work)
}
/**
* amdgpu_irq_reset_work_func - execute gpu reset
* amdgpu_irq_reset_work_func - execute GPU reset
*
* @work: work struct
* @work: work struct pointer
*
* Execute scheduled gpu reset (cayman+).
* This function is called when the irq handler
* thinks we need a gpu reset.
* Execute scheduled GPU reset (Cayman+).
* This function is called when the IRQ handler thinks we need a GPU reset.
*/
static void amdgpu_irq_reset_work_func(struct work_struct *work)
{
@ -91,7 +109,13 @@ static void amdgpu_irq_reset_work_func(struct work_struct *work)
amdgpu_device_gpu_recover(adev, NULL, false);
}
/* Disable *all* interrupts */
/**
* amdgpu_irq_disable_all - disable *all* interrupts
*
* @adev: amdgpu device pointer
*
* Disable all types of interrupts from all sources.
*/
void amdgpu_irq_disable_all(struct amdgpu_device *adev)
{
unsigned long irqflags;
@ -123,11 +147,15 @@ void amdgpu_irq_disable_all(struct amdgpu_device *adev)
}
/**
* amdgpu_irq_handler - irq handler
* amdgpu_irq_handler - IRQ handler
*
* @int irq, void *arg: args
* @irq: IRQ number (unused)
* @arg: pointer to DRM device
*
* This is the irq handler for the amdgpu driver (all asics).
* IRQ handler for amdgpu driver (all ASICs).
*
* Returns:
* result of handling the IRQ, as defined by &irqreturn_t
*/
irqreturn_t amdgpu_irq_handler(int irq, void *arg)
{
@ -142,18 +170,18 @@ irqreturn_t amdgpu_irq_handler(int irq, void *arg)
}
/**
* amdgpu_msi_ok - asic specific msi checks
* amdgpu_msi_ok - check whether MSI functionality is enabled
*
* @adev: amdgpu device pointer
* @adev: amdgpu device pointer (unused)
*
* Handles asic specific MSI checks to determine if
* MSIs should be enabled on a particular chip (all asics).
* Returns true if MSIs should be enabled, false if MSIs
* should not be enabled.
* Checks whether MSI functionality has been disabled via module parameter
* (all ASICs).
*
* Returns:
* *true* if MSIs are allowed to be enabled or *false* otherwise
*/
static bool amdgpu_msi_ok(struct amdgpu_device *adev)
{
/* force MSI on */
if (amdgpu_msi == 1)
return true;
else if (amdgpu_msi == 0)
@ -163,12 +191,15 @@ static bool amdgpu_msi_ok(struct amdgpu_device *adev)
}
/**
* amdgpu_irq_init - init driver interrupt info
* amdgpu_irq_init - initialize interrupt handling
*
* @adev: amdgpu device pointer
*
* Sets up the work irq handlers, vblank init, MSIs, etc. (all asics).
* Returns 0 for success, error for failure.
* Sets up work functions for hotplug and reset interrupts, enables MSI
* functionality, initializes vblank, hotplug and reset interrupt handling.
*
* Returns:
* 0 on success or error code on failure
*/
int amdgpu_irq_init(struct amdgpu_device *adev)
{
@ -176,7 +207,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
spin_lock_init(&adev->irq.lock);
/* enable msi */
/* Enable MSI if not disabled by module parameter */
adev->irq.msi_enabled = false;
if (amdgpu_msi_ok(adev)) {
@ -189,7 +220,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
if (!amdgpu_device_has_dc_support(adev)) {
if (!adev->enable_virtual_display)
/* Disable vblank irqs aggressively for power-saving */
/* Disable vblank IRQs aggressively for power-saving */
/* XXX: can this be enabled for DC? */
adev->ddev->vblank_disable_immediate = true;
@ -197,7 +228,7 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
if (r)
return r;
/* pre DCE11 */
/* Pre-DCE11 */
INIT_WORK(&adev->hotplug_work,
amdgpu_hotplug_work_func);
}
@ -220,11 +251,13 @@ int amdgpu_irq_init(struct amdgpu_device *adev)
}
/**
* amdgpu_irq_fini - tear down driver interrupt info
* amdgpu_irq_fini - shut down interrupt handling
*
* @adev: amdgpu device pointer
*
* Tears down the work irq handlers, vblank handlers, MSIs, etc. (all asics).
* Tears down work functions for hotplug and reset interrupts, disables MSI
* functionality, shuts down vblank, hotplug and reset interrupt handling,
* turns off interrupts from all sources (all ASICs).
*/
void amdgpu_irq_fini(struct amdgpu_device *adev)
{
@ -264,12 +297,17 @@ void amdgpu_irq_fini(struct amdgpu_device *adev)
}
/**
* amdgpu_irq_add_id - register irq source
* amdgpu_irq_add_id - register IRQ source
*
* @adev: amdgpu device pointer
* @src_id: source id for this source
* @source: irq source
* @client_id: client id
* @src_id: source id
* @source: IRQ source pointer
*
* Registers IRQ source on a client.
*
* Returns:
* 0 on success or error code otherwise
*/
int amdgpu_irq_add_id(struct amdgpu_device *adev,
unsigned client_id, unsigned src_id,
@ -312,12 +350,12 @@ int amdgpu_irq_add_id(struct amdgpu_device *adev,
}
/**
* amdgpu_irq_dispatch - dispatch irq to IP blocks
* amdgpu_irq_dispatch - dispatch IRQ to IP blocks
*
* @adev: amdgpu device pointer
* @entry: interrupt vector
* @entry: interrupt vector pointer
*
* Dispatches the irq to the different IP blocks
* Dispatches IRQ to IP blocks.
*/
void amdgpu_irq_dispatch(struct amdgpu_device *adev,
struct amdgpu_iv_entry *entry)
@ -361,13 +399,13 @@ void amdgpu_irq_dispatch(struct amdgpu_device *adev,
}
/**
* amdgpu_irq_update - update hw interrupt state
* amdgpu_irq_update - update hardware interrupt state
*
* @adev: amdgpu device pointer
* @src: interrupt src you want to enable
* @type: type of interrupt you want to update
* @src: interrupt source pointer
* @type: type of interrupt
*
* Updates the interrupt state for a specific src (all asics).
* Updates interrupt state for the specific source (all ASICs).
*/
int amdgpu_irq_update(struct amdgpu_device *adev,
struct amdgpu_irq_src *src, unsigned type)
@ -378,7 +416,7 @@ int amdgpu_irq_update(struct amdgpu_device *adev,
spin_lock_irqsave(&adev->irq.lock, irqflags);
/* we need to determine after taking the lock, otherwise
/* We need to determine after taking the lock, otherwise
we might disable just enabled interrupts again */
if (amdgpu_irq_enabled(adev, src, type))
state = AMDGPU_IRQ_STATE_ENABLE;
@ -390,6 +428,14 @@ int amdgpu_irq_update(struct amdgpu_device *adev,
return r;
}
/**
* amdgpu_irq_gpu_reset_resume_helper - update interrupt states on all sources
*
* @adev: amdgpu device pointer
*
* Updates state of all types of interrupts on all sources on resume after
* reset.
*/
void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
{
int i, j, k;
@ -413,10 +459,13 @@ void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
* amdgpu_irq_get - enable interrupt
*
* @adev: amdgpu device pointer
* @src: interrupt src you want to enable
* @type: type of interrupt you want to enable
* @src: interrupt source pointer
* @type: type of interrupt
*
* Enables the interrupt type for a specific src (all asics).
* Enables specified type of interrupt on the specified source (all ASICs).
*
* Returns:
* 0 on success or error code otherwise
*/
int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
unsigned type)
@ -440,10 +489,13 @@ int amdgpu_irq_get(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
* amdgpu_irq_put - disable interrupt
*
* @adev: amdgpu device pointer
* @src: interrupt src you want to disable
* @type: type of interrupt you want to disable
* @src: interrupt source pointer
* @type: type of interrupt
*
* Disables the interrupt type for a specific src (all asics).
* Disables the specified type of interrupt on the specified source (all ASICs).
*
* Returns:
* 0 on success or error code otherwise
*/
int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
unsigned type)
@ -464,12 +516,17 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
}
/**
* amdgpu_irq_enabled - test if irq is enabled or not
* amdgpu_irq_enabled - check whether interrupt is enabled or not
*
* @adev: amdgpu device pointer
* @idx: interrupt src you want to test
* @src: interrupt source pointer
* @type: type of interrupt
*
* Tests if the given interrupt source is enabled or not
* Checks whether the given type of interrupt is enabled on the given source.
*
* Returns:
* *true* if interrupt is enabled, *false* if interrupt is disabled or on
* invalid parameters
*/
bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
unsigned type)
@ -486,7 +543,7 @@ bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
return !!atomic_read(&src->enabled_types[type]);
}
/* gen irq */
/* XXX: Generic IRQ handling */
static void amdgpu_irq_mask(struct irq_data *irqd)
{
/* XXX */
@ -497,12 +554,26 @@ static void amdgpu_irq_unmask(struct irq_data *irqd)
/* XXX */
}
/* amdgpu hardware interrupt chip descriptor */
static struct irq_chip amdgpu_irq_chip = {
.name = "amdgpu-ih",
.irq_mask = amdgpu_irq_mask,
.irq_unmask = amdgpu_irq_unmask,
};
/**
* amdgpu_irqdomain_map - create mapping between virtual and hardware IRQ numbers
*
* @d: amdgpu IRQ domain pointer (unused)
* @irq: virtual IRQ number
* @hwirq: hardware irq number
*
* Current implementation assigns simple interrupt handler to the given virtual
* IRQ.
*
* Returns:
* 0 on success or error code otherwise
*/
static int amdgpu_irqdomain_map(struct irq_domain *d,
unsigned int irq, irq_hw_number_t hwirq)
{
@ -514,17 +585,21 @@ static int amdgpu_irqdomain_map(struct irq_domain *d,
return 0;
}
/* Implementation of methods for amdgpu IRQ domain */
static const struct irq_domain_ops amdgpu_hw_irqdomain_ops = {
.map = amdgpu_irqdomain_map,
};
/**
* amdgpu_irq_add_domain - create a linear irq domain
* amdgpu_irq_add_domain - create a linear IRQ domain
*
* @adev: amdgpu device pointer
*
* Create an irq domain for GPU interrupt sources
* Creates an IRQ domain for GPU interrupt sources
* that may be driven by another driver (e.g., ACP).
*
* Returns:
* 0 on success or error code otherwise
*/
int amdgpu_irq_add_domain(struct amdgpu_device *adev)
{
@ -539,11 +614,11 @@ int amdgpu_irq_add_domain(struct amdgpu_device *adev)
}
/**
* amdgpu_irq_remove_domain - remove the irq domain
* amdgpu_irq_remove_domain - remove the IRQ domain
*
* @adev: amdgpu device pointer
*
* Remove the irq domain for GPU interrupt sources
* Removes the IRQ domain for GPU interrupt sources
* that may be driven by another driver (e.g., ACP).
*/
void amdgpu_irq_remove_domain(struct amdgpu_device *adev)
@ -555,16 +630,17 @@ void amdgpu_irq_remove_domain(struct amdgpu_device *adev)
}
/**
* amdgpu_irq_create_mapping - create a mapping between a domain irq and a
* Linux irq
* amdgpu_irq_create_mapping - create a mapping between a domain IRQ and a Linux IRQ
*
* @adev: amdgpu device pointer
* @src_id: IH source id
*
* Create a mapping between a domain irq (GPU IH src id) and a Linux irq
* Creates mapping between a domain IRQ (GPU IH src id) and a Linux IRQ
* Use this for components that generate a GPU interrupt, but are driven
* by a different driver (e.g., ACP).
* Returns the Linux irq.
*
* Returns:
* Linux IRQ
*/
unsigned amdgpu_irq_create_mapping(struct amdgpu_device *adev, unsigned src_id)
{

View File

@ -30,14 +30,14 @@
static void amdgpu_job_timedout(struct drm_sched_job *s_job)
{
struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
struct amdgpu_job *job = to_amdgpu_job(s_job);
DRM_ERROR("ring %s timeout, last signaled seq=%u, last emitted seq=%u\n",
job->base.sched->name,
atomic_read(&job->ring->fence_drv.last_seq),
job->ring->fence_drv.sync_seq);
DRM_ERROR("ring %s timeout, signaled seq=%u, emitted seq=%u\n",
job->base.sched->name, atomic_read(&ring->fence_drv.last_seq),
ring->fence_drv.sync_seq);
amdgpu_device_gpu_recover(job->adev, job, false);
amdgpu_device_gpu_recover(ring->adev, job, false);
}
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
@ -54,7 +54,11 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
if (!*job)
return -ENOMEM;
(*job)->adev = adev;
/*
* Initialize the scheduler to at least some ring so that we always
* have a pointer to adev.
*/
(*job)->base.sched = &adev->rings[0]->sched;
(*job)->vm = vm;
(*job)->ibs = (void *)&(*job)[1];
(*job)->num_ibs = num_ibs;
@ -86,6 +90,7 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
void amdgpu_job_free_resources(struct amdgpu_job *job)
{
struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
struct dma_fence *f;
unsigned i;
@ -93,14 +98,15 @@ void amdgpu_job_free_resources(struct amdgpu_job *job)
f = job->base.s_fence ? &job->base.s_fence->finished : job->fence;
for (i = 0; i < job->num_ibs; ++i)
amdgpu_ib_free(job->adev, &job->ibs[i], f);
amdgpu_ib_free(ring->adev, &job->ibs[i], f);
}
static void amdgpu_job_free_cb(struct drm_sched_job *s_job)
{
struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
struct amdgpu_ring *ring = to_amdgpu_ring(s_job->sched);
struct amdgpu_job *job = to_amdgpu_job(s_job);
amdgpu_ring_priority_put(job->ring, s_job->s_priority);
amdgpu_ring_priority_put(ring, s_job->s_priority);
dma_fence_put(job->fence);
amdgpu_sync_free(&job->sync);
amdgpu_sync_free(&job->sched_sync);
@ -117,50 +123,68 @@ void amdgpu_job_free(struct amdgpu_job *job)
kfree(job);
}
int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
struct drm_sched_entity *entity, void *owner,
struct dma_fence **f)
int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
void *owner, struct dma_fence **f)
{
enum drm_sched_priority priority;
struct amdgpu_ring *ring;
int r;
job->ring = ring;
if (!f)
return -EINVAL;
r = drm_sched_job_init(&job->base, &ring->sched, entity, owner);
r = drm_sched_job_init(&job->base, entity, owner);
if (r)
return r;
job->owner = owner;
job->fence_ctx = entity->fence_context;
*f = dma_fence_get(&job->base.s_fence->finished);
amdgpu_job_free_resources(job);
amdgpu_ring_priority_get(job->ring, job->base.s_priority);
priority = job->base.s_priority;
drm_sched_entity_push_job(&job->base, entity);
ring = to_amdgpu_ring(entity->rq->sched);
amdgpu_ring_priority_get(ring, priority);
return 0;
}
int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
struct dma_fence **fence)
{
int r;
job->base.sched = &ring->sched;
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, NULL, fence);
job->fence = dma_fence_get(*fence);
if (r)
return r;
amdgpu_job_free(job);
return 0;
}
static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
struct drm_sched_entity *s_entity)
{
struct amdgpu_ring *ring = to_amdgpu_ring(s_entity->rq->sched);
struct amdgpu_job *job = to_amdgpu_job(sched_job);
struct amdgpu_vm *vm = job->vm;
struct dma_fence *fence;
bool explicit = false;
int r;
struct dma_fence *fence = amdgpu_sync_get_fence(&job->sync, &explicit);
fence = amdgpu_sync_get_fence(&job->sync, &explicit);
if (fence && explicit) {
if (drm_sched_dependency_optimized(fence, s_entity)) {
r = amdgpu_sync_fence(job->adev, &job->sched_sync, fence, false);
r = amdgpu_sync_fence(ring->adev, &job->sched_sync,
fence, false);
if (r)
DRM_ERROR("Error adding fence to sync (%d)\n", r);
DRM_ERROR("Error adding fence (%d)\n", r);
}
}
while (fence == NULL && vm && !job->vmid) {
struct amdgpu_ring *ring = job->ring;
r = amdgpu_vmid_grab(vm, ring, &job->sync,
&job->base.s_fence->finished,
job);
@ -175,30 +199,25 @@ static struct dma_fence *amdgpu_job_dependency(struct drm_sched_job *sched_job,
static struct dma_fence *amdgpu_job_run(struct drm_sched_job *sched_job)
{
struct amdgpu_ring *ring = to_amdgpu_ring(sched_job->sched);
struct dma_fence *fence = NULL, *finished;
struct amdgpu_device *adev;
struct amdgpu_job *job;
int r;
if (!sched_job) {
DRM_ERROR("job is null\n");
return NULL;
}
job = to_amdgpu_job(sched_job);
finished = &job->base.s_fence->finished;
adev = job->adev;
BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));
trace_amdgpu_sched_run_job(job);
if (job->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
if (job->vram_lost_counter != atomic_read(&ring->adev->vram_lost_counter))
dma_fence_set_error(finished, -ECANCELED);/* skip IB as well if VRAM lost */
if (finished->error < 0) {
DRM_INFO("Skip scheduling IBs!\n");
} else {
r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs, job,
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs, job,
&fence);
if (r)
DRM_ERROR("Error scheduling IBs (%d)\n", r);

View File

@ -0,0 +1,74 @@
/*
* Copyright 2018 Advanced Micro Devices, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef __AMDGPU_JOB_H__
#define __AMDGPU_JOB_H__
/* bit set means command submit involves a preamble IB */
#define AMDGPU_PREAMBLE_IB_PRESENT (1 << 0)
/* bit set means preamble IB is first presented in belonging context */
#define AMDGPU_PREAMBLE_IB_PRESENT_FIRST (1 << 1)
/* bit set means context switch occurred */
#define AMDGPU_HAVE_CTX_SWITCH (1 << 2)
#define to_amdgpu_job(sched_job) \
container_of((sched_job), struct amdgpu_job, base)
struct amdgpu_fence;
struct amdgpu_job {
struct drm_sched_job base;
struct amdgpu_vm *vm;
struct amdgpu_sync sync;
struct amdgpu_sync sched_sync;
struct amdgpu_ib *ibs;
struct dma_fence *fence; /* the hw fence */
uint32_t preamble_status;
uint32_t num_ibs;
void *owner;
bool vm_needs_flush;
uint64_t vm_pd_addr;
unsigned vmid;
unsigned pasid;
uint32_t gds_base, gds_size;
uint32_t gws_base, gws_size;
uint32_t oa_base, oa_size;
uint32_t vram_lost_counter;
/* user fence handling */
uint64_t uf_addr;
uint64_t uf_sequence;
};
int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
struct amdgpu_job **job, struct amdgpu_vm *vm);
int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
struct amdgpu_job **job);
void amdgpu_job_free_resources(struct amdgpu_job *job);
void amdgpu_job_free(struct amdgpu_job *job);
int amdgpu_job_submit(struct amdgpu_job *job, struct drm_sched_entity *entity,
void *owner, struct dma_fence **f);
int amdgpu_job_submit_direct(struct amdgpu_job *job, struct amdgpu_ring *ring,
struct dma_fence **fence);
#endif
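Going only by the prototypes declared above, a kernel-internal submission would roughly follow the sequence sketched below; error handling is trimmed, the scheduler entity is assumed to be set up by the caller, and none of this is part of the patch.

/* Rough usage sketch for the interfaces declared in this header. */
static int example_submit_one_ib(struct amdgpu_device *adev,
                                 struct drm_sched_entity *entity,
                                 unsigned int ib_size)
{
        struct amdgpu_job *job;
        struct dma_fence *fence;
        int r;

        r = amdgpu_job_alloc_with_ib(adev, ib_size, &job);
        if (r)
                return r;

        /* ... fill job->ibs[0] with packets for the target engine ... */

        r = amdgpu_job_submit(job, entity, NULL /* owner */, &fence);
        if (r) {
                amdgpu_job_free(job);
                return r;
        }

        dma_fence_put(fence);   /* drop the reference once no longer needed */
        return 0;
}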

View File

@ -328,61 +328,71 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
case AMDGPU_HW_IP_GFX:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_gfx_rings; i++)
ring_mask |= ((adev->gfx.gfx_ring[i].ready ? 1 : 0) << i);
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ib_size_alignment = 8;
ring_mask |= adev->gfx.gfx_ring[i].ready << i;
ib_start_alignment = 32;
ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_COMPUTE:
type = AMD_IP_BLOCK_TYPE_GFX;
for (i = 0; i < adev->gfx.num_compute_rings; i++)
ring_mask |= ((adev->gfx.compute_ring[i].ready ? 1 : 0) << i);
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ib_size_alignment = 8;
ring_mask |= adev->gfx.compute_ring[i].ready << i;
ib_start_alignment = 32;
ib_size_alignment = 32;
break;
case AMDGPU_HW_IP_DMA:
type = AMD_IP_BLOCK_TYPE_SDMA;
for (i = 0; i < adev->sdma.num_instances; i++)
ring_mask |= ((adev->sdma.instance[i].ring.ready ? 1 : 0) << i);
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ib_size_alignment = 1;
ring_mask |= adev->sdma.instance[i].ring.ready << i;
ib_start_alignment = 256;
ib_size_alignment = 4;
break;
case AMDGPU_HW_IP_UVD:
type = AMD_IP_BLOCK_TYPE_UVD;
for (i = 0; i < adev->uvd.num_uvd_inst; i++)
ring_mask |= ((adev->uvd.inst[i].ring.ready ? 1 : 0) << i);
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ib_size_alignment = 16;
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (adev->uvd.harvest_config & (1 << i))
continue;
ring_mask |= adev->uvd.inst[i].ring.ready;
}
ib_start_alignment = 64;
ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCE:
type = AMD_IP_BLOCK_TYPE_VCE;
for (i = 0; i < adev->vce.num_rings; i++)
ring_mask |= ((adev->vce.ring[i].ready ? 1 : 0) << i);
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ring_mask |= adev->vce.ring[i].ready << i;
ib_start_alignment = 4;
ib_size_alignment = 1;
break;
case AMDGPU_HW_IP_UVD_ENC:
type = AMD_IP_BLOCK_TYPE_UVD;
for (i = 0; i < adev->uvd.num_uvd_inst; i++)
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (adev->uvd.harvest_config & (1 << i))
continue;
for (j = 0; j < adev->uvd.num_enc_rings; j++)
ring_mask |=
((adev->uvd.inst[i].ring_enc[j].ready ? 1 : 0) <<
(j + i * adev->uvd.num_enc_rings));
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ib_size_alignment = 1;
ring_mask |= adev->uvd.inst[i].ring_enc[j].ready << j;
}
ib_start_alignment = 64;
ib_size_alignment = 64;
break;
case AMDGPU_HW_IP_VCN_DEC:
type = AMD_IP_BLOCK_TYPE_VCN;
ring_mask = adev->vcn.ring_dec.ready ? 1 : 0;
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ring_mask = adev->vcn.ring_dec.ready;
ib_start_alignment = 16;
ib_size_alignment = 16;
break;
case AMDGPU_HW_IP_VCN_ENC:
type = AMD_IP_BLOCK_TYPE_VCN;
for (i = 0; i < adev->vcn.num_enc_rings; i++)
ring_mask |= ((adev->vcn.ring_enc[i].ready ? 1 : 0) << i);
ib_start_alignment = AMDGPU_GPU_PAGE_SIZE;
ring_mask |= adev->vcn.ring_enc[i].ready << i;
ib_start_alignment = 64;
ib_size_alignment = 1;
break;
case AMDGPU_HW_IP_VCN_JPEG:
type = AMD_IP_BLOCK_TYPE_VCN;
ring_mask = adev->vcn.ring_jpeg.ready;
ib_start_alignment = 16;
ib_size_alignment = 16;
break;
default:
return -EINVAL;
}
@ -427,6 +437,7 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
break;
case AMDGPU_HW_IP_VCN_DEC:
case AMDGPU_HW_IP_VCN_ENC:
case AMDGPU_HW_IP_VCN_JPEG:
type = AMD_IP_BLOCK_TYPE_VCN;
break;
default:
@ -494,13 +505,13 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
case AMDGPU_INFO_VRAM_GTT: {
struct drm_amdgpu_info_vram_gtt vram_gtt;
vram_gtt.vram_size = adev->gmc.real_vram_size;
vram_gtt.vram_size -= adev->vram_pin_size;
vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size;
vram_gtt.vram_cpu_accessible_size -= (adev->vram_pin_size - adev->invisible_pin_size);
vram_gtt.vram_size = adev->gmc.real_vram_size -
atomic64_read(&adev->vram_pin_size);
vram_gtt.vram_cpu_accessible_size = adev->gmc.visible_vram_size -
atomic64_read(&adev->visible_pin_size);
vram_gtt.gtt_size = adev->mman.bdev.man[TTM_PL_TT].size;
vram_gtt.gtt_size *= PAGE_SIZE;
vram_gtt.gtt_size -= adev->gart_pin_size;
vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
return copy_to_user(out, &vram_gtt,
min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
}
@ -509,17 +520,16 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
memset(&mem, 0, sizeof(mem));
mem.vram.total_heap_size = adev->gmc.real_vram_size;
mem.vram.usable_heap_size =
adev->gmc.real_vram_size - adev->vram_pin_size;
mem.vram.usable_heap_size = adev->gmc.real_vram_size -
atomic64_read(&adev->vram_pin_size);
mem.vram.heap_usage =
amdgpu_vram_mgr_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
mem.vram.max_allocation = mem.vram.usable_heap_size * 3 / 4;
mem.cpu_accessible_vram.total_heap_size =
adev->gmc.visible_vram_size;
mem.cpu_accessible_vram.usable_heap_size =
adev->gmc.visible_vram_size -
(adev->vram_pin_size - adev->invisible_pin_size);
mem.cpu_accessible_vram.usable_heap_size = adev->gmc.visible_vram_size -
atomic64_read(&adev->visible_pin_size);
mem.cpu_accessible_vram.heap_usage =
amdgpu_vram_mgr_vis_usage(&adev->mman.bdev.man[TTM_PL_VRAM]);
mem.cpu_accessible_vram.max_allocation =
@ -527,8 +537,8 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
mem.gtt.total_heap_size = adev->mman.bdev.man[TTM_PL_TT].size;
mem.gtt.total_heap_size *= PAGE_SIZE;
mem.gtt.usable_heap_size = mem.gtt.total_heap_size
- adev->gart_pin_size;
mem.gtt.usable_heap_size = mem.gtt.total_heap_size -
atomic64_read(&adev->gart_pin_size);
mem.gtt.heap_usage =
amdgpu_gtt_mgr_usage(&adev->mman.bdev.man[TTM_PL_TT]);
mem.gtt.max_allocation = mem.gtt.usable_heap_size * 3 / 4;
@ -930,7 +940,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
return;
pm_runtime_get_sync(dev->dev);
amdgpu_ctx_mgr_entity_fini(&fpriv->ctx_mgr);
if (adev->asic_type != CHIP_RAVEN) {
amdgpu_uvd_free_handles(adev, file_priv);
@ -958,7 +967,7 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
amdgpu_bo_unref(&pd);
idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
amdgpu_bo_list_free(list);
amdgpu_bo_list_put(list);
idr_destroy(&fpriv->bo_list_handles);
mutex_destroy(&fpriv->bo_list_lock);

View File

@ -28,6 +28,21 @@
* Christian König <christian.koenig@amd.com>
*/
/**
* DOC: MMU Notifier
*
* For coherent userptr handling, the driver registers an MMU notifier to be
* informed about updates to the page tables of a process.
*
* When somebody tries to invalidate the page tables, we block the update until
* all operations on the pages in question are completed, then those pages are
* marked as accessed and also dirty if it wasn't a read-only access.
*
* New command submissions using the userptrs in question are delayed until all
* page table invalidations are completed and we once more see a coherent
* process address space.
*/
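For readers new to the mechanism, the fragment below sketches the registration side of an MMU notifier in the shape this file uses; the names are stand-ins, and only mmu_notifier_ops, __mmu_notifier_register() and the callback signature mirror what the code in this file relies on.

/* Simplified, illustrative sketch of hooking into the MMU notifier machinery. */
struct example_notifier {
        struct mmu_notifier mn;
};

static void example_invalidate_range_start(struct mmu_notifier *mn,
                                           struct mm_struct *mm,
                                           unsigned long start,
                                           unsigned long end)
{
        /* Wait for pending GPU access to the range, then mark the affected
         * pages accessed (and dirty if they were written). */
}

static const struct mmu_notifier_ops example_mn_ops = {
        .invalidate_range_start = example_invalidate_range_start,
};

/* Registration, done once per process address space with mmap_sem held for
 * write (as amdgpu_mn_get() does in this file):
 *
 *      n->mn.ops = &example_mn_ops;
 *      r = __mmu_notifier_register(&n->mn, current->mm);
 */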
#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/mmu_notifier.h>
@ -38,6 +53,22 @@
#include "amdgpu.h"
#include "amdgpu_amdkfd.h"
/**
* struct amdgpu_mn
*
* @adev: amdgpu device pointer
* @mm: process address space
* @mn: MMU notifier structure
* @type: type of MMU notifier
* @work: destruction work item
* @node: hash table node to find structure by adev and mn
* @lock: rw semaphore protecting the notifier nodes
* @objects: interval tree containing amdgpu_mn_nodes
* @read_lock: mutex for recursive locking of @lock
* @recursion: depth of recursion
*
* Data for each amdgpu device and process address space.
*/
struct amdgpu_mn {
/* constant after initialisation */
struct amdgpu_device *adev;
@ -58,13 +89,21 @@ struct amdgpu_mn {
atomic_t recursion;
};
/**
* struct amdgpu_mn_node
*
* @it: interval node defining start-last of the affected address range
* @bos: list of all BOs in the affected address range
*
* Manages all BOs which are affected of a certain range of address space.
*/
struct amdgpu_mn_node {
struct interval_tree_node it;
struct list_head bos;
};
/**
* amdgpu_mn_destroy - destroy the rmn
* amdgpu_mn_destroy - destroy the MMU notifier
*
* @work: previously scheduled work item
*
@ -72,47 +111,50 @@ struct amdgpu_mn_node {
*/
static void amdgpu_mn_destroy(struct work_struct *work)
{
struct amdgpu_mn *rmn = container_of(work, struct amdgpu_mn, work);
struct amdgpu_device *adev = rmn->adev;
struct amdgpu_mn *amn = container_of(work, struct amdgpu_mn, work);
struct amdgpu_device *adev = amn->adev;
struct amdgpu_mn_node *node, *next_node;
struct amdgpu_bo *bo, *next_bo;
mutex_lock(&adev->mn_lock);
down_write(&rmn->lock);
hash_del(&rmn->node);
down_write(&amn->lock);
hash_del(&amn->node);
rbtree_postorder_for_each_entry_safe(node, next_node,
&rmn->objects.rb_root, it.rb) {
&amn->objects.rb_root, it.rb) {
list_for_each_entry_safe(bo, next_bo, &node->bos, mn_list) {
bo->mn = NULL;
list_del_init(&bo->mn_list);
}
kfree(node);
}
up_write(&rmn->lock);
up_write(&amn->lock);
mutex_unlock(&adev->mn_lock);
mmu_notifier_unregister_no_release(&rmn->mn, rmn->mm);
kfree(rmn);
mmu_notifier_unregister_no_release(&amn->mn, amn->mm);
kfree(amn);
}
/**
* amdgpu_mn_release - callback to notify about mm destruction
*
* @mn: our notifier
* @mn: the mm this callback is about
* @mm: the mm this callback is about
*
* Schedule a work item to lazily destroy our notifier.
*/
static void amdgpu_mn_release(struct mmu_notifier *mn,
struct mm_struct *mm)
{
struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
INIT_WORK(&rmn->work, amdgpu_mn_destroy);
schedule_work(&rmn->work);
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
INIT_WORK(&amn->work, amdgpu_mn_destroy);
schedule_work(&amn->work);
}
/**
* amdgpu_mn_lock - take the write side lock for this mn
* amdgpu_mn_lock - take the write side lock for this notifier
*
* @mn: our notifier
*/
void amdgpu_mn_lock(struct amdgpu_mn *mn)
{
@ -121,7 +163,9 @@ void amdgpu_mn_lock(struct amdgpu_mn *mn)
}
/**
* amdgpu_mn_unlock - drop the write side lock for this mn
* amdgpu_mn_unlock - drop the write side lock for this notifier
*
* @mn: our notifier
*/
void amdgpu_mn_unlock(struct amdgpu_mn *mn)
{
@ -130,40 +174,38 @@ void amdgpu_mn_unlock(struct amdgpu_mn *mn)
}
/**
* amdgpu_mn_read_lock - take the rmn read lock
* amdgpu_mn_read_lock - take the read side lock for this notifier
*
* @rmn: our notifier
*
* Take the rmn read side lock.
* @amn: our notifier
*/
static void amdgpu_mn_read_lock(struct amdgpu_mn *rmn)
static void amdgpu_mn_read_lock(struct amdgpu_mn *amn)
{
mutex_lock(&rmn->read_lock);
if (atomic_inc_return(&rmn->recursion) == 1)
down_read_non_owner(&rmn->lock);
mutex_unlock(&rmn->read_lock);
mutex_lock(&amn->read_lock);
if (atomic_inc_return(&amn->recursion) == 1)
down_read_non_owner(&amn->lock);
mutex_unlock(&amn->read_lock);
}
/**
* amdgpu_mn_read_unlock - drop the rmn read lock
* amdgpu_mn_read_unlock - drop the read side lock for this notifier
*
* @rmn: our notifier
*
* Drop the rmn read side lock.
* @amn: our notifier
*/
static void amdgpu_mn_read_unlock(struct amdgpu_mn *rmn)
static void amdgpu_mn_read_unlock(struct amdgpu_mn *amn)
{
if (atomic_dec_return(&rmn->recursion) == 0)
up_read_non_owner(&rmn->lock);
if (atomic_dec_return(&amn->recursion) == 0)
up_read_non_owner(&amn->lock);
}
/**
* amdgpu_mn_invalidate_node - unmap all BOs of a node
*
* @node: the node with the BOs to unmap
* @start: start of address range affected
* @end: end of address range affected
*
* We block for all BOs and unmap them by move them
* into system domain again.
* Block for operations on BOs to finish and mark pages as accessed and
* potentially dirty.
*/
static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
unsigned long start,
@ -190,27 +232,27 @@ static void amdgpu_mn_invalidate_node(struct amdgpu_mn_node *node,
* amdgpu_mn_invalidate_range_start_gfx - callback to notify about mm change
*
* @mn: our notifier
* @mn: the mm this callback is about
* @mm: the mm this callback is about
* @start: start of updated range
* @end: end of updated range
*
* We block for all BOs between start and end to be idle and
* unmap them by move them into system domain again.
* Block for operations on BOs to finish and mark pages as accessed and
* potentially dirty.
*/
static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
struct mm_struct *mm,
unsigned long start,
unsigned long end)
{
struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
struct interval_tree_node *it;
/* notification is exclusive, but interval is inclusive */
end -= 1;
amdgpu_mn_read_lock(rmn);
amdgpu_mn_read_lock(amn);
it = interval_tree_iter_first(&rmn->objects, start, end);
it = interval_tree_iter_first(&amn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
@ -225,7 +267,7 @@ static void amdgpu_mn_invalidate_range_start_gfx(struct mmu_notifier *mn,
* amdgpu_mn_invalidate_range_start_hsa - callback to notify about mm change
*
* @mn: our notifier
* @mn: the mm this callback is about
* @mm: the mm this callback is about
* @start: start of updated range
* @end: end of updated range
*
@ -238,15 +280,15 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
unsigned long start,
unsigned long end)
{
struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
struct interval_tree_node *it;
/* notification is exclusive, but interval is inclusive */
end -= 1;
amdgpu_mn_read_lock(rmn);
amdgpu_mn_read_lock(amn);
it = interval_tree_iter_first(&rmn->objects, start, end);
it = interval_tree_iter_first(&amn->objects, start, end);
while (it) {
struct amdgpu_mn_node *node;
struct amdgpu_bo *bo;
@ -268,7 +310,7 @@ static void amdgpu_mn_invalidate_range_start_hsa(struct mmu_notifier *mn,
* amdgpu_mn_invalidate_range_end - callback to notify about mm change
*
* @mn: our notifier
* @mn: the mm this callback is about
* @mm: the mm this callback is about
* @start: start of updated range
* @end: end of updated range
*
@ -279,9 +321,9 @@ static void amdgpu_mn_invalidate_range_end(struct mmu_notifier *mn,
unsigned long start,
unsigned long end)
{
struct amdgpu_mn *rmn = container_of(mn, struct amdgpu_mn, mn);
struct amdgpu_mn *amn = container_of(mn, struct amdgpu_mn, mn);
amdgpu_mn_read_unlock(rmn);
amdgpu_mn_read_unlock(amn);
}
static const struct mmu_notifier_ops amdgpu_mn_ops[] = {
@ -315,7 +357,7 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
enum amdgpu_mn_type type)
{
struct mm_struct *mm = current->mm;
struct amdgpu_mn *rmn;
struct amdgpu_mn *amn;
unsigned long key = AMDGPU_MN_KEY(mm, type);
int r;
@ -325,41 +367,41 @@ struct amdgpu_mn *amdgpu_mn_get(struct amdgpu_device *adev,
return ERR_PTR(-EINTR);
}
hash_for_each_possible(adev->mn_hash, rmn, node, key)
if (AMDGPU_MN_KEY(rmn->mm, rmn->type) == key)
hash_for_each_possible(adev->mn_hash, amn, node, key)
if (AMDGPU_MN_KEY(amn->mm, amn->type) == key)
goto release_locks;
rmn = kzalloc(sizeof(*rmn), GFP_KERNEL);
if (!rmn) {
rmn = ERR_PTR(-ENOMEM);
amn = kzalloc(sizeof(*amn), GFP_KERNEL);
if (!amn) {
amn = ERR_PTR(-ENOMEM);
goto release_locks;
}
rmn->adev = adev;
rmn->mm = mm;
init_rwsem(&rmn->lock);
rmn->type = type;
rmn->mn.ops = &amdgpu_mn_ops[type];
rmn->objects = RB_ROOT_CACHED;
mutex_init(&rmn->read_lock);
atomic_set(&rmn->recursion, 0);
amn->adev = adev;
amn->mm = mm;
init_rwsem(&amn->lock);
amn->type = type;
amn->mn.ops = &amdgpu_mn_ops[type];
amn->objects = RB_ROOT_CACHED;
mutex_init(&amn->read_lock);
atomic_set(&amn->recursion, 0);
r = __mmu_notifier_register(&rmn->mn, mm);
r = __mmu_notifier_register(&amn->mn, mm);
if (r)
goto free_rmn;
goto free_amn;
hash_add(adev->mn_hash, &rmn->node, AMDGPU_MN_KEY(mm, type));
hash_add(adev->mn_hash, &amn->node, AMDGPU_MN_KEY(mm, type));
release_locks:
up_write(&mm->mmap_sem);
mutex_unlock(&adev->mn_lock);
return rmn;
return amn;
free_rmn:
free_amn:
up_write(&mm->mmap_sem);
mutex_unlock(&adev->mn_lock);
kfree(rmn);
kfree(amn);
return ERR_PTR(r);
}
@ -379,14 +421,14 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
enum amdgpu_mn_type type =
bo->kfd_bo ? AMDGPU_MN_TYPE_HSA : AMDGPU_MN_TYPE_GFX;
struct amdgpu_mn *rmn;
struct amdgpu_mn *amn;
struct amdgpu_mn_node *node = NULL, *new_node;
struct list_head bos;
struct interval_tree_node *it;
rmn = amdgpu_mn_get(adev, type);
if (IS_ERR(rmn))
return PTR_ERR(rmn);
amn = amdgpu_mn_get(adev, type);
if (IS_ERR(amn))
return PTR_ERR(amn);
new_node = kmalloc(sizeof(*new_node), GFP_KERNEL);
if (!new_node)
@ -394,12 +436,12 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
INIT_LIST_HEAD(&bos);
down_write(&rmn->lock);
down_write(&amn->lock);
while ((it = interval_tree_iter_first(&rmn->objects, addr, end))) {
while ((it = interval_tree_iter_first(&amn->objects, addr, end))) {
kfree(node);
node = container_of(it, struct amdgpu_mn_node, it);
interval_tree_remove(&node->it, &rmn->objects);
interval_tree_remove(&node->it, &amn->objects);
addr = min(it->start, addr);
end = max(it->last, end);
list_splice(&node->bos, &bos);
@ -410,7 +452,7 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
else
kfree(new_node);
bo->mn = rmn;
bo->mn = amn;
node->it.start = addr;
node->it.last = end;
@ -418,9 +460,9 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
list_splice(&bos, &node->bos);
list_add(&bo->mn_list, &node->bos);
interval_tree_insert(&node->it, &rmn->objects);
interval_tree_insert(&node->it, &amn->objects);
up_write(&rmn->lock);
up_write(&amn->lock);
return 0;
}
@ -435,18 +477,18 @@ int amdgpu_mn_register(struct amdgpu_bo *bo, unsigned long addr)
void amdgpu_mn_unregister(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct amdgpu_mn *rmn;
struct amdgpu_mn *amn;
struct list_head *head;
mutex_lock(&adev->mn_lock);
rmn = bo->mn;
if (rmn == NULL) {
amn = bo->mn;
if (amn == NULL) {
mutex_unlock(&adev->mn_lock);
return;
}
down_write(&rmn->lock);
down_write(&amn->lock);
/* save the next list entry for later */
head = bo->mn_list.next;
@ -456,12 +498,13 @@ void amdgpu_mn_unregister(struct amdgpu_bo *bo)
if (list_empty(head)) {
struct amdgpu_mn_node *node;
node = container_of(head, struct amdgpu_mn_node, bos);
interval_tree_remove(&node->it, &rmn->objects);
interval_tree_remove(&node->it, &amn->objects);
kfree(node);
}
up_write(&rmn->lock);
up_write(&amn->lock);
mutex_unlock(&adev->mn_lock);
}

View File

@ -38,7 +38,20 @@
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
static bool amdgpu_need_backup(struct amdgpu_device *adev)
/**
* DOC: amdgpu_object
*
* This defines the interfaces to operate on an &amdgpu_bo buffer object which
* represents memory used by the driver (VRAM, system memory, etc.). The driver
* provides DRM/GEM APIs to userspace. DRM/GEM APIs then use these interfaces
* to create/destroy/set buffer objects, which are then managed by the kernel TTM
* memory manager.
* The interfaces are also used internally by kernel clients, including gfx,
* uvd, etc. for kernel managed allocations used by the GPU.
*
*/
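Pieced together from the helpers touched in this file, the typical lifecycle of a kernel-owned BO looks roughly like the sketch below; the size, alignment and domain are arbitrary examples and the whole function is illustrative, not part of this patch.

/* Rough lifecycle sketch for a kernel-owned buffer object. */
static int example_kernel_bo(struct amdgpu_device *adev)
{
        struct amdgpu_bo *bo = NULL;
        u64 gpu_addr;
        void *cpu_addr;
        int r;

        /* Create, pin and map a page-sized buffer in VRAM in one call. */
        r = amdgpu_bo_create_kernel(adev, PAGE_SIZE, PAGE_SIZE,
                                    AMDGPU_GEM_DOMAIN_VRAM,
                                    &bo, &gpu_addr, &cpu_addr);
        if (r)
                return r;

        /* ... use cpu_addr for CPU access and gpu_addr in command buffers ... */

        /* Unmap, unpin and release the buffer again. */
        amdgpu_bo_free_kernel(&bo, &gpu_addr, &cpu_addr);
        return 0;
}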
static bool amdgpu_bo_need_backup(struct amdgpu_device *adev)
{
if (adev->flags & AMD_IS_APU)
return false;
@ -50,11 +63,35 @@ static bool amdgpu_need_backup(struct amdgpu_device *adev)
return true;
}
static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
/**
* amdgpu_bo_subtract_pin_size - Remove BO from pin_size accounting
*
* @bo: &amdgpu_bo buffer object
*
* This function is called when a BO stops being pinned, and updates the
* &amdgpu_device pin_size values accordingly.
*/
static void amdgpu_bo_subtract_pin_size(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
atomic64_sub(amdgpu_bo_size(bo), &adev->vram_pin_size);
atomic64_sub(amdgpu_vram_mgr_bo_visible_size(bo),
&adev->visible_pin_size);
} else if (bo->tbo.mem.mem_type == TTM_PL_TT) {
atomic64_sub(amdgpu_bo_size(bo), &adev->gart_pin_size);
}
}
static void amdgpu_bo_destroy(struct ttm_buffer_object *tbo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(tbo->bdev);
struct amdgpu_bo *bo = ttm_to_amdgpu_bo(tbo);
if (bo->pin_count > 0)
amdgpu_bo_subtract_pin_size(bo);
if (bo->kfd_bo)
amdgpu_amdkfd_unreserve_system_memory_limit(bo);
@ -73,14 +110,32 @@ static void amdgpu_ttm_bo_destroy(struct ttm_buffer_object *tbo)
kfree(bo);
}
bool amdgpu_ttm_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
/**
* amdgpu_bo_is_amdgpu_bo - check if the buffer object is an &amdgpu_bo
* @bo: buffer object to be checked
*
* Uses destroy function associated with the object to determine if this is
* an &amdgpu_bo.
*
* Returns:
* true if the object belongs to &amdgpu_bo, false if not.
*/
bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo)
{
if (bo->destroy == &amdgpu_ttm_bo_destroy)
if (bo->destroy == &amdgpu_bo_destroy)
return true;
return false;
}
void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
/**
* amdgpu_bo_placement_from_domain - set buffer's placement
* @abo: &amdgpu_bo buffer object whose placement is to be set
* @domain: requested domain
*
* Sets buffer's placement according to requested domain and the buffer's
* flags.
*/
void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(abo->tbo.bdev);
struct ttm_placement *placement = &abo->placement;
@ -161,6 +216,8 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
c++;
}
BUG_ON(c >= AMDGPU_BO_MAX_PLACEMENTS);
placement->num_placement = c;
placement->placement = places;
@ -184,7 +241,8 @@ void amdgpu_ttm_placement_from_domain(struct amdgpu_bo *abo, u32 domain)
*
* Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
*
* Returns 0 on success, negative error code otherwise.
* Returns:
* 0 on success, negative error code otherwise.
*/
int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
unsigned long size, int align,
@ -220,22 +278,33 @@ int amdgpu_bo_create_reserved(struct amdgpu_device *adev,
goto error_free;
}
r = amdgpu_bo_pin(*bo_ptr, domain, gpu_addr);
r = amdgpu_bo_pin(*bo_ptr, domain);
if (r) {
dev_err(adev->dev, "(%d) kernel bo pin failed\n", r);
goto error_unreserve;
}
r = amdgpu_ttm_alloc_gart(&(*bo_ptr)->tbo);
if (r) {
dev_err(adev->dev, "%p bind failed\n", *bo_ptr);
goto error_unpin;
}
if (gpu_addr)
*gpu_addr = amdgpu_bo_gpu_offset(*bo_ptr);
if (cpu_addr) {
r = amdgpu_bo_kmap(*bo_ptr, cpu_addr);
if (r) {
dev_err(adev->dev, "(%d) kernel bo map failed\n", r);
goto error_unreserve;
goto error_unpin;
}
}
return 0;
error_unpin:
amdgpu_bo_unpin(*bo_ptr);
error_unreserve:
amdgpu_bo_unreserve(*bo_ptr);
@ -261,7 +330,8 @@ error_free:
*
* Note: For bo_ptr new BO is only created if bo_ptr points to NULL.
*
* Returns 0 on success, negative error code otherwise.
* Returns:
* 0 on success, negative error code otherwise.
*/
int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
unsigned long size, int align,
@ -285,6 +355,8 @@ int amdgpu_bo_create_kernel(struct amdgpu_device *adev,
* amdgpu_bo_free_kernel - free BO for kernel use
*
* @bo: amdgpu BO to free
* @gpu_addr: pointer to where the BO's GPU memory space address was stored
* @cpu_addr: pointer to where the BO's CPU memory space address was stored
*
* unmaps and unpins a BO for kernel internal use.
*/
@ -418,17 +490,17 @@ static int amdgpu_bo_do_create(struct amdgpu_device *adev,
#endif
bo->tbo.bdev = &adev->mman.bdev;
amdgpu_ttm_placement_from_domain(bo, bp->domain);
amdgpu_bo_placement_from_domain(bo, bp->domain);
if (bp->type == ttm_bo_type_kernel)
bo->tbo.priority = 1;
r = ttm_bo_init_reserved(&adev->mman.bdev, &bo->tbo, size, bp->type,
&bo->placement, page_align, &ctx, acc_size,
NULL, bp->resv, &amdgpu_ttm_bo_destroy);
NULL, bp->resv, &amdgpu_bo_destroy);
if (unlikely(r != 0))
return r;
if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
bo->tbo.mem.mem_type == TTM_PL_VRAM &&
bo->tbo.mem.start < adev->gmc.visible_vram_size >> PAGE_SHIFT)
amdgpu_cs_report_moved_bytes(adev, ctx.bytes_moved,
@ -498,6 +570,20 @@ static int amdgpu_bo_create_shadow(struct amdgpu_device *adev,
return r;
}
/**
* amdgpu_bo_create - create an &amdgpu_bo buffer object
* @adev: amdgpu device object
* @bp: parameters to be used for the buffer object
* @bo_ptr: pointer to the buffer object pointer
*
* Creates an &amdgpu_bo buffer object; and if requested, also creates a
* shadow object.
* The shadow object is used to back up the original buffer object, and is
* always placed in GTT.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_create(struct amdgpu_device *adev,
struct amdgpu_bo_param *bp,
struct amdgpu_bo **bo_ptr)
@ -510,7 +596,7 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
if (r)
return r;
if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_need_backup(adev)) {
if ((flags & AMDGPU_GEM_CREATE_SHADOW) && amdgpu_bo_need_backup(adev)) {
if (!bp->resv)
WARN_ON(reservation_object_lock((*bo_ptr)->tbo.resv,
NULL));
@ -527,6 +613,21 @@ int amdgpu_bo_create(struct amdgpu_device *adev,
return r;
}
/**
* amdgpu_bo_backup_to_shadow - Backs up an &amdgpu_bo buffer object
* @adev: amdgpu device object
* @ring: amdgpu_ring for the engine handling the buffer operations
* @bo: &amdgpu_bo buffer to be backed up
* @resv: reservation object with embedded fence
* @fence: dma_fence associated with the operation
* @direct: whether to submit the job directly
*
* Copies an &amdgpu_bo buffer object to its shadow object.
* Not used for now.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_backup_to_shadow(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_bo *bo,
@ -559,6 +660,18 @@ err:
return r;
}
/**
* amdgpu_bo_validate - validate an &amdgpu_bo buffer object
* @bo: pointer to the buffer object
*
* Sets the placement according to the domain and changes the placement and
* caching policy of the buffer object according to that placement.
* This is used for validating shadow bos. It calls ttm_bo_validate() to
* make sure the buffer is resident where it needs to be.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_validate(struct amdgpu_bo *bo)
{
struct ttm_operation_ctx ctx = { false, false };
@ -571,7 +684,7 @@ int amdgpu_bo_validate(struct amdgpu_bo *bo)
domain = bo->preferred_domains;
retry:
amdgpu_ttm_placement_from_domain(bo, domain);
amdgpu_bo_placement_from_domain(bo, domain);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (unlikely(r == -ENOMEM) && domain != bo->allowed_domains) {
domain = bo->allowed_domains;
@ -581,6 +694,22 @@ retry:
return r;
}
/**
* amdgpu_bo_restore_from_shadow - restore an &amdgpu_bo buffer object
* @adev: amdgpu device object
* @ring: amdgpu_ring for the engine handling the buffer operations
* @bo: &amdgpu_bo buffer to be restored
* @resv: reservation object with embedded fence
* @fence: dma_fence associated with the operation
* @direct: whether to submit the job directly
*
* Copies a buffer object's shadow content back to the object.
* This is used for recovering a buffer from its shadow in case of a gpu
* reset where vram context may be lost.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_restore_from_shadow(struct amdgpu_device *adev,
struct amdgpu_ring *ring,
struct amdgpu_bo *bo,
@ -613,6 +742,17 @@ err:
return r;
}
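For context, a hedged sketch of how a GPU-reset recovery path might call the restore helper documented above; the ring and reservation object used here are illustrative, not necessarily the driver's actual choice:

struct dma_fence *fence = NULL;
int r;

/* bo->shadow must exist; copy its content back into the VRAM BO. */
r = amdgpu_bo_restore_from_shadow(adev, adev->mman.buffer_funcs_ring,
				  bo, bo->tbo.resv, &fence, true);
if (!r && fence)
	r = dma_fence_wait(fence, false);
dma_fence_put(fence);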
/**
* amdgpu_bo_kmap - map an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be mapped
* @ptr: kernel virtual address to be returned
*
* Calls ttm_bo_kmap() to set up the kernel virtual mapping; calls
* amdgpu_bo_kptr() to get the kernel virtual address.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
{
void *kptr;
@ -643,6 +783,15 @@ int amdgpu_bo_kmap(struct amdgpu_bo *bo, void **ptr)
return 0;
}
/**
* amdgpu_bo_kptr - returns a kernel virtual address of the buffer object
* @bo: &amdgpu_bo buffer object
*
* Calls ttm_kmap_obj_virtual() to get the kernel virtual address
*
* Returns:
* the virtual address of a buffer object area.
*/
void *amdgpu_bo_kptr(struct amdgpu_bo *bo)
{
bool is_iomem;
@ -650,21 +799,42 @@ void *amdgpu_bo_kptr(struct amdgpu_bo *bo)
return ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
}
/**
* amdgpu_bo_kunmap - unmap an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be unmapped
*
* Unmaps a kernel map set up by amdgpu_bo_kmap().
*/
void amdgpu_bo_kunmap(struct amdgpu_bo *bo)
{
if (bo->kmap.bo)
ttm_bo_kunmap(&bo->kmap);
}
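A short sketch of the map/unmap pairing (editorial; assumes the BO is already created and, where required, reserved and pinned by the caller):

void *ptr;
int r;

r = amdgpu_bo_kmap(bo, &ptr);
if (r)
	return r;

memset(ptr, 0, amdgpu_bo_size(bo));	/* CPU access through the mapping */
amdgpu_bo_kunmap(bo);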
/**
* amdgpu_bo_ref - reference an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object
*
* References the contained &ttm_buffer_object.
*
* Returns:
* a refcounted pointer to the &amdgpu_bo buffer object.
*/
struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo)
{
if (bo == NULL)
return NULL;
ttm_bo_reference(&bo->tbo);
ttm_bo_get(&bo->tbo);
return bo;
}
/**
* amdgpu_bo_unref - unreference an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object
*
* Unreferences the contained &ttm_buffer_object and clears the pointer.
*/
void amdgpu_bo_unref(struct amdgpu_bo **bo)
{
struct ttm_buffer_object *tbo;
@ -673,14 +843,34 @@ void amdgpu_bo_unref(struct amdgpu_bo **bo)
return;
tbo = &((*bo)->tbo);
ttm_bo_unref(&tbo);
if (tbo == NULL)
*bo = NULL;
ttm_bo_put(tbo);
*bo = NULL;
}
/**
* amdgpu_bo_pin_restricted - pin an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be pinned
* @domain: domain to be pinned to
* @min_offset: the start of requested address range
* @max_offset: the end of requested address range
*
* Pins the buffer object according to the requested domain and address range.
* If the memory is unbound GART memory, binds the pages into the GART table.
* Adjusts pin_count and pin_size accordingly.
*
* Pinning means locking pages in memory along with keeping them at a fixed
* offset. It is required when a buffer cannot be moved, for example, when
* a display buffer is being scanned out.
*
* Compared with amdgpu_bo_pin(), this function gives more flexibility on
* where to pin a buffer if there are specific restrictions on where a buffer
* must be located.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
u64 min_offset, u64 max_offset,
u64 *gpu_addr)
u64 min_offset, u64 max_offset)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
struct ttm_operation_ctx ctx = { false, false };
@ -712,8 +902,6 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
return -EINVAL;
bo->pin_count++;
if (gpu_addr)
*gpu_addr = amdgpu_bo_gpu_offset(bo);
if (max_offset != 0) {
u64 domain_start = bo->tbo.bdev->man[mem_type].gpu_offset;
@ -728,7 +916,7 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
/* force to pin into visible video ram */
if (!(bo->flags & AMDGPU_GEM_CREATE_NO_CPU_ACCESS))
bo->flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
amdgpu_ttm_placement_from_domain(bo, domain);
amdgpu_bo_placement_from_domain(bo, domain);
for (i = 0; i < bo->placement.num_placement; i++) {
unsigned fpfn, lpfn;
@ -749,33 +937,48 @@ int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
goto error;
}
r = amdgpu_ttm_alloc_gart(&bo->tbo);
if (unlikely(r)) {
dev_err(adev->dev, "%p bind failed\n", bo);
goto error;
}
bo->pin_count = 1;
if (gpu_addr != NULL)
*gpu_addr = amdgpu_bo_gpu_offset(bo);
domain = amdgpu_mem_type_to_domain(bo->tbo.mem.mem_type);
if (domain == AMDGPU_GEM_DOMAIN_VRAM) {
adev->vram_pin_size += amdgpu_bo_size(bo);
adev->invisible_pin_size += amdgpu_vram_mgr_bo_invisible_size(bo);
atomic64_add(amdgpu_bo_size(bo), &adev->vram_pin_size);
atomic64_add(amdgpu_vram_mgr_bo_visible_size(bo),
&adev->visible_pin_size);
} else if (domain == AMDGPU_GEM_DOMAIN_GTT) {
adev->gart_pin_size += amdgpu_bo_size(bo);
atomic64_add(amdgpu_bo_size(bo), &adev->gart_pin_size);
}
error:
return r;
}
int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr)
/**
* amdgpu_bo_pin - pin an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be pinned
* @domain: domain to be pinned to
*
* A simple wrapper around amdgpu_bo_pin_restricted().
* Provides a simpler API for buffers that do not have any strict restrictions
* on where they must be located.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain)
{
return amdgpu_bo_pin_restricted(bo, domain, 0, 0, gpu_addr);
return amdgpu_bo_pin_restricted(bo, domain, 0, 0);
}
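Since amdgpu_bo_pin() no longer returns the GPU address in this series, callers query it separately after pinning. A minimal sketch of the new pattern, mirroring the test-code hunk later in this diff (bo and the locals are assumed to exist):

r = amdgpu_bo_reserve(bo, false);
if (r)
	return r;

r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_VRAM);
if (r) {
	amdgpu_bo_unreserve(bo);
	return r;
}
gpu_addr = amdgpu_bo_gpu_offset(bo);

/* ... use gpu_addr; later: ... */

amdgpu_bo_unpin(bo);
amdgpu_bo_unreserve(bo);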
/**
* amdgpu_bo_unpin - unpin an &amdgpu_bo buffer object
* @bo: &amdgpu_bo buffer object to be unpinned
*
* Decreases the pin_count, and clears the flags if pin_count reaches 0.
* Changes placement and pin size accordingly.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_unpin(struct amdgpu_bo *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@ -790,12 +993,7 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
if (bo->pin_count)
return 0;
if (bo->tbo.mem.mem_type == TTM_PL_VRAM) {
adev->vram_pin_size -= amdgpu_bo_size(bo);
adev->invisible_pin_size -= amdgpu_vram_mgr_bo_invisible_size(bo);
} else if (bo->tbo.mem.mem_type == TTM_PL_TT) {
adev->gart_pin_size -= amdgpu_bo_size(bo);
}
amdgpu_bo_subtract_pin_size(bo);
for (i = 0; i < bo->placement.num_placement; i++) {
bo->placements[i].lpfn = 0;
@ -808,6 +1006,16 @@ int amdgpu_bo_unpin(struct amdgpu_bo *bo)
return r;
}
/**
* amdgpu_bo_evict_vram - evict VRAM buffers
* @adev: amdgpu device object
*
* Evicts all VRAM buffers on the lru list of the memory type.
* Mainly used for evicting vram at suspend time.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
{
/* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */
@ -830,6 +1038,15 @@ static const char *amdgpu_vram_names[] = {
"DDR4",
};
/**
* amdgpu_bo_init - initialize memory manager
* @adev: amdgpu device object
*
* Calls amdgpu_ttm_init() to initialize amdgpu memory manager.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_init(struct amdgpu_device *adev)
{
/* reserve PAT memory space to WC for VRAM */
@ -847,6 +1064,16 @@ int amdgpu_bo_init(struct amdgpu_device *adev)
return amdgpu_ttm_init(adev);
}
/**
* amdgpu_bo_late_init - late init
* @adev: amdgpu device object
*
* Calls amdgpu_ttm_late_init() to free resources used earlier during
* initialization.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_late_init(struct amdgpu_device *adev)
{
amdgpu_ttm_late_init(adev);
@ -854,6 +1081,12 @@ int amdgpu_bo_late_init(struct amdgpu_device *adev)
return 0;
}
/**
* amdgpu_bo_fini - tear down memory manager
* @adev: amdgpu device object
*
* Reverses amdgpu_bo_init() to tear down memory manager.
*/
void amdgpu_bo_fini(struct amdgpu_device *adev)
{
amdgpu_ttm_fini(adev);
@ -861,12 +1094,33 @@ void amdgpu_bo_fini(struct amdgpu_device *adev)
arch_io_free_memtype_wc(adev->gmc.aper_base, adev->gmc.aper_size);
}
/**
* amdgpu_bo_fbdev_mmap - mmap fbdev memory
* @bo: &amdgpu_bo buffer object
* @vma: vma as input from the fbdev mmap method
*
* Calls ttm_fbdev_mmap() to mmap fbdev memory if it is backed by a bo.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_fbdev_mmap(struct amdgpu_bo *bo,
struct vm_area_struct *vma)
{
return ttm_fbdev_mmap(vma, &bo->tbo);
}
/**
* amdgpu_bo_set_tiling_flags - set tiling flags
* @bo: &amdgpu_bo buffer object
* @tiling_flags: new flags
*
* Sets the buffer object's tiling flags to the new value. Used by the GEM
* ioctl or kernel driver to set the tiling flags on a buffer.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
@ -879,6 +1133,14 @@ int amdgpu_bo_set_tiling_flags(struct amdgpu_bo *bo, u64 tiling_flags)
return 0;
}
/**
* amdgpu_bo_get_tiling_flags - get tiling flags
* @bo: &amdgpu_bo buffer object
* @tiling_flags: returned flags
*
* Gets the buffer object's tiling flags. Used by the GEM ioctl or kernel
* driver to query the tiling flags on a buffer.
*/
void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
{
lockdep_assert_held(&bo->tbo.resv->lock.base);
@ -887,6 +1149,19 @@ void amdgpu_bo_get_tiling_flags(struct amdgpu_bo *bo, u64 *tiling_flags)
*tiling_flags = bo->tiling_flags;
}
/**
* amdgpu_bo_set_metadata - set metadata
* @bo: &amdgpu_bo buffer object
* @metadata: new metadata
* @metadata_size: size of the new metadata
* @flags: flags of the new metadata
*
* Sets buffer object's metadata, its size and flags.
* Used via GEM ioctl.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
uint32_t metadata_size, uint64_t flags)
{
@ -916,6 +1191,21 @@ int amdgpu_bo_set_metadata (struct amdgpu_bo *bo, void *metadata,
return 0;
}
/**
* amdgpu_bo_get_metadata - get metadata
* @bo: &amdgpu_bo buffer object
* @buffer: returned metadata
* @buffer_size: size of the buffer
* @metadata_size: size of the returned metadata
* @flags: flags of the returned metadata
*
* Gets buffer object's metadata, its size and flags. buffer_size shall not be
* less than metadata_size.
* Used via GEM ioctl.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
size_t buffer_size, uint32_t *metadata_size,
uint64_t *flags)
@ -939,6 +1229,16 @@ int amdgpu_bo_get_metadata(struct amdgpu_bo *bo, void *buffer,
return 0;
}
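The two metadata helpers pair up as in this hedged sketch (sizes and contents are illustrative; the BO is assumed to be reserved as required by the caller's context):

u8 md[4] = { 0xde, 0xad, 0xbe, 0xef };	/* opaque, driver-defined metadata */
u8 out[4];
uint32_t out_size;
uint64_t out_flags;
int r;

r = amdgpu_bo_set_metadata(bo, md, sizeof(md), 0);
if (r)
	return r;

r = amdgpu_bo_get_metadata(bo, out, sizeof(out), &out_size, &out_flags);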
/**
* amdgpu_bo_move_notify - notification about a memory move
* @bo: pointer to a buffer object
* @evict: if this move is evicting the buffer from the graphics address space
* @new_mem: new information about the buffer object
*
* Marks the corresponding &amdgpu_bo buffer object as invalid, also performs
* bookkeeping.
* TTM driver callback which is called when ttm moves a buffer.
*/
void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
bool evict,
struct ttm_mem_reg *new_mem)
@ -947,7 +1247,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
struct amdgpu_bo *abo;
struct ttm_mem_reg *old_mem = &bo->mem;
if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
if (!amdgpu_bo_is_amdgpu_bo(bo))
return;
abo = ttm_to_amdgpu_bo(bo);
@ -964,9 +1264,20 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
return;
/* move_notify is called before move happens */
trace_amdgpu_ttm_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
trace_amdgpu_bo_move(abo, new_mem->mem_type, old_mem->mem_type);
}
/**
* amdgpu_bo_fault_reserve_notify - notification about a memory fault
* @bo: pointer to a buffer object
*
* Notifies the driver we are taking a fault on this BO and have reserved it,
* also performs bookkeeping.
* TTM driver callback for dealing with vm faults.
*
* Returns:
* 0 for success or a negative error code on failure.
*/
int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
{
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->bdev);
@ -975,7 +1286,7 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
unsigned long offset, size;
int r;
if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
if (!amdgpu_bo_is_amdgpu_bo(bo))
return 0;
abo = ttm_to_amdgpu_bo(bo);
@ -997,8 +1308,8 @@ int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
/* hurrah the memory is not visible ! */
atomic64_inc(&adev->num_vram_cpu_page_faults);
amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT);
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT);
/* Avoid costly evictions; only set GTT as a busy placement */
abo->placement.num_busy_placement = 1;
@ -1040,10 +1351,11 @@ void amdgpu_bo_fence(struct amdgpu_bo *bo, struct dma_fence *fence,
* amdgpu_bo_gpu_offset - return GPU offset of bo
* @bo: amdgpu object for which we query the offset
*
* Returns current GPU offset of the object.
*
* Note: object should either be pinned or reserved when calling this
* function, it might be useful to add check for this for debugging.
*
* Returns:
* current GPU offset of the object.
*/
u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
{
@ -1059,6 +1371,14 @@ u64 amdgpu_bo_gpu_offset(struct amdgpu_bo *bo)
return bo->tbo.offset;
}
/**
* amdgpu_bo_get_preferred_pin_domain - get preferred domain for scanout
* @adev: amdgpu device object
* @domain: allowed :ref:`memory domains <amdgpu_memory_domains>`
*
* Returns:
* Which of the allowed domains is preferred for pinning the BO for scanout.
*/
uint32_t amdgpu_bo_get_preferred_pin_domain(struct amdgpu_device *adev,
uint32_t domain)
{


@ -32,6 +32,7 @@
#include "amdgpu.h"
#define AMDGPU_BO_INVALID_OFFSET LONG_MAX
#define AMDGPU_BO_MAX_PLACEMENTS 3
struct amdgpu_bo_param {
unsigned long size;
@ -77,7 +78,7 @@ struct amdgpu_bo {
/* Protected by tbo.reserved */
u32 preferred_domains;
u32 allowed_domains;
struct ttm_place placements[AMDGPU_GEM_DOMAIN_MAX + 1];
struct ttm_place placements[AMDGPU_BO_MAX_PLACEMENTS];
struct ttm_placement placement;
struct ttm_buffer_object tbo;
struct ttm_bo_kmap_obj kmap;
@ -234,6 +235,9 @@ static inline bool amdgpu_bo_explicit_sync(struct amdgpu_bo *bo)
return bo->flags & AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
}
bool amdgpu_bo_is_amdgpu_bo(struct ttm_buffer_object *bo);
void amdgpu_bo_placement_from_domain(struct amdgpu_bo *abo, u32 domain);
int amdgpu_bo_create(struct amdgpu_device *adev,
struct amdgpu_bo_param *bp,
struct amdgpu_bo **bo_ptr);
@ -252,10 +256,9 @@ void *amdgpu_bo_kptr(struct amdgpu_bo *bo);
void amdgpu_bo_kunmap(struct amdgpu_bo *bo);
struct amdgpu_bo *amdgpu_bo_ref(struct amdgpu_bo *bo);
void amdgpu_bo_unref(struct amdgpu_bo **bo);
int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain, u64 *gpu_addr);
int amdgpu_bo_pin(struct amdgpu_bo *bo, u32 domain);
int amdgpu_bo_pin_restricted(struct amdgpu_bo *bo, u32 domain,
u64 min_offset, u64 max_offset,
u64 *gpu_addr);
u64 min_offset, u64 max_offset);
int amdgpu_bo_unpin(struct amdgpu_bo *bo);
int amdgpu_bo_evict_vram(struct amdgpu_device *adev);
int amdgpu_bo_init(struct amdgpu_device *adev);


@ -31,7 +31,7 @@
#include <linux/power_supply.h>
#include <linux/hwmon.h>
#include <linux/hwmon-sysfs.h>
#include <linux/nospec.h>
static int amdgpu_debugfs_pm_init(struct amdgpu_device *adev);
@ -68,11 +68,11 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
if (adev->pm.dpm_enabled) {
mutex_lock(&adev->pm.mutex);
if (power_supply_is_system_supplied() > 0)
adev->pm.dpm.ac_power = true;
adev->pm.ac_power = true;
else
adev->pm.dpm.ac_power = false;
adev->pm.ac_power = false;
if (adev->powerplay.pp_funcs->enable_bapm)
amdgpu_dpm_enable_bapm(adev, adev->pm.dpm.ac_power);
amdgpu_dpm_enable_bapm(adev, adev->pm.ac_power);
mutex_unlock(&adev->pm.mutex);
}
}
@ -80,12 +80,15 @@ void amdgpu_pm_acpi_event_handler(struct amdgpu_device *adev)
/**
* DOC: power_dpm_state
*
* This is a legacy interface and is only provided for backwards compatibility.
* The amdgpu driver provides a sysfs API for adjusting certain power
* related parameters. The file power_dpm_state is used for this.
* The power_dpm_state file is a legacy interface and is only provided for
* backwards compatibility. The amdgpu driver provides a sysfs API for adjusting
* certain power related parameters. The file power_dpm_state is used for this.
* It accepts the following arguments:
*
* - battery
*
* - balanced
*
* - performance
*
* battery
@ -169,14 +172,21 @@ fail:
* The amdgpu driver provides a sysfs API for adjusting certain power
* related parameters. The file power_dpm_force_performance_level is
* used for this. It accepts the following arguments:
*
* - auto
*
* - low
*
* - high
*
* - manual
*
* - profile_standard
*
* - profile_min_sclk
*
* - profile_min_mclk
*
* - profile_peak
*
* auto
@ -393,6 +403,7 @@ static ssize_t amdgpu_set_pp_force_state(struct device *dev,
count = -EINVAL;
goto fail;
}
idx = array_index_nospec(idx, ARRAY_SIZE(data.states));
amdgpu_dpm_get_pp_num_states(adev, &data);
state = data.states[idx];
@ -463,8 +474,11 @@ static ssize_t amdgpu_set_pp_table(struct device *dev,
* this.
*
* Reading the file will display:
*
* - a list of engine clock levels and voltages labeled OD_SCLK
*
* - a list of memory clock levels and voltages labeled OD_MCLK
*
* - a list of valid ranges for sclk, mclk, and voltage labeled OD_RANGE
*
* To manually adjust these settings, first select manual using
@ -593,6 +607,42 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,
return snprintf(buf, PAGE_SIZE, "\n");
}
/*
* Worst case: 32 bits individually specified, in octal at 12 characters
* per line (+1 for \n).
*/
#define AMDGPU_MASK_BUF_MAX (32 * 13)
static ssize_t amdgpu_read_mask(const char *buf, size_t count, uint32_t *mask)
{
int ret;
long level;
char *sub_str = NULL;
char *tmp;
char buf_cpy[AMDGPU_MASK_BUF_MAX + 1];
const char delimiter[3] = {' ', '\n', '\0'};
size_t bytes;
*mask = 0;
bytes = min(count, sizeof(buf_cpy) - 1);
memcpy(buf_cpy, buf, bytes);
buf_cpy[bytes] = '\0';
tmp = buf_cpy;
while (tmp[0]) {
sub_str = strsep(&tmp, delimiter);
if (strlen(sub_str)) {
ret = kstrtol(sub_str, 0, &level);
if (ret)
return -EINVAL;
*mask |= 1 << level;
} else
break;
}
return 0;
}
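A worked example of the new helper: the string a user writes into one of the pp_dpm_* files is turned into a bitmask of the selected levels (values illustrative):

uint32_t mask;
int r;

/* "0 2 3" selects levels 0, 2 and 3, so mask becomes 0x0000000d. */
r = amdgpu_read_mask("0 2 3\n", 6, &mask);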
static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
struct device_attribute *attr,
const char *buf,
@ -601,32 +651,15 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
int ret;
long level;
uint32_t mask = 0;
char *sub_str = NULL;
char *tmp;
char buf_cpy[count];
const char delimiter[3] = {' ', '\n', '\0'};
memcpy(buf_cpy, buf, count+1);
tmp = buf_cpy;
while (tmp[0]) {
sub_str = strsep(&tmp, delimiter);
if (strlen(sub_str)) {
ret = kstrtol(sub_str, 0, &level);
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
if (ret) {
count = -EINVAL;
goto fail;
}
mask |= 1 << level;
} else
break;
}
if (adev->powerplay.pp_funcs->force_clock_level)
amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
fail:
return count;
}
@ -651,32 +684,15 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
int ret;
long level;
uint32_t mask = 0;
char *sub_str = NULL;
char *tmp;
char buf_cpy[count];
const char delimiter[3] = {' ', '\n', '\0'};
memcpy(buf_cpy, buf, count+1);
tmp = buf_cpy;
while (tmp[0]) {
sub_str = strsep(&tmp, delimiter);
if (strlen(sub_str)) {
ret = kstrtol(sub_str, 0, &level);
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
if (ret) {
count = -EINVAL;
goto fail;
}
mask |= 1 << level;
} else
break;
}
if (adev->powerplay.pp_funcs->force_clock_level)
amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
fail:
return count;
}
@ -701,33 +717,15 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
int ret;
long level;
uint32_t mask = 0;
char *sub_str = NULL;
char *tmp;
char buf_cpy[count];
const char delimiter[3] = {' ', '\n', '\0'};
memcpy(buf_cpy, buf, count+1);
tmp = buf_cpy;
ret = amdgpu_read_mask(buf, count, &mask);
if (ret)
return ret;
while (tmp[0]) {
sub_str = strsep(&tmp, delimiter);
if (strlen(sub_str)) {
ret = kstrtol(sub_str, 0, &level);
if (ret) {
count = -EINVAL;
goto fail;
}
mask |= 1 << level;
} else
break;
}
if (adev->powerplay.pp_funcs->force_clock_level)
amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
fail:
return count;
}
@ -905,6 +903,36 @@ fail:
return -EINVAL;
}
/**
* DOC: busy_percent
*
* The amdgpu driver provides a sysfs API for reading how busy the GPU
* is as a percentage. The file gpu_busy_percent is used for this.
* The SMU firmware computes a percentage of load based on the
* aggregate activity level in the IP cores.
*/
static ssize_t amdgpu_get_busy_percent(struct device *dev,
struct device_attribute *attr,
char *buf)
{
struct drm_device *ddev = dev_get_drvdata(dev);
struct amdgpu_device *adev = ddev->dev_private;
int r, value, size = sizeof(value);
/* sanity check PP is enabled */
if (!(adev->powerplay.pp_funcs &&
adev->powerplay.pp_funcs->read_sensor))
return -EINVAL;
/* read the IP busy sensor */
r = amdgpu_dpm_read_sensor(adev, AMDGPU_PP_SENSOR_GPU_LOAD,
(void *)&value, &size);
if (r)
return r;
return snprintf(buf, PAGE_SIZE, "%d\n", value);
}
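From userspace the new attribute reads like any other sysfs integer. A minimal sketch; the card index in the path is illustrative and varies per system:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/class/drm/card0/device/gpu_busy_percent", "r");
	int busy;

	if (!f)
		return 1;
	if (fscanf(f, "%d", &busy) == 1)
		printf("GPU load: %d%%\n", busy);
	fclose(f);
	return 0;
}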
static DEVICE_ATTR(power_dpm_state, S_IRUGO | S_IWUSR, amdgpu_get_dpm_state, amdgpu_set_dpm_state);
static DEVICE_ATTR(power_dpm_force_performance_level, S_IRUGO | S_IWUSR,
amdgpu_get_dpm_forced_performance_level,
@ -938,6 +966,8 @@ static DEVICE_ATTR(pp_power_profile_mode, S_IRUGO | S_IWUSR,
static DEVICE_ATTR(pp_od_clk_voltage, S_IRUGO | S_IWUSR,
amdgpu_get_pp_od_clk_voltage,
amdgpu_set_pp_od_clk_voltage);
static DEVICE_ATTR(gpu_busy_percent, S_IRUGO,
amdgpu_get_busy_percent, NULL);
static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
struct device_attribute *attr,
@ -1156,7 +1186,7 @@ static ssize_t amdgpu_hwmon_show_vddnb(struct device *dev,
int r, size = sizeof(vddnb);
/* only APUs have vddnb */
if (adev->flags & AMD_IS_APU)
if (!(adev->flags & AMD_IS_APU))
return -EINVAL;
/* Can't get voltage when the card is off */
@ -1285,35 +1315,51 @@ static ssize_t amdgpu_hwmon_set_power_cap(struct device *dev,
* DOC: hwmon
*
* The amdgpu driver exposes the following sensor interfaces:
*
* - GPU temperature (via the on-die sensor)
*
* - GPU voltage
*
* - Northbridge voltage (APUs only)
*
* - GPU power
*
* - GPU fan
*
* hwmon interfaces for GPU temperature:
*
* - temp1_input: the on die GPU temperature in millidegrees Celsius
*
* - temp1_crit: temperature critical max value in millidegrees Celsius
*
* - temp1_crit_hyst: temperature hysteresis for critical limit in millidegrees Celsius
*
* hwmon interfaces for GPU voltage:
*
* - in0_input: the voltage on the GPU in millivolts
*
* - in1_input: the voltage on the Northbridge in millivolts
*
* hwmon interfaces for GPU power:
*
* - power1_average: average power used by the GPU in microWatts
*
* - power1_cap_min: minimum cap supported in microWatts
*
* - power1_cap_max: maximum cap supported in microWatts
*
* - power1_cap: selected power cap in microWatts
*
* hwmon interfaces for GPU fan:
*
* - pwm1: pulse width modulation fan level (0-255)
* - pwm1_enable: pulse width modulation fan control method
* 0: no fan speed control
* 1: manual fan speed control using pwm interface
* 2: automatic fan speed control
*
* - pwm1_enable: pulse width modulation fan control method (0: no fan speed control, 1: manual fan speed control using pwm interface, 2: automatic fan speed control)
*
* - pwm1_min: pulse width modulation fan control minimum level (0)
*
* - pwm1_max: pulse width modulation fan control maximum level (255)
*
* - fan1_input: fan speed in RPM
*
* You can use hwmon tools like sensors to view this information on your system.
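The attributes listed above follow the standard hwmon conventions, so they can be read like any other hwmon sensor. A hedged userspace sketch; the hwmon index is illustrative and should be located via the node's "name" file:

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/sys/class/hwmon/hwmon0/temp1_input", "r");
	long millideg;

	if (!f)
		return 1;
	if (fscanf(f, "%ld", &millideg) == 1)
		printf("GPU temperature: %ld.%03ld C\n",
		       millideg / 1000, millideg % 1000);
	fclose(f);
	return 0;
}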
@ -1668,10 +1714,10 @@ static void amdgpu_dpm_change_power_state_locked(struct amdgpu_device *adev)
void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
{
if (adev->powerplay.pp_funcs->powergate_uvd) {
if (adev->powerplay.pp_funcs->set_powergating_by_smu) {
/* enable/disable UVD */
mutex_lock(&adev->pm.mutex);
amdgpu_dpm_powergate_uvd(adev, !enable);
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_UVD, !enable);
mutex_unlock(&adev->pm.mutex);
} else {
if (enable) {
@ -1690,10 +1736,10 @@ void amdgpu_dpm_enable_uvd(struct amdgpu_device *adev, bool enable)
void amdgpu_dpm_enable_vce(struct amdgpu_device *adev, bool enable)
{
if (adev->powerplay.pp_funcs->powergate_vce) {
if (adev->powerplay.pp_funcs->set_powergating_by_smu) {
/* enable/disable VCE */
mutex_lock(&adev->pm.mutex);
amdgpu_dpm_powergate_vce(adev, !enable);
amdgpu_dpm_set_powergating_by_smu(adev, AMD_IP_BLOCK_TYPE_VCE, !enable);
mutex_unlock(&adev->pm.mutex);
} else {
if (enable) {
@ -1825,6 +1871,13 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
"pp_od_clk_voltage\n");
return ret;
}
ret = device_create_file(adev->dev,
&dev_attr_gpu_busy_percent);
if (ret) {
DRM_ERROR("failed to create device file "
"gpu_busy_level\n");
return ret;
}
ret = amdgpu_debugfs_pm_init(adev);
if (ret) {
DRM_ERROR("Failed to register debugfs file for dpm!\n");
@ -1860,6 +1913,7 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
&dev_attr_pp_power_profile_mode);
device_remove_file(adev->dev,
&dev_attr_pp_od_clk_voltage);
device_remove_file(adev->dev, &dev_attr_gpu_busy_percent);
}
void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
@ -1878,6 +1932,14 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
amdgpu_fence_wait_empty(ring);
}
mutex_lock(&adev->pm.mutex);
/* update battery/ac status */
if (power_supply_is_system_supplied() > 0)
adev->pm.ac_power = true;
else
adev->pm.ac_power = false;
mutex_unlock(&adev->pm.mutex);
if (adev->powerplay.pp_funcs->dispatch_tasks) {
if (!amdgpu_device_has_dc_support(adev)) {
mutex_lock(&adev->pm.mutex);
@ -1898,14 +1960,7 @@ void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
} else {
mutex_lock(&adev->pm.mutex);
amdgpu_dpm_get_active_displays(adev);
/* update battery/ac status */
if (power_supply_is_system_supplied() > 0)
adev->pm.dpm.ac_power = true;
else
adev->pm.dpm.ac_power = false;
amdgpu_dpm_change_power_state_locked(adev);
mutex_unlock(&adev->pm.mutex);
}
}


@ -23,6 +23,14 @@
*
* Authors: Alex Deucher
*/
/**
* DOC: PRIME Buffer Sharing
*
* The following callback implementations are used for :ref:`sharing GEM buffer
* objects between different devices via PRIME <prime_buffer_sharing>`.
*/
#include <drm/drmP.h>
#include "amdgpu.h"
@ -32,6 +40,14 @@
static const struct dma_buf_ops amdgpu_dmabuf_ops;
/**
* amdgpu_gem_prime_get_sg_table - &drm_driver.gem_prime_get_sg_table
* implementation
* @obj: GEM buffer object
*
* Returns:
* A scatter/gather table for the pinned pages of the buffer object's memory.
*/
struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@ -40,6 +56,15 @@ struct sg_table *amdgpu_gem_prime_get_sg_table(struct drm_gem_object *obj)
return drm_prime_pages_to_sg(bo->tbo.ttm->pages, npages);
}
/**
* amdgpu_gem_prime_vmap - &dma_buf_ops.vmap implementation
* @obj: GEM buffer object
*
* Sets up an in-kernel virtual mapping of the buffer object's memory.
*
* Returns:
* The virtual address of the mapping or an error pointer.
*/
void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@ -53,6 +78,13 @@ void *amdgpu_gem_prime_vmap(struct drm_gem_object *obj)
return bo->dma_buf_vmap.virtual;
}
/**
* amdgpu_gem_prime_vunmap - &dma_buf_ops.vunmap implementation
* @obj: GEM buffer object
* @vaddr: virtual address (unused)
*
* Tears down the in-kernel virtual mapping of the buffer object's memory.
*/
void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@ -60,6 +92,17 @@ void amdgpu_gem_prime_vunmap(struct drm_gem_object *obj, void *vaddr)
ttm_bo_kunmap(&bo->dma_buf_vmap);
}
/**
* amdgpu_gem_prime_mmap - &drm_driver.gem_prime_mmap implementation
* @obj: GEM buffer object
* @vma: virtual memory area
*
* Sets up a userspace mapping of the buffer object's memory in the given
* virtual memory area.
*
* Returns:
* 0 on success or negative error code.
*/
int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@ -94,6 +137,19 @@ int amdgpu_gem_prime_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma
return ret;
}
/**
* amdgpu_gem_prime_import_sg_table - &drm_driver.gem_prime_import_sg_table
* implementation
* @dev: DRM device
* @attach: DMA-buf attachment
* @sg: Scatter/gather table
*
* Import shared DMA buffer memory exported by another device.
*
* Returns:
* A new GEM buffer object of the given DRM device, representing the memory
* described by the given DMA-buf attachment and scatter/gather table.
*/
struct drm_gem_object *
amdgpu_gem_prime_import_sg_table(struct drm_device *dev,
struct dma_buf_attachment *attach,
@ -132,8 +188,19 @@ error:
return ERR_PTR(ret);
}
/**
* amdgpu_gem_map_attach - &dma_buf_ops.attach implementation
* @dma_buf: shared DMA buffer
* @attach: DMA-buf attachment
*
* Makes sure that the shared DMA buffer can be accessed by the target device.
* For now, simply pins it to the GTT domain, where it should be accessible by
* all DMA devices.
*
* Returns:
* 0 on success or negative error code.
*/
static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
struct device *target_dev,
struct dma_buf_attachment *attach)
{
struct drm_gem_object *obj = dma_buf->priv;
@ -141,7 +208,7 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
struct amdgpu_device *adev = amdgpu_ttm_adev(bo->tbo.bdev);
long r;
r = drm_gem_map_attach(dma_buf, target_dev, attach);
r = drm_gem_map_attach(dma_buf, attach);
if (r)
return r;
@ -165,7 +232,7 @@ static int amdgpu_gem_map_attach(struct dma_buf *dma_buf,
}
/* pin buffer into GTT */
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT, NULL);
r = amdgpu_bo_pin(bo, AMDGPU_GEM_DOMAIN_GTT);
if (r)
goto error_unreserve;
@ -181,6 +248,14 @@ error_detach:
return r;
}
/**
* amdgpu_gem_map_detach - &dma_buf_ops.detach implementation
* @dma_buf: shared DMA buffer
* @attach: DMA-buf attachment
*
* This is called when a shared DMA buffer no longer needs to be accessible by
* the other device. For now, simply unpins the buffer from GTT.
*/
static void amdgpu_gem_map_detach(struct dma_buf *dma_buf,
struct dma_buf_attachment *attach)
{
@ -202,6 +277,13 @@ error:
drm_gem_map_detach(dma_buf, attach);
}
/**
* amdgpu_gem_prime_res_obj - &drm_driver.gem_prime_res_obj implementation
* @obj: GEM buffer object
*
* Returns:
* The buffer object's reservation object.
*/
struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
{
struct amdgpu_bo *bo = gem_to_amdgpu_bo(obj);
@ -209,6 +291,18 @@ struct reservation_object *amdgpu_gem_prime_res_obj(struct drm_gem_object *obj)
return bo->tbo.resv;
}
/**
* amdgpu_gem_begin_cpu_access - &dma_buf_ops.begin_cpu_access implementation
* @dma_buf: shared DMA buffer
* @direction: direction of DMA transfer
*
* This is called before CPU access to the shared DMA buffer's memory. If it's
* a read access, the buffer is moved to the GTT domain if possible, for optimal
* CPU read performance.
*
* Returns:
* 0 on success or negative error code.
*/
static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
enum dma_data_direction direction)
{
@ -229,7 +323,7 @@ static int amdgpu_gem_begin_cpu_access(struct dma_buf *dma_buf,
return ret;
if (!bo->pin_count && (bo->allowed_domains & AMDGPU_GEM_DOMAIN_GTT)) {
amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_GTT);
ret = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
}
@ -245,14 +339,24 @@ static const struct dma_buf_ops amdgpu_dmabuf_ops = {
.release = drm_gem_dmabuf_release,
.begin_cpu_access = amdgpu_gem_begin_cpu_access,
.map = drm_gem_dmabuf_kmap,
.map_atomic = drm_gem_dmabuf_kmap_atomic,
.unmap = drm_gem_dmabuf_kunmap,
.unmap_atomic = drm_gem_dmabuf_kunmap_atomic,
.mmap = drm_gem_dmabuf_mmap,
.vmap = drm_gem_dmabuf_vmap,
.vunmap = drm_gem_dmabuf_vunmap,
};
/**
* amdgpu_gem_prime_export - &drm_driver.gem_prime_export implementation
* @dev: DRM device
* @gobj: GEM buffer object
* @flags: flags like DRM_CLOEXEC and DRM_RDWR
*
* The main work is done by the &drm_gem_prime_export helper, which in turn
* uses &amdgpu_gem_prime_res_obj.
*
* Returns:
* Shared DMA buffer representing the GEM buffer object from the given device.
*/
struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
struct drm_gem_object *gobj,
int flags)
@ -273,6 +377,17 @@ struct dma_buf *amdgpu_gem_prime_export(struct drm_device *dev,
return buf;
}
/**
* amdgpu_gem_prime_import - &drm_driver.gem_prime_import implementation
* @dev: DRM device
* @dma_buf: Shared DMA buffer
*
* The main work is done by the &drm_gem_prime_import helper, which in turn
* uses &amdgpu_gem_prime_import_sg_table.
*
* Returns:
* GEM buffer object representing the shared DMA buffer for the given device.
*/
struct drm_gem_object *amdgpu_gem_prime_import(struct drm_device *dev,
struct dma_buf *dma_buf)
{


@ -66,8 +66,6 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
u32 ring,
struct amdgpu_ring **out_ring)
{
u32 instance;
switch (mapper->hw_ip) {
case AMDGPU_HW_IP_GFX:
*out_ring = &adev->gfx.gfx_ring[ring];
@ -79,16 +77,13 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
*out_ring = &adev->sdma.instance[ring].ring;
break;
case AMDGPU_HW_IP_UVD:
instance = ring;
*out_ring = &adev->uvd.inst[instance].ring;
*out_ring = &adev->uvd.inst[0].ring;
break;
case AMDGPU_HW_IP_VCE:
*out_ring = &adev->vce.ring[ring];
break;
case AMDGPU_HW_IP_UVD_ENC:
instance = ring / adev->uvd.num_enc_rings;
*out_ring =
&adev->uvd.inst[instance].ring_enc[ring%adev->uvd.num_enc_rings];
*out_ring = &adev->uvd.inst[0].ring_enc[ring];
break;
case AMDGPU_HW_IP_VCN_DEC:
*out_ring = &adev->vcn.ring_dec;
@ -96,6 +91,9 @@ static int amdgpu_identity_map(struct amdgpu_device *adev,
case AMDGPU_HW_IP_VCN_ENC:
*out_ring = &adev->vcn.ring_enc[ring];
break;
case AMDGPU_HW_IP_VCN_JPEG:
*out_ring = &adev->vcn.ring_jpeg;
break;
default:
*out_ring = NULL;
DRM_ERROR("unknown HW IP type: %d\n", mapper->hw_ip);
@ -216,7 +214,7 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
u32 hw_ip, u32 instance, u32 ring,
struct amdgpu_ring **out_ring)
{
int r, ip_num_rings;
int i, r, ip_num_rings = 0;
struct amdgpu_queue_mapper *mapper = &mgr->mapper[hw_ip];
if (!adev || !mgr || !out_ring)
@ -245,14 +243,21 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
ip_num_rings = adev->sdma.num_instances;
break;
case AMDGPU_HW_IP_UVD:
ip_num_rings = adev->uvd.num_uvd_inst;
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (!(adev->uvd.harvest_config & (1 << i)))
ip_num_rings++;
}
break;
case AMDGPU_HW_IP_VCE:
ip_num_rings = adev->vce.num_rings;
break;
case AMDGPU_HW_IP_UVD_ENC:
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (!(adev->uvd.harvest_config & (1 << i)))
ip_num_rings++;
}
ip_num_rings =
adev->uvd.num_enc_rings * adev->uvd.num_uvd_inst;
adev->uvd.num_enc_rings * ip_num_rings;
break;
case AMDGPU_HW_IP_VCN_DEC:
ip_num_rings = 1;
@ -260,6 +265,9 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
case AMDGPU_HW_IP_VCN_ENC:
ip_num_rings = adev->vcn.num_enc_rings;
break;
case AMDGPU_HW_IP_VCN_JPEG:
ip_num_rings = 1;
break;
default:
DRM_DEBUG("unknown ip type: %d\n", hw_ip);
return -EINVAL;
@ -287,6 +295,7 @@ int amdgpu_queue_mgr_map(struct amdgpu_device *adev,
case AMDGPU_HW_IP_UVD_ENC:
case AMDGPU_HW_IP_VCN_DEC:
case AMDGPU_HW_IP_VCN_ENC:
case AMDGPU_HW_IP_VCN_JPEG:
r = amdgpu_identity_map(adev, mapper, ring, out_ring);
break;
case AMDGPU_HW_IP_DMA:


@ -211,7 +211,8 @@ void amdgpu_ring_priority_get(struct amdgpu_ring *ring,
if (!ring->funcs->set_priority)
return;
atomic_inc(&ring->num_jobs[priority]);
if (atomic_inc_return(&ring->num_jobs[priority]) <= 0)
return;
mutex_lock(&ring->priority_mutex);
if (priority <= ring->priority)
@ -304,7 +305,7 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
0xffffffffffffffff : ring->buf_mask;
/* Allocate ring buffer */
if (ring->ring_obj == NULL) {
r = amdgpu_bo_create_kernel(adev, ring->ring_size, PAGE_SIZE,
r = amdgpu_bo_create_kernel(adev, ring->ring_size + ring->funcs->extra_dw, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_GTT,
&ring->ring_obj,
&ring->gpu_addr,


@ -44,6 +44,8 @@
#define AMDGPU_FENCE_FLAG_INT (1 << 1)
#define AMDGPU_FENCE_FLAG_TC_WB_ONLY (1 << 2)
#define to_amdgpu_ring(s) container_of((s), struct amdgpu_ring, sched)
enum amdgpu_ring_type {
AMDGPU_RING_TYPE_GFX,
AMDGPU_RING_TYPE_COMPUTE,
@ -53,7 +55,8 @@ enum amdgpu_ring_type {
AMDGPU_RING_TYPE_KIQ,
AMDGPU_RING_TYPE_UVD_ENC,
AMDGPU_RING_TYPE_VCN_DEC,
AMDGPU_RING_TYPE_VCN_ENC
AMDGPU_RING_TYPE_VCN_ENC,
AMDGPU_RING_TYPE_VCN_JPEG
};
struct amdgpu_device;
@ -112,6 +115,7 @@ struct amdgpu_ring_funcs {
u32 nop;
bool support_64bit_ptrs;
unsigned vmhub;
unsigned extra_dw;
/* ring read/write ptr handling */
u64 (*get_rptr)(struct amdgpu_ring *ring);
@ -119,6 +123,7 @@ struct amdgpu_ring_funcs {
void (*set_wptr)(struct amdgpu_ring *ring);
/* validating and patching of IBs */
int (*parse_cs)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
int (*patch_cs_in_place)(struct amdgpu_cs_parser *p, uint32_t ib_idx);
/* constants to calculate how many DW are needed for an emit */
unsigned emit_frame_size;
unsigned emit_ib_size;


@ -1,3 +1,4 @@
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
* Copyright 2009 VMware, Inc.
*
@ -75,11 +76,12 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
r = amdgpu_bo_reserve(vram_obj, false);
if (unlikely(r != 0))
goto out_unref;
r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM, &vram_addr);
r = amdgpu_bo_pin(vram_obj, AMDGPU_GEM_DOMAIN_VRAM);
if (r) {
DRM_ERROR("Failed to pin VRAM object\n");
goto out_unres;
}
vram_addr = amdgpu_bo_gpu_offset(vram_obj);
for (i = 0; i < n; i++) {
void *gtt_map, *vram_map;
void **gart_start, **gart_end;
@ -96,11 +98,17 @@ static void amdgpu_do_test_moves(struct amdgpu_device *adev)
r = amdgpu_bo_reserve(gtt_obj[i], false);
if (unlikely(r != 0))
goto out_lclean_unref;
r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT, &gart_addr);
r = amdgpu_bo_pin(gtt_obj[i], AMDGPU_GEM_DOMAIN_GTT);
if (r) {
DRM_ERROR("Failed to pin GTT object %d\n", i);
goto out_lclean_unres;
}
r = amdgpu_ttm_alloc_gart(&gtt_obj[i]->tbo);
if (r) {
DRM_ERROR("%p bind failed\n", gtt_obj[i]);
goto out_lclean_unpin;
}
gart_addr = amdgpu_bo_gpu_offset(gtt_obj[i]);
r = amdgpu_bo_kmap(gtt_obj[i], &gtt_map);
if (r) {


@ -150,10 +150,10 @@ TRACE_EVENT(amdgpu_cs,
TP_fast_assign(
__entry->bo_list = p->bo_list;
__entry->ring = p->job->ring->idx;
__entry->ring = p->ring->idx;
__entry->dw = p->job->ibs[i].length_dw;
__entry->fences = amdgpu_fence_count_emitted(
p->job->ring);
p->ring);
),
TP_printk("bo_list=%p, ring=%u, dw=%u, fences=%u",
__entry->bo_list, __entry->ring, __entry->dw,
@ -178,7 +178,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
__assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
__entry->ring_name = job->ring->name;
__entry->ring_name = to_amdgpu_ring(job->base.sched)->name;
__entry->num_ibs = job->num_ibs;
),
TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
@ -203,7 +203,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
__assign_str(timeline, AMDGPU_JOB_GET_TIMELINE_NAME(job))
__entry->context = job->base.s_fence->finished.context;
__entry->seqno = job->base.s_fence->finished.seqno;
__entry->ring_name = job->ring->name;
__entry->ring_name = to_amdgpu_ring(job->base.sched)->name;
__entry->num_ibs = job->num_ibs;
),
TP_printk("sched_job=%llu, timeline=%s, context=%u, seqno=%u, ring_name=%s, num_ibs=%u",
@ -314,6 +314,11 @@ DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_mapping,
TP_ARGS(mapping)
);
DEFINE_EVENT(amdgpu_vm_mapping, amdgpu_vm_bo_cs,
TP_PROTO(struct amdgpu_bo_va_mapping *mapping),
TP_ARGS(mapping)
);
TRACE_EVENT(amdgpu_vm_set_ptes,
TP_PROTO(uint64_t pe, uint64_t addr, unsigned count,
uint32_t incr, uint64_t flags),
@ -436,7 +441,7 @@ TRACE_EVENT(amdgpu_cs_bo_status,
__entry->total_bo, __entry->total_size)
);
TRACE_EVENT(amdgpu_ttm_bo_move,
TRACE_EVENT(amdgpu_bo_move,
TP_PROTO(struct amdgpu_bo* bo, uint32_t new_placement, uint32_t old_placement),
TP_ARGS(bo, new_placement, old_placement),
TP_STRUCT__entry(


@ -92,11 +92,9 @@ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
}
/**
* amdgpu_ttm_global_init - Initialize global TTM memory reference
* structures.
* amdgpu_ttm_global_init - Initialize global TTM memory reference structures.
*
* @adev: AMDGPU device for which the global structures need to be
* registered.
* @adev: AMDGPU device for which the global structures need to be registered.
*
* This is called as part of the AMDGPU ttm init from amdgpu_ttm_init()
* during bring up.
@ -104,8 +102,6 @@ static void amdgpu_ttm_mem_global_release(struct drm_global_reference *ref)
static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
{
struct drm_global_reference *global_ref;
struct amdgpu_ring *ring;
struct drm_sched_rq *rq;
int r;
/* ensure reference is false in case init fails */
@ -138,21 +134,10 @@ static int amdgpu_ttm_global_init(struct amdgpu_device *adev)
mutex_init(&adev->mman.gtt_window_lock);
ring = adev->mman.buffer_funcs_ring;
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
r = drm_sched_entity_init(&ring->sched, &adev->mman.entity,
rq, NULL);
if (r) {
DRM_ERROR("Failed setting up TTM BO move run queue.\n");
goto error_entity;
}
adev->mman.mem_global_referenced = true;
return 0;
error_entity:
drm_global_item_unref(&adev->mman.bo_global_ref.ref);
error_bo:
drm_global_item_unref(&adev->mman.mem_global_ref);
error_mem:
@ -162,8 +147,6 @@ error_mem:
static void amdgpu_ttm_global_fini(struct amdgpu_device *adev)
{
if (adev->mman.mem_global_referenced) {
drm_sched_entity_fini(adev->mman.entity.sched,
&adev->mman.entity);
mutex_destroy(&adev->mman.gtt_window_lock);
drm_global_item_unref(&adev->mman.bo_global_ref.ref);
drm_global_item_unref(&adev->mman.mem_global_ref);
@ -177,13 +160,12 @@ static int amdgpu_invalidate_caches(struct ttm_bo_device *bdev, uint32_t flags)
}
/**
* amdgpu_init_mem_type - Initialize a memory manager for a specific
* type of memory request.
* amdgpu_init_mem_type - Initialize a memory manager for a specific type of
* memory request.
*
* @bdev: The TTM BO device object (contains a reference to
* amdgpu_device)
* @type: The type of memory requested
* @man:
* @bdev: The TTM BO device object (contains a reference to amdgpu_device)
* @type: The type of memory requested
* @man: The memory type manager for each domain
*
* This is called by ttm_bo_init_mm() when a buffer object is being
* initialized.
@ -263,7 +245,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
}
/* Object isn't an AMDGPU object so ignore */
if (!amdgpu_ttm_bo_is_amdgpu_bo(bo)) {
if (!amdgpu_bo_is_amdgpu_bo(bo)) {
placement->placement = &placements;
placement->busy_placement = &placements;
placement->num_placement = 1;
@ -276,8 +258,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
case TTM_PL_VRAM:
if (!adev->mman.buffer_funcs_enabled) {
/* Move to system memory */
amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
} else if (adev->gmc.visible_vram_size < adev->gmc.real_vram_size &&
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
} else if (!amdgpu_gmc_vram_full_visible(&adev->gmc) &&
!(abo->flags & AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED) &&
amdgpu_bo_in_cpu_visible_vram(abo)) {
@ -286,7 +268,7 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
* BO will be evicted to GTT rather than causing other
* BOs to be evicted from VRAM
*/
amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_VRAM |
AMDGPU_GEM_DOMAIN_GTT);
abo->placements[0].fpfn = adev->gmc.visible_vram_size >> PAGE_SHIFT;
abo->placements[0].lpfn = 0;
@ -294,12 +276,12 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
abo->placement.num_busy_placement = 1;
} else {
/* Move to GTT memory */
amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_GTT);
}
break;
case TTM_PL_TT:
default:
amdgpu_ttm_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
amdgpu_bo_placement_from_domain(abo, AMDGPU_GEM_DOMAIN_CPU);
}
*placement = abo->placement;
}
@ -307,8 +289,8 @@ static void amdgpu_evict_flags(struct ttm_buffer_object *bo,
/**
* amdgpu_verify_access - Verify access for a mmap call
*
* @bo: The buffer object to map
* @filp: The file pointer from the process performing the mmap
* @bo: The buffer object to map
* @filp: The file pointer from the process performing the mmap
*
* This is called by ttm_bo_mmap() to verify whether a process
* has the right to mmap a BO to their process space.
@ -333,11 +315,10 @@ static int amdgpu_verify_access(struct ttm_buffer_object *bo, struct file *filp)
/**
* amdgpu_move_null - Register memory for a buffer object
*
* @bo: The bo to assign the memory to
* @new_mem: The memory to be assigned.
* @bo: The bo to assign the memory to
* @new_mem: The memory to be assigned.
*
* Assign the memory from new_mem to the memory of the buffer object
* bo.
* Assign the memory from new_mem to the memory of the buffer object bo.
*/
static void amdgpu_move_null(struct ttm_buffer_object *bo,
struct ttm_mem_reg *new_mem)
@ -350,8 +331,12 @@ static void amdgpu_move_null(struct ttm_buffer_object *bo,
}
/**
* amdgpu_mm_node_addr - Compute the GPU relative offset of a GTT
* buffer.
* amdgpu_mm_node_addr - Compute the GPU relative offset of a GTT buffer.
*
* @bo: The bo to assign the memory to.
* @mm_node: Memory manager node for drm allocator.
* @mem: The region where the bo resides.
*
*/
static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
struct drm_mm_node *mm_node,
@ -367,10 +352,12 @@ static uint64_t amdgpu_mm_node_addr(struct ttm_buffer_object *bo,
}
/**
* amdgpu_find_mm_node - Helper function finds the drm_mm_node
* corresponding to @offset. It also modifies
* the offset to be within the drm_mm_node
* returned
* amdgpu_find_mm_node - Helper function finds the drm_mm_node corresponding to
* @offset. It also modifies the offset to be within the drm_mm_node returned
*
* @mem: The region where the bo resides.
* @offset: The offset to look up; adjusted on return to be relative to the
* returned drm_mm_node.
*
*/
static struct drm_mm_node *amdgpu_find_mm_node(struct ttm_mem_reg *mem,
unsigned long *offset)
@ -512,8 +499,8 @@ error:
/**
* amdgpu_move_blit - Copy an entire buffer to another buffer
*
* This is a helper called by amdgpu_bo_move() and
* amdgpu_move_vram_ram() to help move buffers to and from VRAM.
* This is a helper called by amdgpu_bo_move() and amdgpu_move_vram_ram() to
* help move buffers to and from VRAM.
*/
static int amdgpu_move_blit(struct ttm_buffer_object *bo,
bool evict, bool no_wait_gpu,
@ -595,7 +582,7 @@ static int amdgpu_move_vram_ram(struct ttm_buffer_object *bo, bool evict,
}
/* blit VRAM to GTT */
r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, &tmp_mem, old_mem);
r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, &tmp_mem, old_mem);
if (unlikely(r)) {
goto out_cleanup;
}
@ -647,7 +634,7 @@ static int amdgpu_move_ram_vram(struct ttm_buffer_object *bo, bool evict,
}
/* copy to VRAM */
r = amdgpu_move_blit(bo, true, ctx->no_wait_gpu, new_mem, old_mem);
r = amdgpu_move_blit(bo, evict, ctx->no_wait_gpu, new_mem, old_mem);
if (unlikely(r)) {
goto out_cleanup;
}
@ -809,8 +796,8 @@ struct amdgpu_ttm_tt {
};
/**
* amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to
* by a USERPTR pointer to memory
* amdgpu_ttm_tt_get_user_pages - Pin pages of memory pointed to by a USERPTR
* pointer to memory
*
* Called by amdgpu_gem_userptr_ioctl() and amdgpu_cs_parser_bos().
* This provides a wrapper around the get_user_pages() call to provide
@ -833,8 +820,10 @@ int amdgpu_ttm_tt_get_user_pages(struct ttm_tt *ttm, struct page **pages)
down_read(&mm->mmap_sem);
if (gtt->userflags & AMDGPU_GEM_USERPTR_ANONONLY) {
/* check that we only use anonymous memory
to prevent problems with writeback */
/*
* check that we only use anonymous memory to prevent problems
* with writeback
*/
unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
struct vm_area_struct *vma;
@ -885,10 +874,9 @@ release_pages:
}
/**
* amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages
* as necessary.
* amdgpu_ttm_tt_set_user_pages - Copy pages in, putting old pages as necessary.
*
* Called by amdgpu_cs_list_validate(). This creates the page list
* Called by amdgpu_cs_list_validate(). This creates the page list
* that backs user memory and will ultimately be mapped into the device
* address space.
*/
@ -930,8 +918,7 @@ void amdgpu_ttm_tt_mark_user_pages(struct ttm_tt *ttm)
}
/**
* amdgpu_ttm_tt_pin_userptr - prepare the sg table with the
* user pages
* amdgpu_ttm_tt_pin_userptr - prepare the sg table with the user pages
*
* Called by amdgpu_ttm_backend_bind()
**/
@ -1310,8 +1297,8 @@ static void amdgpu_ttm_tt_unpopulate(struct ttm_tt *ttm)
}
/**
* amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt
* for the current task
* amdgpu_ttm_tt_set_userptr - Initialize userptr GTT ttm_tt for the current
* task
*
* @ttm: The ttm_tt object to bind this userptr object to
* @addr: The address in the current tasks VM space to use
@ -1361,9 +1348,8 @@ struct mm_struct *amdgpu_ttm_tt_get_usermm(struct ttm_tt *ttm)
}
/**
* amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lays
* inside an address range for the
* current task.
* amdgpu_ttm_tt_affect_userptr - Determine if a ttm_tt object lays inside an
* address range for the current task.
*
*/
bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
@ -1401,8 +1387,7 @@ bool amdgpu_ttm_tt_affect_userptr(struct ttm_tt *ttm, unsigned long start,
}
/**
* amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been
* invalidated?
* amdgpu_ttm_tt_userptr_invalidated - Has the ttm_tt object been invalidated?
*/
bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
int *last_invalidated)
@ -1415,10 +1400,8 @@ bool amdgpu_ttm_tt_userptr_invalidated(struct ttm_tt *ttm,
}
/**
* amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this
* ttm_tt object been invalidated
* since the last time they've
* been set?
* amdgpu_ttm_tt_userptr_needs_pages - Have the pages backing this ttm_tt object
* been invalidated since the last time they've been set?
*/
bool amdgpu_ttm_tt_userptr_needs_pages(struct ttm_tt *ttm)
{
@ -1474,13 +1457,12 @@ uint64_t amdgpu_ttm_tt_pte_flags(struct amdgpu_device *adev, struct ttm_tt *ttm,
}
/**
* amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict
* a buffer object.
* amdgpu_ttm_bo_eviction_valuable - Check to see if we can evict a buffer
* object.
*
* Return true if eviction is sensible. Called by
* ttm_mem_evict_first() on behalf of ttm_bo_mem_force_space()
* which tries to evict buffer objects until it can find space
* for a new object and by ttm_bo_force_list_clean() which is
* Return true if eviction is sensible. Called by ttm_mem_evict_first() on
* behalf of ttm_bo_mem_force_space() which tries to evict buffer objects until
* it can find space for a new object and by ttm_bo_force_list_clean() which is
* used to clean out a memory space.
*/
static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
@ -1530,8 +1512,7 @@ static bool amdgpu_ttm_bo_eviction_valuable(struct ttm_buffer_object *bo,
}
/**
* amdgpu_ttm_access_memory - Read or Write memory that backs a
* buffer object.
* amdgpu_ttm_access_memory - Read or Write memory that backs a buffer object.
*
* @bo: The buffer object to read/write
* @offset: Offset into buffer object
@ -1695,7 +1676,7 @@ static int amdgpu_ttm_fw_reserve_vram_init(struct amdgpu_device *adev)
AMDGPU_GEM_DOMAIN_VRAM,
adev->fw_vram_usage.start_offset,
(adev->fw_vram_usage.start_offset +
adev->fw_vram_usage.size), NULL);
adev->fw_vram_usage.size));
if (r)
goto error_pin;
r = amdgpu_bo_kmap(adev->fw_vram_usage.reserved_bo,
@ -1719,8 +1700,8 @@ error_create:
return r;
}
/**
* amdgpu_ttm_init - Init the memory management (ttm) as well as
* various gtt/vram related fields.
* amdgpu_ttm_init - Init the memory management (ttm) as well as various
* gtt/vram related fields.
*
* This initializes all of the memory space pools that the TTM layer
* will need such as the GTT space (system memory mapped to the device),
@ -1871,8 +1852,7 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
}
/**
* amdgpu_ttm_late_init - Handle any late initialization for
* amdgpu_ttm
* amdgpu_ttm_late_init - Handle any late initialization for amdgpu_ttm
*/
void amdgpu_ttm_late_init(struct amdgpu_device *adev)
{
@ -1921,10 +1901,30 @@ void amdgpu_ttm_set_buffer_funcs_status(struct amdgpu_device *adev, bool enable)
{
struct ttm_mem_type_manager *man = &adev->mman.bdev.man[TTM_PL_VRAM];
uint64_t size;
int r;
if (!adev->mman.initialized || adev->in_gpu_reset)
if (!adev->mman.initialized || adev->in_gpu_reset ||
adev->mman.buffer_funcs_enabled == enable)
return;
if (enable) {
struct amdgpu_ring *ring;
struct drm_sched_rq *rq;
ring = adev->mman.buffer_funcs_ring;
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
r = drm_sched_entity_init(&adev->mman.entity, &rq, 1, NULL);
if (r) {
DRM_ERROR("Failed setting up TTM BO move entity (%d)\n",
r);
return;
}
} else {
drm_sched_entity_destroy(&adev->mman.entity);
dma_fence_put(man->move);
man->move = NULL;
}
/* this just adjusts TTM size idea, which sets lpfn to the correct value */
if (enable)
size = adev->gmc.real_vram_size;
@ -2002,7 +2002,7 @@ static int amdgpu_map_buffer(struct ttm_buffer_object *bo,
if (r)
goto error_free;
r = amdgpu_job_submit(job, ring, &adev->mman.entity,
r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
if (r)
goto error_free;
@ -2071,24 +2071,19 @@ int amdgpu_copy_buffer(struct amdgpu_ring *ring, uint64_t src_offset,
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
if (direct_submit) {
r = amdgpu_ib_schedule(ring, job->num_ibs, job->ibs,
NULL, fence);
job->fence = dma_fence_get(*fence);
if (r)
DRM_ERROR("Error scheduling IBs (%d)\n", r);
amdgpu_job_free(job);
} else {
r = amdgpu_job_submit(job, ring, &adev->mman.entity,
if (direct_submit)
r = amdgpu_job_submit_direct(job, ring, fence);
else
r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, fence);
if (r)
goto error_free;
}
if (r)
goto error_free;
return r;
error_free:
amdgpu_job_free(job);
DRM_ERROR("Error scheduling IBs (%d)\n", r);
return r;
}
@ -2171,7 +2166,7 @@ int amdgpu_fill_buffer(struct amdgpu_bo *bo,
amdgpu_ring_pad_ib(ring, &job->ibs[0]);
WARN_ON(job->ibs[0].length_dw > num_dw);
r = amdgpu_job_submit(job, ring, &adev->mman.entity,
r = amdgpu_job_submit(job, &adev->mman.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, fence);
if (r)
goto error_free;


@ -73,7 +73,7 @@ bool amdgpu_gtt_mgr_has_gart_addr(struct ttm_mem_reg *mem);
uint64_t amdgpu_gtt_mgr_usage(struct ttm_mem_type_manager *man);
int amdgpu_gtt_mgr_recover(struct ttm_mem_type_manager *man);
u64 amdgpu_vram_mgr_bo_invisible_size(struct amdgpu_bo *bo);
u64 amdgpu_vram_mgr_bo_visible_size(struct amdgpu_bo *bo);
uint64_t amdgpu_vram_mgr_usage(struct ttm_mem_type_manager *man);
uint64_t amdgpu_vram_mgr_vis_usage(struct ttm_mem_type_manager *man);


@ -53,11 +53,11 @@
/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE "radeon/bonaire_uvd.bin"
#define FIRMWARE_KABINI "radeon/kabini_uvd.bin"
#define FIRMWARE_KAVERI "radeon/kaveri_uvd.bin"
#define FIRMWARE_HAWAII "radeon/hawaii_uvd.bin"
#define FIRMWARE_MULLINS "radeon/mullins_uvd.bin"
#define FIRMWARE_BONAIRE "amdgpu/bonaire_uvd.bin"
#define FIRMWARE_KABINI "amdgpu/kabini_uvd.bin"
#define FIRMWARE_KAVERI "amdgpu/kaveri_uvd.bin"
#define FIRMWARE_HAWAII "amdgpu/hawaii_uvd.bin"
#define FIRMWARE_MULLINS "amdgpu/mullins_uvd.bin"
#endif
#define FIRMWARE_TONGA "amdgpu/tonga_uvd.bin"
#define FIRMWARE_CARRIZO "amdgpu/carrizo_uvd.bin"
@ -127,7 +127,7 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
unsigned long bo_size;
const char *fw_name;
const struct common_firmware_header *hdr;
unsigned version_major, version_minor, family_id;
unsigned family_id;
int i, j, r;
INIT_DELAYED_WORK(&adev->uvd.idle_work, amdgpu_uvd_idle_work_handler);
@ -208,29 +208,46 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
hdr = (const struct common_firmware_header *)adev->uvd.fw->data;
family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n",
version_major, version_minor, family_id);
/*
* Limit the number of UVD handles depending on microcode major
* and minor versions. The firmware version which has 40 UVD
* instances support is 1.80. So all subsequent versions should
* also have the same support.
*/
if ((version_major > 0x01) ||
((version_major == 0x01) && (version_minor >= 0x50)))
if (adev->asic_type < CHIP_VEGA20) {
unsigned version_major, version_minor;
version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
DRM_INFO("Found UVD firmware Version: %hu.%hu Family ID: %hu\n",
version_major, version_minor, family_id);
/*
* Limit the number of UVD handles depending on microcode major
* and minor versions. The firmware version which has 40 UVD
* instances support is 1.80. So all subsequent versions should
* also have the same support.
*/
if ((version_major > 0x01) ||
((version_major == 0x01) && (version_minor >= 0x50)))
adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) |
(family_id << 8));
if ((adev->asic_type == CHIP_POLARIS10 ||
adev->asic_type == CHIP_POLARIS11) &&
(adev->uvd.fw_version < FW_1_66_16))
DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n",
version_major, version_minor);
} else {
unsigned int enc_major, enc_minor, dec_minor;
dec_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
enc_minor = (le32_to_cpu(hdr->ucode_version) >> 24) & 0x3f;
enc_major = (le32_to_cpu(hdr->ucode_version) >> 30) & 0x3;
DRM_INFO("Found UVD firmware ENC: %hu.%hu DEC: .%hu Family ID: %hu\n",
enc_major, enc_minor, dec_minor, family_id);
adev->uvd.max_handles = AMDGPU_MAX_UVD_HANDLES;
adev->uvd.fw_version = ((version_major << 24) | (version_minor << 16) |
(family_id << 8));
if ((adev->asic_type == CHIP_POLARIS10 ||
adev->asic_type == CHIP_POLARIS11) &&
(adev->uvd.fw_version < FW_1_66_16))
DRM_ERROR("POLARIS10/11 UVD firmware version %hu.%hu is too old.\n",
version_major, version_minor);
adev->uvd.fw_version = le32_to_cpu(hdr->ucode_version);
}
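
The new branch above splits firmware-version parsing by ASIC: pre-Vega20 parts keep the major/minor/family layout, while Vega20 packs separate encode and decode revisions into the same 32-bit word. A small standalone decoder for both layouts, using exactly the shifts and masks from the code above (the example version words are made up):

#include <stdint.h>
#include <stdio.h>

static void decode_legacy(uint32_t v)	/* ASICs before Vega20 */
{
	unsigned major = (v >> 24) & 0xff;
	unsigned minor = (v >> 8) & 0xff;
	unsigned family = v & 0xff;

	printf("legacy: version %u.%u, family 0x%02x\n", major, minor, family);
}

static void decode_vega20(uint32_t v)	/* Vega20 and newer */
{
	unsigned enc_major = (v >> 30) & 0x3;
	unsigned enc_minor = (v >> 24) & 0x3f;
	unsigned dec_minor = (v >> 8) & 0xff;
	unsigned family = v & 0xff;

	printf("vega20: ENC %u.%u, DEC .%u, family 0x%02x\n",
	       enc_major, enc_minor, dec_minor, family);
}

int main(void)
{
	decode_legacy(0x01005010);	/* fictitious word: version 1.80, family 0x10 */
	decode_vega20(0x45001310);	/* fictitious word: ENC 1.5, DEC .19, family 0x10 */
	return 0;
}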
bo_size = AMDGPU_UVD_STACK_SIZE + AMDGPU_UVD_HEAP_SIZE
+ AMDGPU_UVD_SESSION_SIZE * adev->uvd.max_handles;
@ -238,7 +255,8 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
if (adev->uvd.harvest_config & (1 << j))
continue;
r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
AMDGPU_GEM_DOMAIN_VRAM, &adev->uvd.inst[j].vcpu_bo,
&adev->uvd.inst[j].gpu_addr, &adev->uvd.inst[j].cpu_addr);
@ -246,21 +264,20 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
dev_err(adev->dev, "(%d) failed to allocate UVD bo\n", r);
return r;
}
ring = &adev->uvd.inst[j].ring;
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
r = drm_sched_entity_init(&ring->sched, &adev->uvd.inst[j].entity,
rq, NULL);
if (r != 0) {
DRM_ERROR("Failed setting up UVD(%d) run queue.\n", j);
return r;
}
for (i = 0; i < adev->uvd.max_handles; ++i) {
atomic_set(&adev->uvd.inst[j].handles[i], 0);
adev->uvd.inst[j].filp[i] = NULL;
}
}
ring = &adev->uvd.inst[0].ring;
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
r = drm_sched_entity_init(&adev->uvd.entity, &rq, 1, NULL);
if (r) {
DRM_ERROR("Failed setting up UVD kernel entity.\n");
return r;
}
for (i = 0; i < adev->uvd.max_handles; ++i) {
atomic_set(&adev->uvd.handles[i], 0);
adev->uvd.filp[i] = NULL;
}
/* from uvd v5.0 HW addressing capacity increased to 64 bits */
if (!amdgpu_device_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
adev->uvd.address_64_bit = true;
@ -289,10 +306,12 @@ int amdgpu_uvd_sw_fini(struct amdgpu_device *adev)
{
int i, j;
for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
kfree(adev->uvd.inst[j].saved_bo);
drm_sched_entity_destroy(&adev->uvd.entity);
drm_sched_entity_fini(&adev->uvd.inst[j].ring.sched, &adev->uvd.inst[j].entity);
for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
if (adev->uvd.harvest_config & (1 << j))
continue;
kfree(adev->uvd.inst[j].saved_bo);
amdgpu_bo_free_kernel(&adev->uvd.inst[j].vcpu_bo,
&adev->uvd.inst[j].gpu_addr,
@ -316,20 +335,22 @@ int amdgpu_uvd_suspend(struct amdgpu_device *adev)
cancel_delayed_work_sync(&adev->uvd.idle_work);
/* only valid for physical mode */
if (adev->asic_type < CHIP_POLARIS10) {
for (i = 0; i < adev->uvd.max_handles; ++i)
if (atomic_read(&adev->uvd.handles[i]))
break;
if (i == adev->uvd.max_handles)
return 0;
}
for (j = 0; j < adev->uvd.num_uvd_inst; ++j) {
if (adev->uvd.harvest_config & (1 << j))
continue;
if (adev->uvd.inst[j].vcpu_bo == NULL)
continue;
/* only valid for physical mode */
if (adev->asic_type < CHIP_POLARIS10) {
for (i = 0; i < adev->uvd.max_handles; ++i)
if (atomic_read(&adev->uvd.inst[j].handles[i]))
break;
if (i == adev->uvd.max_handles)
continue;
}
size = amdgpu_bo_size(adev->uvd.inst[j].vcpu_bo);
ptr = adev->uvd.inst[j].cpu_addr;
@ -349,6 +370,8 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
int i;
for (i = 0; i < adev->uvd.num_uvd_inst; i++) {
if (adev->uvd.harvest_config & (1 << i))
continue;
if (adev->uvd.inst[i].vcpu_bo == NULL)
return -EINVAL;
@ -381,30 +404,27 @@ int amdgpu_uvd_resume(struct amdgpu_device *adev)
void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
{
struct amdgpu_ring *ring;
int i, j, r;
struct amdgpu_ring *ring = &adev->uvd.inst[0].ring;
int i, r;
for (j = 0; j < adev->uvd.num_uvd_inst; j++) {
ring = &adev->uvd.inst[j].ring;
for (i = 0; i < adev->uvd.max_handles; ++i) {
uint32_t handle = atomic_read(&adev->uvd.handles[i]);
for (i = 0; i < adev->uvd.max_handles; ++i) {
uint32_t handle = atomic_read(&adev->uvd.inst[j].handles[i]);
if (handle != 0 && adev->uvd.inst[j].filp[i] == filp) {
struct dma_fence *fence;
if (handle != 0 && adev->uvd.filp[i] == filp) {
struct dma_fence *fence;
r = amdgpu_uvd_get_destroy_msg(ring, handle,
false, &fence);
if (r) {
DRM_ERROR("Error destroying UVD(%d) %d!\n", j, r);
continue;
}
dma_fence_wait(fence, false);
dma_fence_put(fence);
adev->uvd.inst[j].filp[i] = NULL;
atomic_set(&adev->uvd.inst[j].handles[i], 0);
r = amdgpu_uvd_get_destroy_msg(ring, handle, false,
&fence);
if (r) {
DRM_ERROR("Error destroying UVD %d!\n", r);
continue;
}
dma_fence_wait(fence, false);
dma_fence_put(fence);
adev->uvd.filp[i] = NULL;
atomic_set(&adev->uvd.handles[i], 0);
}
}
}
@ -459,7 +479,7 @@ static int amdgpu_uvd_cs_pass1(struct amdgpu_uvd_cs_ctx *ctx)
if (cmd == 0x0 || cmd == 0x3) {
/* yes, force it into VRAM */
uint32_t domain = AMDGPU_GEM_DOMAIN_VRAM;
amdgpu_ttm_placement_from_domain(bo, domain);
amdgpu_bo_placement_from_domain(bo, domain);
}
amdgpu_uvd_force_into_uvd_segment(bo);
@ -679,16 +699,15 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
void *ptr;
long r;
int i;
uint32_t ip_instance = ctx->parser->job->ring->me;
if (offset & 0x3F) {
DRM_ERROR("UVD(%d) messages must be 64 byte aligned!\n", ip_instance);
DRM_ERROR("UVD messages must be 64 byte aligned!\n");
return -EINVAL;
}
r = amdgpu_bo_kmap(bo, &ptr);
if (r) {
DRM_ERROR("Failed mapping the UVD(%d) message (%ld)!\n", ip_instance, r);
DRM_ERROR("Failed mapping the UVD) message (%ld)!\n", r);
return r;
}
@ -698,7 +717,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
handle = msg[2];
if (handle == 0) {
DRM_ERROR("Invalid UVD(%d) handle!\n", ip_instance);
DRM_ERROR("Invalid UVD handle!\n");
return -EINVAL;
}
@ -709,18 +728,19 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
/* try to alloc a new handle */
for (i = 0; i < adev->uvd.max_handles; ++i) {
if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) {
DRM_ERROR("(%d)Handle 0x%x already in use!\n", ip_instance, handle);
if (atomic_read(&adev->uvd.handles[i]) == handle) {
DRM_ERROR(")Handle 0x%x already in use!\n",
handle);
return -EINVAL;
}
if (!atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], 0, handle)) {
adev->uvd.inst[ip_instance].filp[i] = ctx->parser->filp;
if (!atomic_cmpxchg(&adev->uvd.handles[i], 0, handle)) {
adev->uvd.filp[i] = ctx->parser->filp;
return 0;
}
}
DRM_ERROR("No more free UVD(%d) handles!\n", ip_instance);
DRM_ERROR("No more free UVD handles!\n");
return -ENOSPC;
case 1:
@ -732,27 +752,27 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
/* validate the handle */
for (i = 0; i < adev->uvd.max_handles; ++i) {
if (atomic_read(&adev->uvd.inst[ip_instance].handles[i]) == handle) {
if (adev->uvd.inst[ip_instance].filp[i] != ctx->parser->filp) {
DRM_ERROR("UVD(%d) handle collision detected!\n", ip_instance);
if (atomic_read(&adev->uvd.handles[i]) == handle) {
if (adev->uvd.filp[i] != ctx->parser->filp) {
DRM_ERROR("UVD handle collision detected!\n");
return -EINVAL;
}
return 0;
}
}
DRM_ERROR("Invalid UVD(%d) handle 0x%x!\n", ip_instance, handle);
DRM_ERROR("Invalid UVD handle 0x%x!\n", handle);
return -ENOENT;
case 2:
/* it's a destroy msg, free the handle */
for (i = 0; i < adev->uvd.max_handles; ++i)
atomic_cmpxchg(&adev->uvd.inst[ip_instance].handles[i], handle, 0);
atomic_cmpxchg(&adev->uvd.handles[i], handle, 0);
amdgpu_bo_kunmap(bo);
return 0;
default:
DRM_ERROR("Illegal UVD(%d) message type (%d)!\n", ip_instance, msg_type);
DRM_ERROR("Illegal UVD message type (%d)!\n", msg_type);
return -EINVAL;
}
BUG();
@ -1000,7 +1020,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
if (!ring->adev->uvd.address_64_bit) {
struct ttm_operation_ctx ctx = { true, false };
amdgpu_ttm_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_bo_placement_from_domain(bo, AMDGPU_GEM_DOMAIN_VRAM);
amdgpu_uvd_force_into_uvd_segment(bo);
r = ttm_bo_validate(&bo->tbo, &bo->placement, &ctx);
if (r)
@ -1045,19 +1065,16 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,
if (r < 0)
goto err_free;
r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
job->fence = dma_fence_get(f);
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err_free;
amdgpu_job_free(job);
} else {
r = amdgpu_sync_resv(adev, &job->sync, bo->tbo.resv,
AMDGPU_FENCE_OWNER_UNDEFINED, false);
if (r)
goto err_free;
r = amdgpu_job_submit(job, ring, &adev->uvd.inst[ring->me].entity,
r = amdgpu_job_submit(job, &adev->uvd.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &f);
if (r)
goto err_free;
@ -1149,6 +1166,8 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
unsigned fences = 0, i, j;
for (i = 0; i < adev->uvd.num_uvd_inst; ++i) {
if (adev->uvd.harvest_config & (1 << i))
continue;
fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring);
for (j = 0; j < adev->uvd.num_enc_rings; ++j) {
fences += amdgpu_fence_count_emitted(&adev->uvd.inst[i].ring_enc[j]);
@ -1259,7 +1278,7 @@ uint32_t amdgpu_uvd_used_handles(struct amdgpu_device *adev)
* necessarily linear. So we need to count
* all non-zero handles.
*/
if (atomic_read(&adev->uvd.inst->handles[i]))
if (atomic_read(&adev->uvd.handles[i]))
used_handles++;
}

View File

@ -42,26 +42,29 @@ struct amdgpu_uvd_inst {
void *cpu_addr;
uint64_t gpu_addr;
void *saved_bo;
atomic_t handles[AMDGPU_MAX_UVD_HANDLES];
struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES];
struct amdgpu_ring ring;
struct amdgpu_ring ring_enc[AMDGPU_MAX_UVD_ENC_RINGS];
struct amdgpu_irq_src irq;
struct drm_sched_entity entity;
struct drm_sched_entity entity_enc;
uint32_t srbm_soft_reset;
};
#define AMDGPU_UVD_HARVEST_UVD0 (1 << 0)
#define AMDGPU_UVD_HARVEST_UVD1 (1 << 1)
struct amdgpu_uvd {
const struct firmware *fw; /* UVD firmware */
unsigned fw_version;
unsigned max_handles;
unsigned num_enc_rings;
uint8_t num_uvd_inst;
uint8_t num_uvd_inst;
bool address_64_bit;
bool use_ctx_buf;
struct amdgpu_uvd_inst inst[AMDGPU_MAX_UVD_INSTANCES];
struct amdgpu_uvd_inst inst[AMDGPU_MAX_UVD_INSTANCES];
struct drm_file *filp[AMDGPU_MAX_UVD_HANDLES];
atomic_t handles[AMDGPU_MAX_UVD_HANDLES];
struct drm_sched_entity entity;
struct delayed_work idle_work;
unsigned harvest_config;
};
int amdgpu_uvd_sw_init(struct amdgpu_device *adev);

View File

@ -40,11 +40,11 @@
/* Firmware Names */
#ifdef CONFIG_DRM_AMDGPU_CIK
#define FIRMWARE_BONAIRE "radeon/bonaire_vce.bin"
#define FIRMWARE_KABINI "radeon/kabini_vce.bin"
#define FIRMWARE_KAVERI "radeon/kaveri_vce.bin"
#define FIRMWARE_HAWAII "radeon/hawaii_vce.bin"
#define FIRMWARE_MULLINS "radeon/mullins_vce.bin"
#define FIRMWARE_BONAIRE "amdgpu/bonaire_vce.bin"
#define FIRMWARE_KABINI "amdgpu/kabini_vce.bin"
#define FIRMWARE_KAVERI "amdgpu/kaveri_vce.bin"
#define FIRMWARE_HAWAII "amdgpu/hawaii_vce.bin"
#define FIRMWARE_MULLINS "amdgpu/mullins_vce.bin"
#endif
#define FIRMWARE_TONGA "amdgpu/tonga_vce.bin"
#define FIRMWARE_CARRIZO "amdgpu/carrizo_vce.bin"
@ -190,8 +190,7 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
ring = &adev->vce.ring[0];
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_NORMAL];
r = drm_sched_entity_init(&ring->sched, &adev->vce.entity,
rq, NULL);
r = drm_sched_entity_init(&adev->vce.entity, &rq, 1, NULL);
if (r != 0) {
DRM_ERROR("Failed setting up VCE run queue.\n");
return r;
@ -222,7 +221,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
if (adev->vce.vcpu_bo == NULL)
return 0;
drm_sched_entity_fini(&adev->vce.ring[0].sched, &adev->vce.entity);
drm_sched_entity_destroy(&adev->vce.entity);
amdgpu_bo_free_kernel(&adev->vce.vcpu_bo, &adev->vce.gpu_addr,
(void **)&adev->vce.cpu_addr);
@ -470,12 +469,10 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
job->fence = dma_fence_get(f);
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
amdgpu_job_free(job);
if (fence)
*fence = dma_fence_get(f);
dma_fence_put(f);
@ -532,19 +529,13 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
if (direct) {
r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
job->fence = dma_fence_get(f);
if (r)
goto err;
amdgpu_job_free(job);
} else {
r = amdgpu_job_submit(job, ring, &ring->adev->vce.entity,
if (direct)
r = amdgpu_job_submit_direct(job, ring, &f);
else
r = amdgpu_job_submit(job, &ring->adev->vce.entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &f);
if (r)
goto err;
}
if (r)
goto err;
if (fence)
*fence = dma_fence_get(f);

View File

@ -140,6 +140,8 @@ int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
for (i = 0; i < adev->vcn.num_enc_rings; ++i)
amdgpu_ring_fini(&adev->vcn.ring_enc[i]);
amdgpu_ring_fini(&adev->vcn.ring_jpeg);
release_firmware(adev->vcn.fw);
return 0;
@ -209,6 +211,8 @@ static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
fences += amdgpu_fence_count_emitted(&adev->vcn.ring_enc[i]);
}
fences += amdgpu_fence_count_emitted(&adev->vcn.ring_jpeg);
if (fences == 0) {
if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_uvd(adev, false);
@ -225,7 +229,7 @@ void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
struct amdgpu_device *adev = ring->adev;
bool set_clocks = !cancel_delayed_work_sync(&adev->vcn.idle_work);
if (set_clocks && adev->pm.dpm_enabled) {
if (set_clocks) {
if (adev->pm.dpm_enabled)
amdgpu_dpm_enable_uvd(adev, true);
else
@ -304,13 +308,10 @@ static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
}
ib->length_dw = 16;
r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
job->fence = dma_fence_get(f);
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err_free;
amdgpu_job_free(job);
amdgpu_bo_fence(bo, f, false);
amdgpu_bo_unreserve(bo);
amdgpu_bo_unref(&bo);
@ -495,12 +496,10 @@ static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t hand
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
job->fence = dma_fence_get(f);
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
amdgpu_job_free(job);
if (fence)
*fence = dma_fence_get(f);
dma_fence_put(f);
@ -549,12 +548,10 @@ static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t han
for (i = ib->length_dw; i < ib_size_dw; ++i)
ib->ptr[i] = 0x0;
r = amdgpu_ib_schedule(ring, 1, ib, NULL, &f);
job->fence = dma_fence_get(f);
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
amdgpu_job_free(job);
if (fence)
*fence = dma_fence_get(f);
dma_fence_put(f);
@ -597,3 +594,127 @@ error:
dma_fence_put(fence);
return r;
}
int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring)
{
struct amdgpu_device *adev = ring->adev;
uint32_t tmp = 0;
unsigned i;
int r;
WREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0xCAFEDEAD);
r = amdgpu_ring_alloc(ring, 3);
if (r) {
DRM_ERROR("amdgpu: cp failed to lock ring %d (%d).\n",
ring->idx, r);
return r;
}
amdgpu_ring_write(ring,
PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID), 0, 0, 0));
amdgpu_ring_write(ring, 0xDEADBEEF);
amdgpu_ring_commit(ring);
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_CONTEXT_ID));
if (tmp == 0xDEADBEEF)
break;
DRM_UDELAY(1);
}
if (i < adev->usec_timeout) {
DRM_DEBUG("ring test on %d succeeded in %d usecs\n",
ring->idx, i);
} else {
DRM_ERROR("amdgpu: ring %d test failed (0x%08X)\n",
ring->idx, tmp);
r = -EINVAL;
}
return r;
}
static int amdgpu_vcn_jpeg_set_reg(struct amdgpu_ring *ring, uint32_t handle,
struct dma_fence **fence)
{
struct amdgpu_device *adev = ring->adev;
struct amdgpu_job *job;
struct amdgpu_ib *ib;
struct dma_fence *f = NULL;
const unsigned ib_size_dw = 16;
int i, r;
r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
if (r)
return r;
ib = &job->ibs[0];
ib->ptr[0] = PACKETJ(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH), 0, 0, PACKETJ_TYPE0);
ib->ptr[1] = 0xDEADBEEF;
for (i = 2; i < 16; i += 2) {
ib->ptr[i] = PACKETJ(0, 0, 0, PACKETJ_TYPE6);
ib->ptr[i+1] = 0;
}
ib->length_dw = 16;
r = amdgpu_job_submit_direct(job, ring, &f);
if (r)
goto err;
if (fence)
*fence = dma_fence_get(f);
dma_fence_put(f);
return 0;
err:
amdgpu_job_free(job);
return r;
}
int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
struct amdgpu_device *adev = ring->adev;
uint32_t tmp = 0;
unsigned i;
struct dma_fence *fence = NULL;
long r = 0;
r = amdgpu_vcn_jpeg_set_reg(ring, 1, &fence);
if (r) {
DRM_ERROR("amdgpu: failed to set jpeg register (%ld).\n", r);
goto error;
}
r = dma_fence_wait_timeout(fence, false, timeout);
if (r == 0) {
DRM_ERROR("amdgpu: IB test timed out.\n");
r = -ETIMEDOUT;
goto error;
} else if (r < 0) {
DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
goto error;
} else
r = 0;
for (i = 0; i < adev->usec_timeout; i++) {
tmp = RREG32(SOC15_REG_OFFSET(UVD, 0, mmUVD_JPEG_PITCH));
if (tmp == 0xDEADBEEF)
break;
DRM_UDELAY(1);
}
if (i < adev->usec_timeout)
DRM_DEBUG("ib test on ring %d succeeded\n", ring->idx);
else {
DRM_ERROR("ib test failed (0x%08X)\n", tmp);
r = -EINVAL;
}
dma_fence_put(fence);
error:
return r;
}
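
Both new JPEG tests above follow the same handshake: seed a scratch register with a sentinel (0xCAFEDEAD), submit a packet that makes the engine write 0xDEADBEEF into it, then poll with a bounded timeout. A standalone model of that handshake (the "register" here is just a variable; in the driver it is mmUVD_CONTEXT_ID or mmUVD_JPEG_PITCH accessed through RREG32/WREG32, and the fake latency only stands in for the engine executing the packet):

#include <stdint.h>
#include <stdio.h>

static uint32_t scratch_reg;		/* stands in for the MMIO scratch register */
static unsigned engine_latency = 5;	/* fake completion delay, in poll steps */

static void engine_step(void)		/* models the engine executing the packet */
{
	if (engine_latency && --engine_latency == 0)
		scratch_reg = 0xDEADBEEF;
}

static int ring_test(unsigned timeout)
{
	unsigned i;

	scratch_reg = 0xCAFEDEAD;	/* known "not done yet" value */
	/* ...the ring packet writing 0xDEADBEEF would be submitted here... */

	for (i = 0; i < timeout; i++) {	/* the driver polls once per microsecond */
		engine_step();
		if (scratch_reg == 0xDEADBEEF)
			return 0;
	}
	return -1;			/* the driver returns -EINVAL here */
}

int main(void)
{
	printf("ring test %s\n", ring_test(1000) == 0 ? "succeeded" : "failed");
	return 0;
}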

View File

@ -66,6 +66,7 @@ struct amdgpu_vcn {
const struct firmware *fw; /* VCN firmware */
struct amdgpu_ring ring_dec;
struct amdgpu_ring ring_enc[AMDGPU_VCN_MAX_ENC_RINGS];
struct amdgpu_ring ring_jpeg;
struct amdgpu_irq_src irq;
unsigned num_enc_rings;
};
@ -83,4 +84,7 @@ int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout);
int amdgpu_vcn_jpeg_ring_test_ring(struct amdgpu_ring *ring);
int amdgpu_vcn_jpeg_ring_test_ib(struct amdgpu_ring *ring, long timeout);
#endif

View File

@ -33,9 +33,11 @@
#include "amdgpu.h"
#include "amdgpu_trace.h"
#include "amdgpu_amdkfd.h"
#include "amdgpu_gmc.h"
/*
* GPUVM
/**
* DOC: GPUVM
*
* GPUVM is similar to the legacy gart on older asics, however
* rather than there being a single global gart table
* for the entire GPU, there are multiple VM page tables active
@ -63,37 +65,84 @@ INTERVAL_TREE_DEFINE(struct amdgpu_bo_va_mapping, rb, uint64_t, __subtree_last,
#undef START
#undef LAST
/* Local structure. Encapsulate some VM table update parameters to reduce
/**
* struct amdgpu_pte_update_params - Local structure
*
* Encapsulate some VM table update parameters to reduce
* the number of function parameters
*
*/
struct amdgpu_pte_update_params {
/* amdgpu device we do this update for */
/**
* @adev: amdgpu device we do this update for
*/
struct amdgpu_device *adev;
/* optional amdgpu_vm we do this update for */
/**
* @vm: optional amdgpu_vm we do this update for
*/
struct amdgpu_vm *vm;
/* address where to copy page table entries from */
/**
* @src: address where to copy page table entries from
*/
uint64_t src;
/* indirect buffer to fill with commands */
/**
* @ib: indirect buffer to fill with commands
*/
struct amdgpu_ib *ib;
/* Function which actually does the update */
/**
* @func: Function which actually does the update
*/
void (*func)(struct amdgpu_pte_update_params *params,
struct amdgpu_bo *bo, uint64_t pe,
uint64_t addr, unsigned count, uint32_t incr,
uint64_t flags);
/* The next two are used during VM update by CPU
* DMA addresses to use for mapping
* Kernel pointer of PD/PT BO that needs to be updated
/**
* @pages_addr:
*
* DMA addresses to use for mapping, used during VM update by CPU
*/
dma_addr_t *pages_addr;
/**
* @kptr:
*
* Kernel pointer of PD/PT BO that needs to be updated,
* used during VM update by CPU
*/
void *kptr;
};
/* Helper to disable partial resident texture feature from a fence callback */
/**
* struct amdgpu_prt_cb - Helper to disable partial resident texture feature from a fence callback
*/
struct amdgpu_prt_cb {
/**
* @adev: amdgpu device
*/
struct amdgpu_device *adev;
/**
* @cb: callback
*/
struct dma_fence_cb cb;
};
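
The rewritten comments above (and throughout the rest of this file) convert free-form notes into kernel-doc: /** ... */ blocks with a one-line summary, @member or @param tags, and a Returns: section. A minimal template in that style, with a purely illustrative function:

/**
 * example_count_entries - count entries in a table (illustrative only)
 * @num_rows: number of rows in the table
 * @num_cols: number of columns in the table
 *
 * Longer description goes here, after a blank comment line.
 *
 * Returns:
 * Total number of entries.
 */
static inline unsigned example_count_entries(unsigned num_rows,
					     unsigned num_cols)
{
	return num_rows * num_cols;
}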
/**
* amdgpu_vm_bo_base_init - Adds bo to the list of bos associated with the vm
*
* @base: base structure for tracking BO usage in a VM
* @vm: vm to which bo is to be added
* @bo: amdgpu buffer object
*
* Initialize a bo_va_base structure and add it to the appropriate lists
*
*/
static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
struct amdgpu_vm *vm,
struct amdgpu_bo *bo)
@ -129,8 +178,10 @@ static void amdgpu_vm_bo_base_init(struct amdgpu_vm_bo_base *base,
* amdgpu_vm_level_shift - return the addr shift for each level
*
* @adev: amdgpu_device pointer
* @level: VMPT level
*
* Returns the number of bits the pfn needs to be right shifted for a level.
* Returns:
* The number of bits the pfn needs to be right shifted for a level.
*/
static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
unsigned level)
@ -158,8 +209,10 @@ static unsigned amdgpu_vm_level_shift(struct amdgpu_device *adev,
* amdgpu_vm_num_entries - return the number of entries in a PD/PT
*
* @adev: amdgpu_device pointer
* @level: VMPT level
*
* Calculate the number of entries in a page directory or page table.
* Returns:
* The number of entries in a page directory or page table.
*/
static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
unsigned level)
@ -182,8 +235,10 @@ static unsigned amdgpu_vm_num_entries(struct amdgpu_device *adev,
* amdgpu_vm_bo_size - returns the size of the BOs in bytes
*
* @adev: amdgpu_device pointer
* @level: VMPT level
*
* Calculate the size of the BO for a page directory or page table in bytes.
* Returns:
* The size of the BO for a page directory or page table in bytes.
*/
static unsigned amdgpu_vm_bo_size(struct amdgpu_device *adev, unsigned level)
{
@ -221,6 +276,9 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
* @param: parameter for the validation callback
*
* Validate the page table BOs on command submission if necessary.
*
* Returns:
* Validation result.
*/
int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int (*validate)(void *p, struct amdgpu_bo *bo),
@ -276,6 +334,9 @@ int amdgpu_vm_validate_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
* @vm: VM to check
*
* Check if all VM PDs/PTs are ready for updates
*
* Returns:
* True if eviction list is empty.
*/
bool amdgpu_vm_ready(struct amdgpu_vm *vm)
{
@ -286,10 +347,15 @@ bool amdgpu_vm_ready(struct amdgpu_vm *vm)
* amdgpu_vm_clear_bo - initially clear the PDs/PTs
*
* @adev: amdgpu_device pointer
* @vm: VM to clear BO from
* @bo: BO to clear
* @level: level this BO is at
* @pte_support_ats: indicate ATS support from PTE
*
* Root PD needs to be reserved when calling this.
*
* Returns:
* 0 on success, errno otherwise.
*/
static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
struct amdgpu_vm *vm, struct amdgpu_bo *bo,
@ -321,7 +387,7 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
ats_entries = 0;
}
ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
r = reservation_object_reserve_shared(bo->tbo.resv);
if (r)
@ -359,8 +425,8 @@ static int amdgpu_vm_clear_bo(struct amdgpu_device *adev,
if (r)
goto error_free;
r = amdgpu_job_submit(job, ring, &vm->entity,
AMDGPU_FENCE_OWNER_UNDEFINED, &fence);
r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_UNDEFINED,
&fence);
if (r)
goto error_free;
@ -385,10 +451,16 @@ error:
*
* @adev: amdgpu_device pointer
* @vm: requested vm
* @parent: parent PT
* @saddr: start of the address range
* @eaddr: end of the address range
* @level: VMPT level
* @ats: indicate ATS support from PTE
*
* Make sure the page directories and page tables are allocated
*
* Returns:
* 0 on success, errno otherwise.
*/
static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
@ -423,11 +495,12 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
eaddr = eaddr & ((1 << shift) - 1);
flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
if (vm->root.base.bo->shadow)
flags |= AMDGPU_GEM_CREATE_SHADOW;
if (vm->use_cpu_for_update)
flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
else
flags |= (AMDGPU_GEM_CREATE_NO_CPU_ACCESS |
AMDGPU_GEM_CREATE_SHADOW);
flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
/* walk over the address space and allocate the page tables */
for (pt_idx = from; pt_idx <= to; ++pt_idx) {
@ -496,6 +569,9 @@ static int amdgpu_vm_alloc_levels(struct amdgpu_device *adev,
* @size: Size from start address we need.
*
* Make sure the page tables are allocated.
*
* Returns:
* 0 on success, errno otherwise.
*/
int amdgpu_vm_alloc_pts(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
@ -561,6 +637,15 @@ void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev)
}
}
/**
* amdgpu_vm_need_pipeline_sync - Check if pipe sync is needed for job.
*
* @ring: ring on which the job will be submitted
* @job: job to submit
*
* Returns:
* True if sync is needed.
*/
bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
struct amdgpu_job *job)
{
@ -588,19 +673,17 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
return vm_flush_needed || gds_switch_needed;
}
static bool amdgpu_vm_is_large_bar(struct amdgpu_device *adev)
{
return (adev->gmc.real_vram_size == adev->gmc.visible_vram_size);
}
/**
* amdgpu_vm_flush - hardware flush the vm
*
* @ring: ring to use for flush
* @vmid: vmid number to use
* @pd_addr: address of the page directory
* @job: related job
* @need_pipe_sync: is pipe sync needed
*
* Emit a VM flush when it is necessary.
*
* Returns:
* 0 on success, errno otherwise.
*/
int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_pipe_sync)
{
@ -708,6 +791,9 @@ int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job, bool need_
* Returns the found bo_va or NULL if none is found
*
* Object has to be reserved!
*
* Returns:
* Found bo_va or NULL.
*/
struct amdgpu_bo_va *amdgpu_vm_bo_find(struct amdgpu_vm *vm,
struct amdgpu_bo *bo)
@ -789,7 +875,10 @@ static void amdgpu_vm_do_copy_ptes(struct amdgpu_pte_update_params *params,
* @addr: the unmapped addr
*
* Look up the physical address of the page that the pte resolves
* to and return the pointer for the page table entry.
* to.
*
* Returns:
* The pointer for the page table entry.
*/
static uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr)
{
@ -842,6 +931,17 @@ static void amdgpu_vm_cpu_set_ptes(struct amdgpu_pte_update_params *params,
}
}
/**
* amdgpu_vm_wait_pd - Wait for PT BOs to be free.
*
* @adev: amdgpu_device pointer
* @vm: related vm
* @owner: fence owner
*
* Returns:
* 0 on success, errno otherwise.
*/
static int amdgpu_vm_wait_pd(struct amdgpu_device *adev, struct amdgpu_vm *vm,
void *owner)
{
@ -895,7 +995,10 @@ static void amdgpu_vm_update_pde(struct amdgpu_pte_update_params *params,
/*
* amdgpu_vm_invalidate_level - mark all PD levels as invalid
*
* @adev: amdgpu_device pointer
* @vm: related vm
* @parent: parent PD
* @level: VMPT level
*
* Mark all PD levels as invalid after an error.
*/
@ -930,7 +1033,9 @@ static void amdgpu_vm_invalidate_level(struct amdgpu_device *adev,
* @vm: requested vm
*
* Makes sure all directories are up to date.
* Returns 0 for success, error for failure.
*
* Returns:
* 0 for success, error for failure.
*/
int amdgpu_vm_update_directories(struct amdgpu_device *adev,
struct amdgpu_vm *vm)
@ -980,7 +1085,7 @@ restart:
struct amdgpu_vm_bo_base,
vm_status);
bo_base->moved = false;
list_move(&bo_base->vm_status, &vm->idle);
list_del_init(&bo_base->vm_status);
bo = bo_base->bo->parent;
if (!bo)
@ -1009,15 +1114,15 @@ restart:
struct amdgpu_ring *ring;
struct dma_fence *fence;
ring = container_of(vm->entity.sched, struct amdgpu_ring,
ring = container_of(vm->entity.rq->sched, struct amdgpu_ring,
sched);
amdgpu_ring_pad_ib(ring, params.ib);
amdgpu_sync_resv(adev, &job->sync, root->tbo.resv,
AMDGPU_FENCE_OWNER_VM, false);
WARN_ON(params.ib->length_dw > ndw);
r = amdgpu_job_submit(job, ring, &vm->entity,
AMDGPU_FENCE_OWNER_VM, &fence);
r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM,
&fence);
if (r)
goto error;
@ -1117,14 +1222,15 @@ static void amdgpu_vm_handle_huge_pages(struct amdgpu_pte_update_params *p,
* amdgpu_vm_update_ptes - make sure that page tables are valid
*
* @params: see amdgpu_pte_update_params definition
* @vm: requested vm
* @start: start of GPU address range
* @end: end of GPU address range
* @dst: destination address to map to, the next dst inside the function
* @flags: mapping flags
*
* Update the page tables in the range @start - @end.
* Returns 0 for success, -EINVAL for failure.
*
* Returns:
* 0 for success, -EINVAL for failure.
*/
static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
uint64_t start, uint64_t end,
@ -1178,7 +1284,9 @@ static int amdgpu_vm_update_ptes(struct amdgpu_pte_update_params *params,
* @end: last PTE to handle
* @dst: addr those PTEs should point to
* @flags: hw mapping flags
* Returns 0 for success, -EINVAL for failure.
*
* Returns:
* 0 for success, -EINVAL for failure.
*/
static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
uint64_t start, uint64_t end,
@ -1250,7 +1358,9 @@ static int amdgpu_vm_frag_ptes(struct amdgpu_pte_update_params *params,
* @fence: optional resulting fence
*
* Fill in the page table entries between @start and @last.
* Returns 0 for success, -EINVAL for failure.
*
* Returns:
* 0 for success, -EINVAL for failure.
*/
static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
struct dma_fence *exclusive,
@ -1294,7 +1404,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
addr, flags);
}
ring = container_of(vm->entity.sched, struct amdgpu_ring, sched);
ring = container_of(vm->entity.rq->sched, struct amdgpu_ring, sched);
nptes = last - start + 1;
@ -1326,7 +1436,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
ndw += ncmds * 10;
/* extra commands for begin/end fragments */
ndw += 2 * 10 * adev->vm_manager.fragment_size;
if (vm->root.base.bo->shadow)
ndw += 2 * 10 * adev->vm_manager.fragment_size * 2;
else
ndw += 2 * 10 * adev->vm_manager.fragment_size;
params.func = amdgpu_vm_do_set_ptes;
}
@ -1373,8 +1486,7 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
amdgpu_ring_pad_ib(ring, params.ib);
WARN_ON(params.ib->length_dw > ndw);
r = amdgpu_job_submit(job, ring, &vm->entity,
AMDGPU_FENCE_OWNER_VM, &f);
r = amdgpu_job_submit(job, &vm->entity, AMDGPU_FENCE_OWNER_VM, &f);
if (r)
goto error_free;
@ -1402,7 +1514,9 @@ error_free:
*
* Split the mapping into smaller chunks so that each update fits
* into a SDMA IB.
* Returns 0 for success, -EINVAL for failure.
*
* Returns:
* 0 for success, -EINVAL for failure.
*/
static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
struct dma_fence *exclusive,
@ -1455,7 +1569,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
if (nodes) {
addr = nodes->start << PAGE_SHIFT;
max_entries = (nodes->size - pfn) *
(PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
AMDGPU_GPU_PAGES_IN_CPU_PAGE;
} else {
addr = 0;
max_entries = S64_MAX;
@ -1466,7 +1580,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
max_entries = min(max_entries, 16ull * 1024ull);
for (count = 1;
count < max_entries / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
count < max_entries / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
++count) {
uint64_t idx = pfn + count;
@ -1480,7 +1594,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
dma_addr = pages_addr;
} else {
addr = pages_addr[pfn];
max_entries = count * (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
max_entries = count * AMDGPU_GPU_PAGES_IN_CPU_PAGE;
}
} else if (flags & AMDGPU_PTE_VALID) {
@ -1495,7 +1609,7 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
if (r)
return r;
pfn += (last - start + 1) / (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE);
pfn += (last - start + 1) / AMDGPU_GPU_PAGES_IN_CPU_PAGE;
if (nodes && nodes->size == pfn) {
pfn = 0;
++nodes;
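
The hunks above replace the repeated expression (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE) with the new AMDGPU_GPU_PAGES_IN_CPU_PAGE name; assuming the macro is defined as exactly that ratio, the arithmetic it encapsulates is just a CPU-page to GPU-page conversion, as in this small standalone check:

#include <stdio.h>

#define AMDGPU_GPU_PAGE_SIZE 4096
#define PAGE_SIZE 65536	/* e.g. a 64 KiB CPU page; 4096 on most x86 systems */
#define AMDGPU_GPU_PAGES_IN_CPU_PAGE (PAGE_SIZE / AMDGPU_GPU_PAGE_SIZE)

int main(void)
{
	unsigned long cpu_pages = 3;	/* a pfn range spanning 3 CPU pages */
	unsigned long gpu_pages = cpu_pages * AMDGPU_GPU_PAGES_IN_CPU_PAGE;

	/* with 64 KiB CPU pages, each CPU page holds 16 GPU pages */
	printf("%lu CPU pages -> %lu GPU page table entries\n",
	       cpu_pages, gpu_pages);
	return 0;
}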
@ -1515,7 +1629,9 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
* @clear: if true clear the entries
*
* Fill in the page table entries for @bo_va.
* Returns 0 for success, -EINVAL for failure.
*
* Returns:
* 0 for success, -EINVAL for failure.
*/
int amdgpu_vm_bo_update(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va,
@ -1531,18 +1647,17 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
uint64_t flags;
int r;
if (clear || !bo_va->base.bo) {
if (clear || !bo) {
mem = NULL;
nodes = NULL;
exclusive = NULL;
} else {
struct ttm_dma_tt *ttm;
mem = &bo_va->base.bo->tbo.mem;
mem = &bo->tbo.mem;
nodes = mem->mm_node;
if (mem->mem_type == TTM_PL_TT) {
ttm = container_of(bo_va->base.bo->tbo.ttm,
struct ttm_dma_tt, ttm);
ttm = container_of(bo->tbo.ttm, struct ttm_dma_tt, ttm);
pages_addr = ttm->dma_address;
}
exclusive = reservation_object_get_excl(bo->tbo.resv);
@ -1610,6 +1725,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
/**
* amdgpu_vm_update_prt_state - update the global PRT state
*
* @adev: amdgpu_device pointer
*/
static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
{
@ -1624,6 +1741,8 @@ static void amdgpu_vm_update_prt_state(struct amdgpu_device *adev)
/**
* amdgpu_vm_prt_get - add a PRT user
*
* @adev: amdgpu_device pointer
*/
static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
{
@ -1636,6 +1755,8 @@ static void amdgpu_vm_prt_get(struct amdgpu_device *adev)
/**
* amdgpu_vm_prt_put - drop a PRT user
*
* @adev: amdgpu_device pointer
*/
static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
{
@ -1645,6 +1766,9 @@ static void amdgpu_vm_prt_put(struct amdgpu_device *adev)
/**
* amdgpu_vm_prt_cb - callback for updating the PRT status
*
* @fence: fence for the callback
* @_cb: the callback function
*/
static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
{
@ -1656,6 +1780,9 @@ static void amdgpu_vm_prt_cb(struct dma_fence *fence, struct dma_fence_cb *_cb)
/**
* amdgpu_vm_add_prt_cb - add callback for updating the PRT status
*
* @adev: amdgpu_device pointer
* @fence: fence for the callback
*/
static void amdgpu_vm_add_prt_cb(struct amdgpu_device *adev,
struct dma_fence *fence)
@ -1747,9 +1874,11 @@ static void amdgpu_vm_prt_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
* or if an error occurred)
*
* Make sure all freed BOs are cleared in the PT.
* Returns 0 for success.
*
* PTs have to be reserved and mutex must be locked!
*
* Returns:
* 0 for success.
*
*/
int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
@ -1794,10 +1923,11 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
*
* @adev: amdgpu_device pointer
* @vm: requested vm
* @sync: sync object to add fences to
*
* Make sure all BOs which are moved are updated in the PTs.
* Returns 0 for success.
*
* Returns:
* 0 for success.
*
* PTs have to be reserved!
*/
@ -1852,7 +1982,9 @@ int amdgpu_vm_handle_moved(struct amdgpu_device *adev,
*
* Add @bo into the requested vm.
* Add @bo to the list of bos associated with the vm
* Returns newly added bo_va or NULL for failure
*
* Returns:
* Newly added bo_va or NULL for failure
*
* Object has to be reserved!
*/
@ -1915,10 +2047,13 @@ static void amdgpu_vm_bo_insert_map(struct amdgpu_device *adev,
* @bo_va: bo_va to store the address
* @saddr: where to map the BO
* @offset: requested offset in the BO
* @size: BO size in bytes
* @flags: attributes of pages (read/write/valid/etc.)
*
* Add a mapping of the BO at the specified addr into the VM.
* Returns 0 for success, error for failure.
*
* Returns:
* 0 for success, error for failure.
*
* Object has to be reserved and unreserved outside!
*/
@ -1976,11 +2111,14 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
* @bo_va: bo_va to store the address
* @saddr: where to map the BO
* @offset: requested offset in the BO
* @size: BO size in bytes
* @flags: attributes of pages (read/write/valid/etc.)
*
* Add a mapping of the BO at the specified addr into the VM. Replace existing
* mappings as we do so.
* Returns 0 for success, error for failure.
*
* Returns:
* 0 for success, error for failure.
*
* Object has to be reserved and unreserved outside!
*/
@ -2037,7 +2175,9 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
* @saddr: where to the BO is mapped
*
* Remove a mapping of the BO at the specified addr from the VM.
* Returns 0 for success, error for failure.
*
* Returns:
* 0 for success, error for failure.
*
* Object has to be reserved and unreserved outside!
*/
@ -2091,7 +2231,9 @@ int amdgpu_vm_bo_unmap(struct amdgpu_device *adev,
* @size: size of the range
*
* Remove all mappings in a range, split them as appropriate.
* Returns 0 for success, error for failure.
*
* Returns:
* 0 for success, error for failure.
*/
int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
struct amdgpu_vm *vm,
@ -2188,8 +2330,13 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
* amdgpu_vm_bo_lookup_mapping - find mapping by address
*
* @vm: the requested VM
* @addr: the address
*
* Find a mapping by its address.
*
* Returns:
* The amdgpu_bo_va_mapping matching for addr or NULL
*
*/
struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
uint64_t addr)
@ -2197,6 +2344,35 @@ struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
return amdgpu_vm_it_iter_first(&vm->va, addr, addr);
}
/**
* amdgpu_vm_bo_trace_cs - trace all reserved mappings
*
* @vm: the requested vm
* @ticket: CS ticket
*
* Trace all mappings of BOs reserved during a command submission.
*/
void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket)
{
struct amdgpu_bo_va_mapping *mapping;
if (!trace_amdgpu_vm_bo_cs_enabled())
return;
for (mapping = amdgpu_vm_it_iter_first(&vm->va, 0, U64_MAX); mapping;
mapping = amdgpu_vm_it_iter_next(mapping, 0, U64_MAX)) {
if (mapping->bo_va && mapping->bo_va->base.bo) {
struct amdgpu_bo *bo;
bo = mapping->bo_va->base.bo;
if (READ_ONCE(bo->tbo.resv->lock.ctx) != ticket)
continue;
}
trace_amdgpu_vm_bo_cs(mapping);
}
}
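
amdgpu_vm_bo_trace_cs() above walks every mapping in the VM and emits a trace event only for mappings whose BO reservation was taken with the caller's ww_acquire_ctx ticket, i.e. the BOs reserved for the current command submission. A sketch of the intended call site; the surrounding function and the origin of "ticket" are assumptions, not part of this diff:

/* Illustrative call site in the command-submission path; "ticket" is assumed
 * to be the ww_acquire_ctx used when reserving the submission's BO list. */
static void example_trace_cs_mappings(struct amdgpu_vm *vm,
				      struct ww_acquire_ctx *ticket)
{
	/* cheap when tracing is off: the helper bails out early itself */
	amdgpu_vm_bo_trace_cs(vm, ticket);
}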
/**
* amdgpu_vm_bo_rmv - remove a bo from a specific vm
*
@ -2241,8 +2417,8 @@ void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
* amdgpu_vm_bo_invalidate - mark the bo as invalid
*
* @adev: amdgpu_device pointer
* @vm: requested vm
* @bo: amdgpu buffer object
* @evicted: is the BO evicted
*
* Mark @bo as invalid.
*/
@ -2282,6 +2458,14 @@ void amdgpu_vm_bo_invalidate(struct amdgpu_device *adev,
}
}
/**
* amdgpu_vm_get_block_size - calculate VM page table size as power of two
*
* @vm_size: VM size
*
* Returns:
* VM page table size as a power of two
*/
static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
{
/* Total bits covered by PD + PTs */
@ -2300,6 +2484,10 @@ static uint32_t amdgpu_vm_get_block_size(uint64_t vm_size)
*
* @adev: amdgpu_device pointer
* @vm_size: the default vm size if it's set auto
* @fragment_size_default: Default PTE fragment size
* @max_level: max VMPT level
* @max_bits: max address space size in bits
*
*/
void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
uint32_t fragment_size_default, unsigned max_level,
@ -2367,8 +2555,12 @@ void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
* @adev: amdgpu_device pointer
* @vm: requested vm
* @vm_context: Indicates if it is a GFX or Compute context
* @pasid: Process address space identifier
*
* Init @vm fields.
*
* Returns:
* 0 for success, error for failure.
*/
int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
int vm_context, unsigned int pasid)
@ -2400,8 +2592,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
ring_instance %= adev->vm_manager.vm_pte_num_rings;
ring = adev->vm_manager.vm_pte_rings[ring_instance];
rq = &ring->sched.sched_rq[DRM_SCHED_PRIORITY_KERNEL];
r = drm_sched_entity_init(&ring->sched, &vm->entity,
rq, NULL);
r = drm_sched_entity_init(&vm->entity, &rq, 1, NULL);
if (r)
return r;
@ -2419,14 +2610,14 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm,
}
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
vm->last_update = NULL;
flags = AMDGPU_GEM_CREATE_VRAM_CONTIGUOUS;
if (vm->use_cpu_for_update)
flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
else
else if (vm_context != AMDGPU_VM_CONTEXT_COMPUTE)
flags |= AMDGPU_GEM_CREATE_SHADOW;
size = amdgpu_vm_bo_size(adev, adev->vm_manager.root_level);
@ -2481,7 +2672,7 @@ error_free_root:
vm->root.base.bo = NULL;
error_free_sched_entity:
drm_sched_entity_fini(&ring->sched, &vm->entity);
drm_sched_entity_destroy(&vm->entity);
return r;
}
@ -2489,6 +2680,9 @@ error_free_sched_entity:
/**
* amdgpu_vm_make_compute - Turn a GFX VM into a compute VM
*
* @adev: amdgpu_device pointer
* @vm: requested vm
*
* This only works on GFX VMs that don't have any BOs added and no
* page tables allocated yet.
*
@ -2498,10 +2692,10 @@ error_free_sched_entity:
* - pasid (old PASID is released, because compute manages its own PASIDs)
*
* Reinitializes the page directory to reflect the changed ATS
* setting. May leave behind an unused shadow BO for the page
* directory when switching from SDMA updates to CPU updates.
* setting.
*
* Returns 0 for success, -errno for errors.
* Returns:
* 0 for success, -errno for errors.
*/
int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
{
@ -2535,7 +2729,7 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->pte_support_ats = pte_support_ats;
DRM_DEBUG_DRIVER("VM update mode is %s\n",
vm->use_cpu_for_update ? "CPU" : "SDMA");
WARN_ONCE((vm->use_cpu_for_update & !amdgpu_vm_is_large_bar(adev)),
WARN_ONCE((vm->use_cpu_for_update & !amdgpu_gmc_vram_full_visible(&adev->gmc)),
"CPU update of VM recommended only for large BAR system\n");
if (vm->pasid) {
@ -2548,6 +2742,9 @@ int amdgpu_vm_make_compute(struct amdgpu_device *adev, struct amdgpu_vm *vm)
vm->pasid = 0;
}
/* Free the shadow bo for compute VM */
amdgpu_bo_unref(&vm->root.base.bo->shadow);
error:
amdgpu_bo_unreserve(vm->root.base.bo);
return r;
@ -2614,7 +2811,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
spin_unlock_irqrestore(&adev->vm_manager.pasid_lock, flags);
}
drm_sched_entity_fini(vm->entity.sched, &vm->entity);
drm_sched_entity_destroy(&vm->entity);
if (!RB_EMPTY_ROOT(&vm->va.rb_root)) {
dev_err(adev->dev, "still active bo inside vm\n");
@ -2656,8 +2853,10 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm)
* @adev: amdgpu_device pointer
* @pasid: PASID do identify the VM
*
* This function is expected to be called in interrupt context. Returns
* true if there was fault credit, false otherwise
* This function is expected to be called in interrupt context.
*
* Returns:
* True if there was fault credit, false otherwise
*/
bool amdgpu_vm_pasid_fault_credit(struct amdgpu_device *adev,
unsigned int pasid)
@ -2711,7 +2910,7 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
*/
#ifdef CONFIG_X86_64
if (amdgpu_vm_update_mode == -1) {
if (amdgpu_vm_is_large_bar(adev))
if (amdgpu_gmc_vram_full_visible(&adev->gmc))
adev->vm_manager.vm_update_mode =
AMDGPU_VM_USE_CPU_FOR_COMPUTE;
else
@ -2741,6 +2940,16 @@ void amdgpu_vm_manager_fini(struct amdgpu_device *adev)
amdgpu_vmid_mgr_fini(adev);
}
/**
* amdgpu_vm_ioctl - Manages VMID reservation for vm hubs.
*
* @dev: drm device pointer
* @data: drm_amdgpu_vm
* @filp: drm file pointer
*
* Returns:
* 0 for success, -errno for errors.
*/
int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
union drm_amdgpu_vm *args = data;
@ -2764,3 +2973,42 @@ int amdgpu_vm_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
return 0;
}
/**
* amdgpu_vm_get_task_info - Extracts task info for a PASID.
*
* @adev: amdgpu device pointer
* @pasid: PASID identifier for VM
* @task_info: task_info to fill.
*/
void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
struct amdgpu_task_info *task_info)
{
struct amdgpu_vm *vm;
spin_lock(&adev->vm_manager.pasid_lock);
vm = idr_find(&adev->vm_manager.pasid_idr, pasid);
if (vm)
*task_info = vm->task_info;
spin_unlock(&adev->vm_manager.pasid_lock);
}
/**
* amdgpu_vm_set_task_info - Sets VMs task info.
*
* @vm: vm for which to set the info
*/
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm)
{
if (!vm->task_info.pid) {
vm->task_info.pid = current->pid;
get_task_comm(vm->task_info.task_name, current);
if (current->group_leader->mm == current->mm) {
vm->task_info.tgid = current->group_leader->pid;
get_task_comm(vm->task_info.process_name, current->group_leader);
}
}
}
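
The two helpers above let fault paths resolve a PASID back to the task that owns the VM. A sketch of a consumer: only amdgpu_task_info and amdgpu_vm_get_task_info come from this patch, while the fault handler itself is illustrative.

/* Illustrative consumer: resolve the faulting PASID to the owning task and
 * log it. */
static void example_report_vm_fault(struct amdgpu_device *adev,
				    unsigned int pasid, u64 addr)
{
	struct amdgpu_task_info task_info = {};

	amdgpu_vm_get_task_info(adev, pasid, &task_info);
	dev_err(adev->dev,
		"VM fault at 0x%016llx from process %s (pid %d), thread %s (tgid %d)\n",
		addr, task_info.process_name, task_info.pid,
		task_info.task_name, task_info.tgid);
}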

View File

@ -164,6 +164,14 @@ struct amdgpu_vm_pt {
#define AMDGPU_VM_FAULT_PASID(fault) ((u64)(fault) >> 48)
#define AMDGPU_VM_FAULT_ADDR(fault) ((u64)(fault) & 0xfffffffff000ULL)
struct amdgpu_task_info {
char process_name[TASK_COMM_LEN];
char task_name[TASK_COMM_LEN];
pid_t pid;
pid_t tgid;
};
struct amdgpu_vm {
/* tree of virtual addresses mapped */
struct rb_root_cached va;
@ -215,6 +223,9 @@ struct amdgpu_vm {
/* Valid while the PD is reserved or fenced */
uint64_t pd_phys_addr;
/* Some basic info about the task */
struct amdgpu_task_info task_info;
};
struct amdgpu_vm_manager {
@ -307,6 +318,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
uint64_t saddr, uint64_t size);
struct amdgpu_bo_va_mapping *amdgpu_vm_bo_lookup_mapping(struct amdgpu_vm *vm,
uint64_t addr);
void amdgpu_vm_bo_trace_cs(struct amdgpu_vm *vm, struct ww_acquire_ctx *ticket);
void amdgpu_vm_bo_rmv(struct amdgpu_device *adev,
struct amdgpu_bo_va *bo_va);
void amdgpu_vm_adjust_size(struct amdgpu_device *adev, uint32_t vm_size,
@ -317,4 +329,9 @@ bool amdgpu_vm_need_pipeline_sync(struct amdgpu_ring *ring,
struct amdgpu_job *job);
void amdgpu_vm_check_compute_bug(struct amdgpu_device *adev);
void amdgpu_vm_get_task_info(struct amdgpu_device *adev, unsigned int pasid,
struct amdgpu_task_info *task_info);
void amdgpu_vm_set_task_info(struct amdgpu_vm *vm);
#endif
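
Taken together, the amdgpu_vm.c documentation added above describes a per-process VM whose typical per-BO lifecycle looks roughly like the sketch below. The call sequence is assembled from functions documented in this diff; amdgpu_vm_bo_add()'s exact signature and the BO reservation details are assumptions, and error unwinding is omitted.

/* Rough per-BO mapping lifecycle under GPUVM (illustrative only). */
static int example_map_bo(struct amdgpu_device *adev, struct amdgpu_vm *vm,
			  struct amdgpu_bo *bo, uint64_t gpu_addr,
			  uint64_t size, uint64_t pte_flags)
{
	struct amdgpu_bo_va *bo_va;
	int r;

	/* associate the BO with this VM (assumed signature) */
	bo_va = amdgpu_vm_bo_add(adev, vm, bo);
	if (!bo_va)
		return -ENOMEM;

	/* record the mapping and make sure page tables exist for the range */
	r = amdgpu_vm_bo_map(adev, bo_va, gpu_addr, 0, size, pte_flags);
	if (r)
		return r;
	r = amdgpu_vm_alloc_pts(adev, vm, gpu_addr, size);
	if (r)
		return r;

	/* write the PDEs/PTEs: directories first, then the BO's own entries */
	r = amdgpu_vm_update_directories(adev, vm);
	if (r)
		return r;
	r = amdgpu_vm_bo_update(adev, bo_va, false);
	if (r)
		return r;

	/* later: amdgpu_vm_bo_unmap(adev, bo_va, gpu_addr) and
	 * amdgpu_vm_bo_rmv(adev, bo_va) undo the above. */
	return 0;
}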
