Merge tag 'drm-for-v4.8' of git://people.freedesktop.org/~airlied/linux

Merge drm updates from Dave Airlie: "This is the main drm pull request for 4.8. I'm down with a cold at the moment so hopefully this isn't in too bad a state, I finished pulling stuff last week mostly (nouveau fixes just went in today), so only this message should be influenced by illness. Apologies to anyone who's major feature I missed :-) Core: Lockless GEM BO freeing Non-blocking atomic work Documentation changes (rst/sphinx) Prep for new fencing changes Simple display helpers Master/auth changes Register/unregister rework Loads of trivial patches/fixes. New stuff: ARM Mali display driver (not the 3D chip) sii902x RGB->HDMI bridge Panel: Support for new panels Improved backlight support Bridge: Convert ADV7511 to bridge driver ADV7533 support TC358767 (DSI/DPI to eDP) encoder chip support i915: BXT support enabled by default GVT-g infrastructure GuC command submission and fixes BXT workarounds SKL/BKL workarounds Demidlayering device registration Thundering herd fixes Missing pci ids Atomic updates amdgpu/radeon: ATPX improvements for better dGPU power control on PX systems New power features for CZ/BR/ST Pipelined BO moves and evictions in TTM GPU scheduler improvements GPU reset improvements Overclocking on dGPUs with amdgpu Polaris powermanagement enabled nouveau: GK20A/GM20B volt and clock improvements. Initial support for GP100/GP104 GPUs, GP104 will not yet support acceleration due to NVIDIA having not released firmware for them as of yet. exynos: Exynos5433 SoC with IOMMU support. vc4: Shader validation for branching imx-drm: Atomic mode setting conversion Reworked DMFC FIFO allocation External bridge support analogix-dp: RK3399 eDP support Lots of fixes. rockchip: Lots of small fixes. msm: DT bindings cleanups Shrinker and madvise support ASoC HDMI codec support tegra: Host1x driver cleanups SOR reworking for DP support Runtime PM support omapdrm: PLL enhancements Header refactoring Gamma table support arcgpu: Simulator support virtio-gpu: Atomic modesetting fixes. rcar-du: Misc fixes. mediatek: MT8173 HDMI support sti: ASOC HDMI codec support Minor fixes fsl-dcu: Suspend/resume support Bridge support amdkfd: Minor fixes. etnaviv: Enable GPU clock gating hisilicon: Vblank and other fixes" * tag 'drm-for-v4.8' of git://people.freedesktop.org/~airlied/linux: (1575 commits) drm/nouveau/gr/nv3x: fix instobj write offsets in gr setup drm/nouveau/acpi: fix lockup with PCIe runtime PM drm/nouveau/acpi: check for function 0x1B before using it drm/nouveau/acpi: return supported DSM functions drm/nouveau/acpi: ensure matching ACPI handle and supported functions drm/nouveau/fbcon: fix font width not divisible by 8 drm/amd/powerplay: remove enable_clock_power_gatings_tasks from initialize and resume events drm/amd/powerplay: move clockgating to after ungating power in pp for uvd/vce drm/amdgpu: add query device id and revision id into system info entry at CGS drm/amdgpu: add new definition in bif header drm/amd/powerplay: rename smum header guards drm/amdgpu: enable UVD context buffer for older HW drm/amdgpu: fix default UVD context size drm/amdgpu: fix incorrect type of info_id drm/amdgpu: make amdgpu_cgs_call_acpi_method as static drm/amdgpu: comment out unused defaults_staturn_pro static const structure to fix the build drm/amdgpu: enable UVD VM only on polaris drm/amdgpu: increase timeout of IB test drm/amdgpu: add destroy session when generate VCE destroy msg. drm/amd: fix deadlock of job_list_lock V2 ...
2016-08-01 21:44:08 -04:00 · 2016-08-01 21:44:08 -04:00 · 731c7d3a20
parent 77a87824ed 753e7c8cbd
commit 731c7d3a20
923 changed files with 48254 additions and 25571 deletions
--- a/Documentation/DocBook/Makefile
+++ b/Documentation/DocBook/Makefile
@ -16,7 +16,7 @@ DOCBOOKS := z8530book.xml device-drivers.xml \
 	    genericirq.xml s390-drivers.xml uio-howto.xml scsi.xml \
 	    80211.xml debugobjects.xml sh.xml regulator.xml \
 	    alsa-driver-api.xml writing-an-alsa-driver.xml \
-	    tracepoint.xml gpu.xml media_api.xml w1.xml \
+	    tracepoint.xml media_api.xml w1.xml \
 	    writing_musb_glue_layer.xml crypto-API.xml iio.xml

 include Documentation/DocBook/media/Makefile
--- a/Documentation/DocBook/device-drivers.tmpl
+++ b/Documentation/DocBook/device-drivers.tmpl
@ -161,6 +161,10 @@ X!Edrivers/base/interface.c
 !Iinclude/linux/fence.h
 !Edrivers/dma-buf/seqno-fence.c
 !Iinclude/linux/seqno-fence.h
+!Edrivers/dma-buf/fence-array.c
+!Iinclude/linux/fence-array.h
+!Edrivers/dma-buf/reservation.c
+!Iinclude/linux/reservation.h
 !Edrivers/dma-buf/sync_file.c
 !Iinclude/linux/sync_file.h
       </sect2>
--- a/Documentation/DocBook/gpu.tmpl
+++ b/Documentation/DocBook/gpu.tmpl
--- a/Documentation/devicetree/bindings/display/arm,malidp.txt
+++ b/Documentation/devicetree/bindings/display/arm,malidp.txt
@ -0,0 +1,65 @@
+ARM Mali-DP
+
+The following bindings apply to a family of Display Processors sold as
+licensable IP by ARM Ltd. The bindings describe the Mali DP500, DP550 and
+DP650 processors that offer multiple composition layers, support for
+rotation and scaling output.
+
+Required properties:
+  - compatible: should be one of
+	"arm,mali-dp500"
+	"arm,mali-dp550"
+	"arm,mali-dp650"
+    depending on the particular implementation present in the hardware
+  - reg: Physical base address and size of the block of registers used by
+    the processor.
+  - interrupts: Interrupt list, as defined in ../interrupt-controller/interrupts.txt,
+    interrupt client nodes.
+  - interrupt-names: name of the engine inside the processor that will
+    use the corresponding interrupt. Should be one of "DE" or "SE".
+  - clocks: A list of phandle + clock-specifier pairs, one for each entry
+    in 'clock-names'
+  - clock-names: A list of clock names. It should contain:
+      - "pclk": for the APB interface clock
+      - "aclk": for the AXI interface clock
+      - "mclk": for the main processor clock
+      - "pxlclk": for the pixel clock feeding the output PLL of the processor.
+  - arm,malidp-output-port-lines: Array of u8 values describing the number
+    of output lines per channel (R, G and B).
+
+Required sub-nodes:
+  - port: The Mali DP connection to an encoder input port. The connection
+    is modelled using the OF graph bindings specified in
+    Documentation/devicetree/bindings/graph.txt
+
+Optional properties:
+  - memory-region: phandle to a node describing memory (see
+    Documentation/devicetree/bindings/reserved-memory/reserved-memory.txt)
+    to be used for the framebuffer; if not present, the framebuffer may
+    be located anywhere in memory.
+
+
+Example:
+
+/ {
+	...
+
+	dp0: malidp@6f200000 {
+		compatible = "arm,mali-dp650";
+		reg = <0 0x6f200000 0 0x20000>;
+		memory-region = <&display_reserved>;
+		interrupts = <0 168 IRQ_TYPE_LEVEL_HIGH>,
+			     <0 168 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "DE", "SE";
+		clocks = <&oscclk2>, <&fpgaosc0>, <&fpgaosc1>, <&fpgaosc1>;
+		clock-names = "pxlclk", "mclk", "aclk", "pclk";
+		arm,malidp-output-port-lines = /bits/ 8 <8 8 8>;
+		port {
+			dp0_output: endpoint {
+				remote-endpoint = <&tda998x_2_input>;
+			};
+		};
+	};
+
+	...
+};
--- a/Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt
+++ b/Documentation/devicetree/bindings/display/bridge/adi,adv7511.txt
@ -1,13 +1,19 @@
-Analog Device ADV7511(W)/13 HDMI Encoders
+Analog Device ADV7511(W)/13/33 HDMI Encoders
 -----------------------------------------

-The ADV7511, ADV7511W and ADV7513 are HDMI audio and video transmitters
+The ADV7511, ADV7511W, ADV7513 and ADV7533 are HDMI audio and video transmitters
 compatible with HDMI 1.4 and DVI 1.0. They support color space conversion,
-S/PDIF, CEC and HDCP.
+S/PDIF, CEC and HDCP. ADV7533 supports the DSI interface for input pixels, while
+the others support RGB interface.

 Required properties:

- compatible: Should be one of "adi,adv7511", "adi,adv7511w" or "adi,adv7513"
+- compatible: Should be one of:
+		"adi,adv7511"
+		"adi,adv7511w"
+		"adi,adv7513"
+		"adi,adv7533"
+
 - reg: I2C slave address

 The ADV7511 supports a large number of input data formats that differ by their
@ -32,6 +38,11 @@ The following input format properties are required except in "rgb 1x" and
 - adi,input-justification: The input bit justification ("left", "evenly",
  "right").

+The following properties are required for ADV7533:
+
+- adi,dsi-lanes: Number of DSI data lanes connected to the DSI host. It should
+  be one of 1, 2, 3 or 4.
+
 Optional properties:

 - interrupts: Specifier for the ADV7511 interrupt
@ -42,13 +53,18 @@ Optional properties:
 - adi,embedded-sync: The input uses synchronization signals embedded in the
  data stream (similar to BT.656). Defaults to separate H/V synchronization
  signals.
+- adi,disable-timing-generator: Only for ADV7533. Disables the internal timing
+  generator. The chip will rely on the sync signals in the DSI data lanes,
+  rather than generate its own timings for HDMI output.

 Required nodes:

 The ADV7511 has two video ports. Their connections are modelled using the OF
 graph bindings specified in Documentation/devicetree/bindings/graph.txt.

- Video port 0 for the RGB or YUV input
+- Video port 0 for the RGB, YUV or DSI input. In the case of ADV7533, the
+  remote endpoint phandle should be a reference to a valid mipi_dsi_host device
+  node.
 - Video port 1 for the HDMI output


--- a/Documentation/devicetree/bindings/display/bridge/analogix_dp.txt
+++ b/Documentation/devicetree/bindings/display/bridge/analogix_dp.txt
@ -5,6 +5,7 @@ Required properties for dp-controller:
 		platform specific such as:
 		 * "samsung,exynos5-dp"
 		 * "rockchip,rk3288-dp"
+		 * "rockchip,rk3399-edp"
 	-reg:
 		physical base address of the controller and length
 		of memory mapped region.
--- a/Documentation/devicetree/bindings/display/bridge/sii902x.txt
+++ b/Documentation/devicetree/bindings/display/bridge/sii902x.txt
@ -0,0 +1,35 @@
+sii902x HDMI bridge bindings
+
+Required properties:
+	- compatible: "sil,sii9022"
+	- reg: i2c address of the bridge
+
+Optional properties:
+	- interrupts-extended or interrupt-parent + interrupts: describe
+	  the interrupt line used to inform the host about hotplug events.
+	- reset-gpios: OF device-tree gpio specification for RST_N pin.
+
+Optional subnodes:
+	- video input: this subnode can contain a video input port node
+	  to connect the bridge to a display controller output (See this
+	  documentation [1]).
+
+[1]: Documentation/devicetree/bindings/media/video-interfaces.txt
+
+Example:
+	hdmi-bridge@39 {
+		compatible = "sil,sii9022";
+		reg = <0x39>;
+		reset-gpios = <&pioA 1 0>;
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				reg = <0>;
+				bridge_in: endpoint {
+					remote-endpoint = <&dc_out>;
+				};
+			};
+		};
+	};
--- a/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.txt
+++ b/Documentation/devicetree/bindings/display/bridge/toshiba,tc358767.txt
@ -0,0 +1,53 @@
+Toshiba TC358767 eDP bridge bindings
+
+Required properties:
+ - compatible: "toshiba,tc358767"
+ - reg: i2c address of the bridge, 0x68 or 0x0f, depending on bootstrap pins
+ - clock-names: should be "ref"
+ - clocks: OF device-tree clock specification for refclk input. The reference
+   clock rate must be 13 MHz, 19.2 MHz, 26 MHz, or 38.4 MHz.
+
+Optional properties:
+ - shutdown-gpios: OF device-tree gpio specification for SD pin
+                   (active high shutdown input)
+ - reset-gpios: OF device-tree gpio specification for RSTX pin
+                (active low system reset)
+ - ports: the ports node can contain video interface port nodes to connect
+   to a DPI/DSI source and to an eDP/DP sink according to [1][2]:
+    - port@0: DSI input port
+    - port@1: DPI input port
+    - port@2: eDP/DP output port
+
+[1]: Documentation/devicetree/bindings/graph.txt
+[2]: Documentation/devicetree/bindings/media/video-interfaces.txt
+
+Example:
+	edp-bridge@68 {
+		compatible = "toshiba,tc358767";
+		reg = <0x68>;
+		shutdown-gpios = <&gpio3 23 GPIO_ACTIVE_HIGH>;
+		reset-gpios = <&gpio3 24 GPIO_ACTIVE_LOW>;
+		clock-names = "ref";
+		clocks = <&edp_refclk>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@1 {
+				reg = <1>;
+
+				bridge_in: endpoint {
+					remote-endpoint = <&dpi_out>;
+				};
+			};
+
+			port@2 {
+				reg = <2>;
+
+				bridge_out: endpoint {
+					remote-endpoint = <&panel_in>;
+				};
+			};
+		};
+	};
--- a/Documentation/devicetree/bindings/display/connector/hdmi-connector.txt
+++ b/Documentation/devicetree/bindings/display/connector/hdmi-connector.txt
@ -8,6 +8,7 @@ Required properties:
 Optional properties:
 - label: a symbolic name for the connector
 - hpd-gpios: HPD GPIO number
+- ddc-i2c-bus: phandle link to the I2C controller used for DDC EDID probing

 Required nodes:
 - Video port for HDMI input
--- a/Documentation/devicetree/bindings/display/fsl,dcu.txt
+++ b/Documentation/devicetree/bindings/display/fsl,dcu.txt
@ -12,7 +12,7 @@ Required properties:
 - clock-names:		Should be "dcu" and "pix"
 			See ../clocks/clock-bindings.txt for details.
 - big-endian		Boolean property, LS1021A DCU registers are big-endian.
- fsl,panel:		The phandle to panel node.
+- port			Video port for the panel output

 Optional properties:
 - fsl,tcon:		The phandle to the timing controller node.
@ -24,6 +24,11 @@ dcu: dcu@2ce0000 {
 	clocks = <&platform_clk 0>, <&platform_clk 0>;
 	clock-names = "dcu", "pix";
 	big-endian;
-	fsl,panel = <&panel>;
 	fsl,tcon = <&tcon>;
+
+	port {
+		dcu_out: endpoint {
+			remote-endpoint = <&panel_out>;
+	     };
+	};
 };
--- a/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.txt
+++ b/Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.txt
@ -0,0 +1,148 @@
+Mediatek HDMI Encoder
+=====================
+
+The Mediatek HDMI encoder can generate HDMI 1.4a or MHL 2.0 signals from
+its parallel input.
+
+Required properties:
+- compatible: Should be "mediatek,<chip>-hdmi".
+- reg: Physical base address and length of the controller's registers
+- interrupts: The interrupt signal from the function block.
+- clocks: device clocks
+  See Documentation/devicetree/bindings/clock/clock-bindings.txt for details.
+- clock-names: must contain "pixel", "pll", "bclk", and "spdif".
+- phys: phandle link to the HDMI PHY node.
+  See Documentation/devicetree/bindings/phy/phy-bindings.txt for details.
+- phy-names: must contain "hdmi"
+- mediatek,syscon-hdmi: phandle link and register offset to the system
+  configuration registers. For mt8173 this must be offset 0x900 into the
+  MMSYS_CONFIG region: <&mmsys 0x900>.
+- ports: A node containing input and output port nodes with endpoint
+  definitions as documented in Documentation/devicetree/bindings/graph.txt.
+- port@0: The input port in the ports node should be connected to a DPI output
+  port.
+- port@1: The output port in the ports node should be connected to the input
+  port of a connector node that contains a ddc-i2c-bus property, or to the
+  input port of an attached bridge chip, such as a SlimPort transmitter.
+
+HDMI CEC
+========
+
+The HDMI CEC controller handles hotplug detection and CEC communication.
+
+Required properties:
+- compatible: Should be "mediatek,<chip>-cec"
+- reg: Physical base address and length of the controller's registers
+- interrupts: The interrupt signal from the function block.
+- clocks: device clock
+
+HDMI DDC
+========
+
+The HDMI DDC i2c controller is used to interface with the HDMI DDC pins.
+The Mediatek's I2C controller is used to interface with I2C devices.
+
+Required properties:
+- compatible: Should be "mediatek,<chip>-hdmi-ddc"
+- reg: Physical base address and length of the controller's registers
+- clocks: device clock
+- clock-names: Should be "ddc-i2c".
+
+HDMI PHY
+========
+
+The HDMI PHY serializes the HDMI encoder's three channel 10-bit parallel
+output and drives the HDMI pads.
+
+Required properties:
+- compatible: "mediatek,<chip>-hdmi-phy"
+- reg: Physical base address and length of the module's registers
+- clocks: PLL reference clock
+- clock-names: must contain "pll_ref"
+- clock-output-names: must be "hdmitx_dig_cts" on mt8173
+- #phy-cells: must be <0>
+- #clock-cells: must be <0>
+
+Optional properties:
+- mediatek,ibias: TX DRV bias current for <1.65Gbps, defaults to 0xa
+- mediatek,ibias_up: TX DRV bias current for >1.65Gbps, defaults to 0x1c
+
+Example:
+
+cec: cec@10013000 {
+	compatible = "mediatek,mt8173-cec";
+	reg = <0 0x10013000 0 0xbc>;
+	interrupts = <GIC_SPI 167 IRQ_TYPE_LEVEL_LOW>;
+	clocks = <&infracfg CLK_INFRA_CEC>;
+};
+
+hdmi_phy: hdmi-phy@10209100 {
+	compatible = "mediatek,mt8173-hdmi-phy";
+	reg = <0 0x10209100 0 0x24>;
+	clocks = <&apmixedsys CLK_APMIXED_HDMI_REF>;
+	clock-names = "pll_ref";
+	clock-output-names = "hdmitx_dig_cts";
+	mediatek,ibias = <0xa>;
+	mediatek,ibias_up = <0x1c>;
+	#clock-cells = <0>;
+	#phy-cells = <0>;
+};
+
+hdmi_ddc0: i2c@11012000 {
+	compatible = "mediatek,mt8173-hdmi-ddc";
+	reg = <0 0x11012000 0 0x1c>;
+	interrupts = <GIC_SPI 81 IRQ_TYPE_LEVEL_LOW>;
+	clocks = <&pericfg CLK_PERI_I2C5>;
+	clock-names = "ddc-i2c";
+};
+
+hdmi0: hdmi@14025000 {
+	compatible = "mediatek,mt8173-hdmi";
+	reg = <0 0x14025000 0 0x400>;
+	interrupts = <GIC_SPI 206 IRQ_TYPE_LEVEL_LOW>;
+	clocks = <&mmsys CLK_MM_HDMI_PIXEL>,
+		 <&mmsys CLK_MM_HDMI_PLLCK>,
+		 <&mmsys CLK_MM_HDMI_AUDIO>,
+		 <&mmsys CLK_MM_HDMI_SPDIF>;
+	clock-names = "pixel", "pll", "bclk", "spdif";
+	pinctrl-names = "default";
+	pinctrl-0 = <&hdmi_pin>;
+	phys = <&hdmi_phy>;
+	phy-names = "hdmi";
+	mediatek,syscon-hdmi = <&mmsys 0x900>;
+	assigned-clocks = <&topckgen CLK_TOP_HDMI_SEL>;
+	assigned-clock-parents = <&hdmi_phy>;
+
+	ports {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		port@0 {
+			reg = <0>;
+
+			hdmi0_in: endpoint {
+				remote-endpoint = <&dpi0_out>;
+			};
+		};
+
+		port@1 {
+			reg = <1>;
+
+			hdmi0_out: endpoint {
+				remote-endpoint = <&hdmi_con_in>;
+			};
+		};
+	};
+};
+
+connector {
+	compatible = "hdmi-connector";
+	type = "a";
+	ddc-i2c-bus = <&hdmiddc0>;
+
+	port {
+		hdmi_con_in: endpoint {
+			remote-endpoint = <&hdmi0_out>;
+		};
+	};
+};
--- a/Documentation/devicetree/bindings/display/msm/dsi.txt
+++ b/Documentation/devicetree/bindings/display/msm/dsi.txt
@ -11,8 +11,7 @@ Required properties:
  be 0 or 1, since we have 2 DSI controllers at most for now.
 - interrupts: The interrupt signal from the DSI block.
 - power-domains: Should be <&mmcc MDSS_GDSC>.
- clocks: device clocks
-  See Documentation/devicetree/bindings/clocks/clock-bindings.txt for details.
+- clocks: Phandles to device clocks.
 - clock-names: the following clocks are required:
  * "mdp_core_clk"
  * "iface_clk"
@ -23,16 +22,21 @@ Required properties:
  * "core_clk"
  For DSIv2, we need an additional clock:
   * "src_clk"
+- assigned-clocks: Parents of "byte_clk" and "pixel_clk" for the given platform.
+- assigned-clock-parents: The Byte clock and Pixel clock PLL outputs provided
+  by a DSI PHY block. See [1] for details on clock bindings.
 - vdd-supply: phandle to vdd regulator device node
 - vddio-supply: phandle to vdd-io regulator device node
 - vdda-supply: phandle to vdda regulator device node
- qcom,dsi-phy: phandle to DSI PHY device node
+- phys: phandle to DSI PHY device node
+- phy-names: the name of the corresponding PHY device
 - syscon-sfpb: A phandle to mmss_sfpb syscon node (only for DSIv2)
+- ports: Contains 2 DSI controller ports as child nodes. Each port contains
+  an endpoint subnode as defined in [2] and [3].

 Optional properties:
 - panel@0: Node of panel connected to this DSI controller.
-  See files in Documentation/devicetree/bindings/display/panel/ for each supported
-  panel.
+  See files in [4] for each supported panel.
 - qcom,dual-dsi-mode: Boolean value indicating if the DSI controller is
  driving a panel which needs 2 DSI links.
 - qcom,master-dsi: Boolean value indicating if the DSI controller is driving
@ -44,34 +48,38 @@ Optional properties:
 - pinctrl-names: the pin control state names; should contain "default"
 - pinctrl-0: the default pinctrl state (active)
 - pinctrl-n: the "sleep" pinctrl state
- port: DSI controller output port, containing one endpoint subnode.
+- ports: contains DSI controller input and output ports as children, each
+  containing one endpoint subnode.

  DSI Endpoint properties:
-  - remote-endpoint: set to phandle of the connected panel's endpoint.
-    See Documentation/devicetree/bindings/graph.txt for device graph info.
-  - qcom,data-lane-map: this describes how the logical DSI lanes are mapped
-    to the physical lanes on the given platform. The value contained in
-    index n describes what logical data lane is mapped to the physical data
-    lane n (DATAn, where n lies between 0 and 3).
+  - remote-endpoint: For port@0, set to phandle of the connected panel/bridge's
+    input endpoint. For port@1, set to the MDP interface output. See [2] for
+    device graph info.
+
+  - data-lanes: this describes how the physical DSI data lanes are mapped
+    to the logical lanes on the given platform. The value contained in
+    index n describes what physical lane is mapped to the logical lane n
+    (DATAn, where n lies between 0 and 3). The clock lane position is fixed
+    and can't be changed. Hence, they aren't a part of the DT bindings. See
+    [3] for more info on the data-lanes property.

    For example:

-    qcom,data-lane-map = <3 0 1 2>;
+    data-lanes = <3 0 1 2>;

-    The above mapping describes that the logical data lane DATA3 is mapped to
-    the physical data lane DATA0, logical DATA0 to physical DATA1, logic DATA1
-    to phys DATA2 and logic DATA2 to phys DATA3.
+    The above mapping describes that the logical data lane DATA0 is mapped to
+    the physical data lane DATA3, logical DATA1 to physical DATA0, logic DATA2
+    to phys DATA1 and logic DATA3 to phys DATA2.

    There are only a limited number of physical to logical mappings possible:
-
-    "0123": Logic 0->Phys 0; Logic 1->Phys 1; Logic 2->Phys 2; Logic 3->Phys 3;
-    "3012": Logic 3->Phys 0; Logic 0->Phys 1; Logic 1->Phys 2; Logic 2->Phys 3;
-    "2301": Logic 2->Phys 0; Logic 3->Phys 1; Logic 0->Phys 2; Logic 1->Phys 3;
-    "1230": Logic 1->Phys 0; Logic 2->Phys 1; Logic 3->Phys 2; Logic 0->Phys 3;
-    "0321": Logic 0->Phys 0; Logic 3->Phys 1; Logic 2->Phys 2; Logic 1->Phys 3;
-    "1032": Logic 1->Phys 0; Logic 0->Phys 1; Logic 3->Phys 2; Logic 2->Phys 3;
-    "2103": Logic 2->Phys 0; Logic 1->Phys 1; Logic 0->Phys 2; Logic 3->Phys 3;
-    "3210": Logic 3->Phys 0; Logic 2->Phys 1; Logic 1->Phys 2; Logic 0->Phys 3;
+    <0 1 2 3>
+    <1 2 3 0>
+    <2 3 0 1>
+    <3 0 1 2>
+    <0 3 2 1>
+    <1 0 3 2>
+    <2 1 0 3>
+    <3 2 1 0>

 DSI PHY:
 Required properties:
@ -86,11 +94,12 @@ Required properties:
  * "dsi_pll"
  * "dsi_phy"
  * "dsi_phy_regulator"
+- clock-cells: Must be 1. The DSI PHY block acts as a clock provider, creating
+  2 clocks: A byte clock (index 0), and a pixel clock (index 1).
 - qcom,dsi-phy-index: The ID of DSI PHY hardware instance. This should
  be 0 or 1, since we have 2 DSI PHYs at most for now.
 - power-domains: Should be <&mmcc MDSS_GDSC>.
- clocks: device clocks
-  See Documentation/devicetree/bindings/clocks/clock-bindings.txt for details.
+- clocks: Phandles to device clocks. See [1] for details on clock bindings.
 - clock-names: the following clocks are required:
  * "iface_clk"
 - vddio-supply: phandle to vdd-io regulator device node
@ -99,11 +108,16 @@ Optional properties:
 - qcom,dsi-phy-regulator-ldo-mode: Boolean value indicating if the LDO mode PHY
  regulator is wanted.

+[1] Documentation/devicetree/bindings/clocks/clock-bindings.txt
+[2] Documentation/devicetree/bindings/graph.txt
+[3] Documentation/devicetree/bindings/media/video-interfaces.txt
+[4] Documentation/devicetree/bindings/display/panel/
+
 Example:
-	mdss_dsi0: qcom,mdss_dsi@fd922800 {
+	dsi0: dsi@fd922800 {
 		compatible = "qcom,mdss-dsi-ctrl";
 		qcom,dsi-host-index = <0>;
-		interrupt-parent = <&mdss_mdp>;
+		interrupt-parent = <&mdp>;
 		interrupts = <4 0>;
 		reg-names = "dsi_ctrl";
 		reg = <0xfd922800 0x200>;
@ -124,19 +138,48 @@ Example:
 			<&mmcc MDSS_AHB_CLK>,
 			<&mmcc MDSS_MDP_CLK>,
 			<&mmcc MDSS_PCLK0_CLK>;
+
+		assigned-clocks =
+				 <&mmcc BYTE0_CLK_SRC>,
+				 <&mmcc PCLK0_CLK_SRC>;
+		assigned-clock-parents =
+				 <&dsi_phy0 0>,
+				 <&dsi_phy0 1>;
+
 		vdda-supply = <&pma8084_l2>;
 		vdd-supply = <&pma8084_l22>;
 		vddio-supply = <&pma8084_l12>;

-		qcom,dsi-phy = <&mdss_dsi_phy0>;
+		phys = <&dsi_phy0>;
+		phy-names ="dsi-phy";

 		qcom,dual-dsi-mode;
 		qcom,master-dsi;
 		qcom,sync-dual-dsi;

 		pinctrl-names = "default", "sleep";
-		pinctrl-0 = <&mdss_dsi_active>;
-		pinctrl-1 = <&mdss_dsi_suspend>;
+		pinctrl-0 = <&dsi_active>;
+		pinctrl-1 = <&dsi_suspend>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+			port@0 {
+				reg = <0>;
+				dsi0_in: endpoint {
+					remote-endpoint = <&mdp_intf1_out>;
+				};
+			};
+
+			port@1 {
+				reg = <1>;
+				dsi0_out: endpoint {
+					remote-endpoint = <&panel_in>;
+					data-lanes = <0 1 2 3>;
+				};
+			};
+		};

 		panel: panel@0 {
 			compatible = "sharp,lq101r1sx01";
@ -152,16 +195,9 @@ Example:
 				};
 			};
 		};
-
-		port {
-			dsi0_out: endpoint {
-				remote-endpoint = <&panel_in>;
-				lanes = <0 1 2 3>;
-			};
-		};
 	};

-	mdss_dsi_phy0: qcom,mdss_dsi_phy@fd922a00 {
+	dsi_phy0: dsi-phy@fd922a00 {
 		compatible = "qcom,dsi-phy-28nm-hpm";
 		qcom,dsi-phy-index = <0>;
 		reg-names =
@ -173,6 +209,7 @@ Example:
 			<0xfd922d80 0x7b>;
 		clock-names = "iface_clk";
 		clocks = <&mmcc MDSS_AHB_CLK>;
+		#clock-cells = <1>;
 		vddio-supply = <&pma8084_l12>;

 		qcom,dsi-phy-regulator-ldo-mode;
--- a/Documentation/devicetree/bindings/display/msm/mdp.txt
+++ b/Documentation/devicetree/bindings/display/msm/mdp.txt
@ -1,59 +0,0 @@
-Qualcomm adreno/snapdragon display controller
-
-Required properties:
- compatible:
-  * "qcom,mdp4" - mdp4
-  * "qcom,mdp5" - mdp5
- reg: Physical base address and length of the controller's registers.
- interrupts: The interrupt signal from the display controller.
- connectors: array of phandles for output device(s)
- clocks: device clocks
-  See ../clocks/clock-bindings.txt for details.
- clock-names: the following clocks are required.
-  For MDP4:
-   * "core_clk"
-   * "iface_clk"
-   * "lut_clk"
-   * "src_clk"
-   * "hdmi_clk"
-   * "mdp_clk"
-  For MDP5:
-   * "bus_clk"
-   * "iface_clk"
-   * "core_clk_src"
-   * "core_clk"
-   * "lut_clk" (some MDP5 versions may not need this)
-   * "vsync_clk"
-
-Optional properties:
- gpus: phandle for gpu device
- clock-names: the following clocks are optional:
-  * "lut_clk"
-
-Example:
-
-/ {
-	...
-
-	mdp: qcom,mdp@5100000 {
-		compatible = "qcom,mdp4";
-		reg = <0x05100000 0xf0000>;
-		interrupts = <GIC_SPI 75 0>;
-		connectors = <&hdmi>;
-		gpus = <&gpu>;
-		clock-names =
-		    "core_clk",
-		    "iface_clk",
-		    "lut_clk",
-		    "src_clk",
-		    "hdmi_clk",
-		    "mdp_clk";
-		clocks =
-		    <&mmcc MDP_SRC>,
-		    <&mmcc MDP_AHB_CLK>,
-		    <&mmcc MDP_LUT_CLK>,
-		    <&mmcc TV_SRC>,
-		    <&mmcc HDMI_TV_CLK>,
-		    <&mmcc MDP_TV_CLK>;
-	};
-};
--- a/Documentation/devicetree/bindings/display/msm/mdp4.txt
+++ b/Documentation/devicetree/bindings/display/msm/mdp4.txt
@ -0,0 +1,112 @@
+Qualcomm adreno/snapdragon MDP4 display controller
+
+Description:
+
+This is the bindings documentation for the MDP4 display controller found in
+SoCs like MSM8960, APQ8064 and MSM8660.
+
+Required properties:
+- compatible:
+  * "qcom,mdp4" - mdp4
+- reg: Physical base address and length of the controller's registers.
+- interrupts: The interrupt signal from the display controller.
+- clocks: device clocks
+  See ../clocks/clock-bindings.txt for details.
+- clock-names: the following clocks are required.
+  * "core_clk"
+  * "iface_clk"
+  * "bus_clk"
+  * "lut_clk"
+  * "hdmi_clk"
+  * "tv_clk"
+- ports: contains the list of output ports from MDP. These connect to interfaces
+  that are external to the MDP hardware, such as HDMI, DSI, EDP etc (LVDS is a
+  special case since it is a part of the MDP block itself).
+
+  Each output port contains an endpoint that describes how it is connected to an
+  external interface. These are described by the standard properties documented
+  here:
+	Documentation/devicetree/bindings/graph.txt
+	Documentation/devicetree/bindings/media/video-interfaces.txt
+
+  The output port mappings are:
+	Port 0 -> LCDC/LVDS
+	Port 1 -> DSI1 Cmd/Video
+	Port 2 -> DSI2 Cmd/Video
+	Port 3 -> DTV
+
+Optional properties:
+- clock-names: the following clocks are optional:
+  * "lut_clk"
+
+Example:
+
+/ {
+	...
+
+	hdmi: hdmi@4a00000 {
+		...
+		ports {
+			...
+			port@0 {
+				reg = <0>;
+				hdmi_in: endpoint {
+					remote-endpoint = <&mdp_dtv_out>;
+				};
+			};
+			...
+		};
+		...
+	};
+
+	...
+
+	mdp: mdp@5100000 {
+		compatible = "qcom,mdp4";
+		reg = <0x05100000 0xf0000>;
+		interrupts = <GIC_SPI 75 0>;
+		clock-names =
+		    "core_clk",
+		    "iface_clk",
+		    "lut_clk",
+		    "hdmi_clk",
+		    "tv_clk";
+		clocks =
+		    <&mmcc MDP_CLK>,
+		    <&mmcc MDP_AHB_CLK>,
+		    <&mmcc MDP_AXI_CLK>,
+		    <&mmcc MDP_LUT_CLK>,
+		    <&mmcc HDMI_TV_CLK>,
+		    <&mmcc MDP_TV_CLK>;
+
+		ports {
+			#address-cells = <1>;
+			#size-cells = <0>;
+
+				port@0 {
+					reg = <0>;
+					mdp_lvds_out: endpoint {
+					};
+				};
+
+				port@1 {
+					reg = <1>;
+					mdp_dsi1_out: endpoint {
+					};
+				};
+
+				port@2 {
+					reg = <2>;
+					mdp_dsi2_out: endpoint {
+					};
+				};
+
+				port@3 {
+					reg = <3>;
+					mdp_dtv_out: endpoint {
+						remote-endpoint = <&hdmi_in>;
+					};
+				};
+		};
+	};
+};
--- a/Documentation/devicetree/bindings/display/msm/mdp5.txt
+++ b/Documentation/devicetree/bindings/display/msm/mdp5.txt
@ -0,0 +1,160 @@
+Qualcomm adreno/snapdragon MDP5 display controller
+
+Description:
+
+This is the bindings documentation for the Mobile Display Subsytem(MDSS) that
+encapsulates sub-blocks like MDP5, DSI, HDMI, eDP etc, and the MDP5 display
+controller found in SoCs like MSM8974, APQ8084, MSM8916, MSM8994 and MSM8996.
+
+MDSS:
+Required properties:
+- compatible:
+  * "qcom,mdss" - MDSS
+- reg: Physical base address and length of the controller's registers.
+- reg-names: The names of register regions. The following regions are required:
+  * "mdss_phys"
+  * "vbif_phys"
+- interrupts: The interrupt signal from MDSS.
+- interrupt-controller: identifies the node as an interrupt controller.
+- #interrupt-cells: specifies the number of cells needed to encode an interrupt
+  source, should be 1.
+- power-domains: a power domain consumer specifier according to
+  Documentation/devicetree/bindings/power/power_domain.txt
+- clocks: device clocks. See ../clocks/clock-bindings.txt for details.
+- clock-names: the following clocks are required.
+  * "iface_clk"
+  * "bus_clk"
+  * "vsync_clk"
+- #address-cells: number of address cells for the MDSS children. Should be 1.
+- #size-cells: Should be 1.
+- ranges: parent bus address space is the same as the child bus address space.
+
+Optional properties:
+- clock-names: the following clocks are optional:
+  * "lut_clk"
+
+MDP5:
+Required properties:
+- compatible:
+  * "qcom,mdp5" - MDP5
+- reg: Physical base address and length of the controller's registers.
+- reg-names: The names of register regions. The following regions are required:
+  * "mdp_phys"
+- interrupts: Interrupt line from MDP5 to MDSS interrupt controller.
+- interrupt-parent: phandle to the MDSS block
+  through MDP block
+- clocks: device clocks. See ../clocks/clock-bindings.txt for details.
+- clock-names: the following clocks are required.
+-   * "bus_clk"
+-   * "iface_clk"
+-   * "core_clk"
+-   * "vsync_clk"
+- ports: contains the list of output ports from MDP. These connect to interfaces
+  that are external to the MDP hardware, such as HDMI, DSI, EDP etc (LVDS is a
+  special case since it is a part of the MDP block itself).
+
+  Each output port contains an endpoint that describes how it is connected to an
+  external interface. These are described by the standard properties documented
+  here:
+	Documentation/devicetree/bindings/graph.txt
+	Documentation/devicetree/bindings/media/video-interfaces.txt
+
+  The availability of output ports can vary across SoC revisions:
+
+  For MSM8974 and APQ8084:
+	 Port 0 -> MDP_INTF0 (eDP)
+	 Port 1 -> MDP_INTF1 (DSI1)
+	 Port 2 -> MDP_INTF2 (DSI2)
+	 Port 3 -> MDP_INTF3 (HDMI)
+
+  For MSM8916:
+	 Port 0 -> MDP_INTF1 (DSI1)
+
+  For MSM8994 and MSM8996:
+	 Port 0 -> MDP_INTF1 (DSI1)
+	 Port 1 -> MDP_INTF2 (DSI2)
+	 Port 2 -> MDP_INTF3 (HDMI)
+
+Optional properties:
+- clock-names: the following clocks are optional:
+  * "lut_clk"
+
+Example:
+
+/ {
+	...
+
+	mdss: mdss@1a00000 {
+		compatible = "qcom,mdss";
+		reg = <0x1a00000 0x1000>,
+		      <0x1ac8000 0x3000>;
+		reg-names = "mdss_phys", "vbif_phys";
+
+		power-domains = <&gcc MDSS_GDSC>;
+
+		clocks = <&gcc GCC_MDSS_AHB_CLK>,
+			 <&gcc GCC_MDSS_AXI_CLK>,
+			 <&gcc GCC_MDSS_VSYNC_CLK>;
+		clock-names = "iface_clk",
+			      "bus_clk",
+			      "vsync_clk"
+
+		interrupts = <0 72 0>;
+
+		interrupt-controller;
+		#interrupt-cells = <1>;
+
+		#address-cells = <1>;
+		#size-cells = <1>;
+		ranges;
+
+		mdp: mdp@1a01000 {
+			compatible = "qcom,mdp5";
+			reg = <0x1a01000 0x90000>;
+			reg-names = "mdp_phys";
+
+			interrupt-parent = <&mdss>;
+			interrupts = <0 0>;
+
+			clocks = <&gcc GCC_MDSS_AHB_CLK>,
+				 <&gcc GCC_MDSS_AXI_CLK>,
+				 <&gcc GCC_MDSS_MDP_CLK>,
+				 <&gcc GCC_MDSS_VSYNC_CLK>;
+			clock-names = "iface_clk",
+				      "bus_clk",
+				      "core_clk",
+				      "vsync_clk";
+
+			ports {
+				#address-cells = <1>;
+				#size-cells = <0>;
+
+				port@0 {
+					reg = <0>;
+					mdp5_intf1_out: endpoint {
+						remote-endpoint = <&dsi0_in>;
+					};
+				};
+			};
+		};
+
+		dsi0: dsi@1a98000 {
+			...
+			ports {
+				...
+				port@0 {
+					reg = <0>;
+					dsi0_in: endpoint {
+						remote-endpoint = <&mdp5_intf1_out>;
+					};
+				};
+				...
+			};
+			...
+		};
+
+		dsi_phy0: dsi-phy@1a98300 {
+			...
+		};
+	};
+};
--- a/Documentation/devicetree/bindings/display/panel/lg,lp079qx1-sp0v.txt
+++ b/Documentation/devicetree/bindings/display/panel/lg,lp079qx1-sp0v.txt
@ -0,0 +1,7 @@
+LG LP079QX1-SP0V 7.9" (1536x2048 pixels) TFT LCD panel
+
+Required properties:
+- compatible: should be "lg,lp079qx1-sp0v"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
--- a/Documentation/devicetree/bindings/display/panel/lg,lp097qx1-spa1.txt
+++ b/Documentation/devicetree/bindings/display/panel/lg,lp097qx1-spa1.txt
@ -0,0 +1,7 @@
+LG 9.7" (2048x1536 pixels) TFT LCD panel
+
+Required properties:
+- compatible: should be "lg,lp097qx1-spa1"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
--- a/Documentation/devicetree/bindings/display/panel/panel-dpi.txt
+++ b/Documentation/devicetree/bindings/display/panel/panel-dpi.txt
@ -7,6 +7,8 @@ Required properties:
 Optional properties:
 - label: a symbolic name for the panel
 - enable-gpios: panel enable gpio
+- reset-gpios: GPIO to control the RESET pin
+- vcc-supply: phandle of regulator that will be used to enable power to the display

 Required nodes:
 - "panel-timing" containing video timings
--- a/Documentation/devicetree/bindings/display/panel/samsung,lsn122dl01-c01.txt
+++ b/Documentation/devicetree/bindings/display/panel/samsung,lsn122dl01-c01.txt
@ -0,0 +1,7 @@
+Samsung 12.2" (2560x1600 pixels) TFT LCD panel
+
+Required properties:
+- compatible: should be "samsung,lsn122dl01-c01"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
--- a/Documentation/devicetree/bindings/display/panel/sharp,lq101k1ly04.txt
+++ b/Documentation/devicetree/bindings/display/panel/sharp,lq101k1ly04.txt
@ -0,0 +1,7 @@
+Sharp Display Corp. LQ101K1LY04 10.07" WXGA TFT LCD panel
+
+Required properties:
+- compatible: should be "sharp,lq101k1ly04"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
--- a/Documentation/devicetree/bindings/display/panel/sharp,lq123p1jx31.txt
+++ b/Documentation/devicetree/bindings/display/panel/sharp,lq123p1jx31.txt
@ -0,0 +1,7 @@
+Sharp 12.3" (2400x1600 pixels) TFT LCD panel
+
+Required properties:
+- compatible: should be "sharp,lq123p1jx31"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
--- a/Documentation/devicetree/bindings/display/panel/starry,kr122ea0sra.txt
+++ b/Documentation/devicetree/bindings/display/panel/starry,kr122ea0sra.txt
@ -0,0 +1,7 @@
+Starry 12.2" (1920x1200 pixels) TFT LCD panel
+
+Required properties:
+- compatible: should be "starry,kr122ea0sra"
+
+This binding is compatible with the simple-panel binding, which is specified
+in simple-panel.txt in this directory.
--- a/Documentation/devicetree/bindings/display/rockchip/analogix_dp-rockchip.txt
+++ b/Documentation/devicetree/bindings/display/rockchip/analogix_dp-rockchip.txt
@ -2,7 +2,8 @@ Rockchip RK3288 specific extensions to the Analogix Display Port
 ================================

 Required properties:
- compatible: "rockchip,rk3288-edp";
+- compatible: "rockchip,rk3288-dp",
+	      "rockchip,rk3399-edp";

 - reg: physical base address of the controller and length

@ -27,6 +28,12 @@ Required properties:
    Port 0: contained 2 endpoints, connecting to the output of vop.
    Port 1: contained 1 endpoint, connecting to the input of panel.

+Optional property for different chips:
+- clocks: from common clock binding: handle to grf_vio clock.
+
+- clock-names: from common clock binding:
+	       Required elements: "grf"
+
 For the below properties, please refer to Analogix DP binding document:
 * Documentation/devicetree/bindings/drm/bridge/analogix_dp.txt
 - phys (required)
--- a/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt
+++ b/Documentation/devicetree/bindings/display/tegra/nvidia,tegra20-host1x.txt
@ -208,6 +208,7 @@ of the following host1x client modules:
    See ../clocks/clock-bindings.txt for details.
  - clock-names: Must include the following entries:
    - sor: clock input for the SOR hardware
+    - source: source clock for the SOR clock
    - parent: input for the pixel clock
    - dp: reference clock for the SOR clock
    - safe: safe reference for the SOR clock during power up
@ -226,9 +227,9 @@ of the following host1x client modules:
  - nvidia,dpaux: phandle to a DispayPort AUX interface

 - dpaux: DisplayPort AUX interface
-  - compatible: For Tegra124, must contain "nvidia,tegra124-dpaux".  Otherwise,
-    must contain '"nvidia,<chip>-dpaux", "nvidia,tegra124-dpaux"', where
-    <chip> is tegra132.
+  - compatible : Should contain one of the following:
+    - "nvidia,tegra124-dpaux": for Tegra124 and Tegra132
+    - "nvidia,tegra210-dpaux": for Tegra210
  - reg: Physical base address and length of the controller's registers.
  - interrupts: The interrupt outputs from the controller.
  - clocks: Must contain an entry for each entry in clock-names.
@ -241,6 +242,12 @@ of the following host1x client modules:
  - reset-names: Must include the following entries:
    - dpaux
  - vdd-supply: phandle of a supply that powers the DisplayPort link
+  - i2c-bus: Subnode where I2C slave devices are listed. This subnode
+    must be always present. If there are no I2C slave devices, an empty
+    node should be added. See ../../i2c/i2c.txt for more information.
+
+  See ../pinctrl/nvidia,tegra124-dpaux-padctl.txt for information
+  regarding the DPAUX pad controller bindings.

 Example:

--- a/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-dpaux-padctl.txt
+++ b/Documentation/devicetree/bindings/pinctrl/nvidia,tegra124-dpaux-padctl.txt
@ -0,0 +1,60 @@
+Device tree binding for NVIDIA Tegra DPAUX pad controller
+========================================================
+
+The Tegra Display Port Auxiliary (DPAUX) pad controller manages two pins
+which can be assigned to either the DPAUX channel or to an I2C
+controller.
+
+This document defines the device-specific binding for the DPAUX pad
+controller. Refer to pinctrl-bindings.txt in this directory for generic
+information about pin controller device tree bindings. Please refer to
+the binding document ../display/tegra/nvidia,tegra20-host1x.txt for more
+details on the DPAUX binding.
+
+Pin muxing:
+-----------
+
+Child nodes contain the pinmux configurations following the conventions
+from the pinctrl-bindings.txt document.
+
+Since only three configurations are possible, only three child nodes are
+needed to describe the pin mux'ing options for the DPAUX pads.
+Furthermore, given that the pad functions are only applicable to a
+single set of pads, the child nodes only need to describe the pad group
+the functions are being applied to rather than the individual pads.
+
+Required properties:
+- groups: Must be "dpaux-io"
+- function: Must be either "aux", "i2c" or "off".
+
+Example:
+--------
+
+	dpaux@545c0000 {
+		...
+
+		state_dpaux_aux: pinmux-aux {
+			groups = "dpaux-io";
+			function = "aux";
+		};
+
+		state_dpaux_i2c: pinmux-i2c {
+			groups = "dpaux-io";
+			function = "i2c";
+		};
+
+		state_dpaux_off: pinmux-off {
+			groups = "dpaux-io";
+			function = "off";
+		};
+	};
+
+	...
+
+	i2c@7000d100 {
+		...
+		pinctrl-0 = <&state_dpaux_i2c>;
+		pinctrl-1 = <&state_dpaux_off>;
+		pinctrl-names = "default", "idle";
+		status = "disabled";
+	};
--- a/Documentation/devicetree/bindings/vendor-prefixes.txt
+++ b/Documentation/devicetree/bindings/vendor-prefixes.txt
@ -249,6 +249,7 @@ sony	Sony Corporation
 spansion	Spansion Inc.
 sprd	Spreadtrum Communications Inc.
 st	STMicroelectronics
+starry	Starry Electronic Technology (ShenZhen) Co., LTD
 startek	Startek
 ste	ST-Ericsson
 stericsson	ST-Ericsson
--- a/Documentation/gpu/drm-internals.rst
+++ b/Documentation/gpu/drm-internals.rst
@ -0,0 +1,381 @@
+=============
+DRM Internals
+=============
+
+This chapter documents DRM internals relevant to driver authors and
+developers working to add support for the latest features to existing
+drivers.
+
+First, we go over some typical driver initialization requirements, like
+setting up command buffers, creating an initial output configuration,
+and initializing core services. Subsequent sections cover core internals
+in more detail, providing implementation notes and examples.
+
+The DRM layer provides several services to graphics drivers, many of
+them driven by the application interfaces it provides through libdrm,
+the library that wraps most of the DRM ioctls. These include vblank
+event handling, memory management, output management, framebuffer
+management, command submission & fencing, suspend/resume support, and
+DMA services.
+
+Driver Initialization
+=====================
+
+At the core of every DRM driver is a :c:type:`struct drm_driver
+<drm_driver>` structure. Drivers typically statically initialize
+a drm_driver structure, and then pass it to
+:c:func:`drm_dev_alloc()` to allocate a device instance. After the
+device instance is fully initialized it can be registered (which makes
+it accessible from userspace) using :c:func:`drm_dev_register()`.
+
+The :c:type:`struct drm_driver <drm_driver>` structure
+contains static information that describes the driver and features it
+supports, and pointers to methods that the DRM core will call to
+implement the DRM API. We will first go through the :c:type:`struct
+drm_driver <drm_driver>` static information fields, and will
+then describe individual operations in details as they get used in later
+sections.
+
+Driver Information
+------------------
+
+Driver Features
+~~~~~~~~~~~~~~~
+
+Drivers inform the DRM core about their requirements and supported
+features by setting appropriate flags in the driver_features field.
+Since those flags influence the DRM core behaviour since registration
+time, most of them must be set to registering the :c:type:`struct
+drm_driver <drm_driver>` instance.
+
+u32 driver_features;
+
+DRIVER_USE_AGP
+    Driver uses AGP interface, the DRM core will manage AGP resources.
+
+DRIVER_REQUIRE_AGP
+    Driver needs AGP interface to function. AGP initialization failure
+    will become a fatal error.
+
+DRIVER_PCI_DMA
+    Driver is capable of PCI DMA, mapping of PCI DMA buffers to
+    userspace will be enabled. Deprecated.
+
+DRIVER_SG
+    Driver can perform scatter/gather DMA, allocation and mapping of
+    scatter/gather buffers will be enabled. Deprecated.
+
+DRIVER_HAVE_DMA
+    Driver supports DMA, the userspace DMA API will be supported.
+    Deprecated.
+
+DRIVER_HAVE_IRQ; DRIVER_IRQ_SHARED
+    DRIVER_HAVE_IRQ indicates whether the driver has an IRQ handler
+    managed by the DRM Core. The core will support simple IRQ handler
+    installation when the flag is set. The installation process is
+    described in ?.
+
+    DRIVER_IRQ_SHARED indicates whether the device & handler support
+    shared IRQs (note that this is required of PCI drivers).
+
+DRIVER_GEM
+    Driver use the GEM memory manager.
+
+DRIVER_MODESET
+    Driver supports mode setting interfaces (KMS).
+
+DRIVER_PRIME
+    Driver implements DRM PRIME buffer sharing.
+
+DRIVER_RENDER
+    Driver supports dedicated render nodes.
+
+DRIVER_ATOMIC
+    Driver supports atomic properties. In this case the driver must
+    implement appropriate obj->atomic_get_property() vfuncs for any
+    modeset objects with driver specific properties.
+
+Major, Minor and Patchlevel
+~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+int major; int minor; int patchlevel;
+The DRM core identifies driver versions by a major, minor and patch
+level triplet. The information is printed to the kernel log at
+initialization time and passed to userspace through the
+DRM_IOCTL_VERSION ioctl.
+
+The major and minor numbers are also used to verify the requested driver
+API version passed to DRM_IOCTL_SET_VERSION. When the driver API
+changes between minor versions, applications can call
+DRM_IOCTL_SET_VERSION to select a specific version of the API. If the
+requested major isn't equal to the driver major, or the requested minor
+is larger than the driver minor, the DRM_IOCTL_SET_VERSION call will
+return an error. Otherwise the driver's set_version() method will be
+called with the requested version.
+
+Name, Description and Date
+~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+char \*name; char \*desc; char \*date;
+The driver name is printed to the kernel log at initialization time,
+used for IRQ registration and passed to userspace through
+DRM_IOCTL_VERSION.
+
+The driver description is a purely informative string passed to
+userspace through the DRM_IOCTL_VERSION ioctl and otherwise unused by
+the kernel.
+
+The driver date, formatted as YYYYMMDD, is meant to identify the date of
+the latest modification to the driver. However, as most drivers fail to
+update it, its value is mostly useless. The DRM core prints it to the
+kernel log at initialization time and passes it to userspace through the
+DRM_IOCTL_VERSION ioctl.
+
+Device Instance and Driver Handling
+-----------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_drv.c
+   :doc: driver instance overview
+
+.. kernel-doc:: drivers/gpu/drm/drm_drv.c
+   :export:
+
+Driver Load
+-----------
+
+IRQ Registration
+~~~~~~~~~~~~~~~~
+
+The DRM core tries to facilitate IRQ handler registration and
+unregistration by providing :c:func:`drm_irq_install()` and
+:c:func:`drm_irq_uninstall()` functions. Those functions only
+support a single interrupt per device, devices that use more than one
+IRQs need to be handled manually.
+
+Managed IRQ Registration
+''''''''''''''''''''''''
+
+:c:func:`drm_irq_install()` starts by calling the irq_preinstall
+driver operation. The operation is optional and must make sure that the
+interrupt will not get fired by clearing all pending interrupt flags or
+disabling the interrupt.
+
+The passed-in IRQ will then be requested by a call to
+:c:func:`request_irq()`. If the DRIVER_IRQ_SHARED driver feature
+flag is set, a shared (IRQF_SHARED) IRQ handler will be requested.
+
+The IRQ handler function must be provided as the mandatory irq_handler
+driver operation. It will get passed directly to
+:c:func:`request_irq()` and thus has the same prototype as all IRQ
+handlers. It will get called with a pointer to the DRM device as the
+second argument.
+
+Finally the function calls the optional irq_postinstall driver
+operation. The operation usually enables interrupts (excluding the
+vblank interrupt, which is enabled separately), but drivers may choose
+to enable/disable interrupts at a different time.
+
+:c:func:`drm_irq_uninstall()` is similarly used to uninstall an
+IRQ handler. It starts by waking up all processes waiting on a vblank
+interrupt to make sure they don't hang, and then calls the optional
+irq_uninstall driver operation. The operation must disable all hardware
+interrupts. Finally the function frees the IRQ by calling
+:c:func:`free_irq()`.
+
+Manual IRQ Registration
+'''''''''''''''''''''''
+
+Drivers that require multiple interrupt handlers can't use the managed
+IRQ registration functions. In that case IRQs must be registered and
+unregistered manually (usually with the :c:func:`request_irq()` and
+:c:func:`free_irq()` functions, or their :c:func:`devm_request_irq()` and
+:c:func:`devm_free_irq()` equivalents).
+
+When manually registering IRQs, drivers must not set the
+DRIVER_HAVE_IRQ driver feature flag, and must not provide the
+irq_handler driver operation. They must set the :c:type:`struct
+drm_device <drm_device>` irq_enabled field to 1 upon
+registration of the IRQs, and clear it to 0 after unregistering the
+IRQs.
+
+Memory Manager Initialization
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Every DRM driver requires a memory manager which must be initialized at
+load time. DRM currently contains two memory managers, the Translation
+Table Manager (TTM) and the Graphics Execution Manager (GEM). This
+document describes the use of the GEM memory manager only. See ? for
+details.
+
+Miscellaneous Device Configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Another task that may be necessary for PCI devices during configuration
+is mapping the video BIOS. On many devices, the VBIOS describes device
+configuration, LCD panel timings (if any), and contains flags indicating
+device state. Mapping the BIOS can be done using the pci_map_rom()
+call, a convenience function that takes care of mapping the actual ROM,
+whether it has been shadowed into memory (typically at address 0xc0000)
+or exists on the PCI device in the ROM BAR. Note that after the ROM has
+been mapped and any necessary information has been extracted, it should
+be unmapped; on many devices, the ROM address decoder is shared with
+other BARs, so leaving it mapped could cause undesired behaviour like
+hangs or memory corruption.
+
+Bus-specific Device Registration and PCI Support
+------------------------------------------------
+
+A number of functions are provided to help with device registration. The
+functions deal with PCI and platform devices respectively and are only
+provided for historical reasons. These are all deprecated and shouldn't
+be used in new drivers. Besides that there's a few helpers for pci
+drivers.
+
+.. kernel-doc:: drivers/gpu/drm/drm_pci.c
+   :export:
+
+.. kernel-doc:: drivers/gpu/drm/drm_platform.c
+   :export:
+
+Open/Close, File Operations and IOCTLs
+======================================
+
+Open and Close
+--------------
+
+Open and close handlers. None of those methods are mandatory::
+
+    int (*firstopen) (struct drm_device *);
+    void (*lastclose) (struct drm_device *);
+    int (*open) (struct drm_device *, struct drm_file *);
+    void (*preclose) (struct drm_device *, struct drm_file *);
+    void (*postclose) (struct drm_device *, struct drm_file *);
+
+The firstopen method is called by the DRM core for legacy UMS (User Mode
+Setting) drivers only when an application opens a device that has no
+other opened file handle. UMS drivers can implement it to acquire device
+resources. KMS drivers can't use the method and must acquire resources
+in the load method instead.
+
+Similarly the lastclose method is called when the last application
+holding a file handle opened on the device closes it, for both UMS and
+KMS drivers. Additionally, the method is also called at module unload
+time or, for hot-pluggable devices, when the device is unplugged. The
+firstopen and lastclose calls can thus be unbalanced.
+
+The open method is called every time the device is opened by an
+application. Drivers can allocate per-file private data in this method
+and store them in the struct :c:type:`struct drm_file
+<drm_file>` driver_priv field. Note that the open method is
+called before firstopen.
+
+The close operation is split into preclose and postclose methods.
+Drivers must stop and cleanup all per-file operations in the preclose
+method. For instance pending vertical blanking and page flip events must
+be cancelled. No per-file operation is allowed on the file handle after
+returning from the preclose method.
+
+Finally the postclose method is called as the last step of the close
+operation, right before calling the lastclose method if no other open
+file handle exists for the device. Drivers that have allocated per-file
+private data in the open method should free it here.
+
+The lastclose method should restore CRTC and plane properties to default
+value, so that a subsequent open of the device will not inherit state
+from the previous user. It can also be used to execute delayed power
+switching state changes, e.g. in conjunction with the :ref:`vga_switcheroo`
+infrastructure. Beyond that KMS drivers should not do any
+further cleanup. Only legacy UMS drivers might need to clean up device
+state so that the vga console or an independent fbdev driver could take
+over.
+
+File Operations
+---------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_fops.c
+   :doc: file operations
+
+.. kernel-doc:: drivers/gpu/drm/drm_fops.c
+   :export:
+
+IOCTLs
+------
+
+struct drm_ioctl_desc \*ioctls; int num_ioctls;
+    Driver-specific ioctls descriptors table.
+
+Driver-specific ioctls numbers start at DRM_COMMAND_BASE. The ioctls
+descriptors table is indexed by the ioctl number offset from the base
+value. Drivers can use the DRM_IOCTL_DEF_DRV() macro to initialize
+the table entries.
+
+::
+
+    DRM_IOCTL_DEF_DRV(ioctl, func, flags)
+
+``ioctl`` is the ioctl name. Drivers must define the DRM_##ioctl and
+DRM_IOCTL_##ioctl macros to the ioctl number offset from
+DRM_COMMAND_BASE and the ioctl number respectively. The first macro is
+private to the device while the second must be exposed to userspace in a
+public header.
+
+``func`` is a pointer to the ioctl handler function compatible with the
+``drm_ioctl_t`` type.
+
+::
+
+    typedef int drm_ioctl_t(struct drm_device *dev, void *data,
+            struct drm_file *file_priv);
+
+``flags`` is a bitmask combination of the following values. It restricts
+how the ioctl is allowed to be called.
+
+-  DRM_AUTH - Only authenticated callers allowed
+
+-  DRM_MASTER - The ioctl can only be called on the master file handle
+
+-  DRM_ROOT_ONLY - Only callers with the SYSADMIN capability allowed
+
+-  DRM_CONTROL_ALLOW - The ioctl can only be called on a control
+   device
+
+-  DRM_UNLOCKED - The ioctl handler will be called without locking the
+   DRM global mutex. This is the enforced default for kms drivers (i.e.
+   using the DRIVER_MODESET flag) and hence shouldn't be used any more
+   for new drivers.
+
+.. kernel-doc:: drivers/gpu/drm/drm_ioctl.c
+   :export:
+
+Legacy Support Code
+===================
+
+The section very briefly covers some of the old legacy support code
+which is only used by old DRM drivers which have done a so-called
+shadow-attach to the underlying device instead of registering as a real
+driver. This also includes some of the old generic buffer management and
+command submission code. Do not use any of this in new and modern
+drivers.
+
+Legacy Suspend/Resume
+---------------------
+
+The DRM core provides some suspend/resume code, but drivers wanting full
+suspend/resume support should provide save() and restore() functions.
+These are called at suspend, hibernate, or resume time, and should
+perform any state save or restore required by your device across suspend
+or hibernate states.
+
+int (\*suspend) (struct drm_device \*, pm_message_t state); int
+(\*resume) (struct drm_device \*);
+Those are legacy suspend and resume methods which *only* work with the
+legacy shadow-attach driver registration functions. New driver should
+use the power management interface provided by their bus type (usually
+through the :c:type:`struct device_driver <device_driver>`
+dev_pm_ops) and set these methods to NULL.
+
+Legacy DMA Services
+-------------------
+
+This should cover how DMA mapping etc. is supported by the core. These
+functions are deprecated and should not be used.
--- a/Documentation/gpu/drm-kms-helpers.rst
+++ b/Documentation/gpu/drm-kms-helpers.rst
@ -0,0 +1,260 @@
+=============================
+Mode Setting Helper Functions
+=============================
+
+The plane, CRTC, encoder and connector functions provided by the drivers
+implement the DRM API. They're called by the DRM core and ioctl handlers
+to handle device state changes and configuration request. As
+implementing those functions often requires logic not specific to
+drivers, mid-layer helper functions are available to avoid duplicating
+boilerplate code.
+
+The DRM core contains one mid-layer implementation. The mid-layer
+provides implementations of several plane, CRTC, encoder and connector
+functions (called from the top of the mid-layer) that pre-process
+requests and call lower-level functions provided by the driver (at the
+bottom of the mid-layer). For instance, the
+:c:func:`drm_crtc_helper_set_config()` function can be used to
+fill the :c:type:`struct drm_crtc_funcs <drm_crtc_funcs>`
+set_config field. When called, it will split the set_config operation
+in smaller, simpler operations and call the driver to handle them.
+
+To use the mid-layer, drivers call
+:c:func:`drm_crtc_helper_add()`,
+:c:func:`drm_encoder_helper_add()` and
+:c:func:`drm_connector_helper_add()` functions to install their
+mid-layer bottom operations handlers, and fill the :c:type:`struct
+drm_crtc_funcs <drm_crtc_funcs>`, :c:type:`struct
+drm_encoder_funcs <drm_encoder_funcs>` and :c:type:`struct
+drm_connector_funcs <drm_connector_funcs>` structures with
+pointers to the mid-layer top API functions. Installing the mid-layer
+bottom operation handlers is best done right after registering the
+corresponding KMS object.
+
+The mid-layer is not split between CRTC, encoder and connector
+operations. To use it, a driver must provide bottom functions for all of
+the three KMS entities.
+
+Atomic Modeset Helper Functions Reference
+=========================================
+
+Overview
+--------
+
+.. kernel-doc:: drivers/gpu/drm/drm_atomic_helper.c
+   :doc: overview
+
+Implementing Asynchronous Atomic Commit
+---------------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_atomic_helper.c
+   :doc: implementing nonblocking commit
+
+Atomic State Reset and Initialization
+-------------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_atomic_helper.c
+   :doc: atomic state reset and initialization
+
+.. kernel-doc:: include/drm/drm_atomic_helper.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_atomic_helper.c
+   :export:
+
+Modeset Helper Reference for Common Vtables
+===========================================
+
+.. kernel-doc:: include/drm/drm_modeset_helper_vtables.h
+   :internal:
+
+.. kernel-doc:: include/drm/drm_modeset_helper_vtables.h
+   :doc: overview
+
+Legacy CRTC/Modeset Helper Functions Reference
+==============================================
+
+.. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c
+   :export:
+
+.. kernel-doc:: drivers/gpu/drm/drm_crtc_helper.c
+   :doc: overview
+
+Output Probing Helper Functions Reference
+=========================================
+
+.. kernel-doc:: drivers/gpu/drm/drm_probe_helper.c
+   :doc: output probing helper overview
+
+.. kernel-doc:: drivers/gpu/drm/drm_probe_helper.c
+   :export:
+
+fbdev Helper Functions Reference
+================================
+
+.. kernel-doc:: drivers/gpu/drm/drm_fb_helper.c
+   :doc: fbdev helpers
+
+.. kernel-doc:: drivers/gpu/drm/drm_fb_helper.c
+   :export:
+
+.. kernel-doc:: include/drm/drm_fb_helper.h
+   :internal:
+
+Framebuffer CMA Helper Functions Reference
+==========================================
+
+.. kernel-doc:: drivers/gpu/drm/drm_fb_cma_helper.c
+   :doc: framebuffer cma helper functions
+
+.. kernel-doc:: drivers/gpu/drm/drm_fb_cma_helper.c
+   :export:
+
+Display Port Helper Functions Reference
+=======================================
+
+.. kernel-doc:: drivers/gpu/drm/drm_dp_helper.c
+   :doc: dp helpers
+
+.. kernel-doc:: include/drm/drm_dp_helper.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_dp_helper.c
+   :export:
+
+Display Port Dual Mode Adaptor Helper Functions Reference
+=========================================================
+
+.. kernel-doc:: drivers/gpu/drm/drm_dp_dual_mode_helper.c
+   :doc: dp dual mode helpers
+
+.. kernel-doc:: include/drm/drm_dp_dual_mode_helper.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_dp_dual_mode_helper.c
+   :export:
+
+Display Port MST Helper Functions Reference
+===========================================
+
+.. kernel-doc:: drivers/gpu/drm/drm_dp_mst_topology.c
+   :doc: dp mst helper
+
+.. kernel-doc:: include/drm/drm_dp_mst_helper.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_dp_mst_topology.c
+   :export:
+
+MIPI DSI Helper Functions Reference
+===================================
+
+.. kernel-doc:: drivers/gpu/drm/drm_mipi_dsi.c
+   :doc: dsi helpers
+
+.. kernel-doc:: include/drm/drm_mipi_dsi.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_mipi_dsi.c
+   :export:
+
+EDID Helper Functions Reference
+===============================
+
+.. kernel-doc:: drivers/gpu/drm/drm_edid.c
+   :export:
+
+Rectangle Utilities Reference
+=============================
+
+.. kernel-doc:: include/drm/drm_rect.h
+   :doc: rect utils
+
+.. kernel-doc:: include/drm/drm_rect.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_rect.c
+   :export:
+
+Flip-work Helper Reference
+==========================
+
+.. kernel-doc:: include/drm/drm_flip_work.h
+   :doc: flip utils
+
+.. kernel-doc:: include/drm/drm_flip_work.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_flip_work.c
+   :export:
+
+HDMI Infoframes Helper Reference
+================================
+
+Strictly speaking this is not a DRM helper library but generally useable
+by any driver interfacing with HDMI outputs like v4l or alsa drivers.
+But it nicely fits into the overall topic of mode setting helper
+libraries and hence is also included here.
+
+.. kernel-doc:: include/linux/hdmi.h
+   :internal:
+
+.. kernel-doc:: drivers/video/hdmi.c
+   :export:
+
+Plane Helper Reference
+======================
+
+.. kernel-doc:: drivers/gpu/drm/drm_plane_helper.c
+   :export:
+
+.. kernel-doc:: drivers/gpu/drm/drm_plane_helper.c
+   :doc: overview
+
+Tile group
+----------
+
+.. kernel-doc:: drivers/gpu/drm/drm_crtc.c
+   :doc: Tile group
+
+Bridges
+=======
+
+Overview
+--------
+
+.. kernel-doc:: drivers/gpu/drm/drm_bridge.c
+   :doc: overview
+
+Default bridge callback sequence
+--------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_bridge.c
+   :doc: bridge callbacks
+
+.. kernel-doc:: drivers/gpu/drm/drm_bridge.c
+   :export:
+
+Panel Helper Reference
+======================
+
+.. kernel-doc:: include/drm/drm_panel.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_panel.c
+   :export:
+
+.. kernel-doc:: drivers/gpu/drm/drm_panel.c
+   :doc: drm panel
+
+Simple KMS Helper Reference
+===========================
+
+.. kernel-doc:: include/drm/drm_simple_kms_helper.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_simple_kms_helper.c
+   :export:
+
+.. kernel-doc:: drivers/gpu/drm/drm_simple_kms_helper.c
+   :doc: overview
--- a/Documentation/gpu/drm-kms.rst
+++ b/Documentation/gpu/drm-kms.rst
@ -0,0 +1,653 @@
+=========================
+Kernel Mode Setting (KMS)
+=========================
+
+Mode Setting
+============
+
+Drivers must initialize the mode setting core by calling
+:c:func:`drm_mode_config_init()` on the DRM device. The function
+initializes the :c:type:`struct drm_device <drm_device>`
+mode_config field and never fails. Once done, mode configuration must
+be setup by initializing the following fields.
+
+-  int min_width, min_height; int max_width, max_height;
+   Minimum and maximum width and height of the frame buffers in pixel
+   units.
+
+-  struct drm_mode_config_funcs \*funcs;
+   Mode setting functions.
+
+Display Modes Function Reference
+--------------------------------
+
+.. kernel-doc:: include/drm/drm_modes.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_modes.c
+   :export:
+
+Atomic Mode Setting Function Reference
+--------------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_atomic.c
+   :export:
+
+.. kernel-doc:: drivers/gpu/drm/drm_atomic.c
+   :internal:
+
+Frame Buffer Abstraction
+------------------------
+
+Frame buffers are abstract memory objects that provide a source of
+pixels to scanout to a CRTC. Applications explicitly request the
+creation of frame buffers through the DRM_IOCTL_MODE_ADDFB(2) ioctls
+and receive an opaque handle that can be passed to the KMS CRTC control,
+plane configuration and page flip functions.
+
+Frame buffers rely on the underneath memory manager for low-level memory
+operations. When creating a frame buffer applications pass a memory
+handle (or a list of memory handles for multi-planar formats) through
+the ``drm_mode_fb_cmd2`` argument. For drivers using GEM as their
+userspace buffer management interface this would be a GEM handle.
+Drivers are however free to use their own backing storage object
+handles, e.g. vmwgfx directly exposes special TTM handles to userspace
+and so expects TTM handles in the create ioctl and not GEM handles.
+
+The lifetime of a drm framebuffer is controlled with a reference count,
+drivers can grab additional references with
+:c:func:`drm_framebuffer_reference()`and drop them again with
+:c:func:`drm_framebuffer_unreference()`. For driver-private
+framebuffers for which the last reference is never dropped (e.g. for the
+fbdev framebuffer when the struct :c:type:`struct drm_framebuffer
+<drm_framebuffer>` is embedded into the fbdev helper struct)
+drivers can manually clean up a framebuffer at module unload time with
+:c:func:`drm_framebuffer_unregister_private()`.
+
+DRM Format Handling
+-------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_fourcc.c
+   :export:
+
+Dumb Buffer Objects
+-------------------
+
+The KMS API doesn't standardize backing storage object creation and
+leaves it to driver-specific ioctls. Furthermore actually creating a
+buffer object even for GEM-based drivers is done through a
+driver-specific ioctl - GEM only has a common userspace interface for
+sharing and destroying objects. While not an issue for full-fledged
+graphics stacks that include device-specific userspace components (in
+libdrm for instance), this limit makes DRM-based early boot graphics
+unnecessarily complex.
+
+Dumb objects partly alleviate the problem by providing a standard API to
+create dumb buffers suitable for scanout, which can then be used to
+create KMS frame buffers.
+
+To support dumb objects drivers must implement the dumb_create,
+dumb_destroy and dumb_map_offset operations.
+
+-  int (\*dumb_create)(struct drm_file \*file_priv, struct
+   drm_device \*dev, struct drm_mode_create_dumb \*args);
+   The dumb_create operation creates a driver object (GEM or TTM
+   handle) suitable for scanout based on the width, height and depth
+   from the struct :c:type:`struct drm_mode_create_dumb
+   <drm_mode_create_dumb>` argument. It fills the argument's
+   handle, pitch and size fields with a handle for the newly created
+   object and its line pitch and size in bytes.
+
+-  int (\*dumb_destroy)(struct drm_file \*file_priv, struct
+   drm_device \*dev, uint32_t handle);
+   The dumb_destroy operation destroys a dumb object created by
+   dumb_create.
+
+-  int (\*dumb_map_offset)(struct drm_file \*file_priv, struct
+   drm_device \*dev, uint32_t handle, uint64_t \*offset);
+   The dumb_map_offset operation associates an mmap fake offset with
+   the object given by the handle and returns it. Drivers must use the
+   :c:func:`drm_gem_create_mmap_offset()` function to associate
+   the fake offset as described in ?.
+
+Note that dumb objects may not be used for gpu acceleration, as has been
+attempted on some ARM embedded platforms. Such drivers really must have
+a hardware-specific ioctl to allocate suitable buffer objects.
+
+Output Polling
+--------------
+
+void (\*output_poll_changed)(struct drm_device \*dev);
+This operation notifies the driver that the status of one or more
+connectors has changed. Drivers that use the fb helper can just call the
+:c:func:`drm_fb_helper_hotplug_event()` function to handle this
+operation.
+
+KMS Initialization and Cleanup
+==============================
+
+A KMS device is abstracted and exposed as a set of planes, CRTCs,
+encoders and connectors. KMS drivers must thus create and initialize all
+those objects at load time after initializing mode setting.
+
+CRTCs (:c:type:`struct drm_crtc <drm_crtc>`)
+--------------------------------------------
+
+A CRTC is an abstraction representing a part of the chip that contains a
+pointer to a scanout buffer. Therefore, the number of CRTCs available
+determines how many independent scanout buffers can be active at any
+given time. The CRTC structure contains several fields to support this:
+a pointer to some video memory (abstracted as a frame buffer object), a
+display mode, and an (x, y) offset into the video memory to support
+panning or configurations where one piece of video memory spans multiple
+CRTCs.
+
+CRTC Initialization
+~~~~~~~~~~~~~~~~~~~
+
+A KMS device must create and register at least one struct
+:c:type:`struct drm_crtc <drm_crtc>` instance. The instance is
+allocated and zeroed by the driver, possibly as part of a larger
+structure, and registered with a call to :c:func:`drm_crtc_init()`
+with a pointer to CRTC functions.
+
+Planes (:c:type:`struct drm_plane <drm_plane>`)
+-----------------------------------------------
+
+A plane represents an image source that can be blended with or overlayed
+on top of a CRTC during the scanout process. Planes are associated with
+a frame buffer to crop a portion of the image memory (source) and
+optionally scale it to a destination size. The result is then blended
+with or overlayed on top of a CRTC.
+
+The DRM core recognizes three types of planes:
+
+-  DRM_PLANE_TYPE_PRIMARY represents a "main" plane for a CRTC.
+   Primary planes are the planes operated upon by CRTC modesetting and
+   flipping operations described in the page_flip hook in
+   :c:type:`struct drm_crtc_funcs <drm_crtc_funcs>`.
+-  DRM_PLANE_TYPE_CURSOR represents a "cursor" plane for a CRTC.
+   Cursor planes are the planes operated upon by the
+   DRM_IOCTL_MODE_CURSOR and DRM_IOCTL_MODE_CURSOR2 ioctls.
+-  DRM_PLANE_TYPE_OVERLAY represents all non-primary, non-cursor
+   planes. Some drivers refer to these types of planes as "sprites"
+   internally.
+
+For compatibility with legacy userspace, only overlay planes are made
+available to userspace by default. Userspace clients may set the
+DRM_CLIENT_CAP_UNIVERSAL_PLANES client capability bit to indicate
+that they wish to receive a universal plane list containing all plane
+types.
+
+Plane Initialization
+~~~~~~~~~~~~~~~~~~~~
+
+To create a plane, a KMS drivers allocates and zeroes an instances of
+:c:type:`struct drm_plane <drm_plane>` (possibly as part of a
+larger structure) and registers it with a call to
+:c:func:`drm_universal_plane_init()`. The function takes a
+bitmask of the CRTCs that can be associated with the plane, a pointer to
+the plane functions, a list of format supported formats, and the type of
+plane (primary, cursor, or overlay) being initialized.
+
+Cursor and overlay planes are optional. All drivers should provide one
+primary plane per CRTC (although this requirement may change in the
+future); drivers that do not wish to provide special handling for
+primary planes may make use of the helper functions described in ? to
+create and register a primary plane with standard capabilities.
+
+Encoders (:c:type:`struct drm_encoder <drm_encoder>`)
+-----------------------------------------------------
+
+An encoder takes pixel data from a CRTC and converts it to a format
+suitable for any attached connectors. On some devices, it may be
+possible to have a CRTC send data to more than one encoder. In that
+case, both encoders would receive data from the same scanout buffer,
+resulting in a "cloned" display configuration across the connectors
+attached to each encoder.
+
+Encoder Initialization
+~~~~~~~~~~~~~~~~~~~~~~
+
+As for CRTCs, a KMS driver must create, initialize and register at least
+one :c:type:`struct drm_encoder <drm_encoder>` instance. The
+instance is allocated and zeroed by the driver, possibly as part of a
+larger structure.
+
+Drivers must initialize the :c:type:`struct drm_encoder
+<drm_encoder>` possible_crtcs and possible_clones fields before
+registering the encoder. Both fields are bitmasks of respectively the
+CRTCs that the encoder can be connected to, and sibling encoders
+candidate for cloning.
+
+After being initialized, the encoder must be registered with a call to
+:c:func:`drm_encoder_init()`. The function takes a pointer to the
+encoder functions and an encoder type. Supported types are
+
+-  DRM_MODE_ENCODER_DAC for VGA and analog on DVI-I/DVI-A
+-  DRM_MODE_ENCODER_TMDS for DVI, HDMI and (embedded) DisplayPort
+-  DRM_MODE_ENCODER_LVDS for display panels
+-  DRM_MODE_ENCODER_TVDAC for TV output (Composite, S-Video,
+   Component, SCART)
+-  DRM_MODE_ENCODER_VIRTUAL for virtual machine displays
+
+Encoders must be attached to a CRTC to be used. DRM drivers leave
+encoders unattached at initialization time. Applications (or the fbdev
+compatibility layer when implemented) are responsible for attaching the
+encoders they want to use to a CRTC.
+
+Connectors (:c:type:`struct drm_connector <drm_connector>`)
+-----------------------------------------------------------
+
+A connector is the final destination for pixel data on a device, and
+usually connects directly to an external display device like a monitor
+or laptop panel. A connector can only be attached to one encoder at a
+time. The connector is also the structure where information about the
+attached display is kept, so it contains fields for display data, EDID
+data, DPMS & connection status, and information about modes supported on
+the attached displays.
+
+Connector Initialization
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Finally a KMS driver must create, initialize, register and attach at
+least one :c:type:`struct drm_connector <drm_connector>`
+instance. The instance is created as other KMS objects and initialized
+by setting the following fields.
+
+interlace_allowed
+    Whether the connector can handle interlaced modes.
+
+doublescan_allowed
+    Whether the connector can handle doublescan.
+
+display_info
+    Display information is filled from EDID information when a display
+    is detected. For non hot-pluggable displays such as flat panels in
+    embedded systems, the driver should initialize the
+    display_info.width_mm and display_info.height_mm fields with the
+    physical size of the display.
+
+polled
+    Connector polling mode, a combination of
+
+    DRM_CONNECTOR_POLL_HPD
+        The connector generates hotplug events and doesn't need to be
+        periodically polled. The CONNECT and DISCONNECT flags must not
+        be set together with the HPD flag.
+
+    DRM_CONNECTOR_POLL_CONNECT
+        Periodically poll the connector for connection.
+
+    DRM_CONNECTOR_POLL_DISCONNECT
+        Periodically poll the connector for disconnection.
+
+    Set to 0 for connectors that don't support connection status
+    discovery.
+
+The connector is then registered with a call to
+:c:func:`drm_connector_init()` with a pointer to the connector
+functions and a connector type, and exposed through sysfs with a call to
+:c:func:`drm_connector_register()`.
+
+Supported connector types are
+
+-  DRM_MODE_CONNECTOR_VGA
+-  DRM_MODE_CONNECTOR_DVII
+-  DRM_MODE_CONNECTOR_DVID
+-  DRM_MODE_CONNECTOR_DVIA
+-  DRM_MODE_CONNECTOR_Composite
+-  DRM_MODE_CONNECTOR_SVIDEO
+-  DRM_MODE_CONNECTOR_LVDS
+-  DRM_MODE_CONNECTOR_Component
+-  DRM_MODE_CONNECTOR_9PinDIN
+-  DRM_MODE_CONNECTOR_DisplayPort
+-  DRM_MODE_CONNECTOR_HDMIA
+-  DRM_MODE_CONNECTOR_HDMIB
+-  DRM_MODE_CONNECTOR_TV
+-  DRM_MODE_CONNECTOR_eDP
+-  DRM_MODE_CONNECTOR_VIRTUAL
+
+Connectors must be attached to an encoder to be used. For devices that
+map connectors to encoders 1:1, the connector should be attached at
+initialization time with a call to
+:c:func:`drm_mode_connector_attach_encoder()`. The driver must
+also set the :c:type:`struct drm_connector <drm_connector>`
+encoder field to point to the attached encoder.
+
+Finally, drivers must initialize the connectors state change detection
+with a call to :c:func:`drm_kms_helper_poll_init()`. If at least
+one connector is pollable but can't generate hotplug interrupts
+(indicated by the DRM_CONNECTOR_POLL_CONNECT and
+DRM_CONNECTOR_POLL_DISCONNECT connector flags), a delayed work will
+automatically be queued to periodically poll for changes. Connectors
+that can generate hotplug interrupts must be marked with the
+DRM_CONNECTOR_POLL_HPD flag instead, and their interrupt handler must
+call :c:func:`drm_helper_hpd_irq_event()`. The function will
+queue a delayed work to check the state of all connectors, but no
+periodic polling will be done.
+
+Connector Operations
+~~~~~~~~~~~~~~~~~~~~
+
+    **Note**
+
+    Unless otherwise state, all operations are mandatory.
+
+DPMS
+''''
+
+void (\*dpms)(struct drm_connector \*connector, int mode);
+The DPMS operation sets the power state of a connector. The mode
+argument is one of
+
+-  DRM_MODE_DPMS_ON
+
+-  DRM_MODE_DPMS_STANDBY
+
+-  DRM_MODE_DPMS_SUSPEND
+
+-  DRM_MODE_DPMS_OFF
+
+In all but DPMS_ON mode the encoder to which the connector is attached
+should put the display in low-power mode by driving its signals
+appropriately. If more than one connector is attached to the encoder
+care should be taken not to change the power state of other displays as
+a side effect. Low-power mode should be propagated to the encoders and
+CRTCs when all related connectors are put in low-power mode.
+
+Modes
+'''''
+
+int (\*fill_modes)(struct drm_connector \*connector, uint32_t
+max_width, uint32_t max_height);
+Fill the mode list with all supported modes for the connector. If the
+``max_width`` and ``max_height`` arguments are non-zero, the
+implementation must ignore all modes wider than ``max_width`` or higher
+than ``max_height``.
+
+The connector must also fill in this operation its display_info
+width_mm and height_mm fields with the connected display physical size
+in millimeters. The fields should be set to 0 if the value isn't known
+or is not applicable (for instance for projector devices).
+
+Connection Status
+'''''''''''''''''
+
+The connection status is updated through polling or hotplug events when
+supported (see ?). The status value is reported to userspace through
+ioctls and must not be used inside the driver, as it only gets
+initialized by a call to :c:func:`drm_mode_getconnector()` from
+userspace.
+
+enum drm_connector_status (\*detect)(struct drm_connector
+\*connector, bool force);
+Check to see if anything is attached to the connector. The ``force``
+parameter is set to false whilst polling or to true when checking the
+connector due to user request. ``force`` can be used by the driver to
+avoid expensive, destructive operations during automated probing.
+
+Return connector_status_connected if something is connected to the
+connector, connector_status_disconnected if nothing is connected and
+connector_status_unknown if the connection state isn't known.
+
+Drivers should only return connector_status_connected if the
+connection status has really been probed as connected. Connectors that
+can't detect the connection status, or failed connection status probes,
+should return connector_status_unknown.
+
+Cleanup
+-------
+
+The DRM core manages its objects' lifetime. When an object is not needed
+anymore the core calls its destroy function, which must clean up and
+free every resource allocated for the object. Every
+:c:func:`drm_\*_init()` call must be matched with a corresponding
+:c:func:`drm_\*_cleanup()` call to cleanup CRTCs
+(:c:func:`drm_crtc_cleanup()`), planes
+(:c:func:`drm_plane_cleanup()`), encoders
+(:c:func:`drm_encoder_cleanup()`) and connectors
+(:c:func:`drm_connector_cleanup()`). Furthermore, connectors that
+have been added to sysfs must be removed by a call to
+:c:func:`drm_connector_unregister()` before calling
+:c:func:`drm_connector_cleanup()`.
+
+Connectors state change detection must be cleanup up with a call to
+:c:func:`drm_kms_helper_poll_fini()`.
+
+Output discovery and initialization example
+-------------------------------------------
+
+::
+
+    void intel_crt_init(struct drm_device *dev)
+    {
+        struct drm_connector *connector;
+        struct intel_output *intel_output;
+
+        intel_output = kzalloc(sizeof(struct intel_output), GFP_KERNEL);
+        if (!intel_output)
+            return;
+
+        connector = &intel_output->base;
+        drm_connector_init(dev, &intel_output->base,
+                   &intel_crt_connector_funcs, DRM_MODE_CONNECTOR_VGA);
+
+        drm_encoder_init(dev, &intel_output->enc, &intel_crt_enc_funcs,
+                 DRM_MODE_ENCODER_DAC);
+
+        drm_mode_connector_attach_encoder(&intel_output->base,
+                          &intel_output->enc);
+
+        /* Set up the DDC bus. */
+        intel_output->ddc_bus = intel_i2c_create(dev, GPIOA, "CRTDDC_A");
+        if (!intel_output->ddc_bus) {
+            dev_printk(KERN_ERR, &dev->pdev->dev, "DDC bus registration "
+                   "failed.\n");
+            return;
+        }
+
+        intel_output->type = INTEL_OUTPUT_ANALOG;
+        connector->interlace_allowed = 0;
+        connector->doublescan_allowed = 0;
+
+        drm_encoder_helper_add(&intel_output->enc, &intel_crt_helper_funcs);
+        drm_connector_helper_add(connector, &intel_crt_connector_helper_funcs);
+
+        drm_connector_register(connector);
+    }
+
+In the example above (taken from the i915 driver), a CRTC, connector and
+encoder combination is created. A device-specific i2c bus is also
+created for fetching EDID data and performing monitor detection. Once
+the process is complete, the new connector is registered with sysfs to
+make its properties available to applications.
+
+KMS API Functions
+-----------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_crtc.c
+   :export:
+
+KMS Data Structures
+-------------------
+
+.. kernel-doc:: include/drm/drm_crtc.h
+   :internal:
+
+KMS Locking
+-----------
+
+.. kernel-doc:: drivers/gpu/drm/drm_modeset_lock.c
+   :doc: kms locking
+
+.. kernel-doc:: include/drm/drm_modeset_lock.h
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/drm_modeset_lock.c
+   :export:
+
+KMS Properties
+==============
+
+Drivers may need to expose additional parameters to applications than
+those described in the previous sections. KMS supports attaching
+properties to CRTCs, connectors and planes and offers a userspace API to
+list, get and set the property values.
+
+Properties are identified by a name that uniquely defines the property
+purpose, and store an associated value. For all property types except
+blob properties the value is a 64-bit unsigned integer.
+
+KMS differentiates between properties and property instances. Drivers
+first create properties and then create and associate individual
+instances of those properties to objects. A property can be instantiated
+multiple times and associated with different objects. Values are stored
+in property instances, and all other property information are stored in
+the property and shared between all instances of the property.
+
+Every property is created with a type that influences how the KMS core
+handles the property. Supported property types are
+
+DRM_MODE_PROP_RANGE
+    Range properties report their minimum and maximum admissible values.
+    The KMS core verifies that values set by application fit in that
+    range.
+
+DRM_MODE_PROP_ENUM
+    Enumerated properties take a numerical value that ranges from 0 to
+    the number of enumerated values defined by the property minus one,
+    and associate a free-formed string name to each value. Applications
+    can retrieve the list of defined value-name pairs and use the
+    numerical value to get and set property instance values.
+
+DRM_MODE_PROP_BITMASK
+    Bitmask properties are enumeration properties that additionally
+    restrict all enumerated values to the 0..63 range. Bitmask property
+    instance values combine one or more of the enumerated bits defined
+    by the property.
+
+DRM_MODE_PROP_BLOB
+    Blob properties store a binary blob without any format restriction.
+    The binary blobs are created as KMS standalone objects, and blob
+    property instance values store the ID of their associated blob
+    object.
+
+    Blob properties are only used for the connector EDID property and
+    cannot be created by drivers.
+
+To create a property drivers call one of the following functions
+depending on the property type. All property creation functions take
+property flags and name, as well as type-specific arguments.
+
+-  struct drm_property \*drm_property_create_range(struct
+   drm_device \*dev, int flags, const char \*name, uint64_t min,
+   uint64_t max);
+   Create a range property with the given minimum and maximum values.
+
+-  struct drm_property \*drm_property_create_enum(struct drm_device
+   \*dev, int flags, const char \*name, const struct
+   drm_prop_enum_list \*props, int num_values);
+   Create an enumerated property. The ``props`` argument points to an
+   array of ``num_values`` value-name pairs.
+
+-  struct drm_property \*drm_property_create_bitmask(struct
+   drm_device \*dev, int flags, const char \*name, const struct
+   drm_prop_enum_list \*props, int num_values);
+   Create a bitmask property. The ``props`` argument points to an array
+   of ``num_values`` value-name pairs.
+
+Properties can additionally be created as immutable, in which case they
+will be read-only for applications but can be modified by the driver. To
+create an immutable property drivers must set the
+DRM_MODE_PROP_IMMUTABLE flag at property creation time.
+
+When no array of value-name pairs is readily available at property
+creation time for enumerated or range properties, drivers can create the
+property using the :c:func:`drm_property_create()` function and
+manually add enumeration value-name pairs by calling the
+:c:func:`drm_property_add_enum()` function. Care must be taken to
+properly specify the property type through the ``flags`` argument.
+
+After creating properties drivers can attach property instances to CRTC,
+connector and plane objects by calling the
+:c:func:`drm_object_attach_property()`. The function takes a
+pointer to the target object, a pointer to the previously created
+property and an initial instance value.
+
+Existing KMS Properties
+-----------------------
+
+The following table gives description of drm properties exposed by
+various modules/drivers.
+
+.. csv-table::
+   :header-rows: 1
+   :file: kms-properties.csv
+
+Vertical Blanking
+=================
+
+Vertical blanking plays a major role in graphics rendering. To achieve
+tear-free display, users must synchronize page flips and/or rendering to
+vertical blanking. The DRM API offers ioctls to perform page flips
+synchronized to vertical blanking and wait for vertical blanking.
+
+The DRM core handles most of the vertical blanking management logic,
+which involves filtering out spurious interrupts, keeping race-free
+blanking counters, coping with counter wrap-around and resets and
+keeping use counts. It relies on the driver to generate vertical
+blanking interrupts and optionally provide a hardware vertical blanking
+counter. Drivers must implement the following operations.
+
+-  int (\*enable_vblank) (struct drm_device \*dev, int crtc); void
+   (\*disable_vblank) (struct drm_device \*dev, int crtc);
+   Enable or disable vertical blanking interrupts for the given CRTC.
+
+-  u32 (\*get_vblank_counter) (struct drm_device \*dev, int crtc);
+   Retrieve the value of the vertical blanking counter for the given
+   CRTC. If the hardware maintains a vertical blanking counter its value
+   should be returned. Otherwise drivers can use the
+   :c:func:`drm_vblank_count()` helper function to handle this
+   operation.
+
+Drivers must initialize the vertical blanking handling core with a call
+to :c:func:`drm_vblank_init()` in their load operation.
+
+Vertical blanking interrupts can be enabled by the DRM core or by
+drivers themselves (for instance to handle page flipping operations).
+The DRM core maintains a vertical blanking use count to ensure that the
+interrupts are not disabled while a user still needs them. To increment
+the use count, drivers call :c:func:`drm_vblank_get()`. Upon
+return vertical blanking interrupts are guaranteed to be enabled.
+
+To decrement the use count drivers call
+:c:func:`drm_vblank_put()`. Only when the use count drops to zero
+will the DRM core disable the vertical blanking interrupts after a delay
+by scheduling a timer. The delay is accessible through the
+vblankoffdelay module parameter or the ``drm_vblank_offdelay`` global
+variable and expressed in milliseconds. Its default value is 5000 ms.
+Zero means never disable, and a negative value means disable
+immediately. Drivers may override the behaviour by setting the
+:c:type:`struct drm_device <drm_device>`
+vblank_disable_immediate flag, which when set causes vblank interrupts
+to be disabled immediately regardless of the drm_vblank_offdelay
+value. The flag should only be set if there's a properly working
+hardware vblank counter present.
+
+When a vertical blanking interrupt occurs drivers only need to call the
+:c:func:`drm_handle_vblank()` function to account for the
+interrupt.
+
+Resources allocated by :c:func:`drm_vblank_init()` must be freed
+with a call to :c:func:`drm_vblank_cleanup()` in the driver unload
+operation handler.
+
+Vertical Blanking and Interrupt Handling Functions Reference
+------------------------------------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_irq.c
+   :export:
+
+.. kernel-doc:: include/drm/drm_irq.h
+   :internal:
--- a/Documentation/gpu/drm-mm.rst
+++ b/Documentation/gpu/drm-mm.rst
@ -0,0 +1,454 @@
+=====================
+DRM Memory Management
+=====================
+
+Modern Linux systems require large amount of graphics memory to store
+frame buffers, textures, vertices and other graphics-related data. Given
+the very dynamic nature of many of that data, managing graphics memory
+efficiently is thus crucial for the graphics stack and plays a central
+role in the DRM infrastructure.
+
+The DRM core includes two memory managers, namely Translation Table Maps
+(TTM) and Graphics Execution Manager (GEM). TTM was the first DRM memory
+manager to be developed and tried to be a one-size-fits-them all
+solution. It provides a single userspace API to accommodate the need of
+all hardware, supporting both Unified Memory Architecture (UMA) devices
+and devices with dedicated video RAM (i.e. most discrete video cards).
+This resulted in a large, complex piece of code that turned out to be
+hard to use for driver development.
+
+GEM started as an Intel-sponsored project in reaction to TTM's
+complexity. Its design philosophy is completely different: instead of
+providing a solution to every graphics memory-related problems, GEM
+identified common code between drivers and created a support library to
+share it. GEM has simpler initialization and execution requirements than
+TTM, but has no video RAM management capabilities and is thus limited to
+UMA devices.
+
+The Translation Table Manager (TTM)
+-----------------------------------
+
+TTM design background and information belongs here.
+
+TTM initialization
+~~~~~~~~~~~~~~~~~~
+
+    **Warning**
+
+    This section is outdated.
+
+Drivers wishing to support TTM must fill out a drm_bo_driver
+structure. The structure contains several fields with function pointers
+for initializing the TTM, allocating and freeing memory, waiting for
+command completion and fence synchronization, and memory migration. See
+the radeon_ttm.c file for an example of usage.
+
+The ttm_global_reference structure is made up of several fields:
+
+::
+
+              struct ttm_global_reference {
+                      enum ttm_global_types global_type;
+                      size_t size;
+                      void *object;
+                      int (*init) (struct ttm_global_reference *);
+                      void (*release) (struct ttm_global_reference *);
+              };
+
+
+There should be one global reference structure for your memory manager
+as a whole, and there will be others for each object created by the
+memory manager at runtime. Your global TTM should have a type of
+TTM_GLOBAL_TTM_MEM. The size field for the global object should be
+sizeof(struct ttm_mem_global), and the init and release hooks should
+point at your driver-specific init and release routines, which probably
+eventually call ttm_mem_global_init and ttm_mem_global_release,
+respectively.
+
+Once your global TTM accounting structure is set up and initialized by
+calling ttm_global_item_ref() on it, you need to create a buffer
+object TTM to provide a pool for buffer object allocation by clients and
+the kernel itself. The type of this object should be
+TTM_GLOBAL_TTM_BO, and its size should be sizeof(struct
+ttm_bo_global). Again, driver-specific init and release functions may
+be provided, likely eventually calling ttm_bo_global_init() and
+ttm_bo_global_release(), respectively. Also, like the previous
+object, ttm_global_item_ref() is used to create an initial reference
+count for the TTM, which will call your initialization function.
+
+The Graphics Execution Manager (GEM)
+------------------------------------
+
+The GEM design approach has resulted in a memory manager that doesn't
+provide full coverage of all (or even all common) use cases in its
+userspace or kernel API. GEM exposes a set of standard memory-related
+operations to userspace and a set of helper functions to drivers, and
+let drivers implement hardware-specific operations with their own
+private API.
+
+The GEM userspace API is described in the `GEM - the Graphics Execution
+Manager <http://lwn.net/Articles/283798/>`__ article on LWN. While
+slightly outdated, the document provides a good overview of the GEM API
+principles. Buffer allocation and read and write operations, described
+as part of the common GEM API, are currently implemented using
+driver-specific ioctls.
+
+GEM is data-agnostic. It manages abstract buffer objects without knowing
+what individual buffers contain. APIs that require knowledge of buffer
+contents or purpose, such as buffer allocation or synchronization
+primitives, are thus outside of the scope of GEM and must be implemented
+using driver-specific ioctls.
+
+On a fundamental level, GEM involves several operations:
+
+-  Memory allocation and freeing
+-  Command execution
+-  Aperture management at command execution time
+
+Buffer object allocation is relatively straightforward and largely
+provided by Linux's shmem layer, which provides memory to back each
+object.
+
+Device-specific operations, such as command execution, pinning, buffer
+read & write, mapping, and domain ownership transfers are left to
+driver-specific ioctls.
+
+GEM Initialization
+~~~~~~~~~~~~~~~~~~
+
+Drivers that use GEM must set the DRIVER_GEM bit in the struct
+:c:type:`struct drm_driver <drm_driver>` driver_features
+field. The DRM core will then automatically initialize the GEM core
+before calling the load operation. Behind the scene, this will create a
+DRM Memory Manager object which provides an address space pool for
+object allocation.
+
+In a KMS configuration, drivers need to allocate and initialize a
+command ring buffer following core GEM initialization if required by the
+hardware. UMA devices usually have what is called a "stolen" memory
+region, which provides space for the initial framebuffer and large,
+contiguous memory regions required by the device. This space is
+typically not managed by GEM, and must be initialized separately into
+its own DRM MM object.
+
+GEM Objects Creation
+~~~~~~~~~~~~~~~~~~~~
+
+GEM splits creation of GEM objects and allocation of the memory that
+backs them in two distinct operations.
+
+GEM objects are represented by an instance of struct :c:type:`struct
+drm_gem_object <drm_gem_object>`. Drivers usually need to
+extend GEM objects with private information and thus create a
+driver-specific GEM object structure type that embeds an instance of
+struct :c:type:`struct drm_gem_object <drm_gem_object>`.
+
+To create a GEM object, a driver allocates memory for an instance of its
+specific GEM object type and initializes the embedded struct
+:c:type:`struct drm_gem_object <drm_gem_object>` with a call
+to :c:func:`drm_gem_object_init()`. The function takes a pointer
+to the DRM device, a pointer to the GEM object and the buffer object
+size in bytes.
+
+GEM uses shmem to allocate anonymous pageable memory.
+:c:func:`drm_gem_object_init()` will create an shmfs file of the
+requested size and store it into the struct :c:type:`struct
+drm_gem_object <drm_gem_object>` filp field. The memory is
+used as either main storage for the object when the graphics hardware
+uses system memory directly or as a backing store otherwise.
+
+Drivers are responsible for the actual physical pages allocation by
+calling :c:func:`shmem_read_mapping_page_gfp()` for each page.
+Note that they can decide to allocate pages when initializing the GEM
+object, or to delay allocation until the memory is needed (for instance
+when a page fault occurs as a result of a userspace memory access or
+when the driver needs to start a DMA transfer involving the memory).
+
+Anonymous pageable memory allocation is not always desired, for instance
+when the hardware requires physically contiguous system memory as is
+often the case in embedded devices. Drivers can create GEM objects with
+no shmfs backing (called private GEM objects) by initializing them with
+a call to :c:func:`drm_gem_private_object_init()` instead of
+:c:func:`drm_gem_object_init()`. Storage for private GEM objects
+must be managed by drivers.
+
+GEM Objects Lifetime
+~~~~~~~~~~~~~~~~~~~~
+
+All GEM objects are reference-counted by the GEM core. References can be
+acquired and release by :c:func:`calling
+drm_gem_object_reference()` and
+:c:func:`drm_gem_object_unreference()` respectively. The caller
+must hold the :c:type:`struct drm_device <drm_device>`
+struct_mutex lock when calling
+:c:func:`drm_gem_object_reference()`. As a convenience, GEM
+provides :c:func:`drm_gem_object_unreference_unlocked()`
+functions that can be called without holding the lock.
+
+When the last reference to a GEM object is released the GEM core calls
+the :c:type:`struct drm_driver <drm_driver>` gem_free_object
+operation. That operation is mandatory for GEM-enabled drivers and must
+free the GEM object and all associated resources.
+
+void (\*gem_free_object) (struct drm_gem_object \*obj); Drivers are
+responsible for freeing all GEM object resources. This includes the
+resources created by the GEM core, which need to be released with
+:c:func:`drm_gem_object_release()`.
+
+GEM Objects Naming
+~~~~~~~~~~~~~~~~~~
+
+Communication between userspace and the kernel refers to GEM objects
+using local handles, global names or, more recently, file descriptors.
+All of those are 32-bit integer values; the usual Linux kernel limits
+apply to the file descriptors.
+
+GEM handles are local to a DRM file. Applications get a handle to a GEM
+object through a driver-specific ioctl, and can use that handle to refer
+to the GEM object in other standard or driver-specific ioctls. Closing a
+DRM file handle frees all its GEM handles and dereferences the
+associated GEM objects.
+
+To create a handle for a GEM object drivers call
+:c:func:`drm_gem_handle_create()`. The function takes a pointer
+to the DRM file and the GEM object and returns a locally unique handle.
+When the handle is no longer needed drivers delete it with a call to
+:c:func:`drm_gem_handle_delete()`. Finally the GEM object
+associated with a handle can be retrieved by a call to
+:c:func:`drm_gem_object_lookup()`.
+
+Handles don't take ownership of GEM objects, they only take a reference
+to the object that will be dropped when the handle is destroyed. To
+avoid leaking GEM objects, drivers must make sure they drop the
+reference(s) they own (such as the initial reference taken at object
+creation time) as appropriate, without any special consideration for the
+handle. For example, in the particular case of combined GEM object and
+handle creation in the implementation of the dumb_create operation,
+drivers must drop the initial reference to the GEM object before
+returning the handle.
+
+GEM names are similar in purpose to handles but are not local to DRM
+files. They can be passed between processes to reference a GEM object
+globally. Names can't be used directly to refer to objects in the DRM
+API, applications must convert handles to names and names to handles
+using the DRM_IOCTL_GEM_FLINK and DRM_IOCTL_GEM_OPEN ioctls
+respectively. The conversion is handled by the DRM core without any
+driver-specific support.
+
+GEM also supports buffer sharing with dma-buf file descriptors through
+PRIME. GEM-based drivers must use the provided helpers functions to
+implement the exporting and importing correctly. See ?. Since sharing
+file descriptors is inherently more secure than the easily guessable and
+global GEM names it is the preferred buffer sharing mechanism. Sharing
+buffers through GEM names is only supported for legacy userspace.
+Furthermore PRIME also allows cross-device buffer sharing since it is
+based on dma-bufs.
+
+GEM Objects Mapping
+~~~~~~~~~~~~~~~~~~~
+
+Because mapping operations are fairly heavyweight GEM favours
+read/write-like access to buffers, implemented through driver-specific
+ioctls, over mapping buffers to userspace. However, when random access
+to the buffer is needed (to perform software rendering for instance),
+direct access to the object can be more efficient.
+
+The mmap system call can't be used directly to map GEM objects, as they
+don't have their own file handle. Two alternative methods currently
+co-exist to map GEM objects to userspace. The first method uses a
+driver-specific ioctl to perform the mapping operation, calling
+:c:func:`do_mmap()` under the hood. This is often considered
+dubious, seems to be discouraged for new GEM-enabled drivers, and will
+thus not be described here.
+
+The second method uses the mmap system call on the DRM file handle. void
+\*mmap(void \*addr, size_t length, int prot, int flags, int fd, off_t
+offset); DRM identifies the GEM object to be mapped by a fake offset
+passed through the mmap offset argument. Prior to being mapped, a GEM
+object must thus be associated with a fake offset. To do so, drivers
+must call :c:func:`drm_gem_create_mmap_offset()` on the object.
+
+Once allocated, the fake offset value must be passed to the application
+in a driver-specific way and can then be used as the mmap offset
+argument.
+
+The GEM core provides a helper method :c:func:`drm_gem_mmap()` to
+handle object mapping. The method can be set directly as the mmap file
+operation handler. It will look up the GEM object based on the offset
+value and set the VMA operations to the :c:type:`struct drm_driver
+<drm_driver>` gem_vm_ops field. Note that
+:c:func:`drm_gem_mmap()` doesn't map memory to userspace, but
+relies on the driver-provided fault handler to map pages individually.
+
+To use :c:func:`drm_gem_mmap()`, drivers must fill the struct
+:c:type:`struct drm_driver <drm_driver>` gem_vm_ops field
+with a pointer to VM operations.
+
+struct vm_operations_struct \*gem_vm_ops struct
+vm_operations_struct { void (\*open)(struct vm_area_struct \* area);
+void (\*close)(struct vm_area_struct \* area); int (\*fault)(struct
+vm_area_struct \*vma, struct vm_fault \*vmf); };
+
+The open and close operations must update the GEM object reference
+count. Drivers can use the :c:func:`drm_gem_vm_open()` and
+:c:func:`drm_gem_vm_close()` helper functions directly as open
+and close handlers.
+
+The fault operation handler is responsible for mapping individual pages
+to userspace when a page fault occurs. Depending on the memory
+allocation scheme, drivers can allocate pages at fault time, or can
+decide to allocate memory for the GEM object at the time the object is
+created.
+
+Drivers that want to map the GEM object upfront instead of handling page
+faults can implement their own mmap file operation handler.
+
+Memory Coherency
+~~~~~~~~~~~~~~~~
+
+When mapped to the device or used in a command buffer, backing pages for
+an object are flushed to memory and marked write combined so as to be
+coherent with the GPU. Likewise, if the CPU accesses an object after the
+GPU has finished rendering to the object, then the object must be made
+coherent with the CPU's view of memory, usually involving GPU cache
+flushing of various kinds. This core CPU<->GPU coherency management is
+provided by a device-specific ioctl, which evaluates an object's current
+domain and performs any necessary flushing or synchronization to put the
+object into the desired coherency domain (note that the object may be
+busy, i.e. an active render target; in that case, setting the domain
+blocks the client and waits for rendering to complete before performing
+any necessary flushing operations).
+
+Command Execution
+~~~~~~~~~~~~~~~~~
+
+Perhaps the most important GEM function for GPU devices is providing a
+command execution interface to clients. Client programs construct
+command buffers containing references to previously allocated memory
+objects, and then submit them to GEM. At that point, GEM takes care to
+bind all the objects into the GTT, execute the buffer, and provide
+necessary synchronization between clients accessing the same buffers.
+This often involves evicting some objects from the GTT and re-binding
+others (a fairly expensive operation), and providing relocation support
+which hides fixed GTT offsets from clients. Clients must take care not
+to submit command buffers that reference more objects than can fit in
+the GTT; otherwise, GEM will reject them and no rendering will occur.
+Similarly, if several objects in the buffer require fence registers to
+be allocated for correct rendering (e.g. 2D blits on pre-965 chips),
+care must be taken not to require more fence registers than are
+available to the client. Such resource management should be abstracted
+from the client in libdrm.
+
+GEM Function Reference
+----------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_gem.c
+   :export:
+
+.. kernel-doc:: include/drm/drm_gem.h
+   :internal:
+
+VMA Offset Manager
+------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_vma_manager.c
+   :doc: vma offset manager
+
+.. kernel-doc:: drivers/gpu/drm/drm_vma_manager.c
+   :export:
+
+.. kernel-doc:: include/drm/drm_vma_manager.h
+   :internal:
+
+PRIME Buffer Sharing
+--------------------
+
+PRIME is the cross device buffer sharing framework in drm, originally
+created for the OPTIMUS range of multi-gpu platforms. To userspace PRIME
+buffers are dma-buf based file descriptors.
+
+Overview and Driver Interface
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+Similar to GEM global names, PRIME file descriptors are also used to
+share buffer objects across processes. They offer additional security:
+as file descriptors must be explicitly sent over UNIX domain sockets to
+be shared between applications, they can't be guessed like the globally
+unique GEM names.
+
+Drivers that support the PRIME API must set the DRIVER_PRIME bit in the
+struct :c:type:`struct drm_driver <drm_driver>`
+driver_features field, and implement the prime_handle_to_fd and
+prime_fd_to_handle operations.
+
+int (\*prime_handle_to_fd)(struct drm_device \*dev, struct drm_file
+\*file_priv, uint32_t handle, uint32_t flags, int \*prime_fd); int
+(\*prime_fd_to_handle)(struct drm_device \*dev, struct drm_file
+\*file_priv, int prime_fd, uint32_t \*handle); Those two operations
+convert a handle to a PRIME file descriptor and vice versa. Drivers must
+use the kernel dma-buf buffer sharing framework to manage the PRIME file
+descriptors. Similar to the mode setting API PRIME is agnostic to the
+underlying buffer object manager, as long as handles are 32bit unsigned
+integers.
+
+While non-GEM drivers must implement the operations themselves, GEM
+drivers must use the :c:func:`drm_gem_prime_handle_to_fd()` and
+:c:func:`drm_gem_prime_fd_to_handle()` helper functions. Those
+helpers rely on the driver gem_prime_export and gem_prime_import
+operations to create a dma-buf instance from a GEM object (dma-buf
+exporter role) and to create a GEM object from a dma-buf instance
+(dma-buf importer role).
+
+struct dma_buf \* (\*gem_prime_export)(struct drm_device \*dev,
+struct drm_gem_object \*obj, int flags); struct drm_gem_object \*
+(\*gem_prime_import)(struct drm_device \*dev, struct dma_buf
+\*dma_buf); These two operations are mandatory for GEM drivers that
+support PRIME.
+
+PRIME Helper Functions
+~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/gpu/drm/drm_prime.c
+   :doc: PRIME Helpers
+
+PRIME Function References
+-------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_prime.c
+   :export:
+
+DRM MM Range Allocator
+----------------------
+
+Overview
+~~~~~~~~
+
+.. kernel-doc:: drivers/gpu/drm/drm_mm.c
+   :doc: Overview
+
+LRU Scan/Eviction Support
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/gpu/drm/drm_mm.c
+   :doc: lru scan roaster
+
+DRM MM Range Allocator Function References
+------------------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_mm.c
+   :export:
+
+.. kernel-doc:: include/drm/drm_mm.h
+   :internal:
+
+CMA Helper Functions Reference
+------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/drm_gem_cma_helper.c
+   :doc: cma helpers
+
+.. kernel-doc:: drivers/gpu/drm/drm_gem_cma_helper.c
+   :export:
+
+.. kernel-doc:: include/drm/drm_gem_cma_helper.h
+   :internal:
--- a/Documentation/gpu/drm-uapi.rst
+++ b/Documentation/gpu/drm-uapi.rst
@ -0,0 +1,111 @@
+===================
+Userland interfaces
+===================
+
+The DRM core exports several interfaces to applications, generally
+intended to be used through corresponding libdrm wrapper functions. In
+addition, drivers export device-specific interfaces for use by userspace
+drivers & device-aware applications through ioctls and sysfs files.
+
+External interfaces include: memory mapping, context management, DMA
+operations, AGP management, vblank control, fence management, memory
+management, and output management.
+
+Cover generic ioctls and sysfs layout here. We only need high-level
+info, since man pages should cover the rest.
+
+libdrm Device Lookup
+====================
+
+.. kernel-doc:: drivers/gpu/drm/drm_ioctl.c
+   :doc: getunique and setversion story
+
+
+Primary Nodes, DRM Master and Authentication
+============================================
+
+.. kernel-doc:: drivers/gpu/drm/drm_auth.c
+   :doc: master and authentication
+
+.. kernel-doc:: drivers/gpu/drm/drm_auth.c
+   :export:
+
+.. kernel-doc:: include/drm/drm_auth.h
+   :internal:
+
+Render nodes
+============
+
+DRM core provides multiple character-devices for user-space to use.
+Depending on which device is opened, user-space can perform a different
+set of operations (mainly ioctls). The primary node is always created
+and called card<num>. Additionally, a currently unused control node,
+called controlD<num> is also created. The primary node provides all
+legacy operations and historically was the only interface used by
+userspace. With KMS, the control node was introduced. However, the
+planned KMS control interface has never been written and so the control
+node stays unused to date.
+
+With the increased use of offscreen renderers and GPGPU applications,
+clients no longer require running compositors or graphics servers to
+make use of a GPU. But the DRM API required unprivileged clients to
+authenticate to a DRM-Master prior to getting GPU access. To avoid this
+step and to grant clients GPU access without authenticating, render
+nodes were introduced. Render nodes solely serve render clients, that
+is, no modesetting or privileged ioctls can be issued on render nodes.
+Only non-global rendering commands are allowed. If a driver supports
+render nodes, it must advertise it via the DRIVER_RENDER DRM driver
+capability. If not supported, the primary node must be used for render
+clients together with the legacy drmAuth authentication procedure.
+
+If a driver advertises render node support, DRM core will create a
+separate render node called renderD<num>. There will be one render node
+per device. No ioctls except PRIME-related ioctls will be allowed on
+this node. Especially GEM_OPEN will be explicitly prohibited. Render
+nodes are designed to avoid the buffer-leaks, which occur if clients
+guess the flink names or mmap offsets on the legacy interface.
+Additionally to this basic interface, drivers must mark their
+driver-dependent render-only ioctls as DRM_RENDER_ALLOW so render
+clients can use them. Driver authors must be careful not to allow any
+privileged ioctls on render nodes.
+
+With render nodes, user-space can now control access to the render node
+via basic file-system access-modes. A running graphics server which
+authenticates clients on the privileged primary/legacy node is no longer
+required. Instead, a client can open the render node and is immediately
+granted GPU access. Communication between clients (or servers) is done
+via PRIME. FLINK from render node to legacy node is not supported. New
+clients must not use the insecure FLINK interface.
+
+Besides dropping all modeset/global ioctls, render nodes also drop the
+DRM-Master concept. There is no reason to associate render clients with
+a DRM-Master as they are independent of any graphics server. Besides,
+they must work without any running master, anyway. Drivers must be able
+to run without a master object if they support render nodes. If, on the
+other hand, a driver requires shared state between clients which is
+visible to user-space and accessible beyond open-file boundaries, they
+cannot support render nodes.
+
+VBlank event handling
+=====================
+
+The DRM core exposes two vertical blank related ioctls:
+
+DRM_IOCTL_WAIT_VBLANK
+    This takes a struct drm_wait_vblank structure as its argument, and
+    it is used to block or request a signal when a specified vblank
+    event occurs.
+
+DRM_IOCTL_MODESET_CTL
+    This was only used for user-mode-settind drivers around modesetting
+    changes to allow the kernel to update the vblank interrupt after
+    mode setting, since on many devices the vertical blank counter is
+    reset to 0 at some point during modeset. Modern drivers should not
+    call this any more since with kernel mode setting it is a no-op.
+
+This second part of the GPU Driver Developer's Guide documents driver
+code, implementation details and also all the driver-specific userspace
+interfaces. Especially since all hardware-acceleration interfaces to
+userspace are driver specific for efficiency and other reasons these
+interfaces can be rather substantial. Hence every driver has its own
+chapter.
--- a/Documentation/gpu/i915.rst
+++ b/Documentation/gpu/i915.rst
@ -0,0 +1,347 @@
+===========================
+ drm/i915 Intel GFX Driver
+===========================
+
+The drm/i915 driver supports all (with the exception of some very early
+models) integrated GFX chipsets with both Intel display and rendering
+blocks. This excludes a set of SoC platforms with an SGX rendering unit,
+those have basic support through the gma500 drm driver.
+
+Core Driver Infrastructure
+==========================
+
+This section covers core driver infrastructure used by both the display
+and the GEM parts of the driver.
+
+Runtime Power Management
+------------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_runtime_pm.c
+   :doc: runtime pm
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_runtime_pm.c
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_uncore.c
+   :internal:
+
+Interrupt Handling
+------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_irq.c
+   :doc: interrupt handling
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_irq.c
+   :functions: intel_irq_init intel_irq_init_hw intel_hpd_init
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_irq.c
+   :functions: intel_runtime_pm_disable_interrupts
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_irq.c
+   :functions: intel_runtime_pm_enable_interrupts
+
+Intel GVT-g Guest Support(vGPU)
+-------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_vgpu.c
+   :doc: Intel GVT-g guest support
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_vgpu.c
+   :internal:
+
+Display Hardware Handling
+=========================
+
+This section covers everything related to the display hardware including
+the mode setting infrastructure, plane, sprite and cursor handling and
+display, output probing and related topics.
+
+Mode Setting Infrastructure
+---------------------------
+
+The i915 driver is thus far the only DRM driver which doesn't use the
+common DRM helper code to implement mode setting sequences. Thus it has
+its own tailor-made infrastructure for executing a display configuration
+change.
+
+Frontbuffer Tracking
+--------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.c
+   :doc: frontbuffer tracking
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_frontbuffer.c
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_gem.c
+   :functions: i915_gem_track_fb
+
+Display FIFO Underrun Reporting
+-------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_fifo_underrun.c
+   :doc: fifo underrun handling
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_fifo_underrun.c
+   :internal:
+
+Plane Configuration
+-------------------
+
+This section covers plane configuration and composition with the primary
+plane, sprites, cursors and overlays. This includes the infrastructure
+to do atomic vsync'ed updates of all this state and also tightly coupled
+topics like watermark setup and computation, framebuffer compression and
+panel self refresh.
+
+Atomic Plane Helpers
+--------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_atomic_plane.c
+   :doc: atomic plane helpers
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_atomic_plane.c
+   :internal:
+
+Output Probing
+--------------
+
+This section covers output probing and related infrastructure like the
+hotplug interrupt storm detection and mitigation code. Note that the
+i915 driver still uses most of the common DRM helper code for output
+probing, so those sections fully apply.
+
+Hotplug
+-------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_hotplug.c
+   :doc: Hotplug
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_hotplug.c
+   :internal:
+
+High Definition Audio
+---------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_audio.c
+   :doc: High Definition Audio over HDMI and Display Port
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_audio.c
+   :internal:
+
+.. kernel-doc:: include/drm/i915_component.h
+   :internal:
+
+Panel Self Refresh PSR (PSR/SRD)
+--------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_psr.c
+   :doc: Panel Self Refresh (PSR/SRD)
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_psr.c
+   :internal:
+
+Frame Buffer Compression (FBC)
+------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_fbc.c
+   :doc: Frame Buffer Compression (FBC)
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_fbc.c
+   :internal:
+
+Display Refresh Rate Switching (DRRS)
+-------------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+   :doc: Display Refresh Rate Switching (DRRS)
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+   :functions: intel_dp_set_drrs_state
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+   :functions: intel_edp_drrs_enable
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+   :functions: intel_edp_drrs_disable
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+   :functions: intel_edp_drrs_invalidate
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+   :functions: intel_edp_drrs_flush
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_dp.c
+   :functions: intel_dp_drrs_init
+
+DPIO
+----
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_reg.h
+   :doc: DPIO
+
+CSR firmware support for DMC
+----------------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_csr.c
+   :doc: csr support for dmc
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_csr.c
+   :internal:
+
+Video BIOS Table (VBT)
+----------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_bios.c
+   :doc: Video BIOS Table (VBT)
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_bios.c
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_vbt_defs.h
+   :internal:
+
+Memory Management and Command Submission
+========================================
+
+This sections covers all things related to the GEM implementation in the
+i915 driver.
+
+Batchbuffer Parsing
+-------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_cmd_parser.c
+   :doc: batch buffer command parser
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_cmd_parser.c
+   :internal:
+
+Batchbuffer Pools
+-----------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_batch_pool.c
+   :doc: batch pool
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_batch_pool.c
+   :internal:
+
+Logical Rings, Logical Ring Contexts and Execlists
+--------------------------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_lrc.c
+   :doc: Logical Rings, Logical Ring Contexts and Execlists
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_lrc.c
+   :internal:
+
+Global GTT views
+----------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_gtt.c
+   :doc: Global GTT views
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_gtt.c
+   :internal:
+
+GTT Fences and Swizzling
+------------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_fence.c
+   :internal:
+
+Global GTT Fence Handling
+~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_fence.c
+   :doc: fence register handling
+
+Hardware Tiling and Swizzling Details
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_fence.c
+   :doc: tiling swizzling details
+
+Object Tiling IOCTLs
+--------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_tiling.c
+   :internal:
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_tiling.c
+   :doc: buffer object tiling
+
+Buffer Object Eviction
+----------------------
+
+This section documents the interface functions for evicting buffer
+objects to make space available in the virtual gpu address spaces. Note
+that this is mostly orthogonal to shrinking buffer objects caches, which
+has the goal to make main memory (shared with the gpu through the
+unified memory architecture) available.
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_evict.c
+   :internal:
+
+Buffer Object Memory Shrinking
+------------------------------
+
+This section documents the interface function for shrinking memory usage
+of buffer object caches. Shrinking is used to make main memory
+available. Note that this is mostly orthogonal to evicting buffer
+objects, which has the goal to make space in gpu virtual address spaces.
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_gem_shrinker.c
+   :internal:
+
+GuC
+===
+
+GuC-specific firmware loader
+----------------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c
+   :doc: GuC-specific firmware loader
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_loader.c
+   :internal:
+
+GuC-based command submission
+----------------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_guc_submission.c
+   :doc: GuC-based command submission
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_guc_submission.c
+   :internal:
+
+GuC Firmware Layout
+-------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/intel_guc_fwif.h
+   :doc: GuC Firmware Layout
+
+Tracing
+=======
+
+This sections covers all things related to the tracepoints implemented
+in the i915 driver.
+
+i915_ppgtt_create and i915_ppgtt_release
+----------------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_trace.h
+   :doc: i915_ppgtt_create and i915_ppgtt_release tracepoints
+
+i915_context_create and i915_context_free
+-----------------------------------------
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_trace.h
+   :doc: i915_context_create and i915_context_free tracepoints
+
+switch_mm
+---------
+
+.. kernel-doc:: drivers/gpu/drm/i915/i915_trace.h
+   :doc: switch_mm tracepoint
+
+.. WARNING: DOCPROC directive not supported: !Cdrivers/gpu/drm/i915/i915_irq.c
--- a/Documentation/gpu/index.rst
+++ b/Documentation/gpu/index.rst
@ -0,0 +1,14 @@
+==================================
+Linux GPU Driver Developer's Guide
+==================================
+
+.. toctree::
+
+   introduction
+   drm-internals
+   drm-mm
+   drm-kms
+   drm-kms-helpers
+   drm-uapi
+   i915
+   vga-switcheroo
--- a/Documentation/gpu/introduction.rst
+++ b/Documentation/gpu/introduction.rst
@ -0,0 +1,51 @@
+============
+Introduction
+============
+
+The Linux DRM layer contains code intended to support the needs of
+complex graphics devices, usually containing programmable pipelines well
+suited to 3D graphics acceleration. Graphics drivers in the kernel may
+make use of DRM functions to make tasks like memory management,
+interrupt handling and DMA easier, and provide a uniform interface to
+applications.
+
+A note on versions: this guide covers features found in the DRM tree,
+including the TTM memory manager, output configuration and mode setting,
+and the new vblank internals, in addition to all the regular features
+found in current kernels.
+
+[Insert diagram of typical DRM stack here]
+
+Style Guidelines
+================
+
+For consistency this documentation uses American English. Abbreviations
+are written as all-uppercase, for example: DRM, KMS, IOCTL, CRTC, and so
+on. To aid in reading, documentations make full use of the markup
+characters kerneldoc provides: @parameter for function parameters,
+@member for structure members, &structure to reference structures and
+function() for functions. These all get automatically hyperlinked if
+kerneldoc for the referenced objects exists. When referencing entries in
+function vtables please use ->vfunc(). Note that kerneldoc does not
+support referencing struct members directly, so please add a reference
+to the vtable struct somewhere in the same paragraph or at least
+section.
+
+Except in special situations (to separate locked from unlocked variants)
+locking requirements for functions aren't documented in the kerneldoc.
+Instead locking should be check at runtime using e.g.
+``WARN_ON(!mutex_is_locked(...));``. Since it's much easier to ignore
+documentation than runtime noise this provides more value. And on top of
+that runtime checks do need to be updated when the locking rules change,
+increasing the chances that they're correct. Within the documentation
+the locking rules should be explained in the relevant structures: Either
+in the comment for the lock explaining what it protects, or data fields
+need a note about which lock protects them, or both.
+
+Functions which have a non-\ ``void`` return value should have a section
+called "Returns" explaining the expected return values in different
+cases and their meanings. Currently there's no consensus whether that
+section name should be all upper-case or not, and whether it should end
+in a colon or not. Go with the file-local style. Other common section
+names are "Notes" with information for dangerous or tricky corner cases,
+and "FIXME" where the interface could be cleaned up.
--- a/Documentation/gpu/kms-properties.csv
+++ b/Documentation/gpu/kms-properties.csv
@ -0,0 +1,128 @@
+Owner Module/Drivers,Group,Property Name,Type,Property Values,Object attached,Description/Restrictions
+DRM,Generic,“rotation”,BITMASK,"{ 0, ""rotate-0"" }, { 1, ""rotate-90"" }, { 2, ""rotate-180"" }, { 3, ""rotate-270"" }, { 4, ""reflect-x"" }, { 5, ""reflect-y"" }","CRTC, Plane",rotate-(degrees) rotates the image by the specified amount in degrees in counter clockwise direction. reflect-x and reflect-y reflects the image along the specified axis prior to rotation
+,,“scaling mode”,ENUM,"{ ""None"", ""Full"", ""Center"", ""Full aspect"" }",Connector,"Supported by: amdgpu, gma500, i915, nouveau and radeon."
+,Connector,“EDID”,BLOB | IMMUTABLE,0,Connector,Contains id of edid blob ptr object.
+,,“DPMS”,ENUM,"{ “On”, “Standby”, “Suspend”, “Off” }",Connector,Contains DPMS operation mode value.
+,,“PATH”,BLOB | IMMUTABLE,0,Connector,Contains topology path to a connector.
+,,“TILE”,BLOB | IMMUTABLE,0,Connector,Contains tiling information for a connector.
+,,“CRTC_ID”,OBJECT,DRM_MODE_OBJECT_CRTC,Connector,CRTC that connector is attached to (atomic)
+,Plane,“type”,ENUM | IMMUTABLE,"{ ""Overlay"", ""Primary"", ""Cursor"" }",Plane,Plane type
+,,“SRC_X”,RANGE,"Min=0, Max=UINT_MAX",Plane,Scanout source x coordinate in 16.16 fixed point (atomic)
+,,“SRC_Y”,RANGE,"Min=0, Max=UINT_MAX",Plane,Scanout source y coordinate in 16.16 fixed point (atomic)
+,,“SRC_W”,RANGE,"Min=0, Max=UINT_MAX",Plane,Scanout source width in 16.16 fixed point (atomic)
+,,“SRC_H”,RANGE,"Min=0, Max=UINT_MAX",Plane,Scanout source height in 16.16 fixed point (atomic)
+,,“CRTC_X”,SIGNED_RANGE,"Min=INT_MIN, Max=INT_MAX",Plane,Scanout CRTC (destination) x coordinate (atomic)
+,,“CRTC_Y”,SIGNED_RANGE,"Min=INT_MIN, Max=INT_MAX",Plane,Scanout CRTC (destination) y coordinate (atomic)
+,,“CRTC_W”,RANGE,"Min=0, Max=UINT_MAX",Plane,Scanout CRTC (destination) width (atomic)
+,,“CRTC_H”,RANGE,"Min=0, Max=UINT_MAX",Plane,Scanout CRTC (destination) height (atomic)
+,,“FB_ID”,OBJECT,DRM_MODE_OBJECT_FB,Plane,Scanout framebuffer (atomic)
+,,“CRTC_ID”,OBJECT,DRM_MODE_OBJECT_CRTC,Plane,CRTC that plane is attached to (atomic)
+,DVI-I,“subconnector”,ENUM,"{ “Unknown”, “DVI-D”, “DVI-A” }",Connector,TBD
+,,“select subconnector”,ENUM,"{ “Automatic”, “DVI-D”, “DVI-A” }",Connector,TBD
+,TV,“subconnector”,ENUM,"{ ""Unknown"", ""Composite"", ""SVIDEO"", ""Component"", ""SCART"" }",Connector,TBD
+,,“select subconnector”,ENUM,"{ ""Automatic"", ""Composite"", ""SVIDEO"", ""Component"", ""SCART"" }",Connector,TBD
+,,“mode”,ENUM,"{ ""NTSC_M"", ""NTSC_J"", ""NTSC_443"", ""PAL_B"" } etc.",Connector,TBD
+,,“left margin”,RANGE,"Min=0, Max=100",Connector,TBD
+,,“right margin”,RANGE,"Min=0, Max=100",Connector,TBD
+,,“top margin”,RANGE,"Min=0, Max=100",Connector,TBD
+,,“bottom margin”,RANGE,"Min=0, Max=100",Connector,TBD
+,,“brightness”,RANGE,"Min=0, Max=100",Connector,TBD
+,,“contrast”,RANGE,"Min=0, Max=100",Connector,TBD
+,,“flicker reduction”,RANGE,"Min=0, Max=100",Connector,TBD
+,,“overscan”,RANGE,"Min=0, Max=100",Connector,TBD
+,,“saturation”,RANGE,"Min=0, Max=100",Connector,TBD
+,,“hue”,RANGE,"Min=0, Max=100",Connector,TBD
+,Virtual GPU,“suggested X”,RANGE,"Min=0, Max=0xffffffff",Connector,property to suggest an X offset for a connector
+,,“suggested Y”,RANGE,"Min=0, Max=0xffffffff",Connector,property to suggest an Y offset for a connector
+,Optional,"""aspect ratio""",ENUM,"{ ""None"", ""4:3"", ""16:9"" }",Connector,TDB
+,,“dirty”,ENUM | IMMUTABLE,"{ ""Off"", ""On"", ""Annotate"" }",Connector,TBD
+,,“DEGAMMA_LUT”,BLOB,0,CRTC,DRM property to set the degamma lookup table (LUT) mapping pixel data from the framebuffer before it is given to the transformation matrix. The data is an interpreted as an array of struct drm_color_lut elements. Hardware might choose not to use the full precision of the LUT elements nor use all the elements of the LUT (for example the hardware might choose to interpolate between LUT[0] and LUT[4]).
+,,“DEGAMMA_LUT_SIZE”,RANGE | IMMUTABLE,"Min=0, Max=UINT_MAX",CRTC,DRM property to gives the size of the lookup table to be set on the DEGAMMA_LUT property (the size depends on the underlying hardware).
+,,“CTM”,BLOB,0,CRTC,DRM property to set the current transformation matrix (CTM) apply to pixel data after the lookup through the degamma LUT and before the lookup through the gamma LUT. The data is an interpreted as a struct drm_color_ctm.
+,,“GAMMA_LUT”,BLOB,0,CRTC,DRM property to set the gamma lookup table (LUT) mapping pixel data after to the transformation matrix to data sent to the connector. The data is an interpreted as an array of struct drm_color_lut elements. Hardware might choose not to use the full precision of the LUT elements nor use all the elements of the LUT (for example the hardware might choose to interpolate between LUT[0] and LUT[4]).
+,,“GAMMA_LUT_SIZE”,RANGE | IMMUTABLE,"Min=0, Max=UINT_MAX",CRTC,DRM property to gives the size of the lookup table to be set on the GAMMA_LUT property (the size depends on the underlying hardware).
+i915,Generic,"""Broadcast RGB""",ENUM,"{ ""Automatic"", ""Full"", ""Limited 16:235"" }",Connector,"When this property is set to Limited 16:235 and CTM is set, the hardware will be programmed with the result of the multiplication of CTM by the limited range matrix to ensure the pixels normaly in the range 0..1.0 are remapped to the range 16/255..235/255."
+,,“audio”,ENUM,"{ ""force-dvi"", ""off"", ""auto"", ""on"" }",Connector,TBD
+,SDVO-TV,“mode”,ENUM,"{ ""NTSC_M"", ""NTSC_J"", ""NTSC_443"", ""PAL_B"" } etc.",Connector,TBD
+,,"""left_margin""",RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,"""right_margin""",RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,"""top_margin""",RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,"""bottom_margin""",RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“hpos”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“vpos”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“contrast”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“saturation”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“hue”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“sharpness”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“flicker_filter”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“flicker_filter_adaptive”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“flicker_filter_2d”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“tv_chroma_filter”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“tv_luma_filter”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“dot_crawl”,RANGE,"Min=0, Max=1",Connector,TBD
+,SDVO-TV/LVDS,“brightness”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+CDV gma-500,Generic,"""Broadcast RGB""",ENUM,"{ “Full”, “Limited 16:235” }",Connector,TBD
+,,"""Broadcast RGB""",ENUM,"{ “off”, “auto”, “on” }",Connector,TBD
+Poulsbo,Generic,“backlight”,RANGE,"Min=0, Max=100",Connector,TBD
+,SDVO-TV,“mode”,ENUM,"{ ""NTSC_M"", ""NTSC_J"", ""NTSC_443"", ""PAL_B"" } etc.",Connector,TBD
+,,"""left_margin""",RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,"""right_margin""",RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,"""top_margin""",RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,"""bottom_margin""",RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“hpos”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“vpos”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“contrast”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“saturation”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“hue”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“sharpness”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“flicker_filter”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“flicker_filter_adaptive”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“flicker_filter_2d”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“tv_chroma_filter”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“tv_luma_filter”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+,,“dot_crawl”,RANGE,"Min=0, Max=1",Connector,TBD
+,SDVO-TV/LVDS,“brightness”,RANGE,"Min=0, Max= SDVO dependent",Connector,TBD
+armada,CRTC,"""CSC_YUV""",ENUM,"{ ""Auto"" , ""CCIR601"", ""CCIR709"" }",CRTC,TBD
+,,"""CSC_RGB""",ENUM,"{ ""Auto"", ""Computer system"", ""Studio"" }",CRTC,TBD
+,Overlay,"""colorkey""",RANGE,"Min=0, Max=0xffffff",Plane,TBD
+,,"""colorkey_min""",RANGE,"Min=0, Max=0xffffff",Plane,TBD
+,,"""colorkey_max""",RANGE,"Min=0, Max=0xffffff",Plane,TBD
+,,"""colorkey_val""",RANGE,"Min=0, Max=0xffffff",Plane,TBD
+,,"""colorkey_alpha""",RANGE,"Min=0, Max=0xffffff",Plane,TBD
+,,"""colorkey_mode""",ENUM,"{ ""disabled"", ""Y component"", ""U component"" , ""V component"", ""RGB"", “R component"", ""G component"", ""B component"" }",Plane,TBD
+,,"""brightness""",RANGE,"Min=0, Max=256 + 255",Plane,TBD
+,,"""contrast""",RANGE,"Min=0, Max=0x7fff",Plane,TBD
+,,"""saturation""",RANGE,"Min=0, Max=0x7fff",Plane,TBD
+exynos,CRTC,“mode”,ENUM,"{ ""normal"", ""blank"" }",CRTC,TBD
+,Overlay,“zpos”,RANGE,"Min=0, Max=MAX_PLANE-1",Plane,TBD
+i2c/ch7006_drv,Generic,“scale”,RANGE,"Min=0, Max=2",Connector,TBD
+,TV,“mode”,ENUM,"{ ""PAL"", ""PAL-M"",""PAL-N""}, ”PAL-Nc"" , ""PAL-60"", ""NTSC-M"", ""NTSC-J"" }",Connector,TBD
+nouveau,NV10 Overlay,"""colorkey""",RANGE,"Min=0, Max=0x01ffffff",Plane,TBD
+,,“contrast”,RANGE,"Min=0, Max=8192-1",Plane,TBD
+,,“brightness”,RANGE,"Min=0, Max=1024",Plane,TBD
+,,“hue”,RANGE,"Min=0, Max=359",Plane,TBD
+,,“saturation”,RANGE,"Min=0, Max=8192-1",Plane,TBD
+,,“iturbt_709”,RANGE,"Min=0, Max=1",Plane,TBD
+,Nv04 Overlay,“colorkey”,RANGE,"Min=0, Max=0x01ffffff",Plane,TBD
+,,“brightness”,RANGE,"Min=0, Max=1024",Plane,TBD
+,Display,“dithering mode”,ENUM,"{ ""auto"", ""off"", ""on"" }",Connector,TBD
+,,“dithering depth”,ENUM,"{ ""auto"", ""off"", ""on"", ""static 2x2"", ""dynamic 2x2"", ""temporal"" }",Connector,TBD
+,,“underscan”,ENUM,"{ ""auto"", ""6 bpc"", ""8 bpc"" }",Connector,TBD
+,,“underscan hborder”,RANGE,"Min=0, Max=128",Connector,TBD
+,,“underscan vborder”,RANGE,"Min=0, Max=128",Connector,TBD
+,,“vibrant hue”,RANGE,"Min=0, Max=180",Connector,TBD
+,,“color vibrance”,RANGE,"Min=0, Max=200",Connector,TBD
+omap,Generic,“zorder”,RANGE,"Min=0, Max=3","CRTC, Plane",TBD
+qxl,Generic,"“hotplug_mode_update""",RANGE,"Min=0, Max=1",Connector,TBD
+radeon,DVI-I,“coherent”,RANGE,"Min=0, Max=1",Connector,TBD
+,DAC enable load detect,“load detection”,RANGE,"Min=0, Max=1",Connector,TBD
+,TV Standard,"""tv standard""",ENUM,"{ ""ntsc"", ""pal"", ""pal-m"", ""pal-60"", ""ntsc-j"" , ""scart-pal"", ""pal-cn"", ""secam"" }",Connector,TBD
+,legacy TMDS PLL detect,"""tmds_pll""",ENUM,"{ ""driver"", ""bios"" }",-,TBD
+,Underscan,"""underscan""",ENUM,"{ ""off"", ""on"", ""auto"" }",Connector,TBD
+,,"""underscan hborder""",RANGE,"Min=0, Max=128",Connector,TBD
+,,"""underscan vborder""",RANGE,"Min=0, Max=128",Connector,TBD
+,Audio,“audio”,ENUM,"{ ""off"", ""on"", ""auto"" }",Connector,TBD
+,FMT Dithering,“dither”,ENUM,"{ ""off"", ""on"" }",Connector,TBD
+rcar-du,Generic,"""alpha""",RANGE,"Min=0, Max=255",Plane,TBD
+,,"""colorkey""",RANGE,"Min=0, Max=0x01ffffff",Plane,TBD
+,,"""zpos""",RANGE,"Min=1, Max=7",Plane,TBD
--- a/Documentation/gpu/vga-switcheroo.rst
+++ b/Documentation/gpu/vga-switcheroo.rst
@ -0,0 +1,98 @@
+.. _vga_switcheroo:
+
+==============
+VGA Switcheroo
+==============
+
+.. kernel-doc:: drivers/gpu/vga/vga_switcheroo.c
+   :doc: Overview
+
+Modes of Use
+============
+
+Manual switching and manual power control
+-----------------------------------------
+
+.. kernel-doc:: drivers/gpu/vga/vga_switcheroo.c
+   :doc: Manual switching and manual power control
+
+Driver power control
+--------------------
+
+.. kernel-doc:: drivers/gpu/vga/vga_switcheroo.c
+   :doc: Driver power control
+
+API
+===
+
+Public functions
+----------------
+
+.. kernel-doc:: drivers/gpu/vga/vga_switcheroo.c
+   :export:
+
+Public structures
+-----------------
+
+.. kernel-doc:: include/linux/vga_switcheroo.h
+   :functions: vga_switcheroo_handler
+
+.. kernel-doc:: include/linux/vga_switcheroo.h
+   :functions: vga_switcheroo_client_ops
+
+Public constants
+----------------
+
+.. kernel-doc:: include/linux/vga_switcheroo.h
+   :functions: vga_switcheroo_handler_flags_t
+
+.. kernel-doc:: include/linux/vga_switcheroo.h
+   :functions: vga_switcheroo_client_id
+
+.. kernel-doc:: include/linux/vga_switcheroo.h
+   :functions: vga_switcheroo_state
+
+Private structures
+------------------
+
+.. kernel-doc:: drivers/gpu/vga/vga_switcheroo.c
+   :functions: vgasr_priv
+
+.. kernel-doc:: drivers/gpu/vga/vga_switcheroo.c
+   :functions: vga_switcheroo_client
+
+Handlers
+========
+
+apple-gmux Handler
+------------------
+
+.. kernel-doc:: drivers/platform/x86/apple-gmux.c
+   :doc: Overview
+
+.. kernel-doc:: drivers/platform/x86/apple-gmux.c
+   :doc: Interrupt
+
+Graphics mux
+~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/platform/x86/apple-gmux.c
+   :doc: Graphics mux
+
+Power control
+~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/platform/x86/apple-gmux.c
+   :doc: Power control
+
+Backlight control
+~~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: drivers/platform/x86/apple-gmux.c
+   :doc: Backlight control
+
+Public functions
+~~~~~~~~~~~~~~~~
+
+.. kernel-doc:: include/linux/apple-gmux.h
+   :internal:
--- a/Documentation/index.rst
+++ b/Documentation/index.rst
@ -18,6 +18,7 @@ Contents:
   media/media_kapi
   media/dvb-drivers/index
   media/v4l-drivers/index
+   gpu/index

 Indices and tables
 ==================
--- a/42
+++ b/42
@ -879,9 +879,17 @@ F:	Documentation/devicetree/bindings/display/snps,arcpgu.txt
 ARM HDLCD DRM DRIVER
 M:	Liviu Dudau <liviu.dudau@arm.com>
 S:	Supported
-F:	drivers/gpu/drm/arm/
+F:	drivers/gpu/drm/arm/hdlcd_*
 F:	Documentation/devicetree/bindings/display/arm,hdlcd.txt

+ARM MALI-DP DRM DRIVER
+M:	Liviu Dudau <liviu.dudau@arm.com>
+M:	Brian Starkey <brian.starkey@arm.com>
+M:	Mali DP Maintainers <malidp@foss.arm.com>
+S:	Supported
+F:	drivers/gpu/drm/arm/
+F:	Documentation/devicetree/bindings/display/arm,malidp.txt
+
 ARM MFM AND FLOPPY DRIVERS
 M:	Ian Molton <spyro@f2s.com>
 S:	Maintained
@ -3821,6 +3829,17 @@ F:	include/linux/*fence.h
 F:	Documentation/dma-buf-sharing.txt
 T:	git git://git.linaro.org/people/sumitsemwal/linux-dma-buf.git

+SYNC FILE FRAMEWORK
+M:	Sumit Semwal <sumit.semwal@linaro.org>
+R:	Gustavo Padovan <gustavo@padovan.org>
+S:	Maintained
+L:	linux-media@vger.kernel.org
+L:	dri-devel@lists.freedesktop.org
+F:	drivers/dma-buf/sync_file.c
+F:	include/linux/sync_file.h
+F:	Documentation/sync_file.txt
+T:	git git://git.linaro.org/people/sumitsemwal/linux-dma-buf.git
+
 DMA GENERIC OFFLOAD ENGINE SUBSYSTEM
 M:	Vinod Koul <vinod.koul@intel.com>
 L:	dmaengine@vger.kernel.org
@ -3910,7 +3929,10 @@ T:	git git://people.freedesktop.org/~airlied/linux
 S:	Maintained
 F:	drivers/gpu/drm/
 F:	drivers/gpu/vga/
-F:	Documentation/DocBook/gpu.*
+F:	Documentation/devicetree/bindings/display/
+F:	Documentation/devicetree/bindings/gpu/
+F:	Documentation/devicetree/bindings/video/
+F:	Documentation/gpu/
 F:	include/drm/
 F:	include/uapi/drm/

@ -3962,6 +3984,7 @@ S:	Supported
 F:	drivers/gpu/drm/i915/
 F:	include/drm/i915*
 F:	include/uapi/drm/i915_drm.h
+F:	Documentation/gpu/i915.rst

 DRM DRIVERS FOR ATMEL HLCDC
 M:	Boris Brezillon <boris.brezillon@free-electrons.com>
@ -4157,6 +4180,21 @@ F:	drivers/gpu/drm/vc4/
 F:	include/uapi/drm/vc4_drm.h
 F:	Documentation/devicetree/bindings/display/brcm,bcm-vc4.txt

+DRM DRIVERS FOR TI OMAP
+M:	Tomi Valkeinen <tomi.valkeinen@ti.com>
+L:	dri-devel@lists.freedesktop.org
+S:	Maintained
+F:	drivers/gpu/drm/omapdrm/
+F:	Documentation/devicetree/bindings/display/ti/
+
+DRM DRIVERS FOR TI LCDC
+M:	Jyri Sarha <jsarha@ti.com>
+R:	Tomi Valkeinen <tomi.valkeinen@ti.com>
+L:	dri-devel@lists.freedesktop.org
+S:	Maintained
+F:	drivers/gpu/drm/tilcdc/
+F:	Documentation/devicetree/bindings/display/tilcdc/
+
 DSBR100 USB FM RADIO DRIVER
 M:	Alexey Klimov <klimov.linux@gmail.com>
 L:	linux-media@vger.kernel.org
--- a/arch/arc/boot/dts/nsimosci.dts
+++ b/arch/arc/boot/dts/nsimosci.dts
@ -19,7 +19,7 @@
 		/* this is for console on PGU */
 		/* bootargs = "console=tty0 consoleblank=0"; */
 		/* this is for console on serial */
-		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
+		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24";
 	};

 	aliases {
@ -57,9 +57,17 @@
 			no-loopback-test = <1>;
 		};

-		pgu0: pgu@f9000000 {
-			compatible = "snps,arcpgufb";
+		pguclk: pguclk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <25175000>;
+		};
+
+		pgu@f9000000 {
+			compatible = "snps,arcpgu";
 			reg = <0xf9000000 0x400>;
+			clocks = <&pguclk>;
+			clock-names = "pxlclk";
 		};

 		ps2: ps2@f9001000 {
--- a/arch/arc/boot/dts/nsimosci_hs.dts
+++ b/arch/arc/boot/dts/nsimosci_hs.dts
@ -19,7 +19,7 @@
 		/* this is for console on PGU */
 		/* bootargs = "console=tty0 consoleblank=0"; */
 		/* this is for console on serial */
-		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug";
+		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblank=0 debug video=640x480-24";
 	};

 	aliases {
@ -57,9 +57,17 @@
 			no-loopback-test = <1>;
 		};

-		pgu0: pgu@f9000000 {
-			compatible = "snps,arcpgufb";
+		pguclk: pguclk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <25175000>;
+		};
+
+		pgu@f9000000 {
+			compatible = "snps,arcpgu";
 			reg = <0xf9000000 0x400>;
+			clocks = <&pguclk>;
+			clock-names = "pxlclk";
 		};

 		ps2: ps2@f9001000 {
--- a/arch/arc/boot/dts/nsimosci_hs_idu.dts
+++ b/arch/arc/boot/dts/nsimosci_hs_idu.dts
@ -17,7 +17,7 @@

 	chosen {
 		/* this is for console on serial */
-		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblan=0 debug";
+		bootargs = "earlycon=uart8250,mmio32,0xf0000000,115200n8 console=tty0 console=ttyS0,115200n8 consoleblan=0 debug video=640x480-24";
 	};

 	aliases {
@ -76,9 +76,17 @@
 			no-loopback-test = <1>;
 		};

-		pgu0: pgu@f9000000 {
-			compatible = "snps,arcpgufb";
+		pguclk: pguclk {
+			#clock-cells = <0>;
+			compatible = "fixed-clock";
+			clock-frequency = <25175000>;
+		};
+
+		pgu@f9000000 {
+			compatible = "snps,arcpgu";
 			reg = <0xf9000000 0x400>;
+			clocks = <&pguclk>;
+			clock-names = "pxlclk";
 		};

 		ps2: ps2@f9001000 {
--- a/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
+++ b/arch/arc/boot/dts/vdk_axs10x_mb.dtsi
@ -23,6 +23,11 @@
 				#clock-cells = <0>;
 			};

+			pguclk: pguclk {
+				#clock-cells = <0>;
+				compatible = "fixed-clock";
+				clock-frequency = <25175000>;
+			};
 		};

 		ethernet@0x18000 {
@ -75,11 +80,11 @@
 		};

 /* PGU output directly sent to virtual LCD screen; hdmi controller not modelled */
-		pgu@0x17000 {
-			compatible = "snps,arcpgufb";
+		pgu@17000 {
+			compatible = "snps,arcpgu";
 			reg = <0x17000 0x400>;
-			clock-frequency = <51000000>; /* PGU'clock is initated in init function */
-			/* interrupts = <5>;   PGU interrupts not used, this vector is used for ps2 below */
+			clocks = <&pguclk>;
+			clock-names = "pxlclk";
 		};

 /* VDK has additional ps2 keyboard/mouse interface integrated in LCD screen model */
--- a/arch/arc/boot/dts/vdk_hs38_smp.dts
+++ b/arch/arc/boot/dts/vdk_hs38_smp.dts
@ -16,6 +16,6 @@
 	compatible = "snps,axs103";

 	chosen {
-		bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0";
+		bootargs = "earlycon=uart8250,mmio32,0xe0022000,115200n8 console=tty0 console=ttyS3,115200n8 consoleblank=0 video=640x480-24";
 	};
 };
--- a/arch/arc/configs/nsimosci_defconfig
+++ b/arch/arc/configs/nsimosci_defconfig
@ -58,7 +58,8 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=1
 CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
-CONFIG_FB=y
+CONFIG_DRM=y
+CONFIG_DRM_ARCPGU=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_HID is not set
--- a/arch/arc/configs/nsimosci_hs_defconfig
+++ b/arch/arc/configs/nsimosci_hs_defconfig
@ -57,7 +57,8 @@ CONFIG_SERIAL_8250_RUNTIME_UARTS=1
 CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
-CONFIG_FB=y
+CONFIG_DRM=y
+CONFIG_DRM_ARCPGU=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_HID is not set
--- a/arch/arc/configs/nsimosci_hs_smp_defconfig
+++ b/arch/arc/configs/nsimosci_hs_smp_defconfig
@ -70,7 +70,8 @@ CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
-CONFIG_FB=y
+CONFIG_DRM=y
+CONFIG_DRM_ARCPGU=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
 CONFIG_LOGO=y
 # CONFIG_HID is not set
--- a/arch/arc/configs/vdk_hs38_smp_defconfig
+++ b/arch/arc/configs/vdk_hs38_smp_defconfig
@ -63,12 +63,9 @@ CONFIG_SERIAL_8250_DW=y
 CONFIG_SERIAL_OF_PLATFORM=y
 # CONFIG_HW_RANDOM is not set
 # CONFIG_HWMON is not set
-CONFIG_FB=y
-CONFIG_ARCPGU_RGB888=y
-CONFIG_ARCPGU_DISPTYPE=0
-# CONFIG_VGA_CONSOLE is not set
+CONFIG_DRM=y
+CONFIG_DRM_ARCPGU=y
 CONFIG_FRAMEBUFFER_CONSOLE=y
-CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
 CONFIG_LOGO=y
 # CONFIG_LOGO_LINUX_MONO is not set
 # CONFIG_LOGO_LINUX_VGA16 is not set
--- a/arch/x86/kernel/early-quirks.c
+++ b/arch/x86/kernel/early-quirks.c
@ -237,36 +237,19 @@ static void __init intel_remapping_check(int num, int slot, int func)
 * despite the efforts of the "RAM buffer" approach, which simply rounds
 * memory boundaries up to 64M to try to catch space that may decode
 * as RAM and so is not suitable for MMIO.
- *
- * And yes, so far on current devices the base addr is always under 4G.
 */
-static u32 __init intel_stolen_base(int num, int slot, int func, size_t stolen_size)
-{
-	u32 base;
-
-	/*
-	 * For the PCI IDs in this quirk, the stolen base is always
-	 * in 0x5c, aka the BDSM register (yes that's really what
-	 * it's called).
-	 */
-	base = read_pci_config(num, slot, func, 0x5c);
-	base &= ~((1<<20) - 1);
-
-	return base;
-}

 #define KB(x)	((x) * 1024UL)
 #define MB(x)	(KB (KB (x)))
-#define GB(x)	(MB (KB (x)))

 static size_t __init i830_tseg_size(void)
 {
-	u8 tmp = read_pci_config_byte(0, 0, 0, I830_ESMRAMC);
+	u8 esmramc = read_pci_config_byte(0, 0, 0, I830_ESMRAMC);

-	if (!(tmp & TSEG_ENABLE))
+	if (!(esmramc & TSEG_ENABLE))
 		return 0;

-	if (tmp & I830_TSEG_SIZE_1M)
+	if (esmramc & I830_TSEG_SIZE_1M)
 		return MB(1);
 	else
 		return KB(512);
@ -274,27 +257,26 @@ static size_t __init i830_tseg_size(void)

 static size_t __init i845_tseg_size(void)
 {
-	u8 tmp = read_pci_config_byte(0, 0, 0, I845_ESMRAMC);
+	u8 esmramc = read_pci_config_byte(0, 0, 0, I845_ESMRAMC);
+	u8 tseg_size = esmramc & I845_TSEG_SIZE_MASK;

-	if (!(tmp & TSEG_ENABLE))
+	if (!(esmramc & TSEG_ENABLE))
 		return 0;

-	switch (tmp & I845_TSEG_SIZE_MASK) {
-	case I845_TSEG_SIZE_512K:
-		return KB(512);
-	case I845_TSEG_SIZE_1M:
-		return MB(1);
+	switch (tseg_size) {
+	case I845_TSEG_SIZE_512K:	return KB(512);
+	case I845_TSEG_SIZE_1M:		return MB(1);
 	default:
-		WARN_ON(1);
-		return 0;
+		WARN(1, "Unknown ESMRAMC value: %x!\n", esmramc);
 	}
+	return 0;
 }

 static size_t __init i85x_tseg_size(void)
 {
-	u8 tmp = read_pci_config_byte(0, 0, 0, I85X_ESMRAMC);
+	u8 esmramc = read_pci_config_byte(0, 0, 0, I85X_ESMRAMC);

-	if (!(tmp & TSEG_ENABLE))
+	if (!(esmramc & TSEG_ENABLE))
 		return 0;

 	return MB(1);
@ -314,285 +296,287 @@ static size_t __init i85x_mem_size(void)
 * On 830/845/85x the stolen memory base isn't available in any
 * register. We need to calculate it as TOM-TSEG_SIZE-stolen_size.
 */
-static u32 __init i830_stolen_base(int num, int slot, int func, size_t stolen_size)
+static phys_addr_t __init i830_stolen_base(int num, int slot, int func,
+					   size_t stolen_size)
 {
-	return i830_mem_size() - i830_tseg_size() - stolen_size;
+	return (phys_addr_t)i830_mem_size() - i830_tseg_size() - stolen_size;
 }

-static u32 __init i845_stolen_base(int num, int slot, int func, size_t stolen_size)
+static phys_addr_t __init i845_stolen_base(int num, int slot, int func,
+					   size_t stolen_size)
 {
-	return i830_mem_size() - i845_tseg_size() - stolen_size;
+	return (phys_addr_t)i830_mem_size() - i845_tseg_size() - stolen_size;
 }

-static u32 __init i85x_stolen_base(int num, int slot, int func, size_t stolen_size)
+static phys_addr_t __init i85x_stolen_base(int num, int slot, int func,
+					   size_t stolen_size)
 {
-	return i85x_mem_size() - i85x_tseg_size() - stolen_size;
+	return (phys_addr_t)i85x_mem_size() - i85x_tseg_size() - stolen_size;
 }

-static u32 __init i865_stolen_base(int num, int slot, int func, size_t stolen_size)
+static phys_addr_t __init i865_stolen_base(int num, int slot, int func,
+					   size_t stolen_size)
 {
+	u16 toud;
+
 	/*
 	 * FIXME is the graphics stolen memory region
 	 * always at TOUD? Ie. is it always the last
 	 * one to be allocated by the BIOS?
 	 */
-	return read_pci_config_16(0, 0, 0, I865_TOUD) << 16;
+	toud = read_pci_config_16(0, 0, 0, I865_TOUD);
+
+	return (phys_addr_t)toud << 16;
+}
+
+static phys_addr_t __init gen3_stolen_base(int num, int slot, int func,
+					   size_t stolen_size)
+{
+	u32 bsm;
+
+	/* Almost universally we can find the Graphics Base of Stolen Memory
+	 * at register BSM (0x5c) in the igfx configuration space. On a few
+	 * (desktop) machines this is also mirrored in the bridge device at
+	 * different locations, or in the MCHBAR.
+	 */
+	bsm = read_pci_config(num, slot, func, INTEL_BSM);
+
+	return (phys_addr_t)bsm & INTEL_BSM_MASK;
 }

 static size_t __init i830_stolen_size(int num, int slot, int func)
 {
-	size_t stolen_size;
 	u16 gmch_ctrl;
+	u16 gms;

 	gmch_ctrl = read_pci_config_16(0, 0, 0, I830_GMCH_CTRL);
+	gms = gmch_ctrl & I830_GMCH_GMS_MASK;

-	switch (gmch_ctrl & I830_GMCH_GMS_MASK) {
-	case I830_GMCH_GMS_STOLEN_512:
-		stolen_size = KB(512);
-		break;
-	case I830_GMCH_GMS_STOLEN_1024:
-		stolen_size = MB(1);
-		break;
-	case I830_GMCH_GMS_STOLEN_8192:
-		stolen_size = MB(8);
-		break;
-	case I830_GMCH_GMS_LOCAL:
-		/* local memory isn't part of the normal address space */
-		stolen_size = 0;
-		break;
+	switch (gms) {
+	case I830_GMCH_GMS_STOLEN_512:	return KB(512);
+	case I830_GMCH_GMS_STOLEN_1024:	return MB(1);
+	case I830_GMCH_GMS_STOLEN_8192:	return MB(8);
+	/* local memory isn't part of the normal address space */
+	case I830_GMCH_GMS_LOCAL:	return 0;
 	default:
-		return 0;
+		WARN(1, "Unknown GMCH_CTRL value: %x!\n", gmch_ctrl);
 	}

-	return stolen_size;
+	return 0;
 }

 static size_t __init gen3_stolen_size(int num, int slot, int func)
 {
-	size_t stolen_size;
 	u16 gmch_ctrl;
+	u16 gms;

 	gmch_ctrl = read_pci_config_16(0, 0, 0, I830_GMCH_CTRL);
+	gms = gmch_ctrl & I855_GMCH_GMS_MASK;

-	switch (gmch_ctrl & I855_GMCH_GMS_MASK) {
-	case I855_GMCH_GMS_STOLEN_1M:
-		stolen_size = MB(1);
-		break;
-	case I855_GMCH_GMS_STOLEN_4M:
-		stolen_size = MB(4);
-		break;
-	case I855_GMCH_GMS_STOLEN_8M:
-		stolen_size = MB(8);
-		break;
-	case I855_GMCH_GMS_STOLEN_16M:
-		stolen_size = MB(16);
-		break;
-	case I855_GMCH_GMS_STOLEN_32M:
-		stolen_size = MB(32);
-		break;
-	case I915_GMCH_GMS_STOLEN_48M:
-		stolen_size = MB(48);
-		break;
-	case I915_GMCH_GMS_STOLEN_64M:
-		stolen_size = MB(64);
-		break;
-	case G33_GMCH_GMS_STOLEN_128M:
-		stolen_size = MB(128);
-		break;
-	case G33_GMCH_GMS_STOLEN_256M:
-		stolen_size = MB(256);
-		break;
-	case INTEL_GMCH_GMS_STOLEN_96M:
-		stolen_size = MB(96);
-		break;
-	case INTEL_GMCH_GMS_STOLEN_160M:
-		stolen_size = MB(160);
-		break;
-	case INTEL_GMCH_GMS_STOLEN_224M:
-		stolen_size = MB(224);
-		break;
-	case INTEL_GMCH_GMS_STOLEN_352M:
-		stolen_size = MB(352);
-		break;
+	switch (gms) {
+	case I855_GMCH_GMS_STOLEN_1M:	return MB(1);
+	case I855_GMCH_GMS_STOLEN_4M:	return MB(4);
+	case I855_GMCH_GMS_STOLEN_8M:	return MB(8);
+	case I855_GMCH_GMS_STOLEN_16M:	return MB(16);
+	case I855_GMCH_GMS_STOLEN_32M:	return MB(32);
+	case I915_GMCH_GMS_STOLEN_48M:	return MB(48);
+	case I915_GMCH_GMS_STOLEN_64M:	return MB(64);
+	case G33_GMCH_GMS_STOLEN_128M:	return MB(128);
+	case G33_GMCH_GMS_STOLEN_256M:	return MB(256);
+	case INTEL_GMCH_GMS_STOLEN_96M:	return MB(96);
+	case INTEL_GMCH_GMS_STOLEN_160M:return MB(160);
+	case INTEL_GMCH_GMS_STOLEN_224M:return MB(224);
+	case INTEL_GMCH_GMS_STOLEN_352M:return MB(352);
 	default:
-		stolen_size = 0;
-		break;
+		WARN(1, "Unknown GMCH_CTRL value: %x!\n", gmch_ctrl);
 	}

-	return stolen_size;
+	return 0;
 }

 static size_t __init gen6_stolen_size(int num, int slot, int func)
 {
 	u16 gmch_ctrl;
+	u16 gms;

 	gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL);
-	gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
-	gmch_ctrl &= SNB_GMCH_GMS_MASK;
+	gms = (gmch_ctrl >> SNB_GMCH_GMS_SHIFT) & SNB_GMCH_GMS_MASK;

-	return gmch_ctrl << 25; /* 32 MB units */
+	return (size_t)gms * MB(32);
 }

 static size_t __init gen8_stolen_size(int num, int slot, int func)
 {
 	u16 gmch_ctrl;
+	u16 gms;

 	gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL);
-	gmch_ctrl >>= BDW_GMCH_GMS_SHIFT;
-	gmch_ctrl &= BDW_GMCH_GMS_MASK;
-	return gmch_ctrl << 25; /* 32 MB units */
+	gms = (gmch_ctrl >> BDW_GMCH_GMS_SHIFT) & BDW_GMCH_GMS_MASK;
+
+	return (size_t)gms * MB(32);
 }

 static size_t __init chv_stolen_size(int num, int slot, int func)
 {
 	u16 gmch_ctrl;
+	u16 gms;

 	gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL);
-	gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
-	gmch_ctrl &= SNB_GMCH_GMS_MASK;
+	gms = (gmch_ctrl >> SNB_GMCH_GMS_SHIFT) & SNB_GMCH_GMS_MASK;

 	/*
 	 * 0x0  to 0x10: 32MB increments starting at 0MB
 	 * 0x11 to 0x16: 4MB increments starting at 8MB
 	 * 0x17 to 0x1d: 4MB increments start at 36MB
 	 */
-	if (gmch_ctrl < 0x11)
-		return gmch_ctrl << 25;
-	else if (gmch_ctrl < 0x17)
-		return (gmch_ctrl - 0x11 + 2) << 22;
+	if (gms < 0x11)
+		return (size_t)gms * MB(32);
+	else if (gms < 0x17)
+		return (size_t)(gms - 0x11 + 2) * MB(4);
 	else
-		return (gmch_ctrl - 0x17 + 9) << 22;
+		return (size_t)(gms - 0x17 + 9) * MB(4);
 }

-struct intel_stolen_funcs {
-	size_t (*size)(int num, int slot, int func);
-	u32 (*base)(int num, int slot, int func, size_t size);
-};
-
 static size_t __init gen9_stolen_size(int num, int slot, int func)
 {
 	u16 gmch_ctrl;
+	u16 gms;

 	gmch_ctrl = read_pci_config_16(num, slot, func, SNB_GMCH_CTRL);
-	gmch_ctrl >>= BDW_GMCH_GMS_SHIFT;
-	gmch_ctrl &= BDW_GMCH_GMS_MASK;
+	gms = (gmch_ctrl >> BDW_GMCH_GMS_SHIFT) & BDW_GMCH_GMS_MASK;

-	if (gmch_ctrl < 0xf0)
-		return gmch_ctrl << 25; /* 32 MB units */
+	/* 0x0  to 0xef: 32MB increments starting at 0MB */
+	/* 0xf0 to 0xfe: 4MB increments starting at 4MB */
+	if (gms < 0xf0)
+		return (size_t)gms * MB(32);
 	else
-		/* 4MB increments starting at 0xf0 for 4MB */
-		return (gmch_ctrl - 0xf0 + 1) << 22;
+		return (size_t)(gms - 0xf0 + 1) * MB(4);
 }

-typedef size_t (*stolen_size_fn)(int num, int slot, int func);
-
-static const struct intel_stolen_funcs i830_stolen_funcs __initconst = {
-	.base = i830_stolen_base,
-	.size = i830_stolen_size,
+struct intel_early_ops {
+	size_t (*stolen_size)(int num, int slot, int func);
+	phys_addr_t (*stolen_base)(int num, int slot, int func, size_t size);
 };

-static const struct intel_stolen_funcs i845_stolen_funcs __initconst = {
-	.base = i845_stolen_base,
-	.size = i830_stolen_size,
+static const struct intel_early_ops i830_early_ops __initconst = {
+	.stolen_base = i830_stolen_base,
+	.stolen_size = i830_stolen_size,
 };

-static const struct intel_stolen_funcs i85x_stolen_funcs __initconst = {
-	.base = i85x_stolen_base,
-	.size = gen3_stolen_size,
+static const struct intel_early_ops i845_early_ops __initconst = {
+	.stolen_base = i845_stolen_base,
+	.stolen_size = i830_stolen_size,
 };

-static const struct intel_stolen_funcs i865_stolen_funcs __initconst = {
-	.base = i865_stolen_base,
-	.size = gen3_stolen_size,
+static const struct intel_early_ops i85x_early_ops __initconst = {
+	.stolen_base = i85x_stolen_base,
+	.stolen_size = gen3_stolen_size,
 };

-static const struct intel_stolen_funcs gen3_stolen_funcs __initconst = {
-	.base = intel_stolen_base,
-	.size = gen3_stolen_size,
+static const struct intel_early_ops i865_early_ops __initconst = {
+	.stolen_base = i865_stolen_base,
+	.stolen_size = gen3_stolen_size,
 };

-static const struct intel_stolen_funcs gen6_stolen_funcs __initconst = {
-	.base = intel_stolen_base,
-	.size = gen6_stolen_size,
+static const struct intel_early_ops gen3_early_ops __initconst = {
+	.stolen_base = gen3_stolen_base,
+	.stolen_size = gen3_stolen_size,
 };

-static const struct intel_stolen_funcs gen8_stolen_funcs __initconst = {
-	.base = intel_stolen_base,
-	.size = gen8_stolen_size,
+static const struct intel_early_ops gen6_early_ops __initconst = {
+	.stolen_base = gen3_stolen_base,
+	.stolen_size = gen6_stolen_size,
 };

-static const struct intel_stolen_funcs gen9_stolen_funcs __initconst = {
-	.base = intel_stolen_base,
-	.size = gen9_stolen_size,
+static const struct intel_early_ops gen8_early_ops __initconst = {
+	.stolen_base = gen3_stolen_base,
+	.stolen_size = gen8_stolen_size,
 };

-static const struct intel_stolen_funcs chv_stolen_funcs __initconst = {
-	.base = intel_stolen_base,
-	.size = chv_stolen_size,
+static const struct intel_early_ops gen9_early_ops __initconst = {
+	.stolen_base = gen3_stolen_base,
+	.stolen_size = gen9_stolen_size,
 };

-static const struct pci_device_id intel_stolen_ids[] __initconst = {
-	INTEL_I830_IDS(&i830_stolen_funcs),
-	INTEL_I845G_IDS(&i845_stolen_funcs),
-	INTEL_I85X_IDS(&i85x_stolen_funcs),
-	INTEL_I865G_IDS(&i865_stolen_funcs),
-	INTEL_I915G_IDS(&gen3_stolen_funcs),
-	INTEL_I915GM_IDS(&gen3_stolen_funcs),
-	INTEL_I945G_IDS(&gen3_stolen_funcs),
-	INTEL_I945GM_IDS(&gen3_stolen_funcs),
-	INTEL_VLV_M_IDS(&gen6_stolen_funcs),
-	INTEL_VLV_D_IDS(&gen6_stolen_funcs),
-	INTEL_PINEVIEW_IDS(&gen3_stolen_funcs),
-	INTEL_I965G_IDS(&gen3_stolen_funcs),
-	INTEL_G33_IDS(&gen3_stolen_funcs),
-	INTEL_I965GM_IDS(&gen3_stolen_funcs),
-	INTEL_GM45_IDS(&gen3_stolen_funcs),
-	INTEL_G45_IDS(&gen3_stolen_funcs),
-	INTEL_IRONLAKE_D_IDS(&gen3_stolen_funcs),
-	INTEL_IRONLAKE_M_IDS(&gen3_stolen_funcs),
-	INTEL_SNB_D_IDS(&gen6_stolen_funcs),
-	INTEL_SNB_M_IDS(&gen6_stolen_funcs),
-	INTEL_IVB_M_IDS(&gen6_stolen_funcs),
-	INTEL_IVB_D_IDS(&gen6_stolen_funcs),
-	INTEL_HSW_D_IDS(&gen6_stolen_funcs),
-	INTEL_HSW_M_IDS(&gen6_stolen_funcs),
-	INTEL_BDW_M_IDS(&gen8_stolen_funcs),
-	INTEL_BDW_D_IDS(&gen8_stolen_funcs),
-	INTEL_CHV_IDS(&chv_stolen_funcs),
-	INTEL_SKL_IDS(&gen9_stolen_funcs),
-	INTEL_BXT_IDS(&gen9_stolen_funcs),
-	INTEL_KBL_IDS(&gen9_stolen_funcs),
+static const struct intel_early_ops chv_early_ops __initconst = {
+	.stolen_base = gen3_stolen_base,
+	.stolen_size = chv_stolen_size,
 };

-static void __init intel_graphics_stolen(int num, int slot, int func)
+static const struct pci_device_id intel_early_ids[] __initconst = {
+	INTEL_I830_IDS(&i830_early_ops),
+	INTEL_I845G_IDS(&i845_early_ops),
+	INTEL_I85X_IDS(&i85x_early_ops),
+	INTEL_I865G_IDS(&i865_early_ops),
+	INTEL_I915G_IDS(&gen3_early_ops),
+	INTEL_I915GM_IDS(&gen3_early_ops),
+	INTEL_I945G_IDS(&gen3_early_ops),
+	INTEL_I945GM_IDS(&gen3_early_ops),
+	INTEL_VLV_M_IDS(&gen6_early_ops),
+	INTEL_VLV_D_IDS(&gen6_early_ops),
+	INTEL_PINEVIEW_IDS(&gen3_early_ops),
+	INTEL_I965G_IDS(&gen3_early_ops),
+	INTEL_G33_IDS(&gen3_early_ops),
+	INTEL_I965GM_IDS(&gen3_early_ops),
+	INTEL_GM45_IDS(&gen3_early_ops),
+	INTEL_G45_IDS(&gen3_early_ops),
+	INTEL_IRONLAKE_D_IDS(&gen3_early_ops),
+	INTEL_IRONLAKE_M_IDS(&gen3_early_ops),
+	INTEL_SNB_D_IDS(&gen6_early_ops),
+	INTEL_SNB_M_IDS(&gen6_early_ops),
+	INTEL_IVB_M_IDS(&gen6_early_ops),
+	INTEL_IVB_D_IDS(&gen6_early_ops),
+	INTEL_HSW_D_IDS(&gen6_early_ops),
+	INTEL_HSW_M_IDS(&gen6_early_ops),
+	INTEL_BDW_M_IDS(&gen8_early_ops),
+	INTEL_BDW_D_IDS(&gen8_early_ops),
+	INTEL_CHV_IDS(&chv_early_ops),
+	INTEL_SKL_IDS(&gen9_early_ops),
+	INTEL_BXT_IDS(&gen9_early_ops),
+	INTEL_KBL_IDS(&gen9_early_ops),
+};
+
+static void __init
+intel_graphics_stolen(int num, int slot, int func,
+		      const struct intel_early_ops *early_ops)
 {
+	phys_addr_t base, end;
 	size_t size;
+
+	size = early_ops->stolen_size(num, slot, func);
+	base = early_ops->stolen_base(num, slot, func, size);
+
+	if (!size || !base)
+		return;
+
+	end = base + size - 1;
+	printk(KERN_INFO "Reserving Intel graphics memory at %pa-%pa\n",
+	       &base, &end);
+
+	/* Mark this space as reserved */
+	e820_add_region(base, size, E820_RESERVED);
+	sanitize_e820_map(e820.map, ARRAY_SIZE(e820.map), &e820.nr_map);
+}
+
+static void __init intel_graphics_quirks(int num, int slot, int func)
+{
+	const struct intel_early_ops *early_ops;
+	u16 device;
 	int i;
-	u32 start;
-	u16 device, subvendor, subdevice;

 	device = read_pci_config_16(num, slot, func, PCI_DEVICE_ID);
-	subvendor = read_pci_config_16(num, slot, func,
-				       PCI_SUBSYSTEM_VENDOR_ID);
-	subdevice = read_pci_config_16(num, slot, func, PCI_SUBSYSTEM_ID);

-	for (i = 0; i < ARRAY_SIZE(intel_stolen_ids); i++) {
-		if (intel_stolen_ids[i].device == device) {
-			const struct intel_stolen_funcs *stolen_funcs =
-				(const struct intel_stolen_funcs *)intel_stolen_ids[i].driver_data;
-			size = stolen_funcs->size(num, slot, func);
-			start = stolen_funcs->base(num, slot, func, size);
-			if (size && start) {
-				printk(KERN_INFO "Reserving Intel graphics stolen memory at 0x%x-0x%x\n",
-				       start, start + (u32)size - 1);
-				/* Mark this space as reserved */
-				e820_add_region(start, size, E820_RESERVED);
-				sanitize_e820_map(e820.map,
-						  ARRAY_SIZE(e820.map),
-						  &e820.nr_map);
-			}
-			return;
-		}
+	for (i = 0; i < ARRAY_SIZE(intel_early_ids); i++) {
+		kernel_ulong_t driver_data = intel_early_ids[i].driver_data;
+
+		if (intel_early_ids[i].device != device)
+			continue;
+
+		early_ops = (typeof(early_ops))driver_data;
+
+		intel_graphics_stolen(num, slot, func, early_ops);
+
+		return;
 	}
 }

@ -690,7 +674,7 @@ static struct chipset early_qrk[] __initdata = {
 	{ PCI_VENDOR_ID_INTEL, 0x3406, PCI_CLASS_BRIDGE_HOST,
 	  PCI_BASE_CLASS_BRIDGE, 0, intel_remapping_check },
 	{ PCI_VENDOR_ID_INTEL, PCI_ANY_ID, PCI_CLASS_DISPLAY_VGA, PCI_ANY_ID,
-	  QFLAG_APPLY_ONCE, intel_graphics_stolen },
+	  QFLAG_APPLY_ONCE, intel_graphics_quirks },
 	/*
 	 * HPET on the current version of the Baytrail platform has accuracy
 	 * problems: it will halt in deep idle state - so we disable it.
--- a/drivers/char/agp/intel-gtt.c
+++ b/drivers/char/agp/intel-gtt.c
@ -840,6 +840,14 @@ static bool i830_check_flags(unsigned int flags)
 	return false;
 }

+void intel_gtt_insert_page(dma_addr_t addr,
+			   unsigned int pg,
+			   unsigned int flags)
+{
+	intel_private.driver->write_entry(addr, pg, flags);
+}
+EXPORT_SYMBOL(intel_gtt_insert_page);
+
 void intel_gtt_insert_sg_entries(struct sg_table *st,
 				 unsigned int pg_start,
 				 unsigned int flags)
--- a/drivers/dma-buf/Kconfig
+++ b/drivers/dma-buf/Kconfig
@ -1,11 +1,20 @@
 menu "DMABUF options"

 config SYNC_FILE
-	bool "sync_file support for fences"
+	bool "Explicit Synchronization Framework"
 	default n
 	select ANON_INODES
 	select DMA_SHARED_BUFFER
 	---help---
-	  This option enables the fence framework synchronization to export
-	  sync_files to userspace that can represent one or more fences.
+	  The Sync File Framework adds explicit syncronization via
+	  userspace. It enables send/receive 'struct fence' objects to/from
+	  userspace via Sync File fds for synchronization between drivers via
+	  userspace components. It has been ported from Android.
+
+	  The first and main user for this is graphics in which a fence is
+	  associated with a buffer. When a job is submitted to the GPU a fence
+	  is attached to the buffer and is transferred via userspace, using Sync
+	  Files fds, to the DRM driver for example. More details at
+	  Documentation/sync_file.txt.
+
 endmenu
--- a/drivers/dma-buf/Makefile
+++ b/drivers/dma-buf/Makefile
@ -1,2 +1,2 @@
-obj-y := dma-buf.o fence.o reservation.o seqno-fence.o
+obj-y := dma-buf.o fence.o reservation.o seqno-fence.o fence-array.o
 obj-$(CONFIG_SYNC_FILE)		+= sync_file.o
--- a/drivers/dma-buf/dma-buf.c
+++ b/drivers/dma-buf/dma-buf.c
@ -334,6 +334,7 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
 	struct reservation_object *resv = exp_info->resv;
 	struct file *file;
 	size_t alloc_size = sizeof(struct dma_buf);
+	int ret;

 	if (!exp_info->resv)
 		alloc_size += sizeof(struct reservation_object);
@ -357,8 +358,8 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)

 	dmabuf = kzalloc(alloc_size, GFP_KERNEL);
 	if (!dmabuf) {
-		module_put(exp_info->owner);
-		return ERR_PTR(-ENOMEM);
+		ret = -ENOMEM;
+		goto err_module;
 	}

 	dmabuf->priv = exp_info->priv;
@ -379,8 +380,8 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
 	file = anon_inode_getfile("dmabuf", &dma_buf_fops, dmabuf,
 					exp_info->flags);
 	if (IS_ERR(file)) {
-		kfree(dmabuf);
-		return ERR_CAST(file);
+		ret = PTR_ERR(file);
+		goto err_dmabuf;
 	}

 	file->f_mode |= FMODE_LSEEK;
@ -394,6 +395,12 @@ struct dma_buf *dma_buf_export(const struct dma_buf_export_info *exp_info)
 	mutex_unlock(&db_list.lock);

 	return dmabuf;
+
+err_dmabuf:
+	kfree(dmabuf);
+err_module:
+	module_put(exp_info->owner);
+	return ERR_PTR(ret);
 }
 EXPORT_SYMBOL_GPL(dma_buf_export);

@ -824,7 +831,7 @@ void dma_buf_vunmap(struct dma_buf *dmabuf, void *vaddr)
 EXPORT_SYMBOL_GPL(dma_buf_vunmap);

 #ifdef CONFIG_DEBUG_FS
-static int dma_buf_describe(struct seq_file *s)
+static int dma_buf_debug_show(struct seq_file *s, void *unused)
 {
 	int ret;
 	struct dma_buf *buf_obj;
@ -879,17 +886,9 @@ static int dma_buf_describe(struct seq_file *s)
 	return 0;
 }

-static int dma_buf_show(struct seq_file *s, void *unused)
-{
-	void (*func)(struct seq_file *) = s->private;
-
-	func(s);
-	return 0;
-}
-
 static int dma_buf_debug_open(struct inode *inode, struct file *file)
 {
-	return single_open(file, dma_buf_show, inode->i_private);
+	return single_open(file, dma_buf_debug_show, NULL);
 }

 static const struct file_operations dma_buf_debug_fops = {
@ -903,20 +902,23 @@ static struct dentry *dma_buf_debugfs_dir;

 static int dma_buf_init_debugfs(void)
 {
+	struct dentry *d;
 	int err = 0;

-	dma_buf_debugfs_dir = debugfs_create_dir("dma_buf", NULL);
+	d = debugfs_create_dir("dma_buf", NULL);
+	if (IS_ERR(d))
+		return PTR_ERR(d);

-	if (IS_ERR(dma_buf_debugfs_dir)) {
-		err = PTR_ERR(dma_buf_debugfs_dir);
-		dma_buf_debugfs_dir = NULL;
-		return err;
-	}
+	dma_buf_debugfs_dir = d;

-	err = dma_buf_debugfs_create_file("bufinfo", dma_buf_describe);
-
-	if (err)
+	d = debugfs_create_file("bufinfo", S_IRUGO, dma_buf_debugfs_dir,
+				NULL, &dma_buf_debug_fops);
+	if (IS_ERR(d)) {
 		pr_debug("dma_buf: debugfs: failed to create node bufinfo\n");
+		debugfs_remove_recursive(dma_buf_debugfs_dir);
+		dma_buf_debugfs_dir = NULL;
+		err = PTR_ERR(d);
+	}

 	return err;
 }
@ -926,17 +928,6 @@ static void dma_buf_uninit_debugfs(void)
 	if (dma_buf_debugfs_dir)
 		debugfs_remove_recursive(dma_buf_debugfs_dir);
 }
-
-int dma_buf_debugfs_create_file(const char *name,
-				int (*write)(struct seq_file *))
-{
-	struct dentry *d;
-
-	d = debugfs_create_file(name, S_IRUGO, dma_buf_debugfs_dir,
-			write, &dma_buf_debug_fops);
-
-	return PTR_ERR_OR_ZERO(d);
-}
 #else
 static inline int dma_buf_init_debugfs(void)
 {
--- a/drivers/dma-buf/fence-array.c
+++ b/drivers/dma-buf/fence-array.c
@ -0,0 +1,144 @@
+/*
+ * fence-array: aggregate fences to be waited together
+ *
+ * Copyright (C) 2016 Collabora Ltd
+ * Copyright (C) 2016 Advanced Micro Devices, Inc.
+ * Authors:
+ *	Gustavo Padovan <gustavo@padovan.org>
+ *	Christian König <christian.koenig@amd.com>
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 as published by
+ * the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+
+#include <linux/export.h>
+#include <linux/slab.h>
+#include <linux/fence-array.h>
+
+static void fence_array_cb_func(struct fence *f, struct fence_cb *cb);
+
+static const char *fence_array_get_driver_name(struct fence *fence)
+{
+	return "fence_array";
+}
+
+static const char *fence_array_get_timeline_name(struct fence *fence)
+{
+	return "unbound";
+}
+
+static void fence_array_cb_func(struct fence *f, struct fence_cb *cb)
+{
+	struct fence_array_cb *array_cb =
+		container_of(cb, struct fence_array_cb, cb);
+	struct fence_array *array = array_cb->array;
+
+	if (atomic_dec_and_test(&array->num_pending))
+		fence_signal(&array->base);
+	fence_put(&array->base);
+}
+
+static bool fence_array_enable_signaling(struct fence *fence)
+{
+	struct fence_array *array = to_fence_array(fence);
+	struct fence_array_cb *cb = (void *)(&array[1]);
+	unsigned i;
+
+	for (i = 0; i < array->num_fences; ++i) {
+		cb[i].array = array;
+		/*
+		 * As we may report that the fence is signaled before all
+		 * callbacks are complete, we need to take an additional
+		 * reference count on the array so that we do not free it too
+		 * early. The core fence handling will only hold the reference
+		 * until we signal the array as complete (but that is now
+		 * insufficient).
+		 */
+		fence_get(&array->base);
+		if (fence_add_callback(array->fences[i], &cb[i].cb,
+				       fence_array_cb_func)) {
+			fence_put(&array->base);
+			if (atomic_dec_and_test(&array->num_pending))
+				return false;
+		}
+	}
+
+	return true;
+}
+
+static bool fence_array_signaled(struct fence *fence)
+{
+	struct fence_array *array = to_fence_array(fence);
+
+	return atomic_read(&array->num_pending) <= 0;
+}
+
+static void fence_array_release(struct fence *fence)
+{
+	struct fence_array *array = to_fence_array(fence);
+	unsigned i;
+
+	for (i = 0; i < array->num_fences; ++i)
+		fence_put(array->fences[i]);
+
+	kfree(array->fences);
+	fence_free(fence);
+}
+
+const struct fence_ops fence_array_ops = {
+	.get_driver_name = fence_array_get_driver_name,
+	.get_timeline_name = fence_array_get_timeline_name,
+	.enable_signaling = fence_array_enable_signaling,
+	.signaled = fence_array_signaled,
+	.wait = fence_default_wait,
+	.release = fence_array_release,
+};
+
+/**
+ * fence_array_create - Create a custom fence array
+ * @num_fences:		[in]	number of fences to add in the array
+ * @fences:		[in]	array containing the fences
+ * @context:		[in]	fence context to use
+ * @seqno:		[in]	sequence number to use
+ * @signal_on_any	[in]	signal on any fence in the array
+ *
+ * Allocate a fence_array object and initialize the base fence with fence_init().
+ * In case of error it returns NULL.
+ *
+ * The caller should allocte the fences array with num_fences size
+ * and fill it with the fences it wants to add to the object. Ownership of this
+ * array is take and fence_put() is used on each fence on release.
+ *
+ * If @signal_on_any is true the fence array signals if any fence in the array
+ * signals, otherwise it signals when all fences in the array signal.
+ */
+struct fence_array *fence_array_create(int num_fences, struct fence **fences,
+				       u64 context, unsigned seqno,
+				       bool signal_on_any)
+{
+	struct fence_array *array;
+	size_t size = sizeof(*array);
+
+	/* Allocate the callback structures behind the array. */
+	size += num_fences * sizeof(struct fence_array_cb);
+	array = kzalloc(size, GFP_KERNEL);
+	if (!array)
+		return NULL;
+
+	spin_lock_init(&array->lock);
+	fence_init(&array->base, &fence_array_ops, &array->lock,
+		   context, seqno);
+
+	array->num_fences = num_fences;
+	atomic_set(&array->num_pending, signal_on_any ? 1 : num_fences);
+	array->fences = fences;
+
+	return array;
+}
+EXPORT_SYMBOL(fence_array_create);
--- a/drivers/dma-buf/fence.c
+++ b/drivers/dma-buf/fence.c
@ -35,7 +35,7 @@ EXPORT_TRACEPOINT_SYMBOL(fence_emit);
 * context or not. One device can have multiple separate contexts,
 * and they're used if some engine can run independently of another.
 */
-static atomic_t fence_context_counter = ATOMIC_INIT(0);
+static atomic64_t fence_context_counter = ATOMIC64_INIT(0);

 /**
 * fence_context_alloc - allocate an array of fence contexts
@ -44,10 +44,10 @@ static atomic_t fence_context_counter = ATOMIC_INIT(0);
 * This function will return the first index of the number of fences allocated.
 * The fence context is used for setting fence->context to a unique number.
 */
-unsigned fence_context_alloc(unsigned num)
+u64 fence_context_alloc(unsigned num)
 {
 	BUG_ON(!num);
-	return atomic_add_return(num, &fence_context_counter) - num;
+	return atomic64_add_return(num, &fence_context_counter) - num;
 }
 EXPORT_SYMBOL(fence_context_alloc);

@ -513,7 +513,7 @@ EXPORT_SYMBOL(fence_wait_any_timeout);
 */
 void
 fence_init(struct fence *fence, const struct fence_ops *ops,
-	     spinlock_t *lock, unsigned context, unsigned seqno)
+	     spinlock_t *lock, u64 context, unsigned seqno)
 {
 	BUG_ON(!lock);
 	BUG_ON(!ops || !ops->wait || !ops->enable_signaling ||
--- a/drivers/dma-buf/sync_file.c
+++ b/drivers/dma-buf/sync_file.c
@ -82,7 +82,7 @@ struct sync_file *sync_file_create(struct fence *fence)

 	sync_file->num_fences = 1;
 	atomic_set(&sync_file->status, 1);
-	snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%d-%d",
+	snprintf(sync_file->name, sizeof(sync_file->name), "%s-%s%llu-%d",
 		 fence->ops->get_driver_name(fence),
 		 fence->ops->get_timeline_name(fence), fence->context,
 		 fence->seqno);
--- a/drivers/gpu/drm/Makefile
+++ b/drivers/gpu/drm/Makefile
@ -8,7 +8,7 @@ drm-y       :=	drm_auth.o drm_bufs.o drm_cache.o \
 		drm_lock.o drm_memory.o drm_drv.o drm_vm.o \
 		drm_scatter.o drm_pci.o \
 		drm_platform.o drm_sysfs.o drm_hashtab.o drm_mm.o \
-		drm_crtc.o drm_modes.o drm_edid.o \
+		drm_crtc.o drm_fourcc.o drm_modes.o drm_edid.o \
 		drm_info.o drm_debugfs.o drm_encoder_slave.o \
 		drm_trace_points.o drm_global.o drm_prime.o \
 		drm_rect.o drm_vma_manager.o drm_flip_work.o \
@ -23,7 +23,8 @@ drm-$(CONFIG_AGP) += drm_agpsupport.o

 drm_kms_helper-y := drm_crtc_helper.o drm_dp_helper.o drm_probe_helper.o \
 		drm_plane_helper.o drm_dp_mst_topology.o drm_atomic_helper.o \
-		drm_kms_helper_common.o drm_dp_dual_mode_helper.o
+		drm_kms_helper_common.o drm_dp_dual_mode_helper.o \
+		drm_simple_kms_helper.o

 drm_kms_helper-$(CONFIG_DRM_LOAD_EDID_FIRMWARE) += drm_edid_load.o
 drm_kms_helper-$(CONFIG_DRM_FBDEV_EMULATION) += drm_fb_helper.o
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu.h
@ -85,8 +85,12 @@ extern int amdgpu_vm_debug;
 extern int amdgpu_sched_jobs;
 extern int amdgpu_sched_hw_submission;
 extern int amdgpu_powerplay;
+extern int amdgpu_powercontainment;
 extern unsigned amdgpu_pcie_gen_cap;
 extern unsigned amdgpu_pcie_lane_cap;
+extern unsigned amdgpu_cg_mask;
+extern unsigned amdgpu_pg_mask;
+extern char *amdgpu_disable_cu;

 #define AMDGPU_WAIT_IDLE_TIMEOUT_IN_MS	        3000
 #define AMDGPU_MAX_USEC_TIMEOUT			100000	/* 100 ms */
@ -183,6 +187,10 @@ int amdgpu_set_clockgating_state(struct amdgpu_device *adev,
 int amdgpu_set_powergating_state(struct amdgpu_device *adev,
 				  enum amd_ip_block_type block_type,
 				  enum amd_powergating_state state);
+int amdgpu_wait_for_idle(struct amdgpu_device *adev,
+			 enum amd_ip_block_type block_type);
+bool amdgpu_is_idle(struct amdgpu_device *adev,
+		    enum amd_ip_block_type block_type);

 struct amdgpu_ip_block_version {
 	enum amd_ip_block_type type;
@ -298,13 +306,16 @@ struct amdgpu_ring_funcs {
 				uint32_t oa_base, uint32_t oa_size);
 	/* testing functions */
 	int (*test_ring)(struct amdgpu_ring *ring);
-	int (*test_ib)(struct amdgpu_ring *ring);
+	int (*test_ib)(struct amdgpu_ring *ring, long timeout);
 	/* insert NOP packets */
 	void (*insert_nop)(struct amdgpu_ring *ring, uint32_t count);
 	/* pad the indirect buffer to the necessary number of dw */
 	void (*pad_ib)(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 	unsigned (*init_cond_exec)(struct amdgpu_ring *ring);
 	void (*patch_cond_exec)(struct amdgpu_ring *ring, unsigned offset);
+	/* note usage for clock and power gating */
+	void (*begin_use)(struct amdgpu_ring *ring);
+	void (*end_use)(struct amdgpu_ring *ring);
 };

 /*
@ -594,11 +605,9 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 		     struct amdgpu_sync *sync,
 		     struct reservation_object *resv,
 		     void *owner);
-bool amdgpu_sync_is_idle(struct amdgpu_sync *sync);
-int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src,
-			     struct fence *fence);
+struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+				     struct amdgpu_ring *ring);
 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync);
-int amdgpu_sync_wait(struct amdgpu_sync *sync);
 void amdgpu_sync_free(struct amdgpu_sync *sync);
 int amdgpu_sync_init(void);
 void amdgpu_sync_fini(void);
@ -754,12 +763,11 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 			     struct amdgpu_job **job);

+void amdgpu_job_free_resources(struct amdgpu_job *job);
 void amdgpu_job_free(struct amdgpu_job *job);
-void amdgpu_job_free_func(struct kref *refcount);
 int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 		      struct amd_sched_entity *entity, void *owner,
 		      struct fence **f);
-void amdgpu_job_timeout_func(struct work_struct *work);

 struct amdgpu_ring {
 	struct amdgpu_device		*adev;
@ -767,12 +775,9 @@ struct amdgpu_ring {
 	struct amdgpu_fence_driver	fence_drv;
 	struct amd_gpu_scheduler	sched;

-	spinlock_t              fence_lock;
 	struct amdgpu_bo	*ring_obj;
 	volatile uint32_t	*ring;
 	unsigned		rptr_offs;
-	u64			next_rptr_gpu_addr;
-	volatile u32		*next_rptr_cpu_addr;
 	unsigned		wptr;
 	unsigned		wptr_old;
 	unsigned		ring_size;
@ -791,14 +796,16 @@ struct amdgpu_ring {
 	u32			doorbell_index;
 	bool			use_doorbell;
 	unsigned		wptr_offs;
-	unsigned		next_rptr_offs;
 	unsigned		fence_offs;
 	uint64_t		current_ctx;
 	enum amdgpu_ring_type	type;
 	char			name[16];
 	unsigned		cond_exe_offs;
-	u64				cond_exe_gpu_addr;
-	volatile u32	*cond_exe_cpu_addr;
+	u64			cond_exe_gpu_addr;
+	volatile u32		*cond_exe_cpu_addr;
+#if defined(CONFIG_DEBUG_FS)
+	struct dentry *ent;
+#endif
 };

 /*
@ -861,6 +868,7 @@ struct amdgpu_vm {
 	struct amdgpu_bo	*page_directory;
 	unsigned		max_pde_used;
 	struct fence		*page_directory_fence;
+	uint64_t		last_eviction_counter;

 	/* array of page tables, one for each page directory entry */
 	struct amdgpu_vm_pt	*page_tables;
@ -883,13 +891,14 @@ struct amdgpu_vm_id {
 	struct fence		*first;
 	struct amdgpu_sync	active;
 	struct fence		*last_flush;
-	struct amdgpu_ring      *last_user;
 	atomic64_t		owner;

 	uint64_t		pd_gpu_addr;
 	/* last flushed PD/PT update */
 	struct fence		*flushed_updates;

+	uint32_t                current_gpu_reset_count;
+
 	uint32_t		gds_base;
 	uint32_t		gds_size;
 	uint32_t		gws_base;
@ -905,6 +914,10 @@ struct amdgpu_vm_manager {
 	struct list_head			ids_lru;
 	struct amdgpu_vm_id			ids[AMDGPU_NUM_VM];

+	/* Handling of VM fences */
+	u64					fence_context;
+	unsigned				seqno[AMDGPU_MAX_RINGS];
+
 	uint32_t				max_pfn;
 	/* vram base address for page table entry  */
 	u64					vram_base_offset;
@ -926,17 +939,14 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm);
 void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 			 struct list_head *validated,
 			 struct amdgpu_bo_list_entry *entry);
-void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates);
+void amdgpu_vm_get_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			  struct list_head *duplicates);
 void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
 				  struct amdgpu_vm *vm);
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_sync *sync, struct fence *fence,
-		      unsigned *vm_id, uint64_t *vm_pd_addr);
-int amdgpu_vm_flush(struct amdgpu_ring *ring,
-		    unsigned vm_id, uint64_t pd_addr,
-		    uint32_t gds_base, uint32_t gds_size,
-		    uint32_t gws_base, uint32_t gws_size,
-		    uint32_t oa_base, uint32_t oa_size);
+		      struct amdgpu_job *job);
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job);
 void amdgpu_vm_reset_id(struct amdgpu_device *adev, unsigned vm_id);
 uint64_t amdgpu_vm_map_gart(const dma_addr_t *pages_addr, uint64_t addr);
 int amdgpu_vm_update_page_directory(struct amdgpu_device *adev,
@ -1142,6 +1152,12 @@ struct amdgpu_cu_info {
 	uint32_t bitmap[4][4];
 };

+struct amdgpu_gfx_funcs {
+	/* get the gpu clock counter */
+	uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
+	void (*select_se_sh)(struct amdgpu_device *adev, u32 se_num, u32 sh_num, u32 instance);
+};
+
 struct amdgpu_gfx {
 	struct mutex			gpu_clock_mutex;
 	struct amdgpu_gca_config	config;
@ -1178,6 +1194,7 @@ struct amdgpu_gfx {
 	/* ce ram size*/
 	unsigned			ce_ram_size;
 	struct amdgpu_cu_info		cu_info;
+	const struct amdgpu_gfx_funcs	*funcs;
 };

 int amdgpu_ib_get(struct amdgpu_device *adev, struct amdgpu_vm *vm,
@ -1195,10 +1212,6 @@ void amdgpu_ring_insert_nop(struct amdgpu_ring *ring, uint32_t count);
 void amdgpu_ring_generic_pad_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib);
 void amdgpu_ring_commit(struct amdgpu_ring *ring);
 void amdgpu_ring_undo(struct amdgpu_ring *ring);
-unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
-			    uint32_t **data);
-int amdgpu_ring_restore(struct amdgpu_ring *ring,
-			unsigned size, uint32_t *data);
 int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		     unsigned ring_size, u32 nop, u32 align_mask,
 		     struct amdgpu_irq_src *irq_src, unsigned irq_type,
@ -1250,6 +1263,7 @@ struct amdgpu_job {
 	uint32_t		num_ibs;
 	void			*owner;
 	uint64_t		ctx;
+	bool                    vm_needs_flush;
 	unsigned		vm_id;
 	uint64_t		vm_pd_addr;
 	uint32_t		gds_base, gds_size;
@ -1257,8 +1271,7 @@ struct amdgpu_job {
 	uint32_t		oa_base, oa_size;

 	/* user fence handling */
-	struct amdgpu_bo	*uf_bo;
-	uint32_t		uf_offset;
+	uint64_t		uf_addr;
 	uint64_t		uf_sequence;

 };
@ -1560,6 +1573,12 @@ struct amdgpu_dpm_funcs {
 	u32 (*get_fan_control_mode)(struct amdgpu_device *adev);
 	int (*set_fan_speed_percent)(struct amdgpu_device *adev, u32 speed);
 	int (*get_fan_speed_percent)(struct amdgpu_device *adev, u32 *speed);
+	int (*force_clock_level)(struct amdgpu_device *adev, enum pp_clock_type type, uint32_t mask);
+	int (*print_clock_levels)(struct amdgpu_device *adev, enum pp_clock_type type, char *buf);
+	int (*get_sclk_od)(struct amdgpu_device *adev);
+	int (*set_sclk_od)(struct amdgpu_device *adev, uint32_t value);
+	int (*get_mclk_od)(struct amdgpu_device *adev);
+	int (*set_mclk_od)(struct amdgpu_device *adev, uint32_t value);
 };

 struct amdgpu_dpm {
@ -1662,6 +1681,7 @@ struct amdgpu_uvd {
 	struct amdgpu_ring	ring;
 	struct amdgpu_irq_src	irq;
 	bool			address_64_bit;
+	bool			use_ctx_buf;
 	struct amd_sched_entity entity;
 };

@ -1683,6 +1703,7 @@ struct amdgpu_vce {
 	struct drm_file		*filp[AMDGPU_MAX_VCE_HANDLES];
 	uint32_t		img_size[AMDGPU_MAX_VCE_HANDLES];
 	struct delayed_work	idle_work;
+	struct mutex		idle_mutex;
 	const struct firmware	*fw;	/* VCE firmware */
 	struct amdgpu_ring	ring[AMDGPU_MAX_VCE_RINGS];
 	struct amdgpu_irq_src	irq;
@ -1767,6 +1788,8 @@ int amdgpu_debugfs_init(struct drm_minor *minor);
 void amdgpu_debugfs_cleanup(struct drm_minor *minor);
 #endif

+int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev);
+
 /*
 * amdgpu smumgr functions
 */
@ -1811,12 +1834,8 @@ struct amdgpu_asic_funcs {
 			     u32 sh_num, u32 reg_offset, u32 *value);
 	void (*set_vga_state)(struct amdgpu_device *adev, bool state);
 	int (*reset)(struct amdgpu_device *adev);
-	/* wait for mc_idle */
-	int (*wait_for_mc_idle)(struct amdgpu_device *adev);
 	/* get the reference clock */
 	u32 (*get_xclk)(struct amdgpu_device *adev);
-	/* get the gpu clock counter */
-	uint64_t (*get_gpu_clock_counter)(struct amdgpu_device *adev);
 	/* MM block clocks */
 	int (*set_uvd_clocks)(struct amdgpu_device *adev, u32 vclk, u32 dclk);
 	int (*set_vce_clocks)(struct amdgpu_device *adev, u32 evclk, u32 ecclk);
@ -2003,6 +2022,10 @@ struct amdgpu_device {
 	spinlock_t didt_idx_lock;
 	amdgpu_rreg_t			didt_rreg;
 	amdgpu_wreg_t			didt_wreg;
+	/* protects concurrent gc_cac register access */
+	spinlock_t gc_cac_idx_lock;
+	amdgpu_rreg_t			gc_cac_rreg;
+	amdgpu_wreg_t			gc_cac_wreg;
 	/* protects concurrent ENDPOINT (audio) register access */
 	spinlock_t audio_endpt_idx_lock;
 	amdgpu_block_rreg_t		audio_endpt_rreg;
@ -2028,6 +2051,7 @@ struct amdgpu_device {
 	atomic64_t			vram_vis_usage;
 	atomic64_t			gtt_usage;
 	atomic64_t			num_bytes_moved;
+	atomic64_t			num_evictions;
 	atomic_t			gpu_reset_counter;

 	/* display */
@ -2038,7 +2062,7 @@ struct amdgpu_device {
 	struct amdgpu_irq_src		hpd_irq;

 	/* rings */
-	unsigned			fence_context;
+	u64				fence_context;
 	unsigned			num_rings;
 	struct amdgpu_ring		*rings[AMDGPU_MAX_RINGS];
 	bool				ib_pool_ready;
@ -2131,6 +2155,8 @@ void amdgpu_mm_wdoorbell(struct amdgpu_device *adev, u32 index, u32 v);
 #define WREG32_UVD_CTX(reg, v) adev->uvd_ctx_wreg(adev, (reg), (v))
 #define RREG32_DIDT(reg) adev->didt_rreg(adev, (reg))
 #define WREG32_DIDT(reg, v) adev->didt_wreg(adev, (reg), (v))
+#define RREG32_GC_CAC(reg) adev->gc_cac_rreg(adev, (reg))
+#define WREG32_GC_CAC(reg, v) adev->gc_cac_wreg(adev, (reg), (v))
 #define RREG32_AUDIO_ENDPT(block, reg) adev->audio_endpt_rreg(adev, (block), (reg))
 #define WREG32_AUDIO_ENDPT(block, reg, v) adev->audio_endpt_wreg(adev, (block), (reg), (v))
 #define WREG32_P(reg, val, mask)				\
@ -2206,12 +2232,10 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 */
 #define amdgpu_asic_set_vga_state(adev, state) (adev)->asic_funcs->set_vga_state((adev), (state))
 #define amdgpu_asic_reset(adev) (adev)->asic_funcs->reset((adev))
-#define amdgpu_asic_wait_for_mc_idle(adev) (adev)->asic_funcs->wait_for_mc_idle((adev))
 #define amdgpu_asic_get_xclk(adev) (adev)->asic_funcs->get_xclk((adev))
 #define amdgpu_asic_set_uvd_clocks(adev, v, d) (adev)->asic_funcs->set_uvd_clocks((adev), (v), (d))
 #define amdgpu_asic_set_vce_clocks(adev, ev, ec) (adev)->asic_funcs->set_vce_clocks((adev), (ev), (ec))
 #define amdgpu_asic_get_virtual_caps(adev) ((adev)->asic_funcs->get_virtual_caps((adev)))
-#define amdgpu_asic_get_gpu_clock_counter(adev) (adev)->asic_funcs->get_gpu_clock_counter((adev))
 #define amdgpu_asic_read_disabled_bios(adev) (adev)->asic_funcs->read_disabled_bios((adev))
 #define amdgpu_asic_read_bios_from_rom(adev, b, l) (adev)->asic_funcs->read_bios_from_rom((adev), (b), (l))
 #define amdgpu_asic_read_register(adev, se, sh, offset, v)((adev)->asic_funcs->read_register((adev), (se), (sh), (offset), (v)))
@ -2222,7 +2246,7 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_vm_set_pte_pde(adev, ib, pe, addr, count, incr, flags) ((adev)->vm_manager.vm_pte_funcs->set_pte_pde((ib), (pe), (addr), (count), (incr), (flags)))
 #define amdgpu_ring_parse_cs(r, p, ib) ((r)->funcs->parse_cs((p), (ib)))
 #define amdgpu_ring_test_ring(r) (r)->funcs->test_ring((r))
-#define amdgpu_ring_test_ib(r) (r)->funcs->test_ib((r))
+#define amdgpu_ring_test_ib(r, t) (r)->funcs->test_ib((r), (t))
 #define amdgpu_ring_get_rptr(r) (r)->funcs->get_rptr((r))
 #define amdgpu_ring_get_wptr(r) (r)->funcs->get_wptr((r))
 #define amdgpu_ring_set_wptr(r) (r)->funcs->set_wptr((r))
@ -2264,6 +2288,8 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_dpm_print_power_state(adev, ps) (adev)->pm.funcs->print_power_state((adev), (ps))
 #define amdgpu_dpm_vblank_too_short(adev) (adev)->pm.funcs->vblank_too_short((adev))
 #define amdgpu_dpm_enable_bapm(adev, e) (adev)->pm.funcs->enable_bapm((adev), (e))
+#define amdgpu_gfx_get_gpu_clock_counter(adev) (adev)->gfx.funcs->get_gpu_clock_counter((adev))
+#define amdgpu_gfx_select_se_sh(adev, se, sh, instance) (adev)->gfx.funcs->select_se_sh((adev), (se), (sh), (instance))

 #define amdgpu_dpm_get_temperature(adev) \
 	((adev)->pp_enabled ?						\
@ -2342,6 +2368,18 @@ amdgpu_get_sdma_instance(struct amdgpu_ring *ring)
 #define amdgpu_dpm_force_clock_level(adev, type, level) \
 		(adev)->powerplay.pp_funcs->force_clock_level((adev)->powerplay.pp_handle, type, level)

+#define amdgpu_dpm_get_sclk_od(adev) \
+	(adev)->powerplay.pp_funcs->get_sclk_od((adev)->powerplay.pp_handle)
+
+#define amdgpu_dpm_set_sclk_od(adev, value) \
+	(adev)->powerplay.pp_funcs->set_sclk_od((adev)->powerplay.pp_handle, value)
+
+#define amdgpu_dpm_get_mclk_od(adev) \
+	((adev)->powerplay.pp_funcs->get_mclk_od((adev)->powerplay.pp_handle))
+
+#define amdgpu_dpm_set_mclk_od(adev, value) \
+	((adev)->powerplay.pp_funcs->set_mclk_od((adev)->powerplay.pp_handle, value))
+
 #define amdgpu_dpm_dispatch_task(adev, event_id, input, output)		\
 	(adev)->powerplay.pp_funcs->dispatch_tasks((adev)->powerplay.pp_handle, (event_id), (input), (output))

@ -2383,9 +2421,13 @@ bool amdgpu_device_is_px(struct drm_device *dev);
 #if defined(CONFIG_VGA_SWITCHEROO)
 void amdgpu_register_atpx_handler(void);
 void amdgpu_unregister_atpx_handler(void);
+bool amdgpu_has_atpx_dgpu_power_cntl(void);
+bool amdgpu_is_atpx_hybrid(void);
 #else
 static inline void amdgpu_register_atpx_handler(void) {}
 static inline void amdgpu_unregister_atpx_handler(void) {}
+static inline bool amdgpu_has_atpx_dgpu_power_cntl(void) { return false; }
+static inline bool amdgpu_is_atpx_hybrid(void) { return false; }
 #endif

 /*
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd.c
@ -240,8 +240,8 @@ uint64_t get_gpu_clock_counter(struct kgd_dev *kgd)
 {
 	struct amdgpu_device *rdev = (struct amdgpu_device *)kgd;

-	if (rdev->asic_funcs->get_gpu_clock_counter)
-		return rdev->asic_funcs->get_gpu_clock_counter(rdev);
+	if (rdev->gfx.funcs->get_gpu_clock_counter)
+		return rdev->gfx.funcs->get_gpu_clock_counter(rdev);
 	return 0;
 }

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atombios.c
@ -551,28 +551,19 @@ int amdgpu_atombios_get_clock_info(struct amdgpu_device *adev)
 		    le16_to_cpu(firmware_info->info.usReferenceClock);
 		ppll->reference_div = 0;

-		if (crev < 2)
-			ppll->pll_out_min =
-				le16_to_cpu(firmware_info->info.usMinPixelClockPLL_Output);
-		else
-			ppll->pll_out_min =
-				le32_to_cpu(firmware_info->info_12.ulMinPixelClockPLL_Output);
+		ppll->pll_out_min =
+			le32_to_cpu(firmware_info->info_12.ulMinPixelClockPLL_Output);
 		ppll->pll_out_max =
 		    le32_to_cpu(firmware_info->info.ulMaxPixelClockPLL_Output);

-		if (crev >= 4) {
-			ppll->lcd_pll_out_min =
-				le16_to_cpu(firmware_info->info_14.usLcdMinPixelClockPLL_Output) * 100;
-			if (ppll->lcd_pll_out_min == 0)
-				ppll->lcd_pll_out_min = ppll->pll_out_min;
-			ppll->lcd_pll_out_max =
-				le16_to_cpu(firmware_info->info_14.usLcdMaxPixelClockPLL_Output) * 100;
-			if (ppll->lcd_pll_out_max == 0)
-				ppll->lcd_pll_out_max = ppll->pll_out_max;
-		} else {
+		ppll->lcd_pll_out_min =
+			le16_to_cpu(firmware_info->info_14.usLcdMinPixelClockPLL_Output) * 100;
+		if (ppll->lcd_pll_out_min == 0)
 			ppll->lcd_pll_out_min = ppll->pll_out_min;
+		ppll->lcd_pll_out_max =
+			le16_to_cpu(firmware_info->info_14.usLcdMaxPixelClockPLL_Output) * 100;
+		if (ppll->lcd_pll_out_max == 0)
 			ppll->lcd_pll_out_max = ppll->pll_out_max;
-		}

 		if (ppll->pll_out_min == 0)
 			ppll->pll_out_min = 64800;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_atpx_handler.c
@ -10,6 +10,7 @@
 #include <linux/slab.h>
 #include <linux/acpi.h>
 #include <linux/pci.h>
+#include <linux/delay.h>

 #include "amd_acpi.h"

@ -27,6 +28,7 @@ struct amdgpu_atpx_functions {
 struct amdgpu_atpx {
 	acpi_handle handle;
 	struct amdgpu_atpx_functions functions;
+	bool is_hybrid;
 };

 static struct amdgpu_atpx_priv {
@ -63,6 +65,14 @@ bool amdgpu_has_atpx(void) {
 	return amdgpu_atpx_priv.atpx_detected;
 }

+bool amdgpu_has_atpx_dgpu_power_cntl(void) {
+	return amdgpu_atpx_priv.atpx.functions.power_cntl;
+}
+
+bool amdgpu_is_atpx_hybrid(void) {
+	return amdgpu_atpx_priv.atpx.is_hybrid;
+}
+
 /**
 * amdgpu_atpx_call - call an ATPX method
 *
@ -142,18 +152,12 @@ static void amdgpu_atpx_parse_functions(struct amdgpu_atpx_functions *f, u32 mas
 */
 static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
 {
-	/* make sure required functions are enabled */
-	/* dGPU power control is required */
-	if (atpx->functions.power_cntl == false) {
-		printk("ATPX dGPU power cntl not present, forcing\n");
-		atpx->functions.power_cntl = true;
-	}
+	u32 valid_bits = 0;

 	if (atpx->functions.px_params) {
 		union acpi_object *info;
 		struct atpx_px_params output;
 		size_t size;
-		u32 valid_bits;

 		info = amdgpu_atpx_call(atpx->handle, ATPX_FUNCTION_GET_PX_PARAMETERS, NULL);
 		if (!info)
@ -172,19 +176,43 @@ static int amdgpu_atpx_validate(struct amdgpu_atpx *atpx)
 		memcpy(&output, info->buffer.pointer, size);

 		valid_bits = output.flags & output.valid_flags;
-		/* if separate mux flag is set, mux controls are required */
-		if (valid_bits & ATPX_SEPARATE_MUX_FOR_I2C) {
-			atpx->functions.i2c_mux_cntl = true;
-			atpx->functions.disp_mux_cntl = true;
-		}
-		/* if any outputs are muxed, mux controls are required */
-		if (valid_bits & (ATPX_CRT1_RGB_SIGNAL_MUXED |
-				  ATPX_TV_SIGNAL_MUXED |
-				  ATPX_DFP_SIGNAL_MUXED))
-			atpx->functions.disp_mux_cntl = true;

 		kfree(info);
 	}
+
+	/* if separate mux flag is set, mux controls are required */
+	if (valid_bits & ATPX_SEPARATE_MUX_FOR_I2C) {
+		atpx->functions.i2c_mux_cntl = true;
+		atpx->functions.disp_mux_cntl = true;
+	}
+	/* if any outputs are muxed, mux controls are required */
+	if (valid_bits & (ATPX_CRT1_RGB_SIGNAL_MUXED |
+			  ATPX_TV_SIGNAL_MUXED |
+			  ATPX_DFP_SIGNAL_MUXED))
+		atpx->functions.disp_mux_cntl = true;
+
+
+	/* some bioses set these bits rather than flagging power_cntl as supported */
+	if (valid_bits & (ATPX_DYNAMIC_PX_SUPPORTED |
+			  ATPX_DYNAMIC_DGPU_POWER_OFF_SUPPORTED))
+		atpx->functions.power_cntl = true;
+
+	atpx->is_hybrid = false;
+	if (valid_bits & ATPX_MS_HYBRID_GFX_SUPPORTED) {
+		printk("ATPX Hybrid Graphics\n");
+#if 1
+		/* This is a temporary hack until the D3 cold support
+		 * makes it upstream.  The ATPX power_control method seems
+		 * to still work on even if the system should be using
+		 * the new standardized hybrid D3 cold ACPI interface.
+		 */
+		atpx->functions.power_cntl = true;
+#else
+		atpx->functions.power_cntl = false;
+#endif
+		atpx->is_hybrid = true;
+	}
+
 	return 0;
 }

@ -259,6 +287,10 @@ static int amdgpu_atpx_set_discrete_state(struct amdgpu_atpx *atpx, u8 state)
 		if (!info)
 			return -EIO;
 		kfree(info);
+
+		/* 200ms delay is required after off */
+		if (state == 0)
+			msleep(200);
 	}
 	return 0;
 }
@ -507,7 +539,6 @@ static int amdgpu_atpx_get_client_id(struct pci_dev *pdev)
 static const struct vga_switcheroo_handler amdgpu_atpx_handler = {
 	.switchto = amdgpu_atpx_switchto,
 	.power_state = amdgpu_atpx_power_state,
-	.init = amdgpu_atpx_init,
 	.get_client_id = amdgpu_atpx_get_client_id,
 };

@ -542,6 +573,7 @@ static bool amdgpu_atpx_detect(void)
 		printk(KERN_INFO "vga_switcheroo: detected switching method %s handle\n",
 		       acpi_method_name);
 		amdgpu_atpx_priv.atpx_detected = true;
+		amdgpu_atpx_init();
 		return true;
 	}
 	return false;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bios.c
@ -352,22 +352,22 @@ bool amdgpu_get_bios(struct amdgpu_device *adev)
 	uint16_t tmp, bios_header_start;

 	r = amdgpu_atrm_get_bios(adev);
-	if (r == false)
+	if (!r)
 		r = amdgpu_acpi_vfct_bios(adev);
-	if (r == false)
+	if (!r)
 		r = igp_read_bios_from_vram(adev);
-	if (r == false)
+	if (!r)
 		r = amdgpu_read_bios(adev);
-	if (r == false) {
+	if (!r) {
 		r = amdgpu_read_bios_from_rom(adev);
 	}
-	if (r == false) {
+	if (!r) {
 		r = amdgpu_read_disabled_bios(adev);
 	}
-	if (r == false) {
+	if (!r) {
 		r = amdgpu_read_platform_bios(adev);
 	}
-	if (r == false || adev->bios == NULL) {
+	if (!r || adev->bios == NULL) {
 		DRM_ERROR("Unable to locate a BIOS ROM\n");
 		adev->bios = NULL;
 		return false;
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_bo_list.c
@ -94,6 +94,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	unsigned last_entry = 0, first_userptr = num_entries;
 	unsigned i;
 	int r;
+	unsigned long total_size = 0;

 	array = drm_malloc_ab(num_entries, sizeof(struct amdgpu_bo_list_entry));
 	if (!array)
@ -140,6 +141,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 		if (entry->robj->prefered_domains == AMDGPU_GEM_DOMAIN_OA)
 			oa_obj = entry->robj;

+		total_size += amdgpu_bo_size(entry->robj);
 		trace_amdgpu_bo_list_set(list, entry->robj);
 	}

@ -155,6 +157,7 @@ static int amdgpu_bo_list_set(struct amdgpu_device *adev,
 	list->array = array;
 	list->num_entries = num_entries;

+	trace_amdgpu_cs_bo_status(list->num_entries, total_size);
 	return 0;

 error_free:
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cgs.c
@ -312,6 +312,8 @@ static uint32_t amdgpu_cgs_read_ind_register(struct cgs_device *cgs_device,
 		return RREG32_UVD_CTX(index);
 	case CGS_IND_REG__DIDT:
 		return RREG32_DIDT(index);
+	case CGS_IND_REG_GC_CAC:
+		return RREG32_GC_CAC(index);
 	case CGS_IND_REG__AUDIO_ENDPT:
 		DRM_ERROR("audio endpt register access not implemented.\n");
 		return 0;
@ -336,6 +338,8 @@ static void amdgpu_cgs_write_ind_register(struct cgs_device *cgs_device,
 		return WREG32_UVD_CTX(index, value);
 	case CGS_IND_REG__DIDT:
 		return WREG32_DIDT(index, value);
+	case CGS_IND_REG_GC_CAC:
+		return WREG32_GC_CAC(index, value);
 	case CGS_IND_REG__AUDIO_ENDPT:
 		DRM_ERROR("audio endpt register access not implemented.\n");
 		return;
@ -748,6 +752,9 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,

 		if (!adev->pm.fw) {
 			switch (adev->asic_type) {
+			case CHIP_TOPAZ:
+				strcpy(fw_name, "amdgpu/topaz_smc.bin");
+				break;
 			case CHIP_TONGA:
 				strcpy(fw_name, "amdgpu/tonga_smc.bin");
 				break;
@ -787,6 +794,7 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,
 		}

 		hdr = (const struct smc_firmware_header_v1_0 *)	adev->pm.fw->data;
+		amdgpu_ucode_print_smc_hdr(&hdr->header);
 		adev->pm.fw_version = le32_to_cpu(hdr->header.ucode_version);
 		ucode_size = le32_to_cpu(hdr->header.ucode_size_bytes);
 		ucode_start_address = le32_to_cpu(hdr->ucode_start_addr);
@ -795,13 +803,14 @@ static int amdgpu_cgs_get_firmware_info(struct cgs_device *cgs_device,

 		info->version = adev->pm.fw_version;
 		info->image_size = ucode_size;
+		info->ucode_start_address = ucode_start_address;
 		info->kptr = (void *)src;
 	}
 	return 0;
 }

 static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
-				struct cgs_system_info *sys_info)
+					struct cgs_system_info *sys_info)
 {
 	CGS_FUNC_ADEV;

@ -821,6 +830,12 @@ static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
 	case CGS_SYSTEM_INFO_PCIE_MLW:
 		sys_info->value = adev->pm.pcie_mlw_mask;
 		break;
+	case CGS_SYSTEM_INFO_PCIE_DEV:
+		sys_info->value = adev->pdev->device;
+		break;
+	case CGS_SYSTEM_INFO_PCIE_REV:
+		sys_info->value = adev->pdev->revision;
+		break;
 	case CGS_SYSTEM_INFO_CG_FLAGS:
 		sys_info->value = adev->cg_flags;
 		break;
@ -830,6 +845,9 @@ static int amdgpu_cgs_query_system_info(struct cgs_device *cgs_device,
 	case CGS_SYSTEM_INFO_GFX_CU_INFO:
 		sys_info->value = adev->gfx.cu_info.number;
 		break;
+	case CGS_SYSTEM_INFO_GFX_SE_INFO:
+		sys_info->value = adev->gfx.config.max_shader_engines;
+		break;
 	default:
 		return -ENODEV;
 	}
@ -903,14 +921,12 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 	acpi_handle handle;
 	struct acpi_object_list input;
 	struct acpi_buffer output = { ACPI_ALLOCATE_BUFFER, NULL };
-	union acpi_object *params = NULL;
-	union acpi_object *obj = NULL;
+	union acpi_object *params, *obj;
 	uint8_t name[5] = {'\0'};
-	struct cgs_acpi_method_argument *argument = NULL;
+	struct cgs_acpi_method_argument *argument;
 	uint32_t i, count;
 	acpi_status status;
-	int result = 0;
-	uint32_t func_no = 0xFFFFFFFF;
+	int result;

 	handle = ACPI_HANDLE(&adev->pdev->dev);
 	if (!handle)
@ -927,7 +943,6 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 		if (info->pinput_argument == NULL)
 			return -EINVAL;
 		argument = info->pinput_argument;
-		func_no = argument->value;
 		for (i = 0; i < info->input_count; i++) {
 			if (((argument->type == ACPI_TYPE_STRING) ||
 			     (argument->type == ACPI_TYPE_BUFFER)) &&
@ -972,11 +987,11 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 				params->integer.value = argument->value;
 				break;
 			case ACPI_TYPE_STRING:
-				params->string.length = argument->method_length;
+				params->string.length = argument->data_length;
 				params->string.pointer = argument->pointer;
 				break;
 			case ACPI_TYPE_BUFFER:
-				params->buffer.length = argument->method_length;
+				params->buffer.length = argument->data_length;
 				params->buffer.pointer = argument->pointer;
 				break;
 			default:
@ -996,7 +1011,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,

 	if (ACPI_FAILURE(status)) {
 		result = -EIO;
-		goto error;
+		goto free_input;
 	}

 	/* return the output info */
@ -1006,7 +1021,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 		if ((obj->type != ACPI_TYPE_PACKAGE) ||
 			(obj->package.count != count)) {
 			result = -EIO;
-			goto error;
+			goto free_obj;
 		}
 		params = obj->package.elements;
 	} else
@ -1014,13 +1029,13 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,

 	if (params == NULL) {
 		result = -EIO;
-		goto error;
+		goto free_obj;
 	}

 	for (i = 0; i < count; i++) {
 		if (argument->type != params->type) {
 			result = -EIO;
-			goto error;
+			goto free_obj;
 		}
 		switch (params->type) {
 		case ACPI_TYPE_INTEGER:
@ -1030,7 +1045,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 			if ((params->string.length != argument->data_length) ||
 				(params->string.pointer == NULL)) {
 				result = -EIO;
-				goto error;
+				goto free_obj;
 			}
 			strncpy(argument->pointer,
 				params->string.pointer,
@ -1039,7 +1054,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 		case ACPI_TYPE_BUFFER:
 			if (params->buffer.pointer == NULL) {
 				result = -EIO;
-				goto error;
+				goto free_obj;
 			}
 			memcpy(argument->pointer,
 				params->buffer.pointer,
@ -1052,9 +1067,10 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 		params++;
 	}

-error:
-	if (obj != NULL)
-		kfree(obj);
+	result = 0;
+free_obj:
+	kfree(obj);
+free_input:
 	kfree((void *)input.pointer);
 	return result;
 }
@ -1066,7 +1082,7 @@ static int amdgpu_cgs_acpi_eval_object(struct cgs_device *cgs_device,
 }
 #endif

-int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device,
+static int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device,
 					uint32_t acpi_method,
 					uint32_t acpi_function,
 					void *pinput, void *poutput,
@ -1079,17 +1095,14 @@ int amdgpu_cgs_call_acpi_method(struct cgs_device *cgs_device,
 	struct cgs_acpi_method_info info = {0};

 	acpi_input[0].type = CGS_ACPI_TYPE_INTEGER;
-	acpi_input[0].method_length = sizeof(uint32_t);
 	acpi_input[0].data_length = sizeof(uint32_t);
 	acpi_input[0].value = acpi_function;

 	acpi_input[1].type = CGS_ACPI_TYPE_BUFFER;
-	acpi_input[1].method_length = CGS_ACPI_MAX_BUFFER_SIZE;
 	acpi_input[1].data_length = input_size;
 	acpi_input[1].pointer = pinput;

 	acpi_output.type = CGS_ACPI_TYPE_BUFFER;
-	acpi_output.method_length = CGS_ACPI_MAX_BUFFER_SIZE;
 	acpi_output.data_length = output_size;
 	acpi_output.pointer = poutput;

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_connectors.c
@ -1690,7 +1690,6 @@ amdgpu_connector_add(struct amdgpu_device *adev,
 						   DRM_MODE_SCALE_NONE);
 			/* no HPD on analog connectors */
 			amdgpu_connector->hpd.hpd = AMDGPU_HPD_NONE;
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
 			connector->interlace_allowed = true;
 			connector->doublescan_allowed = true;
 			break;
@ -1893,8 +1892,10 @@ amdgpu_connector_add(struct amdgpu_device *adev,
 	}

 	if (amdgpu_connector->hpd.hpd == AMDGPU_HPD_NONE) {
-		if (i2c_bus->valid)
-			connector->polled = DRM_CONNECTOR_POLL_CONNECT;
+		if (i2c_bus->valid) {
+			connector->polled = DRM_CONNECTOR_POLL_CONNECT |
+			                    DRM_CONNECTOR_POLL_DISCONNECT;
+		}
 	} else
 		connector->polled = DRM_CONNECTOR_POLL_HPD;

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_cs.c
@ -216,11 +216,8 @@ int amdgpu_cs_parser_init(struct amdgpu_cs_parser *p, void *data)
 	if (ret)
 		goto free_all_kdata;

-	if (p->uf_entry.robj) {
-		p->job->uf_bo = amdgpu_bo_ref(p->uf_entry.robj);
-		p->job->uf_offset = uf_offset;
-	}
-
+	if (p->uf_entry.robj)
+		p->job->uf_addr = uf_offset;
 	kfree(chunk_array);
 	return 0;

@ -459,7 +456,7 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		list_splice(&need_pages, &p->validated);
 	}

-	amdgpu_vm_get_pt_bos(&fpriv->vm, &duplicates);
+	amdgpu_vm_get_pt_bos(p->adev, &fpriv->vm, &duplicates);

 	p->bytes_moved_threshold = amdgpu_cs_get_threshold_for_moves(p->adev);
 	p->bytes_moved = 0;
@ -472,6 +469,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 	if (r)
 		goto error_validate;

+	fpriv->vm.last_eviction_counter =
+		atomic64_read(&p->adev->num_evictions);
+
 	if (p->bo_list) {
 		struct amdgpu_bo *gds = p->bo_list->gds_obj;
 		struct amdgpu_bo *gws = p->bo_list->gws_obj;
@ -499,6 +499,9 @@ static int amdgpu_cs_parser_bos(struct amdgpu_cs_parser *p,
 		}
 	}

+	if (p->uf_entry.robj)
+		p->job->uf_addr += amdgpu_bo_gpu_offset(p->uf_entry.robj);
+
 error_validate:
 	if (r) {
 		amdgpu_vm_move_pt_bos_in_lru(p->adev, &fpriv->vm);
@ -653,18 +656,21 @@ static int amdgpu_cs_ib_vm_chunk(struct amdgpu_device *adev,

 	/* Only for UVD/VCE VM emulation */
 	if (ring->funcs->parse_cs) {
+		p->job->vm = NULL;
 		for (i = 0; i < p->job->num_ibs; i++) {
 			r = amdgpu_ring_parse_cs(ring, p, i);
 			if (r)
 				return r;
 		}
+	} else {
+		p->job->vm_pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
+
+		r = amdgpu_bo_vm_update_pte(p, vm);
+		if (r)
+			return r;
 	}

-	r = amdgpu_bo_vm_update_pte(p, vm);
-	if (!r)
-		amdgpu_cs_sync_rings(p);
-
-	return r;
+	return amdgpu_cs_sync_rings(p);
 }

 static int amdgpu_cs_handle_lockup(struct amdgpu_device *adev, int r)
@ -761,7 +767,7 @@ static int amdgpu_cs_ib_fill(struct amdgpu_device *adev,
 	}

 	/* UVD & VCE fw doesn't support user fences */
-	if (parser->job->uf_bo && (
+	if (parser->job->uf_addr && (
 	    parser->job->ring->type == AMDGPU_RING_TYPE_UVD ||
 	    parser->job->ring->type == AMDGPU_RING_TYPE_VCE))
 		return -EINVAL;
@ -830,17 +836,13 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,
 {
 	struct amdgpu_ring *ring = p->job->ring;
 	struct amd_sched_entity *entity = &p->ctx->rings[ring->idx].entity;
-	struct fence *fence;
 	struct amdgpu_job *job;
 	int r;

 	job = p->job;
 	p->job = NULL;

-	r = amd_sched_job_init(&job->base, &ring->sched,
-			       entity, amdgpu_job_timeout_func,
-			       amdgpu_job_free_func,
-			       p->filp, &fence);
+	r = amd_sched_job_init(&job->base, &ring->sched, entity, p->filp);
 	if (r) {
 		amdgpu_job_free(job);
 		return r;
@ -848,9 +850,10 @@ static int amdgpu_cs_submit(struct amdgpu_cs_parser *p,

 	job->owner = p->filp;
 	job->ctx = entity->fence_context;
-	p->fence = fence_get(fence);
-	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, fence);
+	p->fence = fence_get(&job->base.s_fence->finished);
+	cs->out.handle = amdgpu_ctx_add_fence(p->ctx, ring, p->fence);
 	job->uf_sequence = cs->out.handle;
+	amdgpu_job_free_resources(job);

 	trace_amdgpu_cs_ioctl(job);
 	amd_sched_entity_push_job(&job->base);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
@ -25,6 +25,7 @@
 *          Alex Deucher
 *          Jerome Glisse
 */
+#include <linux/kthread.h>
 #include <linux/console.h>
 #include <linux/slab.h>
 #include <linux/debugfs.h>
@ -35,6 +36,7 @@
 #include <linux/vga_switcheroo.h>
 #include <linux/efi.h>
 #include "amdgpu.h"
+#include "amdgpu_trace.h"
 #include "amdgpu_i2c.h"
 #include "atom.h"
 #include "amdgpu_atombios.h"
@ -79,24 +81,27 @@ bool amdgpu_device_is_px(struct drm_device *dev)
 uint32_t amdgpu_mm_rreg(struct amdgpu_device *adev, uint32_t reg,
 			bool always_indirect)
 {
+	uint32_t ret;
+
 	if ((reg * 4) < adev->rmmio_size && !always_indirect)
-		return readl(((void __iomem *)adev->rmmio) + (reg * 4));
+		ret = readl(((void __iomem *)adev->rmmio) + (reg * 4));
 	else {
 		unsigned long flags;
-		uint32_t ret;

 		spin_lock_irqsave(&adev->mmio_idx_lock, flags);
 		writel((reg * 4), ((void __iomem *)adev->rmmio) + (mmMM_INDEX * 4));
 		ret = readl(((void __iomem *)adev->rmmio) + (mmMM_DATA * 4));
 		spin_unlock_irqrestore(&adev->mmio_idx_lock, flags);
-
-		return ret;
 	}
+	trace_amdgpu_mm_rreg(adev->pdev->device, reg, ret);
+	return ret;
 }

 void amdgpu_mm_wreg(struct amdgpu_device *adev, uint32_t reg, uint32_t v,
 		    bool always_indirect)
 {
+	trace_amdgpu_mm_wreg(adev->pdev->device, reg, v);
+	
 	if ((reg * 4) < adev->rmmio_size && !always_indirect)
 		writel(v, ((void __iomem *)adev->rmmio) + (reg * 4));
 	else {
@ -1070,11 +1075,14 @@ int amdgpu_set_clockgating_state(struct amdgpu_device *adev,
 	int i, r = 0;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
 		if (adev->ip_blocks[i].type == block_type) {
 			r = adev->ip_blocks[i].funcs->set_clockgating_state((void *)adev,
 									    state);
 			if (r)
 				return r;
+			break;
 		}
 	}
 	return r;
@ -1087,16 +1095,53 @@ int amdgpu_set_powergating_state(struct amdgpu_device *adev,
 	int i, r = 0;

 	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
 		if (adev->ip_blocks[i].type == block_type) {
 			r = adev->ip_blocks[i].funcs->set_powergating_state((void *)adev,
 									    state);
 			if (r)
 				return r;
+			break;
 		}
 	}
 	return r;
 }

+int amdgpu_wait_for_idle(struct amdgpu_device *adev,
+			 enum amd_ip_block_type block_type)
+{
+	int i, r;
+
+	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
+		if (adev->ip_blocks[i].type == block_type) {
+			r = adev->ip_blocks[i].funcs->wait_for_idle((void *)adev);
+			if (r)
+				return r;
+			break;
+		}
+	}
+	return 0;
+
+}
+
+bool amdgpu_is_idle(struct amdgpu_device *adev,
+		    enum amd_ip_block_type block_type)
+{
+	int i;
+
+	for (i = 0; i < adev->num_ip_blocks; i++) {
+		if (!adev->ip_block_status[i].valid)
+			continue;
+		if (adev->ip_blocks[i].type == block_type)
+			return adev->ip_blocks[i].funcs->is_idle((void *)adev);
+	}
+	return true;
+
+}
+
 const struct amdgpu_ip_block_version * amdgpu_get_ip_block(
 					struct amdgpu_device *adev,
 					enum amd_ip_block_type type)
@ -1209,6 +1254,9 @@ static int amdgpu_early_init(struct amdgpu_device *adev)
 		}
 	}

+	adev->cg_flags &= amdgpu_cg_mask;
+	adev->pg_flags &= amdgpu_pg_mask;
+
 	return 0;
 }

@ -1440,9 +1488,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	adev->uvd_ctx_wreg = &amdgpu_invalid_wreg;
 	adev->didt_rreg = &amdgpu_invalid_rreg;
 	adev->didt_wreg = &amdgpu_invalid_wreg;
+	adev->gc_cac_rreg = &amdgpu_invalid_rreg;
+	adev->gc_cac_wreg = &amdgpu_invalid_wreg;
 	adev->audio_endpt_rreg = &amdgpu_block_invalid_rreg;
 	adev->audio_endpt_wreg = &amdgpu_block_invalid_wreg;

+
 	DRM_INFO("initializing kernel modesetting (%s 0x%04X:0x%04X 0x%04X:0x%04X 0x%02X).\n",
 		 amdgpu_asic_name[adev->asic_type], pdev->vendor, pdev->device,
 		 pdev->subsystem_vendor, pdev->subsystem_device, pdev->revision);
@ -1467,6 +1518,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	spin_lock_init(&adev->pcie_idx_lock);
 	spin_lock_init(&adev->uvd_ctx_idx_lock);
 	spin_lock_init(&adev->didt_idx_lock);
+	spin_lock_init(&adev->gc_cac_idx_lock);
 	spin_lock_init(&adev->audio_endpt_idx_lock);

 	adev->rmmio_base = pci_resource_start(adev->pdev, 5);
@ -1511,17 +1563,20 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		vga_switcheroo_init_domain_pm_ops(adev->dev, &adev->vga_pm_domain);

 	/* Read BIOS */
-	if (!amdgpu_get_bios(adev))
-		return -EINVAL;
+	if (!amdgpu_get_bios(adev)) {
+		r = -EINVAL;
+		goto failed;
+	}
 	/* Must be an ATOMBIOS */
 	if (!adev->is_atom_bios) {
 		dev_err(adev->dev, "Expecting atombios for GPU\n");
-		return -EINVAL;
+		r = -EINVAL;
+		goto failed;
 	}
 	r = amdgpu_atombios_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_atombios_init failed\n");
-		return r;
+		goto failed;
 	}

 	/* See if the asic supports SR-IOV */
@ -1538,7 +1593,8 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	     !(adev->virtualization.caps & AMDGPU_VIRT_CAPS_SRIOV_EN))) {
 		if (!adev->bios) {
 			dev_err(adev->dev, "Card not posted and no BIOS - ignoring\n");
-			return -EINVAL;
+			r = -EINVAL;
+			goto failed;
 		}
 		DRM_INFO("GPU not posted. posting now...\n");
 		amdgpu_atom_asic_init(adev->mode_info.atom_context);
@ -1548,7 +1604,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_atombios_get_clock_info(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_atombios_get_clock_info failed\n");
-		return r;
+		goto failed;
 	}
 	/* init i2c buses */
 	amdgpu_atombios_i2c_init(adev);
@ -1557,7 +1613,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_fence_driver_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_fence_driver_init failed\n");
-		return r;
+		goto failed;
 	}

 	/* init the mode config */
@ -1567,7 +1623,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	if (r) {
 		dev_err(adev->dev, "amdgpu_init failed\n");
 		amdgpu_fini(adev);
-		return r;
+		goto failed;
 	}

 	adev->accel_working = true;
@ -1577,7 +1633,7 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_ib_pool_init(adev);
 	if (r) {
 		dev_err(adev->dev, "IB initialization failed (%d).\n", r);
-		return r;
+		goto failed;
 	}

 	r = amdgpu_ib_ring_tests(adev);
@ -1594,6 +1650,12 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 		DRM_ERROR("registering register debugfs failed (%d).\n", r);
 	}

+	r = amdgpu_debugfs_firmware_init(adev);
+	if (r) {
+		DRM_ERROR("registering firmware debugfs failed (%d).\n", r);
+		return r;
+	}
+
 	if ((amdgpu_testing & 1)) {
 		if (adev->accel_working)
 			amdgpu_test_moves(adev);
@ -1619,10 +1681,15 @@ int amdgpu_device_init(struct amdgpu_device *adev,
 	r = amdgpu_late_init(adev);
 	if (r) {
 		dev_err(adev->dev, "amdgpu_late_init failed\n");
-		return r;
+		goto failed;
 	}

 	return 0;
+
+failed:
+	if (runtime)
+		vga_switcheroo_fini_domain_pm_ops(adev->dev);
+	return r;
 }

 static void amdgpu_debugfs_remove_files(struct amdgpu_device *adev);
@ -1645,6 +1712,7 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	amdgpu_bo_evict_vram(adev);
 	amdgpu_ib_pool_fini(adev);
 	amdgpu_fence_driver_fini(adev);
+	drm_crtc_force_disable_all(adev->ddev);
 	amdgpu_fbdev_fini(adev);
 	r = amdgpu_fini(adev);
 	kfree(adev->ip_block_status);
@ -1656,6 +1724,8 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
 	kfree(adev->bios);
 	adev->bios = NULL;
 	vga_switcheroo_unregister_client(adev->pdev);
+	if (adev->flags & AMD_IS_PX)
+		vga_switcheroo_fini_domain_pm_ops(adev->dev);
 	vga_client_register(adev->pdev, NULL, NULL, NULL);
 	if (adev->rio_mem)
 		pci_iounmap(adev->pdev, adev->rio_mem);
@ -1841,7 +1911,23 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
 	}

 	drm_kms_helper_poll_enable(dev);
+
+	/*
+	 * Most of the connector probing functions try to acquire runtime pm
+	 * refs to ensure that the GPU is powered on when connector polling is
+	 * performed. Since we're calling this from a runtime PM callback,
+	 * trying to acquire rpm refs will cause us to deadlock.
+	 *
+	 * Since we're guaranteed to be holding the rpm lock, it's safe to
+	 * temporarily disable the rpm helpers so this doesn't deadlock us.
+	 */
+#ifdef CONFIG_PM
+	dev->dev->power.disable_depth++;
+#endif
 	drm_helper_hpd_irq_event(dev);
+#ifdef CONFIG_PM
+	dev->dev->power.disable_depth--;
+#endif

 	if (fbcon) {
 		amdgpu_fbdev_set_suspend(adev, 0);
@ -1861,11 +1947,6 @@ int amdgpu_resume_kms(struct drm_device *dev, bool resume, bool fbcon)
 */
 int amdgpu_gpu_reset(struct amdgpu_device *adev)
 {
-	unsigned ring_sizes[AMDGPU_MAX_RINGS];
-	uint32_t *ring_data[AMDGPU_MAX_RINGS];
-
-	bool saved = false;
-
 	int i, r;
 	int resched;

@ -1874,22 +1955,30 @@ int amdgpu_gpu_reset(struct amdgpu_device *adev)
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&adev->mman.bdev);

-	r = amdgpu_suspend(adev);
-
+	/* block scheduler */
 	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 		struct amdgpu_ring *ring = adev->rings[i];
+
 		if (!ring)
 			continue;
-
-		ring_sizes[i] = amdgpu_ring_backup(ring, &ring_data[i]);
-		if (ring_sizes[i]) {
-			saved = true;
-			dev_info(adev->dev, "Saved %d dwords of commands "
-				 "on ring %d.\n", ring_sizes[i], i);
-		}
+		kthread_park(ring->sched.thread);
+		amd_sched_hw_job_reset(&ring->sched);
 	}
+	/* after all hw jobs are reset, hw fence is meaningless, so force_completion */
+	amdgpu_fence_driver_force_completion(adev);
+
+	/* save scratch */
+	amdgpu_atombios_scratch_regs_save(adev);
+	r = amdgpu_suspend(adev);

 retry:
+	/* Disable fb access */
+	if (adev->mode_info.num_crtc) {
+		struct amdgpu_mode_mc_save save;
+		amdgpu_display_stop_mc_access(adev, &save);
+		amdgpu_wait_for_idle(adev, AMD_IP_BLOCK_TYPE_GMC);
+	}
+
 	r = amdgpu_asic_reset(adev);
 	/* post card */
 	amdgpu_atom_asic_init(adev->mode_info.atom_context);
@ -1898,32 +1987,29 @@ retry:
 		dev_info(adev->dev, "GPU reset succeeded, trying to resume\n");
 		r = amdgpu_resume(adev);
 	}
-
+	/* restore scratch */
+	amdgpu_atombios_scratch_regs_restore(adev);
 	if (!r) {
+		r = amdgpu_ib_ring_tests(adev);
+		if (r) {
+			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
+			r = amdgpu_suspend(adev);
+			goto retry;
+		}
+
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
 			struct amdgpu_ring *ring = adev->rings[i];
 			if (!ring)
 				continue;
-
-			amdgpu_ring_restore(ring, ring_sizes[i], ring_data[i]);
-			ring_sizes[i] = 0;
-			ring_data[i] = NULL;
-		}
-
-		r = amdgpu_ib_ring_tests(adev);
-		if (r) {
-			dev_err(adev->dev, "ib ring test failed (%d).\n", r);
-			if (saved) {
-				saved = false;
-				r = amdgpu_suspend(adev);
-				goto retry;
-			}
+			amd_sched_job_recovery(&ring->sched);
+			kthread_unpark(ring->sched.thread);
 		}
 	} else {
-		amdgpu_fence_driver_force_completion(adev);
+		dev_err(adev->dev, "asic resume failed (%d).\n", r);
 		for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
-			if (adev->rings[i])
-				kfree(ring_data[i]);
+			if (adev->rings[i]) {
+				kthread_unpark(adev->rings[i]->sched.thread);
+			}
 		}
 	}

@ -1934,13 +2020,11 @@ retry:
 		/* bad news, how to tell it to userspace ? */
 		dev_info(adev->dev, "GPU reset failed\n");
 	}
+	amdgpu_irq_gpu_reset_resume_helper(adev);

 	return r;
 }

-#define AMDGPU_DEFAULT_PCIE_GEN_MASK 0x30007  /* gen: chipset 1/2, asic 1/2/3 */
-#define AMDGPU_DEFAULT_PCIE_MLW_MASK 0x2f0000 /* 1/2/4/8/16 lanes */
-
 void amdgpu_get_pcie_info(struct amdgpu_device *adev)
 {
 	u32 mask;
@ -2094,20 +2178,43 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
 	struct amdgpu_device *adev = f->f_inode->i_private;
 	ssize_t result = 0;
 	int r;
+	bool use_bank;
+	unsigned instance_bank, sh_bank, se_bank;

 	if (size & 0x3 || *pos & 0x3)
 		return -EINVAL;

+	if (*pos & (1ULL << 62)) {
+		se_bank = (*pos >> 24) & 0x3FF;
+		sh_bank = (*pos >> 34) & 0x3FF;
+		instance_bank = (*pos >> 44) & 0x3FF;
+		use_bank = 1;
+		*pos &= 0xFFFFFF;
+	} else {
+		use_bank = 0;
+	}
+
+	if (use_bank) {
+		if (sh_bank >= adev->gfx.config.max_sh_per_se ||
+		    se_bank >= adev->gfx.config.max_shader_engines)
+			return -EINVAL;
+		mutex_lock(&adev->grbm_idx_mutex);
+		amdgpu_gfx_select_se_sh(adev, se_bank,
+					sh_bank, instance_bank);
+	}
+
 	while (size) {
 		uint32_t value;

 		if (*pos > adev->rmmio_size)
-			return result;
+			goto end;

 		value = RREG32(*pos >> 2);
 		r = put_user(value, (uint32_t *)buf);
-		if (r)
-			return r;
+		if (r) {
+			result = r;
+			goto end;
+		}

 		result += 4;
 		buf += 4;
@ -2115,6 +2222,12 @@ static ssize_t amdgpu_debugfs_regs_read(struct file *f, char __user *buf,
 		size -= 4;
 	}

+end:
+	if (use_bank) {
+		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
+		mutex_unlock(&adev->grbm_idx_mutex);
+	}
+
 	return result;
 }

@ -2314,6 +2427,68 @@ static ssize_t amdgpu_debugfs_regs_smc_write(struct file *f, const char __user *
 	return result;
 }

+static ssize_t amdgpu_debugfs_gca_config_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
+{
+	struct amdgpu_device *adev = f->f_inode->i_private;
+	ssize_t result = 0;
+	int r;
+	uint32_t *config, no_regs = 0;
+
+	if (size & 0x3 || *pos & 0x3)
+		return -EINVAL;
+
+	config = kmalloc(256 * sizeof(*config), GFP_KERNEL);
+	if (!config)
+		return -ENOMEM;
+
+	/* version, increment each time something is added */
+	config[no_regs++] = 0;
+	config[no_regs++] = adev->gfx.config.max_shader_engines;
+	config[no_regs++] = adev->gfx.config.max_tile_pipes;
+	config[no_regs++] = adev->gfx.config.max_cu_per_sh;
+	config[no_regs++] = adev->gfx.config.max_sh_per_se;
+	config[no_regs++] = adev->gfx.config.max_backends_per_se;
+	config[no_regs++] = adev->gfx.config.max_texture_channel_caches;
+	config[no_regs++] = adev->gfx.config.max_gprs;
+	config[no_regs++] = adev->gfx.config.max_gs_threads;
+	config[no_regs++] = adev->gfx.config.max_hw_contexts;
+	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_frontend;
+	config[no_regs++] = adev->gfx.config.sc_prim_fifo_size_backend;
+	config[no_regs++] = adev->gfx.config.sc_hiz_tile_fifo_size;
+	config[no_regs++] = adev->gfx.config.sc_earlyz_tile_fifo_size;
+	config[no_regs++] = adev->gfx.config.num_tile_pipes;
+	config[no_regs++] = adev->gfx.config.backend_enable_mask;
+	config[no_regs++] = adev->gfx.config.mem_max_burst_length_bytes;
+	config[no_regs++] = adev->gfx.config.mem_row_size_in_kb;
+	config[no_regs++] = adev->gfx.config.shader_engine_tile_size;
+	config[no_regs++] = adev->gfx.config.num_gpus;
+	config[no_regs++] = adev->gfx.config.multi_gpu_tile_size;
+	config[no_regs++] = adev->gfx.config.mc_arb_ramcfg;
+	config[no_regs++] = adev->gfx.config.gb_addr_config;
+	config[no_regs++] = adev->gfx.config.num_rbs;
+
+	while (size && (*pos < no_regs * 4)) {
+		uint32_t value;
+
+		value = config[*pos >> 2];
+		r = put_user(value, (uint32_t *)buf);
+		if (r) {
+			kfree(config);
+			return r;
+		}
+
+		result += 4;
+		buf += 4;
+		*pos += 4;
+		size -= 4;
+	}
+
+	kfree(config);
+	return result;
+}
+
+
 static const struct file_operations amdgpu_debugfs_regs_fops = {
 	.owner = THIS_MODULE,
 	.read = amdgpu_debugfs_regs_read,
@ -2339,11 +2514,18 @@ static const struct file_operations amdgpu_debugfs_regs_smc_fops = {
 	.llseek = default_llseek
 };

+static const struct file_operations amdgpu_debugfs_gca_config_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_gca_config_read,
+	.llseek = default_llseek
+};
+
 static const struct file_operations *debugfs_regs[] = {
 	&amdgpu_debugfs_regs_fops,
 	&amdgpu_debugfs_regs_didt_fops,
 	&amdgpu_debugfs_regs_pcie_fops,
 	&amdgpu_debugfs_regs_smc_fops,
+	&amdgpu_debugfs_gca_config_fops,
 };

 static const char *debugfs_regs_names[] = {
@ -2351,6 +2533,7 @@ static const char *debugfs_regs_names[] = {
 	"amdgpu_regs_didt",
 	"amdgpu_regs_pcie",
 	"amdgpu_regs_smc",
+	"amdgpu_gca_config",
 };

 static int amdgpu_debugfs_regs_init(struct amdgpu_device *adev)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
@ -122,7 +122,7 @@ static void amdgpu_flip_work_func(struct work_struct *__work)
 		spin_unlock_irqrestore(&crtc->dev->event_lock, flags);
 		usleep_range(min_udelay, 2 * min_udelay);
 		spin_lock_irqsave(&crtc->dev->event_lock, flags);
-	};
+	}

 	if (!repcnt)
 		DRM_DEBUG_DRIVER("Delay problem on crtc %d: min_udelay %d, "
@ -220,19 +220,17 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,

 	r = amdgpu_bo_pin_restricted(new_rbo, AMDGPU_GEM_DOMAIN_VRAM, 0, 0, &base);
 	if (unlikely(r != 0)) {
-		amdgpu_bo_unreserve(new_rbo);
 		r = -EINVAL;
 		DRM_ERROR("failed to pin new rbo buffer before flip\n");
-		goto cleanup;
+		goto unreserve;
 	}

 	r = reservation_object_get_fences_rcu(new_rbo->tbo.resv, &work->excl,
 					      &work->shared_count,
 					      &work->shared);
 	if (unlikely(r != 0)) {
-		amdgpu_bo_unreserve(new_rbo);
 		DRM_ERROR("failed to get fences for buffer\n");
-		goto cleanup;
+		goto unpin;
 	}

 	amdgpu_bo_get_tiling_flags(new_rbo, &tiling_flags);
@ -240,7 +238,7 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,

 	work->base = base;

-	r = drm_vblank_get(crtc->dev, amdgpu_crtc->crtc_id);
+	r = drm_crtc_vblank_get(crtc);
 	if (r) {
 		DRM_ERROR("failed to get vblank before flip\n");
 		goto pflip_cleanup;
@ -268,16 +266,18 @@ int amdgpu_crtc_page_flip(struct drm_crtc *crtc,
 	return 0;

 vblank_cleanup:
-	drm_vblank_put(crtc->dev, amdgpu_crtc->crtc_id);
+	drm_crtc_vblank_put(crtc);

 pflip_cleanup:
 	if (unlikely(amdgpu_bo_reserve(new_rbo, false) != 0)) {
 		DRM_ERROR("failed to reserve new rbo in error path\n");
 		goto cleanup;
 	}
+unpin:
 	if (unlikely(amdgpu_bo_unpin(new_rbo) != 0)) {
 		DRM_ERROR("failed to unpin new rbo in error path\n");
 	}
+unreserve:
 	amdgpu_bo_unreserve(new_rbo);

 cleanup:
@ -516,9 +516,7 @@ static void amdgpu_user_framebuffer_destroy(struct drm_framebuffer *fb)
 {
 	struct amdgpu_framebuffer *amdgpu_fb = to_amdgpu_framebuffer(fb);

-	if (amdgpu_fb->obj) {
-		drm_gem_object_unreference_unlocked(amdgpu_fb->obj);
-	}
+	drm_gem_object_unreference_unlocked(amdgpu_fb->obj);
 	drm_framebuffer_cleanup(fb);
 	kfree(amdgpu_fb);
 }
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
@ -52,9 +52,10 @@
 * - 3.1.0 - allow reading more status registers (GRBM, SRBM, SDMA, CP)
 * - 3.2.0 - GFX8: Uses EOP_TC_WB_ACTION_EN, so UMDs don't have to do the same
 *           at the end of IBs.
+ * - 3.3.0 - Add VM support for UVD on supported hardware.
 */
 #define KMS_DRIVER_MAJOR	3
-#define KMS_DRIVER_MINOR	2
+#define KMS_DRIVER_MINOR	3
 #define KMS_DRIVER_PATCHLEVEL	0

 int amdgpu_vram_limit = 0;
@ -82,8 +83,12 @@ int amdgpu_exp_hw_support = 0;
 int amdgpu_sched_jobs = 32;
 int amdgpu_sched_hw_submission = 2;
 int amdgpu_powerplay = -1;
+int amdgpu_powercontainment = 1;
 unsigned amdgpu_pcie_gen_cap = 0;
 unsigned amdgpu_pcie_lane_cap = 0;
+unsigned amdgpu_cg_mask = 0xffffffff;
+unsigned amdgpu_pg_mask = 0xffffffff;
+char *amdgpu_disable_cu = NULL;

 MODULE_PARM_DESC(vramlimit, "Restrict VRAM for testing, in megabytes");
 module_param_named(vramlimit, amdgpu_vram_limit, int, 0600);
@ -160,6 +165,9 @@ module_param_named(sched_hw_submission, amdgpu_sched_hw_submission, int, 0444);
 #ifdef CONFIG_DRM_AMD_POWERPLAY
 MODULE_PARM_DESC(powerplay, "Powerplay component (1 = enable, 0 = disable, -1 = auto (default))");
 module_param_named(powerplay, amdgpu_powerplay, int, 0444);
+
+MODULE_PARM_DESC(powercontainment, "Power Containment (1 = enable (default), 0 = disable)");
+module_param_named(powercontainment, amdgpu_powercontainment, int, 0444);
 #endif

 MODULE_PARM_DESC(pcie_gen_cap, "PCIE Gen Caps (0: autodetect (default))");
@ -168,6 +176,15 @@ module_param_named(pcie_gen_cap, amdgpu_pcie_gen_cap, uint, 0444);
 MODULE_PARM_DESC(pcie_lane_cap, "PCIE Lane Caps (0: autodetect (default))");
 module_param_named(pcie_lane_cap, amdgpu_pcie_lane_cap, uint, 0444);

+MODULE_PARM_DESC(cg_mask, "Clockgating flags mask (0 = disable clock gating)");
+module_param_named(cg_mask, amdgpu_cg_mask, uint, 0444);
+
+MODULE_PARM_DESC(pg_mask, "Powergating flags mask (0 = disable power gating)");
+module_param_named(pg_mask, amdgpu_pg_mask, uint, 0444);
+
+MODULE_PARM_DESC(disable_cu, "Disable CUs (se.sh.cu,...)");
+module_param_named(disable_cu, amdgpu_disable_cu, charp, 0444);
+
 static const struct pci_device_id pciidlist[] = {
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	/* Kaveri */
@ -413,7 +430,10 @@ static int amdgpu_pmops_runtime_suspend(struct device *dev)
 	pci_save_state(pdev);
 	pci_disable_device(pdev);
 	pci_ignore_hotplug(pdev);
-	pci_set_power_state(pdev, PCI_D3cold);
+	if (amdgpu_is_atpx_hybrid())
+		pci_set_power_state(pdev, PCI_D3cold);
+	else if (!amdgpu_has_atpx_dgpu_power_cntl())
+		pci_set_power_state(pdev, PCI_D3hot);
 	drm_dev->switch_power_state = DRM_SWITCH_POWER_DYNAMIC_OFF;

 	return 0;
@ -430,7 +450,9 @@ static int amdgpu_pmops_runtime_resume(struct device *dev)

 	drm_dev->switch_power_state = DRM_SWITCH_POWER_CHANGING;

-	pci_set_power_state(pdev, PCI_D0);
+	if (amdgpu_is_atpx_hybrid() ||
+	    !amdgpu_has_atpx_dgpu_power_cntl())
+		pci_set_power_state(pdev, PCI_D0);
 	pci_restore_state(pdev);
 	ret = pci_enable_device(pdev);
 	if (ret)
@ -515,7 +537,7 @@ static struct drm_driver kms_driver = {
 	.driver_features =
 	    DRIVER_USE_AGP |
 	    DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM |
-	    DRIVER_PRIME | DRIVER_RENDER,
+	    DRIVER_PRIME | DRIVER_RENDER | DRIVER_MODESET,
 	.dev_priv_size = 0,
 	.load = amdgpu_driver_load_kms,
 	.open = amdgpu_driver_open_kms,
@ -590,7 +612,6 @@ static int __init amdgpu_init(void)
 	DRM_INFO("amdgpu kernel modesetting enabled.\n");
 	driver = &kms_driver;
 	pdriver = &amdgpu_kms_pci_driver;
-	driver->driver_features |= DRIVER_MODESET;
 	driver->num_ioctls = amdgpu_max_kms_ioctl;
 	amdgpu_register_atpx_handler();
 	/* let modprobe override vga console setting */
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_fence.c
@ -204,16 +204,25 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
 	if (seq != ring->fence_drv.sync_seq)
 		amdgpu_fence_schedule_fallback(ring);

-	while (last_seq != seq) {
+	if (unlikely(seq == last_seq))
+		return;
+
+	last_seq &= drv->num_fences_mask;
+	seq &= drv->num_fences_mask;
+
+	do {
 		struct fence *fence, **ptr;

-		ptr = &drv->fences[++last_seq & drv->num_fences_mask];
+		++last_seq;
+		last_seq &= drv->num_fences_mask;
+		ptr = &drv->fences[last_seq];

 		/* There is always exactly one thread signaling this fence slot */
 		fence = rcu_dereference_protected(*ptr, 1);
 		RCU_INIT_POINTER(*ptr, NULL);

-		BUG_ON(!fence);
+		if (!fence)
+			continue;

 		r = fence_signal(fence);
 		if (!r)
@ -222,7 +231,7 @@ void amdgpu_fence_process(struct amdgpu_ring *ring)
 			BUG();

 		fence_put(fence);
-	}
+	} while (last_seq != seq);
 }

 /**
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gem.c
@ -503,7 +503,7 @@ static void amdgpu_gem_va_update_vm(struct amdgpu_device *adev,
 	if (r)
 		goto error_print;

-	amdgpu_vm_get_pt_bos(bo_va->vm, &duplicates);
+	amdgpu_vm_get_pt_bos(adev, bo_va->vm, &duplicates);
 	list_for_each_entry(entry, &list, head) {
 		domain = amdgpu_mem_type_to_domain(entry->bo->mem.mem_type);
 		/* if anything is swapped out don't swap it in here,
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@ -70,3 +70,47 @@ void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg)
 		}
 	}
 }
+
+/**
+ * amdgpu_gfx_parse_disable_cu - Parse the disable_cu module parameter
+ *
+ * @mask: array in which the per-shader array disable masks will be stored
+ * @max_se: number of SEs
+ * @max_sh: number of SHs
+ *
+ * The bitmask of CUs to be disabled in the shader array determined by se and
+ * sh is stored in mask[se * max_sh + sh].
+ */
+void amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh)
+{
+	unsigned se, sh, cu;
+	const char *p;
+
+	memset(mask, 0, sizeof(*mask) * max_se * max_sh);
+
+	if (!amdgpu_disable_cu || !*amdgpu_disable_cu)
+		return;
+
+	p = amdgpu_disable_cu;
+	for (;;) {
+		char *next;
+		int ret = sscanf(p, "%u.%u.%u", &se, &sh, &cu);
+		if (ret < 3) {
+			DRM_ERROR("amdgpu: could not parse disable_cu\n");
+			return;
+		}
+
+		if (se < max_se && sh < max_sh && cu < 16) {
+			DRM_INFO("amdgpu: disabling CU %u.%u.%u\n", se, sh, cu);
+			mask[se * max_sh + sh] |= 1u << cu;
+		} else {
+			DRM_ERROR("amdgpu: disable_cu %u.%u.%u is out of range\n",
+				  se, sh, cu);
+		}
+
+		next = strchr(p, ',');
+		if (!next)
+			break;
+		p = next + 1;
+	}
+}
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.h
@ -27,4 +27,6 @@
 int amdgpu_gfx_scratch_get(struct amdgpu_device *adev, uint32_t *reg);
 void amdgpu_gfx_scratch_free(struct amdgpu_device *adev, uint32_t reg);

+unsigned amdgpu_gfx_parse_disable_cu(unsigned *mask, unsigned max_se, unsigned max_sh);
+
 #endif
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ib.c
@ -33,6 +33,8 @@
 #include "amdgpu.h"
 #include "atom.h"

+#define AMDGPU_IB_TEST_TIMEOUT	msecs_to_jiffies(1000)
+
 /*
 * IB
 * IBs (Indirect Buffers) and areas of GPU accessible memory where
@ -122,7 +124,6 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	bool skip_preamble, need_ctx_switch;
 	unsigned patch_offset = ~0;
 	struct amdgpu_vm *vm;
-	struct fence *hwf;
 	uint64_t ctx;

 	unsigned i;
@ -160,10 +161,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 		patch_offset = amdgpu_ring_init_cond_exec(ring);

 	if (vm) {
-		r = amdgpu_vm_flush(ring, job->vm_id, job->vm_pd_addr,
-				    job->gds_base, job->gds_size,
-				    job->gws_base, job->gws_size,
-				    job->oa_base, job->oa_size);
+		r = amdgpu_vm_flush(ring, job);
 		if (r) {
 			amdgpu_ring_undo(ring);
 			return r;
@ -193,7 +191,7 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	if (ring->funcs->emit_hdp_invalidate)
 		amdgpu_ring_emit_hdp_invalidate(ring);

-	r = amdgpu_fence_emit(ring, &hwf);
+	r = amdgpu_fence_emit(ring, f);
 	if (r) {
 		dev_err(adev->dev, "failed to emit fence (%d)\n", r);
 		if (job && job->vm_id)
@ -203,17 +201,11 @@ int amdgpu_ib_schedule(struct amdgpu_ring *ring, unsigned num_ibs,
 	}

 	/* wrap the last IB with fence */
-	if (job && job->uf_bo) {
-		uint64_t addr = amdgpu_bo_gpu_offset(job->uf_bo);
-
-		addr += job->uf_offset;
-		amdgpu_ring_emit_fence(ring, addr, job->uf_sequence,
+	if (job && job->uf_addr) {
+		amdgpu_ring_emit_fence(ring, job->uf_addr, job->uf_sequence,
 				       AMDGPU_FENCE_FLAG_64BIT);
 	}

-	if (f)
-		*f = fence_get(hwf);
-
 	if (patch_offset != ~0 && ring->funcs->patch_cond_exec)
 		amdgpu_ring_patch_cond_exec(ring, patch_offset);

@ -296,7 +288,7 @@ int amdgpu_ib_ring_tests(struct amdgpu_device *adev)
 		if (!ring || !ring->ready)
 			continue;

-		r = amdgpu_ring_test_ib(ring);
+		r = amdgpu_ring_test_ib(ring, AMDGPU_IB_TEST_TIMEOUT);
 		if (r) {
 			ring->ready = false;

--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.c
@ -383,6 +383,18 @@ int amdgpu_irq_update(struct amdgpu_device *adev,
 	return r;
 }

+void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev)
+{
+	int i, j;
+	for (i = 0; i < AMDGPU_MAX_IRQ_SRC_ID; i++) {
+		struct amdgpu_irq_src *src = adev->irq.sources[i];
+		if (!src)
+			continue;
+		for (j = 0; j < src->num_types; j++)
+			amdgpu_irq_update(adev, src, j);
+	}
+}
+
 /**
 * amdgpu_irq_get - enable interrupt
 *
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_irq.h
@ -94,6 +94,7 @@ int amdgpu_irq_put(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
 		   unsigned type);
 bool amdgpu_irq_enabled(struct amdgpu_device *adev, struct amdgpu_irq_src *src,
 			unsigned type);
+void amdgpu_irq_gpu_reset_resume_helper(struct amdgpu_device *adev);

 int amdgpu_irq_add_domain(struct amdgpu_device *adev);
 void amdgpu_irq_remove_domain(struct amdgpu_device *adev);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_job.c
@ -28,21 +28,15 @@
 #include "amdgpu.h"
 #include "amdgpu_trace.h"

-static void amdgpu_job_free_handler(struct work_struct *ws)
+static void amdgpu_job_timedout(struct amd_sched_job *s_job)
 {
-	struct amdgpu_job *job = container_of(ws, struct amdgpu_job, base.work_free_job);
-	amd_sched_job_put(&job->base);
-}
+	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);

-void amdgpu_job_timeout_func(struct work_struct *work)
-{
-	struct amdgpu_job *job = container_of(work, struct amdgpu_job, base.work_tdr.work);
 	DRM_ERROR("ring %s timeout, last signaled seq=%u, last emitted seq=%u\n",
-				job->base.sched->name,
-				(uint32_t)atomic_read(&job->ring->fence_drv.last_seq),
-				job->ring->fence_drv.sync_seq);
-
-	amd_sched_job_put(&job->base);
+		  job->base.sched->name,
+		  atomic_read(&job->ring->fence_drv.last_seq),
+		  job->ring->fence_drv.sync_seq);
+	amdgpu_gpu_reset(job->adev);
 }

 int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
@ -63,7 +57,6 @@ int amdgpu_job_alloc(struct amdgpu_device *adev, unsigned num_ibs,
 	(*job)->vm = vm;
 	(*job)->ibs = (void *)&(*job)[1];
 	(*job)->num_ibs = num_ibs;
-	INIT_WORK(&(*job)->base.work_free_job, amdgpu_job_free_handler);

 	amdgpu_sync_create(&(*job)->sync);

@ -86,27 +79,33 @@ int amdgpu_job_alloc_with_ib(struct amdgpu_device *adev, unsigned size,
 	return r;
 }

-void amdgpu_job_free(struct amdgpu_job *job)
+void amdgpu_job_free_resources(struct amdgpu_job *job)
 {
-	unsigned i;
 	struct fence *f;
+	unsigned i;
+
 	/* use sched fence if available */
-	f = (job->base.s_fence)? &job->base.s_fence->base : job->fence;
+	f = job->base.s_fence ? &job->base.s_fence->finished : job->fence;

 	for (i = 0; i < job->num_ibs; ++i)
-		amdgpu_sa_bo_free(job->adev, &job->ibs[i].sa_bo, f);
-	fence_put(job->fence);
-
-	amdgpu_bo_unref(&job->uf_bo);
-	amdgpu_sync_free(&job->sync);
-
-	if (!job->base.use_sched)
-		kfree(job);
+		amdgpu_ib_free(job->adev, &job->ibs[i], f);
 }

-void amdgpu_job_free_func(struct kref *refcount)
+void amdgpu_job_free_cb(struct amd_sched_job *s_job)
 {
-	struct amdgpu_job *job = container_of(refcount, struct amdgpu_job, base.refcount);
+	struct amdgpu_job *job = container_of(s_job, struct amdgpu_job, base);
+
+	fence_put(job->fence);
+	amdgpu_sync_free(&job->sync);
+	kfree(job);
+}
+
+void amdgpu_job_free(struct amdgpu_job *job)
+{
+	amdgpu_job_free_resources(job);
+
+	fence_put(job->fence);
+	amdgpu_sync_free(&job->sync);
 	kfree(job);
 }

@ -114,22 +113,20 @@ int amdgpu_job_submit(struct amdgpu_job *job, struct amdgpu_ring *ring,
 		      struct amd_sched_entity *entity, void *owner,
 		      struct fence **f)
 {
-	struct fence *fence;
 	int r;
 	job->ring = ring;

 	if (!f)
 		return -EINVAL;

-	r = amd_sched_job_init(&job->base, &ring->sched,
-			       entity, amdgpu_job_timeout_func,
-			       amdgpu_job_free_func, owner, &fence);
+	r = amd_sched_job_init(&job->base, &ring->sched, entity, owner);
 	if (r)
 		return r;

 	job->owner = owner;
 	job->ctx = entity->fence_context;
-	*f = fence_get(fence);
+	*f = fence_get(&job->base.s_fence->finished);
+	amdgpu_job_free_resources(job);
 	amd_sched_entity_push_job(&job->base);

 	return 0;
@ -147,8 +144,8 @@ static struct fence *amdgpu_job_dependency(struct amd_sched_job *sched_job)
 		int r;

 		r = amdgpu_vm_grab_id(vm, ring, &job->sync,
-				      &job->base.s_fence->base,
-				      &job->vm_id, &job->vm_pd_addr);
+				      &job->base.s_fence->finished,
+				      job);
 		if (r)
 			DRM_ERROR("Error getting VM ID (%d)\n", r);

@ -170,29 +167,24 @@ static struct fence *amdgpu_job_run(struct amd_sched_job *sched_job)
 	}
 	job = to_amdgpu_job(sched_job);

-	r = amdgpu_sync_wait(&job->sync);
-	if (r) {
-		DRM_ERROR("failed to sync wait (%d)\n", r);
-		return NULL;
-	}
+	BUG_ON(amdgpu_sync_peek_fence(&job->sync, NULL));

 	trace_amdgpu_sched_run_job(job);
 	r = amdgpu_ib_schedule(job->ring, job->num_ibs, job->ibs,
 			       job->sync.last_vm_update, job, &fence);
-	if (r) {
+	if (r)
 		DRM_ERROR("Error scheduling IBs (%d)\n", r);
-		goto err;
-	}

-err:
-	job->fence = fence;
-	amdgpu_job_free(job);
+	/* if gpu reset, hw fence will be replaced here */
+	fence_put(job->fence);
+	job->fence = fence_get(fence);
+	amdgpu_job_free_resources(job);
 	return fence;
 }

 const struct amd_sched_backend_ops amdgpu_sched_ops = {
 	.dependency = amdgpu_job_dependency,
 	.run_job = amdgpu_job_run,
-	.begin_job = amd_sched_job_begin,
-	.finish_job = amd_sched_job_finish,
+	.timedout_job = amdgpu_job_timedout,
+	.free_job = amdgpu_job_free_cb
 };
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@ -60,7 +60,10 @@ int amdgpu_driver_unload_kms(struct drm_device *dev)
 	if (adev->rmmio == NULL)
 		goto done_free;

-	pm_runtime_get_sync(dev->dev);
+	if (amdgpu_device_is_px(dev)) {
+		pm_runtime_get_sync(dev->dev);
+		pm_runtime_forbid(dev->dev);
+	}

 	amdgpu_amdkfd_device_fini(adev);

@ -135,13 +138,75 @@ int amdgpu_driver_load_kms(struct drm_device *dev, unsigned long flags)
 	}

 out:
-	if (r)
+	if (r) {
+		/* balance pm_runtime_get_sync in amdgpu_driver_unload_kms */
+		if (adev->rmmio && amdgpu_device_is_px(dev))
+			pm_runtime_put_noidle(dev->dev);
 		amdgpu_driver_unload_kms(dev);
-
+	}

 	return r;
 }

+static int amdgpu_firmware_info(struct drm_amdgpu_info_firmware *fw_info,
+				struct drm_amdgpu_query_fw *query_fw,
+				struct amdgpu_device *adev)
+{
+	switch (query_fw->fw_type) {
+	case AMDGPU_INFO_FW_VCE:
+		fw_info->ver = adev->vce.fw_version;
+		fw_info->feature = adev->vce.fb_version;
+		break;
+	case AMDGPU_INFO_FW_UVD:
+		fw_info->ver = adev->uvd.fw_version;
+		fw_info->feature = 0;
+		break;
+	case AMDGPU_INFO_FW_GMC:
+		fw_info->ver = adev->mc.fw_version;
+		fw_info->feature = 0;
+		break;
+	case AMDGPU_INFO_FW_GFX_ME:
+		fw_info->ver = adev->gfx.me_fw_version;
+		fw_info->feature = adev->gfx.me_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_PFP:
+		fw_info->ver = adev->gfx.pfp_fw_version;
+		fw_info->feature = adev->gfx.pfp_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_CE:
+		fw_info->ver = adev->gfx.ce_fw_version;
+		fw_info->feature = adev->gfx.ce_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_RLC:
+		fw_info->ver = adev->gfx.rlc_fw_version;
+		fw_info->feature = adev->gfx.rlc_feature_version;
+		break;
+	case AMDGPU_INFO_FW_GFX_MEC:
+		if (query_fw->index == 0) {
+			fw_info->ver = adev->gfx.mec_fw_version;
+			fw_info->feature = adev->gfx.mec_feature_version;
+		} else if (query_fw->index == 1) {
+			fw_info->ver = adev->gfx.mec2_fw_version;
+			fw_info->feature = adev->gfx.mec2_feature_version;
+		} else
+			return -EINVAL;
+		break;
+	case AMDGPU_INFO_FW_SMC:
+		fw_info->ver = adev->pm.fw_version;
+		fw_info->feature = 0;
+		break;
+	case AMDGPU_INFO_FW_SDMA:
+		if (query_fw->index >= adev->sdma.num_instances)
+			return -EINVAL;
+		fw_info->ver = adev->sdma.instance[query_fw->index].fw_version;
+		fw_info->feature = adev->sdma.instance[query_fw->index].feature_version;
+		break;
+	default:
+		return -EINVAL;
+	}
+	return 0;
+}
+
 /*
 * Userspace get information ioctl
 */
@ -288,67 +353,20 @@ static int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		return copy_to_user(out, &count, min(size, 4u)) ? -EFAULT : 0;
 	}
 	case AMDGPU_INFO_TIMESTAMP:
-		ui64 = amdgpu_asic_get_gpu_clock_counter(adev);
+		ui64 = amdgpu_gfx_get_gpu_clock_counter(adev);
 		return copy_to_user(out, &ui64, min(size, 8u)) ? -EFAULT : 0;
 	case AMDGPU_INFO_FW_VERSION: {
 		struct drm_amdgpu_info_firmware fw_info;
+		int ret;

 		/* We only support one instance of each IP block right now. */
 		if (info->query_fw.ip_instance != 0)
 			return -EINVAL;

-		switch (info->query_fw.fw_type) {
-		case AMDGPU_INFO_FW_VCE:
-			fw_info.ver = adev->vce.fw_version;
-			fw_info.feature = adev->vce.fb_version;
-			break;
-		case AMDGPU_INFO_FW_UVD:
-			fw_info.ver = adev->uvd.fw_version;
-			fw_info.feature = 0;
-			break;
-		case AMDGPU_INFO_FW_GMC:
-			fw_info.ver = adev->mc.fw_version;
-			fw_info.feature = 0;
-			break;
-		case AMDGPU_INFO_FW_GFX_ME:
-			fw_info.ver = adev->gfx.me_fw_version;
-			fw_info.feature = adev->gfx.me_feature_version;
-			break;
-		case AMDGPU_INFO_FW_GFX_PFP:
-			fw_info.ver = adev->gfx.pfp_fw_version;
-			fw_info.feature = adev->gfx.pfp_feature_version;
-			break;
-		case AMDGPU_INFO_FW_GFX_CE:
-			fw_info.ver = adev->gfx.ce_fw_version;
-			fw_info.feature = adev->gfx.ce_feature_version;
-			break;
-		case AMDGPU_INFO_FW_GFX_RLC:
-			fw_info.ver = adev->gfx.rlc_fw_version;
-			fw_info.feature = adev->gfx.rlc_feature_version;
-			break;
-		case AMDGPU_INFO_FW_GFX_MEC:
-			if (info->query_fw.index == 0) {
-				fw_info.ver = adev->gfx.mec_fw_version;
-				fw_info.feature = adev->gfx.mec_feature_version;
-			} else if (info->query_fw.index == 1) {
-				fw_info.ver = adev->gfx.mec2_fw_version;
-				fw_info.feature = adev->gfx.mec2_feature_version;
-			} else
-				return -EINVAL;
-			break;
-		case AMDGPU_INFO_FW_SMC:
-			fw_info.ver = adev->pm.fw_version;
-			fw_info.feature = 0;
-			break;
-		case AMDGPU_INFO_FW_SDMA:
-			if (info->query_fw.index >= adev->sdma.num_instances)
-				return -EINVAL;
-			fw_info.ver = adev->sdma.instance[info->query_fw.index].fw_version;
-			fw_info.feature = adev->sdma.instance[info->query_fw.index].feature_version;
-			break;
-		default:
-			return -EINVAL;
-		}
+		ret = amdgpu_firmware_info(&fw_info, &info->query_fw, adev);
+		if (ret)
+			return ret;
+
 		return copy_to_user(out, &fw_info,
 				    min((size_t)size, sizeof(fw_info))) ? -EFAULT : 0;
 	}
@ -566,6 +584,9 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,

 	amdgpu_ctx_mgr_fini(&fpriv->ctx_mgr);

+	amdgpu_uvd_free_handles(adev, file_priv);
+	amdgpu_vce_free_handles(adev, file_priv);
+
 	amdgpu_vm_fini(adev, &fpriv->vm);

 	idr_for_each_entry(&fpriv->bo_list_handles, list, handle)
@ -590,10 +611,6 @@ void amdgpu_driver_postclose_kms(struct drm_device *dev,
 void amdgpu_driver_preclose_kms(struct drm_device *dev,
 				struct drm_file *file_priv)
 {
-	struct amdgpu_device *adev = dev->dev_private;
-
-	amdgpu_uvd_free_handles(adev, file_priv);
-	amdgpu_vce_free_handles(adev, file_priv);
 }

 /*
@ -756,3 +773,130 @@ const struct drm_ioctl_desc amdgpu_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(AMDGPU_GEM_USERPTR, amdgpu_gem_userptr_ioctl, DRM_AUTH|DRM_RENDER_ALLOW),
 };
 const int amdgpu_max_kms_ioctl = ARRAY_SIZE(amdgpu_ioctls_kms);
+
+/*
+ * Debugfs info
+ */
+#if defined(CONFIG_DEBUG_FS)
+
+static int amdgpu_debugfs_firmware_info(struct seq_file *m, void *data)
+{
+	struct drm_info_node *node = (struct drm_info_node *) m->private;
+	struct drm_device *dev = node->minor->dev;
+	struct amdgpu_device *adev = dev->dev_private;
+	struct drm_amdgpu_info_firmware fw_info;
+	struct drm_amdgpu_query_fw query_fw;
+	int ret, i;
+
+	/* VCE */
+	query_fw.fw_type = AMDGPU_INFO_FW_VCE;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "VCE feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* UVD */
+	query_fw.fw_type = AMDGPU_INFO_FW_UVD;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "UVD feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* GMC */
+	query_fw.fw_type = AMDGPU_INFO_FW_GMC;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "MC feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* ME */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_ME;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "ME feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* PFP */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_PFP;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "PFP feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* CE */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_CE;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "CE feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* RLC */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_RLC;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "RLC feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* MEC */
+	query_fw.fw_type = AMDGPU_INFO_FW_GFX_MEC;
+	query_fw.index = 0;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "MEC feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* MEC2 */
+	if (adev->asic_type == CHIP_KAVERI ||
+	    (adev->asic_type > CHIP_TOPAZ && adev->asic_type != CHIP_STONEY)) {
+		query_fw.index = 1;
+		ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+		if (ret)
+			return ret;
+		seq_printf(m, "MEC2 feature version: %u, firmware version: 0x%08x\n",
+			   fw_info.feature, fw_info.ver);
+	}
+
+	/* SMC */
+	query_fw.fw_type = AMDGPU_INFO_FW_SMC;
+	ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+	if (ret)
+		return ret;
+	seq_printf(m, "SMC feature version: %u, firmware version: 0x%08x\n",
+		   fw_info.feature, fw_info.ver);
+
+	/* SDMA */
+	query_fw.fw_type = AMDGPU_INFO_FW_SDMA;
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		query_fw.index = i;
+		ret = amdgpu_firmware_info(&fw_info, &query_fw, adev);
+		if (ret)
+			return ret;
+		seq_printf(m, "SDMA%d feature version: %u, firmware version: 0x%08x\n",
+			   i, fw_info.feature, fw_info.ver);
+	}
+
+	return 0;
+}
+
+static const struct drm_info_list amdgpu_firmware_info_list[] = {
+	{"amdgpu_firmware_info", amdgpu_debugfs_firmware_info, 0, NULL},
+};
+#endif
+
+int amdgpu_debugfs_firmware_init(struct amdgpu_device *adev)
+{
+#if defined(CONFIG_DEBUG_FS)
+	return amdgpu_debugfs_add_files(adev, amdgpu_firmware_info_list,
+					ARRAY_SIZE(amdgpu_firmware_info_list));
+#else
+	return 0;
+#endif
+}
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
@ -589,6 +589,7 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,
 			   struct ttm_mem_reg *new_mem)
 {
 	struct amdgpu_bo *rbo;
+	struct ttm_mem_reg *old_mem = &bo->mem;

 	if (!amdgpu_ttm_bo_is_amdgpu_bo(bo))
 		return;
@ -602,6 +603,8 @@ void amdgpu_bo_move_notify(struct ttm_buffer_object *bo,

 	/* move_notify is called before move happens */
 	amdgpu_update_memory_usage(rbo->adev, &bo->mem, new_mem);
+
+	trace_amdgpu_ttm_bo_move(rbo, new_mem->mem_type, old_mem->mem_type);
 }

 int amdgpu_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_pm.c
@ -347,6 +347,8 @@ static ssize_t amdgpu_get_pp_dpm_sclk(struct device *dev,

 	if (adev->pp_enabled)
 		size = amdgpu_dpm_print_clock_levels(adev, PP_SCLK, buf);
+	else if (adev->pm.funcs->print_clock_levels)
+		size = adev->pm.funcs->print_clock_levels(adev, PP_SCLK, buf);

 	return size;
 }
@ -363,7 +365,9 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,
 	uint32_t i, mask = 0;
 	char sub_str[2];

-	for (i = 0; i < strlen(buf) - 1; i++) {
+	for (i = 0; i < strlen(buf); i++) {
+		if (*(buf + i) == '\n')
+			continue;
 		sub_str[0] = *(buf + i);
 		sub_str[1] = '\0';
 		ret = kstrtol(sub_str, 0, &level);
@ -377,6 +381,8 @@ static ssize_t amdgpu_set_pp_dpm_sclk(struct device *dev,

 	if (adev->pp_enabled)
 		amdgpu_dpm_force_clock_level(adev, PP_SCLK, mask);
+	else if (adev->pm.funcs->force_clock_level)
+		adev->pm.funcs->force_clock_level(adev, PP_SCLK, mask);
 fail:
 	return count;
 }
@ -391,6 +397,8 @@ static ssize_t amdgpu_get_pp_dpm_mclk(struct device *dev,

 	if (adev->pp_enabled)
 		size = amdgpu_dpm_print_clock_levels(adev, PP_MCLK, buf);
+	else if (adev->pm.funcs->print_clock_levels)
+		size = adev->pm.funcs->print_clock_levels(adev, PP_MCLK, buf);

 	return size;
 }
@ -407,7 +415,9 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,
 	uint32_t i, mask = 0;
 	char sub_str[2];

-	for (i = 0; i < strlen(buf) - 1; i++) {
+	for (i = 0; i < strlen(buf); i++) {
+		if (*(buf + i) == '\n')
+			continue;
 		sub_str[0] = *(buf + i);
 		sub_str[1] = '\0';
 		ret = kstrtol(sub_str, 0, &level);
@ -421,6 +431,8 @@ static ssize_t amdgpu_set_pp_dpm_mclk(struct device *dev,

 	if (adev->pp_enabled)
 		amdgpu_dpm_force_clock_level(adev, PP_MCLK, mask);
+	else if (adev->pm.funcs->force_clock_level)
+		adev->pm.funcs->force_clock_level(adev, PP_MCLK, mask);
 fail:
 	return count;
 }
@ -435,6 +447,8 @@ static ssize_t amdgpu_get_pp_dpm_pcie(struct device *dev,

 	if (adev->pp_enabled)
 		size = amdgpu_dpm_print_clock_levels(adev, PP_PCIE, buf);
+	else if (adev->pm.funcs->print_clock_levels)
+		size = adev->pm.funcs->print_clock_levels(adev, PP_PCIE, buf);

 	return size;
 }
@ -451,7 +465,9 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,
 	uint32_t i, mask = 0;
 	char sub_str[2];

-	for (i = 0; i < strlen(buf) - 1; i++) {
+	for (i = 0; i < strlen(buf); i++) {
+		if (*(buf + i) == '\n')
+			continue;
 		sub_str[0] = *(buf + i);
 		sub_str[1] = '\0';
 		ret = kstrtol(sub_str, 0, &level);
@ -465,6 +481,100 @@ static ssize_t amdgpu_set_pp_dpm_pcie(struct device *dev,

 	if (adev->pp_enabled)
 		amdgpu_dpm_force_clock_level(adev, PP_PCIE, mask);
+	else if (adev->pm.funcs->force_clock_level)
+		adev->pm.funcs->force_clock_level(adev, PP_PCIE, mask);
+fail:
+	return count;
+}
+
+static ssize_t amdgpu_get_pp_sclk_od(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	uint32_t value = 0;
+
+	if (adev->pp_enabled)
+		value = amdgpu_dpm_get_sclk_od(adev);
+	else if (adev->pm.funcs->get_sclk_od)
+		value = adev->pm.funcs->get_sclk_od(adev);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", value);
+}
+
+static ssize_t amdgpu_set_pp_sclk_od(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	int ret;
+	long int value;
+
+	ret = kstrtol(buf, 0, &value);
+
+	if (ret) {
+		count = -EINVAL;
+		goto fail;
+	}
+
+	if (adev->pp_enabled) {
+		amdgpu_dpm_set_sclk_od(adev, (uint32_t)value);
+		amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_READJUST_POWER_STATE, NULL, NULL);
+	} else if (adev->pm.funcs->set_sclk_od) {
+		adev->pm.funcs->set_sclk_od(adev, (uint32_t)value);
+		adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
+		amdgpu_pm_compute_clocks(adev);
+	}
+
+fail:
+	return count;
+}
+
+static ssize_t amdgpu_get_pp_mclk_od(struct device *dev,
+		struct device_attribute *attr,
+		char *buf)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	uint32_t value = 0;
+
+	if (adev->pp_enabled)
+		value = amdgpu_dpm_get_mclk_od(adev);
+	else if (adev->pm.funcs->get_mclk_od)
+		value = adev->pm.funcs->get_mclk_od(adev);
+
+	return snprintf(buf, PAGE_SIZE, "%d\n", value);
+}
+
+static ssize_t amdgpu_set_pp_mclk_od(struct device *dev,
+		struct device_attribute *attr,
+		const char *buf,
+		size_t count)
+{
+	struct drm_device *ddev = dev_get_drvdata(dev);
+	struct amdgpu_device *adev = ddev->dev_private;
+	int ret;
+	long int value;
+
+	ret = kstrtol(buf, 0, &value);
+
+	if (ret) {
+		count = -EINVAL;
+		goto fail;
+	}
+
+	if (adev->pp_enabled) {
+		amdgpu_dpm_set_mclk_od(adev, (uint32_t)value);
+		amdgpu_dpm_dispatch_task(adev, AMD_PP_EVENT_READJUST_POWER_STATE, NULL, NULL);
+	} else if (adev->pm.funcs->set_mclk_od) {
+		adev->pm.funcs->set_mclk_od(adev, (uint32_t)value);
+		adev->pm.dpm.current_ps = adev->pm.dpm.boot_ps;
+		amdgpu_pm_compute_clocks(adev);
+	}
+
 fail:
 	return count;
 }
@ -490,6 +600,12 @@ static DEVICE_ATTR(pp_dpm_mclk, S_IRUGO | S_IWUSR,
 static DEVICE_ATTR(pp_dpm_pcie, S_IRUGO | S_IWUSR,
 		amdgpu_get_pp_dpm_pcie,
 		amdgpu_set_pp_dpm_pcie);
+static DEVICE_ATTR(pp_sclk_od, S_IRUGO | S_IWUSR,
+		amdgpu_get_pp_sclk_od,
+		amdgpu_set_pp_sclk_od);
+static DEVICE_ATTR(pp_mclk_od, S_IRUGO | S_IWUSR,
+		amdgpu_get_pp_mclk_od,
+		amdgpu_set_pp_mclk_od);

 static ssize_t amdgpu_hwmon_show_temp(struct device *dev,
 				      struct device_attribute *attr,
@ -1108,22 +1224,34 @@ int amdgpu_pm_sysfs_init(struct amdgpu_device *adev)
 			DRM_ERROR("failed to create device file pp_table\n");
 			return ret;
 		}
-		ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_dpm_sclk\n");
-			return ret;
-		}
-		ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_dpm_mclk\n");
-			return ret;
-		}
-		ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie);
-		if (ret) {
-			DRM_ERROR("failed to create device file pp_dpm_pcie\n");
-			return ret;
-		}
 	}
+
+	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_sclk);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_dpm_sclk\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_mclk);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_dpm_mclk\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_dpm_pcie);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_dpm_pcie\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_sclk_od);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_sclk_od\n");
+		return ret;
+	}
+	ret = device_create_file(adev->dev, &dev_attr_pp_mclk_od);
+	if (ret) {
+		DRM_ERROR("failed to create device file pp_mclk_od\n");
+		return ret;
+	}
+
 	ret = amdgpu_debugfs_pm_init(adev);
 	if (ret) {
 		DRM_ERROR("Failed to register debugfs file for dpm!\n");
@ -1146,10 +1274,12 @@ void amdgpu_pm_sysfs_fini(struct amdgpu_device *adev)
 		device_remove_file(adev->dev, &dev_attr_pp_cur_state);
 		device_remove_file(adev->dev, &dev_attr_pp_force_state);
 		device_remove_file(adev->dev, &dev_attr_pp_table);
-		device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
-		device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
-		device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
 	}
+	device_remove_file(adev->dev, &dev_attr_pp_dpm_sclk);
+	device_remove_file(adev->dev, &dev_attr_pp_dpm_mclk);
+	device_remove_file(adev->dev, &dev_attr_pp_dpm_pcie);
+	device_remove_file(adev->dev, &dev_attr_pp_sclk_od);
+	device_remove_file(adev->dev, &dev_attr_pp_mclk_od);
 }

 void amdgpu_pm_compute_clocks(struct amdgpu_device *adev)
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_powerplay.c
@ -52,6 +52,7 @@ static int amdgpu_powerplay_init(struct amdgpu_device *adev)
 		pp_init->chip_family = adev->family;
 		pp_init->chip_id = adev->asic_type;
 		pp_init->device = amdgpu_cgs_create_device(adev);
+		pp_init->powercontainment_enabled = amdgpu_powercontainment;

 		ret = amd_powerplay_init(pp_init, amd_pp);
 		kfree(pp_init);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ring.c
@ -28,6 +28,7 @@
 */
 #include <linux/seq_file.h>
 #include <linux/slab.h>
+#include <linux/debugfs.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
@ -48,6 +49,7 @@
 */
 static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
 				    struct amdgpu_ring *ring);
+static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring);

 /**
 * amdgpu_ring_alloc - allocate space on the ring buffer
@ -73,6 +75,10 @@ int amdgpu_ring_alloc(struct amdgpu_ring *ring, unsigned ndw)

 	ring->count_dw = ndw;
 	ring->wptr_old = ring->wptr;
+
+	if (ring->funcs->begin_use)
+		ring->funcs->begin_use(ring);
+
 	return 0;
 }

@ -125,6 +131,9 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)

 	mb();
 	amdgpu_ring_set_wptr(ring);
+
+	if (ring->funcs->end_use)
+		ring->funcs->end_use(ring);
 }

 /**
@ -137,78 +146,9 @@ void amdgpu_ring_commit(struct amdgpu_ring *ring)
 void amdgpu_ring_undo(struct amdgpu_ring *ring)
 {
 	ring->wptr = ring->wptr_old;
-}

-/**
- * amdgpu_ring_backup - Back up the content of a ring
- *
- * @ring: the ring we want to back up
- *
- * Saves all unprocessed commits from a ring, returns the number of dwords saved.
- */
-unsigned amdgpu_ring_backup(struct amdgpu_ring *ring,
-			    uint32_t **data)
-{
-	unsigned size, ptr, i;
-
-	*data = NULL;
-
-	if (ring->ring_obj == NULL)
-		return 0;
-
-	/* it doesn't make sense to save anything if all fences are signaled */
-	if (!amdgpu_fence_count_emitted(ring))
-		return 0;
-
-	ptr = le32_to_cpu(*ring->next_rptr_cpu_addr);
-
-	size = ring->wptr + (ring->ring_size / 4);
-	size -= ptr;
-	size &= ring->ptr_mask;
-	if (size == 0)
-		return 0;
-
-	/* and then save the content of the ring */
-	*data = kmalloc_array(size, sizeof(uint32_t), GFP_KERNEL);
-	if (!*data)
-		return 0;
-	for (i = 0; i < size; ++i) {
-		(*data)[i] = ring->ring[ptr++];
-		ptr &= ring->ptr_mask;
-	}
-
-	return size;
-}
-
-/**
- * amdgpu_ring_restore - append saved commands to the ring again
- *
- * @ring: ring to append commands to
- * @size: number of dwords we want to write
- * @data: saved commands
- *
- * Allocates space on the ring and restore the previously saved commands.
- */
-int amdgpu_ring_restore(struct amdgpu_ring *ring,
-			unsigned size, uint32_t *data)
-{
-	int i, r;
-
-	if (!size || !data)
-		return 0;
-
-	/* restore the saved ring content */
-	r = amdgpu_ring_alloc(ring, size);
-	if (r)
-		return r;
-
-	for (i = 0; i < size; ++i) {
-		amdgpu_ring_write(ring, data[i]);
-	}
-
-	amdgpu_ring_commit(ring);
-	kfree(data);
-	return 0;
+	if (ring->funcs->end_use)
+		ring->funcs->end_use(ring);
 }

 /**
@ -260,14 +200,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		return r;
 	}

-	r = amdgpu_wb_get(adev, &ring->next_rptr_offs);
-	if (r) {
-		dev_err(adev->dev, "(%d) ring next_rptr wb alloc failed\n", r);
-		return r;
-	}
-	ring->next_rptr_gpu_addr = adev->wb.gpu_addr + ring->next_rptr_offs * 4;
-	ring->next_rptr_cpu_addr = &adev->wb.wb[ring->next_rptr_offs];
-
 	r = amdgpu_wb_get(adev, &ring->cond_exe_offs);
 	if (r) {
 		dev_err(adev->dev, "(%d) ring cond_exec_polling wb alloc failed\n", r);
@ -276,7 +208,6 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 	ring->cond_exe_gpu_addr = adev->wb.gpu_addr + (ring->cond_exe_offs * 4);
 	ring->cond_exe_cpu_addr = &adev->wb.wb[ring->cond_exe_offs];

-	spin_lock_init(&ring->fence_lock);
 	r = amdgpu_fence_driver_start_ring(ring, irq_src, irq_type);
 	if (r) {
 		dev_err(adev->dev, "failed initializing fences (%d).\n", r);
@ -310,6 +241,9 @@ int amdgpu_ring_init(struct amdgpu_device *adev, struct amdgpu_ring *ring,
 		}
 		r = amdgpu_bo_kmap(ring->ring_obj,
 				       (void **)&ring->ring);
+
+		memset((void *)ring->ring, 0, ring->ring_size);
+
 		amdgpu_bo_unreserve(ring->ring_obj);
 		if (r) {
 			dev_err(adev->dev, "(%d) ring map failed\n", r);
@ -347,7 +281,6 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 	amdgpu_wb_free(ring->adev, ring->fence_offs);
 	amdgpu_wb_free(ring->adev, ring->rptr_offs);
 	amdgpu_wb_free(ring->adev, ring->wptr_offs);
-	amdgpu_wb_free(ring->adev, ring->next_rptr_offs);

 	if (ring_obj) {
 		r = amdgpu_bo_reserve(ring_obj, false);
@ -358,6 +291,7 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 		}
 		amdgpu_bo_unref(&ring_obj);
 	}
+	amdgpu_debugfs_ring_fini(ring);
 }

 /*
@ -365,57 +299,62 @@ void amdgpu_ring_fini(struct amdgpu_ring *ring)
 */
 #if defined(CONFIG_DEBUG_FS)

-static int amdgpu_debugfs_ring_info(struct seq_file *m, void *data)
+/* Layout of file is 12 bytes consisting of
+ * - rptr
+ * - wptr
+ * - driver's copy of wptr
+ *
+ * followed by n-words of ring data
+ */
+static ssize_t amdgpu_debugfs_ring_read(struct file *f, char __user *buf,
+					size_t size, loff_t *pos)
 {
-	struct drm_info_node *node = (struct drm_info_node *) m->private;
-	struct drm_device *dev = node->minor->dev;
-	struct amdgpu_device *adev = dev->dev_private;
-	int roffset = (unsigned long)node->info_ent->data;
-	struct amdgpu_ring *ring = (void *)(((uint8_t*)adev) + roffset);
-	uint32_t rptr, wptr, rptr_next;
-	unsigned i;
+	struct amdgpu_ring *ring = (struct amdgpu_ring*)f->f_inode->i_private;
+	int r, i;
+	uint32_t value, result, early[3];

-	wptr = amdgpu_ring_get_wptr(ring);
-	seq_printf(m, "wptr: 0x%08x [%5d]\n", wptr, wptr);
+	if (*pos & 3 || size & 3)
+		return -EINVAL;

-	rptr = amdgpu_ring_get_rptr(ring);
-	rptr_next = le32_to_cpu(*ring->next_rptr_cpu_addr);
+	result = 0;

-	seq_printf(m, "rptr: 0x%08x [%5d]\n", rptr, rptr);
-
-	seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
-		   ring->wptr, ring->wptr);
-
-	if (!ring->ready)
-		return 0;
-
-	/* print 8 dw before current rptr as often it's the last executed
-	 * packet that is the root issue
-	 */
-	i = (rptr + ring->ptr_mask + 1 - 32) & ring->ptr_mask;
-	while (i != rptr) {
-		seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
-		if (i == rptr)
-			seq_puts(m, " *");
-		if (i == rptr_next)
-			seq_puts(m, " #");
-		seq_puts(m, "\n");
-		i = (i + 1) & ring->ptr_mask;
+	if (*pos < 12) {
+		early[0] = amdgpu_ring_get_rptr(ring);
+		early[1] = amdgpu_ring_get_wptr(ring);
+		early[2] = ring->wptr;
+		for (i = *pos / 4; i < 3 && size; i++) {
+			r = put_user(early[i], (uint32_t *)buf);
+			if (r)
+				return r;
+			buf += 4;
+			result += 4;
+			size -= 4;
+			*pos += 4;
+		}
 	}
-	while (i != wptr) {
-		seq_printf(m, "r[%5d]=0x%08x", i, ring->ring[i]);
-		if (i == rptr)
-			seq_puts(m, " *");
-		if (i == rptr_next)
-			seq_puts(m, " #");
-		seq_puts(m, "\n");
-		i = (i + 1) & ring->ptr_mask;
+
+	while (size) {
+		if (*pos >= (ring->ring_size + 12))
+			return result;
+			
+		value = ring->ring[(*pos - 12)/4];
+		r = put_user(value, (uint32_t*)buf);
+		if (r)
+			return r;
+		buf += 4;
+		result += 4;
+		size -= 4;
+		*pos += 4;
 	}
-	return 0;
+
+	return result;
 }

-static struct drm_info_list amdgpu_debugfs_ring_info_list[AMDGPU_MAX_RINGS];
-static char amdgpu_debugfs_ring_names[AMDGPU_MAX_RINGS][32];
+static const struct file_operations amdgpu_debugfs_ring_fops = {
+	.owner = THIS_MODULE,
+	.read = amdgpu_debugfs_ring_read,
+	.llseek = default_llseek
+};

 #endif

@ -423,28 +362,27 @@ static int amdgpu_debugfs_ring_init(struct amdgpu_device *adev,
 				    struct amdgpu_ring *ring)
 {
 #if defined(CONFIG_DEBUG_FS)
-	unsigned offset = (uint8_t*)ring - (uint8_t*)adev;
-	unsigned i;
-	struct drm_info_list *info;
-	char *name;
+	struct drm_minor *minor = adev->ddev->primary;
+	struct dentry *ent, *root = minor->debugfs_root;
+	char name[32];

-	for (i = 0; i < ARRAY_SIZE(amdgpu_debugfs_ring_info_list); ++i) {
-		info = &amdgpu_debugfs_ring_info_list[i];
-		if (!info->data)
-			break;
-	}
-
-	if (i == ARRAY_SIZE(amdgpu_debugfs_ring_info_list))
-		return -ENOSPC;
-
-	name = &amdgpu_debugfs_ring_names[i][0];
 	sprintf(name, "amdgpu_ring_%s", ring->name);
-	info->name = name;
-	info->show = amdgpu_debugfs_ring_info;
-	info->driver_features = 0;
-	info->data = (void*)(uintptr_t)offset;

-	return amdgpu_debugfs_add_files(adev, info, 1);
+	ent = debugfs_create_file(name,
+				  S_IFREG | S_IRUGO, root,
+				  ring, &amdgpu_debugfs_ring_fops);
+	if (IS_ERR(ent))
+		return PTR_ERR(ent);
+
+	i_size_write(ent->d_inode, ring->ring_size + 12);
+	ring->ent = ent;
 #endif
 	return 0;
 }
+
+static void amdgpu_debugfs_ring_fini(struct amdgpu_ring *ring)
+{
+#if defined(CONFIG_DEBUG_FS)
+	debugfs_remove(ring->ent);
+#endif
+}
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sa.c
@ -428,7 +428,7 @@ void amdgpu_sa_bo_dump_debug_info(struct amdgpu_sa_manager *sa_manager,
 			   soffset, eoffset, eoffset - soffset);

 		if (i->fence)
-			seq_printf(m, " protected by 0x%08x on context %d",
+			seq_printf(m, " protected by 0x%08x on context %llu",
 				   i->fence->seqno, i->fence->context);

 		seq_printf(m, "\n");
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_sync.c
@ -223,13 +223,16 @@ int amdgpu_sync_resv(struct amdgpu_device *adev,
 }

 /**
- * amdgpu_sync_is_idle - test if all fences are signaled
+ * amdgpu_sync_peek_fence - get the next fence not signaled yet
 *
 * @sync: the sync object
+ * @ring: optional ring to use for test
 *
- * Returns true if all fences in the sync object are signaled.
+ * Returns the next fence not signaled yet without removing it from the sync
+ * object.
 */
-bool amdgpu_sync_is_idle(struct amdgpu_sync *sync)
+struct fence *amdgpu_sync_peek_fence(struct amdgpu_sync *sync,
+				     struct amdgpu_ring *ring)
 {
 	struct amdgpu_sync_entry *e;
 	struct hlist_node *tmp;
@ -237,6 +240,19 @@ bool amdgpu_sync_is_idle(struct amdgpu_sync *sync)

 	hash_for_each_safe(sync->fences, i, tmp, e, node) {
 		struct fence *f = e->fence;
+		struct amd_sched_fence *s_fence = to_amd_sched_fence(f);
+
+		if (ring && s_fence) {
+			/* For fences from the same ring it is sufficient
+			 * when they are scheduled.
+			 */
+			if (s_fence->sched == &ring->sched) {
+				if (fence_is_signaled(&s_fence->scheduled))
+					continue;
+
+				return &s_fence->scheduled;
+			}
+		}

 		if (fence_is_signaled(f)) {
 			hash_del(&e->node);
@ -245,58 +261,19 @@ bool amdgpu_sync_is_idle(struct amdgpu_sync *sync)
 			continue;
 		}

-		return false;
+		return f;
 	}

-	return true;
+	return NULL;
 }

 /**
- * amdgpu_sync_cycle_fences - move fences from one sync object into another
+ * amdgpu_sync_get_fence - get the next fence from the sync object
 *
- * @dst: the destination sync object
- * @src: the source sync object
- * @fence: fence to add to source
+ * @sync: sync object to use
 *
- * Remove all fences from source and put them into destination and add
- * fence as new one into source.
+ * Get and removes the next fence from the sync object not signaled yet.
 */
-int amdgpu_sync_cycle_fences(struct amdgpu_sync *dst, struct amdgpu_sync *src,
-			     struct fence *fence)
-{
-	struct amdgpu_sync_entry *e, *newone;
-	struct hlist_node *tmp;
-	int i;
-
-	/* Allocate the new entry before moving the old ones */
-	newone = kmem_cache_alloc(amdgpu_sync_slab, GFP_KERNEL);
-	if (!newone)
-		return -ENOMEM;
-
-	hash_for_each_safe(src->fences, i, tmp, e, node) {
-		struct fence *f = e->fence;
-
-		hash_del(&e->node);
-		if (fence_is_signaled(f)) {
-			fence_put(f);
-			kmem_cache_free(amdgpu_sync_slab, e);
-			continue;
-		}
-
-		if (amdgpu_sync_add_later(dst, f)) {
-			kmem_cache_free(amdgpu_sync_slab, e);
-			continue;
-		}
-
-		hash_add(dst->fences, &e->node, f->context);
-	}
-
-	hash_add(src->fences, &newone->node, fence->context);
-	newone->fence = fence_get(fence);
-
-	return 0;
-}
-
 struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
 {
 	struct amdgpu_sync_entry *e;
@ -319,25 +296,6 @@ struct fence *amdgpu_sync_get_fence(struct amdgpu_sync *sync)
 	return NULL;
 }

-int amdgpu_sync_wait(struct amdgpu_sync *sync)
-{
-	struct amdgpu_sync_entry *e;
-	struct hlist_node *tmp;
-	int i, r;
-
-	hash_for_each_safe(sync->fences, i, tmp, e, node) {
-		r = fence_wait(e->fence, false);
-		if (r)
-			return r;
-
-		hash_del(&e->node);
-		fence_put(e->fence);
-		kmem_cache_free(amdgpu_sync_slab, e);
-	}
-
-	return 0;
-}
-
 /**
 * amdgpu_sync_free - free the sync object
 *
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_trace.h
@ -11,19 +11,68 @@
 #define TRACE_SYSTEM amdgpu
 #define TRACE_INCLUDE_FILE amdgpu_trace

+TRACE_EVENT(amdgpu_mm_rreg,
+	    TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
+	    TP_ARGS(did, reg, value),
+	    TP_STRUCT__entry(
+				__field(unsigned, did)
+				__field(uint32_t, reg)
+				__field(uint32_t, value)
+			    ),
+	    TP_fast_assign(
+			   __entry->did = did;
+			   __entry->reg = reg;
+			   __entry->value = value;
+			   ),
+	    TP_printk("0x%04lx, 0x%04lx, 0x%08lx",
+		      (unsigned long)__entry->did,
+		      (unsigned long)__entry->reg,
+		      (unsigned long)__entry->value)
+);
+
+TRACE_EVENT(amdgpu_mm_wreg,
+	    TP_PROTO(unsigned did, uint32_t reg, uint32_t value),
+	    TP_ARGS(did, reg, value),
+	    TP_STRUCT__entry(
+				__field(unsigned, did)
+				__field(uint32_t, reg)
+				__field(uint32_t, value)
+			    ),
+	    TP_fast_assign(
+			   __entry->did = did;
+			   __entry->reg = reg;
+			   __entry->value = value;
+			   ),
+	    TP_printk("0x%04lx, 0x%04lx, 0x%08lx",
+		      (unsigned long)__entry->did,
+		      (unsigned long)__entry->reg,
+		      (unsigned long)__entry->value)
+);
+
 TRACE_EVENT(amdgpu_bo_create,
 	    TP_PROTO(struct amdgpu_bo *bo),
 	    TP_ARGS(bo),
 	    TP_STRUCT__entry(
 			     __field(struct amdgpu_bo *, bo)
 			     __field(u32, pages)
+			     __field(u32, type)
+			     __field(u32, prefer)
+			     __field(u32, allow)
+			     __field(u32, visible)
 			     ),

 	    TP_fast_assign(
 			   __entry->bo = bo;
 			   __entry->pages = bo->tbo.num_pages;
+			   __entry->type = bo->tbo.mem.mem_type;
+			   __entry->prefer = bo->prefered_domains;
+			   __entry->allow = bo->allowed_domains;
+			   __entry->visible = bo->flags;
 			   ),
-	    TP_printk("bo=%p, pages=%u", __entry->bo, __entry->pages)
+
+	    TP_printk("bo=%p,pages=%u,type=%d,prefered=%d,allowed=%d,visible=%d",
+		       __entry->bo, __entry->pages, __entry->type,
+		       __entry->prefer, __entry->allow, __entry->visible)
 );

 TRACE_EVENT(amdgpu_cs,
@ -64,7 +113,7 @@ TRACE_EVENT(amdgpu_cs_ioctl,
 			   __entry->adev = job->adev;
 			   __entry->sched_job = &job->base;
 			   __entry->ib = job->ibs;
-			   __entry->fence = &job->base.s_fence->base;
+			   __entry->fence = &job->base.s_fence->finished;
 			   __entry->ring_name = job->ring->name;
 			   __entry->num_ibs = job->num_ibs;
 			   ),
@ -89,7 +138,7 @@ TRACE_EVENT(amdgpu_sched_run_job,
 			   __entry->adev = job->adev;
 			   __entry->sched_job = &job->base;
 			   __entry->ib = job->ibs;
-			   __entry->fence = &job->base.s_fence->base;
+			   __entry->fence = &job->base.s_fence->finished;
 			   __entry->ring_name = job->ring->name;
 			   __entry->num_ibs = job->num_ibs;
 			   ),
@ -100,24 +149,26 @@ TRACE_EVENT(amdgpu_sched_run_job,


 TRACE_EVENT(amdgpu_vm_grab_id,
-	    TP_PROTO(struct amdgpu_vm *vm, int ring, unsigned vmid,
-		     uint64_t pd_addr),
-	    TP_ARGS(vm, ring, vmid, pd_addr),
+	    TP_PROTO(struct amdgpu_vm *vm, int ring, struct amdgpu_job *job),
+	    TP_ARGS(vm, ring, job),
 	    TP_STRUCT__entry(
 			     __field(struct amdgpu_vm *, vm)
 			     __field(u32, ring)
 			     __field(u32, vmid)
 			     __field(u64, pd_addr)
+			     __field(u32, needs_flush)
 			     ),

 	    TP_fast_assign(
 			   __entry->vm = vm;
 			   __entry->ring = ring;
-			   __entry->vmid = vmid;
-			   __entry->pd_addr = pd_addr;
+			   __entry->vmid = job->vm_id;
+			   __entry->pd_addr = job->vm_pd_addr;
+			   __entry->needs_flush = job->vm_needs_flush;
 			   ),
-	    TP_printk("vm=%p, ring=%u, id=%u, pd_addr=%010Lx", __entry->vm,
-		      __entry->ring, __entry->vmid, __entry->pd_addr)
+	    TP_printk("vm=%p, ring=%u, id=%u, pd_addr=%010Lx needs_flush=%u",
+		      __entry->vm, __entry->ring, __entry->vmid,
+		      __entry->pd_addr, __entry->needs_flush)
 );

 TRACE_EVENT(amdgpu_vm_bo_map,
@ -244,13 +295,55 @@ TRACE_EVENT(amdgpu_bo_list_set,
 	    TP_STRUCT__entry(
 			     __field(struct amdgpu_bo_list *, list)
 			     __field(struct amdgpu_bo *, bo)
+			     __field(u64, bo_size)
 			     ),

 	    TP_fast_assign(
 			   __entry->list = list;
 			   __entry->bo = bo;
+			   __entry->bo_size = amdgpu_bo_size(bo);
 			   ),
-	    TP_printk("list=%p, bo=%p", __entry->list, __entry->bo)
+	    TP_printk("list=%p, bo=%p, bo_size = %Ld",
+		      __entry->list,
+		      __entry->bo,
+		      __entry->bo_size)
+);
+
+TRACE_EVENT(amdgpu_cs_bo_status,
+	    TP_PROTO(uint64_t total_bo, uint64_t total_size),
+	    TP_ARGS(total_bo, total_size),
+	    TP_STRUCT__entry(
+			__field(u64, total_bo)
+			__field(u64, total_size)
+			),
+
+	    TP_fast_assign(
+			__entry->total_bo = total_bo;
+			__entry->total_size = total_size;
+			),
+	    TP_printk("total bo size = %Ld, total bo count = %Ld",
+			__entry->total_bo, __entry->total_size)
+);
+
+TRACE_EVENT(amdgpu_ttm_bo_move,
+	    TP_PROTO(struct amdgpu_bo* bo, uint32_t new_placement, uint32_t old_placement),
+	    TP_ARGS(bo, new_placement, old_placement),
+	    TP_STRUCT__entry(
+			__field(struct amdgpu_bo *, bo)
+			__field(u64, bo_size)
+			__field(u32, new_placement)
+			__field(u32, old_placement)
+			),
+
+	    TP_fast_assign(
+			__entry->bo      = bo;
+			__entry->bo_size = amdgpu_bo_size(bo);
+			__entry->new_placement = new_placement;
+			__entry->old_placement = old_placement;
+			),
+	    TP_printk("bo=%p from:%d to %d with size = %Ld",
+			__entry->bo, __entry->old_placement,
+			__entry->new_placement, __entry->bo_size)
 );

 #endif
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@ -286,9 +286,10 @@ static int amdgpu_move_blit(struct ttm_buffer_object *bo,
 	r = amdgpu_copy_buffer(ring, old_start, new_start,
 			       new_mem->num_pages * PAGE_SIZE, /* bytes */
 			       bo->resv, &fence);
-	/* FIXME: handle copy error */
-	r = ttm_bo_move_accel_cleanup(bo, fence,
-				      evict, no_wait_gpu, new_mem);
+	if (r)
+		return r;
+
+	r = ttm_bo_pipeline_move(bo, fence, evict, new_mem);
 	fence_put(fence);
 	return r;
 }
@ -396,6 +397,11 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,
 		return -EINVAL;

 	adev = amdgpu_get_adev(bo->bdev);
+
+	/* remember the eviction */
+	if (evict)
+		atomic64_inc(&adev->num_evictions);
+
 	if (old_mem->mem_type == TTM_PL_SYSTEM && bo->ttm == NULL) {
 		amdgpu_move_null(bo, new_mem);
 		return 0;
@ -429,7 +435,8 @@ static int amdgpu_bo_move(struct ttm_buffer_object *bo,

 	if (r) {
 memcpy:
-		r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
+		r = ttm_bo_move_memcpy(bo, evict, interruptible,
+				       no_wait_gpu, new_mem);
 		if (r) {
 			return r;
 		}
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.c
@ -40,9 +40,16 @@
 #include "uvd/uvd_4_2_d.h"

 /* 1 second timeout */
-#define UVD_IDLE_TIMEOUT_MS	1000
+#define UVD_IDLE_TIMEOUT	msecs_to_jiffies(1000)
+
+/* Firmware versions for VI */
+#define FW_1_65_10	((1 << 24) | (65 << 16) | (10 << 8))
+#define FW_1_87_11	((1 << 24) | (87 << 16) | (11 << 8))
+#define FW_1_87_12	((1 << 24) | (87 << 16) | (12 << 8))
+#define FW_1_37_15	((1 << 24) | (37 << 16) | (15 << 8))
+
 /* Polaris10/11 firmware version */
-#define FW_1_66_16 ((1 << 24) | (66 << 16) | (16 << 8))
+#define FW_1_66_16	((1 << 24) | (66 << 16) | (16 << 8))

 /* Firmware Names */
 #ifdef CONFIG_DRM_AMDGPU_CIK
@ -92,7 +99,6 @@ MODULE_FIRMWARE(FIRMWARE_STONEY);
 MODULE_FIRMWARE(FIRMWARE_POLARIS10);
 MODULE_FIRMWARE(FIRMWARE_POLARIS11);

-static void amdgpu_uvd_note_usage(struct amdgpu_device *adev);
 static void amdgpu_uvd_idle_work_handler(struct work_struct *work);

 int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
@ -246,6 +252,23 @@ int amdgpu_uvd_sw_init(struct amdgpu_device *adev)
 	if (!amdgpu_ip_block_version_cmp(adev, AMD_IP_BLOCK_TYPE_UVD, 5, 0))
 		adev->uvd.address_64_bit = true;

+	switch (adev->asic_type) {
+	case CHIP_TONGA:
+		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_65_10;
+		break;
+	case CHIP_CARRIZO:
+		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_11;
+		break;
+	case CHIP_FIJI:
+		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_87_12;
+		break;
+	case CHIP_STONEY:
+		adev->uvd.use_ctx_buf = adev->uvd.fw_version >= FW_1_37_15;
+		break;
+	default:
+		adev->uvd.use_ctx_buf = adev->asic_type >= CHIP_POLARIS10;
+	}
+
 	return 0;
 }

@ -346,8 +369,6 @@ void amdgpu_uvd_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
 		if (handle != 0 && adev->uvd.filp[i] == filp) {
 			struct fence *fence;

-			amdgpu_uvd_note_usage(adev);
-
 			r = amdgpu_uvd_get_destroy_msg(ring, handle,
 						       false, &fence);
 			if (r) {
@ -438,7 +459,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
 	unsigned fs_in_mb = width_in_mb * height_in_mb;

 	unsigned image_size, tmp, min_dpb_size, num_dpb_buffer;
-	unsigned min_ctx_size = 0;
+	unsigned min_ctx_size = ~0;

 	image_size = width * height;
 	image_size += image_size / 2;
@ -557,7 +578,7 @@ static int amdgpu_uvd_cs_msg_decode(struct amdgpu_device *adev, uint32_t *msg,
 		/* reference picture buffer */
 		min_dpb_size = image_size * num_dpb_buffer;

-		if (adev->asic_type < CHIP_POLARIS10){
+		if (!adev->uvd.use_ctx_buf){
 			/* macroblock context buffer */
 			min_dpb_size +=
 				width_in_mb * height_in_mb * num_dpb_buffer * 192;
@ -662,7 +683,7 @@ static int amdgpu_uvd_cs_msg(struct amdgpu_uvd_cs_ctx *ctx,
 		}

 		DRM_ERROR("No more free UVD handles!\n");
-		return -EINVAL;
+		return -ENOSPC;

 	case 1:
 		/* it's a decode msg, calc buffer sizes */
@ -913,8 +934,6 @@ int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx)
 		return -EINVAL;
 	}

-	amdgpu_uvd_note_usage(ctx.parser->adev);
-
 	return 0;
 }

@ -968,7 +987,7 @@ static int amdgpu_uvd_send_msg(struct amdgpu_ring *ring, struct amdgpu_bo *bo,

 	if (direct) {
 		r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
-		job->fence = f;
+		job->fence = fence_get(f);
 		if (r)
 			goto err_free;

@ -1106,24 +1125,18 @@ static void amdgpu_uvd_idle_work_handler(struct work_struct *work)
 	if (fences == 0 && handles == 0) {
 		if (adev->pm.dpm_enabled) {
 			amdgpu_dpm_enable_uvd(adev, false);
-			/* just work around for uvd clock remain high even
-			 * when uvd dpm disabled on Polaris10 */
-			if (adev->asic_type == CHIP_POLARIS10)
-				amdgpu_asic_set_uvd_clocks(adev, 0, 0);
 		} else {
 			amdgpu_asic_set_uvd_clocks(adev, 0, 0);
 		}
 	} else {
-		schedule_delayed_work(&adev->uvd.idle_work,
-				      msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));
+		schedule_delayed_work(&adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
 	}
 }

-static void amdgpu_uvd_note_usage(struct amdgpu_device *adev)
+void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring)
 {
+	struct amdgpu_device *adev = ring->adev;
 	bool set_clocks = !cancel_delayed_work_sync(&adev->uvd.idle_work);
-	set_clocks &= schedule_delayed_work(&adev->uvd.idle_work,
-					    msecs_to_jiffies(UVD_IDLE_TIMEOUT_MS));

 	if (set_clocks) {
 		if (adev->pm.dpm_enabled) {
@ -1133,3 +1146,48 @@ static void amdgpu_uvd_note_usage(struct amdgpu_device *adev)
 		}
 	}
 }
+
+void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring)
+{
+	schedule_delayed_work(&ring->adev->uvd.idle_work, UVD_IDLE_TIMEOUT);
+}
+
+/**
+ * amdgpu_uvd_ring_test_ib - test ib execution
+ *
+ * @ring: amdgpu_ring pointer
+ *
+ * Test if we can successfully execute an IB
+ */
+int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout)
+{
+	struct fence *fence;
+	long r;
+
+	r = amdgpu_uvd_get_create_msg(ring, 1, NULL);
+	if (r) {
+		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
+		goto error;
+	}
+
+	r = amdgpu_uvd_get_destroy_msg(ring, 1, true, &fence);
+	if (r) {
+		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
+		goto error;
+	}
+
+	r = fence_wait_timeout(fence, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out.\n");
+		r = -ETIMEDOUT;
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
+	} else {
+		DRM_INFO("ib test on ring %d succeeded\n",  ring->idx);
+		r = 0;
+	}
+
+error:
+	fence_put(fence);
+	return r;
+}
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_uvd.h
@ -35,5 +35,8 @@ int amdgpu_uvd_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 void amdgpu_uvd_free_handles(struct amdgpu_device *adev,
 			     struct drm_file *filp);
 int amdgpu_uvd_ring_parse_cs(struct amdgpu_cs_parser *parser, uint32_t ib_idx);
+void amdgpu_uvd_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_uvd_ring_end_use(struct amdgpu_ring *ring);
+int amdgpu_uvd_ring_test_ib(struct amdgpu_ring *ring, long timeout);

 #endif
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.c
@ -36,7 +36,7 @@
 #include "cikd.h"

 /* 1 second timeout */
-#define VCE_IDLE_TIMEOUT_MS	1000
+#define VCE_IDLE_TIMEOUT	msecs_to_jiffies(1000)

 /* Firmware Names */
 #ifdef CONFIG_DRM_AMDGPU_CIK
@ -85,8 +85,6 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 	unsigned ucode_version, version_major, version_minor, binary_id;
 	int i, r;

-	INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);
-
 	switch (adev->asic_type) {
 #ifdef CONFIG_DRM_AMDGPU_CIK
 	case CHIP_BONAIRE:
@ -197,6 +195,9 @@ int amdgpu_vce_sw_init(struct amdgpu_device *adev, unsigned long size)
 		adev->vce.filp[i] = NULL;
 	}

+	INIT_DELAYED_WORK(&adev->vce.idle_work, amdgpu_vce_idle_work_handler);
+	mutex_init(&adev->vce.idle_mutex);
+
 	return 0;
 }

@ -220,6 +221,7 @@ int amdgpu_vce_sw_fini(struct amdgpu_device *adev)
 	amdgpu_ring_fini(&adev->vce.ring[1]);

 	release_firmware(adev->vce.fw);
+	mutex_destroy(&adev->vce.idle_mutex);

 	return 0;
 }
@ -310,37 +312,44 @@ static void amdgpu_vce_idle_work_handler(struct work_struct *work)
 			amdgpu_asic_set_vce_clocks(adev, 0, 0);
 		}
 	} else {
-		schedule_delayed_work(&adev->vce.idle_work,
-				      msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS));
+		schedule_delayed_work(&adev->vce.idle_work, VCE_IDLE_TIMEOUT);
 	}
 }

 /**
- * amdgpu_vce_note_usage - power up VCE
+ * amdgpu_vce_ring_begin_use - power up VCE
 *
- * @adev: amdgpu_device pointer
+ * @ring: amdgpu ring
 *
 * Make sure VCE is powerd up when we want to use it
 */
-static void amdgpu_vce_note_usage(struct amdgpu_device *adev)
+void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring)
 {
-	bool streams_changed = false;
-	bool set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
-	set_clocks &= schedule_delayed_work(&adev->vce.idle_work,
-					    msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS));
+	struct amdgpu_device *adev = ring->adev;
+	bool set_clocks;

-	if (adev->pm.dpm_enabled) {
-		/* XXX figure out if the streams changed */
-		streams_changed = false;
-	}
-
-	if (set_clocks || streams_changed) {
+	mutex_lock(&adev->vce.idle_mutex);
+	set_clocks = !cancel_delayed_work_sync(&adev->vce.idle_work);
+	if (set_clocks) {
 		if (adev->pm.dpm_enabled) {
 			amdgpu_dpm_enable_vce(adev, true);
 		} else {
 			amdgpu_asic_set_vce_clocks(adev, 53300, 40000);
 		}
 	}
+	mutex_unlock(&adev->vce.idle_mutex);
+}
+
+/**
+ * amdgpu_vce_ring_end_use - power VCE down
+ *
+ * @ring: amdgpu ring
+ *
+ * Schedule work to power VCE down again
+ */
+void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring)
+{
+	schedule_delayed_work(&ring->adev->vce.idle_work, VCE_IDLE_TIMEOUT);
 }

 /**
@ -357,11 +366,10 @@ void amdgpu_vce_free_handles(struct amdgpu_device *adev, struct drm_file *filp)
 	int i, r;
 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
 		uint32_t handle = atomic_read(&adev->vce.handles[i]);
+
 		if (!handle || adev->vce.filp[i] != filp)
 			continue;

-		amdgpu_vce_note_usage(adev);
-
 		r = amdgpu_vce_get_destroy_msg(ring, handle, false, NULL);
 		if (r)
 			DRM_ERROR("Error destroying VCE handle (%d)!\n", r);
@ -437,7 +445,7 @@ int amdgpu_vce_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
 		ib->ptr[i] = 0x0;

 	r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
-	job->fence = f;
+	job->fence = fence_get(f);
 	if (r)
 		goto err;

@ -469,7 +477,6 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	struct amdgpu_job *job;
 	struct amdgpu_ib *ib;
 	struct fence *f = NULL;
-	uint64_t dummy;
 	int i, r;

 	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4, &job);
@ -477,7 +484,6 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 		return r;

 	ib = &job->ibs[0];
-	dummy = ib->gpu_addr + 1024;

 	/* stitch together an VCE destroy msg */
 	ib->length_dw = 0;
@ -485,11 +491,14 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
 	ib->ptr[ib->length_dw++] = 0x00000001; /* session cmd */
 	ib->ptr[ib->length_dw++] = handle;

-	ib->ptr[ib->length_dw++] = 0x00000014; /* len */
-	ib->ptr[ib->length_dw++] = 0x05000005; /* feedback buffer */
-	ib->ptr[ib->length_dw++] = upper_32_bits(dummy);
-	ib->ptr[ib->length_dw++] = dummy;
-	ib->ptr[ib->length_dw++] = 0x00000001;
+	ib->ptr[ib->length_dw++] = 0x00000020; /* len */
+	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
+	ib->ptr[ib->length_dw++] = 0xffffffff; /* next task info, set to 0xffffffff if no */
+	ib->ptr[ib->length_dw++] = 0x00000001; /* destroy session */
+	ib->ptr[ib->length_dw++] = 0x00000000;
+	ib->ptr[ib->length_dw++] = 0x00000000;
+	ib->ptr[ib->length_dw++] = 0xffffffff; /* feedback is not needed, set to 0xffffffff and firmware will not output feedback */
+	ib->ptr[ib->length_dw++] = 0x00000000;

 	ib->ptr[ib->length_dw++] = 0x00000008; /* len */
 	ib->ptr[ib->length_dw++] = 0x02000001; /* destroy cmd */
@ -499,7 +508,7 @@ int amdgpu_vce_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,

 	if (direct) {
 		r = amdgpu_ib_schedule(ring, 1, ib, NULL, NULL, &f);
-		job->fence = f;
+		job->fence = fence_get(f);
 		if (r)
 			goto err;

@ -580,12 +589,10 @@ static int amdgpu_vce_cs_reloc(struct amdgpu_cs_parser *p, uint32_t ib_idx,
 * we we don't have another free session index.
 */
 static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
-				      uint32_t handle, bool *allocated)
+				      uint32_t handle, uint32_t *allocated)
 {
 	unsigned i;

-	*allocated = false;
-
 	/* validate the handle */
 	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i) {
 		if (atomic_read(&p->adev->vce.handles[i]) == handle) {
@ -602,7 +609,7 @@ static int amdgpu_vce_validate_handle(struct amdgpu_cs_parser *p,
 		if (!atomic_cmpxchg(&p->adev->vce.handles[i], 0, handle)) {
 			p->adev->vce.filp[i] = p->filp;
 			p->adev->vce.img_size[i] = 0;
-			*allocated = true;
+			*allocated |= 1 << i;
 			return i;
 		}
 	}
@ -622,15 +629,13 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 	struct amdgpu_ib *ib = &p->job->ibs[ib_idx];
 	unsigned fb_idx = 0, bs_idx = 0;
 	int session_idx = -1;
-	bool destroyed = false;
-	bool created = false;
-	bool allocated = false;
+	uint32_t destroyed = 0;
+	uint32_t created = 0;
+	uint32_t allocated = 0;
 	uint32_t tmp, handle = 0;
 	uint32_t *size = &tmp;
 	int i, r = 0, idx = 0;

-	amdgpu_vce_note_usage(p->adev);
-
 	while (idx < ib->length_dw) {
 		uint32_t len = amdgpu_get_ib_value(p, ib_idx, idx);
 		uint32_t cmd = amdgpu_get_ib_value(p, ib_idx, idx + 1);
@ -641,30 +646,30 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 			goto out;
 		}

-		if (destroyed) {
-			DRM_ERROR("No other command allowed after destroy!\n");
-			r = -EINVAL;
-			goto out;
-		}
-
 		switch (cmd) {
-		case 0x00000001: // session
+		case 0x00000001: /* session */
 			handle = amdgpu_get_ib_value(p, ib_idx, idx + 2);
 			session_idx = amdgpu_vce_validate_handle(p, handle,
 								 &allocated);
-			if (session_idx < 0)
-				return session_idx;
+			if (session_idx < 0) {
+				r = session_idx;
+				goto out;
+			}
 			size = &p->adev->vce.img_size[session_idx];
 			break;

-		case 0x00000002: // task info
+		case 0x00000002: /* task info */
 			fb_idx = amdgpu_get_ib_value(p, ib_idx, idx + 6);
 			bs_idx = amdgpu_get_ib_value(p, ib_idx, idx + 7);
 			break;

-		case 0x01000001: // create
-			created = true;
-			if (!allocated) {
+		case 0x01000001: /* create */
+			created |= 1 << session_idx;
+			if (destroyed & (1 << session_idx)) {
+				destroyed &= ~(1 << session_idx);
+				allocated |= 1 << session_idx;
+
+			} else if (!(allocated & (1 << session_idx))) {
 				DRM_ERROR("Handle already in use!\n");
 				r = -EINVAL;
 				goto out;
@ -675,16 +680,16 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 				8 * 3 / 2;
 			break;

-		case 0x04000001: // config extension
-		case 0x04000002: // pic control
-		case 0x04000005: // rate control
-		case 0x04000007: // motion estimation
-		case 0x04000008: // rdo
-		case 0x04000009: // vui
-		case 0x05000002: // auxiliary buffer
+		case 0x04000001: /* config extension */
+		case 0x04000002: /* pic control */
+		case 0x04000005: /* rate control */
+		case 0x04000007: /* motion estimation */
+		case 0x04000008: /* rdo */
+		case 0x04000009: /* vui */
+		case 0x05000002: /* auxiliary buffer */
 			break;

-		case 0x03000001: // encode
+		case 0x03000001: /* encode */
 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 10, idx + 9,
 						*size, 0);
 			if (r)
@ -696,18 +701,18 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 				goto out;
 			break;

-		case 0x02000001: // destroy
-			destroyed = true;
+		case 0x02000001: /* destroy */
+			destroyed |= 1 << session_idx;
 			break;

-		case 0x05000001: // context buffer
+		case 0x05000001: /* context buffer */
 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
 						*size * 2, 0);
 			if (r)
 				goto out;
 			break;

-		case 0x05000004: // video bitstream buffer
+		case 0x05000004: /* video bitstream buffer */
 			tmp = amdgpu_get_ib_value(p, ib_idx, idx + 4);
 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
 						tmp, bs_idx);
@ -715,7 +720,7 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 				goto out;
 			break;

-		case 0x05000005: // feedback buffer
+		case 0x05000005: /* feedback buffer */
 			r = amdgpu_vce_cs_reloc(p, ib_idx, idx + 3, idx + 2,
 						4096, fb_idx);
 			if (r)
@ -737,21 +742,24 @@ int amdgpu_vce_ring_parse_cs(struct amdgpu_cs_parser *p, uint32_t ib_idx)
 		idx += len / 4;
 	}

-	if (allocated && !created) {
+	if (allocated & ~created) {
 		DRM_ERROR("New session without create command!\n");
 		r = -ENOENT;
 	}

 out:
-	if ((!r && destroyed) || (r && allocated)) {
-		/*
-		 * IB contains a destroy msg or we have allocated an
-		 * handle and got an error, anyway free the handle
-		 */
-		for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
-			atomic_cmpxchg(&p->adev->vce.handles[i], handle, 0);
+	if (!r) {
+		/* No error, free all destroyed handle slots */
+		tmp = destroyed;
+	} else {
+		/* Error during parsing, free all allocated handle slots */
+		tmp = allocated;
 	}

+	for (i = 0; i < AMDGPU_MAX_VCE_HANDLES; ++i)
+		if (tmp & (1 << i))
+			atomic_set(&p->adev->vce.handles[i], 0);
+
 	return r;
 }

@ -837,10 +845,10 @@ int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring)
 * @ring: the engine to test on
 *
 */
-int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring)
+int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct fence *fence = NULL;
-	int r;
+	long r;

 	/* skip vce ring1 ib test for now, since it's not reliable */
 	if (ring == &ring->adev->vce.ring[1])
@ -848,21 +856,25 @@ int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring)

 	r = amdgpu_vce_get_create_msg(ring, 1, NULL);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get create msg (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get create msg (%ld).\n", r);
 		goto error;
 	}

 	r = amdgpu_vce_get_destroy_msg(ring, 1, true, &fence);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get destroy ib (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get destroy ib (%ld).\n", r);
 		goto error;
 	}

-	r = fence_wait(fence, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+	r = fence_wait_timeout(fence, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out.\n");
+		r = -ETIMEDOUT;
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 	} else {
 		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
 	}
 error:
 	fence_put(fence);
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vce.h
@ -39,6 +39,8 @@ void amdgpu_vce_ring_emit_ib(struct amdgpu_ring *ring, struct amdgpu_ib *ib,
 void amdgpu_vce_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 seq,
 				unsigned flags);
 int amdgpu_vce_ring_test_ring(struct amdgpu_ring *ring);
-int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring);
+int amdgpu_vce_ring_test_ib(struct amdgpu_ring *ring, long timeout);
+void amdgpu_vce_ring_begin_use(struct amdgpu_ring *ring);
+void amdgpu_vce_ring_end_use(struct amdgpu_ring *ring);

 #endif
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@ -25,6 +25,7 @@
 *          Alex Deucher
 *          Jerome Glisse
 */
+#include <linux/fence-array.h>
 #include <drm/drmP.h>
 #include <drm/amdgpu_drm.h>
 #include "amdgpu.h"
@ -114,16 +115,26 @@ void amdgpu_vm_get_pd_bo(struct amdgpu_vm *vm,
 /**
 * amdgpu_vm_get_bos - add the vm BOs to a duplicates list
 *
+ * @adev: amdgpu device pointer
 * @vm: vm providing the BOs
 * @duplicates: head of duplicates list
 *
 * Add the page directory to the BO duplicates list
 * for command submission.
 */
-void amdgpu_vm_get_pt_bos(struct amdgpu_vm *vm, struct list_head *duplicates)
+void amdgpu_vm_get_pt_bos(struct amdgpu_device *adev, struct amdgpu_vm *vm,
+			  struct list_head *duplicates)
 {
+	uint64_t num_evictions;
 	unsigned i;

+	/* We only need to validate the page tables
+	 * if they aren't already valid.
+	 */
+	num_evictions = atomic64_read(&adev->num_evictions);
+	if (num_evictions == vm->last_eviction_counter)
+		return;
+
 	/* add the vm page table to the list */
 	for (i = 0; i <= vm->max_pde_used; ++i) {
 		struct amdgpu_bo_list_entry *entry = &vm->page_tables[i].entry;
@ -162,6 +173,13 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
 	spin_unlock(&glob->lru_lock);
 }

+static bool amdgpu_vm_is_gpu_reset(struct amdgpu_device *adev,
+			      struct amdgpu_vm_id *id)
+{
+	return id->current_gpu_reset_count !=
+		atomic_read(&adev->gpu_reset_counter) ? true : false;
+}
+
 /**
 * amdgpu_vm_grab_id - allocate the next free VMID
 *
@ -174,18 +192,67 @@ void amdgpu_vm_move_pt_bos_in_lru(struct amdgpu_device *adev,
 */
 int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		      struct amdgpu_sync *sync, struct fence *fence,
-		      unsigned *vm_id, uint64_t *vm_pd_addr)
+		      struct amdgpu_job *job)
 {
-	uint64_t pd_addr = amdgpu_bo_gpu_offset(vm->page_directory);
 	struct amdgpu_device *adev = ring->adev;
+	uint64_t fence_context = adev->fence_context + ring->idx;
 	struct fence *updates = sync->last_vm_update;
-	struct amdgpu_vm_id *id;
-	unsigned i = ring->idx;
-	int r;
+	struct amdgpu_vm_id *id, *idle;
+	struct fence **fences;
+	unsigned i;
+	int r = 0;
+
+	fences = kmalloc_array(sizeof(void *), adev->vm_manager.num_ids,
+			       GFP_KERNEL);
+	if (!fences)
+		return -ENOMEM;

 	mutex_lock(&adev->vm_manager.lock);

+	/* Check if we have an idle VMID */
+	i = 0;
+	list_for_each_entry(idle, &adev->vm_manager.ids_lru, list) {
+		fences[i] = amdgpu_sync_peek_fence(&idle->active, ring);
+		if (!fences[i])
+			break;
+		++i;
+	}
+
+	/* If we can't find a idle VMID to use, wait till one becomes available */
+	if (&idle->list == &adev->vm_manager.ids_lru) {
+		u64 fence_context = adev->vm_manager.fence_context + ring->idx;
+		unsigned seqno = ++adev->vm_manager.seqno[ring->idx];
+		struct fence_array *array;
+		unsigned j;
+
+		for (j = 0; j < i; ++j)
+			fence_get(fences[j]);
+
+		array = fence_array_create(i, fences, fence_context,
+					   seqno, true);
+		if (!array) {
+			for (j = 0; j < i; ++j)
+				fence_put(fences[j]);
+			kfree(fences);
+			r = -ENOMEM;
+			goto error;
+		}
+
+
+		r = amdgpu_sync_fence(ring->adev, sync, &array->base);
+		fence_put(&array->base);
+		if (r)
+			goto error;
+
+		mutex_unlock(&adev->vm_manager.lock);
+		return 0;
+
+	}
+	kfree(fences);
+
+	job->vm_needs_flush = true;
 	/* Check if we can use a VMID already assigned to this VM */
+	i = ring->idx;
 	do {
 		struct fence *flushed;

@ -196,67 +263,52 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 		/* Check all the prerequisites to using this VMID */
 		if (!id)
 			continue;
+		if (amdgpu_vm_is_gpu_reset(adev, id))
+			continue;

 		if (atomic64_read(&id->owner) != vm->client_id)
 			continue;

-		if (pd_addr != id->pd_gpu_addr)
+		if (job->vm_pd_addr != id->pd_gpu_addr)
 			continue;

-		if (id->last_user != ring &&
-		    (!id->last_flush || !fence_is_signaled(id->last_flush)))
+		if (!id->last_flush)
+			continue;
+
+		if (id->last_flush->context != fence_context &&
+		    !fence_is_signaled(id->last_flush))
 			continue;

 		flushed  = id->flushed_updates;
-		if (updates && (!flushed || fence_is_later(updates, flushed)))
+		if (updates &&
+		    (!flushed || fence_is_later(updates, flushed)))
 			continue;

-		/* Good we can use this VMID */
-		if (id->last_user == ring) {
-			r = amdgpu_sync_fence(ring->adev, sync,
-					      id->first);
-			if (r)
-				goto error;
-		}
-
-		/* And remember this submission as user of the VMID */
+		/* Good we can use this VMID. Remember this submission as
+		 * user of the VMID.
+		 */
 		r = amdgpu_sync_fence(ring->adev, &id->active, fence);
 		if (r)
 			goto error;

+		id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
 		list_move_tail(&id->list, &adev->vm_manager.ids_lru);
 		vm->ids[ring->idx] = id;

-		*vm_id = id - adev->vm_manager.ids;
-		*vm_pd_addr = AMDGPU_VM_NO_FLUSH;
-		trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
+		job->vm_id = id - adev->vm_manager.ids;
+		job->vm_needs_flush = false;
+		trace_amdgpu_vm_grab_id(vm, ring->idx, job);

 		mutex_unlock(&adev->vm_manager.lock);
 		return 0;

 	} while (i != ring->idx);

-	id = list_first_entry(&adev->vm_manager.ids_lru,
-			      struct amdgpu_vm_id,
-			      list);
+	/* Still no ID to use? Then use the idle one found earlier */
+	id = idle;

-	if (!amdgpu_sync_is_idle(&id->active)) {
-		struct list_head *head = &adev->vm_manager.ids_lru;
-		struct amdgpu_vm_id *tmp;
-
-		list_for_each_entry_safe(id, tmp, &adev->vm_manager.ids_lru,
-					 list) {
-			if (amdgpu_sync_is_idle(&id->active)) {
-				list_move(&id->list, head);
-				head = &id->list;
-			}
-		}
-		id = list_first_entry(&adev->vm_manager.ids_lru,
-				      struct amdgpu_vm_id,
-				      list);
-	}
-
-	r = amdgpu_sync_cycle_fences(sync, &id->active, fence);
+	/* Remember this submission as user of the VMID */
+	r = amdgpu_sync_fence(ring->adev, &id->active, fence);
 	if (r)
 		goto error;

@ -269,22 +321,46 @@ int amdgpu_vm_grab_id(struct amdgpu_vm *vm, struct amdgpu_ring *ring,
 	fence_put(id->flushed_updates);
 	id->flushed_updates = fence_get(updates);

-	id->pd_gpu_addr = pd_addr;
-
+	id->pd_gpu_addr = job->vm_pd_addr;
+	id->current_gpu_reset_count = atomic_read(&adev->gpu_reset_counter);
 	list_move_tail(&id->list, &adev->vm_manager.ids_lru);
-	id->last_user = ring;
 	atomic64_set(&id->owner, vm->client_id);
 	vm->ids[ring->idx] = id;

-	*vm_id = id - adev->vm_manager.ids;
-	*vm_pd_addr = pd_addr;
-	trace_amdgpu_vm_grab_id(vm, ring->idx, *vm_id, *vm_pd_addr);
+	job->vm_id = id - adev->vm_manager.ids;
+	trace_amdgpu_vm_grab_id(vm, ring->idx, job);

 error:
 	mutex_unlock(&adev->vm_manager.lock);
 	return r;
 }

+static bool amdgpu_vm_ring_has_compute_vm_bug(struct amdgpu_ring *ring)
+{
+	struct amdgpu_device *adev = ring->adev;
+	const struct amdgpu_ip_block_version *ip_block;
+
+	if (ring->type != AMDGPU_RING_TYPE_COMPUTE)
+		/* only compute rings */
+		return false;
+
+	ip_block = amdgpu_get_ip_block(adev, AMD_IP_BLOCK_TYPE_GFX);
+	if (!ip_block)
+		return false;
+
+	if (ip_block->major <= 7) {
+		/* gfx7 has no workaround */
+		return true;
+	} else if (ip_block->major == 8) {
+		if (adev->gfx.mec_fw_version >= 673)
+			/* gfx8 is fixed in MEC firmware 673 */
+			return false;
+		else
+			return true;
+	}
+	return false;
+}
+
 /**
 * amdgpu_vm_flush - hardware flush the vm
 *
@ -294,59 +370,52 @@ error:
 *
 * Emit a VM flush when it is necessary.
 */
-int amdgpu_vm_flush(struct amdgpu_ring *ring,
-		    unsigned vm_id, uint64_t pd_addr,
-		    uint32_t gds_base, uint32_t gds_size,
-		    uint32_t gws_base, uint32_t gws_size,
-		    uint32_t oa_base, uint32_t oa_size)
+int amdgpu_vm_flush(struct amdgpu_ring *ring, struct amdgpu_job *job)
 {
 	struct amdgpu_device *adev = ring->adev;
-	struct amdgpu_vm_id *id = &adev->vm_manager.ids[vm_id];
+	struct amdgpu_vm_id *id = &adev->vm_manager.ids[job->vm_id];
 	bool gds_switch_needed = ring->funcs->emit_gds_switch && (
-		id->gds_base != gds_base ||
-		id->gds_size != gds_size ||
-		id->gws_base != gws_base ||
-		id->gws_size != gws_size ||
-		id->oa_base != oa_base ||
-		id->oa_size != oa_size);
+		id->gds_base != job->gds_base ||
+		id->gds_size != job->gds_size ||
+		id->gws_base != job->gws_base ||
+		id->gws_size != job->gws_size ||
+		id->oa_base != job->oa_base ||
+		id->oa_size != job->oa_size);
 	int r;

 	if (ring->funcs->emit_pipeline_sync && (
-	    pd_addr != AMDGPU_VM_NO_FLUSH || gds_switch_needed ||
-		    ring->type == AMDGPU_RING_TYPE_COMPUTE))
+	    job->vm_needs_flush || gds_switch_needed ||
+	    amdgpu_vm_ring_has_compute_vm_bug(ring)))
 		amdgpu_ring_emit_pipeline_sync(ring);

-	if (ring->funcs->emit_vm_flush &&
-	    pd_addr != AMDGPU_VM_NO_FLUSH) {
+	if (ring->funcs->emit_vm_flush && (job->vm_needs_flush ||
+	    amdgpu_vm_is_gpu_reset(adev, id))) {
 		struct fence *fence;

-		trace_amdgpu_vm_flush(pd_addr, ring->idx, vm_id);
-		amdgpu_ring_emit_vm_flush(ring, vm_id, pd_addr);
+		trace_amdgpu_vm_flush(job->vm_pd_addr, ring->idx, job->vm_id);
+		amdgpu_ring_emit_vm_flush(ring, job->vm_id, job->vm_pd_addr);
+
+		r = amdgpu_fence_emit(ring, &fence);
+		if (r)
+			return r;

 		mutex_lock(&adev->vm_manager.lock);
-		if ((id->pd_gpu_addr == pd_addr) && (id->last_user == ring)) {
-			r = amdgpu_fence_emit(ring, &fence);
-			if (r) {
-				mutex_unlock(&adev->vm_manager.lock);
-				return r;
-			}
-			fence_put(id->last_flush);
-			id->last_flush = fence;
-		}
+		fence_put(id->last_flush);
+		id->last_flush = fence;
 		mutex_unlock(&adev->vm_manager.lock);
 	}

 	if (gds_switch_needed) {
-		id->gds_base = gds_base;
-		id->gds_size = gds_size;
-		id->gws_base = gws_base;
-		id->gws_size = gws_size;
-		id->oa_base = oa_base;
-		id->oa_size = oa_size;
-		amdgpu_ring_emit_gds_switch(ring, vm_id,
-					    gds_base, gds_size,
-					    gws_base, gws_size,
-					    oa_base, oa_size);
+		id->gds_base = job->gds_base;
+		id->gds_size = job->gds_size;
+		id->gws_base = job->gws_base;
+		id->gws_size = job->gws_size;
+		id->oa_base = job->oa_base;
+		id->oa_size = job->oa_size;
+		amdgpu_ring_emit_gds_switch(ring, job->vm_id,
+					    job->gds_base, job->gds_size,
+					    job->gws_base, job->gws_size,
+					    job->oa_base, job->oa_size);
 	}

 	return 0;
@ -723,7 +792,7 @@ static void amdgpu_vm_frag_ptes(struct amdgpu_device *adev,
 * @vm: requested vm
 * @start: start of GPU address range
 * @end: end of GPU address range
- * @dst: destination address to map to
+ * @dst: destination address to map to, the next dst inside the function
 * @flags: mapping flags
 *
 * Update the page tables in the range @start - @end.
@ -737,49 +806,75 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 {
 	const uint64_t mask = AMDGPU_VM_PTE_COUNT - 1;

-	uint64_t last_pe_start = ~0, last_pe_end = ~0, last_dst = ~0;
-	uint64_t addr;
+	uint64_t cur_pe_start, cur_pe_end, cur_dst;
+	uint64_t addr; /* next GPU address to be updated */
+	uint64_t pt_idx;
+	struct amdgpu_bo *pt;
+	unsigned nptes; /* next number of ptes to be updated */
+	uint64_t next_pe_start;
+
+	/* initialize the variables */
+	addr = start;
+	pt_idx = addr >> amdgpu_vm_block_size;
+	pt = vm->page_tables[pt_idx].entry.robj;
+
+	if ((addr & ~mask) == (end & ~mask))
+		nptes = end - addr;
+	else
+		nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);
+
+	cur_pe_start = amdgpu_bo_gpu_offset(pt);
+	cur_pe_start += (addr & mask) * 8;
+	cur_pe_end = cur_pe_start + 8 * nptes;
+	cur_dst = dst;
+
+	/* for next ptb*/
+	addr += nptes;
+	dst += nptes * AMDGPU_GPU_PAGE_SIZE;

 	/* walk over the address space and update the page tables */
-	for (addr = start; addr < end; ) {
-		uint64_t pt_idx = addr >> amdgpu_vm_block_size;
-		struct amdgpu_bo *pt = vm->page_tables[pt_idx].entry.robj;
-		unsigned nptes;
-		uint64_t pe_start;
+	while (addr < end) {
+		pt_idx = addr >> amdgpu_vm_block_size;
+		pt = vm->page_tables[pt_idx].entry.robj;

 		if ((addr & ~mask) == (end & ~mask))
 			nptes = end - addr;
 		else
 			nptes = AMDGPU_VM_PTE_COUNT - (addr & mask);

-		pe_start = amdgpu_bo_gpu_offset(pt);
-		pe_start += (addr & mask) * 8;
+		next_pe_start = amdgpu_bo_gpu_offset(pt);
+		next_pe_start += (addr & mask) * 8;

-		if (last_pe_end != pe_start) {
-
-			amdgpu_vm_frag_ptes(adev, vm_update_params,
-					    last_pe_start, last_pe_end,
-					    last_dst, flags);
-
-			last_pe_start = pe_start;
-			last_pe_end = pe_start + 8 * nptes;
-			last_dst = dst;
+		if (cur_pe_end == next_pe_start) {
+			/* The next ptb is consecutive to current ptb.
+			 * Don't call amdgpu_vm_frag_ptes now.
+			 * Will update two ptbs together in future.
+			*/
+			cur_pe_end += 8 * nptes;
 		} else {
-			last_pe_end += 8 * nptes;
+			amdgpu_vm_frag_ptes(adev, vm_update_params,
+					    cur_pe_start, cur_pe_end,
+					    cur_dst, flags);
+
+			cur_pe_start = next_pe_start;
+			cur_pe_end = next_pe_start + 8 * nptes;
+			cur_dst = dst;
 		}

+		/* for next ptb*/
 		addr += nptes;
 		dst += nptes * AMDGPU_GPU_PAGE_SIZE;
 	}

-	amdgpu_vm_frag_ptes(adev, vm_update_params, last_pe_start,
-			    last_pe_end, last_dst, flags);
+	amdgpu_vm_frag_ptes(adev, vm_update_params, cur_pe_start,
+			    cur_pe_end, cur_dst, flags);
 }

 /**
 * amdgpu_vm_bo_update_mapping - update a mapping in the vm page table
 *
 * @adev: amdgpu_device pointer
+ * @exclusive: fence we need to sync to
 * @src: address where to copy page table entries from
 * @pages_addr: DMA addresses to use for mapping
 * @vm: requested vm
@ -793,6 +888,7 @@ static void amdgpu_vm_update_ptes(struct amdgpu_device *adev,
 * Returns 0 for success, -EINVAL for failure.
 */
 static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,
+				       struct fence *exclusive,
 				       uint64_t src,
 				       dma_addr_t *pages_addr,
 				       struct amdgpu_vm *vm,
@ -853,6 +949,10 @@ static int amdgpu_vm_bo_update_mapping(struct amdgpu_device *adev,

 	vm_update_params.ib = &job->ibs[0];

+	r = amdgpu_sync_fence(adev, &job->sync, exclusive);
+	if (r)
+		goto error_free;
+
 	r = amdgpu_sync_resv(adev, &job->sync, vm->page_directory->tbo.resv,
 			     owner);
 	if (r)
@ -889,6 +989,7 @@ error_free:
 * amdgpu_vm_bo_split_mapping - split a mapping into smaller chunks
 *
 * @adev: amdgpu_device pointer
+ * @exclusive: fence we need to sync to
 * @gtt_flags: flags as they are used for GTT
 * @pages_addr: DMA addresses to use for mapping
 * @vm: requested vm
@ -902,6 +1003,7 @@ error_free:
 * Returns 0 for success, -EINVAL for failure.
 */
 static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
+				      struct fence *exclusive,
 				      uint32_t gtt_flags,
 				      dma_addr_t *pages_addr,
 				      struct amdgpu_vm *vm,
@ -932,7 +1034,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 	addr += mapping->offset;

 	if (!pages_addr || src)
-		return amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm,
+		return amdgpu_vm_bo_update_mapping(adev, exclusive,
+						   src, pages_addr, vm,
 						   start, mapping->it.last,
 						   flags, addr, fence);

@ -940,7 +1043,8 @@ static int amdgpu_vm_bo_split_mapping(struct amdgpu_device *adev,
 		uint64_t last;

 		last = min((uint64_t)mapping->it.last, start + max_size - 1);
-		r = amdgpu_vm_bo_update_mapping(adev, src, pages_addr, vm,
+		r = amdgpu_vm_bo_update_mapping(adev, exclusive,
+						src, pages_addr, vm,
 						start, last, flags, addr,
 						fence);
 		if (r)
@ -973,6 +1077,7 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	struct amdgpu_bo_va_mapping *mapping;
 	dma_addr_t *pages_addr = NULL;
 	uint32_t gtt_flags, flags;
+	struct fence *exclusive;
 	uint64_t addr;
 	int r;

@ -994,8 +1099,11 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 		default:
 			break;
 		}
+
+		exclusive = reservation_object_get_excl(bo_va->bo->tbo.resv);
 	} else {
 		addr = 0;
+		exclusive = NULL;
 	}

 	flags = amdgpu_ttm_tt_pte_flags(adev, bo_va->bo->tbo.ttm, mem);
@ -1007,7 +1115,8 @@ int amdgpu_vm_bo_update(struct amdgpu_device *adev,
 	spin_unlock(&vm->status_lock);

 	list_for_each_entry(mapping, &bo_va->invalids, list) {
-		r = amdgpu_vm_bo_split_mapping(adev, gtt_flags, pages_addr, vm,
+		r = amdgpu_vm_bo_split_mapping(adev, exclusive,
+					       gtt_flags, pages_addr, vm,
 					       mapping, flags, addr,
 					       &bo_va->last_pt_update);
 		if (r)
@ -1054,7 +1163,7 @@ int amdgpu_vm_clear_freed(struct amdgpu_device *adev,
 			struct amdgpu_bo_va_mapping, list);
 		list_del(&mapping->list);

-		r = amdgpu_vm_bo_split_mapping(adev, 0, NULL, vm, mapping,
+		r = amdgpu_vm_bo_split_mapping(adev, NULL, 0, NULL, vm, mapping,
 					       0, 0, NULL);
 		kfree(mapping);
 		if (r)
@ -1445,6 +1554,7 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm)
 	amdgpu_bo_unreserve(vm->page_directory);
 	if (r)
 		goto error_free_page_directory;
+	vm->last_eviction_counter = atomic64_read(&adev->num_evictions);

 	return 0;

@ -1516,6 +1626,10 @@ void amdgpu_vm_manager_init(struct amdgpu_device *adev)
 			      &adev->vm_manager.ids_lru);
 	}

+	adev->vm_manager.fence_context = fence_context_alloc(AMDGPU_MAX_RINGS);
+	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
+		adev->vm_manager.seqno[i] = 0;
+
 	atomic_set(&adev->vm_manager.vm_pte_next_ring, 0);
 	atomic64_set(&adev->vm_manager.client_counter, 0);
 }
--- a/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
+++ b/drivers/gpu/drm/amd/amdgpu/atombios_encoders.c
@ -98,6 +98,7 @@ amdgpu_atombios_encoder_set_backlight_level(struct amdgpu_encoder *amdgpu_encode
 		case ENCODER_OBJECT_ID_INTERNAL_KLDSCP_LVTMA:
 		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY1:
 		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY2:
+		case ENCODER_OBJECT_ID_INTERNAL_UNIPHY3:
 			if (dig->backlight_level == 0)
 				amdgpu_atombios_encoder_setup_dig_transmitter(encoder,
 								       ATOM_TRANSMITTER_ACTION_LCD_BLOFF, 0, 0);
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.c
@ -50,7 +50,9 @@
 #include "gmc/gmc_7_1_sh_mask.h"

 MODULE_FIRMWARE("radeon/bonaire_smc.bin");
+MODULE_FIRMWARE("radeon/bonaire_k_smc.bin");
 MODULE_FIRMWARE("radeon/hawaii_smc.bin");
+MODULE_FIRMWARE("radeon/hawaii_k_smc.bin");

 #define MC_CG_ARB_FREQ_F0           0x0a
 #define MC_CG_ARB_FREQ_F1           0x0b
@ -84,12 +86,14 @@ static const struct ci_pt_defaults defaults_bonaire_xt =
 	{ 0x17C, 0x172, 0x180, 0x1BC, 0x1B3, 0x1BD, 0x206, 0x200, 0x203, 0x25D, 0x25A, 0x255, 0x2C3, 0x2C5, 0x2B4 }
 };

+#if 0
 static const struct ci_pt_defaults defaults_bonaire_pro =
 {
 	1, 0xF, 0xFD, 0x19, 5, 45, 0, 0x65062,
 	{ 0x8C,  0x23F, 0x244, 0xA6,  0x83,  0x85,  0x86,  0x86,  0x83,  0xDB,  0xDB,  0xDA,  0x67,  0x60,  0x5F  },
 	{ 0x187, 0x193, 0x193, 0x1C7, 0x1D1, 0x1D1, 0x210, 0x219, 0x219, 0x266, 0x26C, 0x26C, 0x2C9, 0x2CB, 0x2CB }
 };
+#endif

 static const struct ci_pt_defaults defaults_saturn_xt =
 {
@ -98,12 +102,14 @@ static const struct ci_pt_defaults defaults_saturn_xt =
 	{ 0x187, 0x187, 0x187, 0x1C7, 0x1C7, 0x1C7, 0x210, 0x210, 0x210, 0x266, 0x266, 0x266, 0x2C9, 0x2C9, 0x2C9 }
 };

+#if 0
 static const struct ci_pt_defaults defaults_saturn_pro =
 {
 	1, 0xF, 0xFD, 0x19, 5, 55, 0, 0x30000,
 	{ 0x96,  0x21D, 0x23B, 0xA1,  0x85,  0x87,  0x83,  0x84,  0x81,  0xE6,  0xE6,  0xE6,  0x71,  0x6A,  0x6A  },
 	{ 0x193, 0x19E, 0x19E, 0x1D2, 0x1DC, 0x1DC, 0x21A, 0x223, 0x223, 0x26E, 0x27E, 0x274, 0x2CF, 0x2D2, 0x2D2 }
 };
+#endif

 static const struct ci_pt_config_reg didt_config_ci[] =
 {
@ -736,19 +742,19 @@ static int ci_enable_didt(struct amdgpu_device *adev, bool enable)

 	if (pi->caps_sq_ramping || pi->caps_db_ramping ||
 	    pi->caps_td_ramping || pi->caps_tcp_ramping) {
-		gfx_v7_0_enter_rlc_safe_mode(adev);
+		adev->gfx.rlc.funcs->enter_safe_mode(adev);

 		if (enable) {
 			ret = ci_program_pt_config_registers(adev, didt_config_ci);
 			if (ret) {
-				gfx_v7_0_exit_rlc_safe_mode(adev);
+				adev->gfx.rlc.funcs->exit_safe_mode(adev);
 				return ret;
 			}
 		}

 		ci_do_enable_didt(adev, enable);

-		gfx_v7_0_exit_rlc_safe_mode(adev);
+		adev->gfx.rlc.funcs->exit_safe_mode(adev);
 	}

 	return 0;
@ -3030,7 +3036,7 @@ static int ci_populate_single_memory_level(struct amdgpu_device *adev,

 	if (pi->mclk_stutter_mode_threshold &&
 	    (memory_clock <= pi->mclk_stutter_mode_threshold) &&
-	    (pi->uvd_enabled == false) &&
+	    (!pi->uvd_enabled) &&
 	    (RREG32(mmDPG_PIPE_STUTTER_CONTROL) & DPG_PIPE_STUTTER_CONTROL__STUTTER_ENABLE_MASK) &&
 	    (adev->pm.dpm.new_active_crtc_count <= 2))
 		memory_level->StutterEnable = true;
@ -3636,6 +3642,10 @@ static int ci_setup_default_dpm_tables(struct amdgpu_device *adev)

 	ci_setup_default_pcie_tables(adev);

+	/* save a copy of the default DPM table */
+	memcpy(&(pi->golden_dpm_table), &(pi->dpm_table),
+			sizeof(struct ci_dpm_table));
+
 	return 0;
 }

@ -5754,10 +5764,18 @@ static int ci_dpm_init_microcode(struct amdgpu_device *adev)

 	switch (adev->asic_type) {
 	case CHIP_BONAIRE:
-		chip_name = "bonaire";
+		if ((adev->pdev->revision == 0x80) ||
+		    (adev->pdev->revision == 0x81) ||
+		    (adev->pdev->device == 0x665f))
+			chip_name = "bonaire_k";
+		else
+			chip_name = "bonaire";
 		break;
 	case CHIP_HAWAII:
-		chip_name = "hawaii";
+		if (adev->pdev->revision == 0x80)
+			chip_name = "hawaii_k";
+		else
+			chip_name = "hawaii";
 		break;
 	case CHIP_KAVERI:
 	case CHIP_KABINI:
@ -6404,6 +6422,186 @@ static int ci_dpm_set_powergating_state(void *handle,
 	return 0;
 }

+static int ci_dpm_print_clock_levels(struct amdgpu_device *adev,
+		enum pp_clock_type type, char *buf)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_single_dpm_table *sclk_table = &pi->dpm_table.sclk_table;
+	struct ci_single_dpm_table *mclk_table = &pi->dpm_table.mclk_table;
+	struct ci_single_dpm_table *pcie_table = &pi->dpm_table.pcie_speed_table;
+
+	int i, now, size = 0;
+	uint32_t clock, pcie_speed;
+
+	switch (type) {
+	case PP_SCLK:
+		amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_API_GetSclkFrequency);
+		clock = RREG32(mmSMC_MSG_ARG_0);
+
+		for (i = 0; i < sclk_table->count; i++) {
+			if (clock > sclk_table->dpm_levels[i].value)
+				continue;
+			break;
+		}
+		now = i;
+
+		for (i = 0; i < sclk_table->count; i++)
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
+					i, sclk_table->dpm_levels[i].value / 100,
+					(i == now) ? "*" : "");
+		break;
+	case PP_MCLK:
+		amdgpu_ci_send_msg_to_smc(adev, PPSMC_MSG_API_GetMclkFrequency);
+		clock = RREG32(mmSMC_MSG_ARG_0);
+
+		for (i = 0; i < mclk_table->count; i++) {
+			if (clock > mclk_table->dpm_levels[i].value)
+				continue;
+			break;
+		}
+		now = i;
+
+		for (i = 0; i < mclk_table->count; i++)
+			size += sprintf(buf + size, "%d: %uMhz %s\n",
+					i, mclk_table->dpm_levels[i].value / 100,
+					(i == now) ? "*" : "");
+		break;
+	case PP_PCIE:
+		pcie_speed = ci_get_current_pcie_speed(adev);
+		for (i = 0; i < pcie_table->count; i++) {
+			if (pcie_speed != pcie_table->dpm_levels[i].value)
+				continue;
+			break;
+		}
+		now = i;
+
+		for (i = 0; i < pcie_table->count; i++)
+			size += sprintf(buf + size, "%d: %s %s\n", i,
+					(pcie_table->dpm_levels[i].value == 0) ? "2.5GB, x1" :
+					(pcie_table->dpm_levels[i].value == 1) ? "5.0GB, x16" :
+					(pcie_table->dpm_levels[i].value == 2) ? "8.0GB, x16" : "",
+					(i == now) ? "*" : "");
+		break;
+	default:
+		break;
+	}
+
+	return size;
+}
+
+static int ci_dpm_force_clock_level(struct amdgpu_device *adev,
+		enum pp_clock_type type, uint32_t mask)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+
+	if (adev->pm.dpm.forced_level
+			!= AMDGPU_DPM_FORCED_LEVEL_MANUAL)
+		return -EINVAL;
+
+	switch (type) {
+	case PP_SCLK:
+		if (!pi->sclk_dpm_key_disabled)
+			amdgpu_ci_send_msg_to_smc_with_parameter(adev,
+					PPSMC_MSG_SCLKDPM_SetEnabledMask,
+					pi->dpm_level_enable_mask.sclk_dpm_enable_mask & mask);
+		break;
+
+	case PP_MCLK:
+		if (!pi->mclk_dpm_key_disabled)
+			amdgpu_ci_send_msg_to_smc_with_parameter(adev,
+					PPSMC_MSG_MCLKDPM_SetEnabledMask,
+					pi->dpm_level_enable_mask.mclk_dpm_enable_mask & mask);
+		break;
+
+	case PP_PCIE:
+	{
+		uint32_t tmp = mask & pi->dpm_level_enable_mask.pcie_dpm_enable_mask;
+		uint32_t level = 0;
+
+		while (tmp >>= 1)
+			level++;
+
+		if (!pi->pcie_dpm_key_disabled)
+			amdgpu_ci_send_msg_to_smc_with_parameter(adev,
+					PPSMC_MSG_PCIeDPM_ForceLevel,
+					level);
+		break;
+	}
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+static int ci_dpm_get_sclk_od(struct amdgpu_device *adev)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_single_dpm_table *sclk_table = &(pi->dpm_table.sclk_table);
+	struct ci_single_dpm_table *golden_sclk_table =
+			&(pi->golden_dpm_table.sclk_table);
+	int value;
+
+	value = (sclk_table->dpm_levels[sclk_table->count - 1].value -
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value) *
+			100 /
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return value;
+}
+
+static int ci_dpm_set_sclk_od(struct amdgpu_device *adev, uint32_t value)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps);
+	struct ci_single_dpm_table *golden_sclk_table =
+			&(pi->golden_dpm_table.sclk_table);
+
+	if (value > 20)
+		value = 20;
+
+	ps->performance_levels[ps->performance_level_count - 1].sclk =
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value *
+			value / 100 +
+			golden_sclk_table->dpm_levels[golden_sclk_table->count - 1].value;
+
+	return 0;
+}
+
+static int ci_dpm_get_mclk_od(struct amdgpu_device *adev)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_single_dpm_table *mclk_table = &(pi->dpm_table.mclk_table);
+	struct ci_single_dpm_table *golden_mclk_table =
+			&(pi->golden_dpm_table.mclk_table);
+	int value;
+
+	value = (mclk_table->dpm_levels[mclk_table->count - 1].value -
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value) *
+			100 /
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return value;
+}
+
+static int ci_dpm_set_mclk_od(struct amdgpu_device *adev, uint32_t value)
+{
+	struct ci_power_info *pi = ci_get_pi(adev);
+	struct ci_ps *ps = ci_get_ps(adev->pm.dpm.requested_ps);
+	struct ci_single_dpm_table *golden_mclk_table =
+			&(pi->golden_dpm_table.mclk_table);
+
+	if (value > 20)
+		value = 20;
+
+	ps->performance_levels[ps->performance_level_count - 1].mclk =
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value *
+			value / 100 +
+			golden_mclk_table->dpm_levels[golden_mclk_table->count - 1].value;
+
+	return 0;
+}
+
 const struct amd_ip_funcs ci_dpm_ip_funcs = {
 	.name = "ci_dpm",
 	.early_init = ci_dpm_early_init,
@ -6438,6 +6636,12 @@ static const struct amdgpu_dpm_funcs ci_dpm_funcs = {
 	.get_fan_control_mode = &ci_dpm_get_fan_control_mode,
 	.set_fan_speed_percent = &ci_dpm_set_fan_speed_percent,
 	.get_fan_speed_percent = &ci_dpm_get_fan_speed_percent,
+	.print_clock_levels = ci_dpm_print_clock_levels,
+	.force_clock_level = ci_dpm_force_clock_level,
+	.get_sclk_od = ci_dpm_get_sclk_od,
+	.set_sclk_od = ci_dpm_set_sclk_od,
+	.get_mclk_od = ci_dpm_get_mclk_od,
+	.set_mclk_od = ci_dpm_set_mclk_od,
 };

 static void ci_dpm_set_dpm_funcs(struct amdgpu_device *adev)
--- a/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
+++ b/drivers/gpu/drm/amd/amdgpu/ci_dpm.h
@ -193,6 +193,7 @@ struct ci_pt_defaults {

 struct ci_power_info {
 	struct ci_dpm_table dpm_table;
+	struct ci_dpm_table golden_dpm_table;
 	u32 voltage_control;
 	u32 mvdd_control;
 	u32 vddci_control;
--- a/drivers/gpu/drm/amd/amdgpu/cik.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik.c
@ -879,7 +879,7 @@ static void cik_vga_set_state(struct amdgpu_device *adev, bool state)
 	uint32_t tmp;

 	tmp = RREG32(mmCONFIG_CNTL);
-	if (state == false)
+	if (!state)
 		tmp |= CONFIG_CNTL__VGA_DIS_MASK;
 	else
 		tmp &= ~CONFIG_CNTL__VGA_DIS_MASK;
@ -1035,12 +1035,12 @@ static uint32_t cik_read_indexed_register(struct amdgpu_device *adev,

 	mutex_lock(&adev->grbm_idx_mutex);
 	if (se_num != 0xffffffff || sh_num != 0xffffffff)
-		gfx_v7_0_select_se_sh(adev, se_num, sh_num);
+		amdgpu_gfx_select_se_sh(adev, se_num, sh_num, 0xffffffff);

 	val = RREG32(reg_offset);

 	if (se_num != 0xffffffff || sh_num != 0xffffffff)
-		gfx_v7_0_select_se_sh(adev, 0xffffffff, 0xffffffff);
+		amdgpu_gfx_select_se_sh(adev, 0xffffffff, 0xffffffff, 0xffffffff);
 	mutex_unlock(&adev->grbm_idx_mutex);
 	return val;
 }
@ -1158,10 +1158,11 @@ static void kv_restore_regs_for_reset(struct amdgpu_device *adev,
 	WREG32(mmGMCON_RENG_EXECUTE, save->gmcon_reng_execute);
 }

-static void cik_gpu_pci_config_reset(struct amdgpu_device *adev)
+static int cik_gpu_pci_config_reset(struct amdgpu_device *adev)
 {
 	struct kv_reset_save_regs kv_save = { 0 };
 	u32 i;
+	int r = -EINVAL;

 	dev_info(adev->dev, "GPU pci config reset\n");

@ -1177,14 +1178,20 @@ static void cik_gpu_pci_config_reset(struct amdgpu_device *adev)

 	/* wait for asic to come out of reset */
 	for (i = 0; i < adev->usec_timeout; i++) {
-		if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff)
+		if (RREG32(mmCONFIG_MEMSIZE) != 0xffffffff) {
+			/* enable BM */
+			pci_set_master(adev->pdev);
+			r = 0;
 			break;
+		}
 		udelay(1);
 	}

 	/* does asic init need to be run first??? */
 	if (adev->flags & AMD_IS_APU)
 		kv_restore_regs_for_reset(adev, &kv_save);
+
+	return r;
 }

 static void cik_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hung)
@ -1210,13 +1217,14 @@ static void cik_set_bios_scratch_engine_hung(struct amdgpu_device *adev, bool hu
 */
 static int cik_asic_reset(struct amdgpu_device *adev)
 {
+	int r;
 	cik_set_bios_scratch_engine_hung(adev, true);

-	cik_gpu_pci_config_reset(adev);
+	r = cik_gpu_pci_config_reset(adev);

 	cik_set_bios_scratch_engine_hung(adev, false);

-	return 0;
+	return r;
 }

 static int cik_set_uvd_clock(struct amdgpu_device *adev, u32 clock,
@ -2014,9 +2022,6 @@ static const struct amdgpu_asic_funcs cik_asic_funcs =
 	.set_uvd_clocks = &cik_set_uvd_clocks,
 	.set_vce_clocks = &cik_set_vce_clocks,
 	.get_virtual_caps = &cik_get_virtual_caps,
-	/* these should be moved to their own ip modules */
-	.get_gpu_clock_counter = &gfx_v7_0_get_gpu_clock_counter,
-	.wait_for_mc_idle = &gmc_v7_0_mc_wait_for_idle,
 };

 static int cik_common_early_init(void *handle)
--- a/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
+++ b/drivers/gpu/drm/amd/amdgpu/cik_sdma.c
@ -224,17 +224,6 @@ static void cik_sdma_ring_emit_ib(struct amdgpu_ring *ring,
 				  unsigned vm_id, bool ctx_switch)
 {
 	u32 extra_bits = vm_id & 0xf;
-	u32 next_rptr = ring->wptr + 5;
-
-	while ((next_rptr & 7) != 4)
-		next_rptr++;
-
-	next_rptr += 4;
-	amdgpu_ring_write(ring, SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0));
-	amdgpu_ring_write(ring, ring->next_rptr_gpu_addr & 0xfffffffc);
-	amdgpu_ring_write(ring, upper_32_bits(ring->next_rptr_gpu_addr) & 0xffffffff);
-	amdgpu_ring_write(ring, 1); /* number of DWs to follow */
-	amdgpu_ring_write(ring, next_rptr);

 	/* IB packet must end on a 8 DW boundary */
 	cik_sdma_ring_insert_nop(ring, (12 - (ring->wptr & 7)) % 8);
@ -365,7 +354,7 @@ static void cik_sdma_enable(struct amdgpu_device *adev, bool enable)
 	u32 me_cntl;
 	int i;

-	if (enable == false) {
+	if (!enable) {
 		cik_sdma_gfx_stop(adev);
 		cik_sdma_rlc_stop(adev);
 	}
@ -628,20 +617,19 @@ static int cik_sdma_ring_test_ring(struct amdgpu_ring *ring)
 * Test a simple IB in the DMA ring (CIK).
 * Returns 0 on success, error on failure.
 */
-static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
+static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring, long timeout)
 {
 	struct amdgpu_device *adev = ring->adev;
 	struct amdgpu_ib ib;
 	struct fence *f = NULL;
-	unsigned i;
 	unsigned index;
-	int r;
 	u32 tmp = 0;
 	u64 gpu_addr;
+	long r;

 	r = amdgpu_wb_get(adev, &index);
 	if (r) {
-		dev_err(adev->dev, "(%d) failed to allocate wb slot\n", r);
+		dev_err(adev->dev, "(%ld) failed to allocate wb slot\n", r);
 		return r;
 	}

@ -651,11 +639,12 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
 	memset(&ib, 0, sizeof(ib));
 	r = amdgpu_ib_get(adev, NULL, 256, &ib);
 	if (r) {
-		DRM_ERROR("amdgpu: failed to get ib (%d).\n", r);
+		DRM_ERROR("amdgpu: failed to get ib (%ld).\n", r);
 		goto err0;
 	}

-	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE, SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
+	ib.ptr[0] = SDMA_PACKET(SDMA_OPCODE_WRITE,
+				SDMA_WRITE_SUB_OPCODE_LINEAR, 0);
 	ib.ptr[1] = lower_32_bits(gpu_addr);
 	ib.ptr[2] = upper_32_bits(gpu_addr);
 	ib.ptr[3] = 1;
@ -665,28 +654,25 @@ static int cik_sdma_ring_test_ib(struct amdgpu_ring *ring)
 	if (r)
 		goto err1;

-	r = fence_wait(f, false);
-	if (r) {
-		DRM_ERROR("amdgpu: fence wait failed (%d).\n", r);
+	r = fence_wait_timeout(f, false, timeout);
+	if (r == 0) {
+		DRM_ERROR("amdgpu: IB test timed out\n");
+		r = -ETIMEDOUT;
+		goto err1;
+	} else if (r < 0) {
+		DRM_ERROR("amdgpu: fence wait failed (%ld).\n", r);
 		goto err1;
 	}
-	for (i = 0; i < adev->usec_timeout; i++) {
-		tmp = le32_to_cpu(adev->wb.wb[index]);
-		if (tmp == 0xDEADBEEF)
-			break;
-		DRM_UDELAY(1);
-	}
-	if (i < adev->usec_timeout) {
-		DRM_INFO("ib test on ring %d succeeded in %u usecs\n",
-			 ring->idx, i);
-		goto err1;
+	tmp = le32_to_cpu(adev->wb.wb[index]);
+	if (tmp == 0xDEADBEEF) {
+		DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+		r = 0;
 	} else {
 		DRM_ERROR("amdgpu: ib test failed (0x%08X)\n", tmp);
 		r = -EINVAL;
 	}

 err1:
-	fence_put(f);
 	amdgpu_ib_free(adev, &ib, NULL);
 	fence_put(f);
 err0:
--- a/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
+++ b/drivers/gpu/drm/amd/amdgpu/cz_dpm.c
@ -425,7 +425,7 @@ static int cz_dpm_init(struct amdgpu_device *adev)
 	pi->mgcg_cgtt_local1 = 0x0;
 	pi->clock_slow_down_step = 25000;
 	pi->skip_clock_slow_down = 1;
-	pi->enable_nb_ps_policy = 0;
+	pi->enable_nb_ps_policy = false;
 	pi->caps_power_containment = true;
 	pi->caps_cac = true;
 	pi->didt_enabled = false;
@ -2219,6 +2219,7 @@ static void cz_dpm_powergate_vce(struct amdgpu_device *adev, bool gate)
 			}
 		}
 	} else { /*pi->caps_vce_pg*/
+		pi->vce_power_gated = gate;
 		cz_update_vce_dpm(adev);
 		cz_enable_vce_dpm(adev, !gate);
 	}
--- a/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
@ -2667,19 +2667,21 @@ static void dce_v10_0_cursor_reset(struct drm_crtc *crtc)
 	}
 }

-static void dce_v10_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
-				    u16 *blue, uint32_t start, uint32_t size)
+static int dce_v10_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
+				    u16 *blue, uint32_t size)
 {
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-	int end = (start + size > 256) ? 256 : start + size, i;
+	int i;

 	/* userspace palettes are always correct as is */
-	for (i = start; i < end; i++) {
+	for (i = 0; i < size; i++) {
 		amdgpu_crtc->lut_r[i] = red[i] >> 6;
 		amdgpu_crtc->lut_g[i] = green[i] >> 6;
 		amdgpu_crtc->lut_b[i] = blue[i] >> 6;
 	}
 	dce_v10_0_crtc_load_lut(crtc);
+
+	return 0;
 }

 static void dce_v10_0_crtc_destroy(struct drm_crtc *crtc)
@ -2717,13 +2719,13 @@ static void dce_v10_0_crtc_dpms(struct drm_crtc *crtc, int mode)
 		type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
 		amdgpu_irq_update(adev, &adev->crtc_irq, type);
 		amdgpu_irq_update(adev, &adev->pageflip_irq, type);
-		drm_vblank_on(dev, amdgpu_crtc->crtc_id);
+		drm_crtc_vblank_on(crtc);
 		dce_v10_0_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		drm_vblank_off(dev, amdgpu_crtc->crtc_id);
+		drm_crtc_vblank_off(crtc);
 		if (amdgpu_crtc->enabled) {
 			dce_v10_0_vga_enable(crtc, true);
 			amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
@ -3372,7 +3374,7 @@ static int dce_v10_0_pageflip_irq(struct amdgpu_device *adev,

 	spin_unlock_irqrestore(&adev->ddev->event_lock, flags);

-	drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
+	drm_crtc_vblank_put(&amdgpu_crtc->base);
 	schedule_work(&works->unpin_work);

 	return 0;
--- a/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
@ -307,11 +307,10 @@ static void dce_v11_0_page_flip(struct amdgpu_device *adev,
 	struct amdgpu_crtc *amdgpu_crtc = adev->mode_info.crtcs[crtc_id];
 	u32 tmp;

-	/* flip at hsync for async, default is vsync */
-	/* use UPDATE_IMMEDIATE_EN instead for async? */
+	/* flip immediate for async, default is vsync */
 	tmp = RREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset);
 	tmp = REG_SET_FIELD(tmp, GRPH_FLIP_CONTROL,
-			    GRPH_SURFACE_UPDATE_H_RETRACE_EN, async ? 1 : 0);
+			    GRPH_SURFACE_UPDATE_IMMEDIATE_EN, async ? 1 : 0);
 	WREG32(mmGRPH_FLIP_CONTROL + amdgpu_crtc->crtc_offset, tmp);
 	/* update the scanout addresses */
 	WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + amdgpu_crtc->crtc_offset,
@ -2678,19 +2677,21 @@ static void dce_v11_0_cursor_reset(struct drm_crtc *crtc)
 	}
 }

-static void dce_v11_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
-				    u16 *blue, uint32_t start, uint32_t size)
+static int dce_v11_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
+				    u16 *blue, uint32_t size)
 {
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-	int end = (start + size > 256) ? 256 : start + size, i;
+	int i;

 	/* userspace palettes are always correct as is */
-	for (i = start; i < end; i++) {
+	for (i = 0; i < size; i++) {
 		amdgpu_crtc->lut_r[i] = red[i] >> 6;
 		amdgpu_crtc->lut_g[i] = green[i] >> 6;
 		amdgpu_crtc->lut_b[i] = blue[i] >> 6;
 	}
 	dce_v11_0_crtc_load_lut(crtc);
+
+	return 0;
 }

 static void dce_v11_0_crtc_destroy(struct drm_crtc *crtc)
@ -2728,13 +2729,13 @@ static void dce_v11_0_crtc_dpms(struct drm_crtc *crtc, int mode)
 		type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
 		amdgpu_irq_update(adev, &adev->crtc_irq, type);
 		amdgpu_irq_update(adev, &adev->pageflip_irq, type);
-		drm_vblank_on(dev, amdgpu_crtc->crtc_id);
+		drm_crtc_vblank_on(crtc);
 		dce_v11_0_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		drm_vblank_off(dev, amdgpu_crtc->crtc_id);
+		drm_crtc_vblank_off(crtc);
 		if (amdgpu_crtc->enabled) {
 			dce_v11_0_vga_enable(crtc, true);
 			amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
@ -3433,7 +3434,7 @@ static int dce_v11_0_pageflip_irq(struct amdgpu_device *adev,

 	spin_unlock_irqrestore(&adev->ddev->event_lock, flags);

-	drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
+	drm_crtc_vblank_put(&amdgpu_crtc->base);
 	schedule_work(&works->unpin_work);

 	return 0;
--- a/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
@ -526,36 +526,16 @@ static void dce_v8_0_stop_mc_access(struct amdgpu_device *adev,
 		crtc_enabled = REG_GET_FIELD(RREG32(mmCRTC_CONTROL + crtc_offsets[i]),
 					     CRTC_CONTROL, CRTC_MASTER_EN);
 		if (crtc_enabled) {
-#if 0
-			u32 frame_count;
-			int j;
-
+#if 1
 			save->crtc_enabled[i] = true;
 			tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]);
 			if (REG_GET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN) == 0) {
-				amdgpu_display_vblank_wait(adev, i);
-				WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
+				/*it is correct only for RGB ; black is 0*/
+				WREG32(mmCRTC_BLANK_DATA_COLOR + crtc_offsets[i], 0);
 				tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 1);
 				WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
-				WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
-			}
-			/* wait for the next frame */
-			frame_count = amdgpu_display_vblank_get_counter(adev, i);
-			for (j = 0; j < adev->usec_timeout; j++) {
-				if (amdgpu_display_vblank_get_counter(adev, i) != frame_count)
-					break;
-				udelay(1);
-			}
-			tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK) == 0) {
-				tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 1);
-				WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp);
-			}
-			tmp = RREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK) == 0) {
-				tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK, 1);
-				WREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i], tmp);
 			}
+			mdelay(20);
 #else
 			/* XXX this is a hack to avoid strange behavior with EFI on certain systems */
 			WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
@ -575,55 +555,22 @@ static void dce_v8_0_stop_mc_access(struct amdgpu_device *adev,
 static void dce_v8_0_resume_mc_access(struct amdgpu_device *adev,
 				      struct amdgpu_mode_mc_save *save)
 {
-	u32 tmp, frame_count;
-	int i, j;
+	u32 tmp;
+	int i;

 	/* update crtc base addresses */
 	for (i = 0; i < adev->mode_info.num_crtc; i++) {
 		WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i],
 		       upper_32_bits(adev->mc.vram_start));
-		WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS_HIGH + crtc_offsets[i],
-		       upper_32_bits(adev->mc.vram_start));
 		WREG32(mmGRPH_PRIMARY_SURFACE_ADDRESS + crtc_offsets[i],
 		       (u32)adev->mc.vram_start);
-		WREG32(mmGRPH_SECONDARY_SURFACE_ADDRESS + crtc_offsets[i],
-		       (u32)adev->mc.vram_start);

 		if (save->crtc_enabled[i]) {
-			tmp = RREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, MASTER_UPDATE_MODE, MASTER_UPDATE_MODE) != 3) {
-				tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_MODE, MASTER_UPDATE_MODE, 3);
-				WREG32(mmMASTER_UPDATE_MODE + crtc_offsets[i], tmp);
-			}
-			tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK)) {
-				tmp = REG_SET_FIELD(tmp, GRPH_UPDATE, GRPH_UPDATE_LOCK, 0);
-				WREG32(mmGRPH_UPDATE + crtc_offsets[i], tmp);
-			}
-			tmp = RREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i]);
-			if (REG_GET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK)) {
-				tmp = REG_SET_FIELD(tmp, MASTER_UPDATE_LOCK, MASTER_UPDATE_LOCK, 0);
-				WREG32(mmMASTER_UPDATE_LOCK + crtc_offsets[i], tmp);
-			}
-			for (j = 0; j < adev->usec_timeout; j++) {
-				tmp = RREG32(mmGRPH_UPDATE + crtc_offsets[i]);
-				if (REG_GET_FIELD(tmp, GRPH_UPDATE, GRPH_SURFACE_UPDATE_PENDING) == 0)
-					break;
-				udelay(1);
-			}
 			tmp = RREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i]);
 			tmp = REG_SET_FIELD(tmp, CRTC_BLANK_CONTROL, CRTC_BLANK_DATA_EN, 0);
-			WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 1);
 			WREG32(mmCRTC_BLANK_CONTROL + crtc_offsets[i], tmp);
-			WREG32(mmCRTC_UPDATE_LOCK + crtc_offsets[i], 0);
-			/* wait for the next frame */
-			frame_count = amdgpu_display_vblank_get_counter(adev, i);
-			for (j = 0; j < adev->usec_timeout; j++) {
-				if (amdgpu_display_vblank_get_counter(adev, i) != frame_count)
-					break;
-				udelay(1);
-			}
 		}
+		mdelay(20);
 	}

 	WREG32(mmVGA_MEMORY_BASE_ADDRESS_HIGH, upper_32_bits(adev->mc.vram_start));
@ -2574,19 +2521,21 @@ static void dce_v8_0_cursor_reset(struct drm_crtc *crtc)
 	}
 }

-static void dce_v8_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
-				    u16 *blue, uint32_t start, uint32_t size)
+static int dce_v8_0_crtc_gamma_set(struct drm_crtc *crtc, u16 *red, u16 *green,
+				   u16 *blue, uint32_t size)
 {
 	struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-	int end = (start + size > 256) ? 256 : start + size, i;
+	int i;

 	/* userspace palettes are always correct as is */
-	for (i = start; i < end; i++) {
+	for (i = 0; i < size; i++) {
 		amdgpu_crtc->lut_r[i] = red[i] >> 6;
 		amdgpu_crtc->lut_g[i] = green[i] >> 6;
 		amdgpu_crtc->lut_b[i] = blue[i] >> 6;
 	}
 	dce_v8_0_crtc_load_lut(crtc);
+
+	return 0;
 }

 static void dce_v8_0_crtc_destroy(struct drm_crtc *crtc)
@ -2624,13 +2573,13 @@ static void dce_v8_0_crtc_dpms(struct drm_crtc *crtc, int mode)
 		type = amdgpu_crtc_idx_to_irq_type(adev, amdgpu_crtc->crtc_id);
 		amdgpu_irq_update(adev, &adev->crtc_irq, type);
 		amdgpu_irq_update(adev, &adev->pageflip_irq, type);
-		drm_vblank_on(dev, amdgpu_crtc->crtc_id);
+		drm_crtc_vblank_on(crtc);
 		dce_v8_0_crtc_load_lut(crtc);
 		break;
 	case DRM_MODE_DPMS_STANDBY:
 	case DRM_MODE_DPMS_SUSPEND:
 	case DRM_MODE_DPMS_OFF:
-		drm_vblank_off(dev, amdgpu_crtc->crtc_id);
+		drm_crtc_vblank_off(crtc);
 		if (amdgpu_crtc->enabled) {
 			dce_v8_0_vga_enable(crtc, true);
 			amdgpu_atombios_crtc_blank(crtc, ATOM_ENABLE);
@ -3376,7 +3325,7 @@ static int dce_v8_0_pageflip_irq(struct amdgpu_device *adev,

 	spin_unlock_irqrestore(&adev->ddev->event_lock, flags);

-	drm_vblank_put(adev->ddev, amdgpu_crtc->crtc_id);
+	drm_crtc_vblank_put(&amdgpu_crtc->base);
 	schedule_work(&works->unpin_work);

 	return 0;
--- a/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
+++ b/drivers/gpu/drm/amd/amdgpu/fiji_smc.c
@ -173,7 +173,7 @@ static int fiji_send_msg_to_smc(struct amdgpu_device *adev, PPSMC_Msg msg)
 {
 	if (!fiji_is_smc_ram_running(adev))
 	{
-		return -EINVAL;;
+		return -EINVAL;
 	}

 	if (wait_smu_response(adev)) {
--- a/Show More
+++ b/Show More