Merge branch 'topic/dp-aux-rework' into drm-intel-next-queued

Conflicts: drivers/gpu/drm/i915/intel_dp.c A bit a mess with reverts which differe in details between -fixes and -next and some other unrelated shuffling. Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
2014-03-19 15:54:37 +01:00 · 2014-03-19 15:54:37 +01:00 · b80d6c781e
parent 262ca2b08f 0b99836f23
commit b80d6c781e
252 changed files with 7838 additions and 4232 deletions
--- a/Documentation/DocBook/drm.tmpl
+++ b/Documentation/DocBook/drm.tmpl
@ -29,12 +29,26 @@
 	  </address>
 	</affiliation>
      </author>
+      <author>
+	<firstname>Daniel</firstname>
+	<surname>Vetter</surname>
+	<contrib>Contributions all over the place</contrib>
+	<affiliation>
+	  <orgname>Intel Corporation</orgname>
+	  <address>
+	    <email>daniel.vetter@ffwll.ch</email>
+	  </address>
+	</affiliation>
+      </author>
    </authorgroup>

    <copyright>
      <year>2008-2009</year>
-      <year>2012</year>
+      <year>2013-2014</year>
      <holder>Intel Corporation</holder>
+    </copyright>
+    <copyright>
+      <year>2012</year>
      <holder>Laurent Pinchart</holder>
    </copyright>

@ -60,7 +74,15 @@

 <toc></toc>

-  <!-- Introduction -->
+<part id="drmCore">
+  <title>DRM Core</title>
+  <partintro>
+    <para>
+      This first part of the DRM Developer's Guide documents core DRM code,
+      helper libraries for writting drivers and generic userspace interfaces
+      exposed by DRM drivers.
+    </para>
+  </partintro>

  <chapter id="drmIntroduction">
    <title>Introduction</title>
@ -264,8 +286,8 @@ char *date;</synopsis>
      <para>
        The <methodname>load</methodname> method is the driver and device
        initialization entry point. The method is responsible for allocating and
-        initializing driver private data, specifying supported performance
-        counters, performing resource allocation and mapping (e.g. acquiring
+	initializing driver private data, performing resource allocation and
+	mapping (e.g. acquiring
        clocks, mapping registers or allocating command buffers), initializing
        the memory manager (<xref linkend="drm-memory-management"/>), installing
        the IRQ handler (<xref linkend="drm-irq-registration"/>), setting up
@ -295,7 +317,7 @@ char *date;</synopsis>
 	their <methodname>load</methodname> method called with flags to 0.
      </para>
      <sect3>
-        <title>Driver Private &amp; Performance Counters</title>
+        <title>Driver Private Data</title>
        <para>
          The driver private hangs off the main
          <structname>drm_device</structname> structure and can be used for
@ -307,14 +329,6 @@ char *date;</synopsis>
          <structname>drm_device</structname>.<structfield>dev_priv</structfield>
          set to NULL when the driver is unloaded.
        </para>
-        <para>
-          DRM supports several counters which were used for rough performance
-          characterization. This stat counter system is deprecated and should not
-          be used. If performance monitoring is desired, the developer should
-          investigate and potentially enhance the kernel perf and tracing
-          infrastructure to export GPU related performance information for
-          consumption by performance monitoring tools and applications.
-        </para>
      </sect3>
      <sect3 id="drm-irq-registration">
        <title>IRQ Registration</title>
@ -697,55 +711,16 @@ char *date;</synopsis>
          respectively. The conversion is handled by the DRM core without any
          driver-specific support.
        </para>
-        <para>
-          Similar to global names, GEM file descriptors are also used to share GEM
-          objects across processes. They offer additional security: as file
-          descriptors must be explicitly sent over UNIX domain sockets to be shared
-          between applications, they can't be guessed like the globally unique GEM
-          names.
-        </para>
-        <para>
-          Drivers that support GEM file descriptors, also known as the DRM PRIME
-          API, must set the DRIVER_PRIME bit in the struct
-          <structname>drm_driver</structname>
-          <structfield>driver_features</structfield> field, and implement the
-          <methodname>prime_handle_to_fd</methodname> and
-          <methodname>prime_fd_to_handle</methodname> operations.
-        </para>
-        <para>
-          <synopsis>int (*prime_handle_to_fd)(struct drm_device *dev,
-                            struct drm_file *file_priv, uint32_t handle,
-                            uint32_t flags, int *prime_fd);
-  int (*prime_fd_to_handle)(struct drm_device *dev,
-                            struct drm_file *file_priv, int prime_fd,
-                            uint32_t *handle);</synopsis>
-          Those two operations convert a handle to a PRIME file descriptor and
-          vice versa. Drivers must use the kernel dma-buf buffer sharing framework
-          to manage the PRIME file descriptors.
-        </para>
-        <para>
-          While non-GEM drivers must implement the operations themselves, GEM
-          drivers must use the <function>drm_gem_prime_handle_to_fd</function>
-          and <function>drm_gem_prime_fd_to_handle</function> helper functions.
-          Those helpers rely on the driver
-          <methodname>gem_prime_export</methodname> and
-          <methodname>gem_prime_import</methodname> operations to create a dma-buf
-          instance from a GEM object (dma-buf exporter role) and to create a GEM
-          object from a dma-buf instance (dma-buf importer role).
-        </para>
-        <para>
-          <synopsis>struct dma_buf * (*gem_prime_export)(struct drm_device *dev,
-                                       struct drm_gem_object *obj,
-                                       int flags);
-  struct drm_gem_object * (*gem_prime_import)(struct drm_device *dev,
-                                              struct dma_buf *dma_buf);</synopsis>
-          These two operations are mandatory for GEM drivers that support DRM
-          PRIME.
-        </para>
-        <sect4>
-          <title>DRM PRIME Helper Functions Reference</title>
-!Pdrivers/gpu/drm/drm_prime.c PRIME Helpers
-        </sect4>
+	<para>
+	  GEM also supports buffer sharing with dma-buf file descriptors through
+	  PRIME. GEM-based drivers must use the provided helpers functions to
+	  implement the exporting and importing correctly. See <xref linkend="drm-prime-support" />.
+	  Since sharing file descriptors is inherently more secure than the
+	  easily guessable and global GEM names it is the preferred buffer
+	  sharing mechanism. Sharing buffers through GEM names is only supported
+	  for legacy userspace. Furthermore PRIME also allows cross-device
+	  buffer sharing since it is based on dma-bufs.
+	</para>
      </sect3>
      <sect3 id="drm-gem-objects-mapping">
        <title>GEM Objects Mapping</title>
@ -829,62 +804,6 @@ char *date;</synopsis>
          faults can implement their own mmap file operation handler.
        </para>
      </sect3>
-      <sect3>
-        <title>Dumb GEM Objects</title>
-        <para>
-          The GEM API doesn't standardize GEM objects creation and leaves it to
-          driver-specific ioctls. While not an issue for full-fledged graphics
-          stacks that include device-specific userspace components (in libdrm for
-          instance), this limit makes DRM-based early boot graphics unnecessarily
-          complex.
-        </para>
-        <para>
-          Dumb GEM objects partly alleviate the problem by providing a standard
-          API to create dumb buffers suitable for scanout, which can then be used
-          to create KMS frame buffers.
-        </para>
-        <para>
-          To support dumb GEM objects drivers must implement the
-          <methodname>dumb_create</methodname>,
-          <methodname>dumb_destroy</methodname> and
-          <methodname>dumb_map_offset</methodname> operations.
-        </para>
-        <itemizedlist>
-          <listitem>
-            <synopsis>int (*dumb_create)(struct drm_file *file_priv, struct drm_device *dev,
-                     struct drm_mode_create_dumb *args);</synopsis>
-            <para>
-              The <methodname>dumb_create</methodname> operation creates a GEM
-              object suitable for scanout based on the width, height and depth
-              from the struct <structname>drm_mode_create_dumb</structname>
-              argument. It fills the argument's <structfield>handle</structfield>,
-              <structfield>pitch</structfield> and <structfield>size</structfield>
-              fields with a handle for the newly created GEM object and its line
-              pitch and size in bytes.
-            </para>
-          </listitem>
-          <listitem>
-            <synopsis>int (*dumb_destroy)(struct drm_file *file_priv, struct drm_device *dev,
-                      uint32_t handle);</synopsis>
-            <para>
-              The <methodname>dumb_destroy</methodname> operation destroys a dumb
-              GEM object created by <methodname>dumb_create</methodname>.
-            </para>
-          </listitem>
-          <listitem>
-            <synopsis>int (*dumb_map_offset)(struct drm_file *file_priv, struct drm_device *dev,
-                         uint32_t handle, uint64_t *offset);</synopsis>
-            <para>
-              The <methodname>dumb_map_offset</methodname> operation associates an
-              mmap fake offset with the GEM object given by the handle and returns
-              it. Drivers must use the
-              <function>drm_gem_create_mmap_offset</function> function to
-              associate the fake offset as described in
-              <xref linkend="drm-gem-objects-mapping"/>.
-            </para>
-          </listitem>
-        </itemizedlist>
-      </sect3>
      <sect3>
        <title>Memory Coherency</title>
        <para>
@ -924,7 +843,99 @@ char *date;</synopsis>
          abstracted from the client in libdrm.
        </para>
      </sect3>
-    </sect2>
+      <sect3>
+        <title>GEM Function Reference</title>
+!Edrivers/gpu/drm/drm_gem.c
+      </sect3>
+      </sect2>
+      <sect2>
+	<title>VMA Offset Manager</title>
+!Pdrivers/gpu/drm/drm_vma_manager.c vma offset manager
+!Edrivers/gpu/drm/drm_vma_manager.c
+!Iinclude/drm/drm_vma_manager.h
+      </sect2>
+      <sect2 id="drm-prime-support">
+	<title>PRIME Buffer Sharing</title>
+	<para>
+	  PRIME is the cross device buffer sharing framework in drm, originally
+	  created for the OPTIMUS range of multi-gpu platforms. To userspace
+	  PRIME buffers are dma-buf based file descriptors.
+	</para>
+	<sect3>
+	  <title>Overview and Driver Interface</title>
+	  <para>
+	    Similar to GEM global names, PRIME file descriptors are
+	    also used to share buffer objects across processes. They offer
+	    additional security: as file descriptors must be explicitly sent over
+	    UNIX domain sockets to be shared between applications, they can't be
+	    guessed like the globally unique GEM names.
+	  </para>
+	  <para>
+	    Drivers that support the PRIME
+	    API must set the DRIVER_PRIME bit in the struct
+	    <structname>drm_driver</structname>
+	    <structfield>driver_features</structfield> field, and implement the
+	    <methodname>prime_handle_to_fd</methodname> and
+	    <methodname>prime_fd_to_handle</methodname> operations.
+	  </para>
+	  <para>
+	    <synopsis>int (*prime_handle_to_fd)(struct drm_device *dev,
+			  struct drm_file *file_priv, uint32_t handle,
+			  uint32_t flags, int *prime_fd);
+int (*prime_fd_to_handle)(struct drm_device *dev,
+			  struct drm_file *file_priv, int prime_fd,
+			  uint32_t *handle);</synopsis>
+	    Those two operations convert a handle to a PRIME file descriptor and
+	    vice versa. Drivers must use the kernel dma-buf buffer sharing framework
+	    to manage the PRIME file descriptors. Similar to the mode setting
+	    API PRIME is agnostic to the underlying buffer object manager, as
+	    long as handles are 32bit unsinged integers.
+	  </para>
+	  <para>
+	    While non-GEM drivers must implement the operations themselves, GEM
+	    drivers must use the <function>drm_gem_prime_handle_to_fd</function>
+	    and <function>drm_gem_prime_fd_to_handle</function> helper functions.
+	    Those helpers rely on the driver
+	    <methodname>gem_prime_export</methodname> and
+	    <methodname>gem_prime_import</methodname> operations to create a dma-buf
+	    instance from a GEM object (dma-buf exporter role) and to create a GEM
+	    object from a dma-buf instance (dma-buf importer role).
+	  </para>
+	  <para>
+	    <synopsis>struct dma_buf * (*gem_prime_export)(struct drm_device *dev,
+				     struct drm_gem_object *obj,
+				     int flags);
+struct drm_gem_object * (*gem_prime_import)(struct drm_device *dev,
+					    struct dma_buf *dma_buf);</synopsis>
+	    These two operations are mandatory for GEM drivers that support
+	    PRIME.
+	  </para>
+	</sect3>
+        <sect3>
+          <title>PRIME Helper Functions</title>
+!Pdrivers/gpu/drm/drm_prime.c PRIME Helpers
+        </sect3>
+      </sect2>
+      <sect2>
+	<title>PRIME Function References</title>
+!Edrivers/gpu/drm/drm_prime.c
+      </sect2>
+      <sect2>
+	<title>DRM MM Range Allocator</title>
+	<sect3>
+	  <title>Overview</title>
+!Pdrivers/gpu/drm/drm_mm.c Overview
+	</sect3>
+	<sect3>
+	  <title>LRU Scan/Eviction Support</title>
+!Pdrivers/gpu/drm/drm_mm.c lru scan roaster
+	</sect3>
+      </sect2>
+      <sect2>
+	<title>DRM MM Range Allocator Function References</title>
+!Edrivers/gpu/drm/drm_mm.c
+!Iinclude/drm/drm_mm.h
+      </sect2>
  </sect1>

  <!-- Internals: mode setting -->
@ -952,6 +963,11 @@ int max_width, max_height;</synopsis>
 	<para>Mode setting functions.</para>
      </listitem>
    </itemizedlist>
+    <sect2>
+      <title>Display Modes Function Reference</title>
+!Iinclude/drm/drm_modes.h
+!Edrivers/gpu/drm/drm_modes.c
+    </sect2>
    <sect2>
      <title>Frame Buffer Creation</title>
      <synopsis>struct drm_framebuffer *(*fb_create)(struct drm_device *dev,
@ -968,9 +984,11 @@ int max_width, max_height;</synopsis>
        Frame buffers rely on the underneath memory manager for low-level memory
        operations. When creating a frame buffer applications pass a memory
        handle (or a list of memory handles for multi-planar formats) through
-        the <parameter>drm_mode_fb_cmd2</parameter> argument. This document
-        assumes that the driver uses GEM, those handles thus reference GEM
-        objects.
+	the <parameter>drm_mode_fb_cmd2</parameter> argument. For drivers using
+	GEM as their userspace buffer management interface this would be a GEM
+	handle.  Drivers are however free to use their own backing storage object
+	handles, e.g. vmwgfx directly exposes special TTM handles to userspace
+	and so expects TTM handles in the create ioctl and not GEM handles.
      </para>
      <para>
        Drivers must first validate the requested frame buffer parameters passed
@ -992,7 +1010,7 @@ int max_width, max_height;</synopsis>
      </para>

      <para>
-	The initailization of the new framebuffer instance is finalized with a
+	The initialization of the new framebuffer instance is finalized with a
 	call to <function>drm_framebuffer_init</function> which takes a pointer
 	to DRM frame buffer operations (struct
 	<structname>drm_framebuffer_funcs</structname>). Note that this function
@ -1042,7 +1060,7 @@ int max_width, max_height;</synopsis>
      <para>
 	The lifetime of a drm framebuffer is controlled with a reference count,
 	drivers can grab additional references with
-	<function>drm_framebuffer_reference</function> </para> and drop them
+	<function>drm_framebuffer_reference</function>and drop them
 	again with <function>drm_framebuffer_unreference</function>. For
 	driver-private framebuffers for which the last reference is never
 	dropped (e.g. for the fbdev framebuffer when the struct
@ -1050,6 +1068,72 @@ int max_width, max_height;</synopsis>
 	helper struct) drivers can manually clean up a framebuffer at module
 	unload time with
 	<function>drm_framebuffer_unregister_private</function>.
+      </para>
+    </sect2>
+    <sect2>
+      <title>Dumb Buffer Objects</title>
+      <para>
+	The KMS API doesn't standardize backing storage object creation and
+	leaves it to driver-specific ioctls. Furthermore actually creating a
+	buffer object even for GEM-based drivers is done through a
+	driver-specific ioctl - GEM only has a common userspace interface for
+	sharing and destroying objects. While not an issue for full-fledged
+	graphics stacks that include device-specific userspace components (in
+	libdrm for instance), this limit makes DRM-based early boot graphics
+	unnecessarily complex.
+      </para>
+      <para>
+        Dumb objects partly alleviate the problem by providing a standard
+        API to create dumb buffers suitable for scanout, which can then be used
+        to create KMS frame buffers.
+      </para>
+      <para>
+        To support dumb objects drivers must implement the
+        <methodname>dumb_create</methodname>,
+        <methodname>dumb_destroy</methodname> and
+        <methodname>dumb_map_offset</methodname> operations.
+      </para>
+      <itemizedlist>
+        <listitem>
+          <synopsis>int (*dumb_create)(struct drm_file *file_priv, struct drm_device *dev,
+                   struct drm_mode_create_dumb *args);</synopsis>
+          <para>
+            The <methodname>dumb_create</methodname> operation creates a driver
+	    object (GEM or TTM handle) suitable for scanout based on the
+	    width, height and depth from the struct
+	    <structname>drm_mode_create_dumb</structname> argument. It fills the
+	    argument's <structfield>handle</structfield>,
+	    <structfield>pitch</structfield> and <structfield>size</structfield>
+	    fields with a handle for the newly created object and its line
+            pitch and size in bytes.
+          </para>
+        </listitem>
+        <listitem>
+          <synopsis>int (*dumb_destroy)(struct drm_file *file_priv, struct drm_device *dev,
+                    uint32_t handle);</synopsis>
+          <para>
+            The <methodname>dumb_destroy</methodname> operation destroys a dumb
+            object created by <methodname>dumb_create</methodname>.
+          </para>
+        </listitem>
+        <listitem>
+          <synopsis>int (*dumb_map_offset)(struct drm_file *file_priv, struct drm_device *dev,
+                       uint32_t handle, uint64_t *offset);</synopsis>
+          <para>
+            The <methodname>dumb_map_offset</methodname> operation associates an
+            mmap fake offset with the object given by the handle and returns
+            it. Drivers must use the
+            <function>drm_gem_create_mmap_offset</function> function to
+            associate the fake offset as described in
+            <xref linkend="drm-gem-objects-mapping"/>.
+          </para>
+        </listitem>
+      </itemizedlist>
+      <para>
+        Note that dumb objects may not be used for gpu acceleration, as has been
+	attempted on some ARM embedded platforms. Such drivers really must have
+	a hardware-specific ioctl to allocate suitable buffer objects.
+      </para>
    </sect2>
    <sect2>
      <title>Output Polling</title>
@ -1130,8 +1214,11 @@ int max_width, max_height;</synopsis>
            This operation is called with the mode config lock held.
          </para>
          <note><para>
-            FIXME: How should set_config interact with DPMS? If the CRTC is
-            suspended, should it be resumed?
+	    Note that the drm core has no notion of restoring the mode setting
+	    state after resume, since all resume handling is in the full
+	    responsibility of the driver. The common mode setting helper library
+	    though provides a helper which can be used for this:
+	    <function>drm_helper_resume_force_mode</function>.
          </para></note>
        </sect4>
        <sect4>
@ -2134,7 +2221,7 @@ void intel_crt_init(struct drm_device *dev)
            set the <structfield>display_info</structfield>
            <structfield>width_mm</structfield> and
            <structfield>height_mm</structfield> fields if they haven't been set
-            already (for instance at initilization time when a fixed-size panel is
+            already (for instance at initialization time when a fixed-size panel is
            attached to the connector). The mode <structfield>width_mm</structfield>
            and <structfield>height_mm</structfield> fields are only used internally
            during EDID parsing and should not be set when creating modes manually.
@ -2196,10 +2283,15 @@ void intel_crt_init(struct drm_device *dev)
 !Edrivers/gpu/drm/drm_flip_work.c
    </sect2>
    <sect2>
-      <title>VMA Offset Manager</title>
-!Pdrivers/gpu/drm/drm_vma_manager.c vma offset manager
-!Edrivers/gpu/drm/drm_vma_manager.c
-!Iinclude/drm/drm_vma_manager.h
+      <title>HDMI Infoframes Helper Reference</title>
+      <para>
+	Strictly speaking this is not a DRM helper library but generally useable
+	by any driver interfacing with HDMI outputs like v4l or alsa drivers.
+	But it nicely fits into the overall topic of mode setting helper
+	libraries and hence is also included here.
+      </para>
+!Iinclude/linux/hdmi.h
+!Edrivers/video/hdmi.c
    </sect2>
  </sect1>

@ -2561,42 +2653,44 @@ int num_ioctls;</synopsis>
      </para>
    </sect2>
  </sect1>
-
  <sect1>
-    <title>Command submission &amp; fencing</title>
+    <title>Legacy Support Code</title>
    <para>
-      This should cover a few device-specific command submission
-      implementations.
+      The section very brievely covers some of the old legacy support code which
+      is only used by old DRM drivers which have done a so-called shadow-attach
+      to the underlying device instead of registering as a real driver. This
+      also includes some of the old generic buffer mangement and command
+      submission code. Do not use any of this in new and modern drivers.
    </para>
-  </sect1>

-  <!-- Internals: suspend/resume -->
+    <sect2>
+      <title>Legacy Suspend/Resume</title>
+      <para>
+	The DRM core provides some suspend/resume code, but drivers wanting full
+	suspend/resume support should provide save() and restore() functions.
+	These are called at suspend, hibernate, or resume time, and should perform
+	any state save or restore required by your device across suspend or
+	hibernate states.
+      </para>
+      <synopsis>int (*suspend) (struct drm_device *, pm_message_t state);
+  int (*resume) (struct drm_device *);</synopsis>
+      <para>
+	Those are legacy suspend and resume methods which
+	<emphasis>only</emphasis> work with the legacy shadow-attach driver
+	registration functions. New driver should use the power management
+	interface provided by their bus type (usually through
+	the struct <structname>device_driver</structname> dev_pm_ops) and set
+	these methods to NULL.
+      </para>
+    </sect2>

-  <sect1>
-    <title>Suspend/Resume</title>
-    <para>
-      The DRM core provides some suspend/resume code, but drivers wanting full
-      suspend/resume support should provide save() and restore() functions.
-      These are called at suspend, hibernate, or resume time, and should perform
-      any state save or restore required by your device across suspend or
-      hibernate states.
-    </para>
-    <synopsis>int (*suspend) (struct drm_device *, pm_message_t state);
-int (*resume) (struct drm_device *);</synopsis>
-    <para>
-      Those are legacy suspend and resume methods. New driver should use the
-      power management interface provided by their bus type (usually through
-      the struct <structname>device_driver</structname> dev_pm_ops) and set
-      these methods to NULL.
-    </para>
-  </sect1>
-
-  <sect1>
-    <title>DMA services</title>
-    <para>
-      This should cover how DMA mapping etc. is supported by the core.
-      These functions are deprecated and should not be used.
-    </para>
+    <sect2>
+      <title>Legacy DMA Services</title>
+      <para>
+	This should cover how DMA mapping etc. is supported by the core.
+	These functions are deprecated and should not be used.
+      </para>
+    </sect2>
  </sect1>
  </chapter>

@ -2658,8 +2752,8 @@ int (*resume) (struct drm_device *);</synopsis>
        DRM core provides multiple character-devices for user-space to use.
        Depending on which device is opened, user-space can perform a different
        set of operations (mainly ioctls). The primary node is always created
-        and called <term>card&lt;num&gt;</term>. Additionally, a currently
-        unused control node, called <term>controlD&lt;num&gt;</term> is also
+        and called card&lt;num&gt;. Additionally, a currently
+        unused control node, called controlD&lt;num&gt; is also
        created. The primary node provides all legacy operations and
        historically was the only interface used by userspace. With KMS, the
        control node was introduced. However, the planned KMS control interface
@ -2674,21 +2768,21 @@ int (*resume) (struct drm_device *);</synopsis>
        nodes were introduced. Render nodes solely serve render clients, that
        is, no modesetting or privileged ioctls can be issued on render nodes.
        Only non-global rendering commands are allowed. If a driver supports
-        render nodes, it must advertise it via the <term>DRIVER_RENDER</term>
+        render nodes, it must advertise it via the DRIVER_RENDER
        DRM driver capability. If not supported, the primary node must be used
        for render clients together with the legacy drmAuth authentication
        procedure.
      </para>
      <para>
        If a driver advertises render node support, DRM core will create a
-        separate render node called <term>renderD&lt;num&gt;</term>. There will
+        separate render node called renderD&lt;num&gt;. There will
        be one render node per device. No ioctls except  PRIME-related ioctls
-        will be allowed on this node. Especially <term>GEM_OPEN</term> will be
+        will be allowed on this node. Especially GEM_OPEN will be
        explicitly prohibited. Render nodes are designed to avoid the
        buffer-leaks, which occur if clients guess the flink names or mmap
        offsets on the legacy interface. Additionally to this basic interface,
        drivers must mark their driver-dependent render-only ioctls as
-        <term>DRM_RENDER_ALLOW</term> so render clients can use them. Driver
+        DRM_RENDER_ALLOW so render clients can use them. Driver
        authors must be careful not to allow any privileged ioctls on render
        nodes.
      </para>
@ -2749,15 +2843,73 @@ int (*resume) (struct drm_device *);</synopsis>
    </sect1>

  </chapter>
+</part>
+<part id="drmDrivers">
+  <title>DRM Drivers</title>

-  <!-- API reference -->
-
-  <appendix id="drmDriverApi">
-    <title>DRM Driver API</title>
+  <partintro>
    <para>
-      Include auto-generated API reference here (need to reference it
-      from paragraphs above too).
+      This second part of the DRM Developer's Guide documents driver code,
+      implementation details and also all the driver-specific userspace
+      interfaces. Especially since all hardware-acceleration interfaces to
+      userspace are driver specific for efficiency and other reasons these
+      interfaces can be rather substantial. Hence every driver has its own
+      chapter.
    </para>
-  </appendix>
+  </partintro>

+  <chapter id="drmI915">
+    <title>drm/i915 Intel GFX Driver</title>
+    <para>
+      The drm/i915 driver supports all (with the exception of some very early
+      models) integrated GFX chipsets with both Intel display and rendering
+      blocks. This excludes a set of SoC platforms with an SGX rendering unit,
+      those have basic support through the gma500 drm driver.
+    </para>
+    <sect1>
+      <title>Display Hardware Handling</title>
+      <para>
+        This section covers everything related to the display hardware including
+        the mode setting infrastructure, plane, sprite and cursor handling and
+        display, output probing and related topics.
+      </para>
+      <sect2>
+        <title>Mode Setting Infrastructure</title>
+        <para>
+          The i915 driver is thus far the only DRM driver which doesn't use the
+          common DRM helper code to implement mode setting sequences. Thus it
+          has its own tailor-made infrastructure for executing a display
+          configuration change.
+        </para>
+      </sect2>
+      <sect2>
+        <title>Plane Configuration</title>
+        <para>
+	  This section covers plane configuration and composition with the
+	  primary plane, sprites, cursors and overlays. This includes the
+	  infrastructure to do atomic vsync'ed updates of all this state and
+	  also tightly coupled topics like watermark setup and computation,
+	  framebuffer compression and panel self refresh.
+        </para>
+      </sect2>
+      <sect2>
+        <title>Output Probing</title>
+        <para>
+	  This section covers output probing and related infrastructure like the
+	  hotplug interrupt storm detection and mitigation code. Note that the
+	  i915 driver still uses most of the common DRM helper code for output
+	  probing, so those sections fully apply.
+        </para>
+      </sect2>
+    </sect1>
+
+    <sect1>
+      <title>Memory Management and Command Submission</title>
+      <para>
+	This sections covers all things related to the GEM implementation in the
+	i915 driver.
+      </para>
+    </sect1>
+  </chapter>
+</part>
 </book>
--- a/Documentation/devicetree/bindings/drm/i2c/tda998x.txt
+++ b/Documentation/devicetree/bindings/drm/i2c/tda998x.txt
@ -0,0 +1,27 @@
+Device-Tree bindings for the NXP TDA998x HDMI transmitter
+
+Required properties;
+  - compatible: must be "nxp,tda998x"
+
+Optional properties:
+  - interrupts: interrupt number and trigger type
+	default: polling
+
+  - pinctrl-0: pin control group to be used for
+	screen plug/unplug interrupt.
+
+  - pinctrl-names: must contain a "default" entry.
+
+  - video-ports: 24 bits value which defines how the video controller
+	output is wired to the TDA998x input - default: <0x230145>
+
+Example:
+
+	tda998x: hdmi-encoder {
+		compatible = "nxp,tda998x";
+		reg = <0x70>;
+		interrupt-parent = <&gpio0>;
+		interrupts = <27 2>;		/* falling edge */
+		pinctrl-0 = <&pmx_camera>;
+		pinctrl-names = "default";
+	};
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@ -453,7 +453,7 @@ TP_STATUS_COPY        : This flag indicates that the frame (and associated
                        enabled previously with setsockopt() and 
                        the PACKET_COPY_THRESH option. 

-                        The number of frames than can be buffered to 
+                        The number of frames that can be buffered to
                        be read with recvfrom is limited like a normal socket.
                        See the SO_RCVBUF option in the socket (7) man page.

--- a/Documentation/networking/timestamping.txt
+++ b/Documentation/networking/timestamping.txt
@ -21,26 +21,38 @@ has such a feature).

 SO_TIMESTAMPING:

-Instructs the socket layer which kind of information is wanted. The
-parameter is an integer with some of the following bits set. Setting
-other bits is an error and doesn't change the current state.
+Instructs the socket layer which kind of information should be collected
+and/or reported.  The parameter is an integer with some of the following
+bits set. Setting other bits is an error and doesn't change the current
+state.

-SOF_TIMESTAMPING_TX_HARDWARE:  try to obtain send time stamp in hardware
-SOF_TIMESTAMPING_TX_SOFTWARE:  if SOF_TIMESTAMPING_TX_HARDWARE is off or
-                               fails, then do it in software
-SOF_TIMESTAMPING_RX_HARDWARE:  return the original, unmodified time stamp
-                               as generated by the hardware
-SOF_TIMESTAMPING_RX_SOFTWARE:  if SOF_TIMESTAMPING_RX_HARDWARE is off or
-                               fails, then do it in software
-SOF_TIMESTAMPING_RAW_HARDWARE: return original raw hardware time stamp
-SOF_TIMESTAMPING_SYS_HARDWARE: return hardware time stamp transformed to
-                               the system time base
-SOF_TIMESTAMPING_SOFTWARE:     return system time stamp generated in
-                               software
+Four of the bits are requests to the stack to try to generate
+timestamps.  Any combination of them is valid.

-SOF_TIMESTAMPING_TX/RX determine how time stamps are generated.
-SOF_TIMESTAMPING_RAW/SYS determine how they are reported in the
-following control message:
+SOF_TIMESTAMPING_TX_HARDWARE:  try to obtain send time stamps in hardware
+SOF_TIMESTAMPING_TX_SOFTWARE:  try to obtain send time stamps in software
+SOF_TIMESTAMPING_RX_HARDWARE:  try to obtain receive time stamps in hardware
+SOF_TIMESTAMPING_RX_SOFTWARE:  try to obtain receive time stamps in software
+
+The other three bits control which timestamps will be reported in a
+generated control message.  If none of these bits are set or if none of
+the set bits correspond to data that is available, then the control
+message will not be generated:
+
+SOF_TIMESTAMPING_SOFTWARE:     report systime if available
+SOF_TIMESTAMPING_SYS_HARDWARE: report hwtimetrans if available
+SOF_TIMESTAMPING_RAW_HARDWARE: report hwtimeraw if available
+
+It is worth noting that timestamps may be collected for reasons other
+than being requested by a particular socket with
+SOF_TIMESTAMPING_[TR]X_(HARD|SOFT)WARE.  For example, most drivers that
+can generate hardware receive timestamps ignore
+SOF_TIMESTAMPING_RX_HARDWARE.  It is still a good idea to set that flag
+in case future drivers pay attention.
+
+If timestamps are reported, they will appear in a control message with
+cmsg_level==SOL_SOCKET, cmsg_type==SO_TIMESTAMPING, and a payload like
+this:

 struct scm_timestamping {
 	struct timespec systime;
--- a/3
+++ b/3
@ -1738,6 +1738,7 @@ F:	include/uapi/linux/bfs_fs.h
 BLACKFIN ARCHITECTURE
 M:	Steven Miao <realmz6@gmail.com>
 L:	adi-buildroot-devel@lists.sourceforge.net
+T:	git git://git.code.sf.net/p/adi-linux/code
 W:	http://blackfin.uclinux.org
 S:	Supported
 F:	arch/blackfin/
@ -6002,6 +6003,8 @@ F:	include/linux/netdevice.h
 F:	include/uapi/linux/in.h
 F:	include/uapi/linux/net.h
 F:	include/uapi/linux/netdevice.h
+F:	tools/net/
+F:	tools/testing/selftests/net/

 NETWORKING [IPv4/IPv6]
 M:	"David S. Miller" <davem@davemloft.net>
--- a/2
+++ b/2
@ -1,7 +1,7 @@
 VERSION = 3
 PATCHLEVEL = 14
 SUBLEVEL = 0
-EXTRAVERSION = -rc6
+EXTRAVERSION = -rc7
 NAME = Shuffling Zombie Juror

 # *DOCUMENTATION*
--- a/arch/cris/include/asm/bitops.h
+++ b/arch/cris/include/asm/bitops.h
@ -144,7 +144,7 @@ static inline int test_and_change_bit(int nr, volatile unsigned long *addr)
 * definition, which doesn't have the same semantics.  We don't want to
 * use -fno-builtin, so just hide the name ffs.
 */
-#define ffs kernel_ffs
+#define ffs(x) kernel_ffs(x)

 #include <asm-generic/bitops/fls.h>
 #include <asm-generic/bitops/__fls.h>
--- a/arch/ia64/kernel/uncached.c
+++ b/arch/ia64/kernel/uncached.c
@ -98,7 +98,7 @@ static int uncached_add_chunk(struct uncached_pool *uc_pool, int nid)
 	/* attempt to allocate a granule's worth of cached memory pages */

 	page = alloc_pages_exact_node(nid,
-				GFP_KERNEL | __GFP_ZERO | GFP_THISNODE,
+				GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
 				IA64_GRANULE_SHIFT-PAGE_SHIFT);
 	if (!page) {
 		mutex_unlock(&uc_pool->add_chunk_mutex);
--- a/arch/powerpc/platforms/cell/ras.c
+++ b/arch/powerpc/platforms/cell/ras.c
@ -123,7 +123,8 @@ static int __init cbe_ptcal_enable_on_node(int nid, int order)

 	area->nid = nid;
 	area->order = order;
-	area->pages = alloc_pages_exact_node(area->nid, GFP_KERNEL|GFP_THISNODE,
+	area->pages = alloc_pages_exact_node(area->nid,
+						GFP_KERNEL|__GFP_THISNODE,
 						area->order);

 	if (!area->pages) {
--- a/arch/x86/Kconfig.cpu
+++ b/arch/x86/Kconfig.cpu
@ -341,10 +341,6 @@ config X86_USE_3DNOW
 	def_bool y
 	depends on (MCYRIXIII || MK7 || MGEODE_LX) && !UML

-config X86_OOSTORE
-	def_bool y
-	depends on (MWINCHIP3D || MWINCHIPC6) && MTRR
-
 #
 # P6_NOPs are a relatively minor optimization that require a family >=
 # 6 processor, except that it is broken on certain VIA chips.
--- a/arch/x86/include/asm/barrier.h
+++ b/arch/x86/include/asm/barrier.h
@ -85,11 +85,7 @@
 #else
 # define smp_rmb()	barrier()
 #endif
-#ifdef CONFIG_X86_OOSTORE
-# define smp_wmb() 	wmb()
-#else
-# define smp_wmb()	barrier()
-#endif
+#define smp_wmb()	barrier()
 #define smp_read_barrier_depends()	read_barrier_depends()
 #define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
 #else /* !SMP */
@ -100,7 +96,7 @@
 #define set_mb(var, value) do { var = value; barrier(); } while (0)
 #endif /* SMP */

-#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
+#if defined(CONFIG_X86_PPRO_FENCE)

 /*
 * For either of these options x86 doesn't have a strong TSO memory
--- a/arch/x86/include/asm/io.h
+++ b/arch/x86/include/asm/io.h
@ -237,7 +237,7 @@ memcpy_toio(volatile void __iomem *dst, const void *src, size_t count)

 static inline void flush_write_buffers(void)
 {
-#if defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE)
+#if defined(CONFIG_X86_PPRO_FENCE)
 	asm volatile("lock; addl $0,0(%%esp)": : :"memory");
 #endif
 }
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@ -26,10 +26,9 @@
 # define LOCK_PTR_REG "D"
 #endif

-#if defined(CONFIG_X86_32) && \
-	(defined(CONFIG_X86_OOSTORE) || defined(CONFIG_X86_PPRO_FENCE))
+#if defined(CONFIG_X86_32) && (defined(CONFIG_X86_PPRO_FENCE))
 /*
- * On PPro SMP or if we are using OOSTORE, we use a locked operation to unlock
+ * On PPro SMP, we use a locked operation to unlock
 * (PPro errata 66, 92)
 */
 # define UNLOCK_LOCK_PREFIX LOCK_PREFIX
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@ -8,236 +8,6 @@

 #include "cpu.h"

-#ifdef CONFIG_X86_OOSTORE
-
-static u32 power2(u32 x)
-{
-	u32 s = 1;
-
-	while (s <= x)
-		s <<= 1;
-
-	return s >>= 1;
-}
-
-
-/*
- * Set up an actual MCR
- */
-static void centaur_mcr_insert(int reg, u32 base, u32 size, int key)
-{
-	u32 lo, hi;
-
-	hi = base & ~0xFFF;
-	lo = ~(size-1);		/* Size is a power of 2 so this makes a mask */
-	lo &= ~0xFFF;		/* Remove the ctrl value bits */
-	lo |= key;		/* Attribute we wish to set */
-	wrmsr(reg+MSR_IDT_MCR0, lo, hi);
-	mtrr_centaur_report_mcr(reg, lo, hi);	/* Tell the mtrr driver */
-}
-
-/*
- * Figure what we can cover with MCR's
- *
- * Shortcut: We know you can't put 4Gig of RAM on a winchip
- */
-static u32 ramtop(void)
-{
-	u32 clip = 0xFFFFFFFFUL;
-	u32 top = 0;
-	int i;
-
-	for (i = 0; i < e820.nr_map; i++) {
-		unsigned long start, end;
-
-		if (e820.map[i].addr > 0xFFFFFFFFUL)
-			continue;
-		/*
-		 * Don't MCR over reserved space. Ignore the ISA hole
-		 * we frob around that catastrophe already
-		 */
-		if (e820.map[i].type == E820_RESERVED) {
-			if (e820.map[i].addr >= 0x100000UL &&
-			    e820.map[i].addr < clip)
-				clip = e820.map[i].addr;
-			continue;
-		}
-		start = e820.map[i].addr;
-		end = e820.map[i].addr + e820.map[i].size;
-		if (start >= end)
-			continue;
-		if (end > top)
-			top = end;
-	}
-	/*
-	 * Everything below 'top' should be RAM except for the ISA hole.
-	 * Because of the limited MCR's we want to map NV/ACPI into our
-	 * MCR range for gunk in RAM
-	 *
-	 * Clip might cause us to MCR insufficient RAM but that is an
-	 * acceptable failure mode and should only bite obscure boxes with
-	 * a VESA hole at 15Mb
-	 *
-	 * The second case Clip sometimes kicks in is when the EBDA is marked
-	 * as reserved. Again we fail safe with reasonable results
-	 */
-	if (top > clip)
-		top = clip;
-
-	return top;
-}
-
-/*
- * Compute a set of MCR's to give maximum coverage
- */
-static int centaur_mcr_compute(int nr, int key)
-{
-	u32 mem = ramtop();
-	u32 root = power2(mem);
-	u32 base = root;
-	u32 top = root;
-	u32 floor = 0;
-	int ct = 0;
-
-	while (ct < nr) {
-		u32 fspace = 0;
-		u32 high;
-		u32 low;
-
-		/*
-		 * Find the largest block we will fill going upwards
-		 */
-		high = power2(mem-top);
-
-		/*
-		 * Find the largest block we will fill going downwards
-		 */
-		low = base/2;
-
-		/*
-		 * Don't fill below 1Mb going downwards as there
-		 * is an ISA hole in the way.
-		 */
-		if (base <= 1024*1024)
-			low = 0;
-
-		/*
-		 * See how much space we could cover by filling below
-		 * the ISA hole
-		 */
-
-		if (floor == 0)
-			fspace = 512*1024;
-		else if (floor == 512*1024)
-			fspace = 128*1024;
-
-		/* And forget ROM space */
-
-		/*
-		 * Now install the largest coverage we get
-		 */
-		if (fspace > high && fspace > low) {
-			centaur_mcr_insert(ct, floor, fspace, key);
-			floor += fspace;
-		} else if (high > low) {
-			centaur_mcr_insert(ct, top, high, key);
-			top += high;
-		} else if (low > 0) {
-			base -= low;
-			centaur_mcr_insert(ct, base, low, key);
-		} else
-			break;
-		ct++;
-	}
-	/*
-	 * We loaded ct values. We now need to set the mask. The caller
-	 * must do this bit.
-	 */
-	return ct;
-}
-
-static void centaur_create_optimal_mcr(void)
-{
-	int used;
-	int i;
-
-	/*
-	 * Allocate up to 6 mcrs to mark as much of ram as possible
-	 * as write combining and weak write ordered.
-	 *
-	 * To experiment with: Linux never uses stack operations for
-	 * mmio spaces so we could globally enable stack operation wc
-	 *
-	 * Load the registers with type 31 - full write combining, all
-	 * writes weakly ordered.
-	 */
-	used = centaur_mcr_compute(6, 31);
-
-	/*
-	 * Wipe unused MCRs
-	 */
-	for (i = used; i < 8; i++)
-		wrmsr(MSR_IDT_MCR0+i, 0, 0);
-}
-
-static void winchip2_create_optimal_mcr(void)
-{
-	u32 lo, hi;
-	int used;
-	int i;
-
-	/*
-	 * Allocate up to 6 mcrs to mark as much of ram as possible
-	 * as write combining, weak store ordered.
-	 *
-	 * Load the registers with type 25
-	 *	8	-	weak write ordering
-	 *	16	-	weak read ordering
-	 *	1	-	write combining
-	 */
-	used = centaur_mcr_compute(6, 25);
-
-	/*
-	 * Mark the registers we are using.
-	 */
-	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
-	for (i = 0; i < used; i++)
-		lo |= 1<<(9+i);
-	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
-
-	/*
-	 * Wipe unused MCRs
-	 */
-
-	for (i = used; i < 8; i++)
-		wrmsr(MSR_IDT_MCR0+i, 0, 0);
-}
-
-/*
- * Handle the MCR key on the Winchip 2.
- */
-static void winchip2_unprotect_mcr(void)
-{
-	u32 lo, hi;
-	u32 key;
-
-	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
-	lo &= ~0x1C0;	/* blank bits 8-6 */
-	key = (lo>>17) & 7;
-	lo |= key<<6;	/* replace with unlock key */
-	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
-}
-
-static void winchip2_protect_mcr(void)
-{
-	u32 lo, hi;
-
-	rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
-	lo &= ~0x1C0;	/* blank bits 8-6 */
-	wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
-}
-#endif /* CONFIG_X86_OOSTORE */
-
 #define ACE_PRESENT	(1 << 6)
 #define ACE_ENABLED	(1 << 7)
 #define ACE_FCR		(1 << 28)	/* MSR_VIA_FCR */
@ -362,20 +132,6 @@ static void init_centaur(struct cpuinfo_x86 *c)
 			fcr_clr = DPDC;
 			printk(KERN_NOTICE "Disabling bugged TSC.\n");
 			clear_cpu_cap(c, X86_FEATURE_TSC);
-#ifdef CONFIG_X86_OOSTORE
-			centaur_create_optimal_mcr();
-			/*
-			 * Enable:
-			 *	write combining on non-stack, non-string
-			 *	write combining on string, all types
-			 *	weak write ordering
-			 *
-			 * The C6 original lacks weak read order
-			 *
-			 * Note 0x120 is write only on Winchip 1
-			 */
-			wrmsr(MSR_IDT_MCR_CTRL, 0x01F0001F, 0);
-#endif
 			break;
 		case 8:
 			switch (c->x86_mask) {
@ -392,40 +148,12 @@ static void init_centaur(struct cpuinfo_x86 *c)
 			fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
 				  E2MMX|EAMD3D;
 			fcr_clr = DPDC;
-#ifdef CONFIG_X86_OOSTORE
-			winchip2_unprotect_mcr();
-			winchip2_create_optimal_mcr();
-			rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
-			/*
-			 * Enable:
-			 *	write combining on non-stack, non-string
-			 *	write combining on string, all types
-			 *	weak write ordering
-			 */
-			lo |= 31;
-			wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
-			winchip2_protect_mcr();
-#endif
 			break;
 		case 9:
 			name = "3";
 			fcr_set = ECX8|DSMC|DTLOCK|EMMX|EBRPRED|ERETSTK|
 				  E2MMX|EAMD3D;
 			fcr_clr = DPDC;
-#ifdef CONFIG_X86_OOSTORE
-			winchip2_unprotect_mcr();
-			winchip2_create_optimal_mcr();
-			rdmsr(MSR_IDT_MCR_CTRL, lo, hi);
-			/*
-			 * Enable:
-			 *	write combining on non-stack, non-string
-			 *	write combining on string, all types
-			 *	weak write ordering
-			 */
-			lo |= 31;
-			wrmsr(MSR_IDT_MCR_CTRL, lo, hi);
-			winchip2_protect_mcr();
-#endif
 			break;
 		default:
 			name = "??";
--- a/arch/x86/kernel/cpu/perf_event_intel_uncore.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_uncore.c
@ -3334,6 +3334,8 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
 	if (!pmus)
 		return -ENOMEM;

+	type->pmus = pmus;
+
 	type->unconstrainted = (struct event_constraint)
 		__EVENT_CONSTRAINT(0, (1ULL << type->num_counters) - 1,
 				0, type->num_counters, 0, 0);
@ -3369,7 +3371,6 @@ static int __init uncore_type_init(struct intel_uncore_type *type)
 	}

 	type->pmu_group = &uncore_pmu_attr_group;
-	type->pmus = pmus;
 	return 0;
 fail:
 	uncore_type_exit(type);
--- a/arch/x86/kernel/i387.c
+++ b/arch/x86/kernel/i387.c
@ -86,10 +86,19 @@ EXPORT_SYMBOL(__kernel_fpu_begin);

 void __kernel_fpu_end(void)
 {
-	if (use_eager_fpu())
-		math_state_restore();
-	else
+	if (use_eager_fpu()) {
+		/*
+		 * For eager fpu, most the time, tsk_used_math() is true.
+		 * Restore the user math as we are done with the kernel usage.
+		 * At few instances during thread exit, signal handling etc,
+		 * tsk_used_math() is false. Those few places will take proper
+		 * actions, so we don't need to restore the math here.
+		 */
+		if (likely(tsk_used_math(current)))
+			math_state_restore();
+	} else {
 		stts();
+	}
 }
 EXPORT_SYMBOL(__kernel_fpu_end);

--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@ -529,7 +529,7 @@ static void quirk_amd_nb_node(struct pci_dev *dev)
 		return;

 	pci_read_config_dword(nb_ht, 0x60, &val);
-	node = val & 7;
+	node = pcibus_to_node(dev->bus) | (val & 7);
 	/*
 	 * Some hardware may return an invalid node ID,
 	 * so check it first:
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@ -3002,10 +3002,8 @@ static int cr8_write_interception(struct vcpu_svm *svm)
 	u8 cr8_prev = kvm_get_cr8(&svm->vcpu);
 	/* instruction emulation calls kvm_set_cr8() */
 	r = cr_interception(svm);
-	if (irqchip_in_kernel(svm->vcpu.kvm)) {
-		clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+	if (irqchip_in_kernel(svm->vcpu.kvm))
 		return r;
-	}
 	if (cr8_prev <= kvm_get_cr8(&svm->vcpu))
 		return r;
 	kvm_run->exit_reason = KVM_EXIT_SET_TPR;
@ -3567,6 +3565,8 @@ static void update_cr8_intercept(struct kvm_vcpu *vcpu, int tpr, int irr)
 	if (is_guest_mode(vcpu) && (vcpu->arch.hflags & HF_VINTR_MASK))
 		return;

+	clr_cr_intercept(svm, INTERCEPT_CR8_WRITE);
+
 	if (irr == -1)
 		return;

--- a/arch/x86/net/bpf_jit.S
+++ b/arch/x86/net/bpf_jit.S
@ -140,7 +140,7 @@ bpf_slow_path_byte_msh:
 	push	%r9;						\
 	push	SKBDATA;					\
 /* rsi already has offset */					\
-	mov	$SIZE,%ecx;	/* size */			\
+	mov	$SIZE,%edx;	/* size */			\
 	call	bpf_internal_load_pointer_neg_helper;		\
 	test	%rax,%rax;					\
 	pop	SKBDATA;					\
--- a/arch/x86/um/asm/barrier.h
+++ b/arch/x86/um/asm/barrier.h
@ -40,11 +40,7 @@
 #define smp_rmb()	barrier()
 #endif /* CONFIG_X86_PPRO_FENCE */

-#ifdef CONFIG_X86_OOSTORE
-#define smp_wmb()	wmb()
-#else /* CONFIG_X86_OOSTORE */
 #define smp_wmb()	barrier()
-#endif /* CONFIG_X86_OOSTORE */

 #define smp_read_barrier_depends()	read_barrier_depends()
 #define set_mb(var, value) do { (void)xchg(&var, value); } while (0)
--- a/drivers/acpi/sleep.c
+++ b/drivers/acpi/sleep.c
@ -71,6 +71,17 @@ static int acpi_sleep_prepare(u32 acpi_state)
 	return 0;
 }

+static bool acpi_sleep_state_supported(u8 sleep_state)
+{
+	acpi_status status;
+	u8 type_a, type_b;
+
+	status = acpi_get_sleep_type_data(sleep_state, &type_a, &type_b);
+	return ACPI_SUCCESS(status) && (!acpi_gbl_reduced_hardware
+		|| (acpi_gbl_FADT.sleep_control.address
+			&& acpi_gbl_FADT.sleep_status.address));
+}
+
 #ifdef CONFIG_ACPI_SLEEP
 static u32 acpi_target_sleep_state = ACPI_STATE_S0;

@ -604,15 +615,9 @@ static void acpi_sleep_suspend_setup(void)
 {
 	int i;

-	for (i = ACPI_STATE_S1; i < ACPI_STATE_S4; i++) {
-		acpi_status status;
-		u8 type_a, type_b;
-
-		status = acpi_get_sleep_type_data(i, &type_a, &type_b);
-		if (ACPI_SUCCESS(status)) {
+	for (i = ACPI_STATE_S1; i < ACPI_STATE_S4; i++)
+		if (acpi_sleep_state_supported(i))
 			sleep_states[i] = 1;
-		}
-	}

 	suspend_set_ops(old_suspend_ordering ?
 		&acpi_suspend_ops_old : &acpi_suspend_ops);
@ -740,11 +745,7 @@ static const struct platform_hibernation_ops acpi_hibernation_ops_old = {

 static void acpi_sleep_hibernate_setup(void)
 {
-	acpi_status status;
-	u8 type_a, type_b;
-
-	status = acpi_get_sleep_type_data(ACPI_STATE_S4, &type_a, &type_b);
-	if (ACPI_FAILURE(status))
+	if (!acpi_sleep_state_supported(ACPI_STATE_S4))
 		return;

 	hibernation_set_ops(old_suspend_ordering ?
@ -793,8 +794,6 @@ static void acpi_power_off(void)

 int __init acpi_sleep_init(void)
 {
-	acpi_status status;
-	u8 type_a, type_b;
 	char supported[ACPI_S_STATE_COUNT * 3 + 1];
 	char *pos = supported;
 	int i;
@ -806,8 +805,7 @@ int __init acpi_sleep_init(void)
 	acpi_sleep_suspend_setup();
 	acpi_sleep_hibernate_setup();

-	status = acpi_get_sleep_type_data(ACPI_STATE_S5, &type_a, &type_b);
-	if (ACPI_SUCCESS(status)) {
+	if (acpi_sleep_state_supported(ACPI_STATE_S5)) {
 		sleep_states[ACPI_STATE_S5] = 1;
 		pm_power_off_prepare = acpi_power_off_prepare;
 		pm_power_off = acpi_power_off;
--- a/drivers/ata/libata-core.c
+++ b/drivers/ata/libata-core.c
@ -4225,8 +4225,7 @@ static const struct ata_blacklist_entry ata_device_blacklist [] = {

 	/* devices that don't properly handle queued TRIM commands */
 	{ "Micron_M500*",		NULL,	ATA_HORKAGE_NO_NCQ_TRIM, },
-	{ "Crucial_CT???M500SSD1",	NULL,	ATA_HORKAGE_NO_NCQ_TRIM, },
-	{ "Crucial_CT???M500SSD3",	NULL,	ATA_HORKAGE_NO_NCQ_TRIM, },
+	{ "Crucial_CT???M500SSD*",	NULL,	ATA_HORKAGE_NO_NCQ_TRIM, },

 	/*
 	 * Some WD SATA-I drives spin up and down erratically when the link
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@ -1129,7 +1129,7 @@ static int __cpufreq_add_dev(struct device *dev, struct subsys_interface *sif,
 		per_cpu(cpufreq_cpu_data, j) = policy;
 	write_unlock_irqrestore(&cpufreq_driver_lock, flags);

-	if (cpufreq_driver->get) {
+	if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {
 		policy->cur = cpufreq_driver->get(policy->cpu);
 		if (!policy->cur) {
 			pr_err("%s: ->get() failed\n", __func__);
@ -2143,7 +2143,7 @@ int cpufreq_update_policy(unsigned int cpu)
 	 * BIOS might change freq behind our back
 	 * -> ask driver for current freq and notify governors about a change
 	 */
-	if (cpufreq_driver->get) {
+	if (cpufreq_driver->get && !cpufreq_driver->setpolicy) {
 		new_policy.cur = cpufreq_driver->get(cpu);
 		if (!policy->cur) {
 			pr_debug("Driver did not initialize current freq");
--- a/drivers/gpu/drm/ast/ast_ttm.c
+++ b/drivers/gpu/drm/ast/ast_ttm.c
@ -259,7 +259,9 @@ int ast_mm_init(struct ast_private *ast)

 	ret = ttm_bo_device_init(&ast->ttm.bdev,
 				 ast->ttm.bo_global_ref.ref.object,
-				 &ast_bo_driver, DRM_FILE_PAGE_OFFSET,
+				 &ast_bo_driver,
+				 dev->anon_inode->i_mapping,
+				 DRM_FILE_PAGE_OFFSET,
 				 true);
 	if (ret) {
 		DRM_ERROR("Error initialising bo driver; %d\n", ret);
@ -324,7 +326,6 @@ int ast_bo_create(struct drm_device *dev, int size, int align,
 	}

 	astbo->bo.bdev = &ast->ttm.bdev;
-	astbo->bo.bdev->dev_mapping = dev->dev_mapping;

 	ast_ttm_placement(astbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);

--- a/drivers/gpu/drm/bochs/bochs_mm.c
+++ b/drivers/gpu/drm/bochs/bochs_mm.c
@ -225,7 +225,9 @@ int bochs_mm_init(struct bochs_device *bochs)

 	ret = ttm_bo_device_init(&bochs->ttm.bdev,
 				 bochs->ttm.bo_global_ref.ref.object,
-				 &bochs_bo_driver, DRM_FILE_PAGE_OFFSET,
+				 &bochs_bo_driver,
+				 bochs->dev->anon_inode->i_mapping,
+				 DRM_FILE_PAGE_OFFSET,
 				 true);
 	if (ret) {
 		DRM_ERROR("Error initialising bo driver; %d\n", ret);
@ -359,7 +361,7 @@ static int bochs_bo_create(struct drm_device *dev, int size, int align,
 	}

 	bochsbo->bo.bdev = &bochs->ttm.bdev;
-	bochsbo->bo.bdev->dev_mapping = dev->dev_mapping;
+	bochsbo->bo.bdev->dev_mapping = dev->anon_inode->i_mapping;

 	bochs_ttm_placement(bochsbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);

--- a/drivers/gpu/drm/cirrus/cirrus_ttm.c
+++ b/drivers/gpu/drm/cirrus/cirrus_ttm.c
@ -259,7 +259,9 @@ int cirrus_mm_init(struct cirrus_device *cirrus)

 	ret = ttm_bo_device_init(&cirrus->ttm.bdev,
 				 cirrus->ttm.bo_global_ref.ref.object,
-				 &cirrus_bo_driver, DRM_FILE_PAGE_OFFSET,
+				 &cirrus_bo_driver,
+				 dev->anon_inode->i_mapping,
+				 DRM_FILE_PAGE_OFFSET,
 				 true);
 	if (ret) {
 		DRM_ERROR("Error initialising bo driver; %d\n", ret);
@ -329,7 +331,6 @@ int cirrus_bo_create(struct drm_device *dev, int size, int align,
 	}

 	cirrusbo->bo.bdev = &cirrus->ttm.bdev;
-	cirrusbo->bo.bdev->dev_mapping = dev->dev_mapping;

 	cirrus_ttm_placement(cirrusbo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);

--- a/drivers/gpu/drm/drm_crtc.c
+++ b/drivers/gpu/drm/drm_crtc.c
--- a/drivers/gpu/drm/drm_crtc_helper.c
+++ b/drivers/gpu/drm/drm_crtc_helper.c
@ -105,9 +105,6 @@ static void drm_mode_validate_flag(struct drm_connector *connector,
 * @maxX: max width for modes
 * @maxY: max height for modes
 *
- * LOCKING:
- * Caller must hold mode config lock.
- *
 * Based on the helper callbacks implemented by @connector try to detect all
 * valid modes.  Modes will first be added to the connector's probed_modes list,
 * then culled (based on validity and the @maxX, @maxY parameters) and put into
@ -117,8 +114,8 @@ static void drm_mode_validate_flag(struct drm_connector *connector,
 * @connector vfunc for drivers that use the crtc helpers for output mode
 * filtering and detection.
 *
- * RETURNS:
- * Number of modes found on @connector.
+ * Returns:
+ * The number of modes found on @connector.
 */
 int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
 					    uint32_t maxX, uint32_t maxY)
@ -131,6 +128,8 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
 	int mode_flags = 0;
 	bool verbose_prune = true;

+	WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
+
 	DRM_DEBUG_KMS("[CONNECTOR:%d:%s]\n", connector->base.id,
 			drm_get_connector_name(connector));
 	/* set all modes to the unverified state */
@ -176,8 +175,7 @@ int drm_helper_probe_single_connector_modes(struct drm_connector *connector,
 	drm_mode_connector_list_update(connector);

 	if (maxX && maxY)
-		drm_mode_validate_size(dev, &connector->modes, maxX,
-				       maxY, 0);
+		drm_mode_validate_size(dev, &connector->modes, maxX, maxY);

 	if (connector->interlace_allowed)
 		mode_flags |= DRM_MODE_FLAG_INTERLACE;
@ -219,18 +217,19 @@ EXPORT_SYMBOL(drm_helper_probe_single_connector_modes);
 * drm_helper_encoder_in_use - check if a given encoder is in use
 * @encoder: encoder to check
 *
- * LOCKING:
- * Caller must hold mode config lock.
+ * Checks whether @encoder is with the current mode setting output configuration
+ * in use by any connector. This doesn't mean that it is actually enabled since
+ * the DPMS state is tracked separately.
 *
- * Walk @encoders's DRM device's mode_config and see if it's in use.
- *
- * RETURNS:
- * True if @encoder is part of the mode_config, false otherwise.
+ * Returns:
+ * True if @encoder is used, false otherwise.
 */
 bool drm_helper_encoder_in_use(struct drm_encoder *encoder)
 {
 	struct drm_connector *connector;
 	struct drm_device *dev = encoder->dev;
+
+	WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head)
 		if (connector->encoder == encoder)
 			return true;
@ -242,19 +241,19 @@ EXPORT_SYMBOL(drm_helper_encoder_in_use);
 * drm_helper_crtc_in_use - check if a given CRTC is in a mode_config
 * @crtc: CRTC to check
 *
- * LOCKING:
- * Caller must hold mode config lock.
+ * Checks whether @crtc is with the current mode setting output configuration
+ * in use by any connector. This doesn't mean that it is actually enabled since
+ * the DPMS state is tracked separately.
 *
- * Walk @crtc's DRM device's mode_config and see if it's in use.
- *
- * RETURNS:
- * True if @crtc is part of the mode_config, false otherwise.
+ * Returns:
+ * True if @crtc is used, false otherwise.
 */
 bool drm_helper_crtc_in_use(struct drm_crtc *crtc)
 {
 	struct drm_encoder *encoder;
 	struct drm_device *dev = crtc->dev;
-	/* FIXME: Locking around list access? */
+
+	WARN_ON(!mutex_is_locked(&dev->mode_config.mutex));
 	list_for_each_entry(encoder, &dev->mode_config.encoder_list, head)
 		if (encoder->crtc == crtc && drm_helper_encoder_in_use(encoder))
 			return true;
@ -283,11 +282,11 @@ drm_encoder_disable(struct drm_encoder *encoder)
 * drm_helper_disable_unused_functions - disable unused objects
 * @dev: DRM device
 *
- * LOCKING:
- * Caller must hold mode config lock.
- *
- * If an connector or CRTC isn't part of @dev's mode_config, it can be disabled
- * by calling its dpms function, which should power it off.
+ * This function walks through the entire mode setting configuration of @dev. It
+ * will remove any crtc links of unused encoders and encoder links of
+ * disconnected connectors. Then it will disable all unused encoders and crtcs
+ * either by calling their disable callback if available or by calling their
+ * dpms callback with DRM_MODE_DPMS_OFF.
 */
 void drm_helper_disable_unused_functions(struct drm_device *dev)
 {
@ -295,6 +294,8 @@ void drm_helper_disable_unused_functions(struct drm_device *dev)
 	struct drm_connector *connector;
 	struct drm_crtc *crtc;

+	drm_warn_on_modeset_not_all_locked(dev);
+
 	list_for_each_entry(connector, &dev->mode_config.connector_list, head) {
 		if (!connector->encoder)
 			continue;
@ -355,9 +356,6 @@ drm_crtc_prepare_encoders(struct drm_device *dev)
 * @y: vertical offset into the surface
 * @old_fb: old framebuffer, for cleanup
 *
- * LOCKING:
- * Caller must hold mode config lock.
- *
 * Try to set @mode on @crtc.  Give @crtc and its associated connectors a chance
 * to fixup or reject the mode prior to trying to set it. This is an internal
 * helper that drivers could e.g. use to update properties that require the
@ -367,8 +365,8 @@ drm_crtc_prepare_encoders(struct drm_device *dev)
 * drm_crtc_helper_set_config() helper function to drive the mode setting
 * sequence.
 *
- * RETURNS:
- * True if the mode was set successfully, or false otherwise.
+ * Returns:
+ * True if the mode was set successfully, false otherwise.
 */
 bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
 			      struct drm_display_mode *mode,
@ -384,6 +382,8 @@ bool drm_crtc_helper_set_mode(struct drm_crtc *crtc,
 	struct drm_encoder *encoder;
 	bool ret = true;

+	drm_warn_on_modeset_not_all_locked(dev);
+
 	saved_enabled = crtc->enabled;
 	crtc->enabled = drm_helper_crtc_in_use(crtc);
 	if (!crtc->enabled)
@ -560,17 +560,14 @@ drm_crtc_helper_disable(struct drm_crtc *crtc)
 * drm_crtc_helper_set_config - set a new config from userspace
 * @set: mode set configuration
 *
- * LOCKING:
- * Caller must hold mode config lock.
- *
 * Setup a new configuration, provided by the upper layers (either an ioctl call
 * from userspace or internally e.g. from the fbdev suppport code) in @set, and
 * enable it. This is the main helper functions for drivers that implement
 * kernel mode setting with the crtc helper functions and the assorted
 * ->prepare(), ->modeset() and ->commit() helper callbacks.
 *
- * RETURNS:
- * Returns 0 on success, -ERRNO on failure.
+ * Returns:
+ * Returns 0 on success, negative errno numbers on failure.
 */
 int drm_crtc_helper_set_config(struct drm_mode_set *set)
 {
@ -612,6 +609,8 @@ int drm_crtc_helper_set_config(struct drm_mode_set *set)

 	dev = set->crtc->dev;

+	drm_warn_on_modeset_not_all_locked(dev);
+
 	/*
 	 * Allocate space for the backup of all (non-pointer) encoder and
 	 * connector data.
@ -924,8 +923,16 @@ void drm_helper_connector_dpms(struct drm_connector *connector, int mode)
 }
 EXPORT_SYMBOL(drm_helper_connector_dpms);

-int drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb,
-				   struct drm_mode_fb_cmd2 *mode_cmd)
+/**
+ * drm_helper_mode_fill_fb_struct - fill out framebuffer metadata
+ * @fb: drm_framebuffer object to fill out
+ * @mode_cmd: metadata from the userspace fb creation request
+ *
+ * This helper can be used in a drivers fb_create callback to pre-fill the fb's
+ * metadata fields.
+ */
+void drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb,
+				    struct drm_mode_fb_cmd2 *mode_cmd)
 {
 	int i;

@ -938,17 +945,36 @@ int drm_helper_mode_fill_fb_struct(struct drm_framebuffer *fb,
 	drm_fb_get_bpp_depth(mode_cmd->pixel_format, &fb->depth,
 				    &fb->bits_per_pixel);
 	fb->pixel_format = mode_cmd->pixel_format;
-
-	return 0;
 }
 EXPORT_SYMBOL(drm_helper_mode_fill_fb_struct);

-int drm_helper_resume_force_mode(struct drm_device *dev)
+/**
+ * drm_helper_resume_force_mode - force-restore mode setting configuration
+ * @dev: drm_device which should be restored
+ *
+ * Drivers which use the mode setting helpers can use this function to
+ * force-restore the mode setting configuration e.g. on resume or when something
+ * else might have trampled over the hw state (like some overzealous old BIOSen
+ * tended to do).
+ *
+ * This helper doesn't provide a error return value since restoring the old
+ * config should never fail due to resource allocation issues since the driver
+ * has successfully set the restored configuration already. Hence this should
+ * boil down to the equivalent of a few dpms on calls, which also don't provide
+ * an error code.
+ *
+ * Drivers where simply restoring an old configuration again might fail (e.g.
+ * due to slight differences in allocating shared resources when the
+ * configuration is restored in a different order than when userspace set it up)
+ * need to use their own restore logic.
+ */
+void drm_helper_resume_force_mode(struct drm_device *dev)
 {
 	struct drm_crtc *crtc;
 	struct drm_encoder *encoder;
 	struct drm_crtc_helper_funcs *crtc_funcs;
-	int ret, encoder_dpms;
+	int encoder_dpms;
+	bool ret;

 	list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {

@ -958,6 +984,7 @@ int drm_helper_resume_force_mode(struct drm_device *dev)
 		ret = drm_crtc_helper_set_mode(crtc, &crtc->mode,
 					       crtc->x, crtc->y, crtc->fb);

+		/* Restoring the old config should never fail! */
 		if (ret == false)
 			DRM_ERROR("failed to set mode on crtc %p\n", crtc);

@ -980,12 +1007,28 @@ int drm_helper_resume_force_mode(struct drm_device *dev)
 						     drm_helper_choose_crtc_dpms(crtc));
 		}
 	}
+
 	/* disable the unused connectors while restoring the modesetting */
 	drm_helper_disable_unused_functions(dev);
-	return 0;
 }
 EXPORT_SYMBOL(drm_helper_resume_force_mode);

+/**
+ * drm_kms_helper_hotplug_event - fire off KMS hotplug events
+ * @dev: drm_device whose connector state changed
+ *
+ * This function fires off the uevent for userspace and also calls the
+ * output_poll_changed function, which is most commonly used to inform the fbdev
+ * emulation code and allow it to update the fbcon output configuration.
+ *
+ * Drivers should call this from their hotplug handling code when a change is
+ * detected. Note that this function does not do any output detection of its
+ * own, like drm_helper_hpd_irq_event() does - this is assumed to be done by the
+ * driver already.
+ *
+ * This function must be called from process context with no mode
+ * setting locks held.
+ */
 void drm_kms_helper_hotplug_event(struct drm_device *dev)
 {
 	/* send a uevent + call fbdev */
@ -1054,6 +1097,16 @@ static void output_poll_execute(struct work_struct *work)
 		schedule_delayed_work(delayed_work, DRM_OUTPUT_POLL_PERIOD);
 }

+/**
+ * drm_kms_helper_poll_disable - disable output polling
+ * @dev: drm_device
+ *
+ * This function disables the output polling work.
+ *
+ * Drivers can call this helper from their device suspend implementation. It is
+ * not an error to call this even when output polling isn't enabled or arlready
+ * disabled.
+ */
 void drm_kms_helper_poll_disable(struct drm_device *dev)
 {
 	if (!dev->mode_config.poll_enabled)
@ -1062,6 +1115,16 @@ void drm_kms_helper_poll_disable(struct drm_device *dev)
 }
 EXPORT_SYMBOL(drm_kms_helper_poll_disable);

+/**
+ * drm_kms_helper_poll_enable - re-enable output polling.
+ * @dev: drm_device
+ *
+ * This function re-enables the output polling work.
+ *
+ * Drivers can call this helper from their device resume implementation. It is
+ * an error to call this when the output polling support has not yet been set
+ * up.
+ */
 void drm_kms_helper_poll_enable(struct drm_device *dev)
 {
 	bool poll = false;
@ -1081,6 +1144,25 @@ void drm_kms_helper_poll_enable(struct drm_device *dev)
 }
 EXPORT_SYMBOL(drm_kms_helper_poll_enable);

+/**
+ * drm_kms_helper_poll_init - initialize and enable output polling
+ * @dev: drm_device
+ *
+ * This function intializes and then also enables output polling support for
+ * @dev. Drivers which do not have reliable hotplug support in hardware can use
+ * this helper infrastructure to regularly poll such connectors for changes in
+ * their connection state.
+ *
+ * Drivers can control which connectors are polled by setting the
+ * DRM_CONNECTOR_POLL_CONNECT and DRM_CONNECTOR_POLL_DISCONNECT flags. On
+ * connectors where probing live outputs can result in visual distortion drivers
+ * should not set the DRM_CONNECTOR_POLL_DISCONNECT flag to avoid this.
+ * Connectors which have no flag or only DRM_CONNECTOR_POLL_HPD set are
+ * completely ignored by the polling logic.
+ *
+ * Note that a connector can be both polled and probed from the hotplug handler,
+ * in case the hotplug interrupt is known to be unreliable.
+ */
 void drm_kms_helper_poll_init(struct drm_device *dev)
 {
 	INIT_DELAYED_WORK(&dev->mode_config.output_poll_work, output_poll_execute);
@ -1090,12 +1172,39 @@ void drm_kms_helper_poll_init(struct drm_device *dev)
 }
 EXPORT_SYMBOL(drm_kms_helper_poll_init);

+/**
+ * drm_kms_helper_poll_fini - disable output polling and clean it up
+ * @dev: drm_device
+ */
 void drm_kms_helper_poll_fini(struct drm_device *dev)
 {
 	drm_kms_helper_poll_disable(dev);
 }
 EXPORT_SYMBOL(drm_kms_helper_poll_fini);

+/**
+ * drm_helper_hpd_irq_event - hotplug processing
+ * @dev: drm_device
+ *
+ * Drivers can use this helper function to run a detect cycle on all connectors
+ * which have the DRM_CONNECTOR_POLL_HPD flag set in their &polled member. All
+ * other connectors are ignored, which is useful to avoid reprobing fixed
+ * panels.
+ *
+ * This helper function is useful for drivers which can't or don't track hotplug
+ * interrupts for each connector.
+ *
+ * Drivers which support hotplug interrupts for each connector individually and
+ * which have a more fine-grained detect logic should bypass this code and
+ * directly call drm_kms_helper_hotplug_event() in case the connector state
+ * changed.
+ *
+ * This function must be called from process context with no mode
+ * setting locks held.
+ *
+ * Note that a connector can be both polled and probed from the hotplug handler,
+ * in case the hotplug interrupt is known to be unreliable.
+ */
 bool drm_helper_hpd_irq_event(struct drm_device *dev)
 {
 	struct drm_connector *connector;
--- a/drivers/gpu/drm/drm_crtc_internal.h
+++ b/drivers/gpu/drm/drm_crtc_internal.h
@ -0,0 +1,38 @@
+/*
+ * Copyright © 2006 Keith Packard
+ * Copyright © 2007-2008 Dave Airlie
+ * Copyright © 2007-2008 Intel Corporation
+ *   Jesse Barnes <jesse.barnes@intel.com>
+ * Copyright © 2014 Intel Corporation
+ *   Daniel Vetter <daniel.vetter@ffwll.ch>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+/*
+ * This header file contains mode setting related functions and definitions
+ * which are only used within the drm module as internal implementation details
+ * and are not exported to drivers.
+ */
+
+int drm_mode_object_get(struct drm_device *dev,
+			struct drm_mode_object *obj, uint32_t obj_type);
+void drm_mode_object_put(struct drm_device *dev,
+			 struct drm_mode_object *object);
+
--- a/drivers/gpu/drm/drm_dp_helper.c
+++ b/drivers/gpu/drm/drm_dp_helper.c
@ -346,3 +346,399 @@ int drm_dp_bw_code_to_link_rate(u8 link_bw)
 	}
 }
 EXPORT_SYMBOL(drm_dp_bw_code_to_link_rate);
+
+/**
+ * DOC: dp helpers
+ *
+ * The DisplayPort AUX channel is an abstraction to allow generic, driver-
+ * independent access to AUX functionality. Drivers can take advantage of
+ * this by filling in the fields of the drm_dp_aux structure.
+ *
+ * Transactions are described using a hardware-independent drm_dp_aux_msg
+ * structure, which is passed into a driver's .transfer() implementation.
+ * Both native and I2C-over-AUX transactions are supported.
+ */
+
+static int drm_dp_dpcd_access(struct drm_dp_aux *aux, u8 request,
+			      unsigned int offset, void *buffer, size_t size)
+{
+	struct drm_dp_aux_msg msg;
+	unsigned int retry;
+	int err;
+
+	memset(&msg, 0, sizeof(msg));
+	msg.address = offset;
+	msg.request = request;
+	msg.buffer = buffer;
+	msg.size = size;
+
+	/*
+	 * The specification doesn't give any recommendation on how often to
+	 * retry native transactions, so retry 7 times like for I2C-over-AUX
+	 * transactions.
+	 */
+	for (retry = 0; retry < 7; retry++) {
+		err = aux->transfer(aux, &msg);
+		if (err < 0) {
+			if (err == -EBUSY)
+				continue;
+
+			return err;
+		}
+
+		if (err < size)
+			return -EPROTO;
+
+		switch (msg.reply & DP_AUX_NATIVE_REPLY_MASK) {
+		case DP_AUX_NATIVE_REPLY_ACK:
+			return err;
+
+		case DP_AUX_NATIVE_REPLY_NACK:
+			return -EIO;
+
+		case DP_AUX_NATIVE_REPLY_DEFER:
+			usleep_range(400, 500);
+			break;
+		}
+	}
+
+	DRM_ERROR("too many retries, giving up\n");
+	return -EIO;
+}
+
+/**
+ * drm_dp_dpcd_read() - read a series of bytes from the DPCD
+ * @aux: DisplayPort AUX channel
+ * @offset: address of the (first) register to read
+ * @buffer: buffer to store the register values
+ * @size: number of bytes in @buffer
+ *
+ * Returns the number of bytes transferred on success, or a negative error
+ * code on failure. -EIO is returned if the request was NAKed by the sink or
+ * if the retry count was exceeded. If not all bytes were transferred, this
+ * function returns -EPROTO. Errors from the underlying AUX channel transfer
+ * function, with the exception of -EBUSY (which causes the transaction to
+ * be retried), are propagated to the caller.
+ */
+ssize_t drm_dp_dpcd_read(struct drm_dp_aux *aux, unsigned int offset,
+			 void *buffer, size_t size)
+{
+	return drm_dp_dpcd_access(aux, DP_AUX_NATIVE_READ, offset, buffer,
+				  size);
+}
+EXPORT_SYMBOL(drm_dp_dpcd_read);
+
+/**
+ * drm_dp_dpcd_write() - write a series of bytes to the DPCD
+ * @aux: DisplayPort AUX channel
+ * @offset: address of the (first) register to write
+ * @buffer: buffer containing the values to write
+ * @size: number of bytes in @buffer
+ *
+ * Returns the number of bytes transferred on success, or a negative error
+ * code on failure. -EIO is returned if the request was NAKed by the sink or
+ * if the retry count was exceeded. If not all bytes were transferred, this
+ * function returns -EPROTO. Errors from the underlying AUX channel transfer
+ * function, with the exception of -EBUSY (which causes the transaction to
+ * be retried), are propagated to the caller.
+ */
+ssize_t drm_dp_dpcd_write(struct drm_dp_aux *aux, unsigned int offset,
+			  void *buffer, size_t size)
+{
+	return drm_dp_dpcd_access(aux, DP_AUX_NATIVE_WRITE, offset, buffer,
+				  size);
+}
+EXPORT_SYMBOL(drm_dp_dpcd_write);
+
+/**
+ * drm_dp_dpcd_read_link_status() - read DPCD link status (bytes 0x202-0x207)
+ * @aux: DisplayPort AUX channel
+ * @status: buffer to store the link status in (must be at least 6 bytes)
+ *
+ * Returns the number of bytes transferred on success or a negative error
+ * code on failure.
+ */
+int drm_dp_dpcd_read_link_status(struct drm_dp_aux *aux,
+				 u8 status[DP_LINK_STATUS_SIZE])
+{
+	return drm_dp_dpcd_read(aux, DP_LANE0_1_STATUS, status,
+				DP_LINK_STATUS_SIZE);
+}
+EXPORT_SYMBOL(drm_dp_dpcd_read_link_status);
+
+/**
+ * drm_dp_link_probe() - probe a DisplayPort link for capabilities
+ * @aux: DisplayPort AUX channel
+ * @link: pointer to structure in which to return link capabilities
+ *
+ * The structure filled in by this function can usually be passed directly
+ * into drm_dp_link_power_up() and drm_dp_link_configure() to power up and
+ * configure the link based on the link's capabilities.
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_probe(struct drm_dp_aux *aux, struct drm_dp_link *link)
+{
+	u8 values[3];
+	int err;
+
+	memset(link, 0, sizeof(*link));
+
+	err = drm_dp_dpcd_read(aux, DP_DPCD_REV, values, sizeof(values));
+	if (err < 0)
+		return err;
+
+	link->revision = values[0];
+	link->rate = drm_dp_bw_code_to_link_rate(values[1]);
+	link->num_lanes = values[2] & DP_MAX_LANE_COUNT_MASK;
+
+	if (values[2] & DP_ENHANCED_FRAME_CAP)
+		link->capabilities |= DP_LINK_CAP_ENHANCED_FRAMING;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_dp_link_probe);
+
+/**
+ * drm_dp_link_power_up() - power up a DisplayPort link
+ * @aux: DisplayPort AUX channel
+ * @link: pointer to a structure containing the link configuration
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_power_up(struct drm_dp_aux *aux, struct drm_dp_link *link)
+{
+	u8 value;
+	int err;
+
+	/* DP_SET_POWER register is only available on DPCD v1.1 and later */
+	if (link->revision < 0x11)
+		return 0;
+
+	err = drm_dp_dpcd_readb(aux, DP_SET_POWER, &value);
+	if (err < 0)
+		return err;
+
+	value &= ~DP_SET_POWER_MASK;
+	value |= DP_SET_POWER_D0;
+
+	err = drm_dp_dpcd_writeb(aux, DP_SET_POWER, value);
+	if (err < 0)
+		return err;
+
+	/*
+	 * According to the DP 1.1 specification, a "Sink Device must exit the
+	 * power saving state within 1 ms" (Section 2.5.3.1, Table 5-52, "Sink
+	 * Control Field" (register 0x600).
+	 */
+	usleep_range(1000, 2000);
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_dp_link_power_up);
+
+/**
+ * drm_dp_link_configure() - configure a DisplayPort link
+ * @aux: DisplayPort AUX channel
+ * @link: pointer to a structure containing the link configuration
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_link_configure(struct drm_dp_aux *aux, struct drm_dp_link *link)
+{
+	u8 values[2];
+	int err;
+
+	values[0] = drm_dp_link_rate_to_bw_code(link->rate);
+	values[1] = link->num_lanes;
+
+	if (link->capabilities & DP_LINK_CAP_ENHANCED_FRAMING)
+		values[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
+
+	err = drm_dp_dpcd_write(aux, DP_LINK_BW_SET, values, sizeof(values));
+	if (err < 0)
+		return err;
+
+	return 0;
+}
+EXPORT_SYMBOL(drm_dp_link_configure);
+
+/*
+ * I2C-over-AUX implementation
+ */
+
+static u32 drm_dp_i2c_functionality(struct i2c_adapter *adapter)
+{
+	return I2C_FUNC_I2C | I2C_FUNC_SMBUS_EMUL |
+	       I2C_FUNC_SMBUS_READ_BLOCK_DATA |
+	       I2C_FUNC_SMBUS_BLOCK_PROC_CALL |
+	       I2C_FUNC_10BIT_ADDR;
+}
+
+/*
+ * Transfer a single I2C-over-AUX message and handle various error conditions,
+ * retrying the transaction as appropriate.
+ */
+static int drm_dp_i2c_do_msg(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
+{
+	unsigned int retry;
+	int err;
+
+	/*
+	 * DP1.2 sections 2.7.7.1.5.6.1 and 2.7.7.1.6.6.1: A DP Source device
+	 * is required to retry at least seven times upon receiving AUX_DEFER
+	 * before giving up the AUX transaction.
+	 */
+	for (retry = 0; retry < 7; retry++) {
+		err = aux->transfer(aux, msg);
+		if (err < 0) {
+			if (err == -EBUSY)
+				continue;
+
+			DRM_DEBUG_KMS("transaction failed: %d\n", err);
+			return err;
+		}
+
+		if (err < msg->size)
+			return -EPROTO;
+
+		switch (msg->reply & DP_AUX_NATIVE_REPLY_MASK) {
+		case DP_AUX_NATIVE_REPLY_ACK:
+			/*
+			 * For I2C-over-AUX transactions this isn't enough, we
+			 * need to check for the I2C ACK reply.
+			 */
+			break;
+
+		case DP_AUX_NATIVE_REPLY_NACK:
+			DRM_DEBUG_KMS("native nack\n");
+			return -EREMOTEIO;
+
+		case DP_AUX_NATIVE_REPLY_DEFER:
+			DRM_DEBUG_KMS("native defer");
+			/*
+			 * We could check for I2C bit rate capabilities and if
+			 * available adjust this interval. We could also be
+			 * more careful with DP-to-legacy adapters where a
+			 * long legacy cable may force very low I2C bit rates.
+			 *
+			 * For now just defer for long enough to hopefully be
+			 * safe for all use-cases.
+			 */
+			usleep_range(500, 600);
+			continue;
+
+		default:
+			DRM_ERROR("invalid native reply %#04x\n", msg->reply);
+			return -EREMOTEIO;
+		}
+
+		switch (msg->reply & DP_AUX_I2C_REPLY_MASK) {
+		case DP_AUX_I2C_REPLY_ACK:
+			/*
+			 * Both native ACK and I2C ACK replies received. We
+			 * can assume the transfer was successful.
+			 */
+			return 0;
+
+		case DP_AUX_I2C_REPLY_NACK:
+			DRM_DEBUG_KMS("I2C nack\n");
+			return -EREMOTEIO;
+
+		case DP_AUX_I2C_REPLY_DEFER:
+			DRM_DEBUG_KMS("I2C defer\n");
+			usleep_range(400, 500);
+			continue;
+
+		default:
+			DRM_ERROR("invalid I2C reply %#04x\n", msg->reply);
+			return -EREMOTEIO;
+		}
+	}
+
+	DRM_ERROR("too many retries, giving up\n");
+	return -EREMOTEIO;
+}
+
+static int drm_dp_i2c_xfer(struct i2c_adapter *adapter, struct i2c_msg *msgs,
+			   int num)
+{
+	struct drm_dp_aux *aux = adapter->algo_data;
+	unsigned int i, j;
+
+	for (i = 0; i < num; i++) {
+		struct drm_dp_aux_msg msg;
+		int err;
+
+		/*
+		 * Many hardware implementations support FIFOs larger than a
+		 * single byte, but it has been empirically determined that
+		 * transferring data in larger chunks can actually lead to
+		 * decreased performance. Therefore each message is simply
+		 * transferred byte-by-byte.
+		 */
+		for (j = 0; j < msgs[i].len; j++) {
+			memset(&msg, 0, sizeof(msg));
+			msg.address = msgs[i].addr;
+
+			msg.request = (msgs[i].flags & I2C_M_RD) ?
+					DP_AUX_I2C_READ :
+					DP_AUX_I2C_WRITE;
+
+			/*
+			 * All messages except the last one are middle-of-
+			 * transfer messages.
+			 */
+			if ((i < num - 1) || (j < msgs[i].len - 1))
+				msg.request |= DP_AUX_I2C_MOT;
+
+			msg.buffer = msgs[i].buf + j;
+			msg.size = 1;
+
+			err = drm_dp_i2c_do_msg(aux, &msg);
+			if (err < 0)
+				return err;
+		}
+	}
+
+	return num;
+}
+
+static const struct i2c_algorithm drm_dp_i2c_algo = {
+	.functionality = drm_dp_i2c_functionality,
+	.master_xfer = drm_dp_i2c_xfer,
+};
+
+/**
+ * drm_dp_aux_register_i2c_bus() - register an I2C adapter for I2C-over-AUX
+ * @aux: DisplayPort AUX channel
+ *
+ * Returns 0 on success or a negative error code on failure.
+ */
+int drm_dp_aux_register_i2c_bus(struct drm_dp_aux *aux)
+{
+	aux->ddc.algo = &drm_dp_i2c_algo;
+	aux->ddc.algo_data = aux;
+	aux->ddc.retries = 3;
+
+	aux->ddc.class = I2C_CLASS_DDC;
+	aux->ddc.owner = THIS_MODULE;
+	aux->ddc.dev.parent = aux->dev;
+	aux->ddc.dev.of_node = aux->dev->of_node;
+
+	strlcpy(aux->ddc.name, aux->name ? aux->name : dev_name(aux->dev),
+		sizeof(aux->ddc.name));
+
+	return i2c_add_adapter(&aux->ddc);
+}
+EXPORT_SYMBOL(drm_dp_aux_register_i2c_bus);
+
+/**
+ * drm_dp_aux_unregister_i2c_bus() - unregister an I2C-over-AUX adapter
+ * @aux: DisplayPort AUX channel
+ */
+void drm_dp_aux_unregister_i2c_bus(struct drm_dp_aux *aux)
+{
+	i2c_del_adapter(&aux->ddc);
+}
+EXPORT_SYMBOL(drm_dp_aux_unregister_i2c_bus);
--- a/drivers/gpu/drm/drm_drv.c
+++ b/drivers/gpu/drm/drm_drv.c
@ -344,7 +344,7 @@ long drm_ioctl(struct file *filp,

 	DRM_DEBUG("pid=%d, dev=0x%lx, auth=%d, %s\n",
 		  task_pid_nr(current),
-		  (long)old_encode_dev(file_priv->minor->device),
+		  (long)old_encode_dev(file_priv->minor->kdev->devt),
 		  file_priv->authenticated, ioctl->name);

 	/* Do not trust userspace, use our own definition */
@ -402,7 +402,7 @@ long drm_ioctl(struct file *filp,
 	if (!ioctl)
 		DRM_DEBUG("invalid ioctl: pid=%d, dev=0x%lx, auth=%d, cmd=0x%02x, nr=0x%02x\n",
 			  task_pid_nr(current),
-			  (long)old_encode_dev(file_priv->minor->device),
+			  (long)old_encode_dev(file_priv->minor->kdev->devt),
 			  file_priv->authenticated, cmd, nr);

 	if (kdata != stack_kdata)
--- a/drivers/gpu/drm/drm_edid.c
+++ b/drivers/gpu/drm/drm_edid.c
@ -1098,10 +1098,14 @@ EXPORT_SYMBOL(drm_edid_is_valid);
 /**
 * Get EDID information via I2C.
 *
- * \param adapter : i2c device adaptor
- * \param buf     : EDID data buffer to be filled
- * \param len     : EDID data buffer length
- * \return 0 on success or -1 on failure.
+ * @adapter : i2c device adaptor
+ * @buf: EDID data buffer to be filled
+ * @block: 128 byte EDID block to start fetching from
+ * @len: EDID data buffer length to fetch
+ *
+ * Returns:
+ *
+ * 0 on success or -1 on failure.
 *
 * Try to fetch EDID information by calling i2c driver function.
 */
@ -1243,9 +1247,11 @@ out:

 /**
 * Probe DDC presence.
+ * @adapter: i2c adapter to probe
 *
- * \param adapter : i2c device adaptor
- * \return 1 on success
+ * Returns:
+ *
+ * 1 on success
 */
 bool
 drm_probe_ddc(struct i2c_adapter *adapter)
@ -1586,8 +1592,10 @@ bad_std_timing(u8 a, u8 b)

 /**
 * drm_mode_std - convert standard mode info (width, height, refresh) into mode
+ * @connector: connector of for the EDID block
+ * @edid: EDID block to scan
 * @t: standard timing params
- * @timing_level: standard timing level
+ * @revision: standard timing level
 *
 * Take the standard timing params (in this case width, aspect, and refresh)
 * and convert them into a real mode using CVT/GTF/DMT.
@ -2132,6 +2140,7 @@ do_established_modes(struct detailed_timing *timing, void *c)

 /**
 * add_established_modes - get est. modes from EDID and add them
+ * @connector: connector of for the EDID block
 * @edid: EDID block to scan
 *
 * Each EDID block contains a bitmap of the supported "established modes" list
@ -2194,6 +2203,7 @@ do_standard_modes(struct detailed_timing *timing, void *c)

 /**
 * add_standard_modes - get std. modes from EDID and add them
+ * @connector: connector of for the EDID block
 * @edid: EDID block to scan
 *
 * Standard modes can be calculated using the appropriate standard (DMT,
@ -2580,6 +2590,9 @@ drm_display_mode_from_vic_index(struct drm_connector *connector,
 		return NULL;

 	newmode = drm_mode_duplicate(dev, &edid_cea_modes[cea_mode]);
+	if (!newmode)
+		return NULL;
+
 	newmode->vrefresh = 0;

 	return newmode;
@ -3300,6 +3313,7 @@ EXPORT_SYMBOL(drm_detect_hdmi_monitor);

 /**
 * drm_detect_monitor_audio - check monitor audio capability
+ * @edid: EDID block to scan
 *
 * Monitor should have CEA extension block.
 * If monitor has 'basic audio', but no CEA audio blocks, it's 'basic
@ -3345,6 +3359,7 @@ EXPORT_SYMBOL(drm_detect_monitor_audio);

 /**
 * drm_rgb_quant_range_selectable - is RGB quantization range selectable?
+ * @edid: EDID block to scan
 *
 * Check whether the monitor reports the RGB quantization range selection
 * as supported. The AVI infoframe can then be used to inform the monitor
@ -3564,8 +3579,8 @@ void drm_set_preferred_mode(struct drm_connector *connector,
 	struct drm_display_mode *mode;

 	list_for_each_entry(mode, &connector->probed_modes, head) {
-		if (drm_mode_width(mode)  == hpref &&
-		    drm_mode_height(mode) == vpref)
+		if (mode->hdisplay  == hpref &&
+		    mode->vdisplay == vpref)
 			mode->type |= DRM_MODE_TYPE_PREFERRED;
 	}
 }
@ -3599,6 +3614,7 @@ drm_hdmi_avi_infoframe_from_display_mode(struct hdmi_avi_infoframe *frame,

 	frame->picture_aspect = HDMI_PICTURE_ASPECT_NONE;
 	frame->active_aspect = HDMI_ACTIVE_ASPECT_PICTURE;
+	frame->scan_mode = HDMI_SCAN_MODE_UNDERSCAN;

 	return 0;
 }
--- a/drivers/gpu/drm/drm_fb_helper.c
+++ b/drivers/gpu/drm/drm_fb_helper.c
@ -516,6 +516,9 @@ int drm_fb_helper_init(struct drm_device *dev,
 	struct drm_crtc *crtc;
 	int i;

+	if (!max_conn_count)
+		return -EINVAL;
+
 	fb_helper->dev = dev;

 	INIT_LIST_HEAD(&fb_helper->kernel_fb_list);
@ -809,8 +812,6 @@ int drm_fb_helper_set_par(struct fb_info *info)
 	struct drm_fb_helper *fb_helper = info->par;
 	struct drm_device *dev = fb_helper->dev;
 	struct fb_var_screeninfo *var = &info->var;
-	int ret;
-	int i;

 	if (var->pixclock != 0) {
 		DRM_ERROR("PIXEL CLOCK SET\n");
@ -818,13 +819,7 @@ int drm_fb_helper_set_par(struct fb_info *info)
 	}

 	drm_modeset_lock_all(dev);
-	for (i = 0; i < fb_helper->crtc_count; i++) {
-		ret = drm_mode_set_config_internal(&fb_helper->crtc_info[i].mode_set);
-		if (ret) {
-			drm_modeset_unlock_all(dev);
-			return ret;
-		}
-	}
+	drm_fb_helper_restore_fbdev_mode(fb_helper);
 	drm_modeset_unlock_all(dev);

 	if (fb_helper->delayed_hotplug) {
@ -1141,8 +1136,8 @@ struct drm_display_mode *drm_has_preferred_mode(struct drm_fb_helper_connector *
 	struct drm_display_mode *mode;

 	list_for_each_entry(mode, &fb_connector->connector->modes, head) {
-		if (drm_mode_width(mode) > width ||
-		    drm_mode_height(mode) > height)
+		if (mode->hdisplay > width ||
+		    mode->vdisplay > height)
 			continue;
 		if (mode->type & DRM_MODE_TYPE_PREFERRED)
 			return mode;
--- a/drivers/gpu/drm/drm_fops.c
+++ b/drivers/gpu/drm/drm_fops.c
@ -39,12 +39,12 @@
 #include <linux/slab.h>
 #include <linux/module.h>

-/* from BKL pushdown: note that nothing else serializes idr_find() */
+/* from BKL pushdown */
 DEFINE_MUTEX(drm_global_mutex);
 EXPORT_SYMBOL(drm_global_mutex);

 static int drm_open_helper(struct inode *inode, struct file *filp,
-			   struct drm_device * dev);
+			   struct drm_minor *minor);

 static int drm_setup(struct drm_device * dev)
 {
@ -79,38 +79,23 @@ static int drm_setup(struct drm_device * dev)
 */
 int drm_open(struct inode *inode, struct file *filp)
 {
-	struct drm_device *dev = NULL;
-	int minor_id = iminor(inode);
+	struct drm_device *dev;
 	struct drm_minor *minor;
-	int retcode = 0;
+	int retcode;
 	int need_setup = 0;
-	struct address_space *old_mapping;
-	struct address_space *old_imapping;

-	minor = idr_find(&drm_minors_idr, minor_id);
-	if (!minor)
-		return -ENODEV;
-
-	if (!(dev = minor->dev))
-		return -ENODEV;
-
-	if (drm_device_is_unplugged(dev))
-		return -ENODEV;
+	minor = drm_minor_acquire(iminor(inode));
+	if (IS_ERR(minor))
+		return PTR_ERR(minor);

+	dev = minor->dev;
 	if (!dev->open_count++)
 		need_setup = 1;
-	mutex_lock(&dev->struct_mutex);
-	old_imapping = inode->i_mapping;
-	old_mapping = dev->dev_mapping;
-	if (old_mapping == NULL)
-		dev->dev_mapping = &inode->i_data;
-	/* ihold ensures nobody can remove inode with our i_data */
-	ihold(container_of(dev->dev_mapping, struct inode, i_data));
-	inode->i_mapping = dev->dev_mapping;
-	filp->f_mapping = dev->dev_mapping;
-	mutex_unlock(&dev->struct_mutex);

-	retcode = drm_open_helper(inode, filp, dev);
+	/* share address_space across all char-devs of a single device */
+	filp->f_mapping = dev->anon_inode->i_mapping;
+
+	retcode = drm_open_helper(inode, filp, minor);
 	if (retcode)
 		goto err_undo;
 	if (need_setup) {
@ -121,13 +106,8 @@ int drm_open(struct inode *inode, struct file *filp)
 	return 0;

 err_undo:
-	mutex_lock(&dev->struct_mutex);
-	filp->f_mapping = old_imapping;
-	inode->i_mapping = old_imapping;
-	iput(container_of(dev->dev_mapping, struct inode, i_data));
-	dev->dev_mapping = old_mapping;
-	mutex_unlock(&dev->struct_mutex);
 	dev->open_count--;
+	drm_minor_release(minor);
 	return retcode;
 }
 EXPORT_SYMBOL(drm_open);
@ -143,33 +123,30 @@ EXPORT_SYMBOL(drm_open);
 */
 int drm_stub_open(struct inode *inode, struct file *filp)
 {
-	struct drm_device *dev = NULL;
+	struct drm_device *dev;
 	struct drm_minor *minor;
-	int minor_id = iminor(inode);
 	int err = -ENODEV;
 	const struct file_operations *new_fops;

 	DRM_DEBUG("\n");

 	mutex_lock(&drm_global_mutex);
-	minor = idr_find(&drm_minors_idr, minor_id);
-	if (!minor)
-		goto out;
-
-	if (!(dev = minor->dev))
-		goto out;
-
-	if (drm_device_is_unplugged(dev))
-		goto out;
+	minor = drm_minor_acquire(iminor(inode));
+	if (IS_ERR(minor))
+		goto out_unlock;

+	dev = minor->dev;
 	new_fops = fops_get(dev->driver->fops);
 	if (!new_fops)
-		goto out;
+		goto out_release;

 	replace_fops(filp, new_fops);
 	if (filp->f_op->open)
 		err = filp->f_op->open(inode, filp);
-out:
+
+out_release:
+	drm_minor_release(minor);
+out_unlock:
 	mutex_unlock(&drm_global_mutex);
 	return err;
 }
@ -196,16 +173,16 @@ static int drm_cpu_valid(void)
 *
 * \param inode device inode.
 * \param filp file pointer.
- * \param dev device.
+ * \param minor acquired minor-object.
 * \return zero on success or a negative number on failure.
 *
 * Creates and initializes a drm_file structure for the file private data in \p
 * filp and add it into the double linked list in \p dev.
 */
 static int drm_open_helper(struct inode *inode, struct file *filp,
-			   struct drm_device * dev)
+			   struct drm_minor *minor)
 {
-	int minor_id = iminor(inode);
+	struct drm_device *dev = minor->dev;
 	struct drm_file *priv;
 	int ret;

@ -216,7 +193,7 @@ static int drm_open_helper(struct inode *inode, struct file *filp,
 	if (dev->switch_power_state != DRM_SWITCH_POWER_ON && dev->switch_power_state != DRM_SWITCH_POWER_DYNAMIC_OFF)
 		return -EINVAL;

-	DRM_DEBUG("pid = %d, minor = %d\n", task_pid_nr(current), minor_id);
+	DRM_DEBUG("pid = %d, minor = %d\n", task_pid_nr(current), minor->index);

 	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
 	if (!priv)
@ -226,11 +203,7 @@ static int drm_open_helper(struct inode *inode, struct file *filp,
 	priv->filp = filp;
 	priv->uid = current_euid();
 	priv->pid = get_pid(task_pid(current));
-	priv->minor = idr_find(&drm_minors_idr, minor_id);
-	if (!priv->minor) {
-		ret = -ENODEV;
-		goto out_put_pid;
-	}
+	priv->minor = minor;

 	/* for compatibility root is always authenticated */
 	priv->always_authenticated = capable(CAP_SYS_ADMIN);
@ -336,7 +309,6 @@ out_prime_destroy:
 		drm_prime_destroy_file_private(&priv->prime);
 	if (dev->driver->driver_features & DRIVER_GEM)
 		drm_gem_release(dev, priv);
-out_put_pid:
 	put_pid(priv->pid);
 	kfree(priv);
 	filp->private_data = NULL;
@ -434,7 +406,6 @@ int drm_lastclose(struct drm_device * dev)

 	drm_legacy_dma_takedown(dev);

-	dev->dev_mapping = NULL;
 	mutex_unlock(&dev->struct_mutex);

 	drm_legacy_dev_reinit(dev);
@ -458,7 +429,8 @@ int drm_lastclose(struct drm_device * dev)
 int drm_release(struct inode *inode, struct file *filp)
 {
 	struct drm_file *file_priv = filp->private_data;
-	struct drm_device *dev = file_priv->minor->dev;
+	struct drm_minor *minor = file_priv->minor;
+	struct drm_device *dev = minor->dev;
 	int retcode = 0;

 	mutex_lock(&drm_global_mutex);
@ -474,7 +446,7 @@ int drm_release(struct inode *inode, struct file *filp)

 	DRM_DEBUG("pid = %d, device = 0x%lx, open_count = %d\n",
 		  task_pid_nr(current),
-		  (long)old_encode_dev(file_priv->minor->device),
+		  (long)old_encode_dev(file_priv->minor->kdev->devt),
 		  dev->open_count);

 	/* Release any auth tokens that might point to this file_priv,
@ -549,9 +521,6 @@ int drm_release(struct inode *inode, struct file *filp)
 		}
 	}

-	BUG_ON(dev->dev_mapping == NULL);
-	iput(container_of(dev->dev_mapping, struct inode, i_data));
-
 	/* drop the reference held my the file priv */
 	if (file_priv->master)
 		drm_master_put(&file_priv->master);
@ -580,6 +549,8 @@ int drm_release(struct inode *inode, struct file *filp)
 	}
 	mutex_unlock(&drm_global_mutex);

+	drm_minor_release(minor);
+
 	return retcode;
 }
 EXPORT_SYMBOL(drm_release);
--- a/drivers/gpu/drm/drm_gem.c
+++ b/drivers/gpu/drm/drm_gem.c
@ -85,9 +85,9 @@
 #endif

 /**
- * Initialize the GEM device fields
+ * drm_gem_init - Initialize the GEM device fields
+ * @dev: drm_devic structure to initialize
 */
-
 int
 drm_gem_init(struct drm_device *dev)
 {
@ -120,6 +120,11 @@ drm_gem_destroy(struct drm_device *dev)
 }

 /**
+ * drm_gem_object_init - initialize an allocated shmem-backed GEM object
+ * @dev: drm_device the object should be initialized for
+ * @obj: drm_gem_object to initialize
+ * @size: object size
+ *
 * Initialize an already allocated GEM object of the specified size with
 * shmfs backing store.
 */
@ -141,6 +146,11 @@ int drm_gem_object_init(struct drm_device *dev,
 EXPORT_SYMBOL(drm_gem_object_init);

 /**
+ * drm_gem_object_init - initialize an allocated private GEM object
+ * @dev: drm_device the object should be initialized for
+ * @obj: drm_gem_object to initialize
+ * @size: object size
+ *
 * Initialize an already allocated GEM object of the specified size with
 * no GEM provided backing store. Instead the caller is responsible for
 * backing the object and handling it.
@ -176,6 +186,9 @@ drm_gem_remove_prime_handles(struct drm_gem_object *obj, struct drm_file *filp)
 }

 /**
+ * drm_gem_object_free - release resources bound to userspace handles
+ * @obj: GEM object to clean up.
+ *
 * Called after the last handle to the object has been closed
 *
 * Removes any name for the object. Note that this must be
@ -225,7 +238,12 @@ drm_gem_object_handle_unreference_unlocked(struct drm_gem_object *obj)
 }

 /**
- * Removes the mapping from handle to filp for this object.
+ * drm_gem_handle_delete - deletes the given file-private handle
+ * @filp: drm file-private structure to use for the handle look up
+ * @handle: userspace handle to delete
+ *
+ * Removes the GEM handle from the @filp lookup table and if this is the last
+ * handle also cleans up linked resources like GEM names.
 */
 int
 drm_gem_handle_delete(struct drm_file *filp, u32 handle)
@ -270,6 +288,9 @@ EXPORT_SYMBOL(drm_gem_handle_delete);

 /**
 * drm_gem_dumb_destroy - dumb fb callback helper for gem based drivers
+ * @file: drm file-private structure to remove the dumb handle from
+ * @dev: corresponding drm_device
+ * @handle: the dumb handle to remove
 * 
 * This implements the ->dumb_destroy kms driver callback for drivers which use
 * gem to manage their backing storage.
@ -284,6 +305,9 @@ EXPORT_SYMBOL(drm_gem_dumb_destroy);

 /**
 * drm_gem_handle_create_tail - internal functions to create a handle
+ * @file_priv: drm file-private structure to register the handle for
+ * @obj: object to register
+ * @handlep: pionter to return the created handle to the caller
 * 
 * This expects the dev->object_name_lock to be held already and will drop it
 * before returning. Used to avoid races in establishing new handles when
@ -336,6 +360,11 @@ drm_gem_handle_create_tail(struct drm_file *file_priv,
 }

 /**
+ * gem_handle_create - create a gem handle for an object
+ * @file_priv: drm file-private structure to register the handle for
+ * @obj: object to register
+ * @handlep: pionter to return the created handle to the caller
+ *
 * Create a handle for this object. This adds a handle reference
 * to the object, which includes a regular reference count. Callers
 * will likely want to dereference the object afterwards.
@ -536,6 +565,11 @@ drm_gem_object_lookup(struct drm_device *dev, struct drm_file *filp,
 EXPORT_SYMBOL(drm_gem_object_lookup);

 /**
+ * drm_gem_close_ioctl - implementation of the GEM_CLOSE ioctl
+ * @dev: drm_device
+ * @data: ioctl data
+ * @file_priv: drm file-private structure
+ *
 * Releases the handle to an mm object.
 */
 int
@ -554,6 +588,11 @@ drm_gem_close_ioctl(struct drm_device *dev, void *data,
 }

 /**
+ * drm_gem_flink_ioctl - implementation of the GEM_FLINK ioctl
+ * @dev: drm_device
+ * @data: ioctl data
+ * @file_priv: drm file-private structure
+ *
 * Create a global name for an object, returning the name.
 *
 * Note that the name does not hold a reference; when the object
@ -601,6 +640,11 @@ err:
 }

 /**
+ * drm_gem_open - implementation of the GEM_OPEN ioctl
+ * @dev: drm_device
+ * @data: ioctl data
+ * @file_priv: drm file-private structure
+ *
 * Open an object using the global name, returning a handle and the size.
 *
 * This handle (of course) holds a reference to the object, so the object
@ -640,6 +684,10 @@ drm_gem_open_ioctl(struct drm_device *dev, void *data,
 }

 /**
+ * gem_gem_open - initalizes GEM file-private structures at devnode open time
+ * @dev: drm_device which is being opened by userspace
+ * @file_private: drm file-private structure to set up
+ *
 * Called at device open time, sets up the structure for handling refcounting
 * of mm objects.
 */
@ -650,7 +698,7 @@ drm_gem_open(struct drm_device *dev, struct drm_file *file_private)
 	spin_lock_init(&file_private->table_lock);
 }

-/**
+/*
 * Called at device close to release the file's
 * handle references on objects.
 */
@ -674,6 +722,10 @@ drm_gem_object_release_handle(int id, void *ptr, void *data)
 }

 /**
+ * drm_gem_release - release file-private GEM resources
+ * @dev: drm_device which is being closed by userspace
+ * @file_private: drm file-private structure to clean up
+ *
 * Called at close time when the filp is going away.
 *
 * Releases any remaining references on objects by this filp.
@ -692,11 +744,16 @@ drm_gem_object_release(struct drm_gem_object *obj)
 	WARN_ON(obj->dma_buf);

 	if (obj->filp)
-	    fput(obj->filp);
+		fput(obj->filp);
+
+	drm_gem_free_mmap_offset(obj);
 }
 EXPORT_SYMBOL(drm_gem_object_release);

 /**
+ * drm_gem_object_free - free a GEM object
+ * @kref: kref of the object to free
+ *
 * Called after the last reference to the object has been lost.
 * Must be called holding struct_ mutex
 *
@ -782,7 +839,7 @@ int drm_gem_mmap_obj(struct drm_gem_object *obj, unsigned long obj_size,
 	vma->vm_flags |= VM_IO | VM_PFNMAP | VM_DONTEXPAND | VM_DONTDUMP;
 	vma->vm_ops = dev->driver->gem_vm_ops;
 	vma->vm_private_data = obj;
-	vma->vm_page_prot =  pgprot_writecombine(vm_get_page_prot(vma->vm_flags));
+	vma->vm_page_prot = pgprot_writecombine(vm_get_page_prot(vma->vm_flags));

 	/* Take a ref for this mapping of the object, so that the fault
 	 * handler can dereference the mmap offset's pointer to the object.
@ -818,7 +875,7 @@ int drm_gem_mmap(struct file *filp, struct vm_area_struct *vma)
 	struct drm_device *dev = priv->minor->dev;
 	struct drm_gem_object *obj;
 	struct drm_vma_offset_node *node;
-	int ret = 0;
+	int ret;

 	if (drm_device_is_unplugged(dev))
 		return -ENODEV;
--- a/drivers/gpu/drm/drm_gem_cma_helper.c
+++ b/drivers/gpu/drm/drm_gem_cma_helper.c
@ -79,7 +79,6 @@ struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm,
 		unsigned int size)
 {
 	struct drm_gem_cma_object *cma_obj;
-	struct sg_table *sgt = NULL;
 	int ret;

 	size = round_up(size, PAGE_SIZE);
@ -97,23 +96,9 @@ struct drm_gem_cma_object *drm_gem_cma_create(struct drm_device *drm,
 		goto error;
 	}

-	sgt = kzalloc(sizeof(*cma_obj->sgt), GFP_KERNEL);
-	if (sgt == NULL) {
-		ret = -ENOMEM;
-		goto error;
-	}
-
-	ret = dma_get_sgtable(drm->dev, sgt, cma_obj->vaddr,
-			      cma_obj->paddr, size);
-	if (ret < 0)
-		goto error;
-
-	cma_obj->sgt = sgt;
-
 	return cma_obj;

 error:
-	kfree(sgt);
 	drm_gem_cma_free_object(&cma_obj->base);
 	return ERR_PTR(ret);
 }
@ -175,10 +160,6 @@ void drm_gem_cma_free_object(struct drm_gem_object *gem_obj)
 	if (cma_obj->vaddr) {
 		dma_free_writecombine(gem_obj->dev->dev, cma_obj->base.size,
 				      cma_obj->vaddr, cma_obj->paddr);
-		if (cma_obj->sgt) {
-			sg_free_table(cma_obj->sgt);
-			kfree(cma_obj->sgt);
-		}
 	} else if (gem_obj->import_attach) {
 		drm_prime_gem_destroy(gem_obj, cma_obj->sgt);
 	}
--- a/drivers/gpu/drm/drm_mm.c
+++ b/drivers/gpu/drm/drm_mm.c
@ -47,7 +47,44 @@
 #include <linux/seq_file.h>
 #include <linux/export.h>

-#define MM_UNUSED_TARGET 4
+/**
+ * DOC: Overview
+ *
+ * drm_mm provides a simple range allocator. The drivers are free to use the
+ * resource allocator from the linux core if it suits them, the upside of drm_mm
+ * is that it's in the DRM core. Which means that it's easier to extend for
+ * some of the crazier special purpose needs of gpus.
+ *
+ * The main data struct is &drm_mm, allocations are tracked in &drm_mm_node.
+ * Drivers are free to embed either of them into their own suitable
+ * datastructures. drm_mm itself will not do any allocations of its own, so if
+ * drivers choose not to embed nodes they need to still allocate them
+ * themselves.
+ *
+ * The range allocator also supports reservation of preallocated blocks. This is
+ * useful for taking over initial mode setting configurations from the firmware,
+ * where an object needs to be created which exactly matches the firmware's
+ * scanout target. As long as the range is still free it can be inserted anytime
+ * after the allocator is initialized, which helps with avoiding looped
+ * depencies in the driver load sequence.
+ *
+ * drm_mm maintains a stack of most recently freed holes, which of all
+ * simplistic datastructures seems to be a fairly decent approach to clustering
+ * allocations and avoiding too much fragmentation. This means free space
+ * searches are O(num_holes). Given that all the fancy features drm_mm supports
+ * something better would be fairly complex and since gfx thrashing is a fairly
+ * steep cliff not a real concern. Removing a node again is O(1).
+ *
+ * drm_mm supports a few features: Alignment and range restrictions can be
+ * supplied. Further more every &drm_mm_node has a color value (which is just an
+ * opaqua unsigned long) which in conjunction with a driver callback can be used
+ * to implement sophisticated placement restrictions. The i915 DRM driver uses
+ * this to implement guard pages between incompatible caching domains in the
+ * graphics TT.
+ *
+ * Finally iteration helpers to walk all nodes and all holes are provided as are
+ * some basic allocator dumpers for debugging.
+ */

 static struct drm_mm_node *drm_mm_search_free_generic(const struct drm_mm *mm,
 						unsigned long size,
@ -107,6 +144,20 @@ static void drm_mm_insert_helper(struct drm_mm_node *hole_node,
 	}
 }

+/**
+ * drm_mm_reserve_node - insert an pre-initialized node
+ * @mm: drm_mm allocator to insert @node into
+ * @node: drm_mm_node to insert
+ *
+ * This functions inserts an already set-up drm_mm_node into the allocator,
+ * meaning that start, size and color must be set by the caller. This is useful
+ * to initialize the allocator with preallocated objects which must be set-up
+ * before the range allocator can be set-up, e.g. when taking over a firmware
+ * framebuffer.
+ *
+ * Returns:
+ * 0 on success, -ENOSPC if there's no hole where @node is.
+ */
 int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
 {
 	struct drm_mm_node *hole;
@ -148,9 +199,18 @@ int drm_mm_reserve_node(struct drm_mm *mm, struct drm_mm_node *node)
 EXPORT_SYMBOL(drm_mm_reserve_node);

 /**
- * Search for free space and insert a preallocated memory node. Returns
- * -ENOSPC if no suitable free area is available. The preallocated memory node
- * must be cleared.
+ * drm_mm_insert_node_generic - search for space and insert @node
+ * @mm: drm_mm to allocate from
+ * @node: preallocate node to insert
+ * @size: size of the allocation
+ * @alignment: alignment of the allocation
+ * @color: opaque tag value to use for this node
+ * @flags: flags to fine-tune the allocation
+ *
+ * The preallocated node must be cleared to 0.
+ *
+ * Returns:
+ * 0 on success, -ENOSPC if there's no suitable hole.
 */
 int drm_mm_insert_node_generic(struct drm_mm *mm, struct drm_mm_node *node,
 			       unsigned long size, unsigned alignment,
@ -222,9 +282,20 @@ static void drm_mm_insert_helper_range(struct drm_mm_node *hole_node,
 }

 /**
- * Search for free space and insert a preallocated memory node. Returns
- * -ENOSPC if no suitable free area is available. This is for range
- * restricted allocations. The preallocated memory node must be cleared.
+ * drm_mm_insert_node_in_range_generic - ranged search for space and insert @node
+ * @mm: drm_mm to allocate from
+ * @node: preallocate node to insert
+ * @size: size of the allocation
+ * @alignment: alignment of the allocation
+ * @color: opaque tag value to use for this node
+ * @start: start of the allowed range for this node
+ * @end: end of the allowed range for this node
+ * @flags: flags to fine-tune the allocation
+ *
+ * The preallocated node must be cleared to 0.
+ *
+ * Returns:
+ * 0 on success, -ENOSPC if there's no suitable hole.
 */
 int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, struct drm_mm_node *node,
 					unsigned long size, unsigned alignment, unsigned long color,
@ -247,7 +318,12 @@ int drm_mm_insert_node_in_range_generic(struct drm_mm *mm, struct drm_mm_node *n
 EXPORT_SYMBOL(drm_mm_insert_node_in_range_generic);

 /**
- * Remove a memory node from the allocator.
+ * drm_mm_remove_node - Remove a memory node from the allocator.
+ * @node: drm_mm_node to remove
+ *
+ * This just removes a node from its drm_mm allocator. The node does not need to
+ * be cleared again before it can be re-inserted into this or any other drm_mm
+ * allocator. It is a bug to call this function on a un-allocated node.
 */
 void drm_mm_remove_node(struct drm_mm_node *node)
 {
@ -384,7 +460,13 @@ static struct drm_mm_node *drm_mm_search_free_in_range_generic(const struct drm_
 }

 /**
- * Moves an allocation. To be used with embedded struct drm_mm_node.
+ * drm_mm_replace_node - move an allocation from @old to @new
+ * @old: drm_mm_node to remove from the allocator
+ * @new: drm_mm_node which should inherit @old's allocation
+ *
+ * This is useful for when drivers embed the drm_mm_node structure and hence
+ * can't move allocations by reassigning pointers. It's a combination of remove
+ * and insert with the guarantee that the allocation start will match.
 */
 void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
 {
@ -402,12 +484,46 @@ void drm_mm_replace_node(struct drm_mm_node *old, struct drm_mm_node *new)
 EXPORT_SYMBOL(drm_mm_replace_node);

 /**
- * Initializa lru scanning.
+ * DOC: lru scan roaster
+ *
+ * Very often GPUs need to have continuous allocations for a given object. When
+ * evicting objects to make space for a new one it is therefore not most
+ * efficient when we simply start to select all objects from the tail of an LRU
+ * until there's a suitable hole: Especially for big objects or nodes that
+ * otherwise have special allocation constraints there's a good chance we evict
+ * lots of (smaller) objects unecessarily.
+ *
+ * The DRM range allocator supports this use-case through the scanning
+ * interfaces. First a scan operation needs to be initialized with
+ * drm_mm_init_scan() or drm_mm_init_scan_with_range(). The the driver adds
+ * objects to the roaster (probably by walking an LRU list, but this can be
+ * freely implemented) until a suitable hole is found or there's no further
+ * evitable object.
+ *
+ * The the driver must walk through all objects again in exactly the reverse
+ * order to restore the allocator state. Note that while the allocator is used
+ * in the scan mode no other operation is allowed.
+ *
+ * Finally the driver evicts all objects selected in the scan. Adding and
+ * removing an object is O(1), and since freeing a node is also O(1) the overall
+ * complexity is O(scanned_objects). So like the free stack which needs to be
+ * walked before a scan operation even begins this is linear in the number of
+ * objects. It doesn't seem to hurt badly.
+ */
+
+/**
+ * drm_mm_init_scan - initialize lru scanning
+ * @mm: drm_mm to scan
+ * @size: size of the allocation
+ * @alignment: alignment of the allocation
+ * @color: opaque tag value to use for the allocation
 *
 * This simply sets up the scanning routines with the parameters for the desired
- * hole.
+ * hole. Note that there's no need to specify allocation flags, since they only
+ * change the place a node is allocated from within a suitable hole.
 *
- * Warning: As long as the scan list is non-empty, no other operations than
+ * Warning:
+ * As long as the scan list is non-empty, no other operations than
 * adding/removing nodes to/from the scan list are allowed.
 */
 void drm_mm_init_scan(struct drm_mm *mm,
@ -427,12 +543,20 @@ void drm_mm_init_scan(struct drm_mm *mm,
 EXPORT_SYMBOL(drm_mm_init_scan);

 /**
- * Initializa lru scanning.
+ * drm_mm_init_scan - initialize range-restricted lru scanning
+ * @mm: drm_mm to scan
+ * @size: size of the allocation
+ * @alignment: alignment of the allocation
+ * @color: opaque tag value to use for the allocation
+ * @start: start of the allowed range for the allocation
+ * @end: end of the allowed range for the allocation
 *
 * This simply sets up the scanning routines with the parameters for the desired
- * hole. This version is for range-restricted scans.
+ * hole. Note that there's no need to specify allocation flags, since they only
+ * change the place a node is allocated from within a suitable hole.
 *
- * Warning: As long as the scan list is non-empty, no other operations than
+ * Warning:
+ * As long as the scan list is non-empty, no other operations than
 * adding/removing nodes to/from the scan list are allowed.
 */
 void drm_mm_init_scan_with_range(struct drm_mm *mm,
@ -456,12 +580,16 @@ void drm_mm_init_scan_with_range(struct drm_mm *mm,
 EXPORT_SYMBOL(drm_mm_init_scan_with_range);

 /**
+ * drm_mm_scan_add_block - add a node to the scan list
+ * @node: drm_mm_node to add
+ *
 * Add a node to the scan list that might be freed to make space for the desired
 * hole.
 *
- * Returns non-zero, if a hole has been found, zero otherwise.
+ * Returns:
+ * True if a hole has been found, false otherwise.
 */
-int drm_mm_scan_add_block(struct drm_mm_node *node)
+bool drm_mm_scan_add_block(struct drm_mm_node *node)
 {
 	struct drm_mm *mm = node->mm;
 	struct drm_mm_node *prev_node;
@ -501,15 +629,16 @@ int drm_mm_scan_add_block(struct drm_mm_node *node)
 			    mm->scan_size, mm->scan_alignment)) {
 		mm->scan_hit_start = hole_start;
 		mm->scan_hit_end = hole_end;
-		return 1;
+		return true;
 	}

-	return 0;
+	return false;
 }
 EXPORT_SYMBOL(drm_mm_scan_add_block);

 /**
- * Remove a node from the scan list.
+ * drm_mm_scan_remove_block - remove a node from the scan list
+ * @node: drm_mm_node to remove
 *
 * Nodes _must_ be removed in the exact same order from the scan list as they
 * have been added, otherwise the internal state of the memory manager will be
@ -519,10 +648,11 @@ EXPORT_SYMBOL(drm_mm_scan_add_block);
 * immediately following drm_mm_search_free with !DRM_MM_SEARCH_BEST will then
 * return the just freed block (because its at the top of the free_stack list).
 *
- * Returns one if this block should be evicted, zero otherwise. Will always
- * return zero when no hole has been found.
+ * Returns:
+ * True if this block should be evicted, false otherwise. Will always
+ * return false when no hole has been found.
 */
-int drm_mm_scan_remove_block(struct drm_mm_node *node)
+bool drm_mm_scan_remove_block(struct drm_mm_node *node)
 {
 	struct drm_mm *mm = node->mm;
 	struct drm_mm_node *prev_node;
@ -543,7 +673,15 @@ int drm_mm_scan_remove_block(struct drm_mm_node *node)
 }
 EXPORT_SYMBOL(drm_mm_scan_remove_block);

-int drm_mm_clean(struct drm_mm * mm)
+/**
+ * drm_mm_clean - checks whether an allocator is clean
+ * @mm: drm_mm allocator to check
+ *
+ * Returns:
+ * True if the allocator is completely free, false if there's still a node
+ * allocated in it.
+ */
+bool drm_mm_clean(struct drm_mm * mm)
 {
 	struct list_head *head = &mm->head_node.node_list;

@ -551,6 +689,14 @@ int drm_mm_clean(struct drm_mm * mm)
 }
 EXPORT_SYMBOL(drm_mm_clean);

+/**
+ * drm_mm_init - initialize a drm-mm allocator
+ * @mm: the drm_mm structure to initialize
+ * @start: start of the range managed by @mm
+ * @size: end of the range managed by @mm
+ *
+ * Note that @mm must be cleared to 0 before calling this function.
+ */
 void drm_mm_init(struct drm_mm * mm, unsigned long start, unsigned long size)
 {
 	INIT_LIST_HEAD(&mm->hole_stack);
@ -572,6 +718,13 @@ void drm_mm_init(struct drm_mm * mm, unsigned long start, unsigned long size)
 }
 EXPORT_SYMBOL(drm_mm_init);

+/**
+ * drm_mm_takedown - clean up a drm_mm allocator
+ * @mm: drm_mm allocator to clean up
+ *
+ * Note that it is a bug to call this function on an allocator which is not
+ * clean.
+ */
 void drm_mm_takedown(struct drm_mm * mm)
 {
 	WARN(!list_empty(&mm->head_node.node_list),
@ -597,6 +750,11 @@ static unsigned long drm_mm_debug_hole(struct drm_mm_node *entry,
 	return 0;
 }

+/**
+ * drm_mm_debug_table - dump allocator state to dmesg
+ * @mm: drm_mm allocator to dump
+ * @prefix: prefix to use for dumping to dmesg
+ */
 void drm_mm_debug_table(struct drm_mm *mm, const char *prefix)
 {
 	struct drm_mm_node *entry;
@ -635,6 +793,11 @@ static unsigned long drm_mm_dump_hole(struct seq_file *m, struct drm_mm_node *en
 	return 0;
 }

+/**
+ * drm_mm_dump_table - dump allocator state to a seq_file
+ * @m: seq_file to dump to
+ * @mm: drm_mm allocator to dump
+ */
 int drm_mm_dump_table(struct seq_file *m, struct drm_mm *mm)
 {
 	struct drm_mm_node *entry;
--- a/drivers/gpu/drm/drm_modes.c
+++ b/drivers/gpu/drm/drm_modes.c
@ -37,15 +37,14 @@
 #include <drm/drm_crtc.h>
 #include <video/of_videomode.h>
 #include <video/videomode.h>
+#include <drm/drm_modes.h>
+
+#include "drm_crtc_internal.h"

 /**
- * drm_mode_debug_printmodeline - debug print a mode
- * @dev: DRM device
+ * drm_mode_debug_printmodeline - print a mode to dmesg
 * @mode: mode to print
 *
- * LOCKING:
- * None.
- *
 * Describe @mode using DRM_DEBUG.
 */
 void drm_mode_debug_printmodeline(const struct drm_display_mode *mode)
@ -61,18 +60,77 @@ void drm_mode_debug_printmodeline(const struct drm_display_mode *mode)
 EXPORT_SYMBOL(drm_mode_debug_printmodeline);

 /**
- * drm_cvt_mode -create a modeline based on CVT algorithm
+ * drm_mode_create - create a new display mode
 * @dev: DRM device
+ *
+ * Create a new, cleared drm_display_mode with kzalloc, allocate an ID for it
+ * and return it.
+ *
+ * Returns:
+ * Pointer to new mode on success, NULL on error.
+ */
+struct drm_display_mode *drm_mode_create(struct drm_device *dev)
+{
+	struct drm_display_mode *nmode;
+
+	nmode = kzalloc(sizeof(struct drm_display_mode), GFP_KERNEL);
+	if (!nmode)
+		return NULL;
+
+	if (drm_mode_object_get(dev, &nmode->base, DRM_MODE_OBJECT_MODE)) {
+		kfree(nmode);
+		return NULL;
+	}
+
+	return nmode;
+}
+EXPORT_SYMBOL(drm_mode_create);
+
+/**
+ * drm_mode_destroy - remove a mode
+ * @dev: DRM device
+ * @mode: mode to remove
+ *
+ * Release @mode's unique ID, then free it @mode structure itself using kfree.
+ */
+void drm_mode_destroy(struct drm_device *dev, struct drm_display_mode *mode)
+{
+	if (!mode)
+		return;
+
+	drm_mode_object_put(dev, &mode->base);
+
+	kfree(mode);
+}
+EXPORT_SYMBOL(drm_mode_destroy);
+
+/**
+ * drm_mode_probed_add - add a mode to a connector's probed_mode list
+ * @connector: connector the new mode
+ * @mode: mode data
+ *
+ * Add @mode to @connector's probed_mode list for later use. This list should
+ * then in a second step get filtered and all the modes actually supported by
+ * the hardware moved to the @connector's modes list.
+ */
+void drm_mode_probed_add(struct drm_connector *connector,
+			 struct drm_display_mode *mode)
+{
+	WARN_ON(!mutex_is_locked(&connector->dev->mode_config.mutex));
+
+	list_add_tail(&mode->head, &connector->probed_modes);
+}
+EXPORT_SYMBOL(drm_mode_probed_add);
+
+/**
+ * drm_cvt_mode -create a modeline based on the CVT algorithm
+ * @dev: drm device
 * @hdisplay: hdisplay size
 * @vdisplay: vdisplay size
- * @vrefresh  : vrefresh rate
- * @reduced : Whether the GTF calculation is simplified
- * @interlaced:Whether the interlace is supported
- *
- * LOCKING:
- * none.
- *
- * return the modeline based on CVT algorithm
+ * @vrefresh: vrefresh rate
+ * @reduced: whether to use reduced blanking
+ * @interlaced: whether to compute an interlaced mode
+ * @margins: whether to add margins (borders)
 *
 * This function is called to generate the modeline based on CVT algorithm
 * according to the hdisplay, vdisplay, vrefresh.
@ -82,12 +140,17 @@ EXPORT_SYMBOL(drm_mode_debug_printmodeline);
 *
 * And it is copied from xf86CVTmode in xserver/hw/xfree86/modes/xf86cvt.c.
 * What I have done is to translate it by using integer calculation.
+ *
+ * Returns:
+ * The modeline based on the CVT algorithm stored in a drm_display_mode object.
+ * The display mode object is allocated with drm_mode_create(). Returns NULL
+ * when no mode could be allocated.
 */
-#define HV_FACTOR			1000
 struct drm_display_mode *drm_cvt_mode(struct drm_device *dev, int hdisplay,
 				      int vdisplay, int vrefresh,
 				      bool reduced, bool interlaced, bool margins)
 {
+#define HV_FACTOR			1000
 	/* 1) top/bottom margin size (% of height) - default: 1.8, */
 #define	CVT_MARGIN_PERCENTAGE		18
 	/* 2) character cell horizontal granularity (pixels) - default 8 */
@ -281,23 +344,25 @@ struct drm_display_mode *drm_cvt_mode(struct drm_device *dev, int hdisplay,
 EXPORT_SYMBOL(drm_cvt_mode);

 /**
- * drm_gtf_mode_complex - create the modeline based on full GTF algorithm
- *
- * @dev		:drm device
- * @hdisplay	:hdisplay size
- * @vdisplay	:vdisplay size
- * @vrefresh	:vrefresh rate.
- * @interlaced	:whether the interlace is supported
- * @margins	:desired margin size
- * @GTF_[MCKJ]  :extended GTF formula parameters
- *
- * LOCKING.
- * none.
- *
- * return the modeline based on full GTF algorithm.
+ * drm_gtf_mode_complex - create the modeline based on the full GTF algorithm
+ * @dev: drm device
+ * @hdisplay: hdisplay size
+ * @vdisplay: vdisplay size
+ * @vrefresh: vrefresh rate.
+ * @interlaced: whether to compute an interlaced mode
+ * @margins: desired margin (borders) size
+ * @GTF_M: extended GTF formula parameters
+ * @GTF_2C: extended GTF formula parameters
+ * @GTF_K: extended GTF formula parameters
+ * @GTF_2J: extended GTF formula parameters
 *
 * GTF feature blocks specify C and J in multiples of 0.5, so we pass them
 * in here multiplied by two.  For a C of 40, pass in 80.
+ *
+ * Returns:
+ * The modeline based on the full GTF algorithm stored in a drm_display_mode object.
+ * The display mode object is allocated with drm_mode_create(). Returns NULL
+ * when no mode could be allocated.
 */
 struct drm_display_mode *
 drm_gtf_mode_complex(struct drm_device *dev, int hdisplay, int vdisplay,
@ -467,17 +532,13 @@ drm_gtf_mode_complex(struct drm_device *dev, int hdisplay, int vdisplay,
 EXPORT_SYMBOL(drm_gtf_mode_complex);

 /**
- * drm_gtf_mode - create the modeline based on GTF algorithm
- *
- * @dev		:drm device
- * @hdisplay	:hdisplay size
- * @vdisplay	:vdisplay size
- * @vrefresh	:vrefresh rate.
- * @interlaced	:whether the interlace is supported
- * @margins	:whether the margin is supported
- *
- * LOCKING.
- * none.
+ * drm_gtf_mode - create the modeline based on the GTF algorithm
+ * @dev: drm device
+ * @hdisplay: hdisplay size
+ * @vdisplay: vdisplay size
+ * @vrefresh: vrefresh rate.
+ * @interlaced: whether to compute an interlaced mode
+ * @margins: desired margin (borders) size
 *
 * return the modeline based on GTF algorithm
 *
@ -496,19 +557,32 @@ EXPORT_SYMBOL(drm_gtf_mode_complex);
 * C = 40
 * K = 128
 * J = 20
+ *
+ * Returns:
+ * The modeline based on the GTF algorithm stored in a drm_display_mode object.
+ * The display mode object is allocated with drm_mode_create(). Returns NULL
+ * when no mode could be allocated.
 */
 struct drm_display_mode *
 drm_gtf_mode(struct drm_device *dev, int hdisplay, int vdisplay, int vrefresh,
-	     bool lace, int margins)
+	     bool interlaced, int margins)
 {
-	return drm_gtf_mode_complex(dev, hdisplay, vdisplay, vrefresh, lace,
-				    margins, 600, 40 * 2, 128, 20 * 2);
+	return drm_gtf_mode_complex(dev, hdisplay, vdisplay, vrefresh,
+				    interlaced, margins,
+				    600, 40 * 2, 128, 20 * 2);
 }
 EXPORT_SYMBOL(drm_gtf_mode);

 #ifdef CONFIG_VIDEOMODE_HELPERS
-int drm_display_mode_from_videomode(const struct videomode *vm,
-				    struct drm_display_mode *dmode)
+/**
+ * drm_display_mode_from_videomode - fill in @dmode using @vm,
+ * @vm: videomode structure to use as source
+ * @dmode: drm_display_mode structure to use as destination
+ *
+ * Fills out @dmode using the display mode specified in @vm.
+ */
+void drm_display_mode_from_videomode(const struct videomode *vm,
+				     struct drm_display_mode *dmode)
 {
 	dmode->hdisplay = vm->hactive;
 	dmode->hsync_start = dmode->hdisplay + vm->hfront_porch;
@ -538,8 +612,6 @@ int drm_display_mode_from_videomode(const struct videomode *vm,
 	if (vm->flags & DISPLAY_FLAGS_DOUBLECLK)
 		dmode->flags |= DRM_MODE_FLAG_DBLCLK;
 	drm_mode_set_name(dmode);
-
-	return 0;
 }
 EXPORT_SYMBOL_GPL(drm_display_mode_from_videomode);

@ -553,6 +625,9 @@ EXPORT_SYMBOL_GPL(drm_display_mode_from_videomode);
 * This function is expensive and should only be used, if only one mode is to be
 * read from DT. To get multiple modes start with of_get_display_timings and
 * work with that instead.
+ *
+ * Returns:
+ * 0 on success, a negative errno code when no of videomode node was found.
 */
 int of_get_drm_display_mode(struct device_node *np,
 			    struct drm_display_mode *dmode, int index)
@ -580,10 +655,8 @@ EXPORT_SYMBOL_GPL(of_get_drm_display_mode);
 * drm_mode_set_name - set the name on a mode
 * @mode: name will be set in this mode
 *
- * LOCKING:
- * None.
- *
- * Set the name of @mode to a standard format.
+ * Set the name of @mode to a standard format which is <hdisplay>x<vdisplay>
+ * with an optional 'i' suffix for interlaced modes.
 */
 void drm_mode_set_name(struct drm_display_mode *mode)
 {
@ -595,54 +668,12 @@ void drm_mode_set_name(struct drm_display_mode *mode)
 }
 EXPORT_SYMBOL(drm_mode_set_name);

-/**
- * drm_mode_width - get the width of a mode
- * @mode: mode
- *
- * LOCKING:
- * None.
- *
- * Return @mode's width (hdisplay) value.
- *
- * FIXME: is this needed?
- *
- * RETURNS:
- * @mode->hdisplay
- */
-int drm_mode_width(const struct drm_display_mode *mode)
-{
-	return mode->hdisplay;
-
-}
-EXPORT_SYMBOL(drm_mode_width);
-
-/**
- * drm_mode_height - get the height of a mode
- * @mode: mode
- *
- * LOCKING:
- * None.
- *
- * Return @mode's height (vdisplay) value.
- *
- * FIXME: is this needed?
- *
- * RETURNS:
- * @mode->vdisplay
- */
-int drm_mode_height(const struct drm_display_mode *mode)
-{
-	return mode->vdisplay;
-}
-EXPORT_SYMBOL(drm_mode_height);
-
 /** drm_mode_hsync - get the hsync of a mode
 * @mode: mode
 *
- * LOCKING:
- * None.
- *
- * Return @modes's hsync rate in kHz, rounded to the nearest int.
+ * Returns:
+ * @modes's hsync rate in kHz, rounded to the nearest integer. Calculates the
+ * value first if it is not yet set.
 */
 int drm_mode_hsync(const struct drm_display_mode *mode)
 {
@ -666,17 +697,9 @@ EXPORT_SYMBOL(drm_mode_hsync);
 * drm_mode_vrefresh - get the vrefresh of a mode
 * @mode: mode
 *
- * LOCKING:
- * None.
- *
- * Return @mode's vrefresh rate in Hz or calculate it if necessary.
- *
- * FIXME: why is this needed?  shouldn't vrefresh be set already?
- *
- * RETURNS:
- * Vertical refresh rate. It will be the result of actual value plus 0.5.
- * If it is 70.288, it will return 70Hz.
- * If it is 59.6, it will return 60Hz.
+ * Returns:
+ * @modes's vrefresh rate in Hz, rounded to the nearest integer. Calculates the
+ * value first if it is not yet set.
 */
 int drm_mode_vrefresh(const struct drm_display_mode *mode)
 {
@ -705,14 +728,11 @@ int drm_mode_vrefresh(const struct drm_display_mode *mode)
 EXPORT_SYMBOL(drm_mode_vrefresh);

 /**
- * drm_mode_set_crtcinfo - set CRTC modesetting parameters
+ * drm_mode_set_crtcinfo - set CRTC modesetting timing parameters
 * @p: mode
 * @adjust_flags: a combination of adjustment flags
 *
- * LOCKING:
- * None.
- *
- * Setup the CRTC modesetting parameters for @p, adjusting if necessary.
+ * Setup the CRTC modesetting timing parameters for @p, adjusting if necessary.
 *
 * - The CRTC_INTERLACE_HALVE_V flag can be used to halve vertical timings of
 *   interlaced modes.
@ -780,15 +800,11 @@ void drm_mode_set_crtcinfo(struct drm_display_mode *p, int adjust_flags)
 }
 EXPORT_SYMBOL(drm_mode_set_crtcinfo);

-
 /**
 * drm_mode_copy - copy the mode
 * @dst: mode to overwrite
 * @src: mode to copy
 *
- * LOCKING:
- * None.
- *
 * Copy an existing mode into another mode, preserving the object id and
 * list head of the destination mode.
 */
@ -805,13 +821,14 @@ EXPORT_SYMBOL(drm_mode_copy);

 /**
 * drm_mode_duplicate - allocate and duplicate an existing mode
- * @m: mode to duplicate
- *
- * LOCKING:
- * None.
+ * @dev: drm_device to allocate the duplicated mode for
+ * @mode: mode to duplicate
 *
 * Just allocate a new mode, copy the existing mode into it, and return
 * a pointer to it.  Used to create new instances of established modes.
+ *
+ * Returns:
+ * Pointer to duplicated mode on success, NULL on error.
 */
 struct drm_display_mode *drm_mode_duplicate(struct drm_device *dev,
 					    const struct drm_display_mode *mode)
@ -833,12 +850,9 @@ EXPORT_SYMBOL(drm_mode_duplicate);
 * @mode1: first mode
 * @mode2: second mode
 *
- * LOCKING:
- * None.
- *
 * Check to see if @mode1 and @mode2 are equivalent.
 *
- * RETURNS:
+ * Returns:
 * True if the modes are equal, false otherwise.
 */
 bool drm_mode_equal(const struct drm_display_mode *mode1, const struct drm_display_mode *mode2)
@ -864,13 +878,10 @@ EXPORT_SYMBOL(drm_mode_equal);
 * @mode1: first mode
 * @mode2: second mode
 *
- * LOCKING:
- * None.
- *
 * Check to see if @mode1 and @mode2 are equivalent, but
 * don't check the pixel clocks nor the stereo layout.
 *
- * RETURNS:
+ * Returns:
 * True if the modes are equal, false otherwise.
 */
 bool drm_mode_equal_no_clocks_no_stereo(const struct drm_display_mode *mode1,
@ -900,25 +911,19 @@ EXPORT_SYMBOL(drm_mode_equal_no_clocks_no_stereo);
 * @mode_list: list of modes to check
 * @maxX: maximum width
 * @maxY: maximum height
- * @maxPitch: max pitch
 *
- * LOCKING:
- * Caller must hold a lock protecting @mode_list.
- *
- * The DRM device (@dev) has size and pitch limits.  Here we validate the
- * modes we probed for @dev against those limits and set their status as
- * necessary.
+ * This function is a helper which can be used to validate modes against size
+ * limitations of the DRM device/connector. If a mode is too big its status
+ * memeber is updated with the appropriate validation failure code. The list
+ * itself is not changed.
 */
 void drm_mode_validate_size(struct drm_device *dev,
 			    struct list_head *mode_list,
-			    int maxX, int maxY, int maxPitch)
+			    int maxX, int maxY)
 {
 	struct drm_display_mode *mode;

 	list_for_each_entry(mode, mode_list, head) {
-		if (maxPitch > 0 && mode->hdisplay > maxPitch)
-			mode->status = MODE_BAD_WIDTH;
-
 		if (maxX > 0 && mode->hdisplay > maxX)
 			mode->status = MODE_VIRTUAL_X;

@ -934,12 +939,10 @@ EXPORT_SYMBOL(drm_mode_validate_size);
 * @mode_list: list of modes to check
 * @verbose: be verbose about it
 *
- * LOCKING:
- * Caller must hold a lock protecting @mode_list.
- *
- * Once mode list generation is complete, a caller can use this routine to
- * remove invalid modes from a mode list.  If any of the modes have a
- * status other than %MODE_OK, they are removed from @mode_list and freed.
+ * This helper function can be used to prune a display mode list after
+ * validation has been completed. All modes who's status is not MODE_OK will be
+ * removed from the list, and if @verbose the status code and mode name is also
+ * printed to dmesg.
 */
 void drm_mode_prune_invalid(struct drm_device *dev,
 			    struct list_head *mode_list, bool verbose)
@ -966,13 +969,10 @@ EXPORT_SYMBOL(drm_mode_prune_invalid);
 * @lh_a: list_head for first mode
 * @lh_b: list_head for second mode
 *
- * LOCKING:
- * None.
- *
 * Compare two modes, given by @lh_a and @lh_b, returning a value indicating
 * which is better.
 *
- * RETURNS:
+ * Returns:
 * Negative if @lh_a is better than @lh_b, zero if they're equivalent, or
 * positive if @lh_b is better than @lh_a.
 */
@ -1000,12 +1000,9 @@ static int drm_mode_compare(void *priv, struct list_head *lh_a, struct list_head

 /**
 * drm_mode_sort - sort mode list
- * @mode_list: list to sort
+ * @mode_list: list of drm_display_mode structures to sort
 *
- * LOCKING:
- * Caller must hold a lock protecting @mode_list.
- *
- * Sort @mode_list by favorability, putting good modes first.
+ * Sort @mode_list by favorability, moving good modes to the head of the list.
 */
 void drm_mode_sort(struct list_head *mode_list)
 {
@ -1017,13 +1014,12 @@ EXPORT_SYMBOL(drm_mode_sort);
 * drm_mode_connector_list_update - update the mode list for the connector
 * @connector: the connector to update
 *
- * LOCKING:
- * Caller must hold a lock protecting @mode_list.
- *
 * This moves the modes from the @connector probed_modes list
 * to the actual mode list. It compares the probed mode against the current
- * list and only adds different modes. All modes unverified after this point
- * will be removed by the prune invalid modes.
+ * list and only adds different/new modes.
+ *
+ * This is just a helper functions doesn't validate any modes itself and also
+ * doesn't prune any invalid modes. Callers need to do that themselves.
 */
 void drm_mode_connector_list_update(struct drm_connector *connector)
 {
@ -1031,6 +1027,8 @@ void drm_mode_connector_list_update(struct drm_connector *connector)
 	struct drm_display_mode *pmode, *pt;
 	int found_it;

+	WARN_ON(!mutex_is_locked(&connector->dev->mode_config.mutex));
+
 	list_for_each_entry_safe(pmode, pt, &connector->probed_modes,
 				 head) {
 		found_it = 0;
@ -1056,17 +1054,25 @@ void drm_mode_connector_list_update(struct drm_connector *connector)
 EXPORT_SYMBOL(drm_mode_connector_list_update);

 /**
- * drm_mode_parse_command_line_for_connector - parse command line for connector
- * @mode_option - per connector mode option
- * @connector - connector to parse line for
+ * drm_mode_parse_command_line_for_connector - parse command line modeline for connector
+ * @mode_option: optional per connector mode option
+ * @connector: connector to parse modeline for
+ * @mode: preallocated drm_cmdline_mode structure to fill out
 *
- * This parses the connector specific then generic command lines for
- * modes and options to configure the connector.
+ * This parses @mode_option command line modeline for modes and options to
+ * configure the connector. If @mode_option is NULL the default command line
+ * modeline in fb_mode_option will be parsed instead.
+ *
+ * This uses the same parameters as the fb modedb.c, except for an extra
+ * force-enable, force-enable-digital and force-disable bit at the end:
 *
- * This uses the same parameters as the fb modedb.c, except for extra
 *	<xres>x<yres>[M][R][-<bpp>][@<refresh>][i][m][eDd]
 *
- * enable/enable Digital/disable bit at the end
+ * The intermediate drm_cmdline_mode structure is required to store additional
+ * options from the command line modline like the force-enabel/disable flag.
+ *
+ * Returns:
+ * True if a valid modeline has been parsed, false otherwise.
 */
 bool drm_mode_parse_command_line_for_connector(const char *mode_option,
 					       struct drm_connector *connector,
@ -1219,6 +1225,14 @@ done:
 }
 EXPORT_SYMBOL(drm_mode_parse_command_line_for_connector);

+/**
+ * drm_mode_create_from_cmdline_mode - convert a command line modeline into a DRM display mode
+ * @dev: DRM device to create the new mode for
+ * @cmd: input command line modeline
+ *
+ * Returns:
+ * Pointer to converted mode on success, NULL on error.
+ */
 struct drm_display_mode *
 drm_mode_create_from_cmdline_mode(struct drm_device *dev,
 				  struct drm_cmdline_mode *cmd)
--- a/drivers/gpu/drm/drm_pci.c
+++ b/drivers/gpu/drm/drm_pci.c
@ -351,7 +351,7 @@ err_agp:
 	drm_pci_agp_destroy(dev);
 	pci_disable_device(pdev);
 err_free:
-	drm_dev_free(dev);
+	drm_dev_unref(dev);
 	return ret;
 }
 EXPORT_SYMBOL(drm_get_pci_dev);
@ -468,8 +468,8 @@ void drm_pci_exit(struct drm_driver *driver, struct pci_driver *pdriver)
 	} else {
 		list_for_each_entry_safe(dev, tmp, &driver->legacy_dev_list,
 					 legacy_dev_list) {
-			drm_put_dev(dev);
 			list_del(&dev->legacy_dev_list);
+			drm_put_dev(dev);
 		}
 	}
 	DRM_INFO("Module unloaded\n");
--- a/drivers/gpu/drm/drm_platform.c
+++ b/drivers/gpu/drm/drm_platform.c
@ -64,7 +64,7 @@ static int drm_get_platform_dev(struct platform_device *platdev,
 	return 0;

 err_free:
-	drm_dev_free(dev);
+	drm_dev_unref(dev);
 	return ret;
 }

--- a/drivers/gpu/drm/drm_prime.c
+++ b/drivers/gpu/drm/drm_prime.c
@ -68,7 +68,8 @@ struct drm_prime_attachment {
 	enum dma_data_direction dir;
 };

-static int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv, struct dma_buf *dma_buf, uint32_t handle)
+static int drm_prime_add_buf_handle(struct drm_prime_file_private *prime_fpriv,
+				    struct dma_buf *dma_buf, uint32_t handle)
 {
 	struct drm_prime_member *member;

@ -174,7 +175,7 @@ void drm_prime_remove_buf_handle_locked(struct drm_prime_file_private *prime_fpr
 }

 static struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach,
-		enum dma_data_direction dir)
+					    enum dma_data_direction dir)
 {
 	struct drm_prime_attachment *prime_attach = attach->priv;
 	struct drm_gem_object *obj = attach->dmabuf->priv;
@ -211,11 +212,19 @@ static struct sg_table *drm_gem_map_dma_buf(struct dma_buf_attachment *attach,
 }

 static void drm_gem_unmap_dma_buf(struct dma_buf_attachment *attach,
-		struct sg_table *sgt, enum dma_data_direction dir)
+				  struct sg_table *sgt,
+				  enum dma_data_direction dir)
 {
 	/* nothing to be done here */
 }

+/**
+ * drm_gem_dmabuf_release - dma_buf release implementation for GEM
+ * @dma_buf: buffer to be released
+ *
+ * Generic release function for dma_bufs exported as PRIME buffers. GEM drivers
+ * must use this in their dma_buf ops structure as the release callback.
+ */
 void drm_gem_dmabuf_release(struct dma_buf *dma_buf)
 {
 	struct drm_gem_object *obj = dma_buf->priv;
@ -242,30 +251,30 @@ static void drm_gem_dmabuf_vunmap(struct dma_buf *dma_buf, void *vaddr)
 }

 static void *drm_gem_dmabuf_kmap_atomic(struct dma_buf *dma_buf,
-		unsigned long page_num)
+					unsigned long page_num)
 {
 	return NULL;
 }

 static void drm_gem_dmabuf_kunmap_atomic(struct dma_buf *dma_buf,
-		unsigned long page_num, void *addr)
+					 unsigned long page_num, void *addr)
 {

 }
 static void *drm_gem_dmabuf_kmap(struct dma_buf *dma_buf,
-		unsigned long page_num)
+				 unsigned long page_num)
 {
 	return NULL;
 }

 static void drm_gem_dmabuf_kunmap(struct dma_buf *dma_buf,
-		unsigned long page_num, void *addr)
+				  unsigned long page_num, void *addr)
 {

 }

 static int drm_gem_dmabuf_mmap(struct dma_buf *dma_buf,
-		struct vm_area_struct *vma)
+			       struct vm_area_struct *vma)
 {
 	struct drm_gem_object *obj = dma_buf->priv;
 	struct drm_device *dev = obj->dev;
@ -315,6 +324,15 @@ static const struct dma_buf_ops drm_gem_prime_dmabuf_ops =  {
 *    driver's scatter/gather table
 */

+/**
+ * drm_gem_prime_export - helper library implemention of the export callback
+ * @dev: drm_device to export from
+ * @obj: GEM object to export
+ * @flags: flags like DRM_CLOEXEC
+ *
+ * This is the implementation of the gem_prime_export functions for GEM drivers
+ * using the PRIME helpers.
+ */
 struct dma_buf *drm_gem_prime_export(struct drm_device *dev,
 				     struct drm_gem_object *obj, int flags)
 {
@ -355,9 +373,23 @@ static struct dma_buf *export_and_register_object(struct drm_device *dev,
 	return dmabuf;
 }

+/**
+ * drm_gem_prime_handle_to_fd - PRIME export function for GEM drivers
+ * @dev: dev to export the buffer from
+ * @file_priv: drm file-private structure
+ * @handle: buffer handle to export
+ * @flags: flags like DRM_CLOEXEC
+ * @prime_fd: pointer to storage for the fd id of the create dma-buf
+ *
+ * This is the PRIME export function which must be used mandatorily by GEM
+ * drivers to ensure correct lifetime management of the underlying GEM object.
+ * The actual exporting from GEM object to a dma-buf is done through the
+ * gem_prime_export driver callback.
+ */
 int drm_gem_prime_handle_to_fd(struct drm_device *dev,
-		struct drm_file *file_priv, uint32_t handle, uint32_t flags,
-		int *prime_fd)
+			       struct drm_file *file_priv, uint32_t handle,
+			       uint32_t flags,
+			       int *prime_fd)
 {
 	struct drm_gem_object *obj;
 	int ret = 0;
@ -441,6 +473,14 @@ out_unlock:
 }
 EXPORT_SYMBOL(drm_gem_prime_handle_to_fd);

+/**
+ * drm_gem_prime_import - helper library implemention of the import callback
+ * @dev: drm_device to import into
+ * @dma_buf: dma-buf object to import
+ *
+ * This is the implementation of the gem_prime_import functions for GEM drivers
+ * using the PRIME helpers.
+ */
 struct drm_gem_object *drm_gem_prime_import(struct drm_device *dev,
 					    struct dma_buf *dma_buf)
 {
@ -496,8 +536,21 @@ fail_detach:
 }
 EXPORT_SYMBOL(drm_gem_prime_import);

+/**
+ * drm_gem_prime_fd_to_handle - PRIME import function for GEM drivers
+ * @dev: dev to export the buffer from
+ * @file_priv: drm file-private structure
+ * @prime_fd: fd id of the dma-buf which should be imported
+ * @handle: pointer to storage for the handle of the imported buffer object
+ *
+ * This is the PRIME import function which must be used mandatorily by GEM
+ * drivers to ensure correct lifetime management of the underlying GEM object.
+ * The actual importing of GEM object from the dma-buf is done through the
+ * gem_import_export driver callback.
+ */
 int drm_gem_prime_fd_to_handle(struct drm_device *dev,
-		struct drm_file *file_priv, int prime_fd, uint32_t *handle)
+			       struct drm_file *file_priv, int prime_fd,
+			       uint32_t *handle)
 {
 	struct dma_buf *dma_buf;
 	struct drm_gem_object *obj;
@ -598,12 +651,14 @@ int drm_prime_fd_to_handle_ioctl(struct drm_device *dev, void *data,
 			args->fd, &args->handle);
 }

-/*
- * drm_prime_pages_to_sg
+/**
+ * drm_prime_pages_to_sg - converts a page array into an sg list
+ * @pages: pointer to the array of page pointers to convert
+ * @nr_pages: length of the page vector
 *
- * this helper creates an sg table object from a set of pages
+ * This helper creates an sg table object from a set of pages
 * the driver is responsible for mapping the pages into the
- * importers address space
+ * importers address space for use with dma_buf itself.
 */
 struct sg_table *drm_prime_pages_to_sg(struct page **pages, int nr_pages)
 {
@ -628,9 +683,16 @@ out:
 }
 EXPORT_SYMBOL(drm_prime_pages_to_sg);

-/* export an sg table into an array of pages and addresses
-   this is currently required by the TTM driver in order to do correct fault
-   handling */
+/**
+ * drm_prime_sg_to_page_addr_arrays - convert an sg table into a page array
+ * @sgt: scatter-gather table to convert
+ * @pages: array of page pointers to store the page array in
+ * @addrs: optional array to store the dma bus address of each page
+ * @max_pages: size of both the passed-in arrays
+ *
+ * Exports an sg table into an array of pages and addresses. This is currently
+ * required by the TTM driver in order to do correct fault handling.
+ */
 int drm_prime_sg_to_page_addr_arrays(struct sg_table *sgt, struct page **pages,
 				     dma_addr_t *addrs, int max_pages)
 {
@ -663,7 +725,15 @@ int drm_prime_sg_to_page_addr_arrays(struct sg_table *sgt, struct page **pages,
 	return 0;
 }
 EXPORT_SYMBOL(drm_prime_sg_to_page_addr_arrays);
-/* helper function to cleanup a GEM/prime object */
+
+/**
+ * drm_prime_gem_destroy - helper to clean up a PRIME-imported GEM object
+ * @obj: GEM object which was created from a dma-buf
+ * @sg: the sg-table which was pinned at import time
+ *
+ * This is the cleanup functions which GEM drivers need to call when they use
+ * @drm_gem_prime_import to import dma-bufs.
+ */
 void drm_prime_gem_destroy(struct drm_gem_object *obj, struct sg_table *sg)
 {
 	struct dma_buf_attachment *attach;
@ -683,11 +753,9 @@ void drm_prime_init_file_private(struct drm_prime_file_private *prime_fpriv)
 	INIT_LIST_HEAD(&prime_fpriv->head);
 	mutex_init(&prime_fpriv->lock);
 }
-EXPORT_SYMBOL(drm_prime_init_file_private);

 void drm_prime_destroy_file_private(struct drm_prime_file_private *prime_fpriv)
 {
 	/* by now drm_gem_release should've made sure the list is empty */
 	WARN_ON(!list_empty(&prime_fpriv->head));
 }
-EXPORT_SYMBOL(drm_prime_destroy_file_private);
--- a/drivers/gpu/drm/drm_stub.c
+++ b/drivers/gpu/drm/drm_stub.c
@ -31,8 +31,10 @@
 * DEALINGS IN THE SOFTWARE.
 */

+#include <linux/fs.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
+#include <linux/mount.h>
 #include <linux/slab.h>
 #include <drm/drmP.h>
 #include <drm/drm_core.h>
@ -70,6 +72,7 @@ module_param_named(vblankoffdelay, drm_vblank_offdelay, int, 0600);
 module_param_named(timestamp_precision_usec, drm_timestamp_precision, int, 0600);
 module_param_named(timestamp_monotonic, drm_timestamp_monotonic, int, 0600);

+static DEFINE_SPINLOCK(drm_minor_lock);
 struct idr drm_minors_idr;

 struct class *drm_class;
@ -117,26 +120,6 @@ void drm_ut_debug_printk(unsigned int request_level,
 }
 EXPORT_SYMBOL(drm_ut_debug_printk);

-static int drm_minor_get_id(struct drm_device *dev, int type)
-{
-	int ret;
-	int base = 0, limit = 63;
-
-	if (type == DRM_MINOR_CONTROL) {
-		base += 64;
-		limit = base + 63;
-	} else if (type == DRM_MINOR_RENDER) {
-		base += 128;
-		limit = base + 63;
-	}
-
-	mutex_lock(&dev->struct_mutex);
-	ret = idr_alloc(&drm_minors_idr, NULL, base, limit, GFP_KERNEL);
-	mutex_unlock(&dev->struct_mutex);
-
-	return ret == -ENOSPC ? -EINVAL : ret;
-}
-
 struct drm_master *drm_master_create(struct drm_minor *minor)
 {
 	struct drm_master *master;
@ -260,119 +243,183 @@ int drm_dropmaster_ioctl(struct drm_device *dev, void *data,
 	return 0;
 }

-/**
- * drm_get_minor - Allocate and register new DRM minor
- * @dev: DRM device
- * @minor: Pointer to where new minor is stored
- * @type: Type of minor
+/*
+ * DRM Minors
+ * A DRM device can provide several char-dev interfaces on the DRM-Major. Each
+ * of them is represented by a drm_minor object. Depending on the capabilities
+ * of the device-driver, different interfaces are registered.
 *
- * Allocate a new minor of the given type and register it. A pointer to the new
- * minor is returned in @minor.
- * Caller must hold the global DRM mutex.
- *
- * RETURNS:
- * 0 on success, negative error code on failure.
+ * Minors can be accessed via dev->$minor_name. This pointer is either
+ * NULL or a valid drm_minor pointer and stays valid as long as the device is
+ * valid. This means, DRM minors have the same life-time as the underlying
+ * device. However, this doesn't mean that the minor is active. Minors are
+ * registered and unregistered dynamically according to device-state.
 */
-static int drm_get_minor(struct drm_device *dev, struct drm_minor **minor,
-			 int type)
+
+static struct drm_minor **drm_minor_get_slot(struct drm_device *dev,
+					     unsigned int type)
+{
+	switch (type) {
+	case DRM_MINOR_LEGACY:
+		return &dev->primary;
+	case DRM_MINOR_RENDER:
+		return &dev->render;
+	case DRM_MINOR_CONTROL:
+		return &dev->control;
+	default:
+		return NULL;
+	}
+}
+
+static int drm_minor_alloc(struct drm_device *dev, unsigned int type)
+{
+	struct drm_minor *minor;
+
+	minor = kzalloc(sizeof(*minor), GFP_KERNEL);
+	if (!minor)
+		return -ENOMEM;
+
+	minor->type = type;
+	minor->dev = dev;
+	INIT_LIST_HEAD(&minor->master_list);
+
+	*drm_minor_get_slot(dev, type) = minor;
+	return 0;
+}
+
+static void drm_minor_free(struct drm_device *dev, unsigned int type)
+{
+	struct drm_minor **slot;
+
+	slot = drm_minor_get_slot(dev, type);
+	if (*slot) {
+		kfree(*slot);
+		*slot = NULL;
+	}
+}
+
+static int drm_minor_register(struct drm_device *dev, unsigned int type)
 {
 	struct drm_minor *new_minor;
+	unsigned long flags;
 	int ret;
 	int minor_id;

 	DRM_DEBUG("\n");

-	minor_id = drm_minor_get_id(dev, type);
+	new_minor = *drm_minor_get_slot(dev, type);
+	if (!new_minor)
+		return 0;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock_irqsave(&drm_minor_lock, flags);
+	minor_id = idr_alloc(&drm_minors_idr,
+			     NULL,
+			     64 * type,
+			     64 * (type + 1),
+			     GFP_NOWAIT);
+	spin_unlock_irqrestore(&drm_minor_lock, flags);
+	idr_preload_end();
+
 	if (minor_id < 0)
 		return minor_id;

-	new_minor = kzalloc(sizeof(struct drm_minor), GFP_KERNEL);
-	if (!new_minor) {
-		ret = -ENOMEM;
-		goto err_idr;
-	}
-
-	new_minor->type = type;
-	new_minor->device = MKDEV(DRM_MAJOR, minor_id);
-	new_minor->dev = dev;
 	new_minor->index = minor_id;
-	INIT_LIST_HEAD(&new_minor->master_list);

-	idr_replace(&drm_minors_idr, new_minor, minor_id);
-
-#if defined(CONFIG_DEBUG_FS)
 	ret = drm_debugfs_init(new_minor, minor_id, drm_debugfs_root);
 	if (ret) {
 		DRM_ERROR("DRM: Failed to initialize /sys/kernel/debug/dri.\n");
-		goto err_mem;
+		goto err_id;
 	}
-#endif

 	ret = drm_sysfs_device_add(new_minor);
 	if (ret) {
-		printk(KERN_ERR
-		       "DRM: Error sysfs_device_add.\n");
+		DRM_ERROR("DRM: Error sysfs_device_add.\n");
 		goto err_debugfs;
 	}
-	*minor = new_minor;
+
+	/* replace NULL with @minor so lookups will succeed from now on */
+	spin_lock_irqsave(&drm_minor_lock, flags);
+	idr_replace(&drm_minors_idr, new_minor, new_minor->index);
+	spin_unlock_irqrestore(&drm_minor_lock, flags);

 	DRM_DEBUG("new minor assigned %d\n", minor_id);
 	return 0;

-
 err_debugfs:
-#if defined(CONFIG_DEBUG_FS)
 	drm_debugfs_cleanup(new_minor);
-err_mem:
-#endif
-	kfree(new_minor);
-err_idr:
+err_id:
+	spin_lock_irqsave(&drm_minor_lock, flags);
 	idr_remove(&drm_minors_idr, minor_id);
-	*minor = NULL;
+	spin_unlock_irqrestore(&drm_minor_lock, flags);
+	new_minor->index = 0;
 	return ret;
 }

-/**
- * drm_unplug_minor - Unplug DRM minor
- * @minor: Minor to unplug
- *
- * Unplugs the given DRM minor but keeps the object. So after this returns,
- * minor->dev is still valid so existing open-files can still access it to get
- * device information from their drm_file ojects.
- * If the minor is already unplugged or if @minor is NULL, nothing is done.
- * The global DRM mutex must be held by the caller.
- */
-static void drm_unplug_minor(struct drm_minor *minor)
+static void drm_minor_unregister(struct drm_device *dev, unsigned int type)
 {
+	struct drm_minor *minor;
+	unsigned long flags;
+
+	minor = *drm_minor_get_slot(dev, type);
 	if (!minor || !minor->kdev)
 		return;

-#if defined(CONFIG_DEBUG_FS)
-	drm_debugfs_cleanup(minor);
-#endif
-
-	drm_sysfs_device_remove(minor);
+	spin_lock_irqsave(&drm_minor_lock, flags);
 	idr_remove(&drm_minors_idr, minor->index);
+	spin_unlock_irqrestore(&drm_minor_lock, flags);
+	minor->index = 0;
+
+	drm_debugfs_cleanup(minor);
+	drm_sysfs_device_remove(minor);
 }

 /**
- * drm_put_minor - Destroy DRM minor
- * @minor: Minor to destroy
+ * drm_minor_acquire - Acquire a DRM minor
+ * @minor_id: Minor ID of the DRM-minor
 *
- * This calls drm_unplug_minor() on the given minor and then frees it. Nothing
- * is done if @minor is NULL. It is fine to call this on already unplugged
- * minors.
- * The global DRM mutex must be held by the caller.
+ * Looks up the given minor-ID and returns the respective DRM-minor object. The
+ * refence-count of the underlying device is increased so you must release this
+ * object with drm_minor_release().
+ *
+ * As long as you hold this minor, it is guaranteed that the object and the
+ * minor->dev pointer will stay valid! However, the device may get unplugged and
+ * unregistered while you hold the minor.
+ *
+ * Returns:
+ * Pointer to minor-object with increased device-refcount, or PTR_ERR on
+ * failure.
 */
-static void drm_put_minor(struct drm_minor *minor)
+struct drm_minor *drm_minor_acquire(unsigned int minor_id)
 {
-	if (!minor)
-		return;
+	struct drm_minor *minor;
+	unsigned long flags;

-	DRM_DEBUG("release secondary minor %d\n", minor->index);
+	spin_lock_irqsave(&drm_minor_lock, flags);
+	minor = idr_find(&drm_minors_idr, minor_id);
+	if (minor)
+		drm_dev_ref(minor->dev);
+	spin_unlock_irqrestore(&drm_minor_lock, flags);

-	drm_unplug_minor(minor);
-	kfree(minor);
+	if (!minor) {
+		return ERR_PTR(-ENODEV);
+	} else if (drm_device_is_unplugged(minor->dev)) {
+		drm_dev_unref(minor->dev);
+		return ERR_PTR(-ENODEV);
+	}
+
+	return minor;
+}
+
+/**
+ * drm_minor_release - Release DRM minor
+ * @minor: Pointer to DRM minor object
+ *
+ * Release a minor that was previously acquired via drm_minor_acquire().
+ */
+void drm_minor_release(struct drm_minor *minor)
+{
+	drm_dev_unref(minor->dev);
 }

 /**
@ -392,18 +439,16 @@ void drm_put_dev(struct drm_device *dev)
 	}

 	drm_dev_unregister(dev);
-	drm_dev_free(dev);
+	drm_dev_unref(dev);
 }
 EXPORT_SYMBOL(drm_put_dev);

 void drm_unplug_dev(struct drm_device *dev)
 {
 	/* for a USB device */
-	if (drm_core_check_feature(dev, DRIVER_MODESET))
-		drm_unplug_minor(dev->control);
-	if (dev->render)
-		drm_unplug_minor(dev->render);
-	drm_unplug_minor(dev->primary);
+	drm_minor_unregister(dev, DRM_MINOR_LEGACY);
+	drm_minor_unregister(dev, DRM_MINOR_RENDER);
+	drm_minor_unregister(dev, DRM_MINOR_CONTROL);

 	mutex_lock(&drm_global_mutex);

@ -416,6 +461,78 @@ void drm_unplug_dev(struct drm_device *dev)
 }
 EXPORT_SYMBOL(drm_unplug_dev);

+/*
+ * DRM internal mount
+ * We want to be able to allocate our own "struct address_space" to control
+ * memory-mappings in VRAM (or stolen RAM, ...). However, core MM does not allow
+ * stand-alone address_space objects, so we need an underlying inode. As there
+ * is no way to allocate an independent inode easily, we need a fake internal
+ * VFS mount-point.
+ *
+ * The drm_fs_inode_new() function allocates a new inode, drm_fs_inode_free()
+ * frees it again. You are allowed to use iget() and iput() to get references to
+ * the inode. But each drm_fs_inode_new() call must be paired with exactly one
+ * drm_fs_inode_free() call (which does not have to be the last iput()).
+ * We use drm_fs_inode_*() to manage our internal VFS mount-point and share it
+ * between multiple inode-users. You could, technically, call
+ * iget() + drm_fs_inode_free() directly after alloc and sometime later do an
+ * iput(), but this way you'd end up with a new vfsmount for each inode.
+ */
+
+static int drm_fs_cnt;
+static struct vfsmount *drm_fs_mnt;
+
+static const struct dentry_operations drm_fs_dops = {
+	.d_dname	= simple_dname,
+};
+
+static const struct super_operations drm_fs_sops = {
+	.statfs		= simple_statfs,
+};
+
+static struct dentry *drm_fs_mount(struct file_system_type *fs_type, int flags,
+				   const char *dev_name, void *data)
+{
+	return mount_pseudo(fs_type,
+			    "drm:",
+			    &drm_fs_sops,
+			    &drm_fs_dops,
+			    0x010203ff);
+}
+
+static struct file_system_type drm_fs_type = {
+	.name		= "drm",
+	.owner		= THIS_MODULE,
+	.mount		= drm_fs_mount,
+	.kill_sb	= kill_anon_super,
+};
+
+static struct inode *drm_fs_inode_new(void)
+{
+	struct inode *inode;
+	int r;
+
+	r = simple_pin_fs(&drm_fs_type, &drm_fs_mnt, &drm_fs_cnt);
+	if (r < 0) {
+		DRM_ERROR("Cannot mount pseudo fs: %d\n", r);
+		return ERR_PTR(r);
+	}
+
+	inode = alloc_anon_inode(drm_fs_mnt->mnt_sb);
+	if (IS_ERR(inode))
+		simple_release_fs(&drm_fs_mnt, &drm_fs_cnt);
+
+	return inode;
+}
+
+static void drm_fs_inode_free(struct inode *inode)
+{
+	if (inode) {
+		iput(inode);
+		simple_release_fs(&drm_fs_mnt, &drm_fs_cnt);
+	}
+}
+
 /**
 * drm_dev_alloc - Allocate new drm device
 * @driver: DRM driver to allocate device for
@ -425,6 +542,9 @@ EXPORT_SYMBOL(drm_unplug_dev);
 * Call drm_dev_register() to advertice the device to user space and register it
 * with other core subsystems.
 *
+ * The initial ref-count of the object is 1. Use drm_dev_ref() and
+ * drm_dev_unref() to take and drop further ref-counts.
+ *
 * RETURNS:
 * Pointer to new DRM device, or NULL if out of memory.
 */
@ -438,6 +558,7 @@ struct drm_device *drm_dev_alloc(struct drm_driver *driver,
 	if (!dev)
 		return NULL;

+	kref_init(&dev->ref);
 	dev->dev = parent;
 	dev->driver = driver;

@ -452,8 +573,31 @@ struct drm_device *drm_dev_alloc(struct drm_driver *driver,
 	mutex_init(&dev->struct_mutex);
 	mutex_init(&dev->ctxlist_mutex);

-	if (drm_ht_create(&dev->map_hash, 12))
+	dev->anon_inode = drm_fs_inode_new();
+	if (IS_ERR(dev->anon_inode)) {
+		ret = PTR_ERR(dev->anon_inode);
+		DRM_ERROR("Cannot allocate anonymous inode: %d\n", ret);
 		goto err_free;
+	}
+
+	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
+		ret = drm_minor_alloc(dev, DRM_MINOR_CONTROL);
+		if (ret)
+			goto err_minors;
+	}
+
+	if (drm_core_check_feature(dev, DRIVER_RENDER) && drm_rnodes) {
+		ret = drm_minor_alloc(dev, DRM_MINOR_RENDER);
+		if (ret)
+			goto err_minors;
+	}
+
+	ret = drm_minor_alloc(dev, DRM_MINOR_LEGACY);
+	if (ret)
+		goto err_minors;
+
+	if (drm_ht_create(&dev->map_hash, 12))
+		goto err_minors;

 	ret = drm_ctxbitmap_init(dev);
 	if (ret) {
@ -475,38 +619,68 @@ err_ctxbitmap:
 	drm_ctxbitmap_cleanup(dev);
 err_ht:
 	drm_ht_remove(&dev->map_hash);
+err_minors:
+	drm_minor_free(dev, DRM_MINOR_LEGACY);
+	drm_minor_free(dev, DRM_MINOR_RENDER);
+	drm_minor_free(dev, DRM_MINOR_CONTROL);
+	drm_fs_inode_free(dev->anon_inode);
 err_free:
 	kfree(dev);
 	return NULL;
 }
 EXPORT_SYMBOL(drm_dev_alloc);

-/**
- * drm_dev_free - Free DRM device
- * @dev: DRM device to free
- *
- * Free a DRM device that has previously been allocated via drm_dev_alloc().
- * You must not use kfree() instead or you will leak memory.
- *
- * This must not be called once the device got registered. Use drm_put_dev()
- * instead, which then calls drm_dev_free().
- */
-void drm_dev_free(struct drm_device *dev)
+static void drm_dev_release(struct kref *ref)
 {
-	drm_put_minor(dev->control);
-	drm_put_minor(dev->render);
-	drm_put_minor(dev->primary);
+	struct drm_device *dev = container_of(ref, struct drm_device, ref);

 	if (dev->driver->driver_features & DRIVER_GEM)
 		drm_gem_destroy(dev);

 	drm_ctxbitmap_cleanup(dev);
 	drm_ht_remove(&dev->map_hash);
+	drm_fs_inode_free(dev->anon_inode);
+
+	drm_minor_free(dev, DRM_MINOR_LEGACY);
+	drm_minor_free(dev, DRM_MINOR_RENDER);
+	drm_minor_free(dev, DRM_MINOR_CONTROL);

 	kfree(dev->devname);
 	kfree(dev);
 }
-EXPORT_SYMBOL(drm_dev_free);
+
+/**
+ * drm_dev_ref - Take reference of a DRM device
+ * @dev: device to take reference of or NULL
+ *
+ * This increases the ref-count of @dev by one. You *must* already own a
+ * reference when calling this. Use drm_dev_unref() to drop this reference
+ * again.
+ *
+ * This function never fails. However, this function does not provide *any*
+ * guarantee whether the device is alive or running. It only provides a
+ * reference to the object and the memory associated with it.
+ */
+void drm_dev_ref(struct drm_device *dev)
+{
+	if (dev)
+		kref_get(&dev->ref);
+}
+EXPORT_SYMBOL(drm_dev_ref);
+
+/**
+ * drm_dev_unref - Drop reference of a DRM device
+ * @dev: device to drop reference of or NULL
+ *
+ * This decreases the ref-count of @dev by one. The device is destroyed if the
+ * ref-count drops to zero.
+ */
+void drm_dev_unref(struct drm_device *dev)
+{
+	if (dev)
+		kref_put(&dev->ref, drm_dev_release);
+}
+EXPORT_SYMBOL(drm_dev_unref);

 /**
 * drm_dev_register - Register DRM device
@ -527,26 +701,22 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags)

 	mutex_lock(&drm_global_mutex);

-	if (drm_core_check_feature(dev, DRIVER_MODESET)) {
-		ret = drm_get_minor(dev, &dev->control, DRM_MINOR_CONTROL);
-		if (ret)
-			goto out_unlock;
-	}
-
-	if (drm_core_check_feature(dev, DRIVER_RENDER) && drm_rnodes) {
-		ret = drm_get_minor(dev, &dev->render, DRM_MINOR_RENDER);
-		if (ret)
-			goto err_control_node;
-	}
-
-	ret = drm_get_minor(dev, &dev->primary, DRM_MINOR_LEGACY);
+	ret = drm_minor_register(dev, DRM_MINOR_CONTROL);
 	if (ret)
-		goto err_render_node;
+		goto err_minors;
+
+	ret = drm_minor_register(dev, DRM_MINOR_RENDER);
+	if (ret)
+		goto err_minors;
+
+	ret = drm_minor_register(dev, DRM_MINOR_LEGACY);
+	if (ret)
+		goto err_minors;

 	if (dev->driver->load) {
 		ret = dev->driver->load(dev, flags);
 		if (ret)
-			goto err_primary_node;
+			goto err_minors;
 	}

 	/* setup grouping for legacy outputs */
@ -563,12 +733,10 @@ int drm_dev_register(struct drm_device *dev, unsigned long flags)
 err_unload:
 	if (dev->driver->unload)
 		dev->driver->unload(dev);
-err_primary_node:
-	drm_unplug_minor(dev->primary);
-err_render_node:
-	drm_unplug_minor(dev->render);
-err_control_node:
-	drm_unplug_minor(dev->control);
+err_minors:
+	drm_minor_unregister(dev, DRM_MINOR_LEGACY);
+	drm_minor_unregister(dev, DRM_MINOR_RENDER);
+	drm_minor_unregister(dev, DRM_MINOR_CONTROL);
 out_unlock:
 	mutex_unlock(&drm_global_mutex);
 	return ret;
@ -581,7 +749,7 @@ EXPORT_SYMBOL(drm_dev_register);
 *
 * Unregister the DRM device from the system. This does the reverse of
 * drm_dev_register() but does not deallocate the device. The caller must call
- * drm_dev_free() to free all resources.
+ * drm_dev_unref() to drop their final reference.
 */
 void drm_dev_unregister(struct drm_device *dev)
 {
@ -600,8 +768,8 @@ void drm_dev_unregister(struct drm_device *dev)
 	list_for_each_entry_safe(r_list, list_temp, &dev->maplist, head)
 		drm_rmmap(dev, r_list->map);

-	drm_unplug_minor(dev->control);
-	drm_unplug_minor(dev->render);
-	drm_unplug_minor(dev->primary);
+	drm_minor_unregister(dev, DRM_MINOR_LEGACY);
+	drm_minor_unregister(dev, DRM_MINOR_RENDER);
+	drm_minor_unregister(dev, DRM_MINOR_CONTROL);
 }
 EXPORT_SYMBOL(drm_dev_unregister);
--- a/drivers/gpu/drm/drm_usb.c
+++ b/drivers/gpu/drm/drm_usb.c
@ -30,7 +30,7 @@ int drm_get_usb_dev(struct usb_interface *interface,
 	return 0;

 err_free:
-	drm_dev_free(dev);
+	drm_dev_unref(dev);
 	return ret;

 }
--- a/drivers/gpu/drm/i2c/tda998x_drv.c
+++ b/drivers/gpu/drm/i2c/tda998x_drv.c
--- a/drivers/gpu/drm/i915/i915_gem.c
+++ b/drivers/gpu/drm/i915/i915_gem.c
@ -1526,7 +1526,8 @@ i915_gem_release_mmap(struct drm_i915_gem_object *obj)
 	if (!obj->fault_mappable)
 		return;

-	drm_vma_node_unmap(&obj->base.vma_node, obj->base.dev->dev_mapping);
+	drm_vma_node_unmap(&obj->base.vma_node,
+			   obj->base.dev->anon_inode->i_mapping);
 	obj->fault_mappable = false;
 }

--- a/drivers/gpu/drm/i915/intel_dp.c
+++ b/drivers/gpu/drm/i915/intel_dp.c
@ -91,6 +91,7 @@ static struct intel_dp *intel_attached_dp(struct drm_connector *connector)
 }

 static void intel_dp_link_down(struct intel_dp *intel_dp);
+static bool _edp_panel_vdd_on(struct intel_dp *intel_dp);
 static void edp_panel_vdd_off(struct intel_dp *intel_dp, bool sync);

 static int
@ -459,6 +460,9 @@ intel_dp_aux_ch(struct intel_dp *intel_dp,
 	uint32_t status;
 	int try, clock = 0;
 	bool has_aux_irq = HAS_AUX_IRQ(dev);
+	bool vdd;
+
+	vdd = _edp_panel_vdd_on(intel_dp);

 	/* dp aux is extremely sensitive to irq latency, hence request the
 	 * lowest possible wakeup latency and so prevent the cpu from going into
@ -564,223 +568,130 @@ out:
 	pm_qos_update_request(&dev_priv->pm_qos, PM_QOS_DEFAULT_VALUE);
 	intel_aux_display_runtime_put(dev_priv);

+	if (vdd)
+		edp_panel_vdd_off(intel_dp, false);
+
 	return ret;
 }

-/* Write data to the aux channel in native mode */
-static int
-intel_dp_aux_native_write(struct intel_dp *intel_dp,
-			  uint16_t address, uint8_t *send, int send_bytes)
+#define HEADER_SIZE	4
+static ssize_t
+intel_dp_aux_transfer(struct drm_dp_aux *aux, struct drm_dp_aux_msg *msg)
 {
+	struct intel_dp *intel_dp = container_of(aux, struct intel_dp, aux);
+	uint8_t txbuf[20], rxbuf[20];
+	size_t txsize, rxsize;
 	int ret;
-	uint8_t	msg[20];
-	int msg_bytes;
-	uint8_t	ack;
-	int retry;

-	if (WARN_ON(send_bytes > 16))
-		return -E2BIG;
+	txbuf[0] = msg->request << 4;
+	txbuf[1] = msg->address >> 8;
+	txbuf[2] = msg->address & 0xff;
+	txbuf[3] = msg->size - 1;

-	intel_dp_check_edp(intel_dp);
-	msg[0] = DP_AUX_NATIVE_WRITE << 4;
-	msg[1] = address >> 8;
-	msg[2] = address & 0xff;
-	msg[3] = send_bytes - 1;
-	memcpy(&msg[4], send, send_bytes);
-	msg_bytes = send_bytes + 4;
-	for (retry = 0; retry < 7; retry++) {
-		ret = intel_dp_aux_ch(intel_dp, msg, msg_bytes, &ack, 1);
-		if (ret < 0)
-			return ret;
-		ack >>= 4;
-		if ((ack & DP_AUX_NATIVE_REPLY_MASK) == DP_AUX_NATIVE_REPLY_ACK)
-			return send_bytes;
-		else if ((ack & DP_AUX_NATIVE_REPLY_MASK) == DP_AUX_NATIVE_REPLY_DEFER)
-			usleep_range(400, 500);
-		else
-			return -EIO;
-	}
+	switch (msg->request & ~DP_AUX_I2C_MOT) {
+	case DP_AUX_NATIVE_WRITE:
+	case DP_AUX_I2C_WRITE:
+		txsize = HEADER_SIZE + msg->size;
+		rxsize = 1;

-	DRM_ERROR("too many retries, giving up\n");
-	return -EIO;
-}
+		if (WARN_ON(txsize > 20))
+			return -E2BIG;

-/* Write a single byte to the aux channel in native mode */
-static int
-intel_dp_aux_native_write_1(struct intel_dp *intel_dp,
-			    uint16_t address, uint8_t byte)
-{
-	return intel_dp_aux_native_write(intel_dp, address, &byte, 1);
-}
+		memcpy(txbuf + HEADER_SIZE, msg->buffer, msg->size);

-/* read bytes from a native aux channel */
-static int
-intel_dp_aux_native_read(struct intel_dp *intel_dp,
-			 uint16_t address, uint8_t *recv, int recv_bytes)
-{
-	uint8_t msg[4];
-	int msg_bytes;
-	uint8_t reply[20];
-	int reply_bytes;
-	uint8_t ack;
-	int ret;
-	int retry;
+		ret = intel_dp_aux_ch(intel_dp, txbuf, txsize, rxbuf, rxsize);
+		if (ret > 0) {
+			msg->reply = rxbuf[0] >> 4;

-	if (WARN_ON(recv_bytes > 19))
-		return -E2BIG;
-
-	intel_dp_check_edp(intel_dp);
-	msg[0] = DP_AUX_NATIVE_READ << 4;
-	msg[1] = address >> 8;
-	msg[2] = address & 0xff;
-	msg[3] = recv_bytes - 1;
-
-	msg_bytes = 4;
-	reply_bytes = recv_bytes + 1;
-
-	for (retry = 0; retry < 7; retry++) {
-		ret = intel_dp_aux_ch(intel_dp, msg, msg_bytes,
-				      reply, reply_bytes);
-		if (ret == 0)
-			return -EPROTO;
-		if (ret < 0)
-			return ret;
-		ack = reply[0] >> 4;
-		if ((ack & DP_AUX_NATIVE_REPLY_MASK) == DP_AUX_NATIVE_REPLY_ACK) {
-			memcpy(recv, reply + 1, ret - 1);
-			return ret - 1;
+			/* Return payload size. */
+			ret = msg->size;
 		}
-		else if ((ack & DP_AUX_NATIVE_REPLY_MASK) == DP_AUX_NATIVE_REPLY_DEFER)
-			usleep_range(400, 500);
-		else
-			return -EIO;
+		break;
+
+	case DP_AUX_NATIVE_READ:
+	case DP_AUX_I2C_READ:
+		txsize = HEADER_SIZE;
+		rxsize = msg->size + 1;
+
+		if (WARN_ON(rxsize > 20))
+			return -E2BIG;
+
+		ret = intel_dp_aux_ch(intel_dp, txbuf, txsize, rxbuf, rxsize);
+		if (ret > 0) {
+			msg->reply = rxbuf[0] >> 4;
+			/*
+			 * Assume happy day, and copy the data. The caller is
+			 * expected to check msg->reply before touching it.
+			 *
+			 * Return payload size.
+			 */
+			ret--;
+			memcpy(msg->buffer, rxbuf + 1, ret);
+		}
+		break;
+
+	default:
+		ret = -EINVAL;
+		break;
 	}

-	DRM_ERROR("too many retries, giving up\n");
-	return -EIO;
+	return ret;
 }

-static int
-intel_dp_i2c_aux_ch(struct i2c_adapter *adapter, int mode,
-		    uint8_t write_byte, uint8_t *read_byte)
+static void
+intel_dp_aux_init(struct intel_dp *intel_dp, struct intel_connector *connector)
 {
-	struct i2c_algo_dp_aux_data *algo_data = adapter->algo_data;
-	struct intel_dp *intel_dp = container_of(adapter,
-						struct intel_dp,
-						adapter);
-	uint16_t address = algo_data->address;
-	uint8_t msg[5];
-	uint8_t reply[2];
-	unsigned retry;
-	int msg_bytes;
-	int reply_bytes;
+	struct drm_device *dev = intel_dp_to_dev(intel_dp);
+	struct intel_digital_port *intel_dig_port = dp_to_dig_port(intel_dp);
+	enum port port = intel_dig_port->port;
+	const char *name = NULL;
 	int ret;

-	intel_edp_panel_vdd_on(intel_dp);
-	intel_dp_check_edp(intel_dp);
-	/* Set up the command byte */
-	if (mode & MODE_I2C_READ)
-		msg[0] = DP_AUX_I2C_READ << 4;
-	else
-		msg[0] = DP_AUX_I2C_WRITE << 4;
-
-	if (!(mode & MODE_I2C_STOP))
-		msg[0] |= DP_AUX_I2C_MOT << 4;
-
-	msg[1] = address >> 8;
-	msg[2] = address;
-
-	switch (mode) {
-	case MODE_I2C_WRITE:
-		msg[3] = 0;
-		msg[4] = write_byte;
-		msg_bytes = 5;
-		reply_bytes = 1;
+	switch (port) {
+	case PORT_A:
+		intel_dp->aux_ch_ctl_reg = DPA_AUX_CH_CTL;
+		name = "DPDDC-A";
 		break;
-	case MODE_I2C_READ:
-		msg[3] = 0;
-		msg_bytes = 4;
-		reply_bytes = 2;
+	case PORT_B:
+		intel_dp->aux_ch_ctl_reg = PCH_DPB_AUX_CH_CTL;
+		name = "DPDDC-B";
+		break;
+	case PORT_C:
+		intel_dp->aux_ch_ctl_reg = PCH_DPC_AUX_CH_CTL;
+		name = "DPDDC-C";
+		break;
+	case PORT_D:
+		intel_dp->aux_ch_ctl_reg = PCH_DPD_AUX_CH_CTL;
+		name = "DPDDC-D";
 		break;
 	default:
-		msg_bytes = 3;
-		reply_bytes = 1;
-		break;
+		BUG();
 	}

-	/*
-	 * DP1.2 sections 2.7.7.1.5.6.1 and 2.7.7.1.6.6.1: A DP Source device is
-	 * required to retry at least seven times upon receiving AUX_DEFER
-	 * before giving up the AUX transaction.
-	 */
-	for (retry = 0; retry < 7; retry++) {
-		ret = intel_dp_aux_ch(intel_dp,
-				      msg, msg_bytes,
-				      reply, reply_bytes);
-		if (ret < 0) {
-			DRM_DEBUG_KMS("aux_ch failed %d\n", ret);
-			goto out;
-		}
+	if (!HAS_DDI(dev))
+		intel_dp->aux_ch_ctl_reg = intel_dp->output_reg + 0x10;

-		switch ((reply[0] >> 4) & DP_AUX_NATIVE_REPLY_MASK) {
-		case DP_AUX_NATIVE_REPLY_ACK:
-			/* I2C-over-AUX Reply field is only valid
-			 * when paired with AUX ACK.
-			 */
-			break;
-		case DP_AUX_NATIVE_REPLY_NACK:
-			DRM_DEBUG_KMS("aux_ch native nack\n");
-			ret = -EREMOTEIO;
-			goto out;
-		case DP_AUX_NATIVE_REPLY_DEFER:
-			/*
-			 * For now, just give more slack to branch devices. We
-			 * could check the DPCD for I2C bit rate capabilities,
-			 * and if available, adjust the interval. We could also
-			 * be more careful with DP-to-Legacy adapters where a
-			 * long legacy cable may force very low I2C bit rates.
-			 */
-			if (intel_dp->dpcd[DP_DOWNSTREAMPORT_PRESENT] &
-			    DP_DWN_STRM_PORT_PRESENT)
-				usleep_range(500, 600);
-			else
-				usleep_range(300, 400);
-			continue;
-		default:
-			DRM_ERROR("aux_ch invalid native reply 0x%02x\n",
-				  reply[0]);
-			ret = -EREMOTEIO;
-			goto out;
-		}
+	intel_dp->aux.name = name;
+	intel_dp->aux.dev = dev->dev;
+	intel_dp->aux.transfer = intel_dp_aux_transfer;

-		switch ((reply[0] >> 4) & DP_AUX_I2C_REPLY_MASK) {
-		case DP_AUX_I2C_REPLY_ACK:
-			if (mode == MODE_I2C_READ) {
-				*read_byte = reply[1];
-			}
-			ret = reply_bytes - 1;
-			goto out;
-		case DP_AUX_I2C_REPLY_NACK:
-			DRM_DEBUG_KMS("aux_i2c nack\n");
-			ret = -EREMOTEIO;
-			goto out;
-		case DP_AUX_I2C_REPLY_DEFER:
-			DRM_DEBUG_KMS("aux_i2c defer\n");
-			udelay(100);
-			break;
-		default:
-			DRM_ERROR("aux_i2c invalid reply 0x%02x\n", reply[0]);
-			ret = -EREMOTEIO;
-			goto out;
-		}
+	DRM_DEBUG_KMS("registering %s bus for %s\n", name,
+		      connector->base.kdev->kobj.name);
+
+	ret = drm_dp_aux_register_i2c_bus(&intel_dp->aux);
+	if (ret < 0) {
+		DRM_ERROR("drm_dp_aux_register_i2c_bus() for %s failed (%d)\n",
+			  name, ret);
+		return;
 	}

-	DRM_ERROR("too many retries, giving up\n");
-	ret = -EREMOTEIO;
-
-out:
-	edp_panel_vdd_off(intel_dp, false);
-	return ret;
+	ret = sysfs_create_link(&connector->base.kdev->kobj,
+				&intel_dp->aux.ddc.dev.kobj,
+				intel_dp->aux.ddc.dev.kobj.name);
+	if (ret < 0) {
+		DRM_ERROR("sysfs_create_link() for %s failed (%d)\n", name, ret);
+		drm_dp_aux_unregister_i2c_bus(&intel_dp->aux);
+	}
 }

 static void
@ -789,43 +700,10 @@ intel_dp_connector_unregister(struct intel_connector *intel_connector)
 	struct intel_dp *intel_dp = intel_attached_dp(&intel_connector->base);

 	sysfs_remove_link(&intel_connector->base.kdev->kobj,
-			  intel_dp->adapter.dev.kobj.name);
+			  intel_dp->aux.ddc.dev.kobj.name);
 	intel_connector_unregister(intel_connector);
 }

-static int
-intel_dp_i2c_init(struct intel_dp *intel_dp,
-		  struct intel_connector *intel_connector, const char *name)
-{
-	int	ret;
-
-	DRM_DEBUG_KMS("i2c_init %s\n", name);
-	intel_dp->algo.running = false;
-	intel_dp->algo.address = 0;
-	intel_dp->algo.aux_ch = intel_dp_i2c_aux_ch;
-
-	memset(&intel_dp->adapter, '\0', sizeof(intel_dp->adapter));
-	intel_dp->adapter.owner = THIS_MODULE;
-	intel_dp->adapter.class = I2C_CLASS_DDC;
-	strncpy(intel_dp->adapter.name, name, sizeof(intel_dp->adapter.name) - 1);
-	intel_dp->adapter.name[sizeof(intel_dp->adapter.name) - 1] = '\0';
-	intel_dp->adapter.algo_data = &intel_dp->algo;
-	intel_dp->adapter.dev.parent = intel_connector->base.dev->dev;
-
-	ret = i2c_dp_aux_add_bus(&intel_dp->adapter);
-	if (ret < 0)
-		return ret;
-
-	ret = sysfs_create_link(&intel_connector->base.kdev->kobj,
-				&intel_dp->adapter.dev.kobj,
-				intel_dp->adapter.dev.kobj.name);
-
-	if (ret < 0)
-		i2c_del_adapter(&intel_dp->adapter);
-
-	return ret;
-}
-
 static void
 intel_dp_set_clock(struct intel_encoder *encoder,
 		   struct intel_crtc_config *pipe_config, int link_bw)
@ -1161,23 +1039,21 @@ static  u32 ironlake_get_pp_control(struct intel_dp *intel_dp)
 	return control;
 }

-void intel_edp_panel_vdd_on(struct intel_dp *intel_dp)
+static bool _edp_panel_vdd_on(struct intel_dp *intel_dp)
 {
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	u32 pp;
 	u32 pp_stat_reg, pp_ctrl_reg;
+	bool need_to_disable = !intel_dp->want_panel_vdd;

 	if (!is_edp(intel_dp))
-		return;
-
-	WARN(intel_dp->want_panel_vdd,
-	     "eDP VDD already requested on\n");
+		return false;

 	intel_dp->want_panel_vdd = true;

 	if (edp_have_panel_vdd(intel_dp))
-		return;
+		return need_to_disable;

 	intel_runtime_pm_get(dev_priv);

@ -1203,6 +1079,17 @@ void intel_edp_panel_vdd_on(struct intel_dp *intel_dp)
 		DRM_DEBUG_KMS("eDP was not running\n");
 		msleep(intel_dp->panel_power_up_delay);
 	}
+
+	return need_to_disable;
+}
+
+void intel_edp_panel_vdd_on(struct intel_dp *intel_dp)
+{
+	if (is_edp(intel_dp)) {
+		bool vdd = _edp_panel_vdd_on(intel_dp);
+
+		WARN(!vdd, "eDP VDD already requested on\n");
+	}
 }

 static void edp_panel_vdd_off_sync(struct intel_dp *intel_dp)
@ -1465,8 +1352,8 @@ void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode)
 		return;

 	if (mode != DRM_MODE_DPMS_ON) {
-		ret = intel_dp_aux_native_write_1(intel_dp, DP_SET_POWER,
-						  DP_SET_POWER_D3);
+		ret = drm_dp_dpcd_writeb(&intel_dp->aux, DP_SET_POWER,
+					 DP_SET_POWER_D3);
 		if (ret != 1)
 			DRM_DEBUG_DRIVER("failed to write sink power state\n");
 	} else {
@ -1475,9 +1362,8 @@ void intel_dp_sink_dpms(struct intel_dp *intel_dp, int mode)
 		 * time to wake up.
 		 */
 		for (i = 0; i < 3; i++) {
-			ret = intel_dp_aux_native_write_1(intel_dp,
-							  DP_SET_POWER,
-							  DP_SET_POWER_D0);
+			ret = drm_dp_dpcd_writeb(&intel_dp->aux, DP_SET_POWER,
+						 DP_SET_POWER_D0);
 			if (ret == 1)
 				break;
 			msleep(1);
@ -1701,13 +1587,11 @@ static void intel_edp_psr_enable_sink(struct intel_dp *intel_dp)

 	/* Enable PSR in sink */
 	if (intel_dp->psr_dpcd[1] & DP_PSR_NO_TRAIN_ON_EXIT)
-		intel_dp_aux_native_write_1(intel_dp, DP_PSR_EN_CFG,
-					    DP_PSR_ENABLE &
-					    ~DP_PSR_MAIN_LINK_ACTIVE);
+		drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG,
+				   DP_PSR_ENABLE & ~DP_PSR_MAIN_LINK_ACTIVE);
 	else
-		intel_dp_aux_native_write_1(intel_dp, DP_PSR_EN_CFG,
-					    DP_PSR_ENABLE |
-					    DP_PSR_MAIN_LINK_ACTIVE);
+		drm_dp_dpcd_writeb(&intel_dp->aux, DP_PSR_EN_CFG,
+				   DP_PSR_ENABLE | DP_PSR_MAIN_LINK_ACTIVE);

 	/* Setup AUX registers */
 	I915_WRITE(EDP_PSR_AUX_DATA1(dev), EDP_PSR_DPCD_COMMAND);
@ -2018,26 +1902,25 @@ static void vlv_dp_pre_pll_enable(struct intel_encoder *encoder)
 /*
 * Native read with retry for link status and receiver capability reads for
 * cases where the sink may still be asleep.
+ *
+ * Sinks are *supposed* to come up within 1ms from an off state, but we're also
+ * supposed to retry 3 times per the spec.
 */
-static bool
-intel_dp_aux_native_read_retry(struct intel_dp *intel_dp, uint16_t address,
-			       uint8_t *recv, int recv_bytes)
+static ssize_t
+intel_dp_dpcd_read_wake(struct drm_dp_aux *aux, unsigned int offset,
+			void *buffer, size_t size)
 {
-	int ret, i;
+	ssize_t ret;
+	int i;

-	/*
-	 * Sinks are *supposed* to come up within 1ms from an off state,
-	 * but we're also supposed to retry 3 times per the spec.
-	 */
 	for (i = 0; i < 3; i++) {
-		ret = intel_dp_aux_native_read(intel_dp, address, recv,
-					       recv_bytes);
-		if (ret == recv_bytes)
-			return true;
+		ret = drm_dp_dpcd_read(aux, offset, buffer, size);
+		if (ret == size)
+			return ret;
 		msleep(1);
 	}

-	return false;
+	return ret;
 }

 /*
@ -2047,10 +1930,10 @@ intel_dp_aux_native_read_retry(struct intel_dp *intel_dp, uint16_t address,
 static bool
 intel_dp_get_link_status(struct intel_dp *intel_dp, uint8_t link_status[DP_LINK_STATUS_SIZE])
 {
-	return intel_dp_aux_native_read_retry(intel_dp,
-					      DP_LANE0_1_STATUS,
-					      link_status,
-					      DP_LINK_STATUS_SIZE);
+	return intel_dp_dpcd_read_wake(&intel_dp->aux,
+				       DP_LANE0_1_STATUS,
+				       link_status,
+				       DP_LINK_STATUS_SIZE) == DP_LINK_STATUS_SIZE;
 }

 /*
@ -2564,8 +2447,8 @@ intel_dp_set_link_train(struct intel_dp *intel_dp,
 		len = intel_dp->lane_count + 1;
 	}

-	ret = intel_dp_aux_native_write(intel_dp, DP_TRAINING_PATTERN_SET,
-					buf, len);
+	ret = drm_dp_dpcd_write(&intel_dp->aux, DP_TRAINING_PATTERN_SET,
+				buf, len);

 	return ret == len;
 }
@ -2594,9 +2477,8 @@ intel_dp_update_link_train(struct intel_dp *intel_dp, uint32_t *DP,
 	I915_WRITE(intel_dp->output_reg, *DP);
 	POSTING_READ(intel_dp->output_reg);

-	ret = intel_dp_aux_native_write(intel_dp, DP_TRAINING_LANE0_SET,
-					intel_dp->train_set,
-					intel_dp->lane_count);
+	ret = drm_dp_dpcd_write(&intel_dp->aux, DP_TRAINING_LANE0_SET,
+				intel_dp->train_set, intel_dp->lane_count);

 	return ret == intel_dp->lane_count;
 }
@ -2652,11 +2534,11 @@ intel_dp_start_link_train(struct intel_dp *intel_dp)
 	link_config[1] = intel_dp->lane_count;
 	if (drm_dp_enhanced_frame_cap(intel_dp->dpcd))
 		link_config[1] |= DP_LANE_COUNT_ENHANCED_FRAME_EN;
-	intel_dp_aux_native_write(intel_dp, DP_LINK_BW_SET, link_config, 2);
+	drm_dp_dpcd_write(&intel_dp->aux, DP_LINK_BW_SET, link_config, 2);

 	link_config[0] = 0;
 	link_config[1] = DP_SET_ANSI_8B10B;
-	intel_dp_aux_native_write(intel_dp, DP_DOWNSPREAD_CTRL, link_config, 2);
+	drm_dp_dpcd_write(&intel_dp->aux, DP_DOWNSPREAD_CTRL, link_config, 2);

 	DP |= DP_PORT_EN;

@ -2899,8 +2781,8 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)

 	char dpcd_hex_dump[sizeof(intel_dp->dpcd) * 3];

-	if (intel_dp_aux_native_read_retry(intel_dp, 0x000, intel_dp->dpcd,
-					   sizeof(intel_dp->dpcd)) == 0)
+	if (intel_dp_dpcd_read_wake(&intel_dp->aux, 0x000, intel_dp->dpcd,
+				    sizeof(intel_dp->dpcd)) < 0)
 		return false; /* aux transfer failed */

 	hex_dump_to_buffer(intel_dp->dpcd, sizeof(intel_dp->dpcd),
@ -2913,9 +2795,9 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
 	/* Check if the panel supports PSR */
 	memset(intel_dp->psr_dpcd, 0, sizeof(intel_dp->psr_dpcd));
 	if (is_edp(intel_dp)) {
-		intel_dp_aux_native_read_retry(intel_dp, DP_PSR_SUPPORT,
-					       intel_dp->psr_dpcd,
-					       sizeof(intel_dp->psr_dpcd));
+		intel_dp_dpcd_read_wake(&intel_dp->aux, DP_PSR_SUPPORT,
+					intel_dp->psr_dpcd,
+					sizeof(intel_dp->psr_dpcd));
 		if (intel_dp->psr_dpcd[0] & DP_PSR_IS_SUPPORTED) {
 			dev_priv->psr.sink_support = true;
 			DRM_DEBUG_KMS("Detected EDP PSR Panel.\n");
@ -2937,9 +2819,9 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
 	if (intel_dp->dpcd[DP_DPCD_REV] == 0x10)
 		return true; /* no per-port downstream info */

-	if (intel_dp_aux_native_read_retry(intel_dp, DP_DOWNSTREAM_PORT_0,
-					   intel_dp->downstream_ports,
-					   DP_MAX_DOWNSTREAM_PORTS) == 0)
+	if (intel_dp_dpcd_read_wake(&intel_dp->aux, DP_DOWNSTREAM_PORT_0,
+				    intel_dp->downstream_ports,
+				    DP_MAX_DOWNSTREAM_PORTS) < 0)
 		return false; /* downstream port status fetch failed */

 	return true;
@ -2955,11 +2837,11 @@ intel_dp_probe_oui(struct intel_dp *intel_dp)

 	intel_edp_panel_vdd_on(intel_dp);

-	if (intel_dp_aux_native_read_retry(intel_dp, DP_SINK_OUI, buf, 3))
+	if (intel_dp_dpcd_read_wake(&intel_dp->aux, DP_SINK_OUI, buf, 3) == 3)
 		DRM_DEBUG_KMS("Sink OUI: %02hx%02hx%02hx\n",
 			      buf[0], buf[1], buf[2]);

-	if (intel_dp_aux_native_read_retry(intel_dp, DP_BRANCH_OUI, buf, 3))
+	if (intel_dp_dpcd_read_wake(&intel_dp->aux, DP_BRANCH_OUI, buf, 3) == 3)
 		DRM_DEBUG_KMS("Branch OUI: %02hx%02hx%02hx\n",
 			      buf[0], buf[1], buf[2]);

@ -2974,46 +2856,40 @@ int intel_dp_sink_crc(struct intel_dp *intel_dp, u8 *crc)
 		to_intel_crtc(intel_dig_port->base.base.crtc);
 	u8 buf[1];

-	if (!intel_dp_aux_native_read(intel_dp, DP_TEST_SINK_MISC, buf, 1))
+	if (drm_dp_dpcd_readb(&intel_dp->aux, DP_TEST_SINK_MISC, buf) < 0)
 		return -EAGAIN;

 	if (!(buf[0] & DP_TEST_CRC_SUPPORTED))
 		return -ENOTTY;

-	if (!intel_dp_aux_native_write_1(intel_dp, DP_TEST_SINK,
-					 DP_TEST_SINK_START))
+	if (drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK,
+			       DP_TEST_SINK_START) < 0)
 		return -EAGAIN;

 	/* Wait 2 vblanks to be sure we will have the correct CRC value */
 	intel_wait_for_vblank(dev, intel_crtc->pipe);
 	intel_wait_for_vblank(dev, intel_crtc->pipe);

-	if (!intel_dp_aux_native_read(intel_dp, DP_TEST_CRC_R_CR, crc, 6))
+	if (drm_dp_dpcd_read(&intel_dp->aux, DP_TEST_CRC_R_CR, crc, 6) < 0)
 		return -EAGAIN;

-	intel_dp_aux_native_write_1(intel_dp, DP_TEST_SINK, 0);
+	drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_SINK, 0);
 	return 0;
 }

 static bool
 intel_dp_get_sink_irq(struct intel_dp *intel_dp, u8 *sink_irq_vector)
 {
-	int ret;
-
-	ret = intel_dp_aux_native_read_retry(intel_dp,
-					     DP_DEVICE_SERVICE_IRQ_VECTOR,
-					     sink_irq_vector, 1);
-	if (!ret)
-		return false;
-
-	return true;
+	return intel_dp_dpcd_read_wake(&intel_dp->aux,
+				       DP_DEVICE_SERVICE_IRQ_VECTOR,
+				       sink_irq_vector, 1) == 1;
 }

 static void
 intel_dp_handle_test_request(struct intel_dp *intel_dp)
 {
 	/* NAK by default */
-	intel_dp_aux_native_write_1(intel_dp, DP_TEST_RESPONSE, DP_TEST_NAK);
+	drm_dp_dpcd_writeb(&intel_dp->aux, DP_TEST_RESPONSE, DP_TEST_NAK);
 }

 /*
@ -3052,9 +2928,9 @@ intel_dp_check_link_status(struct intel_dp *intel_dp)
 	if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11 &&
 	    intel_dp_get_sink_irq(intel_dp, &sink_irq_vector)) {
 		/* Clear interrupt source */
-		intel_dp_aux_native_write_1(intel_dp,
-					    DP_DEVICE_SERVICE_IRQ_VECTOR,
-					    sink_irq_vector);
+		drm_dp_dpcd_writeb(&intel_dp->aux,
+				   DP_DEVICE_SERVICE_IRQ_VECTOR,
+				   sink_irq_vector);

 		if (sink_irq_vector & DP_AUTOMATED_TEST_REQUEST)
 			intel_dp_handle_test_request(intel_dp);
@ -3089,15 +2965,17 @@ intel_dp_detect_dpcd(struct intel_dp *intel_dp)
 	if (intel_dp->dpcd[DP_DPCD_REV] >= 0x11 &&
 	    intel_dp->downstream_ports[0] & DP_DS_PORT_HPD) {
 		uint8_t reg;
-		if (!intel_dp_aux_native_read_retry(intel_dp, DP_SINK_COUNT,
-						    &reg, 1))
+
+		if (intel_dp_dpcd_read_wake(&intel_dp->aux, DP_SINK_COUNT,
+					    &reg, 1) < 0)
 			return connector_status_unknown;
+
 		return DP_GET_SINK_COUNT(reg) ? connector_status_connected
 					      : connector_status_disconnected;
 	}

 	/* If no HPD, poke DDC gently */
-	if (drm_probe_ddc(&intel_dp->adapter))
+	if (drm_probe_ddc(&intel_dp->aux.ddc))
 		return connector_status_connected;

 	/* Well we tried, say unknown for unreliable port types */
@ -3265,7 +3143,7 @@ intel_dp_detect(struct drm_connector *connector, bool force)
 	if (intel_dp->force_audio != HDMI_AUDIO_AUTO) {
 		intel_dp->has_audio = (intel_dp->force_audio == HDMI_AUDIO_ON);
 	} else {
-		edid = intel_dp_get_edid(connector, &intel_dp->adapter);
+		edid = intel_dp_get_edid(connector, &intel_dp->aux.ddc);
 		if (edid) {
 			intel_dp->has_audio = drm_detect_monitor_audio(edid);
 			kfree(edid);
@ -3301,7 +3179,7 @@ static int intel_dp_get_modes(struct drm_connector *connector)
 	power_domain = intel_display_port_power_domain(intel_encoder);
 	intel_display_power_get(dev_priv, power_domain);

-	ret = intel_dp_get_edid_modes(connector, &intel_dp->adapter);
+	ret = intel_dp_get_edid_modes(connector, &intel_dp->aux.ddc);
 	intel_display_power_put(dev_priv, power_domain);
 	if (ret)
 		return ret;
@ -3334,7 +3212,7 @@ intel_dp_detect_audio(struct drm_connector *connector)
 	power_domain = intel_display_port_power_domain(intel_encoder);
 	intel_display_power_get(dev_priv, power_domain);

-	edid = intel_dp_get_edid(connector, &intel_dp->adapter);
+	edid = intel_dp_get_edid(connector, &intel_dp->aux.ddc);
 	if (edid) {
 		has_audio = drm_detect_monitor_audio(edid);
 		kfree(edid);
@ -3456,7 +3334,7 @@ void intel_dp_encoder_destroy(struct drm_encoder *encoder)
 	struct intel_dp *intel_dp = &intel_dig_port->dp;
 	struct drm_device *dev = intel_dp_to_dev(intel_dp);

-	i2c_del_adapter(&intel_dp->adapter);
+	drm_dp_aux_unregister_i2c_bus(&intel_dp->aux);
 	drm_encoder_cleanup(encoder);
 	if (is_edp(intel_dp)) {
 		cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
@ -3768,7 +3646,7 @@ static bool intel_edp_init_connector(struct intel_dp *intel_dp,
 	/* We now know it's not a ghost, init power sequence regs. */
 	intel_dp_init_panel_power_sequencer_registers(dev, intel_dp, power_seq);

-	edid = drm_get_edid(connector, &intel_dp->adapter);
+	edid = drm_get_edid(connector, &intel_dp->aux.ddc);
 	if (edid) {
 		if (drm_add_edid_modes(connector, edid)) {
 			drm_mode_connector_update_edid_property(connector,
@ -3816,8 +3694,7 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 	struct drm_i915_private *dev_priv = dev->dev_private;
 	enum port port = intel_dig_port->port;
 	struct edp_power_seq power_seq = { 0 };
-	const char *name = NULL;
-	int type, error;
+	int type;

 	/* intel_dp vfuncs */
 	if (IS_VALLEYVIEW(dev))
@ -3870,43 +3747,19 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 		intel_connector->get_hw_state = intel_connector_get_hw_state;
 	intel_connector->unregister = intel_dp_connector_unregister;

-	intel_dp->aux_ch_ctl_reg = intel_dp->output_reg + 0x10;
-	if (HAS_DDI(dev)) {
-		switch (intel_dig_port->port) {
-		case PORT_A:
-			intel_dp->aux_ch_ctl_reg = DPA_AUX_CH_CTL;
-			break;
-		case PORT_B:
-			intel_dp->aux_ch_ctl_reg = PCH_DPB_AUX_CH_CTL;
-			break;
-		case PORT_C:
-			intel_dp->aux_ch_ctl_reg = PCH_DPC_AUX_CH_CTL;
-			break;
-		case PORT_D:
-			intel_dp->aux_ch_ctl_reg = PCH_DPD_AUX_CH_CTL;
-			break;
-		default:
-			BUG();
-		}
-	}
-
-	/* Set up the DDC bus. */
+	/* Set up the hotplug pin. */
 	switch (port) {
 	case PORT_A:
 		intel_encoder->hpd_pin = HPD_PORT_A;
-		name = "DPDDC-A";
 		break;
 	case PORT_B:
 		intel_encoder->hpd_pin = HPD_PORT_B;
-		name = "DPDDC-B";
 		break;
 	case PORT_C:
 		intel_encoder->hpd_pin = HPD_PORT_C;
-		name = "DPDDC-C";
 		break;
 	case PORT_D:
 		intel_encoder->hpd_pin = HPD_PORT_D;
-		name = "DPDDC-D";
 		break;
 	default:
 		BUG();
@ -3917,14 +3770,12 @@ intel_dp_init_connector(struct intel_digital_port *intel_dig_port,
 		intel_dp_init_panel_power_sequencer(dev, intel_dp, &power_seq);
 	}

-	error = intel_dp_i2c_init(intel_dp, intel_connector, name);
-	WARN(error, "intel_dp_i2c_init failed with error %d for port %c\n",
-	     error, port_name(port));
+	intel_dp_aux_init(intel_dp, intel_connector);

 	intel_dp->psr_setup_done = false;

 	if (!intel_edp_init_connector(intel_dp, intel_connector, &power_seq)) {
-		i2c_del_adapter(&intel_dp->adapter);
+		drm_dp_aux_unregister_i2c_bus(&intel_dp->aux);
 		if (is_edp(intel_dp)) {
 			cancel_delayed_work_sync(&intel_dp->panel_vdd_work);
 			mutex_lock(&dev->mode_config.mutex);
--- a/drivers/gpu/drm/i915/intel_drv.h
+++ b/drivers/gpu/drm/i915/intel_drv.h
@ -489,8 +489,7 @@ struct intel_dp {
 	uint8_t dpcd[DP_RECEIVER_CAP_SIZE];
 	uint8_t psr_dpcd[EDP_PSR_RECEIVER_CAP_SIZE];
 	uint8_t downstream_ports[DP_MAX_DOWNSTREAM_PORTS];
-	struct i2c_adapter adapter;
-	struct i2c_algo_dp_aux_data algo;
+	struct drm_dp_aux aux;
 	uint8_t train_set[4];
 	int panel_power_up_delay;
 	int panel_power_down_delay;
--- a/drivers/gpu/drm/mgag200/mgag200_ttm.c
+++ b/drivers/gpu/drm/mgag200/mgag200_ttm.c
@ -259,7 +259,9 @@ int mgag200_mm_init(struct mga_device *mdev)

 	ret = ttm_bo_device_init(&mdev->ttm.bdev,
 				 mdev->ttm.bo_global_ref.ref.object,
-				 &mgag200_bo_driver, DRM_FILE_PAGE_OFFSET,
+				 &mgag200_bo_driver,
+				 dev->anon_inode->i_mapping,
+				 DRM_FILE_PAGE_OFFSET,
 				 true);
 	if (ret) {
 		DRM_ERROR("Error initialising bo driver; %d\n", ret);
@ -324,7 +326,6 @@ int mgag200_bo_create(struct drm_device *dev, int size, int align,
 	}

 	mgabo->bo.bdev = &mdev->ttm.bdev;
-	mgabo->bo.bdev->dev_mapping = dev->dev_mapping;

 	mgag200_ttm_placement(mgabo, TTM_PL_FLAG_VRAM | TTM_PL_FLAG_SYSTEM);

--- a/drivers/gpu/drm/nouveau/nouveau_gem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_gem.c
@ -228,8 +228,6 @@ nouveau_gem_ioctl_new(struct drm_device *dev, void *data,
 	struct nouveau_bo *nvbo = NULL;
 	int ret = 0;

-	drm->ttm.bdev.dev_mapping = drm->dev->dev_mapping;
-
 	if (!pfb->memtype_valid(pfb, req->info.tile_flags)) {
 		NV_ERROR(cli, "bad page flags: 0x%08x\n", req->info.tile_flags);
 		return -EINVAL;
--- a/drivers/gpu/drm/nouveau/nouveau_ttm.c
+++ b/drivers/gpu/drm/nouveau/nouveau_ttm.c
@ -376,7 +376,9 @@ nouveau_ttm_init(struct nouveau_drm *drm)

 	ret = ttm_bo_device_init(&drm->ttm.bdev,
 				  drm->ttm.bo_global_ref.ref.object,
-				  &nouveau_bo_driver, DRM_FILE_PAGE_OFFSET,
+				  &nouveau_bo_driver,
+				  dev->anon_inode->i_mapping,
+				  DRM_FILE_PAGE_OFFSET,
 				  bits <= 32 ? true : false);
 	if (ret) {
 		NV_ERROR(drm, "error initialising bo driver, %d\n", ret);
--- a/drivers/gpu/drm/omapdrm/omap_gem.c
+++ b/drivers/gpu/drm/omapdrm/omap_gem.c
@ -153,24 +153,24 @@ static struct {
 static void evict_entry(struct drm_gem_object *obj,
 		enum tiler_fmt fmt, struct usergart_entry *entry)
 {
-	if (obj->dev->dev_mapping) {
-		struct omap_gem_object *omap_obj = to_omap_bo(obj);
-		int n = usergart[fmt].height;
-		size_t size = PAGE_SIZE * n;
-		loff_t off = mmap_offset(obj) +
-				(entry->obj_pgoff << PAGE_SHIFT);
-		const int m = 1 + ((omap_obj->width << fmt) / PAGE_SIZE);
-		if (m > 1) {
-			int i;
-			/* if stride > than PAGE_SIZE then sparse mapping: */
-			for (i = n; i > 0; i--) {
-				unmap_mapping_range(obj->dev->dev_mapping,
-						off, PAGE_SIZE, 1);
-				off += PAGE_SIZE * m;
-			}
-		} else {
-			unmap_mapping_range(obj->dev->dev_mapping, off, size, 1);
+	struct omap_gem_object *omap_obj = to_omap_bo(obj);
+	int n = usergart[fmt].height;
+	size_t size = PAGE_SIZE * n;
+	loff_t off = mmap_offset(obj) +
+			(entry->obj_pgoff << PAGE_SHIFT);
+	const int m = 1 + ((omap_obj->width << fmt) / PAGE_SIZE);
+
+	if (m > 1) {
+		int i;
+		/* if stride > than PAGE_SIZE then sparse mapping: */
+		for (i = n; i > 0; i--) {
+			unmap_mapping_range(obj->dev->anon_inode->i_mapping,
+					    off, PAGE_SIZE, 1);
+			off += PAGE_SIZE * m;
 		}
+	} else {
+		unmap_mapping_range(obj->dev->anon_inode->i_mapping,
+				    off, size, 1);
 	}

 	entry->obj = NULL;
--- a/drivers/gpu/drm/qxl/qxl_object.c
+++ b/drivers/gpu/drm/qxl/qxl_object.c
@ -82,8 +82,6 @@ int qxl_bo_create(struct qxl_device *qdev,
 	enum ttm_bo_type type;
 	int r;

-	if (unlikely(qdev->mman.bdev.dev_mapping == NULL))
-		qdev->mman.bdev.dev_mapping = qdev->ddev->dev_mapping;
 	if (kernel)
 		type = ttm_bo_type_kernel;
 	else
--- a/drivers/gpu/drm/qxl/qxl_ttm.c
+++ b/drivers/gpu/drm/qxl/qxl_ttm.c
@ -493,7 +493,9 @@ int qxl_ttm_init(struct qxl_device *qdev)
 	/* No others user of address space so set it to 0 */
 	r = ttm_bo_device_init(&qdev->mman.bdev,
 			       qdev->mman.bo_global_ref.ref.object,
-			       &qxl_bo_driver, DRM_FILE_PAGE_OFFSET, 0);
+			       &qxl_bo_driver,
+			       qdev->ddev->anon_inode->i_mapping,
+			       DRM_FILE_PAGE_OFFSET, 0);
 	if (r) {
 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
 		return r;
@ -518,8 +520,6 @@ int qxl_ttm_init(struct qxl_device *qdev)
 		 ((unsigned)num_io_pages * PAGE_SIZE) / (1024 * 1024));
 	DRM_INFO("qxl: %uM of Surface memory size\n",
 		 (unsigned)qdev->surfaceram_size / (1024 * 1024));
-	if (unlikely(qdev->mman.bdev.dev_mapping == NULL))
-		qdev->mman.bdev.dev_mapping = qdev->ddev->dev_mapping;
 	r = qxl_ttm_debugfs_init(qdev);
 	if (r) {
 		DRM_ERROR("Failed to init debugfs\n");
--- a/drivers/gpu/drm/radeon/Makefile
+++ b/drivers/gpu/drm/radeon/Makefile
@ -80,7 +80,7 @@ radeon-y += radeon_device.o radeon_asic.o radeon_kms.o \
 	r600_dpm.o rs780_dpm.o rv6xx_dpm.o rv770_dpm.o rv730_dpm.o rv740_dpm.o \
 	rv770_smc.o cypress_dpm.o btc_dpm.o sumo_dpm.o sumo_smc.o trinity_dpm.o \
 	trinity_smc.o ni_dpm.o si_smc.o si_dpm.o kv_smc.o kv_dpm.o ci_smc.o \
-	ci_dpm.o dce6_afmt.o
+	ci_dpm.o dce6_afmt.o radeon_vm.o

 # add async DMA block
 radeon-y += \
@ -99,6 +99,12 @@ radeon-y += \
 	uvd_v3_1.o \
 	uvd_v4_2.o

+# add VCE block
+radeon-y += \
+	radeon_vce.o \
+	vce_v1_0.o \
+	vce_v2_0.o \
+
 radeon-$(CONFIG_COMPAT) += radeon_ioc32.o
 radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o
 radeon-$(CONFIG_ACPI) += radeon_acpi.o
--- a/drivers/gpu/drm/radeon/btc_dpm.c
+++ b/drivers/gpu/drm/radeon/btc_dpm.c
@ -2601,6 +2601,10 @@ int btc_dpm_init(struct radeon_device *rdev)
 	pi->min_vddc_in_table = 0;
 	pi->max_vddc_in_table = 0;

+	ret = r600_get_platform_caps(rdev);
+	if (ret)
+		return ret;
+
 	ret = rv7xx_parse_power_table(rdev);
 	if (ret)
 		return ret;
--- a/drivers/gpu/drm/radeon/ci_dpm.c
+++ b/drivers/gpu/drm/radeon/ci_dpm.c
@ -172,6 +172,8 @@ extern void si_trim_voltage_table_to_fit_state_table(struct radeon_device *rdev,
 extern void cik_enter_rlc_safe_mode(struct radeon_device *rdev);
 extern void cik_exit_rlc_safe_mode(struct radeon_device *rdev);
 extern int ci_mc_load_microcode(struct radeon_device *rdev);
+extern void cik_update_cg(struct radeon_device *rdev,
+			  u32 block, bool enable);

 static int ci_get_std_voltage_value_sidd(struct radeon_device *rdev,
 					 struct atom_voltage_table_entry *voltage_table,
@ -746,6 +748,14 @@ static void ci_apply_state_adjust_rules(struct radeon_device *rdev,
 	u32 max_sclk_vddc, max_mclk_vddci, max_mclk_vddc;
 	int i;

+	if (rps->vce_active) {
+		rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk;
+		rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk;
+	} else {
+		rps->evclk = 0;
+		rps->ecclk = 0;
+	}
+
 	if ((rdev->pm.dpm.new_active_crtc_count > 1) ||
 	    ci_dpm_vblank_too_short(rdev))
 		disable_mclk_switching = true;
@ -804,6 +814,13 @@ static void ci_apply_state_adjust_rules(struct radeon_device *rdev,
 		sclk = ps->performance_levels[0].sclk;
 	}

+	if (rps->vce_active) {
+		if (sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk)
+			sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk;
+		if (mclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk)
+			mclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].mclk;
+	}
+
 	ps->performance_levels[0].sclk = sclk;
 	ps->performance_levels[0].mclk = mclk;

@ -3468,7 +3485,6 @@ static int ci_enable_uvd_dpm(struct radeon_device *rdev, bool enable)
 		0 : -EINVAL;
 }

-#if 0
 static int ci_enable_vce_dpm(struct radeon_device *rdev, bool enable)
 {
 	struct ci_power_info *pi = ci_get_pi(rdev);
@ -3501,6 +3517,7 @@ static int ci_enable_vce_dpm(struct radeon_device *rdev, bool enable)
 		0 : -EINVAL;
 }

+#if 0
 static int ci_enable_samu_dpm(struct radeon_device *rdev, bool enable)
 {
 	struct ci_power_info *pi = ci_get_pi(rdev);
@ -3587,7 +3604,6 @@ static int ci_update_uvd_dpm(struct radeon_device *rdev, bool gate)
 	return ci_enable_uvd_dpm(rdev, !gate);
 }

-#if 0
 static u8 ci_get_vce_boot_level(struct radeon_device *rdev)
 {
 	u8 i;
@ -3608,15 +3624,15 @@ static int ci_update_vce_dpm(struct radeon_device *rdev,
 			     struct radeon_ps *radeon_current_state)
 {
 	struct ci_power_info *pi = ci_get_pi(rdev);
-	bool new_vce_clock_non_zero = (radeon_new_state->evclk != 0);
-	bool old_vce_clock_non_zero = (radeon_current_state->evclk != 0);
 	int ret = 0;
 	u32 tmp;

-	if (new_vce_clock_non_zero != old_vce_clock_non_zero) {
-		if (new_vce_clock_non_zero) {
-			pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(rdev);
+	if (radeon_current_state->evclk != radeon_new_state->evclk) {
+		if (radeon_new_state->evclk) {
+			/* turn the clocks on when encoding */
+			cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, false);

+			pi->smc_state_table.VceBootLevel = ci_get_vce_boot_level(rdev);
 			tmp = RREG32_SMC(DPM_TABLE_475);
 			tmp &= ~VceBootLevel_MASK;
 			tmp |= VceBootLevel(pi->smc_state_table.VceBootLevel);
@ -3624,12 +3640,16 @@ static int ci_update_vce_dpm(struct radeon_device *rdev,

 			ret = ci_enable_vce_dpm(rdev, true);
 		} else {
+			/* turn the clocks off when not encoding */
+			cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, true);
+
 			ret = ci_enable_vce_dpm(rdev, false);
 		}
 	}
 	return ret;
 }

+#if 0
 static int ci_update_samu_dpm(struct radeon_device *rdev, bool gate)
 {
 	return ci_enable_samu_dpm(rdev, gate);
@ -4752,13 +4772,13 @@ int ci_dpm_set_power_state(struct radeon_device *rdev)
 		DRM_ERROR("ci_generate_dpm_level_enable_mask failed\n");
 		return ret;
 	}
-#if 0
+
 	ret = ci_update_vce_dpm(rdev, new_ps, old_ps);
 	if (ret) {
 		DRM_ERROR("ci_update_vce_dpm failed\n");
 		return ret;
 	}
-#endif
+
 	ret = ci_update_sclk_t(rdev);
 	if (ret) {
 		DRM_ERROR("ci_update_sclk_t failed\n");
@ -4959,9 +4979,6 @@ static int ci_parse_power_table(struct radeon_device *rdev)
 	if (!rdev->pm.dpm.ps)
 		return -ENOMEM;
 	power_state_offset = (u8 *)state_array->states;
-	rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps);
-	rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime);
-	rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime);
 	for (i = 0; i < state_array->ucNumEntries; i++) {
 		u8 *idx;
 		power_state = (union pplib_power_state *)power_state_offset;
@ -4998,6 +5015,21 @@ static int ci_parse_power_table(struct radeon_device *rdev)
 		power_state_offset += 2 + power_state->v2.ucNumDPMLevels;
 	}
 	rdev->pm.dpm.num_ps = state_array->ucNumEntries;
+
+	/* fill in the vce power states */
+	for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) {
+		u32 sclk, mclk;
+		clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx;
+		clock_info = (union pplib_clock_info *)
+			&clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize];
+		sclk = le16_to_cpu(clock_info->ci.usEngineClockLow);
+		sclk |= clock_info->ci.ucEngineClockHigh << 16;
+		mclk = le16_to_cpu(clock_info->ci.usMemoryClockLow);
+		mclk |= clock_info->ci.ucMemoryClockHigh << 16;
+		rdev->pm.dpm.vce_states[i].sclk = sclk;
+		rdev->pm.dpm.vce_states[i].mclk = mclk;
+	}
+
 	return 0;
 }

@ -5077,17 +5109,25 @@ int ci_dpm_init(struct radeon_device *rdev)
 		ci_dpm_fini(rdev);
 		return ret;
 	}
-	ret = ci_parse_power_table(rdev);
+
+	ret = r600_get_platform_caps(rdev);
 	if (ret) {
 		ci_dpm_fini(rdev);
 		return ret;
 	}
+
 	ret = r600_parse_extended_power_table(rdev);
 	if (ret) {
 		ci_dpm_fini(rdev);
 		return ret;
 	}

+	ret = ci_parse_power_table(rdev);
+	if (ret) {
+		ci_dpm_fini(rdev);
+		return ret;
+	}
+
        pi->dll_default_on = false;
        pi->sram_end = SMC_RAM_END;

@ -5120,6 +5160,7 @@ int ci_dpm_init(struct radeon_device *rdev)
 	pi->caps_sclk_throttle_low_notification = false;

 	pi->caps_uvd_dpm = true;
+	pi->caps_vce_dpm = true;

        ci_get_leakage_voltages(rdev);
        ci_patch_dependency_tables_with_leakage(rdev);
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@ -75,6 +75,7 @@ extern void si_init_uvd_internal_cg(struct radeon_device *rdev);
 extern int cik_sdma_resume(struct radeon_device *rdev);
 extern void cik_sdma_enable(struct radeon_device *rdev, bool enable);
 extern void cik_sdma_fini(struct radeon_device *rdev);
+extern void vce_v2_0_enable_mgcg(struct radeon_device *rdev, bool enable);
 static void cik_rlc_stop(struct radeon_device *rdev);
 static void cik_pcie_gen3_enable(struct radeon_device *rdev);
 static void cik_program_aspm(struct radeon_device *rdev);
@ -4030,8 +4031,6 @@ static int cik_cp_gfx_resume(struct radeon_device *rdev)
 	WREG32(CP_RB0_BASE, rb_addr);
 	WREG32(CP_RB0_BASE_HI, upper_32_bits(rb_addr));

-	ring->rptr = RREG32(CP_RB0_RPTR);
-
 	/* start the ring */
 	cik_cp_gfx_start(rdev);
 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
@ -4134,8 +4133,11 @@ static void cik_cp_compute_enable(struct radeon_device *rdev, bool enable)
 {
 	if (enable)
 		WREG32(CP_MEC_CNTL, 0);
-	else
+	else {
 		WREG32(CP_MEC_CNTL, (MEC_ME1_HALT | MEC_ME2_HALT));
+		rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX].ready = false;
+		rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX].ready = false;
+	}
 	udelay(50);
 }

@ -4586,8 +4588,7 @@ static int cik_cp_compute_resume(struct radeon_device *rdev)
 		rdev->ring[idx].wptr = 0;
 		mqd->queue_state.cp_hqd_pq_wptr = rdev->ring[idx].wptr;
 		WREG32(CP_HQD_PQ_WPTR, mqd->queue_state.cp_hqd_pq_wptr);
-		rdev->ring[idx].rptr = RREG32(CP_HQD_PQ_RPTR);
-		mqd->queue_state.cp_hqd_pq_rptr = rdev->ring[idx].rptr;
+		mqd->queue_state.cp_hqd_pq_rptr = RREG32(CP_HQD_PQ_RPTR);

 		/* set the vmid for the queue */
 		mqd->queue_state.cp_hqd_vmid = 0;
@ -5117,11 +5118,9 @@ bool cik_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 	if (!(reset_mask & (RADEON_RESET_GFX |
 			    RADEON_RESET_COMPUTE |
 			    RADEON_RESET_CP))) {
-		radeon_ring_lockup_update(ring);
+		radeon_ring_lockup_update(rdev, ring);
 		return false;
 	}
-	/* force CP activities */
-	radeon_ring_force_activity(rdev, ring);
 	return radeon_ring_test_lockup(rdev, ring);
 }

@ -6141,6 +6140,10 @@ void cik_update_cg(struct radeon_device *rdev,
 		cik_enable_hdp_mgcg(rdev, enable);
 		cik_enable_hdp_ls(rdev, enable);
 	}
+
+	if (block & RADEON_CG_BLOCK_VCE) {
+		vce_v2_0_enable_mgcg(rdev, enable);
+	}
 }

 static void cik_init_cg(struct radeon_device *rdev)
@ -7490,6 +7493,20 @@ restart_ih:
 			/* reset addr and status */
 			WREG32_P(VM_CONTEXT1_CNTL2, 1, ~1);
 			break;
+		case 167: /* VCE */
+			DRM_DEBUG("IH: VCE int: 0x%08x\n", src_data);
+			switch (src_data) {
+			case 0:
+				radeon_fence_process(rdev, TN_RING_TYPE_VCE1_INDEX);
+				break;
+			case 1:
+				radeon_fence_process(rdev, TN_RING_TYPE_VCE2_INDEX);
+				break;
+			default:
+				DRM_ERROR("Unhandled interrupt: %d %d\n", src_id, src_data);
+				break;
+			}
+			break;
 		case 176: /* GFX RB CP_INT */
 		case 177: /* GFX IB CP_INT */
 			radeon_fence_process(rdev, RADEON_RING_TYPE_GFX_INDEX);
@ -7789,6 +7806,22 @@ static int cik_startup(struct radeon_device *rdev)
 	if (r)
 		rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;

+	r = radeon_vce_resume(rdev);
+	if (!r) {
+		r = vce_v2_0_resume(rdev);
+		if (!r)
+			r = radeon_fence_driver_start_ring(rdev,
+							   TN_RING_TYPE_VCE1_INDEX);
+		if (!r)
+			r = radeon_fence_driver_start_ring(rdev,
+							   TN_RING_TYPE_VCE2_INDEX);
+	}
+	if (r) {
+		dev_err(rdev->dev, "VCE init error (%d).\n", r);
+		rdev->ring[TN_RING_TYPE_VCE1_INDEX].ring_size = 0;
+		rdev->ring[TN_RING_TYPE_VCE2_INDEX].ring_size = 0;
+	}
+
 	/* Enable IRQ */
 	if (!rdev->irq.installed) {
 		r = radeon_irq_kms_init(rdev);
@ -7864,6 +7897,23 @@ static int cik_startup(struct radeon_device *rdev)
 			DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
 	}

+	r = -ENOENT;
+
+	ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
+	if (ring->ring_size)
+		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
+				     VCE_CMD_NO_OP);
+
+	ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
+	if (ring->ring_size)
+		r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
+				     VCE_CMD_NO_OP);
+
+	if (!r)
+		r = vce_v1_0_init(rdev);
+	else if (r != -ENOENT)
+		DRM_ERROR("radeon: failed initializing VCE (%d).\n", r);
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@ -7935,6 +7985,7 @@ int cik_suspend(struct radeon_device *rdev)
 	cik_sdma_enable(rdev, false);
 	uvd_v1_0_fini(rdev);
 	radeon_uvd_suspend(rdev);
+	radeon_vce_suspend(rdev);
 	cik_fini_pg(rdev);
 	cik_fini_cg(rdev);
 	cik_irq_suspend(rdev);
@ -8067,6 +8118,17 @@ int cik_init(struct radeon_device *rdev)
 		r600_ring_init(rdev, ring, 4096);
 	}

+	r = radeon_vce_init(rdev);
+	if (!r) {
+		ring = &rdev->ring[TN_RING_TYPE_VCE1_INDEX];
+		ring->ring_obj = NULL;
+		r600_ring_init(rdev, ring, 4096);
+
+		ring = &rdev->ring[TN_RING_TYPE_VCE2_INDEX];
+		ring->ring_obj = NULL;
+		r600_ring_init(rdev, ring, 4096);
+	}
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);

@ -8128,6 +8190,7 @@ void cik_fini(struct radeon_device *rdev)
 	radeon_irq_kms_fini(rdev);
 	uvd_v1_0_fini(rdev);
 	radeon_uvd_fini(rdev);
+	radeon_vce_fini(rdev);
 	cik_pcie_gart_fini(rdev);
 	r600_vram_scratch_fini(rdev);
 	radeon_gem_fini(rdev);
@ -8866,6 +8929,41 @@ int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 	return r;
 }

+int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk)
+{
+	int r, i;
+	struct atom_clock_dividers dividers;
+	u32 tmp;
+
+	r = radeon_atom_get_clock_dividers(rdev, COMPUTE_GPUCLK_INPUT_FLAG_DEFAULT_GPUCLK,
+					   ecclk, false, &dividers);
+	if (r)
+		return r;
+
+	for (i = 0; i < 100; i++) {
+		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
+			break;
+		mdelay(10);
+	}
+	if (i == 100)
+		return -ETIMEDOUT;
+
+	tmp = RREG32_SMC(CG_ECLK_CNTL);
+	tmp &= ~(ECLK_DIR_CNTL_EN|ECLK_DIVIDER_MASK);
+	tmp |= dividers.post_divider;
+	WREG32_SMC(CG_ECLK_CNTL, tmp);
+
+	for (i = 0; i < 100; i++) {
+		if (RREG32_SMC(CG_ECLK_STATUS) & ECLK_STATUS)
+			break;
+		mdelay(10);
+	}
+	if (i == 100)
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
 static void cik_pcie_gen3_enable(struct radeon_device *rdev)
 {
 	struct pci_dev *root = rdev->pdev->bus->self;
--- a/drivers/gpu/drm/radeon/cik_sdma.c
+++ b/drivers/gpu/drm/radeon/cik_sdma.c
@ -264,6 +264,8 @@ static void cik_sdma_gfx_stop(struct radeon_device *rdev)
 		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl);
 		WREG32(SDMA0_GFX_IB_CNTL + reg_offset, 0);
 	}
+	rdev->ring[R600_RING_TYPE_DMA_INDEX].ready = false;
+	rdev->ring[CAYMAN_RING_TYPE_DMA1_INDEX].ready = false;
 }

 /**
@ -291,6 +293,11 @@ void cik_sdma_enable(struct radeon_device *rdev, bool enable)
 	u32 me_cntl, reg_offset;
 	int i;

+	if (enable == false) {
+		cik_sdma_gfx_stop(rdev);
+		cik_sdma_rlc_stop(rdev);
+	}
+
 	for (i = 0; i < 2; i++) {
 		if (i == 0)
 			reg_offset = SDMA0_REGISTER_OFFSET;
@ -362,8 +369,6 @@ static int cik_sdma_gfx_resume(struct radeon_device *rdev)
 		ring->wptr = 0;
 		WREG32(SDMA0_GFX_RB_WPTR + reg_offset, ring->wptr << 2);

-		ring->rptr = RREG32(SDMA0_GFX_RB_RPTR + reg_offset) >> 2;
-
 		/* enable DMA RB */
 		WREG32(SDMA0_GFX_RB_CNTL + reg_offset, rb_cntl | SDMA_RB_ENABLE);

@ -420,10 +425,6 @@ static int cik_sdma_load_microcode(struct radeon_device *rdev)
 	if (!rdev->sdma_fw)
 		return -EINVAL;

-	/* stop the gfx rings and rlc compute queues */
-	cik_sdma_gfx_stop(rdev);
-	cik_sdma_rlc_stop(rdev);
-
 	/* halt the MEs */
 	cik_sdma_enable(rdev, false);

@ -492,9 +493,6 @@ int cik_sdma_resume(struct radeon_device *rdev)
 */
 void cik_sdma_fini(struct radeon_device *rdev)
 {
-	/* stop the gfx rings and rlc compute queues */
-	cik_sdma_gfx_stop(rdev);
-	cik_sdma_rlc_stop(rdev);
 	/* halt the MEs */
 	cik_sdma_enable(rdev, false);
 	radeon_ring_fini(rdev, &rdev->ring[R600_RING_TYPE_DMA_INDEX]);
@ -713,11 +711,9 @@ bool cik_sdma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 		mask = RADEON_RESET_DMA1;

 	if (!(reset_mask & mask)) {
-		radeon_ring_lockup_update(ring);
+		radeon_ring_lockup_update(rdev, ring);
 		return false;
 	}
-	/* force ring activities */
-	radeon_ring_force_activity(rdev, ring);
 	return radeon_ring_test_lockup(rdev, ring);
 }

--- a/drivers/gpu/drm/radeon/cikd.h
+++ b/drivers/gpu/drm/radeon/cikd.h
@ -203,6 +203,12 @@
 #define		CTF_TEMP_MASK				0x0003fe00
 #define		CTF_TEMP_SHIFT				9

+#define CG_ECLK_CNTL                                    0xC05000AC
+#       define ECLK_DIVIDER_MASK                        0x7f
+#       define ECLK_DIR_CNTL_EN                         (1 << 8)
+#define CG_ECLK_STATUS                                  0xC05000B0
+#       define ECLK_STATUS                              (1 << 0)
+
 #define	CG_SPLL_FUNC_CNTL				0xC0500140
 #define		SPLL_RESET				(1 << 0)
 #define		SPLL_PWRON				(1 << 1)
@ -2010,4 +2016,47 @@
 /* UVD CTX indirect */
 #define	UVD_CGC_MEM_CTRL				0xC0

+/* VCE */
+
+#define VCE_VCPU_CACHE_OFFSET0		0x20024
+#define VCE_VCPU_CACHE_SIZE0		0x20028
+#define VCE_VCPU_CACHE_OFFSET1		0x2002c
+#define VCE_VCPU_CACHE_SIZE1		0x20030
+#define VCE_VCPU_CACHE_OFFSET2		0x20034
+#define VCE_VCPU_CACHE_SIZE2		0x20038
+#define VCE_RB_RPTR2			0x20178
+#define VCE_RB_WPTR2			0x2017c
+#define VCE_RB_RPTR			0x2018c
+#define VCE_RB_WPTR			0x20190
+#define VCE_CLOCK_GATING_A		0x202f8
+#	define CGC_CLK_GATE_DLY_TIMER_MASK	(0xf << 0)
+#	define CGC_CLK_GATE_DLY_TIMER(x)	((x) << 0)
+#	define CGC_CLK_GATER_OFF_DLY_TIMER_MASK	(0xff << 4)
+#	define CGC_CLK_GATER_OFF_DLY_TIMER(x)	((x) << 4)
+#	define CGC_UENC_WAIT_AWAKE	(1 << 18)
+#define VCE_CLOCK_GATING_B		0x202fc
+#define VCE_CGTT_CLK_OVERRIDE		0x207a0
+#define VCE_UENC_CLOCK_GATING		0x207bc
+#	define CLOCK_ON_DELAY_MASK	(0xf << 0)
+#	define CLOCK_ON_DELAY(x)	((x) << 0)
+#	define CLOCK_OFF_DELAY_MASK	(0xff << 4)
+#	define CLOCK_OFF_DELAY(x)	((x) << 4)
+#define VCE_UENC_REG_CLOCK_GATING	0x207c0
+#define VCE_SYS_INT_EN			0x21300
+#	define VCE_SYS_INT_TRAP_INTERRUPT_EN	(1 << 3)
+#define VCE_LMI_CTRL2			0x21474
+#define VCE_LMI_CTRL			0x21498
+#define VCE_LMI_VM_CTRL			0x214a0
+#define VCE_LMI_SWAP_CNTL		0x214b4
+#define VCE_LMI_SWAP_CNTL1		0x214b8
+#define VCE_LMI_CACHE_CTRL		0x214f4
+
+#define VCE_CMD_NO_OP		0x00000000
+#define VCE_CMD_END		0x00000001
+#define VCE_CMD_IB		0x00000002
+#define VCE_CMD_FENCE		0x00000003
+#define VCE_CMD_TRAP		0x00000004
+#define VCE_CMD_IB_AUTO		0x00000005
+#define VCE_CMD_SEMAPHORE	0x00000006
+
 #endif
--- a/drivers/gpu/drm/radeon/cypress_dpm.c
+++ b/drivers/gpu/drm/radeon/cypress_dpm.c
@ -2036,6 +2036,10 @@ int cypress_dpm_init(struct radeon_device *rdev)
 	pi->min_vddc_in_table = 0;
 	pi->max_vddc_in_table = 0;

+	ret = r600_get_platform_caps(rdev);
+	if (ret)
+		return ret;
+
 	ret = rv7xx_parse_power_table(rdev);
 	if (ret)
 		return ret;
--- a/drivers/gpu/drm/radeon/evergreen.c
+++ b/drivers/gpu/drm/radeon/evergreen.c
@ -2990,8 +2990,6 @@ static int evergreen_cp_resume(struct radeon_device *rdev)
 	WREG32(CP_RB_BASE, ring->gpu_addr >> 8);
 	WREG32(CP_DEBUG, (1 << 27) | (1 << 28));

-	ring->rptr = RREG32(CP_RB_RPTR);
-
 	evergreen_cp_start(rdev);
 	ring->ready = true;
 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring);
@ -3952,11 +3950,9 @@ bool evergreen_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *rin
 	if (!(reset_mask & (RADEON_RESET_GFX |
 			    RADEON_RESET_COMPUTE |
 			    RADEON_RESET_CP))) {
-		radeon_ring_lockup_update(ring);
+		radeon_ring_lockup_update(rdev, ring);
 		return false;
 	}
-	/* force CP activities */
-	radeon_ring_force_activity(rdev, ring);
 	return radeon_ring_test_lockup(rdev, ring);
 }

--- a/drivers/gpu/drm/radeon/evergreen_cs.c
+++ b/drivers/gpu/drm/radeon/evergreen_cs.c
@ -1165,7 +1165,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 					"0x%04X\n", reg);
 			return -EINVAL;
 		}
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		break;
 	case DB_DEPTH_CONTROL:
 		track->db_depth_control = radeon_get_ib_value(p, idx);
@ -1196,12 +1196,12 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			}
 			ib[idx] &= ~Z_ARRAY_MODE(0xf);
 			track->db_z_info &= ~Z_ARRAY_MODE(0xf);
-			ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
-			track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
-			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
+			ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
+			track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
+			if (reloc->tiling_flags & RADEON_TILING_MACRO) {
 				unsigned bankw, bankh, mtaspect, tile_split;

-				evergreen_tiling_fields(reloc->lobj.tiling_flags,
+				evergreen_tiling_fields(reloc->tiling_flags,
 							&bankw, &bankh, &mtaspect,
 							&tile_split);
 				ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
@ -1237,7 +1237,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			return -EINVAL;
 		}
 		track->db_z_read_offset = radeon_get_ib_value(p, idx);
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->db_z_read_bo = reloc->robj;
 		track->db_dirty = true;
 		break;
@ -1249,7 +1249,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			return -EINVAL;
 		}
 		track->db_z_write_offset = radeon_get_ib_value(p, idx);
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->db_z_write_bo = reloc->robj;
 		track->db_dirty = true;
 		break;
@ -1261,7 +1261,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			return -EINVAL;
 		}
 		track->db_s_read_offset = radeon_get_ib_value(p, idx);
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->db_s_read_bo = reloc->robj;
 		track->db_dirty = true;
 		break;
@ -1273,7 +1273,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			return -EINVAL;
 		}
 		track->db_s_write_offset = radeon_get_ib_value(p, idx);
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->db_s_write_bo = reloc->robj;
 		track->db_dirty = true;
 		break;
@ -1297,7 +1297,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 		}
 		tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
 		track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->vgt_strmout_bo[tmp] = reloc->robj;
 		track->streamout_dirty = true;
 		break;
@ -1317,7 +1317,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 					"0x%04X\n", reg);
 			return -EINVAL;
 		}
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 	case CB_TARGET_MASK:
 		track->cb_target_mask = radeon_get_ib_value(p, idx);
 		track->cb_dirty = true;
@ -1381,8 +1381,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 						"0x%04X\n", reg);
 				return -EINVAL;
 			}
-			ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
-			track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
+			ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
+			track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
 		}
 		track->cb_dirty = true;
 		break;
@ -1399,8 +1399,8 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 						"0x%04X\n", reg);
 				return -EINVAL;
 			}
-			ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
-			track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
+			ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
+			track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
 		}
 		track->cb_dirty = true;
 		break;
@ -1461,10 +1461,10 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			return -EINVAL;
 		}
 		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
-			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
+			if (reloc->tiling_flags & RADEON_TILING_MACRO) {
 				unsigned bankw, bankh, mtaspect, tile_split;

-				evergreen_tiling_fields(reloc->lobj.tiling_flags,
+				evergreen_tiling_fields(reloc->tiling_flags,
 							&bankw, &bankh, &mtaspect,
 							&tile_split);
 				ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
@ -1489,10 +1489,10 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			return -EINVAL;
 		}
 		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
-			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
+			if (reloc->tiling_flags & RADEON_TILING_MACRO) {
 				unsigned bankw, bankh, mtaspect, tile_split;

-				evergreen_tiling_fields(reloc->lobj.tiling_flags,
+				evergreen_tiling_fields(reloc->tiling_flags,
 							&bankw, &bankh, &mtaspect,
 							&tile_split);
 				ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
@ -1520,7 +1520,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
 			return -EINVAL;
 		}
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->cb_color_fmask_bo[tmp] = reloc->robj;
 		break;
 	case CB_COLOR0_CMASK:
@ -1537,7 +1537,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
 			return -EINVAL;
 		}
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->cb_color_cmask_bo[tmp] = reloc->robj;
 		break;
 	case CB_COLOR0_FMASK_SLICE:
@ -1578,7 +1578,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 		}
 		tmp = (reg - CB_COLOR0_BASE) / 0x3c;
 		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->cb_color_bo[tmp] = reloc->robj;
 		track->cb_dirty = true;
 		break;
@ -1594,7 +1594,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 		}
 		tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8;
 		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->cb_color_bo[tmp] = reloc->robj;
 		track->cb_dirty = true;
 		break;
@ -1606,7 +1606,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			return -EINVAL;
 		}
 		track->htile_offset = radeon_get_ib_value(p, idx);
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->htile_bo = reloc->robj;
 		track->db_dirty = true;
 		break;
@ -1723,7 +1723,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 					"0x%04X\n", reg);
 			return -EINVAL;
 		}
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		break;
 	case SX_MEMORY_EXPORT_BASE:
 		if (p->rdev->family >= CHIP_CAYMAN) {
@ -1737,7 +1737,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 					"0x%04X\n", reg);
 			return -EINVAL;
 		}
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		break;
 	case CAYMAN_SX_SCATTER_EXPORT_BASE:
 		if (p->rdev->family < CHIP_CAYMAN) {
@ -1751,7 +1751,7 @@ static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 					"0x%04X\n", reg);
 			return -EINVAL;
 		}
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		break;
 	case SX_MISC:
 		track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
@ -1836,7 +1836,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			return -EINVAL;
 		}

-		offset = reloc->lobj.gpu_offset +
+		offset = reloc->gpu_offset +
 		         (idx_value & 0xfffffff0) +
 		         ((u64)(tmp & 0xff) << 32);

@ -1882,7 +1882,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			return -EINVAL;
 		}

-		offset = reloc->lobj.gpu_offset +
+		offset = reloc->gpu_offset +
 		         idx_value +
 		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);

@ -1909,7 +1909,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			return -EINVAL;
 		}

-		offset = reloc->lobj.gpu_offset +
+		offset = reloc->gpu_offset +
 		         idx_value +
 		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);

@ -1937,7 +1937,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			return -EINVAL;
 		}

-		offset = reloc->lobj.gpu_offset +
+		offset = reloc->gpu_offset +
 		         radeon_get_ib_value(p, idx+1) +
 		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);

@ -2027,7 +2027,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			DRM_ERROR("bad DISPATCH_INDIRECT\n");
 			return -EINVAL;
 		}
-		ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff);
+		ib[idx+0] = idx_value + (u32)(reloc->gpu_offset & 0xffffffff);
 		r = evergreen_cs_track_check(p);
 		if (r) {
 			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
@ -2049,7 +2049,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 				return -EINVAL;
 			}

-			offset = reloc->lobj.gpu_offset +
+			offset = reloc->gpu_offset +
 			         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
 			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);

@ -2106,7 +2106,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 				tmp = radeon_get_ib_value(p, idx) +
 					((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);

-				offset = reloc->lobj.gpu_offset + tmp;
+				offset = reloc->gpu_offset + tmp;

 				if ((tmp + size) > radeon_bo_size(reloc->robj)) {
 					dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
@ -2144,7 +2144,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 				tmp = radeon_get_ib_value(p, idx+2) +
 					((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);

-				offset = reloc->lobj.gpu_offset + tmp;
+				offset = reloc->gpu_offset + tmp;

 				if ((tmp + size) > radeon_bo_size(reloc->robj)) {
 					dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
@ -2174,7 +2174,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 				DRM_ERROR("bad SURFACE_SYNC\n");
 				return -EINVAL;
 			}
-			ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+			ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		}
 		break;
 	case PACKET3_EVENT_WRITE:
@ -2190,7 +2190,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 				DRM_ERROR("bad EVENT_WRITE\n");
 				return -EINVAL;
 			}
-			offset = reloc->lobj.gpu_offset +
+			offset = reloc->gpu_offset +
 			         (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
 			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);

@ -2212,7 +2212,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			return -EINVAL;
 		}

-		offset = reloc->lobj.gpu_offset +
+		offset = reloc->gpu_offset +
 		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
 		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);

@ -2234,7 +2234,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 			return -EINVAL;
 		}

-		offset = reloc->lobj.gpu_offset +
+		offset = reloc->gpu_offset +
 		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
 		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);

@ -2302,11 +2302,11 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 				}
 				if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
 					ib[idx+1+(i*8)+1] |=
-						TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
-					if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
+						TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->tiling_flags));
+					if (reloc->tiling_flags & RADEON_TILING_MACRO) {
 						unsigned bankw, bankh, mtaspect, tile_split;

-						evergreen_tiling_fields(reloc->lobj.tiling_flags,
+						evergreen_tiling_fields(reloc->tiling_flags,
 									&bankw, &bankh, &mtaspect,
 									&tile_split);
 						ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split);
@ -2318,7 +2318,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 					}
 				}
 				texture = reloc->robj;
-				toffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+				toffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);

 				/* tex mip base */
 				tex_dim = ib[idx+1+(i*8)+0] & 0x7;
@ -2337,7 +2337,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 						DRM_ERROR("bad SET_RESOURCE (tex)\n");
 						return -EINVAL;
 					}
-					moffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+					moffset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 					mipmap = reloc->robj;
 				}

@ -2364,7 +2364,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 					ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
 				}

-				offset64 = reloc->lobj.gpu_offset + offset;
+				offset64 = reloc->gpu_offset + offset;
 				ib[idx+1+(i*8)+0] = offset64;
 				ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
 						    (upper_32_bits(offset64) & 0xff);
@ -2445,7 +2445,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 					  offset + 4, radeon_bo_size(reloc->robj));
 				return -EINVAL;
 			}
-			offset += reloc->lobj.gpu_offset;
+			offset += reloc->gpu_offset;
 			ib[idx+1] = offset;
 			ib[idx+2] = upper_32_bits(offset) & 0xff;
 		}
@ -2464,7 +2464,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 					  offset + 4, radeon_bo_size(reloc->robj));
 				return -EINVAL;
 			}
-			offset += reloc->lobj.gpu_offset;
+			offset += reloc->gpu_offset;
 			ib[idx+3] = offset;
 			ib[idx+4] = upper_32_bits(offset) & 0xff;
 		}
@ -2493,7 +2493,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 				  offset + 8, radeon_bo_size(reloc->robj));
 			return -EINVAL;
 		}
-		offset += reloc->lobj.gpu_offset;
+		offset += reloc->gpu_offset;
 		ib[idx+0] = offset;
 		ib[idx+1] = upper_32_bits(offset) & 0xff;
 		break;
@ -2518,7 +2518,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 					  offset + 4, radeon_bo_size(reloc->robj));
 				return -EINVAL;
 			}
-			offset += reloc->lobj.gpu_offset;
+			offset += reloc->gpu_offset;
 			ib[idx+1] = offset;
 			ib[idx+2] = upper_32_bits(offset) & 0xff;
 		} else {
@ -2542,7 +2542,7 @@ static int evergreen_packet3_check(struct radeon_cs_parser *p,
 					  offset + 4, radeon_bo_size(reloc->robj));
 				return -EINVAL;
 			}
-			offset += reloc->lobj.gpu_offset;
+			offset += reloc->gpu_offset;
 			ib[idx+3] = offset;
 			ib[idx+4] = upper_32_bits(offset) & 0xff;
 		} else {
@ -2717,7 +2717,7 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 				dst_offset = radeon_get_ib_value(p, idx+1);
 				dst_offset <<= 8;

-				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
 				p->idx += count + 7;
 				break;
 			/* linear */
@ -2725,8 +2725,8 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 				dst_offset = radeon_get_ib_value(p, idx+1);
 				dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;

-				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-				ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
+				ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
 				p->idx += count + 3;
 				break;
 			default:
@ -2768,10 +2768,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 							dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
 					return -EINVAL;
 				}
-				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-				ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-				ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-				ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
+				ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
+				ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
+				ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
 				p->idx += 5;
 				break;
 			/* Copy L2T/T2L */
@ -2781,22 +2781,22 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 					/* tiled src, linear dst */
 					src_offset = radeon_get_ib_value(p, idx+1);
 					src_offset <<= 8;
-					ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+					ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);

 					dst_offset = radeon_get_ib_value(p, idx + 7);
 					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
-					ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-					ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+					ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
+					ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
 				} else {
 					/* linear src, tiled dst */
 					src_offset = radeon_get_ib_value(p, idx+7);
 					src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
-					ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-					ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+					ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
+					ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;

 					dst_offset = radeon_get_ib_value(p, idx+1);
 					dst_offset <<= 8;
-					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+					ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
 				}
 				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
 					dev_warn(p->dev, "DMA L2T, src buffer too small (%llu %lu)\n",
@ -2827,10 +2827,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 							dst_offset + count, radeon_bo_size(dst_reloc->robj));
 					return -EINVAL;
 				}
-				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
-				ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
-				ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-				ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
+				ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xffffffff);
+				ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
+				ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
 				p->idx += 5;
 				break;
 			/* Copy L2L, partial */
@ -2840,10 +2840,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 					DRM_ERROR("L2L Partial is cayman only !\n");
 					return -EINVAL;
 				}
-				ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset & 0xffffffff);
-				ib[idx+2] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-				ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset & 0xffffffff);
-				ib[idx+5] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+1] += (u32)(src_reloc->gpu_offset & 0xffffffff);
+				ib[idx+2] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
+				ib[idx+4] += (u32)(dst_reloc->gpu_offset & 0xffffffff);
+				ib[idx+5] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;

 				p->idx += 9;
 				break;
@ -2876,12 +2876,12 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 							dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
 					return -EINVAL;
 				}
-				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-				ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset & 0xfffffffc);
-				ib[idx+3] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-				ib[idx+4] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-				ib[idx+5] += upper_32_bits(dst2_reloc->lobj.gpu_offset) & 0xff;
-				ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
+				ib[idx+2] += (u32)(dst2_reloc->gpu_offset & 0xfffffffc);
+				ib[idx+3] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
+				ib[idx+4] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
+				ib[idx+5] += upper_32_bits(dst2_reloc->gpu_offset) & 0xff;
+				ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
 				p->idx += 7;
 				break;
 			/* Copy L2T Frame to Field */
@ -2916,10 +2916,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 							dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
 					return -EINVAL;
 				}
-				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-				ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
-				ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-				ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
+				ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
+				ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
+				ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
 				p->idx += 10;
 				break;
 			/* Copy L2T/T2L, partial */
@ -2932,16 +2932,16 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 				/* detile bit */
 				if (radeon_get_ib_value(p, idx + 2) & (1 << 31)) {
 					/* tiled src, linear dst */
-					ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+					ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);

-					ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-					ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+					ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
+					ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
 				} else {
 					/* linear src, tiled dst */
-					ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-					ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+					ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
+					ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;

-					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+					ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
 				}
 				p->idx += 12;
 				break;
@ -2978,10 +2978,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 							dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
 					return -EINVAL;
 				}
-				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-				ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
-				ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-				ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
+				ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
+				ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
+				ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
 				p->idx += 10;
 				break;
 			/* Copy L2T/T2L (tile units) */
@ -2992,22 +2992,22 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 					/* tiled src, linear dst */
 					src_offset = radeon_get_ib_value(p, idx+1);
 					src_offset <<= 8;
-					ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+					ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);

 					dst_offset = radeon_get_ib_value(p, idx+7);
 					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
-					ib[idx+7] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-					ib[idx+8] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+					ib[idx+7] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
+					ib[idx+8] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
 				} else {
 					/* linear src, tiled dst */
 					src_offset = radeon_get_ib_value(p, idx+7);
 					src_offset |= ((u64)(radeon_get_ib_value(p, idx+8) & 0xff)) << 32;
-					ib[idx+7] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-					ib[idx+8] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+					ib[idx+7] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
+					ib[idx+8] += upper_32_bits(src_reloc->gpu_offset) & 0xff;

 					dst_offset = radeon_get_ib_value(p, idx+1);
 					dst_offset <<= 8;
-					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+					ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
 				}
 				if ((src_offset + (count * 4)) > radeon_bo_size(src_reloc->robj)) {
 					dev_warn(p->dev, "DMA L2T, T2L src buffer too small (%llu %lu)\n",
@ -3028,8 +3028,8 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 					DRM_ERROR("L2T, T2L Partial is cayman only !\n");
 					return -EINVAL;
 				}
-				ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
-				ib[idx+4] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);
+				ib[idx+4] += (u32)(dst_reloc->gpu_offset >> 8);
 				p->idx += 13;
 				break;
 			/* Copy L2T broadcast (tile units) */
@ -3065,10 +3065,10 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 							dst2_offset + (count * 4), radeon_bo_size(dst2_reloc->robj));
 					return -EINVAL;
 				}
-				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
-				ib[idx+2] += (u32)(dst2_reloc->lobj.gpu_offset >> 8);
-				ib[idx+8] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-				ib[idx+9] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
+				ib[idx+2] += (u32)(dst2_reloc->gpu_offset >> 8);
+				ib[idx+8] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
+				ib[idx+9] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
 				p->idx += 10;
 				break;
 			default:
@ -3089,8 +3089,8 @@ int evergreen_dma_cs_parse(struct radeon_cs_parser *p)
 					 dst_offset, radeon_bo_size(dst_reloc->robj));
 				return -EINVAL;
 			}
-			ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-			ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
+			ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
+			ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000;
 			p->idx += 4;
 			break;
 		case DMA_PACKET_NOP:
--- a/drivers/gpu/drm/radeon/evergreen_dma.c
+++ b/drivers/gpu/drm/radeon/evergreen_dma.c
@ -174,11 +174,9 @@ bool evergreen_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *rin
 	u32 reset_mask = evergreen_gpu_check_soft_reset(rdev);

 	if (!(reset_mask & RADEON_RESET_DMA)) {
-		radeon_ring_lockup_update(ring);
+		radeon_ring_lockup_update(rdev, ring);
 		return false;
 	}
-	/* force ring activities */
-	radeon_ring_force_activity(rdev, ring);
 	return radeon_ring_test_lockup(rdev, ring);
 }

--- a/drivers/gpu/drm/radeon/kv_dpm.c
+++ b/drivers/gpu/drm/radeon/kv_dpm.c
@ -1338,13 +1338,11 @@ static int kv_enable_uvd_dpm(struct radeon_device *rdev, bool enable)
 					PPSMC_MSG_UVDDPM_Enable : PPSMC_MSG_UVDDPM_Disable);
 }

-#if 0
 static int kv_enable_vce_dpm(struct radeon_device *rdev, bool enable)
 {
 	return kv_notify_message_to_smu(rdev, enable ?
 					PPSMC_MSG_VCEDPM_Enable : PPSMC_MSG_VCEDPM_Disable);
 }
-#endif

 static int kv_enable_samu_dpm(struct radeon_device *rdev, bool enable)
 {
@ -1389,7 +1387,6 @@ static int kv_update_uvd_dpm(struct radeon_device *rdev, bool gate)
 	return kv_enable_uvd_dpm(rdev, !gate);
 }

-#if 0
 static u8 kv_get_vce_boot_level(struct radeon_device *rdev)
 {
 	u8 i;
@ -1414,6 +1411,9 @@ static int kv_update_vce_dpm(struct radeon_device *rdev,
 	int ret;

 	if (radeon_new_state->evclk > 0 && radeon_current_state->evclk == 0) {
+		kv_dpm_powergate_vce(rdev, false);
+		/* turn the clocks on when encoding */
+		cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, false);
 		if (pi->caps_stable_p_state)
 			pi->vce_boot_level = table->count - 1;
 		else
@ -1436,11 +1436,13 @@ static int kv_update_vce_dpm(struct radeon_device *rdev,
 		kv_enable_vce_dpm(rdev, true);
 	} else if (radeon_new_state->evclk == 0 && radeon_current_state->evclk > 0) {
 		kv_enable_vce_dpm(rdev, false);
+		/* turn the clocks off when not encoding */
+		cik_update_cg(rdev, RADEON_CG_BLOCK_VCE, true);
+		kv_dpm_powergate_vce(rdev, true);
 	}

 	return 0;
 }
-#endif

 static int kv_update_samu_dpm(struct radeon_device *rdev, bool gate)
 {
@ -1575,11 +1577,16 @@ static void kv_dpm_powergate_vce(struct radeon_device *rdev, bool gate)
 	pi->vce_power_gated = gate;

 	if (gate) {
-		if (pi->caps_vce_pg)
+		if (pi->caps_vce_pg) {
+			/* XXX do we need a vce_v1_0_stop() ?  */
 			kv_notify_message_to_smu(rdev, PPSMC_MSG_VCEPowerOFF);
+		}
 	} else {
-		if (pi->caps_vce_pg)
+		if (pi->caps_vce_pg) {
 			kv_notify_message_to_smu(rdev, PPSMC_MSG_VCEPowerON);
+			vce_v2_0_resume(rdev);
+			vce_v1_0_start(rdev);
+		}
 	}
 }

@ -1768,7 +1775,7 @@ int kv_dpm_set_power_state(struct radeon_device *rdev)
 {
 	struct kv_power_info *pi = kv_get_pi(rdev);
 	struct radeon_ps *new_ps = &pi->requested_rps;
-	/*struct radeon_ps *old_ps = &pi->current_rps;*/
+	struct radeon_ps *old_ps = &pi->current_rps;
 	int ret;

 	if (pi->bapm_enable) {
@ -1798,13 +1805,12 @@ int kv_dpm_set_power_state(struct radeon_device *rdev)
 			kv_set_enabled_levels(rdev);
 			kv_force_lowest_valid(rdev);
 			kv_unforce_levels(rdev);
-#if 0
+
 			ret = kv_update_vce_dpm(rdev, new_ps, old_ps);
 			if (ret) {
 				DRM_ERROR("kv_update_vce_dpm failed\n");
 				return ret;
 			}
-#endif
 			kv_update_sclk_t(rdev);
 		}
 	} else {
@ -1823,13 +1829,11 @@ int kv_dpm_set_power_state(struct radeon_device *rdev)
 			kv_program_nbps_index_settings(rdev, new_ps);
 			kv_freeze_sclk_dpm(rdev, false);
 			kv_set_enabled_levels(rdev);
-#if 0
 			ret = kv_update_vce_dpm(rdev, new_ps, old_ps);
 			if (ret) {
 				DRM_ERROR("kv_update_vce_dpm failed\n");
 				return ret;
 			}
-#endif
 			kv_update_acp_boot_level(rdev);
 			kv_update_sclk_t(rdev);
 			kv_enable_nb_dpm(rdev);
@ -2037,6 +2041,14 @@ static void kv_apply_state_adjust_rules(struct radeon_device *rdev,
 	struct radeon_clock_and_voltage_limits *max_limits =
 		&rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac;

+	if (new_rps->vce_active) {
+		new_rps->evclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].evclk;
+		new_rps->ecclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].ecclk;
+	} else {
+		new_rps->evclk = 0;
+		new_rps->ecclk = 0;
+	}
+
 	mclk = max_limits->mclk;
 	sclk = min_sclk;

@ -2056,6 +2068,11 @@ static void kv_apply_state_adjust_rules(struct radeon_device *rdev,
 		sclk = stable_p_state_sclk;
 	}

+	if (new_rps->vce_active) {
+		if (sclk < rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk)
+			sclk = rdev->pm.dpm.vce_states[rdev->pm.dpm.vce_level].sclk;
+	}
+
 	ps->need_dfs_bypass = true;

 	for (i = 0; i < ps->num_levels; i++) {
@ -2092,7 +2109,8 @@ static void kv_apply_state_adjust_rules(struct radeon_device *rdev,
 		}
 	}

-	pi->video_start = new_rps->dclk || new_rps->vclk;
+	pi->video_start = new_rps->dclk || new_rps->vclk ||
+		new_rps->evclk || new_rps->ecclk;

 	if ((new_rps->class & ATOM_PPLIB_CLASSIFICATION_UI_MASK) ==
 	    ATOM_PPLIB_CLASSIFICATION_UI_BATTERY)
@ -2538,9 +2556,6 @@ static int kv_parse_power_table(struct radeon_device *rdev)
 	if (!rdev->pm.dpm.ps)
 		return -ENOMEM;
 	power_state_offset = (u8 *)state_array->states;
-	rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps);
-	rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime);
-	rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime);
 	for (i = 0; i < state_array->ucNumEntries; i++) {
 		u8 *idx;
 		power_state = (union pplib_power_state *)power_state_offset;
@ -2577,6 +2592,19 @@ static int kv_parse_power_table(struct radeon_device *rdev)
 		power_state_offset += 2 + power_state->v2.ucNumDPMLevels;
 	}
 	rdev->pm.dpm.num_ps = state_array->ucNumEntries;
+
+	/* fill in the vce power states */
+	for (i = 0; i < RADEON_MAX_VCE_LEVELS; i++) {
+		u32 sclk;
+		clock_array_index = rdev->pm.dpm.vce_states[i].clk_idx;
+		clock_info = (union pplib_clock_info *)
+			&clock_info_array->clockInfo[clock_array_index * clock_info_array->ucEntrySize];
+		sclk = le16_to_cpu(clock_info->sumo.usEngineClockLow);
+		sclk |= clock_info->sumo.ucEngineClockHigh << 16;
+		rdev->pm.dpm.vce_states[i].sclk = sclk;
+		rdev->pm.dpm.vce_states[i].mclk = 0;
+	}
+
 	return 0;
 }

@ -2590,6 +2618,10 @@ int kv_dpm_init(struct radeon_device *rdev)
 		return -ENOMEM;
 	rdev->pm.dpm.priv = pi;

+	ret = r600_get_platform_caps(rdev);
+	if (ret)
+		return ret;
+
 	ret = r600_parse_extended_power_table(rdev);
 	if (ret)
 		return ret;
@ -2623,7 +2655,7 @@ int kv_dpm_init(struct radeon_device *rdev)
 	pi->caps_fps = false; /* true? */
 	pi->caps_uvd_pg = true;
 	pi->caps_uvd_dpm = true;
-	pi->caps_vce_pg = false;
+	pi->caps_vce_pg = false; /* XXX true */
 	pi->caps_samu_pg = false;
 	pi->caps_acp_pg = false;
 	pi->caps_stable_p_state = false;
--- a/drivers/gpu/drm/radeon/ni.c
+++ b/drivers/gpu/drm/radeon/ni.c
@ -1642,8 +1642,8 @@ static int cayman_cp_resume(struct radeon_device *rdev)
 		ring = &rdev->ring[ridx[i]];
 		WREG32_P(cp_rb_cntl[i], RB_RPTR_WR_ENA, ~RB_RPTR_WR_ENA);

-		ring->rptr = ring->wptr = 0;
-		WREG32(cp_rb_rptr[i], ring->rptr);
+		ring->wptr = 0;
+		WREG32(cp_rb_rptr[i], 0);
 		WREG32(cp_rb_wptr[i], ring->wptr);

 		mdelay(1);
@ -1917,11 +1917,9 @@ bool cayman_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 	if (!(reset_mask & (RADEON_RESET_GFX |
 			    RADEON_RESET_COMPUTE |
 			    RADEON_RESET_CP))) {
-		radeon_ring_lockup_update(ring);
+		radeon_ring_lockup_update(rdev, ring);
 		return false;
 	}
-	/* force CP activities */
-	radeon_ring_force_activity(rdev, ring);
 	return radeon_ring_test_lockup(rdev, ring);
 }

--- a/drivers/gpu/drm/radeon/ni_dma.c
+++ b/drivers/gpu/drm/radeon/ni_dma.c
@ -248,8 +248,6 @@ int cayman_dma_resume(struct radeon_device *rdev)
 		ring->wptr = 0;
 		WREG32(DMA_RB_WPTR + reg_offset, ring->wptr << 2);

-		ring->rptr = RREG32(DMA_RB_RPTR + reg_offset) >> 2;
-
 		WREG32(DMA_RB_CNTL + reg_offset, rb_cntl | DMA_RB_ENABLE);

 		ring->ready = true;
@ -302,11 +300,9 @@ bool cayman_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 		mask = RADEON_RESET_DMA1;

 	if (!(reset_mask & mask)) {
-		radeon_ring_lockup_update(ring);
+		radeon_ring_lockup_update(rdev, ring);
 		return false;
 	}
-	/* force ring activities */
-	radeon_ring_force_activity(rdev, ring);
 	return radeon_ring_test_lockup(rdev, ring);
 }

--- a/drivers/gpu/drm/radeon/ni_dpm.c
+++ b/drivers/gpu/drm/radeon/ni_dpm.c
@ -4025,9 +4025,6 @@ static int ni_parse_power_table(struct radeon_device *rdev)
 				  power_info->pplib.ucNumStates, GFP_KERNEL);
 	if (!rdev->pm.dpm.ps)
 		return -ENOMEM;
-	rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps);
-	rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime);
-	rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime);

 	for (i = 0; i < power_info->pplib.ucNumStates; i++) {
 		power_state = (union pplib_power_state *)
@ -4089,6 +4086,10 @@ int ni_dpm_init(struct radeon_device *rdev)
 	pi->min_vddc_in_table = 0;
 	pi->max_vddc_in_table = 0;

+	ret = r600_get_platform_caps(rdev);
+	if (ret)
+		return ret;
+
 	ret = ni_parse_power_table(rdev);
 	if (ret)
 		return ret;
--- a/drivers/gpu/drm/radeon/r100.c
+++ b/drivers/gpu/drm/radeon/r100.c
@ -1193,7 +1193,6 @@ int r100_cp_init(struct radeon_device *rdev, unsigned ring_size)

 	WREG32(RADEON_CP_RB_CNTL, tmp);
 	udelay(10);
-	ring->rptr = RREG32(RADEON_CP_RB_RPTR);
 	/* Set cp mode to bus mastering & enable cp*/
 	WREG32(RADEON_CP_CSQ_MODE,
 	       REG_SET(RADEON_INDIRECT2_START, indirect2_start) |
@ -1275,12 +1274,12 @@ int r100_reloc_pitch_offset(struct radeon_cs_parser *p,

 	value = radeon_get_ib_value(p, idx);
 	tmp = value & 0x003fffff;
-	tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
+	tmp += (((u32)reloc->gpu_offset) >> 10);

 	if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
-		if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+		if (reloc->tiling_flags & RADEON_TILING_MACRO)
 			tile_flags |= RADEON_DST_TILE_MACRO;
-		if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
+		if (reloc->tiling_flags & RADEON_TILING_MICRO) {
 			if (reg == RADEON_SRC_PITCH_OFFSET) {
 				DRM_ERROR("Cannot src blit from microtiled surface\n");
 				radeon_cs_dump_packet(p, pkt);
@ -1326,7 +1325,7 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
 			return r;
 		}
 		idx_value = radeon_get_ib_value(p, idx);
-		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);

 		track->arrays[i + 0].esize = idx_value >> 8;
 		track->arrays[i + 0].robj = reloc->robj;
@ -1338,7 +1337,7 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
 			radeon_cs_dump_packet(p, pkt);
 			return r;
 		}
-		ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
+		ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->gpu_offset);
 		track->arrays[i + 1].robj = reloc->robj;
 		track->arrays[i + 1].esize = idx_value >> 24;
 		track->arrays[i + 1].esize &= 0x7F;
@ -1352,7 +1351,7 @@ int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
 			return r;
 		}
 		idx_value = radeon_get_ib_value(p, idx);
-		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
 		track->arrays[i + 0].robj = reloc->robj;
 		track->arrays[i + 0].esize = idx_value >> 8;
 		track->arrays[i + 0].esize &= 0x7F;
@ -1595,7 +1594,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
 		track->zb.robj = reloc->robj;
 		track->zb.offset = idx_value;
 		track->zb_dirty = true;
-		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
+		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
 		break;
 	case RADEON_RB3D_COLOROFFSET:
 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
@ -1608,7 +1607,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
 		track->cb[0].robj = reloc->robj;
 		track->cb[0].offset = idx_value;
 		track->cb_dirty = true;
-		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
+		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
 		break;
 	case RADEON_PP_TXOFFSET_0:
 	case RADEON_PP_TXOFFSET_1:
@ -1622,16 +1621,16 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
 			return r;
 		}
 		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
-			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+			if (reloc->tiling_flags & RADEON_TILING_MACRO)
 				tile_flags |= RADEON_TXO_MACRO_TILE;
-			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+			if (reloc->tiling_flags & RADEON_TILING_MICRO)
 				tile_flags |= RADEON_TXO_MICRO_TILE_X2;

 			tmp = idx_value & ~(0x7 << 2);
 			tmp |= tile_flags;
-			ib[idx] = tmp + ((u32)reloc->lobj.gpu_offset);
+			ib[idx] = tmp + ((u32)reloc->gpu_offset);
 		} else
-			ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
+			ib[idx] = idx_value + ((u32)reloc->gpu_offset);
 		track->textures[i].robj = reloc->robj;
 		track->tex_dirty = true;
 		break;
@ -1649,7 +1648,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
 			return r;
 		}
 		track->textures[0].cube_info[i].offset = idx_value;
-		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
+		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
 		track->textures[0].cube_info[i].robj = reloc->robj;
 		track->tex_dirty = true;
 		break;
@ -1667,7 +1666,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
 			return r;
 		}
 		track->textures[1].cube_info[i].offset = idx_value;
-		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
+		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
 		track->textures[1].cube_info[i].robj = reloc->robj;
 		track->tex_dirty = true;
 		break;
@ -1685,7 +1684,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
 			return r;
 		}
 		track->textures[2].cube_info[i].offset = idx_value;
-		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
+		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
 		track->textures[2].cube_info[i].robj = reloc->robj;
 		track->tex_dirty = true;
 		break;
@ -1703,9 +1702,9 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
 			return r;
 		}
 		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
-			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+			if (reloc->tiling_flags & RADEON_TILING_MACRO)
 				tile_flags |= RADEON_COLOR_TILE_ENABLE;
-			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+			if (reloc->tiling_flags & RADEON_TILING_MICRO)
 				tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;

 			tmp = idx_value & ~(0x7 << 16);
@ -1773,7 +1772,7 @@ static int r100_packet0_check(struct radeon_cs_parser *p,
 			radeon_cs_dump_packet(p, pkt);
 			return r;
 		}
-		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
+		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
 		break;
 	case RADEON_PP_CNTL:
 		{
@ -1933,7 +1932,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p,
 			radeon_cs_dump_packet(p, pkt);
 			return r;
 		}
-		ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->lobj.gpu_offset);
+		ib[idx+1] = radeon_get_ib_value(p, idx+1) + ((u32)reloc->gpu_offset);
 		r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
 		if (r) {
 			return r;
@ -1947,7 +1946,7 @@ static int r100_packet3_check(struct radeon_cs_parser *p,
 			radeon_cs_dump_packet(p, pkt);
 			return r;
 		}
-		ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->lobj.gpu_offset);
+		ib[idx] = radeon_get_ib_value(p, idx) + ((u32)reloc->gpu_offset);
 		track->num_arrays = 1;
 		track->vtx_size = r100_get_vtx_size(radeon_get_ib_value(p, idx + 2));

@ -2523,11 +2522,9 @@ bool r100_gpu_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)

 	rbbm_status = RREG32(R_000E40_RBBM_STATUS);
 	if (!G_000E40_GUI_ACTIVE(rbbm_status)) {
-		radeon_ring_lockup_update(ring);
+		radeon_ring_lockup_update(rdev, ring);
 		return false;
 	}
-	/* force CP activities */
-	radeon_ring_force_activity(rdev, ring);
 	return radeon_ring_test_lockup(rdev, ring);
 }

--- a/drivers/gpu/drm/radeon/r200.c
+++ b/drivers/gpu/drm/radeon/r200.c
@ -185,7 +185,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
 		track->zb.robj = reloc->robj;
 		track->zb.offset = idx_value;
 		track->zb_dirty = true;
-		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
+		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
 		break;
 	case RADEON_RB3D_COLOROFFSET:
 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
@ -198,7 +198,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
 		track->cb[0].robj = reloc->robj;
 		track->cb[0].offset = idx_value;
 		track->cb_dirty = true;
-		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
+		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
 		break;
 	case R200_PP_TXOFFSET_0:
 	case R200_PP_TXOFFSET_1:
@ -215,16 +215,16 @@ int r200_packet0_check(struct radeon_cs_parser *p,
 			return r;
 		}
 		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
-			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+			if (reloc->tiling_flags & RADEON_TILING_MACRO)
 				tile_flags |= R200_TXO_MACRO_TILE;
-			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+			if (reloc->tiling_flags & RADEON_TILING_MICRO)
 				tile_flags |= R200_TXO_MICRO_TILE;

 			tmp = idx_value & ~(0x7 << 2);
 			tmp |= tile_flags;
-			ib[idx] = tmp + ((u32)reloc->lobj.gpu_offset);
+			ib[idx] = tmp + ((u32)reloc->gpu_offset);
 		} else
-			ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
+			ib[idx] = idx_value + ((u32)reloc->gpu_offset);
 		track->textures[i].robj = reloc->robj;
 		track->tex_dirty = true;
 		break;
@ -268,7 +268,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
 			return r;
 		}
 		track->textures[i].cube_info[face - 1].offset = idx_value;
-		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
+		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
 		track->textures[i].cube_info[face - 1].robj = reloc->robj;
 		track->tex_dirty = true;
 		break;
@ -287,9 +287,9 @@ int r200_packet0_check(struct radeon_cs_parser *p,
 		}

 		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
-			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+			if (reloc->tiling_flags & RADEON_TILING_MACRO)
 				tile_flags |= RADEON_COLOR_TILE_ENABLE;
-			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+			if (reloc->tiling_flags & RADEON_TILING_MICRO)
 				tile_flags |= RADEON_COLOR_MICROTILE_ENABLE;

 			tmp = idx_value & ~(0x7 << 16);
@ -362,7 +362,7 @@ int r200_packet0_check(struct radeon_cs_parser *p,
 			radeon_cs_dump_packet(p, pkt);
 			return r;
 		}
-		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
+		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
 		break;
 	case RADEON_PP_CNTL:
 		{
--- a/drivers/gpu/drm/radeon/r300.c
+++ b/drivers/gpu/drm/radeon/r300.c
@ -640,7 +640,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 		track->cb[i].robj = reloc->robj;
 		track->cb[i].offset = idx_value;
 		track->cb_dirty = true;
-		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
+		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
 		break;
 	case R300_ZB_DEPTHOFFSET:
 		r = radeon_cs_packet_next_reloc(p, &reloc, 0);
@ -653,7 +653,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 		track->zb.robj = reloc->robj;
 		track->zb.offset = idx_value;
 		track->zb_dirty = true;
-		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
+		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
 		break;
 	case R300_TX_OFFSET_0:
 	case R300_TX_OFFSET_0+4:
@ -682,16 +682,16 @@ static int r300_packet0_check(struct radeon_cs_parser *p,

 		if (p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS) {
 			ib[idx] = (idx_value & 31) | /* keep the 1st 5 bits */
-				  ((idx_value & ~31) + (u32)reloc->lobj.gpu_offset);
+				  ((idx_value & ~31) + (u32)reloc->gpu_offset);
 		} else {
-			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+			if (reloc->tiling_flags & RADEON_TILING_MACRO)
 				tile_flags |= R300_TXO_MACRO_TILE;
-			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+			if (reloc->tiling_flags & RADEON_TILING_MICRO)
 				tile_flags |= R300_TXO_MICRO_TILE;
-			else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE)
+			else if (reloc->tiling_flags & RADEON_TILING_MICRO_SQUARE)
 				tile_flags |= R300_TXO_MICRO_TILE_SQUARE;

-			tmp = idx_value + ((u32)reloc->lobj.gpu_offset);
+			tmp = idx_value + ((u32)reloc->gpu_offset);
 			tmp |= tile_flags;
 			ib[idx] = tmp;
 		}
@ -753,11 +753,11 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 				return r;
 			}

-			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+			if (reloc->tiling_flags & RADEON_TILING_MACRO)
 				tile_flags |= R300_COLOR_TILE_ENABLE;
-			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+			if (reloc->tiling_flags & RADEON_TILING_MICRO)
 				tile_flags |= R300_COLOR_MICROTILE_ENABLE;
-			else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE)
+			else if (reloc->tiling_flags & RADEON_TILING_MICRO_SQUARE)
 				tile_flags |= R300_COLOR_MICROTILE_SQUARE_ENABLE;

 			tmp = idx_value & ~(0x7 << 16);
@ -838,11 +838,11 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 				return r;
 			}

-			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+			if (reloc->tiling_flags & RADEON_TILING_MACRO)
 				tile_flags |= R300_DEPTHMACROTILE_ENABLE;
-			if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+			if (reloc->tiling_flags & RADEON_TILING_MICRO)
 				tile_flags |= R300_DEPTHMICROTILE_TILED;
-			else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO_SQUARE)
+			else if (reloc->tiling_flags & RADEON_TILING_MICRO_SQUARE)
 				tile_flags |= R300_DEPTHMICROTILE_TILED_SQUARE;

 			tmp = idx_value & ~(0x7 << 16);
@ -1052,7 +1052,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 			radeon_cs_dump_packet(p, pkt);
 			return r;
 		}
-		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
+		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
 		break;
 	case 0x4e0c:
 		/* RB3D_COLOR_CHANNEL_MASK */
@ -1097,7 +1097,7 @@ static int r300_packet0_check(struct radeon_cs_parser *p,
 		track->aa.robj = reloc->robj;
 		track->aa.offset = idx_value;
 		track->aa_dirty = true;
-		ib[idx] = idx_value + ((u32)reloc->lobj.gpu_offset);
+		ib[idx] = idx_value + ((u32)reloc->gpu_offset);
 		break;
 	case R300_RB3D_AARESOLVE_PITCH:
 		track->aa.pitch = idx_value & 0x3FFE;
@ -1162,7 +1162,7 @@ static int r300_packet3_check(struct radeon_cs_parser *p,
 			radeon_cs_dump_packet(p, pkt);
 			return r;
 		}
-		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+		ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->gpu_offset);
 		r = r100_cs_track_check_pkt3_indx_buffer(p, pkt, reloc->robj);
 		if (r) {
 			return r;
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@ -1748,11 +1748,9 @@ bool r600_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 	if (!(reset_mask & (RADEON_RESET_GFX |
 			    RADEON_RESET_COMPUTE |
 			    RADEON_RESET_CP))) {
-		radeon_ring_lockup_update(ring);
+		radeon_ring_lockup_update(rdev, ring);
 		return false;
 	}
-	/* force CP activities */
-	radeon_ring_force_activity(rdev, ring);
 	return radeon_ring_test_lockup(rdev, ring);
 }

@ -2604,8 +2602,6 @@ int r600_cp_resume(struct radeon_device *rdev)
 	WREG32(CP_RB_BASE, ring->gpu_addr >> 8);
 	WREG32(CP_DEBUG, (1 << 27) | (1 << 28));

-	ring->rptr = RREG32(CP_RB_RPTR);
-
 	r600_cp_start(rdev);
 	ring->ready = true;
 	r = radeon_ring_test(rdev, RADEON_RING_TYPE_GFX_INDEX, ring);
--- a/drivers/gpu/drm/radeon/r600_cs.c
+++ b/drivers/gpu/drm/radeon/r600_cs.c
@ -1022,7 +1022,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 					"0x%04X\n", reg);
 			return -EINVAL;
 		}
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		break;
 	case SQ_CONFIG:
 		track->sq_config = radeon_get_ib_value(p, idx);
@ -1043,7 +1043,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			track->db_depth_info = radeon_get_ib_value(p, idx);
 			ib[idx] &= C_028010_ARRAY_MODE;
 			track->db_depth_info &= C_028010_ARRAY_MODE;
-			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
+			if (reloc->tiling_flags & RADEON_TILING_MACRO) {
 				ib[idx] |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1);
 				track->db_depth_info |= S_028010_ARRAY_MODE(V_028010_ARRAY_2D_TILED_THIN1);
 			} else {
@ -1084,9 +1084,9 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 		}
 		tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
 		track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->vgt_strmout_bo[tmp] = reloc->robj;
-		track->vgt_strmout_bo_mc[tmp] = reloc->lobj.gpu_offset;
+		track->vgt_strmout_bo_mc[tmp] = reloc->gpu_offset;
 		track->streamout_dirty = true;
 		break;
 	case VGT_STRMOUT_BUFFER_SIZE_0:
@ -1105,7 +1105,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 					"0x%04X\n", reg);
 			return -EINVAL;
 		}
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		break;
 	case R_028238_CB_TARGET_MASK:
 		track->cb_target_mask = radeon_get_ib_value(p, idx);
@ -1142,10 +1142,10 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			}
 			tmp = (reg - R_0280A0_CB_COLOR0_INFO) / 4;
 			track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
-			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
+			if (reloc->tiling_flags & RADEON_TILING_MACRO) {
 				ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1);
 				track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_2D_TILED_THIN1);
-			} else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO) {
+			} else if (reloc->tiling_flags & RADEON_TILING_MICRO) {
 				ib[idx] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1);
 				track->cb_color_info[tmp] |= S_0280A0_ARRAY_MODE(V_0280A0_ARRAY_1D_TILED_THIN1);
 			}
@ -1214,7 +1214,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			}
 			track->cb_color_frag_bo[tmp] = reloc->robj;
 			track->cb_color_frag_offset[tmp] = (u64)ib[idx] << 8;
-			ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+			ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		}
 		if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) {
 			track->cb_dirty = true;
@ -1245,7 +1245,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			}
 			track->cb_color_tile_bo[tmp] = reloc->robj;
 			track->cb_color_tile_offset[tmp] = (u64)ib[idx] << 8;
-			ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+			ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		}
 		if (G_0280A0_TILE_MODE(track->cb_color_info[tmp])) {
 			track->cb_dirty = true;
@ -1281,10 +1281,10 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 		}
 		tmp = (reg - CB_COLOR0_BASE) / 4;
 		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->cb_color_base_last[tmp] = ib[idx];
 		track->cb_color_bo[tmp] = reloc->robj;
-		track->cb_color_bo_mc[tmp] = reloc->lobj.gpu_offset;
+		track->cb_color_bo_mc[tmp] = reloc->gpu_offset;
 		track->cb_dirty = true;
 		break;
 	case DB_DEPTH_BASE:
@ -1295,9 +1295,9 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			return -EINVAL;
 		}
 		track->db_offset = radeon_get_ib_value(p, idx) << 8;
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->db_bo = reloc->robj;
-		track->db_bo_mc = reloc->lobj.gpu_offset;
+		track->db_bo_mc = reloc->gpu_offset;
 		track->db_dirty = true;
 		break;
 	case DB_HTILE_DATA_BASE:
@ -1308,7 +1308,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 			return -EINVAL;
 		}
 		track->htile_offset = radeon_get_ib_value(p, idx) << 8;
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		track->htile_bo = reloc->robj;
 		track->db_dirty = true;
 		break;
@ -1377,7 +1377,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 					"0x%04X\n", reg);
 			return -EINVAL;
 		}
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		break;
 	case SX_MEMORY_EXPORT_BASE:
 		r = radeon_cs_packet_next_reloc(p, &reloc, r600_nomm);
@ -1386,7 +1386,7 @@ static int r600_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
 					"0x%04X\n", reg);
 			return -EINVAL;
 		}
-		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+		ib[idx] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		break;
 	case SX_MISC:
 		track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
@ -1672,7 +1672,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 			return -EINVAL;
 		}

-		offset = reloc->lobj.gpu_offset +
+		offset = reloc->gpu_offset +
 		         (idx_value & 0xfffffff0) +
 		         ((u64)(tmp & 0xff) << 32);

@ -1713,7 +1713,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 			return -EINVAL;
 		}

-		offset = reloc->lobj.gpu_offset +
+		offset = reloc->gpu_offset +
 		         idx_value +
 		         ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);

@ -1765,7 +1765,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 				return -EINVAL;
 			}

-			offset = reloc->lobj.gpu_offset +
+			offset = reloc->gpu_offset +
 			         (radeon_get_ib_value(p, idx+1) & 0xfffffff0) +
 			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);

@ -1805,7 +1805,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 			tmp = radeon_get_ib_value(p, idx) +
 				((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);

-			offset = reloc->lobj.gpu_offset + tmp;
+			offset = reloc->gpu_offset + tmp;

 			if ((tmp + size) > radeon_bo_size(reloc->robj)) {
 				dev_warn(p->dev, "CP DMA src buffer too small (%llu %lu)\n",
@ -1835,7 +1835,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 			tmp = radeon_get_ib_value(p, idx+2) +
 				((u64)(radeon_get_ib_value(p, idx+3) & 0xff) << 32);

-			offset = reloc->lobj.gpu_offset + tmp;
+			offset = reloc->gpu_offset + tmp;

 			if ((tmp + size) > radeon_bo_size(reloc->robj)) {
 				dev_warn(p->dev, "CP DMA dst buffer too small (%llu %lu)\n",
@ -1861,7 +1861,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 				DRM_ERROR("bad SURFACE_SYNC\n");
 				return -EINVAL;
 			}
-			ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+			ib[idx+2] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		}
 		break;
 	case PACKET3_EVENT_WRITE:
@ -1877,7 +1877,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 				DRM_ERROR("bad EVENT_WRITE\n");
 				return -EINVAL;
 			}
-			offset = reloc->lobj.gpu_offset +
+			offset = reloc->gpu_offset +
 			         (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
 			         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);

@ -1899,7 +1899,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 			return -EINVAL;
 		}

-		offset = reloc->lobj.gpu_offset +
+		offset = reloc->gpu_offset +
 		         (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
 		         ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);

@ -1964,11 +1964,11 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 					DRM_ERROR("bad SET_RESOURCE\n");
 					return -EINVAL;
 				}
-				base_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+				base_offset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 				if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
-					if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
+					if (reloc->tiling_flags & RADEON_TILING_MACRO)
 						ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_2D_TILED_THIN1);
-					else if (reloc->lobj.tiling_flags & RADEON_TILING_MICRO)
+					else if (reloc->tiling_flags & RADEON_TILING_MICRO)
 						ib[idx+1+(i*7)+0] |= S_038000_TILE_MODE(V_038000_ARRAY_1D_TILED_THIN1);
 				}
 				texture = reloc->robj;
@ -1978,13 +1978,13 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 					DRM_ERROR("bad SET_RESOURCE\n");
 					return -EINVAL;
 				}
-				mip_offset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+				mip_offset = (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 				mipmap = reloc->robj;
 				r = r600_check_texture_resource(p,  idx+(i*7)+1,
 								texture, mipmap,
 								base_offset + radeon_get_ib_value(p, idx+1+(i*7)+2),
 								mip_offset + radeon_get_ib_value(p, idx+1+(i*7)+3),
-								reloc->lobj.tiling_flags);
+								reloc->tiling_flags);
 				if (r)
 					return r;
 				ib[idx+1+(i*7)+2] += base_offset;
@ -2008,7 +2008,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 					ib[idx+1+(i*7)+1] = radeon_bo_size(reloc->robj) - offset;
 				}

-				offset64 = reloc->lobj.gpu_offset + offset;
+				offset64 = reloc->gpu_offset + offset;
 				ib[idx+1+(i*8)+0] = offset64;
 				ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
 						    (upper_32_bits(offset64) & 0xff);
@ -2118,7 +2118,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 					  offset + 4, radeon_bo_size(reloc->robj));
 				return -EINVAL;
 			}
-			ib[idx+1] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
+			ib[idx+1] += (u32)((reloc->gpu_offset >> 8) & 0xffffffff);
 		}
 		break;
 	case PACKET3_SURFACE_BASE_UPDATE:
@ -2151,7 +2151,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 					  offset + 4, radeon_bo_size(reloc->robj));
 				return -EINVAL;
 			}
-			offset += reloc->lobj.gpu_offset;
+			offset += reloc->gpu_offset;
 			ib[idx+1] = offset;
 			ib[idx+2] = upper_32_bits(offset) & 0xff;
 		}
@ -2170,7 +2170,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 					  offset + 4, radeon_bo_size(reloc->robj));
 				return -EINVAL;
 			}
-			offset += reloc->lobj.gpu_offset;
+			offset += reloc->gpu_offset;
 			ib[idx+3] = offset;
 			ib[idx+4] = upper_32_bits(offset) & 0xff;
 		}
@ -2199,7 +2199,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 				  offset + 8, radeon_bo_size(reloc->robj));
 			return -EINVAL;
 		}
-		offset += reloc->lobj.gpu_offset;
+		offset += reloc->gpu_offset;
 		ib[idx+0] = offset;
 		ib[idx+1] = upper_32_bits(offset) & 0xff;
 		break;
@ -2224,7 +2224,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 					  offset + 4, radeon_bo_size(reloc->robj));
 				return -EINVAL;
 			}
-			offset += reloc->lobj.gpu_offset;
+			offset += reloc->gpu_offset;
 			ib[idx+1] = offset;
 			ib[idx+2] = upper_32_bits(offset) & 0xff;
 		} else {
@ -2248,7 +2248,7 @@ static int r600_packet3_check(struct radeon_cs_parser *p,
 					  offset + 4, radeon_bo_size(reloc->robj));
 				return -EINVAL;
 			}
-			offset += reloc->lobj.gpu_offset;
+			offset += reloc->gpu_offset;
 			ib[idx+3] = offset;
 			ib[idx+4] = upper_32_bits(offset) & 0xff;
 		} else {
@ -2505,14 +2505,14 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
 				dst_offset = radeon_get_ib_value(p, idx+1);
 				dst_offset <<= 8;

-				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+				ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
 				p->idx += count + 5;
 			} else {
 				dst_offset = radeon_get_ib_value(p, idx+1);
 				dst_offset |= ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;

-				ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-				ib[idx+2] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+				ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
+				ib[idx+2] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
 				p->idx += count + 3;
 			}
 			if ((dst_offset + (count * 4)) > radeon_bo_size(dst_reloc->robj)) {
@ -2539,22 +2539,22 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
 					/* tiled src, linear dst */
 					src_offset = radeon_get_ib_value(p, idx+1);
 					src_offset <<= 8;
-					ib[idx+1] += (u32)(src_reloc->lobj.gpu_offset >> 8);
+					ib[idx+1] += (u32)(src_reloc->gpu_offset >> 8);

 					dst_offset = radeon_get_ib_value(p, idx+5);
 					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
-					ib[idx+5] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-					ib[idx+6] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
+					ib[idx+5] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
+					ib[idx+6] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
 				} else {
 					/* linear src, tiled dst */
 					src_offset = radeon_get_ib_value(p, idx+5);
 					src_offset |= ((u64)(radeon_get_ib_value(p, idx+6) & 0xff)) << 32;
-					ib[idx+5] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-					ib[idx+6] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+					ib[idx+5] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
+					ib[idx+6] += upper_32_bits(src_reloc->gpu_offset) & 0xff;

 					dst_offset = radeon_get_ib_value(p, idx+1);
 					dst_offset <<= 8;
-					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset >> 8);
+					ib[idx+1] += (u32)(dst_reloc->gpu_offset >> 8);
 				}
 				p->idx += 7;
 			} else {
@ -2564,10 +2564,10 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
 					dst_offset = radeon_get_ib_value(p, idx+1);
 					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff)) << 32;

-					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-					ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-					ib[idx+3] += upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff;
-					ib[idx+4] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
+					ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
+					ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
+					ib[idx+3] += upper_32_bits(dst_reloc->gpu_offset) & 0xff;
+					ib[idx+4] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
 					p->idx += 5;
 				} else {
 					src_offset = radeon_get_ib_value(p, idx+2);
@ -2575,10 +2575,10 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
 					dst_offset = radeon_get_ib_value(p, idx+1);
 					dst_offset |= ((u64)(radeon_get_ib_value(p, idx+3) & 0xff0000)) << 16;

-					ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-					ib[idx+2] += (u32)(src_reloc->lobj.gpu_offset & 0xfffffffc);
-					ib[idx+3] += upper_32_bits(src_reloc->lobj.gpu_offset) & 0xff;
-					ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) & 0xff) << 16;
+					ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
+					ib[idx+2] += (u32)(src_reloc->gpu_offset & 0xfffffffc);
+					ib[idx+3] += upper_32_bits(src_reloc->gpu_offset) & 0xff;
+					ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) & 0xff) << 16;
 					p->idx += 4;
 				}
 			}
@ -2610,8 +2610,8 @@ int r600_dma_cs_parse(struct radeon_cs_parser *p)
 					 dst_offset + (count * 4), radeon_bo_size(dst_reloc->robj));
 				return -EINVAL;
 			}
-			ib[idx+1] += (u32)(dst_reloc->lobj.gpu_offset & 0xfffffffc);
-			ib[idx+3] += (upper_32_bits(dst_reloc->lobj.gpu_offset) << 16) & 0x00ff0000;
+			ib[idx+1] += (u32)(dst_reloc->gpu_offset & 0xfffffffc);
+			ib[idx+3] += (upper_32_bits(dst_reloc->gpu_offset) << 16) & 0x00ff0000;
 			p->idx += 4;
 			break;
 		case DMA_PACKET_NOP:
--- a/drivers/gpu/drm/radeon/r600_dma.c
+++ b/drivers/gpu/drm/radeon/r600_dma.c
@ -176,8 +176,6 @@ int r600_dma_resume(struct radeon_device *rdev)
 	ring->wptr = 0;
 	WREG32(DMA_RB_WPTR, ring->wptr << 2);

-	ring->rptr = RREG32(DMA_RB_RPTR) >> 2;
-
 	WREG32(DMA_RB_CNTL, rb_cntl | DMA_RB_ENABLE);

 	ring->ready = true;
@ -221,11 +219,9 @@ bool r600_dma_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 	u32 reset_mask = r600_gpu_check_soft_reset(rdev);

 	if (!(reset_mask & RADEON_RESET_DMA)) {
-		radeon_ring_lockup_update(ring);
+		radeon_ring_lockup_update(rdev, ring);
 		return false;
 	}
-	/* force ring activities */
-	radeon_ring_force_activity(rdev, ring);
 	return radeon_ring_test_lockup(rdev, ring);
 }

--- a/drivers/gpu/drm/radeon/r600_dpm.c
+++ b/drivers/gpu/drm/radeon/r600_dpm.c
@ -834,6 +834,26 @@ static int r600_parse_clk_voltage_dep_table(struct radeon_clock_voltage_dependen
 	return 0;
 }

+int r600_get_platform_caps(struct radeon_device *rdev)
+{
+	struct radeon_mode_info *mode_info = &rdev->mode_info;
+	union power_info *power_info;
+	int index = GetIndexIntoMasterTable(DATA, PowerPlayInfo);
+        u16 data_offset;
+	u8 frev, crev;
+
+	if (!atom_parse_data_header(mode_info->atom_context, index, NULL,
+				   &frev, &crev, &data_offset))
+		return -EINVAL;
+	power_info = (union power_info *)(mode_info->atom_context->bios + data_offset);
+
+	rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps);
+	rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime);
+	rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime);
+
+	return 0;
+}
+
 /* sizeof(ATOM_PPLIB_EXTENDEDHEADER) */
 #define SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V2 12
 #define SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V3 14
@ -1043,7 +1063,15 @@ int r600_parse_extended_power_table(struct radeon_device *rdev)
 				(mode_info->atom_context->bios + data_offset +
 				 le16_to_cpu(ext_hdr->usVCETableOffset) + 1 +
 				 1 + array->ucNumEntries * sizeof(VCEClockInfo));
+			ATOM_PPLIB_VCE_State_Table *states =
+				(ATOM_PPLIB_VCE_State_Table *)
+				(mode_info->atom_context->bios + data_offset +
+				 le16_to_cpu(ext_hdr->usVCETableOffset) + 1 +
+				 1 + (array->ucNumEntries * sizeof (VCEClockInfo)) +
+				 1 + (limits->numEntries * sizeof(ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record)));
 			ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record *entry;
+			ATOM_PPLIB_VCE_State_Record *state_entry;
+			VCEClockInfo *vce_clk;
 			u32 size = limits->numEntries *
 				sizeof(struct radeon_vce_clock_voltage_dependency_entry);
 			rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries =
@ -1055,8 +1083,9 @@ int r600_parse_extended_power_table(struct radeon_device *rdev)
 			rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.count =
 				limits->numEntries;
 			entry = &limits->entries[0];
+			state_entry = &states->entries[0];
 			for (i = 0; i < limits->numEntries; i++) {
-				VCEClockInfo *vce_clk = (VCEClockInfo *)
+				vce_clk = (VCEClockInfo *)
 					((u8 *)&array->entries[0] +
 					 (entry->ucVCEClockInfoIndex * sizeof(VCEClockInfo)));
 				rdev->pm.dpm.dyn_state.vce_clock_voltage_dependency_table.entries[i].evclk =
@ -1068,6 +1097,23 @@ int r600_parse_extended_power_table(struct radeon_device *rdev)
 				entry = (ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record *)
 					((u8 *)entry + sizeof(ATOM_PPLIB_VCE_Clock_Voltage_Limit_Record));
 			}
+			for (i = 0; i < states->numEntries; i++) {
+				if (i >= RADEON_MAX_VCE_LEVELS)
+					break;
+				vce_clk = (VCEClockInfo *)
+					((u8 *)&array->entries[0] +
+					 (state_entry->ucVCEClockInfoIndex * sizeof(VCEClockInfo)));
+				rdev->pm.dpm.vce_states[i].evclk =
+					le16_to_cpu(vce_clk->usEVClkLow) | (vce_clk->ucEVClkHigh << 16);
+				rdev->pm.dpm.vce_states[i].ecclk =
+					le16_to_cpu(vce_clk->usECClkLow) | (vce_clk->ucECClkHigh << 16);
+				rdev->pm.dpm.vce_states[i].clk_idx =
+					state_entry->ucClockInfoIndex & 0x3f;
+				rdev->pm.dpm.vce_states[i].pstate =
+					(state_entry->ucClockInfoIndex & 0xc0) >> 6;
+				state_entry = (ATOM_PPLIB_VCE_State_Record *)
+					((u8 *)state_entry + sizeof(ATOM_PPLIB_VCE_State_Record));
+			}
 		}
 		if ((le16_to_cpu(ext_hdr->usSize) >= SIZE_OF_ATOM_PPLIB_EXTENDEDHEADER_V3) &&
 			ext_hdr->usUVDTableOffset) {
--- a/drivers/gpu/drm/radeon/r600_dpm.h
+++ b/drivers/gpu/drm/radeon/r600_dpm.h
@ -215,6 +215,8 @@ void r600_stop_dpm(struct radeon_device *rdev);

 bool r600_is_internal_thermal_sensor(enum radeon_int_thermal_type sensor);

+int r600_get_platform_caps(struct radeon_device *rdev);
+
 int r600_parse_extended_power_table(struct radeon_device *rdev);
 void r600_free_extended_power_table(struct radeon_device *rdev);

--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@ -113,19 +113,16 @@ extern int radeon_hard_reset;
 #define RADEONFB_CONN_LIMIT			4
 #define RADEON_BIOS_NUM_SCRATCH			8

-/* max number of rings */
-#define RADEON_NUM_RINGS			6
-
 /* fence seq are set to this number when signaled */
 #define RADEON_FENCE_SIGNALED_SEQ		0LL

 /* internal ring indices */
 /* r1xx+ has gfx CP ring */
-#define RADEON_RING_TYPE_GFX_INDEX	0
+#define RADEON_RING_TYPE_GFX_INDEX		0

 /* cayman has 2 compute CP rings */
-#define CAYMAN_RING_TYPE_CP1_INDEX	1
-#define CAYMAN_RING_TYPE_CP2_INDEX	2
+#define CAYMAN_RING_TYPE_CP1_INDEX		1
+#define CAYMAN_RING_TYPE_CP2_INDEX		2

 /* R600+ has an async dma ring */
 #define R600_RING_TYPE_DMA_INDEX		3
@ -133,7 +130,17 @@ extern int radeon_hard_reset;
 #define CAYMAN_RING_TYPE_DMA1_INDEX		4

 /* R600+ */
-#define R600_RING_TYPE_UVD_INDEX	5
+#define R600_RING_TYPE_UVD_INDEX		5
+
+/* TN+ */
+#define TN_RING_TYPE_VCE1_INDEX			6
+#define TN_RING_TYPE_VCE2_INDEX			7
+
+/* max number of rings */
+#define RADEON_NUM_RINGS			8
+
+/* number of hw syncs before falling back on blocking */
+#define RADEON_NUM_SYNCS			4

 /* number of hw syncs before falling back on blocking */
 #define RADEON_NUM_SYNCS			4
@ -356,9 +363,8 @@ int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, i
 void radeon_fence_process(struct radeon_device *rdev, int ring);
 bool radeon_fence_signaled(struct radeon_fence *fence);
 int radeon_fence_wait(struct radeon_fence *fence, bool interruptible);
-int radeon_fence_wait_locked(struct radeon_fence *fence);
-int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring);
-int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring);
+int radeon_fence_wait_next(struct radeon_device *rdev, int ring);
+int radeon_fence_wait_empty(struct radeon_device *rdev, int ring);
 int radeon_fence_wait_any(struct radeon_device *rdev,
 			  struct radeon_fence **fences,
 			  bool intr);
@ -450,6 +456,7 @@ struct radeon_bo {
 	/* Protected by gem.mutex */
 	struct list_head		list;
 	/* Protected by tbo.reserved */
+	u32				initial_domain;
 	u32				placements[3];
 	struct ttm_placement		placement;
 	struct ttm_buffer_object	tbo;
@ -472,16 +479,6 @@ struct radeon_bo {
 };
 #define gem_to_radeon_bo(gobj) container_of((gobj), struct radeon_bo, gem_base)

-struct radeon_bo_list {
-	struct ttm_validate_buffer tv;
-	struct radeon_bo	*bo;
-	uint64_t		gpu_offset;
-	bool			written;
-	unsigned		domain;
-	unsigned		alt_domain;
-	u32			tiling_flags;
-};
-
 int radeon_gem_debugfs_init(struct radeon_device *rdev);

 /* sub-allocation manager, it has to be protected by another lock.
@ -789,7 +786,6 @@ struct radeon_ib {
 struct radeon_ring {
 	struct radeon_bo	*ring_obj;
 	volatile uint32_t	*ring;
-	unsigned		rptr;
 	unsigned		rptr_offs;
 	unsigned		rptr_save_reg;
 	u64			next_rptr_gpu_addr;
@ -799,8 +795,8 @@ struct radeon_ring {
 	unsigned		ring_size;
 	unsigned		ring_free_dw;
 	int			count_dw;
-	unsigned long		last_activity;
-	unsigned		last_rptr;
+	atomic_t		last_rptr;
+	atomic64_t		last_activity;
 	uint64_t		gpu_addr;
 	uint32_t		align_mask;
 	uint32_t		ptr_mask;
@ -852,17 +848,22 @@ struct radeon_mec {
 #define R600_PTE_READABLE	(1 << 5)
 #define R600_PTE_WRITEABLE	(1 << 6)

+struct radeon_vm_pt {
+	struct radeon_bo		*bo;
+	uint64_t			addr;
+};
+
 struct radeon_vm {
-	struct list_head		list;
 	struct list_head		va;
 	unsigned			id;

 	/* contains the page directory */
-	struct radeon_sa_bo		*page_directory;
+	struct radeon_bo		*page_directory;
 	uint64_t			pd_gpu_addr;
+	unsigned			max_pde_used;

 	/* array of page tables, one for each page directory entry */
-	struct radeon_sa_bo		**page_tables;
+	struct radeon_vm_pt		*page_tables;

 	struct mutex			mutex;
 	/* last fence for cs using this vm */
@ -874,10 +875,7 @@ struct radeon_vm {
 };

 struct radeon_vm_manager {
-	struct mutex			lock;
-	struct list_head		lru_vm;
 	struct radeon_fence		*active[RADEON_NUM_VM];
-	struct radeon_sa_manager	sa_manager;
 	uint32_t			max_pfn;
 	/* number of VMIDs */
 	unsigned			nvm;
@ -953,8 +951,8 @@ void radeon_ring_unlock_commit(struct radeon_device *rdev, struct radeon_ring *c
 void radeon_ring_undo(struct radeon_ring *ring);
 void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *cp);
 int radeon_ring_test(struct radeon_device *rdev, struct radeon_ring *cp);
-void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *ring);
-void radeon_ring_lockup_update(struct radeon_ring *ring);
+void radeon_ring_lockup_update(struct radeon_device *rdev,
+			       struct radeon_ring *ring);
 bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring);
 unsigned radeon_ring_backup(struct radeon_device *rdev, struct radeon_ring *ring,
 			    uint32_t **data);
@ -980,9 +978,12 @@ void cayman_dma_fini(struct radeon_device *rdev);
 struct radeon_cs_reloc {
 	struct drm_gem_object		*gobj;
 	struct radeon_bo		*robj;
-	struct radeon_bo_list		lobj;
+	struct ttm_validate_buffer	tv;
+	uint64_t			gpu_offset;
+	unsigned			domain;
+	unsigned			alt_domain;
+	uint32_t			tiling_flags;
 	uint32_t			handle;
-	uint32_t			flags;
 };

 struct radeon_cs_chunk {
@ -1006,6 +1007,7 @@ struct radeon_cs_parser {
 	unsigned		nrelocs;
 	struct radeon_cs_reloc	*relocs;
 	struct radeon_cs_reloc	**relocs_ptr;
+	struct radeon_cs_reloc	*vm_bos;
 	struct list_head	validated;
 	unsigned		dma_reloc_idx;
 	/* indices of various chunks */
@ -1255,6 +1257,17 @@ enum radeon_dpm_event_src {
 	RADEON_DPM_EVENT_SRC_DIGIAL_OR_EXTERNAL = 4
 };

+#define RADEON_MAX_VCE_LEVELS 6
+
+enum radeon_vce_level {
+	RADEON_VCE_LEVEL_AC_ALL = 0,     /* AC, All cases */
+	RADEON_VCE_LEVEL_DC_EE = 1,      /* DC, entropy encoding */
+	RADEON_VCE_LEVEL_DC_LL_LOW = 2,  /* DC, low latency queue, res <= 720 */
+	RADEON_VCE_LEVEL_DC_LL_HIGH = 3, /* DC, low latency queue, 1080 >= res > 720 */
+	RADEON_VCE_LEVEL_DC_GP_LOW = 4,  /* DC, general purpose queue, res <= 720 */
+	RADEON_VCE_LEVEL_DC_GP_HIGH = 5, /* DC, general purpose queue, 1080 >= res > 720 */
+};
+
 struct radeon_ps {
 	u32 caps; /* vbios flags */
 	u32 class; /* vbios flags */
@ -1265,6 +1278,8 @@ struct radeon_ps {
 	/* VCE clocks */
 	u32 evclk;
 	u32 ecclk;
+	bool vce_active;
+	enum radeon_vce_level vce_level;
 	/* asic priv */
 	void *ps_priv;
 };
@ -1439,6 +1454,17 @@ enum radeon_dpm_forced_level {
 	RADEON_DPM_FORCED_LEVEL_HIGH = 2,
 };

+struct radeon_vce_state {
+	/* vce clocks */
+	u32 evclk;
+	u32 ecclk;
+	/* gpu clocks */
+	u32 sclk;
+	u32 mclk;
+	u8 clk_idx;
+	u8 pstate;
+};
+
 struct radeon_dpm {
 	struct radeon_ps        *ps;
 	/* number of valid power states */
@ -1451,6 +1477,9 @@ struct radeon_dpm {
 	struct radeon_ps        *boot_ps;
 	/* default uvd power state */
 	struct radeon_ps        *uvd_ps;
+	/* vce requirements */
+	struct radeon_vce_state vce_states[RADEON_MAX_VCE_LEVELS];
+	enum radeon_vce_level vce_level;
 	enum radeon_pm_state_type state;
 	enum radeon_pm_state_type user_state;
 	u32                     platform_caps;
@ -1476,6 +1505,7 @@ struct radeon_dpm {
 	/* special states active */
 	bool                    thermal_active;
 	bool                    uvd_active;
+	bool                    vce_active;
 	/* thermal handling */
 	struct radeon_dpm_thermal thermal;
 	/* forced levels */
@ -1486,6 +1516,7 @@ struct radeon_dpm {
 };

 void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable);
+void radeon_dpm_enable_vce(struct radeon_device *rdev, bool enable);

 struct radeon_pm {
 	struct mutex		mutex;
@ -1591,6 +1622,45 @@ int radeon_uvd_calc_upll_dividers(struct radeon_device *rdev,
 int radeon_uvd_send_upll_ctlreq(struct radeon_device *rdev,
                                unsigned cg_upll_func_cntl);

+/*
+ * VCE
+ */
+#define RADEON_MAX_VCE_HANDLES	16
+#define RADEON_VCE_STACK_SIZE	(1024*1024)
+#define RADEON_VCE_HEAP_SIZE	(4*1024*1024)
+
+struct radeon_vce {
+	struct radeon_bo	*vcpu_bo;
+	uint64_t		gpu_addr;
+	unsigned		fw_version;
+	unsigned		fb_version;
+	atomic_t		handles[RADEON_MAX_VCE_HANDLES];
+	struct drm_file		*filp[RADEON_MAX_VCE_HANDLES];
+	struct delayed_work	idle_work;
+};
+
+int radeon_vce_init(struct radeon_device *rdev);
+void radeon_vce_fini(struct radeon_device *rdev);
+int radeon_vce_suspend(struct radeon_device *rdev);
+int radeon_vce_resume(struct radeon_device *rdev);
+int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring,
+			      uint32_t handle, struct radeon_fence **fence);
+int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring,
+			       uint32_t handle, struct radeon_fence **fence);
+void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp);
+void radeon_vce_note_usage(struct radeon_device *rdev);
+int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi);
+int radeon_vce_cs_parse(struct radeon_cs_parser *p);
+bool radeon_vce_semaphore_emit(struct radeon_device *rdev,
+			       struct radeon_ring *ring,
+			       struct radeon_semaphore *semaphore,
+			       bool emit_wait);
+void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib);
+void radeon_vce_fence_emit(struct radeon_device *rdev,
+			   struct radeon_fence *fence);
+int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
+int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
+
 struct r600_audio_pin {
 	int			channels;
 	int			rate;
@ -1780,6 +1850,7 @@ struct radeon_asic {
 		void (*set_pcie_lanes)(struct radeon_device *rdev, int lanes);
 		void (*set_clock_gating)(struct radeon_device *rdev, int enable);
 		int (*set_uvd_clocks)(struct radeon_device *rdev, u32 vclk, u32 dclk);
+		int (*set_vce_clocks)(struct radeon_device *rdev, u32 evclk, u32 ecclk);
 		int (*get_temperature)(struct radeon_device *rdev);
 	} pm;
 	/* dynamic power management */
@ -2041,6 +2112,8 @@ int radeon_gem_wait_idle_ioctl(struct drm_device *dev, void *data,
 			      struct drm_file *filp);
 int radeon_gem_va_ioctl(struct drm_device *dev, void *data,
 			  struct drm_file *filp);
+int radeon_gem_op_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *filp);
 int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp);
 int radeon_gem_set_tiling_ioctl(struct drm_device *dev, void *data,
 				struct drm_file *filp);
@ -2186,6 +2259,7 @@ struct radeon_device {
 	struct radeon_gem		gem;
 	struct radeon_pm		pm;
 	struct radeon_uvd		uvd;
+	struct radeon_vce		vce;
 	uint32_t			bios_scratch[RADEON_BIOS_NUM_SCRATCH];
 	struct radeon_wb		wb;
 	struct radeon_dummy_page	dummy_page;
@ -2205,6 +2279,7 @@ struct radeon_device {
 	const struct firmware *sdma_fw;	/* CIK SDMA firmware */
 	const struct firmware *smc_fw;	/* SMC firmware */
 	const struct firmware *uvd_fw;	/* UVD firmware */
+	const struct firmware *vce_fw;	/* VCE firmware */
 	struct r600_vram_scratch vram_scratch;
 	int msi_enabled; /* msi enabled */
 	struct r600_ih ih; /* r6/700 interrupt ring */
@ -2229,6 +2304,10 @@ struct radeon_device {
 	/* virtual memory */
 	struct radeon_vm_manager	vm_manager;
 	struct mutex			gpu_clock_mutex;
+	/* memory stats */
+	atomic64_t			vram_usage;
+	atomic64_t			gtt_usage;
+	atomic64_t			num_bytes_moved;
 	/* ACPI interface */
 	struct radeon_atif		atif;
 	struct radeon_atcs		atcs;
@ -2639,6 +2718,7 @@ void radeon_ring_write(struct radeon_ring *ring, uint32_t v);
 #define radeon_set_pcie_lanes(rdev, l) (rdev)->asic->pm.set_pcie_lanes((rdev), (l))
 #define radeon_set_clock_gating(rdev, e) (rdev)->asic->pm.set_clock_gating((rdev), (e))
 #define radeon_set_uvd_clocks(rdev, v, d) (rdev)->asic->pm.set_uvd_clocks((rdev), (v), (d))
+#define radeon_set_vce_clocks(rdev, ev, ec) (rdev)->asic->pm.set_vce_clocks((rdev), (ev), (ec))
 #define radeon_get_temperature(rdev) (rdev)->asic->pm.get_temperature((rdev))
 #define radeon_set_surface_reg(rdev, r, f, p, o, s) ((rdev)->asic->surface.set_reg((rdev), (r), (f), (p), (o), (s)))
 #define radeon_clear_surface_reg(rdev, r) ((rdev)->asic->surface.clear_reg((rdev), (r)))
@ -2715,16 +2795,22 @@ extern void radeon_program_register_sequence(struct radeon_device *rdev,
 */
 int radeon_vm_manager_init(struct radeon_device *rdev);
 void radeon_vm_manager_fini(struct radeon_device *rdev);
-void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
+int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm);
 void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm);
-int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm);
-void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm);
+struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
+					  struct radeon_vm *vm,
+                                          struct list_head *head);
 struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
 				       struct radeon_vm *vm, int ring);
+void radeon_vm_flush(struct radeon_device *rdev,
+                     struct radeon_vm *vm,
+                     int ring);
 void radeon_vm_fence(struct radeon_device *rdev,
 		     struct radeon_vm *vm,
 		     struct radeon_fence *fence);
 uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr);
+int radeon_vm_update_page_directory(struct radeon_device *rdev,
+				    struct radeon_vm *vm);
 int radeon_vm_bo_update(struct radeon_device *rdev,
 			struct radeon_vm *vm,
 			struct radeon_bo *bo,
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@ -1987,6 +1987,19 @@ static struct radeon_asic_ring ci_dma_ring = {
 	.set_wptr = &cik_sdma_set_wptr,
 };

+static struct radeon_asic_ring ci_vce_ring = {
+	.ib_execute = &radeon_vce_ib_execute,
+	.emit_fence = &radeon_vce_fence_emit,
+	.emit_semaphore = &radeon_vce_semaphore_emit,
+	.cs_parse = &radeon_vce_cs_parse,
+	.ring_test = &radeon_vce_ring_test,
+	.ib_test = &radeon_vce_ib_test,
+	.is_lockup = &radeon_ring_test_lockup,
+	.get_rptr = &vce_v1_0_get_rptr,
+	.get_wptr = &vce_v1_0_get_wptr,
+	.set_wptr = &vce_v1_0_set_wptr,
+};
+
 static struct radeon_asic ci_asic = {
 	.init = &cik_init,
 	.fini = &cik_fini,
@ -2015,6 +2028,8 @@ static struct radeon_asic ci_asic = {
 		[R600_RING_TYPE_DMA_INDEX] = &ci_dma_ring,
 		[CAYMAN_RING_TYPE_DMA1_INDEX] = &ci_dma_ring,
 		[R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
+		[TN_RING_TYPE_VCE1_INDEX] = &ci_vce_ring,
+		[TN_RING_TYPE_VCE2_INDEX] = &ci_vce_ring,
 	},
 	.irq = {
 		.set = &cik_irq_set,
@ -2061,6 +2076,7 @@ static struct radeon_asic ci_asic = {
 		.set_pcie_lanes = NULL,
 		.set_clock_gating = NULL,
 		.set_uvd_clocks = &cik_set_uvd_clocks,
+		.set_vce_clocks = &cik_set_vce_clocks,
 		.get_temperature = &ci_get_temp,
 	},
 	.dpm = {
@ -2117,6 +2133,8 @@ static struct radeon_asic kv_asic = {
 		[R600_RING_TYPE_DMA_INDEX] = &ci_dma_ring,
 		[CAYMAN_RING_TYPE_DMA1_INDEX] = &ci_dma_ring,
 		[R600_RING_TYPE_UVD_INDEX] = &cayman_uvd_ring,
+		[TN_RING_TYPE_VCE1_INDEX] = &ci_vce_ring,
+		[TN_RING_TYPE_VCE2_INDEX] = &ci_vce_ring,
 	},
 	.irq = {
 		.set = &cik_irq_set,
@ -2163,6 +2181,7 @@ static struct radeon_asic kv_asic = {
 		.set_pcie_lanes = NULL,
 		.set_clock_gating = NULL,
 		.set_uvd_clocks = &cik_set_uvd_clocks,
+		.set_vce_clocks = &cik_set_vce_clocks,
 		.get_temperature = &kv_get_temp,
 	},
 	.dpm = {
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@ -717,6 +717,7 @@ u32 cik_get_xclk(struct radeon_device *rdev);
 uint32_t cik_pciep_rreg(struct radeon_device *rdev, uint32_t reg);
 void cik_pciep_wreg(struct radeon_device *rdev, uint32_t reg, uint32_t v);
 int cik_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk);
+int cik_set_vce_clocks(struct radeon_device *rdev, u32 evclk, u32 ecclk);
 void cik_sdma_fence_ring_emit(struct radeon_device *rdev,
 			      struct radeon_fence *fence);
 bool cik_sdma_semaphore_ring_emit(struct radeon_device *rdev,
@ -863,4 +864,17 @@ bool uvd_v3_1_semaphore_emit(struct radeon_device *rdev,
 /* uvd v4.2 */
 int uvd_v4_2_resume(struct radeon_device *rdev);

+/* vce v1.0 */
+uint32_t vce_v1_0_get_rptr(struct radeon_device *rdev,
+			   struct radeon_ring *ring);
+uint32_t vce_v1_0_get_wptr(struct radeon_device *rdev,
+			   struct radeon_ring *ring);
+void vce_v1_0_set_wptr(struct radeon_device *rdev,
+		       struct radeon_ring *ring);
+int vce_v1_0_init(struct radeon_device *rdev);
+int vce_v1_0_start(struct radeon_device *rdev);
+
+/* vce v2.0 */
+int vce_v2_0_resume(struct radeon_device *rdev);
+
 #endif
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@ -24,16 +24,59 @@
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
+#include <linux/list_sort.h>
 #include <drm/drmP.h>
 #include <drm/radeon_drm.h>
 #include "radeon_reg.h"
 #include "radeon.h"
 #include "radeon_trace.h"

+#define RADEON_CS_MAX_PRIORITY		32u
+#define RADEON_CS_NUM_BUCKETS		(RADEON_CS_MAX_PRIORITY + 1)
+
+/* This is based on the bucket sort with O(n) time complexity.
+ * An item with priority "i" is added to bucket[i]. The lists are then
+ * concatenated in descending order.
+ */
+struct radeon_cs_buckets {
+	struct list_head bucket[RADEON_CS_NUM_BUCKETS];
+};
+
+static void radeon_cs_buckets_init(struct radeon_cs_buckets *b)
+{
+	unsigned i;
+
+	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++)
+		INIT_LIST_HEAD(&b->bucket[i]);
+}
+
+static void radeon_cs_buckets_add(struct radeon_cs_buckets *b,
+				  struct list_head *item, unsigned priority)
+{
+	/* Since buffers which appear sooner in the relocation list are
+	 * likely to be used more often than buffers which appear later
+	 * in the list, the sort mustn't change the ordering of buffers
+	 * with the same priority, i.e. it must be stable.
+	 */
+	list_add_tail(item, &b->bucket[min(priority, RADEON_CS_MAX_PRIORITY)]);
+}
+
+static void radeon_cs_buckets_get_list(struct radeon_cs_buckets *b,
+				       struct list_head *out_list)
+{
+	unsigned i;
+
+	/* Connect the sorted buckets in the output list. */
+	for (i = 0; i < RADEON_CS_NUM_BUCKETS; i++) {
+		list_splice(&b->bucket[i], out_list);
+	}
+}
+
 static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 {
 	struct drm_device *ddev = p->rdev->ddev;
 	struct radeon_cs_chunk *chunk;
+	struct radeon_cs_buckets buckets;
 	unsigned i, j;
 	bool duplicate;

@ -52,8 +95,12 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 	if (p->relocs == NULL) {
 		return -ENOMEM;
 	}
+
+	radeon_cs_buckets_init(&buckets);
+
 	for (i = 0; i < p->nrelocs; i++) {
 		struct drm_radeon_cs_reloc *r;
+		unsigned priority;

 		duplicate = false;
 		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
@ -78,8 +125,14 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 		}
 		p->relocs_ptr[i] = &p->relocs[i];
 		p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
-		p->relocs[i].lobj.bo = p->relocs[i].robj;
-		p->relocs[i].lobj.written = !!r->write_domain;
+
+		/* The userspace buffer priorities are from 0 to 15. A higher
+		 * number means the buffer is more important.
+		 * Also, the buffers used for write have a higher priority than
+		 * the buffers used for read only, which doubles the range
+		 * to 0 to 31. 32 is reserved for the kernel driver.
+		 */
+		priority = (r->flags & 0xf) * 2 + !!r->write_domain;

 		/* the first reloc of an UVD job is the msg and that must be in
 		   VRAM, also but everything into VRAM on AGP cards to avoid
@ -87,29 +140,38 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
 		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) {
 			/* TODO: is this still needed for NI+ ? */
-			p->relocs[i].lobj.domain =
+			p->relocs[i].domain =
 				RADEON_GEM_DOMAIN_VRAM;

-			p->relocs[i].lobj.alt_domain =
+			p->relocs[i].alt_domain =
 				RADEON_GEM_DOMAIN_VRAM;

+			/* prioritize this over any other relocation */
+			priority = RADEON_CS_MAX_PRIORITY;
 		} else {
 			uint32_t domain = r->write_domain ?
 				r->write_domain : r->read_domains;

-			p->relocs[i].lobj.domain = domain;
+			p->relocs[i].domain = domain;
 			if (domain == RADEON_GEM_DOMAIN_VRAM)
 				domain |= RADEON_GEM_DOMAIN_GTT;
-			p->relocs[i].lobj.alt_domain = domain;
+			p->relocs[i].alt_domain = domain;
 		}

-		p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
+		p->relocs[i].tv.bo = &p->relocs[i].robj->tbo;
 		p->relocs[i].handle = r->handle;

-		radeon_bo_list_add_object(&p->relocs[i].lobj,
-					  &p->validated);
+		radeon_cs_buckets_add(&buckets, &p->relocs[i].tv.head,
+				      priority);
 	}
-	return radeon_bo_list_validate(&p->ticket, &p->validated, p->ring);
+
+	radeon_cs_buckets_get_list(&buckets, &p->validated);
+
+	if (p->cs_flags & RADEON_CS_USE_VM)
+		p->vm_bos = radeon_vm_get_bos(p->rdev, p->ib.vm,
+					      &p->validated);
+
+	return radeon_bo_list_validate(p->rdev, &p->ticket, &p->validated, p->ring);
 }

 static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
@ -147,6 +209,10 @@ static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority
 	case RADEON_CS_RING_UVD:
 		p->ring = R600_RING_TYPE_UVD_INDEX;
 		break;
+	case RADEON_CS_RING_VCE:
+		/* TODO: only use the low priority ring for now */
+		p->ring = TN_RING_TYPE_VCE1_INDEX;
+		break;
 	}
 	return 0;
 }
@ -286,6 +352,16 @@ int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
 	return 0;
 }

+static int cmp_size_smaller_first(void *priv, struct list_head *a,
+				  struct list_head *b)
+{
+	struct radeon_cs_reloc *la = list_entry(a, struct radeon_cs_reloc, tv.head);
+	struct radeon_cs_reloc *lb = list_entry(b, struct radeon_cs_reloc, tv.head);
+
+	/* Sort A before B if A is smaller. */
+	return (int)la->robj->tbo.num_pages - (int)lb->robj->tbo.num_pages;
+}
+
 /**
 * cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
@ -299,6 +375,18 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
 	unsigned i;

 	if (!error) {
+		/* Sort the buffer list from the smallest to largest buffer,
+		 * which affects the order of buffers in the LRU list.
+		 * This assures that the smallest buffers are added first
+		 * to the LRU list, so they are likely to be later evicted
+		 * first, instead of large buffers whose eviction is more
+		 * expensive.
+		 *
+		 * This slightly lowers the number of bytes moved by TTM
+		 * per frame under memory pressure.
+		 */
+		list_sort(NULL, &parser->validated, cmp_size_smaller_first);
+
 		ttm_eu_fence_buffer_objects(&parser->ticket,
 					    &parser->validated,
 					    parser->ib.fence);
@ -316,6 +404,7 @@ static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error, bo
 	kfree(parser->track);
 	kfree(parser->relocs);
 	kfree(parser->relocs_ptr);
+	kfree(parser->vm_bos);
 	for (i = 0; i < parser->nchunks; i++)
 		drm_free_large(parser->chunks[i].kdata);
 	kfree(parser->chunks);
@ -343,6 +432,9 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,

 	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
 		radeon_uvd_note_usage(rdev);
+	else if ((parser->ring == TN_RING_TYPE_VCE1_INDEX) ||
+		 (parser->ring == TN_RING_TYPE_VCE2_INDEX))
+		radeon_vce_note_usage(rdev);

 	radeon_cs_sync_rings(parser);
 	r = radeon_ib_schedule(rdev, &parser->ib, NULL);
@ -352,24 +444,32 @@ static int radeon_cs_ib_chunk(struct radeon_device *rdev,
 	return r;
 }

-static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser,
+static int radeon_bo_vm_update_pte(struct radeon_cs_parser *p,
 				   struct radeon_vm *vm)
 {
-	struct radeon_device *rdev = parser->rdev;
-	struct radeon_bo_list *lobj;
-	struct radeon_bo *bo;
-	int r;
+	struct radeon_device *rdev = p->rdev;
+	int i, r;

-	r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo, &rdev->ring_tmp_bo.bo->tbo.mem);
-	if (r) {
+	r = radeon_vm_update_page_directory(rdev, vm);
+	if (r)
 		return r;
-	}
-	list_for_each_entry(lobj, &parser->validated, tv.head) {
-		bo = lobj->bo;
-		r = radeon_vm_bo_update(parser->rdev, vm, bo, &bo->tbo.mem);
-		if (r) {
+
+	r = radeon_vm_bo_update(rdev, vm, rdev->ring_tmp_bo.bo,
+				&rdev->ring_tmp_bo.bo->tbo.mem);
+	if (r)
+		return r;
+
+	for (i = 0; i < p->nrelocs; i++) {
+		struct radeon_bo *bo;
+
+		/* ignore duplicates */
+		if (p->relocs_ptr[i] != &p->relocs[i])
+			continue;
+
+		bo = p->relocs[i].robj;
+		r = radeon_vm_bo_update(rdev, vm, bo, &bo->tbo.mem);
+		if (r)
 			return r;
-		}
 	}
 	return 0;
 }
@ -401,20 +501,13 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 	if (parser->ring == R600_RING_TYPE_UVD_INDEX)
 		radeon_uvd_note_usage(rdev);

-	mutex_lock(&rdev->vm_manager.lock);
 	mutex_lock(&vm->mutex);
-	r = radeon_vm_alloc_pt(rdev, vm);
-	if (r) {
-		goto out;
-	}
 	r = radeon_bo_vm_update_pte(parser, vm);
 	if (r) {
 		goto out;
 	}
 	radeon_cs_sync_rings(parser);
 	radeon_semaphore_sync_to(parser->ib.semaphore, vm->fence);
-	radeon_semaphore_sync_to(parser->ib.semaphore,
-				 radeon_vm_grab_id(rdev, vm, parser->ring));

 	if ((rdev->family >= CHIP_TAHITI) &&
 	    (parser->chunk_const_ib_idx != -1)) {
@ -423,14 +516,8 @@ static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
 		r = radeon_ib_schedule(rdev, &parser->ib, NULL);
 	}

-	if (!r) {
-		radeon_vm_fence(rdev, vm, parser->ib.fence);
-	}
-
 out:
-	radeon_vm_add_to_lru(rdev, vm);
 	mutex_unlock(&vm->mutex);
-	mutex_unlock(&rdev->vm_manager.lock);
 	return r;
 }

@ -698,9 +785,9 @@ int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
 	/* FIXME: we assume reloc size is 4 dwords */
 	if (nomm) {
 		*cs_reloc = p->relocs;
-		(*cs_reloc)->lobj.gpu_offset =
+		(*cs_reloc)->gpu_offset =
 			(u64)relocs_chunk->kdata[idx + 3] << 32;
-		(*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0];
+		(*cs_reloc)->gpu_offset |= relocs_chunk->kdata[idx + 0];
 	} else
 		*cs_reloc = p->relocs_ptr[(idx / 4)];
 	return 0;
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@ -1191,14 +1191,12 @@ int radeon_device_init(struct radeon_device *rdev,
 	r = radeon_gem_init(rdev);
 	if (r)
 		return r;
-	/* initialize vm here */
-	mutex_init(&rdev->vm_manager.lock);
+
 	/* Adjust VM size here.
 	 * Currently set to 4GB ((1 << 20) 4k pages).
 	 * Max GPUVM size for cayman and SI is 40 bits.
 	 */
 	rdev->vm_manager.max_pfn = 1 << 20;
-	INIT_LIST_HEAD(&rdev->vm_manager.lru_vm);

 	/* Set asic functions */
 	r = radeon_asic_init(rdev);
@ -1445,10 +1443,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
 	/* evict vram memory */
 	radeon_bo_evict_vram(rdev);

-	mutex_lock(&rdev->ring_lock);
 	/* wait for gpu to finish processing current batch */
 	for (i = 0; i < RADEON_NUM_RINGS; i++) {
-		r = radeon_fence_wait_empty_locked(rdev, i);
+		r = radeon_fence_wait_empty(rdev, i);
 		if (r) {
 			/* delay GPU reset to resume */
 			force_completion = true;
@ -1457,7 +1454,6 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
 	if (force_completion) {
 		radeon_fence_driver_force_completion(rdev);
 	}
-	mutex_unlock(&rdev->ring_lock);

 	radeon_save_bios_scratch_regs(rdev);

--- a/drivers/gpu/drm/radeon/radeon_drv.c
+++ b/drivers/gpu/drm/radeon/radeon_drv.c
@ -79,9 +79,10 @@
 *   2.35.0 - Add CIK macrotile mode array query
 *   2.36.0 - Fix CIK DCE tiling setup
 *   2.37.0 - allow GS ring setup on r6xx/r7xx
+ *   2.38.0 - RADEON_GEM_OP (GET_INITIAL_DOMAIN, SET_INITIAL_DOMAIN)
 */
 #define KMS_DRIVER_MAJOR	2
-#define KMS_DRIVER_MINOR	37
+#define KMS_DRIVER_MINOR	38
 #define KMS_DRIVER_PATCHLEVEL	0
 int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags);
 int radeon_driver_unload_kms(struct drm_device *dev);
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@ -288,7 +288,6 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
 * @rdev: radeon device pointer
 * @target_seq: sequence number(s) we want to wait for
 * @intr: use interruptable sleep
- * @lock_ring: whether the ring should be locked or not
 *
 * Wait for the requested sequence number(s) to be written by any ring
 * (all asics).  Sequnce number array is indexed by ring id.
@ -299,7 +298,7 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
 * -EDEADLK is returned when a GPU lockup has been detected.
 */
 static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
-				 bool intr, bool lock_ring)
+				 bool intr)
 {
 	uint64_t last_seq[RADEON_NUM_RINGS];
 	bool signaled;
@ -358,9 +357,6 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
 			if (i != RADEON_NUM_RINGS)
 				continue;

-			if (lock_ring)
-				mutex_lock(&rdev->ring_lock);
-
 			for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 				if (!target_seq[i])
 					continue;
@ -378,14 +374,9 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,

 				/* remember that we need an reset */
 				rdev->needs_reset = true;
-				if (lock_ring)
-					mutex_unlock(&rdev->ring_lock);
 				wake_up_all(&rdev->fence_queue);
 				return -EDEADLK;
 			}
-
-			if (lock_ring)
-				mutex_unlock(&rdev->ring_lock);
 		}
 	}
 	return 0;
@ -416,7 +407,7 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 	if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ)
 		return 0;

-	r = radeon_fence_wait_seq(fence->rdev, seq, intr, true);
+	r = radeon_fence_wait_seq(fence->rdev, seq, intr);
 	if (r)
 		return r;

@ -464,7 +455,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
 	if (num_rings == 0)
 		return -ENOENT;

-	r = radeon_fence_wait_seq(rdev, seq, intr, true);
+	r = radeon_fence_wait_seq(rdev, seq, intr);
 	if (r) {
 		return r;
 	}
@ -472,37 +463,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
 }

 /**
- * radeon_fence_wait_locked - wait for a fence to signal
- *
- * @fence: radeon fence object
- *
- * Wait for the requested fence to signal (all asics).
- * Returns 0 if the fence has passed, error for all other cases.
- */
-int radeon_fence_wait_locked(struct radeon_fence *fence)
-{
-	uint64_t seq[RADEON_NUM_RINGS] = {};
-	int r;
-
-	if (fence == NULL) {
-		WARN(1, "Querying an invalid fence : %p !\n", fence);
-		return -EINVAL;
-	}
-
-	seq[fence->ring] = fence->seq;
-	if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ)
-		return 0;
-
-	r = radeon_fence_wait_seq(fence->rdev, seq, false, false);
-	if (r)
-		return r;
-
-	fence->seq = RADEON_FENCE_SIGNALED_SEQ;
-	return 0;
-}
-
-/**
- * radeon_fence_wait_next_locked - wait for the next fence to signal
+ * radeon_fence_wait_next - wait for the next fence to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
@ -511,7 +472,7 @@ int radeon_fence_wait_locked(struct radeon_fence *fence)
 * Returns 0 if the next fence has passed, error for all other cases.
 * Caller must hold ring lock.
 */
-int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
+int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
 {
 	uint64_t seq[RADEON_NUM_RINGS] = {};

@ -521,11 +482,11 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
 		   already the last emited fence */
 		return -ENOENT;
 	}
-	return radeon_fence_wait_seq(rdev, seq, false, false);
+	return radeon_fence_wait_seq(rdev, seq, false);
 }

 /**
- * radeon_fence_wait_empty_locked - wait for all fences to signal
+ * radeon_fence_wait_empty - wait for all fences to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
@ -534,7 +495,7 @@ int radeon_fence_wait_next_locked(struct radeon_device *rdev, int ring)
 * Returns 0 if the fences have passed, error for all other cases.
 * Caller must hold ring lock.
 */
-int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
+int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
 {
 	uint64_t seq[RADEON_NUM_RINGS] = {};
 	int r;
@ -543,7 +504,7 @@ int radeon_fence_wait_empty_locked(struct radeon_device *rdev, int ring)
 	if (!seq[ring])
 		return 0;

-	r = radeon_fence_wait_seq(rdev, seq, false, false);
+	r = radeon_fence_wait_seq(rdev, seq, false);
 	if (r) {
 		if (r == -EDEADLK)
 			return -EDEADLK;
@ -794,7 +755,7 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
 	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
 		if (!rdev->fence_drv[ring].initialized)
 			continue;
-		r = radeon_fence_wait_empty_locked(rdev, ring);
+		r = radeon_fence_wait_empty(rdev, ring);
 		if (r) {
 			/* no need to trigger GPU reset as we are unloading */
 			radeon_fence_driver_force_completion(rdev);
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@ -28,8 +28,6 @@
 #include <drm/drmP.h>
 #include <drm/radeon_drm.h>
 #include "radeon.h"
-#include "radeon_reg.h"
-#include "radeon_trace.h"

 /*
 * GART
@ -394,959 +392,3 @@ void radeon_gart_fini(struct radeon_device *rdev)

 	radeon_dummy_page_fini(rdev);
 }
-
-/*
- * GPUVM
- * GPUVM is similar to the legacy gart on older asics, however
- * rather than there being a single global gart table
- * for the entire GPU, there are multiple VM page tables active
- * at any given time.  The VM page tables can contain a mix
- * vram pages and system memory pages and system memory pages
- * can be mapped as snooped (cached system pages) or unsnooped
- * (uncached system pages).
- * Each VM has an ID associated with it and there is a page table
- * associated with each VMID.  When execting a command buffer,
- * the kernel tells the the ring what VMID to use for that command
- * buffer.  VMIDs are allocated dynamically as commands are submitted.
- * The userspace drivers maintain their own address space and the kernel
- * sets up their pages tables accordingly when they submit their
- * command buffers and a VMID is assigned.
- * Cayman/Trinity support up to 8 active VMs at any given time;
- * SI supports 16.
- */
-
-/*
- * vm helpers
- *
- * TODO bind a default page at vm initialization for default address
- */
-
-/**
- * radeon_vm_num_pde - return the number of page directory entries
- *
- * @rdev: radeon_device pointer
- *
- * Calculate the number of page directory entries (cayman+).
- */
-static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
-{
-	return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE;
-}
-
-/**
- * radeon_vm_directory_size - returns the size of the page directory in bytes
- *
- * @rdev: radeon_device pointer
- *
- * Calculate the size of the page directory in bytes (cayman+).
- */
-static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
-{
-	return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
-}
-
-/**
- * radeon_vm_manager_init - init the vm manager
- *
- * @rdev: radeon_device pointer
- *
- * Init the vm manager (cayman+).
- * Returns 0 for success, error for failure.
- */
-int radeon_vm_manager_init(struct radeon_device *rdev)
-{
-	struct radeon_vm *vm;
-	struct radeon_bo_va *bo_va;
-	int r;
-	unsigned size;
-
-	if (!rdev->vm_manager.enabled) {
-		/* allocate enough for 2 full VM pts */
-		size = radeon_vm_directory_size(rdev);
-		size += rdev->vm_manager.max_pfn * 8;
-		size *= 2;
-		r = radeon_sa_bo_manager_init(rdev, &rdev->vm_manager.sa_manager,
-					      RADEON_GPU_PAGE_ALIGN(size),
-					      RADEON_VM_PTB_ALIGN_SIZE,
-					      RADEON_GEM_DOMAIN_VRAM);
-		if (r) {
-			dev_err(rdev->dev, "failed to allocate vm bo (%dKB)\n",
-				(rdev->vm_manager.max_pfn * 8) >> 10);
-			return r;
-		}
-
-		r = radeon_asic_vm_init(rdev);
-		if (r)
-			return r;
-
-		rdev->vm_manager.enabled = true;
-
-		r = radeon_sa_bo_manager_start(rdev, &rdev->vm_manager.sa_manager);
-		if (r)
-			return r;
-	}
-
-	/* restore page table */
-	list_for_each_entry(vm, &rdev->vm_manager.lru_vm, list) {
-		if (vm->page_directory == NULL)
-			continue;
-
-		list_for_each_entry(bo_va, &vm->va, vm_list) {
-			bo_va->valid = false;
-		}
-	}
-	return 0;
-}
-
-/**
- * radeon_vm_free_pt - free the page table for a specific vm
- *
- * @rdev: radeon_device pointer
- * @vm: vm to unbind
- *
- * Free the page table of a specific vm (cayman+).
- *
- * Global and local mutex must be lock!
- */
-static void radeon_vm_free_pt(struct radeon_device *rdev,
-				    struct radeon_vm *vm)
-{
-	struct radeon_bo_va *bo_va;
-	int i;
-
-	if (!vm->page_directory)
-		return;
-
-	list_del_init(&vm->list);
-	radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
-
-	list_for_each_entry(bo_va, &vm->va, vm_list) {
-		bo_va->valid = false;
-	}
-
-	if (vm->page_tables == NULL)
-		return;
-
-	for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
-		radeon_sa_bo_free(rdev, &vm->page_tables[i], vm->fence);
-
-	kfree(vm->page_tables);
-}
-
-/**
- * radeon_vm_manager_fini - tear down the vm manager
- *
- * @rdev: radeon_device pointer
- *
- * Tear down the VM manager (cayman+).
- */
-void radeon_vm_manager_fini(struct radeon_device *rdev)
-{
-	struct radeon_vm *vm, *tmp;
-	int i;
-
-	if (!rdev->vm_manager.enabled)
-		return;
-
-	mutex_lock(&rdev->vm_manager.lock);
-	/* free all allocated page tables */
-	list_for_each_entry_safe(vm, tmp, &rdev->vm_manager.lru_vm, list) {
-		mutex_lock(&vm->mutex);
-		radeon_vm_free_pt(rdev, vm);
-		mutex_unlock(&vm->mutex);
-	}
-	for (i = 0; i < RADEON_NUM_VM; ++i) {
-		radeon_fence_unref(&rdev->vm_manager.active[i]);
-	}
-	radeon_asic_vm_fini(rdev);
-	mutex_unlock(&rdev->vm_manager.lock);
-
-	radeon_sa_bo_manager_suspend(rdev, &rdev->vm_manager.sa_manager);
-	radeon_sa_bo_manager_fini(rdev, &rdev->vm_manager.sa_manager);
-	rdev->vm_manager.enabled = false;
-}
-
-/**
- * radeon_vm_evict - evict page table to make room for new one
- *
- * @rdev: radeon_device pointer
- * @vm: VM we want to allocate something for
- *
- * Evict a VM from the lru, making sure that it isn't @vm. (cayman+).
- * Returns 0 for success, -ENOMEM for failure.
- *
- * Global and local mutex must be locked!
- */
-static int radeon_vm_evict(struct radeon_device *rdev, struct radeon_vm *vm)
-{
-	struct radeon_vm *vm_evict;
-
-	if (list_empty(&rdev->vm_manager.lru_vm))
-		return -ENOMEM;
-
-	vm_evict = list_first_entry(&rdev->vm_manager.lru_vm,
-				    struct radeon_vm, list);
-	if (vm_evict == vm)
-		return -ENOMEM;
-
-	mutex_lock(&vm_evict->mutex);
-	radeon_vm_free_pt(rdev, vm_evict);
-	mutex_unlock(&vm_evict->mutex);
-	return 0;
-}
-
-/**
- * radeon_vm_alloc_pt - allocates a page table for a VM
- *
- * @rdev: radeon_device pointer
- * @vm: vm to bind
- *
- * Allocate a page table for the requested vm (cayman+).
- * Returns 0 for success, error for failure.
- *
- * Global and local mutex must be locked!
- */
-int radeon_vm_alloc_pt(struct radeon_device *rdev, struct radeon_vm *vm)
-{
-	unsigned pd_size, pd_entries, pts_size;
-	struct radeon_ib ib;
-	int r;
-
-	if (vm == NULL) {
-		return -EINVAL;
-	}
-
-	if (vm->page_directory != NULL) {
-		return 0;
-	}
-
-	pd_size = radeon_vm_directory_size(rdev);
-	pd_entries = radeon_vm_num_pdes(rdev);
-
-retry:
-	r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
-			     &vm->page_directory, pd_size,
-			     RADEON_VM_PTB_ALIGN_SIZE, false);
-	if (r == -ENOMEM) {
-		r = radeon_vm_evict(rdev, vm);
-		if (r)
-			return r;
-		goto retry;
-
-	} else if (r) {
-		return r;
-	}
-
-	vm->pd_gpu_addr = radeon_sa_bo_gpu_addr(vm->page_directory);
-
-	/* Initially clear the page directory */
-	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib,
-			  NULL, pd_entries * 2 + 64);
-	if (r) {
-		radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
-		return r;
-	}
-
-	ib.length_dw = 0;
-
-	radeon_asic_vm_set_page(rdev, &ib, vm->pd_gpu_addr,
-				0, pd_entries, 0, 0);
-
-	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
-	r = radeon_ib_schedule(rdev, &ib, NULL);
-	if (r) {
-		radeon_ib_free(rdev, &ib);
-		radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
-		return r;
-	}
-	radeon_fence_unref(&vm->fence);
-	vm->fence = radeon_fence_ref(ib.fence);
-	radeon_ib_free(rdev, &ib);
-	radeon_fence_unref(&vm->last_flush);
-
-	/* allocate page table array */
-	pts_size = radeon_vm_num_pdes(rdev) * sizeof(struct radeon_sa_bo *);
-	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
-
-	if (vm->page_tables == NULL) {
-		DRM_ERROR("Cannot allocate memory for page table array\n");
-		radeon_sa_bo_free(rdev, &vm->page_directory, vm->fence);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-/**
- * radeon_vm_add_to_lru - add VMs page table to LRU list
- *
- * @rdev: radeon_device pointer
- * @vm: vm to add to LRU
- *
- * Add the allocated page table to the LRU list (cayman+).
- *
- * Global mutex must be locked!
- */
-void radeon_vm_add_to_lru(struct radeon_device *rdev, struct radeon_vm *vm)
-{
-	list_del_init(&vm->list);
-	list_add_tail(&vm->list, &rdev->vm_manager.lru_vm);
-}
-
-/**
- * radeon_vm_grab_id - allocate the next free VMID
- *
- * @rdev: radeon_device pointer
- * @vm: vm to allocate id for
- * @ring: ring we want to submit job to
- *
- * Allocate an id for the vm (cayman+).
- * Returns the fence we need to sync to (if any).
- *
- * Global and local mutex must be locked!
- */
-struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
-				       struct radeon_vm *vm, int ring)
-{
-	struct radeon_fence *best[RADEON_NUM_RINGS] = {};
-	unsigned choices[2] = {};
-	unsigned i;
-
-	/* check if the id is still valid */
-	if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
-		return NULL;
-
-	/* we definately need to flush */
-	radeon_fence_unref(&vm->last_flush);
-
-	/* skip over VMID 0, since it is the system VM */
-	for (i = 1; i < rdev->vm_manager.nvm; ++i) {
-		struct radeon_fence *fence = rdev->vm_manager.active[i];
-
-		if (fence == NULL) {
-			/* found a free one */
-			vm->id = i;
-			trace_radeon_vm_grab_id(vm->id, ring);
-			return NULL;
-		}
-
-		if (radeon_fence_is_earlier(fence, best[fence->ring])) {
-			best[fence->ring] = fence;
-			choices[fence->ring == ring ? 0 : 1] = i;
-		}
-	}
-
-	for (i = 0; i < 2; ++i) {
-		if (choices[i]) {
-			vm->id = choices[i];
-			trace_radeon_vm_grab_id(vm->id, ring);
-			return rdev->vm_manager.active[choices[i]];
-		}
-	}
-
-	/* should never happen */
-	BUG();
-	return NULL;
-}
-
-/**
- * radeon_vm_fence - remember fence for vm
- *
- * @rdev: radeon_device pointer
- * @vm: vm we want to fence
- * @fence: fence to remember
- *
- * Fence the vm (cayman+).
- * Set the fence used to protect page table and id.
- *
- * Global and local mutex must be locked!
- */
-void radeon_vm_fence(struct radeon_device *rdev,
-		     struct radeon_vm *vm,
-		     struct radeon_fence *fence)
-{
-	radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
-	rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
-
-	radeon_fence_unref(&vm->fence);
-	vm->fence = radeon_fence_ref(fence);
-
-	radeon_fence_unref(&vm->last_id_use);
-	vm->last_id_use = radeon_fence_ref(fence);
-}
-
-/**
- * radeon_vm_bo_find - find the bo_va for a specific vm & bo
- *
- * @vm: requested vm
- * @bo: requested buffer object
- *
- * Find @bo inside the requested vm (cayman+).
- * Search inside the @bos vm list for the requested vm
- * Returns the found bo_va or NULL if none is found
- *
- * Object has to be reserved!
- */
-struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
-				       struct radeon_bo *bo)
-{
-	struct radeon_bo_va *bo_va;
-
-	list_for_each_entry(bo_va, &bo->va, bo_list) {
-		if (bo_va->vm == vm) {
-			return bo_va;
-		}
-	}
-	return NULL;
-}
-
-/**
- * radeon_vm_bo_add - add a bo to a specific vm
- *
- * @rdev: radeon_device pointer
- * @vm: requested vm
- * @bo: radeon buffer object
- *
- * Add @bo into the requested vm (cayman+).
- * Add @bo to the list of bos associated with the vm
- * Returns newly added bo_va or NULL for failure
- *
- * Object has to be reserved!
- */
-struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
-				      struct radeon_vm *vm,
-				      struct radeon_bo *bo)
-{
-	struct radeon_bo_va *bo_va;
-
-	bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
-	if (bo_va == NULL) {
-		return NULL;
-	}
-	bo_va->vm = vm;
-	bo_va->bo = bo;
-	bo_va->soffset = 0;
-	bo_va->eoffset = 0;
-	bo_va->flags = 0;
-	bo_va->valid = false;
-	bo_va->ref_count = 1;
-	INIT_LIST_HEAD(&bo_va->bo_list);
-	INIT_LIST_HEAD(&bo_va->vm_list);
-
-	mutex_lock(&vm->mutex);
-	list_add(&bo_va->vm_list, &vm->va);
-	list_add_tail(&bo_va->bo_list, &bo->va);
-	mutex_unlock(&vm->mutex);
-
-	return bo_va;
-}
-
-/**
- * radeon_vm_bo_set_addr - set bos virtual address inside a vm
- *
- * @rdev: radeon_device pointer
- * @bo_va: bo_va to store the address
- * @soffset: requested offset of the buffer in the VM address space
- * @flags: attributes of pages (read/write/valid/etc.)
- *
- * Set offset of @bo_va (cayman+).
- * Validate and set the offset requested within the vm address space.
- * Returns 0 for success, error for failure.
- *
- * Object has to be reserved!
- */
-int radeon_vm_bo_set_addr(struct radeon_device *rdev,
-			  struct radeon_bo_va *bo_va,
-			  uint64_t soffset,
-			  uint32_t flags)
-{
-	uint64_t size = radeon_bo_size(bo_va->bo);
-	uint64_t eoffset, last_offset = 0;
-	struct radeon_vm *vm = bo_va->vm;
-	struct radeon_bo_va *tmp;
-	struct list_head *head;
-	unsigned last_pfn;
-
-	if (soffset) {
-		/* make sure object fit at this offset */
-		eoffset = soffset + size;
-		if (soffset >= eoffset) {
-			return -EINVAL;
-		}
-
-		last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
-		if (last_pfn > rdev->vm_manager.max_pfn) {
-			dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
-				last_pfn, rdev->vm_manager.max_pfn);
-			return -EINVAL;
-		}
-
-	} else {
-		eoffset = last_pfn = 0;
-	}
-
-	mutex_lock(&vm->mutex);
-	head = &vm->va;
-	last_offset = 0;
-	list_for_each_entry(tmp, &vm->va, vm_list) {
-		if (bo_va == tmp) {
-			/* skip over currently modified bo */
-			continue;
-		}
-
-		if (soffset >= last_offset && eoffset <= tmp->soffset) {
-			/* bo can be added before this one */
-			break;
-		}
-		if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
-			/* bo and tmp overlap, invalid offset */
-			dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
-				bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
-				(unsigned)tmp->soffset, (unsigned)tmp->eoffset);
-			mutex_unlock(&vm->mutex);
-			return -EINVAL;
-		}
-		last_offset = tmp->eoffset;
-		head = &tmp->vm_list;
-	}
-
-	bo_va->soffset = soffset;
-	bo_va->eoffset = eoffset;
-	bo_va->flags = flags;
-	bo_va->valid = false;
-	list_move(&bo_va->vm_list, head);
-
-	mutex_unlock(&vm->mutex);
-	return 0;
-}
-
-/**
- * radeon_vm_map_gart - get the physical address of a gart page
- *
- * @rdev: radeon_device pointer
- * @addr: the unmapped addr
- *
- * Look up the physical address of the page that the pte resolves
- * to (cayman+).
- * Returns the physical address of the page.
- */
-uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
-{
-	uint64_t result;
-
-	/* page table offset */
-	result = rdev->gart.pages_addr[addr >> PAGE_SHIFT];
-
-	/* in case cpu page size != gpu page size*/
-	result |= addr & (~PAGE_MASK);
-
-	return result;
-}
-
-/**
- * radeon_vm_page_flags - translate page flags to what the hw uses
- *
- * @flags: flags comming from userspace
- *
- * Translate the flags the userspace ABI uses to hw flags.
- */
-static uint32_t radeon_vm_page_flags(uint32_t flags)
-{
-        uint32_t hw_flags = 0;
-        hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
-        hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
-        hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
-        if (flags & RADEON_VM_PAGE_SYSTEM) {
-                hw_flags |= R600_PTE_SYSTEM;
-                hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
-        }
-        return hw_flags;
-}
-
-/**
- * radeon_vm_update_pdes - make sure that page directory is valid
- *
- * @rdev: radeon_device pointer
- * @vm: requested vm
- * @start: start of GPU address range
- * @end: end of GPU address range
- *
- * Allocates new page tables if necessary
- * and updates the page directory (cayman+).
- * Returns 0 for success, error for failure.
- *
- * Global and local mutex must be locked!
- */
-static int radeon_vm_update_pdes(struct radeon_device *rdev,
-				 struct radeon_vm *vm,
-				 struct radeon_ib *ib,
-				 uint64_t start, uint64_t end)
-{
-	static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
-
-	uint64_t last_pde = ~0, last_pt = ~0;
-	unsigned count = 0;
-	uint64_t pt_idx;
-	int r;
-
-	start = (start / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
-	end = (end / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
-
-	/* walk over the address space and update the page directory */
-	for (pt_idx = start; pt_idx <= end; ++pt_idx) {
-		uint64_t pde, pt;
-
-		if (vm->page_tables[pt_idx])
-			continue;
-
-retry:
-		r = radeon_sa_bo_new(rdev, &rdev->vm_manager.sa_manager,
-				     &vm->page_tables[pt_idx],
-				     RADEON_VM_PTE_COUNT * 8,
-				     RADEON_GPU_PAGE_SIZE, false);
-
-		if (r == -ENOMEM) {
-			r = radeon_vm_evict(rdev, vm);
-			if (r)
-				return r;
-			goto retry;
-		} else if (r) {
-			return r;
-		}
-
-		pde = vm->pd_gpu_addr + pt_idx * 8;
-
-		pt = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
-
-		if (((last_pde + 8 * count) != pde) ||
-		    ((last_pt + incr * count) != pt)) {
-
-			if (count) {
-				radeon_asic_vm_set_page(rdev, ib, last_pde,
-							last_pt, count, incr,
-							R600_PTE_VALID);
-
-				count *= RADEON_VM_PTE_COUNT;
-				radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
-							count, 0, 0);
-			}
-
-			count = 1;
-			last_pde = pde;
-			last_pt = pt;
-		} else {
-			++count;
-		}
-	}
-
-	if (count) {
-		radeon_asic_vm_set_page(rdev, ib, last_pde, last_pt, count,
-					incr, R600_PTE_VALID);
-
-		count *= RADEON_VM_PTE_COUNT;
-		radeon_asic_vm_set_page(rdev, ib, last_pt, 0,
-					count, 0, 0);
-	}
-
-	return 0;
-}
-
-/**
- * radeon_vm_update_ptes - make sure that page tables are valid
- *
- * @rdev: radeon_device pointer
- * @vm: requested vm
- * @start: start of GPU address range
- * @end: end of GPU address range
- * @dst: destination address to map to
- * @flags: mapping flags
- *
- * Update the page tables in the range @start - @end (cayman+).
- *
- * Global and local mutex must be locked!
- */
-static void radeon_vm_update_ptes(struct radeon_device *rdev,
-				  struct radeon_vm *vm,
-				  struct radeon_ib *ib,
-				  uint64_t start, uint64_t end,
-				  uint64_t dst, uint32_t flags)
-{
-	static const uint64_t mask = RADEON_VM_PTE_COUNT - 1;
-
-	uint64_t last_pte = ~0, last_dst = ~0;
-	unsigned count = 0;
-	uint64_t addr;
-
-	start = start / RADEON_GPU_PAGE_SIZE;
-	end = end / RADEON_GPU_PAGE_SIZE;
-
-	/* walk over the address space and update the page tables */
-	for (addr = start; addr < end; ) {
-		uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE;
-		unsigned nptes;
-		uint64_t pte;
-
-		if ((addr & ~mask) == (end & ~mask))
-			nptes = end - addr;
-		else
-			nptes = RADEON_VM_PTE_COUNT - (addr & mask);
-
-		pte = radeon_sa_bo_gpu_addr(vm->page_tables[pt_idx]);
-		pte += (addr & mask) * 8;
-
-		if ((last_pte + 8 * count) != pte) {
-
-			if (count) {
-				radeon_asic_vm_set_page(rdev, ib, last_pte,
-							last_dst, count,
-							RADEON_GPU_PAGE_SIZE,
-							flags);
-			}
-
-			count = nptes;
-			last_pte = pte;
-			last_dst = dst;
-		} else {
-			count += nptes;
-		}
-
-		addr += nptes;
-		dst += nptes * RADEON_GPU_PAGE_SIZE;
-	}
-
-	if (count) {
-		radeon_asic_vm_set_page(rdev, ib, last_pte,
-					last_dst, count,
-					RADEON_GPU_PAGE_SIZE, flags);
-	}
-}
-
-/**
- * radeon_vm_bo_update - map a bo into the vm page table
- *
- * @rdev: radeon_device pointer
- * @vm: requested vm
- * @bo: radeon buffer object
- * @mem: ttm mem
- *
- * Fill in the page table entries for @bo (cayman+).
- * Returns 0 for success, -EINVAL for failure.
- *
- * Object have to be reserved & global and local mutex must be locked!
- */
-int radeon_vm_bo_update(struct radeon_device *rdev,
-			struct radeon_vm *vm,
-			struct radeon_bo *bo,
-			struct ttm_mem_reg *mem)
-{
-	struct radeon_ib ib;
-	struct radeon_bo_va *bo_va;
-	unsigned nptes, npdes, ndw;
-	uint64_t addr;
-	int r;
-
-	/* nothing to do if vm isn't bound */
-	if (vm->page_directory == NULL)
-		return 0;
-
-	bo_va = radeon_vm_bo_find(vm, bo);
-	if (bo_va == NULL) {
-		dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
-		return -EINVAL;
-	}
-
-	if (!bo_va->soffset) {
-		dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n",
-			bo, vm);
-		return -EINVAL;
-	}
-
-	if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL))
-		return 0;
-
-	bo_va->flags &= ~RADEON_VM_PAGE_VALID;
-	bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
-	if (mem) {
-		addr = mem->start << PAGE_SHIFT;
-		if (mem->mem_type != TTM_PL_SYSTEM) {
-			bo_va->flags |= RADEON_VM_PAGE_VALID;
-			bo_va->valid = true;
-		}
-		if (mem->mem_type == TTM_PL_TT) {
-			bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
-		} else {
-			addr += rdev->vm_manager.vram_base_offset;
-		}
-	} else {
-		addr = 0;
-		bo_va->valid = false;
-	}
-
-	trace_radeon_vm_bo_update(bo_va);
-
-	nptes = radeon_bo_ngpu_pages(bo);
-
-	/* assume two extra pdes in case the mapping overlaps the borders */
-	npdes = (nptes >> RADEON_VM_BLOCK_SIZE) + 2;
-
-	/* padding, etc. */
-	ndw = 64;
-
-	if (RADEON_VM_BLOCK_SIZE > 11)
-		/* reserve space for one header for every 2k dwords */
-		ndw += (nptes >> 11) * 4;
-	else
-		/* reserve space for one header for
-		    every (1 << BLOCK_SIZE) entries */
-		ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4;
-
-	/* reserve space for pte addresses */
-	ndw += nptes * 2;
-
-	/* reserve space for one header for every 2k dwords */
-	ndw += (npdes >> 11) * 4;
-
-	/* reserve space for pde addresses */
-	ndw += npdes * 2;
-
-	/* reserve space for clearing new page tables */
-	ndw += npdes * 2 * RADEON_VM_PTE_COUNT;
-
-	/* update too big for an IB */
-	if (ndw > 0xfffff)
-		return -ENOMEM;
-
-	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
-	if (r)
-		return r;
-	ib.length_dw = 0;
-
-	r = radeon_vm_update_pdes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset);
-	if (r) {
-		radeon_ib_free(rdev, &ib);
-		return r;
-	}
-
-	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
-			      addr, radeon_vm_page_flags(bo_va->flags));
-
-	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
-	r = radeon_ib_schedule(rdev, &ib, NULL);
-	if (r) {
-		radeon_ib_free(rdev, &ib);
-		return r;
-	}
-	radeon_fence_unref(&vm->fence);
-	vm->fence = radeon_fence_ref(ib.fence);
-	radeon_ib_free(rdev, &ib);
-	radeon_fence_unref(&vm->last_flush);
-
-	return 0;
-}
-
-/**
- * radeon_vm_bo_rmv - remove a bo to a specific vm
- *
- * @rdev: radeon_device pointer
- * @bo_va: requested bo_va
- *
- * Remove @bo_va->bo from the requested vm (cayman+).
- * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and
- * remove the ptes for @bo_va in the page table.
- * Returns 0 for success.
- *
- * Object have to be reserved!
- */
-int radeon_vm_bo_rmv(struct radeon_device *rdev,
-		     struct radeon_bo_va *bo_va)
-{
-	int r = 0;
-
-	mutex_lock(&rdev->vm_manager.lock);
-	mutex_lock(&bo_va->vm->mutex);
-	if (bo_va->soffset) {
-		r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL);
-	}
-	mutex_unlock(&rdev->vm_manager.lock);
-	list_del(&bo_va->vm_list);
-	mutex_unlock(&bo_va->vm->mutex);
-	list_del(&bo_va->bo_list);
-
-	kfree(bo_va);
-	return r;
-}
-
-/**
- * radeon_vm_bo_invalidate - mark the bo as invalid
- *
- * @rdev: radeon_device pointer
- * @vm: requested vm
- * @bo: radeon buffer object
- *
- * Mark @bo as invalid (cayman+).
- */
-void radeon_vm_bo_invalidate(struct radeon_device *rdev,
-			     struct radeon_bo *bo)
-{
-	struct radeon_bo_va *bo_va;
-
-	list_for_each_entry(bo_va, &bo->va, bo_list) {
-		bo_va->valid = false;
-	}
-}
-
-/**
- * radeon_vm_init - initialize a vm instance
- *
- * @rdev: radeon_device pointer
- * @vm: requested vm
- *
- * Init @vm fields (cayman+).
- */
-void radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
-{
-	vm->id = 0;
-	vm->fence = NULL;
-	vm->last_flush = NULL;
-	vm->last_id_use = NULL;
-	mutex_init(&vm->mutex);
-	INIT_LIST_HEAD(&vm->list);
-	INIT_LIST_HEAD(&vm->va);
-}
-
-/**
- * radeon_vm_fini - tear down a vm instance
- *
- * @rdev: radeon_device pointer
- * @vm: requested vm
- *
- * Tear down @vm (cayman+).
- * Unbind the VM and remove all bos from the vm bo list
- */
-void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
-{
-	struct radeon_bo_va *bo_va, *tmp;
-	int r;
-
-	mutex_lock(&rdev->vm_manager.lock);
-	mutex_lock(&vm->mutex);
-	radeon_vm_free_pt(rdev, vm);
-	mutex_unlock(&rdev->vm_manager.lock);
-
-	if (!list_empty(&vm->va)) {
-		dev_err(rdev->dev, "still active bo inside vm\n");
-	}
-	list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
-		list_del_init(&bo_va->vm_list);
-		r = radeon_bo_reserve(bo_va->bo, false);
-		if (!r) {
-			list_del_init(&bo_va->bo_list);
-			radeon_bo_unreserve(bo_va->bo);
-			kfree(bo_va);
-		}
-	}
-	radeon_fence_unref(&vm->fence);
-	radeon_fence_unref(&vm->last_flush);
-	radeon_fence_unref(&vm->last_id_use);
-	mutex_unlock(&vm->mutex);
-}
--- a/drivers/gpu/drm/radeon/radeon_gem.c
+++ b/drivers/gpu/drm/radeon/radeon_gem.c
@ -344,18 +344,7 @@ int radeon_gem_busy_ioctl(struct drm_device *dev, void *data,
 	}
 	robj = gem_to_radeon_bo(gobj);
 	r = radeon_bo_wait(robj, &cur_placement, true);
-	switch (cur_placement) {
-	case TTM_PL_VRAM:
-		args->domain = RADEON_GEM_DOMAIN_VRAM;
-		break;
-	case TTM_PL_TT:
-		args->domain = RADEON_GEM_DOMAIN_GTT;
-		break;
-	case TTM_PL_SYSTEM:
-		args->domain = RADEON_GEM_DOMAIN_CPU;
-	default:
-		break;
-	}
+	args->domain = radeon_mem_type_to_domain(cur_placement);
 	drm_gem_object_unreference_unlocked(gobj);
 	r = radeon_gem_handle_lockup(rdev, r);
 	return r;
@ -533,6 +522,42 @@ out:
 	return r;
 }

+int radeon_gem_op_ioctl(struct drm_device *dev, void *data,
+			struct drm_file *filp)
+{
+	struct drm_radeon_gem_op *args = data;
+	struct drm_gem_object *gobj;
+	struct radeon_bo *robj;
+	int r;
+
+	gobj = drm_gem_object_lookup(dev, filp, args->handle);
+	if (gobj == NULL) {
+		return -ENOENT;
+	}
+	robj = gem_to_radeon_bo(gobj);
+	r = radeon_bo_reserve(robj, false);
+	if (unlikely(r))
+		goto out;
+
+	switch (args->op) {
+	case RADEON_GEM_OP_GET_INITIAL_DOMAIN:
+		args->value = robj->initial_domain;
+		break;
+	case RADEON_GEM_OP_SET_INITIAL_DOMAIN:
+		robj->initial_domain = args->value & (RADEON_GEM_DOMAIN_VRAM |
+						      RADEON_GEM_DOMAIN_GTT |
+						      RADEON_GEM_DOMAIN_CPU);
+		break;
+	default:
+		r = -EINVAL;
+	}
+
+	radeon_bo_unreserve(robj);
+out:
+	drm_gem_object_unreference_unlocked(gobj);
+	return r;
+}
+
 int radeon_mode_dumb_create(struct drm_file *file_priv,
 			    struct drm_device *dev,
 			    struct drm_mode_create_dumb *args)
--- a/drivers/gpu/drm/radeon/radeon_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_kms.c
@ -33,6 +33,13 @@
 #include <linux/vga_switcheroo.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
+
+#if defined(CONFIG_VGA_SWITCHEROO)
+bool radeon_is_px(void);
+#else
+static inline bool radeon_is_px(void) { return false; }
+#endif
+
 /**
 * radeon_driver_unload_kms - Main unload function for KMS.
 *
@ -130,7 +137,8 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags)
 				"Error during ACPI methods call\n");
 	}

-	if (radeon_runtime_pm != 0) {
+	if ((radeon_runtime_pm == 1) ||
+	    ((radeon_runtime_pm == -1) && radeon_is_px())) {
 		pm_runtime_use_autosuspend(dev->dev);
 		pm_runtime_set_autosuspend_delay(dev->dev, 5000);
 		pm_runtime_set_active(dev->dev);
@ -433,6 +441,9 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		case RADEON_CS_RING_UVD:
 			*value = rdev->ring[R600_RING_TYPE_UVD_INDEX].ready;
 			break;
+		case RADEON_CS_RING_VCE:
+			*value = rdev->ring[TN_RING_TYPE_VCE1_INDEX].ready;
+			break;
 		default:
 			return -EINVAL;
 		}
@ -477,6 +488,27 @@ static int radeon_info_ioctl(struct drm_device *dev, void *data, struct drm_file
 		else
 			*value = rdev->pm.default_sclk * 10;
 		break;
+	case RADEON_INFO_VCE_FW_VERSION:
+		*value = rdev->vce.fw_version;
+		break;
+	case RADEON_INFO_VCE_FB_VERSION:
+		*value = rdev->vce.fb_version;
+		break;
+	case RADEON_INFO_NUM_BYTES_MOVED:
+		value = (uint32_t*)&value64;
+		value_size = sizeof(uint64_t);
+		value64 = atomic64_read(&rdev->num_bytes_moved);
+		break;
+	case RADEON_INFO_VRAM_USAGE:
+		value = (uint32_t*)&value64;
+		value_size = sizeof(uint64_t);
+		value64 = atomic64_read(&rdev->vram_usage);
+		break;
+	case RADEON_INFO_GTT_USAGE:
+		value = (uint32_t*)&value64;
+		value_size = sizeof(uint64_t);
+		value64 = atomic64_read(&rdev->gtt_usage);
+		break;
 	default:
 		DRM_DEBUG_KMS("Invalid request %d\n", info->request);
 		return -EINVAL;
@ -535,7 +567,13 @@ int radeon_driver_open_kms(struct drm_device *dev, struct drm_file *file_priv)
 			return -ENOMEM;
 		}

-		radeon_vm_init(rdev, &fpriv->vm);
+		r = radeon_vm_init(rdev, &fpriv->vm);
+		if (r)
+			return r;
+
+		r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
+		if (r)
+			return r;

 		r = radeon_bo_reserve(rdev->ring_tmp_bo.bo, false);
 		if (r)
@ -616,6 +654,7 @@ void radeon_driver_preclose_kms(struct drm_device *dev,
 	if (rdev->cmask_filp == file_priv)
 		rdev->cmask_filp = NULL;
 	radeon_uvd_free_handles(rdev, file_priv);
+	radeon_vce_free_handles(rdev, file_priv);
 }

 /*
@ -810,5 +849,6 @@ const struct drm_ioctl_desc radeon_ioctls_kms[] = {
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_GET_TILING, radeon_gem_get_tiling_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_BUSY, radeon_gem_busy_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 	DRM_IOCTL_DEF_DRV(RADEON_GEM_VA, radeon_gem_va_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
+	DRM_IOCTL_DEF_DRV(RADEON_GEM_OP, radeon_gem_op_ioctl, DRM_AUTH|DRM_UNLOCKED|DRM_RENDER_ALLOW),
 };
 int radeon_max_kms_ioctl = DRM_ARRAY_SIZE(radeon_ioctls_kms);
--- a/drivers/gpu/drm/radeon/radeon_object.c
+++ b/drivers/gpu/drm/radeon/radeon_object.c
@ -56,11 +56,36 @@ static void radeon_bo_clear_va(struct radeon_bo *bo)
 	}
 }

+static void radeon_update_memory_usage(struct radeon_bo *bo,
+				       unsigned mem_type, int sign)
+{
+	struct radeon_device *rdev = bo->rdev;
+	u64 size = (u64)bo->tbo.num_pages << PAGE_SHIFT;
+
+	switch (mem_type) {
+	case TTM_PL_TT:
+		if (sign > 0)
+			atomic64_add(size, &rdev->gtt_usage);
+		else
+			atomic64_sub(size, &rdev->gtt_usage);
+		break;
+	case TTM_PL_VRAM:
+		if (sign > 0)
+			atomic64_add(size, &rdev->vram_usage);
+		else
+			atomic64_sub(size, &rdev->vram_usage);
+		break;
+	}
+}
+
 static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
 {
 	struct radeon_bo *bo;

 	bo = container_of(tbo, struct radeon_bo, tbo);
+
+	radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1);
+
 	mutex_lock(&bo->rdev->gem.mutex);
 	list_del_init(&bo->list);
 	mutex_unlock(&bo->rdev->gem.mutex);
@ -120,7 +145,6 @@ int radeon_bo_create(struct radeon_device *rdev,

 	size = ALIGN(size, PAGE_SIZE);

-	rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping;
 	if (kernel) {
 		type = ttm_bo_type_kernel;
 	} else if (sg) {
@ -145,6 +169,9 @@ int radeon_bo_create(struct radeon_device *rdev,
 	bo->surface_reg = -1;
 	INIT_LIST_HEAD(&bo->list);
 	INIT_LIST_HEAD(&bo->va);
+	bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM |
+	                               RADEON_GEM_DOMAIN_GTT |
+	                               RADEON_GEM_DOMAIN_CPU);
 	radeon_ttm_placement_from_domain(bo, domain);
 	/* Kernel allocation are uninterruptible */
 	down_read(&rdev->pm.mclk_lock);
@ -338,39 +365,105 @@ void radeon_bo_fini(struct radeon_device *rdev)
 	arch_phys_wc_del(rdev->mc.vram_mtrr);
 }

-void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
-				struct list_head *head)
+/* Returns how many bytes TTM can move per IB.
+ */
+static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev)
 {
-	if (lobj->written) {
-		list_add(&lobj->tv.head, head);
-	} else {
-		list_add_tail(&lobj->tv.head, head);
-	}
+	u64 real_vram_size = rdev->mc.real_vram_size;
+	u64 vram_usage = atomic64_read(&rdev->vram_usage);
+
+	/* This function is based on the current VRAM usage.
+	 *
+	 * - If all of VRAM is free, allow relocating the number of bytes that
+	 *   is equal to 1/4 of the size of VRAM for this IB.
+
+	 * - If more than one half of VRAM is occupied, only allow relocating
+	 *   1 MB of data for this IB.
+	 *
+	 * - From 0 to one half of used VRAM, the threshold decreases
+	 *   linearly.
+	 *         __________________
+	 * 1/4 of -|\               |
+	 * VRAM    | \              |
+	 *         |  \             |
+	 *         |   \            |
+	 *         |    \           |
+	 *         |     \          |
+	 *         |      \         |
+	 *         |       \________|1 MB
+	 *         |----------------|
+	 *    VRAM 0 %             100 %
+	 *         used            used
+	 *
+	 * Note: It's a threshold, not a limit. The threshold must be crossed
+	 * for buffer relocations to stop, so any buffer of an arbitrary size
+	 * can be moved as long as the threshold isn't crossed before
+	 * the relocation takes place. We don't want to disable buffer
+	 * relocations completely.
+	 *
+	 * The idea is that buffers should be placed in VRAM at creation time
+	 * and TTM should only do a minimum number of relocations during
+	 * command submission. In practice, you need to submit at least
+	 * a dozen IBs to move all buffers to VRAM if they are in GTT.
+	 *
+	 * Also, things can get pretty crazy under memory pressure and actual
+	 * VRAM usage can change a lot, so playing safe even at 50% does
+	 * consistently increase performance.
+	 */
+
+	u64 half_vram = real_vram_size >> 1;
+	u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
+	u64 bytes_moved_threshold = half_free_vram >> 1;
+	return max(bytes_moved_threshold, 1024*1024ull);
 }

-int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
+int radeon_bo_list_validate(struct radeon_device *rdev,
+			    struct ww_acquire_ctx *ticket,
 			    struct list_head *head, int ring)
 {
-	struct radeon_bo_list *lobj;
+	struct radeon_cs_reloc *lobj;
 	struct radeon_bo *bo;
-	u32 domain;
 	int r;
+	u64 bytes_moved = 0, initial_bytes_moved;
+	u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);

 	r = ttm_eu_reserve_buffers(ticket, head);
 	if (unlikely(r != 0)) {
 		return r;
 	}
+
 	list_for_each_entry(lobj, head, tv.head) {
-		bo = lobj->bo;
+		bo = lobj->robj;
 		if (!bo->pin_count) {
-			domain = lobj->domain;
-			
+			u32 domain = lobj->domain;
+			u32 current_domain =
+				radeon_mem_type_to_domain(bo->tbo.mem.mem_type);
+
+			/* Check if this buffer will be moved and don't move it
+			 * if we have moved too many buffers for this IB already.
+			 *
+			 * Note that this allows moving at least one buffer of
+			 * any size, because it doesn't take the current "bo"
+			 * into account. We don't want to disallow buffer moves
+			 * completely.
+			 */
+			if (current_domain != RADEON_GEM_DOMAIN_CPU &&
+			    (domain & current_domain) == 0 && /* will be moved */
+			    bytes_moved > bytes_moved_threshold) {
+				/* don't move it */
+				domain = current_domain;
+			}
+
 		retry:
 			radeon_ttm_placement_from_domain(bo, domain);
 			if (ring == R600_RING_TYPE_UVD_INDEX)
 				radeon_uvd_force_into_uvd_segment(bo);
-			r = ttm_bo_validate(&bo->tbo, &bo->placement,
-						true, false);
+
+			initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
+			r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
+			bytes_moved += atomic64_read(&rdev->num_bytes_moved) -
+				       initial_bytes_moved;
+
 			if (unlikely(r)) {
 				if (r != -ERESTARTSYS && domain != lobj->alt_domain) {
 					domain = lobj->alt_domain;
@ -564,14 +657,23 @@ int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
 }

 void radeon_bo_move_notify(struct ttm_buffer_object *bo,
-			   struct ttm_mem_reg *mem)
+			   struct ttm_mem_reg *new_mem)
 {
 	struct radeon_bo *rbo;
+
 	if (!radeon_ttm_bo_is_radeon_bo(bo))
 		return;
+
 	rbo = container_of(bo, struct radeon_bo, tbo);
 	radeon_bo_check_tiling(rbo, 0, 1);
 	radeon_vm_bo_invalidate(rbo->rdev, rbo);
+
+	/* update statistics */
+	if (!new_mem)
+		return;
+
+	radeon_update_memory_usage(rbo, bo->mem.mem_type, -1);
+	radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
 }

 int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo)
--- a/drivers/gpu/drm/radeon/radeon_object.h
+++ b/drivers/gpu/drm/radeon/radeon_object.h
@ -138,9 +138,8 @@ extern int radeon_bo_evict_vram(struct radeon_device *rdev);
 extern void radeon_bo_force_delete(struct radeon_device *rdev);
 extern int radeon_bo_init(struct radeon_device *rdev);
 extern void radeon_bo_fini(struct radeon_device *rdev);
-extern void radeon_bo_list_add_object(struct radeon_bo_list *lobj,
-				struct list_head *head);
-extern int radeon_bo_list_validate(struct ww_acquire_ctx *ticket,
+extern int radeon_bo_list_validate(struct radeon_device *rdev,
+				   struct ww_acquire_ctx *ticket,
 				   struct list_head *head, int ring);
 extern int radeon_bo_fbdev_mmap(struct radeon_bo *bo,
 				struct vm_area_struct *vma);
@ -151,7 +150,7 @@ extern void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
 extern int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
 				bool force_drop);
 extern void radeon_bo_move_notify(struct ttm_buffer_object *bo,
-					struct ttm_mem_reg *mem);
+				  struct ttm_mem_reg *new_mem);
 extern int radeon_bo_fault_reserve_notify(struct ttm_buffer_object *bo);
 extern int radeon_bo_get_surface_reg(struct radeon_bo *bo);

@ -181,7 +180,7 @@ extern int radeon_sa_bo_manager_suspend(struct radeon_device *rdev,
 extern int radeon_sa_bo_new(struct radeon_device *rdev,
 			    struct radeon_sa_manager *sa_manager,
 			    struct radeon_sa_bo **sa_bo,
-			    unsigned size, unsigned align, bool block);
+			    unsigned size, unsigned align);
 extern void radeon_sa_bo_free(struct radeon_device *rdev,
 			      struct radeon_sa_bo **sa_bo,
 			      struct radeon_fence *fence);
--- a/drivers/gpu/drm/radeon/radeon_pm.c
+++ b/drivers/gpu/drm/radeon/radeon_pm.c
@ -260,7 +260,7 @@ static void radeon_pm_set_clocks(struct radeon_device *rdev)
 		if (!ring->ready) {
 			continue;
 		}
-		r = radeon_fence_wait_empty_locked(rdev, i);
+		r = radeon_fence_wait_empty(rdev, i);
 		if (r) {
 			/* needs a GPU reset dont reset here */
 			mutex_unlock(&rdev->ring_lock);
@ -826,6 +826,9 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev)

 	/* no need to reprogram if nothing changed unless we are on BTC+ */
 	if (rdev->pm.dpm.current_ps == rdev->pm.dpm.requested_ps) {
+		/* vce just modifies an existing state so force a change */
+		if (ps->vce_active != rdev->pm.dpm.vce_active)
+			goto force;
 		if ((rdev->family < CHIP_BARTS) || (rdev->flags & RADEON_IS_IGP)) {
 			/* for pre-BTC and APUs if the num crtcs changed but state is the same,
 			 * all we need to do is update the display configuration.
@ -862,16 +865,21 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev)
 		}
 	}

+force:
 	if (radeon_dpm == 1) {
 		printk("switching from power state:\n");
 		radeon_dpm_print_power_state(rdev, rdev->pm.dpm.current_ps);
 		printk("switching to power state:\n");
 		radeon_dpm_print_power_state(rdev, rdev->pm.dpm.requested_ps);
 	}
+
 	mutex_lock(&rdev->ddev->struct_mutex);
 	down_write(&rdev->pm.mclk_lock);
 	mutex_lock(&rdev->ring_lock);

+	/* update whether vce is active */
+	ps->vce_active = rdev->pm.dpm.vce_active;
+
 	ret = radeon_dpm_pre_set_power_state(rdev);
 	if (ret)
 		goto done;
@ -888,7 +896,7 @@ static void radeon_dpm_change_power_state_locked(struct radeon_device *rdev)
 	for (i = 0; i < RADEON_NUM_RINGS; i++) {
 		struct radeon_ring *ring = &rdev->ring[i];
 		if (ring->ready)
-			radeon_fence_wait_empty_locked(rdev, i);
+			radeon_fence_wait_empty(rdev, i);
 	}

 	/* program the new power state */
@ -935,8 +943,6 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable)
 		if (enable) {
 			mutex_lock(&rdev->pm.mutex);
 			rdev->pm.dpm.uvd_active = true;
-			/* disable this for now */
-#if 0
 			if ((rdev->pm.dpm.sd == 1) && (rdev->pm.dpm.hd == 0))
 				dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_SD;
 			else if ((rdev->pm.dpm.sd == 2) && (rdev->pm.dpm.hd == 0))
@ -946,7 +952,6 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable)
 			else if ((rdev->pm.dpm.sd == 0) && (rdev->pm.dpm.hd == 2))
 				dpm_state = POWER_STATE_TYPE_INTERNAL_UVD_HD2;
 			else
-#endif
 				dpm_state = POWER_STATE_TYPE_INTERNAL_UVD;
 			rdev->pm.dpm.state = dpm_state;
 			mutex_unlock(&rdev->pm.mutex);
@ -960,6 +965,23 @@ void radeon_dpm_enable_uvd(struct radeon_device *rdev, bool enable)
 	}
 }

+void radeon_dpm_enable_vce(struct radeon_device *rdev, bool enable)
+{
+	if (enable) {
+		mutex_lock(&rdev->pm.mutex);
+		rdev->pm.dpm.vce_active = true;
+		/* XXX select vce level based on ring/task */
+		rdev->pm.dpm.vce_level = RADEON_VCE_LEVEL_AC_ALL;
+		mutex_unlock(&rdev->pm.mutex);
+	} else {
+		mutex_lock(&rdev->pm.mutex);
+		rdev->pm.dpm.vce_active = false;
+		mutex_unlock(&rdev->pm.mutex);
+	}
+
+	radeon_pm_compute_clocks(rdev);
+}
+
 static void radeon_pm_suspend_old(struct radeon_device *rdev)
 {
 	mutex_lock(&rdev->pm.mutex);
--- a/drivers/gpu/drm/radeon/radeon_ring.c
+++ b/drivers/gpu/drm/radeon/radeon_ring.c
@ -63,7 +63,7 @@ int radeon_ib_get(struct radeon_device *rdev, int ring,
 {
 	int r;

-	r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256, true);
+	r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &ib->sa_bo, size, 256);
 	if (r) {
 		dev_err(rdev->dev, "failed to get a new IB (%d)\n", r);
 		return r;
@ -145,6 +145,13 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
 		return r;
 	}

+	/* grab a vm id if necessary */
+	if (ib->vm) {
+		struct radeon_fence *vm_id_fence;
+		vm_id_fence = radeon_vm_grab_id(rdev, ib->vm, ib->ring);
+        	radeon_semaphore_sync_to(ib->semaphore, vm_id_fence);
+	}
+
 	/* sync with other rings */
 	r = radeon_semaphore_sync_rings(rdev, ib->semaphore, ib->ring);
 	if (r) {
@ -153,11 +160,9 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
 		return r;
 	}

-	/* if we can't remember our last VM flush then flush now! */
-	/* XXX figure out why we have to flush for every IB */
-	if (ib->vm /*&& !ib->vm->last_flush*/) {
-		radeon_ring_vm_flush(rdev, ib->ring, ib->vm);
-	}
+	if (ib->vm)
+		radeon_vm_flush(rdev, ib->vm, ib->ring);
+
 	if (const_ib) {
 		radeon_ring_ib_execute(rdev, const_ib->ring, const_ib);
 		radeon_semaphore_free(rdev, &const_ib->semaphore, NULL);
@ -172,10 +177,10 @@ int radeon_ib_schedule(struct radeon_device *rdev, struct radeon_ib *ib,
 	if (const_ib) {
 		const_ib->fence = radeon_fence_ref(ib->fence);
 	}
-	/* we just flushed the VM, remember that */
-	if (ib->vm && !ib->vm->last_flush) {
-		ib->vm->last_flush = radeon_fence_ref(ib->fence);
-	}
+
+	if (ib->vm)
+		radeon_vm_fence(rdev, ib->vm, ib->fence);
+
 	radeon_ring_unlock_commit(rdev, ring);
 	return 0;
 }
@ -342,13 +347,17 @@ bool radeon_ring_supports_scratch_reg(struct radeon_device *rdev,
 */
 void radeon_ring_free_size(struct radeon_device *rdev, struct radeon_ring *ring)
 {
-	ring->rptr = radeon_ring_get_rptr(rdev, ring);
+	uint32_t rptr = radeon_ring_get_rptr(rdev, ring);
+
 	/* This works because ring_size is a power of 2 */
-	ring->ring_free_dw = (ring->rptr + (ring->ring_size / 4));
+	ring->ring_free_dw = rptr + (ring->ring_size / 4);
 	ring->ring_free_dw -= ring->wptr;
 	ring->ring_free_dw &= ring->ptr_mask;
 	if (!ring->ring_free_dw) {
+		/* this is an empty ring */
 		ring->ring_free_dw = ring->ring_size / 4;
+		/*  update lockup info to avoid false positive */
+		radeon_ring_lockup_update(rdev, ring);
 	}
 }

@ -372,19 +381,13 @@ int radeon_ring_alloc(struct radeon_device *rdev, struct radeon_ring *ring, unsi
 	/* Align requested size with padding so unlock_commit can
 	 * pad safely */
 	radeon_ring_free_size(rdev, ring);
-	if (ring->ring_free_dw == (ring->ring_size / 4)) {
-		/* This is an empty ring update lockup info to avoid
-		 * false positive.
-		 */
-		radeon_ring_lockup_update(ring);
-	}
 	ndw = (ndw + ring->align_mask) & ~ring->align_mask;
 	while (ndw > (ring->ring_free_dw - 1)) {
 		radeon_ring_free_size(rdev, ring);
 		if (ndw < ring->ring_free_dw) {
 			break;
 		}
-		r = radeon_fence_wait_next_locked(rdev, ring->idx);
+		r = radeon_fence_wait_next(rdev, ring->idx);
 		if (r)
 			return r;
 	}
@ -477,29 +480,6 @@ void radeon_ring_unlock_undo(struct radeon_device *rdev, struct radeon_ring *rin
 	mutex_unlock(&rdev->ring_lock);
 }

-/**
- * radeon_ring_force_activity - add some nop packets to the ring
- *
- * @rdev: radeon_device pointer
- * @ring: radeon_ring structure holding ring information
- *
- * Add some nop packets to the ring to force activity (all asics).
- * Used for lockup detection to see if the rptr is advancing.
- */
-void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *ring)
-{
-	int r;
-
-	radeon_ring_free_size(rdev, ring);
-	if (ring->rptr == ring->wptr) {
-		r = radeon_ring_alloc(rdev, ring, 1);
-		if (!r) {
-			radeon_ring_write(ring, ring->nop);
-			radeon_ring_commit(rdev, ring);
-		}
-	}
-}
-
 /**
 * radeon_ring_lockup_update - update lockup variables
 *
@ -507,10 +487,11 @@ void radeon_ring_force_activity(struct radeon_device *rdev, struct radeon_ring *
 *
 * Update the last rptr value and timestamp (all asics).
 */
-void radeon_ring_lockup_update(struct radeon_ring *ring)
+void radeon_ring_lockup_update(struct radeon_device *rdev,
+			       struct radeon_ring *ring)
 {
-	ring->last_rptr = ring->rptr;
-	ring->last_activity = jiffies;
+	atomic_set(&ring->last_rptr, radeon_ring_get_rptr(rdev, ring));
+	atomic64_set(&ring->last_activity, jiffies_64);
 }

 /**
@ -518,40 +499,23 @@ void radeon_ring_lockup_update(struct radeon_ring *ring)
 * @rdev:       radeon device structure
 * @ring:       radeon_ring structure holding ring information
 *
- * We don't need to initialize the lockup tracking information as we will either
- * have CP rptr to a different value of jiffies wrap around which will force
- * initialization of the lockup tracking informations.
- *
- * A possible false positivie is if we get call after while and last_cp_rptr ==
- * the current CP rptr, even if it's unlikely it might happen. To avoid this
- * if the elapsed time since last call is bigger than 2 second than we return
- * false and update the tracking information. Due to this the caller must call
- * radeon_ring_test_lockup several time in less than 2sec for lockup to be reported
- * the fencing code should be cautious about that.
- *
- * Caller should write to the ring to force CP to do something so we don't get
- * false positive when CP is just gived nothing to do.
- *
- **/
+ */
 bool radeon_ring_test_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 {
-	unsigned long cjiffies, elapsed;
+	uint32_t rptr = radeon_ring_get_rptr(rdev, ring);
+	uint64_t last = atomic64_read(&ring->last_activity);
+	uint64_t elapsed;

-	cjiffies = jiffies;
-	if (!time_after(cjiffies, ring->last_activity)) {
-		/* likely a wrap around */
-		radeon_ring_lockup_update(ring);
+	if (rptr != atomic_read(&ring->last_rptr)) {
+		/* ring is still working, no lockup */
+		radeon_ring_lockup_update(rdev, ring);
 		return false;
 	}
-	ring->rptr = radeon_ring_get_rptr(rdev, ring);
-	if (ring->rptr != ring->last_rptr) {
-		/* CP is still working no lockup */
-		radeon_ring_lockup_update(ring);
-		return false;
-	}
-	elapsed = jiffies_to_msecs(cjiffies - ring->last_activity);
+
+	elapsed = jiffies_to_msecs(jiffies_64 - last);
 	if (radeon_lockup_timeout && elapsed >= radeon_lockup_timeout) {
-		dev_err(rdev->dev, "GPU lockup CP stall for more than %lumsec\n", elapsed);
+		dev_err(rdev->dev, "ring %d stalled for more than %llumsec\n",
+			ring->idx, elapsed);
 		return true;
 	}
 	/* give a chance to the GPU ... */
@ -709,7 +673,7 @@ int radeon_ring_init(struct radeon_device *rdev, struct radeon_ring *ring, unsig
 	if (radeon_debugfs_ring_init(rdev, ring)) {
 		DRM_ERROR("Failed to register debugfs file for rings !\n");
 	}
-	radeon_ring_lockup_update(ring);
+	radeon_ring_lockup_update(rdev, ring);
 	return 0;
 }

@ -780,8 +744,6 @@ static int radeon_debugfs_ring_info(struct seq_file *m, void *data)

 	seq_printf(m, "driver's copy of the wptr: 0x%08x [%5d]\n",
 		   ring->wptr, ring->wptr);
-	seq_printf(m, "driver's copy of the rptr: 0x%08x [%5d]\n",
-		   ring->rptr, ring->rptr);
 	seq_printf(m, "last semaphore signal addr : 0x%016llx\n",
 		   ring->last_semaphore_signal_addr);
 	seq_printf(m, "last semaphore wait addr   : 0x%016llx\n",
@ -814,6 +776,8 @@ static int cayman_cp2_index = CAYMAN_RING_TYPE_CP2_INDEX;
 static int radeon_dma1_index = R600_RING_TYPE_DMA_INDEX;
 static int radeon_dma2_index = CAYMAN_RING_TYPE_DMA1_INDEX;
 static int r600_uvd_index = R600_RING_TYPE_UVD_INDEX;
+static int si_vce1_index = TN_RING_TYPE_VCE1_INDEX;
+static int si_vce2_index = TN_RING_TYPE_VCE2_INDEX;

 static struct drm_info_list radeon_debugfs_ring_info_list[] = {
 	{"radeon_ring_gfx", radeon_debugfs_ring_info, 0, &radeon_gfx_index},
@ -822,6 +786,8 @@ static struct drm_info_list radeon_debugfs_ring_info_list[] = {
 	{"radeon_ring_dma1", radeon_debugfs_ring_info, 0, &radeon_dma1_index},
 	{"radeon_ring_dma2", radeon_debugfs_ring_info, 0, &radeon_dma2_index},
 	{"radeon_ring_uvd", radeon_debugfs_ring_info, 0, &r600_uvd_index},
+	{"radeon_ring_vce1", radeon_debugfs_ring_info, 0, &si_vce1_index},
+	{"radeon_ring_vce2", radeon_debugfs_ring_info, 0, &si_vce2_index},
 };

 static int radeon_debugfs_sa_info(struct seq_file *m, void *data)
--- a/drivers/gpu/drm/radeon/radeon_sa.c
+++ b/drivers/gpu/drm/radeon/radeon_sa.c
@ -312,7 +312,7 @@ static bool radeon_sa_bo_next_hole(struct radeon_sa_manager *sa_manager,
 int radeon_sa_bo_new(struct radeon_device *rdev,
 		     struct radeon_sa_manager *sa_manager,
 		     struct radeon_sa_bo **sa_bo,
-		     unsigned size, unsigned align, bool block)
+		     unsigned size, unsigned align)
 {
 	struct radeon_fence *fences[RADEON_NUM_RINGS];
 	unsigned tries[RADEON_NUM_RINGS];
@ -353,14 +353,11 @@ int radeon_sa_bo_new(struct radeon_device *rdev,
 		r = radeon_fence_wait_any(rdev, fences, false);
 		spin_lock(&sa_manager->wq.lock);
 		/* if we have nothing to wait for block */
-		if (r == -ENOENT && block) {
+		if (r == -ENOENT) {
 			r = wait_event_interruptible_locked(
 				sa_manager->wq, 
 				radeon_sa_event(sa_manager, size, align)
 			);
-
-		} else if (r == -ENOENT) {
-			r = -ENOMEM;
 		}

 	} while (!r);
--- a/drivers/gpu/drm/radeon/radeon_semaphore.c
+++ b/drivers/gpu/drm/radeon/radeon_semaphore.c
@ -42,7 +42,7 @@ int radeon_semaphore_create(struct radeon_device *rdev,
 		return -ENOMEM;
 	}
 	r = radeon_sa_bo_new(rdev, &rdev->ring_tmp_bo, &(*semaphore)->sa_bo,
-			     8 * RADEON_NUM_SYNCS, 8, true);
+			     8 * RADEON_NUM_SYNCS, 8);
 	if (r) {
 		kfree(*semaphore);
 		*semaphore = NULL;
@ -147,7 +147,9 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev,

 		if (++count > RADEON_NUM_SYNCS) {
 			/* not enough room, wait manually */
-			radeon_fence_wait_locked(fence);
+			r = radeon_fence_wait(fence, false);
+			if (r)
+				return r;
 			continue;
 		}

@ -161,7 +163,9 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev,
 		if (!radeon_semaphore_emit_signal(rdev, i, semaphore)) {
 			/* signaling wasn't successful wait manually */
 			radeon_ring_undo(&rdev->ring[i]);
-			radeon_fence_wait_locked(fence);
+			r = radeon_fence_wait(fence, false);
+			if (r)
+				return r;
 			continue;
 		}

@ -169,7 +173,9 @@ int radeon_semaphore_sync_rings(struct radeon_device *rdev,
 		if (!radeon_semaphore_emit_wait(rdev, ring, semaphore)) {
 			/* waiting wasn't successful wait manually */
 			radeon_ring_undo(&rdev->ring[i]);
-			radeon_fence_wait_locked(fence);
+			r = radeon_fence_wait(fence, false);
+			if (r)
+				return r;
 			continue;
 		}

--- a/drivers/gpu/drm/radeon/radeon_test.c
+++ b/drivers/gpu/drm/radeon/radeon_test.c
@ -257,20 +257,36 @@ static int radeon_test_create_and_emit_fence(struct radeon_device *rdev,
 					     struct radeon_ring *ring,
 					     struct radeon_fence **fence)
 {
+	uint32_t handle = ring->idx ^ 0xdeafbeef;
 	int r;

 	if (ring->idx == R600_RING_TYPE_UVD_INDEX) {
-		r = radeon_uvd_get_create_msg(rdev, ring->idx, 1, NULL);
+		r = radeon_uvd_get_create_msg(rdev, ring->idx, handle, NULL);
 		if (r) {
 			DRM_ERROR("Failed to get dummy create msg\n");
 			return r;
 		}

-		r = radeon_uvd_get_destroy_msg(rdev, ring->idx, 1, fence);
+		r = radeon_uvd_get_destroy_msg(rdev, ring->idx, handle, fence);
 		if (r) {
 			DRM_ERROR("Failed to get dummy destroy msg\n");
 			return r;
 		}
+
+	} else if (ring->idx == TN_RING_TYPE_VCE1_INDEX ||
+		   ring->idx == TN_RING_TYPE_VCE2_INDEX) {
+		r = radeon_vce_get_create_msg(rdev, ring->idx, handle, NULL);
+		if (r) {
+			DRM_ERROR("Failed to get dummy create msg\n");
+			return r;
+		}
+
+		r = radeon_vce_get_destroy_msg(rdev, ring->idx, handle, fence);
+		if (r) {
+			DRM_ERROR("Failed to get dummy destroy msg\n");
+			return r;
+		}
+
 	} else {
 		r = radeon_ring_lock(rdev, ring, 64);
 		if (r) {
@ -486,6 +502,16 @@ out_cleanup:
 		printk(KERN_WARNING "Error while testing ring sync (%d).\n", r);
 }

+static bool radeon_test_sync_possible(struct radeon_ring *ringA,
+				      struct radeon_ring *ringB)
+{
+	if (ringA->idx == TN_RING_TYPE_VCE2_INDEX &&
+	    ringB->idx == TN_RING_TYPE_VCE1_INDEX)
+		return false;
+
+	return true;
+}
+
 void radeon_test_syncing(struct radeon_device *rdev)
 {
 	int i, j, k;
@ -500,6 +526,9 @@ void radeon_test_syncing(struct radeon_device *rdev)
 			if (!ringB->ready)
 				continue;

+			if (!radeon_test_sync_possible(ringA, ringB))
+				continue;
+
 			DRM_INFO("Testing syncing between rings %d and %d...\n", i, j);
 			radeon_test_ring_sync(rdev, ringA, ringB);

@ -511,6 +540,12 @@ void radeon_test_syncing(struct radeon_device *rdev)
 				if (!ringC->ready)
 					continue;

+				if (!radeon_test_sync_possible(ringA, ringC))
+					continue;
+
+				if (!radeon_test_sync_possible(ringB, ringC))
+					continue;
+
 				DRM_INFO("Testing syncing between rings %d, %d and %d...\n", i, j, k);
 				radeon_test_ring_sync2(rdev, ringA, ringB, ringC);

--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@ -406,8 +406,14 @@ static int radeon_bo_move(struct ttm_buffer_object *bo,
 	if (r) {
 memcpy:
 		r = ttm_bo_move_memcpy(bo, evict, no_wait_gpu, new_mem);
+		if (r) {
+			return r;
+		}
 	}
-	return r;
+
+	/* update statistics */
+	atomic64_add((u64)bo->num_pages << PAGE_SHIFT, &rdev->num_bytes_moved);
+	return 0;
 }

 static int radeon_ttm_io_mem_reserve(struct ttm_bo_device *bdev, struct ttm_mem_reg *mem)
@ -701,7 +707,9 @@ int radeon_ttm_init(struct radeon_device *rdev)
 	/* No others user of address space so set it to 0 */
 	r = ttm_bo_device_init(&rdev->mman.bdev,
 			       rdev->mman.bo_global_ref.ref.object,
-			       &radeon_bo_driver, DRM_FILE_PAGE_OFFSET,
+			       &radeon_bo_driver,
+			       rdev->ddev->anon_inode->i_mapping,
+			       DRM_FILE_PAGE_OFFSET,
 			       rdev->need_dma32);
 	if (r) {
 		DRM_ERROR("failed initializing buffer object driver(%d).\n", r);
@ -742,7 +750,6 @@ int radeon_ttm_init(struct radeon_device *rdev)
 	}
 	DRM_INFO("radeon: %uM of GTT memory ready.\n",
 		 (unsigned)(rdev->mc.gtt_size / (1024 * 1024)));
-	rdev->mman.bdev.dev_mapping = rdev->ddev->dev_mapping;

 	r = radeon_ttm_debugfs_init(rdev);
 	if (r) {
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@ -455,7 +455,7 @@ static int radeon_uvd_cs_reloc(struct radeon_cs_parser *p,
 	}

 	reloc = p->relocs_ptr[(idx / 4)];
-	start = reloc->lobj.gpu_offset;
+	start = reloc->gpu_offset;
 	end = start + radeon_bo_size(reloc->robj);
 	start += offset;

@ -807,8 +807,7 @@ void radeon_uvd_note_usage(struct radeon_device *rdev)
 		    (rdev->pm.dpm.hd != hd)) {
 			rdev->pm.dpm.sd = sd;
 			rdev->pm.dpm.hd = hd;
-			/* disable this for now */
-			/*streams_changed = true;*/
+			streams_changed = true;
 		}
 	}

--- a/drivers/gpu/drm/radeon/radeon_vce.c
+++ b/drivers/gpu/drm/radeon/radeon_vce.c
@ -0,0 +1,699 @@
+/*
+ * Copyright 2013 Advanced Micro Devices, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * Authors: Christian König <christian.koenig@amd.com>
+ */
+
+#include <linux/firmware.h>
+#include <linux/module.h>
+#include <drm/drmP.h>
+#include <drm/drm.h>
+
+#include "radeon.h"
+#include "radeon_asic.h"
+#include "sid.h"
+
+/* 1 second timeout */
+#define VCE_IDLE_TIMEOUT_MS	1000
+
+/* Firmware Names */
+#define FIRMWARE_BONAIRE	"radeon/BONAIRE_vce.bin"
+
+MODULE_FIRMWARE(FIRMWARE_BONAIRE);
+
+static void radeon_vce_idle_work_handler(struct work_struct *work);
+
+/**
+ * radeon_vce_init - allocate memory, load vce firmware
+ *
+ * @rdev: radeon_device pointer
+ *
+ * First step to get VCE online, allocate memory and load the firmware
+ */
+int radeon_vce_init(struct radeon_device *rdev)
+{
+	static const char *fw_version = "[ATI LIB=VCEFW,";
+	static const char *fb_version = "[ATI LIB=VCEFWSTATS,";
+	unsigned long size;
+	const char *fw_name, *c;
+	uint8_t start, mid, end;
+	int i, r;
+
+	INIT_DELAYED_WORK(&rdev->vce.idle_work, radeon_vce_idle_work_handler);
+
+	switch (rdev->family) {
+	case CHIP_BONAIRE:
+	case CHIP_KAVERI:
+	case CHIP_KABINI:
+		fw_name = FIRMWARE_BONAIRE;
+		break;
+
+	default:
+		return -EINVAL;
+	}
+
+	r = request_firmware(&rdev->vce_fw, fw_name, rdev->dev);
+	if (r) {
+		dev_err(rdev->dev, "radeon_vce: Can't load firmware \"%s\"\n",
+			fw_name);
+		return r;
+	}
+
+	/* search for firmware version */
+
+	size = rdev->vce_fw->size - strlen(fw_version) - 9;
+	c = rdev->vce_fw->data;
+	for (;size > 0; --size, ++c)
+		if (strncmp(c, fw_version, strlen(fw_version)) == 0)
+			break;
+
+	if (size == 0)
+		return -EINVAL;
+
+	c += strlen(fw_version);
+	if (sscanf(c, "%2hhd.%2hhd.%2hhd]", &start, &mid, &end) != 3)
+		return -EINVAL;
+
+	/* search for feedback version */
+
+	size = rdev->vce_fw->size - strlen(fb_version) - 3;
+	c = rdev->vce_fw->data;
+	for (;size > 0; --size, ++c)
+		if (strncmp(c, fb_version, strlen(fb_version)) == 0)
+			break;
+
+	if (size == 0)
+		return -EINVAL;
+
+	c += strlen(fb_version);
+	if (sscanf(c, "%2u]", &rdev->vce.fb_version) != 1)
+		return -EINVAL;
+
+	DRM_INFO("Found VCE firmware/feedback version %hhd.%hhd.%hhd / %d!\n",
+		 start, mid, end, rdev->vce.fb_version);
+
+	rdev->vce.fw_version = (start << 24) | (mid << 16) | (end << 8);
+
+	/* we can only work with this fw version for now */
+	if (rdev->vce.fw_version != ((40 << 24) | (2 << 16) | (2 << 8)))
+		return -EINVAL;
+
+	/* allocate firmware, stack and heap BO */
+
+	size = RADEON_GPU_PAGE_ALIGN(rdev->vce_fw->size) +
+	       RADEON_VCE_STACK_SIZE + RADEON_VCE_HEAP_SIZE;
+	r = radeon_bo_create(rdev, size, PAGE_SIZE, true,
+			     RADEON_GEM_DOMAIN_VRAM, NULL, &rdev->vce.vcpu_bo);
+	if (r) {
+		dev_err(rdev->dev, "(%d) failed to allocate VCE bo\n", r);
+		return r;
+	}
+
+	r = radeon_bo_reserve(rdev->vce.vcpu_bo, false);
+	if (r) {
+		radeon_bo_unref(&rdev->vce.vcpu_bo);
+		dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r);
+		return r;
+	}
+
+	r = radeon_bo_pin(rdev->vce.vcpu_bo, RADEON_GEM_DOMAIN_VRAM,
+			  &rdev->vce.gpu_addr);
+	radeon_bo_unreserve(rdev->vce.vcpu_bo);
+	if (r) {
+		radeon_bo_unref(&rdev->vce.vcpu_bo);
+		dev_err(rdev->dev, "(%d) VCE bo pin failed\n", r);
+		return r;
+	}
+
+	for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
+		atomic_set(&rdev->vce.handles[i], 0);
+		rdev->vce.filp[i] = NULL;
+        }
+
+	return 0;
+}
+
+/**
+ * radeon_vce_fini - free memory
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Last step on VCE teardown, free firmware memory
+ */
+void radeon_vce_fini(struct radeon_device *rdev)
+{
+	if (rdev->vce.vcpu_bo == NULL)
+		return;
+
+	radeon_bo_unref(&rdev->vce.vcpu_bo);
+
+	release_firmware(rdev->vce_fw);
+}
+
+/**
+ * radeon_vce_suspend - unpin VCE fw memory
+ *
+ * @rdev: radeon_device pointer
+ *
+ */
+int radeon_vce_suspend(struct radeon_device *rdev)
+{
+	int i;
+
+	if (rdev->vce.vcpu_bo == NULL)
+		return 0;
+
+	for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i)
+		if (atomic_read(&rdev->vce.handles[i]))
+			break;
+
+	if (i == RADEON_MAX_VCE_HANDLES)
+		return 0;
+
+	/* TODO: suspending running encoding sessions isn't supported */
+	return -EINVAL;
+}
+
+/**
+ * radeon_vce_resume - pin VCE fw memory
+ *
+ * @rdev: radeon_device pointer
+ *
+ */
+int radeon_vce_resume(struct radeon_device *rdev)
+{
+	void *cpu_addr;
+	int r;
+
+	if (rdev->vce.vcpu_bo == NULL)
+		return -EINVAL;
+
+	r = radeon_bo_reserve(rdev->vce.vcpu_bo, false);
+	if (r) {
+		dev_err(rdev->dev, "(%d) failed to reserve VCE bo\n", r);
+		return r;
+	}
+
+	r = radeon_bo_kmap(rdev->vce.vcpu_bo, &cpu_addr);
+	if (r) {
+		radeon_bo_unreserve(rdev->vce.vcpu_bo);
+		dev_err(rdev->dev, "(%d) VCE map failed\n", r);
+		return r;
+	}
+
+	memcpy(cpu_addr, rdev->vce_fw->data, rdev->vce_fw->size);
+
+	radeon_bo_kunmap(rdev->vce.vcpu_bo);
+
+	radeon_bo_unreserve(rdev->vce.vcpu_bo);
+
+	return 0;
+}
+
+/**
+ * radeon_vce_idle_work_handler - power off VCE
+ *
+ * @work: pointer to work structure
+ *
+ * power of VCE when it's not used any more
+ */
+static void radeon_vce_idle_work_handler(struct work_struct *work)
+{
+	struct radeon_device *rdev =
+		container_of(work, struct radeon_device, vce.idle_work.work);
+
+	if ((radeon_fence_count_emitted(rdev, TN_RING_TYPE_VCE1_INDEX) == 0) &&
+	    (radeon_fence_count_emitted(rdev, TN_RING_TYPE_VCE2_INDEX) == 0)) {
+		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
+			radeon_dpm_enable_vce(rdev, false);
+		} else {
+			radeon_set_vce_clocks(rdev, 0, 0);
+		}
+	} else {
+		schedule_delayed_work(&rdev->vce.idle_work,
+				      msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS));
+	}
+}
+
+/**
+ * radeon_vce_note_usage - power up VCE
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Make sure VCE is powerd up when we want to use it
+ */
+void radeon_vce_note_usage(struct radeon_device *rdev)
+{
+	bool streams_changed = false;
+	bool set_clocks = !cancel_delayed_work_sync(&rdev->vce.idle_work);
+	set_clocks &= schedule_delayed_work(&rdev->vce.idle_work,
+					    msecs_to_jiffies(VCE_IDLE_TIMEOUT_MS));
+
+	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
+		/* XXX figure out if the streams changed */
+		streams_changed = false;
+	}
+
+	if (set_clocks || streams_changed) {
+		if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled) {
+			radeon_dpm_enable_vce(rdev, true);
+		} else {
+			radeon_set_vce_clocks(rdev, 53300, 40000);
+		}
+	}
+}
+
+/**
+ * radeon_vce_free_handles - free still open VCE handles
+ *
+ * @rdev: radeon_device pointer
+ * @filp: drm file pointer
+ *
+ * Close all VCE handles still open by this file pointer
+ */
+void radeon_vce_free_handles(struct radeon_device *rdev, struct drm_file *filp)
+{
+	int i, r;
+	for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
+		uint32_t handle = atomic_read(&rdev->vce.handles[i]);
+		if (!handle || rdev->vce.filp[i] != filp)
+			continue;
+
+		radeon_vce_note_usage(rdev);
+
+		r = radeon_vce_get_destroy_msg(rdev, TN_RING_TYPE_VCE1_INDEX,
+					       handle, NULL);
+		if (r)
+			DRM_ERROR("Error destroying VCE handle (%d)!\n", r);
+
+		rdev->vce.filp[i] = NULL;
+		atomic_set(&rdev->vce.handles[i], 0);
+	}
+}
+
+/**
+ * radeon_vce_get_create_msg - generate a VCE create msg
+ *
+ * @rdev: radeon_device pointer
+ * @ring: ring we should submit the msg to
+ * @handle: VCE session handle to use
+ * @fence: optional fence to return
+ *
+ * Open up a stream for HW test
+ */
+int radeon_vce_get_create_msg(struct radeon_device *rdev, int ring,
+			      uint32_t handle, struct radeon_fence **fence)
+{
+	const unsigned ib_size_dw = 1024;
+	struct radeon_ib ib;
+	uint64_t dummy;
+	int i, r;
+
+	r = radeon_ib_get(rdev, ring, &ib, NULL, ib_size_dw * 4);
+	if (r) {
+		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
+		return r;
+	}
+
+	dummy = ib.gpu_addr + 1024;
+
+	/* stitch together an VCE create msg */
+	ib.length_dw = 0;
+	ib.ptr[ib.length_dw++] = 0x0000000c; /* len */
+	ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */
+	ib.ptr[ib.length_dw++] = handle;
+
+	ib.ptr[ib.length_dw++] = 0x00000030; /* len */
+	ib.ptr[ib.length_dw++] = 0x01000001; /* create cmd */
+	ib.ptr[ib.length_dw++] = 0x00000000;
+	ib.ptr[ib.length_dw++] = 0x00000042;
+	ib.ptr[ib.length_dw++] = 0x0000000a;
+	ib.ptr[ib.length_dw++] = 0x00000001;
+	ib.ptr[ib.length_dw++] = 0x00000080;
+	ib.ptr[ib.length_dw++] = 0x00000060;
+	ib.ptr[ib.length_dw++] = 0x00000100;
+	ib.ptr[ib.length_dw++] = 0x00000100;
+	ib.ptr[ib.length_dw++] = 0x0000000c;
+	ib.ptr[ib.length_dw++] = 0x00000000;
+
+	ib.ptr[ib.length_dw++] = 0x00000014; /* len */
+	ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */
+	ib.ptr[ib.length_dw++] = upper_32_bits(dummy);
+	ib.ptr[ib.length_dw++] = dummy;
+	ib.ptr[ib.length_dw++] = 0x00000001;
+
+	for (i = ib.length_dw; i < ib_size_dw; ++i)
+		ib.ptr[i] = 0x0;
+
+	r = radeon_ib_schedule(rdev, &ib, NULL);
+	if (r) {
+	        DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+	}
+
+	if (fence)
+		*fence = radeon_fence_ref(ib.fence);
+
+	radeon_ib_free(rdev, &ib);
+
+	return r;
+}
+
+/**
+ * radeon_vce_get_destroy_msg - generate a VCE destroy msg
+ *
+ * @rdev: radeon_device pointer
+ * @ring: ring we should submit the msg to
+ * @handle: VCE session handle to use
+ * @fence: optional fence to return
+ *
+ * Close up a stream for HW test or if userspace failed to do so
+ */
+int radeon_vce_get_destroy_msg(struct radeon_device *rdev, int ring,
+			       uint32_t handle, struct radeon_fence **fence)
+{
+	const unsigned ib_size_dw = 1024;
+	struct radeon_ib ib;
+	uint64_t dummy;
+	int i, r;
+
+	r = radeon_ib_get(rdev, ring, &ib, NULL, ib_size_dw * 4);
+	if (r) {
+		DRM_ERROR("radeon: failed to get ib (%d).\n", r);
+		return r;
+	}
+
+	dummy = ib.gpu_addr + 1024;
+
+	/* stitch together an VCE destroy msg */
+	ib.length_dw = 0;
+	ib.ptr[ib.length_dw++] = 0x0000000c; /* len */
+	ib.ptr[ib.length_dw++] = 0x00000001; /* session cmd */
+	ib.ptr[ib.length_dw++] = handle;
+
+	ib.ptr[ib.length_dw++] = 0x00000014; /* len */
+	ib.ptr[ib.length_dw++] = 0x05000005; /* feedback buffer */
+	ib.ptr[ib.length_dw++] = upper_32_bits(dummy);
+	ib.ptr[ib.length_dw++] = dummy;
+	ib.ptr[ib.length_dw++] = 0x00000001;
+
+	ib.ptr[ib.length_dw++] = 0x00000008; /* len */
+	ib.ptr[ib.length_dw++] = 0x02000001; /* destroy cmd */
+
+	for (i = ib.length_dw; i < ib_size_dw; ++i)
+		ib.ptr[i] = 0x0;
+
+	r = radeon_ib_schedule(rdev, &ib, NULL);
+	if (r) {
+	        DRM_ERROR("radeon: failed to schedule ib (%d).\n", r);
+	}
+
+	if (fence)
+		*fence = radeon_fence_ref(ib.fence);
+
+	radeon_ib_free(rdev, &ib);
+
+	return r;
+}
+
+/**
+ * radeon_vce_cs_reloc - command submission relocation
+ *
+ * @p: parser context
+ * @lo: address of lower dword
+ * @hi: address of higher dword
+ *
+ * Patch relocation inside command stream with real buffer address
+ */
+int radeon_vce_cs_reloc(struct radeon_cs_parser *p, int lo, int hi)
+{
+	struct radeon_cs_chunk *relocs_chunk;
+	uint64_t offset;
+	unsigned idx;
+
+	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
+	offset = radeon_get_ib_value(p, lo);
+	idx = radeon_get_ib_value(p, hi);
+
+	if (idx >= relocs_chunk->length_dw) {
+		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
+			  idx, relocs_chunk->length_dw);
+		return -EINVAL;
+	}
+
+	offset += p->relocs_ptr[(idx / 4)]->gpu_offset;
+
+        p->ib.ptr[lo] = offset & 0xFFFFFFFF;
+        p->ib.ptr[hi] = offset >> 32;
+
+	return 0;
+}
+
+/**
+ * radeon_vce_cs_parse - parse and validate the command stream
+ *
+ * @p: parser context
+ *
+ */
+int radeon_vce_cs_parse(struct radeon_cs_parser *p)
+{
+	uint32_t handle = 0;
+	bool destroy = false;
+	int i, r;
+
+	while (p->idx < p->chunks[p->chunk_ib_idx].length_dw) {
+		uint32_t len = radeon_get_ib_value(p, p->idx);
+		uint32_t cmd = radeon_get_ib_value(p, p->idx + 1);
+
+		if ((len < 8) || (len & 3)) {
+			DRM_ERROR("invalid VCE command length (%d)!\n", len);
+                	return -EINVAL;
+		}
+
+		switch (cmd) {
+		case 0x00000001: // session
+			handle = radeon_get_ib_value(p, p->idx + 2);
+			break;
+
+		case 0x00000002: // task info
+		case 0x01000001: // create
+		case 0x04000001: // config extension
+		case 0x04000002: // pic control
+		case 0x04000005: // rate control
+		case 0x04000007: // motion estimation
+		case 0x04000008: // rdo
+			break;
+
+		case 0x03000001: // encode
+			r = radeon_vce_cs_reloc(p, p->idx + 10, p->idx + 9);
+			if (r)
+				return r;
+
+			r = radeon_vce_cs_reloc(p, p->idx + 12, p->idx + 11);
+			if (r)
+				return r;
+			break;
+
+		case 0x02000001: // destroy
+			destroy = true;
+			break;
+
+		case 0x05000001: // context buffer
+		case 0x05000004: // video bitstream buffer
+		case 0x05000005: // feedback buffer
+			r = radeon_vce_cs_reloc(p, p->idx + 3, p->idx + 2);
+			if (r)
+				return r;
+			break;
+
+		default:
+			DRM_ERROR("invalid VCE command (0x%x)!\n", cmd);
+			return -EINVAL;
+		}
+
+		p->idx += len / 4;
+	}
+
+	if (destroy) {
+		/* IB contains a destroy msg, free the handle */
+		for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i)
+			atomic_cmpxchg(&p->rdev->vce.handles[i], handle, 0);
+
+		return 0;
+        }
+
+	/* create or encode, validate the handle */
+	for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
+		if (atomic_read(&p->rdev->vce.handles[i]) == handle)
+			return 0;
+	}
+
+	/* handle not found try to alloc a new one */
+	for (i = 0; i < RADEON_MAX_VCE_HANDLES; ++i) {
+		if (!atomic_cmpxchg(&p->rdev->vce.handles[i], 0, handle)) {
+			p->rdev->vce.filp[i] = p->filp;
+			return 0;
+		}
+	}
+
+	DRM_ERROR("No more free VCE handles!\n");
+	return -EINVAL;
+}
+
+/**
+ * radeon_vce_semaphore_emit - emit a semaphore command
+ *
+ * @rdev: radeon_device pointer
+ * @ring: engine to use
+ * @semaphore: address of semaphore
+ * @emit_wait: true=emit wait, false=emit signal
+ *
+ */
+bool radeon_vce_semaphore_emit(struct radeon_device *rdev,
+			       struct radeon_ring *ring,
+			       struct radeon_semaphore *semaphore,
+			       bool emit_wait)
+{
+	uint64_t addr = semaphore->gpu_addr;
+
+	radeon_ring_write(ring, VCE_CMD_SEMAPHORE);
+	radeon_ring_write(ring, (addr >> 3) & 0x000FFFFF);
+	radeon_ring_write(ring, (addr >> 23) & 0x000FFFFF);
+	radeon_ring_write(ring, 0x01003000 | (emit_wait ? 1 : 0));
+	if (!emit_wait)
+		radeon_ring_write(ring, VCE_CMD_END);
+
+	return true;
+}
+
+/**
+ * radeon_vce_ib_execute - execute indirect buffer
+ *
+ * @rdev: radeon_device pointer
+ * @ib: the IB to execute
+ *
+ */
+void radeon_vce_ib_execute(struct radeon_device *rdev, struct radeon_ib *ib)
+{
+	struct radeon_ring *ring = &rdev->ring[ib->ring];
+	radeon_ring_write(ring, VCE_CMD_IB);
+	radeon_ring_write(ring, ib->gpu_addr);
+	radeon_ring_write(ring, upper_32_bits(ib->gpu_addr));
+	radeon_ring_write(ring, ib->length_dw);
+}
+
+/**
+ * radeon_vce_fence_emit - add a fence command to the ring
+ *
+ * @rdev: radeon_device pointer
+ * @fence: the fence
+ *
+ */
+void radeon_vce_fence_emit(struct radeon_device *rdev,
+			   struct radeon_fence *fence)
+{
+	struct radeon_ring *ring = &rdev->ring[fence->ring];
+	uint32_t addr = rdev->fence_drv[fence->ring].gpu_addr;
+
+	radeon_ring_write(ring, VCE_CMD_FENCE);
+	radeon_ring_write(ring, addr);
+	radeon_ring_write(ring, upper_32_bits(addr));
+	radeon_ring_write(ring, fence->seq);
+	radeon_ring_write(ring, VCE_CMD_TRAP);
+	radeon_ring_write(ring, VCE_CMD_END);
+}
+
+/**
+ * radeon_vce_ring_test - test if VCE ring is working
+ *
+ * @rdev: radeon_device pointer
+ * @ring: the engine to test on
+ *
+ */
+int radeon_vce_ring_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	uint32_t rptr = vce_v1_0_get_rptr(rdev, ring);
+	unsigned i;
+	int r;
+
+	r = radeon_ring_lock(rdev, ring, 16);
+	if (r) {
+		DRM_ERROR("radeon: vce failed to lock ring %d (%d).\n",
+			  ring->idx, r);
+		return r;
+	}
+	radeon_ring_write(ring, VCE_CMD_END);
+	radeon_ring_unlock_commit(rdev, ring);
+
+	for (i = 0; i < rdev->usec_timeout; i++) {
+	        if (vce_v1_0_get_rptr(rdev, ring) != rptr)
+	                break;
+	        DRM_UDELAY(1);
+	}
+
+	if (i < rdev->usec_timeout) {
+	        DRM_INFO("ring test on %d succeeded in %d usecs\n",
+	                 ring->idx, i);
+	} else {
+	        DRM_ERROR("radeon: ring %d test failed\n",
+	                  ring->idx);
+	        r = -ETIMEDOUT;
+	}
+
+	return r;
+}
+
+/**
+ * radeon_vce_ib_test - test if VCE IBs are working
+ *
+ * @rdev: radeon_device pointer
+ * @ring: the engine to test on
+ *
+ */
+int radeon_vce_ib_test(struct radeon_device *rdev, struct radeon_ring *ring)
+{
+	struct radeon_fence *fence = NULL;
+	int r;
+
+	r = radeon_vce_get_create_msg(rdev, ring->idx, 1, NULL);
+	if (r) {
+		DRM_ERROR("radeon: failed to get create msg (%d).\n", r);
+		goto error;
+	}
+
+	r = radeon_vce_get_destroy_msg(rdev, ring->idx, 1, &fence);
+	if (r) {
+		DRM_ERROR("radeon: failed to get destroy ib (%d).\n", r);
+		goto error;
+	}
+
+	r = radeon_fence_wait(fence, false);
+	if (r) {
+		DRM_ERROR("radeon: fence wait failed (%d).\n", r);
+	} else {
+	        DRM_INFO("ib test on ring %d succeeded\n", ring->idx);
+	}
+error:
+	radeon_fence_unref(&fence);
+	return r;
+}
--- a/drivers/gpu/drm/radeon/radeon_vm.c
+++ b/drivers/gpu/drm/radeon/radeon_vm.c
@ -0,0 +1,966 @@
+/*
+ * Copyright 2008 Advanced Micro Devices, Inc.
+ * Copyright 2008 Red Hat Inc.
+ * Copyright 2009 Jerome Glisse.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Dave Airlie
+ *          Alex Deucher
+ *          Jerome Glisse
+ */
+#include <drm/drmP.h>
+#include <drm/radeon_drm.h>
+#include "radeon.h"
+#include "radeon_trace.h"
+
+/*
+ * GPUVM
+ * GPUVM is similar to the legacy gart on older asics, however
+ * rather than there being a single global gart table
+ * for the entire GPU, there are multiple VM page tables active
+ * at any given time.  The VM page tables can contain a mix
+ * vram pages and system memory pages and system memory pages
+ * can be mapped as snooped (cached system pages) or unsnooped
+ * (uncached system pages).
+ * Each VM has an ID associated with it and there is a page table
+ * associated with each VMID.  When execting a command buffer,
+ * the kernel tells the the ring what VMID to use for that command
+ * buffer.  VMIDs are allocated dynamically as commands are submitted.
+ * The userspace drivers maintain their own address space and the kernel
+ * sets up their pages tables accordingly when they submit their
+ * command buffers and a VMID is assigned.
+ * Cayman/Trinity support up to 8 active VMs at any given time;
+ * SI supports 16.
+ */
+
+/**
+ * radeon_vm_num_pde - return the number of page directory entries
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Calculate the number of page directory entries (cayman+).
+ */
+static unsigned radeon_vm_num_pdes(struct radeon_device *rdev)
+{
+	return rdev->vm_manager.max_pfn >> RADEON_VM_BLOCK_SIZE;
+}
+
+/**
+ * radeon_vm_directory_size - returns the size of the page directory in bytes
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Calculate the size of the page directory in bytes (cayman+).
+ */
+static unsigned radeon_vm_directory_size(struct radeon_device *rdev)
+{
+	return RADEON_GPU_PAGE_ALIGN(radeon_vm_num_pdes(rdev) * 8);
+}
+
+/**
+ * radeon_vm_manager_init - init the vm manager
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Init the vm manager (cayman+).
+ * Returns 0 for success, error for failure.
+ */
+int radeon_vm_manager_init(struct radeon_device *rdev)
+{
+	int r;
+
+	if (!rdev->vm_manager.enabled) {
+		r = radeon_asic_vm_init(rdev);
+		if (r)
+			return r;
+
+		rdev->vm_manager.enabled = true;
+	}
+	return 0;
+}
+
+/**
+ * radeon_vm_manager_fini - tear down the vm manager
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Tear down the VM manager (cayman+).
+ */
+void radeon_vm_manager_fini(struct radeon_device *rdev)
+{
+	int i;
+
+	if (!rdev->vm_manager.enabled)
+		return;
+
+	for (i = 0; i < RADEON_NUM_VM; ++i)
+		radeon_fence_unref(&rdev->vm_manager.active[i]);
+	radeon_asic_vm_fini(rdev);
+	rdev->vm_manager.enabled = false;
+}
+
+/**
+ * radeon_vm_get_bos - add the vm BOs to a validation list
+ *
+ * @vm: vm providing the BOs
+ * @head: head of validation list
+ *
+ * Add the page directory to the list of BOs to
+ * validate for command submission (cayman+).
+ */
+struct radeon_cs_reloc *radeon_vm_get_bos(struct radeon_device *rdev,
+					  struct radeon_vm *vm,
+					  struct list_head *head)
+{
+	struct radeon_cs_reloc *list;
+	unsigned i, idx, size;
+
+	size = (radeon_vm_num_pdes(rdev) + 1) * sizeof(struct radeon_cs_reloc);
+	list = kmalloc(size, GFP_KERNEL);
+	if (!list)
+		return NULL;
+
+	/* add the vm page table to the list */
+	list[0].gobj = NULL;
+	list[0].robj = vm->page_directory;
+	list[0].domain = RADEON_GEM_DOMAIN_VRAM;
+	list[0].alt_domain = RADEON_GEM_DOMAIN_VRAM;
+	list[0].tv.bo = &vm->page_directory->tbo;
+	list[0].tiling_flags = 0;
+	list[0].handle = 0;
+	list_add(&list[0].tv.head, head);
+
+	for (i = 0, idx = 1; i <= vm->max_pde_used; i++) {
+		if (!vm->page_tables[i].bo)
+			continue;
+
+		list[idx].gobj = NULL;
+		list[idx].robj = vm->page_tables[i].bo;
+		list[idx].domain = RADEON_GEM_DOMAIN_VRAM;
+		list[idx].alt_domain = RADEON_GEM_DOMAIN_VRAM;
+		list[idx].tv.bo = &list[idx].robj->tbo;
+		list[idx].tiling_flags = 0;
+		list[idx].handle = 0;
+		list_add(&list[idx++].tv.head, head);
+	}
+
+	return list;
+}
+
+/**
+ * radeon_vm_grab_id - allocate the next free VMID
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm to allocate id for
+ * @ring: ring we want to submit job to
+ *
+ * Allocate an id for the vm (cayman+).
+ * Returns the fence we need to sync to (if any).
+ *
+ * Global and local mutex must be locked!
+ */
+struct radeon_fence *radeon_vm_grab_id(struct radeon_device *rdev,
+				       struct radeon_vm *vm, int ring)
+{
+	struct radeon_fence *best[RADEON_NUM_RINGS] = {};
+	unsigned choices[2] = {};
+	unsigned i;
+
+	/* check if the id is still valid */
+	if (vm->last_id_use && vm->last_id_use == rdev->vm_manager.active[vm->id])
+		return NULL;
+
+	/* we definately need to flush */
+	radeon_fence_unref(&vm->last_flush);
+
+	/* skip over VMID 0, since it is the system VM */
+	for (i = 1; i < rdev->vm_manager.nvm; ++i) {
+		struct radeon_fence *fence = rdev->vm_manager.active[i];
+
+		if (fence == NULL) {
+			/* found a free one */
+			vm->id = i;
+			trace_radeon_vm_grab_id(vm->id, ring);
+			return NULL;
+		}
+
+		if (radeon_fence_is_earlier(fence, best[fence->ring])) {
+			best[fence->ring] = fence;
+			choices[fence->ring == ring ? 0 : 1] = i;
+		}
+	}
+
+	for (i = 0; i < 2; ++i) {
+		if (choices[i]) {
+			vm->id = choices[i];
+			trace_radeon_vm_grab_id(vm->id, ring);
+			return rdev->vm_manager.active[choices[i]];
+		}
+	}
+
+	/* should never happen */
+	BUG();
+	return NULL;
+}
+
+/**
+ * radeon_vm_flush - hardware flush the vm
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm we want to flush
+ * @ring: ring to use for flush
+ *
+ * Flush the vm (cayman+).
+ *
+ * Global and local mutex must be locked!
+ */
+void radeon_vm_flush(struct radeon_device *rdev,
+		     struct radeon_vm *vm,
+		     int ring)
+{
+	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
+
+	/* if we can't remember our last VM flush then flush now! */
+	/* XXX figure out why we have to flush all the time */
+	if (!vm->last_flush || true || pd_addr != vm->pd_gpu_addr) {
+		vm->pd_gpu_addr = pd_addr;
+		radeon_ring_vm_flush(rdev, ring, vm);
+	}
+}
+
+/**
+ * radeon_vm_fence - remember fence for vm
+ *
+ * @rdev: radeon_device pointer
+ * @vm: vm we want to fence
+ * @fence: fence to remember
+ *
+ * Fence the vm (cayman+).
+ * Set the fence used to protect page table and id.
+ *
+ * Global and local mutex must be locked!
+ */
+void radeon_vm_fence(struct radeon_device *rdev,
+		     struct radeon_vm *vm,
+		     struct radeon_fence *fence)
+{
+	radeon_fence_unref(&vm->fence);
+	vm->fence = radeon_fence_ref(fence);
+
+	radeon_fence_unref(&rdev->vm_manager.active[vm->id]);
+	rdev->vm_manager.active[vm->id] = radeon_fence_ref(fence);
+
+	radeon_fence_unref(&vm->last_id_use);
+	vm->last_id_use = radeon_fence_ref(fence);
+
+        /* we just flushed the VM, remember that */
+        if (!vm->last_flush)
+                vm->last_flush = radeon_fence_ref(fence);
+}
+
+/**
+ * radeon_vm_bo_find - find the bo_va for a specific vm & bo
+ *
+ * @vm: requested vm
+ * @bo: requested buffer object
+ *
+ * Find @bo inside the requested vm (cayman+).
+ * Search inside the @bos vm list for the requested vm
+ * Returns the found bo_va or NULL if none is found
+ *
+ * Object has to be reserved!
+ */
+struct radeon_bo_va *radeon_vm_bo_find(struct radeon_vm *vm,
+				       struct radeon_bo *bo)
+{
+	struct radeon_bo_va *bo_va;
+
+	list_for_each_entry(bo_va, &bo->va, bo_list) {
+		if (bo_va->vm == vm) {
+			return bo_va;
+		}
+	}
+	return NULL;
+}
+
+/**
+ * radeon_vm_bo_add - add a bo to a specific vm
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ * @bo: radeon buffer object
+ *
+ * Add @bo into the requested vm (cayman+).
+ * Add @bo to the list of bos associated with the vm
+ * Returns newly added bo_va or NULL for failure
+ *
+ * Object has to be reserved!
+ */
+struct radeon_bo_va *radeon_vm_bo_add(struct radeon_device *rdev,
+				      struct radeon_vm *vm,
+				      struct radeon_bo *bo)
+{
+	struct radeon_bo_va *bo_va;
+
+	bo_va = kzalloc(sizeof(struct radeon_bo_va), GFP_KERNEL);
+	if (bo_va == NULL) {
+		return NULL;
+	}
+	bo_va->vm = vm;
+	bo_va->bo = bo;
+	bo_va->soffset = 0;
+	bo_va->eoffset = 0;
+	bo_va->flags = 0;
+	bo_va->valid = false;
+	bo_va->ref_count = 1;
+	INIT_LIST_HEAD(&bo_va->bo_list);
+	INIT_LIST_HEAD(&bo_va->vm_list);
+
+	mutex_lock(&vm->mutex);
+	list_add(&bo_va->vm_list, &vm->va);
+	list_add_tail(&bo_va->bo_list, &bo->va);
+	mutex_unlock(&vm->mutex);
+
+	return bo_va;
+}
+
+/**
+ * radeon_vm_clear_bo - initially clear the page dir/table
+ *
+ * @rdev: radeon_device pointer
+ * @bo: bo to clear
+ */
+static int radeon_vm_clear_bo(struct radeon_device *rdev,
+			      struct radeon_bo *bo)
+{
+        struct ttm_validate_buffer tv;
+        struct ww_acquire_ctx ticket;
+        struct list_head head;
+	struct radeon_ib ib;
+	unsigned entries;
+	uint64_t addr;
+	int r;
+
+        memset(&tv, 0, sizeof(tv));
+        tv.bo = &bo->tbo;
+
+        INIT_LIST_HEAD(&head);
+        list_add(&tv.head, &head);
+
+        r = ttm_eu_reserve_buffers(&ticket, &head);
+        if (r)
+		return r;
+
+        r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
+        if (r)
+                goto error;
+
+	addr = radeon_bo_gpu_offset(bo);
+	entries = radeon_bo_size(bo) / 8;
+
+	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib,
+			  NULL, entries * 2 + 64);
+	if (r)
+                goto error;
+
+	ib.length_dw = 0;
+
+	radeon_asic_vm_set_page(rdev, &ib, addr, 0, entries, 0, 0);
+
+	r = radeon_ib_schedule(rdev, &ib, NULL);
+	if (r)
+                goto error;
+
+	ttm_eu_fence_buffer_objects(&ticket, &head, ib.fence);
+	radeon_ib_free(rdev, &ib);
+
+	return 0;
+
+error:
+	ttm_eu_backoff_reservation(&ticket, &head);
+	return r;
+}
+
+/**
+ * radeon_vm_bo_set_addr - set bos virtual address inside a vm
+ *
+ * @rdev: radeon_device pointer
+ * @bo_va: bo_va to store the address
+ * @soffset: requested offset of the buffer in the VM address space
+ * @flags: attributes of pages (read/write/valid/etc.)
+ *
+ * Set offset of @bo_va (cayman+).
+ * Validate and set the offset requested within the vm address space.
+ * Returns 0 for success, error for failure.
+ *
+ * Object has to be reserved!
+ */
+int radeon_vm_bo_set_addr(struct radeon_device *rdev,
+			  struct radeon_bo_va *bo_va,
+			  uint64_t soffset,
+			  uint32_t flags)
+{
+	uint64_t size = radeon_bo_size(bo_va->bo);
+	uint64_t eoffset, last_offset = 0;
+	struct radeon_vm *vm = bo_va->vm;
+	struct radeon_bo_va *tmp;
+	struct list_head *head;
+	unsigned last_pfn, pt_idx;
+	int r;
+
+	if (soffset) {
+		/* make sure object fit at this offset */
+		eoffset = soffset + size;
+		if (soffset >= eoffset) {
+			return -EINVAL;
+		}
+
+		last_pfn = eoffset / RADEON_GPU_PAGE_SIZE;
+		if (last_pfn > rdev->vm_manager.max_pfn) {
+			dev_err(rdev->dev, "va above limit (0x%08X > 0x%08X)\n",
+				last_pfn, rdev->vm_manager.max_pfn);
+			return -EINVAL;
+		}
+
+	} else {
+		eoffset = last_pfn = 0;
+	}
+
+	mutex_lock(&vm->mutex);
+	head = &vm->va;
+	last_offset = 0;
+	list_for_each_entry(tmp, &vm->va, vm_list) {
+		if (bo_va == tmp) {
+			/* skip over currently modified bo */
+			continue;
+		}
+
+		if (soffset >= last_offset && eoffset <= tmp->soffset) {
+			/* bo can be added before this one */
+			break;
+		}
+		if (eoffset > tmp->soffset && soffset < tmp->eoffset) {
+			/* bo and tmp overlap, invalid offset */
+			dev_err(rdev->dev, "bo %p va 0x%08X conflict with (bo %p 0x%08X 0x%08X)\n",
+				bo_va->bo, (unsigned)bo_va->soffset, tmp->bo,
+				(unsigned)tmp->soffset, (unsigned)tmp->eoffset);
+			mutex_unlock(&vm->mutex);
+			return -EINVAL;
+		}
+		last_offset = tmp->eoffset;
+		head = &tmp->vm_list;
+	}
+
+	bo_va->soffset = soffset;
+	bo_va->eoffset = eoffset;
+	bo_va->flags = flags;
+	bo_va->valid = false;
+	list_move(&bo_va->vm_list, head);
+
+	soffset = (soffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
+	eoffset = (eoffset / RADEON_GPU_PAGE_SIZE) >> RADEON_VM_BLOCK_SIZE;
+
+	if (eoffset > vm->max_pde_used)
+		vm->max_pde_used = eoffset;
+
+	radeon_bo_unreserve(bo_va->bo);
+
+	/* walk over the address space and allocate the page tables */
+	for (pt_idx = soffset; pt_idx <= eoffset; ++pt_idx) {
+		struct radeon_bo *pt;
+
+		if (vm->page_tables[pt_idx].bo)
+			continue;
+
+		/* drop mutex to allocate and clear page table */
+		mutex_unlock(&vm->mutex);
+
+		r = radeon_bo_create(rdev, RADEON_VM_PTE_COUNT * 8,
+				     RADEON_GPU_PAGE_SIZE, false, 
+				     RADEON_GEM_DOMAIN_VRAM, NULL, &pt);
+		if (r)
+			return r;
+
+		r = radeon_vm_clear_bo(rdev, pt);
+		if (r) {
+			radeon_bo_unref(&pt);
+			radeon_bo_reserve(bo_va->bo, false);
+			return r;
+		}
+
+		/* aquire mutex again */
+		mutex_lock(&vm->mutex);
+		if (vm->page_tables[pt_idx].bo) {
+			/* someone else allocated the pt in the meantime */
+			mutex_unlock(&vm->mutex);
+			radeon_bo_unref(&pt);
+			mutex_lock(&vm->mutex);
+			continue;
+		}
+
+		vm->page_tables[pt_idx].addr = 0;
+		vm->page_tables[pt_idx].bo = pt;
+	}
+
+	mutex_unlock(&vm->mutex);
+	return radeon_bo_reserve(bo_va->bo, false);
+}
+
+/**
+ * radeon_vm_map_gart - get the physical address of a gart page
+ *
+ * @rdev: radeon_device pointer
+ * @addr: the unmapped addr
+ *
+ * Look up the physical address of the page that the pte resolves
+ * to (cayman+).
+ * Returns the physical address of the page.
+ */
+uint64_t radeon_vm_map_gart(struct radeon_device *rdev, uint64_t addr)
+{
+	uint64_t result;
+
+	/* page table offset */
+	result = rdev->gart.pages_addr[addr >> PAGE_SHIFT];
+
+	/* in case cpu page size != gpu page size*/
+	result |= addr & (~PAGE_MASK);
+
+	return result;
+}
+
+/**
+ * radeon_vm_page_flags - translate page flags to what the hw uses
+ *
+ * @flags: flags comming from userspace
+ *
+ * Translate the flags the userspace ABI uses to hw flags.
+ */
+static uint32_t radeon_vm_page_flags(uint32_t flags)
+{
+        uint32_t hw_flags = 0;
+        hw_flags |= (flags & RADEON_VM_PAGE_VALID) ? R600_PTE_VALID : 0;
+        hw_flags |= (flags & RADEON_VM_PAGE_READABLE) ? R600_PTE_READABLE : 0;
+        hw_flags |= (flags & RADEON_VM_PAGE_WRITEABLE) ? R600_PTE_WRITEABLE : 0;
+        if (flags & RADEON_VM_PAGE_SYSTEM) {
+                hw_flags |= R600_PTE_SYSTEM;
+                hw_flags |= (flags & RADEON_VM_PAGE_SNOOPED) ? R600_PTE_SNOOPED : 0;
+        }
+        return hw_flags;
+}
+
+/**
+ * radeon_vm_update_pdes - make sure that page directory is valid
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ * @start: start of GPU address range
+ * @end: end of GPU address range
+ *
+ * Allocates new page tables if necessary
+ * and updates the page directory (cayman+).
+ * Returns 0 for success, error for failure.
+ *
+ * Global and local mutex must be locked!
+ */
+int radeon_vm_update_page_directory(struct radeon_device *rdev,
+				    struct radeon_vm *vm)
+{
+	static const uint32_t incr = RADEON_VM_PTE_COUNT * 8;
+
+	uint64_t pd_addr = radeon_bo_gpu_offset(vm->page_directory);
+	uint64_t last_pde = ~0, last_pt = ~0;
+	unsigned count = 0, pt_idx, ndw;
+	struct radeon_ib ib;
+	int r;
+
+	/* padding, etc. */
+	ndw = 64;
+
+	/* assume the worst case */
+	ndw += vm->max_pde_used * 12;
+
+	/* update too big for an IB */
+	if (ndw > 0xfffff)
+		return -ENOMEM;
+
+	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
+	if (r)
+		return r;
+	ib.length_dw = 0;
+
+	/* walk over the address space and update the page directory */
+	for (pt_idx = 0; pt_idx <= vm->max_pde_used; ++pt_idx) {
+		struct radeon_bo *bo = vm->page_tables[pt_idx].bo;
+		uint64_t pde, pt;
+
+		if (bo == NULL)
+			continue;
+
+		pt = radeon_bo_gpu_offset(bo);
+		if (vm->page_tables[pt_idx].addr == pt)
+			continue;
+		vm->page_tables[pt_idx].addr = pt;
+
+		pde = pd_addr + pt_idx * 8;
+		if (((last_pde + 8 * count) != pde) ||
+		    ((last_pt + incr * count) != pt)) {
+
+			if (count) {
+				radeon_asic_vm_set_page(rdev, &ib, last_pde,
+							last_pt, count, incr,
+							R600_PTE_VALID);
+			}
+
+			count = 1;
+			last_pde = pde;
+			last_pt = pt;
+		} else {
+			++count;
+		}
+	}
+
+	if (count)
+		radeon_asic_vm_set_page(rdev, &ib, last_pde, last_pt, count,
+					incr, R600_PTE_VALID);
+
+	if (ib.length_dw != 0) {
+		radeon_semaphore_sync_to(ib.semaphore, vm->last_id_use);
+		r = radeon_ib_schedule(rdev, &ib, NULL);
+		if (r) {
+			radeon_ib_free(rdev, &ib);
+			return r;
+		}
+		radeon_fence_unref(&vm->fence);
+		vm->fence = radeon_fence_ref(ib.fence);
+		radeon_fence_unref(&vm->last_flush);
+	}
+	radeon_ib_free(rdev, &ib);
+
+	return 0;
+}
+
+/**
+ * radeon_vm_update_ptes - make sure that page tables are valid
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ * @start: start of GPU address range
+ * @end: end of GPU address range
+ * @dst: destination address to map to
+ * @flags: mapping flags
+ *
+ * Update the page tables in the range @start - @end (cayman+).
+ *
+ * Global and local mutex must be locked!
+ */
+static void radeon_vm_update_ptes(struct radeon_device *rdev,
+				  struct radeon_vm *vm,
+				  struct radeon_ib *ib,
+				  uint64_t start, uint64_t end,
+				  uint64_t dst, uint32_t flags)
+{
+	static const uint64_t mask = RADEON_VM_PTE_COUNT - 1;
+
+	uint64_t last_pte = ~0, last_dst = ~0;
+	unsigned count = 0;
+	uint64_t addr;
+
+	start = start / RADEON_GPU_PAGE_SIZE;
+	end = end / RADEON_GPU_PAGE_SIZE;
+
+	/* walk over the address space and update the page tables */
+	for (addr = start; addr < end; ) {
+		uint64_t pt_idx = addr >> RADEON_VM_BLOCK_SIZE;
+		unsigned nptes;
+		uint64_t pte;
+
+		if ((addr & ~mask) == (end & ~mask))
+			nptes = end - addr;
+		else
+			nptes = RADEON_VM_PTE_COUNT - (addr & mask);
+
+		pte = radeon_bo_gpu_offset(vm->page_tables[pt_idx].bo);
+		pte += (addr & mask) * 8;
+
+		if ((last_pte + 8 * count) != pte) {
+
+			if (count) {
+				radeon_asic_vm_set_page(rdev, ib, last_pte,
+							last_dst, count,
+							RADEON_GPU_PAGE_SIZE,
+							flags);
+			}
+
+			count = nptes;
+			last_pte = pte;
+			last_dst = dst;
+		} else {
+			count += nptes;
+		}
+
+		addr += nptes;
+		dst += nptes * RADEON_GPU_PAGE_SIZE;
+	}
+
+	if (count) {
+		radeon_asic_vm_set_page(rdev, ib, last_pte,
+					last_dst, count,
+					RADEON_GPU_PAGE_SIZE, flags);
+	}
+}
+
+/**
+ * radeon_vm_bo_update - map a bo into the vm page table
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ * @bo: radeon buffer object
+ * @mem: ttm mem
+ *
+ * Fill in the page table entries for @bo (cayman+).
+ * Returns 0 for success, -EINVAL for failure.
+ *
+ * Object have to be reserved and mutex must be locked!
+ */
+int radeon_vm_bo_update(struct radeon_device *rdev,
+			struct radeon_vm *vm,
+			struct radeon_bo *bo,
+			struct ttm_mem_reg *mem)
+{
+	struct radeon_ib ib;
+	struct radeon_bo_va *bo_va;
+	unsigned nptes, ndw;
+	uint64_t addr;
+	int r;
+
+	bo_va = radeon_vm_bo_find(vm, bo);
+	if (bo_va == NULL) {
+		dev_err(rdev->dev, "bo %p not in vm %p\n", bo, vm);
+		return -EINVAL;
+	}
+
+	if (!bo_va->soffset) {
+		dev_err(rdev->dev, "bo %p don't has a mapping in vm %p\n",
+			bo, vm);
+		return -EINVAL;
+	}
+
+	if ((bo_va->valid && mem) || (!bo_va->valid && mem == NULL))
+		return 0;
+
+	bo_va->flags &= ~RADEON_VM_PAGE_VALID;
+	bo_va->flags &= ~RADEON_VM_PAGE_SYSTEM;
+	if (mem) {
+		addr = mem->start << PAGE_SHIFT;
+		if (mem->mem_type != TTM_PL_SYSTEM) {
+			bo_va->flags |= RADEON_VM_PAGE_VALID;
+			bo_va->valid = true;
+		}
+		if (mem->mem_type == TTM_PL_TT) {
+			bo_va->flags |= RADEON_VM_PAGE_SYSTEM;
+		} else {
+			addr += rdev->vm_manager.vram_base_offset;
+		}
+	} else {
+		addr = 0;
+		bo_va->valid = false;
+	}
+
+	trace_radeon_vm_bo_update(bo_va);
+
+	nptes = radeon_bo_ngpu_pages(bo);
+
+	/* padding, etc. */
+	ndw = 64;
+
+	if (RADEON_VM_BLOCK_SIZE > 11)
+		/* reserve space for one header for every 2k dwords */
+		ndw += (nptes >> 11) * 4;
+	else
+		/* reserve space for one header for
+		    every (1 << BLOCK_SIZE) entries */
+		ndw += (nptes >> RADEON_VM_BLOCK_SIZE) * 4;
+
+	/* reserve space for pte addresses */
+	ndw += nptes * 2;
+
+	/* update too big for an IB */
+	if (ndw > 0xfffff)
+		return -ENOMEM;
+
+	r = radeon_ib_get(rdev, R600_RING_TYPE_DMA_INDEX, &ib, NULL, ndw * 4);
+	if (r)
+		return r;
+	ib.length_dw = 0;
+
+	radeon_vm_update_ptes(rdev, vm, &ib, bo_va->soffset, bo_va->eoffset,
+			      addr, radeon_vm_page_flags(bo_va->flags));
+
+	radeon_semaphore_sync_to(ib.semaphore, vm->fence);
+	r = radeon_ib_schedule(rdev, &ib, NULL);
+	if (r) {
+		radeon_ib_free(rdev, &ib);
+		return r;
+	}
+	radeon_fence_unref(&vm->fence);
+	vm->fence = radeon_fence_ref(ib.fence);
+	radeon_ib_free(rdev, &ib);
+	radeon_fence_unref(&vm->last_flush);
+
+	return 0;
+}
+
+/**
+ * radeon_vm_bo_rmv - remove a bo to a specific vm
+ *
+ * @rdev: radeon_device pointer
+ * @bo_va: requested bo_va
+ *
+ * Remove @bo_va->bo from the requested vm (cayman+).
+ * Remove @bo_va->bo from the list of bos associated with the bo_va->vm and
+ * remove the ptes for @bo_va in the page table.
+ * Returns 0 for success.
+ *
+ * Object have to be reserved!
+ */
+int radeon_vm_bo_rmv(struct radeon_device *rdev,
+		     struct radeon_bo_va *bo_va)
+{
+	int r = 0;
+
+	mutex_lock(&bo_va->vm->mutex);
+	if (bo_va->soffset)
+		r = radeon_vm_bo_update(rdev, bo_va->vm, bo_va->bo, NULL);
+
+	list_del(&bo_va->vm_list);
+	mutex_unlock(&bo_va->vm->mutex);
+	list_del(&bo_va->bo_list);
+
+	kfree(bo_va);
+	return r;
+}
+
+/**
+ * radeon_vm_bo_invalidate - mark the bo as invalid
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ * @bo: radeon buffer object
+ *
+ * Mark @bo as invalid (cayman+).
+ */
+void radeon_vm_bo_invalidate(struct radeon_device *rdev,
+			     struct radeon_bo *bo)
+{
+	struct radeon_bo_va *bo_va;
+
+	list_for_each_entry(bo_va, &bo->va, bo_list) {
+		bo_va->valid = false;
+	}
+}
+
+/**
+ * radeon_vm_init - initialize a vm instance
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ *
+ * Init @vm fields (cayman+).
+ */
+int radeon_vm_init(struct radeon_device *rdev, struct radeon_vm *vm)
+{
+	unsigned pd_size, pd_entries, pts_size;
+	int r;
+
+	vm->id = 0;
+	vm->fence = NULL;
+	vm->last_flush = NULL;
+	vm->last_id_use = NULL;
+	mutex_init(&vm->mutex);
+	INIT_LIST_HEAD(&vm->va);
+
+	pd_size = radeon_vm_directory_size(rdev);
+	pd_entries = radeon_vm_num_pdes(rdev);
+
+	/* allocate page table array */
+	pts_size = pd_entries * sizeof(struct radeon_vm_pt);
+	vm->page_tables = kzalloc(pts_size, GFP_KERNEL);
+	if (vm->page_tables == NULL) {
+		DRM_ERROR("Cannot allocate memory for page table array\n");
+		return -ENOMEM;
+	}
+
+	r = radeon_bo_create(rdev, pd_size, RADEON_VM_PTB_ALIGN_SIZE, false,
+			     RADEON_GEM_DOMAIN_VRAM, NULL,
+			     &vm->page_directory);
+	if (r)
+		return r;
+
+	r = radeon_vm_clear_bo(rdev, vm->page_directory);
+	if (r) {
+		radeon_bo_unref(&vm->page_directory);
+		vm->page_directory = NULL;
+		return r;
+	}
+
+	return 0;
+}
+
+/**
+ * radeon_vm_fini - tear down a vm instance
+ *
+ * @rdev: radeon_device pointer
+ * @vm: requested vm
+ *
+ * Tear down @vm (cayman+).
+ * Unbind the VM and remove all bos from the vm bo list
+ */
+void radeon_vm_fini(struct radeon_device *rdev, struct radeon_vm *vm)
+{
+	struct radeon_bo_va *bo_va, *tmp;
+	int i, r;
+
+	if (!list_empty(&vm->va)) {
+		dev_err(rdev->dev, "still active bo inside vm\n");
+	}
+	list_for_each_entry_safe(bo_va, tmp, &vm->va, vm_list) {
+		list_del_init(&bo_va->vm_list);
+		r = radeon_bo_reserve(bo_va->bo, false);
+		if (!r) {
+			list_del_init(&bo_va->bo_list);
+			radeon_bo_unreserve(bo_va->bo);
+			kfree(bo_va);
+		}
+	}
+
+
+	for (i = 0; i < radeon_vm_num_pdes(rdev); i++)
+		radeon_bo_unref(&vm->page_tables[i].bo);
+	kfree(vm->page_tables);
+
+	radeon_bo_unref(&vm->page_directory);
+
+	radeon_fence_unref(&vm->fence);
+	radeon_fence_unref(&vm->last_flush);
+	radeon_fence_unref(&vm->last_id_use);
+
+	mutex_destroy(&vm->mutex);
+}
--- a/drivers/gpu/drm/radeon/rs780_dpm.c
+++ b/drivers/gpu/drm/radeon/rs780_dpm.c
@ -807,9 +807,6 @@ static int rs780_parse_power_table(struct radeon_device *rdev)
 				  power_info->pplib.ucNumStates, GFP_KERNEL);
 	if (!rdev->pm.dpm.ps)
 		return -ENOMEM;
-	rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps);
-	rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime);
-	rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime);

 	for (i = 0; i < power_info->pplib.ucNumStates; i++) {
 		power_state = (union pplib_power_state *)
@ -859,6 +856,10 @@ int rs780_dpm_init(struct radeon_device *rdev)
 		return -ENOMEM;
 	rdev->pm.dpm.priv = pi;

+	ret = r600_get_platform_caps(rdev);
+	if (ret)
+		return ret;
+
 	ret = rs780_parse_power_table(rdev);
 	if (ret)
 		return ret;
--- a/drivers/gpu/drm/radeon/rv6xx_dpm.c
+++ b/drivers/gpu/drm/radeon/rv6xx_dpm.c
@ -1891,9 +1891,6 @@ static int rv6xx_parse_power_table(struct radeon_device *rdev)
 				  power_info->pplib.ucNumStates, GFP_KERNEL);
 	if (!rdev->pm.dpm.ps)
 		return -ENOMEM;
-	rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps);
-	rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime);
-	rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime);

 	for (i = 0; i < power_info->pplib.ucNumStates; i++) {
 		power_state = (union pplib_power_state *)
@ -1943,6 +1940,10 @@ int rv6xx_dpm_init(struct radeon_device *rdev)
 		return -ENOMEM;
 	rdev->pm.dpm.priv = pi;

+	ret = r600_get_platform_caps(rdev);
+	if (ret)
+		return ret;
+
 	ret = rv6xx_parse_power_table(rdev);
 	if (ret)
 		return ret;
--- a/drivers/gpu/drm/radeon/rv770_dpm.c
+++ b/drivers/gpu/drm/radeon/rv770_dpm.c
@ -2281,9 +2281,6 @@ int rv7xx_parse_power_table(struct radeon_device *rdev)
 				  power_info->pplib.ucNumStates, GFP_KERNEL);
 	if (!rdev->pm.dpm.ps)
 		return -ENOMEM;
-	rdev->pm.dpm.platform_caps = le32_to_cpu(power_info->pplib.ulPlatformCaps);
-	rdev->pm.dpm.backbias_response_time = le16_to_cpu(power_info->pplib.usBackbiasTime);
-	rdev->pm.dpm.voltage_response_time = le16_to_cpu(power_info->pplib.usVoltageTime);

 	for (i = 0; i < power_info->pplib.ucNumStates; i++) {
 		power_state = (union pplib_power_state *)
@ -2361,6 +2358,10 @@ int rv770_dpm_init(struct radeon_device *rdev)
 	pi->min_vddc_in_table = 0;
 	pi->max_vddc_in_table = 0;

+	ret = r600_get_platform_caps(rdev);
+	if (ret)
+		return ret;
+
 	ret = rv7xx_parse_power_table(rdev);
 	if (ret)
 		return ret;
--- a/drivers/gpu/drm/radeon/si.c
+++ b/drivers/gpu/drm/radeon/si.c
@ -3434,8 +3434,6 @@ static int si_cp_resume(struct radeon_device *rdev)

 	WREG32(CP_RB0_BASE, ring->gpu_addr >> 8);

-	ring->rptr = RREG32(CP_RB0_RPTR);
-
 	/* ring1  - compute only */
 	/* Set ring buffer size */
 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP1_INDEX];
@ -3460,8 +3458,6 @@ static int si_cp_resume(struct radeon_device *rdev)

 	WREG32(CP_RB1_BASE, ring->gpu_addr >> 8);

-	ring->rptr = RREG32(CP_RB1_RPTR);
-
 	/* ring2 - compute only */
 	/* Set ring buffer size */
 	ring = &rdev->ring[CAYMAN_RING_TYPE_CP2_INDEX];
@ -3486,8 +3482,6 @@ static int si_cp_resume(struct radeon_device *rdev)

 	WREG32(CP_RB2_BASE, ring->gpu_addr >> 8);

-	ring->rptr = RREG32(CP_RB2_RPTR);
-
 	/* start the rings */
 	si_cp_start(rdev);
 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ready = true;
@ -3872,11 +3866,9 @@ bool si_gfx_is_lockup(struct radeon_device *rdev, struct radeon_ring *ring)
 	if (!(reset_mask & (RADEON_RESET_GFX |
 			    RADEON_RESET_COMPUTE |
 			    RADEON_RESET_CP))) {
-		radeon_ring_lockup_update(ring);
+		radeon_ring_lockup_update(rdev, ring);
 		return false;
 	}
-	/* force CP activities */
-	radeon_ring_force_activity(rdev, ring);
 	return radeon_ring_test_lockup(rdev, ring);
 }

--- a/Show more
+++ b/Show more