Merge branch 'drm-next-3.18' of git://people.freedesktop.org/~agd5f/linux into drm-next

More radeon changes for drm-next. Highlights: - UVD support for older asics - Reset rework in preparation for Maarten's fence patches I have a few more patches which depend on Christian's ttm changes, I'll send them out separately once you've merged the ttm changes. * 'drm-next-3.18' of git://people.freedesktop.org/~agd5f/linux: drm/radeon: drop doing resets in a work item drm/radeon: drop RADEON_FENCE_SIGNALED_SEQ v2 drm/radeon: add timeout argument to radeon_fence_wait_seq v2 drm/radeon: handle lockup in delayed work, v5 drm/radeon: take exclusive_lock in read mode during ring tests, v5 drm/radeon: force fence completion only on problematic rings (v2) drm/radeon: wake up all fences on manual reset drm/radeon: add UVD fw names for older asic drm/radeon: enable RB_ARB before resetting the VCPU drm/radeon: 760G/780V/880V don't have UVD drm/radeon: implement UVD hw workarounds for R6xx v3 drm/radeon: add UVD support for older asics v4 drm/radeon: add set_uvd_clocks callback for r6xx v4 drm/radeon: properly init UVD MC bits on R600 drm/radeon: force UVD buffers into VRAM on RS[78]80 v2 drm/radeon: move the IB test after the AGP fallback
2014-08-28 11:39:11 +10:00 · 2014-08-28 11:39:11 +10:00 · 6adae108b2
parent fb1aacae40 3c0363891c
commit 6adae108b2
15 changed files with 535 additions and 188 deletions
--- a/drivers/gpu/drm/radeon/cik.c
+++ b/drivers/gpu/drm/radeon/cik.c
@ -8246,8 +8246,10 @@ restart_ih:
 	}
 	if (queue_hotplug)
 		schedule_work(&rdev->hotplug_work);
-	if (queue_reset)
-		schedule_work(&rdev->reset_work);
+	if (queue_reset) {
+		rdev->needs_reset = true;
+		wake_up_all(&rdev->fence_queue);
+	}
 	if (queue_thermal)
 		schedule_work(&rdev->pm.dpm.thermal.work);
 	rdev->ih.rptr = rptr;
--- a/drivers/gpu/drm/radeon/r600.c
+++ b/drivers/gpu/drm/radeon/r600.c
@ -122,6 +122,94 @@ u32 r600_get_xclk(struct radeon_device *rdev)

 int r600_set_uvd_clocks(struct radeon_device *rdev, u32 vclk, u32 dclk)
 {
+	unsigned fb_div = 0, ref_div, vclk_div = 0, dclk_div = 0;
+	int r;
+
+	/* bypass vclk and dclk with bclk */
+	WREG32_P(CG_UPLL_FUNC_CNTL_2,
+		 VCLK_SRC_SEL(1) | DCLK_SRC_SEL(1),
+		 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
+
+	/* assert BYPASS_EN, deassert UPLL_RESET, UPLL_SLEEP and UPLL_CTLREQ */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_BYPASS_EN_MASK, ~(
+		 UPLL_RESET_MASK | UPLL_SLEEP_MASK | UPLL_CTLREQ_MASK));
+
+	if (rdev->family >= CHIP_RS780)
+		WREG32_P(GFX_MACRO_BYPASS_CNTL, UPLL_BYPASS_CNTL,
+			 ~UPLL_BYPASS_CNTL);
+
+	if (!vclk || !dclk) {
+		/* keep the Bypass mode, put PLL to sleep */
+		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_SLEEP_MASK, ~UPLL_SLEEP_MASK);
+		return 0;
+	}
+
+	if (rdev->clock.spll.reference_freq == 10000)
+		ref_div = 34;
+	else
+		ref_div = 4;
+
+	r = radeon_uvd_calc_upll_dividers(rdev, vclk, dclk, 50000, 160000,
+					  ref_div + 1, 0xFFF, 2, 30, ~0,
+					  &fb_div, &vclk_div, &dclk_div);
+	if (r)
+		return r;
+
+	if (rdev->family >= CHIP_RV670 && rdev->family < CHIP_RS780)
+		fb_div >>= 1;
+	else
+		fb_div |= 1;
+
+	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
+        if (r)
+                return r;
+
+	/* assert PLL_RESET */
+	WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_RESET_MASK, ~UPLL_RESET_MASK);
+
+	/* For RS780 we have to choose ref clk */
+	if (rdev->family >= CHIP_RS780)
+		WREG32_P(CG_UPLL_FUNC_CNTL, UPLL_REFCLK_SRC_SEL_MASK,
+			 ~UPLL_REFCLK_SRC_SEL_MASK);
+
+	/* set the required fb, ref and post divder values */
+	WREG32_P(CG_UPLL_FUNC_CNTL,
+		 UPLL_FB_DIV(fb_div) |
+		 UPLL_REF_DIV(ref_div),
+		 ~(UPLL_FB_DIV_MASK | UPLL_REF_DIV_MASK));
+	WREG32_P(CG_UPLL_FUNC_CNTL_2,
+		 UPLL_SW_HILEN(vclk_div >> 1) |
+		 UPLL_SW_LOLEN((vclk_div >> 1) + (vclk_div & 1)) |
+		 UPLL_SW_HILEN2(dclk_div >> 1) |
+		 UPLL_SW_LOLEN2((dclk_div >> 1) + (dclk_div & 1)) |
+		 UPLL_DIVEN_MASK | UPLL_DIVEN2_MASK,
+		 ~UPLL_SW_MASK);
+
+	/* give the PLL some time to settle */
+	mdelay(15);
+
+	/* deassert PLL_RESET */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_RESET_MASK);
+
+	mdelay(15);
+
+	/* deassert BYPASS EN */
+	WREG32_P(CG_UPLL_FUNC_CNTL, 0, ~UPLL_BYPASS_EN_MASK);
+
+	if (rdev->family >= CHIP_RS780)
+		WREG32_P(GFX_MACRO_BYPASS_CNTL, 0, ~UPLL_BYPASS_CNTL);
+
+	r = radeon_uvd_send_upll_ctlreq(rdev, CG_UPLL_FUNC_CNTL);
+	if (r)
+		return r;
+
+	/* switch VCLK and DCLK selection */
+	WREG32_P(CG_UPLL_FUNC_CNTL_2,
+		 VCLK_SRC_SEL(2) | DCLK_SRC_SEL(2),
+		 ~(VCLK_SRC_SEL_MASK | DCLK_SRC_SEL_MASK));
+
+	mdelay(100);
+
 	return 0;
 }

@ -992,6 +1080,8 @@ static int r600_pcie_gart_enable(struct radeon_device *rdev)
 	WREG32(MC_VM_L1_TLB_MCB_WR_GFX_CNTL, tmp);
 	WREG32(MC_VM_L1_TLB_MCB_RD_PDMA_CNTL, tmp);
 	WREG32(MC_VM_L1_TLB_MCB_WR_PDMA_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_UVD_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_UVD_CNTL, tmp);
 	WREG32(MC_VM_L1_TLB_MCB_RD_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
 	WREG32(MC_VM_L1_TLB_MCB_WR_SEM_CNTL, tmp | ENABLE_SEMAPHORE_MODE);
 	WREG32(VM_CONTEXT0_PAGE_TABLE_START_ADDR, rdev->mc.gtt_start >> 12);
@ -1042,6 +1132,8 @@ static void r600_pcie_gart_disable(struct radeon_device *rdev)
 	WREG32(MC_VM_L1_TLB_MCB_WR_SYS_CNTL, tmp);
 	WREG32(MC_VM_L1_TLB_MCB_RD_HDP_CNTL, tmp);
 	WREG32(MC_VM_L1_TLB_MCB_WR_HDP_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_RD_UVD_CNTL, tmp);
+	WREG32(MC_VM_L1_TLB_MCB_WR_UVD_CNTL, tmp);
 	radeon_gart_table_vram_unpin(rdev);
 }

@ -2917,6 +3009,18 @@ static int r600_startup(struct radeon_device *rdev)
 		return r;
 	}

+	if (rdev->has_uvd) {
+		r = uvd_v1_0_resume(rdev);
+		if (!r) {
+			r = radeon_fence_driver_start_ring(rdev, R600_RING_TYPE_UVD_INDEX);
+			if (r) {
+				dev_err(rdev->dev, "failed initializing UVD fences (%d).\n", r);
+			}
+		}
+		if (r)
+			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_size = 0;
+	}
+
 	/* Enable IRQ */
 	if (!rdev->irq.installed) {
 		r = radeon_irq_kms_init(rdev);
@ -2945,6 +3049,18 @@ static int r600_startup(struct radeon_device *rdev)
 	if (r)
 		return r;

+	if (rdev->has_uvd) {
+		ring = &rdev->ring[R600_RING_TYPE_UVD_INDEX];
+		if (ring->ring_size) {
+			r = radeon_ring_init(rdev, ring, ring->ring_size, 0,
+					     RADEON_CP_PACKET2);
+			if (!r)
+				r = uvd_v1_0_init(rdev);
+			if (r)
+				DRM_ERROR("radeon: failed initializing UVD (%d).\n", r);
+		}
+	}
+
 	r = radeon_ib_pool_init(rdev);
 	if (r) {
 		dev_err(rdev->dev, "IB initialization failed (%d).\n", r);
@ -3004,6 +3120,10 @@ int r600_suspend(struct radeon_device *rdev)
 	radeon_pm_suspend(rdev);
 	r600_audio_fini(rdev);
 	r600_cp_stop(rdev);
+	if (rdev->has_uvd) {
+		uvd_v1_0_fini(rdev);
+		radeon_uvd_suspend(rdev);
+	}
 	r600_irq_suspend(rdev);
 	radeon_wb_disable(rdev);
 	r600_pcie_gart_disable(rdev);
@ -3083,6 +3203,14 @@ int r600_init(struct radeon_device *rdev)
 	rdev->ring[RADEON_RING_TYPE_GFX_INDEX].ring_obj = NULL;
 	r600_ring_init(rdev, &rdev->ring[RADEON_RING_TYPE_GFX_INDEX], 1024 * 1024);

+	if (rdev->has_uvd) {
+		r = radeon_uvd_init(rdev);
+		if (!r) {
+			rdev->ring[R600_RING_TYPE_UVD_INDEX].ring_obj = NULL;
+			r600_ring_init(rdev, &rdev->ring[R600_RING_TYPE_UVD_INDEX], 4096);
+		}
+	}
+
 	rdev->ih.ring_obj = NULL;
 	r600_ih_ring_init(rdev, 64 * 1024);

@ -3112,6 +3240,10 @@ void r600_fini(struct radeon_device *rdev)
 	r600_audio_fini(rdev);
 	r600_cp_fini(rdev);
 	r600_irq_fini(rdev);
+	if (rdev->has_uvd) {
+		uvd_v1_0_fini(rdev);
+		radeon_uvd_fini(rdev);
+	}
 	radeon_wb_fini(rdev);
 	radeon_ib_pool_fini(rdev);
 	radeon_irq_kms_fini(rdev);
--- a/drivers/gpu/drm/radeon/r600d.h
+++ b/drivers/gpu/drm/radeon/r600d.h
@ -330,11 +330,12 @@
 #define	HDP_TILING_CONFIG				0x2F3C
 #define HDP_DEBUG1                                      0x2F34

+#define MC_CONFIG					0x2000
 #define MC_VM_AGP_TOP					0x2184
 #define MC_VM_AGP_BOT					0x2188
 #define	MC_VM_AGP_BASE					0x218C
 #define MC_VM_FB_LOCATION				0x2180
-#define MC_VM_L1_TLB_MCD_RD_A_CNTL			0x219C
+#define MC_VM_L1_TLB_MCB_RD_UVD_CNTL			0x2124
 #define 	ENABLE_L1_TLB					(1 << 0)
 #define		ENABLE_L1_FRAGMENT_PROCESSING			(1 << 1)
 #define		ENABLE_L1_STRICT_ORDERING			(1 << 2)
@ -354,12 +355,14 @@
 #define		EFFECTIVE_L1_QUEUE_SIZE(x)			(((x) & 7) << 15)
 #define		EFFECTIVE_L1_QUEUE_SIZE_MASK			0x00038000
 #define		EFFECTIVE_L1_QUEUE_SIZE_SHIFT			15
+#define MC_VM_L1_TLB_MCD_RD_A_CNTL			0x219C
 #define MC_VM_L1_TLB_MCD_RD_B_CNTL			0x21A0
 #define MC_VM_L1_TLB_MCB_RD_GFX_CNTL			0x21FC
 #define MC_VM_L1_TLB_MCB_RD_HDP_CNTL			0x2204
 #define MC_VM_L1_TLB_MCB_RD_PDMA_CNTL			0x2208
 #define MC_VM_L1_TLB_MCB_RD_SEM_CNTL			0x220C
 #define	MC_VM_L1_TLB_MCB_RD_SYS_CNTL			0x2200
+#define MC_VM_L1_TLB_MCB_WR_UVD_CNTL			0x212c
 #define MC_VM_L1_TLB_MCD_WR_A_CNTL			0x21A4
 #define MC_VM_L1_TLB_MCD_WR_B_CNTL			0x21A8
 #define MC_VM_L1_TLB_MCB_WR_GFX_CNTL			0x2210
@ -373,6 +376,8 @@
 #define MC_VM_SYSTEM_APERTURE_HIGH_ADDR			0x2194
 #define MC_VM_SYSTEM_APERTURE_DEFAULT_ADDR		0x2198

+#define RS_DQ_RD_RET_CONF				0x2348
+
 #define	PA_CL_ENHANCE					0x8A14
 #define		CLIP_VTX_REORDER_ENA				(1 << 0)
 #define		NUM_CLIP_SEQ(x)					((x) << 1)
@ -1483,6 +1488,7 @@
 #define UVD_CGC_GATE					0xf4a8
 #define UVD_LMI_CTRL2					0xf4f4
 #define UVD_MASTINT_EN					0xf500
+#define UVD_FW_START					0xf51C
 #define UVD_LMI_ADDR_EXT				0xf594
 #define UVD_LMI_CTRL					0xf598
 #define UVD_LMI_SWAP_CNTL				0xf5b4
@ -1495,6 +1501,13 @@
 #define UVD_MPC_SET_MUX					0xf5f4
 #define UVD_MPC_SET_ALU					0xf5f8

+#define UVD_VCPU_CACHE_OFFSET0				0xf608
+#define UVD_VCPU_CACHE_SIZE0				0xf60c
+#define UVD_VCPU_CACHE_OFFSET1				0xf610
+#define UVD_VCPU_CACHE_SIZE1				0xf614
+#define UVD_VCPU_CACHE_OFFSET2				0xf618
+#define UVD_VCPU_CACHE_SIZE2				0xf61c
+
 #define UVD_VCPU_CNTL					0xf660
 #define UVD_SOFT_RESET					0xf680
 #define		RBC_SOFT_RESET					(1<<0)
@ -1524,9 +1537,35 @@

 #define UVD_CONTEXT_ID					0xf6f4

+/* rs780 only */
+#define	GFX_MACRO_BYPASS_CNTL				0x30c0
+#define		SPLL_BYPASS_CNTL			(1 << 0)
+#define		UPLL_BYPASS_CNTL			(1 << 1)
+
+#define CG_UPLL_FUNC_CNTL				0x7e0
+#	define UPLL_RESET_MASK				0x00000001
+#	define UPLL_SLEEP_MASK				0x00000002
+#	define UPLL_BYPASS_EN_MASK			0x00000004
 #	define UPLL_CTLREQ_MASK				0x00000008
+#	define UPLL_FB_DIV(x)				((x) << 4)
+#	define UPLL_FB_DIV_MASK				0x0000FFF0
+#	define UPLL_REF_DIV(x)				((x) << 16)
+#	define UPLL_REF_DIV_MASK			0x003F0000
+#	define UPLL_REFCLK_SRC_SEL_MASK			0x20000000
 #	define UPLL_CTLACK_MASK				0x40000000
 #	define UPLL_CTLACK2_MASK			0x80000000
+#define CG_UPLL_FUNC_CNTL_2				0x7e4
+#	define UPLL_SW_HILEN(x)				((x) << 0)
+#	define UPLL_SW_LOLEN(x)				((x) << 4)
+#	define UPLL_SW_HILEN2(x)			((x) << 8)
+#	define UPLL_SW_LOLEN2(x)			((x) << 12)
+#	define UPLL_DIVEN_MASK				0x00010000
+#	define UPLL_DIVEN2_MASK				0x00020000
+#	define UPLL_SW_MASK				0x0003FFFF
+#	define VCLK_SRC_SEL(x)				((x) << 20)
+#	define VCLK_SRC_SEL_MASK			0x01F00000
+#	define DCLK_SRC_SEL(x)				((x) << 25)
+#	define DCLK_SRC_SEL_MASK			0x3E000000

 /*
 * PM4
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@ -120,9 +120,6 @@ extern int radeon_bapm;
 #define RADEONFB_CONN_LIMIT			4
 #define RADEON_BIOS_NUM_SCRATCH			8

-/* fence seq are set to this number when signaled */
-#define RADEON_FENCE_SIGNALED_SEQ		0LL
-
 /* internal ring indices */
 /* r1xx+ has gfx CP ring */
 #define RADEON_RING_TYPE_GFX_INDEX		0
@ -350,6 +347,7 @@ extern void evergreen_tiling_fields(unsigned tiling_flags, unsigned *bankw,
 * Fences.
 */
 struct radeon_fence_driver {
+	struct radeon_device		*rdev;
 	uint32_t			scratch_reg;
 	uint64_t			gpu_addr;
 	volatile uint32_t		*cpu_addr;
@ -357,6 +355,7 @@ struct radeon_fence_driver {
 	uint64_t			sync_seq[RADEON_NUM_RINGS];
 	atomic64_t			last_seq;
 	bool				initialized;
+	struct delayed_work		lockup_work;
 };

 struct radeon_fence {
@ -371,7 +370,7 @@ struct radeon_fence {
 int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring);
 int radeon_fence_driver_init(struct radeon_device *rdev);
 void radeon_fence_driver_fini(struct radeon_device *rdev);
-void radeon_fence_driver_force_completion(struct radeon_device *rdev);
+void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring);
 int radeon_fence_emit(struct radeon_device *rdev, struct radeon_fence **fence, int ring);
 void radeon_fence_process(struct radeon_device *rdev, int ring);
 bool radeon_fence_signaled(struct radeon_fence *fence);
@ -2326,7 +2325,7 @@ struct radeon_device {
 	bool				need_dma32;
 	bool				accel_working;
 	bool				fastfb_working; /* IGP feature*/
-	bool				needs_reset;
+	bool				needs_reset, in_reset;
 	struct radeon_surface_reg surface_regs[RADEON_GEM_MAX_SURFACES];
 	const struct firmware *me_fw;	/* all family ME firmware */
 	const struct firmware *pfp_fw;	/* r6/700 PFP firmware */
@ -2347,7 +2346,6 @@ struct radeon_device {
 	struct radeon_mec mec;
 	struct work_struct hotplug_work;
 	struct work_struct audio_work;
-	struct work_struct reset_work;
 	int num_crtc; /* number of crtcs */
 	struct mutex dc_hw_i2c_mutex; /* display controller hw i2c mutex */
 	bool has_uvd;
--- a/drivers/gpu/drm/radeon/radeon_asic.c
+++ b/drivers/gpu/drm/radeon/radeon_asic.c
@ -965,6 +965,19 @@ static struct radeon_asic r600_asic = {
 	},
 };

+static struct radeon_asic_ring rv6xx_uvd_ring = {
+	.ib_execute = &uvd_v1_0_ib_execute,
+	.emit_fence = &uvd_v1_0_fence_emit,
+	.emit_semaphore = &uvd_v1_0_semaphore_emit,
+	.cs_parse = &radeon_uvd_cs_parse,
+	.ring_test = &uvd_v1_0_ring_test,
+	.ib_test = &uvd_v1_0_ib_test,
+	.is_lockup = &radeon_ring_test_lockup,
+	.get_rptr = &uvd_v1_0_get_rptr,
+	.get_wptr = &uvd_v1_0_get_wptr,
+	.set_wptr = &uvd_v1_0_set_wptr,
+};
+
 static struct radeon_asic rv6xx_asic = {
 	.init = &r600_init,
 	.fini = &r600_fini,
@ -984,6 +997,7 @@ static struct radeon_asic rv6xx_asic = {
 	.ring = {
 		[RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring,
 		[R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring,
+		[R600_RING_TYPE_UVD_INDEX] = &rv6xx_uvd_ring,
 	},
 	.irq = {
 		.set = &r600_irq_set,
@ -1074,6 +1088,7 @@ static struct radeon_asic rs780_asic = {
 	.ring = {
 		[RADEON_RING_TYPE_GFX_INDEX] = &r600_gfx_ring,
 		[R600_RING_TYPE_DMA_INDEX] = &r600_dma_ring,
+		[R600_RING_TYPE_UVD_INDEX] = &rv6xx_uvd_ring,
 	},
 	.irq = {
 		.set = &r600_irq_set,
@ -2298,7 +2313,15 @@ int radeon_asic_init(struct radeon_device *rdev)
 	case CHIP_RS780:
 	case CHIP_RS880:
 		rdev->asic = &rs780_asic;
-		rdev->has_uvd = true;
+		/* 760G/780V/880V don't have UVD */
+		if ((rdev->pdev->device == 0x9616)||
+		    (rdev->pdev->device == 0x9611)||
+		    (rdev->pdev->device == 0x9613)||
+		    (rdev->pdev->device == 0x9711)||
+		    (rdev->pdev->device == 0x9713))
+			rdev->has_uvd = false;
+		else
+			rdev->has_uvd = true;
 		break;
 	case CHIP_RV770:
 	case CHIP_RV730:
--- a/drivers/gpu/drm/radeon/radeon_asic.h
+++ b/drivers/gpu/drm/radeon/radeon_asic.h
@ -883,6 +883,7 @@ uint32_t uvd_v1_0_get_wptr(struct radeon_device *rdev,
                           struct radeon_ring *ring);
 void uvd_v1_0_set_wptr(struct radeon_device *rdev,
                       struct radeon_ring *ring);
+int uvd_v1_0_resume(struct radeon_device *rdev);

 int uvd_v1_0_init(struct radeon_device *rdev);
 void uvd_v1_0_fini(struct radeon_device *rdev);
@ -890,6 +891,8 @@ int uvd_v1_0_start(struct radeon_device *rdev);
 void uvd_v1_0_stop(struct radeon_device *rdev);

 int uvd_v1_0_ring_test(struct radeon_device *rdev, struct radeon_ring *ring);
+void uvd_v1_0_fence_emit(struct radeon_device *rdev,
+			 struct radeon_fence *fence);
 int uvd_v1_0_ib_test(struct radeon_device *rdev, struct radeon_ring *ring);
 bool uvd_v1_0_semaphore_emit(struct radeon_device *rdev,
 			     struct radeon_ring *ring,
--- a/drivers/gpu/drm/radeon/radeon_cs.c
+++ b/drivers/gpu/drm/radeon/radeon_cs.c
@ -137,10 +137,13 @@ static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
 			   + !!r->write_domain;

 		/* the first reloc of an UVD job is the msg and that must be in
-		   VRAM, also but everything into VRAM on AGP cards to avoid
-		   image corruptions */
+		   VRAM, also but everything into VRAM on AGP cards and older
+		   IGP chips to avoid image corruptions */
 		if (p->ring == R600_RING_TYPE_UVD_INDEX &&
-		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev))) {
+		    (i == 0 || drm_pci_device_is_agp(p->rdev->ddev) ||
+		     p->rdev->family == CHIP_RS780 ||
+		     p->rdev->family == CHIP_RS880)) {
+
 			/* TODO: is this still needed for NI+ ? */
 			p->relocs[i].prefered_domains =
 				RADEON_GEM_DOMAIN_VRAM;
@ -650,6 +653,13 @@ int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 		up_read(&rdev->exclusive_lock);
 		return -EBUSY;
 	}
+	if (rdev->in_reset) {
+		up_read(&rdev->exclusive_lock);
+		r = radeon_gpu_reset(rdev);
+		if (!r)
+			r = -EAGAIN;
+		return r;
+	}
 	/* initialize parser */
 	memset(&parser, 0, sizeof(struct radeon_cs_parser));
 	parser.filp = filp;
--- a/drivers/gpu/drm/radeon/radeon_device.c
+++ b/drivers/gpu/drm/radeon/radeon_device.c
@ -1397,10 +1397,6 @@ int radeon_device_init(struct radeon_device *rdev,
 	if (r)
 		return r;

-	r = radeon_ib_ring_tests(rdev);
-	if (r)
-		DRM_ERROR("ib ring test failed (%d).\n", r);
-
 	r = radeon_gem_debugfs_init(rdev);
 	if (r) {
 		DRM_ERROR("registering gem debugfs failed (%d).\n", r);
@ -1418,6 +1414,10 @@ int radeon_device_init(struct radeon_device *rdev,
 			return r;
 	}

+	r = radeon_ib_ring_tests(rdev);
+	if (r)
+		DRM_ERROR("ib ring test failed (%d).\n", r);
+
 	if ((radeon_testing & 1)) {
 		if (rdev->accel_working)
 			radeon_test_moves(rdev);
@ -1488,7 +1488,6 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
 	struct drm_crtc *crtc;
 	struct drm_connector *connector;
 	int i, r;
-	bool force_completion = false;

 	if (dev == NULL || dev->dev_private == NULL) {
 		return -ENODEV;
@ -1532,12 +1531,9 @@ int radeon_suspend_kms(struct drm_device *dev, bool suspend, bool fbcon)
 		r = radeon_fence_wait_empty(rdev, i);
 		if (r) {
 			/* delay GPU reset to resume */
-			force_completion = true;
+			radeon_fence_driver_force_completion(rdev, i);
 		}
 	}
-	if (force_completion) {
-		radeon_fence_driver_force_completion(rdev);
-	}

 	radeon_save_bios_scratch_regs(rdev);

@ -1677,8 +1673,6 @@ int radeon_gpu_reset(struct radeon_device *rdev)
 		return 0;
 	}

-	rdev->needs_reset = false;
-
 	radeon_save_bios_scratch_regs(rdev);
 	/* block TTM */
 	resched = ttm_bo_lock_delayed_workqueue(&rdev->mman.bdev);
@ -1695,7 +1689,6 @@ int radeon_gpu_reset(struct radeon_device *rdev)
 		}
 	}

-retry:
 	r = radeon_asic_reset(rdev);
 	if (!r) {
 		dev_info(rdev->dev, "GPU reset succeeded, trying to resume\n");
@ -1704,26 +1697,12 @@ retry:

 	radeon_restore_bios_scratch_regs(rdev);

-	if (!r) {
-		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		if (!r && ring_data[i]) {
 			radeon_ring_restore(rdev, &rdev->ring[i],
 					    ring_sizes[i], ring_data[i]);
-			ring_sizes[i] = 0;
-			ring_data[i] = NULL;
-		}
-
-		r = radeon_ib_ring_tests(rdev);
-		if (r) {
-			dev_err(rdev->dev, "ib ring test failed (%d).\n", r);
-			if (saved) {
-				saved = false;
-				radeon_suspend(rdev);
-				goto retry;
-			}
-		}
-	} else {
-		radeon_fence_driver_force_completion(rdev);
-		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		} else {
+			radeon_fence_driver_force_completion(rdev, i);
 			kfree(ring_data[i]);
 		}
 	}
@ -1755,19 +1734,32 @@ retry:
 	/* reset hpd state */
 	radeon_hpd_init(rdev);

+	ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
+
+	rdev->in_reset = true;
+	rdev->needs_reset = false;
+
+	downgrade_write(&rdev->exclusive_lock);
+
 	drm_helper_resume_force_mode(rdev->ddev);

 	/* set the power state here in case we are a PX system or headless */
 	if ((rdev->pm.pm_method == PM_METHOD_DPM) && rdev->pm.dpm_enabled)
 		radeon_pm_compute_clocks(rdev);

-	ttm_bo_unlock_delayed_workqueue(&rdev->mman.bdev, resched);
-	if (r) {
+	if (!r) {
+		r = radeon_ib_ring_tests(rdev);
+		if (r && saved)
+			r = -EAGAIN;
+	} else {
 		/* bad news, how to tell it to userspace ? */
 		dev_info(rdev->dev, "GPU reset failed\n");
 	}

-	up_write(&rdev->exclusive_lock);
+	rdev->needs_reset = r == -EAGAIN;
+	rdev->in_reset = false;
+
+	up_read(&rdev->exclusive_lock);
 	return r;
 }

--- a/drivers/gpu/drm/radeon/radeon_display.c
+++ b/drivers/gpu/drm/radeon/radeon_display.c
@ -405,7 +405,9 @@ static void radeon_flip_work_func(struct work_struct *__work)
 		r = radeon_fence_wait(work->fence, false);
 		if (r == -EDEADLK) {
 			up_read(&rdev->exclusive_lock);
-			r = radeon_gpu_reset(rdev);
+			do {
+				r = radeon_gpu_reset(rdev);
+			} while (r == -EAGAIN);
 			down_read(&rdev->exclusive_lock);
 		}
 		if (r)
--- a/drivers/gpu/drm/radeon/radeon_fence.c
+++ b/drivers/gpu/drm/radeon/radeon_fence.c
@ -97,6 +97,25 @@ static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
 	return seq;
 }

+/**
+ * radeon_fence_schedule_check - schedule lockup check
+ *
+ * @rdev: radeon_device pointer
+ * @ring: ring index we should work with
+ *
+ * Queues a delayed work item to check for lockups.
+ */
+static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
+{
+	/*
+	 * Do not reset the timer here with mod_delayed_work,
+	 * this can livelock in an interaction with TTM delayed destroy.
+	 */
+	queue_delayed_work(system_power_efficient_wq,
+			   &rdev->fence_drv[ring].lockup_work,
+			   RADEON_FENCE_JIFFIES_TIMEOUT);
+}
+
 /**
 * radeon_fence_emit - emit a fence on the requested ring
 *
@ -122,19 +141,21 @@ int radeon_fence_emit(struct radeon_device *rdev,
 	(*fence)->ring = ring;
 	radeon_fence_ring_emit(rdev, ring, *fence);
 	trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
+	radeon_fence_schedule_check(rdev, ring);
 	return 0;
 }

 /**
- * radeon_fence_process - process a fence
+ * radeon_fence_activity - check for fence activity
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
- * Checks the current fence value and wakes the fence queue
- * if the sequence number has increased (all asics).
+ * Checks the current fence value and calculates the last
+ * signalled fence value. Returns true if activity occured
+ * on the ring, and the fence_queue should be waken up.
 */
-void radeon_fence_process(struct radeon_device *rdev, int ring)
+static bool radeon_fence_activity(struct radeon_device *rdev, int ring)
 {
 	uint64_t seq, last_seq, last_emitted;
 	unsigned count_loop = 0;
@ -190,7 +211,67 @@ void radeon_fence_process(struct radeon_device *rdev, int ring)
 		}
 	} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);

-	if (wake)
+	if (seq < last_emitted)
+		radeon_fence_schedule_check(rdev, ring);
+
+	return wake;
+}
+
+/**
+ * radeon_fence_check_lockup - check for hardware lockup
+ *
+ * @work: delayed work item
+ *
+ * Checks for fence activity and if there is none probe
+ * the hardware if a lockup occured.
+ */
+static void radeon_fence_check_lockup(struct work_struct *work)
+{
+	struct radeon_fence_driver *fence_drv;
+	struct radeon_device *rdev;
+	int ring;
+
+	fence_drv = container_of(work, struct radeon_fence_driver,
+				 lockup_work.work);
+	rdev = fence_drv->rdev;
+	ring = fence_drv - &rdev->fence_drv[0];
+
+	if (!down_read_trylock(&rdev->exclusive_lock)) {
+		/* just reschedule the check if a reset is going on */
+		radeon_fence_schedule_check(rdev, ring);
+		return;
+	}
+
+	if (radeon_fence_activity(rdev, ring))
+		wake_up_all(&rdev->fence_queue);
+
+	else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
+
+		/* good news we believe it's a lockup */
+		dev_warn(rdev->dev, "GPU lockup (current fence id "
+			 "0x%016llx last fence id 0x%016llx on ring %d)\n",
+			 (uint64_t)atomic64_read(&fence_drv->last_seq),
+			 fence_drv->sync_seq[ring], ring);
+
+		/* remember that we need an reset */
+		rdev->needs_reset = true;
+		wake_up_all(&rdev->fence_queue);
+	}
+	up_read(&rdev->exclusive_lock);
+}
+
+/**
+ * radeon_fence_process - process a fence
+ *
+ * @rdev: radeon_device pointer
+ * @ring: ring index the fence is associated with
+ *
+ * Checks the current fence value and wakes the fence queue
+ * if the sequence number has increased (all asics).
+ */
+void radeon_fence_process(struct radeon_device *rdev, int ring)
+{
+	if (radeon_fence_activity(rdev, ring))
 		wake_up_all(&rdev->fence_queue);
 }

@ -247,16 +328,10 @@ static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
 */
 bool radeon_fence_signaled(struct radeon_fence *fence)
 {
-	if (!fence) {
+	if (!fence)
 		return true;
-	}
-	if (fence->seq == RADEON_FENCE_SIGNALED_SEQ) {
+	if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring))
 		return true;
-	}
-	if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring)) {
-		fence->seq = RADEON_FENCE_SIGNALED_SEQ;
-		return true;
-	}
 	return false;
 }

@ -283,110 +358,70 @@ static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
 }

 /**
- * radeon_fence_wait_seq - wait for a specific sequence numbers
+ * radeon_fence_wait_seq_timeout - wait for a specific sequence numbers
 *
 * @rdev: radeon device pointer
 * @target_seq: sequence number(s) we want to wait for
 * @intr: use interruptable sleep
+ * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
 *
 * Wait for the requested sequence number(s) to be written by any ring
 * (all asics).  Sequnce number array is indexed by ring id.
 * @intr selects whether to use interruptable (true) or non-interruptable
 * (false) sleep when waiting for the sequence number.  Helper function
 * for radeon_fence_wait_*().
- * Returns 0 if the sequence number has passed, error for all other cases.
+ * Returns remaining time if the sequence number has passed, 0 when
+ * the wait timeout, or an error for all other cases.
 * -EDEADLK is returned when a GPU lockup has been detected.
 */
-static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
-				 bool intr)
+static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
+					  u64 *target_seq, bool intr,
+					  long timeout)
 {
-	uint64_t last_seq[RADEON_NUM_RINGS];
-	bool signaled;
-	int i, r;
+	long r;
+	int i;

-	while (!radeon_fence_any_seq_signaled(rdev, target_seq)) {
+	if (radeon_fence_any_seq_signaled(rdev, target_seq))
+		return timeout;

-		/* Save current sequence values, used to check for GPU lockups */
-		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-			if (!target_seq[i])
-				continue;
+	/* enable IRQs and tracing */
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		if (!target_seq[i])
+			continue;

-			last_seq[i] = atomic64_read(&rdev->fence_drv[i].last_seq);
-			trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
-			radeon_irq_kms_sw_irq_get(rdev, i);
-		}
-
-		if (intr) {
-			r = wait_event_interruptible_timeout(rdev->fence_queue, (
-				(signaled = radeon_fence_any_seq_signaled(rdev, target_seq))
-				 || rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT);
-		} else {
-			r = wait_event_timeout(rdev->fence_queue, (
-				(signaled = radeon_fence_any_seq_signaled(rdev, target_seq))
-				 || rdev->needs_reset), RADEON_FENCE_JIFFIES_TIMEOUT);
-		}
-
-		for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-			if (!target_seq[i])
-				continue;
-
-			radeon_irq_kms_sw_irq_put(rdev, i);
-			trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
-		}
-
-		if (unlikely(r < 0))
-			return r;
-
-		if (unlikely(!signaled)) {
-			if (rdev->needs_reset)
-				return -EDEADLK;
-
-			/* we were interrupted for some reason and fence
-			 * isn't signaled yet, resume waiting */
-			if (r)
-				continue;
-
-			for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-				if (!target_seq[i])
-					continue;
-
-				if (last_seq[i] != atomic64_read(&rdev->fence_drv[i].last_seq))
-					break;
-			}
-
-			if (i != RADEON_NUM_RINGS)
-				continue;
-
-			for (i = 0; i < RADEON_NUM_RINGS; ++i) {
-				if (!target_seq[i])
-					continue;
-
-				if (radeon_ring_is_lockup(rdev, i, &rdev->ring[i]))
-					break;
-			}
-
-			if (i < RADEON_NUM_RINGS) {
-				/* good news we believe it's a lockup */
-				dev_warn(rdev->dev, "GPU lockup (waiting for "
-					 "0x%016llx last fence id 0x%016llx on"
-					 " ring %d)\n",
-					 target_seq[i], last_seq[i], i);
-
-				/* remember that we need an reset */
-				rdev->needs_reset = true;
-				wake_up_all(&rdev->fence_queue);
-				return -EDEADLK;
-			}
-		}
+		trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
+		radeon_irq_kms_sw_irq_get(rdev, i);
 	}
-	return 0;
+
+	if (intr) {
+		r = wait_event_interruptible_timeout(rdev->fence_queue, (
+			radeon_fence_any_seq_signaled(rdev, target_seq)
+			 || rdev->needs_reset), timeout);
+	} else {
+		r = wait_event_timeout(rdev->fence_queue, (
+			radeon_fence_any_seq_signaled(rdev, target_seq)
+			 || rdev->needs_reset), timeout);
+	}
+
+	if (rdev->needs_reset)
+		r = -EDEADLK;
+
+	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
+		if (!target_seq[i])
+			continue;
+
+		radeon_irq_kms_sw_irq_put(rdev, i);
+		trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
+	}
+
+	return r;
 }

 /**
 * radeon_fence_wait - wait for a fence to signal
 *
 * @fence: radeon fence object
- * @intr: use interruptable sleep
+ * @intr: use interruptible sleep
 *
 * Wait for the requested fence to signal (all asics).
 * @intr selects whether to use interruptable (true) or non-interruptable
@ -396,7 +431,7 @@ static int radeon_fence_wait_seq(struct radeon_device *rdev, u64 *target_seq,
 int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 {
 	uint64_t seq[RADEON_NUM_RINGS] = {};
-	int r;
+	long r;

 	if (fence == NULL) {
 		WARN(1, "Querying an invalid fence : %p !\n", fence);
@ -404,14 +439,11 @@ int radeon_fence_wait(struct radeon_fence *fence, bool intr)
 	}

 	seq[fence->ring] = fence->seq;
-	if (seq[fence->ring] == RADEON_FENCE_SIGNALED_SEQ)
-		return 0;
-
-	r = radeon_fence_wait_seq(fence->rdev, seq, intr);
-	if (r)
+	r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
+	if (r < 0) {
 		return r;
+	}

-	fence->seq = RADEON_FENCE_SIGNALED_SEQ;
 	return 0;
 }

@ -434,7 +466,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
 {
 	uint64_t seq[RADEON_NUM_RINGS];
 	unsigned i, num_rings = 0;
-	int r;
+	long r;

 	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
 		seq[i] = 0;
@ -445,18 +477,14 @@ int radeon_fence_wait_any(struct radeon_device *rdev,

 		seq[i] = fences[i]->seq;
 		++num_rings;
-
-		/* test if something was allready signaled */
-		if (seq[i] == RADEON_FENCE_SIGNALED_SEQ)
-			return 0;
 	}

 	/* nothing to wait for ? */
 	if (num_rings == 0)
 		return -ENOENT;

-	r = radeon_fence_wait_seq(rdev, seq, intr);
-	if (r) {
+	r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
+	if (r < 0) {
 		return r;
 	}
 	return 0;
@ -475,6 +503,7 @@ int radeon_fence_wait_any(struct radeon_device *rdev,
 int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
 {
 	uint64_t seq[RADEON_NUM_RINGS] = {};
+	long r;

 	seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
 	if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
@ -482,7 +511,10 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
 		   already the last emited fence */
 		return -ENOENT;
 	}
-	return radeon_fence_wait_seq(rdev, seq, false);
+	r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
+	if (r < 0)
+		return r;
+	return 0;
 }

 /**
@ -498,18 +530,18 @@ int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
 int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
 {
 	uint64_t seq[RADEON_NUM_RINGS] = {};
-	int r;
+	long r;

 	seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
 	if (!seq[ring])
 		return 0;

-	r = radeon_fence_wait_seq(rdev, seq, false);
-	if (r) {
+	r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
+	if (r < 0) {
 		if (r == -EDEADLK)
 			return -EDEADLK;

-		dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%d)\n",
+		dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
 			ring, r);
 	}
 	return 0;
@ -711,6 +743,9 @@ static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
 		rdev->fence_drv[ring].sync_seq[i] = 0;
 	atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
 	rdev->fence_drv[ring].initialized = false;
+	INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work,
+			  radeon_fence_check_lockup);
+	rdev->fence_drv[ring].rdev = rdev;
 }

 /**
@ -758,8 +793,9 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
 		r = radeon_fence_wait_empty(rdev, ring);
 		if (r) {
 			/* no need to trigger GPU reset as we are unloading */
-			radeon_fence_driver_force_completion(rdev);
+			radeon_fence_driver_force_completion(rdev, ring);
 		}
+		cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
 		wake_up_all(&rdev->fence_queue);
 		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
 		rdev->fence_drv[ring].initialized = false;
@ -771,18 +807,16 @@ void radeon_fence_driver_fini(struct radeon_device *rdev)
 * radeon_fence_driver_force_completion - force all fence waiter to complete
 *
 * @rdev: radeon device pointer
+ * @ring: the ring to complete
 *
 * In case of GPU reset failure make sure no process keep waiting on fence
 * that will never complete.
 */
-void radeon_fence_driver_force_completion(struct radeon_device *rdev)
+void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
 {
-	int ring;
-
-	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
-		if (!rdev->fence_drv[ring].initialized)
-			continue;
+	if (rdev->fence_drv[ring].initialized) {
 		radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
+		cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
 	}
 }

@ -833,6 +867,7 @@ static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data)
 	down_read(&rdev->exclusive_lock);
 	seq_printf(m, "%d\n", rdev->needs_reset);
 	rdev->needs_reset = true;
+	wake_up_all(&rdev->fence_queue);
 	up_read(&rdev->exclusive_lock);

 	return 0;
--- a/drivers/gpu/drm/radeon/radeon_ib.c
+++ b/drivers/gpu/drm/radeon/radeon_ib.c
@ -269,6 +269,7 @@ int radeon_ib_ring_tests(struct radeon_device *rdev)

 		r = radeon_ib_test(rdev, i, ring);
 		if (r) {
+			radeon_fence_driver_force_completion(rdev, i);
 			ring->ready = false;
 			rdev->needs_reset = false;

--- a/drivers/gpu/drm/radeon/radeon_irq_kms.c
+++ b/drivers/gpu/drm/radeon/radeon_irq_kms.c
@ -87,23 +87,6 @@ static void radeon_hotplug_work_func(struct work_struct *work)
 	drm_helper_hpd_irq_event(dev);
 }

-/**
- * radeon_irq_reset_work_func - execute gpu reset
- *
- * @work: work struct
- *
- * Execute scheduled gpu reset (cayman+).
- * This function is called when the irq handler
- * thinks we need a gpu reset.
- */
-static void radeon_irq_reset_work_func(struct work_struct *work)
-{
-	struct radeon_device *rdev = container_of(work, struct radeon_device,
-						  reset_work);
-
-	radeon_gpu_reset(rdev);
-}
-
 /**
 * radeon_driver_irq_preinstall_kms - drm irq preinstall callback
 *
@ -284,7 +267,6 @@ int radeon_irq_kms_init(struct radeon_device *rdev)

 	INIT_WORK(&rdev->hotplug_work, radeon_hotplug_work_func);
 	INIT_WORK(&rdev->audio_work, r600_audio_update_hdmi);
-	INIT_WORK(&rdev->reset_work, radeon_irq_reset_work_func);

 	rdev->irq.installed = true;
 	r = drm_irq_install(rdev->ddev, rdev->ddev->pdev->irq);
--- a/drivers/gpu/drm/radeon/radeon_uvd.c
+++ b/drivers/gpu/drm/radeon/radeon_uvd.c
@ -40,12 +40,18 @@
 #define UVD_IDLE_TIMEOUT_MS	1000

 /* Firmware Names */
+#define FIRMWARE_R600		"radeon/R600_uvd.bin"
+#define FIRMWARE_RS780		"radeon/RS780_uvd.bin"
+#define FIRMWARE_RV770		"radeon/RV770_uvd.bin"
 #define FIRMWARE_RV710		"radeon/RV710_uvd.bin"
 #define FIRMWARE_CYPRESS	"radeon/CYPRESS_uvd.bin"
 #define FIRMWARE_SUMO		"radeon/SUMO_uvd.bin"
 #define FIRMWARE_TAHITI		"radeon/TAHITI_uvd.bin"
 #define FIRMWARE_BONAIRE	"radeon/BONAIRE_uvd.bin"

+MODULE_FIRMWARE(FIRMWARE_R600);
+MODULE_FIRMWARE(FIRMWARE_RS780);
+MODULE_FIRMWARE(FIRMWARE_RV770);
 MODULE_FIRMWARE(FIRMWARE_RV710);
 MODULE_FIRMWARE(FIRMWARE_CYPRESS);
 MODULE_FIRMWARE(FIRMWARE_SUMO);
@ -63,6 +69,23 @@ int radeon_uvd_init(struct radeon_device *rdev)
 	INIT_DELAYED_WORK(&rdev->uvd.idle_work, radeon_uvd_idle_work_handler);

 	switch (rdev->family) {
+	case CHIP_RV610:
+	case CHIP_RV630:
+	case CHIP_RV670:
+	case CHIP_RV620:
+	case CHIP_RV635:
+		fw_name = FIRMWARE_R600;
+		break;
+
+	case CHIP_RS780:
+	case CHIP_RS880:
+		fw_name = FIRMWARE_RS780;
+		break;
+
+	case CHIP_RV770:
+		fw_name = FIRMWARE_RV770;
+		break;
+
 	case CHIP_RV710:
 	case CHIP_RV730:
 	case CHIP_RV740:
--- a/drivers/gpu/drm/radeon/uvd_v1_0.c
+++ b/drivers/gpu/drm/radeon/uvd_v1_0.c
@ -22,6 +22,7 @@
 * Authors: Christian König <christian.koenig@amd.com>
 */

+#include <linux/firmware.h>
 #include <drm/drmP.h>
 #include "radeon.h"
 #include "radeon_asic.h"
@ -69,6 +70,82 @@ void uvd_v1_0_set_wptr(struct radeon_device *rdev,
 	WREG32(UVD_RBC_RB_WPTR, ring->wptr);
 }

+/**
+ * uvd_v1_0_fence_emit - emit an fence & trap command
+ *
+ * @rdev: radeon_device pointer
+ * @fence: fence to emit
+ *
+ * Write a fence and a trap command to the ring.
+ */
+void uvd_v1_0_fence_emit(struct radeon_device *rdev,
+			 struct radeon_fence *fence)
+{
+	struct radeon_ring *ring = &rdev->ring[fence->ring];
+	uint64_t addr = rdev->fence_drv[fence->ring].gpu_addr;
+
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
+	radeon_ring_write(ring, addr & 0xffffffff);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
+	radeon_ring_write(ring, fence->seq);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
+	radeon_ring_write(ring, 0);
+
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA0, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_DATA1, 0));
+	radeon_ring_write(ring, 0);
+	radeon_ring_write(ring, PACKET0(UVD_GPCOM_VCPU_CMD, 0));
+	radeon_ring_write(ring, 2);
+	return;
+}
+
+/**
+ * uvd_v1_0_resume - memory controller programming
+ *
+ * @rdev: radeon_device pointer
+ *
+ * Let the UVD memory controller know it's offsets
+ */
+int uvd_v1_0_resume(struct radeon_device *rdev)
+{
+	uint64_t addr;
+	uint32_t size;
+	int r;
+
+	r = radeon_uvd_resume(rdev);
+	if (r)
+		return r;
+
+	/* programm the VCPU memory controller bits 0-27 */
+	addr = (rdev->uvd.gpu_addr >> 3) + 16;
+	size = RADEON_GPU_PAGE_ALIGN(rdev->uvd_fw->size) >> 3;
+	WREG32(UVD_VCPU_CACHE_OFFSET0, addr);
+	WREG32(UVD_VCPU_CACHE_SIZE0, size);
+
+	addr += size;
+	size = RADEON_UVD_STACK_SIZE >> 3;
+	WREG32(UVD_VCPU_CACHE_OFFSET1, addr);
+	WREG32(UVD_VCPU_CACHE_SIZE1, size);
+
+	addr += size;
+	size = RADEON_UVD_HEAP_SIZE >> 3;
+	WREG32(UVD_VCPU_CACHE_OFFSET2, addr);
+	WREG32(UVD_VCPU_CACHE_SIZE2, size);
+
+	/* bits 28-31 */
+	addr = (rdev->uvd.gpu_addr >> 28) & 0xF;
+	WREG32(UVD_LMI_ADDR_EXT, (addr << 12) | (addr << 0));
+
+	/* bits 32-39 */
+	addr = (rdev->uvd.gpu_addr >> 32) & 0xFF;
+	WREG32(UVD_LMI_EXT40_ADDR, addr | (0x9 << 16) | (0x1 << 31));
+
+	WREG32(UVD_FW_START, *((uint32_t*)rdev->uvd.cpu_addr));
+
+	return 0;
+}
+
 /**
 * uvd_v1_0_init - start and test UVD block
 *
@ -130,8 +207,32 @@ done:
 	/* lower clocks again */
 	radeon_set_uvd_clocks(rdev, 0, 0);

-	if (!r)
+	if (!r) {
+		switch (rdev->family) {
+		case CHIP_RV610:
+		case CHIP_RV630:
+		case CHIP_RV620:
+			/* 64byte granularity workaround */
+			WREG32(MC_CONFIG, 0);
+			WREG32(MC_CONFIG, 1 << 4);
+			WREG32(RS_DQ_RD_RET_CONF, 0x3f);
+			WREG32(MC_CONFIG, 0x1f);
+
+			/* fall through */
+		case CHIP_RV670:
+		case CHIP_RV635:
+
+			/* write clean workaround */
+			WREG32_P(UVD_VCPU_CNTL, 0x10, ~0x10);
+			break;
+
+		default:
+			/* TODO: Do we need more? */
+			break;
+		}
+
 		DRM_INFO("UVD initialized successfully.\n");
+	}

 	return r;
 }
@ -218,12 +319,12 @@ int uvd_v1_0_start(struct radeon_device *rdev)
 	/* enable UMC */
 	WREG32_P(UVD_LMI_CTRL2, 0, ~(1 << 8));

+	WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
+
 	/* boot up the VCPU */
 	WREG32(UVD_SOFT_RESET, 0);
 	mdelay(10);

-	WREG32_P(UVD_RB_ARB_CTRL, 0, ~(1 << 3));
-
 	for (i = 0; i < 10; ++i) {
 		uint32_t status;
 		for (j = 0; j < 100; ++j) {
--- a/drivers/gpu/drm/radeon/uvd_v2_2.c
+++ b/drivers/gpu/drm/radeon/uvd_v2_2.c
@ -72,6 +72,10 @@ int uvd_v2_2_resume(struct radeon_device *rdev)
 	uint32_t chip_id, size;
 	int r;

+	/* RV770 uses V1.0 MC */
+	if (rdev->family == CHIP_RV770)
+		return uvd_v1_0_resume(rdev);
+
 	r = radeon_uvd_resume(rdev);
 	if (r)
 		return r;