Merge branch 'etnaviv/next' of https://git.pengutronix.de/git/lst/linux into drm-next

Most importantly per-process address spaces on GPUs that are capable of providing proper isolation has finished baking. This is the base for our softpin implementation, which allows us to support the texture descriptor buffers used by GC7000 series GPUs without a major UAPI extension/rework. Shortlog of notable changes: - code cleanup from Fabio - fix performance counters on GC880 and GC2000 GPUs from Christian - drmP.h header removal from Sam - per process address space support on MMUv2 GPUs from me - softpin support from me Signed-off-by: Dave Airlie <airlied@redhat.com> From: Lucas Stach <l.stach@pengutronix.de> Link: https://patchwork.freedesktop.org/patch/msgid/1565946875.2641.73.camel@pengutronix.de
2019-08-22 13:21:16 +10:00 · 2019-08-22 13:21:16 +10:00 · ae45300626
parent c011b93c1a 088880ddc0
commit ae45300626
21 changed files with 936 additions and 673 deletions
--- a/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_buffer.c
@ -3,6 +3,8 @@
 * Copyright (C) 2014-2018 Etnaviv Project
 */

+#include <drm/drm_drv.h>
+
 #include "etnaviv_cmdbuf.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_gem.h"
@ -116,7 +118,9 @@ static void etnaviv_buffer_dump(struct etnaviv_gpu *gpu,
 	u32 *ptr = buf->vaddr + off;

 	dev_info(gpu->dev, "virt %p phys 0x%08x free 0x%08x\n",
-			ptr, etnaviv_cmdbuf_get_va(buf) + off, size - len * 4 - off);
+			ptr, etnaviv_cmdbuf_get_va(buf,
+			&gpu->mmu_context->cmdbuf_mapping) +
+			off, size - len * 4 - off);

 	print_hex_dump(KERN_INFO, "cmd ", DUMP_PREFIX_OFFSET, 16, 4,
 			ptr, len * 4, 0);
@ -149,7 +153,9 @@ static u32 etnaviv_buffer_reserve(struct etnaviv_gpu *gpu,
 	if (buffer->user_size + cmd_dwords * sizeof(u64) > buffer->size)
 		buffer->user_size = 0;

-	return etnaviv_cmdbuf_get_va(buffer) + buffer->user_size;
+	return etnaviv_cmdbuf_get_va(buffer,
+				     &gpu->mmu_context->cmdbuf_mapping) +
+	       buffer->user_size;
 }

 u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
@ -162,8 +168,9 @@ u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu)
 	buffer->user_size = 0;

 	CMD_WAIT(buffer);
-	CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer) +
-		 buffer->user_size - 4);
+	CMD_LINK(buffer, 2,
+		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
+		 + buffer->user_size - 4);

 	return buffer->user_size / 8;
 }
@ -203,7 +210,7 @@ u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe
 	return buffer->user_size / 8;
 }

-u16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu)
+u16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu, unsigned short id)
 {
 	struct etnaviv_cmdbuf *buffer = &gpu->buffer;

@ -212,7 +219,7 @@ u16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu)
 	buffer->user_size = 0;

 	CMD_LOAD_STATE(buffer, VIVS_MMUv2_PTA_CONFIG,
-		       VIVS_MMUv2_PTA_CONFIG_INDEX(0));
+		       VIVS_MMUv2_PTA_CONFIG_INDEX(id));

 	CMD_END(buffer);

@ -289,8 +296,9 @@ void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)

 	/* Append waitlink */
 	CMD_WAIT(buffer);
-	CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer) +
-			    buffer->user_size - 4);
+	CMD_LINK(buffer, 2,
+		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
+		 + buffer->user_size - 4);

 	/*
 	 * Kick off the 'sync point' command by replacing the previous
@ -304,36 +312,41 @@ void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event)

 /* Append a command buffer to the ring buffer. */
 void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
-	unsigned int event, struct etnaviv_cmdbuf *cmdbuf)
+	struct etnaviv_iommu_context *mmu_context, unsigned int event,
+	struct etnaviv_cmdbuf *cmdbuf)
 {
 	struct etnaviv_cmdbuf *buffer = &gpu->buffer;
 	unsigned int waitlink_offset = buffer->user_size - 16;
 	u32 return_target, return_dwords;
 	u32 link_target, link_dwords;
 	bool switch_context = gpu->exec_state != exec_state;
+	bool switch_mmu_context = gpu->mmu_context != mmu_context;
+	unsigned int new_flush_seq = READ_ONCE(gpu->mmu_context->flush_seq);
+	bool need_flush = switch_mmu_context || gpu->flush_seq != new_flush_seq;

 	lockdep_assert_held(&gpu->lock);

 	if (drm_debug & DRM_UT_DRIVER)
 		etnaviv_buffer_dump(gpu, buffer, 0, 0x50);

-	link_target = etnaviv_cmdbuf_get_va(cmdbuf);
+	link_target = etnaviv_cmdbuf_get_va(cmdbuf,
+					    &gpu->mmu_context->cmdbuf_mapping);
 	link_dwords = cmdbuf->size / 8;

 	/*
-	 * If we need maintanence prior to submitting this buffer, we will
+	 * If we need maintenance prior to submitting this buffer, we will
 	 * need to append a mmu flush load state, followed by a new
 	 * link to this buffer - a total of four additional words.
 	 */
-	if (gpu->mmu->need_flush || switch_context) {
+	if (need_flush || switch_context) {
 		u32 target, extra_dwords;

 		/* link command */
 		extra_dwords = 1;

 		/* flush command */
-		if (gpu->mmu->need_flush) {
-			if (gpu->mmu->version == ETNAVIV_IOMMU_V1)
+		if (need_flush) {
+			if (gpu->mmu_context->global->version == ETNAVIV_IOMMU_V1)
 				extra_dwords += 1;
 			else
 				extra_dwords += 3;
@ -343,11 +356,28 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
 		if (switch_context)
 			extra_dwords += 4;

-		target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords);
+		/* PTA load command */
+		if (switch_mmu_context && gpu->sec_mode == ETNA_SEC_KERNEL)
+			extra_dwords += 1;

-		if (gpu->mmu->need_flush) {
+		target = etnaviv_buffer_reserve(gpu, buffer, extra_dwords);
+		/*
+		 * Switch MMU context if necessary. Must be done after the
+		 * link target has been calculated, as the jump forward in the
+		 * kernel ring still uses the last active MMU context before
+		 * the switch.
+		 */
+		if (switch_mmu_context) {
+			struct etnaviv_iommu_context *old_context = gpu->mmu_context;
+
+			etnaviv_iommu_context_get(mmu_context);
+			gpu->mmu_context = mmu_context;
+			etnaviv_iommu_context_put(old_context);
+		}
+
+		if (need_flush) {
 			/* Add the MMU flush */
-			if (gpu->mmu->version == ETNAVIV_IOMMU_V1) {
+			if (gpu->mmu_context->global->version == ETNAVIV_IOMMU_V1) {
 				CMD_LOAD_STATE(buffer, VIVS_GL_FLUSH_MMU,
 					       VIVS_GL_FLUSH_MMU_FLUSH_FEMMU |
 					       VIVS_GL_FLUSH_MMU_FLUSH_UNK1 |
@ -355,17 +385,30 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
 					       VIVS_GL_FLUSH_MMU_FLUSH_PEMMU |
 					       VIVS_GL_FLUSH_MMU_FLUSH_UNK4);
 			} else {
+				u32 flush = VIVS_MMUv2_CONFIGURATION_MODE_MASK |
+					    VIVS_MMUv2_CONFIGURATION_FLUSH_FLUSH;
+
+				if (switch_mmu_context &&
+				    gpu->sec_mode == ETNA_SEC_KERNEL) {
+					unsigned short id =
+						etnaviv_iommuv2_get_pta_id(gpu->mmu_context);
+					CMD_LOAD_STATE(buffer,
+						VIVS_MMUv2_PTA_CONFIG,
+						VIVS_MMUv2_PTA_CONFIG_INDEX(id));
+				}
+
+				if (gpu->sec_mode == ETNA_SEC_NONE)
+					flush |= etnaviv_iommuv2_get_mtlb_addr(gpu->mmu_context);
+
 				CMD_LOAD_STATE(buffer, VIVS_MMUv2_CONFIGURATION,
-					VIVS_MMUv2_CONFIGURATION_MODE_MASK |
-					VIVS_MMUv2_CONFIGURATION_ADDRESS_MASK |
-					VIVS_MMUv2_CONFIGURATION_FLUSH_FLUSH);
+					       flush);
 				CMD_SEM(buffer, SYNC_RECIPIENT_FE,
 					SYNC_RECIPIENT_PE);
 				CMD_STALL(buffer, SYNC_RECIPIENT_FE,
 					SYNC_RECIPIENT_PE);
 			}

-			gpu->mmu->need_flush = false;
+			gpu->flush_seq = new_flush_seq;
 		}

 		if (switch_context) {
@ -374,6 +417,8 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
 		}

 		/* And the link to the submitted buffer */
+		link_target = etnaviv_cmdbuf_get_va(cmdbuf,
+					&gpu->mmu_context->cmdbuf_mapping);
 		CMD_LINK(buffer, link_dwords, link_target);

 		/* Update the link target to point to above instructions */
@ -410,12 +455,14 @@ void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
 	CMD_LOAD_STATE(buffer, VIVS_GL_EVENT, VIVS_GL_EVENT_EVENT_ID(event) |
 		       VIVS_GL_EVENT_FROM_PE);
 	CMD_WAIT(buffer);
-	CMD_LINK(buffer, 2, etnaviv_cmdbuf_get_va(buffer) +
-			    buffer->user_size - 4);
+	CMD_LINK(buffer, 2,
+		 etnaviv_cmdbuf_get_va(buffer, &gpu->mmu_context->cmdbuf_mapping)
+		 + buffer->user_size - 4);

 	if (drm_debug & DRM_UT_DRIVER)
 		pr_info("stream link to 0x%08x @ 0x%08x %p\n",
-			return_target, etnaviv_cmdbuf_get_va(cmdbuf),
+			return_target,
+			etnaviv_cmdbuf_get_va(cmdbuf, &gpu->mmu_context->cmdbuf_mapping),
 			cmdbuf->vaddr);

 	if (drm_debug & DRM_UT_DRIVER) {
--- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.c
@ -3,27 +3,26 @@
 * Copyright (C) 2017-2018 Etnaviv Project
 */

+#include <linux/dma-mapping.h>
+
 #include <drm/drm_mm.h>

 #include "etnaviv_cmdbuf.h"
+#include "etnaviv_gem.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_mmu.h"
 #include "etnaviv_perfmon.h"

-#define SUBALLOC_SIZE		SZ_256K
+#define SUBALLOC_SIZE		SZ_512K
 #define SUBALLOC_GRANULE	SZ_4K
 #define SUBALLOC_GRANULES	(SUBALLOC_SIZE / SUBALLOC_GRANULE)

 struct etnaviv_cmdbuf_suballoc {
 	/* suballocated dma buffer properties */
-	struct etnaviv_gpu *gpu;
+	struct device *dev;
 	void *vaddr;
 	dma_addr_t paddr;

-	/* GPU mapping */
-	u32 iova;
-	struct drm_mm_node vram_node; /* only used on MMUv2 */
-
 	/* allocation management */
 	struct mutex lock;
 	DECLARE_BITMAP(granule_map, SUBALLOC_GRANULES);
@ -32,7 +31,7 @@ struct etnaviv_cmdbuf_suballoc {
 };

 struct etnaviv_cmdbuf_suballoc *
-etnaviv_cmdbuf_suballoc_new(struct etnaviv_gpu * gpu)
+etnaviv_cmdbuf_suballoc_new(struct device *dev)
 {
 	struct etnaviv_cmdbuf_suballoc *suballoc;
 	int ret;
@ -41,36 +40,44 @@ etnaviv_cmdbuf_suballoc_new(struct etnaviv_gpu * gpu)
 	if (!suballoc)
 		return ERR_PTR(-ENOMEM);

-	suballoc->gpu = gpu;
+	suballoc->dev = dev;
 	mutex_init(&suballoc->lock);
 	init_waitqueue_head(&suballoc->free_event);

-	suballoc->vaddr = dma_alloc_wc(gpu->dev, SUBALLOC_SIZE,
+	BUILD_BUG_ON(ETNAVIV_SOFTPIN_START_ADDRESS < SUBALLOC_SIZE);
+	suballoc->vaddr = dma_alloc_wc(dev, SUBALLOC_SIZE,
 				       &suballoc->paddr, GFP_KERNEL);
-	if (!suballoc->vaddr)
+	if (!suballoc->vaddr) {
+		ret = -ENOMEM;
 		goto free_suballoc;
-
-	ret = etnaviv_iommu_get_suballoc_va(gpu, suballoc->paddr,
-					    &suballoc->vram_node, SUBALLOC_SIZE,
-					    &suballoc->iova);
-	if (ret)
-		goto free_dma;
+	}

 	return suballoc;

-free_dma:
-	dma_free_wc(gpu->dev, SUBALLOC_SIZE, suballoc->vaddr, suballoc->paddr);
 free_suballoc:
 	kfree(suballoc);

-	return NULL;
+	return ERR_PTR(ret);
+}
+
+int etnaviv_cmdbuf_suballoc_map(struct etnaviv_cmdbuf_suballoc *suballoc,
+				struct etnaviv_iommu_context *context,
+				struct etnaviv_vram_mapping *mapping,
+				u32 memory_base)
+{
+	return etnaviv_iommu_get_suballoc_va(context, mapping, memory_base,
+					     suballoc->paddr, SUBALLOC_SIZE);
+}
+
+void etnaviv_cmdbuf_suballoc_unmap(struct etnaviv_iommu_context *context,
+				   struct etnaviv_vram_mapping *mapping)
+{
+	etnaviv_iommu_put_suballoc_va(context, mapping);
 }

 void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc)
 {
-	etnaviv_iommu_put_suballoc_va(suballoc->gpu, &suballoc->vram_node,
-				      SUBALLOC_SIZE, suballoc->iova);
-	dma_free_wc(suballoc->gpu->dev, SUBALLOC_SIZE, suballoc->vaddr,
+	dma_free_wc(suballoc->dev, SUBALLOC_SIZE, suballoc->vaddr,
 		    suballoc->paddr);
 	kfree(suballoc);
 }
@ -95,7 +102,7 @@ retry:
 						       suballoc->free_space,
 						       msecs_to_jiffies(10 * 1000));
 		if (!ret) {
-			dev_err(suballoc->gpu->dev,
+			dev_err(suballoc->dev,
 				"Timeout waiting for cmdbuf space\n");
 			return -ETIMEDOUT;
 		}
@ -123,9 +130,10 @@ void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf)
 	wake_up_all(&suballoc->free_event);
 }

-u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf)
+u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf,
+			  struct etnaviv_vram_mapping *mapping)
 {
-	return buf->suballoc->iova + buf->suballoc_offset;
+	return mapping->iova + buf->suballoc_offset;
 }

 dma_addr_t etnaviv_cmdbuf_get_pa(struct etnaviv_cmdbuf *buf)
--- a/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_cmdbuf.h
@ -8,7 +8,9 @@

 #include <linux/types.h>

-struct etnaviv_gpu;
+struct device;
+struct etnaviv_iommu_context;
+struct etnaviv_vram_mapping;
 struct etnaviv_cmdbuf_suballoc;
 struct etnaviv_perfmon_request;

@ -23,15 +25,22 @@ struct etnaviv_cmdbuf {
 };

 struct etnaviv_cmdbuf_suballoc *
-etnaviv_cmdbuf_suballoc_new(struct etnaviv_gpu * gpu);
+etnaviv_cmdbuf_suballoc_new(struct device *dev);
 void etnaviv_cmdbuf_suballoc_destroy(struct etnaviv_cmdbuf_suballoc *suballoc);
+int etnaviv_cmdbuf_suballoc_map(struct etnaviv_cmdbuf_suballoc *suballoc,
+				struct etnaviv_iommu_context *context,
+				struct etnaviv_vram_mapping *mapping,
+				u32 memory_base);
+void etnaviv_cmdbuf_suballoc_unmap(struct etnaviv_iommu_context *context,
+				   struct etnaviv_vram_mapping *mapping);


 int etnaviv_cmdbuf_init(struct etnaviv_cmdbuf_suballoc *suballoc,
 		struct etnaviv_cmdbuf *cmdbuf, u32 size);
 void etnaviv_cmdbuf_free(struct etnaviv_cmdbuf *cmdbuf);

-u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf);
+u32 etnaviv_cmdbuf_get_va(struct etnaviv_cmdbuf *buf,
+			  struct etnaviv_vram_mapping *mapping);
 dma_addr_t etnaviv_cmdbuf_get_pa(struct etnaviv_cmdbuf *buf);

 #endif /* __ETNAVIV_CMDBUF_H__ */
--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.c
@ -4,8 +4,17 @@
 */

 #include <linux/component.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
 #include <linux/of_platform.h>
+#include <linux/uaccess.h>
+
+#include <drm/drm_debugfs.h>
+#include <drm/drm_drv.h>
+#include <drm/drm_file.h>
+#include <drm/drm_ioctl.h>
 #include <drm/drm_of.h>
+#include <drm/drm_prime.h>

 #include "etnaviv_cmdbuf.h"
 #include "etnaviv_drv.h"
@ -41,12 +50,19 @@ static int etnaviv_open(struct drm_device *dev, struct drm_file *file)
 {
 	struct etnaviv_drm_private *priv = dev->dev_private;
 	struct etnaviv_file_private *ctx;
-	int i;
+	int ret, i;

 	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 	if (!ctx)
 		return -ENOMEM;

+	ctx->mmu = etnaviv_iommu_context_init(priv->mmu_global,
+					      priv->cmdbuf_suballoc);
+	if (!ctx->mmu) {
+		ret = -ENOMEM;
+		goto out_free;
+	}
+
 	for (i = 0; i < ETNA_MAX_PIPES; i++) {
 		struct etnaviv_gpu *gpu = priv->gpu[i];
 		struct drm_sched_rq *rq;
@ -61,6 +77,10 @@ static int etnaviv_open(struct drm_device *dev, struct drm_file *file)
 	file->driver_priv = ctx;

 	return 0;
+
+out_free:
+	kfree(ctx);
+	return ret;
 }

 static void etnaviv_postclose(struct drm_device *dev, struct drm_file *file)
@ -76,6 +96,8 @@ static void etnaviv_postclose(struct drm_device *dev, struct drm_file *file)
 			drm_sched_entity_destroy(&ctx->sched_entity[i]);
 	}

+	etnaviv_iommu_context_put(ctx->mmu);
+
 	kfree(ctx);
 }

@ -107,12 +129,29 @@ static int etnaviv_mm_show(struct drm_device *dev, struct seq_file *m)
 static int etnaviv_mmu_show(struct etnaviv_gpu *gpu, struct seq_file *m)
 {
 	struct drm_printer p = drm_seq_file_printer(m);
+	struct etnaviv_iommu_context *mmu_context;

 	seq_printf(m, "Active Objects (%s):\n", dev_name(gpu->dev));

-	mutex_lock(&gpu->mmu->lock);
-	drm_mm_print(&gpu->mmu->mm, &p);
-	mutex_unlock(&gpu->mmu->lock);
+	/*
+	 * Lock the GPU to avoid a MMU context switch just now and elevate
+	 * the refcount of the current context to avoid it disappearing from
+	 * under our feet.
+	 */
+	mutex_lock(&gpu->lock);
+	mmu_context = gpu->mmu_context;
+	if (mmu_context)
+		etnaviv_iommu_context_get(mmu_context);
+	mutex_unlock(&gpu->lock);
+
+	if (!mmu_context)
+		return 0;
+
+	mutex_lock(&mmu_context->lock);
+	drm_mm_print(&mmu_context->mm, &p);
+	mutex_unlock(&mmu_context->lock);
+
+	etnaviv_iommu_context_put(mmu_context);

 	return 0;
 }
@ -486,7 +525,7 @@ static struct drm_driver etnaviv_drm_driver = {
 	.desc               = "etnaviv DRM",
 	.date               = "20151214",
 	.major              = 1,
-	.minor              = 2,
+	.minor              = 3,
 };

 /*
@ -517,23 +556,32 @@ static int etnaviv_bind(struct device *dev)
 	INIT_LIST_HEAD(&priv->gem_list);
 	priv->num_gpus = 0;

+	priv->cmdbuf_suballoc = etnaviv_cmdbuf_suballoc_new(drm->dev);
+	if (IS_ERR(priv->cmdbuf_suballoc)) {
+		dev_err(drm->dev, "Failed to create cmdbuf suballocator\n");
+		ret = PTR_ERR(priv->cmdbuf_suballoc);
+		goto out_free_priv;
+	}
+
 	dev_set_drvdata(dev, drm);

 	ret = component_bind_all(dev, drm);
 	if (ret < 0)
-		goto out_bind;
+		goto out_destroy_suballoc;

 	load_gpu(drm);

 	ret = drm_dev_register(drm, 0);
 	if (ret)
-		goto out_register;
+		goto out_unbind;

 	return 0;

-out_register:
+out_unbind:
 	component_unbind_all(dev, drm);
-out_bind:
+out_destroy_suballoc:
+	etnaviv_cmdbuf_suballoc_destroy(priv->cmdbuf_suballoc);
+out_free_priv:
 	kfree(priv);
 out_put:
 	drm_dev_put(drm);
@ -552,6 +600,8 @@ static void etnaviv_unbind(struct device *dev)

 	dev->dma_parms = NULL;

+	etnaviv_cmdbuf_suballoc_destroy(priv->cmdbuf_suballoc);
+
 	drm->dev_private = NULL;
 	kfree(priv);

--- a/drivers/gpu/drm/etnaviv/etnaviv_drv.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_drv.h
@ -6,21 +6,12 @@
 #ifndef __ETNAVIV_DRV_H__
 #define __ETNAVIV_DRV_H__

-#include <linux/kernel.h>
-#include <linux/clk.h>
-#include <linux/cpufreq.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/pm.h>
-#include <linux/pm_runtime.h>
-#include <linux/slab.h>
 #include <linux/list.h>
+#include <linux/mm_types.h>
+#include <linux/sizes.h>
 #include <linux/time64.h>
 #include <linux/types.h>
-#include <linux/sizes.h>
-#include <linux/mm_types.h>

-#include <drm/drmP.h>
 #include <drm/drm_fb_helper.h>
 #include <drm/drm_gem.h>
 #include <drm/etnaviv_drm.h>
@ -31,12 +22,12 @@ struct etnaviv_gpu;
 struct etnaviv_mmu;
 struct etnaviv_gem_object;
 struct etnaviv_gem_submit;
+struct etnaviv_iommu_global;
+
+#define ETNAVIV_SOFTPIN_START_ADDRESS	SZ_4M /* must be >= SUBALLOC_SIZE */

 struct etnaviv_file_private {
-	/*
-	 * When per-context address spaces are supported we'd keep track of
-	 * the context's page-tables here.
-	 */
+	struct etnaviv_iommu_context	*mmu;
 	struct drm_sched_entity		sched_entity[ETNA_MAX_PIPES];
 };

@ -45,6 +36,9 @@ struct etnaviv_drm_private {
 	struct device_dma_parameters dma_parms;
 	struct etnaviv_gpu *gpu[ETNA_MAX_PIPES];

+	struct etnaviv_cmdbuf_suballoc *cmdbuf_suballoc;
+	struct etnaviv_iommu_global *mmu_global;
+
 	/* list of GEM objects: */
 	struct mutex gem_lock;
 	struct list_head gem_list;
@ -76,10 +70,11 @@ int etnaviv_gem_new_userptr(struct drm_device *dev, struct drm_file *file,
 	uintptr_t ptr, u32 size, u32 flags, u32 *handle);
 u16 etnaviv_buffer_init(struct etnaviv_gpu *gpu);
 u16 etnaviv_buffer_config_mmuv2(struct etnaviv_gpu *gpu, u32 mtlb_addr, u32 safe_addr);
-u16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu);
+u16 etnaviv_buffer_config_pta(struct etnaviv_gpu *gpu, unsigned short id);
 void etnaviv_buffer_end(struct etnaviv_gpu *gpu);
 void etnaviv_sync_point_queue(struct etnaviv_gpu *gpu, unsigned int event);
 void etnaviv_buffer_queue(struct etnaviv_gpu *gpu, u32 exec_state,
+	struct etnaviv_iommu_context *mmu,
 	unsigned int event, struct etnaviv_cmdbuf *cmdbuf);
 void etnaviv_validate_init(void);
 bool etnaviv_cmd_validate_one(struct etnaviv_gpu *gpu,
--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.c
@ -4,6 +4,8 @@
 */

 #include <linux/devcoredump.h>
+#include <linux/moduleparam.h>
+
 #include "etnaviv_cmdbuf.h"
 #include "etnaviv_dump.h"
 #include "etnaviv_gem.h"
@ -91,9 +93,9 @@ static void etnaviv_core_dump_registers(struct core_dump_iterator *iter,
 }

 static void etnaviv_core_dump_mmu(struct core_dump_iterator *iter,
-	struct etnaviv_gpu *gpu, size_t mmu_size)
+	struct etnaviv_iommu_context *mmu, size_t mmu_size)
 {
-	etnaviv_iommu_dump(gpu->mmu, iter->data);
+	etnaviv_iommu_dump(mmu, iter->data);

 	etnaviv_core_dump_header(iter, ETDUMP_BUF_MMU, iter->data + mmu_size);
 }
@ -108,46 +110,35 @@ static void etnaviv_core_dump_mem(struct core_dump_iterator *iter, u32 type,
 	etnaviv_core_dump_header(iter, type, iter->data + size);
 }

-void etnaviv_core_dump(struct etnaviv_gpu *gpu)
+void etnaviv_core_dump(struct etnaviv_gem_submit *submit)
 {
+	struct etnaviv_gpu *gpu = submit->gpu;
 	struct core_dump_iterator iter;
-	struct etnaviv_vram_mapping *vram;
 	struct etnaviv_gem_object *obj;
-	struct etnaviv_gem_submit *submit;
-	struct drm_sched_job *s_job;
 	unsigned int n_obj, n_bomap_pages;
 	size_t file_size, mmu_size;
 	__le64 *bomap, *bomap_start;
+	int i;

 	/* Only catch the first event, or when manually re-armed */
 	if (!etnaviv_dump_core)
 		return;
 	etnaviv_dump_core = false;

-	mutex_lock(&gpu->mmu->lock);
+	mutex_lock(&gpu->mmu_context->lock);

-	mmu_size = etnaviv_iommu_dump_size(gpu->mmu);
+	mmu_size = etnaviv_iommu_dump_size(gpu->mmu_context);

-	/* We always dump registers, mmu, ring and end marker */
-	n_obj = 4;
+	/* We always dump registers, mmu, ring, hanging cmdbuf and end marker */
+	n_obj = 5;
 	n_bomap_pages = 0;
 	file_size = ARRAY_SIZE(etnaviv_dump_registers) *
 			sizeof(struct etnaviv_dump_registers) +
-		    mmu_size + gpu->buffer.size;
-
-	/* Add in the active command buffers */
-	list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) {
-		submit = to_etnaviv_submit(s_job);
-		file_size += submit->cmdbuf.size;
-		n_obj++;
-	}
+		    mmu_size + gpu->buffer.size + submit->cmdbuf.size;

 	/* Add in the active buffer objects */
-	list_for_each_entry(vram, &gpu->mmu->mappings, mmu_node) {
-		if (!vram->use)
-			continue;
-
-		obj = vram->object;
+	for (i = 0; i < submit->nr_bos; i++) {
+		obj = submit->bos[i].obj;
 		file_size += obj->base.size;
 		n_bomap_pages += obj->base.size >> PAGE_SHIFT;
 		n_obj++;
@ -166,7 +157,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 	iter.start = __vmalloc(file_size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY,
 			       PAGE_KERNEL);
 	if (!iter.start) {
-		mutex_unlock(&gpu->mmu->lock);
+		mutex_unlock(&gpu->mmu_context->lock);
 		dev_warn(gpu->dev, "failed to allocate devcoredump file\n");
 		return;
 	}
@ -178,17 +169,16 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 	memset(iter.hdr, 0, iter.data - iter.start);

 	etnaviv_core_dump_registers(&iter, gpu);
-	etnaviv_core_dump_mmu(&iter, gpu, mmu_size);
+	etnaviv_core_dump_mmu(&iter, gpu->mmu_context, mmu_size);
 	etnaviv_core_dump_mem(&iter, ETDUMP_BUF_RING, gpu->buffer.vaddr,
 			      gpu->buffer.size,
-			      etnaviv_cmdbuf_get_va(&gpu->buffer));
+			      etnaviv_cmdbuf_get_va(&gpu->buffer,
+					&gpu->mmu_context->cmdbuf_mapping));

-	list_for_each_entry(s_job, &gpu->sched.ring_mirror_list, node) {
-		submit = to_etnaviv_submit(s_job);
-		etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD,
-				      submit->cmdbuf.vaddr, submit->cmdbuf.size,
-				      etnaviv_cmdbuf_get_va(&submit->cmdbuf));
-	}
+	etnaviv_core_dump_mem(&iter, ETDUMP_BUF_CMD,
+			      submit->cmdbuf.vaddr, submit->cmdbuf.size,
+			      etnaviv_cmdbuf_get_va(&submit->cmdbuf,
+					&gpu->mmu_context->cmdbuf_mapping));

 	/* Reserve space for the bomap */
 	if (n_bomap_pages) {
@ -201,14 +191,13 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 		bomap_start = bomap = NULL;
 	}

-	list_for_each_entry(vram, &gpu->mmu->mappings, mmu_node) {
+	for (i = 0; i < submit->nr_bos; i++) {
+		struct etnaviv_vram_mapping *vram;
 		struct page **pages;
 		void *vaddr;

-		if (vram->use == 0)
-			continue;
-
-		obj = vram->object;
+		obj = submit->bos[i].obj;
+		vram = submit->bos[i].mapping;

 		mutex_lock(&obj->lock);
 		pages = etnaviv_gem_get_pages(obj);
@ -232,7 +221,7 @@ void etnaviv_core_dump(struct etnaviv_gpu *gpu)
 					 obj->base.size);
 	}

-	mutex_unlock(&gpu->mmu->lock);
+	mutex_unlock(&gpu->mmu_context->lock);

 	etnaviv_core_dump_header(&iter, ETDUMP_BUF_END, iter.data);

--- a/drivers/gpu/drm/etnaviv/etnaviv_dump.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_dump.h
@ -35,8 +35,8 @@ struct etnaviv_dump_registers {
 };

 #ifdef __KERNEL__
-struct etnaviv_gpu;
-void etnaviv_core_dump(struct etnaviv_gpu *gpu);
+struct etnaviv_gem_submit;
+void etnaviv_core_dump(struct etnaviv_gem_submit *submit);
 #endif

 #endif
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.c
@ -3,10 +3,11 @@
 * Copyright (C) 2015-2018 Etnaviv Project
 */

-#include <linux/spinlock.h>
+#include <drm/drm_prime.h>
+#include <linux/dma-mapping.h>
 #include <linux/shmem_fs.h>
-#include <linux/sched/mm.h>
-#include <linux/sched/task.h>
+#include <linux/spinlock.h>
+#include <linux/vmalloc.h>

 #include "etnaviv_drv.h"
 #include "etnaviv_gem.h"
@ -222,30 +223,18 @@ int etnaviv_gem_mmap_offset(struct drm_gem_object *obj, u64 *offset)

 static struct etnaviv_vram_mapping *
 etnaviv_gem_get_vram_mapping(struct etnaviv_gem_object *obj,
-			     struct etnaviv_iommu *mmu)
+			     struct etnaviv_iommu_context *context)
 {
 	struct etnaviv_vram_mapping *mapping;

 	list_for_each_entry(mapping, &obj->vram_list, obj_node) {
-		if (mapping->mmu == mmu)
+		if (mapping->context == context)
 			return mapping;
 	}

 	return NULL;
 }

-void etnaviv_gem_mapping_reference(struct etnaviv_vram_mapping *mapping)
-{
-	struct etnaviv_gem_object *etnaviv_obj = mapping->object;
-
-	drm_gem_object_get(&etnaviv_obj->base);
-
-	mutex_lock(&etnaviv_obj->lock);
-	WARN_ON(mapping->use == 0);
-	mapping->use += 1;
-	mutex_unlock(&etnaviv_obj->lock);
-}
-
 void etnaviv_gem_mapping_unreference(struct etnaviv_vram_mapping *mapping)
 {
 	struct etnaviv_gem_object *etnaviv_obj = mapping->object;
@ -259,7 +248,8 @@ void etnaviv_gem_mapping_unreference(struct etnaviv_vram_mapping *mapping)
 }

 struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
-	struct drm_gem_object *obj, struct etnaviv_gpu *gpu)
+	struct drm_gem_object *obj, struct etnaviv_iommu_context *mmu_context,
+	u64 va)
 {
 	struct etnaviv_gem_object *etnaviv_obj = to_etnaviv_bo(obj);
 	struct etnaviv_vram_mapping *mapping;
@ -267,7 +257,7 @@ struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
 	int ret = 0;

 	mutex_lock(&etnaviv_obj->lock);
-	mapping = etnaviv_gem_get_vram_mapping(etnaviv_obj, gpu->mmu);
+	mapping = etnaviv_gem_get_vram_mapping(etnaviv_obj, mmu_context);
 	if (mapping) {
 		/*
 		 * Holding the object lock prevents the use count changing
@ -276,12 +266,12 @@ struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
 		 * the MMU owns this mapping to close this race.
 		 */
 		if (mapping->use == 0) {
-			mutex_lock(&gpu->mmu->lock);
-			if (mapping->mmu == gpu->mmu)
+			mutex_lock(&mmu_context->lock);
+			if (mapping->context == mmu_context)
 				mapping->use += 1;
 			else
 				mapping = NULL;
-			mutex_unlock(&gpu->mmu->lock);
+			mutex_unlock(&mmu_context->lock);
 			if (mapping)
 				goto out;
 		} else {
@ -314,15 +304,19 @@ struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
 		list_del(&mapping->obj_node);
 	}

-	mapping->mmu = gpu->mmu;
+	etnaviv_iommu_context_get(mmu_context);
+	mapping->context = mmu_context;
 	mapping->use = 1;

-	ret = etnaviv_iommu_map_gem(gpu->mmu, etnaviv_obj, gpu->memory_base,
-				    mapping);
-	if (ret < 0)
+	ret = etnaviv_iommu_map_gem(mmu_context, etnaviv_obj,
+				    mmu_context->global->memory_base,
+				    mapping, va);
+	if (ret < 0) {
+		etnaviv_iommu_context_put(mmu_context);
 		kfree(mapping);
-	else
+	} else {
 		list_add_tail(&mapping->obj_node, &etnaviv_obj->vram_list);
+	}

 out:
 	mutex_unlock(&etnaviv_obj->lock);
@ -536,12 +530,14 @@ void etnaviv_gem_free_object(struct drm_gem_object *obj)

 	list_for_each_entry_safe(mapping, tmp, &etnaviv_obj->vram_list,
 				 obj_node) {
-		struct etnaviv_iommu *mmu = mapping->mmu;
+		struct etnaviv_iommu_context *context = mapping->context;

 		WARN_ON(mapping->use);

-		if (mmu)
-			etnaviv_iommu_unmap_gem(mmu, mapping);
+		if (context) {
+			etnaviv_iommu_unmap_gem(context, mapping);
+			etnaviv_iommu_context_put(context);
+		}

 		list_del(&mapping->obj_node);
 		kfree(mapping);
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem.h
@ -25,7 +25,7 @@ struct etnaviv_vram_mapping {
 	struct list_head scan_node;
 	struct list_head mmu_node;
 	struct etnaviv_gem_object *object;
-	struct etnaviv_iommu *mmu;
+	struct etnaviv_iommu_context *context;
 	struct drm_mm_node vram_node;
 	unsigned int use;
 	u32 iova;
@ -77,6 +77,7 @@ static inline bool is_active(struct etnaviv_gem_object *etnaviv_obj)

 struct etnaviv_gem_submit_bo {
 	u32 flags;
+	u64 va;
 	struct etnaviv_gem_object *obj;
 	struct etnaviv_vram_mapping *mapping;
 	struct dma_fence *excl;
@ -93,6 +94,7 @@ struct etnaviv_gem_submit {
 	struct kref refcount;
 	struct etnaviv_file_private *ctx;
 	struct etnaviv_gpu *gpu;
+	struct etnaviv_iommu_context *mmu_context, *prev_mmu_context;
 	struct dma_fence *out_fence, *in_fence;
 	int out_fence_id;
 	struct list_head node; /* GPU active submit list */
@ -118,8 +120,8 @@ struct page **etnaviv_gem_get_pages(struct etnaviv_gem_object *obj);
 void etnaviv_gem_put_pages(struct etnaviv_gem_object *obj);

 struct etnaviv_vram_mapping *etnaviv_gem_mapping_get(
-	struct drm_gem_object *obj, struct etnaviv_gpu *gpu);
-void etnaviv_gem_mapping_reference(struct etnaviv_vram_mapping *mapping);
+	struct drm_gem_object *obj, struct etnaviv_iommu_context *mmu_context,
+	u64 va);
 void etnaviv_gem_mapping_unreference(struct etnaviv_vram_mapping *mapping);

 #endif /* __ETNAVIV_GEM_H__ */
--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_prime.c
@ -3,7 +3,9 @@
 * Copyright (C) 2014-2018 Etnaviv Project
 */

+#include <drm/drm_prime.h>
 #include <linux/dma-buf.h>
+
 #include "etnaviv_drv.h"
 #include "etnaviv_gem.h"

--- a/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gem_submit.c
@ -3,9 +3,15 @@
 * Copyright (C) 2015 Etnaviv Project
 */

+#include <drm/drm_file.h>
 #include <linux/dma-fence-array.h>
+#include <linux/file.h>
+#include <linux/pm_runtime.h>
 #include <linux/dma-resv.h>
 #include <linux/sync_file.h>
+#include <linux/uaccess.h>
+#include <linux/vmalloc.h>
+
 #include "etnaviv_cmdbuf.h"
 #include "etnaviv_drv.h"
 #include "etnaviv_gpu.h"
@ -66,6 +72,14 @@ static int submit_lookup_objects(struct etnaviv_gem_submit *submit,
 		}

 		submit->bos[i].flags = bo->flags;
+		if (submit->flags & ETNA_SUBMIT_SOFTPIN) {
+			if (bo->presumed < ETNAVIV_SOFTPIN_START_ADDRESS) {
+				DRM_ERROR("invalid softpin address\n");
+				ret = -EINVAL;
+				goto out_unlock;
+			}
+			submit->bos[i].va = bo->presumed;
+		}

 		/* normally use drm_gem_object_lookup(), but for bulk lookup
 		 * all under single table_lock just hit object_idr directly:
@ -218,11 +232,17 @@ static int submit_pin_objects(struct etnaviv_gem_submit *submit)
 		struct etnaviv_vram_mapping *mapping;

 		mapping = etnaviv_gem_mapping_get(&etnaviv_obj->base,
-						  submit->gpu);
+						  submit->mmu_context,
+						  submit->bos[i].va);
 		if (IS_ERR(mapping)) {
 			ret = PTR_ERR(mapping);
 			break;
 		}
+
+		if ((submit->flags & ETNA_SUBMIT_SOFTPIN) &&
+		     submit->bos[i].va != mapping->iova)
+			return -EINVAL;
+
 		atomic_inc(&etnaviv_obj->gpu_active);

 		submit->bos[i].flags |= BO_PINNED;
@ -255,6 +275,10 @@ static int submit_reloc(struct etnaviv_gem_submit *submit, void *stream,
 	u32 *ptr = stream;
 	int ret;

+	/* Submits using softpin don't blend with relocs */
+	if ((submit->flags & ETNA_SUBMIT_SOFTPIN) && nr_relocs != 0)
+		return -EINVAL;
+
 	for (i = 0; i < nr_relocs; i++) {
 		const struct drm_etnaviv_gem_submit_reloc *r = relocs + i;
 		struct etnaviv_gem_submit_bo *bo;
@ -355,6 +379,12 @@ static void submit_cleanup(struct kref *kref)
 	if (submit->cmdbuf.suballoc)
 		etnaviv_cmdbuf_free(&submit->cmdbuf);

+	if (submit->mmu_context)
+		etnaviv_iommu_context_put(submit->mmu_context);
+
+	if (submit->prev_mmu_context)
+		etnaviv_iommu_context_put(submit->prev_mmu_context);
+
 	for (i = 0; i < submit->nr_bos; i++) {
 		struct etnaviv_gem_object *etnaviv_obj = submit->bos[i].obj;

@ -433,6 +463,12 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		return -EINVAL;
 	}

+	if ((args->flags & ETNA_SUBMIT_SOFTPIN) &&
+	    priv->mmu_global->version != ETNAVIV_IOMMU_V2) {
+		DRM_ERROR("softpin requested on incompatible MMU\n");
+		return -EINVAL;
+	}
+
 	/*
 	 * Copy the command submission and bo array to kernel space in
 	 * one go, and do this outside of any locks.
@ -490,12 +526,14 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 		goto err_submit_ww_acquire;
 	}

-	ret = etnaviv_cmdbuf_init(gpu->cmdbuf_suballoc, &submit->cmdbuf,
+	ret = etnaviv_cmdbuf_init(priv->cmdbuf_suballoc, &submit->cmdbuf,
 				  ALIGN(args->stream_size, 8) + 8);
 	if (ret)
 		goto err_submit_objects;

 	submit->ctx = file->driver_priv;
+	etnaviv_iommu_context_get(submit->ctx->mmu);
+	submit->mmu_context = submit->ctx->mmu;
 	submit->exec_state = args->exec_state;
 	submit->flags = args->flags;

@ -503,7 +541,8 @@ int etnaviv_ioctl_gem_submit(struct drm_device *dev, void *data,
 	if (ret)
 		goto err_submit_objects;

-	if (!etnaviv_cmd_validate_one(gpu, stream, args->stream_size / 4,
+	if ((priv->mmu_global->version != ETNAVIV_IOMMU_V2) &&
+	    !etnaviv_cmd_validate_one(gpu, stream, args->stream_size / 4,
 				      relocs, args->nr_relocs)) {
 		ret = -EINVAL;
 		goto err_submit_objects;
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.c
@ -5,9 +5,13 @@

 #include <linux/clk.h>
 #include <linux/component.h>
+#include <linux/delay.h>
 #include <linux/dma-fence.h>
-#include <linux/moduleparam.h>
+#include <linux/dma-mapping.h>
+#include <linux/module.h>
 #include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/regulator/consumer.h>
 #include <linux/thermal.h>

@ -38,6 +42,8 @@ static const struct platform_device_id gpu_ids[] = {

 int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
 {
+	struct etnaviv_drm_private *priv = gpu->drm->dev_private;
+
 	switch (param) {
 	case ETNAVIV_PARAM_GPU_MODEL:
 		*value = gpu->identity.model;
@ -143,6 +149,13 @@ int etnaviv_gpu_get_param(struct etnaviv_gpu *gpu, u32 param, u64 *value)
 		*value = gpu->identity.varyings_count;
 		break;

+	case ETNAVIV_PARAM_SOFTPIN_START_ADDR:
+		if (priv->mmu_global->version == ETNAVIV_IOMMU_V2)
+			*value = ETNAVIV_SOFTPIN_START_ADDRESS;
+		else
+			*value = ~0ULL;
+		break;
+
 	default:
 		DBG("%s: invalid param: %u", dev_name(gpu->dev), param);
 		return -EINVAL;
@ -596,6 +609,21 @@ void etnaviv_gpu_start_fe(struct etnaviv_gpu *gpu, u32 address, u16 prefetch)
 	}
 }

+static void etnaviv_gpu_start_fe_idleloop(struct etnaviv_gpu *gpu)
+{
+	u32 address = etnaviv_cmdbuf_get_va(&gpu->buffer,
+				&gpu->mmu_context->cmdbuf_mapping);
+	u16 prefetch;
+
+	/* setup the MMU */
+	etnaviv_iommu_restore(gpu, gpu->mmu_context);
+
+	/* Start command processor */
+	prefetch = etnaviv_buffer_init(gpu);
+
+	etnaviv_gpu_start_fe(gpu, address, prefetch);
+}
+
 static void etnaviv_gpu_setup_pulse_eater(struct etnaviv_gpu *gpu)
 {
 	/*
@ -629,8 +657,6 @@ static void etnaviv_gpu_setup_pulse_eater(struct etnaviv_gpu *gpu)

 static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 {
-	u16 prefetch;
-
 	if ((etnaviv_is_model_rev(gpu, GC320, 0x5007) ||
 	     etnaviv_is_model_rev(gpu, GC320, 0x5220)) &&
 	    gpu_read(gpu, VIVS_HI_CHIP_TIME) != 0x2062400) {
@ -676,19 +702,12 @@ static void etnaviv_gpu_hw_init(struct etnaviv_gpu *gpu)
 	/* setup the pulse eater */
 	etnaviv_gpu_setup_pulse_eater(gpu);

-	/* setup the MMU */
-	etnaviv_iommu_restore(gpu);
-
-	/* Start command processor */
-	prefetch = etnaviv_buffer_init(gpu);
-
 	gpu_write(gpu, VIVS_HI_INTR_ENBL, ~0U);
-	etnaviv_gpu_start_fe(gpu, etnaviv_cmdbuf_get_va(&gpu->buffer),
-			     prefetch);
 }

 int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 {
+	struct etnaviv_drm_private *priv = gpu->drm->dev_private;
 	int ret, i;

 	ret = pm_runtime_get_sync(gpu->dev);
@ -713,28 +732,6 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 		goto fail;
 	}

-	/*
-	 * Set the GPU linear window to be at the end of the DMA window, where
-	 * the CMA area is likely to reside. This ensures that we are able to
-	 * map the command buffers while having the linear window overlap as
-	 * much RAM as possible, so we can optimize mappings for other buffers.
-	 *
-	 * For 3D cores only do this if MC2.0 is present, as with MC1.0 it leads
-	 * to different views of the memory on the individual engines.
-	 */
-	if (!(gpu->identity.features & chipFeatures_PIPE_3D) ||
-	    (gpu->identity.minor_features0 & chipMinorFeatures0_MC20)) {
-		u32 dma_mask = (u32)dma_get_required_mask(gpu->dev);
-		if (dma_mask < PHYS_OFFSET + SZ_2G)
-			gpu->memory_base = PHYS_OFFSET;
-		else
-			gpu->memory_base = dma_mask - SZ_2G + 1;
-	} else if (PHYS_OFFSET >= SZ_2G) {
-		dev_info(gpu->dev, "Need to move linear window on MC1.0, disabling TS\n");
-		gpu->memory_base = PHYS_OFFSET;
-		gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
-	}
-
 	/*
 	 * On cores with security features supported, we claim control over the
 	 * security states.
@ -749,34 +746,38 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 		goto fail;
 	}

-	gpu->mmu = etnaviv_iommu_new(gpu);
-	if (IS_ERR(gpu->mmu)) {
-		dev_err(gpu->dev, "Failed to instantiate GPU IOMMU\n");
-		ret = PTR_ERR(gpu->mmu);
+	ret = etnaviv_iommu_global_init(gpu);
+	if (ret)
 		goto fail;
-	}

-	gpu->cmdbuf_suballoc = etnaviv_cmdbuf_suballoc_new(gpu);
-	if (IS_ERR(gpu->cmdbuf_suballoc)) {
-		dev_err(gpu->dev, "Failed to create cmdbuf suballocator\n");
-		ret = PTR_ERR(gpu->cmdbuf_suballoc);
-		goto destroy_iommu;
+	/*
+	 * Set the GPU linear window to be at the end of the DMA window, where
+	 * the CMA area is likely to reside. This ensures that we are able to
+	 * map the command buffers while having the linear window overlap as
+	 * much RAM as possible, so we can optimize mappings for other buffers.
+	 *
+	 * For 3D cores only do this if MC2.0 is present, as with MC1.0 it leads
+	 * to different views of the memory on the individual engines.
+	 */
+	if (!(gpu->identity.features & chipFeatures_PIPE_3D) ||
+	    (gpu->identity.minor_features0 & chipMinorFeatures0_MC20)) {
+		u32 dma_mask = (u32)dma_get_required_mask(gpu->dev);
+		if (dma_mask < PHYS_OFFSET + SZ_2G)
+			priv->mmu_global->memory_base = PHYS_OFFSET;
+		else
+			priv->mmu_global->memory_base = dma_mask - SZ_2G + 1;
+	} else if (PHYS_OFFSET >= SZ_2G) {
+		dev_info(gpu->dev, "Need to move linear window on MC1.0, disabling TS\n");
+		priv->mmu_global->memory_base = PHYS_OFFSET;
+		gpu->identity.features &= ~chipFeatures_FAST_CLEAR;
 	}

 	/* Create buffer: */
-	ret = etnaviv_cmdbuf_init(gpu->cmdbuf_suballoc, &gpu->buffer,
+	ret = etnaviv_cmdbuf_init(priv->cmdbuf_suballoc, &gpu->buffer,
 				  PAGE_SIZE);
 	if (ret) {
 		dev_err(gpu->dev, "could not create command buffer\n");
-		goto destroy_suballoc;
-	}
-
-	if (gpu->mmu->version == ETNAVIV_IOMMU_V1 &&
-	    etnaviv_cmdbuf_get_va(&gpu->buffer) > 0x80000000) {
-		ret = -EINVAL;
-		dev_err(gpu->dev,
-			"command buffer outside valid memory window\n");
-		goto free_buffer;
+		goto fail;
 	}

 	/* Setup event management */
@ -795,17 +796,10 @@ int etnaviv_gpu_init(struct etnaviv_gpu *gpu)
 	pm_runtime_mark_last_busy(gpu->dev);
 	pm_runtime_put_autosuspend(gpu->dev);

+	gpu->initialized = true;
+
 	return 0;

-free_buffer:
-	etnaviv_cmdbuf_free(&gpu->buffer);
-	gpu->buffer.suballoc = NULL;
-destroy_suballoc:
-	etnaviv_cmdbuf_suballoc_destroy(gpu->cmdbuf_suballoc);
-	gpu->cmdbuf_suballoc = NULL;
-destroy_iommu:
-	etnaviv_iommu_destroy(gpu->mmu);
-	gpu->mmu = NULL;
 fail:
 	pm_runtime_mark_last_busy(gpu->dev);
 	pm_runtime_put_autosuspend(gpu->dev);
@ -999,6 +993,7 @@ void etnaviv_gpu_recover_hang(struct etnaviv_gpu *gpu)

 	etnaviv_gpu_hw_init(gpu);
 	gpu->exec_state = -1;
+	gpu->mmu_context = NULL;

 	mutex_unlock(&gpu->lock);
 	pm_runtime_mark_last_busy(gpu->dev);
@ -1305,6 +1300,15 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)
 		goto out_unlock;
 	}

+	if (!gpu->mmu_context) {
+		etnaviv_iommu_context_get(submit->mmu_context);
+		gpu->mmu_context = submit->mmu_context;
+		etnaviv_gpu_start_fe_idleloop(gpu);
+	} else {
+		etnaviv_iommu_context_get(gpu->mmu_context);
+		submit->prev_mmu_context = gpu->mmu_context;
+	}
+
 	if (submit->nr_pmrs) {
 		gpu->event[event[1]].sync_point = &sync_point_perfmon_sample_pre;
 		kref_get(&submit->refcount);
@ -1314,8 +1318,8 @@ struct dma_fence *etnaviv_gpu_submit(struct etnaviv_gem_submit *submit)

 	gpu->event[event[0]].fence = gpu_fence;
 	submit->cmdbuf.user_size = submit->cmdbuf.size - 8;
-	etnaviv_buffer_queue(gpu, submit->exec_state, event[0],
-			     &submit->cmdbuf);
+	etnaviv_buffer_queue(gpu, submit->exec_state, submit->mmu_context,
+			     event[0], &submit->cmdbuf);

 	if (submit->nr_pmrs) {
 		gpu->event[event[2]].sync_point = &sync_point_perfmon_sample_post;
@ -1517,7 +1521,7 @@ int etnaviv_gpu_wait_idle(struct etnaviv_gpu *gpu, unsigned int timeout_ms)

 static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu)
 {
-	if (gpu->buffer.suballoc) {
+	if (gpu->initialized && gpu->mmu_context) {
 		/* Replace the last WAIT with END */
 		mutex_lock(&gpu->lock);
 		etnaviv_buffer_end(gpu);
@ -1529,8 +1533,13 @@ static int etnaviv_gpu_hw_suspend(struct etnaviv_gpu *gpu)
 		 * we fail, just warn and continue.
 		 */
 		etnaviv_gpu_wait_idle(gpu, 100);
+
+		etnaviv_iommu_context_put(gpu->mmu_context);
+		gpu->mmu_context = NULL;
 	}

+	gpu->exec_state = -1;
+
 	return etnaviv_gpu_clk_disable(gpu);
 }

@ -1546,8 +1555,6 @@ static int etnaviv_gpu_hw_resume(struct etnaviv_gpu *gpu)
 	etnaviv_gpu_update_clock(gpu);
 	etnaviv_gpu_hw_init(gpu);

-	gpu->exec_state = -1;
-
 	mutex_unlock(&gpu->lock);

 	return 0;
@ -1676,17 +1683,10 @@ static void etnaviv_gpu_unbind(struct device *dev, struct device *master,
 	etnaviv_gpu_hw_suspend(gpu);
 #endif

-	if (gpu->buffer.suballoc)
+	if (gpu->initialized) {
 		etnaviv_cmdbuf_free(&gpu->buffer);
-
-	if (gpu->cmdbuf_suballoc) {
-		etnaviv_cmdbuf_suballoc_destroy(gpu->cmdbuf_suballoc);
-		gpu->cmdbuf_suballoc = NULL;
-	}
-
-	if (gpu->mmu) {
-		etnaviv_iommu_destroy(gpu->mmu);
-		gpu->mmu = NULL;
+		etnaviv_iommu_global_fini(gpu);
+		gpu->initialized = false;
 	}

 	gpu->drm = NULL;
@ -1714,7 +1714,6 @@ static int etnaviv_gpu_platform_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct etnaviv_gpu *gpu;
-	struct resource *res;
 	int err;

 	gpu = devm_kzalloc(dev, sizeof(*gpu), GFP_KERNEL);
@ -1726,8 +1725,7 @@ static int etnaviv_gpu_platform_probe(struct platform_device *pdev)
 	mutex_init(&gpu->fence_lock);

 	/* Map registers: */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	gpu->mmio = devm_ioremap_resource(&pdev->dev, res);
+	gpu->mmio = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(gpu->mmio))
 		return PTR_ERR(gpu->mmio);

@ -1825,7 +1823,7 @@ static int etnaviv_gpu_rpm_resume(struct device *dev)
 		return ret;

 	/* Re-initialise the basic hardware state */
-	if (gpu->drm && gpu->buffer.suballoc) {
+	if (gpu->drm && gpu->initialized) {
 		ret = etnaviv_gpu_hw_resume(gpu);
 		if (ret) {
 			etnaviv_gpu_clk_disable(gpu);
--- a/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_gpu.h
@ -7,6 +7,8 @@
 #define __ETNAVIV_GPU_H__

 #include "etnaviv_cmdbuf.h"
+#include "etnaviv_gem.h"
+#include "etnaviv_mmu.h"
 #include "etnaviv_drv.h"

 struct etnaviv_gem_submit;
@ -84,7 +86,6 @@ struct etnaviv_event {
 };

 struct etnaviv_cmdbuf_suballoc;
-struct etnaviv_cmdbuf;
 struct regulator;
 struct clk;

@ -99,14 +100,12 @@ struct etnaviv_gpu {
 	enum etnaviv_sec_mode sec_mode;
 	struct workqueue_struct *wq;
 	struct drm_gpu_scheduler sched;
+	bool initialized;

 	/* 'ring'-buffer: */
 	struct etnaviv_cmdbuf buffer;
 	int exec_state;

-	/* bus base address of memory  */
-	u32 memory_base;
-
 	/* event management: */
 	DECLARE_BITMAP(event_bitmap, ETNA_NR_EVENTS);
 	struct etnaviv_event event[ETNA_NR_EVENTS];
@ -134,8 +133,8 @@ struct etnaviv_gpu {
 	void __iomem *mmio;
 	int irq;

-	struct etnaviv_iommu *mmu;
-	struct etnaviv_cmdbuf_suballoc *cmdbuf_suballoc;
+	struct etnaviv_iommu_context *mmu_context;
+	unsigned int flush_seq;

 	/* Power Control: */
 	struct clk *clk_bus;
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.c
@ -3,15 +3,14 @@
 * Copyright (C) 2014-2018 Etnaviv Project
 */

+#include <linux/bitops.h>
+#include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
-#include <linux/dma-mapping.h>
-#include <linux/bitops.h>

 #include "etnaviv_gpu.h"
 #include "etnaviv_mmu.h"
-#include "etnaviv_iommu.h"
 #include "state_hi.xml.h"

 #define PT_SIZE		SZ_2M
@ -19,124 +18,89 @@

 #define GPU_MEM_START	0x80000000

-struct etnaviv_iommuv1_domain {
-	struct etnaviv_iommu_domain base;
+struct etnaviv_iommuv1_context {
+	struct etnaviv_iommu_context base;
 	u32 *pgtable_cpu;
 	dma_addr_t pgtable_dma;
 };

-static struct etnaviv_iommuv1_domain *
-to_etnaviv_domain(struct etnaviv_iommu_domain *domain)
+static struct etnaviv_iommuv1_context *
+to_v1_context(struct etnaviv_iommu_context *context)
 {
-	return container_of(domain, struct etnaviv_iommuv1_domain, base);
+	return container_of(context, struct etnaviv_iommuv1_context, base);
 }

-static int __etnaviv_iommu_init(struct etnaviv_iommuv1_domain *etnaviv_domain)
+static void etnaviv_iommuv1_free(struct etnaviv_iommu_context *context)
 {
-	u32 *p;
-	int i;
+	struct etnaviv_iommuv1_context *v1_context = to_v1_context(context);

-	etnaviv_domain->base.bad_page_cpu =
-			dma_alloc_wc(etnaviv_domain->base.dev, SZ_4K,
-				     &etnaviv_domain->base.bad_page_dma,
-				     GFP_KERNEL);
-	if (!etnaviv_domain->base.bad_page_cpu)
-		return -ENOMEM;
+	drm_mm_takedown(&context->mm);

-	p = etnaviv_domain->base.bad_page_cpu;
-	for (i = 0; i < SZ_4K / 4; i++)
-		*p++ = 0xdead55aa;
+	dma_free_wc(context->global->dev, PT_SIZE, v1_context->pgtable_cpu,
+		    v1_context->pgtable_dma);

-	etnaviv_domain->pgtable_cpu = dma_alloc_wc(etnaviv_domain->base.dev,
-						   PT_SIZE,
-						   &etnaviv_domain->pgtable_dma,
-						   GFP_KERNEL);
-	if (!etnaviv_domain->pgtable_cpu) {
-		dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
-			    etnaviv_domain->base.bad_page_cpu,
-			    etnaviv_domain->base.bad_page_dma);
-		return -ENOMEM;
-	}
+	context->global->v1.shared_context = NULL;

-	memset32(etnaviv_domain->pgtable_cpu, etnaviv_domain->base.bad_page_dma,
-		 PT_ENTRIES);
-
-	return 0;
+	kfree(v1_context);
 }

-static void etnaviv_iommuv1_domain_free(struct etnaviv_iommu_domain *domain)
-{
-	struct etnaviv_iommuv1_domain *etnaviv_domain =
-			to_etnaviv_domain(domain);
-
-	dma_free_wc(etnaviv_domain->base.dev, PT_SIZE,
-		    etnaviv_domain->pgtable_cpu, etnaviv_domain->pgtable_dma);
-
-	dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
-		    etnaviv_domain->base.bad_page_cpu,
-		    etnaviv_domain->base.bad_page_dma);
-
-	kfree(etnaviv_domain);
-}
-
-static int etnaviv_iommuv1_map(struct etnaviv_iommu_domain *domain,
+static int etnaviv_iommuv1_map(struct etnaviv_iommu_context *context,
 			       unsigned long iova, phys_addr_t paddr,
 			       size_t size, int prot)
 {
-	struct etnaviv_iommuv1_domain *etnaviv_domain = to_etnaviv_domain(domain);
+	struct etnaviv_iommuv1_context *v1_context = to_v1_context(context);
 	unsigned int index = (iova - GPU_MEM_START) / SZ_4K;

 	if (size != SZ_4K)
 		return -EINVAL;

-	etnaviv_domain->pgtable_cpu[index] = paddr;
+	v1_context->pgtable_cpu[index] = paddr;

 	return 0;
 }

-static size_t etnaviv_iommuv1_unmap(struct etnaviv_iommu_domain *domain,
+static size_t etnaviv_iommuv1_unmap(struct etnaviv_iommu_context *context,
 	unsigned long iova, size_t size)
 {
-	struct etnaviv_iommuv1_domain *etnaviv_domain =
-			to_etnaviv_domain(domain);
+	struct etnaviv_iommuv1_context *v1_context = to_v1_context(context);
 	unsigned int index = (iova - GPU_MEM_START) / SZ_4K;

 	if (size != SZ_4K)
 		return -EINVAL;

-	etnaviv_domain->pgtable_cpu[index] = etnaviv_domain->base.bad_page_dma;
+	v1_context->pgtable_cpu[index] = context->global->bad_page_dma;

 	return SZ_4K;
 }

-static size_t etnaviv_iommuv1_dump_size(struct etnaviv_iommu_domain *domain)
+static size_t etnaviv_iommuv1_dump_size(struct etnaviv_iommu_context *context)
 {
 	return PT_SIZE;
 }

-static void etnaviv_iommuv1_dump(struct etnaviv_iommu_domain *domain, void *buf)
+static void etnaviv_iommuv1_dump(struct etnaviv_iommu_context *context,
+				 void *buf)
 {
-	struct etnaviv_iommuv1_domain *etnaviv_domain =
-			to_etnaviv_domain(domain);
+	struct etnaviv_iommuv1_context *v1_context = to_v1_context(context);

-	memcpy(buf, etnaviv_domain->pgtable_cpu, PT_SIZE);
+	memcpy(buf, v1_context->pgtable_cpu, PT_SIZE);
 }

-void etnaviv_iommuv1_restore(struct etnaviv_gpu *gpu)
+static void etnaviv_iommuv1_restore(struct etnaviv_gpu *gpu,
+			     struct etnaviv_iommu_context *context)
 {
-	struct etnaviv_iommuv1_domain *etnaviv_domain =
-			to_etnaviv_domain(gpu->mmu->domain);
+	struct etnaviv_iommuv1_context *v1_context = to_v1_context(context);
 	u32 pgtable;

 	/* set base addresses */
-	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_RA, gpu->memory_base);
-	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_FE, gpu->memory_base);
-	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_TX, gpu->memory_base);
-	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PEZ, gpu->memory_base);
-	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PE, gpu->memory_base);
+	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_RA, context->global->memory_base);
+	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_FE, context->global->memory_base);
+	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_TX, context->global->memory_base);
+	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PEZ, context->global->memory_base);
+	gpu_write(gpu, VIVS_MC_MEMORY_BASE_ADDR_PE, context->global->memory_base);

 	/* set page table address in MC */
-	pgtable = (u32)etnaviv_domain->pgtable_dma;
+	pgtable = (u32)v1_context->pgtable_dma;

 	gpu_write(gpu, VIVS_MC_MMU_FE_PAGE_TABLE, pgtable);
 	gpu_write(gpu, VIVS_MC_MMU_TX_PAGE_TABLE, pgtable);
@ -145,39 +109,62 @@ void etnaviv_iommuv1_restore(struct etnaviv_gpu *gpu)
 	gpu_write(gpu, VIVS_MC_MMU_RA_PAGE_TABLE, pgtable);
 }

-static const struct etnaviv_iommu_domain_ops etnaviv_iommuv1_ops = {
-	.free = etnaviv_iommuv1_domain_free,
+
+const struct etnaviv_iommu_ops etnaviv_iommuv1_ops = {
+	.free = etnaviv_iommuv1_free,
 	.map = etnaviv_iommuv1_map,
 	.unmap = etnaviv_iommuv1_unmap,
 	.dump_size = etnaviv_iommuv1_dump_size,
 	.dump = etnaviv_iommuv1_dump,
+	.restore = etnaviv_iommuv1_restore,
 };

-struct etnaviv_iommu_domain *
-etnaviv_iommuv1_domain_alloc(struct etnaviv_gpu *gpu)
+struct etnaviv_iommu_context *
+etnaviv_iommuv1_context_alloc(struct etnaviv_iommu_global *global)
 {
-	struct etnaviv_iommuv1_domain *etnaviv_domain;
-	struct etnaviv_iommu_domain *domain;
-	int ret;
+	struct etnaviv_iommuv1_context *v1_context;
+	struct etnaviv_iommu_context *context;

-	etnaviv_domain = kzalloc(sizeof(*etnaviv_domain), GFP_KERNEL);
-	if (!etnaviv_domain)
+	mutex_lock(&global->lock);
+
+	/*
+	 * MMUv1 does not support switching between different contexts without
+	 * a stop the world operation, so we only support a single shared
+	 * context with this version.
+	 */
+	if (global->v1.shared_context) {
+		context = global->v1.shared_context;
+		etnaviv_iommu_context_get(context);
+		mutex_unlock(&global->lock);
+		return context;
+	}
+
+	v1_context = kzalloc(sizeof(*v1_context), GFP_KERNEL);
+	if (!v1_context)
 		return NULL;

-	domain = &etnaviv_domain->base;
-
-	domain->dev = gpu->dev;
-	domain->base = GPU_MEM_START;
-	domain->size = PT_ENTRIES * SZ_4K;
-	domain->ops = &etnaviv_iommuv1_ops;
-
-	ret = __etnaviv_iommu_init(etnaviv_domain);
-	if (ret)
+	v1_context->pgtable_cpu = dma_alloc_wc(global->dev, PT_SIZE,
+					       &v1_context->pgtable_dma,
+					       GFP_KERNEL);
+	if (!v1_context->pgtable_cpu)
 		goto out_free;

-	return &etnaviv_domain->base;
+	memset32(v1_context->pgtable_cpu, global->bad_page_dma, PT_ENTRIES);
+
+	context = &v1_context->base;
+	context->global = global;
+	kref_init(&context->refcount);
+	mutex_init(&context->lock);
+	INIT_LIST_HEAD(&context->mappings);
+	drm_mm_init(&context->mm, GPU_MEM_START, PT_ENTRIES * SZ_4K);
+	context->global->v1.shared_context = context;
+
+	mutex_unlock(&global->lock);
+
+	return context;

 out_free:
-	kfree(etnaviv_domain);
+	mutex_unlock(&global->lock);
+	kfree(v1_context);
 	return NULL;
 }
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu.h
@ -1,20 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2014-2018 Etnaviv Project
- */
-
-#ifndef __ETNAVIV_IOMMU_H__
-#define __ETNAVIV_IOMMU_H__
-
-struct etnaviv_gpu;
-struct etnaviv_iommu_domain;
-
-struct etnaviv_iommu_domain *
-etnaviv_iommuv1_domain_alloc(struct etnaviv_gpu *gpu);
-void etnaviv_iommuv1_restore(struct etnaviv_gpu *gpu);
-
-struct etnaviv_iommu_domain *
-etnaviv_iommuv2_domain_alloc(struct etnaviv_gpu *gpu);
-void etnaviv_iommuv2_restore(struct etnaviv_gpu *gpu);
-
-#endif /* __ETNAVIV_IOMMU_H__ */
--- a/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_iommu_v2.c
@ -3,16 +3,16 @@
 * Copyright (C) 2016-2018 Etnaviv Project
 */

+#include <linux/bitops.h>
+#include <linux/dma-mapping.h>
 #include <linux/platform_device.h>
 #include <linux/sizes.h>
 #include <linux/slab.h>
-#include <linux/dma-mapping.h>
-#include <linux/bitops.h>
+#include <linux/vmalloc.h>

 #include "etnaviv_cmdbuf.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_mmu.h"
-#include "etnaviv_iommu.h"
 #include "state.xml.h"
 #include "state_hi.xml.h"

@ -27,11 +27,9 @@

 #define MMUv2_MAX_STLB_ENTRIES		1024

-struct etnaviv_iommuv2_domain {
-	struct etnaviv_iommu_domain base;
-	/* P(age) T(able) A(rray) */
-	u64 *pta_cpu;
-	dma_addr_t pta_dma;
+struct etnaviv_iommuv2_context {
+	struct etnaviv_iommu_context base;
+	unsigned short id;
 	/* M(aster) TLB aka first level pagetable */
 	u32 *mtlb_cpu;
 	dma_addr_t mtlb_dma;
@ -40,41 +38,62 @@ struct etnaviv_iommuv2_domain {
 	dma_addr_t stlb_dma[MMUv2_MAX_STLB_ENTRIES];
 };

-static struct etnaviv_iommuv2_domain *
-to_etnaviv_domain(struct etnaviv_iommu_domain *domain)
+static struct etnaviv_iommuv2_context *
+to_v2_context(struct etnaviv_iommu_context *context)
 {
-	return container_of(domain, struct etnaviv_iommuv2_domain, base);
+	return container_of(context, struct etnaviv_iommuv2_context, base);
 }

+static void etnaviv_iommuv2_free(struct etnaviv_iommu_context *context)
+{
+	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
+	int i;
+
+	drm_mm_takedown(&context->mm);
+
+	for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) {
+		if (v2_context->stlb_cpu[i])
+			dma_free_wc(context->global->dev, SZ_4K,
+				    v2_context->stlb_cpu[i],
+				    v2_context->stlb_dma[i]);
+	}
+
+	dma_free_wc(context->global->dev, SZ_4K, v2_context->mtlb_cpu,
+		    v2_context->mtlb_dma);
+
+	clear_bit(v2_context->id, context->global->v2.pta_alloc);
+
+	vfree(v2_context);
+}
 static int
-etnaviv_iommuv2_ensure_stlb(struct etnaviv_iommuv2_domain *etnaviv_domain,
+etnaviv_iommuv2_ensure_stlb(struct etnaviv_iommuv2_context *v2_context,
 			    int stlb)
 {
-	if (etnaviv_domain->stlb_cpu[stlb])
+	if (v2_context->stlb_cpu[stlb])
 		return 0;

-	etnaviv_domain->stlb_cpu[stlb] =
-			dma_alloc_wc(etnaviv_domain->base.dev, SZ_4K,
-				     &etnaviv_domain->stlb_dma[stlb],
+	v2_context->stlb_cpu[stlb] =
+			dma_alloc_wc(v2_context->base.global->dev, SZ_4K,
+				     &v2_context->stlb_dma[stlb],
 				     GFP_KERNEL);

-	if (!etnaviv_domain->stlb_cpu[stlb])
+	if (!v2_context->stlb_cpu[stlb])
 		return -ENOMEM;

-	memset32(etnaviv_domain->stlb_cpu[stlb], MMUv2_PTE_EXCEPTION,
+	memset32(v2_context->stlb_cpu[stlb], MMUv2_PTE_EXCEPTION,
 		 SZ_4K / sizeof(u32));

-	etnaviv_domain->mtlb_cpu[stlb] = etnaviv_domain->stlb_dma[stlb] |
-						      MMUv2_PTE_PRESENT;
+	v2_context->mtlb_cpu[stlb] =
+			v2_context->stlb_dma[stlb] | MMUv2_PTE_PRESENT;
+
 	return 0;
 }

-static int etnaviv_iommuv2_map(struct etnaviv_iommu_domain *domain,
+static int etnaviv_iommuv2_map(struct etnaviv_iommu_context *context,
 			       unsigned long iova, phys_addr_t paddr,
 			       size_t size, int prot)
 {
-	struct etnaviv_iommuv2_domain *etnaviv_domain =
-			to_etnaviv_domain(domain);
+	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
 	int mtlb_entry, stlb_entry, ret;
 	u32 entry = lower_32_bits(paddr) | MMUv2_PTE_PRESENT;

@ -90,20 +109,19 @@ static int etnaviv_iommuv2_map(struct etnaviv_iommu_domain *domain,
 	mtlb_entry = (iova & MMUv2_MTLB_MASK) >> MMUv2_MTLB_SHIFT;
 	stlb_entry = (iova & MMUv2_STLB_MASK) >> MMUv2_STLB_SHIFT;

-	ret = etnaviv_iommuv2_ensure_stlb(etnaviv_domain, mtlb_entry);
+	ret = etnaviv_iommuv2_ensure_stlb(v2_context, mtlb_entry);
 	if (ret)
 		return ret;

-	etnaviv_domain->stlb_cpu[mtlb_entry][stlb_entry] = entry;
+	v2_context->stlb_cpu[mtlb_entry][stlb_entry] = entry;

 	return 0;
 }

-static size_t etnaviv_iommuv2_unmap(struct etnaviv_iommu_domain *domain,
+static size_t etnaviv_iommuv2_unmap(struct etnaviv_iommu_context *context,
 				    unsigned long iova, size_t size)
 {
-	struct etnaviv_iommuv2_domain *etnaviv_domain =
-			to_etnaviv_domain(domain);
+	struct etnaviv_iommuv2_context *etnaviv_domain = to_v2_context(context);
 	int mtlb_entry, stlb_entry;

 	if (size != SZ_4K)
@ -117,118 +135,35 @@ static size_t etnaviv_iommuv2_unmap(struct etnaviv_iommu_domain *domain,
 	return SZ_4K;
 }

-static int etnaviv_iommuv2_init(struct etnaviv_iommuv2_domain *etnaviv_domain)
+static size_t etnaviv_iommuv2_dump_size(struct etnaviv_iommu_context *context)
 {
-	int ret;
-
-	/* allocate scratch page */
-	etnaviv_domain->base.bad_page_cpu =
-			dma_alloc_wc(etnaviv_domain->base.dev, SZ_4K,
-				     &etnaviv_domain->base.bad_page_dma,
-				     GFP_KERNEL);
-	if (!etnaviv_domain->base.bad_page_cpu) {
-		ret = -ENOMEM;
-		goto fail_mem;
-	}
-
-	memset32(etnaviv_domain->base.bad_page_cpu, 0xdead55aa,
-		 SZ_4K / sizeof(u32));
-
-	etnaviv_domain->pta_cpu = dma_alloc_wc(etnaviv_domain->base.dev,
-					       SZ_4K, &etnaviv_domain->pta_dma,
-					       GFP_KERNEL);
-	if (!etnaviv_domain->pta_cpu) {
-		ret = -ENOMEM;
-		goto fail_mem;
-	}
-
-	etnaviv_domain->mtlb_cpu = dma_alloc_wc(etnaviv_domain->base.dev,
-						SZ_4K, &etnaviv_domain->mtlb_dma,
-						GFP_KERNEL);
-	if (!etnaviv_domain->mtlb_cpu) {
-		ret = -ENOMEM;
-		goto fail_mem;
-	}
-
-	memset32(etnaviv_domain->mtlb_cpu, MMUv2_PTE_EXCEPTION,
-		 MMUv2_MAX_STLB_ENTRIES);
-
-	return 0;
-
-fail_mem:
-	if (etnaviv_domain->base.bad_page_cpu)
-		dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
-			    etnaviv_domain->base.bad_page_cpu,
-			    etnaviv_domain->base.bad_page_dma);
-
-	if (etnaviv_domain->pta_cpu)
-		dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
-			    etnaviv_domain->pta_cpu, etnaviv_domain->pta_dma);
-
-	if (etnaviv_domain->mtlb_cpu)
-		dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
-			    etnaviv_domain->mtlb_cpu, etnaviv_domain->mtlb_dma);
-
-	return ret;
-}
-
-static void etnaviv_iommuv2_domain_free(struct etnaviv_iommu_domain *domain)
-{
-	struct etnaviv_iommuv2_domain *etnaviv_domain =
-			to_etnaviv_domain(domain);
-	int i;
-
-	dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
-		    etnaviv_domain->base.bad_page_cpu,
-		    etnaviv_domain->base.bad_page_dma);
-
-	dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
-		    etnaviv_domain->pta_cpu, etnaviv_domain->pta_dma);
-
-	dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
-		    etnaviv_domain->mtlb_cpu, etnaviv_domain->mtlb_dma);
-
-	for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++) {
-		if (etnaviv_domain->stlb_cpu[i])
-			dma_free_wc(etnaviv_domain->base.dev, SZ_4K,
-				    etnaviv_domain->stlb_cpu[i],
-				    etnaviv_domain->stlb_dma[i]);
-	}
-
-	vfree(etnaviv_domain);
-}
-
-static size_t etnaviv_iommuv2_dump_size(struct etnaviv_iommu_domain *domain)
-{
-	struct etnaviv_iommuv2_domain *etnaviv_domain =
-			to_etnaviv_domain(domain);
+	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
 	size_t dump_size = SZ_4K;
 	int i;

 	for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++)
-		if (etnaviv_domain->mtlb_cpu[i] & MMUv2_PTE_PRESENT)
+		if (v2_context->mtlb_cpu[i] & MMUv2_PTE_PRESENT)
 			dump_size += SZ_4K;

 	return dump_size;
 }

-static void etnaviv_iommuv2_dump(struct etnaviv_iommu_domain *domain, void *buf)
+static void etnaviv_iommuv2_dump(struct etnaviv_iommu_context *context, void *buf)
 {
-	struct etnaviv_iommuv2_domain *etnaviv_domain =
-			to_etnaviv_domain(domain);
+	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
 	int i;

-	memcpy(buf, etnaviv_domain->mtlb_cpu, SZ_4K);
+	memcpy(buf, v2_context->mtlb_cpu, SZ_4K);
 	buf += SZ_4K;
 	for (i = 0; i < MMUv2_MAX_STLB_ENTRIES; i++, buf += SZ_4K)
-		if (etnaviv_domain->mtlb_cpu[i] & MMUv2_PTE_PRESENT)
-			memcpy(buf, etnaviv_domain->stlb_cpu[i], SZ_4K);
+		if (v2_context->mtlb_cpu[i] & MMUv2_PTE_PRESENT)
+			memcpy(buf, v2_context->stlb_cpu[i], SZ_4K);
 }

-static void etnaviv_iommuv2_restore_nonsec(struct etnaviv_gpu *gpu)
+static void etnaviv_iommuv2_restore_nonsec(struct etnaviv_gpu *gpu,
+	struct etnaviv_iommu_context *context)
 {
-	struct etnaviv_iommuv2_domain *etnaviv_domain =
-			to_etnaviv_domain(gpu->mmu->domain);
+	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
 	u16 prefetch;

 	/* If the MMU is already enabled the state is still there. */
@ -236,8 +171,8 @@ static void etnaviv_iommuv2_restore_nonsec(struct etnaviv_gpu *gpu)
 		return;

 	prefetch = etnaviv_buffer_config_mmuv2(gpu,
-				(u32)etnaviv_domain->mtlb_dma,
-				(u32)etnaviv_domain->base.bad_page_dma);
+				(u32)v2_context->mtlb_dma,
+				(u32)context->global->bad_page_dma);
 	etnaviv_gpu_start_fe(gpu, (u32)etnaviv_cmdbuf_get_pa(&gpu->buffer),
 			     prefetch);
 	etnaviv_gpu_wait_idle(gpu, 100);
@ -245,10 +180,10 @@ static void etnaviv_iommuv2_restore_nonsec(struct etnaviv_gpu *gpu)
 	gpu_write(gpu, VIVS_MMUv2_CONTROL, VIVS_MMUv2_CONTROL_ENABLE);
 }

-static void etnaviv_iommuv2_restore_sec(struct etnaviv_gpu *gpu)
+static void etnaviv_iommuv2_restore_sec(struct etnaviv_gpu *gpu,
+	struct etnaviv_iommu_context *context)
 {
-	struct etnaviv_iommuv2_domain *etnaviv_domain =
-				to_etnaviv_domain(gpu->mmu->domain);
+	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
 	u16 prefetch;

 	/* If the MMU is already enabled the state is still there. */
@ -256,26 +191,26 @@ static void etnaviv_iommuv2_restore_sec(struct etnaviv_gpu *gpu)
 		return;

 	gpu_write(gpu, VIVS_MMUv2_PTA_ADDRESS_LOW,
-		  lower_32_bits(etnaviv_domain->pta_dma));
+		  lower_32_bits(context->global->v2.pta_dma));
 	gpu_write(gpu, VIVS_MMUv2_PTA_ADDRESS_HIGH,
-		  upper_32_bits(etnaviv_domain->pta_dma));
+		  upper_32_bits(context->global->v2.pta_dma));
 	gpu_write(gpu, VIVS_MMUv2_PTA_CONTROL, VIVS_MMUv2_PTA_CONTROL_ENABLE);

 	gpu_write(gpu, VIVS_MMUv2_NONSEC_SAFE_ADDR_LOW,
-		  lower_32_bits(etnaviv_domain->base.bad_page_dma));
+		  lower_32_bits(context->global->bad_page_dma));
 	gpu_write(gpu, VIVS_MMUv2_SEC_SAFE_ADDR_LOW,
-		  lower_32_bits(etnaviv_domain->base.bad_page_dma));
+		  lower_32_bits(context->global->bad_page_dma));
 	gpu_write(gpu, VIVS_MMUv2_SAFE_ADDRESS_CONFIG,
 		  VIVS_MMUv2_SAFE_ADDRESS_CONFIG_NON_SEC_SAFE_ADDR_HIGH(
-		  upper_32_bits(etnaviv_domain->base.bad_page_dma)) |
+		  upper_32_bits(context->global->bad_page_dma)) |
 		  VIVS_MMUv2_SAFE_ADDRESS_CONFIG_SEC_SAFE_ADDR_HIGH(
-		  upper_32_bits(etnaviv_domain->base.bad_page_dma)));
+		  upper_32_bits(context->global->bad_page_dma)));

-	etnaviv_domain->pta_cpu[0] = etnaviv_domain->mtlb_dma |
-				     VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K;
+	context->global->v2.pta_cpu[v2_context->id] = v2_context->mtlb_dma |
+				 	 VIVS_MMUv2_CONFIGURATION_MODE_MODE4_K;

 	/* trigger a PTA load through the FE */
-	prefetch = etnaviv_buffer_config_pta(gpu);
+	prefetch = etnaviv_buffer_config_pta(gpu, v2_context->id);
 	etnaviv_gpu_start_fe(gpu, (u32)etnaviv_cmdbuf_get_pa(&gpu->buffer),
 			     prefetch);
 	etnaviv_gpu_wait_idle(gpu, 100);
@ -283,14 +218,28 @@ static void etnaviv_iommuv2_restore_sec(struct etnaviv_gpu *gpu)
 	gpu_write(gpu, VIVS_MMUv2_SEC_CONTROL, VIVS_MMUv2_SEC_CONTROL_ENABLE);
 }

-void etnaviv_iommuv2_restore(struct etnaviv_gpu *gpu)
+u32 etnaviv_iommuv2_get_mtlb_addr(struct etnaviv_iommu_context *context)
+{
+	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
+
+	return v2_context->mtlb_dma;
+}
+
+unsigned short etnaviv_iommuv2_get_pta_id(struct etnaviv_iommu_context *context)
+{
+	struct etnaviv_iommuv2_context *v2_context = to_v2_context(context);
+
+	return v2_context->id;
+}
+static void etnaviv_iommuv2_restore(struct etnaviv_gpu *gpu,
+				    struct etnaviv_iommu_context *context)
 {
 	switch (gpu->sec_mode) {
 	case ETNA_SEC_NONE:
-		etnaviv_iommuv2_restore_nonsec(gpu);
+		etnaviv_iommuv2_restore_nonsec(gpu, context);
 		break;
 	case ETNA_SEC_KERNEL:
-		etnaviv_iommuv2_restore_sec(gpu);
+		etnaviv_iommuv2_restore_sec(gpu, context);
 		break;
 	default:
 		WARN(1, "unhandled GPU security mode\n");
@ -298,39 +247,58 @@ void etnaviv_iommuv2_restore(struct etnaviv_gpu *gpu)
 	}
 }

-static const struct etnaviv_iommu_domain_ops etnaviv_iommuv2_ops = {
-	.free = etnaviv_iommuv2_domain_free,
+const struct etnaviv_iommu_ops etnaviv_iommuv2_ops = {
+	.free = etnaviv_iommuv2_free,
 	.map = etnaviv_iommuv2_map,
 	.unmap = etnaviv_iommuv2_unmap,
 	.dump_size = etnaviv_iommuv2_dump_size,
 	.dump = etnaviv_iommuv2_dump,
+	.restore = etnaviv_iommuv2_restore,
 };

-struct etnaviv_iommu_domain *
-etnaviv_iommuv2_domain_alloc(struct etnaviv_gpu *gpu)
+struct etnaviv_iommu_context *
+etnaviv_iommuv2_context_alloc(struct etnaviv_iommu_global *global)
 {
-	struct etnaviv_iommuv2_domain *etnaviv_domain;
-	struct etnaviv_iommu_domain *domain;
-	int ret;
+	struct etnaviv_iommuv2_context *v2_context;
+	struct etnaviv_iommu_context *context;

-	etnaviv_domain = vzalloc(sizeof(*etnaviv_domain));
-	if (!etnaviv_domain)
+	v2_context = vzalloc(sizeof(*v2_context));
+	if (!v2_context)
 		return NULL;

-	domain = &etnaviv_domain->base;
-
-	domain->dev = gpu->dev;
-	domain->base = SZ_4K;
-	domain->size = (u64)SZ_1G * 4 - SZ_4K;
-	domain->ops = &etnaviv_iommuv2_ops;
-
-	ret = etnaviv_iommuv2_init(etnaviv_domain);
-	if (ret)
+	mutex_lock(&global->lock);
+	v2_context->id = find_first_zero_bit(global->v2.pta_alloc,
+					     ETNAVIV_PTA_ENTRIES);
+	if (v2_context->id < ETNAVIV_PTA_ENTRIES) {
+		set_bit(v2_context->id, global->v2.pta_alloc);
+	} else {
+		mutex_unlock(&global->lock);
 		goto out_free;
+	}
+	mutex_unlock(&global->lock);

-	return &etnaviv_domain->base;
+	v2_context->mtlb_cpu = dma_alloc_wc(global->dev, SZ_4K,
+					    &v2_context->mtlb_dma, GFP_KERNEL);
+	if (!v2_context->mtlb_cpu)
+		goto out_free_id;

+	memset32(v2_context->mtlb_cpu, MMUv2_PTE_EXCEPTION,
+		 MMUv2_MAX_STLB_ENTRIES);
+
+	global->v2.pta_cpu[v2_context->id] = v2_context->mtlb_dma;
+
+	context = &v2_context->base;
+	context->global = global;
+	kref_init(&context->refcount);
+	mutex_init(&context->lock);
+	INIT_LIST_HEAD(&context->mappings);
+	drm_mm_init(&context->mm, SZ_4K, (u64)SZ_1G * 4 - SZ_4K);
+
+	return context;
+
+out_free_id:
+	clear_bit(v2_context->id, global->v2.pta_alloc);
 out_free:
-	vfree(etnaviv_domain);
+	vfree(v2_context);
 	return NULL;
 }
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.c
@ -3,15 +3,17 @@
 * Copyright (C) 2015-2018 Etnaviv Project
 */

+#include <linux/dma-mapping.h>
+#include <linux/scatterlist.h>
+
 #include "common.xml.h"
 #include "etnaviv_cmdbuf.h"
 #include "etnaviv_drv.h"
 #include "etnaviv_gem.h"
 #include "etnaviv_gpu.h"
-#include "etnaviv_iommu.h"
 #include "etnaviv_mmu.h"

-static void etnaviv_domain_unmap(struct etnaviv_iommu_domain *domain,
+static void etnaviv_context_unmap(struct etnaviv_iommu_context *context,
 				 unsigned long iova, size_t size)
 {
 	size_t unmapped_page, unmapped = 0;
@ -24,7 +26,8 @@ static void etnaviv_domain_unmap(struct etnaviv_iommu_domain *domain,
 	}

 	while (unmapped < size) {
-		unmapped_page = domain->ops->unmap(domain, iova, pgsize);
+		unmapped_page = context->global->ops->unmap(context, iova,
+							    pgsize);
 		if (!unmapped_page)
 			break;

@ -33,7 +36,7 @@ static void etnaviv_domain_unmap(struct etnaviv_iommu_domain *domain,
 	}
 }

-static int etnaviv_domain_map(struct etnaviv_iommu_domain *domain,
+static int etnaviv_context_map(struct etnaviv_iommu_context *context,
 			      unsigned long iova, phys_addr_t paddr,
 			      size_t size, int prot)
 {
@ -49,7 +52,8 @@ static int etnaviv_domain_map(struct etnaviv_iommu_domain *domain,
 	}

 	while (size) {
-		ret = domain->ops->map(domain, iova, paddr, pgsize, prot);
+		ret = context->global->ops->map(context, iova, paddr, pgsize,
+						prot);
 		if (ret)
 			break;

@ -60,21 +64,19 @@ static int etnaviv_domain_map(struct etnaviv_iommu_domain *domain,

 	/* unroll mapping in case something went wrong */
 	if (ret)
-		etnaviv_domain_unmap(domain, orig_iova, orig_size - size);
+		etnaviv_context_unmap(context, orig_iova, orig_size - size);

 	return ret;
 }

-static int etnaviv_iommu_map(struct etnaviv_iommu *iommu, u32 iova,
+static int etnaviv_iommu_map(struct etnaviv_iommu_context *context, u32 iova,
 			     struct sg_table *sgt, unsigned len, int prot)
-{
-	struct etnaviv_iommu_domain *domain = iommu->domain;
-	struct scatterlist *sg;
+{	struct scatterlist *sg;
 	unsigned int da = iova;
 	unsigned int i, j;
 	int ret;

-	if (!domain || !sgt)
+	if (!context || !sgt)
 		return -EINVAL;

 	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
@ -83,7 +85,7 @@ static int etnaviv_iommu_map(struct etnaviv_iommu *iommu, u32 iova,

 		VERB("map[%d]: %08x %08x(%zx)", i, iova, pa, bytes);

-		ret = etnaviv_domain_map(domain, da, pa, bytes, prot);
+		ret = etnaviv_context_map(context, da, pa, bytes, prot);
 		if (ret)
 			goto fail;

@ -98,16 +100,15 @@ fail:
 	for_each_sg(sgt->sgl, sg, i, j) {
 		size_t bytes = sg_dma_len(sg) + sg->offset;

-		etnaviv_domain_unmap(domain, da, bytes);
+		etnaviv_context_unmap(context, da, bytes);
 		da += bytes;
 	}
 	return ret;
 }

-static void etnaviv_iommu_unmap(struct etnaviv_iommu *iommu, u32 iova,
+static void etnaviv_iommu_unmap(struct etnaviv_iommu_context *context, u32 iova,
 				struct sg_table *sgt, unsigned len)
 {
-	struct etnaviv_iommu_domain *domain = iommu->domain;
 	struct scatterlist *sg;
 	unsigned int da = iova;
 	int i;
@ -115,7 +116,7 @@ static void etnaviv_iommu_unmap(struct etnaviv_iommu *iommu, u32 iova,
 	for_each_sg(sgt->sgl, sg, sgt->nents, i) {
 		size_t bytes = sg_dma_len(sg) + sg->offset;

-		etnaviv_domain_unmap(domain, da, bytes);
+		etnaviv_context_unmap(context, da, bytes);

 		VERB("unmap[%d]: %08x(%zx)", i, iova, bytes);

@ -125,24 +126,24 @@ static void etnaviv_iommu_unmap(struct etnaviv_iommu *iommu, u32 iova,
 	}
 }

-static void etnaviv_iommu_remove_mapping(struct etnaviv_iommu *mmu,
+static void etnaviv_iommu_remove_mapping(struct etnaviv_iommu_context *context,
 	struct etnaviv_vram_mapping *mapping)
 {
 	struct etnaviv_gem_object *etnaviv_obj = mapping->object;

-	etnaviv_iommu_unmap(mmu, mapping->vram_node.start,
+	etnaviv_iommu_unmap(context, mapping->vram_node.start,
 			    etnaviv_obj->sgt, etnaviv_obj->base.size);
 	drm_mm_remove_node(&mapping->vram_node);
 }

-static int etnaviv_iommu_find_iova(struct etnaviv_iommu *mmu,
+static int etnaviv_iommu_find_iova(struct etnaviv_iommu_context *context,
 				   struct drm_mm_node *node, size_t size)
 {
 	struct etnaviv_vram_mapping *free = NULL;
 	enum drm_mm_insert_mode mode = DRM_MM_INSERT_LOW;
 	int ret;

-	lockdep_assert_held(&mmu->lock);
+	lockdep_assert_held(&context->lock);

 	while (1) {
 		struct etnaviv_vram_mapping *m, *n;
@ -150,17 +151,17 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu *mmu,
 		struct list_head list;
 		bool found;

-		ret = drm_mm_insert_node_in_range(&mmu->mm, node,
+		ret = drm_mm_insert_node_in_range(&context->mm, node,
 						  size, 0, 0, 0, U64_MAX, mode);
 		if (ret != -ENOSPC)
 			break;

 		/* Try to retire some entries */
-		drm_mm_scan_init(&scan, &mmu->mm, size, 0, 0, mode);
+		drm_mm_scan_init(&scan, &context->mm, size, 0, 0, mode);

 		found = 0;
 		INIT_LIST_HEAD(&list);
-		list_for_each_entry(free, &mmu->mappings, mmu_node) {
+		list_for_each_entry(free, &context->mappings, mmu_node) {
 			/* If this vram node has not been used, skip this. */
 			if (!free->vram_node.mm)
 				continue;
@ -202,8 +203,8 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu *mmu,
 		 * this mapping.
 		 */
 		list_for_each_entry_safe(m, n, &list, scan_node) {
-			etnaviv_iommu_remove_mapping(mmu, m);
-			m->mmu = NULL;
+			etnaviv_iommu_remove_mapping(context, m);
+			m->context = NULL;
 			list_del_init(&m->mmu_node);
 			list_del_init(&m->scan_node);
 		}
@ -219,9 +220,16 @@ static int etnaviv_iommu_find_iova(struct etnaviv_iommu *mmu,
 	return ret;
 }

-int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,
+static int etnaviv_iommu_insert_exact(struct etnaviv_iommu_context *context,
+		   struct drm_mm_node *node, size_t size, u64 va)
+{
+	return drm_mm_insert_node_in_range(&context->mm, node, size, 0, 0, va,
+					   va + size, DRM_MM_INSERT_LOWEST);
+}
+
+int etnaviv_iommu_map_gem(struct etnaviv_iommu_context *context,
 	struct etnaviv_gem_object *etnaviv_obj, u32 memory_base,
-	struct etnaviv_vram_mapping *mapping)
+	struct etnaviv_vram_mapping *mapping, u64 va)
 {
 	struct sg_table *sgt = etnaviv_obj->sgt;
 	struct drm_mm_node *node;
@ -229,17 +237,17 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,

 	lockdep_assert_held(&etnaviv_obj->lock);

-	mutex_lock(&mmu->lock);
+	mutex_lock(&context->lock);

 	/* v1 MMU can optimize single entry (contiguous) scatterlists */
-	if (mmu->version == ETNAVIV_IOMMU_V1 &&
+	if (context->global->version == ETNAVIV_IOMMU_V1 &&
 	    sgt->nents == 1 && !(etnaviv_obj->flags & ETNA_BO_FORCE_MMU)) {
 		u32 iova;

 		iova = sg_dma_address(sgt->sgl) - memory_base;
 		if (iova < 0x80000000 - sg_dma_len(sgt->sgl)) {
 			mapping->iova = iova;
-			list_add_tail(&mapping->mmu_node, &mmu->mappings);
+			list_add_tail(&mapping->mmu_node, &context->mappings);
 			ret = 0;
 			goto unlock;
 		}
@ -247,12 +255,17 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,

 	node = &mapping->vram_node;

-	ret = etnaviv_iommu_find_iova(mmu, node, etnaviv_obj->base.size);
+	if (va)
+		ret = etnaviv_iommu_insert_exact(context, node,
+						 etnaviv_obj->base.size, va);
+	else
+		ret = etnaviv_iommu_find_iova(context, node,
+					      etnaviv_obj->base.size);
 	if (ret < 0)
 		goto unlock;

 	mapping->iova = node->start;
-	ret = etnaviv_iommu_map(mmu, node->start, sgt, etnaviv_obj->base.size,
+	ret = etnaviv_iommu_map(context, node->start, sgt, etnaviv_obj->base.size,
 				ETNAVIV_PROT_READ | ETNAVIV_PROT_WRITE);

 	if (ret < 0) {
@ -260,130 +273,233 @@ int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,
 		goto unlock;
 	}

-	list_add_tail(&mapping->mmu_node, &mmu->mappings);
-	mmu->need_flush = true;
+	list_add_tail(&mapping->mmu_node, &context->mappings);
+	context->flush_seq++;
 unlock:
-	mutex_unlock(&mmu->lock);
+	mutex_unlock(&context->lock);

 	return ret;
 }

-void etnaviv_iommu_unmap_gem(struct etnaviv_iommu *mmu,
+void etnaviv_iommu_unmap_gem(struct etnaviv_iommu_context *context,
 	struct etnaviv_vram_mapping *mapping)
 {
 	WARN_ON(mapping->use);

-	mutex_lock(&mmu->lock);
+	mutex_lock(&context->lock);

 	/* If the vram node is on the mm, unmap and remove the node */
-	if (mapping->vram_node.mm == &mmu->mm)
-		etnaviv_iommu_remove_mapping(mmu, mapping);
+	if (mapping->vram_node.mm == &context->mm)
+		etnaviv_iommu_remove_mapping(context, mapping);

 	list_del(&mapping->mmu_node);
-	mmu->need_flush = true;
-	mutex_unlock(&mmu->lock);
+	context->flush_seq++;
+	mutex_unlock(&context->lock);
 }

-void etnaviv_iommu_destroy(struct etnaviv_iommu *mmu)
+static void etnaviv_iommu_context_free(struct kref *kref)
 {
-	drm_mm_takedown(&mmu->mm);
-	mmu->domain->ops->free(mmu->domain);
-	kfree(mmu);
+	struct etnaviv_iommu_context *context =
+		container_of(kref, struct etnaviv_iommu_context, refcount);
+
+	etnaviv_cmdbuf_suballoc_unmap(context, &context->cmdbuf_mapping);
+
+	context->global->ops->free(context);
+}
+void etnaviv_iommu_context_put(struct etnaviv_iommu_context *context)
+{
+	kref_put(&context->refcount, etnaviv_iommu_context_free);
 }

-struct etnaviv_iommu *etnaviv_iommu_new(struct etnaviv_gpu *gpu)
+struct etnaviv_iommu_context *
+etnaviv_iommu_context_init(struct etnaviv_iommu_global *global,
+			   struct etnaviv_cmdbuf_suballoc *suballoc)
 {
-	enum etnaviv_iommu_version version;
-	struct etnaviv_iommu *mmu;
+	struct etnaviv_iommu_context *ctx;
+	int ret;

-	mmu = kzalloc(sizeof(*mmu), GFP_KERNEL);
-	if (!mmu)
-		return ERR_PTR(-ENOMEM);
-
-	if (!(gpu->identity.minor_features1 & chipMinorFeatures1_MMU_VERSION)) {
-		mmu->domain = etnaviv_iommuv1_domain_alloc(gpu);
-		version = ETNAVIV_IOMMU_V1;
-	} else {
-		mmu->domain = etnaviv_iommuv2_domain_alloc(gpu);
-		version = ETNAVIV_IOMMU_V2;
-	}
-
-	if (!mmu->domain) {
-		dev_err(gpu->dev, "Failed to allocate GPU IOMMU domain\n");
-		kfree(mmu);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	mmu->gpu = gpu;
-	mmu->version = version;
-	mutex_init(&mmu->lock);
-	INIT_LIST_HEAD(&mmu->mappings);
-
-	drm_mm_init(&mmu->mm, mmu->domain->base, mmu->domain->size);
-
-	return mmu;
-}
-
-void etnaviv_iommu_restore(struct etnaviv_gpu *gpu)
-{
-	if (gpu->mmu->version == ETNAVIV_IOMMU_V1)
-		etnaviv_iommuv1_restore(gpu);
+	if (global->version == ETNAVIV_IOMMU_V1)
+		ctx = etnaviv_iommuv1_context_alloc(global);
 	else
-		etnaviv_iommuv2_restore(gpu);
+		ctx = etnaviv_iommuv2_context_alloc(global);
+
+	if (!ctx)
+		return NULL;
+
+	ret = etnaviv_cmdbuf_suballoc_map(suballoc, ctx, &ctx->cmdbuf_mapping,
+					  global->memory_base);
+	if (ret) {
+		global->ops->free(ctx);
+		return NULL;
+	}
+
+	return ctx;
 }

-int etnaviv_iommu_get_suballoc_va(struct etnaviv_gpu *gpu, dma_addr_t paddr,
-				  struct drm_mm_node *vram_node, size_t size,
-				  u32 *iova)
+void etnaviv_iommu_restore(struct etnaviv_gpu *gpu,
+			   struct etnaviv_iommu_context *context)
 {
-	struct etnaviv_iommu *mmu = gpu->mmu;
+	context->global->ops->restore(gpu, context);
+}

-	if (mmu->version == ETNAVIV_IOMMU_V1) {
-		*iova = paddr - gpu->memory_base;
+int etnaviv_iommu_get_suballoc_va(struct etnaviv_iommu_context *context,
+				  struct etnaviv_vram_mapping *mapping,
+				  u32 memory_base, dma_addr_t paddr,
+				  size_t size)
+{
+	mutex_lock(&context->lock);
+
+	if (mapping->use > 0) {
+		mapping->use++;
+		mutex_unlock(&context->lock);
 		return 0;
+	}
+
+	/*
+	 * For MMUv1 we don't add the suballoc region to the pagetables, as
+	 * those GPUs can only work with cmdbufs accessed through the linear
+	 * window. Instead we manufacture a mapping to make it look uniform
+	 * to the upper layers.
+	 */
+	if (context->global->version == ETNAVIV_IOMMU_V1) {
+		mapping->iova = paddr - memory_base;
 	} else {
+		struct drm_mm_node *node = &mapping->vram_node;
 		int ret;

-		mutex_lock(&mmu->lock);
-		ret = etnaviv_iommu_find_iova(mmu, vram_node, size);
+		ret = etnaviv_iommu_find_iova(context, node, size);
 		if (ret < 0) {
-			mutex_unlock(&mmu->lock);
+			mutex_unlock(&context->lock);
 			return ret;
 		}
-		ret = etnaviv_domain_map(mmu->domain, vram_node->start, paddr,
-					 size, ETNAVIV_PROT_READ);
-		if (ret < 0) {
-			drm_mm_remove_node(vram_node);
-			mutex_unlock(&mmu->lock);
-			return ret;
-		}
-		gpu->mmu->need_flush = true;
-		mutex_unlock(&mmu->lock);

-		*iova = (u32)vram_node->start;
+		mapping->iova = node->start;
+		ret = etnaviv_context_map(context, node->start, paddr, size,
+					  ETNAVIV_PROT_READ);
+		if (ret < 0) {
+			drm_mm_remove_node(node);
+			mutex_unlock(&context->lock);
+			return ret;
+		}
+
+		context->flush_seq++;
+	}
+
+	list_add_tail(&mapping->mmu_node, &context->mappings);
+	mapping->use = 1;
+
+	mutex_unlock(&context->lock);
+
+	return 0;
+}
+
+void etnaviv_iommu_put_suballoc_va(struct etnaviv_iommu_context *context,
+		  struct etnaviv_vram_mapping *mapping)
+{
+	struct drm_mm_node *node = &mapping->vram_node;
+
+	mutex_lock(&context->lock);
+	mapping->use--;
+
+	if (mapping->use > 0 || context->global->version == ETNAVIV_IOMMU_V1) {
+		mutex_unlock(&context->lock);
+		return;
+	}
+
+	etnaviv_context_unmap(context, node->start, node->size);
+	drm_mm_remove_node(node);
+	mutex_unlock(&context->lock);
+}
+
+size_t etnaviv_iommu_dump_size(struct etnaviv_iommu_context *context)
+{
+	return context->global->ops->dump_size(context);
+}
+
+void etnaviv_iommu_dump(struct etnaviv_iommu_context *context, void *buf)
+{
+	context->global->ops->dump(context, buf);
+}
+
+int etnaviv_iommu_global_init(struct etnaviv_gpu *gpu)
+{
+	enum etnaviv_iommu_version version = ETNAVIV_IOMMU_V1;
+	struct etnaviv_drm_private *priv = gpu->drm->dev_private;
+	struct etnaviv_iommu_global *global;
+	struct device *dev = gpu->drm->dev;
+
+	if (gpu->identity.minor_features1 & chipMinorFeatures1_MMU_VERSION)
+		version = ETNAVIV_IOMMU_V2;
+
+	if (priv->mmu_global) {
+		if (priv->mmu_global->version != version) {
+			dev_err(gpu->dev,
+				"MMU version doesn't match global version\n");
+			return -ENXIO;
+		}
+
+		priv->mmu_global->use++;
 		return 0;
 	}
-}

-void etnaviv_iommu_put_suballoc_va(struct etnaviv_gpu *gpu,
-				   struct drm_mm_node *vram_node, size_t size,
-				   u32 iova)
-{
-	struct etnaviv_iommu *mmu = gpu->mmu;
+	global = kzalloc(sizeof(*global), GFP_KERNEL);
+	if (!global)
+		return -ENOMEM;

-	if (mmu->version == ETNAVIV_IOMMU_V2) {
-		mutex_lock(&mmu->lock);
-		etnaviv_domain_unmap(mmu->domain, iova, size);
-		drm_mm_remove_node(vram_node);
-		mutex_unlock(&mmu->lock);
+	global->bad_page_cpu = dma_alloc_wc(dev, SZ_4K, &global->bad_page_dma,
+					    GFP_KERNEL);
+	if (!global->bad_page_cpu)
+		goto free_global;
+
+	memset32(global->bad_page_cpu, 0xdead55aa, SZ_4K / sizeof(u32));
+
+	if (version == ETNAVIV_IOMMU_V2) {
+		global->v2.pta_cpu = dma_alloc_wc(dev, ETNAVIV_PTA_SIZE,
+					       &global->v2.pta_dma, GFP_KERNEL);
+		if (!global->v2.pta_cpu)
+			goto free_bad_page;
 	}
-}
-size_t etnaviv_iommu_dump_size(struct etnaviv_iommu *iommu)
-{
-	return iommu->domain->ops->dump_size(iommu->domain);
+
+	global->dev = dev;
+	global->version = version;
+	global->use = 1;
+	mutex_init(&global->lock);
+
+	if (version == ETNAVIV_IOMMU_V1)
+		global->ops = &etnaviv_iommuv1_ops;
+	else
+		global->ops = &etnaviv_iommuv2_ops;
+
+	priv->mmu_global = global;
+
+	return 0;
+
+free_bad_page:
+	dma_free_wc(dev, SZ_4K, global->bad_page_cpu, global->bad_page_dma);
+free_global:
+	kfree(global);
+
+	return -ENOMEM;
 }

-void etnaviv_iommu_dump(struct etnaviv_iommu *iommu, void *buf)
+void etnaviv_iommu_global_fini(struct etnaviv_gpu *gpu)
 {
-	iommu->domain->ops->dump(iommu->domain, buf);
+	struct etnaviv_drm_private *priv = gpu->drm->dev_private;
+	struct etnaviv_iommu_global *global = priv->mmu_global;
+
+	if (--global->use > 0)
+		return;
+
+	if (global->v2.pta_cpu)
+		dma_free_wc(global->dev, ETNAVIV_PTA_SIZE,
+			    global->v2.pta_cpu, global->v2.pta_dma);
+
+	if (global->bad_page_cpu)
+		dma_free_wc(global->dev, SZ_4K,
+			    global->bad_page_cpu, global->bad_page_dma);
+
+	mutex_destroy(&global->lock);
+	kfree(global);
+
+	priv->mmu_global = NULL;
 }
--- a/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
+++ b/drivers/gpu/drm/etnaviv/etnaviv_mmu.h
@ -16,61 +16,109 @@ enum etnaviv_iommu_version {

 struct etnaviv_gpu;
 struct etnaviv_vram_mapping;
-struct etnaviv_iommu_domain;
+struct etnaviv_iommu_global;
+struct etnaviv_iommu_context;

-struct etnaviv_iommu_domain_ops {
-	void (*free)(struct etnaviv_iommu_domain *);
-	int (*map)(struct etnaviv_iommu_domain *domain, unsigned long iova,
+struct etnaviv_iommu_ops {
+	struct etnaviv_iommu_context *(*init)(struct etnaviv_iommu_global *);
+	void (*free)(struct etnaviv_iommu_context *);
+	int (*map)(struct etnaviv_iommu_context *context, unsigned long iova,
 		   phys_addr_t paddr, size_t size, int prot);
-	size_t (*unmap)(struct etnaviv_iommu_domain *domain, unsigned long iova,
+	size_t (*unmap)(struct etnaviv_iommu_context *context, unsigned long iova,
 			size_t size);
-	size_t (*dump_size)(struct etnaviv_iommu_domain *);
-	void (*dump)(struct etnaviv_iommu_domain *, void *);
+	size_t (*dump_size)(struct etnaviv_iommu_context *);
+	void (*dump)(struct etnaviv_iommu_context *, void *);
+	void (*restore)(struct etnaviv_gpu *, struct etnaviv_iommu_context *);
 };

-struct etnaviv_iommu_domain {
+extern const struct etnaviv_iommu_ops etnaviv_iommuv1_ops;
+extern const struct etnaviv_iommu_ops etnaviv_iommuv2_ops;
+
+#define ETNAVIV_PTA_SIZE	SZ_4K
+#define ETNAVIV_PTA_ENTRIES	(ETNAVIV_PTA_SIZE / sizeof(u64))
+
+struct etnaviv_iommu_global {
 	struct device *dev;
+	enum etnaviv_iommu_version version;
+	const struct etnaviv_iommu_ops *ops;
+	unsigned int use;
+	struct mutex lock;
+
 	void *bad_page_cpu;
 	dma_addr_t bad_page_dma;
-	u64 base;
-	u64 size;

-	const struct etnaviv_iommu_domain_ops *ops;
+	u32 memory_base;
+
+	/*
+	 * This union holds members needed by either MMUv1 or MMUv2, which
+	 * can not exist at the same time.
+	 */
+	union {
+		struct {
+			struct etnaviv_iommu_context *shared_context;
+		} v1;
+		struct {
+			/* P(age) T(able) A(rray) */
+			u64 *pta_cpu;
+			dma_addr_t pta_dma;
+			struct spinlock pta_lock;
+			DECLARE_BITMAP(pta_alloc, ETNAVIV_PTA_ENTRIES);
+		} v2;
+	};
 };

-struct etnaviv_iommu {
-	struct etnaviv_gpu *gpu;
-	struct etnaviv_iommu_domain *domain;
-
-	enum etnaviv_iommu_version version;
+struct etnaviv_iommu_context {
+	struct kref refcount;
+	struct etnaviv_iommu_global *global;

 	/* memory manager for GPU address area */
 	struct mutex lock;
 	struct list_head mappings;
 	struct drm_mm mm;
-	bool need_flush;
+	unsigned int flush_seq;
+
+	/* Not part of the context, but needs to have the same lifetime */
+	struct etnaviv_vram_mapping cmdbuf_mapping;
 };

+int etnaviv_iommu_global_init(struct etnaviv_gpu *gpu);
+void etnaviv_iommu_global_fini(struct etnaviv_gpu *gpu);
+
 struct etnaviv_gem_object;

-int etnaviv_iommu_map_gem(struct etnaviv_iommu *mmu,
+int etnaviv_iommu_map_gem(struct etnaviv_iommu_context *context,
 	struct etnaviv_gem_object *etnaviv_obj, u32 memory_base,
-	struct etnaviv_vram_mapping *mapping);
-void etnaviv_iommu_unmap_gem(struct etnaviv_iommu *mmu,
+	struct etnaviv_vram_mapping *mapping, u64 va);
+void etnaviv_iommu_unmap_gem(struct etnaviv_iommu_context *context,
 	struct etnaviv_vram_mapping *mapping);

-int etnaviv_iommu_get_suballoc_va(struct etnaviv_gpu *gpu, dma_addr_t paddr,
-				  struct drm_mm_node *vram_node, size_t size,
-				  u32 *iova);
-void etnaviv_iommu_put_suballoc_va(struct etnaviv_gpu *gpu,
-				   struct drm_mm_node *vram_node, size_t size,
-				   u32 iova);
+int etnaviv_iommu_get_suballoc_va(struct etnaviv_iommu_context *ctx,
+				  struct etnaviv_vram_mapping *mapping,
+				  u32 memory_base, dma_addr_t paddr,
+				  size_t size);
+void etnaviv_iommu_put_suballoc_va(struct etnaviv_iommu_context *ctx,
+				   struct etnaviv_vram_mapping *mapping);

-size_t etnaviv_iommu_dump_size(struct etnaviv_iommu *iommu);
-void etnaviv_iommu_dump(struct etnaviv_iommu *iommu, void *buf);
+size_t etnaviv_iommu_dump_size(struct etnaviv_iommu_context *ctx);
+void etnaviv_iommu_dump(struct etnaviv_iommu_context *ctx, void *buf);

-struct etnaviv_iommu *etnaviv_iommu_new(struct etnaviv_gpu *gpu);
-void etnaviv_iommu_destroy(struct etnaviv_iommu *iommu);
-void etnaviv_iommu_restore(struct etnaviv_gpu *gpu);
+struct etnaviv_iommu_context *
+etnaviv_iommu_context_init(struct etnaviv_iommu_global *global,
+			   struct etnaviv_cmdbuf_suballoc *suballoc);
+static inline void etnaviv_iommu_context_get(struct etnaviv_iommu_context *ctx)
+{
+	kref_get(&ctx->refcount);
+}
+void etnaviv_iommu_context_put(struct etnaviv_iommu_context *ctx);
+void etnaviv_iommu_restore(struct etnaviv_gpu *gpu,
+			   struct etnaviv_iommu_context *ctx);
+
+struct etnaviv_iommu_context *
+etnaviv_iommuv1_context_alloc(struct etnaviv_iommu_global *global);
+struct etnaviv_iommu_context *
+etnaviv_iommuv2_context_alloc(struct etnaviv_iommu_global *global);
+
+u32 etnaviv_iommuv2_get_mtlb_addr(struct etnaviv_iommu_context *context);
+unsigned short etnaviv_iommuv2_get_pta_id(struct etnaviv_iommu_context *context);

 #endif /* __ETNAVIV_MMU_H__ */
--- a/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_perfmon.c
@ -4,6 +4,7 @@
 * Copyright (C) 2017 Zodiac Inflight Innovations
 */

+#include "common.xml.h"
 #include "etnaviv_gpu.h"
 #include "etnaviv_perfmon.h"
 #include "state_hi.xml.h"
@ -15,8 +16,8 @@ struct etnaviv_pm_signal {
 	u32 data;

 	u32 (*sample)(struct etnaviv_gpu *gpu,
-	              const struct etnaviv_pm_domain *domain,
-	              const struct etnaviv_pm_signal *signal);
+		      const struct etnaviv_pm_domain *domain,
+		      const struct etnaviv_pm_signal *signal);
 };

 struct etnaviv_pm_domain {
@ -35,13 +36,6 @@ struct etnaviv_pm_domain_meta {
 	u32 nr_domains;
 };

-static u32 simple_reg_read(struct etnaviv_gpu *gpu,
-	const struct etnaviv_pm_domain *domain,
-	const struct etnaviv_pm_signal *signal)
-{
-	return gpu_read(gpu, signal->data);
-}
-
 static u32 perf_reg_read(struct etnaviv_gpu *gpu,
 	const struct etnaviv_pm_domain *domain,
 	const struct etnaviv_pm_signal *signal)
@ -75,6 +69,34 @@ static u32 pipe_reg_read(struct etnaviv_gpu *gpu,
 	return value;
 }

+static u32 hi_total_cycle_read(struct etnaviv_gpu *gpu,
+	const struct etnaviv_pm_domain *domain,
+	const struct etnaviv_pm_signal *signal)
+{
+	u32 reg = VIVS_HI_PROFILE_TOTAL_CYCLES;
+
+	if (gpu->identity.model == chipModel_GC880 ||
+		gpu->identity.model == chipModel_GC2000 ||
+		gpu->identity.model == chipModel_GC2100)
+		reg = VIVS_MC_PROFILE_CYCLE_COUNTER;
+
+	return gpu_read(gpu, reg);
+}
+
+static u32 hi_total_idle_cycle_read(struct etnaviv_gpu *gpu,
+	const struct etnaviv_pm_domain *domain,
+	const struct etnaviv_pm_signal *signal)
+{
+	u32 reg = VIVS_HI_PROFILE_IDLE_CYCLES;
+
+	if (gpu->identity.model == chipModel_GC880 ||
+		gpu->identity.model == chipModel_GC2000 ||
+		gpu->identity.model == chipModel_GC2100)
+		reg = VIVS_HI_PROFILE_TOTAL_CYCLES;
+
+	return gpu_read(gpu, reg);
+}
+
 static const struct etnaviv_pm_domain doms_3d[] = {
 	{
 		.name = "HI",
@ -84,13 +106,13 @@ static const struct etnaviv_pm_domain doms_3d[] = {
 		.signal = (const struct etnaviv_pm_signal[]) {
 			{
 				"TOTAL_CYCLES",
-				VIVS_HI_PROFILE_TOTAL_CYCLES,
-				&simple_reg_read
+				0,
+				&hi_total_cycle_read
 			},
 			{
 				"IDLE_CYCLES",
-				VIVS_HI_PROFILE_IDLE_CYCLES,
-				&simple_reg_read
+				0,
+				&hi_total_idle_cycle_read
 			},
 			{
 				"AXI_CYCLES_READ_REQUEST_STALLED",
--- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c
+++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c
@ -3,7 +3,7 @@
 * Copyright (C) 2017 Etnaviv Project
 */

-#include <linux/kthread.h>
+#include <linux/moduleparam.h>

 #include "etnaviv_drv.h"
 #include "etnaviv_dump.h"
@ -115,7 +115,7 @@ static void etnaviv_sched_timedout_job(struct drm_sched_job *sched_job)
 		drm_sched_increase_karma(sched_job);

 	/* get the GPU back into the init state */
-	etnaviv_core_dump(gpu);
+	etnaviv_core_dump(submit);
 	etnaviv_gpu_recover_hang(gpu);

 	drm_sched_resubmit_jobs(&gpu->sched);
--- a/include/uapi/drm/etnaviv_drm.h
+++ b/include/uapi/drm/etnaviv_drm.h
@ -73,6 +73,7 @@ struct drm_etnaviv_timespec {
 #define ETNAVIV_PARAM_GPU_INSTRUCTION_COUNT         0x18
 #define ETNAVIV_PARAM_GPU_NUM_CONSTANTS             0x19
 #define ETNAVIV_PARAM_GPU_NUM_VARYINGS              0x1a
+#define ETNAVIV_PARAM_SOFTPIN_START_ADDR            0x1b

 #define ETNA_MAX_PIPES 4

@ -148,6 +149,11 @@ struct drm_etnaviv_gem_submit_reloc {
 * then patching the cmdstream for this entry is skipped.  This can
 * avoid kernel needing to map/access the cmdstream bo in the common
 * case.
+ * If the submit is a softpin submit (ETNA_SUBMIT_SOFTPIN) the 'presumed'
+ * field is interpreted as the fixed location to map the bo into the gpu
+ * virtual address space. If the kernel is unable to map the buffer at
+ * this location the submit will fail. This means userspace is responsible
+ * for the whole gpu virtual address management.
 */
 #define ETNA_SUBMIT_BO_READ             0x0001
 #define ETNA_SUBMIT_BO_WRITE            0x0002
@ -177,9 +183,11 @@ struct drm_etnaviv_gem_submit_pmr {
 #define ETNA_SUBMIT_NO_IMPLICIT         0x0001
 #define ETNA_SUBMIT_FENCE_FD_IN         0x0002
 #define ETNA_SUBMIT_FENCE_FD_OUT        0x0004
+#define ETNA_SUBMIT_SOFTPIN             0x0008
 #define ETNA_SUBMIT_FLAGS		(ETNA_SUBMIT_NO_IMPLICIT | \
 					 ETNA_SUBMIT_FENCE_FD_IN | \
-					 ETNA_SUBMIT_FENCE_FD_OUT)
+					 ETNA_SUBMIT_FENCE_FD_OUT| \
+					 ETNA_SUBMIT_SOFTPIN)
 #define ETNA_PIPE_3D      0x00
 #define ETNA_PIPE_2D      0x01
 #define ETNA_PIPE_VG      0x02