From 6d1bcb957be2850e0776f24c289e1f87c256baeb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:43:07 +0100 Subject: [PATCH 01/13] iommu: Remove empty iommu_tlb_range_add() callback from iommu_ops Commit add02cfdc9bc ("iommu: Introduce Interface for IOMMU TLB Flushing") added three new TLB flushing operations to the IOMMU API so that the underlying driver operations can be batched when unmapping large regions of IO virtual address space. However, the ->iotlb_range_add() callback has not been implemented by any IOMMU drivers (amd_iommu.c implements it as an empty function, which incurs the overhead of an indirect branch). Instead, drivers either flush the entire IOTLB in the ->iotlb_sync() callback or perform the necessary invalidation during ->unmap(). Attempting to implement ->iotlb_range_add() for arm-smmu-v3.c revealed two major issues: 1. The page size used to map the region in the page-table is not known, and so it is not generally possible to issue TLB flushes in the most efficient manner. 2. The only mutable state passed to the callback is a pointer to the iommu_domain, which can be accessed concurrently and therefore requires expensive synchronisation to keep track of the outstanding flushes. Remove the callback entirely in preparation for extending ->unmap() and ->iotlb_sync() to update a token on the caller's stack. Signed-off-by: Will Deacon --- drivers/iommu/amd_iommu.c | 6 ------ drivers/iommu/iommu.c | 3 --- drivers/vfio/vfio_iommu_type1.c | 1 - include/linux/iommu.h | 15 --------------- 4 files changed, 25 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index b607a92791d3..f93b148cf55e 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3196,11 +3196,6 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain) domain_flush_complete(dom); } -static void amd_iommu_iotlb_range_add(struct iommu_domain *domain, - unsigned long iova, size_t size) -{ -} - const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .domain_alloc = amd_iommu_domain_alloc, @@ -3219,7 +3214,6 @@ const struct iommu_ops amd_iommu_ops = { .is_attach_deferred = amd_iommu_is_attach_deferred, .pgsize_bitmap = AMD_IOMMU_PGSIZES, .flush_iotlb_all = amd_iommu_flush_iotlb_all, - .iotlb_range_add = amd_iommu_iotlb_range_add, .iotlb_sync = amd_iommu_flush_iotlb_all, }; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 0c674d80c37f..6d7b25fe2474 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1903,9 +1903,6 @@ static size_t __iommu_unmap(struct iommu_domain *domain, if (!unmapped_page) break; - if (sync && ops->iotlb_range_add) - ops->iotlb_range_add(domain, iova, pgsize); - pr_debug("unmapped: iova 0x%lx size 0x%zx\n", iova, unmapped_page); diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 054391f30fa8..fad7fd8c167c 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -696,7 +696,6 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain, if (!unmapped) { kfree(entry); } else { - iommu_tlb_range_add(domain->domain, *iova, unmapped); entry->iova = *iova; entry->phys = phys; entry->len = unmapped; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index fdc355ccc570..1e21431262d9 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -201,7 +201,6 @@ struct iommu_sva_ops { * @map: map a physically contiguous memory region to an iommu domain * @unmap: unmap a physically contiguous memory region from an iommu domain * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain - * @iotlb_range_add: Add a given iova range to the flush queue for this domain * @iotlb_sync_map: Sync mappings created recently using @map to the hardware * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush * queue @@ -244,8 +243,6 @@ struct iommu_ops { size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, size_t size); void (*flush_iotlb_all)(struct iommu_domain *domain); - void (*iotlb_range_add)(struct iommu_domain *domain, - unsigned long iova, size_t size); void (*iotlb_sync_map)(struct iommu_domain *domain); void (*iotlb_sync)(struct iommu_domain *domain); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); @@ -476,13 +473,6 @@ static inline void iommu_flush_tlb_all(struct iommu_domain *domain) domain->ops->flush_iotlb_all(domain); } -static inline void iommu_tlb_range_add(struct iommu_domain *domain, - unsigned long iova, size_t size) -{ - if (domain->ops->iotlb_range_add) - domain->ops->iotlb_range_add(domain, iova, size); -} - static inline void iommu_tlb_sync(struct iommu_domain *domain) { if (domain->ops->iotlb_sync) @@ -637,11 +627,6 @@ static inline void iommu_flush_tlb_all(struct iommu_domain *domain) { } -static inline void iommu_tlb_range_add(struct iommu_domain *domain, - unsigned long iova, size_t size) -{ -} - static inline void iommu_tlb_sync(struct iommu_domain *domain) { } From f71da46719460acd5afa411e52dc8cdf1cb9b0ce Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:43:24 +0100 Subject: [PATCH 02/13] iommu/io-pgtable-arm: Remove redundant call to io_pgtable_tlb_sync() Commit b6b65ca20bc9 ("iommu/io-pgtable-arm: Add support for non-strict mode") added an unconditional call to io_pgtable_tlb_sync() immediately after the case where we replace a block entry with a table entry during an unmap() call. This is redundant, since the IOMMU API will call iommu_tlb_sync() on this path and the patch in question mentions this: | To save having to reason about it too much, make sure the invalidation | in arm_lpae_split_blk_unmap() just performs its own unconditional sync | to minimise the window in which we're technically violating the break- | before-make requirement on a live mapping. This might work out redundant | with an outer-level sync for strict unmaps, but we'll never be splitting | blocks on a DMA fastpath anyway. However, this sync gets in the way of deferred TLB invalidation for leaf entries and is at best a questionable, unproven hack. Remove it. Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm-v7s.c | 1 - drivers/iommu/io-pgtable-arm.c | 1 - 2 files changed, 2 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 0fc8dfab2abf..a62733c6a632 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -587,7 +587,6 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data, } io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); - io_pgtable_tlb_sync(&data->iop); return size; } diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 161a7d56264d..0d6633921c1e 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -583,7 +583,6 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, tablep = iopte_deref(pte, data); } else if (unmap_idx >= 0) { io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); - io_pgtable_tlb_sync(&data->iop); return size; } From 298f78895b081911e0b3605f07d79ebd3d4cf7b0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:43:34 +0100 Subject: [PATCH 03/13] iommu/io-pgtable: Rename iommu_gather_ops to iommu_flush_ops In preparation for TLB flush gathering in the IOMMU API, rename the iommu_gather_ops structure in io-pgtable to iommu_flush_ops, which better describes its purpose and avoids the potential for confusion between different levels of the API. $ find linux/ -type f -name '*.[ch]' | xargs sed -i 's/gather_ops/flush_ops/g' Signed-off-by: Will Deacon --- drivers/gpu/drm/panfrost/panfrost_mmu.c | 2 +- drivers/iommu/arm-smmu-v3.c | 4 ++-- drivers/iommu/arm-smmu.c | 8 ++++---- drivers/iommu/io-pgtable-arm-v7s.c | 2 +- drivers/iommu/io-pgtable-arm.c | 2 +- drivers/iommu/ipmmu-vmsa.c | 4 ++-- drivers/iommu/msm_iommu.c | 4 ++-- drivers/iommu/mtk_iommu.c | 4 ++-- drivers/iommu/qcom_iommu.c | 4 ++-- include/linux/io-pgtable.h | 6 +++--- 10 files changed, 20 insertions(+), 20 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 92ac995dd9c6..17bceb11e708 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -257,7 +257,7 @@ static void mmu_tlb_sync_context(void *cookie) // TODO: Wait 1000 GPU cycles for HW_ISSUE_6367/T60X } -static const struct iommu_gather_ops mmu_tlb_ops = { +static const struct iommu_flush_ops mmu_tlb_ops = { .tlb_flush_all = mmu_tlb_inv_context_s1, .tlb_add_flush = mmu_tlb_inv_range_nosync, .tlb_sync = mmu_tlb_sync_context, diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index a9a9fabd3968..7e137e1e28f1 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1603,7 +1603,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, } while (size -= granule); } -static const struct iommu_gather_ops arm_smmu_gather_ops = { +static const struct iommu_flush_ops arm_smmu_flush_ops = { .tlb_flush_all = arm_smmu_tlb_inv_context, .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, .tlb_sync = arm_smmu_tlb_sync, @@ -1796,7 +1796,7 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) .ias = ias, .oas = oas, .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENCY, - .tlb = &arm_smmu_gather_ops, + .tlb = &arm_smmu_flush_ops, .iommu_dev = smmu->dev, }; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 64977c131ee6..dc08db347ef3 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -251,7 +251,7 @@ enum arm_smmu_domain_stage { struct arm_smmu_domain { struct arm_smmu_device *smmu; struct io_pgtable_ops *pgtbl_ops; - const struct iommu_gather_ops *tlb_ops; + const struct iommu_flush_ops *tlb_ops; struct arm_smmu_cfg cfg; enum arm_smmu_domain_stage stage; bool non_strict; @@ -547,19 +547,19 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size, writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); } -static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = { +static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = { .tlb_flush_all = arm_smmu_tlb_inv_context_s1, .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, .tlb_sync = arm_smmu_tlb_sync_context, }; -static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = { +static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = { .tlb_flush_all = arm_smmu_tlb_inv_context_s2, .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, .tlb_sync = arm_smmu_tlb_sync_context, }; -static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = { +static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = { .tlb_flush_all = arm_smmu_tlb_inv_context_s2, .tlb_add_flush = arm_smmu_tlb_inv_vmid_nosync, .tlb_sync = arm_smmu_tlb_sync_vmid, diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index a62733c6a632..116f97ee991e 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -817,7 +817,7 @@ static void dummy_tlb_sync(void *cookie) WARN_ON(cookie != cfg_cookie); } -static const struct iommu_gather_ops dummy_tlb_ops = { +static const struct iommu_flush_ops dummy_tlb_ops = { .tlb_flush_all = dummy_tlb_flush_all, .tlb_add_flush = dummy_tlb_add_flush, .tlb_sync = dummy_tlb_sync, diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 0d6633921c1e..402f913b6f6d 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -1081,7 +1081,7 @@ static void dummy_tlb_sync(void *cookie) WARN_ON(cookie != cfg_cookie); } -static const struct iommu_gather_ops dummy_tlb_ops __initconst = { +static const struct iommu_flush_ops dummy_tlb_ops __initconst = { .tlb_flush_all = dummy_tlb_flush_all, .tlb_add_flush = dummy_tlb_add_flush, .tlb_sync = dummy_tlb_sync, diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index ad0098c0c87c..2c14a2c65b22 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -367,7 +367,7 @@ static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, /* The hardware doesn't support selective TLB flush. */ } -static const struct iommu_gather_ops ipmmu_gather_ops = { +static const struct iommu_flush_ops ipmmu_flush_ops = { .tlb_flush_all = ipmmu_tlb_flush_all, .tlb_add_flush = ipmmu_tlb_add_flush, .tlb_sync = ipmmu_tlb_flush_all, @@ -480,7 +480,7 @@ static int ipmmu_domain_init_context(struct ipmmu_vmsa_domain *domain) domain->cfg.pgsize_bitmap = SZ_1G | SZ_2M | SZ_4K; domain->cfg.ias = 32; domain->cfg.oas = 40; - domain->cfg.tlb = &ipmmu_gather_ops; + domain->cfg.tlb = &ipmmu_flush_ops; domain->io_domain.geometry.aperture_end = DMA_BIT_MASK(32); domain->io_domain.geometry.force_aperture = true; /* diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index b25e2eb9e038..8b602384a385 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -178,7 +178,7 @@ static void __flush_iotlb_sync(void *cookie) */ } -static const struct iommu_gather_ops msm_iommu_gather_ops = { +static const struct iommu_flush_ops msm_iommu_flush_ops = { .tlb_flush_all = __flush_iotlb, .tlb_add_flush = __flush_iotlb_range, .tlb_sync = __flush_iotlb_sync, @@ -345,7 +345,7 @@ static int msm_iommu_domain_config(struct msm_priv *priv) .pgsize_bitmap = msm_iommu_ops.pgsize_bitmap, .ias = 32, .oas = 32, - .tlb = &msm_iommu_gather_ops, + .tlb = &msm_iommu_flush_ops, .iommu_dev = priv->dev, }; diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 82e4be4dfdaf..fed77658d67e 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -188,7 +188,7 @@ static void mtk_iommu_tlb_sync(void *cookie) } } -static const struct iommu_gather_ops mtk_iommu_gather_ops = { +static const struct iommu_flush_ops mtk_iommu_flush_ops = { .tlb_flush_all = mtk_iommu_tlb_flush_all, .tlb_add_flush = mtk_iommu_tlb_add_flush_nosync, .tlb_sync = mtk_iommu_tlb_sync, @@ -267,7 +267,7 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom) .pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap, .ias = 32, .oas = 32, - .tlb = &mtk_iommu_gather_ops, + .tlb = &mtk_iommu_flush_ops, .iommu_dev = data->dev, }; diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 34d0b9783b3e..fd9d9f4da735 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -164,7 +164,7 @@ static void qcom_iommu_tlb_inv_range_nosync(unsigned long iova, size_t size, } } -static const struct iommu_gather_ops qcom_gather_ops = { +static const struct iommu_flush_ops qcom_flush_ops = { .tlb_flush_all = qcom_iommu_tlb_inv_context, .tlb_add_flush = qcom_iommu_tlb_inv_range_nosync, .tlb_sync = qcom_iommu_tlb_sync, @@ -215,7 +215,7 @@ static int qcom_iommu_init_domain(struct iommu_domain *domain, .pgsize_bitmap = qcom_iommu_ops.pgsize_bitmap, .ias = 32, .oas = 40, - .tlb = &qcom_gather_ops, + .tlb = &qcom_flush_ops, .iommu_dev = qcom_iommu->dev, }; diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index b5a450a3bb47..6292ea15d674 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -17,7 +17,7 @@ enum io_pgtable_fmt { }; /** - * struct iommu_gather_ops - IOMMU callbacks for TLB and page table management. + * struct iommu_flush_ops - IOMMU callbacks for TLB and page table management. * * @tlb_flush_all: Synchronously invalidate the entire TLB context. * @tlb_add_flush: Queue up a TLB invalidation for a virtual address range. @@ -28,7 +28,7 @@ enum io_pgtable_fmt { * Note that these can all be called in atomic context and must therefore * not block. */ -struct iommu_gather_ops { +struct iommu_flush_ops { void (*tlb_flush_all)(void *cookie); void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule, bool leaf, void *cookie); @@ -84,7 +84,7 @@ struct io_pgtable_cfg { unsigned int ias; unsigned int oas; bool coherent_walk; - const struct iommu_gather_ops *tlb; + const struct iommu_flush_ops *tlb; struct device *iommu_dev; /* Low-level data specific to the table format */ From a7d20dc19d9ea7012227be5144353012ffa3ddc4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:43:48 +0100 Subject: [PATCH 04/13] iommu: Introduce struct iommu_iotlb_gather for batching TLB flushes To permit batching of TLB flushes across multiple calls to the IOMMU driver's ->unmap() implementation, introduce a new structure for tracking the address range to be flushed and the granularity at which the flushing is required. This is hooked into the IOMMU API and its caller are updated to make use of the new structure. Subsequent patches will plumb this into the IOMMU drivers as well, but for now the gathering information is ignored. Signed-off-by: Will Deacon --- drivers/iommu/dma-iommu.c | 9 +++++-- drivers/iommu/iommu.c | 19 +++++++++------ drivers/vfio/vfio_iommu_type1.c | 26 +++++++++++++------- include/linux/iommu.h | 43 ++++++++++++++++++++++++++++++--- 4 files changed, 75 insertions(+), 22 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index a7f9c3edbcb2..80beb1f5994a 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -444,13 +444,18 @@ static void __iommu_dma_unmap(struct device *dev, dma_addr_t dma_addr, struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; size_t iova_off = iova_offset(iovad, dma_addr); + struct iommu_iotlb_gather iotlb_gather; + size_t unmapped; dma_addr -= iova_off; size = iova_align(iovad, size + iova_off); + iommu_iotlb_gather_init(&iotlb_gather); + + unmapped = iommu_unmap_fast(domain, dma_addr, size, &iotlb_gather); + WARN_ON(unmapped != size); - WARN_ON(iommu_unmap_fast(domain, dma_addr, size) != size); if (!cookie->fq_domain) - iommu_tlb_sync(domain); + iommu_tlb_sync(domain, &iotlb_gather); iommu_dma_free_iova(cookie, dma_addr, size); } diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 6d7b25fe2474..d67222fdfe44 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1862,7 +1862,7 @@ EXPORT_SYMBOL_GPL(iommu_map); static size_t __iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size, - bool sync) + struct iommu_iotlb_gather *iotlb_gather) { const struct iommu_ops *ops = domain->ops; size_t unmapped_page, unmapped = 0; @@ -1910,9 +1910,6 @@ static size_t __iommu_unmap(struct iommu_domain *domain, unmapped += unmapped_page; } - if (sync && ops->iotlb_sync) - ops->iotlb_sync(domain); - trace_unmap(orig_iova, size, unmapped); return unmapped; } @@ -1920,14 +1917,22 @@ static size_t __iommu_unmap(struct iommu_domain *domain, size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { - return __iommu_unmap(domain, iova, size, true); + struct iommu_iotlb_gather iotlb_gather; + size_t ret; + + iommu_iotlb_gather_init(&iotlb_gather); + ret = __iommu_unmap(domain, iova, size, &iotlb_gather); + iommu_tlb_sync(domain, &iotlb_gather); + + return ret; } EXPORT_SYMBOL_GPL(iommu_unmap); size_t iommu_unmap_fast(struct iommu_domain *domain, - unsigned long iova, size_t size) + unsigned long iova, size_t size, + struct iommu_iotlb_gather *iotlb_gather) { - return __iommu_unmap(domain, iova, size, false); + return __iommu_unmap(domain, iova, size, iotlb_gather); } EXPORT_SYMBOL_GPL(iommu_unmap_fast); diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index fad7fd8c167c..ad830abe1021 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -650,12 +650,13 @@ unpin_exit: } static long vfio_sync_unpin(struct vfio_dma *dma, struct vfio_domain *domain, - struct list_head *regions) + struct list_head *regions, + struct iommu_iotlb_gather *iotlb_gather) { long unlocked = 0; struct vfio_regions *entry, *next; - iommu_tlb_sync(domain->domain); + iommu_tlb_sync(domain->domain, iotlb_gather); list_for_each_entry_safe(entry, next, regions, list) { unlocked += vfio_unpin_pages_remote(dma, @@ -685,13 +686,15 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain, struct vfio_dma *dma, dma_addr_t *iova, size_t len, phys_addr_t phys, long *unlocked, struct list_head *unmapped_list, - int *unmapped_cnt) + int *unmapped_cnt, + struct iommu_iotlb_gather *iotlb_gather) { size_t unmapped = 0; struct vfio_regions *entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry) { - unmapped = iommu_unmap_fast(domain->domain, *iova, len); + unmapped = iommu_unmap_fast(domain->domain, *iova, len, + iotlb_gather); if (!unmapped) { kfree(entry); @@ -711,8 +714,8 @@ static size_t unmap_unpin_fast(struct vfio_domain *domain, * or in case of errors. */ if (*unmapped_cnt >= VFIO_IOMMU_TLB_SYNC_MAX || !unmapped) { - *unlocked += vfio_sync_unpin(dma, domain, - unmapped_list); + *unlocked += vfio_sync_unpin(dma, domain, unmapped_list, + iotlb_gather); *unmapped_cnt = 0; } @@ -743,6 +746,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, dma_addr_t iova = dma->iova, end = dma->iova + dma->size; struct vfio_domain *domain, *d; LIST_HEAD(unmapped_region_list); + struct iommu_iotlb_gather iotlb_gather; int unmapped_region_cnt = 0; long unlocked = 0; @@ -767,6 +771,7 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, cond_resched(); } + iommu_iotlb_gather_init(&iotlb_gather); while (iova < end) { size_t unmapped, len; phys_addr_t phys, next; @@ -795,7 +800,8 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, */ unmapped = unmap_unpin_fast(domain, dma, &iova, len, phys, &unlocked, &unmapped_region_list, - &unmapped_region_cnt); + &unmapped_region_cnt, + &iotlb_gather); if (!unmapped) { unmapped = unmap_unpin_slow(domain, dma, &iova, len, phys, &unlocked); @@ -806,8 +812,10 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma, dma->iommu_mapped = false; - if (unmapped_region_cnt) - unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list); + if (unmapped_region_cnt) { + unlocked += vfio_sync_unpin(dma, domain, &unmapped_region_list, + &iotlb_gather); + } if (do_accounting) { vfio_lock_acct(dma, -unlocked, true); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 1e21431262d9..aaf073010a9a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -191,6 +191,23 @@ struct iommu_sva_ops { #ifdef CONFIG_IOMMU_API +/** + * struct iommu_iotlb_gather - Range information for a pending IOTLB flush + * + * @start: IOVA representing the start of the range to be flushed + * @end: IOVA representing the end of the range to be flushed (exclusive) + * @pgsize: The interval at which to perform the flush + * + * This structure is intended to be updated by multiple calls to the + * ->unmap() function in struct iommu_ops before eventually being passed + * into ->iotlb_sync(). + */ +struct iommu_iotlb_gather { + unsigned long start; + unsigned long end; + size_t pgsize; +}; + /** * struct iommu_ops - iommu ops and capabilities * @capable: check capability @@ -375,6 +392,13 @@ static inline struct iommu_device *dev_to_iommu_device(struct device *dev) return (struct iommu_device *)dev_get_drvdata(dev); } +static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) +{ + *gather = (struct iommu_iotlb_gather) { + .start = ULONG_MAX, + }; +} + #define IOMMU_GROUP_NOTIFY_ADD_DEVICE 1 /* Device added */ #define IOMMU_GROUP_NOTIFY_DEL_DEVICE 2 /* Pre Device removed */ #define IOMMU_GROUP_NOTIFY_BIND_DRIVER 3 /* Pre Driver bind */ @@ -399,7 +423,8 @@ extern int iommu_map(struct iommu_domain *domain, unsigned long iova, extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); extern size_t iommu_unmap_fast(struct iommu_domain *domain, - unsigned long iova, size_t size); + unsigned long iova, size_t size, + struct iommu_iotlb_gather *iotlb_gather); extern size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg,unsigned int nents, int prot); extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova); @@ -473,10 +498,13 @@ static inline void iommu_flush_tlb_all(struct iommu_domain *domain) domain->ops->flush_iotlb_all(domain); } -static inline void iommu_tlb_sync(struct iommu_domain *domain) +static inline void iommu_tlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *iotlb_gather) { if (domain->ops->iotlb_sync) domain->ops->iotlb_sync(domain); + + iommu_iotlb_gather_init(iotlb_gather); } /* PCI device grouping function */ @@ -557,6 +585,7 @@ struct iommu_group {}; struct iommu_fwspec {}; struct iommu_device {}; struct iommu_fault_param {}; +struct iommu_iotlb_gather {}; static inline bool iommu_present(struct bus_type *bus) { @@ -611,7 +640,8 @@ static inline size_t iommu_unmap(struct iommu_domain *domain, } static inline size_t iommu_unmap_fast(struct iommu_domain *domain, - unsigned long iova, int gfp_order) + unsigned long iova, int gfp_order, + struct iommu_iotlb_gather *iotlb_gather) { return 0; } @@ -627,7 +657,8 @@ static inline void iommu_flush_tlb_all(struct iommu_domain *domain) { } -static inline void iommu_tlb_sync(struct iommu_domain *domain) +static inline void iommu_tlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *iotlb_gather) { } @@ -812,6 +843,10 @@ static inline struct iommu_device *dev_to_iommu_device(struct device *dev) return NULL; } +static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) +{ +} + static inline void iommu_device_unregister(struct iommu_device *iommu) { } From 4fcf8544fc677fc8af135f1d86b3ba69c4ad429d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:43:57 +0100 Subject: [PATCH 05/13] iommu: Introduce iommu_iotlb_gather_add_page() Introduce a helper function for drivers to use when updating an iommu_iotlb_gather structure in response to an ->unmap() call, rather than having to open-code the logic in every page-table implementation. Signed-off-by: Will Deacon --- include/linux/iommu.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index aaf073010a9a..ad41aee55bc6 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -507,6 +507,31 @@ static inline void iommu_tlb_sync(struct iommu_domain *domain, iommu_iotlb_gather_init(iotlb_gather); } +static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather, + unsigned long iova, size_t size) +{ + unsigned long start = iova, end = start + size; + + /* + * If the new page is disjoint from the current range or is mapped at + * a different granularity, then sync the TLB so that the gather + * structure can be rewritten. + */ + if (gather->pgsize != size || + end < gather->start || start > gather->end) { + if (gather->pgsize) + iommu_tlb_sync(domain, gather); + gather->pgsize = size; + } + + if (gather->end < end) + gather->end = end; + + if (gather->start > start) + gather->start = start; +} + /* PCI device grouping function */ extern struct iommu_group *pci_device_group(struct device *dev); /* Generic device grouping function */ @@ -847,6 +872,12 @@ static inline void iommu_iotlb_gather_init(struct iommu_iotlb_gather *gather) { } +static inline void iommu_iotlb_gather_add_page(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather, + unsigned long iova, size_t size) +{ +} + static inline void iommu_device_unregister(struct iommu_device *iommu) { } From 56f8af5e9d38f120cba2c2adb0786fa2dbc901a4 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:44:06 +0100 Subject: [PATCH 06/13] iommu: Pass struct iommu_iotlb_gather to ->unmap() and ->iotlb_sync() To allow IOMMU drivers to batch up TLB flushing operations and postpone them until ->iotlb_sync() is called, extend the prototypes for the ->unmap() and ->iotlb_sync() IOMMU ops callbacks to take a pointer to the current iommu_iotlb_gather structure. All affected IOMMU drivers are updated, but there should be no functional change since the extra parameter is ignored for now. Signed-off-by: Will Deacon --- drivers/iommu/amd_iommu.c | 11 +++++++++-- drivers/iommu/arm-smmu-v3.c | 7 ++++--- drivers/iommu/arm-smmu.c | 5 +++-- drivers/iommu/exynos-iommu.c | 3 ++- drivers/iommu/intel-iommu.c | 3 ++- drivers/iommu/iommu.c | 2 +- drivers/iommu/ipmmu-vmsa.c | 12 +++++++++--- drivers/iommu/msm_iommu.c | 2 +- drivers/iommu/mtk_iommu.c | 13 ++++++++++--- drivers/iommu/mtk_iommu_v1.c | 3 ++- drivers/iommu/omap-iommu.c | 2 +- drivers/iommu/qcom_iommu.c | 12 +++++++++--- drivers/iommu/rockchip-iommu.c | 2 +- drivers/iommu/s390-iommu.c | 3 ++- drivers/iommu/tegra-gart.c | 12 +++++++++--- drivers/iommu/tegra-smmu.c | 2 +- drivers/iommu/virtio-iommu.c | 5 +++-- include/linux/iommu.h | 7 ++++--- 18 files changed, 73 insertions(+), 33 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index f93b148cf55e..29eeea914660 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3055,7 +3055,8 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, } static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, - size_t page_size) + size_t page_size, + struct iommu_iotlb_gather *gather) { struct protection_domain *domain = to_pdomain(dom); size_t unmap_size; @@ -3196,6 +3197,12 @@ static void amd_iommu_flush_iotlb_all(struct iommu_domain *domain) domain_flush_complete(dom); } +static void amd_iommu_iotlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather) +{ + amd_iommu_flush_iotlb_all(domain); +} + const struct iommu_ops amd_iommu_ops = { .capable = amd_iommu_capable, .domain_alloc = amd_iommu_domain_alloc, @@ -3214,7 +3221,7 @@ const struct iommu_ops amd_iommu_ops = { .is_attach_deferred = amd_iommu_is_attach_deferred, .pgsize_bitmap = AMD_IOMMU_PGSIZES, .flush_iotlb_all = amd_iommu_flush_iotlb_all, - .iotlb_sync = amd_iommu_flush_iotlb_all, + .iotlb_sync = amd_iommu_iotlb_sync, }; /***************************************************************************** diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 7e137e1e28f1..80753b8ca054 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1985,8 +1985,8 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, return ops->map(ops, iova, paddr, size, prot); } -static size_t -arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) +static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, + size_t size, struct iommu_iotlb_gather *gather) { int ret; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); @@ -2010,7 +2010,8 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) arm_smmu_tlb_inv_context(smmu_domain); } -static void arm_smmu_iotlb_sync(struct iommu_domain *domain) +static void arm_smmu_iotlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather) { struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index dc08db347ef3..e535ae2a9e65 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1301,7 +1301,7 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, } static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size) + size_t size, struct iommu_iotlb_gather *gather) { struct io_pgtable_ops *ops = to_smmu_domain(domain)->pgtbl_ops; struct arm_smmu_device *smmu = to_smmu_domain(domain)->smmu; @@ -1329,7 +1329,8 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) } } -static void arm_smmu_iotlb_sync(struct iommu_domain *domain) +static void arm_smmu_iotlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather) { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index b0c1e5f9daae..cf5af34cb681 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -1130,7 +1130,8 @@ static void exynos_iommu_tlb_invalidate_entry(struct exynos_iommu_domain *domain } static size_t exynos_iommu_unmap(struct iommu_domain *iommu_domain, - unsigned long l_iova, size_t size) + unsigned long l_iova, size_t size, + struct iommu_iotlb_gather *gather) { struct exynos_iommu_domain *domain = to_exynos_domain(iommu_domain); sysmmu_iova_t iova = (sysmmu_iova_t)l_iova; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index ac4172c02244..b9fb8d6ddc6e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -5147,7 +5147,8 @@ static int intel_iommu_map(struct iommu_domain *domain, } static size_t intel_iommu_unmap(struct iommu_domain *domain, - unsigned long iova, size_t size) + unsigned long iova, size_t size, + struct iommu_iotlb_gather *gather) { struct dmar_domain *dmar_domain = to_dmar_domain(domain); struct page *freelist = NULL; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index d67222fdfe44..70bfbcc09248 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1899,7 +1899,7 @@ static size_t __iommu_unmap(struct iommu_domain *domain, while (unmapped < size) { size_t pgsize = iommu_pgsize(domain, iova, size - unmapped); - unmapped_page = ops->unmap(domain, iova, pgsize); + unmapped_page = ops->unmap(domain, iova, pgsize, iotlb_gather); if (!unmapped_page) break; diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 2c14a2c65b22..a9332b893ce2 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -733,14 +733,14 @@ static int ipmmu_map(struct iommu_domain *io_domain, unsigned long iova, } static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova, - size_t size) + size_t size, struct iommu_iotlb_gather *gather) { struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); return domain->iop->unmap(domain->iop, iova, size); } -static void ipmmu_iotlb_sync(struct iommu_domain *io_domain) +static void ipmmu_flush_iotlb_all(struct iommu_domain *io_domain) { struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); @@ -748,6 +748,12 @@ static void ipmmu_iotlb_sync(struct iommu_domain *io_domain) ipmmu_tlb_flush_all(domain); } +static void ipmmu_iotlb_sync(struct iommu_domain *io_domain, + struct iommu_iotlb_gather *gather) +{ + ipmmu_flush_iotlb_all(io_domain); +} + static phys_addr_t ipmmu_iova_to_phys(struct iommu_domain *io_domain, dma_addr_t iova) { @@ -957,7 +963,7 @@ static const struct iommu_ops ipmmu_ops = { .detach_dev = ipmmu_detach_device, .map = ipmmu_map, .unmap = ipmmu_unmap, - .flush_iotlb_all = ipmmu_iotlb_sync, + .flush_iotlb_all = ipmmu_flush_iotlb_all, .iotlb_sync = ipmmu_iotlb_sync, .iova_to_phys = ipmmu_iova_to_phys, .add_device = ipmmu_add_device, diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 8b602384a385..681ab3d3376d 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -509,7 +509,7 @@ static int msm_iommu_map(struct iommu_domain *domain, unsigned long iova, } static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t len) + size_t len, struct iommu_iotlb_gather *gather) { struct msm_priv *priv = to_msm_priv(domain); unsigned long flags; diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index fed77658d67e..c870f1674903 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -371,7 +371,8 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, } static size_t mtk_iommu_unmap(struct iommu_domain *domain, - unsigned long iova, size_t size) + unsigned long iova, size_t size, + struct iommu_iotlb_gather *gather) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); unsigned long flags; @@ -384,7 +385,13 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain, return unmapsz; } -static void mtk_iommu_iotlb_sync(struct iommu_domain *domain) +static void mtk_iommu_flush_iotlb_all(struct iommu_domain *domain) +{ + mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data()); +} + +static void mtk_iommu_iotlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather) { mtk_iommu_tlb_sync(mtk_iommu_get_m4u_data()); } @@ -490,7 +497,7 @@ static const struct iommu_ops mtk_iommu_ops = { .detach_dev = mtk_iommu_detach_device, .map = mtk_iommu_map, .unmap = mtk_iommu_unmap, - .flush_iotlb_all = mtk_iommu_iotlb_sync, + .flush_iotlb_all = mtk_iommu_flush_iotlb_all, .iotlb_sync = mtk_iommu_iotlb_sync, .iova_to_phys = mtk_iommu_iova_to_phys, .add_device = mtk_iommu_add_device, diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index abeeac488372..7b92ddd5d9fd 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -324,7 +324,8 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova, } static size_t mtk_iommu_unmap(struct iommu_domain *domain, - unsigned long iova, size_t size) + unsigned long iova, size_t size, + struct iommu_iotlb_gather *gather) { struct mtk_iommu_domain *dom = to_mtk_domain(domain); unsigned long flags; diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index dfb961d8c21b..8039bc5ee425 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1149,7 +1149,7 @@ static int omap_iommu_map(struct iommu_domain *domain, unsigned long da, } static size_t omap_iommu_unmap(struct iommu_domain *domain, unsigned long da, - size_t size) + size_t size, struct iommu_iotlb_gather *gather) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); struct device *dev = omap_domain->dev; diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index fd9d9f4da735..a7432991fa04 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -417,7 +417,7 @@ static int qcom_iommu_map(struct iommu_domain *domain, unsigned long iova, } static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size) + size_t size, struct iommu_iotlb_gather *gather) { size_t ret; unsigned long flags; @@ -441,7 +441,7 @@ static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova, return ret; } -static void qcom_iommu_iotlb_sync(struct iommu_domain *domain) +static void qcom_iommu_flush_iotlb_all(struct iommu_domain *domain) { struct qcom_iommu_domain *qcom_domain = to_qcom_iommu_domain(domain); struct io_pgtable *pgtable = container_of(qcom_domain->pgtbl_ops, @@ -454,6 +454,12 @@ static void qcom_iommu_iotlb_sync(struct iommu_domain *domain) pm_runtime_put_sync(qcom_domain->iommu->dev); } +static void qcom_iommu_iotlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather) +{ + qcom_iommu_flush_iotlb_all(domain); +} + static phys_addr_t qcom_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { @@ -581,7 +587,7 @@ static const struct iommu_ops qcom_iommu_ops = { .detach_dev = qcom_iommu_detach_dev, .map = qcom_iommu_map, .unmap = qcom_iommu_unmap, - .flush_iotlb_all = qcom_iommu_iotlb_sync, + .flush_iotlb_all = qcom_iommu_flush_iotlb_all, .iotlb_sync = qcom_iommu_iotlb_sync, .iova_to_phys = qcom_iommu_iova_to_phys, .add_device = qcom_iommu_add_device, diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index dc26d74d79c2..26290f310f90 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -794,7 +794,7 @@ static int rk_iommu_map(struct iommu_domain *domain, unsigned long _iova, } static size_t rk_iommu_unmap(struct iommu_domain *domain, unsigned long _iova, - size_t size) + size_t size, struct iommu_iotlb_gather *gather) { struct rk_iommu_domain *rk_domain = to_rk_domain(domain); unsigned long flags; diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index 22d4db302c1c..3b0b18e23187 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -314,7 +314,8 @@ static phys_addr_t s390_iommu_iova_to_phys(struct iommu_domain *domain, } static size_t s390_iommu_unmap(struct iommu_domain *domain, - unsigned long iova, size_t size) + unsigned long iova, size_t size, + struct iommu_iotlb_gather *gather) { struct s390_domain *s390_domain = to_s390_domain(domain); int flags = ZPCI_PTE_INVALID; diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 6d40bc1b38bf..3924f7c05544 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -207,7 +207,7 @@ static inline int __gart_iommu_unmap(struct gart_device *gart, } static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t bytes) + size_t bytes, struct iommu_iotlb_gather *gather) { struct gart_device *gart = gart_handle; int err; @@ -273,11 +273,17 @@ static int gart_iommu_of_xlate(struct device *dev, return 0; } -static void gart_iommu_sync(struct iommu_domain *domain) +static void gart_iommu_sync_map(struct iommu_domain *domain) { FLUSH_GART_REGS(gart_handle); } +static void gart_iommu_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather) +{ + gart_iommu_sync_map(domain); +} + static const struct iommu_ops gart_iommu_ops = { .capable = gart_iommu_capable, .domain_alloc = gart_iommu_domain_alloc, @@ -292,7 +298,7 @@ static const struct iommu_ops gart_iommu_ops = { .iova_to_phys = gart_iommu_iova_to_phys, .pgsize_bitmap = GART_IOMMU_PGSIZES, .of_xlate = gart_iommu_of_xlate, - .iotlb_sync_map = gart_iommu_sync, + .iotlb_sync_map = gart_iommu_sync_map, .iotlb_sync = gart_iommu_sync, }; diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index c4a652b227f8..7293fc3f796d 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -680,7 +680,7 @@ static int tegra_smmu_map(struct iommu_domain *domain, unsigned long iova, } static size_t tegra_smmu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size) + size_t size, struct iommu_iotlb_gather *gather) { struct tegra_smmu_as *as = to_smmu_as(domain); dma_addr_t pte_dma; diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 433f4d2ee956..5f9f91a4d7f3 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -742,7 +742,7 @@ static int viommu_map(struct iommu_domain *domain, unsigned long iova, } static size_t viommu_unmap(struct iommu_domain *domain, unsigned long iova, - size_t size) + size_t size, struct iommu_iotlb_gather *gather) { int ret = 0; size_t unmapped; @@ -788,7 +788,8 @@ static phys_addr_t viommu_iova_to_phys(struct iommu_domain *domain, return paddr; } -static void viommu_iotlb_sync(struct iommu_domain *domain) +static void viommu_iotlb_sync(struct iommu_domain *domain, + struct iommu_iotlb_gather *gather) { struct viommu_domain *vdomain = to_viommu_domain(domain); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ad41aee55bc6..64ebaff33455 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -258,10 +258,11 @@ struct iommu_ops { int (*map)(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot); size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, - size_t size); + size_t size, struct iommu_iotlb_gather *iotlb_gather); void (*flush_iotlb_all)(struct iommu_domain *domain); void (*iotlb_sync_map)(struct iommu_domain *domain); - void (*iotlb_sync)(struct iommu_domain *domain); + void (*iotlb_sync)(struct iommu_domain *domain, + struct iommu_iotlb_gather *iotlb_gather); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); int (*add_device)(struct device *dev); void (*remove_device)(struct device *dev); @@ -502,7 +503,7 @@ static inline void iommu_tlb_sync(struct iommu_domain *domain, struct iommu_iotlb_gather *iotlb_gather) { if (domain->ops->iotlb_sync) - domain->ops->iotlb_sync(domain); + domain->ops->iotlb_sync(domain, iotlb_gather); iommu_iotlb_gather_init(iotlb_gather); } From 3445545b2248300319b6965208e77140c960c3fd Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:44:16 +0100 Subject: [PATCH 07/13] iommu/io-pgtable: Introduce tlb_flush_walk() and tlb_flush_leaf() In preparation for deferring TLB flushes to iommu_tlb_sync(), introduce two new synchronous invalidation helpers to the io-pgtable API, which allow the unmap() code to force invalidation in cases where it cannot be deferred (e.g. when replacing a table with a block or when TLBI_ON_MAP is set). Signed-off-by: Will Deacon --- include/linux/io-pgtable.h | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 6292ea15d674..27275575b305 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -19,17 +19,31 @@ enum io_pgtable_fmt { /** * struct iommu_flush_ops - IOMMU callbacks for TLB and page table management. * - * @tlb_flush_all: Synchronously invalidate the entire TLB context. - * @tlb_add_flush: Queue up a TLB invalidation for a virtual address range. - * @tlb_sync: Ensure any queued TLB invalidation has taken effect, and - * any corresponding page table updates are visible to the - * IOMMU. + * @tlb_flush_all: Synchronously invalidate the entire TLB context. + * @tlb_flush_walk: Synchronously invalidate all intermediate TLB state + * (sometimes referred to as the "walk cache") for a virtual + * address range. + * @tlb_flush_leaf: Synchronously invalidate all leaf TLB state for a virtual + * address range. + * @tlb_add_flush: Optional callback to queue up leaf TLB invalidation for a + * virtual address range. This function exists purely as an + * optimisation for IOMMUs that cannot batch TLB invalidation + * operations efficiently and are therefore better suited to + * issuing them early rather than deferring them until + * iommu_tlb_sync(). + * @tlb_sync: Ensure any queued TLB invalidation has taken effect, and + * any corresponding page table updates are visible to the + * IOMMU. * * Note that these can all be called in atomic context and must therefore * not block. */ struct iommu_flush_ops { void (*tlb_flush_all)(void *cookie); + void (*tlb_flush_walk)(unsigned long iova, size_t size, size_t granule, + void *cookie); + void (*tlb_flush_leaf)(unsigned long iova, size_t size, size_t granule, + void *cookie); void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule, bool leaf, void *cookie); void (*tlb_sync)(void *cookie); From 05aed9412b0bd0d9a985d94010c42ff0a5c6cc29 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:44:25 +0100 Subject: [PATCH 08/13] iommu/io-pgtable: Hook up ->tlb_flush_walk() and ->tlb_flush_leaf() in drivers Hook up ->tlb_flush_walk() and ->tlb_flush_leaf() in drivers using the io-pgtable API so that we can start making use of them in the page-table code. For now, they can just wrap the implementations of ->tlb_add_flush and ->tlb_sync pending future optimisation in each driver. Signed-off-by: Will Deacon --- drivers/gpu/drm/panfrost/panfrost_mmu.c | 14 ++++++++++++++ drivers/iommu/arm-smmu-v3.c | 22 ++++++++++++++++++++++ drivers/iommu/arm-smmu.c | 24 ++++++++++++++++++++++++ drivers/iommu/ipmmu-vmsa.c | 8 ++++++++ drivers/iommu/msm_iommu.c | 16 ++++++++++++++++ drivers/iommu/mtk_iommu.c | 16 ++++++++++++++++ drivers/iommu/qcom_iommu.c | 16 ++++++++++++++++ 7 files changed, 116 insertions(+) diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 17bceb11e708..651858147bd6 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -257,8 +257,22 @@ static void mmu_tlb_sync_context(void *cookie) // TODO: Wait 1000 GPU cycles for HW_ISSUE_6367/T60X } +static void mmu_tlb_flush_walk(unsigned long iova, size_t size, size_t granule, + void *cookie) +{ + mmu_tlb_sync_context(cookie); +} + +static void mmu_tlb_flush_leaf(unsigned long iova, size_t size, size_t granule, + void *cookie) +{ + mmu_tlb_sync_context(cookie); +} + static const struct iommu_flush_ops mmu_tlb_ops = { .tlb_flush_all = mmu_tlb_inv_context_s1, + .tlb_flush_walk = mmu_tlb_flush_walk, + .tlb_flush_leaf = mmu_tlb_flush_leaf, .tlb_add_flush = mmu_tlb_inv_range_nosync, .tlb_sync = mmu_tlb_sync_context, }; diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 80753b8ca054..79819b003b07 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1603,8 +1603,30 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, } while (size -= granule); } +static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + struct arm_smmu_device *smmu = smmu_domain->smmu; + + arm_smmu_tlb_inv_range_nosync(iova, size, granule, false, cookie); + arm_smmu_cmdq_issue_sync(smmu); +} + +static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + struct arm_smmu_device *smmu = smmu_domain->smmu; + + arm_smmu_tlb_inv_range_nosync(iova, size, granule, true, cookie); + arm_smmu_cmdq_issue_sync(smmu); +} + static const struct iommu_flush_ops arm_smmu_flush_ops = { .tlb_flush_all = arm_smmu_tlb_inv_context, + .tlb_flush_walk = arm_smmu_tlb_inv_walk, + .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, .tlb_sync = arm_smmu_tlb_sync, }; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index e535ae2a9e65..e9f01b860ae3 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -547,20 +547,44 @@ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size, writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); } +static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + + smmu_domain->tlb_ops->tlb_add_flush(iova, size, granule, false, cookie); + smmu_domain->tlb_ops->tlb_sync(cookie); +} + +static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + + smmu_domain->tlb_ops->tlb_add_flush(iova, size, granule, true, cookie); + smmu_domain->tlb_ops->tlb_sync(cookie); +} + static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = { .tlb_flush_all = arm_smmu_tlb_inv_context_s1, + .tlb_flush_walk = arm_smmu_tlb_inv_walk, + .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, .tlb_sync = arm_smmu_tlb_sync_context, }; static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = { .tlb_flush_all = arm_smmu_tlb_inv_context_s2, + .tlb_flush_walk = arm_smmu_tlb_inv_walk, + .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, .tlb_sync = arm_smmu_tlb_sync_context, }; static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = { .tlb_flush_all = arm_smmu_tlb_inv_context_s2, + .tlb_flush_walk = arm_smmu_tlb_inv_walk, + .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, .tlb_add_flush = arm_smmu_tlb_inv_vmid_nosync, .tlb_sync = arm_smmu_tlb_sync_vmid, }; diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index a9332b893ce2..9cc7bcb7e39d 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -361,6 +361,12 @@ static void ipmmu_tlb_flush_all(void *cookie) ipmmu_tlb_invalidate(domain); } +static void ipmmu_tlb_flush(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ + ipmmu_tlb_flush_all(cookie); +} + static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, size_t granule, bool leaf, void *cookie) { @@ -369,6 +375,8 @@ static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, static const struct iommu_flush_ops ipmmu_flush_ops = { .tlb_flush_all = ipmmu_tlb_flush_all, + .tlb_flush_walk = ipmmu_tlb_flush, + .tlb_flush_leaf = ipmmu_tlb_flush, .tlb_add_flush = ipmmu_tlb_add_flush, .tlb_sync = ipmmu_tlb_flush_all, }; diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 681ab3d3376d..64132093751a 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -178,8 +178,24 @@ static void __flush_iotlb_sync(void *cookie) */ } +static void __flush_iotlb_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ + __flush_iotlb_range(iova, size, granule, false, cookie); + __flush_iotlb_sync(cookie); +} + +static void __flush_iotlb_leaf(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ + __flush_iotlb_range(iova, size, granule, true, cookie); + __flush_iotlb_sync(cookie); +} + static const struct iommu_flush_ops msm_iommu_flush_ops = { .tlb_flush_all = __flush_iotlb, + .tlb_flush_walk = __flush_iotlb_walk, + .tlb_flush_leaf = __flush_iotlb_leaf, .tlb_add_flush = __flush_iotlb_range, .tlb_sync = __flush_iotlb_sync, }; diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index c870f1674903..85a7176bf9ae 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -188,8 +188,24 @@ static void mtk_iommu_tlb_sync(void *cookie) } } +static void mtk_iommu_tlb_flush_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ + mtk_iommu_tlb_add_flush_nosync(iova, size, granule, false, cookie); + mtk_iommu_tlb_sync(cookie); +} + +static void mtk_iommu_tlb_flush_leaf(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ + mtk_iommu_tlb_add_flush_nosync(iova, size, granule, true, cookie); + mtk_iommu_tlb_sync(cookie); +} + static const struct iommu_flush_ops mtk_iommu_flush_ops = { .tlb_flush_all = mtk_iommu_tlb_flush_all, + .tlb_flush_walk = mtk_iommu_tlb_flush_walk, + .tlb_flush_leaf = mtk_iommu_tlb_flush_leaf, .tlb_add_flush = mtk_iommu_tlb_add_flush_nosync, .tlb_sync = mtk_iommu_tlb_sync, }; diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index a7432991fa04..643079e52e69 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -164,8 +164,24 @@ static void qcom_iommu_tlb_inv_range_nosync(unsigned long iova, size_t size, } } +static void qcom_iommu_tlb_flush_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ + qcom_iommu_tlb_inv_range_nosync(iova, size, granule, false, cookie); + qcom_iommu_tlb_sync(cookie); +} + +static void qcom_iommu_tlb_flush_leaf(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ + qcom_iommu_tlb_inv_range_nosync(iova, size, granule, true, cookie); + qcom_iommu_tlb_sync(cookie); +} + static const struct iommu_flush_ops qcom_flush_ops = { .tlb_flush_all = qcom_iommu_tlb_inv_context, + .tlb_flush_walk = qcom_iommu_tlb_flush_walk, + .tlb_flush_leaf = qcom_iommu_tlb_flush_leaf, .tlb_add_flush = qcom_iommu_tlb_inv_range_nosync, .tlb_sync = qcom_iommu_tlb_sync, }; From 10b7a7d912697afd681a0bcfced9e05543aded35 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:44:32 +0100 Subject: [PATCH 09/13] iommu/io-pgtable-arm: Call ->tlb_flush_walk() and ->tlb_flush_leaf() Now that all IOMMU drivers using the io-pgtable API implement the ->tlb_flush_walk() and ->tlb_flush_leaf() callbacks, we can use them in the io-pgtable code instead of ->tlb_add_flush() immediately followed by ->tlb_sync(). Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm-v7s.c | 25 +++++++++++++++---------- drivers/iommu/io-pgtable-arm.c | 17 ++++++++++++----- include/linux/io-pgtable.h | 14 ++++++++++++++ 3 files changed, 41 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 116f97ee991e..8d4914fe73bc 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -493,9 +493,8 @@ static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova, * a chance for anything to kick off a table walk for the new iova. */ if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) { - io_pgtable_tlb_add_flush(iop, iova, size, - ARM_V7S_BLOCK_SIZE(2), false); - io_pgtable_tlb_sync(iop); + io_pgtable_tlb_flush_walk(iop, iova, size, + ARM_V7S_BLOCK_SIZE(2)); } else { wmb(); } @@ -541,8 +540,7 @@ static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data, __arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg); size *= ARM_V7S_CONT_PAGES; - io_pgtable_tlb_add_flush(iop, iova, size, size, true); - io_pgtable_tlb_sync(iop); + io_pgtable_tlb_flush_leaf(iop, iova, size, size); return pte; } @@ -637,9 +635,8 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, for (i = 0; i < num_entries; i++) { if (ARM_V7S_PTE_IS_TABLE(pte[i], lvl)) { /* Also flush any partial walks */ - io_pgtable_tlb_add_flush(iop, iova, blk_size, - ARM_V7S_BLOCK_SIZE(lvl + 1), false); - io_pgtable_tlb_sync(iop); + io_pgtable_tlb_flush_walk(iop, iova, blk_size, + ARM_V7S_BLOCK_SIZE(lvl + 1)); ptep = iopte_deref(pte[i], lvl); __arm_v7s_free_table(ptep, lvl + 1, data); } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) { @@ -805,13 +802,19 @@ static void dummy_tlb_flush_all(void *cookie) WARN_ON(cookie != cfg_cookie); } -static void dummy_tlb_add_flush(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) +static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule, + void *cookie) { WARN_ON(cookie != cfg_cookie); WARN_ON(!(size & cfg_cookie->pgsize_bitmap)); } +static void dummy_tlb_add_flush(unsigned long iova, size_t size, + size_t granule, bool leaf, void *cookie) +{ + dummy_tlb_flush(iova, size, granule, cookie); +} + static void dummy_tlb_sync(void *cookie) { WARN_ON(cookie != cfg_cookie); @@ -819,6 +822,8 @@ static void dummy_tlb_sync(void *cookie) static const struct iommu_flush_ops dummy_tlb_ops = { .tlb_flush_all = dummy_tlb_flush_all, + .tlb_flush_walk = dummy_tlb_flush, + .tlb_flush_leaf = dummy_tlb_flush, .tlb_add_flush = dummy_tlb_add_flush, .tlb_sync = dummy_tlb_sync, }; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 402f913b6f6d..b58338c86323 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -611,9 +611,8 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, if (!iopte_leaf(pte, lvl, iop->fmt)) { /* Also flush any partial walks */ - io_pgtable_tlb_add_flush(iop, iova, size, - ARM_LPAE_GRANULE(data), false); - io_pgtable_tlb_sync(iop); + io_pgtable_tlb_flush_walk(iop, iova, size, + ARM_LPAE_GRANULE(data)); ptep = iopte_deref(pte, data); __arm_lpae_free_pgtable(data, lvl + 1, ptep); } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) { @@ -1069,13 +1068,19 @@ static void dummy_tlb_flush_all(void *cookie) WARN_ON(cookie != cfg_cookie); } -static void dummy_tlb_add_flush(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) +static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule, + void *cookie) { WARN_ON(cookie != cfg_cookie); WARN_ON(!(size & cfg_cookie->pgsize_bitmap)); } +static void dummy_tlb_add_flush(unsigned long iova, size_t size, + size_t granule, bool leaf, void *cookie) +{ + dummy_tlb_flush(iova, size, granule, cookie); +} + static void dummy_tlb_sync(void *cookie) { WARN_ON(cookie != cfg_cookie); @@ -1083,6 +1088,8 @@ static void dummy_tlb_sync(void *cookie) static const struct iommu_flush_ops dummy_tlb_ops __initconst = { .tlb_flush_all = dummy_tlb_flush_all, + .tlb_flush_walk = dummy_tlb_flush, + .tlb_flush_leaf = dummy_tlb_flush, .tlb_add_flush = dummy_tlb_add_flush, .tlb_sync = dummy_tlb_sync, }; diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 27275575b305..0618aac59e74 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -198,6 +198,20 @@ static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop) iop->cfg.tlb->tlb_flush_all(iop->cookie); } +static inline void +io_pgtable_tlb_flush_walk(struct io_pgtable *iop, unsigned long iova, + size_t size, size_t granule) +{ + iop->cfg.tlb->tlb_flush_walk(iova, size, granule, iop->cookie); +} + +static inline void +io_pgtable_tlb_flush_leaf(struct io_pgtable *iop, unsigned long iova, + size_t size, size_t granule) +{ + iop->cfg.tlb->tlb_flush_leaf(iova, size, granule, iop->cookie); +} + static inline void io_pgtable_tlb_add_flush(struct io_pgtable *iop, unsigned long iova, size_t size, size_t granule, bool leaf) { From abfd6fe0cd535d31ee83b668be6eb59ce6a8469d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:44:41 +0100 Subject: [PATCH 10/13] iommu/io-pgtable: Replace ->tlb_add_flush() with ->tlb_add_page() The ->tlb_add_flush() callback in the io-pgtable API now looks a bit silly: - It takes a size and a granule, which are always the same - It takes a 'bool leaf', which is always true - It only ever flushes a single page With that in mind, replace it with an optional ->tlb_add_page() callback that drops the useless parameters. Signed-off-by: Will Deacon --- drivers/gpu/drm/panfrost/panfrost_mmu.c | 5 -- drivers/iommu/arm-smmu-v3.c | 8 ++- drivers/iommu/arm-smmu.c | 88 ++++++++++++++++--------- drivers/iommu/io-pgtable-arm-v7s.c | 12 ++-- drivers/iommu/io-pgtable-arm.c | 11 ++-- drivers/iommu/ipmmu-vmsa.c | 7 -- drivers/iommu/msm_iommu.c | 7 +- drivers/iommu/mtk_iommu.c | 8 ++- drivers/iommu/qcom_iommu.c | 8 ++- include/linux/io-pgtable.h | 22 +++---- 10 files changed, 105 insertions(+), 71 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index 651858147bd6..ff9af320cacc 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -247,10 +247,6 @@ static void mmu_tlb_inv_context_s1(void *cookie) mmu_hw_do_operation(pfdev, 0, 0, ~0UL, AS_COMMAND_FLUSH_MEM); } -static void mmu_tlb_inv_range_nosync(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) -{} - static void mmu_tlb_sync_context(void *cookie) { //struct panfrost_device *pfdev = cookie; @@ -273,7 +269,6 @@ static const struct iommu_flush_ops mmu_tlb_ops = { .tlb_flush_all = mmu_tlb_inv_context_s1, .tlb_flush_walk = mmu_tlb_flush_walk, .tlb_flush_leaf = mmu_tlb_flush_leaf, - .tlb_add_flush = mmu_tlb_inv_range_nosync, .tlb_sync = mmu_tlb_sync_context, }; diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 79819b003b07..98c90a1b4b22 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1603,6 +1603,12 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, } while (size -= granule); } +static void arm_smmu_tlb_inv_page_nosync(unsigned long iova, size_t granule, + void *cookie) +{ + arm_smmu_tlb_inv_range_nosync(iova, granule, granule, true, cookie); +} + static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size, size_t granule, void *cookie) { @@ -1627,7 +1633,7 @@ static const struct iommu_flush_ops arm_smmu_flush_ops = { .tlb_flush_all = arm_smmu_tlb_inv_context, .tlb_flush_walk = arm_smmu_tlb_inv_walk, .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, - .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, + .tlb_add_page = arm_smmu_tlb_inv_page_nosync, .tlb_sync = arm_smmu_tlb_sync, }; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index e9f01b860ae3..f056164a94b0 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -248,10 +248,16 @@ enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_BYPASS, }; +struct arm_smmu_flush_ops { + struct iommu_flush_ops tlb; + void (*tlb_inv_range)(unsigned long iova, size_t size, size_t granule, + bool leaf, void *cookie) +}; + struct arm_smmu_domain { struct arm_smmu_device *smmu; struct io_pgtable_ops *pgtbl_ops; - const struct iommu_flush_ops *tlb_ops; + const struct arm_smmu_flush_ops *flush_ops; struct arm_smmu_cfg cfg; enum arm_smmu_domain_stage stage; bool non_strict; @@ -551,42 +557,62 @@ static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size, size_t granule, void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; + const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops; - smmu_domain->tlb_ops->tlb_add_flush(iova, size, granule, false, cookie); - smmu_domain->tlb_ops->tlb_sync(cookie); + ops->tlb_inv_range(iova, size, granule, false, cookie); + ops->tlb.tlb_sync(cookie); } static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size, size_t granule, void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; + const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops; - smmu_domain->tlb_ops->tlb_add_flush(iova, size, granule, true, cookie); - smmu_domain->tlb_ops->tlb_sync(cookie); + ops->tlb_inv_range(iova, size, granule, true, cookie); + ops->tlb.tlb_sync(cookie); } -static const struct iommu_flush_ops arm_smmu_s1_tlb_ops = { - .tlb_flush_all = arm_smmu_tlb_inv_context_s1, - .tlb_flush_walk = arm_smmu_tlb_inv_walk, - .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, - .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, - .tlb_sync = arm_smmu_tlb_sync_context, +static void arm_smmu_tlb_add_page(unsigned long iova, size_t granule, + void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops; + + ops->tlb_inv_range(iova, granule, granule, true, cookie); +} + +static const struct arm_smmu_flush_ops arm_smmu_s1_tlb_ops = { + .tlb = { + .tlb_flush_all = arm_smmu_tlb_inv_context_s1, + .tlb_flush_walk = arm_smmu_tlb_inv_walk, + .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, + .tlb_add_page = arm_smmu_tlb_add_page, + .tlb_sync = arm_smmu_tlb_sync_context, + }, + .tlb_inv_range = arm_smmu_tlb_inv_range_nosync, }; -static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v2 = { - .tlb_flush_all = arm_smmu_tlb_inv_context_s2, - .tlb_flush_walk = arm_smmu_tlb_inv_walk, - .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, - .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, - .tlb_sync = arm_smmu_tlb_sync_context, +static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v2 = { + .tlb = { + .tlb_flush_all = arm_smmu_tlb_inv_context_s2, + .tlb_flush_walk = arm_smmu_tlb_inv_walk, + .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, + .tlb_add_page = arm_smmu_tlb_add_page, + .tlb_sync = arm_smmu_tlb_sync_context, + }, + .tlb_inv_range = arm_smmu_tlb_inv_range_nosync, }; -static const struct iommu_flush_ops arm_smmu_s2_tlb_ops_v1 = { - .tlb_flush_all = arm_smmu_tlb_inv_context_s2, - .tlb_flush_walk = arm_smmu_tlb_inv_walk, - .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, - .tlb_add_flush = arm_smmu_tlb_inv_vmid_nosync, - .tlb_sync = arm_smmu_tlb_sync_vmid, +static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v1 = { + .tlb = { + .tlb_flush_all = arm_smmu_tlb_inv_context_s2, + .tlb_flush_walk = arm_smmu_tlb_inv_walk, + .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, + .tlb_add_page = arm_smmu_tlb_add_page, + .tlb_sync = arm_smmu_tlb_sync_vmid, + }, + .tlb_inv_range = arm_smmu_tlb_inv_vmid_nosync, }; static irqreturn_t arm_smmu_context_fault(int irq, void *dev) @@ -866,7 +892,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, ias = min(ias, 32UL); oas = min(oas, 32UL); } - smmu_domain->tlb_ops = &arm_smmu_s1_tlb_ops; + smmu_domain->flush_ops = &arm_smmu_s1_tlb_ops; break; case ARM_SMMU_DOMAIN_NESTED: /* @@ -886,9 +912,9 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, oas = min(oas, 40UL); } if (smmu->version == ARM_SMMU_V2) - smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v2; + smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v2; else - smmu_domain->tlb_ops = &arm_smmu_s2_tlb_ops_v1; + smmu_domain->flush_ops = &arm_smmu_s2_tlb_ops_v1; break; default: ret = -EINVAL; @@ -917,7 +943,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .ias = ias, .oas = oas, .coherent_walk = smmu->features & ARM_SMMU_FEAT_COHERENT_WALK, - .tlb = smmu_domain->tlb_ops, + .tlb = &smmu_domain->flush_ops->tlb, .iommu_dev = smmu->dev, }; @@ -1346,9 +1372,9 @@ static void arm_smmu_flush_iotlb_all(struct iommu_domain *domain) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; - if (smmu_domain->tlb_ops) { + if (smmu_domain->flush_ops) { arm_smmu_rpm_get(smmu); - smmu_domain->tlb_ops->tlb_flush_all(smmu_domain); + smmu_domain->flush_ops->tlb.tlb_flush_all(smmu_domain); arm_smmu_rpm_put(smmu); } } @@ -1359,9 +1385,9 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain, struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; - if (smmu_domain->tlb_ops) { + if (smmu_domain->flush_ops) { arm_smmu_rpm_get(smmu); - smmu_domain->tlb_ops->tlb_sync(smmu_domain); + smmu_domain->flush_ops->tlb.tlb_sync(smmu_domain); arm_smmu_rpm_put(smmu); } } diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 8d4914fe73bc..b3f975c95f76 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -584,7 +584,7 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data, return __arm_v7s_unmap(data, iova, size, 2, tablep); } - io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); + io_pgtable_tlb_add_page(&data->iop, iova, size); return size; } @@ -647,8 +647,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, */ smp_wmb(); } else { - io_pgtable_tlb_add_flush(iop, iova, blk_size, - blk_size, true); + io_pgtable_tlb_add_page(iop, iova, blk_size); } iova += blk_size; } @@ -809,10 +808,9 @@ static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule, WARN_ON(!(size & cfg_cookie->pgsize_bitmap)); } -static void dummy_tlb_add_flush(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) +static void dummy_tlb_add_page(unsigned long iova, size_t granule, void *cookie) { - dummy_tlb_flush(iova, size, granule, cookie); + dummy_tlb_flush(iova, granule, granule, cookie); } static void dummy_tlb_sync(void *cookie) @@ -824,7 +822,7 @@ static const struct iommu_flush_ops dummy_tlb_ops = { .tlb_flush_all = dummy_tlb_flush_all, .tlb_flush_walk = dummy_tlb_flush, .tlb_flush_leaf = dummy_tlb_flush, - .tlb_add_flush = dummy_tlb_add_flush, + .tlb_add_page = dummy_tlb_add_page, .tlb_sync = dummy_tlb_sync, }; diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index b58338c86323..a5c0db01533e 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -582,7 +582,7 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, tablep = iopte_deref(pte, data); } else if (unmap_idx >= 0) { - io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); + io_pgtable_tlb_add_page(&data->iop, iova, size); return size; } @@ -623,7 +623,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, */ smp_wmb(); } else { - io_pgtable_tlb_add_flush(iop, iova, size, size, true); + io_pgtable_tlb_add_page(iop, iova, size); } return size; @@ -1075,10 +1075,9 @@ static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule, WARN_ON(!(size & cfg_cookie->pgsize_bitmap)); } -static void dummy_tlb_add_flush(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) +static void dummy_tlb_add_page(unsigned long iova, size_t granule, void *cookie) { - dummy_tlb_flush(iova, size, granule, cookie); + dummy_tlb_flush(iova, granule, granule, cookie); } static void dummy_tlb_sync(void *cookie) @@ -1090,7 +1089,7 @@ static const struct iommu_flush_ops dummy_tlb_ops __initconst = { .tlb_flush_all = dummy_tlb_flush_all, .tlb_flush_walk = dummy_tlb_flush, .tlb_flush_leaf = dummy_tlb_flush, - .tlb_add_flush = dummy_tlb_add_flush, + .tlb_add_page = dummy_tlb_add_page, .tlb_sync = dummy_tlb_sync, }; diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 9cc7bcb7e39d..c4da271af90e 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -367,17 +367,10 @@ static void ipmmu_tlb_flush(unsigned long iova, size_t size, ipmmu_tlb_flush_all(cookie); } -static void ipmmu_tlb_add_flush(unsigned long iova, size_t size, - size_t granule, bool leaf, void *cookie) -{ - /* The hardware doesn't support selective TLB flush. */ -} - static const struct iommu_flush_ops ipmmu_flush_ops = { .tlb_flush_all = ipmmu_tlb_flush_all, .tlb_flush_walk = ipmmu_tlb_flush, .tlb_flush_leaf = ipmmu_tlb_flush, - .tlb_add_flush = ipmmu_tlb_add_flush, .tlb_sync = ipmmu_tlb_flush_all, }; diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 64132093751a..2cd83295a841 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -192,11 +192,16 @@ static void __flush_iotlb_leaf(unsigned long iova, size_t size, __flush_iotlb_sync(cookie); } +static void __flush_iotlb_page(unsigned long iova, size_t granule, void *cookie) +{ + __flush_iotlb_range(iova, granule, granule, true, cookie); +} + static const struct iommu_flush_ops msm_iommu_flush_ops = { .tlb_flush_all = __flush_iotlb, .tlb_flush_walk = __flush_iotlb_walk, .tlb_flush_leaf = __flush_iotlb_leaf, - .tlb_add_flush = __flush_iotlb_range, + .tlb_add_page = __flush_iotlb_page, .tlb_sync = __flush_iotlb_sync, }; diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 85a7176bf9ae..a0b4b4dc4b90 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -202,11 +202,17 @@ static void mtk_iommu_tlb_flush_leaf(unsigned long iova, size_t size, mtk_iommu_tlb_sync(cookie); } +static void mtk_iommu_tlb_flush_page_nosync(unsigned long iova, size_t granule, + void *cookie) +{ + mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie); +} + static const struct iommu_flush_ops mtk_iommu_flush_ops = { .tlb_flush_all = mtk_iommu_tlb_flush_all, .tlb_flush_walk = mtk_iommu_tlb_flush_walk, .tlb_flush_leaf = mtk_iommu_tlb_flush_leaf, - .tlb_add_flush = mtk_iommu_tlb_add_flush_nosync, + .tlb_add_page = mtk_iommu_tlb_flush_page_nosync, .tlb_sync = mtk_iommu_tlb_sync, }; diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 643079e52e69..7d8411dee4cf 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -178,11 +178,17 @@ static void qcom_iommu_tlb_flush_leaf(unsigned long iova, size_t size, qcom_iommu_tlb_sync(cookie); } +static void qcom_iommu_tlb_add_page(unsigned long iova, size_t granule, + void *cookie) +{ + qcom_iommu_tlb_inv_range_nosync(iova, granule, granule, true, cookie); +} + static const struct iommu_flush_ops qcom_flush_ops = { .tlb_flush_all = qcom_iommu_tlb_inv_context, .tlb_flush_walk = qcom_iommu_tlb_flush_walk, .tlb_flush_leaf = qcom_iommu_tlb_flush_leaf, - .tlb_add_flush = qcom_iommu_tlb_inv_range_nosync, + .tlb_add_page = qcom_iommu_tlb_add_page, .tlb_sync = qcom_iommu_tlb_sync, }; diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 0618aac59e74..99e04bd2baa1 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -25,12 +25,11 @@ enum io_pgtable_fmt { * address range. * @tlb_flush_leaf: Synchronously invalidate all leaf TLB state for a virtual * address range. - * @tlb_add_flush: Optional callback to queue up leaf TLB invalidation for a - * virtual address range. This function exists purely as an - * optimisation for IOMMUs that cannot batch TLB invalidation - * operations efficiently and are therefore better suited to - * issuing them early rather than deferring them until - * iommu_tlb_sync(). + * @tlb_add_page: Optional callback to queue up leaf TLB invalidation for a + * single page. This function exists purely as an optimisation + * for IOMMUs that cannot batch TLB invalidation operations + * efficiently and are therefore better suited to issuing them + * early rather than deferring them until iommu_tlb_sync(). * @tlb_sync: Ensure any queued TLB invalidation has taken effect, and * any corresponding page table updates are visible to the * IOMMU. @@ -44,8 +43,7 @@ struct iommu_flush_ops { void *cookie); void (*tlb_flush_leaf)(unsigned long iova, size_t size, size_t granule, void *cookie); - void (*tlb_add_flush)(unsigned long iova, size_t size, size_t granule, - bool leaf, void *cookie); + void (*tlb_add_page)(unsigned long iova, size_t granule, void *cookie); void (*tlb_sync)(void *cookie); }; @@ -212,10 +210,12 @@ io_pgtable_tlb_flush_leaf(struct io_pgtable *iop, unsigned long iova, iop->cfg.tlb->tlb_flush_leaf(iova, size, granule, iop->cookie); } -static inline void io_pgtable_tlb_add_flush(struct io_pgtable *iop, - unsigned long iova, size_t size, size_t granule, bool leaf) +static inline void +io_pgtable_tlb_add_page(struct io_pgtable *iop, unsigned long iova, + size_t granule) { - iop->cfg.tlb->tlb_add_flush(iova, size, granule, leaf, iop->cookie); + if (iop->cfg.tlb->tlb_add_page) + iop->cfg.tlb->tlb_add_page(iova, granule, iop->cookie); } static inline void io_pgtable_tlb_sync(struct io_pgtable *iop) From e953f7f2fa78d1c7fd064171f88457c6b1e21af9 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:44:50 +0100 Subject: [PATCH 11/13] iommu/io-pgtable: Remove unused ->tlb_sync() callback The ->tlb_sync() callback is no longer used, so it can be removed. Signed-off-by: Will Deacon --- drivers/gpu/drm/panfrost/panfrost_mmu.c | 1 - drivers/iommu/arm-smmu-v3.c | 8 -------- drivers/iommu/arm-smmu.c | 17 +++++++++-------- drivers/iommu/io-pgtable-arm-v7s.c | 6 ------ drivers/iommu/io-pgtable-arm.c | 6 ------ drivers/iommu/ipmmu-vmsa.c | 1 - drivers/iommu/msm_iommu.c | 20 +++++++------------- drivers/iommu/mtk_iommu.c | 1 - drivers/iommu/qcom_iommu.c | 1 - include/linux/io-pgtable.h | 9 --------- 10 files changed, 16 insertions(+), 54 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index ff9af320cacc..de22a2276e00 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -269,7 +269,6 @@ static const struct iommu_flush_ops mmu_tlb_ops = { .tlb_flush_all = mmu_tlb_inv_context_s1, .tlb_flush_walk = mmu_tlb_flush_walk, .tlb_flush_leaf = mmu_tlb_flush_leaf, - .tlb_sync = mmu_tlb_sync_context, }; static const char *access_type_name(struct panfrost_device *pfdev, diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 98c90a1b4b22..231093413ff9 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1545,13 +1545,6 @@ static int arm_smmu_atc_inv_domain(struct arm_smmu_domain *smmu_domain, } /* IO_PGTABLE API */ -static void arm_smmu_tlb_sync(void *cookie) -{ - struct arm_smmu_domain *smmu_domain = cookie; - - arm_smmu_cmdq_issue_sync(smmu_domain->smmu); -} - static void arm_smmu_tlb_inv_context(void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; @@ -1634,7 +1627,6 @@ static const struct iommu_flush_ops arm_smmu_flush_ops = { .tlb_flush_walk = arm_smmu_tlb_inv_walk, .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, .tlb_add_page = arm_smmu_tlb_inv_page_nosync, - .tlb_sync = arm_smmu_tlb_sync, }; /* IOMMU API */ diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index f056164a94b0..07a267c437d6 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -251,7 +251,8 @@ enum arm_smmu_domain_stage { struct arm_smmu_flush_ops { struct iommu_flush_ops tlb; void (*tlb_inv_range)(unsigned long iova, size_t size, size_t granule, - bool leaf, void *cookie) + bool leaf, void *cookie); + void (*tlb_sync)(void *cookie); }; struct arm_smmu_domain { @@ -539,7 +540,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears * almost negligible, but the benefit of getting the first one in as far ahead * of the sync as possible is significant, hence we don't just make this a - * no-op and set .tlb_sync to arm_smmu_inv_context_s2() as you might think. + * no-op and set .tlb_sync to arm_smmu_tlb_inv_context_s2() as you might think. */ static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size, size_t granule, bool leaf, void *cookie) @@ -560,7 +561,7 @@ static void arm_smmu_tlb_inv_walk(unsigned long iova, size_t size, const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops; ops->tlb_inv_range(iova, size, granule, false, cookie); - ops->tlb.tlb_sync(cookie); + ops->tlb_sync(cookie); } static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size, @@ -570,7 +571,7 @@ static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size, const struct arm_smmu_flush_ops *ops = smmu_domain->flush_ops; ops->tlb_inv_range(iova, size, granule, true, cookie); - ops->tlb.tlb_sync(cookie); + ops->tlb_sync(cookie); } static void arm_smmu_tlb_add_page(unsigned long iova, size_t granule, @@ -588,9 +589,9 @@ static const struct arm_smmu_flush_ops arm_smmu_s1_tlb_ops = { .tlb_flush_walk = arm_smmu_tlb_inv_walk, .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, .tlb_add_page = arm_smmu_tlb_add_page, - .tlb_sync = arm_smmu_tlb_sync_context, }, .tlb_inv_range = arm_smmu_tlb_inv_range_nosync, + .tlb_sync = arm_smmu_tlb_sync_context, }; static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v2 = { @@ -599,9 +600,9 @@ static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v2 = { .tlb_flush_walk = arm_smmu_tlb_inv_walk, .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, .tlb_add_page = arm_smmu_tlb_add_page, - .tlb_sync = arm_smmu_tlb_sync_context, }, .tlb_inv_range = arm_smmu_tlb_inv_range_nosync, + .tlb_sync = arm_smmu_tlb_sync_context, }; static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v1 = { @@ -610,9 +611,9 @@ static const struct arm_smmu_flush_ops arm_smmu_s2_tlb_ops_v1 = { .tlb_flush_walk = arm_smmu_tlb_inv_walk, .tlb_flush_leaf = arm_smmu_tlb_inv_leaf, .tlb_add_page = arm_smmu_tlb_add_page, - .tlb_sync = arm_smmu_tlb_sync_vmid, }, .tlb_inv_range = arm_smmu_tlb_inv_vmid_nosync, + .tlb_sync = arm_smmu_tlb_sync_vmid, }; static irqreturn_t arm_smmu_context_fault(int irq, void *dev) @@ -1387,7 +1388,7 @@ static void arm_smmu_iotlb_sync(struct iommu_domain *domain, if (smmu_domain->flush_ops) { arm_smmu_rpm_get(smmu); - smmu_domain->flush_ops->tlb.tlb_sync(smmu_domain); + smmu_domain->flush_ops->tlb_sync(smmu_domain); arm_smmu_rpm_put(smmu); } } diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index b3f975c95f76..203894fb6765 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -813,17 +813,11 @@ static void dummy_tlb_add_page(unsigned long iova, size_t granule, void *cookie) dummy_tlb_flush(iova, granule, granule, cookie); } -static void dummy_tlb_sync(void *cookie) -{ - WARN_ON(cookie != cfg_cookie); -} - static const struct iommu_flush_ops dummy_tlb_ops = { .tlb_flush_all = dummy_tlb_flush_all, .tlb_flush_walk = dummy_tlb_flush, .tlb_flush_leaf = dummy_tlb_flush, .tlb_add_page = dummy_tlb_add_page, - .tlb_sync = dummy_tlb_sync, }; #define __FAIL(ops) ({ \ diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index a5c0db01533e..f35516744965 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -1080,17 +1080,11 @@ static void dummy_tlb_add_page(unsigned long iova, size_t granule, void *cookie) dummy_tlb_flush(iova, granule, granule, cookie); } -static void dummy_tlb_sync(void *cookie) -{ - WARN_ON(cookie != cfg_cookie); -} - static const struct iommu_flush_ops dummy_tlb_ops __initconst = { .tlb_flush_all = dummy_tlb_flush_all, .tlb_flush_walk = dummy_tlb_flush, .tlb_flush_leaf = dummy_tlb_flush, .tlb_add_page = dummy_tlb_add_page, - .tlb_sync = dummy_tlb_sync, }; static void __init arm_lpae_dump_ops(struct io_pgtable_ops *ops) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index c4da271af90e..a2b8eff4c1f7 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -371,7 +371,6 @@ static const struct iommu_flush_ops ipmmu_flush_ops = { .tlb_flush_all = ipmmu_tlb_flush_all, .tlb_flush_walk = ipmmu_tlb_flush, .tlb_flush_leaf = ipmmu_tlb_flush, - .tlb_sync = ipmmu_tlb_flush_all, }; /* ----------------------------------------------------------------------------- diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 2cd83295a841..ccfc7ed230ef 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -168,28 +168,16 @@ fail: return; } -static void __flush_iotlb_sync(void *cookie) -{ - /* - * Nothing is needed here, the barrier to guarantee - * completion of the tlb sync operation is implicitly - * taken care when the iommu client does a writel before - * kick starting the other master. - */ -} - static void __flush_iotlb_walk(unsigned long iova, size_t size, size_t granule, void *cookie) { __flush_iotlb_range(iova, size, granule, false, cookie); - __flush_iotlb_sync(cookie); } static void __flush_iotlb_leaf(unsigned long iova, size_t size, size_t granule, void *cookie) { __flush_iotlb_range(iova, size, granule, true, cookie); - __flush_iotlb_sync(cookie); } static void __flush_iotlb_page(unsigned long iova, size_t granule, void *cookie) @@ -202,7 +190,6 @@ static const struct iommu_flush_ops msm_iommu_flush_ops = { .tlb_flush_walk = __flush_iotlb_walk, .tlb_flush_leaf = __flush_iotlb_leaf, .tlb_add_page = __flush_iotlb_page, - .tlb_sync = __flush_iotlb_sync, }; static int msm_iommu_alloc_ctx(unsigned long *map, int start, int end) @@ -712,6 +699,13 @@ static struct iommu_ops msm_iommu_ops = { .detach_dev = msm_iommu_detach_dev, .map = msm_iommu_map, .unmap = msm_iommu_unmap, + /* + * Nothing is needed here, the barrier to guarantee + * completion of the tlb sync operation is implicitly + * taken care when the iommu client does a writel before + * kick starting the other master. + */ + .iotlb_sync = NULL, .iova_to_phys = msm_iommu_iova_to_phys, .add_device = msm_iommu_add_device, .remove_device = msm_iommu_remove_device, diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index a0b4b4dc4b90..3785750bdb44 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -213,7 +213,6 @@ static const struct iommu_flush_ops mtk_iommu_flush_ops = { .tlb_flush_walk = mtk_iommu_tlb_flush_walk, .tlb_flush_leaf = mtk_iommu_tlb_flush_leaf, .tlb_add_page = mtk_iommu_tlb_flush_page_nosync, - .tlb_sync = mtk_iommu_tlb_sync, }; static irqreturn_t mtk_iommu_isr(int irq, void *dev_id) diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 7d8411dee4cf..0b8a6d6bb475 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -189,7 +189,6 @@ static const struct iommu_flush_ops qcom_flush_ops = { .tlb_flush_walk = qcom_iommu_tlb_flush_walk, .tlb_flush_leaf = qcom_iommu_tlb_flush_leaf, .tlb_add_page = qcom_iommu_tlb_add_page, - .tlb_sync = qcom_iommu_tlb_sync, }; static irqreturn_t qcom_iommu_fault(int irq, void *dev) diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 99e04bd2baa1..843310484fe2 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -30,9 +30,6 @@ enum io_pgtable_fmt { * for IOMMUs that cannot batch TLB invalidation operations * efficiently and are therefore better suited to issuing them * early rather than deferring them until iommu_tlb_sync(). - * @tlb_sync: Ensure any queued TLB invalidation has taken effect, and - * any corresponding page table updates are visible to the - * IOMMU. * * Note that these can all be called in atomic context and must therefore * not block. @@ -44,7 +41,6 @@ struct iommu_flush_ops { void (*tlb_flush_leaf)(unsigned long iova, size_t size, size_t granule, void *cookie); void (*tlb_add_page)(unsigned long iova, size_t granule, void *cookie); - void (*tlb_sync)(void *cookie); }; /** @@ -218,11 +214,6 @@ io_pgtable_tlb_add_page(struct io_pgtable *iop, unsigned long iova, iop->cfg.tlb->tlb_add_page(iova, granule, iop->cookie); } -static inline void io_pgtable_tlb_sync(struct io_pgtable *iop) -{ - iop->cfg.tlb->tlb_sync(iop->cookie); -} - /** * struct io_pgtable_init_fns - Alloc/free a set of page tables for a * particular format. From a2d3a382d6c682e22b263c9e7f0d857c3fa6c9d6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:44:58 +0100 Subject: [PATCH 12/13] iommu/io-pgtable: Pass struct iommu_iotlb_gather to ->unmap() Update the io-pgtable ->unmap() function to take an iommu_iotlb_gather pointer as an argument, and update the callers as appropriate. Signed-off-by: Will Deacon --- drivers/gpu/drm/panfrost/panfrost_mmu.c | 2 +- drivers/iommu/arm-smmu-v3.c | 2 +- drivers/iommu/arm-smmu.c | 2 +- drivers/iommu/io-pgtable-arm-v7s.c | 6 +++--- drivers/iommu/io-pgtable-arm.c | 7 +++---- drivers/iommu/ipmmu-vmsa.c | 2 +- drivers/iommu/msm_iommu.c | 2 +- drivers/iommu/mtk_iommu.c | 2 +- drivers/iommu/qcom_iommu.c | 2 +- include/linux/io-pgtable.h | 4 +++- 10 files changed, 16 insertions(+), 15 deletions(-) diff --git a/drivers/gpu/drm/panfrost/panfrost_mmu.c b/drivers/gpu/drm/panfrost/panfrost_mmu.c index de22a2276e00..6e8145c36e93 100644 --- a/drivers/gpu/drm/panfrost/panfrost_mmu.c +++ b/drivers/gpu/drm/panfrost/panfrost_mmu.c @@ -222,7 +222,7 @@ void panfrost_mmu_unmap(struct panfrost_gem_object *bo) size_t unmapped_page; size_t pgsize = get_pgsize(iova, len - unmapped_len); - unmapped_page = ops->unmap(ops, iova, pgsize); + unmapped_page = ops->unmap(ops, iova, pgsize, NULL); if (!unmapped_page) break; diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 231093413ff9..8e2e53079f48 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2015,7 +2015,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, if (!ops) return 0; - ret = ops->unmap(ops, iova, size); + ret = ops->unmap(ops, iova, size, gather); if (ret && arm_smmu_atc_inv_domain(smmu_domain, 0, iova, size)) return 0; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 07a267c437d6..f6689956ab6e 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1362,7 +1362,7 @@ static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, return 0; arm_smmu_rpm_get(smmu); - ret = ops->unmap(ops, iova, size); + ret = ops->unmap(ops, iova, size, gather); arm_smmu_rpm_put(smmu); return ret; diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 203894fb6765..a7776e982b6c 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -666,7 +666,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, } static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova, - size_t size) + size_t size, struct iommu_iotlb_gather *gather) { struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); @@ -892,7 +892,7 @@ static int __init arm_v7s_do_selftests(void) size = 1UL << __ffs(cfg.pgsize_bitmap); while (i < loopnr) { iova_start = i * SZ_16M; - if (ops->unmap(ops, iova_start + size, size) != size) + if (ops->unmap(ops, iova_start + size, size, NULL) != size) return __FAIL(ops); /* Remap of partial unmap */ @@ -910,7 +910,7 @@ static int __init arm_v7s_do_selftests(void) for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) { size = 1UL << i; - if (ops->unmap(ops, iova, size) != size) + if (ops->unmap(ops, iova, size, NULL) != size) return __FAIL(ops); if (ops->iova_to_phys(ops, iova + 42)) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index f35516744965..325430f8a0a1 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -642,7 +641,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, } static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, - size_t size) + size_t size, struct iommu_iotlb_gather *gather) { struct arm_lpae_io_pgtable *data = io_pgtable_ops_to_data(ops); arm_lpae_iopte *ptep = data->pgd; @@ -1167,7 +1166,7 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) /* Partial unmap */ size = 1UL << __ffs(cfg->pgsize_bitmap); - if (ops->unmap(ops, SZ_1G + size, size) != size) + if (ops->unmap(ops, SZ_1G + size, size, NULL) != size) return __FAIL(ops, i); /* Remap of partial unmap */ @@ -1182,7 +1181,7 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) { size = 1UL << j; - if (ops->unmap(ops, iova, size) != size) + if (ops->unmap(ops, iova, size, NULL) != size) return __FAIL(ops, i); if (ops->iova_to_phys(ops, iova + 42)) diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index a2b8eff4c1f7..76a8ec343d53 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -737,7 +737,7 @@ static size_t ipmmu_unmap(struct iommu_domain *io_domain, unsigned long iova, { struct ipmmu_vmsa_domain *domain = to_vmsa_domain(io_domain); - return domain->iop->unmap(domain->iop, iova, size); + return domain->iop->unmap(domain->iop, iova, size, gather); } static void ipmmu_flush_iotlb_all(struct iommu_domain *io_domain) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index ccfc7ed230ef..8a0dcaf0a9e9 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -523,7 +523,7 @@ static size_t msm_iommu_unmap(struct iommu_domain *domain, unsigned long iova, unsigned long flags; spin_lock_irqsave(&priv->pgtlock, flags); - len = priv->iop->unmap(priv->iop, iova, len); + len = priv->iop->unmap(priv->iop, iova, len, gather); spin_unlock_irqrestore(&priv->pgtlock, flags); return len; diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index 3785750bdb44..b73cffd63262 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -400,7 +400,7 @@ static size_t mtk_iommu_unmap(struct iommu_domain *domain, size_t unmapsz; spin_lock_irqsave(&dom->pgtlock, flags); - unmapsz = dom->iop->unmap(dom->iop, iova, size); + unmapsz = dom->iop->unmap(dom->iop, iova, size, gather); spin_unlock_irqrestore(&dom->pgtlock, flags); return unmapsz; diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 0b8a6d6bb475..48b288ef74b4 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -455,7 +455,7 @@ static size_t qcom_iommu_unmap(struct iommu_domain *domain, unsigned long iova, */ pm_runtime_get_sync(qcom_domain->iommu->dev); spin_lock_irqsave(&qcom_domain->pgtbl_lock, flags); - ret = ops->unmap(ops, iova, size); + ret = ops->unmap(ops, iova, size, gather); spin_unlock_irqrestore(&qcom_domain->pgtbl_lock, flags); pm_runtime_put_sync(qcom_domain->iommu->dev); diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index 843310484fe2..fe27d93c8ad9 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -1,7 +1,9 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef __IO_PGTABLE_H #define __IO_PGTABLE_H + #include +#include /* * Public API for use by IOMMU drivers @@ -136,7 +138,7 @@ struct io_pgtable_ops { int (*map)(struct io_pgtable_ops *ops, unsigned long iova, phys_addr_t paddr, size_t size, int prot); size_t (*unmap)(struct io_pgtable_ops *ops, unsigned long iova, - size_t size); + size_t size, struct iommu_iotlb_gather *gather); phys_addr_t (*iova_to_phys)(struct io_pgtable_ops *ops, unsigned long iova); }; From 3951c41af4a65ba418e6b1b973d398552bedb84f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 2 Jul 2019 16:45:15 +0100 Subject: [PATCH 13/13] iommu/io-pgtable: Pass struct iommu_iotlb_gather to ->tlb_add_page() With all the pieces in place, we can finally propagate the iommu_iotlb_gather structure from the call to unmap() down to the IOMMU drivers' implementation of ->tlb_add_page(). Currently everybody ignores it, but the machinery is now there to defer invalidation. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 3 ++- drivers/iommu/arm-smmu.c | 3 ++- drivers/iommu/io-pgtable-arm-v7s.c | 23 ++++++++++++++--------- drivers/iommu/io-pgtable-arm.c | 22 ++++++++++++++-------- drivers/iommu/msm_iommu.c | 3 ++- drivers/iommu/mtk_iommu.c | 3 ++- drivers/iommu/qcom_iommu.c | 3 ++- include/linux/io-pgtable.h | 16 +++++++++------- 8 files changed, 47 insertions(+), 29 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 8e2e53079f48..d1ebc7103065 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1596,7 +1596,8 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, } while (size -= granule); } -static void arm_smmu_tlb_inv_page_nosync(unsigned long iova, size_t granule, +static void arm_smmu_tlb_inv_page_nosync(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, void *cookie) { arm_smmu_tlb_inv_range_nosync(iova, granule, granule, true, cookie); diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index f6689956ab6e..5598d0ff71a8 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -574,7 +574,8 @@ static void arm_smmu_tlb_inv_leaf(unsigned long iova, size_t size, ops->tlb_sync(cookie); } -static void arm_smmu_tlb_add_page(unsigned long iova, size_t granule, +static void arm_smmu_tlb_add_page(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index a7776e982b6c..18e7d212c7de 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -362,7 +362,8 @@ static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl) return false; } -static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *, unsigned long, +static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *, + struct iommu_iotlb_gather *, unsigned long, size_t, int, arm_v7s_iopte *); static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data, @@ -383,7 +384,7 @@ static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data, size_t sz = ARM_V7S_BLOCK_SIZE(lvl); tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl); - if (WARN_ON(__arm_v7s_unmap(data, iova + i * sz, + if (WARN_ON(__arm_v7s_unmap(data, NULL, iova + i * sz, sz, lvl, tblp) != sz)) return -EINVAL; } else if (ptep[i]) { @@ -545,6 +546,7 @@ static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data, } static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data, + struct iommu_iotlb_gather *gather, unsigned long iova, size_t size, arm_v7s_iopte blk_pte, arm_v7s_iopte *ptep) @@ -581,14 +583,15 @@ static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data, return 0; tablep = iopte_deref(pte, 1); - return __arm_v7s_unmap(data, iova, size, 2, tablep); + return __arm_v7s_unmap(data, gather, iova, size, 2, tablep); } - io_pgtable_tlb_add_page(&data->iop, iova, size); + io_pgtable_tlb_add_page(&data->iop, gather, iova, size); return size; } static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, + struct iommu_iotlb_gather *gather, unsigned long iova, size_t size, int lvl, arm_v7s_iopte *ptep) { @@ -647,7 +650,7 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, */ smp_wmb(); } else { - io_pgtable_tlb_add_page(iop, iova, blk_size); + io_pgtable_tlb_add_page(iop, gather, iova, blk_size); } iova += blk_size; } @@ -657,12 +660,13 @@ static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, * Insert a table at the next level to map the old region, * minus the part we want to unmap */ - return arm_v7s_split_blk_unmap(data, iova, size, pte[0], ptep); + return arm_v7s_split_blk_unmap(data, gather, iova, size, pte[0], + ptep); } /* Keep on walkin' */ ptep = iopte_deref(pte[0], lvl); - return __arm_v7s_unmap(data, iova, size, lvl + 1, ptep); + return __arm_v7s_unmap(data, gather, iova, size, lvl + 1, ptep); } static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova, @@ -673,7 +677,7 @@ static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova, if (WARN_ON(upper_32_bits(iova))) return 0; - return __arm_v7s_unmap(data, iova, size, 1, data->pgd); + return __arm_v7s_unmap(data, gather, iova, size, 1, data->pgd); } static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops, @@ -808,7 +812,8 @@ static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule, WARN_ON(!(size & cfg_cookie->pgsize_bitmap)); } -static void dummy_tlb_add_page(unsigned long iova, size_t granule, void *cookie) +static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, void *cookie) { dummy_tlb_flush(iova, granule, granule, cookie); } diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 325430f8a0a1..4c91359057c5 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -289,6 +289,7 @@ static void __arm_lpae_set_pte(arm_lpae_iopte *ptep, arm_lpae_iopte pte, } static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, + struct iommu_iotlb_gather *gather, unsigned long iova, size_t size, int lvl, arm_lpae_iopte *ptep); @@ -334,8 +335,10 @@ static int arm_lpae_init_pte(struct arm_lpae_io_pgtable *data, size_t sz = ARM_LPAE_BLOCK_SIZE(lvl, data); tblp = ptep - ARM_LPAE_LVL_IDX(iova, lvl, data); - if (WARN_ON(__arm_lpae_unmap(data, iova, sz, lvl, tblp) != sz)) + if (__arm_lpae_unmap(data, NULL, iova, sz, lvl, tblp) != sz) { + WARN_ON(1); return -EINVAL; + } } __arm_lpae_init_pte(data, paddr, prot, lvl, ptep); @@ -536,6 +539,7 @@ static void arm_lpae_free_pgtable(struct io_pgtable *iop) } static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, + struct iommu_iotlb_gather *gather, unsigned long iova, size_t size, arm_lpae_iopte blk_pte, int lvl, arm_lpae_iopte *ptep) @@ -581,14 +585,15 @@ static size_t arm_lpae_split_blk_unmap(struct arm_lpae_io_pgtable *data, tablep = iopte_deref(pte, data); } else if (unmap_idx >= 0) { - io_pgtable_tlb_add_page(&data->iop, iova, size); + io_pgtable_tlb_add_page(&data->iop, gather, iova, size); return size; } - return __arm_lpae_unmap(data, iova, size, lvl, tablep); + return __arm_lpae_unmap(data, gather, iova, size, lvl, tablep); } static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, + struct iommu_iotlb_gather *gather, unsigned long iova, size_t size, int lvl, arm_lpae_iopte *ptep) { @@ -622,7 +627,7 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, */ smp_wmb(); } else { - io_pgtable_tlb_add_page(iop, iova, size); + io_pgtable_tlb_add_page(iop, gather, iova, size); } return size; @@ -631,13 +636,13 @@ static size_t __arm_lpae_unmap(struct arm_lpae_io_pgtable *data, * Insert a table at the next level to map the old region, * minus the part we want to unmap */ - return arm_lpae_split_blk_unmap(data, iova, size, pte, + return arm_lpae_split_blk_unmap(data, gather, iova, size, pte, lvl + 1, ptep); } /* Keep on walkin' */ ptep = iopte_deref(pte, data); - return __arm_lpae_unmap(data, iova, size, lvl + 1, ptep); + return __arm_lpae_unmap(data, gather, iova, size, lvl + 1, ptep); } static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, @@ -650,7 +655,7 @@ static size_t arm_lpae_unmap(struct io_pgtable_ops *ops, unsigned long iova, if (WARN_ON(iova >= (1ULL << data->iop.cfg.ias))) return 0; - return __arm_lpae_unmap(data, iova, size, lvl, ptep); + return __arm_lpae_unmap(data, gather, iova, size, lvl, ptep); } static phys_addr_t arm_lpae_iova_to_phys(struct io_pgtable_ops *ops, @@ -1074,7 +1079,8 @@ static void dummy_tlb_flush(unsigned long iova, size_t size, size_t granule, WARN_ON(!(size & cfg_cookie->pgsize_bitmap)); } -static void dummy_tlb_add_page(unsigned long iova, size_t granule, void *cookie) +static void dummy_tlb_add_page(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, void *cookie) { dummy_tlb_flush(iova, granule, granule, cookie); } diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 8a0dcaf0a9e9..4c0be5b75c28 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -180,7 +180,8 @@ static void __flush_iotlb_leaf(unsigned long iova, size_t size, __flush_iotlb_range(iova, size, granule, true, cookie); } -static void __flush_iotlb_page(unsigned long iova, size_t granule, void *cookie) +static void __flush_iotlb_page(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, void *cookie) { __flush_iotlb_range(iova, granule, granule, true, cookie); } diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c index b73cffd63262..0827d51936fa 100644 --- a/drivers/iommu/mtk_iommu.c +++ b/drivers/iommu/mtk_iommu.c @@ -202,7 +202,8 @@ static void mtk_iommu_tlb_flush_leaf(unsigned long iova, size_t size, mtk_iommu_tlb_sync(cookie); } -static void mtk_iommu_tlb_flush_page_nosync(unsigned long iova, size_t granule, +static void mtk_iommu_tlb_flush_page_nosync(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, void *cookie) { mtk_iommu_tlb_add_flush_nosync(iova, granule, granule, true, cookie); diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 48b288ef74b4..eac760cdbb28 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -178,7 +178,8 @@ static void qcom_iommu_tlb_flush_leaf(unsigned long iova, size_t size, qcom_iommu_tlb_sync(cookie); } -static void qcom_iommu_tlb_add_page(unsigned long iova, size_t granule, +static void qcom_iommu_tlb_add_page(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, void *cookie) { qcom_iommu_tlb_inv_range_nosync(iova, granule, granule, true, cookie); diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index fe27d93c8ad9..6b1b8be3ebec 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -28,10 +28,10 @@ enum io_pgtable_fmt { * @tlb_flush_leaf: Synchronously invalidate all leaf TLB state for a virtual * address range. * @tlb_add_page: Optional callback to queue up leaf TLB invalidation for a - * single page. This function exists purely as an optimisation - * for IOMMUs that cannot batch TLB invalidation operations - * efficiently and are therefore better suited to issuing them - * early rather than deferring them until iommu_tlb_sync(). + * single page. IOMMUs that cannot batch TLB invalidation + * operations efficiently will typically issue them here, but + * others may decide to update the iommu_iotlb_gather structure + * and defer the invalidation until iommu_tlb_sync() instead. * * Note that these can all be called in atomic context and must therefore * not block. @@ -42,7 +42,8 @@ struct iommu_flush_ops { void *cookie); void (*tlb_flush_leaf)(unsigned long iova, size_t size, size_t granule, void *cookie); - void (*tlb_add_page)(unsigned long iova, size_t granule, void *cookie); + void (*tlb_add_page)(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, void *cookie); }; /** @@ -209,11 +210,12 @@ io_pgtable_tlb_flush_leaf(struct io_pgtable *iop, unsigned long iova, } static inline void -io_pgtable_tlb_add_page(struct io_pgtable *iop, unsigned long iova, +io_pgtable_tlb_add_page(struct io_pgtable *iop, + struct iommu_iotlb_gather * gather, unsigned long iova, size_t granule) { if (iop->cfg.tlb->tlb_add_page) - iop->cfg.tlb->tlb_add_page(iova, granule, iop->cookie); + iop->cfg.tlb->tlb_add_page(gather, iova, granule, iop->cookie); } /**