From ba93c357229fd4666fe60f8d7ec97659882727c0 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 30 Dec 2018 16:53:15 +0100 Subject: [PATCH 01/60] iommu/msm: Reduce indentation Delete tab aligning a statement with the right hand side of a preceding assignment rather than the left hand side. Found with the help of Coccinelle. Signed-off-by: Julia Lawall Signed-off-by: Joerg Roedel --- drivers/iommu/msm_iommu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index fc4270733f11..6dde3a9db92e 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -461,10 +461,10 @@ static int msm_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) master->num = msm_iommu_alloc_ctx(iommu->context_map, 0, iommu->ncb); - if (IS_ERR_VALUE(master->num)) { - ret = -ENODEV; - goto fail; - } + if (IS_ERR_VALUE(master->num)) { + ret = -ENODEV; + goto fail; + } config_mids(iommu, master); __program_context(iommu->base, master->num, priv); From bd3c2e66e4cb91ed9294b5da920deeb45db0c13a Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Wed, 2 Jan 2019 23:16:57 +0200 Subject: [PATCH 02/60] iommu/iova: Allow compiling the library without IOMMU support Drivers such as the Intel IPU3 ImgU driver use the IOVA library to manage the device's own virtual address space while not implementing the IOMMU API. Currently the IOVA library is only compiled if the IOMMU support is enabled, resulting into a failure during linking due to missing symbols. Fix this by defining IOVA library Kconfig bits independently of IOMMU support configuration, and descending to the iommu directory unconditionally during the build. Signed-off-by: Sakari Ailus Signed-off-by: Joerg Roedel --- drivers/Makefile | 2 +- drivers/iommu/Kconfig | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/drivers/Makefile b/drivers/Makefile index e1ce029d28fd..04da7876032c 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -56,7 +56,7 @@ obj-y += tty/ obj-y += char/ # iommu/ comes before gpu as gpu are using iommu controllers -obj-$(CONFIG_IOMMU_SUPPORT) += iommu/ +obj-y += iommu/ # gpu/ comes after char for AGP vs DRM startup and after iommu obj-y += gpu/ diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d9a25715650e..d2c83e62873d 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -1,3 +1,7 @@ +# The IOVA library may also be used by non-IOMMU_API users +config IOMMU_IOVA + tristate + # IOMMU_API always gets selected by whoever wants it. config IOMMU_API bool @@ -81,9 +85,6 @@ config IOMMU_DEFAULT_PASSTHROUGH If unsure, say N here. -config IOMMU_IOVA - tristate - config OF_IOMMU def_bool y depends on OF && IOMMU_API From 5b438f4ba315db4f8c1489d175656798d58c014f Mon Sep 17 00:00:00 2001 From: Jacob Pan Date: Fri, 11 Jan 2019 13:04:57 +0800 Subject: [PATCH 03/60] iommu/vt-d: Support page request in scalable mode VT-d Rev3.0 has made a few changes to the page request interface, 1. widened PRQ descriptor from 128 bits to 256 bits; 2. removed streaming response type; 3. introduced private data that requires page response even the request is not last request in group (LPIG). This is a supplement to commit 1c4f88b7f1f92 ("iommu/vt-d: Shared virtual address in scalable mode") and makes the svm code compliant with VT-d Rev3.0. 
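For reference, the widened descriptor can be checked with a stand-alone compile-time sketch (illustrative only, not part of the patch; it relies on the usual GCC/Clang little-endian packing of 64-bit bit-fields, which is the same assumption the kernel definition below makes):

/* Mirrors the scalable-mode PRQ descriptor layout introduced below. */
#include <assert.h>
#include <stdint.h>

typedef uint64_t u64;

struct page_req_dsc {
	union {
		struct {
			u64 type:8;
			u64 pasid_present:1;
			u64 priv_data_present:1;
			u64 rsvd:6;
			u64 rid:16;
			u64 pasid:20;
			u64 exe_req:1;
			u64 pm_req:1;
			u64 rsvd2:10;
		};
		u64 qw_0;
	};
	union {
		struct {
			u64 rd_req:1;
			u64 wr_req:1;
			u64 lpig:1;
			u64 prg_index:9;
			u64 addr:52;
		};
		u64 qw_1;
	};
	u64 priv_data[2]; /* echoed back in the page group response when PDP is set */
};

/* Rev3.0 widens the descriptor from 128 bits (two qwords) to 256 bits. */
static_assert(sizeof(struct page_req_dsc) == 32,
	      "PRQ descriptor must be 256 bits wide");

int main(void)
{
	return 0;
}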
Cc: Ashok Raj Cc: Liu Yi L Cc: Kevin Tian Signed-off-by: Jacob Pan Fixes: 1c4f88b7f1f92 ("iommu/vt-d: Shared virtual address in scalable mode") Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-svm.c | 77 ++++++++++++++++++++++--------------- include/linux/intel-iommu.h | 21 +++++----- include/linux/intel-svm.h | 2 +- 3 files changed, 55 insertions(+), 45 deletions(-) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index a2a2aa4439aa..79add5716552 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -470,20 +470,31 @@ EXPORT_SYMBOL_GPL(intel_svm_is_pasid_valid); /* Page request queue descriptor */ struct page_req_dsc { - u64 srr:1; - u64 bof:1; - u64 pasid_present:1; - u64 lpig:1; - u64 pasid:20; - u64 bus:8; - u64 private:23; - u64 prg_index:9; - u64 rd_req:1; - u64 wr_req:1; - u64 exe_req:1; - u64 priv_req:1; - u64 devfn:8; - u64 addr:52; + union { + struct { + u64 type:8; + u64 pasid_present:1; + u64 priv_data_present:1; + u64 rsvd:6; + u64 rid:16; + u64 pasid:20; + u64 exe_req:1; + u64 pm_req:1; + u64 rsvd2:10; + }; + u64 qw_0; + }; + union { + struct { + u64 rd_req:1; + u64 wr_req:1; + u64 lpig:1; + u64 prg_index:9; + u64 addr:52; + }; + u64 qw_1; + }; + u64 priv_data[2]; }; #define PRQ_RING_MASK ((0x1000 << PRQ_ORDER) - 0x10) @@ -596,7 +607,7 @@ static irqreturn_t prq_event_thread(int irq, void *d) /* Accounting for major/minor faults? */ rcu_read_lock(); list_for_each_entry_rcu(sdev, &svm->devs, list) { - if (sdev->sid == PCI_DEVID(req->bus, req->devfn)) + if (sdev->sid == req->rid) break; } /* Other devices can go away, but the drivers are not permitted @@ -609,33 +620,35 @@ static irqreturn_t prq_event_thread(int irq, void *d) if (sdev && sdev->ops && sdev->ops->fault_cb) { int rwxp = (req->rd_req << 3) | (req->wr_req << 2) | - (req->exe_req << 1) | (req->priv_req); - sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, req->private, rwxp, result); + (req->exe_req << 1) | (req->pm_req); + sdev->ops->fault_cb(sdev->dev, req->pasid, req->addr, + req->priv_data, rwxp, result); } /* We get here in the error case where the PASID lookup failed, and these can be NULL. Do not use them below this point! */ sdev = NULL; svm = NULL; no_pasid: - if (req->lpig) { - /* Page Group Response */ + if (req->lpig || req->priv_data_present) { + /* + * Per VT-d spec. v3.0 ch7.7, system software must + * respond with page group response if private data + * is present (PDP) or last page in group (LPIG) bit + * is set. This is an additional VT-d feature beyond + * PCI ATS spec. 
+ */ resp.qw0 = QI_PGRP_PASID(req->pasid) | - QI_PGRP_DID((req->bus << 8) | req->devfn) | + QI_PGRP_DID(req->rid) | QI_PGRP_PASID_P(req->pasid_present) | + QI_PGRP_PDP(req->pasid_present) | + QI_PGRP_RESP_CODE(result) | QI_PGRP_RESP_TYPE; resp.qw1 = QI_PGRP_IDX(req->prg_index) | - QI_PGRP_PRIV(req->private) | - QI_PGRP_RESP_CODE(result); - } else if (req->srr) { - /* Page Stream Response */ - resp.qw0 = QI_PSTRM_IDX(req->prg_index) | - QI_PSTRM_PRIV(req->private) | - QI_PSTRM_BUS(req->bus) | - QI_PSTRM_PASID(req->pasid) | - QI_PSTRM_RESP_TYPE; - resp.qw1 = QI_PSTRM_ADDR(address) | - QI_PSTRM_DEVFN(req->devfn) | - QI_PSTRM_RESP_CODE(result); + QI_PGRP_LPIG(req->lpig); + + if (req->priv_data_present) + memcpy(&resp.qw2, req->priv_data, + sizeof(req->priv_data)); } resp.qw2 = 0; resp.qw3 = 0; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 0605f3bf6e79..fa364de9db18 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -374,20 +374,17 @@ enum { #define QI_DEV_EIOTLB_PFSID(pfsid) (((u64)(pfsid & 0xf) << 12) | ((u64)(pfsid & 0xfff) << 52)) #define QI_DEV_EIOTLB_MAX_INVS 32 -#define QI_PGRP_IDX(idx) (((u64)(idx)) << 55) -#define QI_PGRP_PRIV(priv) (((u64)(priv)) << 32) -#define QI_PGRP_RESP_CODE(res) ((u64)(res)) -#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32) -#define QI_PGRP_DID(did) (((u64)(did)) << 16) +/* Page group response descriptor QW0 */ #define QI_PGRP_PASID_P(p) (((u64)(p)) << 4) +#define QI_PGRP_PDP(p) (((u64)(p)) << 5) +#define QI_PGRP_RESP_CODE(res) (((u64)(res)) << 12) +#define QI_PGRP_DID(rid) (((u64)(rid)) << 16) +#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32) + +/* Page group response descriptor QW1 */ +#define QI_PGRP_LPIG(x) (((u64)(x)) << 2) +#define QI_PGRP_IDX(idx) (((u64)(idx)) << 3) -#define QI_PSTRM_ADDR(addr) (((u64)(addr)) & VTD_PAGE_MASK) -#define QI_PSTRM_DEVFN(devfn) (((u64)(devfn)) << 4) -#define QI_PSTRM_RESP_CODE(res) ((u64)(res)) -#define QI_PSTRM_IDX(idx) (((u64)(idx)) << 55) -#define QI_PSTRM_PRIV(priv) (((u64)(priv)) << 32) -#define QI_PSTRM_BUS(bus) (((u64)(bus)) << 24) -#define QI_PSTRM_PASID(pasid) (((u64)(pasid)) << 4) #define QI_RESP_SUCCESS 0x0 #define QI_RESP_INVALID 0x1 diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 99bc5b3ae26e..e3f76315ca4d 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -20,7 +20,7 @@ struct device; struct svm_dev_ops { void (*fault_cb)(struct device *dev, int pasid, u64 address, - u32 private, int rwxp, int response); + void *private, int rwxp, int response); }; /* Values for rxwp in fault_cb callback */ From 4f821c1002cb236f14d0b4572b0b98fe800178a8 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:38:44 +0300 Subject: [PATCH 04/60] iommu/tegra: gart: Remove pr_fmt and clean up includes Remove unneeded headers inclusion and sort the headers in alphabet order. Remove pr_fmt macro since there is no pr_*() in the code and it doesn't affect dev_*() functions. Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-gart.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index da6a4e357b2b..ff75cf60117b 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -19,22 +19,15 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
*/ -#define pr_fmt(fmt) "%s(): " fmt, __func__ - #include -#include -#include -#include -#include -#include -#include -#include -#include #include #include -#include - -#include +#include +#include +#include +#include +#include +#include /* bitmap of the page sizes currently supported */ #define GART_IOMMU_PGSIZES (SZ_4K) From ae95c46dbed56af0e1d106865801bf06e1762c2c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:38:45 +0300 Subject: [PATCH 05/60] iommu/tegra: gart: Clean up driver probe errors handling Properly clean up allocated resources on the drivers probe failure and remove unneeded checks. Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-gart.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index ff75cf60117b..1cd470b2beea 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -411,9 +411,6 @@ static int tegra_gart_probe(struct platform_device *pdev) struct device *dev = &pdev->dev; int ret; - if (gart_handle) - return -EIO; - BUILD_BUG_ON(PAGE_SHIFT != GART_PAGE_SHIFT); /* the GART memory aperture is required */ @@ -448,8 +445,7 @@ static int tegra_gart_probe(struct platform_device *pdev) ret = iommu_device_register(&gart->iommu); if (ret) { dev_err(dev, "Failed to register IOMMU\n"); - iommu_device_sysfs_remove(&gart->iommu); - return ret; + goto remove_sysfs; } gart->dev = &pdev->dev; @@ -463,7 +459,8 @@ static int tegra_gart_probe(struct platform_device *pdev) gart->savedata = vmalloc(array_size(sizeof(u32), gart->page_count)); if (!gart->savedata) { dev_err(dev, "failed to allocate context save area\n"); - return -ENOMEM; + ret = -ENOMEM; + goto unregister_iommu; } platform_set_drvdata(pdev, gart); @@ -472,6 +469,13 @@ static int tegra_gart_probe(struct platform_device *pdev) gart_handle = gart; return 0; + +unregister_iommu: + iommu_device_unregister(&gart->iommu); +remove_sysfs: + iommu_device_sysfs_remove(&gart->iommu); + + return ret; } static const struct dev_pm_ops tegra_gart_pm_ops = { From 4b6f0ea384f4a39c07fad556a9b4d619759de6c5 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:38:46 +0300 Subject: [PATCH 06/60] iommu/tegra: gart: Ignore devices without IOMMU phandle in DT GART can't handle all devices, hence ignore devices that aren't related to GART. IOMMU phandle must be explicitly assign to devices in the device tree. 
Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-gart.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 1cd470b2beea..37a76388ff7e 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -345,8 +345,12 @@ static bool gart_iommu_capable(enum iommu_cap cap) static int gart_iommu_add_device(struct device *dev) { - struct iommu_group *group = iommu_group_get_for_dev(dev); + struct iommu_group *group; + if (!dev->iommu_fwspec) + return -ENODEV; + + group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) return PTR_ERR(group); @@ -363,6 +367,12 @@ static void gart_iommu_remove_device(struct device *dev) iommu_device_unlink(&gart_handle->iommu, dev); } +static int gart_iommu_of_xlate(struct device *dev, + struct of_phandle_args *args) +{ + return 0; +} + static const struct iommu_ops gart_iommu_ops = { .capable = gart_iommu_capable, .domain_alloc = gart_iommu_domain_alloc, @@ -376,6 +386,7 @@ static const struct iommu_ops gart_iommu_ops = { .unmap = gart_iommu_unmap, .iova_to_phys = gart_iommu_iova_to_phys, .pgsize_bitmap = GART_IOMMU_PGSIZES, + .of_xlate = gart_iommu_of_xlate, }; static int tegra_gart_suspend(struct device *dev) @@ -441,6 +452,7 @@ static int tegra_gart_probe(struct platform_device *pdev) } iommu_device_set_ops(&gart->iommu, &gart_iommu_ops); + iommu_device_set_fwnode(&gart->iommu, dev->fwnode); ret = iommu_device_register(&gart->iommu); if (ret) { From 1d7ae53b152dbc5ba0a4f6a83ecc42ac66f52d11 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:38:47 +0300 Subject: [PATCH 07/60] iommu: Introduce iotlb_sync_map callback Introduce iotlb_sync_map() callback that is invoked in the end of iommu_map(). This new callback allows IOMMU drivers to avoid syncing after mapping of each contiguous chunk and sync only when the whole mapping is completed, optimizing performance of the mapping operation. 
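As a schematic illustration of the intended driver-side pattern (not a real driver; foo_write_pte() and foo_flush_tlb() are made-up placeholders for a driver's page-table update and TLB flush), the following GART patch applies exactly this scheme:

/*
 * ->map() only updates page-table entries; the single flush is deferred
 * until iommu_map() has covered the whole range and invokes
 * ->iotlb_sync_map() once.
 */
static int foo_iommu_map(struct iommu_domain *domain, unsigned long iova,
			 phys_addr_t paddr, size_t size, int prot)
{
	return foo_write_pte(domain, iova, paddr, size, prot); /* no flush here */
}

static void foo_iommu_iotlb_sync_map(struct iommu_domain *domain)
{
	foo_flush_tlb(domain); /* one flush per iommu_map() call */
}

static const struct iommu_ops foo_iommu_ops = {
	.map		= foo_iommu_map,
	.iotlb_sync_map	= foo_iommu_iotlb_sync_map,
	/* ... remaining callbacks ... */
};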
Signed-off-by: Dmitry Osipenko Reviewed-by: Robin Murphy Reviewed-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 8 ++++++-- include/linux/iommu.h | 1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3ed4db334341..ed0e63f2cd9b 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1585,13 +1585,14 @@ static size_t iommu_pgsize(struct iommu_domain *domain, int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot) { + const struct iommu_ops *ops = domain->ops; unsigned long orig_iova = iova; unsigned int min_pagesz; size_t orig_size = size; phys_addr_t orig_paddr = paddr; int ret = 0; - if (unlikely(domain->ops->map == NULL || + if (unlikely(ops->map == NULL || domain->pgsize_bitmap == 0UL)) return -ENODEV; @@ -1620,7 +1621,7 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, pr_debug("mapping: iova 0x%lx pa %pa pgsize 0x%zx\n", iova, &paddr, pgsize); - ret = domain->ops->map(domain, iova, paddr, pgsize, prot); + ret = ops->map(domain, iova, paddr, pgsize, prot); if (ret) break; @@ -1629,6 +1630,9 @@ int iommu_map(struct iommu_domain *domain, unsigned long iova, size -= pgsize; } + if (ops->iotlb_sync_map) + ops->iotlb_sync_map(domain); + /* unroll mapping in case something went wrong */ if (ret) iommu_unmap(domain, orig_iova, orig_size - size); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e90da6b6f3d1..477ef47c357c 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -201,6 +201,7 @@ struct iommu_ops { void (*flush_iotlb_all)(struct iommu_domain *domain); void (*iotlb_range_add)(struct iommu_domain *domain, unsigned long iova, size_t size); + void (*iotlb_sync_map)(struct iommu_domain *domain); void (*iotlb_sync)(struct iommu_domain *domain); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); int (*add_device)(struct device *dev); From 2fc0ac180d0f160cdd5fc69407ca7eb47b3eff99 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:38:48 +0300 Subject: [PATCH 08/60] iommu/tegra: gart: Optimize mapping / unmapping performance Currently GART writes one page entry at a time. More optimal would be to aggregate the writes and flush BUS buffer in the end, this gives map/unmap 10-40% performance boost (depending on size of mapping) in comparison to flushing after each page entry update. 
Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-gart.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 37a76388ff7e..835fea461c59 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -290,7 +290,6 @@ static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova, } } gart_set_pte(gart, iova, GART_PTE(pfn)); - FLUSH_GART_REGS(gart); spin_unlock_irqrestore(&gart->pte_lock, flags); return 0; } @@ -307,7 +306,6 @@ static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova, spin_lock_irqsave(&gart->pte_lock, flags); gart_set_pte(gart, iova, 0); - FLUSH_GART_REGS(gart); spin_unlock_irqrestore(&gart->pte_lock, flags); return bytes; } @@ -373,6 +371,14 @@ static int gart_iommu_of_xlate(struct device *dev, return 0; } +static void gart_iommu_sync(struct iommu_domain *domain) +{ + struct gart_domain *gart_domain = to_gart_domain(domain); + struct gart_device *gart = gart_domain->gart; + + FLUSH_GART_REGS(gart); +} + static const struct iommu_ops gart_iommu_ops = { .capable = gart_iommu_capable, .domain_alloc = gart_iommu_domain_alloc, @@ -387,6 +393,8 @@ static const struct iommu_ops gart_iommu_ops = { .iova_to_phys = gart_iommu_iova_to_phys, .pgsize_bitmap = GART_IOMMU_PGSIZES, .of_xlate = gart_iommu_of_xlate, + .iotlb_sync_map = gart_iommu_sync, + .iotlb_sync = gart_iommu_sync, }; static int tegra_gart_suspend(struct device *dev) From 53f986accf34d4428b90b26086b9571ea653b920 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:38:49 +0300 Subject: [PATCH 09/60] dt-bindings: memory: tegra: Squash tegra20-gart into tegra20-mc Splitting GART and Memory Controller wasn't a good decision that was made back in the day. Given that the GART driver wasn't ever been used by anything in the kernel, we decided that it will be better to correct the mistakes of the past and merge two bindings into a single one. As a result there is a DT ABI change for the Memory Controller that allows not to break newer kernels using older DT and not to break older kernels using newer DT, that is done by changing the 'compatible' of the node to 'tegra20-mc-gart' and adding a new-required clock property. The new clock property also puts the tegra20-mc binding in line with the bindings of the later Tegra generations. Signed-off-by: Dmitry Osipenko Reviewed-by: Rob Herring Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- .../bindings/iommu/nvidia,tegra20-gart.txt | 14 ---------- .../memory-controllers/nvidia,tegra20-mc.txt | 27 +++++++++++++------ 2 files changed, 19 insertions(+), 22 deletions(-) delete mode 100644 Documentation/devicetree/bindings/iommu/nvidia,tegra20-gart.txt diff --git a/Documentation/devicetree/bindings/iommu/nvidia,tegra20-gart.txt b/Documentation/devicetree/bindings/iommu/nvidia,tegra20-gart.txt deleted file mode 100644 index 099d9362ebc1..000000000000 --- a/Documentation/devicetree/bindings/iommu/nvidia,tegra20-gart.txt +++ /dev/null @@ -1,14 +0,0 @@ -NVIDIA Tegra 20 GART - -Required properties: -- compatible: "nvidia,tegra20-gart" -- reg: Two pairs of cells specifying the physical address and size of - the memory controller registers and the GART aperture respectively. 
- -Example: - - gart { - compatible = "nvidia,tegra20-gart"; - reg = <0x7000f024 0x00000018 /* controller registers */ - 0x58000000 0x02000000>; /* GART aperture */ - }; diff --git a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-mc.txt b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-mc.txt index 7d60a50a4fa1..e55328237df4 100644 --- a/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-mc.txt +++ b/Documentation/devicetree/bindings/memory-controllers/nvidia,tegra20-mc.txt @@ -1,26 +1,37 @@ NVIDIA Tegra20 MC(Memory Controller) Required properties: -- compatible : "nvidia,tegra20-mc" -- reg : Should contain 2 register ranges(address and length); see the - example below. Note that the MC registers are interleaved with the - GART registers, and hence must be represented as multiple ranges. +- compatible : "nvidia,tegra20-mc-gart" +- reg : Should contain 2 register ranges: physical base address and length of + the controller's registers and the GART aperture respectively. +- clocks: Must contain an entry for each entry in clock-names. + See ../clocks/clock-bindings.txt for details. +- clock-names: Must include the following entries: + - mc: the module's clock input - interrupts : Should contain MC General interrupt. - #reset-cells : Should be 1. This cell represents memory client module ID. The assignments may be found in header file or in the TRM documentation. +- #iommu-cells: Should be 0. This cell represents the number of cells in an + IOMMU specifier needed to encode an address. GART supports only a single + address space that is shared by all devices, therefore no additional + information needed for the address encoding. Example: mc: memory-controller@7000f000 { - compatible = "nvidia,tegra20-mc"; - reg = <0x7000f000 0x024 - 0x7000f03c 0x3c4>; - interrupts = <0 77 0x04>; + compatible = "nvidia,tegra20-mc-gart"; + reg = <0x7000f000 0x400 /* controller registers */ + 0x58000000 0x02000000>; /* GART aperture */ + clocks = <&tegra_car TEGRA20_CLK_MC>; + clock-names = "mc"; + interrupts = ; #reset-cells = <1>; + #iommu-cells = <0>; }; video-codec@6001a000 { compatible = "nvidia,tegra20-vde"; ... resets = <&mc TEGRA20_MC_RESET_VDE>; + iommus = <&mc>; }; From 334175243cc6b97aa7e846e623cc500e1d56a39c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:38:50 +0300 Subject: [PATCH 10/60] ARM: dts: tegra20: Update Memory Controller node to the new binding Device tree binding of Memory Controller has been changed: GART has been squashed into the MC, there are a new mandatory clock and #iommu-cells properties, the compatible has been changed to 'tegra20-mc-gart'. 
Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- arch/arm/boot/dts/tegra20.dtsi | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/arch/arm/boot/dts/tegra20.dtsi b/arch/arm/boot/dts/tegra20.dtsi index dcad6d6128cf..8c942e60703e 100644 --- a/arch/arm/boot/dts/tegra20.dtsi +++ b/arch/arm/boot/dts/tegra20.dtsi @@ -616,17 +616,14 @@ }; mc: memory-controller@7000f000 { - compatible = "nvidia,tegra20-mc"; - reg = <0x7000f000 0x024 - 0x7000f03c 0x3c4>; + compatible = "nvidia,tegra20-mc-gart"; + reg = <0x7000f000 0x400 /* controller registers */ + 0x58000000 0x02000000>; /* GART aperture */ + clocks = <&tegra_car TEGRA20_CLK_MC>; + clock-names = "mc"; interrupts = ; #reset-cells = <1>; - }; - - iommu@7000f024 { - compatible = "nvidia,tegra20-gart"; - reg = <0x7000f024 0x00000018 /* controller registers */ - 0x58000000 0x02000000>; /* GART aperture */ + #iommu-cells = <0>; }; memory-controller@7000f400 { From be4dbdec2bab8635c7a41573668624ee13d83022 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:38:51 +0300 Subject: [PATCH 11/60] memory: tegra: Don't invoke Tegra30+ specific memory timing setup on Tegra20 This fixes irrelevant "tegra-mc 7000f000.memory-controller: no memory timings for RAM code 0 registered" warning message during of kernels boot-up on Tegra20. Fixes: a8d502fd3348 ("memory: tegra: Squash tegra20-mc into common tegra-mc driver") Signed-off-by: Dmitry Osipenko Acked-by: Jon Hunter Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/memory/tegra/mc.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index 24afc36833bf..b99f3c620f6c 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -664,12 +664,13 @@ static int tegra_mc_probe(struct platform_device *pdev) } isr = tegra_mc_irq; - } - err = tegra_mc_setup_timings(mc); - if (err < 0) { - dev_err(&pdev->dev, "failed to setup timings: %d\n", err); - return err; + err = tegra_mc_setup_timings(mc); + if (err < 0) { + dev_err(&pdev->dev, "failed to setup timings: %d\n", + err); + return err; + } } mc->irq = platform_get_irq(pdev, 0); From 96efa118c03648fdc76acad9ca8fe018a6be7145 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:38:52 +0300 Subject: [PATCH 12/60] memory: tegra: Adapt to Tegra20 device-tree binding changes The tegra20-mc device-tree binding has been changed, GART has been squashed into Memory Controller and now the clock property is mandatory for Tegra20, the DT compatible has been changed as well. Adapt driver to the DT changes. 
Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/memory/tegra/mc.c | 21 ++++++++------------- drivers/memory/tegra/mc.h | 6 ------ include/soc/tegra/mc.h | 2 +- 3 files changed, 9 insertions(+), 20 deletions(-) diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index b99f3c620f6c..59db13287b47 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -51,7 +51,7 @@ static const struct of_device_id tegra_mc_of_match[] = { #ifdef CONFIG_ARCH_TEGRA_2x_SOC - { .compatible = "nvidia,tegra20-mc", .data = &tegra20_mc_soc }, + { .compatible = "nvidia,tegra20-mc-gart", .data = &tegra20_mc_soc }, #endif #ifdef CONFIG_ARCH_TEGRA_3x_SOC { .compatible = "nvidia,tegra30-mc", .data = &tegra30_mc_soc }, @@ -638,24 +638,19 @@ static int tegra_mc_probe(struct platform_device *pdev) if (IS_ERR(mc->regs)) return PTR_ERR(mc->regs); + mc->clk = devm_clk_get(&pdev->dev, "mc"); + if (IS_ERR(mc->clk)) { + dev_err(&pdev->dev, "failed to get MC clock: %ld\n", + PTR_ERR(mc->clk)); + return PTR_ERR(mc->clk); + } + #ifdef CONFIG_ARCH_TEGRA_2x_SOC if (mc->soc == &tegra20_mc_soc) { - res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - mc->regs2 = devm_ioremap_resource(&pdev->dev, res); - if (IS_ERR(mc->regs2)) - return PTR_ERR(mc->regs2); - isr = tegra20_mc_irq; } else #endif { - mc->clk = devm_clk_get(&pdev->dev, "mc"); - if (IS_ERR(mc->clk)) { - dev_err(&pdev->dev, "failed to get MC clock: %ld\n", - PTR_ERR(mc->clk)); - return PTR_ERR(mc->clk); - } - err = tegra_mc_setup_latency_allowance(mc); if (err < 0) { dev_err(&pdev->dev, "failed to setup latency allowance: %d\n", diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h index 01065f12ebeb..9856f085e487 100644 --- a/drivers/memory/tegra/mc.h +++ b/drivers/memory/tegra/mc.h @@ -26,18 +26,12 @@ static inline u32 mc_readl(struct tegra_mc *mc, unsigned long offset) { - if (mc->regs2 && offset >= 0x24) - return readl(mc->regs2 + offset - 0x3c); - return readl(mc->regs + offset); } static inline void mc_writel(struct tegra_mc *mc, u32 value, unsigned long offset) { - if (mc->regs2 && offset >= 0x24) - return writel(value, mc->regs2 + offset - 0x3c); - writel(value, mc->regs + offset); } diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h index b43f37fea096..db5bfdf589b4 100644 --- a/include/soc/tegra/mc.h +++ b/include/soc/tegra/mc.h @@ -144,7 +144,7 @@ struct tegra_mc_soc { struct tegra_mc { struct device *dev; struct tegra_smmu *smmu; - void __iomem *regs, *regs2; + void __iomem *regs; struct clk *clk; int irq; From b3bb6b858f2a60fe3ac0c3833084386f7dd420e4 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:38:53 +0300 Subject: [PATCH 13/60] memory: tegra: Read client ID on GART page fault With the device tree binding changes, now Memory Controller has access to GART registers. Hence it is now possible to read client ID on GART page fault to get information about what memory client causes the fault. 
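As a stand-alone illustration of the decoding added below (bit 0 of MC_GART_ERROR_REQ distinguishes a write access and the faulting client ID sits in the bits above it, masked with the SoC-specific client_id_mask; the register value and mask used here are made up):

/* Toy decoder mirroring how the driver interprets MC_GART_ERROR_REQ. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct gart_fault_info {
	unsigned int client_id;
	bool is_write;
};

static struct gart_fault_info decode_gart_error_req(uint32_t value,
						    uint32_t client_id_mask)
{
	struct gart_fault_info info = {
		.client_id = (value >> 1) & client_id_mask,
		.is_write  = value & 0x1,
	};

	return info;
}

int main(void)
{
	/* hypothetical raw register value and mask, for demonstration only */
	struct gart_fault_info info = decode_gart_error_req(0x33, 0x7f);

	printf("GART page fault: client %u, %s access\n",
	       info.client_id, info.is_write ? "write" : "read");
	return 0;
}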
Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/memory/tegra/mc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index 59db13287b47..ce8cf81b55d7 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -38,6 +38,7 @@ #define MC_ERR_ADR 0x0c +#define MC_GART_ERROR_REQ 0x30 #define MC_DECERR_EMEM_OTHERS_STATUS 0x58 #define MC_SECURITY_VIOLATION_STATUS 0x74 @@ -575,8 +576,15 @@ static __maybe_unused irqreturn_t tegra20_mc_irq(int irq, void *data) break; case MC_INT_INVALID_GART_PAGE: - dev_err_ratelimited(mc->dev, "%s\n", error); - continue; + reg = MC_GART_ERROR_REQ; + value = mc_readl(mc, reg); + + id = (value >> 1) & mc->soc->client_id_mask; + desc = error_names[2]; + + if (value & BIT(0)) + direction = "write"; + break; case MC_INT_SECURITY_VIOLATION: reg = MC_SECURITY_VIOLATION_STATUS; From 59cd046f7f94543540d82216dbbfd8146db43b70 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:38:54 +0300 Subject: [PATCH 14/60] memory: tegra: Use of_device_get_match_data() There is no need to match device with the DT node since it was already matched, use of_device_get_match_data() helper to get the match-data. Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/memory/tegra/mc.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index ce8cf81b55d7..55ecfb2d8cfd 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -619,23 +620,18 @@ static __maybe_unused irqreturn_t tegra20_mc_irq(int irq, void *data) static int tegra_mc_probe(struct platform_device *pdev) { - const struct of_device_id *match; struct resource *res; struct tegra_mc *mc; void *isr; int err; - match = of_match_node(tegra_mc_of_match, pdev->dev.of_node); - if (!match) - return -ENODEV; - mc = devm_kzalloc(&pdev->dev, sizeof(*mc), GFP_KERNEL); if (!mc) return -ENOMEM; platform_set_drvdata(pdev, mc); spin_lock_init(&mc->lock); - mc->soc = match->data; + mc->soc = of_device_get_match_data(&pdev->dev); mc->dev = &pdev->dev; /* length of MC tick in nanoseconds */ From 45594c683ef780f20f11b1e0018b933b6ff5d9a1 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:38:55 +0300 Subject: [PATCH 15/60] memory: tegra: Use relaxed versions of readl/writel There is no need for inserting of memory barriers to access registers of Memory Controller. Hence use the relaxed versions of the accessors. 
Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/memory/tegra/mc.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/memory/tegra/mc.h b/drivers/memory/tegra/mc.h index 9856f085e487..887a3b07334f 100644 --- a/drivers/memory/tegra/mc.h +++ b/drivers/memory/tegra/mc.h @@ -26,13 +26,13 @@ static inline u32 mc_readl(struct tegra_mc *mc, unsigned long offset) { - return readl(mc->regs + offset); + return readl_relaxed(mc->regs + offset); } static inline void mc_writel(struct tegra_mc *mc, u32 value, unsigned long offset) { - writel(value, mc->regs + offset); + writel_relaxed(value, mc->regs + offset); } extern const struct tegra_mc_reset_ops terga_mc_reset_ops_common; From ce2785a75dbca27375f3723f4e697a2a8dc096ee Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:38:56 +0300 Subject: [PATCH 16/60] iommu/tegra: gart: Integrate with Memory Controller driver The device-tree binding has been changed. There is no separate GART device anymore, it is squashed into the Memory Controller. Integrate GART module with the MC in a way it is done for the SMMU on Tegra30+. Signed-off-by: Dmitry Osipenko Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 1 + drivers/iommu/tegra-gart.c | 71 ++++++++++---------------------------- drivers/memory/tegra/mc.c | 43 +++++++++++++++++++++++ include/soc/tegra/mc.h | 25 ++++++++++++++ 4 files changed, 87 insertions(+), 53 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d9a25715650e..83c099bb7288 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -282,6 +282,7 @@ config ROCKCHIP_IOMMU config TEGRA_IOMMU_GART bool "Tegra GART IOMMU Support" depends on ARCH_TEGRA_2x_SOC + depends on TEGRA_MC select IOMMU_API help Enables support for remapping discontiguous physical memory diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 835fea461c59..b35ffa312a83 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -19,16 +19,17 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. 
*/ -#include #include #include #include #include -#include +#include #include #include #include +#include + /* bitmap of the page sizes currently supported */ #define GART_IOMMU_PGSIZES (SZ_4K) @@ -397,9 +398,8 @@ static const struct iommu_ops gart_iommu_ops = { .iotlb_sync = gart_iommu_sync, }; -static int tegra_gart_suspend(struct device *dev) +int tegra_gart_suspend(struct gart_device *gart) { - struct gart_device *gart = dev_get_drvdata(dev); unsigned long iova; u32 *data = gart->savedata; unsigned long flags; @@ -411,9 +411,8 @@ static int tegra_gart_suspend(struct device *dev) return 0; } -static int tegra_gart_resume(struct device *dev) +int tegra_gart_resume(struct gart_device *gart) { - struct gart_device *gart = dev_get_drvdata(dev); unsigned long flags; spin_lock_irqsave(&gart->pte_lock, flags); @@ -422,41 +421,33 @@ static int tegra_gart_resume(struct device *dev) return 0; } -static int tegra_gart_probe(struct platform_device *pdev) +struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc) { struct gart_device *gart; - struct resource *res, *res_remap; + struct resource *res_remap; void __iomem *gart_regs; - struct device *dev = &pdev->dev; int ret; BUILD_BUG_ON(PAGE_SHIFT != GART_PAGE_SHIFT); /* the GART memory aperture is required */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - res_remap = platform_get_resource(pdev, IORESOURCE_MEM, 1); - if (!res || !res_remap) { + res_remap = platform_get_resource(to_platform_device(dev), + IORESOURCE_MEM, 1); + if (!res_remap) { dev_err(dev, "GART memory aperture expected\n"); - return -ENXIO; + return ERR_PTR(-ENXIO); } gart = devm_kzalloc(dev, sizeof(*gart), GFP_KERNEL); if (!gart) { dev_err(dev, "failed to allocate gart_device\n"); - return -ENOMEM; + return ERR_PTR(-ENOMEM); } - gart_regs = devm_ioremap(dev, res->start, resource_size(res)); - if (!gart_regs) { - dev_err(dev, "failed to remap GART registers\n"); - return -ENXIO; - } - - ret = iommu_device_sysfs_add(&gart->iommu, &pdev->dev, NULL, - dev_name(&pdev->dev)); + ret = iommu_device_sysfs_add(&gart->iommu, dev, NULL, "gart"); if (ret) { dev_err(dev, "Failed to register IOMMU in sysfs\n"); - return ret; + return ERR_PTR(ret); } iommu_device_set_ops(&gart->iommu, &gart_iommu_ops); @@ -468,7 +459,8 @@ static int tegra_gart_probe(struct platform_device *pdev) goto remove_sysfs; } - gart->dev = &pdev->dev; + gart->dev = dev; + gart_regs = mc->regs + GART_REG_BASE; spin_lock_init(&gart->pte_lock); spin_lock_init(&gart->client_lock); INIT_LIST_HEAD(&gart->client); @@ -483,46 +475,19 @@ static int tegra_gart_probe(struct platform_device *pdev) goto unregister_iommu; } - platform_set_drvdata(pdev, gart); do_gart_setup(gart, NULL); gart_handle = gart; - return 0; + return gart; unregister_iommu: iommu_device_unregister(&gart->iommu); remove_sysfs: iommu_device_sysfs_remove(&gart->iommu); - return ret; + return ERR_PTR(ret); } -static const struct dev_pm_ops tegra_gart_pm_ops = { - .suspend = tegra_gart_suspend, - .resume = tegra_gart_resume, -}; - -static const struct of_device_id tegra_gart_of_match[] = { - { .compatible = "nvidia,tegra20-gart", }, - { }, -}; - -static struct platform_driver tegra_gart_driver = { - .probe = tegra_gart_probe, - .driver = { - .name = "tegra-gart", - .pm = &tegra_gart_pm_ops, - .of_match_table = tegra_gart_of_match, - .suppress_bind_attrs = true, - }, -}; - -static int __init tegra_gart_init(void) -{ - return platform_driver_register(&tegra_gart_driver); -} -subsys_initcall(tegra_gart_init); - module_param(gart_debug, 
bool, 0644); MODULE_PARM_DESC(gart_debug, "Enable GART debugging"); diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index 55ecfb2d8cfd..e684e234361a 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -702,13 +702,56 @@ static int tegra_mc_probe(struct platform_device *pdev) PTR_ERR(mc->smmu)); } + if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && !mc->soc->smmu) { + mc->gart = tegra_gart_probe(&pdev->dev, mc); + if (IS_ERR(mc->gart)) { + dev_err(&pdev->dev, "failed to probe GART: %ld\n", + PTR_ERR(mc->gart)); + mc->gart = NULL; + } + } + return 0; } +static int tegra_mc_suspend(struct device *dev) +{ + struct tegra_mc *mc = dev_get_drvdata(dev); + int err; + + if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && mc->gart) { + err = tegra_gart_suspend(mc->gart); + if (err) + return err; + } + + return 0; +} + +static int tegra_mc_resume(struct device *dev) +{ + struct tegra_mc *mc = dev_get_drvdata(dev); + int err; + + if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && mc->gart) { + err = tegra_gart_resume(mc->gart); + if (err) + return err; + } + + return 0; +} + +static const struct dev_pm_ops tegra_mc_pm_ops = { + .suspend = tegra_mc_suspend, + .resume = tegra_mc_resume, +}; + static struct platform_driver tegra_mc_driver = { .driver = { .name = "tegra-mc", .of_match_table = tegra_mc_of_match, + .pm = &tegra_mc_pm_ops, .suppress_bind_attrs = true, }, .prevent_deferred_probe = true, diff --git a/include/soc/tegra/mc.h b/include/soc/tegra/mc.h index db5bfdf589b4..e489a028ec9f 100644 --- a/include/soc/tegra/mc.h +++ b/include/soc/tegra/mc.h @@ -9,6 +9,7 @@ #ifndef __SOC_TEGRA_MC_H__ #define __SOC_TEGRA_MC_H__ +#include #include #include @@ -77,6 +78,7 @@ struct tegra_smmu_soc { struct tegra_mc; struct tegra_smmu; +struct gart_device; #ifdef CONFIG_TEGRA_IOMMU_SMMU struct tegra_smmu *tegra_smmu_probe(struct device *dev, @@ -96,6 +98,28 @@ static inline void tegra_smmu_remove(struct tegra_smmu *smmu) } #endif +#ifdef CONFIG_TEGRA_IOMMU_GART +struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc); +int tegra_gart_suspend(struct gart_device *gart); +int tegra_gart_resume(struct gart_device *gart); +#else +static inline struct gart_device * +tegra_gart_probe(struct device *dev, struct tegra_mc *mc) +{ + return ERR_PTR(-ENODEV); +} + +static inline int tegra_gart_suspend(struct gart_device *gart) +{ + return -ENODEV; +} + +static inline int tegra_gart_resume(struct gart_device *gart) +{ + return -ENODEV; +} +#endif + struct tegra_mc_reset { const char *name; unsigned long id; @@ -144,6 +168,7 @@ struct tegra_mc_soc { struct tegra_mc { struct device *dev; struct tegra_smmu *smmu; + struct gart_device *gart; void __iomem *regs; struct clk *clk; int irq; From 568ece5bab24d41bec4d1610439277032840a133 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:38:57 +0300 Subject: [PATCH 17/60] memory: tegra: Do not try to probe SMMU on Tegra20 Tegra20 doesn't have SMMU. Move out checking of the SMMU presence from the SMMU driver into the Memory Controller driver. This change makes code consistent in regards to how GART/SMMU presence checking is performed. 
Signed-off-by: Dmitry Osipenko Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-smmu.c | 4 ---- drivers/memory/tegra/mc.c | 6 ++++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 3a5c7dc6dc57..5182c7d6171e 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -982,10 +982,6 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, u32 value; int err; - /* This can happen on Tegra20 which doesn't have an SMMU */ - if (!soc) - return NULL; - smmu = devm_kzalloc(dev, sizeof(*smmu), GFP_KERNEL); if (!smmu) return ERR_PTR(-ENOMEM); diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index e684e234361a..3545868c51c0 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -695,11 +695,13 @@ static int tegra_mc_probe(struct platform_device *pdev) dev_err(&pdev->dev, "failed to register reset controller: %d\n", err); - if (IS_ENABLED(CONFIG_TEGRA_IOMMU_SMMU)) { + if (IS_ENABLED(CONFIG_TEGRA_IOMMU_SMMU) && mc->soc->smmu) { mc->smmu = tegra_smmu_probe(&pdev->dev, mc->soc->smmu, mc); - if (IS_ERR(mc->smmu)) + if (IS_ERR(mc->smmu)) { dev_err(&pdev->dev, "failed to probe SMMU: %ld\n", PTR_ERR(mc->smmu)); + mc->smmu = NULL; + } } if (IS_ENABLED(CONFIG_TEGRA_IOMMU_GART) && !mc->soc->smmu) { From 33ea002aadd9cbf1836a13c71e424f113bce635a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:38:58 +0300 Subject: [PATCH 18/60] memory: tegra: Do not ask for IRQ sharing Memory Controller driver never shared IRQ with any other driver and very unlikely that it will. Hence there is no need to request IRQ sharing and the corresponding flag can be dropped safely. Signed-off-by: Dmitry Osipenko Signed-off-by: Joerg Roedel --- drivers/memory/tegra/mc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index 3545868c51c0..570da2129fa6 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -682,7 +682,7 @@ static int tegra_mc_probe(struct platform_device *pdev) mc_writel(mc, mc->soc->intmask, MC_INTMASK); - err = devm_request_irq(&pdev->dev, mc->irq, isr, IRQF_SHARED, + err = devm_request_irq(&pdev->dev, mc->irq, isr, 0, dev_name(&pdev->dev), mc); if (err < 0) { dev_err(&pdev->dev, "failed to request IRQ#%u: %d\n", mc->irq, From f2dcded1be0d024870b5a4ef8e4a587857f303b4 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:38:59 +0300 Subject: [PATCH 19/60] memory: tegra: Clean up error messages Make all messages to start with a lower case and don't unnecessarily go over 80 chars in the code. 
Signed-off-by: Dmitry Osipenko Signed-off-by: Joerg Roedel --- drivers/memory/tegra/mc.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/memory/tegra/mc.c b/drivers/memory/tegra/mc.c index 570da2129fa6..0a53598d982f 100644 --- a/drivers/memory/tegra/mc.c +++ b/drivers/memory/tegra/mc.c @@ -163,7 +163,7 @@ static int tegra_mc_hotreset_assert(struct reset_controller_dev *rcdev, /* block clients DMA requests */ err = rst_ops->block_dma(mc, rst); if (err) { - dev_err(mc->dev, "Failed to block %s DMA: %d\n", + dev_err(mc->dev, "failed to block %s DMA: %d\n", rst->name, err); return err; } @@ -173,7 +173,7 @@ static int tegra_mc_hotreset_assert(struct reset_controller_dev *rcdev, /* wait for completion of the outstanding DMA requests */ while (!rst_ops->dma_idling(mc, rst)) { if (!retries--) { - dev_err(mc->dev, "Failed to flush %s DMA\n", + dev_err(mc->dev, "failed to flush %s DMA\n", rst->name); return -EBUSY; } @@ -186,7 +186,7 @@ static int tegra_mc_hotreset_assert(struct reset_controller_dev *rcdev, /* clear clients DMA requests sitting before arbitration */ err = rst_ops->hotreset_assert(mc, rst); if (err) { - dev_err(mc->dev, "Failed to hot reset %s: %d\n", + dev_err(mc->dev, "failed to hot reset %s: %d\n", rst->name, err); return err; } @@ -215,7 +215,7 @@ static int tegra_mc_hotreset_deassert(struct reset_controller_dev *rcdev, /* take out client from hot reset */ err = rst_ops->hotreset_deassert(mc, rst); if (err) { - dev_err(mc->dev, "Failed to deassert hot reset %s: %d\n", + dev_err(mc->dev, "failed to deassert hot reset %s: %d\n", rst->name, err); return err; } @@ -225,7 +225,7 @@ static int tegra_mc_hotreset_deassert(struct reset_controller_dev *rcdev, /* allow new DMA requests to proceed to arbitration */ err = rst_ops->unblock_dma(mc, rst); if (err) { - dev_err(mc->dev, "Failed to unblock %s DMA : %d\n", + dev_err(mc->dev, "failed to unblock %s DMA : %d\n", rst->name, err); return err; } @@ -657,7 +657,8 @@ static int tegra_mc_probe(struct platform_device *pdev) { err = tegra_mc_setup_latency_allowance(mc); if (err < 0) { - dev_err(&pdev->dev, "failed to setup latency allowance: %d\n", + dev_err(&pdev->dev, + "failed to setup latency allowance: %d\n", err); return err; } @@ -678,7 +679,7 @@ static int tegra_mc_probe(struct platform_device *pdev) return mc->irq; } - WARN(!mc->soc->client_id_mask, "Missing client ID mask for this SoC\n"); + WARN(!mc->soc->client_id_mask, "missing client ID mask for this SoC\n"); mc_writel(mc, mc->soc->intmask, MC_INTMASK); From c3086fad2755f5e446f6a69fda3b2a8c16a1b5ce Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:39:00 +0300 Subject: [PATCH 20/60] iommu/tegra: gart: Fix spinlock recursion Fix spinlock recursion bug that happens on IOMMU domain destruction if any of the allocated domains have devices attached to them. 
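The recursion comes from gart_iommu_domain_free() taking client_lock and then calling gart_iommu_detach_dev(), which tries to acquire the same non-recursive lock again. The fix splits the detach path into a locking wrapper and a __gart_iommu_detach_dev() helper that expects the lock to be held. A generic sketch of that pattern (the foo_* names and helpers are made up for illustration):

/*
 * The work that must run under the lock lives in a "__" helper, and only
 * the outermost entry points acquire it. A caller that already holds the
 * (non-recursive) spinlock, such as the domain-free path, calls the helper
 * directly.
 */
static void __foo_detach_dev(struct foo *foo, struct device *dev)
{
	/* foo->lock is held by the caller */
	foo_unlink_dev(foo, dev);		/* made-up unlink helper */
}

static void foo_detach_dev(struct foo *foo, struct device *dev)
{
	spin_lock(&foo->lock);
	__foo_detach_dev(foo, dev);
	spin_unlock(&foo->lock);
}

static void foo_free_domain(struct foo *foo)
{
	struct foo_client *c, *tmp;

	spin_lock(&foo->lock);
	/* calling foo_detach_dev() here would spin on foo->lock forever */
	list_for_each_entry_safe(c, tmp, &foo->clients, list)
		__foo_detach_dev(foo, c->dev);
	spin_unlock(&foo->lock);
}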
Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-gart.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index b35ffa312a83..a7a9400e0cd8 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -197,25 +197,33 @@ fail: return err; } -static void gart_iommu_detach_dev(struct iommu_domain *domain, - struct device *dev) +static void __gart_iommu_detach_dev(struct iommu_domain *domain, + struct device *dev) { struct gart_domain *gart_domain = to_gart_domain(domain); struct gart_device *gart = gart_domain->gart; struct gart_client *c; - spin_lock(&gart->client_lock); - list_for_each_entry(c, &gart->client, list) { if (c->dev == dev) { list_del(&c->list); devm_kfree(gart->dev, c); dev_dbg(gart->dev, "Detached %s\n", dev_name(dev)); - goto out; + return; } } - dev_err(gart->dev, "Couldn't find\n"); -out: + + dev_err(gart->dev, "Couldn't find %s to detach\n", dev_name(dev)); +} + +static void gart_iommu_detach_dev(struct iommu_domain *domain, + struct device *dev) +{ + struct gart_domain *gart_domain = to_gart_domain(domain); + struct gart_device *gart = gart_domain->gart; + + spin_lock(&gart->client_lock); + __gart_iommu_detach_dev(domain, dev); spin_unlock(&gart->client_lock); } @@ -255,7 +263,7 @@ static void gart_iommu_domain_free(struct iommu_domain *domain) struct gart_client *c; list_for_each_entry(c, &gart->client, list) - gart_iommu_detach_dev(domain, c->dev); + __gart_iommu_detach_dev(domain, c->dev); } spin_unlock(&gart->client_lock); } From 8e924910dd07f1614fe534f8bdfdc17f7de3d68b Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:39:01 +0300 Subject: [PATCH 21/60] iommu/tegra: gart: Fix NULL pointer dereference Fix NULL pointer dereference on IOMMU domain destruction that happens because clients list is being iterated unsafely and its elements are getting deleted during the iteration. Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-gart.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index a7a9400e0cd8..7fdd8b12efd5 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -260,9 +260,9 @@ static void gart_iommu_domain_free(struct iommu_domain *domain) if (gart) { spin_lock(&gart->client_lock); if (!list_empty(&gart->client)) { - struct gart_client *c; + struct gart_client *c, *tmp; - list_for_each_entry(c, &gart->client, list) + list_for_each_entry_safe(c, tmp, &gart->client, list) __gart_iommu_detach_dev(domain, c->dev); } spin_unlock(&gart->client_lock); From 7d849b7b40b9b3f574f50bbf44503b38e319fca5 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:39:02 +0300 Subject: [PATCH 22/60] iommu/tegra: gart: Allow only one active domain at a time GART has a single address space that is shared by all devices, hence only one domain could be active at a time. 
Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-gart.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 7fdd8b12efd5..71ff22be9560 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -57,6 +57,7 @@ struct gart_device { spinlock_t pte_lock; /* for pagetable */ struct list_head client; spinlock_t client_lock; /* for client list */ + struct iommu_domain *active_domain; /* current active domain */ struct device *dev; struct iommu_device iommu; /* IOMMU Core handle */ @@ -186,6 +187,12 @@ static int gart_iommu_attach_dev(struct iommu_domain *domain, goto fail; } } + if (gart->active_domain && gart->active_domain != domain) { + dev_err(gart->dev, "Only one domain can be active at a time\n"); + err = -EINVAL; + goto fail; + } + gart->active_domain = domain; list_add(&client->list, &gart->client); spin_unlock(&gart->client_lock); dev_dbg(gart->dev, "Attached %s\n", dev_name(dev)); @@ -208,6 +215,8 @@ static void __gart_iommu_detach_dev(struct iommu_domain *domain, if (c->dev == dev) { list_del(&c->list); devm_kfree(gart->dev, c); + if (list_empty(&gart->client)) + gart->active_domain = NULL; dev_dbg(gart->dev, "Detached %s\n", dev_name(dev)); return; } From 167d67d550b9e94207578679c3a6feef78449f38 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:39:03 +0300 Subject: [PATCH 23/60] iommu/tegra: gart: Don't use managed resources GART is a part of the Memory Controller driver that is always built-in, hence there is no benefit from the use of managed resources. Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-gart.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 71ff22be9560..2e0e6aff8f70 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -173,7 +173,7 @@ static int gart_iommu_attach_dev(struct iommu_domain *domain, struct gart_client *client, *c; int err = 0; - client = devm_kzalloc(gart->dev, sizeof(*c), GFP_KERNEL); + client = kzalloc(sizeof(*c), GFP_KERNEL); if (!client) return -ENOMEM; client->dev = dev; @@ -199,7 +199,7 @@ static int gart_iommu_attach_dev(struct iommu_domain *domain, return 0; fail: - devm_kfree(gart->dev, client); + kfree(client); spin_unlock(&gart->client_lock); return err; } @@ -214,7 +214,7 @@ static void __gart_iommu_detach_dev(struct iommu_domain *domain, list_for_each_entry(c, &gart->client, list) { if (c->dev == dev) { list_del(&c->list); - devm_kfree(gart->dev, c); + kfree(c); if (list_empty(&gart->client)) gart->active_domain = NULL; dev_dbg(gart->dev, "Detached %s\n", dev_name(dev)); @@ -455,7 +455,7 @@ struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc) return ERR_PTR(-ENXIO); } - gart = devm_kzalloc(dev, sizeof(*gart), GFP_KERNEL); + gart = kzalloc(sizeof(*gart), GFP_KERNEL); if (!gart) { dev_err(dev, "failed to allocate gart_device\n"); return ERR_PTR(-ENOMEM); @@ -464,7 +464,7 @@ struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc) ret = iommu_device_sysfs_add(&gart->iommu, dev, NULL, "gart"); if (ret) { dev_err(dev, "Failed to register IOMMU in sysfs\n"); - return ERR_PTR(ret); + goto free_gart; } iommu_device_set_ops(&gart->iommu, &gart_iommu_ops); @@ -502,6 +502,8 @@ unregister_iommu: iommu_device_unregister(&gart->iommu); remove_sysfs: 
iommu_device_sysfs_remove(&gart->iommu); +free_gart: + kfree(gart); return ERR_PTR(ret); } From 5dd82cdb36880e0c87b58a002a0513cfbc18552a Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:39:04 +0300 Subject: [PATCH 24/60] iommu/tegra: gart: Prepend error/debug messages with "gart:" GART became a part of Memory Controller, hence now the drivers device is Memory Controller and not GART. As a result all printed messages are prepended with the "tegra-mc 7000f000.memory-controller:", so let's prepend GART's messages with "gart:" in order to differentiate them from the MC. Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-gart.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 2e0e6aff8f70..a3ce6918577d 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -19,6 +19,8 @@ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. */ +#define dev_fmt(fmt) "gart: " fmt + #include #include #include From cc0e1205766b8be1e6f7985991ad80c8c5e791c2 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:39:05 +0300 Subject: [PATCH 25/60] iommu/tegra: gart: Don't detach devices from inactive domains There could be unlimited number of allocated domains, but only one domain can be active at a time. Hence devices must be detached only from the active domain. Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-gart.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index a3ce6918577d..74c9be13f043 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -171,7 +171,7 @@ static int gart_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) { struct gart_domain *gart_domain = to_gart_domain(domain); - struct gart_device *gart = gart_domain->gart; + struct gart_device *gart = gart_handle; struct gart_client *client, *c; int err = 0; @@ -195,6 +195,7 @@ static int gart_iommu_attach_dev(struct iommu_domain *domain, goto fail; } gart->active_domain = domain; + gart_domain->gart = gart; list_add(&client->list, &gart->client); spin_unlock(&gart->client_lock); dev_dbg(gart->dev, "Attached %s\n", dev_name(dev)); @@ -217,8 +218,10 @@ static void __gart_iommu_detach_dev(struct iommu_domain *domain, if (c->dev == dev) { list_del(&c->list); kfree(c); - if (list_empty(&gart->client)) + if (list_empty(&gart->client)) { gart->active_domain = NULL; + gart_domain->gart = NULL; + } dev_dbg(gart->dev, "Detached %s\n", dev_name(dev)); return; } @@ -254,7 +257,6 @@ static struct iommu_domain *gart_iommu_domain_alloc(unsigned type) if (!gart_domain) return NULL; - gart_domain->gart = gart; gart_domain->domain.geometry.aperture_start = gart->iovmm_base; gart_domain->domain.geometry.aperture_end = gart->iovmm_base + gart->page_count * GART_PAGE_SIZE - 1; From e7e2367041179318591d3a656f146e20418f99bb Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:39:06 +0300 Subject: [PATCH 26/60] iommu/tegra: gart: Simplify clients-tracking code GART is a simple IOMMU provider that has single address space. There is no need to setup global clients list and manage it for tracking of the active domain, hence lot's of code could be safely removed and replaced with a simpler alternative. 
Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-gart.c | 153 ++++++++++--------------------------- 1 file changed, 39 insertions(+), 114 deletions(-) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 74c9be13f043..ad348c61d5e7 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -23,7 +23,6 @@ #include #include -#include #include #include #include @@ -46,30 +45,20 @@ #define GART_PAGE_MASK \ (~(GART_PAGE_SIZE - 1) & ~GART_ENTRY_PHYS_ADDR_VALID) -struct gart_client { - struct device *dev; - struct list_head list; -}; - struct gart_device { void __iomem *regs; u32 *savedata; u32 page_count; /* total remappable size */ dma_addr_t iovmm_base; /* offset to vmm_area */ spinlock_t pte_lock; /* for pagetable */ - struct list_head client; - spinlock_t client_lock; /* for client list */ + spinlock_t dom_lock; /* for active domain */ + unsigned int active_devices; /* number of active devices */ struct iommu_domain *active_domain; /* current active domain */ struct device *dev; struct iommu_device iommu; /* IOMMU Core handle */ }; -struct gart_domain { - struct iommu_domain domain; /* generic domain handle */ - struct gart_device *gart; /* link to gart device */ -}; - static struct gart_device *gart_handle; /* unique for a system */ static bool gart_debug; @@ -77,11 +66,6 @@ static bool gart_debug; #define GART_PTE(_pfn) \ (GART_ENTRY_PHYS_ADDR_VALID | ((_pfn) << PAGE_SHIFT)) -static struct gart_domain *to_gart_domain(struct iommu_domain *dom) -{ - return container_of(dom, struct gart_domain, domain); -} - /* * Any interaction between any block on PPSB and a block on APB or AHB * must have these read-back to ensure the APB/AHB bus transaction is @@ -170,125 +154,70 @@ static inline bool gart_iova_range_valid(struct gart_device *gart, static int gart_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) { - struct gart_domain *gart_domain = to_gart_domain(domain); struct gart_device *gart = gart_handle; - struct gart_client *client, *c; - int err = 0; + int ret = 0; - client = kzalloc(sizeof(*c), GFP_KERNEL); - if (!client) - return -ENOMEM; - client->dev = dev; + spin_lock(&gart->dom_lock); - spin_lock(&gart->client_lock); - list_for_each_entry(c, &gart->client, list) { - if (c->dev == dev) { - dev_err(gart->dev, - "%s is already attached\n", dev_name(dev)); - err = -EINVAL; - goto fail; - } - } if (gart->active_domain && gart->active_domain != domain) { - dev_err(gart->dev, "Only one domain can be active at a time\n"); - err = -EINVAL; - goto fail; - } - gart->active_domain = domain; - gart_domain->gart = gart; - list_add(&client->list, &gart->client); - spin_unlock(&gart->client_lock); - dev_dbg(gart->dev, "Attached %s\n", dev_name(dev)); - return 0; - -fail: - kfree(client); - spin_unlock(&gart->client_lock); - return err; -} - -static void __gart_iommu_detach_dev(struct iommu_domain *domain, - struct device *dev) -{ - struct gart_domain *gart_domain = to_gart_domain(domain); - struct gart_device *gart = gart_domain->gart; - struct gart_client *c; - - list_for_each_entry(c, &gart->client, list) { - if (c->dev == dev) { - list_del(&c->list); - kfree(c); - if (list_empty(&gart->client)) { - gart->active_domain = NULL; - gart_domain->gart = NULL; - } - dev_dbg(gart->dev, "Detached %s\n", dev_name(dev)); - return; - } + ret = -EBUSY; + } else if (dev->archdata.iommu != domain) { + dev->archdata.iommu = domain; + gart->active_domain = domain; + gart->active_devices++; } - 
dev_err(gart->dev, "Couldn't find %s to detach\n", dev_name(dev)); + spin_unlock(&gart->dom_lock); + + return ret; } static void gart_iommu_detach_dev(struct iommu_domain *domain, struct device *dev) { - struct gart_domain *gart_domain = to_gart_domain(domain); - struct gart_device *gart = gart_domain->gart; + struct gart_device *gart = gart_handle; - spin_lock(&gart->client_lock); - __gart_iommu_detach_dev(domain, dev); - spin_unlock(&gart->client_lock); + spin_lock(&gart->dom_lock); + + if (dev->archdata.iommu == domain) { + dev->archdata.iommu = NULL; + + if (--gart->active_devices == 0) + gart->active_domain = NULL; + } + + spin_unlock(&gart->dom_lock); } static struct iommu_domain *gart_iommu_domain_alloc(unsigned type) { - struct gart_domain *gart_domain; - struct gart_device *gart; + struct gart_device *gart = gart_handle; + struct iommu_domain *domain; if (type != IOMMU_DOMAIN_UNMANAGED) return NULL; - gart = gart_handle; - if (!gart) - return NULL; - - gart_domain = kzalloc(sizeof(*gart_domain), GFP_KERNEL); - if (!gart_domain) - return NULL; - - gart_domain->domain.geometry.aperture_start = gart->iovmm_base; - gart_domain->domain.geometry.aperture_end = gart->iovmm_base + + domain = kzalloc(sizeof(*domain), GFP_KERNEL); + if (domain) { + domain->geometry.aperture_start = gart->iovmm_base; + domain->geometry.aperture_end = gart->iovmm_base + gart->page_count * GART_PAGE_SIZE - 1; - gart_domain->domain.geometry.force_aperture = true; + domain->geometry.force_aperture = true; + } - return &gart_domain->domain; + return domain; } static void gart_iommu_domain_free(struct iommu_domain *domain) { - struct gart_domain *gart_domain = to_gart_domain(domain); - struct gart_device *gart = gart_domain->gart; - - if (gart) { - spin_lock(&gart->client_lock); - if (!list_empty(&gart->client)) { - struct gart_client *c, *tmp; - - list_for_each_entry_safe(c, tmp, &gart->client, list) - __gart_iommu_detach_dev(domain, c->dev); - } - spin_unlock(&gart->client_lock); - } - - kfree(gart_domain); + WARN_ON(gart_handle->active_domain == domain); + kfree(domain); } static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t pa, size_t bytes, int prot) { - struct gart_domain *gart_domain = to_gart_domain(domain); - struct gart_device *gart = gart_domain->gart; + struct gart_device *gart = gart_handle; unsigned long flags; unsigned long pfn; unsigned long pte; @@ -319,8 +248,7 @@ static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova, static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t bytes) { - struct gart_domain *gart_domain = to_gart_domain(domain); - struct gart_device *gart = gart_domain->gart; + struct gart_device *gart = gart_handle; unsigned long flags; if (!gart_iova_range_valid(gart, iova, bytes)) @@ -335,8 +263,7 @@ static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova, static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) { - struct gart_domain *gart_domain = to_gart_domain(domain); - struct gart_device *gart = gart_domain->gart; + struct gart_device *gart = gart_handle; unsigned long pte; phys_addr_t pa; unsigned long flags; @@ -395,8 +322,7 @@ static int gart_iommu_of_xlate(struct device *dev, static void gart_iommu_sync(struct iommu_domain *domain) { - struct gart_domain *gart_domain = to_gart_domain(domain); - struct gart_device *gart = gart_domain->gart; + struct gart_device *gart = gart_handle; FLUSH_GART_REGS(gart); } @@ -483,8 +409,7 @@ struct 
gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc) gart->dev = dev; gart_regs = mc->regs + GART_REG_BASE; spin_lock_init(&gart->pte_lock); - spin_lock_init(&gart->client_lock); - INIT_LIST_HEAD(&gart->client); + spin_lock_init(&gart->dom_lock); gart->regs = gart_regs; gart->iovmm_base = (dma_addr_t)res_remap->start; gart->page_count = (resource_size(res_remap) >> GART_PAGE_SHIFT); From 707223095ccf1fd81c9fa5346239b79127c322c2 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Wed, 12 Dec 2018 23:39:07 +0300 Subject: [PATCH 27/60] iommu/tegra: gart: Perform code refactoring Removed redundant safety-checks in the code and some debug code that isn't actually very useful for debugging, like enormous pagetable dump on each fault. The majority of the changes are code reshuffling, variables/whitespaces clean up and removal of debug messages that duplicate messages of the IOMMU-core. Now the GART translation is kept disabled while GART is suspended. Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-gart.c | 255 +++++++++++++++---------------------- 1 file changed, 106 insertions(+), 149 deletions(-) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index ad348c61d5e7..4d8057916552 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -1,5 +1,5 @@ /* - * IOMMU API for GART in Tegra20 + * IOMMU API for Graphics Address Relocation Table on Tegra20 * * Copyright (c) 2010-2012, NVIDIA CORPORATION. All rights reserved. * @@ -31,70 +31,63 @@ #include -/* bitmap of the page sizes currently supported */ -#define GART_IOMMU_PGSIZES (SZ_4K) - #define GART_REG_BASE 0x24 #define GART_CONFIG (0x24 - GART_REG_BASE) #define GART_ENTRY_ADDR (0x28 - GART_REG_BASE) #define GART_ENTRY_DATA (0x2c - GART_REG_BASE) -#define GART_ENTRY_PHYS_ADDR_VALID (1 << 31) + +#define GART_ENTRY_PHYS_ADDR_VALID BIT(31) #define GART_PAGE_SHIFT 12 #define GART_PAGE_SIZE (1 << GART_PAGE_SHIFT) -#define GART_PAGE_MASK \ - (~(GART_PAGE_SIZE - 1) & ~GART_ENTRY_PHYS_ADDR_VALID) +#define GART_PAGE_MASK GENMASK(30, GART_PAGE_SHIFT) + +/* bitmap of the page sizes currently supported */ +#define GART_IOMMU_PGSIZES (GART_PAGE_SIZE) struct gart_device { void __iomem *regs; u32 *savedata; - u32 page_count; /* total remappable size */ - dma_addr_t iovmm_base; /* offset to vmm_area */ + unsigned long iovmm_base; /* offset to vmm_area start */ + unsigned long iovmm_end; /* offset to vmm_area end */ spinlock_t pte_lock; /* for pagetable */ spinlock_t dom_lock; /* for active domain */ unsigned int active_devices; /* number of active devices */ struct iommu_domain *active_domain; /* current active domain */ - struct device *dev; - struct iommu_device iommu; /* IOMMU Core handle */ + struct device *dev; }; static struct gart_device *gart_handle; /* unique for a system */ static bool gart_debug; -#define GART_PTE(_pfn) \ - (GART_ENTRY_PHYS_ADDR_VALID | ((_pfn) << PAGE_SHIFT)) - /* * Any interaction between any block on PPSB and a block on APB or AHB * must have these read-back to ensure the APB/AHB bus transaction is * complete before initiating activity on the PPSB block. 
*/ -#define FLUSH_GART_REGS(gart) ((void)readl((gart)->regs + GART_CONFIG)) +#define FLUSH_GART_REGS(gart) readl_relaxed((gart)->regs + GART_CONFIG) #define for_each_gart_pte(gart, iova) \ for (iova = gart->iovmm_base; \ - iova < gart->iovmm_base + GART_PAGE_SIZE * gart->page_count; \ + iova < gart->iovmm_end; \ iova += GART_PAGE_SIZE) static inline void gart_set_pte(struct gart_device *gart, - unsigned long offs, u32 pte) + unsigned long iova, unsigned long pte) { - writel(offs, gart->regs + GART_ENTRY_ADDR); - writel(pte, gart->regs + GART_ENTRY_DATA); - - dev_dbg(gart->dev, "%s %08lx:%08x\n", - pte ? "map" : "unmap", offs, pte & GART_PAGE_MASK); + writel_relaxed(iova, gart->regs + GART_ENTRY_ADDR); + writel_relaxed(pte, gart->regs + GART_ENTRY_DATA); } static inline unsigned long gart_read_pte(struct gart_device *gart, - unsigned long offs) + unsigned long iova) { unsigned long pte; - writel(offs, gart->regs + GART_ENTRY_ADDR); - pte = readl(gart->regs + GART_ENTRY_DATA); + writel_relaxed(iova, gart->regs + GART_ENTRY_ADDR); + pte = readl_relaxed(gart->regs + GART_ENTRY_DATA); return pte; } @@ -106,49 +99,20 @@ static void do_gart_setup(struct gart_device *gart, const u32 *data) for_each_gart_pte(gart, iova) gart_set_pte(gart, iova, data ? *(data++) : 0); - writel(1, gart->regs + GART_CONFIG); + writel_relaxed(1, gart->regs + GART_CONFIG); FLUSH_GART_REGS(gart); } -#ifdef DEBUG -static void gart_dump_table(struct gart_device *gart) +static inline bool gart_iova_range_invalid(struct gart_device *gart, + unsigned long iova, size_t bytes) { - unsigned long iova; - unsigned long flags; - - spin_lock_irqsave(&gart->pte_lock, flags); - for_each_gart_pte(gart, iova) { - unsigned long pte; - - pte = gart_read_pte(gart, iova); - - dev_dbg(gart->dev, "%s %08lx:%08lx\n", - (GART_ENTRY_PHYS_ADDR_VALID & pte) ? 
"v" : " ", - iova, pte & GART_PAGE_MASK); - } - spin_unlock_irqrestore(&gart->pte_lock, flags); + return unlikely(iova < gart->iovmm_base || bytes != GART_PAGE_SIZE || + iova + bytes > gart->iovmm_end); } -#else -static inline void gart_dump_table(struct gart_device *gart) + +static inline bool gart_pte_valid(struct gart_device *gart, unsigned long iova) { -} -#endif - -static inline bool gart_iova_range_valid(struct gart_device *gart, - unsigned long iova, size_t bytes) -{ - unsigned long iova_start, iova_end, gart_start, gart_end; - - iova_start = iova; - iova_end = iova_start + bytes - 1; - gart_start = gart->iovmm_base; - gart_end = gart_start + gart->page_count * GART_PAGE_SIZE - 1; - - if (iova_start < gart_start) - return false; - if (iova_end > gart_end) - return false; - return true; + return !!(gart_read_pte(gart, iova) & GART_ENTRY_PHYS_ADDR_VALID); } static int gart_iommu_attach_dev(struct iommu_domain *domain, @@ -191,7 +155,6 @@ static void gart_iommu_detach_dev(struct iommu_domain *domain, static struct iommu_domain *gart_iommu_domain_alloc(unsigned type) { - struct gart_device *gart = gart_handle; struct iommu_domain *domain; if (type != IOMMU_DOMAIN_UNMANAGED) @@ -199,9 +162,8 @@ static struct iommu_domain *gart_iommu_domain_alloc(unsigned type) domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (domain) { - domain->geometry.aperture_start = gart->iovmm_base; - domain->geometry.aperture_end = gart->iovmm_base + - gart->page_count * GART_PAGE_SIZE - 1; + domain->geometry.aperture_start = gart_handle->iovmm_base; + domain->geometry.aperture_end = gart_handle->iovmm_end - 1; domain->geometry.force_aperture = true; } @@ -214,34 +176,45 @@ static void gart_iommu_domain_free(struct iommu_domain *domain) kfree(domain); } +static inline int __gart_iommu_map(struct gart_device *gart, unsigned long iova, + unsigned long pa) +{ + if (unlikely(gart_debug && gart_pte_valid(gart, iova))) { + dev_err(gart->dev, "Page entry is in-use\n"); + return -EINVAL; + } + + gart_set_pte(gart, iova, GART_ENTRY_PHYS_ADDR_VALID | pa); + + return 0; +} + static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t pa, size_t bytes, int prot) { struct gart_device *gart = gart_handle; - unsigned long flags; - unsigned long pfn; - unsigned long pte; + int ret; - if (!gart_iova_range_valid(gart, iova, bytes)) + if (gart_iova_range_invalid(gart, iova, bytes)) return -EINVAL; - spin_lock_irqsave(&gart->pte_lock, flags); - pfn = __phys_to_pfn(pa); - if (!pfn_valid(pfn)) { - dev_err(gart->dev, "Invalid page: %pa\n", &pa); - spin_unlock_irqrestore(&gart->pte_lock, flags); + spin_lock(&gart->pte_lock); + ret = __gart_iommu_map(gart, iova, (unsigned long)pa); + spin_unlock(&gart->pte_lock); + + return ret; +} + +static inline int __gart_iommu_unmap(struct gart_device *gart, + unsigned long iova) +{ + if (unlikely(gart_debug && !gart_pte_valid(gart, iova))) { + dev_err(gart->dev, "Page entry is invalid\n"); return -EINVAL; } - if (gart_debug) { - pte = gart_read_pte(gart, iova); - if (pte & GART_ENTRY_PHYS_ADDR_VALID) { - spin_unlock_irqrestore(&gart->pte_lock, flags); - dev_err(gart->dev, "Page entry is in-use\n"); - return -EBUSY; - } - } - gart_set_pte(gart, iova, GART_PTE(pfn)); - spin_unlock_irqrestore(&gart->pte_lock, flags); + + gart_set_pte(gart, iova, 0); + return 0; } @@ -249,15 +222,16 @@ static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t bytes) { struct gart_device *gart = gart_handle; - unsigned long flags; + int err; - if 
(!gart_iova_range_valid(gart, iova, bytes)) + if (gart_iova_range_invalid(gart, iova, bytes)) return 0; - spin_lock_irqsave(&gart->pte_lock, flags); - gart_set_pte(gart, iova, 0); - spin_unlock_irqrestore(&gart->pte_lock, flags); - return bytes; + spin_lock(&gart->pte_lock); + err = __gart_iommu_unmap(gart, iova); + spin_unlock(&gart->pte_lock); + + return err ? 0 : bytes; } static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain, @@ -265,24 +239,15 @@ static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain, { struct gart_device *gart = gart_handle; unsigned long pte; - phys_addr_t pa; - unsigned long flags; - if (!gart_iova_range_valid(gart, iova, 0)) + if (gart_iova_range_invalid(gart, iova, GART_PAGE_SIZE)) return -EINVAL; - spin_lock_irqsave(&gart->pte_lock, flags); + spin_lock(&gart->pte_lock); pte = gart_read_pte(gart, iova); - spin_unlock_irqrestore(&gart->pte_lock, flags); + spin_unlock(&gart->pte_lock); - pa = (pte & GART_PAGE_MASK); - if (!pfn_valid(__phys_to_pfn(pa))) { - dev_err(gart->dev, "No entry for %08llx:%pa\n", - (unsigned long long)iova, &pa); - gart_dump_table(gart); - return -EINVAL; - } - return pa; + return pte & GART_PAGE_MASK; } static bool gart_iommu_capable(enum iommu_cap cap) @@ -322,9 +287,7 @@ static int gart_iommu_of_xlate(struct device *dev, static void gart_iommu_sync(struct iommu_domain *domain) { - struct gart_device *gart = gart_handle; - - FLUSH_GART_REGS(gart); + FLUSH_GART_REGS(gart_handle); } static const struct iommu_ops gart_iommu_ops = { @@ -347,84 +310,78 @@ static const struct iommu_ops gart_iommu_ops = { int tegra_gart_suspend(struct gart_device *gart) { - unsigned long iova; u32 *data = gart->savedata; - unsigned long flags; + unsigned long iova; + + /* + * All GART users shall be suspended at this point. Disable + * address translation to trap all GART accesses as invalid + * memory accesses. 
+ */ + writel_relaxed(0, gart->regs + GART_CONFIG); + FLUSH_GART_REGS(gart); - spin_lock_irqsave(&gart->pte_lock, flags); for_each_gart_pte(gart, iova) *(data++) = gart_read_pte(gart, iova); - spin_unlock_irqrestore(&gart->pte_lock, flags); + return 0; } int tegra_gart_resume(struct gart_device *gart) { - unsigned long flags; - - spin_lock_irqsave(&gart->pte_lock, flags); do_gart_setup(gart, gart->savedata); - spin_unlock_irqrestore(&gart->pte_lock, flags); + return 0; } struct gart_device *tegra_gart_probe(struct device *dev, struct tegra_mc *mc) { struct gart_device *gart; - struct resource *res_remap; - void __iomem *gart_regs; - int ret; + struct resource *res; + int err; BUILD_BUG_ON(PAGE_SHIFT != GART_PAGE_SHIFT); /* the GART memory aperture is required */ - res_remap = platform_get_resource(to_platform_device(dev), - IORESOURCE_MEM, 1); - if (!res_remap) { - dev_err(dev, "GART memory aperture expected\n"); + res = platform_get_resource(to_platform_device(dev), IORESOURCE_MEM, 1); + if (!res) { + dev_err(dev, "Memory aperture resource unavailable\n"); return ERR_PTR(-ENXIO); } gart = kzalloc(sizeof(*gart), GFP_KERNEL); - if (!gart) { - dev_err(dev, "failed to allocate gart_device\n"); + if (!gart) return ERR_PTR(-ENOMEM); - } - ret = iommu_device_sysfs_add(&gart->iommu, dev, NULL, "gart"); - if (ret) { - dev_err(dev, "Failed to register IOMMU in sysfs\n"); + gart_handle = gart; + + gart->dev = dev; + gart->regs = mc->regs + GART_REG_BASE; + gart->iovmm_base = res->start; + gart->iovmm_end = res->end + 1; + spin_lock_init(&gart->pte_lock); + spin_lock_init(&gart->dom_lock); + + do_gart_setup(gart, NULL); + + err = iommu_device_sysfs_add(&gart->iommu, dev, NULL, "gart"); + if (err) goto free_gart; - } iommu_device_set_ops(&gart->iommu, &gart_iommu_ops); iommu_device_set_fwnode(&gart->iommu, dev->fwnode); - ret = iommu_device_register(&gart->iommu); - if (ret) { - dev_err(dev, "Failed to register IOMMU\n"); + err = iommu_device_register(&gart->iommu); + if (err) goto remove_sysfs; - } - gart->dev = dev; - gart_regs = mc->regs + GART_REG_BASE; - spin_lock_init(&gart->pte_lock); - spin_lock_init(&gart->dom_lock); - gart->regs = gart_regs; - gart->iovmm_base = (dma_addr_t)res_remap->start; - gart->page_count = (resource_size(res_remap) >> GART_PAGE_SHIFT); - - gart->savedata = vmalloc(array_size(sizeof(u32), gart->page_count)); + gart->savedata = vmalloc(resource_size(res) / GART_PAGE_SIZE * + sizeof(u32)); if (!gart->savedata) { - dev_err(dev, "failed to allocate context save area\n"); - ret = -ENOMEM; + err = -ENOMEM; goto unregister_iommu; } - do_gart_setup(gart, NULL); - - gart_handle = gart; - return gart; unregister_iommu: @@ -434,7 +391,7 @@ remove_sysfs: free_gart: kfree(gart); - return ERR_PTR(ret); + return ERR_PTR(err); } module_param(gart_debug, bool, 0644); From c61a4633a56aaa85bd61645c4188340f8bb2f7f1 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Thu, 24 Jan 2019 15:10:02 +0800 Subject: [PATCH 28/60] iommu/dma: Remove unused variable end_pfn is never used after commit ('iommu/iova: Make dma 32bit pfn implicit'), cleanup it. 
Cc: Joerg Roedel Cc: Robin Murphy Cc: Zhen Lei Signed-off-by: Shaokun Zhang Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index d19f3d6b43c1..77aabe637a60 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -289,7 +289,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, { struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; - unsigned long order, base_pfn, end_pfn; + unsigned long order, base_pfn; int attr; if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE) @@ -298,7 +298,6 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, /* Use the smallest supported page size for IOVA granularity */ order = __ffs(domain->pgsize_bitmap); base_pfn = max_t(unsigned long, 1, base >> order); - end_pfn = (base + size - 1) >> order; /* Check the domain allows at least some access to the device... */ if (domain->geometry.force_aperture) { From 21d5d27c042d41d0ec1d138651909405d967f8af Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Tue, 22 Jan 2019 14:30:45 -0700 Subject: [PATCH 29/60] iommu/vt-d: Implement dma_[un]map_resource() Currently the Intel IOMMU uses the default dma_[un]map_resource() implementations does nothing and simply returns the physical address unmodified. However, this doesn't create the IOVA entries necessary for addresses mapped this way to work when the IOMMU is enabled. Thus, when the IOMMU is enabled, drivers relying on dma_map_resource() will trigger DMAR errors. We see this when running ntb_transport with the IOMMU enabled, DMA, and switchtec hardware. The implementation for intel_map_resource() is nearly identical to intel_map_page(), we just have to re-create __intel_map_single(). dma_unmap_resource() uses intel_unmap_page() directly as the functions are identical. 
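With these ops wired up, a peer-to-peer capable driver can map a remote MMIO window through the IOMMU using the generic DMA API. A caller-side sketch (hypothetical helper and variable names; only the dma_map_resource()/dma_unmap_resource() calls are the real API):

    #include <linux/dma-mapping.h>

    /* Map a peer BAR region so the local DMA engine can address it. */
    static dma_addr_t map_peer_window(struct device *dma_dev,
                                      phys_addr_t bar_phys, size_t bar_len)
    {
            dma_addr_t dma;

            dma = dma_map_resource(dma_dev, bar_phys, bar_len,
                                   DMA_BIDIRECTIONAL, 0);
            if (dma_mapping_error(dma_dev, dma))
                    return DMA_MAPPING_ERROR;

            return dma;
    }

    /* ... and later: dma_unmap_resource(dma_dev, dma, bar_len,
     *                                   DMA_BIDIRECTIONAL, 0);   */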
Signed-off-by: Logan Gunthorpe Cc: David Woodhouse Cc: Joerg Roedel Reviewed-by: Christoph Hellwig Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 2bd9ac285c0d..64dab37c0b96 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3649,11 +3649,9 @@ static int iommu_no_mapping(struct device *dev) return 0; } -static dma_addr_t __intel_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, int dir, - u64 dma_mask) +static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, + size_t size, int dir, u64 dma_mask) { - phys_addr_t paddr = page_to_phys(page) + offset; struct dmar_domain *domain; phys_addr_t start_paddr; unsigned long iova_pfn; @@ -3715,7 +3713,15 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, unsigned long attrs) { - return __intel_map_page(dev, page, offset, size, dir, *dev->dma_mask); + return __intel_map_single(dev, page_to_phys(page) + offset, size, + dir, *dev->dma_mask); +} + +static dma_addr_t intel_map_resource(struct device *dev, phys_addr_t phys_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) +{ + return __intel_map_single(dev, phys_addr, size, dir, *dev->dma_mask); } static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) @@ -3806,8 +3812,9 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, return NULL; memset(page_address(page), 0, size); - *dma_handle = __intel_map_page(dev, page, 0, size, DMA_BIDIRECTIONAL, - dev->coherent_dma_mask); + *dma_handle = __intel_map_single(dev, page_to_phys(page), size, + DMA_BIDIRECTIONAL, + dev->coherent_dma_mask); if (*dma_handle != DMA_MAPPING_ERROR) return page_address(page); if (!dma_release_from_contiguous(dev, page, size >> PAGE_SHIFT)) @@ -3924,6 +3931,8 @@ static const struct dma_map_ops intel_dma_ops = { .unmap_sg = intel_unmap_sg, .map_page = intel_map_page, .unmap_page = intel_unmap_page, + .map_resource = intel_map_resource, + .unmap_resource = intel_unmap_page, .dma_supported = dma_direct_supported, }; From 2e6c6a8657e8709ce363a62238e792918dd158b3 Mon Sep 17 00:00:00 2001 From: Jerry Snitselaar Date: Mon, 28 Jan 2019 17:59:37 -0700 Subject: [PATCH 30/60] iommu/amd: Print reason for iommu_map_page failure in map_sg Since there are multiple possible failures in iommu_map_page it would be useful to know which case is being hit when the error message is printed in map_sg. While here, fix up checkpatch complaint about using function name in a string instead of __func__. 
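Reporting the reason also requires hoisting the declaration of 'ret' from the inner loop to function scope, so the value is still visible at the out_unmap label. Condensed from the hunks below (the map call between the two hunks is shown schematically):

    int ret;
    ...
    for (j = 0; j < pages; ++j) {
            ...
            ret = iommu_map_page(...);  /* the map call between the two hunks */
            if (ret)
                    goto out_unmap;
    }
    ...
    out_unmap:
        pr_err("%s: IOMMU mapping error in %s (io-pages: %d) reason: %d\n",
               dev_name(dev), __func__, npages, ret);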
Cc: Joerg Roedel Signed-off-by: Jerry Snitselaar Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 87ba23a75b38..675f7027aa04 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2562,6 +2562,7 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, struct scatterlist *s; unsigned long address; u64 dma_mask; + int ret; domain = get_domain(dev); if (IS_ERR(domain)) @@ -2584,7 +2585,6 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, for (j = 0; j < pages; ++j) { unsigned long bus_addr, phys_addr; - int ret; bus_addr = address + s->dma_address + (j << PAGE_SHIFT); phys_addr = (sg_phys(s) & PAGE_MASK) + (j << PAGE_SHIFT); @@ -2605,8 +2605,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, return nelems; out_unmap: - pr_err("%s: IOMMU mapping error in map_sg (io-pages: %d)\n", - dev_name(dev), npages); + pr_err("%s: IOMMU mapping error in %s (io-pages: %d) reason: %d\n", + dev_name(dev), __func__, npages, ret); for_each_sg(sglist, s, nelems, i) { int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE); From 5a63f0adebe7d941de29304c526bfe184c43bdec Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Jan 2019 13:57:58 +0800 Subject: [PATCH 31/60] iommu/amd: Remove clear_flush_young notifier AMD IOMMU driver is using the clear_flush_young() to do cache flushing but that's actually already covered by invalidate_range(). Remove the extra notifier and the chunks. Signed-off-by: Peter Xu Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_v2.c | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 23dae9348ace..5d7ef750e4a0 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -370,29 +370,6 @@ static struct pasid_state *mn_to_state(struct mmu_notifier *mn) return container_of(mn, struct pasid_state, mn); } -static void __mn_flush_page(struct mmu_notifier *mn, - unsigned long address) -{ - struct pasid_state *pasid_state; - struct device_state *dev_state; - - pasid_state = mn_to_state(mn); - dev_state = pasid_state->device_state; - - amd_iommu_flush_page(dev_state->domain, pasid_state->pasid, address); -} - -static int mn_clear_flush_young(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, - unsigned long end) -{ - for (; start < end; start += PAGE_SIZE) - __mn_flush_page(mn, start); - - return 0; -} - static void mn_invalidate_range(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, unsigned long end) @@ -430,7 +407,6 @@ static void mn_release(struct mmu_notifier *mn, struct mm_struct *mm) static const struct mmu_notifier_ops iommu_mn = { .release = mn_release, - .clear_flush_young = mn_clear_flush_young, .invalidate_range = mn_invalidate_range, }; From 1a9eb9b98f0a6c8836cc4d6dcf4d5d005fb5f389 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 30 Jan 2019 13:57:57 +0800 Subject: [PATCH 32/60] iommu/vt-d: Remove change_pte notifier The change_pte() interface is tailored for PFN updates, while the other notifier invalidate_range() should be enough for Intel IOMMU cache flushing. Actually we've done similar thing for AMD IOMMU already in 8301da53fbc1 ("iommu/amd: Remove change_pte mmu_notifier call-back", 2014-07-30) but the Intel IOMMU driver still have it. 
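After this and the earlier AMD change, the SVA paths rely on just two notifier callbacks. A minimal sketch of such a registration (hypothetical driver names; the ops layout and mmu_notifier_register() are the real API):

    static void sva_invalidate_range(struct mmu_notifier *mn,
                                     struct mm_struct *mm,
                                     unsigned long start, unsigned long end)
    {
            /* flush IOTLB and device TLB for [start, end) of the bound PASID */
    }

    static void sva_release(struct mmu_notifier *mn, struct mm_struct *mm)
    {
            /* the mm is going away: tear down the PASID binding */
    }

    static const struct mmu_notifier_ops sva_mmuops = {
            .release          = sva_release,
            .invalidate_range = sva_invalidate_range,
    };

    /* then: notifier.ops = &sva_mmuops;
     *       mmu_notifier_register(&notifier, mm); */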
Signed-off-by: Peter Xu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-svm.c | 9 --------- 1 file changed, 9 deletions(-) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index 79add5716552..c79540deaf00 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -180,14 +180,6 @@ static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, rcu_read_unlock(); } -static void intel_change_pte(struct mmu_notifier *mn, struct mm_struct *mm, - unsigned long address, pte_t pte) -{ - struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); - - intel_flush_svm_range(svm, address, 1, 1, 0); -} - /* Pages have been freed at this point */ static void intel_invalidate_range(struct mmu_notifier *mn, struct mm_struct *mm, @@ -227,7 +219,6 @@ static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) static const struct mmu_notifier_ops intel_mmuops = { .release = intel_mm_release, - .change_pte = intel_change_pte, .invalidate_range = intel_invalidate_range, }; From b77cf11f094136a9d7d0ee6a56cf49db1f412871 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Tue, 5 Feb 2019 10:37:31 -0600 Subject: [PATCH 33/60] iommu: Allow io-pgtable to be used outside of drivers/iommu/ Move io-pgtable.h to include/linux/ and export alloc_io_pgtable_ops and free_io_pgtable_ops. This enables drivers outside drivers/iommu/ to use the page table library. Specifically, some ARM Mali GPUs use the ARM page table formats. Cc: Will Deacon Cc: Robin Murphy Cc: Joerg Roedel Cc: Matthias Brugger Cc: Rob Clark Cc: linux-arm-kernel@lists.infradead.org Cc: iommu@lists.linux-foundation.org Cc: linux-mediatek@lists.infradead.org Cc: linux-arm-msm@vger.kernel.org Signed-off-by: Rob Herring Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu-v3.c | 3 +-- drivers/iommu/arm-smmu.c | 2 +- drivers/iommu/io-pgtable-arm-v7s.c | 3 +-- drivers/iommu/io-pgtable-arm.c | 3 +-- drivers/iommu/io-pgtable.c | 5 +++-- drivers/iommu/ipmmu-vmsa.c | 3 +-- drivers/iommu/msm_iommu.c | 2 +- drivers/iommu/mtk_iommu.h | 3 +-- drivers/iommu/qcom_iommu.c | 2 +- {drivers/iommu => include/linux}/io-pgtable.h | 0 10 files changed, 11 insertions(+), 15 deletions(-) rename {drivers/iommu => include/linux}/io-pgtable.h (100%) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 0d284029dc73..d3880010c6cf 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -32,8 +33,6 @@ #include -#include "io-pgtable.h" - /* MMIO registers */ #define ARM_SMMU_IDR0 0x0 #define IDR0_ST_LVL GENMASK(28, 27) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index af18a7e7f917..045d93884164 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include #include @@ -56,7 +57,6 @@ #include #include -#include "io-pgtable.h" #include "arm-smmu-regs.h" #define ARM_MMU500_ACTLR_CPRE (1 << 1) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index cec29bf45c9b..75a8273d1ae9 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -45,8 +46,6 @@ #include -#include "io-pgtable.h" - /* Struct accessors */ #define io_pgtable_to_data(x) \ container_of((x), struct arm_v7s_io_pgtable, iop) diff --git a/drivers/iommu/io-pgtable-arm.c 
b/drivers/iommu/io-pgtable-arm.c index 237cacd4a62b..d3700ec15cbd 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include @@ -31,8 +32,6 @@ #include -#include "io-pgtable.h" - #define ARM_LPAE_MAX_ADDR_BITS 52 #define ARM_LPAE_S2_MAX_CONCAT_PAGES 16 #define ARM_LPAE_MAX_LEVELS 4 diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index 127558d83667..93f2880be6c6 100644 --- a/drivers/iommu/io-pgtable.c +++ b/drivers/iommu/io-pgtable.c @@ -19,11 +19,10 @@ */ #include +#include #include #include -#include "io-pgtable.h" - static const struct io_pgtable_init_fns * io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = { #ifdef CONFIG_IOMMU_IO_PGTABLE_LPAE @@ -61,6 +60,7 @@ struct io_pgtable_ops *alloc_io_pgtable_ops(enum io_pgtable_fmt fmt, return &iop->ops; } +EXPORT_SYMBOL_GPL(alloc_io_pgtable_ops); /* * It is the IOMMU driver's responsibility to ensure that the page table @@ -77,3 +77,4 @@ void free_io_pgtable_ops(struct io_pgtable_ops *ops) io_pgtable_tlb_flush_all(iop); io_pgtable_init_table[iop->fmt]->free(iop); } +EXPORT_SYMBOL_GPL(free_io_pgtable_ops); diff --git a/drivers/iommu/ipmmu-vmsa.c b/drivers/iommu/ipmmu-vmsa.c index 7a4529c61c19..9a380c10655e 100644 --- a/drivers/iommu/ipmmu-vmsa.c +++ b/drivers/iommu/ipmmu-vmsa.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -35,8 +36,6 @@ #define arm_iommu_detach_device(...) do {} while (0) #endif -#include "io-pgtable.h" - #define IPMMU_CTX_MAX 8 struct ipmmu_features { diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index fc4270733f11..ef7d1f995d6b 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include @@ -37,7 +38,6 @@ #include "msm_iommu_hw-8xxx.h" #include "msm_iommu.h" -#include "io-pgtable.h" #define MRC(reg, processor, op1, crn, crm, op2) \ __asm__ __volatile__ ( \ diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h index 778498b8633f..62c2c3e8c5df 100644 --- a/drivers/iommu/mtk_iommu.h +++ b/drivers/iommu/mtk_iommu.h @@ -19,13 +19,12 @@ #include #include #include +#include #include #include #include #include -#include "io-pgtable.h" - struct mtk_iommu_suspend_reg { u32 standard_axi_mode; u32 dcm_dis; diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index d8595f0a987d..8cdd3f059513 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include @@ -42,7 +43,6 @@ #include #include -#include "io-pgtable.h" #include "arm-smmu-regs.h" #define SMMU_INTR_SEL_NS 0x2000 diff --git a/drivers/iommu/io-pgtable.h b/include/linux/io-pgtable.h similarity index 100% rename from drivers/iommu/io-pgtable.h rename to include/linux/io-pgtable.h From 780da9e4f5bf35d348b290f0f97de9b55670cb5b Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 8 Feb 2019 16:05:45 -0600 Subject: [PATCH 34/60] iommu: Use dev_printk() when possible Use dev_printk() when possible so the IOMMU messages are more consistent with other messages related to the device. 
E.g., I think these messages related to surprise hotplug: pciehp 0000:80:10.0:pcie004: Slot(36): Link Down iommu: Removing device 0000:87:00.0 from group 12 pciehp 0000:80:10.0:pcie004: Slot(36): Card present pcieport 0000:80:10.0: Data Link Layer Link Active not set in 1000 msec would be easier to read as these (also requires some PCI changes not included here): pci 0000:80:10.0: Slot(36): Link Down pci 0000:87:00.0: Removing from iommu group 12 pci 0000:80:10.0: Slot(36): Card present pci 0000:80:10.0: Data Link Layer Link Active not set in 1000 msec Signed-off-by: Bjorn Helgaas Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3ed4db334341..54c9d18fe31d 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -668,7 +668,7 @@ rename: trace_add_device_to_group(group->id, dev); - pr_info("Adding device %s to group %d\n", dev_name(dev), group->id); + dev_info(dev, "Adding to iommu group %d\n", group->id); return 0; @@ -684,7 +684,7 @@ err_remove_link: sysfs_remove_link(&dev->kobj, "iommu_group"); err_free_device: kfree(device); - pr_err("Failed to add device %s to group %d: %d\n", dev_name(dev), group->id, ret); + dev_err(dev, "Failed to add to iommu group %d: %d\n", group->id, ret); return ret; } EXPORT_SYMBOL_GPL(iommu_group_add_device); @@ -701,7 +701,7 @@ void iommu_group_remove_device(struct device *dev) struct iommu_group *group = dev->iommu_group; struct group_device *tmp_device, *device = NULL; - pr_info("Removing device %s from group %d\n", dev_name(dev), group->id); + dev_info(dev, "Removing from iommu group %d\n", group->id); /* Pre-notify listeners that a device is being removed. */ blocking_notifier_call_chain(&group->notifier, @@ -1951,7 +1951,7 @@ int iommu_request_dm_for_dev(struct device *dev) iommu_domain_free(group->default_domain); group->default_domain = dm_domain; - pr_info("Using direct mapping for device %s\n", dev_name(dev)); + dev_info(dev, "Using iommu direct mapping\n"); ret = 0; out: From 5f226da1b1d7065bd22605fd889ac96eb5f3a7db Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 8 Feb 2019 16:05:53 -0600 Subject: [PATCH 35/60] iommu/amd: Use dev_printk() when possible Use dev_printk() when possible so the IOMMU messages are more consistent with other messages related to the device. 
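The conversion is mechanical; a condensed before/after taken from the hunks below:

    /* before: device identity appended by hand */
    pr_err("Failed to initialize device %s - trying to proceed anyway\n",
           dev_name(dev));

    /* after: dev_err()/pci_info() add the driver and device names automatically */
    dev_err(dev, "Failed to initialize - trying to proceed anyway\n");
    pci_info(pdev, "Applying erratum 746 workaround\n");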
Signed-off-by: Bjorn Helgaas Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 26 ++++++++++++-------------- drivers/iommu/amd_iommu_init.c | 20 ++++++++++---------- 2 files changed, 22 insertions(+), 24 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 675f7027aa04..c0dc7c33335e 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -18,6 +18,7 @@ */ #define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) #include #include @@ -279,10 +280,10 @@ static u16 get_alias(struct device *dev) return pci_alias; } - pr_info("Using IVRS reported alias %02x:%02x.%d " - "for device %s[%04x:%04x], kernel reported alias " + pci_info(pdev, "Using IVRS reported alias %02x:%02x.%d " + "for device [%04x:%04x], kernel reported alias " "%02x:%02x.%d\n", PCI_BUS_NUM(ivrs_alias), PCI_SLOT(ivrs_alias), - PCI_FUNC(ivrs_alias), dev_name(dev), pdev->vendor, pdev->device, + PCI_FUNC(ivrs_alias), pdev->vendor, pdev->device, PCI_BUS_NUM(pci_alias), PCI_SLOT(pci_alias), PCI_FUNC(pci_alias)); @@ -293,9 +294,8 @@ static u16 get_alias(struct device *dev) if (pci_alias == devid && PCI_BUS_NUM(ivrs_alias) == pdev->bus->number) { pci_add_dma_alias(pdev, ivrs_alias & 0xff); - pr_info("Added PCI DMA alias %02x.%d for %s\n", - PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias), - dev_name(dev)); + pci_info(pdev, "Added PCI DMA alias %02x.%d\n", + PCI_SLOT(ivrs_alias), PCI_FUNC(ivrs_alias)); } return ivrs_alias; @@ -545,7 +545,7 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, dev_data = get_dev_data(&pdev->dev); if (dev_data && __ratelimit(&dev_data->rs)) { - dev_err(&pdev->dev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n", + pci_err(pdev, "Event logged [IO_PAGE_FAULT domain=0x%04x address=0x%llx flags=0x%04x]\n", domain_id, address, flags); } else if (printk_ratelimit()) { pr_err("Event logged [IO_PAGE_FAULT device=%02x:%02x.%x domain=0x%04x address=0x%llx flags=0x%04x]\n", @@ -2251,8 +2251,7 @@ static int amd_iommu_add_device(struct device *dev) ret = iommu_init_device(dev); if (ret) { if (ret != -ENOTSUPP) - pr_err("Failed to initialize device %s - trying to proceed anyway\n", - dev_name(dev)); + dev_err(dev, "Failed to initialize - trying to proceed anyway\n"); iommu_ignore_device(dev); dev->dma_ops = NULL; @@ -2605,8 +2604,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist, return nelems; out_unmap: - pr_err("%s: IOMMU mapping error in %s (io-pages: %d) reason: %d\n", - dev_name(dev), __func__, npages, ret); + dev_err(dev, "IOMMU mapping error in map_sg (io-pages: %d reason: %d)\n", + npages, ret); for_each_sg(sglist, s, nelems, i) { int j, pages = iommu_num_pages(sg_phys(s), s->length, PAGE_SIZE); @@ -2800,7 +2799,7 @@ static int init_reserved_iova_ranges(void) IOVA_PFN(r->start), IOVA_PFN(r->end)); if (!val) { - pr_err("Reserve pci-resource range failed\n"); + pci_err(pdev, "Reserve pci-resource range %pR failed\n", r); return -ENOMEM; } } @@ -3170,8 +3169,7 @@ static void amd_iommu_get_resv_regions(struct device *dev, length, prot, IOMMU_RESV_DIRECT); if (!region) { - pr_err("Out of memory allocating dm-regions for %s\n", - dev_name(dev)); + dev_err(dev, "Out of memory allocating dm-regions\n"); return; } list_add_tail(®ion->list, head); diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 66123b911ec8..f773792d77fd 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -18,6 +18,7 @@ */ #define pr_fmt(fmt) "AMD-Vi: " fmt 
+#define dev_fmt(fmt) pr_fmt(fmt) #include #include @@ -1457,8 +1458,7 @@ static void amd_iommu_erratum_746_workaround(struct amd_iommu *iommu) pci_write_config_dword(iommu->dev, 0xf0, 0x90 | (1 << 8)); pci_write_config_dword(iommu->dev, 0xf4, value | 0x4); - pr_info("Applying erratum 746 workaround for IOMMU at %s\n", - dev_name(&iommu->dev->dev)); + pci_info(iommu->dev, "Applying erratum 746 workaround\n"); /* Clear the enable writing bit */ pci_write_config_dword(iommu->dev, 0xf0, 0x90); @@ -1488,8 +1488,7 @@ static void amd_iommu_ats_write_check_workaround(struct amd_iommu *iommu) /* Set L2_DEBUG_3[AtsIgnoreIWDis] = 1 */ iommu_write_l2(iommu, 0x47, value | BIT(0)); - pr_info("Applying ATS write check workaround for IOMMU at %s\n", - dev_name(&iommu->dev->dev)); + pci_info(iommu->dev, "Applying ATS write check workaround\n"); } /* @@ -1665,6 +1664,7 @@ static int iommu_pc_get_set_reg(struct amd_iommu *iommu, u8 bank, u8 cntr, static void init_iommu_perf_ctr(struct amd_iommu *iommu) { + struct pci_dev *pdev = iommu->dev; u64 val = 0xabcd, val2 = 0; if (!iommu_feature(iommu, FEATURE_PC)) @@ -1676,12 +1676,12 @@ static void init_iommu_perf_ctr(struct amd_iommu *iommu) if ((iommu_pc_get_set_reg(iommu, 0, 0, 0, &val, true)) || (iommu_pc_get_set_reg(iommu, 0, 0, 0, &val2, false)) || (val != val2)) { - pr_err("Unable to write to IOMMU perf counter.\n"); + pci_err(pdev, "Unable to write to IOMMU perf counter.\n"); amd_iommu_pc_present = false; return; } - pr_info("IOMMU performance counters supported\n"); + pci_info(pdev, "IOMMU performance counters supported\n"); val = readl(iommu->mmio_base + MMIO_CNTR_CONF_OFFSET); iommu->max_banks = (u8) ((val >> 12) & 0x3f); @@ -1840,14 +1840,14 @@ static void print_iommu_info(void) struct amd_iommu *iommu; for_each_iommu(iommu) { + struct pci_dev *pdev = iommu->dev; int i; - pr_info("Found IOMMU at %s cap 0x%hx\n", - dev_name(&iommu->dev->dev), iommu->cap_ptr); + pci_info(pdev, "Found IOMMU cap 0x%hx\n", iommu->cap_ptr); if (iommu->cap & (1 << IOMMU_CAP_EFR)) { - pr_info("Extended features (%#llx):\n", - iommu->features); + pci_info(pdev, "Extended features (%#llx):\n", + iommu->features); for (i = 0; i < ARRAY_SIZE(feat_str); ++i) { if (iommu_feature(iommu, (1ULL << i))) pr_cont(" %s", feat_str[i]); From 932a6523ce393921f81f09c273aa4adee52391af Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 8 Feb 2019 16:06:00 -0600 Subject: [PATCH 36/60] iommu/vt-d: Use dev_printk() when possible Use dev_printk() when possible so the IOMMU messages are more consistent with other messages related to the device. 
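As in the AMD patch, the dev_fmt() define added below keeps the subsystem prefix on the converted messages; the idiom is:

    #define pr_fmt(fmt)  "DMAR: " fmt
    #define dev_fmt(fmt) pr_fmt(fmt)

    /* dev_err() now prints something like:
     *   "<driver> <device>: DMAR: Setup RID2PASID failed" */
    dev_err(dev, "Setup RID2PASID failed\n");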
Signed-off-by: Bjorn Helgaas Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 54 +++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 64dab37c0b96..648dc16cca6e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -19,6 +19,7 @@ */ #define pr_fmt(fmt) "DMAR: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) #include #include @@ -1815,7 +1816,7 @@ static int dmar_init_reserved_ranges(void) IOVA_PFN(r->start), IOVA_PFN(r->end)); if (!iova) { - pr_err("Reserve iova failed\n"); + pci_err(pdev, "Reserve iova for %pR failed\n", r); return -ENODEV; } } @@ -2544,8 +2545,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (dev && dev_is_pci(dev) && sm_supported(iommu)) { ret = intel_pasid_alloc_table(dev); if (ret) { - pr_err("PASID table allocation for %s failed\n", - dev_name(dev)); + dev_err(dev, "PASID table allocation failed\n"); dmar_remove_one_dev_info(domain, dev); return NULL; } @@ -2560,15 +2560,14 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, dev, PASID_RID2PASID); spin_unlock(&iommu->lock); if (ret) { - pr_err("Setup RID2PASID for %s failed\n", - dev_name(dev)); + dev_err(dev, "Setup RID2PASID failed\n"); dmar_remove_one_dev_info(domain, dev); return NULL; } } if (dev && domain_context_mapping(domain, dev)) { - pr_err("Domain context map for %s failed\n", dev_name(dev)); + dev_err(dev, "Domain context map failed\n"); dmar_remove_one_dev_info(domain, dev); return NULL; } @@ -2723,13 +2722,12 @@ static int domain_prepare_identity_map(struct device *dev, range which is reserved in E820, so which didn't get set up to start with in si_domain */ if (domain == si_domain && hw_pass_through) { - pr_warn("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n", - dev_name(dev), start, end); + dev_warn(dev, "Ignoring identity map for HW passthrough [0x%Lx - 0x%Lx]\n", + start, end); return 0; } - pr_info("Setting identity map for device %s [0x%Lx - 0x%Lx]\n", - dev_name(dev), start, end); + dev_info(dev, "Setting identity map [0x%Lx - 0x%Lx]\n", start, end); if (end < start) { WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n" @@ -3016,8 +3014,8 @@ static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw ret = domain_add_dev_info(si_domain, dev); if (!ret) - pr_info("%s identity mapping for device %s\n", - hw ? "Hardware" : "Software", dev_name(dev)); + dev_info(dev, "%s identity mapping\n", + hw ? "Hardware" : "Software"); else if (ret == -ENODEV) /* device not associated with an iommu */ ret = 0; @@ -3550,8 +3548,7 @@ static unsigned long intel_alloc_iova(struct device *dev, iova_pfn = alloc_iova_fast(&domain->iovad, nrpages, IOVA_PFN(dma_mask), true); if (unlikely(!iova_pfn)) { - pr_err("Allocating %ld-page iova for %s failed", - nrpages, dev_name(dev)); + dev_err(dev, "Allocating %ld-page iova failed", nrpages); return 0; } @@ -3599,7 +3596,7 @@ struct dmar_domain *get_valid_domain_for_dev(struct device *dev) out: if (!domain) - pr_err("Allocating domain for %s failed\n", dev_name(dev)); + dev_err(dev, "Allocating domain failed\n"); return domain; @@ -3626,8 +3623,7 @@ static int iommu_no_mapping(struct device *dev) * to non-identity mapping. 
*/ dmar_remove_one_dev_info(si_domain, dev); - pr_info("32bit %s uses non-identity mapping\n", - dev_name(dev)); + dev_info(dev, "32bit DMA uses non-identity mapping\n"); return 0; } } else { @@ -3639,8 +3635,7 @@ static int iommu_no_mapping(struct device *dev) int ret; ret = domain_add_dev_info(si_domain, dev); if (!ret) { - pr_info("64bit %s uses identity mapping\n", - dev_name(dev)); + dev_info(dev, "64bit DMA uses identity mapping\n"); return 1; } } @@ -3703,8 +3698,8 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, error: if (iova_pfn) free_iova_fast(&domain->iovad, iova_pfn, dma_to_mm_pfn(size)); - pr_err("Device %s request: %zx@%llx dir %d --- failed\n", - dev_name(dev), size, (unsigned long long)paddr, dir); + dev_err(dev, "Device request: %zx@%llx dir %d --- failed\n", + size, (unsigned long long)paddr, dir); return DMA_MAPPING_ERROR; } @@ -3747,8 +3742,7 @@ static void intel_unmap(struct device *dev, dma_addr_t dev_addr, size_t size) start_pfn = mm_to_dma_pfn(iova_pfn); last_pfn = start_pfn + nrpages - 1; - pr_debug("Device %s unmapping: pfn %lx-%lx\n", - dev_name(dev), start_pfn, last_pfn); + dev_dbg(dev, "Device unmapping: pfn %lx-%lx\n", start_pfn, last_pfn); freelist = domain_unmap(domain, start_pfn, last_pfn); @@ -5105,9 +5099,9 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, addr_width = cap_mgaw(iommu->cap); if (dmar_domain->max_addr > (1LL << addr_width)) { - pr_err("%s: iommu width (%d) is not " - "sufficient for the mapped address (%llx)\n", - __func__, addr_width, dmar_domain->max_addr); + dev_err(dev, "%s: iommu width (%d) is not " + "sufficient for the mapped address (%llx)\n", + __func__, addr_width, dmar_domain->max_addr); return -EFAULT; } dmar_domain->gaw = addr_width; @@ -5408,7 +5402,7 @@ const struct iommu_ops intel_iommu_ops = { static void quirk_iommu_g4x_gfx(struct pci_dev *dev) { /* G4x/GM45 integrated gfx dmar support is totally busted. */ - pr_info("Disabling IOMMU for graphics on this chipset\n"); + pci_info(dev, "Disabling IOMMU for graphics on this chipset\n"); dmar_map_gfx = 0; } @@ -5426,7 +5420,7 @@ static void quirk_iommu_rwbf(struct pci_dev *dev) * Mobile 4 Series Chipset neglects to set RWBF capability, * but needs it. Same seems to hold for the desktop versions. */ - pr_info("Forcing write-buffer flush capability\n"); + pci_info(dev, "Forcing write-buffer flush capability\n"); rwbf_quirk = 1; } @@ -5456,11 +5450,11 @@ static void quirk_calpella_no_shadow_gtt(struct pci_dev *dev) return; if (!(ggc & GGC_MEMORY_VT_ENABLED)) { - pr_info("BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n"); + pci_info(dev, "BIOS has allocated no shadow GTT; disabling IOMMU for graphics\n"); dmar_map_gfx = 0; } else if (dmar_map_gfx) { /* we have to ensure the gfx device is idle before we flush */ - pr_info("Disabling batched IOTLB flush on Ironlake\n"); + pci_info(dev, "Disabling batched IOTLB flush on Ironlake\n"); intel_iommu_strict = 1; } } From e083ea5b02114e1a54ebd285ae114d84ff5bea01 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 8 Feb 2019 16:06:08 -0600 Subject: [PATCH 37/60] iommu/vt-d: Remove unnecessary local variable initializations A local variable initialization is a hint that the variable will be used in an unusual way. If the initialization is unnecessary, that hint becomes a distraction. Remove unnecessary initializations. No functional change intended. 
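A generic illustration of the pattern (the domain_exit() hunk below is one instance): when the first use of the variable is an unconditional assignment, the '= NULL' initializer only hints at a fall-through path that does not exist.

    struct page *freelist = NULL;   /* before: suggests freelist may stay NULL */

    struct page *freelist;          /* after: first use is the assignment */
    ...
    freelist = domain_unmap(domain, ...);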
Signed-off-by: Bjorn Helgaas Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 648dc16cca6e..a27c0db0b5b1 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -865,7 +865,7 @@ out: static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, unsigned long pfn, int *target_level) { - struct dma_pte *parent, *pte = NULL; + struct dma_pte *parent, *pte; int level = agaw_to_level(domain->agaw); int offset; @@ -922,7 +922,7 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, unsigned long pfn, int level, int *large_page) { - struct dma_pte *parent, *pte = NULL; + struct dma_pte *parent, *pte; int total = agaw_to_level(domain->agaw); int offset; @@ -954,7 +954,7 @@ static void dma_pte_clear_range(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn) { - unsigned int large_page = 1; + unsigned int large_page; struct dma_pte *first_pte, *pte; BUG_ON(!domain_pfn_supported(domain, start_pfn)); @@ -1132,7 +1132,7 @@ static struct page *domain_unmap(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn) { - struct page *freelist = NULL; + struct page *freelist; BUG_ON(!domain_pfn_supported(domain, start_pfn)); BUG_ON(!domain_pfn_supported(domain, last_pfn)); @@ -1763,7 +1763,7 @@ static int domain_attach_iommu(struct dmar_domain *domain, static int domain_detach_iommu(struct dmar_domain *domain, struct intel_iommu *iommu) { - int num, count = INT_MAX; + int num, count; assert_spin_locked(&device_domain_lock); assert_spin_locked(&iommu->lock); @@ -1902,7 +1902,7 @@ static int domain_init(struct dmar_domain *domain, struct intel_iommu *iommu, static void domain_exit(struct dmar_domain *domain) { - struct page *freelist = NULL; + struct page *freelist; /* Domain 0 is reserved, so dont process it */ if (!domain) @@ -2583,7 +2583,7 @@ static int get_last_alias(struct pci_dev *pdev, u16 alias, void *opaque) static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw) { - struct device_domain_info *info = NULL; + struct device_domain_info *info; struct dmar_domain *domain = NULL; struct intel_iommu *iommu; u16 dma_alias; @@ -2807,7 +2807,7 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width); static int __init si_domain_init(int hw) { - int nid, ret = 0; + int nid, ret; si_domain = alloc_domain(DOMAIN_FLAG_STATIC_IDENTITY); if (!si_domain) @@ -2931,7 +2931,6 @@ static bool device_is_rmrr_locked(struct device *dev) static int iommu_should_identity_map(struct device *dev, int startup) { - if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); @@ -3527,7 +3526,7 @@ static unsigned long intel_alloc_iova(struct device *dev, struct dmar_domain *domain, unsigned long nrpages, uint64_t dma_mask) { - unsigned long iova_pfn = 0; + unsigned long iova_pfn; /* Restrict dma_mask to the width that the iommu can handle */ dma_mask = min_t(uint64_t, DOMAIN_MAX_ADDR(domain->gaw), dma_mask); @@ -4342,7 +4341,7 @@ int dmar_check_one_atsr(struct acpi_dmar_header *hdr, void *arg) static int intel_iommu_add(struct dmar_drhd_unit *dmaru) { - int sp, ret = 0; + int sp, ret; struct intel_iommu *iommu = dmaru->iommu; if (g_iommus[iommu->seq_id]) @@ -4506,7 +4505,7 @@ out: int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) { - int ret = 0; + int ret; struct dmar_rmrr_unit *rmrru; struct dmar_atsr_unit *atsru; struct 
acpi_dmar_atsr *atsr; @@ -4523,7 +4522,7 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) ((void *)rmrr) + rmrr->header.length, rmrr->segment, rmrru->devices, rmrru->devices_cnt); - if(ret < 0) + if (ret < 0) return ret; } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { dmar_remove_dev_scope(info, rmrr->segment, @@ -4543,7 +4542,7 @@ int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) atsru->devices_cnt); if (ret > 0) break; - else if(ret < 0) + else if (ret < 0) return ret; } else if (info->event == BUS_NOTIFY_REMOVED_DEVICE) { if (dmar_remove_dev_scope(info, atsr->segment, From 717532394c712e044195405685f62522c579e22a Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 8 Feb 2019 16:06:15 -0600 Subject: [PATCH 38/60] iommu/vt-d: Remove unused dmar_remove_one_dev_info() argument domain_remove_dev_info() takes a struct dmar_domain * argument, but doesn't use it. Remove it. No functional change intended. The last use of this argument was removed by 127c761598f7 ("iommu/vt-d: Pass device_domain_info to __dmar_remove_one_dev_info"). Signed-off-by: Bjorn Helgaas Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a27c0db0b5b1..e06913460cc3 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -343,8 +343,7 @@ static int g_num_of_iommus; static void domain_exit(struct dmar_domain *domain); static void domain_remove_dev_info(struct dmar_domain *domain); -static void dmar_remove_one_dev_info(struct dmar_domain *domain, - struct device *dev); +static void dmar_remove_one_dev_info(struct device *dev); static void __dmar_remove_one_dev_info(struct device_domain_info *info); static void domain_context_clear(struct intel_iommu *iommu, struct device *dev); @@ -2546,7 +2545,7 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, ret = intel_pasid_alloc_table(dev); if (ret) { dev_err(dev, "PASID table allocation failed\n"); - dmar_remove_one_dev_info(domain, dev); + dmar_remove_one_dev_info(dev); return NULL; } @@ -2561,14 +2560,14 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, spin_unlock(&iommu->lock); if (ret) { dev_err(dev, "Setup RID2PASID failed\n"); - dmar_remove_one_dev_info(domain, dev); + dmar_remove_one_dev_info(dev); return NULL; } } if (dev && domain_context_mapping(domain, dev)) { dev_err(dev, "Domain context map failed\n"); - dmar_remove_one_dev_info(domain, dev); + dmar_remove_one_dev_info(dev); return NULL; } @@ -3621,7 +3620,7 @@ static int iommu_no_mapping(struct device *dev) * 32 bit DMA is removed from si_domain and fall back * to non-identity mapping. 
*/ - dmar_remove_one_dev_info(si_domain, dev); + dmar_remove_one_dev_info(dev); dev_info(dev, "32bit DMA uses non-identity mapping\n"); return 0; } @@ -4576,7 +4575,7 @@ static int device_notifier(struct notifier_block *nb, if (!domain) return 0; - dmar_remove_one_dev_info(domain, dev); + dmar_remove_one_dev_info(dev); if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices)) domain_exit(domain); @@ -4989,8 +4988,7 @@ static void __dmar_remove_one_dev_info(struct device_domain_info *info) free_devinfo_mem(info); } -static void dmar_remove_one_dev_info(struct dmar_domain *domain, - struct device *dev) +static void dmar_remove_one_dev_info(struct device *dev) { struct device_domain_info *info; unsigned long flags; @@ -5079,7 +5077,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, old_domain = find_domain(dev); if (old_domain) { rcu_read_lock(); - dmar_remove_one_dev_info(old_domain, dev); + dmar_remove_one_dev_info(dev); rcu_read_unlock(); if (!domain_type_is_vm_or_si(old_domain) && @@ -5126,7 +5124,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, static void intel_iommu_detach_device(struct iommu_domain *domain, struct device *dev) { - dmar_remove_one_dev_info(to_dmar_domain(domain), dev); + dmar_remove_one_dev_info(dev); } static int intel_iommu_map(struct iommu_domain *domain, From f096d6657a7714e62a4799893ac9bdee75be29c8 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Fri, 8 Feb 2019 16:06:22 -0600 Subject: [PATCH 39/60] iommu/vt-d: Remove misleading "domain 0" test from domain_exit() The "Domain 0 is reserved, so dont process it" comment suggests that a NULL pointer corresponds to domain 0. I don't think that's true, and in any case, every caller supplies a non-NULL domain pointer that has already been dereferenced, so the test is unnecessary. Remove the test for a null "domain" pointer. No functional change intended. This null pointer check was added by 5e98c4b1d6e8 ("Allocation and free functions of virtual machine domain"). Signed-off-by: Bjorn Helgaas Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index e06913460cc3..271455e5026e 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1903,10 +1903,6 @@ static void domain_exit(struct dmar_domain *domain) { struct page *freelist; - /* Domain 0 is reserved, so dont process it */ - if (!domain) - return; - /* Remove associated devices and clear attached or cached domains */ rcu_read_lock(); domain_remove_dev_info(domain); From 032ebd8548c9d05e8d2bdc7a7ec2fe29454b0ad0 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Mon, 28 Jan 2019 17:43:01 +0800 Subject: [PATCH 40/60] iommu/io-pgtable-arm-v7s: Only kmemleak_ignore L2 tables L1 tables are allocated with __get_dma_pages, and therefore already ignored by kmemleak. Without this, the kernel would print this error message on boot, when the first L1 table is allocated: [ 2.810533] kmemleak: Trying to color unknown object at 0xffffffd652388000 as Black [ 2.818190] CPU: 5 PID: 39 Comm: kworker/5:0 Tainted: G S 4.19.16 #8 [ 2.831227] Workqueue: events deferred_probe_work_func [ 2.836353] Call trace: ... [ 2.852532] paint_ptr+0xa0/0xa8 [ 2.855750] kmemleak_ignore+0x38/0x6c [ 2.859490] __arm_v7s_alloc_table+0x168/0x1f4 [ 2.863922] arm_v7s_alloc_pgtable+0x114/0x17c [ 2.868354] alloc_io_pgtable_ops+0x3c/0x78 ... 
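The boot warning fires because kmemleak_ignore() is called on an object kmemleak never tracked: page-allocator memory, such as the __get_dma_pages() backing the L1 tables, is outside kmemleak's view. Only the slab-allocated L2 tables need the annotation, hence the guard in the hunk below:

    if (lvl == 2)
            kmemleak_ignore(table);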
Fixes: e5fc9753b1a8314 ("iommu/io-pgtable: Add ARMv7 short descriptor support") Signed-off-by: Nicolas Boichat Acked-by: Will Deacon Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-arm-v7s.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 75a8273d1ae9..f101afc315ab 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -216,7 +216,8 @@ static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, if (dma != phys) goto out_unmap; } - kmemleak_ignore(table); + if (lvl == 2) + kmemleak_ignore(table); return table; out_unmap: From db04d4a3d72f0c5ee34609559f535d11ab47303c Mon Sep 17 00:00:00 2001 From: Tom Murphy Date: Mon, 11 Feb 2019 15:50:33 +0000 Subject: [PATCH 41/60] iommu: Fix flush_tlb_all typo Fix typo, flush_tlb_all should be flush_iotlb_all. Signed-off-by: Tom Murphy Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index e90da6b6f3d1..2b402dcbcf81 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -167,7 +167,7 @@ struct iommu_resv_region { * @detach_dev: detach device from an iommu domain * @map: map a physically contiguous memory region to an iommu domain * @unmap: unmap a physically contiguous memory region from an iommu domain - * @flush_tlb_all: Synchronously flush all hardware TLBs for this domain + * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain * @iotlb_range_add: Add a given iova range to the flush queue for this domain * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush * queue From 9ca8261173fc06ba1bd3c52e83eae0a0b7146b3b Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 13 Feb 2019 10:54:45 -0700 Subject: [PATCH 42/60] iommu/vt-d: Add helper to set an IRTE to verify only the bus number The current code uses set_irte_sid() with SVT_VERIFY_BUS and PCI_DEVID to set the SID value. However, this is very confusing because, with SVT_VERIFY_BUS, the SID value is not a PCI devfn address, but the start and end bus numbers to match against. According to the Intel Virtualization Technology for Directed I/O Architecture Specification, Rev. 3.0, page 9-36: The most significant 8-bits of the SID field contains the Startbus#, and the least significant 8-bits of the SID field contains the Endbus#. Interrupt requests that reference this IRTE must have a requester-id whose bus# (most significant 8-bits of requester-id) has a value equal to or within the Startbus# to Endbus# range. So to make this more clear, introduce a new set_irte_verify_bus() that explicitly takes a start bus and end bus so that we can stop abusing the PCI_DEVID macro. This helper function will be called a second time in a subsequent patch. Signed-off-by: Logan Gunthorpe Cc: David Woodhouse Cc: Joerg Roedel Cc: Jacob Pan Signed-off-by: Joerg Roedel --- drivers/iommu/intel_irq_remapping.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 24d45b07f425..5a55bef8e379 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -294,6 +294,18 @@ static void set_irte_sid(struct irte *irte, unsigned int svt, irte->sid = sid; } +/* + * Set an IRTE to match only the bus number.
Interrupt requests that reference + * this IRTE must have a requester-id whose bus number is between or equal + * to the start_bus and end_bus arguments. + */ +static void set_irte_verify_bus(struct irte *irte, unsigned int start_bus, + unsigned int end_bus) +{ + set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16, + (start_bus << 8) | end_bus); +} + static int set_ioapic_sid(struct irte *irte, int apic) { int i; @@ -391,9 +403,8 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev) * original device. */ if (PCI_BUS_NUM(data.alias) != data.pdev->bus->number) - set_irte_sid(irte, SVT_VERIFY_BUS, SQ_ALL_16, - PCI_DEVID(PCI_BUS_NUM(data.alias), - dev->bus->number)); + set_irte_verify_bus(irte, PCI_BUS_NUM(data.alias), + dev->bus->number); else if (data.pdev->bus->number != dev->bus->number) set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, data.alias); else From 3f0c625c6ae714cf4dbab534b4c399f1d720e5db Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 13 Feb 2019 10:54:46 -0700 Subject: [PATCH 43/60] iommu/vt-d: Allow interrupts from the entire bus for aliased devices When a device has multiple aliases that all are from the same bus, we program the IRTE to accept requests from any matching device on the bus. This is so NTB devices which can have requests from multiple bus-devfns can pass MSI interrupts through across the bridge. Signed-off-by: Logan Gunthorpe Cc: David Woodhouse Cc: Joerg Roedel Cc: Jacob Pan Signed-off-by: Joerg Roedel --- drivers/iommu/intel_irq_remapping.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 5a55bef8e379..2d74641b7f7b 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -368,6 +368,8 @@ static int set_hpet_sid(struct irte *irte, u8 id) struct set_msi_sid_data { struct pci_dev *pdev; u16 alias; + int count; + int busmatch_count; }; static int set_msi_sid_cb(struct pci_dev *pdev, u16 alias, void *opaque) @@ -376,6 +378,10 @@ static int set_msi_sid_cb(struct pci_dev *pdev, u16 alias, void *opaque) data->pdev = pdev; data->alias = alias; + data->count++; + + if (PCI_BUS_NUM(alias) == pdev->bus->number) + data->busmatch_count++; return 0; } @@ -387,6 +393,8 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev) if (!irte || !dev) return -1; + data.count = 0; + data.busmatch_count = 0; pci_for_each_dma_alias(dev, set_msi_sid_cb, &data); /* @@ -395,6 +403,11 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev) * device is the case of a PCIe-to-PCI bridge, where the alias is for * the subordinate bus. In this case we can only verify the bus. * + * If there are multiple aliases, all with the same bus number, + * then all we can do is verify the bus. This is typical in NTB + * hardware which use proxy IDs where the device will generate traffic + * from multiple devfn numbers on the same bus. + * * If the alias device is on a different bus than our source device * then we have a topology based alias, use it. 
* @@ -405,6 +418,8 @@ static int set_msi_sid(struct irte *irte, struct pci_dev *dev) if (PCI_BUS_NUM(data.alias) != data.pdev->bus->number) set_irte_verify_bus(irte, PCI_BUS_NUM(data.alias), dev->bus->number); + else if (data.count >= 2 && data.busmatch_count == data.count) + set_irte_verify_bus(irte, dev->bus->number, dev->bus->number); else if (data.pdev->bus->number != dev->bus->number) set_irte_sid(irte, SVT_VERIFY_SID_SQ, SQ_ALL_16, data.alias); else From e5567f5f67621877726f99be040af9fbedda37dc Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Tue, 19 Feb 2019 11:04:51 -0800 Subject: [PATCH 44/60] PCI/ATS: Add pci_prg_resp_pasid_required() interface. Return the PRG Response PASID Required bit in the Page Request Status Register. As per PCIe spec r4.0, sec 10.5.2.3, if this bit is Set, the device expects a PASID TLP Prefix on PRG Response Messages when the corresponding Page Requests had a PASID TLP Prefix. If Clear, the device does not expect PASID TLP Prefixes on any PRG Response Message, and the device behavior is undefined if the device receives a PRG Response Message with a PASID TLP Prefix. Also the device behavior is undefined if this bit is Set and the device receives a PRG Response Message with no PASID TLP Prefix when the corresponding Page Requests had a PASID TLP Prefix. This function will be used by drivers like IOMMU, if it is required to check the status of the PRG Response PASID Required bit before enabling the PASID support of the device. Cc: Ashok Raj Cc: Jacob Pan Cc: Keith Busch Suggested-by: Ashok Raj Signed-off-by: Kuppuswamy Sathyanarayanan Acked-by: Bjorn Helgaas Signed-off-by: Joerg Roedel --- drivers/pci/ats.c | 30 ++++++++++++++++++++++++++++++ include/linux/pci-ats.h | 5 +++++ include/uapi/linux/pci_regs.h | 1 + 3 files changed, 36 insertions(+) diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index 5b78f3b1b918..420cd0a578d0 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -368,6 +368,36 @@ int pci_pasid_features(struct pci_dev *pdev) } EXPORT_SYMBOL_GPL(pci_pasid_features); +/** + * pci_prg_resp_pasid_required - Return PRG Response PASID Required bit + * status. + * @pdev: PCI device structure + * + * Returns 1 if PASID is required in PRG Response Message, 0 otherwise. + * + * Even though the PRG response PASID status is read from PRI Status + * Register, since this API will mainly be used by PASID users, this + * function is defined within #ifdef CONFIG_PCI_PASID instead of + * CONFIG_PCI_PRI. 
+ */ +int pci_prg_resp_pasid_required(struct pci_dev *pdev) +{ + u16 status; + int pos; + + pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_PRI); + if (!pos) + return 0; + + pci_read_config_word(pdev, pos + PCI_PRI_STATUS, &status); + + if (status & PCI_PRI_STATUS_PASID) + return 1; + + return 0; +} +EXPORT_SYMBOL_GPL(pci_prg_resp_pasid_required); + #define PASID_NUMBER_SHIFT 8 #define PASID_NUMBER_MASK (0x1f << PASID_NUMBER_SHIFT) /** diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index 7c4b8e27268c..facfd6a18fe1 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -40,6 +40,7 @@ void pci_disable_pasid(struct pci_dev *pdev); void pci_restore_pasid_state(struct pci_dev *pdev); int pci_pasid_features(struct pci_dev *pdev); int pci_max_pasids(struct pci_dev *pdev); +int pci_prg_resp_pasid_required(struct pci_dev *pdev); #else /* CONFIG_PCI_PASID */ @@ -66,6 +67,10 @@ static inline int pci_max_pasids(struct pci_dev *pdev) return -EINVAL; } +static int pci_prg_resp_pasid_required(struct pci_dev *pdev) +{ + return 0; +} #endif /* CONFIG_PCI_PASID */ diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index e1e9888c85e6..898be572b010 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -880,6 +880,7 @@ #define PCI_PRI_STATUS_RF 0x001 /* Response Failure */ #define PCI_PRI_STATUS_UPRGI 0x002 /* Unexpected PRG index */ #define PCI_PRI_STATUS_STOPPED 0x100 /* PRI Stopped */ +#define PCI_PRI_STATUS_PASID 0x8000 /* PRG Response PASID Required */ #define PCI_PRI_MAX_REQ 0x08 /* PRI max reqs supported */ #define PCI_PRI_ALLOC_REQ 0x0c /* PRI max reqs allowed */ #define PCI_EXT_CAP_PRI_SIZEOF 16 From 1b84778a62ad6d0d97ace457072d93933f306ef5 Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Tue, 19 Feb 2019 11:04:52 -0800 Subject: [PATCH 45/60] iommu/vt-d: Fix PRI/PASID dependency issue. In Intel IOMMU, if the Page Request Queue (PRQ) is full, it will automatically respond to the device with a success message as a keep alive. And when sending the success message, IOMMU will include PASID in the Response Message when the Page Request has a PASID in Request Message and it does not check against the PRG Response PASID requirement of the device before sending the response. Also, if the device receives the PRG response with PASID when it's not expecting it, the device behavior is undefined. So if PASID is enabled in the device, enable PRI only if the device expects PASID in the PRG Response Message. Cc: Ashok Raj Cc: Jacob Pan Cc: Keith Busch Suggested-by: Ashok Raj Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 271455e5026e..a341927d9536 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1402,7 +1402,9 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info) if (info->pasid_supported && !pci_enable_pasid(pdev, info->pasid_supported & ~1)) info->pasid_enabled = 1; - if (info->pri_supported && !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) + if (info->pri_supported && + (info->pasid_enabled ?
pci_prg_resp_pasid_required(pdev) : 1) && + !pci_reset_pri(pdev) && !pci_enable_pri(pdev, 32)) info->pri_enabled = 1; #endif if (!pdev->untrusted && info->ats_supported && From 8c938ddc6df3bbe72809db1be6c9f3af83f5d7a9 Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Tue, 19 Feb 2019 11:06:09 -0800 Subject: [PATCH 46/60] PCI/ATS: Add pci_ats_page_aligned() interface Return the Page Aligned Request bit in the ATS Capability Register. As per PCIe spec r4.0, sec 10.5.1.2, if the Page Aligned Request bit is set, it indicates the Untranslated Addresses generated by the device are always aligned to a 4096 byte boundary. An IOMMU that can only translate page-aligned addresses can only be used with devices that always produce aligned Untranslated Addresses. This interface will be used by drivers for such IOMMUs to determine whether devices can use the ATS service. Cc: Ashok Raj Cc: Jacob Pan Cc: Keith Busch Suggested-by: Ashok Raj Signed-off-by: Kuppuswamy Sathyanarayanan Acked-by: Bjorn Helgaas Signed-off-by: Joerg Roedel --- drivers/pci/ats.c | 27 +++++++++++++++++++++++++++ include/linux/pci.h | 2 ++ include/uapi/linux/pci_regs.h | 1 + 3 files changed, 30 insertions(+) diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c index 420cd0a578d0..97c08146534a 100644 --- a/drivers/pci/ats.c +++ b/drivers/pci/ats.c @@ -142,6 +142,33 @@ int pci_ats_queue_depth(struct pci_dev *dev) } EXPORT_SYMBOL_GPL(pci_ats_queue_depth); +/** + * pci_ats_page_aligned - Return Page Aligned Request bit status. + * @pdev: the PCI device + * + * Returns 1, if the Untranslated Addresses generated by the device + * are always aligned or 0 otherwise. + * + * Per PCIe spec r4.0, sec 10.5.1.2, if the Page Aligned Request bit + * is set, it indicates the Untranslated Addresses generated by the + * device are always aligned to a 4096 byte boundary. 
+ */ +int pci_ats_page_aligned(struct pci_dev *pdev) +{ + u16 cap; + + if (!pdev->ats_cap) + return 0; + + pci_read_config_word(pdev, pdev->ats_cap + PCI_ATS_CAP, &cap); + + if (cap & PCI_ATS_CAP_PAGE_ALIGNED) + return 1; + + return 0; +} +EXPORT_SYMBOL_GPL(pci_ats_page_aligned); + #ifdef CONFIG_PCI_PRI /** * pci_enable_pri - Enable PRI capability diff --git a/include/linux/pci.h b/include/linux/pci.h index 65f1d8c2f082..9724a8c0496b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1524,11 +1524,13 @@ void pci_ats_init(struct pci_dev *dev); int pci_enable_ats(struct pci_dev *dev, int ps); void pci_disable_ats(struct pci_dev *dev); int pci_ats_queue_depth(struct pci_dev *dev); +int pci_ats_page_aligned(struct pci_dev *dev); #else static inline void pci_ats_init(struct pci_dev *d) { } static inline int pci_enable_ats(struct pci_dev *d, int ps) { return -ENODEV; } static inline void pci_disable_ats(struct pci_dev *d) { } static inline int pci_ats_queue_depth(struct pci_dev *d) { return -ENODEV; } +static inline int pci_ats_page_aligned(struct pci_dev *dev) { return 0; } #endif #ifdef CONFIG_PCIE_PTM diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h index 898be572b010..5c98133f2c94 100644 --- a/include/uapi/linux/pci_regs.h +++ b/include/uapi/linux/pci_regs.h @@ -866,6 +866,7 @@ #define PCI_ATS_CAP 0x04 /* ATS Capability Register */ #define PCI_ATS_CAP_QDEP(x) ((x) & 0x1f) /* Invalidate Queue Depth */ #define PCI_ATS_MAX_QDEP 32 /* Max Invalidate Queue Depth */ +#define PCI_ATS_CAP_PAGE_ALIGNED 0x0020 /* Page Aligned Request */ #define PCI_ATS_CTRL 0x06 /* ATS Control Register */ #define PCI_ATS_CTRL_ENABLE 0x8000 /* ATS Enable */ #define PCI_ATS_CTRL_STU(x) ((x) & 0x1f) /* Smallest Translation Unit */ From 61363c1474b146114fb4c5e5af2908c8afcf1c5e Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Tue, 19 Feb 2019 11:06:10 -0800 Subject: [PATCH 47/60] iommu/vt-d: Enable ATS only if the device uses page aligned address. As per Intel vt-d specification, Rev 3.0 (section 7.5.1.1, title "Page Request Descriptor"), Intel IOMMU page request descriptor only uses bits[63:12] of the page address. Hence Intel IOMMU driver would only permit devices that advertise they would only send Page Aligned Requests to participate in ATS service. Cc: Ashok Raj Cc: Jacob Pan Cc: Keith Busch Suggested-by: Ashok Raj Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a341927d9536..00f46021d690 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1408,6 +1408,7 @@ static void iommu_enable_dev_iotlb(struct device_domain_info *info) info->pri_enabled = 1; #endif if (!pdev->untrusted && info->ats_supported && + pci_ats_page_aligned(pdev) && !pci_enable_ats(pdev, VTD_PAGE_SHIFT)) { info->ats_enabled = 1; domain_update_iotlb(info->domain); From 2405bc162583e1d7c40b13bf078e87428d2dfe4e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 20 Feb 2019 14:00:52 +0100 Subject: [PATCH 48/60] iommu: Document iommu_ops.iotlb_sync_map() Add missing kerneldoc for iommu_ops.iotlb_sync_map(). 
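For reference, a minimal sketch of how a driver might wire up this callback (the foo_* names are hypothetical; the signature assumed here is the domain-only form introduced by 1d7ae53b152dbc5b):

        static void foo_iotlb_sync_map(struct iommu_domain *domain)
        {
                /* Publish page-table updates made by the preceding ->map()
                 * calls to the hardware before the caller starts DMA. */
                foo_hw_flush_ptes(to_foo_domain(domain));
        }

        static const struct iommu_ops foo_iommu_ops = {
                /* .map, .unmap and the other callbacks omitted */
                .iotlb_sync_map = foo_iotlb_sync_map,
        };
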
Fixes: 1d7ae53b152dbc5b ("iommu: Introduce iotlb_sync_map callback") Signed-off-by: Geert Uytterhoeven Reviewed-by: Dmitry Osipenko Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 1 + 1 file changed, 1 insertion(+) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2b402dcbcf81..28ad97801032 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -169,6 +169,7 @@ struct iommu_resv_region { * @unmap: unmap a physically contiguous memory region from an iommu domain * @flush_iotlb_all: Synchronously flush all hardware TLBs for this domain * @iotlb_range_add: Add a given iova range to the flush queue for this domain + * @iotlb_sync_map: Sync mappings created recently using @map to the hardware * @iotlb_sync: Flush all queued ranges from the hardware TLBs and empty flush * queue * @iova_to_phys: translate iova to physical address From a7055d572c51338bed8673331ead6759cae6b70b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 20 Feb 2019 14:00:53 +0100 Subject: [PATCH 49/60] iommu: Document iommu_ops.is_attach_deferred() Add missing kerneldoc for iommu_ops.is_attach_deferred(). Fixes: e01d1913b0d08171 ("iommu: Add is_attach_deferred call-back to iommu-ops") Signed-off-by: Geert Uytterhoeven Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 28ad97801032..41fa7958592d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -184,6 +184,8 @@ struct iommu_resv_region { * @domain_window_enable: Configure and enable a particular window for a domain * @domain_window_disable: Disable a particular window for a domain * @of_xlate: add OF master IDs to iommu grouping + * @is_attach_deferred: Check if domain attach should be deferred from iommu + * driver init to device driver init (default no) * @pgsize_bitmap: bitmap of all possible supported page sizes */ struct iommu_ops { From 18b3af4492a0aa6046b86d712f6ba4cbb66100fb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 20 Feb 2019 14:05:05 +0100 Subject: [PATCH 50/60] iommu: Fix IOMMU debugfs fallout A change made in the final version of IOMMU debugfs support replaced the public function iommu_debugfs_new_driver_dir() by the public dentry iommu_debugfs_dir in <linux/iommu.h>, but forgot to update both the implementation in iommu-debugfs.c and the patch description. Fix this by exporting iommu_debugfs_dir, and removing the reference to and implementation of iommu_debugfs_new_driver_dir(). Fixes: bad614b24293ae46 ("iommu: Enable debugfs exposure of IOMMU driver internals") Signed-off-by: Geert Uytterhoeven Acked-by: Gary R Hook Signed-off-by: Joerg Roedel --- drivers/iommu/iommu-debugfs.c | 23 ++++------------------- 1 file changed, 4 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/iommu-debugfs.c b/drivers/iommu/iommu-debugfs.c index 3b1bf88fd1b0..f03548942096 100644 --- a/drivers/iommu/iommu-debugfs.c +++ b/drivers/iommu/iommu-debugfs.c @@ -12,6 +12,7 @@ #include struct dentry *iommu_debugfs_dir; +EXPORT_SYMBOL_GPL(iommu_debugfs_dir); /** * iommu_debugfs_setup - create the top-level iommu directory in debugfs @@ -23,9 +24,9 @@ struct dentry *iommu_debugfs_dir; * Emit a strong warning at boot time to indicate that this feature is * enabled. * - * This function is called from iommu_init; drivers may then call - * iommu_debugfs_new_driver_dir() to instantiate a vendor-specific - * directory to be used to expose internal data.
+ * This function is called from iommu_init; drivers may then use + iommu_debugfs_dir to instantiate a vendor-specific directory to be used + to expose internal data. */ void iommu_debugfs_setup(void) { @@ -48,19 +49,3 @@ void iommu_debugfs_setup(void) pr_warn("*************************************************************\n"); } } - -/** - * iommu_debugfs_new_driver_dir - create a vendor directory under debugfs/iommu - * @vendor: name of the vendor-specific subdirectory to create - * - * This function is called by an IOMMU driver to create the top-level debugfs - * directory for that driver. - * - * Return: upon success, a pointer to the dentry for the new directory. - * NULL in case of failure. - */ -struct dentry *iommu_debugfs_new_driver_dir(const char *vendor) -{ - return debugfs_create_dir(vendor, iommu_debugfs_dir); -} -EXPORT_SYMBOL_GPL(iommu_debugfs_new_driver_dir); From 117266fd59ddf46e98e36df09326d861738c6180 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Mon, 25 Feb 2019 10:46:36 +0800 Subject: [PATCH 51/60] iommu/vt-d: Check identity map for hot-added devices The Intel IOMMU driver will put devices into a static identity mapped domain during boot if the kernel parameter "iommu=pt" is used. That means the IOMMU hardware will translate a DMA address into the same memory address. Unfortunately, hot-added devices are not subject to this. That results in some devices not working properly after being hot-added. A quick way to reproduce this issue is to boot a system with iommu=pt, then remove and re-add the PCI device with echo 1 > /sys/bus/pci/devices/[pci_source_id]/remove echo 1 > /sys/bus/pci/rescan You will find the identity mapped domain was replaced with a normal domain. Cc: Ashok Raj Cc: Jacob Pan Cc: Fenghua Yu Cc: stable@vger.kernel.org Reported-by: Jis Ben Signed-off-by: Lu Baolu Tested-by: James Dong Fixes: 99dcadede42f ('intel-iommu: Support PCIe hot-plug') Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 00f46021d690..c30b73d41bf2 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4567,16 +4567,19 @@ static int device_notifier(struct notifier_block *nb, if (iommu_dummy(dev)) return 0; - if (action != BUS_NOTIFY_REMOVED_DEVICE) - return 0; + if (action == BUS_NOTIFY_REMOVED_DEVICE) { + domain = find_domain(dev); + if (!domain) + return 0; - domain = find_domain(dev); - if (!domain) - return 0; - - dmar_remove_one_dev_info(dev); - if (!domain_type_is_vm_or_si(domain) && list_empty(&domain->devices)) - domain_exit(domain); + dmar_remove_one_dev_info(dev); + if (!domain_type_is_vm_or_si(domain) && + list_empty(&domain->devices)) + domain_exit(domain); + } else if (action == BUS_NOTIFY_ADD_DEVICE) { + if (iommu_should_identity_map(dev, 1)) + domain_add_dev_info(si_domain, dev); + } return 0; } From fff42928ade591969836ff49888d063b829ac888 Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Wed, 27 Feb 2019 11:26:46 -0800 Subject: [PATCH 52/60] PCI/ATS: Add inline to pci_prg_resp_pasid_required() Fix unused function warning when compiled with CONFIG_PCI_PASID disabled.
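For illustration, a minimal reconstruction of the warning being fixed (hypothetical header, not from the patch): a plain static function defined in a header becomes a separate private definition in every file that includes it, so any file that never calls it gets -Wunused-function; marking it static inline tells the compiler an unused copy is expected.

        /* example.h -- hypothetical */
        static int stub_required(void)          /* warns in any .c file that includes
                                                   this header but never calls it */
        {
                return 0;
        }

        static inline int stub_required_fixed(void)     /* no warning if unused */
        {
                return 0;
        }
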
Fixes: e5567f5f6762 ("PCI/ATS: Add pci_prg_resp_pasid_required() interface.") Signed-off-by: Kuppuswamy Sathyanarayanan Signed-off-by: Joerg Roedel --- include/linux/pci-ats.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/linux/pci-ats.h b/include/linux/pci-ats.h index facfd6a18fe1..1ebb88e7c184 100644 --- a/include/linux/pci-ats.h +++ b/include/linux/pci-ats.h @@ -67,7 +67,7 @@ static inline int pci_max_pasids(struct pci_dev *pdev) return -EINVAL; } -static int pci_prg_resp_pasid_required(struct pci_dev *pdev) +static inline int pci_prg_resp_pasid_required(struct pci_dev *pdev) { return 0; } From 84fdfafab849036b5aefa52824b5cb42e887ef0e Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Wed, 27 Feb 2019 22:54:03 +0800 Subject: [PATCH 53/60] x86/Hyper-V: Set x2apic destination mode to physical when x2apic is available Hyper-V doesn't provide irq remapping for the IO-APIC. To enable x2apic, set the x2apic destination mode to physical mode when x2apic is available, and the Hyper-V IOMMU driver makes sure CPUs assigned IO-APIC irqs have an 8-bit APIC id. Reviewed-by: Thomas Gleixner Reviewed-by: Michael Kelley Signed-off-by: Lan Tianyu Signed-off-by: Joerg Roedel --- arch/x86/kernel/cpu/mshyperv.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index e81a2db42df7..3fa238a137d2 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -328,6 +328,18 @@ static void __init ms_hyperv_init_platform(void) # ifdef CONFIG_SMP smp_ops.smp_prepare_boot_cpu = hv_smp_prepare_boot_cpu; # endif + + /* + * Hyper-V doesn't provide irq remapping for IO-APIC. To enable x2apic, + * set x2apic destination mode to physical mode when x2apic is available + * and Hyper-V IOMMU driver makes sure cpus assigned with IO-APIC irqs + * have 8-bit APIC id. + */ +# ifdef CONFIG_X86_X2APIC + if (x2apic_supported()) + x2apic_phys = 1; +# endif + #endif } From 29217a4746835fef35279abbd12c1a1efe83bfca Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Wed, 27 Feb 2019 22:54:04 +0800 Subject: [PATCH 54/60] iommu/hyper-v: Add Hyper-V stub IOMMU driver On bare metal, enabling X2APIC mode requires an interrupt remapping function which helps to deliver irqs to CPUs with 32-bit APIC IDs. Hyper-V doesn't provide an interrupt remapping function so far, but the Hyper-V MSI protocol already supports delivering interrupts to CPUs whose virtual processor index is more than 255. IO-APIC interrupts still have the 8-bit APIC ID limitation. This patch adds a Hyper-V stub IOMMU driver in order to enable X2APIC mode successfully in Hyper-V Linux guests. The driver returns the X2APIC interrupt remapping capability when X2APIC mode is available. Otherwise, it creates a Hyper-V irq domain to limit IO-APIC interrupts' affinity and make sure CPUs assigned an IO-APIC interrupt have an 8-bit APIC ID. Define 24 IO-APIC remapping entries because Hyper-V only exposes a single IO-APIC, and one IO-APIC has 24 pins according to the IO-APIC spec (https://pdos.csail.mit.edu/6.828/2016/readings/ia32/ioapic.pdf).
Reviewed-by: Michael Kelley Signed-off-by: Lan Tianyu Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 9 ++ drivers/iommu/Makefile | 1 + drivers/iommu/hyperv-iommu.c | 196 ++++++++++++++++++++++++++++++++++ drivers/iommu/irq_remapping.c | 3 + drivers/iommu/irq_remapping.h | 1 + 5 files changed, 210 insertions(+) create mode 100644 drivers/iommu/hyperv-iommu.c diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index d9a25715650e..34b066541491 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -435,4 +435,13 @@ config QCOM_IOMMU help Support for IOMMU on certain Qualcomm SoCs. +config HYPERV_IOMMU + bool "Hyper-V x2APIC IRQ Handling" + depends on HYPERV + select IOMMU_API + default HYPERV + help + Stub IOMMU driver to handle IRQs as to allow Hyper-V Linux + guests to run with x2APIC mode enabled. + endif # IOMMU_SUPPORT diff --git a/drivers/iommu/Makefile b/drivers/iommu/Makefile index a158a68c8ea8..8c71a15e986b 100644 --- a/drivers/iommu/Makefile +++ b/drivers/iommu/Makefile @@ -32,3 +32,4 @@ obj-$(CONFIG_EXYNOS_IOMMU) += exynos-iommu.o obj-$(CONFIG_FSL_PAMU) += fsl_pamu.o fsl_pamu_domain.o obj-$(CONFIG_S390_IOMMU) += s390-iommu.o obj-$(CONFIG_QCOM_IOMMU) += qcom_iommu.o +obj-$(CONFIG_HYPERV_IOMMU) += hyperv-iommu.o diff --git a/drivers/iommu/hyperv-iommu.c b/drivers/iommu/hyperv-iommu.c new file mode 100644 index 000000000000..a386b83e0e34 --- /dev/null +++ b/drivers/iommu/hyperv-iommu.c @@ -0,0 +1,196 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * Hyper-V stub IOMMU driver. + * + * Copyright (C) 2019, Microsoft, Inc. + * + * Author : Lan Tianyu + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "irq_remapping.h" + +#ifdef CONFIG_IRQ_REMAP + +/* + * According 82093AA IO-APIC spec , IO APIC has a 24-entry Interrupt + * Redirection Table. Hyper-V exposes one single IO-APIC and so define + * 24 IO APIC remmapping entries. + */ +#define IOAPIC_REMAPPING_ENTRY 24 + +static cpumask_t ioapic_max_cpumask = { CPU_BITS_NONE }; +static struct irq_domain *ioapic_ir_domain; + +static int hyperv_ir_set_affinity(struct irq_data *data, + const struct cpumask *mask, bool force) +{ + struct irq_data *parent = data->parent_data; + struct irq_cfg *cfg = irqd_cfg(data); + struct IO_APIC_route_entry *entry; + int ret; + + /* Return error If new irq affinity is out of ioapic_max_cpumask. 
*/ + if (!cpumask_subset(mask, &ioapic_max_cpumask)) + return -EINVAL; + + ret = parent->chip->irq_set_affinity(parent, mask, force); + if (ret < 0 || ret == IRQ_SET_MASK_OK_DONE) + return ret; + + entry = data->chip_data; + entry->dest = cfg->dest_apicid; + entry->vector = cfg->vector; + send_cleanup_vector(cfg); + + return 0; +} + +static struct irq_chip hyperv_ir_chip = { + .name = "HYPERV-IR", + .irq_ack = apic_ack_irq, + .irq_set_affinity = hyperv_ir_set_affinity, +}; + +static int hyperv_irq_remapping_alloc(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs, + void *arg) +{ + struct irq_alloc_info *info = arg; + struct irq_data *irq_data; + struct irq_desc *desc; + int ret = 0; + + if (!info || info->type != X86_IRQ_ALLOC_TYPE_IOAPIC || nr_irqs > 1) + return -EINVAL; + + ret = irq_domain_alloc_irqs_parent(domain, virq, nr_irqs, arg); + if (ret < 0) + return ret; + + irq_data = irq_domain_get_irq_data(domain, virq); + if (!irq_data) { + irq_domain_free_irqs_common(domain, virq, nr_irqs); + return -EINVAL; + } + + irq_data->chip = &hyperv_ir_chip; + + /* + * If there is interrupt remapping function of IOMMU, setting irq + * affinity only needs to change IRTE of IOMMU. But Hyper-V doesn't + * support interrupt remapping function, setting irq affinity of IO-APIC + * interrupts still needs to change IO-APIC registers. But ioapic_ + * configure_entry() will ignore value of cfg->vector and cfg-> + * dest_apicid when IO-APIC's parent irq domain is not the vector + * domain.(See ioapic_configure_entry()) In order to setting vector + * and dest_apicid to IO-APIC register, IO-APIC entry pointer is saved + * in the chip_data and hyperv_irq_remapping_activate()/hyperv_ir_set_ + * affinity() set vector and dest_apicid directly into IO-APIC entry. + */ + irq_data->chip_data = info->ioapic_entry; + + /* + * Hypver-V IO APIC irq affinity should be in the scope of + * ioapic_max_cpumask because no irq remapping support. + */ + desc = irq_data_to_desc(irq_data); + cpumask_copy(desc->irq_common_data.affinity, &ioapic_max_cpumask); + + return 0; +} + +static void hyperv_irq_remapping_free(struct irq_domain *domain, + unsigned int virq, unsigned int nr_irqs) +{ + irq_domain_free_irqs_common(domain, virq, nr_irqs); +} + +static int hyperv_irq_remapping_activate(struct irq_domain *domain, + struct irq_data *irq_data, bool reserve) +{ + struct irq_cfg *cfg = irqd_cfg(irq_data); + struct IO_APIC_route_entry *entry = irq_data->chip_data; + + entry->dest = cfg->dest_apicid; + entry->vector = cfg->vector; + + return 0; +} + +static struct irq_domain_ops hyperv_ir_domain_ops = { + .alloc = hyperv_irq_remapping_alloc, + .free = hyperv_irq_remapping_free, + .activate = hyperv_irq_remapping_activate, +}; + +static int __init hyperv_prepare_irq_remapping(void) +{ + struct fwnode_handle *fn; + int i; + + if (!hypervisor_is_type(X86_HYPER_MS_HYPERV) || + !x2apic_supported()) + return -ENODEV; + + fn = irq_domain_alloc_named_id_fwnode("HYPERV-IR", 0); + if (!fn) + return -ENOMEM; + + ioapic_ir_domain = + irq_domain_create_hierarchy(arch_get_ir_parent_domain(), + 0, IOAPIC_REMAPPING_ENTRY, fn, + &hyperv_ir_domain_ops, NULL); + + irq_domain_free_fwnode(fn); + + /* + * Hyper-V doesn't provide irq remapping function for + * IO-APIC and so IO-APIC only accepts 8-bit APIC ID. + * Cpu's APIC ID is read from ACPI MADT table and APIC IDs + * in the MADT table on Hyper-v are sorted monotonic increasingly. + * APIC ID reflects cpu topology. 
There maybe some APIC ID + * gaps when cpu number in a socket is not power of two. Prepare + * max cpu affinity for IOAPIC irqs. Scan cpu 0-255 and set cpu + * into ioapic_max_cpumask if its APIC ID is less than 256. + */ + for (i = min_t(unsigned int, num_possible_cpus() - 1, 255); i >= 0; i--) + if (cpu_physical_id(i) < 256) + cpumask_set_cpu(i, &ioapic_max_cpumask); + + return 0; +} + +static int __init hyperv_enable_irq_remapping(void) +{ + return IRQ_REMAP_X2APIC_MODE; +} + +static struct irq_domain *hyperv_get_ir_irq_domain(struct irq_alloc_info *info) +{ + if (info->type == X86_IRQ_ALLOC_TYPE_IOAPIC) + return ioapic_ir_domain; + else + return NULL; +} + +struct irq_remap_ops hyperv_irq_remap_ops = { + .prepare = hyperv_prepare_irq_remapping, + .enable = hyperv_enable_irq_remapping, + .get_ir_irq_domain = hyperv_get_ir_irq_domain, +}; + +#endif diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index b94ebd42edd8..81cf2908c531 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -103,6 +103,9 @@ int __init irq_remapping_prepare(void) else if (IS_ENABLED(CONFIG_AMD_IOMMU) && amd_iommu_irq_ops.prepare() == 0) remap_ops = &amd_iommu_irq_ops; + else if (IS_ENABLED(CONFIG_HYPERV_IOMMU) && + hyperv_irq_remap_ops.prepare() == 0) + remap_ops = &hyperv_irq_remap_ops; else return -ENOSYS; diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index 0afef6e43be4..f8609e9f1f5d 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -64,6 +64,7 @@ struct irq_remap_ops { extern struct irq_remap_ops intel_irq_remap_ops; extern struct irq_remap_ops amd_iommu_irq_ops; +extern struct irq_remap_ops hyperv_irq_remap_ops; #else /* CONFIG_IRQ_REMAP */ From 32d5860a9e3c98b5043716fff05a7b20b15918f9 Mon Sep 17 00:00:00 2001 From: Lan Tianyu Date: Wed, 27 Feb 2019 22:54:05 +0800 Subject: [PATCH 55/60] MAINTAINERS: Add Hyper-V IOMMU driver into Hyper-V CORE AND DRIVERS scope This patch is to add Hyper-V IOMMU driver file into Hyper-V CORE and DRIVERS scope. Reviewed-by: Michael Kelley Signed-off-by: Lan Tianyu Signed-off-by: Joerg Roedel --- MAINTAINERS | 1 + 1 file changed, 1 insertion(+) diff --git a/MAINTAINERS b/MAINTAINERS index dce5c099f43c..a7432224930b 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7027,6 +7027,7 @@ F: drivers/net/hyperv/ F: drivers/scsi/storvsc_drv.c F: drivers/uio/uio_hv_generic.c F: drivers/video/fbdev/hyperv_fb.c +F: drivers/iommu/hyperv_iommu.c F: net/vmw_vsock/hyperv_transport.c F: include/linux/hyperv.h F: include/uapi/linux/hyperv.h From a947a45f052934a14971a59eb835ce287cb3291b Mon Sep 17 00:00:00 2001 From: Yang Wei Date: Thu, 28 Feb 2019 22:45:01 +0800 Subject: [PATCH 56/60] iommu/mediatek: Fix semicolon code style issue Delete a superfluous semicolon in mtk_iommu_add_device(). 
Signed-off-by: Yang Wei Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu_v1.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 7e0df67bd3e9..52b01e3a49df 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -474,7 +474,7 @@ static int mtk_iommu_add_device(struct device *dev) return err; } - return iommu_device_link(&data->iommu, dev);; + return iommu_device_link(&data->iommu, dev); } static void mtk_iommu_remove_device(struct device *dev) From d8b8591054575f33237556c32762d54e30774d28 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 1 Mar 2019 11:23:10 +0800 Subject: [PATCH 57/60] iommu/vt-d: Disable ATS support on untrusted devices Commit fb58fdcd295b9 ("iommu/vt-d: Do not enable ATS for untrusted devices") disables ATS support on the devices which have been marked as untrusted. Unfortunately this is not enough to fix the DMA attack vulnerabilities because IOMMU driver allows translated requests as long as a device advertises the ATS capability. Hence a malicious peripheral device could use this to bypass IOMMU. This disables the ATS support on untrusted devices by clearing the internal per-device ATS mark. As a result, the IOMMU driver will block any translated requests from any device marked as untrusted. Cc: Jacob Pan Cc: Mika Westerberg Suggested-by: Kevin Tian Suggested-by: Ashok Raj Fixes: fb58fdcd295b9 ("iommu/vt-d: Do not enable ATS for untrusted devices") Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index c30b73d41bf2..64ce780336a8 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2484,7 +2484,8 @@ static struct dmar_domain *dmar_insert_one_dev_info(struct intel_iommu *iommu, if (dev && dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(info->dev); - if (!pci_ats_disabled() && + if (!pdev->untrusted && + !pci_ats_disabled() && ecap_dev_iotlb_support(iommu->ecap) && pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS) && dmar_find_matched_atsr_unit(pdev)) From 41b80db227993394f210f0eb705a326db5f605b6 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 1 Mar 2019 11:23:11 +0800 Subject: [PATCH 58/60] iommu/vt-d: Set context field after value initialized Otherwise, the translation type field of a context entry for a PCI device will always be 0. All translated DMA requests will be blocked by IOMMU. As a result, PCI devices with PCI ATS (device IOTLB) support won't work as expected.
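For context, a condensed sketch of the ordering problem in domain_context_mapping_one() (legacy, non-scalable-mode path; surrounding code trimmed, so treat it as an illustration rather than the literal function): the translation type starts out as CONTEXT_TT_MULTI_LEVEL, which is 0, and is only promoted to CONTEXT_TT_DEV_IOTLB after the device-IOTLB check, so programming the context-entry field before that point records 0; the fix below moves the write to after the value is final.

        int translation = CONTEXT_TT_MULTI_LEVEL;       /* == 0 */

        context_set_domain_id(context, did);
        /* writing the TT field here would latch the initial value, 0 */

        if (translation != CONTEXT_TT_PASS_THROUGH) {
                /* ... walk page-table levels, then: */
                info = iommu_support_dev_iotlb(domain, iommu, bus, devfn);
                if (info && info->ats_supported)
                        translation = CONTEXT_TT_DEV_IOTLB;
                else
                        translation = CONTEXT_TT_MULTI_LEVEL;
                /* ... set address root and width ... */
        }

        /* correct place: 'translation' is final only here */
        context_set_translation_type(context, translation);
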
Cc: Ashok Raj Cc: Jacob Pan Suggested-by: Kevin Tian Fixes: 7373a8cc38197 ("iommu/vt-d: Setup context and enable RID2PASID support") Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 64ce780336a8..d7f10fd4ac2d 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2056,7 +2056,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int agaw; context_set_domain_id(context, did); - context_set_translation_type(context, translation); if (translation != CONTEXT_TT_PASS_THROUGH) { /* @@ -2086,6 +2085,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, */ context_set_address_width(context, iommu->msagaw); } + + context_set_translation_type(context, translation); } context_set_fault_enable(context); From c56cba5daf45d2d091ef1cfe2f1d6a930446687b Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 1 Mar 2019 11:23:12 +0800 Subject: [PATCH 59/60] iommu/vt-d: Fix NULL pointer reference in intel_svm_bind_mm() Intel IOMMU could be turned off with intel_iommu=off. If Intel IOMMU is off, the intel_iommu struct will not be initialized. When device drivers call intel_svm_bind_mm(), the NULL pointer reference will happen there. Add dmar_disabled check to avoid NULL pointer reference. Cc: Ashok Raj Cc: Jacob Pan Reported-by: Dave Jiang Fixes: 2f26e0a9c9860 ("iommu/vt-d: Add basic SVM PASID support") Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index c79540deaf00..3a4b09ae8561 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -234,7 +234,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ int pasid_max; int ret; - if (!iommu) + if (!iommu || dmar_disabled) return -EINVAL; if (dev_is_pci(dev)) { From 48739afaac2a5a71a132cd43fd6bd5722680a9b1 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 1 Mar 2019 11:23:13 +0800 Subject: [PATCH 60/60] iommu/vt-d: Get domain ID before clear pasid entry After tearing down a pasid entry, the domain id is used to invalidate the translation caches. Retrieve the domain id from the pasid entry value before clearing the pasid entry. Otherwise, we will always use domain id 0. Cc: Ashok Raj Cc: Jacob Pan Signed-off-by: Liu Yi L Fixes: 6f7db75e1c469 ("iommu/vt-d: Add second level page table interface") Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-pasid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-pasid.c b/drivers/iommu/intel-pasid.c index 53fe5248d8f1..03b12d2ee213 100644 --- a/drivers/iommu/intel-pasid.c +++ b/drivers/iommu/intel-pasid.c @@ -466,8 +466,8 @@ void intel_pasid_tear_down_entry(struct intel_iommu *iommu, if (WARN_ON(!pte)) return; - intel_pasid_clear_entry(dev, pasid); did = pasid_get_domain_id(pte); + intel_pasid_clear_entry(dev, pasid); if (!ecap_coherent(iommu->ecap)) clflush_cache_range(pte, sizeof(*pte));