From 8326c5d2057a06adb082f3547c5d1f75cfd33e57 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 16 Mar 2017 16:23:51 +0200 Subject: [PATCH 01/63] iommu/dmar: Rectify return code handling in detect_intel_iommu() There is inconsistency in return codes across the functions called from detect_intel_iommu(). Make it consistent and propagate return code to the caller. Signed-off-by: Andy Shevchenko Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 36e3f430d265..edcf7410f736 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -551,7 +551,7 @@ static int __init dmar_table_detect(void) status = AE_NOT_FOUND; } - return (ACPI_SUCCESS(status) ? 1 : 0); + return ACPI_SUCCESS(status) ? 0 : -ENOENT; } static int dmar_walk_remapping_entries(struct acpi_dmar_header *start, @@ -891,17 +891,17 @@ int __init detect_intel_iommu(void) down_write(&dmar_global_lock); ret = dmar_table_detect(); - if (ret) - ret = !dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl, - &validate_drhd_cb); - if (ret && !no_iommu && !iommu_detected && !dmar_disabled) { + if (!ret) + ret = dmar_walk_dmar_table((struct acpi_table_dmar *)dmar_tbl, + &validate_drhd_cb); + if (!ret && !no_iommu && !iommu_detected && !dmar_disabled) { iommu_detected = 1; /* Make sure ACS will be enabled */ pci_request_acs(); } #ifdef CONFIG_X86 - if (ret) + if (!ret) x86_init.iommu.iommu_init = intel_iommu_init; #endif @@ -911,10 +911,9 @@ int __init detect_intel_iommu(void) } up_write(&dmar_global_lock); - return ret ? 1 : -ENODEV; + return ret ? ret : 1; } - static void unmap_iommu(struct intel_iommu *iommu) { iounmap(iommu->reg); From 4a8ed2b819402ae450e3c53a1fe5eec59e3f423e Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 16 Mar 2017 16:23:52 +0200 Subject: [PATCH 02/63] iommu/dmar: Return directly from a loop in dmar_dev_scope_status() There is no need to have a temporary variable. Signed-off-by: Andy Shevchenko Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index edcf7410f736..71d774f1d406 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -557,11 +557,10 @@ static int __init dmar_table_detect(void) static int dmar_walk_remapping_entries(struct acpi_dmar_header *start, size_t len, struct dmar_res_callback *cb) { - int ret = 0; struct acpi_dmar_header *iter, *next; struct acpi_dmar_header *end = ((void *)start) + len; - for (iter = start; iter < end && ret == 0; iter = next) { + for (iter = start; iter < end; iter = next) { next = (void *)iter + iter->length; if (iter->length == 0) { /* Avoid looping forever on bad ACPI tables */ @@ -570,8 +569,7 @@ static int dmar_walk_remapping_entries(struct acpi_dmar_header *start, } else if (next > end) { /* Avoid passing table end */ pr_warn(FW_BUG "Record passes table end\n"); - ret = -EINVAL; - break; + return -EINVAL; } if (cb->print_entry) @@ -582,15 +580,19 @@ static int dmar_walk_remapping_entries(struct acpi_dmar_header *start, pr_debug("Unknown DMAR structure type %d\n", iter->type); } else if (cb->cb[iter->type]) { + int ret; + ret = cb->cb[iter->type](iter, cb->arg[iter->type]); + if (ret) + return ret; } else if (!cb->ignore_unhandled) { pr_warn("No handler for DMAR structure type %d\n", iter->type); - ret = -EINVAL; + return -EINVAL; } } - return ret; + return 0; } static inline int dmar_walk_dmar_table(struct acpi_table_dmar *dmar, From 3f6db6591a2decad5f223a7dcfc01d2a3c15e187 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 16 Mar 2017 16:23:53 +0200 Subject: [PATCH 03/63] iommu/dmar: Remove redundant assignment of ret There is no need to assign ret to 0 in some cases. Moreover it might shadow some errors in the future. Remove such assignments. Signed-off-by: Andy Shevchenko Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 71d774f1d406..9a44e20d7d88 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -391,7 +391,7 @@ static int dmar_parse_one_drhd(struct acpi_dmar_header *header, void *arg) { struct acpi_dmar_hardware_unit *drhd; struct dmar_drhd_unit *dmaru; - int ret = 0; + int ret; drhd = (struct acpi_dmar_hardware_unit *)header; dmaru = dmar_find_dmaru(drhd); @@ -609,8 +609,8 @@ static int __init parse_dmar_table(void) { struct acpi_table_dmar *dmar; - int ret = 0; int drhd_count = 0; + int ret; struct dmar_res_callback cb = { .print_entry = true, .ignore_unhandled = true, From f9808079aa7626d71215a71661dbed5c4ff101d2 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 16 Mar 2017 16:23:54 +0200 Subject: [PATCH 04/63] iommu/dmar: Remove redundant ' != 0' when check return code Usual pattern when we check for return code, which might be negative errno, is either (ret) or (!ret). Remove extra ' != 0' from condition. Signed-off-by: Andy Shevchenko Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 9a44e20d7d88..cbf7763d8091 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -311,7 +311,7 @@ static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info) ((void *)drhd) + drhd->header.length, dmaru->segment, dmaru->devices, dmaru->devices_cnt); - if (ret != 0) + if (ret) break; } if (ret >= 0) From 61012985eb132a2fa5e4a3eddbc33528334fa377 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 16 Mar 2017 16:23:55 +0200 Subject: [PATCH 05/63] iommu/vt-d: Use lo_hi_readq() / lo_hi_writeq() There is already helper functions to do 64-bit I/O on 32-bit machines or buses, thus we don't need to reinvent the wheel. Signed-off-by: Andy Shevchenko Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index c573a52ae440..485a5b48f038 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -30,6 +30,8 @@ #include #include #include +#include + #include #include @@ -72,24 +74,8 @@ #define OFFSET_STRIDE (9) -#ifdef CONFIG_64BIT #define dmar_readq(a) readq(a) #define dmar_writeq(a,v) writeq(v,a) -#else -static inline u64 dmar_readq(void __iomem *addr) -{ - u32 lo, hi; - lo = readl(addr); - hi = readl(addr + 4); - return (((u64) hi) << 32) + lo; -} - -static inline void dmar_writeq(void __iomem *addr, u64 val) -{ - writel((u32)val, addr); - writel((u32)(val >> 32), addr + 4); -} -#endif #define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) #define DMAR_VER_MINOR(v) ((v) & 0x0f) From 938f1bbe35e3a7cb07e1fa7c512e2ef8bb866bdf Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 16 Mar 2017 17:00:17 +0000 Subject: [PATCH 06/63] iommu/dma: Don't reserve PCI I/O windows Even if a host controller's CPU-side MMIO windows into PCI I/O space do happen to leak into PCI memory space such that it might treat them as peer addresses, trying to reserve the corresponding I/O space addresses doesn't do anything to help solve that problem. Stop doing a silly thing. Fixes: fade1ec055dc ("iommu/dma: Avoid PCI host bridge windows") Reviewed-by: Eric Auger Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 48d36ce59efb..1e0983488a8d 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -175,8 +175,7 @@ static void iova_reserve_pci_windows(struct pci_dev *dev, unsigned long lo, hi; resource_list_for_each_entry(window, &bridge->windows) { - if (resource_type(window->res) != IORESOURCE_MEM && - resource_type(window->res) != IORESOURCE_IO) + if (resource_type(window->res) != IORESOURCE_MEM) continue; lo = iova_pfn(iovad, window->res->start - window->offset); From 7c1b058c8b5a310f2f0439aff14e454aa9afe502 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 16 Mar 2017 17:00:18 +0000 Subject: [PATCH 07/63] iommu/dma: Handle IOMMU API reserved regions Now that it's simple to discover the necessary reservations for a given device/IOMMU combination, let's wire up the appropriate handling. Basic reserved regions and direct-mapped regions we simply have to carve out of IOVA space (the IOMMU core having already mapped the latter before attaching the device). For hardware MSI regions, we also pre-populate the cookie with matching msi_pages. That way, irqchip drivers which normally assume MSIs to require mapping at the IOMMU can keep working without having to special-case their iommu_dma_map_msi_msg() hook, or indeed be aware at all of quirks preventing the IOMMU from translating certain addresses. Reviewed-by: Eric Auger Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 76 +++++++++++++++++++++++++++++++++++---- 1 file changed, 69 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 1e0983488a8d..5787f919f4ec 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -184,6 +184,66 @@ static void iova_reserve_pci_windows(struct pci_dev *dev, } } +static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie, + phys_addr_t start, phys_addr_t end) +{ + struct iova_domain *iovad = &cookie->iovad; + struct iommu_dma_msi_page *msi_page; + int i, num_pages; + + start -= iova_offset(iovad, start); + num_pages = iova_align(iovad, end - start) >> iova_shift(iovad); + + msi_page = kcalloc(num_pages, sizeof(*msi_page), GFP_KERNEL); + if (!msi_page) + return -ENOMEM; + + for (i = 0; i < num_pages; i++) { + msi_page[i].phys = start; + msi_page[i].iova = start; + INIT_LIST_HEAD(&msi_page[i].list); + list_add(&msi_page[i].list, &cookie->msi_page_list); + start += iovad->granule; + } + + return 0; +} + +static int iova_reserve_iommu_regions(struct device *dev, + struct iommu_domain *domain) +{ + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + struct iommu_resv_region *region; + LIST_HEAD(resv_regions); + int ret = 0; + + if (dev_is_pci(dev)) + iova_reserve_pci_windows(to_pci_dev(dev), iovad); + + iommu_get_resv_regions(dev, &resv_regions); + list_for_each_entry(region, &resv_regions, list) { + unsigned long lo, hi; + + /* We ARE the software that manages these! */ + if (region->type == IOMMU_RESV_SW_MSI) + continue; + + lo = iova_pfn(iovad, region->start); + hi = iova_pfn(iovad, region->start + region->length - 1); + reserve_iova(iovad, lo, hi); + + if (region->type == IOMMU_RESV_MSI) + ret = cookie_init_hw_msi_region(cookie, region->start, + region->start + region->length); + if (ret) + break; + } + iommu_put_resv_regions(dev, &resv_regions); + + return ret; +} + /** * iommu_dma_init_domain - Initialise a DMA mapping domain * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() @@ -202,7 +262,6 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; unsigned long order, base_pfn, end_pfn; - bool pci = dev && dev_is_pci(dev); if (!cookie || cookie->type != IOMMU_DMA_IOVA_COOKIE) return -EINVAL; @@ -232,7 +291,7 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, * leave the cache limit at the top of their range to save an rb_last() * traversal on every allocation. */ - if (pci) + if (dev && dev_is_pci(dev)) end_pfn &= DMA_BIT_MASK(32) >> order; /* start_pfn is always nonzero for an already-initialised domain */ @@ -247,12 +306,15 @@ int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, * area cache limit down for the benefit of the smaller one. */ iovad->dma_32bit_pfn = min(end_pfn, iovad->dma_32bit_pfn); - } else { - init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn); - if (pci) - iova_reserve_pci_windows(to_pci_dev(dev), iovad); + + return 0; } - return 0; + + init_iova_domain(iovad, 1UL << order, base_pfn, end_pfn); + if (!dev) + return 0; + + return iova_reserve_iommu_regions(dev, domain); } EXPORT_SYMBOL(iommu_dma_init_domain); From 273df9635385b2156851c7ee49f40658d7bcb29d Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 16 Mar 2017 17:00:19 +0000 Subject: [PATCH 08/63] iommu/dma: Make PCI window reservation generic Now that we're applying the IOMMU API reserved regions to our IOVA domains, we shouldn't need to privately special-case PCI windows, or indeed anything else which isn't specific to our iommu-dma layer. However, since those aren't IOMMU-specific either, rather than start duplicating code into IOMMU drivers let's transform the existing function into an iommu_get_resv_regions() helper that they can share. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu-v3.c | 2 ++ drivers/iommu/arm-smmu.c | 2 ++ drivers/iommu/dma-iommu.c | 38 +++++++++++++++++++++++++++---------- include/linux/dma-iommu.h | 5 +++++ 4 files changed, 37 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 591bb96047c9..bbd46efbe075 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1893,6 +1893,8 @@ static void arm_smmu_get_resv_regions(struct device *dev, return; list_add_tail(®ion->list, head); + + iommu_dma_get_resv_regions(dev, head); } static void arm_smmu_put_resv_regions(struct device *dev, diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index b493c99e17f7..9b33700b7c69 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1613,6 +1613,8 @@ static void arm_smmu_get_resv_regions(struct device *dev, return; list_add_tail(®ion->list, head); + + iommu_dma_get_resv_regions(dev, head); } static void arm_smmu_put_resv_regions(struct device *dev, diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 5787f919f4ec..85652110c8ff 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -167,22 +167,43 @@ void iommu_put_dma_cookie(struct iommu_domain *domain) } EXPORT_SYMBOL(iommu_put_dma_cookie); -static void iova_reserve_pci_windows(struct pci_dev *dev, - struct iova_domain *iovad) +/** + * iommu_dma_get_resv_regions - Reserved region driver helper + * @dev: Device from iommu_get_resv_regions() + * @list: Reserved region list from iommu_get_resv_regions() + * + * IOMMU drivers can use this to implement their .get_resv_regions callback + * for general non-IOMMU-specific reservations. Currently, this covers host + * bridge windows for PCI devices. + */ +void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) { - struct pci_host_bridge *bridge = pci_find_host_bridge(dev->bus); + struct pci_host_bridge *bridge; struct resource_entry *window; - unsigned long lo, hi; + if (!dev_is_pci(dev)) + return; + + bridge = pci_find_host_bridge(to_pci_dev(dev)->bus); resource_list_for_each_entry(window, &bridge->windows) { + struct iommu_resv_region *region; + phys_addr_t start; + size_t length; + if (resource_type(window->res) != IORESOURCE_MEM) continue; - lo = iova_pfn(iovad, window->res->start - window->offset); - hi = iova_pfn(iovad, window->res->end - window->offset); - reserve_iova(iovad, lo, hi); + start = window->res->start - window->offset; + length = window->res->end - window->res->start + 1; + region = iommu_alloc_resv_region(start, length, 0, + IOMMU_RESV_RESERVED); + if (!region) + return; + + list_add_tail(®ion->list, list); } } +EXPORT_SYMBOL(iommu_dma_get_resv_regions); static int cookie_init_hw_msi_region(struct iommu_dma_cookie *cookie, phys_addr_t start, phys_addr_t end) @@ -218,9 +239,6 @@ static int iova_reserve_iommu_regions(struct device *dev, LIST_HEAD(resv_regions); int ret = 0; - if (dev_is_pci(dev)) - iova_reserve_pci_windows(to_pci_dev(dev), iovad); - iommu_get_resv_regions(dev, &resv_regions); list_for_each_entry(region, &resv_regions, list) { unsigned long lo, hi; diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 5725c94b1f12..b6635a46fc7c 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -71,6 +71,7 @@ int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); /* The DMA API isn't _quite_ the whole story, though... */ void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg); +void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); #else @@ -100,6 +101,10 @@ static inline void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg) { } +static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) +{ +} + #endif /* CONFIG_IOMMU_DMA */ #endif /* __KERNEL__ */ #endif /* __DMA_IOMMU_H */ From e75276638c1423d286e425fd29375e5736c7635c Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 24 Mar 2017 10:18:44 +0100 Subject: [PATCH 09/63] iommu/exynos: Don't open-code loop unrolling IOMMU domain allocation is not performance critical operation, so remove hand made optimisation of unrolled initialization loop and leave this to the compiler. Signed-off-by: Marek Szyprowski Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index a7e0821c9967..b83df7196e76 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -741,16 +741,8 @@ static struct iommu_domain *exynos_iommu_domain_alloc(unsigned type) goto err_counter; /* Workaround for System MMU v3.3 to prevent caching 1MiB mapping */ - for (i = 0; i < NUM_LV1ENTRIES; i += 8) { - domain->pgtable[i + 0] = ZERO_LV2LINK; - domain->pgtable[i + 1] = ZERO_LV2LINK; - domain->pgtable[i + 2] = ZERO_LV2LINK; - domain->pgtable[i + 3] = ZERO_LV2LINK; - domain->pgtable[i + 4] = ZERO_LV2LINK; - domain->pgtable[i + 5] = ZERO_LV2LINK; - domain->pgtable[i + 6] = ZERO_LV2LINK; - domain->pgtable[i + 7] = ZERO_LV2LINK; - } + for (i = 0; i < NUM_LV1ENTRIES; i++) + domain->pgtable[i] = ZERO_LV2LINK; handle = dma_map_single(dma_dev, domain->pgtable, LV1TABLE_SIZE, DMA_TO_DEVICE); From d5bf739dc7628a35b334cdb9058750388927760a Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Fri, 24 Mar 2017 10:19:01 +0100 Subject: [PATCH 10/63] iommu/exynos: Use smarter TLB flush method for v5 SYSMMU SYSMMU v5 has dedicated registers to perform TLB flush range operation, so use them instead of looping with FLUSH_ENTRY command. Signed-off-by: Marek Szyprowski Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index b83df7196e76..88b26d3e8c2c 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -171,6 +171,9 @@ static u32 lv2ent_offset(sysmmu_iova_t iova) #define REG_V5_PT_BASE_PFN 0x00C #define REG_V5_MMU_FLUSH_ALL 0x010 #define REG_V5_MMU_FLUSH_ENTRY 0x014 +#define REG_V5_MMU_FLUSH_RANGE 0x018 +#define REG_V5_MMU_FLUSH_START 0x020 +#define REG_V5_MMU_FLUSH_END 0x024 #define REG_V5_INT_STATUS 0x060 #define REG_V5_INT_CLEAR 0x064 #define REG_V5_FAULT_AR_VA 0x070 @@ -319,14 +322,23 @@ static void __sysmmu_tlb_invalidate_entry(struct sysmmu_drvdata *data, { unsigned int i; - for (i = 0; i < num_inv; i++) { - if (MMU_MAJ_VER(data->version) < 5) + if (MMU_MAJ_VER(data->version) < 5) { + for (i = 0; i < num_inv; i++) { writel((iova & SPAGE_MASK) | 1, data->sfrbase + REG_MMU_FLUSH_ENTRY); - else + iova += SPAGE_SIZE; + } + } else { + if (num_inv == 1) { writel((iova & SPAGE_MASK) | 1, data->sfrbase + REG_V5_MMU_FLUSH_ENTRY); - iova += SPAGE_SIZE; + } else { + writel((iova & SPAGE_MASK), + data->sfrbase + REG_V5_MMU_FLUSH_START); + writel((iova & SPAGE_MASK) + (num_inv - 1) * SPAGE_SIZE, + data->sfrbase + REG_V5_MMU_FLUSH_END); + writel(1, data->sfrbase + REG_V5_MMU_FLUSH_RANGE); + } } } From 161b28aae1651aa7ad63ec14753aa8a751154340 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 28 Mar 2017 17:04:52 +0200 Subject: [PATCH 11/63] iommu/vt-d: Make sure IOMMUs are off when intel_iommu=off When booting into a kexec kernel with intel_iommu=off, and the previous kernel had intel_iommu=on, the IOMMU hardware is still enabled and gets not disabled by the new kernel. This causes the boot to fail because DMA is blocked by the hardware. Disable the IOMMUs when we find it enabled in the kexec kernel and boot with intel_iommu=off. Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 238ad3447712..5f08ba13972b 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4730,6 +4730,15 @@ static int intel_iommu_cpu_dead(unsigned int cpu) return 0; } +static void intel_disable_iommus(void) +{ + struct intel_iommu *iommu = NULL; + struct dmar_drhd_unit *drhd; + + for_each_iommu(iommu, drhd) + iommu_disable_translation(iommu); +} + static inline struct intel_iommu *dev_to_intel_iommu(struct device *dev) { return container_of(dev, struct intel_iommu, iommu.dev); @@ -4840,8 +4849,15 @@ int __init intel_iommu_init(void) goto out_free_dmar; } - if (no_iommu || dmar_disabled) + if (no_iommu || dmar_disabled) { + /* + * Make sure the IOMMUs are switched off, even when we + * boot into a kexec kernel and the previous kernel left + * them enabled + */ + intel_disable_iommus(); goto out_free_dmar; + } if (list_empty(&dmar_rmrr_units)) pr_info("No RMRR found\n"); From 842fe519f68b4d17ba53c66d69f22a72b1ad08cf Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 31 Mar 2017 15:46:05 +0100 Subject: [PATCH 12/63] iommu/dma: Convert to address-based allocation In preparation for some IOVA allocation improvements, clean up all the explicit struct iova usage such that all our mapping, unmapping and cleanup paths deal exclusively with addresses rather than implementation details. In the process, a few of the things we're touching get renamed for the sake of internal consistency. Reviewed-by: Nate Watterson Tested-by: Nate Watterson Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 119 +++++++++++++++++++++----------------- 1 file changed, 67 insertions(+), 52 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 85652110c8ff..8e0b684da1ba 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -365,12 +365,12 @@ int dma_info_to_prot(enum dma_data_direction dir, bool coherent, } } -static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size, - dma_addr_t dma_limit, struct device *dev) +static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, + size_t size, dma_addr_t dma_limit, struct device *dev) { struct iova_domain *iovad = cookie_iovad(domain); unsigned long shift = iova_shift(iovad); - unsigned long length = iova_align(iovad, size) >> shift; + unsigned long iova_len = size >> shift; struct iova *iova = NULL; if (domain->geometry.force_aperture) @@ -378,35 +378,42 @@ static struct iova *__alloc_iova(struct iommu_domain *domain, size_t size, /* Try to get PCI devices a SAC address */ if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev)) - iova = alloc_iova(iovad, length, DMA_BIT_MASK(32) >> shift, + iova = alloc_iova(iovad, iova_len, DMA_BIT_MASK(32) >> shift, true); /* * Enforce size-alignment to be safe - there could perhaps be an * attribute to control this per-device, or at least per-domain... */ if (!iova) - iova = alloc_iova(iovad, length, dma_limit >> shift, true); + iova = alloc_iova(iovad, iova_len, dma_limit >> shift, true); - return iova; + return (dma_addr_t)iova->pfn_lo << shift; } -/* The IOVA allocator knows what we mapped, so just unmap whatever that was */ -static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr) +static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, + dma_addr_t iova, size_t size) { - struct iova_domain *iovad = cookie_iovad(domain); - unsigned long shift = iova_shift(iovad); - unsigned long pfn = dma_addr >> shift; - struct iova *iova = find_iova(iovad, pfn); - size_t size; + struct iova_domain *iovad = &cookie->iovad; + struct iova *iova_rbnode; - if (WARN_ON(!iova)) + iova_rbnode = find_iova(iovad, iova_pfn(iovad, iova)); + if (WARN_ON(!iova_rbnode)) return; - size = iova_size(iova) << shift; - size -= iommu_unmap(domain, pfn << shift, size); - /* ...and if we can't, then something is horribly, horribly wrong */ - WARN_ON(size > 0); - __free_iova(iovad, iova); + __free_iova(iovad, iova_rbnode); +} + +static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr, + size_t size) +{ + struct iova_domain *iovad = cookie_iovad(domain); + size_t iova_off = iova_offset(iovad, dma_addr); + + dma_addr -= iova_off; + size = iova_align(iovad, size + iova_off); + + WARN_ON(iommu_unmap(domain, dma_addr, size) != size); + iommu_dma_free_iova(domain->iova_cookie, dma_addr, size); } static void __iommu_dma_free_pages(struct page **pages, int count) @@ -488,7 +495,7 @@ static struct page **__iommu_dma_alloc_pages(unsigned int count, void iommu_dma_free(struct device *dev, struct page **pages, size_t size, dma_addr_t *handle) { - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle); + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), *handle, size); __iommu_dma_free_pages(pages, PAGE_ALIGN(size) >> PAGE_SHIFT); *handle = DMA_ERROR_CODE; } @@ -516,11 +523,11 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, void (*flush_page)(struct device *, const void *, phys_addr_t)) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct iova_domain *iovad = cookie_iovad(domain); - struct iova *iova; + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; struct page **pages; struct sg_table sgt; - dma_addr_t dma_addr; + dma_addr_t iova; unsigned int count, min_size, alloc_sizes = domain->pgsize_bitmap; *handle = DMA_ERROR_CODE; @@ -540,11 +547,11 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, if (!pages) return NULL; - iova = __alloc_iova(domain, size, dev->coherent_dma_mask, dev); + size = iova_align(iovad, size); + iova = iommu_dma_alloc_iova(domain, size, dev->coherent_dma_mask, dev); if (!iova) goto out_free_pages; - size = iova_align(iovad, size); if (sg_alloc_table_from_pages(&sgt, pages, count, 0, size, GFP_KERNEL)) goto out_free_iova; @@ -560,19 +567,18 @@ struct page **iommu_dma_alloc(struct device *dev, size_t size, gfp_t gfp, sg_miter_stop(&miter); } - dma_addr = iova_dma_addr(iovad, iova); - if (iommu_map_sg(domain, dma_addr, sgt.sgl, sgt.orig_nents, prot) + if (iommu_map_sg(domain, iova, sgt.sgl, sgt.orig_nents, prot) < size) goto out_free_sg; - *handle = dma_addr; + *handle = iova; sg_free_table(&sgt); return pages; out_free_sg: sg_free_table(&sgt); out_free_iova: - __free_iova(iovad, iova); + iommu_dma_free_iova(cookie, iova, size); out_free_pages: __iommu_dma_free_pages(pages, count); return NULL; @@ -606,22 +612,22 @@ int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma) static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, size_t size, int prot) { - dma_addr_t dma_addr; struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct iova_domain *iovad = cookie_iovad(domain); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; size_t iova_off = iova_offset(iovad, phys); - size_t len = iova_align(iovad, size + iova_off); - struct iova *iova = __alloc_iova(domain, len, dma_get_mask(dev), dev); + dma_addr_t iova; + size = iova_align(iovad, size + iova_off); + iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev); if (!iova) return DMA_ERROR_CODE; - dma_addr = iova_dma_addr(iovad, iova); - if (iommu_map(domain, dma_addr, phys - iova_off, len, prot)) { - __free_iova(iovad, iova); + if (iommu_map(domain, iova, phys - iova_off, size, prot)) { + iommu_dma_free_iova(cookie, iova, size); return DMA_ERROR_CODE; } - return dma_addr + iova_off; + return iova + iova_off; } dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, @@ -633,7 +639,7 @@ dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs) { - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle); + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size); } /* @@ -722,10 +728,10 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, int prot) { struct iommu_domain *domain = iommu_get_domain_for_dev(dev); - struct iova_domain *iovad = cookie_iovad(domain); - struct iova *iova; + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; struct scatterlist *s, *prev = NULL; - dma_addr_t dma_addr; + dma_addr_t iova; size_t iova_len = 0; unsigned long mask = dma_get_seg_boundary(dev); int i; @@ -769,7 +775,7 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, prev = s; } - iova = __alloc_iova(domain, iova_len, dma_get_mask(dev), dev); + iova = iommu_dma_alloc_iova(domain, iova_len, dma_get_mask(dev), dev); if (!iova) goto out_restore_sg; @@ -777,14 +783,13 @@ int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, * We'll leave any physical concatenation to the IOMMU driver's * implementation - it knows better than we do. */ - dma_addr = iova_dma_addr(iovad, iova); - if (iommu_map_sg(domain, dma_addr, sg, nents, prot) < iova_len) + if (iommu_map_sg(domain, iova, sg, nents, prot) < iova_len) goto out_free_iova; - return __finalise_sg(dev, sg, nents, dma_addr); + return __finalise_sg(dev, sg, nents, iova); out_free_iova: - __free_iova(iovad, iova); + iommu_dma_free_iova(cookie, iova, iova_len); out_restore_sg: __invalidate_sg(sg, nents); return 0; @@ -793,11 +798,21 @@ out_restore_sg: void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { + dma_addr_t start, end; + struct scatterlist *tmp; + int i; /* * The scatterlist segments are mapped into a single * contiguous IOVA allocation, so this is incredibly easy. */ - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), sg_dma_address(sg)); + start = sg_dma_address(sg); + for_each_sg(sg_next(sg), tmp, nents - 1, i) { + if (sg_dma_len(tmp) == 0) + break; + sg = tmp; + } + end = sg_dma_address(sg) + sg_dma_len(sg); + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), start, end - start); } dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, @@ -810,7 +825,7 @@ dma_addr_t iommu_dma_map_resource(struct device *dev, phys_addr_t phys, void iommu_dma_unmap_resource(struct device *dev, dma_addr_t handle, size_t size, enum dma_data_direction dir, unsigned long attrs) { - __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle); + __iommu_dma_unmap(iommu_get_domain_for_dev(dev), handle, size); } int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) @@ -824,7 +839,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iommu_dma_msi_page *msi_page; struct iova_domain *iovad = cookie_iovad(domain); - struct iova *iova; + dma_addr_t iova; int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; size_t size = cookie_msi_granule(cookie); @@ -839,10 +854,10 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, msi_page->phys = msi_addr; if (iovad) { - iova = __alloc_iova(domain, size, dma_get_mask(dev), dev); + iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev); if (!iova) goto out_free_page; - msi_page->iova = iova_dma_addr(iovad, iova); + msi_page->iova = iova; } else { msi_page->iova = cookie->msi_iova; cookie->msi_iova += size; @@ -857,7 +872,7 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, out_free_iova: if (iovad) - __free_iova(iovad, iova); + iommu_dma_free_iova(cookie, iova, size); else cookie->msi_iova -= size; out_free_page: From a44e6657585b15eeebf5681bfcc7ce0b002429c2 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 31 Mar 2017 15:46:06 +0100 Subject: [PATCH 13/63] iommu/dma: Clean up MSI IOVA allocation Now that allocation is suitably abstracted, our private alloc/free helpers can drive the trivial MSI cookie allocator directly as well, which lets us clean up its exposed guts from iommu_dma_map_msi_msg() and simplify things quite a bit. Reviewed-by: Nate Watterson Tested-by: Nate Watterson Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 58 +++++++++++++++++---------------------- 1 file changed, 25 insertions(+), 33 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 8e0b684da1ba..1b94beb43036 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -61,15 +61,6 @@ static inline size_t cookie_msi_granule(struct iommu_dma_cookie *cookie) return PAGE_SIZE; } -static inline struct iova_domain *cookie_iovad(struct iommu_domain *domain) -{ - struct iommu_dma_cookie *cookie = domain->iova_cookie; - - if (cookie->type == IOMMU_DMA_IOVA_COOKIE) - return &cookie->iovad; - return NULL; -} - static struct iommu_dma_cookie *cookie_alloc(enum iommu_dma_cookie_type type) { struct iommu_dma_cookie *cookie; @@ -368,11 +359,19 @@ int dma_info_to_prot(enum dma_data_direction dir, bool coherent, static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, size_t size, dma_addr_t dma_limit, struct device *dev) { - struct iova_domain *iovad = cookie_iovad(domain); - unsigned long shift = iova_shift(iovad); - unsigned long iova_len = size >> shift; + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; + unsigned long shift, iova_len; struct iova *iova = NULL; + if (cookie->type == IOMMU_DMA_MSI_COOKIE) { + cookie->msi_iova += size; + return cookie->msi_iova - size; + } + + shift = iova_shift(iovad); + iova_len = size >> shift; + if (domain->geometry.force_aperture) dma_limit = min(dma_limit, domain->geometry.aperture_end); @@ -396,6 +395,12 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, struct iova_domain *iovad = &cookie->iovad; struct iova *iova_rbnode; + /* The MSI case is only ever cleaning up its most recent allocation */ + if (cookie->type == IOMMU_DMA_MSI_COOKIE) { + cookie->msi_iova -= size; + return; + } + iova_rbnode = find_iova(iovad, iova_pfn(iovad, iova)); if (WARN_ON(!iova_rbnode)) return; @@ -406,14 +411,15 @@ static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr, size_t size) { - struct iova_domain *iovad = cookie_iovad(domain); + struct iommu_dma_cookie *cookie = domain->iova_cookie; + struct iova_domain *iovad = &cookie->iovad; size_t iova_off = iova_offset(iovad, dma_addr); dma_addr -= iova_off; size = iova_align(iovad, size + iova_off); WARN_ON(iommu_unmap(domain, dma_addr, size) != size); - iommu_dma_free_iova(domain->iova_cookie, dma_addr, size); + iommu_dma_free_iova(cookie, dma_addr, size); } static void __iommu_dma_free_pages(struct page **pages, int count) @@ -838,7 +844,6 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, { struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iommu_dma_msi_page *msi_page; - struct iova_domain *iovad = cookie_iovad(domain); dma_addr_t iova; int prot = IOMMU_WRITE | IOMMU_NOEXEC | IOMMU_MMIO; size_t size = cookie_msi_granule(cookie); @@ -852,29 +857,16 @@ static struct iommu_dma_msi_page *iommu_dma_get_msi_page(struct device *dev, if (!msi_page) return NULL; - msi_page->phys = msi_addr; - if (iovad) { - iova = iommu_dma_alloc_iova(domain, size, dma_get_mask(dev), dev); - if (!iova) - goto out_free_page; - msi_page->iova = iova; - } else { - msi_page->iova = cookie->msi_iova; - cookie->msi_iova += size; - } - - if (iommu_map(domain, msi_page->iova, msi_addr, size, prot)) - goto out_free_iova; + iova = __iommu_dma_map(dev, msi_addr, size, prot); + if (iommu_dma_mapping_error(dev, iova)) + goto out_free_page; INIT_LIST_HEAD(&msi_page->list); + msi_page->phys = msi_addr; + msi_page->iova = iova; list_add(&msi_page->list, &cookie->msi_page_list); return msi_page; -out_free_iova: - if (iovad) - iommu_dma_free_iova(cookie, iova, size); - else - cookie->msi_iova -= size; out_free_page: kfree(msi_page); return NULL; From bb65a64c7285e7105c1a6c8a33b37770343a4e96 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 31 Mar 2017 15:46:07 +0100 Subject: [PATCH 14/63] iommu/dma: Plumb in the per-CPU IOVA caches With IOVA allocation suitably tidied up, we are finally free to opt in to the per-CPU caching mechanism. The caching alone can provide a modest improvement over walking the rbtree for weedier systems (iperf3 shows ~10% more ethernet throughput on an ARM Juno r1 constrained to a single 650MHz Cortex-A53), but the real gain will be in sidestepping the rbtree lock contention which larger ARM-based systems with lots of parallel I/O are starting to feel the pain of. Reviewed-by: Nate Watterson Tested-by: Nate Watterson Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 1b94beb43036..8348f366ddd1 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -361,8 +361,7 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, { struct iommu_dma_cookie *cookie = domain->iova_cookie; struct iova_domain *iovad = &cookie->iovad; - unsigned long shift, iova_len; - struct iova *iova = NULL; + unsigned long shift, iova_len, iova = 0; if (cookie->type == IOMMU_DMA_MSI_COOKIE) { cookie->msi_iova += size; @@ -371,41 +370,39 @@ static dma_addr_t iommu_dma_alloc_iova(struct iommu_domain *domain, shift = iova_shift(iovad); iova_len = size >> shift; + /* + * Freeing non-power-of-two-sized allocations back into the IOVA caches + * will come back to bite us badly, so we have to waste a bit of space + * rounding up anything cacheable to make sure that can't happen. The + * order of the unadjusted size will still match upon freeing. + */ + if (iova_len < (1 << (IOVA_RANGE_CACHE_MAX_SIZE - 1))) + iova_len = roundup_pow_of_two(iova_len); if (domain->geometry.force_aperture) dma_limit = min(dma_limit, domain->geometry.aperture_end); /* Try to get PCI devices a SAC address */ if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev)) - iova = alloc_iova(iovad, iova_len, DMA_BIT_MASK(32) >> shift, - true); - /* - * Enforce size-alignment to be safe - there could perhaps be an - * attribute to control this per-device, or at least per-domain... - */ - if (!iova) - iova = alloc_iova(iovad, iova_len, dma_limit >> shift, true); + iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift); - return (dma_addr_t)iova->pfn_lo << shift; + if (!iova) + iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift); + + return (dma_addr_t)iova << shift; } static void iommu_dma_free_iova(struct iommu_dma_cookie *cookie, dma_addr_t iova, size_t size) { struct iova_domain *iovad = &cookie->iovad; - struct iova *iova_rbnode; + unsigned long shift = iova_shift(iovad); /* The MSI case is only ever cleaning up its most recent allocation */ - if (cookie->type == IOMMU_DMA_MSI_COOKIE) { + if (cookie->type == IOMMU_DMA_MSI_COOKIE) cookie->msi_iova -= size; - return; - } - - iova_rbnode = find_iova(iovad, iova_pfn(iovad, iova)); - if (WARN_ON(!iova_rbnode)) - return; - - __free_iova(iovad, iova_rbnode); + else + free_iova_fast(iovad, iova >> shift, size >> shift); } static void __iommu_dma_unmap(struct iommu_domain *domain, dma_addr_t dma_addr, From c9d9f2394c6a953585874a1a6cb2ecea853fdcf2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 31 Mar 2017 16:26:03 +0200 Subject: [PATCH 15/63] iommu/rockchip: Make use of 'struct iommu_device' Register hardware IOMMUs seperatly with the iommu-core code and add a sysfs representation of the iommu topology. Tested-by: Heiko Stuebner Signed-off-by: Joerg Roedel --- drivers/iommu/rockchip-iommu.c | 30 ++++++++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 9afcbf79f0b0..36d089025f4c 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -90,6 +90,7 @@ struct rk_iommu { void __iomem **bases; int num_mmu; int irq; + struct iommu_device iommu; struct list_head node; /* entry in rk_iommu_domain.iommus */ struct iommu_domain *domain; /* domain to which iommu is attached */ }; @@ -1032,6 +1033,7 @@ static int rk_iommu_group_set_iommudata(struct iommu_group *group, static int rk_iommu_add_device(struct device *dev) { struct iommu_group *group; + struct rk_iommu *iommu; int ret; if (!rk_iommu_is_dev_iommu_master(dev)) @@ -1054,6 +1056,10 @@ static int rk_iommu_add_device(struct device *dev) if (ret) goto err_remove_device; + iommu = rk_iommu_from_dev(dev); + if (iommu) + iommu_device_link(&iommu->iommu, dev); + iommu_group_put(group); return 0; @@ -1067,9 +1073,15 @@ err_put_group: static void rk_iommu_remove_device(struct device *dev) { + struct rk_iommu *iommu; + if (!rk_iommu_is_dev_iommu_master(dev)) return; + iommu = rk_iommu_from_dev(dev); + if (iommu) + iommu_device_unlink(&iommu->iommu, dev); + iommu_group_remove_device(dev); } @@ -1117,7 +1129,7 @@ static int rk_iommu_probe(struct platform_device *pdev) struct rk_iommu *iommu; struct resource *res; int num_res = pdev->num_resources; - int i; + int err, i; iommu = devm_kzalloc(dev, sizeof(*iommu), GFP_KERNEL); if (!iommu) @@ -1150,11 +1162,25 @@ static int rk_iommu_probe(struct platform_device *pdev) return -ENXIO; } - return 0; + err = iommu_device_sysfs_add(&iommu->iommu, dev, NULL, dev_name(dev)); + if (err) + return err; + + iommu_device_set_ops(&iommu->iommu, &rk_iommu_ops); + err = iommu_device_register(&iommu->iommu); + + return err; } static int rk_iommu_remove(struct platform_device *pdev) { + struct rk_iommu *iommu = platform_get_drvdata(pdev); + + if (iommu) { + iommu_device_sysfs_remove(&iommu->iommu); + iommu_device_unregister(&iommu->iommu); + } + return 0; } From 6f66ea099fc2f31d31d6cd39f3b13b23bdeb6196 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 31 Mar 2017 15:12:31 +0200 Subject: [PATCH 16/63] iommu/mediatek: Teach MTK-IOMMUv1 about 'struct iommu_device' Make use of the iommu_device_register() interface. Signed-off-by: Joerg Roedel --- drivers/iommu/mtk_iommu_v1.c | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/mtk_iommu_v1.c b/drivers/iommu/mtk_iommu_v1.c index 19e010083408..a27ef570c328 100644 --- a/drivers/iommu/mtk_iommu_v1.c +++ b/drivers/iommu/mtk_iommu_v1.c @@ -431,9 +431,10 @@ err_release_mapping: static int mtk_iommu_add_device(struct device *dev) { - struct iommu_group *group; struct of_phandle_args iommu_spec; struct of_phandle_iterator it; + struct mtk_iommu_data *data; + struct iommu_group *group; int err; of_for_each_phandle(&it, err, dev->of_node, "iommus", @@ -450,6 +451,9 @@ static int mtk_iommu_add_device(struct device *dev) if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops) return -ENODEV; /* Not a iommu client device */ + data = dev->iommu_fwspec->iommu_priv; + iommu_device_link(&data->iommu, dev); + group = iommu_group_get_for_dev(dev); if (IS_ERR(group)) return PTR_ERR(group); @@ -460,9 +464,14 @@ static int mtk_iommu_add_device(struct device *dev) static void mtk_iommu_remove_device(struct device *dev) { + struct mtk_iommu_data *data; + if (!dev->iommu_fwspec || dev->iommu_fwspec->ops != &mtk_iommu_ops) return; + data = dev->iommu_fwspec->iommu_priv; + iommu_device_unlink(&data->iommu, dev); + iommu_group_remove_device(dev); iommu_fwspec_free(dev); } @@ -627,6 +636,17 @@ static int mtk_iommu_probe(struct platform_device *pdev) if (ret) return ret; + ret = iommu_device_sysfs_add(&data->iommu, &pdev->dev, NULL, + dev_name(&pdev->dev)); + if (ret) + return ret; + + iommu_device_set_ops(&data->iommu, &mtk_iommu_ops); + + ret = iommu_device_register(&data->iommu); + if (ret) + return ret; + if (!iommu_present(&platform_bus_type)) bus_set_iommu(&platform_bus_type, &mtk_iommu_ops); @@ -637,6 +657,9 @@ static int mtk_iommu_remove(struct platform_device *pdev) { struct mtk_iommu_data *data = platform_get_drvdata(pdev); + iommu_device_sysfs_remove(&data->iommu); + iommu_device_unregister(&data->iommu); + if (iommu_present(&platform_bus_type)) bus_set_iommu(&platform_bus_type, NULL); From adf5e5168bd51c42332ebaa709351fa6ed65ea73 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 27 Jan 2017 12:22:54 +0000 Subject: [PATCH 17/63] iommu: Better document the IOMMU_PRIV flag This is a fairly subtle thing - let's make sure it's described as clearly as possible to avoid potential misunderstandings. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- include/linux/iommu.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2e4de0deee53..88ec8c6580d3 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -32,10 +32,13 @@ #define IOMMU_NOEXEC (1 << 3) #define IOMMU_MMIO (1 << 4) /* e.g. things like MSI doorbells */ /* - * This is to make the IOMMU API setup privileged - * mapppings accessible by the master only at higher - * privileged execution level and inaccessible at - * less privileged levels. + * Where the bus hardware includes a privilege level as part of its access type + * markings, and certain devices are capable of issuing transactions marked as + * either 'supervisor' or 'user', the IOMMU_PRIV flag requests that the other + * given permission flags only apply to accesses at the higher privilege level, + * and that unprivileged transactions should have as little access as possible. + * This would usually imply the same permissions as kernel mappings on the CPU, + * if the IOMMU page table format is equivalent. */ #define IOMMU_PRIV (1 << 5) From 53c35dce45713d2a554109c21a8cd617d09eba50 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 13 Mar 2017 11:39:01 +0100 Subject: [PATCH 18/63] iommu/arm-smmu: Print message when Cavium erratum 27704 was detected Firmware is responsible for properly enabling smmu workarounds. Print a message for better diagnostics when Cavium erratum 27704 was detected. Reviewed-by: Robin Murphy Signed-off-by: Robert Richter Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index b493c99e17f7..8e16da64e72e 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1887,6 +1887,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) atomic_add_return(smmu->num_context_banks, &cavium_smmu_context_count); smmu->cavium_id_base -= smmu->num_context_banks; + dev_notice(smmu->dev, "\tenabling workaround for Cavium erratum 27704\n"); } /* ID2 */ From 125458ab3aefe9cf2f72dcfe7338dc9ad967da0b Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 28 Mar 2017 16:11:12 +0530 Subject: [PATCH 19/63] iommu/arm-smmu: Fix 16-bit ASID configuration 16-bit ASID should be enabled before initializing TTBR0/1, otherwise only LSB 8-bit ASID will be considered. Hence moving configuration of TTBCR register ahead of TTBR0/1 while initializing context bank. Signed-off-by: Sunil Goutham [will: rewrote comment] Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 42 ++++++++++++++++++++++------------------ 1 file changed, 23 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 8e16da64e72e..e021b360e315 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -758,6 +758,29 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, } writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx)); + /* + * TTBCR + * We must write this before the TTBRs, since it determines the + * access behaviour of some fields (in particular, ASID[15:8]). + */ + if (stage1) { + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { + reg = pgtbl_cfg->arm_v7s_cfg.tcr; + reg2 = 0; + } else { + reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr; + reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32; + reg2 |= TTBCR2_SEP_UPSTREAM; + if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) + reg2 |= TTBCR2_AS; + } + if (smmu->version > ARM_SMMU_V1) + writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2); + } else { + reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; + } + writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR); + /* TTBRs */ if (stage1) { u16 asid = ARM_SMMU_CB_ASID(smmu, cfg); @@ -781,25 +804,6 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0); } - /* TTBCR */ - if (stage1) { - if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { - reg = pgtbl_cfg->arm_v7s_cfg.tcr; - reg2 = 0; - } else { - reg = pgtbl_cfg->arm_lpae_s1_cfg.tcr; - reg2 = pgtbl_cfg->arm_lpae_s1_cfg.tcr >> 32; - reg2 |= TTBCR2_SEP_UPSTREAM; - if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) - reg2 |= TTBCR2_AS; - } - if (smmu->version > ARM_SMMU_V1) - writel_relaxed(reg2, cb_base + ARM_SMMU_CB_TTBCR2); - } else { - reg = pgtbl_cfg->arm_lpae_s2_cfg.vtcr; - } - writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBCR); - /* MAIRs (stage-1 only) */ if (stage1) { if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { From 280b683ceaceb7508dc1e9c7e148ea1dcdf36543 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 30 Mar 2017 17:56:29 +0100 Subject: [PATCH 20/63] iommu/arm-smmu: Simplify ASID/VMID handling Calculating ASIDs/VMIDs dynamically from arm_smmu_cfg was a neat trick, but the global uniqueness workaround makes it somewhat more awkward, and means we end up having to pass extra state around in certain cases just to keep a handle on the offset. We already have 16 bits going spare in arm_smmu_cfg; let's just precalculate an ASID/VMID, plop it in there, and tidy up the users accordingly. We'd also need something like this anyway if we ever get near to thinking about SVM, so it's no bad thing. Reviewed-by: Jordan Crouse Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index e021b360e315..05f17ebd70f6 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -404,14 +404,15 @@ enum arm_smmu_context_fmt { struct arm_smmu_cfg { u8 cbndx; u8 irptndx; + union { + u16 asid; + u16 vmid; + }; u32 cbar; enum arm_smmu_context_fmt fmt; }; #define INVALID_IRPTNDX 0xff -#define ARM_SMMU_CB_ASID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx) -#define ARM_SMMU_CB_VMID(smmu, cfg) ((u16)(smmu)->cavium_id_base + (cfg)->cbndx + 1) - enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_S1 = 0, ARM_SMMU_DOMAIN_S2, @@ -603,12 +604,10 @@ static void arm_smmu_tlb_inv_context(void *cookie) if (stage1) { base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); - writel_relaxed(ARM_SMMU_CB_ASID(smmu, cfg), - base + ARM_SMMU_CB_S1_TLBIASID); + writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID); } else { base = ARM_SMMU_GR0(smmu); - writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg), - base + ARM_SMMU_GR0_TLBIVMID); + writel_relaxed(cfg->vmid, base + ARM_SMMU_GR0_TLBIVMID); } __arm_smmu_tlb_sync(smmu); @@ -629,14 +628,14 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) { iova &= ~12UL; - iova |= ARM_SMMU_CB_ASID(smmu, cfg); + iova |= cfg->asid; do { writel_relaxed(iova, reg); iova += granule; } while (size -= granule); } else { iova >>= 12; - iova |= (u64)ARM_SMMU_CB_ASID(smmu, cfg) << 48; + iova |= (u64)cfg->asid << 48; do { writeq_relaxed(iova, reg); iova += granule >> 12; @@ -653,7 +652,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, } while (size -= granule); } else { reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID; - writel_relaxed(ARM_SMMU_CB_VMID(smmu, cfg), reg); + writel_relaxed(cfg->vmid, reg); } } @@ -735,7 +734,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, reg = CBA2R_RW64_32BIT; /* 16-bit VMIDs live in CBA2R */ if (smmu->features & ARM_SMMU_FEAT_VMID16) - reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBA2R_VMID_SHIFT; + reg |= cfg->vmid << CBA2R_VMID_SHIFT; writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx)); } @@ -754,7 +753,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, (CBAR_S1_MEMATTR_WB << CBAR_S1_MEMATTR_SHIFT); } else if (!(smmu->features & ARM_SMMU_FEAT_VMID16)) { /* 8-bit VMIDs live in CBAR */ - reg |= ARM_SMMU_CB_VMID(smmu, cfg) << CBAR_VMID_SHIFT; + reg |= cfg->vmid << CBAR_VMID_SHIFT; } writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx)); @@ -783,20 +782,18 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, /* TTBRs */ if (stage1) { - u16 asid = ARM_SMMU_CB_ASID(smmu, cfg); - if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH32_S) { reg = pgtbl_cfg->arm_v7s_cfg.ttbr[0]; writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR0); reg = pgtbl_cfg->arm_v7s_cfg.ttbr[1]; writel_relaxed(reg, cb_base + ARM_SMMU_CB_TTBR1); - writel_relaxed(asid, cb_base + ARM_SMMU_CB_CONTEXTIDR); + writel_relaxed(cfg->asid, cb_base + ARM_SMMU_CB_CONTEXTIDR); } else { reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[0]; - reg64 |= (u64)asid << TTBRn_ASID_SHIFT; + reg64 |= (u64)cfg->asid << TTBRn_ASID_SHIFT; writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR0); reg64 = pgtbl_cfg->arm_lpae_s1_cfg.ttbr[1]; - reg64 |= (u64)asid << TTBRn_ASID_SHIFT; + reg64 |= (u64)cfg->asid << TTBRn_ASID_SHIFT; writeq_relaxed(reg64, cb_base + ARM_SMMU_CB_TTBR1); } } else { @@ -945,6 +942,11 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, cfg->irptndx = cfg->cbndx; } + if (smmu_domain->stage == ARM_SMMU_DOMAIN_S2) + cfg->vmid = cfg->cbndx + 1 + smmu->cavium_id_base; + else + cfg->asid = cfg->cbndx + smmu->cavium_id_base; + pgtbl_cfg = (struct io_pgtable_cfg) { .pgsize_bitmap = smmu->pgsize_bitmap, .ias = ias, From 452107c79035c6654b963e83c4862d9faa195af4 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 30 Mar 2017 17:56:30 +0100 Subject: [PATCH 21/63] iommu/arm-smmu: Tidy up context bank indexing ARM_AMMU_CB() is calculated relative to ARM_SMMU_CB_BASE(), but the latter is never of use on its own, and what we end up with is the same ARM_SMMU_CB_BASE() + ARM_AMMU_CB() expression being duplicated at every callsite. Folding the two together gives us a self-contained context bank accessor which is much more pleasant to work with. Secondly, we might as well simplify CB_BASE itself at the same time. We use the address space size for its own sake precisely once, at probe time, and every other usage is to dynamically calculate CB_BASE over and over and over again. Let's flip things around so that we just maintain the CB_BASE address directly. Reviewed-by: Jordan Crouse Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 31 +++++++++++++++---------------- 1 file changed, 15 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 05f17ebd70f6..27922ffd330c 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -216,8 +216,7 @@ enum arm_smmu_s2cr_privcfg { #define CBA2R_VMID_MASK 0xffff /* Translation context bank */ -#define ARM_SMMU_CB_BASE(smmu) ((smmu)->base + ((smmu)->size >> 1)) -#define ARM_SMMU_CB(smmu, n) ((n) * (1 << (smmu)->pgshift)) +#define ARM_SMMU_CB(smmu, n) ((smmu)->cb_base + ((n) << (smmu)->pgshift)) #define ARM_SMMU_CB_SCTLR 0x0 #define ARM_SMMU_CB_ACTLR 0x4 @@ -344,7 +343,7 @@ struct arm_smmu_device { struct device *dev; void __iomem *base; - unsigned long size; + void __iomem *cb_base; unsigned long pgshift; #define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0) @@ -603,7 +602,7 @@ static void arm_smmu_tlb_inv_context(void *cookie) void __iomem *base; if (stage1) { - base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + base = ARM_SMMU_CB(smmu, cfg->cbndx); writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID); } else { base = ARM_SMMU_GR0(smmu); @@ -623,7 +622,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, void __iomem *reg; if (stage1) { - reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + reg = ARM_SMMU_CB(smmu, cfg->cbndx); reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) { @@ -642,7 +641,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, } while (size -= granule); } } else if (smmu->version == ARM_SMMU_V2) { - reg = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + reg = ARM_SMMU_CB(smmu, cfg->cbndx); reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2; iova >>= 12; @@ -672,7 +671,7 @@ static irqreturn_t arm_smmu_context_fault(int irq, void *dev) struct arm_smmu_device *smmu = smmu_domain->smmu; void __iomem *cb_base; - cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); fsr = readl_relaxed(cb_base + ARM_SMMU_CB_FSR); if (!(fsr & FSR_FAULT)) @@ -725,7 +724,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain, gr1_base = ARM_SMMU_GR1(smmu); stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; - cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); if (smmu->version > ARM_SMMU_V1) { if (cfg->fmt == ARM_SMMU_CTX_FMT_AARCH64) @@ -1011,7 +1010,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) * Disable the context bank and free the page tables before freeing * it. */ - cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); if (cfg->irptndx != INVALID_IRPTNDX) { @@ -1362,7 +1361,7 @@ static phys_addr_t arm_smmu_iova_to_phys_hard(struct iommu_domain *domain, u64 phys; unsigned long va; - cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, cfg->cbndx); + cb_base = ARM_SMMU_CB(smmu, cfg->cbndx); /* ATS1 registers can only be written atomically */ va = iova & ~0xfffUL; @@ -1689,7 +1688,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) /* Make sure all context banks are disabled and clear CB_FSR */ for (i = 0; i < smmu->num_context_banks; ++i) { - cb_base = ARM_SMMU_CB_BASE(smmu) + ARM_SMMU_CB(smmu, i); + cb_base = ARM_SMMU_CB(smmu, i); writel_relaxed(0, cb_base + ARM_SMMU_CB_SCTLR); writel_relaxed(FSR_FAULT, cb_base + ARM_SMMU_CB_FSR); /* @@ -1869,11 +1868,11 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) /* Check for size mismatch of SMMU address space from mapped region */ size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1); - size *= 2 << smmu->pgshift; - if (smmu->size != size) + size <<= smmu->pgshift; + if (smmu->cb_base != gr0_base + size) dev_warn(smmu->dev, - "SMMU address space size (0x%lx) differs from mapped region size (0x%lx)!\n", - size, smmu->size); + "SMMU address space size (0x%lx) differs from mapped region size (0x%tx)!\n", + size * 2, (smmu->cb_base - gr0_base) * 2); smmu->num_s2_context_banks = (id >> ID1_NUMS2CB_SHIFT) & ID1_NUMS2CB_MASK; smmu->num_context_banks = (id >> ID1_NUMCB_SHIFT) & ID1_NUMCB_MASK; @@ -2110,7 +2109,7 @@ static int arm_smmu_device_probe(struct platform_device *pdev) smmu->base = devm_ioremap_resource(dev, res); if (IS_ERR(smmu->base)) return PTR_ERR(smmu->base); - smmu->size = resource_size(res); + smmu->cb_base = smmu->base + resource_size(res) / 2; num_irqs = 0; while ((res = platform_get_resource(pdev, IORESOURCE_IRQ, num_irqs))) { From 11febfca2419652f28804879a58ffd32adce2372 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 30 Mar 2017 17:56:31 +0100 Subject: [PATCH 22/63] iommu/arm-smmu: Use per-context TLB sync as appropriate TLB synchronisation typically involves the SMMU blocking all incoming transactions until the TLBs report completion of all outstanding operations. In the common SMMUv2 configuration of a single distributed SMMU serving multiple peripherals, that means that a single unmap request has the potential to bring the hammer down on the entire system if synchronised globally. Since stage 1 contexts, and stage 2 contexts under SMMUv2, offer local sync operations, let's make use of those wherever we can in the hope of minimising global disruption. To that end, rather than add any more branches to the already unwieldy monolithic TLB maintenance ops, break them up into smaller, neater, functions which we can then mix and match as appropriate. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 115 +++++++++++++++++++++++++++------------ 1 file changed, 81 insertions(+), 34 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 27922ffd330c..83f50142e63e 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -237,6 +237,8 @@ enum arm_smmu_s2cr_privcfg { #define ARM_SMMU_CB_S1_TLBIVAL 0x620 #define ARM_SMMU_CB_S2_TLBIIPAS2 0x630 #define ARM_SMMU_CB_S2_TLBIIPAS2L 0x638 +#define ARM_SMMU_CB_TLBSYNC 0x7f0 +#define ARM_SMMU_CB_TLBSTATUS 0x7f4 #define ARM_SMMU_CB_ATS1PR 0x800 #define ARM_SMMU_CB_ATSR 0x8f0 @@ -569,14 +571,13 @@ static void __arm_smmu_free_bitmap(unsigned long *map, int idx) } /* Wait for any pending TLB invalidations to complete */ -static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) +static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, + void __iomem *sync, void __iomem *status) { int count = 0; - void __iomem *gr0_base = ARM_SMMU_GR0(smmu); - writel_relaxed(0, gr0_base + ARM_SMMU_GR0_sTLBGSYNC); - while (readl_relaxed(gr0_base + ARM_SMMU_GR0_sTLBGSTATUS) - & sTLBGSTATUS_GSACTIVE) { + writel_relaxed(0, sync); + while (readl_relaxed(status) & sTLBGSTATUS_GSACTIVE) { cpu_relax(); if (++count == TLB_LOOP_TIMEOUT) { dev_err_ratelimited(smmu->dev, @@ -587,29 +588,49 @@ static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu) } } -static void arm_smmu_tlb_sync(void *cookie) +static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu) { - struct arm_smmu_domain *smmu_domain = cookie; - __arm_smmu_tlb_sync(smmu_domain->smmu); + void __iomem *base = ARM_SMMU_GR0(smmu); + + __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_GR0_sTLBGSYNC, + base + ARM_SMMU_GR0_sTLBGSTATUS); } -static void arm_smmu_tlb_inv_context(void *cookie) +static void arm_smmu_tlb_sync_context(void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + struct arm_smmu_device *smmu = smmu_domain->smmu; + void __iomem *base = ARM_SMMU_CB(smmu, smmu_domain->cfg.cbndx); + + __arm_smmu_tlb_sync(smmu, base + ARM_SMMU_CB_TLBSYNC, + base + ARM_SMMU_CB_TLBSTATUS); +} + +static void arm_smmu_tlb_sync_vmid(void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + + arm_smmu_tlb_sync_global(smmu_domain->smmu); +} + +static void arm_smmu_tlb_inv_context_s1(void *cookie) { struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + void __iomem *base = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx); + + writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID); + arm_smmu_tlb_sync_context(cookie); +} + +static void arm_smmu_tlb_inv_context_s2(void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_device *smmu = smmu_domain->smmu; - bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; - void __iomem *base; + void __iomem *base = ARM_SMMU_GR0(smmu); - if (stage1) { - base = ARM_SMMU_CB(smmu, cfg->cbndx); - writel_relaxed(cfg->asid, base + ARM_SMMU_CB_S1_TLBIASID); - } else { - base = ARM_SMMU_GR0(smmu); - writel_relaxed(cfg->vmid, base + ARM_SMMU_GR0_TLBIVMID); - } - - __arm_smmu_tlb_sync(smmu); + writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); + arm_smmu_tlb_sync_global(smmu); } static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, @@ -617,12 +638,10 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, { struct arm_smmu_domain *smmu_domain = cookie; struct arm_smmu_cfg *cfg = &smmu_domain->cfg; - struct arm_smmu_device *smmu = smmu_domain->smmu; bool stage1 = cfg->cbar != CBAR_TYPE_S2_TRANS; - void __iomem *reg; + void __iomem *reg = ARM_SMMU_CB(smmu_domain->smmu, cfg->cbndx); if (stage1) { - reg = ARM_SMMU_CB(smmu, cfg->cbndx); reg += leaf ? ARM_SMMU_CB_S1_TLBIVAL : ARM_SMMU_CB_S1_TLBIVA; if (cfg->fmt != ARM_SMMU_CTX_FMT_AARCH64) { @@ -640,8 +659,7 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, iova += granule >> 12; } while (size -= granule); } - } else if (smmu->version == ARM_SMMU_V2) { - reg = ARM_SMMU_CB(smmu, cfg->cbndx); + } else { reg += leaf ? ARM_SMMU_CB_S2_TLBIIPAS2L : ARM_SMMU_CB_S2_TLBIIPAS2; iova >>= 12; @@ -649,16 +667,40 @@ static void arm_smmu_tlb_inv_range_nosync(unsigned long iova, size_t size, smmu_write_atomic_lq(iova, reg); iova += granule >> 12; } while (size -= granule); - } else { - reg = ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_TLBIVMID; - writel_relaxed(cfg->vmid, reg); } } -static const struct iommu_gather_ops arm_smmu_gather_ops = { - .tlb_flush_all = arm_smmu_tlb_inv_context, +/* + * On MMU-401 at least, the cost of firing off multiple TLBIVMIDs appears + * almost negligible, but the benefit of getting the first one in as far ahead + * of the sync as possible is significant, hence we don't just make this a + * no-op and set .tlb_sync to arm_smmu_inv_context_s2() as you might think. + */ +static void arm_smmu_tlb_inv_vmid_nosync(unsigned long iova, size_t size, + size_t granule, bool leaf, void *cookie) +{ + struct arm_smmu_domain *smmu_domain = cookie; + void __iomem *base = ARM_SMMU_GR0(smmu_domain->smmu); + + writel_relaxed(smmu_domain->cfg.vmid, base + ARM_SMMU_GR0_TLBIVMID); +} + +static const struct iommu_gather_ops arm_smmu_s1_tlb_ops = { + .tlb_flush_all = arm_smmu_tlb_inv_context_s1, .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, - .tlb_sync = arm_smmu_tlb_sync, + .tlb_sync = arm_smmu_tlb_sync_context, +}; + +static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v2 = { + .tlb_flush_all = arm_smmu_tlb_inv_context_s2, + .tlb_add_flush = arm_smmu_tlb_inv_range_nosync, + .tlb_sync = arm_smmu_tlb_sync_context, +}; + +static const struct iommu_gather_ops arm_smmu_s2_tlb_ops_v1 = { + .tlb_flush_all = arm_smmu_tlb_inv_context_s2, + .tlb_add_flush = arm_smmu_tlb_inv_vmid_nosync, + .tlb_sync = arm_smmu_tlb_sync_vmid, }; static irqreturn_t arm_smmu_context_fault(int irq, void *dev) @@ -833,6 +875,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, enum io_pgtable_fmt fmt; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_cfg *cfg = &smmu_domain->cfg; + const struct iommu_gather_ops *tlb_ops; mutex_lock(&smmu_domain->init_mutex); if (smmu_domain->smmu) @@ -904,6 +947,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, ias = min(ias, 32UL); oas = min(oas, 32UL); } + tlb_ops = &arm_smmu_s1_tlb_ops; break; case ARM_SMMU_DOMAIN_NESTED: /* @@ -922,12 +966,15 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, ias = min(ias, 40UL); oas = min(oas, 40UL); } + if (smmu->version == ARM_SMMU_V2) + tlb_ops = &arm_smmu_s2_tlb_ops_v2; + else + tlb_ops = &arm_smmu_s2_tlb_ops_v1; break; default: ret = -EINVAL; goto out_unlock; } - ret = __arm_smmu_alloc_bitmap(smmu->context_map, start, smmu->num_context_banks); if (ret < 0) @@ -950,7 +997,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, .pgsize_bitmap = smmu->pgsize_bitmap, .ias = ias, .oas = oas, - .tlb = &arm_smmu_gather_ops, + .tlb = tlb_ops, .iommu_dev = smmu->dev, }; @@ -1734,7 +1781,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) reg |= sCR0_EXIDENABLE; /* Push the button */ - __arm_smmu_tlb_sync(smmu); + arm_smmu_tlb_sync_global(smmu); writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); } From 8513c89300696a68963cc504c21d034c6369e183 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 30 Mar 2017 17:56:32 +0100 Subject: [PATCH 23/63] iommu/arm-smmu: Poll for TLB sync completion more effectively On relatively slow development platforms and software models, the inefficiency of our TLB sync loop tends not to show up - for instance on a Juno r1 board I typically see the TLBI has completed of its own accord by the time we get to the sync, such that the latter finishes instantly. However, on larger systems doing real I/O, it's less realistic for the TLBs to go idle immediately, and at that point falling into the 1MHz polling loop turns out to throw away performance drastically. Let's strike a balance by polling more than once between pauses, such that we have much more chance of catching normal operations completing before committing to the fixed delay, but also backing off exponentially, since if a sync really hasn't completed within one or two "reasonable time" periods, it becomes increasingly unlikely that it ever will. Reviewed-by: Jordan Crouse Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 83f50142e63e..618e133f643a 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -162,6 +162,7 @@ #define ARM_SMMU_GR0_sTLBGSTATUS 0x74 #define sTLBGSTATUS_GSACTIVE (1 << 0) #define TLB_LOOP_TIMEOUT 1000000 /* 1s! */ +#define TLB_SPIN_COUNT 10 /* Stream mapping registers */ #define ARM_SMMU_GR0_SMR(n) (0x800 + ((n) << 2)) @@ -574,18 +575,19 @@ static void __arm_smmu_free_bitmap(unsigned long *map, int idx) static void __arm_smmu_tlb_sync(struct arm_smmu_device *smmu, void __iomem *sync, void __iomem *status) { - int count = 0; + unsigned int spin_cnt, delay; writel_relaxed(0, sync); - while (readl_relaxed(status) & sTLBGSTATUS_GSACTIVE) { - cpu_relax(); - if (++count == TLB_LOOP_TIMEOUT) { - dev_err_ratelimited(smmu->dev, - "TLB sync timed out -- SMMU may be deadlocked\n"); - return; + for (delay = 1; delay < TLB_LOOP_TIMEOUT; delay *= 2) { + for (spin_cnt = TLB_SPIN_COUNT; spin_cnt > 0; spin_cnt--) { + if (!(readl_relaxed(status) & sTLBGSTATUS_GSACTIVE)) + return; + cpu_relax(); } - udelay(1); + udelay(delay); } + dev_err_ratelimited(smmu->dev, + "TLB sync timed out -- SMMU may be deadlocked\n"); } static void arm_smmu_tlb_sync_global(struct arm_smmu_device *smmu) From 56fbf600dd8e2f32a5317437fe310b56719f7d2b Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 31 Mar 2017 12:03:33 +0100 Subject: [PATCH 24/63] iommu/arm-smmu: Add global SMR masking property The current SMR masking support using a 2-cell iommu-specifier is primarily intended to handle individual masters with large and/or complex Stream ID assignments; it quickly gets a bit clunky in other SMR use-cases where we just want to consistently mask out the same part of every Stream ID (e.g. for MMU-500 configurations where the appended TBU number gets in the way unnecessarily). Let's add a new property to allow a single global mask value to better fit the latter situation. Acked-by: Mark Rutland Tested-by: Nipun Gupta Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- .../devicetree/bindings/iommu/arm,smmu.txt | 28 +++++++++++++++++++ drivers/iommu/arm-smmu.c | 4 ++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt index 6cdf32d037fc..8a6ffce12af5 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -60,6 +60,17 @@ conditions. aliases of secure registers have to be used during SMMU configuration. +- stream-match-mask : For SMMUs supporting stream matching and using + #iommu-cells = <1>, specifies a mask of bits to ignore + when matching stream IDs (e.g. this may be programmed + into the SMRn.MASK field of every stream match register + used). For cases where it is desirable to ignore some + portion of every Stream ID (e.g. for certain MMU-500 + configurations given globally unique input IDs). This + property is not valid for SMMUs using stream indexing, + or using stream matching with #iommu-cells = <2>, and + may be ignored if present in such cases. + ** Deprecated properties: - mmu-masters (deprecated in favour of the generic "iommus" binding) : @@ -109,3 +120,20 @@ conditions. master3 { iommus = <&smmu2 1 0x30>; }; + + + /* ARM MMU-500 with 10-bit stream ID input configuration */ + smmu3: iommu { + compatible = "arm,mmu-500", "arm,smmu-v2"; + ... + #iommu-cells = <1>; + /* always ignore appended 5-bit TBU number */ + stream-match-mask = 0x7c00; + }; + + bus { + /* bus whose child devices emit one unique 10-bit stream + ID each, but may master through multiple SMMU TBUs */ + iommu-map = <0 &smmu3 0 0x400>; + ... + }; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 618e133f643a..6560c4a34b96 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1644,13 +1644,15 @@ out_unlock: static int arm_smmu_of_xlate(struct device *dev, struct of_phandle_args *args) { - u32 fwid = 0; + u32 mask, fwid = 0; if (args->args_count > 0) fwid |= (u16)args->args[0]; if (args->args_count > 1) fwid |= (u16)args->args[1] << SMR_MASK_SHIFT; + else if (!of_property_read_u32(args->np, "stream-match-mask", &mask)) + fwid |= (u16)mask << SMR_MASK_SHIFT; return iommu_fwspec_add_ids(dev, &fwid, 1); } From 0834cc28fa56c65887c614b6c045be2ba06fdcb0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jan 2017 16:28:17 +0000 Subject: [PATCH 25/63] iommu/arm-smmu: Restrict domain attributes to UNMANAGED domains The ARM SMMU drivers provide a DOMAIN_ATTR_NESTING domain attribute, which allows callers of the IOMMU API to request that the page table for a domain is installed at stage-2, if supported by the hardware. Since setting this attribute only makes sense for UNMANAGED domains, this patch returns -ENODEV if the domain_{get,set}_attr operations are called on other domain types. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 6 ++++++ drivers/iommu/arm-smmu.c | 6 ++++++ 2 files changed, 12 insertions(+) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 591bb96047c9..b47a88757c18 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1837,6 +1837,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + if (domain->type != IOMMU_DOMAIN_UNMANAGED) + return -EINVAL; + switch (attr) { case DOMAIN_ATTR_NESTING: *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); @@ -1852,6 +1855,9 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, int ret = 0; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + if (domain->type != IOMMU_DOMAIN_UNMANAGED) + return -EINVAL; + mutex_lock(&smmu_domain->init_mutex); switch (attr) { diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 6560c4a34b96..099215bbff89 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1603,6 +1603,9 @@ static int arm_smmu_domain_get_attr(struct iommu_domain *domain, { struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + if (domain->type != IOMMU_DOMAIN_UNMANAGED) + return -EINVAL; + switch (attr) { case DOMAIN_ATTR_NESTING: *(int *)data = (smmu_domain->stage == ARM_SMMU_DOMAIN_NESTED); @@ -1618,6 +1621,9 @@ static int arm_smmu_domain_set_attr(struct iommu_domain *domain, int ret = 0; struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); + if (domain->type != IOMMU_DOMAIN_UNMANAGED) + return -EINVAL; + mutex_lock(&smmu_domain->init_mutex); switch (attr) { From 61bc671179f19060be883068b6d3d82ae0b24bc0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jan 2017 16:56:03 +0000 Subject: [PATCH 26/63] iommu/arm-smmu: Install bypass S2CRs for IOMMU_DOMAIN_IDENTITY domains In preparation for allowing the default domain type to be overridden, this patch adds support for IOMMU_DOMAIN_IDENTITY domains to the ARM SMMU driver. An identity domain is created by placing the corresponding S2CR registers into "bypass" mode, which allows transactions to flow through the SMMU without any translation. Reviewed-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 099215bbff89..dbd4998661c6 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -419,6 +419,7 @@ enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_S1 = 0, ARM_SMMU_DOMAIN_S2, ARM_SMMU_DOMAIN_NESTED, + ARM_SMMU_DOMAIN_BYPASS, }; struct arm_smmu_domain { @@ -883,6 +884,12 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, if (smmu_domain->smmu) goto out_unlock; + if (domain->type == IOMMU_DOMAIN_IDENTITY) { + smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS; + smmu_domain->smmu = smmu; + goto out_unlock; + } + /* * Mapping the requested stage onto what we support is surprisingly * complicated, mainly because the spec allows S1+S2 SMMUs without @@ -1052,7 +1059,7 @@ static void arm_smmu_destroy_domain_context(struct iommu_domain *domain) void __iomem *cb_base; int irq; - if (!smmu) + if (!smmu || domain->type == IOMMU_DOMAIN_IDENTITY) return; /* @@ -1075,7 +1082,9 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) { struct arm_smmu_domain *smmu_domain; - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) + if (type != IOMMU_DOMAIN_UNMANAGED && + type != IOMMU_DOMAIN_DMA && + type != IOMMU_DOMAIN_IDENTITY) return NULL; /* * Allocate the domain and initialise some of its data structures. @@ -1304,10 +1313,15 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain, { struct arm_smmu_device *smmu = smmu_domain->smmu; struct arm_smmu_s2cr *s2cr = smmu->s2crs; - enum arm_smmu_s2cr_type type = S2CR_TYPE_TRANS; u8 cbndx = smmu_domain->cfg.cbndx; + enum arm_smmu_s2cr_type type; int i, idx; + if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) + type = S2CR_TYPE_BYPASS; + else + type = S2CR_TYPE_TRANS; + for_each_cfg_sme(fwspec, i, idx) { if (type == s2cr[idx].type && cbndx == s2cr[idx].cbndx) continue; From 67560edcd8e5c57eccec4df562abbfc21c17ad75 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 1 Mar 2017 21:11:29 +0000 Subject: [PATCH 27/63] iommu/arm-smmu-v3: Make arm_smmu_install_ste_for_dev return void arm_smmu_install_ste_for_dev cannot fail and always returns 0, however the fact that it returns int means that callers end up implementing redundant error handling code which complicates STE tracking and is never executed. This patch changes the return type of arm_smmu_install_ste_for_dev to void, to make it explicit that it cannot fail. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index b47a88757c18..97be8de3e834 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1579,7 +1579,7 @@ static __le64 *arm_smmu_get_step_for_sid(struct arm_smmu_device *smmu, u32 sid) return step; } -static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) +static void arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) { int i; struct arm_smmu_master_data *master = fwspec->iommu_priv; @@ -1591,8 +1591,6 @@ static int arm_smmu_install_ste_for_dev(struct iommu_fwspec *fwspec) arm_smmu_write_strtab_ent(smmu, sid, step, &master->ste); } - - return 0; } static void arm_smmu_detach_dev(struct device *dev) @@ -1600,8 +1598,7 @@ static void arm_smmu_detach_dev(struct device *dev) struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv; master->ste.bypass = true; - if (arm_smmu_install_ste_for_dev(dev->iommu_fwspec) < 0) - dev_warn(dev, "failed to install bypass STE\n"); + arm_smmu_install_ste_for_dev(dev->iommu_fwspec); } static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) @@ -1653,10 +1650,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) ste->s2_cfg = &smmu_domain->s2_cfg; } - ret = arm_smmu_install_ste_for_dev(dev->iommu_fwspec); - if (ret < 0) - ste->valid = false; - + arm_smmu_install_ste_for_dev(dev->iommu_fwspec); out_unlock: mutex_unlock(&smmu_domain->init_mutex); return ret; From beb3c6a066bff1ba412f983cb9d1a42f4cd8f76a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 6 Jan 2017 16:27:30 +0000 Subject: [PATCH 28/63] iommu/arm-smmu-v3: Install bypass STEs for IOMMU_DOMAIN_IDENTITY domains In preparation for allowing the default domain type to be overridden, this patch adds support for IOMMU_DOMAIN_IDENTITY domains to the ARM SMMUv3 driver. An identity domain is created by placing the corresponding stream table entries into "bypass" mode, which allows transactions to flow through the SMMU without any translation. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu-v3.c | 58 +++++++++++++++++++++++-------------- 1 file changed, 37 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 97be8de3e834..803352d78d43 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -554,9 +554,14 @@ struct arm_smmu_s2_cfg { }; struct arm_smmu_strtab_ent { - bool valid; - - bool bypass; /* Overrides s1/s2 config */ + /* + * An STE is "assigned" if the master emitting the corresponding SID + * is attached to a domain. The behaviour of an unassigned STE is + * determined by the disable_bypass parameter, whereas an assigned + * STE behaves according to s1_cfg/s2_cfg, which themselves are + * configured according to the domain type. + */ + bool assigned; struct arm_smmu_s1_cfg *s1_cfg; struct arm_smmu_s2_cfg *s2_cfg; }; @@ -632,6 +637,7 @@ enum arm_smmu_domain_stage { ARM_SMMU_DOMAIN_S1 = 0, ARM_SMMU_DOMAIN_S2, ARM_SMMU_DOMAIN_NESTED, + ARM_SMMU_DOMAIN_BYPASS, }; struct arm_smmu_domain { @@ -1005,9 +1011,9 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, * This is hideously complicated, but we only really care about * three cases at the moment: * - * 1. Invalid (all zero) -> bypass (init) - * 2. Bypass -> translation (attach) - * 3. Translation -> bypass (detach) + * 1. Invalid (all zero) -> bypass/fault (init) + * 2. Bypass/fault -> translation/bypass (attach) + * 3. Translation/bypass -> bypass/fault (detach) * * Given that we can't update the STE atomically and the SMMU * doesn't read the thing in a defined order, that leaves us @@ -1046,11 +1052,15 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, } /* Nuke the existing STE_0 value, as we're going to rewrite it */ - val = ste->valid ? STRTAB_STE_0_V : 0; + val = STRTAB_STE_0_V; + + /* Bypass/fault */ + if (!ste->assigned || !(ste->s1_cfg || ste->s2_cfg)) { + if (!ste->assigned && disable_bypass) + val |= STRTAB_STE_0_CFG_ABORT; + else + val |= STRTAB_STE_0_CFG_BYPASS; - if (ste->bypass) { - val |= disable_bypass ? STRTAB_STE_0_CFG_ABORT - : STRTAB_STE_0_CFG_BYPASS; dst[0] = cpu_to_le64(val); dst[1] = cpu_to_le64(STRTAB_STE_1_SHCFG_INCOMING << STRTAB_STE_1_SHCFG_SHIFT); @@ -1111,10 +1121,7 @@ static void arm_smmu_write_strtab_ent(struct arm_smmu_device *smmu, u32 sid, static void arm_smmu_init_bypass_stes(u64 *strtab, unsigned int nent) { unsigned int i; - struct arm_smmu_strtab_ent ste = { - .valid = true, - .bypass = true, - }; + struct arm_smmu_strtab_ent ste = { .assigned = false }; for (i = 0; i < nent; ++i) { arm_smmu_write_strtab_ent(NULL, -1, strtab, &ste); @@ -1378,7 +1385,9 @@ static struct iommu_domain *arm_smmu_domain_alloc(unsigned type) { struct arm_smmu_domain *smmu_domain; - if (type != IOMMU_DOMAIN_UNMANAGED && type != IOMMU_DOMAIN_DMA) + if (type != IOMMU_DOMAIN_UNMANAGED && + type != IOMMU_DOMAIN_DMA && + type != IOMMU_DOMAIN_IDENTITY) return NULL; /* @@ -1509,6 +1518,11 @@ static int arm_smmu_domain_finalise(struct iommu_domain *domain) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct arm_smmu_device *smmu = smmu_domain->smmu; + if (domain->type == IOMMU_DOMAIN_IDENTITY) { + smmu_domain->stage = ARM_SMMU_DOMAIN_BYPASS; + return 0; + } + /* Restrict the stage to what we can actually support */ if (!(smmu->features & ARM_SMMU_FEAT_TRANS_S1)) smmu_domain->stage = ARM_SMMU_DOMAIN_S2; @@ -1597,7 +1611,7 @@ static void arm_smmu_detach_dev(struct device *dev) { struct arm_smmu_master_data *master = dev->iommu_fwspec->iommu_priv; - master->ste.bypass = true; + master->ste.assigned = false; arm_smmu_install_ste_for_dev(dev->iommu_fwspec); } @@ -1617,7 +1631,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) ste = &master->ste; /* Already attached to a different domain? */ - if (!ste->bypass) + if (ste->assigned) arm_smmu_detach_dev(dev); mutex_lock(&smmu_domain->init_mutex); @@ -1638,10 +1652,12 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) goto out_unlock; } - ste->bypass = false; - ste->valid = true; + ste->assigned = true; - if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { + if (smmu_domain->stage == ARM_SMMU_DOMAIN_BYPASS) { + ste->s1_cfg = NULL; + ste->s2_cfg = NULL; + } else if (smmu_domain->stage == ARM_SMMU_DOMAIN_S1) { ste->s1_cfg = &smmu_domain->s1_cfg; ste->s2_cfg = NULL; arm_smmu_write_ctx_desc(smmu, ste->s1_cfg); @@ -1801,7 +1817,7 @@ static void arm_smmu_remove_device(struct device *dev) master = fwspec->iommu_priv; smmu = master->smmu; - if (master && master->ste.valid) + if (master && master->ste.assigned) arm_smmu_detach_dev(dev); iommu_group_remove_device(dev); iommu_device_unlink(&smmu->iommu, dev); From fccb4e3b8ab0957628abec82675691c72f67003e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 5 Jan 2017 18:38:26 +0000 Subject: [PATCH 29/63] iommu: Allow default domain type to be set on the kernel command line The IOMMU core currently initialises the default domain for each group to IOMMU_DOMAIN_DMA, under the assumption that devices will use IOMMU-backed DMA ops by default. However, in some cases it is desirable for the DMA ops to bypass the IOMMU for performance reasons, reserving use of translation for subsystems such as VFIO that require it for enforcing device isolation. Rather than modify each IOMMU driver to provide different semantics for DMA domains, instead we introduce a command line parameter that can be used to change the type of the default domain. Passthrough can then be specified using "iommu.passthrough=1" on the kernel command line. Signed-off-by: Will Deacon --- .../admin-guide/kernel-parameters.txt | 6 ++++ drivers/iommu/iommu.c | 28 +++++++++++++++++-- 2 files changed, 31 insertions(+), 3 deletions(-) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2ba45caabada..187ac5bc8b40 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1644,6 +1644,12 @@ nobypass [PPC/POWERNV] Disable IOMMU bypass, using IOMMU for PCI devices. + iommu.passthrough= + [ARM64] Configure DMA to bypass the IOMMU by default. + Format: { "0" | "1" } + 0 - Use IOMMU translation for DMA. + 1 - Bypass the IOMMU for DMA. + unset - Use IOMMU translation for DMA. io7= [HW] IO7 for Marvel based alpha systems See comment before marvel_specify_io7 in diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 3b67144dead2..770ba7e7ef4d 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -36,6 +36,7 @@ static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); +static unsigned int iommu_def_domain_type = IOMMU_DOMAIN_DMA; struct iommu_callback_data { const struct iommu_ops *ops; @@ -112,6 +113,18 @@ static int __iommu_attach_group(struct iommu_domain *domain, static void __iommu_detach_group(struct iommu_domain *domain, struct iommu_group *group); +static int __init iommu_set_def_domain_type(char *str) +{ + bool pt; + + if (!str || strtobool(str, &pt)) + return -EINVAL; + + iommu_def_domain_type = pt ? IOMMU_DOMAIN_IDENTITY : IOMMU_DOMAIN_DMA; + return 0; +} +early_param("iommu.passthrough", iommu_set_def_domain_type); + static ssize_t iommu_group_attr_show(struct kobject *kobj, struct attribute *__attr, char *buf) { @@ -1015,10 +1028,19 @@ struct iommu_group *iommu_group_get_for_dev(struct device *dev) * IOMMU driver. */ if (!group->default_domain) { - group->default_domain = __iommu_domain_alloc(dev->bus, - IOMMU_DOMAIN_DMA); + struct iommu_domain *dom; + + dom = __iommu_domain_alloc(dev->bus, iommu_def_domain_type); + if (!dom && iommu_def_domain_type != IOMMU_DOMAIN_DMA) { + dev_warn(dev, + "failed to allocate default IOMMU domain of type %u; falling back to IOMMU_DOMAIN_DMA", + iommu_def_domain_type); + dom = __iommu_domain_alloc(dev->bus, IOMMU_DOMAIN_DMA); + } + + group->default_domain = dom; if (!group->domain) - group->domain = group->default_domain; + group->domain = dom; } ret = iommu_group_add_device(group, dev); From 022f4e4f31fea69702f3ec810dc567af6a6d86d8 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 3 Apr 2017 13:12:10 +0100 Subject: [PATCH 30/63] iommu/io-pgtable-arm: Avoid shift overflow in block size The recursive nature of __arm_lpae_{map,unmap}() means that ARM_LPAE_BLOCK_SIZE() is evaluated for every level, including those where block mappings aren't possible. This in itself is harmless enough, as we will only ever be called with valid sizes from the pgsize_bitmap, and thus always recurse down past any imaginary block sizes. The only problem is that most of those imaginary sizes overflow the type used for the calculation, and thus trigger warnings under UBsan: [ 63.020939] ================================================================================ [ 63.021284] UBSAN: Undefined behaviour in drivers/iommu/io-pgtable-arm.c:312:22 [ 63.021602] shift exponent 39 is too large for 32-bit type 'int' [ 63.021909] CPU: 0 PID: 1119 Comm: lkvm Not tainted 4.7.0-rc3+ #819 [ 63.022163] Hardware name: FVP Base (DT) [ 63.022345] Call trace: [ 63.022629] [] dump_backtrace+0x0/0x3a8 [ 63.022975] [] show_stack+0x14/0x20 [ 63.023294] [] dump_stack+0x104/0x148 [ 63.023609] [] ubsan_epilogue+0x18/0x68 [ 63.023956] [] __ubsan_handle_shift_out_of_bounds+0x18c/0x1bc [ 63.024365] [] __arm_lpae_map+0x720/0xae0 [ 63.024732] [] arm_lpae_map+0x100/0x190 [ 63.025049] [] arm_smmu_map+0x78/0xc8 [ 63.025390] [] iommu_map+0x130/0x230 [ 63.025763] [] vfio_iommu_type1_attach_group+0x4bc/0xa00 [ 63.026156] [] vfio_fops_unl_ioctl+0x320/0x580 [ 63.026515] [] do_vfs_ioctl+0x140/0xd28 [ 63.026858] [] SyS_ioctl+0x8c/0xa0 [ 63.027179] [] el0_svc_naked+0x24/0x28 [ 63.027412] ================================================================================ Perform the shift in a 64-bit type to prevent the theoretical overflow and keep the peace. As it turns out, this generates identical code for 32-bit ARM, and marginally shorter AArch64 code, so it's good all round. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/io-pgtable-arm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index f9bc6ebb8140..6e5df5e0a3bd 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -74,7 +74,7 @@ /* Calculate the block/page mapping size at level l for pagetable in d. */ #define ARM_LPAE_BLOCK_SIZE(l,d) \ - (1 << (ilog2(sizeof(arm_lpae_iopte)) + \ + (1ULL << (ilog2(sizeof(arm_lpae_iopte)) + \ ((ARM_LPAE_MAX_LEVELS - (l)) * (d)->bits_per_level))) /* Page table bits */ From 5016bdb796b3726eec043ca0ce3be981f712c756 Mon Sep 17 00:00:00 2001 From: Nate Watterson Date: Fri, 7 Apr 2017 01:36:20 -0400 Subject: [PATCH 31/63] iommu/iova: Fix underflow bug in __alloc_and_insert_iova_range Normally, calling alloc_iova() using an iova_domain with insufficient pfns remaining between start_pfn and dma_limit will fail and return a NULL pointer. Unexpectedly, if such a "full" iova_domain contains an iova with pfn_lo == 0, the alloc_iova() call will instead succeed and return an iova containing invalid pfns. This is caused by an underflow bug in __alloc_and_insert_iova_range() that occurs after walking the "full" iova tree when the search ends at the iova with pfn_lo == 0 and limit_pfn is then adjusted to be just below that (-1). This (now huge) limit_pfn gives the impression that a vast amount of space is available between it and start_pfn and thus a new iova is allocated with the invalid pfn_hi value, 0xFFF.... . To rememdy this, a check is introduced to ensure that adjustments to limit_pfn will not underflow. This issue has been observed in the wild, and is easily reproduced with the following sample code. struct iova_domain *iovad = kzalloc(sizeof(*iovad), GFP_KERNEL); struct iova *rsvd_iova, *good_iova, *bad_iova; unsigned long limit_pfn = 3; unsigned long start_pfn = 1; unsigned long va_size = 2; init_iova_domain(iovad, SZ_4K, start_pfn, limit_pfn); rsvd_iova = reserve_iova(iovad, 0, 0); good_iova = alloc_iova(iovad, va_size, limit_pfn, true); bad_iova = alloc_iova(iovad, va_size, limit_pfn, true); Prior to the patch, this yielded: *rsvd_iova == {0, 0} /* Expected */ *good_iova == {2, 3} /* Expected */ *bad_iova == {-2, -1} /* Oh no... */ After the patch, bad_iova is NULL as expected since inadequate space remains between limit_pfn and start_pfn after allocating good_iova. Signed-off-by: Nate Watterson Signed-off-by: Joerg Roedel --- drivers/iommu/iova.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index b7268a14184f..f6533e0198f6 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -138,7 +138,7 @@ static int __alloc_and_insert_iova_range(struct iova_domain *iovad, break; /* found a free slot */ } adjust_limit_pfn: - limit_pfn = curr_iova->pfn_lo - 1; + limit_pfn = curr_iova->pfn_lo ? (curr_iova->pfn_lo - 1) : 0; move_left: prev = curr; curr = rb_prev(curr); From 2a0c57545a291f257cd231b1c4b18285b84608d8 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 10 Apr 2017 16:50:56 +0530 Subject: [PATCH 32/63] iommu/of: Refactor of_iommu_configure() for error handling In preparation for some upcoming cleverness, rework the control flow in of_iommu_configure() to minimise duplication and improve the propogation of errors. It's also as good a time as any to switch over from the now-just-a-compatibility-wrapper of_iommu_get_ops() to using the generic IOMMU instance interface directly. Tested-by: Marek Szyprowski Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/of_iommu.c | 83 +++++++++++++++++++++++++--------------- 1 file changed, 53 insertions(+), 30 deletions(-) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 2683e9fc0dcf..8f4e59985f6e 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -96,6 +96,28 @@ int of_get_dma_window(struct device_node *dn, const char *prefix, int index, } EXPORT_SYMBOL_GPL(of_get_dma_window); +static const struct iommu_ops +*of_iommu_xlate(struct device *dev, struct of_phandle_args *iommu_spec) +{ + const struct iommu_ops *ops; + struct fwnode_handle *fwnode = &iommu_spec->np->fwnode; + int err; + + ops = iommu_ops_from_fwnode(fwnode); + if (!ops || !ops->of_xlate) + return NULL; + + err = iommu_fwspec_init(dev, &iommu_spec->np->fwnode, ops); + if (err) + return ERR_PTR(err); + + err = ops->of_xlate(dev, iommu_spec); + if (err) + return ERR_PTR(err); + + return ops; +} + static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data) { struct of_phandle_args *iommu_spec = data; @@ -105,10 +127,11 @@ static int __get_pci_rid(struct pci_dev *pdev, u16 alias, void *data) } static const struct iommu_ops -*of_pci_iommu_configure(struct pci_dev *pdev, struct device_node *bridge_np) +*of_pci_iommu_init(struct pci_dev *pdev, struct device_node *bridge_np) { const struct iommu_ops *ops; struct of_phandle_args iommu_spec; + int err; /* * Start by tracing the RID alias down the PCI topology as @@ -123,56 +146,56 @@ static const struct iommu_ops * bus into the system beyond, and which IOMMU it ends up at. */ iommu_spec.np = NULL; - if (of_pci_map_rid(bridge_np, iommu_spec.args[0], "iommu-map", - "iommu-map-mask", &iommu_spec.np, iommu_spec.args)) - return NULL; + err = of_pci_map_rid(bridge_np, iommu_spec.args[0], "iommu-map", + "iommu-map-mask", &iommu_spec.np, + iommu_spec.args); + if (err) + return err == -ENODEV ? NULL : ERR_PTR(err); - ops = iommu_ops_from_fwnode(&iommu_spec.np->fwnode); - if (!ops || !ops->of_xlate || - iommu_fwspec_init(&pdev->dev, &iommu_spec.np->fwnode, ops) || - ops->of_xlate(&pdev->dev, &iommu_spec)) - ops = NULL; + ops = of_iommu_xlate(&pdev->dev, &iommu_spec); of_node_put(iommu_spec.np); return ops; } -const struct iommu_ops *of_iommu_configure(struct device *dev, - struct device_node *master_np) +static const struct iommu_ops +*of_platform_iommu_init(struct device *dev, struct device_node *np) { struct of_phandle_args iommu_spec; - struct device_node *np; const struct iommu_ops *ops = NULL; int idx = 0; - if (dev_is_pci(dev)) - return of_pci_iommu_configure(to_pci_dev(dev), master_np); - /* * We don't currently walk up the tree looking for a parent IOMMU. * See the `Notes:' section of * Documentation/devicetree/bindings/iommu/iommu.txt */ - while (!of_parse_phandle_with_args(master_np, "iommus", - "#iommu-cells", idx, - &iommu_spec)) { - np = iommu_spec.np; - ops = iommu_ops_from_fwnode(&np->fwnode); - - if (!ops || !ops->of_xlate || - iommu_fwspec_init(dev, &np->fwnode, ops) || - ops->of_xlate(dev, &iommu_spec)) - goto err_put_node; - - of_node_put(np); + while (!of_parse_phandle_with_args(np, "iommus", "#iommu-cells", + idx, &iommu_spec)) { + ops = of_iommu_xlate(dev, &iommu_spec); + of_node_put(iommu_spec.np); idx++; + if (IS_ERR_OR_NULL(ops)) + break; } return ops; +} -err_put_node: - of_node_put(np); - return NULL; +const struct iommu_ops *of_iommu_configure(struct device *dev, + struct device_node *master_np) +{ + const struct iommu_ops *ops; + + if (!master_np) + return NULL; + + if (dev_is_pci(dev)) + ops = of_pci_iommu_init(to_pci_dev(dev), master_np); + else + ops = of_platform_iommu_init(dev, master_np); + + return IS_ERR(ops) ? NULL : ops; } static int __init of_iommu_init(void) From d7b0558230e444f29488fcee0b0b561015d16f8a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 10 Apr 2017 16:50:57 +0530 Subject: [PATCH 33/63] iommu/of: Prepare for deferred IOMMU configuration IOMMU configuration represents unchanging properties of the hardware, and as such should only need happen once in a device's lifetime, but the necessary interaction with the IOMMU device and driver complicates exactly when that point should be. Since the only reasonable tool available for handling the inter-device dependency is probe deferral, we need to prepare of_iommu_configure() to run later than it is currently called (i.e. at driver probe rather than device creation), to handle being retried, and to tell whether a not-yet present IOMMU should be waited for or skipped (by virtue of having declared a built-in driver or not). Tested-by: Marek Szyprowski Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/of_iommu.c | 43 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index 8f4e59985f6e..c8be889f5206 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -96,6 +96,19 @@ int of_get_dma_window(struct device_node *dn, const char *prefix, int index, } EXPORT_SYMBOL_GPL(of_get_dma_window); +static bool of_iommu_driver_present(struct device_node *np) +{ + /* + * If the IOMMU still isn't ready by the time we reach init, assume + * it never will be. We don't want to defer indefinitely, nor attempt + * to dereference __iommu_of_table after it's been freed. + */ + if (system_state > SYSTEM_BOOTING) + return false; + + return of_match_node(&__iommu_of_table, np); +} + static const struct iommu_ops *of_iommu_xlate(struct device *dev, struct of_phandle_args *iommu_spec) { @@ -104,12 +117,20 @@ static const struct iommu_ops int err; ops = iommu_ops_from_fwnode(fwnode); - if (!ops || !ops->of_xlate) + if ((ops && !ops->of_xlate) || + (!ops && !of_iommu_driver_present(iommu_spec->np))) return NULL; err = iommu_fwspec_init(dev, &iommu_spec->np->fwnode, ops); if (err) return ERR_PTR(err); + /* + * The otherwise-empty fwspec handily serves to indicate the specific + * IOMMU device we're waiting for, which will be useful if we ever get + * a proper probe-ordering dependency mechanism in future. + */ + if (!ops) + return ERR_PTR(-EPROBE_DEFER); err = ops->of_xlate(dev, iommu_spec); if (err) @@ -186,14 +207,34 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, struct device_node *master_np) { const struct iommu_ops *ops; + struct iommu_fwspec *fwspec = dev->iommu_fwspec; if (!master_np) return NULL; + if (fwspec) { + if (fwspec->ops) + return fwspec->ops; + + /* In the deferred case, start again from scratch */ + iommu_fwspec_free(dev); + } + if (dev_is_pci(dev)) ops = of_pci_iommu_init(to_pci_dev(dev), master_np); else ops = of_platform_iommu_init(dev, master_np); + /* + * If we have reason to believe the IOMMU driver missed the initial + * add_device callback for dev, replay it to get things in order. + */ + if (!IS_ERR_OR_NULL(ops) && ops->add_device && + dev->bus && !dev->iommu_group) { + int err = ops->add_device(dev); + + if (err) + ops = ERR_PTR(err); + } return IS_ERR(ops) ? NULL : ops; } From 3f1866779cf8338e1c8bd32e5f6f5424795ef191 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 10 Apr 2017 16:50:58 +0530 Subject: [PATCH 34/63] of: dma: Make of_dma_deconfigure() public As part of moving DMA initializing to probe time the of_dma_deconfigure() function will need to be called from different source files. Make it public and move it to drivers/of/device.c where the of_dma_configure() function is. Tested-by: Marek Szyprowski Reviewed-by: Robin Murphy Acked-by: Rob Herring Signed-off-by: Laurent Pinchart Signed-off-by: Joerg Roedel --- drivers/of/device.c | 12 ++++++++++++ drivers/of/platform.c | 5 ----- include/linux/of_device.h | 3 +++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/of/device.c b/drivers/of/device.c index b1e6bebda3f3..0d378c03e1a4 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -151,6 +151,18 @@ void of_dma_configure(struct device *dev, struct device_node *np) } EXPORT_SYMBOL_GPL(of_dma_configure); +/** + * of_dma_deconfigure - Clean up DMA configuration + * @dev: Device for which to clean up DMA configuration + * + * Clean up all configuration performed by of_dma_configure_ops() and free all + * resources that have been allocated. + */ +void of_dma_deconfigure(struct device *dev) +{ + arch_teardown_dma_ops(dev); +} + int of_device_register(struct platform_device *pdev) { device_initialize(&pdev->dev); diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 5dfcc967dd05..5344db50aa65 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -158,11 +158,6 @@ struct platform_device *of_device_alloc(struct device_node *np, } EXPORT_SYMBOL(of_device_alloc); -static void of_dma_deconfigure(struct device *dev) -{ - arch_teardown_dma_ops(dev); -} - /** * of_platform_device_create_pdata - Alloc, initialize and register an of_device * @np: pointer to node to create device for diff --git a/include/linux/of_device.h b/include/linux/of_device.h index c12dace043f3..af984551cc2b 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -56,6 +56,7 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) } void of_dma_configure(struct device *dev, struct device_node *np); +void of_dma_deconfigure(struct device *dev); #else /* CONFIG_OF */ static inline int of_driver_match_device(struct device *dev, @@ -105,6 +106,8 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) } static inline void of_dma_configure(struct device *dev, struct device_node *np) {} +static inline void of_dma_deconfigure(struct device *dev) +{} #endif /* CONFIG_OF */ #endif /* _LINUX_OF_DEVICE_H */ From 1d9029d440e40b276c0691caed1de10c42d96bef Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 10 Apr 2017 16:50:59 +0530 Subject: [PATCH 35/63] ACPI/IORT: Add function to check SMMUs drivers presence The IOMMU probe deferral implementation requires a mechanism to detect if drivers for SMMU components are built-in in the kernel to detect whether IOMMU configuration for a given device should be deferred (ie SMMU drivers present but still not probed) or not (drivers not present). Add a simple function to IORT to detect if SMMU drivers for SMMU components managed by IORT are built-in in the kernel. Tested-by: Hanjun Guo Signed-off-by: Lorenzo Pieralisi Reviewed-by: Robin Murphy Cc: Sricharan R Signed-off-by: Joerg Roedel --- drivers/acpi/arm64/iort.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 4a5bb967250b..3dd9ec372dae 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -523,6 +523,19 @@ static int arm_smmu_iort_xlate(struct device *dev, u32 streamid, return ret; } +static inline bool iort_iommu_driver_enabled(u8 type) +{ + switch (type) { + case ACPI_IORT_NODE_SMMU_V3: + return IS_BUILTIN(CONFIG_ARM_SMMU_V3); + case ACPI_IORT_NODE_SMMU: + return IS_BUILTIN(CONFIG_ARM_SMMU); + default: + pr_warn("IORT node type %u does not describe an SMMU\n", type); + return false; + } +} + static const struct iommu_ops *iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node, u32 streamid) From efc8551a276faab19d85079da02c5fb602b0dcbe Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Mon, 10 Apr 2017 16:51:00 +0530 Subject: [PATCH 36/63] of: device: Fix overflow of coherent_dma_mask Size of the dma-range is calculated as coherent_dma_mask + 1 and passed to arch_setup_dma_ops further. It overflows when the coherent_dma_mask is set for full 64 bits 0xFFFFFFFFFFFFFFFF, resulting in size getting passed as 0 wrongly. Fix this by passsing in max(mask, mask + 1). Note that in this case when the mask is set to full 64bits, we will be passing the mask itself to arch_setup_dma_ops instead of the size. The real fix for this should be to make arch_setup_dma_ops receive the mask and handle it, to be done in the future. Reviewed-by: Robin Murphy Acked-by: Rob Herring Signed-off-by: Sricharan R Signed-off-by: Joerg Roedel --- drivers/of/device.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/of/device.c b/drivers/of/device.c index 0d378c03e1a4..e1ae9e7104a1 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -107,7 +107,7 @@ void of_dma_configure(struct device *dev, struct device_node *np) ret = of_dma_get_range(np, &dma_addr, &paddr, &size); if (ret < 0) { dma_addr = offset = 0; - size = dev->coherent_dma_mask + 1; + size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1); } else { offset = PFN_DOWN(paddr - dma_addr); From 09515ef5ddad71c7820e5e428da418b709feeb26 Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Mon, 10 Apr 2017 16:51:01 +0530 Subject: [PATCH 37/63] of/acpi: Configure dma operations at probe time for platform/amba/pci bus devices Configuring DMA ops at probe time will allow deferring device probe when the IOMMU isn't available yet. The dma_configure for the device is now called from the generic device_attach callback just before the bus/driver probe is called. This way, configuring the DMA ops for the device would be called at the same place for all bus_types, hence the deferred probing mechanism should work for all buses as well. pci_bus_add_devices (platform/amba)(_device_create/driver_register) | | pci_bus_add_device (device_add/driver_register) | | device_attach device_initial_probe | | __device_attach_driver __device_attach_driver | driver_probe_device | really_probe | dma_configure Similarly on the device/driver_unregister path __device_release_driver is called which inturn calls dma_deconfigure. This patch changes the dma ops configuration to probe time for both OF and ACPI based platform/amba/pci bus devices. Tested-by: Marek Szyprowski Tested-by: Hanjun Guo Reviewed-by: Robin Murphy Acked-by: Rob Herring Acked-by: Bjorn Helgaas (drivers/pci part) Acked-by: Rafael J. Wysocki Signed-off-by: Sricharan R Signed-off-by: Joerg Roedel --- drivers/acpi/glue.c | 5 ----- drivers/base/dd.c | 9 +++++++++ drivers/base/dma-mapping.c | 40 +++++++++++++++++++++++++++++++++++++ drivers/of/platform.c | 5 +---- drivers/pci/probe.c | 28 -------------------------- include/linux/dma-mapping.h | 12 +++++++++++ 6 files changed, 62 insertions(+), 37 deletions(-) diff --git a/drivers/acpi/glue.c b/drivers/acpi/glue.c index fb19e1cdb641..c05f24107bfc 100644 --- a/drivers/acpi/glue.c +++ b/drivers/acpi/glue.c @@ -176,7 +176,6 @@ int acpi_bind_one(struct device *dev, struct acpi_device *acpi_dev) struct list_head *physnode_list; unsigned int node_id; int retval = -EINVAL; - enum dev_dma_attr attr; if (has_acpi_companion(dev)) { if (acpi_dev) { @@ -233,10 +232,6 @@ int acpi_bind_one(struct device *dev, struct acpi_device *acpi_dev) if (!has_acpi_companion(dev)) ACPI_COMPANION_SET(dev, acpi_dev); - attr = acpi_get_dma_attr(acpi_dev); - if (attr != DEV_DMA_NOT_SUPPORTED) - acpi_dma_configure(dev, attr); - acpi_physnode_link_name(physical_node_name, node_id); retval = sysfs_create_link(&acpi_dev->dev.kobj, &dev->kobj, physical_node_name); diff --git a/drivers/base/dd.c b/drivers/base/dd.c index a1fbf55c4d3a..4882f06d12df 100644 --- a/drivers/base/dd.c +++ b/drivers/base/dd.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include @@ -356,6 +357,10 @@ re_probe: if (ret) goto pinctrl_bind_failed; + ret = dma_configure(dev); + if (ret) + goto dma_failed; + if (driver_sysfs_add(dev)) { printk(KERN_ERR "%s: driver_sysfs_add(%s) failed\n", __func__, dev_name(dev)); @@ -417,6 +422,8 @@ re_probe: goto done; probe_failed: + dma_deconfigure(dev); +dma_failed: if (dev->bus) blocking_notifier_call_chain(&dev->bus->p->bus_notifier, BUS_NOTIFY_DRIVER_NOT_BOUND, dev); @@ -826,6 +833,8 @@ static void __device_release_driver(struct device *dev, struct device *parent) drv->remove(dev); device_links_driver_cleanup(dev); + dma_deconfigure(dev); + devres_release_all(dev); dev->driver = NULL; dev_set_drvdata(dev, NULL); diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c index efd71cf4fdea..449b948c7427 100644 --- a/drivers/base/dma-mapping.c +++ b/drivers/base/dma-mapping.c @@ -7,9 +7,11 @@ * This file is released under the GPLv2. */ +#include #include #include #include +#include #include #include @@ -341,3 +343,41 @@ void dma_common_free_remap(void *cpu_addr, size_t size, unsigned long vm_flags) vunmap(cpu_addr); } #endif + +/* + * Common configuration to enable DMA API use for a device + */ +#include + +int dma_configure(struct device *dev) +{ + struct device *bridge = NULL, *dma_dev = dev; + enum dev_dma_attr attr; + + if (dev_is_pci(dev)) { + bridge = pci_get_host_bridge_device(to_pci_dev(dev)); + dma_dev = bridge; + if (IS_ENABLED(CONFIG_OF) && dma_dev->parent && + dma_dev->parent->of_node) + dma_dev = dma_dev->parent; + } + + if (dma_dev->of_node) { + of_dma_configure(dev, dma_dev->of_node); + } else if (has_acpi_companion(dma_dev)) { + attr = acpi_get_dma_attr(to_acpi_device_node(dma_dev->fwnode)); + if (attr != DEV_DMA_NOT_SUPPORTED) + acpi_dma_configure(dev, attr); + } + + if (bridge) + pci_put_host_bridge_device(bridge); + + return 0; +} + +void dma_deconfigure(struct device *dev) +{ + of_dma_deconfigure(dev); + acpi_dma_deconfigure(dev); +} diff --git a/drivers/of/platform.c b/drivers/of/platform.c index 5344db50aa65..2aa4ebbde9cd 100644 --- a/drivers/of/platform.c +++ b/drivers/of/platform.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include @@ -186,11 +187,9 @@ static struct platform_device *of_platform_device_create_pdata( dev->dev.bus = &platform_bus_type; dev->dev.platform_data = platform_data; - of_dma_configure(&dev->dev, dev->dev.of_node); of_msi_configure(&dev->dev, dev->dev.of_node); if (of_device_add(dev) != 0) { - of_dma_deconfigure(&dev->dev); platform_device_put(dev); goto err_clear_flag; } @@ -248,7 +247,6 @@ static struct amba_device *of_amba_device_create(struct device_node *node, dev_set_name(&dev->dev, "%s", bus_id); else of_device_make_bus_id(&dev->dev); - of_dma_configure(&dev->dev, dev->dev.of_node); /* Allow the HW Peripheral ID to be overridden */ prop = of_get_property(node, "arm,primecell-periphid", NULL); @@ -542,7 +540,6 @@ static int of_platform_device_destroy(struct device *dev, void *data) amba_device_unregister(to_amba_device(dev)); #endif - of_dma_deconfigure(dev); of_node_clear_flag(dev->of_node, OF_POPULATED); of_node_clear_flag(dev->of_node, OF_POPULATED_BUS); return 0; diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index dfc9a2794141..5a8dd43db336 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1893,33 +1893,6 @@ static void pci_set_msi_domain(struct pci_dev *dev) dev_set_msi_domain(&dev->dev, d); } -/** - * pci_dma_configure - Setup DMA configuration - * @dev: ptr to pci_dev struct of the PCI device - * - * Function to update PCI devices's DMA configuration using the same - * info from the OF node or ACPI node of host bridge's parent (if any). - */ -static void pci_dma_configure(struct pci_dev *dev) -{ - struct device *bridge = pci_get_host_bridge_device(dev); - - if (IS_ENABLED(CONFIG_OF) && - bridge->parent && bridge->parent->of_node) { - of_dma_configure(&dev->dev, bridge->parent->of_node); - } else if (has_acpi_companion(bridge)) { - struct acpi_device *adev = to_acpi_device_node(bridge->fwnode); - enum dev_dma_attr attr = acpi_get_dma_attr(adev); - - if (attr == DEV_DMA_NOT_SUPPORTED) - dev_warn(&dev->dev, "DMA not supported.\n"); - else - acpi_dma_configure(&dev->dev, attr); - } - - pci_put_host_bridge_device(bridge); -} - void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) { int ret; @@ -1933,7 +1906,6 @@ void pci_device_add(struct pci_dev *dev, struct pci_bus *bus) dev->dev.dma_mask = &dev->dma_mask; dev->dev.dma_parms = &dev->dma_parms; dev->dev.coherent_dma_mask = 0xffffffffull; - pci_dma_configure(dev); pci_set_dma_max_seg_size(dev, 65536); pci_set_dma_seg_boundary(dev, 0xffffffff); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 0977317c6835..4f3eecedca2d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -728,6 +728,18 @@ dma_mark_declared_memory_occupied(struct device *dev, } #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */ +#ifdef CONFIG_HAS_DMA +int dma_configure(struct device *dev); +void dma_deconfigure(struct device *dev); +#else +static inline int dma_configure(struct device *dev) +{ + return 0; +} + +static inline void dma_deconfigure(struct device *dev) {} +#endif + /* * Managed DMA API */ From 7b07cbefb68d486febf47e13b570fed53d9296b4 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 10 Apr 2017 16:51:02 +0530 Subject: [PATCH 38/63] iommu: of: Handle IOMMU lookup failure with deferred probing or error Failures to look up an IOMMU when parsing the DT iommus property need to be handled separately from the .of_xlate() failures to support deferred probing. The lack of a registered IOMMU can be caused by the lack of a driver for the IOMMU, the IOMMU device probe not having been performed yet, having been deferred, or having failed. The first case occurs when the device tree describes the bus master and IOMMU topology correctly but no device driver exists for the IOMMU yet or the device driver has not been compiled in. Return NULL, the caller will configure the device without an IOMMU. The second and third cases are handled by deferring the probe of the bus master device which will eventually get reprobed after the IOMMU. The last case is currently handled by deferring the probe of the bus master device as well. A mechanism to either configure the bus master device without an IOMMU or to fail the bus master device probe depending on whether the IOMMU is optional or mandatory would be a good enhancement. Tested-by: Marek Szyprowski Reviewed-by: Robin Murphy Acked-by: Rob Herring Signed-off-by: Laurent Pichart Signed-off-by: Sricharan R Signed-off-by: Joerg Roedel --- drivers/base/dma-mapping.c | 5 +++-- drivers/iommu/of_iommu.c | 4 ++-- drivers/of/device.c | 9 +++++++-- include/linux/of_device.h | 9 ++++++--- 4 files changed, 18 insertions(+), 9 deletions(-) diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c index 449b948c7427..82bd45ced7ff 100644 --- a/drivers/base/dma-mapping.c +++ b/drivers/base/dma-mapping.c @@ -353,6 +353,7 @@ int dma_configure(struct device *dev) { struct device *bridge = NULL, *dma_dev = dev; enum dev_dma_attr attr; + int ret = 0; if (dev_is_pci(dev)) { bridge = pci_get_host_bridge_device(to_pci_dev(dev)); @@ -363,7 +364,7 @@ int dma_configure(struct device *dev) } if (dma_dev->of_node) { - of_dma_configure(dev, dma_dev->of_node); + ret = of_dma_configure(dev, dma_dev->of_node); } else if (has_acpi_companion(dma_dev)) { attr = acpi_get_dma_attr(to_acpi_device_node(dma_dev->fwnode)); if (attr != DEV_DMA_NOT_SUPPORTED) @@ -373,7 +374,7 @@ int dma_configure(struct device *dev) if (bridge) pci_put_host_bridge_device(bridge); - return 0; + return ret; } void dma_deconfigure(struct device *dev) diff --git a/drivers/iommu/of_iommu.c b/drivers/iommu/of_iommu.c index c8be889f5206..9f44ee8ea1bc 100644 --- a/drivers/iommu/of_iommu.c +++ b/drivers/iommu/of_iommu.c @@ -236,7 +236,7 @@ const struct iommu_ops *of_iommu_configure(struct device *dev, ops = ERR_PTR(err); } - return IS_ERR(ops) ? NULL : ops; + return ops; } static int __init of_iommu_init(void) @@ -247,7 +247,7 @@ static int __init of_iommu_init(void) for_each_matching_node_and_match(np, matches, &match) { const of_iommu_init_fn init_fn = match->data; - if (init_fn(np)) + if (init_fn && init_fn(np)) pr_err("Failed to initialise IOMMU %s\n", of_node_full_name(np)); } diff --git a/drivers/of/device.c b/drivers/of/device.c index e1ae9e7104a1..8bd3d8c09435 100644 --- a/drivers/of/device.c +++ b/drivers/of/device.c @@ -82,7 +82,7 @@ int of_device_add(struct platform_device *ofdev) * can use a platform bus notifier and handle BUS_NOTIFY_ADD_DEVICE events * to fix up DMA configuration. */ -void of_dma_configure(struct device *dev, struct device_node *np) +int of_dma_configure(struct device *dev, struct device_node *np) { u64 dma_addr, paddr, size; int ret; @@ -123,7 +123,7 @@ void of_dma_configure(struct device *dev, struct device_node *np) if (!size) { dev_err(dev, "Adjusted size 0x%llx invalid\n", size); - return; + return -EINVAL; } dev_dbg(dev, "dma_pfn_offset(%#08lx)\n", offset); } @@ -144,10 +144,15 @@ void of_dma_configure(struct device *dev, struct device_node *np) coherent ? " " : " not "); iommu = of_iommu_configure(dev, np); + if (IS_ERR(iommu)) + return PTR_ERR(iommu); + dev_dbg(dev, "device is%sbehind an iommu\n", iommu ? " " : " not "); arch_setup_dma_ops(dev, dma_addr, size, iommu, coherent); + + return 0; } EXPORT_SYMBOL_GPL(of_dma_configure); diff --git a/include/linux/of_device.h b/include/linux/of_device.h index af984551cc2b..2cacdd81062e 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -55,7 +55,7 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) return of_node_get(cpu_dev->of_node); } -void of_dma_configure(struct device *dev, struct device_node *np); +int of_dma_configure(struct device *dev, struct device_node *np); void of_dma_deconfigure(struct device *dev); #else /* CONFIG_OF */ @@ -104,8 +104,11 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) { return NULL; } -static inline void of_dma_configure(struct device *dev, struct device_node *np) -{} + +static inline int of_dma_configure(struct device *dev, struct device_node *np) +{ + return 0; +} static inline void of_dma_deconfigure(struct device *dev) {} #endif /* CONFIG_OF */ From 5a1bb638d5677053c7addcb228b56da6fccb5d68 Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Mon, 10 Apr 2017 16:51:03 +0530 Subject: [PATCH 39/63] drivers: acpi: Handle IOMMU lookup failure with deferred probing or error This is an equivalent to the DT's handling of the iommu master's probe with deferred probing when the corrsponding iommu is not probed yet. The lack of a registered IOMMU can be caused by the lack of a driver for the IOMMU, the IOMMU device probe not having been performed yet, having been deferred, or having failed. The first case occurs when the firmware describes the bus master and IOMMU topology correctly but no device driver exists for the IOMMU yet or the device driver has not been compiled in. Return NULL, the caller will configure the device without an IOMMU. The second and third cases are handled by deferring the probe of the bus master device which will eventually get reprobed after the IOMMU. The last case is currently handled by deferring the probe of the bus master device as well. A mechanism to either configure the bus master device without an IOMMU or to fail the bus master device probe depending on whether the IOMMU is optional or mandatory would be a good enhancement. Tested-by: Hanjun Guo Reviewed-by: Robin Murphy [Lorenzo: Added fixes for dma_coherent_mask overflow, acpi_dma_configure called multiple times for same device] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Sricharan R Signed-off-by: Joerg Roedel --- drivers/acpi/arm64/iort.c | 33 ++++++++++++++++++++++++++++++++- drivers/acpi/scan.c | 11 ++++++++--- drivers/base/dma-mapping.c | 2 +- include/acpi/acpi_bus.h | 2 +- include/linux/acpi.h | 7 +++++-- 5 files changed, 47 insertions(+), 8 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index 3dd9ec372dae..e323ece0314d 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -543,6 +543,14 @@ static const struct iommu_ops *iort_iommu_xlate(struct device *dev, const struct iommu_ops *ops = NULL; int ret = -ENODEV; struct fwnode_handle *iort_fwnode; + struct iommu_fwspec *fwspec = dev->iommu_fwspec; + + /* + * If we already translated the fwspec there + * is nothing left to do, return the iommu_ops. + */ + if (fwspec && fwspec->ops) + return fwspec->ops; if (node) { iort_fwnode = iort_get_fwnode(node); @@ -550,8 +558,17 @@ static const struct iommu_ops *iort_iommu_xlate(struct device *dev, return NULL; ops = iommu_ops_from_fwnode(iort_fwnode); + /* + * If the ops look-up fails, this means that either + * the SMMU drivers have not been probed yet or that + * the SMMU drivers are not built in the kernel; + * Depending on whether the SMMU drivers are built-in + * in the kernel or not, defer the IOMMU configuration + * or just abort it. + */ if (!ops) - return NULL; + return iort_iommu_driver_enabled(node->type) ? + ERR_PTR(-EPROBE_DEFER) : NULL; ret = arm_smmu_iort_xlate(dev, streamid, iort_fwnode, ops); } @@ -625,12 +642,26 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) while (parent) { ops = iort_iommu_xlate(dev, parent, streamid); + if (IS_ERR_OR_NULL(ops)) + return ops; parent = iort_node_get_id(node, &streamid, IORT_IOMMU_TYPE, i++); } } + /* + * If we have reason to believe the IOMMU driver missed the initial + * add_device callback for dev, replay it to get things in order. + */ + if (!IS_ERR_OR_NULL(ops) && ops->add_device && + dev->bus && !dev->iommu_group) { + int err = ops->add_device(dev); + + if (err) + ops = ERR_PTR(err); + } + return ops; } diff --git a/drivers/acpi/scan.c b/drivers/acpi/scan.c index 192691880d55..2a513cce332e 100644 --- a/drivers/acpi/scan.c +++ b/drivers/acpi/scan.c @@ -1373,20 +1373,25 @@ enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) * @dev: The pointer to the device * @attr: device dma attributes */ -void acpi_dma_configure(struct device *dev, enum dev_dma_attr attr) +int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr) { const struct iommu_ops *iommu; + u64 size; iort_set_dma_mask(dev); iommu = iort_iommu_configure(dev); + if (IS_ERR(iommu)) + return PTR_ERR(iommu); + size = max(dev->coherent_dma_mask, dev->coherent_dma_mask + 1); /* * Assume dma valid range starts at 0 and covers the whole * coherent_dma_mask. */ - arch_setup_dma_ops(dev, 0, dev->coherent_dma_mask + 1, iommu, - attr == DEV_DMA_COHERENT); + arch_setup_dma_ops(dev, 0, size, iommu, attr == DEV_DMA_COHERENT); + + return 0; } EXPORT_SYMBOL_GPL(acpi_dma_configure); diff --git a/drivers/base/dma-mapping.c b/drivers/base/dma-mapping.c index 82bd45ced7ff..755a2b5354c5 100644 --- a/drivers/base/dma-mapping.c +++ b/drivers/base/dma-mapping.c @@ -368,7 +368,7 @@ int dma_configure(struct device *dev) } else if (has_acpi_companion(dma_dev)) { attr = acpi_get_dma_attr(to_acpi_device_node(dma_dev->fwnode)); if (attr != DEV_DMA_NOT_SUPPORTED) - acpi_dma_configure(dev, attr); + ret = acpi_dma_configure(dev, attr); } if (bridge) diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index ef0ae8aaa567..2a9a5de0fb00 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -575,7 +575,7 @@ struct acpi_pci_root { bool acpi_dma_supported(struct acpi_device *adev); enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev); -void acpi_dma_configure(struct device *dev, enum dev_dma_attr attr); +int acpi_dma_configure(struct device *dev, enum dev_dma_attr attr); void acpi_dma_deconfigure(struct device *dev); struct acpi_device *acpi_find_child_device(struct acpi_device *parent, diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 9b05886f9773..79d06ef654c9 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -762,8 +762,11 @@ static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) return DEV_DMA_NOT_SUPPORTED; } -static inline void acpi_dma_configure(struct device *dev, - enum dev_dma_attr attr) { } +static inline int acpi_dma_configure(struct device *dev, + enum dev_dma_attr attr) +{ + return 0; +} static inline void acpi_dma_deconfigure(struct device *dev) { } From b913efe78a7ce1b2e64af7e5dc3a03748b997c61 Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Mon, 10 Apr 2017 16:51:04 +0530 Subject: [PATCH 40/63] arm64: dma-mapping: Remove the notifier trick to handle early setting of dma_ops With arch_setup_dma_ops now being called late during device's probe after the device's iommu is probed, the notifier trick required to handle the early setup of dma_ops before the iommu group gets created is not required. So removing the notifier's here. Tested-by: Marek Szyprowski Tested-by: Hanjun Guo Acked-by: Will Deacon Signed-off-by: Sricharan R [rm: clean up even more] Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- arch/arm64/mm/dma-mapping.c | 144 +++++------------------------------- 1 file changed, 19 insertions(+), 125 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 81cdb2e844ed..b46575954032 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -813,34 +813,26 @@ static const struct dma_map_ops iommu_dma_ops = { .mapping_error = iommu_dma_mapping_error, }; -/* - * TODO: Right now __iommu_setup_dma_ops() gets called too early to do - * everything it needs to - the device is only partially created and the - * IOMMU driver hasn't seen it yet, so it can't have a group. Thus we - * need this delayed attachment dance. Once IOMMU probe ordering is sorted - * to move the arch_setup_dma_ops() call later, all the notifier bits below - * become unnecessary, and will go away. - */ -struct iommu_dma_notifier_data { - struct list_head list; - struct device *dev; - const struct iommu_ops *ops; - u64 dma_base; - u64 size; -}; -static LIST_HEAD(iommu_dma_masters); -static DEFINE_MUTEX(iommu_dma_notifier_lock); - -static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops, - u64 dma_base, u64 size) +static int __init __iommu_dma_init(void) { - struct iommu_domain *domain = iommu_get_domain_for_dev(dev); + return iommu_dma_init(); +} +arch_initcall(__iommu_dma_init); + +static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, + const struct iommu_ops *ops) +{ + struct iommu_domain *domain; + + if (!ops) + return; /* - * If the IOMMU driver has the DMA domain support that we require, - * then the IOMMU core will have already configured a group for this - * device, and allocated the default domain for that group. + * The IOMMU core code allocates the default DMA domain, which the + * underlying IOMMU driver needs to support via the dma-iommu layer. */ + domain = iommu_get_domain_for_dev(dev); + if (!domain) goto out_err; @@ -851,109 +843,11 @@ static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops, dev->dma_ops = &iommu_dma_ops; } - return true; + return; + out_err: - pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", + pr_warn("Failed to set up IOMMU for device %s; retaining platform DMA ops\n", dev_name(dev)); - return false; -} - -static void queue_iommu_attach(struct device *dev, const struct iommu_ops *ops, - u64 dma_base, u64 size) -{ - struct iommu_dma_notifier_data *iommudata; - - iommudata = kzalloc(sizeof(*iommudata), GFP_KERNEL); - if (!iommudata) - return; - - iommudata->dev = dev; - iommudata->ops = ops; - iommudata->dma_base = dma_base; - iommudata->size = size; - - mutex_lock(&iommu_dma_notifier_lock); - list_add(&iommudata->list, &iommu_dma_masters); - mutex_unlock(&iommu_dma_notifier_lock); -} - -static int __iommu_attach_notifier(struct notifier_block *nb, - unsigned long action, void *data) -{ - struct iommu_dma_notifier_data *master, *tmp; - - if (action != BUS_NOTIFY_BIND_DRIVER) - return 0; - - mutex_lock(&iommu_dma_notifier_lock); - list_for_each_entry_safe(master, tmp, &iommu_dma_masters, list) { - if (data == master->dev && do_iommu_attach(master->dev, - master->ops, master->dma_base, master->size)) { - list_del(&master->list); - kfree(master); - break; - } - } - mutex_unlock(&iommu_dma_notifier_lock); - return 0; -} - -static int __init register_iommu_dma_ops_notifier(struct bus_type *bus) -{ - struct notifier_block *nb = kzalloc(sizeof(*nb), GFP_KERNEL); - int ret; - - if (!nb) - return -ENOMEM; - - nb->notifier_call = __iommu_attach_notifier; - - ret = bus_register_notifier(bus, nb); - if (ret) { - pr_warn("Failed to register DMA domain notifier; IOMMU DMA ops unavailable on bus '%s'\n", - bus->name); - kfree(nb); - } - return ret; -} - -static int __init __iommu_dma_init(void) -{ - int ret; - - ret = iommu_dma_init(); - if (!ret) - ret = register_iommu_dma_ops_notifier(&platform_bus_type); - if (!ret) - ret = register_iommu_dma_ops_notifier(&amba_bustype); -#ifdef CONFIG_PCI - if (!ret) - ret = register_iommu_dma_ops_notifier(&pci_bus_type); -#endif - return ret; -} -arch_initcall(__iommu_dma_init); - -static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, - const struct iommu_ops *ops) -{ - struct iommu_group *group; - - if (!ops) - return; - /* - * TODO: As a concession to the future, we're ready to handle being - * called both early and late (i.e. after bus_add_device). Once all - * the platform bus code is reworked to call us late and the notifier - * junk above goes away, move the body of do_iommu_attach here. - */ - group = iommu_group_get(dev); - if (group) { - do_iommu_attach(dev, ops, dma_base, size); - iommu_group_put(group); - } else { - queue_iommu_attach(dev, ops, dma_base, size); - } } void arch_teardown_dma_ops(struct device *dev) From f6810c15cf973fc640ac8029951ff59f547b8a5e Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 10 Apr 2017 16:51:05 +0530 Subject: [PATCH 41/63] iommu/arm-smmu: Clean up early-probing workarounds Now that the appropriate ordering is enforced via probe-deferral of masters in core code, rip it all out and bask in the simplicity. Tested-by: Hanjun Guo Acked-by: Will Deacon Signed-off-by: Robin Murphy [Sricharan: Rebased on top of ACPI IORT SMMU series] Signed-off-by: Sricharan R Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu-v3.c | 46 +-------------- drivers/iommu/arm-smmu.c | 108 +++++++++++++++--------------------- 2 files changed, 48 insertions(+), 106 deletions(-) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index bbd46efbe075..56401e6d2e68 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -2763,51 +2763,9 @@ static struct platform_driver arm_smmu_driver = { .probe = arm_smmu_device_probe, .remove = arm_smmu_device_remove, }; +module_platform_driver(arm_smmu_driver); -static int __init arm_smmu_init(void) -{ - static bool registered; - int ret = 0; - - if (!registered) { - ret = platform_driver_register(&arm_smmu_driver); - registered = !ret; - } - return ret; -} - -static void __exit arm_smmu_exit(void) -{ - return platform_driver_unregister(&arm_smmu_driver); -} - -subsys_initcall(arm_smmu_init); -module_exit(arm_smmu_exit); - -static int __init arm_smmu_of_init(struct device_node *np) -{ - int ret = arm_smmu_init(); - - if (ret) - return ret; - - if (!of_platform_device_create(np, NULL, platform_bus_type.dev_root)) - return -ENODEV; - - return 0; -} -IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3", arm_smmu_of_init); - -#ifdef CONFIG_ACPI -static int __init acpi_smmu_v3_init(struct acpi_table_header *table) -{ - if (iort_node_match(ACPI_IORT_NODE_SMMU_V3)) - return arm_smmu_init(); - - return 0; -} -IORT_ACPI_DECLARE(arm_smmu_v3, ACPI_SIG_IORT, acpi_smmu_v3_init); -#endif +IOMMU_OF_DECLARE(arm_smmuv3, "arm,smmu-v3", NULL); MODULE_DESCRIPTION("IOMMU API for ARM architected SMMUv3 implementations"); MODULE_AUTHOR("Will Deacon "); diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 9b33700b7c69..7f522504a876 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -2077,6 +2077,23 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev, return 0; } +static void arm_smmu_bus_init(void) +{ + /* Oh, for a proper bus abstraction */ + if (!iommu_present(&platform_bus_type)) + bus_set_iommu(&platform_bus_type, &arm_smmu_ops); +#ifdef CONFIG_ARM_AMBA + if (!iommu_present(&amba_bustype)) + bus_set_iommu(&amba_bustype, &arm_smmu_ops); +#endif +#ifdef CONFIG_PCI + if (!iommu_present(&pci_bus_type)) { + pci_request_acs(); + bus_set_iommu(&pci_bus_type, &arm_smmu_ops); + } +#endif +} + static int arm_smmu_device_probe(struct platform_device *pdev) { struct resource *res; @@ -2182,22 +2199,31 @@ static int arm_smmu_device_probe(struct platform_device *pdev) arm_smmu_device_reset(smmu); arm_smmu_test_smr_masks(smmu); - /* Oh, for a proper bus abstraction */ - if (!iommu_present(&platform_bus_type)) - bus_set_iommu(&platform_bus_type, &arm_smmu_ops); -#ifdef CONFIG_ARM_AMBA - if (!iommu_present(&amba_bustype)) - bus_set_iommu(&amba_bustype, &arm_smmu_ops); -#endif -#ifdef CONFIG_PCI - if (!iommu_present(&pci_bus_type)) { - pci_request_acs(); - bus_set_iommu(&pci_bus_type, &arm_smmu_ops); - } -#endif + /* + * For ACPI and generic DT bindings, an SMMU will be probed before + * any device which might need it, so we want the bus ops in place + * ready to handle default domain setup as soon as any SMMU exists. + */ + if (!using_legacy_binding) + arm_smmu_bus_init(); + return 0; } +/* + * With the legacy DT binding in play, though, we have no guarantees about + * probe order, but then we're also not doing default domains, so we can + * delay setting bus ops until we're sure every possible SMMU is ready, + * and that way ensure that no add_device() calls get missed. + */ +static int arm_smmu_legacy_bus_init(void) +{ + if (using_legacy_binding) + arm_smmu_bus_init(); + return 0; +} +device_initcall_sync(arm_smmu_legacy_bus_init); + static int arm_smmu_device_remove(struct platform_device *pdev) { struct arm_smmu_device *smmu = platform_get_drvdata(pdev); @@ -2221,56 +2247,14 @@ static struct platform_driver arm_smmu_driver = { .probe = arm_smmu_device_probe, .remove = arm_smmu_device_remove, }; +module_platform_driver(arm_smmu_driver); -static int __init arm_smmu_init(void) -{ - static bool registered; - int ret = 0; - - if (!registered) { - ret = platform_driver_register(&arm_smmu_driver); - registered = !ret; - } - return ret; -} - -static void __exit arm_smmu_exit(void) -{ - return platform_driver_unregister(&arm_smmu_driver); -} - -subsys_initcall(arm_smmu_init); -module_exit(arm_smmu_exit); - -static int __init arm_smmu_of_init(struct device_node *np) -{ - int ret = arm_smmu_init(); - - if (ret) - return ret; - - if (!of_platform_device_create(np, NULL, platform_bus_type.dev_root)) - return -ENODEV; - - return 0; -} -IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1", arm_smmu_of_init); -IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2", arm_smmu_of_init); -IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400", arm_smmu_of_init); -IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", arm_smmu_of_init); -IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", arm_smmu_of_init); -IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", arm_smmu_of_init); - -#ifdef CONFIG_ACPI -static int __init arm_smmu_acpi_init(struct acpi_table_header *table) -{ - if (iort_node_match(ACPI_IORT_NODE_SMMU)) - return arm_smmu_init(); - - return 0; -} -IORT_ACPI_DECLARE(arm_smmu, ACPI_SIG_IORT, arm_smmu_acpi_init); -#endif +IOMMU_OF_DECLARE(arm_smmuv1, "arm,smmu-v1", NULL); +IOMMU_OF_DECLARE(arm_smmuv2, "arm,smmu-v2", NULL); +IOMMU_OF_DECLARE(arm_mmu400, "arm,mmu-400", NULL); +IOMMU_OF_DECLARE(arm_mmu401, "arm,mmu-401", NULL); +IOMMU_OF_DECLARE(arm_mmu500, "arm,mmu-500", NULL); +IOMMU_OF_DECLARE(cavium_smmuv2, "cavium,smmu-v2", NULL); MODULE_DESCRIPTION("IOMMU API for ARM architected SMMU implementations"); MODULE_AUTHOR("Will Deacon "); From 316ca8804ea84a782d5ba2163711ebb22116ff5a Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 10 Apr 2017 16:51:06 +0530 Subject: [PATCH 42/63] ACPI/IORT: Remove linker section for IORT entries probing The IORT linker section introduced by commit 34ceea275f62 ("ACPI/IORT: Introduce linker section for IORT entries probing") was needed to make sure SMMU drivers are registered (and therefore probed) in the kernel before devices using the SMMU have a chance to probe in turn. Through the introduction of deferred IOMMU configuration the linker section based IORT probing infrastructure is not needed any longer, in that device/SMMU probe dependencies are managed through the probe deferral mechanism, making the IORT linker section infrastructure unused, so that it can be removed. Remove the unused IORT linker section probing infrastructure from the kernel to complete the ACPI IORT IOMMU configure probe deferral mechanism implementation. Tested-by: Hanjun Guo Reviewed-by: Robin Murphy Signed-off-by: Lorenzo Pieralisi Cc: Sricharan R Signed-off-by: Joerg Roedel --- drivers/acpi/arm64/iort.c | 2 -- include/asm-generic/vmlinux.lds.h | 1 - include/linux/acpi_iort.h | 3 --- 3 files changed, 6 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index e323ece0314d..e7b1940ff13b 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -1000,6 +1000,4 @@ void __init acpi_iort_init(void) } iort_init_platform_devices(); - - acpi_probe_device_table(iort); } diff --git a/include/asm-generic/vmlinux.lds.h b/include/asm-generic/vmlinux.lds.h index 0968d13b3885..9faa26c41c14 100644 --- a/include/asm-generic/vmlinux.lds.h +++ b/include/asm-generic/vmlinux.lds.h @@ -566,7 +566,6 @@ IRQCHIP_OF_MATCH_TABLE() \ ACPI_PROBE_TABLE(irqchip) \ ACPI_PROBE_TABLE(clksrc) \ - ACPI_PROBE_TABLE(iort) \ EARLYCON_TABLE() #define INIT_TEXT \ diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 77e08099e554..f167e1d045ff 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -52,7 +52,4 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) { return NULL; } #endif -#define IORT_ACPI_DECLARE(name, table_id, fn) \ - ACPI_DECLARE_PROBE_ENTRY(iort, name, table_id, 0, NULL, 0, fn) - #endif /* __ACPI_IORT_H__ */ From abaa7e5b054aae567861628b74dbc7fbf8ed79e8 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 12 Apr 2017 00:21:26 -0500 Subject: [PATCH 43/63] iommu/omap: Register driver before setting IOMMU ops Move the registration of the OMAP IOMMU platform driver before setting the IOMMU callbacks on the platform bus. This causes the IOMMU devices to be probed first before the .add_device() callback is invoked for all registered devices, and allows the iommu_group support to be added to the OMAP IOMMU driver. While at this, also check for the return status from bus_set_iommu. Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index e2583cce2cc1..54556713c8d1 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1299,6 +1299,7 @@ static int __init omap_iommu_init(void) const unsigned long flags = SLAB_HWCACHE_ALIGN; size_t align = 1 << 10; /* L2 pagetable alignement */ struct device_node *np; + int ret; np = of_find_matching_node(NULL, omap_iommu_of_match); if (!np) @@ -1312,11 +1313,25 @@ static int __init omap_iommu_init(void) return -ENOMEM; iopte_cachep = p; - bus_set_iommu(&platform_bus_type, &omap_iommu_ops); - omap_iommu_debugfs_init(); - return platform_driver_register(&omap_iommu_driver); + ret = platform_driver_register(&omap_iommu_driver); + if (ret) { + pr_err("%s: failed to register driver\n", __func__); + goto fail_driver; + } + + ret = bus_set_iommu(&platform_bus_type, &omap_iommu_ops); + if (ret) + goto fail_bus; + + return 0; + +fail_bus: + platform_driver_unregister(&omap_iommu_driver); +fail_driver: + kmem_cache_destroy(iopte_cachep); + return ret; } subsys_initcall(omap_iommu_init); /* must be ready before omap3isp is probed */ From 49a57ef7f8492ef985ee1ecdb927ca78a6b2f308 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 12 Apr 2017 00:21:27 -0500 Subject: [PATCH 44/63] iommu/omap: Drop legacy-style device support All the supported boards that have OMAP IOMMU devices do support DT boot only now. So, drop the support for the non-DT legacy-style devices from the OMAP IOMMU driver. Couple of the fields from the iommu platform data would no longer be required, so they have also been cleaned up. The IOMMU platform data is still needed though for performing reset management properly in a multi-arch environment. Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 30 +++++++++++------------- include/linux/platform_data/iommu-omap.h | 3 --- 2 files changed, 14 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 54556713c8d1..febd4fbe3445 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -928,28 +928,26 @@ static int omap_iommu_probe(struct platform_device *pdev) int irq; struct omap_iommu *obj; struct resource *res; - struct iommu_platform_data *pdata = dev_get_platdata(&pdev->dev); struct device_node *of = pdev->dev.of_node; + if (!of) { + pr_err("%s: only DT-based devices are supported\n", __func__); + return -ENODEV; + } + obj = devm_kzalloc(&pdev->dev, sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL); if (!obj) return -ENOMEM; - if (of) { - obj->name = dev_name(&pdev->dev); - obj->nr_tlb_entries = 32; - err = of_property_read_u32(of, "ti,#tlb-entries", - &obj->nr_tlb_entries); - if (err && err != -EINVAL) - return err; - if (obj->nr_tlb_entries != 32 && obj->nr_tlb_entries != 8) - return -EINVAL; - if (of_find_property(of, "ti,iommu-bus-err-back", NULL)) - obj->has_bus_err_back = MMU_GP_REG_BUS_ERR_BACK_EN; - } else { - obj->nr_tlb_entries = pdata->nr_tlb_entries; - obj->name = pdata->name; - } + obj->name = dev_name(&pdev->dev); + obj->nr_tlb_entries = 32; + err = of_property_read_u32(of, "ti,#tlb-entries", &obj->nr_tlb_entries); + if (err && err != -EINVAL) + return err; + if (obj->nr_tlb_entries != 32 && obj->nr_tlb_entries != 8) + return -EINVAL; + if (of_find_property(of, "ti,iommu-bus-err-back", NULL)) + obj->has_bus_err_back = MMU_GP_REG_BUS_ERR_BACK_EN; obj->dev = &pdev->dev; obj->ctx = (void *)obj + sizeof(*obj); diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h index 0496d171700a..a40fc0f4f9de 100644 --- a/include/linux/platform_data/iommu-omap.h +++ b/include/linux/platform_data/iommu-omap.h @@ -30,10 +30,7 @@ struct omap_iommu_arch_data { }; struct iommu_platform_data { - const char *name; const char *reset_name; - int nr_tlb_entries; - int (*assert_reset)(struct platform_device *pdev, const char *name); int (*deassert_reset)(struct platform_device *pdev, const char *name); }; From e73b7afe4e8ca5ec4304a9e1d5009755a85fff91 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 12 Apr 2017 00:21:28 -0500 Subject: [PATCH 45/63] iommu/omap: Move data structures to omap-iommu.h The internal data-structures are scattered over various header and C files. Consolidate them in omap-iommu.h. While at this, add the kerneldoc comment for the missing iommu domain variable and revise the iommu_arch_data name. Signed-off-by: Joerg Roedel [s-anna@ti.com: revise kerneldoc comments] Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 16 ------------ drivers/iommu/omap-iommu.h | 33 ++++++++++++++++++++++++ include/linux/platform_data/iommu-omap.h | 17 ------------ 3 files changed, 33 insertions(+), 33 deletions(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index febd4fbe3445..c1739a650654 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -42,22 +42,6 @@ /* bitmap of the page sizes currently supported */ #define OMAP_IOMMU_PGSIZES (SZ_4K | SZ_64K | SZ_1M | SZ_16M) -/** - * struct omap_iommu_domain - omap iommu domain - * @pgtable: the page table - * @iommu_dev: an omap iommu device attached to this domain. only a single - * iommu device can be attached for now. - * @dev: Device using this domain. - * @lock: domain lock, should be taken when attaching/detaching - */ -struct omap_iommu_domain { - u32 *pgtable; - struct omap_iommu *iommu_dev; - struct device *dev; - spinlock_t lock; - struct iommu_domain domain; -}; - #define MMU_LOCK_BASE_SHIFT 10 #define MMU_LOCK_BASE_MASK (0x1f << MMU_LOCK_BASE_SHIFT) #define MMU_LOCK_BASE(x) \ diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index 59628e5017b4..3c33608f48ca 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -14,6 +14,7 @@ #define _OMAP_IOMMU_H #include +#include #define for_each_iotlb_cr(obj, n, __i, cr) \ for (__i = 0; \ @@ -27,6 +28,23 @@ struct iotlb_entry { u32 endian, elsz, mixed; }; +/** + * struct omap_iommu_domain - omap iommu domain + * @pgtable: the page table + * @iommu_dev: an omap iommu device attached to this domain. only a single + * iommu device can be attached for now. + * @dev: Device using this domain. + * @lock: domain lock, should be taken when attaching/detaching + * @domain: generic domain handle used by iommu core code + */ +struct omap_iommu_domain { + u32 *pgtable; + struct omap_iommu *iommu_dev; + struct device *dev; + spinlock_t lock; + struct iommu_domain domain; +}; + struct omap_iommu { const char *name; void __iomem *regbase; @@ -52,6 +70,21 @@ struct omap_iommu { u32 id; }; +/** + * struct omap_iommu_arch_data - omap iommu private data + * @name: name of the iommu device + * @iommu_dev: handle of the iommu device + * + * This is an omap iommu private data object, which binds an iommu user + * to its iommu device. This object should be placed at the iommu user's + * dev_archdata so generic IOMMU API can be used without having to + * utilize omap-specific plumbing anymore. + */ +struct omap_iommu_arch_data { + const char *name; + struct omap_iommu *iommu_dev; +}; + struct cr_regs { u32 cam; u32 ram; diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h index a40fc0f4f9de..e8b12dbf6170 100644 --- a/include/linux/platform_data/iommu-omap.h +++ b/include/linux/platform_data/iommu-omap.h @@ -12,23 +12,6 @@ #include -#define MMU_REG_SIZE 256 - -/** - * struct iommu_arch_data - omap iommu private data - * @name: name of the iommu device - * @iommu_dev: handle of the iommu device - * - * This is an omap iommu private data object, which binds an iommu user - * to its iommu device. This object should be placed at the iommu user's - * dev_archdata so generic IOMMU API can be used without having to - * utilize omap-specific plumbing anymore. - */ -struct omap_iommu_arch_data { - const char *name; - struct omap_iommu *iommu_dev; -}; - struct iommu_platform_data { const char *reset_name; int (*assert_reset)(struct platform_device *pdev, const char *name); From ede1c2e7d4dc49fb1591e1754db9d53fabbd4b8b Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 12 Apr 2017 00:21:29 -0500 Subject: [PATCH 46/63] iommu/omap: Store iommu_dev pointer in arch_data Instead of finding the matching IOMMU for a device using string comparision functions, store the pointer to the iommu_dev in arch_data during the omap_iommu_add_device callback and reset it during the omap_iommu_remove_device callback functions. Signed-off-by: Joerg Roedel [s-anna@ti.com: few minor cleanups] Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 56 ++++++++++++++++---------------------- drivers/iommu/omap-iommu.h | 2 -- 2 files changed, 23 insertions(+), 35 deletions(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index c1739a650654..0553b0381e2a 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -802,33 +802,14 @@ static irqreturn_t iommu_fault_handler(int irq, void *data) return IRQ_NONE; } -static int device_match_by_alias(struct device *dev, void *data) -{ - struct omap_iommu *obj = to_iommu(dev); - const char *name = data; - - pr_debug("%s: %s %s\n", __func__, obj->name, name); - - return strcmp(obj->name, name) == 0; -} - /** * omap_iommu_attach() - attach iommu device to an iommu domain - * @name: name of target omap iommu device + * @obj: target omap iommu device * @iopgd: page table **/ -static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd) +static int omap_iommu_attach(struct omap_iommu *obj, u32 *iopgd) { int err; - struct device *dev; - struct omap_iommu *obj; - - dev = driver_find_device(&omap_iommu_driver.driver, NULL, (void *)name, - device_match_by_alias); - if (!dev) - return ERR_PTR(-ENODEV); - - obj = to_iommu(dev); spin_lock(&obj->iommu_lock); @@ -841,11 +822,13 @@ static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd) spin_unlock(&obj->iommu_lock); dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name); - return obj; + + return 0; err_enable: spin_unlock(&obj->iommu_lock); - return ERR_PTR(err); + + return err; } /** @@ -1059,11 +1042,11 @@ static int omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) { struct omap_iommu_domain *omap_domain = to_omap_domain(domain); - struct omap_iommu *oiommu; struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct omap_iommu *oiommu; int ret = 0; - if (!arch_data || !arch_data->name) { + if (!arch_data || !arch_data->iommu_dev) { dev_err(dev, "device doesn't have an associated iommu\n"); return -EINVAL; } @@ -1077,15 +1060,16 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) goto out; } + oiommu = arch_data->iommu_dev; + /* get a handle to and enable the omap iommu */ - oiommu = omap_iommu_attach(arch_data->name, omap_domain->pgtable); - if (IS_ERR(oiommu)) { - ret = PTR_ERR(oiommu); + ret = omap_iommu_attach(oiommu, omap_domain->pgtable); + if (ret) { dev_err(dev, "can't get omap iommu: %d\n", ret); goto out; } - omap_domain->iommu_dev = arch_data->iommu_dev = oiommu; + omap_domain->iommu_dev = oiommu; omap_domain->dev = dev; oiommu->domain = domain; @@ -1098,7 +1082,6 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, struct device *dev) { struct omap_iommu *oiommu = dev_to_omap_iommu(dev); - struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; /* only a single device is supported per domain for now */ if (omap_domain->iommu_dev != oiommu) { @@ -1110,7 +1093,7 @@ static void _omap_iommu_detach_dev(struct omap_iommu_domain *omap_domain, omap_iommu_detach(oiommu); - omap_domain->iommu_dev = arch_data->iommu_dev = NULL; + omap_domain->iommu_dev = NULL; omap_domain->dev = NULL; oiommu->domain = NULL; } @@ -1214,6 +1197,7 @@ static phys_addr_t omap_iommu_iova_to_phys(struct iommu_domain *domain, static int omap_iommu_add_device(struct device *dev) { struct omap_iommu_arch_data *arch_data; + struct omap_iommu *oiommu; struct device_node *np; struct platform_device *pdev; @@ -1236,13 +1220,19 @@ static int omap_iommu_add_device(struct device *dev) return -EINVAL; } + oiommu = platform_get_drvdata(pdev); + if (!oiommu) { + of_node_put(np); + return -EINVAL; + } + arch_data = kzalloc(sizeof(*arch_data), GFP_KERNEL); if (!arch_data) { of_node_put(np); return -ENOMEM; } - arch_data->name = kstrdup(dev_name(&pdev->dev), GFP_KERNEL); + arch_data->iommu_dev = oiommu; dev->archdata.iommu = arch_data; of_node_put(np); @@ -1257,7 +1247,7 @@ static void omap_iommu_remove_device(struct device *dev) if (!dev->of_node || !arch_data) return; - kfree(arch_data->name); + dev->archdata.iommu = NULL; kfree(arch_data); } diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index 3c33608f48ca..f81184b549ec 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -72,7 +72,6 @@ struct omap_iommu { /** * struct omap_iommu_arch_data - omap iommu private data - * @name: name of the iommu device * @iommu_dev: handle of the iommu device * * This is an omap iommu private data object, which binds an iommu user @@ -81,7 +80,6 @@ struct omap_iommu { * utilize omap-specific plumbing anymore. */ struct omap_iommu_arch_data { - const char *name; struct omap_iommu *iommu_dev; }; From 01611fe8478bf582af0c33d5853137dd25b72f2f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 12 Apr 2017 00:21:30 -0500 Subject: [PATCH 47/63] iommu/omap: Make use of 'struct iommu_device' Modify the driver to register individual iommus and establish links between devices and iommus in sysfs. Signed-off-by: Joerg Roedel [s-anna@ti.com: fix some cleanup issues during failures] Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 30 ++++++++++++++++++++++++++++++ drivers/iommu/omap-iommu.h | 2 ++ 2 files changed, 32 insertions(+) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 0553b0381e2a..ef44fc740da7 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -36,6 +36,8 @@ #include "omap-iopgtable.h" #include "omap-iommu.h" +static const struct iommu_ops omap_iommu_ops; + #define to_iommu(dev) \ ((struct omap_iommu *)platform_get_drvdata(to_platform_device(dev))) @@ -941,6 +943,16 @@ static int omap_iommu_probe(struct platform_device *pdev) return err; platform_set_drvdata(pdev, obj); + err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL, obj->name); + if (err) + return err; + + iommu_device_set_ops(&obj->iommu, &omap_iommu_ops); + + err = iommu_device_register(&obj->iommu); + if (err) + goto out_sysfs; + pm_runtime_irq_safe(obj->dev); pm_runtime_enable(obj->dev); @@ -948,12 +960,19 @@ static int omap_iommu_probe(struct platform_device *pdev) dev_info(&pdev->dev, "%s registered\n", obj->name); return 0; + +out_sysfs: + iommu_device_sysfs_remove(&obj->iommu); + return err; } static int omap_iommu_remove(struct platform_device *pdev) { struct omap_iommu *obj = platform_get_drvdata(pdev); + iommu_device_sysfs_remove(&obj->iommu); + iommu_device_unregister(&obj->iommu); + omap_iommu_debugfs_remove(obj); pm_runtime_disable(obj->dev); @@ -1200,6 +1219,7 @@ static int omap_iommu_add_device(struct device *dev) struct omap_iommu *oiommu; struct device_node *np; struct platform_device *pdev; + int ret; /* * Allocate the archdata iommu structure for DT-based devices. @@ -1232,6 +1252,13 @@ static int omap_iommu_add_device(struct device *dev) return -ENOMEM; } + ret = iommu_device_link(&oiommu->iommu, dev); + if (ret) { + kfree(arch_data); + of_node_put(np); + return ret; + } + arch_data->iommu_dev = oiommu; dev->archdata.iommu = arch_data; @@ -1247,8 +1274,11 @@ static void omap_iommu_remove_device(struct device *dev) if (!dev->of_node || !arch_data) return; + iommu_device_unlink(&arch_data->iommu_dev->iommu, dev); + dev->archdata.iommu = NULL; kfree(arch_data); + } static const struct iommu_ops omap_iommu_ops = { diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index f81184b549ec..758958a4712c 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -68,6 +68,8 @@ struct omap_iommu { int has_bus_err_back; u32 id; + + struct iommu_device iommu; }; /** From 28ae1e3e14f32c70913e4ebf4aeff7959ed03f84 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 12 Apr 2017 00:21:31 -0500 Subject: [PATCH 48/63] iommu/omap: Add iommu-group support Support for IOMMU groups will become mandatory for drivers, so add it to the omap iommu driver. Signed-off-by: Joerg Roedel [s-anna@ti.com: minor error cleanups] Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 39 +++++++++++++++++++++++++++++++++++++- drivers/iommu/omap-iommu.h | 1 + 2 files changed, 39 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index ef44fc740da7..95dfca36ccb9 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -943,9 +943,13 @@ static int omap_iommu_probe(struct platform_device *pdev) return err; platform_set_drvdata(pdev, obj); + obj->group = iommu_group_alloc(); + if (IS_ERR(obj->group)) + return PTR_ERR(obj->group); + err = iommu_device_sysfs_add(&obj->iommu, obj->dev, NULL, obj->name); if (err) - return err; + goto out_group; iommu_device_set_ops(&obj->iommu, &omap_iommu_ops); @@ -959,10 +963,13 @@ static int omap_iommu_probe(struct platform_device *pdev) omap_iommu_debugfs_add(obj); dev_info(&pdev->dev, "%s registered\n", obj->name); + return 0; out_sysfs: iommu_device_sysfs_remove(&obj->iommu); +out_group: + iommu_group_put(obj->group); return err; } @@ -970,6 +977,9 @@ static int omap_iommu_remove(struct platform_device *pdev) { struct omap_iommu *obj = platform_get_drvdata(pdev); + iommu_group_put(obj->group); + obj->group = NULL; + iommu_device_sysfs_remove(&obj->iommu); iommu_device_unregister(&obj->iommu); @@ -1217,6 +1227,7 @@ static int omap_iommu_add_device(struct device *dev) { struct omap_iommu_arch_data *arch_data; struct omap_iommu *oiommu; + struct iommu_group *group; struct device_node *np; struct platform_device *pdev; int ret; @@ -1262,6 +1273,19 @@ static int omap_iommu_add_device(struct device *dev) arch_data->iommu_dev = oiommu; dev->archdata.iommu = arch_data; + /* + * IOMMU group initialization calls into omap_iommu_device_group, which + * needs a valid dev->archdata.iommu pointer + */ + group = iommu_group_get_for_dev(dev); + if (IS_ERR(group)) { + iommu_device_unlink(&oiommu->iommu, dev); + dev->archdata.iommu = NULL; + kfree(arch_data); + return PTR_ERR(group); + } + iommu_group_put(group); + of_node_put(np); return 0; @@ -1275,12 +1299,24 @@ static void omap_iommu_remove_device(struct device *dev) return; iommu_device_unlink(&arch_data->iommu_dev->iommu, dev); + iommu_group_remove_device(dev); dev->archdata.iommu = NULL; kfree(arch_data); } +static struct iommu_group *omap_iommu_device_group(struct device *dev) +{ + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + struct iommu_group *group = NULL; + + if (arch_data->iommu_dev) + group = arch_data->iommu_dev->group; + + return group; +} + static const struct iommu_ops omap_iommu_ops = { .domain_alloc = omap_iommu_domain_alloc, .domain_free = omap_iommu_domain_free, @@ -1292,6 +1328,7 @@ static const struct iommu_ops omap_iommu_ops = { .iova_to_phys = omap_iommu_iova_to_phys, .add_device = omap_iommu_add_device, .remove_device = omap_iommu_remove_device, + .device_group = omap_iommu_device_group, .pgsize_bitmap = OMAP_IOMMU_PGSIZES, }; diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index 758958a4712c..6e70515e6038 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -70,6 +70,7 @@ struct omap_iommu { u32 id; struct iommu_device iommu; + struct iommu_group *group; }; /** From fd8e2d4b393252505783656471465c7f85f3c0a9 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 12 Apr 2017 00:21:32 -0500 Subject: [PATCH 49/63] omap3isp: Remove iommu_group related code The OMAP IOMMU driver has added the support for IOMMU groups internally, and the ISP device is automatically linked to the appropriate IOMMU group. So, remove the explicit function calls that creates/deletes an iommu_group and adds the ISP device to this group. Cc: Laurent Pinchart Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- drivers/media/platform/omap3isp/isp.c | 17 ----------------- drivers/media/platform/omap3isp/isp.h | 1 - 2 files changed, 18 deletions(-) diff --git a/drivers/media/platform/omap3isp/isp.c b/drivers/media/platform/omap3isp/isp.c index 084ecf4aa9a4..0d984a28a003 100644 --- a/drivers/media/platform/omap3isp/isp.c +++ b/drivers/media/platform/omap3isp/isp.c @@ -1943,30 +1943,13 @@ static void isp_detach_iommu(struct isp_device *isp) { arm_iommu_release_mapping(isp->mapping); isp->mapping = NULL; - iommu_group_remove_device(isp->dev); } static int isp_attach_iommu(struct isp_device *isp) { struct dma_iommu_mapping *mapping; - struct iommu_group *group; int ret; - /* Create a device group and add the device to it. */ - group = iommu_group_alloc(); - if (IS_ERR(group)) { - dev_err(isp->dev, "failed to allocate IOMMU group\n"); - return PTR_ERR(group); - } - - ret = iommu_group_add_device(group, isp->dev); - iommu_group_put(group); - - if (ret < 0) { - dev_err(isp->dev, "failed to add device to IPMMU group\n"); - return ret; - } - /* * Create the ARM mapping, used by the ARM DMA mapping core to allocate * VAs. This will allocate a corresponding IOMMU domain. diff --git a/drivers/media/platform/omap3isp/isp.h b/drivers/media/platform/omap3isp/isp.h index 7e6f6638433b..2f2ae609c548 100644 --- a/drivers/media/platform/omap3isp/isp.h +++ b/drivers/media/platform/omap3isp/isp.h @@ -23,7 +23,6 @@ #include #include #include -#include #include #include From 3ba8775f64484a2b56bf3c88d09a186d819fa348 Mon Sep 17 00:00:00 2001 From: "zhichang.yuan" Date: Tue, 18 Apr 2017 20:51:48 +0800 Subject: [PATCH 50/63] iommu: Make iommu_bus_notifier return NOTIFY_DONE rather than error code In iommu_bus_notifier(), when action is BUS_NOTIFY_ADD_DEVICE, it will return 'ops->add_device(dev)' directly. But ops->add_device will return ERR_VAL, such as -ENODEV. These value will make notifier_call_chain() not to traverse the remain nodes in struct notifier_block list. This patch revises iommu_bus_notifier() to return NOTIFY_DONE when some errors happened in ops->add_device(). Signed-off-by: zhichang.yuan Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 8ea14f41a979..9170fd498f46 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1082,8 +1082,12 @@ static int iommu_bus_notifier(struct notifier_block *nb, * result in ADD/DEL notifiers to group->notifier */ if (action == BUS_NOTIFY_ADD_DEVICE) { - if (ops->add_device) - return ops->add_device(dev); + if (ops->add_device) { + int ret; + + ret = ops->add_device(dev); + return (ret) ? NOTIFY_DONE : NOTIFY_OK; + } } else if (action == BUS_NOTIFY_REMOVED_DEVICE) { if (ops->remove_device && dev->iommu_group) { ops->remove_device(dev); From 73dbd4a4230216b6a5540a362edceae0c9b4876b Mon Sep 17 00:00:00 2001 From: Pan Bian Date: Sun, 23 Apr 2017 18:23:21 +0800 Subject: [PATCH 51/63] iommu/amd: Fix incorrect error handling in amd_iommu_bind_pasid() In function amd_iommu_bind_pasid(), the control flow jumps to label out_free when pasid_state->mm and mm is NULL. And mmput(mm) is called. In function mmput(mm), mm is referenced without validation. This will result in a NULL dereference bug. This patch fixes the bug. Signed-off-by: Pan Bian Fixes: f0aac63b873b ('iommu/amd: Don't hold a reference to mm_struct') Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_v2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu_v2.c b/drivers/iommu/amd_iommu_v2.c index 063343909b0d..6629c472eafd 100644 --- a/drivers/iommu/amd_iommu_v2.c +++ b/drivers/iommu/amd_iommu_v2.c @@ -696,9 +696,9 @@ out_clear_state: out_unregister: mmu_notifier_unregister(&pasid_state->mn, mm); + mmput(mm); out_free: - mmput(mm); free_pasid_state(pasid_state); out: From 6323f474902e372623d73486c1ad30eb40d6604f Mon Sep 17 00:00:00 2001 From: Peng Fan Date: Fri, 21 Apr 2017 17:03:36 +0800 Subject: [PATCH 52/63] iommu/arm-smmu: Correct sid to mask From code "SMR mask 0x%x out of range for SMMU", so, we need to use mask, not sid. Signed-off-by: Peng Fan Cc: Will Deacon Cc: Robin Murphy Acked-by: Will Deacon Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index dbd4998661c6..77242afa1efc 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1535,7 +1535,7 @@ static int arm_smmu_add_device(struct device *dev) } if (mask & ~smmu->smr_mask_mask) { dev_err(dev, "SMR mask 0x%x out of range for SMMU (0x%x)\n", - sid, smmu->smr_mask_mask); + mask, smmu->smr_mask_mask); goto out_free; } } From bdf95923086fb359ccb44c815724c3ace1611c90 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Tue, 25 Apr 2017 15:27:52 +0530 Subject: [PATCH 53/63] iommu/arm-smmu: Return IOVA in iova_to_phys when SMMU is bypassed For software initiated address translation, when domain type is IOMMU_DOMAIN_IDENTITY i.e SMMU is bypassed, mimic HW behavior i.e return the same IOVA as translated address. This patch is an extension to Will Deacon's patchset "Implement SMMU passthrough using the default domain". Signed-off-by: Sunil Goutham Acked-by: Will Deacon Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu-v3.c | 3 +++ drivers/iommu/arm-smmu.c | 3 +++ 2 files changed, 6 insertions(+) diff --git a/drivers/iommu/arm-smmu-v3.c b/drivers/iommu/arm-smmu-v3.c index 803352d78d43..6ef9c3ed4344 100644 --- a/drivers/iommu/arm-smmu-v3.c +++ b/drivers/iommu/arm-smmu-v3.c @@ -1714,6 +1714,9 @@ arm_smmu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct io_pgtable_ops *ops = smmu_domain->pgtbl_ops; + if (domain->type == IOMMU_DOMAIN_IDENTITY) + return iova; + if (!ops) return 0; diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 77242afa1efc..a36e80abf084 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1459,6 +1459,9 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, struct arm_smmu_domain *smmu_domain = to_smmu_domain(domain); struct io_pgtable_ops *ops= smmu_domain->pgtbl_ops; + if (domain->type == IOMMU_DOMAIN_IDENTITY) + return iova; + if (!ops) return 0; From bfd20f1cc85010d2f2d77e544da05cd8c149ba9b Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 26 Apr 2017 09:18:35 -0700 Subject: [PATCH 54/63] x86, iommu/vt-d: Add an option to disable Intel IOMMU force on IOMMU harms performance signficantly when we run very fast networking workloads. It's 40GB networking doing XDP test. Software overhead is almost unaware, but it's the IOTLB miss (based on our analysis) which kills the performance. We observed the same performance issue even with software passthrough (identity mapping), only the hardware passthrough survives. The pps with iommu (with software passthrough) is only about ~30% of that without it. This is a limitation in hardware based on our observation, so we'd like to disable the IOMMU force on, but we do want to use TBOOT and we can sacrifice the DMA security bought by IOMMU. I must admit I know nothing about TBOOT, but TBOOT guys (cc-ed) think not eabling IOMMU is totally ok. So introduce a new boot option to disable the force on. It's kind of silly we need to run into intel_iommu_init even without force on, but we need to disable TBOOT PMR registers. For system without the boot option, nothing is changed. Signed-off-by: Shaohua Li Signed-off-by: Joerg Roedel --- .../admin-guide/kernel-parameters.txt | 9 +++++++++ arch/x86/kernel/tboot.c | 3 +++ drivers/iommu/intel-iommu.c | 18 ++++++++++++++++++ include/linux/dma_remapping.h | 1 + 4 files changed, 31 insertions(+) diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 2ba45caabada..17135bfade6a 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1578,6 +1578,15 @@ extended tables themselves, and also PASID support. With this option set, extended tables will not be used even on hardware which claims to support them. + tboot_noforce [Default Off] + Do not force the Intel IOMMU enabled under tboot. + By default, tboot will force Intel IOMMU on, which + could harm performance of some high-throughput + devices like 40GBit network cards, even if identity + mapping is enabled. + Note that using this option lowers the security + provided by tboot because it makes the system + vulnerable to DMA attacks. intel_idle.max_cstate= [KNL,HW,ACPI,X86] 0 disables intel_idle and fall back on acpi_idle. diff --git a/arch/x86/kernel/tboot.c b/arch/x86/kernel/tboot.c index b868fa1b812b..edbdfe6ab60a 100644 --- a/arch/x86/kernel/tboot.c +++ b/arch/x86/kernel/tboot.c @@ -510,6 +510,9 @@ int tboot_force_iommu(void) if (!tboot_enabled()) return 0; + if (!intel_iommu_tboot_noforce) + return 1; + if (no_iommu || swiotlb || dmar_disabled) pr_warning("Forcing Intel-IOMMU to enabled\n"); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 5f08ba13972b..b0ced1c13713 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -183,6 +183,7 @@ static int rwbf_quirk; * (used when kernel is launched w/ TXT) */ static int force_on = 0; +int intel_iommu_tboot_noforce; /* * 0: Present @@ -607,6 +608,10 @@ static int __init intel_iommu_setup(char *str) "Intel-IOMMU: enable pre-production PASID support\n"); intel_iommu_pasid28 = 1; iommu_identity_mapping |= IDENTMAP_GFX; + } else if (!strncmp(str, "tboot_noforce", 13)) { + printk(KERN_INFO + "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); + intel_iommu_tboot_noforce = 1; } str += strcspn(str, ","); @@ -4850,6 +4855,19 @@ int __init intel_iommu_init(void) } if (no_iommu || dmar_disabled) { + /* + * We exit the function here to ensure IOMMU's remapping and + * mempool aren't setup, which means that the IOMMU's PMRs + * won't be disabled via the call to init_dmars(). So disable + * it explicitly here. The PMRs were setup by tboot prior to + * calling SENTER, but the kernel is expected to reset/tear + * down the PMRs. + */ + if (intel_iommu_tboot_noforce) { + for_each_iommu(iommu, drhd) + iommu_disable_protect_mem_regions(iommu); + } + /* * Make sure the IOMMUs are switched off, even when we * boot into a kexec kernel and the previous kernel left diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 187c10299722..90884072fa73 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -39,6 +39,7 @@ extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; extern int intel_iommu_enabled; +extern int intel_iommu_tboot_noforce; #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) { From e8245c1b1a3bb8474f91c69ccd13637d3589bb2c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Apr 2017 15:34:06 +0200 Subject: [PATCH 55/63] iommu: Include device.h in iommu.h We make use of 'struct device' in iommu.h, so include device.h to make it available explicitly. Re-order the other headers while at it. Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 6a6de187ddc0..3b4fe4b79d20 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -19,11 +19,13 @@ #ifndef __LINUX_IOMMU_H #define __LINUX_IOMMU_H +#include +#include +#include #include #include #include -#include -#include + #include #define IOMMU_READ (1 << 0) From 207c6e36f122ebb1164d611c9f34f128313f47d5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Apr 2017 15:39:28 +0200 Subject: [PATCH 56/63] iommu: Move report_iommu_fault() to iommu.c The function is in no fast-path, there is no need for it to be static inline in a header file. This also removes the need to include iommu trace-points in iommu.h. Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 42 ++++++++++++++++++++++++++++++++++++++++++ include/linux/iommu.h | 41 ++--------------------------------------- 2 files changed, 44 insertions(+), 39 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 9170fd498f46..4cb5792cbc21 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1655,6 +1655,48 @@ void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr) } EXPORT_SYMBOL_GPL(iommu_domain_window_disable); +/** + * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework + * @domain: the iommu domain where the fault has happened + * @dev: the device where the fault has happened + * @iova: the faulting address + * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) + * + * This function should be called by the low-level IOMMU implementations + * whenever IOMMU faults happen, to allow high-level users, that are + * interested in such events, to know about them. + * + * This event may be useful for several possible use cases: + * - mere logging of the event + * - dynamic TLB/PTE loading + * - if restarting of the faulting device is required + * + * Returns 0 on success and an appropriate error code otherwise (if dynamic + * PTE/TLB loading will one day be supported, implementations will be able + * to tell whether it succeeded or not according to this return value). + * + * Specifically, -ENOSYS is returned if a fault handler isn't installed + * (though fault handlers can also return -ENOSYS, in case they want to + * elicit the default behavior of the IOMMU drivers). + */ +int report_iommu_fault(struct iommu_domain *domain, struct device *dev, + unsigned long iova, int flags) +{ + int ret = -ENOSYS; + + /* + * if upper layers showed interest and installed a fault handler, + * invoke it. + */ + if (domain->handler) + ret = domain->handler(domain, dev, iova, flags, + domain->handler_token); + + trace_io_page_fault(dev, iova, flags); + return ret; +} +EXPORT_SYMBOL_GPL(report_iommu_fault); + static int __init iommu_init(void) { iommu_group_kset = kset_create_and_add("iommu_groups", diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 3b4fe4b79d20..abaa0ca848bc 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -330,46 +330,9 @@ extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t offset, u64 size, int prot); extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr); -/** - * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework - * @domain: the iommu domain where the fault has happened - * @dev: the device where the fault has happened - * @iova: the faulting address - * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) - * - * This function should be called by the low-level IOMMU implementations - * whenever IOMMU faults happen, to allow high-level users, that are - * interested in such events, to know about them. - * - * This event may be useful for several possible use cases: - * - mere logging of the event - * - dynamic TLB/PTE loading - * - if restarting of the faulting device is required - * - * Returns 0 on success and an appropriate error code otherwise (if dynamic - * PTE/TLB loading will one day be supported, implementations will be able - * to tell whether it succeeded or not according to this return value). - * - * Specifically, -ENOSYS is returned if a fault handler isn't installed - * (though fault handlers can also return -ENOSYS, in case they want to - * elicit the default behavior of the IOMMU drivers). - */ -static inline int report_iommu_fault(struct iommu_domain *domain, - struct device *dev, unsigned long iova, int flags) -{ - int ret = -ENOSYS; - /* - * if upper layers showed interest and installed a fault handler, - * invoke it. - */ - if (domain->handler) - ret = domain->handler(domain, dev, iova, flags, - domain->handler_token); - - trace_io_page_fault(dev, iova, flags); - return ret; -} +extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, + unsigned long iova, int flags); static inline size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, From 8e12188400668046870087fc278404a11c853061 Mon Sep 17 00:00:00 2001 From: Qiuxu Zhuo Date: Fri, 28 Apr 2017 01:16:15 +0800 Subject: [PATCH 57/63] iommu/vt-d: Don't print the failure message when booting non-kdump kernel When booting a new non-kdump kernel, we have below failure message: [ 0.004000] DMAR-IR: IRQ remapping was enabled on dmar2 but we are not in kdump mode [ 0.004000] DMAR-IR: Failed to copy IR table for dmar2 from previous kernel [ 0.004000] DMAR-IR: IRQ remapping was enabled on dmar1 but we are not in kdump mode [ 0.004000] DMAR-IR: Failed to copy IR table for dmar1 from previous kernel [ 0.004000] DMAR-IR: IRQ remapping was enabled on dmar0 but we are not in kdump mode [ 0.004000] DMAR-IR: Failed to copy IR table for dmar0 from previous kernel [ 0.004000] DMAR-IR: IRQ remapping was enabled on dmar3 but we are not in kdump mode [ 0.004000] DMAR-IR: Failed to copy IR table for dmar3 from previous kernel For non-kdump case, we no need to copy IR table from previous kernel so it's nonthing actually failed. To be less alarming or misleading, do not print "DMAR-IR: Failed to copy IR table for dmar[0-9] from previous kernel" messages when booting non-kdump kernel. Signed-off-by: Qiuxu Zhuo Signed-off-by: Joerg Roedel --- drivers/iommu/intel_irq_remapping.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index ac596928f6b4..a190cbd76ef7 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -408,14 +408,6 @@ static int iommu_load_old_irte(struct intel_iommu *iommu) size_t size; u64 irta; - if (!is_kdump_kernel()) { - pr_warn("IRQ remapping was enabled on %s but we are not in kdump mode\n", - iommu->name); - clear_ir_pre_enabled(iommu); - iommu_disable_irq_remapping(iommu); - return -EINVAL; - } - /* Check whether the old ir-table has the same size as ours */ irta = dmar_readq(iommu->reg + DMAR_IRTA_REG); if ((irta & INTR_REMAP_TABLE_REG_SIZE_MASK) @@ -567,7 +559,12 @@ static int intel_setup_irq_remapping(struct intel_iommu *iommu) init_ir_status(iommu); if (ir_pre_enabled(iommu)) { - if (iommu_load_old_irte(iommu)) + if (!is_kdump_kernel()) { + pr_warn("IRQ remapping was enabled on %s but we are not in kdump mode\n", + iommu->name); + clear_ir_pre_enabled(iommu); + iommu_disable_irq_remapping(iommu); + } else if (iommu_load_old_irte(iommu)) pr_err("Failed to copy IR table for %s from previous kernel\n", iommu->name); else From d49f2dedf33b8e7752ec66ac2b3b5bf4d7210943 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Fri, 28 Apr 2017 16:59:49 +0100 Subject: [PATCH 58/63] ACPI/IORT: Fix CONFIG_IOMMU_API dependency The IOMMU probe deferral IORT rework had to add code in iort_iommu_configure() and iort_iommu_xlate() that requires the IOMMU_API to be selected in order to compile and work. Stub out the pieces of code that depend on CONFIG_IOMMU_API to be selected to prevent compilation failures such as: drivers/acpi/arm64/iort.c: In function 'iort_iommu_xlate': drivers/acpi/arm64/iort.c:647:22: error: 'struct iommu_fwspec' has no member named 'ops' by wrapping the code in static inline functions that provide a NOP implementation when CONFIG_IOMMU_API is not selected. Signed-off-by: Lorenzo Pieralisi Reported-by: Arnd Bergmann Cc: Arnd Bergmann Cc: Will Deacon Cc: Catalin Marinas Cc: Robin Murphy Cc: Joerg Roedel Cc: Sricharan R Signed-off-by: Joerg Roedel --- drivers/acpi/arm64/iort.c | 44 ++++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/drivers/acpi/arm64/iort.c b/drivers/acpi/arm64/iort.c index e7b1940ff13b..a629e83bff24 100644 --- a/drivers/acpi/arm64/iort.c +++ b/drivers/acpi/arm64/iort.c @@ -536,6 +536,33 @@ static inline bool iort_iommu_driver_enabled(u8 type) } } +#ifdef CONFIG_IOMMU_API +static inline +const struct iommu_ops *iort_fwspec_iommu_ops(struct iommu_fwspec *fwspec) +{ + return (fwspec && fwspec->ops) ? fwspec->ops : NULL; +} + +static inline +int iort_add_device_replay(const struct iommu_ops *ops, struct device *dev) +{ + int err = 0; + + if (!IS_ERR_OR_NULL(ops) && ops->add_device && dev->bus && + !dev->iommu_group) + err = ops->add_device(dev); + + return err; +} +#else +static inline +const struct iommu_ops *iort_fwspec_iommu_ops(struct iommu_fwspec *fwspec) +{ return NULL; } +static inline +int iort_add_device_replay(const struct iommu_ops *ops, struct device *dev) +{ return 0; } +#endif + static const struct iommu_ops *iort_iommu_xlate(struct device *dev, struct acpi_iort_node *node, u32 streamid) @@ -543,14 +570,14 @@ static const struct iommu_ops *iort_iommu_xlate(struct device *dev, const struct iommu_ops *ops = NULL; int ret = -ENODEV; struct fwnode_handle *iort_fwnode; - struct iommu_fwspec *fwspec = dev->iommu_fwspec; /* * If we already translated the fwspec there * is nothing left to do, return the iommu_ops. */ - if (fwspec && fwspec->ops) - return fwspec->ops; + ops = iort_fwspec_iommu_ops(dev->iommu_fwspec); + if (ops) + return ops; if (node) { iort_fwnode = iort_get_fwnode(node); @@ -611,6 +638,7 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) struct acpi_iort_node *node, *parent; const struct iommu_ops *ops = NULL; u32 streamid = 0; + int err; if (dev_is_pci(dev)) { struct pci_bus *bus = to_pci_dev(dev)->bus; @@ -654,13 +682,9 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) * If we have reason to believe the IOMMU driver missed the initial * add_device callback for dev, replay it to get things in order. */ - if (!IS_ERR_OR_NULL(ops) && ops->add_device && - dev->bus && !dev->iommu_group) { - int err = ops->add_device(dev); - - if (err) - ops = ERR_PTR(err); - } + err = iort_add_device_replay(ops, dev); + if (err) + ops = ERR_PTR(err); return ops; } From 26b37b946a5c2658dbc37dd5d6df40aaa9685d70 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 15 May 2015 02:00:02 +0300 Subject: [PATCH 59/63] arm: dma-mapping: Don't override dma_ops in arch_setup_dma_ops() The arch_setup_dma_ops() function is in charge of setting dma_ops with a call to set_dma_ops(). set_dma_ops() is also called from - highbank and mvebu bus notifiers - dmabounce (to be replaced with swiotlb) - arm_iommu_attach_device (arm_iommu_attach_device is itself called from IOMMU and bus master device drivers) To allow the arch_setup_dma_ops() call to be moved from device add time to device probe time we must ensure that dma_ops already setup by any of the above callers will not be overriden. Aftering replacing dmabounce with swiotlb, converting IOMMU drivers to of_xlate and taking care of highbank and mvebu, the workaround should be removed. Signed-off-by: Sricharan R Signed-off-by: Laurent Pinchart Tested-by: Ralph Sennhauser Signed-off-by: Joerg Roedel --- arch/arm/mm/dma-mapping.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 63eabb06f9f1..ff131928be50 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -2390,6 +2390,15 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct dma_map_ops *dma_ops; dev->archdata.dma_coherent = coherent; + + /* + * Don't override the dma_ops if they have already been set. Ideally + * this should be the only location where dma_ops are set, remove this + * check when all other callers of set_dma_ops will have disappeared. + */ + if (dev->dma_ops) + return; + if (arm_setup_iommu_dma_ops(dev, dma_base, size, iommu)) dma_ops = arm_get_iommu_dma_map_ops(coherent); else From 461a6946b1f93f6720577fb06aa78e8cbd9291c9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Apr 2017 15:46:20 +0200 Subject: [PATCH 60/63] iommu: Remove pci.h include from trace/events/iommu.h The include file does not need any PCI specifics, so remove that include. Also fix the places that relied on it. Signed-off-by: Joerg Roedel --- arch/arm64/mm/dma-mapping.c | 1 + drivers/infiniband/hw/qedr/main.c | 1 + drivers/iommu/fsl_pamu.h | 1 + drivers/iommu/rockchip-iommu.c | 1 + drivers/iommu/tegra-smmu.c | 1 + include/linux/dma-iommu.h | 1 + include/trace/events/iommu.h | 1 - 7 files changed, 6 insertions(+), 1 deletion(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 81cdb2e844ed..982f85b81624 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -28,6 +28,7 @@ #include #include #include +#include #include diff --git a/drivers/infiniband/hw/qedr/main.c b/drivers/infiniband/hw/qedr/main.c index b9b47e5cc8b3..33033624cd9b 100644 --- a/drivers/infiniband/hw/qedr/main.c +++ b/drivers/infiniband/hw/qedr/main.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/iommu/fsl_pamu.h b/drivers/iommu/fsl_pamu.h index aab723f91f12..c3434f29c967 100644 --- a/drivers/iommu/fsl_pamu.h +++ b/drivers/iommu/fsl_pamu.h @@ -20,6 +20,7 @@ #define __FSL_PAMU_H #include +#include #include diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index 9afcbf79f0b0..0ba303a184dd 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 9305964250ac..eeb19f560a05 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 5725c94b1f12..abd946569515 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -20,6 +20,7 @@ #include #ifdef CONFIG_IOMMU_DMA +#include #include #include diff --git a/include/trace/events/iommu.h b/include/trace/events/iommu.h index 2c7befb10f13..99254ed89212 100644 --- a/include/trace/events/iommu.h +++ b/include/trace/events/iommu.h @@ -11,7 +11,6 @@ #define _TRACE_IOMMU_H #include -#include struct device; From 208480bb273e15f42711bd47f70dc0fbfa2570b8 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Apr 2017 15:49:57 +0200 Subject: [PATCH 61/63] iommu: Remove trace-events include from iommu.h It is not needed there anymore. All places needing it are fixed. Signed-off-by: Joerg Roedel --- drivers/media/platform/mtk-vpu/mtk_vpu.c | 1 + include/linux/iommu.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/media/platform/mtk-vpu/mtk_vpu.c b/drivers/media/platform/mtk-vpu/mtk_vpu.c index 463b69c934be..e011c8dc0198 100644 --- a/drivers/media/platform/mtk-vpu/mtk_vpu.c +++ b/drivers/media/platform/mtk-vpu/mtk_vpu.c @@ -23,6 +23,7 @@ #include #include #include +#include #include "mtk_vpu.h" diff --git a/include/linux/iommu.h b/include/linux/iommu.h index abaa0ca848bc..dda8717545e9 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -26,8 +26,6 @@ #include #include -#include - #define IOMMU_READ (1 << 0) #define IOMMU_WRITE (1 << 1) #define IOMMU_CACHE (1 << 2) /* DMA cache coherency */ From 4512c7bccb7794fb6d6ff37ae449154413bd4963 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Tue, 2 May 2017 18:21:12 -0400 Subject: [PATCH 62/63] soc/qbman: Fix implicit header dependency now causing build fails In commit 461a6946b1f9 ("iommu: Remove pci.h include from trace/events/iommu.h") that header shuffle uncovered an implicit include in this driver, manifesting as: CC drivers/soc/fsl/qbman/qman_portal.o drivers/soc/fsl/qbman/qman_portal.c: In function 'qman_portal_probe': drivers/soc/fsl/qbman/qman_portal.c:299:2: error: implicit declaration of function 'dma_set_mask' drivers/soc/fsl/qbman/qman_portal.c:299:2: error: implicit declaration of function 'DMA_BIT_MASK' if (dma_set_mask(dev, DMA_BIT_MASK(40))) { ^ on the corenet32_smp_defconfig (and 64 bit respectively.) The above commit was singled out via git bisect. The header it was implictly relying on getting was dma-mapping.h - so we explicitly add it here. Fixes: 461a6946b1f9 ("iommu: Remove pci.h include from trace/events/iommu.h") Cc: Joerg Roedel Cc: Scott Wood Cc: linuxppc-dev@lists.ozlabs.org Signed-off-by: Paul Gortmaker Signed-off-by: Joerg Roedel --- drivers/soc/fsl/qbman/qman_portal.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/soc/fsl/qbman/qman_portal.c b/drivers/soc/fsl/qbman/qman_portal.c index adbaa30d3c5a..4a6a8ae5e0aa 100644 --- a/drivers/soc/fsl/qbman/qman_portal.c +++ b/drivers/soc/fsl/qbman/qman_portal.c @@ -30,6 +30,8 @@ #include "qman_priv.h" +#include + struct qman_portal *qman_dma_portal; EXPORT_SYMBOL(qman_dma_portal); From 290d638e04e7b94ff34d2f5e9426e7ce617e9a59 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Tue, 2 May 2017 18:21:12 -0400 Subject: [PATCH 63/63] soc/qbman: Move dma-mapping.h include to qman_priv.h With the include there it fixes all build failures in this directory caused by commit 461a6946b1f9. Fixes: 461a6946b1f9 ("iommu: Remove pci.h include from trace/events/iommu.h") Signed-off-by: Joerg Roedel --- drivers/soc/fsl/qbman/qman_portal.c | 2 -- drivers/soc/fsl/qbman/qman_priv.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/soc/fsl/qbman/qman_portal.c b/drivers/soc/fsl/qbman/qman_portal.c index 4a6a8ae5e0aa..adbaa30d3c5a 100644 --- a/drivers/soc/fsl/qbman/qman_portal.c +++ b/drivers/soc/fsl/qbman/qman_portal.c @@ -30,8 +30,6 @@ #include "qman_priv.h" -#include - struct qman_portal *qman_dma_portal; EXPORT_SYMBOL(qman_dma_portal); diff --git a/drivers/soc/fsl/qbman/qman_priv.h b/drivers/soc/fsl/qbman/qman_priv.h index 53685b59718e..4f2ef3ebd88e 100644 --- a/drivers/soc/fsl/qbman/qman_priv.h +++ b/drivers/soc/fsl/qbman/qman_priv.h @@ -33,6 +33,7 @@ #include "dpaa_sys.h" #include +#include #include #if defined(CONFIG_FSL_PAMU)