From f171abab8f1a75797124be5aae8376e20e4852d9 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Mon, 4 Aug 2014 10:06:28 +0530 Subject: [PATCH 01/41] iommu/exynos: Fix trivial typos Fixed trivial typos and grammar to improve readability. Changed w/a to workaround. Signed-off-by: Sachin Kamat Acked-by: Randy Dunlap Signed-off-by: Joerg Roedel --- drivers/iommu/exynos-iommu.c | 51 ++++++++++++++++++------------------ 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/drivers/iommu/exynos-iommu.c b/drivers/iommu/exynos-iommu.c index d037e87a1fe5..74233186f6f7 100644 --- a/drivers/iommu/exynos-iommu.c +++ b/drivers/iommu/exynos-iommu.c @@ -32,7 +32,7 @@ typedef u32 sysmmu_iova_t; typedef u32 sysmmu_pte_t; -/* We does not consider super section mapping (16MB) */ +/* We do not consider super section mapping (16MB) */ #define SECT_ORDER 20 #define LPAGE_ORDER 16 #define SPAGE_ORDER 12 @@ -307,7 +307,7 @@ static void show_fault_information(const char *name, static irqreturn_t exynos_sysmmu_irq(int irq, void *dev_id) { - /* SYSMMU is in blocked when interrupt occurred. */ + /* SYSMMU is in blocked state when interrupt occurred. */ struct sysmmu_drvdata *data = dev_id; enum exynos_sysmmu_inttype itype; sysmmu_iova_t addr = -1; @@ -567,8 +567,8 @@ static void sysmmu_tlb_invalidate_entry(struct device *dev, sysmmu_iova_t iova, /* * L2TLB invalidation required * 4KB page: 1 invalidation - * 64KB page: 16 invalidation - * 1MB page: 64 invalidation + * 64KB page: 16 invalidations + * 1MB page: 64 invalidations * because it is set-associative TLB * with 8-way and 64 sets. * 1MB page can be cached in one of all sets. @@ -714,7 +714,7 @@ static int exynos_iommu_domain_init(struct iommu_domain *domain) if (!priv->lv2entcnt) goto err_counter; - /* w/a of System MMU v3.3 to prevent caching 1MiB mapping */ + /* Workaround for System MMU v3.3 to prevent caching 1MiB mapping */ for (i = 0; i < NUM_LV1ENTRIES; i += 8) { priv->pgtable[i + 0] = ZERO_LV2LINK; priv->pgtable[i + 1] = ZERO_LV2LINK; @@ -861,14 +861,14 @@ static sysmmu_pte_t *alloc_lv2entry(struct exynos_iommu_domain *priv, pgtable_flush(sent, sent + 1); /* - * If pretched SLPD is a fault SLPD in zero_l2_table, FLPD cache - * may caches the address of zero_l2_table. This function - * replaces the zero_l2_table with new L2 page table to write - * valid mappings. + * If pre-fetched SLPD is a faulty SLPD in zero_l2_table, + * FLPD cache may cache the address of zero_l2_table. This + * function replaces the zero_l2_table with new L2 page table + * to write valid mappings. * Accessing the valid area may cause page fault since FLPD - * cache may still caches zero_l2_table for the valid area - * instead of new L2 page table that have the mapping - * information of the valid area + * cache may still cache zero_l2_table for the valid area + * instead of new L2 page table that has the mapping + * information of the valid area. * Thus any replacement of zero_l2_table with other valid L2 * page table must involve FLPD cache invalidation for System * MMU v3.3. @@ -963,27 +963,27 @@ static int lv2set_page(sysmmu_pte_t *pent, phys_addr_t paddr, size_t size, /* * *CAUTION* to the I/O virtual memory managers that support exynos-iommu: * - * System MMU v3.x have an advanced logic to improve address translation + * System MMU v3.x has advanced logic to improve address translation * performance with caching more page table entries by a page table walk. - * However, the logic has a bug that caching fault page table entries and System - * MMU reports page fault if the cached fault entry is hit even though the fault - * entry is updated to a valid entry after the entry is cached. - * To prevent caching fault page table entries which may be updated to valid - * entries later, the virtual memory manager should care about the w/a about the - * problem. The followings describe w/a. + * However, the logic has a bug that while caching faulty page table entries, + * System MMU reports page fault if the cached fault entry is hit even though + * the fault entry is updated to a valid entry after the entry is cached. + * To prevent caching faulty page table entries which may be updated to valid + * entries later, the virtual memory manager should care about the workaround + * for the problem. The following describes the workaround. * * Any two consecutive I/O virtual address regions must have a hole of 128KiB - * in maximum to prevent misbehavior of System MMU 3.x. (w/a of h/w bug) + * at maximum to prevent misbehavior of System MMU 3.x (workaround for h/w bug). * - * Precisely, any start address of I/O virtual region must be aligned by + * Precisely, any start address of I/O virtual region must be aligned with * the following sizes for System MMU v3.1 and v3.2. * System MMU v3.1: 128KiB * System MMU v3.2: 256KiB * * Because System MMU v3.3 caches page table entries more aggressively, it needs - * more w/a. - * - Any two consecutive I/O virtual regions must be have a hole of larger size - * than or equal size to 128KiB. + * more workarounds. + * - Any two consecutive I/O virtual regions must have a hole of size larger + * than or equal to 128KiB. * - Start address of an I/O virtual region must be aligned by 128KiB. */ static int exynos_iommu_map(struct iommu_domain *domain, unsigned long l_iova, @@ -1061,7 +1061,8 @@ static size_t exynos_iommu_unmap(struct iommu_domain *domain, goto err; } - *ent = ZERO_LV2LINK; /* w/a for h/w bug in Sysmem MMU v3.3 */ + /* workaround for h/w bug in System MMU v3.3 */ + *ent = ZERO_LV2LINK; pgtable_flush(ent, ent + 1); size = SECT_SIZE; goto done; From f63ef69028742b09c1c0896177d555a30ff6cf13 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 11 Aug 2014 13:13:25 +0200 Subject: [PATCH 02/41] iommu/vt-d: Don't store SIRTP request Don't store the SIRTP request bit in the register state. It will otherwise become sticky and could request an Interrupt Remap Table Pointer update on each command register write. Found while starting to emulate IR in QEMU, not by observing problems on real hardware. Signed-off-by: Jan Kiszka Signed-off-by: Joerg Roedel --- drivers/iommu/intel_irq_remapping.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 0df41f6264f5..a872874c2565 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -438,8 +438,7 @@ static void iommu_set_irq_remapping(struct intel_iommu *iommu, int mode) (addr) | IR_X2APIC_MODE(mode) | INTR_REMAP_TABLE_REG_SIZE); /* Set interrupt-remapping table pointer */ - iommu->gcmd |= DMA_GCMD_SIRTP; - writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG); + writel(iommu->gcmd | DMA_GCMD_SIRTP, iommu->reg + DMAR_GCMD_REG); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_IRTPS), sts); From eecbad7d0306b9ee4f621517052913d1adaea753 Mon Sep 17 00:00:00 2001 From: Andreea-Cristina Bernat Date: Mon, 18 Aug 2014 15:20:56 +0300 Subject: [PATCH 03/41] iommu: Replace rcu_assign_pointer() with RCU_INIT_POINTER() The use of "rcu_assign_pointer()" is NULLing out the pointer. According to RCU_INIT_POINTER()'s block comment: "1. This use of RCU_INIT_POINTER() is NULLing out the pointer" it is better to use it instead of rcu_assign_pointer() because it has a smaller overhead. The following Coccinelle semantic patch was used: @@ @@ - rcu_assign_pointer + RCU_INIT_POINTER (..., NULL) Signed-off-by: Andreea-Cristina Bernat Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 60ab474bfff3..8ed55b0a1ce4 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -247,7 +247,7 @@ int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment, for_each_active_dev_scope(devices, count, index, tmp) if (tmp == &info->dev->dev) { - rcu_assign_pointer(devices[index].dev, NULL); + RCU_INIT_POINTER(devices[index].dev, NULL); synchronize_rcu(); put_device(tmp); return 1; From 50917e265ebd2ea33bc34b22b3981f1e88415eae Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Aug 2014 16:38:38 +0200 Subject: [PATCH 04/41] iommu/amd: Move struct iommu_dev_data to amd_iommu.c The struct is only used there, so it doesn't need to be in the header file. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 21 +++++++++++++++++++++ drivers/iommu/amd_iommu_types.h | 21 --------------------- 2 files changed, 21 insertions(+), 21 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index ecb0109a5360..fe6d7cc715f7 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -87,6 +87,27 @@ int amd_iommu_max_glx_val = -1; static struct dma_map_ops amd_iommu_dma_ops; +/* + * This struct contains device specific data for the IOMMU + */ +struct iommu_dev_data { + struct list_head list; /* For domain->dev_list */ + struct list_head dev_data_list; /* For global dev_data_list */ + struct iommu_dev_data *alias_data;/* The alias dev_data */ + struct protection_domain *domain; /* Domain the device is bound to */ + atomic_t bind; /* Domain attach reference count */ + u16 devid; /* PCI Device ID */ + bool iommu_v2; /* Device can make use of IOMMUv2 */ + bool passthrough; /* Default for device is pt_domain */ + struct { + bool enabled; + int qdep; + } ats; /* ATS state */ + bool pri_tlp; /* PASID TLB required for + PPR completions */ + u32 errata; /* Bitmap for errata to apply */ +}; + /* * general struct to manage commands send to an IOMMU */ diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 8e43b7cba133..cec51a8ba844 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -417,27 +417,6 @@ struct protection_domain { }; -/* - * This struct contains device specific data for the IOMMU - */ -struct iommu_dev_data { - struct list_head list; /* For domain->dev_list */ - struct list_head dev_data_list; /* For global dev_data_list */ - struct iommu_dev_data *alias_data;/* The alias dev_data */ - struct protection_domain *domain; /* Domain the device is bound to */ - atomic_t bind; /* Domain attach reference count */ - u16 devid; /* PCI Device ID */ - bool iommu_v2; /* Device can make use of IOMMUv2 */ - bool passthrough; /* Default for device is pt_domain */ - struct { - bool enabled; - int qdep; - } ats; /* ATS state */ - bool pri_tlp; /* PASID TLB required for - PPR completions */ - u32 errata; /* Bitmap for errata to apply */ -}; - /* * For dynamic growth the aperture size is split into ranges of 128MB of * DMA address space each. This struct represents one such range. From f251e187f2949c690fc13a91df7b873a6b5be671 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Aug 2014 16:48:10 +0200 Subject: [PATCH 05/41] iommu/amd: Keep a list of devices in an alias group Some broken devices might use any request-id from the alias group, so we need to set a DTE entry for every device in there. This patch adds creation of those lists. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index fe6d7cc715f7..8a152564a098 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -93,6 +93,7 @@ static struct dma_map_ops amd_iommu_dma_ops; struct iommu_dev_data { struct list_head list; /* For domain->dev_list */ struct list_head dev_data_list; /* For global dev_data_list */ + struct list_head alias_list; /* Link alias-groups together */ struct iommu_dev_data *alias_data;/* The alias dev_data */ struct protection_domain *domain; /* Domain the device is bound to */ atomic_t bind; /* Domain attach reference count */ @@ -135,6 +136,8 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) if (!dev_data) return NULL; + INIT_LIST_HEAD(&dev_data->alias_list); + dev_data->devid = devid; atomic_set(&dev_data->bind, 0); @@ -383,6 +386,9 @@ static int iommu_init_device(struct device *dev) return -ENOTSUPP; } dev_data->alias_data = alias_data; + + /* Add device to the alias_list */ + list_add(&dev_data->alias_list, &alias_data->alias_list); } ret = init_iommu_group(dev); From 397111abaaac259afcc48cd2fbfb78f63f27e797 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Aug 2014 17:31:51 +0200 Subject: [PATCH 06/41] iommu/amd: Attach and detach complete alias group Change tha device attach and detach semantic to apply to all devices in an alias group. This means all devices in an alias group are now attached and detached at the same time. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 46 +++++++++++++++++---------------------- 1 file changed, 20 insertions(+), 26 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 8a152564a098..a5e6b0a2de16 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2149,35 +2149,29 @@ static void do_detach(struct iommu_dev_data *dev_data) static int __attach_device(struct iommu_dev_data *dev_data, struct protection_domain *domain) { + struct iommu_dev_data *head, *entry; int ret; /* lock domain */ spin_lock(&domain->lock); - if (dev_data->alias_data != NULL) { - struct iommu_dev_data *alias_data = dev_data->alias_data; + head = dev_data; - /* Some sanity checks */ - ret = -EBUSY; - if (alias_data->domain != NULL && - alias_data->domain != domain) - goto out_unlock; + if (head->alias_data != NULL) + head = head->alias_data; - if (dev_data->domain != NULL && - dev_data->domain != domain) - goto out_unlock; + /* Now we have the root of the alias group, if any */ - /* Do real assignment */ - if (alias_data->domain == NULL) - do_attach(alias_data, domain); + ret = -EBUSY; + if (head->domain != NULL) + goto out_unlock; - atomic_inc(&alias_data->bind); - } + /* Attach alias group root */ + do_attach(head, domain); - if (dev_data->domain == NULL) - do_attach(dev_data, domain); - - atomic_inc(&dev_data->bind); + /* Attach other devices in the alias group */ + list_for_each_entry(entry, &head->alias_list, alias_list) + do_attach(entry, domain); ret = 0; @@ -2325,6 +2319,7 @@ static int attach_device(struct device *dev, */ static void __detach_device(struct iommu_dev_data *dev_data) { + struct iommu_dev_data *head, *entry; struct protection_domain *domain; unsigned long flags; @@ -2334,15 +2329,14 @@ static void __detach_device(struct iommu_dev_data *dev_data) spin_lock_irqsave(&domain->lock, flags); - if (dev_data->alias_data != NULL) { - struct iommu_dev_data *alias_data = dev_data->alias_data; + head = dev_data; + if (head->alias_data != NULL) + head = head->alias_data; - if (atomic_dec_and_test(&alias_data->bind)) - do_detach(alias_data); - } + list_for_each_entry(entry, &head->alias_list, alias_list) + do_detach(entry); - if (atomic_dec_and_test(&dev_data->bind)) - do_detach(dev_data); + do_detach(head); spin_unlock_irqrestore(&domain->lock, flags); From cafd2545cfd36ef6fee14f9c36870b81d3c98de5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 5 Aug 2014 17:34:25 +0200 Subject: [PATCH 07/41] iommu/amd: Remove device binding reference count This reference count is not used anymore, as all devices in an alias group are now attached and detached together. Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index a5e6b0a2de16..989c1ae03979 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -96,7 +96,6 @@ struct iommu_dev_data { struct list_head alias_list; /* Link alias-groups together */ struct iommu_dev_data *alias_data;/* The alias dev_data */ struct protection_domain *domain; /* Domain the device is bound to */ - atomic_t bind; /* Domain attach reference count */ u16 devid; /* PCI Device ID */ bool iommu_v2; /* Device can make use of IOMMUv2 */ bool passthrough; /* Default for device is pt_domain */ @@ -139,7 +138,6 @@ static struct iommu_dev_data *alloc_dev_data(u16 devid) INIT_LIST_HEAD(&dev_data->alias_list); dev_data->devid = devid; - atomic_set(&dev_data->bind, 0); spin_lock_irqsave(&dev_data_list_lock, flags); list_add_tail(&dev_data->dev_data_list, &dev_data_list); @@ -3179,7 +3177,6 @@ static void cleanup_domain(struct protection_domain *domain) entry = list_first_entry(&domain->dev_list, struct iommu_dev_data, list); __detach_device(entry); - atomic_set(&entry->bind, 0); } write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); From e3f595b9039c888016211c710eb1f58b3f60f809 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Thu, 4 Sep 2014 17:27:29 -0500 Subject: [PATCH 08/41] iommu/omap: Check for valid archdata in attach_dev Any device requiring to be attached to an iommu_domain must have valid archdata containing the necessary iommu information, which is SoC-specific. Add a check in the omap_iommu_attach_dev to make sure that the device has valid archdata before accessing different SoC-specific fields of the archdata. This prevents a NULL pointer dereference on any misconfigured devices. Signed-off-by: Suman Anna Acked-by: Laurent Pinchart Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index e202b0c24120..b888b7a6dd4f 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1091,6 +1091,11 @@ omap_iommu_attach_dev(struct iommu_domain *domain, struct device *dev) struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; int ret = 0; + if (!arch_data || !arch_data->name) { + dev_err(dev, "device doesn't have an associated iommu\n"); + return -EINVAL; + } + spin_lock(&omap_domain->lock); /* only a single device is supported per domain for now */ From 7d6827748d54f1745314e4e582691e8c02b9b4af Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Thu, 4 Sep 2014 17:27:30 -0500 Subject: [PATCH 09/41] iommu/omap: Fix iommu archdata name for DT-based devices A device is tied to an iommu through its archdata field. The archdata is allocated on the fly for DT-based devices automatically through the .add_device iommu ops. The current logic incorrectly assigned the name of the IOMMU user device, instead of the name of the IOMMU device as required by the attach logic. Fix this issue so that DT-based devices can attach successfully to an IOMMU domain. Signed-off-by: Suman Anna Acked-by: Laurent Pinchart Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index b888b7a6dd4f..df579f8779cd 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -26,6 +26,7 @@ #include #include #include +#include #include @@ -1244,6 +1245,7 @@ static int omap_iommu_add_device(struct device *dev) { struct omap_iommu_arch_data *arch_data; struct device_node *np; + struct platform_device *pdev; /* * Allocate the archdata iommu structure for DT-based devices. @@ -1258,13 +1260,19 @@ static int omap_iommu_add_device(struct device *dev) if (!np) return 0; + pdev = of_find_device_by_node(np); + if (WARN_ON(!pdev)) { + of_node_put(np); + return -EINVAL; + } + arch_data = kzalloc(sizeof(*arch_data), GFP_KERNEL); if (!arch_data) { of_node_put(np); return -ENOMEM; } - arch_data->name = kstrdup(dev_name(dev), GFP_KERNEL); + arch_data->name = kstrdup(dev_name(&pdev->dev), GFP_KERNEL); dev->archdata.iommu = arch_data; of_node_put(np); From 4cf740b0b6628bda1e5c9201ae0d4f56fc6c06a5 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 14 Jul 2014 19:47:39 +0100 Subject: [PATCH 10/41] iommu/arm-smmu: allow translation stage to be forced on the cmdline When debugging and testing code on an SMMU that supports nested translation, it can be useful to restrict the driver to a particular stage of translation. This patch adds a module parameter to the ARM SMMU driver to allow this by restricting the ability of the probe() code to detect support for only the specified stage. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index a83cc2a2a2ca..958ae8194afe 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -326,6 +326,11 @@ #define FSYNR0_WNR (1 << 4) +static int force_stage; +module_param_named(force_stage, force_stage, int, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(force_stage, + "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation."); + struct arm_smmu_smr { u8 idx; u16 mask; @@ -1716,6 +1721,13 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) return -ENODEV; } #endif + + /* Restrict available stages based on module parameter */ + if (force_stage == 1) + id &= ~(ID0_S2TS | ID0_NTS); + else if (force_stage == 2) + id &= ~(ID0_S1TS | ID0_NTS); + if (id & ID0_S1TS) { smmu->features |= ARM_SMMU_FEAT_TRANS_S1; dev_notice(smmu->dev, "\tstage 1 translation\n"); @@ -1732,8 +1744,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) } if (!(smmu->features & - (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2 | - ARM_SMMU_FEAT_TRANS_NESTED))) { + (ARM_SMMU_FEAT_TRANS_S1 | ARM_SMMU_FEAT_TRANS_S2))) { dev_err(smmu->dev, "\tno translation support!\n"); return -ENODEV; } From 8f68f8e28298abdf518648e794c71e534eb8841c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 15 Jul 2014 11:27:08 +0100 Subject: [PATCH 11/41] iommu/arm-smmu: add support for multi-master iommu groups Whilst the driver currently creates one IOMMU group per device, this will soon change when we start supporting non-transparent PCI bridges which require all upstream masters to be assigned to the same address space. This patch reworks our IOMMU group code so that we can easily support multi-master groups. The master configuration (streamids and smrs) is stored as private iommudata on the group, whilst the low-level attach/detach code is updated to avoid double alloc/free when dealing with multiple masters sharing the same SMMU configuration. This unifies device handling, regardless of whether the device sits on the platform or pci bus. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 65 ++++++++++++++++++++++++---------------- 1 file changed, 39 insertions(+), 26 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 958ae8194afe..38f8d670afb3 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -431,17 +431,17 @@ static void parse_driver_options(struct arm_smmu_device *smmu) } while (arm_smmu_options[++i].opt); } -static struct device *dev_get_master_dev(struct device *dev) +static struct device_node *dev_get_dev_node(struct device *dev) { if (dev_is_pci(dev)) { struct pci_bus *bus = to_pci_dev(dev)->bus; while (!pci_is_root_bus(bus)) bus = bus->parent; - return bus->bridge->parent; + return bus->bridge->parent->of_node; } - return dev; + return dev->of_node; } static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu, @@ -466,15 +466,17 @@ static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu, } static struct arm_smmu_master_cfg * -find_smmu_master_cfg(struct arm_smmu_device *smmu, struct device *dev) +find_smmu_master_cfg(struct device *dev) { - struct arm_smmu_master *master; + struct arm_smmu_master_cfg *cfg = NULL; + struct iommu_group *group = iommu_group_get(dev); - if (dev_is_pci(dev)) - return dev->archdata.iommu; + if (group) { + cfg = iommu_group_get_iommudata(group); + iommu_group_put(group); + } - master = find_smmu_master(smmu, dev->of_node); - return master ? &master->cfg : NULL; + return cfg; } static int insert_smmu_master(struct arm_smmu_device *smmu, @@ -550,7 +552,7 @@ static struct arm_smmu_device *find_smmu_for_device(struct device *dev) { struct arm_smmu_device *smmu; struct arm_smmu_master *master = NULL; - struct device_node *dev_node = dev_get_master_dev(dev)->of_node; + struct device_node *dev_node = dev_get_dev_node(dev); spin_lock(&arm_smmu_devices_lock); list_for_each_entry(smmu, &arm_smmu_devices, list) { @@ -1156,9 +1158,10 @@ static int arm_smmu_domain_add_master(struct arm_smmu_domain *smmu_domain, struct arm_smmu_device *smmu = smmu_domain->smmu; void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + /* Devices in an IOMMU group may already be configured */ ret = arm_smmu_master_configure_smrs(smmu, cfg); if (ret) - return ret; + return ret == -EEXIST ? 0 : ret; for (i = 0; i < cfg->num_streamids; ++i) { u32 idx, s2cr; @@ -1179,6 +1182,10 @@ static void arm_smmu_domain_remove_master(struct arm_smmu_domain *smmu_domain, struct arm_smmu_device *smmu = smmu_domain->smmu; void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + /* An IOMMU group is torn down by the first device to be removed */ + if ((smmu->features & ARM_SMMU_FEAT_STREAM_MATCH) && !cfg->smrs) + return; + /* * We *must* clear the S2CR first, because freeing the SMR means * that it can be re-allocated immediately. @@ -1200,7 +1207,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) struct arm_smmu_device *smmu, *dom_smmu; struct arm_smmu_master_cfg *cfg; - smmu = dev_get_master_dev(dev)->archdata.iommu; + smmu = find_smmu_for_device(dev); if (!smmu) { dev_err(dev, "cannot attach to SMMU, is it on the same bus?\n"); return -ENXIO; @@ -1228,7 +1235,7 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) } /* Looks ok, so add the device to the domain */ - cfg = find_smmu_master_cfg(smmu_domain->smmu, dev); + cfg = find_smmu_master_cfg(dev); if (!cfg) return -ENODEV; @@ -1240,7 +1247,7 @@ static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev) struct arm_smmu_domain *smmu_domain = domain->priv; struct arm_smmu_master_cfg *cfg; - cfg = find_smmu_master_cfg(smmu_domain->smmu, dev); + cfg = find_smmu_master_cfg(dev); if (cfg) arm_smmu_domain_remove_master(smmu_domain, cfg); } @@ -1554,17 +1561,19 @@ static int __arm_smmu_get_pci_sid(struct pci_dev *pdev, u16 alias, void *data) return 0; /* Continue walking */ } +static void __arm_smmu_release_pci_iommudata(void *data) +{ + kfree(data); +} + static int arm_smmu_add_device(struct device *dev) { struct arm_smmu_device *smmu; + struct arm_smmu_master_cfg *cfg; struct iommu_group *group; + void (*releasefn)(void *) = NULL; int ret; - if (dev->archdata.iommu) { - dev_warn(dev, "IOMMU driver already assigned to device\n"); - return -EINVAL; - } - smmu = find_smmu_for_device(dev); if (!smmu) return -ENODEV; @@ -1576,7 +1585,6 @@ static int arm_smmu_add_device(struct device *dev) } if (dev_is_pci(dev)) { - struct arm_smmu_master_cfg *cfg; struct pci_dev *pdev = to_pci_dev(dev); cfg = kzalloc(sizeof(*cfg), GFP_KERNEL); @@ -1592,11 +1600,20 @@ static int arm_smmu_add_device(struct device *dev) */ pci_for_each_dma_alias(pdev, __arm_smmu_get_pci_sid, &cfg->streamids[0]); - dev->archdata.iommu = cfg; + releasefn = __arm_smmu_release_pci_iommudata; } else { - dev->archdata.iommu = smmu; + struct arm_smmu_master *master; + + master = find_smmu_master(smmu, dev->of_node); + if (!master) { + ret = -ENODEV; + goto out_put_group; + } + + cfg = &master->cfg; } + iommu_group_set_iommudata(group, cfg, releasefn); ret = iommu_group_add_device(group, dev); out_put_group: @@ -1606,10 +1623,6 @@ out_put_group: static void arm_smmu_remove_device(struct device *dev) { - if (dev_is_pci(dev)) - kfree(dev->archdata.iommu); - - dev->archdata.iommu = NULL; iommu_group_remove_device(dev); } From 844e35bdfe834fccb5def1bc4cd614ca22409d0c Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 17 Jul 2014 11:23:51 +0100 Subject: [PATCH 12/41] iommu/arm-smmu: put iommu_domain pointer in dev->archdata.iommu In preparation for nested translation support, stick a pointer to the iommu_domain in dev->archdata.iommu. This makes it much easier to grab hold of the physical group configuration (e.g. cbndx) when dealing with vSMMU accesses from a guest. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 38f8d670afb3..e8d311152203 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1213,6 +1213,11 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) return -ENXIO; } + if (dev->archdata.iommu) { + dev_err(dev, "already attached to IOMMU domain\n"); + return -EEXIST; + } + /* * Sanity check the domain. We don't support domains across * different SMMUs. @@ -1239,7 +1244,10 @@ static int arm_smmu_attach_dev(struct iommu_domain *domain, struct device *dev) if (!cfg) return -ENODEV; - return arm_smmu_domain_add_master(smmu_domain, cfg); + ret = arm_smmu_domain_add_master(smmu_domain, cfg); + if (!ret) + dev->archdata.iommu = domain; + return ret; } static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev) @@ -1248,8 +1256,11 @@ static void arm_smmu_detach_dev(struct iommu_domain *domain, struct device *dev) struct arm_smmu_master_cfg *cfg; cfg = find_smmu_master_cfg(dev); - if (cfg) - arm_smmu_domain_remove_master(smmu_domain, cfg); + if (!cfg) + return; + + dev->archdata.iommu = NULL; + arm_smmu_domain_remove_master(smmu_domain, cfg); } static bool arm_smmu_pte_is_contiguous_range(unsigned long addr, From c757e8528a304214d0a9be2e99011b94bf374d37 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 30 Jul 2014 11:33:25 +0100 Subject: [PATCH 13/41] iommu/arm-smmu: use page shift instead of page size to avoid division Arbitrary integer division is not available in all ARM CPUs, so the GCC may spit out calls to helper functions which are not implemented in the kernel. This patch avoids these problems in the SMMU driver by using page shift instead of page size, so that divisions by the page size (as required by the vSMMU code) can be expressed as a simple right shift. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index e8d311152203..482cc9e3138a 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -59,7 +59,7 @@ /* SMMU global address space */ #define ARM_SMMU_GR0(smmu) ((smmu)->base) -#define ARM_SMMU_GR1(smmu) ((smmu)->base + (smmu)->pagesize) +#define ARM_SMMU_GR1(smmu) ((smmu)->base + (1 << (smmu)->pgshift)) /* * SMMU global address space with conditional offset to access secure @@ -224,7 +224,7 @@ /* Translation context bank */ #define ARM_SMMU_CB_BASE(smmu) ((smmu)->base + ((smmu)->size >> 1)) -#define ARM_SMMU_CB(smmu, n) ((n) * (smmu)->pagesize) +#define ARM_SMMU_CB(smmu, n) ((n) * (1 << (smmu)->pgshift)) #define ARM_SMMU_CB_SCTLR 0x0 #define ARM_SMMU_CB_RESUME 0x8 @@ -354,7 +354,7 @@ struct arm_smmu_device { void __iomem *base; unsigned long size; - unsigned long pagesize; + unsigned long pgshift; #define ARM_SMMU_FEAT_COHERENT_WALK (1 << 0) #define ARM_SMMU_FEAT_STREAM_MATCH (1 << 1) @@ -1814,12 +1814,12 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) /* ID1 */ id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID1); - smmu->pagesize = (id & ID1_PAGESIZE) ? SZ_64K : SZ_4K; + smmu->pgshift = (id & ID1_PAGESIZE) ? 16 : 12; /* Check for size mismatch of SMMU address space from mapped region */ size = 1 << (((id >> ID1_NUMPAGENDXB_SHIFT) & ID1_NUMPAGENDXB_MASK) + 1); - size *= (smmu->pagesize << 1); + size *= 2 << smmu->pgshift; if (smmu->size != size) dev_warn(smmu->dev, "SMMU address space size (0x%lx) differs from mapped region size (0x%lx)!\n", From 28d6007ba2fd344164e01ef300af7f621e9e6b0d Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 1 Sep 2014 16:24:48 +0100 Subject: [PATCH 14/41] iommu/arm-smmu: don't bother truncating the s1 output size to VA_BITS In order for nested translation to work correctly, we need to ensure that the maximum output address size from stage-1 is <= the maximum supported input address size to stage-2. The latter is currently defined by VA_BITS, since we make use of the CPU page table functions for allocating out tables and so the driver currently enforces this restriction by truncating the stage-1 output size during probe. In reality, this doesn't make a lot of sense; the guest OS is responsible for managing the stage-1 page tables, so we actually just need to ensure that the ID registers of the virtual SMMU interface only advertise the supported stage-2 input size. This patch fixes the problem by treating the stage-1 and stage-2 input address sizes separately. Reported-by: Tirumalesh Chalamarla Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 45 ++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 482cc9e3138a..27fd3d7ba886 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -24,7 +24,7 @@ * - v7/v8 long-descriptor format * - Non-secure access to the SMMU * - 4k and 64k pages, with contiguous pte hints. - * - Up to 42-bit addressing (dependent on VA_BITS) + * - Up to 48-bit addressing (dependent on VA_BITS) * - Context fault reporting */ @@ -375,8 +375,9 @@ struct arm_smmu_device { u32 num_mapping_groups; DECLARE_BITMAP(smr_map, ARM_SMMU_MAX_SMRS); - unsigned long input_size; + unsigned long s1_input_size; unsigned long s1_output_size; + unsigned long s2_input_size; unsigned long s2_output_size; u32 num_global_irqs; @@ -762,7 +763,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) gr1_base + ARM_SMMU_GR1_CBA2R(cfg->cbndx)); /* TTBCR2 */ - switch (smmu->input_size) { + switch (smmu->s1_input_size) { case 32: reg = (TTBCR2_ADDR_32 << TTBCR2_SEP_SHIFT); break; @@ -831,7 +832,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) reg = TTBCR_TG0_64K; if (!stage1) { - reg |= (64 - smmu->s1_output_size) << TTBCR_T0SZ_SHIFT; + reg |= (64 - smmu->s2_input_size) << TTBCR_T0SZ_SHIFT; switch (smmu->s2_output_size) { case 32: @@ -854,7 +855,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) break; } } else { - reg |= (64 - smmu->input_size) << TTBCR_T0SZ_SHIFT; + reg |= (64 - smmu->s1_input_size) << TTBCR_T0SZ_SHIFT; } } else { reg = 0; @@ -1454,9 +1455,11 @@ static int arm_smmu_handle_mapping(struct arm_smmu_domain *smmu_domain, if (cfg->cbar == CBAR_TYPE_S2_TRANS) { stage = 2; + input_mask = (1ULL << smmu->s2_input_size) - 1; output_mask = (1ULL << smmu->s2_output_size) - 1; } else { stage = 1; + input_mask = (1ULL << smmu->s1_input_size) - 1; output_mask = (1ULL << smmu->s1_output_size) - 1; } @@ -1466,7 +1469,6 @@ static int arm_smmu_handle_mapping(struct arm_smmu_domain *smmu_domain, if (size & ~PAGE_MASK) return -EINVAL; - input_mask = (1ULL << smmu->input_size) - 1; if ((phys_addr_t)iova & ~input_mask) return -ERANGE; @@ -1838,28 +1840,21 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) /* ID2 */ id = readl_relaxed(gr0_base + ARM_SMMU_GR0_ID2); size = arm_smmu_id_size_to_bits((id >> ID2_IAS_SHIFT) & ID2_IAS_MASK); + smmu->s1_output_size = min_t(unsigned long, PHYS_MASK_SHIFT, size); - /* - * Stage-1 output limited by stage-2 input size due to pgd - * allocation (PTRS_PER_PGD). - */ - if (smmu->features & ARM_SMMU_FEAT_TRANS_NESTED) { + /* Stage-2 input size limited due to pgd allocation (PTRS_PER_PGD) */ #ifdef CONFIG_64BIT - smmu->s1_output_size = min_t(unsigned long, VA_BITS, size); + smmu->s2_input_size = min_t(unsigned long, VA_BITS, size); #else - smmu->s1_output_size = min(32UL, size); + smmu->s2_input_size = min(32UL, size); #endif - } else { - smmu->s1_output_size = min_t(unsigned long, PHYS_MASK_SHIFT, - size); - } /* The stage-2 output mask is also applied for bypass */ size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK); smmu->s2_output_size = min_t(unsigned long, PHYS_MASK_SHIFT, size); if (smmu->version == 1) { - smmu->input_size = 32; + smmu->s1_input_size = 32; } else { #ifdef CONFIG_64BIT size = (id >> ID2_UBS_SHIFT) & ID2_UBS_MASK; @@ -1867,7 +1862,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) #else size = 32; #endif - smmu->input_size = size; + smmu->s1_input_size = size; if ((PAGE_SIZE == SZ_4K && !(id & ID2_PTFS_4K)) || (PAGE_SIZE == SZ_64K && !(id & ID2_PTFS_64K)) || @@ -1878,10 +1873,14 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) } } - dev_notice(smmu->dev, - "\t%lu-bit VA, %lu-bit IPA, %lu-bit PA\n", - smmu->input_size, smmu->s1_output_size, - smmu->s2_output_size); + if (smmu->features & ARM_SMMU_FEAT_TRANS_S1) + dev_notice(smmu->dev, "\tStage-1: %lu-bit VA -> %lu-bit IPA\n", + smmu->s1_input_size, smmu->s1_output_size); + + if (smmu->features & ARM_SMMU_FEAT_TRANS_S2) + dev_notice(smmu->dev, "\tStage-2: %lu-bit IPA -> %lu-bit PA\n", + smmu->s2_input_size, smmu->s2_output_size); + return 0; } From 093604033361928f7f355b4d1766d0179ae747fb Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 28 Aug 2014 17:51:59 +0100 Subject: [PATCH 15/41] iommu/arm-smmu: fix architecture version detection The SMMU driver was relying on a quirk of MMU-500 r2px to identify the correct architecture version. Since this does not apply to other implementations, make the architecture version for each supported implementation explicit. While we're at it, remove the unnecessary #ifdef since the dependencies for CONFIG_ARM_SMMU already imply CONFIG_OF. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 47 +++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 22 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 27fd3d7ba886..c00f483e106f 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -331,6 +331,11 @@ module_param_named(force_stage, force_stage, int, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(force_stage, "Force SMMU mappings to be installed at a particular stage of translation. A value of '1' or '2' forces the corresponding stage. All other values are ignored (i.e. no stage is forced). Note that selecting a specific stage will disable support for nested translation."); +enum arm_smmu_arch_version { + ARM_SMMU_V1 = 1, + ARM_SMMU_V2, +}; + struct arm_smmu_smr { u8 idx; u16 mask; @@ -365,7 +370,7 @@ struct arm_smmu_device { #define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0) u32 options; - int version; + enum arm_smmu_arch_version version; u32 num_context_banks; u32 num_s2_context_banks; @@ -737,7 +742,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) /* CBAR */ reg = cfg->cbar; - if (smmu->version == 1) + if (smmu->version == ARM_SMMU_V1) reg |= cfg->irptndx << CBAR_IRPTNDX_SHIFT; /* @@ -752,7 +757,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) } writel_relaxed(reg, gr1_base + ARM_SMMU_GR1_CBAR(cfg->cbndx)); - if (smmu->version > 1) { + if (smmu->version > ARM_SMMU_V1) { /* CBA2R */ #ifdef CONFIG_64BIT reg = CBA2R_RW64_64BIT; @@ -825,7 +830,7 @@ static void arm_smmu_init_context_bank(struct arm_smmu_domain *smmu_domain) * TTBCR * We use long descriptor, with inner-shareable WBWA tables in TTBR0. */ - if (smmu->version > 1) { + if (smmu->version > ARM_SMMU_V1) { if (PAGE_SIZE == SZ_4K) reg = TTBCR_TG0_4K; else @@ -922,7 +927,7 @@ static int arm_smmu_init_domain_context(struct iommu_domain *domain, goto out_unlock; cfg->cbndx = ret; - if (smmu->version == 1) { + if (smmu->version == ARM_SMMU_V1) { cfg->irptndx = atomic_inc_return(&smmu->irptndx); cfg->irptndx %= smmu->num_context_irqs; } else { @@ -1733,10 +1738,6 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) u32 id; dev_notice(smmu->dev, "probing hardware configuration...\n"); - - /* Primecell ID */ - id = readl_relaxed(gr0_base + ARM_SMMU_GR0_PIDR2); - smmu->version = ((id >> PIDR2_ARCH_SHIFT) & PIDR2_ARCH_MASK) + 1; dev_notice(smmu->dev, "SMMUv%d with:\n", smmu->version); /* ID0 */ @@ -1853,7 +1854,7 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) size = arm_smmu_id_size_to_bits((id >> ID2_OAS_SHIFT) & ID2_OAS_MASK); smmu->s2_output_size = min_t(unsigned long, PHYS_MASK_SHIFT, size); - if (smmu->version == 1) { + if (smmu->version == ARM_SMMU_V1) { smmu->s1_input_size = 32; } else { #ifdef CONFIG_64BIT @@ -1884,8 +1885,18 @@ static int arm_smmu_device_cfg_probe(struct arm_smmu_device *smmu) return 0; } +static struct of_device_id arm_smmu_of_match[] = { + { .compatible = "arm,smmu-v1", .data = (void *)ARM_SMMU_V1 }, + { .compatible = "arm,smmu-v2", .data = (void *)ARM_SMMU_V2 }, + { .compatible = "arm,mmu-400", .data = (void *)ARM_SMMU_V1 }, + { .compatible = "arm,mmu-500", .data = (void *)ARM_SMMU_V2 }, + { }, +}; +MODULE_DEVICE_TABLE(of, arm_smmu_of_match); + static int arm_smmu_device_dt_probe(struct platform_device *pdev) { + const struct of_device_id *of_id; struct resource *res; struct arm_smmu_device *smmu; struct device *dev = &pdev->dev; @@ -1900,6 +1911,9 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) } smmu->dev = dev; + of_id = of_match_node(arm_smmu_of_match, dev->of_node); + smmu->version = (enum arm_smmu_arch_version)of_id->data; + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); smmu->base = devm_ioremap_resource(dev, res); if (IS_ERR(smmu->base)) @@ -1964,7 +1978,7 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) parse_driver_options(smmu); - if (smmu->version > 1 && + if (smmu->version > ARM_SMMU_V1 && smmu->num_context_banks != smmu->num_context_irqs) { dev_err(dev, "found only %d context interrupt(s) but %d required\n", @@ -2045,17 +2059,6 @@ static int arm_smmu_device_remove(struct platform_device *pdev) return 0; } -#ifdef CONFIG_OF -static struct of_device_id arm_smmu_of_match[] = { - { .compatible = "arm,smmu-v1", }, - { .compatible = "arm,smmu-v2", }, - { .compatible = "arm,mmu-400", }, - { .compatible = "arm,mmu-500", }, - { }, -}; -MODULE_DEVICE_TABLE(of, arm_smmu_of_match); -#endif - static struct platform_driver arm_smmu_driver = { .driver = { .owner = THIS_MODULE, From d3aba0460a2e13b49892f7a12237f82658c44257 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 28 Aug 2014 17:52:00 +0100 Subject: [PATCH 16/41] iommu/arm-smmu: support MMU-401 MMU-401 is similar to MMU-400, but updated with limited ARMv8 support. Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.txt | 1 + drivers/iommu/arm-smmu.c | 1 + 2 files changed, 2 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt index 2d0f7cd867ea..06760503a819 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -14,6 +14,7 @@ conditions. "arm,smmu-v1" "arm,smmu-v2" "arm,mmu-400" + "arm,mmu-401" "arm,mmu-500" depending on the particular implementation and/or the diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index c00f483e106f..939242c41100 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1889,6 +1889,7 @@ static struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,smmu-v1", .data = (void *)ARM_SMMU_V1 }, { .compatible = "arm,smmu-v2", .data = (void *)ARM_SMMU_V2 }, { .compatible = "arm,mmu-400", .data = (void *)ARM_SMMU_V1 }, + { .compatible = "arm,mmu-401", .data = (void *)ARM_SMMU_V1 }, { .compatible = "arm,mmu-500", .data = (void *)ARM_SMMU_V2 }, { }, }; From ccd359f219bee914501a8892b148e2a1315066d3 Mon Sep 17 00:00:00 2001 From: Mitchel Humpherys Date: Fri, 19 Sep 2014 22:58:42 +0100 Subject: [PATCH 17/41] iommu/arm-smmu: fix bug in pmd construction We are using the same pfn for every pte we create while constructing the pmd. Fix this by actually updating the pfn on each iteration of the pmd construction loop. It's not clear if we can actually hit this bug right now since iommu_map splits up the calls to .map based on the page size, so we only ever seem to iterate this loop once. However, things might change in the future that might cause us to hit this. Signed-off-by: Mitchel Humpherys Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 939242c41100..37dc3dd0df96 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1408,6 +1408,7 @@ static int arm_smmu_alloc_init_pmd(struct arm_smmu_device *smmu, pud_t *pud, ret = arm_smmu_alloc_init_pte(smmu, pmd, addr, next, pfn, prot, stage); phys += next - addr; + pfn = __phys_to_pfn(phys); } while (pmd++, addr = next, addr < end); return ret; From 1aed074869a9cbe0a846ea7b254d8fd9a4a4d31f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Sep 2014 18:34:04 +0200 Subject: [PATCH 18/41] iommu: Convert iommu-caps from define to enum Allow compile-time type-checking. Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 2 +- include/linux/iommu.h | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 0639b9274b11..bc45478e26db 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -948,7 +948,7 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) EXPORT_SYMBOL_GPL(iommu_iova_to_phys); int iommu_domain_has_cap(struct iommu_domain *domain, - unsigned long cap) + enum iommu_cap cap) { if (unlikely(domain->ops->domain_has_cap == NULL)) return 0; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 20f9a527922a..98fc126655ae 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -57,8 +57,11 @@ struct iommu_domain { struct iommu_domain_geometry geometry; }; -#define IOMMU_CAP_CACHE_COHERENCY 0x1 -#define IOMMU_CAP_INTR_REMAP 0x2 /* isolates device intrs */ +enum iommu_cap { + IOMMU_CAP_CACHE_COHERENCY, /* IOMMU can enforce cache coherent DMA + transactions */ + IOMMU_CAP_INTR_REMAP, /* IOMMU supports interrupt isolation */ +}; /* * Following constraints are specifc to FSL_PAMUV1: @@ -155,7 +158,7 @@ extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova); extern int iommu_domain_has_cap(struct iommu_domain *domain, - unsigned long cap); + enum iommu_cap cap); extern void iommu_set_fault_handler(struct iommu_domain *domain, iommu_fault_handler_t handler, void *token); @@ -305,7 +308,7 @@ static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_ad } static inline int iommu_domain_has_cap(struct iommu_domain *domain, - unsigned long cap) + enum iommu_cap cap) { return 0; } From 3c0e0ca0a4e757159d868c4870556515d66b6c97 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 3 Sep 2014 18:47:25 +0200 Subject: [PATCH 19/41] iommu: Introduce iommu_capable API function This function will replace the current iommu_domain_has_cap function and clean up the interface while at it. Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 18 +++++++++++++++--- include/linux/iommu.h | 7 +++++++ 2 files changed, 22 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index bc45478e26db..aeb243f46332 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -817,6 +817,15 @@ bool iommu_present(struct bus_type *bus) } EXPORT_SYMBOL_GPL(iommu_present); +bool iommu_capable(struct bus_type *bus, enum iommu_cap cap) +{ + if (!bus->iommu_ops || !bus->iommu_ops->capable) + return false; + + return bus->iommu_ops->capable(cap); +} +EXPORT_SYMBOL_GPL(iommu_capable); + /** * iommu_set_fault_handler() - set a fault handler for an iommu domain * @domain: iommu domain @@ -950,10 +959,13 @@ EXPORT_SYMBOL_GPL(iommu_iova_to_phys); int iommu_domain_has_cap(struct iommu_domain *domain, enum iommu_cap cap) { - if (unlikely(domain->ops->domain_has_cap == NULL)) - return 0; + if (domain->ops->domain_has_cap != NULL) + return domain->ops->domain_has_cap(domain, cap); - return domain->ops->domain_has_cap(domain, cap); + if (domain->ops->capable != NULL) + return domain->ops->capable(cap); + + return 0; } EXPORT_SYMBOL_GPL(iommu_domain_has_cap); diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 98fc126655ae..d5534d554693 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -105,6 +105,7 @@ enum iommu_attr { * @pgsize_bitmap: bitmap of supported page sizes */ struct iommu_ops { + bool (*capable)(enum iommu_cap); int (*domain_init)(struct iommu_domain *domain); void (*domain_destroy)(struct iommu_domain *domain); int (*attach_dev)(struct iommu_domain *domain, struct device *dev); @@ -145,6 +146,7 @@ struct iommu_ops { extern int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops); extern bool iommu_present(struct bus_type *bus); +extern bool iommu_capable(struct bus_type *bus, enum iommu_cap cap); extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus); extern struct iommu_group *iommu_group_get_by_id(int id); extern void iommu_domain_free(struct iommu_domain *domain); @@ -253,6 +255,11 @@ static inline bool iommu_present(struct bus_type *bus) return false; } +static inline bool iommu_capable(struct bus_type *bus, enum iommu_cap cap) +{ + return false; +} + static inline struct iommu_domain *iommu_domain_alloc(struct bus_type *bus) { return NULL; From ab636481863a1039f5ca3643cfb04d4ae0c965cd Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 5 Sep 2014 10:48:21 +0200 Subject: [PATCH 20/41] iommu/amd: Convert to iommu_capable() API function Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index ecb0109a5360..7de92768871d 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -3384,20 +3384,20 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, return paddr; } -static int amd_iommu_domain_has_cap(struct iommu_domain *domain, - unsigned long cap) +static bool amd_iommu_capable(enum iommu_cap cap) { switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: - return 1; + return true; case IOMMU_CAP_INTR_REMAP: - return irq_remapping_enabled; + return (irq_remapping_enabled == 1); } - return 0; + return false; } static const struct iommu_ops amd_iommu_ops = { + .capable = amd_iommu_capable, .domain_init = amd_iommu_domain_init, .domain_destroy = amd_iommu_domain_destroy, .attach_dev = amd_iommu_attach_device, @@ -3405,7 +3405,6 @@ static const struct iommu_ops amd_iommu_ops = { .map = amd_iommu_map, .unmap = amd_iommu_unmap, .iova_to_phys = amd_iommu_iova_to_phys, - .domain_has_cap = amd_iommu_domain_has_cap, .pgsize_bitmap = AMD_IOMMU_PGSIZES, }; From 1fd0c775a573646eec972a3f40563506e33d9f8a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 5 Sep 2014 10:49:34 +0200 Subject: [PATCH 21/41] iommu/arm-smmu: Convert to iommu_capable() API function Cc: Will Deacon Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index a83cc2a2a2ca..1e0485e0aa6a 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1526,20 +1526,19 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, return __pfn_to_phys(pte_pfn(pte)) | (iova & ~PAGE_MASK); } -static int arm_smmu_domain_has_cap(struct iommu_domain *domain, - unsigned long cap) +static bool arm_smmu_capable(enum iommu_cap cap) { - struct arm_smmu_domain *smmu_domain = domain->priv; - struct arm_smmu_device *smmu = smmu_domain->smmu; - u32 features = smmu ? smmu->features : 0; - switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: - return features & ARM_SMMU_FEAT_COHERENT_WALK; + /* + * Return true here as the SMMU can always send out coherent + * requests. + */ + return true; case IOMMU_CAP_INTR_REMAP: - return 1; /* MSIs are just memory writes */ + return true; /* MSIs are just memory writes */ default: - return 0; + return false; } } @@ -1609,6 +1608,7 @@ static void arm_smmu_remove_device(struct device *dev) } static const struct iommu_ops arm_smmu_ops = { + .capable = arm_smmu_capable, .domain_init = arm_smmu_domain_init, .domain_destroy = arm_smmu_domain_destroy, .attach_dev = arm_smmu_attach_dev, @@ -1616,7 +1616,6 @@ static const struct iommu_ops arm_smmu_ops = { .map = arm_smmu_map, .unmap = arm_smmu_unmap, .iova_to_phys = arm_smmu_iova_to_phys, - .domain_has_cap = arm_smmu_domain_has_cap, .add_device = arm_smmu_add_device, .remove_device = arm_smmu_remove_device, .pgsize_bitmap = (SECTION_SIZE | From b7eb67858562c053c314dee264c425e8214c2bfb Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 5 Sep 2014 10:50:27 +0200 Subject: [PATCH 22/41] iommu/fsl: Convert to iommu_capable() API function Cc: Varun Sethi Signed-off-by: Joerg Roedel --- drivers/iommu/fsl_pamu_domain.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 56feed7cec15..c828f80d48b0 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -411,8 +411,7 @@ static phys_addr_t fsl_pamu_iova_to_phys(struct iommu_domain *domain, return get_phys_addr(dma_domain, iova); } -static int fsl_pamu_domain_has_cap(struct iommu_domain *domain, - unsigned long cap) +static bool fsl_pamu_capable(enum iommu_cap cap) { return cap == IOMMU_CAP_CACHE_COHERENCY; } @@ -1080,6 +1079,7 @@ static u32 fsl_pamu_get_windows(struct iommu_domain *domain) } static const struct iommu_ops fsl_pamu_ops = { + .capable = fsl_pamu_capable, .domain_init = fsl_pamu_domain_init, .domain_destroy = fsl_pamu_domain_destroy, .attach_dev = fsl_pamu_attach_device, @@ -1089,7 +1089,6 @@ static const struct iommu_ops fsl_pamu_ops = { .domain_get_windows = fsl_pamu_get_windows, .domain_set_windows = fsl_pamu_set_windows, .iova_to_phys = fsl_pamu_iova_to_phys, - .domain_has_cap = fsl_pamu_domain_has_cap, .domain_set_attr = fsl_pamu_set_domain_attr, .domain_get_attr = fsl_pamu_get_domain_attr, .add_device = fsl_pamu_add_device, From 5d587b8de56eec6c6a31b452a177cd8c066b267e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 5 Sep 2014 10:50:45 +0200 Subject: [PATCH 23/41] iommu/vt-d: Convert to iommu_capable() API function Cc: Jiang Liu Cc: David Woodhouse Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 5619f264862d..bc1a20395445 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4415,17 +4415,14 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, return phys; } -static int intel_iommu_domain_has_cap(struct iommu_domain *domain, - unsigned long cap) +static bool intel_iommu_capable(enum iommu_cap cap) { - struct dmar_domain *dmar_domain = domain->priv; - if (cap == IOMMU_CAP_CACHE_COHERENCY) - return dmar_domain->iommu_snooping; + return domain_update_iommu_snooping(NULL) == 1; if (cap == IOMMU_CAP_INTR_REMAP) - return irq_remapping_enabled; + return irq_remapping_enabled == 1; - return 0; + return false; } static int intel_iommu_add_device(struct device *dev) @@ -4464,6 +4461,7 @@ static void intel_iommu_remove_device(struct device *dev) } static const struct iommu_ops intel_iommu_ops = { + .capable = intel_iommu_capable, .domain_init = intel_iommu_domain_init, .domain_destroy = intel_iommu_domain_destroy, .attach_dev = intel_iommu_attach_device, @@ -4471,7 +4469,6 @@ static const struct iommu_ops intel_iommu_ops = { .map = intel_iommu_map, .unmap = intel_iommu_unmap, .iova_to_phys = intel_iommu_iova_to_phys, - .domain_has_cap = intel_iommu_domain_has_cap, .add_device = intel_iommu_add_device, .remove_device = intel_iommu_remove_device, .pgsize_bitmap = INTEL_IOMMU_PGSIZES, From 4480845eef8d7751f015322e3c6d2680ff7d7dc7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 5 Sep 2014 10:51:14 +0200 Subject: [PATCH 24/41] iommu/msm: Convert to iommu_capable() API function Signed-off-by: Joerg Roedel --- drivers/iommu/msm_iommu.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 49f41d6e02f1..6e3dcc289d59 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -603,10 +603,9 @@ fail: return ret; } -static int msm_iommu_domain_has_cap(struct iommu_domain *domain, - unsigned long cap) +static bool msm_iommu_capable(enum iommu_cap cap) { - return 0; + return false; } static void print_ctx_regs(void __iomem *base, int ctx) @@ -675,6 +674,7 @@ fail: } static const struct iommu_ops msm_iommu_ops = { + .capable = msm_iommu_capable, .domain_init = msm_iommu_domain_init, .domain_destroy = msm_iommu_domain_destroy, .attach_dev = msm_iommu_attach_dev, @@ -682,7 +682,6 @@ static const struct iommu_ops msm_iommu_ops = { .map = msm_iommu_map, .unmap = msm_iommu_unmap, .iova_to_phys = msm_iommu_iova_to_phys, - .domain_has_cap = msm_iommu_domain_has_cap, .pgsize_bitmap = MSM_IOMMU_PGSIZES, }; From 7c2aa6441e957df85eded3a7297d36a57b210988 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 5 Sep 2014 10:51:37 +0200 Subject: [PATCH 25/41] iommu/tegra: Convert to iommu_capable() API function Cc: Hiroshi Doyu Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-gart.c | 7 +++---- drivers/iommu/tegra-smmu.c | 7 +++---- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index b10a8ecede8e..6f44ebb84047 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -303,13 +303,13 @@ static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain, return pa; } -static int gart_iommu_domain_has_cap(struct iommu_domain *domain, - unsigned long cap) +static bool gart_iommu_capable(enum iommu_cap cap) { - return 0; + return false; } static const struct iommu_ops gart_iommu_ops = { + .capable = gart_iommu_capable, .domain_init = gart_iommu_domain_init, .domain_destroy = gart_iommu_domain_destroy, .attach_dev = gart_iommu_attach_dev, @@ -317,7 +317,6 @@ static const struct iommu_ops gart_iommu_ops = { .map = gart_iommu_map, .unmap = gart_iommu_unmap, .iova_to_phys = gart_iommu_iova_to_phys, - .domain_has_cap = gart_iommu_domain_has_cap, .pgsize_bitmap = GART_IOMMU_PGSIZES, }; diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 3ded3894623c..05d14e1e626f 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -780,10 +780,9 @@ static phys_addr_t smmu_iommu_iova_to_phys(struct iommu_domain *domain, return PFN_PHYS(pfn); } -static int smmu_iommu_domain_has_cap(struct iommu_domain *domain, - unsigned long cap) +static bool smmu_iommu_capable(enum iommu_cap cap) { - return 0; + return false; } static int smmu_iommu_attach_dev(struct iommu_domain *domain, @@ -949,6 +948,7 @@ static void smmu_iommu_domain_destroy(struct iommu_domain *domain) } static const struct iommu_ops smmu_iommu_ops = { + .capable = smmu_iommu_capable, .domain_init = smmu_iommu_domain_init, .domain_destroy = smmu_iommu_domain_destroy, .attach_dev = smmu_iommu_attach_dev, @@ -956,7 +956,6 @@ static const struct iommu_ops smmu_iommu_ops = { .map = smmu_iommu_map, .unmap = smmu_iommu_unmap, .iova_to_phys = smmu_iommu_iova_to_phys, - .domain_has_cap = smmu_iommu_domain_has_cap, .pgsize_bitmap = SMMU_IOMMU_PGSIZES, }; From ee5ba30ff75277cbfcfce2bb3b54211be5105a87 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 5 Sep 2014 10:54:09 +0200 Subject: [PATCH 26/41] kvm: iommu: Convert to use new iommu_capable() API function Cc: Gleb Natapov Cc: Paolo Bonzini Signed-off-by: Joerg Roedel --- virt/kvm/iommu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c index 714b94932312..45ee080573f4 100644 --- a/virt/kvm/iommu.c +++ b/virt/kvm/iommu.c @@ -191,8 +191,7 @@ int kvm_assign_device(struct kvm *kvm, return r; } - noncoherent = !iommu_domain_has_cap(kvm->arch.iommu_domain, - IOMMU_CAP_CACHE_COHERENCY); + noncoherent = !iommu_capable(&pci_bus_type, IOMMU_CAP_CACHE_COHERENCY); /* Check if need to update IOMMU page table for guest memory */ if (noncoherent != kvm->arch.iommu_noncoherent) { @@ -254,8 +253,7 @@ int kvm_iommu_map_guest(struct kvm *kvm) } if (!allow_unsafe_assigned_interrupts && - !iommu_domain_has_cap(kvm->arch.iommu_domain, - IOMMU_CAP_INTR_REMAP)) { + !iommu_capable(&pci_bus_type, IOMMU_CAP_INTR_REMAP)) { printk(KERN_WARNING "%s: No interrupt remapping support," " disallowing device assignment." " Re-enble with \"allow_unsafe_assigned_interrupts=1\"" From eb165f0584d2c073dd343bdc609e2f94d143037e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 5 Sep 2014 10:56:05 +0200 Subject: [PATCH 27/41] vfio: Convert to use new iommu_capable() API function Cc: Alex Williamson Acked-by: Alex Williamson Signed-off-by: Joerg Roedel --- drivers/vfio/vfio_iommu_type1.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 0734fbe5b651..562f686b4cba 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -713,14 +713,14 @@ static int vfio_iommu_type1_attach_group(void *iommu_data, list_add(&group->next, &domain->group_list); if (!allow_unsafe_interrupts && - !iommu_domain_has_cap(domain->domain, IOMMU_CAP_INTR_REMAP)) { + !iommu_capable(bus, IOMMU_CAP_INTR_REMAP)) { pr_warn("%s: No interrupt remapping support. Use the module param \"allow_unsafe_interrupts\" to enable VFIO IOMMU support on this platform\n", __func__); ret = -EPERM; goto out_detach; } - if (iommu_domain_has_cap(domain->domain, IOMMU_CAP_CACHE_COHERENCY)) + if (iommu_capable(bus, IOMMU_CAP_CACHE_COHERENCY)) domain->prot |= IOMMU_CACHE; /* From 6f952710917cfb801664333a491e4a120451a9ff Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 5 Sep 2014 12:22:27 +0200 Subject: [PATCH 28/41] IB/usnic: Convert to use new iommu_capable() API function Cc: Upinder Malhi Signed-off-by: Joerg Roedel --- drivers/infiniband/hw/usnic/usnic_uiom.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index 801a1d6937e4..417de1f32960 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -507,7 +507,7 @@ int usnic_uiom_attach_dev_to_pd(struct usnic_uiom_pd *pd, struct device *dev) if (err) goto out_free_dev; - if (!iommu_domain_has_cap(pd->domain, IOMMU_CAP_CACHE_COHERENCY)) { + if (!iommu_capable(dev->bus, IOMMU_CAP_CACHE_COHERENCY)) { usnic_err("IOMMU of %s does not support cache coherency\n", dev_name(dev)); err = -EINVAL; From 24278a24d88ae730229417e5d3bd452d7545fbcc Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 5 Sep 2014 10:57:11 +0200 Subject: [PATCH 29/41] iommu: Remove iommu_domain_has_cap() API function Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 13 ------------- include/linux/iommu.h | 11 ----------- 2 files changed, 24 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index aeb243f46332..d2d242fcaa3d 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -956,19 +956,6 @@ phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova) } EXPORT_SYMBOL_GPL(iommu_iova_to_phys); -int iommu_domain_has_cap(struct iommu_domain *domain, - enum iommu_cap cap) -{ - if (domain->ops->domain_has_cap != NULL) - return domain->ops->domain_has_cap(domain, cap); - - if (domain->ops->capable != NULL) - return domain->ops->capable(cap); - - return 0; -} -EXPORT_SYMBOL_GPL(iommu_domain_has_cap); - static size_t iommu_pgsize(struct iommu_domain *domain, unsigned long addr_merge, size_t size) { diff --git a/include/linux/iommu.h b/include/linux/iommu.h index d5534d554693..379a6179fd96 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -97,7 +97,6 @@ enum iommu_attr { * @map: map a physically contiguous memory region to an iommu domain * @unmap: unmap a physically contiguous memory region from an iommu domain * @iova_to_phys: translate iova to physical address - * @domain_has_cap: domain capabilities query * @add_device: add device to iommu grouping * @remove_device: remove device from iommu grouping * @domain_get_attr: Query domain attributes @@ -115,8 +114,6 @@ struct iommu_ops { size_t (*unmap)(struct iommu_domain *domain, unsigned long iova, size_t size); phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); - int (*domain_has_cap)(struct iommu_domain *domain, - unsigned long cap); int (*add_device)(struct device *dev); void (*remove_device)(struct device *dev); int (*device_group)(struct device *dev, unsigned int *groupid); @@ -159,8 +156,6 @@ extern int iommu_map(struct iommu_domain *domain, unsigned long iova, extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); extern phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_addr_t iova); -extern int iommu_domain_has_cap(struct iommu_domain *domain, - enum iommu_cap cap); extern void iommu_set_fault_handler(struct iommu_domain *domain, iommu_fault_handler_t handler, void *token); @@ -314,12 +309,6 @@ static inline phys_addr_t iommu_iova_to_phys(struct iommu_domain *domain, dma_ad return 0; } -static inline int iommu_domain_has_cap(struct iommu_domain *domain, - enum iommu_cap cap) -{ - return 0; -} - static inline void iommu_set_fault_handler(struct iommu_domain *domain, iommu_fault_handler_t handler, void *token) { From a6fda0f586bb2c39e763a2ae5561e1f6ade8c3a8 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 9 Sep 2014 18:45:45 +0300 Subject: [PATCH 30/41] iommu/omap: Remove omap_iommu unused owner field The owner field is never set. Remove it. Signed-off-by: Laurent Pinchart Acked-by: Suman Anna Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 10 ---------- drivers/iommu/omap-iommu.h | 1 - 2 files changed, 11 deletions(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index df579f8779cd..4b432c40580d 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -893,19 +893,11 @@ static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd) goto err_enable; flush_iotlb_all(obj); - if (!try_module_get(obj->owner)) { - err = -ENODEV; - goto err_module; - } - spin_unlock(&obj->iommu_lock); dev_dbg(obj->dev, "%s: %s\n", __func__, obj->name); return obj; -err_module: - if (obj->refcount == 1) - iommu_disable(obj); err_enable: obj->refcount--; spin_unlock(&obj->iommu_lock); @@ -926,8 +918,6 @@ static void omap_iommu_detach(struct omap_iommu *obj) if (--obj->refcount == 0) iommu_disable(obj); - module_put(obj->owner); - obj->iopgd = NULL; spin_unlock(&obj->iommu_lock); diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index 1275a822934b..4f1b68c08c15 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -28,7 +28,6 @@ struct iotlb_entry { struct omap_iommu { const char *name; - struct module *owner; void __iomem *regbase; struct device *dev; void *isr_priv; From 63eaa75e4362ac7981a7e619196a9c75fd03d717 Mon Sep 17 00:00:00 2001 From: Maurizio Lombardi Date: Thu, 11 Sep 2014 12:28:03 +0200 Subject: [PATCH 31/41] amd_iommu: do not dereference a NULL pointer address. under low memory conditions, alloc_pte() may return a NULL pointer. iommu_map_page() does not check it and will panic the system. Signed-off-by: Maurizio Lombardi Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 989c1ae03979..23c5be6a3a1e 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1393,6 +1393,9 @@ static int iommu_map_page(struct protection_domain *dom, count = PAGE_SIZE_PTE_COUNT(page_size); pte = alloc_pte(dom, bus_addr, page_size, NULL, GFP_KERNEL); + if (!pte) + return -ENOMEM; + for (i = 0; i < count; ++i) if (IOMMU_PTE_PRESENT(pte[i])) return -EBUSY; From d943b0ffba153cd63f836647b873b445842a2f58 Mon Sep 17 00:00:00 2001 From: Kiran Padwal Date: Thu, 11 Sep 2014 19:07:36 +0530 Subject: [PATCH 32/41] iommu: Make of_device_id array const Make of_device_id array const, because all OF functions handle it as const. Signed-off-by: Kiran Padwal Signed-off-by: Joerg Roedel --- drivers/iommu/arm-smmu.c | 2 +- drivers/iommu/omap-iommu.c | 2 +- drivers/iommu/tegra-gart.c | 2 +- drivers/iommu/tegra-smmu.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 1e0485e0aa6a..ce39b1294bfd 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -2011,7 +2011,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev) } #ifdef CONFIG_OF -static struct of_device_id arm_smmu_of_match[] = { +static const struct of_device_id arm_smmu_of_match[] = { { .compatible = "arm,smmu-v1", }, { .compatible = "arm,smmu-v2", }, { .compatible = "arm,mmu-400", }, diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index e202b0c24120..47517cf674ad 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1006,7 +1006,7 @@ static int omap_iommu_remove(struct platform_device *pdev) return 0; } -static struct of_device_id omap_iommu_of_match[] = { +static const struct of_device_id omap_iommu_of_match[] = { { .compatible = "ti,omap2-iommu" }, { .compatible = "ti,omap4-iommu" }, { .compatible = "ti,dra7-iommu" }, diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 6f44ebb84047..a6d76abf2c06 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -415,7 +415,7 @@ static const struct dev_pm_ops tegra_gart_pm_ops = { .resume = tegra_gart_resume, }; -static struct of_device_id tegra_gart_of_match[] = { +static const struct of_device_id tegra_gart_of_match[] = { { .compatible = "nvidia,tegra20-gart", }, { }, }; diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 05d14e1e626f..3afdf43f732a 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -1259,7 +1259,7 @@ static const struct dev_pm_ops tegra_smmu_pm_ops = { .resume = tegra_smmu_resume, }; -static struct of_device_id tegra_smmu_of_match[] = { +static const struct of_device_id tegra_smmu_of_match[] = { { .compatible = "nvidia,tegra30-smmu", }, { }, }; From f096c061f5525d1b35a65b793057b52061dcb486 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 19 Sep 2014 10:03:06 -0600 Subject: [PATCH 33/41] iommu: Rework iommu_group_get_for_pci_dev() It turns out that our assumption that aliases are always to the same slot isn't true. One particular platform reports an IVRS alias of the SATA controller (00:11.0) for the legacy IDE controller (00:14.1). When we hit this, we attempt to use a single IOMMU group for everything on the same bus, which in this case is the root complex. We already have multiple groups defined for the root complex by this point, resulting in multiple WARN_ON hits. This patch makes these sorts of aliases work again with IOMMU groups by reworking how we search through the PCI address space to find existing groups. This should also now handle looped dependencies and all sorts of crazy inter-dependencies that we'll likely never see. The recursion used here should never be very deep. It's unlikely to have individual aliases and only theoretical that we'd ever see a chain where one alias causes us to search through to yet another alias. We're also only dealing with PCIe device on a single bus, which means we'll typically only see multiple slots in use on the root complex. Loops are also a theoretically possibility, which I've tested using fake DMA alias quirks and prevent from causing problems using a bitmap of the devfn space that's been visited. Signed-off-by: Alex Williamson Cc: stable@vger.kernel.org # 3.17 Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 163 +++++++++++++++++++++++++----------------- 1 file changed, 96 insertions(+), 67 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index d2d242fcaa3d..b59826aa5531 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -30,6 +30,7 @@ #include #include #include +#include #include static struct kset *iommu_group_kset; @@ -519,6 +520,9 @@ int iommu_group_id(struct iommu_group *group) } EXPORT_SYMBOL_GPL(iommu_group_id); +static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, + unsigned long *devfns); + /* * To consider a PCI device isolated, we require ACS to support Source * Validation, Request Redirection, Completer Redirection, and Upstream @@ -529,6 +533,86 @@ EXPORT_SYMBOL_GPL(iommu_group_id); */ #define REQ_ACS_FLAGS (PCI_ACS_SV | PCI_ACS_RR | PCI_ACS_CR | PCI_ACS_UF) +/* + * For multifunction devices which are not isolated from each other, find + * all the other non-isolated functions and look for existing groups. For + * each function, we also need to look for aliases to or from other devices + * that may already have a group. + */ +static struct iommu_group *get_pci_function_alias_group(struct pci_dev *pdev, + unsigned long *devfns) +{ + struct pci_dev *tmp = NULL; + struct iommu_group *group; + + if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS)) + return NULL; + + for_each_pci_dev(tmp) { + if (tmp == pdev || tmp->bus != pdev->bus || + PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) || + pci_acs_enabled(tmp, REQ_ACS_FLAGS)) + continue; + + group = get_pci_alias_group(tmp, devfns); + if (group) { + pci_dev_put(tmp); + return group; + } + } + + return NULL; +} + +/* + * Look for aliases to or from the given device for exisiting groups. The + * dma_alias_devfn only supports aliases on the same bus, therefore the search + * space is quite small (especially since we're really only looking at pcie + * device, and therefore only expect multiple slots on the root complex or + * downstream switch ports). It's conceivable though that a pair of + * multifunction devices could have aliases between them that would cause a + * loop. To prevent this, we use a bitmap to track where we've been. + */ +static struct iommu_group *get_pci_alias_group(struct pci_dev *pdev, + unsigned long *devfns) +{ + struct pci_dev *tmp = NULL; + struct iommu_group *group; + + if (test_and_set_bit(pdev->devfn & 0xff, devfns)) + return NULL; + + group = iommu_group_get(&pdev->dev); + if (group) + return group; + + for_each_pci_dev(tmp) { + if (tmp == pdev || tmp->bus != pdev->bus) + continue; + + /* We alias them or they alias us */ + if (((pdev->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) && + pdev->dma_alias_devfn == tmp->devfn) || + ((tmp->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN) && + tmp->dma_alias_devfn == pdev->devfn)) { + + group = get_pci_alias_group(tmp, devfns); + if (group) { + pci_dev_put(tmp); + return group; + } + + group = get_pci_function_alias_group(tmp, devfns); + if (group) { + pci_dev_put(tmp); + return group; + } + } + } + + return NULL; +} + struct group_for_pci_data { struct pci_dev *pdev; struct iommu_group *group; @@ -557,7 +641,7 @@ static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev) struct group_for_pci_data data; struct pci_bus *bus; struct iommu_group *group = NULL; - struct pci_dev *tmp; + u64 devfns[4] = { 0 }; /* * Find the upstream DMA alias for the device. A device must not @@ -591,76 +675,21 @@ static struct iommu_group *iommu_group_get_for_pci_dev(struct pci_dev *pdev) } /* - * Next we need to consider DMA alias quirks. If one device aliases - * to another, they should be grouped together. It's theoretically - * possible that aliases could create chains of devices where each - * device aliases another device. If we then factor in multifunction - * ACS grouping requirements, each alias could incorporate a new slot - * with multiple functions, each with aliases. This is all extremely - * unlikely as DMA alias quirks are typically only used for PCIe - * devices where we usually have a single slot per bus. Furthermore, - * the alias quirk is usually to another function within the slot - * (and ACS multifunction is not supported) or to a different slot - * that doesn't physically exist. The likely scenario is therefore - * that everything on the bus gets grouped together. To reduce the - * problem space, share the IOMMU group for all devices on the bus - * if a DMA alias quirk is present on the bus. + * Look for existing groups on device aliases. If we alias another + * device or another device aliases us, use the same group. */ - tmp = NULL; - for_each_pci_dev(tmp) { - if (tmp->bus != pdev->bus || - !(tmp->dev_flags & PCI_DEV_FLAGS_DMA_ALIAS_DEVFN)) - continue; - - pci_dev_put(tmp); - tmp = NULL; - - /* We have an alias quirk, search for an existing group */ - for_each_pci_dev(tmp) { - struct iommu_group *group_tmp; - - if (tmp->bus != pdev->bus) - continue; - - group_tmp = iommu_group_get(&tmp->dev); - if (!group) { - group = group_tmp; - continue; - } - - if (group_tmp) { - WARN_ON(group != group_tmp); - iommu_group_put(group_tmp); - } - } - - return group ? group : iommu_group_alloc(); - } + group = get_pci_alias_group(pdev, (unsigned long *)devfns); + if (group) + return group; /* - * Non-multifunction devices or multifunction devices supporting - * ACS get their own group. + * Look for existing groups on non-isolated functions on the same + * slot and aliases of those funcions, if any. No need to clear + * the search bitmap, the tested devfns are still valid. */ - if (!pdev->multifunction || pci_acs_enabled(pdev, REQ_ACS_FLAGS)) - return iommu_group_alloc(); - - /* - * Multifunction devices not supporting ACS share a group with other - * similar devices in the same slot. - */ - tmp = NULL; - for_each_pci_dev(tmp) { - if (tmp == pdev || tmp->bus != pdev->bus || - PCI_SLOT(tmp->devfn) != PCI_SLOT(pdev->devfn) || - pci_acs_enabled(tmp, REQ_ACS_FLAGS)) - continue; - - group = iommu_group_get(&tmp->dev); - if (group) { - pci_dev_put(tmp); - return group; - } - } + group = get_pci_function_alias_group(pdev, (unsigned long *)devfns); + if (group) + return group; /* No shared group found, allocate new */ return iommu_group_alloc(); From 25b11ce2a3607d7c39a2ca121eea0c67c722b34e Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 19 Sep 2014 10:03:13 -0600 Subject: [PATCH 34/41] iommu/amd: Split init_iommu_group() from iommu_init_device() For a PCI device, aliases from the IVRS table won't be populated into dma_alias_devfn until after iommu_init_device() is called on each device. We therefore want to split init_iommu_group() to be called from a separate loop immediately following. Signed-off-by: Alex Williamson Cc: stable@vger.kernel.org # 3.17 Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 7de92768871d..8c5216633987 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -260,17 +260,13 @@ static bool check_device(struct device *dev) return true; } -static int init_iommu_group(struct device *dev) +static void init_iommu_group(struct device *dev) { struct iommu_group *group; group = iommu_group_get_for_dev(dev); - - if (IS_ERR(group)) - return PTR_ERR(group); - - iommu_group_put(group); - return 0; + if (!IS_ERR(group)) + iommu_group_put(group); } static int __last_alias(struct pci_dev *pdev, u16 alias, void *data) @@ -340,7 +336,6 @@ static int iommu_init_device(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct iommu_dev_data *dev_data; u16 alias; - int ret; if (dev->archdata.iommu) return 0; @@ -364,12 +359,6 @@ static int iommu_init_device(struct device *dev) dev_data->alias_data = alias_data; } - ret = init_iommu_group(dev); - if (ret) { - free_dev_data(dev_data); - return ret; - } - if (pci_iommuv2_capable(pdev)) { struct amd_iommu *iommu; @@ -455,6 +444,15 @@ int __init amd_iommu_init_devices(void) goto out_free; } + /* + * Initialize IOMMU groups only after iommu_init_device() has + * had a chance to populate any IVRS defined aliases. + */ + for_each_pci_dev(pdev) { + if (check_device(&pdev->dev)) + init_iommu_group(&pdev->dev); + } + return 0; out_free: @@ -2415,6 +2413,7 @@ static int device_change_notifier(struct notifier_block *nb, case BUS_NOTIFY_ADD_DEVICE: iommu_init_device(dev); + init_iommu_group(dev); /* * dev_data is still NULL and From fb3e306515ba6a012364b698b8ca71c337424ed3 Mon Sep 17 00:00:00 2001 From: Mark Salter Date: Sun, 21 Sep 2014 13:58:24 -0400 Subject: [PATCH 35/41] iommu: Fix bus notifier breakage iommu_bus_init() registers a bus notifier on the given bus by using a statically defined notifier block: static struct notifier_block iommu_bus_nb = { .notifier_call = iommu_bus_notifier, }; This same notifier block is used for all busses. This causes a problem for notifiers registered after iommu has registered this callback on multiple busses. The problem is that a subsequent notifier being registered on a bus which has this iommu notifier will also get linked in to the notifier list of all other busses which have this iommu notifier. This patch fixes this by allocating the notifier_block at runtime. Some error checking is also added to catch any allocation failure or notifier registration error. Signed-off-by: Mark Salter Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index b59826aa5531..ed8b04867b1f 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -799,18 +799,26 @@ static int iommu_bus_notifier(struct notifier_block *nb, return 0; } -static struct notifier_block iommu_bus_nb = { - .notifier_call = iommu_bus_notifier, -}; - -static void iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops) +static int iommu_bus_init(struct bus_type *bus, const struct iommu_ops *ops) { + int err; + struct notifier_block *nb; struct iommu_callback_data cb = { .ops = ops, }; - bus_register_notifier(bus, &iommu_bus_nb); - bus_for_each_dev(bus, NULL, &cb, add_iommu_group); + nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); + if (!nb) + return -ENOMEM; + + nb->notifier_call = iommu_bus_notifier; + + err = bus_register_notifier(bus, nb); + if (err) { + kfree(nb); + return err; + } + return bus_for_each_dev(bus, NULL, &cb, add_iommu_group); } /** @@ -834,9 +842,7 @@ int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops) bus->iommu_ops = ops; /* Do IOMMU specific setup for this bus-type */ - iommu_bus_init(bus, ops); - - return 0; + return iommu_bus_init(bus, ops); } EXPORT_SYMBOL_GPL(bus_set_iommu); From 5fc24d8cb9d43e0db335137051092da03a6845dc Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Wed, 17 Sep 2014 17:32:19 +0800 Subject: [PATCH 36/41] iommu/irq_remapping: Fix the regression of hpet irq remapping Commit 71054d8841b4 ("x86, hpet: Introduce x86_msi_ops.setup_hpet_msi") introduced x86_msi_ops.setup_hpet_msi to setup hpet MSI irq when irq remapping enabled. This caused a regression of hpet MSI irq remapping. Original code flow before commit 71054d8841b4: hpet_setup_msi_irq() arch_setup_hpet_msi() setup_hpet_msi_remapped() remap_ops->setup_hpet_msi() alloc_irte() msi_compose_msg() hpet_msi_write() ... Current code flow after commit 71054d8841b4: hpet_setup_msi_irq() x86_msi.setup_hpet_msi() setup_hpet_msi_remapped() intel_setup_hpet_msi() alloc_irte() Currently, we only call alloc_irte() for hpet MSI, but do not composed and wrote its msg... Signed-off-by: Yijing Wang Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 4 ++-- drivers/iommu/intel_irq_remapping.c | 4 ++-- drivers/iommu/irq_remapping.c | 11 +++++++++-- drivers/iommu/irq_remapping.h | 2 +- 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 8c5216633987..2f4f45d8dbfd 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -4233,7 +4233,7 @@ static int msi_setup_irq(struct pci_dev *pdev, unsigned int irq, return 0; } -static int setup_hpet_msi(unsigned int irq, unsigned int id) +static int alloc_hpet_msi(unsigned int irq, unsigned int id) { struct irq_2_irte *irte_info; struct irq_cfg *cfg; @@ -4272,6 +4272,6 @@ struct irq_remap_ops amd_iommu_irq_ops = { .compose_msi_msg = compose_msi_msg, .msi_alloc_irq = msi_alloc_irq, .msi_setup_irq = msi_setup_irq, - .setup_hpet_msi = setup_hpet_msi, + .alloc_hpet_msi = alloc_hpet_msi, }; #endif diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index 0df41f6264f5..ef5e5dd1311d 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -1139,7 +1139,7 @@ static int intel_msi_setup_irq(struct pci_dev *pdev, unsigned int irq, return ret; } -static int intel_setup_hpet_msi(unsigned int irq, unsigned int id) +static int intel_alloc_hpet_msi(unsigned int irq, unsigned int id) { int ret = -1; struct intel_iommu *iommu; @@ -1170,5 +1170,5 @@ struct irq_remap_ops intel_irq_remap_ops = { .compose_msi_msg = intel_compose_msi_msg, .msi_alloc_irq = intel_msi_alloc_irq, .msi_setup_irq = intel_msi_setup_irq, - .setup_hpet_msi = intel_setup_hpet_msi, + .alloc_hpet_msi = intel_alloc_hpet_msi, }; diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index 33c439524080..74a1767c89b5 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "irq_remapping.h" @@ -345,10 +346,16 @@ static int msi_setup_remapped_irq(struct pci_dev *pdev, unsigned int irq, int setup_hpet_msi_remapped(unsigned int irq, unsigned int id) { - if (!remap_ops || !remap_ops->setup_hpet_msi) + int ret; + + if (!remap_ops || !remap_ops->alloc_hpet_msi) return -ENODEV; - return remap_ops->setup_hpet_msi(irq, id); + ret = remap_ops->alloc_hpet_msi(irq, id); + if (ret) + return -EINVAL; + + return default_setup_hpet_msi(irq, id); } void panic_if_irq_remap(const char *msg) diff --git a/drivers/iommu/irq_remapping.h b/drivers/iommu/irq_remapping.h index 90c4dae5a46b..fde250f86e60 100644 --- a/drivers/iommu/irq_remapping.h +++ b/drivers/iommu/irq_remapping.h @@ -80,7 +80,7 @@ struct irq_remap_ops { int (*msi_setup_irq)(struct pci_dev *, unsigned int, int, int); /* Setup interrupt remapping for an HPET MSI */ - int (*setup_hpet_msi)(unsigned int, unsigned int); + int (*alloc_hpet_msi)(unsigned int, unsigned int); }; extern struct irq_remap_ops intel_irq_remap_ops; From c50e3247aa2d825e0dc0f4b876ee22d7134d24ca Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 9 Sep 2014 15:59:37 +0200 Subject: [PATCH 37/41] iommu/amd: Fix devid mapping for ivrs_ioapic override When the device id for an IOAPIC is overridden on the kernel command line, the iommu driver has to make sure it sets up a DTE for this device id. Reported-by: Su Friendy Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu_init.c | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 3783e0b44df6..b0522f15730f 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -712,7 +712,7 @@ static void __init set_dev_entry_from_acpi(struct amd_iommu *iommu, set_iommu_for_device(iommu, devid); } -static int __init add_special_device(u8 type, u8 id, u16 devid, bool cmd_line) +static int __init add_special_device(u8 type, u8 id, u16 *devid, bool cmd_line) { struct devid_map *entry; struct list_head *list; @@ -731,6 +731,8 @@ static int __init add_special_device(u8 type, u8 id, u16 devid, bool cmd_line) pr_info("AMD-Vi: Command-line override present for %s id %d - ignoring\n", type == IVHD_SPECIAL_IOAPIC ? "IOAPIC" : "HPET", id); + *devid = entry->devid; + return 0; } @@ -739,7 +741,7 @@ static int __init add_special_device(u8 type, u8 id, u16 devid, bool cmd_line) return -ENOMEM; entry->id = id; - entry->devid = devid; + entry->devid = *devid; entry->cmd_line = cmd_line; list_add_tail(&entry->list, list); @@ -754,7 +756,7 @@ static int __init add_early_maps(void) for (i = 0; i < early_ioapic_map_size; ++i) { ret = add_special_device(IVHD_SPECIAL_IOAPIC, early_ioapic_map[i].id, - early_ioapic_map[i].devid, + &early_ioapic_map[i].devid, early_ioapic_map[i].cmd_line); if (ret) return ret; @@ -763,7 +765,7 @@ static int __init add_early_maps(void) for (i = 0; i < early_hpet_map_size; ++i) { ret = add_special_device(IVHD_SPECIAL_HPET, early_hpet_map[i].id, - early_hpet_map[i].devid, + &early_hpet_map[i].devid, early_hpet_map[i].cmd_line); if (ret) return ret; @@ -978,10 +980,17 @@ static int __init init_iommu_from_acpi(struct amd_iommu *iommu, PCI_SLOT(devid), PCI_FUNC(devid)); - set_dev_entry_from_acpi(iommu, devid, e->flags, 0); - ret = add_special_device(type, handle, devid, false); + ret = add_special_device(type, handle, &devid, false); if (ret) return ret; + + /* + * add_special_device might update the devid in case a + * command-line override is present. So call + * set_dev_entry_from_acpi after add_special_device. + */ + set_dev_entry_from_acpi(iommu, devid, e->flags, 0); + break; } default: From 599bad38cf7163123af7c9efea0fcf228bc74fe1 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 30 Sep 2014 13:02:02 +0200 Subject: [PATCH 38/41] driver core: Add BUS_NOTIFY_REMOVED_DEVICE event This event closes an important gap in the bus notifiers. There is already the BUS_NOTIFY_DEL_DEVICE event, but that is sent when the device is still bound to its device driver. This is too early for the IOMMU code to destroy any mappings for the device, as they might still be in use by the driver. The new BUS_NOTIFY_REMOVED_DEVICE event introduced with this patch closes this gap as it is sent when the device is already unbound from its device driver and almost completly removed from the driver core. With this event the IOMMU code can safely destroy any mappings and other data structures when a device is removed. Signed-off-by: Joerg Roedel Acked-by: Greg Kroah-Hartman Tested-by: Jerry Hoemann --- drivers/base/core.c | 3 +++ include/linux/device.h | 11 ++++++----- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/base/core.c b/drivers/base/core.c index 20da3ad1696b..7b270a2e6ed5 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -1211,6 +1211,9 @@ void device_del(struct device *dev) */ if (platform_notify_remove) platform_notify_remove(dev); + if (dev->bus) + blocking_notifier_call_chain(&dev->bus->p->bus_notifier, + BUS_NOTIFY_REMOVED_DEVICE, dev); kobject_uevent(&dev->kobj, KOBJ_REMOVE); cleanup_device_parent(dev); kobject_del(&dev->kobj); diff --git a/include/linux/device.h b/include/linux/device.h index 43d183aeb25b..d0d5c5db509d 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -181,13 +181,14 @@ extern int bus_unregister_notifier(struct bus_type *bus, * with the device lock held in the core, so be careful. */ #define BUS_NOTIFY_ADD_DEVICE 0x00000001 /* device added */ -#define BUS_NOTIFY_DEL_DEVICE 0x00000002 /* device removed */ -#define BUS_NOTIFY_BIND_DRIVER 0x00000003 /* driver about to be +#define BUS_NOTIFY_DEL_DEVICE 0x00000002 /* device to be removed */ +#define BUS_NOTIFY_REMOVED_DEVICE 0x00000003 /* device removed */ +#define BUS_NOTIFY_BIND_DRIVER 0x00000004 /* driver about to be bound */ -#define BUS_NOTIFY_BOUND_DRIVER 0x00000004 /* driver bound to device */ -#define BUS_NOTIFY_UNBIND_DRIVER 0x00000005 /* driver about to be +#define BUS_NOTIFY_BOUND_DRIVER 0x00000005 /* driver bound to device */ +#define BUS_NOTIFY_UNBIND_DRIVER 0x00000006 /* driver about to be unbound */ -#define BUS_NOTIFY_UNBOUND_DRIVER 0x00000006 /* driver is unbound +#define BUS_NOTIFY_UNBOUND_DRIVER 0x00000007 /* driver is unbound from the device */ extern struct kset *bus_get_kset(struct bus_type *bus); From 1196c2fb0407683c2df92d3d09f9144d42830894 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 30 Sep 2014 13:02:03 +0200 Subject: [PATCH 39/41] iommu/vt-d: Only remove domain when device is removed This makes sure any RMRR mappings stay in place when the driver is unbound from the device. Signed-off-by: Joerg Roedel Tested-by: Jerry Hoemann --- drivers/iommu/intel-iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index d1f5caad04f9..eaf825ac7d28 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3865,8 +3865,7 @@ static int device_notifier(struct notifier_block *nb, if (iommu_dummy(dev)) return 0; - if (action != BUS_NOTIFY_UNBOUND_DRIVER && - action != BUS_NOTIFY_DEL_DEVICE) + if (action != BUS_NOTIFY_REMOVED_DEVICE) return 0; domain = find_domain(dev); From 57384592c43375d2c9a14d82aebbdc95fdda9e9d Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 2 Oct 2014 11:50:25 +0200 Subject: [PATCH 40/41] iommu/vt-d: Store bus information in RMRR PCI device path This will be used later to match broken RMRR entries. Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 1 + include/linux/dmar.h | 8 +++++++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 8ed55b0a1ce4..68da1ab0f2cd 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -155,6 +155,7 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) if (event == BUS_NOTIFY_ADD_DEVICE) { for (tmp = dev; tmp; tmp = tmp->bus->self) { level--; + info->path[level].bus = tmp->bus->number; info->path[level].device = PCI_SLOT(tmp->devfn); info->path[level].function = PCI_FUNC(tmp->devfn); if (pci_is_root_bus(tmp->bus)) diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 1deece46a0ca..593fff99e6bf 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -56,13 +56,19 @@ struct dmar_drhd_unit { struct intel_iommu *iommu; }; +struct dmar_pci_path { + u8 bus; + u8 device; + u8 function; +}; + struct dmar_pci_notify_info { struct pci_dev *dev; unsigned long event; int bus; u16 seg; u16 level; - struct acpi_dmar_pci_path path[]; + struct dmar_pci_path path[]; } __attribute__((packed)); extern struct rw_semaphore dmar_global_lock; From 80f7b3d1b1f4ec6c80fa3b40c7c9a419e28b0897 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 22 Sep 2014 16:30:22 +0200 Subject: [PATCH 41/41] iommu/vt-d: Work around broken RMRR firmware entries The VT-d specification states that an RMRR entry in the DMAR table needs to specify the full path to the device. This is also how newer Linux kernels implement it. Unfortunatly older drivers just match for the target device and not the full path to the device, so that BIOS vendors implement that behavior into their BIOSes to make them work with older Linux kernels. But those RMRR entries break on newer Linux kernels. Work around this issue by adding a fall-back into the RMRR matching code to match those old RMRR entries too. Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 68da1ab0f2cd..b3774ef3f255 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -178,17 +178,33 @@ static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus, int i; if (info->bus != bus) - return false; + goto fallback; if (info->level != count) - return false; + goto fallback; for (i = 0; i < count; i++) { if (path[i].device != info->path[i].device || path[i].function != info->path[i].function) - return false; + goto fallback; } return true; + +fallback: + + if (count != 1) + return false; + + i = info->level - 1; + if (bus == info->path[i].bus && + path[0].device == info->path[i].device && + path[0].function == info->path[i].function) { + pr_info(FW_BUG "RMRR entry for device %02x:%02x.%x is broken - applying workaround\n", + bus, path[0].device, path[0].function); + return true; + } + + return false; } /* Return: > 0 if match found, 0 if no match found, < 0 if error happens */