From 140456f994195b568ecd7fc2287a34eadffef3ca Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 17 Feb 2021 17:30:04 +0300 Subject: [PATCH 1/4] iommu/amd: Fix sleeping in atomic in increase_address_space() increase_address_space() calls get_zeroed_page(gfp) under spin_lock with disabled interrupts. gfp flags passed to increase_address_space() may allow sleeping, so it comes to this: BUG: sleeping function called from invalid context at mm/page_alloc.c:4342 in_atomic(): 1, irqs_disabled(): 1, pid: 21555, name: epdcbbf1qnhbsd8 Call Trace: dump_stack+0x66/0x8b ___might_sleep+0xec/0x110 __alloc_pages_nodemask+0x104/0x300 get_zeroed_page+0x15/0x40 iommu_map_page+0xdd/0x3e0 amd_iommu_map+0x50/0x70 iommu_map+0x106/0x220 vfio_iommu_type1_ioctl+0x76e/0x950 [vfio_iommu_type1] do_vfs_ioctl+0xa3/0x6f0 ksys_ioctl+0x66/0x70 __x64_sys_ioctl+0x16/0x20 do_syscall_64+0x4e/0x100 entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fix this by moving get_zeroed_page() out of spin_lock/unlock section. Fixes: 754265bcab ("iommu/amd: Fix race in increase_address_space()") Signed-off-by: Andrey Ryabinin Acked-by: Will Deacon Cc: Link: https://lore.kernel.org/r/20210217143004.19165-1-arbn@yandex-team.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/io_pgtable.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/amd/io_pgtable.c b/drivers/iommu/amd/io_pgtable.c index 1c4961e05c12..bb0ee5c9fde7 100644 --- a/drivers/iommu/amd/io_pgtable.c +++ b/drivers/iommu/amd/io_pgtable.c @@ -182,6 +182,10 @@ static bool increase_address_space(struct protection_domain *domain, bool ret = true; u64 *pte; + pte = (void *)get_zeroed_page(gfp); + if (!pte) + return false; + spin_lock_irqsave(&domain->lock, flags); if (address <= PM_LEVEL_SIZE(domain->iop.mode)) @@ -191,10 +195,6 @@ static bool increase_address_space(struct protection_domain *domain, if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL)) goto out; - pte = (void *)get_zeroed_page(gfp); - if (!pte) - goto out; - *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root)); domain->iop.root = pte; @@ -208,10 +208,12 @@ static bool increase_address_space(struct protection_domain *domain, */ amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode); + pte = NULL; ret = true; out: spin_unlock_irqrestore(&domain->lock, flags); + free_page((unsigned long)pte); return ret; } From 765a9d1d02b2f5996b05f5f65faa8a634adbe763 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Thu, 18 Feb 2021 14:07:02 -0800 Subject: [PATCH 2/4] iommu/tegra-smmu: Fix mc errors on tegra124-nyan Commit 25938c73cd79 ("iommu/tegra-smmu: Rework tegra_smmu_probe_device()") removed certain hack in the tegra_smmu_probe() by relying on IOMMU core to of_xlate SMMU's SID per device, so as to get rid of tegra_smmu_find() and tegra_smmu_configure() that are typically done in the IOMMU core also. This approach works for both existing devices that have DT nodes and other devices (like PCI device) that don't exist in DT, on Tegra210 and Tegra3 upon testing. However, Page Fault errors are reported on tegra124-Nyan: tegra-mc 70019000.memory-controller: display0a: read @0xfe056b40: EMEM address decode error (SMMU translation error [--S]) tegra-mc 70019000.memory-controller: display0a: read @0xfe056b40: Page fault (SMMU translation error [--S]) After debugging, I found that the mentioned commit changed some function callback sequence of tegra-smmu's, resulting in enabling SMMU for display client before display driver gets initialized. I couldn't reproduce exact same issue on Tegra210 as Tegra124 (arm-32) differs at arch-level code. Actually this Page Fault is a known issue, as on most of Tegra platforms, display gets enabled by the bootloader for the splash screen feature, so it keeps filling the framebuffer memory. A proper fix to this issue is to 1:1 linear map the framebuffer memory to IOVA space so the SMMU will have the same address as the physical address in its page table. Yet, Thierry has been working on the solution above for a year, and it hasn't merged. Therefore, let's partially revert the mentioned commit to fix the errors. The reason why we do a partial revert here is that we can still set priv in ->of_xlate() callback for PCI devices. Meanwhile, devices existing in DT, like display, will go through tegra_smmu_configure() at the stage of bus_set_iommu() when SMMU gets probed(), as what it did before we merged the mentioned commit. Once we have the linear map solution for framebuffer memory, this change can be cleaned away. [Big thank to Guillaume who reported and helped debugging/verification] Fixes: 25938c73cd79 ("iommu/tegra-smmu: Rework tegra_smmu_probe_device()") Reported-by: Guillaume Tucker Signed-off-by: Nicolin Chen Tested-by: Guillaume Tucker Acked-by: Thierry Reding Link: https://lore.kernel.org/r/20210218220702.1962-1-nicoleotsuka@gmail.com Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-smmu.c | 72 +++++++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/tegra-smmu.c b/drivers/iommu/tegra-smmu.c index 4a3f095a1c26..97eb62f667d2 100644 --- a/drivers/iommu/tegra-smmu.c +++ b/drivers/iommu/tegra-smmu.c @@ -798,10 +798,70 @@ static phys_addr_t tegra_smmu_iova_to_phys(struct iommu_domain *domain, return SMMU_PFN_PHYS(pfn) + SMMU_OFFSET_IN_PAGE(iova); } +static struct tegra_smmu *tegra_smmu_find(struct device_node *np) +{ + struct platform_device *pdev; + struct tegra_mc *mc; + + pdev = of_find_device_by_node(np); + if (!pdev) + return NULL; + + mc = platform_get_drvdata(pdev); + if (!mc) + return NULL; + + return mc->smmu; +} + +static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev, + struct of_phandle_args *args) +{ + const struct iommu_ops *ops = smmu->iommu.ops; + int err; + + err = iommu_fwspec_init(dev, &dev->of_node->fwnode, ops); + if (err < 0) { + dev_err(dev, "failed to initialize fwspec: %d\n", err); + return err; + } + + err = ops->of_xlate(dev, args); + if (err < 0) { + dev_err(dev, "failed to parse SW group ID: %d\n", err); + iommu_fwspec_free(dev); + return err; + } + + return 0; +} + static struct iommu_device *tegra_smmu_probe_device(struct device *dev) { - struct tegra_smmu *smmu = dev_iommu_priv_get(dev); + struct device_node *np = dev->of_node; + struct tegra_smmu *smmu = NULL; + struct of_phandle_args args; + unsigned int index = 0; + int err; + while (of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index, + &args) == 0) { + smmu = tegra_smmu_find(args.np); + if (smmu) { + err = tegra_smmu_configure(smmu, dev, &args); + of_node_put(args.np); + + if (err < 0) + return ERR_PTR(err); + + break; + } + + of_node_put(args.np); + index++; + } + + smmu = dev_iommu_priv_get(dev); if (!smmu) return ERR_PTR(-ENODEV); @@ -1028,6 +1088,16 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev, if (!smmu) return ERR_PTR(-ENOMEM); + /* + * This is a bit of a hack. Ideally we'd want to simply return this + * value. However the IOMMU registration process will attempt to add + * all devices to the IOMMU when bus_set_iommu() is called. In order + * not to rely on global variables to track the IOMMU instance, we + * set it here so that it can be looked up from the .probe_device() + * callback via the IOMMU device's .drvdata field. + */ + mc->smmu = smmu; + size = BITS_TO_LONGS(soc->num_asids) * sizeof(long); smmu->asids = devm_kzalloc(dev, size, GFP_KERNEL); From 82c3cefb9f1652e7470f442ff96c613e8c8ed8f4 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Thu, 25 Feb 2021 14:14:54 +0800 Subject: [PATCH 3/4] iommu: Don't use lazy flush for untrusted device The lazy IOTLB flushing setup leaves a time window, in which the device can still access some system memory, which has already been unmapped by the device driver. It's not suitable for untrusted devices. A malicious device might use this to attack the system by obtaining data that it shouldn't obtain. Fixes: c588072bba6b5 ("iommu/vt-d: Convert intel iommu driver to the iommu ops") Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20210225061454.2864009-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/dma-iommu.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index f659395e7959..65234e383d6b 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -311,6 +311,11 @@ static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad) domain->ops->flush_iotlb_all(domain); } +static bool dev_is_untrusted(struct device *dev) +{ + return dev_is_pci(dev) && to_pci_dev(dev)->untrusted; +} + /** * iommu_dma_init_domain - Initialise a DMA mapping domain * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie() @@ -365,8 +370,9 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, init_iova_domain(iovad, 1UL << order, base_pfn); - if (!cookie->fq_domain && !iommu_domain_get_attr(domain, - DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) { + if (!cookie->fq_domain && (!dev || !dev_is_untrusted(dev)) && + !iommu_domain_get_attr(domain, DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && + attr) { if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all, iommu_dma_entry_dtor)) pr_warn("iova flush queue initialization failed\n"); @@ -508,11 +514,6 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr, iova_align(iovad, size), dir, attrs); } -static bool dev_is_untrusted(struct device *dev) -{ - return dev_is_pci(dev) && to_pci_dev(dev)->untrusted; -} - static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys, size_t size, int prot, u64 dma_mask) { From 444d66a23c1f1e4c4d12aed4812681d0ad835d60 Mon Sep 17 00:00:00 2001 From: Zenghui Yu Date: Sat, 27 Feb 2021 15:39:09 +0800 Subject: [PATCH 4/4] iommu/vt-d: Fix status code for Allocate/Free PASID command As per Intel vt-d spec, Rev 3.0 (section 10.4.45 "Virtual Command Response Register"), the status code of "No PASID available" error in response to the Allocate PASID command is 2, not 1. The same for "Invalid PASID" error in response to the Free PASID command. We will otherwise see confusing kernel log under the command failure from guest side. Fix it. Fixes: 24f27d32ab6b ("iommu/vt-d: Enlightened PASID allocation") Signed-off-by: Zenghui Yu Acked-by: Lu Baolu Link: https://lore.kernel.org/r/20210227073909.432-1-yuzenghui@huawei.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/pasid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel/pasid.h b/drivers/iommu/intel/pasid.h index 97dfcffbf495..444c0bec221a 100644 --- a/drivers/iommu/intel/pasid.h +++ b/drivers/iommu/intel/pasid.h @@ -30,8 +30,8 @@ #define VCMD_VRSP_IP 0x1 #define VCMD_VRSP_SC(e) (((e) >> 1) & 0x3) #define VCMD_VRSP_SC_SUCCESS 0 -#define VCMD_VRSP_SC_NO_PASID_AVAIL 1 -#define VCMD_VRSP_SC_INVALID_PASID 1 +#define VCMD_VRSP_SC_NO_PASID_AVAIL 2 +#define VCMD_VRSP_SC_INVALID_PASID 2 #define VCMD_VRSP_RESULT_PASID(e) (((e) >> 8) & 0xfffff) #define VCMD_CMD_OPERAND(e) ((e) << 8) /*