From 6c50d79f66382d78918a768374839d6d1b606d3f Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Sat, 31 Mar 2018 01:33:11 +0100 Subject: [PATCH 01/18] iommu/vt-d: Ratelimit each dmar fault printing There is a ratelimit for printing, but it's incremented each time the cpu recives dmar fault interrupt. While one interrupt may signal about *many* faults. So, measuring the impact it turns out that reading/clearing one fault takes < 1 usec, and printing info about the fault takes ~170 msec. Having in mind that maximum number of fault recording registers per remapping hardware unit is 256.. IRQ handler may run for (170*256) msec. And as fault-serving loop runs without a time limit, during servicing new faults may occur.. Ratelimit each fault printing rather than each irq printing. Fixes: commit c43fce4eebae ("iommu/vt-d: Ratelimit fault handler") BUG: spinlock lockup suspected on CPU#0, CliShell/9903 lock: 0xffffffff81a47440, .magic: dead4ead, .owner: kworker/u16:2/8915, .owner_cpu: 6 CPU: 0 PID: 9903 Comm: CliShell Call Trace:$\n' [..] dump_stack+0x65/0x83$\n' [..] spin_dump+0x8f/0x94$\n' [..] do_raw_spin_lock+0x123/0x170$\n' [..] _raw_spin_lock_irqsave+0x32/0x3a$\n' [..] uart_chars_in_buffer+0x20/0x4d$\n' [..] tty_chars_in_buffer+0x18/0x1d$\n' [..] n_tty_poll+0x1cb/0x1f2$\n' [..] tty_poll+0x5e/0x76$\n' [..] do_select+0x363/0x629$\n' [..] compat_core_sys_select+0x19e/0x239$\n' [..] compat_SyS_select+0x98/0xc0$\n' [..] sysenter_dispatch+0x7/0x25$\n' [..] NMI backtrace for cpu 6 CPU: 6 PID: 8915 Comm: kworker/u16:2 Workqueue: dmar_fault dmar_fault_work Call Trace:$\n' [..] wait_for_xmitr+0x26/0x8f$\n' [..] serial8250_console_putchar+0x1c/0x2c$\n' [..] uart_console_write+0x40/0x4b$\n' [..] serial8250_console_write+0xe6/0x13f$\n' [..] call_console_drivers.constprop.13+0xce/0x103$\n' [..] console_unlock+0x1f8/0x39b$\n' [..] vprintk_emit+0x39e/0x3e6$\n' [..] printk+0x4d/0x4f$\n' [..] dmar_fault+0x1a8/0x1fc$\n' [..] dmar_fault_work+0x15/0x17$\n' [..] process_one_work+0x1e8/0x3a9$\n' [..] worker_thread+0x25d/0x345$\n' [..] kthread+0xea/0xf2$\n' [..] ret_from_fork+0x58/0x90$\n' Cc: Alex Williamson Cc: David Woodhouse Cc: Ingo Molnar Cc: Joerg Roedel Cc: Lu Baolu Cc: iommu@lists.linux-foundation.org Signed-off-by: Dmitry Safonov Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index accf58388bdb..6c4ea32ee6a9 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -1618,17 +1618,13 @@ irqreturn_t dmar_fault(int irq, void *dev_id) int reg, fault_index; u32 fault_status; unsigned long flag; - bool ratelimited; static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL, DEFAULT_RATELIMIT_BURST); - /* Disable printing, simply clear the fault when ratelimited */ - ratelimited = !__ratelimit(&rs); - raw_spin_lock_irqsave(&iommu->register_lock, flag); fault_status = readl(iommu->reg + DMAR_FSTS_REG); - if (fault_status && !ratelimited) + if (fault_status && __ratelimit(&rs)) pr_err("DRHD: handling fault status reg %x\n", fault_status); /* TBD: ignore advanced fault log currently */ @@ -1638,6 +1634,8 @@ irqreturn_t dmar_fault(int irq, void *dev_id) fault_index = dma_fsts_fault_record_index(fault_status); reg = cap_fault_reg_offset(iommu->cap); while (1) { + /* Disable printing, simply clear the fault when ratelimited */ + bool ratelimited = !__ratelimit(&rs); u8 fault_reason; u16 source_id; u64 guest_addr; From 48e6f7652dd8c502005decb331910ca0d097c1a9 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 17 Apr 2018 19:49:09 +0200 Subject: [PATCH 02/18] iommu: Remove depends on HAS_DMA in case of platform dependency Remove dependencies on HAS_DMA where a Kconfig symbol depends on another symbol that implies HAS_DMA, and, optionally, on "|| COMPILE_TEST". In most cases this other symbol is an architecture or platform specific symbol, or PCI. Generic symbols and drivers without platform dependencies keep their dependencies on HAS_DMA, to prevent compiling subsystems or drivers that cannot work anyway. This simplifies the dependencies, and allows to improve compile-testing. Signed-off-by: Geert Uytterhoeven Reviewed-by: Mark Brown Acked-by: Robin Murphy Acked-by: Joerg Roedel Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index df171cb85822..c76157e57f6b 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -23,7 +23,7 @@ config IOMMU_IO_PGTABLE config IOMMU_IO_PGTABLE_LPAE bool "ARMv7/v8 Long Descriptor Format" select IOMMU_IO_PGTABLE - depends on HAS_DMA && (ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64)) + depends on ARM || ARM64 || (COMPILE_TEST && !GENERIC_ATOMIC64) help Enable support for the ARM long descriptor pagetable format. This allocator supports 4K/2M/1G, 16K/32M and 64K/512M page @@ -42,7 +42,7 @@ config IOMMU_IO_PGTABLE_LPAE_SELFTEST config IOMMU_IO_PGTABLE_ARMV7S bool "ARMv7/v8 Short Descriptor Format" select IOMMU_IO_PGTABLE - depends on HAS_DMA && (ARM || ARM64 || COMPILE_TEST) + depends on ARM || ARM64 || COMPILE_TEST help Enable support for the ARM Short-descriptor pagetable format. This supports 32-bit virtual and physical addresses mapped using @@ -376,7 +376,6 @@ config QCOM_IOMMU # Note: iommu drivers cannot (yet?) be built as modules bool "Qualcomm IOMMU Support" depends on ARCH_QCOM || (COMPILE_TEST && !GENERIC_ATOMIC64) - depends on HAS_DMA select IOMMU_API select IOMMU_IO_PGTABLE_LPAE select ARM_DMA_USE_IOMMU From 7d1bf14f860230e9be5b0f183a2fbf10985fbc41 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Thu, 19 Apr 2018 16:05:54 +0200 Subject: [PATCH 03/18] iommu/qcom: Simplify getting .drvdata We should get drvdata from struct device directly. Going via platform_device is an unneeded step back and forth. Signed-off-by: Wolfram Sang Signed-off-by: Joerg Roedel --- drivers/iommu/qcom_iommu.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/qcom_iommu.c b/drivers/iommu/qcom_iommu.c index 65b9c99707f8..fe88a4880d3a 100644 --- a/drivers/iommu/qcom_iommu.c +++ b/drivers/iommu/qcom_iommu.c @@ -885,16 +885,14 @@ static int qcom_iommu_device_remove(struct platform_device *pdev) static int __maybe_unused qcom_iommu_resume(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev); + struct qcom_iommu_dev *qcom_iommu = dev_get_drvdata(dev); return qcom_iommu_enable_clocks(qcom_iommu); } static int __maybe_unused qcom_iommu_suspend(struct device *dev) { - struct platform_device *pdev = to_platform_device(dev); - struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev); + struct qcom_iommu_dev *qcom_iommu = dev_get_drvdata(dev); qcom_iommu_disable_clocks(qcom_iommu); From f793b13ef0c9c11971334eb1c2544f81865b0d74 Mon Sep 17 00:00:00 2001 From: YueHaibing Date: Thu, 26 Apr 2018 12:49:29 +0800 Subject: [PATCH 04/18] iommu/io-pgtable-arm: Use for_each_set_bit to simplify code We can use for_each_set_bit() to simplify code slightly in the ARM io-pgtable self tests while unmapping. Signed-off-by: YueHaibing Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-arm-v7s.c | 5 +---- drivers/iommu/io-pgtable-arm.c | 5 +---- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c index 10e4a3d11c02..50e3a9fcf43e 100644 --- a/drivers/iommu/io-pgtable-arm-v7s.c +++ b/drivers/iommu/io-pgtable-arm-v7s.c @@ -898,8 +898,7 @@ static int __init arm_v7s_do_selftests(void) /* Full unmap */ iova = 0; - i = find_first_bit(&cfg.pgsize_bitmap, BITS_PER_LONG); - while (i != BITS_PER_LONG) { + for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) { size = 1UL << i; if (ops->unmap(ops, iova, size) != size) @@ -916,8 +915,6 @@ static int __init arm_v7s_do_selftests(void) return __FAIL(ops); iova += SZ_16M; - i++; - i = find_next_bit(&cfg.pgsize_bitmap, BITS_PER_LONG, i); } free_io_pgtable_ops(ops); diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 39c2a056da21..4ffdd88b1566 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -1120,8 +1120,7 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) /* Full unmap */ iova = 0; - j = find_first_bit(&cfg->pgsize_bitmap, BITS_PER_LONG); - while (j != BITS_PER_LONG) { + for_each_set_bit(j, &cfg->pgsize_bitmap, BITS_PER_LONG) { size = 1UL << j; if (ops->unmap(ops, iova, size) != size) @@ -1138,8 +1137,6 @@ static int __init arm_lpae_run_tests(struct io_pgtable_cfg *cfg) return __FAIL(ops, i); iova += SZ_1G; - j++; - j = find_next_bit(&cfg->pgsize_bitmap, BITS_PER_LONG, j); } free_io_pgtable_ops(ops); From 40c9b882fa74771544b0e41209e99d78685f94be Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 9 Apr 2018 23:07:19 +0300 Subject: [PATCH 05/18] iommu/tegra: gart: Add debugging facility Page mapping could overwritten by an accident (a bug). We can catch this case by checking 'VALID' bit of GART's page entry prior to mapping of a page. Since that check introduces a small performance impact, it should be enabled explicitly using new GART's kernel module 'debug' parameter. Signed-off-by: Dmitry Osipenko Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-gart.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index b62f790ad1ba..4c0abdcd1ad2 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -72,6 +72,8 @@ struct gart_domain { static struct gart_device *gart_handle; /* unique for a system */ +static bool gart_debug; + #define GART_PTE(_pfn) \ (GART_ENTRY_PHYS_ADDR_VALID | ((_pfn) << PAGE_SHIFT)) @@ -271,6 +273,7 @@ static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova, struct gart_device *gart = gart_domain->gart; unsigned long flags; unsigned long pfn; + unsigned long pte; if (!gart_iova_range_valid(gart, iova, bytes)) return -EINVAL; @@ -282,6 +285,14 @@ static int gart_iommu_map(struct iommu_domain *domain, unsigned long iova, spin_unlock_irqrestore(&gart->pte_lock, flags); return -EINVAL; } + if (gart_debug) { + pte = gart_read_pte(gart, iova); + if (pte & GART_ENTRY_PHYS_ADDR_VALID) { + spin_unlock_irqrestore(&gart->pte_lock, flags); + dev_err(gart->dev, "Page entry is in-use\n"); + return -EBUSY; + } + } gart_set_pte(gart, iova, GART_PTE(pfn)); FLUSH_GART_REGS(gart); spin_unlock_irqrestore(&gart->pte_lock, flags); @@ -515,7 +526,9 @@ static void __exit tegra_gart_exit(void) subsys_initcall(tegra_gart_init); module_exit(tegra_gart_exit); +module_param(gart_debug, bool, 0644); +MODULE_PARM_DESC(gart_debug, "Enable GART debugging"); MODULE_DESCRIPTION("IOMMU API for GART in Tegra20"); MODULE_AUTHOR("Hiroshi DOYU "); MODULE_ALIAS("platform:tegra-gart"); From 130a2fdf0b9e2305fd4dfbf7e6fb58c31df0fe8e Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Mon, 9 Apr 2018 23:07:20 +0300 Subject: [PATCH 06/18] iommu/tegra: gart: Fix gart_iommu_unmap() It must return the number of unmapped bytes on success, returning 0 means that unmapping failed and in result only one page is unmapped. Signed-off-by: Dmitry Osipenko Reviewed-by: Thierry Reding Acked-by: Thierry Reding Signed-off-by: Joerg Roedel --- drivers/iommu/tegra-gart.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/tegra-gart.c b/drivers/iommu/tegra-gart.c index 4c0abdcd1ad2..89ec24c6952c 100644 --- a/drivers/iommu/tegra-gart.c +++ b/drivers/iommu/tegra-gart.c @@ -313,7 +313,7 @@ static size_t gart_iommu_unmap(struct iommu_domain *domain, unsigned long iova, gart_set_pte(gart, iova, 0); FLUSH_GART_REGS(gart); spin_unlock_irqrestore(&gart->pte_lock, flags); - return 0; + return bytes; } static phys_addr_t gart_iommu_iova_to_phys(struct iommu_domain *domain, From d64c0486ed50900a3b3b1e22192dc07ad3ad5a8a Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Tue, 1 May 2018 14:52:52 -0500 Subject: [PATCH 07/18] iommu/amd: Update the PASID information printed to the system log Provide detailed data for each event, as appropriate. Signed-off-by: Gary R Hook Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 2a99f0f14795..aecc49aa6fdb 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -545,7 +545,7 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, static void iommu_print_event(struct amd_iommu *iommu, void *__evt) { struct device *dev = iommu->iommu.dev; - int type, devid, domid, flags; + int type, devid, pasid, flags; volatile u32 *event = __evt; int count = 0; u64 address; @@ -553,7 +553,7 @@ static void iommu_print_event(struct amd_iommu *iommu, void *__evt) retry: type = (event[1] >> EVENT_TYPE_SHIFT) & EVENT_TYPE_MASK; devid = (event[0] >> EVENT_DEVID_SHIFT) & EVENT_DEVID_MASK; - domid = (event[1] >> EVENT_DOMID_SHIFT) & EVENT_DOMID_MASK; + pasid = PPR_PASID(*(u64 *)&event[0]); flags = (event[1] >> EVENT_FLAGS_SHIFT) & EVENT_FLAGS_MASK; address = (u64)(((u64)event[3]) << 32) | event[2]; @@ -568,7 +568,7 @@ retry: } if (type == EVENT_TYPE_IO_FAULT) { - amd_iommu_report_page_fault(devid, domid, address, flags); + amd_iommu_report_page_fault(devid, pasid, address, flags); return; } else { dev_err(dev, "AMD-Vi: Event logged ["); @@ -576,10 +576,9 @@ retry: switch (type) { case EVENT_TYPE_ILL_DEV: - dev_err(dev, "ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x " - "address=0x%016llx flags=0x%04x]\n", + dev_err(dev, "ILLEGAL_DEV_TABLE_ENTRY device=%02x:%02x.%x pasid=0x%05x address=0x%016llx flags=0x%04x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), - address, flags); + pasid, address, flags); dump_dte_entry(devid); break; case EVENT_TYPE_DEV_TAB_ERR: @@ -589,34 +588,30 @@ retry: address, flags); break; case EVENT_TYPE_PAGE_TAB_ERR: - dev_err(dev, "PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x " - "domain=0x%04x address=0x%016llx flags=0x%04x]\n", + dev_err(dev, "PAGE_TAB_HARDWARE_ERROR device=%02x:%02x.%x domain=0x%04x address=0x%016llx flags=0x%04x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), - domid, address, flags); + pasid, address, flags); break; case EVENT_TYPE_ILL_CMD: dev_err(dev, "ILLEGAL_COMMAND_ERROR address=0x%016llx]\n", address); dump_command(address); break; case EVENT_TYPE_CMD_HARD_ERR: - dev_err(dev, "COMMAND_HARDWARE_ERROR address=0x%016llx " - "flags=0x%04x]\n", address, flags); + dev_err(dev, "COMMAND_HARDWARE_ERROR address=0x%016llx flags=0x%04x]\n", + address, flags); break; case EVENT_TYPE_IOTLB_INV_TO: - dev_err(dev, "IOTLB_INV_TIMEOUT device=%02x:%02x.%x " - "address=0x%016llx]\n", + dev_err(dev, "IOTLB_INV_TIMEOUT device=%02x:%02x.%x address=0x%016llx]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), address); break; case EVENT_TYPE_INV_DEV_REQ: - dev_err(dev, "INVALID_DEVICE_REQUEST device=%02x:%02x.%x " - "address=0x%016llx flags=0x%04x]\n", + dev_err(dev, "INVALID_DEVICE_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%016llx flags=0x%04x]\n", PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), - address, flags); + pasid, address, flags); break; default: - dev_err(dev, KERN_ERR "UNKNOWN event[0]=0x%08x event[1]=0x%08x " - "event[2]=0x%08x event[3]=0x%08x\n", + dev_err(dev, "UNKNOWN event[0]=0x%08x event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n", event[0], event[1], event[2], event[3]); } From e7f63ffc1bf19c607a1ad7b89c1389004ee6e9c4 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Tue, 1 May 2018 14:53:00 -0500 Subject: [PATCH 08/18] iommu/amd: Update logging information for new event type A new events have been defined in the AMD IOMMU spec: 0x09 - "invalid PPR request" Add support for logging this type of event. Signed-off-by: Gary R Hook ~ ~ ~ Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 10 +++++++++- drivers/iommu/amd_iommu_types.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index aecc49aa6fdb..5fef2466da15 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -545,7 +545,7 @@ static void amd_iommu_report_page_fault(u16 devid, u16 domain_id, static void iommu_print_event(struct amd_iommu *iommu, void *__evt) { struct device *dev = iommu->iommu.dev; - int type, devid, pasid, flags; + int type, devid, pasid, flags, tag; volatile u32 *event = __evt; int count = 0; u64 address; @@ -610,6 +610,14 @@ retry: PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), pasid, address, flags); break; + case EVENT_TYPE_INV_PPR_REQ: + pasid = ((event[0] >> 16) & 0xFFFF) + | ((event[1] << 6) & 0xF0000); + tag = event[1] & 0x03FF; + dev_err(dev, "INVALID_PPR_REQUEST device=%02x:%02x.%x pasid=0x%05x address=0x%016llx flags=0x%04x]\n", + PCI_BUS_NUM(devid), PCI_SLOT(devid), PCI_FUNC(devid), + pasid, address, flags); + break; default: dev_err(dev, "UNKNOWN event[0]=0x%08x event[1]=0x%08x event[2]=0x%08x event[3]=0x%08x\n", event[0], event[1], event[2], event[3]); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index 1c9b080276c9..986cbe0cc189 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -133,6 +133,7 @@ #define EVENT_TYPE_CMD_HARD_ERR 0x6 #define EVENT_TYPE_IOTLB_INV_TO 0x7 #define EVENT_TYPE_INV_DEV_REQ 0x8 +#define EVENT_TYPE_INV_PPR_REQ 0x9 #define EVENT_DEVID_MASK 0xffff #define EVENT_DEVID_SHIFT 0 #define EVENT_DOMID_MASK 0xffff From 7f9584df8495787393d8c18598c2b6eb03e647b0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 14 May 2018 19:22:25 +0300 Subject: [PATCH 09/18] iommu: Remove extra NULL check when call strtobool() strtobool() does check for NULL parameter already. No need to repeat. While here, switch to kstrtobool() and unshadow actual error code (which is still -EINVAL). No functional change intended. Signed-off-by: Andy Shevchenko Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index d2aa23202bb9..7f61b142263e 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -116,9 +116,11 @@ static void __iommu_detach_group(struct iommu_domain *domain, static int __init iommu_set_def_domain_type(char *str) { bool pt; + int ret; - if (!str || strtobool(str, &pt)) - return -EINVAL; + ret = kstrtobool(str, &pt); + if (ret) + return ret; iommu_def_domain_type = pt ? IOMMU_DOMAIN_IDENTITY : IOMMU_DOMAIN_DMA; return 0; From eed91a0b85e55bcda8849d191604537b9933594f Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 4 May 2018 10:34:52 +0800 Subject: [PATCH 10/18] iommu/vt-d: Introduce __mapping_notify_one() Introduce this new helper to notify one newly created mapping on one single IOMMU. We can further leverage this helper in the next patch. Signed-off-by: Peter Xu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 749d8f235346..13190a54aba2 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1606,6 +1606,18 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, iommu_flush_dev_iotlb(domain, addr, mask); } +/* Notification for newly created mappings */ +static inline void __mapping_notify_one(struct intel_iommu *iommu, + struct dmar_domain *domain, + unsigned long pfn, unsigned int pages) +{ + /* It's a non-present to present mapping. Only flush if caching mode */ + if (cap_caching_mode(iommu->cap)) + iommu_flush_iotlb_psi(iommu, domain, pfn, pages, 0, 1); + else + iommu_flush_write_buffer(iommu); +} + static void iommu_flush_iova(struct iova_domain *iovad) { struct dmar_domain *domain; @@ -3625,13 +3637,7 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, if (ret) goto error; - /* it's a non-present to present mapping. Only flush if caching mode */ - if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, domain, - mm_to_dma_pfn(iova_pfn), - size, 0, 1); - else - iommu_flush_write_buffer(iommu); + __mapping_notify_one(iommu, domain, mm_to_dma_pfn(iova_pfn), size); start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT; start_paddr += paddr & ~PAGE_MASK; @@ -3819,11 +3825,7 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele return 0; } - /* it's a non-present to present mapping. Only flush if caching mode */ - if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, domain, start_vpfn, size, 0, 1); - else - iommu_flush_write_buffer(iommu); + __mapping_notify_one(iommu, domain, start_vpfn, size); return nelems; } From 87684fd997a6f12486bb0a638c7dba3130afb376 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 4 May 2018 10:34:53 +0800 Subject: [PATCH 11/18] iommu/vt-d: Fix iotlb psi missing for mappings When caching mode is enabled for IOMMU, we should send explicit IOTLB PSIs even for newly created mappings. However these events are missing for all intel_iommu_map() callers, e.g., iommu_map(). One direct user is the vfio-pci driver. To make sure we'll send the PSIs always when necessary, this patch firstly introduced domain_mapping() helper for page mappings, then fixed the problem by generalizing the explicit map IOTLB PSI logic into that new helper. With that, we let iommu_domain_identity_map() to use the simplified version to avoid sending the notifications, while for all the rest of cases we send the notifications always. For VM case, we send the PSIs to all the backend IOMMUs for the domain. This patch allows the nested device assignment to work with QEMU (assign device firstly to L1 guest, then assign it again to L2 guest). Signed-off-by: Peter Xu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 43 +++++++++++++++++++++++++++++-------- 1 file changed, 34 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 13190a54aba2..601d3789211f 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2352,18 +2352,47 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, return 0; } +static int domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, + struct scatterlist *sg, unsigned long phys_pfn, + unsigned long nr_pages, int prot) +{ + int ret; + struct intel_iommu *iommu; + + /* Do the real mapping first */ + ret = __domain_mapping(domain, iov_pfn, sg, phys_pfn, nr_pages, prot); + if (ret) + return ret; + + /* Notify about the new mapping */ + if (domain_type_is_vm(domain)) { + /* VM typed domains can have more than one IOMMUs */ + int iommu_id; + for_each_domain_iommu(iommu_id, domain) { + iommu = g_iommus[iommu_id]; + __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); + } + } else { + /* General domains only have one IOMMU */ + iommu = domain_get_iommu(domain); + __mapping_notify_one(iommu, domain, iov_pfn, nr_pages); + } + + return 0; +} + static inline int domain_sg_mapping(struct dmar_domain *domain, unsigned long iov_pfn, struct scatterlist *sg, unsigned long nr_pages, int prot) { - return __domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot); + return domain_mapping(domain, iov_pfn, sg, 0, nr_pages, prot); } static inline int domain_pfn_mapping(struct dmar_domain *domain, unsigned long iov_pfn, unsigned long phys_pfn, unsigned long nr_pages, int prot) { - return __domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot); + return domain_mapping(domain, iov_pfn, NULL, phys_pfn, nr_pages, prot); } static void domain_context_clear_one(struct intel_iommu *iommu, u8 bus, u8 devfn) @@ -2668,9 +2697,9 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, */ dma_pte_clear_range(domain, first_vpfn, last_vpfn); - return domain_pfn_mapping(domain, first_vpfn, first_vpfn, - last_vpfn - first_vpfn + 1, - DMA_PTE_READ|DMA_PTE_WRITE); + return __domain_mapping(domain, first_vpfn, NULL, + first_vpfn, last_vpfn - first_vpfn + 1, + DMA_PTE_READ|DMA_PTE_WRITE); } static int domain_prepare_identity_map(struct device *dev, @@ -3637,8 +3666,6 @@ static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, if (ret) goto error; - __mapping_notify_one(iommu, domain, mm_to_dma_pfn(iova_pfn), size); - start_paddr = (phys_addr_t)iova_pfn << PAGE_SHIFT; start_paddr += paddr & ~PAGE_MASK; return start_paddr; @@ -3825,8 +3852,6 @@ static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nele return 0; } - __mapping_notify_one(iommu, domain, start_vpfn, size); - return nelems; } From fcc35c634255a5874793228492c74566aed166ac Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 4 May 2018 13:08:17 +0800 Subject: [PATCH 12/18] iommu/vt-d: Clean up unused variable in find_or_alloc_domain Remove it to make the code more concise. Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 601d3789211f..fd5cfd85c166 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2574,7 +2574,7 @@ static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw) struct device_domain_info *info = NULL; struct dmar_domain *domain = NULL; struct intel_iommu *iommu; - u16 req_id, dma_alias; + u16 dma_alias; unsigned long flags; u8 bus, devfn; @@ -2582,8 +2582,6 @@ static struct dmar_domain *find_or_alloc_domain(struct device *dev, int gaw) if (!iommu) return NULL; - req_id = ((u16)bus << 8) | devfn; - if (dev_is_pci(dev)) { struct pci_dev *pdev = to_pci_dev(dev); From ab96746aaa344fb720a198245a837e266fad3b62 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 4 May 2018 13:08:18 +0800 Subject: [PATCH 13/18] iommu/vt-d: Clean up pasid quirk for pre-production devices The pasid28 quirk is needed only for some pre-production devices. Remove it to make the code concise. Signed-off-by: Ashok Raj Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 32 ++------------------------------ include/linux/intel-iommu.h | 1 - 2 files changed, 2 insertions(+), 31 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index fd5cfd85c166..d79e3ebbe437 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -485,37 +485,14 @@ static int dmar_forcedac; static int intel_iommu_strict; static int intel_iommu_superpage = 1; static int intel_iommu_ecs = 1; -static int intel_iommu_pasid28; static int iommu_identity_mapping; #define IDENTMAP_ALL 1 #define IDENTMAP_GFX 2 #define IDENTMAP_AZALIA 4 -/* Broadwell and Skylake have broken ECS support — normal so-called "second - * level" translation of DMA requests-without-PASID doesn't actually happen - * unless you also set the NESTE bit in an extended context-entry. Which of - * course means that SVM doesn't work because it's trying to do nested - * translation of the physical addresses it finds in the process page tables, - * through the IOVA->phys mapping found in the "second level" page tables. - * - * The VT-d specification was retroactively changed to change the definition - * of the capability bits and pretend that Broadwell/Skylake never happened... - * but unfortunately the wrong bit was changed. It's ECS which is broken, but - * for some reason it was the PASID capability bit which was redefined (from - * bit 28 on BDW/SKL to bit 40 in future). - * - * So our test for ECS needs to eschew those implementations which set the old - * PASID capabiity bit 28, since those are the ones on which ECS is broken. - * Unless we are working around the 'pasid28' limitations, that is, by putting - * the device into passthrough mode for normal DMA and thus masking the bug. - */ -#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap) && \ - (intel_iommu_pasid28 || !ecap_broken_pasid(iommu->ecap))) -/* PASID support is thus enabled if ECS is enabled and *either* of the old - * or new capability bits are set. */ -#define pasid_enabled(iommu) (ecs_enabled(iommu) && \ - (ecap_pasid(iommu->ecap) || ecap_broken_pasid(iommu->ecap))) +#define ecs_enabled(iommu) (intel_iommu_ecs && ecap_ecs(iommu->ecap)) +#define pasid_enabled(iommu) (ecs_enabled(iommu) && ecap_pasid(iommu->ecap)) int intel_iommu_gfx_mapped; EXPORT_SYMBOL_GPL(intel_iommu_gfx_mapped); @@ -578,11 +555,6 @@ static int __init intel_iommu_setup(char *str) printk(KERN_INFO "Intel-IOMMU: disable extended context table support\n"); intel_iommu_ecs = 0; - } else if (!strncmp(str, "pasid28", 7)) { - printk(KERN_INFO - "Intel-IOMMU: enable pre-production PASID support\n"); - intel_iommu_pasid28 = 1; - iommu_identity_mapping |= IDENTMAP_GFX; } else if (!strncmp(str, "tboot_noforce", 13)) { printk(KERN_INFO "Intel-IOMMU: not forcing on after tboot. This could expose security risk for tboot\n"); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index ef169d67df92..1df940196ab2 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -121,7 +121,6 @@ #define ecap_srs(e) ((e >> 31) & 0x1) #define ecap_ers(e) ((e >> 30) & 0x1) #define ecap_prs(e) ((e >> 29) & 0x1) -#define ecap_broken_pasid(e) ((e >> 28) & 0x1) #define ecap_dis(e) ((e >> 27) & 0x1) #define ecap_nest(e) ((e >> 26) & 0x1) #define ecap_mts(e) ((e >> 25) & 0x1) From bb37f7db07293ec5bfc9b389948ea493b93028b7 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 4 May 2018 13:08:19 +0800 Subject: [PATCH 14/18] iommu/vt-d: Remove unnecessary parentheses Remove unnecessary parentheses to comply with preferred coding style. Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-svm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-svm.c b/drivers/iommu/intel-svm.c index e8cd984cf9c8..45f6e581cd56 100644 --- a/drivers/iommu/intel-svm.c +++ b/drivers/iommu/intel-svm.c @@ -319,7 +319,7 @@ int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, struct svm_dev_ } else pasid_max = 1 << 20; - if ((flags & SVM_FLAG_SUPERVISOR_MODE)) { + if (flags & SVM_FLAG_SUPERVISOR_MODE) { if (!ecap_srs(iommu->ecap)) return -EINVAL; } else if (pasid) { From 1eefe5a034e3b9104f129ae4e8632838b8702a41 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 4 May 2018 13:08:16 +0800 Subject: [PATCH 15/18] iommu: Clean up the comments for iommu_group_alloc @name parameter has been removed. Signed-off-by: Lu Baolu Signed-off-by: Joerg Roedel --- drivers/iommu/iommu.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 7f61b142263e..63b37563db7e 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -324,7 +324,6 @@ static struct kobj_type iommu_group_ktype = { /** * iommu_group_alloc - Allocate a new group - * @name: Optional name to associate with group, visible in sysfs * * This function is called by an iommu driver to allocate a new iommu * group. The iommu group represents the minimum granularity of the iommu. From 29a0c41541baa88a10d08db3b737e7e8a470cb0e Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Mon, 7 May 2018 14:53:26 +0200 Subject: [PATCH 16/18] iommu/amd: Fix grammar of comments Suggested-by: Gary R Hook Signed-off-by: Anna-Maria Gleixner Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 5fef2466da15..ec5faf6db570 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1943,8 +1943,8 @@ static void do_detach(struct iommu_dev_data *dev_data) } /* - * If a device is not yet associated with a domain, this function does - * assigns it visible for the hardware + * If a device is not yet associated with a domain, this function makes the + * device visible in the domain */ static int __attach_device(struct iommu_dev_data *dev_data, struct protection_domain *domain) @@ -2065,8 +2065,8 @@ static bool pci_pri_tlp_required(struct pci_dev *pdev) } /* - * If a device is not yet associated with a domain, this function - * assigns it visible for the hardware + * If a device is not yet associated with a domain, this function makes the + * device visible in the domain */ static int attach_device(struct device *dev, struct protection_domain *domain) From ea3fd04028cb9e317133e0f8794d15ad508489a8 Mon Sep 17 00:00:00 2001 From: Anna-Maria Gleixner Date: Mon, 7 May 2018 14:53:27 +0200 Subject: [PATCH 17/18] iommu/amd: Prevent possible null pointer dereference and infinite loop The check for !dev_data->domain in __detach_device() emits a warning and returns. The calling code in detach_device() dereferences dev_data->domain afterwards unconditionally, so in case that dev_data->domain is NULL the warning will be immediately followed by a NULL pointer dereference. The calling code in cleanup_domain() loops infinite when !dev_data->domain and the check in __detach_device() returns immediately because dev_list is not changed. do_detach() duplicates this check without throwing a warning. Move the check with the explanation of the do_detach() code into the caller detach_device() and return immediately. Throw an error, when hitting the condition in cleanup_domain(). Signed-off-by: Anna-Maria Gleixner Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index ec5faf6db570..92241ffab455 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -1915,15 +1915,6 @@ static void do_detach(struct iommu_dev_data *dev_data) struct amd_iommu *iommu; u16 alias; - /* - * First check if the device is still attached. It might already - * be detached from its domain because the generic - * iommu_detach_group code detached it and we try again here in - * our alias handling. - */ - if (!dev_data->domain) - return; - iommu = amd_iommu_rlookup_table[dev_data->devid]; alias = dev_data->alias; @@ -2128,9 +2119,6 @@ static void __detach_device(struct iommu_dev_data *dev_data) */ WARN_ON(!irqs_disabled()); - if (WARN_ON(!dev_data->domain)) - return; - domain = dev_data->domain; spin_lock(&domain->lock); @@ -2152,6 +2140,15 @@ static void detach_device(struct device *dev) dev_data = get_dev_data(dev); domain = dev_data->domain; + /* + * First check if the device is still attached. It might already + * be detached from its domain because the generic + * iommu_detach_group code detached it and we try again here in + * our alias handling. + */ + if (WARN_ON(!dev_data->domain)) + return; + /* lock device table */ spin_lock_irqsave(&amd_iommu_devtable_lock, flags); __detach_device(dev_data); @@ -2797,6 +2794,7 @@ static void cleanup_domain(struct protection_domain *domain) while (!list_empty(&domain->dev_list)) { entry = list_first_entry(&domain->dev_list, struct iommu_dev_data, list); + BUG_ON(!entry->domain); __detach_device(entry); } From 4b123757eeaab1d522605b4469ee1adc18a80c90 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 22 May 2018 12:50:09 +0100 Subject: [PATCH 18/18] iommu/io-pgtable-arm: Make allocations NUMA-aware We would generally expect pagetables to be read by the IOMMU more than written by the CPU, so in NUMA systems it makes sense to locate them close to the former and avoid cross-node pagetable walks if at all possible. As it turns out, we already have a handle on the IOMMU device for the sake of coherency management, so it's trivial to grab the appropriate NUMA node when allocating new pagetable pages. Note that we drop the semantics of alloc_pages_exact(), but that's fine since they have never been necessary: the only time we're allocating more than one page is for stage 2 top-level concatenation, but since that is based on the number of IPA bits, the size is always some exact power of two anyway. Acked-by: Will Deacon Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- drivers/iommu/io-pgtable-arm.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/io-pgtable-arm.c b/drivers/iommu/io-pgtable-arm.c index 4ffdd88b1566..010a254305dd 100644 --- a/drivers/iommu/io-pgtable-arm.c +++ b/drivers/iommu/io-pgtable-arm.c @@ -231,12 +231,17 @@ static void *__arm_lpae_alloc_pages(size_t size, gfp_t gfp, struct io_pgtable_cfg *cfg) { struct device *dev = cfg->iommu_dev; + int order = get_order(size); + struct page *p; dma_addr_t dma; - void *pages = alloc_pages_exact(size, gfp | __GFP_ZERO); + void *pages; - if (!pages) + VM_BUG_ON((gfp & __GFP_HIGHMEM)); + p = alloc_pages_node(dev_to_node(dev), gfp | __GFP_ZERO, order); + if (!p) return NULL; + pages = page_address(p); if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { dma = dma_map_single(dev, pages, size, DMA_TO_DEVICE); if (dma_mapping_error(dev, dma)) @@ -256,7 +261,7 @@ out_unmap: dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n"); dma_unmap_single(dev, dma, size, DMA_TO_DEVICE); out_free: - free_pages_exact(pages, size); + __free_pages(p, order); return NULL; } @@ -266,7 +271,7 @@ static void __arm_lpae_free_pages(void *pages, size_t size, if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) dma_unmap_single(cfg->iommu_dev, __arm_lpae_dma_addr(pages), size, DMA_TO_DEVICE); - free_pages_exact(pages, size); + free_pages((unsigned long)pages, get_order(size)); } static void __arm_lpae_sync_pte(arm_lpae_iopte *ptep,